From 117206ba3945de15fc380588de3c779526977c7f Mon Sep 17 00:00:00 2001 From: leestott Date: Sat, 6 Sep 2025 15:23:27 +0000 Subject: [PATCH] =?UTF-8?q?=F0=9F=8C=90=20Update=20translations=20via=20Co?= =?UTF-8?q?-op=20Translator?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../sk/2-Regression/1-Tools/notebook.ipynb | 0 .../1-Tools/solution/R/lesson_1-R.ipynb | 447 +++ .../1-Tools/solution/notebook.ipynb | 673 ++++ .../sk/2-Regression/2-Data/notebook.ipynb | 46 + .../2-Data/solution/R/lesson_2-R.ipynb | 685 ++++ .../2-Data/solution/notebook.ipynb | 437 +++ .../sk/2-Regression/3-Linear/notebook.ipynb | 128 + .../3-Linear/solution/R/lesson_3-R.ipynb | 1081 +++++++ .../3-Linear/solution/notebook.ipynb | 1111 +++++++ .../sk/2-Regression/4-Logistic/notebook.ipynb | 269 ++ .../4-Logistic/solution/R/lesson_4-R.ipynb | 686 ++++ .../4-Logistic/solution/notebook.ipynb | 1255 ++++++++ .../sk/3-Web-App/1-Web-App/notebook.ipynb | 0 .../1-Web-App/solution/notebook.ipynb | 267 ++ .../1-Introduction/notebook.ipynb | 39 + .../solution/R/lesson_10-R.ipynb | 721 +++++ .../1-Introduction/solution/notebook.ipynb | 672 ++++ .../2-Classifiers-1/notebook.ipynb | 39 + .../solution/R/lesson_11-R.ipynb | 1285 ++++++++ .../2-Classifiers-1/solution/notebook.ipynb | 279 ++ .../3-Classifiers-2/notebook.ipynb | 163 + .../solution/R/lesson_12-R.ipynb | 648 ++++ .../3-Classifiers-2/solution/notebook.ipynb | 302 ++ .../4-Classification/4-Applied/notebook.ipynb | 39 + .../4-Applied/solution/notebook.ipynb | 290 ++ .../5-Clustering/1-Visualize/notebook.ipynb | 50 + .../1-Visualize/solution/R/lesson_14-R.ipynb | 499 +++ .../1-Visualize/solution/notebook.ipynb | 830 +++++ .../sk/5-Clustering/2-K-Means/notebook.ipynb | 229 ++ .../2-K-Means/solution/R/lesson_15-R.ipynb | 642 ++++ .../2-K-Means/solution/notebook.ipynb | 544 ++++ .../2-K-Means/solution/tester.ipynb | 341 ++ .../solution/notebook.ipynb | 100 + .../sk/6-NLP/4-Hotel-Reviews-1/notebook.ipynb | 0 
.../4-Hotel-Reviews-1/solution/notebook.ipynb | 174 + .../sk/6-NLP/5-Hotel-Reviews-2/notebook.ipynb | 0 .../solution/1-notebook.ipynb | 172 + .../solution/2-notebook.ipynb | 137 + .../solution/3-notebook.ipynb | 260 ++ .../1-Introduction/solution/notebook.ipynb | 162 + .../1-Introduction/working/notebook.ipynb | 63 + .../2-ARIMA/solution/notebook.ipynb | 1124 +++++++ .../2-ARIMA/working/notebook.ipynb | 59 + .../3-SVR/solution/notebook.ipynb | 1019 ++++++ .../7-TimeSeries/3-SVR/working/notebook.ipynb | 695 ++++ .../1-QLearning/notebook.ipynb | 411 +++ .../solution/assignment-solution.ipynb | 458 +++ .../1-QLearning/solution/notebook.ipynb | 577 ++++ .../sk/8-Reinforcement/2-Gym/notebook.ipynb | 392 +++ .../2-Gym/solution/notebook.ipynb | 524 +++ translations/sk/PyTorch_Fundamentals.ipynb | 2828 ++++++++++++++++ .../sl/2-Regression/1-Tools/notebook.ipynb | 0 .../1-Tools/solution/R/lesson_1-R.ipynb | 447 +++ .../1-Tools/solution/notebook.ipynb | 677 ++++ .../sl/2-Regression/2-Data/notebook.ipynb | 46 + .../2-Data/solution/R/lesson_2-R.ipynb | 684 ++++ .../2-Data/solution/notebook.ipynb | 439 +++ .../sl/2-Regression/3-Linear/notebook.ipynb | 128 + .../3-Linear/solution/R/lesson_3-R.ipynb | 1089 +++++++ .../3-Linear/solution/notebook.ipynb | 1113 +++++++ .../sl/2-Regression/4-Logistic/notebook.ipynb | 269 ++ .../4-Logistic/solution/R/lesson_4-R.ipynb | 685 ++++ .../4-Logistic/solution/notebook.ipynb | 1258 ++++++++ .../sl/3-Web-App/1-Web-App/notebook.ipynb | 0 .../1-Web-App/solution/notebook.ipynb | 267 ++ .../1-Introduction/notebook.ipynb | 39 + .../solution/R/lesson_10-R.ipynb | 724 +++++ .../1-Introduction/solution/notebook.ipynb | 699 ++++ .../2-Classifiers-1/notebook.ipynb | 41 + .../solution/R/lesson_11-R.ipynb | 1302 ++++++++ .../2-Classifiers-1/solution/notebook.ipynb | 281 ++ .../3-Classifiers-2/notebook.ipynb | 165 + .../solution/R/lesson_12-R.ipynb | 647 ++++ .../3-Classifiers-2/solution/notebook.ipynb | 304 ++ .../4-Classification/4-Applied/notebook.ipynb | 
41 + .../4-Applied/solution/notebook.ipynb | 292 ++ .../5-Clustering/1-Visualize/notebook.ipynb | 50 + .../1-Visualize/solution/R/lesson_14-R.ipynb | 488 +++ .../1-Visualize/solution/notebook.ipynb | 892 ++++++ .../sl/5-Clustering/2-K-Means/notebook.ipynb | 231 ++ .../2-K-Means/solution/R/lesson_15-R.ipynb | 640 ++++ .../2-K-Means/solution/notebook.ipynb | 550 ++++ .../2-K-Means/solution/tester.ipynb | 343 ++ .../solution/notebook.ipynb | 100 + .../sl/6-NLP/4-Hotel-Reviews-1/notebook.ipynb | 0 .../4-Hotel-Reviews-1/solution/notebook.ipynb | 174 + .../sl/6-NLP/5-Hotel-Reviews-2/notebook.ipynb | 0 .../solution/1-notebook.ipynb | 172 + .../solution/2-notebook.ipynb | 137 + .../solution/3-notebook.ipynb | 260 ++ .../1-Introduction/solution/notebook.ipynb | 162 + .../1-Introduction/working/notebook.ipynb | 63 + .../2-ARIMA/solution/notebook.ipynb | 1149 +++++++ .../2-ARIMA/working/notebook.ipynb | 59 + .../3-SVR/solution/notebook.ipynb | 1029 ++++++ .../7-TimeSeries/3-SVR/working/notebook.ipynb | 705 ++++ .../1-QLearning/notebook.ipynb | 411 +++ .../solution/assignment-solution.ipynb | 466 +++ .../1-QLearning/solution/notebook.ipynb | 577 ++++ .../sl/8-Reinforcement/2-Gym/notebook.ipynb | 394 +++ .../2-Gym/solution/notebook.ipynb | 526 +++ translations/sl/PyTorch_Fundamentals.ipynb | 2830 +++++++++++++++++ .../sr/2-Regression/1-Tools/notebook.ipynb | 0 .../1-Tools/solution/R/lesson_1-R.ipynb | 447 +++ .../1-Tools/solution/notebook.ipynb | 677 ++++ .../sr/2-Regression/2-Data/notebook.ipynb | 46 + .../2-Data/solution/R/lesson_2-R.ipynb | 672 ++++ .../2-Data/solution/notebook.ipynb | 437 +++ .../sr/2-Regression/3-Linear/notebook.ipynb | 128 + .../3-Linear/solution/R/lesson_3-R.ipynb | 1084 +++++++ .../3-Linear/solution/notebook.ipynb | 1113 +++++++ .../sr/2-Regression/4-Logistic/notebook.ipynb | 269 ++ .../4-Logistic/solution/R/lesson_4-R.ipynb | 685 ++++ .../4-Logistic/solution/notebook.ipynb | 1255 ++++++++ .../sr/3-Web-App/1-Web-App/notebook.ipynb | 0 
.../1-Web-App/solution/notebook.ipynb | 267 ++ .../1-Introduction/notebook.ipynb | 39 + .../solution/R/lesson_10-R.ipynb | 725 +++++ .../1-Introduction/solution/notebook.ipynb | 737 +++++ .../2-Classifiers-1/notebook.ipynb | 41 + .../solution/R/lesson_11-R.ipynb | 1298 ++++++++ .../2-Classifiers-1/solution/notebook.ipynb | 281 ++ .../3-Classifiers-2/notebook.ipynb | 163 + .../solution/R/lesson_12-R.ipynb | 650 ++++ .../3-Classifiers-2/solution/notebook.ipynb | 302 ++ .../4-Classification/4-Applied/notebook.ipynb | 39 + .../4-Applied/solution/notebook.ipynb | 290 ++ .../5-Clustering/1-Visualize/notebook.ipynb | 50 + .../1-Visualize/solution/R/lesson_14-R.ipynb | 500 +++ .../1-Visualize/solution/notebook.ipynb | 882 +++++ .../sr/5-Clustering/2-K-Means/notebook.ipynb | 231 ++ .../2-K-Means/solution/R/lesson_15-R.ipynb | 642 ++++ .../2-K-Means/solution/notebook.ipynb | 548 ++++ .../2-K-Means/solution/tester.ipynb | 343 ++ .../solution/notebook.ipynb | 100 + .../sr/6-NLP/4-Hotel-Reviews-1/notebook.ipynb | 0 .../4-Hotel-Reviews-1/solution/notebook.ipynb | 174 + .../sr/6-NLP/5-Hotel-Reviews-2/notebook.ipynb | 0 .../solution/1-notebook.ipynb | 172 + .../solution/2-notebook.ipynb | 137 + .../solution/3-notebook.ipynb | 260 ++ .../1-Introduction/solution/notebook.ipynb | 164 + .../1-Introduction/working/notebook.ipynb | 63 + .../2-ARIMA/solution/notebook.ipynb | 1140 +++++++ .../2-ARIMA/working/notebook.ipynb | 50 + .../3-SVR/solution/notebook.ipynb | 1023 ++++++ .../7-TimeSeries/3-SVR/working/notebook.ipynb | 699 ++++ .../1-QLearning/notebook.ipynb | 411 +++ .../solution/assignment-solution.ipynb | 469 +++ .../1-QLearning/solution/notebook.ipynb | 577 ++++ .../sr/8-Reinforcement/2-Gym/notebook.ipynb | 394 +++ .../2-Gym/solution/notebook.ipynb | 526 +++ translations/sr/PyTorch_Fundamentals.ipynb | 2830 +++++++++++++++++ .../sv/2-Regression/1-Tools/notebook.ipynb | 0 .../1-Tools/solution/R/lesson_1-R.ipynb | 447 +++ .../1-Tools/solution/notebook.ipynb | 677 ++++ 
.../sv/2-Regression/2-Data/notebook.ipynb | 46 + .../2-Data/solution/R/lesson_2-R.ipynb | 671 ++++ .../2-Data/solution/notebook.ipynb | 437 +++ .../sv/2-Regression/3-Linear/notebook.ipynb | 128 + .../3-Linear/solution/R/lesson_3-R.ipynb | 1089 +++++++ .../3-Linear/solution/notebook.ipynb | 1113 +++++++ .../sv/2-Regression/4-Logistic/notebook.ipynb | 269 ++ .../4-Logistic/solution/R/lesson_4-R.ipynb | 686 ++++ .../4-Logistic/solution/notebook.ipynb | 1257 ++++++++ .../sv/3-Web-App/1-Web-App/notebook.ipynb | 0 .../1-Web-App/solution/notebook.ipynb | 267 ++ .../1-Introduction/notebook.ipynb | 39 + .../solution/R/lesson_10-R.ipynb | 727 +++++ .../1-Introduction/solution/notebook.ipynb | 711 +++++ .../2-Classifiers-1/notebook.ipynb | 41 + .../solution/R/lesson_11-R.ipynb | 1298 ++++++++ .../2-Classifiers-1/solution/notebook.ipynb | 281 ++ .../3-Classifiers-2/notebook.ipynb | 163 + .../solution/R/lesson_12-R.ipynb | 650 ++++ .../3-Classifiers-2/solution/notebook.ipynb | 302 ++ .../4-Classification/4-Applied/notebook.ipynb | 39 + .../4-Applied/solution/notebook.ipynb | 290 ++ .../5-Clustering/1-Visualize/notebook.ipynb | 50 + .../1-Visualize/solution/R/lesson_14-R.ipynb | 500 +++ .../1-Visualize/solution/notebook.ipynb | 821 +++++ .../sv/5-Clustering/2-K-Means/notebook.ipynb | 231 ++ .../2-K-Means/solution/R/lesson_15-R.ipynb | 640 ++++ .../2-K-Means/solution/notebook.ipynb | 550 ++++ .../2-K-Means/solution/tester.ipynb | 343 ++ .../solution/notebook.ipynb | 100 + .../sv/6-NLP/4-Hotel-Reviews-1/notebook.ipynb | 0 .../4-Hotel-Reviews-1/solution/notebook.ipynb | 174 + .../sv/6-NLP/5-Hotel-Reviews-2/notebook.ipynb | 0 .../solution/1-notebook.ipynb | 172 + .../solution/2-notebook.ipynb | 137 + .../solution/3-notebook.ipynb | 260 ++ .../1-Introduction/solution/notebook.ipynb | 170 + .../1-Introduction/working/notebook.ipynb | 63 + .../2-ARIMA/solution/notebook.ipynb | 1132 +++++++ .../2-ARIMA/working/notebook.ipynb | 59 + .../3-SVR/solution/notebook.ipynb | 1025 ++++++ 
.../7-TimeSeries/3-SVR/working/notebook.ipynb | 701 ++++ .../1-QLearning/notebook.ipynb | 411 +++ .../solution/assignment-solution.ipynb | 469 +++ .../1-QLearning/solution/notebook.ipynb | 577 ++++ .../sv/8-Reinforcement/2-Gym/notebook.ipynb | 394 +++ .../2-Gym/solution/notebook.ipynb | 526 +++ translations/sv/PyTorch_Fundamentals.ipynb | 2830 +++++++++++++++++ .../sw/2-Regression/1-Tools/notebook.ipynb | 0 .../1-Tools/solution/R/lesson_1-R.ipynb | 448 +++ .../1-Tools/solution/notebook.ipynb | 671 ++++ .../sw/2-Regression/2-Data/notebook.ipynb | 46 + .../2-Data/solution/R/lesson_2-R.ipynb | 671 ++++ .../2-Data/solution/notebook.ipynb | 437 +++ .../sw/2-Regression/3-Linear/notebook.ipynb | 128 + .../3-Linear/solution/R/lesson_3-R.ipynb | 1084 +++++++ .../3-Linear/solution/notebook.ipynb | 1111 +++++++ .../sw/2-Regression/4-Logistic/notebook.ipynb | 269 ++ .../4-Logistic/solution/R/lesson_4-R.ipynb | 686 ++++ .../4-Logistic/solution/notebook.ipynb | 1255 ++++++++ .../sw/3-Web-App/1-Web-App/notebook.ipynb | 0 .../1-Web-App/solution/notebook.ipynb | 267 ++ .../1-Introduction/notebook.ipynb | 39 + .../solution/R/lesson_10-R.ipynb | 721 +++++ .../1-Introduction/solution/notebook.ipynb | 739 +++++ .../2-Classifiers-1/notebook.ipynb | 39 + .../solution/R/lesson_11-R.ipynb | 1294 ++++++++ .../2-Classifiers-1/solution/notebook.ipynb | 279 ++ .../3-Classifiers-2/notebook.ipynb | 163 + .../solution/R/lesson_12-R.ipynb | 648 ++++ .../3-Classifiers-2/solution/notebook.ipynb | 302 ++ .../4-Classification/4-Applied/notebook.ipynb | 39 + .../4-Applied/solution/notebook.ipynb | 290 ++ .../5-Clustering/1-Visualize/notebook.ipynb | 50 + .../1-Visualize/solution/R/lesson_14-R.ipynb | 500 +++ .../1-Visualize/solution/notebook.ipynb | 817 +++++ .../sw/5-Clustering/2-K-Means/notebook.ipynb | 229 ++ .../2-K-Means/solution/R/lesson_15-R.ipynb | 642 ++++ .../2-K-Means/solution/notebook.ipynb | 544 ++++ .../2-K-Means/solution/tester.ipynb | 341 ++ .../solution/notebook.ipynb | 100 + 
.../sw/6-NLP/4-Hotel-Reviews-1/notebook.ipynb | 0 .../4-Hotel-Reviews-1/solution/notebook.ipynb | 174 + .../sw/6-NLP/5-Hotel-Reviews-2/notebook.ipynb | 0 .../solution/1-notebook.ipynb | 172 + .../solution/2-notebook.ipynb | 137 + .../solution/3-notebook.ipynb | 260 ++ .../1-Introduction/solution/notebook.ipynb | 162 + .../1-Introduction/working/notebook.ipynb | 63 + .../2-ARIMA/solution/notebook.ipynb | 1131 +++++++ .../2-ARIMA/working/notebook.ipynb | 61 + .../3-SVR/solution/notebook.ipynb | 1017 ++++++ .../7-TimeSeries/3-SVR/working/notebook.ipynb | 693 ++++ .../1-QLearning/notebook.ipynb | 411 +++ .../solution/assignment-solution.ipynb | 425 +++ .../1-QLearning/solution/notebook.ipynb | 577 ++++ .../sw/8-Reinforcement/2-Gym/notebook.ipynb | 392 +++ .../2-Gym/solution/notebook.ipynb | 524 +++ translations/sw/PyTorch_Fundamentals.ipynb | 2828 ++++++++++++++++ .../th/2-Regression/1-Tools/notebook.ipynb | 0 .../1-Tools/solution/R/lesson_1-R.ipynb | 448 +++ .../1-Tools/solution/notebook.ipynb | 669 ++++ .../th/2-Regression/2-Data/notebook.ipynb | 46 + .../2-Data/solution/R/lesson_2-R.ipynb | 672 ++++ .../2-Data/solution/notebook.ipynb | 437 +++ .../th/2-Regression/3-Linear/notebook.ipynb | 128 + .../3-Linear/solution/R/lesson_3-R.ipynb | 1083 +++++++ .../3-Linear/solution/notebook.ipynb | 1109 +++++++ .../th/2-Regression/4-Logistic/notebook.ipynb | 269 ++ .../4-Logistic/solution/R/lesson_4-R.ipynb | 686 ++++ .../4-Logistic/solution/notebook.ipynb | 1255 ++++++++ .../th/3-Web-App/1-Web-App/notebook.ipynb | 0 .../1-Web-App/solution/notebook.ipynb | 267 ++ .../1-Introduction/notebook.ipynb | 39 + .../solution/R/lesson_10-R.ipynb | 716 +++++ .../1-Introduction/solution/notebook.ipynb | 700 ++++ .../2-Classifiers-1/notebook.ipynb | 39 + .../solution/R/lesson_11-R.ipynb | 1294 ++++++++ .../2-Classifiers-1/solution/notebook.ipynb | 279 ++ .../3-Classifiers-2/notebook.ipynb | 163 + .../solution/R/lesson_12-R.ipynb | 648 ++++ .../3-Classifiers-2/solution/notebook.ipynb | 300 
++ .../4-Classification/4-Applied/notebook.ipynb | 39 + .../4-Applied/solution/notebook.ipynb | 290 ++ .../5-Clustering/1-Visualize/notebook.ipynb | 50 + .../1-Visualize/solution/R/lesson_14-R.ipynb | 493 +++ .../1-Visualize/solution/notebook.ipynb | 817 +++++ .../th/5-Clustering/2-K-Means/notebook.ipynb | 229 ++ .../2-K-Means/solution/R/lesson_15-R.ipynb | 639 ++++ .../2-K-Means/solution/notebook.ipynb | 544 ++++ .../2-K-Means/solution/tester.ipynb | 341 ++ .../solution/notebook.ipynb | 100 + .../th/6-NLP/4-Hotel-Reviews-1/notebook.ipynb | 0 .../4-Hotel-Reviews-1/solution/notebook.ipynb | 174 + .../th/6-NLP/5-Hotel-Reviews-2/notebook.ipynb | 0 .../solution/1-notebook.ipynb | 172 + .../solution/2-notebook.ipynb | 137 + .../solution/3-notebook.ipynb | 260 ++ .../1-Introduction/solution/notebook.ipynb | 169 + .../1-Introduction/working/notebook.ipynb | 64 + .../2-ARIMA/solution/notebook.ipynb | 1095 +++++++ .../2-ARIMA/working/notebook.ipynb | 61 + .../3-SVR/solution/notebook.ipynb | 1013 ++++++ .../7-TimeSeries/3-SVR/working/notebook.ipynb | 689 ++++ .../1-QLearning/notebook.ipynb | 411 +++ .../solution/assignment-solution.ipynb | 447 +++ .../1-QLearning/solution/notebook.ipynb | 577 ++++ .../th/8-Reinforcement/2-Gym/notebook.ipynb | 392 +++ .../2-Gym/solution/notebook.ipynb | 524 +++ translations/th/PyTorch_Fundamentals.ipynb | 2828 ++++++++++++++++ .../tr/2-Regression/1-Tools/notebook.ipynb | 0 .../1-Tools/solution/R/lesson_1-R.ipynb | 448 +++ .../1-Tools/solution/notebook.ipynb | 677 ++++ .../tr/2-Regression/2-Data/notebook.ipynb | 46 + .../2-Data/solution/R/lesson_2-R.ipynb | 672 ++++ .../2-Data/solution/notebook.ipynb | 437 +++ .../tr/2-Regression/3-Linear/notebook.ipynb | 128 + .../3-Linear/solution/R/lesson_3-R.ipynb | 1086 +++++++ .../3-Linear/solution/notebook.ipynb | 1113 +++++++ .../tr/2-Regression/4-Logistic/notebook.ipynb | 269 ++ .../4-Logistic/solution/R/lesson_4-R.ipynb | 686 ++++ .../4-Logistic/solution/notebook.ipynb | 1257 ++++++++ 
.../tr/3-Web-App/1-Web-App/notebook.ipynb | 0 .../1-Web-App/solution/notebook.ipynb | 267 ++ .../1-Introduction/notebook.ipynb | 39 + .../solution/R/lesson_10-R.ipynb | 727 +++++ .../1-Introduction/solution/notebook.ipynb | 717 +++++ .../2-Classifiers-1/notebook.ipynb | 41 + .../solution/R/lesson_11-R.ipynb | 1302 ++++++++ .../2-Classifiers-1/solution/notebook.ipynb | 281 ++ .../3-Classifiers-2/notebook.ipynb | 163 + .../solution/R/lesson_12-R.ipynb | 650 ++++ .../3-Classifiers-2/solution/notebook.ipynb | 302 ++ .../4-Classification/4-Applied/notebook.ipynb | 39 + .../4-Applied/solution/notebook.ipynb | 290 ++ .../5-Clustering/1-Visualize/notebook.ipynb | 50 + .../1-Visualize/solution/R/lesson_14-R.ipynb | 493 +++ .../1-Visualize/solution/notebook.ipynb | 821 +++++ .../tr/5-Clustering/2-K-Means/notebook.ipynb | 231 ++ .../2-K-Means/solution/R/lesson_15-R.ipynb | 639 ++++ .../2-K-Means/solution/notebook.ipynb | 548 ++++ .../2-K-Means/solution/tester.ipynb | 343 ++ .../solution/notebook.ipynb | 100 + .../tr/6-NLP/4-Hotel-Reviews-1/notebook.ipynb | 0 .../4-Hotel-Reviews-1/solution/notebook.ipynb | 174 + .../tr/6-NLP/5-Hotel-Reviews-2/notebook.ipynb | 0 .../solution/1-notebook.ipynb | 172 + .../solution/2-notebook.ipynb | 137 + .../solution/3-notebook.ipynb | 260 ++ .../1-Introduction/solution/notebook.ipynb | 168 + .../1-Introduction/working/notebook.ipynb | 63 + .../2-ARIMA/solution/notebook.ipynb | 1143 +++++++ .../2-ARIMA/working/notebook.ipynb | 59 + .../3-SVR/solution/notebook.ipynb | 1023 ++++++ .../7-TimeSeries/3-SVR/working/notebook.ipynb | 699 ++++ .../1-QLearning/notebook.ipynb | 411 +++ .../solution/assignment-solution.ipynb | 462 +++ .../1-QLearning/solution/notebook.ipynb | 577 ++++ .../tr/8-Reinforcement/2-Gym/notebook.ipynb | 394 +++ .../2-Gym/solution/notebook.ipynb | 526 +++ translations/tr/PyTorch_Fundamentals.ipynb | 2830 +++++++++++++++++ .../vi/2-Regression/1-Tools/notebook.ipynb | 0 .../1-Tools/solution/R/lesson_1-R.ipynb | 448 +++ 
.../1-Tools/solution/notebook.ipynb | 675 ++++ .../vi/2-Regression/2-Data/notebook.ipynb | 46 + .../2-Data/solution/R/lesson_2-R.ipynb | 673 ++++ .../2-Data/solution/notebook.ipynb | 437 +++ .../vi/2-Regression/3-Linear/notebook.ipynb | 128 + .../3-Linear/solution/R/lesson_3-R.ipynb | 1086 +++++++ .../3-Linear/solution/notebook.ipynb | 1111 +++++++ .../vi/2-Regression/4-Logistic/notebook.ipynb | 269 ++ .../4-Logistic/solution/R/lesson_4-R.ipynb | 686 ++++ .../4-Logistic/solution/notebook.ipynb | 1256 ++++++++ .../vi/3-Web-App/1-Web-App/notebook.ipynb | 0 .../1-Web-App/solution/notebook.ipynb | 267 ++ .../1-Introduction/notebook.ipynb | 39 + .../solution/R/lesson_10-R.ipynb | 721 +++++ .../1-Introduction/solution/notebook.ipynb | 701 ++++ .../2-Classifiers-1/notebook.ipynb | 41 + .../solution/R/lesson_11-R.ipynb | 1298 ++++++++ .../2-Classifiers-1/solution/notebook.ipynb | 281 ++ .../3-Classifiers-2/notebook.ipynb | 163 + .../solution/R/lesson_12-R.ipynb | 648 ++++ .../3-Classifiers-2/solution/notebook.ipynb | 302 ++ .../4-Classification/4-Applied/notebook.ipynb | 39 + .../4-Applied/solution/notebook.ipynb | 290 ++ .../5-Clustering/1-Visualize/notebook.ipynb | 50 + .../1-Visualize/solution/R/lesson_14-R.ipynb | 493 +++ .../1-Visualize/solution/notebook.ipynb | 817 +++++ .../vi/5-Clustering/2-K-Means/notebook.ipynb | 231 ++ .../2-K-Means/solution/R/lesson_15-R.ipynb | 639 ++++ .../2-K-Means/solution/notebook.ipynb | 546 ++++ .../2-K-Means/solution/tester.ipynb | 343 ++ .../solution/notebook.ipynb | 100 + .../vi/6-NLP/4-Hotel-Reviews-1/notebook.ipynb | 0 .../4-Hotel-Reviews-1/solution/notebook.ipynb | 174 + .../vi/6-NLP/5-Hotel-Reviews-2/notebook.ipynb | 0 .../solution/1-notebook.ipynb | 172 + .../solution/2-notebook.ipynb | 137 + .../solution/3-notebook.ipynb | 260 ++ .../1-Introduction/solution/notebook.ipynb | 164 + .../1-Introduction/working/notebook.ipynb | 63 + .../2-ARIMA/solution/notebook.ipynb | 1137 +++++++ .../2-ARIMA/working/notebook.ipynb | 59 + 
.../3-SVR/solution/notebook.ipynb | 1019 ++++++ .../7-TimeSeries/3-SVR/working/notebook.ipynb | 695 ++++ .../1-QLearning/notebook.ipynb | 411 +++ .../solution/assignment-solution.ipynb | 460 +++ .../1-QLearning/solution/notebook.ipynb | 577 ++++ .../vi/8-Reinforcement/2-Gym/notebook.ipynb | 390 +++ .../2-Gym/solution/notebook.ipynb | 522 +++ translations/vi/PyTorch_Fundamentals.ipynb | 2828 ++++++++++++++++ 408 files changed, 191420 insertions(+) create mode 100644 translations/sk/2-Regression/1-Tools/notebook.ipynb create mode 100644 translations/sk/2-Regression/1-Tools/solution/R/lesson_1-R.ipynb create mode 100644 translations/sk/2-Regression/1-Tools/solution/notebook.ipynb create mode 100644 translations/sk/2-Regression/2-Data/notebook.ipynb create mode 100644 translations/sk/2-Regression/2-Data/solution/R/lesson_2-R.ipynb create mode 100644 translations/sk/2-Regression/2-Data/solution/notebook.ipynb create mode 100644 translations/sk/2-Regression/3-Linear/notebook.ipynb create mode 100644 translations/sk/2-Regression/3-Linear/solution/R/lesson_3-R.ipynb create mode 100644 translations/sk/2-Regression/3-Linear/solution/notebook.ipynb create mode 100644 translations/sk/2-Regression/4-Logistic/notebook.ipynb create mode 100644 translations/sk/2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb create mode 100644 translations/sk/2-Regression/4-Logistic/solution/notebook.ipynb create mode 100644 translations/sk/3-Web-App/1-Web-App/notebook.ipynb create mode 100644 translations/sk/3-Web-App/1-Web-App/solution/notebook.ipynb create mode 100644 translations/sk/4-Classification/1-Introduction/notebook.ipynb create mode 100644 translations/sk/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb create mode 100644 translations/sk/4-Classification/1-Introduction/solution/notebook.ipynb create mode 100644 translations/sk/4-Classification/2-Classifiers-1/notebook.ipynb create mode 100644 translations/sk/4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb 
create mode 100644 translations/sk/4-Classification/2-Classifiers-1/solution/notebook.ipynb create mode 100644 translations/sk/4-Classification/3-Classifiers-2/notebook.ipynb create mode 100644 translations/sk/4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb create mode 100644 translations/sk/4-Classification/3-Classifiers-2/solution/notebook.ipynb create mode 100644 translations/sk/4-Classification/4-Applied/notebook.ipynb create mode 100644 translations/sk/4-Classification/4-Applied/solution/notebook.ipynb create mode 100644 translations/sk/5-Clustering/1-Visualize/notebook.ipynb create mode 100644 translations/sk/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb create mode 100644 translations/sk/5-Clustering/1-Visualize/solution/notebook.ipynb create mode 100644 translations/sk/5-Clustering/2-K-Means/notebook.ipynb create mode 100644 translations/sk/5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb create mode 100644 translations/sk/5-Clustering/2-K-Means/solution/notebook.ipynb create mode 100644 translations/sk/5-Clustering/2-K-Means/solution/tester.ipynb create mode 100644 translations/sk/6-NLP/3-Translation-Sentiment/solution/notebook.ipynb create mode 100644 translations/sk/6-NLP/4-Hotel-Reviews-1/notebook.ipynb create mode 100644 translations/sk/6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb create mode 100644 translations/sk/6-NLP/5-Hotel-Reviews-2/notebook.ipynb create mode 100644 translations/sk/6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb create mode 100644 translations/sk/6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb create mode 100644 translations/sk/6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb create mode 100644 translations/sk/7-TimeSeries/1-Introduction/solution/notebook.ipynb create mode 100644 translations/sk/7-TimeSeries/1-Introduction/working/notebook.ipynb create mode 100644 translations/sk/7-TimeSeries/2-ARIMA/solution/notebook.ipynb create mode 100644 
translations/sk/7-TimeSeries/2-ARIMA/working/notebook.ipynb create mode 100644 translations/sk/7-TimeSeries/3-SVR/solution/notebook.ipynb create mode 100644 translations/sk/7-TimeSeries/3-SVR/working/notebook.ipynb create mode 100644 translations/sk/8-Reinforcement/1-QLearning/notebook.ipynb create mode 100644 translations/sk/8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb create mode 100644 translations/sk/8-Reinforcement/1-QLearning/solution/notebook.ipynb create mode 100644 translations/sk/8-Reinforcement/2-Gym/notebook.ipynb create mode 100644 translations/sk/8-Reinforcement/2-Gym/solution/notebook.ipynb create mode 100644 translations/sk/PyTorch_Fundamentals.ipynb create mode 100644 translations/sl/2-Regression/1-Tools/notebook.ipynb create mode 100644 translations/sl/2-Regression/1-Tools/solution/R/lesson_1-R.ipynb create mode 100644 translations/sl/2-Regression/1-Tools/solution/notebook.ipynb create mode 100644 translations/sl/2-Regression/2-Data/notebook.ipynb create mode 100644 translations/sl/2-Regression/2-Data/solution/R/lesson_2-R.ipynb create mode 100644 translations/sl/2-Regression/2-Data/solution/notebook.ipynb create mode 100644 translations/sl/2-Regression/3-Linear/notebook.ipynb create mode 100644 translations/sl/2-Regression/3-Linear/solution/R/lesson_3-R.ipynb create mode 100644 translations/sl/2-Regression/3-Linear/solution/notebook.ipynb create mode 100644 translations/sl/2-Regression/4-Logistic/notebook.ipynb create mode 100644 translations/sl/2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb create mode 100644 translations/sl/2-Regression/4-Logistic/solution/notebook.ipynb create mode 100644 translations/sl/3-Web-App/1-Web-App/notebook.ipynb create mode 100644 translations/sl/3-Web-App/1-Web-App/solution/notebook.ipynb create mode 100644 translations/sl/4-Classification/1-Introduction/notebook.ipynb create mode 100644 translations/sl/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb create mode 100644 
translations/sl/4-Classification/1-Introduction/solution/notebook.ipynb create mode 100644 translations/sl/4-Classification/2-Classifiers-1/notebook.ipynb create mode 100644 translations/sl/4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb create mode 100644 translations/sl/4-Classification/2-Classifiers-1/solution/notebook.ipynb create mode 100644 translations/sl/4-Classification/3-Classifiers-2/notebook.ipynb create mode 100644 translations/sl/4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb create mode 100644 translations/sl/4-Classification/3-Classifiers-2/solution/notebook.ipynb create mode 100644 translations/sl/4-Classification/4-Applied/notebook.ipynb create mode 100644 translations/sl/4-Classification/4-Applied/solution/notebook.ipynb create mode 100644 translations/sl/5-Clustering/1-Visualize/notebook.ipynb create mode 100644 translations/sl/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb create mode 100644 translations/sl/5-Clustering/1-Visualize/solution/notebook.ipynb create mode 100644 translations/sl/5-Clustering/2-K-Means/notebook.ipynb create mode 100644 translations/sl/5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb create mode 100644 translations/sl/5-Clustering/2-K-Means/solution/notebook.ipynb create mode 100644 translations/sl/5-Clustering/2-K-Means/solution/tester.ipynb create mode 100644 translations/sl/6-NLP/3-Translation-Sentiment/solution/notebook.ipynb create mode 100644 translations/sl/6-NLP/4-Hotel-Reviews-1/notebook.ipynb create mode 100644 translations/sl/6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb create mode 100644 translations/sl/6-NLP/5-Hotel-Reviews-2/notebook.ipynb create mode 100644 translations/sl/6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb create mode 100644 translations/sl/6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb create mode 100644 translations/sl/6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb create mode 100644 
translations/sl/7-TimeSeries/1-Introduction/solution/notebook.ipynb create mode 100644 translations/sl/7-TimeSeries/1-Introduction/working/notebook.ipynb create mode 100644 translations/sl/7-TimeSeries/2-ARIMA/solution/notebook.ipynb create mode 100644 translations/sl/7-TimeSeries/2-ARIMA/working/notebook.ipynb create mode 100644 translations/sl/7-TimeSeries/3-SVR/solution/notebook.ipynb create mode 100644 translations/sl/7-TimeSeries/3-SVR/working/notebook.ipynb create mode 100644 translations/sl/8-Reinforcement/1-QLearning/notebook.ipynb create mode 100644 translations/sl/8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb create mode 100644 translations/sl/8-Reinforcement/1-QLearning/solution/notebook.ipynb create mode 100644 translations/sl/8-Reinforcement/2-Gym/notebook.ipynb create mode 100644 translations/sl/8-Reinforcement/2-Gym/solution/notebook.ipynb create mode 100644 translations/sl/PyTorch_Fundamentals.ipynb create mode 100644 translations/sr/2-Regression/1-Tools/notebook.ipynb create mode 100644 translations/sr/2-Regression/1-Tools/solution/R/lesson_1-R.ipynb create mode 100644 translations/sr/2-Regression/1-Tools/solution/notebook.ipynb create mode 100644 translations/sr/2-Regression/2-Data/notebook.ipynb create mode 100644 translations/sr/2-Regression/2-Data/solution/R/lesson_2-R.ipynb create mode 100644 translations/sr/2-Regression/2-Data/solution/notebook.ipynb create mode 100644 translations/sr/2-Regression/3-Linear/notebook.ipynb create mode 100644 translations/sr/2-Regression/3-Linear/solution/R/lesson_3-R.ipynb create mode 100644 translations/sr/2-Regression/3-Linear/solution/notebook.ipynb create mode 100644 translations/sr/2-Regression/4-Logistic/notebook.ipynb create mode 100644 translations/sr/2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb create mode 100644 translations/sr/2-Regression/4-Logistic/solution/notebook.ipynb create mode 100644 translations/sr/3-Web-App/1-Web-App/notebook.ipynb create mode 100644 
translations/sr/3-Web-App/1-Web-App/solution/notebook.ipynb create mode 100644 translations/sr/4-Classification/1-Introduction/notebook.ipynb create mode 100644 translations/sr/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb create mode 100644 translations/sr/4-Classification/1-Introduction/solution/notebook.ipynb create mode 100644 translations/sr/4-Classification/2-Classifiers-1/notebook.ipynb create mode 100644 translations/sr/4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb create mode 100644 translations/sr/4-Classification/2-Classifiers-1/solution/notebook.ipynb create mode 100644 translations/sr/4-Classification/3-Classifiers-2/notebook.ipynb create mode 100644 translations/sr/4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb create mode 100644 translations/sr/4-Classification/3-Classifiers-2/solution/notebook.ipynb create mode 100644 translations/sr/4-Classification/4-Applied/notebook.ipynb create mode 100644 translations/sr/4-Classification/4-Applied/solution/notebook.ipynb create mode 100644 translations/sr/5-Clustering/1-Visualize/notebook.ipynb create mode 100644 translations/sr/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb create mode 100644 translations/sr/5-Clustering/1-Visualize/solution/notebook.ipynb create mode 100644 translations/sr/5-Clustering/2-K-Means/notebook.ipynb create mode 100644 translations/sr/5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb create mode 100644 translations/sr/5-Clustering/2-K-Means/solution/notebook.ipynb create mode 100644 translations/sr/5-Clustering/2-K-Means/solution/tester.ipynb create mode 100644 translations/sr/6-NLP/3-Translation-Sentiment/solution/notebook.ipynb create mode 100644 translations/sr/6-NLP/4-Hotel-Reviews-1/notebook.ipynb create mode 100644 translations/sr/6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb create mode 100644 translations/sr/6-NLP/5-Hotel-Reviews-2/notebook.ipynb create mode 100644 
translations/sr/6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb create mode 100644 translations/sr/6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb create mode 100644 translations/sr/6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb create mode 100644 translations/sr/7-TimeSeries/1-Introduction/solution/notebook.ipynb create mode 100644 translations/sr/7-TimeSeries/1-Introduction/working/notebook.ipynb create mode 100644 translations/sr/7-TimeSeries/2-ARIMA/solution/notebook.ipynb create mode 100644 translations/sr/7-TimeSeries/2-ARIMA/working/notebook.ipynb create mode 100644 translations/sr/7-TimeSeries/3-SVR/solution/notebook.ipynb create mode 100644 translations/sr/7-TimeSeries/3-SVR/working/notebook.ipynb create mode 100644 translations/sr/8-Reinforcement/1-QLearning/notebook.ipynb create mode 100644 translations/sr/8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb create mode 100644 translations/sr/8-Reinforcement/1-QLearning/solution/notebook.ipynb create mode 100644 translations/sr/8-Reinforcement/2-Gym/notebook.ipynb create mode 100644 translations/sr/8-Reinforcement/2-Gym/solution/notebook.ipynb create mode 100644 translations/sr/PyTorch_Fundamentals.ipynb create mode 100644 translations/sv/2-Regression/1-Tools/notebook.ipynb create mode 100644 translations/sv/2-Regression/1-Tools/solution/R/lesson_1-R.ipynb create mode 100644 translations/sv/2-Regression/1-Tools/solution/notebook.ipynb create mode 100644 translations/sv/2-Regression/2-Data/notebook.ipynb create mode 100644 translations/sv/2-Regression/2-Data/solution/R/lesson_2-R.ipynb create mode 100644 translations/sv/2-Regression/2-Data/solution/notebook.ipynb create mode 100644 translations/sv/2-Regression/3-Linear/notebook.ipynb create mode 100644 translations/sv/2-Regression/3-Linear/solution/R/lesson_3-R.ipynb create mode 100644 translations/sv/2-Regression/3-Linear/solution/notebook.ipynb create mode 100644 translations/sv/2-Regression/4-Logistic/notebook.ipynb create mode 100644 
translations/sv/2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb create mode 100644 translations/sv/2-Regression/4-Logistic/solution/notebook.ipynb create mode 100644 translations/sv/3-Web-App/1-Web-App/notebook.ipynb create mode 100644 translations/sv/3-Web-App/1-Web-App/solution/notebook.ipynb create mode 100644 translations/sv/4-Classification/1-Introduction/notebook.ipynb create mode 100644 translations/sv/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb create mode 100644 translations/sv/4-Classification/1-Introduction/solution/notebook.ipynb create mode 100644 translations/sv/4-Classification/2-Classifiers-1/notebook.ipynb create mode 100644 translations/sv/4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb create mode 100644 translations/sv/4-Classification/2-Classifiers-1/solution/notebook.ipynb create mode 100644 translations/sv/4-Classification/3-Classifiers-2/notebook.ipynb create mode 100644 translations/sv/4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb create mode 100644 translations/sv/4-Classification/3-Classifiers-2/solution/notebook.ipynb create mode 100644 translations/sv/4-Classification/4-Applied/notebook.ipynb create mode 100644 translations/sv/4-Classification/4-Applied/solution/notebook.ipynb create mode 100644 translations/sv/5-Clustering/1-Visualize/notebook.ipynb create mode 100644 translations/sv/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb create mode 100644 translations/sv/5-Clustering/1-Visualize/solution/notebook.ipynb create mode 100644 translations/sv/5-Clustering/2-K-Means/notebook.ipynb create mode 100644 translations/sv/5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb create mode 100644 translations/sv/5-Clustering/2-K-Means/solution/notebook.ipynb create mode 100644 translations/sv/5-Clustering/2-K-Means/solution/tester.ipynb create mode 100644 translations/sv/6-NLP/3-Translation-Sentiment/solution/notebook.ipynb create mode 100644 
translations/sv/6-NLP/4-Hotel-Reviews-1/notebook.ipynb create mode 100644 translations/sv/6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb create mode 100644 translations/sv/6-NLP/5-Hotel-Reviews-2/notebook.ipynb create mode 100644 translations/sv/6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb create mode 100644 translations/sv/6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb create mode 100644 translations/sv/6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb create mode 100644 translations/sv/7-TimeSeries/1-Introduction/solution/notebook.ipynb create mode 100644 translations/sv/7-TimeSeries/1-Introduction/working/notebook.ipynb create mode 100644 translations/sv/7-TimeSeries/2-ARIMA/solution/notebook.ipynb create mode 100644 translations/sv/7-TimeSeries/2-ARIMA/working/notebook.ipynb create mode 100644 translations/sv/7-TimeSeries/3-SVR/solution/notebook.ipynb create mode 100644 translations/sv/7-TimeSeries/3-SVR/working/notebook.ipynb create mode 100644 translations/sv/8-Reinforcement/1-QLearning/notebook.ipynb create mode 100644 translations/sv/8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb create mode 100644 translations/sv/8-Reinforcement/1-QLearning/solution/notebook.ipynb create mode 100644 translations/sv/8-Reinforcement/2-Gym/notebook.ipynb create mode 100644 translations/sv/8-Reinforcement/2-Gym/solution/notebook.ipynb create mode 100644 translations/sv/PyTorch_Fundamentals.ipynb create mode 100644 translations/sw/2-Regression/1-Tools/notebook.ipynb create mode 100644 translations/sw/2-Regression/1-Tools/solution/R/lesson_1-R.ipynb create mode 100644 translations/sw/2-Regression/1-Tools/solution/notebook.ipynb create mode 100644 translations/sw/2-Regression/2-Data/notebook.ipynb create mode 100644 translations/sw/2-Regression/2-Data/solution/R/lesson_2-R.ipynb create mode 100644 translations/sw/2-Regression/2-Data/solution/notebook.ipynb create mode 100644 translations/sw/2-Regression/3-Linear/notebook.ipynb create mode 100644 
translations/sw/2-Regression/3-Linear/solution/R/lesson_3-R.ipynb create mode 100644 translations/sw/2-Regression/3-Linear/solution/notebook.ipynb create mode 100644 translations/sw/2-Regression/4-Logistic/notebook.ipynb create mode 100644 translations/sw/2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb create mode 100644 translations/sw/2-Regression/4-Logistic/solution/notebook.ipynb create mode 100644 translations/sw/3-Web-App/1-Web-App/notebook.ipynb create mode 100644 translations/sw/3-Web-App/1-Web-App/solution/notebook.ipynb create mode 100644 translations/sw/4-Classification/1-Introduction/notebook.ipynb create mode 100644 translations/sw/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb create mode 100644 translations/sw/4-Classification/1-Introduction/solution/notebook.ipynb create mode 100644 translations/sw/4-Classification/2-Classifiers-1/notebook.ipynb create mode 100644 translations/sw/4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb create mode 100644 translations/sw/4-Classification/2-Classifiers-1/solution/notebook.ipynb create mode 100644 translations/sw/4-Classification/3-Classifiers-2/notebook.ipynb create mode 100644 translations/sw/4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb create mode 100644 translations/sw/4-Classification/3-Classifiers-2/solution/notebook.ipynb create mode 100644 translations/sw/4-Classification/4-Applied/notebook.ipynb create mode 100644 translations/sw/4-Classification/4-Applied/solution/notebook.ipynb create mode 100644 translations/sw/5-Clustering/1-Visualize/notebook.ipynb create mode 100644 translations/sw/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb create mode 100644 translations/sw/5-Clustering/1-Visualize/solution/notebook.ipynb create mode 100644 translations/sw/5-Clustering/2-K-Means/notebook.ipynb create mode 100644 translations/sw/5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb create mode 100644 
translations/sw/5-Clustering/2-K-Means/solution/notebook.ipynb create mode 100644 translations/sw/5-Clustering/2-K-Means/solution/tester.ipynb create mode 100644 translations/sw/6-NLP/3-Translation-Sentiment/solution/notebook.ipynb create mode 100644 translations/sw/6-NLP/4-Hotel-Reviews-1/notebook.ipynb create mode 100644 translations/sw/6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb create mode 100644 translations/sw/6-NLP/5-Hotel-Reviews-2/notebook.ipynb create mode 100644 translations/sw/6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb create mode 100644 translations/sw/6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb create mode 100644 translations/sw/6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb create mode 100644 translations/sw/7-TimeSeries/1-Introduction/solution/notebook.ipynb create mode 100644 translations/sw/7-TimeSeries/1-Introduction/working/notebook.ipynb create mode 100644 translations/sw/7-TimeSeries/2-ARIMA/solution/notebook.ipynb create mode 100644 translations/sw/7-TimeSeries/2-ARIMA/working/notebook.ipynb create mode 100644 translations/sw/7-TimeSeries/3-SVR/solution/notebook.ipynb create mode 100644 translations/sw/7-TimeSeries/3-SVR/working/notebook.ipynb create mode 100644 translations/sw/8-Reinforcement/1-QLearning/notebook.ipynb create mode 100644 translations/sw/8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb create mode 100644 translations/sw/8-Reinforcement/1-QLearning/solution/notebook.ipynb create mode 100644 translations/sw/8-Reinforcement/2-Gym/notebook.ipynb create mode 100644 translations/sw/8-Reinforcement/2-Gym/solution/notebook.ipynb create mode 100644 translations/sw/PyTorch_Fundamentals.ipynb create mode 100644 translations/th/2-Regression/1-Tools/notebook.ipynb create mode 100644 translations/th/2-Regression/1-Tools/solution/R/lesson_1-R.ipynb create mode 100644 translations/th/2-Regression/1-Tools/solution/notebook.ipynb create mode 100644 translations/th/2-Regression/2-Data/notebook.ipynb create mode 
100644 translations/th/2-Regression/2-Data/solution/R/lesson_2-R.ipynb create mode 100644 translations/th/2-Regression/2-Data/solution/notebook.ipynb create mode 100644 translations/th/2-Regression/3-Linear/notebook.ipynb create mode 100644 translations/th/2-Regression/3-Linear/solution/R/lesson_3-R.ipynb create mode 100644 translations/th/2-Regression/3-Linear/solution/notebook.ipynb create mode 100644 translations/th/2-Regression/4-Logistic/notebook.ipynb create mode 100644 translations/th/2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb create mode 100644 translations/th/2-Regression/4-Logistic/solution/notebook.ipynb create mode 100644 translations/th/3-Web-App/1-Web-App/notebook.ipynb create mode 100644 translations/th/3-Web-App/1-Web-App/solution/notebook.ipynb create mode 100644 translations/th/4-Classification/1-Introduction/notebook.ipynb create mode 100644 translations/th/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb create mode 100644 translations/th/4-Classification/1-Introduction/solution/notebook.ipynb create mode 100644 translations/th/4-Classification/2-Classifiers-1/notebook.ipynb create mode 100644 translations/th/4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb create mode 100644 translations/th/4-Classification/2-Classifiers-1/solution/notebook.ipynb create mode 100644 translations/th/4-Classification/3-Classifiers-2/notebook.ipynb create mode 100644 translations/th/4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb create mode 100644 translations/th/4-Classification/3-Classifiers-2/solution/notebook.ipynb create mode 100644 translations/th/4-Classification/4-Applied/notebook.ipynb create mode 100644 translations/th/4-Classification/4-Applied/solution/notebook.ipynb create mode 100644 translations/th/5-Clustering/1-Visualize/notebook.ipynb create mode 100644 translations/th/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb create mode 100644 
translations/th/5-Clustering/1-Visualize/solution/notebook.ipynb create mode 100644 translations/th/5-Clustering/2-K-Means/notebook.ipynb create mode 100644 translations/th/5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb create mode 100644 translations/th/5-Clustering/2-K-Means/solution/notebook.ipynb create mode 100644 translations/th/5-Clustering/2-K-Means/solution/tester.ipynb create mode 100644 translations/th/6-NLP/3-Translation-Sentiment/solution/notebook.ipynb create mode 100644 translations/th/6-NLP/4-Hotel-Reviews-1/notebook.ipynb create mode 100644 translations/th/6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb create mode 100644 translations/th/6-NLP/5-Hotel-Reviews-2/notebook.ipynb create mode 100644 translations/th/6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb create mode 100644 translations/th/6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb create mode 100644 translations/th/6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb create mode 100644 translations/th/7-TimeSeries/1-Introduction/solution/notebook.ipynb create mode 100644 translations/th/7-TimeSeries/1-Introduction/working/notebook.ipynb create mode 100644 translations/th/7-TimeSeries/2-ARIMA/solution/notebook.ipynb create mode 100644 translations/th/7-TimeSeries/2-ARIMA/working/notebook.ipynb create mode 100644 translations/th/7-TimeSeries/3-SVR/solution/notebook.ipynb create mode 100644 translations/th/7-TimeSeries/3-SVR/working/notebook.ipynb create mode 100644 translations/th/8-Reinforcement/1-QLearning/notebook.ipynb create mode 100644 translations/th/8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb create mode 100644 translations/th/8-Reinforcement/1-QLearning/solution/notebook.ipynb create mode 100644 translations/th/8-Reinforcement/2-Gym/notebook.ipynb create mode 100644 translations/th/8-Reinforcement/2-Gym/solution/notebook.ipynb create mode 100644 translations/th/PyTorch_Fundamentals.ipynb create mode 100644 translations/tr/2-Regression/1-Tools/notebook.ipynb 
create mode 100644 translations/tr/2-Regression/1-Tools/solution/R/lesson_1-R.ipynb create mode 100644 translations/tr/2-Regression/1-Tools/solution/notebook.ipynb create mode 100644 translations/tr/2-Regression/2-Data/notebook.ipynb create mode 100644 translations/tr/2-Regression/2-Data/solution/R/lesson_2-R.ipynb create mode 100644 translations/tr/2-Regression/2-Data/solution/notebook.ipynb create mode 100644 translations/tr/2-Regression/3-Linear/notebook.ipynb create mode 100644 translations/tr/2-Regression/3-Linear/solution/R/lesson_3-R.ipynb create mode 100644 translations/tr/2-Regression/3-Linear/solution/notebook.ipynb create mode 100644 translations/tr/2-Regression/4-Logistic/notebook.ipynb create mode 100644 translations/tr/2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb create mode 100644 translations/tr/2-Regression/4-Logistic/solution/notebook.ipynb create mode 100644 translations/tr/3-Web-App/1-Web-App/notebook.ipynb create mode 100644 translations/tr/3-Web-App/1-Web-App/solution/notebook.ipynb create mode 100644 translations/tr/4-Classification/1-Introduction/notebook.ipynb create mode 100644 translations/tr/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb create mode 100644 translations/tr/4-Classification/1-Introduction/solution/notebook.ipynb create mode 100644 translations/tr/4-Classification/2-Classifiers-1/notebook.ipynb create mode 100644 translations/tr/4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb create mode 100644 translations/tr/4-Classification/2-Classifiers-1/solution/notebook.ipynb create mode 100644 translations/tr/4-Classification/3-Classifiers-2/notebook.ipynb create mode 100644 translations/tr/4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb create mode 100644 translations/tr/4-Classification/3-Classifiers-2/solution/notebook.ipynb create mode 100644 translations/tr/4-Classification/4-Applied/notebook.ipynb create mode 100644 
translations/tr/4-Classification/4-Applied/solution/notebook.ipynb create mode 100644 translations/tr/5-Clustering/1-Visualize/notebook.ipynb create mode 100644 translations/tr/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb create mode 100644 translations/tr/5-Clustering/1-Visualize/solution/notebook.ipynb create mode 100644 translations/tr/5-Clustering/2-K-Means/notebook.ipynb create mode 100644 translations/tr/5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb create mode 100644 translations/tr/5-Clustering/2-K-Means/solution/notebook.ipynb create mode 100644 translations/tr/5-Clustering/2-K-Means/solution/tester.ipynb create mode 100644 translations/tr/6-NLP/3-Translation-Sentiment/solution/notebook.ipynb create mode 100644 translations/tr/6-NLP/4-Hotel-Reviews-1/notebook.ipynb create mode 100644 translations/tr/6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb create mode 100644 translations/tr/6-NLP/5-Hotel-Reviews-2/notebook.ipynb create mode 100644 translations/tr/6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb create mode 100644 translations/tr/6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb create mode 100644 translations/tr/6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb create mode 100644 translations/tr/7-TimeSeries/1-Introduction/solution/notebook.ipynb create mode 100644 translations/tr/7-TimeSeries/1-Introduction/working/notebook.ipynb create mode 100644 translations/tr/7-TimeSeries/2-ARIMA/solution/notebook.ipynb create mode 100644 translations/tr/7-TimeSeries/2-ARIMA/working/notebook.ipynb create mode 100644 translations/tr/7-TimeSeries/3-SVR/solution/notebook.ipynb create mode 100644 translations/tr/7-TimeSeries/3-SVR/working/notebook.ipynb create mode 100644 translations/tr/8-Reinforcement/1-QLearning/notebook.ipynb create mode 100644 translations/tr/8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb create mode 100644 translations/tr/8-Reinforcement/1-QLearning/solution/notebook.ipynb create mode 100644 
translations/tr/8-Reinforcement/2-Gym/notebook.ipynb create mode 100644 translations/tr/8-Reinforcement/2-Gym/solution/notebook.ipynb create mode 100644 translations/tr/PyTorch_Fundamentals.ipynb create mode 100644 translations/vi/2-Regression/1-Tools/notebook.ipynb create mode 100644 translations/vi/2-Regression/1-Tools/solution/R/lesson_1-R.ipynb create mode 100644 translations/vi/2-Regression/1-Tools/solution/notebook.ipynb create mode 100644 translations/vi/2-Regression/2-Data/notebook.ipynb create mode 100644 translations/vi/2-Regression/2-Data/solution/R/lesson_2-R.ipynb create mode 100644 translations/vi/2-Regression/2-Data/solution/notebook.ipynb create mode 100644 translations/vi/2-Regression/3-Linear/notebook.ipynb create mode 100644 translations/vi/2-Regression/3-Linear/solution/R/lesson_3-R.ipynb create mode 100644 translations/vi/2-Regression/3-Linear/solution/notebook.ipynb create mode 100644 translations/vi/2-Regression/4-Logistic/notebook.ipynb create mode 100644 translations/vi/2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb create mode 100644 translations/vi/2-Regression/4-Logistic/solution/notebook.ipynb create mode 100644 translations/vi/3-Web-App/1-Web-App/notebook.ipynb create mode 100644 translations/vi/3-Web-App/1-Web-App/solution/notebook.ipynb create mode 100644 translations/vi/4-Classification/1-Introduction/notebook.ipynb create mode 100644 translations/vi/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb create mode 100644 translations/vi/4-Classification/1-Introduction/solution/notebook.ipynb create mode 100644 translations/vi/4-Classification/2-Classifiers-1/notebook.ipynb create mode 100644 translations/vi/4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb create mode 100644 translations/vi/4-Classification/2-Classifiers-1/solution/notebook.ipynb create mode 100644 translations/vi/4-Classification/3-Classifiers-2/notebook.ipynb create mode 100644 
translations/vi/4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb create mode 100644 translations/vi/4-Classification/3-Classifiers-2/solution/notebook.ipynb create mode 100644 translations/vi/4-Classification/4-Applied/notebook.ipynb create mode 100644 translations/vi/4-Classification/4-Applied/solution/notebook.ipynb create mode 100644 translations/vi/5-Clustering/1-Visualize/notebook.ipynb create mode 100644 translations/vi/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb create mode 100644 translations/vi/5-Clustering/1-Visualize/solution/notebook.ipynb create mode 100644 translations/vi/5-Clustering/2-K-Means/notebook.ipynb create mode 100644 translations/vi/5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb create mode 100644 translations/vi/5-Clustering/2-K-Means/solution/notebook.ipynb create mode 100644 translations/vi/5-Clustering/2-K-Means/solution/tester.ipynb create mode 100644 translations/vi/6-NLP/3-Translation-Sentiment/solution/notebook.ipynb create mode 100644 translations/vi/6-NLP/4-Hotel-Reviews-1/notebook.ipynb create mode 100644 translations/vi/6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb create mode 100644 translations/vi/6-NLP/5-Hotel-Reviews-2/notebook.ipynb create mode 100644 translations/vi/6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb create mode 100644 translations/vi/6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb create mode 100644 translations/vi/6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb create mode 100644 translations/vi/7-TimeSeries/1-Introduction/solution/notebook.ipynb create mode 100644 translations/vi/7-TimeSeries/1-Introduction/working/notebook.ipynb create mode 100644 translations/vi/7-TimeSeries/2-ARIMA/solution/notebook.ipynb create mode 100644 translations/vi/7-TimeSeries/2-ARIMA/working/notebook.ipynb create mode 100644 translations/vi/7-TimeSeries/3-SVR/solution/notebook.ipynb create mode 100644 translations/vi/7-TimeSeries/3-SVR/working/notebook.ipynb create mode 100644 
translations/vi/8-Reinforcement/1-QLearning/notebook.ipynb create mode 100644 translations/vi/8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb create mode 100644 translations/vi/8-Reinforcement/1-QLearning/solution/notebook.ipynb create mode 100644 translations/vi/8-Reinforcement/2-Gym/notebook.ipynb create mode 100644 translations/vi/8-Reinforcement/2-Gym/solution/notebook.ipynb create mode 100644 translations/vi/PyTorch_Fundamentals.ipynb diff --git a/translations/sk/2-Regression/1-Tools/notebook.ipynb b/translations/sk/2-Regression/1-Tools/notebook.ipynb new file mode 100644 index 000000000..e69de29bb diff --git a/translations/sk/2-Regression/1-Tools/solution/R/lesson_1-R.ipynb b/translations/sk/2-Regression/1-Tools/solution/R/lesson_1-R.ipynb new file mode 100644 index 000000000..51d2c746f --- /dev/null +++ b/translations/sk/2-Regression/1-Tools/solution/R/lesson_1-R.ipynb @@ -0,0 +1,447 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_1-R.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "c18d3bd0bd8ae3878597e89dcd1fa5c1", + "translation_date": "2025-09-06T13:40:43+00:00", + "source_file": "2-Regression/1-Tools/solution/R/lesson_1-R.ipynb", + "language_code": "sk" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "YJUHCXqK57yz" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Úvod do regresie - Lekcia 1\n", + "\n", + "#### Uvedenie do kontextu\n", + "\n", + "✅ Existuje mnoho typov regresných metód a výber tej správnej závisí od odpovede, ktorú hľadáte. Ak chcete predpovedať pravdepodobnú výšku osoby v určitom veku, použijete `lineárnu regresiu`, pretože hľadáte **číselnú hodnotu**. 
Ak vás zaujíma, či by určitý typ kuchyne mal byť považovaný za vegánsky alebo nie, hľadáte **priradenie kategórie**, a preto by ste použili `logistickú regresiu`. O logistickej regresii sa dozviete viac neskôr. Zamyslite sa nad niektorými otázkami, ktoré môžete klásť na základe údajov, a nad tým, ktorá z týchto metód by bola vhodnejšia.\n", + "\n", + "V tejto časti budete pracovať s [malým datasetom o cukrovke](https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html). Predstavte si, že by ste chceli otestovať liečbu pre pacientov s cukrovkou. Modely strojového učenia by vám mohli pomôcť určiť, ktorí pacienti by na liečbu reagovali lepšie, na základe kombinácií premenných. Dokonca aj veľmi základný regresný model, keď je vizualizovaný, môže ukázať informácie o premenných, ktoré by vám mohli pomôcť zorganizovať vaše teoretické klinické štúdie.\n", + "\n", + "Tak teda, poďme sa pustiť do tejto úlohy!\n", + "\n", + "

\n", + " \n", + "

Ilustrácia od @allison_horst
\n", + "\n", + "\n" + ], + "metadata": { + "id": "LWNNzfqd6feZ" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 1. Načítanie našej sady nástrojov\n", + "\n", + "Pre túto úlohu budeme potrebovať nasledujúce balíky:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) je [kolekcia balíkov v R](https://www.tidyverse.org/packages), ktorá je navrhnutá tak, aby robila dátovú vedu rýchlejšou, jednoduchšou a zábavnejšou!\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) je [rámec balíkov](https://www.tidymodels.org/packages/) určený na modelovanie a strojové učenie.\n", + "\n", + "Môžete ich nainštalovať pomocou:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\"))`\n", + "\n", + "Nasledujúci skript skontroluje, či máte nainštalované balíky potrebné na dokončenie tohto modulu, a v prípade, že niektoré chýbajú, ich pre vás nainštaluje.\n" + ], + "metadata": { + "id": "FIo2YhO26wI9" + } + }, + { + "cell_type": "code", + "execution_count": 2, + "source": [ + "suppressWarnings(if(!require(\"pacman\")) install.packages(\"pacman\"))\n", + "pacman::p_load(tidyverse, tidymodels)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Loading required package: pacman\n", + "\n" + ] + } + ], + "metadata": { + "id": "cIA9fz9v7Dss", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "2df7073b-86b2-4b32-cb86-0da605a0dc11" + } + }, + { + "cell_type": "markdown", + "source": [ + "Teraz načítajme tieto úžasné balíky a sprístupnime ich v našej aktuálnej R relácii. 
(Toto je len na ilustráciu, `pacman::p_load()` to už za vás urobil)\n" + ], + "metadata": { + "id": "gpO_P_6f9WUG" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# load the core Tidyverse packages\r\n", + "library(tidyverse)\r\n", + "\r\n", + "# load the core Tidymodels packages\r\n", + "library(tidymodels)\r\n" + ], + "outputs": [], + "metadata": { + "id": "NLMycgG-9ezO" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 2. Dataset o cukrovke\n", + "\n", + "V tomto cvičení si precvičíme naše schopnosti regresie tým, že budeme robiť predpovede na datasete o cukrovke. [Dataset o cukrovke](https://www4.stat.ncsu.edu/~boos/var.select/diabetes.rwrite1.txt) obsahuje `442 vzoriek` údajov týkajúcich sa cukrovky, s 10 prediktívnymi premennými: `vek`, `pohlavie`, `index telesnej hmotnosti`, `priemerný krvný tlak` a `šesť meraní krvného séra`, ako aj výstupnú premennú `y`: kvantitatívne meranie progresie ochorenia jeden rok po základnom vyšetrení.\n", + "\n", + "|Počet pozorovaní|442|\n", + "|----------------|:---|\n", + "|Počet prediktorov|Prvých 10 stĺpcov sú číselné prediktívne premenné|\n", + "|Výstup/Cieľ|Stĺpec 11 je kvantitatívne meranie progresie ochorenia jeden rok po základnom vyšetrení|\n", + "|Informácie o prediktoroch|- vek v rokoch\n", + "||- pohlavie\n", + "||- bmi index telesnej hmotnosti\n", + "||- bp priemerný krvný tlak\n", + "||- s1 tc, celkový cholesterol v sére\n", + "||- s2 ldl, lipoproteíny s nízkou hustotou\n", + "||- s3 hdl, lipoproteíny s vysokou hustotou\n", + "||- s4 tch, celkový cholesterol / HDL\n", + "||- s5 ltg, pravdepodobne logaritmus hladiny triglyceridov v sére\n", + "||- s6 glu, hladina cukru v krvi|\n", + "\n", + "> 🎓 Pamätajte, že ide o učenie s učiteľom (supervised learning), a potrebujeme cieľovú premennú nazvanú 'y'.\n", + "\n", + "Predtým, než budete môcť manipulovať s údajmi v R, musíte údaje importovať do pamäte R alebo vytvoriť spojenie s údajmi, ktoré R môže použiť na vzdialený prístup 
k údajom.\n", + "\n", + "> Balík [readr](https://readr.tidyverse.org/), ktorý je súčasťou Tidyverse, poskytuje rýchly a priateľský spôsob, ako načítať obdĺžnikové údaje do R.\n", + "\n", + "Teraz načítajme dataset o cukrovke z poskytnutého zdrojového URL: \n", + "\n", + "Tiež vykonáme kontrolu údajov pomocou `glimpse()` a zobrazíme prvých 5 riadkov pomocou `slice()`.\n", + "\n", + "Predtým, než budeme pokračovať ďalej, predstavme si niečo, s čím sa často stretnete v kóde R 🥁🥁: operátor pipe `%>%`\n", + "\n", + "Operátor pipe (`%>%`) vykonáva operácie v logickej postupnosti tým, že posúva objekt ďalej do funkcie alebo výrazu. Môžete si operátor pipe predstaviť ako \"a potom\" vo vašom kóde.\n" + ], + "metadata": { + "id": "KM6iXLH996Cl" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Import the data set\r\n", + "diabetes <- read_table2(file = \"https://www4.stat.ncsu.edu/~boos/var.select/diabetes.rwrite1.txt\")\r\n", + "\r\n", + "\r\n", + "# Get a glimpse and dimensions of the data\r\n", + "glimpse(diabetes)\r\n", + "\r\n", + "\r\n", + "# Select the first 5 rows of the data\r\n", + "diabetes %>% \r\n", + " slice(1:5)" + ], + "outputs": [], + "metadata": { + "id": "Z1geAMhM-bSP" + } + }, + { + "cell_type": "markdown", + "source": [ + "`glimpse()` nám ukazuje, že tieto dáta obsahujú 442 riadkov a 11 stĺpcov, pričom všetky stĺpce majú dátový typ `double`.\n", + "\n", + "
\n", + "\n", + "> glimpse() a slice() sú funkcie v [`dplyr`](https://dplyr.tidyverse.org/). Dplyr, ktorý je súčasťou Tidyverse, predstavuje gramatiku manipulácie s dátami a poskytuje konzistentnú sadu slovies, ktoré vám pomôžu riešiť najbežnejšie výzvy pri práci s dátami.\n", + "\n", + "
\n", + "\n", + "Teraz, keď máme dáta, zamerajme sa na jednu vlastnosť (`bmi`), ktorú použijeme ako prediktor v tomto cvičení. To si bude vyžadovať výber požadovaných stĺpcov. Ako to môžeme urobiť?\n", + "\n", + "[`dplyr::select()`](https://dplyr.tidyverse.org/reference/select.html) nám umožňuje *vybrať* (a prípadne premenovať) stĺpce v dátovom rámci.\n" + ], + "metadata": { + "id": "UwjVT1Hz-c3Z" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Select predictor feature `bmi` and outcome `y`\r\n", + "diabetes_select <- diabetes %>% \r\n", + " select(c(bmi, y))\r\n", + "\r\n", + "# Print the first 10 rows\r\n", + "diabetes_select %>% \r\n", + " slice(1:10)" + ], + "outputs": [], + "metadata": { + "id": "RDY1oAKI-m80" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 3. Tréningové a testovacie dáta\n", + "\n", + "V supervidovanom učení je bežnou praxou *rozdeliť* dáta na dve podmnožiny; (zvyčajne väčšiu) množinu, s ktorou sa model trénuje, a menšiu „rezervnú“ množinu, na ktorej sa overí, ako model fungoval.\n", + "\n", + "Teraz, keď máme dáta pripravené, môžeme zistiť, či nám stroj dokáže pomôcť určiť logické rozdelenie čísel v tejto dátovej sade. 
Môžeme použiť balík [rsample](https://tidymodels.github.io/rsample/), ktorý je súčasťou rámca Tidymodels, na vytvorenie objektu obsahujúceho informácie o *tom, ako* rozdeliť dáta, a potom ďalšie dve funkcie rsample na extrakciu vytvorených tréningových a testovacích množín:\n" + ], + "metadata": { + "id": "SDk668xK-tc3" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "set.seed(2056)\r\n", + "# Split 67% of the data for training and the rest for testing\r\n", + "diabetes_split <- diabetes_select %>% \r\n", + " initial_split(prop = 0.67)\r\n", + "\r\n", + "# Extract the resulting train and test sets\r\n", + "diabetes_train <- training(diabetes_split)\r\n", + "diabetes_test <- testing(diabetes_split)\r\n", + "\r\n", + "# Print the first 10 rows of the training set\r\n", + "diabetes_train %>% \r\n", + " slice(1:10)" + ], + "outputs": [], + "metadata": { + "id": "EqtHx129-1h-" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 4. Natrénujte lineárny regresný model pomocou Tidymodels\n", + "\n", + "Teraz sme pripravení natrénovať náš model!\n", + "\n", + "V Tidymodels špecifikujete modely pomocou `parsnip()` tým, že definujete tri koncepty:\n", + "\n", + "- **Typ** modelu rozlišuje medzi modelmi, ako sú lineárna regresia, logistická regresia, modely rozhodovacích stromov a podobne.\n", + "\n", + "- **Režim** modelu zahŕňa bežné možnosti, ako sú regresia a klasifikácia; niektoré typy modelov podporujú obe možnosti, zatiaľ čo iné majú iba jeden režim.\n", + "\n", + "- **Engine** modelu je výpočtový nástroj, ktorý sa použije na natrénovanie modelu. 
Často ide o balíky v R, ako napríklad **`\"lm\"`** alebo **`\"ranger\"`**\n", + "\n", + "Tieto informácie o modelovaní sú zachytené v špecifikácii modelu, takže si jednu vytvorme!\n" + ], + "metadata": { + "id": "sBOS-XhB-6v7" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Build a linear model specification\r\n", + "lm_spec <- \r\n", + " # Type\r\n", + " linear_reg() %>% \r\n", + " # Engine\r\n", + " set_engine(\"lm\") %>% \r\n", + " # Mode\r\n", + " set_mode(\"regression\")\r\n", + "\r\n", + "\r\n", + "# Print the model specification\r\n", + "lm_spec" + ], + "outputs": [], + "metadata": { + "id": "20OwEw20--t3" + } + }, + { + "cell_type": "markdown", + "source": [ + "Po tom, čo je model *špecifikovaný*, môže byť `odhadnutý` alebo `natrénovaný` pomocou funkcie [`fit()`](https://parsnip.tidymodels.org/reference/fit.html), zvyčajne s použitím vzorca a nejakých údajov.\n", + "\n", + "`y ~ .` znamená, že budeme prispôsobovať `y` ako predpovedanú hodnotu/cieľ, vysvetlenú všetkými prediktormi/vlastnosťami, teda `.` (v tomto prípade máme iba jeden prediktor: `bmi`).\n" + ], + "metadata": { + "id": "_oDHs89k_CJj" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Build a linear model specification\r\n", + "lm_spec <- linear_reg() %>% \r\n", + " set_engine(\"lm\") %>%\r\n", + " set_mode(\"regression\")\r\n", + "\r\n", + "\r\n", + "# Train a linear regression model\r\n", + "lm_mod <- lm_spec %>% \r\n", + " fit(y ~ ., data = diabetes_train)\r\n", + "\r\n", + "# Print the model\r\n", + "lm_mod" + ], + "outputs": [], + "metadata": { + "id": "YlsHqd-q_GJQ" + } + }, + { + "cell_type": "markdown", + "source": [ + "Z modelového výstupu môžeme vidieť koeficienty naučené počas tréningu. Predstavujú koeficienty priamky najlepšieho prispôsobenia, ktorá nám poskytuje najnižšiu celkovú chybu medzi skutočnou a predpovedanou premennou.\n", + "
\n", + "\n", + "## 5. Predikcia na testovacej množine\n", + "\n", + "Teraz, keď sme natrénovali model, môžeme ho použiť na predikciu progresie ochorenia y pre testovaciu množinu dát pomocou [parsnip::predict()](https://parsnip.tidymodels.org/reference/predict.model_fit.html). Toto sa použije na vykreslenie čiary medzi skupinami dát.\n" + ], + "metadata": { + "id": "kGZ22RQj_Olu" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make predictions for the test set\r\n", + "predictions <- lm_mod %>% \r\n", + " predict(new_data = diabetes_test)\r\n", + "\r\n", + "# Print out some of the predictions\r\n", + "predictions %>% \r\n", + " slice(1:5)" + ], + "outputs": [], + "metadata": { + "id": "nXHbY7M2_aao" + } + }, + { + "cell_type": "markdown", + "source": [ + "Hurá! 💃🕺 Práve sme natrénovali model a použili ho na vytváranie predpovedí!\n", + "\n", + "Pri vytváraní predpovedí je v súlade s konvenciou tidymodels vždy vytvoriť tibble/dátový rámec s výsledkami, ktoré majú štandardizované názvy stĺpcov. To umožňuje jednoducho skombinovať pôvodné dáta a predpovede do použiteľného formátu pre ďalšie operácie, ako je napríklad vizualizácia.\n", + "\n", + "`dplyr::bind_cols()` efektívne spája viacero dátových rámcov podľa stĺpcov.\n" + ], + "metadata": { + "id": "R_JstwUY_bIs" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Combine the predictions and the original test set\r\n", + "results <- diabetes_test %>% \r\n", + " bind_cols(predictions)\r\n", + "\r\n", + "\r\n", + "results %>% \r\n", + " slice(1:5)" + ], + "outputs": [], + "metadata": { + "id": "RybsMJR7_iI8" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 6. Zobrazenie výsledkov modelovania\n", + "\n", + "Teraz je čas vidieť to vizuálne 📈. 
Vytvoríme bodový graf všetkých hodnôt `y` a `bmi` z testovacej množiny a potom použijeme predpovede na nakreslenie čiary na najvhodnejšom mieste medzi skupinami údajov modelu.\n", + "\n", + "R má niekoľko systémov na tvorbu grafov, ale `ggplot2` je jedným z najelegantnejších a najvšestrannejších. Umožňuje vám vytvárať grafy **kombinovaním nezávislých komponentov**.\n" + ], + "metadata": { + "id": "XJbYbMZW_n_s" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Set a theme for the plot\r\n", + "theme_set(theme_light())\r\n", + "# Create a scatter plot\r\n", + "results %>% \r\n", + " ggplot(aes(x = bmi)) +\r\n", + " # Add a scatter plot\r\n", + " geom_point(aes(y = y), size = 1.6) +\r\n", + " # Add a line plot\r\n", + " geom_line(aes(y = .pred), color = \"blue\", size = 1.5)" + ], + "outputs": [], + "metadata": { + "id": "R9tYp3VW_sTn" + } + }, + { + "cell_type": "markdown", + "source": [ + "✅ Zamyslite sa trochu nad tým, čo sa tu deje. Priama čiara prechádza cez množstvo malých bodov údajov, ale čo presne robí? Vidíte, ako by ste mali byť schopní použiť túto čiaru na predpovedanie, kam by mal nový, nevidený údaj zapadnúť vo vzťahu k osi y grafu? Skúste slovami vyjadriť praktické využitie tohto modelu.\n", + "\n", + "Gratulujeme, vytvorili ste svoj prvý model lineárnej regresie, urobili ste s ním predpoveď a zobrazili ste ju v grafe!\n" + ], + "metadata": { + "id": "zrPtHIxx_tNI" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby na automatický preklad [Co-op Translator](https://github.com/Azure/co-op-translator). Hoci sa snažíme o presnosť, upozorňujeme, že automatické preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre dôležité informácie odporúčame profesionálny ľudský preklad. 
Nezodpovedáme za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sk/2-Regression/1-Tools/solution/notebook.ipynb b/translations/sk/2-Regression/1-Tools/solution/notebook.ipynb new file mode 100644 index 000000000..aa7c5d8ca --- /dev/null +++ b/translations/sk/2-Regression/1-Tools/solution/notebook.ipynb @@ -0,0 +1,673 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Importujte potrebné knižnice\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from sklearn import datasets, linear_model, model_selection\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Načítajte dataset diabetes, rozdelený na údaje `X` a vlastnosti `y`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(442, 10)\n", + "[ 0.03807591 0.05068012 0.06169621 0.02187239 -0.0442235 -0.03482076\n", + " -0.04340085 -0.00259226 0.01990749 -0.01764613]\n" + ] + } + ], + "source": [ + "X, y = datasets.load_diabetes(return_X_y=True)\n", + "print(X.shape)\n", + "print(X[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Vyberte iba jednu vlastnosť, na ktorú sa zameriate pri tomto cvičení\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(442,)\n" + ] + } + ], + "source": [ + "# Selecting the 3rd feature\n", + "X = X[:, 2]\n", + "print(X.shape)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + 
"text": [ + "(442, 1)\n", + "[[ 0.06169621]\n", + " [-0.05147406]\n", + " [ 0.04445121]\n", + " [-0.01159501]\n", + " [-0.03638469]\n", + " [-0.04069594]\n", + " [-0.04716281]\n", + " [-0.00189471]\n", + " [ 0.06169621]\n", + " [ 0.03906215]\n", + " [-0.08380842]\n", + " [ 0.01750591]\n", + " [-0.02884001]\n", + " [-0.00189471]\n", + " [-0.02560657]\n", + " [-0.01806189]\n", + " [ 0.04229559]\n", + " [ 0.01211685]\n", + " [-0.0105172 ]\n", + " [-0.01806189]\n", + " [-0.05686312]\n", + " [-0.02237314]\n", + " [-0.00405033]\n", + " [ 0.06061839]\n", + " [ 0.03582872]\n", + " [-0.01267283]\n", + " [-0.07734155]\n", + " [ 0.05954058]\n", + " [-0.02129532]\n", + " [-0.00620595]\n", + " [ 0.04445121]\n", + " [-0.06548562]\n", + " [ 0.12528712]\n", + " [-0.05039625]\n", + " [-0.06332999]\n", + " [-0.03099563]\n", + " [ 0.02289497]\n", + " [ 0.01103904]\n", + " [ 0.07139652]\n", + " [ 0.01427248]\n", + " [-0.00836158]\n", + " [-0.06764124]\n", + " [-0.0105172 ]\n", + " [-0.02345095]\n", + " [ 0.06816308]\n", + " [-0.03530688]\n", + " [-0.01159501]\n", + " [-0.0730303 ]\n", + " [-0.04177375]\n", + " [ 0.01427248]\n", + " [-0.00728377]\n", + " [ 0.0164281 ]\n", + " [-0.00943939]\n", + " [-0.01590626]\n", + " [ 0.0250506 ]\n", + " [-0.04931844]\n", + " [ 0.04121778]\n", + " [-0.06332999]\n", + " [-0.06440781]\n", + " [-0.02560657]\n", + " [-0.00405033]\n", + " [ 0.00457217]\n", + " [-0.00728377]\n", + " [-0.0374625 ]\n", + " [-0.02560657]\n", + " [-0.02452876]\n", + " [-0.01806189]\n", + " [-0.01482845]\n", + " [-0.02991782]\n", + " [-0.046085 ]\n", + " [-0.06979687]\n", + " [ 0.03367309]\n", + " [-0.00405033]\n", + " [-0.02021751]\n", + " [ 0.00241654]\n", + " [-0.03099563]\n", + " [ 0.02828403]\n", + " [-0.03638469]\n", + " [-0.05794093]\n", + " [-0.0374625 ]\n", + " [ 0.01211685]\n", + " [-0.02237314]\n", + " [-0.03530688]\n", + " [ 0.00996123]\n", + " [-0.03961813]\n", + " [ 0.07139652]\n", + " [-0.07518593]\n", + " [-0.00620595]\n", + " [-0.04069594]\n", + " 
[-0.04824063]\n", + " [-0.02560657]\n", + " [ 0.0519959 ]\n", + " [ 0.00457217]\n", + " [-0.06440781]\n", + " [-0.01698407]\n", + " [-0.05794093]\n", + " [ 0.00996123]\n", + " [ 0.08864151]\n", + " [-0.00512814]\n", + " [-0.06440781]\n", + " [ 0.01750591]\n", + " [-0.04500719]\n", + " [ 0.02828403]\n", + " [ 0.04121778]\n", + " [ 0.06492964]\n", + " [-0.03207344]\n", + " [-0.07626374]\n", + " [ 0.04984027]\n", + " [ 0.04552903]\n", + " [-0.00943939]\n", + " [-0.03207344]\n", + " [ 0.00457217]\n", + " [ 0.02073935]\n", + " [ 0.01427248]\n", + " [ 0.11019775]\n", + " [ 0.00133873]\n", + " [ 0.05846277]\n", + " [-0.02129532]\n", + " [-0.0105172 ]\n", + " [-0.04716281]\n", + " [ 0.00457217]\n", + " [ 0.01750591]\n", + " [ 0.08109682]\n", + " [ 0.0347509 ]\n", + " [ 0.02397278]\n", + " [-0.00836158]\n", + " [-0.06117437]\n", + " [-0.00189471]\n", + " [-0.06225218]\n", + " [ 0.0164281 ]\n", + " [ 0.09618619]\n", + " [-0.06979687]\n", + " [-0.02129532]\n", + " [-0.05362969]\n", + " [ 0.0433734 ]\n", + " [ 0.05630715]\n", + " [-0.0816528 ]\n", + " [ 0.04984027]\n", + " [ 0.11127556]\n", + " [ 0.06169621]\n", + " [ 0.01427248]\n", + " [ 0.04768465]\n", + " [ 0.01211685]\n", + " [ 0.00564998]\n", + " [ 0.04660684]\n", + " [ 0.12852056]\n", + " [ 0.05954058]\n", + " [ 0.09295276]\n", + " [ 0.01535029]\n", + " [-0.00512814]\n", + " [ 0.0703187 ]\n", + " [-0.00405033]\n", + " [-0.00081689]\n", + " [-0.04392938]\n", + " [ 0.02073935]\n", + " [ 0.06061839]\n", + " [-0.0105172 ]\n", + " [-0.03315126]\n", + " [-0.06548562]\n", + " [ 0.0433734 ]\n", + " [-0.06225218]\n", + " [ 0.06385183]\n", + " [ 0.03043966]\n", + " [ 0.07247433]\n", + " [-0.0191397 ]\n", + " [-0.06656343]\n", + " [-0.06009656]\n", + " [ 0.06924089]\n", + " [ 0.05954058]\n", + " [-0.02668438]\n", + " [-0.02021751]\n", + " [-0.046085 ]\n", + " [ 0.07139652]\n", + " [-0.07949718]\n", + " [ 0.00996123]\n", + " [-0.03854032]\n", + " [ 0.01966154]\n", + " [ 0.02720622]\n", + " [-0.00836158]\n", + " [-0.01590626]\n", + 
" [ 0.00457217]\n", + " [-0.04285156]\n", + " [ 0.00564998]\n", + " [-0.03530688]\n", + " [ 0.02397278]\n", + " [-0.01806189]\n", + " [ 0.04229559]\n", + " [-0.0547075 ]\n", + " [-0.00297252]\n", + " [-0.06656343]\n", + " [-0.01267283]\n", + " [-0.04177375]\n", + " [-0.03099563]\n", + " [-0.00512814]\n", + " [-0.05901875]\n", + " [ 0.0250506 ]\n", + " [-0.046085 ]\n", + " [ 0.00349435]\n", + " [ 0.05415152]\n", + " [-0.04500719]\n", + " [-0.05794093]\n", + " [-0.05578531]\n", + " [ 0.00133873]\n", + " [ 0.03043966]\n", + " [ 0.00672779]\n", + " [ 0.04660684]\n", + " [ 0.02612841]\n", + " [ 0.04552903]\n", + " [ 0.04013997]\n", + " [-0.01806189]\n", + " [ 0.01427248]\n", + " [ 0.03690653]\n", + " [ 0.00349435]\n", + " [-0.07087468]\n", + " [-0.03315126]\n", + " [ 0.09403057]\n", + " [ 0.03582872]\n", + " [ 0.03151747]\n", + " [-0.06548562]\n", + " [-0.04177375]\n", + " [-0.03961813]\n", + " [-0.03854032]\n", + " [-0.02560657]\n", + " [-0.02345095]\n", + " [-0.06656343]\n", + " [ 0.03259528]\n", + " [-0.046085 ]\n", + " [-0.02991782]\n", + " [-0.01267283]\n", + " [-0.01590626]\n", + " [ 0.07139652]\n", + " [-0.03099563]\n", + " [ 0.00026092]\n", + " [ 0.03690653]\n", + " [ 0.03906215]\n", + " [-0.01482845]\n", + " [ 0.00672779]\n", + " [-0.06871905]\n", + " [-0.00943939]\n", + " [ 0.01966154]\n", + " [ 0.07462995]\n", + " [-0.00836158]\n", + " [-0.02345095]\n", + " [-0.046085 ]\n", + " [ 0.05415152]\n", + " [-0.03530688]\n", + " [-0.03207344]\n", + " [-0.0816528 ]\n", + " [ 0.04768465]\n", + " [ 0.06061839]\n", + " [ 0.05630715]\n", + " [ 0.09834182]\n", + " [ 0.05954058]\n", + " [ 0.03367309]\n", + " [ 0.05630715]\n", + " [-0.06548562]\n", + " [ 0.16085492]\n", + " [-0.05578531]\n", + " [-0.02452876]\n", + " [-0.03638469]\n", + " [-0.00836158]\n", + " [-0.04177375]\n", + " [ 0.12744274]\n", + " [-0.07734155]\n", + " [ 0.02828403]\n", + " [-0.02560657]\n", + " [-0.06225218]\n", + " [-0.00081689]\n", + " [ 0.08864151]\n", + " [-0.03207344]\n", + " [ 0.03043966]\n", + 
" [ 0.00888341]\n", + " [ 0.00672779]\n", + " [-0.02021751]\n", + " [-0.02452876]\n", + " [-0.01159501]\n", + " [ 0.02612841]\n", + " [-0.05901875]\n", + " [-0.03638469]\n", + " [-0.02452876]\n", + " [ 0.01858372]\n", + " [-0.0902753 ]\n", + " [-0.00512814]\n", + " [-0.05255187]\n", + " [-0.02237314]\n", + " [-0.02021751]\n", + " [-0.0547075 ]\n", + " [-0.00620595]\n", + " [-0.01698407]\n", + " [ 0.05522933]\n", + " [ 0.07678558]\n", + " [ 0.01858372]\n", + " [-0.02237314]\n", + " [ 0.09295276]\n", + " [-0.03099563]\n", + " [ 0.03906215]\n", + " [-0.06117437]\n", + " [-0.00836158]\n", + " [-0.0374625 ]\n", + " [-0.01375064]\n", + " [ 0.07355214]\n", + " [-0.02452876]\n", + " [ 0.03367309]\n", + " [ 0.0347509 ]\n", + " [-0.03854032]\n", + " [-0.03961813]\n", + " [-0.00189471]\n", + " [-0.03099563]\n", + " [-0.046085 ]\n", + " [ 0.00133873]\n", + " [ 0.06492964]\n", + " [ 0.04013997]\n", + " [-0.02345095]\n", + " [ 0.05307371]\n", + " [ 0.04013997]\n", + " [-0.02021751]\n", + " [ 0.01427248]\n", + " [-0.03422907]\n", + " [ 0.00672779]\n", + " [ 0.00457217]\n", + " [ 0.03043966]\n", + " [ 0.0519959 ]\n", + " [ 0.06169621]\n", + " [-0.00728377]\n", + " [ 0.00564998]\n", + " [ 0.05415152]\n", + " [-0.00836158]\n", + " [ 0.114509 ]\n", + " [ 0.06708527]\n", + " [-0.05578531]\n", + " [ 0.03043966]\n", + " [-0.02560657]\n", + " [ 0.10480869]\n", + " [-0.00620595]\n", + " [-0.04716281]\n", + " [-0.04824063]\n", + " [ 0.08540807]\n", + " [-0.01267283]\n", + " [-0.03315126]\n", + " [-0.00728377]\n", + " [-0.01375064]\n", + " [ 0.05954058]\n", + " [ 0.02181716]\n", + " [ 0.01858372]\n", + " [-0.01159501]\n", + " [-0.00297252]\n", + " [ 0.01750591]\n", + " [-0.02991782]\n", + " [-0.02021751]\n", + " [-0.05794093]\n", + " [ 0.06061839]\n", + " [-0.04069594]\n", + " [-0.07195249]\n", + " [-0.05578531]\n", + " [ 0.04552903]\n", + " [-0.00943939]\n", + " [-0.03315126]\n", + " [ 0.04984027]\n", + " [-0.08488624]\n", + " [ 0.00564998]\n", + " [ 0.02073935]\n", + " [-0.00728377]\n", + 
" [ 0.10480869]\n", + " [-0.02452876]\n", + " [-0.00620595]\n", + " [-0.03854032]\n", + " [ 0.13714305]\n", + " [ 0.17055523]\n", + " [ 0.00241654]\n", + " [ 0.03798434]\n", + " [-0.05794093]\n", + " [-0.00943939]\n", + " [-0.02345095]\n", + " [-0.0105172 ]\n", + " [-0.03422907]\n", + " [-0.00297252]\n", + " [ 0.06816308]\n", + " [ 0.00996123]\n", + " [ 0.00241654]\n", + " [-0.03854032]\n", + " [ 0.02612841]\n", + " [-0.08919748]\n", + " [ 0.06061839]\n", + " [-0.02884001]\n", + " [-0.02991782]\n", + " [-0.0191397 ]\n", + " [-0.04069594]\n", + " [ 0.01535029]\n", + " [-0.02452876]\n", + " [ 0.00133873]\n", + " [ 0.06924089]\n", + " [-0.06979687]\n", + " [-0.02991782]\n", + " [-0.046085 ]\n", + " [ 0.01858372]\n", + " [ 0.00133873]\n", + " [-0.03099563]\n", + " [-0.00405033]\n", + " [ 0.01535029]\n", + " [ 0.02289497]\n", + " [ 0.04552903]\n", + " [-0.04500719]\n", + " [-0.03315126]\n", + " [ 0.097264 ]\n", + " [ 0.05415152]\n", + " [ 0.12313149]\n", + " [-0.08057499]\n", + " [ 0.09295276]\n", + " [-0.05039625]\n", + " [-0.01159501]\n", + " [-0.0277622 ]\n", + " [ 0.05846277]\n", + " [ 0.08540807]\n", + " [-0.00081689]\n", + " [ 0.00672779]\n", + " [ 0.00888341]\n", + " [ 0.08001901]\n", + " [ 0.07139652]\n", + " [-0.02452876]\n", + " [-0.0547075 ]\n", + " [-0.03638469]\n", + " [ 0.0164281 ]\n", + " [ 0.07786339]\n", + " [-0.03961813]\n", + " [ 0.01103904]\n", + " [-0.04069594]\n", + " [-0.03422907]\n", + " [ 0.00564998]\n", + " [ 0.08864151]\n", + " [-0.03315126]\n", + " [-0.05686312]\n", + " [-0.03099563]\n", + " [ 0.05522933]\n", + " [-0.06009656]\n", + " [ 0.00133873]\n", + " [-0.02345095]\n", + " [-0.07410811]\n", + " [ 0.01966154]\n", + " [-0.01590626]\n", + " [-0.01590626]\n", + " [ 0.03906215]\n", + " [-0.0730303 ]]\n" + ] + } + ], + "source": [ + "#Reshaping to get a 2D array\n", + "X = X.reshape(-1, 1)\n", + "print(X.shape)\n", + "print(X)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + 
"execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.33)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Vyberte model a natrénujte ho na tréningových údajoch\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "LinearRegression()" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = linear_model.LinearRegression()\n", + "model.fit(X_train, y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Použite testovacie údaje na predpovedanie línie\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred = model.predict(X_test)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Zobrazte výsledky v grafe\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.scatter(X_test, y_test, color='black')\n", + "plt.plot(X_test, y_pred, color='blue', linewidth=3)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby AI prekladu [Co-op Translator](https://github.com/Azure/co-op-translator). Hoci sa snažíme o presnosť, prosím, berte na vedomie, že automatizované preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre kritické informácie sa odporúča profesionálny ľudský preklad. Nenesieme zodpovednosť za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.1" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "16ff1a974f6e4348e869e4a7d366b86a", + "translation_date": "2025-09-06T13:38:26+00:00", + "source_file": "2-Regression/1-Tools/solution/notebook.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sk/2-Regression/2-Data/notebook.ipynb b/translations/sk/2-Regression/2-Data/notebook.ipynb new file mode 100644 index 000000000..53729346b --- /dev/null +++ b/translations/sk/2-Regression/2-Data/notebook.ipynb @@ -0,0 +1,46 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + 
}, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3-final" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3", + "language": "python" + }, + "coopTranslator": { + "original_hash": "1b2ab303ac6c604a34c6ca7a49077fc7", + "translation_date": "2025-09-06T13:45:49+00:00", + "source_file": "2-Regression/2-Data/notebook.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby na automatický preklad [Co-op Translator](https://github.com/Azure/co-op-translator). Hoci sa snažíme o presnosť, upozorňujeme, že automatické preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre dôležité informácie odporúčame profesionálny ľudský preklad. 
Nezodpovedáme za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sk/2-Regression/2-Data/solution/R/lesson_2-R.ipynb b/translations/sk/2-Regression/2-Data/solution/R/lesson_2-R.ipynb new file mode 100644 index 000000000..a3110e2cc --- /dev/null +++ b/translations/sk/2-Regression/2-Data/solution/R/lesson_2-R.ipynb @@ -0,0 +1,685 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_2-R.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "f3c335f9940cfd76528b3ef918b9b342", + "translation_date": "2025-09-06T13:47:59+00:00", + "source_file": "2-Regression/2-Data/solution/R/lesson_2-R.ipynb", + "language_code": "sk" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Vytvorenie regresného modelu: príprava a vizualizácia dát\n", + "\n", + "## **Lineárna regresia pre tekvice - Lekcia 2**\n", + "#### Úvod\n", + "\n", + "Teraz, keď máte pripravené nástroje na budovanie modelov strojového učenia pomocou Tidymodels a Tidyverse, ste pripravení začať klásť otázky o svojich dátach. Pri práci s dátami a aplikovaní riešení strojového učenia je veľmi dôležité vedieť, ako položiť správnu otázku, aby ste mohli plne využiť potenciál svojho datasetu.\n", + "\n", + "V tejto lekcii sa naučíte:\n", + "\n", + "- Ako pripraviť svoje dáta na budovanie modelov.\n", + "\n", + "- Ako používať `ggplot2` na vizualizáciu dát.\n", + "\n", + "Otázka, na ktorú potrebujete odpoveď, určí, aký typ algoritmov strojového učenia budete používať. Kvalita odpovede, ktorú dostanete, bude výrazne závisieť od povahy vašich dát.\n", + "\n", + "Pozrime sa na to prostredníctvom praktického cvičenia.\n", + "\n", + "\n", + "

\n", + " \n", + "

Ilustrácia od @allison_horst
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "Pg5aexcOPqAZ" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 1. Importovanie údajov o tekviciach a privolanie Tidyverse\n", + "\n", + "Na spracovanie tejto lekcie budeme potrebovať nasledujúce balíky:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) je [kolekcia balíkov pre R](https://www.tidyverse.org/packages), ktorá je navrhnutá tak, aby robila dátovú vedu rýchlejšou, jednoduchšou a zábavnejšou!\n", + "\n", + "Môžete ich nainštalovať pomocou:\n", + "\n", + "`install.packages(c(\"tidyverse\"))`\n", + "\n", + "Nasledujúci skript skontroluje, či máte nainštalované balíky potrebné na dokončenie tohto modulu, a v prípade, že niektoré chýbajú, ich pre vás nainštaluje.\n" + ], + "metadata": { + "id": "dc5WhyVdXAjR" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "suppressWarnings(if(!require(\"pacman\")) install.packages(\"pacman\"))\n", + "pacman::p_load(tidyverse)" + ], + "outputs": [], + "metadata": { + "id": "GqPYUZgfXOBt" + } + }, + { + "cell_type": "markdown", + "source": [ + "Teraz si spusťme niektoré balíky a načítajme [dáta](https://github.com/microsoft/ML-For-Beginners/blob/main/2-Regression/data/US-pumpkins.csv) poskytnuté pre túto lekciu!\n" + ], + "metadata": { + "id": "kvjDTPDSXRr2" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load the core Tidyverse packages\n", + "library(tidyverse)\n", + "\n", + "# Import the pumpkins data\n", + "pumpkins <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/2-Regression/data/US-pumpkins.csv\")\n", + "\n", + "\n", + "# Get a glimpse and dimensions of the data\n", + "glimpse(pumpkins)\n", + "\n", + "\n", + "# Print the first 50 rows of the data set\n", + "pumpkins %>% \n", + " slice_head(n =50)" + ], + "outputs": [], + "metadata": { + "id": "VMri-t2zXqgD" + } + }, + { + "cell_type": "markdown", + "source": [ + "Rýchly pohľad 
pomocou `glimpse()` okamžite ukáže, že existujú prázdne hodnoty a mix reťazcov (`chr`) a číselných údajov (`dbl`). `Date` je typu znakový reťazec a je tu aj zvláštny stĺpec s názvom `Package`, kde sú údaje zmiešané medzi `sacks`, `bins` a inými hodnotami. Údaje sú, úprimne povedané, trochu chaotické 😤.\n", + "\n", + "V skutočnosti nie je veľmi bežné dostať dataset, ktorý je úplne pripravený na použitie na vytvorenie ML modelu hneď po rozbalení. Ale nebojte sa, v tejto lekcii sa naučíte, ako pripraviť surový dataset pomocou štandardných knižníc v R 🧑‍🔧. Taktiež sa naučíte rôzne techniky na vizualizáciu údajov. 📈📊\n", + "
\n", + "\n", + "> Pripomenutie: Operátor pipe (`%>%`) vykonáva operácie v logickej postupnosti tým, že posúva objekt ďalej do funkcie alebo výrazu. Môžete si operátor pipe predstaviť ako výraz „a potom“ vo vašom kóde.\n" + ], + "metadata": { + "id": "REWcIv9yX29v" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 2. Kontrola chýbajúcich údajov\n", + "\n", + "Jedným z najbežnejších problémov, s ktorými sa dátoví analytici musia vysporiadať, sú neúplné alebo chýbajúce údaje. R reprezentuje chýbajúce alebo neznáme hodnoty pomocou špeciálnej hodnoty: `NA` (Not Available).\n", + "\n", + "Ako teda zistíme, že dátový rámec obsahuje chýbajúce hodnoty?\n", + "
\n", + "- Jedným z priamych spôsobov je použitie základnej funkcie R `anyNA`, ktorá vracia logické objekty `TRUE` alebo `FALSE`.\n" + ], + "metadata": { + "id": "Zxfb3AM5YbUe" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "pumpkins %>% \n", + " anyNA()" + ], + "outputs": [], + "metadata": { + "id": "G--DQutAYltj" + } + }, + { + "cell_type": "markdown", + "source": [ + "Zdá sa, že niektoré údaje chýbajú! To je dobré miesto, kde začať.\n", + "\n", + "- Ďalším spôsobom by bolo použiť funkciu `is.na()`, ktorá označuje, ktoré jednotlivé prvky stĺpcov chýbajú pomocou logickej hodnoty `TRUE`.\n" + ], + "metadata": { + "id": "mU-7-SB6YokF" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "pumpkins %>% \n", + " is.na() %>% \n", + " head(n = 7)" + ], + "outputs": [], + "metadata": { + "id": "W-DxDOR4YxSW" + } + }, + { + "cell_type": "markdown", + "source": [ + "Dobre, práca je hotová, ale s takým veľkým dátovým rámcom by bolo neefektívne a prakticky nemožné kontrolovať všetky riadky a stĺpce jednotlivo😴.\n", + "\n", + "- Intuitívnejší spôsob by bol vypočítať súčet chýbajúcich hodnôt pre každý stĺpec:\n" + ], + "metadata": { + "id": "xUWxipKYY0o7" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "pumpkins %>% \n", + " is.na() %>% \n", + " colSums()" + ], + "outputs": [], + "metadata": { + "id": "ZRBWV6P9ZArL" + } + }, + { + "cell_type": "markdown", + "source": [ + "Oveľa lepšie! Chýbajú niektoré údaje, ale možno to nebude mať vplyv na danú úlohu. Uvidíme, aké ďalšie analýzy prinesú výsledky.\n", + "\n", + "> Okrem skvelých balíkov a funkcií má R veľmi dobrú dokumentáciu. Napríklad použite `help(colSums)` alebo `?colSums`, aby ste sa dozvedeli viac o funkcii.\n" + ], + "metadata": { + "id": "9gv-crB6ZD1Y" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 3. Dplyr: Gramatika manipulácie s dátami\n", + "\n", + "

\n", + " \n", + "

Ilustrácia od @allison_horst
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "o4jLY5-VZO2C" + } + }, + { + "cell_type": "markdown", + "source": [ + "[`dplyr`](https://dplyr.tidyverse.org/), balík v Tidyverse, je gramatika manipulácie s dátami, ktorá poskytuje konzistentnú sadu slovies, ktoré vám pomôžu vyriešiť najbežnejšie výzvy pri manipulácii s dátami. V tejto sekcii preskúmame niektoré zo slovies dplyr!\n" + ], + "metadata": { + "id": "i5o33MQBZWWw" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### dplyr::select()\n", + "\n", + "`select()` je funkcia v balíku `dplyr`, ktorá vám pomáha vybrať stĺpce na ponechanie alebo vylúčenie.\n", + "\n", + "Aby ste uľahčili prácu s vaším dátovým rámcom, odstráňte niekoľko jeho stĺpcov pomocou `select()`, pričom ponechajte iba tie, ktoré potrebujete.\n", + "\n", + "Napríklad v tomto cvičení bude naša analýza zahŕňať stĺpce `Package`, `Low Price`, `High Price` a `Date`. Vyberme tieto stĺpce.\n" + ], + "metadata": { + "id": "x3VGMAGBZiUr" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Select desired columns\n", + "pumpkins <- pumpkins %>% \n", + " select(Package, `Low Price`, `High Price`, Date)\n", + "\n", + "\n", + "# Print data set\n", + "pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "F_FgxQnVZnM0" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### dplyr::mutate()\n", + "\n", + "`mutate()` je funkcia v balíku `dplyr`, ktorá vám umožňuje vytvárať alebo upravovať stĺpce, pričom zachováva existujúce stĺpce.\n", + "\n", + "Všeobecná štruktúra funkcie mutate je:\n", + "\n", + "`data %>% mutate(new_column_name = what_it_contains)`\n", + "\n", + "Poďme si vyskúšať `mutate` na stĺpci `Date` vykonaním nasledujúcich operácií:\n", + "\n", + "1. Konvertovať dátumy (aktuálne typu character) na formát mesiaca (ide o americké dátumy, takže formát je `MM/DD/YYYY`).\n", + "\n", + "2. 
Extrahovať mesiac z dátumov do nového stĺpca.\n", + "\n", + "V R balík [lubridate](https://lubridate.tidyverse.org/) uľahčuje prácu s dátumovo-časovými údajmi. Takže použijeme `dplyr::mutate()`, `lubridate::mdy()`, `lubridate::month()` a pozrieme sa, ako dosiahnuť vyššie uvedené ciele. Stĺpec Date môžeme vynechať, pretože ho už nebudeme potrebovať v ďalších operáciách.\n" + ], + "metadata": { + "id": "2KKo0Ed9Z1VB" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load lubridate\n", + "library(lubridate)\n", + "\n", + "pumpkins <- pumpkins %>% \n", + " # Convert the Date column to a date object\n", + " mutate(Date = mdy(Date)) %>% \n", + " # Extract month from Date\n", + " mutate(Month = month(Date)) %>% \n", + " # Drop Date column\n", + " select(-Date)\n", + "\n", + "# View the first few rows\n", + "pumpkins %>% \n", + " slice_head(n = 7)" + ], + "outputs": [], + "metadata": { + "id": "5joszIVSZ6xe" + } + }, + { + "cell_type": "markdown", + "source": [ + "Hurá! 🤩\n", + "\n", + "Ďalej vytvorme nový stĺpec `Price`, ktorý bude predstavovať priemernú cenu tekvice. Teraz vypočítajme priemer stĺpcov `Low Price` a `High Price`, aby sme naplnili nový stĺpec Price. \n", + "
\n" + ], + "metadata": { + "id": "nIgLjNMCZ-6Y" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Create a new column Price\n", + "pumpkins <- pumpkins %>% \n", + " mutate(Price = (`Low Price` + `High Price`)/2)\n", + "\n", + "# View the first few rows of the data\n", + "pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "Zo0BsqqtaJw2" + } + }, + { + "cell_type": "markdown", + "source": [ + "Yeees!💪\n", + "\n", + "„Ale počkaj!“, poviete si po rýchlom prehliadnutí celého dátového súboru pomocou `View(pumpkins)`, „Tu je niečo zvláštne!“🤔\n", + "\n", + "Ak sa pozriete na stĺpec `Package`, tekvice sa predávajú v rôznych konfiguráciách. Niektoré sa predávajú v mierach `1 1/9 bushel`, niektoré v mierach `1/2 bushel`, niektoré na kusy, niektoré na váhu a niektoré vo veľkých škatuliach s rôznymi šírkami.\n", + "\n", + "Poďme si to overiť:\n" + ], + "metadata": { + "id": "p77WZr-9aQAR" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Verify the distinct observations in Package column\n", + "pumpkins %>% \n", + " distinct(Package)" + ], + "outputs": [], + "metadata": { + "id": "XISGfh0IaUy6" + } + }, + { + "cell_type": "markdown", + "source": [ + "Úžasné!👏\n", + "\n", + "Zdá sa, že tekvice je veľmi ťažké vážiť konzistentne, takže ich vyfiltrujme tak, že vyberieme iba tekvice, ktoré obsahujú reťazec *bushel* v stĺpci `Package`, a uložme to do nového dátového rámca `new_pumpkins`.\n" + ], + "metadata": { + "id": "7sMjiVujaZxY" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### dplyr::filter() a stringr::str_detect()\n", + "\n", + "[`dplyr::filter()`](https://dplyr.tidyverse.org/reference/filter.html): vytvára podmnožinu údajov obsahujúcu iba **riadky**, ktoré spĺňajú vaše podmienky, v tomto prípade tekvice so slovom *bushel* v stĺpci `Package`.\n", + "\n", + "[stringr::str_detect()](https://stringr.tidyverse.org/reference/str_detect.html): zisťuje prítomnosť alebo 
neprítomnosť vzoru v reťazci.\n", + "\n", + "Balík [`stringr`](https://github.com/tidyverse/stringr) poskytuje jednoduché funkcie pre bežné operácie s reťazcami.\n" + ], + "metadata": { + "id": "L8Qfcs92ageF" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Retain only pumpkins with \"bushel\"\n", + "new_pumpkins <- pumpkins %>% \n", + " filter(str_detect(Package, \"bushel\"))\n", + "\n", + "# Get the dimensions of the new data\n", + "dim(new_pumpkins)\n", + "\n", + "# View a few rows of the new data\n", + "new_pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "hy_SGYREampd" + } + }, + { + "cell_type": "markdown", + "source": [ + "Môžete vidieť, že sme zúžili výber na približne 415 riadkov údajov obsahujúcich tekvice predávané na bušle.🤩 \n", + "
\n" + ], + "metadata": { + "id": "VrDwF031avlR" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### dplyr::case_when()\n", + "\n", + "**Ale počkajte! Ešte je tu jedna vec, ktorú treba urobiť**\n", + "\n", + "Všimli ste si, že množstvo v bušloch sa líši v jednotlivých riadkoch? Musíte normalizovať ceny tak, aby ste zobrazili cenu za bušel, nie za 1 1/9 alebo 1/2 bušľa. Je čas na trochu matematiky, aby ste to štandardizovali.\n", + "\n", + "Použijeme funkciu [`case_when()`](https://dplyr.tidyverse.org/reference/case_when.html) na *mutáciu* stĺpca Price v závislosti od určitých podmienok. `case_when` umožňuje vektorovo spracovať viacero `if_else()` príkazov.\n" + ], + "metadata": { + "id": "mLpw2jH4a0tx" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Convert the price if the Package contains fractional bushel values\n", + "new_pumpkins <- new_pumpkins %>% \n", + " mutate(Price = case_when(\n", + " str_detect(Package, \"1 1/9\") ~ Price/(1 + 1/9),\n", + " str_detect(Package, \"1/2\") ~ Price/(1/2),\n", + " TRUE ~ Price))\n", + "\n", + "# View the first few rows of the data\n", + "new_pumpkins %>% \n", + " slice_head(n = 30)" + ], + "outputs": [], + "metadata": { + "id": "P68kLVQmbM6I" + } + }, + { + "cell_type": "markdown", + "source": [ + "Teraz môžeme analyzovať cenu za jednotku na základe ich merania v bušloch. Celá táto štúdia o bušloch tekvíc však ukazuje, aké veľmi `dôležité` je `pochopiť povahu vašich údajov`!\n", + "\n", + "> ✅ Podľa [The Spruce Eats](https://www.thespruceeats.com/how-much-is-a-bushel-1389308) hmotnosť bušľa závisí od typu produktu, pretože ide o objemové meranie. \"Bušel paradajok, napríklad, by mal vážiť 56 libier... Listy a zelené rastliny zaberajú viac miesta s menšou hmotnosťou, takže bušel špenátu váži iba 20 libier.\" Je to všetko dosť komplikované! Nezaoberajme sa konverziou bušlov na libry, namiesto toho stanovme cenu za bušel. 
Celá táto štúdia o bušloch tekvíc však ukazuje, aké veľmi dôležité je pochopiť povahu vašich údajov!\n", + ">\n", + "> ✅ Všimli ste si, že tekvice predávané na pol bušľa sú veľmi drahé? Dokážete zistiť prečo? Tip: malé tekvice sú oveľa drahšie ako veľké, pravdepodobne preto, že ich je oveľa viac na bušel, vzhľadom na nevyužitý priestor, ktorý zaberá jedna veľká dutá tekvica na koláč.\n" + ], + "metadata": { + "id": "pS2GNPagbSdb" + } + }, + { + "cell_type": "markdown", + "source": [ + "A teraz nakoniec, len pre dobrodružstvo 💁‍♀️, presuňme stĺpec Month na prvú pozíciu, teda `pred` stĺpec `Package`. \n", + "\n", + "Na zmenu pozícií stĺpcov sa používa `dplyr::relocate()`. \n" + ], + "metadata": { + "id": "qql1SowfbdnP" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Create a new data frame new_pumpkins\n", + "new_pumpkins <- new_pumpkins %>% \n", + " relocate(Month, .before = Package)\n", + "\n", + "new_pumpkins %>% \n", + " slice_head(n = 7)" + ], + "outputs": [], + "metadata": { + "id": "JJ1x6kw8bixF" + } + }, + { + "cell_type": "markdown", + "source": [ + "Skvelá práca!👌 Teraz máte čistý, uprataný dataset, na ktorom môžete postaviť svoj nový regresný model! \n", + "
\n" + ], + "metadata": { + "id": "y8TJ0Za_bn5Y" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 4. Vizualizácia dát s ggplot2\n", + "\n", + "

\n", + " \n", + "

Infografika od Dasani Madipalli
\n", + "\n", + "\n", + "\n", + "\n", + "Existuje *múdre* príslovie, ktoré hovorí:\n", + "\n", + "> \"Jednoduchý graf priniesol analytikovi dát viac informácií než akékoľvek iné zariadenie.\" --- John Tukey\n", + "\n", + "Úlohou dátového vedca je ukázať kvalitu a charakter dát, s ktorými pracuje. Na tento účel často vytvára zaujímavé vizualizácie, ako sú grafy, diagramy a tabuľky, ktoré zobrazujú rôzne aspekty dát. Týmto spôsobom dokáže vizuálne ukázať vzťahy a medzery, ktoré by inak bolo ťažké odhaliť.\n", + "\n", + "Vizualizácie môžu tiež pomôcť určiť, ktorá technika strojového učenia je pre dané dáta najvhodnejšia. Napríklad bodový graf, ktorý sa zdá sledovať líniu, naznačuje, že dáta sú vhodným kandidátom na cvičenie s lineárnou regresiou.\n", + "\n", + "R ponúka niekoľko systémov na tvorbu grafov, ale [`ggplot2`](https://ggplot2.tidyverse.org/index.html) je jedným z najelegantnejších a najuniverzálnejších. `ggplot2` vám umožňuje vytvárať grafy **kombinovaním nezávislých komponentov**.\n", + "\n", + "Začnime jednoduchým bodovým grafom pre stĺpce Price a Month.\n", + "\n", + "V tomto prípade začneme s [`ggplot()`](https://ggplot2.tidyverse.org/reference/ggplot.html), poskytneme dataset a estetické mapovanie (pomocou [`aes()`](https://ggplot2.tidyverse.org/reference/aes.html)), a potom pridáme vrstvy (ako [`geom_point()`](https://ggplot2.tidyverse.org/reference/geom_point.html)) pre bodové grafy.\n" + ], + "metadata": { + "id": "mYSH6-EtbvNa" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Set a theme for the plots\n", + "theme_set(theme_light())\n", + "\n", + "# Create a scatter plot\n", + "p <- ggplot(data = new_pumpkins, aes(x = Price, y = Month))\n", + "p + geom_point()" + ], + "outputs": [], + "metadata": { + "id": "g2YjnGeOcLo4" + } + }, + { + "cell_type": "markdown", + "source": [ + "Je to užitočný graf 🤷? 
Prekvapilo vás na ňom niečo?\n", + "\n", + "Nie je obzvlášť užitočný, pretože iba zobrazuje vaše údaje ako rozptyl bodov v danom mesiaci. \n", + "
\n" + ], + "metadata": { + "id": "Ml7SDCLQcPvE" + } + }, + { + "cell_type": "markdown", + "source": [ + "### **Ako to urobiť užitočným?**\n", + "\n", + "Aby grafy zobrazovali užitočné údaje, zvyčajne je potrebné údaje nejako zoskupiť. Napríklad v našom prípade by zistenie priemernej ceny tekvíc za každý mesiac poskytlo viac informácií o základných vzoroch v našich údajoch. To nás privádza k ďalšiemu preletu nad **dplyr**:\n", + "\n", + "#### `dplyr::group_by() %>% summarize()`\n", + "\n", + "Zoskupenú agregáciu v R je možné jednoducho vypočítať pomocou\n", + "\n", + "`dplyr::group_by() %>% summarize()`\n", + "\n", + "- `dplyr::group_by()` mení jednotku analýzy z celého datasetu na jednotlivé skupiny, napríklad podľa mesiacov.\n", + "\n", + "- `dplyr::summarize()` vytvára nový dátový rámec s jedným stĺpcom pre každú zoskupovaciu premennú a jedným stĺpcom pre každú štatistiku, ktorú ste špecifikovali.\n", + "\n", + "Napríklad môžeme použiť `dplyr::group_by() %>% summarize()` na zoskupenie tekvíc do skupín na základe stĺpca **Month** a potom vypočítať **priemernú cenu** za každý mesiac.\n" + ], + "metadata": { + "id": "jMakvJZIcVkh" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Find the average price of pumpkins per month\r\n", + "new_pumpkins %>%\r\n", + " group_by(Month) %>% \r\n", + " summarise(mean_price = mean(Price))" + ], + "outputs": [], + "metadata": { + "id": "6kVSUa2Bcilf" + } + }, + { + "cell_type": "markdown", + "source": [ + "Stručne!✨\n", + "\n", + "Kategorické prvky, ako sú mesiace, sú lepšie znázornené pomocou stĺpcového grafu 📊. Vrstvy zodpovedné za stĺpcové grafy sú `geom_bar()` a `geom_col()`. 
Viac informácií nájdete v `?geom_bar`.\n", + "\n", + "Poďme si jeden vytvoriť!\n" + ], + "metadata": { + "id": "Kds48GUBcj3W" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Find the average price of pumpkins per month then plot a bar chart\r\n", + "new_pumpkins %>%\r\n", + " group_by(Month) %>% \r\n", + " summarise(mean_price = mean(Price)) %>% \r\n", + " ggplot(aes(x = Month, y = mean_price)) +\r\n", + " geom_col(fill = \"midnightblue\", alpha = 0.7) +\r\n", + " ylab(\"Pumpkin Price\")" + ], + "outputs": [], + "metadata": { + "id": "VNbU1S3BcrxO" + } + }, + { + "cell_type": "markdown", + "source": [ + "🤩🤩 Toto je užitočnejšia vizualizácia dát! Zdá sa, že najvyššia cena za tekvice sa vyskytuje v septembri a októbri. Zodpovedá to vašim očakávaniam? Prečo áno alebo prečo nie?\n", + "\n", + "Gratulujeme k dokončeniu druhej lekcie 👏! Pripravili ste svoje dáta na vytváranie modelov a potom ste odhalili ďalšie poznatky pomocou vizualizácií!\n" + ], + "metadata": { + "id": "zDm0VOzzcuzR" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby na automatický preklad [Co-op Translator](https://github.com/Azure/co-op-translator). Aj keď sa snažíme o presnosť, upozorňujeme, že automatické preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre dôležité informácie odporúčame profesionálny ľudský preklad. 
Nezodpovedáme za žiadne nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sk/2-Regression/2-Data/solution/notebook.ipynb b/translations/sk/2-Regression/2-Data/solution/notebook.ipynb new file mode 100644 index 000000000..b4cd214e7 --- /dev/null +++ b/translations/sk/2-Regression/2-Data/solution/notebook.ipynb @@ -0,0 +1,437 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NameTypePackageVarietySub VarietyGradeDateLow PriceHigh PriceMostly Low...Unit of SaleQualityConditionAppearanceStorageCropRepackTrans ModeUnnamed: 24Unnamed: 25
70BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN9/24/1615.015.015.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
71BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN9/24/1618.018.018.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
72BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN10/1/1618.018.018.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
73BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN10/1/1617.017.017.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
74BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN10/8/1615.015.015.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " City Name Type Package Variety Sub Variety Grade \\\n", + "70 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "71 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "72 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "73 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "74 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "\n", + " Date Low Price High Price Mostly Low ... Unit of Sale Quality \\\n", + "70 9/24/16 15.0 15.0 15.0 ... NaN NaN \n", + "71 9/24/16 18.0 18.0 18.0 ... NaN NaN \n", + "72 10/1/16 18.0 18.0 18.0 ... NaN NaN \n", + "73 10/1/16 17.0 17.0 17.0 ... NaN NaN \n", + "74 10/8/16 15.0 15.0 15.0 ... NaN NaN \n", + "\n", + " Condition Appearance Storage Crop Repack Trans Mode Unnamed: 24 \\\n", + "70 NaN NaN NaN NaN N NaN NaN \n", + "71 NaN NaN NaN NaN N NaN NaN \n", + "72 NaN NaN NaN NaN N NaN NaN \n", + "73 NaN NaN NaN NaN N NaN NaN \n", + "74 NaN NaN NaN NaN N NaN NaN \n", + "\n", + " Unnamed: 25 \n", + "70 NaN \n", + "71 NaN \n", + "72 NaN \n", + "73 NaN \n", + "74 NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "pumpkins = pd.read_csv('../../data/US-pumpkins.csv')\n", + "\n", + "pumpkins = pumpkins[pumpkins['Package'].str.contains('bushel', case=True, regex=True)]\n", + "\n", + "pumpkins.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "City Name 0\n", + "Type 406\n", + "Package 0\n", + "Variety 0\n", + "Sub Variety 167\n", + "Grade 415\n", + "Date 0\n", + "Low Price 0\n", + "High Price 0\n", + "Mostly Low 24\n", + "Mostly High 24\n", + "Origin 0\n", + "Origin District 396\n", + "Item Size 114\n", + "Color 145\n", + "Environment 415\n", + "Unit of Sale 404\n", + "Quality 415\n", + "Condition 415\n", + "Appearance 
415\n", + "Storage 415\n", + "Crop 415\n", + "Repack 0\n", + "Trans Mode 415\n", + "Unnamed: 24 415\n", + "Unnamed: 25 391\n", + "dtype: int64" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pumpkins.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Month Package Low Price High Price Price\n", + "70 9 1 1/9 bushel cartons 15.00 15.0 13.50\n", + "71 9 1 1/9 bushel cartons 18.00 18.0 16.20\n", + "72 10 1 1/9 bushel cartons 18.00 18.0 16.20\n", + "73 10 1 1/9 bushel cartons 17.00 17.0 15.30\n", + "74 10 1 1/9 bushel cartons 15.00 15.0 13.50\n", + "... ... ... ... ... ...\n", + "1738 9 1/2 bushel cartons 15.00 15.0 30.00\n", + "1739 9 1/2 bushel cartons 13.75 15.0 28.75\n", + "1740 9 1/2 bushel cartons 10.75 15.0 25.75\n", + "1741 9 1/2 bushel cartons 12.00 12.0 24.00\n", + "1742 9 1/2 bushel cartons 12.00 12.0 24.00\n", + "\n", + "[415 rows x 5 columns]\n" + ] + } + ], + "source": [ + "\n", + "# A set of new columns for a new dataframe. 
Filter out nonmatching columns\n", + "columns_to_select = ['Package', 'Low Price', 'High Price', 'Date']\n", + "pumpkins = pumpkins.loc[:, columns_to_select]\n", + "\n", + "# Get an average between low and high price for the base pumpkin price\n", + "price = (pumpkins['Low Price'] + pumpkins['High Price']) / 2\n", + "\n", + "# Convert the date to its month only\n", + "month = pd.DatetimeIndex(pumpkins['Date']).month\n", + "\n", + "# Create a new dataframe with this basic data\n", + "new_pumpkins = pd.DataFrame({'Month': month, 'Package': pumpkins['Package'], 'Low Price': pumpkins['Low Price'],'High Price': pumpkins['High Price'], 'Price': price})\n", + "\n", + "# Convert the price if the Package contains fractional bushel values\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1 1/9'), 'Price'] = price/(1 + 1/9)\n", + "\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1/2'), 'Price'] = price/(1/2)\n", + "\n", + "print(new_pumpkins)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXoAAAD4CAYAAADiry33AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAAsTAAALEwEAmpwYAAAcXklEQVR4nO3dfZRcdZ3n8fdnKg80GbAJdLKkSQyTycnoEodoLQlGOXE0JiauZNiZFRbOoqPkuIddnWE3M7BwxmEOOcTJLOJZ96wbkBFHJjrjYGTFMWRRhlkXohUTCYoRkAh0kPQY4gO2Etrv/lG3YqW6bj3d7qrum8/rnD5d9btP3/u7v/p09a3bfRURmJlZfv1arwswM7OJ5aA3M8s5B72ZWc456M3Mcs5Bb2aWc9N6XUA9Z511VixcuLDXZZiZTRl79uz554gYqDdtUgb9woULKZVKvS7DzGzKkPT9tGk+dWNmlnMOejOznHPQm5nlnIPezCznHPRmZjnX9KobSXcAbwcOR8R5SdtW4F8DLwFPAu+OiKN1ll0LfAQoALdHxJbxK717Lr/tIb765JHjz1cums1dV104Zr7VtzzA44dfPP588ZxZ7LpmVTdKTLVj7xBbdx7g0NER5vX3sWnNEjYsG+xpTWbWXa28o/8EsLambRdwXkS8BvgucF3tQpIKwP8A3ga8GrhM0qszVdsDtSEP8NUnj3D5bQ+d0FYb8gCPH36R1bc8MNElptqxd4jr7t7P0NERAhg6OsJ1d+9nx96hntVkZt3XNOgj4kHgSE3bfRHxcvL0YeCcOoteADwREd+LiJeATwMXZ6y362pDPq29NuSbtXfD1p0HGDk2ekLbyLFRtu480KOKzKwXxuMc/R8A/1CnfRB4pur5s0lbXZI2SipJKg0PD49DWXbo6Ehb7WaWT5mCXtL1wMvAXVkLiYhtEVGMiOLAQN2/4rU2zevva6vdzPKp46CX9C7KH9JeHvVvUzUEzK96fk7SNqWsXDS7pfbFc2bVnS+tvRs2rVlC3/TCCW190wtsWrOkRxWZWS90FPTJ1TR/DLwjIn6WMtvXgcWSzpU0A7gUuKezMnvnrqsuHBPq9a662XXNqjGh3uurbjYsG+TmS5Yy2N+HgMH+Pm6+ZKmvujE7yajZPWMlbQdWAWcBzwMfpHyVzUzgh8lsD0fE+yTNo3wZ5bpk2XXArZQvr7wjIja3UlSxWAz/UzMzs9ZJ2hMRxbrTJuPNwR30ZmbtaRT0/stYM7Occ9CbmeWcg97MLOcc9GZmOeegNzPLOQe9mVnOOejNzHLOQW9mlnMOejOznHPQm5nlnIPezCznHPRmZjnnoDczyzkHvZlZzjnozcxyzkFvZpZzTYNe0h2SDkt6tKrt9yV9S9IvJdX9R/fJfAcl7Ze0T5LvJGJm1gOtvKP/BLC2pu1R4BLgwRaWf1NEnJ925xMzM5tY05rNEBEPSlpY0/YYgKQJKsvMzMbLRJ+jD+A+SXskbWw0o6SNkkqSSsPDwxNclpnZyWOig/4NEfFa4G3A1ZIuSpsxIrZFRDEiigMDAxNclpnZyWNCgz4ihpLvh4HPARdM5PbMzGysCQt6SbMknVZ5DLyV8oe4ZmbWRa1cXrkdeAhYIulZSe+R9LuSngUuBO6VtDOZd56kLyaLzgX+r6RvAl8D7o2IL03MbpiZWZpWrrq5LGXS5+rMewhYlzz+HvDbmaozM7PM/JexZmY556A3M8s5B72ZWc456M3Mcs5Bb2aWcw56M7Occ9CbmeWcg97MLOcc9GZmOeegNzPLOQe9mVnOOejNzHLOQW9mlnMOejOznHPQm5nlnIPezCznmt54RNIdwNuBwxFxXtL2+8CfAa8CLoiIUsqya4GPAAXg9ojYMk51j7Fj7xBbdx7g0NER5vX3sWnNEjYsG2x7PatveYDHD794/PniObN4avhFXo5fzTNN8MTN68csu/Dae8e0Hdwydr5qN+zYz/bdzzAaQUHisuXzuWnD0pZ
qbWWf6+3PrmtWtbR+K8tyjJZv3sXzP3np+PO5p81g9/WrgebjpdF2x2u8t6vR/jSTpR+b6VV/TBWKiMYzSBcBPwU+WRX0rwJ+Cfwv4L/UC3pJBeC7wGrgWeDrwGUR8e1mRRWLxSiV6v7sqGvH3iGuu3s/I8dGj7f1TS9w8yVL2zrYtaHYSG3Y13vRVqSF/Q079vOph58e037FigVNXwCt7HPa/jjsW5flGNWGYsXc02bUba84uGV9w+0WXzl7XMZ7uxrtT7Owz9KPzYzX63+qk7QnIor1pjU9dRMRDwJHatoei4gDTRa9AHgiIr4XES8BnwYubrHmtmzdeeCEgwwwcmyUrTublXiiVkMeOOEdfqe2736mrfZqrexz2v60s58nuyzHKC3MG4V8K9sdr/Heronan6x61R9TyUSeox8Eqo/is0lbXZI2SipJKg0PD7e1oUNHR9pqnyxGU36bSmuvNlX3earJcowmartT8dhPZD9Oxf7otknzYWxEbIuIYkQUBwYG2lp2Xn9fW+2TRUFqq73aVN3nqSbLMZqo7U7FYz+R/TgV+6PbJjLoh4D5Vc/PSdrG3aY1S+ibXjihrW96gU1rlrS1nsVzZrU877RxeJ1ftnx+W+3VWtnntP1pZz9PdlmO0dzTZrTV3up2x2u8t2ui9ierXvXHVDKRQf91YLGkcyXNAC4F7pmIDW1YNsjNlyxlsL8PAYP9fR19ELPrmlVjQnDxnFljQr3eVTdpH7g2uurmpg1LuWLFguPvagpSyx9OtbLPafvjD2Jbl+UY7b5+9ZgQrHxw2Wy8NNrueI33djXan2ay9GMzveqPqaSVq262A6uAs4DngQ9S/nD2vwMDwFFgX0SskTSP8mWU65Jl1wG3Ur688o6I2NxKUe1edWNmdrJrdNVN06DvBQe9mVl7Ml1eaWZmU5uD3sws5xz0ZmY556A3M8s5B72ZWc456M3Mcs5Bb2aWcw56M7Occ9CbmeWcg97MLOcc9GZmOeegNzPLOQe9mVnOOejNzHLOQW9mlnNNg17SHZIOS3q0qm22pF2SHk++n5Gy7KikfcnXhNxdyszMGmvlHf0ngLU1bdcC90fEYuD+5Hk9IxFxfvL1js7LNDOzTjUN+oh4kPKtA6tdDNyZPL4T2DC+ZZmZ2Xjp9Bz93Ih4Lnn8A2BuynynSCpJeljShkYrlLQxmbc0PDzcYVlmZlYr84exUb7pbNqNZ1+Z3MPw3wG3SlrUYD3bIqIYEcWBgYGsZZmZWaLToH9e0tkAyffD9WaKiKHk+/eAB4BlHW7PzMw61GnQ3wNcmTy+Evh87QySzpA0M3l8FrAS+HaH2zMzsw61cnnlduAhYImkZyW9B9gCrJb0OPCW5DmSipJuTxZ9FVCS9E3gK8CWiHDQm5l12bRmM0TEZSmT3lxn3hLw3uTx/wOWZqrOzMwy81/GmpnlnIPezCznHPRmZjnnoDczyzkHvZlZzjnozcxyzkFvZpZzDnozs5xz0JuZ5ZyD3sws5xz0ZmY556A3M8s5B72ZWc456M3Mcs5Bb2aWcw56M7Oca3rjEQBJdwBvBw5HxHlJ22zgM8BC4CDwbyPihTrLXgnckDy9KSLuzF72WKtveYDHD794/PniObPYdc0qAHbsHWLrzgMcOjrCvP4+Nq1ZwoZlg22tr9bBLevHrDvtDun1TBM8cfN6Lr/tIb765JHj7SsXzeauqy5saR3LN+/i+Z+8dPz53NNmsPv61Q33o7pf8mThtfeOaasco6zLvuaDX+LHvxg9/vz0mQUeuXEt0HxsNVp3s+22u+xgf19LY/yGHfvZvvsZRiMoSFy2fD43bWjtHkHnXnvvCeNcwFPj1M/dcrK8Jqoponk8SboI+Cnwyaqg/wvgSERskXQtcEZE/EnNcrOBElAEAtgDvK7eD4RqxWIxSqVSyzuRFsqL58zi6jct5rq79zNy7Fcv1L7pBW6+ZGnqC6FZyFfc+s7zx6x7PLQS9rUhX1Ed9o36JU8
Du16AVDQLkmbL1oZ8xekzC/z5hqUNx1ajdTdycMv6jpetV0e1G3bs51MPPz1m/itWLGga9rUhX9FK2Gc5RuMpz68JSXsiolhvWkunbiLiQeBITfPFQOXd+Z3AhjqLrgF2RcSRJNx3AWtb2WY70kL58cMvsnXngTFBPHJslK07D7S9vlr11j0eqt/hp6kX8rXtjfrFWlMv5CvtnYytbkmrY/vuZ+rOn9ZeLe0tYTu/yfbayfqayHKOfm5EPJc8/gEwt848g0D1CHo2aRtD0kZJJUml4eHhDGWd6NDRkbbax2PddnKYyLE1HurVMZryG3xau+XDuHwYG+XzP5lGSkRsi4hiRBQHBgbGoywA5vX3tdU+Huu2k8NEjq3xUK+OglR33rR2y4csQf+8pLMBku+H68wzBMyven5O0jauFs+Zldq+ac0S+qYXTmjvm15g05olba+vVr11j4eVi2Y3nWfuaTOatjfqF2vN6TPrH9/TZxY6GlvdklbHZcvn15k7vb1a2o+CqfQj4mR9TWQJ+nuAK5PHVwKfrzPPTuCtks6QdAbw1qRtXO26ZtWYA1X5cGXDskFuvmQpg/19iPKVCY0+iE1bX62DW9aPWXc7pqm8jtpQb/Wqm93Xrx4T9rVX3TTqlzxJ+zCvlQ/5mi37yI1rx4R95aqbZmOr0bqbbbeTZVsZ4zdtWMoVKxYcfwdfkFr6IBbKH7jWjvNWr7rJcozG08nymqjV6lU324FVwFnA88AHgR3A3wILgO9TvrzyiKQi8L6IeG+y7B8A/zVZ1eaI+Ktm22v3qhszs5Ndo6tuWgr6bnPQm5m1J/PllWZmNnU56M3Mcs5Bb2aWcw56M7Occ9CbmeWcg97MLOcc9GZmOeegNzPLOQe9mVnOOejNzHLOQW9mlnMOejOznHPQm5nlnIPezCznHPRmZjnnoDczy7lMQS/pA5IelfQtSX9YZ/oqST+StC/5+tMs2zMzs/ZN63RBSecBVwEXAC8BX5L0hYh4ombWf4qIt2eo0czMMsjyjv5VwO6I+FlEvAz8I3DJ+JRlZmbjJUvQPwq8UdKZkk4F1gHz68x3oaRvSvoHSf8ybWWSNkoqSSoNDw9nKMvMzKp1fOomIh6T9CHgPuBFYB8wWjPbN4BXRsRPJa0DdgCLU9a3DdgG5ZuDd1qXmZmdKNOHsRHx8Yh4XURcBLwAfLdm+o8j4qfJ4y8C0yWdlWWbZmbWnqxX3cxJvi+gfH7+b2qm/wtJSh5fkGzvh1m2aWZm7en41E3i7yWdCRwDro6Io5LeBxARHwN+D/gPkl4GRoBLI8KnZczMuihT0EfEG+u0fazq8UeBj2bZhpmZZeO/jDUzyzkHvZlZzjnozcxyzkFvZpZzDnozs5xz0JuZ5ZyD3sws5xz0ZmY556A3M8s5B72ZWc456M3Mcs5Bb2aWcw56M7Occ9CbmeWcg97MLOcy/T96SR8ArgIE3BYRt9ZMF/ARyjcO/xnwroj4RpZtdssNO/azffczjEYgoHK3lILErBm/xo9/8avb4y6eM4td16zi8tse4qtPHmlp/QKe2rIeYMxyKxfN5q6rLgTgt67/Ij8f/dW9Wk4piO9sXgfA8s27eP4nLx2fNve0Gey+fvUJtVcM9vexac0SNiwbbHv/CxKXLZ/PTRuWtrTs6lse4PHDLx5/XukfgB17h9i68wCHjo4wr6qmTrbX6BhVlm9lvQuvvXfMug9uWV+3HwsSK37jDA7+cIRDR0c4dUaBn700StRst+I1H/zSCWPl9JkFHrlxbcPtVjQaF82WbaTRmGom7fhN9LJZNBsDjepqdAza1Wg7E9036vSGT5LOAz4NXAC8BHwJeF9EPFE1zzrgP1EO+uXARyJiebN1F4vFKJVKHdU1Hm7YsZ9PPfx0W8ucUtAJL55WCHj9otl1fzisXDSbPQdfqLvOUwriFadOPyHkW6mjb3qBmy9Z2nQApe3/FSsWNA3f2pCvWDxnFle/aTHX3b2fkWO/Cr6
+6QVeu+AVdfug0fZaOUaL58yqW0v1eusFZlaV9deGfMXpMwt12ysOblmf+qZhZcp4qV62kdqQr2gl7HfsHap7/FoZU1mWzaLZWG5U19+Vnk49Bu2GfaPtAOPSN5L2RESx3rQsp25eBeyOiJ9FxMvAP1K+b2y1i4FPRtnDQL+kszNssyu2736m7WXaDXkovwNNe9F+9ckjqev8+WjUDflmdYwcG2XrzgNN60rb/1b6pV6wVtq37jxwwmCu1JTWB422l6WWTo5vOyrrTwvzRiFf0WhcZNFoTDWTdvxaGVNZls2i2VhuVNd4HoNG2+lG32QJ+keBN0o6U9KplN+1z6+ZZxCo7ulnk7YxJG2UVJJUGh4ezlBWdqM5vq3toaMjTedJ2/+s/dLKtlvdXpZaJvr45nX8pB2/Vo5rlmWzaDaWu1VXo+10o4aOgz4iHgM+BNxH+bTNPqD5W5X09W2LiGJEFAcGBjpdzbgoSD3d/kSa19/XdJ60/c/aL61su9XtZalloo9vXsdP2vFr5bhmWTaLZmO5W3U12k43ash01U1EfDwiXhcRFwEvAN+tmWWIE9/ln5O0TWqXLa/9xaS5Uwrtv7hF+XxfPSsXzU5d5ykFMfe0GW3X0Te9wKY1S5rWlbb/rfTL4jmzUts3rVlC3/TCmJrS+qDR9rLU0snxbUdl/afPLNSdntZerdG4yKLRmGom7fi1MqayLJtFs7HcqK7xPAaNttONvskU9JLmJN8XUD4//zc1s9wD/HuVrQB+FBHPZdlmN9y0YSlXrFhw/Kd+9UugII15oS6eM4vvbF7X1gCoXHVz11UXjlmu8mHPdzavG/MCrHxotvv61WPCfu5pM/jO5nUn1F4x2N/X8oc7tftfkFr6IBZg1zWrxgRs5aqbDcsGufmSpQz296Gqmu666sK2t9fsGF2xYgG7rlnVdL1pH14e3LK+bj8WJFYumn18H2bNKBzfdu36H7lx7ZixUrnqptF2gYbjotmyjTQaU82kHb9WxlSWZbNoNpYb1dXoGLSr0Xa60TcdX3UDIOmfgDOBY8A1EXG/pPcBRMTHkssrPwqspXx55bsjounlNL2+6sbMbKppdNVNpuvoI+KNddo+VvU4gKuzbMPMzLLxX8aameWcg97MLOcc9GZmOeegNzPLOQe9mVnOOejNzHLOQW9mlnMOejOznHPQm5nlnIPezCznHPRmZjnnoDczyzkHvZlZzjnozcxyzkFvZpZzDnozs5zLeivBP5L0LUmPStou6ZSa6e+SNCxpX/L13mzlmplZuzoOekmDwPuBYkScBxSAS+vM+pmIOD/5ur3T7ZmZWWeynrqZBvRJmgacChzKXpKZmY2njoM+IoaAvwSeBp4DfhQR99WZ9d9IekTSZyXNT1ufpI2SSpJKw8PDnZZlZmY1spy6OQO4GDgXmAfMknRFzWz/G1gYEa8BdgF3pq0vIrZFRDEiigMDA52WZWZmNbKcunkL8FREDEfEMeBu4PXVM0TEDyPiF8nT24HXZdiemZl1IEvQPw2skHSqJAFvBh6rnkHS2VVP31E73czMJt60TheMiN2SPgt8A3gZ2Atsk/TnQCki7gHeL+kdyfQjwLuyl2xmZu1QRPS6hjGKxWKUSqVel2FmNmVI2hMRxXrT/JexZmY556A3M8s5B72ZWc456M3Mcs5Bb2aWcw56M7Occ9CbmeWcg97MLOcc9GZmOeegNzPLOQe9mVnOOejNzHLOQW9mlnMOejOznHPQm5nlnIPezCznOr7DFICkPwLeCwSwH3h3RPy8avpM4JOU7xX7Q+CdEXEwyzbzaMfeIbbuPMChoyPM6+9j05olbFg22HSaWSc8pk4+HQe9pEHg/cCrI2JE0t8ClwKfqJrtPcALEfGbki4FPgS8M0O9ubNj7xDX3b2fkWOjAAwdHeG6u/cfn542zS9M60Sj8eYxlV9ZT91MA/okTQNOBQ7VTL8YuDN5/FngzcmNxC2xdeeB4y+6ipFjo2zdeaD
hNLNOeEydnDoO+ogYAv4SeBp4DvhRRNxXM9sg8Ewy/8vAj4Az661P0kZJJUml4eHhTsuacg4dHUltbzTNrBMeUyenjoNe0hmU37GfC8wDZkm6otP1RcS2iChGRHFgYKDT1Uw58/r7UtsbTTPrhMfUySnLqZu3AE9FxHBEHAPuBl5fM88QMB8gOb3zCsofylpi05ol9E0vnNDWN73ApjVLGk4z64TH1Mkpy1U3TwMrJJ0KjABvBko189wDXAk8BPwe8OWIiAzbzJ3KB2CNroLwFRI2XloZb5Y/ypK7km6kfBXNy8BeypdaXg+UIuIeSacAfw0sA44Al0bE95qtt1gsRqlU+zPDzMzSSNoTEcW60ybjG2wHvZlZexoFvf8y1sws5xz0ZmY556A3M8s5B72ZWc5Nyg9jJQ0D328y21nAP3ehnHZMxppgctblmlo3GeuajDXB5KyrWzW9MiLq/rXppAz6VkgqpX3C3CuTsSaYnHW5ptZNxromY00wOeuaDDX51I2ZWc456M3Mcm4qB/22XhdQx2SsCSZnXa6pdZOxrslYE0zOunpe05Q9R29mZq2Zyu/ozcysBQ56M7OcmxJBL+kOSYclPVrVNlvSLkmPJ9/PmAQ1/ZmkIUn7kq91Xa5pvqSvSPq2pG9J+kDS3rO+alBTr/vqFElfk/TNpK4bk/ZzJe2W9ISkz0iaMQlq+oSkp6r66vxu1VRVW0HSXklfSJ73rJ+a1NXTvpJ0UNL+ZNulpK2nWQVTJOgp33B8bU3btcD9EbEYuD953uuaAD4cEecnX1/sck0vA/85Il4NrACulvRqettXaTVBb/vqF8DvRMRvA+cDayWtoHwD+w9HxG8CL1C+wX2vawLYVNVX+7pYU8UHgMeqnveyn6rV1gW976s3JduuXDvf66yaGkEfEQ9S/n/21apvPH4nsGES1NRTEfFcRHwjefwTyi+AQXrYVw1q6qko+2nydHryFcDvUL6RPXS/r9Jq6ilJ5wDrgduT56KH/ZRW1yTW06yCKRL0KeZGxHPJ4x8Ac3tZTJX/KOmR5NRO139Fq5C0kPINX3YzSfqqpibocV8lv/bvAw4Du4AngaPJjewBnqXLP5Rqa4qISl9tTvrqw5JmdrMm4Fbgj4FfJs/PpMf9lFJXRS/7KoD7JO2RtDFp6/nrbyoH/XHJ7Ql7/s4H+J/AIsq/dj8H/LdeFCHp14G/B/4wIn5cPa1XfVWnpp73VUSMRsT5wDnABcBvdbuGWrU1SToPuI5ybf8KmA38SbfqkfR24HBE7OnWNlvRoK6e9VXiDRHxWuBtlE9TXlQ9sVevv6kc9M9LOhsg+X64x/UQEc8nL9RfArdRDo+ukjSdcqDeFRF3J8097at6NU2GvqqIiKPAV4ALgX6Vb2QP5bAd6nFNa5PTXxERvwD+iu721UrgHZIOAp+mfMrmI/S+n8bUJelTPe4rImIo+X4Y+Fyy/Z5n1VQO+sqNx0m+f76HtQDHD2LF7wKPps07QdsX8HHgsYi4pWpSz/oqraZJ0FcDkvqTx33AasqfH3yF8o3soft9Va+m71SFhCif3+1aX0XEdRFxTkQsBC4FvhwRl9PDfmpQ1xW97CtJsySdVnkMvDXZfu+zKiIm/RewnfKv98conw98D+XzhPcDjwP/B5g9CWr6a2A/8Ajlg3t2l2t6A+VfCx8B9iVf63rZVw1q6nVfvYbyDe0fofxi/NOk/TeArwFPAH8HzJwENX056atHgU8Bv97NvqqqbxXwhV73U5O6etZXSZ98M/n6FnB90t7TrIoI/wsEM7O8m8qnbszMrAUOejOznHPQm5nlnIPezCznHPRmZjnnoDczyzkHvZlZzv1/N8s9l//aWz4AAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "price = new_pumpkins.Price\n", + "month = new_pumpkins.Month\n", + "plt.scatter(price, month)\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0, 0.5, 'Pumpkin Price')" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEJCAYAAACT/UyFAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAAsTAAALEwEAmpwYAAARAElEQVR4nO3de5AlZX3G8e8joKigiIwbVNYVQ6ErwcVaiRW0CgUNikEQKxFTijHJahlUSsvUqknE/LVE0KoYNVkDigloNCoQLt5AxUuCLrrhIhqUQgMiLBGE0goR+OWP0+sMszOzZ8ft0zO830/VqTndfc7phwae6XlPX1JVSJLa8aChA0iSJsvil6TGWPyS1BiLX5IaY/FLUmMsfklqzK5DBxjHPvvsU6tWrRo6hiQtK1dcccVtVTU1e/6yKP5Vq1axadOmoWNI0rKS5IdzzXeoR5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktSYZXECl3auVesvHDoCN2w4eugIUrMsfjXNX4JqkUM9ktQYi1+SGmPxS1JjLH5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqTG/Fn2S/JF9M8p0k1yR5Yzf/lCQ3JdncPV7YVwZJ0rZ27fGz7wHeXFXfSrIncEWSz3fL3lNVp/W4bknSPHor/qq6Gbi5e35XkmuBx/W1PknSePrc4/+VJKuAQ4DLgcOAk5K8EtjE6K+C2yeRQ9L8Vq2/cOgI3LDh6KEjNKH3L3eT7AF8Eji5qu4EPgA8CVjD6C+C0+d537okm5Js2rJlS98xJakZvRZ/kt0Ylf7ZVfUpgKq6parurar7gA8Ch8713qraWFVrq2rt1NRUnzElqSl9HtUT4Azg2qp694z5+8542XHA1X1lkCRtq88x/sOAVwBXJdnczXsbcEKSNUABNwCv6TGDJGmWPo/q+SqQORZd1Nc6F+IXV5I04pm7ktQYi1+SGmPxS1JjLH5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktQYi1+SGmPxS1JjLH5JakxvxZ9kvyRfTPKdJNckeWM3f+8kn09yXffzUX1lkCRtq889/nuAN1fVauCZwJ8lWQ2sBy6pqgOAS7ppSdKE9Fb8VXVzVX2re34XcC3wOODFwFndy84Cju0rgyRpWxMZ40+yCjgEuBxYUVU3d4t+AqyY5z3rkmxKsmnLli2TiClJTei9+JPsAXwSOLmq7py5rKoKqLneV1Ubq2ptVa2dmprqO6YkNWOs4k/y0CQH7uiHJ9mNUemfXVWf6mbfkmTfbvm+wK07+rmSpM
XbbvEn+T1gM/CZbnpNkvPHeF+AM4Brq+rdMxadD5zYPT8ROG8HM0uSfg3j7PGfAhwK3AFQVZuBJ47xvsOAVwDPTbK5e7wQ2AA8L8l1wJHdtCRpQnYd4zW/rKqfjXbgf2XOcfn7vaDqq0DmWXzEGOuVJPVgnOK/JsnLgV2SHAC8Afh6v7EkSX0ZZ6jn9cBTgbuBc4CfASf3mEmS1KPt7vFX1S+At3cPSdIyN85RPZ9PsteM6Ucl+WyvqSRJvRlnqGefqrpj60RV3Q48prdEkqRejVP89yVZuXUiyRMY46geSdLSNM5RPW8Hvprky4wOz3w2sK7XVJKk3ozz5e5nkjyd0aWVYXTNndv6jSVJ6su8Qz1Jntz9fDqwEvhx91jZzZMkLUML7fG/idGQzulzLCvgub0kkiT1at7ir6p1SR4E/EVVfW2CmSRJPVrwqJ6qug/4uwllkSRNwDiHc16S5PjMukqbJGl5Gqf4XwN8Arg7yZ1J7kpy5/beJElamsY5nHPPSQSRJE3GQodzHpDkvCRXJzknyeMmGUyS1I+FhnrOBC4Ajge+Dbx3IokkSb1aaKhnz6r6YPf8XUm+NYlAkqR+LVT8uyc5hOnbJz505nRV+YtAkpahhYr/ZuDdM6Z/MmPaM3claZla6Mzd50wyiCRpMsY5jl+S9ABi8UtSYyx+SWrMOHfgojt56wkzX19Vl/UVSpLUn+0Wf5JTgT8AvgPc280uwOKXpGVonD3+Y4EDq+runrNIkiZgnOK/HtgN2KHiT3Im8CLg1qo6qJt3CvCnwJbuZW+rqot25HMlqW+r1l84dARu2HB0b589TvH/Atic5BJmlH9VvWE77/swo5u4fGTW/PdU1Wk7ElKStPOMU/znd48dUlWXJVm1w4kkSb0a53r8Z+3kdZ6U5JXAJuDNVXX7XC9Kso7Rzd5ZuXLlTo4gSe1a6Hr8H+9+XpXkytmPRa7vA8CTgDWMrgV0+nwvrKqNVbW2qtZOTU0tcnWSpNkW2uN/Y/fzRTtrZVV1y9bnST7I6Hr/kqQJmnePv6pu7p6urqofznwAL1jMypLsO2PyOODqxXyOJGnxxvly9y+T3F1VlwIk+XPgOcDfL/SmJB8FDgf2SXIj8A7g8CRrGJ0AdgOjG7lLkiZonOI/BrggyVuAo4AnAy/e3puq6oQ5Zp+xY/EkSTvbOEf13JbkGOALwBXAS6uqek8mSerFvMWf5C5GQzJbPRjYH3hpkqqqR/QdTpK08y10B649JxlEkjQZ416W+SXAsxj9BfCVqjq3z1CSpP5s90YsSd4PvBa4itHhl69N8r6+g0mS+jHOHv9zgads/UI3yVnANb2mkiT1ZpxbL34fmHmxnP26eZKkZWicPf49gWuTfKObfgawKcn5AFV1TF/hJEk73zjF/1e9p5AkTcw4J3B9GSDJI7j/zdZ/2mMuSVJPxrnZ+jrgr4H/Be4Dwuiwzv37jSZJ6sM4Qz1vAQ6qqtv6DiNJ6t84R/X8gNF9dyVJDwDj7PG/Ffh6ksvZsZutS5KWoHGK/x+ASxmduXtfv3EkSX0bp/h3q6o39Z5EkjQR44zxX5xkXZJ9k+y99dF7MklSL8bZ4996J623zpjn4ZyStEyNcwLXEycRRJI0GeOcwPXKueZX1Ud2fhxJUt/GGep5xoznuwNHAN8CLH5JWobGGep5/czpJHsBH+srkCSpX+Mc1TPbzwHH/SVpmRpnjP/fGB3FA6NfFKuBj/cZSpLUn3HG+E+b8fwe4IdVdWNPeSRJPZu3+JPszugm67/J6HINZ1TVPZMKJknqx0Jj/GcBaxmV/guA0yeSSJLUq4WGelZX1W8BJDkD+MYCr91GkjOBFwG3VtVB3by9gX8BVgE3AL9fVbfveGxJ0mIttMf/y61PFjnE82HgqFnz1gOXVNUBwCXdtCRpghYq/qclubN73AUcvPV5kju398FVdRkw+768L2Y0hET389jFhJYkLd68Qz1VtUsP61tRVTd3z38CrO
hhHZKkBSzmBK6doqqK6fMDttFdCnpTkk1btmyZYDJJemCbdPHfkmRfgO7nrfO9sKo2VtXaqlo7NTU1sYCS9EA36eI/Hzixe34icN6E1y9Jzeut+JN8FPh34MAkNyb5Y2AD8Lwk1wFHdtOSpAka55INi1JVJ8yz6Ii+1ilJ2r7BvtyVJA3D4pekxlj8ktQYi1+SGmPxS1JjLH5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktSYXYdYaZIbgLuAe4F7qmrtEDkkqUWDFH/nOVV124Drl6QmOdQjSY0ZqvgL+FySK5KsGyiDJDVpqKGeZ1XVTUkeA3w+yXer6rKZL+h+IawDWLly5RAZJekBaZA9/qq6qft5K/Bp4NA5XrOxqtZW1dqpqalJR5SkB6yJF3+ShyfZc+tz4PnA1ZPOIUmtGmKoZwXw6SRb139OVX1mgByS1KSJF39VXQ88bdLrlSSNeDinJDXG4pekxlj8ktQYi1+SGmPxS1JjLH5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktQYi1+SGjNI8Sc5Ksn3knw/yfohMkhSqyZe/El2Ad4HvABYDZyQZPWkc0hSq4bY4z8U+H5VXV9V/wd8DHjxADkkqUmpqsmuMHkpcFRV/Uk3/Qrgt6vqpFmvWwes6yYPBL430aDb2ge4beAMS4XbYprbYprbYtpS2RZPqKqp2TN3HSLJOKpqI7Bx6BxbJdlUVWuHzrEUuC2muS2muS2mLfVtMcRQz03AfjOmH9/NkyRNwBDF/03ggCRPTPJg4GXA+QPkkKQmTXyop6ruSXIS8FlgF+DMqrpm0jkWYckMOy0BbotpbotpbotpS3pbTPzLXUnSsDxzV5IaY/FLUmMsfklqzJI9jn9IM442+nFVfSHJy4HfAa4FNlbVLwcNOGFJ9gdewugw3HuB/wLOqao7Bw0maVH8cncOSc5m9EvxYcAdwB7Ap4AjGG2zE4dLN1lJ3gC8CLgMeCHwbUbb5DjgdVX1pcHCSVoUi38OSa6sqoOT7Mro5LLHVtW9SQL8Z1UdPHDEiUlyFbCm++d/GHBRVR2eZCVwXlUdMnDEiUnySOCtwLHAY4ACbgXOAzZU1R2DhVtCklxcVS8YOsekJHkEo/8uHg9cXFXnzFj2/qp63WDh5uFQz9we1A33PJzRXv8jgZ8CDwF2GzLYQHZlNMTzEEZ//VBVP0rS2rb4OHApcHhV/QQgyW8AJ3bLnj9gtolK8vT5FgFrJhhlKfgQcB3wSeDVSY4HXl5VdwPPHDTZPCz+uZ0BfJfRCWZvBz6R5HpG/xI/NmSwAfwj8M0klwPPBk4FSDLF6JdhS1ZV1akzZ3S/AE5N8uqBMg3lm8CXGRX9bHtNNsrgnlRVx3fPz03yduDSJMcMGWohDvXMI8ljAarqx0n2Ao4EflRV3xg02ACSPBV4CnB1VX136DxDSfI54AvAWVV1SzdvBfAq4HlVdeSA8SYqydXAcVV13RzL/ruq9pvjbQ9ISa4FnlpV982Y9yrgLcAeVfWEobLNx+KXxpTkUcB6RvePeEw3+xZG15raUFW3D5Vt0rrLq19VVdtcLj3JsVV17uRTDSPJ3wCfq6ovzJp/FPDeqjpgmGTzs/ilnSDJH1XVh4bOsRS4LaYt1W1h8Us7QZIfVdXKoXMsBW6LaUt1W/jlrjSmJFfOtwhYMcksQ3NbTFuO28Lil8a3AvhdYPZYfoCvTz7OoNwW05bdtrD4pfFdwOgojc2zFyT50sTTDMttMW3ZbQvH+CWpMV6dU5IaY/FLUmMsfglIUkn+ecb0rkm2JL
lgkZ+3V5LXzZg+fLGfJe1sFr808nPgoCQP7aafx+jKrIu1F7DkrsoogcUvzXQRcHT3/ATgo1sXJNk7yblJrkzyH0kO7uafkuTMJF9Kcn13/wKADcCTkmxO8q5u3h5J/jXJd5Oc3V3mW5o4i1+a9jHgZUl2Bw4GLp+x7J3At7t7MbwN+MiMZU9mdBz3ocA7ustVrwd+UFVrquot3esOAU4GVgP7A4f1+M8izcvilzpVdSWwitHe/kWzFj8L+KfudZcCj+5uwAFwYVXdXVW3Mboxy3xna36jqm7sruK4uVuXNHGewCXd3/nAacDhwKPHfM/dM57fy/z/X437OqlX7vFL93cm8M6qumrW/K8AfwijI3SA27Zzs/m7gD37CCj9utzjkGaoqhuBv51j0SnAmd0FuX7B6HaLC33O/yT5WnfDkouBC3d2VmmxvGSDJDXGoR5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSY/4fZDFW+b6+4WkAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "new_pumpkins.groupby(['Month'])['Price'].mean().plot(kind='bar')\n", + "plt.ylabel(\"Pumpkin Price\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby na automatický preklad [Co-op Translator](https://github.com/Azure/co-op-translator). Aj keď sa snažíme o presnosť, upozorňujeme, že automatické preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre dôležité informácie sa odporúča profesionálny ľudský preklad. Nezodpovedáme za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" + }, + "kernelspec": { + "display_name": "Python 3.7.0 64-bit ('3.7')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.1" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "95726f0b8283628d5356a4f8eb8b4b76", + "translation_date": "2025-09-06T13:46:11+00:00", + "source_file": "2-Regression/2-Data/solution/notebook.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sk/2-Regression/3-Linear/notebook.ipynb 
b/translations/sk/2-Regression/3-Linear/notebook.ipynb new file mode 100644 index 000000000..ee1028c0b --- /dev/null +++ b/translations/sk/2-Regression/3-Linear/notebook.ipynb @@ -0,0 +1,128 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Ceny tekvíc\n", + "\n", + "Načítajte potrebné knižnice a dataset. Preveďte údaje do dátového rámca obsahujúceho podmnožinu údajov:\n", + "\n", + "- Zahrňte iba tekvice ocenené na základe bušlov\n", + "- Preveďte dátum na mesiac\n", + "- Vypočítajte cenu ako priemer vysokých a nízkych cien\n", + "- Preveďte cenu tak, aby odrážala cenu podľa množstva v bušloch\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from datetime import datetime\n", + "\n", + "pumpkins = pd.read_csv('../data/US-pumpkins.csv')\n", + "\n", + "pumpkins.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pumpkins = pumpkins[pumpkins['Package'].str.contains('bushel', case=True, regex=True)]\n", + "\n", + "columns_to_select = ['Package', 'Variety', 'City Name', 'Low Price', 'High Price', 'Date']\n", + "pumpkins = pumpkins.loc[:, columns_to_select]\n", + "\n", + "price = (pumpkins['Low Price'] + pumpkins['High Price']) / 2\n", + "\n", + "month = pd.DatetimeIndex(pumpkins['Date']).month\n", + "day_of_year = pd.to_datetime(pumpkins['Date']).apply(lambda dt: (dt-datetime(dt.year,1,1)).days)\n", + "\n", + "new_pumpkins = pd.DataFrame(\n", + " {'Month': month, \n", + " 'DayOfYear' : day_of_year, \n", + " 'Variety': pumpkins['Variety'], \n", + " 'City': pumpkins['City Name'], \n", + " 'Package': pumpkins['Package'], \n", + " 'Low Price': pumpkins['Low Price'],\n", + " 'High Price': pumpkins['High Price'], \n", + " 'Price': price})\n", + "\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1 
1/9'), 'Price'] = price/1.1\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1/2'), 'Price'] = price*2\n", + "\n", + "new_pumpkins.head()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Základný bodový graf nám pripomína, že máme údaje iba od augusta do decembra. Pravdepodobne potrebujeme viac údajov, aby sme mohli vyvodiť závery lineárnym spôsobom.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "plt.scatter('Month','Price',data=new_pumpkins)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "plt.scatter('DayOfYear','Price',data=new_pumpkins)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby na automatický preklad [Co-op Translator](https://github.com/Azure/co-op-translator). Hoci sa snažíme o presnosť, upozorňujeme, že automatické preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre dôležité informácie odporúčame profesionálny ľudský preklad. 
Nezodpovedáme za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3-final" + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "b032d371c75279373507f003439a577e", + "translation_date": "2025-09-06T13:08:32+00:00", + "source_file": "2-Regression/3-Linear/notebook.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sk/2-Regression/3-Linear/solution/R/lesson_3-R.ipynb b/translations/sk/2-Regression/3-Linear/solution/R/lesson_3-R.ipynb new file mode 100644 index 000000000..1fa36b98c --- /dev/null +++ b/translations/sk/2-Regression/3-Linear/solution/R/lesson_3-R.ipynb @@ -0,0 +1,1081 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_3-R.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "5015d65d61ba75a223bfc56c273aa174", + "translation_date": "2025-09-06T13:14:27+00:00", + "source_file": "2-Regression/3-Linear/solution/R/lesson_3-R.ipynb", + "language_code": "sk" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "EgQw8osnsUV-" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Lineárna a polynomiálna regresia pre stanovenie cien tekvíc - Lekcia 3\n", + "

\n", + " \n", + "

Infografika od Dasani Madipalli
\n", + "\n", + "\n", + "#### Úvod\n", + "\n", + "Doteraz ste preskúmali, čo je regresia, na základe vzorových údajov zo súboru údajov o cenách tekvíc, ktorý budeme používať počas celej tejto lekcie. Vizualizovali ste ju pomocou `ggplot2`.💪\n", + "\n", + "Teraz ste pripravení ponoriť sa hlbšie do regresie pre strojové učenie. V tejto lekcii sa dozviete viac o dvoch typoch regresie: *základná lineárna regresia* a *polynomiálna regresia*, spolu s niektorými matematickými základmi týchto techník.\n", + "\n", + "> Počas celého kurzu predpokladáme minimálne znalosti matematiky a snažíme sa ju sprístupniť študentom z iných oblastí, preto sledujte poznámky, 🧮 upozornenia, diagramy a ďalšie nástroje na učenie, ktoré vám pomôžu pochopiť.\n", + "\n", + "#### Príprava\n", + "\n", + "Pripomeňme si, že tieto údaje načítavate, aby ste si mohli klásť otázky.\n", + "\n", + "- Kedy je najlepší čas na kúpu tekvíc?\n", + "\n", + "- Akú cenu môžem očakávať za balenie miniatúrnych tekvíc?\n", + "\n", + "- Mal by som ich kúpiť v polovičných košoch alebo v krabici 1 1/9 bušlu? Poďme sa hlbšie ponoriť do týchto údajov.\n", + "\n", + "V predchádzajúcej lekcii ste vytvorili `tibble` (moderné prepracovanie dátového rámca) a naplnili ho časťou pôvodného súboru údajov, pričom ste štandardizovali ceny podľa bušlu. Týmto spôsobom ste však dokázali zhromaždiť iba približne 400 údajových bodov a iba pre jesenné mesiace. Možno môžeme získať trochu viac detailov o povahe údajov ich dôkladnejším čistením? Uvidíme... 
🕵️‍♀️\n", + "\n", + "Pre túto úlohu budeme potrebovať nasledujúce balíky:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) je [kolekcia balíkov pre R](https://www.tidyverse.org/packages), ktorá robí dátovú vedu rýchlejšou, jednoduchšou a zábavnejšou!\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) je [rámec balíkov](https://www.tidymodels.org/packages/) pre modelovanie a strojové učenie.\n", + "\n", + "- `janitor`: [janitor balík](https://github.com/sfirke/janitor) poskytuje jednoduché nástroje na skúmanie a čistenie špinavých údajov.\n", + "\n", + "- `corrplot`: [corrplot balík](https://cran.r-project.org/web/packages/corrplot/vignettes/corrplot-intro.html) poskytuje vizuálny prieskumný nástroj na korelačnú maticu, ktorý podporuje automatické preusporiadanie premenných na odhalenie skrytých vzorov medzi premennými.\n", + "\n", + "Môžete ich nainštalovať pomocou:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\", \"janitor\", \"corrplot\"))`\n", + "\n", + "Nasledujúci skript skontroluje, či máte balíky potrebné na dokončenie tohto modulu, a v prípade ich absencie ich nainštaluje.\n" + ], + "metadata": { + "id": "WqQPS1OAsg3H" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "suppressWarnings(if (!require(\"pacman\")) install.packages(\"pacman\"))\n", + "\n", + "pacman::p_load(tidyverse, tidymodels, janitor, corrplot)" + ], + "outputs": [], + "metadata": { + "id": "tA4C2WN3skCf", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "c06cd805-5534-4edc-f72b-d0d1dab96ac0" + } + }, + { + "cell_type": "markdown", + "source": [ + "Neskôr načítame tieto skvelé balíčky a sprístupníme ich v našej aktuálnej R relácii. (Toto je len pre ilustráciu, `pacman::p_load()` to už za vás urobil)\n", + "\n", + "## 1. 
Lineárna regresná priamka\n", + "\n", + "Ako ste sa naučili v Lekcii 1, cieľom cvičenia s lineárnou regresiou je byť schopný vykresliť *priamku* *najlepšieho prispôsobenia*, aby sme:\n", + "\n", + "- **Ukázali vzťahy medzi premennými**. Zobraziť vzťah medzi premennými.\n", + "\n", + "- **Vytvárali predpovede**. Presne predpovedať, kde by nový dátový bod mohol spadnúť vo vzťahu k tejto priamke.\n", + "\n", + "Na vykreslenie tohto typu priamky používame štatistickú techniku nazývanú **Regresia najmenších štvorcov**. Termín `najmenšie štvorce` znamená, že všetky dátové body okolo regresnej priamky sú umocnené na druhú a potom sčítané. Ideálne je, aby tento konečný súčet bol čo najmenší, pretože chceme mať čo najmenej chýb, teda `najmenšie štvorce`. Preto je priamka najlepšieho prispôsobenia tá priamka, ktorá nám dáva najnižšiu hodnotu súčtu štvorcov chýb - odtiaľ názov *regresia najmenších štvorcov*.\n", + "\n", + "Robíme to preto, že chceme modelovať priamku, ktorá má najmenšiu kumulatívnu vzdialenosť od všetkých našich dátových bodov. Tiež umocňujeme hodnoty na druhú pred ich sčítaním, pretože nás zaujíma ich veľkosť, nie ich smer.\n", + "\n", + "> **🧮 Ukáž mi matematiku**\n", + ">\n", + "> Táto priamka, nazývaná *priamka najlepšieho prispôsobenia*, môže byť vyjadrená [rovnicou](https://en.wikipedia.org/wiki/Simple_linear_regression):\n", + ">\n", + "> Y = a + bX\n", + ">\n", + "> `X` je '`vysvetľujúca premenná` alebo `prediktor`'. `Y` je '`závislá premenná` alebo `výsledok`'. 
Sklon priamky je `b` a `a` je priesečník s osou y, ktorý označuje hodnotu `Y`, keď `X = 0`.\n", + ">\n", + "\n", + "> ![](../../../../../../2-Regression/3-Linear/solution/images/slope.png \"sklon = $y/x$\")\n", + " Infografika od Jen Looper\n", + ">\n", + "> Najprv vypočítajte sklon `b`.\n", + ">\n", + "> Inými slovami, a odkazujúc na pôvodnú otázku o údajoch o tekviciach: \"predpovedajte cenu tekvice za bušel podľa mesiaca\", `X` by označovalo cenu a `Y` by označovalo mesiac predaja.\n", + ">\n", + "> ![](../../../../../../2-Regression/3-Linear/solution/images/calculation.png)\n", + " Infografika od Jen Looper\n", + "> \n", + "> Vypočítajte hodnotu Y. Ak platíte okolo 4 dolárov, musí byť apríl!\n", + ">\n", + "> Matematika, ktorá vypočíta priamku, musí ukázať sklon priamky, ktorý tiež závisí od priesečníka, teda od toho, kde sa `Y` nachádza, keď `X = 0`.\n", + ">\n", + "> Metódu výpočtu týchto hodnôt si môžete pozrieť na webovej stránke [Math is Fun](https://www.mathsisfun.com/data/least-squares-regression.html). Navštívte tiež [tento kalkulátor najmenších štvorcov](https://www.mathsisfun.com/data/least-squares-calculator.html), aby ste videli, ako hodnoty čísel ovplyvňujú priamku.\n", + "\n", + "Nie je to také strašidelné, však? 🤓\n", + "\n", + "#### Korelácia\n", + "\n", + "Ešte jeden pojem, ktorý treba pochopiť, je **Korelačný koeficient** medzi danými premennými X a Y. Pomocou bodového grafu môžete tento koeficient rýchlo vizualizovať. Graf s bodmi usporiadanými do úhľadnej priamky má vysokú koreláciu, ale graf s bodmi roztrúsenými všade medzi X a Y má nízku koreláciu.\n", + "\n", + "Dobrý model lineárnej regresie bude taký, ktorý má vysoký (bližší k 1 ako k 0) Korelačný koeficient, použitím metódy Regresie najmenších štvorcov s regresnou priamkou.\n" + ], + "metadata": { + "id": "cdX5FRpvsoP5" + } + }, + { + "cell_type": "markdown", + "source": [ + "## **2. Tanec s dátami: vytvorenie dátového rámca na modelovanie**\n", + "\n", + "

\n", + " \n", + "

Ilustrácia od @allison_horst
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "WdUKXk7Bs8-V" + } + }, + { + "cell_type": "markdown", + "source": [ + "Načítajte potrebné knižnice a dataset. Konvertujte údaje na dátový rámec obsahujúci podmnožinu údajov:\n", + "\n", + "- Získajte iba tekvice ocenené na základe ceny za bušel\n", + "\n", + "- Konvertujte dátum na mesiac\n", + "\n", + "- Vypočítajte cenu ako priemer vysokých a nízkych cien\n", + "\n", + "- Konvertujte cenu tak, aby odrážala cenu za množstvo v bušeloch\n", + "\n", + "> Tieto kroky sme pokryli v [predchádzajúcej lekcii](https://github.com/microsoft/ML-For-Beginners/blob/main/2-Regression/2-Data/solution/R/lesson_2-R.ipynb).\n" + ], + "metadata": { + "id": "fMCtu2G2s-p8" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load the core Tidyverse packages\n", + "library(tidyverse)\n", + "library(lubridate)\n", + "\n", + "# Import the pumpkins data\n", + "pumpkins <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/2-Regression/data/US-pumpkins.csv\")\n", + "\n", + "\n", + "# Get a glimpse and dimensions of the data\n", + "glimpse(pumpkins)\n", + "\n", + "\n", + "# Print the first 50 rows of the data set\n", + "pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "ryMVZEEPtERn" + } + }, + { + "cell_type": "markdown", + "source": [ + "V duchu čírej dobrodružnosti sa poďme pozrieť na [`janitor package`](https://github.com/sfirke/janitor), ktorý poskytuje jednoduché funkcie na skúmanie a čistenie nečistých údajov. Napríklad sa pozrime na názvy stĺpcov našich údajov:\n" + ], + "metadata": { + "id": "xcNxM70EtJjb" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Return column names\n", + "pumpkins %>% \n", + " names()" + ], + "outputs": [], + "metadata": { + "id": "5XtpaIigtPfW" + } + }, + { + "cell_type": "markdown", + "source": [ + "🤔 Môžeme to urobiť lepšie. 
Poďme zmeniť názvy týchto stĺpcov na `friendR` konvertovaním na konvenciu [snake_case](https://en.wikipedia.org/wiki/Snake_case) pomocou `janitor::clean_names`. Ak sa chcete dozvedieť viac o tejto funkcii: `?clean_names`\n" + ], + "metadata": { + "id": "IbIqrMINtSHe" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Clean names to the snake_case convention\n", + "pumpkins <- pumpkins %>% \n", + " clean_names(case = \"snake\")\n", + "\n", + "# Return column names\n", + "pumpkins %>% \n", + " names()" + ], + "outputs": [], + "metadata": { + "id": "a2uYvclYtWvX" + } + }, + { + "cell_type": "markdown", + "source": [ + "Veľa poriadku s tidyR 🧹! Teraz si zatancujeme s dátami pomocou `dplyr`, ako v predchádzajúcej lekcii! 💃\n" + ], + "metadata": { + "id": "HfhnuzDDtaDd" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Select desired columns\n", + "pumpkins <- pumpkins %>% \n", + " select(variety, city_name, package, low_price, high_price, date)\n", + "\n", + "\n", + "\n", + "# Extract the month from the dates to a new column\n", + "pumpkins <- pumpkins %>%\n", + " mutate(date = mdy(date),\n", + " month = month(date)) %>% \n", + " select(-date)\n", + "\n", + "\n", + "\n", + "# Create a new column for average Price\n", + "pumpkins <- pumpkins %>% \n", + " mutate(price = (low_price + high_price)/2)\n", + "\n", + "\n", + "# Retain only pumpkins with the string \"bushel\"\n", + "new_pumpkins <- pumpkins %>% \n", + " filter(str_detect(string = package, pattern = \"bushel\"))\n", + "\n", + "\n", + "# Normalize the pricing so that you show the pricing per bushel, not per 1 1/9 or 1/2 bushel\n", + "new_pumpkins <- new_pumpkins %>% \n", + " mutate(price = case_when(\n", + " str_detect(package, \"1 1/9\") ~ price/(1.1),\n", + " str_detect(package, \"1/2\") ~ price*2,\n", + " TRUE ~ price))\n", + "\n", + "# Relocate column positions\n", + "new_pumpkins <- new_pumpkins %>% \n", + " relocate(month, .before = variety)\n", 
+ "\n", + "\n", + "# Display the first 5 rows\n", + "new_pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "X0wU3gQvtd9f" + } + }, + { + "cell_type": "markdown", + "source": [ + "Dobrá práca!👌 Teraz máte čistý a uprataný dátový súbor, na ktorom môžete postaviť svoj nový regresný model!\n", + "\n", + "Čo tak rozptylový graf?\n" + ], + "metadata": { + "id": "UpaIwaxqth82" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Set theme\n", + "theme_set(theme_light())\n", + "\n", + "# Make a scatter plot of month and price\n", + "new_pumpkins %>% \n", + " ggplot(mapping = aes(x = month, y = price)) +\n", + " geom_point(size = 1.6)\n" + ], + "outputs": [], + "metadata": { + "id": "DXgU-j37tl5K" + } + }, + { + "cell_type": "markdown", + "source": [ + "Bodový graf nám pripomína, že máme údaje za mesiace iba od augusta do decembra. Pravdepodobne potrebujeme viac údajov, aby sme mohli vyvodiť závery lineárnym spôsobom.\n", + "\n", + "Pozrime sa znova na naše modelovacie údaje:\n" + ], + "metadata": { + "id": "Ve64wVbwtobI" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Display first 5 rows\n", + "new_pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "HFQX2ng1tuSJ" + } + }, + { + "cell_type": "markdown", + "source": [ + "Čo ak by sme chceli predpovedať `cenu` tekvice na základe stĺpcov `mesto` alebo `balenie`, ktoré sú typu znakový reťazec? Alebo ešte jednoduchšie, ako by sme mohli nájsť koreláciu (ktorá vyžaduje, aby oba vstupy boli numerické) medzi, povedzme, `balenie` a `cena`? 
🤷🤷\n", + "\n", + "Modely strojového učenia fungujú najlepšie s numerickými vlastnosťami namiesto textových hodnôt, takže vo všeobecnosti je potrebné previesť kategóriálne vlastnosti na numerické reprezentácie.\n", + "\n", + "To znamená, že musíme nájsť spôsob, ako upraviť naše prediktory, aby ich model mohol efektívne používať, proces známy ako `inžinierstvo vlastností`.\n" + ], + "metadata": { + "id": "7hsHoxsStyjJ" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 3. Predspracovanie údajov na modelovanie pomocou receptov 👩‍🍳👨‍🍳\n", + "\n", + "Aktivity, ktoré upravujú hodnoty prediktorov, aby ich model mohol efektívne využívať, sa nazývajú `inžinierstvo vlastností`.\n", + "\n", + "Rôzne modely majú rôzne požiadavky na predspracovanie. Napríklad metóda najmenších štvorcov vyžaduje `kódovanie kategóriálnych premenných`, ako sú mesiac, odroda a city_name. To jednoducho zahŕňa `preklad` stĺpca s `kategóriálnymi hodnotami` na jeden alebo viac `numerických stĺpcov`, ktoré nahradia pôvodný.\n", + "\n", + "Napríklad, predpokladajme, že vaše údaje obsahujú nasledujúcu kategóriálnu vlastnosť:\n", + "\n", + "| mesto |\n", + "|:--------:|\n", + "| Denver |\n", + "| Nairobi |\n", + "| Tokio |\n", + "\n", + "Môžete použiť *ordinálne kódovanie*, aby ste nahradili každú kategóriu jedinečnou celočíselnou hodnotou, napríklad takto:\n", + "\n", + "| mesto |\n", + "|:-----:|\n", + "| 0 |\n", + "| 1 |\n", + "| 2 |\n", + "\n", + "A presne to urobíme s našimi údajmi!\n", + "\n", + "V tejto časti preskúmame ďalší úžasný balík Tidymodels: [recipes](https://tidymodels.github.io/recipes/) - ktorý je navrhnutý tak, aby vám pomohol predspracovať vaše údaje **predtým**, než začnete trénovať váš model. 
V jadre receptu je objekt, ktorý definuje, aké kroky by sa mali aplikovať na dátovú množinu, aby bola pripravená na modelovanie.\n", + "\n", + "Teraz si vytvoríme recept, ktorý pripraví naše údaje na modelovanie tým, že všetky pozorovania v stĺpcoch prediktorov nahradí jedinečnými celými číslami:\n" + ], + "metadata": { + "id": "AD5kQbcvt3Xl" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Specify a recipe\n", + "pumpkins_recipe <- recipe(price ~ ., data = new_pumpkins) %>% \n", + " step_integer(all_predictors(), zero_based = TRUE)\n", + "\n", + "\n", + "# Print out the recipe\n", + "pumpkins_recipe" + ], + "outputs": [], + "metadata": { + "id": "BNaFKXfRt9TU" + } + }, + { + "cell_type": "markdown", + "source": [ + "Úžasné! 👏 Práve sme vytvorili náš prvý recept, ktorý špecifikuje výsledok (cenu) a jeho zodpovedajúce prediktory, pričom všetky stĺpce prediktorov by mali byť zakódované ako sada celých čísel 🙌! Poďme si to rýchlo rozobrať:\n", + "\n", + "- Volanie `recipe()` s formulou určuje receptu *úlohy* premenných pomocou údajov `new_pumpkins` ako referencie. Napríklad stĺpec `price` bol priradený úlohe `outcome`, zatiaľ čo ostatné stĺpce boli priradené úlohe `predictor`.\n", + "\n", + "- `step_integer(all_predictors(), zero_based = TRUE)` špecifikuje, že všetky prediktory by mali byť konvertované na sadu celých čísel, pričom číslovanie začína od 0.\n", + "\n", + "Sme si istí, že vás možno napadli myšlienky ako: \"Toto je tak super!! Ale čo ak by som potreboval potvrdiť, že recepty robia presne to, čo od nich očakávam? 🤔\"\n", + "\n", + "To je skvelá myšlienka! Vidíte, keď je váš recept definovaný, môžete odhadnúť parametre potrebné na skutočné predspracovanie údajov a potom extrahovať spracované údaje. 
Zvyčajne to nemusíte robiť, keď používate Tidymodels (o chvíľu uvidíme bežnú konvenciu -> `workflows`), ale môže to byť užitočné, keď chcete vykonať určitú kontrolu správnosti, aby ste si potvrdili, že recepty robia to, čo očakávate.\n", + "\n", + "Na to budete potrebovať ďalšie dva príkazy: `prep()` a `bake()`, a ako vždy, naši malí R kamaráti od [`Allison Horst`](https://github.com/allisonhorst/stats-illustrations) vám pomôžu lepšie pochopiť túto tému!\n", + "\n", + "

\n", + " \n", + "

Ilustrácia od @allison_horst
\n" + ], + "metadata": { + "id": "KEiO0v7kuC9O" + } + }, + { + "cell_type": "markdown", + "source": [ + "[`prep()`](https://recipes.tidymodels.org/reference/prep.html): odhaduje potrebné parametre z tréningovej množiny, ktoré môžu byť neskôr aplikované na iné dátové množiny. Napríklad, pre daný stĺpec prediktora, ktorá hodnota bude priradená ako celé číslo 0, 1, 2 atď.\n", + "\n", + "[`bake()`](https://recipes.tidymodels.org/reference/bake.html): vezme pripravený recept a aplikuje operácie na akúkoľvek dátovú množinu.\n", + "\n", + "Takže, poďme pripraviť a aplikovať naše recepty, aby sme naozaj potvrdili, že v pozadí budú stĺpce prediktorov najskôr zakódované pred tým, ako sa model prispôsobí.\n" + ], + "metadata": { + "id": "Q1xtzebuuTCP" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Prep the recipe\n", + "pumpkins_prep <- prep(pumpkins_recipe)\n", + "\n", + "# Bake the recipe to extract a preprocessed new_pumpkins data\n", + "baked_pumpkins <- bake(pumpkins_prep, new_data = NULL)\n", + "\n", + "# Print out the baked data set\n", + "baked_pumpkins %>% \n", + " slice_head(n = 10)" + ], + "outputs": [], + "metadata": { + "id": "FGBbJbP_uUUn" + } + }, + { + "cell_type": "markdown", + "source": [ + "Hurá! 🥳 Spracované dáta `baked_pumpkins` majú všetky svoje prediktory zakódované, čo potvrdzuje, že kroky predspracovania definované ako náš recept fungujú podľa očakávania. Pre vás to môže byť ťažšie na čítanie, ale pre Tidymodels je to oveľa zrozumiteľnejšie! Nájdite si chvíľu na zistenie, ktorá pozorovaná hodnota bola namapovaná na zodpovedajúce celé číslo.\n", + "\n", + "Stojí tiež za zmienku, že `baked_pumpkins` je dátový rámec, na ktorom môžeme vykonávať výpočty.\n", + "\n", + "Napríklad, poďme sa pokúsiť nájsť dobrú koreláciu medzi dvoma bodmi vašich dát, aby sme potenciálne vytvorili dobrý prediktívny model. Na to použijeme funkciu `cor()`. 
Napíšte `?cor()`, aby ste sa dozvedeli viac o tejto funkcii.\n" + ], + "metadata": { + "id": "1dvP0LBUueAW" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Find the correlation between the city_name and the price\n", + "cor(baked_pumpkins$city_name, baked_pumpkins$price)\n", + "\n", + "# Find the correlation between the package and the price\n", + "cor(baked_pumpkins$package, baked_pumpkins$price)\n" + ], + "outputs": [], + "metadata": { + "id": "3bQzXCjFuiSV" + } + }, + { + "cell_type": "markdown", + "source": [ + "Ako sa ukazuje, medzi mestom a cenou existuje iba slabá korelácia. Avšak medzi balíkom a jeho cenou je o niečo lepšia korelácia. To dáva zmysel, však? Zvyčajne platí, že čím väčšia je krabica s produktmi, tým vyššia je cena.\n", + "\n", + "Keď už sme pri tom, poďme sa pokúsiť vizualizovať korelačnú maticu všetkých stĺpcov pomocou balíka `corrplot`.\n" + ], + "metadata": { + "id": "BToPWbgjuoZw" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load the corrplot package\n", + "library(corrplot)\n", + "\n", + "# Obtain correlation matrix\n", + "corr_mat <- cor(baked_pumpkins %>% \n", + " # Drop columns that are not really informative\n", + " select(-c(low_price, high_price)))\n", + "\n", + "# Make a correlation plot between the variables\n", + "corrplot(corr_mat, method = \"shade\", shade.col = NA, tl.col = \"black\", tl.srt = 45, addCoef.col = \"black\", cl.pos = \"n\", order = \"original\")" + ], + "outputs": [], + "metadata": { + "id": "ZwAL3ksmutVR" + } + }, + { + "cell_type": "markdown", + "source": [ + "🤩🤩 Oveľa lepšie.\n", + "\n", + "Dobrá otázka, ktorú si teraz môžeme položiť na základe týchto údajov, je: '`Akú cenu môžem očakávať za daný balík tekvíc?`' Poďme na to!\n", + "\n", + "> Poznámka: Keď **`bake()`** použijete na pripravený recept **`pumpkins_prep`** s **`new_data = NULL`**, získate spracované (t.j. zakódované) tréningové dáta. 
Ak by ste mali iný súbor údajov, napríklad testovaciu množinu, a chceli by ste vidieť, ako by recept tieto údaje predspracoval, jednoducho by ste použili **`bake()`** na **`pumpkins_prep`** s **`new_data = test_set`**.\n", + "\n", + "## 4. Vytvorenie lineárneho regresného modelu\n", + "\n", + "

\n", + " \n", + "

Infografika od Dasani Madipalli
\n" + ], + "metadata": { + "id": "YqXjLuWavNxW" + } + }, + { + "cell_type": "markdown", + "source": [ + "Teraz, keď sme vytvorili recept a skutočne potvrdili, že údaje budú správne predspracované, poďme vytvoriť regresný model, aby sme odpovedali na otázku: `Akú cenu môžem očakávať za dané balenie tekvice?`\n", + "\n", + "#### Natrénujte lineárny regresný model pomocou tréningovej množiny\n", + "\n", + "Ako ste už pravdepodobne zistili, stĺpec *price* je `výsledná` premenná, zatiaľ čo stĺpec *package* je `prediktorová` premenná.\n", + "\n", + "Na to najskôr rozdelíme údaje tak, že 80 % pôjde do tréningovej množiny a 20 % do testovacej množiny, potom definujeme recept, ktorý zakóduje stĺpec prediktora do množiny celých čísel, a následne vytvoríme špecifikáciu modelu. Recept nebudeme pripravovať a piecť, pretože už vieme, že údaje predspracuje podľa očakávania.\n" + ], + "metadata": { + "id": "Pq0bSzCevW-h" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "set.seed(2056)\n", + "# Split the data into training and test sets\n", + "pumpkins_split <- new_pumpkins %>% \n", + " initial_split(prop = 0.8)\n", + "\n", + "\n", + "# Extract training and test data\n", + "pumpkins_train <- training(pumpkins_split)\n", + "pumpkins_test <- testing(pumpkins_split)\n", + "\n", + "\n", + "\n", + "# Create a recipe for preprocessing the data\n", + "lm_pumpkins_recipe <- recipe(price ~ package, data = pumpkins_train) %>% \n", + " step_integer(all_predictors(), zero_based = TRUE)\n", + "\n", + "\n", + "\n", + "# Create a linear model specification\n", + "lm_spec <- linear_reg() %>% \n", + " set_engine(\"lm\") %>% \n", + " set_mode(\"regression\")" + ], + "outputs": [], + "metadata": { + "id": "CyoEh_wuvcLv" + } + }, + { + "cell_type": "markdown", + "source": [ + "Skvelá práca! 
Teraz, keď máme recept a špecifikáciu modelu, musíme nájsť spôsob, ako ich spojiť do objektu, ktorý najprv predspracuje dáta (v zákulisí prep+bake), natrénuje model na predspracovaných dátach a zároveň umožní aj prípadné aktivity po spracovaní. Čo poviete na takýto pokoj na duši!🤩\n", + "\n", + "V Tidymodels sa tento praktický objekt nazýva [`workflow`](https://workflows.tidymodels.org/) a pohodlne uchováva vaše modelovacie komponenty! Toto by sme v *Python-e* nazvali *pipelines*.\n", + "\n", + "Tak poďme všetko zabaliť do workflow!📦\n" + ], + "metadata": { + "id": "G3zF_3DqviFJ" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Hold modelling components in a workflow\n", + "lm_wf <- workflow() %>% \n", + " add_recipe(lm_pumpkins_recipe) %>% \n", + " add_model(lm_spec)\n", + "\n", + "# Print out the workflow\n", + "lm_wf" + ], + "outputs": [], + "metadata": { + "id": "T3olroU3v-WX" + } + }, + { + "cell_type": "markdown", + "source": [ + "👌 Navyše, pracovný postup môže byť prispôsobený/vytrénovaný podobne ako model.\n" + ], + "metadata": { + "id": "zd1A5tgOwEPX" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Train the model\n", + "lm_wf_fit <- lm_wf %>% \n", + " fit(data = pumpkins_train)\n", + "\n", + "# Print the model coefficients learned \n", + "lm_wf_fit" + ], + "outputs": [], + "metadata": { + "id": "NhJagFumwFHf" + } + }, + { + "cell_type": "markdown", + "source": [ + "Z výstupu modelu môžeme vidieť koeficienty naučené počas tréningu. Predstavujú koeficienty priamky najlepšieho prispôsobenia, ktorá nám poskytuje najnižšiu celkovú chybu medzi skutočnou a predpovedanou premennou.\n", + "\n", + "#### Vyhodnotenie výkonu modelu pomocou testovacej množiny\n", + "\n", + "Je čas zistiť, ako si model viedol 📏! Ako to urobíme?\n", + "\n", + "Teraz, keď sme model natrénovali, môžeme ho použiť na predpovede pre testovaciu množinu pomocou `parsnip::predict()`. 
Potom môžeme tieto predpovede porovnať so skutočnými hodnotami štítkov, aby sme zhodnotili, ako dobre (alebo nie!) model funguje.\n", + "\n", + "Začnime vytváraním predpovedí pre testovaciu množinu a následným pripojením stĺpcov k testovacej množine.\n" + ], + "metadata": { + "id": "_4QkGtBTwItF" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make predictions for the test set\n", + "predictions <- lm_wf_fit %>% \n", + " predict(new_data = pumpkins_test)\n", + "\n", + "\n", + "# Bind predictions to the test set\n", + "lm_results <- pumpkins_test %>% \n", + " select(c(package, price)) %>% \n", + " bind_cols(predictions)\n", + "\n", + "\n", + "# Print the first ten rows of the tibble\n", + "lm_results %>% \n", + " slice_head(n = 10)" + ], + "outputs": [], + "metadata": { + "id": "UFZzTG0gwTs9" + } + }, + { + "cell_type": "markdown", + "source": [ + "Áno, práve ste natrénovali model a použili ho na predikcie!🔮 Je dobrý? Poďme vyhodnotiť výkon modelu!\n", + "\n", + "V Tidymodels to robíme pomocou `yardstick::metrics()`! Pre lineárnu regresiu sa zamerajme na nasledujúce metriky:\n", + "\n", + "- `Root Mean Square Error (RMSE)`: Odmocnina z [MSE](https://en.wikipedia.org/wiki/Mean_squared_error). Poskytuje absolútnu metriku v rovnakých jednotkách ako cieľová hodnota (v tomto prípade cena tekvice). Čím menšia hodnota, tým lepší model (v jednoduchom zmysle predstavuje priemernú cenu, o ktorú sa predikcie mýlia!).\n", + "\n", + "- `Coefficient of Determination (zvyčajne známy ako R-squared alebo R2)`: Relatívna metrika, pri ktorej vyššia hodnota znamená lepšie prispôsobenie modelu. 
V podstate táto metrika reprezentuje, koľko variability medzi predikovanými a skutočnými hodnotami cieľovej premennej dokáže model vysvetliť.\n" + ], + "metadata": { + "id": "0A5MjzM7wW9M" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Evaluate performance of linear regression\n", + "metrics(data = lm_results,\n", + " truth = price,\n", + " estimate = .pred)" + ], + "outputs": [], + "metadata": { + "id": "reJ0UIhQwcEH" + } + }, + { + "cell_type": "markdown", + "source": [ + "Tu máme výkon modelu. Pozrime sa, či môžeme získať lepšiu predstavu vizualizáciou bodového grafu balíka a ceny, a potom použijeme predpovede na prekrytie priamky najlepšieho prispôsobenia.\n", + "\n", + "To znamená, že budeme musieť pripraviť a spracovať testovaciu množinu, aby sme zakódovali stĺpec balíka, a potom to spojiť s predpoveďami vytvorenými naším modelom.\n" + ], + "metadata": { + "id": "fdgjzjkBwfWt" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Encode package column\n", + "package_encode <- lm_pumpkins_recipe %>% \n", + " prep() %>% \n", + " bake(new_data = pumpkins_test) %>% \n", + " select(package)\n", + "\n", + "\n", + "# Bind encoded package column to the results\n", + "lm_results <- lm_results %>% \n", + " bind_cols(package_encode %>% \n", + " rename(package_integer = package)) %>% \n", + " relocate(package_integer, .after = package)\n", + "\n", + "\n", + "# Print new results data frame\n", + "lm_results %>% \n", + " slice_head(n = 5)\n", + "\n", + "\n", + "# Make a scatter plot\n", + "lm_results %>% \n", + " ggplot(mapping = aes(x = package_integer, y = price)) +\n", + " geom_point(size = 1.6) +\n", + " # Overlay a line of best fit\n", + " geom_line(aes(y = .pred), color = \"orange\", size = 1.2) +\n", + " xlab(\"package\")\n", + " \n" + ], + "outputs": [], + "metadata": { + "id": "R0nw719lwkHE" + } + }, + { + "cell_type": "markdown", + "source": [ + "Skvelé! 
Ako môžete vidieť, lineárny regresný model nedokáže veľmi dobre generalizovať vzťah medzi balíkom a jeho zodpovedajúcou cenou.\n", + "\n", + "🎃 Gratulujeme, práve ste vytvorili model, ktorý dokáže predpovedať cenu niekoľkých druhov tekvíc. Vaša sviatočná tekvicová záhrada bude nádherná. Ale pravdepodobne dokážete vytvoriť ešte lepší model!\n", + "\n", + "## 5. Vytvorte polynomiálny regresný model\n", + "\n", + "

\n", + " \n", + "

Infografika od Dasani Madipalli
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "HOCqJXLTwtWI" + } + }, + { + "cell_type": "markdown", + "source": [ + "Niekedy naše údaje nemusia mať lineárny vzťah, no aj tak chceme predpovedať výsledok. Polynomická regresia nám môže pomôcť robiť predpovede pre zložitejšie nelineárne vzťahy.\n", + "\n", + "Vezmime si napríklad vzťah medzi balením a cenou v našej dátovej sade tekvíc. Zatiaľ čo niekedy existuje lineárny vzťah medzi premennými – čím väčší objem tekvice, tým vyššia cena – niekedy tieto vzťahy nemožno znázorniť ako rovinu alebo priamku.\n", + "\n", + "> ✅ Tu sú [ďalšie príklady](https://online.stat.psu.edu/stat501/lesson/9/9.8) údajov, ktoré by mohli využiť polynomickú regresiu\n", + ">\n", + "> Pozrite sa znova na vzťah medzi odrodou a cenou v predchádzajúcom grafe. Zdá sa, že by tento bodový graf mal byť nevyhnutne analyzovaný priamkou? Možno nie. V tomto prípade môžete vyskúšať polynomickú regresiu.\n", + ">\n", + "> ✅ Polynómy sú matematické výrazy, ktoré môžu pozostávať z jednej alebo viacerých premenných a koeficientov\n", + "\n", + "#### Natrénujte model polynomickej regresie pomocou tréningovej množiny\n", + "\n", + "Polynomická regresia vytvára *zakrivenú čiaru*, ktorá lepšie zodpovedá nelineárnym údajom.\n", + "\n", + "Pozrime sa, či polynomický model bude lepší pri predpovedaní. Budeme postupovať podobným spôsobom ako predtým:\n", + "\n", + "- Vytvorte recept, ktorý špecifikuje kroky predspracovania, ktoré by sa mali vykonať na našich údajoch, aby boli pripravené na modelovanie, t. 
j.: kódovanie prediktorov a výpočet polynómov stupňa *n*\n", + "\n", + "- Vytvorte špecifikáciu modelu\n", + "\n", + "- Spojte recept a špecifikáciu modelu do pracovného postupu\n", + "\n", + "- Vytvorte model prispôsobením pracovného postupu\n", + "\n", + "- Vyhodnoťte, ako dobre model funguje na testovacích údajoch\n", + "\n", + "Poďme na to!\n" + ], + "metadata": { + "id": "VcEIpRV9wzYr" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Specify a recipe\r\n", + "poly_pumpkins_recipe <-\r\n", + " recipe(price ~ package, data = pumpkins_train) %>%\r\n", + " step_integer(all_predictors(), zero_based = TRUE) %>% \r\n", + " step_poly(all_predictors(), degree = 4)\r\n", + "\r\n", + "\r\n", + "# Create a model specification\r\n", + "poly_spec <- linear_reg() %>% \r\n", + " set_engine(\"lm\") %>% \r\n", + " set_mode(\"regression\")\r\n", + "\r\n", + "\r\n", + "# Bundle recipe and model spec into a workflow\r\n", + "poly_wf <- workflow() %>% \r\n", + " add_recipe(poly_pumpkins_recipe) %>% \r\n", + " add_model(poly_spec)\r\n", + "\r\n", + "\r\n", + "# Create a model\r\n", + "poly_wf_fit <- poly_wf %>% \r\n", + " fit(data = pumpkins_train)\r\n", + "\r\n", + "\r\n", + "# Print learned model coefficients\r\n", + "poly_wf_fit\r\n", + "\r\n", + " " + ], + "outputs": [], + "metadata": { + "id": "63n_YyRXw3CC" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### Vyhodnotenie výkonu modelu\n", + "\n", + "👏👏Vytvorili ste polynomiálny model, poďme urobiť predpovede na testovacej množine!\n" + ], + "metadata": { + "id": "-LHZtztSxDP0" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make price predictions on test data\r\n", + "poly_results <- poly_wf_fit %>% predict(new_data = pumpkins_test) %>% \r\n", + " bind_cols(pumpkins_test %>% select(c(package, price))) %>% \r\n", + " relocate(.pred, .after = last_col())\r\n", + "\r\n", + "\r\n", + "# Print the results\r\n", + "poly_results %>% \r\n", + " slice_head(n = 
10)" + ], + "outputs": [], + "metadata": { + "id": "YUFpQ_dKxJGx" + } + }, + { + "cell_type": "markdown", + "source": [ + "Hurá, poďme vyhodnotiť, ako model fungoval na testovacej množine pomocou `yardstick::metrics()`.\n" + ], + "metadata": { + "id": "qxdyj86bxNGZ" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "metrics(data = poly_results, truth = price, estimate = .pred)" + ], + "outputs": [], + "metadata": { + "id": "8AW5ltkBxXDm" + } + }, + { + "cell_type": "markdown", + "source": [ + "🤩🤩 Oveľa lepší výkon.\n", + "\n", + "`rmse` kleslo z približne 7 na približne 3, čo naznačuje zníženie chyby medzi skutočnou cenou a predpovedanou cenou. Môžete to *voľne* interpretovať tak, že nesprávne predpovede sú v priemere nesprávne o približne 3 $. `rsq` sa zvýšilo z približne 0,4 na 0,8.\n", + "\n", + "Všetky tieto metriky naznačujú, že polynomiálny model funguje oveľa lepšie ako lineárny model. Skvelá práca!\n", + "\n", + "Pozrime sa, či to dokážeme vizualizovať!\n" + ], + "metadata": { + "id": "6gLHNZDwxYaS" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Bind encoded package column to the results\r\n", + "poly_results <- poly_results %>% \r\n", + " bind_cols(package_encode %>% \r\n", + " rename(package_integer = package)) %>% \r\n", + " relocate(package_integer, .after = package)\r\n", + "\r\n", + "\r\n", + "# Print new results data frame\r\n", + "poly_results %>% \r\n", + " slice_head(n = 5)\r\n", + "\r\n", + "\r\n", + "# Make a scatter plot\r\n", + "poly_results %>% \r\n", + " ggplot(mapping = aes(x = package_integer, y = price)) +\r\n", + " geom_point(size = 1.6) +\r\n", + " # Overlay a line of best fit\r\n", + " geom_line(aes(y = .pred), color = \"midnightblue\", size = 1.2) +\r\n", + " xlab(\"package\")\r\n" + ], + "outputs": [], + "metadata": { + "id": "A83U16frxdF1" + } + }, + { + "cell_type": "markdown", + "source": [ + "Môžete vidieť zakrivenú čiaru, ktorá lepšie zodpovedá vašim údajom! 
🤩\n", + "\n", + "Môžete ju urobiť ešte plynulejšou tým, že do `geom_smooth` zadáte polynomiálny vzorec, napríklad takto:\n" + ], + "metadata": { + "id": "4U-7aHOVxlGU" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make a scatter plot\r\n", + "poly_results %>% \r\n", + " ggplot(mapping = aes(x = package_integer, y = price)) +\r\n", + " geom_point(size = 1.6) +\r\n", + " # Overlay a line of best fit\r\n", + " geom_smooth(method = lm, formula = y ~ poly(x, degree = 4), color = \"midnightblue\", size = 1.2, se = FALSE) +\r\n", + " xlab(\"package\")" + ], + "outputs": [], + "metadata": { + "id": "5vzNT0Uexm-w" + } + }, + { + "cell_type": "markdown", + "source": [ + "Podobne ako hladká krivka!🤩\n", + "\n", + "Tu je postup, ako vytvoriť novú predikciu:\n" + ], + "metadata": { + "id": "v9u-wwyLxq4G" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make a hypothetical data frame\r\n", + "hypo_tibble <- tibble(package = \"bushel baskets\")\r\n", + "\r\n", + "# Make predictions using linear model\r\n", + "lm_pred <- lm_wf_fit %>% predict(new_data = hypo_tibble)\r\n", + "\r\n", + "# Make predictions using polynomial model\r\n", + "poly_pred <- poly_wf_fit %>% predict(new_data = hypo_tibble)\r\n", + "\r\n", + "# Return predictions in a list\r\n", + "list(\"linear model prediction\" = lm_pred, \r\n", + " \"polynomial model prediction\" = poly_pred)\r\n" + ], + "outputs": [], + "metadata": { + "id": "jRPSyfQGxuQv" + } + }, + { + "cell_type": "markdown", + "source": [ + "Predikcia pomocou `polynomial model` dáva zmysel, vzhľadom na bodové grafy `price` a `package`! A ak je tento model lepší ako ten predchádzajúci, pri pohľade na tie isté údaje, budete musieť plánovať rozpočet na tieto drahšie tekvice!\n", + "\n", + "🏆 Skvelá práca! Vytvorili ste dva regresné modely v jednej lekcii. 
V poslednej časti o regresii sa naučíte o logistickej regresii na určenie kategórií.\n", + "\n", + "## **🚀Výzva**\n", + "\n", + "Otestujte niekoľko rôznych premenných v tomto notebooku, aby ste zistili, ako korelácia súvisí s presnosťou modelu.\n", + "\n", + "## [**Kvíz po prednáške**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/14/)\n", + "\n", + "## **Prehľad & Samoštúdium**\n", + "\n", + "V tejto lekcii sme sa naučili o lineárnej regresii. Existujú aj iné dôležité typy regresie. Prečítajte si o technikách Stepwise, Ridge, Lasso a Elasticnet. Dobrou možnosťou na štúdium je [Stanford Statistical Learning course](https://online.stanford.edu/courses/sohs-ystatslearning-statistical-learning).\n", + "\n", + "Ak sa chcete dozvedieť viac o používaní úžasného frameworku Tidymodels, pozrite si nasledujúce zdroje:\n", + "\n", + "- Webová stránka Tidymodels: [Začnite s Tidymodels](https://www.tidymodels.org/start/)\n", + "\n", + "- Max Kuhn a Julia Silge, [*Tidy Modeling with R*](https://www.tmwr.org/)*.*\n", + "\n", + "###### **ĎAKUJEME:**\n", + "\n", + "[Allison Horst](https://twitter.com/allison_horst?lang=en) za vytvorenie úžasných ilustrácií, ktoré robia R prístupnejším a zábavnejším. Viac ilustrácií nájdete v jej [galérii](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM).\n" + ], + "metadata": { + "id": "8zOLOWqMxzk5" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby na automatický preklad [Co-op Translator](https://github.com/Azure/co-op-translator). Aj keď sa snažíme o presnosť, upozorňujeme, že automatické preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre dôležité informácie sa odporúča profesionálny ľudský preklad. 
Nezodpovedáme za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sk/2-Regression/3-Linear/solution/notebook.ipynb b/translations/sk/2-Regression/3-Linear/solution/notebook.ipynb new file mode 100644 index 000000000..f41dd74ef --- /dev/null +++ b/translations/sk/2-Regression/3-Linear/solution/notebook.ipynb @@ -0,0 +1,1111 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Lineárna a polynomiálna regresia pre stanovenie cien tekvíc - Lekcia 3\n", + "\n", + "Načítajte potrebné knižnice a dataset. Konvertujte údaje na dataframe obsahujúci podmnožinu údajov:\n", + "\n", + "- Získajte iba tekvice ocenené na základe ceny za koš\n", + "- Konvertujte dátum na mesiac\n", + "- Vypočítajte cenu ako priemer vysokých a nízkych cien\n", + "- Konvertujte cenu tak, aby odrážala cenu za množstvo v koši\n" + ] + }, + { + "cell_type": "code", + "execution_count": 167, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NameTypePackageVarietySub VarietyGradeDateLow PriceHigh PriceMostly Low...Unit of SaleQualityConditionAppearanceStorageCropRepackTrans ModeUnnamed: 24Unnamed: 25
0BALTIMORENaN24 inch binsNaNNaNNaN4/29/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
1BALTIMORENaN24 inch binsNaNNaNNaN5/6/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
2BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
3BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
4BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN11/5/1690.0100.090.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " City Name Type Package Variety Sub Variety Grade Date \\\n", + "0 BALTIMORE NaN 24 inch bins NaN NaN NaN 4/29/17 \n", + "1 BALTIMORE NaN 24 inch bins NaN NaN NaN 5/6/17 \n", + "2 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "3 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "4 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 11/5/16 \n", + "\n", + " Low Price High Price Mostly Low ... Unit of Sale Quality Condition \\\n", + "0 270.0 280.0 270.0 ... NaN NaN NaN \n", + "1 270.0 280.0 270.0 ... NaN NaN NaN \n", + "2 160.0 160.0 160.0 ... NaN NaN NaN \n", + "3 160.0 160.0 160.0 ... NaN NaN NaN \n", + "4 90.0 100.0 90.0 ... NaN NaN NaN \n", + "\n", + " Appearance Storage Crop Repack Trans Mode Unnamed: 24 Unnamed: 25 \n", + "0 NaN NaN NaN E NaN NaN NaN \n", + "1 NaN NaN NaN E NaN NaN NaN \n", + "2 NaN NaN NaN N NaN NaN NaN \n", + "3 NaN NaN NaN N NaN NaN NaN \n", + "4 NaN NaN NaN N NaN NaN NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 167, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from datetime import datetime\n", + "\n", + "pumpkins = pd.read_csv('../../data/US-pumpkins.csv')\n", + "pumpkins.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 168, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MonthDayOfYearVarietyCityPackageLow PriceHigh PricePrice
709267PIE TYPEBALTIMORE1 1/9 bushel cartons15.015.013.636364
719267PIE TYPEBALTIMORE1 1/9 bushel cartons18.018.016.363636
7210274PIE TYPEBALTIMORE1 1/9 bushel cartons18.018.016.363636
7310274PIE TYPEBALTIMORE1 1/9 bushel cartons17.017.015.454545
7410281PIE TYPEBALTIMORE1 1/9 bushel cartons15.015.013.636364
\n", + "
" + ], + "text/plain": [ + " Month DayOfYear Variety City Package Low Price \\\n", + "70 9 267 PIE TYPE BALTIMORE 1 1/9 bushel cartons 15.0 \n", + "71 9 267 PIE TYPE BALTIMORE 1 1/9 bushel cartons 18.0 \n", + "72 10 274 PIE TYPE BALTIMORE 1 1/9 bushel cartons 18.0 \n", + "73 10 274 PIE TYPE BALTIMORE 1 1/9 bushel cartons 17.0 \n", + "74 10 281 PIE TYPE BALTIMORE 1 1/9 bushel cartons 15.0 \n", + "\n", + " High Price Price \n", + "70 15.0 13.636364 \n", + "71 18.0 16.363636 \n", + "72 18.0 16.363636 \n", + "73 17.0 15.454545 \n", + "74 15.0 13.636364 " + ] + }, + "execution_count": 168, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pumpkins = pumpkins[pumpkins['Package'].str.contains('bushel', case=True, regex=True)]\n", + "\n", + "new_columns = ['Package', 'Variety', 'City Name', 'Month', 'Low Price', 'High Price', 'Date']\n", + "pumpkins = pumpkins.drop([c for c in pumpkins.columns if c not in new_columns], axis=1)\n", + "\n", + "price = (pumpkins['Low Price'] + pumpkins['High Price']) / 2\n", + "\n", + "month = pd.DatetimeIndex(pumpkins['Date']).month\n", + "day_of_year = pd.to_datetime(pumpkins['Date']).apply(lambda dt: (dt-datetime(dt.year,1,1)).days)\n", + "\n", + "new_pumpkins = pd.DataFrame(\n", + " {'Month': month, \n", + " 'DayOfYear' : day_of_year, \n", + " 'Variety': pumpkins['Variety'], \n", + " 'City': pumpkins['City Name'], \n", + " 'Package': pumpkins['Package'], \n", + " 'Low Price': pumpkins['Low Price'],\n", + " 'High Price': pumpkins['High Price'], \n", + " 'Price': price})\n", + "\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1 1/9'), 'Price'] = price/1.1\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1/2'), 'Price'] = price*2\n", + "\n", + "new_pumpkins.head()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Bodový graf nám pripomína, že máme údaje iba od augusta do decembra. 
Pravdepodobne potrebujeme viac údajov, aby sme mohli vyvodiť závery lineárnym spôsobom.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 169, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 169, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "new_pumpkins.plot.scatter('Month','Price')" + ] + }, + { + "cell_type": "code", + "execution_count": 170, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 170, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "new_pumpkins.plot.scatter('DayOfYear','Price')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 171, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-0.14878293554077535\n", + "-0.16673322492745407\n" + ] + } + ], + "source": [ + "print(new_pumpkins['Month'].corr(new_pumpkins['Price']))\n", + "print(new_pumpkins['DayOfYear'].corr(new_pumpkins['Price']))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Zdá sa, že korelácia je dosť malá, ale existuje nejaký iný dôležitejší vzťah - pretože cenové body v grafe vyššie sa zdajú mať niekoľko odlišných klastrov. Poďme vytvoriť graf, ktorý ukáže rôzne odrody tekvíc:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 172, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAX4AAAEGCAYAAABiq/5QAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAA7VklEQVR4nO2deXxU5fX/34cQzACyR6WyBEEpyBIlYF3rAlQtAi61Rtx+xq9tNZbar1q7iHxb7FdxrVr7VZt+QYGIS12wdcMvtmqtLIoIRFHbhCKUhMhO2M/vj3uTzCQzSWbmzsydmfN+ve7rzj1z7+c+T2Zy5rnneZ7ziKpiGIZhZA/tUl0AwzAMI7mY4zcMw8gyzPEbhmFkGeb4DcMwsgxz/IZhGFlG+1QXoC306tVLCwoKUl0MwzCMtGLZsmWbVDW/qT0tHH9BQQFLly5NdTEMwzDSChGpCme3UI9hGEaWYY7fMAwjyzDHbxiGkWWkRYzfMBLJvn37WLduHbt37051UQwjJvLy8ujTpw+5ubltOt8cv5H1rFu3jkMPPZSCggJEJNXFMYyoUFVqa2tZt24dAwYMaNM1FuoxIlJTA0uWOPtMZvfu3fTs2dOcvpGWiAg9e/aM6onVHL8RlvJy6N8fxo1z9uXlqS5RYjGnb6Qz0X5/zfEbzaipgZISqKuDrVudfUlJ5rf8DSNbMMdvNKOyEjp0CLXl5jp2IzHk5ORQWFjIsGHD+M53vsOuXbsA6Ny5MwCVlZUEAgEKCwsbtieeeCJE4/zzz6ewsJBBgwbRtWvXhvPOPPNMfvKTnzScV1VVxVFHHcWWLVs4/fTTGTx4MCNHjuTkk0/m008/BWiw12tcdNFFSfpLGElBVX2/jRo1So3kUV2tGgioQuMWCDj2TGT16tWpLoJ26tSp4fWll16q9957b4j9n//8px577LFt0lq0aJF++9vfbjjetWuXDh48uKGekyZN0jlz5qiq6je/+U1dsmSJqqo++uijet555zWzG+lBuO8xsFTD+FRr8RvNyM+HsjIIBKBLF2dfVubY4yGjOosTWJlTTz2Vzz//3DO9QCDAfffdx3XXXccrr7zC9u3bmTJlSrPzTjvtNE/va/gXc/xGWIqLoaoKFi509sXF8ellVGdxAiuzf/9+XnnlFYYPH97svS+++CIk1PP222+3Wffcc8+lR48eXHHFFTzyyCNhz1mwYEHIfadMmdJwr5tvvjn6yhi+JaHj+EWkEtgOHAD2q2qRiPQA5gMFQCVwsapuTmQ5jNjIz4+/lQ+hncV1dY6tpATGjvVGP6kkqDJ1dXUUFhYCTou/pKSk2TkDBw5k+fLlMd/j+uuvp66ujsGDB4fYp0yZQiAQoKCggIceeqjBPnfuXIqKimK+n+FfkjGB6wxV3RR0fCvwpqreKSK3usc/CX+pkQnUdxbX+0lo7CxOO8efoMoEAoG4nHpbaNeuHe3aNX/INweffaQi1DMJmO2+ng1MTkEZjCRSUAB794ba9u1z7GlHRlXGyFYS7fgVeF1ElonIta7tcFXdAODuD0twGYwUk6jO4pSQwso0jfE/+OCDCb1fcIx/7NixCb2XkVzEGfGTIHGRr6nqehE5DHgDuAF4SVW7BZ2zWVW7h7n2WuBagH79+o2qqgq7noCRRtTUOBGRggJ/Of2KigqGDBkS3UV+rYyRtYT7HovIMlVtFsdLaIxfVde7+2oReR4YA2wUkd6qukFEegPVEa59DHgMoKioKHG/TkbS8Kqz2BdkVGWMbCNhoR4R6SQih9a/BsYDK4GXgCvd064EXkxUGQzDMIzmJLLFfzjwvJs8qD0wT1VfFZElwNMiUgKsBb6TwDIYhmEYTUiY41fVfwAjw9hrgbMSdV/DMAyjZWzmrmEYRpZhjj+FeJ3uxe96hmH4A3P8KcLrdC9+1zNaRkS4/PLLG473799Pfn4+EyZMAGDWrFmUlpYCMH36dDp27Eh1deOAuPr0zU1fA9x///3k5eWxdet
WamtrG8bmH3HEERx55JENx2vWrGHYsGEh106fPp177rkHgKuuuooBAwZQWFjIyJEjefPNNxvOszTO6YU5/hTg9UInftczWqdTp06sXLmSOjcVxBtvvMGRRx4Z8fxevXpx7733tkm7vLyc0aNH8/zzz9OzZ0+WL1/O8uXL+f73v8+NN97YcNyh6SIMYbj77rtZvnw5DzzwAN///vdD3ps7d26D1rPPPtumshmpwRx/CvB6oRO/62UiiQiDnXPOOfzpT38CHGdd3EJK1Kuvvpr58+fz1Vdftaj5xRdfsGPHDmbMmEG5h49tJ554Il9++aVnekZyMcefArxO9+J3vUwjUWGwSy65hKeeeordu3ezYsUKTjjhhIjndu7cmauvvprf/OY3rZTV+QE59dRT+fTTT0PCQ/Hw6quvMnny5BCbpXFOH8zxpwCv0734Xa+eTOgsTmQYbMSIEVRWVlJeXs65557b6vk//OEPmT17Ntu2bYt4zlNPPcUll1xCu3btuOCCC3jmmWcinhtpwe5g+80338xRRx3FZZddxs9+9rOQ84JDPXfffXer5TdSRzLSMhthKC52Urh7le7F73rl5Y6D7NDBeZooK4t/cZdUkOgU0xMnTuSmm27irbfeora2tsVzu3XrxqWXXhpxYZUVK1bw2WefMW7cOAD27t3LUUcdxfXXXx/2/J49e7J5c+jSGF999RUDBgxoOL777ru54IILePDBB7nyyitZtmxZNNUzfIK1+FNIfj6MHu1dyhe/6mVSZ3Giw2BXX30106ZNC7sCVzh+/OMf8+ijj7J///5m75WXlzN9+nQqKyuprKxk/fr1fPnll0RKeNi5c2d69+7dMFrnq6++4tVXX+WUU04JOa9du3ZMnTqVgwcP8tprr0VZQ8MPmOM3Ek46dxbv2wc7dzp7iD8MVlcHmzaFPjEE06dPH6ZOndrm8nXt2osJE85nz549zd576qmnOP/880Ns559/Pk899VREvSeeeIIZM2ZQWFjImWeeye23387AgQObnSci/OIXv2DmzJkNNkvjnD4kNC2zVxQVFenSpUtTXQwjRmpqnE7QYGcXCDhr+fohwWWktMy1tU4ZRUDVqUPPns57sWRlXrsWgvtWDzsM+vWLvdwtlc/IPqJJy2wtfiNqKipg9mxn3xbScSGWffscp3rwIBw44OyrqkJb/tGEwerqQp0+OMeRWv7xls8wWsI6d42ouOEGePjhxuPSUghanzsiXncWJ5q9e52WdDAijj03N3q9nTsj2wOB1JfPyC6sxW+0mYqKUKcPznE0LX8vO58TSYcOTvgkGNXmfRVtpVOn6Oyt4XX5jOzCHL/RZhYvjs6ezuTmOjHzdu0gJ8fZ9+8fe2s6EHBi+sEcdlhsrf1ElM/ILizUY7SZMWOis6c7PXs6fRJ79zot6Xidar9+ztPOzp1OSz9Wp5+o8hnZQ8Jb/CKSIyIfisjL7vF0EflSRJa7W+tTFA1fMGSIE9MPprTUsWcqubmOk/bKqQYC0KtX/E6/Hq/L13T4qpGZJCPUMxVoGgW+X1UL3e3PSSiD4REPPQSrV8OsWc6+LR27Ruvk5OQ0jIEvLCyk0p3kEJxSuZ633norJF1zfn4+hYWFfP3rX+f+++9n9+7dfP3rX+fjjz9uuGbmzJmcddZZDfo9evRoSLFcP+Z+0aIP6dBBmDXrNT7+2BkuCs3TPIOTrjk4pXNhYSFbtmxJzB/H8JyEhnpEpA/wbeAO4MeJvJeRPIYMyexWfioIBAIsX768mT04pfJVV10V9trvfve7PPzww9TW1jJ48GAuuugiHnjgAa677jr++te/sn79eh599FGWLl1K9+7dASe3/oQJExry5u/bB7NmlVNYeAqvvFLOCSd8i6oqJ5QUiRtvvJGbbrop3qobKSDRLf4HgFuAg03spSKyQkT+ICLdw10oIteKyFIRWVqTjnP7jYymZmcNS75cQs3OxH03o02p3LNnTwYNGsSGDRs4++yz6d2
7N0888QQ33ngj06dPb3D64dizR3nzzWe5/fZZvP/+6+zZs7theKiReSTM8YvIBKBaVZtmcfodMBAoBDYAYVeTUNXHVLVIVYvy02H8n5E1lH9cTv8H+jPuyXH0f6A/5Svjz8tcV1fXEDKpT7MQbUrltWvXsnv3bkaMGAHAAw88wM9//nNqampCVvcKx9Kl7/K1rw2gT5+BjBp1Ou++++dWh4fef//9DWU+44wzoquwkVIS2eI/GZgoIpXAU8CZIjJHVTeq6gFVPQg8DmTomBAjE6nZWUPJSyXU7a9j656t1O2vo+TFkrhb/vWhnuXLl/P8888DbU+pPH/+fI499liOOuoopk6dSl5eHgBf+9rXOPPMM/nBD37Q6v2feaac4uJLaNcOzj77El5/vbzV4aHBq3ctWrQo+kobKSNhMX5V/SnwUwAROR24SVUvE5HeqrrBPe18YGWiymAYXlO5pZIOOR2o29+YayE3J5fKLZXkd/LuyTSalMr1Mf733nuPb3/725xzzjkcccQRgJNJs127ltt3Bw4c4LnnniM39yV+85s7OHhQ+eqrWjp02A4c6lmdDP+QiglcM0XkYxFZAZwB3JiCMhhGTBR0K2DvgdDA974D+yjoVuDpfaJNqQzOcoiXX355q6tyNWXhwoWMHDmSf/3rX1RWVrJ2bRUXXnghL7zwQpy1MPxKUhy/qr6lqhPc15er6nBVHaGqE4Na/4bhe/I75VM2qYxA+wBdDulCoH2Askllnrb2IbaUygA/+clP+N///V+2b9/e5nuVl5c3u9eFF17IvHnzANi1axd9+vRp2O677z4gNMYfPATV8D+WltnIeiKlZW6Jmp01VG6ppKBbgedO3zBiIZq0zJaywTBiIL9Tvjl8I22xJG2GYRhZhjl+wzCMLMMcv2EYRpZhjt8wDCPLMMdvJI2aGliyxNknipdfhmuucfZesGWLs1ykV4knvdbzOo2y3/UMbzDHbySF8nJnhahx45x9G3KORc3w4XDeec5C7uedB27KmphZtQo+/xw2bXL2q1YlTk9EQvLp7N+/n/z8/JD0y6XuYgg//OEP+dWvfkVtLXz8Mdxyyx1MmXI9tbVO1s36dMuFhYWcdNJJDdfn5+dz3HHHcfTRR/Otb32Lv/3tbyHlq9f78Y+nc/PN9zSkZQYoKChg06ZNQPMU0nfeeScAp59+OvXDrgsKChg6dDhDh47gtNO+yeuvV7WY5nnr1q1cccUVDBw4kIEDB3LFFVeEpKJetWoVZ555JscccwxHH300v/rVr6gfij5r1ixEhDfffLPh/Oeffx4R4dlnnw25z6xZsyguLg6xbdq0ifz8fPbs2QPApEmTOPHEE0POCU5DPXTo0JCkeVdddVXDfU4//XQGDx7c8Le56KKLuOOOOxqOg/92Dz74INOnT+eee+5p0An32W3cuJEJEyYwcuRIhg4dyrnnxr+EiQ3nNBJOTQ2UlEBdnbOBczx2rHfr7778Mqxskvzj448du+s7o2LLlsay1lNX59i7dfNer1OnTqxcuZK6ujoCgQBvvPEGRx55ZFitGTNmUFhYyMiRU1AVXnjh98yZ8yFVVXDwINx9990N6ZaDqU/tALBo0SIuuOACFi1axJAhQ9i3j4brVZ2tPi1z03w9kVJIN+WhhxbRtWsvHn30dh5/fAa9ez8eMc1zSUkJw4YN44knngDg9ttv55prruGZZ56hrq6OiRMn8rvf/Y7x48eza9cuLrzwQh555JGGFBbDhw+nvLycs846C3AmwI0cObLZfS644AJuuukmdu3aRceOHQF49tlnmThxIocccghbtmzhgw8+oHPnzvzzn/9kwIABDdfWp6H+7LPPGDVqFBdddBG5YZIZzZ07l6Ki0KHzP//5zwHnRy/4bzd9+vSQ88J9dtOmTWPcuHFMnToVcNJ5xIu1+I2EU1nZPMtjbq5j94pI2QVizToQKRTTaK8Blrh7L/TgnHPO4U9/+hPQmJkzHF26dOG22+5
g5sxSZs68nu9975ccemg3RBzH3RbOOOMMrr32Wh577DHASb8sEnpOPGmZVRv1hg8/kZqaLyPqff755yxbtozbbrutwTZt2jSWLl3KF198wbx58zj55JMZP348AB07duThhx9ueNIAOPXUU1m8eDH79u1jx44dfP755xQWFja7V5cuXTjttNNYsGBBg+2pp55q+Fs/99xznHfeeVxyySURZ0kfffTRdOzYkc2bN0f7Z4mJDRs20KdPn4bjEfE+ymKO30gCBQXN/+H37XPsXjF5cnT21ojUqnfs5UB/YJy7bz1u1bKeQ72z2b17NytWrOCEE06IqHfZZcVs27aZnTu3ce65TohI1Vl0/eabb24IF0yZMiWixvHHH88nn3wCOD/MwZP4y8vv55JLCjnpJEdn/fr1De8Fp5AuLCxk/vz5zbRFGvXee+9VvvnNyRHTPK9evbohDFJPfUhk1apVrFq1ilGjRoVcM3DgQHbs2MG2bdvc+wljx47ltdde48UXX2TixIkR611cXNzg1NevX8+aNWsa0krX/+AWFxdHXAPhgw8+4Oijj+awww4L+/6UKVMa/jY333xzxHKEI9xnd/3111NSUsIZZ5zBHXfcEfJZxIqFeoyEk5/vxN1LSpyW/r59zrGXyyxMmODE+INWG2T48NjCPOA45EAgNDwTCEC3bjVACVDnbrjHY4HIFYqs13g8YsQIKisrKS8vbzWOu3HjOrZu/TcHDgh79uwgEOhM//6O448U6mlKcLqW3Fyn76WqynHal156I7fddhM9ezrvFwT9Src11HPDDWfw739vpEePw7j++hkR0zyrKtL0cSPIHul9IMR+ySWX8OCDD7J161buvfdefv3rX4e9ZsKECVx33XVs27aNp59+mosuuoicnBw2btzI559/zimnnIKI0L59e1auXMmwYcMAJzfR448/zj/+8Q9effXViPUOF+ppK+E+u29961sN93zllVc47rjjWLlyJfGsU2ItfiMpFBc7TmXhQmcfIYoRFytWwIIFzg/MggXOcTwceywMGuQsjj5okHMMlUDTZmuua49FL5SJEydy0003RQzz1DN16lR++cvpFBdfzDPP/BfDh9PgpNvKhx9+GJLbpWdPGnR6945eryl//esiKiurGDnyWJ59dlpEvWOPPZYPP/yQg0FxqoMHD/LRRx8xZMgQjj32WJrm6vrHP/5B586dOfTQxrTRY8aMYeXKlWzatIljjjkmYrkCgQBnn302zz//fEiYZ/78+WzevJkBAwZQUFBAZWVlSLjnxhtv5NNPP2X+/PlcccUV7N69O5Y/S0z06NGDSy+9lCeffJLRo0fz17/+NS49c/xG0sjPh9GjvW3pN2XCBPj972Nv6TelWzcnJNXYMi8Amgaq97n2WPRCufrqq5k2bRrDhw+PqPHKK69QXV3NFVdcwfTpt/Hyy8/z2Wer23T/ev7yl7/w2GOP8R//8R8h9txcJxwTFHWJiy5dAjz88APMmfMEX331VdhzBg0axHHHHceMGTMabDNmzOD4449n0KBBTJkyhXfeeYeFCxcCTqjphz/8Ibfcckszrf/+7/+O2NIPpri4mPvuu4+NGzfyjW98A3DCPK+++mpDKuxly5aFjfNfcMEFFBUVMXv27Db9DeLl//7v/9i1axcA27dv54svvqBfv35xaZrjN4yoyAfKgADQxd2X0VKYJxr69OnTMHojHLt37+ZHP/oRjzzyCCJCp06dmDlzZsNQTwiNExcWFrLX7WCZP38+hYWFHHPMMfz617/mueeeizorKTSP8d96660tnt+7d2+Ki4v57W9/C4RP81xWVsaaNWsYNGgQAwcOZM2aNZSVlQFOC/3FF19kxowZDB48mOHDhzN69OiQOtdzzjnntGkZyPHjx7N+/Xq++93vIiLuOgRrG34EAAYMGECXLl14//33m10/bdo07rvvvpCnlHqCY/xjx45ttSzBhPvsli1bRlFRESNGjODEE0/kmmuuYfT
o0VHpNiXhaZlFJAdYCnypqhNEpAcwH6eJVAlcrKotdo9bWmZ/UVPjjMgpKIi/9T53Ljz9NFx8MbTQD5nQssWSltkZzVOJ8zVOXZbOHTtg61bo2hXCDI9PuV5dnTOBq1Mnp0/DSBzRpGVORot/KlARdHwr8KaqHg286R4baYKXE7H69oXLLoOXXnL2cT69JmWSWCP5wGhS6fTXrIFPPoENG5z9mjX+0lu71pmkVlnp7NeujU/P8I6EOn4R6QN8G/h9kHkSUB8cmw1MTmQZDO8Inoi1dauzLymJLQXD3Lmwbl2o7V//cuypLls6sGMHuCMZG9i2zbH7Qa+uDqqrQ23V1c0nsRmpIdEt/geAW4DgQNjh9cstuvuwg2FF5FoRWSoiS2sy9b83zfByItbTT0dnb414y5YOK9EFE5TNoE32ZOvt3Bmd3YiPaL+/CXP8IjIBqFbVZbFcr6qPqWqRqhbFM17V8A4vJ2JdfHF09taIp2x5eXnU1tamlfPv2jU6e7L1OnWKzm7EjqpSW1tLXl5em69J5ASuk4GJInIukAd0EZE5wEYR6a2qG0SkN1DdoorhG7yciDVlCvz0p054p56+fWPv4I2nbH369GHdunWk25Pljh0QPJQ8Ly/075lqvT17IHjN90MP9TZNh9FIXl5eSFqH1kjKYusicjpwkzuq526gVlXvFJFbgR6q2nxAbhA2qsdfZNqonnTm3Xfh9ddh/Hg4+WT/6VVUwOLFMGYMxDBy1IiTSKN6UuH4ewJPA/2AtcB3VDX8zA4Xc/yGYRjRE8nxJyVXj6q+Bbzlvq4FzkrGfQ3DMIzm2MxdwzCMLCOjHX8ylvqLB6/LV1EBs2c7+2zQSwZef0Z+/04aWYKq+n4bNWqURsu8eaqBgGrXrs5+3ryoJRKK1+UrLa1fN8nZSkszWy8ZeP0Z+f07aWQewFIN41OT0rkbL9F27tbUOFP2m+Y+r6ryx0gPr8tXUQFDhza3r14d20gKv+slA68/I79/J43MJJW5epJOMpb6iwevy7d4cXT2dNdLBl5/Rn7/ThrZRUY6/mQs9RcPXpdvzJjo7Omulwy8/oz8/p00souMdPz1szgDAejSxdl7vdRfPHhdviFDoGlq8tLS2MMoftdLBl5/Rn7/ThrZRUbG+Ovx+yxOr8vn9SxJv+slA68/I79/J43MIqUzd+PFZu4abcUcq2E0klWdu0Z2ktyFWAwjfTHHb2QE2bYQi2HEgzn+FOL1LM6XX4ZrrnH2idSL9T7hrps7FyZNin3lrXoSNVzy3Xfh9tudvRd4ref1Z24zi7OEcLO6/LbFMnPX73g9i3PYsNCZscOHJ0Yv1vuEu65Pn1Bb376xl7e6WlUkVE/EscfKuHGheuPHx66VCD2vP3ObWZx5EGHmbsqdelu2THP81dXOP1bwP20gELuTWrAgVKt+W7DAW73bbovtPpH0wm1z5nhb5lj/Bu+8E17vnXf8oed1fb3+Thr+IJLjt1BPCvA6LPHCC9HZY9WbPz+2+0RTjljX3PX6b/D669HZk63ndX1tZnF2kcg1d/NEZLGIfCQiq0Tkv1z7dBH5UkSWu9u5iSqDX/F6FufkydHZY9X77ndju0805Yh1zV2v/wbjx0dnT7ae1/W1mcVZRrjHAC82QIDO7utc4H3gG8B0nNW4sjbUo9oYT+3SxZt46vDhoY/p8cZ7I+nFep9w1/XtG2qLJ8YfT9kiMX58qF68MXmv9byur9ffSVUnVLR4sXchI6/1Mh2SHepx77vDPcx1N//PFksSxcVOZsaFC519cXF8eitWwIIFzhDGBQuc40ToHXFE6Hm9e8eupx5/G7zWW78+9HjDhvj0XnsN3nkHpk1z9q+9Fp+e15+5199Jr+dV2DwNDwn3a+DVBuQAy4EdwF2ubTpQCawA/gB0b00nE1v86YiXHZRz5oTX8kvnrtd62YbXncXW+RwbpKJzV1UPqGoh0AcYIyLDgN8BA4FCYANwb7hrReRaEVkqIkt
rbFCxL/CygzJSJ65fOne91ss2LK21v0nKqB5V3YKz2PrZqrrR/UE4CDwOhE3Oq6qPqWqRqhbl+yTpit8n83hN08k88XZQButF6sT1S+eu13r1/O53cNppzt4LvNbzakKdpbX2OeEeA7zYgHygm/s6ALwNTAB6B51zI/BUa1p+CPX4fTKP10SazNO9e2i5e/SIXa9Hj9i0IuF1Z7HXerH+7ZKl5+WEOlXvO5+9nrCWDZDsCVzACOBDnFj+SmCaa38S+Ni1vxT8QxBpS7Xj9/tkHq+JFE+NFPdurdyR9PLymtv8EgP2Wu+RR8L/7R55xB96Xve5ZNv/jF+J5PgTOapnhaoep6ojVHWYqv7StV+uqsNd+0RVjXOsROLx+2Qer4kUT40U326t3OH02rWDnJzm9/BLDNhrvUgjUGIdmeK1ntd9Ltn2P5Nu2MzdNuD3yTxeEymeGim+3Vq5w+kdPAgHDjS/h19iwF7rRRoaGeuQSa/1vO5zybb/mbQj3GOA37ZUh3pU/T+Zx2siTeaJtdzz5jmhnU6dnP28ec7Wvr1qTo6zj3fCUL1eu3be6eXkOMnecnLi1/O6T8PvfSTZ9j/jR4gQ6rEVuKLg3XedR8vx4+Hkk/2n5zXhVrPq2xfWrWs8p29fWLu2da3ycmeiUbt2Tmu/rAxuuSU2rUj06AGbN4ce19bGrtepE+zaFXq8Y0fk85Ndvlg/i2TpQfb9z/iNSCtwpbw135bNDy1+I/YOwHAdpR06xKYVCa87O++6K7zeXXf5o3xed8Z6rWf4A+Lp3BWRY0TkTRFZ6R6PEJFfePvbZPidWDsAw3WUHjwYm1Yk/N556vfOWK/1DH/T1s7dx4GfAvvAGbEDXJKoQhn+JNYOwHAdpe0ifPNi7Uz0e+ep3ztjvdYzfE64x4CmG7DE3X8YZFvelmu92DI11JOOmQtj7QAM11nsdWei152dnTqF6nXq5K/y+X3CmpF6iHMc/yYRGQhOdk0RuQgnz44RI+mauXDtWpgzByZOdPZt7fwLl/kxVq1I1NbCI4/Aqac6+3g6TsHpyL3rLigsdPbxdOwmonx33QXt2zvzIdq3d47jwevPw/AvbRrVIyJHAY8BJwGbgX8Cl6lqZUJL5+KXUT1eUVPjOOe6ukZbIOA4xFjSEnmtZ/gf+8yNthBpVE+bWvyq+g9VHYuTf+frqnpKspx+JuL3Waax0DShm5FY/PCZG+lLW0f1/FpEuqnqTlXdLiLdRWRGoguXqfh9lmm02AIZySfVn7mR3rQ1xn+OOqmVAVDVzUDWrZXrFfn5zgSmQAC6dHH2ZWWxP6J7rVdPpFZ8zc4alny5hJqdNdTUOBOz6upg61ZnX1LS9pZ/sJYXZIte/Wd+SPca8gYu4ZDuNZ585hVra5j9xhIq1tqjWybTvo3n5YjIIaq6B0BEAsAhiStW5lNcDGPHNp8Z6xe9+pm2HTo4LcuyMuce5R+XU/JSCR1yOrD3wF5+NqyMDh2KQ2LNOTnw5z/DuW7TIFKZmmqVTSqjeFjs6/1lm97ftpWz57oSONABcvbyt21lFBO73g2PlvPwv0rgYAf4y15K+5bx0PfiXH/R8CVt7dy9BZgI/C/OyJ6rgZdUdWZii+eQaZ27fidSx+GyT2oY9WR/6vY3vhFoH0Dvq2L3V6Fe/dBDYfduEHGuDf7xAKfl2/+B5lpVP6oiv1P0v1rZplextoahj/WH3KAPaV+A1ddWMaRf6vUMfxBv5+5M4A5gCHAs8KtkOX0j+UTqOFz8aSUdckLfyM3J5ef3VBIIQOfOjfbt252Y89694UNAlVvCa1VuqYytzFmmt/jTSqdlHszBXMfuAz3D37Q5LbOqvqKqN6nqf6rqa4kslJFaInUcjhlcwN4DoW/sO7CP711cQFUVPPyw09KPRPCok4Ju4bUKuhXEVuYs0xszuADaNZ0Ovc+x+0DP8DctOn4RecfdbxeRbUHbdhHZ1sq1eSKyWEQ+EpFVIvJfrr2HiLwhIp+
5++7eVSe78WoN34aOw0Mat7IyGNIvn7JJZeTlBAhIF/JyApRNKiO/Uz6bNsG2bc4PRCTq6mDaNGc9101r87myWxkdJMAh2oVD2jVqxVTmTk7ZOkiA3INd6CDe6B3SztvyeaU3pF8+pX3LYF8AdneBfQFK+5bFHJYJ0dsTv57hc8JN5/ViAwTo7L7OBd4HvgHMBG517bcCd7WmlakpG7wkWWsCl5aq0rFa+dpipWO1lpa6tjCZHdu0BWnFW+Zhw0L14l2Tddw4b8vntV64zyJeVldV66zXF+vqqgTm/TCSBrGuuYvzVLCytfNa0egIfACcAHyKu84u0Bv4tLXrzfG3TLLWN42UutfLLdYyR1oPeMECb/8GflkzdvXq8HqrV8emZ2QmkRx/qzF+VT0IfCQi/aJ9mhCRHBFZDlQDb6jq+8Dh6q6z6+4Pi3DttSKyVESW1th00BZJ1vqmyUjRG2uZI60HHMkeazn8smbs4sXR2Q0jmLZ27vYGVrk5+V+q31q7SFUPqGoh0AcYIyLD2lowVX1MVYtUtSjfko+0SLLWN01Git5YyxxpPeBI9ljL4Zc1Y8eMic5uGCGEewxougHfDLe15dogjduBm7BQT0JI1vqmTeP5ccf4PSzz8OGhevHG+P2+Zmy4z8IwgiGWNXdFJA/4PjAI+BgoU9X9bflBEZF8YJ+qbnFn+r4O3OX+aNSq6p0icivQQ1VvaUnLJnC1jWStb1pR4YQUxoyBIUOa2yD867fecmYEFxfDiBGO9hFHwL//7V2ZX37ZCe9MngwTJsSv5/c1Y8N9FoZRT6QJXK05/vk4q269DZwDVKnq1DbecAQwG8jBCSk9raq/FJGewNNAP2At8B1V/aolLXP86U9wCohduyLP6DUMwztidfwfq+pw93V7YLGqHp+4YobHHH96Ey4FRDCWR94wEkOsKRsapuO0NcRjGE0JlwIiGMsjbxjJpTXHPzJ4ti4woq0zd43MpaICZs929m0hXAqIYHbvbvn9aIi2bKZnZCXhenz9ttmoHv8Q60iS4MXWc3NVO3RwNi9HpXg9yiXb9IzMg1hG9fgFi/H7g4oKGDq0uX316tARJTU1jTn4IfzrNWvglFNa1/K6bKZnZBNxpWU2DGjbbNHgZRiPPBL69GlcknHhQhg92unE/fzz6O7hRdlMzzAczPEbbaa12aJNl2FsKR+/1zNPTS8+PSO7MMdvtJkhQ6C0NNRWWtoYWohm9E5rWl6XzfQMoxGL8RtRE2m2aCzj9b2eeWp6htFITBO4/II5/vShfoZubq7zAyACeXlO2Mdm6BpGconk+NunojBG5lJcDGPHhh/JYzNzDcMfmOM3PCc/P9TJm8M3DH9hnbuGYRhZhrX4jaQRaWKXPREYRnIxx28kBUvLbBj+wUI9RsKJZmKXYRiJJ2GOX0T6isgiEakQkVUiMtW1TxeRL0Vkubudm6gyGMmjpgaWLAnvwC0ts2H4i0S2+PcD/6mqQ4BvANeLSH1aqftVtdDd/pzAMhhJIDg/T//+znEwraVl3revMe5vGEbiSZjjV9UNqvqB+3o7UAEcmaj7GamhaRgnXOgmP9+J4wcC0KWL08Lv0MF5HQg471kHr2Ekj6TE+EWkADgOeN81lYrIChH5g4h0j3DNtSKyVESW1lgA2LeEC+OEC90UFzvpGhYuhC+/hHXrnNdVVdaxaxjJJuEpG0SkM/AX4A5V/aOIHA5sAhT4FdBbVa9uScNSNviXcPl5bA1dw/AHKcnHLyK5wHPAXFX9I4CqblTVA6p6EHgcsESyaUzTME5LoZvgZQJtyUDDSB0JG8cvIgKUARWqel+QvbeqbnAPzwdWJqoMRnJomp8nnNO/4QZ4+OHw15eWwkMPJbKEhmEEk7BQj4icArwNfAwcdM0/A4qBQpxQTyXwvaAfgrBYqCe9ibRMYDC2ZKBheE/Ss3Oq6juAhHnLhm9mGW1ZDnDxYnP8hpEsbOaukXDashygLRloGMnDHL+RcMI
tExiMLRloGMnFkrQZSeGhh+C66xqXCQRbMtAwUoU5fiNpDBkS6uTN4RtGarBQj2EYRpZhjt8wDCPLMMdvRKalXMt+0DMMIybM8RvhaS3Xcqr1DMOImYQnafMCm7mbZLzOvGaZ3AwjJaQkSZuRprQ113Kq9AzDiAtz/EZzwi2ZFc8yWV7rGYYRF+b4jeZEk2s5FXqGYcSFxfiNyNTUtJxrOdV6hmG0SNKzcxoZQH6+tw7aa71w+P3HyvRMzw+oqu+3UaNGqWG0yrx5qoGAateuzn7ePNMzvfTV8wBgqYbxqQlz1kBfYBFQAawCprr2HsAbwGfuvntrWub4jVaprnb+2aBxCwQcu+mZXrrpeUQkx5/Izt39wH+q6hDgG8D1IjIUuBV4U1WPBt50j7MTv8+MTaeZtn4fgmp6pucjEub4VXWDqn7gvt6O0/I/EpgEzHZPmw1MTlQZfI3fZ8am20xbvw9BNT3T8xPhHgO83oACYC3QBdjS5L3NrV2fcaEevz9m+vSxtVXqY6xdungbszU900uFngcQIdST8OGcItIZ+Atwh6r+UUS2qGq3oPc3q2r3MNddC1wL0K9fv1FVVVUJLWdSWbLEaUlv3dpo69IFFi6E0aMzTy+Z+H2UhumZXhKJNJwzoY5fRHKBl4HXVPU+1/YpcLqqbhCR3sBbqjq4JZ2MG8fv91w4llvHMDKCpOfqEREByoCKeqfv8hJwpfv6SuDFRJXBtwTPZO3UyduZsV7q5eU5enl5oXqxdvqGu87vHdKmZ2QgiRzVczJwOXCmiCx3t3OBO4FxIvIZMM49zk7qn7a8euryWk8kdA+xd/qGu87vHdKmF5+e4V/CBf79tlnnrk/0Vq+O7T6R9PLy0u9vYHpGGkEKxvGnHr8+BidqDHEvoAhnnwi9xYtjK3e4+rZrBzk50WtFcw/TS52e4Wsy1/H7+TG4oCC04xRg9+74xhBP2A5VOHOhq4AJO7zXGzMmtrHK4eq7dy8cOBC9Vkv38PO47GzTM/xNuMcAv21Rh3r8/hhcXa2amxuql5sbu17NatWdTf5sO3HsXuvFMlY5Un3/53/8PY7a9OLTM1IOEUI9mZmds/6xNbiVWf/YGstIl0TodewYOk4+EIhdr3qxMyc6mH2uvdcQb/WKr4SxY6Mbqxypvscf7wwR9Wrcc3Fx9GUzvcTpGb4lMx2/3x+DvdY7bAzkNrHluvZE6EWbXrml+vo99bPpGRlIZsb4WxuHHqueX1ek6jUEPiyFXcBWnP2HpbG19hOh5/U8A8Mw4iIzHX894cahx0pxsROWWLjQ2RcX+0tv7Ukw+BA4L8/Zrz3JX3rg/TyDZODXkWHZqmd4Q7jAv9+2lHfu+p106MxOx8/D7wt1ZJueETUkeyEWL7eoHf/ixc6XLdjRdOni2DMRr+vrd71k4Pcfv2zTM2IikuPPzFBPosYk+/UxONmdz9GWuyU9j/+mL8/dzDWTqnl57ub4hNyRXBUMZjZXUMFgf02QyjY9w1vC/Rr4bYspZUOixjj79TF42LDQ1tXw4fHpjRsXqjd+fHzlLi0N1Sst9fxvMKxPrcLBhm1439rYxaqrtbTdb0P0Sts97J8WcLbpGTFBVoV66qmudsIJ8X7Z/P5P8c47oVr12zvvxKa3enVkPZ/m6lkw5yvXQQdLHtQFc76KSc/5EzTXWx3jnDhVDf/jFw9+n8BlE8JSTiTHn5mhnnry852FQ+IdNuj3x+DXX4/O3hqLF0fW82munhfm7IjK3hqLF26Lyt4qNTXOENZgysriC3H5faSZ13qGZ2S24/cKv0/gGj8+OntrjIkw8Wv8eNi1K9RWV9e2XD1791JDL5ZQRA294OBB2Lkz9Lxt22L+G0w+aWNU9tYYc3j4Fd8i2VslUTFvrxo36aJneII5/rbg9wlcJ5/c3MmPH+/YY6FXL2jfZFJ3+/bQo0fzORFtmSORn095yRv0p4pxLKQ/VZSf+0T4c9esianIE477N8NZDmjDNpz
lTDju3zHpDelYRSkPhuiV8iBDOsbo+BM14GDuXJg0ydl7wcsvwzXXOHsvePdduP12Z+9HvYoKmD3b2WeDXj3h4j9+23yTj9+rPoNE6b3zjuq0abHH9uuJNPxy1izVnJxQe05Oq8Myw4b42+/Rano170eYNi22Mk+bpgq6gLO1hEd1AWd7oreawTqLK3Q1g+PTU1Xt3j20rj16xK6lqtqnT6he377x6SVrgIBf9Lzuc/GhHsnu3AX+AFQDK4Ns04EvgeXudm5btHzj+LOFSJ2xc+Y0d9SgumBBi3Jhf0cCe3QxRc215syJrcyRymZ6bWPBgpg+24h4PeAgWQMYYu2996leJMefyFDPLODsMPb7VbXQ3f6cwPsbsRIpFBUpDLNkSYtyYaMcB9pRQGXzk/fvj6nIEa/zi97TT0dnT7beCy9EZ28NrwccJGsAQyR7uus1IWGOX1X/CnyVKH0jwRQXwxtvwI9+5OyLi2PuRG74Hck7SJfAXgJ5Bym749/ks6n5yZE6llsj0nV+0bv44ujsrXF2uDZVC/bWmDw5OntreD3gIFkDGPzyffFarynhHgO82oACmod6KoEVOKGg7i1cey2wFFjar1+/6B6TjPiJFF8cPz7U3tY4a2mpVtNLF1PkxPZLS30ZE02oXt++oXrxxOQXLw4fCognDcbw4aFa8cb4Y/2uJEvP79+XdIzxa3jHfziQg/OkcQfwh7boWIw/ybQWX5wzR3XixLbHk1vSW7BAtaQk9lhyU/yud9ddqoWFzj4eEjUz9pFHVE891dl7QbTflWTr+f37EueADV84/ra+13Qzx59kZs0K76hnzYqtFRJJ76yzfNdCSis9G+WS2XoepDTxheMHege9vhF4qi065viTTKQWeqSRH62NNIik58NREKZner7Q8+iJLpLjT1jnroiUA+8Bg0VknYiUADNF5GMRWQGc4Tp/w28MGQKlpaG20lKorQ1/fmsjDcLpnXVWbFqR8PuoCtMzvWhIcHbThK25q6rhEnOUhbEZfuShh+C665wv7pgxjvOONHuwLSMNTjoJfv97Z6avqjP65M03Y9OKpgymZ3rpqJeomd71hHsM8NtmoR6fUF2t2q5d6ONnu3axZ+csKQm1xRMTjbVspteI30fh+F3Ph9lXycrsnIa3VFbCoYeG2jp3ji07Z24ufO97sHo1zJrl7B96KPllMz2Hmhp4++1Q29tvx549NNv0wPn+evV9hoRmN01YqMfIQGJ9/Gzpuvx8J4yUqrKZnkP9j3NdXaOtPqYcS/LAbNOrZ8gQb77P9eTnJySzqbX4jbYTa1ZRr7ORJuMe2abn9x8mv+ulG+HiP37bLMbvM2LNKup1NtJk3COb9Py+Apff9XwIEWL84rznb4qKinTp0qWpLoZhZD41NU64oz4MZ3ppjYgsU9WipnaL8RtGS2S4Y2iG1zHlbNNLEyzGbxiRKC+H/v1h3DhnX16e6hIZhieY4zeMcNTUQEmJM+pj61ZnX1IS33A/w/AJ5vgNIxwJnjJvGKnEHL9hhCPbh/sZGY05fsMIR6LmHlRUwOzZkfMepVqvpsZZStNCWhmNOX7DiITXU+ZvuAGGDoWrrnL2N9zgLz3rzM4abBy/YSSDigrHOTdl9erYpvh7rVdT4zj74BQGgYDzg5eFwx0zhUjj+K3FbxjJIMvzvxv+IpELsfxBRKpFZGWQrYeIvCEin7n77om6v2H4imzP/274ikS2+GcBZzex3Qq8qapHA2+6x4aR+URa1SzWTI5e6yUjkZ7hGxIa4xeRAuBlVR3mHn8KnK6qG0SkN/CWqg5uTcdi/EbGUFERuqqZ3/SyLUVFhhMpxp9sx79FVbsFvb9ZVcOGe0TkWuBagH79+o2qqqpKWDkNwzAykbTr3FXVx1S1SFWL8q3lYRiG4RnJdvwb3RAP7r46yfc3DMPIepLt+F8CrnRfXwm8mOT7G4ZhZD2JHM5ZDrwHDBaRdSJSAtwJjBORz4Bx7rFhGIaRRBK
2EIuqRprfflai7mkYhmG0TlqkbBCRGiCRw3p6AZsSqO8HMr2OmV4/sDpmCsmsY39VbTY6Ji0cf6IRkaXhhjxlEplex0yvH1gdMwU/1NG3wzkNwzCMxGCO3zAMI8swx+/wWKoLkAQyvY6ZXj+wOmYKKa+jxfgNwzCyDGvxG4ZhZBnm+A3DMLKMjHf8ItJXRBaJSIWIrBKRqU3ev0lEVER6Bdl+KiKfi8inIvKt5Jc6Olqqo4jc4NZjlYjMDLJnRB1FpFBE/i4iy0VkqYiMCbom3eqYJyKLReQjt47/5dojLmCUTnVsoX53i8gnIrJCRJ4XkW5B16RN/SByHYPe94e/UdWM3oDewPHu60OBNcBQ97gv8BrO5LBerm0o8BFwCDAA+ALISXU9YqkjcAawEDjEfe+wDKzj68A5rv1cnDUe0rWOAnR2X+cC7wPfAGYCt7r2W4G70rGOLdRvPNDetd+VrvVrqY7usW/8Tca3+FV1g6p+4L7eDlQAR7pv3w/cAgT3cE8CnlLVPar6T+BzIMb17JJDC3X8AXCnqu5x36vPhppJdVSgi3taV2C9+zod66iqusM9zHU3xanLbNc+G5jsvk6rOkaqn6q+rqr7XfvfgT7u67SqH7T4GYKP/E3GO/5g3IVhjgPeF5GJwJeq+lGT044E/hV0vI7GHwrfE1xH4BjgVBF5X0T+IiKj3dMyqY4/Au4WkX8B9wA/dU9LyzqKSI6ILMdJWf6Gqr4PHK6qG8D5AQQOc09PuzpGqF8wVwOvuK/Trn4Qvo5+8zdZ4/hFpDPwHI6j2A/8HJgW7tQwtrQY8xpcR1XdhpOErzvO4/TNwNMiImRWHX8A3KiqfYEbgbL6U8Nc7vs6quoBVS3EafWOEZFhLZyednVsqX4i8nOc/8259aZwEgkvZJyEqeMIfOZvssLxi0gujrOYq6p/BAbixNM+EpFKnA/oAxE5AucXt2/Q5X1oDB/4ljB1BKcuf3QfPxcDB3ESRGVSHa8E6l8/Q+NjclrWsR5V3QK8BZxN5AWM0raOTeqHiFwJTACmqBv8Jo3rByF1nITf/E0qO0KSseH8oj4BPNDCOZU0drYcS2hnyz9Ijw6lZnUEvg/80n19DM4jpWRYHSuA093XZwHL0vhzzAe6ua8DwNs4zvBuQjt3Z6ZjHVuo39nAaiC/yflpVb+W6tjknJT7m4Tl4/cRJwOXAx+7cTeAn6nqn8OdrKqrRORpnC/ifuB6VT2QlJLGTtg6An8A/iAiK4G9wJXqfNsyqY7/AfxGRNoDu4FrIW0/x97AbBHJwXkaf1pVXxaR93DCdCXAWuA7kJZ1jFS/z3Ec3xtOJJK/q+r307B+EKGOkU5OVR0tZYNhGEaWkRUxfsMwDKMRc/yGYRhZhjl+wzCMLMMcv2EYRpZhjt8wDCPLMMdvZCwicsDN2rnKzZb4YxGJ+TsvIqe4mRc/cbdrg97Ld1NjfChOFtEfBL13gpt5MhuGTxtpgH0RjUymTp2p84jIYcA8nERut0cr5M6ynAdMVtUP3LS6r4nIl6r6J5zJY5+o6pUicjjwnog8C9QCDwPXaWMismjvLThDrw/Gcr1hNMXG8RsZi4jsUNXOQcdHAUtw0lb0B54EOrlvl6rq30TkSeBZVX3RvWYuMB8YjZN8cVqQ3lnAdOAG4CWcmZpfAicC/8+9ZgkwCmdi2Z3A6TiTlX6rqo+6uYdexMmplAv8QlVfdBPRvQIscvUmq2qVl38fI3sxx29kLE0dv2vbDHwd2A4cVNXdInI0UK6qRSLyTZykb5NFpCuwHDgaeBqYXf+D4Gp1Bf6pqj1E5CqgSFVL3ffaAe/hZNIsAi7EWQ9hhogcAryLMwP3X0BHVd3mPkX83b1ff5zp+yep6t8T8gcyshYL9RjZRn02xFzgYREpBA7g5DJCVf8iIr91Q0MXAM+p6n433BKulRS25aSqB0XkUZwfg1oRGQ+MEJGL3FO64jj4dcCvReQ0nCR6RwKHu+dUmdM3EoE
5fiNrcEM9B3CyW94ObARG4gxy2B106pPAFOASnPzwAKtwWu4vBZ03CifHSiQOuhs4Pzg3qOprTcp0FU5ir1Gqus/N3pjnvr2z7bUzjLZjo3qMrEBE8oH/AR52E9V1BTa4HaaXAzlBp8/CWbcBVV3l2n4LXOU+ISAiPXGWCZxJ23gN+IGbWhoROUZEOrnlqHad/hk4IR7DSCjW4jcymYCbyTMXJ/Phk8B97nuPAM+JyHdwOlAbWtequlFEKoAXgmwbROQy4HERORSnBf+Aqi5oY1l+DxTg5GEXoAZnCcW5wAIRWYrTn/BJLBU1jGiwzl3DaIKIdAQ+xlncfWuqy2MYXmOhHsMIQkTG4rS6HzKnb2Qq1uI3DMPIMqzFbxiGkWWY4zcMw8gyzPEbhmFkGeb4DcMwsgxz/IZhGFnG/wfSVo6szTyIxgAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "ax=None\n", + "colors = ['red','blue','green','yellow']\n", + "for i,var in enumerate(new_pumpkins['Variety'].unique()):\n", + " ax = new_pumpkins[new_pumpkins['Variety']==var].plot.scatter('DayOfYear','Price',ax=ax,c=colors[i],label=var)" + ] + }, + { + "cell_type": "code", + "execution_count": 173, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 173, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "new_pumpkins.groupby('Variety')['Price'].mean().plot(kind='bar')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 174, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-0.2669192282197318\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 174, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "pie_pumpkins = new_pumpkins[new_pumpkins['Variety']=='PIE TYPE']\n", + "print(pie_pumpkins['DayOfYear'].corr(pie_pumpkins['Price']))\n", + "pie_pumpkins.plot.scatter('DayOfYear','Price')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Lineárna regresia\n", + "\n", + "Použijeme Scikit Learn na natrénovanie modelu lineárnej regresie:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 175, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error\n", + "from sklearn.model_selection import train_test_split" + ] + }, + { + "cell_type": "code", + "execution_count": 176, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 2.77 (17.2%)\n" + ] + } + ], + "source": [ + "X = pie_pumpkins['DayOfYear'].to_numpy().reshape(-1,1)\n", + "y = pie_pumpkins['Price']\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n", + "lin_reg = LinearRegression()\n", + "lin_reg.fit(X_train,y_train)\n", + "\n", + "pred = lin_reg.predict(X_test)\n", + "\n", + "mse = np.sqrt(mean_squared_error(y_test,pred))\n", + "print(f'Mean error: {mse:3.3} ({mse/np.mean(pred)*100:3.3}%)')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 177, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 177, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.scatter(X_test,y_test)\n", + "plt.plot(X_test,pred)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Sklon priamky možno určiť z koeficientov lineárnej regresie:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 178, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([-0.01751876]), 21.133734359909326)" + ] + }, + "execution_count": 178, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lin_reg.coef_, lin_reg.intercept_" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 179, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([16.64893156])" + ] + }, + "execution_count": 179, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Pumpkin price on programmer's day\n", + "\n", + "lin_reg.predict([[256]])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Polynomická regresia\n", + "\n", + "Niekedy je vzťah medzi vlastnosťami a výsledkami prirodzene nelineárny. Napríklad ceny tekvíc môžu byť vysoké v zime (mesiace=1,2), potom klesnúť počas leta (mesiace=5-7) a následne opäť stúpnuť. Lineárna regresia nedokáže tento vzťah presne zachytiť.\n", + "\n", + "V takom prípade môžeme zvážiť pridanie ďalších vlastností. Jednoduchým spôsobom je použiť polynómy z vstupných vlastností, čo by viedlo k **polynomickej regresii**. 
V knižnici Scikit Learn môžeme automaticky predpočítať polynomiálne vlastnosti pomocou pipeline:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 180, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 2.73 (17.0%)\n", + "Model determination: 0.07639977655280217\n" + ] + }, + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 180, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "from sklearn.preprocessing import PolynomialFeatures\n", + "from sklearn.pipeline import make_pipeline\n", + "\n", + "pipeline = make_pipeline(PolynomialFeatures(2), LinearRegression())\n", + "\n", + "pipeline.fit(X_train,y_train)\n", + "\n", + "pred = pipeline.predict(X_test)\n", + "\n", + "mse = np.sqrt(mean_squared_error(y_test,pred))\n", + "print(f'Mean error: {mse:3.3} ({mse/np.mean(pred)*100:3.3}%)')\n", + "\n", + "score = pipeline.score(X_train,y_train)\n", + "print('Model determination: ', score)\n", + "\n", + "plt.scatter(X_test,y_test)\n", + "plt.plot(sorted(X_test),pipeline.predict(sorted(X_test)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Kódovanie odrôd\n", + "\n", + "V ideálnom svete by sme chceli byť schopní predpovedať ceny pre rôzne odrody tekvíc pomocou toho istého modelu. Aby sme zohľadnili odrodu, musíme ju najprv previesť do číselnej podoby, alebo ju **zakódovať**. Existuje niekoľko spôsobov, ako to môžeme urobiť:\n", + "\n", + "* Jednoduché číselné kódovanie, ktoré vytvorí tabuľku rôznych odrôd a potom nahradí názov odrody indexom v tejto tabuľke. Toto nie je najlepší nápad pre lineárnu regresiu, pretože lineárna regresia berie do úvahy číselnú hodnotu indexu, a tá pravdepodobne nebude číselne korelovať s cenou.\n", + "* One-hot kódovanie, ktoré nahradí stĺpec `Variety` štyrmi rôznymi stĺpcami, každý pre jednu odrodu, pričom obsahuje hodnotu 1, ak daný riadok patrí danej odrode, a 0 v opačnom prípade.\n", + "\n", + "Kód nižšie ukazuje, ako môžeme zakódovať odrodu pomocou one-hot kódovania:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 181, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
FAIRYTALEMINIATUREMIXED HEIRLOOM VARIETIESPIE TYPE
700001
710001
720001
730001
740001
...............
17380100
17390100
17400100
17410100
17420100
\n", + "

415 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " FAIRYTALE MINIATURE MIXED HEIRLOOM VARIETIES PIE TYPE\n", + "70 0 0 0 1\n", + "71 0 0 0 1\n", + "72 0 0 0 1\n", + "73 0 0 0 1\n", + "74 0 0 0 1\n", + "... ... ... ... ...\n", + "1738 0 1 0 0\n", + "1739 0 1 0 0\n", + "1740 0 1 0 0\n", + "1741 0 1 0 0\n", + "1742 0 1 0 0\n", + "\n", + "[415 rows x 4 columns]" + ] + }, + "execution_count": 181, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.get_dummies(new_pumpkins['Variety'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Lineárna regresia na odrode\n", + "\n", + "Teraz použijeme ten istý kód ako vyššie, ale namiesto `DayOfYear` použijeme ako vstup našu odrodu zakódovanú pomocou one-hot kódovania:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 182, + "metadata": {}, + "outputs": [], + "source": [ + "X = pd.get_dummies(new_pumpkins['Variety'])\n", + "y = new_pumpkins['Price']" + ] + }, + { + "cell_type": "code", + "execution_count": 183, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 5.24 (19.7%)\n", + "Model determination: 0.774085281105197\n" + ] + } + ], + "source": [ + "def run_linear_regression(X,y):\n", + " X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n", + " lin_reg = LinearRegression()\n", + " lin_reg.fit(X_train,y_train)\n", + "\n", + " pred = lin_reg.predict(X_test)\n", + "\n", + " mse = np.sqrt(mean_squared_error(y_test,pred))\n", + " print(f'Mean error: {mse:3.3} ({mse/np.mean(pred)*100:3.3}%)')\n", + "\n", + " score = lin_reg.score(X_train,y_train)\n", + " print('Model determination: ', score)\n", + "\n", + "run_linear_regression(X,y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Môžeme tiež skúsiť použiť ďalšie vlastnosti rovnakým spôsobom a skombinovať ich s číselnými vlastnosťami, ako sú `Month` alebo `DayOfYear`:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 184, + 
"metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 2.84 (10.5%)\n", + "Model determination: 0.9401096672643048\n" + ] + } + ], + "source": [ + "X = pd.get_dummies(new_pumpkins['Variety']) \\\n", + " .join(new_pumpkins['Month']) \\\n", + " .join(pd.get_dummies(new_pumpkins['City'])) \\\n", + " .join(pd.get_dummies(new_pumpkins['Package']))\n", + "y = new_pumpkins['Price']\n", + "\n", + "run_linear_regression(X,y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Polynomická regresia\n", + "\n", + "Polynomická regresia môže byť použitá aj s kategorickými vlastnosťami, ktoré sú zakódované pomocou one-hot kódovania. Kód na trénovanie polynomickej regresie by bol v podstate rovnaký, ako sme videli vyššie.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 185, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 2.23 (8.25%)\n", + "Model determination: 0.9652870784724543\n" + ] + } + ], + "source": [ + "from sklearn.preprocessing import PolynomialFeatures\n", + "from sklearn.pipeline import make_pipeline\n", + "\n", + "pipeline = make_pipeline(PolynomialFeatures(2), LinearRegression())\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n", + "\n", + "pipeline.fit(X_train,y_train)\n", + "\n", + "pred = pipeline.predict(X_test)\n", + "\n", + "mse = np.sqrt(mean_squared_error(y_test,pred))\n", + "print(f'Mean error: {mse:3.3} ({mse/np.mean(pred)*100:3.3}%)')\n", + "\n", + "score = pipeline.score(X_train,y_train)\n", + "print('Model determination: ', score)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby na automatický preklad [Co-op 
Translator](https://github.com/Azure/co-op-translator). Hoci sa snažíme o presnosť, upozorňujeme, že automatické preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre dôležité informácie odporúčame profesionálny ľudský preklad. Nezodpovedáme za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "86193a1ab0ba47eac1c69c1756090baa3b420b3eea7d4aafab8b85f8b312f0c5" + }, + "kernelspec": { + "display_name": "Python 3.7.0 64-bit ('3.7')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.5" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "d77bd89ae7e79780c68c58bab91f13f8", + "translation_date": "2025-09-06T13:09:45+00:00", + "source_file": "2-Regression/3-Linear/solution/notebook.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sk/2-Regression/4-Logistic/notebook.ipynb b/translations/sk/2-Regression/4-Logistic/notebook.ipynb new file mode 100644 index 000000000..d5867c2dd --- /dev/null +++ b/translations/sk/2-Regression/4-Logistic/notebook.ipynb @@ -0,0 +1,269 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Druhy tekvíc a farba\n", + "\n", + "Načítajte potrebné knižnice a dataset. 
Preveďte údaje na dataframe obsahujúci podmnožinu údajov:\n", + "\n", + "Pozrime sa na vzťah medzi farbou a druhom.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NameTypePackageVarietySub VarietyGradeDateLow PriceHigh PriceMostly Low...Unit of SaleQualityConditionAppearanceStorageCropRepackTrans ModeUnnamed: 24Unnamed: 25
0BALTIMORENaN24 inch binsNaNNaNNaN4/29/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
1BALTIMORENaN24 inch binsNaNNaNNaN5/6/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
2BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
3BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
4BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN11/5/1690.0100.090.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " City Name Type Package Variety Sub Variety Grade Date \\\n", + "0 BALTIMORE NaN 24 inch bins NaN NaN NaN 4/29/17 \n", + "1 BALTIMORE NaN 24 inch bins NaN NaN NaN 5/6/17 \n", + "2 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "3 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "4 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 11/5/16 \n", + "\n", + " Low Price High Price Mostly Low ... Unit of Sale Quality Condition \\\n", + "0 270.0 280.0 270.0 ... NaN NaN NaN \n", + "1 270.0 280.0 270.0 ... NaN NaN NaN \n", + "2 160.0 160.0 160.0 ... NaN NaN NaN \n", + "3 160.0 160.0 160.0 ... NaN NaN NaN \n", + "4 90.0 100.0 90.0 ... NaN NaN NaN \n", + "\n", + " Appearance Storage Crop Repack Trans Mode Unnamed: 24 Unnamed: 25 \n", + "0 NaN NaN NaN E NaN NaN NaN \n", + "1 NaN NaN NaN E NaN NaN NaN \n", + "2 NaN NaN NaN N NaN NaN NaN \n", + "3 NaN NaN NaN N NaN NaN NaN \n", + "4 NaN NaN NaN N NaN NaN NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "full_pumpkins = pd.read_csv('../data/US-pumpkins.csv')\n", + "\n", + "full_pumpkins.head()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby na automatický preklad [Co-op Translator](https://github.com/Azure/co-op-translator). Aj keď sa snažíme o presnosť, upozorňujeme, že automatické preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre dôležité informácie sa odporúča profesionálny ľudský preklad. 
Nezodpovedáme za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.1" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "dee08c2b49057b0de8b6752c4dbca368", + "translation_date": "2025-09-06T13:26:21+00:00", + "source_file": "2-Regression/4-Logistic/notebook.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sk/2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb b/translations/sk/2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb new file mode 100644 index 000000000..d646c6c81 --- /dev/null +++ b/translations/sk/2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb @@ -0,0 +1,686 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Vytvorenie modelu logistickej regresie - Lekcia 4\n", + "\n", + "![Infografika: Logistická vs. lineárna regresia](../../../../../../2-Regression/4-Logistic/images/linear-vs-logistic.png)\n", + "\n", + "#### **[Kvíz pred prednáškou](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/15/)**\n", + "\n", + "#### Úvod\n", + "\n", + "V tejto záverečnej lekcii o regresii, jednej zo základných *klasických* techník strojového učenia, sa pozrieme na logistickú regresiu. Túto techniku by ste použili na objavenie vzorcov na predpovedanie binárnych kategórií. Je táto cukrovinka čokoláda alebo nie? Je táto choroba nákazlivá alebo nie? 
Vyberie si tento zákazník tento produkt alebo nie?\n", + "\n", + "V tejto lekcii sa naučíte:\n", + "\n", + "- Techniky logistickej regresie\n", + "\n", + "✅ Prehĺbte si svoje porozumenie práce s týmto typom regresie v tomto [učebnom module](https://learn.microsoft.com/training/modules/introduction-classification-models/?WT.mc_id=academic-77952-leestott)\n", + "\n", + "## Predpoklady\n", + "\n", + "Keďže sme pracovali s údajmi o tekviciach, sme už dostatočne oboznámení s tým, že existuje jedna binárna kategória, s ktorou môžeme pracovať: `Color`.\n", + "\n", + "Vytvorme model logistickej regresie na predpovedanie toho, *akú farbu bude mať daná tekvica* (oranžová 🎃 alebo biela 👻).\n", + "\n", + "> Prečo hovoríme o binárnej klasifikácii v lekcii zameranej na regresiu? Len z jazykového pohodlia, keďže logistická regresia je [v skutočnosti metóda klasifikácie](https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression), hoci založená na lineárnom prístupe. O ďalších spôsoboch klasifikácie údajov sa dozviete v nasledujúcej skupine lekcií.\n", + "\n", + "Pre túto lekciu budeme potrebovať nasledujúce balíky:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) je [kolekcia balíkov pre R](https://www.tidyverse.org/packages), ktorá robí dátovú vedu rýchlejšou, jednoduchšou a zábavnejšou!\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) je rámec [kolekcie balíkov](https://www.tidymodels.org/packages/) na modelovanie a strojové učenie.\n", + "\n", + "- `janitor`: Balík [janitor](https://github.com/sfirke/janitor) poskytuje jednoduché nástroje na skúmanie a čistenie nečistých údajov.\n", + "\n", + "- `ggbeeswarm`: Balík [ggbeeswarm](https://github.com/eclarke/ggbeeswarm) poskytuje metódy na vytváranie grafov v štýle \"beeswarm\" pomocou ggplot2.\n", + "\n", + "Môžete ich nainštalovať pomocou:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\", \"janitor\", \"ggbeeswarm\"))`\n", + "\n", + "Alternatívne, 
skript nižšie skontroluje, či máte balíky potrebné na dokončenie tohto modulu, a nainštaluje ich za vás, ak chýbajú.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "suppressWarnings(if (!require(\"pacman\"))install.packages(\"pacman\"))\n", + "\n", + "pacman::p_load(tidyverse, tidymodels, janitor, ggbeeswarm)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## **Definovanie otázky**\n", + "\n", + "Pre naše účely vyjadríme otázku ako binárnu: 'Biela' alebo 'Nie biela'. V našej dátovej sade sa nachádza aj kategória 'pruhovaná', ale má len málo záznamov, takže ju nebudeme používať. Táto kategória aj tak zmizne, keď odstránime nulové hodnoty z dátovej sady.\n", + "\n", + "> 🎃 Zaujímavosť: biele tekvice niekedy nazývame 'duchové' tekvice. Nie sú veľmi ľahké na vyrezávanie, takže nie sú také populárne ako oranžové, ale vyzerajú zaujímavo! Mohli by sme teda našu otázku preformulovať ako: 'Duch' alebo 'Nie duch'. 👻\n", + "\n", + "## **O logistickej regresii**\n", + "\n", + "Logistická regresia sa v niekoľkých dôležitých aspektoch líši od lineárnej regresie, ktorú ste sa učili predtým.\n", + "\n", + "#### **Binárna klasifikácia**\n", + "\n", + "Logistická regresia neponúka rovnaké funkcie ako lineárna regresia. 
Prvá ponúka predikciu o `binárnej kategórii` (\"oranžová alebo nie oranžová\"), zatiaľ čo druhá dokáže predpovedať `kontinuálne hodnoty`, napríklad na základe pôvodu tekvice a času zberu *o koľko sa zvýši jej cena*.\n", + "\n", + "![Infografika od Dasani Madipalli](../../../../../../2-Regression/4-Logistic/images/pumpkin-classifier.png)\n", + "\n", + "### Iné typy klasifikácií\n", + "\n", + "Existujú aj iné typy logistickej regresie, vrátane multinomiálnej a ordinálnej:\n", + "\n", + "- **Multinomiálna**, ktorá zahŕňa viac ako jednu kategóriu - \"Oranžová, Biela a Pruhovaná\".\n", + "\n", + "- **Ordinálna**, ktorá zahŕňa usporiadané kategórie, užitočné, ak chceme logicky zoradiť naše výsledky, napríklad naše tekvice zoradené podľa konečného počtu veľkostí (mini, malá, stredná, veľká, XL, XXL).\n", + "\n", + "![Multinomiálna vs ordinálna regresia](../../../../../../2-Regression/4-Logistic/images/multinomial-vs-ordinal.png)\n", + "\n", + "#### **Premenné NEMUSIA korelovať**\n", + "\n", + "Pamätáte si, ako lineárna regresia fungovala lepšie s viac korelovanými premennými? Logistická regresia je opakom - premenné nemusia byť v súlade. To je výhodné pre tieto dáta, ktoré majú pomerne slabé korelácie.\n", + "\n", + "#### **Potrebujete veľa čistých dát**\n", + "\n", + "Logistická regresia poskytne presnejšie výsledky, ak použijete viac dát; naša malá dátová sada nie je pre túto úlohu optimálna, takže to majte na pamäti.\n", + "\n", + "✅ Zamyslite sa nad typmi dát, ktoré by sa dobre hodili na logistickú regresiu.\n", + "\n", + "## Cvičenie - upravte dáta\n", + "\n", + "Najprv trochu upravte dáta, odstráňte nulové hodnoty a vyberte len niektoré stĺpce:\n", + "\n", + "1. 
Pridajte nasledujúci kód:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Load the core tidyverse packages\n", + "library(tidyverse)\n", + "\n", + "# Import the data and clean column names\n", + "pumpkins <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/2-Regression/data/US-pumpkins.csv\") %>% \n", + " clean_names()\n", + "\n", + "# Select desired columns\n", + "pumpkins_select <- pumpkins %>% \n", + " select(c(city_name, package, variety, origin, item_size, color)) \n", + "\n", + "# Drop rows containing missing values and encode color as factor (category)\n", + "pumpkins_select <- pumpkins_select %>% \n", + " drop_na() %>% \n", + " mutate(color = factor(color))\n", + "\n", + "# View the first few rows\n", + "pumpkins_select %>% \n", + " slice_head(n = 5)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Vždy sa môžete pozrieť na svoj nový dataframe pomocou funkcie [*glimpse()*](https://pillar.r-lib.org/reference/glimpse.html), ako je uvedené nižšie:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "pumpkins_select %>% \n", + " glimpse()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Poďme si potvrdiť, že sa skutočne budeme zaoberať problémom binárnej klasifikácie:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Subset distinct observations in outcome column\n", + "pumpkins_select %>% \n", + " distinct(color)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Vizualizácia - kategóriálny graf\n", + "Teraz ste znova načítali údaje o tekviciach a vyčistili ich tak, aby ste zachovali dataset obsahujúci niekoľko 
premenných, vrátane farby (Color). Poďme si vizualizovať dataframe v notebooku pomocou knižnice ggplot.\n", + "\n", + "Knižnica ggplot ponúka niekoľko skvelých spôsobov, ako vizualizovať vaše údaje. Napríklad môžete porovnať rozdelenia údajov pre každú odrodu (Variety) a farbu (Color) v kategóriálnom grafe.\n", + "\n", + "1. Vytvorte takýto graf pomocou funkcie `geom_bar()`, pričom použijete naše údaje o tekviciach a špecifikujete farebné mapovanie pre každú kategóriu tekvíc (oranžová alebo biela):\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Specify colors for each value of the hue variable\n", + "palette <- c(ORANGE = \"orange\", WHITE = \"wheat\")\n", + "\n", + "# Create the bar plot\n", + "ggplot(pumpkins_select, aes(y = variety, fill = color)) +\n", + " geom_bar(position = \"dodge\") +\n", + " scale_fill_manual(values = palette) +\n", + " labs(y = \"Variety\", fill = \"Color\") +\n", + " theme_minimal()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Pozorovaním údajov môžete vidieť, ako sa údaje o Farbe vzťahujú k Odrode.\n", + "\n", + "✅ Na základe tohto kategóriálneho grafu, aké zaujímavé skúmania si dokážete predstaviť?\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Predspracovanie údajov: kódovanie vlastností\n", + "\n", + "Náš dataset tekvíc obsahuje textové hodnoty vo všetkých svojich stĺpcoch. Práca s kategorizovanými údajmi je pre ľudí intuitívna, ale pre stroje nie. Algoritmy strojového učenia pracujú dobre s číslami. Preto je kódovanie veľmi dôležitým krokom vo fáze predspracovania údajov, pretože nám umožňuje premeniť kategorizované údaje na číselné údaje bez straty informácií. Dobré kódovanie vedie k vytvoreniu dobrého modelu.\n", + "\n", + "Pre kódovanie vlastností existujú dva hlavné typy kóderov:\n", + "\n", + "1. 
Ordinálny kóder: je vhodný pre ordinálne premenné, čo sú kategorizované premenné, kde ich údaje nasledujú logické poradie, ako napríklad stĺpec `item_size` v našom datasete. Vytvára mapovanie, kde každá kategória je reprezentovaná číslom, ktoré zodpovedá poradiu kategórie v stĺpci.\n", + "\n", + "2. Kategorizovaný kóder: je vhodný pre nominálne premenné, čo sú kategorizované premenné, kde ich údaje nenasledujú logické poradie, ako všetky vlastnosti okrem `item_size` v našom datasete. Ide o kódovanie typu one-hot, čo znamená, že každá kategória je reprezentovaná binárnym stĺpcom: kódovaná premenná je rovná 1, ak tekvica patrí do danej Variety, a 0 v opačnom prípade.\n", + "\n", + "Tidymodels poskytuje ďalší šikovný balík: [recipes](https://recipes.tidymodels.org/) - balík na predspracovanie údajov. Definujeme `recipe`, ktorý špecifikuje, že všetky stĺpce prediktorov by mali byť kódované do množiny celých čísel, `prep` na odhad veličín a štatistík potrebných pre akékoľvek operácie a nakoniec `bake` na aplikáciu výpočtov na nové údaje.\n", + "\n", + "> Balík recipes sa zvyčajne používa ako predspracovateľ pre modelovanie, kde definuje, aké kroky by sa mali aplikovať na dataset, aby bol pripravený na modelovanie. V takom prípade je **veľmi odporúčané**, aby ste použili `workflow()` namiesto manuálneho odhadu receptu pomocou prep a bake. 
Všetko toto si ukážeme už čoskoro.\n", + ">\n", + "> Avšak momentálne používame recipes + prep + bake na špecifikáciu krokov, ktoré by sa mali aplikovať na dataset, aby bol pripravený na analýzu údajov, a následne extrahujeme predspracované údaje s aplikovanými krokmi.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Preprocess and extract data to allow some data analysis\n", + "baked_pumpkins <- recipe(color ~ ., data = pumpkins_select) %>%\n", + " # Define ordering for item_size column\n", + " step_mutate(item_size = ordered(item_size, levels = c('sml', 'med', 'med-lge', 'lge', 'xlge', 'jbo', 'exjbo'))) %>%\n", + " # Convert factors to numbers using the order defined above (Ordinal encoding)\n", + " step_integer(item_size, zero_based = F) %>%\n", + " # Encode all other predictors using one hot encoding\n", + " step_dummy(all_nominal(), -all_outcomes(), one_hot = TRUE) %>%\n", + " prep(training = pumpkins_select) %>%\n", + " bake(new_data = NULL)\n", + "\n", + "# Display the first few rows of preprocessed data\n", + "baked_pumpkins %>% \n", + " slice_head(n = 5)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "✅ Aké sú výhody použitia ordinálneho enkodéra pre stĺpec Item Size?\n", + "\n", + "### Analyzujte vzťahy medzi premennými\n", + "\n", + "Teraz, keď sme predspracovali naše údaje, môžeme analyzovať vzťahy medzi vlastnosťami a štítkom, aby sme získali predstavu o tom, ako dobre bude model schopný predpovedať štítok na základe vlastností. Najlepší spôsob, ako vykonať tento typ analýzy, je vizualizácia údajov. \n", + "Opäť použijeme funkciu ggplot `geom_boxplot()`, aby sme zobrazili vzťahy medzi Item Size, Variety a Color v kategóriálnom grafe. 
Na lepšiu vizualizáciu údajov použijeme enkódovaný stĺpec Item Size a neenkódovaný stĺpec Variety.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Define the color palette\n", + "palette <- c(ORANGE = \"orange\", WHITE = \"wheat\")\n", + "\n", + "# We need the encoded Item Size column to use it as the x-axis values in the plot\n", + "pumpkins_select_plot<-pumpkins_select\n", + "pumpkins_select_plot$item_size <- baked_pumpkins$item_size\n", + "\n", + "# Create the grouped box plot\n", + "ggplot(pumpkins_select_plot, aes(x = `item_size`, y = color, fill = color)) +\n", + " geom_boxplot() +\n", + " facet_grid(variety ~ ., scales = \"free_x\") +\n", + " scale_fill_manual(values = palette) +\n", + " labs(x = \"Item Size\", y = \"\") +\n", + " theme_minimal() +\n", + " theme(strip.text = element_text(size = 12)) +\n", + " theme(axis.text.x = element_text(size = 10)) +\n", + " theme(axis.title.x = element_text(size = 12)) +\n", + " theme(axis.title.y = element_blank()) +\n", + " theme(legend.position = \"bottom\") +\n", + " guides(fill = guide_legend(title = \"Color\")) +\n", + " theme(panel.spacing = unit(0.5, \"lines\"))+\n", + " theme(strip.text.y = element_text(size = 4, hjust = 0)) \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Použitie swarm grafu\n", + "\n", + "Keďže Color je binárna kategória (Biela alebo Nie), vyžaduje si '[špeciálny prístup](https://github.com/rstudio/cheatsheets/blob/main/data-visualization.pdf)' k vizualizácii.\n", + "\n", + "Vyskúšajte `swarm graf`, aby ste zobrazili rozloženie farby vo vzťahu k item_size.\n", + "\n", + "Použijeme balík [ggbeeswarm](https://github.com/eclarke/ggbeeswarm), ktorý poskytuje metódy na vytváranie grafov v štýle beeswarm pomocou ggplot2. 
Beeswarm grafy sú spôsob, ako zobrazovať body, ktoré by sa za normálnych okolností prekrývali, tak, aby boli vedľa seba.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Create beeswarm plots of color and item_size\n", + "baked_pumpkins %>% \n", + " mutate(color = factor(color)) %>% \n", + " ggplot(mapping = aes(x = color, y = item_size, color = color)) +\n", + " geom_quasirandom() +\n", + " scale_color_brewer(palette = \"Dark2\", direction = -1) +\n", + " theme(legend.position = \"none\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Teraz, keď máme predstavu o vzťahu medzi binárnymi kategóriami farieb a väčšou skupinou veľkostí, poďme preskúmať logistickú regresiu na určenie pravdepodobnej farby danej tekvice.\n", + "\n", + "## Vytvorte svoj model\n", + "\n", + "Vyberte premenné, ktoré chcete použiť vo svojom klasifikačnom modeli, a rozdeľte údaje na tréningovú a testovaciu množinu. 
[rsample](https://rsample.tidymodels.org/), balík v rámci Tidymodels, poskytuje infraštruktúru na efektívne delenie a resamplovanie údajov:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Split data into 80% for training and 20% for testing\n", + "set.seed(2056)\n", + "pumpkins_split <- pumpkins_select %>% \n", + " initial_split(prop = 0.8)\n", + "\n", + "# Extract the data in each split\n", + "pumpkins_train <- training(pumpkins_split)\n", + "pumpkins_test <- testing(pumpkins_split)\n", + "\n", + "# Print out the first 5 rows of the training set\n", + "pumpkins_train %>% \n", + " slice_head(n = 5)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "🙌 Teraz sme pripravení trénovať model prispôsobením tréningových vlastností tréningovej značke (farba).\n", + "\n", + "Začneme vytvorením receptu, ktorý špecifikuje kroky predspracovania, ktoré by sa mali vykonať na našich údajoch, aby boli pripravené na modelovanie, t.j. kódovanie kategóriálnych premenných na množinu celých čísel. Rovnako ako `baked_pumpkins`, vytvoríme `pumpkins_recipe`, ale nebudeme používať `prep` a `bake`, pretože to bude zahrnuté do pracovného toku, čo uvidíte o pár krokov neskôr.\n", + "\n", + "Existuje pomerne veľa spôsobov, ako špecifikovať logistickú regresiu v Tidymodels. Pozrite si `?logistic_reg()`. 
Zatiaľ špecifikujeme logistickú regresiu prostredníctvom predvoleného enginu `stats::glm()`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Create a recipe that specifies preprocessing steps for modelling\n", + "pumpkins_recipe <- recipe(color ~ ., data = pumpkins_train) %>% \n", + " step_mutate(item_size = ordered(item_size, levels = c('sml', 'med', 'med-lge', 'lge', 'xlge', 'jbo', 'exjbo'))) %>%\n", + " step_integer(item_size, zero_based = F) %>% \n", + " step_dummy(all_nominal(), -all_outcomes(), one_hot = TRUE)\n", + "\n", + "# Create a logistic model specification\n", + "log_reg <- logistic_reg() %>% \n", + " set_engine(\"glm\") %>% \n", + " set_mode(\"classification\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Teraz, keď máme recept a špecifikáciu modelu, musíme nájsť spôsob, ako ich spojiť do objektu, ktorý najskôr predspracuje údaje (príprava + pečenie na pozadí), prispôsobí model na predspracovaných údajoch a zároveň umožní potenciálne aktivity po spracovaní.\n", + "\n", + "V Tidymodels sa tento praktický objekt nazýva [`workflow`](https://workflows.tidymodels.org/) a pohodlne uchováva vaše modelovacie komponenty.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Bundle modelling components in a workflow\n", + "log_reg_wf <- workflow() %>% \n", + " add_recipe(pumpkins_recipe) %>% \n", + " add_model(log_reg)\n", + "\n", + "# Print out the workflow\n", + "log_reg_wf\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Po definovaní *pracovného postupu* môže byť model `natrénovaný` pomocou funkcie [`fit()`](https://tidymodels.github.io/parsnip/reference/fit.html). 
Pracovný postup odhadne recept a pred spracovaním údajov ich predpripraví, takže to nebudeme musieť robiť manuálne pomocou funkcií prep a bake.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Train the model\n", + "wf_fit <- log_reg_wf %>% \n", + " fit(data = pumpkins_train)\n", + "\n", + "# Print the trained workflow\n", + "wf_fit\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Model vytlačí koeficienty naučené počas tréningu.\n", + "\n", + "Teraz, keď sme model natrénovali pomocou tréningových dát, môžeme robiť predpovede na testovacích dátach pomocou [parsnip::predict()](https://parsnip.tidymodels.org/reference/predict.model_fit.html). Začnime tým, že použijeme model na predpovedanie štítkov pre náš testovací súbor a pravdepodobností pre každý štítok. Keď je pravdepodobnosť vyššia ako 0.5, predpovedaná trieda je `WHITE`, inak `ORANGE`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Make predictions for color and corresponding probabilities\n", + "results <- pumpkins_test %>% select(color) %>% \n", + " bind_cols(wf_fit %>% \n", + " predict(new_data = pumpkins_test)) %>%\n", + " bind_cols(wf_fit %>%\n", + " predict(new_data = pumpkins_test, type = \"prob\"))\n", + "\n", + "# Compare predictions\n", + "results %>% \n", + " slice_head(n = 10)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Veľmi pekné! Toto poskytuje ďalší pohľad na to, ako funguje logistická regresia.\n", + "\n", + "### Lepšie pochopenie pomocou matice zámien\n", + "\n", + "Porovnávanie každej predikcie s jej zodpovedajúcou „skutočnou hodnotou“ nie je veľmi efektívny spôsob, ako určiť, ako dobre model predikuje. 
Našťastie má Tidymodels v rukáve ešte niekoľko trikov: [`yardstick`](https://yardstick.tidymodels.org/) - balík používaný na meranie efektívnosti modelov pomocou metrík výkonnosti.\n", + "\n", + "Jednou z metrík výkonnosti spojených s klasifikačnými problémami je [`matica zámien`](https://wikipedia.org/wiki/Confusion_matrix). Matica zámien opisuje, ako dobre klasifikačný model funguje. Matica zámien zaznamenáva, koľko príkladov v každej triede bolo modelom správne klasifikovaných. V našom prípade vám ukáže, koľko oranžových tekvíc bolo klasifikovaných ako oranžové a koľko bielych tekvíc bolo klasifikovaných ako biele; matica zámien vám tiež ukáže, koľko ich bolo klasifikovaných do **nesprávnych** kategórií.\n", + "\n", + "Funkcia [**`conf_mat()`**](https://tidymodels.github.io/yardstick/reference/conf_mat.html) z yardstick vypočíta túto krížovú tabuľku pozorovaných a predikovaných tried.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Confusion matrix for prediction results\n", + "conf_mat(data = results, truth = color, estimate = .pred_class)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Poďme interpretovať maticu zámien. 
Náš model má za úlohu klasifikovať tekvice do dvoch binárnych kategórií, kategórie `biela` a kategórie `nie-biela`.\n", + "\n", + "- Ak váš model predpovedá tekvicu ako bielu a v skutočnosti patrí do kategórie 'biela', nazývame to `pravý pozitívny`, čo je znázornené číslom v ľavom hornom rohu.\n", + "\n", + "- Ak váš model predpovedá tekvicu ako nie-bielu a v skutočnosti patrí do kategórie 'biela', nazývame to `falošný negatívny`, čo je znázornené číslom v ľavom dolnom rohu.\n", + "\n", + "- Ak váš model predpovedá tekvicu ako bielu a v skutočnosti patrí do kategórie 'nie-biela', nazývame to `falošný pozitívny`, čo je znázornené číslom v pravom hornom rohu.\n", + "\n", + "- Ak váš model predpovedá tekvicu ako nie-bielu a v skutočnosti patrí do kategórie 'nie-biela', nazývame to `pravý negatívny`, čo je znázornené číslom v pravom dolnom rohu.\n", + "\n", + "| Pravda |\n", + "|:-----:|\n", + "\n", + "\n", + "| | | |\n", + "|---------------|--------|-------|\n", + "| **Predikcia** | BIELA | ORANŽOVÁ |\n", + "| BIELA | TP | FP |\n", + "| ORANŽOVÁ | FN | TN |\n", + "\n", + "Ako ste možno uhádli, je preferované mať vyšší počet pravých pozitívnych a pravých negatívnych a nižší počet falošných pozitívnych a falošných negatívnych, čo naznačuje, že model funguje lepšie.\n", + "\n", + "Matica zámien je užitočná, pretože umožňuje odvodiť ďalšie metriky, ktoré nám môžu pomôcť lepšie vyhodnotiť výkon klasifikačného modelu. Poďme si ich prejsť:\n", + "\n", + "🎓 Presnosť: `TP/(TP + FP)` definovaná ako podiel predpokladaných pozitívnych, ktoré sú skutočne pozitívne. Tiež nazývaná [pozitívna prediktívna hodnota](https://en.wikipedia.org/wiki/Positive_predictive_value \"Positive predictive value\").\n", + "\n", + "🎓 Návratnosť: `TP/(TP + FN)` definovaná ako podiel pozitívnych výsledkov z počtu vzoriek, ktoré boli skutočne pozitívne. 
Tiež známa ako `citlivosť`.\n", + "\n", + "🎓 Špecifickosť: `TN/(TN + FP)` definovaná ako podiel negatívnych výsledkov z počtu vzoriek, ktoré boli skutočne negatívne.\n", + "\n", + "🎓 Presnosť modelu: `(TP + TN)/(TP + TN + FP + FN)` Percento správne predpovedaných označení pre vzorku.\n", + "\n", + "🎓 F Miera: Vážený priemer presnosti a návratnosti, pričom najlepšia hodnota je 1 a najhoršia je 0.\n", + "\n", + "Poďme vypočítať tieto metriky!\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Combine metric functions and calculate them all at once\n", + "eval_metrics <- metric_set(ppv, recall, spec, f_meas, accuracy)\n", + "eval_metrics(data = results, truth = color, estimate = .pred_class)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Vizualizácia ROC krivky tohto modelu\n", + "\n", + "Urobme ešte jednu vizualizáciu, aby sme si pozreli tzv. [`ROC krivku`](https://en.wikipedia.org/wiki/Receiver_operating_characteristic):\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Make a roc_curve\n", + "results %>% \n", + " roc_curve(color, .pred_ORANGE) %>% \n", + " autoplot()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ROC krivky sa často používajú na zobrazenie výstupu klasifikátora z hľadiska jeho skutočných vs. falošných pozitívnych výsledkov. ROC krivky zvyčajne zobrazujú `True Positive Rate`/citlivosť na osi Y a `False Positive Rate`/1-špecifickosť na osi X. Preto je dôležitá strmosť krivky a priestor medzi stredovou čiarou a krivkou: chcete krivku, ktorá rýchlo stúpa a prechádza nad čiaru. 
V našom prípade sú na začiatku falošné pozitívne výsledky, a potom krivka správne stúpa a prechádza nad čiaru.\n", + "\n", + "Nakoniec použime `yardstick::roc_auc()` na výpočet skutočnej plochy pod krivkou (Area Under the Curve). Jedným zo spôsobov interpretácie AUC je pravdepodobnosť, že model hodnotí náhodný pozitívny príklad vyššie ako náhodný negatívny príklad.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Calculate area under curve\n", + "results %>% \n", + " roc_auc(color, .pred_ORANGE)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Výsledok je približne `0.975`. Keďže AUC sa pohybuje v rozmedzí od 0 do 1, chcete dosiahnuť vysoké skóre, pretože model, ktorý je na 100 % presný vo svojich predpovediach, bude mať AUC rovné 1; v tomto prípade je model *celkom dobrý*.\n", + "\n", + "V budúcich lekciách o klasifikáciách sa naučíte, ako zlepšiť skóre vášho modelu (napríklad riešením problémov s nevyváženými údajmi, ako v tomto prípade).\n", + "\n", + "## 🚀Výzva\n", + "\n", + "Logistická regresia ponúka veľa možností na preskúmanie! Najlepší spôsob, ako sa ju naučiť, je experimentovať. Nájdite dataset, ktorý sa hodí na tento typ analýzy, a vytvorte s ním model. Čo ste sa naučili? tip: skúste [Kaggle](https://www.kaggle.com/search?q=logistic+regression+datasets) pre zaujímavé datasety.\n", + "\n", + "## Prehľad a samostatné štúdium\n", + "\n", + "Prečítajte si prvé strany [tohto článku zo Stanfordu](https://web.stanford.edu/~jurafsky/slp3/5.pdf) o niektorých praktických využitiach logistickej regresie. Zamyslite sa nad úlohami, ktoré sú lepšie prispôsobené jednému alebo druhému typu regresných úloh, ktoré sme doteraz študovali. 
Čo by fungovalo najlepšie?\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby na automatický preklad [Co-op Translator](https://github.com/Azure/co-op-translator). Aj keď sa snažíme o presnosť, upozorňujeme, že automatické preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre dôležité informácie sa odporúča profesionálny ľudský preklad. Nezodpovedáme za žiadne nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ], + "metadata": { + "anaconda-cloud": "", + "kernelspec": { + "display_name": "R", + "langauge": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "3.4.1" + }, + "coopTranslator": { + "original_hash": "feaf125f481a89c468fa115bf2aed580", + "translation_date": "2025-09-06T13:29:46+00:00", + "source_file": "2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/translations/sk/2-Regression/4-Logistic/solution/notebook.ipynb b/translations/sk/2-Regression/4-Logistic/solution/notebook.ipynb new file mode 100644 index 000000000..869648e29 --- /dev/null +++ b/translations/sk/2-Regression/4-Logistic/solution/notebook.ipynb @@ -0,0 +1,1255 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Logistická regresia - Lekcia 4\n", + "\n", + "Načítajte potrebné knižnice a dataset. Konvertujte údaje na dataframe obsahujúci podmnožinu údajov:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NameTypePackageVarietySub VarietyGradeDateLow PriceHigh PriceMostly Low...Unit of SaleQualityConditionAppearanceStorageCropRepackTrans ModeUnnamed: 24Unnamed: 25
0BALTIMORENaN24 inch binsNaNNaNNaN4/29/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
1BALTIMORENaN24 inch binsNaNNaNNaN5/6/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
2BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
3BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
4BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN11/5/1690.0100.090.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " City Name Type Package Variety Sub Variety Grade Date \n", + "0 BALTIMORE NaN 24 inch bins NaN NaN NaN 4/29/17 \\\n", + "1 BALTIMORE NaN 24 inch bins NaN NaN NaN 5/6/17 \n", + "2 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "3 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "4 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 11/5/16 \n", + "\n", + " Low Price High Price Mostly Low ... Unit of Sale Quality Condition \n", + "0 270.0 280.0 270.0 ... NaN NaN NaN \\\n", + "1 270.0 280.0 270.0 ... NaN NaN NaN \n", + "2 160.0 160.0 160.0 ... NaN NaN NaN \n", + "3 160.0 160.0 160.0 ... NaN NaN NaN \n", + "4 90.0 100.0 90.0 ... NaN NaN NaN \n", + "\n", + " Appearance Storage Crop Repack Trans Mode Unnamed: 24 Unnamed: 25 \n", + "0 NaN NaN NaN E NaN NaN NaN \n", + "1 NaN NaN NaN E NaN NaN NaN \n", + "2 NaN NaN NaN N NaN NaN NaN \n", + "3 NaN NaN NaN N NaN NaN NaN \n", + "4 NaN NaN NaN N NaN NaN NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "full_pumpkins = pd.read_csv('../../data/US-pumpkins.csv')\n", + "\n", + "full_pumpkins.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NamePackageVarietyOriginItem SizeColor
2BALTIMORE24 inch binsHOWDEN TYPEDELAWAREmedORANGE
3BALTIMORE24 inch binsHOWDEN TYPEVIRGINIAmedORANGE
4BALTIMORE24 inch binsHOWDEN TYPEMARYLANDlgeORANGE
5BALTIMORE24 inch binsHOWDEN TYPEMARYLANDlgeORANGE
6BALTIMORE36 inch binsHOWDEN TYPEMARYLANDmedORANGE
\n", + "
" + ], + "text/plain": [ + " City Name Package Variety Origin Item Size Color\n", + "2 BALTIMORE 24 inch bins HOWDEN TYPE DELAWARE med ORANGE\n", + "3 BALTIMORE 24 inch bins HOWDEN TYPE VIRGINIA med ORANGE\n", + "4 BALTIMORE 24 inch bins HOWDEN TYPE MARYLAND lge ORANGE\n", + "5 BALTIMORE 24 inch bins HOWDEN TYPE MARYLAND lge ORANGE\n", + "6 BALTIMORE 36 inch bins HOWDEN TYPE MARYLAND med ORANGE" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Select the columns we want to use\n", + "columns_to_select = ['City Name','Package','Variety', 'Origin','Item Size', 'Color']\n", + "pumpkins = full_pumpkins.loc[:, columns_to_select]\n", + "\n", + "# Drop rows with missing values\n", + "pumpkins.dropna(inplace=True)\n", + "\n", + "pumpkins.head()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Pozrime sa na naše dáta!\n", + "\n", + "Vizualizujme ich pomocou Seaborn\n" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import seaborn as sns\n", + "# Specify colors for each values of the hue variable\n", + "palette = {\n", + " 'ORANGE': 'orange',\n", + " 'WHITE': 'wheat',\n", + "}\n", + "# Plot a bar plot to visualize how many pumpkins of each variety are orange or white\n", + "sns.catplot(\n", + " data=pumpkins, y=\"Variety\", hue=\"Color\", kind=\"count\",\n", + " palette=palette, \n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Predspracovanie údajov\n", + "\n", + "Zakódujme črty a štítky, aby sme mohli lepšie vizualizovať údaje a trénovať model.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['med', 'lge', 'sml', 'xlge', 'med-lge', 'jbo', 'exjbo'],\n", + " dtype=object)" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Let's look at the different values of the 'Item Size' column\n", + "pumpkins['Item Size'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import OrdinalEncoder\n", + "# Encode the 'Item Size' column using ordinal encoding\n", + "item_size_categories = [['sml', 'med', 'med-lge', 'lge', 'xlge', 'jbo', 'exjbo']]\n", + "ordinal_features = ['Item Size']\n", + "ordinal_encoder = OrdinalEncoder(categories=item_size_categories)" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import OneHotEncoder\n", + "# Encode all the other features using one-hot encoding\n", + "categorical_features = ['City Name', 'Package', 'Variety', 'Origin']\n", + "categorical_encoder = OneHotEncoder(sparse_output=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + 
"outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ord__Item Sizecat__City Name_ATLANTAcat__City Name_BALTIMOREcat__City Name_BOSTONcat__City Name_CHICAGOcat__City Name_COLUMBIAcat__City Name_DALLAScat__City Name_DETROITcat__City Name_LOS ANGELEScat__City Name_MIAMI...cat__Origin_MICHIGANcat__Origin_NEW JERSEYcat__Origin_NEW YORKcat__Origin_NORTH CAROLINAcat__Origin_OHIOcat__Origin_PENNSYLVANIAcat__Origin_TENNESSEEcat__Origin_TEXAScat__Origin_VERMONTcat__Origin_VIRGINIA
21.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
31.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.01.0
43.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
53.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
61.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
\n", + "

5 rows × 48 columns

\n", + "
" + ], + "text/plain": [ + " ord__Item Size cat__City Name_ATLANTA cat__City Name_BALTIMORE \n", + "2 1.0 0.0 1.0 \\\n", + "3 1.0 0.0 1.0 \n", + "4 3.0 0.0 1.0 \n", + "5 3.0 0.0 1.0 \n", + "6 1.0 0.0 1.0 \n", + "\n", + " cat__City Name_BOSTON cat__City Name_CHICAGO cat__City Name_COLUMBIA \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__City Name_DALLAS cat__City Name_DETROIT cat__City Name_LOS ANGELES \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__City Name_MIAMI ... cat__Origin_MICHIGAN cat__Origin_NEW JERSEY \n", + "2 0.0 ... 0.0 0.0 \\\n", + "3 0.0 ... 0.0 0.0 \n", + "4 0.0 ... 0.0 0.0 \n", + "5 0.0 ... 0.0 0.0 \n", + "6 0.0 ... 0.0 0.0 \n", + "\n", + " cat__Origin_NEW YORK cat__Origin_NORTH CAROLINA cat__Origin_OHIO \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__Origin_PENNSYLVANIA cat__Origin_TENNESSEE cat__Origin_TEXAS \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__Origin_VERMONT cat__Origin_VIRGINIA \n", + "2 0.0 0.0 \n", + "3 0.0 1.0 \n", + "4 0.0 0.0 \n", + "5 0.0 0.0 \n", + "6 0.0 0.0 \n", + "\n", + "[5 rows x 48 columns]" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.compose import ColumnTransformer\n", + "ct = ColumnTransformer(transformers=[\n", + " ('ord', ordinal_encoder, ordinal_features),\n", + " ('cat', categorical_encoder, categorical_features)\n", + " ])\n", + "# Get the encoded features as a pandas DataFrame\n", + "ct.set_output(transform='pandas')\n", + "encoded_features = ct.fit_transform(pumpkins)\n", + "encoded_features.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + 
{ + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ord__Item Sizecat__City Name_ATLANTAcat__City Name_BALTIMOREcat__City Name_BOSTONcat__City Name_CHICAGOcat__City Name_COLUMBIAcat__City Name_DALLAScat__City Name_DETROITcat__City Name_LOS ANGELEScat__City Name_MIAMI...cat__Origin_NEW JERSEYcat__Origin_NEW YORKcat__Origin_NORTH CAROLINAcat__Origin_OHIOcat__Origin_PENNSYLVANIAcat__Origin_TENNESSEEcat__Origin_TEXAScat__Origin_VERMONTcat__Origin_VIRGINIAColor
21.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00
31.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00
43.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00
53.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00
61.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00
\n", + "

5 rows × 49 columns

\n", + "
" + ], + "text/plain": [ + " ord__Item Size cat__City Name_ATLANTA cat__City Name_BALTIMORE \n", + "2 1.0 0.0 1.0 \\\n", + "3 1.0 0.0 1.0 \n", + "4 3.0 0.0 1.0 \n", + "5 3.0 0.0 1.0 \n", + "6 1.0 0.0 1.0 \n", + "\n", + " cat__City Name_BOSTON cat__City Name_CHICAGO cat__City Name_COLUMBIA \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__City Name_DALLAS cat__City Name_DETROIT cat__City Name_LOS ANGELES \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__City Name_MIAMI ... cat__Origin_NEW JERSEY cat__Origin_NEW YORK \n", + "2 0.0 ... 0.0 0.0 \\\n", + "3 0.0 ... 0.0 0.0 \n", + "4 0.0 ... 0.0 0.0 \n", + "5 0.0 ... 0.0 0.0 \n", + "6 0.0 ... 0.0 0.0 \n", + "\n", + " cat__Origin_NORTH CAROLINA cat__Origin_OHIO cat__Origin_PENNSYLVANIA \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__Origin_TENNESSEE cat__Origin_TEXAS cat__Origin_VERMONT \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__Origin_VIRGINIA Color \n", + "2 0.0 0 \n", + "3 1.0 0 \n", + "4 0.0 0 \n", + "5 0.0 0 \n", + "6 0.0 0 \n", + "\n", + "[5 rows x 49 columns]" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.preprocessing import LabelEncoder\n", + "# Encode the 'Color' column using label encoding\n", + "label_encoder = LabelEncoder()\n", + "encoded_label = label_encoder.fit_transform(pumpkins['Color'])\n", + "encoded_pumpkins = encoded_features.assign(Color=encoded_label)\n", + "encoded_pumpkins.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['ORANGE', 'WHITE']" + ] + }, + "execution_count": 71, + "metadata": 
{}, + "output_type": "execute_result" + } + ], + "source": [ + "# Let's look at the mapping between the encoded values and the original values\n", + "list(label_encoder.inverse_transform([0, 1]))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "palette = {\n", + " 'ORANGE': 'orange',\n", + " 'WHITE': 'wheat',\n", + "}\n", + "# We need the encoded Item Size column to use it as the x-axis values in the plot\n", + "pumpkins['Item Size'] = encoded_pumpkins['ord__Item Size']\n", + "\n", + "g = sns.catplot(\n", + " data=pumpkins,\n", + " x=\"Item Size\", y=\"Color\", row='Variety',\n", + " kind=\"box\", orient=\"h\",\n", + " sharex=False, margin_titles=True,\n", + " height=1.8, aspect=4, palette=palette,\n", + ")\n", + "# Defining axis labels \n", + "g.set(xlabel=\"Item Size\", ylabel=\"\").set(xlim=(0,6))\n", + "g.set_titles(row_template=\"{row_name}\")\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "warnings.filterwarnings(action='ignore', category=UserWarning, module='seaborn')" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Suppressing warning message claiming that a portion of points cannot be placed into the plot due to the high number of data points\n", + "import warnings\n", + "warnings.filterwarnings(action='ignore', category=UserWarning, module='seaborn')\n", + "\n", + "palette = {\n", + " 0: 'orange',\n", + " 1: 'wheat'\n", + "}\n", + "sns.swarmplot(x=\"Color\", y=\"ord__Item Size\", hue=\"Color\", data=encoded_pumpkins, palette=palette)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Pozor**: Ignorovanie varovaní NIE je najlepšou praxou a malo by sa vyhnúť, kedykoľvek je to možné. Varovania často obsahujú užitočné správy, ktoré nám umožňujú zlepšiť náš kód a vyriešiť problém. \n", + "Dôvod, prečo ignorujeme toto konkrétne varovanie, je zabezpečenie čitateľnosti grafu. Zobrazenie všetkých dátových bodov s menšou veľkosťou značky, pri zachovaní konzistencie s farbou palety, vytvára nejasnú vizualizáciu.\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "# X is the encoded features\n", + "X = encoded_pumpkins[encoded_pumpkins.columns.difference(['Color'])]\n", + "# y is the encoded label\n", + "y = encoded_pumpkins['Color']\n", + "\n", + "# Split the data into training and test sets\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.94 0.98 0.96 166\n", + " 1 0.85 0.67 0.75 33\n", + "\n", + " accuracy 0.92 199\n", + " macro avg 0.89 0.82 0.85 199\n", + "weighted avg 0.92 0.92 
0.92 199\n", + "\n", + "Predicted labels: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0\n", + " 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 1 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 1 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0\n", + " 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1\n", + " 0 0 0 1 0 0 0 0 0 0 0 0 1 1]\n", + "F1-score: 0.7457627118644068\n" + ] + } + ], + "source": [ + "from sklearn.metrics import f1_score, classification_report \n", + "from sklearn.linear_model import LogisticRegression\n", + "\n", + "# Train a logistic regression model on the pumpkin dataset\n", + "model = LogisticRegression()\n", + "model.fit(X_train, y_train)\n", + "predictions = model.predict(X_test)\n", + "\n", + "# Evaluate the model and print the results\n", + "print(classification_report(y_test, predictions))\n", + "print('Predicted labels: ', predictions)\n", + "print('F1-score: ', f1_score(y_test, predictions))" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[162, 4],\n", + " [ 11, 22]])" + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.metrics import confusion_matrix\n", + "confusion_matrix(y_test, predictions)" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from sklearn.metrics import roc_curve, roc_auc_score\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "\n", + "y_scores = model.predict_proba(X_test)\n", + "# calculate ROC curve\n", + "fpr, tpr, thresholds = roc_curve(y_test, y_scores[:,1])\n", + "\n", + "# plot ROC curve\n", + "fig = plt.figure(figsize=(6, 6))\n", + "# Plot the diagonal 50% line\n", + "plt.plot([0, 1], [0, 1], 'k--')\n", + "# Plot the FPR and TPR achieved by our model\n", + "plt.plot(fpr, tpr)\n", + "plt.xlabel('False Positive Rate')\n", + "plt.ylabel('True Positive Rate')\n", + "plt.title('ROC Curve')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.9749908725812341\n" + ] + } + ], + "source": [ + "# Calculate AUC score\n", + "auc = roc_auc_score(y_test,y_scores[:,1])\n", + "print(auc)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby na automatický preklad [Co-op Translator](https://github.com/Azure/co-op-translator). Aj keď sa snažíme o presnosť, upozorňujeme, že automatické preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre dôležité informácie sa odporúča profesionálny ľudský preklad. 
Nezodpovedáme za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "vscode": { + "interpreter": { + "hash": "949777d72b0d2535278d3dc13498b2535136f6dfe0678499012e853ee9abcab1" + } + }, + "coopTranslator": { + "original_hash": "ef50cc584e0b79412610cc7da15e1f86", + "translation_date": "2025-09-06T13:27:04+00:00", + "source_file": "2-Regression/4-Logistic/solution/notebook.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sk/3-Web-App/1-Web-App/notebook.ipynb b/translations/sk/3-Web-App/1-Web-App/notebook.ipynb new file mode 100644 index 000000000..e69de29bb diff --git a/translations/sk/3-Web-App/1-Web-App/solution/notebook.ipynb b/translations/sk/3-Web-App/1-Web-App/solution/notebook.ipynb new file mode 100644 index 000000000..305096828 --- /dev/null +++ b/translations/sk/3-Web-App/1-Web-App/solution/notebook.ipynb @@ -0,0 +1,267 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": 
"70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "5fa2e8f4584c78250ca9729b46562ceb", + "translation_date": "2025-09-06T14:32:02+00:00", + "source_file": "3-Web-App/1-Web-App/solution/notebook.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " datetime city state country shape \\\n", + "0 10/10/1949 20:30 san marcos tx us cylinder \n", + "1 10/10/1949 21:00 lackland afb tx NaN light \n", + "2 10/10/1955 17:00 chester (uk/england) NaN gb circle \n", + "3 10/10/1956 21:00 edna tx us circle \n", + "4 10/10/1960 20:00 kaneohe hi us light \n", + "\n", + " duration (seconds) duration (hours/min) \\\n", + "0 2700.0 45 minutes \n", + "1 7200.0 1-2 hrs \n", + "2 20.0 20 seconds \n", + "3 20.0 1/2 hour \n", + "4 900.0 15 minutes \n", + "\n", + " comments date posted latitude \\\n", + "0 This event took place in early fall around 194... 4/27/2004 29.883056 \n", + "1 1949 Lackland AFB, TX. Lights racing acros... 12/16/2005 29.384210 \n", + "2 Green/Orange circular disc over Chester, En... 1/21/2008 53.200000 \n", + "3 My older brother and twin sister were leaving ... 1/17/2004 28.978333 \n", + "4 AS a Marine 1st Lt. flying an FJ4B fighter/att... 1/22/2004 21.418056 \n", + "\n", + " longitude \n", + "0 -97.941111 \n", + "1 -98.581082 \n", + "2 -2.916667 \n", + "3 -96.645833 \n", + "4 -157.803611 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
datetimecitystatecountryshapeduration (seconds)duration (hours/min)commentsdate postedlatitudelongitude
010/10/1949 20:30san marcostxuscylinder2700.045 minutesThis event took place in early fall around 194...4/27/200429.883056-97.941111
110/10/1949 21:00lackland afbtxNaNlight7200.01-2 hrs1949 Lackland AFB&#44 TX. Lights racing acros...12/16/200529.384210-98.581082
210/10/1955 17:00chester (uk/england)NaNgbcircle20.020 secondsGreen/Orange circular disc over Chester&#44 En...1/21/200853.200000-2.916667
310/10/1956 21:00ednatxuscircle20.01/2 hourMy older brother and twin sister were leaving ...1/17/200428.978333-96.645833
410/10/1960 20:00kaneohehiuslight900.015 minutesAS a Marine 1st Lt. flying an FJ4B fighter/att...1/22/200421.418056-157.803611
\n
" + }, + "metadata": {}, + "execution_count": 23 + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "ufos = pd.read_csv('../data/ufos.csv')\n", + "ufos.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array(['us', nan, 'gb', 'ca', 'au', 'de'], dtype=object)" + ] + }, + "metadata": {}, + "execution_count": 24 + } + ], + "source": [ + "\n", + "ufos = pd.DataFrame({'Seconds': ufos['duration (seconds)'], 'Country': ufos['country'],'Latitude': ufos['latitude'],'Longitude': ufos['longitude']})\n", + "\n", + "ufos.Country.unique()\n", + "\n", + "# 0 au, 1 ca, 2 de, 3 gb, 4 us" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\nInt64Index: 25863 entries, 2 to 80330\nData columns (total 4 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Seconds 25863 non-null float64\n 1 Country 25863 non-null object \n 2 Latitude 25863 non-null float64\n 3 Longitude 25863 non-null float64\ndtypes: float64(3), object(1)\nmemory usage: 1010.3+ KB\n" + ] + } + ], + "source": [ + "ufos.dropna(inplace=True)\n", + "\n", + "ufos = ufos[(ufos['Seconds'] >= 1) & (ufos['Seconds'] <= 60)]\n", + "\n", + "ufos.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Seconds Country Latitude Longitude\n", + "2 20.0 3 53.200000 -2.916667\n", + "3 20.0 4 28.978333 -96.645833\n", + "14 30.0 4 35.823889 -80.253611\n", + "23 60.0 4 45.582778 -122.352222\n", + "24 3.0 3 51.783333 -0.783333" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
SecondsCountryLatitudeLongitude
220.0353.200000-2.916667
320.0428.978333-96.645833
1430.0435.823889-80.253611
2360.0445.582778-122.352222
243.0351.783333-0.783333
\n
" + }, + "metadata": {}, + "execution_count": 26 + } + ], + "source": [ + "from sklearn.preprocessing import LabelEncoder\n", + "\n", + "ufos['Country'] = LabelEncoder().fit_transform(ufos['Country'])\n", + "\n", + "ufos.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "\n", + "Selected_features = ['Seconds','Latitude','Longitude']\n", + "\n", + "X = ufos[Selected_features]\n", + "y = ufos['Country']\n", + "\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", + " FutureWarning)\n", + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n", + " \"this warning.\", FutureWarning)\n", + " precision recall f1-score support\n", + "\n", + " 0 1.00 1.00 1.00 41\n", + " 1 1.00 0.02 0.05 250\n", + " 2 0.00 0.00 0.00 8\n", + " 3 0.94 1.00 0.97 131\n", + " 4 0.95 1.00 0.97 4743\n", + "\n", + " accuracy 0.95 5173\n", + " macro avg 0.78 0.60 0.60 5173\n", + "weighted avg 0.95 0.95 0.93 5173\n", + "\n", + "Predicted labels: [4 4 4 ... 
3 4 4]\n", + "Accuracy: 0.9512855209742895\n", + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/metrics/classification.py:1437: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.\n", + " 'precision', 'predicted', average, warn_for)\n" + ] + } + ], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import accuracy_score, classification_report \n", + "from sklearn.linear_model import LogisticRegression\n", + "model = LogisticRegression()\n", + "model.fit(X_train, y_train)\n", + "predictions = model.predict(X_test)\n", + "\n", + "print(classification_report(y_test, predictions))\n", + "print('Predicted labels: ', predictions)\n", + "print('Accuracy: ', accuracy_score(y_test, predictions))\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[3]\n" + ] + } + ], + "source": [ + "import pickle\n", + "model_filename = 'ufo-model.pkl'\n", + "pickle.dump(model, open(model_filename,'wb'))\n", + "\n", + "model = pickle.load(open('ufo-model.pkl','rb'))\n", + "print(model.predict([[50,44,-12]]))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby AI prekladu [Co-op Translator](https://github.com/Azure/co-op-translator). Aj keď sa snažíme o presnosť, prosím, berte na vedomie, že automatizované preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre kritické informácie sa odporúča profesionálny ľudský preklad. 
Nie sme zodpovední za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sk/4-Classification/1-Introduction/notebook.ipynb b/translations/sk/4-Classification/1-Introduction/notebook.ipynb new file mode 100644 index 000000000..8e9c16fe2 --- /dev/null +++ b/translations/sk/4-Classification/1-Introduction/notebook.ipynb @@ -0,0 +1,39 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "d544ef384b7ba73757d830a72372a7f2", + "translation_date": "2025-09-06T14:50:40+00:00", + "source_file": "4-Classification/1-Introduction/notebook.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby na automatický preklad [Co-op Translator](https://github.com/Azure/co-op-translator). Aj keď sa snažíme o presnosť, upozorňujeme, že automatické preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre dôležité informácie sa odporúča profesionálny ľudský preklad. 
Nezodpovedáme za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sk/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb b/translations/sk/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb new file mode 100644 index 000000000..ebabb1cbc --- /dev/null +++ b/translations/sk/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb @@ -0,0 +1,721 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_10-R.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "2621e24705e8100893c9bf84e0fc8aef", + "translation_date": "2025-09-06T14:54:24+00:00", + "source_file": "4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb", + "language_code": "sk" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "ItETB4tSFprR" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Úvod do klasifikácie: Čistenie, príprava a vizualizácia dát\n", + "\n", + "V týchto štyroch lekciách sa zameriame na základný aspekt klasického strojového učenia - *klasifikáciu*. Prejdeme si používanie rôznych klasifikačných algoritmov na dátovom súbore o všetkých úžasných kuchyniach Ázie a Indie. Dúfame, že máte chuť na jedlo!\n", + "\n", + "

\n", + " \n", + "

Oslávte panázijské kuchyne v týchto lekciách! Obrázok od Jen Looper
\n", + "\n", + "\n", + "\n", + "\n", + "Klasifikácia je forma [supervised learning](https://wikipedia.org/wiki/Supervised_learning), ktorá má veľa spoločného s regresnými technikami. Pri klasifikácii trénujete model, aby predpovedal, do akej `kategórie` položka patrí. Ak je strojové učenie o predpovedaní hodnôt alebo názvov vecí pomocou dátových súborov, potom klasifikácia všeobecne spadá do dvoch skupín: *binárna klasifikácia* a *multiklasová klasifikácia*.\n", + "\n", + "Pamätajte:\n", + "\n", + "- **Lineárna regresia** vám pomohla predpovedať vzťahy medzi premennými a presne určiť, kde by nový dátový bod spadal vo vzťahu k tejto línii. Napríklad ste mohli predpovedať číselné hodnoty, ako *aká bude cena tekvice v septembri vs. decembri*.\n", + "\n", + "- **Logistická regresia** vám pomohla objaviť \"binárne kategórie\": pri tejto cenovej úrovni, *je táto tekvica oranžová alebo nie-oranžová*?\n", + "\n", + "Klasifikácia používa rôzne algoritmy na určenie ďalších spôsobov, ako priradiť dátovému bodu štítok alebo triedu. Poďme pracovať s týmito dátami o kuchyniach, aby sme zistili, či na základe skupiny ingrediencií dokážeme určiť jej pôvodnú kuchyňu.\n", + "\n", + "### [**Kvíz pred prednáškou**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/19/)\n", + "\n", + "### **Úvod**\n", + "\n", + "Klasifikácia je jednou zo základných aktivít výskumníka strojového učenia a dátového vedca. Od základnej klasifikácie binárnej hodnoty (\"je tento e-mail spam alebo nie?\") až po komplexnú klasifikáciu a segmentáciu obrázkov pomocou počítačového videnia, je vždy užitočné vedieť triediť dáta do tried a klásť im otázky.\n", + "\n", + "Ak to vyjadríme vedeckejšie, vaša klasifikačná metóda vytvára prediktívny model, ktorý vám umožňuje mapovať vzťah medzi vstupnými premennými a výstupnými premennými.\n", + "\n", + "

\n", + " \n", + "

Binárne vs. multiklasové problémy, ktoré klasifikačné algoritmy riešia. Infografika od Jen Looper
\n", + "\n", + "\n", + "\n", + "Predtým, než začneme proces čistenia našich dát, ich vizualizácie a prípravy na úlohy strojového učenia, poďme sa naučiť niečo o rôznych spôsoboch, ako môže byť strojové učenie využité na klasifikáciu dát.\n", + "\n", + "Odvodené zo [štatistiky](https://wikipedia.org/wiki/Statistical_classification), klasifikácia pomocou klasického strojového učenia používa vlastnosti, ako `fajčiar`, `hmotnosť` a `vek`, na určenie *pravdepodobnosti vývoja X choroby*. Ako technika supervised learning podobná regresným cvičeniam, ktoré ste vykonávali skôr, vaše dáta sú označené a algoritmy strojového učenia používajú tieto označenia na klasifikáciu a predpovedanie tried (alebo 'vlastností') dátového súboru a ich priradenie do skupiny alebo výsledku.\n", + "\n", + "✅ Predstavte si na chvíľu dátový súbor o kuchyniach. Na čo by mohol odpovedať multiklasový model? Na čo by mohol odpovedať binárny model? Čo ak by ste chceli určiť, či daná kuchyňa pravdepodobne používa senovku grécku? Čo ak by ste chceli zistiť, či by ste na základe daru tašky s potravinami plnej badiánu, artičokov, karfiolu a chrenu mohli vytvoriť typické indické jedlo?\n", + "\n", + "### **Ahoj 'klasifikátor'**\n", + "\n", + "Otázka, ktorú chceme položiť tomuto dátovému súboru o kuchyniach, je vlastne **multiklasová otázka**, pretože máme niekoľko potenciálnych národných kuchýň, s ktorými môžeme pracovať. Na základe dávky ingrediencií, do ktorej z týchto mnohých tried budú dáta patriť?\n", + "\n", + "Tidymodels ponúka niekoľko rôznych algoritmov na klasifikáciu dát, v závislosti od typu problému, ktorý chcete vyriešiť. 
V nasledujúcich dvoch lekciách sa naučíte o niekoľkých z týchto algoritmov.\n", + "\n", + "#### **Predpoklad**\n", + "\n", + "Pre túto lekciu budeme potrebovať nasledujúce balíky na čistenie, prípravu a vizualizáciu našich dát:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) je [kolekcia balíkov pre R](https://www.tidyverse.org/packages), ktorá robí dátovú vedu rýchlejšou, jednoduchšou a zábavnejšou!\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) je rámec [kolekcie balíkov](https://www.tidymodels.org/packages/) na modelovanie a strojové učenie.\n", + "\n", + "- `DataExplorer`: Balík [DataExplorer](https://cran.r-project.org/web/packages/DataExplorer/vignettes/dataexplorer-intro.html) je určený na zjednodušenie a automatizáciu procesu EDA a generovania správ.\n", + "\n", + "- `themis`: Balík [themis](https://themis.tidymodels.org/) poskytuje extra kroky receptov na riešenie nevyvážených dát.\n", + "\n", + "Môžete ich nainštalovať pomocou:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\", \"DataExplorer\", \"themis\", \"here\"))`\n", + "\n", + "Alternatívne, skript nižšie skontroluje, či máte balíky potrebné na dokončenie tohto modulu, a nainštaluje ich za vás, ak chýbajú.\n" + ], + "metadata": { + "id": "ri5bQxZ-Fz_0" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "suppressWarnings(if (!require(\"pacman\"))install.packages(\"pacman\"))\r\n", + "\r\n", + "pacman::p_load(tidyverse, tidymodels, DataExplorer, themis, here)" + ], + "outputs": [], + "metadata": { + "id": "KIPxa4elGAPI" + } + }, + { + "cell_type": "markdown", + "source": [ + "Neskôr načítame tieto úžasné balíky a sprístupníme ich v našej aktuálnej R relácii. 
(Toto je len na ilustráciu, `pacman::p_load()` to už za vás urobil)\n" + ], + "metadata": { + "id": "YkKAxOJvGD4C" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Cvičenie - vyčistite a vyvážte svoje dáta\n", + "\n", + "Prvým krokom pred začatím tohto projektu je vyčistiť a **vyvážiť** svoje dáta, aby ste dosiahli lepšie výsledky.\n", + "\n", + "Zoznámme sa s dátami! 🕵️\n" + ], + "metadata": { + "id": "PFkQDlk0GN5O" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Import data\r\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/4-Classification/data/cuisines.csv\")\r\n", + "\r\n", + "# View the first 5 rows\r\n", + "df %>% \r\n", + " slice_head(n = 5)\r\n" + ], + "outputs": [], + "metadata": { + "id": "Qccw7okxGT0S" + } + }, + { + "cell_type": "markdown", + "source": [ + "Zaujímavé! Podľa vzhľadu to vyzerá, že prvý stĺpec je akýsi stĺpec `id`. Poďme získať trochu viac informácií o údajoch.\n" + ], + "metadata": { + "id": "XrWnlgSrGVmR" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Basic information about the data\r\n", + "df %>%\r\n", + " introduce()\r\n", + "\r\n", + "# Visualize basic information above\r\n", + "df %>% \r\n", + " plot_intro(ggtheme = theme_light())" + ], + "outputs": [], + "metadata": { + "id": "4UcGmxRxGieA" + } + }, + { + "cell_type": "markdown", + "source": [ + "Z výstupu môžeme okamžite vidieť, že máme `2448` riadkov a `385` stĺpcov a `0` chýbajúcich hodnôt. Máme tiež 1 diskrétny stĺpec, *cuisine*.\n", + "\n", + "## Cvičenie - učenie sa o kuchyniach\n", + "\n", + "Teraz sa práca začína stávať zaujímavejšou. 
Poďme objaviť rozdelenie údajov podľa kuchyne.\n" + ], + "metadata": { + "id": "AaPubl__GmH5" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Count observations per cuisine\r\n", + "df %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(n)\r\n", + "\r\n", + "# Plot the distribution\r\n", + "theme_set(theme_light())\r\n", + "df %>% \r\n", + " count(cuisine) %>% \r\n", + " ggplot(mapping = aes(x = n, y = reorder(cuisine, -n))) +\r\n", + " geom_col(fill = \"midnightblue\", alpha = 0.7) +\r\n", + " ylab(\"cuisine\")" + ], + "outputs": [], + "metadata": { + "id": "FRsBVy5eGrrv" + } + }, + { + "cell_type": "markdown", + "source": [ + "Existuje konečný počet kuchýň, ale rozloženie údajov je nerovnomerné. Môžete to napraviť! Predtým však preskúmajte trochu viac.\n", + "\n", + "Ďalej priraďme každú kuchyňu do jej vlastného tibble a zistime, koľko údajov je k dispozícii (riadky, stĺpce) na jednu kuchyňu.\n", + "\n", + "> [Tibble](https://tibble.tidyverse.org/) je moderný dátový rámec.\n", + "\n", + "

\n", + " \n", + "

Ilustrácia od @allison_horst
\n" + ], + "metadata": { + "id": "vVvyDb1kG2in" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Create individual tibble for the cuisines\r\n", + "thai_df <- df %>% \r\n", + " filter(cuisine == \"thai\")\r\n", + "japanese_df <- df %>% \r\n", + " filter(cuisine == \"japanese\")\r\n", + "chinese_df <- df %>% \r\n", + " filter(cuisine == \"chinese\")\r\n", + "indian_df <- df %>% \r\n", + " filter(cuisine == \"indian\")\r\n", + "korean_df <- df %>% \r\n", + " filter(cuisine == \"korean\")\r\n", + "\r\n", + "\r\n", + "# Find out how much data is available per cuisine\r\n", + "cat(\" thai df:\", dim(thai_df), \"\\n\",\r\n", + " \"japanese df:\", dim(japanese_df), \"\\n\",\r\n", + " \"chinese_df:\", dim(chinese_df), \"\\n\",\r\n", + " \"indian_df:\", dim(indian_df), \"\\n\",\r\n", + " \"korean_df:\", dim(korean_df))" + ], + "outputs": [], + "metadata": { + "id": "0TvXUxD3G8Bk" + } + }, + { + "cell_type": "markdown", + "source": [ + "## **Cvičenie - Objavovanie hlavných ingrediencií podľa kuchyne pomocou dplyr**\n", + "\n", + "Teraz sa môžete hlbšie ponoriť do údajov a zistiť, aké sú typické ingrediencie pre jednotlivé kuchyne. Mali by ste odstrániť opakujúce sa údaje, ktoré spôsobujú zmätok medzi kuchyňami, takže sa poďme pozrieť na tento problém.\n", + "\n", + "Vytvorte funkciu `create_ingredient()` v R, ktorá vráti dataframe s ingredienciami. Táto funkcia začne odstránením nepotrebného stĺpca a zoradí ingrediencie podľa ich počtu.\n", + "\n", + "Základná štruktúra funkcie v R je:\n", + "\n", + "`myFunction <- function(arglist){`\n", + "\n", + "**`...`**\n", + "\n", + "**`return`**`(value)`\n", + "\n", + "`}`\n", + "\n", + "Úhľadný úvod do funkcií v R nájdete [tu](https://skirmer.github.io/presentations/functions_with_r.html#1).\n", + "\n", + "Poďme na to! Využijeme [dplyr slovesá](https://dplyr.tidyverse.org/), ktoré sme sa učili v predchádzajúcich lekciách. 
Na zopakovanie:\n", + "\n", + "- `dplyr::select()`: pomáha vám vybrať, ktoré **stĺpce** chcete ponechať alebo vylúčiť.\n", + "\n", + "- `tidyr::pivot_longer()`: pomáha \"predĺžiť\" údaje, čím sa zvýši počet riadkov a zníži počet stĺpcov.\n", + "\n", + "- `dplyr::group_by()` a `dplyr::summarise()`: pomáha nájsť štatistické súhrny pre rôzne skupiny a usporiadať ich do prehľadnej tabuľky.\n", + "\n", + "- `dplyr::filter()`: vytvára podmnožinu údajov obsahujúcu iba riadky, ktoré spĺňajú vaše podmienky.\n", + "\n", + "- `dplyr::mutate()`: pomáha vytvárať alebo upravovať stĺpce.\n", + "\n", + "Pozrite si tento [*umelecky* ladený learnr tutoriál](https://allisonhorst.shinyapps.io/dplyr-learnr/#section-welcome) od Allison Horst, ktorý predstavuje niektoré užitočné funkcie na spracovanie údajov v dplyr *(súčasť Tidyverse)*.\n" + ], + "metadata": { + "id": "K3RF5bSCHC76" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Creates a function that returns the top ingredients by class\r\n", + "\r\n", + "create_ingredient <- function(df){\r\n", + " \r\n", + " # Drop the id column which is the first column\r\n", + " ingredient_df = df %>% select(-1) %>% \r\n", + " # Transpose data to a long format\r\n", + " pivot_longer(!cuisine, names_to = \"ingredients\", values_to = \"count\") %>% \r\n", + " # Find the top most ingredients for a particular cuisine\r\n", + " group_by(ingredients) %>% \r\n", + " summarise(n_instances = sum(count)) %>% \r\n", + " filter(n_instances != 0) %>% \r\n", + " # Arrange by descending order\r\n", + " arrange(desc(n_instances)) %>% \r\n", + " mutate(ingredients = factor(ingredients) %>% fct_inorder())\r\n", + " \r\n", + " \r\n", + " return(ingredient_df)\r\n", + "} # End of function" + ], + "outputs": [], + "metadata": { + "id": "uB_0JR82HTPa" + } + }, + { + "cell_type": "markdown", + "source": [ + "Teraz môžeme použiť funkciu na získanie predstavy o desiatich najpopulárnejších ingredienciách podľa kuchyne. 
Poďme si to vyskúšať s `thai_df`.\n" + ], + "metadata": { + "id": "h9794WF8HWmc" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Call create_ingredient and display popular ingredients\r\n", + "thai_ingredient_df <- create_ingredient(df = thai_df)\r\n", + "\r\n", + "thai_ingredient_df %>% \r\n", + " slice_head(n = 10)" + ], + "outputs": [], + "metadata": { + "id": "agQ-1HrcHaEA" + } + }, + { + "cell_type": "markdown", + "source": [ + "V predchádzajúcej časti sme použili `geom_col()`, pozrime sa, ako môžete použiť aj `geom_bar` na vytvorenie stĺpcových grafov. Použite `?geom_bar` na ďalšie čítanie.\n" + ], + "metadata": { + "id": "kHu9ffGjHdcX" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make a bar chart for popular thai cuisines\r\n", + "thai_ingredient_df %>% \r\n", + " slice_head(n = 10) %>% \r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"steelblue\") +\r\n", + " xlab(\"\") + ylab(\"\")" + ], + "outputs": [], + "metadata": { + "id": "fb3Bx_3DHj6e" + } + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "RHP_xgdkHnvM" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Get popular ingredients for Japanese cuisines and make bar chart\r\n", + "create_ingredient(df = japanese_df) %>% \r\n", + " slice_head(n = 10) %>%\r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"darkorange\", alpha = 0.8) +\r\n", + " xlab(\"\") + ylab(\"\")\r\n" + ], + "outputs": [], + "metadata": { + "id": "019v8F0XHrRU" + } + }, + { + "cell_type": "markdown", + "source": [ + "Čo tak čínska kuchyňa?\n" + ], + "metadata": { + "id": "iIGM7vO8Hu3v" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Get popular ingredients for Chinese cuisines and make bar chart\r\n", + "create_ingredient(df = chinese_df) 
%>% \r\n", + " slice_head(n = 10) %>%\r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"cyan4\", alpha = 0.8) +\r\n", + " xlab(\"\") + ylab(\"\")" + ], + "outputs": [], + "metadata": { + "id": "lHd9_gd2HyzU" + } + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "ir8qyQbNH1c7" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Get popular ingredients for Indian cuisines and make bar chart\r\n", + "create_ingredient(df = indian_df) %>% \r\n", + " slice_head(n = 10) %>%\r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"#041E42FF\", alpha = 0.8) +\r\n", + " xlab(\"\") + ylab(\"\")" + ], + "outputs": [], + "metadata": { + "id": "ApukQtKjH5FO" + } + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "qv30cwY1H-FM" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Get popular ingredients for Korean cuisines and make bar chart\r\n", + "create_ingredient(df = korean_df) %>% \r\n", + " slice_head(n = 10) %>%\r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"#852419FF\", alpha = 0.8) +\r\n", + " xlab(\"\") + ylab(\"\")" + ], + "outputs": [], + "metadata": { + "id": "lumgk9cHIBie" + } + }, + { + "cell_type": "markdown", + "source": [ + "Z vizualizácií dát môžeme teraz vynechať najbežnejšie ingrediencie, ktoré spôsobujú zmätok medzi odlišnými kuchyňami, pomocou `dplyr::select()`.\n", + "\n", + "Každý miluje ryžu, cesnak a zázvor!\n" + ], + "metadata": { + "id": "iO4veMXuIEta" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Drop id column, rice, garlic and ginger from our original data set\r\n", + "df_select <- df %>% \r\n", + " select(-c(1, rice, garlic, ginger))\r\n", + "\r\n", + "# Display new data set\r\n", + "df_select %>% 
\r\n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "iHJPiG6rIUcK" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Predspracovanie údajov pomocou receptov 👩‍🍳👨‍🍳 - Riešenie nevyvážených údajov ⚖️\n", + "\n", + "

\n", + " \n", + "

Ilustrácia od @allison_horst
\n", + "\n", + "Keďže táto lekcia je o kuchyniach, musíme dať `recepty` do kontextu.\n", + "\n", + "Tidymodels poskytuje ďalší šikovný balík: `recipes` - balík na predspracovanie údajov.\n" + ], + "metadata": { + "id": "kkFd-JxdIaL6" + } + }, + { + "cell_type": "markdown", + "source": [ + "Pozrime sa znova na rozdelenie našich kuchýň.\n" + ], + "metadata": { + "id": "6l2ubtTPJAhY" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Distribution of cuisines\r\n", + "old_label_count <- df_select %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(desc(n))\r\n", + "\r\n", + "old_label_count" + ], + "outputs": [], + "metadata": { + "id": "1e-E9cb7JDVi" + } + }, + { + "cell_type": "markdown", + "source": [ + "Ako vidíte, počet kuchýň je dosť nerovnomerne rozdelený. Kórejské kuchyne sú takmer 3-krát početnejšie ako thajské kuchyne. Nevyvážené údaje často negatívne ovplyvňujú výkon modelu. Zamyslite sa nad binárnou klasifikáciou. Ak väčšina vašich údajov patrí do jednej triedy, model strojového učenia bude túto triedu predpovedať častejšie, jednoducho preto, že má k dispozícii viac údajov. Vyváženie údajov odstraňuje akúkoľvek skreslenosť a pomáha eliminovať túto nerovnováhu. Mnohé modely dosahujú najlepšie výsledky, keď je počet pozorovaní rovnaký, a preto majú tendenciu zápasiť s nevyváženými údajmi.\n", + "\n", + "Existujú dva hlavné spôsoby, ako sa vysporiadať s nevyváženými dátovými súbormi:\n", + "\n", + "- pridanie pozorovaní do minoritnej triedy: `Over-sampling`, napr. pomocou algoritmu SMOTE\n", + "\n", + "- odstránenie pozorovaní z majoritnej triedy: `Under-sampling`\n", + "\n", + "Teraz si ukážeme, ako pracovať s nevyváženými dátovými súbormi pomocou `receptu`. 
Recept si môžete predstaviť ako plán, ktorý popisuje, aké kroky by sa mali aplikovať na dátový súbor, aby bol pripravený na analýzu údajov.\n" + ], + "metadata": { + "id": "soAw6826JKx9" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load themis package for dealing with imbalanced data\r\n", + "library(themis)\r\n", + "\r\n", + "# Create a recipe for preprocessing data\r\n", + "cuisines_recipe <- recipe(cuisine ~ ., data = df_select) %>% \r\n", + " step_smote(cuisine)\r\n", + "\r\n", + "cuisines_recipe" + ], + "outputs": [], + "metadata": { + "id": "HS41brUIJVJy" + } + }, + { + "cell_type": "markdown", + "source": [ + "Poďme si rozobrať naše kroky predspracovania.\n", + "\n", + "- Volanie funkcie `recipe()` s formulou určuje *úlohy* premenných pomocou údajov `df_select` ako referencie. Napríklad stĺpec `cuisine` bol priradený úlohe `outcome`, zatiaľ čo ostatné stĺpce boli priradené úlohe `predictor`.\n", + "\n", + "- [`step_smote(cuisine)`](https://themis.tidymodels.org/reference/step_smote.html) vytvára *špecifikáciu* kroku receptu, ktorý synteticky generuje nové príklady minoritnej triedy pomocou najbližších susedov týchto prípadov.\n", + "\n", + "Ak by sme teraz chceli vidieť predspracované údaje, museli by sme [**`prep()`**](https://recipes.tidymodels.org/reference/prep.html) a [**`bake()`**](https://recipes.tidymodels.org/reference/bake.html) náš recept.\n", + "\n", + "`prep()`: odhaduje potrebné parametre z tréningovej množiny, ktoré môžu byť neskôr aplikované na iné množiny údajov.\n", + "\n", + "`bake()`: vezme pripravený recept a aplikuje operácie na akúkoľvek množinu údajov.\n" + ], + "metadata": { + "id": "Yb-7t7XcJaC8" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Prep and bake the recipe\r\n", + "preprocessed_df <- cuisines_recipe %>% \r\n", + " prep() %>% \r\n", + " bake(new_data = NULL) %>% \r\n", + " relocate(cuisine)\r\n", + "\r\n", + "# Display data\r\n", + "preprocessed_df %>% 
\r\n", + " slice_head(n = 5)\r\n", + "\r\n", + "# Quick summary stats\r\n", + "preprocessed_df %>% \r\n", + " introduce()" + ], + "outputs": [], + "metadata": { + "id": "9QhSgdpxJl44" + } + }, + { + "cell_type": "markdown", + "source": [ + "Poďme teraz skontrolovať rozdelenie našich kuchýň a porovnať ich s nevyváženými údajmi.\n" + ], + "metadata": { + "id": "dmidELh_LdV7" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Distribution of cuisines\r\n", + "new_label_count <- preprocessed_df %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(desc(n))\r\n", + "\r\n", + "list(new_label_count = new_label_count,\r\n", + " old_label_count = old_label_count)" + ], + "outputs": [], + "metadata": { + "id": "aSh23klBLwDz" + } + }, + { + "cell_type": "markdown", + "source": [ + "Mňam! Dáta sú pekné, čisté, vyvážené a veľmi chutné 😋!\n", + "\n", + "> Zvyčajne sa recept používa ako predspracovateľ pre modelovanie, kde definuje, aké kroky by sa mali aplikovať na dátovú sadu, aby bola pripravená na modelovanie. 
V takom prípade sa typicky používa `workflow()` (ako sme už videli v našich predchádzajúcich lekciách) namiesto manuálneho odhadovania receptu.\n", + ">\n", + "> Preto zvyčajne nepotrebujete **`prep()`** a **`bake()`** recepty, keď používate tidymodels, ale sú to užitočné funkcie, ktoré môžete mať vo svojej výbave na potvrdenie, že recepty robia to, čo očakávate, ako v našom prípade.\n", + ">\n", + "> Keď **`bake()`** predspracovaný recept s **`new_data = NULL`**, dostanete späť dáta, ktoré ste poskytli pri definovaní receptu, ale už prešli krokmi predspracovania.\n", + "\n", + "Teraz si uložíme kópiu týchto dát na použitie v budúcich lekciách:\n" + ], + "metadata": { + "id": "HEu80HZ8L7ae" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Save preprocessed data\r\n", + "write_csv(preprocessed_df, \"../../../data/cleaned_cuisines_R.csv\")" + ], + "outputs": [], + "metadata": { + "id": "cBmCbIgrMOI6" + } + }, + { + "cell_type": "markdown", + "source": [ + "Tento nový CSV súbor sa teraz nachádza v hlavnom priečinku s dátami.\n", + "\n", + "**🚀Výzva**\n", + "\n", + "Tento učebný plán obsahuje niekoľko zaujímavých datasetov. Prezrite si priečinky `data` a zistite, či niektoré obsahujú datasety vhodné na binárnu alebo viactriednu klasifikáciu. Aké otázky by ste mohli položiť tomuto datasetu?\n", + "\n", + "## [**Kvíz po prednáške**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/20/)\n", + "\n", + "## **Prehľad a samostatné štúdium**\n", + "\n", + "- Pozrite si [balík themis](https://github.com/tidymodels/themis). Aké ďalšie techniky by sme mohli použiť na riešenie nevyvážených dát?\n", + "\n", + "- Referenčná stránka [Tidy models](https://www.tidymodels.org/start/).\n", + "\n", + "- H. Wickham a G. 
Grolemund, [*R for Data Science: Vizualizácia, modelovanie, transformácia, úprava a import dát*](https://r4ds.had.co.nz/).\n", + "\n", + "#### ĎAKUJEME:\n", + "\n", + "[`Allison Horst`](https://twitter.com/allison_horst/) za vytvorenie úžasných ilustrácií, ktoré robia R prístupnejším a zábavnejším. Viac ilustrácií nájdete v jej [galérii](https://github.com/allisonhorst/stats-illustrations).\n", + "\n", + "[Cassie Breviu](https://www.twitter.com/cassieview) a [Jen Looper](https://www.twitter.com/jenlooper) za vytvorenie pôvodnej verzie tohto modulu v Pythone ♥️\n", + "\n", + "

\n", + " \n", + "

Ilustrácia od @allison_horst
\n" + ], + "metadata": { + "id": "WQs5621pMGwf" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby AI prekladu [Co-op Translator](https://github.com/Azure/co-op-translator). Hoci sa snažíme o presnosť, prosím, berte na vedomie, že automatizované preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho rodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre kritické informácie sa odporúča profesionálny ľudský preklad. Nie sme zodpovední za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sk/4-Classification/1-Introduction/solution/notebook.ipynb b/translations/sk/4-Classification/1-Introduction/solution/notebook.ipynb new file mode 100644 index 000000000..e29c69a24 --- /dev/null +++ b/translations/sk/4-Classification/1-Introduction/solution/notebook.ipynb @@ -0,0 +1,672 @@ +{ + "cells": [ + { + "source": [ + "# Lahodné ázijské a indické jedlá\n", + "\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "Nainštalujte Imblearn, ktorý umožní SMOTE. Toto je balík Scikit-learn, ktorý pomáha spracovávať nevyvážené údaje pri vykonávaní klasifikácie. 
(https://imbalanced-learn.org/stable/)\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: imblearn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.0)\n", + "Requirement already satisfied: imbalanced-learn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imblearn) (0.8.0)\n", + "Requirement already satisfied: numpy>=1.13.3 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (1.19.2)\n", + "Requirement already satisfied: scipy>=0.19.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (1.4.1)\n", + "Requirement already satisfied: scikit-learn>=0.24 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (0.24.2)\n", + "Requirement already satisfied: joblib>=0.11 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (0.16.0)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from scikit-learn>=0.24->imbalanced-learn->imblearn) (2.1.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install imblearn" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as 
plt\n", + "import matplotlib as mpl\n", + "import numpy as np\n", + "from imblearn.over_sampling import SMOTE" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('../../data/cuisines.csv')" + ] + }, + { + "source": [ + "Táto množina údajov obsahuje 385 stĺpcov, ktoré označujú všetky druhy ingrediencií v rôznych kuchyniach z daného súboru kuchýň.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 65 indian 0 0 0 0 0 \n", + "1 66 indian 1 0 0 0 0 \n", + "2 67 indian 0 0 0 0 0 \n", + "3 68 indian 0 0 0 0 0 \n", + "4 69 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 385 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
065indian00000000...0000000000
166indian10000000...0000000000
267indian00000000...0000000000
368indian00000000...0000000000
469indian00000000...0000000010
\n

5 rows × 385 columns

\n
" + }, + "metadata": {}, + "execution_count": 4 + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\nRangeIndex: 2448 entries, 0 to 2447\nColumns: 385 entries, Unnamed: 0 to zucchini\ndtypes: int64(384), object(1)\nmemory usage: 7.2+ MB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "korean 799\n", + "indian 598\n", + "chinese 442\n", + "japanese 320\n", + "thai 289\n", + "Name: cuisine, dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ], + "source": [ + "df.cuisine.value_counts()" + ] + }, + { + "source": [ + "Zobrazte kuchyne v stĺpcovom grafe\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 7 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAZEAAAD4CAYAAAAtrdtxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAASY0lEQVR4nO3df7TldV3v8eerGZkRRoeAiXtE5UgNIkUCjlwQIzAiC7NscdcSbcmsfkxl5SXX0juuyzK9d3UvlXnpplajma0kMtCUhluImNcr8msGBmb4pZaTQCFQOYom0fi+f+zPkd14hpnzOWefvYfzfKy113z35/vde7/22fvMa3++3733SVUhSVKPbxt3AEnSgcsSkSR1s0QkSd0sEUlSN0tEktRt+bgDLKYjjjiipqenxx1Dkg4oW7dufbiq1sy2bkmVyPT0NFu2bBl3DEk6oCT5u72tc3eWJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqduS+sT69vt3Mb3xqnHH0ALZefG5444gLXnORCRJ3SwRSVI3S0SS1M0SkSR1s0QkSd0sEUlSN0tEktRtIkokyaFJXtuWz0yyeY6X/29Jzh5NOknS3kxEiQCHAq/tvXBVvbmqPraAeSRJ+2FSSuRi4DuTbAN+E1iV5Iokdye5NEkAkrw5yc1JdiTZNDT+viTnjTG/JC1Jk1IiG4G/qaoTgTcAJwEXAscDxwCnt+3eUVUvrKrvAZ4KvGxfV5xkQ5ItSbbs/tqu0aSXpCVqUkpkTzdV1X1V9Q1gGzDdxs9KcmOS7cBLgO/e1xVV1aaqWldV65YdvHp0iSVpCZrUL2B8dGh5N7A8yUrgXcC6qro3yVuAleMIJ0kamJSZyFeAp+1jm5nCeDjJKsBjIJI0ZhMxE6mqf0xyXZIdwL8AX5xlmy8leTewA3gAuHmRY0qS9jARJQJQVa/ay/gvDS1fBFw0yzbrR5dMkrQ3k7I7S5J0ALJEJEndLBFJUjdLRJLUzRKRJHWbmHdnLYYTjlrNlovPHXcMSXrScCYiSepmiUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6rZ83AEW0/b7dzG98apxx9CY7Lz43HFHkJ50nIlIkrpZIpKkbpaIJKmbJSJJ6maJSJK6WSKSpG77VSJJPj3qIJKkA89+lUhVvWjUQSRJB579nYk8kmRVkmuT3JJke5Ifa+umk9yd5NIkdyW5IsnBbd2bk9ycZEeSTUnSxj+R5NeT3JTkM0m+r40vS/Kb7TK3J/m5Nj6V5JNJtrXrmtn+nCTXt0yXJ1k1ih+SJGl2czkm8nXgFVV1MnAW8FszpQA8F3hXVT0P+DLw2jb+jqp6YVV9D/BU4GVD17e8qk4BLgR+tY39NLCrql4IvBD42STPAV4FXF1VJwLPB7YlOQK4CDi7ZdoCvH4ud16SND9z+dqTAP8jyRnAN4CjgCPbunur6rq2/H7gdcDbgLOSvBE4GDgMuAP4i7bdh9q/W4HptnwO8L1JzmvnVwNrgZuB9yZ5CvDhqtqW5PuB44HrWpcdBFz/LaGTDcAGgGVPXzOHuytJ2pe5lMirgTXAC6rqsSQ7gZVtXe2xbSVZCbwLWFdV9yZ5y9D2AI+2f3cP5Qjwy1V19Z433srrXOB9Sd4O/DNwTVWd/0Shq2oTsAlgxdTaPXNKkuZhLruzVgMPtgI5Czh6aN2zk5zWll8FfIrHC+PhdqziPPbtauAX2oyDJMcmOSTJ0cAXq+rdwHuAk4EbgNOTfFfb9pAkx87h/kiS5ml/ZyIFXAr8RZLtDI4/3D20/h7gF5O8F7gT+N2q+lqSdwM7gAcY7JLal/cw2LV1Szve8hDw48CZwBuSPAY8Arymqh5Ksh6
4LMmKdvmLgM/s532SJM1Tqp54D0+Sw4FbqurovayfBja3g+cTbcXU2pq64JJxx9CY+FXwUp8kW6tq3WzrnnB3VpJnMDhY/bZRBJMkHdiecHdWVf098ITHGapqJzDxsxBJ0sLzu7MkSd0sEUlSN0tEktRtLh82POCdcNRqtvgOHUlaMM5EJEndLBFJUjdLRJLUzRKRJHWzRCRJ3SwRSVI3S0SS1M0SkSR1s0QkSd0sEUlSN0tEktTNEpEkdbNEJEndLBFJUjdLRJLUzRKRJHWzRCRJ3SwRSVI3S0SS1M0SkSR1s0QkSd2WjzvAYtp+/y6mN1417hhSt50XnzvuCNK/40xEktTNEpEkdbNEJEndLBFJUjdLRJLUzRKRJHWzRCRJ3Ra0RJK8L8l5s4w/I8kVC3lbkqTxW5QPG1bV3wPfUi6SpAPbvGYiSV6T5PYktyX54zZ8RpJPJ/nbmVlJkukkO9ry+iQfSvJXST6b5DeGru+cJNcnuSXJ5UlWtfGLk9zZbuttbWxNkg8mubmdTp/PfZEkzV33TCTJdwMXAS+qqoeTHAa8HZgCXgwcB1wJzLYb60TgJOBR4J4kvwP8S7u+s6vqq0n+C/D6JO8EXgEcV1WV5NB2Hb8N/K+q+lSSZwNXA8+bJecGYAPAsqev6b27kqRZzGd31kuAy6vqYYCq+qckAB+uqm8AdyY5ci+XvbaqdgEkuRM4GjgUOB64rl3PQcD1wC7g68AfJNkMbG7XcTZwfNsW4OlJVlXVI8M3VFWbgE0AK6bW1jzuryRpD6M4JvLo0HL2Y5vdLUeAa6rq/D03TnIK8AMMjqv8EoMC+zbg1Kr6+kKEliTN3XyOiXwc+E9JDgdou7Pm4wbg9CTf1a7vkCTHtuMiq6vq/wC/Ajy/bf9R4JdnLpzkxHneviRpjrpnIlV1R5JfA/5vkt3ArfMJUlUPJVkPXJZkRRu+CPgK8JEkKxnMVl7f1r0OeGeS2xncj08CPz+fDJKkuUnV0jlMsGJqbU1dcMm4Y0jd/HsiGockW6tq3Wzr/MS6JKmbJSJJ6maJSJK6WSKSpG6WiCSp26J8AeOkOOGo1Wzx3S2StGCciUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6maJSJK6LR93gMW0/f5dTG+8atwxJM3RzovPHXcE7YUzEUlSN0tEktTNEpEkdbNEJEndLBFJUjdLRJLUbWQlkuTTc9z+zCSb2/LLk2wcTTJJ0kIZ2edEqupF87jslcCVCxhHkjQCo5yJPNL+PTPJJ5JckeTuJJcmSVv30jZ2C/ATQ5ddn+QdbflHk9yY5NYkH0tyZBt/S5L3tuv+2ySvG9V9kSTNbrGOiZwEXAgcDxwDnJ5kJfBu4EeBFwD/YS+X/RRwalWdBPwp8MahdccBPwScAvxqkqeMJr4kaTaL9bUnN1XVfQBJtgHTwCPA56vqs238/cCGWS77TOADSaaAg4DPD627qqoeBR5N8iBwJHDf8IWTbJi53mVPX7OQ90mSlrzFmok8OrS8m7mV1+8A76iqE4CfA1bO5XqralNVrauqdcsOXj2Hm5Uk7cs43+J7NzCd5Dvb+fP3st1q4P62fMHIU0mS9tvYSqSqvs5gN9NV7cD6g3vZ9C3A5Um2Ag8vUjxJ0n5IVY07w6JZMbW2pi64ZNwxJM2RXwU/Xkm2VtW62db5iXVJUjdLRJLUzRKRJHWzRCRJ3SwRSVK3xfrE+kQ44ajVbPFdHpK0YJyJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqZolIkrotH3eAxbT9/l1Mb7xq3DEkaVHtvPjckV23MxF
JUjdLRJLUzRKRJHWzRCRJ3SwRSVI3S0SS1M0SkSR1W9ASSTKdZMdCXqckaXJNxEwkyZL60KMkPVmMrESSHJPk1iTfl+QPk2xv589q69cnuTLJx4Fr29gbktyc5PYkbx26rg8n2ZrkjiQbhsYfSfJrSW5LckOSI0d1fyRJ32okJZLkucAHgfXAKUBV1QnA+cAfJVnZNj0ZOK+qvj/JOcDatv2JwAuSnNG2+6mqegGwDnhdksPb+CHADVX1fOCTwM/OkmVDki1Jtuz+2q5R3F1JWrJGUSJrgI8Ar66q24AXA+8HqKq7gb8Djm3bXlNV/9SWz2mnW4FbgOMYlAoMiuM24AbgWUPj/wpsbstbgek9w1TVpqpaV1Xrlh28eqHuoySJ0XwB4y7gCwzK4859bPvVoeUA/7Oqfn94gyRnAmcDp1XV15J8ApiZyTxWVdWWd7PEvlBSksZtFDORfwVeAbwmyauA/we8GiDJscCzgXtmudzVwE8lWdW2PSrJdwCrgX9uBXIccOoIMkuSOozklXtVfTXJy4BrgP8OnJBkO/BvwPqqejTJnpf5aJLnAde3dY8APwn8FfDzSe5iUD43jCKzJGnu8vjeoCe/FVNra+qCS8YdQ5IW1Xz/nkiSrVW1brZ1E/E5EUnSgckSkSR1s0QkSd0sEUlSN0tEktRtSX0474SjVrNlnu9SkCQ9zpmIJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqtqT+KFWSrzD7n+adFEcAD487xBMw3/yYb37MNz/zyXd0Va2ZbcWS+u4s4J69/XWuSZBki/n6mW9+zDc/SzWfu7MkSd0sEUlSt6VWIpvGHWAfzDc/5psf883Pksy3pA6sS5IW1lKbiUiSFpAlIknqtmRKJMlLk9yT5HNJNo4pw3uTPJhkx9DYYUmuSfLZ9u+3t/Ek+d8t7+1JTl6EfM9K8tdJ7kxyR5L/PEkZk6xMclOS21q+t7bx5yS5seX4QJKD2viKdv5zbf30KPO121yW5NYkmycw284k25NsS7KljU3EY9tu89AkVyS5O8ldSU6blHxJntt+bjOnLye5cFLytdv8lfZ7sSPJZe33ZfTPv6p60p+AZcDfAMcABwG3AcePIccZwMnAjqGx3wA2tuWNwK+35R8B/hIIcCpw4yLkmwJObstPAz4DHD8pGdvtrGrLTwFubLf7Z8Ar2/jvAb/Qll8L/F5bfiXwgUX4Gb4e+BNgczs/Sdl2AkfsMTYRj227zT8CfqYtHwQcOkn5hnIuAx4Ajp6UfMBRwOeBpw4979YvxvNvUX7o4z4BpwFXD51/E/CmMWWZ5t+XyD3AVFueYvCBSIDfB86fbbtFzPoR4AcnMSNwMHAL8B8ZfAp3+Z6PNXA1cFpbXt62ywgzPRO4FngJsLn9BzIR2drt7ORbS2QiHltgdftPMJOYb49M5wDXTVI+BiVyL3BYez5tBn5oMZ5/S2V31swPeMZ9bWwSHFlV/9CWHwCObMtjzdymtycxeLU/MRnb7qJtwIPANQxmmF+qqn+bJcM387X1u4DDRxjvEuCNwDfa+cMnKBtAAR9NsjXJhjY2KY/tc4CHgD9suwPfk+SQCco37JXAZW15IvJV1f3A24AvAP/A4Pm0lUV4/i2VEjkg1OBlwdjfc51kFfBB4MKq+vLwunFnrKrdVXUig1f9pwDHjSvLsCQvAx6sqq3jzvIEXlxVJwM/DPxikjOGV475sV3OYFfv71bVScBXGewe+qZxP/cA2jGFlwOX77lunPnasZgfY1DGzwAOAV66GLe9VErkfuBZQ+ef2cYmwReTTAG0fx9s42PJnOQpDArk0qr60CRmBKiqLwF/zWCKfmiSme+BG87wzXxt/WrgH0cU6XTg5Ul2An/KYJfWb09INuCbr1apqgeBP2dQwpPy2N4H3FdVN7bzVzAolUnJN+OHgVuq6ovt/KTkOxv4fFU9VFWPAR9i8Jwc+fNvqZT
IzcDa9k6FgxhMR68cc6YZVwIXtOULGByHmBl/TXuXx6nArqFp80gkCfAHwF1V9fZJy5hkTZJD2/JTGRyvuYtBmZy3l3wzuc8DPt5eLS64qnpTVT2zqqYZPL8+XlWvnoRsAEkOSfK0mWUG+/V3MCGPbVU9ANyb5Llt6AeAOycl35DzeXxX1kyOScj3BeDUJAe33+OZn9/on3+LcSBqEk4M3i3xGQb70P/rmDJcxmB/5WMMXnn9NIP9kNcCnwU+BhzWtg3wzpZ3O7BuEfK9mMF0/HZgWzv9yKRkBL4XuLXl2wG8uY0fA9wEfI7BboYVbXxlO/+5tv6YRXqcz+Txd2dNRLaW47Z2umPmd2BSHtt2mycCW9rj+2Hg2ycs3yEMXq2vHhqbpHxvBe5uvxt/DKxYjOefX3siSeq2VHZnSZJGwBKRJHWzRCRJ3SwRSVI3S0SS1M0SkSR1s0QkSd3+PxNFbW14TY8fAAAAAElFTkSuQmCC\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "df.cuisine.value_counts().plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "thai df: (289, 385)\njapanese df: (320, 385)\nchinese df: (442, 385)\nindian df: (598, 385)\nkorean df: (799, 385)\n" + ] + } + ], + "source": [ + "\n", + "thai_df = df[(df.cuisine == \"thai\")]\n", + "japanese_df = df[(df.cuisine == \"japanese\")]\n", + "chinese_df = df[(df.cuisine == \"chinese\")]\n", + "indian_df = df[(df.cuisine == \"indian\")]\n", + "korean_df = df[(df.cuisine == \"korean\")]\n", + "\n", + "print(f'thai df: {thai_df.shape}')\n", + "print(f'japanese df: {japanese_df.shape}')\n", + "print(f'chinese df: {chinese_df.shape}')\n", + "print(f'indian df: {indian_df.shape}')\n", + "print(f'korean df: {korean_df.shape}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "def create_ingredient_df(df):\n", + " # transpose df, drop cuisine and unnamed rows, sum the row to get total for ingredient and add value header to new df\n", + " ingredient_df = df.T.drop(['cuisine','Unnamed: 0']).sum(axis=1).to_frame('value')\n", + " # drop ingredients that have a 0 sum\n", + " ingredient_df = ingredient_df[(ingredient_df.T != 0).any()]\n", + " # sort df\n", + " ingredient_df = 
ingredient_df.sort_values(by='value', ascending=False, inplace=False)\n", + " return ingredient_df\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 10 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "thai_ingredient_df = create_ingredient_df(thai_df)\r\n", + "thai_ingredient_df.head(10).plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 11 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "japanese_ingredient_df = create_ingredient_df(japanese_df)\r\n", + "japanese_ingredient_df.head(10).plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 12 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "chinese_ingredient_df = create_ingredient_df(chinese_df)\r\n", + "chinese_ingredient_df.head(10).plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 13 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "indian_ingredient_df = create_ingredient_df(indian_df)\r\n", + "indian_ingredient_df.head(10).plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 14 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "korean_ingredient_df = create_ingredient_df(korean_df)\r\n", + "korean_ingredient_df.head(10).plot.barh()" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... 
whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 15 + } + ], + "source": [ + "feature_df= df.drop(['cuisine','Unnamed: 0','rice','garlic','ginger'], axis=1)\n", + "labels_df = df.cuisine #.unique()\n", + "feature_df.head()\n" + ] + }, + { + "source": [ + "Vyvážte údaje pomocou SMOTE oversamplingu na najvyššiu triedu. Viac informácií nájdete tu: https://imbalanced-learn.org/dev/references/generated/imblearn.over_sampling.SMOTE.html\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "oversample = SMOTE()\n", + "transformed_feature_df, transformed_label_df = oversample.fit_resample(feature_df, labels_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "new label count: korean 799\nchinese 799\njapanese 799\nindian 799\nthai 799\nName: cuisine, dtype: int64\nold label count: korean 799\nindian 598\nchinese 442\njapanese 320\nthai 289\nName: cuisine, dtype: int64\n" + ] + } + ], + "source": [ + "print(f'new label count: {transformed_label_df.value_counts()}')\r\n", + "print(f'old label count: {df.cuisine.value_counts()}')" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 
0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 18 + } + ], + "source": [ + "transformed_feature_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " cuisine almond angelica anise anise_seed apple apple_brandy \\\n", + "0 indian 0 0 0 0 0 0 \n", + "1 indian 1 0 0 0 0 0 \n", + "2 indian 0 0 0 0 0 0 \n", + "3 indian 0 0 0 0 0 0 \n", + "4 indian 0 0 0 0 0 0 \n", + "... ... ... ... ... ... ... ... \n", + "3990 thai 0 0 0 0 0 0 \n", + "3991 thai 0 0 0 0 0 0 \n", + "3992 thai 0 0 0 0 0 0 \n", + "3993 thai 0 0 0 0 0 0 \n", + "3994 thai 0 0 0 0 0 0 \n", + "\n", + " apricot armagnac artemisia ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "... ... ... ... ... ... ... ... \n", + "3990 0 0 0 ... 0 0 0 \n", + "3991 0 0 0 ... 0 0 0 \n", + "3992 0 0 0 ... 0 0 0 \n", + "3993 0 0 0 ... 0 0 0 \n", + "3994 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "... ... ... ... ... ... ... ... \n", + "3990 0 0 0 0 0 0 0 \n", + "3991 0 0 0 0 0 0 0 \n", + "3992 0 0 0 0 0 0 0 \n", + "3993 0 0 0 0 0 0 0 \n", + "3994 0 0 0 0 0 0 0 \n", + "\n", + "[3995 rows x 381 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisia...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
0indian000000000...0000000000
1indian100000000...0000000000
2indian000000000...0000000000
3indian000000000...0000000000
4indian000000000...0000000010
..................................................................
3990thai000000000...0000000000
3991thai000000000...0000000000
3992thai000000000...0000000000
3993thai000000000...0000000000
3994thai000000000...0000000000
\n

3995 rows × 381 columns

\n
" + }, + "metadata": {}, + "execution_count": 19 + } + ], + "source": [ + "# export transformed data to new df for classification\n", + "transformed_df = pd.concat([transformed_label_df,transformed_feature_df],axis=1, join='outer')\n", + "transformed_df" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\nRangeIndex: 3995 entries, 0 to 3994\nColumns: 381 entries, cuisine to zucchini\ndtypes: int64(380), object(1)\nmemory usage: 11.6+ MB\n" + ] + } + ], + "source": [ + "transformed_df.info()" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "transformed_df.to_csv(\"../../data/cleaned_cuisines.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby na automatický preklad [Co-op Translator](https://github.com/Azure/co-op-translator). Aj keď sa snažíme o presnosť, upozorňujeme, že automatické preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre dôležité informácie sa odporúča profesionálny ľudský preklad. 
Nezodpovedáme za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "1da12ed6d238756959b8de9cac2a35a2", + "translation_date": "2025-09-06T14:51:13+00:00", + "source_file": "4-Classification/1-Introduction/solution/notebook.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/translations/sk/4-Classification/2-Classifiers-1/notebook.ipynb b/translations/sk/4-Classification/2-Classifiers-1/notebook.ipynb new file mode 100644 index 000000000..4d7e9bb24 --- /dev/null +++ b/translations/sk/4-Classification/2-Classifiers-1/notebook.ipynb @@ -0,0 +1,39 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "68829b06b4dcd512d3327849191f4d7f", + "translation_date": "2025-09-06T14:32:28+00:00", + "source_file": "4-Classification/2-Classifiers-1/notebook.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, 
+ "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby na automatický preklad [Co-op Translator](https://github.com/Azure/co-op-translator). Aj keď sa snažíme o presnosť, upozorňujeme, že automatické preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre dôležité informácie sa odporúča profesionálny ľudský preklad. Nezodpovedáme za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sk/4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb b/translations/sk/4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb new file mode 100644 index 000000000..423e84ad7 --- /dev/null +++ b/translations/sk/4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb @@ -0,0 +1,1285 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_11-R.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "6ea6a5171b1b99b7b5a55f7469c048d2", + "translation_date": "2025-09-06T14:34:22+00:00", + "source_file": "4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb", + "language_code": "sk" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "zs2woWv_HoE8" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Klasifikátory kuchýň 1\n", + "\n", + "V tejto lekcii preskúmame rôzne klasifikátory na *predpovedanie národnej kuchyne na základe skupiny ingrediencií.* Pri tom sa dozvieme viac o spôsoboch, akými môžu byť algoritmy využívané na úlohy klasifikácie.\n", + "\n", + "### [**Kvíz pred prednáškou**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/21/)\n", + "\n", + "### **Príprava**\n", 
+ "\n", + "Táto lekcia nadväzuje na našu [predchádzajúcu lekciu](https://github.com/microsoft/ML-For-Beginners/blob/main/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb), kde sme:\n", + "\n", + "- Jemne uviedli klasifikácie pomocou datasetu o všetkých úžasných kuchyniach Ázie a Indie 😋.\n", + "\n", + "- Preskúmali niektoré [slovesá dplyr](https://dplyr.tidyverse.org/) na prípravu a čistenie našich dát.\n", + "\n", + "- Vytvorili krásne vizualizácie pomocou ggplot2.\n", + "\n", + "- Ukázali, ako sa vysporiadať s nevyváženými dátami ich predspracovaním pomocou [recipes](https://recipes.tidymodels.org/articles/Simple_Example.html).\n", + "\n", + "- Demonštrovali, ako použiť `prep` a `bake` na náš recept, aby sme si overili, že funguje podľa očakávaní.\n", + "\n", + "#### **Predpoklady**\n", + "\n", + "Na túto lekciu budeme potrebovať nasledujúce balíky na čistenie, prípravu a vizualizáciu našich dát:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) je [kolekcia balíkov pre R](https://www.tidyverse.org/packages), ktorá robí dátovú vedu rýchlejšou, jednoduchšou a zábavnejšou!\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) je rámec [kolekcie balíkov](https://www.tidymodels.org/packages/) na modelovanie a strojové učenie.\n", + "\n", + "- `themis`: [balík themis](https://themis.tidymodels.org/) poskytuje dodatočné kroky receptov na riešenie nevyvážených dát.\n", + "\n", + "- `nnet`: [balík nnet](https://cran.r-project.org/web/packages/nnet/nnet.pdf) poskytuje funkcie na odhadovanie dopredných neurónových sietí s jednou skrytou vrstvou a na modely multinomiálnej logistickej regresie.\n", + "\n", + "Môžete ich nainštalovať takto:\n" + ], + "metadata": { + "id": "iDFOb3ebHwQC" + } + }, + { + "cell_type": "markdown", + "source": [ + "`install.packages(c(\"tidyverse\", \"tidymodels\", \"themis\", \"nnet\", \"here\"))`\n", + "\n", + "Alternatívne, nasledujúci skript skontroluje, či máte nainštalované balíky potrebné na dokončenie
tohto modulu, a v prípade, že chýbajú, ich nainštaluje za vás.\n" + ], + "metadata": { + "id": "4V85BGCjII7F" + } + }, + { + "cell_type": "code", + "execution_count": 2, + "source": [ + "suppressWarnings(if (!require(\"pacman\"))install.packages(\"pacman\"))\r\n", + "\r\n", + "pacman::p_load(tidyverse, tidymodels, themis, here)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Loading required package: pacman\n", + "\n" + ] + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "an5NPyyKIKNR", + "outputId": "834d5e74-f4b8-49f9-8ab5-4c52ff2d7bc8" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 1. Rozdeľte údaje na tréningovú a testovaciu množinu.\n", + "\n", + "Začneme výberom niekoľkých krokov z našej predchádzajúcej lekcie.\n", + "\n", + "### Odstráňte najbežnejšie ingrediencie, ktoré spôsobujú zmätok medzi rôznymi kuchyňami, pomocou `dplyr::select()`.\n", + "\n", + "Každý miluje ryžu, cesnak a zázvor!\n" + ], + "metadata": { + "id": "0ax9GQLBINVv" + } + }, + { + "cell_type": "code", + "execution_count": 3, + "source": [ + "# Load the original cuisines data\r\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/4-Classification/data/cuisines.csv\")\r\n", + "\r\n", + "# Drop id column, rice, garlic and ginger from our original data set\r\n", + "df_select <- df %>% \r\n", + " select(-c(1, rice, garlic, ginger)) %>%\r\n", + " # Encode cuisine column as categorical\r\n", + " mutate(cuisine = factor(cuisine))\r\n", + "\r\n", + "# Display new data set\r\n", + "df_select %>% \r\n", + " slice_head(n = 5)\r\n", + "\r\n", + "# Display distribution of cuisines\r\n", + "df_select %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(desc(n))" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "New names:\n", + "* `` -> ...1\n", + "\n", + "\u001b[1m\u001b[1mRows: 
\u001b[1m\u001b[22m\u001b[34m\u001b[34m2448\u001b[34m\u001b[39m \u001b[1m\u001b[1mColumns: \u001b[1m\u001b[22m\u001b[34m\u001b[34m385\u001b[34m\u001b[39m\n", + "\n", + "\u001b[36m──\u001b[39m \u001b[1m\u001b[1mColumn specification\u001b[1m\u001b[22m \u001b[36m────────────────────────────────────────────────────────\u001b[39m\n", + "\u001b[1mDelimiter:\u001b[22m \",\"\n", + "\u001b[31mchr\u001b[39m (1): cuisine\n", + "\u001b[32mdbl\u001b[39m (384): ...1, almond, angelica, anise, anise_seed, apple, apple_brandy, a...\n", + "\n", + "\n", + "\u001b[36mℹ\u001b[39m Use \u001b[30m\u001b[47m\u001b[30m\u001b[47m`spec()`\u001b[47m\u001b[30m\u001b[49m\u001b[39m to retrieve the full column specification for this data.\n", + "\u001b[36mℹ\u001b[39m Specify the column types or set \u001b[30m\u001b[47m\u001b[30m\u001b[47m`show_col_types = FALSE`\u001b[47m\u001b[30m\u001b[49m\u001b[39m to quiet this message.\n", + "\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine almond angelica anise anise_seed apple apple_brandy apricot armagnac\n", + "1 indian 0 0 0 0 0 0 0 0 \n", + "2 indian 1 0 0 0 0 0 0 0 \n", + "3 indian 0 0 0 0 0 0 0 0 \n", + "4 indian 0 0 0 0 0 0 0 0 \n", + "5 indian 0 0 0 0 0 0 0 0 \n", + " artemisia ⋯ whiskey white_bread white_wine whole_grain_wheat_flour wine wood\n", + "1 0 ⋯ 0 0 0 0 0 0 \n", + "2 0 ⋯ 0 0 0 0 0 0 \n", + "3 0 ⋯ 0 0 0 0 0 0 \n", + "4 0 ⋯ 0 0 0 0 0 0 \n", + "5 0 ⋯ 0 0 0 0 0 0 \n", + " yam yeast yogurt zucchini\n", + "1 0 0 0 0 \n", + "2 0 0 0 0 \n", + "3 0 0 0 0 \n", + "4 0 0 0 0 \n", + "5 0 0 1 0 " + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 381\n", + "\n", + "| cuisine <fct> | almond <dbl> | angelica <dbl> | anise <dbl> | anise_seed <dbl> | apple <dbl> | apple_brandy <dbl> | apricot <dbl> | armagnac <dbl> | artemisia <dbl> | ⋯ ⋯ | whiskey <dbl> | white_bread <dbl> | white_wine <dbl> | whole_grain_wheat_flour <dbl> | wine <dbl> | wood <dbl> | yam <dbl> | yeast <dbl> | yogurt <dbl> | zucchini <dbl> |\n", + 
"|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", + "| indian | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| indian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| indian | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| indian | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| indian | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 381\n", + "\\begin{tabular}{lllllllllllllllllllll}\n", + " cuisine & almond & angelica & anise & anise\\_seed & apple & apple\\_brandy & apricot & armagnac & artemisia & ⋯ & whiskey & white\\_bread & white\\_wine & whole\\_grain\\_wheat\\_flour & wine & wood & yam & yeast & yogurt & zucchini\\\\\n", + " & & & & & & & & & & ⋯ & & & & & & & & & & \\\\\n", + "\\hline\n", + "\t indian & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t indian & 1 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t indian & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t indian & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t indian & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 1 & 0\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 381
cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiawhiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
<fct><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
indian0000000000000000000
indian1000000000000000000
indian0000000000000000000
indian0000000000000000000
indian0000000000000000010
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine n \n", + "1 korean 799\n", + "2 indian 598\n", + "3 chinese 442\n", + "4 japanese 320\n", + "5 thai 289" + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 2\n", + "\n", + "| cuisine <fct> | n <int> |\n", + "|---|---|\n", + "| korean | 799 |\n", + "| indian | 598 |\n", + "| chinese | 442 |\n", + "| japanese | 320 |\n", + "| thai | 289 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 2\n", + "\\begin{tabular}{ll}\n", + " cuisine & n\\\\\n", + " & \\\\\n", + "\\hline\n", + "\t korean & 799\\\\\n", + "\t indian & 598\\\\\n", + "\t chinese & 442\\\\\n", + "\t japanese & 320\\\\\n", + "\t thai & 289\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 2
cuisinen
<fct><int>
korean 799
indian 598
chinese 442
japanese320
thai 289
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 735 + }, + "id": "jhCrrH22IWVR", + "outputId": "d444a85c-1d8b-485f-bc4f-8be2e8f8217c" + } + }, + { + "cell_type": "markdown", + "source": [ + "Perfektné! Teraz je čas rozdeliť údaje tak, aby 70 % údajov išlo na tréning a 30 % na testovanie. Pri rozdeľovaní údajov použijeme aj techniku `stratifikácie`, aby sme `zachovali pomer jednotlivých kuchýň` v tréningových a validačných datasetoch.\n", + "\n", + "[rsample](https://rsample.tidymodels.org/), balík v Tidymodels, poskytuje infraštruktúru na efektívne rozdeľovanie a resampling údajov:\n" + ], + "metadata": { + "id": "AYTjVyajIdny" + } + }, + { + "cell_type": "code", + "execution_count": 4, + "source": [ + "# Load the core Tidymodels packages into R session\r\n", + "library(tidymodels)\r\n", + "\r\n", + "# Create split specification\r\n", + "set.seed(2056)\r\n", + "cuisines_split <- initial_split(data = df_select,\r\n", + " strata = cuisine,\r\n", + " prop = 0.7)\r\n", + "\r\n", + "# Extract the data in each split\r\n", + "cuisines_train <- training(cuisines_split)\r\n", + "cuisines_test <- testing(cuisines_split)\r\n", + "\r\n", + "# Print the number of cases in each split\r\n", + "cat(\"Training cases: \", nrow(cuisines_train), \"\\n\",\r\n", + " \"Test cases: \", nrow(cuisines_test), sep = \"\")\r\n", + "\r\n", + "# Display the first few rows of the training set\r\n", + "cuisines_train %>% \r\n", + " slice_head(n = 5)\r\n", + "\r\n", + "\r\n", + "# Display distribution of cuisines in the training set\r\n", + "cuisines_train %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(desc(n))" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Training cases: 1712\n", + "Test cases: 736" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine almond angelica anise anise_seed apple apple_brandy apricot armagnac\n", + "1 chinese 0 0 0 0 0 
0 0 0 \n", + "2 chinese 0 0 0 0 0 0 0 0 \n", + "3 chinese 0 0 0 0 0 0 0 0 \n", + "4 chinese 0 0 0 0 0 0 0 0 \n", + "5 chinese 0 0 0 0 0 0 0 0 \n", + " artemisia ⋯ whiskey white_bread white_wine whole_grain_wheat_flour wine wood\n", + "1 0 ⋯ 0 0 0 0 1 0 \n", + "2 0 ⋯ 0 0 0 0 1 0 \n", + "3 0 ⋯ 0 0 0 0 0 0 \n", + "4 0 ⋯ 0 0 0 0 0 0 \n", + "5 0 ⋯ 0 0 0 0 0 0 \n", + " yam yeast yogurt zucchini\n", + "1 0 0 0 0 \n", + "2 0 0 0 0 \n", + "3 0 0 0 0 \n", + "4 0 0 0 0 \n", + "5 0 0 0 0 " + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 381\n", + "\n", + "| cuisine <fct> | almond <dbl> | angelica <dbl> | anise <dbl> | anise_seed <dbl> | apple <dbl> | apple_brandy <dbl> | apricot <dbl> | armagnac <dbl> | artemisia <dbl> | ⋯ ⋯ | whiskey <dbl> | white_bread <dbl> | white_wine <dbl> | whole_grain_wheat_flour <dbl> | wine <dbl> | wood <dbl> | yam <dbl> | yeast <dbl> | yogurt <dbl> | zucchini <dbl> |\n", + "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 381\n", + "\\begin{tabular}{lllllllllllllllllllll}\n", + " cuisine & almond & angelica & anise & anise\\_seed & apple & apple\\_brandy & apricot & armagnac & artemisia & ⋯ & whiskey & white\\_bread & white\\_wine & whole\\_grain\\_wheat\\_flour & wine & wood & yam & yeast & yogurt & zucchini\\\\\n", + " & & & & & & & & & & ⋯ & & & & & & & & & & \\\\\n", + "\\hline\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 1 & 0 & 0 & 0 & 0 
& 0\\\\\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 1 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 381
cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiawhiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
<fct><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
chinese0000000000000100000
chinese0000000000000100000
chinese0000000000000000000
chinese0000000000000000000
chinese0000000000000000000
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine n \n", + "1 korean 559\n", + "2 indian 418\n", + "3 chinese 309\n", + "4 japanese 224\n", + "5 thai 202" + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 2\n", + "\n", + "| cuisine <fct> | n <int> |\n", + "|---|---|\n", + "| korean | 559 |\n", + "| indian | 418 |\n", + "| chinese | 309 |\n", + "| japanese | 224 |\n", + "| thai | 202 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 2\n", + "\\begin{tabular}{ll}\n", + " cuisine & n\\\\\n", + " & \\\\\n", + "\\hline\n", + "\t korean & 559\\\\\n", + "\t indian & 418\\\\\n", + "\t chinese & 309\\\\\n", + "\t japanese & 224\\\\\n", + "\t thai & 202\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 2
cuisinen
<fct><int>
korean 559
indian 418
chinese 309
japanese224
thai 202
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 535 + }, + "id": "w5FWIkEiIjdN", + "outputId": "2e195fd9-1a8f-4b91-9573-cce5582242df" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 2. Riešenie nevyvážených údajov\n", + "\n", + "Ako ste si mohli všimnúť v pôvodnej dátovej sade, ako aj v našej tréningovej sade, existuje pomerne nerovnomerné rozdelenie počtu kuchýň. Kórejské kuchyne sú *takmer* 3-krát častejšie ako thajské kuchyne. Nevyvážené údaje často negatívne ovplyvňujú výkon modelu. Mnohé modely dosahujú najlepšie výsledky, keď je počet pozorovaní rovnaký, a preto majú tendenciu mať problémy s nevyváženými údajmi.\n", + "\n", + "Existujú dva hlavné spôsoby, ako riešiť nevyvážené dátové sady:\n", + "\n", + "- pridanie pozorovaní do menšinovej triedy: `Over-sampling`, napríklad pomocou algoritmu SMOTE, ktorý synteticky generuje nové príklady menšinovej triedy pomocou najbližších susedov týchto prípadov.\n", + "\n", + "- odstránenie pozorovaní z väčšinovej triedy: `Under-sampling`\n", + "\n", + "V našej predchádzajúcej lekcii sme ukázali, ako riešiť nevyvážené dátové sady pomocou `receptu`. Recept si môžeme predstaviť ako plán, ktorý popisuje, aké kroky by sa mali aplikovať na dátovú sadu, aby bola pripravená na analýzu údajov. V našom prípade chceme dosiahnuť rovnomerné rozdelenie počtu našich kuchýň pre našu `tréningovú sadu`. 
Poďme sa do toho pustiť.\n" + ], + "metadata": { + "id": "daBi9qJNIwqW" + } + }, + { + "cell_type": "code", + "execution_count": 5, + "source": [ + "# Load themis package for dealing with imbalanced data\r\n", + "library(themis)\r\n", + "\r\n", + "# Create a recipe for preprocessing training data\r\n", + "cuisines_recipe <- recipe(cuisine ~ ., data = cuisines_train) %>% \r\n", + " step_smote(cuisine)\r\n", + "\r\n", + "# Print recipe\r\n", + "cuisines_recipe" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Data Recipe\n", + "\n", + "Inputs:\n", + "\n", + " role #variables\n", + " outcome 1\n", + " predictor 380\n", + "\n", + "Operations:\n", + "\n", + "SMOTE based on cuisine" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 200 + }, + "id": "Az6LFBGxI1X0", + "outputId": "29d71d85-64b0-4e62-871e-bcd5398573b6" + } + }, + { + "cell_type": "markdown", + "source": [ + "Môžete samozrejme potvrdiť (pomocou `prep()` a `bake()`), že recept bude fungovať tak, ako očakávate - všetky označenia kuchyne majú `559` pozorovaní.\n", + "\n", + "Keďže tento recept budeme používať ako predspracovanie pre modelovanie, `workflow()` za nás vykoná `prep()` aj `bake()`, takže recept nebudeme musieť manuálne odhadovať.\n", + "\n", + "Teraz sme pripravení trénovať model 👩‍💻👨‍💻!\n", + "\n", + "## 3. Výber klasifikátora\n", + "\n", + "

\n", + " \n", + "

Ilustrácia od @allison_horst
\n" + ], + "metadata": { + "id": "NBL3PqIWJBBB" + } + }, + { + "cell_type": "markdown", + "source": [ + "Teraz musíme rozhodnúť, ktorý algoritmus použiť na túto úlohu 🤔.\n", + "\n", + "V Tidymodels poskytuje [`parsnip package`](https://parsnip.tidymodels.org/index.html) konzistentné rozhranie na prácu s modelmi naprieč rôznymi enginmi (balíčkami). Pozrite si dokumentáciu k parsnip, aby ste preskúmali [typy modelov a enginy](https://www.tidymodels.org/find/parsnip/#models) a ich zodpovedajúce [argumenty modelov](https://www.tidymodels.org/find/parsnip/#model-args). Rozmanitosť môže byť na prvý pohľad dosť mätúca. Napríklad nasledujúce metódy zahŕňajú techniky klasifikácie:\n", + "\n", + "- C5.0 modely založené na pravidlách\n", + "- Flexibilné diskriminačné modely\n", + "- Lineárne diskriminačné modely\n", + "- Regularizované diskriminačné modely\n", + "- Modely logistickej regresie\n", + "- Modely multinomiálnej regresie\n", + "- Modely naivného Bayesa\n", + "- Podporné vektorové stroje\n", + "- Najbližší susedia\n", + "- Rozhodovacie stromy\n", + "- Ensemble metódy\n", + "- Neurónové siete\n", + "\n", + "A zoznam pokračuje!\n", + "\n", + "### **Aký klasifikátor zvoliť?**\n", + "\n", + "Takže, ktorý klasifikátor by ste si mali vybrať? Často je dobrým spôsobom testovania prejsť viacerými a hľadať dobrý výsledok.\n", + "\n", + "> AutoML tento problém elegantne rieši tým, že vykonáva tieto porovnania v cloude, čo vám umožňuje vybrať najlepší algoritmus pre vaše dáta. Vyskúšajte to [tu](https://docs.microsoft.com/learn/modules/automate-model-selection-with-azure-automl/?WT.mc_id=academic-77952-leestott)\n", + "\n", + "Výber klasifikátora však závisí aj od nášho problému. 
Napríklad, keď výsledok môže byť kategorizovaný do `viac ako dvoch tried`, ako v našom prípade, musíte použiť `algoritmus pre multiklasifikáciu` namiesto `binárnej klasifikácie.`\n", + "\n", + "### **Lepší prístup**\n", + "\n", + "Lepším spôsobom ako náhodne hádať je však riadiť sa nápadmi z tohto stiahnuteľného [ML Cheat sheet](https://docs.microsoft.com/azure/machine-learning/algorithm-cheat-sheet?WT.mc_id=academic-77952-leestott). Tu zistíme, že pre náš problém s multiklasifikáciou máme niekoľko možností:\n", + "\n", + "

\n", + " \n", + "

Časť Microsoftovho prehľadu algoritmov, ktorá podrobne opisuje možnosti multiklasifikácie
\n" + ], + "metadata": { + "id": "a6DLAZ3vJZ14" + } + }, + { + "cell_type": "markdown", + "source": [ + "### **Úvaha**\n", + "\n", + "Pozrime sa, či dokážeme logicky zhodnotiť rôzne prístupy vzhľadom na obmedzenia, ktoré máme:\n", + "\n", + "- **Hlboké neurónové siete sú príliš náročné**. Vzhľadom na náš čistý, ale minimálny dataset a fakt, že trénovanie prebieha lokálne cez notebooky, sú hlboké neurónové siete pre túto úlohu príliš náročné.\n", + "\n", + "- **Žiadny dvojtriedny klasifikátor**. Nepoužívame dvojtriedny klasifikátor, takže možnosť one-vs-all je vylúčená.\n", + "\n", + "- **Rozhodovací strom alebo logistická regresia by mohli fungovať**. Rozhodovací strom by mohol fungovať, rovnako ako multinomiálna regresia/multitriedna logistická regresia pre multitriedne dáta.\n", + "\n", + "- **Multitriedne Boosted Decision Trees riešia iný problém**. Multitriedny Boosted Decision Tree je najvhodnejší pre neparametrické úlohy, napríklad úlohy zamerané na vytváranie rebríčkov, takže pre nás nie je užitočný.\n", + "\n", + "Okrem toho, predtým než sa pustíme do zložitejších modelov strojového učenia, ako sú ensemble metódy, je zvyčajne dobré začať s najjednoduchším možným modelom, aby sme získali predstavu o tom, čo sa deje. Preto v tejto lekcii začneme s modelom `multinomiálnej regresie`.\n", + "\n", + "> Logistická regresia je technika používaná, keď je výstupná premenná kategóriálna (alebo nominálna). Pri binárnej logistickej regresii je počet výstupných premenných dva, zatiaľ čo pri multinomiálnej logistickej regresii je počet výstupných premenných viac ako dva. Viac informácií nájdete v [Pokročilé regresné metódy](https://bookdown.org/chua/ber642_advanced_regression/multinomial-logistic-regression.html).\n", + "\n", + "## 4. 
Trénovanie a hodnotenie modelu multinomiálnej logistickej regresie\n", + "\n", + "V Tidymodels funkcia `parsnip::multinom_reg()` definuje model, ktorý používa lineárne prediktory na predpovedanie multitriednych dát pomocou multinomiálneho rozdelenia. Pozrite si `?multinom_reg()` pre rôzne spôsoby/enginy, ktoré môžete použiť na fitovanie tohto modelu.\n", + "\n", + "V tomto príklade budeme fitovať model multinomiálnej regresie cez predvolený engine [nnet](https://cran.r-project.org/web/packages/nnet/nnet.pdf).\n", + "\n", + "> Hodnotu pre `penalty` som vybral tak trochu náhodne. Existujú lepšie spôsoby, ako túto hodnotu zvoliť, napríklad pomocou `resamplingu` a `ladenia` modelu, o ktorých budeme hovoriť neskôr.\n", + ">\n", + "> Pozrite si [Tidymodels: Začnite](https://www.tidymodels.org/start/tuning/), ak sa chcete dozvedieť viac o ladení hyperparametrov modelu.\n" + ], + "metadata": { + "id": "gWMsVcbBJemu" + } + }, + { + "cell_type": "code", + "execution_count": 6, + "source": [ + "# Create a multinomial regression model specification\r\n", + "mr_spec <- multinom_reg(penalty = 1) %>% \r\n", + " set_engine(\"nnet\", MaxNWts = 2086) %>% \r\n", + " set_mode(\"classification\")\r\n", + "\r\n", + "# Print model specification\r\n", + "mr_spec" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Multinomial Regression Model Specification (classification)\n", + "\n", + "Main Arguments:\n", + " penalty = 1\n", + "\n", + "Engine-Specific Arguments:\n", + " MaxNWts = 2086\n", + "\n", + "Computational engine: nnet \n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 166 + }, + "id": "Wq_fcyQiJvfG", + "outputId": "c30449c7-3864-4be7-f810-72a003743e2d" + } + }, + { + "cell_type": "markdown", + "source": [ + "Skvelá práca 🥳! 
Teraz, keď máme recept a špecifikáciu modelu, musíme nájsť spôsob, ako ich spojiť do objektu, ktorý najprv predspracuje dáta, potom na predspracovaných dátach natrénuje model a zároveň umožní aj prípadné aktivity po spracovaní. V Tidymodels sa tento praktický objekt nazýva [`workflow`](https://workflows.tidymodels.org/) a pohodlne uchováva vaše modelovacie komponenty! Toto by sme v *Pythone* nazvali *pipelines*.\n", + "\n", + "Takže poďme všetko zabaliť do workflowu!📦\n" + ], + "metadata": { + "id": "NlSbzDfgJ0zh" + } + }, + { + "cell_type": "code", + "execution_count": 7, + "source": [ + "# Bundle recipe and model specification\r\n", + "mr_wf <- workflow() %>% \r\n", + " add_recipe(cuisines_recipe) %>% \r\n", + " add_model(mr_spec)\r\n", + "\r\n", + "# Print out workflow\r\n", + "mr_wf" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "══ Workflow ════════════════════════════════════════════════════════════════════\n", + "\u001b[3mPreprocessor:\u001b[23m Recipe\n", + "\u001b[3mModel:\u001b[23m multinom_reg()\n", + "\n", + "── Preprocessor ────────────────────────────────────────────────────────────────\n", + "1 Recipe Step\n", + "\n", + "• step_smote()\n", + "\n", + "── Model ───────────────────────────────────────────────────────────────────────\n", + "Multinomial Regression Model Specification (classification)\n", + "\n", + "Main Arguments:\n", + " penalty = 1\n", + "\n", + "Engine-Specific Arguments:\n", + " MaxNWts = 2086\n", + "\n", + "Computational engine: nnet \n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 333 + }, + "id": "Sc1TfPA4Ke3_", + "outputId": "82c70013-e431-4e7e-cef6-9fcf8aad4a6c" + } + }, + { + "cell_type": "markdown", + "source": [ + "Pracovné postupy 👌👌! **`workflow()`** môže byť nastavený podobne ako model. 
Takže, je čas trénovať model!\n" + ], + "metadata": { + "id": "TNQ8i85aKf9L" + } + }, + { + "cell_type": "code", + "execution_count": 8, + "source": [ + "# Train a multinomial regression model\n", + "mr_fit <- fit(object = mr_wf, data = cuisines_train)\n", + "\n", + "mr_fit" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "══ Workflow [trained] ══════════════════════════════════════════════════════════\n", + "\u001b[3mPreprocessor:\u001b[23m Recipe\n", + "\u001b[3mModel:\u001b[23m multinom_reg()\n", + "\n", + "── Preprocessor ────────────────────────────────────────────────────────────────\n", + "1 Recipe Step\n", + "\n", + "• step_smote()\n", + "\n", + "── Model ───────────────────────────────────────────────────────────────────────\n", + "Call:\n", + "nnet::multinom(formula = ..y ~ ., data = data, decay = ~1, MaxNWts = ~2086, \n", + " trace = FALSE)\n", + "\n", + "Coefficients:\n", + " (Intercept) almond angelica anise anise_seed apple\n", + "indian 0.19723325 0.2409661 0 -5.004955e-05 -0.1657635 -0.05769734\n", + "japanese 0.13961959 -0.6262400 0 -1.169155e-04 -0.4893596 -0.08585717\n", + "korean 0.22377347 -0.1833485 0 -5.560395e-05 -0.2489401 -0.15657804\n", + "thai -0.04336577 -0.6106258 0 4.903828e-04 -0.5782866 0.63451105\n", + " apple_brandy apricot armagnac artemisia artichoke asparagus\n", + "indian 0 0.37042636 0 -0.09122797 0 -0.27181970\n", + "japanese 0 0.28895643 0 -0.12651100 0 0.14054037\n", + "korean 0 -0.07981259 0 0.55756709 0 -0.66979948\n", + "thai 0 -0.33160904 0 -0.10725182 0 -0.02602152\n", + " avocado bacon baked_potato balm banana barley\n", + "indian -0.46624197 0.16008055 0 0 -0.2838796 0.2230625\n", + "japanese 0.90341344 0.02932727 0 0 -0.4142787 2.0953906\n", + "korean -0.06925382 -0.35804134 0 0 -0.2686963 -0.7233404\n", + "thai -0.21473955 -0.75594439 0 0 0.6784880 -0.4363320\n", + " bartlett_pear basil bay bean beech\n", + "indian 0 -0.7128756 0.1011587 -0.8777275 -0.0004380795\n", + 
"japanese 0 0.1288697 0.9425626 -0.2380748 0.3373437611\n", + "korean 0 -0.2445193 -0.4744318 -0.8957870 -0.0048784496\n", + "thai 0 1.5365848 0.1333256 0.2196970 -0.0113078024\n", + " beef beef_broth beef_liver beer beet\n", + "indian -0.7985278 0.2430186 -0.035598065 -0.002173738 0.01005813\n", + "japanese 0.2241875 -0.3653020 -0.139551027 0.128905553 0.04923911\n", + "korean 0.5366515 -0.6153237 0.213455197 -0.010828645 0.27325423\n", + "thai 0.1570012 -0.9364154 -0.008032213 -0.035063746 -0.28279823\n", + " bell_pepper bergamot berry bitter_orange black_bean\n", + "indian 0.49074330 0 0.58947607 0.191256164 -0.1945233\n", + "japanese 0.09074167 0 -0.25917977 -0.118915977 -0.3442400\n", + "korean -0.57876763 0 -0.07874180 -0.007729435 -0.5220672\n", + "thai 0.92554006 0 -0.07210196 -0.002983296 -0.4614426\n", + " black_currant black_mustard_seed_oil black_pepper black_raspberry\n", + "indian 0 0.38935801 -0.4453495 0\n", + "japanese 0 -0.05452887 -0.5440869 0\n", + "korean 0 -0.03929970 0.8025454 0\n", + "thai 0 -0.21498372 -0.9854806 0\n", + " black_sesame_seed black_tea blackberry blackberry_brandy\n", + "indian -0.2759246 0.3079977 0.191256164 0\n", + "japanese -0.6101687 -0.1671913 -0.118915977 0\n", + "korean 1.5197674 -0.3036261 -0.007729435 0\n", + "thai -0.1755656 -0.1487033 -0.002983296 0\n", + " blue_cheese blueberry bone_oil bourbon_whiskey brandy\n", + "indian 0 0.216164294 -0.2276744 0 0.22427587\n", + "japanese 0 -0.119186087 0.3913019 0 -0.15595599\n", + "korean 0 -0.007821986 0.2854487 0 -0.02562342\n", + "thai 0 -0.004947048 -0.0253658 0 -0.05715244\n", + "\n", + "...\n", + "and 308 more lines." 
+ ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "GMbdfVmTKkJI", + "outputId": "adf9ebdf-d69d-4a64-e9fd-e06e5322292e" + } + }, + { + "cell_type": "markdown", + "source": [ + "Výstup zobrazuje koeficienty, ktoré sa model naučil počas tréningu.\n", + "\n", + "### Vyhodnotenie vytrénovaného modelu\n", + "\n", + "Je čas zistiť, ako si model viedol 📏, vyhodnotením na testovacej množine! Začnime tým, že urobíme predpovede na testovacej množine.\n" + ], + "metadata": { + "id": "tt2BfOxrKmcJ" + } + }, + { + "cell_type": "code", + "execution_count": 9, + "source": [ + "# Make predictions on the test set\n", + "results <- cuisines_test %>% select(cuisine) %>% \n", + " bind_cols(mr_fit %>% predict(new_data = cuisines_test))\n", + "\n", + "# Print out results\n", + "results %>% \n", + " slice_head(n = 5)" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine .pred_class\n", + "1 indian thai \n", + "2 indian indian \n", + "3 indian indian \n", + "4 indian indian \n", + "5 indian indian " + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 2\n", + "\n", + "| cuisine <fct> | .pred_class <fct> |\n", + "|---|---|\n", + "| indian | thai |\n", + "| indian | indian |\n", + "| indian | indian |\n", + "| indian | indian |\n", + "| indian | indian |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 2\n", + "\\begin{tabular}{ll}\n", + " cuisine & .pred\\_class\\\\\n", + " & \\\\\n", + "\\hline\n", + "\t indian & thai \\\\\n", + "\t indian & indian\\\\\n", + "\t indian & indian\\\\\n", + "\t indian & indian\\\\\n", + "\t indian & indian\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 2
cuisine.pred_class
<fct><fct>
indianthai
indianindian
indianindian
indianindian
indianindian
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 248 + }, + "id": "CqtckvtsKqax", + "outputId": "e57fe557-6a68-4217-fe82-173328c5436d" + } + }, + { + "cell_type": "markdown", + "source": [ + "Skvelá práca! V Tidymodels je hodnotenie výkonu modelu možné pomocou [yardstick](https://yardstick.tidymodels.org/) - balíka používaného na meranie efektívnosti modelov pomocou metrík výkonu. Ako sme to urobili v našej lekcii o logistickej regresii, začnime výpočtom matice zámien.\n" + ], + "metadata": { + "id": "8w5N6XsBKss7" + } + }, + { + "cell_type": "code", + "execution_count": 10, + "source": [ + "# Confusion matrix for categorical data\n", + "conf_mat(data = results, truth = cuisine, estimate = .pred_class)\n" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " Truth\n", + "Prediction chinese indian japanese korean thai\n", + " chinese 83 1 8 15 10\n", + " indian 4 163 1 2 6\n", + " japanese 21 5 73 25 1\n", + " korean 15 0 11 191 0\n", + " thai 10 11 3 7 70" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 133 + }, + "id": "YvODvsLkK0iG", + "outputId": "bb69da84-1266-47ad-b174-d43b88ca2988" + } + }, + { + "cell_type": "markdown", + "source": [ + "Pri práci s viacerými triedami je vo všeobecnosti intuitívnejšie vizualizovať to ako tepelnú mapu, napríklad takto:\n" + ], + "metadata": { + "id": "c0HfPL16Lr6U" + } + }, + { + "cell_type": "code", + "execution_count": 11, + "source": [ + "update_geom_defaults(geom = \"tile\", new = list(color = \"black\", alpha = 0.7))\n", + "# Visualize confusion matrix\n", + "results %>% \n", + " conf_mat(cuisine, .pred_class) %>% \n", + " autoplot(type = \"heatmap\")" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "plot without title" + ], + "image/png": "" + }, + "metadata": { + "image/png": { + "width": 420, + "height": 
420 + } + } + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 436 + }, + "id": "HsAtwukyLsvt", + "outputId": "3032a224-a2c8-4270-b4f2-7bb620317400" + } + }, + { + "cell_type": "markdown", + "source": [ + "Tmavšie štvorce v grafe matice zámien naznačujú vysoký počet prípadov, a dúfajme, že vidíte diagonálnu líniu tmavších štvorcov, ktorá označuje prípady, kde predpovedaná a skutočná značka sú rovnaké.\n", + "\n", + "Teraz vypočítajme súhrnné štatistiky pre maticu zámien.\n" + ], + "metadata": { + "id": "oOJC87dkLwPr" + } + }, + { + "cell_type": "code", + "execution_count": 12, + "source": [ + "# Summary stats for confusion matrix\n", + "conf_mat(data = results, truth = cuisine, estimate = .pred_class) %>% \n", + "summary()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " .metric .estimator .estimate\n", + "1 accuracy multiclass 0.7880435\n", + "2 kap multiclass 0.7276583\n", + "3 sens macro 0.7780927\n", + "4 spec macro 0.9477598\n", + "5 ppv macro 0.7585583\n", + "6 npv macro 0.9460080\n", + "7 mcc multiclass 0.7292724\n", + "8 j_index macro 0.7258524\n", + "9 bal_accuracy macro 0.8629262\n", + "10 detection_prevalence macro 0.2000000\n", + "11 precision macro 0.7585583\n", + "12 recall macro 0.7780927\n", + "13 f_meas macro 0.7641862" + ], + "text/markdown": [ + "\n", + "A tibble: 13 × 3\n", + "\n", + "| .metric <chr> | .estimator <chr> | .estimate <dbl> |\n", + "|---|---|---|\n", + "| accuracy | multiclass | 0.7880435 |\n", + "| kap | multiclass | 0.7276583 |\n", + "| sens | macro | 0.7780927 |\n", + "| spec | macro | 0.9477598 |\n", + "| ppv | macro | 0.7585583 |\n", + "| npv | macro | 0.9460080 |\n", + "| mcc | multiclass | 0.7292724 |\n", + "| j_index | macro | 0.7258524 |\n", + "| bal_accuracy | macro | 0.8629262 |\n", + "| detection_prevalence | macro | 0.2000000 |\n", + "| precision | macro | 0.7585583 |\n", + "| recall | macro | 0.7780927 |\n", + "| f_meas | macro | 
0.7641862 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 13 × 3\n", + "\\begin{tabular}{lll}\n", + " .metric & .estimator & .estimate\\\\\n", + " & & \\\\\n", + "\\hline\n", + "\t accuracy & multiclass & 0.7880435\\\\\n", + "\t kap & multiclass & 0.7276583\\\\\n", + "\t sens & macro & 0.7780927\\\\\n", + "\t spec & macro & 0.9477598\\\\\n", + "\t ppv & macro & 0.7585583\\\\\n", + "\t npv & macro & 0.9460080\\\\\n", + "\t mcc & multiclass & 0.7292724\\\\\n", + "\t j\\_index & macro & 0.7258524\\\\\n", + "\t bal\\_accuracy & macro & 0.8629262\\\\\n", + "\t detection\\_prevalence & macro & 0.2000000\\\\\n", + "\t precision & macro & 0.7585583\\\\\n", + "\t recall & macro & 0.7780927\\\\\n", + "\t f\\_meas & macro & 0.7641862\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 13 × 3
.metric.estimator.estimate
<chr><chr><dbl>
accuracy multiclass0.7880435
kap multiclass0.7276583
sens macro 0.7780927
spec macro 0.9477598
ppv macro 0.7585583
npv macro 0.9460080
mcc multiclass0.7292724
j_index macro 0.7258524
bal_accuracy macro 0.8629262
detection_prevalencemacro 0.2000000
precision macro 0.7585583
recall macro 0.7780927
f_meas macro 0.7641862
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 494 + }, + "id": "OYqetUyzL5Wz", + "outputId": "6a84d65e-113d-4281-dfc1-16e8b70f37e6" + } + }, + { + "cell_type": "markdown", + "source": [ + "Ak sa zameriame na niektoré metriky, ako presnosť, citlivosť, ppv, na začiatok na tom nie sme zle 🥳!\n", + "\n", + "## 4. Hlbšie skúmanie\n", + "\n", + "Položme si jednu jemnú otázku: Aké kritériá sa používajú na určenie konkrétneho typu kuchyne ako predpokladaného výsledku?\n", + "\n", + "No, štatistické algoritmy strojového učenia, ako logistická regresia, sú založené na `pravdepodobnosti`; takže to, čo klasifikátor skutočne predpovedá, je pravdepodobnostné rozdelenie nad množinou možných výsledkov. Trieda s najvyššou pravdepodobnosťou je potom vybraná ako najpravdepodobnejší výsledok pre dané pozorovania.\n", + "\n", + "Pozrime sa na to v praxi tým, že urobíme tvrdé predikcie tried a aj pravdepodobnosti.\n" + ], + "metadata": { + "id": "43t7vz8vMJtW" + } + }, + { + "cell_type": "code", + "execution_count": 13, + "source": [ + "# Make hard class prediction and probabilities\n", + "results_prob <- cuisines_test %>%\n", + " select(cuisine) %>% \n", + " bind_cols(mr_fit %>% predict(new_data = cuisines_test)) %>% \n", + " bind_cols(mr_fit %>% predict(new_data = cuisines_test, type = \"prob\"))\n", + "\n", + "# Print out results\n", + "results_prob %>% \n", + " slice_head(n = 5)" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine .pred_class .pred_chinese .pred_indian .pred_japanese .pred_korean\n", + "1 indian thai 1.551259e-03 0.4587877 5.988039e-04 2.428503e-04\n", + "2 indian indian 2.637133e-05 0.9999488 6.648651e-07 2.259993e-05\n", + "3 indian indian 1.049433e-03 0.9909982 1.060937e-03 1.644947e-05\n", + "4 indian indian 6.237482e-02 0.4763035 9.136702e-02 3.660913e-01\n", + "5 indian indian 1.431745e-02 0.9418551 2.945239e-02 8.721782e-03\n", + " 
.pred_thai \n", + "1 5.388194e-01\n", + "2 1.577948e-06\n", + "3 6.874989e-03\n", + "4 3.863391e-03\n", + "5 5.653283e-03" + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 7\n", + "\n", + "| cuisine <fct> | .pred_class <fct> | .pred_chinese <dbl> | .pred_indian <dbl> | .pred_japanese <dbl> | .pred_korean <dbl> | .pred_thai <dbl> |\n", + "|---|---|---|---|---|---|---|\n", + "| indian | thai | 1.551259e-03 | 0.4587877 | 5.988039e-04 | 2.428503e-04 | 5.388194e-01 |\n", + "| indian | indian | 2.637133e-05 | 0.9999488 | 6.648651e-07 | 2.259993e-05 | 1.577948e-06 |\n", + "| indian | indian | 1.049433e-03 | 0.9909982 | 1.060937e-03 | 1.644947e-05 | 6.874989e-03 |\n", + "| indian | indian | 6.237482e-02 | 0.4763035 | 9.136702e-02 | 3.660913e-01 | 3.863391e-03 |\n", + "| indian | indian | 1.431745e-02 | 0.9418551 | 2.945239e-02 | 8.721782e-03 | 5.653283e-03 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 7\n", + "\\begin{tabular}{lllllll}\n", + " cuisine & .pred\\_class & .pred\\_chinese & .pred\\_indian & .pred\\_japanese & .pred\\_korean & .pred\\_thai\\\\\n", + " & & & & & & \\\\\n", + "\\hline\n", + "\t indian & thai & 1.551259e-03 & 0.4587877 & 5.988039e-04 & 2.428503e-04 & 5.388194e-01\\\\\n", + "\t indian & indian & 2.637133e-05 & 0.9999488 & 6.648651e-07 & 2.259993e-05 & 1.577948e-06\\\\\n", + "\t indian & indian & 1.049433e-03 & 0.9909982 & 1.060937e-03 & 1.644947e-05 & 6.874989e-03\\\\\n", + "\t indian & indian & 6.237482e-02 & 0.4763035 & 9.136702e-02 & 3.660913e-01 & 3.863391e-03\\\\\n", + "\t indian & indian & 1.431745e-02 & 0.9418551 & 2.945239e-02 & 8.721782e-03 & 5.653283e-03\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 7
cuisine.pred_class.pred_chinese.pred_indian.pred_japanese.pred_korean.pred_thai
<fct><fct><dbl><dbl><dbl><dbl><dbl>
indianthai 1.551259e-030.45878775.988039e-042.428503e-045.388194e-01
indianindian2.637133e-050.99994886.648651e-072.259993e-051.577948e-06
indianindian1.049433e-030.99099821.060937e-031.644947e-056.874989e-03
indianindian6.237482e-020.47630359.136702e-023.660913e-013.863391e-03
indianindian1.431745e-020.94185512.945239e-028.721782e-035.653283e-03
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 248 + }, + "id": "xdKNs-ZPMTJL", + "outputId": "68f6ac5a-725a-4eff-9ea6-481fef00e008" + } + }, + { + "cell_type": "markdown", + "source": [ + "✅ Môžete vysvetliť, prečo si model celkom iste myslí, že prvé pozorovanie je thajské?\n", + "\n", + "## **🚀Výzva**\n", + "\n", + "V tejto lekcii ste použili svoje vyčistené dáta na vytvorenie modelu strojového učenia, ktorý dokáže predpovedať národnú kuchyňu na základe série ingrediencií. Nájdite si čas na preštudovanie [mnohých možností](https://www.tidymodels.org/find/parsnip/#models), ktoré Tidymodels ponúka na klasifikáciu dát, a [iných spôsobov](https://parsnip.tidymodels.org/articles/articles/Examples.html#multinom_reg-models), ako prispôsobiť multinomiálnu regresiu.\n", + "\n", + "#### POĎAKOVANIE:\n", + "\n", + "[`Allison Horst`](https://twitter.com/allison_horst/) za vytvorenie úžasných ilustrácií, ktoré robia R prístupnejším a pútavejším. Viac ilustrácií nájdete v jej [galérii](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM).\n", + "\n", + "[Cassie Breviu](https://www.twitter.com/cassieview) a [Jen Looper](https://www.twitter.com/jenlooper) za vytvorenie pôvodnej verzie tohto modulu v Pythone ♥️\n", + "\n", + "
\n", + "Pridal by som nejaké vtipy, ale nerozumiem jedlým slovným hračkám 😅.\n", + "\n", + "
\n", + "\n", + "Šťastné učenie,\n", + "\n", + "[Eric](https://twitter.com/ericntay), Zlatý ambasádor Microsoft Learn.\n" + ], + "metadata": { + "id": "2tWVHMeLMYdM" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby na automatický preklad [Co-op Translator](https://github.com/Azure/co-op-translator). Aj keď sa snažíme o presnosť, upozorňujeme, že automatické preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre dôležité informácie sa odporúča profesionálny ľudský preklad. Nezodpovedáme za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sk/4-Classification/2-Classifiers-1/solution/notebook.ipynb b/translations/sk/4-Classification/2-Classifiers-1/solution/notebook.ipynb new file mode 100644 index 000000000..38e1198e1 --- /dev/null +++ b/translations/sk/4-Classification/2-Classifiers-1/solution/notebook.ipynb @@ -0,0 +1,279 @@ +{ + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 0 indian 0 0 0 0 0 \n", + "1 1 indian 1 0 0 0 0 \n", + "2 2 indian 0 0 0 0 0 \n", + "3 3 indian 0 0 0 0 0 \n", + "4 4 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 
0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 382 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00indian00000000...0000000000
11indian10000000...0000000000
22indian00000000...0000000000
33indian00000000...0000000000
44indian00000000...0000000010
\n

5 rows × 382 columns

\n
" + }, + "metadata": {}, + "execution_count": 1 + } + ], + "source": [ + "import pandas as pd\n", + "cuisines_df = pd.read_csv(\"../../data/cleaned_cuisines.csv\")\n", + "cuisines_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.model_selection import train_test_split, cross_val_score\n", + "from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report, precision_recall_curve\n", + "from sklearn.svm import SVC\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 indian\n", + "1 indian\n", + "2 indian\n", + "3 indian\n", + "4 indian\n", + "Name: cuisine, dtype: object" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ], + "source": [ + "cuisines_label_df = cuisines_df['cuisine']\n", + "cuisines_label_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 4 + } + ], + "source": [ + "cuisines_feature_df = cuisines_df.drop(['Unnamed: 0', 'cuisine'], axis=1)\n", + "cuisines_feature_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(cuisines_feature_df, cuisines_label_df, test_size=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Accuracy is 0.8181818181818182\n" + ] + } + ], + "source": [ + "lr = LogisticRegression(multi_class='ovr',solver='liblinear')\n", + "model = lr.fit(X_train, np.ravel(y_train))\n", + "\n", + "accuracy = model.score(X_test, y_test)\n", + "print (\"Accuracy is {}\".format(accuracy))" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "ingredients: Index(['artemisia', 'black_pepper', 'mushroom', 'shiitake', 'soy_sauce',\n 'vegetable_oil'],\n dtype='object')\ncuisine: korean\n" + ] + } + ], + "source": [ + "# test an item\n", + "print(f'ingredients: {X_test.iloc[50][X_test.iloc[50]!=0].keys()}')\n", + "print(f'cuisine: {y_test.iloc[50]}')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " 0\n", + "korean 0.392231\n", + "chinese 0.372872\n", + "japanese 0.218825\n", + "thai 0.013427\n", + "indian 0.002645" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
0
korean0.392231
chinese0.372872
japanese0.218825
thai0.013427
indian0.002645
\n
" + }, + "metadata": {}, + "execution_count": 8 + } + ], + "source": [ + "#rehsape to 2d array and transpose\n", + "test= X_test.iloc[50].values.reshape(-1, 1).T\n", + "# predict with score\n", + "proba = model.predict_proba(test)\n", + "classes = model.classes_\n", + "# create df with classes and scores\n", + "resultdf = pd.DataFrame(data=proba, columns=classes)\n", + "\n", + "# create df to show results\n", + "topPrediction = resultdf.T.sort_values(by=[0], ascending = [False])\n", + "topPrediction.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " precision recall f1-score support\n\n chinese 0.75 0.73 0.74 223\n indian 0.93 0.88 0.90 255\n japanese 0.78 0.78 0.78 253\n korean 0.87 0.86 0.86 236\n thai 0.76 0.84 0.80 232\n\n accuracy 0.82 1199\n macro avg 0.82 0.82 0.82 1199\nweighted avg 0.82 0.82 0.82 1199\n\n" + ] + } + ], + "source": [ + "y_pred = model.predict(X_test)\r\n", + "print(classification_report(y_test,y_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby na automatický preklad [Co-op Translator](https://github.com/Azure/co-op-translator). Aj keď sa snažíme o presnosť, upozorňujeme, že automatické preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre dôležité informácie sa odporúča profesionálny ľudský preklad. 
Nezodpovedáme za žiadne nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "9408506dd864f2b6e334c62f80c0cfcc", + "translation_date": "2025-09-06T14:32:57+00:00", + "source_file": "4-Classification/2-Classifiers-1/solution/notebook.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/translations/sk/4-Classification/3-Classifiers-2/notebook.ipynb b/translations/sk/4-Classification/3-Classifiers-2/notebook.ipynb new file mode 100644 index 000000000..f52d16c69 --- /dev/null +++ b/translations/sk/4-Classification/3-Classifiers-2/notebook.ipynb @@ -0,0 +1,163 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 0 indian 0 0 0 0 0 \n", + "1 1 indian 1 0 0 0 0 \n", + "2 2 indian 0 0 0 0 0 \n", + "3 3 indian 0 0 0 0 0 \n", + "4 4 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 
0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 382 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00indian00000000...0000000000
11indian10000000...0000000000
22indian00000000...0000000000
33indian00000000...0000000000
44indian00000000...0000000010
\n

5 rows × 382 columns

\n
" + }, + "metadata": {}, + "execution_count": 9 + } + ], + "source": [ + "import pandas as pd\n", + "cuisines_df = pd.read_csv(\"../data/cleaned_cuisines.csv\")\n", + "cuisines_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 indian\n", + "1 indian\n", + "2 indian\n", + "3 indian\n", + "4 indian\n", + "Name: cuisine, dtype: object" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ], + "source": [ + "cuisines_label_df = cuisines_df['cuisine']\n", + "cuisines_label_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 11 + } + ], + "source": [ + "cuisines_feature_df = cuisines_df.drop(['Unnamed: 0', 'cuisine'], axis=1)\n", + "cuisines_feature_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby na automatický preklad [Co-op Translator](https://github.com/Azure/co-op-translator). Hoci sa snažíme o presnosť, upozorňujeme, že automatické preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre dôležité informácie odporúčame profesionálny ľudský preklad. Nezodpovedáme za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "15a83277036572e0773229b5f21c1e12", + "translation_date": "2025-09-06T14:42:11+00:00", + "source_file": "4-Classification/3-Classifiers-2/notebook.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/translations/sk/4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb b/translations/sk/4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb new file mode 100644 index 000000000..67e117286 --- /dev/null +++ 
b/translations/sk/4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb @@ -0,0 +1,648 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "lesson_12-R.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "fab50046ca413a38939d579f8432274f", + "translation_date": "2025-09-06T14:44:10+00:00", + "source_file": "4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb", + "language_code": "sk" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "jsFutf_ygqSx" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HD54bEefgtNO" + }, + "source": [ + "## Klasifikátory kuchýň 2\n", + "\n", + "V tejto druhej lekcii o klasifikácii sa pozrieme na `ďalšie spôsoby`, ako klasifikovať kategóriálne údaje. Tiež sa oboznámime s dôsledkami výberu jedného klasifikátora oproti inému.\n", + "\n", + "### [**Kvíz pred prednáškou**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/23/)\n", + "\n", + "### **Predpoklady**\n", + "\n", + "Predpokladáme, že ste dokončili predchádzajúce lekcie, pretože budeme nadväzovať na niektoré koncepty, ktoré sme sa už naučili.\n", + "\n", + "Na túto lekciu budeme potrebovať nasledujúce balíky:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) je [kolekcia balíkov pre R](https://www.tidyverse.org/packages), ktorá robí dátovú vedu rýchlejšou, jednoduchšou a zábavnejšou!\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) je [rámec balíkov](https://www.tidymodels.org/packages/) určený na modelovanie a strojové učenie.\n", + "\n", + "- `themis`: [balík themis](https://themis.tidymodels.org/) poskytuje dodatočné kroky pre recepty na riešenie nevyvážených údajov.\n", + "\n", + "Môžete ich nainštalovať pomocou:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\", 
\"kernlab\", \"themis\", \"ranger\", \"xgboost\", \"kknn\"))`\n", + "\n", + "Prípadne, nasledujúci skript skontroluje, či máte nainštalované potrebné balíky na dokončenie tohto modulu, a v prípade, že chýbajú, ich nainštaluje za vás.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "vZ57IuUxgyQt" + }, + "source": [ + "suppressWarnings(if (!require(\"pacman\"))install.packages(\"pacman\"))\n", + "\n", + "pacman::p_load(tidyverse, tidymodels, themis, kernlab, ranger, xgboost, kknn)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "z22M-pj4g07x" + }, + "source": [ + "## **1. Mapa klasifikácie**\n", + "\n", + "V našej [predchádzajúcej lekcii](https://github.com/microsoft/ML-For-Beginners/tree/main/4-Classification/2-Classifiers-1) sme sa snažili odpovedať na otázku: ako si vybrať medzi viacerými modelmi? Do veľkej miery to závisí od charakteristík údajov a typu problému, ktorý chceme vyriešiť (napríklad klasifikácia alebo regresia?).\n", + "\n", + "Predtým sme sa naučili o rôznych možnostiach, ktoré máte pri klasifikácii údajov, pomocou prehľadovej tabuľky od Microsoftu. Pythonovský rámec pre strojové učenie, Scikit-learn, ponúka podobnú, ale podrobnejšiu prehľadovú tabuľku, ktorá vám môže ďalej pomôcť zúžiť výber vašich odhadovačov (iný výraz pre klasifikátory):\n", + "\n", + "

\n", + " \n", + "

\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "u1i3xRIVg7vG" + }, + "source": [ + "> Tip: [navštívte túto mapu online](https://scikit-learn.org/stable/tutorial/machine_learning_map/) a kliknite na cestu, aby ste si prečítali dokumentáciu.\n", + ">\n", + "> [Referenčná stránka Tidymodels](https://www.tidymodels.org/find/parsnip/#models) tiež poskytuje vynikajúcu dokumentáciu o rôznych typoch modelov.\n", + "\n", + "### **Plán** 🗺️\n", + "\n", + "Táto mapa je veľmi užitočná, ak máte jasné pochopenie svojich údajov, pretože sa môžete „prejsť“ po jej cestách k rozhodnutiu:\n", + "\n", + "- Máme viac ako 50 vzoriek\n", + "\n", + "- Chceme predpovedať kategóriu\n", + "\n", + "- Máme označené údaje\n", + "\n", + "- Máme menej ako 100K vzoriek\n", + "\n", + "- ✨ Môžeme si vybrať Linear SVC\n", + "\n", + "- Ak to nefunguje, keďže máme numerické údaje\n", + "\n", + " - Môžeme skúsiť ✨ KNeighbors Classifier\n", + "\n", + " - Ak to nefunguje, skúste ✨ SVC a ✨ Ensemble Classifiers\n", + "\n", + "Toto je veľmi užitočná cesta, ktorú sa oplatí nasledovať. Teraz sa do toho pustíme pomocou [tidymodels](https://www.tidymodels.org/) frameworku na modelovanie: konzistentnej a flexibilnej kolekcie balíkov v R, vyvinutej na podporu správnej štatistickej praxe 😊.\n", + "\n", + "## 2. Rozdelenie údajov a riešenie nevyváženého dátového súboru.\n", + "\n", + "Z našich predchádzajúcich lekcií sme sa naučili, že existuje súbor spoločných ingrediencií naprieč našimi kuchyňami. 
Tiež sme si všimli, že rozdelenie počtu kuchýň bolo dosť nerovnomerné.\n", + "\n", + "S týmto sa vysporiadame takto:\n", + "\n", + "- Odstránime najbežnejšie ingrediencie, ktoré spôsobujú zmätok medzi odlišnými kuchyňami, pomocou `dplyr::select()`.\n", + "\n", + "- Použijeme `recipe`, ktorý predspracuje údaje, aby boli pripravené na modelovanie, aplikovaním algoritmu `over-sampling`.\n", + "\n", + "Toto sme už prešli v predchádzajúcej lekcii, takže to bude hračka 🥳!\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "6tj_rN00hClA" + }, + "source": [ + "# Load the core Tidyverse and Tidymodels packages\n", + "library(tidyverse)\n", + "library(tidymodels)\n", + "\n", + "# Load the original cuisines data\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/4-Classification/data/cuisines.csv\")\n", + "\n", + "# Drop id column, rice, garlic and ginger from our original data set\n", + "df_select <- df %>% \n", + " select(-c(1, rice, garlic, ginger)) %>%\n", + " # Encode cuisine column as categorical\n", + " mutate(cuisine = factor(cuisine))\n", + "\n", + "\n", + "# Create data split specification\n", + "set.seed(2056)\n", + "cuisines_split <- initial_split(data = df_select,\n", + " strata = cuisine,\n", + " prop = 0.7)\n", + "\n", + "# Extract the data in each split\n", + "cuisines_train <- training(cuisines_split)\n", + "cuisines_test <- testing(cuisines_split)\n", + "\n", + "# Display distribution of cuisines in the training set\n", + "cuisines_train %>% \n", + " count(cuisine) %>% \n", + " arrange(desc(n))" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zFin5yw3hHb1" + }, + "source": [ + "### Riešenie nevyvážených údajov\n", + "\n", + "Nevyvážené údaje často negatívne ovplyvňujú výkon modelu. 
Mnohé modely dosahujú najlepšie výsledky, keď je počet pozorovaní rovnaký, a preto majú tendenciu zápasiť s nevyváženými údajmi.\n", + "\n", + "Existujú dva hlavné spôsoby, ako riešiť nevyvážené dátové súbory:\n", + "\n", + "- pridanie pozorovaní do minoritnej triedy: `Over-sampling`, napríklad pomocou algoritmu SMOTE, ktorý synteticky generuje nové príklady minoritnej triedy na základe najbližších susedov týchto prípadov.\n", + "\n", + "- odstránenie pozorovaní z majoritnej triedy: `Under-sampling`\n", + "\n", + "V našej predchádzajúcej lekcii sme ukázali, ako riešiť nevyvážené dátové súbory pomocou `recipe`. Recipe si môžete predstaviť ako plán, ktorý popisuje, aké kroky by sa mali aplikovať na dátový súbor, aby bol pripravený na analýzu údajov. V našom prípade chceme dosiahnuť rovnomerné rozdelenie počtu našich kuchýň pre náš `training set`. Poďme na to.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "cRzTnHolhLWd" + }, + "source": [ + "# Load themis package for dealing with imbalanced data\n", + "library(themis)\n", + "\n", + "# Create a recipe for preprocessing training data\n", + "cuisines_recipe <- recipe(cuisine ~ ., data = cuisines_train) %>%\n", + " step_smote(cuisine) \n", + "\n", + "# Print recipe\n", + "cuisines_recipe" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KxOQ2ORhhO81" + }, + "source": [ + "Teraz sme pripravení trénovať modely 👩‍💻👨‍💻!\n", + "\n", + "## 3. Nad rámec multinomických regresných modelov\n", + "\n", + "V našej predchádzajúcej lekcii sme sa zaoberali multinomickými regresnými modelmi. Poďme preskúmať niektoré flexibilnejšie modely pre klasifikáciu.\n", + "\n", + "### Support Vector Machines.\n", + "\n", + "V kontexte klasifikácie je `Support Vector Machines` technika strojového učenia, ktorá sa snaží nájsť *hyperrovinu*, ktorá \"najlepšie\" oddeľuje triedy. Pozrime sa na jednoduchý príklad:\n", + "\n", + "

\n", + " \n", + "

https://commons.wikimedia.org/w/index.php?curid=22877598
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "C4Wsd0vZhXYu" + }, + "source": [ + "H1 neoddeľuje triedy. H2 ich oddeľuje, ale iba s malým okrajom. H3 ich oddeľuje s maximálnym okrajom.\n", + "\n", + "#### Lineárny Support Vector Classifier\n", + "\n", + "Support-Vector classifier (SVC) je súčasťou rodiny techník strojového učenia Support-Vector machines. V SVC je hyperrovina vybraná tak, aby správne oddelila `väčšinu` tréningových pozorovaní, ale `môže nesprávne klasifikovať` niektoré pozorovania. Tým, že umožníme niektorým bodom byť na nesprávnej strane, SVM sa stáva odolnejším voči odľahlým hodnotám, a tým lepšie generalizuje na nové dáta. Parameter, ktorý reguluje toto porušenie, sa nazýva `cost` a má predvolenú hodnotu 1 (pozri `help(\"svm_poly\")`).\n", + "\n", + "Vytvorme lineárny SVC nastavením `degree = 1` v polynomiálnom SVM modeli.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "vJpp6nuChlBz" + }, + "source": [ + "# Make a linear SVC specification\n", + "svc_linear_spec <- svm_poly(degree = 1) %>% \n", + " set_engine(\"kernlab\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle specification and recipe into a workflow\n", + "svc_linear_wf <- workflow() %>% \n", + " add_recipe(cuisines_recipe) %>% \n", + " add_model(svc_linear_spec)\n", + "\n", + "# Print out workflow\n", + "svc_linear_wf" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rDs8cWNkhoqu" + }, + "source": [ + "Teraz, keď sme zachytili kroky predspracovania a špecifikáciu modelu do *workflow*, môžeme pokračovať a natrénovať lineárny SVC a zároveň vyhodnotiť výsledky.
Pre metriky výkonu vytvorme súbor metrík, ktorý bude hodnotiť: `presnosť`, `citlivosť`, `pozitívnu prediktívnu hodnotu` a `F mieru`.\n", + "\n", + "> `augment()` pridá stĺpec/stĺpce s predikciami k daným údajom.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "81wiqcwuhrnq" + }, + "source": [ + "# Train a linear SVC model\n", + "svc_linear_fit <- svc_linear_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "# Create a metric set\n", + "eval_metrics <- metric_set(ppv, sens, accuracy, f_meas)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "svc_linear_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0UFQvHf-huo3" + }, + "source": [ + "#### Support Vector Machine\n", + "\n", + "Support vector machine (SVM) je rozšírením klasifikátora podporných vektorov, ktoré umožňuje zohľadniť nelineárnu hranicu medzi triedami. V podstate SVM využívajú *kernel trick* na rozšírenie priestoru vlastností, aby sa prispôsobili nelineárnym vzťahom medzi triedami. 
Jednou z populárnych a mimoriadne flexibilných funkcií jadra, ktorú SVM využívajú, je *Radial basis function.* Pozrime sa, ako bude fungovať na našich údajoch.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "-KX4S8mzhzmp" + }, + "source": [ + "set.seed(2056)\n", + "\n", + "# Make an RBF SVM specification\n", + "svm_rbf_spec <- svm_rbf() %>% \n", + " set_engine(\"kernlab\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle specification and recipe into a worklow\n", + "svm_rbf_wf <- workflow() %>% \n", + " add_recipe(cuisines_recipe) %>% \n", + " add_model(svm_rbf_spec)\n", + "\n", + "\n", + "# Train an RBF model\n", + "svm_rbf_fit <- svm_rbf_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "svm_rbf_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QBFSa7WSh4HQ" + }, + "source": [ + "Oveľa lepšie 🤩!\n", + "\n", + "> ✅ Pozrite si:\n", + ">\n", + "> - [*Support Vector Machines*](https://bradleyboehmke.github.io/HOML/svm.html), Hands-on Machine Learning with R\n", + ">\n", + "> - [*Support Vector Machines*](https://www.statlearning.com/), An Introduction to Statistical Learning with Applications in R\n", + ">\n", + "> pre ďalšie čítanie.\n", + "\n", + "### Klasifikátory najbližšieho suseda\n", + "\n", + "Algoritmus *K*-najbližšieho suseda (KNN) predpovedá každé pozorovanie na základe jeho *podobnosti* s ostatnými pozorovaniami.\n", + "\n", + "Poďme ho aplikovať na naše dáta.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "k4BxxBcdh9Ka" + }, + "source": [ + "# Make a KNN specification\n", + "knn_spec <- nearest_neighbor() %>% \n", + " set_engine(\"kknn\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle recipe and model specification into a workflow\n", + 
"knn_wf <- workflow() %>% \n", + " add_recipe(cuisines_recipe) %>% \n", + " add_model(knn_spec)\n", + "\n", + "# Train a KNN model\n", + "knn_wf_fit <- knn_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "knn_wf_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HaegQseriAcj" + }, + "source": [ + "Zdá sa, že tento model nefunguje až tak dobre. Pravdepodobne zmena argumentov modelu (pozrite si `help(\"nearest_neighbor\")`) zlepší jeho výkon. Určite to vyskúšajte.\n", + "\n", + "> ✅ Pozrite si:\n", + ">\n", + "> - [Hands-on Machine Learning with R](https://bradleyboehmke.github.io/HOML/)\n", + ">\n", + "> - [An Introduction to Statistical Learning with Applications in R](https://www.statlearning.com/)\n", + ">\n", + "> aby ste sa dozvedeli viac o klasifikátoroch *K*-Najbližších Susedov.\n", + "\n", + "### Ensemble klasifikátory\n", + "\n", + "Ensemble algoritmy fungujú tak, že kombinujú viacero základných odhadov, aby vytvorili optimálny model, buď:\n", + "\n", + "`bagging`: aplikáciou *priemerovacej funkcie* na kolekciu základných modelov\n", + "\n", + "`boosting`: vytváraním sekvencie modelov, ktoré na seba nadväzujú, aby zlepšili prediktívny výkon.\n", + "\n", + "Začnime tým, že vyskúšame model Random Forest, ktorý vytvára veľkú kolekciu rozhodovacích stromov a potom aplikuje priemerovaciu funkciu na vytvorenie lepšieho celkového modelu.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "49DPoVs6iK1M" + }, + "source": [ + "# Make a random forest specification\n", + "rf_spec <- rand_forest() %>% \n", + " set_engine(\"ranger\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle recipe and model specification into a workflow\n", + "rf_wf <- workflow() %>% \n", + " 
add_recipe(cuisines_recipe) %>% \n", + " add_model(rf_spec)\n", + "\n", + "# Train a random forest model\n", + "rf_wf_fit <- rf_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "rf_wf_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RGVYwC_aiUWc" + }, + "source": [ + "Dobrá práca 👏!\n", + "\n", + "Poďme tiež experimentovať s modelom Boosted Tree.\n", + "\n", + "Boosted Tree definuje ensemble metódu, ktorá vytvára sériu sekvenčných rozhodovacích stromov, kde každý strom závisí od výsledkov predchádzajúcich stromov v snahe postupne znižovať chybu. Zameriava sa na váhy nesprávne klasifikovaných položiek a upravuje prispôsobenie pre ďalší klasifikátor, aby ich opravil.\n", + "\n", + "Existujú rôzne spôsoby, ako tento model prispôsobiť (pozri `help(\"boost_tree\")`). 
V tomto príklade prispôsobíme Boosted stromy pomocou enginu `xgboost`.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Py1YWo-micWs" + }, + "source": [ + "# Make a boosted tree specification\n", + "boost_spec <- boost_tree(trees = 200) %>% \n", + " set_engine(\"xgboost\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle recipe and model specification into a workflow\n", + "boost_wf <- workflow() %>% \n", + " add_recipe(cuisines_recipe) %>% \n", + " add_model(boost_spec)\n", + "\n", + "# Train a boosted tree model\n", + "boost_wf_fit <- boost_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "boost_wf_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zNQnbuejigZM" + }, + "source": [ + "> ✅ Pozrite si:\n", + ">\n", + "> - [Machine Learning for Social Scientists](https://cimentadaj.github.io/ml_socsci/tree-based-methods.html#random-forests)\n", + ">\n", + "> - [Hands-on Machine Learning with R](https://bradleyboehmke.github.io/HOML/)\n", + ">\n", + "> - [An Introduction to Statistical Learning with Applications in R](https://www.statlearning.com/)\n", + ">\n", + "> - <https://algotech.netlify.app/blog/xgboost/> - Skúma model AdaBoost, ktorý je dobrou alternatívou k xgboost.\n", + ">\n", + "> pre viac informácií o Ensemble klasifikátoroch.\n", + "\n", + "## 4. Extra - porovnanie viacerých modelov\n", + "\n", + "V tomto cvičení sme vytvorili pomerne veľké množstvo modelov 🙌. 
Môže byť únavné alebo náročné vytvárať množstvo workflowov z rôznych sád predspracovania a/alebo špecifikácií modelov a potom jeden po druhom vypočítavať metriky výkonnosti.\n", + "\n", + "Pozrime sa, či to môžeme vyriešiť vytvorením funkcie, ktorá aplikuje zoznam workflowov na tréningovú množinu a následne vráti metriky výkonnosti na základe testovacej množiny. 
Použijeme `map()` a `map_dfr()` z balíka [purrr](https://purrr.tidyverse.org/) na aplikáciu funkcií na každý prvok zoznamu.\n", + "\n", + "> Funkcie [`map()`](https://purrr.tidyverse.org/reference/map.html) vám umožňujú nahradiť mnoho for-cyklov kódom, ktorý je stručnejší a ľahšie čitateľný. Najlepším miestom na učenie sa o funkciách [`map()`](https://purrr.tidyverse.org/reference/map.html) je [kapitola o iterácii](http://r4ds.had.co.nz/iteration.html) v knihe R for Data Science.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Qzb7LyZnimd2" + }, + "source": [ + "set.seed(2056)\n", + "\n", + "# Create a metric set\n", + "eval_metrics <- metric_set(ppv, sens, accuracy, f_meas)\n", + "\n", + "# Define a function that returns performance metrics\n", + "compare_models <- function(workflow_list, train_set, test_set){\n", + " \n", + " suppressWarnings(\n", + " # Fit each model to the train_set\n", + " map(workflow_list, fit, data = train_set) %>% \n", + " # Make predictions on the test set\n", + " map_dfr(augment, new_data = test_set, .id = \"model\") %>%\n", + " # Select desired columns\n", + " select(model, cuisine, .pred_class) %>% \n", + " # Evaluate model performance\n", + " group_by(model) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class) %>% \n", + " ungroup()\n", + " )\n", + " \n", + "} # End of function" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Fwa712sNisDA" + }, + "source": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "3i4VJOi2iu-a" + }, + "source": [ + "# Make a list of workflows\n", + "workflow_list <- list(\n", + " \"svc\" = svc_linear_wf,\n", + " \"svm\" = svm_rbf_wf,\n", + " \"knn\" = knn_wf,\n", + " \"random_forest\" = rf_wf,\n", + " \"xgboost\" = boost_wf)\n", + "\n", + "# Call the function\n", + "set.seed(2056)\n", + "perf_metrics <- compare_models(workflow_list = workflow_list, train_set = cuisines_train, test_set = cuisines_test)\n", + 
"\n", + "# Print out performance metrics\n", + "perf_metrics %>% \n", + " group_by(.metric) %>% \n", + " arrange(desc(.estimate)) %>% \n", + " slice_head(n=7)\n", + "\n", + "# Compare accuracy\n", + "perf_metrics %>% \n", + " filter(.metric == \"accuracy\") %>% \n", + " arrange(desc(.estimate))\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KuWK_lEli4nW" + }, + "source": [ + "[**workflowset**](https://workflowsets.tidymodels.org/) balík umožňuje používateľom vytvárať a jednoducho prispôsobovať veľké množstvo modelov, ale je primárne navrhnutý na prácu s technikami resamplingu, ako je `krížová validácia`, ktorú si ešte len preberieme.\n", + "\n", + "## **🚀Výzva**\n", + "\n", + "Každá z týchto techník má veľké množstvo parametrov, ktoré môžete upravovať, napríklad `cost` v SVM, `neighbors` v KNN, `mtry` (náhodne vybrané prediktory) v Random Forest.\n", + "\n", + "Preskúmajte predvolené parametre každého z nich a zamyslite sa nad tým, čo by úprava týchto parametrov znamenala pre kvalitu modelu.\n", + "\n", + "Ak chcete zistiť viac o konkrétnom modeli a jeho parametroch, použite: `help(\"model\")`, napr. `help(\"rand_forest\")`.\n", + "\n", + "> V praxi zvyčajne *odhadujeme* *najlepšie hodnoty* týchto parametrov trénovaním mnohých modelov na `simulovanom dátovom súbore` a meraním, ako dobre tieto modely fungujú. 
Tento proces sa nazýva **ladenie**.\n", + "\n", + "### [**Kvíz po prednáške**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/24/)\n", + "\n", + "### **Prehľad a samostatné štúdium**\n", + "\n", + "V týchto lekciách je veľa odborných výrazov, preto si nájdite chvíľu na preštudovanie [tohto zoznamu](https://docs.microsoft.com/dotnet/machine-learning/resources/glossary?WT.mc_id=academic-77952-leestott) užitočnej terminológie!\n", + "\n", + "#### POĎAKOVANIE:\n", + "\n", + "[`Allison Horst`](https://twitter.com/allison_horst/) za vytvorenie úžasných ilustrácií, ktoré robia R prístupnejším a pútavejším. Viac ilustrácií nájdete v jej [galérii](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM).\n", + "\n", + "[Cassie Breviu](https://www.twitter.com/cassieview) a [Jen Looper](https://www.twitter.com/jenlooper) za vytvorenie pôvodnej verzie tohto modulu v Pythone ♥️\n", + "\n", + "Šťastné učenie,\n", + "\n", + "[Eric](https://twitter.com/ericntay), Zlatý študentský ambasádor Microsoft Learn.\n", + "\n", + "

\n", + " \n", + "

Ilustrácia od @allison_horst
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby na automatický preklad [Co-op Translator](https://github.com/Azure/co-op-translator). Aj keď sa snažíme o presnosť, upozorňujeme, že automatické preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre dôležité informácie sa odporúča profesionálny ľudský preklad. Nezodpovedáme za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sk/4-Classification/3-Classifiers-2/solution/notebook.ipynb b/translations/sk/4-Classification/3-Classifiers-2/solution/notebook.ipynb new file mode 100644 index 000000000..9d86ba1cb --- /dev/null +++ b/translations/sk/4-Classification/3-Classifiers-2/solution/notebook.ipynb @@ -0,0 +1,302 @@ +{ + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 0 indian 0 0 0 0 0 \n", + "1 1 indian 1 0 0 0 0 \n", + "2 2 indian 0 0 0 0 0 \n", + "3 3 indian 0 0 0 0 0 \n", + "4 4 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 382 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00indian00000000...0000000000
11indian10000000...0000000000
22indian00000000...0000000000
33indian00000000...0000000000
44indian00000000...0000000010
\n

5 rows × 382 columns

\n
" + }, + "metadata": {}, + "execution_count": 1 + } + ], + "source": [ + "import pandas as pd\n", + "cuisines_df = pd.read_csv(\"../../data/cleaned_cuisines.csv\")\n", + "cuisines_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 indian\n", + "1 indian\n", + "2 indian\n", + "3 indian\n", + "4 indian\n", + "Name: cuisine, dtype: object" + ] + }, + "metadata": {}, + "execution_count": 2 + } + ], + "source": [ + "cuisines_label_df = cuisines_df['cuisine']\n", + "cuisines_label_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 3 + } + ], + "source": [ + "cuisines_feature_df = cuisines_df.drop(['Unnamed: 0', 'cuisine'], axis=1)\n", + "cuisines_feature_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Vyskúšajte rôzne klasifikátory\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.neighbors import KNeighborsClassifier\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.svm import SVC\n", + "from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier\n", + "from sklearn.model_selection import train_test_split, cross_val_score\n", + "from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report, precision_recall_curve\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(cuisines_feature_df, cuisines_label_df, test_size=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "C = 10\n", + "# Create different classifiers.\n", + "classifiers = {\n", + " 'Linear SVC': SVC(kernel='linear', C=C, probability=True,random_state=0),\n", + " 'KNN classifier': KNeighborsClassifier(C),\n", + " 'SVC': SVC(),\n", + " 'RFST': RandomForestClassifier(n_estimators=100),\n", + " 'ADA': AdaBoostClassifier(n_estimators=100)\n", + " \n", + "}\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Accuracy (train) for Linear SVC: 76.4% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.64 0.66 0.65 242\n", + " indian 0.91 0.86 0.89 236\n", + " japanese 0.72 0.73 0.73 245\n", + " korean 0.83 0.75 0.79 234\n", + " thai 0.75 0.82 0.78 242\n", + "\n", + " 
accuracy 0.76 1199\n", + " macro avg 0.77 0.76 0.77 1199\n", + "weighted avg 0.77 0.76 0.77 1199\n", + "\n", + "Accuracy (train) for KNN classifier: 70.7% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.65 0.63 0.64 242\n", + " indian 0.84 0.81 0.82 236\n", + " japanese 0.60 0.81 0.69 245\n", + " korean 0.89 0.53 0.67 234\n", + " thai 0.69 0.75 0.72 242\n", + "\n", + " accuracy 0.71 1199\n", + " macro avg 0.73 0.71 0.71 1199\n", + "weighted avg 0.73 0.71 0.71 1199\n", + "\n", + "Accuracy (train) for SVC: 80.1% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.71 0.69 0.70 242\n", + " indian 0.92 0.92 0.92 236\n", + " japanese 0.77 0.78 0.77 245\n", + " korean 0.87 0.77 0.82 234\n", + " thai 0.75 0.86 0.80 242\n", + "\n", + " accuracy 0.80 1199\n", + " macro avg 0.80 0.80 0.80 1199\n", + "weighted avg 0.80 0.80 0.80 1199\n", + "\n", + "Accuracy (train) for RFST: 82.8% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.80 0.75 0.77 242\n", + " indian 0.90 0.91 0.90 236\n", + " japanese 0.82 0.78 0.80 245\n", + " korean 0.85 0.82 0.83 234\n", + " thai 0.78 0.89 0.83 242\n", + "\n", + " accuracy 0.83 1199\n", + " macro avg 0.83 0.83 0.83 1199\n", + "weighted avg 0.83 0.83 0.83 1199\n", + "\n", + "Accuracy (train) for ADA: 71.1% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.60 0.57 0.58 242\n", + " indian 0.87 0.84 0.86 236\n", + " japanese 0.71 0.60 0.65 245\n", + " korean 0.68 0.78 0.72 234\n", + " thai 0.70 0.78 0.74 242\n", + "\n", + " accuracy 0.71 1199\n", + " macro avg 0.71 0.71 0.71 1199\n", + "weighted avg 0.71 0.71 0.71 1199\n", + "\n" + ] + } + ], + "source": [ + "n_classifiers = len(classifiers)\n", + "\n", + "for index, (name, classifier) in enumerate(classifiers.items()):\n", + " classifier.fit(X_train, np.ravel(y_train))\n", + "\n", + " y_pred = classifier.predict(X_test)\n", + " accuracy = accuracy_score(y_test, y_pred)\n", + " print(\"Accuracy (train) for %s: %0.1f%% \" % 
(name, accuracy * 100))\n", + " print(classification_report(y_test,y_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby na automatický preklad [Co-op Translator](https://github.com/Azure/co-op-translator). Aj keď sa snažíme o presnosť, upozorňujeme, že automatické preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre dôležité informácie sa odporúča profesionálny ľudský preklad. Nezodpovedáme za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "7ea2b714669c823a596d986ba2d5739f", + "translation_date": "2025-09-06T14:42:38+00:00", + "source_file": "4-Classification/3-Classifiers-2/solution/notebook.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/translations/sk/4-Classification/4-Applied/notebook.ipynb b/translations/sk/4-Classification/4-Applied/notebook.ipynb new file mode 100644 index 000000000..959db7ee8 --- /dev/null +++ b/translations/sk/4-Classification/4-Applied/notebook.ipynb @@ -0,0 +1,39 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + 
"mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 4, + "coopTranslator": { + "original_hash": "2f3e0d9e9ac5c301558fb8bf733ac0cb", + "translation_date": "2025-09-06T14:41:22+00:00", + "source_file": "4-Classification/4-Applied/notebook.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby AI prekladu [Co-op Translator](https://github.com/Azure/co-op-translator). Hoci sa snažíme o presnosť, prosím, berte na vedomie, že automatizované preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho rodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre kritické informácie sa odporúča profesionálny ľudský preklad. Nie sme zodpovední za žiadne nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sk/4-Classification/4-Applied/solution/notebook.ipynb b/translations/sk/4-Classification/4-Applied/solution/notebook.ipynb new file mode 100644 index 000000000..342d57c27 --- /dev/null +++ b/translations/sk/4-Classification/4-Applied/solution/notebook.ipynb @@ -0,0 +1,290 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "interpreter": { + "hash": 
"70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "49325d6dd12a3628fc64fa7ccb1a80ff", + "translation_date": "2025-09-06T14:41:47+00:00", + "source_file": "4-Classification/4-Applied/solution/notebook.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: skl2onnx in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (1.8.0)\n", + "Requirement already satisfied: protobuf in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (3.8.0)\n", + "Requirement already satisfied: numpy>=1.15 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (1.19.2)\n", + "Requirement already satisfied: onnx>=1.2.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (1.9.0)\n", + "Requirement already satisfied: six in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from skl2onnx) (1.12.0)\n", + "Requirement already satisfied: onnxconverter-common<1.9,>=1.6.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (1.8.1)\n", + "Requirement already satisfied: scikit-learn>=0.19 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (0.24.2)\n", + "Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (1.4.1)\n", + "Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from protobuf->skl2onnx) (45.1.0)\n", + "Requirement already satisfied: 
typing-extensions>=3.6.2.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from onnx>=1.2.1->skl2onnx) (3.10.0.0)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from scikit-learn>=0.19->skl2onnx) (2.1.0)\n", + "Requirement already satisfied: joblib>=0.11 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from scikit-learn>=0.19->skl2onnx) (0.16.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "!pip install skl2onnx" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd \n" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 0 indian 0 0 0 0 0 \n", + "1 1 indian 1 0 0 0 0 \n", + "2 2 indian 0 0 0 0 0 \n", + "3 3 indian 0 0 0 0 0 \n", + "4 4 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 382 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00indian00000000...0000000000
11indian10000000...0000000000
22indian00000000...0000000000
33indian00000000...0000000000
44indian00000000...0000000010
\n

5 rows × 382 columns

\n
" + }, + "metadata": {}, + "execution_count": 60 + } + ], + "source": [ + "data = pd.read_csv('../../data/cleaned_cuisines.csv')\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 61 + } + ], + "source": [ + "X = data.iloc[:,2:]\n", + "X.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " cuisine\n", + "0 indian\n", + "1 indian\n", + "2 indian\n", + "3 indian\n", + "4 indian" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
cuisine
0indian
1indian
2indian
3indian
4indian
\n
" + }, + "metadata": {}, + "execution_count": 62 + } + ], + "source": [ + "y = data[['cuisine']]\n", + "y.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "from sklearn.svm import SVC\n", + "from sklearn.model_selection import cross_val_score\n", + "from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "SVC(C=10, kernel='linear', probability=True, random_state=0)" + ] + }, + "metadata": {}, + "execution_count": 65 + } + ], + "source": [ + "model = SVC(kernel='linear', C=10, probability=True,random_state=0)\n", + "model.fit(X_train,y_train.values.ravel())\n" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred = model.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " precision recall f1-score support\n\n chinese 0.72 0.70 0.71 236\n indian 0.91 0.88 0.89 243\n japanese 0.80 0.75 0.77 240\n korean 0.80 0.81 0.81 230\n thai 0.76 0.85 0.80 250\n\n accuracy 0.80 1199\n macro avg 0.80 0.80 0.80 1199\nweighted avg 0.80 0.80 0.80 1199\n\n" + ] + } + ], + "source": [ + "print(classification_report(y_test,y_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [], + "source": [ + "from skl2onnx import convert_sklearn\n", + "from skl2onnx.common.data_types import FloatTensorType\n", + "\n", + "initial_type = 
[('float_input', FloatTensorType([None, 380]))]\n", + "options = {id(model): {'nocl': True, 'zipmap': False}}\n", + "onx = convert_sklearn(model, initial_types=initial_type, options=options)\n", + "with open(\"./model.onnx\", \"wb\") as f:\n", + " f.write(onx.SerializeToString())\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby na automatický preklad [Co-op Translator](https://github.com/Azure/co-op-translator). Hoci sa snažíme o presnosť, upozorňujeme, že automatické preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za záväzný zdroj. Pre dôležité informácie odporúčame profesionálny ľudský preklad. Nezodpovedáme za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sk/5-Clustering/1-Visualize/notebook.ipynb b/translations/sk/5-Clustering/1-Visualize/notebook.ipynb new file mode 100644 index 000000000..a1646504b --- /dev/null +++ b/translations/sk/5-Clustering/1-Visualize/notebook.ipynb @@ -0,0 +1,50 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python383jvsc74a57bd0e134e05457d34029b6460cd73bbf1ed73f339b5b6d98c95be70b69eba114fe95", + "display_name": "Python 3.8.3 64-bit (conda)" + }, + "coopTranslator": { + "original_hash": "40e0707e96b3e1899a912776006264f9", + "translation_date": "2025-09-06T14:07:53+00:00", + "source_file": "5-Clustering/1-Visualize/notebook.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + 
"metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby na automatický preklad [Co-op Translator](https://github.com/Azure/co-op-translator). Aj keď sa snažíme o presnosť, upozorňujeme, že automatické preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre dôležité informácie odporúčame profesionálny ľudský preklad. Nezodpovedáme za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sk/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb b/translations/sk/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb new file mode 100644 index 000000000..b0cf02f10 --- /dev/null +++ b/translations/sk/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb @@ -0,0 +1,499 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "## **Nigerijská hudba zozbieraná zo Spotify - analýza**\n", + "\n", + "Clustering je typ [neučenej metódy](https://wikipedia.org/wiki/Unsupervised_learning), ktorá predpokladá, že dataset nie je označený alebo že jeho vstupy nie sú spojené s preddefinovanými výstupmi. Používa rôzne algoritmy na triedenie neoznačených dát a poskytuje skupiny podľa vzorov, ktoré rozpozná v dátach.\n", + "\n", + "[**Kvíz pred prednáškou**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/27/)\n", + "\n", + "### **Úvod**\n", + "\n", + "[Clustering](https://link.springer.com/referenceworkentry/10.1007%2F978-0-387-30164-8_124) je veľmi užitočný na prieskum dát. Pozrime sa, či nám môže pomôcť objaviť trendy a vzory v tom, ako nigérijské publikum konzumuje hudbu.\n", + "\n", + "> ✅ Zamyslite sa na chvíľu nad využitím clusteringu. 
V reálnom živote clustering nastáva vždy, keď máte kopu bielizne a potrebujete roztriediť oblečenie členov rodiny 🧦👕👖🩲. V dátovej vede clustering nastáva pri analýze preferencií používateľov alebo pri určovaní charakteristík akéhokoľvek neoznačeného datasetu. Clustering pomáha urobiť poriadok v chaose, ako napríklad v zásuvke na ponožky.\n", + "\n", + "V profesionálnom prostredí sa clustering môže použiť na určenie vecí, ako je segmentácia trhu, napríklad na zistenie, ktoré vekové skupiny kupujú aké produkty. Ďalším využitím by mohlo byť odhaľovanie anomálií, napríklad na detekciu podvodov v datasete transakcií kreditných kariet. Alebo by ste mohli použiť clustering na identifikáciu nádorov v dávke medicínskych skenov.\n", + "\n", + "✅ Zamyslite sa na chvíľu nad tým, ako ste sa mohli stretnúť s clusteringom „v divočine“, napríklad v bankovníctve, e-commerce alebo podnikateľskom prostredí.\n", + "\n", + "> 🎓 Zaujímavé je, že analýza klastrov vznikla v oblasti antropológie a psychológie v 30. rokoch 20. storočia. Dokážete si predstaviť, ako mohla byť použitá?\n", + "\n", + "Alternatívne by ste ju mohli použiť na zoskupovanie výsledkov vyhľadávania – napríklad podľa nákupných odkazov, obrázkov alebo recenzií. Clustering je užitočný, keď máte veľký dataset, ktorý chcete zmenšiť a na ktorom chcete vykonať podrobnejšiu analýzu, takže táto technika môže byť použitá na získanie informácií o dátach pred vytvorením iných modelov.\n", + "\n", + "✅ Keď sú vaše dáta organizované v klastroch, priradíte im identifikátor klastru. Táto technika môže byť užitočná pri zachovaní súkromia datasetu; namiesto toho môžete odkazovať na dátový bod podľa jeho identifikátora klastru, namiesto odhaľovania identifikovateľných údajov. 
Dokážete si predstaviť ďalšie dôvody, prečo by ste odkazovali na identifikátor klastru namiesto iných prvkov klastru na jeho identifikáciu?\n", + "\n", + "### Začíname s clusteringom\n", + "\n", + "> 🎓 Spôsob, akým vytvárame klastre, má veľa spoločného s tým, ako zhromažďujeme dátové body do skupín. Poďme si rozobrať niektoré pojmy:\n", + ">\n", + "> 🎓 ['Transduktívny' vs. 'induktívny'](https://wikipedia.org/wiki/Transduction_(machine_learning))\n", + ">\n", + "> Transduktívna inferencia je odvodená z pozorovaných tréningových prípadov, ktoré sa mapujú na konkrétne testovacie prípady. Induktívna inferencia je odvodená z tréningových prípadov, ktoré sa mapujú na všeobecné pravidlá, ktoré sa až potom aplikujú na testovacie prípady.\n", + ">\n", + "> Príklad: Predstavte si, že máte dataset, ktorý je len čiastočne označený. Niektoré veci sú „platne“, niektoré „CD“ a niektoré sú prázdne. Vašou úlohou je poskytnúť označenia pre prázdne miesta. Ak si zvolíte induktívny prístup, trénovali by ste model hľadajúci „platne“ a „CD“ a aplikovali tieto označenia na neoznačené dáta. Tento prístup bude mať problém klasifikovať veci, ktoré sú vlastne „kazety“. Transduktívny prístup na druhej strane efektívnejšie spracováva tieto neznáme dáta, pretože pracuje na zoskupovaní podobných položiek a potom aplikuje označenie na skupinu. V tomto prípade by klastre mohli odrážať „okrúhle hudobné veci“ a „štvorcové hudobné veci“.\n", + ">\n", + "> 🎓 ['Nerovinná' vs. 'rovinná' geometria](https://datascience.stackexchange.com/questions/52260/terminology-flat-geometry-in-the-context-of-clustering)\n", + ">\n", + "> Odvodené z matematickej terminológie, nerovinná vs. 
rovinná geometria sa týka merania vzdialeností medzi bodmi buď „rovinnými“ ([Euklidovskými](https://wikipedia.org/wiki/Euclidean_geometry)) alebo „nerovinnými“ (ne-Euklidovskými) geometrickými metódami.\n", + ">\n", + "> „Rovinná“ v tomto kontexte odkazuje na Euklidovskú geometriu (ktorej časti sa vyučujú ako „planimetria“, teda geometria v rovine) a nerovinná odkazuje na ne-Euklidovskú geometriu. Čo má geometria spoločné s machine learningom? No, ako dve oblasti zakorenené v matematike, musí existovať spoločný spôsob merania vzdialeností medzi bodmi v klastroch, a to sa dá urobiť „rovinným“ alebo „nerovinným“ spôsobom, v závislosti od povahy dát. [Euklidovské vzdialenosti](https://wikipedia.org/wiki/Euclidean_distance) sa merajú ako dĺžka úsečky medzi dvoma bodmi. [Ne-Euklidovské vzdialenosti](https://wikipedia.org/wiki/Non-Euclidean_geometry) sa merajú pozdĺž krivky. Ak vaše dáta po vizualizácii neležia v rovine, možno budete potrebovať špecializovaný algoritmus na ich spracovanie.\n", + "\n", + "

\n", + " \n", + "

Infografika od Dasani Madipalli
\n", + "\n", + "\n", + "\n", + "> 🎓 ['Vzdialenosti'](https://web.stanford.edu/class/cs345a/slides/12-clustering.pdf)\n", + ">\n", + "> Klastre sú definované ich maticou vzdialeností, napr. vzdialenosťami medzi bodmi. Táto vzdialenosť sa dá merať niekoľkými spôsobmi. Euklidovské klastre sú definované priemerom hodnôt bodov a obsahujú „centroid“ alebo stredový bod. Vzdialenosti sa teda merajú podľa vzdialenosti od tohto centroidu. Ne-Euklidovské vzdialenosti odkazujú na „clustroidy“, bod najbližší k ostatným bodom. Clustroidy môžu byť definované rôznymi spôsobmi.\n", + ">\n", + "> 🎓 ['Obmedzené'](https://wikipedia.org/wiki/Constrained_clustering)\n", + ">\n", + "> [Obmedzený clustering](https://web.cs.ucdavis.edu/~davidson/Publications/ICDMTutorial.pdf) zavádza „semi-supervised“ učenie do tejto neučenej metódy. Vzťahy medzi bodmi sú označené ako „nemôže byť prepojené“ alebo „musí byť prepojené“, takže na dataset sú aplikované určité pravidlá.\n", + ">\n", + "> Príklad: Ak je algoritmus voľne aplikovaný na dávku neoznačených alebo čiastočne označených dát, klastre, ktoré vytvorí, môžu byť nekvalitné. V príklade vyššie by klastre mohli zoskupovať „okrúhle hudobné veci“, „štvorcové hudobné veci“, „trojuholníkové veci“ a „sušienky“. Ak by boli dané určité obmedzenia alebo pravidlá („položka musí byť vyrobená z plastu“, „položka musí byť schopná produkovať hudbu“), mohlo by to pomôcť „obmedziť“ algoritmus na lepšie rozhodnutia.\n", + ">\n", + "> 🎓 'Hustota'\n", + ">\n", + "> Dáta, ktoré sú „šumové“, sa považujú za „husté“. Vzdialenosti medzi bodmi v každom z jeho klastrov môžu byť pri skúmaní viac alebo menej husté, alebo „preplnené“, a preto tieto dáta potrebujú byť analyzované vhodnou metódou clusteringu. 
[Tento článok](https://www.kdnuggets.com/2020/02/understanding-density-based-clustering.html) demonštruje rozdiel medzi použitím K-Means clusteringu a HDBSCAN algoritmov na preskúmanie šumového datasetu s nerovnomernou hustotou klastrov.\n", + "\n", + "Prehĺbte svoje pochopenie techník clusteringu v tomto [učebnom module](https://docs.microsoft.com/learn/modules/train-evaluate-cluster-models?WT.mc_id=academic-77952-leestott)\n", + "\n", + "### **Algoritmy clusteringu**\n", + "\n", + "Existuje viac ako 100 algoritmov clusteringu a ich použitie závisí od povahy dát. Poďme si prejsť niektoré hlavné:\n", + "\n", + "- **Hierarchický clustering**. Ak je objekt klasifikovaný podľa jeho blízkosti k blízkemu objektu, namiesto vzdialeného, klastre sa tvoria na základe vzdialenosti členov od ostatných objektov. Hierarchický clustering sa vyznačuje opakovaným spájaním dvoch klastrov.\n", + "\n", + "

\n", + " \n", + "

Infografika od Dasani Madipalli
\n", + "\n", + "\n", + "\n", + "- **Centroid clustering**. Tento populárny algoritmus vyžaduje výber „k“, alebo počet klastrov, ktoré sa majú vytvoriť, po ktorom algoritmus určí stredový bod klastru a zhromaždí dáta okolo tohto bodu. [K-means clustering](https://wikipedia.org/wiki/K-means_clustering) je populárna verzia centroid clusteringu, ktorá rozdeľuje dataset na preddefinované K skupiny. Stred je určený najbližším priemerom, odtiaľ názov. Štvorcová vzdialenosť od klastru je minimalizovaná.\n", + "\n", + "

\n", + " \n", + "

Infografika od Dasani Madipalli
\n", + "\n", + "\n", + "\n", + "- **Clustering založený na distribúcii**. Založený na štatistickom modelovaní, clustering založený na distribúcii sa sústreďuje na určenie pravdepodobnosti, že dátový bod patrí do klastru, a priradenie ho podľa toho. Metódy Gaussovskej zmesi patria do tohto typu.\n", + "\n", + "- **Clustering založený na hustote**. Dátové body sú priradené do klastrov na základe ich hustoty, alebo ich zoskupenia okolo seba. Dátové body vzdialené od skupiny sú považované za odľahlé alebo šumové. DBSCAN, Mean-shift a OPTICS patria do tohto typu clusteringu.\n", + "\n", + "- **Clustering založený na mriežke**. Pre multidimenzionálne datasety sa vytvorí mriežka a dáta sa rozdelia medzi bunky mriežky, čím sa vytvoria klastre.\n", + "\n", + "Najlepší spôsob, ako sa naučiť o clusteringu, je vyskúšať si ho sami, a presne to urobíte v tomto cvičení.\n", + "\n", + "Budeme potrebovať niektoré balíčky na dokončenie tohto modulu. Môžete ich nainštalovať pomocou: `install.packages(c('tidyverse', 'tidymodels', 'DataExplorer', 'summarytools', 'plotly', 'paletteer', 'corrplot', 'patchwork'))`\n", + "\n", + "Alternatívne, skript nižšie skontroluje, či máte balíčky potrebné na dokončenie tohto modulu, a nainštaluje ich za vás, ak niektoré chýbajú.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "suppressWarnings(if(!require(\"pacman\")) install.packages(\"pacman\"))\r\n", + "\r\n", + "pacman::p_load('tidyverse', 'tidymodels', 'DataExplorer', 'summarytools', 'plotly', 'paletteer', 'corrplot', 'patchwork')\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Cvičenie - zoskupte svoje údaje\n", + "\n", + "Zoskupovanie ako technika je výrazne podporené správnou vizualizáciou, takže začnime vizualizáciou našich hudobných údajov. 
Toto cvičenie nám pomôže rozhodnúť, ktorú z metód zoskupovania by sme mali najefektívnejšie použiť vzhľadom na povahu týchto údajov.\n", + "\n", + "Začnime hneď tým, že importujeme údaje.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load the core tidyverse and make it available in your current R session\r\n", + "library(tidyverse)\r\n", + "\r\n", + "# Import the data into a tibble\r\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/5-Clustering/data/nigerian-songs.csv\")\r\n", + "\r\n", + "# View the first 5 rows of the data set\r\n", + "df %>% \r\n", + " slice_head(n = 5)\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Niekedy môžeme chcieť získať trochu viac informácií o našich údajoch. Môžeme sa pozrieť na `údaje` a `ich štruktúru` pomocou funkcie [*glimpse()*](https://pillar.r-lib.org/reference/glimpse.html):\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Glimpse into the data set\r\n", + "df %>% \r\n", + " glimpse()\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Dobrá práca!💪\n", + "\n", + "Môžeme si všimnúť, že `glimpse()` vám poskytne celkový počet riadkov (pozorovaní) a stĺpcov (premenných), potom prvé záznamy každej premennej v riadku za názvom premennej. Okrem toho sa *dátový typ* premennej zobrazí hneď za názvom premennej vo vnútri `< >`.\n", + "\n", + "`DataExplorer::introduce()` dokáže túto informáciu prehľadne zhrnúť:\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Describe basic information for our data\r\n", + "df %>% \r\n", + " introduce()\r\n", + "\r\n", + "# A visual display of the same\r\n", + "df %>% \r\n", + " plot_intro()\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Skvelé! 
Práve sme zistili, že naše údaje nemajú žiadne chýbajúce hodnoty.\n", + "\n", + "Keď už sme pri tom, môžeme preskúmať bežné štatistiky centrálnej tendencie (napr. [priemer](https://en.wikipedia.org/wiki/Arithmetic_mean) a [medián](https://en.wikipedia.org/wiki/Median)) a miery rozptylu (napr. [štandardná odchýlka](https://en.wikipedia.org/wiki/Standard_deviation)) pomocou `summarytools::descr()`.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Describe common statistics\r\n", + "df %>% \r\n", + " descr(stats = \"common\")\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Pozrime sa na všeobecné hodnoty údajov. Všimnite si, že popularita môže byť `0`, čo znamená piesne, ktoré nemajú žiadne hodnotenie. Tieto údaje čoskoro odstránime.\n", + "\n", + "> 🤔 Ak pracujeme s klastrovaním, nesupervidovanou metódou, ktorá nevyžaduje označené údaje, prečo zobrazujeme tieto údaje s označeniami? Počas fázy skúmania údajov sú užitočné, ale pre fungovanie algoritmov klastrovania nie sú nevyhnutné.\n", + "\n", + "### 1. Preskúmajte populárne žánre\n", + "\n", + "Poďme zistiť najpopulárnejšie žánre 🎶 tým, že spočítame, koľkokrát sa vyskytujú.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Popular genres\r\n", + "top_genres <- df %>% \r\n", + " count(artist_top_genre, sort = TRUE) %>% \r\n", + "# Encode to categorical and reorder the according to count\r\n", + " mutate(artist_top_genre = factor(artist_top_genre) %>% fct_inorder())\r\n", + "\r\n", + "# Print the top genres\r\n", + "top_genres\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "To dopadlo dobre! Hovorí sa, že obrázok má hodnotu tisíc riadkov dátového rámca (vlastne to nikto nikdy nehovorí 😅). 
Ale chápete, čo tým myslím, však?\n", + "\n", + "Jedným zo spôsobov, ako vizualizovať kategorické údaje (znakové alebo faktorové premenné), je použitie stĺpcových grafov. Poďme vytvoriť stĺpcový graf pre top 10 žánrov:\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Change the default gray theme\r\n", + "theme_set(theme_light())\r\n", + "\r\n", + "# Visualize popular genres\r\n", + "top_genres %>%\r\n", + " slice(1:10) %>% \r\n", + " ggplot(mapping = aes(x = artist_top_genre, y = n,\r\n", + " fill = artist_top_genre)) +\r\n", + " geom_col(alpha = 0.8) +\r\n", + " paletteer::scale_fill_paletteer_d(\"rcartocolor::Vivid\") +\r\n", + " ggtitle(\"Top genres\") +\r\n", + " theme(plot.title = element_text(hjust = 0.5),\r\n", + " # Rotates the X markers (so we can read them)\r\n", + " axis.text.x = element_text(angle = 90))\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Teraz je oveľa jednoduchšie identifikovať, že máme žánre označené ako `Missing` 🧐!\n", + "\n", + "> Dobrá vizualizácia vám ukáže veci, ktoré ste neočakávali, alebo vyvolá nové otázky o údajoch - Hadley Wickham a Garrett Grolemund, [R For Data Science](https://r4ds.had.co.nz/introduction.html)\n", + "\n", + "Poznámka: Keď je najvyšší žáner označený ako `Missing`, znamená to, že ho Spotify neklasifikoval, takže sa ho zbavme.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Visualize popular genres\r\n", + "top_genres %>%\r\n", + " filter(artist_top_genre != \"Missing\") %>% \r\n", + " slice(1:10) %>% \r\n", + " ggplot(mapping = aes(x = artist_top_genre, y = n,\r\n", + " fill = artist_top_genre)) +\r\n", + " geom_col(alpha = 0.8) +\r\n", + " paletteer::scale_fill_paletteer_d(\"rcartocolor::Vivid\") +\r\n", + " ggtitle(\"Top genres\") +\r\n", + " theme(plot.title = element_text(hjust = 0.5),\r\n", + " # Rotates the X markers (so we can read them)\r\n", + " 
axis.text.x = element_text(angle = 90))\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Z krátkeho prieskumu údajov sme zistili, že tri najpopulárnejšie žánre dominujú tomuto datasetu. Zamerajme sa na `afro dancehall`, `afropop` a `nigerian pop`, a navyše filtrujme dataset tak, aby sme odstránili všetko s hodnotou popularity 0 (čo znamená, že nebolo klasifikované s popularitou v datasete a môže byť považované za šum pre naše účely):\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "nigerian_songs <- df %>% \r\n", + " # Concentrate on top 3 genres\r\n", + " filter(artist_top_genre %in% c(\"afro dancehall\", \"afropop\",\"nigerian pop\")) %>% \r\n", + " # Remove unclassified observations\r\n", + " filter(popularity != 0)\r\n", + "\r\n", + "\r\n", + "\r\n", + "# Visualize popular genres\r\n", + "nigerian_songs %>%\r\n", + " count(artist_top_genre) %>%\r\n", + " ggplot(mapping = aes(x = artist_top_genre, y = n,\r\n", + " fill = artist_top_genre)) +\r\n", + " geom_col(alpha = 0.8) +\r\n", + " paletteer::scale_fill_paletteer_d(\"ggsci::category10_d3\") +\r\n", + " ggtitle(\"Top genres\") +\r\n", + " theme(plot.title = element_text(hjust = 0.5))\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Pozrime sa, či existuje nejaký zjavný lineárny vzťah medzi číselnými premennými v našej dátovej sade. Tento vzťah je kvantifikovaný matematicky pomocou [korelačnej štatistiky](https://en.wikipedia.org/wiki/Correlation).\n", + "\n", + "Korelačná štatistika je hodnota medzi -1 a 1, ktorá naznačuje silu vzťahu. 
Hodnoty nad 0 indikujú *pozitívnu* koreláciu (vysoké hodnoty jednej premennej majú tendenciu zhodovať sa s vysokými hodnotami druhej), zatiaľ čo hodnoty pod 0 indikujú *negatívnu* koreláciu (vysoké hodnoty jednej premennej majú tendenciu zhodovať sa s nízkymi hodnotami druhej).\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Narrow down to numeric variables and find correlation\r\n", + "corr_mat <- nigerian_songs %>% \r\n", + " select(where(is.numeric)) %>% \r\n", + " cor()\r\n", + "\r\n", + "# Visualize correlation matrix\r\n", + "corrplot(corr_mat, order = 'AOE', col = c('white', 'black'), bg = 'gold2') \r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Údaje nie sú silne korelované, okrem `energy` a `loudness`, čo dáva zmysel, keďže hlasná hudba je zvyčajne dosť energická. Premenná `popularity` súvisí s `release_date`, čo tiež dáva zmysel, pretože novšie skladby sú pravdepodobne populárnejšie. Zdá sa, že dĺžka a energia majú tiež určitú koreláciu.\n", + "\n", + "Bude zaujímavé zistiť, čo dokáže algoritmus zhlukovania urobiť s týmito údajmi!\n", + "\n", + "> 🎓 Upozorňujeme, že korelácia neznamená kauzalitu! Máme dôkaz o korelácii, ale žiadny dôkaz o kauzalite. [Zábavná webová stránka](https://tylervigen.com/spurious-correlations) obsahuje niekoľko vizuálov, ktoré tento bod zdôrazňujú.\n", + "\n", + "### 2. Preskúmajte distribúciu údajov\n", + "\n", + "Položme si niekoľko jemnejších otázok. Sú žánre výrazne odlišné v ich vnímaní tanečnosti na základe ich popularity? 
Preskúmajme distribúciu údajov našich troch najpopulárnejších žánrov z hľadiska popularity a tanečnosti pozdĺž danej osi x a y pomocou [hustotných grafov](https://www.khanacademy.org/math/ap-statistics/density-curves-normal-distribution-ap/density-curves/v/density-curves).\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Perform 2D kernel density estimation\r\n", + "density_estimate_2d <- nigerian_songs %>% \r\n", + " ggplot(mapping = aes(x = popularity, y = danceability, color = artist_top_genre)) +\r\n", + " geom_density_2d(bins = 5, size = 1) +\r\n", + " paletteer::scale_color_paletteer_d(\"RSkittleBrewer::wildberry\") +\r\n", + " xlim(-20, 80) +\r\n", + " ylim(0, 1.2)\r\n", + "\r\n", + "# Density plot based on the popularity\r\n", + "density_estimate_pop <- nigerian_songs %>% \r\n", + " ggplot(mapping = aes(x = popularity, fill = artist_top_genre, color = artist_top_genre)) +\r\n", + " geom_density(size = 1, alpha = 0.5) +\r\n", + " paletteer::scale_fill_paletteer_d(\"RSkittleBrewer::wildberry\") +\r\n", + " paletteer::scale_color_paletteer_d(\"RSkittleBrewer::wildberry\") +\r\n", + " theme(legend.position = \"none\")\r\n", + "\r\n", + "# Density plot based on the danceability\r\n", + "density_estimate_dance <- nigerian_songs %>% \r\n", + " ggplot(mapping = aes(x = danceability, fill = artist_top_genre, color = artist_top_genre)) +\r\n", + " geom_density(size = 1, alpha = 0.5) +\r\n", + " paletteer::scale_fill_paletteer_d(\"RSkittleBrewer::wildberry\") +\r\n", + " paletteer::scale_color_paletteer_d(\"RSkittleBrewer::wildberry\")\r\n", + "\r\n", + "\r\n", + "# Patch everything together\r\n", + "library(patchwork)\r\n", + "density_estimate_2d / (density_estimate_pop + density_estimate_dance)\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Vidíme, že existujú sústredené kruhy, ktoré sa zhodujú, bez ohľadu na žáner. 
Je možné, že vkus nigérijských poslucháčov sa pri tomto žánri zbieha na určitej úrovni tanečnosti?\n", + "\n", + "Vo všeobecnosti sa tri žánre zhodujú, pokiaľ ide o ich popularitu a tanečnosť. Určenie klastrov v týchto voľne zarovnaných údajoch bude výzvou. Pozrime sa, či nám rozptylový graf môže v tomto pomôcť.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# A scatter plot of popularity and danceability\r\n", + "scatter_plot <- nigerian_songs %>% \r\n", + " ggplot(mapping = aes(x = popularity, y = danceability, color = artist_top_genre, shape = artist_top_genre)) +\r\n", + " geom_point(size = 2, alpha = 0.8) +\r\n", + " paletteer::scale_color_paletteer_d(\"futurevisions::mars\")\r\n", + "\r\n", + "# Add a touch of interactivity\r\n", + "ggplotly(scatter_plot)\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Rozptylový graf s rovnakými osami ukazuje podobný vzor konvergencie.\n", + "\n", + "Vo všeobecnosti môžete pri zhlukovaní použiť rozptylové grafy na zobrazenie zhlukov údajov, takže zvládnutie tohto typu vizualizácie je veľmi užitočné. V ďalšej lekcii použijeme tieto filtrované údaje a metódu k-means na objavenie skupín v týchto údajoch, ktoré sa zaujímavo prekrývajú.\n", + "\n", + "## **🚀 Výzva**\n", + "\n", + "V rámci prípravy na ďalšiu lekciu vytvorte graf o rôznych algoritmoch zhlukovania, ktoré môžete objaviť a použiť v produkčnom prostredí. Aké typy problémov sa zhlukovanie snaží riešiť?\n", + "\n", + "## [**Kvíz po prednáške**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/28/)\n", + "\n", + "## **Prehľad a samostatné štúdium**\n", + "\n", + "Predtým, než použijete algoritmy zhlukovania, ako sme sa naučili, je dobré pochopiť povahu vášho datasetu. 
Prečítajte si viac na túto tému [tu](https://www.kdnuggets.com/2019/10/right-clustering-algorithm.html).\n", + "\n", + "Prehĺbte si svoje znalosti o technikách zhlukovania:\n", + "\n", + "- [Trénovanie a hodnotenie modelov zhlukovania pomocou Tidymodels a priateľov](https://rpubs.com/eR_ic/clustering)\n", + "\n", + "- Bradley Boehmke & Brandon Greenwell, [*Hands-On Machine Learning with R*](https://bradleyboehmke.github.io/HOML/)*.*\n", + "\n", + "## **Úloha**\n", + "\n", + "[Preskúmajte ďalšie vizualizácie pre zhlukovanie](https://github.com/microsoft/ML-For-Beginners/blob/main/5-Clustering/1-Visualize/assignment.md)\n", + "\n", + "## POĎAKOVANIE:\n", + "\n", + "[Jen Looper](https://www.twitter.com/jenlooper) za vytvorenie pôvodnej verzie tohto modulu v Pythone ♥️\n", + "\n", + "[`Dasani Madipalli`](https://twitter.com/dasani_decoded) za vytvorenie úžasných ilustrácií, ktoré robia koncepty strojového učenia zrozumiteľnejšími a ľahšie pochopiteľnými.\n", + "\n", + "Šťastné učenie,\n", + "\n", + "[Eric](https://twitter.com/ericntay), Gold Microsoft Learn Student Ambassador.\n" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby na automatický preklad [Co-op Translator](https://github.com/Azure/co-op-translator). Aj keď sa snažíme o presnosť, upozorňujeme, že automatické preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre dôležité informácie sa odporúča profesionálny ľudský preklad. 
Nezodpovedáme za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ], + "metadata": { + "anaconda-cloud": "", + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "3.4.1" + }, + "coopTranslator": { + "original_hash": "99c36449cad3708a435f6798cfa39972", + "translation_date": "2025-09-06T14:11:16+00:00", + "source_file": "5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/translations/sk/5-Clustering/1-Visualize/solution/notebook.ipynb b/translations/sk/5-Clustering/1-Visualize/solution/notebook.ipynb new file mode 100644 index 000000000..04bd0d607 --- /dev/null +++ b/translations/sk/5-Clustering/1-Visualize/solution/notebook.ipynb @@ -0,0 +1,830 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Defaulting to user installation because normal site-packages is not writeable\n", + "Requirement already satisfied: seaborn in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (0.11.2)\n", + "Requirement already satisfied: matplotlib>=2.2 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from seaborn) (3.5.0)\n", + "Requirement already satisfied: numpy>=1.15 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from seaborn) (1.21.4)\n", + "Requirement already satisfied: pandas>=0.23 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from seaborn) (1.3.4)\n", + "Requirement already satisfied: scipy>=1.0 in 
/Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from seaborn) (1.7.2)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (4.28.1)\n", + "Requirement already satisfied: pyparsing>=2.2.1 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (2.4.7)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (1.3.2)\n", + "Requirement already satisfied: pillow>=6.2.0 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (8.4.0)\n", + "Requirement already satisfied: cycler>=0.10 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (0.11.0)\n", + "Requirement already satisfied: packaging>=20.0 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (21.2)\n", + "Requirement already satisfied: setuptools-scm>=4 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (6.3.2)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (2.8.2)\n", + "Requirement already satisfied: pytz>=2017.3 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from pandas>=0.23->seaborn) (2021.3)\n", + "Requirement already satisfied: six>=1.5 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from python-dateutil>=2.7->matplotlib>=2.2->seaborn) (1.16.0)\n", + "Requirement already satisfied: tomli>=1.0.0 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from setuptools-scm>=4->matplotlib>=2.2->seaborn) (1.2.2)\n", + "Requirement already satisfied: setuptools in 
/Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from setuptools-scm>=4->matplotlib>=2.2->seaborn) (59.1.1)\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "!pip install seaborn" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
0SparkyMandy & The JungleCruel Santinoalternative r&b2019144000480.6660.85100.4200.5340000.1100-6.6990.0829133.0155
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
2LITT!LITT!AYLØindie r&b2018207758400.8360.27200.5640.0005370.1100-7.1270.0424130.0054
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
\n", + "
" + ], + "text/plain": [ + " name album \\\n", + "0 Sparky Mandy & The Jungle \n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "2 LITT! LITT! \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "0 Cruel Santino alternative r&b 2019 144000 48 \n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "2 AYLØ indie r&b 2018 207758 40 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "0 0.666 0.8510 0.420 0.534000 0.1100 -6.699 \n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "2 0.836 0.2720 0.564 0.000537 0.1100 -7.127 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "\n", + " speechiness tempo time_signature \n", + "0 0.0829 133.015 5 \n", + "1 0.3600 129.993 3 \n", + "2 0.0424 130.005 4 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv(\"../../data/nigerian-songs.csv\")\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Získajte informácie o dátovom rámci\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 530 entries, 0 to 529\n", + "Data columns (total 16 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 name 530 non-null object \n", + " 1 album 530 non-null object \n", + " 2 artist 530 non-null object \n", + " 3 artist_top_genre 530 non-null object \n", + " 4 release_date 530 non-null int64 \n", + " 5 length 530 non-null int64 \n", + " 6 popularity 530 non-null int64 \n", + " 7 
danceability 530 non-null float64\n", + " 8 acousticness 530 non-null float64\n", + " 9 energy 530 non-null float64\n", + " 10 instrumentalness 530 non-null float64\n", + " 11 liveness 530 non-null float64\n", + " 12 loudness 530 non-null float64\n", + " 13 speechiness 530 non-null float64\n", + " 14 tempo 530 non-null float64\n", + " 15 time_signature 530 non-null int64 \n", + "dtypes: float64(8), int64(4), object(4)\n", + "memory usage: 66.4+ KB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "name 0\n", + "album 0\n", + "artist 0\n", + "artist_top_genre 0\n", + "release_date 0\n", + "length 0\n", + "popularity 0\n", + "danceability 0\n", + "acousticness 0\n", + "energy 0\n", + "instrumentalness 0\n", + "liveness 0\n", + "loudness 0\n", + "speechiness 0\n", + "tempo 0\n", + "time_signature 0\n", + "dtype: int64" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.isnull().sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Pozrite sa na všeobecné hodnoty údajov. Všimnite si, že popularita môže byť „0“ - a existuje veľa riadkov s touto hodnotou\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
release_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
count530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000
mean2015.390566222298.16981117.5075470.7416190.2654120.7606230.0163050.147308-4.9530110.130748116.4878643.986792
std3.13168839696.82225918.9922120.1175220.2083420.1485330.0903210.1235882.4641860.09293923.5186010.333701
min1998.00000089488.0000000.0000000.2550000.0006650.1110000.0000000.028300-19.3620000.02780061.6950003.000000
25%2014.000000199305.0000000.0000000.6810000.0895250.6690000.0000000.075650-6.2987500.059100102.9612504.000000
50%2016.000000218509.00000013.0000000.7610000.2205000.7845000.0000040.103500-4.5585000.097950112.7145004.000000
75%2017.000000242098.50000031.0000000.8295000.4030000.8757500.0002340.164000-3.3310000.177000125.0392504.000000
max2020.000000511738.00000073.0000000.9660000.9540000.9950000.9100000.8110000.5820000.514000206.0070005.000000
\n", + "
" + ], + "text/plain": [ + " release_date length popularity danceability acousticness \\\n", + "count 530.000000 530.000000 530.000000 530.000000 530.000000 \n", + "mean 2015.390566 222298.169811 17.507547 0.741619 0.265412 \n", + "std 3.131688 39696.822259 18.992212 0.117522 0.208342 \n", + "min 1998.000000 89488.000000 0.000000 0.255000 0.000665 \n", + "25% 2014.000000 199305.000000 0.000000 0.681000 0.089525 \n", + "50% 2016.000000 218509.000000 13.000000 0.761000 0.220500 \n", + "75% 2017.000000 242098.500000 31.000000 0.829500 0.403000 \n", + "max 2020.000000 511738.000000 73.000000 0.966000 0.954000 \n", + "\n", + " energy instrumentalness liveness loudness speechiness \\\n", + "count 530.000000 530.000000 530.000000 530.000000 530.000000 \n", + "mean 0.760623 0.016305 0.147308 -4.953011 0.130748 \n", + "std 0.148533 0.090321 0.123588 2.464186 0.092939 \n", + "min 0.111000 0.000000 0.028300 -19.362000 0.027800 \n", + "25% 0.669000 0.000000 0.075650 -6.298750 0.059100 \n", + "50% 0.784500 0.000004 0.103500 -4.558500 0.097950 \n", + "75% 0.875750 0.000234 0.164000 -3.331000 0.177000 \n", + "max 0.995000 0.910000 0.811000 0.582000 0.514000 \n", + "\n", + " tempo time_signature \n", + "count 530.000000 530.000000 \n", + "mean 116.487864 3.986792 \n", + "std 23.518601 0.333701 \n", + "min 61.695000 3.000000 \n", + "25% 102.961250 4.000000 \n", + "50% 112.714500 4.000000 \n", + "75% 125.039250 4.000000 \n", + "max 206.007000 5.000000 " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "import seaborn as sns\n", + "\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top[:5].index,y=top[:5].values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Odstráňte žánre „Missing“, pretože nie sú klasifikované v Spotify\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "df = df[df['artist_top_genre'] != 'Missing']\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "df = df[(df['artist_top_genre'] == 'afro dancehall') | (df['artist_top_genre'] == 'afropop') | (df['artist_top_genre'] == 'nigerian pop')]\n", + "df = df[(df['popularity'] > 0)]\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "corrmat = df.corr()\n", + "f, ax = plt.subplots(figsize=(12, 9))\n", + "sns.heatmap(corrmat, vmax=.8, square=True);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.set_theme(style=\"ticks\")\n", + "\n", + "# Show the joint distribution using kernel density estimation\n", + "g = sns.jointplot(\n", + " data=df,\n", + " x=\"popularity\", y=\"danceability\", hue=\"artist_top_genre\",\n", + " kind=\"kde\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages/seaborn/axisgrid.py:337: UserWarning: The `size` parameter has been renamed to `height`; please update your code.\n", + " warnings.warn(msg, UserWarning)\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.FacetGrid(df, hue=\"artist_top_genre\", size=5) \\\n", + " .map(plt.scatter, \"popularity\", \"danceability\") \\\n", + " .add_legend()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby na automatický preklad [Co-op Translator](https://github.com/Azure/co-op-translator). Aj keď sa snažíme o presnosť, upozorňujeme, že automatické preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre dôležité informácie sa odporúča profesionálny ľudský preklad. Nezodpovedáme za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" + }, + "kernelspec": { + "display_name": "Python 3.7.0 64-bit ('3.7')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.9" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "c61deff2839902ac8cb4ed411eb10fee", + "translation_date": "2025-09-06T14:08:28+00:00", + "source_file": "5-Clustering/1-Visualize/solution/notebook.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sk/5-Clustering/2-K-Means/notebook.ipynb b/translations/sk/5-Clustering/2-K-Means/notebook.ipynb new file mode 100644 index 000000000..9b772a894 --- /dev/null +++ 
b/translations/sk/5-Clustering/2-K-Means/notebook.ipynb @@ -0,0 +1,229 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "3e5c8ab363e8d88f566d4365efc7e0bd", + "translation_date": "2025-09-06T14:19:22+00:00", + "source_file": "5-Clustering/2-K-Means/notebook.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: seaborn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.11.1)\n", + "Requirement already satisfied: numpy>=1.15 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.19.2)\n", + "Requirement already satisfied: pandas>=0.23 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.1.2)\n", + "Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.4.1)\n", + "Requirement already satisfied: matplotlib>=2.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (3.1.0)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in 
/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2.8.0)\n", + "Requirement already satisfied: pytz>=2017.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2019.1)\n", + "Requirement already satisfied: cycler>=0.10 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (1.1.0)\n", + "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (2.4.0)\n", + "Requirement already satisfied: six>=1.5 in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from python-dateutil>=2.7.3->pandas>=0.23->seaborn) (1.12.0)\n", + "Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from kiwisolver>=1.0.1->matplotlib>=2.2->seaborn) (45.1.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install seaborn" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "0 Sparky Mandy & The Jungle \n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "2 LITT! LITT! 
\n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "0 Cruel Santino alternative r&b 2019 144000 48 \n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "2 AYLØ indie r&b 2018 207758 40 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "0 0.666 0.8510 0.420 0.534000 0.1100 -6.699 \n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "2 0.836 0.2720 0.564 0.000537 0.1100 -7.127 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "\n", + " speechiness tempo time_signature \n", + "0 0.0829 133.015 5 \n", + "1 0.3600 129.993 3 \n", + "2 0.0424 130.005 4 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
0SparkyMandy & The JungleCruel Santinoalternative r&b2019144000480.6660.85100.4200.5340000.1100-6.6990.0829133.0155
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
2LITT!LITT!AYLØindie r&b2018207758400.8360.27200.5640.0005370.1100-7.1270.0424130.0054
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
\n
" + }, + "metadata": {}, + "execution_count": 6 + } + ], + "source": [ + "\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "\n", + "\n", + "df = pd.read_csv(\"../data/nigerian-songs.csv\")\n", + "df.head()" + ] + }, + { + "source": [ + "Zameriame sa iba na 3 žánre. Možno dokážeme vytvoriť 3 klastre!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "metadata": {}, + "execution_count": 7 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlYAAAHbCAYAAAAJY9SEAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nO3de7ymc73/8dfbjNROhUwINR0msjvInk07hZLILofaiSJKTQfS+biT2NXu3O6oKL+0f6WURG0dpIOdnTJkO5UMEdNgoaQIw2f/cV1Td2ONGbO+y32vNa/n47Ee676/13Vf9yetWet9f09XqgpJkiRN3GrDLkCSJGm6MFhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiZFwh8Hvm5PuGng+fOHXZ8kTYa4QaikyZZwKfDiKr437FomImFmFYuHXYek0WWPlaShSLhXwicSFiVckfD+hNX7YzslLEg4NOG6hF8nPOdOrjUn4X8Sbkj4dsKnEz4zcPxJCT9N+H3CWQlbDxw7PeGQ/vsfEk5KWLs/tmnC4oSXJFwOnLQC13tJwqV9LZfcWd2Sph+DlaRhORR4DPBo4B+A7YA3DhyfDdwDWB94CXB0wkOWvkhCgGOBHwD3B94D7D1wfDbwdeBfgXWAtwFfXxKees8Dng9sAKwFvGrg2AxgK2ATYNc7u15/zfcD21dxH+CJwHl35T+KpKnNYCVpWJ4PHFLFNVVcBbwT2Gfg+GLg0Cpu6YcQvwf8yzjXmQNsChzWn/tD4FsDx/cFvlbF96q4vYqTgAuApw2cc2QVF1fxJ+CrwOZLvcfbq7ixiptW8HqPSrhnFb+t4hd36b+KpCnNYCXpbtf3Mq0PXDbQfBmw4cDzsSr+vNTxB45zuQf259480Hb5wOMHA3v3w3a/T/g9MHepa1058PhGYM2B57dX8dsVuV4Vv6MLjAcBVyacmPDwcWqWNE0ZrCTd7aooujDz4IHmBwELB
56vm3DPpY4PBpwlFgGzEtYYaNt44PHlwGeqWGvg695VfHhFy13q+Z1er4r/qmJ7uuD2G+DwFXwfSdOAwUrSsBwDHJJw/4QH0M1Z+v8Dx1cHDk64R8JTgB2A48a5zq+AC4G3JayesA2w08Dxo4HnJGyfMKOfNL99wvorWfcyr5ewYcI/J/wdcDPwR+D2lXwfSVOQwUrSsLydbm7S+cDZwGnA+waOX0o3z+pK4CjghVVcsvRF+t6v5wJPBX4HvBX4Cl2woX/Ns+kmy19DN6T4Klby999yrjcDeHNf87XAPwIHrsz7SJqa3MdK0shJ2An4eNXKzU9KOAE4vYp/b1uZJN05e6wkTXkJWyXMTlgt4Zl0Q4EnDLsuSauemcMuQJIa2Ihu/tXadJPLX1TFBcMtSdKqyKFASZKkRhwKlCRJamQkhgLXXXfdmj179rDLkCRJWq4zzzzzmqqaNd6xkQhWs2fPZv78+cMuQ5IkabmSXLasYw4FSpIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1MnPYBbT2D2/4/LBL0DRz5vtfMOwSJElThD1WkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqZHlBqskGyf5QZILkpyf5FV9+zpJTk5yUf997b49ST6aZEGSc5JsMdn/IyRJkkbBivRYLQZeV1WbAY8HDkiyGfBm4JSqmgOc0j8HeDowp/+aBxzevGpJkqQRtNxgVVWLquqs/vENwC+ADYFdgaP7044Gdusf7wp8vjqnA2sl2aB55ZIkSSPmLs2xSjIbeBzwU2C9qlrUH7oSWK9/vCFw+cDLrujblr7WvCTzk8wfGxu7i2VLkiSNnhUOVknWBI4DXl1Vfxg8VlUF1F1546o6oqrmVtXcWbNm3ZWXSpIkjaQVClZJVqcLVV+oqq/1zVctGeLrv1/dty8ENh54+UZ9myRJ0rS2IqsCA3wW+EVVfWjg0InAvv3jfYETBtpf0K8OfDxw/cCQoSRJ0rQ1cwXO2RrYBzg3ydl921uB9wDHJtkfuAzYoz92ErAzsAC4EXhh04olSZJG1HKDVVX9GMgyDm8/zvkFHDDBuiRJkqYcd16XJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqZHlBqskRyW5Osl5A21fTnJ2/3VpkrP79tlJbho49qnJLF6SJGmUzFyBcz4HfBz4/JKGqnruksdJPghcP3D+xVW1easCJUmSporlBquqOjXJ7PGOJQmwB/CUtmVJkiRNPROdY/Uk4Kqqumig7SFJfp7kR0metKwXJpmXZH6S+WNjYxMsQ5IkafgmGqz2Ao4ZeL4IeFBVPQ54LfDFJPcd74VVdURVza2qubNmzZpgGZIkScO30sEqyUzgWcCXl7RV1c1VdW3/+EzgYuAREy1SkiRpKphIj9VTgV9W1RVLGpLMSjKjf/xQYA5wycRKlCRJmhpWZLuFY4CfAJskuSLJ/v2hPfnbYUCAbYBz+u0Xvgq8rKqua1mwJEnSqFqRVYF7LaN9v3HajgOOm3hZkiRJU487r0uSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrS
ZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUyHKDVZKjklyd5LyBtnckWZjk7P5r54Fjb0myIMmFSXacrMIlSZJGzYr0WH0O2Gmc9g9X1eb910kASTYD9gT+vn/NJ5PMaFWsJEnSKFtusKqqU4HrVvB6uwJfqqqbq+rXwAJgywnUJ0mSNGVMZI7VgUnO6YcK1+7bNgQuHzjnir7tDpLMSzI/yfyxsbEJlCFJkjQaVjZYHQ48DNgcWAR88K5eoKqOqKq5VTV31qxZK1mGJEnS6FipYFVVV1XVbVV1O3Akfx3uWwhsPHDqRn2bJEnStLdSwSrJBgNPdweWrBg8EdgzyRpJHgLMAX42sRIlSZKmhpnLOyHJMcB2wLpJrgAOAbZLsjlQwKXASwGq6vwkxwIXAIuBA6rqtskpXZIkabQsN1hV1V7jNH/2Ts5/F/CuiRQlSZI0FbnzuiRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNLDdYJTkqydVJzhtoe3+SXyY5J8nxSdbq22cnuSnJ2f3XpyazeEmSpFGyIj1WnwN2WqrtZOBRVfUY4FfAWwaOXVxVm/dfL2tTpiRJ0uhbbrCqqlOB65Zq+25VLe6fng5sNAm1SZIkTSkt5li9CPjWwPOHJPl5kh8ledKyXpRkXpL5SeaPjY01KEOSJGm4JhSskvwrsBj4Qt+0CHhQVT0OeC3wxST3He+1VXVEVc2tqrmzZs2aSBmSJEkjYaWDVZL9gGcAz6+qAqiqm6vq2v7xmcDFwCMa1ClJkjTyVipYJdkJeCOwS1XdONA+K8mM/vFDgTnAJS0KlSRJGnUzl3dCkmOA7YB1k1wBHEK3CnAN4OQkAKf3KwC3AQ5LcitwO/Cyqrpu3AtLkiRNM8sNVlW11zjNn13GuccBx020KEmSpKnIndclSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJamSFglWSo5JcneS8gbZ1kpyc5KL++9p9e5J8NMmCJOck2WKyipckSRolK9pj9Tlgp6Xa3gycUlVzgFP65wBPB+b0X/OAwydepiRJ0uhboWBVVacC1y3VvCtwdP/4aGC3gfbPV+d0YK0kG7QoVpIkaZRNZI7VelW1qH98JbBe/3hD4PKB867o2/5GknlJ5ieZPzY2NoEyJEmSRkOTyetVVUDdxdccUVVzq2rurFmzWpQhSZI0VBMJVlctGeLrv1/dty8ENh44b6O+TZIkaVqbSLA6Edi3f7wvcMJA+wv61YGPB64fGDKUJEmatmauyElJjgG2A9ZNcgVwCPAe4Ngk+wOXAXv0p58E7AwsAG4EXti4ZkmSpJG0QsGqqvZaxqHtxzm3gAMmUpQkSdJU5M7rkiRJj
RisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDUyc2VfmGQT4MsDTQ8F3g6sBbwEGOvb31pVJ610hZIkSVPESgerqroQ2BwgyQxgIXA88ELgw1X1gSYVSpIkTRGthgK3By6uqssaXU+SJGnKaRWs9gSOGXh+YJJzkhyVZO3xXpBkXpL5SeaPjY2Nd4okSdKUMuFgleQewC7AV/qmw4GH0Q0TLgI+ON7rquqIqppbVXNnzZo10TIkSZKGrkWP1dOBs6rqKoCquqqqbquq24EjgS0bvIckSdLIaxGs9mJgGDDJBgPHdgfOa/AekiRJI2+lVwUCJLk3sAPw0oHm9yXZHCjg0qWOSZIkTVsTClZV9Sfg/ku17TOhiiRJkqYod16XJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKmRmcMuQNJd95vDHj3sEjTNPOjt5w67BGlasMdKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWpk5kQvkORS4AbgNmBxVc1Nsg7wZWA2cCmwR1X9bqLvJUmSNMpa9Vg9uao2r6q5/fM3A6dU1RzglP65JEnStDZZQ4G7Akf3j48Gdpuk95EkSRoZLYJVAd9NcmaSeX3belW1qH98JbDe0i9KMi/J/CTzx8bGGpQhSZI0XBOeYwU8saoWJnkAcHKSXw4erKpKUku/qKqOAI4AmDt37h2OS5IkTTUT7rGqqoX996uB44EtgauSbADQf796ou8jSZI06iYUrJLcO8l9ljwGngacB5wI7Nufti9wwkTeR5IkaSqY6FDgesDxSZZc64tV9e0kZwDHJtkfuAzYY4LvI0mSNPImFKyq6hLgseO0XwtsP5FrS5IkTTXuvC5JktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIysdrJJsnOQHSS5Icn6SV/Xt70iyMMnZ/dfO7cqVJEkaXTMn8NrFwOuq6qwk9wHOTHJyf+zDVfWBiZcnSZI0dax0sKqqRcCi/vENSX4BbNiqMEmSpKmmyRyrJLOBxwE/7ZsOTHJOkqOSrL2M18xLMj/J/LGxsRZlSJIkDdWEg1WSNYHjgFdX1R+Aw4GHAZvT9Wh9cLzXVdURVTW3qubOmjVromVIkiQN3YSCVZLV6ULVF6rqawBVdVVV3VZVtwNHAltOvExJkqTRN5FVgQE+C/yiqj400L7BwGm7A+etfHmSJElTx0RWBW4N7AOcm+Tsvu2twF5JNgcKuBR46YQqlCRJmiImsirwx0DGOXTSypcjSZI0dbnzuiRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDUyke0WJ
EmaNFt/bOthl6Bp5rRXnjbp72GPlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDUyacEqyU5JLkyyIMmbJ+t9JEmSRsWkBKskM4BPAE8HNgP2SrLZZLyXJEnSqJisHqstgQVVdUlV3QJ8Cdh1kt5LkiRpJKSq2l80+Rdgp6p6cf98H2Crqjpw4Jx5wLz+6SbAhc0L0Z1ZF7hm2EVIk8yfc60K/Dm/+z24qmaNd2Dm3V3JElV1BHDEsN5/VZdkflXNHXYd0mTy51yrAn/OR8tkDQUuBDYeeL5R3yZJkjRtTVawOgOYk+QhSe4B7AmcOEnvJUmSNBImZSiwqhYnORD4DjADOKqqzp+M99JKcxhWqwJ/zrUq8Od8hEzK5HVJkqRVkTuvS5IkNWKwkiRJasRgpSaSzE1yn2HXIUnSMBms1MpLgO8ariRp6kmSYdcwXRisNCFJtgCoqpcCZwLHG640VYz3x8Q/MFrVJElVVZKtk+yfZPt+qyStBFcFakKSnA7cWFVP6Z8fDswBdq+qG4ZanLQCkmxDt6HxH4Bv9n9gVquq24dcmnS3SfJk4LPAl4FnAEcDX6+qBUMtbAqyx0oTUlWPB2Yk+Ub//OXARdhzpRG2pFcqyVzgKGBrYG/g60tClT1XWlUk2QR4GfDqqnoLsC/dB+QdhlrYFGWw0l028EdpJkBVbQvMWipc/RL4fpI1h1aotAx9r9T2wFuAF1fVK4D9gKuBjyw5Z3gVSpMvPWAb4GHAjknuXVVnAccA85KsPdQipyCDle6SJWPx/dMNk8yBv/Rc3T/JN/vnBwKnAusMp1JpudYCdgf+sX9+C/BpwLklmtYGemPXBWZW1ZHAu4DQ3YIO4Erghr5Nd4FzrLRSkrwO2Bm4J/D9qjq4bz8VoKq2GWJ50h0MTNBdD7ihqm5M8s/A14Gdq+rkJDsA76MbArnWXitNV0l2Bg4DFgJ/AvYHnk03DLga3S3v3l9V3xxakVPUpNwrUNNbkhcBu1TVtkk+Brw2yd9V1euqapsk30mycVVdPuxapSX6UPVM4JVAJTmNrodqN+A7SY6l+4R+WFVdM8RSpUmV5JHAO4EDgbOBLwL/r6r2TPJnYEfg3CWhaqmRCi2HQ4FarnEm8S4A9knySmBD4DHA3kk+BVBVOxqqNGqSPIyuN+oNwAfoQtShwLfohgSfCfxPVR2/ZP6gNE3dDFwAnFVVN1bVbsAGSQ6g68H9KfDYJHsaqu46f3louZb8o+onot9cVacmuR+wLfC+qrq4/7S/VZJ1quq6YdYrDRr4w7A2cFlV/W/f/htgK+CpVXVCkn2BY5P8uqp+OLyKpbYGhsFn0HWoXAdsAMwFftyf9iW6X/eLkxwN3Ar8wFB119ljpWVK8rAkm/WPXwt8nm45+gOq6nrg18Czk7yZrufq2YYqjYqBntZ79d/PAxYnORCgqi4ELgc2659/FfgXYNHdXKo0qfpQtStwLN0+VY8EPgF8LMmBSV5MNyy4oD//1qo6uqquGlrRU5iT1zWuJPcCPgZcRddlPA94Od2ta3YHtqALU7sBTwYOqqrzhlOtNL4kO9H9zF4CnA4U3Z5Va9J9Qv80sF9V/Y9DHpqukmwKfAb4d7qVgO8A9qHrldoR2Aj4alV9d1g1TicGKy1Tv5XCa4H7AudX1bv79g8DOwFPqqprktyzqv48xFKlO0jyeOC9dB8QHkO3jcKtdJ/aX0230/r3q+obQytSmmRJHgV8ELiwqg7q23YEPkf3O9yd1RtzKFB/Y3CielVdBLwbuB54TJLH9O2vAf4b+EE/Zn/LM
GqVliXJhnQT1H/aD/G9D/gh3bySRVW1P/CGqvqGO6xrmvsV3Z5Uj0wyJ8kaVfUd4Dhg1nBLm54MVvqLwaGQJM9NshuwKV2v1fXA7gPhah7dpN/bvKeaRtBNdJNy90yyVVX9saq+DTyIrveKqlrcf7fbXtNSkhlVdQvwYrq5g68HdkmyLfAsYPEw65uuDFb6i4FQdSDdXj8A36D7Q/ReYH26bRb+vj929d1epDSOgdssPSrJdnRzqN5D11N1WJKn90PbGwO/H1qh0t2k/6B8W5KZVXUrXbhaDfhXulC1X1WdYY9tewYr/UWS1ZJsQDcZfXvgocApwM+r6hK6YcGZdBPa/aSvkdGvetoZOAF4Id1ePM+kG/47jW4DxE8AL6qqs/xjoulm4MPFnCTrL2nvt0+Y2fdcvQKYD/wdcJYLNiaHwWoVt9QfmBl0+5tcS7cr7zbAc6rq1iQv7895vbtSa9QkuTfdH419qmpfuo0/twXWo/tZPhj4I93PtzStDOxTtSNwIt0HiwOSPBz+JlzdSvfv5AF0NyB3L8tJYLBahS01p2pvYF5V3Uy3JP0gun2pbkzyPLr7SFVV3Ta8iqW/SrJa//0f6XaSvgbYBKCqTqDbt+oN/enH0n1SPyTJPe/+aqXJ04equXTDfc8EXgf8PbDbUuFqyZyr5wAf7IOWGjOtrsIGQtUBwIvo9jWhql6aZC3g1CQ/p9uder+qumJoxUq9JPeqqpuq6vYkTwQOp7tx7M+AjZPMrar5dCtXtwBmVNXVSY4AbndrEE03Se5DNwS+Rb99woL+g8dewHOTfKWqftXPuVqtD1e/HWbN05n7WK3ikqwNHAG8qaou6Zfi3twf24muJ+DSqvr1MOuU4C978vwH8Ay6rRMOp9vY8DNJHgocQLfIYjHwD8DBVXX8sOqVJsvS86OSbAJ8lG739Ff2Hzy2A54PvNvf4Xcfg9UqZrzJikm+Rrf673MDvVhbAedU1U1DKFO6gySr0wWpn9L9vD6NbthjbeAFVfXbJOvS7SK9KbCgquY7QVfTzcCcqh3othBJ/+HiEcCb6Ta/fW0frtauqt8NteBVjHOsViFLzama03/CAfgO8GDgn/pjzwXeRrdkXRolC+kmpX+Fbs7UYcDZwEFJ1q+qa6rq7Kr6Uj8c6OpVTSv9UF4l+We6HdWvoNtS5ANV9Su61dvrAx/vX3L9kEpdZdljtYpYKlS9lm5O1U3AfwH/RnfvqMfRDaE8DHheVZ07nGqlv7XUJ/T/BH5YVXv2x7amGxq8F92Qh/uradpJ8hBgtaq6uO+Z/U/gNcCSXqoNge9V1f79h+Y1quqc4VW86nLy+ipiIFQ9HngC8ERgDeAMYHFVHdzvYfVwuiGURUMrVhowEKoeSncLjmcBr07yTrqVTaf1E3V3oxsWNFhpOnoCcFGSK/p7tM6j+3k/lG4+4Wy6Ses3VdWBQ6xzlWewWoUkeSRwCF2v1GpVdVW/VP0nSR5YVa+gu+2BNDL6ULUL3bDfAuAS4NN0S8oPSvLRqvrvJOdWlbuqa1qqqi8kWRM4I8neVXVOkgcCZ/ZzqdYHPkQ3tUND5ByraWzp3aWr6hfAkXTBarsk61bVVcDWwBOTrOeO1Bo1fS/rwcCOwPF0Gxw+je4my9sCr+s3PzRUadoZ2FF9R+BRdEOAR/YrZC8F7pfkk3Q3VT6hqk729/hwOcdqmhpn88916O5y/h3g2XTDJl8HTu33+Jnh5p8aRUk2ottaYW26XdSfB3yKbhf1zwFjVXXG0AqUJlmSLYGPAK+pqtP7ebLPo/s9Dt39XP9UVT8aVo36K4cCp6mBUPUaYFe6VVRvotvs893AbcB+wK1JvgHcPpxKpTvXb0x7RZJ3AV+oqgVJPk93d4Dzq+qy4VYoTZ4kGwNvBM6tqtMBqupDfafUyXS3HTtpiCVqKQaraaa/fcE6VfWzfk7VFnQ3VX4D3f/f69NtpXAY3ZDgmVVlqNJUcC7w0n4/q2cBrzJUaRWwG
DgH2DXJTlX1bfhLuJoBrDXU6nQHBqtppL+twf7A6kluBf6Xbhnu04Gdge3ptlk4iO7WHocOq1ZpJZxEt5J1F+BdVXXakOuRmh/RBFIAAAVUSURBVBtYBftPdKtgf0N3t4HfA7snubWqTgGoqvcPsVQtg5PXp4l+07gb6HamXgzsCTyiqhYC9wN+1t8f6hbgW3SrqqQpo6r+UFVHA8+tqv9ygq6moz5UPQ04ClgPOJNugdGJdD1X+/XHNaLssZomBobzdgQeC2wC3DPJZ4CfAJ/t96naDtihqq4cSqHSxN0G7qiu6affj20t4KXA7nSLji4Azuq3x/kKXa+t2+KMMFcFTiNJngR8DNgSeDywE7A63XyqNek2kbugqi4ZWpGSpDuV5E10Iw1PAZ7f77a+H3AqcKnzYkebQ4HTy5rAtVV1S1WdSrevyVPo7hm1TlV901AlSaMnyeZJDumf3hvYB9i7D1WPpVvV/UBD1ehzKHB6+RmwMMmewFeq6swkp9EF6KuGW5okadDARPUnAc8BdkxydVW9PcmmwCFJFgObA2+qqh8PtWCtEIPV9HI98GO6vaqelmQ+3T0Bn11V1wy1MkkS8NdA1YeqbYAvAAcCC4EnJ1mjqvZI8kS6jXE/3n9QjnMLR59zrKaYfvXfMruCk9wL2JRu4uOawGer6vy7qz5J0rL19/d7JPDDqrqtvzPGhlX13v5egJsD7wWOraqPDLNWrRyD1RTV/2N8EHADcMx4PVL9/dMW3+3FSZLGlWRX4CLgCrqtcbYHPky3WvvX/crAo+nmWX21qr44tGK1Upy8PgUleRHdxp8X093376Akj+6PZcn+PoYqSRotVXUCcCXwSbp7/X2X7t6XH+nnVT2G7t6YFwEbDqtOrTznWE0B44yrbwe8vqq+neRU4GC6DUHPdfxdkkbP4O/xqrouyY+Ap9Ft2nw8EOA/6Xqx9qe7HdkO/S2cFvu7fepwKHAKGFg58jLgDLpb1NwT+FD/D/QhdLv07l5Vvx9mrZKk8SXZFng08P2quiDJXnS/z79eVV9Lcu/+1C3p7o6xu3Nkpx6HAkdYkk3gL7c4eBawB/BbunC1Ft3Kv7WAR9F9yrllWLVKku5oydSMJFvRDf9tC7wxyUuq6hjgm8DeSfYA/kz3ofkJwK6GqqnJocARlWRH4PAkW9CNt78YOK+qFgGLkmwMbNO33wN4ZVXdOLSCJUl30H8w3hI4FNirqs7p9xp8Qh+ujkwyA7iwqm4Drk3y/v7erpqCDFYjKMlMuq7gg4HN6Jbf/gDYNckz+h3UP5PkfnR7nPypqsaGV7Ek6U6sBTwV2IHuRspfBW6nn0NVVZ+Ev9nfylA1hRmsRlBVLU5yMfA2uhvOPpmui/gmYJcki6vq21V1Pd2moJKkEVVV3+2nc/x7kt9W1TFJvgrMAP534DwnPU8DBqvRdQ5wI/AH4H5VdU2Sr9F9ytk3ya1VdcpQK5QkrZCqOrG/Pc2/JblHVR0NHDPsutSeqwJHxOBS3CT3AG7rd+V9Pd2NlA+pqjOSbES3iuSb/XwrSdIUkWQX4D10Q4NXelPl6cdgNQKWClUH0s2r+gPwjqr6c5K30t3/7z1V9ZMkM/pJjpKkKSbJLOfFTl8GqxGS5BXAc4HnAWcB3wPeXlUXJ3kn8HBgv6r68xDLlCRJy2CwGhFJ7gt8iG4l4HOAnYGr6bZaeHlVLUhy/6q6dohlSpKkO2GwGiFJ1gA2Bf6jqp7cbyw3RrcD7zuq6tahFihJku6UqwJHSFXdnORGYGZ/U+UHA6cAnzZUSZI0+uyxGjF9r9Wr6VaMPBB4TlVdMNyqJEnSijBYjaD+bubrA7dX1cJh1yNJklaMwUqSJKmR1YZdgCRJ0nRhsJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmN/B/Djeb5PsBsCgAAAABJRU5ErkJggg==\n" + }, + "metadata": { + 
"needs_background": "light" + } + } + ], + "source": [ + "df = df[(df['artist_top_genre'] == 'afro dancehall') | (df['artist_top_genre'] == 'afropop') | (df['artist_top_genre'] == 'nigerian pop')]\n", + "df = df[(df['popularity'] > 0)]\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. \n", + "5 Kasala Pioneers \n", + "6 Pull Up Everything Pretty \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "5 DRB Lasgidi nigerian pop 2020 184800 26 \n", + "6 prettyboydo nigerian pop 2018 202648 29 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "5 0.803 0.1270 0.525 0.000007 0.1290 -10.034 \n", + "6 0.818 0.4520 0.587 0.004490 0.5900 -9.840 \n", + "\n", + " speechiness tempo time_signature \n", + "1 0.3600 129.993 3 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 \n", + "5 0.1970 100.103 4 \n", + "6 0.1990 95.842 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
5KasalaPioneersDRB Lasgidinigerian pop2020184800260.8030.12700.5250.0000070.1290-10.0340.1970100.1034
6Pull UpEverything Prettyprettyboydonigerian pop2018202648290.8180.45200.5870.0044900.5900-9.8400.199095.8424
\n
" + }, + "metadata": {}, + "execution_count": 8 + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby na automatický preklad [Co-op Translator](https://github.com/Azure/co-op-translator). Aj keď sa snažíme o presnosť, upozorňujeme, že automatické preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre dôležité informácie sa odporúča profesionálny ľudský preklad. Nezodpovedáme za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sk/5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb b/translations/sk/5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb new file mode 100644 index 000000000..50b2d8c12 --- /dev/null +++ b/translations/sk/5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb @@ -0,0 +1,642 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "anaconda-cloud": "", + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "3.4.1" + }, + "colab": { + "name": "lesson_14.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "coopTranslator": { + "original_hash": "ad65fb4aad0a156b42216e4929f490fc", + "translation_date": "2025-09-06T14:24:20+00:00", + "source_file": "5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb", + "language_code": "sk" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "GULATlQXLXyR" + }, + "source": [ + "## Preskúmajte K-Means zhlukovanie pomocou R a princípov Tidy dát.\n", + "\n", + "### [**Kvíz pred 
prednáškou**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/29/)\n", + "\n", + "V tejto lekcii sa naučíte, ako vytvárať zhluky pomocou balíka Tidymodels a ďalších balíkov v ekosystéme R (nazveme ich priatelia 🧑‍🤝‍🧑) a datasetu nigérijskej hudby, ktorý ste importovali skôr. Pokryjeme základy K-Means pre zhlukovanie. Majte na pamäti, že ako ste sa naučili v predchádzajúcej lekcii, existuje mnoho spôsobov, ako pracovať so zhlukmi, a metóda, ktorú použijete, závisí od vašich dát. Skúsime K-Means, pretože je to najbežnejšia technika zhlukovania. Poďme na to!\n", + "\n", + "Pojmy, o ktorých sa dozviete:\n", + "\n", + "- Silhouette skórovanie\n", + "\n", + "- Metóda lakťa\n", + "\n", + "- Inercia\n", + "\n", + "- Variancia\n", + "\n", + "### **Úvod**\n", + "\n", + "[K-Means zhlukovanie](https://wikipedia.org/wiki/K-means_clustering) je metóda odvodená z oblasti spracovania signálov. Používa sa na rozdelenie a rozčlenenie skupín dát do `k zhlukov` na základe podobností ich vlastností.\n", + "\n", + "Zhluky môžu byť vizualizované ako [Voronoi diagramy](https://wikipedia.org/wiki/Voronoi_diagram), ktoré zahŕňajú bod (alebo 'semienko') a jeho zodpovedajúcu oblasť.\n", + "\n", + "

\n", + " \n", + "

Infografika od Jen Looper
\n", + "\n", + "\n", + "K-Means zhlukovanie má nasledujúce kroky:\n", + "\n", + "1. Data scientist začne tým, že špecifikuje požadovaný počet zhlukov, ktoré sa majú vytvoriť.\n", + "\n", + "2. Následne algoritmus náhodne vyberie K pozorovaní z datasetu, ktoré budú slúžiť ako počiatočné centrá zhlukov (t.j. centroidy).\n", + "\n", + "3. Potom sa každé zvyšné pozorovanie priradí k najbližšiemu centroidu.\n", + "\n", + "4. Následne sa vypočíta nový priemer každého zhluku a centroid sa presunie na tento priemer.\n", + "\n", + "5. Teraz, keď boli centrá prepočítané, každé pozorovanie sa opäť skontroluje, či by nemohlo byť bližšie k inému zhluku. Všetky objekty sa znovu priradia pomocou aktualizovaných priemerov zhlukov. Kroky priraďovania zhlukov a aktualizácie centroidov sa iteratívne opakujú, kým sa priradenia zhlukov prestanú meniť (t.j. keď sa dosiahne konvergencia). Typicky algoritmus končí, keď každá nová iterácia vedie k zanedbateľnému pohybu centroidov a zhluky sa stanú statickými.\n", + "\n", + "
\n", + "\n", + "> Upozorňujeme, že kvôli náhodnosti počiatočných k pozorovaní použitých ako východiskové centroidy môžeme získať mierne odlišné výsledky pri každom použití postupu. Z tohto dôvodu väčšina algoritmov používa niekoľko *náhodných začiatkov* a vyberie iteráciu s najnižším WCSS. Preto sa dôrazne odporúča vždy spustiť K-Means s viacerými hodnotami *nstart*, aby sa predišlo *nežiaducemu lokálnemu optimu.*\n", + "\n", + "
\n", + "\n", + "Táto krátka animácia využívajúca [ilustrácie](https://github.com/allisonhorst/stats-illustrations) od Allison Horst vysvetľuje proces zhlukovania:\n", + "\n", + "

\n", + " \n", + "

Ilustrácia od @allison_horst
\n", + "\n", + "\n", + "\n", + "Základná otázka, ktorá pri zhlukovaní vyvstáva, je táto: ako viete, na koľko zhlukov rozdeliť vaše dáta? Jednou z nevýhod použitia K-Means je skutočnosť, že budete musieť určiť `k`, teda počet `centroidov`. Našťastie `metóda lakťa` pomáha odhadnúť dobrú východiskovú hodnotu pre `k`. Vyskúšate si to o chvíľu.\n", + "\n", + "### \n", + "\n", + "**Predpoklady**\n", + "\n", + "Nadviažeme presne tam, kde sme skončili v [predchádzajúcej lekcii](https://github.com/microsoft/ML-For-Beginners/blob/main/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb), kde sme analyzovali dataset, vytvorili množstvo vizualizácií a filtrovali dataset na pozorovania, ktoré nás zaujímajú. Určite si ju pozrite!\n", + "\n", + "Budeme potrebovať niekoľko balíkov na dokončenie tohto modulu. Môžete si ich nainštalovať pomocou: `install.packages(c('tidyverse', 'tidymodels', 'cluster', 'summarytools', 'plotly', 'paletteer', 'factoextra', 'patchwork'))`\n", + "\n", + "Alternatívne, nasledujúci skript skontroluje, či máte balíky potrebné na dokončenie tohto modulu, a nainštaluje ich za vás, ak niektoré chýbajú.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "ah_tBi58LXyi" + }, + "source": [ + "suppressWarnings(if(!require(\"pacman\")) install.packages(\"pacman\"))\n", + "\n", + "pacman::p_load('tidyverse', 'tidymodels', 'cluster', 'summarytools', 'plotly', 'paletteer', 'factoextra', 'patchwork')\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7e--UCUTLXym" + }, + "source": [ + "Poďme na to!\n", + "\n", + "## 1. Tanec s dátami: Zúžme to na 3 najpopulárnejšie hudobné žánre\n", + "\n", + "Toto je zhrnutie toho, čo sme robili v predchádzajúcej lekcii. 
Poďme si trochu pohrať s dátami!\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Ycamx7GGLXyn" + }, + "source": [ + "# Load the core tidyverse and make it available in your current R session\n", + "library(tidyverse)\n", + "\n", + "# Import the data into a tibble\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/5-Clustering/data/nigerian-songs.csv\", show_col_types = FALSE)\n", + "\n", + "# Narrow down to top 3 popular genres\n", + "nigerian_songs <- df %>% \n", + " # Concentrate on top 3 genres\n", + " filter(artist_top_genre %in% c(\"afro dancehall\", \"afropop\",\"nigerian pop\")) %>% \n", + " # Remove unclassified observations\n", + " filter(popularity != 0)\n", + "\n", + "\n", + "\n", + "# Visualize popular genres using bar plots\n", + "theme_set(theme_light())\n", + "nigerian_songs %>%\n", + " count(artist_top_genre) %>%\n", + " ggplot(mapping = aes(x = artist_top_genre, y = n,\n", + " fill = artist_top_genre)) +\n", + " geom_col(alpha = 0.8) +\n", + " paletteer::scale_fill_paletteer_d(\"ggsci::category10_d3\") +\n", + " ggtitle(\"Top genres\") +\n", + " theme(plot.title = element_text(hjust = 0.5))\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b5h5zmkPLXyp" + }, + "source": [ + "🤩 To išlo skvele!\n", + "\n", + "## 2. Viac prieskumu údajov.\n", + "\n", + "Aké čisté sú tieto údaje? Poďme skontrolovať odľahlé hodnoty pomocou boxplotov. Zameriame sa na číselné stĺpce s menším počtom odľahlých hodnôt (aj keď by ste mohli odľahlé hodnoty vyčistiť). Boxploty môžu ukázať rozsah údajov a pomôžu pri výbere stĺpcov na použitie. Upozorňujeme, že boxploty neukazujú rozptyl, čo je dôležitý prvok pre dobré zoskupiteľné údaje. 
Pre ďalšie čítanie si pozrite [túto diskusiu](https://stats.stackexchange.com/questions/91536/deduce-variance-from-boxplot).\n", + "\n", + "[Boxploty](https://en.wikipedia.org/wiki/Box_plot) sa používajú na grafické znázornenie distribúcie `číselných` údajov, takže začnime *výberom* všetkých číselných stĺpcov spolu s populárnymi hudobnými žánrami.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "HhNreJKLLXyq" + }, + "source": [ + "# Select top genre column and all other numeric columns\n", + "df_numeric <- nigerian_songs %>% \n", + " select(artist_top_genre, where(is.numeric)) \n", + "\n", + "# Display the data\n", + "df_numeric %>% \n", + " slice_head(n = 5)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uYXrwJRaLXyq" + }, + "source": [ + "Pozrite sa, ako výberový pomocník `where` uľahčuje tento proces 💁. Preskúmajte ďalšie podobné funkcie [tu](https://tidyselect.r-lib.org/).\n", + "\n", + "Keďže budeme vytvárať boxplot pre každú číselnú vlastnosť a chceme sa vyhnúť používaniu slučiek, preformátujme naše údaje do *dlhšieho* formátu, ktorý nám umožní využiť `facets` - podgrafy, z ktorých každý zobrazuje jednu podmnožinu údajov.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "gd5bR3f8LXys" + }, + "source": [ + "# Pivot data from wide to long\n", + "df_numeric_long <- df_numeric %>% \n", + " pivot_longer(!artist_top_genre, names_to = \"feature_names\", values_to = \"values\") \n", + "\n", + "# Print out data\n", + "df_numeric_long %>% \n", + " slice_head(n = 15)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-7tE1swnLXyv" + }, + "source": [ + "Oveľa dlhšie! Teraz je čas na nejaké `ggplots`! 
Aký `geom` použijeme?\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "r88bIsyuLXyy" + }, + "source": [ + "# Make a box plot\n", + "df_numeric_long %>% \n", + " ggplot(mapping = aes(x = feature_names, y = values, fill = feature_names)) +\n", + " geom_boxplot() +\n", + " facet_wrap(~ feature_names, ncol = 4, scales = \"free\") +\n", + " theme(legend.position = \"none\")\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EYVyKIUELXyz" + }, + "source": [ + "Jednoduché-gg!\n", + "\n", + "Teraz môžeme vidieť, že tieto údaje sú trochu zašumené: pri pozorovaní jednotlivých stĺpcov ako boxplotov môžete vidieť odľahlé hodnoty. Mohli by ste prejsť dataset a odstrániť tieto odľahlé hodnoty, ale to by údaje dosť zredukovalo.\n", + "\n", + "Zatiaľ si vyberme, ktoré stĺpce použijeme na naše klastrovacie cvičenie. Vyberme si číselné stĺpce s podobnými rozsahmi. Mohli by sme zakódovať `artist_top_genre` ako číselnú hodnotu, ale zatiaľ ho vynecháme.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "-wkpINyZLXy0" + }, + "source": [ + "# Select variables with similar ranges\n", + "df_numeric_select <- df_numeric %>% \n", + " select(popularity, danceability, acousticness, loudness, energy) \n", + "\n", + "# Normalize data\n", + "# df_numeric_select <- scale(df_numeric_select)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D7dLzgpqLXy1" + }, + "source": [ + "## 3. Výpočet k-means zoskupovania v R\n", + "\n", + "K-means môžeme vypočítať v R pomocou zabudovanej funkcie `kmeans`, pozrite si `help(\"kmeans()\")`. Funkcia `kmeans()` prijíma dátový rámec so všetkými číselnými stĺpcami ako svoj hlavný argument.\n", + "\n", + "Prvým krokom pri používaní k-means zoskupovania je určiť počet klastrov (k), ktoré budú vytvorené vo finálnom riešení. 
Vieme, že v dátovom súbore sme identifikovali 3 hudobné žánre, takže skúsme 3:\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "uC4EQ5w7LXy5" + }, + "source": [ + "set.seed(2056)\n", + "# Kmeans clustering for 3 clusters\n", + "kclust <- kmeans(\n", + " df_numeric_select,\n", + " # Specify the number of clusters\n", + " centers = 3,\n", + " # How many random initial configurations\n", + " nstart = 25\n", + ")\n", + "\n", + "# Display clustering object\n", + "kclust\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hzfhscWrLXy-" + }, + "source": [ + "Objekt kmeans obsahuje niekoľko informácií, ktoré sú dobre vysvetlené v `help(\"kmeans()\")`. Momentálne sa zamerajme na niekoľko z nich. Vidíme, že údaje boli rozdelené do 3 klastrov s veľkosťami 65, 110, 111. Výstup tiež obsahuje centrá klastrov (priemery) pre 3 skupiny naprieč 5 premennými.\n", + "\n", + "Vektor klastrovania je priradenie klastrov pre každé pozorovanie. Použime funkciu `augment`, aby sme pridali priradenie klastrov do pôvodného súboru údajov.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "0XwwpFGQLXy_" + }, + "source": [ + "# Add predicted cluster assignment to data set\n", + "augment(kclust, df_numeric_select) %>% \n", + " relocate(.cluster) %>% \n", + " slice_head(n = 10)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NXIVXXACLXzA" + }, + "source": [ + "Perfektné, práve sme rozdelili náš dátový súbor do 3 skupín. Takže, aké dobré je naše zhlukovanie 🤷? Pozrime sa na `Silhouette skóre`.\n", + "\n", + "### **Silhouette skóre**\n", + "\n", + "[Silhouette analýza](https://en.wikipedia.org/wiki/Silhouette_(clustering)) môže byť použitá na štúdium vzdialenosti medzi výslednými zhlukmi. Toto skóre sa pohybuje od -1 do 1, pričom skóre blízke 1 znamená, že zhluk je hustý a dobre oddelený od ostatných zhlukov. 
Hodnota blízka 0 predstavuje prekrývajúce sa zhluky so vzorkami veľmi blízko rozhodovacej hranice susedných zhlukov. [zdroj](https://dzone.com/articles/kmeans-silhouette-score-explained-with-python-exam).\n", + "\n", + "Metóda priemerného silhouette skóre vypočíta priemerné silhouette skóre pozorovaní pre rôzne hodnoty *k*. Vysoké priemerné silhouette skóre naznačuje dobré zhlukovanie.\n", + "\n", + "Funkcia `silhouette` v balíku cluster sa používa na výpočet priemernej šírky silhouette.\n", + "\n", + "> Silhouette môže byť vypočítané s akoukoľvek [vzdialenosťou](https://en.wikipedia.org/wiki/Distance \"Distance\"), ako napríklad [Euklidovská vzdialenosť](https://en.wikipedia.org/wiki/Euclidean_distance \"Euclidean distance\") alebo [Manhattanská vzdialenosť](https://en.wikipedia.org/wiki/Manhattan_distance \"Manhattan distance\"), o ktorých sme hovorili v [predchádzajúcej lekcii](https://github.com/microsoft/ML-For-Beginners/blob/main/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb).\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Jn0McL28LXzB" + }, + "source": [ + "# Load cluster package\n", + "library(cluster)\n", + "\n", + "# Compute average silhouette score\n", + "ss <- silhouette(kclust$cluster,\n", + " # Compute euclidean distance\n", + " dist = dist(df_numeric_select))\n", + "mean(ss[, 3])\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QyQRn97nLXzC" + }, + "source": [ + "Naše skóre je **0,549**, teda presne v strede. To naznačuje, že naše údaje nie sú obzvlášť vhodné pre tento typ zoskupovania. Pozrime sa, či môžeme tento predpoklad vizuálne potvrdiť. 
Balík [factoextra](https://rpkgs.datanovia.com/factoextra/index.html) poskytuje funkcie (`fviz_cluster()`) na vizualizáciu zoskupovania.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "7a6Km1_FLXzD" + }, + "source": [ + "library(factoextra)\n", + "\n", + "# Visualize clustering results\n", + "fviz_cluster(kclust, df_numeric_select)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IBwCWt-0LXzD" + }, + "source": [ + "Prekrývanie klastrov naznačuje, že naše údaje nie sú obzvlášť vhodné pre tento typ klastrovania, ale poďme pokračovať.\n", + "\n", + "## 4. Určenie optimálneho počtu klastrov\n", + "\n", + "Základná otázka, ktorá často vyvstáva pri K-Means klastrovaní, je táto - bez známych triednych označení, ako zistíte, na koľko klastrov rozdeliť vaše údaje?\n", + "\n", + "Jedným zo spôsobov, ako to môžeme zistiť, je použiť vzorku údajov na `vytvorenie série modelov klastrovania` s narastajúcim počtom klastrov (napr. od 1 do 10) a vyhodnotiť metriky klastrovania, ako je **Silhouette skóre.**\n", + "\n", + "Určme optimálny počet klastrov výpočtom algoritmu klastrovania pre rôzne hodnoty *k* a vyhodnotením **Súčtu štvorcov v rámci klastrov** (WCSS). Celkový súčet štvorcov v rámci klastrov (WCSS) meria kompaktnosť klastrovania a chceme, aby bol čo najmenší, pričom nižšie hodnoty znamenajú, že dátové body sú bližšie.\n", + "\n", + "Preskúmajme vplyv rôznych volieb `k`, od 1 do 10, na toto klastrovanie.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "hSeIiylDLXzE" + }, + "source": [ + "# Create a series of clustering models\n", + "kclusts <- tibble(k = 1:10) %>% \n", + " # Perform kmeans clustering for 1,2,3 ... 
,10 clusters\n", + " mutate(model = map(k, ~ kmeans(df_numeric_select, centers = .x, nstart = 25)),\n", + " # Farm out clustering metrics eg WCSS\n", + " glanced = map(model, ~ glance(.x))) %>% \n", + " unnest(cols = glanced)\n", + " \n", + "\n", + "# View clustering results\n", + "kclusts\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "m7rS2U1eLXzE" + }, + "source": [ + "Teraz, keď máme celkový súčet štvorcov v rámci klastrov (tot.withinss) pre každý algoritmus zhlukovania s centrom *k*, použijeme [metódu lakťa](https://en.wikipedia.org/wiki/Elbow_method_(clustering)) na nájdenie optimálneho počtu klastrov. Táto metóda spočíva v zobrazení WCSS ako funkcie počtu klastrov a výbere [lakťa krivky](https://en.wikipedia.org/wiki/Elbow_of_the_curve \"Elbow of the curve\") ako počtu klastrov, ktoré sa majú použiť.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "o_DjHGItLXzF" + }, + "source": [ + "set.seed(2056)\n", + "# Use elbow method to determine optimum number of clusters\n", + "kclusts %>% \n", + " ggplot(mapping = aes(x = k, y = tot.withinss)) +\n", + " geom_line(size = 1.2, alpha = 0.8, color = \"#FF7F0EFF\") +\n", + " geom_point(size = 2, color = \"#FF7F0EFF\")\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pLYyt5XSLXzG" + }, + "source": [ + "Graf ukazuje výrazné zníženie WCSS (teda väčšiu *súdržnosť*) pri zvýšení počtu klastrov z jedného na dva, a ďalšie výrazné zníženie z dvoch na tri klastre. Potom je zníženie menej výrazné, čo vedie k vytvoreniu `lakťa` 💪 na grafe približne pri troch klastroch. 
Toto je dobrý indikátor, že existujú dva až tri rozumne dobre oddelené klastre dátových bodov.\n", + "\n", + "Teraz môžeme pokračovať a extrahovať model klastrovania, kde `k = 3`:\n", + "\n", + "> `pull()`: používa sa na extrakciu jedného stĺpca\n", + ">\n", + "> `pluck()`: používa sa na indexovanie dátových štruktúr, ako sú zoznamy\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "JP_JPKBILXzG" + }, + "source": [ + "# Extract k = 3 clustering\n", + "final_kmeans <- kclusts %>% \n", + " filter(k == 3) %>% \n", + " pull(model) %>% \n", + " pluck(1)\n", + "\n", + "\n", + "final_kmeans\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l_PDTu8tLXzI" + }, + "source": [ + "Skvelé! Poďme si vizualizovať získané klastre. Máte záujem o interaktivitu pomocou `plotly`?\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "dNcleFe-LXzJ" + }, + "source": [ + "# Add predicted cluster assignment to data set\n", + "results <- augment(final_kmeans, df_numeric_select) %>% \n", + " bind_cols(df_numeric %>% select(artist_top_genre)) \n", + "\n", + "# Plot cluster assignments\n", + "clust_plt <- results %>% \n", + " ggplot(mapping = aes(x = popularity, y = danceability, color = .cluster, shape = artist_top_genre)) +\n", + " geom_point(size = 2, alpha = 0.8) +\n", + " paletteer::scale_color_paletteer_d(\"ggthemes::Tableau_10\")\n", + "\n", + "ggplotly(clust_plt)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6JUM_51VLXzK" + }, + "source": [ + "Možno by sme očakávali, že každý klaster (reprezentovaný rôznymi farbami) bude mať odlišné žánre (reprezentované rôznymi tvarmi).\n", + "\n", + "Pozrime sa na presnosť modelu.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "HdIMUGq7LXzL" + }, + "source": [ + "# Assign genres to predefined integers\n", + "label_count <- results %>% \n", + " group_by(artist_top_genre) %>% \n", + " 
mutate(id = cur_group_id()) %>% \n", + " ungroup() %>% \n", + " summarise(correct_labels = sum(.cluster == id))\n", + "\n", + "\n", + "# Print results \n", + "cat(\"Result:\", label_count$correct_labels, \"out of\", nrow(results), \"samples were correctly labeled.\")\n", + "\n", + "cat(\"\\nAccuracy score:\", label_count$correct_labels/nrow(results))\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "C50wvaAOLXzM" + }, + "source": [ + "Presnosť tohto modelu nie je zlá, ale ani výborná. Môže to byť spôsobené tým, že dáta nie sú vhodné pre K-Means Clustering. Tieto dáta sú príliš nevyvážené, málo korelované a medzi hodnotami stĺpcov je príliš veľká variabilita na to, aby sa dali dobre zoskupiť. V skutočnosti sú vytvorené klastre pravdepodobne silne ovplyvnené alebo skreslené tromi kategóriami žánrov, ktoré sme definovali vyššie.\n", + "\n", + "Napriek tomu to bol celkom poučný proces!\n", + "\n", + "V dokumentácii Scikit-learn môžete vidieť, že model ako tento, s klastrami, ktoré nie sú veľmi dobre ohraničené, má problém s „variabilitou“:\n", + "\n", + "

\n", + " \n", + "

Infografika zo Scikit-learn
\n", + "\n", + "\n", + "\n", + "## **Variabilita**\n", + "\n", + "Variabilita je definovaná ako „priemer štvorcových rozdielov od priemeru“ [zdroj](https://www.mathsisfun.com/data/standard-deviation.html). V kontexte tohto problému zoskupovania sa vzťahuje na dáta, pri ktorých hodnoty v našej dátovej sade majú tendenciu odchýliť sa príliš od priemeru.\n", + "\n", + "✅ Toto je skvelý moment na zamyslenie sa nad všetkými spôsobmi, ako by ste mohli tento problém napraviť. Upraviť dáta trochu viac? Použiť iné stĺpce? Použiť iný algoritmus? Tip: Skúste [škálovať svoje dáta](https://www.mygreatlearning.com/blog/learning-data-science-with-k-means-clustering/) na ich normalizáciu a otestujte iné stĺpce.\n", + "\n", + "> Skúste tento '[kalkulátor variability](https://www.calculatorsoup.com/calculators/statistics/variance-calculator.php)', aby ste lepšie pochopili tento koncept.\n", + "\n", + "------------------------------------------------------------------------\n", + "\n", + "## **🚀Výzva**\n", + "\n", + "Strávte nejaký čas s týmto notebookom a upravujte parametre. Dokážete zlepšiť presnosť modelu tým, že dáta viac vyčistíte (napríklad odstránením odľahlých hodnôt)? Môžete použiť váhy na pridanie väčšej váhy určitým vzorkám dát. Čo ešte môžete urobiť, aby ste vytvorili lepšie klastre?\n", + "\n", + "Tip: Skúste škálovať svoje dáta. V notebooku je komentovaný kód, ktorý pridáva štandardné škálovanie, aby sa stĺpce dát viac podobali z hľadiska rozsahu. Zistíte, že zatiaľ čo skóre siluety klesá, „zlom“ v grafe lakťa sa vyhladzuje. Je to preto, že ponechanie dát neškálovaných umožňuje dátam s menšou variabilitou niesť väčšiu váhu. 
Prečítajte si o tomto probléme viac [tu](https://stats.stackexchange.com/questions/21222/are-mean-normalization-and-feature-scaling-needed-for-k-means-clustering/21226#21226).\n", + "\n", + "## [**Kvíz po prednáške**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/30/)\n", + "\n", + "## **Prehľad a samostatné štúdium**\n", + "\n", + "- Pozrite si simulátor K-Means [ako tento](https://user.ceng.metu.edu.tr/~akifakkus/courses/ceng574/k-means/). Tento nástroj môžete použiť na vizualizáciu vzorových dátových bodov a určenie ich centroidov. Môžete upraviť náhodnosť dát, počet klastrov a počet centroidov. Pomáha vám to získať predstavu o tom, ako sa dáta môžu zoskupovať?\n", + "\n", + "- Pozrite si tiež [tento materiál o K-Means](https://stanford.edu/~cpiech/cs221/handouts/kmeans.html) zo Stanfordu.\n", + "\n", + "Chcete si vyskúšať svoje novo nadobudnuté zručnosti zoskupovania na dátových sadách, ktoré sa dobre hodia pre K-Means clustering? Pozrite si:\n", + "\n", + "- [Trénovanie a hodnotenie modelov zoskupovania](https://rpubs.com/eR_ic/clustering) pomocou Tidymodels a priateľov\n", + "\n", + "- [Analýza klastrov K-Means](https://uc-r.github.io/kmeans_clustering), UC Business Analytics R Programming Guide\n", + "\n", + "- [K-Means clustering s princípmi Tidy dát](https://www.tidymodels.org/learn/statistics/k-means/)\n", + "\n", + "## **Úloha**\n", + "\n", + "[Skúste rôzne metódy zoskupovania](https://github.com/microsoft/ML-For-Beginners/blob/main/5-Clustering/2-K-Means/assignment.md)\n", + "\n", + "## ĎAKUJEME:\n", + "\n", + "[Jen Looper](https://www.twitter.com/jenlooper) za vytvorenie pôvodnej verzie tohto modulu v Pythone ♥️\n", + "\n", + "[`Allison Horst`](https://twitter.com/allison_horst/) za vytvorenie úžasných ilustrácií, ktoré robia R prístupnejším a zábavnejším. 
Viac ilustrácií nájdete v jej [galérii](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM).\n", + "\n", + "Šťastné učenie,\n", + "\n", + "[Eric](https://twitter.com/ericntay), Gold Microsoft Learn Student Ambassador.\n", + "\n", + "

\n", + " \n", + "

Ilustrácia od @allison_horst
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby AI prekladu [Co-op Translator](https://github.com/Azure/co-op-translator). Hoci sa snažíme o presnosť, prosím, berte na vedomie, že automatizované preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho rodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre kritické informácie sa odporúča profesionálny ľudský preklad. Nie sme zodpovední za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sk/5-Clustering/2-K-Means/solution/notebook.ipynb b/translations/sk/5-Clustering/2-K-Means/solution/notebook.ipynb new file mode 100644 index 000000000..9b9e8b543 --- /dev/null +++ b/translations/sk/5-Clustering/2-K-Means/solution/notebook.ipynb @@ -0,0 +1,544 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "e867e87e3129c8875423a82945f4ad5e", + "translation_date": "2025-09-06T14:20:17+00:00", + "source_file": "5-Clustering/2-K-Means/solution/notebook.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + 
"Requirement already satisfied: seaborn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.11.1)\n", + "Requirement already satisfied: pandas>=0.23 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.1.2)\n", + "Requirement already satisfied: matplotlib>=2.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (3.1.0)\n", + "Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.4.1)\n", + "Requirement already satisfied: numpy>=1.15 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.19.2)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2.8.0)\n", + "Requirement already satisfied: pytz>=2017.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2019.1)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (1.1.0)\n", + "Requirement already satisfied: cycler>=0.10 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n", + "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (2.4.0)\n", + "Requirement already satisfied: six>=1.5 in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from python-dateutil>=2.7.3->pandas>=0.23->seaborn) (1.12.0)\n", + "Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from 
kiwisolver>=1.0.1->matplotlib>=2.2->seaborn) (45.1.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install seaborn" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "0 Sparky Mandy & The Jungle \n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "2 LITT! LITT! \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "0 Cruel Santino alternative r&b 2019 144000 48 \n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "2 AYLØ indie r&b 2018 207758 40 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "0 0.666 0.8510 0.420 0.534000 0.1100 -6.699 \n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "2 0.836 0.2720 0.564 0.000537 0.1100 -7.127 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "\n", + " speechiness tempo time_signature \n", + "0 0.0829 133.015 5 \n", + "1 0.3600 129.993 3 \n", + "2 0.0424 130.005 4 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
0SparkyMandy & The JungleCruel Santinoalternative r&b2019144000480.6660.85100.4200.5340000.1100-6.6990.0829133.0155
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
2LITT!LITT!AYLØindie r&b2018207758400.8360.27200.5640.0005370.1100-7.1270.0424130.0054
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
\n
" + }, + "metadata": {}, + "execution_count": 11 + } + ], + "source": [ + "\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "\n", + "\n", + "df = pd.read_csv(\"../../data/nigerian-songs.csv\")\n", + "df.head()" + ] + }, + { + "source": [ + "Zameriame sa iba na 3 žánre. Možno dokážeme vytvoriť 3 klastre!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "metadata": {}, + "execution_count": 12 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "df = df[(df['artist_top_genre'] == 'afro dancehall') | (df['artist_top_genre'] == 'afropop') | (df['artist_top_genre'] == 'nigerian pop')]\n", + "df = df[(df['popularity'] > 0)]\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. 
\n", + "5 Kasala Pioneers \n", + "6 Pull Up Everything Pretty \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "5 DRB Lasgidi nigerian pop 2020 184800 26 \n", + "6 prettyboydo nigerian pop 2018 202648 29 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "5 0.803 0.1270 0.525 0.000007 0.1290 -10.034 \n", + "6 0.818 0.4520 0.587 0.004490 0.5900 -9.840 \n", + "\n", + " speechiness tempo time_signature \n", + "1 0.3600 129.993 3 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 \n", + "5 0.1970 100.103 4 \n", + "6 0.1990 95.842 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
5KasalaPioneersDRB Lasgidinigerian pop2020184800260.8030.12700.5250.0000070.1290-10.0340.1970100.1034
6Pull UpEverything Prettyprettyboydonigerian pop2018202648290.8180.45200.5870.0044900.5900-9.8400.199095.8424
\n
" + }, + "metadata": {}, + "execution_count": 13 + } + ], + "source": [ + "df.head()" + ] + }, + { + "source": [ + "Aké čisté sú tieto údaje? Skontrolujte odľahlé hodnoty pomocou boxplotov. Zameriame sa na stĺpce s menším počtom odľahlých hodnôt (aj keď by ste mohli odľahlé hodnoty odstrániť). Boxploty môžu ukázať rozsah údajov a pomôžu vybrať, ktoré stĺpce použiť. Upozorňujeme, že boxploty neukazujú rozptyl, čo je dôležitý prvok dobrých zhlukovateľných údajov (https://stats.stackexchange.com/questions/91536/deduce-variance-from-boxplot)\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 14 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.figure(figsize=(20,20), dpi=200)\n", + "\n", + "plt.subplot(4,3,1)\n", + "sns.boxplot(x = 'popularity', data = df)\n", + "\n", + "plt.subplot(4,3,2)\n", + "sns.boxplot(x = 'acousticness', data = df)\n", + "\n", + "plt.subplot(4,3,3)\n", + "sns.boxplot(x = 'energy', data = df)\n", + "\n", + "plt.subplot(4,3,4)\n", + "sns.boxplot(x = 'instrumentalness', data = df)\n", + "\n", + "plt.subplot(4,3,5)\n", + "sns.boxplot(x = 'liveness', data = df)\n", + "\n", + "plt.subplot(4,3,6)\n", + "sns.boxplot(x = 'loudness', data = df)\n", + "\n", + "plt.subplot(4,3,7)\n", + "sns.boxplot(x = 'speechiness', data = df)\n", + "\n", + "plt.subplot(4,3,8)\n", + "sns.boxplot(x = 'tempo', data = df)\n", + "\n", + "plt.subplot(4,3,9)\n", + "sns.boxplot(x = 
'time_signature', data = df)\n", + "\n", + "plt.subplot(4,3,10)\n", + "sns.boxplot(x = 'danceability', data = df)\n", + "\n", + "plt.subplot(4,3,11)\n", + "sns.boxplot(x = 'length', data = df)\n", + "\n", + "plt.subplot(4,3,12)\n", + "sns.boxplot(x = 'release_date', data = df)" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import LabelEncoder, StandardScaler\n", + "le = LabelEncoder()\n", + "\n", + "# scaler = StandardScaler()\n", + "\n", + "X = df.loc[:, ('artist_top_genre','popularity','danceability','acousticness','loudness','energy')]\n", + "\n", + "y = df['artist_top_genre']\n", + "\n", + "X['artist_top_genre'] = le.fit_transform(X['artist_top_genre'])\n", + "\n", + "# X = scaler.fit_transform(X)\n", + "\n", + "y = le.transform(y)\n", + "\n" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 0, 2, 1, 1, 0, 1, 0, 0,\n", + " 0, 1, 0, 2, 0, 0, 2, 2, 1, 1, 0, 2, 2, 2, 2, 1, 1, 0, 2, 0, 2, 0,\n", + " 2, 0, 0, 1, 1, 2, 1, 0, 0, 2, 2, 2, 2, 1, 1, 0, 1, 2, 2, 1, 2, 2,\n", + " 1, 2, 1, 2, 2, 1, 1, 1, 1, 1, 2, 1, 2, 2, 0, 2, 1, 1, 1, 2, 2, 2,\n", + " 2, 1, 2, 2, 2, 2, 1, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 2, 1, 2, 0,\n", + " 1, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 0, 1, 1, 1, 1, 0, 1, 2, 1, 2,\n", + " 1, 2, 2, 2, 0, 2, 1, 1, 1, 2, 1, 0, 1, 2, 2, 1, 1, 1, 0, 1, 2, 2,\n", + " 2, 1, 1, 0, 1, 2, 1, 1, 1, 1, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2,\n", + " 0, 1, 0, 0, 1, 0, 0, 2, 0, 0, 1, 1, 2, 0, 2, 2, 0, 2, 2, 1, 1, 0,\n", + " 1, 1, 0, 0, 1, 0, 2, 0, 1, 0, 2, 0, 0, 2, 2, 2, 1, 1, 1, 1, 1, 0,\n", + " 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2,\n", + " 1, 1, 0, 0, 1, 1, 2, 0, 
0, 0, 0, 0, 2, 0, 0, 2, 1, 1, 1, 2, 2, 2,\n", + " 1, 2, 1, 2, 1, 1, 1, 0, 2, 2, 2, 1, 2, 1, 0, 1, 2, 1, 1, 1, 2, 1],\n", + " dtype=int32)" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ], + "source": [ + "\n", + "from sklearn.cluster import KMeans\n", + "\n", + "nclusters = 3 \n", + "seed = 0\n", + "\n", + "km = KMeans(n_clusters=nclusters, random_state=seed)\n", + "km.fit(X)\n", + "\n", + "# Predict the cluster for each data point\n", + "\n", + "y_cluster_kmeans = km.predict(X)\n", + "y_cluster_kmeans" + ] + }, + { + "source": [ + "Tieto čísla pre nás veľa neznamenajú, takže si poďme zistiť 'silhouette skóre', aby sme videli presnosť. Naše skóre je v strede.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.5466747351275563" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ], + "source": [ + "from sklearn import metrics\n", + "score = metrics.silhouette_score(X, y_cluster_kmeans)\n", + "score" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.cluster import KMeans\n", + "wcss = []\n", + "\n", + "for i in range(1, 11):\n", + " kmeans = KMeans(n_clusters = i, init = 'k-means++', random_state = 42)\n", + " kmeans.fit(X)\n", + " wcss.append(kmeans.inertia_)" + ] + }, + { + "source": [ + "Použite tento model na určenie najlepšieho počtu klastrov na základe metódy lakťa.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variables as keyword args: x, y. 
From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.\n FutureWarning\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.figure(figsize=(10,5))\n", + "sns.lineplot(range(1, 11), wcss,marker='o',color='red')\n", + "plt.title('Elbow')\n", + "plt.xlabel('Number of clusters')\n", + "plt.ylabel('WCSS')\n", + "plt.show()" + ] + }, + { + "source": [ + "Looks like 3 is a good number after all. Fit the model again and create a scatterplot of your clusters. They do group in bunches, but they are pretty close together." + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "from sklearn.cluster import KMeans\n", + "kmeans = KMeans(n_clusters = 3)\n", + "kmeans.fit(X)\n", + "labels = kmeans.predict(X)\n", + "plt.scatter(df['popularity'],df['danceability'],c = labels)\n", + 
"plt.xlabel('popularity')\n", + "plt.ylabel('danceability')\n", + "plt.show()" + ] + }, + { + "source": [ + "Presnosť tohto modelu nie je zlá, ale ani výnimočná. Môže to byť tým, že údaje nemusia byť vhodné pre zhlukovanie metódou K-Means. Môžete vyskúšať inú metódu.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 811, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Result: 109 out of 286 samples were correctly labeled.\nAccuracy score: 0.38\n" + ] + } + ], + "source": [ + "labels = kmeans.labels_\n", + "\n", + "correct_labels = sum(y == labels)\n", + "\n", + "print(\"Result: %d out of %d samples were correctly labeled.\" % (correct_labels, y.size))\n", + "\n", + "print('Accuracy score: {0:0.2f}'. format(correct_labels/float(y.size)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby AI prekladu [Co-op Translator](https://github.com/Azure/co-op-translator). Aj keď sa snažíme o presnosť, prosím, berte na vedomie, že automatizované preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre kritické informácie sa odporúča profesionálny ľudský preklad. 
Nie sme zodpovední za žiadne nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sk/5-Clustering/2-K-Means/solution/tester.ipynb b/translations/sk/5-Clustering/2-K-Means/solution/tester.ipynb new file mode 100644 index 000000000..286219db2 --- /dev/null +++ b/translations/sk/5-Clustering/2-K-Means/solution/tester.ipynb @@ -0,0 +1,341 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "6f92868513e59d321245137c1c4c5311", + "translation_date": "2025-09-06T14:22:14+00:00", + "source_file": "5-Clustering/2-K-Means/solution/tester.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: seaborn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.11.1)\n", + "Requirement already satisfied: pandas>=0.23 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.1.2)\n", + "Requirement already satisfied: matplotlib>=2.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (3.1.0)\n", + 
"Requirement already satisfied: numpy>=1.15 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.19.2)\n", + "Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.4.1)\n", + "Requirement already satisfied: pytz>=2017.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2019.1)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2.8.0)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (1.1.0)\n", + "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (2.4.0)\n", + "Requirement already satisfied: cycler>=0.10 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n", + "Requirement already satisfied: six>=1.5 in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from python-dateutil>=2.7.3->pandas>=0.23->seaborn) (1.12.0)\n", + "Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from kiwisolver>=1.0.1->matplotlib>=2.2->seaborn) (45.1.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install seaborn" + ] + }, + { + "source": [], + "cell_type": 
"markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "0 Sparky Mandy & The Jungle \n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "2 LITT! LITT! \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "0 Cruel Santino alternative r&b 2019 144000 48 \n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "2 AYLØ indie r&b 2018 207758 40 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "0 0.666 0.8510 0.420 0.534000 0.1100 -6.699 \n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "2 0.836 0.2720 0.564 0.000537 0.1100 -7.127 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "\n", + " speechiness tempo time_signature \n", + "0 0.0829 133.015 5 \n", + "1 0.3600 129.993 3 \n", + "2 0.0424 130.005 4 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
0SparkyMandy & The JungleCruel Santinoalternative r&b2019144000480.6660.85100.4200.5340000.1100-6.6990.0829133.0155
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
2LITT!LITT!AYLØindie r&b2018207758400.8360.27200.5640.0005370.1100-7.1270.0424130.0054
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
\n
" + }, + "metadata": {}, + "execution_count": 105 + } + ], + "source": [ + "\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "import numpy as np\n", + "\n", + "df = pd.read_csv(\"../../data/nigerian-songs.csv\")\n", + "df.head()" + ] + }, + { + "source": [ + "Zameriame sa iba na 3 žánre. Možno dokážeme vytvoriť 3 klastre!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "metadata": {}, + "execution_count": 106 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "df = df[(df['artist_top_genre'] == 'afro dancehall') | (df['artist_top_genre'] == 'afropop') | (df['artist_top_genre'] == 'nigerian pop')]\n", + "df = df[(df['popularity'] > 0)]\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. 
\n", + "5 Kasala Pioneers \n", + "6 Pull Up Everything Pretty \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "5 DRB Lasgidi nigerian pop 2020 184800 26 \n", + "6 prettyboydo nigerian pop 2018 202648 29 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "5 0.803 0.1270 0.525 0.000007 0.1290 -10.034 \n", + "6 0.818 0.4520 0.587 0.004490 0.5900 -9.840 \n", + "\n", + " speechiness tempo time_signature \n", + "1 0.3600 129.993 3 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 \n", + "5 0.1970 100.103 4 \n", + "6 0.1990 95.842 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
5KasalaPioneersDRB Lasgidinigerian pop2020184800260.8030.12700.5250.0000070.1290-10.0340.1970100.1034
6Pull UpEverything Prettyprettyboydonigerian pop2018202648290.8180.45200.5870.0044900.5900-9.8400.199095.8424
\n
" + }, + "metadata": {}, + "execution_count": 107 + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import StandardScaler\n", + "\n", + "scaler = StandardScaler()\n", + "\n", + "# X = df.loc[:, ('danceability','energy')]\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [ + { + "output_type": "error", + "ename": "ValueError", + "evalue": "Unknown label type: 'continuous'", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0;31m# we create an instance of SVM and fit out data. We do not scale our\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[0;31m# data since we want to plot the support vectors\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 22\u001b[0;31m \u001b[0mls30\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mLabelSpreading\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_30\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_30\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Label Spreading 30% data'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 23\u001b[0m \u001b[0mls50\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mLabelSpreading\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0my_50\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_50\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Label Spreading 50% data'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0mls100\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mLabelSpreading\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Label Spreading 100% data'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/semi_supervised/_label_propagation.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y)\u001b[0m\n\u001b[1;32m 228\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_validate_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 229\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mX_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 230\u001b[0;31m \u001b[0mcheck_classification_targets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 231\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 232\u001b[0m \u001b[0;31m# actual graph construction (implementations should override this)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/utils/multiclass.py\u001b[0m in 
\u001b[0;36mcheck_classification_targets\u001b[0;34m(y)\u001b[0m\n\u001b[1;32m 181\u001b[0m if y_type not in ['binary', 'multiclass', 'multiclass-multioutput',\n\u001b[1;32m 182\u001b[0m 'multilabel-indicator', 'multilabel-sequences']:\n\u001b[0;32m--> 183\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Unknown label type: %r\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0my_type\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 184\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 185\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: Unknown label type: 'continuous'" + ] + } + ], + "source": [ + "from sklearn.svm import SVC\n", + "from sklearn.semi_supervised import LabelSpreading\n", + "from sklearn.semi_supervised import SelfTrainingClassifier\n", + "from sklearn import datasets\n", + "\n", + "X = df[['danceability','acousticness']].values\n", + "y = df['energy'].values\n", + "\n", + "# X = scaler.fit_transform(X)\n", + "\n", + "# step size in the mesh\n", + "h = .02\n", + "\n", + "rng = np.random.RandomState(0)\n", + "y_rand = rng.rand(y.shape[0])\n", + "y_30 = np.copy(y)\n", + "y_30[y_rand < 0.3] = -1 # set random samples to be unlabeled\n", + "y_50 = np.copy(y)\n", + "y_50[y_rand < 0.5] = -1\n", + "# we create an instance of SVM and fit out data. 
We do not scale our\n", + "# data since we want to plot the support vectors\n", + "ls30 = (LabelSpreading().fit(X, y_30), y_30, 'Label Spreading 30% data')\n", + "ls50 = (LabelSpreading().fit(X, y_50), y_50, 'Label Spreading 50% data')\n", + "ls100 = (LabelSpreading().fit(X, y), y, 'Label Spreading 100% data')\n", + "\n", + "# the base classifier for self-training is identical to the SVC\n", + "base_classifier = SVC(kernel='rbf', gamma=.5, probability=True)\n", + "st30 = (SelfTrainingClassifier(base_classifier).fit(X, y_30),\n", + " y_30, 'Self-training 30% data')\n", + "st50 = (SelfTrainingClassifier(base_classifier).fit(X, y_50),\n", + " y_50, 'Self-training 50% data')\n", + "\n", + "rbf_svc = (SVC(kernel='rbf', gamma=.5).fit(X, y), y, 'SVC with rbf kernel')\n", + "\n", + "# create a mesh to plot in\n", + "x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1\n", + "y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1\n", + "xx, yy = np.meshgrid(np.arange(x_min, x_max, h),\n", + " np.arange(y_min, y_max, h))\n", + "\n", + "color_map = {-1: (1, 1, 1), 0: (0, 0, .9), 1: (1, 0, 0), 2: (.8, .6, 0)}\n", + "\n", + "classifiers = (ls30, st30, ls50, st50, ls100, rbf_svc)\n", + "for i, (clf, y_train, title) in enumerate(classifiers):\n", + " # Plot the decision boundary. 
For that, we will assign a color to each\n", + " # point in the mesh [x_min, x_max]x[y_min, y_max].\n", + " plt.subplot(3, 2, i + 1)\n", + " Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])\n", + "\n", + " # Put the result into a color plot\n", + " Z = Z.reshape(xx.shape)\n", + " plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)\n", + " plt.axis('off')\n", + "\n", + " # Plot also the training points\n", + " colors = [color_map[y] for y in y_train]\n", + " plt.scatter(X[:, 0], X[:, 1], c=colors, edgecolors='black')\n", + "\n", + " plt.title(title)\n", + "\n", + "plt.suptitle(\"Unlabeled points are colored white\", y=0.1)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby na automatický preklad [Co-op Translator](https://github.com/Azure/co-op-translator). Aj keď sa snažíme o presnosť, upozorňujeme, že automatické preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre dôležité informácie sa odporúča profesionálny ľudský preklad. 
Nezodpovedáme za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sk/6-NLP/3-Translation-Sentiment/solution/notebook.ipynb b/translations/sk/6-NLP/3-Translation-Sentiment/solution/notebook.ipynb new file mode 100644 index 000000000..9a5525681 --- /dev/null +++ b/translations/sk/6-NLP/3-Translation-Sentiment/solution/notebook.ipynb @@ -0,0 +1,100 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 4, + "coopTranslator": { + "original_hash": "27de2abc0235ebd22080fc8f1107454d", + "translation_date": "2025-09-06T15:22:04+00:00", + "source_file": "6-NLP/3-Translation-Sentiment/solution/notebook.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from textblob import TextBlob\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# You should download the book text, clean it, and import it here\n", + "with open(\"pride.txt\", encoding=\"utf8\") as f:\n", + " file_contents = f.read()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "book_pride = TextBlob(file_contents)\n", + "positive_sentiment_sentences = []\n", + "negative_sentiment_sentences = []" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for sentence in book_pride.sentences:\n", + " if sentence.sentiment.polarity == 1:\n", + " positive_sentiment_sentences.append(sentence)\n", + " if sentence.sentiment.polarity == -1:\n", + " 
negative_sentiment_sentences.append(sentence)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The \" + str(len(positive_sentiment_sentences)) + \" most positive sentences:\")\n", + "for sentence in positive_sentiment_sentences:\n", + " print(\"+ \" + str(sentence.replace(\"\\n\", \"\").replace(\" \", \" \")))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The \" + str(len(negative_sentiment_sentences)) + \" most negative sentences:\")\n", + "for sentence in negative_sentiment_sentences:\n", + " print(\"- \" + str(sentence.replace(\"\\n\", \"\").replace(\" \", \" \")))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby AI prekladu [Co-op Translator](https://github.com/Azure/co-op-translator). Hoci sa snažíme o presnosť, prosím, berte na vedomie, že automatizované preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho rodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre kritické informácie sa odporúča profesionálny ľudský preklad. 
Nenesieme zodpovednosť za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sk/6-NLP/4-Hotel-Reviews-1/notebook.ipynb b/translations/sk/6-NLP/4-Hotel-Reviews-1/notebook.ipynb new file mode 100644 index 000000000..e69de29bb diff --git a/translations/sk/6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb b/translations/sk/6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb new file mode 100644 index 000000000..94cf5b1c9 --- /dev/null +++ b/translations/sk/6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb @@ -0,0 +1,174 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 4, + "coopTranslator": { + "original_hash": "2d05e7db439376aa824f4b387f8324ca", + "translation_date": "2025-09-06T15:21:44+00:00", + "source_file": "6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# EDA\n", + "import pandas as pd\n", + "import time" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_difference_review_avg(row):\n", + " return row[\"Average_Score\"] - row[\"Calc_Average_Score\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the hotel reviews from CSV\n", + "print(\"Loading data file now, this could take a while depending on file size\")\n", + "start = time.time()\n", + "df = pd.read_csv('../../data/Hotel_Reviews.csv')\n", + "end = time.time()\n", + "print(\"Loading took \" + str(round(end - start, 2)) + \" 
seconds\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# What shape is the data (rows, columns)?\n", + "print(\"The shape of the data (rows, cols) is \" + str(df.shape))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# value_counts() creates a Series object that has index and values\n", + "# in this case, the country and the frequency they occur in reviewer nationality\n", + "nationality_freq = df[\"Reviewer_Nationality\"].value_counts()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# What reviewer nationality is the most common in the dataset?\n", + "print(\"The highest frequency reviewer nationality is \" + str(nationality_freq.index[0]).strip() + \" with \" + str(nationality_freq[0]) + \" reviews.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# What is the top 10 most common nationalities and their frequencies?\n", + "print(\"The top 10 highest frequency reviewer nationalities are:\")\n", + "print(nationality_freq[0:10].to_string())\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# How many unique nationalities are there?\n", + "print(\"There are \" + str(nationality_freq.index.size) + \" unique nationalities in the dataset\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# What was the most frequently reviewed hotel for the top 10 nationalities - print the hotel and number of reviews\n", + "for nat in nationality_freq[:10].index:\n", + " # First, extract all the rows that match the criteria into a new dataframe\n", + " nat_df = df[df[\"Reviewer_Nationality\"] == nat] \n", + " # Now get the hotel freq\n", + " freq = 
nat_df[\"Hotel_Name\"].value_counts()\n", + " print(\"The most reviewed hotel for \" + str(nat).strip() + \" was \" + str(freq.index[0]) + \" with \" + str(freq[0]) + \" reviews.\") \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# How many reviews are there per hotel (frequency count of hotel) and do the results match the value in `Total_Number_of_Reviews`?\n", + "# First create a new dataframe based on the old one, removing the uneeded columns\n", + "hotel_freq_df = df.drop([\"Hotel_Address\", \"Additional_Number_of_Scoring\", \"Review_Date\", \"Average_Score\", \"Reviewer_Nationality\", \"Negative_Review\", \"Review_Total_Negative_Word_Counts\", \"Positive_Review\", \"Review_Total_Positive_Word_Counts\", \"Total_Number_of_Reviews_Reviewer_Has_Given\", \"Reviewer_Score\", \"Tags\", \"days_since_review\", \"lat\", \"lng\"], axis = 1)\n", + "# Group the rows by Hotel_Name, count them and put the result in a new column Total_Reviews_Found\n", + "hotel_freq_df['Total_Reviews_Found'] = hotel_freq_df.groupby('Hotel_Name').transform('count')\n", + "# Get rid of all the duplicated rows\n", + "hotel_freq_df = hotel_freq_df.drop_duplicates(subset = [\"Hotel_Name\"])\n", + "print()\n", + "print(hotel_freq_df.to_string())\n", + "print(str(hotel_freq_df.shape))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# While there is an `Average_Score` for each hotel according to the dataset, \n", + "# you can also calculate an average score (getting the average of all reviewer scores in the dataset for each hotel)\n", + "# Add a new column to your dataframe with the column header `Calc_Average_Score` that contains that calculated average. 
\n", + "df['Calc_Average_Score'] = round(df.groupby('Hotel_Name').Reviewer_Score.transform('mean'), 1)\n", + "# Add a new column with the difference between the two average scores\n", + "df[\"Average_Score_Difference\"] = df.apply(get_difference_review_avg, axis = 1)\n", + "# Create a df without all the duplicates of Hotel_Name (so only 1 row per hotel)\n", + "review_scores_df = df.drop_duplicates(subset = [\"Hotel_Name\"])\n", + "# Sort the dataframe to find the lowest and highest average score difference\n", + "review_scores_df = review_scores_df.sort_values(by=[\"Average_Score_Difference\"])\n", + "print(review_scores_df[[\"Average_Score_Difference\", \"Average_Score\", \"Calc_Average_Score\", \"Hotel_Name\"]])\n", + "# Do any hotels have the same (rounded to 1 decimal place) `Average_Score` and `Calc_Average_Score`?\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby na automatický preklad [Co-op Translator](https://github.com/Azure/co-op-translator). Aj keď sa snažíme o presnosť, upozorňujeme, že automatické preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre dôležité informácie sa odporúča profesionálny ľudský preklad. 
Nezodpovedáme za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sk/6-NLP/5-Hotel-Reviews-2/notebook.ipynb b/translations/sk/6-NLP/5-Hotel-Reviews-2/notebook.ipynb new file mode 100644 index 000000000..e69de29bb diff --git a/translations/sk/6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb b/translations/sk/6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb new file mode 100644 index 000000000..3c8f9b89c --- /dev/null +++ b/translations/sk/6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb @@ -0,0 +1,172 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "033cb89c85500224b3c63fd04f49b4aa", + "translation_date": "2025-09-06T15:22:26+00:00", + "source_file": "6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import time\n", + "import ast" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def replace_address(row):\n", + " if \"Netherlands\" in row[\"Hotel_Address\"]:\n", + " return \"Amsterdam, Netherlands\"\n", + " elif \"Barcelona\" in row[\"Hotel_Address\"]:\n", + " return \"Barcelona, Spain\"\n", + " elif \"United Kingdom\" in row[\"Hotel_Address\"]:\n", + " return \"London, United Kingdom\"\n", + " elif 
\"Milan\" in row[\"Hotel_Address\"]: \n", + " return \"Milan, Italy\"\n", + " elif \"France\" in row[\"Hotel_Address\"]:\n", + " return \"Paris, France\"\n", + " elif \"Vienna\" in row[\"Hotel_Address\"]:\n", + " return \"Vienna, Austria\" \n", + " else:\n", + " return row.Hotel_Address\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the hotel reviews from CSV\n", + "start = time.time()\n", + "df = pd.read_csv('../../data/Hotel_Reviews.csv')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# dropping columns we will not use:\n", + "df.drop([\"lat\", \"lng\"], axis = 1, inplace=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Replace all the addresses with a shortened, more useful form\n", + "df[\"Hotel_Address\"] = df.apply(replace_address, axis = 1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Drop `Additional_Number_of_Scoring`\n", + "df.drop([\"Additional_Number_of_Scoring\"], axis = 1, inplace=True)\n", + "# Replace `Total_Number_of_Reviews` and `Average_Score` with our own calculated values\n", + "df.Total_Number_of_Reviews = df.groupby('Hotel_Name').transform('count')\n", + "df.Average_Score = round(df.groupby('Hotel_Name').Reviewer_Score.transform('mean'), 1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Process the Tags into new columns\n", + "# The file Hotel_Reviews_Tags.py, identifies the most important tags\n", + "# Leisure trip, Couple, Solo traveler, Business trip, Group combined with Travelers with friends, \n", + "# Family with young children, Family with older children, With a pet\n", + "df[\"Leisure_trip\"] = df.Tags.apply(lambda tag: 1 if \"Leisure trip\" in tag else 0)\n", + 
"df[\"Couple\"] = df.Tags.apply(lambda tag: 1 if \"Couple\" in tag else 0)\n", + "df[\"Solo_traveler\"] = df.Tags.apply(lambda tag: 1 if \"Solo traveler\" in tag else 0)\n", + "df[\"Business_trip\"] = df.Tags.apply(lambda tag: 1 if \"Business trip\" in tag else 0)\n", + "df[\"Group\"] = df.Tags.apply(lambda tag: 1 if \"Group\" in tag or \"Travelers with friends\" in tag else 0)\n", + "df[\"Family_with_young_children\"] = df.Tags.apply(lambda tag: 1 if \"Family with young children\" in tag else 0)\n", + "df[\"Family_with_older_children\"] = df.Tags.apply(lambda tag: 1 if \"Family with older children\" in tag else 0)\n", + "df[\"With_a_pet\"] = df.Tags.apply(lambda tag: 1 if \"With a pet\" in tag else 0)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# No longer need any of these columns\n", + "df.drop([\"Review_Date\", \"Review_Total_Negative_Word_Counts\", \"Review_Total_Positive_Word_Counts\", \"days_since_review\", \"Total_Number_of_Reviews_Reviewer_Has_Given\"], axis = 1, inplace=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Saving results to Hotel_Reviews_Filtered.csv\n", + "Filtering took 23.74 seconds\n" + ] + } + ], + "source": [ + "# Saving new data file with calculated columns\n", + "print(\"Saving results to Hotel_Reviews_Filtered.csv\")\n", + "df.to_csv(r'../../data/Hotel_Reviews_Filtered.csv', index = False)\n", + "end = time.time()\n", + "print(\"Filtering took \" + str(round(end - start, 2)) + \" seconds\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby AI prekladu [Co-op Translator](https://github.com/Azure/co-op-translator). Aj keď sa snažíme o presnosť, prosím, berte na vedomie, že automatizované preklady môžu obsahovať chyby alebo nepresnosti. 
Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre kritické informácie sa odporúča profesionálny ľudský preklad. Nie sme zodpovední za žiadne nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sk/6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb b/translations/sk/6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb new file mode 100644 index 000000000..76d65ca9c --- /dev/null +++ b/translations/sk/6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb @@ -0,0 +1,137 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "341efc86325ec2a214f682f57a189dfd", + "translation_date": "2025-09-06T15:22:47+00:00", + "source_file": "6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the hotel reviews from CSV (you can )\n", + "import pandas as pd \n", + "\n", + "df = pd.read_csv('../../data/Hotel_Reviews_Filtered.csv')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# We want to find the most useful tags to keep\n", + "# Remove opening and closing brackets\n", + "df.Tags = df.Tags.str.strip(\"[']\")\n", + "# remove all quotes too\n", + "df.Tags = df.Tags.str.replace(\" ', '\", \",\", regex = False)\n" + ] + }, + { 
+ "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# removing this to take advantage of the 'already a phrase' fact of the dataset \n", + "# Now split the strings into a list\n", + "tag_list_df = df.Tags.str.split(',', expand = True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove leading and trailing spaces\n", + "df[\"Tag_1\"] = tag_list_df[0].str.strip()\n", + "df[\"Tag_2\"] = tag_list_df[1].str.strip()\n", + "df[\"Tag_3\"] = tag_list_df[2].str.strip()\n", + "df[\"Tag_4\"] = tag_list_df[3].str.strip()\n", + "df[\"Tag_5\"] = tag_list_df[4].str.strip()\n", + "df[\"Tag_6\"] = tag_list_df[5].str.strip()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# Merge the 6 columns into one with melt\n", + "df_tags = df.melt(value_vars=[\"Tag_1\", \"Tag_2\", \"Tag_3\", \"Tag_4\", \"Tag_5\", \"Tag_6\"])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "The shape of the tags with no filtering: (2514684, 2)\n", + " index count\n", + "0 Leisure trip 338423\n", + "1 Couple 205305\n", + "2 Solo traveler 89779\n", + "3 Business trip 68176\n", + "4 Group 51593\n", + "5 Family with young children 49318\n", + "6 Family with older children 21509\n", + "7 Travelers with friends 1610\n", + "8 With a pet 1078\n" + ] + } + ], + "source": [ + "# Get the value counts\n", + "tag_vc = df_tags.value.value_counts()\n", + "# print(tag_vc)\n", + "print(\"The shape of the tags with no filtering:\", str(df_tags.shape))\n", + "# Drop rooms, suites, and length of stay, mobile device and anything with less count than a 1000\n", + "df_tags = df_tags[~df_tags.value.str.contains(\"Standard|room|Stayed|device|Beds|Suite|Studio|King|Superior|Double\", na=False, case=False)]\n", + "tag_vc = 
df_tags.value.value_counts().reset_index(name=\"count\").query(\"count > 1000\")\n", + "# Print the top 10 (there should only be 9 and we'll use these in the filtering section)\n", + "print(tag_vc[:10])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby AI prekladu [Co-op Translator](https://github.com/Azure/co-op-translator). Hoci sa snažíme o presnosť, prosím, berte na vedomie, že automatizované preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho rodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre kritické informácie sa odporúča profesionálny ľudský preklad. Nenesieme zodpovednosť za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sk/6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb b/translations/sk/6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb new file mode 100644 index 000000000..00c499297 --- /dev/null +++ b/translations/sk/6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb @@ -0,0 +1,260 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "705bf02633759f689abc37b19749a16d", + "translation_date": "2025-09-06T15:23:07+00:00", + "source_file": "6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": 9, + "metadata": 
{}, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[nltk_data] Downloading package vader_lexicon to\n[nltk_data] /Users/jenlooper/nltk_data...\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ], + "source": [ + "import time\n", + "import pandas as pd\n", + "import nltk as nltk\n", + "from nltk.corpus import stopwords\n", + "from nltk.sentiment.vader import SentimentIntensityAnalyzer\n", + "nltk.download('vader_lexicon')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "vader_sentiment = SentimentIntensityAnalyzer()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# There are 3 possibilities of input for a review:\n", + "# It could be \"No Negative\", in which case, return 0\n", + "# It could be \"No Positive\", in which case, return 0\n", + "# It could be a review, in which case calculate the sentiment\n", + "def calc_sentiment(review): \n", + " if review == \"No Negative\" or review == \"No Positive\":\n", + " return 0\n", + " return vader_sentiment.polarity_scores(review)[\"compound\"] \n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the hotel reviews from CSV\n", + "df = pd.read_csv(\"../../data/Hotel_Reviews_Filtered.csv\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove stop words - can be slow for a lot of text!\n", + "# Ryan Han (ryanxjhan on Kaggle) has a great post measuring performance of different stop words removal approaches\n", + "# https://www.kaggle.com/ryanxjhan/fast-stop-words-removal # using the approach that Ryan recommends\n", + "start = time.time()\n", + "cache = set(stopwords.words(\"english\"))\n", + "def 
remove_stopwords(review):\n", + " text = \" \".join([word for word in review.split() if word not in cache])\n", + " return text\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove the stop words from both columns\n", + "df.Negative_Review = df.Negative_Review.apply(remove_stopwords) \n", + "df.Positive_Review = df.Positive_Review.apply(remove_stopwords)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Removing stop words took 5.77 seconds\n" + ] + } + ], + "source": [ + "end = time.time()\n", + "print(\"Removing stop words took \" + str(round(end - start, 2)) + \" seconds\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Calculating sentiment columns for both positive and negative reviews\n", + "Calculating sentiment took 201.07 seconds\n" + ] + } + ], + "source": [ + "# Add a negative sentiment and positive sentiment column\n", + "print(\"Calculating sentiment columns for both positive and negative reviews\")\n", + "start = time.time()\n", + "df[\"Negative_Sentiment\"] = df.Negative_Review.apply(calc_sentiment)\n", + "df[\"Positive_Sentiment\"] = df.Positive_Review.apply(calc_sentiment)\n", + "end = time.time()\n", + "print(\"Calculating sentiment took \" + str(round(end - start, 2)) + \" seconds\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Negative_Review Negative_Sentiment\n", + "186584 So bad experience memories I hotel The first n... -0.9920\n", + "129503 First charged twice room booked booking second... -0.9896\n", + "307286 The staff Had bad experience even booking Janu... 
-0.9889\n", + "452092 No WLAN room Incredibly rude restaurant staff ... -0.9884\n", + "201293 We usually traveling Paris 2 3 times year busi... -0.9873\n", + "... ... ...\n", + "26899 I would say however one night expensive even d... 0.9933\n", + "138365 Wifi terribly slow I speed test network upload... 0.9938\n", + "79215 I find anything hotel first I walked past hote... 0.9938\n", + "278506 The property great location There bakery next ... 0.9945\n", + "339189 Guys I like hotel I wish return next year Howe... 0.9948\n", + "\n", + "[515738 rows x 2 columns]\n", + " Positive_Review Positive_Sentiment\n", + "137893 Bathroom Shower We going stay twice hotel 2 ni... -0.9820\n", + "5839 I completely disappointed mad since reception ... -0.9780\n", + "64158 get everything extra internet parking breakfas... -0.9751\n", + "124178 I didnt like anythig Room small Asked upgrade ... -0.9721\n", + "489137 Very rude manager abusive staff reception Dirt... -0.9703\n", + "... ... ...\n", + "331570 Everything This recently renovated hotel class... 0.9984\n", + "322920 From moment stepped doors Guesthouse Hotel sta... 0.9985\n", + "293710 This place surprise expected good actually gre... 0.9985\n", + "417442 We celebrated wedding night Langham I commend ... 0.9985\n", + "132492 We arrived super cute boutique hotel area expl... 
0.9987\n", + "\n", + "[515738 rows x 2 columns]\n" + ] + } + ], + "source": [ + "df = df.sort_values(by=[\"Negative_Sentiment\"], ascending=True)\n", + "print(df[[\"Negative_Review\", \"Negative_Sentiment\"]])\n", + "df = df.sort_values(by=[\"Positive_Sentiment\"], ascending=True)\n", + "print(df[[\"Positive_Review\", \"Positive_Sentiment\"]])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "# Reorder the columns (This is cosmetic, but to make it easier to explore the data later)\n", + "df = df.reindex([\"Hotel_Name\", \"Hotel_Address\", \"Total_Number_of_Reviews\", \"Average_Score\", \"Reviewer_Score\", \"Negative_Sentiment\", \"Positive_Sentiment\", \"Reviewer_Nationality\", \"Leisure_trip\", \"Couple\", \"Solo_traveler\", \"Business_trip\", \"Group\", \"Family_with_young_children\", \"Family_with_older_children\", \"With_a_pet\", \"Negative_Review\", \"Positive_Review\"], axis=1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Saving results to Hotel_Reviews_NLP.csv\n" + ] + } + ], + "source": [ + "print(\"Saving results to Hotel_Reviews_NLP.csv\")\n", + "df.to_csv(r\"../../data/Hotel_Reviews_NLP.csv\", index = False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby AI prekladu [Co-op Translator](https://github.com/Azure/co-op-translator). Hoci sa snažíme o presnosť, prosím, berte na vedomie, že automatizované preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho rodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre kritické informácie sa odporúča profesionálny ľudský preklad. 
Nie sme zodpovední za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sk/7-TimeSeries/1-Introduction/solution/notebook.ipynb b/translations/sk/7-TimeSeries/1-Introduction/solution/notebook.ipynb new file mode 100644 index 000000000..c40687482 --- /dev/null +++ b/translations/sk/7-TimeSeries/1-Introduction/solution/notebook.ipynb @@ -0,0 +1,162 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Tao Hong, Pierre Pinson, Shu Fan, Hamidreza Zareipour, Alberto Troccoli a Rob J. Hyndman, \"Probabilistic energy forecasting: Global Energy Forecasting Competition 2014 and beyond\", International Journal of Forecasting, zv.32, č.3, str. 896-913, júl-september, 2016.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import matplotlib.pyplot as plt\n", + "from common.utils import load_data\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Načítajte údaje z CSV do Pandas dataframe.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " load\n", + "2012-01-01 00:00:00 2698.0\n", + "2012-01-01 01:00:00 2558.0\n", + "2012-01-01 02:00:00 2444.0\n", + "2012-01-01 03:00:00 2402.0\n", + "2012-01-01 04:00:00 2403.0" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
load
2012-01-01 00:00:002698.0
2012-01-01 01:00:002558.0
2012-01-01 02:00:002444.0
2012-01-01 03:00:002402.0
2012-01-01 04:00:002403.0
\n
" + }, + "metadata": {}, + "execution_count": 7 + } + ], + "source": [ + "data_dir = './data'\n", + "energy = load_data(data_dir)[['load']]\n", + "energy.head()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Zobrazte všetky dostupné údaje o zaťažení (január 2012 až december 2014)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "energy.plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + 
}, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "energy['2014-07-01':'2014-07-07'].plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby AI prekladu [Co-op Translator](https://github.com/Azure/co-op-translator). Hoci sa snažíme o presnosť, prosím, berte na vedomie, že automatizované preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre kritické informácie sa odporúča profesionálny ľudský preklad. 
Nie sme zodpovední za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ], + "metadata": { + "kernel_info": { + "name": "python3" + }, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "nteract": { + "version": "nteract-front-end@1.0.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "dddca9ad9e34435494e0933c218e1579", + "translation_date": "2025-09-06T14:01:03+00:00", + "source_file": "7-TimeSeries/1-Introduction/solution/notebook.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sk/7-TimeSeries/1-Introduction/working/notebook.ipynb b/translations/sk/7-TimeSeries/1-Introduction/working/notebook.ipynb new file mode 100644 index 000000000..745ae224b --- /dev/null +++ b/translations/sk/7-TimeSeries/1-Introduction/working/notebook.ipynb @@ -0,0 +1,63 @@ +{ + "cells": [ + { + "source": [ + "# Nastavenie údajov\n", + "\n", + "V tomto notebooku ukážeme, ako:\n", + "\n", + "nastaviť časové rady údajov pre tento modul\n", + "vizualizovať údaje\n", + "Údaje v tomto príklade pochádzajú zo súťaže GEFCom2014 v predpovedaní<sup>1</sup>. Obsahujú 3 roky hodinových hodnôt spotreby elektrickej energie a teploty medzi rokmi 2012 a 2014.\n", + "\n", + "<sup>1</sup>Tao Hong, Pierre Pinson, Shu Fan, Hamidreza Zareipour, Alberto Troccoli a Rob J. Hyndman, \"Probabilistic energy forecasting: Global Energy Forecasting Competition 2014 and beyond\", International Journal of Forecasting, zv.32, č.3, str. 
896-913, júl-september, 2016.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby na automatický preklad [Co-op Translator](https://github.com/Azure/co-op-translator). Aj keď sa snažíme o presnosť, upozorňujeme, že automatické preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre dôležité informácie sa odporúča profesionálny ľudský preklad. Nezodpovedáme za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ], + "metadata": { + "kernel_info": { + "name": "python3" + }, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "nteract": { + "version": "nteract-front-end@1.0.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "5e2bbe594906dce3aaaa736d6dac6683", + "translation_date": "2025-09-06T14:02:08+00:00", + "source_file": "7-TimeSeries/1-Introduction/working/notebook.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sk/7-TimeSeries/2-ARIMA/solution/notebook.ipynb b/translations/sk/7-TimeSeries/2-ARIMA/solution/notebook.ipynb new file mode 100644 index 000000000..db6484f62 --- /dev/null +++ b/translations/sk/7-TimeSeries/2-ARIMA/solution/notebook.ipynb @@ -0,0 +1,1124 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "Tao Hong, Pierre Pinson, 
Shu Fan, Hamidreza Zareipour, Alberto Troccoli a Rob J. Hyndman, \"Probabilistic energy forecasting: Global Energy Forecasting Competition 2014 and beyond\", International Journal of Forecasting, zv.32, č.3, str. 896-913, júl-september, 2016.\n" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Inštalácia závislostí\n", + "Začnite inštaláciou niektorých potrebných závislostí. Tieto knižnice s ich príslušnými verziami sú známe tým, že fungujú pre toto riešenie:\n", + "\n", + "* `statsmodels == 0.12.2`\n", + "* `matplotlib == 3.4.2`\n", + "* `scikit-learn == 0.24.2`\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 16, + "source": [ + "!pip install statsmodels" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "/bin/sh: pip: command not found\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 17, + "source": [ + "import os\n", + "import warnings\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import datetime as dt\n", + "import math\n", + "\n", + "from pandas.plotting import autocorrelation_plot\n", + "from statsmodels.tsa.statespace.sarimax import SARIMAX\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from common.utils import load_data, mape\n", + "from IPython.display import Image\n", + "\n", + "%matplotlib inline\n", + "pd.options.display.float_format = '{:,.2f}'.format\n", + "np.set_printoptions(precision=2)\n", + "warnings.filterwarnings(\"ignore\") # specify to ignore warning messages\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 18, + "source": [ + "energy = load_data('./data')[['load']]\n", + "energy.head(10)" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2012-01-01 00:00:002,698.00
2012-01-01 01:00:002,558.00
2012-01-01 02:00:002,444.00
2012-01-01 03:00:002,402.00
2012-01-01 04:00:002,403.00
2012-01-01 05:00:002,453.00
2012-01-01 06:00:002,560.00
2012-01-01 07:00:002,719.00
2012-01-01 08:00:002,916.00
2012-01-01 09:00:003,105.00
\n", + "
" + ], + "text/plain": [ + " load\n", + "2012-01-01 00:00:00 2,698.00\n", + "2012-01-01 01:00:00 2,558.00\n", + "2012-01-01 02:00:00 2,444.00\n", + "2012-01-01 03:00:00 2,402.00\n", + "2012-01-01 04:00:00 2,403.00\n", + "2012-01-01 05:00:00 2,453.00\n", + "2012-01-01 06:00:00 2,560.00\n", + "2012-01-01 07:00:00 2,719.00\n", + "2012-01-01 08:00:00 2,916.00\n", + "2012-01-01 09:00:00 3,105.00" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Zobrazte všetky dostupné údaje o zaťažení (január 2012 až december 2014)\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 19, + "source": [ + "energy.plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Vytvorte tréningové a testovacie dátové sady\n", + "\n", + "### Rozdelenie údajov na tréningové a testovacie sady\n", + "Rozdelenie údajov na tréningové a testovacie sady je kľúčovým krokom pri vývoji modelov strojového učenia. Tréningová sada sa používa na naučenie modelu, zatiaľ čo testovacia sada slúži na vyhodnotenie jeho výkonu na neznámych údajoch.\n", + "\n", + "[!TIP] Uistite sa, že testovacia sada je reprezentatívna pre skutočné údaje, s ktorými sa model stretne v praxi.\n", + "\n", + "### Ako rozdeliť údaje\n", + "Existuje niekoľko spôsobov, ako rozdeliť údaje na tréningové a testovacie sady:\n", + "\n", + "1. **Manuálne rozdelenie**: Ručne vyberte podmnožinu údajov pre tréning a testovanie.\n", + "2. **Použitie knižníc**: Využite knižnice ako `scikit-learn`, ktoré poskytujú funkcie na jednoduché rozdelenie údajov, napríklad `train_test_split`.\n", + "\n", + "```python\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "# Rozdelenie údajov na tréningovú a testovaciu sadu\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", + "```\n", + "\n", + "[!NOTE] Parameter `test_size` určuje, aké percento údajov bude použitých na testovanie. V tomto prípade je to 20 %.\n", + "\n", + "### Vyváženie údajov\n", + "Ak máte nevyvážené údaje (napríklad v prípade klasifikácie, kde jedna trieda výrazne prevláda), je dôležité zabezpečiť, aby tréningová a testovacia sada obsahovali reprezentatívne vzorky všetkých tried.\n", + "\n", + "[!WARNING] Nevyvážené údaje môžu viesť k zaujatým modelom, ktoré nebudú dobre generalizovať.\n", + "\n", + "### Kedy použiť validačnú sadu\n", + "Okrem tréningovej a testovacej sady môžete potrebovať aj validačnú sadu na ladenie hyperparametrov modelu. 
Validačná sada by mala byť oddelená od tréningovej sady, aby sa predišlo nadmernému prispôsobeniu.\n", + "\n", + "```python\n", + "# Rozdelenie údajov na tréningovú, validačnú a testovaciu sadu\n", + "X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)\n", + "X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)\n", + "```\n", + "\n", + "V tomto príklade je 60 % údajov použitých na tréning, 20 % na validáciu a 20 % na testovanie.\n", + "\n", + "[!IMPORTANT] Nikdy nepoužívajte testovaciu sadu na ladenie modelu. Testovacia sada by mala byť použitá výhradne na konečné vyhodnotenie výkonu modelu.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 20, + "source": [ + "train_start_dt = '2014-11-01 00:00:00'\n", + "test_start_dt = '2014-12-30 00:00:00' " + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 21, + "source": [ + "energy[(energy.index < test_start_dt) & (energy.index >= train_start_dt)][['load']].rename(columns={'load':'train'}) \\\n", + " .join(energy[test_start_dt:][['load']].rename(columns={'load':'test'}), how='outer') \\\n", + " .plot(y=['train', 'test'], figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 22, + "source": [ + "train = energy.copy()[(energy.index >= train_start_dt) & (energy.index < test_start_dt)][['load']]\n", + "test = energy.copy()[energy.index >= test_start_dt][['load']]\n", + "\n", + "print('Training data shape: ', train.shape)\n", + "print('Test data shape: ', test.shape)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Training data shape: (1416, 1)\n", + "Test data shape: (48, 1)\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 23, + "source": [ + "scaler = MinMaxScaler()\n", + "train['load'] = scaler.fit_transform(train)\n", + "train.head(10)" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2014-11-01 00:00:000.10
2014-11-01 01:00:000.07
2014-11-01 02:00:000.05
2014-11-01 03:00:000.04
2014-11-01 04:00:000.06
2014-11-01 05:00:000.10
2014-11-01 06:00:000.19
2014-11-01 07:00:000.31
2014-11-01 08:00:000.40
2014-11-01 09:00:000.48
\n", + "
" + ], + "text/plain": [ + " load\n", + "2014-11-01 00:00:00 0.10\n", + "2014-11-01 01:00:00 0.07\n", + "2014-11-01 02:00:00 0.05\n", + "2014-11-01 03:00:00 0.04\n", + "2014-11-01 04:00:00 0.06\n", + "2014-11-01 05:00:00 0.10\n", + "2014-11-01 06:00:00 0.19\n", + "2014-11-01 07:00:00 0.31\n", + "2014-11-01 08:00:00 0.40\n", + "2014-11-01 09:00:00 0.48" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Pôvodné vs škálované údaje:\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 24, + "source": [ + "energy[(energy.index >= train_start_dt) & (energy.index < test_start_dt)][['load']].rename(columns={'load':'original load'}).plot.hist(bins=100, fontsize=12)\n", + "train.rename(columns={'load':'scaled load'}).plot.hist(bins=100, fontsize=12)\n", + "plt.show()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYEAAAD7CAYAAACMlyg3AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAZ+klEQVR4nO3df5BV5Z3n8fdHoOjwKyq0ZFYGOroRGBEDNNHEgJg4cUdXolIzi8YVzRiyZq1UyspkslZQRl3N7jBOyk00YWOUKJgfikw07tRIIok6M2rjChFtpSxFWX8UkAnQ/Ea/+8c5rZdL3+5z6T739u3zeVWdou95zjn3e56+fb8853nOcxQRmJlZMR1V7wDMzKx+nATMzArMScDMrMCcBMzMCsxJwMyswAbXO4BqjBkzJlpaWuodhplZQ1m7du3WiGjuqqyhkkBLSwttbW31DsPMrKFI2lSpzJeDzMwKzEnAzKzAnATMzAqsofoEzKz/OnDgAJs3b2bv3r31DqWwmpqaGDduHEOGDMm8j5OAmfWJzZs3M3LkSFpaWpBU73AKJyLYtm0bmzdv5qMf/Wjm/Xw5yMz6xN69exk9erQTQJ1IYvTo0VW3xJwEzKzPOAHU15HUv5OAmVmBuU/AzHLR8s1f9unxXvv2eX12rHPPPZcVK1Zw9NFHV9zmuuuuY/bs2Zx99tlVH3/NmjUsWbKEhx9+ONP6IzFnzhyWLFlCa2trr47jJGBWAKVfyH35ZdpoIoKI4JFHHulx2xtuuKEGEdWfLweZ2YBx6623MmXKFKZMmcJ3vvMdAF577TUmTpzIZZddxpQpU3jjjTdoaWlh69atANx4441MnDiRT3/601x88cUsWbIEgMsvv5z7778fSKasuf7665k+fTqnnHIK7e3tADz99NN88pOfZNq0aXzqU5/ipZdeyhzr73//ey644AKmTp3K6aefzvr167s95p49e5g/fz6TJ0/mwgsvZM+ePX1SZzVpCUj6GPA74P6IuDRddwlwCzAGeBT4Y
kT8vhbxmNnAs3btWu666y6eeuopIoLTTjuNM888k2OOOYaNGzeybNkyTj/99EP2eeaZZ3jggQdYt24dBw4cYPr06cyYMaPL448ZM4Znn32W22+/nSVLlvDDH/6QSZMm8fjjjzN48GBWr17NtddeywMPPJAp3uuvv55p06axatUqfv3rX3PZZZfx3HPPVTzmHXfcwbBhw3jxxRdZv34906dP73WdQe0uB30PeKbzhaSTgR8A5wHPAkuB24H5NYrHzAaYJ554ggsvvJDhw4cDcNFFF/H4448zd+5cJkyYcFgCAHjyySf5/Oc/T1NTE01NTZx//vkVj3/RRRcBMGPGDFauXAnA9u3bWbBgARs3bkQSBw4cqCrezoTxmc98hm3btrFjx46Kx/ztb3/LV7/6VQCmTp3K1KlTM79Xd3K/HCRpPvAH4Fclq78APBQRv42IDmARcJGkkXnHY2bF05kYemPo0KEADBo0iIMHDwKwaNEizjrrLJ5//nkeeuihPrlbOo9jdifXJCBpFHADcE1Z0cnAus4XEfEKsB84qYtjLJTUJqlty5YteYZrVjgt3/zl+0ujmzVrFqtWrWL37t3s2rWLBx98kFmzZnW7zxlnnPH+F21HR0fVo3a2b9/O8ccfD8Ddd99ddbzLly8HklFDY8aMYdSoURWPOXv2bFasWAHA888//34fQm/lfTnoRuDOiNhcdhPDCGB72bbbgcNaAhGxlORyEa2trZFTnGbWx2o9Cmn69OlcfvnlfOITnwDgyiuvZNq0abz22msV95k5cyZz585l6tSpjB07llNOOYUPf/jDmd/zG9/4BgsWLOCmm27ivPOqO9/FixfzxS9+kalTpzJs2DCWLVvW7TGvuuoqrrjiCiZPnszkyZMr9l1USxH5fK9K+jiwHJgWEfslLQb+fURcKukfgCcj4n+WbL8TmBMRaysds7W1NfxQGbPqVRoi2pdDR1988UUmT57cq2PUQ0dHByNGjGD37t3Mnj2bpUuX9lmnaz109XuQtDYiuryhIM+WwBygBXg9bQWMAAZJ+hPgH4FTSwI8ARgKvJxjPGZmh1m4cCEvvPACe/fuZcGCBQ2dAI5EnklgKfCTktdfJ0kKVwHHAf8iaRbJ6KAbgJURsTPHeMzMDtN5nb2ocksCEbEb2N35WlIHsDcitgBbJP0XkstFo4HVwBV5xWJmtRERnkSujo7k8n7Npo2IiMVlr1cAxU7BZgNIU1MT27Zt83TSddL5PIGmpqaq9vPcQWbWJ8aNG8fmzZvxUO766XyyWDWcBMysTwwZMqSqJ1pZ/+AJ5MzMCsxJwMyswJwEzMwKzEnAzKzAnATMzArMScDMrMCcBMzMCsxJwMyswJwEzMwKzEnAzKzAnATMzArMcweZ9QN9+YQvs2q4JWBmVmC5JgFJ90p6S9IOSS9LujJd3yIpJHWULIvyjMXMzA6X9+WgW4C/jIh9kiYBayT9X2BbWn50RBzMOQYzM6sg15ZARGyIiH2dL9PlxDzf08zMssu9T0DS7ZJ2A+3AW8AjJcWbJG2WdJekMRX2XyipTVKbn1hkZta3ck8CEfEVYCQwC1gJ7AO2AjOBCcCMtHx5hf2XRkRrRLQ2NzfnHa6ZWaHUZHRQRLwbEU8A44CrIqIjItoi4mBEvANcDXxO0shaxGNmZolaDxEdTNd9ApH+6yGrZmY1lNuXrqTjJM2XNELSIEnnABcDv5J0mqSJko6SNBq4DVgTEdvzisfMzA6X5xDRAK4Cvk+SbDYBX4uIX0i6GLgZOA7YATxKkiDMrB8ovYMZfBfzQJZbEoiILcCZFcruA+7L673NzCwbX4M3MyswJwEzswJzEjAzKzBPJW1mPfJU1wOXWwJmZgXmJGBmVmBOAmZmBeYkYGZWYO4YNsuZO1WtP3NLwMyswJwEzMwKzEnAzKzAnATMzArMScDMrMCcBMzMCizXJCDpXklvSdoh6WVJV5aUfVZSu6Tdkh6TNCHPWMzM7HB5twRuA
VoiYhQwF7hJ0gxJY4CVwCLgWKAN+GnOsZiZWZlcbxaLiA2lL9PlRGAGsCEifg4gaTGwVdKkiGjPMyYzM/tA7n0Ckm6XtBtoB94CHgFOBtZ1bhMRu4BX0vXl+y+U1CapbcuWLXmHa2ZWKLkngYj4CjASmEVyCWgfMALYXrbp9nS78v2XRkRrRLQ2NzfnHa6ZWaHUZHRQRLwbEU8A44CrgA5gVNlmo4CdtYjHzMwStR4iOpikT2ADcGrnSknDS9abmVmN5JYEJB0nab6kEZIGSToHuBj4FfAgMEXSPElNwHXAencKm5nVVp6jg4Lk0s/3SZLNJuBrEfELAEnzgO8C9wJPAfNzjMWsX8h7WmlPW23Vyi0JRMQW4MxuylcDk/J6fzMz65mnjTAzKzAnATOzAnMSMDMrMD9j2Kyfceeu1ZJbAmZmBeYkYGZWYE4CZmYF5iRgZlZg7hg2s6q443pgcUvAzKzAnATMzArMScDMrMAyJQFJp+QdiJmZ1V7WjuHbJQ0F7gaWR0T5oyHNLAfuhLW8ZWoJRMQs4AvAHwNrJa2Q9Ke5RmZmZrnL3CcQERuBbwF/TfKcgNsktUu6qKvtJQ2VdKekTZJ2SnpO0p+lZS2SQlJHybKoL07IzMyyy3Q5SNJU4ArgPOBR4PyIeFbSvwP+BVhZ4dhvkCSM14FzgZ+V9S8cHREHexG/mZn1QtY+gf8F/BC4NiL2dK6MiDclfaurHSJiF7C4ZNXDkl4FZgBrjyxcMzPrS1mTwHnAnoh4F0DSUUBTROyOiHuyHEDSWOAkYEPJ6k2SgqR18VcRsTV76GZm1ltZk8Bq4GygI309DPgn4FNZdpY0BFgOLIuIdkkjgJnAc8Bo4Htp+Tld7LsQWAgwfvz4jOEWi0eQFFvp778321d7HBsYsnYMN0VEZwIg/XlYlh3TVsM9wH7g6s79I6ItIg5GxDvp+s9JGlm+f0QsjYjWiGhtbm7OGK6ZmWWRNQnskjS984WkGcCebrbv3E7AncBYYF5EHKiwaVQZj5mZ9YGsl4O+Bvxc0puAgI8A/ynDfncAk4GzSzuUJZ0G/AHYCBwD3Aas8U1oZma1lSkJRMQzkiYBE9NVL3Xzv3oAJE0AvgzsA95OGgWQrnsPuBk4DthB0jF8cdXRm5lZr1TzPIGZQEu6z3RJRMSPK20cEZtIWg2V3FfFe5sNOHl3xLqj17LIerPYPcCJJKN53k1XB1AxCZiZWf+XtSXQCvxJRESPW5qZWcPIOhrneZLOYDMzG0CytgTGAC9IepqkoxeAiJibS1RmZlYTWZPA4jyDKCrf6ds4/LuygSrrENHfpEM+PxYRqyUNAwblG5qZmeUt6+MlvwTcD/wgXXU8sCqvoMzMrDaydgz/V+AMkhu7Oh8wc1xeQZmZWW1kTQL7ImJ/5wtJg/lgvh8zM2tQWTuGfyPpWuBD6bOFvwI8lF9Y1hfcmVk/vlvXGkXWlsA3gS3A70jm/nmE5HnDZmbWwLKODnoP+N/pYmZmA0TWuYNepYs+gIg4oc8jMjOzmqlm7qBOTcCfA8f2fThmZlZLWS8HbStb9R1Ja4Hr+j4ks77Xl53k7nDvmuulMWW9HDS95OVRJC2Dap5FYGZm/VDWL/K/K/n5IPAa8Bfd7SBpKHA7cDbJpaNXgP8WEf8nLf8s8D1gPPAUcHn6IBozM6uRrJeDzjrCY78BnAm8DpwL/EzSKUAHsBK4kuR+gxuBnwKnH8H7mJnZEcp6Oeia7soj4tYu1u3i0NlHH05HGc0ARgMbIuLn6fEXA1slTYqI9myhm5lZb1UzOmgm8Iv09fnA08DGrG8kaSxwErABuApY11kWEbskvQKcDLSX7bcQWAgwfvz4rG9nNVL0zsCin781vqxJYBwwPSJ2wvv/c/9lRFyaZWdJQ4DlwLKIaJc0guQO5FLbgZHl+0bEUmApQGtrq+crMjPrQ1mnjRgL7C95vT9d1yNJRwH3pPtcna7uAEaVbToK2JkxHjMz6wNZW
wI/Bp6W9GD6+gJgWU87SRJwJ0nCODciDqRFG4AFJdsNB05M15uZWY1kaglExH8HrgD+LV2uiIibM+x6BzAZOD8i9pSsfxCYImmepCaSm87Wu1PYzKy2qrnhaxiwIyLuktQs6aMR8WqljdPHUX6Z5MH0byeNAgC+HBHLJc0DvgvcS3KfwPwjOgMzqxtPmd34sg4RvZ5khNBE4C5gCMmX9xmV9klv/FI35auBSdUEa2ZmfStrx/CFwFxgF0BEvEkXI3nMzKyxZE0C+yMiSKeTTjtyzcyswWVNAj+T9APgaElfAlbjB8yYmTW8rHMHLUmfLbyDpF/guoh4NNfIrKH5Ttq+5zq1PPSYBCQNAlank8j5i9/MbADp8XJQRLwLvCfpwzWIx8zMaijrfQIdwO8kPUo6QgggIr6aS1RmZlYTWZPAynQxM7MBpNskIGl8RLweET3OE2S9404/q4bv1LW+0lOfwKrOHyQ9kHMsZmZWYz0lgdJpH07IMxAzM6u9npJAVPjZzMwGgJ46hk+VtIOkRfCh9GfS1xER5Q+GMTOzBtJtEoiIQbUKxGqrUsdiaad0lm36s952nla7vztrrRFlnTvIzMwGoFyTgKSrJbVJ2ifp7pL1LZJCUkfJsijPWMzM7HDVPFnsSLwJ3AScA3yoi/KjI+JgzjGYmVkFuSaBiFgJIKkVGJfne5mZWfXybgn0ZJOkIJmd9K8iYmv5BpIWAgsBxo8fX+Pw6iNLB2OjdM5aMVV7B7zvmK+fenUMbwVmAhOAGSSPqlze1YYRsTQiWiOitbm5uYYhmpkNfHVpCUREB9CWvnxH0tXAW5JGRsTOesRkZlZE/WWIaOfdyP0lHjOzQsi1JSBpcPoeg4BBkpqAgySXgP4AbASOAW4D1kTE9jzjMTOzQ+V9OehbwPUlry8F/gZ4CbgZOI7kucWPAhfnHEvduNPrA64L61RpAIQ7lWsr7yGii4HFFYrvy/O9zcysZ74Gb2ZWYE4CZmYF5iRgZlZg9b5j2I5Qlk61RuWOQbPacUvAzKzAnATMzArMScDMrMCcBMzMCswdw0egUkfkQOuUbcRjZnmv7jqSB8Lv0KwabgmYmRWYk4CZWYE5CZiZFZiTgJlZgbljuJfckfiB3tSF67EY+uoz4jvD+45bAmZmBZZrEpB0taQ2Sfsk3V1W9llJ7ZJ2S3pM0oQ8YzEzs8Pl3RJ4E7gJ+FHpSkljgJXAIuBYkofO/zTnWMzMrEzeTxZbCSCpFRhXUnQRsCEifp6WLwa2SpoUEe15xmRmZh+oV8fwycC6zhcRsUvSK+n6Q5KApIXAQoDx48fXMkbrZxrlbmbrO/795K9eHcMjgO1l67YDI8s3jIilEdEaEa3Nzc01Cc7MrCjqlQQ6gFFl60YBO+sQi5lZYdUrCWwATu18IWk4cGK63szMaiTvIaKDJTUBg4BBkpokDQYeBKZImpeWXwesd6ewmVlt5d0x/C3g+pLXlwJ/ExGLJc0DvgvcCzwFzM85ll5p9A6qRo/fzPKR9xDRxcDiCmWrgUl5vr+ZmXXP00aYmRWYk4CZWYE5CZiZFZinku6G71DNl+uiGPrD77lSDJ6S2i0BM7NCcxIwMyswJwEzswJzEjAzKzB3DJfpD51Y9gH/Pqwr/lz0HbcEzMwKzEnAzKzAnATMzArMScDMrMAK2zFc2rHkuwar404566/8d109twTMzAqsrklA0hpJeyV1pMtL9YzHzKxo+kNL4OqIGJEuE+sdjJlZkfSHJGBmZnXSH5LALZK2SnpS0px6B2NmViT1TgJ/DZwAHA8sBR6SdGLpBpIWSmqT1LZly5Z6xGhmNmDVNQlExFMRsTMi9kXEMuBJ4NyybZZGRGtEtDY3N9cnUDOzAareLYFyAajeQZiZFUXdkoCkoyWdI6lJ0mBJXwBmA/9Yr5jMzIqmnncMDwFuAiYB7wLtwAUR8XIdYzIzK5S6JYGI2ALMrNf7m9nAlmV6E08z0f/6BMzMrIacBMzMC
sxJwMyswJwEzMwKrLDPEzAzq6RIHcZuCZiZFZiTgJlZgTkJmJkVmJOAmVmBFapj2A9IN7PeyPod0kidyW4JmJkVmJOAmVmBOQmYmRWYk4CZWYEVqmPYzKySSp2+RzKgpNo7jittX4s7l90SMDMrsLomAUnHSnpQ0i5JmyRdUs94zMyKpt6Xg74H7AfGAh8HfilpXURsqG9YZmbFUM8HzQ8H5gGLIqIjIp4AfgH853rFZGZWNIqI+ryxNA14MiKGlaz7OnBmRJxfsm4hsDB9ORF4qaaB9t4YYGu9g+hHXB+Hcn0cyvVxqL6qjwkR0dxVQT0vB40AdpSt2w6MLF0REUuBpbUKqq9JaouI1nrH0V+4Pg7l+jiU6+NQtaiPenYMdwCjytaNAnbWIRYzs0KqZxJ4GRgs6WMl604F3ClsZlYjdUsCEbELWAncIGm4pDOAzwP31CumnDTspaycuD4O5fo4lOvjULnXR906hiG5TwD4EfCnwDbgmxGxom4BmZkVTF2TgJmZ1ZenjTAzKzAnATOzAnMSyEDSUEl3pvMb7ZT0nKQ/S8taJIWkjpJlUdm+P5K0Q9Lbkq4pO/ZnJbVL2i3pMUkTan1+R0LSvZLeSs/rZUlXlpRVPKeBWh9QuU6K+hkBkPQxSXsl3Vuy7pL0b2mXpFVp32BnWbfziXW3b6MorxNJcyS9V/b5WFCyfb51EhFeeliA4cBioIUkcf5HkvsZWtIlgMEV9r0FeBw4BpgMvA38h7RsDMkNcn8ONAF/C/xrvc83Y52cDAxNf56UnteMns5poNZHD3VSyM9IGv8/ped2b0kd7QRmk9wwugL4Scn29wE/Tcs+nZ77yVn2bZSlizqZA2zuZvtc66TuFdKoC7CeZO6jnv7A3wQ+V/L6xs5fEsl0GP9cUjYc2ANMqvf5VVkXE4G3gL/o6ZyKUB9d1EkhPyPAfOBnJP+B6vzCuxlYUbLNiSSTSI5Mz20/cFJJ+T3At3vat97n2ss6qZgEalEnvhx0BCSNBU7i0BvbNknaLOkuSWPS7Y4B/ghYV7LdOpLsTfrv+2WR3DvxSkl5vybpdkm7gXaSL7xH6OacBnp9QMU66VSYz4ikUcANwDVlReXn8wrpl1y6HIyIl0u2764uSvft97qpE4DjJL0j6VVJf69kgk2oQZ04CVRJ0hBgObAsItpJJneaCUwgafqPTMshaZ5B0nyj5OeRJeWlZeXl/VpEfIUk1lkkN/7to/tzGtD1ARXrpIifkRuBOyNic9n6nj4f3c0n1qh10alSnbSTTKX/R8BnSD4jt6ZludeJk0AVJB1F0hTbD1wNEMk02G0RcTAi3knXf07SSJL5keDQOZJK50dq+PmTIuLdSKYBHwdcRffnNODrAw6vk6J9RiR9HDgb+Psuinv6fHR3rg1XF526q5OIeDsiXoiI9yLiVeAbJJeaoQZ14iSQkSQBd5I8AGdeRByosGnn3XdHRcS/kVwSOLWkvHR+pA2lZWkT8EQac/6kwXwQe5fnVLD6gA/qpNxA/4zMIekHeV3S28DXgXmSnuXw8zkBGEoyl1hP84l1t29/N4fKdVIu+OC7Of86qXdHSaMswPeBfwVGlK0/jaQT8ChgNEkv/mMl5d8GfkMy8mMSyR9858iPZpKm2zySkR//gwYY+QEcR9LBNQIYBJwD7ALm9nROA7E+MtRJoT4jwDDgIyXLEuD+9FxOJrm8MYuk0/NeDh0d9BOS0TDDgTM4fCRMxX3789JDnZxFcqlQwB8DjwF31apO6l45jbCkv6AA9pI0vzqXLwAXA6+mf/BvAT8GPlKy71CS+ZF2AO8A15Qd+2ySa4J7gDVAS73PN0N9NKdfWn9Iz+t3wJeynNNArI+e6qSIn5Gy+BeTjoRJX18CvJ7Wxz8Ax5aUHQusSsteBy4pO1bFfRtp4dDRQdcA/w/YDbwB3EbJ6J6868RzB5mZFZj7BMzMCsxJwMyswJwEzMwKzEnAzKzAnATMzArMScDMrMCcB
MzMCsxJwMyswP4/zu7dqmtpqTMAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Poďme tiež škálovať testovacie dáta\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 25, + "source": [ + "test['load'] = scaler.transform(test)\n", + "test.head()" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2014-12-30 00:00:000.33
2014-12-30 01:00:000.29
2014-12-30 02:00:000.27
2014-12-30 03:00:000.27
2014-12-30 04:00:000.30
\n", + "
" + ], + "text/plain": [ + " load\n", + "2014-12-30 00:00:00 0.33\n", + "2014-12-30 01:00:00 0.29\n", + "2014-12-30 02:00:00 0.27\n", + "2014-12-30 03:00:00 0.27\n", + "2014-12-30 04:00:00 0.30" + ] + }, + "metadata": {}, + "execution_count": 25 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 26, + "source": [ + "# Specify the number of steps to forecast ahead\n", + "HORIZON = 3\n", + "print('Forecasting horizon:', HORIZON, 'hours')" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Forecasting horizon: 3 hours\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 27, + "source": [ + "order = (4, 1, 0)\n", + "seasonal_order = (1, 1, 0, 24)\n", + "\n", + "model = SARIMAX(endog=train, order=order, seasonal_order=seasonal_order)\n", + "results = model.fit()\n", + "\n", + "print(results.summary())\n" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " SARIMAX Results \n", + "==========================================================================================\n", + "Dep. Variable: load No. 
Observations: 1416\n", + "Model: SARIMAX(4, 1, 0)x(1, 1, 0, 24) Log Likelihood 3477.239\n", + "Date: Thu, 30 Sep 2021 AIC -6942.477\n", + "Time: 14:36:28 BIC -6911.050\n", + "Sample: 11-01-2014 HQIC -6930.725\n", + " - 12-29-2014 \n", + "Covariance Type: opg \n", + "==============================================================================\n", + " coef std err z P>|z| [0.025 0.975]\n", + "------------------------------------------------------------------------------\n", + "ar.L1 0.8403 0.016 52.226 0.000 0.809 0.872\n", + "ar.L2 -0.5220 0.034 -15.388 0.000 -0.588 -0.456\n", + "ar.L3 0.1536 0.044 3.470 0.001 0.067 0.240\n", + "ar.L4 -0.0778 0.036 -2.158 0.031 -0.148 -0.007\n", + "ar.S.L24 -0.2327 0.024 -9.718 0.000 -0.280 -0.186\n", + "sigma2 0.0004 8.32e-06 47.358 0.000 0.000 0.000\n", + "===================================================================================\n", + "Ljung-Box (L1) (Q): 0.05 Jarque-Bera (JB): 1464.60\n", + "Prob(Q): 0.83 Prob(JB): 0.00\n", + "Heteroskedasticity (H): 0.84 Skew: 0.14\n", + "Prob(H) (two-sided): 0.07 Kurtosis: 8.02\n", + "===================================================================================\n", + "\n", + "Warnings:\n", + "[1] Covariance matrix calculated using the outer product of gradients (complex-step).\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Vytvorte testovací dátový bod pre každý krok HORIZON.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 28, + "source": [ + "test_shifted = test.copy()\n", + "\n", + "for t in range(1, HORIZON):\n", + " test_shifted['load+'+str(t)] = test_shifted['load'].shift(-t, freq='H')\n", + " \n", + "test_shifted = test_shifted.dropna(how='any')\n", + "test_shifted.head(5)" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
loadload+1load+2
2014-12-30 00:00:000.330.290.27
2014-12-30 01:00:000.290.270.27
2014-12-30 02:00:000.270.270.30
2014-12-30 03:00:000.270.300.41
2014-12-30 04:00:000.300.410.57
\n", + "
" + ], + "text/plain": [ + " load load+1 load+2\n", + "2014-12-30 00:00:00 0.33 0.29 0.27\n", + "2014-12-30 01:00:00 0.29 0.27 0.27\n", + "2014-12-30 02:00:00 0.27 0.27 0.30\n", + "2014-12-30 03:00:00 0.27 0.30 0.41\n", + "2014-12-30 04:00:00 0.30 0.41 0.57" + ] + }, + "metadata": {}, + "execution_count": 28 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 29, + "source": [ + "%%time\n", + "training_window = 720 # dedicate 30 days (720 hours) for training\n", + "\n", + "train_ts = train['load']\n", + "test_ts = test_shifted\n", + "\n", + "history = [x for x in train_ts]\n", + "history = history[(-training_window):]\n", + "\n", + "predictions = list()\n", + "\n", + "# let's user simpler model for demonstration\n", + "order = (2, 1, 0)\n", + "seasonal_order = (1, 1, 0, 24)\n", + "\n", + "for t in range(test_ts.shape[0]):\n", + " model = SARIMAX(endog=history, order=order, seasonal_order=seasonal_order)\n", + " model_fit = model.fit()\n", + " yhat = model_fit.forecast(steps = HORIZON)\n", + " predictions.append(yhat)\n", + " obs = list(test_ts.iloc[t])\n", + " # move the training window\n", + " history.append(obs[0])\n", + " history.pop(0)\n", + " print(test_ts.index[t])\n", + " print(t+1, ': predicted =', yhat, 'expected =', obs)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "2014-12-30 00:00:00\n", + "1 : predicted = [0.32 0.29 0.28] expected = [0.32945389435989236, 0.2900626678603402, 0.2739480752014323]\n", + "2014-12-30 01:00:00\n", + "2 : predicted = [0.3 0.29 0.3 ] expected = [0.2900626678603402, 0.2739480752014323, 0.26812891674127126]\n", + "2014-12-30 02:00:00\n", + "3 : predicted = [0.27 0.28 0.32] expected = [0.2739480752014323, 0.26812891674127126, 0.3025962399283795]\n", + "2014-12-30 03:00:00\n", + "4 : predicted = [0.28 0.32 0.42] expected = [0.26812891674127126, 0.3025962399283795, 0.40823634735899716]\n", + 
"2014-12-30 04:00:00\n", + "5 : predicted = [0.3 0.39 0.54] expected = [0.3025962399283795, 0.40823634735899716, 0.5689346463742166]\n", + "2014-12-30 05:00:00\n", + "6 : predicted = [0.4 0.55 0.66] expected = [0.40823634735899716, 0.5689346463742166, 0.6799462846911368]\n", + "2014-12-30 06:00:00\n", + "7 : predicted = [0.57 0.68 0.75] expected = [0.5689346463742166, 0.6799462846911368, 0.7309758281110115]\n", + "2014-12-30 07:00:00\n", + "8 : predicted = [0.68 0.75 0.8 ] expected = [0.6799462846911368, 0.7309758281110115, 0.7511190689346463]\n", + "2014-12-30 08:00:00\n", + "9 : predicted = [0.75 0.8 0.82] expected = [0.7309758281110115, 0.7511190689346463, 0.7636526410026856]\n", + "2014-12-30 09:00:00\n", + "10 : predicted = [0.77 0.78 0.78] expected = [0.7511190689346463, 0.7636526410026856, 0.7381378692927483]\n", + "2014-12-30 10:00:00\n", + "11 : predicted = [0.76 0.75 0.74] expected = [0.7636526410026856, 0.7381378692927483, 0.7188898836168307]\n", + "2014-12-30 11:00:00\n", + "12 : predicted = [0.77 0.76 0.75] expected = [0.7381378692927483, 0.7188898836168307, 0.7090420769919425]\n", + "2014-12-30 12:00:00\n", + "13 : predicted = [0.7 0.68 0.69] expected = [0.7188898836168307, 0.7090420769919425, 0.7081468218442255]\n", + "2014-12-30 13:00:00\n", + "14 : predicted = [0.72 0.73 0.76] expected = [0.7090420769919425, 0.7081468218442255, 0.7385854968666068]\n", + "2014-12-30 14:00:00\n", + "15 : predicted = [0.71 0.73 0.86] expected = [0.7081468218442255, 0.7385854968666068, 0.8478066248880931]\n", + "2014-12-30 15:00:00\n", + "16 : predicted = [0.73 0.85 0.97] expected = [0.7385854968666068, 0.8478066248880931, 0.9516562220232765]\n", + "2014-12-30 16:00:00\n", + "17 : predicted = [0.87 0.99 0.97] expected = [0.8478066248880931, 0.9516562220232765, 0.934198746642793]\n", + "2014-12-30 17:00:00\n", + "18 : predicted = [0.94 0.92 0.86] expected = [0.9516562220232765, 0.934198746642793, 0.8876454789615038]\n", + "2014-12-30 18:00:00\n", + "19 : predicted = 
[0.94 0.89 0.82] expected = [0.934198746642793, 0.8876454789615038, 0.8294538943598924]\n", + "2014-12-30 19:00:00\n", + "20 : predicted = [0.88 0.82 0.71] expected = [0.8876454789615038, 0.8294538943598924, 0.7197851387645477]\n", + "2014-12-30 20:00:00\n", + "21 : predicted = [0.83 0.72 0.58] expected = [0.8294538943598924, 0.7197851387645477, 0.5747538048343777]\n", + "2014-12-30 21:00:00\n", + "22 : predicted = [0.72 0.58 0.47] expected = [0.7197851387645477, 0.5747538048343777, 0.4592658907788718]\n", + "2014-12-30 22:00:00\n", + "23 : predicted = [0.58 0.47 0.39] expected = [0.5747538048343777, 0.4592658907788718, 0.3858549686660697]\n", + "2014-12-30 23:00:00\n", + "24 : predicted = [0.46 0.38 0.34] expected = [0.4592658907788718, 0.3858549686660697, 0.34377797672336596]\n", + "2014-12-31 00:00:00\n", + "25 : predicted = [0.38 0.34 0.33] expected = [0.3858549686660697, 0.34377797672336596, 0.32542524619516544]\n", + "2014-12-31 01:00:00\n", + "26 : predicted = [0.36 0.34 0.34] expected = [0.34377797672336596, 0.32542524619516544, 0.33034914950760963]\n", + "2014-12-31 02:00:00\n", + "27 : predicted = [0.32 0.32 0.35] expected = [0.32542524619516544, 0.33034914950760963, 0.3706356311548791]\n", + "2014-12-31 03:00:00\n", + "28 : predicted = [0.32 0.36 0.47] expected = [0.33034914950760963, 0.3706356311548791, 0.470008952551477]\n", + "2014-12-31 04:00:00\n", + "29 : predicted = [0.37 0.48 0.65] expected = [0.3706356311548791, 0.470008952551477, 0.6145926589077886]\n", + "2014-12-31 05:00:00\n", + "30 : predicted = [0.48 0.64 0.75] expected = [0.470008952551477, 0.6145926589077886, 0.7247090420769919]\n", + "2014-12-31 06:00:00\n", + "31 : predicted = [0.63 0.73 0.79] expected = [0.6145926589077886, 0.7247090420769919, 0.786034019695613]\n", + "2014-12-31 07:00:00\n", + "32 : predicted = [0.71 0.76 0.79] expected = [0.7247090420769919, 0.786034019695613, 0.8012533572068039]\n", + "2014-12-31 08:00:00\n", + "33 : predicted = [0.79 0.82 0.83] expected = 
[0.786034019695613, 0.8012533572068039, 0.7994628469113696]\n", + "2014-12-31 09:00:00\n", + "34 : predicted = [0.82 0.83 0.81] expected = [0.8012533572068039, 0.7994628469113696, 0.780214861235452]\n", + "2014-12-31 10:00:00\n", + "35 : predicted = [0.8 0.78 0.76] expected = [0.7994628469113696, 0.780214861235452, 0.7587287376902416]\n", + "2014-12-31 11:00:00\n", + "36 : predicted = [0.77 0.75 0.74] expected = [0.780214861235452, 0.7587287376902416, 0.7367949865711727]\n", + "2014-12-31 12:00:00\n", + "37 : predicted = [0.77 0.76 0.76] expected = [0.7587287376902416, 0.7367949865711727, 0.7188898836168307]\n", + "2014-12-31 13:00:00\n", + "38 : predicted = [0.75 0.75 0.78] expected = [0.7367949865711727, 0.7188898836168307, 0.7273948075201431]\n", + "2014-12-31 14:00:00\n", + "39 : predicted = [0.73 0.75 0.87] expected = [0.7188898836168307, 0.7273948075201431, 0.8299015219337511]\n", + "2014-12-31 15:00:00\n", + "40 : predicted = [0.74 0.85 0.96] expected = [0.7273948075201431, 0.8299015219337511, 0.909579230080573]\n", + "2014-12-31 16:00:00\n", + "41 : predicted = [0.83 0.94 0.93] expected = [0.8299015219337511, 0.909579230080573, 0.855863921217547]\n", + "2014-12-31 17:00:00\n", + "42 : predicted = [0.94 0.93 0.88] expected = [0.909579230080573, 0.855863921217547, 0.7721575649059982]\n", + "2014-12-31 18:00:00\n", + "43 : predicted = [0.87 0.82 0.77] expected = [0.855863921217547, 0.7721575649059982, 0.7023276633840643]\n", + "2014-12-31 19:00:00\n", + "44 : predicted = [0.79 0.73 0.63] expected = [0.7721575649059982, 0.7023276633840643, 0.6195165622202325]\n", + "2014-12-31 20:00:00\n", + "45 : predicted = [0.7 0.59 0.46] expected = [0.7023276633840643, 0.6195165622202325, 0.5425246195165621]\n", + "2014-12-31 21:00:00\n", + "46 : predicted = [0.6 0.47 0.36] expected = [0.6195165622202325, 0.5425246195165621, 0.4735899731423454]\n", + "CPU times: user 12min 15s, sys: 2min 39s, total: 14min 54s\n", + "Wall time: 2min 36s\n" + ] + } + ], + "metadata": { + 
"scrolled": true + } + }, + { + "cell_type": "markdown", + "source": [ + "Porovnajte predpovede so skutočným zaťažením\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 30, + "source": [ + "eval_df = pd.DataFrame(predictions, columns=['t+'+str(t) for t in range(1, HORIZON+1)])\n", + "eval_df['timestamp'] = test.index[0:len(test.index)-HORIZON+1]\n", + "eval_df = pd.melt(eval_df, id_vars='timestamp', value_name='prediction', var_name='h')\n", + "eval_df['actual'] = np.array(np.transpose(test_ts)).ravel()\n", + "eval_df[['prediction', 'actual']] = scaler.inverse_transform(eval_df[['prediction', 'actual']])\n", + "eval_df.head()" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
timestamphpredictionactual
02014-12-30 00:00:00t+13,008.743,023.00
12014-12-30 01:00:00t+12,955.532,935.00
22014-12-30 02:00:00t+12,900.172,899.00
32014-12-30 03:00:00t+12,917.692,886.00
42014-12-30 04:00:00t+12,946.992,963.00
\n", + "
" + ], + "text/plain": [ + " timestamp h prediction actual\n", + "0 2014-12-30 00:00:00 t+1 3,008.74 3,023.00\n", + "1 2014-12-30 01:00:00 t+1 2,955.53 2,935.00\n", + "2 2014-12-30 02:00:00 t+1 2,900.17 2,899.00\n", + "3 2014-12-30 03:00:00 t+1 2,917.69 2,886.00\n", + "4 2014-12-30 04:00:00 t+1 2,946.99 2,963.00" + ] + }, + "metadata": {}, + "execution_count": 30 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Vypočítajte **strednú absolútnu percentuálnu chybu (MAPE)** pre všetky predpovede\n", + "\n", + "$$MAPE = \\frac{1}{n} \\sum_{t=1}^{n}|\\frac{actual_t - predicted_t}{actual_t}|$$\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 31, + "source": [ + "if(HORIZON > 1):\n", + " eval_df['APE'] = (eval_df['prediction'] - eval_df['actual']).abs() / eval_df['actual']\n", + " print(eval_df.groupby('h')['APE'].mean())" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "h\n", + "t+1 0.01\n", + "t+2 0.01\n", + "t+3 0.02\n", + "Name: APE, dtype: float64\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 32, + "source": [ + "print('One step forecast MAPE: ', (mape(eval_df[eval_df['h'] == 't+1']['prediction'], eval_df[eval_df['h'] == 't+1']['actual']))*100, '%')" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "One step forecast MAPE: 0.5570581332313952 %\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 33, + "source": [ + "print('Multi-step forecast MAPE: ', mape(eval_df['prediction'], eval_df['actual'])*100, '%')" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Multi-step forecast MAPE: 1.1460048657704118 %\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Vykreslite predpovede oproti skutočným hodnotám za prvý týždeň testovacej 
množiny\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 34, + "source": [ + "if(HORIZON == 1):\n", + " ## Plotting single step forecast\n", + " eval_df.plot(x='timestamp', y=['actual', 'prediction'], style=['r', 'b'], figsize=(15, 8))\n", + "\n", + "else:\n", + " ## Plotting multi step forecast\n", + " plot_df = eval_df[(eval_df.h=='t+1')][['timestamp', 'actual']]\n", + " for t in range(1, HORIZON+1):\n", + " plot_df['t+'+str(t)] = eval_df[(eval_df.h=='t+'+str(t))]['prediction'].values\n", + "\n", + " fig = plt.figure(figsize=(15, 8))\n", + " ax = plt.plot(plot_df['timestamp'], plot_df['actual'], color='red', linewidth=4.0)\n", + " ax = fig.add_subplot(111)\n", + " for t in range(1, HORIZON+1):\n", + " x = plot_df['timestamp'][(t-1):]\n", + " y = plot_df['t+'+str(t)][0:len(x)]\n", + " ax.plot(x, y, color='blue', linewidth=4*math.pow(.9,t), alpha=math.pow(0.8,t))\n", + " \n", + " ax.legend(loc='best')\n", + " \n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "No handles with labels found to put in legend.\n" + ] + }, + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby na automatický preklad [Co-op Translator](https://github.com/Azure/co-op-translator). Aj keď sa snažíme o presnosť, upozorňujeme, že automatické preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre dôležité informácie sa odporúča profesionálny ľudský preklad. Nezodpovedáme za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ], + "metadata": { + "kernel_info": { + "name": "python3" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "nteract": { + "version": "nteract-front-end@1.0.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "c193140200b9684da27e3890211391b6", + "translation_date": "2025-09-06T13:56:55+00:00", + "source_file": "7-TimeSeries/2-ARIMA/solution/notebook.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sk/7-TimeSeries/2-ARIMA/working/notebook.ipynb b/translations/sk/7-TimeSeries/2-ARIMA/working/notebook.ipynb new file mode 100644 index 000000000..5a18ef99d --- /dev/null +++ 
b/translations/sk/7-TimeSeries/2-ARIMA/working/notebook.ipynb @@ -0,0 +1,59 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "523ec472196307b3c4235337353c9ceb", + "translation_date": "2025-09-06T14:00:10+00:00", + "source_file": "7-TimeSeries/2-ARIMA/working/notebook.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# Prognózovanie časových radov pomocou ARIMA\n", + "\n", + "V tomto zápisníku si ukážeme, ako:\n", + "- pripraviť údaje časových radov na trénovanie modelu ARIMA pre prognózovanie časových radov\n", + "- implementovať jednoduchý model ARIMA na predpovedanie nasledujúcich HORIZON krokov dopredu (od času *t+1* po *t+HORIZON*) v časovom rade\n", + "- vyhodnotiť model\n", + "\n", + "Údaje v tomto príklade pochádzajú zo súťaže GEFCom2014 v prognózovaní. Skladajú sa z 3 rokov hodinových hodnôt elektrického zaťaženia a teploty medzi rokmi 2012 a 2014. Úlohou je predpovedať budúce hodnoty elektrického zaťaženia. V tomto príklade ukážeme, ako predpovedať jeden časový krok dopredu, pričom použijeme iba historické údaje o zaťažení.\n", + "\n", + "Tao Hong, Pierre Pinson, Shu Fan, Hamidreza Zareipour, Alberto Troccoli a Rob J. Hyndman, \"Probabilistic energy forecasting: Global Energy Forecasting Competition 2014 and beyond\", International Journal of Forecasting, vol.32, no.3, str. 
896-913, júl-september, 2016.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pip install statsmodels" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby na automatický preklad [Co-op Translator](https://github.com/Azure/co-op-translator). Aj keď sa snažíme o presnosť, upozorňujeme, že automatické preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre dôležité informácie sa odporúča profesionálny ľudský preklad. Nezodpovedáme za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sk/7-TimeSeries/3-SVR/solution/notebook.ipynb b/translations/sk/7-TimeSeries/3-SVR/solution/notebook.ipynb new file mode 100644 index 000000000..05c4ee9de --- /dev/null +++ b/translations/sk/7-TimeSeries/3-SVR/solution/notebook.ipynb @@ -0,0 +1,1019 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "fv9OoQsMFk5A" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "V tomto zápisníku demonštrujeme, ako:\n", + "\n", + "- pripraviť 2D časové rady na trénovanie modelu regresora SVM\n", + "- implementovať SVR pomocou RBF jadra\n", + "- vyhodnotiť model pomocou grafov a MAPE\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Importovanie modulov\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append('../../')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "M687KNlQFp0-" + }, + "outputs": [], + "source": [ + "import os\n", + "import warnings\n", + "import matplotlib.pyplot as 
plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import datetime as dt\n", + "import math\n", + "\n", + "from sklearn.svm import SVR\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from common.utils import load_data, mape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Cj-kfVdMGjWP" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8fywSjC6GsRz" + }, + "source": [ + "### Načítať údaje\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "aBDkEB11Fumg", + "outputId": "99cf7987-0509-4b73-8cc2-75d7da0d2740" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2012-01-01 00:00:002698.0
2012-01-01 01:00:002558.0
2012-01-01 02:00:002444.0
2012-01-01 03:00:002402.0
2012-01-01 04:00:002403.0
\n", + "
" + ], + "text/plain": [ + " load\n", + "2012-01-01 00:00:00 2698.0\n", + "2012-01-01 01:00:00 2558.0\n", + "2012-01-01 02:00:00 2444.0\n", + "2012-01-01 03:00:00 2402.0\n", + "2012-01-01 04:00:00 2403.0" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "energy = load_data('../../data')[['load']]\n", + "energy.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "O0BWP13rGnh4" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 486 + }, + "id": "hGaNPKu_Gidk", + "outputId": "7f89b326-9057-4f49-efbe-cb100ebdf76d" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "energy.plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IPuNor4eGwYY" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "ysvsNyONGt0Q" + }, + "outputs": [], + "source": [ + "train_start_dt = '2014-11-01 00:00:00'\n", + "test_start_dt = '2014-12-30 00:00:00'" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 548 + }, + "id": "SsfdLoPyGy9w", + "outputId": "d6d6c25b-b1f4-47e5-91d1-707e043237d7" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "energy[(energy.index < test_start_dt) & (energy.index >= train_start_dt)][['load']].rename(columns={'load':'train'}) \\\n", + " .join(energy[test_start_dt:][['load']].rename(columns={'load':'test'}), how='outer') \\\n", + " .plot(y=['train', 'test'], figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XbFTqBw6G1Ch" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Teraz musíte pripraviť údaje na trénovanie vykonaním filtrovania a škálovania vašich údajov.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cYivRdQpHDj3", + "outputId": "a138f746-461c-4fd6-bfa6-0cee094c4aa1" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training data shape: (1416, 1)\n", + "Test data shape: (48, 1)\n" + ] + } + ], + "source": [ + "train = energy.copy()[(energy.index >= train_start_dt) & (energy.index < test_start_dt)][['load']]\n", + "test = energy.copy()[energy.index >= test_start_dt][['load']]\n", + "\n", + "print('Training data shape: ', train.shape)\n", + "print('Test data shape: ', test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Zmeňte rozsah údajov na (0, 1).\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "3DNntGQnZX8G", + "outputId": "210046bc-7a66-4ccd-d70d-aa4a7309949c" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2014-11-01 00:00:000.101611
2014-11-01 01:00:000.065801
2014-11-01 02:00:000.046106
2014-11-01 03:00:000.042525
2014-11-01 04:00:000.059087
\n", + "
" + ], + "text/plain": [ + " load\n", + "2014-11-01 00:00:00 0.101611\n", + "2014-11-01 01:00:00 0.065801\n", + "2014-11-01 02:00:00 0.046106\n", + "2014-11-01 03:00:00 0.042525\n", + "2014-11-01 04:00:00 0.059087" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "scaler = MinMaxScaler()\n", + "train['load'] = scaler.fit_transform(train)\n", + "train.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "26Yht-rzZexe", + "outputId": "20326077-a38a-4e78-cc5b-6fd7af95d301" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2014-12-30 00:00:000.329454
2014-12-30 01:00:000.290063
2014-12-30 02:00:000.273948
2014-12-30 03:00:000.268129
2014-12-30 04:00:000.302596
\n", + "
" + ], + "text/plain": [ + " load\n", + "2014-12-30 00:00:00 0.329454\n", + "2014-12-30 01:00:00 0.290063\n", + "2014-12-30 02:00:00 0.273948\n", + "2014-12-30 03:00:00 0.268129\n", + "2014-12-30 04:00:00 0.302596" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test['load'] = scaler.transform(test)\n", + "test.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x0n6jqxOQ41Z" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fdmxTZtOQ8xs" + }, + "source": [ + "Pre náš SVR transformujeme vstupné dáta do formy `[batch, timesteps]`. Preto preusporiadame existujúce `train_data` a `test_data` tak, aby existovala nová dimenzia, ktorá sa vzťahuje na časové kroky. V našom príklade berieme `timesteps = 5`. Takže vstupy do modelu sú dáta pre prvé 4 časové kroky a výstup budú dáta pre 5. časový krok.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "Rpju-Sc2HFm0" + }, + "outputs": [], + "source": [ + "# Converting to numpy arrays\n", + "\n", + "train_data = train.values\n", + "test_data = test.values" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# Selecting the timesteps\n", + "\n", + "timesteps=5" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "O-JrsrsVJhUQ", + "outputId": "c90dbe71-bacc-4ec4-b452-f82fe5aefaef" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(1412, 5)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Converting data to 2D tensor\n", + "\n", + "train_data_timesteps=np.array([[j for j in train_data[i:i+timesteps]] for i in range(0,len(train_data)-timesteps+1)])[:,:,0]\n", + "train_data_timesteps.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + 
"metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "exJD8AI7KE4g", + "outputId": "ce90260c-f327-427d-80f2-77307b5a6318" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(44, 5)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Converting test data to 2D tensor\n", + "\n", + "test_data_timesteps=np.array([[j for j in test_data[i:i+timesteps]] for i in range(0,len(test_data)-timesteps+1)])[:,:,0]\n", + "test_data_timesteps.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "2u0R2sIsLuq5" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(1412, 4) (1412, 1)\n", + "(44, 4) (44, 1)\n" + ] + } + ], + "source": [ + "x_train, y_train = train_data_timesteps[:,:timesteps-1],train_data_timesteps[:,[timesteps-1]]\n", + "x_test, y_test = test_data_timesteps[:,:timesteps-1],test_data_timesteps[:,[timesteps-1]]\n", + "\n", + "print(x_train.shape, y_train.shape)\n", + "print(x_test.shape, y_test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8wIPOtAGLZlh" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "id": "EhA403BEPEiD" + }, + "outputs": [], + "source": [ + "# Create model using RBF kernel\n", + "\n", + "model = SVR(kernel='rbf',gamma=0.5, C=10, epsilon = 0.05)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GS0UA3csMbqp", + "outputId": "d86b6f05-5742-4c1d-c2db-c40510bd4f0d" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "SVR(C=10, cache_size=200, coef0=0.0, degree=3, epsilon=0.05, gamma=0.5,\n", + " kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Fit model on training data\n", + "\n", + 
"model.fit(x_train, y_train[:,0])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Rz_x8S3UrlcF" + }, + "source": [ + "### Vytvoriť predikciu modelu\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XR0gnt3MnuYS", + "outputId": "157e40ab-9a23-4b66-a885-0d52a24b2364" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(1412, 1) (44, 1)\n" + ] + } + ], + "source": [ + "# Making predictions\n", + "\n", + "y_train_pred = model.predict(x_train).reshape(-1,1)\n", + "y_test_pred = model.predict(x_test).reshape(-1,1)\n", + "\n", + "print(y_train_pred.shape, y_test_pred.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_2epncg-SGzr" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1412 44\n" + ] + } + ], + "source": [ + "# Scaling the predictions\n", + "\n", + "y_train_pred = scaler.inverse_transform(y_train_pred)\n", + "y_test_pred = scaler.inverse_transform(y_test_pred)\n", + "\n", + "print(len(y_train_pred), len(y_test_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xmm_YLXhq7gV", + "outputId": "18392f64-4029-49ac-c71a-a4e2411152a1" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1412 44\n" + ] + } + ], + "source": [ + "# Scaling the original values\n", + "\n", + "y_train = scaler.inverse_transform(y_train)\n", + "y_test = scaler.inverse_transform(y_test)\n", + "\n", + "print(len(y_train), len(y_test))" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "u3LBj93coHEi", + "outputId": "d4fd49e8-8c6e-4bb0-8ef9-ca0b26d725b4" + }, + "outputs": [ + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "1412 44\n" + ] + } + ], + "source": [ + "# Extract the timesteps for x-axis\n", + "\n", + "train_timestamps = energy[(energy.index < test_start_dt) & (energy.index >= train_start_dt)].index[timesteps-1:]\n", + "test_timestamps = energy[test_start_dt:].index[timesteps-1:]\n", + "\n", + "print(len(train_timestamps), len(test_timestamps))" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(25,6))\n", + "plt.plot(train_timestamps, y_train, color = 'red', linewidth=2.0, alpha = 0.6)\n", + "plt.plot(train_timestamps, y_train_pred, color = 'blue', linewidth=0.8)\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.title(\"Training data prediction\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "LnhzcnYtXHCm", + "outputId": "f5f0d711-f18b-4788-ad21-d4470ea2c02b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MAPE for training data: 1.7195710200875551 %\n" + ] + } + ], + "source": [ + "print('MAPE for training data: ', mape(y_train_pred, y_train)*100, '%')" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 225 + }, + "id": "53Q02FoqQH4V", + "outputId": "53e2d59b-5075-4765-ad9e-aed56c966583" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(10,3))\n", + "plt.plot(test_timestamps, y_test, color = 'red', linewidth=2.0, alpha = 0.6)\n", + "plt.plot(test_timestamps, y_test_pred, color = 'blue', linewidth=0.8)\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "clOAUH-SXCJG", + "outputId": "a3aa85ff-126a-4a4a-cd9e-90b9cc465ef5" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MAPE for testing data: 1.2623790187854018 %\n" + ] + } + ], + "source": [ + "print('MAPE for testing data: ', mape(y_test_pred, y_test)*100, '%')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DHlKvVCId5ue" + }, + "source": [ + "## Predikcia úplného súboru údajov\n" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cOFJ45vreO0N", + "outputId": "35628e33-ecf9-4966-8036-f7ea86db6f16" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tensor shape: (26300, 5)\n", + "X shape: (26300, 4) \n", + "Y shape: (26300, 1)\n" + ] + } + ], + "source": [ + "# Extracting load values as numpy array\n", + "data = energy.copy().values\n", + "\n", + "# Scaling\n", + "data = scaler.transform(data)\n", + "\n", + "# Transforming to 2D tensor as per model input requirement\n", + "data_timesteps=np.array([[j for j in data[i:i+timesteps]] for i in range(0,len(data)-timesteps+1)])[:,:,0]\n", + "print(\"Tensor shape: \", data_timesteps.shape)\n", + "\n", + "# Selecting inputs and outputs from data\n", + "X, Y = data_timesteps[:,:timesteps-1],data_timesteps[:,[timesteps-1]]\n", + "print(\"X shape: \", X.shape,\"\\nY shape: \", Y.shape)" + ] + }, + { + "cell_type": 
"code", + "execution_count": 26, + "metadata": { + "id": "ESSAdQgwexIi" + }, + "outputs": [], + "source": [ + "# Make model predictions\n", + "Y_pred = model.predict(X).reshape(-1,1)\n", + "\n", + "# Inverse scale and reshape\n", + "Y_pred = scaler.inverse_transform(Y_pred)\n", + "Y = scaler.inverse_transform(Y)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 328 + }, + "id": "M_qhihN0RVVX", + "outputId": "a89cb23e-1d35-437f-9d63-8b8907e12f80" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(30,8))\n", + "plt.plot(Y, color = 'red', linewidth=2.0, alpha = 0.6)\n", + "plt.plot(Y_pred, color = 'blue', linewidth=1)\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "AcN7pMYXVGTK", + "outputId": "7e1c2161-47ce-496c-9d86-7ad9ae0df770" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MAPE: 2.0572089029888656 %\n" + ] + } + ], + "source": [ + "print('MAPE: ', mape(Y_pred, Y)*100, '%')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby na automatický preklad [Co-op Translator](https://github.com/Azure/co-op-translator). Aj keď sa snažíme o presnosť, upozorňujeme, že automatické preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za záväzný zdroj. Pre dôležité informácie odporúčame profesionálny ľudský preklad. 
Nezodpovedáme za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "Recurrent_Neural_Networks.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.1" + }, + "coopTranslator": { + "original_hash": "f8f3967282314d3995245835bdaa8418", + "translation_date": "2025-09-06T14:03:07+00:00", + "source_file": "7-TimeSeries/3-SVR/solution/notebook.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/translations/sk/7-TimeSeries/3-SVR/working/notebook.ipynb b/translations/sk/7-TimeSeries/3-SVR/working/notebook.ipynb new file mode 100644 index 000000000..edb34cdb2 --- /dev/null +++ b/translations/sk/7-TimeSeries/3-SVR/working/notebook.ipynb @@ -0,0 +1,695 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "fv9OoQsMFk5A" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "V tomto zápisníku ukážeme, ako:\n", + "\n", + "- pripraviť 2D časové rady na trénovanie modelu regresora SVM\n", + "- implementovať SVR pomocou RBF jadra\n", + "- vyhodnotiť model pomocou grafov a MAPE\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Importovanie modulov\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append('../../')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "M687KNlQFp0-" + }, + "outputs": [], + "source": [ + "import os\n", + "import 
warnings\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import datetime as dt\n", + "import math\n", + "\n", + "from sklearn.svm import SVR\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from common.utils import load_data, mape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Cj-kfVdMGjWP" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8fywSjC6GsRz" + }, + "source": [ + "### Načítať údaje\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "aBDkEB11Fumg", + "outputId": "99cf7987-0509-4b73-8cc2-75d7da0d2740" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2012-01-01 00:00:002698.0
2012-01-01 01:00:002558.0
2012-01-01 02:00:002444.0
2012-01-01 03:00:002402.0
2012-01-01 04:00:002403.0
\n", + "
" + ], + "text/plain": [ + " load\n", + "2012-01-01 00:00:00 2698.0\n", + "2012-01-01 01:00:00 2558.0\n", + "2012-01-01 02:00:00 2444.0\n", + "2012-01-01 03:00:00 2402.0\n", + "2012-01-01 04:00:00 2403.0" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "energy = load_data('../../data')[['load']]\n", + "energy.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "O0BWP13rGnh4" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 486 + }, + "id": "hGaNPKu_Gidk", + "outputId": "7f89b326-9057-4f49-efbe-cb100ebdf76d" + }, + "outputs": [], + "source": [ + "energy.plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IPuNor4eGwYY" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ysvsNyONGt0Q" + }, + "outputs": [], + "source": [ + "train_start_dt = '2014-11-01 00:00:00'\n", + "test_start_dt = '2014-12-30 00:00:00'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 548 + }, + "id": "SsfdLoPyGy9w", + "outputId": "d6d6c25b-b1f4-47e5-91d1-707e043237d7" + }, + "outputs": [], + "source": [ + "energy[(energy.index < test_start_dt) & (energy.index >= train_start_dt)][['load']].rename(columns={'load':'train'}) \\\n", + " .join(energy[test_start_dt:][['load']].rename(columns={'load':'test'}), how='outer') \\\n", + " .plot(y=['train', 'test'], figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XbFTqBw6G1Ch" + }, + "source": [] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "Teraz musíte pripraviť údaje na trénovanie vykonaním filtrovania a škálovania vašich údajov.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cYivRdQpHDj3", + "outputId": "a138f746-461c-4fd6-bfa6-0cee094c4aa1" + }, + "outputs": [], + "source": [ + "train = energy.copy()[(energy.index >= train_start_dt) & (energy.index < test_start_dt)][['load']]\n", + "test = energy.copy()[energy.index >= test_start_dt][['load']]\n", + "\n", + "print('Training data shape: ', train.shape)\n", + "print('Test data shape: ', test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Zmeňte údaje tak, aby boli v rozsahu (0, 1).\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "3DNntGQnZX8G", + "outputId": "210046bc-7a66-4ccd-d70d-aa4a7309949c" + }, + "outputs": [], + "source": [ + "scaler = MinMaxScaler()\n", + "train['load'] = scaler.fit_transform(train)\n", + "train.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "26Yht-rzZexe", + "outputId": "20326077-a38a-4e78-cc5b-6fd7af95d301" + }, + "outputs": [], + "source": [ + "test['load'] = scaler.transform(test)\n", + "test.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x0n6jqxOQ41Z" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fdmxTZtOQ8xs" + }, + "source": [ + "Pre náš SVR transformujeme vstupné údaje do formy `[batch, timesteps]`. Takže preformátujeme existujúce `train_data` a `test_data` tak, aby existoval nový rozmer, ktorý sa vzťahuje na časové kroky. V našom príklade berieme `timesteps = 5`. 
Vstupy do modelu sú teda údaje za prvé 4 časové kroky a výstup budú údaje za 5. časový krok.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Rpju-Sc2HFm0" + }, + "outputs": [], + "source": [ + "# Converting to numpy arrays\n", + "\n", + "train_data = train.values\n", + "test_data = test.values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Selecting the timesteps\n", + "\n", + "timesteps=None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "O-JrsrsVJhUQ", + "outputId": "c90dbe71-bacc-4ec4-b452-f82fe5aefaef" + }, + "outputs": [], + "source": [ + "# Converting data to 2D tensor\n", + "\n", + "train_data_timesteps=None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "exJD8AI7KE4g", + "outputId": "ce90260c-f327-427d-80f2-77307b5a6318" + }, + "outputs": [], + "source": [ + "# Converting test data to 2D tensor\n", + "\n", + "test_data_timesteps=None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2u0R2sIsLuq5" + }, + "outputs": [], + "source": [ + "x_train, y_train = None\n", + "x_test, y_test = None\n", + "\n", + "print(x_train.shape, y_train.shape)\n", + "print(x_test.shape, y_test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8wIPOtAGLZlh" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EhA403BEPEiD" + }, + "outputs": [], + "source": [ + "# Create model using RBF kernel\n", + "\n", + "model = None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GS0UA3csMbqp", + "outputId": "d86b6f05-5742-4c1d-c2db-c40510bd4f0d" + }, + "outputs": [], + "source": 
[ + "# Fit model on training data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Rz_x8S3UrlcF" + }, + "source": [ + "### Vytvoriť predikciu modelu\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XR0gnt3MnuYS", + "outputId": "157e40ab-9a23-4b66-a885-0d52a24b2364" + }, + "outputs": [], + "source": [ + "# Making predictions\n", + "\n", + "y_train_pred = None\n", + "y_test_pred = None" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_2epncg-SGzr" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Scaling the predictions\n", + "\n", + "y_train_pred = scaler.inverse_transform(y_train_pred)\n", + "y_test_pred = scaler.inverse_transform(y_test_pred)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xmm_YLXhq7gV", + "outputId": "18392f64-4029-49ac-c71a-a4e2411152a1" + }, + "outputs": [], + "source": [ + "# Scaling the original values\n", + "\n", + "y_train = scaler.inverse_transform(y_train)\n", + "y_test = scaler.inverse_transform(y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "u3LBj93coHEi", + "outputId": "d4fd49e8-8c6e-4bb0-8ef9-ca0b26d725b4" + }, + "outputs": [], + "source": [ + "# Extract the timesteps for x-axis\n", + "\n", + "train_timestamps = None\n", + "test_timestamps = None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(25,6))\n", + "# plot original output\n", + "# plot predicted output\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.title(\"Training data prediction\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "LnhzcnYtXHCm", + "outputId": "f5f0d711-f18b-4788-ad21-d4470ea2c02b" + }, + "outputs": [], + "source": [ + "print('MAPE for training data: ', mape(y_train_pred, y_train)*100, '%')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 225 + }, + "id": "53Q02FoqQH4V", + "outputId": "53e2d59b-5075-4765-ad9e-aed56c966583" + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(10,3))\n", + "# plot original output\n", + "# plot predicted output\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "clOAUH-SXCJG", + "outputId": "a3aa85ff-126a-4a4a-cd9e-90b9cc465ef5" + }, + "outputs": [], + "source": [ + "print('MAPE for testing data: ', mape(y_test_pred, y_test)*100, '%')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DHlKvVCId5ue" + }, + "source": [ + "## Predikcia celého súboru údajov\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cOFJ45vreO0N", + "outputId": "35628e33-ecf9-4966-8036-f7ea86db6f16" + }, + "outputs": [], + "source": [ + "# Extracting load values as numpy array\n", + "data = None\n", + "\n", + "# Scaling\n", + "data = None\n", + "\n", + "# Transforming to 2D tensor as per model input requirement\n", + "data_timesteps=None\n", + "\n", + "# Selecting inputs and outputs from data\n", + "X, Y = None, None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ESSAdQgwexIi" + }, + "outputs": [], + "source": [ + "# Make model predictions\n", + "\n", + "# Inverse scale and reshape\n", + "Y_pred = None\n", + "Y = None" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 328 + }, + "id": "M_qhihN0RVVX", + "outputId": "a89cb23e-1d35-437f-9d63-8b8907e12f80" + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(30,8))\n", + "# plot original output\n", + "# plot predicted output\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "AcN7pMYXVGTK", + "outputId": "7e1c2161-47ce-496c-9d86-7ad9ae0df770" + }, + "outputs": [], + "source": [ + "print('MAPE: ', mape(Y_pred, Y)*100, '%')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby na automatický preklad [Co-op Translator](https://github.com/Azure/co-op-translator). Aj keď sa snažíme o presnosť, upozorňujeme, že automatické preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre dôležité informácie sa odporúča profesionálny ľudský preklad. 
Nezodpovedáme za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "Recurrent_Neural_Networks.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.1" + }, + "coopTranslator": { + "original_hash": "e86ce102239a14c44585623b9b924a74", + "translation_date": "2025-09-06T14:05:37+00:00", + "source_file": "7-TimeSeries/3-SVR/working/notebook.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/translations/sk/8-Reinforcement/1-QLearning/notebook.ipynb b/translations/sk/8-Reinforcement/1-QLearning/notebook.ipynb new file mode 100644 index 000000000..c1277fa60 --- /dev/null +++ b/translations/sk/8-Reinforcement/1-QLearning/notebook.ipynb @@ -0,0 +1,411 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "17e5a668646eabf5aabd0e9bfcf17876", + "translation_date": "2025-09-06T15:02:49+00:00", + "source_file": "8-Reinforcement/1-QLearning/notebook.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# Peter a 
vlk: Úvod do posilňovacieho učenia\n", + "\n", + "V tomto návode sa naučíme, ako aplikovať posilňovacie učenie na problém hľadania cesty. Prostredie je inšpirované hudobnou rozprávkou [Peter a vlk](https://en.wikipedia.org/wiki/Peter_and_the_Wolf) od ruského skladateľa [Sergeja Prokofieva](https://en.wikipedia.org/wiki/Sergei_Prokofiev). Je to príbeh o mladom pionierovi Petrovi, ktorý odvážne opustí svoj dom a vydá sa na lesnú čistinu, aby prenasledoval vlka. Vytrénujeme algoritmy strojového učenia, ktoré Petrovi pomôžu preskúmať okolitú oblasť a vytvoriť optimálnu navigačnú mapu.\n", + "\n", + "Najskôr si importujeme niekoľko užitočných knižníc:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import random\n", + "import math" + ] + }, + { + "source": [ + "## Prehľad posilňovacieho učenia\n", + "\n", + "**Posilňovacie učenie** (RL) je technika učenia, ktorá nám umožňuje naučiť sa optimálne správanie **agenta** v určitom **prostredí** prostredníctvom vykonávania mnohých experimentov. Agent v tomto prostredí by mal mať nejaký **cieľ**, definovaný pomocou **funkcie odmeny**.\n", + "\n", + "## Prostredie\n", + "\n", + "Pre jednoduchosť si predstavme Petrov svet ako štvorcovú dosku veľkosti `width` x `height`. Každé pole na tejto doske môže byť:\n", + "* **zem**, po ktorej Peter a ostatné bytosti môžu chodiť\n", + "* **voda**, po ktorej, samozrejme, nemôžete chodiť\n", + "* **strom** alebo **tráva** - miesto, kde si môžete oddýchnuť\n", + "* **jablko**, ktoré predstavuje niečo, čo by Peter rád našiel, aby sa nakŕmil\n", + "* **vlk**, ktorý je nebezpečný a treba sa mu vyhnúť\n", + "\n", + "Na prácu s prostredím definujeme triedu s názvom `Board`. Aby sme tento notebook príliš nezahltili, presunuli sme všetok kód na prácu s doskou do samostatného modulu `rlboard`, ktorý teraz importujeme. 
Môžete sa pozrieť do tohto modulu, aby ste získali viac podrobností o interných mechanizmoch implementácie.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "Poďme teraz vytvoriť náhodnú dosku a pozrieť sa, ako vyzerá:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 1" + ] + }, + { + "source": [ + "## Akcie a Politika\n", + "\n", + "V našom príklade by Petrovým cieľom bolo nájsť jablko, pričom by sa mal vyhnúť vlkovi a iným prekážkam. Definujte tieto akcie ako slovník a priraďte ich k dvojiciam zodpovedajúcich zmien súradníc.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 2" + ] + }, + { + "source": [ + "Stratégia nášho agenta (Peter) je definovaná takzvanou **politikou**. Pozrime sa na najjednoduchšiu politiku nazývanú **náhodná prechádzka**.\n", + "\n", + "## Náhodná prechádzka\n", + "\n", + "Najprv vyriešme náš problém implementáciou stratégie náhodnej prechádzky.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "# Let's run a random walk experiment several times and see the average number of steps taken: code block 3" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 4" + ] + }, + { + "source": [ + "## Funkcia odmeny\n", + "\n", + "Aby sme našu politiku urobili inteligentnejšou, musíme pochopiť, ktoré ťahy sú „lepšie“ ako ostatné.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 5" + ] + }, + { + "source": [ + "## Q-Learning\n", + "\n", + "Vytvorte Q-Tabuľku alebo viacrozmerné 
pole. Keďže naša hracia plocha má rozmery `width` x `height`, môžeme Q-Tabuľku reprezentovať pomocou numpy poľa s tvarom `width` x `height` x `len(actions)`:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 6" + ] + }, + { + "source": [ + "Odošlite Q-Tabuľku do funkcie `plot`, aby ste zobrazili tabuľku na doske:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "error", + "ename": "NameError", + "evalue": "name 'm' is not defined", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mQ\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mNameError\u001b[0m: name 'm' is not defined" + ] + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## Podstata Q-Learning: Bellmanova rovnica a učebný algoritmus\n", + "\n", + "Napíšte pseudokód pre náš učebný algoritmus:\n", + "\n", + "* Inicializujte Q-Tabuľku Q rovnakými hodnotami pre všetky stavy a akcie\n", + "* Nastavte rýchlosť učenia $\\alpha\\leftarrow 1$\n", + "* Opakujte simuláciu mnohokrát\n", + " 1. Začnite na náhodnej pozícii\n", + " 1. Opakujte\n", + " 1. Vyberte akciu $a$ v stave $s$\n", + " 2. Vykonajte akciu presunutím sa do nového stavu $s'$\n", + " 3. Ak narazíme na podmienku konca hry alebo je celková odmena príliš malá - ukončite simuláciu \n", + " 4. Vypočítajte odmenu $r$ v novom stave\n", + " 5. 
Aktualizujte Q-Funkciu podľa Bellmanovej rovnice: $Q(s,a)\\leftarrow (1-\\alpha)Q(s,a)+\\alpha(r+\\gamma\\max_{a'}Q(s',a'))$\n", + " 6. $s\\leftarrow s'$\n", + " 7. Aktualizujte celkovú odmenu a znížte $\\alpha$.\n", + "\n", + "## Využívanie vs. Preskúmavanie\n", + "\n", + "Najlepší prístup je nájsť rovnováhu medzi preskúmavaním a využívaním. Ako sa dozvedáme viac o našom prostredí, budeme pravdepodobne nasledovať optimálnu cestu, avšak občas si zvolíme nepreskúmanú cestu.\n", + "\n", + "## Implementácia v Pythone\n", + "\n", + "Teraz sme pripravení implementovať učebný algoritmus. Predtým však potrebujeme funkciu, ktorá premení ľubovoľné čísla v Q-Tabuľke na vektor pravdepodobností pre zodpovedajúce akcie:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 7" + ] + }, + { + "source": [ + "K pôvodnému vektoru pridávame malé množstvo `eps`, aby sme sa vyhli deleniu nulou v počiatočnom prípade, keď sú všetky komponenty vektora identické.\n", + "\n", + "Skutočný učebný algoritmus budeme spúšťať počas 5000 experimentov, tiež nazývaných **epochy**:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "" + ] + } + ], + "source": [ + "\n", + "from IPython.display import clear_output\n", + "\n", + "lpath = []\n", + "\n", + "# code block 8" + ] + }, + { + "source": [ + "Po vykonaní tohto algoritmu by mala byť Q-Tabuľka aktualizovaná hodnotami, ktoré definujú atraktivitu rôznych akcií v každom kroku. Vizualizujte tabuľku tu:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## Kontrola politiky\n", + "\n", + "Keďže Q-Tabuľka uvádza „atraktivitu“ každej akcie v každom stave, je pomerne jednoduché použiť ju na definovanie efektívnej navigácie v našom svete. 
V najjednoduchšom prípade môžeme jednoducho vybrať akciu, ktorá zodpovedá najvyššej hodnote v Q-Tabuľke:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "2" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ], + "source": [ + "# code block 9" + ] + }, + { + "source": [ + "Ak vyskúšate vyššie uvedený kód niekoľkokrát, môžete si všimnúť, že sa niekedy jednoducho \"zasekne\" a musíte stlačiť tlačidlo STOP v notebooku, aby ste ho prerušili.\n", + "\n", + "> **Úloha 1:** Upraviť funkciu `walk` tak, aby obmedzila maximálnu dĺžku cesty na určitý počet krokov (napríklad 100), a sledovať, ako vyššie uvedený kód občas vráti túto hodnotu.\n", + "\n", + "> **Úloha 2:** Upraviť funkciu `walk` tak, aby sa nevracala na miesta, kde už predtým bola. Tým sa zabráni tomu, aby sa `walk` dostala do slučky, avšak agent sa stále môže ocitnúť \"uväznený\" na mieste, z ktorého sa nedokáže dostať.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Average path length = 5.31, eaten by wolf: 0 times\n" + ] + } + ], + "source": [ + "\n", + "# code block 10" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 57 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(lpath)" + ] + }, + { + "source": [ + "## Cvičenie\n", + "## Realistickejší svet Petra a vlka\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby na automatický preklad [Co-op Translator](https://github.com/Azure/co-op-translator). Hoci sa snažíme o presnosť, upozorňujeme, že automatické preklady môžu obsahovať chyby alebo nepresnosti. Za autoritatívny zdroj by sa mal považovať pôvodný dokument v jeho pôvodnom jazyku. Pre dôležité informácie odporúčame profesionálny preklad vykonaný človekom. 
Nezodpovedáme za žiadne nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sk/8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb b/translations/sk/8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb new file mode 100644 index 000000000..f256bf704 --- /dev/null +++ b/translations/sk/8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb @@ -0,0 +1,458 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "eadbd20d2a075efb602615ad90b1e97a", + "translation_date": "2025-09-06T15:13:37+00:00", + "source_file": "8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# Peter a vlk: Realistické prostredie\n", + "\n", + "V našej situácii sa Peter mohol pohybovať takmer bez toho, aby sa unavil alebo vyhladol. V realistickejšom svete si však musí občas sadnúť a oddýchnuť si, a tiež sa najesť. Urobme náš svet realistickejším zavedením nasledujúcich pravidiel:\n", + "\n", + "1. Pri presune z jedného miesta na druhé Peter stráca **energiu** a získava **únavu**.\n", + "2. Peter môže získať viac energie jedením jabĺk.\n", + "3. Peter sa môže zbaviť únavy odpočinkom pod stromom alebo na tráve (t.j. vstúpením na políčko so stromom alebo trávou - zelené pole).\n", + "4. Peter musí nájsť a zabiť vlka.\n", + "5. 
Aby Peter dokázal zabiť vlka, musí mať určité úrovne energie a únavy, inak prehrá súboj.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import random\n", + "import math\n", + "from rlboard import *" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "width, height = 8,8\n", + "m = Board(width,height)\n", + "m.randomize(seed=13)\n", + "m.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "actions = { \"U\" : (0,-1), \"D\" : (0,1), \"L\" : (-1,0), \"R\" : (1,0) }\n", + "action_idx = { a : i for i,a in enumerate(actions.keys()) }" + ] + }, + { + "source": [ + "## Definovanie stavu\n", + "\n", + "V našich nových pravidlách hry musíme sledovať energiu a únavu v každom stave hracej plochy. 
Preto vytvoríme objekt `state`, ktorý bude obsahovať všetky potrebné informácie o aktuálnom stave problému, vrátane stavu hracej plochy, aktuálnych úrovní energie a únavy, a či môžeme poraziť vlka v terminálnom stave:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "class state:\n", + " def __init__(self,board,energy=10,fatigue=0,init=True):\n", + " self.board = board\n", + " self.energy = energy\n", + " self.fatigue = fatigue\n", + " self.dead = False\n", + " if init:\n", + " self.board.random_start()\n", + " self.update()\n", + "\n", + " def at(self):\n", + " return self.board.at()\n", + "\n", + " def update(self):\n", + " if self.at() == Board.Cell.water:\n", + " self.dead = True\n", + " return\n", + " if self.at() == Board.Cell.tree:\n", + " self.fatigue = 0\n", + " if self.at() == Board.Cell.apple:\n", + " self.energy = 10\n", + "\n", + " def move(self,a):\n", + " self.board.move(a)\n", + " self.energy -= 1\n", + " self.fatigue += 1\n", + " self.update()\n", + "\n", + " def is_winning(self):\n", + " return self.energy > self.fatigue" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ], + "source": [ + "def random_policy(state):\n", + " return random.choice(list(actions))\n", + "\n", + "def walk(board,policy):\n", + " n = 0 # number of steps\n", + " s = state(board)\n", + " while True:\n", + " if s.at() == Board.Cell.wolf:\n", + " if s.is_winning():\n", + " return n # success!\n", + " else:\n", + " return -n # failure!\n", + " if s.at() == Board.Cell.water:\n", + " return 0 # died\n", + " a = actions[policy(m)]\n", + " s.move(a)\n", + " n+=1\n", + "\n", + "walk(m,random_policy)" + ] + }, + { + 
"cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Killed by wolf = 5, won: 1 times, drown: 94 times\n" + ] + } + ], + "source": [ + "def print_statistics(policy):\n", + " s,w,n = 0,0,0\n", + " for _ in range(100):\n", + " z = walk(m,policy)\n", + " if z<0:\n", + " w+=1\n", + " elif z==0:\n", + " n+=1\n", + " else:\n", + " s+=1\n", + " print(f\"Killed by wolf = {w}, won: {s} times, drown: {n} times\")\n", + "\n", + "print_statistics(random_policy)" + ] + }, + { + "source": [ + "## Funkcia odmeny\n", + "\n", + "### Úvod\n", + "Funkcia odmeny je kľúčovým prvkom pri navrhovaní systémov posilňovacieho učenia. Definuje cieľ, ktorý sa agent snaží dosiahnuť, a poskytuje spätnú väzbu na základe jeho akcií.\n", + "\n", + "### Základné princípy\n", + "- **Jasnosť**: Funkcia odmeny by mala byť jednoduchá a ľahko pochopiteľná.\n", + "- **Konzistentnosť**: Mala by byť konzistentná s cieľmi systému.\n", + "- **Vyváženosť**: Treba zabezpečiť, aby odmeny neboli príliš vysoké alebo nízke, čo by mohlo ovplyvniť správanie agenta.\n", + "\n", + "### Príklad\n", + "Nasledujúci príklad ukazuje, ako môže byť funkcia odmeny implementovaná:\n", + "\n", + "```python\n", + "def reward_function(state, action):\n", + " if state == \"goal_state\":\n", + " return 100\n", + " elif action == \"invalid_action\":\n", + " return -10\n", + " else:\n", + " return 0\n", + "```\n", + "\n", + "### Bežné chyby\n", + "- **Príliš zložité funkcie odmeny**: Môžu zmiasť agenta a spomaliť proces učenia.\n", + "- **Nedostatočné odmeny**: Ak agent nedostáva dostatočnú spätnú väzbu, môže mať problém naučiť sa správne správanie.\n", + "- **Neúmyselné odmeny**: Nesprávne navrhnutá funkcia môže odmeňovať neželané správanie.\n", + "\n", + "### Tipy na optimalizáciu\n", + "- Testujte funkciu odmeny v rôznych scenároch.\n", + "- Sledujte správanie agenta a upravujte odmeny podľa potreby.\n", + "- Zvážte použitie kombinácie 
pozitívnych a negatívnych odmien na dosiahnutie rovnováhy.\n", + "\n", + "### Záver\n", + "Funkcia odmeny je základným stavebným prvkom úspešného systému posilňovacieho učenia. Dôkladné plánovanie a testovanie sú nevyhnutné na zabezpečenie jej efektívnosti.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "def reward(s):\n", + " r = s.energy-s.fatigue\n", + " if s.at()==Board.Cell.wolf:\n", + " return 100 if s.is_winning() else -100\n", + " if s.at()==Board.Cell.water:\n", + " return -100\n", + " return r" + ] + }, + { + "source": [ + "## Algoritmus Q-Learning\n", + "\n", + "Samotný algoritmus učenia zostáva takmer nezmenený, len používame `state` namiesto samotnej pozície na hracej ploche.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "Q = np.ones((width,height,len(actions)),dtype=np.float)*1.0/len(actions)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "def probs(v,eps=1e-4):\n", + " v = v-v.min()+eps\n", + " v = v/v.sum()\n", + " return v" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "" + ] + } + ], + "source": [ + "\n", + "from IPython.display import clear_output\n", + "\n", + "lpath = []\n", + "\n", + "for epoch in range(10000):\n", + " clear_output(wait=True)\n", + " print(f\"Epoch = {epoch}\",end='')\n", + "\n", + " # Pick initial point\n", + " s = state(m)\n", + " \n", + " # Start travelling\n", + " n=0\n", + " cum_reward = 0\n", + " while True:\n", + " x,y = s.board.human\n", + " v = probs(Q[x,y])\n", + " while True:\n", + " a = random.choices(list(actions),weights=v)[0]\n", + " dpos = actions[a]\n", + " if 
s.board.is_valid(s.board.move_pos(s.board.human,dpos)):\n", + " break \n", + " s.move(dpos)\n", + " r = reward(s)\n", + " if abs(r)==100: # end of game\n", + " print(f\" {n} steps\",end='\\r')\n", + " lpath.append(n)\n", + " break\n", + " alpha = np.exp(-n / 3000)\n", + " gamma = 0.5\n", + " ai = action_idx[a]\n", + " Q[x,y,ai] = (1 - alpha) * Q[x,y,ai] + alpha * (r + gamma * Q[x+dpos[0], y+dpos[1]].max())\n", + " n+=1" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAW4AAAFpCAYAAAC8p8I3AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nOzdd3xUVd7H8c+Zmt5J6CC9qVQpKoqIoggIuouCoojCIkXQXUV3F0XEh10bYAFBUQFBwRXXroggIjYiKkWE0EsI6Zle7j3PH5mMsBBqkjsJ580rr2Tu3Mz9zoT8cubcc88RUkoURVGU6sNkdABFURTlzKjCrSiKUs2owq0oilLNqMKtKIpSzajCrSiKUs2owq0oilLNVFrhFkL0FUL8LoTIEkJMrqzjKIqinG9EZYzjFkKYge1AH+AA8CNwq5Rya4UfTFEU5TxTWS3uS4AsKeUuKaUfeAsYWEnHUhRFOa9UVuGuB+w/6vaB0DZFURTlHFmMOrAQYhQwCsBqtXa66KKLjIpySj6fD4fDQVpamtFRylVUVITVaiU2NtboKOXKzs4mPT0ds9lsdJRy7du3j4YNGxodo1zBYJDc3Fzq1KljdJRyOZ1OgsEgSUlJRkcpV25uLgkJCdjtdqOjlGvLli14PB5xwjullBX+AXQHPjvq9sPAw+Xtn56eLiPZjh075Lx584yOcVIrVqyQ69evNzrGSU2bNk0WFBQYHaNcuq7LcePGGR3jpPLz8+X06dONjnFS69atk++9957RMU5q7ty5cseOHUbHOKlQXTxhzaysrpIfgeZCiAuEEDbgFuD9SjqWoijKeaVSukqklEEhxDjgM8AMLJBSbqmMYymKopxvKq2PW0r5MfBxZT2+oijK+UpdOakoilLNqMKtKIpSzajCrSiKUs2owq0oilLNqMKtKIpSzajCrSiKUs3U2MK9cOHCsqs2FaXKeb1eli9fbnSM0/L222/j9/uNjlGut956K6LzGaHGFe6VK1cycOBAgsEggwYNYvHixYbkkFJy3333GXJsxVhPPfUUw4cPJzc3l4EDB7Jp0yajI53QTz/9xMCBAyksLGTo0KHMnDnT6EjHyMzMZODAgRQVFTF06FBmzZpldKSIUaMKdzAYJCsrixtuuIF+/foxatQotm3bhs/nq9IcL7zwAq1ateKWW26hZcuWzJkzp0qPX9mklBQUFBgdIyK53W42bdrEI488wk033cTFF1/Mnj170HXd6GjH0HWdPXv20LlzZwYNGsTkyZPZvHkzbrfb6GhAab7du3fTtWtXBg0axIMPPsjmzZvxeDxGR4sINapwHz58mF27dtG3b1/69u1Lly5dsNlsbN68ucoyFBQUUFRUxKJFi/D5fCxcuJCCggIKCwurLENlW716NdOnTzc6RkRavXo1HTt2JD09ndtvv52hQ4fy0Ucf4fV6jY52DJfLxRdffMGQIUO45ZZbqF+/Pq1bt2bdunVGRwNKZxhcvXo1f/rTnxgyZAiNGjWiVatWfP3110ZHiwiGTetaGerXr0/Lli0ZPXo0hw4dYvz48TRt2pROnTpVWQaPx4OmacTFxfH999+zb98+4uLi8Hg8JCcnV1mOyrJixQq2b9/Ov//9b6OjRKR+/foxZMgQfv/9d6644gruuusupk+fTkxMjNHRjhEfH8+AAQMYOXIke/fu5aGHHkJKyQMPPGB0NAASEhLo168fI0eOZM+ePTz44IMAEZPPaDWqcAP079+fNm3acOeddzJ+/HgaNGhQpcevV68eMTEx/OlPf2LZsmVcdNFFNG7cmMGDB5ORkRHRc1GXR0qJrut8+eWX7Ny5k3HjxlXJ8whPYWmqXm8Mp02bxv79+5k4cSLdunWjY8eORkc6oa5du/Kvf/2LcePGcfvtt9OoUSOjIx2je/fuzJgxg3HjxvHll1/y5ZdfGh0pYlSv34jTkJGRQY8ePYiNjaV79+6GTIo/ceJEfvjhB6Z
OnYrD4WDTpk38+9//ZtCgQWRnZ1d5nnO1f/9+rrzySj7//HPuu+++KlmsYf/+/Xz33Xfce++9bNu2rVr1bbZo0YKrrrqKWbNm0aBBAyyWyGwfJScn06NHDxITE+natSvNmzc3OtIxyvKtW7eOtLS0iMhXUFCAw+EwOkbNa3FHAqvVitVqZdmyZeFtixYt4uDBgzz//PM0adKEP//5zyQkJBiY8vT89NNPvPnmm3z66adVurrOqFGjaNq0Kffccw8333wzr7/+Op07d66y458rIQRXXXUVq1ev5ocffqBXr15GR6q2oqKijI5AMBhk4cKFHDx4ELvdTt26dRk2bBhCnHiBmsqmCncVqlevHnfeeSfffvstjz76KM8++6xhP/jTsXXrVt5//30eeOCBKi3aH3zwAX369GHo0KE89NBD6LrOY489Rmpq6nH7Dho0iBtvvLHKsinnp2AwyIwZM3jzzTdxOp2MGjWKYcOGHbPP119/zSuvvHLC7+/cuTPjx4+vsDw1snA/+OCDzJ49OyKLYosWLWjRogVdu3Zl5MiRzJo1i+jo6Ih7O33kyBFmzJjBc889d8KCWZkuvfRSlixZwk033cTkyZN54oknuPnmm2nXrt1x+y5fvpypU6ee9PE+++wz0tPTKyvuSU2cOJHRo0fTuXNn4uPjDclwKgsXLuTWW2/lww8/NDrKCQkhmDVrFg8++KBhJ8VHjBjBf//7XzZs2MCzzz7LgQMHjjt30atXL/7xj3+c8Pvj4uIqNE9kVYsKkpOTQ+3atSOycJdp2bIl48ePp3///gwfPpzbb78dq9VqdCwAtm3bxrRp01i0aJEhJwZTUlKoX78+48aN4/rrr+fAgQO0adPmhH2cDz/8MJMnTz7p4xn5/yA1NZXCwsKIG8d9tHr16nHw4EGjY5RLCEHt2rXJyckxLMOrr75K9+7deffdd+nQoQM333wzmZmZx+1XVb8vNbJwVwdCCDp06MCaNWt4/fXX+de//kWbNm0YPHiwYZmOHDnC/Pnz8fv9PP/884aO5njqqacoKSnhww8/5Kuvvip3PyFERP+BBhg6dCiLFi1i3LhxRkdRzpLVamXAgAHhqTSGDx9u6P89VbgjwB133MGqVavCfcr9+/evkv8QUkrWr1/PkSNH6N+/P5MnT+ZPf/oTF198MSkpKZV+/FNJSEhg6NChRsc4ZyNGjKBjx46qcJ8jIQRSSkOKpdVqZdq0aezevZuYmBgyMjKqPMPRatxwwOpICMHVV1/N6NGjyczMpFOnTpX+1lVKSZs2bVi6dCnr1q2jXr16/N///R/XXXcddevWrdRjn2+EEKSmpqppAs5B06ZNueyyy3jzzTcNzXHBBRcYXrRBFe6IYrfbmTp1Kj/88APPP/98pR5r/fr19O7dm4cffpioqCh69uzJxo0bK/WY5yshBPPnz6/QUQXnG7PZjNVqjbipA4xSI7tKhg0bRq1atYyOcdYsFgszZsyo1GPk5eWRlpaGx+Ph5ptvZvXq1eTm5lbqMc9nqamp3H777UbHOCEhBH/729+MjnFKXbp0UVM1h9TIwn3NNdcYHSHiDRgwgAkTJuDxeMjIyODJJ5/kwIEDRseqsRITE+nbt6/RMU5ICFEtziW0adPG6AgRo0YWbuXUhBDs2LGDNWvWkJOTw8GDB7HZbEbHUhTlNKjCfR6z2Wzq3YmiVEPq5KSiKEo1owq3oihKNaMKt6IoSjWjCreiKEo1owq3oihKNXNOo0qEEHsAB6ABQSllZyFECvA20BjYA/xZSllzVspVFEUxWEW0uHtJKdtLKcuWJ5kMrJJSNgdWhW4riqIoFaQyukoGAm+Evn4DUMuTKIqiVKBzLdwS+FwIkSmEGBXaliGlLFsR9zBg/FRaiqIoNci5Xjl5mZTyoBAiHVgphNh29J1SSimEOOGsMKFCPwpKl/XZsWPHOUapPAcOHKCoqCiiM+bl5aHrekRndLlc7N69m7y8PKOjlMvv90f0a1h
SUoLL5YrojIcPH47435eioiL2798f0ZNWnWzVpHMq3FLKg6HPR4QQK4BLgBwhRB0pZbYQog5wpJzvnQfMA0hNTZVr1qw5lyiVqqioiAMHDhDJGXfu3ElMTAz5+flGRylXXl4e69evx263Gx2lXE6nM6J/zl6vl29zv+W/a/5rdJRyxWTH0NvTO6KXazt48CCZmZlkZWUZHaVcJ339pJRn9QHEAvFHfb0e6As8BUwObZ8M/PtUj5Weni4j2Y4dO+S8efOMjnFSK1askOvXrzc6xklNmzZNFhQUGB2jXLquy3Hjxhkd46Ty8/Nlp+mdJBH8r/a62vK9994z+qU6qblz58odO3YYHeOkQnXxhDXzXFrcGcCK0DJCFmCJlPJTIcSPwDIhxEhgL/DncziGoiiK8j/OunBLKXcBF59gez7Q+1xCKYqiKOVTV04qiqJUM6pwK4qiVDOqcCuKolQzqnAriqJUM6pwK4qiVDOqcCuKolQzqnAriqJUM6pwK4qiVDM1rnBLKZk5cyaHDh2K6AlkFEVRzlaNKtw7duzgggsuIC0tjbvuuosbb1RTgSuKUvPUmMItpeSHH37gr3/9K23atGHx4sXUqVOH3377zehoNcaBAwfYtWuX0TEU5bx3rvNxRwwpJYcPH6ZBgwbMmjWLBx98kJSUlIie5rS6kFIyZcoUpJSYTCZ8Ph9PPvkkZrPZ6Gh89tlnNGnShObNmxsdRVGqTI0p3CaTiX79+tGrVy80TeOXX34hOTmZJ5980uhoQGnx03U9IordmdJ1nXfffZcVK1ZgsVgYNGgQU6dONfS5OBwOrrnmGq677jqWLl1KcXExK1asMCyPolSlGtNVAtCiRQt27drFNddcw+LFi/n000+NjgSUrgiyadMmhgwZwpYtWygqKjI60hmZMGECc+fO5euvv2bDhg0sWrSI0aNHG5pp586dtG7dmrvvvptnnnmGYDDIgQMHDM2kKFWlRhVuk8lEdHQ0ZrMZu90eMSutPPXUUzz22GM888wzjBkzhg8++MDoSGfk+uuvp2/fvqxcuZJ69eoxaNAg5s+fb2imd955h5tuuomXX36ZjRs30r9/f0P/UGuaxiuvvMI333xjWIaaYNOmTRF/HuXzzz/H5XIZmqHGdJVEqszMTJKSkpgzZw6PPPIIl156KZs3byYnJ4eMjMhfR3nFihXs3buXuXPncvjwYb766isef/xxLBZj/+uUnYSuVasW69evZ+/evWzfvt2wPEIIoqOj+eqrr5g7dy6xsbHMmTOH0EIjyikEAgHuuece6tevj9frxe128+KLL0bU67dz504ee+wx2rdvz7Jly2jdujUPPPCAIVlqZOGOjo7G6/UipTT8B9+qVSuWLVtGVlYWkydP5r333uOpp55i9erVrFy5MvwuwehC+L+klHz11Vds3bqVSZMmERMTg8PhQNd1EhMTjY5HfHw8q1ev5sUXX6Rbt25ceumlhuYxmUwMGzaM4uJicnNzcblcdOzYEYCZM2fSsWNHLBYL0dHRuFwuNm/ezJdffsm4ceOIjY3FZKpRb37PmNPpZMeOHTz55JO4XC4GDBiAw+EI//7u37+fYcOGnfB7V61aRUpKSqXmk1Jy8OBB4uPjueOOO9i6dSvTp09n1KhR4X0+/vhjZsyYcdz3pqWlsXLlygrNE1nVooLMmTOHTp06sWHDBsMLd2xsLI0aNWLKlCmMHDmSjz/+mEWLFnHZZZcxYMAApJQMHTqUtm3b0r17d0OzHm3t2rV88MEHPP300+HXMD4+3uBUfzCbzRw4cIDU1FRuvfVWw3/OZRITE0lMTERKSWZmJlD67mDq1Km0a9eOwYMHc/fdd9OnTx969epF06ZN+fnnn6lbt67ByY01efJknn32Wb799ltefPFF9u/ff8x1GPXq1Qu/nv+rKn72gUCAV199lX/84x/Mnj2bL7/8km3btjFw4MDwPtdee225GStajSzcQoiIumry3nvv5S9/+Qsvv/w
yq1evDm9ftWoVAG+88Qbr1q2LiML9008/8fHHHwMcU7QjUdnPOBIzCiHCuZ599lkAfvnlF0aMGEHv3r2pV68eycnJzJw5k6VLl3L//fdH5POoKrNnz6Zly5bMmzePe++9lz179rBq1aqIeU1sNhv33nsvd955J6+++ipRUVEkJiby0UcfGZKnRhbuSGQymRgzZswJ77vjjjuqOM3xpJTs2bOHpUuXcvXVV3P55ZdHzC9NeSI93/+6+OKLWbx4MTNmzKBTp040aNCA9evX06lTJ6OjGc5qtTJ//nw2bNhAfHw8r776qtGRjtOqVSumTJnC888/T+/evenbt69hWVThVoDSUREPPfQQr732GrGxsUbHOS09e/akR48eRsc4I23atCE7O5sFCxbQs2dPFi5cyF/+8pdq90eooplMJvr06cPll1+OyWTCZrMZHek4iYmJXHvttfTo0cPw8xKqcCsAWCwWli1bZnSMM2K1WrFarUbHOGMrV64kKyuL9evXs3PnTqPjRJSoqCijI5xSJJzrUYVbUQzQrFkzmjVrZnQMpZqqsWOQlixZct6//VQUpWaqsS3u1q1bGx1BURSlUtTYFreiKEpNpQq3oihKNaMKt6IoSjWjCreiKEo1owq3oihKNXPKwi2EWCCEOCKE2HzUthQhxEohxI7Q5+TQdiGEmC2EyBJC/CqE6FiZ4RVFUc5Hp9Pifh3434vyJwOrpJTNgVWh2wDXAc1DH6OAORUTU1GU6kRdQ1G5Tlm4pZRrgYL/2TwQeCP09RvAjUdtXyhLfQckCSHqVFRYRVGqh0ianbMmOts+7gwpZXbo68NA2VIu9YD9R+13ILRNURRFqSDnfHJSlv5pPeM/r0KIUUKIDUKIDR6P51xjKIqinDfOtnDnlHWBhD4fCW0/CDQ4ar/6oW3HkVLOk1J2llJ2jo6OPssYiqIo55+znavkfeAOYEbo83+P2j5OCPEW0BUoPqpLpVyapvHee++dZZTKl5eXx86dOyM64+bNm9m7dy85OTlGRynX4cOH+fTTT4nkP9QlJSUR/XN2u93EZsfS5L0mRkcpV/yeeDa7Nkd0P/euXbuwWCxs3rz51DsbRNO0cu87ZeEWQiwFrgTShBAHgEcpLdjLhBAjgb3An0O7fwxcD2QBbmDE6QT0+wVjxkTuiucxMTp33BET0auy7927l8TExIjOaLfbqVWrVkQv1GCxWCL6NXQ6nXSxd2FGxvGL0kaKbYXbcJgcEf06xsTE8GTKk7gz3EZHKZdf+Mu975SFW0p5azl39T7BvhIYe9rJwt9n4vBh49dbLE9iYhZ16uRHxJqQ5cnJySEjI+OsM0op+f777xk8ePAx20ePHs3DDz9cISuSrFq1ik6dOmGz2XA4HCSnJJFTeIj42ERKAkf4vHAhu9xbMAUs2EUcQjeT7ThEt+S+XHPBLfjdPurXakhJSQmxsbEUFhYSExNDIBBA0zRiY2ORUhIdHU1BQQFxcXE4HA4SExPDt30+H4mJifh8PqSUREVFYTKZwuuULlmyJKJ/zgUFBfz4448RnVHXdfLy8iI646+//kr+hfkUNys2Okq54kxx5d5XY6d1VU5fMBhk/fr1XHPNNfh8vmPue+yxx7BarUycOJGYmJhzPpaUOvmBQ+xybcGEzvvZL9EstiN+3Y+NaFrYunLIt49iTxGtkjrQKPUiEqzJ/G31MOKtqYzt8A9q2epgC9gwmUzoug6ULn2laRpSSnw+H0IINE1DCEEgEAjfL4TA7/eH34YGg8GIXCZLUU5GFe7znKZprFixgokTJx5XtMs8+uijFBcX88QTT5zzUmESycYj3zNr43QyYjNomNiI4mCAX3ZvZc+h/bRp1gBrwMb2XVnktSjigsTWCA5glwlEiwSW/ryAlikXcm2z/kTZohFCYDab0XU93KcaCASwWq1omobFYkHTNOx2O0IILBYLwWCwNIuUBAIBVbiVakfNVXKeE0Lw3XffkZ1d/jnkYDDIO++
8UyGLo5qEmc5pV1En0Iktvxfw65ZcNv6aTckhG3Z3bVz7Yzi43c+Wjbl8v3EjW3b9yNqf1uBxBVm/81uOOPKZu/5FCnx5OBwOoPStucfjwWKxYDIJYmKi8Xo9WK1WfD4fUVFRuFyucGs7NjY2XMQr4l2EolQ11eI+z2VnZ/P777+fcgRASUkJGzZsoGvXrud0PF3XiTXHMLv/bO5aMYJPNn+M7oNoGYVN2vgpS+NPl9zEyD5dKHYVYfPYOOD+BG9JPnkFhezQdhIMmBk4pz8rx68GwGazERUVhdfjZvOqGWT9uJhgUKN19zvodMPjOBwOUlNT8Xq9REdHk5eXh91uJxgM4na7SU1NPafnpChVTbW4z3Nms/m0ugpOd79TMZlM2O12vE4PL980l+tb9cNiNtOkVhO6NevGRY3bsTd3L1sObibfUUB2fjax+Y1w/Z7IhQmt8RTnge5FKxbcPftuhBB4vV4KCvJx5Gxh55Z1FJZ4qdduAEl12+MoKSEuLo7c3FyEELhcLtLS0rBYLFgsFpKSks75OSlKVVMt7vNcrVq1aNCgwSn3s9vttG3b9pyPJ6XE7/eTnJxMIBBgzk0v8Y/of/Ju5rsUOYuINccSI6LxCT9H8rdRXFhMvDWBgd0H4nQ4iSaF/NwjmJIP4c8JoGlBrFYrq1fM5MiebyjM3k+HqyZx+YBJBIOl93k8HpKTk9E0jZiYGIqLizGbzUgpcTqdJCYmnvPzUpSqpFrc5zmTycSIESNo1qzZSfd76qmnsFgq5u+8yWTCZDIhpSQ5OoXHr32cIZ1vxRlwsSt3N5sPbuXH3T+yr3A/Teo3pWHdhuzK3oXD6yBepHJJw57kbfBhb32Y1957hYDfy49r/oPXZ2Hg6AV06TMq/Phlw/zMZjNA+HYZNYudUh2pFvd5TghBu3btuOyyyzCZTGzfvv2Y+zMyMmjUqBG9e/eukJOTUFq4nU4nsbGxuFwuEuwJzOj3JI9f9yiDXhxMYUkhWft3kR6fRoEznzhrPF63FwKS3Nx84qyx9Ok0gAMHtvO1XMF3Y14jWZP07XUbjVp3x2q14na7sdvt4ZOTTqcTm82G3+8nJiYGTdPQdf2cR8mcqaysLOrUqRPRFyEpkU8VbgWz2cycOXO4++67ycrKCo+NBmjYsCHz588nJSWlQo5VNs46NTWVgoICkpKScLlc2Kw2/E4/H479kD0Fe/gg8wNcXhemoIlYWwwlRSUgBR63F7vZxpCrh9D54s6s/fVz5q+fwhX9hnBxtxvQNA2n00lKSgolJSUkJiZSVFREWloaDoeD6Oho8vPziYmJQUqJy+Wqkiv8ioqKmDt3bvgPSoMGDRg+fHilH1epmVThPs9JKZFSMnnyZJYuXXpM0Qb48ccfGTVqFCtXriQuLu6cuxaEENjtdgoKCoiOjqa4uBir1UowGCQuLg4pJc3SmzG+z3iklNgsZg6v+4LDP7xLjD2K1F7XkdS9N1a7ncLCQgKHg3iKBJdefRM2mw0pJUlJSeTt2cOPr75AwYF9JDdtTac77iEpvVa4v1vXdXRdr7J5UwoKCvjss894/fXX2bFjB//85z+5/fbbVVeNclZU4T5PlRXs/fv388gjj7B8+fLjinaZ77//ni5duvD666/TuXNnzGbzWRecshZ3YmIixcXFJCQk4Ha7sVgs4bHY+L2YfF62TRmP9HupP2gYnR/+P3Rhwmo2sXvev8j/JZOgppOVV4Q99wi+zT+y4Zu1HPn1JwKaRushd9Fh8C34fV40r4+lo27HWeJkwJSpJFzQlIwGDTGZTLhcLux2+7m8lKf1nCdNmsQbb7zBrFmzGDlyJA899BDPPPMMf/3rXyv12KcjPz+f5OTkCusKUyqfKtznISkluq7z7rvvsnz5ct59992TzkQG8Pvvv/OXv/yFUaNGMWTIEFJSUs66eJvNZgKBQPgqxrITiWazGc1RzKF5T+Hal0Xr+x/HGp9AoKgQ764dIMAnod7g22g0fCx
Bl4N6X62i8/bfyP9mLY0vv4oLh95NMOjHVViI31GMJkFHMuDvjxHUdL5+cyG/rlvH6Fdep0nHTuGTlpVJCMFzzz3HbbfdRvfu3Vm7di1vvPEG3377baUf+2RycnL45ptvWLlyJVdeeSVNmzalc+fOhmZSTo8q3OeZspb2vHnzuP/++8OTLZ2OX375hbFjx7J+/XoWLFiA1Wo94+IthDhmHpGyPxhSSggG2Tvn/9ByDtFk2F/w5x4mmHsYgaTsMEKCf99uvFKiAwktW5PUvhOaP4inKJ+SvTvRpESToEmJLiWaDrqUBHVJxxsGENB13vzr/dzyf/+m+TleUHS6UlNTadiwIZmZmaSmpjJhwoQqOe7JbNy4kVdeeYWXX36ZBQsW8NFHH7Fw4UKjYymnQRXu84ymabz66qs8/PDDeL3es3qMJUuWoGkar732GlFRUWf0vVJKgsEgycnJx5yctFgs7F+xGE/Wb1xw218g4EXoIETo45jHKC3gINHcLvxSlhbrUIHWdIkuCRfvoCbRpE4wtE+7nr3wef3MHTOaSW8vp3XHjmf1OpyJsvHiEyZMoF27doZfal9SUsLbb7/NvHnzmD59Ok888QRLlizh/fffZ8CAAYZmU05NFe7ziK7rvPXWW4wdO/aUXSMnI6XkP//5DykpKTz55JNndAGLyWQiKiqK7OxsUlNTycvLIzY2Fp/bRcEX79Ny2Fg0dzHSBAiBKdRCN4k/ji2lLF0sT0ooK9K6RNclQamj6RJNg2CocAd0naCEoK6j6QJN12nd41KOHDiAJy/vrF+H0yWlZMeOHcTFxXHJJZdU+vFOR3x8PDfffDPPPPMMmZmZrFmzhszMTMaMGWN0NOU0qMJ9HlmyZAnDhw8/pmuk7GKYshnzymMymcJ901A6A99LL72Epmk8/fTTxMWVP3fw0cpa3NHR0QQCgfCJwfx1X2CLjcObdxCzSWAyl54oE2YwH1W4dVnaqpa6AE1HlzpSgtRDLW29rEBLAnpp90hQlwQlpQVcL+1GCQR1Uus34qX7JjB/y1ZEJfZ1SymZOHEiP//8c6Ud40wJIWjcuDEej4eDBw/y8ccfc+WVV1bYRVZK5VKnkSPQo48+espCehHdQoQAACAASURBVKYWLFjAhAkTjuvP7tKlC/369TtlX3VGRgZjxx6/Rsb8+fO57777zmiZqrJjlX2WUuL4aT0xjZuheVzoHhfS7QKvCzxuhNeN2efB7PMgvKW3pdeF9LrRPW50txvd7UJ3u9DcTjS3m4DbddSHE7/rjw+vw4HX5aBu86ZovrPrLqoJ2rZty9y5c2nRogUzZ87kzjvvNDqScppU4Y4gH330Ea1bt6ZHjx506dKFKVOmnPNjlnWP3H///RQWFoa3R0VF0aRJE959911atGhxyseJi4tj2rRprF+/njZt2hzz+G+88QYjRow4rT82ZfNne71eLBYLfr8/tM2E1Pzhwq17XEiPC+lxQ6hYC2/p13g8cNR+utdF0BP6cLsJup0EQ0Xb73bhczrxuxz4XE68TjdepxOv04mnuLjcIZAV6bbbbuPtt9+u9ONUZw6Hgx9++IEnnniC4hP8XDRNo7i4+JiPOXPm0L59e3r3Pm4xrhpPvS+qZLm5uWzatOm09v3++++5+uqrsdlsvP3227zyyivhJcnOhpSSnJwcXnrpJYqL/1iiqW7duvzrX//ixhtvPKNLr+Pi4ujWrRvLly/n1ltvZdOmTUgp0TSNL774gk8//fSUrXdd1/H5fCQlJeF2u0lISMDv9+P3+ZH5OdhDXTfCLDCZBMIsECYTpW0MSRDQdJ2grhPUSrtBAqGvA1IS0EIfusQf1AnqUFJSjDkmFr8m8etH3R+6CKcy7dq1i+joaOrXr1+pxzkXHTp0IDMzkyuvvNKwDD179qRr16706dOHFi1a8Pzzz5OWlha+v6CggDlz5hzzPUOGDGHjxo1VHTUiqMJdyfLz81mzZs1p7bt161ZcLhdr167l7rv
vJiYmhtzc3HO6JFvXdQKBQHhypfT0dKZMmcKgQYPOar4MIQStW7fmhRde4K9//Ss//PBD+D6/v/zFTcuYTCZsNhv5+fnUqlWLwsJC4uPjiUpIJPurT7GZTJCUBKHijal0SEnQ70PYo9Ep67cGn8uBOy8Xv6bjC+r4dYlP0/EFJZrJgiUtgwCC4kMHiKldD7+uE9DAp2kEdcjNPoz/LEfWnK7XXnuN2267LaLnJnn66afp1KmTYUXwo48+YtCgQdx1110sWLCAtLQ07rzzzmMuTkpNTWXVqlWG5ItEqnBXslatWvH444+f1r7Lli3j0Ucf5dlnn+XWW2/loosuol27dmd9bCEE6enpTJs2jb/97W9kZWXxn//8hw4dOpxTIRFC0L17d1577TXGjRvH999/z6OPPkrv3r1P2Veu6zp+v59atUovP09KSsLv91Nn8HByv1lF0e+b0Oo1JDYtHd0k0E2CoIDg/p1YGzRFAp6cQwRKivH6fKXdHkENvybxBCW+oIZX0/Ej0Pfvw4+Z6AYNKc7ORsTGEtDAq+kUFxSwa8tW2t9wI1TSZeeZmZlYLBYuvvjiSnn8mqJJkyYsX76cmJgYunXrxsqVK3n88ccZPHiwmhKgHKpwR5Abb7yRPn36cO+99/LOO++c9kiNk7FarfTq1YvVq1cTDAZJTU095peh7CrKUymb26PssmiLxUKbNm147733wl0fpzvTnq7r4XUiy94J2Os2RLfYCLjcsHsHaBq2uDgCUsMM+EuKEb/+UDpWW9MIaDp+Tcev/dE9EpR6aOw2BDQNb1EBvqBOfl4enoCGH0FCg8YUFhZy5OBhvP4gN4wZU2nFIT8/H5PJVGETdNVUrVu3Jjs7m3vuuYdevXqRnZ3NpZdeqor2SajCHUFsNhs2m42lS5dW6OOazeZyV3rRNI1GjRoRHx9PSUlJuY/RsWPHY4bvlUlISDijLEIIbDYbDocDu92Ox+MJF3HNHo1fl8iAhrmkmKAWQDu0PzQcUCAADRm+yMav6wQ1gV8/uu9aD/d5B/XSC26CWgBNg0BQw+N0UpCdgy4BYSI6rnK6MPx+P7///nuFLD5xPvjss8/YtWsXX3/9NVlZWUbHiXhqVMl5zmKxMHjwYBo2bFjuPkIIHnjggQqZjKlsBZykpCQ8Hg/x8fHouo7FYqHxsLvxhfqpXQUFuJ0OfJqOV9PxaDpuTccb1PEES2/7NfCFWt3HtLx1vfSKSb3s5GXpNl1CSUFh6YrwJhNdbhqMiKqc2QFdLhcffPABgwcPrpTHr4maNGnCHXfcYXSMakG1uJXTmu2voiZjKpvWNS8vj7i4OIqKirDZbAQCAepe2oeNOuhSR5cBdIcbgnrp+UlR2saQUg9dhAPB0MU2/tDJSr9eNlpE4tdK7w+UFXApEVFReD2+0n20IO2vvJKGTZpUyPP6XyNHjjxuFESkEkLw1ltvGR1DOQOqxa1UKSklgUCAtLQ03G43iYmJ4ZVoHC438V16lraygxpOhxN3oLSF7Q7ooa9laYs7qOMJanhCI0q8QQ1fUMOnafiDEr+m4dd0AqFiHgjquJxu/D4/8bVqce1fRmOOiqagoKDCn+OuXbuA0hZkdSCEoGXLlkbHUM6AKtxKlSq7AMftdmO1WvF6veFZAqPj42kxdCTeoAwVaA1vaLSIN6jhDWpHFe3SLhRvUIa7V3yaxBfqLvFrAr8Ofk0eM947ICUZzZtTUlBI9/4DKmUhhYcffpiZM2eqk2tKpVGFW6lyZRftCCHCI1qklFgsFpKbtaT+NQNChTrUqg6W9m3/0b8t8QRK7/eF9vOFRpkEQsW7tLtEKy3iusSvQ1DTadPzSjRhocdNN2OxWCplzclJkyYdc/GIolQ0VbiVKlVWtGNiYggEAkRHR4cXUfB4PJhi40ht1x4/ptJWt1baNeIOarjDRTxYerIyfLu0Ne7VSsdw+3SJN1h6sY1f1/CFWtu6MJFcrx4ORwkX9uyJpmm4XK4Kf47
dunUzfNpWpWZTJyeVKlU2reuRI0dITU0lPz+fuLg4AoEASUlJaJpGiyHD2bluDXvXrkIgwnNyA0gpwhNaBeUfQwMDUhLUQicjQ5e0+8r6uDUdabHRrmcvfly1hhe//QZbVBRSyjMezqgokUC1uJUqVXZyMi4uDp/PR2xsbPiCHK/Xi9/vxyQErQfcjGaNwqOF+rYDGp7AH61r99F93prEG5Slre1Qt8nRwwSDmGhwUQcCCC6/+SY0q41gMEgwGMTpdBr9kijKGTtl4RZCLBBCHBFCbD5q22NCiINCiJ9DH9cfdd/DQogsIcTvQohrKyu4Un2ZzWY0TcNqtR4zj4rFYgkPO2x41bXEtGqLNyhxByXuoI776BOToe1l/d++QGl/ty980vKPfu/0Zi2ISU5hz5atXNirF7FxceF5yNX800p1dDot7teBvifY/pyUsn3o42MAIUQb4Bagbeh7XhJCVP5qrMo5OZO5tM9V2ZqTZdO5lp2klFKGiymUXhbfb9rTmJJTjyrYWqiAS1yhk5LewB/F3KOBJ1S0vZqGbrGSUL8Rlrh4igsKGHzfBFpeckl43LoQolJOTipKZTtl4ZZSrgVOd7DrQOAtKaVPSrkbyAIiY60mpVx2uz1cMKG0RXx0QZNSVtiwuf/tKomJiQnPgeLxeMIr7NhsNuo2a84tLy0gvmFjPAE99FHaReIrG99ddjWlpodHoviCEl9Q4pcCrz9ASUEhHa7uw9UjRhAVHY3D4UDTtEo7Oakole1c+rjHCSF+DXWlJIe21QP2H7XPgdC24wghRgkhNgghNgQCnnOIoZyrpKQkkpNLf4Rms5nRo0fz/PPPhy9xj42NpXbt2hVyrLIrJ4uKioiKigrPjxIMBomNjcVutyOlxOv14nA4aHZJN254/P/oMPjP+KQIjzLxmy1ccPmV4SGC3qBGVFo6cbXr4tW00svhfQFsMTEMGj+ePnfdhRACr9dLUlISZrMZi8VCfHx8hTwvRalKZ9vBNweYRumSrdOAZ4C7zuQBpJTzgHkA8fEZ0uc7yyTKORNC8Prrr+NyuRBCULduXeLi4rjiiivCJw7PZEHgU7HZbKSnp2M2m6lVq1b4QpWjZx4sG05nMpno1Kcv7bpfRv+/TQZCq7ybBDFJSTiPuvLRYrODEMfMsW2LiiK9YUP00JDD6OhohBDhdxDqIhmlOjqrwi2lzCn7WggxH/gwdPMg0OCoXeuHtikRTAhBo0aNjtveqlWrSjne0X3ZR3fRlPnfeVFMJhPW5GTikpOP2zc54/TeCZQ9YtnxVMFWqrOz6ioRQtQ56uYgoGzEyfvALUIIuxDiAqA58MP/fr+iKIpy9sSpRhQIIZYCVwJpQA7waOh2e0q7SvYAo6WU2aH9/05pt0kQmCil/ORUIRITU2SLFvef7XOodFari7Zt807YKo0Uhw8fxm63h/uqI9H27du54IILInokx6ZNm7jwwguNjlGuQCDAnj17aN68udFRylVQUIDf76+w8yKVYc+ePWyttZVAbMDoKOXa/ux2iguKT/jW8JSFuyrEx6dLv/93o2OUKyFhD3XrfsO2bcOMjlKuRo0+5aWXatGpUyejo5Rr5syZjBgxokL7yyva3//+d6ZPn250jHIVFRWxcOFCJkyYYHSUcm3YsIH8/HyuvTZyL+NYtGgRPXv2jOjGWMuWLTly5MgJC3eEXH0g8Psjt6UYCOSjafaIzqhp0cTGxkZ0i9tqtZKYmBixGcvmTInUfFCa0Wq1RnTGmJgY3G53RGe02+3ExcVFdMaTnYdRl7wriqJUM6pwK4qiVDOqcCuKolQzqnAriqJUM6pwK4qiVDOqcCuKolQzqnCfpzZv3hyeiU9RlOolQsZxK1Vl//79LFy4EJ/Ph81mo1WrVtx8881Gx1IU5QyoFvd5RErJ3r17+eWXXxg3bhwtW7Zk6dKlVbqQgqIo504V7vOI1+tl9uzZzJo1i8cff5zWrVtz/fXXs3jxYqOjnRWv1xu
ez1tRzieqq+Q8Eh0dzYQJE7j33nt56aWXuOiii7j88st59913jY52xj755BN27dpFbm4uF154If3798dmsxkdS1GqhGpxn2eaNGnClVdeyezZs3n44Yfp3Lkza9asMTrWGbv//vupU6cOffv25ZFHHsHtdhsdqVwvvPACHk9krvL00UcfkZWVZXQM5Qypwn2eqVu3Lvfddx933XUX48ePZ8yYMXz++ef8+uuv1aav+5///CczZ86kfv36/Prrr6xYsYJ7773X6FjlWrFiBX6/3+gYJ/Ttt99y8GDkr3Wyb98+HnvsMaNjlGvPnj1MnTq1yo6nCvd5qnnz5uFZ5h5//HGee+45tmzZYnSs0zJlyhQmT57Mxo0b2bhxI6NHj2bWrFlGx6qWateuzeHDh9E0zegoJ+X1etm9e7fRMcrl9XrZu3dvlR1PFW4Fi8XCK6+8wsKFC6tFt4nVauWGG27g448/JjMzk4suuojY2FijY1VL48aNY+7cuRHd1aQcTxVuBShd5/GRRx7hu+++Y926dUbHOaVp06Yxfvx4+vXrx4svvhheXFhRzgeqcCthSUlJjB07lmXLlrFt27Zq0+etKEar6sWnVeFWjhEfH8+sWbN46qmn+Omnn4yOoyjVQlU3clThVo4jhODFF1/kww8/ZPXq1UbHKVeTJk2QUrJr1y6jo5Tryy+/pGfPntjtdqOjlOuOO+5gwYIFRscol5SS5cuXM2jQIKOjlCstLY2GDRuycePGKjmeKtzKCUVFRTF+/HjWrl3Lhg0bIrLbpDoU7tWrV9OzZ0+ioqKMjlKu4cOH8/rrrxsdo1xSSt555x1uvPFGo6OUq6xw//zzz1VyPFW4lXKlpKTw0EMPMXfuXLZt22Z0HEVRQlThVk4qKiqK+fPnM2fOHL755huj4yiKgircymkQQjB9+nTWrl1bLcZ5K2cuNzeXN954w+gYx3nvvfcYO3YsBw4cYMyYMRHZeNB1nYkTJ7Jo0SIWLVrExIkT0XW9Uo+pCrdyWuLj4xkzZgyffPIJmzdvjsg+70hSUlJChw4dePXVVxkzZgzXX3+90ZHKdffdd5OXl8cDDzxAhw4d2Llzp9GRgNKC+P3333PJJZeQmppKgwYN2Lp1a6UXxTPl9/tZs2YNvXr1olevXqxZs6bSpzhQhVs5bUlJSTz55JM888wzbN682eg4ANSrV4+EhASjYxxny5Yt9OjRgxEjRjB79myio6Mj8iTqwYMH8fl8rF+/nuuuu47Bgwfz22+/RcQf5u+++47Y2FgGDRpEp06duPvuu9myZQv79u0zOtoxJk2axLx582jfvj3t27dn3rx5TJo0qVKPqQq3ckbMZjPz589nyZIlEdFtMmrUKC655BKjYxzniy++oHfv3lx66aU0btyYK664IiLf5v/yyy9cdNFF1K5dm759+9K9e3fWr18fEYW7R48euFwupkyZwnPPPcekSZNo164djRs3NjraMV544QWGDBlCdHQ0drudIUOG8MILL1TqMdV83MoZs1gsPPjgg8yZMwe73U737t2NjhRxxo0bR+vWrXn++ed58803Wbp0Kdu3bzc61nGuv/56/v3vf7Nr1y5uvfVWRowYwUcffYTJFBltumHDhrFt2zb+/ve/h1vekcZkMvHcc8+FhwI+99xzlf76qcKtnJXk5GQmTJjAQw89xAUXXEDt2rWNjhRREhMTyczMZNGiRXTr1i2ip51955132LNnD0uWLGHdunWkp6cbHSmsXbt2tG3blp49e0ZUrqMJIbjxxhvDE3VVxbw5qnArZy0uLq7S3xJWVyaTiXr16vHQQw8BVT+XxZlIS0sjNTWVTp06RWROIUTEFu2jVeVEZ6dszwshGgghVgshtgohtggh7gttTxFCrBRC7Ah9Tg5tF0KI2UKILCHEr0KIjpX9JBTjCCEi8pc9UlSX16e65FRKnU5HTBB4QErZBugGjBVCtAEmA6uklM2BVaHbANcBzUMfo4A5FZ5aURT
lPHbKwi2lzJZS/hT62gH8BtQDBgJlI/bfAMomEhgILJSlvgOShBB1Kjy5oijKeeqMTn0KIRoDHYDvgQwpZXborsNARujresD+o77tQGjb/z7WKCHEBiHEhkAgMhdSVRRFiUSnXbiFEHHAf4CJUsqSo++TpYM+z2jgp5RynpSys5Sys9UafSbfqiiKcl47rcIthLBSWrTflFK+G9qcU9YFEvp8JLT9INDgqG+vH9qmKIqiVIDTGVUigFeB36SUzx511/vAHaGv7wD+e9T24aHRJd2A4qO6VBRFUZRzdDrjuC8Fbgc2CSHKZgl/BJgBLBNCjAT2An8O3fcxcD2QBbiBERWaWFEU5Tx3ysItpVwHlDfAs/cJ9pfA2DOPYvzcCKcW+RkjYY6JU4n0jJGeD1TGilIdMp6IiITgiYnJsn3724yOUS6z2U9iohObLcXoKOUKBktISrJU6dVbZ+rIkSOkpqZiNpuNjlKuAwcOYbHUNTrGSWgETIewpluNDlIu3a0TF4yLyFkbyxQUFBAXF4fNZjM6SrkWL15MYWHhCRvNEVG44+MzpNOZY3SMciUmZvHUU6u55557jI5Srvfee4+MjAy6du2Kz+fDarX+MW+xSeewby+FwRykLrFgAwSegJsYcwJNE9oidDM2mxVN0xBCEAwGEUJgMpkIBoPYbLbw57LHDwaDmM3mY/YtuwIvGAxitZYWl7Ir8p544gnGjh1LcnKyQa/SyUkp+fOfJ/DOO88bHaVcdnsB7aZcQ+YjmUZHKVftb2ozN28uAwcONDpKuV5++WV69+5Ns2bNjI5SroyMDHJyck5YuNVcJTWMpmnk5+cTFW/jh8IPSY9qRNDkZafzF7L9e3F4nTi8xdSNborH7yHdWp8dUb+xOz+LcV3/jt8XQAiB0+lECIHdbsfpdJKWlobT6SQlJYXi4mJSUlIoKSkhNjaWoqIirFYrNpsNm82GxWLB6XRGbIFWlOpOFe4aJqvoF/5T+ByiWHDYtxerjCIYlMSSTJq9HkkkU+R24dEDpNjrg27lk53vEm2JZ9qXD3JLu5HUjWlAfHw8UkqCwSCpqam4XC7sdjt5eXnExcVRUlJCdHQ0Pp+PpKQkpJRomhaeIc1ms5Gfn09SUhIWi/pvpigVSf1G1TC1Yhrx1qqNpESlcFGti2iS3opdh/bwxrqlNGuRSK3YOHb8mo25XpBL2/TEHIwi2pJEgSMPe0w8C36YQ7/WN9I2+WIsFitWq5Xc3FzS09NxuVykpKZSkJ9PYmIixcXFxMbGUlJSgtVaum9sbCwmkwmXy0VycnLEzOusKDWJKtw1TDQxzOu3gAc//xsfbf2EzzZ/gV23kZFcG3+uHZ8jjebpjThUtButSOfbn7+lfrsUsg4folmqnyJ3MV6fRtMrWpFkiUYIQVxcHH6/H58jm+3b3sdR4iAlvS5pTXqjaRpRUVHhfuyytfZMJhNer5fo6Gg165yiVDDVHKphTCYTLVKa8Y+r/o7JItiZv5NCTyFxUbG4/W7cARcN0hvQOq09CZ5mNE5og2O7RPh1zPjYd+QQn21axfQPnwBKT9jpug5S4+DWz1jz1kQyP/4HmZ8/gwid19Z1HV3Xw0OrTCYTUspqO9RKUSKdKtw1jNVqJeAP0L1+d/4z9D+kxaViMpsp8hZjtVnwaX62HthCriOX3/dt4+sN39Ioph0DMm7nl1W/06VVA2IcZpZ/spxAMACAo6SII3t/ZO1Hz1PkttPl5lfpc9ebBLTSUSV+vz88gqXsJKWu66q1rSiVRHWV1DDFxcXh/ujWtdvwzYR1DH7lZrLzs7FLGzZpJwo7ufm5SL9ORnJtNKmRcySPAR2HUPRbEYn2InyJ0ezcv51WF7TlqxVPsy3zQxpc0JrLrh5Fu0tuoKSkhLiYGLxeLykpKWiaRiAQwOl0IqUkJiaGvLw8UlNT1clJRalg6jeqhik7WWixWPB6vWTE1GbBrQv4YNMHzPlyDocKssEvibfE06ZeG2z
CxpGiI8RYonGUOBAaxBc3xpFQxNT/TuRPTYeQ9duvJNVuQ/+RM0nNaITX6yUmJga/34/VasXtdofHb0dHl870qGka8fHx6uSkolQCVbhrmLITgoFAIHwRTstaLWjRaxKX1OtCjiuHJ995koN5h9iVs5OUqFRs2MjPy8PnDuB1ehhz4xjG9xhHccwBXn/uXyQf0Xhg2nySazXA7XYTHR2N1+vFbreHL8op6+cuOzlZVtDtdrvBr4ii1DyqcNcwuq5jsVjw+/3HnCSUEro36U5UdBR92/TFarPidDixmQUHd22nVmIqPgkxKbWIskWRnJRMSUkhv1/wM73u6kfj5u0RQqBpGiaTCWdeLgGLmYCmk1q3HiaTKVy8gfC+6gSlcq6OHDlCWlqaevd2FFW4a5ioqKjwuGqfzwcQnhvEbrfj9/uJj4onb8N6ogIeHEdyiD+0l5KiQpIu7EBC+24492Sx2+Nh/+EjbPr6G7p1vIzAwX0c2rGNqOhoSuKS2fv1KvZt/oW4WnWIadKCuNQ06rVtS0bzluHL4BMTE9Uvm3LWsrOzWbt2LWvXrqVHjx40a9aMrl27Gh0rIqjCXcO4XC5SU1NxOp1ERUWh6zo+nw8hBB6PhyiPg91vziU2ORV/dAyJtWqT0OMKpBAIwHNgL7K4ALseJHb3dnr43MhVH3Lo4B6EyUJhwE90ej1a9O5L097XIjWd379Zy+HNv7BvYyYOj5cbH/knyWlpFBcXk5qaqoq3clY2btzIm2++yZw5c1iwYAGff/65KtwhqnDXMAkJCaVzlURF4Xa7MZlMWK1WpJTEWs38PP4eEps0J7nnNZjMFpAa/oP7SifulRKz2UJis1boUhLboCnNBt+Cpun43CVYouPQpE4gEMRTXIAuQdMl9dtdTB0pKc7P5/1Zz/LqvaMZ9/pikpKSKm0mwEAggMViUcMNa6iioiKWL1/OnDlzmDp1KjNmzGDx4sW8//77DBgwwOh4hlNNoRqmpKSEtLS08JA8q9VKIBDAW5jP93ffSEzdetS57iZ0RzF6cQHSUYzwOhEeJ3hdSFcJWkEuwYJcdJeDYHE+mqMQ4ffjLyogUFhI0FFC0OUi6HYRcLvwOx34nKXdMwMnPoDzcDYv3Dmc/Tt3omlahT6/vLw8Nm7cyC233MLPP//M4cOHK/TxlciQmJjI4MGDefrpp/npp59YuXIlGzdupF+/fkZHiwiqcNcwUVFRuFwuhBAEAgE0TcNsNpP7wTJSGjSl3rWDCORlg9eN8Loxed0Irwfh82LyehAeF8JTeh8eJ9LtRHM7CHrcBN1Ogh4nuidUtJ1Ogk4nPpcTv8uJz+Ui4PHS45ah5OzeyZbVX1Z4i3jZsmU89NBDzJo1i6lTp/Lyyy9X6OMrkUEIQZMmTQgEAhw6dIjRo0dz1VVXRfRc7lVJFe4aJiYmhqKiIgA8Hk/pKA+fB8f2X0lq1Y5g3mHwuksLt8+FyefG7Hdj9rkx+T0Inxvhc4PHhfS6kV4X0u1GelxoHjdBt4ugy0XA5SDgcuJ3Owm6XPidLvwuBz63AxPQ+MKL+f6//6U4N7fCntvevXvZv38/r7zyCrNnz2bu3LlIKdm0aVOFHUOJHG3btuWFF15g0aJFNGjQgNtvv93oSBFDFe4IIKWkqKiIFStWsHTp0nN6rOLiYjIyMpBSEhcXh8ViIXvNZ+Dzo2sBNI8L6SktzKUtbhdmnxuLz4XJ60L4QsXa60G63eguN7rHheZxoLtLi3fA80c3ScDlxOd24nM58LuceJ0uPM4SajdrhqOgAGdhYQW9SlCnTh1q167NunXrGDFiRPhEVfPmzSvsGErk6dWrV7W8+tbr9VJYWMjgwYMpLCzE6/VW2GNXv1ejhtm5cydZWVm89NJLXHLJJTzyyCPn9HiJiYnk5OQQHx+Py+XCbDYTY7fisJnR/V70IEiTCUwgTQJMApPZhBAgdRC6BF0idYmuaeh66QlITdfRdAhqkoCU+HVJUJMEdZ2
ADgFdJxC67dd1grpADwagAsdx22w2mjRpwgsvvICu66SnpyOEICoqqsKOUNx5nAAAIABJREFUoSgVZebMmSxfvpzly5dz1VVXMXz4cCZNmlQhj60Kt0F8Ph/Tp0/HZDJh/n/2zjxMiur63++t3qene1b2fTMoRECWQNxQIqIRlyRuuH0JKjHiL0YFJLgnGjdcokYkiiARxYhbNCFxjcEFRVAEkQAyyLDNMHvvtdzfH91dzigDA0zTPXjf5+mnq6uqqz59u/vUrXPPPcfh4Nlnn7Wnix8I0WiUQCAAYM9ajMViWPFYsuesgUNzYGlgOQSWpmFpAg2BJVMG27IwLYllSttoG5ZMGmgzuWyYSYOdMK2UsZboJuiWTBlxC1PXD/jzfJvx48czfvx45syZwx//+Efee++9Vj+HQrE/rFixghdffNF+/f7773PYYYfx/PPPs3DhQhYtWkR5eTldu3Y94HMpw51B0rMWH3vsMY4//nj69+8PwF133cW7777L1KlT6dmzJ7179261czocDrs6TXpg0ulw0bB+Lb5AAcLnw3BoCEey1y00AcKBACySRtewwLRMdFMmH5ZElxa6AQnTxJBJg50woWLzJvLad0TXHOgmyZ64BQkjmXQqU1x++eVUV1ezdOlSVqxYwVFHHZWxcymyixCCm266iTvvvJPrr78+23K46aabdtthGDRoEOPHj7df79y5k549e3L00UdTWVmJx+NptQLKynBnkIqKCgYPHsytt97Kr3/9a1avXk3Xrl2ZOXMmV155JYFAoNWjLtKj7kIIO5e2p7QduNzUr/0c0acf0uNBahrSIZBCkgg3IDx54HJhGgZ6wiAei1D75RoShkHMkMQtScwwiZkWcRMC/QZiut248vKIhSMYQqCbkriZdJls+3ozdZWViAxGARQXF1NYWMimTZsYNGiQijjIIVrzdy2EYMCAAbz00kutdswD4frrr99tp8Ttdje5a163bh0PP/wwo0aNYsqUKVx77bXKcLcF3njjDaZNm8bQoUNZtWoV/fv35//+7/8YOXJkxs6ZTuva0NCA3+/HMAw4cgQlo05k5z+fx4yGKezZBzMvD1MTOITE3LkV4fSA202ioY74rgoSZtKPHTctDFOSMCS6aWIYEt202LrqY+IGOEs7ENcN8OeD20tCCmp3VbN5/XpG//Iyijt1ythnBbj66qv5yU9+wpgxYygsLMzouRQt51DOUZOXl9ei/X71q18xefJkZs6cyZo1a1pVg4oqyRBSSjuDXiKRYObMmQQCATt7XqbIy8ujrq4OIQSxWAzDSBY7iMYTGJYkHgnTsHMbsVA99V9vor7sK8I1tYS2fk39pg2EK5JGO91z1k1JIjXoaFgSw5KYMj1gaVK3bSt1O3aw43//o2b7dio2l7H9q41YFvT+4ZH48vMz+nkhabxnzZqV8fMoFPuKEII77rij1Y+retwZQgjBaaedxg9/+EPuvvtuHn30UdasWcM999yT0fMmEgny8/OJRqO43W5M08Q0TXxdumA4XGDoiIYGpNuNrKrEIS2E0JIz3gFTJgcm9bSv2pIkUhEjugW6tFKRJSR94VJikhzEjMdiRENRLCHw5AeJxeNYlpXxXCU//elP7fEDxaGJpmlomoZhGG0yNLC1UT3uDNK+fXu2bdtGXV0d1113HcuWLTso503fpja+Xe194a/RSjsSMU0ikRjhujqiuklUt4jqFhHDIqKbRAyLqCGJGxA3LOKGRcIgFTWSjBbRLYlpfNMLT5gWFoJwfZhoNIphWAz66TiOu2DCQfm8Qgj69u17UM6lyA59+vTh+OOP56mnnsq2lJxAXboyiBACp9PJb37zm4N2TrfbTTQatXsn8E3xXq2wHcbXm5DSxAxF0EwLh5AIJKQHMwFLymTMtmXZPe94ymgnrORApW5Z6DJp0E0LDMAk6ULpf/RxONDI8/pUZkBFq5CusJTO9/59RxnuQ4x0Dch0WlfDMNB1Hcuy6HnxFXz
824/RLAvDSqAhcGiSZELXJBYyOelGSgxJKn5bohvJiTUJ08IwIWGRmnCT8oNbJnHDwuH1oHlcjLt8MvX19Xi9XmW8Fa3C6NGjD+lBz31BGe5DjEAgwK5du/B6vYRCIYQQuFwuHA4HvX50NMvy8kk01KEJcGoCzRIIIdNZXTFlssdtkexxmxYYqZmSycHKpNFOWCZxE3QzuV/ClEinix+ffR7rVn5Kj4ED8fv9yh+paDV69OiRbQk5w167QkKIbkKIt4UQXwgh1gghfpNaf4sQYqsQ4tPU49RG75khhNgghFgnhDg5kx9A0ZRQKERBQQFSSrxeLy6XC9M0sSyLiK5z4oNP2vHYETPp247qFpGUnztqmkQNk6huEjOs5EM3SRhmctJNKkQwYaSnt5vELTBMi/4/PoZP3n6bKY/Nwe12EwqF1K2tQpEBWtIdMoBrpZQrhBAB4BMhxOupbfdLKe9tvLMQ4gjgPGAA0Bl4QwhxmJSydRMzK3aL2+0mFos1qfmYdlW43W487TvQ8egT+fq/b6Kl/IaCpJ9boiGRqZ530ndtWhaGlN9Mebe+CRFMWBZxM+nv9gQLiMYS/OjUU+nYowemaeJyuVShA4UiA+y1xy2l3C6lXJFabgDWAl328JYzgGellHEp5SZgAzCiNcQq9o7X66WhoQEhBIlEAsuycDgcyWRTeXk4C4vpPOLHxA2ZiipJ9qyjhkw+p6JMooZF3DSJmZKYSeqR7G3HzeQAZdJVYmEJJwNO/AnRRIIfn34mgWAQ0zTx+/3KcCsUGWCfRo2EED2BIUA6rm2KEGKVEGKuEKIota4LsKXR28rZs6FXtCL19fW0a9cOy7KShtrpRNd1dF2npqYGf14eA867hK4njCVqJV0hYd0knDCJpMIDIylXSThlwGO6ScwwiOsmcd1KulqM5ECl6XDxg2OOp3pXFUf95CS6DBxIbW0tLpeLXbt2tXoFHIVCsQ+GWwiRDywGrpZS1gOPAn2AwcB2YJ+mrgkhLhdCLBdCLNf16L68VbEHgsEg1dXVaJpGJBJB13VcLhcul4vCwkIikQgOl4vuJ52K4fLZcdtRUyZjuc3Ua0MSNSz7ETMkMVMSTfu4LQleL+379EU6HUTq6+jSvz/BggIKCwvRdZ3i4mKVP0ShyAAtGvIXQrhIGu2npZQvAEgpdzba/hfg1dTLrUC3Rm/vmlrXBCnlHGAOQCDQQcbj+yNf8W0ikQjBlKsiXeU9Hc+dSCTwer2YpsmIs84mWl3Fq7fcQFNvxjfx3KYlkwWBU1PcDZnMHKhbFlI4yA8WgdvD9k1lXH7PPQw49lii0agdv97Q0EAwGFTGW6FoZVoSVSKAJ4C1Usr7Gq1vnD3oLGB1avkV4DwhhEcI0QvoB3zUepIVe8Ln81FfX2/nSjEMw54u7Pf7icViSCmpr6/n+F9OZuwNt2A4XMnetGEl/d6GRUI4iDZaFzMtElIjZpjEDUkcQSQaY0fZ11x08630+9GPkpkIPR47flz5uBWKzNCSHvfRwEXA50KIT1PrfgecL4QYTDLFRRkwGUBKuUYI8RzwBcmIlCtVRMnBw+Fw4HQ6cTqd9mSF9HLjbU6nE7fHw6gL/o++Q0fy+qMPU78rWR9SAqMmXMB/n/4rUoJlSZy+PLr98Ies/eADLAkSQXGnjlzwu99R3K0bTpfLPm76nE6nUxluhSID7NVwSymXArv79/1jD++5Hbj9AHQp9hNN0ygtLW12e0FBAQB+vx9I5lNp3749A4477jv7jp146X7rcLlc+/1ehUKxZ9RcZIVCoWhj5Mh8ZInHU51tEc3idtcTi8Wors5djZFIhFAolNMadV2ntrY2x/NNmDn9W/R4anHoDjzVnmxLaRZ3yE0kEsnp32IsFqO+vj6nNe7pfyJy4U9UXFwsr7vuumzLaJZwOExlZSU9e/bMtpRm2b59Ox6Ph+Li4mxLaZZ169bRu3fvnHajfPbZZwwaNCjbMppF13WWLv2KmpofZFt
Ks3i91QwZEqdThqsfHQibNm2iffv2tsswF7n33nuprq7e/SBRuqBtNh/t27eXucz69evlnDlzsi1jj7z44ovy/fffz7aMPfL73/9eVldXZ1tGs1iWJadMmZJtGXukqqpKDh16u0ymBMvNR8eOS+VLL72U7abaI7Nnz5br16/Ptow9krKLu7WZysetUCgUbQxluBUKhaKNoQy3QqFQtDGU4VYoFIo2hjLcCoVC0cZQhluhUCjaGMpwKxQKRRtDGW6FQqFoYyjDrWhCKBQiHA5nW4ZCodgDOZKrRJFtLMti8eLFbNy4EafTSa9evfjZz36m0rIqFDmI6nErADBNk+nTpzNy5EgGDRrE1KlTsy1JoVA0gzLcCgAmT57M4sWL2bVrF7qu88wzzzBlypRsy1IoFLtBuUoUADzyyCOMGjWKSy65BI/Hw/Tp01mxYkW2Ze2Vbdu2kZ+fTzAYzLaU3bJt2zYCgQCBQCDbUhSHEKrHrQDA7XYzZswYqqurmTt3LkcffbRdhiyXeeyxx/joo9wtafroo4+yfPnybMtQHGIow60AkrUqZ82axcSJE/H5fEybNk0NTCraBLW1tdx1113ZlnFQUYb7IGIYBqFQKNsy9kjv3r156aWXuOyyy3K8Uo1CARMnTuT000+nT58+9OvX76C597JdyUkZ7oPEhx9+yKJFi7j11ltZsmQJkUgk25KapaSkhD59+vDxxx9nW4pC0SxfffUVPp+Pq6++mt69ezN16lQ+//xzTNPM2DnLyspYsmQJU6ZM4V//+hdfffVVxs61J9q04Q6FQixYsGCv+0kpuf3225k5cybvv//+QVD2Xa655hpqa2uZMGECM2bMYNu2bVnR0VLuueceZsyYkW0ZCsV3SCQS3HDDDVx66aXs2rWLL774go0bN9KtWze2bNmCZVkZO/cLL7zAggULmDVrFgsXLuS5557L2Ln2RO6PPjXDzJkz+eyzzzj99NMZPXo0Dz30EAMGDLC3X3zxxWzdutV+PW3aNPLy8ujevftB1/roo49y5ZVXMnLkSC699FK2b9/OZZddxhtvvGH7kYUQOedTFkIgpcw5XWnSt6q5qk9xYKTLdAGUl5dzySWXAOByuZgxYwYnnXQSv/vd74hGo4wePZoJEybwt7/9LWM1TdeuXUtlZSUPPPAAV1xxBf/73/947733+Ne//gXAuHHjdjv/IRP/7TZpuGtra/n666958MEH0XWd9957j5EjR9K3b180LXkTsXDhQrp27Wq/x+/329sONhMnTuTUU09l2LBh/PWvf2Xy5MnMnDmT4cOH2z/Md955h4KCgqzo2x3BYJBrr72WO+64g5kzZ2Zbzm75z3/+g6ZpHH/88dmW0izFxcVUV1djWVbWfn9tidraWnbt2gXAypUrueOOOwDo2rUrr7zyir1ffn4+Qghef/11Kisruffee1mzZg15eXkZ03bYYYdRUlLCSy+9xJw5c3j66aeprq7mmmuuAeAf//gHQ4cO/c773nrrLYqKilpVS5s03B988AGDBw8mPz+fqVOnsmLFCkaPHs0LL7yAx+PJtrzv4PV6Of7443nwwQfp1q0bxcXF9O7dO6fjpIUQeDwe4vF4tqU0i2EYADkdtvjb3/6WMWPG8JOf/CSnLsy5hmmaLFy4kM2bN/O///0PgEGDBrFy5co9vi8vL48ePXrw0EMPZVyjw+Fg4MCBLFy4EF3X+fjjjznnnHPsGP1zzz2Xc889N+M6oI0a7lNOOYXZs2ezYcMGrrnmGi655BJmzpyZk0Y7zc0330xNTQ2rV69uM77j/v378+677/LFF19wxBFHZFuO4hAnkUgwZswYbrjhhmxLaZZx48Yxbtw4Fi9ezPz587PmpmuThhvg4Ycfpry8nEceeYRnnnmGnj17ZlvSXikqKuLYY4/NtowW07lzZ9xuN2VlZRx++OE55UuOxWIkEgl0XScajeL1enNKn2LfcDgcTJo0KdsyWszPf/7zrJ6/zRrubt2
60bVrV0aMGIHD4ci2nEOW66+/nlNPPZVRo0a1up/uQDjyyCNxu93U19fz6KOPsnHjRgoLC7Mtqwm1tbV8+eWX1NXVsXz5cnr06EHfvn2zLUtxCNBmDTck/bDKaGcWTdMyGl61P7z66qtMmDCBY445hvfee4+OHTuyaNEiJk+enG1pTfjoo4+47bbbqKio4KmnnsIwDJ5++ulsy1IcAqhhbsVeueWWW3IqsqRjx45s27aNI488knPPPZetW7c2iSDKBUKhEC+99BJPPPEE/fr1484772To0KF26JhCcSDs1XALIbxCiI+EEJ8JIdYIIW5Nre8lhFgmhNgghFgkhHCn1ntSrzektvfM7EdQZJpRo0bx5ZdfZluGzbBhw1i5ciVTp07l73//O/PmzeOYY47Jtqwm5OXlMXbsWBYtWsTChQvZtm0bq1evblNjHIrcpSWukjhwopQyJIRwAUuFEP8ErgHul1I+K4SYDUwCHk0910gp+wohzgPuAg5OjIwiIwghePPNN7MtowkfffQRq1at4osvvmDz5s05NzCpaRo9evRg7ty5tG/fnjfeeINRo0ZlNM5Y8f1hr4ZbJmeIpDMjuVIPCZwITEitnw/cQtJwn5FaBngeeFgIIaTKWNSmyTXDKIRg0KBBDBo0KNtSmmXIkCG88sorLFq0iKeffjqnw1UVbYsW+biFEA4hxKdABfA6sBGolVIaqV3KgS6p5S7AFoDU9jqgpDVFKxRtiXPPPVcZbUWr0iLDLaU0pZSDga7ACKD/gZ5YCHG5EGK5EGJ5NBo90MMpFArF94Z9iiqRUtYCbwOjgEIhRNrV0hVIZ3TaCnQDSG0vAKp2c6w5UsphUsphPp9vP+UrFArF94+WRJW0E0IUppZ9wEnAWpIG/Bep3S4BXk4tv5J6TWr7W8q/rVAoFK1HS6JKOgHzhRAOkob+OSnlq0KIL4BnhRB/AFYCT6T2fwJYIITYAFQD52VAt0KhUHxvaUlUySpgyG7Wf0XS3/3t9THg7FZRp1AoFIrvoGZOKhQKRRtDGW6FQqFoYyjDrVAoFG2MnMgOaFkW7733XrZlNMuOHTvYvn17TmssKyujpqYm5zL5Naa6upqPP/4Yv9+fbSnNEolEcvp7DoVCeL3VdOyYuxqLitZRVtaQ0+24fft2Vq1axc6dO7MtpVn29F/OCcMtpaSq6juh3jlDXV0d0Wg0pzWGw2GefFKjoSF3NXbvnuBHP6ohFotlW0qz1NQYXHRR7rah0xmh07iP8U17IdtSmsW9KUg4fE5O/19isRg31N5AzJm7v8W4bL5sYE4YbofDwemnn55tGc2yYcMGTNPMaY2WZVFR0YEdO0ZlW0qzlJSsYuzYsTlVkKExUkoWLHidTZty93v2eKoJdryXTadvyraUZun4XkcG7BqQ0/+X7du3s+24bdT1rcu2lGbJd+Q3u035uBUKhaKNoQy3QqFQtDGU4VYoFIo2hjLcCoVC0cZQhluhUCjaGMpwKxQKRRtDGW6FQqFoYyjDrVAoFG0MZbgVCoWijXHIGO5Zs2aRSCSyLUOhUCgyTps33O+88w5HHXUUPXv2ZPTo0dxyyy3ZlqRQKBQZpU0bbl3X2bhxI//v//0/jjjiCObNm0dNTQ27du3KtjSFQqHIGG3acMdiMTZu3MjAgQP597//zWuvvUa7du346quvsi1tryQSCZ5//vlsy1AoFG2QNm24A4EAI0eOZOLEiZx00knMnDmTsrIyRoz4TinMnCMej/PII49kW4ZCkXPce++9VFdXZ1tGTtOmDTfA2LFjWbJkCX/4wx946aWXsi1HoVDsJ6tWraJPnz50796dn/3sZ1x00UXZlpSztHnD7fV66dKlC08//TSHH344hYWFlJeXZ1uWQqHYByzL4tNPP2XatGn07duX5557jvz8fDZu3JhtaTlJmzfcaYQQdOvWjf79+/Pmm29mW44iy5SXl/Pqq69mW4aihViWxdatW+nSpQtlZWU88MADlJa
WUlFRkW1pOckhY7jT/PSnP2XFihWq1/09ZuLEiUybNo01a9Zw/PHHqyijNoDT6WTs2LFcccUVFBcX89RTT/Hoo48yY8YMPv3005yupZoNcqJ0WWvSqVMnvF4vmzZtokuXLgghsi1pt2zZsoUuXbpkW0ab4euvv25xrcrly5czb948OnXqRFlZGZs2baKkpCRnfwuKJIMHD2bt2rXceOONLF26lNLSUgCmTJlCRUUFDzzwAB06dKCgoCDLSrPPIWe4Ae666y6GDBnCJ598krN/1gsuuIBPPvkk2zLaDHPnzmXTppbVWdy+fTsPPvggJ598Mueccw7PPvssw4YNy7BCxYHicDjIz8/n/vvvb7J+3rx57Nixg+nTp9O/f3+6devGhAkT0LRDzmHQYg5Jww0wffp07rnnHqZPn55tKYpWYF9mxA4ZMoTevXvTvn17fvnLX7J06dKcvYArWkbHjh2ZP38+S5cuZe3atVx22WWMGzeOs88+O9vSssIhe8k67bTTeOONN1T+ku8hL7zwAiNGjOCdd97hn//8J+3bt8+2JEUrccwxxzBp0iSmTp1KWVkZ7777brYlZYVDtsft9/u54YYbuO222/jDH/6QbTk2O3bsYOPGjYTDYd5//326detGjx49si3rkKJXr1707NmTcePGfa9vpw9VNE2jf//+HHbYYd/bO6lD9ledDg90Op05NQX+lVde4Y9//CN1dXU88sgj/OUvf8m2pEMSIYQy2oc4mqYpw90cQgivEOIjIcRnQog1QohbU+vnCSE2CSE+TT0Gp9YLIcSfhBAbhBCrhBBHZfpDNEfv3r1xuVysW7cuWxKasHnzZjZs2MDs2bPp3Lkzf/rTn3C73axYsSLb0hQKRRuiJV2SOHCilHIQMBgYJ4QYmdo2VUo5OPX4NLXuFKBf6nE58Ghri94Xrr76ahYvXkxNTU02ZQDQpUsXevTowZIlS1iyZAnLli1D13UGDBiQbWkKhaINsVcft5RSAqHUS1fqIffwljOAp1Lv+1AIUSiE6CSl3H7AavcDv9/P448/no1Tfwen00nfvn3585//jKZpvPLKK5x99tl4PJ5sS1MoFG2IFjkBhRAOIcSnQAXwupRyWWrT7Sl3yP1CiLT16QJsafT28tQ6BXDyySfz8ssv43Q6efHFF7nggguyLUmhULQxWmS4pZSmlHIw0BUYIYQYCMwA+gPDgWJgnwKmhRCXCyGWCyGWR6PRfZTd9rn44ou/twMrCoXiwNinYXcpZS3wNjBOSrldJokDTwLpJNhbgW6N3tY1te7bx5ojpRwmpRzm8/n2T71CoVB8D2lJVEk7IURhatkHnAR8KYTolFongDOB1am3vAJcnIouGQnUZcu/rVAoFIciLZmA0wmYL4RwkDT0z0kpXxVCvCWEaAcI4FPgV6n9/wGcCmwAIsDE1petUCgU319aElWyChiym/UnNrO/BK48cGkKhUKh2B1qaplCoVC0MZThVigUijaGMtwKhULRxlCGW6FQKNoYynArFApFGyMn8nEbhsFjjz2WbRnNUldXR3l5eU5r/Oqrr+jePY/S0lXZltIswWAZCxYsyOncLIZRzcCBufs9OxwxCjYVMPCxgdmW0ix52/P4IPYBO3bsyLaUZlm9ejV96vqQKMjdQitfG183uy0nDLfD4WDMmDHZltEs5eXlaJqW0xqdTicjRxbzwx/+MNtSmuWJJ8r4/e+PRdcD2ZbSLCedtIIXX8zd77m+vp7FiyuYOGb30yMkEomFlBKBsNcBaMJhr8skq1atora2luOOOy7j59pf6urqmDViFl27ds22lGYZpY1qdltOGG4hBH379s22jD2yfv36nNa4evVqOnTokNMa/X4/DQ09iceLsi2lGSSa5m7VNty+fTv5+fkEAq1zsaqursbv99OrVy+qqqqSK3069eFaCgoK+azibd6LvEpDrAbLEPi1YsLxMJF4mEm9b8Xr8tEpvytF/hLq6upwuVyEQiFKS0v
ZtWsXwWCQSCRCaWkp4XAYh8OBruuYponD4SAcDtvbCgoKqKystKuxpwtX7Ny5E4fDkdO/xYKCArp27Uq3bt0IhUL4fD7C4TAulwun00k0GiUQCNjb4vE4QghcLheRSIRgMEhDQwM+nw9d1/F4PCSnsIDb7SYUCpGfn084HCYvLw/DMLAsC4/HQ0NDA4FAgEgkgtfrxbIsDMPA6XTi9XrtHEZ7KgSSE4ZboThU+fOf/8yJJ57ICSec0KrHjRohPo++Q8ioo7x+DVWxHXirAwjLSXutF118P+SLXR/jdAQYGBiMlu/gs+oPeHXDIk7ucTZjepxGB28XpJR4vV7i8bhtRNLGybIs2xiljUh6XyEEkUgEt9ttP7vd7lb9jAeDUChEQUEBoVCIoqIiDMNA13WKi4upqamhqKjINsJSSuLxOKWlpdTU1FBcXEwkEiEvL49oNIoQAsuy7GNWVVVRUFBAXV0dTqcTTdOorq6msLCQqqoqgsEg9fX1CCHweDxEo1E8Hk+Lks8pw61QtEE0ofGnjx5BN+N0DXald1FvPA4/895aQDDg5rAenajaHKYqvoZBA2spdrdHNy06+fqwZscqMJy083Tg5MNOB7CNTnpZ0zQsy0LTNAzDaHJuIUST0nBtuYSYz+cjFArhdDqpr6/H4XCgaRp1dXVcddVVDBs2jMmTJxOJROzPXFtbi9frpb6+HqfTSSwWw+lMmlJN0+yLW0FBAYlEAr/fj2VZzJ8/nzfffJPHHnuMgoICdF23t0kpW2y0QRluhaJN4nHk8Yfhf+bMRWdQ4TbZ4KwmT+RRLHqQF/MQKctn19YoX+6owJP3Od6qYmqKd+F3FuPU3NTVx4glEozsehxO6cLv9xMOhxFCJG/9XZJELIzL6QDhxZISh8NBPB7H7/djGAYul4twOEwgEGizhjscDlNUVER9fT35+fmYpomu6wSDQf7xj3/w8ssvY5omF198MYWFhcTjcYLBoN3jDoVCuN2O+SXYAAAgAElEQVRuYrEYgN3jLiwspLa2loKCArZu3cqbb77J9OnTicfjPPnkk9TW1hIMBgmFkjVq0sbe5/O1qC1VOKBC0QaJxWL0bteT5855jnq9lrc3vMO/1/6bL3as4eOvVvD6Z+9wyUmXcsbgczg2eD7VO6Czv4ianZXUh+r4onwdX5Sv54+v34Hm1QiHwwSDQUzTxCVj/PXGH7D4D0fw7K2HoYercLvdCCEoLCwkHA7bvdK8vDxqampsw5Vp1qxZYxu71sDlcmEYBg6HA9M0k4O6qTsKgGg0yvTp0+nRowfLli1DCGH7ow3DQNM0pJRomobD4cDhcNj+brfbzapVqxg+fDhXXHEF4XAYSAZjpN1KLpcLl8tl9+ZVj1uhOITJy8ujsrKSLv7OPPqz2Vz13FVU1FTQt6QfDunASpj87b1F+B1+orEIbqeLnR856d9jGNsqNlJfUkGp3o1n/rWIsT3HceqPTqWyshKvGz7514PUhXTadx9Gv8E/QbjyiMfjOBwOqqur7cHJ4uJiKisrKSkpyXiPu6qqigceeACn04lpmnTr1o3LLrvsgI/rdDrRdR1N09B13f4cc+fObXIxSiQSTJgwgYsuuoizzjqLnj17ctdddyGlTF7sXC4gaYgvu+wydu7cycKFC3n22Wepq6uzj2OaJnPmzOGyyy7DsiycTqc9juBwOFqu+4A/uUKhOOhEIhHy8/MBGOYdxjMXLeSMv5zJlxXrCDgD+ISPuIhTGd/FjsrtVO+q5qfDT6PU3RkLB0fmD+Pfn/2TYo8Tj+aioaGBuooN/P2VB6jYvJz2XY7i2HNmUdi+J5oQOBwOLMuipKSEcDiM0+mkqqqKQCBATU0NeXl55OXlZeSzSimpqqri448/Zu7cuaxfv54bbriBSy+99IAvGNFolOLiYurr6wkGgxiGQSKRYOHChSQSTWO8t23bxl133cVrr72G3+9n+fLlmKbZZB9N03jttdeQUrJy5crdfpY5c+Z
w3nnnUVhYSCgUQgiB1+slkUjYPf69oVwlCkUbJN07k1KiCY2+xf1481dv0rfjYdTH6lm3438s37yCVVtWEcgPMnzAcKJ6lK93bkY4Neq3Jhjd5xTy85zc+NcpbNq2ga83rObLzz/h2NNn8PMpCyjp2BtBcjAybVDSYYFCCJxOJ5Zl2S6CxrRmD1xKyfTp05kzZw533HEHHTp04De/+Q0PPvjgAR87feHxeDxUV1cTiUQA0HXd3ue+++5rModj9erVLFu27DtGG5I+7hUrVjQx2h06dGD+/Pn2a6fTSbt27dB1nYKCAvx+P5C8i1KuEoXiEEbTNGKxGCLVG9Z1nY4FHVky+VVe+/w1Xv38H3yw5n12VO0kkghTZTmIOxJYCQsMWLvuC8YOP5njSn9B+1GCq+47nx9UOhg8bAyHDT2FvPwC20inox6EECQSCVwuF6Zp4na77UHKbxuc9O1/a33Wu+66iwsuuACHw8Hzzz/PkiVLWLp06QEfOx0GWF9fT3Fxsd3jTrs+IGnEX3zxRYqKinZrrPfGmDFjmlwIDMNg165dFBYWUldXZ/e4VTigQnGIE4vFbNdENBrF7/dTW1tLIBDgxL5j+PnwX7BkxRJ2NOwgEUsQ8OYTjUSJRxMgBcYJBt07dOPEESdSXFRMcEcxW97/jJN+diWl7TtTVVWF3+9H13WcTqdtpNPxyV6vl9raWnviTiAQyGgcd4cOHbjwwgtZsGABpmly3XXXtcpx0+GALlfSXZQeIGxsoH0+H/tb0PyXv/wld999N//+97/tdQ6Hg2Aw2CQcELAHgFvCIWe4DcOweyEKxaFKXl4e9fX1QPIPn56Nl/bZhsNhTh5yMnW1teS53URrq/h6/sPENqzF26kL/X/7exIuFw5g147t7Fi5DY+/Pd2696W+upqiQICErrPh7y/wyd8WIFxe+p9+Dn1Gn0hRSQmmaVJaWkooFKKkpMSOY84UBQUFdOzYkXPOOYfJkye3Wr6beDxOfn4+kUgEn89nz2L0er32PolEAo/HY0ee7AtnnHEGQJOBTikl4XAYv99vr3e73U165XvjkDHcUkqWL1/OBx98gKZpjBw5kqFDh7bZ+FKFYk+Ew2F7Nl80GiU/P9+OG04/71y5DFG+ibLXnsPl83PkrfeD5kI4NMxdO1h74/WYQsOKWVhrP6f9kUdR9vw8trz7NpGGevK79eIHZ57P+NtmYRk6X7z1On+deD7ugiJO/H/XkN+xMz369aOurg6fz2cPlmaK2tpa8vLyWjVJWWP/vZTSdvG89NJLdOzYkYaGBjZv3syKFSu+MxGpJWzYsIGhQ4eyYcMG+3xnnXWW3bFsHHq4L7bqkDHclmVx1llnMWvWLHt58+bNynArDkk8Hk8TH3cikcDr9aLrOl6vl13v/ovNs26k23mXMmDaHQgB4XVrSf8dpBAMvPE+pIDYju0UfbiURCKBQ2gMmzINnC7i0QiJaIRIVQWWlPQYOpzuQ0dQV13N4ptmEuzWnUvufQBfMJjxHnemcLlcxONxNE2zp/ILIZr0kB966CEeeuih/Tr+tddey7Zt25g1axaQ9NdfffXVeDweLMvC7XbbF4t9acNDJqrkxhtv5PHHH6ekpISOHTvy2GOPcdNNN2VbVpslEolw8803Z1uGohnS0RyNJ4BYloUQgsp3lrD+gVvoOWEywd6HEd9aRrx8MyIWRsTCEAtDNEx045dE1q/FaKil/YhRdD7meAq69yJauYPw1i3EqnZhhMMY0Qh6JEK8IUSsvg6Hw8HxF11M/ZYtPP7rK+wwtrZIOqwy7W9OG9JZs2btt1/726SNNiS/txtvvJG6umQ7hkIhotGonQelpe3YNi+Tu+Hqq6/m/PPPZ/z48TidThYvXsxzzz2XbVltFl3XW2XUXpEZ0lEdjWfyRSIRRNVOdr70V7qfeQGe4lKsuio0NIRIzQgEBGAhwUouY0kSkRCmlBgWmJbEkhJLJpeN9LMlMbHQTXB7fBwz4UJ
efvB+Hv7lRK5b+EzGP28ikcDn87XqcdPT171eLzU1NUgpeeSRR7j33nubuEaKiopwOBxNwiJramp2e8yCggJcLpd9IbUsy95XSsnjjz+Ow+Hg5ptvtiNVTNPcp3DAQ6bHXVpaSmFhIU8++SRXXXUVPp+PkpKSbMtSKDJC2qedzjxXV1dHYUEBOz5fSbC0I/7CEqxQLcQiiHgILR7BEQ+jxSPJR7r3HQ1DLATRMFYkjIyEMCMhjEgII9xAIhxCDzWQCDWQCDcQb0g+x0L1WIbOSZMupaa8nIaKiox+3o0bN7J06VIuuOCCVj1uQ0MDhYWFJBIJAoEAjz32GLfddluTyTdHHHEEK1asoLy8nI0bN1JRUcHy5csZPnz4d453+OGH89Zbb1FeXs7nn39OeXk5H330EYMGDbL3MU2TP//5z9x9991s27bNngofiURa3OM+ZAy3pmksXryYp556iqOOOooHH3xwj/lsc5WXX36ZrVu3ZluGIsdJJyTyeDyYppkMa6urpfY/S9B8XvSGGohFkNEIxJKGWotHcMbDOOIRRCwC8Yi9jxkJI6MRrGgYKxrBikQwIhGMSAg9EiaRfg6HSYRDJMIh4uEQeiyBy5/PO89mtsedprXHrHw+H5FIBKfTyc6dO7/jXh0wYACzZ8+muLjY9oXX19fTrl07Zs2aRb9+/ex9PR4P1113Hf369SMejxMIBNB1nQ4dOvDEE08wYsSIJseeNWsW4XDYHmz9XocDDho0iIEDc7esU3Ps3LmTn//855x55pk888wzeL1e5s2bl21Zihwl7RqB5B8+kUjg0QSxr76gZMxpWNEwpqbh0ESye6aBQ3OgaWBJEJYESyItibQspCmxLDAtC8sCw5LolkSXFrqZdKEYlpVcZ0kMM7UsoWPPHuit5A8+2Oi6Tl5eHrFYjF/96ld2dEma7du3M23aNEzTpH///jz88MN4vV4ikQhDhgxh7NixrF+/HoCxY8dywgkn2C6dSCTCLbfcwsqVK7Esi82bNzc5txCCK6+8khdeeAG3271PoYaHnOFuK1iWxfr16+0fyY4dO/D5fIwbN45LLrmESZMmsXPnTjp06JBlpYpcpHH4mh3SpgmkZWLFIhgaaJoDSxNITYAmkA4BacNkgbQklmVhmclnwwLDtDAk6IaFIZN+7YRpJQ25aWFYFglLoJsS3bLQTYtYuPWy9R1s0gUMnE4nTzzxBP/5z3+YMGGCvb26upoPP/yQPn36cOedd+JwOIhEIng8HuLxeJNIkEAgQLt27ewoH7/fz0033cQpp5zCihUrvnPuP/3pT5x//vlNCli0lEPScB933HG8/fbb9O3bN+vhgJs3b+aNN974znrTNFm2bJn9OhwOs2HDBu6//35uuOEGTj75ZN54441W9+kpDg0SiYQ9U9E0TbxeL7G6WsxwhNjObfiCBZiaA80hEBoIhwChYaFhITGkxLSSBtkw071qiSEtEibo6R61mRyMjEajxHUdPD4SlkwZbtAtk3gkQiZjSqSUvP322xmpYdk4qZPD4eDdd9/9zj6HH344ixYtIj8/H6fTyeuvv05FRQWFhYUMGjSISy65BMMw+NGPfsSyZcsoKyvD5/Nx5pln4vV6efnllznttNP47LPPmhz3448/5uyzz7Y7b/sSmXNIGu5JkyYxZMiQVskedqA0rhTSGI/Hw+OPP27r27JlC8cddxznn38+Tz75JK+99hqffPLJwZZr4/P5GD9+PC+++CJnnXVW1nS0dc466ywWLFjAyJEjWzUiwuv1UlFRgRACv9+frIMYyMeSUP/lGhz9+iN8XtC0VE87FUmiGwiPF1NaScNrGIS3bSEWDhMzLRKmJG5I4pZJ3ABXSQcIBIlFosQTCYRhkkjtp1uShGGyefVq+g4fsXfR+4mUktmzZ+82215rkK70EwqFmD17Nqeffjrr1q1j3bp19vlnzZrFPffcgxCCqqoqrrnmGn784x/z/PPPc9ZZZ9npWSdPnsz
zzz/PfffdByRnct94441NjHKXLl0YM2YMf/3rX5k+fTp5eXktzgqY5pA03LlE9+7dmThx9xW5G9OxY0eWLFnCvHnzGD16NJMmTToI6prH7XZz5JFH8s477yjDfQAcddRRTJ06tdVD2dLFetOTRQKBAA2hBo6Yfjtrbr0a8/MwpT8YiPS4MTWBKUDEI1i1NTg6dMYyTBo2rME0JLF4nLiuEzct4gZEDZO4YREzLfQd29BxIP0FOAoKkZEYhsOJbkLCtNjw+So0dx5HHHNsq322g0m6sK/X68Xr9fLRRx9RWlrKhRdeaO/z5Zdfsm7dOt59913OPfdcJk2aRHFxsR3uZ5qmXTzBNE3y8/MZP348c+fO5f7776esrMzORwJQWFjI/fffz1VXXUWvXr3sqkP7MgFHGe4cweVy8YMf/IDbb7+9yTRYhaI5TNO07+aSvUYHIlCEblho4TDVX3xKQd/+aKaBwzIRehy9citsL0/GalugWxYJK9mDThjJXrRJKnZbQiKeIKabxOoaiG/ZQsy0MFwe/B07s61sMw0NEXqOOIyBGXBjHAzShX3j8TjFxcUUFRWxZcsWYrGYPakJkr3uTZs2ceedd7JmzRpeeeUVnnzySaSU+Hw+O3xw4MCBXHfddVx//fUsWrToO+4PTdOIRqNs376dww8/3J7k43K5iMViLZ7O32LDLYRwAMuBrVLK04QQvYBngRLgE+AiKWVCCOEBngKGAlXAuVLKspaep7W44IILeOaZZ9qcj7gthjAqDj7pqdpp451OrxoCLK+XRDwGukG4tgbC9YhQA5om0BBIJKa0sGTScBsWKZ/1N75rI+3/tpL+cMuSmFJiWmDqOqGaWmKRKA6PFylbP0zvYJGfn29XY6+trcXtdrNx40Z+/OMfc/LJJ1NfX28PYM6ePRspJX//+98ZNWoU06dPt6vd+/1+pJRce+21LFiwoInRnjJlit0jTycH27BhA507d7bLxe3rHdm+9Lh/A6wFgqnXdwH3SymfFULMBiYBj6aea6SUfYUQ56X2O3cfztMqTJ48mfHjx7c5w50rTJo0iTVr1lBVVcUnn3xiD84ocoN4PG5nsItEIuTl5SXTrB7+Q4qOGcvOf72EhYGsqsIpLDTDQmgCkTLclmxkiKVM+rZN2cSAG40GLw2ZHLA0pcTQJfGaOiwJDq+X8dOm2jlSMsGMGTO4++67M3LstMspkUhQUFCAlJJjjz2WE088kVgsZlem0TSNfv36cc011wDwwAMP8Nvf/tYOJ0wkEvYsyfvuu8822jfffDNXXHEFXq/XnuXq9XqJxWJ2VkfArhbf0tS4LereCSG6Aj8FHk+9FsCJwPOpXeYDZ6aWz0i9JrV9jMjC5VgIoWZO7ic1NTVs3LiRadOmccYZZ+D1etmxY0e2ZSka4ff7CYVCTXJJFxQUEBcOgj36YlgQ1y2ikSjRaIKIaRE1LCJG8jlqWMSMpLGO6jI5MGlZJFLhf7qUxC2JYUoMKUikety6ZaH585OuBLcP3TAYddLJGStbBrBs2TJGjRqVkWPn5eU1acO0y6O+vh6fz0d9fb1d3f7www+332cYhl1LMhaL4XK5mhQBTtOvXz+KiopwuVxomkYwGCQajVJQUGDnR0n3tPcln3lLe9wPANOAQOp1CVArpUxP5i8HuqSWuwBbAKSUhhCiLrX/rharagXy8/NZvHjxwTzlIcP8+fO5/PLL6du3L4lEgjPPPJMHH3xwvzOkKVqfSCRCIBBoslxXV0cgEEDr2Q+tXWdiO8rRZQIHAodGKjNgsq8mZdNed3pyjR0tYproZtJ4J6x0PLfEMCFWU4sl4MgxJ+AtLqGyspLCwkJbT1sineclHUeddlWmixK7XC6klDgcjiaDh0IIO+46ncOk8SNNuhp8ep2u63acd9rFlfajNx7A3Bt77XELIU4DKqSUrRqbJoS4XAixXAixvLWycClah6uvvprbbruN//73vxQVFXHhhRdy2223ZVuWohF
pv2s0GrUHvNK39T2OHo23S3eipkUsFR2S7GFbxAyDmGEQNUyihvnNdttIpwYqTZmM504b81Sct24lXSilPXvx1eo1nPbrKQSDwYxWv8kk6VDAtHFuHNOdzsCYzr7Yq1evJoUR0vMz0i6StP+7qqoKSJYsGzhwoL0tHXWiaRqmaTZ5H7R+HPfRwOlCiFMBL0kf94NAoRDCmep1dwXSCTa2At2AciGEEyggOUjZBCnlHGAOQIcOHdpmTshDmEWLFrF69Wo+/PBDnnvuuTbZmzqUSf/x03/+dARE2uAMm3obf79wPNFoCIcQyYFJmex1S8ACrHQWQCSGkYwkSRpnC8OEhJU05rplpaJPkgbcEwjSvu8PaNe3L8WdOtnlvjL1OTM5YJ8uEhwMBqmrq8PtduNyuexKQtXV1QQCASKRCIWFhRx77LG8/PLLhMNhpkyZQrdu3WzDDlBeXm5nAhw6dCidOnWy86Snc8rU1NTYleXTpcsSiUTrhgNKKWcAMwCEEKOB66SUFwgh/gb8gmRkySXAy6m3vJJ6/UFq+1uyrSbr/R6Tzvmyr1NxFd8lEz9/0zTtP3r6lj4SieB2u4lGoxT27kNe915UrPkUTWg47JSuFhINKVI9wNTgpGnJVArXdD4SYfe0dcsiZiZdJgnLJBAsRHO76TVoEIHCQurr69E0LSO97ltuuYUbbrjBroTe2qSzA8ZiMQoLC7EsC9M0KS4utsuyRaNRAoEAUko7PwxAZWUllZWVzR47fReUzr2taRo1NTX4/X6qq6ttH3ra7ZIuFtwSDuRSNh24RgixgaQP+4nU+ieAktT6a4DrD+AciizicDiU0W4FMtEb9fv9NDQ0EAqFcDqddjxyJBKhpKSESCTCKY88SVy3iBsmUd1MuUdk8jlhEdWT7pN42o1iSqImxAxBzLBImBZxM7leNy0ShklRl+70O/pYvHl+xp53Hg0NDZSWlmZscDLtg85Ujz4QCFBTU4Pb7aampsaOq04XQN61axcOh4P6+noikQjDhw+nW7duez1ux44dOeGEE+wLgsfjQdM0ux5oaWmpHcmSvijtSxvuk+GWUr4jpTwttfyVlHKElLKvlPJsKWU8tT6Wet03tf2rfTmHQqHYO9FolLy8PHw+n52EPz0DsK6uDq/Xi3S6GXTRpUlDbSYNd0T/xredjC4xk/5vUzYy4slp7XHDIm77uyXBjl3oPWwE28rK+MnEidQ1hPD5fNTW1jYp9dWWiEQidsX1YDBohzQWFhba7hHTNPH7/Xi9Xo4++mjmz59PYWFhs8d0u908/vjjjB49Go/HQ0NDA7quI6W0o1VqamqScfepCjjAPrWhmu2hULRBPB4Puq7bUQrRaNSewZefn58sDFBUTOmo49DadSJqSCKGRcRMhgR+ExYov1k2LWK6mexlG8kQwbhpkrAk7mAB7fv2o6piJ5GGEL0HDyYQCBCPx/H7/Rm7M5s6dep38li3Jl6vl3A4jNPpJBwO2+GA6YtgQ0MDDoeDWCxm16Q8/PDDWblyJfPmzSMYDBIIBAgGgwSDQe6//37WrVvHqFGjCAQCJBIJ8vLy7LuGdGX3QCCAYRhNih9nIhxQoVDkEI2nYqcjIhrnzkgPWvYaMYphF1/KW/ffgx4J2++XqYk4UiYHKU3S/m6S6VztCTgW3uJS8jt0IhKN4vF4uev1f9saGg+KZoLi4uKMHDdN4/JiaRqXJ2u8LZ0+V9M02rdvzymnnMLXX3+NYRj2zEjAHm9I59e2LMuOHmn8HUFyfKJx1ElLUYZboWiDmKZph6qlDadhGGiahq7r9rPb7ebYSb/ClJJX/3ArsomBSkaYmJJkTHd6Wrv8Ji+3IQWaKamrqaFnp05ces89aKlMePF43I5JFkK0yUrvjY1uenYjJHvi6XS50LQ3nN7WeOJM45A+XddxuVx2pIiu6/Z7E4mEvS39nTW+ULQU5SpRKNog6ZjtWCxmJ/dPr0tXLU/f6muaxogJF/OLe/9E1yH
Dk/7s1KPLsBF4O3QkZlqph6TfcaOJWySnwFsQi0Q56qSfMPGPfySvqAiPx4NlWeTn5xOPx8nPz2+zcdxpw5qeDJM2no2NbnqqeroHns7kl3arpEMW0ymcXS6XXczZsiycTqe93eVyYRhGk23pC96+3LW0vUukQtFGiEajVFZWEovFKC8vR9d1SktLW+34aTeCEAKfz4cQwl5XVFSEEILOnTvb20+8+P849uxzMRv1AB0uF5ZlYpnf9MSdbjd6o2K5AG6vF7fXa/cOg8GgnVairSaYguQF0OPxNGlD+MZdkt7WmHQ19t1tS7Mnv/X++LS/jTLcCkWG+O9//8u1115LRUUF1157LSUlJTz99NOtdvzGE1PSBmRvz44WJgrzNhM33dxx2yqNUyg3/ix7+ny58NmVq0ShyACRSIQ333yTuXPnMnDgQP7yl78wYMAAli5dmm1pikMAkQuTGouKiuRFF12UbRnNEo/H7VlUuUpdXR1OpzNjM8xag507d7JzZylSZiYCoTUoLNxKjx5d9r7jXjBNk82bN9O7d282btxIz549qa+vx7KsA/odmaZJVVUV7du3P2CNmSIcDmOaJsFgcO87Z4mqqiry8/NbPFMxGyxYsICamprddutzwnALISqBMAc5g+A+UIrStj8obfuH0rZ/HGraekgp2+1uQ04YbgAhxHIp5bBs69gdStv+obTtH0rb/vF90qZ83AqFQtHGUIZboVAo2hi5ZLjnZFvAHlDa9g+lbf9Q2vaP7422nPFxKxQKhaJl5FKPW6FQKBQtIOuGWwgxTgixTgixQQiR9aILQogyIcTnQohPhRDLU+uKhRCvCyHWp56LDpKWuUKICiHE6kbrdqtFJPlTqh1XCSGOypK+W4QQW1Pt92mq5F1624yUvnVCiJMzqKubEOJtIcQXQog1QojfpNZnve32oC3r7ZY6l1cI8ZEQ4rOUvltT63sJIZaldCwSQrhT6z2p1xtS23tmQds8IcSmRm03OLU+G/8JhxBipRDi1dTrzLTbt6sTH8wH4AA2Ar0BN/AZcESWNZUBpd9adzdwfWr5euCug6TlOOAoYPXetACnAv8EBDASWJYlfbeQLG/37X2PSH2/HqBX6nt3ZEhXJ+Co1HIA+F/q/Flvuz1oy3q7pc4ngPzUsgtYlmqT54DzUutnA1ekln8NzE4tnwcsyoK2ecAvdrN/Nv4T1wALgVdTrzPSbtnucY8ANshkNZ0EyfqVZ2RZ0+44A5ifWp4PnHkwTiqlfBeobqGWM4CnZJIPSRZz7pQFfc1xBvCslDIupdwEbCD5/WdC13Yp5YrUcgOwFuhCDrTdHrQ1x0Frt5QmKaUMpV66Ug8JnAg8n1r/7bZLt+nzwBghMpPEYw/amuOg/ieEEF2BnwKPp14LMtRu2TbcXYAtjV6Xs+cf8cFAAv8WQnwihLg8ta6DlHJ7ankH0CE70vaoJZfackrq1nRuI7dSVvSlbkGHkOyd5VTbfUsb5Ei7pW73PwUqgNdJ9vJrpZTGbjTY+lLb60jWoD0o2qSU6ba7PdV29wsh0vPYD3bbPQBMA9KpFkvIULtl23DnIsdIKY8CTgGuFEIc13ijTN7b5EQoTi5pacSjQB9gMLAdmJUtIUKIfGAxcLWUsr7xtmy33W605Uy7SSlNKeVgoCvJ3n3/bGn5Nt/WJoQYCMwgqXE4UEyykPlBRQhxGlAhpfzkYJwv24Z7K9C4ZHLX1LqsIaXcmnquAF4k+cPdmb7FSj1XZE9hs1pyoi2llDtTfy4L+Avf3NYfVH1CCBdJw/i0lPKF1OqcaLvdacuVdmuMlLIWeBsYRdLNkE4D3ViDrS+1vQCoOojaxqXcT1ImC5Y/SXba7mjgdCFEGUmX74nAg2So3bJtuD8G+qVGXt0knfSvZEuMEMIvhAikl4GxwOqUpktSu10CvJwdhR0Bo5UAAAF0SURBVLAHLa8AF6dG0kcCdY3cAgeNb/kQzyLZfml956VG03sB/YCPMqR
BAE8Aa6WU9zXalPW2a05bLrRbSkc7IURhatkHnETSD/828IvUbt9uu3Sb/gJ4K3U3c7C0fdnoYixI+pAbt91B+V6llDOklF2llD1J2rG3pJQXkKl2+//t2z1uwkAQhuG3g5qOlgNQpUxBC9fIMZByi5wgkVJwBeAANBAgRX5ukibFDIIGJBf2stL7SC7ASPtphEfyjt3GZLXJQUx+v4l9tHnhLCNigv8BfJ7yEHtPK+AHWAKDjvK8E7fNf8T+2NO1LMTk/CXreAAeCuV7zfX3+eccXvx+nvm+gGmLuR6JbZA9sMtjdg+1u5GteN1yrTGwzRxH4Pni2tgQw9EF0Mvv+/n5N8+PCmRbZ+2OwBvnJ086vyZy3Qnnp0paqZtvTkpSZUpvlUiSGrJxS1JlbNySVBkbtyRVxsYtSZWxcUtSZWzcklQZG7ckVeYf2tkbinO+r1AAAAAASUVORK5CYII=\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## Výsledky\n", + "\n", + "Pozrime sa, či sme boli úspešní v tréningu Petra, aby bojoval proti vlkovi!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Killed by wolf = 1, won: 9 times, drown: 90 times\n" + ] + } + ], + "source": [ + "def qpolicy(m):\n", + " x,y = m.human\n", + " v = probs(Q[x,y])\n", + " a = random.choices(list(actions),weights=v)[0]\n", + " return a\n", + "\n", + "print_statistics(qpolicy)" + ] + }, + { + "source": [ + "Teraz vidíme oveľa menej prípadov utopenia, ale Peter stále nie je vždy schopný zabiť vlka. Skúste experimentovať a zistiť, či môžete tento výsledok zlepšiť úpravou hyperparametrov.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 13 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(lpath)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby AI prekladu [Co-op Translator](https://github.com/Azure/co-op-translator). Hoci sa snažíme o presnosť, prosím, berte na vedomie, že automatizované preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho rodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre kritické informácie sa odporúča profesionálny ľudský preklad. 
Nenesieme zodpovednosť za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sk/8-Reinforcement/1-QLearning/solution/notebook.ipynb b/translations/sk/8-Reinforcement/1-QLearning/solution/notebook.ipynb new file mode 100644 index 000000000..7830bcc9a --- /dev/null +++ b/translations/sk/8-Reinforcement/1-QLearning/solution/notebook.ipynb @@ -0,0 +1,577 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "488431336543f71f14d4aaf0399e3381", + "translation_date": "2025-09-06T15:07:53+00:00", + "source_file": "8-Reinforcement/1-QLearning/solution/notebook.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# Peter a vlk: Úvod do posilňovacieho učenia\n", + "\n", + "V tomto tutoriáli sa naučíme, ako aplikovať posilňovacie učenie na problém hľadania cesty. Prostredie je inšpirované hudobnou rozprávkou [Peter a vlk](https://en.wikipedia.org/wiki/Peter_and_the_Wolf) od ruského skladateľa [Sergeja Prokofieva](https://en.wikipedia.org/wiki/Sergei_Prokofiev). Je to príbeh o mladom pionierovi Petrovi, ktorý odvážne opustí svoj dom a vydá sa na lesnú čistinu, aby prenasledoval vlka. 
Vytrénujeme algoritmy strojového učenia, ktoré Petrovi pomôžu preskúmať okolitú oblasť a vytvoriť optimálnu navigačnú mapu.\n", + "\n", + "Najskôr si importujeme niekoľko užitočných knižníc:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import random\n", + "import math" + ] + }, + { + "source": [ + "## Prehľad posilňovacieho učenia\n", + "\n", + "**Posilňovacie učenie** (RL) je technika učenia, ktorá nám umožňuje naučiť sa optimálne správanie **agenta** v určitom **prostredí** prostredníctvom vykonávania mnohých experimentov. Agent v tomto prostredí by mal mať nejaký **cieľ**, definovaný pomocou **funkcie odmeny**.\n", + "\n", + "## Prostredie\n", + "\n", + "Pre zjednodušenie si predstavme Petrov svet ako štvorcovú dosku veľkosti `width` x `height`. Každé pole na tejto doske môže byť:\n", + "* **zem**, po ktorej Peter a ostatné bytosti môžu chodiť\n", + "* **voda**, po ktorej, samozrejme, nemôžete chodiť\n", + "* **strom** alebo **tráva** - miesto, kde si môžete oddýchnuť\n", + "* **jablko**, ktoré predstavuje niečo, čo by Peter rád našiel, aby sa nakŕmil\n", + "* **vlk**, ktorý je nebezpečný a treba sa mu vyhnúť\n", + "\n", + "Na prácu s prostredím definujeme triedu nazvanú `Board`. Aby sme tento zápisník príliš nezahltili, presunuli sme všetok kód na prácu s doskou do samostatného modulu `rlboard`, ktorý teraz importujeme. 
Môžete sa pozrieť do tohto modulu, aby ste získali viac podrobností o interných častiach implementácie.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from rlboard import *" + ] + }, + { + "source": [ + "Poďme teraz vytvoriť náhodnú dosku a pozrieť sa, ako vyzerá:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "width, height = 8,8\n", + "m = Board(width,height)\n", + "m.randomize(seed=13)\n", + "m.plot()" + ] + }, + { + "source": [ + "## Akcie a politika\n", + "\n", + "V našom príklade by Peterovým cieľom bolo nájsť jablko, pričom sa vyhne vlkovi a ďalším prekážkam. Na dosiahnutie tohto cieľa môže v podstate chodiť, kým nenájde jablko. Preto si na každej pozícii môže vybrať jednu z nasledujúcich akcií: hore, dole, doľava a doprava. Tieto akcie definujeme ako slovník a priradíme ich k dvojiciam zodpovedajúcich zmien súradníc. 
Napríklad pohyb doprava (`R`) by zodpovedal dvojici `(1,0)`.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "actions = { \"U\" : (0,-1), \"D\" : (0,1), \"L\" : (-1,0), \"R\" : (1,0) }\n", + "action_idx = { a : i for i,a in enumerate(actions.keys()) }" + ] + }, + { + "source": [ + "Stratégia nášho agenta (Petra) je definovaná takzvanou **politikou**. Pozrime sa na najjednoduchšiu politiku nazývanú **náhodná prechádzka**.\n", + "\n", + "## Náhodná prechádzka\n", + "\n", + "Najprv vyriešme náš problém implementáciou stratégie náhodnej prechádzky.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "18" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ], + "source": [ + "def random_policy(m):\n", + " return random.choice(list(actions))\n", + "\n", + "def walk(m,policy,start_position=None):\n", + " n = 0 # number of steps\n", + " # set initial position\n", + " if start_position:\n", + " m.human = start_position \n", + " else:\n", + " m.random_start()\n", + " while True:\n", + " if m.at() == Board.Cell.apple:\n", + " return n # success!\n", + " if m.at() in [Board.Cell.wolf, Board.Cell.water]:\n", + " return -1 # eaten by wolf or drowned\n", + " while True:\n", + " a = actions[policy(m)]\n", + " new_pos = m.move_pos(m.human,a)\n", + " if m.is_valid(new_pos) and m.at(new_pos)!=Board.Cell.water:\n", + " m.move(a) # do the actual move\n", + " break\n", + " n+=1\n", + "\n", + "walk(m,random_policy)" + ] + }, + { + "source": [ + "Poďme vykonať experiment náhodnej prechádzky niekoľkokrát a pozrieť sa na priemerný počet vykonaných krokov:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { +
"output_type": "stream", + "name": "stdout", + "text": [ + "Average path length = 32.87096774193548, eaten by wolf: 7 times\n" + ] + } + ], + "source": [ + "def print_statistics(policy):\n", + " s,w,n = 0,0,0\n", + " for _ in range(100):\n", + " z = walk(m,policy)\n", + " if z<0:\n", + " w+=1\n", + " else:\n", + " s += z\n", + " n += 1\n", + " print(f\"Average path length = {s/n}, eaten by wolf: {w} times\")\n", + "\n", + "print_statistics(random_policy)" + ] + }, + { + "source": [ + "## Funkcia odmeny\n", + "\n", + "Aby sme našu politiku urobili inteligentnejšou, musíme pochopiť, ktoré ťahy sú „lepšie“ ako ostatné.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "move_reward = -0.1\n", + "goal_reward = 10\n", + "end_reward = -10\n", + "\n", + "def reward(m,pos=None):\n", + " pos = pos or m.human\n", + " if not m.is_valid(pos):\n", + " return end_reward\n", + " x = m.at(pos)\n", + " if x==Board.Cell.water or x == Board.Cell.wolf:\n", + " return end_reward\n", + " if x==Board.Cell.apple:\n", + " return goal_reward\n", + " return move_reward" + ] + }, + { + "source": [ + "## Q-Learning\n", + "\n", + "Vytvorte Q-Tabuľku alebo viacrozmerné pole. Keďže naša hracia plocha má rozmery `width` x `height`, môžeme Q-Tabuľku reprezentovať ako numpy pole s tvarom `width` x `height` x `len(actions)`:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "Q = np.ones((width,height,len(actions)),dtype=np.float)*1.0/len(actions)" + ] + }, + { + "source": [ + "Preneste Q-Tabuľku do funkcie na vykreslenie, aby ste vizualizovali tabuľku na doske:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## Podstata Q-Learning: Bellmanova rovnica a učebný algoritmus\n", + "\n", + "Napíšte pseudokód pre náš učebný algoritmus:\n", + "\n", + "* Inicializujte Q-Tabuľku Q rovnakými hodnotami pre všetky stavy a akcie\n", + "* Nastavte rýchlosť učenia $\\alpha\\leftarrow 1$\n", + "* Opakujte simuláciu mnohokrát\n", + " 1. Začnite na náhodnej pozícii\n", + " 1. Opakujte\n", + " 1. Vyberte akciu $a$ v stave $s$\n", + " 2. Vykonajte akciu presunutím do nového stavu $s'$\n", + " 3. Ak narazíme na podmienku konca hry alebo je celková odmena príliš malá - ukončite simuláciu \n", + " 4. Vypočítajte odmenu $r$ v novom stave\n", + " 5. Aktualizujte Q-Funkciu podľa Bellmanovej rovnice: $Q(s,a)\\leftarrow (1-\\alpha)Q(s,a)+\\alpha(r+\\gamma\\max_{a'}Q(s',a'))$\n", + " 6. $s\\leftarrow s'$\n", + " 7. 
Aktualizujte celkovú odmenu a znížte $\\alpha$.\n", + "\n", + "## Využívanie vs. Preskúmavanie\n", + "\n", + "Najlepší prístup je nájsť rovnováhu medzi preskúmavaním a využívaním. Ako sa dozvedáme viac o našom prostredí, budeme pravdepodobne nasledovať optimálnu cestu, avšak občas si zvolíme nepreskúmanú cestu.\n", + "\n", + "## Implementácia v Pythone\n", + "\n", + "Teraz sme pripravení implementovať učebný algoritmus. Predtým však potrebujeme funkciu, ktorá premení ľubovoľné čísla v Q-Tabuľke na vektor pravdepodobností pre zodpovedajúce akcie:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "def probs(v,eps=1e-4):\n", + " v = v-v.min()+eps\n", + " v = v/v.sum()\n", + " return v" + ] + }, + { + "source": [ + "Pridáme malé množstvo `eps` k pôvodnému vektoru, aby sme sa vyhli deleniu nulou v počiatočnom prípade, keď sú všetky komponenty vektora identické.\n", + "\n", + "Skutočný učebný algoritmus budeme spúšťať počas 5000 experimentov, nazývaných aj **epochy**:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "" + ] + } + ], + "source": [ + "\n", + "from IPython.display import clear_output\n", + "\n", + "lpath = []\n", + "\n", + "for epoch in range(10000):\n", + " clear_output(wait=True)\n", + " print(f\"Epoch = {epoch}\",end='')\n", + "\n", + " # Pick initial point\n", + " m.random_start()\n", + " \n", + " # Start travelling\n", + " n=0\n", + " cum_reward = 0\n", + " while True:\n", + " x,y = m.human\n", + " v = probs(Q[x,y])\n", + " a = random.choices(list(actions),weights=v)[0]\n", + " dpos = actions[a]\n", + " m.move(dpos,check_correctness=False) # we allow player to move outside the board, which terminates episode\n", + " r = reward(m)\n", + " cum_reward += r\n", + " if r==end_reward or 
cum_reward < -1000:\n", + " print(f\" {n} steps\",end='\\r')\n", + " lpath.append(n)\n", + " break\n", + " alpha = np.exp(-n / 3000)\n", + " gamma = 0.5\n", + " ai = action_idx[a]\n", + " Q[x,y,ai] = (1 - alpha) * Q[x,y,ai] + alpha * (r + gamma * Q[x+dpos[0], y+dpos[1]].max())\n", + " n+=1" + ] + }, + { + "source": [ + "Po vykonaní tohto algoritmu by mala byť Q-Tabuľka aktualizovaná hodnotami, ktoré definujú atraktivitu rôznych akcií v každom kroku. Vizualizujte tabuľku tu:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## Kontrola politiky\n", + "\n", + "Keďže Q-Tabuľka uvádza „atraktivitu“ každej akcie v každom stave, je pomerne jednoduché použiť ju na definovanie efektívnej navigácie v našom svete. 
V najjednoduchšom prípade môžeme jednoducho vybrať akciu zodpovedajúcu najvyššej hodnote v Q-Tabuľke:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "2" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ], + "source": [ + "def qpolicy_strict(m):\n", + " x,y = m.human\n", + " v = probs(Q[x,y])\n", + " a = list(actions)[np.argmax(v)]\n", + " return a\n", + "\n", + "walk(m,qpolicy_strict)" + ] + }, + { + "source": [ + "Ak vyskúšate vyššie uvedený kód niekoľkokrát, môžete si všimnúť, že sa niekedy jednoducho \"zasekne\" a musíte stlačiť tlačidlo STOP v notebooku, aby ste ho prerušili.\n", + "\n", + "> **Úloha 1:** Upraviť funkciu `walk` tak, aby obmedzila maximálnu dĺžku cesty na určitý počet krokov (napríklad 100), a sledovať, ako vyššie uvedený kód občas vráti túto hodnotu.\n", + "\n", + "> **Úloha 2:** Upraviť funkciu `walk` tak, aby sa nevracala na miesta, kde už predtým bola. 
Tým sa zabráni tomu, aby sa `walk` dostal do slučky, avšak agent sa stále môže ocitnúť \"uväznený\" na mieste, z ktorého sa nedokáže dostať.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Average path length = 3.45, eaten by wolf: 0 times\n" + ] + } + ], + "source": [ + "\n", + "def qpolicy(m):\n", + " x,y = m.human\n", + " v = probs(Q[x,y])\n", + " a = random.choices(list(actions),weights=v)[0]\n", + " return a\n", + "\n", + "print_statistics(qpolicy)" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 15 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(lpath)" + ] + }, + { + "source": [ + "To, čo tu vidíme, je, že na začiatku sa priemerná dĺžka cesty zvýšila. Pravdepodobne je to spôsobené tým, že keď o prostredí nič nevieme, máme tendenciu uviaznuť v zlých stavoch, ako je voda alebo vlk. Keď sa však začneme učiť a využívať získané poznatky, dokážeme prostredie skúmať dlhšie, no stále nevieme presne, kde sa nachádzajú jablká.\n", + "\n", + "Keď sa naučíme dostatok informácií, agentovi sa cieľ dosahuje ľahšie a dĺžka cesty sa začne skracovať. Napriek tomu sme stále otvorení skúmaniu, takže často odbočíme z najlepšej cesty a skúmame nové možnosti, čo spôsobí, že cesta je dlhšia, než by bola optimálna.\n", + "\n", + "Na grafe tiež pozorujeme, že v určitom bode sa dĺžka náhle zvýšila. To poukazuje na stochastickú povahu procesu a na to, že v určitom momente môžeme „pokaziť“ koeficienty Q-Tabuľky tým, že ich prepíšeme novými hodnotami. Ideálne by sa tomu malo predísť znižovaním rýchlosti učenia (t.j. 
ku koncu tréningu upravujeme hodnoty Q-Tabuľky len o malé hodnoty).\n", + "\n", + "Celkovo je dôležité pamätať na to, že úspech a kvalita procesu učenia výrazne závisia od parametrov, ako sú rýchlosť učenia, pokles rýchlosti učenia a diskontný faktor. Tieto parametre sa často nazývajú **hyperparametre**, aby sa odlíšili od **parametrov**, ktoré optimalizujeme počas tréningu (napr. koeficienty Q-Tabuľky). Proces hľadania najlepších hodnôt hyperparametrov sa nazýva **optimalizácia hyperparametrov** a zaslúži si samostatnú tému.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "## Cvičenie\n", + "#### Realistickejší svet Petra a vlka\n", + "\n", + "V našej situácii sa Peter mohol pohybovať takmer bez toho, aby sa unavil alebo vyhladol. V realistickejšom svete si však musí z času na čas sadnúť a oddýchnuť si, a tiež sa najesť. Urobme náš svet realistickejším zavedením nasledujúcich pravidiel:\n", + "\n", + "1. Pri presune z jedného miesta na druhé Peter stráca **energiu** a získava **únavu**.\n", + "2. Peter môže získať viac energie jedením jabĺk.\n", + "3. Peter sa môže zbaviť únavy odpočinkom pod stromom alebo na tráve (t. j. vstupom na políčko s umiestneným stromom alebo trávou - zelené pole).\n", + "4. Peter musí nájsť a zabiť vlka.\n", + "5. Aby Peter dokázal zabiť vlka, musí mať určitú úroveň energie a únavy, inak prehrá boj.\n", + "\n", + "Upravte funkciu odmeny podľa pravidiel hry, spustite algoritmus posilňovacieho učenia na naučenie najlepšej stratégie na výhru v hre a porovnajte výsledky náhodného pohybu s vaším algoritmom z hľadiska počtu vyhraných a prehratých hier.\n", + "\n", + "> **Poznámka**: Možno bude potrebné upraviť hyperparametre, aby to fungovalo, najmä počet epoch.
Keďže úspech v hre (boj s vlkom) je zriedkavá udalosť, môžete očakávať oveľa dlhší čas trénovania.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby na automatický preklad [Co-op Translator](https://github.com/Azure/co-op-translator). Hoci sa snažíme o presnosť, upozorňujeme, že automatické preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre dôležité informácie odporúčame profesionálny ľudský preklad. Nezodpovedáme za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sk/8-Reinforcement/2-Gym/notebook.ipynb b/translations/sk/8-Reinforcement/2-Gym/notebook.ipynb new file mode 100644 index 000000000..e80ff99bd --- /dev/null +++ b/translations/sk/8-Reinforcement/2-Gym/notebook.ipynb @@ -0,0 +1,392 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.4 64-bit ('base': conda)" + }, + "interpreter": { + "hash": "86193a1ab0ba47eac1c69c1756090baa3b420b3eea7d4aafab8b85f8b312f0c5" + }, + "coopTranslator": { + "original_hash": "f22f8f3daed4b6d34648d1254763105b", + "translation_date": "2025-09-06T15:16:11+00:00", + "source_file": "8-Reinforcement/2-Gym/notebook.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "## Korčuľovanie na CartPole\n", + "\n", + "> **Problém**: Ak chce Peter uniknúť vlkovi, musí sa pohybovať rýchlejšie ako on. 
Uvidíme, ako sa Peter môže naučiť korčuľovať, konkrétne udržiavať rovnováhu, pomocou Q-Learningu.\n", + "\n", + "Najskôr nainštalujme knižnicu gym a importujme potrebné knižnice:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 1" + ] + }, + { + "source": [ + "## Vytvorte prostredie cartpole\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "#code block 2" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "source": [ + "Aby sme videli, ako prostredie funguje, spustime krátku simuláciu na 100 krokov.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "#code block 3" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "source": [ + "Počas simulácie potrebujeme získať pozorovania, aby sme sa rozhodli, ako konať. V skutočnosti nám funkcia `step` vracia aktuálne pozorovania, funkciu odmeny a príznak `done`, ktorý naznačuje, či má zmysel pokračovať v simulácii alebo nie:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "#code block 4" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "source": [ + "Môžeme získať minimálnu a maximálnu hodnotu týchto čísel:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38]\n[4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38]\n" + ] + } + ], + "source": [ + "#code block 5" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 6" + ] + }, 
+ { + "source": [ + "Poďme tiež preskúmať inú metódu diskretizácie pomocou intervalov:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Sample bins for interval (-5,5) with 10 bins\n [-5. -4. -3. -2. -1. 0. 1. 2. 3. 4. 5.]\n" + ] + } + ], + "source": [ + "#code block 7" + ] + }, + { + "source": [ + "Poďme teraz spustiť krátku simuláciu a pozorovať tieto diskrétne hodnoty prostredia.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(0, 0, -2, -2)\n(0, 1, -2, -5)\n(0, 2, -3, -8)\n(0, 3, -5, -11)\n(0, 3, -7, -14)\n(0, 4, -10, -17)\n(0, 3, -14, -15)\n(0, 3, -17, -12)\n(0, 3, -20, -16)\n(0, 4, -23, -19)\n" + ] + } + ], + "source": [ + "#code block 8" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 9" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 10" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0: 22.0, alpha=0.3, epsilon=0.9\n", + "5000: 70.1384, alpha=0.3, epsilon=0.9\n", + "10000: 121.8586, alpha=0.3, epsilon=0.9\n", + "15000: 149.6368, alpha=0.3, epsilon=0.9\n", + "20000: 168.2782, alpha=0.3, epsilon=0.9\n", + "25000: 196.7356, alpha=0.3, epsilon=0.9\n", + "30000: 220.7614, alpha=0.3, epsilon=0.9\n", + "35000: 233.2138, alpha=0.3, epsilon=0.9\n", + "40000: 248.22, alpha=0.3, epsilon=0.9\n", + "45000: 264.636, alpha=0.3, epsilon=0.9\n", + "50000: 276.926,
alpha=0.3, epsilon=0.9\n", + "55000: 277.9438, alpha=0.3, epsilon=0.9\n", + "60000: 248.881, alpha=0.3, epsilon=0.9\n", + "65000: 272.529, alpha=0.3, epsilon=0.9\n", + "70000: 281.7972, alpha=0.3, epsilon=0.9\n", + "75000: 284.2844, alpha=0.3, epsilon=0.9\n", + "80000: 269.667, alpha=0.3, epsilon=0.9\n", + "85000: 273.8652, alpha=0.3, epsilon=0.9\n", + "90000: 278.2466, alpha=0.3, epsilon=0.9\n", + "95000: 269.1736, alpha=0.3, epsilon=0.9\n" + ] + } + ], + "source": [ + "#code block 11" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 20 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(rewards)" + ] + }, + { + "source": [ + "Z tohto grafu nie je možné nič povedať, pretože kvôli povahe stochastického tréningového procesu sa dĺžka tréningových relácií veľmi líši. Aby sme tento graf lepšie pochopili, môžeme vypočítať **bežiaci priemer** cez sériu experimentov, povedzme 100. 
To sa dá pohodlne urobiť pomocou `np.convolve`:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 22 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "#code block 12" + ] + }, + { + "source": [ + "## Menenie hyperparametrov a sledovanie výsledkov v praxi\n", + "\n", + "Teraz by bolo zaujímavé skutočne vidieť, ako sa správa natrénovaný model. 
Spustime simuláciu a budeme postupovať podľa rovnakej stratégie výberu akcií ako počas tréningu: vzorkovanie podľa pravdepodobnostného rozdelenia v Q-Tabuľke:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 13" + ] + }, + { + "source": [ + "## Uloženie výsledku ako animovaný GIF\n", + "\n", + "Ak chcete zapôsobiť na svojich priateľov, môžete im poslať animovaný GIF obrázok vyvažovacej tyče. Na to môžeme použiť `env.render` na vytvorenie jednotlivých snímok a následne ich uložiť ako animovaný GIF pomocou knižnice PIL:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "360\n" + ] + } + ], + "source": [ + "from PIL import Image\n", + "obs = env.reset()\n", + "done = False\n", + "i=0\n", + "ims = []\n", + "while not done:\n", + " s = discretize(obs)\n", + " img=env.render(mode='rgb_array')\n", + " ims.append(Image.fromarray(img))\n", + " v = probs(np.array([Qbest.get((s,a),0) for a in actions]))\n", + " a = random.choices(actions,weights=v)[0]\n", + " obs,_,done,_ = env.step(a)\n", + " i+=1\n", + "env.close()\n", + "ims[0].save('images/cartpole-balance.gif',save_all=True,append_images=ims[1::2],loop=0,duration=5)\n", + "print(i)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby AI prekladu [Co-op Translator](https://github.com/Azure/co-op-translator). Hoci sa snažíme o presnosť, prosím, berte na vedomie, že automatizované preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho rodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre kritické informácie sa odporúča profesionálny ľudský preklad. 
Nenesieme zodpovednosť za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sk/8-Reinforcement/2-Gym/solution/notebook.ipynb b/translations/sk/8-Reinforcement/2-Gym/solution/notebook.ipynb new file mode 100644 index 000000000..dff270c8b --- /dev/null +++ b/translations/sk/8-Reinforcement/2-Gym/solution/notebook.ipynb @@ -0,0 +1,524 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "5c0e485e58d63c506f1791c4dbf990ce", + "translation_date": "2025-09-06T15:19:04+00:00", + "source_file": "8-Reinforcement/2-Gym/solution/notebook.ipynb", + "language_code": "sk" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "## Korčuľovanie na CartPole\n", + "\n", + "> **Problém**: Ak chce Peter uniknúť vlkovi, musí sa pohybovať rýchlejšie ako on. 
Uvidíme, ako sa Peter môže naučiť korčuľovať, konkrétne udržiavať rovnováhu, pomocou Q-Learningu.\n", + "\n", + "Najskôr nainštalujme knižnicu gym a importujme potrebné knižnice:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: gym in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.18.3)\n", + "Requirement already satisfied: Pillow<=8.2.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (7.0.0)\n", + "Requirement already satisfied: scipy in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (1.4.1)\n", + "Requirement already satisfied: numpy>=1.10.4 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (1.19.2)\n", + "Requirement already satisfied: cloudpickle<1.7.0,>=1.2.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (1.6.0)\n", + "Requirement already satisfied: pyglet<=1.5.15,>=1.4.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (1.5.15)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n" + ] + } + ], + "source": [ + "import sys\n", + "!pip install gym \n", + "\n", + "import gym\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import random" + ] + }, + { + "source": [ + "## Vytvorte prostredie cartpole\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "env = gym.make(\"CartPole-v1\")\n", + "print(env.action_space)\n", + "print(env.observation_space)\n", + 
"print(env.action_space.sample())" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Discrete(2)\nBox(-3.4028234663852886e+38, 3.4028234663852886e+38, (4,), float32)\n0\n" + ] + } + ] + }, + { + "source": [ + "Aby sme videli, ako prostredie funguje, spustime krátku simuláciu na 100 krokov.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "env.reset()\n", + "\n", + "for i in range(100):\n", + " env.render()\n", + " env.step(env.action_space.sample())\n", + "env.close()" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/gym/logger.py:30: UserWarning: \u001b[33mWARN: You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.\u001b[0m\n warnings.warn(colorize('%s: %s'%('WARN', msg % args), 'yellow'))\n" + ] + } + ] + }, + { + "source": [ + "Počas simulácie potrebujeme získať pozorovania, aby sme rozhodli, ako konať. 
V skutočnosti nám funkcia `step` vracia aktuálne pozorovania, funkciu odmeny a príznak `done`, ktorý naznačuje, či má zmysel pokračovať v simulácii alebo nie:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "env.reset()\n", + "\n", + "done = False\n", + "while not done:\n", + " env.render()\n", + " obs, rew, done, info = env.step(env.action_space.sample())\n", + " print(f\"{obs} -> {rew}\")\n", + "env.close()" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[ 0.03044442 -0.19543914 -0.04496216 0.28125618] -> 1.0\n", + "[ 0.02653564 -0.38989186 -0.03933704 0.55942606] -> 1.0\n", + "[ 0.0187378 -0.19424049 -0.02814852 0.25461393] -> 1.0\n", + "[ 0.01485299 -0.38894946 -0.02305624 0.53828712] -> 1.0\n", + "[ 0.007074 -0.19351108 -0.0122905 0.23842953] -> 1.0\n", + "[ 0.00320378 0.00178427 -0.00752191 -0.05810469] -> 1.0\n", + "[ 0.00323946 0.19701326 -0.008684 -0.35315131] -> 1.0\n", + "[ 0.00717973 0.00201587 -0.01574703 -0.06321931] -> 1.0\n", + "[ 0.00722005 0.19736001 -0.01701141 -0.36082863] -> 1.0\n", + "[ 0.01116725 0.39271958 -0.02422798 -0.65882671] -> 1.0\n", + "[ 0.01902164 0.19794307 -0.03740452 -0.37387001] -> 1.0\n", + "[ 0.0229805 0.39357584 -0.04488192 -0.67810827] -> 1.0\n", + "[ 0.03085202 0.58929164 -0.05844408 -0.98457719] -> 1.0\n", + "[ 0.04263785 0.78514572 -0.07813563 -1.2950295 ] -> 1.0\n", + "[ 0.05834076 0.98116859 -0.10403622 -1.61111521] -> 1.0\n", + "[ 0.07796413 0.78741784 -0.13625852 -1.35259196] -> 1.0\n", + "[ 0.09371249 0.98396202 -0.16331036 -1.68461179] -> 1.0\n", + "[ 0.11339173 0.79106371 -0.1970026 -1.44691436] -> 1.0\n", + "[ 0.12921301 0.59883361 -0.22594088 -1.22169133] -> 1.0\n" + ] + } + ] + }, + { + "source": [ + "Môžeme získať minimálnu a maximálnu hodnotu týchto čísel:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": 
{}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38]\n[4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38]\n" + ] + } + ], + "source": [ + "print(env.observation_space.low)\n", + "print(env.observation_space.high)" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "def discretize(x):\n", + " return tuple((x/np.array([0.25, 0.25, 0.01, 0.1])).astype(np.int))" + ] + }, + { + "source": [ + "Poďme tiež preskúmať inú metódu diskretizácie pomocou intervalov:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Sample bins for interval (-5,5) with 10 bins\n [-5. -4. -3. -2. -1. 0. 1. 2. 3. 4. 5.]\n" + ] + } + ], + "source": [ + "def create_bins(i,num):\n", + " return np.arange(num+1)*(i[1]-i[0])/num+i[0]\n", + "\n", + "print(\"Sample bins for interval (-5,5) with 10 bins\\n\",create_bins((-5,5),10))\n", + "\n", + "ints = [(-5,5),(-2,2),(-0.5,0.5),(-2,2)] # intervals of values for each parameter\n", + "nbins = [20,20,10,10] # number of bins for each parameter\n", + "bins = [create_bins(ints[i],nbins[i]) for i in range(4)]\n", + "\n", + "def discretize_bins(x):\n", + " return tuple(np.digitize(x[i],bins[i]) for i in range(4))" + ] + }, + { + "source": [ + "Poďme teraz spustiť krátku simuláciu a pozorovať tieto diskrétne hodnoty prostredia.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(0, 0, -1, -3)\n(0, 0, -2, 0)\n(0, 0, -2, -3)\n(0, 1, -3, -6)\n(0, 2, -4, -9)\n(0, 3, -6, -12)\n(0, 2, -8, -9)\n(0, 3, -10, -13)\n(0, 4, -13, -16)\n(0, 4,
-16, -19)\n(0, 4, -20, -17)\n(0, 4, -24, -20)\n" + ] + } + ], + "source": [ + "env.reset()\n", + "\n", + "done = False\n", + "while not done:\n", + " #env.render()\n", + " obs, rew, done, info = env.step(env.action_space.sample())\n", + " #print(discretize_bins(obs))\n", + " print(discretize(obs))\n", + "env.close()" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "Q = {}\n", + "actions = (0,1)\n", + "\n", + "def qvalues(state):\n", + " return [Q.get((state,a),0) for a in actions]" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# hyperparameters\n", + "alpha = 0.3\n", + "gamma = 0.9\n", + "epsilon = 0.90" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0: 108.0, alpha=0.3, epsilon=0.9\n" + ] + } + ], + "source": [ + "def probs(v,eps=1e-4):\n", + "    v = v-v.min()+eps\n", + "    v = v/v.sum()\n", + "    return v\n", + "\n", + "Qmax = 0\n", + "cum_rewards = []\n", + "rewards = []\n", + "for epoch in range(100000):\n", + "    obs = env.reset()\n", + "    done = False\n", + "    cum_reward=0\n", + "    # == do the simulation ==\n", + "    while not done:\n", + "        s = discretize(obs)\n", + "        if random.random()<epsilon:\n", + "            # exploitation - chose the action according to Q-Table probabilities\n", + "            v = probs(np.array(qvalues(s)))\n", + "            a = random.choices(actions,weights=v)[0]\n", + "        else:\n", + "            # exploration - randomly chose the action\n", + "            a = np.random.randint(env.action_space.n)\n", + "\n", + "        obs, rew, done, info = env.step(a)\n", + "        cum_reward+=rew\n", + "        ns = discretize(obs)\n", + "        Q[(s,a)] = (1 - alpha) * Q.get((s,a),0) + alpha * (rew + gamma * max(qvalues(ns)))\n", + "    cum_rewards.append(cum_reward)\n", + "    rewards.append(cum_reward)\n", + "    # == Periodically print results and calculate average reward ==\n", + "    if epoch%5000==0:\n", + "        print(f\"{epoch}: {np.average(cum_rewards)}, alpha={alpha}, epsilon={epsilon}\")\n", + "        if np.average(cum_rewards) > Qmax:\n", + "            Qmax = np.average(cum_rewards)\n", + "            Qbest = Q\n", + "        cum_rewards=[]" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 20 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(rewards)" + ] + }, + { + "source": [ + "Z tohto grafu nie je možné nič povedať, pretože kvôli povahe stochastického tréningového procesu sa dĺžka tréningových relácií veľmi líši. Aby sme tento graf lepšie pochopili, môžeme vypočítať **bežiaci priemer** cez sériu experimentov, povedzme 100. 
To sa dá pohodlne urobiť pomocou `np.convolve`:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 22 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXsAAAD4CAYAAAANbUbJAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nO2dd3gVZfbHvycdAiGUAKEZelGqkY4gICDo4rr6U3dVVKxrWdeKde2ylnXX1bWiYu8FpYmAKCol9AABAgQIBAglQALp7++PO3Mzd+70O7fk3vN5njyZeeedmXfu3HvmzHlPISEEGIZhmOgmLtwDYBiGYYIPC3uGYZgYgIU9wzBMDMDCnmEYJgZgYc8wDBMDJIR7AADQokULkZWVFe5hMAzD1CtWrVp1SAiRYaVvRAj7rKws5OTkhHsYDMMw9Qoi2mW1L5txGIZhYgAW9gzDMDEAC3uGYZgYgIU9wzBMDMDCnmEYJgZgYc8wDBMDsLBnGIaJAUyFPRGlENEKIlpHRBuJ6DGp/V0i2klEa6W/flI7EdFLRJRPROuJaECwL4JhwoUQAp/n7EFldW24h8IwhlgJqqoAMFoIUUpEiQCWEtFcads9QogvVP3PA9BV+hsE4FXpP8NEHXNz9+OeL9aj4HAZ7hnfI9zDYRhdTDV74aFUWk2U/owqnkwG8J603zIA6USUGfhQGSbyOHaqCgBw6ERlmEfCMMZYstkTUTwRrQVwEMACIcRyadNTkqnmRSJKltraAtij2L1QalMf8wYiyiGinOLi4gAugWHCB4V7AAxjEUvCXghRI4ToB6AdgIFEdAaA+wH0AHAWgGYA7pO6a33//d4EhBBvCCGyhRDZGRmW8vgwDMMwDrHljSOEKAHwE4AJQogiyVRTAeAdAAOlboUA2it2awdgnwtjZRiGYRxixRsng4jSpeUGAMYCyJPt8EREAC4EkCvtMgvAVZJXzmAAx4QQRUEZPcOEmVrpnVUYTmPFBt+v34dVu46GexiMDlY0+0wAi4loPYCV8NjsvwfwIRFtALABQAsAT0r95wDYASAfwJsA/ur6qBkmQnjn150AgFnr+OX11o/W4E+v/hbuYTA6mLpeCiHWA+iv0T5ap78AcEvgQ2OYyGfbQY+jWnmVr599eVUNVhYcwYiuPB/FRAYcQcswQeDRWRtx5YwVyNt/PNxDYRgALOyZKOPD5bvw2co95h2DjKzxnyivDtk5K6pr8I9vc1FVw9G8jD8s7Jmo4sGvc3Hvl+tt7fPBsl3YeajM1XHIE5Wh9MO/94v1mPn7Lpz97OIQnpWpL7CwZ2KSkpOVKD5RASEEHvomFxe+8mtAx5twemvNdgqhtP9l2yEAQNGxchwqrbC0z9wNRciaNtsbCcxELyzsmZhkwBMLcNZTP3pdJwMVdh2aN9Rs31tSHtBx7XCkrC5lw/8WbzfsW3yiAmUV1XhtiaffzkNluPWj1fh4xe6gjpEJHyzsmZjE6x8vAvOPv3poFgCgVVqK5vZtB04EdHynmD28znrqR5z/36UorfDMKdQKge/XF+H+rzb49T1cWoGsabOxcPMBS+cuq3A2T3Gqsiak8w3Xv5cTUw83FvZMTBNoKJRsptETrtW14Qm2+s6C3//OQ2XYXuyZqyg1mEjeVOTxKHrn1wJL5z79H/ORf9D+Q67nI/PQ9cG55h1dYsGmA5oPt2Bj1cTmNizsmZimNkDN/rf8wwCAlxZu09xeFaY895U2NeSftribjHDjPnsup1v2h+cNKNQs33EY2U/+iHm5oU8qwMKeiXpeXrQNd362VnNbgLIeW0zMNG5r9vtKTiFr2mx8u3avq8dtmZbsXe720FzNCetgpoQ4WRk6F1XA13zn5C3EKTmSl9a6wmMhO6cMC3sm6nn+h634arW5cHzw6w2ue6XY1bDNeG7+FgDA3z7Rfng5JbNJ3ZxDZXUt1u4p8a6T5ED6a/5h3dw3gc591ITY3KU83dh
//YwCl11v9ZAfag0S40NyPiUs7JmoJGvabNz3hbm/vdKM8+Hy3brmGKe4bcZJSdT/yd43wXmlrMR4a6JAby5ALazJps9pqOc21Oa7/cdD4zU1e73HfPP1GnffzKzAwp6JWj7N8Y2k3V5c6tdHrZAGatZR47YMS06o0wgHPf0j8vYfR8GhMmRNm42PVuxyfNz4OGvCOU5HiKuFtcXDeQl11K/64ZSU4FwU7iguxUPfbECthZtdcPgkAGD/sdC55MqwsGdihjEvLPFrU/88F2856Oo50xsmunq8FMXr/4HjFXjz550Y9fxPAIA9R045Pq6eEAeA2RvqJhP1uqmFNdmMHS456dx8tufISaxTmJ2soH6ot0hN1u5ogZs+WIUPlu3GVhPbf/GJCnTKSAUAnKqqcXw+p7CwZ2Ia9eu8kfWhsroWk176Bb/mH7J8/GqXNdZklQYaqDeRTFqKdgLcE+VVPr7oeh9PpcpcdeSkvZq8pQ598wFgxLOLMdlmBLRbnxsAlFV4BHd1jf4xtx44gbOe+hE7ikMzN6AFC3sm5thz5KR3Wf2bH9lNPyXxvpJT2LjvuC3f7CqX7TjJicER9o00hP2O4lJMfTfHp03vYaiM3gX0Hx56lIdY01V/biWnnBeM31vieaP6YlWhbp/cvaH3vlHDwp6JOUYoEoXd+tFqn21G5gdZPOxWPCzMqDHQ9pyg9uLQe5b8aUA7W8fVemaMfmEJVhQc8WnTM/esUZlRGibZE/bqCeK1e0rwyLe5ul4+xSc8Ub2L8qxF9cpU1dTio+W7/Wz26jcTJ5yq1H9gac0XhRoW9kxMIycPk3n7151+Oej3HDmJDQ79oqtq3fbG8RX2esLQonONl2cll04zCo9qzwscV7ms2nXFVNv8L3zlV7z3+y6/ojAysqZ8rerNw4zXl2zHA19vwOc5vlp4WoPA51aM5me27GdhzzARx6SXlvqsj3h2MS54ealO7zpOf2SenznCyI7rBPVEpJ4Zx6p3jczPW61F0Cona5WotdpaIXD9eznIUb0Z6KGnWS/bcdjS/laRbeay6UXG7n06dqoKX632fWAcLtM3BVVUh35CVg0LeybieG5+HuZv3B+047dpop20TMZKgI+W5lpWWYN3fyvwaat2WbP/RFWYRe/wahu6kkADoLR4YcFWn/XiExVYsOkArn/PXPP+bt0+fLNW23//pI5ppOCw/0RnTa3Aw9/k+szJKDl4vBxfSf7tB1R+9XbnPvo+9gPu/GwdNu6re+NTH1NJhc7DbF/JqaDcDy1Y2DMRxyuLt+PG91f5tdfUCld+GOkNkxztpzy3nvfI9Ll5PutVLmv2TVTmBr0UBvM36tuyQyFb5DQSRy24VN728RpsLtLOpaPnovjYd5v82tbsPor3l+3C3z/Vji6+Q9HevplvSuqaWoH8g6XImjYby228TWzcWzfuHq0b6/ar0LiO938vwNDpi3Drx2ssny8QWNgz9YbOD8zBzR+sNu9owiYdwWIHPWHfv0O6z7rbrpcNk6xN0KrZUHgMD3/jmfB0w4PnyhnLMeCJBbrbW0spnxPsRlepsOOPLkft1kjX98rifOQfrLOV/7a9TojLkawyNULgmTmbAQCv/7xD9xzPzN2MborMnD9trYvLGNalhe5+WuUpP1qxR3MswcJU2BNRChGtIKJ1RLSRiB6T2jsS0XIi2kZEnxJRktSeLK3nS9uzgnsJTCwxL0DzTiBeF0rBeqqyRtNdsEtGI591ZWRpba0wfNW3NgZfQW1Vbl/y+m94f9kulFZUY6mNOAE9ftl2CEfKKnVt0bLg7dgiNaDznLKRIE1+rtTWCuwrOYXn5m/BJa/9ptlXbbOvqRVYmOcR3Ivy9APrXl+ywyff0dDOdQLeaJ5kh0buncY23VMDxYpmXwFgtBCiL4B+ACYQ0WAA/wTwohCiK4CjAKZK/acCOCqE6ALgRakfw0QE3wSQk0QpaEe/sASXvv67Rh/fdeXE3yuL8zHo6YW
6NmUr2H1Y1dQKHC+v8nq1LNx8EFe/s9Lx+dVco3Msed7DKDLXCqcqrV+vLGxrhMDlby7z7C89kI+ZmJOcJmLLaFwXeWv3EFbSK7iJqbAXHuR3oUTpTwAYDeALqX0mgAul5cnSOqTtY8huViQmavlpy0EUn3CneMPGfcfw8iJ7ics+DKAykVogrCs85memUNvQlRO0SySPl0CSbvnbwI0FxqSXfkGfR3/wrt+hY892itI0okR+MDr55Zcoom+bNUqyLBTlB0tNLbBLykEjP59HPm9chN2p4FW+WdUKgRlLd+K5+Xn6OyjI0ckgGiws2eyJKJ6I1gI4CGABgO0ASoQQ8jtWIYC20nJbAHsAQNp+DEBzjWPeQEQ5RJRTXOxu4QQmMhFC4Op3VuKyN/w1Yidc8N+leP6HrYZ91JpwUYn1/DGnKmt8smC+9ctOvz5+9m/VqnKCVv5xV+j4jjvBzIyTF6aiIAelB7qT9ABK09fD3+Si0wNzLAljWdgfVXgiyZ+PWe6d71R28xcXbLVUhvGmDxSOBAJ44vtNeMWk/m+4sCTshRA1Qoh+ANoBGAigp1Y36b/Ws9zvTgkh3hBCZAshsjMy9EPUmehB/r1q2S9lRktJvewczwi1Nn7QxltFz0fm4V8Kl8IvV/uHw6vH0CzV19NHy/UykND8Sb0zHe/rFupJ5x83+QvFuZI/vlk+fy3ziVbZPjmD6c9bi5E1bbbmseT9lG9OAsJSKoZftvkqnP9ZuA1TZ3rcRn/NP4SsabNNJ9vdzLcTDGx54wghSgD8BGAwgHQikmcY2gGQHWULAbQHAGl7EwDWIiuYqMb7am/Qx+hBoIeRO6bd4KJAGdTJ9yVWK1gnkPS2Qzr7vSSHnNtUroLXafjSWw1SeusXf8+XCf/+xa/tsCTI//dTvu6xrnp7hV9bVY2wlD5Zzw8eAP7y1nIAwAfLjFNIq1NLRBpWvHEyiChdWm4AYCyAzQAWA7hY6jYFwLfS8ixpHdL2RSJUUQNMRFNnx3VXAMvfLq2vmVNty2nZP2WQDaBdlGOrSSlDNXn7j+OdXz0mJPXRwvHDmptr7hE1tIvnoWRUkWnBpgN4Zq41+7b8nXFiWtebV1BiZR5pywHjlAdHDQLZjLhueEdH+9nFiu9PJoCZRBQPz8PhMyHE90S0CcAnRPQkgDUAZkj9ZwB4n4jy4dHoLwvCuJl6iMvBpF4Kj57CjkOl2FfirzEfL3eWJ/3Vn5zZXZXeGYDH5FFRXYN4xQPOivBRImu61wzr6Gekj1Q9qpmUH753uya6faxE18pU1wg8Omujo7eitTZz3euR1sBYXOq9zew+bOx91ad9uuF2tzAV9kKI9QD6a7TvgMd+r24vB3CJK6NjogorZhw91HZXZaTq6Bd+QnWtwMCsZv47OpSFTic2m6uKYFTXCnR/aB7OPK2pt00vmZgVIkGzt4a7I3t/2S5NW74V1u62J+yzmjf0VpRSYpbBVK94ydnPGXsCZSu+G8GEI2iZkBHIBNYuxY/vVGUNXltSp3nLppKaCNRyZW1Pr1C3HY6drLKl2RYdc/5QCRR54nXFTnfs2E4FPQD8bjOZml49XrM6uQePOxtjqBzTWdgzISOQGBKl//oJHdOMlmdHuMW/m7VVNxYdw3u/+04SGj3fLvivvepNbmImGJduCzyKN1jojd0sqZ3Te223hKNTWNgzIcOJfVkIgQWbDvjYQ4+d0hb2WrbZUCv7ZRXVPuMwE3p6ZE2b7edi+MrifNw6uovlYwSiDQfKGoXppEDDw+qKGctDORxbqEs/ymjlt1GiDnjL23/c0kR/qBzGQpucgYlpnMi9GUt34snZmzFU4XJ47os/W95/9e7QRine9fk6n3Ut3+zz+xj7yiv91pVFU37NP+xXNtHtZ9nbV2fbLgiixU6FgB/1/E946fL+OHSiAteGyPMkGHyrk4ZZDy0XUk1Y2DPRhmyzlwXUF6sKMbxLC7Q2yC//5GxPJkK7Hiwyf/0w8Cy
ZgaBVg9aozi3g67depkoE5nISTT9G92gVlOPeLvnmbztYig6q9MKRhtIcGIr8NWzGYaIOWdjLybnu/nxdRL/Ou4GWZm9nolrd1T/rpfvCKJjeIR+v2I1/zrPmW+8WRoF1fTVcQ5XC/sUfjdNx6GHnIREqMw4Le8YV9L7cQghvoJGydJ1snlDmG49GtD4WI+28TJUnXy3MQ6FpntFW3ze+PmKU0fLu8d392pR1g79Y5Z8iwwr/WWg9QV+o8kSysGcCZs6GInR6YI5m6t73ft+FSS8txW/5h/Dh8rqMk3IIuhJ19Gm0onYRnbOhCFnTZuO6mSv9hItSTt00srPfHMRelc9+kt1K4xo8MLEnLh/YPuDjuMnoHi2Dclx1MRjA15/eaaUxW8Le0Rnsw8KeCZj7vlwPAPhcQwvK2++pCrXzcJmpa5q60He0otbW5XmFHzcfRO5e3wee0uX0tSXbsXiLb8Ku3YoH7PIHxmDj4+NxaXZggjopIQ4jutpPTtiiUbJ5J4cESyAmJ/gLe6UHlds1hLXQ8y5zGxb2TMDILmk/aFSRypVqdC7bcSTkbpCRipFZQZ2Qy+wzi1MYfFulpSAxPk63Lm2wCaY1wqkLqxmZGs4BynkRvYLnbtKicfAekkpY2DOuoZViYIOkqS42KPWmh7KIRTRhJOzV+ffNkqZZqVyVZrH8nbKEoLqwuRXcKkqjRbA07OYabyNK000gZSz1yGru643EZhwmIjlUWoGsabPx3bp9OF5ehSe/3+Td1qJRku5+aSkJttMl9Htcv6B1faZWCNTUCgx5ZqFfmcRyVU1X2fU0EBIs2vHH9apzuxwaAamUlVRVh+5txWmJQquo8+4EWrrRKizsGVtskbT36XPz8MycPLy1tK56U28DL47z+7aJWjPOZzcOsdW/tLwaldW1KDpW7p3vkLFSaMMuWsLkvDNa+7UpTUiRVkk0GLni9dIvh8JOr4Rz4zARyQGpCtDeklPe/N1yHdbzemfins/XadaF/WHj/rDZkoON3QIpLy3K934Wai2y3IWShWkpviYYLcVeK9mX+s2rbXqDgMcSyXRt1UizPdiafbhgYc/oIoTw8xzZXHTcuyxPmsn/5+Xux+erCjXrwhYcPomhnVtonqf/4z/gjk/WaG6rDxxxULSi1yPzAfhPPBpVTDJiYu86TT1O9fDR0uzj4wjjT/eNllULuQYabolGhLgoWMDozTEEazJYD9bsmbDT+9EfcI6qJux36+oKM/+oKshcWmGcKEqtccocPVmFb2zmHXGbu8d1c7yvmxPJLR16ZijNLuoHtJawv/Ss9pg6vJNPmzrRl16qXz3s9jdjapDz6Dx/SV/N9mCbG9VzW2yzZ8JGaUU1sqbNRmlFtd9k0sET+vnU1bnLlfbnsT1bRrQZZ4jOW4cVurdu7No4lmwtNu+kgZHASIj33fbjnWdjcKfmGNixGWZMyfa2b1NFMyfpZH/Uw+lbiR7BNKe0SkvGsC6ee948Vd+xIBh8fP1g/H1snXLB3jhM2DjjH/N1tyl/fyO66gvI8qoa9Hh4nnd9cKfmPtkcI4U2kp91IIXJu7RshC9vHurWkByhHL16cjVBdW2dM+ps1crUCEoTHQAkxYfXLuM0VYEVFtw50rusNnsFkxFdW6Brq8b429iu3jZOl8BEPOkNPRqRlqDUCg5atsN9jwqnvHbFAKx8cKz3XSM+gB9cw6QEDOgQmjqiehjJq8Nllbhy8GnedaVwaZWmn3HULH97sLGS5O2/l/tVTLWE0qQYaHyAnYLh708d5F1u38wzAc6aPVNvsPK6LSBw48hOpv1CxYiuGchonOy1z8bZ+CXcf14Pv7Zwuyoanb/kZBUenNTT9jG3mAR0BZvINfoBtymKyDx0fi9Hx/j8xqF47YoBIXuzYGHP4EhZJY7rlPpzyk9b/CNmX9Dw0gkWPTPTDLfLNm55HsHOJNmNIzujkyLaNBIwG32lg0T46RpRtI0tRuO6QSA1i43o0lL
b5RKw7hlz1zj/bJl2ad0kBRPOMC5k4yamwp6I2hPRYiLaTEQbiehvUvujRLSXiNZKfxMV+9xPRPlEtIWIxgfzApjAGfDEAvR59Afb+2nlapf52ydrfdZnLN2JvSXuFsD+6e5RutvMfrPqH7WVH/n401uhhzQZG2la51drjMvfNUryCOnbbJQ1/EPfNn5tTtIoOMXK/OzI7vYTtv2osNeridbAP8CaZl8N4C4hRE8AgwHcQkTye8uLQoh+0t8cAJC2XQbgdAATAPyPiOw57DJhxWjiVcncXP/EZ3ocOO5+3pQsA+3a6oSrnOjKimb/+pXZmHfH2dYGF2TsCt24OELB9Em2NNLxp3t895VvMWYxBdcN74ibRna2NTYtRvdoaclmn5aSiB1Pe/VM3DyqMz5Q2MXVPOTAnBUtmAp7IUSREGK1tHwCwGYAbQ12mQzgEyFEhRBiJ4B8AAPdGCzjLsdOVaGi2j88f3An37woe46c9EbORhp6KRrMZL0s2+VJSLtm02BUiLLD97cNd/2Y6jmVoV1a4Pf7R/t4rsjLKYnaouOh83uhUbI13U7P5XFsz1Z4++qz0L6ptfKFSpv39+v3GZp/AvG6knnnmrP82sxKTQLASw4nk93Cls2eiLIA9AcgV564lYjWE9HbRCTXMmsLYI9it0IYPxyYMNH3sR9w0f9+865nTZuNmb8V+PUb8exiDHp6YQhHZh09bwyt6M9hXeoeYuq6n+GeYLVLexfruA7q2AyAtsDKbNLAR0C2TW+AgumTsPyBsbqBaLLw7WpgGwf0I3RrpNw0vTVKBpqx58gpNG2o7zfvxl0epfE5vXlVXbyC3ptxqs2IZLexLOyJqBGALwHcIYQ4DuBVAJ0B9ANQBOAFuavG7n6PWiK6gYhyiCinuNhZIAnjnG2Sp8XGfb6+1f+YtTEcw3FMok7gT8Mk/4nEBIXLjSy/5PS/ekLg+hHBjeIMBKMso3aQg67s5P9q0iARt47u6tP2x/4enU42iZlNsOoVs1Gn4bBD4+QEpFp8s3CKlmKgDEB79A+na+53tgXtP5hYEvZElAiPoP9QCPEVAAghDgghaoQQtQDeRJ2pphCAslROOwB+sfBCiDeEENlCiOyMjPB+CLFIcWnwco87wUwL1EPrrXxsz5Z4WMMdTlkFSv7BJkuZD/Vs9r3bafvPy/t/fpO9jJdu4pYlSb72QLM9pjf0zCPEe4W9cX+9eRzZlVcvvYYR8fFkrNkH8Q1umuSSm9E4GZec2c5nW9v0Bq6nk7CLFW8cAjADwGYhxL8U7UqfoT8CyJWWZwG4jIiSiagjgK4AVrg3ZMYNjL54Rn7z/YMUPETkzJ6qtc9bU87yKxABAGWVdUFC8l7HpZJwejKgfVPjzI+h9E5R888/9XHlOHKErdF9//624XjtigGGx5Ft8LIZx+zhcUZbbfdYWaN/aFJPPGLRh13O0NmuaQOkN0zE+X0yNR/EWt/fyf38vY6ccOPZnbDj6YlIS0nEMxf19tlmN/VEMLAygmEArgQwWuVm+SwRbSCi9QDOAfB3ABBCbATwGYBNAOYBuEUIEfzaXowtWjXWj5w0ErqntzH2X7fCoxf0woWqH1gcEZo5yFGSoVP3VEuDG9WtpWK7578c6aun2evVVd15qMyvrbVBNGowOPO0puadLBAvmbeMhP0ZbZtY9gk/Q/qO9G9vPL42TbQfpLXSOFKTE3CtxehUeXK5b7t0EBFe/vMAnJXVzKdPzkNj0UfjTe2JC8+wdA4ziMj7oFMXjFGnrAgHVrxxlgohSAjRR+lmKYS4UgjRW2r/gxCiSLHPU0KIzkKI7kKIucG9BMYJRknJ9IT920t3ouRk4MFXU4ZmITXZ16ZeUV3ryMPFzmv5PEWNXHm/RtI4GupMniXreJ3IKGMH/jKog+WxuIHepdsVK3KQUdMAE4LJz4pBnZpj+QNjfNIua6FV/xUAaix+D/oqJnDJGySnzZJ7Ruk+uPXMRaMc+PDrYbVaWDA
J/wiYoCGEwKK8A5rBT0a/Jz0l5PHvN+H79UXaG21ARLhYZdOsrK7VtPEqPWiCid5Dw8yMfbKi7qX15lGB+5e7gd1H5l3juuH9qQP9NGG7KD25jHLuyOgJdavZLpVxFrKicEqnQLiTNMLPumQmA/yTzIUDFvZRRnVNLW7/eA027TuOV5dsx7Xv5uAJRZ1YGSNPCbVbYjDo38H3Fb9FoyTU1Aq/4tjn9vQtsBEIsouhFnoPuETJU2WgjiBUCqxQa29W71Oyib04MT4OI7oGrsUeVgVcDeroeVC/8mdtW79aB5FNHVZ81gHf5HVyLd+vdSKJzfLPTOrtb6IKdDJX77rDBQv7KGPHoTLMWrcPE1/6Bc/O2wIAmPn7LszLLUKZorjI+sJjeocIC/FxhFoh8CeVxq/8kVqN7NXjuhH6idi0BOel2e3RvFEyCqZPwmc6XjdWA4jcpGD6JM+CjixSm8OSwmRCaJqahILpkzCpT6amSadG9dok55dXpmA2QvnduELK6qlnSjMzmb94aT+seHCM3z5P/7G34wC2SX1Cl/fGCizsowy97/RNH6zGtK82eNeX79RPNzw3N3BTjV2ICEJ4XrevHprl0y5z7bCO+G3aaEfHv/+8HprauSwYSeOXcG4v/bcK2QunV6Z54I+ebdpNOiiCrNQvbZP7u+NtYkYPgyIu3Vt5Jm1lU9fAjs1QXeM70LvGdcODE3viAo2cPEr+fWk/AHXzLYDHx/+tq7LxmI6Pu555RyYpIQ4tVU4L8XGEPw/q4JPzvz7Dwj7KMHrz3H2kruqU0WTo6t0luttG92ipuy0QjpRVorSiGodKK5CSWKctKzWyuDhCG40i2B9ep58LRaZVWophGmOtj23fMf3EbfKErpXJxFJFXninWSO1JpCV9/rZi/Xty+eFKLOiUVKyW0d3wZc3D8V9E3rgzauy8f7UgX5BU8kJ8bj+7E5+DgK3j/EN3jq/TybuHtcN94yvy/NDRBjbq5WuKa2Rg8/9pMkDor7Bwj7qMJD2CsFkZQJN+xDByQkjuzJ+u3afz+u9cmJN71Vcfv0HgDeuPFP3HHYn6YwmCmWBVGthMvGEwnw2pJP2hLPaD1vtpy3nAFIH68go507BDLIAAB5KSURBVGDUIwpVJoi2Gg9imfg48rqKnturFZIT4i1PxGarXEwT4uNw6+iufh5dRlg1ZXXKqJv0VR5f+WCRuXxgB812La4ZlmWpXzBhYR9lGP2wjygKYzv10Q5iWVAvyrwvPpq9wcWdJgVR6b1yt0pLMfxstCbjurbUN0skeAOH7H0getegFkbqdXm/sxSTzMojpSrSQ6ifx6EoaD2sS3NcMeg0844K1MJezx1YmWbgkxsG2x8crJcenHVrnX1emcvmNI0gvWcu6o1bzrGWMtrouxQqWNhHGUZf6T1H6swSTos5Oy2IbQel0FAKLiOZNfOagbh9TFdN+/hzF/fBkM7NbQs9I/kgexPp+efroTeERJN6r17ThuLzUNqse2TWCZN+qijRUCj2fxrQznbFJav+9ACw4oEx+PLmoX4ZWa1iteyk8jNVmpMCVXL0ooVDSejKzjAhwaq72M/bIjf5nFJoLMqrq3hl5GqY1SIVd56rnYUxo7EnmMaugmv0WT5zUW9cMyzLtjlMT76ZuW2SV9YLRRvhg6mD8P6yAiQn1D10/tC3DVo1Tsalbyzz9gsWk/pkYvb6Ikdup2qFw+j+tkxLQcsAIpSdpOJQfm5OzZf5T52HgsMnDatjhQrW7KOI4+VVun7GMq8t2Q4AeOfXghCMKHDi4wj92ns0VacR57JA1hImsneIli+6kYxMSYzXDL03Q8scAAAX9PH1QFGfW09YDe/aAq9fme3XrpzIDmbuffnYTu6NOuulUVR3oITClKVFQnxcRAh6gDX7qMJKacHpc/NcqSQUKpQTl04LM8sapNbut47u6peq13u+IAiIjhrVtQZ0SEc7VcK1MT1bIS0lAcclTx5ZMFo1JyhTAAR
zmkWeS3fyWQWrxqwWgeamkYeqVaqxvsCaPeMKcnpbtyk8esorrALNJWXFnKFMtbxi5+HATqg5Bv+27q3T/K6tSYNEzL59hHf913zPWL5b55ctXJMmivsRTJlaG4Bm73TeyAlOFYVogoV9PWb5jsPImjYbBRoZGENF89QkbHlyQtBCw1ftOqpYc/aDlQWs2e99zcPn+nhjHCo1rrdqh/sm9ECvzDS0VmR6bCNNJv+hbxvNB5GWgDqh8Nm3SqVBYfhAkeW1k3kBdVCV1UnUYBMsxSXcsBkngjlwvBylFdW64ePfrPVoeb9uP2RYfDuYpCTGIzkh3kcMd2jW0CeAyy2cygLZVm8mkNRZH7cXlzo7oQY3j+rsnR8474zWmHBGa0zuV1etM2+/f6IsrdE6sWuv31NiOd+MXWQvIidmEqXb6gMTe0SMbXvtI+PCPYSgwMI+gpHrvnpzoahIkn5oVdXB09zMGCj5fSsF6ZmnNdUV9p0znD+UQq33bZVKN7rNq1f4B35pmVq0nk25e+1nT3QaQGeFJy48A+2aNsCo7vYjq5U2+xvOjux5pLQGHlHZKk07TXJ9gIV9PaZK0ozCWWJQjvRUCiYj7w+tdAdK1JWfOrZIDdjobJQmwYhQBsLIV9gzMw3T5c/Upcfb6J7BSXEBeIq7PDjJWjUpNS0b1x/BeU73lvjX//WNuORmdmCbfT1mzgZPwrJXFm8P2xjkPDZKsbS5SF8jNpuUO3bKtziK0uUwmP7iWgQrD5AW8gNySKfm6Bugq6kavaId4SZbSkx3ncVqVOGEiHDRgHY+8Qz1DRb29Ri5apS6xJ8dPphqnkTMCsrJxC0G5g8zd7uemZ5Iw2uHeQRAIAU1ZN95py8GoawbKrwTnYpGDWFvZ/Lwrauy8anD9AKhxEnAE2MfFvZRQEJ8nKWEXDJZ02Z7l4cHmCNexurP1UzwPj7Zk6L2ppGd0LRhok+6Y7s4qWmrxKzoh1WsJOHScmFs1rBu/PIEq53Sh2N7tcIgh+kFooV7xnc3TL0cS7Cwr8dcdlZ7AJ6MiP/7KT9o51ELXPm8SqxaWJSavVYmQNkdr2VaCtY8Mg7dFT9Uu5GgRcfKAQB7j+qnKjYiOdH5K/tfR3X2ZuDsoBM1q6QulqDug1SmIHj9yjPx0fWDcPc4a1kWGQ+3nNMF8+44O9zDiAh4grYekxAvuxQCz/+wNWjnaZPu682hLNwtY2RP3/T4eCzZUoybP1ztEwGqFU2qDqEH6kxEdmNwZBfQds2MJ4X1cKLZt01vgN5tm+DeCT0AeEwpfdqbF7/wPgT1iogTMLSzO29hkUboQqtiGxb2UUKz1CQcKXMvCEhJlSr4peRkFR6c2NNH6zZS7BsmJXiTWCknaLX20cqEKAfb2I24TEmMk87jzCbsRNj/qqqkNdag2pUdIiXgyE2i74oiG9NvMxG1J6LFRLSZiDYS0d+k9mZEtICItkn/m0rtREQvEVE+Ea0nosiquhtFyHKxvKomaIIe0M59f/3ZnXzyjJt5yshavFwrFAC+X+9f/vBsjcLX8gSeXWEve3uo3TmtEsoJWjPClcgrmLSV8gEp6xcwwcOKZl8N4C4hxGoiagxgFREtAHA1gIVCiOlENA3ANAD3ATgPQFfpbxCAV6X/jENOVlajYZL/rZJF39Nz8oJ6/gwL/tC5e40LmDeTik8r2bTPP0BIyzPj6Yt64+VF+bYLrjx6wem4emgWWjusAevWBK0bRGNul0m9M9HsuiQM6Rzbk8ihwvTbLIQoEkKslpZPANgMoC2AyQBmSt1mArhQWp4M4D3hYRmAdCKqv5EILiGEcJxqttcj83WOGciIrJNoISpJq/jGvRO64zmD2qhWldXOGY3w4qX9bGvaSQlx6NbKuSdGUnz99amuDxARhnZpEfL4iVjF1q+HiLIA9AewHEArIUQR4HkgAJAjUNoC2KPYrVBqUx/rBiLKIaKc4uLILaThFh3vn4O7PlsX8HFeX7IdP0gTpBV
VdQWRz8pyVmbQCu2aNsDFOrVPZZR1YGW6t2qMS7L9PXdkjjtI6gUA/7msH2ZM8c/h7jaJCaETQt55BZ6tZIKEZWFPRI0AfAngDiGEUYIO7fxN6gYh3hBCZAshsjMygpOkKdL4yqSwiBHHpACqZ+bm4Yb3V/kdzzc7pDWmDDGvGdqjdWPExZGhhg5om1+ClcJ2cr+2GNPTnYlP7eN7gtQymzjz4nECK7dMsLEk7IkoER5B/6EQ4iup+YBsnpH+y/XjCgEo1bl2AKwl4Y5S7AQ86fHb9kM+6+8v2+V7DgensFLPU37FNnvV1hL2ai8eM+6T3BXDzX8u64+C6ZMcT+w6Qf74QlnQg4ktrHjjEIAZADYLIf6l2DQLwBRpeQqAbxXtV0leOYMBHJPNPbHKzsOB55tX1/h8+JvcgI95pgXTz+Yi/5c4rYhE2TWwqSKcv1xhZtIiSxVs9OeB1qNDo41mqZ5JcHWa5Wcu6o2+7cz99BnGDCveOMMAXAlgAxGtldoeADAdwGdENBXAbgCXSNvmAJgIIB/ASQDXuDriesjyHUcCPsb+4+V+2n2gNE4211wzNTxZtLR42TVQqZdWmKRevnNcd9z+8RrveiS5Ooaai/q3hRACF/b3nd66fGAHXB7DD0HGPUyFvRBiKfTjH8Zo9BcAbglwXFGFVlSomoMnypHRKNlrLlHb4P+7cBsOnnA3lXGDJHNvk/SG/vllxvVq7dcmW3mUJqvKamPNvqzCd4JWy6MnVoiLI8PJbIYJlNhVpUKIme03d+8xDHxqIT7PKfS2qQtnuCXo7SYH03IXvW10F782cqDZK/OZF0yf5GeqYhjGPfjXFQIqdDTcrGmzkTVtNj5cvhsAsKKgztwTjBzkfx/bDRdKpfCmnWdtMnScRri/VoBPqvSWoJxkNRP2IzSiZRmGCQ6cGycEmAm9j1d4hH2ipNl+tnIPdh1xPqk7Y0o2ps7M8WufOqIjGibG465x3ZCabH7rVz98LtIteqQkxMd5I2QfkiaP9R5yMpzHnGFCBwt7FymvqsHGfcf9wvorqqzViJWLNt/75fqAxqH0QR/ZLQNLtnqC1uLIo5VbEfRA4Pngza5blvWRlJaAYaIV/pW5yKOzNuJPr/6GXZKr5Wc5e5B/sNRUw5UJhqL77jVneZfdqmlqFbO5CiLCfRN6YNatw0M0IoaJXVizd5FNkk96yckqnNYcuPeL9UhOiMP1Izp5++wrOaVbdNvNHCEfXz8YLdOSfY7ptPC2Xc48rSlW7TqKszqalxS8eVTnEIyIYRgW9i6yvtCT+fFwWYU3VUBFda2PZp9g4F7oZhpbrUyCdo7fVueBZIUGUoUnKy6noeaFS/oikc1GTAzCwj4IrCw4iiGd6hKDKSdodxSXoWXjFM2UwMEOlQ+VEUf2l49EYf8nk4RuDBOtsIoTBI6dqsI9X9RluFSmDZCXH9JId/DubwX4dOVuW+d6aFJPy31DVQDjkQtOx9ndMnweeAzDhBfW7IPAR8t9BbZSs1+ytRiz1u7TzQh535cbbJ1rytAsPDl7s6W+ZgUwWqelYP9xT5FurefCm1dlW8rJ37FFKt67dqClMTEMExpY2IcApQviO78WAACSXIoWVdcm/UPfNo6PpXwWaAn7c12qp8owTOhhM04AlFVU47n5eag0CZoqq/Qv0lHpkj1bra2/eGk/x8dSeu6E2k2TYZjgwsI+AK6bmYNXFm/HN2uNi5L8ss3dbJVGaEWlWk0wdk4PZQFx14bEMEwEwMI+AH7fcRgAsGZ3ScjPTQTT6lEyP/x9JF68tK9pv39ccDoW3TUSDZPiMS1CCokwDOMObLN3gd0B5LExY94dIzDh37/4tV8x6DTLKXE7tkhFxxaputu/u3U45m/cj8T4OHTKaIRNj09wPF6GYSIT1uxdoE2TBkGrt9o8VTv75Ucr7LloGtG7XRPcPb67a8djGCbyYGHvAkO7NMftn6wx7+gAoarV3q99OoDgFfNmGCY6YWH
vAmUVNZi9PjhldlMSfatJrd0T+vkBhmHqPyzsXeDRWRuDduyGiealA7+8eUjQzs8wTHTAwl6DaV+uxyWv/Wa5f3UQTSpWSvWVW8yXzzBM7MLeOBp8snKP4facgiO4+LXfvetxBITThB6MEoYMw0QXpmojEb1NRAeJKFfR9igR7SWitdLfRMW2+4kon4i2ENH4YA08nCgFPRCe8nqt0uoEfLdWjUJ+foZh6hdWNPt3AbwM4D1V+4tCiOeVDUTUC8BlAE4H0AbAj0TUTQhhrVRThPPIt7nomZnm1x7kzMSa3DWuzlXSzaInDMNEJ6bCXgjxMxFlWTzeZACfCCEqAOwkonwAAwH8brxbZLLrcBlOa14XjPTe77s0+wXTZq9HUUl5yM/JMEz9JZAJ2luJaL1k5pErbLcFoDR4F0ptfhDRDUSUQ0Q5xcXFAQwjeIx87iesL4xMV8fOLfUjYhmGYdQ4FfavAugMoB+AIgAvSO1a9gRNtVcI8YYQIlsIkZ2RkaHVJSLYfeSkX1v/Duk+6/dOCH30aeHRUz7rvds24XquDMPo4kjYCyEOCCFqhBC1AN6Ex1QDeDR5ZcKWdgD2BTbEyEOd+OyLnEJHx9kcQA6aU5W+0yDf3TYc93HyMoZhdHAk7IkoU7H6RwCyp84sAJcRUTIRdQTQFcCKwIYYXuS87ifKq3T77DjkLBFag6S6gKl5d4zwLmc09njaZDVvqLuv3IdhGMYKphO0RPQxgFEAWhBRIYB/ABhFRP3gMdEUALgRAIQQG4noMwCbAFQDuKW+e+I8Oz8Pk/pk4vI3l7l63LvHdfNZ79G6zstn5YNjTfcf2rm5q+NhGCa6seKNc7lG8wyD/k8BeCqQQUUSuw6fRL/Hf0DJSX3N3gm3ju7q1/bhdYOQf7DUrz37tKbI2XUUo3u0xKK8gwCAZAtpFBiGYWQ4XYIF3Bb0rdNSNNuHdWmBKUOz/Npfv/JMNE9Nwp3n1r0NWK0+xTAMA3C6BC+HSyuwcd9xnN0t+J5BN43s5F3u2z4dI7q0MOzfvFEyVj18rk9bYhw/pxmGsQ4Le4m/vLUceftPIPcxdzI8/HjnSGQ2ScGO4jJc8PJSn23tm9VNvH57yzBHx09MYGHPMIx1WGJIbDlwAgDw1OzNrhyvY4tUpCYnoHe7Jt422bumVxv/lAt2SWFhzzCMDVizB5C3/7g3v82+klPGnS2ilRxt0V2jcOxUFZqmJjk+7tpHzsX+4+WWUh8zDMPIsLAH8MmKugwPp6qC5ykaF0cBCXoASG+YhPSGgR2DYZjYI+aF/UPfbMAHy+qKd6/YeSSMo2EYhgkOMW8LUAp6hmGYaCXmhT3DMEwsENPCvromsNqt7107EL/ce45f+9y/jdDozTAMEz5i2mb/yKyNAe2vF4ClVc2KYRgmnMS0Zj9nQ5HjfXu3rfOfb8kZKBmGiXBiWrMPpHbsjKuzvctL7jkHOw6VYtJLS/GPC3r59f3qr0NRXROGQrUMwzASMS3sA6Fl47pkZg2S4nF6myYomD5Js++ADk012xmGYUJFTJtxGIZhYoWYFvbCoR1nbM+WLo+EYRgmuMS0sHfCiK4t8MaV2eYdGYZhIoiYFvZ6ev3401vp7pPVPBVxGknOGIZhIpmYFvYnyqs12+dvPKC7zxCu/cowTD0k5oR9Ta3A/mPleOTbXL9tZlWqLj6zHSb2zgzW0BiGYYJGzLle/nNeHt74eYfmtkbJxkW8OygqTDEMw9QnTDV7InqbiA4SUa6irRkRLSCibdL/plI7EdFLRJRPROuJaEAwB2+XlQVHdAU9ACSZFATp3yHd7SExDMOEBCtmnHcBTFC1TQOwUAjRFcBCaR0AzgPQVfq7AcCr7gzTHS557XfD7ftKyr3Lc24fgXeuPguXD+zgbRtuUhicYRgmUjEV9kKInwGoK3pMBjBTWp4J4EJF+3vCwzIA6URUb4zcKwrqLrNXmzSc06MlhnWpm5AlYi8chmHqJ04naFsJIYo
AQPovRxm1BbBH0a9QavODiG4gohwiyikuLnY4DPdY8cAYzfZmXAKQYZgowG1vHC3VV9OdXQjxhhAiWwiRnZFh7AUTCL9tP4SPlu9GZbVx7vqWaSma7a2baLczDMPUJ5wK+wOyeUb6f1BqLwTQXtGvHYB9zocXOH9+czke+HoDKi0UKpHTFr91VV2EbAanL2YYJgpwKuxnAZgiLU8B8K2i/SrJK2cwgGOyuSfcHCmt1N3WOSMVABAvRcY2Ta0z3TROSQzuwBiGYUKAFdfLjwH8DqA7ERUS0VQA0wGcS0TbAJwrrQPAHAA7AOQDeBPAX4MyagekJOlfavfWjQEAXVs2AgA0aeAv4FulsYbPMEz9xTSoSghxuc4mvxlN4UkjeUuggwoGRsVD+rbz+M8/ceEZmNyvLbpIQl9GL089wzBMfSGq0yUcKq3wLg+dvki33/UjOgEAUhLjMbwr+9IzDBN9RLWwv+Kt5Zb6cRZLhmGinagW9nn7T4R7CAzDMBFBVAt7K2x8bHy4h8AwDBN0olbYV1vwq7+wXxukJsdc4k+GYWKQqBX2XR6ca9qndzvOYskwTGwQtcLeCuVVNeEeAsMwTEiIaWE/4YzW4R4CwzBMSIhpYS/046wYhmGiipgS9lufPM9nPS2FJ2cZhokNokra1dQKfLJyN/4vu71Pe3JCHH6+9xwkJcThgYk90CApAWN6tNRNa8wwDBNtRJWw/2LVHjz4dS5KTlZ52+4/rwduHNnZu37D2Z21dmUYholqosqMU1rh8a5Zs7sEAHDjyE4+gp5hGCZWiSphXyUFUv24+QAAYN2eknAOh2EYJmKIKmE/fW6ez3r+wbIwjYRhGCayiCphr0aZ4phhGCaWiWphzzAMw3iIamGf89DYcA+BYRgmIohqYd+iEdeNZRiGAaJY2PeQiogzDMMwUSzs371mYLiHwDAMEzEEFEFLRAUATgCoAVAthMgmomYAPgWQBaAAwP8JIY4GNkxz5HTF7Zs1wPSL+qB1E06FwDAMI+OGZn+OEKKfECJbWp8GYKEQoiuAhdJ60Bn+z0UAgPQGSRjWpUUoTskwDFNvCIYZZzKAmdLyTAAXBuEcfhwqrQQAbNh7LBSnYxiGqVcEKuwFgB+IaBUR3SC1tRJCFAGA9L9lgOewxcTeXJCEYRhGTaBZL4cJIfYRUUsAC4goz3QPCenhcAMAdOjQIcBh1PHipf1cOxbDMEy0EJBmL4TYJ/0/COBrAAMBHCCiTACQ/h/U2fcNIUS2ECI7IyMjkGEAAAZ08BQPT06ID/hYDMMw0YZjYU9EqUTUWF4GMA5ALoBZAKZI3aYA+DbQQVqhY4tGaM3FSBiGYTQJxIzTCsDXRCQf5yMhxDwiWgngMyKaCmA3gEsCH6YxczYU4cvVhcE+DcMwTL3FsbAXQuwA0Fej/TCAMYEMyi5//XB1KE/HMAxT74jaCFqGYRimDhb2DMMwMUC9F/alFdXhHgLDMEzEU++F/Wcr94R7CAzDMBFPvRf2S7YWe5ffuirboCfDMEzsEmgEbdhp3igJAPDtLcPQt316mEfDMAwTmdRrzf5kZTW+Wr0XANC+WcMwj4ZhGCZyqdfC/pFvN3qXGyXX+5cUhmGYoFGvhf1pCm0+KaFeXwrDMExQqdfq8G1juiI1OQF7S06FeygMwzARTb0W9gBw7fCO4R4CwzBMxMO2D4ZhmBiAhT3DMEwMwMKeYRgmBmBhzzAMEwOwsGcYhokBWNgzDMPEACzsGYZhYgAW9gzDMDEACSHCPQYQUTGAXQ53bwHgkIvDqQ/wNccGfM2xQSDXfJoQIsNKx4gQ9oFARDlCiJhKZM/XHBvwNccGobpmNuMwDMPEACzsGYZhYoBoEPZvhHsAYYCvOTbga44NQnLN9d5mzzAMw5gTDZo9wzAMYwILe4ZhmBigXgt7IppARFuIKJ+IpoV7PHYgovZEtJiINhPRRiL6m9TejIgWENE26X9TqZ2I6CXpWtcT0QDFsaZI/bc
R0RRF+5lEtEHa5yUiotBfqT9EFE9Ea4joe2m9IxEtl8b/KRElSe3J0nq+tD1LcYz7pfYtRDRe0R5x3wkiSieiL4goT7rfQ6L9PhPR36XvdS4RfUxEKdF2n4nobSI6SES5irag31e9c5gihKiXfwDiAWwH0AlAEoB1AHqFe1w2xp8JYIC03BjAVgC9ADwLYJrUPg3AP6XliQDmAiAAgwEsl9qbAdgh/W8qLTeVtq0AMETaZy6A88J93dK47gTwEYDvpfXPAFwmLb8G4GZp+a8AXpOWLwPwqbTcS7rfyQA6St+D+Ej9TgCYCeA6aTkJQHo032cAbQHsBNBAcX+vjrb7DOBsAAMA5Cragn5f9c5hOt5w/xAC+KCHAJivWL8fwP3hHlcA1/MtgHMBbAGQKbVlAtgiLb8O4HJF/y3S9ssBvK5of11qywSQp2j36RfG62wHYCGA0QC+l77IhwAkqO8rgPkAhkjLCVI/Ut9ruV8kficApEmCj1TtUXuf4RH2eyQBliDd5/HReJ8BZMFX2Af9vuqdw+yvPptx5C+UTKHUVu+QXlv7A1gOoJUQoggApP8tpW5612vUXqjRHm7+DeBeALXSenMAJUKIamldOU7vtUnbj0n97X4W4aQTgGIA70imq7eIKBVRfJ+FEHsBPA9gN4AieO7bKkT3fZYJxX3VO4ch9VnYa9kl650fKRE1AvAlgDuEEMeNumq0CQftYYOIzgdwUAixStms0VWYbKs31wyPpjoAwKtCiP4AyuB59daj3l+zZEOeDI/ppQ2AVADnaXSNpvtsRtivsT4L+0IA7RXr7QDsC9NYHEFEifAI+g+FEF9JzQeIKFPangngoNSud71G7e002sPJMAB/IKICAJ/AY8r5N4B0IkqQ+ijH6b02aXsTAEdg/7MIJ4UACoUQy6X1L+AR/tF8n8cC2CmEKBZCVAH4CsBQRPd9lgnFfdU7hyH1WdivBNBVmuFPgmdiZ1aYx2QZaWZ9BoDNQoh/KTbNAiDPyE+Bx5Yvt18lzeoPBnBMeoWbD2AcETWVNKpx8NgziwCcIKLB0rmuUhwrLAgh7hdCtBNCZMFzvxYJIf4CYDGAi6Vu6muWP4uLpf5Car9M8uLoCKArPJNZEfedEELsB7CHiLpLTWMAbEIU32d4zDeDiaihNCb5mqP2PisIxX3VO4cx4ZzIcWFyZCI8XizbATwY7vHYHPtweF7L1gNYK/1NhMdWuRDANul/M6k/AXhFutYNALIVx7oWQL70d42iPRtArrTPy1BNEob5+kehzhunEzw/4nwAnwNIltpTpPV8aXsnxf4PSte1BQrvk0j8TgDoByBHutffwON1EdX3GcBjAPKkcb0Pj0dNVN1nAB/DMydRBY8mPjUU91XvHGZ/nC6BYRgmBqjPZhyGYRjGIizsGYZhYgAW9gzDMDEAC3uGYZgYgIU9wzBMDMDCnmEYJgZgYc8wDBMD/D9pwksMstgtRgAAAABJRU5ErkJggg==\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "def running_average(x,window):\n", + " return np.convolve(x,np.ones(window)/window,mode='valid')\n", + "\n", + "plt.plot(running_average(rewards,100))" + ] + }, + { + "source": [ + "## Menenie hyperparametrov a sledovanie výsledkov v praxi\n", + "\n", + "Teraz by bolo zaujímavé skutočne vidieť, ako sa správa natrénovaný model. 
Spustime simuláciu a budeme postupovať podľa rovnakej stratégie výberu akcií ako počas tréningu: vzorkovanie podľa pravdepodobnostného rozdelenia v Q-Tabuľke:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "obs = env.reset()\n", + "done = False\n", + "while not done:\n", + " s = discretize(obs)\n", + " env.render()\n", + " v = probs(np.array(qvalues(s)))\n", + " a = random.choices(actions,weights=v)[0]\n", + " obs,_,done,_ = env.step(a)\n", + "env.close()" + ] + }, + { + "source": [ + "## Uloženie výsledku do animovaného GIF\n", + "\n", + "Ak chcete ohúriť svojich priateľov, môžete im poslať animovaný GIF obrázok balansujúcej tyče. Na to môžeme použiť `env.render` na vytvorenie obrazového rámca a následne ich uložiť do animovaného GIF pomocou knižnice PIL:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "360\n" + ] + } + ], + "source": [ + "from PIL import Image\n", + "obs = env.reset()\n", + "done = False\n", + "i=0\n", + "ims = []\n", + "while not done:\n", + " s = discretize(obs)\n", + " img=env.render(mode='rgb_array')\n", + " ims.append(Image.fromarray(img))\n", + " v = probs(np.array([Qbest.get((s,a),0) for a in actions]))\n", + " a = random.choices(actions,weights=v)[0]\n", + " obs,_,done,_ = env.step(a)\n", + " i+=1\n", + "env.close()\n", + "ims[0].save('images/cartpole-balance.gif',save_all=True,append_images=ims[1::2],loop=0,duration=5)\n", + "print(i)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby AI prekladu [Co-op Translator](https://github.com/Azure/co-op-translator). 
Hoci sa snažíme o presnosť, prosím, berte na vedomie, že automatizované preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho rodnom jazyku by mal byť považovaný za autoritatívny zdroj. Pre kritické informácie sa odporúča profesionálny ľudský preklad. Nenesieme zodpovednosť za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sk/PyTorch_Fundamentals.ipynb b/translations/sk/PyTorch_Fundamentals.ipynb new file mode 100644 index 000000000..51ed05d29 --- /dev/null +++ b/translations/sk/PyTorch_Fundamentals.ipynb @@ -0,0 +1,2828 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4", + "authorship_tag": "ABX9TyOgv0AozH1FKQBD+RkgT2bV", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU", + "coopTranslator": { + "original_hash": "0ca21b6ee62904d616f2e36dc1cf0da7", + "translation_date": "2025-09-06T13:07:49+00:00", + "source_file": "PyTorch_Fundamentals.ipynb", + "language_code": "sk" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EHh5JllMh1rG", + "outputId": "f55755ad-c369-414c-85ec-6e9d4f061a02", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'2.2.1+cu121'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 1 + } + ], + "source": [ + "import torch\n", + "torch.__version__" + ] + }, + { + "cell_type": "code", + "source": [ + "print(\"I am excited to run this\")" + ], + "metadata": { + "colab": { + 
"base_uri": "https://localhost:8080/" + }, + "id": "UPlb-duwXAfz", + "outputId": "cfd687e4-1238-49f4-ab6b-ee1305b740d2" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "I am excited to run this\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "print(torch.__version__)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "byWVlJ9wXDSk", + "outputId": "fd74a5c4-4d4a-41b2-ef3c-562ea3e4811f" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "2.2.1+cu121\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "Osm80zoEYklS" + } + }, + { + "cell_type": "code", + "source": [ + "# scalar\n", + "scalar = torch.tensor(7)\n", + "scalar" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-o8wvJ-VXZmI", + "outputId": "558816f5-1205-4de1-fe1f-2f96e9bd79e6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(7)" + ] + }, + "metadata": {}, + "execution_count": 4 + } + ] + }, + { + "cell_type": "code", + "source": [ + "scalar.ndim" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mCZ2tXC4Y_Sg", + "outputId": "2d86dbdc-56e1-45c6-d3dd-14515f2a457a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "source": [ + "scalar.item()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ssN00By0ZQgS", + "outputId": "490f40d1-5135-4969-a6d3-c8c902cdc473" + }, + "execution_count": null, + "outputs": [ + { + "output_type": 
"execute_result", + "data": { + "text/plain": [ + "7" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# vector\n", + "vector = torch.tensor([7, 7])\n", + "vector\n", + "#vector.ndim\n", + "#vector.item()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Bws__5wlZnmF", + "outputId": "944e38f9-5ba1-4ddc-a9c6-cfb6a19bb488" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([7, 7])" + ] + }, + "metadata": {}, + "execution_count": 7 + } + ] + }, + { + "cell_type": "code", + "source": [ + "vector.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "9pjCvnsZZzNG", + "outputId": "e030a4da-8f81-4858-fbce-86da2aaafe52" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([2])" + ] + }, + "metadata": {}, + "execution_count": 8 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Matrix\n", + "MATRIX = torch.tensor([[7, 8],[9, 10]])\n", + "MATRIX" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "a747hI9SaBGW", + "outputId": "af835ddb-81ff-4981-badb-441567194d15" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[ 7, 8],\n", + " [ 9, 10]])" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ] + }, + { + "cell_type": "code", + "source": [ + "MATRIX.ndim" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XdTfFa7vaRUj", + "outputId": "0fbbab9c-8263-4cad-a380-0d2a16ca499e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "2" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "code", + "source": [ + "MATRIX[0]\n", + "MATRIX[1]" + ], + 
"metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TFeD3jSDafm7", + "outputId": "69b44ab3-5ba7-451a-c6b2-f019a03d0c96" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 9, 10])" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Tensor\n", + "TENSOR = torch.tensor([[[1, 2, 3],[3,6,9], [2,4,5]]])\n", + "TENSOR" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ic3cE47tah42", + "outputId": "f250e295-91de-43ec-9d80-588a6fe0abde" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[1, 2, 3],\n", + " [3, 6, 9],\n", + " [2, 4, 5]]])" + ] + }, + "metadata": {}, + "execution_count": 12 + } + ] + }, + { + "cell_type": "code", + "source": [ + "TENSOR.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Wvjf5fczbAM1", + "outputId": "9c72b5b8-bafe-4ae7-9883-b051e209eada" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([1, 3, 3])" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ] + }, + { + "cell_type": "code", + "source": [ + "TENSOR.ndim" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mwtXZwiMbN3m", + "outputId": "331a5e36-b1b0-4a5f-a9b8-e7049cbaa8f9" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "3" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ] + }, + { + "cell_type": "code", + "source": [ + "TENSOR[0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vzdZu_IfbP3J", + "outputId": "e24e7e71-e365-412d-ff50-fc094b56d2f3" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + 
"data": { + "text/plain": [ + "tensor([[1, 2, 3],\n", + " [3, 6, 9],\n", + " [2, 4, 5]])" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "A8OL9eWfcRrJ" + } + }, + { + "cell_type": "code", + "source": [ + "random_tensor = torch.rand(3,4)\n", + "random_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hAqSDE1EcVS_", + "outputId": "946171c3-d054-400c-f893-79110356888c" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0.4414, 0.7681, 0.8385, 0.3166],\n", + " [0.0468, 0.5812, 0.0670, 0.9173],\n", + " [0.2959, 0.3276, 0.7411, 0.4643]])" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_tensor.ndim" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "g4fvPE5GcwzP", + "outputId": "8737f36b-6864-4059-eaed-6f9156c22306" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "2" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_tensor.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XsAg99QmdAU6", + "outputId": "35467c11-257c-4f16-99aa-eca930bcbc36" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([3, 4])" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_tensor.size()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cii1pNdVdB68", + "outputId": "fc8d2de6-9215-43de-99f7-7b0d7f7d20fa" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([3, 
4])" + ] + }, + "metadata": {}, + "execution_count": 19 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_image_tensor = torch.rand(size=(3, 224, 224)) #color channels, height, width\n", + "random_image_tensor.ndim, random_image_tensor.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "aTKq2j0cdDjb", + "outputId": "6be42057-20b9-4faf-d79d-8b65c42cc27e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(3, torch.Size([3, 224, 224]))" + ] + }, + "metadata": {}, + "execution_count": 20 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_tensor_ofownsize = torch.rand(size=(5,10,10))\n", + "random_tensor_ofownsize.ndim, random_tensor_ofownsize.shape\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IyhDdj-Pd6nC", + "outputId": "43e5e334-6d4d-4b67-f87d-7d364c6d8c67" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(3, torch.Size([5, 10, 10]))" + ] + }, + "metadata": {}, + "execution_count": 21 + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "UOJW08uOert_" + } + }, + { + "cell_type": "code", + "source": [ + "zero = torch.zeros(size=(3, 4))\n", + "zero" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "uGvXtaXyefie", + "outputId": "d40d3e28-8667-4d2f-8b62-f0829c6162ad" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0., 0., 0., 0.],\n", + " [0., 0., 0., 0.],\n", + " [0., 0., 0., 0.]])" + ] + }, + "metadata": {}, + "execution_count": 22 + } + ] + }, + { + "cell_type": "code", + "source": [ + "zero*random_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "OyUkUPkDe0uH", + "outputId": "26c2e4be-36ba-4c6c-9a90-2704ec135828" + }, + 
"execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0., 0., 0., 0.],\n", + " [0., 0., 0., 0.],\n", + " [0., 0., 0., 0.]])" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ones = torch.ones(size=(3, 4))\n", + "ones\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "y_Ac62Aqe82G", + "outputId": "291de5d9-b9df-49de-c9d1-d098e3e9f4d8" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1., 1., 1., 1.],\n", + " [1., 1., 1., 1.],\n", + " [1., 1., 1., 1.]])" + ] + }, + "metadata": {}, + "execution_count": 24 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ones.dtype" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TvGOA9odfIEO", + "outputId": "45949ef4-6649-4b6c-d6af-2d4bfb8de832" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.float32" + ] + }, + "metadata": {}, + "execution_count": 25 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ones*zero" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "--pTyge-fI-8", + "outputId": "c4d9bb7e-829b-43db-e2db-b1a2d64e61f0" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0., 0., 0., 0.],\n", + " [0., 0., 0., 0.],\n", + " [0., 0., 0., 0.]])" + ] + }, + "metadata": {}, + "execution_count": 26 + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "qDcc7Z36fSJF" + } + }, + { + "cell_type": "code", + "source": [ + "one_to_ten = torch.arange(start = 1, end = 11, step = 1)\n", + "one_to_ten" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "w3CZB4zUfR1s", + "outputId": 
"197fcba1-da0a-4b4a-ed11-3974bd6c01aa" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])" + ] + }, + "metadata": {}, + "execution_count": 27 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ten_zeros = torch.zeros_like(one_to_ten)\n", + "ten_zeros" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "WZh99BwVfRy8", + "outputId": "51ef8bfb-6fa0-4099-ff66-b97d65b2ddea" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])" + ] + }, + "metadata": {}, + "execution_count": 28 + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "pGGhgsbUgqbW" + } + }, + { + "cell_type": "code", + "source": [ + "float_32_tensor = torch.tensor([3.0, 6.0,9.0], dtype = None, device = None, requires_grad = False)\n", + "float_32_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JORJl4XkfRsx", + "outputId": "71114171-0f49-481f-b6fc-6cb48e2fb895" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([3., 6., 9.])" + ] + }, + "metadata": {}, + "execution_count": 29 + } + ] + }, + { + "cell_type": "code", + "source": [ + "float_32_tensor.dtype" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "6wOPPwGyfRLn", + "outputId": "f23776a1-b682-404a-9f67-d5bcb0402666" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.float32" + ] + }, + "metadata": {}, + "execution_count": 30 + } + ] + }, + { + "cell_type": "code", + "source": [ + "float_16_tensor = float_32_tensor.type(torch.float16)\n", + "float_16_tensor.dtype" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + 
"id": "tFsHCvmZfOYe", + "outputId": "d3aa305a-7591-47f5-97fd-61bff60b44bd" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.float16" + ] + }, + "metadata": {}, + "execution_count": 31 + } + ] + }, + { + "cell_type": "code", + "source": [ + "float_16_tensor*float_32_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TQiCGTPuwq0q", + "outputId": "98750fce-1ca3-4889-e269-8b753efdea96" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 9., 36., 81.])" + ] + }, + "metadata": {}, + "execution_count": 32 + } + ] + }, + { + "cell_type": "code", + "source": [ + "int_32_tensor = torch.tensor([3, 6, 9], dtype = torch.int32)\n", + "int_32_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5hlrLvGUw5D_", + "outputId": "41d890a0-9aee-446c-d906-631ce2ab0995" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([3, 6, 9], dtype=torch.int32)" + ] + }, + "metadata": {}, + "execution_count": 33 + } + ] + }, + { + "cell_type": "code", + "source": [ + "int_32_tensor*float_32_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ihApD9u3xTNW", + "outputId": "d295eed0-6996-4e0f-8502-ff4b55cd1373" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 9., 36., 81.])" + ] + }, + "metadata": {}, + "execution_count": 34 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x = torch.arange(0,100,10)" + ], + "metadata": { + "id": "utKhlb_KxWDQ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "x" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "p78D74E9Rj7Y", + "outputId": 
"781a1614-a900-41f5-9e5d-358f0b2390aa" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])" + ] + }, + "metadata": {}, + "execution_count": 36 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.min()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4BcSs5NeRkcj", + "outputId": "3f24a8dc-58e9-4a5f-9834-e85856a34f9d" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0)" + ] + }, + "metadata": {}, + "execution_count": 37 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.max()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hinqvXVLRm4q", + "outputId": "5c7d8a53-3913-4ac1-bba3-5ba8ff68250a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(90)" + ] + }, + "metadata": {}, + "execution_count": 38 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.mean(x.type(torch.float32))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "k7okc0_vRpnB", + "outputId": "91e5494f-dc57-417c-ea4d-25dbc547c893" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(45.)" + ] + }, + "metadata": {}, + "execution_count": 39 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.type(torch.float32).mean()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "29QcDTjHRq10", + "outputId": "62937c6c-78e0-49f2-dde3-1543ee8f7907" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(45.)" + ] + }, + "metadata": {}, + "execution_count": 40 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.sum()" + ], + 
"metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wlpY_G_sbdKF", + "outputId": "475d8258-af65-4011-a258-b93d4d8142d4" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(450)" + ] + }, + "metadata": {}, + "execution_count": 41 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.argmax()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GT6HJzwhbk4n", + "outputId": "2e455c20-c322-4bcf-d07c-1259d3ccefc6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(9)" + ] + }, + "metadata": {}, + "execution_count": 42 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.argmin()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "egL3oi2Mb19P", + "outputId": "f71fb32f-6338-44a3-b377-75bea0a3ab54" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0)" + ] + }, + "metadata": {}, + "execution_count": 43 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "p2U8DZKib3DP", + "outputId": "b9f613b9-74e9-45f4-ed01-05babb6a6793" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0)" + ] + }, + "metadata": {}, + "execution_count": 44 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[9]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "24qBFlGYcABe", + "outputId": "5813cfcb-7f63-4bd7-ee46-f95ccbfda939" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(90)" + ] + }, + "metadata": {}, + "execution_count": 45 + } + ] + }, + { + "cell_type": "code", + "source": [ 
+ "x = torch.arange(1, 10)\n", + "x.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "0GPOxEzkcBHO", + "outputId": "aefbd903-4f4c-4d2c-c90f-eccd682fe018" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([9])" + ] + }, + "metadata": {}, + "execution_count": 46 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_reshaped = x.reshape(1,9)\n", + "x_reshaped, x_reshaped.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "spmRgQjwddgp", + "outputId": "85a7c55c-2909-4ea2-fc68-386dddc65742" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9]]), torch.Size([1, 9]))" + ] + }, + "metadata": {}, + "execution_count": 47 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_reshaped.view(1,9)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "tH2ahWGydqqP", + "outputId": "65d92263-4fc4-434a-c06d-c5e08436f7fe" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9]])" + ] + }, + "metadata": {}, + "execution_count": 48 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_stacked = torch.stack([x, x, x, x], dim = 1)\n", + "x_stacked" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "jgCeJcaud_-1", + "outputId": "7f293a37-6ef1-43b6-aee5-9d6d91c94f9e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 1, 1, 1],\n", + " [2, 2, 2, 2],\n", + " [3, 3, 3, 3],\n", + " [4, 4, 4, 4],\n", + " [5, 5, 5, 5],\n", + " [6, 6, 6, 6],\n", + " [7, 7, 7, 7],\n", + " [8, 8, 8, 8],\n", + " [9, 9, 9, 9]])" + ] + }, + "metadata": {}, + "execution_count": 49 + } + ] + }, 
+ { + "cell_type": "code", + "source": [ + "x_stacked.squeeze()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XhJHIK6cfPse", + "outputId": "06c47b89-3a9e-453e-bcc3-00cbcb0b8b49" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 1, 1, 1],\n", + " [2, 2, 2, 2],\n", + " [3, 3, 3, 3],\n", + " [4, 4, 4, 4],\n", + " [5, 5, 5, 5],\n", + " [6, 6, 6, 6],\n", + " [7, 7, 7, 7],\n", + " [8, 8, 8, 8],\n", + " [9, 9, 9, 9]])" + ] + }, + "metadata": {}, + "execution_count": 50 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_stacked.unsqueeze(dim=1)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ej2c3Xxzf0tq", + "outputId": "94024061-eb37-446d-c4a8-e4d16cb6de81" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[1, 1, 1, 1]],\n", + "\n", + " [[2, 2, 2, 2]],\n", + "\n", + " [[3, 3, 3, 3]],\n", + "\n", + " [[4, 4, 4, 4]],\n", + "\n", + " [[5, 5, 5, 5]],\n", + "\n", + " [[6, 6, 6, 6]],\n", + "\n", + " [[7, 7, 7, 7]],\n", + "\n", + " [[8, 8, 8, 8]],\n", + "\n", + " [[9, 9, 9, 9]]])" + ] + }, + "metadata": {}, + "execution_count": 52 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_stacked.squeeze()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4DJYo1a0f5M0", + "outputId": "efca2b47-1b14-44de-9a9a-2c83629d153f" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 1, 1, 1],\n", + " [2, 2, 2, 2],\n", + " [3, 3, 3, 3],\n", + " [4, 4, 4, 4],\n", + " [5, 5, 5, 5],\n", + " [6, 6, 6, 6],\n", + " [7, 7, 7, 7],\n", + " [8, 8, 8, 8],\n", + " [9, 9, 9, 9]])" + ] + }, + "metadata": {}, + "execution_count": 53 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_stacked.unsqueeze(dim=-2)" + ], + "metadata": { + 
"colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "J4iEjn2ah2HL", + "outputId": "22395593-7c16-4162-beae-dd2bbe7bda35" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[1, 1, 1, 1]],\n", + "\n", + " [[2, 2, 2, 2]],\n", + "\n", + " [[3, 3, 3, 3]],\n", + "\n", + " [[4, 4, 4, 4]],\n", + "\n", + " [[5, 5, 5, 5]],\n", + "\n", + " [[6, 6, 6, 6]],\n", + "\n", + " [[7, 7, 7, 7]],\n", + "\n", + " [[8, 8, 8, 8]],\n", + "\n", + " [[9, 9, 9, 9]]])" + ] + }, + "metadata": {}, + "execution_count": 55 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "tensor = torch.tensor([1, 2, 3])\n", + "tensor = tensor - 10\n", + "tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cFfiD7Nth7Z_", + "outputId": "1139e1f8-fc1a-46ca-d636-f2bc4fd2eef6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([-9, -8, -7])" + ] + }, + "metadata": {}, + "execution_count": 7 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.mul(tensor, 10)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dyA7BM_GHhqE", + "outputId": "0e3b9671-d9e8-4a32-87bb-59bc05986142" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([-90, -80, -70])" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.sub(tensor, 100)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "owtUsZ1KNegI", + "outputId": "189b7b23-0041-4e09-b991-cd209a48506a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([-109, -108, -107])" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "code", + 
"source": [ + "torch.add(tensor, 100)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "K5STXlQONsyc", + "outputId": "00cbb79a-0a1d-4e21-86ec-5c91c37a2d01" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([91, 92, 93])" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.divide(tensor, 2)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xqMGnzIUNvp0", + "outputId": "c894cf3e-f148-45f8-cfc8-d78740735306" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([-4.5000, -4.0000, -3.5000])" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.matmul(tensor, tensor)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ruGzKpV8NyBc", + "outputId": "fddb63bf-006f-48b6-ae28-287fbcda8bc5" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(194)" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor@tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "8GS3r9yTeGfD", + "outputId": "c80b12ac-30b5-4f3d-c38c-9e41ba511b0e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(194)" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ] + }, + { + "cell_type": "code", + "source": [ + "%%time\n", + "tensor@tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QmuYHqXTemC0", + "outputId": "402fe3ba-70b5-4bb2-c83b-254db84ff810" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": 
"stdout", + "text": [ + "CPU times: user 622 µs, sys: 0 ns, total: 622 µs\n", + "Wall time: 516 µs\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(194)" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ] + }, + { + "cell_type": "code", + "source": [ + "%%time\n", + "torch.matmul(tensor,tensor)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dGr1fzdNepd8", + "outputId": "97bd6c91-bc25-4b38-cdf5-f22dcdef243e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "CPU times: user 424 µs, sys: 998 µs, total: 1.42 ms\n", + "Wall time: 1.43 ms\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(194)" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.rand(3,2)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "pGYDoK2gevfo", + "outputId": "2c8783d5-0453-47c5-c7ed-af10d25d6989" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0.5999, 0.0073],\n", + " [0.9321, 0.3026],\n", + " [0.3463, 0.3872]])" + ] + }, + "metadata": {}, + "execution_count": 20 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.matmul(torch.rand(3,2), torch.rand(2,3))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "KGBGQoB8e2DP", + "outputId": "4c2ef361-a2d0-41ee-c328-3992cbbc138d" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0.3528, 0.1893, 0.0714],\n", + " [1.2791, 0.7110, 0.2563],\n", + " [0.8812, 0.4553, 0.1803]])" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import torch" + ], + "metadata": { + "id": "ib8DMtkBe_LJ" + }, + "execution_count": 
1, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "x = torch.rand(2,9)" + ], + "metadata": { + "id": "nJo8ZBdrQY1b" + }, + "execution_count": 2, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "x" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wi6oRv4MQfgf", + "outputId": "55c99f55-31f6-4cf5-ba4e-19a47c3a0167" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0.5894, 0.4391, 0.2018, 0.5417, 0.3844, 0.3592, 0.9209, 0.9269, 0.0681],\n", + " [0.0746, 0.1740, 0.6821, 0.6890, 0.0999, 0.7444, 0.2391, 0.4625, 0.8302]])" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ] + }, + { + "cell_type": "code", + "source": [ + "y=torch.randn(2,3,5)\n", + "y" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Zpx8myAUQgoc", + "outputId": "07756d70-56bd-437c-c74e-9aecc1a77311" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[ 1.5552, -0.4877, 0.5175, -1.7958, -0.6187],\n", + " [-0.3359, -1.9710, 0.0112, -1.7578, -1.5295],\n", + " [ 0.0932, 1.4079, 0.9108, 0.3328, -0.6978]],\n", + "\n", + " [[-0.9406, -1.0809, -0.2595, 0.1282, 1.6605],\n", + " [ 1.1624, 1.0902, 1.7092, -0.2842, -1.3780],\n", + " [-0.1534, -1.2795, -0.5495, 0.9902, 0.1822]]])" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_original = torch.rand(size=(224,224,3))\n", + "x_original" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "s4U-X9bJQnWe", + "outputId": "657a7a76-962c-4b41-a76b-902d0482266c" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[0.4549, 0.6809, 0.2118],\n", + " [0.4824, 0.9008, 0.8741],\n", + " [0.1715, 0.1757, 0.1845],\n", + " ...,\n", + " [0.8741, 0.6594, 
0.2610],\n", + " [0.0092, 0.1984, 0.1955],\n", + " [0.4236, 0.4182, 0.0251]],\n", + "\n", + " [[0.9174, 0.1661, 0.5852],\n", + " [0.1837, 0.2351, 0.3810],\n", + " [0.3726, 0.4808, 0.8732],\n", + " ...,\n", + " [0.6794, 0.0554, 0.9202],\n", + " [0.0864, 0.8750, 0.3558],\n", + " [0.8445, 0.9759, 0.4934]],\n", + "\n", + " [[0.1600, 0.2635, 0.7194],\n", + " [0.9488, 0.3405, 0.3647],\n", + " [0.6683, 0.5168, 0.9592],\n", + " ...,\n", + " [0.0521, 0.0140, 0.2445],\n", + " [0.3596, 0.3999, 0.2730],\n", + " [0.5926, 0.9877, 0.7784]],\n", + "\n", + " ...,\n", + "\n", + " [[0.4794, 0.5635, 0.3764],\n", + " [0.9124, 0.6094, 0.5059],\n", + " [0.4528, 0.4447, 0.5021],\n", + " ...,\n", + " [0.0089, 0.4816, 0.8727],\n", + " [0.2173, 0.6296, 0.2347],\n", + " [0.2028, 0.9931, 0.7201]],\n", + "\n", + " [[0.3116, 0.6459, 0.4703],\n", + " [0.0148, 0.2345, 0.7149],\n", + " [0.8393, 0.5804, 0.6691],\n", + " ...,\n", + " [0.2105, 0.9460, 0.2696],\n", + " [0.5918, 0.9295, 0.2616],\n", + " [0.2537, 0.7819, 0.4700]],\n", + "\n", + " [[0.6654, 0.1200, 0.5841],\n", + " [0.9147, 0.5522, 0.6529],\n", + " [0.1799, 0.5276, 0.5415],\n", + " ...,\n", + " [0.7536, 0.4346, 0.8793],\n", + " [0.3793, 0.1750, 0.7792],\n", + " [0.9266, 0.8325, 0.9974]]])" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_permuted=x_original.permute(2, 0, 1)\n", + "print(x_original.shape)\n", + "print(x_permuted.shape)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "DD19_zvbQzHo", + "outputId": "1d64ce1b-eb48-47e3-90b6-7f1340e7f2b2" + }, + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "torch.Size([224, 224, 3])\n", + "torch.Size([3, 224, 224])\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_original[0,0,0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NnPmMk4ZRF7w", + "outputId": 
"2cd5da7f-4a23-4a76-8c4a-bb982113f2a4" + }, + "execution_count": 10, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0.4549)" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_permuted[0,0,0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Z0ylNoAARgTo", + "outputId": "ddca0298-cddf-4048-9b71-a791655e5bed" + }, + "execution_count": 11, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0.4549)" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_original[0,0,0]=0.989" + ], + "metadata": { + "id": "RXw0xXsDRi4L" + }, + "execution_count": 13, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "x_original[0,0,0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "1sFdV6wzRo3f", + "outputId": "1cf87d2c-6d88-453a-d136-0f625a2800f1" + }, + "execution_count": 14, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0.9890)" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_permuted[0,0,0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xTX-hx2SR1wp", + "outputId": "0d4908c4-c3bc-44e3-8ec6-1487104cc209" + }, + "execution_count": 15, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0.9890)" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x=torch.arange(1,10).reshape(1,3,3)\n", + "x, x.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mZomOe7gR4Q8", + "outputId": "0b3c922f-ec11-46de-b8a5-9f9533d866ad" + }, + "execution_count": 18, + "outputs": [ + { + "output_type": "execute_result", + "data": { 
+ "text/plain": [ + "(tensor([[[1, 2, 3],\n", + " [4, 5, 6],\n", + " [7, 8, 9]]]),\n", + " torch.Size([1, 3, 3]))" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "3y7v4SQvSBs1", + "outputId": "8c53307d-e628-404d-db66-56c6bdffab7c" + }, + "execution_count": 19, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 2, 3],\n", + " [4, 5, 6],\n", + " [7, 8, 9]])" + ] + }, + "metadata": {}, + "execution_count": 19 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0][0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hf9uG4xLSNya", + "outputId": "3075bc42-9ffa-426b-8a86-95628ffcd824" + }, + "execution_count": 21, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1, 2, 3])" + ] + }, + "metadata": {}, + "execution_count": 21 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0][0][0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "zA4G2Se4SRB3", + "outputId": "324312d2-ed0a-49eb-f81f-e904e53992fe" + }, + "execution_count": 22, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(1)" + ] + }, + "metadata": {}, + "execution_count": 22 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0][2][2]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Mwy3zmKKSdbk", + "outputId": "d35172c3-b099-40a6-ddf1-a453c2adfa44" + }, + "execution_count": 23, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(9)" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[:,1,1]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "fE3nCM1KS7XT", 
+ "outputId": "01f5d755-9737-4235-9f73-dce89ff6ba16" + }, + "execution_count": 24, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([5])" + ] + }, + "metadata": {}, + "execution_count": 24 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0,0,:]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "luNDINKNTTxp", + "outputId": "091195ef-2f71-4602-e95f-529a69193150" + }, + "execution_count": 25, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1, 2, 3])" + ] + }, + "metadata": {}, + "execution_count": 25 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0,:,2]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "KG8A4xbfThCL", + "outputId": "5866bc41-9241-4619-be7b-e9206b3f80ab" + }, + "execution_count": 26, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([3, 6, 9])" + ] + }, + "metadata": {}, + "execution_count": 26 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import numpy as np" + ], + "metadata": { + "id": "CZ3PX0qlTwHJ" + }, + "execution_count": 27, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "array = np.arange(1.0, 8.0)" + ], + "metadata": { + "id": "UOBeTumiT3Lf" + }, + "execution_count": 28, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "array" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "RzcO32E9UCQl", + "outputId": "430def24-c42c-461f-e5e7-398544c695d3" + }, + "execution_count": 29, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([1., 2., 3., 4., 5., 6., 7.])" + ] + }, + "metadata": {}, + "execution_count": 29 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor = torch.from_numpy(array)\n", + "tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": 
"JJIL0q1DUC6O", + "outputId": "8a3b1d7c-4482-4d32-f34f-9212d9d3a177" + }, + "execution_count": 32, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64)" + ] + }, + "metadata": {}, + "execution_count": 32 + } + ] + }, + { + "cell_type": "code", + "source": [ + "array[3]=11.0" + ], + "metadata": { + "id": "j3Ce6q3DUIEK" + }, + "execution_count": 33, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "array" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dc_BCVdjUsCc", + "outputId": "65537325-8b11-4f36-fc73-e56f30d6a036" + }, + "execution_count": 34, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([ 1., 2., 3., 11., 5., 6., 7.])" + ] + }, + "metadata": {}, + "execution_count": 34 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "VG1e_eITUta2", + "outputId": "a26c5198-23b6-4a6d-d73a-ba20cd9782b8" + }, + "execution_count": 35, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 1., 2., 3., 11., 5., 6., 7.], dtype=torch.float64)" + ] + }, + "metadata": {}, + "execution_count": 35 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor = torch.ones(7)\n", + "tensor, tensor.dtype\n", + "numpy_tensor = tensor.numpy()\n", + "numpy_tensor, numpy_tensor.dtype" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Swt8JF8vUuev", + "outputId": "c9e5bf6a-6d2c-41d6-8327-366867ffdd2d" + }, + "execution_count": 37, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(array([1., 1., 1., 1., 1., 1., 1.], dtype=float32), dtype('float32'))" + ] + }, + "metadata": {}, + "execution_count": 37 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + 
"random_tensor_A = torch.rand(3,4)\n", + "random_tensor_B = torch.rand(3,4)\n", + "print(random_tensor_A)\n", + "print(random_tensor_B)\n", + "print(random_tensor_A == random_tensor_B)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "uGcagTteVFTD", + "outputId": "49405790-08e7-4210-b7f1-f00b904c7eb9" + }, + "execution_count": 38, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tensor([[0.9870, 0.6636, 0.6873, 0.8863],\n", + " [0.8386, 0.4169, 0.3587, 0.0265],\n", + " [0.2981, 0.6025, 0.5652, 0.5840]])\n", + "tensor([[0.9821, 0.3481, 0.0913, 0.4940],\n", + " [0.7495, 0.4387, 0.9582, 0.8659],\n", + " [0.5064, 0.6919, 0.0809, 0.9771]])\n", + "tensor([[False, False, False, False],\n", + " [False, False, False, False],\n", + " [False, False, False, False]])\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "RANDOM_SEED = 42\n", + "torch.manual_seed(RANDOM_SEED)\n", + "random_tensor_C = torch.rand(3,4)\n", + "torch.manual_seed(RANDOM_SEED)\n", + "random_tensor_D = torch.rand(3,4)\n", + "print(random_tensor_C)\n", + "print(random_tensor_D)\n", + "print(random_tensor_C == random_tensor_D)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "HznyXyEaWjLM", + "outputId": "25956434-01b6-4059-9054-c9978884ddc1" + }, + "execution_count": 46, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tensor([[0.8823, 0.9150, 0.3829, 0.9593],\n", + " [0.3904, 0.6009, 0.2566, 0.7936],\n", + " [0.9408, 0.1332, 0.9346, 0.5936]])\n", + "tensor([[0.8823, 0.9150, 0.3829, 0.9593],\n", + " [0.3904, 0.6009, 0.2566, 0.7936],\n", + " [0.9408, 0.1332, 0.9346, 0.5936]])\n", + "tensor([[True, True, True, True],\n", + " [True, True, True, True],\n", + " [True, True, True, True]])\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!nvidia-smi" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": 
"vltPTh0YXJSt", + "outputId": "807af6dc-a9ca-4301-ec32-b688dbde8be8" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Thu May 23 02:57:59 2024 \n", + "+---------------------------------------------------------------------------------------+\n", + "| NVIDIA-SMI 535.104.05 Driver Version: 535.104.05 CUDA Version: 12.2 |\n", + "|-----------------------------------------+----------------------+----------------------+\n", + "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n", + "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n", + "| | | MIG M. |\n", + "|=========================================+======================+======================|\n", + "| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n", + "| N/A 60C P8 11W / 70W | 0MiB / 15360MiB | 0% Default |\n", + "| | | N/A |\n", + "+-----------------------------------------+----------------------+----------------------+\n", + " \n", + "+---------------------------------------------------------------------------------------+\n", + "| Processes: |\n", + "| GPU GI CI PID Type Process name GPU Memory |\n", + "| ID ID Usage |\n", + "|=======================================================================================|\n", + "| No running processes found |\n", + "+---------------------------------------------------------------------------------------+\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "torch.cuda.is_available()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "L6mMyPDyYh1j", + "outputId": "279c5dd8-c2a8-4fbd-f321-2f5d7c6e90e6" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ] + }, + { + "cell_type": "code", + "source": [ + "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", + "device" + ], + 
"metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "oOdiYa7ZYytx", + "outputId": "d73b04fc-8963-4826-9722-08d118d5ab91" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'cuda'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.cuda.device_count()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vOdsazLqZFM5", + "outputId": "8189cd6a-9017-4663-a652-3e15c517d9c3" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "1" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor = torch.tensor([1,2,3], device = \"cpu\")\n", + "print(tensor, tensor.device)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cdik9Vw3ZMv0", + "outputId": "044a68fd-83a1-409d-8e3b-655142ca0270" + }, + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tensor([1, 2, 3]) cpu\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor_on_gpu = tensor.to(device)\n", + "tensor_on_gpu" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Zmp835rrZp-z", + "outputId": "37fa3413-18a3-47bf-ae51-5b36ff85a3ef" + }, + "execution_count": 8, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1, 2, 3], device='cuda:0')" + ] + }, + "metadata": {}, + "execution_count": 8 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor_on_gpu.numpy()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 159 + }, + "id": "jhriaa8uZ1yM", + "outputId": "bc5a3226-1a12-4fea-8769-a44f21cdc323" + }, + 
"execution_count": 10, + "outputs": [ + { + "output_type": "error", + "ename": "TypeError", + "evalue": "can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtensor_on_gpu\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnumpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first." + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor_on_cpu = tensor_on_gpu.cpu().numpy()" + ], + "metadata": { + "id": "LHGXK3GgaOzL" + }, + "execution_count": 12, + "outputs": [] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "j-El4LlCajfq" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Upozornenie**: \nTento dokument bol preložený pomocou služby na automatický preklad [Co-op Translator](https://github.com/Azure/co-op-translator). Hoci sa snažíme o presnosť, upozorňujeme, že automatické preklady môžu obsahovať chyby alebo nepresnosti. Pôvodný dokument v jeho pôvodnom jazyku by mal byť považovaný za záväzný zdroj. Pre dôležité informácie odporúčame profesionálny ľudský preklad. 
Nezodpovedáme za akékoľvek nedorozumenia alebo nesprávne interpretácie vyplývajúce z použitia tohto prekladu.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sl/2-Regression/1-Tools/notebook.ipynb b/translations/sl/2-Regression/1-Tools/notebook.ipynb new file mode 100644 index 000000000..e69de29bb diff --git a/translations/sl/2-Regression/1-Tools/solution/R/lesson_1-R.ipynb b/translations/sl/2-Regression/1-Tools/solution/R/lesson_1-R.ipynb new file mode 100644 index 000000000..e2ac16414 --- /dev/null +++ b/translations/sl/2-Regression/1-Tools/solution/R/lesson_1-R.ipynb @@ -0,0 +1,447 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_1-R.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "c18d3bd0bd8ae3878597e89dcd1fa5c1", + "translation_date": "2025-09-06T13:41:25+00:00", + "source_file": "2-Regression/1-Tools/solution/R/lesson_1-R.ipynb", + "language_code": "sl" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "YJUHCXqK57yz" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Uvod v regresijo - Lekcija 1\n", + "\n", + "#### Postavljanje v perspektivo\n", + "\n", + "✅ Obstaja veliko vrst regresijskih metod, izbira pa je odvisna od odgovora, ki ga iščete. Če želite napovedati verjetno višino osebe glede na njeno starost, bi uporabili `linearno regresijo`, saj iščete **številčno vrednost**. Če vas zanima, ali naj se določena vrsta kuhinje šteje za vegansko ali ne, iščete **dodelitev kategorije**, zato bi uporabili `logistično regresijo`. Več o logistični regresiji boste izvedeli kasneje. 
Razmislite o nekaterih vprašanjih, ki jih lahko zastavite podatkom, in o tem, katera od teh metod bi bila bolj primerna.\n", + "\n", + "V tem razdelku boste delali z [majhnim naborom podatkov o diabetesu](https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html). Predstavljajte si, da želite preizkusiti zdravljenje za diabetične bolnike. Modeli strojnega učenja vam lahko pomagajo določiti, kateri bolniki bi se na zdravljenje bolje odzvali, glede na kombinacije spremenljivk. Tudi zelo osnovni regresijski model, ko je vizualiziran, lahko pokaže informacije o spremenljivkah, ki bi vam pomagale organizirati teoretične klinične preizkuse.\n", + "\n", + "Pa začnimo s to nalogo!\n", + "\n", + "

\n", + " \n", + "

Umetniško delo @allison_horst
\n", + "\n", + "\n" + ], + "metadata": { + "id": "LWNNzfqd6feZ" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 1. Nalaganje našega nabora orodij\n", + "\n", + "Za to nalogo bomo potrebovali naslednje pakete:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) je [zbirka paketov za R](https://www.tidyverse.org/packages), zasnovana za hitrejše, enostavnejše in bolj zabavno podatkovno znanost!\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) je [zbirka paketov](https://www.tidymodels.org/packages/) za modeliranje in strojno učenje.\n", + "\n", + "Namestite jih lahko z ukazom:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\"))`\n", + "\n", + "Spodnji skript preveri, ali imate nameščene pakete, potrebne za dokončanje tega modula, in jih po potrebi namesti.\n" + ], + "metadata": { + "id": "FIo2YhO26wI9" + } + }, + { + "cell_type": "code", + "execution_count": 2, + "source": [ + "suppressWarnings(if(!require(\"pacman\")) install.packages(\"pacman\"))\n", + "pacman::p_load(tidyverse, tidymodels)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Loading required package: pacman\n", + "\n" + ] + } + ], + "metadata": { + "id": "cIA9fz9v7Dss", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "2df7073b-86b2-4b32-cb86-0da605a0dc11" + } + }, + { + "cell_type": "markdown", + "source": [ + "Zdaj naložimo te odlične pakete in jih naredimo dostopne v naši trenutni R seji. (To je zgolj za ponazoritev, `pacman::p_load()` je to že naredil namesto vas)\n" + ], + "metadata": { + "id": "gpO_P_6f9WUG" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# load the core Tidyverse packages\r\n", + "library(tidyverse)\r\n", + "\r\n", + "# load the core Tidymodels packages\r\n", + "library(tidymodels)\r\n" + ], + "outputs": [], + "metadata": { + "id": "NLMycgG-9ezO" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 2. 
Dataset o diabetesu\n", + "\n", + "V tej vaji bomo uporabili svoje regresijske veščine za napovedovanje na podlagi podatkov o diabetesu. [Dataset o diabetesu](https://www4.stat.ncsu.edu/~boos/var.select/diabetes.rwrite1.txt) vključuje `442 vzorcev` podatkov o diabetesu, z 10 spremenljivkami napovedovalnih značilnosti: `starost`, `spol`, `indeks telesne mase`, `povprečni krvni tlak` in `šest meritev krvnega seruma`, ter izhodno spremenljivko `y`: kvantitativno merilo napredovanja bolezni eno leto po začetnem stanju.\n", + "\n", + "|Število opazovanj|442|\n", + "|-----------------|:---|\n", + "|Število napovedovalcev|Prvih 10 stolpcev je numeričnih napovedovalnih|\n", + "|Izhodna tarča|Stolpec 11 je kvantitativno merilo napredovanja bolezni eno leto po začetnem stanju|\n", + "|Informacije o napovedovalcih|- starost v letih\n", + "||- spol\n", + "||- bmi indeks telesne mase\n", + "||- bp povprečni krvni tlak\n", + "||- s1 tc, skupni serumski holesterol\n", + "||- s2 ldl, lipoproteini nizke gostote\n", + "||- s3 hdl, lipoproteini visoke gostote\n", + "||- s4 tch, skupni holesterol / HDL\n", + "||- s5 ltg, verjetno logaritem ravni serumskih trigliceridov\n", + "||- s6 glu, raven krvnega sladkorja|\n", + "\n", + "> 🎓 Zapomnite si, to je nadzorovano učenje, zato potrebujemo ciljno spremenljivko 'y'.\n", + "\n", + "Preden lahko manipulirate s podatki v R-ju, jih morate uvoziti v pomnilnik R-ja ali vzpostaviti povezavo do podatkov, ki jih R lahko uporablja za dostop do podatkov na daljavo.\n", + "\n", + "> Paket [readr](https://readr.tidyverse.org/), ki je del Tidyverse, ponuja hiter in prijazen način za branje pravokotnih podatkov v R.\n", + "\n", + "Zdaj naložimo dataset o diabetesu, ki je na voljo na tem URL-ju: <https://www4.stat.ncsu.edu/~boos/var.select/diabetes.rwrite1.txt>\n", + "\n", + "Prav tako bomo izvedli osnovni pregled podatkov z uporabo `glimpse()` in prikazali prvih 5 vrstic z uporabo `slice()`.\n", + "\n", + "Preden nadaljujemo, naj predstavimo nekaj, kar boste pogosto srečali v kodi R 🥁🥁: operator cevi `%>%`\n", + "\n", 
+ "Operator cevi (`%>%`) izvaja operacije v logičnem zaporedju tako, da objekt posreduje naprej v funkcijo ali izraz klica. Operator cevi si lahko predstavljate kot \"in nato\" v vaši kodi.\n" + ], + "metadata": { + "id": "KM6iXLH996Cl" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Import the data set\r\n", + "diabetes <- read_table2(file = \"https://www4.stat.ncsu.edu/~boos/var.select/diabetes.rwrite1.txt\")\r\n", + "\r\n", + "\r\n", + "# Get a glimpse and dimensions of the data\r\n", + "glimpse(diabetes)\r\n", + "\r\n", + "\r\n", + "# Select the first 5 rows of the data\r\n", + "diabetes %>% \r\n", + " slice(1:5)" + ], + "outputs": [], + "metadata": { + "id": "Z1geAMhM-bSP" + } + }, + { + "cell_type": "markdown", + "source": [ + "`glimpse()` nam pokaže, da ima ta podatkovni niz 442 vrstic in 11 stolpcev, pri čemer so vsi stolpci tipa podatkov `double`.\n", + "\n", + "
\n", + "\n", + "> `glimpse()` in `slice()` sta funkciji v knjižnici [`dplyr`](https://dplyr.tidyverse.org/). Dplyr, ki je del Tidyverse, je slovnica za manipulacijo podatkov, ki ponuja dosleden nabor glagolov za reševanje najpogostejših izzivov pri obdelavi podatkov.\n", + "\n", + "
\n", + "\n", + "Zdaj, ko imamo podatke, se osredotočimo na eno značilnost (`bmi`), ki jo bomo uporabili za to vajo. To zahteva, da izberemo želene stolpce. Kako to storimo?\n", + "\n", + "[`dplyr::select()`](https://dplyr.tidyverse.org/reference/select.html) nam omogoča, da *izberemo* (in po želji preimenujemo) stolpce v podatkovnem okviru.\n" + ], + "metadata": { + "id": "UwjVT1Hz-c3Z" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Select predictor feature `bmi` and outcome `y`\r\n", + "diabetes_select <- diabetes %>% \r\n", + " select(c(bmi, y))\r\n", + "\r\n", + "# Print the first 10 rows\r\n", + "diabetes_select %>% \r\n", + " slice(1:10)" + ], + "outputs": [], + "metadata": { + "id": "RDY1oAKI-m80" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 3. Podatki za učenje in testiranje\n", + "\n", + "Pri nadzorovanem učenju je običajna praksa, da podatke *razdelimo* na dva podsklopa; (običajno večji) sklop, s katerim treniramo model, in manjši \"rezervni\" sklop, s katerim preverimo, kako se je model odrezal.\n", + "\n", + "Zdaj, ko imamo podatke pripravljene, lahko preverimo, ali nam stroj lahko pomaga določiti logično razdelitev med številkami v tem naboru podatkov. 
Uporabimo lahko paket [rsample](https://tidymodels.github.io/rsample/), ki je del okvira Tidymodels, za ustvarjanje objekta, ki vsebuje informacije o *načinu* razdelitve podatkov, nato pa še dve funkciji rsample za pridobitev ustvarjenih učnih in testnih sklopov:\n" + ], + "metadata": { + "id": "SDk668xK-tc3" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "set.seed(2056)\r\n", + "# Split 67% of the data for training and the rest for testing\r\n", + "diabetes_split <- diabetes_select %>% \r\n", + " initial_split(prop = 0.67)\r\n", + "\r\n", + "# Extract the resulting train and test sets\r\n", + "diabetes_train <- training(diabetes_split)\r\n", + "diabetes_test <- testing(diabetes_split)\r\n", + "\r\n", + "# Print the first 10 rows of the training set\r\n", + "diabetes_train %>% \r\n", + " slice(1:10)" + ], + "outputs": [], + "metadata": { + "id": "EqtHx129-1h-" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 4. Učite model linearne regresije s Tidymodels\n", + "\n", + "Zdaj smo pripravljeni, da naučimo naš model!\n", + "\n", + "V Tidymodels modele določite z uporabo `parsnip()` tako, da določite tri koncepte:\n", + "\n", + "- **Tip** modela razlikuje med modeli, kot so linearna regresija, logistična regresija, modeli odločitvenih dreves in podobno.\n", + "\n", + "- **Način** modela vključuje pogoste možnosti, kot sta regresija in klasifikacija; nekateri tipi modelov podpirajo oba načina, medtem ko imajo drugi le en način.\n", + "\n", + "- **Pogon** modela je računsko orodje, ki bo uporabljeno za prilagoditev modela. 
Pogosto so to R paketi, kot sta **`\"lm\"`** ali **`\"ranger\"`**.\n", + "\n", + "Te informacije o modeliranju so zajete v specifikaciji modela, zato jo ustvarimo!\n" + ], + "metadata": { + "id": "sBOS-XhB-6v7" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Build a linear model specification\r\n", + "lm_spec <- \r\n", + " # Type\r\n", + " linear_reg() %>% \r\n", + " # Engine\r\n", + " set_engine(\"lm\") %>% \r\n", + " # Mode\r\n", + " set_mode(\"regression\")\r\n", + "\r\n", + "\r\n", + "# Print the model specification\r\n", + "lm_spec" + ], + "outputs": [], + "metadata": { + "id": "20OwEw20--t3" + } + }, + { + "cell_type": "markdown", + "source": [ + "Ko je model *določen*, ga je mogoče `oceniti` ali `usposobiti` z uporabo funkcije [`fit()`](https://parsnip.tidymodels.org/reference/fit.html), običajno z uporabo formule in nekaterih podatkov.\n", + "\n", + "`y ~ .` pomeni, da bomo prilagodili `y` kot napovedano količino/cilj, ki ga pojasnjujejo vsi napovedniki/lastnosti, tj. `.` (v tem primeru imamo samo en napovednik: `bmi`).\n" + ], + "metadata": { + "id": "_oDHs89k_CJj" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Build a linear model specification\r\n", + "lm_spec <- linear_reg() %>% \r\n", + " set_engine(\"lm\") %>%\r\n", + " set_mode(\"regression\")\r\n", + "\r\n", + "\r\n", + "# Train a linear regression model\r\n", + "lm_mod <- lm_spec %>% \r\n", + " fit(y ~ ., data = diabetes_train)\r\n", + "\r\n", + "# Print the model\r\n", + "lm_mod" + ], + "outputs": [], + "metadata": { + "id": "YlsHqd-q_GJQ" + } + }, + { + "cell_type": "markdown", + "source": [ + "Iz modelnega izhoda lahko vidimo koeficiente, pridobljene med učenjem. Ti predstavljajo koeficiente premice najboljše prileganja, ki nam daje najnižjo skupno napako med dejansko in napovedano spremenljivko.\n", + "
\n", + "\n", + "## 5. Napovedovanje na testnem naboru\n", + "\n", + "Zdaj, ko smo izurili model, ga lahko uporabimo za napovedovanje napredovanja bolezni y za testni nabor podatkov z uporabo [parsnip::predict()](https://parsnip.tidymodels.org/reference/predict.model_fit.html). To bo uporabljeno za risanje premice med skupinami podatkov.\n" + ], + "metadata": { + "id": "kGZ22RQj_Olu" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make predictions for the test set\r\n", + "predictions <- lm_mod %>% \r\n", + " predict(new_data = diabetes_test)\r\n", + "\r\n", + "# Print out some of the predictions\r\n", + "predictions %>% \r\n", + " slice(1:5)" + ], + "outputs": [], + "metadata": { + "id": "nXHbY7M2_aao" + } + }, + { + "cell_type": "markdown", + "source": [ + "Juhu! 💃🕺 Pravkar smo izurili model in ga uporabili za napovedovanje!\n", + "\n", + "Pri napovedovanju je konvencija tidymodels vedno ustvariti tibble/podatkovni okvir z rezultati in standardiziranimi imeni stolpcev. To omogoča enostavno združevanje izvirnih podatkov in napovedi v uporabni obliki za nadaljnje operacije, kot je risanje grafov.\n", + "\n", + "`dplyr::bind_cols()` učinkovito združi več podatkovnih okvirjev po stolpcih.\n" + ], + "metadata": { + "id": "R_JstwUY_bIs" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Combine the predictions and the original test set\r\n", + "results <- diabetes_test %>% \r\n", + " bind_cols(predictions)\r\n", + "\r\n", + "\r\n", + "results %>% \r\n", + " slice(1:5)" + ], + "outputs": [], + "metadata": { + "id": "RybsMJR7_iI8" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 6. Prikaz rezultatov modeliranja\n", + "\n", + "Zdaj je čas, da to vidimo vizualno 📈. 
Ustvarili bomo razpršen diagram vseh vrednosti `y` in `bmi` iz testnega nabora, nato pa uporabili napovedi za risanje črte na najbolj ustreznem mestu med skupinami podatkov modela.\n", + "\n", + "R ima več sistemov za izdelavo grafov, vendar je `ggplot2` eden najbolj elegantnih in najbolj vsestranskih. Omogoča vam sestavljanje grafov z **združevanjem neodvisnih komponent**.\n" + ], + "metadata": { + "id": "XJbYbMZW_n_s" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Set a theme for the plot\r\n", + "theme_set(theme_light())\r\n", + "# Create a scatter plot\r\n", + "results %>% \r\n", + " ggplot(aes(x = bmi)) +\r\n", + " # Add a scatter plot\r\n", + " geom_point(aes(y = y), size = 1.6) +\r\n", + " # Add a line plot\r\n", + " geom_line(aes(y = .pred), color = \"blue\", size = 1.5)" + ], + "outputs": [], + "metadata": { + "id": "R9tYp3VW_sTn" + } + }, + { + "cell_type": "markdown", + "source": [ + "✅ Malo razmislite, kaj se tukaj dogaja. Ravna črta poteka skozi številne majhne točke podatkov, vendar kaj točno počne? Ali vidite, kako bi morali biti sposobni uporabiti to črto za napovedovanje, kje bi se nova, nevidena podatkovna točka morala uvrstiti glede na y-os grafa? Poskusite z besedami opisati praktično uporabo tega modela.\n", + "\n", + "Čestitke, zgradili ste svoj prvi model linearne regresije, z njim ustvarili napoved in jo prikazali na grafu!\n" + ], + "metadata": { + "id": "zrPtHIxx_tNI" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za strojno prevajanje [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da se zavedate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo strokovno človeško prevajanje. 
Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki izhajajo iz uporabe tega prevoda.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sl/2-Regression/1-Tools/solution/notebook.ipynb b/translations/sl/2-Regression/1-Tools/solution/notebook.ipynb new file mode 100644 index 000000000..e35aec234 --- /dev/null +++ b/translations/sl/2-Regression/1-Tools/solution/notebook.ipynb @@ -0,0 +1,677 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Linearna regresija za podatkovno zbirko Diabetes - Lekcija 1\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Uvozi potrebne knjižnice\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from sklearn import datasets, linear_model, model_selection\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Naložite podatkovni niz za diabetes, razdeljen na značilnosti `X` in ciljne vrednosti `y`\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(442, 10)\n", + "[ 0.03807591 0.05068012 0.06169621 0.02187239 -0.0442235 -0.03482076\n", + " -0.04340085 -0.00259226 0.01990749 -0.01764613]\n" + ] + } + ], + "source": [ + "X, y = datasets.load_diabetes(return_X_y=True)\n", + "print(X.shape)\n", + "print(X[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Izberite samo eno značilnost za ciljanje v tej vaji\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(442,)\n" + ] + } + ], + "source": [ + "# Selecting the 3rd feature\n", + "X = X[:, 2]\n", + "print(X.shape)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + 
"outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(442, 1)\n", + "[[ 0.06169621]\n", + " [-0.05147406]\n", + " [ 0.04445121]\n", + " [-0.01159501]\n", + " [-0.03638469]\n", + " [-0.04069594]\n", + " [-0.04716281]\n", + " [-0.00189471]\n", + " [ 0.06169621]\n", + " [ 0.03906215]\n", + " [-0.08380842]\n", + " [ 0.01750591]\n", + " [-0.02884001]\n", + " [-0.00189471]\n", + " [-0.02560657]\n", + " [-0.01806189]\n", + " [ 0.04229559]\n", + " [ 0.01211685]\n", + " [-0.0105172 ]\n", + " [-0.01806189]\n", + " [-0.05686312]\n", + " [-0.02237314]\n", + " [-0.00405033]\n", + " [ 0.06061839]\n", + " [ 0.03582872]\n", + " [-0.01267283]\n", + " [-0.07734155]\n", + " [ 0.05954058]\n", + " [-0.02129532]\n", + " [-0.00620595]\n", + " [ 0.04445121]\n", + " [-0.06548562]\n", + " [ 0.12528712]\n", + " [-0.05039625]\n", + " [-0.06332999]\n", + " [-0.03099563]\n", + " [ 0.02289497]\n", + " [ 0.01103904]\n", + " [ 0.07139652]\n", + " [ 0.01427248]\n", + " [-0.00836158]\n", + " [-0.06764124]\n", + " [-0.0105172 ]\n", + " [-0.02345095]\n", + " [ 0.06816308]\n", + " [-0.03530688]\n", + " [-0.01159501]\n", + " [-0.0730303 ]\n", + " [-0.04177375]\n", + " [ 0.01427248]\n", + " [-0.00728377]\n", + " [ 0.0164281 ]\n", + " [-0.00943939]\n", + " [-0.01590626]\n", + " [ 0.0250506 ]\n", + " [-0.04931844]\n", + " [ 0.04121778]\n", + " [-0.06332999]\n", + " [-0.06440781]\n", + " [-0.02560657]\n", + " [-0.00405033]\n", + " [ 0.00457217]\n", + " [-0.00728377]\n", + " [-0.0374625 ]\n", + " [-0.02560657]\n", + " [-0.02452876]\n", + " [-0.01806189]\n", + " [-0.01482845]\n", + " [-0.02991782]\n", + " [-0.046085 ]\n", + " [-0.06979687]\n", + " [ 0.03367309]\n", + " [-0.00405033]\n", + " [-0.02021751]\n", + " [ 0.00241654]\n", + " [-0.03099563]\n", + " [ 0.02828403]\n", + " [-0.03638469]\n", + " [-0.05794093]\n", + " [-0.0374625 ]\n", + " [ 0.01211685]\n", + " [-0.02237314]\n", + " [-0.03530688]\n", + " [ 0.00996123]\n", + " [-0.03961813]\n", + " [ 0.07139652]\n", + " 
[-0.07518593]\n", + " [-0.00620595]\n", + " [-0.04069594]\n", + " [-0.04824063]\n", + " [-0.02560657]\n", + " [ 0.0519959 ]\n", + " [ 0.00457217]\n", + " [-0.06440781]\n", + " [-0.01698407]\n", + " [-0.05794093]\n", + " [ 0.00996123]\n", + " [ 0.08864151]\n", + " [-0.00512814]\n", + " [-0.06440781]\n", + " [ 0.01750591]\n", + " [-0.04500719]\n", + " [ 0.02828403]\n", + " [ 0.04121778]\n", + " [ 0.06492964]\n", + " [-0.03207344]\n", + " [-0.07626374]\n", + " [ 0.04984027]\n", + " [ 0.04552903]\n", + " [-0.00943939]\n", + " [-0.03207344]\n", + " [ 0.00457217]\n", + " [ 0.02073935]\n", + " [ 0.01427248]\n", + " [ 0.11019775]\n", + " [ 0.00133873]\n", + " [ 0.05846277]\n", + " [-0.02129532]\n", + " [-0.0105172 ]\n", + " [-0.04716281]\n", + " [ 0.00457217]\n", + " [ 0.01750591]\n", + " [ 0.08109682]\n", + " [ 0.0347509 ]\n", + " [ 0.02397278]\n", + " [-0.00836158]\n", + " [-0.06117437]\n", + " [-0.00189471]\n", + " [-0.06225218]\n", + " [ 0.0164281 ]\n", + " [ 0.09618619]\n", + " [-0.06979687]\n", + " [-0.02129532]\n", + " [-0.05362969]\n", + " [ 0.0433734 ]\n", + " [ 0.05630715]\n", + " [-0.0816528 ]\n", + " [ 0.04984027]\n", + " [ 0.11127556]\n", + " [ 0.06169621]\n", + " [ 0.01427248]\n", + " [ 0.04768465]\n", + " [ 0.01211685]\n", + " [ 0.00564998]\n", + " [ 0.04660684]\n", + " [ 0.12852056]\n", + " [ 0.05954058]\n", + " [ 0.09295276]\n", + " [ 0.01535029]\n", + " [-0.00512814]\n", + " [ 0.0703187 ]\n", + " [-0.00405033]\n", + " [-0.00081689]\n", + " [-0.04392938]\n", + " [ 0.02073935]\n", + " [ 0.06061839]\n", + " [-0.0105172 ]\n", + " [-0.03315126]\n", + " [-0.06548562]\n", + " [ 0.0433734 ]\n", + " [-0.06225218]\n", + " [ 0.06385183]\n", + " [ 0.03043966]\n", + " [ 0.07247433]\n", + " [-0.0191397 ]\n", + " [-0.06656343]\n", + " [-0.06009656]\n", + " [ 0.06924089]\n", + " [ 0.05954058]\n", + " [-0.02668438]\n", + " [-0.02021751]\n", + " [-0.046085 ]\n", + " [ 0.07139652]\n", + " [-0.07949718]\n", + " [ 0.00996123]\n", + " [-0.03854032]\n", + " [ 0.01966154]\n", + 
" [ 0.02720622]\n", + " [-0.00836158]\n", + " [-0.01590626]\n", + " [ 0.00457217]\n", + " [-0.04285156]\n", + " [ 0.00564998]\n", + " [-0.03530688]\n", + " [ 0.02397278]\n", + " [-0.01806189]\n", + " [ 0.04229559]\n", + " [-0.0547075 ]\n", + " [-0.00297252]\n", + " [-0.06656343]\n", + " [-0.01267283]\n", + " [-0.04177375]\n", + " [-0.03099563]\n", + " [-0.00512814]\n", + " [-0.05901875]\n", + " [ 0.0250506 ]\n", + " [-0.046085 ]\n", + " [ 0.00349435]\n", + " [ 0.05415152]\n", + " [-0.04500719]\n", + " [-0.05794093]\n", + " [-0.05578531]\n", + " [ 0.00133873]\n", + " [ 0.03043966]\n", + " [ 0.00672779]\n", + " [ 0.04660684]\n", + " [ 0.02612841]\n", + " [ 0.04552903]\n", + " [ 0.04013997]\n", + " [-0.01806189]\n", + " [ 0.01427248]\n", + " [ 0.03690653]\n", + " [ 0.00349435]\n", + " [-0.07087468]\n", + " [-0.03315126]\n", + " [ 0.09403057]\n", + " [ 0.03582872]\n", + " [ 0.03151747]\n", + " [-0.06548562]\n", + " [-0.04177375]\n", + " [-0.03961813]\n", + " [-0.03854032]\n", + " [-0.02560657]\n", + " [-0.02345095]\n", + " [-0.06656343]\n", + " [ 0.03259528]\n", + " [-0.046085 ]\n", + " [-0.02991782]\n", + " [-0.01267283]\n", + " [-0.01590626]\n", + " [ 0.07139652]\n", + " [-0.03099563]\n", + " [ 0.00026092]\n", + " [ 0.03690653]\n", + " [ 0.03906215]\n", + " [-0.01482845]\n", + " [ 0.00672779]\n", + " [-0.06871905]\n", + " [-0.00943939]\n", + " [ 0.01966154]\n", + " [ 0.07462995]\n", + " [-0.00836158]\n", + " [-0.02345095]\n", + " [-0.046085 ]\n", + " [ 0.05415152]\n", + " [-0.03530688]\n", + " [-0.03207344]\n", + " [-0.0816528 ]\n", + " [ 0.04768465]\n", + " [ 0.06061839]\n", + " [ 0.05630715]\n", + " [ 0.09834182]\n", + " [ 0.05954058]\n", + " [ 0.03367309]\n", + " [ 0.05630715]\n", + " [-0.06548562]\n", + " [ 0.16085492]\n", + " [-0.05578531]\n", + " [-0.02452876]\n", + " [-0.03638469]\n", + " [-0.00836158]\n", + " [-0.04177375]\n", + " [ 0.12744274]\n", + " [-0.07734155]\n", + " [ 0.02828403]\n", + " [-0.02560657]\n", + " [-0.06225218]\n", + " [-0.00081689]\n", + 
" [ 0.08864151]\n", + " [-0.03207344]\n", + " [ 0.03043966]\n", + " [ 0.00888341]\n", + " [ 0.00672779]\n", + " [-0.02021751]\n", + " [-0.02452876]\n", + " [-0.01159501]\n", + " [ 0.02612841]\n", + " [-0.05901875]\n", + " [-0.03638469]\n", + " [-0.02452876]\n", + " [ 0.01858372]\n", + " [-0.0902753 ]\n", + " [-0.00512814]\n", + " [-0.05255187]\n", + " [-0.02237314]\n", + " [-0.02021751]\n", + " [-0.0547075 ]\n", + " [-0.00620595]\n", + " [-0.01698407]\n", + " [ 0.05522933]\n", + " [ 0.07678558]\n", + " [ 0.01858372]\n", + " [-0.02237314]\n", + " [ 0.09295276]\n", + " [-0.03099563]\n", + " [ 0.03906215]\n", + " [-0.06117437]\n", + " [-0.00836158]\n", + " [-0.0374625 ]\n", + " [-0.01375064]\n", + " [ 0.07355214]\n", + " [-0.02452876]\n", + " [ 0.03367309]\n", + " [ 0.0347509 ]\n", + " [-0.03854032]\n", + " [-0.03961813]\n", + " [-0.00189471]\n", + " [-0.03099563]\n", + " [-0.046085 ]\n", + " [ 0.00133873]\n", + " [ 0.06492964]\n", + " [ 0.04013997]\n", + " [-0.02345095]\n", + " [ 0.05307371]\n", + " [ 0.04013997]\n", + " [-0.02021751]\n", + " [ 0.01427248]\n", + " [-0.03422907]\n", + " [ 0.00672779]\n", + " [ 0.00457217]\n", + " [ 0.03043966]\n", + " [ 0.0519959 ]\n", + " [ 0.06169621]\n", + " [-0.00728377]\n", + " [ 0.00564998]\n", + " [ 0.05415152]\n", + " [-0.00836158]\n", + " [ 0.114509 ]\n", + " [ 0.06708527]\n", + " [-0.05578531]\n", + " [ 0.03043966]\n", + " [-0.02560657]\n", + " [ 0.10480869]\n", + " [-0.00620595]\n", + " [-0.04716281]\n", + " [-0.04824063]\n", + " [ 0.08540807]\n", + " [-0.01267283]\n", + " [-0.03315126]\n", + " [-0.00728377]\n", + " [-0.01375064]\n", + " [ 0.05954058]\n", + " [ 0.02181716]\n", + " [ 0.01858372]\n", + " [-0.01159501]\n", + " [-0.00297252]\n", + " [ 0.01750591]\n", + " [-0.02991782]\n", + " [-0.02021751]\n", + " [-0.05794093]\n", + " [ 0.06061839]\n", + " [-0.04069594]\n", + " [-0.07195249]\n", + " [-0.05578531]\n", + " [ 0.04552903]\n", + " [-0.00943939]\n", + " [-0.03315126]\n", + " [ 0.04984027]\n", + " [-0.08488624]\n", + 
" [ 0.00564998]\n", + " [ 0.02073935]\n", + " [-0.00728377]\n", + " [ 0.10480869]\n", + " [-0.02452876]\n", + " [-0.00620595]\n", + " [-0.03854032]\n", + " [ 0.13714305]\n", + " [ 0.17055523]\n", + " [ 0.00241654]\n", + " [ 0.03798434]\n", + " [-0.05794093]\n", + " [-0.00943939]\n", + " [-0.02345095]\n", + " [-0.0105172 ]\n", + " [-0.03422907]\n", + " [-0.00297252]\n", + " [ 0.06816308]\n", + " [ 0.00996123]\n", + " [ 0.00241654]\n", + " [-0.03854032]\n", + " [ 0.02612841]\n", + " [-0.08919748]\n", + " [ 0.06061839]\n", + " [-0.02884001]\n", + " [-0.02991782]\n", + " [-0.0191397 ]\n", + " [-0.04069594]\n", + " [ 0.01535029]\n", + " [-0.02452876]\n", + " [ 0.00133873]\n", + " [ 0.06924089]\n", + " [-0.06979687]\n", + " [-0.02991782]\n", + " [-0.046085 ]\n", + " [ 0.01858372]\n", + " [ 0.00133873]\n", + " [-0.03099563]\n", + " [-0.00405033]\n", + " [ 0.01535029]\n", + " [ 0.02289497]\n", + " [ 0.04552903]\n", + " [-0.04500719]\n", + " [-0.03315126]\n", + " [ 0.097264 ]\n", + " [ 0.05415152]\n", + " [ 0.12313149]\n", + " [-0.08057499]\n", + " [ 0.09295276]\n", + " [-0.05039625]\n", + " [-0.01159501]\n", + " [-0.0277622 ]\n", + " [ 0.05846277]\n", + " [ 0.08540807]\n", + " [-0.00081689]\n", + " [ 0.00672779]\n", + " [ 0.00888341]\n", + " [ 0.08001901]\n", + " [ 0.07139652]\n", + " [-0.02452876]\n", + " [-0.0547075 ]\n", + " [-0.03638469]\n", + " [ 0.0164281 ]\n", + " [ 0.07786339]\n", + " [-0.03961813]\n", + " [ 0.01103904]\n", + " [-0.04069594]\n", + " [-0.03422907]\n", + " [ 0.00564998]\n", + " [ 0.08864151]\n", + " [-0.03315126]\n", + " [-0.05686312]\n", + " [-0.03099563]\n", + " [ 0.05522933]\n", + " [-0.06009656]\n", + " [ 0.00133873]\n", + " [-0.02345095]\n", + " [-0.07410811]\n", + " [ 0.01966154]\n", + " [-0.01590626]\n", + " [-0.01590626]\n", + " [ 0.03906215]\n", + " [-0.0730303 ]]\n" + ] + } + ], + "source": [ + "#Reshaping to get a 2D array\n", + "X = X.reshape(-1, 1)\n", + "print(X.shape)\n", + "print(X)" + ] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "Razdeli podatke za usposabljanje in testiranje za `X` in `y`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.33)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Izberite model in ga prilagodite učnim podatkom\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "LinearRegression()" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = linear_model.LinearRegression()\n", + "model.fit(X_train, y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Uporabite testne podatke za napovedovanje črte\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred = model.predict(X_test)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Prikaži rezultate v grafu\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.scatter(X_test, y_test, color='black')\n", + "plt.plot(X_test, y_pred, color='blue', linewidth=3)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za strojno prevajanje [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da upoštevate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo strokovno človeško prevajanje. Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki izhajajo iz uporabe tega prevoda.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.1" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "16ff1a974f6e4348e869e4a7d366b86a", + "translation_date": "2025-09-06T13:38:40+00:00", + "source_file": "2-Regression/1-Tools/solution/notebook.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sl/2-Regression/2-Data/notebook.ipynb b/translations/sl/2-Regression/2-Data/notebook.ipynb new file mode 100644 index 000000000..c7567a3c1 --- /dev/null +++ b/translations/sl/2-Regression/2-Data/notebook.ipynb @@ -0,0 +1,46 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + 
"name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3-final" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3", + "language": "python" + }, + "coopTranslator": { + "original_hash": "1b2ab303ac6c604a34c6ca7a49077fc7", + "translation_date": "2025-09-06T13:45:51+00:00", + "source_file": "2-Regression/2-Data/notebook.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za strojno prevajanje [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da upoštevate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo strokovno človeško prevajanje. 
Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki izhajajo iz uporabe tega prevoda.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sl/2-Regression/2-Data/solution/R/lesson_2-R.ipynb b/translations/sl/2-Regression/2-Data/solution/R/lesson_2-R.ipynb new file mode 100644 index 000000000..19c889e01 --- /dev/null +++ b/translations/sl/2-Regression/2-Data/solution/R/lesson_2-R.ipynb @@ -0,0 +1,684 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_2-R.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "f3c335f9940cfd76528b3ef918b9b342", + "translation_date": "2025-09-06T13:49:10+00:00", + "source_file": "2-Regression/2-Data/solution/R/lesson_2-R.ipynb", + "language_code": "sl" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Zgradite regresijski model: priprava in vizualizacija podatkov\n", + "\n", + "## **Linearna regresija za buče - Lekcija 2**\n", + "#### Uvod\n", + "\n", + "Zdaj, ko imate na voljo orodja, ki jih potrebujete za začetek gradnje modelov strojnega učenja z Tidymodels in Tidyverse, ste pripravljeni začeti postavljati vprašanja o svojih podatkih. Ko delate s podatki in uporabljate rešitve strojnega učenja, je zelo pomembno, da znate postaviti prava vprašanja, da pravilno izkoristite potencial svojega nabora podatkov.\n", + "\n", + "V tej lekciji boste spoznali:\n", + "\n", + "- Kako pripraviti podatke za gradnjo modela.\n", + "\n", + "- Kako uporabiti `ggplot2` za vizualizacijo podatkov.\n", + "\n", + "Vprašanje, na katerega želite dobiti odgovor, bo določilo, katere vrste algoritmov strojnega učenja boste uporabili. Kakovost odgovora, ki ga dobite, pa bo močno odvisna od narave vaših podatkov.\n", + "\n", + "Poglejmo to skozi praktično vajo.\n", + "\n", + "\n", + "

\n", + " \n", + "

Umetniško delo @allison_horst
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "Pg5aexcOPqAZ" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 1. Uvoz podatkov o bučah in priklic Tidyverse\n", + "\n", + "Za obdelavo te lekcije bomo potrebovali naslednje pakete:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) je [zbirka paketov za R](https://www.tidyverse.org/packages), zasnovana za hitrejše, enostavnejše in bolj zabavno podatkovno znanost!\n", + "\n", + "Namestite jih lahko z ukazom:\n", + "\n", + "`install.packages(c(\"tidyverse\"))`\n", + "\n", + "Spodnji skript preveri, ali imate nameščene pakete, potrebne za dokončanje tega modula, in jih namesti, če kateri manjka.\n" + ], + "metadata": { + "id": "dc5WhyVdXAjR" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "suppressWarnings(if(!require(\"pacman\")) install.packages(\"pacman\"))\n", + "pacman::p_load(tidyverse)" + ], + "outputs": [], + "metadata": { + "id": "GqPYUZgfXOBt" + } + }, + { + "cell_type": "markdown", + "source": [ + "Zdaj zaženimo nekaj paketov in naložimo [podatke](https://github.com/microsoft/ML-For-Beginners/blob/main/2-Regression/data/US-pumpkins.csv), ki so na voljo za to lekcijo!\n" + ], + "metadata": { + "id": "kvjDTPDSXRr2" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load the core Tidyverse packages\n", + "library(tidyverse)\n", + "\n", + "# Import the pumpkins data\n", + "pumpkins <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/2-Regression/data/US-pumpkins.csv\")\n", + "\n", + "\n", + "# Get a glimpse and dimensions of the data\n", + "glimpse(pumpkins)\n", + "\n", + "\n", + "# Print the first 50 rows of the data set\n", + "pumpkins %>% \n", + " slice_head(n =50)" + ], + "outputs": [], + "metadata": { + "id": "VMri-t2zXqgD" + } + }, + { + "cell_type": "markdown", + "source": [ + "Hitro `glimpse()` takoj pokaže, da so prisotne prazne vrednosti ter mešanica nizov (`chr`) in 
numeričnih podatkov (`dbl`). `Date` je tipa znak, poleg tega pa je tu še nenavadni stolpec z imenom `Package`, kjer so podatki mešanica med `sacks`, `bins` in drugimi vrednostmi. Podatki so, pravzaprav, precej zmedeni 😤.\n", + "\n", + "Pravzaprav ni ravno pogosto, da bi dobili podatkovni niz, ki je popolnoma pripravljen za uporabo pri ustvarjanju modela strojnega učenja kar takoj. A brez skrbi, v tej lekciji se boste naučili, kako pripraviti surov podatkovni niz z uporabo standardnih knjižnic v R 🧑‍🔧. Prav tako se boste naučili različnih tehnik za vizualizacijo podatkov.📈📊\n", + "
\n", + "\n", + "> Osvežitev: Operator cevi (`%>%`) izvaja operacije v logičnem zaporedju tako, da objekt posreduje naprej v funkcijo ali izraz klica. Operator cevi si lahko predstavljate kot \"in potem\" v vaši kodi.\n" + ], + "metadata": { + "id": "REWcIv9yX29v" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 2. Preverjanje manjkajočih podatkov\n", + "\n", + "Ena najpogostejših težav, s katerimi se soočajo podatkovni znanstveniki, so nepopolni ali manjkajoči podatki. R predstavlja manjkajoče ali neznane vrednosti s posebnim označevalcem: `NA` (Not Available).\n", + "\n", + "Kako torej ugotovimo, da podatkovni okvir vsebuje manjkajoče vrednosti?\n", + "
\n", + "- Eden od preprostih načinov je uporaba osnovne R funkcije `anyNA`, ki vrne logične vrednosti `TRUE` ali `FALSE`.\n" + ], + "metadata": { + "id": "Zxfb3AM5YbUe" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "pumpkins %>% \n", + " anyNA()" + ], + "outputs": [], + "metadata": { + "id": "G--DQutAYltj" + } + }, + { + "cell_type": "markdown", + "source": [ + "Odlično, zdi se, da manjkajo nekateri podatki! To je dobro izhodišče.\n", + "\n", + "- Druga možnost bi bila uporaba funkcije `is.na()`, ki pokaže, kateri posamezni elementi stolpcev manjkajo, z logično vrednostjo `TRUE`.\n" + ], + "metadata": { + "id": "mU-7-SB6YokF" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "pumpkins %>% \n", + " is.na() %>% \n", + " head(n = 7)" + ], + "outputs": [], + "metadata": { + "id": "W-DxDOR4YxSW" + } + }, + { + "cell_type": "markdown", + "source": [ + "V redu, naloga je opravljena, vendar bi bilo z veliko podatkovno tabelo, kot je ta, neučinkovito in praktično nemogoče pregledati vse vrstice in stolpce posamično😴.\n", + "\n", + "- Bolj intuitiven način bi bil izračunati vsoto manjkajočih vrednosti za vsak stolpec:\n" + ], + "metadata": { + "id": "xUWxipKYY0o7" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "pumpkins %>% \n", + " is.na() %>% \n", + " colSums()" + ], + "outputs": [], + "metadata": { + "id": "ZRBWV6P9ZArL" + } + }, + { + "cell_type": "markdown", + "source": [ + "Veliko bolje! Manjkajo podatki, vendar morda to ne bo pomembno za nalogo. Poglejmo, kaj bo prinesla nadaljnja analiza.\n", + "\n", + "> Poleg odličnih paketov in funkcij ima R zelo dobro dokumentacijo. Na primer, uporabite `help(colSums)` ali `?colSums`, da izveste več o funkciji.\n" + ], + "metadata": { + "id": "9gv-crB6ZD1Y" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 3. Dplyr: Slovnica za manipulacijo podatkov\n", + "\n", + "

\n", + " \n", + "

Ilustracija avtorice @allison_horst
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "o4jLY5-VZO2C" + } + }, + { + "cell_type": "markdown", + "source": [ + "[`dplyr`](https://dplyr.tidyverse.org/), paket v Tidyverse, je slovnica za obdelavo podatkov, ki ponuja dosleden nabor glagolov, s katerimi lahko rešite najpogostejše izzive pri obdelavi podatkov. V tem razdelku bomo raziskali nekatere glagole dplyr!\n" + ], + "metadata": { + "id": "i5o33MQBZWWw" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### dplyr::select()\n", + "\n", + "`select()` je funkcija v paketu `dplyr`, ki vam pomaga izbrati stolpce za ohranitev ali izključitev.\n", + "\n", + "Da bo vaš podatkovni okvir lažji za delo, odstranite več njegovih stolpcev z uporabo `select()` in obdržite samo tiste stolpce, ki jih potrebujete.\n", + "\n", + "Na primer, v tej vaji bo naša analiza vključevala stolpce `Package`, `Low Price`, `High Price` in `Date`. Izberimo te stolpce.\n" + ], + "metadata": { + "id": "x3VGMAGBZiUr" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Select desired columns\n", + "pumpkins <- pumpkins %>% \n", + " select(Package, `Low Price`, `High Price`, Date)\n", + "\n", + "\n", + "# Print data set\n", + "pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "F_FgxQnVZnM0" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### dplyr::mutate()\n", + "\n", + "`mutate()` je funkcija v paketu `dplyr`, ki vam omogoča ustvarjanje ali spreminjanje stolpcev, pri čemer obstoječi stolpci ostanejo nespremenjeni.\n", + "\n", + "Splošna struktura funkcije mutate je:\n", + "\n", + "`data %>% mutate(new_column_name = what_it_contains)`\n", + "\n", + "Poglejmo, kako deluje `mutate`, z uporabo stolpca `Date` in izvedbo naslednjih operacij:\n", + "\n", + "1. Pretvorba datumov (trenutno tipa znak) v format meseca (to so ameriški datumi, zato je format `MM/DD/YYYY`).\n", + "\n", + "2. 
Izvleček meseca iz datumov v nov stolpec.\n", + "\n", + "V jeziku R paket [lubridate](https://lubridate.tidyverse.org/) olajša delo s podatki tipa Datum-čas. Zato uporabimo `dplyr::mutate()`, `lubridate::mdy()`, `lubridate::month()` in preverimo, kako doseči zgoraj navedene cilje. Stolpec Date lahko odstranimo, saj ga v nadaljnjih operacijah ne bomo več potrebovali.\n" + ], + "metadata": { + "id": "2KKo0Ed9Z1VB" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load lubridate\n", + "library(lubridate)\n", + "\n", + "pumpkins <- pumpkins %>% \n", + " # Convert the Date column to a date object\n", + " mutate(Date = mdy(Date)) %>% \n", + " # Extract month from Date\n", + " mutate(Month = month(Date)) %>% \n", + " # Drop Date column\n", + " select(-Date)\n", + "\n", + "# View the first few rows\n", + "pumpkins %>% \n", + " slice_head(n = 7)" + ], + "outputs": [], + "metadata": { + "id": "5joszIVSZ6xe" + } + }, + { + "cell_type": "markdown", + "source": [ + "Juhu! 🤩\n", + "\n", + "Zdaj ustvarimo nov stolpec `Price`, ki predstavlja povprečno ceno buče. Nato izračunajmo povprečje stolpcev `Low Price` in `High Price`, da zapolnimo novi stolpec Price. \n", + "
\n" + ], + "metadata": { + "id": "nIgLjNMCZ-6Y" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Create a new column Price\n", + "pumpkins <- pumpkins %>% \n", + " mutate(Price = (`Low Price` + `High Price`)/2)\n", + "\n", + "# View the first few rows of the data\n", + "pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "Zo0BsqqtaJw2" + } + }, + { + "cell_type": "markdown", + "source": [ + "Ja!💪\n", + "\n", + "\"Počakaj malo!\", boš rekel po hitrem pregledu celotnega nabora podatkov z `View(pumpkins)`, \"Tukaj je nekaj nenavadnega!\"🤔\n", + "\n", + "Če pogledaš stolpec `Package`, so buče prodane v različnih konfiguracijah. Nekatere so prodane v merah `1 1/9 bushel`, nekatere v merah `1/2 bushel`, nekatere na bučo, nekatere na funt, in nekatere v velikih škatlah z različnimi širinami.\n", + "\n", + "Preverimo to:\n" + ], + "metadata": { + "id": "p77WZr-9aQAR" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Verify the distinct observations in Package column\n", + "pumpkins %>% \n", + " distinct(Package)" + ], + "outputs": [], + "metadata": { + "id": "XISGfh0IaUy6" + } + }, + { + "cell_type": "markdown", + "source": [ + "Neverjetno!👏\n", + "\n", + "Buče je očitno zelo težko dosledno tehtati, zato jih filtrirajmo tako, da izberemo samo buče z nizom *bushel* v stolpcu `Package` in jih shranimo v nov podatkovni okvir `new_pumpkins`.\n" + ], + "metadata": { + "id": "7sMjiVujaZxY" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### dplyr::filter() in stringr::str_detect()\n", + "\n", + "[`dplyr::filter()`](https://dplyr.tidyverse.org/reference/filter.html): ustvari podmnožico podatkov, ki vsebuje **vrstice**, ki ustrezajo vašim pogojem, v tem primeru buče z nizom *bushel* v stolpcu `Package`.\n", + "\n", + "[stringr::str_detect()](https://stringr.tidyverse.org/reference/str_detect.html): zazna prisotnost ali odsotnost vzorca v nizu.\n", + "\n", + "Paket 
[`stringr`](https://github.com/tidyverse/stringr) ponuja enostavne funkcije za pogoste operacije z nizi.\n" + ], + "metadata": { + "id": "L8Qfcs92ageF" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Retain only pumpkins with \"bushel\"\n", + "new_pumpkins <- pumpkins %>% \n", + " filter(str_detect(Package, \"bushel\"))\n", + "\n", + "# Get the dimensions of the new data\n", + "dim(new_pumpkins)\n", + "\n", + "# View a few rows of the new data\n", + "new_pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "hy_SGYREampd" + } + }, + { + "cell_type": "markdown", + "source": [ + "Vidite, da smo zožili izbor na približno 415 vrstic podatkov, ki vsebujejo buče po koših.🤩\n" + ], + "metadata": { + "id": "VrDwF031avlR" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### dplyr::case_when()\n", + "\n", + "**Ampak počakajte! Še nekaj je treba narediti**\n", + "\n", + "Ste opazili, da se količina košev razlikuje po vrsticah? Potrebno je normalizirati cene, da bodo prikazane na koš, ne pa na 1 1/9 ali 1/2 koša. Čas je za nekaj matematike, da to standardiziramo.\n", + "\n", + "Uporabili bomo funkcijo [`case_when()`](https://dplyr.tidyverse.org/reference/case_when.html), da *spremenimo* stolpec Price glede na določene pogoje. 
`case_when` omogoča vektorizacijo več `if_else()` stavkov.\n" + ], + "metadata": { + "id": "mLpw2jH4a0tx" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Convert the price if the Package contains fractional bushel values\n", + "new_pumpkins <- new_pumpkins %>% \n", + " mutate(Price = case_when(\n", + " str_detect(Package, \"1 1/9\") ~ Price/(1 + 1/9),\n", + " str_detect(Package, \"1/2\") ~ Price/(1/2),\n", + " TRUE ~ Price))\n", + "\n", + "# View the first few rows of the data\n", + "new_pumpkins %>% \n", + " slice_head(n = 30)" + ], + "outputs": [], + "metadata": { + "id": "P68kLVQmbM6I" + } + }, + { + "cell_type": "markdown", + "source": [ + "Zdaj lahko analiziramo ceno na enoto glede na njihovo meritev v bušljih. Vsa ta študija o bušljih buč pa kaže, kako zelo `pomembno` je `razumeti naravo svojih podatkov`!\n", + "\n", + "> ✅ Po navedbah [The Spruce Eats](https://www.thespruceeats.com/how-much-is-a-bushel-1389308) teža bušlja zavisi od vrste pridelka, saj gre za meritev prostornine. \"Bušelj paradižnikov, na primer, naj bi tehtal 56 funtov... Listi in zelenjava zavzamejo več prostora z manjšo težo, zato bušelj špinače tehta le 20 funtov.\" Vse skupaj je precej zapleteno! Ne ukvarjajmo se s pretvorbo iz bušljev v funte, ampak raje določimo ceno po bušlju. Vsa ta študija o bušljih buč pa kaže, kako zelo pomembno je razumeti naravo svojih podatkov!\n", + ">\n", + "> ✅ Ste opazili, da so buče, prodane po pol bušlja, zelo drage? Ali lahko ugotovite, zakaj? Namig: majhne buče so precej dražje od velikih, verjetno zato, ker jih je v bušlju veliko več, glede na neizkoriščen prostor, ki ga zavzame ena velika votla buča za pito.\n" + ], + "metadata": { + "id": "pS2GNPagbSdb" + } + }, + { + "cell_type": "markdown", + "source": [ +
"Zdaj pa, za čisto avanturo 💁‍♀️, premaknimo stolpec Month na prvo mesto, torej `pred` stolpec `Package`. \n", + "\n", + "Za spremembo položajev stolpcev se uporablja `dplyr::relocate()`. \n" + ], + "metadata": { + "id": "qql1SowfbdnP" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Create a new data frame new_pumpkins\n", + "new_pumpkins <- new_pumpkins %>% \n", + " relocate(Month, .before = Package)\n", + "\n", + "new_pumpkins %>% \n", + " slice_head(n = 7)" + ], + "outputs": [], + "metadata": { + "id": "JJ1x6kw8bixF" + } + }, + { + "cell_type": "markdown", + "source": [ + "Odlično delo!👌 Zdaj imate čist in urejen nabor podatkov, na katerem lahko zgradite svoj novi regresijski model! \n", + "
\n" + ], + "metadata": { + "id": "y8TJ0Za_bn5Y" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 4. Vizualizacija podatkov z ggplot2\n", + "\n", + "

\n", + " \n", + "

Infografika avtorja Dasani Madipalli
\n", + "\n", + "\n", + "\n", + "\n", + "Obstaja *modri* rek, ki pravi:\n", + "\n", + "> \"Preprost graf je analitiku podatkov prinesel več informacij kot katerakoli druga naprava.\" --- John Tukey\n", + "\n", + "Del vloge podatkovnega znanstvenika je prikazati kakovost in naravo podatkov, s katerimi dela. To pogosto dosežejo z ustvarjanjem zanimivih vizualizacij, kot so grafi, diagrami in prikazi, ki pokažejo različne vidike podatkov. Na ta način lahko vizualno prikažejo odnose in vrzeli, ki jih je sicer težko odkriti.\n", + "\n", + "Vizualizacije lahko pomagajo tudi pri določanju najbolj primerne tehnike strojnega učenja za podatke. Na primer, razpršen diagram, ki sledi črti, nakazuje, da so podatki primerni za nalogo linearne regresije.\n", + "\n", + "R ponuja več sistemov za izdelavo grafov, vendar je [`ggplot2`](https://ggplot2.tidyverse.org/index.html) eden najbolj elegantnih in vsestranskih. `ggplot2` omogoča sestavljanje grafov z **združevanjem neodvisnih komponent**.\n", + "\n", + "Začnimo s preprostim razpršenim diagramom za stolpca Price in Month.\n", + "\n", + "V tem primeru bomo začeli z [`ggplot()`](https://ggplot2.tidyverse.org/reference/ggplot.html), podali podatkovni niz in estetsko preslikavo (z [`aes()`](https://ggplot2.tidyverse.org/reference/aes.html)), nato pa dodali sloje (kot je [`geom_point()`](https://ggplot2.tidyverse.org/reference/geom_point.html)) za razpršene diagrame.\n" + ], + "metadata": { + "id": "mYSH6-EtbvNa" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Set a theme for the plots\n", + "theme_set(theme_light())\n", + "\n", + "# Create a scatter plot\n", + "p <- ggplot(data = new_pumpkins, aes(x = Price, y = Month))\n", + "p + geom_point()" + ], + "outputs": [], + "metadata": { + "id": "g2YjnGeOcLo4" + } + }, + { + "cell_type": "markdown", + "source": [ + "Je to uporaben graf 🤷? 
Ali te kaj na njem preseneča?\n", + "\n", + "Ni posebej uporaben, saj zgolj prikazuje tvoje podatke kot razpored točk v določenem mesecu.\n", + "
\n" + ], + "metadata": { + "id": "Ml7SDCLQcPvE" + } + }, + { + "cell_type": "markdown", + "source": [ + "### **Kako naredimo podatke uporabne?**\n", + "\n", + "Da bi prikazali koristne podatke na grafikonih, je običajno potrebno podatke nekako združiti. Na primer, v našem primeru bi iskanje povprečne cene buč za vsak mesec omogočilo boljši vpogled v osnovne vzorce v naših podatkih. To nas pripelje do še ene hitre predstavitve **dplyr**:\n", + "\n", + "#### `dplyr::group_by() %>% summarize()`\n", + "\n", + "Združeno agregiranje v R lahko enostavno izvedemo z\n", + "\n", + "`dplyr::group_by() %>% summarize()`\n", + "\n", + "- `dplyr::group_by()` spremeni enoto analize iz celotnega nabora podatkov na posamezne skupine, kot so na primer meseci.\n", + "\n", + "- `dplyr::summarize()` ustvari nov podatkovni okvir z enim stolpcem za vsako spremenljivko skupine in enim stolpcem za vsako statistiko povzetka, ki ste jo določili.\n", + "\n", + "Na primer, lahko uporabimo `dplyr::group_by() %>% summarize()` za združevanje buč v skupine na podlagi stolpca **Month** in nato izračunamo **povprečno ceno** za vsak mesec.\n" + ], + "metadata": { + "id": "jMakvJZIcVkh" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Find the average price of pumpkins per month\r\n", + "new_pumpkins %>%\r\n", + " group_by(Month) %>% \r\n", + " summarise(mean_price = mean(Price))" + ], + "outputs": [], + "metadata": { + "id": "6kVSUa2Bcilf" + } + }, + { + "cell_type": "markdown", + "source": [ + "Jedrnato!✨\n", + "\n", + "Kategorijske značilnosti, kot so meseci, so bolje prikazane z uporabo stolpčnega diagrama 📊. Plasti, ki so odgovorne za stolpčne diagrame, so `geom_bar()` in `geom_col()`. 
Preverite `?geom_bar` za več informacij.\n", + "\n", + "Pa ga ustvarimo!\n" + ], + "metadata": { + "id": "Kds48GUBcj3W" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Find the average price of pumpkins per month then plot a bar chart\r\n", + "new_pumpkins %>%\r\n", + " group_by(Month) %>% \r\n", + " summarise(mean_price = mean(Price)) %>% \r\n", + " ggplot(aes(x = Month, y = mean_price)) +\r\n", + " geom_col(fill = \"midnightblue\", alpha = 0.7) +\r\n", + " ylab(\"Pumpkin Price\")" + ], + "outputs": [], + "metadata": { + "id": "VNbU1S3BcrxO" + } + }, + { + "cell_type": "markdown", + "source": [ + "🤩🤩 To je bolj uporabna vizualizacija podatkov! Zdi se, da kaže, da so najvišje cene buč v septembru in oktobru. Ali to ustreza vašim pričakovanjem? Zakaj ali zakaj ne?\n", + "\n", + "Čestitke za zaključek druge lekcije 👏! Pripravili ste svoje podatke za gradnjo modela in nato odkrili več vpogledov z uporabo vizualizacij!\n" + ], + "metadata": { + "id": "zDm0VOzzcuzR" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za strojno prevajanje [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da upoštevate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo strokovno človeško prevajanje. 
Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki izhajajo iz uporabe tega prevoda.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sl/2-Regression/2-Data/solution/notebook.ipynb b/translations/sl/2-Regression/2-Data/solution/notebook.ipynb new file mode 100644 index 000000000..31d66e397 --- /dev/null +++ b/translations/sl/2-Regression/2-Data/solution/notebook.ipynb @@ -0,0 +1,439 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Linearna regresija za buče - Lekcija 2\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NameTypePackageVarietySub VarietyGradeDateLow PriceHigh PriceMostly Low...Unit of SaleQualityConditionAppearanceStorageCropRepackTrans ModeUnnamed: 24Unnamed: 25
70BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN9/24/1615.015.015.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
71BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN9/24/1618.018.018.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
72BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN10/1/1618.018.018.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
73BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN10/1/1617.017.017.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
74BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN10/8/1615.015.015.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " City Name Type Package Variety Sub Variety Grade \\\n", + "70 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "71 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "72 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "73 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "74 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "\n", + " Date Low Price High Price Mostly Low ... Unit of Sale Quality \\\n", + "70 9/24/16 15.0 15.0 15.0 ... NaN NaN \n", + "71 9/24/16 18.0 18.0 18.0 ... NaN NaN \n", + "72 10/1/16 18.0 18.0 18.0 ... NaN NaN \n", + "73 10/1/16 17.0 17.0 17.0 ... NaN NaN \n", + "74 10/8/16 15.0 15.0 15.0 ... NaN NaN \n", + "\n", + " Condition Appearance Storage Crop Repack Trans Mode Unnamed: 24 \\\n", + "70 NaN NaN NaN NaN N NaN NaN \n", + "71 NaN NaN NaN NaN N NaN NaN \n", + "72 NaN NaN NaN NaN N NaN NaN \n", + "73 NaN NaN NaN NaN N NaN NaN \n", + "74 NaN NaN NaN NaN N NaN NaN \n", + "\n", + " Unnamed: 25 \n", + "70 NaN \n", + "71 NaN \n", + "72 NaN \n", + "73 NaN \n", + "74 NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "pumpkins = pd.read_csv('../../data/US-pumpkins.csv')\n", + "\n", + "pumpkins = pumpkins[pumpkins['Package'].str.contains('bushel', case=True, regex=True)]\n", + "\n", + "pumpkins.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "City Name 0\n", + "Type 406\n", + "Package 0\n", + "Variety 0\n", + "Sub Variety 167\n", + "Grade 415\n", + "Date 0\n", + "Low Price 0\n", + "High Price 0\n", + "Mostly Low 24\n", + "Mostly High 24\n", + "Origin 0\n", + "Origin District 396\n", + "Item Size 114\n", + "Color 145\n", + "Environment 415\n", + "Unit of Sale 404\n", + "Quality 415\n", + "Condition 415\n", + "Appearance 
415\n", + "Storage 415\n", + "Crop 415\n", + "Repack 0\n", + "Trans Mode 415\n", + "Unnamed: 24 415\n", + "Unnamed: 25 391\n", + "dtype: int64" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pumpkins.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Month Package Low Price High Price Price\n", + "70 9 1 1/9 bushel cartons 15.00 15.0 13.50\n", + "71 9 1 1/9 bushel cartons 18.00 18.0 16.20\n", + "72 10 1 1/9 bushel cartons 18.00 18.0 16.20\n", + "73 10 1 1/9 bushel cartons 17.00 17.0 15.30\n", + "74 10 1 1/9 bushel cartons 15.00 15.0 13.50\n", + "... ... ... ... ... ...\n", + "1738 9 1/2 bushel cartons 15.00 15.0 30.00\n", + "1739 9 1/2 bushel cartons 13.75 15.0 28.75\n", + "1740 9 1/2 bushel cartons 10.75 15.0 25.75\n", + "1741 9 1/2 bushel cartons 12.00 12.0 24.00\n", + "1742 9 1/2 bushel cartons 12.00 12.0 24.00\n", + "\n", + "[415 rows x 5 columns]\n" + ] + } + ], + "source": [ + "\n", + "# A set of new columns for a new dataframe. 
Filter out nonmatching columns\n", + "columns_to_select = ['Package', 'Low Price', 'High Price', 'Date']\n", + "pumpkins = pumpkins.loc[:, columns_to_select]\n", + "\n", + "# Get an average between low and high price for the base pumpkin price\n", + "price = (pumpkins['Low Price'] + pumpkins['High Price']) / 2\n", + "\n", + "# Convert the date to its month only\n", + "month = pd.DatetimeIndex(pumpkins['Date']).month\n", + "\n", + "# Create a new dataframe with this basic data\n", + "new_pumpkins = pd.DataFrame({'Month': month, 'Package': pumpkins['Package'], 'Low Price': pumpkins['Low Price'],'High Price': pumpkins['High Price'], 'Price': price})\n", + "\n", + "# Convert the price if the Package contains fractional bushel values\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1 1/9'), 'Price'] = price/(1 + 1/9)\n", + "\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1/2'), 'Price'] = price/(1/2)\n", + "\n", + "print(new_pumpkins)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "price = new_pumpkins.Price\n", + "month = new_pumpkins.Month\n", + "plt.scatter(price, month)\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0, 0.5, 'Pumpkin Price')" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEJCAYAAACT/UyFAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAAsTAAALEwEAmpwYAAARAElEQVR4nO3de5AlZX3G8e8joKigiIwbVNYVQ6ErwcVaiRW0CgUNikEQKxFTijHJahlUSsvUqknE/LVE0KoYNVkDigloNCoQLt5AxUuCLrrhIhqUQgMiLBGE0goR+OWP0+sMszOzZ8ft0zO830/VqTndfc7phwae6XlPX1JVSJLa8aChA0iSJsvil6TGWPyS1BiLX5IaY/FLUmMsfklqzK5DBxjHPvvsU6tWrRo6hiQtK1dcccVtVTU1e/6yKP5Vq1axadOmoWNI0rKS5IdzzXeoR5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktSYZXECl3auVesvHDoCN2w4eugIUrMsfjXNX4JqkUM9ktQYi1+SGmPxS1JjLH5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqTG/Fn2S/JF9M8p0k1yR5Yzf/lCQ3JdncPV7YVwZJ0rZ27fGz7wHeXFXfSrIncEWSz3fL3lNVp/W4bknSPHor/qq6Gbi5e35XkmuBx/W1PknSePrc4/+VJKuAQ4DLgcOAk5K8EtjE6K+C2yeRQ9L8Vq2/cOgI3LDh6KEjNKH3L3eT7AF8Eji5qu4EPgA8CVjD6C+C0+d537okm5Js2rJlS98xJakZvRZ/kt0Ylf7ZVfUpgKq6parurar7gA8Ch8713qraWFVrq2rt1NRUnzElqSl9HtUT4Azg2qp694z5+8542XHA1X1lkCRtq88x/sOAVwBXJdnczXsbcEKSNUABNwCv6TGDJGmWPo/q+SqQORZd1Nc6F+IXV5I04pm7ktQYi1+SGmPxS1JjLH5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktQYi1+SGmPxS1JjLH5JakxvxZ9kvyRfTPKdJNckeWM3f+8kn09yXffzUX1lkCRtq889/nuAN1fVauCZwJ8lWQ2sBy6pqgOAS7ppSdKE9Fb8VXVzVX2re34XcC3wOODFwFndy84Cju0rgyRpWxMZ40+yCjgEuBxYUVU3d4t+AqyY5z3rkmxKsmnLli2TiClJTei9+JPsAXwSOLmq7py5rKoKqLneV1Ubq2ptVa2dmprqO6YkNWOs4k/y0CQH7uiHJ9mNUemfXVWf6mbfkmTfbvm+wK07+rmSpM
XbbvEn+T1gM/CZbnpNkvPHeF+AM4Brq+rdMxadD5zYPT8ROG8HM0uSfg3j7PGfAhwK3AFQVZuBJ47xvsOAVwDPTbK5e7wQ2AA8L8l1wJHdtCRpQnYd4zW/rKqfjXbgf2XOcfn7vaDqq0DmWXzEGOuVJPVgnOK/JsnLgV2SHAC8Afh6v7EkSX0ZZ6jn9cBTgbuBc4CfASf3mEmS1KPt7vFX1S+At3cPSdIyN85RPZ9PsteM6Ucl+WyvqSRJvRlnqGefqrpj60RV3Q48prdEkqRejVP89yVZuXUiyRMY46geSdLSNM5RPW8Hvprky4wOz3w2sK7XVJKk3ozz5e5nkjyd0aWVYXTNndv6jSVJ6su8Qz1Jntz9fDqwEvhx91jZzZMkLUML7fG/idGQzulzLCvgub0kkiT1at7ir6p1SR4E/EVVfW2CmSRJPVrwqJ6qug/4uwllkSRNwDiHc16S5PjMukqbJGl5Gqf4XwN8Arg7yZ1J7kpy5/beJElamsY5nHPPSQSRJE3GQodzHpDkvCRXJzknyeMmGUyS1I+FhnrOBC4Ajge+Dbx3IokkSb1aaKhnz6r6YPf8XUm+NYlAkqR+LVT8uyc5hOnbJz505nRV+YtAkpahhYr/ZuDdM6Z/MmPaM3claZla6Mzd50wyiCRpMsY5jl+S9ABi8UtSYyx+SWrMOHfgojt56wkzX19Vl/UVSpLUn+0Wf5JTgT8AvgPc280uwOKXpGVonD3+Y4EDq+runrNIkiZgnOK/HtgN2KHiT3Im8CLg1qo6qJt3CvCnwJbuZW+rqot25HMlqW+r1l84dARu2HB0b589TvH/Atic5BJmlH9VvWE77/swo5u4fGTW/PdU1Wk7ElKStPOMU/znd48dUlWXJVm1w4kkSb0a53r8Z+3kdZ6U5JXAJuDNVXX7XC9Kso7Rzd5ZuXLlTo4gSe1a6Hr8H+9+XpXkytmPRa7vA8CTgDWMrgV0+nwvrKqNVbW2qtZOTU0tcnWSpNkW2uN/Y/fzRTtrZVV1y9bnST7I6Hr/kqQJmnePv6pu7p6urqofznwAL1jMypLsO2PyOODqxXyOJGnxxvly9y+T3F1VlwIk+XPgOcDfL/SmJB8FDgf2SXIj8A7g8CRrGJ0AdgOjG7lLkiZonOI/BrggyVuAo4AnAy/e3puq6oQ5Zp+xY/EkSTvbOEf13JbkGOALwBXAS6uqek8mSerFvMWf5C5GQzJbPRjYH3hpkqqqR/QdTpK08y10B649JxlEkjQZ416W+SXAsxj9BfCVqjq3z1CSpP5s90YsSd4PvBa4itHhl69N8r6+g0mS+jHOHv9zgads/UI3yVnANb2mkiT1ZpxbL34fmHmxnP26eZKkZWicPf49gWuTfKObfgawKcn5AFV1TF/hJEk73zjF/1e9p5AkTcw4J3B9GSDJI7j/zdZ/2mMuSVJPxrnZ+jrgr4H/Be4Dwuiwzv37jSZJ6sM4Qz1vAQ6qqtv6DiNJ6t84R/X8gNF9dyVJDwDj7PG/Ffh6ksvZsZutS5KWoHGK/x+ASxmduXtfv3EkSX0bp/h3q6o39Z5EkjQR44zxX5xkXZJ9k+y99dF7MklSL8bZ4996J623zpjn4ZyStEyNcwLXEycRRJI0GeOcwPXKueZX1Ud2fhxJUt/GGep5xoznuwNHAN8CLH5JWobGGep5/czpJHsBH+srkCSpX+Mc1TPbzwHH/SVpmRpnjP/fGB3FA6NfFKuBj/cZSpLUn3HG+E+b8fwe4IdVdWNPeSRJPZu3+JPszugm67/J6HINZ1TVPZMKJknqx0Jj/GcBaxmV/guA0yeSSJLUq4WGelZX1W8BJDkD+MYCr91GkjOBFwG3VtVB3by9gX8BVgE3AL9fVbfveGxJ0mIttMf/y61PFjnE82HgqFnz1gOXVNUBwCXdtCRpghYq/qclubN73AUcvPV5kju398FVdRkw+768L2Y0hET389jFhJYkLd68Qz1VtUsP61tRVTd3z38CrO
hhHZKkBSzmBK6doqqK6fMDttFdCnpTkk1btmyZYDJJemCbdPHfkmRfgO7nrfO9sKo2VtXaqlo7NTU1sYCS9EA36eI/Hzixe34icN6E1y9Jzeut+JN8FPh34MAkNyb5Y2AD8Lwk1wFHdtOSpAka55INi1JVJ8yz6Ii+1ilJ2r7BvtyVJA3D4pekxlj8ktQYi1+SGmPxS1JjLH5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktSYXYdYaZIbgLuAe4F7qmrtEDkkqUWDFH/nOVV124Drl6QmOdQjSY0ZqvgL+FySK5KsGyiDJDVpqKGeZ1XVTUkeA3w+yXer6rKZL+h+IawDWLly5RAZJekBaZA9/qq6qft5K/Bp4NA5XrOxqtZW1dqpqalJR5SkB6yJF3+ShyfZc+tz4PnA1ZPOIUmtGmKoZwXw6SRb139OVX1mgByS1KSJF39VXQ88bdLrlSSNeDinJDXG4pekxlj8ktQYi1+SGmPxS1JjLH5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktQYi1+SGjNI8Sc5Ksn3knw/yfohMkhSqyZe/El2Ad4HvABYDZyQZPWkc0hSq4bY4z8U+H5VXV9V/wd8DHjxADkkqUmpqsmuMHkpcFRV/Uk3/Qrgt6vqpFmvWwes6yYPBL430aDb2ge4beAMS4XbYprbYprbYtpS2RZPqKqp2TN3HSLJOKpqI7Bx6BxbJdlUVWuHzrEUuC2muS2muS2mLfVtMcRQz03AfjOmH9/NkyRNwBDF/03ggCRPTPJg4GXA+QPkkKQmTXyop6ruSXIS8FlgF+DMqrpm0jkWYckMOy0BbotpbotpbotpS3pbTPzLXUnSsDxzV5IaY/FLUmMsfklqzJI9jn9IM442+nFVfSHJy4HfAa4FNlbVLwcNOGFJ9gdewugw3HuB/wLOqao7Bw0maVH8cncOSc5m9EvxYcAdwB7Ap4AjGG2zE4dLN1lJ3gC8CLgMeCHwbUbb5DjgdVX1pcHCSVoUi38OSa6sqoOT7Mro5LLHVtW9SQL8Z1UdPHDEiUlyFbCm++d/GHBRVR2eZCVwXlUdMnDEiUnySOCtwLHAY4ACbgXOAzZU1R2DhVtCklxcVS8YOsekJHkEo/8uHg9cXFXnzFj2/qp63WDh5uFQz9we1A33PJzRXv8jgZ8CDwF2GzLYQHZlNMTzEEZ//VBVP0rS2rb4OHApcHhV/QQgyW8AJ3bLnj9gtolK8vT5FgFrJhhlKfgQcB3wSeDVSY4HXl5VdwPPHDTZPCz+uZ0BfJfRCWZvBz6R5HpG/xI/NmSwAfwj8M0klwPPBk4FSDLF6JdhS1ZV1akzZ3S/AE5N8uqBMg3lm8CXGRX9bHtNNsrgnlRVx3fPz03yduDSJMcMGWohDvXMI8ljAarqx0n2Ao4EflRV3xg02ACSPBV4CnB1VX136DxDSfI54AvAWVV1SzdvBfAq4HlVdeSA8SYqydXAcVV13RzL/ruq9pvjbQ9ISa4FnlpV982Y9yrgLcAeVfWEobLNx+KXxpTkUcB6RvePeEw3+xZG15raUFW3D5Vt0rrLq19VVdtcLj3JsVV17uRTDSPJ3wCfq6ovzJp/FPDeqjpgmGTzs/ilnSDJH1XVh4bOsRS4LaYt1W1h8Us7QZIfVdXKoXMsBW6LaUt1W/jlrjSmJFfOtwhYMcksQ3NbTFuO28Lil8a3AvhdYPZYfoCvTz7OoNwW05bdtrD4pfFdwOgojc2zFyT50sTTDMttMW3ZbQvH+CWpMV6dU5IaY/FLUmMsfglIUkn+ecb0rkm2JL
lgkZ+3V5LXzZg+fLGfJe1sFr808nPgoCQP7aafx+jKrIu1F7DkrsoogcUvzXQRcHT3/ATgo1sXJNk7yblJrkzyH0kO7uafkuTMJF9Kcn13/wKADcCTkmxO8q5u3h5J/jXJd5Oc3V3mW5o4i1+a9jHgZUl2Bw4GLp+x7J3At7t7MbwN+MiMZU9mdBz3ocA7ustVrwd+UFVrquot3esOAU4GVgP7A4f1+M8izcvilzpVdSWwitHe/kWzFj8L+KfudZcCj+5uwAFwYVXdXVW3Mboxy3xna36jqm7sruK4uVuXNHGewCXd3/nAacDhwKPHfM/dM57fy/z/X437OqlX7vFL93cm8M6qumrW/K8AfwijI3SA27Zzs/m7gD37CCj9utzjkGaoqhuBv51j0SnAmd0FuX7B6HaLC33O/yT5WnfDkouBC3d2VmmxvGSDJDXGoR5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSY/4fZDFW+b6+4WkAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "new_pumpkins.groupby(['Month'])['Price'].mean().plot(kind='bar')\n", + "plt.ylabel(\"Pumpkin Price\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za prevajanje z umetno inteligenco [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da upoštevate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem maternem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo profesionalni človeški prevod. Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki bi nastale zaradi uporabe tega prevoda.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" + }, + "kernelspec": { + "display_name": "Python 3.7.0 64-bit ('3.7')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.1" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "95726f0b8283628d5356a4f8eb8b4b76", + "translation_date": "2025-09-06T13:46:15+00:00", + "source_file": "2-Regression/2-Data/solution/notebook.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git 
a/translations/sl/2-Regression/3-Linear/notebook.ipynb b/translations/sl/2-Regression/3-Linear/notebook.ipynb new file mode 100644 index 000000000..c0701506d --- /dev/null +++ b/translations/sl/2-Regression/3-Linear/notebook.ipynb @@ -0,0 +1,128 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Cene buč\n", + "\n", + "Naložite potrebne knjižnice in podatkovni niz. Pretvorite podatke v podatkovni okvir, ki vsebuje podnabor podatkov:\n", + "\n", + "- Pridobite samo buče, ki so ocenjene po ceni na koš\n", + "- Pretvorite datum v mesec\n", + "- Izračunajte ceno kot povprečje med najvišjo in najnižjo ceno\n", + "- Pretvorite ceno, da odraža ceno glede na količino v košu\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from datetime import datetime\n", + "\n", + "pumpkins = pd.read_csv('../data/US-pumpkins.csv')\n", + "\n", + "pumpkins.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pumpkins = pumpkins[pumpkins['Package'].str.contains('bushel', case=True, regex=True)]\n", + "\n", + "columns_to_select = ['Package', 'Variety', 'City Name', 'Low Price', 'High Price', 'Date']\n", + "pumpkins = pumpkins.loc[:, columns_to_select]\n", + "\n", + "price = (pumpkins['Low Price'] + pumpkins['High Price']) / 2\n", + "\n", + "month = pd.DatetimeIndex(pumpkins['Date']).month\n", + "day_of_year = pd.to_datetime(pumpkins['Date']).apply(lambda dt: (dt-datetime(dt.year,1,1)).days)\n", + "\n", + "new_pumpkins = pd.DataFrame(\n", + " {'Month': month, \n", + " 'DayOfYear' : day_of_year, \n", + " 'Variety': pumpkins['Variety'], \n", + " 'City': pumpkins['City Name'], \n", + " 'Package': pumpkins['Package'], \n", + " 'Low Price': pumpkins['Low Price'],\n", + " 'High Price': pumpkins['High Price'], \n", + " 'Price': 
price})\n", + "\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1 1/9'), 'Price'] = price/1.1\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1/2'), 'Price'] = price*2\n", + "\n", + "new_pumpkins.head()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Osnovni raztreseni diagram nas opominja, da imamo podatke o mesecih samo od avgusta do decembra. Verjetno potrebujemo več podatkov, da bi lahko sklepali na linearen način.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "plt.scatter('Month','Price',data=new_pumpkins)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "plt.scatter('DayOfYear','Price',data=new_pumpkins)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za strojno prevajanje [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da upoštevate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo strokovno človeško prevajanje. 
Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki izhajajo iz uporabe tega prevoda.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3-final" + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "b032d371c75279373507f003439a577e", + "translation_date": "2025-09-06T13:08:38+00:00", + "source_file": "2-Regression/3-Linear/notebook.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sl/2-Regression/3-Linear/solution/R/lesson_3-R.ipynb b/translations/sl/2-Regression/3-Linear/solution/R/lesson_3-R.ipynb new file mode 100644 index 000000000..f95001d6c --- /dev/null +++ b/translations/sl/2-Regression/3-Linear/solution/R/lesson_3-R.ipynb @@ -0,0 +1,1089 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_3-R.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "5015d65d61ba75a223bfc56c273aa174", + "translation_date": "2025-09-06T13:16:09+00:00", + "source_file": "2-Regression/3-Linear/solution/R/lesson_3-R.ipynb", + "language_code": "sl" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Zgradite regresijski model: linearni in polinomski regresijski modeli\n" + ], + "metadata": { + "id": "EgQw8osnsUV-" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Linearna in polinomska regresija za določanje cen buč - Lekcija 3\n", + "

\n", + " \n", + "

Infografika: Dasani Madipalli
\n", + "\n", + "\n", + "\n", + "\n", + "#### Uvod\n", + "\n", + "Do sedaj ste raziskali, kaj regresija je, z vzorčnimi podatki, zbranimi iz nabora podatkov o cenah buč, ki ga bomo uporabljali skozi celotno lekcijo. Prav tako ste ga vizualizirali z uporabo `ggplot2`. 💪\n", + "\n", + "Zdaj ste pripravljeni, da se poglobite v regresijo za strojno učenje. V tej lekciji boste izvedeli več o dveh vrstah regresije: *osnovni linearni regresiji* in *polinomski regresiji*, skupaj z nekaj matematike, ki stoji za temi tehnikami.\n", + "\n", + "> V tem učnem načrtu predpostavljamo minimalno matematično predznanje in si prizadevamo, da bi bila vsebina dostopna študentom iz drugih področij. Zato bodite pozorni na opombe, 🧮 poudarke, diagrame in druga učna orodja, ki vam bodo pomagala pri razumevanju.\n", + "\n", + "#### Priprava\n", + "\n", + "Naj vas spomnimo, da nalagate te podatke, da bi si zastavili vprašanja o njih.\n", + "\n", + "- Kdaj je najboljši čas za nakup buč?\n", + "\n", + "- Kakšno ceno lahko pričakujem za zabojček miniaturnih buč?\n", + "\n", + "- Ali naj jih kupim v košarah za pol bushla ali v škatlah velikosti 1 1/9 bushla? Poglobimo se v te podatke.\n", + "\n", + "V prejšnji lekciji ste ustvarili `tibble` (sodobno reinterpretacijo podatkovnega okvira) in ga napolnili z delom izvirnega nabora podatkov, pri čemer ste standardizirali cene glede na bushel. S tem ste sicer pridobili približno 400 podatkovnih točk, vendar le za jesenske mesece. Morda lahko pridobimo še malo več podrobnosti o naravi podatkov, če jih še bolj očistimo? Bomo videli... 
🕵️‍♀️\n", + "\n", + "Za to nalogo bomo potrebovali naslednje pakete:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) je [zbirka paketov za R](https://www.tidyverse.org/packages), zasnovana za hitrejše, enostavnejše in zabavnejše delo z znanostjo o podatkih!\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) je [okvir paketov](https://www.tidymodels.org/packages/) za modeliranje in strojno učenje.\n", + "\n", + "- `janitor`: Paket [janitor](https://github.com/sfirke/janitor) ponuja preprosta orodja za pregledovanje in čiščenje \"umazanih\" podatkov.\n", + "\n", + "- `corrplot`: Paket [corrplot](https://cran.r-project.org/web/packages/corrplot/vignettes/corrplot-intro.html) omogoča vizualno raziskovanje korelacijske matrike, ki podpira samodejno preurejanje spremenljivk za odkrivanje skritih vzorcev med njimi.\n", + "\n", + "Pakete lahko namestite z naslednjim ukazom:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\", \"janitor\", \"corrplot\"))`\n", + "\n", + "Spodnji skript preveri, ali imate nameščene potrebne pakete za dokončanje tega modula, in jih po potrebi namesti.\n" + ], + "metadata": { + "id": "WqQPS1OAsg3H" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "suppressWarnings(if (!require(\"pacman\")) install.packages(\"pacman\"))\n", + "\n", + "pacman::p_load(tidyverse, tidymodels, janitor, corrplot)" + ], + "outputs": [], + "metadata": { + "id": "tA4C2WN3skCf", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "c06cd805-5534-4edc-f72b-d0d1dab96ac0" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 1. Linearna regresijska premica\n", + "\n", + "Kot ste se naučili v Lekciji 1, je cilj linearne regresije narisati *premico* *najboljše prileganje*, da:\n", + "\n", + "- **Prikažete odnose med spremenljivkami**. Prikažete odnos med spremenljivkami.\n", + "\n", + "- **Napovedujete**. 
Naredite natančne napovedi, kje bi nov podatek padel v odnosu do te premice.\n", + "\n", + "Za risanje takšne premice uporabljamo statistično tehniko, imenovano **Regresija najmanjših kvadratov**. Izraz `najmanjši kvadrati` pomeni, da so vse podatkovne točke okoli regresijske premice kvadrirane in nato seštete. Idealno je, da je končni seštevek čim manjši, saj želimo nizko število napak oziroma `najmanjše kvadrate`. Tako je premica najboljšega prileganja tista, ki nam daje najnižjo vrednost za vsoto kvadriranih napak - od tod ime *regresija najmanjših kvadratov*.\n", + "\n", + "To počnemo, ker želimo modelirati premico, ki ima najmanjšo kumulativno razdaljo od vseh naših podatkovnih točk. Prav tako kvadriramo izraze pred seštevanjem, saj nas zanima njihova velikost in ne smer.\n", + "\n", + "> **🧮 Pokaži mi matematiko**\n", + ">\n", + "> Ta premica, imenovana *premica najboljšega prileganja*, se lahko izrazi z [enačbo](https://en.wikipedia.org/wiki/Simple_linear_regression):\n", + ">\n", + "> Y = a + bX\n", + ">\n", + "> `X` je '`pojasnjevalna spremenljivka` ali `napovednik`'. `Y` je '`odvisna spremenljivka` ali `rezultat`'. Naklon premice je `b`, `a` pa je presečišče z osjo y, kar se nanaša na vrednost `Y`, ko `X = 0`.\n", + ">\n", + "\n", + "> ![](../../../../../../2-Regression/3-Linear/solution/images/slope.png \"naklon = $y/x$\")\n", + " Infografika: Jen Looper\n", + ">\n", + "> Najprej izračunajte naklon `b`.\n", + ">\n", + "> Z drugimi besedami, in glede na prvotno vprašanje o podatkih o bučah: \"napovedati ceno buče na koš po mesecih\", bi `X` pomenil ceno, `Y` pa mesec prodaje.\n", + ">\n", + "> ![](../../../../../../2-Regression/3-Linear/solution/images/calculation.png)\n", + " Infografika: Jen Looper\n", + "> \n", + "> Izračunajte vrednost Y. 
Če plačujete približno 4 USD, mora biti april!\n", + ">\n", + "> Matematika, ki izračuna premico, mora prikazati naklon premice, ki je odvisen tudi od presečišča, oziroma kje se `Y` nahaja, ko `X = 0`.\n", + ">\n", + "> Metodo izračuna teh vrednosti si lahko ogledate na spletni strani [Math is Fun](https://www.mathsisfun.com/data/least-squares-regression.html). Obiščite tudi [ta kalkulator najmanjših kvadratov](https://www.mathsisfun.com/data/least-squares-calculator.html), da vidite, kako vrednosti številk vplivajo na premico.\n", + "\n", + "Ni tako strašno, kajne? 🤓\n", + "\n", + "#### Korelacija\n", + "\n", + "Še en izraz, ki ga je treba razumeti, je **Koeficient korelacije** med danima spremenljivkama X in Y. S pomočjo razsevnega diagrama lahko hitro vizualizirate ta koeficient. Diagram s podatkovnimi točkami, razporejenimi v urejeni premici, ima visoko korelacijo, medtem ko diagram s podatkovnimi točkami, razpršenimi povsod med X in Y, ima nizko korelacijo.\n", + "\n", + "Dober model linearne regresije bo tisti, ki ima visok (bližje 1 kot 0) koeficient korelacije z uporabo metode regresije najmanjših kvadratov s premico regresije.\n" + ], + "metadata": { + "id": "cdX5FRpvsoP5" + } + }, + { + "cell_type": "markdown", + "source": [ + "## **2. Ples s podatki: ustvarjanje podatkovnega okvira za modeliranje**\n", + "\n", + "

\n", + " \n", + "

Umetniško delo @allison_horst
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "WdUKXk7Bs8-V" + } + }, + { + "cell_type": "markdown", + "source": [ + "Naložite potrebne knjižnice in podatkovni niz. Podatke pretvorite v podatkovni okvir, ki vsebuje podmnožico podatkov:\n", + "\n", + "- Uporabite samo buče, katerih cena je določena na koš.\n", + "\n", + "- Datum pretvorite v mesec.\n", + "\n", + "- Izračunajte ceno kot povprečje najvišje in najnižje cene.\n", + "\n", + "- Ceno prilagodite tako, da odraža določanje cen glede na količino v košu.\n", + "\n", + "> Te korake smo obravnavali v [prejšnji lekciji](https://github.com/microsoft/ML-For-Beginners/blob/main/2-Regression/2-Data/solution/R/lesson_2-R.ipynb).\n" + ], + "metadata": { + "id": "fMCtu2G2s-p8" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load the core Tidyverse packages\n", + "library(tidyverse)\n", + "library(lubridate)\n", + "\n", + "# Import the pumpkins data\n", + "pumpkins <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/2-Regression/data/US-pumpkins.csv\")\n", + "\n", + "\n", + "# Get a glimpse and dimensions of the data\n", + "glimpse(pumpkins)\n", + "\n", + "\n", + "# Print the first 5 rows of the data set\n", + "pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "ryMVZEEPtERn" + } + }, + { + "cell_type": "markdown", + "source": [ + "V duhu čiste avanture raziščimo [`paket janitor`](https://github.com/sfirke/janitor), ki ponuja preproste funkcije za pregledovanje in čiščenje neurejenih podatkov. 
Na primer, poglejmo imena stolpcev za naše podatke:\n" + ], + "metadata": { + "id": "xcNxM70EtJjb" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Return column names\n", + "pumpkins %>% \n", + " names()" + ], + "outputs": [], + "metadata": { + "id": "5XtpaIigtPfW" + } + }, + { + "cell_type": "markdown", + "source": [ + "🤔 Lahko naredimo bolje. Spremenimo ta imena stolpcev v `friendR` tako, da jih pretvorimo v konvencijo [snake_case](https://en.wikipedia.org/wiki/Snake_case) z uporabo `janitor::clean_names`. Če želite izvedeti več o tej funkciji: `?clean_names`\n" + ], + "metadata": { + "id": "IbIqrMINtSHe" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Clean names to the snake_case convention\n", + "pumpkins <- pumpkins %>% \n", + " clean_names(case = \"snake\")\n", + "\n", + "# Return column names\n", + "pumpkins %>% \n", + " names()" + ], + "outputs": [], + "metadata": { + "id": "a2uYvclYtWvX" + } + }, + { + "cell_type": "markdown", + "source": [ + "Veliko urejenosti 🧹! Zdaj pa ples s podatki z uporabo `dplyr`, kot v prejšnji lekciji! 
💃\n" + ], + "metadata": { + "id": "HfhnuzDDtaDd" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Select desired columns\n", + "pumpkins <- pumpkins %>% \n", + " select(variety, city_name, package, low_price, high_price, date)\n", + "\n", + "\n", + "\n", + "# Extract the month from the dates to a new column\n", + "pumpkins <- pumpkins %>%\n", + " mutate(date = mdy(date),\n", + " month = month(date)) %>% \n", + " select(-date)\n", + "\n", + "\n", + "\n", + "# Create a new column for average Price\n", + "pumpkins <- pumpkins %>% \n", + " mutate(price = (low_price + high_price)/2)\n", + "\n", + "\n", + "# Retain only pumpkins with the string \"bushel\"\n", + "new_pumpkins <- pumpkins %>% \n", + " filter(str_detect(string = package, pattern = \"bushel\"))\n", + "\n", + "\n", + "# Normalize the pricing so that you show the pricing per bushel, not per 1 1/9 or 1/2 bushel\n", + "new_pumpkins <- new_pumpkins %>% \n", + " mutate(price = case_when(\n", + " str_detect(package, \"1 1/9\") ~ price/(1.1),\n", + " str_detect(package, \"1/2\") ~ price*2,\n", + " TRUE ~ price))\n", + "\n", + "# Relocate column positions\n", + "new_pumpkins <- new_pumpkins %>% \n", + " relocate(month, .before = variety)\n", + "\n", + "\n", + "# Display the first 5 rows\n", + "new_pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "X0wU3gQvtd9f" + } + }, + { + "cell_type": "markdown", + "source": [ + "Odlično delo!👌 Zdaj imate čist in urejen nabor podatkov, na katerem lahko zgradite svoj novi regresijski model!\n", + "\n", + "Kaj pravite na razpršeni diagram?\n" + ], + "metadata": { + "id": "UpaIwaxqth82" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Set theme\n", + "theme_set(theme_light())\n", + "\n", + "# Make a scatter plot of month and price\n", + "new_pumpkins %>% \n", + " ggplot(mapping = aes(x = month, y = price)) +\n", + " geom_point(size = 1.6)\n" + ], + "outputs": [], + "metadata": { + 
"id": "DXgU-j37tl5K" + } + }, + { + "cell_type": "markdown", + "source": [ + "Razsevni diagram nas spomni, da imamo podatke le za mesece od avgusta do decembra. Verjetno potrebujemo več podatkov, da bi lahko sklepali na linearen način.\n", + "\n", + "Poglejmo si še enkrat naše podatke za modeliranje:\n" + ], + "metadata": { + "id": "Ve64wVbwtobI" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Display first 5 rows\n", + "new_pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "HFQX2ng1tuSJ" + } + }, + { + "cell_type": "markdown", + "source": [ + "Kaj če bi želeli napovedati ceno (`price`) buče na podlagi stolpcev `city` ali `package`, ki sta znakovnega tipa? Ali pa še bolj preprosto, kako bi lahko našli korelacijo (ki zahteva, da sta oba njena vnosa številska) med, recimo, stolpcema `package` in `price`? 🤷🤷\n", + "\n", + "Modeli strojnega učenja najbolje delujejo s številskimi lastnostmi namesto z besedilnimi vrednostmi, zato je običajno potrebno pretvoriti kategorijske lastnosti v številske predstavitve.\n", + "\n", + "To pomeni, da moramo najti način za preoblikovanje naših napovednih spremenljivk, da jih model lahko učinkoviteje uporabi, kar imenujemo `inženiring značilnosti`.\n" + ], + "metadata": { + "id": "7hsHoxsStyjJ" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 3. Predobdelava podatkov za modeliranje z recepti 👩‍🍳👨‍🍳\n", + "\n", + "Dejavnosti, ki preoblikujejo vrednosti napovedovalcev, da jih model lažje učinkovito uporabi, se imenujejo `inženiring značilnosti`.\n", + "\n", + "Različni modeli imajo različne zahteve glede predobdelave. Na primer, metoda najmanjših kvadratov zahteva `kodiranje kategoričnih spremenljivk`, kot so month, variety in city_name. 
To preprosto pomeni `pretvorbo` stolpca s `kategoričnimi vrednostmi` v enega ali več `številskih stolpcev`, ki nadomestijo izvirni stolpec.\n", + "\n", + "Na primer, predpostavimo, da vaši podatki vključujejo naslednjo kategorično značilnost:\n", + "\n", + "| mesto |\n", + "|:---------:|\n", + "| Denver |\n", + "| Nairobi |\n", + "| Tokio |\n", + "\n", + "Uporabite lahko *ordinalno kodiranje*, da vsaki kategoriji dodelite edinstveno celoštevilsko vrednost, kot je to:\n", + "\n", + "| mesto |\n", + "|:-----:|\n", + "| 0 |\n", + "| 1 |\n", + "| 2 |\n", + "\n", + "In to bomo storili s svojimi podatki!\n", + "\n", + "V tem razdelku bomo raziskali še en izjemen paket Tidymodels: [recipes](https://tidymodels.github.io/recipes/) - ki je zasnovan za pomoč pri predobdelavi podatkov **preden** treniramo model. V svojem jedru je recept objekt, ki določa, katere korake je treba uporabiti na podatkovnem naboru, da ga pripravimo za modeliranje.\n", + "\n", + "Zdaj pa ustvarimo recept, ki pripravi naše podatke za modeliranje tako, da vsa opazovanja v stolpcih napovedovalcev nadomesti z edinstveno celoštevilsko vrednostjo:\n" + ], + "metadata": { + "id": "AD5kQbcvt3Xl" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Specify a recipe\n", + "pumpkins_recipe <- recipe(price ~ ., data = new_pumpkins) %>% \n", + " step_integer(all_predictors(), zero_based = TRUE)\n", + "\n", + "\n", + "# Print out the recipe\n", + "pumpkins_recipe" + ], + "outputs": [], + "metadata": { + "id": "BNaFKXfRt9TU" + } + }, + { + "cell_type": "markdown", + "source": [ + "Super! 👏 Pravkar smo ustvarili naš prvi recept, ki določa izid (ceno) in ustrezne napovedne spremenljivke ter da morajo biti vsi stolpci napovednih spremenljivk kodirani v nabor celih števil 🙌! Hitro si poglejmo podrobnosti:\n", + "\n", + "- Klic funkcije `recipe()` s formulo pove receptu *vloge* spremenljivk, pri čemer uporablja podatke `new_pumpkins` kot referenco. 
Na primer, stolpec `price` je bil dodeljen vlogi `outcome` (izid), medtem ko so bili preostali stolpci dodeljeni vlogi `predictor` (napovednik).\n", + "\n", + "- `step_integer(all_predictors(), zero_based = TRUE)` določa, da morajo biti vsi napovedniki pretvorjeni v nabor celih števil, pri čemer se številčenje začne pri 0.\n", + "\n", + "Prepričani smo, da imate misli, kot so: \"To je tako kul!! Ampak kaj, če bi moral potrditi, da recepti dejansko delajo točno to, kar pričakujem? 🤔\"\n", + "\n", + "To je odlična misel! Vidite, ko je vaš recept definiran, lahko ocenite parametre, potrebne za dejansko predobdelavo podatkov, in nato pridobite obdelane podatke. Tega običajno ne potrebujete, ko uporabljate Tidymodels (v trenutku bomo videli običajno prakso -> `workflows`), vendar je to lahko koristno, ko želite narediti nekakšen pregled za potrditev, da recepti delajo, kar pričakujete.\n", + "\n", + "Za to boste potrebovali še dva glagola: `prep()` in `bake()`, in kot vedno, vam naši mali R prijatelji, ki jih je ustvarila [`Allison Horst`](https://github.com/allisonhorst/stats-illustrations), pomagajo bolje razumeti to temo!\n", + "\n", + "

\n", + " \n", + "

Umetniško delo @allison_horst
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "KEiO0v7kuC9O" + } + }, + { + "cell_type": "markdown", + "source": [ + "[`prep()`](https://recipes.tidymodels.org/reference/prep.html): oceni potrebne parametre iz učnega nabora, ki jih je mogoče kasneje uporabiti na drugih podatkovnih naborih. Na primer, za določen stolpec napovednika, kateremu opazovanju bo dodeljena cela števila 0, 1, 2 itd.\n", + "\n", + "[`bake()`](https://recipes.tidymodels.org/reference/bake.html): vzame pripravljeni recept in uporabi operacije na katerem koli podatkovnem naboru.\n", + "\n", + "Torej, pripravimo in uporabimo naše recepte, da resnično potrdimo, da bodo stolpci napovednikov v ozadju najprej kodirani, preden se model prilega.\n" + ], + "metadata": { + "id": "Q1xtzebuuTCP" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Prep the recipe\n", + "pumpkins_prep <- prep(pumpkins_recipe)\n", + "\n", + "# Bake the recipe to extract a preprocessed new_pumpkins data\n", + "baked_pumpkins <- bake(pumpkins_prep, new_data = NULL)\n", + "\n", + "# Print out the baked data set\n", + "baked_pumpkins %>% \n", + " slice_head(n = 10)" + ], + "outputs": [], + "metadata": { + "id": "FGBbJbP_uUUn" + } + }, + { + "cell_type": "markdown", + "source": [ + "Woo-hoo!🥳 Obdelani podatki `baked_pumpkins` imajo vse napovedne spremenljivke kodirane, kar potrjuje, da bodo koraki predobdelave, opredeljeni kot naš recept, delovali po pričakovanjih. To sicer otežuje branje, a je veliko bolj razumljivo za Tidymodels! Vzemite si čas, da ugotovite, katera opazovanja so bila preslikana v ustrezne celoštevilske vrednosti.\n", + "\n", + "Prav tako je vredno omeniti, da je `baked_pumpkins` podatkovni okvir, na katerem lahko izvajamo izračune.\n", + "\n", + "Na primer, poskusimo najti dobro korelacijo med dvema točkama vaših podatkov, da bi potencialno zgradili dober napovedni model. Za to bomo uporabili funkcijo `cor()`. 
Vnesite `?cor()`, da izveste več o funkciji.\n" + ], + "metadata": { + "id": "1dvP0LBUueAW" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Find the correlation between the city_name and the price\n", + "cor(baked_pumpkins$city_name, baked_pumpkins$price)\n", + "\n", + "# Find the correlation between the package and the price\n", + "cor(baked_pumpkins$package, baked_pumpkins$price)\n" + ], + "outputs": [], + "metadata": { + "id": "3bQzXCjFuiSV" + } + }, + { + "cell_type": "markdown", + "source": [ + "Kot se izkaže, obstaja le šibka povezava med mestom in ceno. Vendar pa je nekoliko boljša povezava med paketom in njegovo ceno. To ima smisel, kajne? Običajno velja, da večja kot je škatla s pridelki, višja je cena.\n", + "\n", + "Medtem pa poskusimo vizualizirati tudi korelacijsko matriko vseh stolpcev z uporabo paketa `corrplot`.\n" + ], + "metadata": { + "id": "BToPWbgjuoZw" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load the corrplot package\n", + "library(corrplot)\n", + "\n", + "# Obtain correlation matrix\n", + "corr_mat <- cor(baked_pumpkins %>% \n", + " # Drop columns that are not really informative\n", + " select(-c(low_price, high_price)))\n", + "\n", + "# Make a correlation plot between the variables\n", + "corrplot(corr_mat, method = \"shade\", shade.col = NA, tl.col = \"black\", tl.srt = 45, addCoef.col = \"black\", cl.pos = \"n\", order = \"original\")" + ], + "outputs": [], + "metadata": { + "id": "ZwAL3ksmutVR" + } + }, + { + "cell_type": "markdown", + "source": [ + "🤩🤩 Veliko bolje.\n", + "\n", + "Dobro vprašanje, ki si ga lahko zdaj zastavimo glede teh podatkov, je: '`Kakšno ceno lahko pričakujem za določen paket buč?`' Pojdimo kar takoj k stvari!\n", + "\n", + "> Opomba: Ko **`bake()`** uporabite na pripravljenem receptu **`pumpkins_prep`** z **`new_data = NULL`**, pridobite obdelane (tj. kodirane) podatke za učenje. 
Če bi imeli drug nabor podatkov, na primer testni nabor, in bi želeli videti, kako bi recept predhodno obdelal te podatke, bi preprosto uporabili **`bake`** na **`pumpkins_prep`** z **`new_data = test_set`**.\n", + "\n", + "## 4. Izdelava modela linearne regresije\n", + "\n", + "

\n", + " \n", + "

Infografika: Dasani Madipalli
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "YqXjLuWavNxW" + } + }, + { + "cell_type": "markdown", + "source": [ + "Zdaj, ko smo sestavili recept in dejansko potrdili, da bodo podatki ustrezno predobdelani, bomo zgradili regresijski model, da odgovorimo na vprašanje: `Kakšno ceno lahko pričakujem za določen paket buč?`\n", + "\n", + "#### Učenje linearnega regresijskega modela z uporabo učnega nabora\n", + "\n", + "Kot ste verjetno že ugotovili, je stolpec *price* `izhodna` spremenljivka, medtem ko je stolpec *package* `napovedna` spremenljivka.\n", + "\n", + "Za to bomo najprej razdelili podatke tako, da bo 80 % šlo v učni nabor in 20 % v testni nabor, nato pa definirali recept, ki bo kodiral napovedni stolpec v niz celih števil, in zgradili specifikacijo modela. Recepta ne bomo pripravili in uporabili, saj že vemo, da bo podatke predobdelal, kot je pričakovano.\n" + ], + "metadata": { + "id": "Pq0bSzCevW-h" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "set.seed(2056)\n", + "# Split the data into training and test sets\n", + "pumpkins_split <- new_pumpkins %>% \n", + " initial_split(prop = 0.8)\n", + "\n", + "\n", + "# Extract training and test data\n", + "pumpkins_train <- training(pumpkins_split)\n", + "pumpkins_test <- testing(pumpkins_split)\n", + "\n", + "\n", + "\n", + "# Create a recipe for preprocessing the data\n", + "lm_pumpkins_recipe <- recipe(price ~ package, data = pumpkins_train) %>% \n", + " step_integer(all_predictors(), zero_based = TRUE)\n", + "\n", + "\n", + "\n", + "# Create a linear model specification\n", + "lm_spec <- linear_reg() %>% \n", + " set_engine(\"lm\") %>% \n", + " set_mode(\"regression\")" + ], + "outputs": [], + "metadata": { + "id": "CyoEh_wuvcLv" + } + }, + { + "cell_type": "markdown", + "source": [ + "Odlično! 
Zdaj, ko imamo recept in specifikacijo modela, moramo najti način, kako ju združiti v objekt, ki bo najprej predobdelal podatke (v ozadju prep+bake), nato prilegal model na predobdelane podatke in omogočal tudi morebitne aktivnosti po obdelavi. Tako ste lahko povsem brez skrbi!🤩\n", + "\n", + "V Tidymodels se ta priročen objekt imenuje [`workflow`](https://workflows.tidymodels.org/) in priročno združuje vaše komponente modeliranja! To je tisto, kar bi v *Pythonu* imenovali *pipelines*.\n", + "\n", + "Torej, združimo vse skupaj v workflow!📦\n" + ], + "metadata": { + "id": "G3zF_3DqviFJ" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Hold modelling components in a workflow\n", + "lm_wf <- workflow() %>% \n", + " add_recipe(lm_pumpkins_recipe) %>% \n", + " add_model(lm_spec)\n", + "\n", + "# Print out the workflow\n", + "lm_wf" + ], + "outputs": [], + "metadata": { + "id": "T3olroU3v-WX" + } + }, + { + "cell_type": "markdown", + "source": [ + "Poleg tega je mogoče potek dela prilagoditi/naučiti na zelo podoben način kot model.\n" + ], + "metadata": { + "id": "zd1A5tgOwEPX" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Train the model\n", + "lm_wf_fit <- lm_wf %>% \n", + " fit(data = pumpkins_train)\n", + "\n", + "# Print the model coefficients learned \n", + "lm_wf_fit" + ], + "outputs": [], + "metadata": { + "id": "NhJagFumwFHf" + } + }, + { + "cell_type": "markdown", + "source": [ + "Iz izpisa modela lahko vidimo koeficiente, pridobljene med učenjem. Ti predstavljajo koeficiente premice najboljšega prileganja, ki nam daje najnižjo skupno napako med dejansko in napovedano spremenljivko.\n", + "\n", + "#### Ocenjevanje uspešnosti modela z uporabo testnega nabora\n", + "\n", + "Čas je, da preverimo, kako se je model odrezal 📏! Kako to storimo?\n", + "\n", + "Zdaj, ko smo model naučili, ga lahko uporabimo za napovedovanje na testnem naboru z uporabo `parsnip::predict()`.
Nato lahko te napovedi primerjamo z dejanskimi vrednostmi oznak, da ocenimo, kako dobro (ali ne!) model deluje.\n", + "\n", + "Začnimo z izdelavo napovedi za testni nabor in nato združimo stolpce s testnim naborom.\n" + ], + "metadata": { + "id": "_4QkGtBTwItF" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make predictions for the test set\n", + "predictions <- lm_wf_fit %>% \n", + " predict(new_data = pumpkins_test)\n", + "\n", + "\n", + "# Bind predictions to the test set\n", + "lm_results <- pumpkins_test %>% \n", + " select(c(package, price)) %>% \n", + " bind_cols(predictions)\n", + "\n", + "\n", + "# Print the first ten rows of the tibble\n", + "lm_results %>% \n", + " slice_head(n = 10)" + ], + "outputs": [], + "metadata": { + "id": "UFZzTG0gwTs9" + } + }, + { + "cell_type": "markdown", + "source": [ + "Da, pravkar ste trenirali model in ga uporabili za napovedovanje! 🔮 Je dober? Poglejmo, kako dobro deluje model!\n", + "\n", + "V Tidymodels to naredimo z uporabo `yardstick::metrics()`! Pri linearni regresiji se osredotočimo na naslednje metrike:\n", + "\n", + "- `Root Mean Square Error (RMSE)`: Kvadratni koren [MSE](https://en.wikipedia.org/wiki/Mean_squared_error). To daje absolutno metriko v isti enoti kot oznaka (v tem primeru cena buče). Manjša kot je vrednost, boljši je model (v preprostem smislu predstavlja povprečno ceno, za katero so napovedi napačne!).\n", + "\n", + "- `Coefficient of Determination (običajno znan kot R-squared ali R2)`: Relativna metrika, pri kateri višja vrednost pomeni boljše prileganje modela. 
V bistvu ta metrika predstavlja, koliko variance med napovedanimi in dejanskimi vrednostmi oznak model lahko pojasni.\n" + ], + "metadata": { + "id": "0A5MjzM7wW9M" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Evaluate performance of linear regression\n", + "metrics(data = lm_results,\n", + " truth = price,\n", + " estimate = .pred)" + ], + "outputs": [], + "metadata": { + "id": "reJ0UIhQwcEH" + } + }, + { + "cell_type": "markdown", + "source": [ + "Toliko o uspešnosti modela. Poglejmo, ali lahko dobimo boljšo indikacijo z vizualizacijo razpršenega grafa paketa in cene, nato pa uporabimo napovedi za dodajanje črte najboljšega prileganja.\n", + "\n", + "To pomeni, da bomo morali pripraviti in obdelati testni nabor, da kodiramo stolpec paketa, nato pa to povežemo z napovedmi, ki jih je ustvaril naš model.\n" + ], + "metadata": { + "id": "fdgjzjkBwfWt" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Encode package column\n", + "package_encode <- lm_pumpkins_recipe %>% \n", + " prep() %>% \n", + " bake(new_data = pumpkins_test) %>% \n", + " select(package)\n", + "\n", + "\n", + "# Bind encoded package column to the results\n", + "lm_results <- lm_results %>% \n", + " bind_cols(package_encode %>% \n", + " rename(package_integer = package)) %>% \n", + " relocate(package_integer, .after = package)\n", + "\n", + "\n", + "# Print new results data frame\n", + "lm_results %>% \n", + " slice_head(n = 5)\n", + "\n", + "\n", + "# Make a scatter plot\n", + "lm_results %>% \n", + " ggplot(mapping = aes(x = package_integer, y = price)) +\n", + " geom_point(size = 1.6) +\n", + " # Overlay a line of best fit\n", + " geom_line(aes(y = .pred), color = \"orange\", size = 1.2) +\n", + " xlab(\"package\")\n", + " \n" + ], + "outputs": [], + "metadata": { + "id": "R0nw719lwkHE" + } + }, + { + "cell_type": "markdown", + "source": [ + "Odlično!
Kot lahko vidite, linearni regresijski model ne posploši najbolje odnosa med paketom in njegovo ustrezno ceno.\n", + "\n", + "🎃 Čestitke, pravkar ste ustvarili model, ki lahko pomaga napovedati ceno nekaj vrst buč. Vaša praznična bučna njiva bo čudovita. Ampak verjetno lahko ustvarite še boljši model!\n", + "\n", + "## 5. Izdelava modela polinomske regresije\n", + "\n", + "

\n", + " \n", + "

Infografika: Dasani Madipalli
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "HOCqJXLTwtWI" + } + }, + { + "cell_type": "markdown", + "source": [ + "Včasih naši podatki morda nimajo linearne povezave, vendar še vedno želimo napovedati rezultat. Polinomska regresija nam lahko pomaga pri napovedovanju bolj zapletenih nelinearnih povezav.\n", + "\n", + "Vzemimo na primer povezavo med embalažo in ceno v našem naboru podatkov o bučah. Čeprav je včasih med spremenljivkami linearna povezava - večja kot je buča po volumnu, višja je cena - te povezave včasih ni mogoče prikazati kot ravnino ali ravno črto.\n", + "\n", + "> ✅ Tukaj so [nekateri dodatni primeri](https://online.stat.psu.edu/stat501/lesson/9/9.8) podatkov, ki bi lahko uporabili polinomsko regresijo\n", + ">\n", + "> Ponovno si oglejte povezavo med sorto in ceno na prejšnjem grafu. Ali se vam zdi, da bi moral ta raztros nujno analizirati z ravno črto? Morda ne. V tem primeru lahko poskusite polinomsko regresijo.\n", + ">\n", + "> ✅ Polinomi so matematični izrazi, ki lahko vsebujejo eno ali več spremenljivk in koeficientov\n", + "\n", + "#### Učimo polinomski regresijski model z uporabo učnega nabora\n", + "\n", + "Polinomska regresija ustvari *ukrivljeno črto*, ki bolje ustreza nelinearnim podatkom.\n", + "\n", + "Poglejmo, ali bo polinomski model boljši pri napovedovanju. Sledili bomo nekoliko podobnemu postopku kot prej:\n", + "\n", + "- Ustvarite recept, ki določa korake predobdelave, ki jih je treba izvesti na naših podatkih, da jih pripravimo za modeliranje, tj. 
kodiranje napovedovalcev in izračunavanje polinomov stopnje *n*\n", + "\n", + "- Zgradite specifikacijo modela\n", + "\n", + "- Združite recept in specifikacijo modela v delovni tok\n", + "\n", + "- Ustvarite model z ujemanjem delovnega toka\n", + "\n", + "- Ocenite, kako dobro model deluje na testnih podatkih\n", + "\n", + "Pojdimo kar v akcijo!\n" + ], + "metadata": { + "id": "VcEIpRV9wzYr" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Specify a recipe\r\n", + "poly_pumpkins_recipe <-\r\n", + " recipe(price ~ package, data = pumpkins_train) %>%\r\n", + " step_integer(all_predictors(), zero_based = TRUE) %>% \r\n", + " step_poly(all_predictors(), degree = 4)\r\n", + "\r\n", + "\r\n", + "# Create a model specification\r\n", + "poly_spec <- linear_reg() %>% \r\n", + " set_engine(\"lm\") %>% \r\n", + " set_mode(\"regression\")\r\n", + "\r\n", + "\r\n", + "# Bundle recipe and model spec into a workflow\r\n", + "poly_wf <- workflow() %>% \r\n", + " add_recipe(poly_pumpkins_recipe) %>% \r\n", + " add_model(poly_spec)\r\n", + "\r\n", + "\r\n", + "# Create a model\r\n", + "poly_wf_fit <- poly_wf %>% \r\n", + " fit(data = pumpkins_train)\r\n", + "\r\n", + "\r\n", + "# Print learned model coefficients\r\n", + "poly_wf_fit\r\n", + "\r\n", + " " + ], + "outputs": [], + "metadata": { + "id": "63n_YyRXw3CC" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### Ocenjevanje zmogljivosti modela\n", + "\n", + "👏👏 Ustvarili ste polinomski model, zdaj pa naredimo napovedi na testnem naboru!\n" + ], + "metadata": { + "id": "-LHZtztSxDP0" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make price predictions on test data\r\n", + "poly_results <- poly_wf_fit %>% predict(new_data = pumpkins_test) %>% \r\n", + " bind_cols(pumpkins_test %>% select(c(package, price))) %>% \r\n", + " relocate(.pred, .after = last_col())\r\n", + "\r\n", + "\r\n", + "# Print the results\r\n", + "poly_results %>% \r\n", + " 
slice_head(n = 10)" + ], + "outputs": [], + "metadata": { + "id": "YUFpQ_dKxJGx" + } + }, + { + "cell_type": "markdown", + "source": [ + "Juhu, ocenimo, kako se je model odrezal na testnem naboru z uporabo `yardstick::metrics()`.\n" + ], + "metadata": { + "id": "qxdyj86bxNGZ" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "metrics(data = poly_results, truth = price, estimate = .pred)" + ], + "outputs": [], + "metadata": { + "id": "8AW5ltkBxXDm" + } + }, + { + "cell_type": "markdown", + "source": [ + "🤩🤩 Veliko boljša zmogljivost.\n", + "\n", + "`rmse` se je zmanjšal s približno 7 na približno 3, kar kaže na zmanjšano napako med dejansko ceno in napovedano ceno. To lahko *približno* interpretiramo kot povprečno napako napovedi, ki znaša približno 3 USD. `rsq` se je povečal s približno 0,4 na 0,8.\n", + "\n", + "Vse te metrike kažejo, da polinomski model deluje veliko bolje kot linearni model. Odlično delo!\n", + "\n", + "Poglejmo, ali lahko to vizualiziramo!\n" + ], + "metadata": { + "id": "6gLHNZDwxYaS" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Bind encoded package column to the results\r\n", + "poly_results <- poly_results %>% \r\n", + " bind_cols(package_encode %>% \r\n", + " rename(package_integer = package)) %>% \r\n", + " relocate(package_integer, .after = package)\r\n", + "\r\n", + "\r\n", + "# Print new results data frame\r\n", + "poly_results %>% \r\n", + " slice_head(n = 5)\r\n", + "\r\n", + "\r\n", + "# Make a scatter plot\r\n", + "poly_results %>% \r\n", + " ggplot(mapping = aes(x = package_integer, y = price)) +\r\n", + " geom_point(size = 1.6) +\r\n", + " # Overlay a line of best fit\r\n", + " geom_line(aes(y = .pred), color = \"midnightblue\", size = 1.2) +\r\n", + " xlab(\"package\")\r\n" + ], + "outputs": [], + "metadata": { + "id": "A83U16frxdF1" + } + }, + { + "cell_type": "markdown", + "source": [ + "Lahko vidite ukrivljeno črto, ki se bolje prilega vašim podatkom! 
🤩\n", + "\n", + "To lahko naredite še bolj gladko, če podate polinomsko formulo funkciji `geom_smooth`, kot to:\n" + ], + "metadata": { + "id": "4U-7aHOVxlGU" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make a scatter plot\r\n", + "poly_results %>% \r\n", + " ggplot(mapping = aes(x = package_integer, y = price)) +\r\n", + " geom_point(size = 1.6) +\r\n", + " # Overlay a line of best fit\r\n", + " geom_smooth(method = lm, formula = y ~ poly(x, degree = 4), color = \"midnightblue\", size = 1.2, se = FALSE) +\r\n", + " xlab(\"package\")" + ], + "outputs": [], + "metadata": { + "id": "5vzNT0Uexm-w" + } + }, + { + "cell_type": "markdown", + "source": [ + "Tako kot gladka krivulja!🤩\n", + "\n", + "Tukaj je, kako bi naredili novo napoved:\n" + ], + "metadata": { + "id": "v9u-wwyLxq4G" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make a hypothetical data frame\r\n", + "hypo_tibble <- tibble(package = \"bushel baskets\")\r\n", + "\r\n", + "# Make predictions using linear model\r\n", + "lm_pred <- lm_wf_fit %>% predict(new_data = hypo_tibble)\r\n", + "\r\n", + "# Make predictions using polynomial model\r\n", + "poly_pred <- poly_wf_fit %>% predict(new_data = hypo_tibble)\r\n", + "\r\n", + "# Return predictions in a list\r\n", + "list(\"linear model prediction\" = lm_pred, \r\n", + " \"polynomial model prediction\" = poly_pred)\r\n" + ], + "outputs": [], + "metadata": { + "id": "jRPSyfQGxuQv" + } + }, + { + "cell_type": "markdown", + "source": [ + "Napoved modela `polynomial` je smiselna, glede na razpršene grafe `price` in `package`! In, če je to boljši model kot prejšnji, ob pogledu na iste podatke, morate načrtovati proračun za te dražje buče!\n", + "\n", + "🏆 Odlično opravljeno! Ustvarili ste dva regresijska modela v eni lekciji. 
V zadnjem delu o regresiji se boste naučili o logistični regresiji za določanje kategorij.\n", + "\n", + "## **🚀Izziv**\n", + "\n", + "Preizkusite več različnih spremenljivk v tej beležki, da vidite, kako korelacija vpliva na natančnost modela.\n", + "\n", + "## [**Kvizi po predavanju**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/14/)\n", + "\n", + "## **Pregled in samostojno učenje**\n", + "\n", + "V tej lekciji smo se naučili o linearni regresiji. Obstajajo tudi druge pomembne vrste regresije. Preberite o tehnikah Stepwise, Ridge, Lasso in Elasticnet. Dober tečaj za poglobljeno učenje je [Stanfordov tečaj statističnega učenja](https://online.stanford.edu/courses/sohs-ystatslearning-statistical-learning).\n", + "\n", + "Če želite izvedeti več o uporabi izjemnega okvira Tidymodels, si oglejte naslednje vire:\n", + "\n", + "- Spletna stran Tidymodels: [Začnite z Tidymodels](https://www.tidymodels.org/start/)\n", + "\n", + "- Max Kuhn in Julia Silge, [*Tidy Modeling with R*](https://www.tmwr.org/)*.*\n", + "\n", + "###### **POSEBNA ZAHVALA:**\n", + "\n", + "[Allison Horst](https://twitter.com/allison_horst?lang=en) za ustvarjanje izjemnih ilustracij, ki naredijo R bolj prijazen in privlačen. Več ilustracij najdete v njeni [galeriji](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM).\n" + ], + "metadata": { + "id": "8zOLOWqMxzk5" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za strojno prevajanje [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da se zavedate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo strokovno človeško prevajanje. 
Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki izhajajo iz uporabe tega prevoda.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sl/2-Regression/3-Linear/solution/notebook.ipynb b/translations/sl/2-Regression/3-Linear/solution/notebook.ipynb new file mode 100644 index 000000000..c9f3ac327 --- /dev/null +++ b/translations/sl/2-Regression/3-Linear/solution/notebook.ipynb @@ -0,0 +1,1113 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Linearna in polinomska regresija za določanje cen buč - Lekcija 3\n", + "\n", + "Naložite potrebne knjižnice in podatkovni niz. Podatke pretvorite v podatkovni okvir, ki vsebuje podmnožico podatkov:\n", + "\n", + "- Uporabite samo buče, katerih cena je določena na bušel\n", + "- Datum pretvorite v mesec\n", + "- Ceno izračunajte kot povprečje najvišje in najnižje cene\n", + "- Ceno pretvorite tako, da odraža določanje cen glede na količino v bušlih\n" + ] + }, + { + "cell_type": "code", + "execution_count": 167, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NameTypePackageVarietySub VarietyGradeDateLow PriceHigh PriceMostly Low...Unit of SaleQualityConditionAppearanceStorageCropRepackTrans ModeUnnamed: 24Unnamed: 25
0BALTIMORENaN24 inch binsNaNNaNNaN4/29/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
1BALTIMORENaN24 inch binsNaNNaNNaN5/6/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
2BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
3BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
4BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN11/5/1690.0100.090.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " City Name Type Package Variety Sub Variety Grade Date \\\n", + "0 BALTIMORE NaN 24 inch bins NaN NaN NaN 4/29/17 \n", + "1 BALTIMORE NaN 24 inch bins NaN NaN NaN 5/6/17 \n", + "2 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "3 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "4 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 11/5/16 \n", + "\n", + " Low Price High Price Mostly Low ... Unit of Sale Quality Condition \\\n", + "0 270.0 280.0 270.0 ... NaN NaN NaN \n", + "1 270.0 280.0 270.0 ... NaN NaN NaN \n", + "2 160.0 160.0 160.0 ... NaN NaN NaN \n", + "3 160.0 160.0 160.0 ... NaN NaN NaN \n", + "4 90.0 100.0 90.0 ... NaN NaN NaN \n", + "\n", + " Appearance Storage Crop Repack Trans Mode Unnamed: 24 Unnamed: 25 \n", + "0 NaN NaN NaN E NaN NaN NaN \n", + "1 NaN NaN NaN E NaN NaN NaN \n", + "2 NaN NaN NaN N NaN NaN NaN \n", + "3 NaN NaN NaN N NaN NaN NaN \n", + "4 NaN NaN NaN N NaN NaN NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 167, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from datetime import datetime\n", + "\n", + "pumpkins = pd.read_csv('../../data/US-pumpkins.csv')\n", + "pumpkins.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 168, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MonthDayOfYearVarietyCityPackageLow PriceHigh PricePrice
709267PIE TYPEBALTIMORE1 1/9 bushel cartons15.015.013.636364
719267PIE TYPEBALTIMORE1 1/9 bushel cartons18.018.016.363636
7210274PIE TYPEBALTIMORE1 1/9 bushel cartons18.018.016.363636
7310274PIE TYPEBALTIMORE1 1/9 bushel cartons17.017.015.454545
7410281PIE TYPEBALTIMORE1 1/9 bushel cartons15.015.013.636364
\n", + "
" + ], + "text/plain": [ + " Month DayOfYear Variety City Package Low Price \\\n", + "70 9 267 PIE TYPE BALTIMORE 1 1/9 bushel cartons 15.0 \n", + "71 9 267 PIE TYPE BALTIMORE 1 1/9 bushel cartons 18.0 \n", + "72 10 274 PIE TYPE BALTIMORE 1 1/9 bushel cartons 18.0 \n", + "73 10 274 PIE TYPE BALTIMORE 1 1/9 bushel cartons 17.0 \n", + "74 10 281 PIE TYPE BALTIMORE 1 1/9 bushel cartons 15.0 \n", + "\n", + " High Price Price \n", + "70 15.0 13.636364 \n", + "71 18.0 16.363636 \n", + "72 18.0 16.363636 \n", + "73 17.0 15.454545 \n", + "74 15.0 13.636364 " + ] + }, + "execution_count": 168, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pumpkins = pumpkins[pumpkins['Package'].str.contains('bushel', case=True, regex=True)]\n", + "\n", + "new_columns = ['Package', 'Variety', 'City Name', 'Month', 'Low Price', 'High Price', 'Date']\n", + "pumpkins = pumpkins.drop([c for c in pumpkins.columns if c not in new_columns], axis=1)\n", + "\n", + "price = (pumpkins['Low Price'] + pumpkins['High Price']) / 2\n", + "\n", + "month = pd.DatetimeIndex(pumpkins['Date']).month\n", + "day_of_year = pd.to_datetime(pumpkins['Date']).apply(lambda dt: (dt-datetime(dt.year,1,1)).days)\n", + "\n", + "new_pumpkins = pd.DataFrame(\n", + " {'Month': month, \n", + " 'DayOfYear' : day_of_year, \n", + " 'Variety': pumpkins['Variety'], \n", + " 'City': pumpkins['City Name'], \n", + " 'Package': pumpkins['Package'], \n", + " 'Low Price': pumpkins['Low Price'],\n", + " 'High Price': pumpkins['High Price'], \n", + " 'Price': price})\n", + "\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1 1/9'), 'Price'] = price/1.1\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1/2'), 'Price'] = price*2\n", + "\n", + "new_pumpkins.head()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Raztreseni diagram nas opominja, da imamo podatke le od avgusta do decembra. 
Verjetno potrebujemo več podatkov, da bi lahko sklepali na linearen način.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 169, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 169, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "new_pumpkins.plot.scatter('Month','Price')" + ] + }, + { + "cell_type": "code", + "execution_count": 170, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 170, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "new_pumpkins.plot.scatter('DayOfYear','Price')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 171, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-0.14878293554077535\n", + "-0.16673322492745407\n" + ] + } + ], + "source": [ + "print(new_pumpkins['Month'].corr(new_pumpkins['Price']))\n", + "print(new_pumpkins['DayOfYear'].corr(new_pumpkins['Price']))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Zdi se, da je korelacija precej majhna, vendar obstaja neka druga, bolj pomembna povezava - ker se zdi, da imajo cenovne točke na zgornjem grafu več ločenih grozdov. Naredimo graf, ki bo prikazal različne sorte buč:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 172, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "ax=None\n", + "colors = ['red','blue','green','yellow']\n", + "for i,var in enumerate(new_pumpkins['Variety'].unique()):\n", + " ax = new_pumpkins[new_pumpkins['Variety']==var].plot.scatter('DayOfYear','Price',ax=ax,c=colors[i],label=var)" + ] + }, + { + "cell_type": "code", + "execution_count": 173, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 173, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "new_pumpkins.groupby('Variety')['Price'].mean().plot(kind='bar')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 174, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-0.2669192282197318\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 174, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "pie_pumpkins = new_pumpkins[new_pumpkins['Variety']=='PIE TYPE']\n", + "print(pie_pumpkins['DayOfYear'].corr(pie_pumpkins['Price']))\n", + "pie_pumpkins.plot.scatter('DayOfYear','Price')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Linearna regresija\n", + "\n", + "Za učenje modela linearne regresije bomo uporabili Scikit Learn:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 175, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error\n", + "from sklearn.model_selection import train_test_split" + ] + }, + { + "cell_type": "code", + "execution_count": 176, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 2.77 (17.2%)\n" + ] + } + ], + "source": [ + "X = pie_pumpkins['DayOfYear'].to_numpy().reshape(-1,1)\n", + "y = pie_pumpkins['Price']\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n", + "lin_reg = LinearRegression()\n", + "lin_reg.fit(X_train,y_train)\n", + "\n", + "pred = lin_reg.predict(X_test)\n", + "\n", + "mse = np.sqrt(mean_squared_error(y_test,pred))\n", + "print(f'Mean error: {mse:3.3} ({mse/np.mean(pred)*100:3.3}%)')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 177, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 177, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.scatter(X_test,y_test)\n", + "plt.plot(X_test,pred)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Naklon premice lahko določimo iz koeficientov linearne regresije:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 178, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([-0.01751876]), 21.133734359909326)" + ] + }, + "execution_count": 178, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lin_reg.coef_, lin_reg.intercept_" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Usposobljen model lahko uporabimo za napovedovanje cene:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 179, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([16.64893156])" + ] + }, + "execution_count": 179, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Pumpkin price on programmer's day\n", + "\n", + "lin_reg.predict([[256]])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Polinomska regresija\n", + "\n", + "Včasih je razmerje med značilnostmi in rezultati po naravi nelinearno. Na primer, cene buč so lahko visoke pozimi (meseci=1,2), nato padejo poleti (meseci=5-7) in nato ponovno narastejo. Linearna regresija tega razmerja ne more natančno zajeti.\n", + "\n", + "V tem primeru lahko razmislimo o dodajanju dodatnih značilnosti. Preprost način je uporaba polinomov iz vhodnih značilnosti, kar vodi do **polinomske regresije**. 
V Scikit Learn lahko avtomatsko predračunamo polinomske značilnosti z uporabo cevovodov:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 180, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 2.73 (17.0%)\n", + "Model determination: 0.07639977655280217\n" + ] + }, + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 180, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXUAAAD4CAYAAAATpHZ6AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAbw0lEQVR4nO3de3Cc1Znn8e+jm93ClmRb8kWyjYBgDb6ATQQhFwIhFzu7meBQNVOVyu5Sm9RQSWWnJlMTZ3BIZWq2dpcMnprZzM5WTbEDFVLDsJOZOM4USTAEkkBYMJExjOwYY8AXkGRLsi35otb92T+6JbfurXa3ut+j36eqS2+ffvvto0f2T6/Oe/q0uTsiIhKGonx3QEREskehLiISEIW6iEhAFOoiIgFRqIuIBKRkLl+surra6+vr5/IlRUQib//+/Z3uXpPOvnMa6vX19TQ1Nc3lS4qIRJ6ZnUh3Xw2/iIgERKEuIhIQhbqISEAU6iIiAVGoi4gEZMbZL2a2Bvg+sBIYBh529++a2S7gd4F+4G3gP7t7Vw77KnNgz4EWdu09QmtXnNqqGDu2NrB9S12+uyUiaUrnTH0Q+BN3vwG4Dfiqma0HngE2uvuNwJvAztx1U+bCngMt7NzdTEtXHAdauuLs3N3MngMt+e6aiKRpxlB39zZ3fzW5fQE4DNS5+9PuPpjc7WVgde66KXNh194jxAeGxrTFB4bYtfdInnokIrM1qzF1M6sHtgD7xj30ReBnUzznPjNrMrOmjo6OjDopc6O1Kz6rdhEpPGmHupktAn4IfM3dz6e0P0BiiObxyZ7n7g+7e6O7N9bUpPUuV8mT2qrYrNpFpPCkFepmVkoi0B93990p7fcCnwG+4PoIpcjbsbWBWGnxmLZYaTE7tjbkqUciMlvpzH4x4BHgsLv/VUr7NuBPgTvcvSd3XZS5MjLLRbNfRKLLZjrBNrOPAC8AzSSmNAJ8E/gbYAFwJtn2srt/ebpjNTY2uhb0EhGZHTPb7+6N6ew745m6u/8asEke+ulsOyYiIrmld5SKiAREoS4iEhCFuohIQBTqIiIBUaiLiAREoS4iEhCFuohIQBTqIiIBUaiLiAREoS4iEhCFuohIQBTqIiIBUaiLiAREoS4iEhCFuohIQBTqIiIBUaiLiAREoS4iEhCFuohIQGb8jFKRXNtzoIVde4/Q2hWntirGjq0NbN9SV/DHlolU7/xTqEte7TnQws7dzcQHhgBo6Yqzc3czwBWHQS6PLROp3oVBwy+SV7v2HhkNgRHxgSF27T1S0MeWiVTvwqBQl7xq7YrPqr1Qji0Tqd6FQaEueVVbFZtVe6EcWyZSvQuDQl3yasfWBmKlxWPaYqXF7NjaUNDHlolU78KgC6WSVyMX0HIxYyKXx5aJVO/CYO4+Zy/W2NjoTU1Nc/Z6IiIhMLP97t6Yzr4znqmb2Rrg+8BKYBh42N2/a2ZLgX8C6oHjwO+7+7lMO50PmlMrIqFJZ0x9EPgTd
78BuA34qpmtB+4HnnX364Fnk/cjY2RObUtXHOfynNo9B1ry3TURkYzNGOru3uburya3LwCHgTrgbuCx5G6PAdtz1Mec0JxaEQnRrGa/mFk9sAXYB6xw9zZIBD+wfIrn3GdmTWbW1NHRcYXdzR7NqRWREKUd6ma2CPgh8DV3P5/u89z9YXdvdPfGmpqaTPqYE5pTKyIhSivUzayURKA/7u67k82nzWxV8vFVQHtuupgbmlMrIiGaMdTNzIBHgMPu/lcpD/0rcG9y+17gx9nvXu5s31LHg/dsoq4qhgF1VTEevGeTZr+ISKTNOE/dzD4CvAA0k5jSCPBNEuPqPwDWAieB33P3s9MdS/PURURmL6vz1N3914BN8fDHZ9MxERHJLa39IiISEIW6iEhAFOoiIgFRqIuIBEShLiISEIW6iEhAFOoiIgFRqIuIBEShLiISEH1GqUiG9MlZUogU6iIZGPnkrJEPWhn55CxAwS55peEXkQzok7OkUCnURTKgT86SQqVQF8mAPjlLCpVCXSQD+uQsKVS6UCqSgZGLoZr9IoVGoS6Soe1b6hTiUnA0/CIiEhCFuohIQBTqIiIBUaiLiAREoS4iEhCFuohIQAp+SmNUV8KLar9FJNoKOtSjuhJeVPstItFX0MMvUV0JL6r9FpHomzHUzexRM2s3s4MpbZvN7GUze83Mmszs1lx0Lqor4UW13yISfemcqX8P2Dau7SHgz919M/Dt5P2si+pKeFHtt4hE34yh7u7PA2fHNwMVye1KoDXL/QKiuxJeVPstItGX6YXSrwF7zewvSfxi+FDWepQiqivhRbXfIhJ95u4z72RWDzzp7huT9/8G+JW7/9DMfh+4z90/McVz7wPuA1i7du37T5w4ka2+i4jMC2a2390b09k309kv9wK7k9v/DEx5odTdH3b3RndvrKmpyejFuuMD9A8OZ/RcEZH5JNNQbwXuSG7fBRzNTncm97+ePcqt/+PnfGtPM/tPnCWdvy5EROajGcfUzewJ4E6g2szeA/4M+APgu2ZWAvSSHF7JlbtuWE77hT7+Zf97/MPLJ1m7tDzxAQWba7m2ZlEuX1pEJFLSGlPPlsbGRm9qasr4+Rf7Btl78BR7Xmvhxbc6GXa4aU0Vn9tcy+/eVMuyRQuy2FsRkcIwmzH1SIV6qtPne/nX11r50YEWftt2nuIi4451NWzfUscnb1hBrKx45oOIiETAvAj1VEdOXWDPay38+EALrd29XFVWzLaNq/jcljo+eN0yioss668pIjJX5l2ojxgedvYdO8ueAy38tLmNC32DrKhYwN2b69i+uY4bVi3GTAEvItEyb0M9Ve/AEM+90c6PDrTwyyPtDAw5DSsWs31LHXdvrtVb9kUkMhTq45y71M+TzW3sOdDC/hPnMIPbrlnG57bUsW3TSioWls55n0RE0qVQn8aJM5f4cfIC67HOS5SVFHFXw3I+vWklH/ud5Qp4ESk4CvU0uDuvv9c9Ov7efqGP0mLjw++rZtuGlXxi/QqqNUVSRAqAQn2WhoedA+92sffQKZ46eIqTZ3soMrilfinbNq5k64aVGoMXkbxRqF8Bd+dw2wWeOnSKvQdPceT0BQBuWl3JpzasZNvGlVynd7GKyBxSqGfROx0X2XvoNE8dOsXr73YBcP3yRaNn8BtqKzRNUkRySqGeI23dcZ4+dJqnDp5i37EzDDusXhJjW/IM/ua1SyjSG51EJMsU6nPgzMU+nj3czlOHTvHro530Dw1Ts3gBn1q/gm0bV3LbtcsoLS7oz/UWkYhQqM+xC70D/OJIB3sPnuIXR9rp6R+iYmEJn7hhBZ9Yv4IPX1dNZbmmSopIZhTqedQ7MMQLRzt56uApfn74NN3xAYossZrk7dfX8NHrq9m8pooSncWLSJoU6gVicGiY197t4vmjnbxwtIPX3+1i2GHxghI+9L5lyZCvYe2y8nx3VUQKmEK9QHX3DPDi24mAf/7NTlq64gBcvaycj15fw+3XV
/PB65axWO9qFZEUCvUIcHeOdV7ihaOdPP9mBy+9c4ae/iGKi4yb11YlQn5dDZvqKrV0sMg8p1CPoP7BYV49eY4XjnbwwtFOmlu6cYfKWCkfeV81t19fze3raqjTO1tF5h2FegDOXOzjxbfP8MKbiZA/db4XgOtqrkqMxa+r5rZrl1FeNuPHzIpIxCnUA+PuvNV+kV8lA37fsTP0DgxTWmw0Xr2U29dV89Hra1i/qkJvfhIJkEI9cL0DQ+w/cY7nkxdcD7edB2DpVWWjQzUfuGYZa5bGtISBSAAU6mnac6CFXXuP0NoVp7Yqxo6tDWzfUpfvbs1a+4VeXnyrkxfe7OT5o510XuwDYPniBdxSv5TG+iXcUr+U31m5WPPjRSJIoZ6GPQda2Lm7mfjA0GhbrLSYB+/ZFMlgHzE87LzZfoGm4+doOn6W3xw/Nzp18qqyYm6+egmNVyeCfvOaKq5aoDF5kUKnUE/Dh7/z3GjYpaqrivHi/XfloUe509oVp+nE5ZB/49R53KG4yNhQW0Hj1Uu5pX4J769fwvLFC/PdXREZZzahPm9P01onCfTp2qOstirGZ6tifPamWgDO9w5w4GRXMuTP8o+vnODRF48BUL+snMb6pdy0poqNtRXcsKqChaXF+ey+iMzCvA312qrYpGfq8+ETjioWlnLHuhruWFcDJObIH2rtpun4OX5z/CzPvdHOv+x/D4Aig/ctX8TG2krW11awsS7xNZuf5RrKtQ2RQjBvh19CHVPPBnentbuXgy3dHGrp5lDreQ62dnP6fN/oPlcvK2djbSUb6irYUFvJxtoKlmXwma76OYjMLKvDL2b2KPAZoN3dN6a0/yHwX4BB4Cfu/o0M+5sXI4GhM8SJzIy6qhh1VTG2blg52t5xoY9DrcmQb+mmuaWbnzS3jT6+qnIhG2qTIV9XyYbaClZVLpx2WuWuvUfGBDpAfGCIXXuP6GchkoF0hl++B/wt8P2RBjP7GHA3cKO795nZ8tx0L7e2b6lTcMxCzeIF3NmwnDsbLv+4u3sGONTWzW+TQX+w9TzPvdHOcPIPwKVXlaUEfeLr1UvLR98kNZ+ubYjMhRlD3d2fN7P6cc1fAb7j7n3Jfdpz0DeZpXyMTVeWl/Kh66r50HXVo209/YMcbruQOKtvSQzdPPLrdxgYSiT9ogUlrK+tYENtBVXlpZzrGZhw3Gxd29B4vcw3mV4oXQfcbmb/HegFvu7uv5lsRzO7D7gPYO3atRm+nMxk/Nh0S1ecnbubAeY8xMrLSnj/1Ut4/9VLRtv6Boc4evoih1q7OZgM+ideOUnvwPCkx7imupyDLd3UV1/Fogzn0hdSTUTmSloXSpNn6k+OjKmb2UHgOeCPgFuAfwKu9RkOVkgXSkMTxXn3Q8POBx98lvYLfdPut3zxAq6pvmrM7dqaq1iztJwFJVNPt4xiTSQcQ8NOW3eck2d7ePdsDx9rWM7yiszeBzIX89TfA3YnQ/wVMxsGqoGODI8nVyiKY9PFRUbHNIH+d//hZt7pvMSxjksc67zEM789zZlL/aOPFxmsXlI+JuhHtmsrY5GsiUTL+d4BTp5JhPbJlNu7Z3to6YqPDjkC/J//1Mgn1+f+zX2Zhvoe4C7gl2a2DigDOrPVKZm9qM67n6rfdVUxtm1cNaG9u2eAY2cucazzIsc6LvFO5yWOn7lE0/GzXOq/PIumrKSI4iJjcHjiH48rKxbi7lrsTGY0MDRMW1fv5bA+dzm0T57toWvc9aCq8lLWLi1nQ10ln960irVLy0dvqyrn5t3a6UxpfAK4E6g2s/eAPwMeBR5NDsP0A/fONPQiubVja8Ok8713bG3IY69mNtt+V5aXsrm8is1rqsa0uzsdF/oSZ/bJ2/97q5NDrecZ/w+z7Xwv67+9l5WVC1lZsZBVlQtZUZn4urJiYaK9ciHVVy3QUsaBcncu9Q/R1dNPV88AXT0DnO3p571zY8+6W7t6GUo5MSgtNlYvKWfN0nJuXF05G
thrkrdsvikvU/P2zUchiupMj1z2e8+BFh566g1au3upXlTGv9+0ijVLyznV3Uvb+V5OdSdup8/3TjirLykyVqSE/MgvgJUp4b988ULKSrTyZT71Dgxxrqefc5cG6IpfDulzPf10xwc4d6mfrvjAaICf6xmgO94/ZmgkVfWiskRILykfE9prl5WzsmJhXj5eUgt6iczS8LDTeamP0919tHXHOZUS+G3J0G/tjk+YrWMGVbFSqsrLqIiVUhUrpTJ5qyq/vH25rWz0Ma2pM1b/4HAieKcJ4smCu29w8hlUAAtLi6iKlVFVnqj5kvKR7TKqYon7lcn2JeWl1FbFCnLlUi3oNU/pTD1zRUXG8sWJM+9Nqysn3cfdOR8fpO18fEzgd17sozs+kDgr7Onn+JlLo/enO2cqKylKBHzKL4GK5HZbdy8vvX2G7vgAS8pL2b6ljtuvr6asuJiykqLErTjxdUHylto+3br5uaq3uzM07Ay5c7F3cFwQpwb05cA+d+ly3Xr6h6Y8dmmxjQnixPBHMpxHwjo27v48/cWpM/VARHUNlaj2Ox3Dw86FvkG6ewZGQ74r3j+6PdLelfJ4d3yAzot90559pqPIGA34BaXFia8lRcQHhjh1vnfMLxszWF0VY/HCUoZHgjkZzkPDzvDoNqOPj7QNpmynEyVFxmjwTnamXJn8Ov7suryseF5f2NaZ+jwU1TVUotrvdBQV2eiwy2xMNb++ZtEC/vcXbqZ/cJj+oSH6BobpHxqmb3A40TaYuJ+63TcwNGafnx8+PSF83aH9Qh8NKxdTZEZxUcrNjKLUr0WMaSsuTn4tsjHPXbSgZNJhjsULSnTxOccU6oGI6pzsqPY7l6b63jsv9nHrNUuv6NjX3P+TSdv7B4f5+3tvuaJjS2HQZftATDUfPQrz1GfTPh/ksiaqd/gU6oHYsbWB2LiLQlGZpx7FfudSLmuieodPwy+BiOr68FHtdy7lsiaqd/g0+0VEpMDNZvaLhl9ERAKiUBcRCYhCXUQkIAp1EZGAKNRFRAKiUBcRCYhCXUQkIAp1EZGA6B2lMkYhrG0uIplTqMuo8Wubt3TF2bm7GUDBLhIRGn6RUdOtbS4i0aBQl1Fa21wk+hTqMkprbYtEn0JdRmmtbZHo04VSGaW1tkWiT6EuY2zfUqcQF4kwhbrknebGT5TLmkT12FE11zVRqEteaW78RLmsSVSPHVX5qMmMF0rN7FEzazezg5M89nUzczOrzknvJHiaGz9RLmsS1WNHVT5qks7sl+8B28Y3mtka4JPAySz3SeYRzY2fKJc1ieqxoyofNZkx1N39eeDsJA/9NfANYO4+uVqCo7nxE+WyJlE9dlTloyYZzVM3s88CLe7+ehr73mdmTWbW1NHRkcnLScA0N36iXNYkqseOqnzUZNYXSs2sHHgA+FQ6+7v7w8DDAI2NjTqrlzE0N36iXNYkqseOqnzUxNxnzlkzqweedPeNZrYJeBboST68GmgFbnX3U9Mdp7Gx0Zuamq6sxyIi84yZ7Xf3xnT2nfWZurs3A8tTXuw40OjunbM9lkiuad60zDfpTGl8AngJaDCz98zsS7nvlsiVG5kj3NIVx7k8R3jPgZZ8d00kZ2Y8U3f3z8/weH3WeiOSRdPNEdbZuoRKqzRKsDRvWuYjLRMgwaqtitEySYDP53nTuRbVaxhR7fdkdKYuwdK86bkV1WsYUe33VBTqEqztW+p48J5N1FXFMKCuKsaD92yK7BlYoYvq2i9R7fdUNPwiQdP68HMnqtcwotrvqehMXUSyIqprv0S131NRqItIVkT1GkZU+z0VDb+ISFZEde2XqPZ7Kmmt/ZItWvtFRGT2ZrP2i4ZfREQColAXEQmIQl1EJCAKdRGRgCjURUQColAXEQmIQl1EJCAKdRGRgCjURUQColAXEQmIQl1EJCAKdRGRgCjURUQColAXEQmIQl1EJCAKdRGRgCjURUQCMmOom9mjZtZuZgdT2naZ2Rtm9m9m9
iMzq8ppL0VEJC3pnKl/D9g2ru0ZYKO73wi8CezMcr9ERCQDM4a6uz8PnB3X9rS7DybvvgyszkHfRERklrIxpv5F4GdTPWhm95lZk5k1dXR0ZOHlRERkKlcU6mb2ADAIPD7VPu7+sLs3untjTU3NlbyciIjMoCTTJ5rZvcBngI+7u2evSyIikqmMQt3MtgF/Ctzh7j3Z7ZKIiGQqnSmNTwAvAQ1m9p6ZfQn4W2Ax8IyZvWZmf5fjfoqISBpmPFN3989P0vxIDvoiIiJXSO8oFREJiEJdRCQgCnURkYAo1EVEAqJQFxEJiEJdRCQgCnURkYAo1EVEAqJQFxEJiEJdRCQgCnURkYBkvPSuiOTOt/Y088S+dxlyp9iMz39gDf9t+6asHHvPgRZ27T1Ca1ec2qoYO7Y2sH1LXVaOLfmnUBcpMN/a08w/vHxy9P6Q++j9Kw32PQda2Lm7mfjAEAAtXXF27m4GULAHQsMvIgXmiX3vzqp9NnbtPTIa6CPiA0Ps2nvkio8thUGhLlJghqb4ILGp2mejtSs+q3aJHoW6SIEpNptV+2zUVsVm1S7Ro1AXKTCf/8CaWbXPxo6tDcRKi8e0xUqL2bG14YqPLYVBF0pFCszIxdBczH4ZuRiq2S/hMs/COF26Ghsbvampac5eT0QkBGa2390b09lXwy8iIgFRqIuIBEShLiISEIW6iEhAFOoiIgGZ09kvZtYBnACqgc45e+HCpTqoBqAajFAdpq7B1e5ek84B5jTUR1/UrCnd6TkhUx1UA1ANRqgO2amBhl9ERAKiUBcRCUi+Qv3hPL1uoVEdVANQDUaoDlmoQV7G1EVEJDc0/CIiEhCFuohIQLIe6ma2xsx+YWaHzeyQmf3RuMe/bmZuZtUpbTvN7C0zO2JmW7Pdp3yYrg5m9ofJ7/WQmT2U0h5UHaaqgZltNrOXzew1M2sys1tTnhNUDQDMbKGZvWJmryfr8OfJ9qVm9oyZHU1+XZLynKDqME0NdpnZG2b2b2b2IzOrSnlOUDWAqeuQ8viV56O7Z/UGrAJuTm4vBt4E1ifvrwH2knwDUrJtPfA6sAC4BngbKM52v+b6NlUdgI8BPwcWJB9bHmodpqnB08Cnk+3/DvhlqDVIfl8GLEpulwL7gNuAh4D7k+33A38Rah2mqcGngJJk+1+EXIPp6pC8n5V8zPqZuru3ufurye0LwGFgZAX+vwa+AaRenb0b+L/u3ufux4C3gFuJuGnq8BXgO+7el3ysPfmU4OowTQ0cqEjuVgm0JreDqwGAJ1xM3i1N3pzE9/tYsv0xYHtyO7g6TFUDd3/a3QeT7S8Dq5PbwdUApv23AFnKx5yOqZtZPbAF2GdmnwVa3P31cbvVAakfk/4el38JBCG1DsA64HYz22dmvzKzW5K7BV2HcTX4GrDLzN4F/hLYmdwt2BqYWbGZvQa0A8+4+z5ghbu3QeIXILA8uXuQdZiiBqm+CPwsuR1kDWDyOmQzH3MW6ma2CPghif/Ag8ADwLcn23WStmDmWabWwd3Pk/gIwSUk/vTcAfzAzIyA6zBJDb4C/LG7rwH+GHhkZNdJnh5EDdx9yN03kzgTvdXMNk6ze5B1mK4GZvYAiZx4fKRpskPkvJNzYJI63EgW8zEnoW5mpST+Ez/u7ruB60iMB71uZsdJfDOvmtlKEr95Uj9RdzWX/xyPtEnqAInvd3fyz7BXgGESi/gEWYcpanAvMLL9z1z+czLIGqRy9y7gl8A24LSZrQJIfh0Zigu6DuNqgJndC3wG+IInB5IJvAYwpg53k818zNGFgO8D/3OafY5z+ULABsZeCHiHcC6ITKgD8GXgvya315H408pCrMM0NTgM3Jnc/jiwP/B/CzVAVXI7BrxAIsR2MfZC6UOh1mGaGmwDfgvUjNs/uBpMV4dx+1xRPpZMk/eZ+jDwH4Hm5LgRwDfd/aeT7ezuh8zsByR+sIPAV919KAf9mmuT1gF4FHjUzA4C/cC9nvjphViHqWrwB
8B3zawE6AXug6D/LawCHjOzYhJ/Hf/A3Z80s5dIDL99CTgJ/B4EW4epavAWicB6JjEKycvu/uVAawBT1GGqnTOpg5YJEBEJiN5RKiISEIW6iEhAFOoiIgFRqIuIBEShLiISEIW6iEhAFOoiIgH5/+EaqS+WjFbpAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "from sklearn.preprocessing import PolynomialFeatures\n", + "from sklearn.pipeline import make_pipeline\n", + "\n", + "pipeline = make_pipeline(PolynomialFeatures(2), LinearRegression())\n", + "\n", + "pipeline.fit(X_train,y_train)\n", + "\n", + "pred = pipeline.predict(X_test)\n", + "\n", + "mse = np.sqrt(mean_squared_error(y_test,pred))\n", + "print(f'Mean error: {mse:3.3} ({mse/np.mean(pred)*100:3.3}%)')\n", + "\n", + "score = pipeline.score(X_train,y_train)\n", + "print('Model determination: ', score)\n", + "\n", + "plt.scatter(X_test,y_test)\n", + "plt.plot(sorted(X_test),pipeline.predict(sorted(X_test)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Kodiranje sort\n", + "\n", + "V idealnem svetu bi želeli napovedati cene za različne sorte buč z uporabo istega modela. Da upoštevamo sorto, jo moramo najprej pretvoriti v številčno obliko, oziroma **kodirati**. Obstaja več načinov, kako to storiti:\n", + "\n", + "* Preprosto številčno kodiranje, ki ustvari tabelo različnih sort in nato ime sorte zamenja z indeksom v tej tabeli. To ni najboljša ideja za linearno regresijo, saj linearna regresija upošteva številčno vrednost indeksa, ta pa verjetno ne bo numerično korelirala s ceno.\n", + "* Kodiranje s tehniko \"one-hot\", ki stolpec `Variety` zamenja s 4 različnimi stolpci, po enim za vsako sorto, ki vsebujejo 1, če ustrezna vrstica pripada določeni sorti, in 0 sicer.\n", + "\n", + "Spodnja koda prikazuje, kako lahko kodiramo sorto s tehniko \"one-hot\":\n" + ] + }, + { + "cell_type": "code", + "execution_count": 181, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
FAIRYTALEMINIATUREMIXED HEIRLOOM VARIETIESPIE TYPE
700001
710001
720001
730001
740001
...............
17380100
17390100
17400100
17410100
17420100
\n", + "

415 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " FAIRYTALE MINIATURE MIXED HEIRLOOM VARIETIES PIE TYPE\n", + "70 0 0 0 1\n", + "71 0 0 0 1\n", + "72 0 0 0 1\n", + "73 0 0 0 1\n", + "74 0 0 0 1\n", + "... ... ... ... ...\n", + "1738 0 1 0 0\n", + "1739 0 1 0 0\n", + "1740 0 1 0 0\n", + "1741 0 1 0 0\n", + "1742 0 1 0 0\n", + "\n", + "[415 rows x 4 columns]" + ] + }, + "execution_count": 181, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.get_dummies(new_pumpkins['Variety'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Linearna regresija na sorti\n", + "\n", + "Zdaj bomo uporabili isto kodo kot zgoraj, vendar bomo namesto `DayOfYear` uporabili našo eno-vroče kodirano sorto kot vhod:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 182, + "metadata": {}, + "outputs": [], + "source": [ + "X = pd.get_dummies(new_pumpkins['Variety'])\n", + "y = new_pumpkins['Price']" + ] + }, + { + "cell_type": "code", + "execution_count": 183, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 5.24 (19.7%)\n", + "Model determination: 0.774085281105197\n" + ] + } + ], + "source": [ + "def run_linear_regression(X,y):\n", + " X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n", + " lin_reg = LinearRegression()\n", + " lin_reg.fit(X_train,y_train)\n", + "\n", + " pred = lin_reg.predict(X_test)\n", + "\n", + " mse = np.sqrt(mean_squared_error(y_test,pred))\n", + " print(f'Mean error: {mse:3.3} ({mse/np.mean(pred)*100:3.3}%)')\n", + "\n", + " score = lin_reg.score(X_train,y_train)\n", + " print('Model determination: ', score)\n", + "\n", + "run_linear_regression(X,y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Prav tako lahko poskusimo uporabiti druge značilnosti na enak način in jih združiti s številskimi značilnostmi, kot sta `Month` ali `DayOfYear`:\n" + ] + }, + { + "cell_type": "code", + 
"execution_count": 184, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 2.84 (10.5%)\n", + "Model determination: 0.9401096672643048\n" + ] + } + ], + "source": [ + "X = pd.get_dummies(new_pumpkins['Variety']) \\\n", + " .join(new_pumpkins['Month']) \\\n", + " .join(pd.get_dummies(new_pumpkins['City'])) \\\n", + " .join(pd.get_dummies(new_pumpkins['Package']))\n", + "y = new_pumpkins['Price']\n", + "\n", + "run_linear_regression(X,y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Polinomska regresija\n", + "\n", + "Polinomska regresija se lahko uporablja tudi s kategorialnimi značilnostmi, ki so enovročno kodirane (one-hot-encoded). Koda za učenje polinomske regresije bi bila v bistvu enaka kot tista, ki smo jo videli zgoraj.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 185, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 2.23 (8.25%)\n", + "Model determination: 0.9652870784724543\n" + ] + } + ], + "source": [ + "from sklearn.preprocessing import PolynomialFeatures\n", + "from sklearn.pipeline import make_pipeline\n", + "\n", + "pipeline = make_pipeline(PolynomialFeatures(2), LinearRegression())\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n", + "\n", + "pipeline.fit(X_train,y_train)\n", + "\n", + "pred = pipeline.predict(X_test)\n", + "\n", + "mse = np.sqrt(mean_squared_error(y_test,pred))\n", + "print(f'Mean error: {mse:3.3} ({mse/np.mean(pred)*100:3.3}%)')\n", + "\n", + "score = pipeline.score(X_train,y_train)\n", + "print('Model determination: ', score)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za strojno 
prevajanje [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da se zavedate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo strokovno človeško prevajanje. Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki izhajajo iz uporabe tega prevoda.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "86193a1ab0ba47eac1c69c1756090baa3b420b3eea7d4aafab8b85f8b312f0c5" + }, + "kernelspec": { + "display_name": "Python 3.7.0 64-bit ('3.7')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.5" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "d77bd89ae7e79780c68c58bab91f13f8", + "translation_date": "2025-09-06T13:10:10+00:00", + "source_file": "2-Regression/3-Linear/solution/notebook.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sl/2-Regression/4-Logistic/notebook.ipynb b/translations/sl/2-Regression/4-Logistic/notebook.ipynb new file mode 100644 index 000000000..1f0e117ab --- /dev/null +++ b/translations/sl/2-Regression/4-Logistic/notebook.ipynb @@ -0,0 +1,269 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Sorte buč in barva\n", + "\n", + "Naložite potrebne knjižnice in podatkovni niz. 
Podatke pretvorite v podatkovni okvir, ki vsebuje podmnožico podatkov:\n", + "\n", + "Poglejmo razmerje med barvo in sorto.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NameTypePackageVarietySub VarietyGradeDateLow PriceHigh PriceMostly Low...Unit of SaleQualityConditionAppearanceStorageCropRepackTrans ModeUnnamed: 24Unnamed: 25
0BALTIMORENaN24 inch binsNaNNaNNaN4/29/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
1BALTIMORENaN24 inch binsNaNNaNNaN5/6/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
2BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
3BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
4BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN11/5/1690.0100.090.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " City Name Type Package Variety Sub Variety Grade Date \\\n", + "0 BALTIMORE NaN 24 inch bins NaN NaN NaN 4/29/17 \n", + "1 BALTIMORE NaN 24 inch bins NaN NaN NaN 5/6/17 \n", + "2 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "3 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "4 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 11/5/16 \n", + "\n", + " Low Price High Price Mostly Low ... Unit of Sale Quality Condition \\\n", + "0 270.0 280.0 270.0 ... NaN NaN NaN \n", + "1 270.0 280.0 270.0 ... NaN NaN NaN \n", + "2 160.0 160.0 160.0 ... NaN NaN NaN \n", + "3 160.0 160.0 160.0 ... NaN NaN NaN \n", + "4 90.0 100.0 90.0 ... NaN NaN NaN \n", + "\n", + " Appearance Storage Crop Repack Trans Mode Unnamed: 24 Unnamed: 25 \n", + "0 NaN NaN NaN E NaN NaN NaN \n", + "1 NaN NaN NaN E NaN NaN NaN \n", + "2 NaN NaN NaN N NaN NaN NaN \n", + "3 NaN NaN NaN N NaN NaN NaN \n", + "4 NaN NaN NaN N NaN NaN NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "full_pumpkins = pd.read_csv('../data/US-pumpkins.csv')\n", + "\n", + "full_pumpkins.head()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za strojno prevajanje [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da se zavedate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo strokovno človeško prevajanje. 
Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki izhajajo iz uporabe tega prevoda.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.1" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "dee08c2b49057b0de8b6752c4dbca368", + "translation_date": "2025-09-06T13:26:26+00:00", + "source_file": "2-Regression/4-Logistic/notebook.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sl/2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb b/translations/sl/2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb new file mode 100644 index 000000000..7d7da02aa --- /dev/null +++ b/translations/sl/2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb @@ -0,0 +1,685 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Ustvarite model logistične regresije - Lekcija 4\n", + "\n", + "![Infografika: Logistična vs. linearna regresija](../../../../../../2-Regression/4-Logistic/images/linear-vs-logistic.png)\n", + "\n", + "#### **[Predhodni kviz](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/15/)**\n", + "\n", + "#### Uvod\n", + "\n", + "V tej zadnji lekciji o regresiji, eni izmed osnovnih *klasičnih* tehnik strojnega učenja, si bomo ogledali logistično regresijo. To tehniko bi uporabili za odkrivanje vzorcev za napovedovanje binarnih kategorij. Ali je ta sladkarija čokolada ali ne? Ali je ta bolezen nalezljiva ali ne? 
Ali bo ta stranka izbrala ta izdelek ali ne?\n", + "\n", + "V tej lekciji boste spoznali:\n", + "\n", + "- Tehnike za logistično regresijo\n", + "\n", + "✅ Poglobite svoje razumevanje dela s to vrsto regresije v tem [učnem modulu](https://learn.microsoft.com/training/modules/introduction-classification-models/?WT.mc_id=academic-77952-leestott)\n", + "\n", + "## Predpogoji\n", + "\n", + "Ker smo že delali s podatki o bučah, smo zdaj dovolj seznanjeni z njimi, da ugotovimo, da obstaja ena binarna kategorija, s katero lahko delamo: `Color`.\n", + "\n", + "Zgradimo model logistične regresije, da napovemo, glede na nekatere spremenljivke, *kakšne barve bo določena buča verjetno* (oranžna 🎃 ali bela 👻).\n", + "\n", + "> Zakaj govorimo o binarni klasifikaciji v lekciji, ki se ukvarja z regresijo? Izključno zaradi jezikovne priročnosti, saj je logistična regresija [pravzaprav metoda klasifikacije](https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression), čeprav temelji na linearni metodi. 
Več o drugih načinih klasifikacije podatkov boste izvedeli v naslednji skupini lekcij.\n", + "\n", + "Za to lekcijo bomo potrebovali naslednje pakete:\n", + "\n", + "- `tidyverse`: [Tidyverse](https://www.tidyverse.org/) je [zbirka paketov za R](https://www.tidyverse.org/packages), zasnovana za hitrejše, lažje in bolj zabavno delo z podatki!\n", + "\n", + "- `tidymodels`: [Tidymodels](https://www.tidymodels.org/) je okvir [zbirke paketov](https://www.tidymodels.org/packages/) za modeliranje in strojno učenje.\n", + "\n", + "- `janitor`: Paket [janitor](https://github.com/sfirke/janitor) ponuja preprosta orodja za pregledovanje in čiščenje umazanih podatkov.\n", + "\n", + "- `ggbeeswarm`: Paket [ggbeeswarm](https://github.com/eclarke/ggbeeswarm) omogoča ustvarjanje grafov v slogu \"beeswarm\" z uporabo ggplot2.\n", + "\n", + "Namestite jih lahko z ukazom:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\", \"janitor\", \"ggbeeswarm\"))`\n", + "\n", + "Alternativno, spodnji skript preveri, ali imate potrebne pakete za dokončanje tega modula, in jih namesti, če manjkajo.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "suppressWarnings(if (!require(\"pacman\"))install.packages(\"pacman\"))\n", + "\n", + "pacman::p_load(tidyverse, tidymodels, janitor, ggbeeswarm)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## **Določite vprašanje**\n", + "\n", + "Za naše namene bomo to izrazili kot binarno: 'Bela' ali 'Ne bela'. V našem naboru podatkov obstaja tudi kategorija 'črtasta', vendar je primerov te kategorije malo, zato je ne bomo uporabili. Kategorija tako ali tako izgine, ko odstranimo ničelne vrednosti iz nabora podatkov.\n", + "\n", + "> 🎃 Zabavno dejstvo: bele buče včasih imenujemo 'duhove' buče. Niso zelo enostavne za rezljanje, zato niso tako priljubljene kot oranžne, vendar so videti kul! 
Tako bi lahko naše vprašanje preoblikovali tudi kot: 'Duh' ali 'Ne duh'. 👻\n", + "\n", + "## **O logistični regresiji**\n", + "\n", + "Logistična regresija se od linearne regresije, o kateri ste se že učili, razlikuje v nekaj pomembnih vidikih.\n", + "\n", + "#### **Binarna klasifikacija**\n", + "\n", + "Logistična regresija ne ponuja enakih funkcij kot linearna regresija. Prva ponuja napoved o `binarni kategoriji` (\"oranžna ali ne oranžna\"), medtem ko je druga sposobna napovedovati `neprekinjene vrednosti`, na primer glede na izvor buče in čas žetve, *koliko se bo njena cena zvišala*.\n", + "\n", + "![Infografika avtorja Dasani Madipalli](../../../../../../2-Regression/4-Logistic/images/pumpkin-classifier.png)\n", + "\n", + "### Druge klasifikacije\n", + "\n", + "Obstajajo tudi druge vrste logistične regresije, vključno z multinomno in ordinalno:\n", + "\n", + "- **Multinomna**, ki vključuje več kot eno kategorijo - \"Oranžna, Bela in Črtasta\".\n", + "\n", + "- **Ordinalna**, ki vključuje urejene kategorije, uporabna, če želimo logično urediti naše rezultate, na primer naše buče, ki so razvrščene po končnem številu velikosti (mini,sm,med,lg,xl,xxl).\n", + "\n", + "![Multinomna vs ordinalna regresija](../../../../../../2-Regression/4-Logistic/images/multinomial-vs-ordinal.png)\n", + "\n", + "#### **Spremenljivke NI NUJNO, da so povezane**\n", + "\n", + "Se spomnite, kako je linearna regresija bolje delovala z bolj povezanimi spremenljivkami? Logistična regresija je nasprotje - spremenljivke ni nujno, da se ujemajo. 
To ustreza tem podatkom, ki imajo nekoliko šibke korelacije.\n", + "\n", + "#### **Potrebujete veliko čistih podatkov**\n", + "\n", + "Logistična regresija bo dala natančnejše rezultate, če uporabite več podatkov; naš majhen nabor podatkov ni optimalen za to nalogo, zato imejte to v mislih.\n", + "\n", + "✅ Razmislite o vrstah podatkov, ki bi bile primerne za logistično regresijo.\n", + "\n", + "## Naloga - uredite podatke\n", + "\n", + "Najprej nekoliko očistite podatke, odstranite ničelne vrednosti in izberite le nekatere stolpce:\n", + "\n", + "1. Dodajte naslednjo kodo:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Load the core tidyverse packages\n", + "library(tidyverse)\n", + "\n", + "# Import the data and clean column names\n", + "pumpkins <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/2-Regression/data/US-pumpkins.csv\") %>% \n", + " clean_names()\n", + "\n", + "# Select desired columns\n", + "pumpkins_select <- pumpkins %>% \n", + " select(c(city_name, package, variety, origin, item_size, color)) \n", + "\n", + "# Drop rows containing missing values and encode color as factor (category)\n", + "pumpkins_select <- pumpkins_select %>% \n", + " drop_na() %>% \n", + " mutate(color = factor(color))\n", + "\n", + "# View the first few rows\n", + "pumpkins_select %>% \n", + " slice_head(n = 5)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Vedno lahko pogledate svoj novi podatkovni okvir z uporabo funkcije [*glimpse()*](https://pillar.r-lib.org/reference/glimpse.html), kot je prikazano spodaj:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "pumpkins_select %>% \n", + " glimpse()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Potrdimo, da 
bomo dejansko reševali problem binarne klasifikacije:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Subset distinct observations in outcome column\n", + "pumpkins_select %>% \n", + " distinct(color)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Vizualizacija - kategorni graf\n", + "Do sedaj ste znova naložili podatke o bučah in jih očistili, da ste ohranili podatkovni niz, ki vsebuje nekaj spremenljivk, vključno z Barvo. Vizualizirajmo podatkovni okvir v zvezku z uporabo knjižnice ggplot.\n", + "\n", + "Knjižnica ggplot ponuja nekaj odličnih načinov za vizualizacijo vaših podatkov. Na primer, lahko primerjate porazdelitve podatkov za vsako Sorto in Barvo v kategornem grafu.\n", + "\n", + "1. Ustvarite takšen graf z uporabo funkcije `geom_bar`, pri čemer uporabite naše podatke o bučah in določite barvno preslikavo za vsako kategorijo buč (oranžna ali bela):\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Specify colors for each value of the hue variable\n", + "palette <- c(ORANGE = \"orange\", WHITE = \"wheat\")\n", + "\n", + "# Create the bar plot\n", + "ggplot(pumpkins_select, aes(y = variety, fill = color)) +\n", + " geom_bar(position = \"dodge\") +\n", + " scale_fill_manual(values = palette) +\n", + " labs(y = \"Variety\", fill = \"Color\") +\n", + " theme_minimal()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Z opazovanjem podatkov lahko vidite, kako so podatki o barvi povezani z vrsto.\n", + "\n", + "✅ Glede na ta kategorni graf, katere zanimive raziskave si lahko predstavljate?\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Predobdelava podatkov: kodiranje značilnosti\n", + "\n", + "Naš nabor podatkov o bučah vsebuje nize kot vrednosti 
za vse stolpce. Delo s kategorialnimi podatki je za ljudi intuitivno, za računalnike pa ne. Algoritmi strojnega učenja delujejo dobro s številkami. Zato je kodiranje zelo pomemben korak v fazi predobdelave podatkov, saj nam omogoča, da kategorialne podatke pretvorimo v številske, ne da bi pri tem izgubili informacije. Dobro kodiranje vodi do gradnje dobrega modela.\n", + "\n", + "Za kodiranje značilnosti obstajata dve glavni vrsti kodirnikov:\n", + "\n", + "1. **Ordinalni kodirnik**: Ta je primeren za ordinalne spremenljivke, torej kategorialne spremenljivke, kjer podatki sledijo logičnemu vrstnemu redu, kot je stolpec `item_size` v našem naboru podatkov. Ustvari preslikavo, kjer je vsaka kategorija predstavljena s številko, ki ustreza vrstnemu redu kategorije v stolpcu.\n", + "\n", + "2. **Kategorialni kodirnik**: Ta je primeren za nominalne spremenljivke, torej kategorialne spremenljivke, kjer podatki ne sledijo logičnemu vrstnemu redu, kot so vse značilnosti, ki niso `item_size` v našem naboru podatkov. Gre za kodiranje \"ena-vroča\" (one-hot encoding), kar pomeni, da je vsaka kategorija predstavljena z binarnim stolpcem: kodirana spremenljivka je enaka 1, če buča pripada tej sorti, in 0 sicer.\n", + "\n", + "Tidymodels ponuja še eno uporabno knjižnico: [recipes](https://recipes.tidymodels.org/) - knjižnico za predobdelavo podatkov. Določili bomo `recipe`, ki opredeljuje, da je treba vse stolpce napovedovalcev zakodirati v nabor celih števil, nato pa ga `prep`-irati, da ocenimo potrebne količine in statistike za posamezne operacije, in na koncu `bake`, da uporabimo izračune na novih podatkih.\n", + "\n", + "> Običajno se recipes uporablja kot predprocesor za modeliranje, kjer določa, kateri koraki naj se uporabijo na naboru podatkov, da ga pripravimo za modeliranje. V tem primeru je **zelo priporočljivo**, da uporabite `workflow()` namesto ročnega ocenjevanja recepta z uporabo prep in bake. 
Vse to bomo videli v kratkem.\n", + ">\n", + "> Zaenkrat pa uporabljamo recipes + prep + bake, da določimo, kateri koraki naj se uporabijo na naboru podatkov, da ga pripravimo za analizo podatkov, in nato izvlečemo predobdelane podatke z uporabljenimi koraki.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Preprocess and extract data to allow some data analysis\n", + "baked_pumpkins <- recipe(color ~ ., data = pumpkins_select) %>%\n", + " # Define ordering for item_size column\n", + " step_mutate(item_size = ordered(item_size, levels = c('sml', 'med', 'med-lge', 'lge', 'xlge', 'jbo', 'exjbo'))) %>%\n", + " # Convert factors to numbers using the order defined above (Ordinal encoding)\n", + " step_integer(item_size, zero_based = F) %>%\n", + " # Encode all other predictors using one hot encoding\n", + " step_dummy(all_nominal(), -all_outcomes(), one_hot = TRUE) %>%\n", + " prep(training = pumpkins_select) %>%\n", + " bake(new_data = NULL)\n", + "\n", + "# Display the first few rows of preprocessed data\n", + "baked_pumpkins %>% \n", + " slice_head(n = 5)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "✅ Kakšne so prednosti uporabe ordinalnega kodirnika za stolpec Velikost artikla?\n", + "\n", + "### Analizirajte odnose med spremenljivkami\n", + "\n", + "Zdaj, ko smo predhodno obdelali naše podatke, lahko analiziramo odnose med značilnostmi in oznako, da dobimo predstavo o tem, kako dobro bo model lahko napovedal oznako glede na značilnosti. Najboljši način za izvedbo takšne analize je vizualizacija podatkov. \n", + "Ponovno bomo uporabili funkcijo `geom_boxplot` iz knjižnice ggplot, da vizualiziramo odnose med Velikostjo artikla, Sorto in Barvo v kategorijskem grafu. 
Za boljšo vizualizacijo podatkov bomo uporabili kodiran stolpec Velikost artikla in nekodiran stolpec Sorta.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Define the color palette\n", + "palette <- c(ORANGE = \"orange\", WHITE = \"wheat\")\n", + "\n", + "# We need the encoded Item Size column to use it as the x-axis values in the plot\n", + "pumpkins_select_plot<-pumpkins_select\n", + "pumpkins_select_plot$item_size <- baked_pumpkins$item_size\n", + "\n", + "# Create the grouped box plot\n", + "ggplot(pumpkins_select_plot, aes(x = `item_size`, y = color, fill = color)) +\n", + " geom_boxplot() +\n", + " facet_grid(variety ~ ., scales = \"free_x\") +\n", + " scale_fill_manual(values = palette) +\n", + " labs(x = \"Item Size\", y = \"\") +\n", + " theme_minimal() +\n", + " theme(strip.text = element_text(size = 12)) +\n", + " theme(axis.text.x = element_text(size = 10)) +\n", + " theme(axis.title.x = element_text(size = 12)) +\n", + " theme(axis.title.y = element_blank()) +\n", + " theme(legend.position = \"bottom\") +\n", + " guides(fill = guide_legend(title = \"Color\")) +\n", + " theme(panel.spacing = unit(0.5, \"lines\"))+\n", + " theme(strip.text.y = element_text(size = 4, hjust = 0)) \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Uporaba swarm plota\n", + "\n", + "Ker je Barva binarna kategorija (Bela ali Ne), zahteva 'poseben pristop [k vizualizaciji](https://github.com/rstudio/cheatsheets/blob/main/data-visualization.pdf)'.\n", + "\n", + "Poskusite uporabiti `swarm plot`, da prikažete porazdelitev barve glede na velikost predmeta.\n", + "\n", + "Uporabili bomo [paket ggbeeswarm](https://github.com/eclarke/ggbeeswarm), ki ponuja metode za ustvarjanje grafov v slogu beeswarm z uporabo ggplot2. 
Beeswarm grafi so način prikaza točk, ki bi se sicer prekrivale, tako da so razporejene ena poleg druge.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Create beeswarm plots of color and item_size\n", + "baked_pumpkins %>% \n", + " mutate(color = factor(color)) %>% \n", + " ggplot(mapping = aes(x = color, y = item_size, color = color)) +\n", + " geom_quasirandom() +\n", + " scale_color_brewer(palette = \"Dark2\", direction = -1) +\n", + " theme(legend.position = \"none\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Zdaj, ko imamo predstavo o razmerju med binarnimi kategorijami barve in večjo skupino velikosti, raziščimo logistično regresijo za določitev verjetne barve določene buče.\n", + "\n", + "## Ustvarite svoj model\n", + "\n", + "Izberite spremenljivke, ki jih želite uporabiti v svojem klasifikacijskem modelu, in razdelite podatke na učne in testne sklope. 
[rsample](https://rsample.tidymodels.org/), paket v Tidymodels, zagotavlja infrastrukturo za učinkovito razdeljevanje podatkov in ponovno vzorčenje:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Split data into 80% for training and 20% for testing\n", + "set.seed(2056)\n", + "pumpkins_split <- pumpkins_select %>% \n", + " initial_split(prop = 0.8)\n", + "\n", + "# Extract the data in each split\n", + "pumpkins_train <- training(pumpkins_split)\n", + "pumpkins_test <- testing(pumpkins_split)\n", + "\n", + "# Print out the first 5 rows of the training set\n", + "pumpkins_train %>% \n", + " slice_head(n = 5)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "🙌 Zdaj smo pripravljeni, da model naučimo tako, da prilagodimo učne značilnosti učni oznaki (barvi).\n", + "\n", + "Začeli bomo z ustvarjanjem recepta, ki določa korake predobdelave, ki jih je treba izvesti na naših podatkih, da jih pripravimo za modeliranje, tj. kodiranje kategornih spremenljivk v niz celih števil. Tako kot `baked_pumpkins`, ustvarimo `pumpkins_recipe`, vendar ga ne `prep` in `bake`, saj bo vključen v delovni tok, kar boste videli v le nekaj korakih.\n", + "\n", + "Obstaja kar nekaj načinov za določitev logistične regresije v Tidymodels. Oglejte si `?logistic_reg()`. 
Za zdaj bomo določili model logistične regresije prek privzetega pogona `stats::glm()`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Create a recipe that specifies preprocessing steps for modelling\n", + "pumpkins_recipe <- recipe(color ~ ., data = pumpkins_train) %>% \n", + " step_mutate(item_size = ordered(item_size, levels = c('sml', 'med', 'med-lge', 'lge', 'xlge', 'jbo', 'exjbo'))) %>%\n", + " step_integer(item_size, zero_based = F) %>% \n", + " step_dummy(all_nominal(), -all_outcomes(), one_hot = TRUE)\n", + "\n", + "# Create a logistic model specification\n", + "log_reg <- logistic_reg() %>% \n", + " set_engine(\"glm\") %>% \n", + " set_mode(\"classification\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Zdaj, ko imamo recept in specifikacijo modela, moramo najti način, kako ju združiti v objekt, ki bo najprej predobdelal podatke (v ozadju uporabil funkciji prep + bake), nato prilagodil model na predobdelane podatke in omogočil tudi morebitne aktivnosti po obdelavi.\n", + "\n", + "V Tidymodels se ta priročen objekt imenuje [`workflow`](https://workflows.tidymodels.org/) in priročno združuje vaše komponente modeliranja.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Bundle modelling components in a workflow\n", + "log_reg_wf <- workflow() %>% \n", + " add_recipe(pumpkins_recipe) %>% \n", + " add_model(log_reg)\n", + "\n", + "# Print out the workflow\n", + "log_reg_wf\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Ko je potek dela *določen*, lahko model `usposobimo` z uporabo funkcije [`fit()`](https://tidymodels.github.io/parsnip/reference/fit.html). 
Potek dela bo ocenil recept in predhodno obdelal podatke pred usposabljanjem, zato tega ne bo treba ročno narediti z uporabo funkcij prep in bake.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Train the model\n", + "wf_fit <- log_reg_wf %>% \n", + " fit(data = pumpkins_train)\n", + "\n", + "# Print the trained workflow\n", + "wf_fit\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Model izpiše koeficiente, pridobljene med usposabljanjem.\n", + "\n", + "Zdaj, ko smo model usposobili z uporabo učnih podatkov, lahko naredimo napovedi na testnih podatkih z uporabo [parsnip::predict()](https://parsnip.tidymodels.org/reference/predict.model_fit.html). Začnimo z uporabo modela za napovedovanje oznak za naš testni niz in verjetnosti za vsako oznako. Ko je verjetnost večja od 0.5, je napovedan razred `WHITE`, sicer `ORANGE`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Make predictions for color and corresponding probabilities\n", + "results <- pumpkins_test %>% select(color) %>% \n", + " bind_cols(wf_fit %>% \n", + " predict(new_data = pumpkins_test)) %>%\n", + " bind_cols(wf_fit %>%\n", + " predict(new_data = pumpkins_test, type = \"prob\"))\n", + "\n", + "# Compare predictions\n", + "results %>% \n", + " slice_head(n = 10)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Zelo lepo! To ponuja nekaj več vpogleda v delovanje logistične regresije.\n", + "\n", + "### Boljše razumevanje prek matrike zmede\n", + "\n", + "Primerjanje vsake napovedi z ustrezno \"resnično vrednostjo\" ni zelo učinkovit način za določanje, kako dobro model napoveduje. 
Na srečo ima Tidymodels še nekaj trikov v rokavu: [`yardstick`](https://yardstick.tidymodels.org/) - paket, ki se uporablja za merjenje učinkovitosti modelov z uporabo metrik uspešnosti.\n", + "\n", + "Ena od metrik uspešnosti, povezanih s klasifikacijskimi problemi, je [`matrika zmede`](https://wikipedia.org/wiki/Confusion_matrix). Matrika zmede opisuje, kako dobro klasifikacijski model deluje. Matrika zmede prikazuje, koliko primerov v vsakem razredu je model pravilno klasificiral. V našem primeru bo pokazala, koliko oranžnih buč je bilo klasificiranih kot oranžne in koliko belih buč kot bele; matrika zmede prav tako prikazuje, koliko jih je bilo klasificiranih v **napačne** kategorije.\n", + "\n", + "Funkcija [**`conf_mat()`**](https://tidymodels.github.io/yardstick/reference/conf_mat.html) iz paketa yardstick izračuna to križno tabelo opazovanih in napovedanih razredov.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Confusion matrix for prediction results\n", + "conf_mat(data = results, truth = color, estimate = .pred_class)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Poglejmo, kako interpretirati matriko zmede. 
Naš model mora razvrstiti buče v dve binarni kategoriji, kategorijo `bela` in kategorijo `ne-bela`.\n", + "\n", + "- Če vaš model napove, da je buča bela, in ta v resnici spada v kategorijo 'bela', temu pravimo `prava pozitivna napoved` (true positive), kar je prikazano s številom v zgornjem levem kotu.\n", + "\n", + "- Če vaš model napove, da buča ni bela, in ta v resnici spada v kategorijo 'bela', temu pravimo `napačna negativna napoved` (false negative), kar je prikazano s številom v spodnjem levem kotu.\n", + "\n", + "- Če vaš model napove, da je buča bela, in ta v resnici spada v kategorijo 'ne-bela', temu pravimo `napačna pozitivna napoved` (false positive), kar je prikazano s številom v zgornjem desnem kotu.\n", + "\n", + "- Če vaš model napove, da buča ni bela, in ta v resnici spada v kategorijo 'ne-bela', temu pravimo `prava negativna napoved` (true negative), kar je prikazano s številom v spodnjem desnem kotu.\n", + "\n", + "| Resnica |\n", + "|:-------:|\n", + "\n", + "| | | |\n", + "|---------------|--------|-------|\n", + "| **Napovedano** | BELA | ORANŽNA |\n", + "| BELA | TP | FP |\n", + "| ORANŽNA | FN | TN |\n", + "\n", + "Kot ste morda uganili, je zaželeno imeti večje število pravih pozitivnih in pravih negativnih napovedi ter manjše število napačnih pozitivnih in napačnih negativnih napovedi, saj to pomeni, da model deluje bolje.\n", + "\n", + "Matrika zmede je koristna, saj omogoča izračun drugih metrik, ki nam pomagajo bolje oceniti uspešnost klasifikacijskega modela. Poglejmo si nekatere od njih:\n", + "\n", + "🎓 Natančnost (Precision): `TP/(TP + FP)` je definirana kot delež napovedanih pozitivnih primerov, ki so dejansko pozitivni. Imenujemo jo tudi [pozitivna napovedna vrednost](https://en.wikipedia.org/wiki/Positive_predictive_value \"Positive predictive value\").\n", + "\n", + "🎓 Priklic (Recall): `TP/(TP + FN)` je definiran kot delež pozitivnih rezultatov glede na število vzorcev, ki so dejansko pozitivni. 
Imenujemo ga tudi `občutljivost` (sensitivity).\n", + "\n", + "🎓 Specifičnost (Specificity): `TN/(TN + FP)` je definirana kot delež negativnih rezultatov glede na število vzorcev, ki so dejansko negativni.\n", + "\n", + "🎓 Točnost (Accuracy): `(TP + TN)/(TP + TN + FP + FN)` je odstotek oznak, ki so bile pravilno napovedane za vzorec.\n", + "\n", + "🎓 F-metrika (F Measure): Tehtano povprečje natančnosti in priklica, kjer je najboljša vrednost 1, najslabša pa 0.\n", + "\n", + "Izračunajmo te metrike!\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Combine metric functions and calculate them all at once\n", + "eval_metrics <- metric_set(ppv, recall, spec, f_meas, accuracy)\n", + "eval_metrics(data = results, truth = color, estimate = .pred_class)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Vizualizirajmo ROC krivuljo tega modela\n", + "\n", + "Naredimo še eno vizualizacijo, da si ogledamo tako imenovano [`ROC krivuljo`](https://en.wikipedia.org/wiki/Receiver_operating_characteristic):\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Make a roc_curve\n", + "results %>% \n", + " roc_curve(color, .pred_ORANGE) %>% \n", + " autoplot()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ROC krivulje se pogosto uporabljajo za prikaz rezultatov klasifikatorja glede na njegove prave in lažne pozitivne zadetke. ROC krivulje običajno prikazujejo `True Positive Rate`/občutljivost na Y osi in `False Positive Rate`/1-specifičnost na X osi. Zato sta strmina krivulje in prostor med sredinsko črto ter krivuljo pomembna: želite krivuljo, ki se hitro dvigne in preseže črto. 
V našem primeru se začne z lažnimi pozitivnimi zadetki, nato pa se črta pravilno dvigne in preseže.\n", + "\n", + "Na koncu uporabimo `yardstick::roc_auc()` za izračun dejanskega območja pod krivuljo (Area Under the Curve). Eden od načinov interpretacije AUC je kot verjetnost, da model naključno pozitivni primer razvrsti višje kot naključno negativni primer.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Calculate area under curve\n", + "results %>% \n", + " roc_auc(color, .pred_ORANGE)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Rezultat je približno `0.975`. Ker se AUC giblje med 0 in 1, si želite visok rezultat, saj bo model, ki je 100 % natančen v svojih napovedih, imel AUC vrednost 1; v tem primeru je model *kar dober*.\n", + "\n", + "V prihodnjih lekcijah o klasifikacijah boste spoznali, kako izboljšati rezultate svojega modela (na primer obravnavanje neuravnoteženih podatkov v tem primeru).\n", + "\n", + "## 🚀Izziv\n", + "\n", + "Logistična regresija ponuja še veliko več! Najboljši način za učenje je eksperimentiranje. Poiščite podatkovni niz, ki je primeren za tovrstno analizo, in zgradite model z njim. Kaj ste se naučili? namig: poskusite [Kaggle](https://www.kaggle.com/search?q=logistic+regression+datasets) za zanimive podatkovne nize.\n", + "\n", + "## Pregled in samostojno učenje\n", + "\n", + "Preberite prvih nekaj strani [tega dokumenta iz Stanforda](https://web.stanford.edu/~jurafsky/slp3/5.pdf) o praktični uporabi logistične regresije. Razmislite o nalogah, ki so bolj primerne za eno ali drugo vrsto regresijskih nalog, ki smo jih preučevali do zdaj. 
Kaj bi delovalo najbolje?\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za strojno prevajanje [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da se zavedate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo strokovno človeško prevajanje. Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki izhajajo iz uporabe tega prevoda.\n" + ] + } + ], + "metadata": { + "anaconda-cloud": "", + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "3.4.1" + }, + "coopTranslator": { + "original_hash": "feaf125f481a89c468fa115bf2aed580", + "translation_date": "2025-09-06T13:31:00+00:00", + "source_file": "2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/translations/sl/2-Regression/4-Logistic/solution/notebook.ipynb b/translations/sl/2-Regression/4-Logistic/solution/notebook.ipynb new file mode 100644 index 000000000..47f11ea88 --- /dev/null +++ b/translations/sl/2-Regression/4-Logistic/solution/notebook.ipynb @@ -0,0 +1,1258 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Logistična regresija - Lekcija 4\n", + "\n", + "Naložite potrebne knjižnice in podatkovni niz. Podatke pretvorite v podatkovni okvir, ki vsebuje podmnožico podatkov:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NameTypePackageVarietySub VarietyGradeDateLow PriceHigh PriceMostly Low...Unit of SaleQualityConditionAppearanceStorageCropRepackTrans ModeUnnamed: 24Unnamed: 25
0BALTIMORENaN24 inch binsNaNNaNNaN4/29/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
1BALTIMORENaN24 inch binsNaNNaNNaN5/6/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
2BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
3BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
4BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN11/5/1690.0100.090.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " City Name Type Package Variety Sub Variety Grade Date \n", + "0 BALTIMORE NaN 24 inch bins NaN NaN NaN 4/29/17 \\\n", + "1 BALTIMORE NaN 24 inch bins NaN NaN NaN 5/6/17 \n", + "2 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "3 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "4 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 11/5/16 \n", + "\n", + " Low Price High Price Mostly Low ... Unit of Sale Quality Condition \n", + "0 270.0 280.0 270.0 ... NaN NaN NaN \\\n", + "1 270.0 280.0 270.0 ... NaN NaN NaN \n", + "2 160.0 160.0 160.0 ... NaN NaN NaN \n", + "3 160.0 160.0 160.0 ... NaN NaN NaN \n", + "4 90.0 100.0 90.0 ... NaN NaN NaN \n", + "\n", + " Appearance Storage Crop Repack Trans Mode Unnamed: 24 Unnamed: 25 \n", + "0 NaN NaN NaN E NaN NaN NaN \n", + "1 NaN NaN NaN E NaN NaN NaN \n", + "2 NaN NaN NaN N NaN NaN NaN \n", + "3 NaN NaN NaN N NaN NaN NaN \n", + "4 NaN NaN NaN N NaN NaN NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "full_pumpkins = pd.read_csv('../../data/US-pumpkins.csv')\n", + "\n", + "full_pumpkins.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NamePackageVarietyOriginItem SizeColor
2BALTIMORE24 inch binsHOWDEN TYPEDELAWAREmedORANGE
3BALTIMORE24 inch binsHOWDEN TYPEVIRGINIAmedORANGE
4BALTIMORE24 inch binsHOWDEN TYPEMARYLANDlgeORANGE
5BALTIMORE24 inch binsHOWDEN TYPEMARYLANDlgeORANGE
6BALTIMORE36 inch binsHOWDEN TYPEMARYLANDmedORANGE
\n", + "
" + ], + "text/plain": [ + " City Name Package Variety Origin Item Size Color\n", + "2 BALTIMORE 24 inch bins HOWDEN TYPE DELAWARE med ORANGE\n", + "3 BALTIMORE 24 inch bins HOWDEN TYPE VIRGINIA med ORANGE\n", + "4 BALTIMORE 24 inch bins HOWDEN TYPE MARYLAND lge ORANGE\n", + "5 BALTIMORE 24 inch bins HOWDEN TYPE MARYLAND lge ORANGE\n", + "6 BALTIMORE 36 inch bins HOWDEN TYPE MARYLAND med ORANGE" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Select the columns we want to use\n", + "columns_to_select = ['City Name','Package','Variety', 'Origin','Item Size', 'Color']\n", + "pumpkins = full_pumpkins.loc[:, columns_to_select]\n", + "\n", + "# Drop rows with missing values\n", + "pumpkins.dropna(inplace=True)\n", + "\n", + "pumpkins.head()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Poglejmo si naše podatke!\n", + "\n", + "S pomočjo vizualizacije s Seaborn\n" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import seaborn as sns\n", + "# Specify colors for each values of the hue variable\n", + "palette = {\n", + " 'ORANGE': 'orange',\n", + " 'WHITE': 'wheat',\n", + "}\n", + "# Plot a bar plot to visualize how many pumpkins of each variety are orange or white\n", + "sns.catplot(\n", + " data=pumpkins, y=\"Variety\", hue=\"Color\", kind=\"count\",\n", + " palette=palette, \n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Predobdelava podatkov\n", + "\n", + "Šifrirajmo značilnosti in oznake, da bomo lažje prikazali podatke in usposobili model\n" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['med', 'lge', 'sml', 'xlge', 'med-lge', 'jbo', 'exjbo'],\n", + " dtype=object)" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Let's look at the different values of the 'Item Size' column\n", + "pumpkins['Item Size'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import OrdinalEncoder\n", + "# Encode the 'Item Size' column using ordinal encoding\n", + "item_size_categories = [['sml', 'med', 'med-lge', 'lge', 'xlge', 'jbo', 'exjbo']]\n", + "ordinal_features = ['Item Size']\n", + "ordinal_encoder = OrdinalEncoder(categories=item_size_categories)" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import OneHotEncoder\n", + "# Encode all the other features using one-hot encoding\n", + "categorical_features = ['City Name', 'Package', 'Variety', 'Origin']\n", + "categorical_encoder = OneHotEncoder(sparse_output=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + 
"outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ord__Item Sizecat__City Name_ATLANTAcat__City Name_BALTIMOREcat__City Name_BOSTONcat__City Name_CHICAGOcat__City Name_COLUMBIAcat__City Name_DALLAScat__City Name_DETROITcat__City Name_LOS ANGELEScat__City Name_MIAMI...cat__Origin_MICHIGANcat__Origin_NEW JERSEYcat__Origin_NEW YORKcat__Origin_NORTH CAROLINAcat__Origin_OHIOcat__Origin_PENNSYLVANIAcat__Origin_TENNESSEEcat__Origin_TEXAScat__Origin_VERMONTcat__Origin_VIRGINIA
21.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
31.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.01.0
43.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
53.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
61.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
\n", + "

5 rows × 48 columns

\n", + "
" + ], + "text/plain": [ + " ord__Item Size cat__City Name_ATLANTA cat__City Name_BALTIMORE \n", + "2 1.0 0.0 1.0 \\\n", + "3 1.0 0.0 1.0 \n", + "4 3.0 0.0 1.0 \n", + "5 3.0 0.0 1.0 \n", + "6 1.0 0.0 1.0 \n", + "\n", + " cat__City Name_BOSTON cat__City Name_CHICAGO cat__City Name_COLUMBIA \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__City Name_DALLAS cat__City Name_DETROIT cat__City Name_LOS ANGELES \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__City Name_MIAMI ... cat__Origin_MICHIGAN cat__Origin_NEW JERSEY \n", + "2 0.0 ... 0.0 0.0 \\\n", + "3 0.0 ... 0.0 0.0 \n", + "4 0.0 ... 0.0 0.0 \n", + "5 0.0 ... 0.0 0.0 \n", + "6 0.0 ... 0.0 0.0 \n", + "\n", + " cat__Origin_NEW YORK cat__Origin_NORTH CAROLINA cat__Origin_OHIO \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__Origin_PENNSYLVANIA cat__Origin_TENNESSEE cat__Origin_TEXAS \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__Origin_VERMONT cat__Origin_VIRGINIA \n", + "2 0.0 0.0 \n", + "3 0.0 1.0 \n", + "4 0.0 0.0 \n", + "5 0.0 0.0 \n", + "6 0.0 0.0 \n", + "\n", + "[5 rows x 48 columns]" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.compose import ColumnTransformer\n", + "ct = ColumnTransformer(transformers=[\n", + " ('ord', ordinal_encoder, ordinal_features),\n", + " ('cat', categorical_encoder, categorical_features)\n", + " ])\n", + "# Get the encoded features as a pandas DataFrame\n", + "ct.set_output(transform='pandas')\n", + "encoded_features = ct.fit_transform(pumpkins)\n", + "encoded_features.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + 
{ + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ord__Item Sizecat__City Name_ATLANTAcat__City Name_BALTIMOREcat__City Name_BOSTONcat__City Name_CHICAGOcat__City Name_COLUMBIAcat__City Name_DALLAScat__City Name_DETROITcat__City Name_LOS ANGELEScat__City Name_MIAMI...cat__Origin_NEW JERSEYcat__Origin_NEW YORKcat__Origin_NORTH CAROLINAcat__Origin_OHIOcat__Origin_PENNSYLVANIAcat__Origin_TENNESSEEcat__Origin_TEXAScat__Origin_VERMONTcat__Origin_VIRGINIAColor
21.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00
31.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00
43.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00
53.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00
61.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00
\n", + "

5 rows × 49 columns

\n", + "
" + ], + "text/plain": [ + " ord__Item Size cat__City Name_ATLANTA cat__City Name_BALTIMORE \n", + "2 1.0 0.0 1.0 \\\n", + "3 1.0 0.0 1.0 \n", + "4 3.0 0.0 1.0 \n", + "5 3.0 0.0 1.0 \n", + "6 1.0 0.0 1.0 \n", + "\n", + " cat__City Name_BOSTON cat__City Name_CHICAGO cat__City Name_COLUMBIA \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__City Name_DALLAS cat__City Name_DETROIT cat__City Name_LOS ANGELES \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__City Name_MIAMI ... cat__Origin_NEW JERSEY cat__Origin_NEW YORK \n", + "2 0.0 ... 0.0 0.0 \\\n", + "3 0.0 ... 0.0 0.0 \n", + "4 0.0 ... 0.0 0.0 \n", + "5 0.0 ... 0.0 0.0 \n", + "6 0.0 ... 0.0 0.0 \n", + "\n", + " cat__Origin_NORTH CAROLINA cat__Origin_OHIO cat__Origin_PENNSYLVANIA \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__Origin_TENNESSEE cat__Origin_TEXAS cat__Origin_VERMONT \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__Origin_VIRGINIA Color \n", + "2 0.0 0 \n", + "3 1.0 0 \n", + "4 0.0 0 \n", + "5 0.0 0 \n", + "6 0.0 0 \n", + "\n", + "[5 rows x 49 columns]" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.preprocessing import LabelEncoder\n", + "# Encode the 'Color' column using label encoding\n", + "label_encoder = LabelEncoder()\n", + "encoded_label = label_encoder.fit_transform(pumpkins['Color'])\n", + "encoded_pumpkins = encoded_features.assign(Color=encoded_label)\n", + "encoded_pumpkins.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['ORANGE', 'WHITE']" + ] + }, + "execution_count": 71, + "metadata": 
{}, + "output_type": "execute_result" + } + ], + "source": [ + "# Let's look at the mapping between the encoded values and the original values\n", + "list(label_encoder.inverse_transform([0, 1]))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Analiza odnosov med značilnostmi in oznako\n" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "palette = {\n", + " 'ORANGE': 'orange',\n", + " 'WHITE': 'wheat',\n", + "}\n", + "# We need the encoded Item Size column to use it as the x-axis values in the plot\n", + "pumpkins['Item Size'] = encoded_pumpkins['ord__Item Size']\n", + "\n", + "g = sns.catplot(\n", + " data=pumpkins,\n", + " x=\"Item Size\", y=\"Color\", row='Variety',\n", + " kind=\"box\", orient=\"h\",\n", + " sharex=False, margin_titles=True,\n", + " height=1.8, aspect=4, palette=palette,\n", + ")\n", + "# Defining axis labels \n", + "g.set(xlabel=\"Item Size\", ylabel=\"\").set(xlim=(0,6))\n", + "g.set_titles(row_template=\"{row_name}\")\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "warnings.filterwarnings(action='ignore', category=UserWarning, module='seaborn')" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Suppressing warning message claiming that a portion of points cannot be placed into the plot due to the high number of data points\n", + "import warnings\n", + "warnings.filterwarnings(action='ignore', category=UserWarning, module='seaborn')\n", + "\n", + "palette = {\n", + " 0: 'orange',\n", + " 1: 'wheat'\n", + "}\n", + "sns.swarmplot(x=\"Color\", y=\"ord__Item Size\", hue=\"Color\", data=encoded_pumpkins, palette=palette)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Pazi**: Ignoriranje opozoril NI najboljša praksa in se je treba temu izogibati, kadar je le mogoče. Opozorila pogosto vsebujejo koristna sporočila, ki nam pomagajo izboljšati kodo in rešiti težavo. Razlog, zakaj ignoriramo to specifično opozorilo, je zagotavljanje berljivosti grafa. Prikaz vseh podatkovnih točk z zmanjšano velikostjo označevalca, ob ohranjanju skladnosti s paleto barv, ustvarja nejasno vizualizacijo.\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Zgradite svoj model\n" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "# X is the encoded features\n", + "X = encoded_pumpkins[encoded_pumpkins.columns.difference(['Color'])]\n", + "# y is the encoded label\n", + "y = encoded_pumpkins['Color']\n", + "\n", + "# Split the data into training and test sets\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.94 0.98 0.96 166\n", + " 1 0.85 0.67 0.75 33\n", + "\n", + " accuracy 0.92 199\n", + " macro avg 0.89 0.82 0.85 199\n", 
+ "weighted avg 0.92 0.92 0.92 199\n", + "\n", + "Predicted labels: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0\n", + " 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 1 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 1 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0\n", + " 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1\n", + " 0 0 0 1 0 0 0 0 0 0 0 0 1 1]\n", + "F1-score: 0.7457627118644068\n" + ] + } + ], + "source": [ + "from sklearn.metrics import f1_score, classification_report \n", + "from sklearn.linear_model import LogisticRegression\n", + "\n", + "# Train a logistic regression model on the pumpkin dataset\n", + "model = LogisticRegression()\n", + "model.fit(X_train, y_train)\n", + "predictions = model.predict(X_test)\n", + "\n", + "# Evaluate the model and print the results\n", + "print(classification_report(y_test, predictions))\n", + "print('Predicted labels: ', predictions)\n", + "print('F1-score: ', f1_score(y_test, predictions))" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[162, 4],\n", + " [ 11, 22]])" + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.metrics import confusion_matrix\n", + "confusion_matrix(y_test, predictions)" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from sklearn.metrics import roc_curve, roc_auc_score\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "\n", + "y_scores = model.predict_proba(X_test)\n", + "# calculate ROC curve\n", + "fpr, tpr, thresholds = roc_curve(y_test, y_scores[:,1])\n", + "\n", + "# plot ROC curve\n", + "fig = plt.figure(figsize=(6, 6))\n", + "# Plot the diagonal 50% line\n", + "plt.plot([0, 1], [0, 1], 'k--')\n", + "# Plot the FPR and TPR achieved by our model\n", + "plt.plot(fpr, tpr)\n", + "plt.xlabel('False Positive Rate')\n", + "plt.ylabel('True Positive Rate')\n", + "plt.title('ROC Curve')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.9749908725812341\n" + ] + } + ], + "source": [ + "# Calculate AUC score\n", + "auc = roc_auc_score(y_test,y_scores[:,1])\n", + "print(auc)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za prevajanje z umetno inteligenco [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da upoštevate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem maternem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo profesionalni človeški prevod. 
Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki bi nastale zaradi uporabe tega prevoda.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "vscode": { + "interpreter": { + "hash": "949777d72b0d2535278d3dc13498b2535136f6dfe0678499012e853ee9abcab1" + } + }, + "coopTranslator": { + "original_hash": "ef50cc584e0b79412610cc7da15e1f86", + "translation_date": "2025-09-06T13:27:16+00:00", + "source_file": "2-Regression/4-Logistic/solution/notebook.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sl/3-Web-App/1-Web-App/notebook.ipynb b/translations/sl/3-Web-App/1-Web-App/notebook.ipynb new file mode 100644 index 000000000..e69de29bb diff --git a/translations/sl/3-Web-App/1-Web-App/solution/notebook.ipynb b/translations/sl/3-Web-App/1-Web-App/solution/notebook.ipynb new file mode 100644 index 000000000..0f8778dc3 --- /dev/null +++ b/translations/sl/3-Web-App/1-Web-App/solution/notebook.ipynb @@ -0,0 +1,267 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": 
"70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "5fa2e8f4584c78250ca9729b46562ceb", + "translation_date": "2025-09-06T14:32:05+00:00", + "source_file": "3-Web-App/1-Web-App/solution/notebook.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " datetime city state country shape \\\n", + "0 10/10/1949 20:30 san marcos tx us cylinder \n", + "1 10/10/1949 21:00 lackland afb tx NaN light \n", + "2 10/10/1955 17:00 chester (uk/england) NaN gb circle \n", + "3 10/10/1956 21:00 edna tx us circle \n", + "4 10/10/1960 20:00 kaneohe hi us light \n", + "\n", + " duration (seconds) duration (hours/min) \\\n", + "0 2700.0 45 minutes \n", + "1 7200.0 1-2 hrs \n", + "2 20.0 20 seconds \n", + "3 20.0 1/2 hour \n", + "4 900.0 15 minutes \n", + "\n", + " comments date posted latitude \\\n", + "0 This event took place in early fall around 194... 4/27/2004 29.883056 \n", + "1 1949 Lackland AFB, TX. Lights racing acros... 12/16/2005 29.384210 \n", + "2 Green/Orange circular disc over Chester, En... 1/21/2008 53.200000 \n", + "3 My older brother and twin sister were leaving ... 1/17/2004 28.978333 \n", + "4 AS a Marine 1st Lt. flying an FJ4B fighter/att... 1/22/2004 21.418056 \n", + "\n", + " longitude \n", + "0 -97.941111 \n", + "1 -98.581082 \n", + "2 -2.916667 \n", + "3 -96.645833 \n", + "4 -157.803611 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
datetimecitystatecountryshapeduration (seconds)duration (hours/min)commentsdate postedlatitudelongitude
010/10/1949 20:30san marcostxuscylinder2700.045 minutesThis event took place in early fall around 194...4/27/200429.883056-97.941111
110/10/1949 21:00lackland afbtxNaNlight7200.01-2 hrs1949 Lackland AFB&#44 TX. Lights racing acros...12/16/200529.384210-98.581082
210/10/1955 17:00chester (uk/england)NaNgbcircle20.020 secondsGreen/Orange circular disc over Chester&#44 En...1/21/200853.200000-2.916667
310/10/1956 21:00ednatxuscircle20.01/2 hourMy older brother and twin sister were leaving ...1/17/200428.978333-96.645833
410/10/1960 20:00kaneohehiuslight900.015 minutesAS a Marine 1st Lt. flying an FJ4B fighter/att...1/22/200421.418056-157.803611
\n
" + }, + "metadata": {}, + "execution_count": 23 + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "ufos = pd.read_csv('../data/ufos.csv')\n", + "ufos.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array(['us', nan, 'gb', 'ca', 'au', 'de'], dtype=object)" + ] + }, + "metadata": {}, + "execution_count": 24 + } + ], + "source": [ + "\n", + "ufos = pd.DataFrame({'Seconds': ufos['duration (seconds)'], 'Country': ufos['country'],'Latitude': ufos['latitude'],'Longitude': ufos['longitude']})\n", + "\n", + "ufos.Country.unique()\n", + "\n", + "# 0 au, 1 ca, 2 de, 3 gb, 4 us" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\nInt64Index: 25863 entries, 2 to 80330\nData columns (total 4 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Seconds 25863 non-null float64\n 1 Country 25863 non-null object \n 2 Latitude 25863 non-null float64\n 3 Longitude 25863 non-null float64\ndtypes: float64(3), object(1)\nmemory usage: 1010.3+ KB\n" + ] + } + ], + "source": [ + "ufos.dropna(inplace=True)\n", + "\n", + "ufos = ufos[(ufos['Seconds'] >= 1) & (ufos['Seconds'] <= 60)]\n", + "\n", + "ufos.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Seconds Country Latitude Longitude\n", + "2 20.0 3 53.200000 -2.916667\n", + "3 20.0 4 28.978333 -96.645833\n", + "14 30.0 4 35.823889 -80.253611\n", + "23 60.0 4 45.582778 -122.352222\n", + "24 3.0 3 51.783333 -0.783333" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
SecondsCountryLatitudeLongitude
220.0353.200000-2.916667
320.0428.978333-96.645833
1430.0435.823889-80.253611
2360.0445.582778-122.352222
243.0351.783333-0.783333
\n
" + }, + "metadata": {}, + "execution_count": 26 + } + ], + "source": [ + "from sklearn.preprocessing import LabelEncoder\n", + "\n", + "ufos['Country'] = LabelEncoder().fit_transform(ufos['Country'])\n", + "\n", + "ufos.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "\n", + "Selected_features = ['Seconds','Latitude','Longitude']\n", + "\n", + "X = ufos[Selected_features]\n", + "y = ufos['Country']\n", + "\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", + " FutureWarning)\n", + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n", + " \"this warning.\", FutureWarning)\n", + " precision recall f1-score support\n", + "\n", + " 0 1.00 1.00 1.00 41\n", + " 1 1.00 0.02 0.05 250\n", + " 2 0.00 0.00 0.00 8\n", + " 3 0.94 1.00 0.97 131\n", + " 4 0.95 1.00 0.97 4743\n", + "\n", + " accuracy 0.95 5173\n", + " macro avg 0.78 0.60 0.60 5173\n", + "weighted avg 0.95 0.95 0.93 5173\n", + "\n", + "Predicted labels: [4 4 4 ... 
3 4 4]\n", + "Accuracy: 0.9512855209742895\n", + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/metrics/classification.py:1437: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.\n", + " 'precision', 'predicted', average, warn_for)\n" + ] + } + ], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import accuracy_score, classification_report \n", + "from sklearn.linear_model import LogisticRegression\n", + "model = LogisticRegression()\n", + "model.fit(X_train, y_train)\n", + "predictions = model.predict(X_test)\n", + "\n", + "print(classification_report(y_test, predictions))\n", + "print('Predicted labels: ', predictions)\n", + "print('Accuracy: ', accuracy_score(y_test, predictions))\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[3]\n" + ] + } + ], + "source": [ + "import pickle\n", + "model_filename = 'ufo-model.pkl'\n", + "pickle.dump(model, open(model_filename,'wb'))\n", + "\n", + "model = pickle.load(open('ufo-model.pkl','rb'))\n", + "print(model.predict([[50,44,-12]]))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za strojno prevajanje [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da se zavedate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo strokovno človeško prevajanje. 
Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki izhajajo iz uporabe tega prevoda.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sl/4-Classification/1-Introduction/notebook.ipynb b/translations/sl/4-Classification/1-Introduction/notebook.ipynb new file mode 100644 index 000000000..b63c56b53 --- /dev/null +++ b/translations/sl/4-Classification/1-Introduction/notebook.ipynb @@ -0,0 +1,39 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "d544ef384b7ba73757d830a72372a7f2", + "translation_date": "2025-09-06T14:50:43+00:00", + "source_file": "4-Classification/1-Introduction/notebook.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za strojno prevajanje [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas opozarjamo, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo strokovno človeško prevajanje. 
Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki izhajajo iz uporabe tega prevoda.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sl/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb b/translations/sl/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb new file mode 100644 index 000000000..462119e14 --- /dev/null +++ b/translations/sl/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb @@ -0,0 +1,724 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_10-R.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "2621e24705e8100893c9bf84e0fc8aef", + "translation_date": "2025-09-06T14:55:33+00:00", + "source_file": "4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb", + "language_code": "sl" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Zgradite klasifikacijski model: Slastne azijske in indijske kuhinje\n" + ], + "metadata": { + "id": "ItETB4tSFprR" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Uvod v klasifikacijo: Čiščenje, priprava in vizualizacija podatkov\n", + "\n", + "V teh štirih lekcijah boste raziskovali temeljni vidik klasičnega strojnega učenja - *klasifikacijo*. Preučili bomo uporabo različnih algoritmov za klasifikacijo na podatkovnem naboru o vseh čudovitih kuhinjah Azije in Indije. Upam, da ste lačni!\n", + "\n", + "

\n", + " \n", + "

Proslavite panazijske kuhinje v teh lekcijah! Slika: Jen Looper
\n", + "\n", + "\n", + "\n", + "Klasifikacija je oblika [nadzorovanega učenja](https://wikipedia.org/wiki/Supervised_learning), ki ima veliko skupnega s tehnikami regresije. Pri klasifikaciji trenirate model, da napove, v katero `kategorijo` spada določen element. Če je strojno učenje namenjeno napovedovanju vrednosti ali imen stvari z uporabo podatkovnih nizov, potem klasifikacija običajno spada v dve skupini: *binarna klasifikacija* in *večrazredna klasifikacija*.\n", + "\n", + "Zapomnite si:\n", + "\n", + "- **Linearna regresija** vam je pomagala napovedati odnose med spremenljivkami in narediti natančne napovedi, kje bi nov podatkovni element padel v odnosu do te črte. Na primer, lahko bi napovedali numerične vrednosti, kot je *kakšna bo cena buče septembra v primerjavi z decembrom*.\n", + "\n", + "- **Logistična regresija** vam je pomagala odkriti \"binarne kategorije\": pri tej cenovni točki, *ali je buča oranžna ali ne-oranžna*?\n", + "\n", + "Klasifikacija uporablja različne algoritme za določanje drugih načinov določanja oznake ali razreda podatkovne točke. Delajmo s temi podatki o kuhinjah, da vidimo, ali lahko z opazovanjem skupine sestavin določimo izvorno kuhinjo.\n", + "\n", + "### [**Predlekcijski kviz**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/19/)\n", + "\n", + "### **Uvod**\n", + "\n", + "Klasifikacija je ena temeljnih dejavnosti raziskovalca strojnega učenja in podatkovnega znanstvenika. Od osnovne klasifikacije binarne vrednosti (\"ali je ta e-pošta spam ali ne?\") do kompleksne klasifikacije slik in segmentacije z uporabo računalniškega vida, je vedno koristno, da lahko podatke razvrstimo v razrede in zastavimo vprašanja o njih.\n", + "\n", + "Če proces opišemo na bolj znanstven način, vaša metoda klasifikacije ustvari napovedni model, ki vam omogoča mapiranje odnosa med vhodnimi spremenljivkami in izhodnimi spremenljivkami.\n", + "\n", + "

\n", + " \n", + "

Binarni vs. večrazredni problemi, ki jih algoritmi za klasifikacijo obravnavajo. Infografika: Jen Looper
\n", + "\n", + "Preden začnemo s procesom čiščenja podatkov, njihove vizualizacije in priprave za naloge strojnega učenja, se naučimo nekaj o različnih načinih, kako lahko strojno učenje uporabimo za klasifikacijo podatkov.\n", + "\n", + "Izpeljana iz [statistike](https://wikipedia.org/wiki/Statistical_classification), klasifikacija z uporabo klasičnega strojnega učenja uporablja značilnosti, kot so `kadilec`, `teža` in `starost`, za določanje *verjetnosti razvoja X bolezni*. Kot tehnika nadzorovanega učenja, podobna regresijskim vajam, ki ste jih izvajali prej, so vaši podatki označeni, algoritmi strojnega učenja pa te oznake uporabljajo za klasifikacijo in napovedovanje razredov (ali 'značilnosti') podatkovnega niza ter njihovo dodelitev skupini ali izidu.\n", + "\n", + "✅ Vzemite trenutek in si zamislite podatkovni niz o kuhinjah. Kaj bi lahko odgovoril večrazredni model? Kaj bi lahko odgovoril binarni model? Kaj če bi želeli ugotoviti, ali določena kuhinja verjetno uporablja piskavico? Kaj če bi želeli videti, ali bi lahko iz vrečke zvezdastega janeža, artičok, cvetače in hrena pripravili tipično indijsko jed?\n", + "\n", + "### **Pozdravljeni 'klasifikator'**\n", + "\n", + "Vprašanje, ki ga želimo zastaviti o tem podatkovnem naboru kuhinj, je pravzaprav **večrazredno vprašanje**, saj imamo na voljo več potencialnih nacionalnih kuhinj. Glede na skupino sestavin, v katerega od teh razredov bodo podatki ustrezali?\n", + "\n", + "Tidymodels ponuja več različnih algoritmov za klasifikacijo podatkov, odvisno od vrste problema, ki ga želite rešiti. 
V naslednjih dveh lekcijah se boste naučili o nekaterih od teh algoritmov.\n", + "\n", + "#### **Predpogoj**\n", + "\n", + "Za to lekcijo bomo potrebovali naslednje pakete za čiščenje, pripravo in vizualizacijo naših podatkov:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) je [zbirka paketov za R](https://www.tidyverse.org/packages), zasnovana za hitrejše, lažje in bolj zabavno podatkovno znanost!\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) je okvir [zbirke paketov](https://www.tidymodels.org/packages/) za modeliranje in strojno učenje.\n", + "\n", + "- `DataExplorer`: Paket [DataExplorer](https://cran.r-project.org/web/packages/DataExplorer/vignettes/dataexplorer-intro.html) je namenjen poenostavitvi in avtomatizaciji procesa EDA ter generiranju poročil.\n", + "\n", + "- `themis`: Paket [themis](https://themis.tidymodels.org/) ponuja dodatne korake receptov za obravnavo neuravnoteženih podatkov.\n", + "\n", + "Namestite jih lahko z:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\", \"DataExplorer\", \"themis\", \"here\"))`\n", + "\n", + "Alternativno, spodnji skript preveri, ali imate pakete, potrebne za dokončanje tega modula, in jih namesti, če manjkajo.\n" + ], + "metadata": { + "id": "ri5bQxZ-Fz_0" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "suppressWarnings(if (!require(\"pacman\"))install.packages(\"pacman\"))\r\n", + "\r\n", + "pacman::p_load(tidyverse, tidymodels, DataExplorer, themis, here)" + ], + "outputs": [], + "metadata": { + "id": "KIPxa4elGAPI" + } + }, + { + "cell_type": "markdown", + "source": [ + "
(To je zgolj za ponazoritev, `pacman::p_load()` je to že naredil namesto vas)\n" + ], + "metadata": { + "id": "YkKAxOJvGD4C" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Vaja - očistite in uravnotežite svoje podatke\n", + "\n", + "Prva naloga, preden začnete s tem projektom, je očistiti in **uravnotežiti** svoje podatke za boljše rezultate.\n", + "\n", + "Spoznajmo podatke! 🕵️\n" + ], + "metadata": { + "id": "PFkQDlk0GN5O" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Import data\r\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/4-Classification/data/cuisines.csv\")\r\n", + "\r\n", + "# View the first 5 rows\r\n", + "df %>% \r\n", + " slice_head(n = 5)\r\n" + ], + "outputs": [], + "metadata": { + "id": "Qccw7okxGT0S" + } + }, + { + "cell_type": "markdown", + "source": [ + "Zanimivo! Po videzu sodeč je prvi stolpec nekakšen stolpec `id`. Poglejmo si malo več informacij o podatkih.\n" + ], + "metadata": { + "id": "XrWnlgSrGVmR" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Basic information about the data\r\n", + "df %>%\r\n", + " introduce()\r\n", + "\r\n", + "# Visualize basic information above\r\n", + "df %>% \r\n", + " plot_intro(ggtheme = theme_light())" + ], + "outputs": [], + "metadata": { + "id": "4UcGmxRxGieA" + } + }, + { + "cell_type": "markdown", + "source": [ + "Iz izpisa lahko takoj vidimo, da imamo `2448` vrstic in `385` stolpcev ter `0` manjkajočih vrednosti. Imamo tudi 1 diskretni stolpec, *cuisine*.\n", + "\n", + "## Naloga - spoznavanje kuhinj\n", + "\n", + "Zdaj delo postaja bolj zanimivo. 
Odkrijmo porazdelitev podatkov glede na kuhinjo.\n" + ], + "metadata": { + "id": "AaPubl__GmH5" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Count observations per cuisine\r\n", + "df %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(n)\r\n", + "\r\n", + "# Plot the distribution\r\n", + "theme_set(theme_light())\r\n", + "df %>% \r\n", + " count(cuisine) %>% \r\n", + " ggplot(mapping = aes(x = n, y = reorder(cuisine, -n))) +\r\n", + " geom_col(fill = \"midnightblue\", alpha = 0.7) +\r\n", + " ylab(\"cuisine\")" + ], + "outputs": [], + "metadata": { + "id": "FRsBVy5eGrrv" + } + }, + { + "cell_type": "markdown", + "source": [ + "Obstaja končno število kuhinj, vendar je porazdelitev podatkov neenakomerna. To lahko popravite! Preden to storite, raziščite še malo več.\n", + "\n", + "Nato dodelimo vsako kuhinjo v njen lasten tibble in ugotovimo, koliko podatkov je na voljo (vrstice, stolpci) za posamezno kuhinjo.\n", + "\n", + "> [Tibble](https://tibble.tidyverse.org/) je sodoben podatkovni okvir.\n", + "\n", + "

\n", + " \n", + "

Umetniško delo @allison_horst
\n" + ], + "metadata": { + "id": "vVvyDb1kG2in" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Create individual tibble for the cuisines\r\n", + "thai_df <- df %>% \r\n", + " filter(cuisine == \"thai\")\r\n", + "japanese_df <- df %>% \r\n", + " filter(cuisine == \"japanese\")\r\n", + "chinese_df <- df %>% \r\n", + " filter(cuisine == \"chinese\")\r\n", + "indian_df <- df %>% \r\n", + " filter(cuisine == \"indian\")\r\n", + "korean_df <- df %>% \r\n", + " filter(cuisine == \"korean\")\r\n", + "\r\n", + "\r\n", + "# Find out how much data is available per cuisine\r\n", + "cat(\" thai df:\", dim(thai_df), \"\\n\",\r\n", + " \"japanese df:\", dim(japanese_df), \"\\n\",\r\n", + " \"chinese_df:\", dim(chinese_df), \"\\n\",\r\n", + " \"indian_df:\", dim(indian_df), \"\\n\",\r\n", + " \"korean_df:\", dim(korean_df))" + ], + "outputs": [], + "metadata": { + "id": "0TvXUxD3G8Bk" + } + }, + { + "cell_type": "markdown", + "source": [ + "## **Vaja - Odkrijte glavne sestavine po kuhinji z uporabo dplyr**\n", + "\n", + "Zdaj lahko podrobneje raziščete podatke in ugotovite, katere so značilne sestavine za posamezno kuhinjo. Odstraniti morate ponavljajoče se podatke, ki povzročajo zmedo med kuhinjami, zato se lotimo tega problema.\n", + "\n", + "Ustvarite funkcijo `create_ingredient()` v R, ki vrne podatkovni okvir sestavin. Ta funkcija bo začela z odstranitvijo neuporabnega stolpca in razvrstila sestavine glede na njihovo število.\n", + "\n", + "Osnovna struktura funkcije v R je:\n", + "\n", + "`myFunction <- function(arglist){`\n", + "\n", + "**`...`**\n", + "\n", + "**`return`**`(value)`\n", + "\n", + "`}`\n", + "\n", + "Uvod v funkcije v R najdete [tukaj](https://skirmer.github.io/presentations/functions_with_r.html#1).\n", + "\n", + "Pojdimo naravnost k stvari! Uporabili bomo [dplyr glagole](https://dplyr.tidyverse.org/), ki smo jih spoznali v prejšnjih lekcijah. 
Za osvežitev spomina:\n", + "\n", + "- `dplyr::select()`: vam pomaga izbrati, katere **stolpce** obdržati ali izključiti.\n", + "\n", + "- `tidyr::pivot_longer()`: vam pomaga \"podaljšati\" podatke, s čimer povečate število vrstic in zmanjšate število stolpcev.\n", + "\n", + "- `dplyr::group_by()` in `dplyr::summarise()`: vam pomagata najti povzetke statistike za različne skupine in jih predstaviti v pregledni tabeli.\n", + "\n", + "- `dplyr::filter()`: ustvari podmnožico podatkov, ki vsebuje samo vrstice, ki ustrezajo vašim pogojem.\n", + "\n", + "- `dplyr::mutate()`: vam pomaga ustvariti ali spremeniti stolpce.\n", + "\n", + "Oglejte si ta [*umetniško* obarvan učni vodič](https://allisonhorst.shinyapps.io/dplyr-learnr/#section-welcome) avtorice Allison Horst, ki predstavlja nekaj uporabnih funkcij za obdelavo podatkov v dplyr *(del Tidyverse)*.\n" + ], + "metadata": { + "id": "K3RF5bSCHC76" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Creates a functions that returns the top ingredients by class\r\n", + "\r\n", + "create_ingredient <- function(df){\r\n", + " \r\n", + " # Drop the id column which is the first colum\r\n", + " ingredient_df = df %>% select(-1) %>% \r\n", + " # Transpose data to a long format\r\n", + " pivot_longer(!cuisine, names_to = \"ingredients\", values_to = \"count\") %>% \r\n", + " # Find the top most ingredients for a particular cuisine\r\n", + " group_by(ingredients) %>% \r\n", + " summarise(n_instances = sum(count)) %>% \r\n", + " filter(n_instances != 0) %>% \r\n", + " # Arrange by descending order\r\n", + " arrange(desc(n_instances)) %>% \r\n", + " mutate(ingredients = factor(ingredients) %>% fct_inorder())\r\n", + " \r\n", + " \r\n", + " return(ingredient_df)\r\n", + "} # End of function" + ], + "outputs": [], + "metadata": { + "id": "uB_0JR82HTPa" + } + }, + { + "cell_type": "markdown", + "source": [ + "Zdaj lahko uporabimo funkcijo, da dobimo vpogled v deset najbolj priljubljenih sestavin po 
kuhinji. Preizkusimo jo s `thai_df`.\n" + ], + "metadata": { + "id": "h9794WF8HWmc" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Call create_ingredient and display popular ingredients\r\n", + "thai_ingredient_df <- create_ingredient(df = thai_df)\r\n", + "\r\n", + "thai_ingredient_df %>% \r\n", + " slice_head(n = 10)" + ], + "outputs": [], + "metadata": { + "id": "agQ-1HrcHaEA" + } + }, + { + "cell_type": "markdown", + "source": [ + "V prejšnjem razdelku smo uporabili `geom_col()`, poglejmo, kako lahko uporabite tudi `geom_bar` za ustvarjanje stolpčnih grafikonov. Uporabite `?geom_bar` za nadaljnje branje.\n" + ], + "metadata": { + "id": "kHu9ffGjHdcX" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make a bar chart for popular thai cuisines\r\n", + "thai_ingredient_df %>% \r\n", + " slice_head(n = 10) %>% \r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"steelblue\") +\r\n", + " xlab(\"\") + ylab(\"\")" + ], + "outputs": [], + "metadata": { + "id": "fb3Bx_3DHj6e" + } + }, + { + "cell_type": "markdown", + "source": [ + "Naredimo enako za japonske podatke\n" + ], + "metadata": { + "id": "RHP_xgdkHnvM" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Get popular ingredients for Japanese cuisines and make bar chart\r\n", + "create_ingredient(df = japanese_df) %>% \r\n", + " slice_head(n = 10) %>%\r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"darkorange\", alpha = 0.8) +\r\n", + " xlab(\"\") + ylab(\"\")\r\n" + ], + "outputs": [], + "metadata": { + "id": "019v8F0XHrRU" + } + }, + { + "cell_type": "markdown", + "source": [ + "Kaj pa kitajska kuhinja?\n" + ], + "metadata": { + "id": "iIGM7vO8Hu3v" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Get popular ingredients for Chinese cuisines and 
make bar chart\r\n", + "create_ingredient(df = chinese_df) %>% \r\n", + " slice_head(n = 10) %>%\r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"cyan4\", alpha = 0.8) +\r\n", + " xlab(\"\") + ylab(\"\")" + ], + "outputs": [], + "metadata": { + "id": "lHd9_gd2HyzU" + } + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "ir8qyQbNH1c7" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Get popular ingredients for Indian cuisines and make bar chart\r\n", + "create_ingredient(df = indian_df) %>% \r\n", + " slice_head(n = 10) %>%\r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"#041E42FF\", alpha = 0.8) +\r\n", + " xlab(\"\") + ylab(\"\")" + ], + "outputs": [], + "metadata": { + "id": "ApukQtKjH5FO" + } + }, + { + "cell_type": "markdown", + "source": [ + "Na koncu narišite korejske sestavine.\n" + ], + "metadata": { + "id": "qv30cwY1H-FM" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Get popular ingredients for Korean cuisines and make bar chart\r\n", + "create_ingredient(df = korean_df) %>% \r\n", + " slice_head(n = 10) %>%\r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"#852419FF\", alpha = 0.8) +\r\n", + " xlab(\"\") + ylab(\"\")" + ], + "outputs": [], + "metadata": { + "id": "lumgk9cHIBie" + } + }, + { + "cell_type": "markdown", + "source": [ + "Iz vizualizacij podatkov lahko zdaj odstranimo najpogostejše sestavine, ki povzročajo zmedo med različnimi kuhinjami, z uporabo `dplyr::select()`.\n", + "\n", + "Vsi obožujemo riž, česen in ingver!\n" + ], + "metadata": { + "id": "iO4veMXuIEta" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Drop id column, rice, garlic and ginger from our original data set\r\n", + "df_select <- df %>% 
\r\n", + " select(-c(1, rice, garlic, ginger))\r\n", + "\r\n", + "# Display new data set\r\n", + "df_select %>% \r\n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "iHJPiG6rIUcK" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Predobdelava podatkov z recepti 👩‍🍳👨‍🍳 - Obvladovanje neuravnoteženih podatkov ⚖️\n", + "\n", + "

\n", + " \n", + "

Umetniško delo @allison_horst
\n", + "\n", + "Ker je ta lekcija o kulinariki, moramo postaviti `recepte` v kontekst.\n", + "\n", + "Tidymodels ponuja še en odličen paket: `recipes` - paket za predobdelavo podatkov.\n" + ], + "metadata": { + "id": "kkFd-JxdIaL6" + } + }, + { + "cell_type": "markdown", + "source": [ + "Poglejmo si ponovno porazdelitev naših kuhinj.\n" + ], + "metadata": { + "id": "6l2ubtTPJAhY" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Distribution of cuisines\r\n", + "old_label_count <- df_select %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(desc(n))\r\n", + "\r\n", + "old_label_count" + ], + "outputs": [], + "metadata": { + "id": "1e-E9cb7JDVi" + } + }, + { + "cell_type": "markdown", + "source": [ + "Kot lahko vidite, je število kuhinj precej neenakomerno porazdeljeno. Korejske kuhinje so skoraj trikrat bolj številčne kot tajske kuhinje. Neuravnoteženi podatki pogosto negativno vplivajo na delovanje modela. Pomislite na binarno klasifikacijo. Če večina vaših podatkov pripada enemu razredu, bo model strojnega učenja pogosteje napovedoval ta razred, preprosto zato, ker je zanj na voljo več podatkov. Uravnoteženje podatkov odpravlja to neuravnoteženost in pomaga odstraniti pristranskost. Veliko modelov najbolje deluje, ko je število opazovanj enako, zato se pogosto soočajo s težavami pri delu z neuravnoteženimi podatki.\n", + "\n", + "Obstajata dva glavna načina za obravnavo neuravnoteženih podatkovnih nizov:\n", + "\n", + "- dodajanje opazovanj v manjšinski razred: `Over-sampling`, npr. uporaba algoritma SMOTE\n", + "\n", + "- odstranjevanje opazovanj iz večinskega razreda: `Under-sampling`\n", + "\n", + "Zdaj bomo prikazali, kako obravnavati neuravnotežene podatkovne nize z uporabo `recipe`. 
Recipe lahko razumemo kot načrt, ki opisuje, katere korake je treba uporabiti na podatkovnem nizu, da ga pripravimo za analizo podatkov.\n" + ], + "metadata": { + "id": "soAw6826JKx9" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load themis package for dealing with imbalanced data\r\n", + "library(themis)\r\n", + "\r\n", + "# Create a recipe for preprocessing data\r\n", + "cuisines_recipe <- recipe(cuisine ~ ., data = df_select) %>% \r\n", + " step_smote(cuisine)\r\n", + "\r\n", + "cuisines_recipe" + ], + "outputs": [], + "metadata": { + "id": "HS41brUIJVJy" + } + }, + { + "cell_type": "markdown", + "source": [ + "Razčlenimo naše korake predobdelave.\n", + "\n", + "- Klic funkcije `recipe()` s formulo pove receptu *vloge* spremenljivk, pri čemer uporablja podatke `df_select` kot referenco. Na primer, stolpec `cuisine` je bil dodeljen vlogi `outcome`, medtem ko so ostali stolpci dodeljeni vlogi `predictor`.\n", + "\n", + "- [`step_smote(cuisine)`](https://themis.tidymodels.org/reference/step_smote.html) ustvari *specifikacijo* koraka recepta, ki sintetično generira nove primere manjšinske skupine z uporabo najbližjih sosedov teh primerov.\n", + "\n", + "Če bi želeli videti predobdelane podatke, bi morali [**`prep()`**](https://recipes.tidymodels.org/reference/prep.html) in [**`bake()`**](https://recipes.tidymodels.org/reference/bake.html) uporabiti na našem receptu.\n", + "\n", + "`prep()`: oceni potrebne parametre iz učnega nabora, ki jih je mogoče kasneje uporabiti na drugih podatkovnih nizih.\n", + "\n", + "`bake()`: uporabi pripravljen recept in operacije na katerem koli podatkovnem nizu.\n" + ], + "metadata": { + "id": "Yb-7t7XcJaC8" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Prep and bake the recipe\r\n", + "preprocessed_df <- cuisines_recipe %>% \r\n", + " prep() %>% \r\n", + " bake(new_data = NULL) %>% \r\n", + " relocate(cuisine)\r\n", + "\r\n", + "# Display data\r\n", + 
"preprocessed_df %>% \r\n", + " slice_head(n = 5)\r\n", + "\r\n", + "# Quick summary stats\r\n", + "preprocessed_df %>% \r\n", + " introduce()" + ], + "outputs": [], + "metadata": { + "id": "9QhSgdpxJl44" + } + }, + { + "cell_type": "markdown", + "source": [ + "Zdaj preverimo porazdelitev naših kuhinj in jih primerjajmo z neuravnoteženimi podatki.\n" + ], + "metadata": { + "id": "dmidELh_LdV7" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Distribution of cuisines\r\n", + "new_label_count <- preprocessed_df %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(desc(n))\r\n", + "\r\n", + "list(new_label_count = new_label_count,\r\n", + " old_label_count = old_label_count)" + ], + "outputs": [], + "metadata": { + "id": "aSh23klBLwDz" + } + }, + { + "cell_type": "markdown", + "source": [ + "Mmm! Podatki so lepi in čisti, uravnoteženi in zelo okusni 😋!\n", + "\n", + "> Običajno se recept uporablja kot predprocesor za modeliranje, kjer določa, katere korake je treba uporabiti na podatkovnem naboru, da ga pripravimo za modeliranje. 
V tem primeru se običajno uporablja `workflow()` (kot smo že videli v prejšnjih lekcijah) namesto ročnega ocenjevanja recepta.\n", + ">\n", + "> Zato običajno ni treba uporabljati **`prep()`** in **`bake()`** receptov, ko uporabljate tidymodels, vendar so to koristne funkcije, ki jih imate v svojem orodju za preverjanje, ali recepti delujejo, kot pričakujete, kot v našem primeru.\n", + ">\n", + "> Ko z **`new_data = NULL`** **`bake()`** pripravljen recept, dobite nazaj podatke, ki ste jih podali pri definiranju recepta, vendar so že prestali korake predprocesiranja.\n", + "\n", + "Zdaj shranimo kopijo teh podatkov za uporabo v prihodnjih lekcijah:\n" + ], + "metadata": { + "id": "HEu80HZ8L7ae" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Save preprocessed data\r\n", + "write_csv(preprocessed_df, \"../../../data/cleaned_cuisines_R.csv\")" + ], + "outputs": [], + "metadata": { + "id": "cBmCbIgrMOI6" + } + }, + { + "cell_type": "markdown", + "source": [ + "Ta svež CSV je zdaj na voljo v korenski mapi podatkov.\n", + "\n", + "**🚀Izziv**\n", + "\n", + "Ta učni načrt vsebuje več zanimivih podatkovnih zbirk. Prebrskajte mape `data` in preverite, ali katera vsebuje podatkovne zbirke, ki bi bile primerne za binarno ali večrazredno klasifikacijo? Kakšna vprašanja bi zastavili tej podatkovni zbirki?\n", + "\n", + "## [**Kvizi po predavanju**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/20/)\n", + "\n", + "## **Pregled & Samostojno učenje**\n", + "\n", + "- Oglejte si [paket themis](https://github.com/tidymodels/themis). Katere druge tehnike bi lahko uporabili za obravnavo neuravnoteženih podatkov?\n", + "\n", + "- Referenčna spletna stran za Tidy models [tukaj](https://www.tidymodels.org/start/).\n", + "\n", + "- H. Wickham in G. 
Grolemund, [*R za podatkovno znanost: Vizualizacija, modeliranje, transformacija, urejanje in uvoz podatkov*](https://r4ds.had.co.nz/).\n", + "\n", + "#### HVALA:\n", + "\n", + "[`Allison Horst`](https://twitter.com/allison_horst/) za ustvarjanje čudovitih ilustracij, ki naredijo R bolj prijazen in privlačen. Več ilustracij najdete v njeni [galeriji](https://github.com/allisonhorst/stats-illustrations).\n", + "\n", + "[Cassie Breviu](https://www.twitter.com/cassieview) in [Jen Looper](https://www.twitter.com/jenlooper) za ustvarjanje izvirne Python različice tega modula ♥️\n", + "\n", + "

\n", + " \n", + "

Umetniško delo @allison_horst
\n" + ], + "metadata": { + "id": "WQs5621pMGwf" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za strojno prevajanje [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da upoštevate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo strokovno človeško prevajanje. Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki izhajajo iz uporabe tega prevoda.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sl/4-Classification/1-Introduction/solution/notebook.ipynb b/translations/sl/4-Classification/1-Introduction/solution/notebook.ipynb new file mode 100644 index 000000000..4c42c6a48 --- /dev/null +++ b/translations/sl/4-Classification/1-Introduction/solution/notebook.ipynb @@ -0,0 +1,699 @@ +{ + "cells": [ + { + "source": [ + "# Okusne azijske in indijske jedi\n", + "\n", + "## Uvod\n", + "Azijska in indijska kuhinja sta znani po svojih bogatih okusih, raznolikih sestavinah in edinstvenih tehnikah priprave. V tem vodiču bomo raziskali nekaj najbolj priljubljenih jedi iz teh regij.\n", + "\n", + "## Azijska kuhinja\n", + "### Sushi\n", + "Sushi je japonska jed, ki vključuje surovo ribe, riž in različne dodatke. Priprava zahteva natančnost in spretnost.\n", + "\n", + "### Pad Thai\n", + "Pad Thai je priljubljena tajska jed iz riževih rezancev, jajc, tofuja, kozic in arašidov. Pogosto se postreže z limeto in čilijem.\n", + "\n", + "### Dim Sum\n", + "Dim Sum je kitajska jed, ki vključuje majhne porcijske prigrizke, kot so cmoki, žemljice in zvitki. 
Običajno se postreže s čajem.\n", + "\n", + "## Indijska kuhinja\n", + "### Butter Chicken\n", + "Butter Chicken je kremasta piščančja jed, pripravljena v paradižnikovi omaki z maslom in začimbami. Pogosto se postreže z naanom ali basmati rižem.\n", + "\n", + "### Biryani\n", + "Biryani je aromatična jed iz riža, mesa, začimb in zelišč. Obstaja veliko različic, odvisno od regije.\n", + "\n", + "### Samosa\n", + "Samosa je ocvrta ali pečena jed, polnjena z začinjenim krompirjem, grahom ali mesom. Pogosto se postreže kot prigrizek.\n", + "\n", + "## Zaključek\n", + "Azijska in indijska kuhinja ponujata širok spekter okusov in tekstur, ki zadovoljijo vsak okus. Poskusite te jedi in odkrijte bogastvo teh kulinaričnih tradicij!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "Namestite Imblearn, ki bo omogočil SMOTE. To je paket Scikit-learn, ki pomaga pri obravnavi neuravnoteženih podatkov pri izvajanju klasifikacije. (https://imbalanced-learn.org/stable/)\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: imblearn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.0)\n", + "Requirement already satisfied: imbalanced-learn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imblearn) (0.8.0)\n", + "Requirement already satisfied: numpy>=1.13.3 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (1.19.2)\n", + "Requirement already satisfied: scipy>=0.19.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (1.4.1)\n", + "Requirement already satisfied: scikit-learn>=0.24 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from 
imbalanced-learn->imblearn) (0.24.2)\n", + "Requirement already satisfied: joblib>=0.11 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (0.16.0)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from scikit-learn>=0.24->imbalanced-learn->imblearn) (2.1.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install imblearn" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib as mpl\n", + "import numpy as np\n", + "from imblearn.over_sampling import SMOTE" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('../../data/cuisines.csv')" + ] + }, + { + "source": [ + "Ta podatkovni niz vključuje 385 stolpcev, ki označujejo vse vrste sestavin v različnih kuhinjah iz danega nabora kuhinj.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 65 indian 0 0 0 0 0 \n", + "1 66 indian 1 0 0 0 0 \n", + "2 67 indian 0 0 0 0 0 \n", + "3 68 indian 0 0 0 0 0 \n", + "4 69 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 
0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 385 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
065indian00000000...0000000000
166indian10000000...0000000000
267indian00000000...0000000000
368indian00000000...0000000000
469indian00000000...0000000010
\n

5 rows × 385 columns

\n
" + }, + "metadata": {}, + "execution_count": 4 + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\nRangeIndex: 2448 entries, 0 to 2447\nColumns: 385 entries, Unnamed: 0 to zucchini\ndtypes: int64(384), object(1)\nmemory usage: 7.2+ MB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "korean 799\n", + "indian 598\n", + "chinese 442\n", + "japanese 320\n", + "thai 289\n", + "Name: cuisine, dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ], + "source": [ + "df.cuisine.value_counts()" + ] + }, + { + "source": [ + "Prikaži kuhinje v stolpčnem grafikonu\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 7 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAZEAAAD4CAYAAAAtrdtxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAASY0lEQVR4nO3df7TldV3v8eerGZkRRoeAiXtE5UgNIkUCjlwQIzAiC7NscdcSbcmsfkxl5SXX0juuyzK9d3UvlXnpplajma0kMtCUhluImNcr8msGBmb4pZaTQCFQOYom0fi+f+zPkd14hpnzOWefvYfzfKy113z35/vde7/22fvMa3++3733SVUhSVKPbxt3AEnSgcsSkSR1s0QkSd0sEUlSN0tEktRt+bgDLKYjjjiipqenxx1Dkg4oW7dufbiq1sy2bkmVyPT0NFu2bBl3DEk6oCT5u72tc3eWJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqduS+sT69vt3Mb3xqnHH0ALZefG5444gLXnORCRJ3SwRSVI3S0SS1M0SkSR1s0QkSd0sEUlSN0tEktRtIkokyaFJXtuWz0yyeY6X/29Jzh5NOknS3kxEiQCHAq/tvXBVvbmqPraAeSRJ+2FSSuRi4DuTbAN+E1iV5Iokdye5NEkAkrw5yc1JdiTZNDT+viTnjTG/JC1Jk1IiG4G/qaoTgTcAJwEXAscDxwCnt+3eUVUvrKrvAZ4KvGxfV5xkQ5ItSbbs/tqu0aSXpCVqUkpkTzdV1X1V9Q1gGzDdxs9KcmOS7cBLgO/e1xVV1aaqWldV65YdvHp0iSVpCZrUL2B8dGh5N7A8yUrgXcC6qro3yVuAleMIJ0kamJSZyFeAp+1jm5nCeDjJKsBjIJI0ZhMxE6mqf0xyXZIdwL8AX5xlmy8leTewA3gAuHmRY0qS9jARJQJQVa/ay/gvDS1fBFw0yzbrR5dMkrQ3k7I7S5J0ALJEJEndLBFJUjdLRJLUzRKRJHWbmHdnLYYTjlrNlovPHXcMSXrScCYiSepmiUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6rZ83AEW0/b7dzG98apxx9CY7Lz43HFHkJ50nIlIkrpZIpKkbpaIJKmbJSJJ6maJSJK6WSKSpG77VSJJPj3qIJKkA89+lUhVvWjUQSRJB579nYk8kmRVkmuT3JJke5Ifa+umk9yd5NIkdyW5IsnBbd2bk9ycZEeSTUnSxj+R5NeT3JTkM0m+r40vS/Kb7TK3J/m5Nj6V5JNJtrXrmtn+nCTXt0yXJ1k1ih+SJGl2czkm8nXgFVV1MnAW8FszpQA8F3hXVT0P+DLw2jb+jqp6YVV9D/BU4GVD17e8qk4BLgR+tY39NLCrql4IvBD42STPAV4FXF1VJwLPB7YlOQK4CDi7ZdoCvH4ud16SND9z+dqTAP8jyRnAN4CjgCPbunur6rq2/H7gdcDbgLOSvBE4GDgMuAP4i7bdh9q/W4HptnwO8L1JzmvnVwNrgZuB9yZ5CvDhqtqW5PuB44HrWpcdBFz/LaGTDcAGgGVPXzOHuytJ2pe5lMirgTXAC6rqsSQ7gZVtXe2xbSVZCbwLWFdV9yZ5y9D2AI+2f3cP5Qjwy1V19Z433srrXOB9Sd4O/DNwTVWd/0Shq2oTsAlgxdTaPXNKkuZhLruzVgMPtgI5Czh6aN2zk5zWll8FfIrHC+PhdqziPPbtauAX2oyDJMcmOSTJ0cAXq+rdwHuAk4EbgNOTfFfb9pAkx87h/kiS5ml/ZyIFXAr8RZLtDI4/3D20/h7gF5O8F7gT+N2q+lqSdwM7gAcY7JLal/cw2LV1Szve8hDw48CZwBuSPAY8Arymqh5Ksh6
4LMmKdvmLgM/s532SJM1Tqp54D0+Sw4FbqurovayfBja3g+cTbcXU2pq64JJxx9CY+FXwUp8kW6tq3WzrnnB3VpJnMDhY/bZRBJMkHdiecHdWVf098ITHGapqJzDxsxBJ0sLzu7MkSd0sEUlSN0tEktRtLh82POCdcNRqtvgOHUlaMM5EJEndLBFJUjdLRJLUzRKRJHWzRCRJ3SwRSVI3S0SS1M0SkSR1s0QkSd0sEUlSN0tEktTNEpEkdbNEJEndLBFJUjdLRJLUzRKRJHWzRCRJ3SwRSVI3S0SS1M0SkSR1s0QkSd2WjzvAYtp+/y6mN1417hhSt50XnzvuCNK/40xEktTNEpEkdbNEJEndLBFJUjdLRJLUzRKRJHWzRCRJ3Ra0RJK8L8l5s4w/I8kVC3lbkqTxW5QPG1bV3wPfUi6SpAPbvGYiSV6T5PYktyX54zZ8RpJPJ/nbmVlJkukkO9ry+iQfSvJXST6b5DeGru+cJNcnuSXJ5UlWtfGLk9zZbuttbWxNkg8mubmdTp/PfZEkzV33TCTJdwMXAS+qqoeTHAa8HZgCXgwcB1wJzLYb60TgJOBR4J4kvwP8S7u+s6vqq0n+C/D6JO8EXgEcV1WV5NB2Hb8N/K+q+lSSZwNXA8+bJecGYAPAsqev6b27kqRZzGd31kuAy6vqYYCq+qckAB+uqm8AdyY5ci+XvbaqdgEkuRM4GjgUOB64rl3PQcD1wC7g68AfJNkMbG7XcTZwfNsW4OlJVlXVI8M3VFWbgE0AK6bW1jzuryRpD6M4JvLo0HL2Y5vdLUeAa6rq/D03TnIK8AMMjqv8EoMC+zbg1Kr6+kKEliTN3XyOiXwc+E9JDgdou7Pm4wbg9CTf1a7vkCTHtuMiq6vq/wC/Ajy/bf9R4JdnLpzkxHneviRpjrpnIlV1R5JfA/5vkt3ArfMJUlUPJVkPXJZkRRu+CPgK8JEkKxnMVl7f1r0OeGeS2xncj08CPz+fDJKkuUnV0jlMsGJqbU1dcMm4Y0jd/HsiGockW6tq3Wzr/MS6JKmbJSJJ6maJSJK6WSKSpG6WiCSp26J8AeOkOOGo1Wzx3S2StGCciUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6maJSJK6LR93gMW0/f5dTG+8atwxJM3RzovPHXcE7YUzEUlSN0tEktTNEpEkdbNEJEndLBFJUjdLRJLUbWQlkuTTc9z+zCSb2/LLk2wcTTJJ0kIZ2edEqupF87jslcCVCxhHkjQCo5yJPNL+PTPJJ5JckeTuJJcmSVv30jZ2C/ATQ5ddn+QdbflHk9yY5NYkH0tyZBt/S5L3tuv+2ySvG9V9kSTNbrGOiZwEXAgcDxwDnJ5kJfBu4EeBFwD/YS+X/RRwalWdBPwp8MahdccBPwScAvxqkqeMJr4kaTaL9bUnN1XVfQBJtgHTwCPA56vqs238/cCGWS77TOADSaaAg4DPD627qqoeBR5N8iBwJHDf8IWTbJi53mVPX7OQ90mSlrzFmok8OrS8m7mV1+8A76iqE4CfA1bO5XqralNVrauqdcsOXj2Hm5Uk7cs43+J7NzCd5Dvb+fP3st1q4P62fMHIU0mS9tvYSqSqvs5gN9NV7cD6g3vZ9C3A5Um2Ag8vUjxJ0n5IVY07w6JZMbW2pi64ZNwxJM2RXwU/Xkm2VtW62db5iXVJUjdLRJLUzRKRJHWzRCRJ3SwRSVK3xfrE+kQ44ajVbPFdHpK0YJyJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqZolIkrotH3eAxbT9/l1Mb7xq3DEkaVHtvPjckV23MxF
JUjdLRJLUzRKRJHWzRCRJ3SwRSVI3S0SS1M0SkSR1W9ASSTKdZMdCXqckaXJNxEwkyZL60KMkPVmMrESSHJPk1iTfl+QPk2xv589q69cnuTLJx4Fr29gbktyc5PYkbx26rg8n2ZrkjiQbhsYfSfJrSW5LckOSI0d1fyRJ32okJZLkucAHgfXAKUBV1QnA+cAfJVnZNj0ZOK+qvj/JOcDatv2JwAuSnNG2+6mqegGwDnhdksPb+CHADVX1fOCTwM/OkmVDki1Jtuz+2q5R3F1JWrJGUSJrgI8Ar66q24AXA+8HqKq7gb8Djm3bXlNV/9SWz2mnW4FbgOMYlAoMiuM24AbgWUPj/wpsbstbgek9w1TVpqpaV1Xrlh28eqHuoySJ0XwB4y7gCwzK4859bPvVoeUA/7Oqfn94gyRnAmcDp1XV15J8ApiZyTxWVdWWd7PEvlBSksZtFDORfwVeAbwmyauA/we8GiDJscCzgXtmudzVwE8lWdW2PSrJdwCrgX9uBXIccOoIMkuSOozklXtVfTXJy4BrgP8OnJBkO/BvwPqqejTJnpf5aJLnAde3dY8APwn8FfDzSe5iUD43jCKzJGnu8vjeoCe/FVNra+qCS8YdQ5IW1Xz/nkiSrVW1brZ1E/E5EUnSgckSkSR1s0QkSd0sEUlSN0tEktRtSX0474SjVrNlnu9SkCQ9zpmIJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqtqT+KFWSrzD7n+adFEcAD487xBMw3/yYb37MNz/zyXd0Va2ZbcWS+u4s4J69/XWuSZBki/n6mW9+zDc/SzWfu7MkSd0sEUlSt6VWIpvGHWAfzDc/5psf883Pksy3pA6sS5IW1lKbiUiSFpAlIknqtmRKJMlLk9yT5HNJNo4pw3uTPJhkx9DYYUmuSfLZ9u+3t/Ek+d8t7+1JTl6EfM9K8tdJ7kxyR5L/PEkZk6xMclOS21q+t7bx5yS5seX4QJKD2viKdv5zbf30KPO121yW5NYkmycw284k25NsS7KljU3EY9tu89AkVyS5O8ldSU6blHxJntt+bjOnLye5cFLytdv8lfZ7sSPJZe33ZfTPv6p60p+AZcDfAMcABwG3AcePIccZwMnAjqGx3wA2tuWNwK+35R8B/hIIcCpw4yLkmwJObstPAz4DHD8pGdvtrGrLTwFubLf7Z8Ar2/jvAb/Qll8L/F5bfiXwgUX4Gb4e+BNgczs/Sdl2AkfsMTYRj227zT8CfqYtHwQcOkn5hnIuAx4Ajp6UfMBRwOeBpw4979YvxvNvUX7o4z4BpwFXD51/E/CmMWWZ5t+XyD3AVFueYvCBSIDfB86fbbtFzPoR4AcnMSNwMHAL8B8ZfAp3+Z6PNXA1cFpbXt62ywgzPRO4FngJsLn9BzIR2drt7ORbS2QiHltgdftPMJOYb49M5wDXTVI+BiVyL3BYez5tBn5oMZ5/S2V31swPeMZ9bWwSHFlV/9CWHwCObMtjzdymtycxeLU/MRnb7qJtwIPANQxmmF+qqn+bJcM387X1u4DDRxjvEuCNwDfa+cMnKBtAAR9NsjXJhjY2KY/tc4CHgD9suwPfk+SQCco37JXAZW15IvJV1f3A24AvAP/A4Pm0lUV4/i2VEjkg1OBlwdjfc51kFfBB4MKq+vLwunFnrKrdVXUig1f9pwDHjSvLsCQvAx6sqq3jzvIEXlxVJwM/DPxikjOGV475sV3OYFfv71bVScBXGewe+qZxP/cA2jGFlwOX77lunPnasZgfY1DGzwAOAV66GLe9VErkfuBZQ+ef2cYmwReTTAG0fx9s42PJnOQpDArk0qr60CRmBKiqLwF/zWCKfmiSme+BG87wzXxt/WrgH0cU6XTg5Ul2An/KYJfWb09INuCbr1apqgeBP2dQwpPy2N4H3FdVN7bzVzAolUnJN+OHgVuq6ovt/KTkOxv4fFU9VFWPAR9i8Jwc+fNvqZT
IzcDa9k6FgxhMR68cc6YZVwIXtOULGByHmBl/TXuXx6nArqFp80gkCfAHwF1V9fZJy5hkTZJD2/JTGRyvuYtBmZy3l3wzuc8DPt5eLS64qnpTVT2zqqYZPL8+XlWvnoRsAEkOSfK0mWUG+/V3MCGPbVU9ANyb5Llt6AeAOycl35DzeXxX1kyOScj3BeDUJAe33+OZn9/on3+LcSBqEk4M3i3xGQb70P/rmDJcxmB/5WMMXnn9NIP9kNcCnwU+BhzWtg3wzpZ3O7BuEfK9mMF0/HZgWzv9yKRkBL4XuLXl2wG8uY0fA9wEfI7BboYVbXxlO/+5tv6YRXqcz+Txd2dNRLaW47Z2umPmd2BSHtt2mycCW9rj+2Hg2ycs3yEMXq2vHhqbpHxvBe5uvxt/DKxYjOefX3siSeq2VHZnSZJGwBKRJHWzRCRJ3SwRSVI3S0SS1M0SkSR1s0QkSd3+PxNFbW14TY8fAAAAAElFTkSuQmCC\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "df.cuisine.value_counts().plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "thai df: (289, 385)\njapanese df: (320, 385)\nchinese df: (442, 385)\nindian df: (598, 385)\nkorean df: (799, 385)\n" + ] + } + ], + "source": [ + "\n", + "thai_df = df[(df.cuisine == \"thai\")]\n", + "japanese_df = df[(df.cuisine == \"japanese\")]\n", + "chinese_df = df[(df.cuisine == \"chinese\")]\n", + "indian_df = df[(df.cuisine == \"indian\")]\n", + "korean_df = df[(df.cuisine == \"korean\")]\n", + "\n", + "print(f'thai df: {thai_df.shape}')\n", + "print(f'japanese df: {japanese_df.shape}')\n", + "print(f'chinese df: {chinese_df.shape}')\n", + "print(f'indian df: {indian_df.shape}')\n", + "print(f'korean df: {korean_df.shape}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "def create_ingredient_df(df):\n", + " # transpose df, drop cuisine and unnamed rows, sum the row to get total for ingredient and add value header to new df\n", + " ingredient_df = df.T.drop(['cuisine','Unnamed: 0']).sum(axis=1).to_frame('value')\n", + " # drop ingredients that have a 0 sum\n", + " ingredient_df = ingredient_df[(ingredient_df.T != 0).any()]\n", + " # sort df\n", + " ingredient_df = 
ingredient_df.sort_values(by='value', ascending=False, inplace=False)\n", + " return ingredient_df\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 10 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "thai_ingredient_df = create_ingredient_df(thai_df)\r\n", + "thai_ingredient_df.head(10).plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 11 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "japanese_ingredient_df = create_ingredient_df(japanese_df)\r\n", + "japanese_ingredient_df.head(10).plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 12 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "chinese_ingredient_df = create_ingredient_df(chinese_df)\r\n", + "chinese_ingredient_df.head(10).plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 13 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "indian_ingredient_df = create_ingredient_df(indian_df)\r\n", + "indian_ingredient_df.head(10).plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 14 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "korean_ingredient_df = create_ingredient_df(korean_df)\r\n", + "korean_ingredient_df.head(10).plot.barh()" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... 
whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 15 + } + ], + "source": [ + "feature_df= df.drop(['cuisine','Unnamed: 0','rice','garlic','ginger'], axis=1)\n", + "labels_df = df.cuisine #.unique()\n", + "feature_df.head()\n" + ] + }, + { + "source": [ + "Uravnotežite podatke z uporabo SMOTE nadvzorčenja na najvišji razred. Preberite več tukaj: https://imbalanced-learn.org/dev/references/generated/imblearn.over_sampling.SMOTE.html\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "oversample = SMOTE()\n", + "transformed_feature_df, transformed_label_df = oversample.fit_resample(feature_df, labels_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "new label count: korean 799\nchinese 799\njapanese 799\nindian 799\nthai 799\nName: cuisine, dtype: int64\nold label count: korean 799\nindian 598\nchinese 442\njapanese 320\nthai 289\nName: cuisine, dtype: int64\n" + ] + } + ], + "source": [ + "print(f'new label count: {transformed_label_df.value_counts()}')\r\n", + "print(f'old label count: {df.cuisine.value_counts()}')" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 
0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 18 + } + ], + "source": [ + "transformed_feature_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " cuisine almond angelica anise anise_seed apple apple_brandy \\\n", + "0 indian 0 0 0 0 0 0 \n", + "1 indian 1 0 0 0 0 0 \n", + "2 indian 0 0 0 0 0 0 \n", + "3 indian 0 0 0 0 0 0 \n", + "4 indian 0 0 0 0 0 0 \n", + "... ... ... ... ... ... ... ... \n", + "3990 thai 0 0 0 0 0 0 \n", + "3991 thai 0 0 0 0 0 0 \n", + "3992 thai 0 0 0 0 0 0 \n", + "3993 thai 0 0 0 0 0 0 \n", + "3994 thai 0 0 0 0 0 0 \n", + "\n", + " apricot armagnac artemisia ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "... ... ... ... ... ... ... ... \n", + "3990 0 0 0 ... 0 0 0 \n", + "3991 0 0 0 ... 0 0 0 \n", + "3992 0 0 0 ... 0 0 0 \n", + "3993 0 0 0 ... 0 0 0 \n", + "3994 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "... ... ... ... ... ... ... ... \n", + "3990 0 0 0 0 0 0 0 \n", + "3991 0 0 0 0 0 0 0 \n", + "3992 0 0 0 0 0 0 0 \n", + "3993 0 0 0 0 0 0 0 \n", + "3994 0 0 0 0 0 0 0 \n", + "\n", + "[3995 rows x 381 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisia...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
0indian000000000...0000000000
1indian100000000...0000000000
2indian000000000...0000000000
3indian000000000...0000000000
4indian000000000...0000000010
..................................................................
3990thai000000000...0000000000
3991thai000000000...0000000000
3992thai000000000...0000000000
3993thai000000000...0000000000
3994thai000000000...0000000000
\n

3995 rows × 381 columns

\n
" + }, + "metadata": {}, + "execution_count": 19 + } + ], + "source": [ + "# export transformed data to new df for classification\n", + "transformed_df = pd.concat([transformed_label_df,transformed_feature_df],axis=1, join='outer')\n", + "transformed_df" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\nRangeIndex: 3995 entries, 0 to 3994\nColumns: 381 entries, cuisine to zucchini\ndtypes: int64(380), object(1)\nmemory usage: 11.6+ MB\n" + ] + } + ], + "source": [ + "transformed_df.info()" + ] + }, + { + "source": [ + "Shrani datoteko za prihodnjo uporabo\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "transformed_df.to_csv(\"../../data/cleaned_cuisines.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za prevajanje z umetno inteligenco [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da upoštevate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo profesionalni človeški prevod. 
Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki bi nastale zaradi uporabe tega prevoda.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "1da12ed6d238756959b8de9cac2a35a2", + "translation_date": "2025-09-06T14:51:29+00:00", + "source_file": "4-Classification/1-Introduction/solution/notebook.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/translations/sl/4-Classification/2-Classifiers-1/notebook.ipynb b/translations/sl/4-Classification/2-Classifiers-1/notebook.ipynb new file mode 100644 index 000000000..1e679052a --- /dev/null +++ b/translations/sl/4-Classification/2-Classifiers-1/notebook.ipynb @@ -0,0 +1,41 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "68829b06b4dcd512d3327849191f4d7f", + "translation_date": "2025-09-06T14:32:32+00:00", + "source_file": "4-Classification/2-Classifiers-1/notebook.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# Izdelava modelov za klasifikacijo\n" + ], + "cell_type": "markdown", + "metadata": 
{} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za strojno prevajanje [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da upoštevate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo strokovno človeško prevajanje. Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki izhajajo iz uporabe tega prevoda.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sl/4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb b/translations/sl/4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb new file mode 100644 index 000000000..277a1ad6d --- /dev/null +++ b/translations/sl/4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb @@ -0,0 +1,1302 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_11-R.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "6ea6a5171b1b99b7b5a55f7469c048d2", + "translation_date": "2025-09-06T14:35:18+00:00", + "source_file": "4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb", + "language_code": "sl" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Zgradite klasifikacijski model: Slastne azijske in indijske kuhinje\n" + ], + "metadata": { + "id": "zs2woWv_HoE8" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Razvrščevalniki kuhinj 1\n", + "\n", + "V tej lekciji bomo raziskali različne razvrščevalnike za *napovedovanje določene nacionalne kuhinje na podlagi skupine sestavin.* Pri tem se bomo naučili več o načinih, kako 
lahko algoritme uporabimo za naloge razvrščanja.\n", + "\n", + "### [**Predhodni kviz**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/21/)\n", + "\n", + "### **Priprava**\n", + "\n", + "Ta lekcija temelji na naši [prejšnji lekciji](https://github.com/microsoft/ML-For-Beginners/blob/main/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb), kjer smo:\n", + "\n", + "- Naredili uvod v razvrščanje z uporabo nabora podatkov o vseh čudovitih kuhinjah Azije in Indije 😋.\n", + "\n", + "- Raziskali nekaj [glagolov dplyr](https://dplyr.tidyverse.org/) za pripravo in čiščenje podatkov.\n", + "\n", + "- Ustvarili čudovite vizualizacije z uporabo ggplot2.\n", + "\n", + "- Pokazali, kako obravnavati neuravnotežene podatke z njihovo predobdelavo z uporabo [recipes](https://recipes.tidymodels.org/articles/Simple_Example.html).\n", + "\n", + "- Prikazali, kako z `prep` in `bake` pripravimo in uporabimo naš recept, da potrdimo, da bo deloval, kot je predvideno.\n", + "\n", + "#### **Predpogoji**\n", + "\n", + "Za to lekcijo bomo potrebovali naslednje pakete za čiščenje, pripravo in vizualizacijo podatkov:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) je [zbirka paketov za R](https://www.tidyverse.org/packages), zasnovana za hitrejše, lažje in bolj zabavno podatkovno znanost!\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) je okvir [zbirke paketov](https://www.tidymodels.org/packages/) za modeliranje in strojno učenje.\n", + "\n", + "- `themis`: Paket [themis](https://themis.tidymodels.org/) ponuja dodatne korake za obdelavo neuravnoteženih podatkov.\n", + "\n", + "- `nnet`: Paket [nnet](https://cran.r-project.org/web/packages/nnet/nnet.pdf) ponuja funkcije za ocenjevanje usmerjenih (feed-forward) nevronskih mrež z eno skrito plastjo in za modele multinomialne logistične regresije.\n", + "\n", + "Namestite jih lahko tako:\n" + ], + "metadata": { + "id": "iDFOb3ebHwQC" + } + }, + { + "cell_type": "markdown", + "source": [ + 
"`install.packages(c(\"tidyverse\", \"tidymodels\", \"DataExplorer\", \"here\"))`\n", + "\n", + "Alternativno spodnji skript preveri, ali imate potrebne pakete za dokončanje tega modula, in jih namesti, če manjkajo.\n" + ], + "metadata": { + "id": "4V85BGCjII7F" + } + }, + { + "cell_type": "code", + "execution_count": 2, + "source": [ + "suppressWarnings(if (!require(\"pacman\"))install.packages(\"pacman\"))\r\n", + "\r\n", + "pacman::p_load(tidyverse, tidymodels, themis, here)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Loading required package: pacman\n", + "\n" + ] + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "an5NPyyKIKNR", + "outputId": "834d5e74-f4b8-49f9-8ab5-4c52ff2d7bc8" + } + }, + { + "cell_type": "markdown", + "source": [ + "Zdaj pa zavihajmo rokave!\n", + "\n", + "## 1. Razdelite podatke na učne in testne sklope.\n", + "\n", + "Začeli bomo z izbiro nekaj korakov iz naše prejšnje lekcije.\n", + "\n", + "### Odstranite najpogostejše sestavine, ki povzročajo zmedo med različnimi kuhinjami, z uporabo `dplyr::select()`.\n", + "\n", + "Vsi obožujemo riž, česen in ingver!\n" + ], + "metadata": { + "id": "0ax9GQLBINVv" + } + }, + { + "cell_type": "code", + "execution_count": 3, + "source": [ + "# Load the original cuisines data\r\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/4-Classification/data/cuisines.csv\")\r\n", + "\r\n", + "# Drop id column, rice, garlic and ginger from our original data set\r\n", + "df_select <- df %>% \r\n", + " select(-c(1, rice, garlic, ginger)) %>%\r\n", + " # Encode cuisine column as categorical\r\n", + " mutate(cuisine = factor(cuisine))\r\n", + "\r\n", + "# Display new data set\r\n", + "df_select %>% \r\n", + " slice_head(n = 5)\r\n", + "\r\n", + "# Display distribution of cuisines\r\n", + "df_select %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(desc(n))" + ], + "outputs": [ + { + 
"output_type": "stream", + "name": "stderr", + "text": [ + "New names:\n", + "* `` -> ...1\n", + "\n", + "\u001b[1m\u001b[1mRows: \u001b[1m\u001b[22m\u001b[34m\u001b[34m2448\u001b[34m\u001b[39m \u001b[1m\u001b[1mColumns: \u001b[1m\u001b[22m\u001b[34m\u001b[34m385\u001b[34m\u001b[39m\n", + "\n", + "\u001b[36m──\u001b[39m \u001b[1m\u001b[1mColumn specification\u001b[1m\u001b[22m \u001b[36m────────────────────────────────────────────────────────\u001b[39m\n", + "\u001b[1mDelimiter:\u001b[22m \",\"\n", + "\u001b[31mchr\u001b[39m (1): cuisine\n", + "\u001b[32mdbl\u001b[39m (384): ...1, almond, angelica, anise, anise_seed, apple, apple_brandy, a...\n", + "\n", + "\n", + "\u001b[36mℹ\u001b[39m Use \u001b[30m\u001b[47m\u001b[30m\u001b[47m`spec()`\u001b[47m\u001b[30m\u001b[49m\u001b[39m to retrieve the full column specification for this data.\n", + "\u001b[36mℹ\u001b[39m Specify the column types or set \u001b[30m\u001b[47m\u001b[30m\u001b[47m`show_col_types = FALSE`\u001b[47m\u001b[30m\u001b[49m\u001b[39m to quiet this message.\n", + "\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine almond angelica anise anise_seed apple apple_brandy apricot armagnac\n", + "1 indian 0 0 0 0 0 0 0 0 \n", + "2 indian 1 0 0 0 0 0 0 0 \n", + "3 indian 0 0 0 0 0 0 0 0 \n", + "4 indian 0 0 0 0 0 0 0 0 \n", + "5 indian 0 0 0 0 0 0 0 0 \n", + " artemisia ⋯ whiskey white_bread white_wine whole_grain_wheat_flour wine wood\n", + "1 0 ⋯ 0 0 0 0 0 0 \n", + "2 0 ⋯ 0 0 0 0 0 0 \n", + "3 0 ⋯ 0 0 0 0 0 0 \n", + "4 0 ⋯ 0 0 0 0 0 0 \n", + "5 0 ⋯ 0 0 0 0 0 0 \n", + " yam yeast yogurt zucchini\n", + "1 0 0 0 0 \n", + "2 0 0 0 0 \n", + "3 0 0 0 0 \n", + "4 0 0 0 0 \n", + "5 0 0 1 0 " + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 381\n", + "\n", + "| cuisine <fct> | almond <dbl> | angelica <dbl> | anise <dbl> | anise_seed <dbl> | apple <dbl> | apple_brandy <dbl> | apricot <dbl> | armagnac <dbl> | artemisia <dbl> | ⋯ ⋯ | whiskey <dbl> | white_bread <dbl> | white_wine 
<dbl> | whole_grain_wheat_flour <dbl> | wine <dbl> | wood <dbl> | yam <dbl> | yeast <dbl> | yogurt <dbl> | zucchini <dbl> |\n", + "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", + "| indian | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| indian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| indian | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| indian | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| indian | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 381\n", + "\\begin{tabular}{lllllllllllllllllllll}\n", + " cuisine & almond & angelica & anise & anise\\_seed & apple & apple\\_brandy & apricot & armagnac & artemisia & ⋯ & whiskey & white\\_bread & white\\_wine & whole\\_grain\\_wheat\\_flour & wine & wood & yam & yeast & yogurt & zucchini\\\\\n", + " & & & & & & & & & & ⋯ & & & & & & & & & & \\\\\n", + "\\hline\n", + "\t indian & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t indian & 1 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t indian & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t indian & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t indian & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 1 & 0\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 381
cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiawhiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
<fct><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
indian0000000000000000000
indian1000000000000000000
indian0000000000000000000
indian0000000000000000000
indian0000000000000000010
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine n \n", + "1 korean 799\n", + "2 indian 598\n", + "3 chinese 442\n", + "4 japanese 320\n", + "5 thai 289" + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 2\n", + "\n", + "| cuisine <fct> | n <int> |\n", + "|---|---|\n", + "| korean | 799 |\n", + "| indian | 598 |\n", + "| chinese | 442 |\n", + "| japanese | 320 |\n", + "| thai | 289 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 2\n", + "\\begin{tabular}{ll}\n", + " cuisine & n\\\\\n", + " & \\\\\n", + "\\hline\n", + "\t korean & 799\\\\\n", + "\t indian & 598\\\\\n", + "\t chinese & 442\\\\\n", + "\t japanese & 320\\\\\n", + "\t thai & 289\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 2
cuisinen
<fct><int>
korean 799
indian 598
chinese 442
japanese320
thai 289
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 735 + }, + "id": "jhCrrH22IWVR", + "outputId": "d444a85c-1d8b-485f-bc4f-8be2e8f8217c" + } + }, + { + "cell_type": "markdown", + "source": [ + "Odlično! Zdaj je čas, da podatke razdelimo tako, da gre 70 % podatkov za učenje in 30 % za testiranje. Pri razdelitvi bomo uporabili tudi tehniko `stratifikacije`, da `ohranimo razmerje posameznih vrst kuhinj` v učnih in validacijskih naborih podatkov.\n", + "\n", + "[rsample](https://rsample.tidymodels.org/), paket v Tidymodels, zagotavlja infrastrukturo za učinkovito razdeljevanje in ponovno vzorčenje podatkov:\n" + ], + "metadata": { + "id": "AYTjVyajIdny" + } + }, + { + "cell_type": "code", + "execution_count": 4, + "source": [ + "# Load the core Tidymodels packages into R session\r\n", + "library(tidymodels)\r\n", + "\r\n", + "# Create split specification\r\n", + "set.seed(2056)\r\n", + "cuisines_split <- initial_split(data = df_select,\r\n", + " strata = cuisine,\r\n", + " prop = 0.7)\r\n", + "\r\n", + "# Extract the data in each split\r\n", + "cuisines_train <- training(cuisines_split)\r\n", + "cuisines_test <- testing(cuisines_split)\r\n", + "\r\n", + "# Print the number of cases in each split\r\n", + "cat(\"Training cases: \", nrow(cuisines_train), \"\\n\",\r\n", + " \"Test cases: \", nrow(cuisines_test), sep = \"\")\r\n", + "\r\n", + "# Display the first few rows of the training set\r\n", + "cuisines_train %>% \r\n", + " slice_head(n = 5)\r\n", + "\r\n", + "\r\n", + "# Display distribution of cuisines in the training set\r\n", + "cuisines_train %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(desc(n))" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Training cases: 1712\n", + "Test cases: 736" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine almond angelica anise anise_seed apple apple_brandy apricot armagnac\n", + 
"1 chinese 0 0 0 0 0 0 0 0 \n", + "2 chinese 0 0 0 0 0 0 0 0 \n", + "3 chinese 0 0 0 0 0 0 0 0 \n", + "4 chinese 0 0 0 0 0 0 0 0 \n", + "5 chinese 0 0 0 0 0 0 0 0 \n", + " artemisia ⋯ whiskey white_bread white_wine whole_grain_wheat_flour wine wood\n", + "1 0 ⋯ 0 0 0 0 1 0 \n", + "2 0 ⋯ 0 0 0 0 1 0 \n", + "3 0 ⋯ 0 0 0 0 0 0 \n", + "4 0 ⋯ 0 0 0 0 0 0 \n", + "5 0 ⋯ 0 0 0 0 0 0 \n", + " yam yeast yogurt zucchini\n", + "1 0 0 0 0 \n", + "2 0 0 0 0 \n", + "3 0 0 0 0 \n", + "4 0 0 0 0 \n", + "5 0 0 0 0 " + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 381\n", + "\n", + "| cuisine <fct> | almond <dbl> | angelica <dbl> | anise <dbl> | anise_seed <dbl> | apple <dbl> | apple_brandy <dbl> | apricot <dbl> | armagnac <dbl> | artemisia <dbl> | ⋯ ⋯ | whiskey <dbl> | white_bread <dbl> | white_wine <dbl> | whole_grain_wheat_flour <dbl> | wine <dbl> | wood <dbl> | yam <dbl> | yeast <dbl> | yogurt <dbl> | zucchini <dbl> |\n", + "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 381\n", + "\\begin{tabular}{lllllllllllllllllllll}\n", + " cuisine & almond & angelica & anise & anise\\_seed & apple & apple\\_brandy & apricot & armagnac & artemisia & ⋯ & whiskey & white\\_bread & white\\_wine & whole\\_grain\\_wheat\\_flour & wine & wood & yam & yeast & yogurt & zucchini\\\\\n", + " & & & & & & & & & & ⋯ & & & & & & & & & & \\\\\n", + "\\hline\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 
& 1 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 1 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 381
cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiawhiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
<fct><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
chinese0000000000000100000
chinese0000000000000100000
chinese0000000000000000000
chinese0000000000000000000
chinese0000000000000000000
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine n \n", + "1 korean 559\n", + "2 indian 418\n", + "3 chinese 309\n", + "4 japanese 224\n", + "5 thai 202" + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 2\n", + "\n", + "| cuisine <fct> | n <int> |\n", + "|---|---|\n", + "| korean | 559 |\n", + "| indian | 418 |\n", + "| chinese | 309 |\n", + "| japanese | 224 |\n", + "| thai | 202 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 2\n", + "\\begin{tabular}{ll}\n", + " cuisine & n\\\\\n", + " & \\\\\n", + "\\hline\n", + "\t korean & 559\\\\\n", + "\t indian & 418\\\\\n", + "\t chinese & 309\\\\\n", + "\t japanese & 224\\\\\n", + "\t thai & 202\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 2
cuisinen
<fct><int>
korean 559
indian 418
chinese 309
japanese224
thai 202
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 535 + }, + "id": "w5FWIkEiIjdN", + "outputId": "2e195fd9-1a8f-4b91-9573-cce5582242df" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 2. Obdelava neuravnoteženih podatkov\n", + "\n", + "Kot ste morda opazili v izvirnem naboru podatkov in našem učnem naboru, je porazdelitev števila kuhinj precej neenakomerna. Korejske kuhinje so *skoraj* 3-krat pogostejše od tajskih kuhinj. Neuravnoteženi podatki pogosto negativno vplivajo na delovanje modela. Veliko modelov najbolje deluje, ko je število opazovanj enako, zato se pogosto spopadajo z izzivi pri obdelavi neuravnoteženih podatkov.\n", + "\n", + "Obstajata dva glavna načina za obdelavo neuravnoteženih naborov podatkov:\n", + "\n", + "- dodajanje opazovanj v manjšinsko skupino: `Prekomerno vzorčenje` (Over-sampling), na primer z uporabo algoritma SMOTE, ki sintetično generira nove primere manjšinske skupine z uporabo najbližjih sosedov teh primerov.\n", + "\n", + "- odstranjevanje opazovanj iz večinske skupine: `Podvzorčenje` (Under-sampling)\n", + "\n", + "V naši prejšnji lekciji smo prikazali, kako obdelati neuravnotežene nabore podatkov z uporabo `recepta`. Recept si lahko predstavljamo kot načrt, ki opisuje, katere korake je treba uporabiti na naboru podatkov, da ga pripravimo za analizo. V našem primeru želimo doseči enakomerno porazdelitev števila kuhinj v našem `učnem naboru`. 
Pojdimo kar k stvari.\n" + ], + "metadata": { + "id": "daBi9qJNIwqW" + } + }, + { + "cell_type": "code", + "execution_count": 5, + "source": [ + "# Load themis package for dealing with imbalanced data\r\n", + "library(themis)\r\n", + "\r\n", + "# Create a recipe for preprocessing training data\r\n", + "cuisines_recipe <- recipe(cuisine ~ ., data = cuisines_train) %>% \r\n", + " step_smote(cuisine)\r\n", + "\r\n", + "# Print recipe\r\n", + "cuisines_recipe" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Data Recipe\n", + "\n", + "Inputs:\n", + "\n", + " role #variables\n", + " outcome 1\n", + " predictor 380\n", + "\n", + "Operations:\n", + "\n", + "SMOTE based on cuisine" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 200 + }, + "id": "Az6LFBGxI1X0", + "outputId": "29d71d85-64b0-4e62-871e-bcd5398573b6" + } + }, + { + "cell_type": "markdown", + "source": [ + "Lahko seveda potrdite (z uporabo prep+bake), da bo recept deloval, kot pričakujete - vse oznake kuhinj imajo `559` opazovanj.\n", + "\n", + "Ker bomo ta recept uporabljali kot predprocesor za modeliranje, bo `workflow()` opravil vse priprave in peko namesto nas, tako da recepta ne bomo morali ročno ocenjevati.\n", + "\n", + "Zdaj smo pripravljeni na treniranje modela 👩‍💻👨‍💻!\n", + "\n", + "## 3. Izbira vašega klasifikatorja\n", + "\n", + "

\n", + " \n", + "

Umetniško delo @allison_horst
\n" + ], + "metadata": { + "id": "NBL3PqIWJBBB" + } + }, + { + "cell_type": "markdown", + "source": [ + "Zdaj moramo odločiti, kateri algoritem uporabiti za nalogo 🤔.\n", + "\n", + "V Tidymodels [`parsnip package`](https://parsnip.tidymodels.org/index.html) zagotavlja dosleden vmesnik za delo z modeli prek različnih pogonov (paketov). Prosimo, preglejte dokumentacijo parsnip za raziskovanje [vrst modelov in pogonov](https://www.tidymodels.org/find/parsnip/#models) ter njihovih ustreznih [argumentov modelov](https://www.tidymodels.org/find/parsnip/#model-args). Raznolikost je na prvi pogled precej osupljiva. Na primer, naslednje metode vključujejo tehnike klasifikacije:\n", + "\n", + "- C5.0 modeli klasifikacije na osnovi pravil\n", + "\n", + "- Prilagodljivi diskriminantni modeli\n", + "\n", + "- Linearni diskriminantni modeli\n", + "\n", + "- Regularizirani diskriminantni modeli\n", + "\n", + "- Logistični regresijski modeli\n", + "\n", + "- Multinomialni regresijski modeli\n", + "\n", + "- Naivni Bayesovi modeli\n", + "\n", + "- Podporni vektorski stroji\n", + "\n", + "- Najbližji sosedje\n", + "\n", + "- Odločitvena drevesa\n", + "\n", + "- Metode ansambla\n", + "\n", + "- Nevronske mreže\n", + "\n", + "Seznam se nadaljuje!\n", + "\n", + "### **Kateri klasifikator izbrati?**\n", + "\n", + "Torej, kateri klasifikator bi morali izbrati? Pogosto je preizkušanje več klasifikatorjev in iskanje dobrega rezultata način testiranja.\n", + "\n", + "> AutoML to težavo elegantno reši z izvajanjem teh primerjav v oblaku, kar vam omogoča izbiro najboljšega algoritma za vaše podatke. Preizkusite ga [tukaj](https://docs.microsoft.com/learn/modules/automate-model-selection-with-azure-automl/?WT.mc_id=academic-77952-leestott)\n", + "\n", + "Izbira klasifikatorja je odvisna tudi od našega problema. 
Na primer, kadar je rezultat mogoče razvrstiti v `več kot dva razreda`, kot v našem primeru, morate uporabiti `algoritem za večrazredno klasifikacijo` namesto `binarne klasifikacije.`\n", + "\n", + "### **Boljši pristop**\n", + "\n", + "Boljši način kot naključno ugibanje je, da sledite idejam iz te prenosljive [ML Cheat Sheet](https://docs.microsoft.com/azure/machine-learning/algorithm-cheat-sheet?WT.mc_id=academic-77952-leestott). Tukaj odkrijemo, da imamo za naš večrazredni problem nekaj možnosti:\n", + "\n", + "

\n", + " \n", + "

Del Microsoftovega algoritmičnega priročnika, ki podrobno opisuje možnosti večrazredne klasifikacije
\n" + ], + "metadata": { + "id": "a6DLAZ3vJZ14" + } + }, + { + "cell_type": "markdown", + "source": [ + "### **Razmišljanje**\n", + "\n", + "Poglejmo, ali lahko z razmišljanjem najdemo različne pristope glede na omejitve, ki jih imamo:\n", + "\n", + "- **Globoke nevronske mreže so pretežke**. Glede na naš čist, a minimalen nabor podatkov ter dejstvo, da izvajamo učenje lokalno prek beležk, so globoke nevronske mreže za to nalogo preveč zahtevne.\n", + "\n", + "- **Brez klasifikatorja z dvema razredoma**. Ne uporabljamo klasifikatorja z dvema razredoma, kar izključuje pristop \"ena proti vsem\".\n", + "\n", + "- **Odločilno drevo ali logistična regresija bi lahko delovala**. Odločilno drevo bi lahko delovalo, prav tako multinomna regresija/multirazredna logistična regresija za podatke z več razredi.\n", + "\n", + "- **Multirazredna izboljšana odločilna drevesa rešujejo drugačen problem**. Multirazredna izboljšana odločilna drevesa so najbolj primerna za neparametrične naloge, npr. naloge, namenjene gradnji razvrstitev, zato za nas niso uporabna.\n", + "\n", + "Poleg tega je običajno, preden se lotimo bolj zapletenih modelov strojnega učenja, kot so metode ansambla, dobro zgraditi čim bolj preprost model, da dobimo občutek, kaj se dogaja. Zato bomo v tej lekciji začeli z modelom `multinomne regresije`.\n", + "\n", + "> Logistična regresija je tehnika, ki se uporablja, kadar je odvisna spremenljivka kategorična (ali nominalna). Pri binarni logistični regresiji je število izhodnih spremenljivk dve, medtem ko je število izhodnih spremenljivk pri multinomni logistični regresiji več kot dve. Za več informacij glejte [Napredne metode regresije](https://bookdown.org/chua/ber642_advanced_regression/multinomial-logistic-regression.html).\n", + "\n", + "## 4. 
Učenje in ocenjevanje modela multinomne logistične regresije\n", + "\n", + "V Tidymodels `parsnip::multinom_reg()` definira model, ki uporablja linearne napovedovalce za napovedovanje podatkov z več razredi z uporabo multinomne porazdelitve. Glejte `?multinom_reg()` za različne načine/motorje, ki jih lahko uporabite za prilagoditev tega modela.\n", + "\n", + "Za ta primer bomo prilagodili model multinomne regresije prek privzetega motorja [nnet](https://cran.r-project.org/web/packages/nnet/nnet.pdf).\n", + "\n", + "> Vrednost za `penalty` sem izbral bolj naključno. Obstajajo boljši načini za izbiro te vrednosti, in sicer z uporabo `resampling` in `tuning` modela, o čemer bomo razpravljali kasneje.\n", + ">\n", + "> Glejte [Tidymodels: Začetek](https://www.tidymodels.org/start/tuning/), če želite izvedeti več o tem, kako nastaviti hiperparametre modela.\n" + ], + "metadata": { + "id": "gWMsVcbBJemu" + } + }, + { + "cell_type": "code", + "execution_count": 6, + "source": [ + "# Create a multinomial regression model specification\r\n", + "mr_spec <- multinom_reg(penalty = 1) %>% \r\n", + " set_engine(\"nnet\", MaxNWts = 2086) %>% \r\n", + " set_mode(\"classification\")\r\n", + "\r\n", + "# Print model specification\r\n", + "mr_spec" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Multinomial Regression Model Specification (classification)\n", + "\n", + "Main Arguments:\n", + " penalty = 1\n", + "\n", + "Engine-Specific Arguments:\n", + " MaxNWts = 2086\n", + "\n", + "Computational engine: nnet \n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 166 + }, + "id": "Wq_fcyQiJvfG", + "outputId": "c30449c7-3864-4be7-f810-72a003743e2d" + } + }, + { + "cell_type": "markdown", + "source": [ + "Odlično delo 🥳! 
Zdaj, ko imamo recept in specifikacijo modela, moramo najti način, kako ju združiti v objekt, ki bo najprej predprocesiral podatke, nato prilagodil model na predprocesirane podatke in omogočil tudi morebitne aktivnosti po obdelavi. V Tidymodels se ta priročen objekt imenuje [`workflow`](https://workflows.tidymodels.org/) in priročno združuje vaše modelne komponente! To je tisto, kar bi v *Pythonu* imenovali *pipelines*.\n", + "\n", + "Torej, združimo vse v workflow!📦\n" + ], + "metadata": { + "id": "NlSbzDfgJ0zh" + } + }, + { + "cell_type": "code", + "execution_count": 7, + "source": [ + "# Bundle recipe and model specification\r\n", + "mr_wf <- workflow() %>% \r\n", + " add_recipe(cuisines_recipe) %>% \r\n", + " add_model(mr_spec)\r\n", + "\r\n", + "# Print out workflow\r\n", + "mr_wf" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "══ Workflow ════════════════════════════════════════════════════════════════════\n", + "\u001b[3mPreprocessor:\u001b[23m Recipe\n", + "\u001b[3mModel:\u001b[23m multinom_reg()\n", + "\n", + "── Preprocessor ────────────────────────────────────────────────────────────────\n", + "1 Recipe Step\n", + "\n", + "• step_smote()\n", + "\n", + "── Model ───────────────────────────────────────────────────────────────────────\n", + "Multinomial Regression Model Specification (classification)\n", + "\n", + "Main Arguments:\n", + " penalty = 1\n", + "\n", + "Engine-Specific Arguments:\n", + " MaxNWts = 2086\n", + "\n", + "Computational engine: nnet \n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 333 + }, + "id": "Sc1TfPA4Ke3_", + "outputId": "82c70013-e431-4e7e-cef6-9fcf8aad4a6c" + } + }, + { + "cell_type": "markdown", + "source": [ + "Poteki dela 👌👌! **`workflow()`** se lahko prilagodi na skoraj enak način kot model. 
Torej, čas je za treniranje modela!\n" + ], + "metadata": { + "id": "TNQ8i85aKf9L" + } + }, + { + "cell_type": "code", + "execution_count": 8, + "source": [ + "# Train a multinomial regression model\n", + "mr_fit <- fit(object = mr_wf, data = cuisines_train)\n", + "\n", + "mr_fit" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "══ Workflow [trained] ══════════════════════════════════════════════════════════\n", + "\u001b[3mPreprocessor:\u001b[23m Recipe\n", + "\u001b[3mModel:\u001b[23m multinom_reg()\n", + "\n", + "── Preprocessor ────────────────────────────────────────────────────────────────\n", + "1 Recipe Step\n", + "\n", + "• step_smote()\n", + "\n", + "── Model ───────────────────────────────────────────────────────────────────────\n", + "Call:\n", + "nnet::multinom(formula = ..y ~ ., data = data, decay = ~1, MaxNWts = ~2086, \n", + " trace = FALSE)\n", + "\n", + "Coefficients:\n", + " (Intercept) almond angelica anise anise_seed apple\n", + "indian 0.19723325 0.2409661 0 -5.004955e-05 -0.1657635 -0.05769734\n", + "japanese 0.13961959 -0.6262400 0 -1.169155e-04 -0.4893596 -0.08585717\n", + "korean 0.22377347 -0.1833485 0 -5.560395e-05 -0.2489401 -0.15657804\n", + "thai -0.04336577 -0.6106258 0 4.903828e-04 -0.5782866 0.63451105\n", + " apple_brandy apricot armagnac artemisia artichoke asparagus\n", + "indian 0 0.37042636 0 -0.09122797 0 -0.27181970\n", + "japanese 0 0.28895643 0 -0.12651100 0 0.14054037\n", + "korean 0 -0.07981259 0 0.55756709 0 -0.66979948\n", + "thai 0 -0.33160904 0 -0.10725182 0 -0.02602152\n", + " avocado bacon baked_potato balm banana barley\n", + "indian -0.46624197 0.16008055 0 0 -0.2838796 0.2230625\n", + "japanese 0.90341344 0.02932727 0 0 -0.4142787 2.0953906\n", + "korean -0.06925382 -0.35804134 0 0 -0.2686963 -0.7233404\n", + "thai -0.21473955 -0.75594439 0 0 0.6784880 -0.4363320\n", + " bartlett_pear basil bay bean beech\n", + "indian 0 -0.7128756 0.1011587 -0.8777275 -0.0004380795\n", + 
"japanese 0 0.1288697 0.9425626 -0.2380748 0.3373437611\n", + "korean 0 -0.2445193 -0.4744318 -0.8957870 -0.0048784496\n", + "thai 0 1.5365848 0.1333256 0.2196970 -0.0113078024\n", + " beef beef_broth beef_liver beer beet\n", + "indian -0.7985278 0.2430186 -0.035598065 -0.002173738 0.01005813\n", + "japanese 0.2241875 -0.3653020 -0.139551027 0.128905553 0.04923911\n", + "korean 0.5366515 -0.6153237 0.213455197 -0.010828645 0.27325423\n", + "thai 0.1570012 -0.9364154 -0.008032213 -0.035063746 -0.28279823\n", + " bell_pepper bergamot berry bitter_orange black_bean\n", + "indian 0.49074330 0 0.58947607 0.191256164 -0.1945233\n", + "japanese 0.09074167 0 -0.25917977 -0.118915977 -0.3442400\n", + "korean -0.57876763 0 -0.07874180 -0.007729435 -0.5220672\n", + "thai 0.92554006 0 -0.07210196 -0.002983296 -0.4614426\n", + " black_currant black_mustard_seed_oil black_pepper black_raspberry\n", + "indian 0 0.38935801 -0.4453495 0\n", + "japanese 0 -0.05452887 -0.5440869 0\n", + "korean 0 -0.03929970 0.8025454 0\n", + "thai 0 -0.21498372 -0.9854806 0\n", + " black_sesame_seed black_tea blackberry blackberry_brandy\n", + "indian -0.2759246 0.3079977 0.191256164 0\n", + "japanese -0.6101687 -0.1671913 -0.118915977 0\n", + "korean 1.5197674 -0.3036261 -0.007729435 0\n", + "thai -0.1755656 -0.1487033 -0.002983296 0\n", + " blue_cheese blueberry bone_oil bourbon_whiskey brandy\n", + "indian 0 0.216164294 -0.2276744 0 0.22427587\n", + "japanese 0 -0.119186087 0.3913019 0 -0.15595599\n", + "korean 0 -0.007821986 0.2854487 0 -0.02562342\n", + "thai 0 -0.004947048 -0.0253658 0 -0.05715244\n", + "\n", + "...\n", + "and 308 more lines." 
+ ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "GMbdfVmTKkJI", + "outputId": "adf9ebdf-d69d-4a64-e9fd-e06e5322292e" + } + }, + { + "cell_type": "markdown", + "source": [ + "Izhod prikazuje koeficiente, ki se jih je model naučil med usposabljanjem.\n", + "\n", + "### Ovrednotite usposobljeni model\n", + "\n", + "Čas je, da preverimo, kako se je model odrezal 📏, tako da ga ovrednotimo na testnem naboru podatkov! Začnimo z napovedovanjem na testnem naboru.\n" + ], + "metadata": { + "id": "tt2BfOxrKmcJ" + } + }, + { + "cell_type": "code", + "execution_count": 9, + "source": [ + "# Make predictions on the test set\n", + "results <- cuisines_test %>% select(cuisine) %>% \n", + " bind_cols(mr_fit %>% predict(new_data = cuisines_test))\n", + "\n", + "# Print out results\n", + "results %>% \n", + " slice_head(n = 5)" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine .pred_class\n", + "1 indian thai \n", + "2 indian indian \n", + "3 indian indian \n", + "4 indian indian \n", + "5 indian indian " + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 2\n", + "\n", + "| cuisine <fct> | .pred_class <fct> |\n", + "|---|---|\n", + "| indian | thai |\n", + "| indian | indian |\n", + "| indian | indian |\n", + "| indian | indian |\n", + "| indian | indian |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 2\n", + "\\begin{tabular}{ll}\n", + " cuisine & .pred\\_class\\\\\n", + " & \\\\\n", + "\\hline\n", + "\t indian & thai \\\\\n", + "\t indian & indian\\\\\n", + "\t indian & indian\\\\\n", + "\t indian & indian\\\\\n", + "\t indian & indian\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 2
cuisine.pred_class
<fct><fct>
indianthai
indianindian
indianindian
indianindian
indianindian
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 248 + }, + "id": "CqtckvtsKqax", + "outputId": "e57fe557-6a68-4217-fe82-173328c5436d" + } + }, + { + "cell_type": "markdown", + "source": [ + "Odlično delo! V Tidymodels lahko ocenjujemo uspešnost modela z uporabo [yardstick](https://yardstick.tidymodels.org/) - paketa, ki se uporablja za merjenje učinkovitosti modelov z uporabo metrik uspešnosti. Kot smo storili v naši lekciji o logistični regresiji, začnimo z izračunom matrike zmede.\n" + ], + "metadata": { + "id": "8w5N6XsBKss7" + } + }, + { + "cell_type": "code", + "execution_count": 10, + "source": [ + "# Confusion matrix for categorical data\n", + "conf_mat(data = results, truth = cuisine, estimate = .pred_class)\n" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " Truth\n", + "Prediction chinese indian japanese korean thai\n", + " chinese 83 1 8 15 10\n", + " indian 4 163 1 2 6\n", + " japanese 21 5 73 25 1\n", + " korean 15 0 11 191 0\n", + " thai 10 11 3 7 70" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 133 + }, + "id": "YvODvsLkK0iG", + "outputId": "bb69da84-1266-47ad-b174-d43b88ca2988" + } + }, + { + "cell_type": "markdown", + "source": [ + "Ko se ukvarjamo z več razredi, je na splošno bolj intuitivno to vizualizirati kot toplotni zemljevid, takole:\n" + ], + "metadata": { + "id": "c0HfPL16Lr6U" + } + }, + { + "cell_type": "code", + "execution_count": 11, + "source": [ + "update_geom_defaults(geom = \"tile\", new = list(color = \"black\", alpha = 0.7))\n", + "# Visualize confusion matrix\n", + "results %>% \n", + " conf_mat(cuisine, .pred_class) %>% \n", + " autoplot(type = \"heatmap\")" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "plot without title" + ], + "image/png": "" + }, + "metadata": { + "image/png": { + "width": 420, + 
"height": 420 + } + } + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 436 + }, + "id": "HsAtwukyLsvt", + "outputId": "3032a224-a2c8-4270-b4f2-7bb620317400" + } + }, + { + "cell_type": "markdown", + "source": [ + "Temnejši kvadrati v grafu matrike zmede označujejo veliko število primerov, in upamo, da lahko vidite diagonalno črto temnejših kvadratov, ki označuje primere, kjer sta napovedana in dejanska oznaka enaka.\n", + "\n", + "Zdaj pa izračunajmo povzetne statistike za matriko zmede.\n" + ], + "metadata": { + "id": "oOJC87dkLwPr" + } + }, + { + "cell_type": "code", + "execution_count": 12, + "source": [ + "# Summary stats for confusion matrix\n", + "conf_mat(data = results, truth = cuisine, estimate = .pred_class) %>% \n", + "summary()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " .metric .estimator .estimate\n", + "1 accuracy multiclass 0.7880435\n", + "2 kap multiclass 0.7276583\n", + "3 sens macro 0.7780927\n", + "4 spec macro 0.9477598\n", + "5 ppv macro 0.7585583\n", + "6 npv macro 0.9460080\n", + "7 mcc multiclass 0.7292724\n", + "8 j_index macro 0.7258524\n", + "9 bal_accuracy macro 0.8629262\n", + "10 detection_prevalence macro 0.2000000\n", + "11 precision macro 0.7585583\n", + "12 recall macro 0.7780927\n", + "13 f_meas macro 0.7641862" + ], + "text/markdown": [ + "\n", + "A tibble: 13 × 3\n", + "\n", + "| .metric <chr> | .estimator <chr> | .estimate <dbl> |\n", + "|---|---|---|\n", + "| accuracy | multiclass | 0.7880435 |\n", + "| kap | multiclass | 0.7276583 |\n", + "| sens | macro | 0.7780927 |\n", + "| spec | macro | 0.9477598 |\n", + "| ppv | macro | 0.7585583 |\n", + "| npv | macro | 0.9460080 |\n", + "| mcc | multiclass | 0.7292724 |\n", + "| j_index | macro | 0.7258524 |\n", + "| bal_accuracy | macro | 0.8629262 |\n", + "| detection_prevalence | macro | 0.2000000 |\n", + "| precision | macro | 0.7585583 |\n", + "| recall | macro | 0.7780927 |\n", + "| 
f_meas | macro | 0.7641862 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 13 × 3\n", + "\\begin{tabular}{lll}\n", + " .metric & .estimator & .estimate\\\\\n", + " & & \\\\\n", + "\\hline\n", + "\t accuracy & multiclass & 0.7880435\\\\\n", + "\t kap & multiclass & 0.7276583\\\\\n", + "\t sens & macro & 0.7780927\\\\\n", + "\t spec & macro & 0.9477598\\\\\n", + "\t ppv & macro & 0.7585583\\\\\n", + "\t npv & macro & 0.9460080\\\\\n", + "\t mcc & multiclass & 0.7292724\\\\\n", + "\t j\\_index & macro & 0.7258524\\\\\n", + "\t bal\\_accuracy & macro & 0.8629262\\\\\n", + "\t detection\\_prevalence & macro & 0.2000000\\\\\n", + "\t precision & macro & 0.7585583\\\\\n", + "\t recall & macro & 0.7780927\\\\\n", + "\t f\\_meas & macro & 0.7641862\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 13 × 3
.metric.estimator.estimate
<chr><chr><dbl>
accuracy multiclass0.7880435
kap multiclass0.7276583
sens macro 0.7780927
spec macro 0.9477598
ppv macro 0.7585583
npv macro 0.9460080
mcc multiclass0.7292724
j_index macro 0.7258524
bal_accuracy macro 0.8629262
detection_prevalencemacro 0.2000000
precision macro 0.7585583
recall macro 0.7780927
f_meas macro 0.7641862
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 494 + }, + "id": "OYqetUyzL5Wz", + "outputId": "6a84d65e-113d-4281-dfc1-16e8b70f37e6" + } + }, + { + "cell_type": "markdown", + "source": [ + "Če se osredotočimo na nekatere metrike, kot so natančnost, občutljivost, ppv, smo za začetek kar dobro na poti 🥳!\n", + "\n", + "## 5. Poglabljanje\n", + "\n", + "Postavimo si eno subtilno vprašanje: Katera merila se uporabljajo za izbiro določene vrste kuhinje kot napovedanega rezultata?\n", + "\n", + "Statistični algoritmi strojnega učenja, kot je logistična regresija, temeljijo na `verjetnosti`; torej, kar dejansko napove klasifikator, je porazdelitev verjetnosti med naborom možnih rezultatov. Razred z najvišjo verjetnostjo je nato izbran kot najbolj verjeten rezultat za dana opazovanja.\n", + "\n", + "Poglejmo to v praksi, tako da naredimo trde napovedi razredov in verjetnosti.\n" + ], + "metadata": { + "id": "43t7vz8vMJtW" + } + }, + { + "cell_type": "code", + "execution_count": 13, + "source": [ + "# Make hard class prediction and probabilities\n", + "results_prob <- cuisines_test %>%\n", + " select(cuisine) %>% \n", + " bind_cols(mr_fit %>% predict(new_data = cuisines_test)) %>% \n", + " bind_cols(mr_fit %>% predict(new_data = cuisines_test, type = \"prob\"))\n", + "\n", + "# Print out results\n", + "results_prob %>% \n", + " slice_head(n = 5)" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine .pred_class .pred_chinese .pred_indian .pred_japanese .pred_korean\n", + "1 indian thai 1.551259e-03 0.4587877 5.988039e-04 2.428503e-04\n", + "2 indian indian 2.637133e-05 0.9999488 6.648651e-07 2.259993e-05\n", + "3 indian indian 1.049433e-03 0.9909982 1.060937e-03 1.644947e-05\n", + "4 indian indian 6.237482e-02 0.4763035 9.136702e-02 3.660913e-01\n", + "5 indian indian 1.431745e-02 0.9418551 2.945239e-02 8.721782e-03\n", + " .pred_thai \n", + "1 
5.388194e-01\n", + "2 1.577948e-06\n", + "3 6.874989e-03\n", + "4 3.863391e-03\n", + "5 5.653283e-03" + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 7\n", + "\n", + "| cuisine <fct> | .pred_class <fct> | .pred_chinese <dbl> | .pred_indian <dbl> | .pred_japanese <dbl> | .pred_korean <dbl> | .pred_thai <dbl> |\n", + "|---|---|---|---|---|---|---|\n", + "| indian | thai | 1.551259e-03 | 0.4587877 | 5.988039e-04 | 2.428503e-04 | 5.388194e-01 |\n", + "| indian | indian | 2.637133e-05 | 0.9999488 | 6.648651e-07 | 2.259993e-05 | 1.577948e-06 |\n", + "| indian | indian | 1.049433e-03 | 0.9909982 | 1.060937e-03 | 1.644947e-05 | 6.874989e-03 |\n", + "| indian | indian | 6.237482e-02 | 0.4763035 | 9.136702e-02 | 3.660913e-01 | 3.863391e-03 |\n", + "| indian | indian | 1.431745e-02 | 0.9418551 | 2.945239e-02 | 8.721782e-03 | 5.653283e-03 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 7\n", + "\\begin{tabular}{lllllll}\n", + " cuisine & .pred\\_class & .pred\\_chinese & .pred\\_indian & .pred\\_japanese & .pred\\_korean & .pred\\_thai\\\\\n", + " & & & & & & \\\\\n", + "\\hline\n", + "\t indian & thai & 1.551259e-03 & 0.4587877 & 5.988039e-04 & 2.428503e-04 & 5.388194e-01\\\\\n", + "\t indian & indian & 2.637133e-05 & 0.9999488 & 6.648651e-07 & 2.259993e-05 & 1.577948e-06\\\\\n", + "\t indian & indian & 1.049433e-03 & 0.9909982 & 1.060937e-03 & 1.644947e-05 & 6.874989e-03\\\\\n", + "\t indian & indian & 6.237482e-02 & 0.4763035 & 9.136702e-02 & 3.660913e-01 & 3.863391e-03\\\\\n", + "\t indian & indian & 1.431745e-02 & 0.9418551 & 2.945239e-02 & 8.721782e-03 & 5.653283e-03\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 7
cuisine.pred_class.pred_chinese.pred_indian.pred_japanese.pred_korean.pred_thai
<fct><fct><dbl><dbl><dbl><dbl><dbl>
indianthai 1.551259e-030.45878775.988039e-042.428503e-045.388194e-01
indianindian2.637133e-050.99994886.648651e-072.259993e-051.577948e-06
indianindian1.049433e-030.99099821.060937e-031.644947e-056.874989e-03
indianindian6.237482e-020.47630359.136702e-023.660913e-013.863391e-03
indianindian1.431745e-020.94185512.945239e-028.721782e-035.653283e-03
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 248 + }, + "id": "xdKNs-ZPMTJL", + "outputId": "68f6ac5a-725a-4eff-9ea6-481fef00e008" + } + }, + { + "cell_type": "markdown", + "source": [ + "Veliko bolje!\n", + "\n", + "✅ Ali lahko razložiš, zakaj je model precej prepričan, da je prva opazka tajska?\n", + "\n", + "## **🚀Izziv**\n", + "\n", + "V tej lekciji si uporabil_a očiščene podatke za izdelavo modela strojnega učenja, ki lahko napove nacionalno kuhinjo na podlagi serije sestavin. Vzemi si nekaj časa in preglej [številne možnosti](https://www.tidymodels.org/find/parsnip/#models), ki jih Tidymodels ponuja za klasifikacijo podatkov, ter [druge načine](https://parsnip.tidymodels.org/articles/articles/Examples.html#multinom_reg-models) za prilagoditev multinomnih regresijskih modelov.\n", + "\n", + "#### HVALA:\n", + "\n", + "[`Allison Horst`](https://twitter.com/allison_horst/) za ustvarjanje neverjetnih ilustracij, ki naredijo R bolj prijazen in privlačen. Več ilustracij najdeš v njeni [galeriji](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM).\n", + "\n", + "[Cassie Breviu](https://www.twitter.com/cassieview) in [Jen Looper](https://www.twitter.com/jenlooper) za ustvarjanje izvirne Python različice tega modula ♥️\n", + "\n", + "
\n", + "Dodal_a bi nekaj šal, ampak ne razumem besednih iger o hrani 😅.\n", + "\n", + "
\n", + "\n", + "Veselo učenje,\n", + "\n", + "[Eric](https://twitter.com/ericntay), zlati Microsoft Learn študentski ambasador.\n" + ], + "metadata": { + "id": "2tWVHMeLMYdM" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za strojno prevajanje [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da upoštevate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo strokovno človeško prevajanje. Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki izhajajo iz uporabe tega prevoda.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sl/4-Classification/2-Classifiers-1/solution/notebook.ipynb b/translations/sl/4-Classification/2-Classifiers-1/solution/notebook.ipynb new file mode 100644 index 000000000..f7c078105 --- /dev/null +++ b/translations/sl/4-Classification/2-Classifiers-1/solution/notebook.ipynb @@ -0,0 +1,281 @@ +{ + "cells": [ + { + "source": [ + "# Ustvari modele za klasifikacijo\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 0 indian 0 0 0 0 0 \n", + "1 1 indian 1 0 0 0 0 \n", + "2 2 indian 0 0 0 0 0 \n", + "3 3 indian 0 0 0 0 0 \n", + "4 4 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 
0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 382 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00indian00000000...0000000000
11indian10000000...0000000000
22indian00000000...0000000000
33indian00000000...0000000000
44indian00000000...0000000010
\n

5 rows × 382 columns

\n
" + }, + "metadata": {}, + "execution_count": 1 + } + ], + "source": [ + "import pandas as pd\n", + "cuisines_df = pd.read_csv(\"../../data/cleaned_cuisines.csv\")\n", + "cuisines_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.model_selection import train_test_split, cross_val_score\n", + "from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report, precision_recall_curve\n", + "from sklearn.svm import SVC\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 indian\n", + "1 indian\n", + "2 indian\n", + "3 indian\n", + "4 indian\n", + "Name: cuisine, dtype: object" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ], + "source": [ + "cuisines_label_df = cuisines_df['cuisine']\n", + "cuisines_label_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 4 + } + ], + "source": [ + "cuisines_feature_df = cuisines_df.drop(['Unnamed: 0', 'cuisine'], axis=1)\n", + "cuisines_feature_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(cuisines_feature_df, cuisines_label_df, test_size=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Accuracy is 0.8181818181818182\n" + ] + } + ], + "source": [ + "lr = LogisticRegression(multi_class='ovr',solver='liblinear')\n", + "model = lr.fit(X_train, np.ravel(y_train))\n", + "\n", + "accuracy = model.score(X_test, y_test)\n", + "print (\"Accuracy is {}\".format(accuracy))" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "ingredients: Index(['artemisia', 'black_pepper', 'mushroom', 'shiitake', 'soy_sauce',\n 'vegetable_oil'],\n dtype='object')\ncuisine: korean\n" + ] + } + ], + "source": [ + "# test an item\n", + "print(f'ingredients: {X_test.iloc[50][X_test.iloc[50]!=0].keys()}')\n", + "print(f'cuisine: {y_test.iloc[50]}')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " 0\n", + "korean 0.392231\n", + "chinese 0.372872\n", + "japanese 0.218825\n", + "thai 0.013427\n", + "indian 0.002645" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
0
korean0.392231
chinese0.372872
japanese0.218825
thai0.013427
indian0.002645
\n
" + }, + "metadata": {}, + "execution_count": 8 + } + ], + "source": [ + "#rehsape to 2d array and transpose\n", + "test= X_test.iloc[50].values.reshape(-1, 1).T\n", + "# predict with score\n", + "proba = model.predict_proba(test)\n", + "classes = model.classes_\n", + "# create df with classes and scores\n", + "resultdf = pd.DataFrame(data=proba, columns=classes)\n", + "\n", + "# create df to show results\n", + "topPrediction = resultdf.T.sort_values(by=[0], ascending = [False])\n", + "topPrediction.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " precision recall f1-score support\n\n chinese 0.75 0.73 0.74 223\n indian 0.93 0.88 0.90 255\n japanese 0.78 0.78 0.78 253\n korean 0.87 0.86 0.86 236\n thai 0.76 0.84 0.80 232\n\n accuracy 0.82 1199\n macro avg 0.82 0.82 0.82 1199\nweighted avg 0.82 0.82 0.82 1199\n\n" + ] + } + ], + "source": [ + "y_pred = model.predict(X_test)\r\n", + "print(classification_report(y_test,y_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za strojno prevajanje [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da upoštevate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo strokovno človeško prevajanje. 
Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki izhajajo iz uporabe tega prevoda.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "9408506dd864f2b6e334c62f80c0cfcc", + "translation_date": "2025-09-06T14:33:00+00:00", + "source_file": "4-Classification/2-Classifiers-1/solution/notebook.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/translations/sl/4-Classification/3-Classifiers-2/notebook.ipynb b/translations/sl/4-Classification/3-Classifiers-2/notebook.ipynb new file mode 100644 index 000000000..7b53b776b --- /dev/null +++ b/translations/sl/4-Classification/3-Classifiers-2/notebook.ipynb @@ -0,0 +1,165 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Ustvari Model za Klasifikacijo\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 0 indian 0 0 0 0 0 \n", + "1 1 indian 1 0 0 0 0 \n", + "2 2 indian 0 0 0 0 0 \n", + "3 3 indian 0 0 0 0 0 \n", + "4 4 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 
0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 382 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00indian00000000...0000000000
11indian10000000...0000000000
22indian00000000...0000000000
33indian00000000...0000000000
44indian00000000...0000000010
\n

5 rows × 382 columns

\n
" + }, + "metadata": {}, + "execution_count": 9 + } + ], + "source": [ + "import pandas as pd\n", + "cuisines_df = pd.read_csv(\"../data/cleaned_cuisines.csv\")\n", + "cuisines_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 indian\n", + "1 indian\n", + "2 indian\n", + "3 indian\n", + "4 indian\n", + "Name: cuisine, dtype: object" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ], + "source": [ + "cuisines_label_df = cuisines_df['cuisine']\n", + "cuisines_label_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 11 + } + ], + "source": [ + "cuisines_feature_df = cuisines_df.drop(['Unnamed: 0', 'cuisine'], axis=1)\n", + "cuisines_feature_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za strojno prevajanje [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da upoštevate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo strokovno človeško prevajanje. Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki izhajajo iz uporabe tega prevoda.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "15a83277036572e0773229b5f21c1e12", + "translation_date": "2025-09-06T14:42:15+00:00", + "source_file": "4-Classification/3-Classifiers-2/notebook.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/translations/sl/4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb b/translations/sl/4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb new file mode 100644 index 000000000..d45e9c81c --- /dev/null +++ 
b/translations/sl/4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb @@ -0,0 +1,647 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "lesson_12-R.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "fab50046ca413a38939d579f8432274f", + "translation_date": "2025-09-06T14:45:06+00:00", + "source_file": "4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb", + "language_code": "sl" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "jsFutf_ygqSx" + }, + "source": [ + "# Zgradite klasifikacijski model: Slastne azijske in indijske kuhinje\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HD54bEefgtNO" + }, + "source": [ + "## Razvrščevalniki kuhinj 2\n", + "\n", + "V tej drugi lekciji o razvrščanju bomo raziskali `več načinov` za razvrščanje kategorijskih podatkov. 
Prav tako se bomo naučili o posledicah izbire enega razvrščevalnika namesto drugega.\n", + "\n", + "### [**Predhodni kviz**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/23/)\n", + "\n", + "### **Predpogoji**\n", + "\n", + "Predvidevamo, da ste zaključili prejšnje lekcije, saj bomo nadaljevali z nekaterimi koncepti, ki smo jih že obravnavali.\n", + "\n", + "Za to lekcijo bomo potrebovali naslednje pakete:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) je [zbirka paketov za R](https://www.tidyverse.org/packages), zasnovana za hitrejšo, enostavnejšo in bolj zabavno podatkovno znanost!\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) je okvir [zbirke paketov](https://www.tidymodels.org/packages/) za modeliranje in strojno učenje.\n", + "\n", + "- `themis`: [paket themis](https://themis.tidymodels.org/) ponuja dodatne korake receptov za obravnavo neuravnoteženih podatkov.\n", + "\n", + "Pakete lahko namestite z naslednjim ukazom:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\", \"kernlab\", \"themis\", \"ranger\", \"xgboost\", \"kknn\"))`\n", + "\n", + "Alternativno, spodnji skript preveri, ali imate nameščene potrebne pakete za dokončanje tega modula, in jih namesti, če manjkajo.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "vZ57IuUxgyQt" + }, + "source": [ + "suppressWarnings(if (!require(\"pacman\"))install.packages(\"pacman\"))\n", + "\n", + "pacman::p_load(tidyverse, tidymodels, themis, kernlab, ranger, xgboost, kknn)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "z22M-pj4g07x" + }, + "source": [ + "## **1. Zemljevid klasifikacije**\n", + "\n", + "V naši [prejšnji lekciji](https://github.com/microsoft/ML-For-Beginners/tree/main/4-Classification/2-Classifiers-1) smo poskušali odgovoriti na vprašanje: kako izbrati med več modeli? 
V veliki meri je to odvisno od značilnosti podatkov in vrste problema, ki ga želimo rešiti (na primer klasifikacija ali regresija?).\n", + "\n", + "Prej smo se naučili o različnih možnostih, ki jih imate pri klasifikaciji podatkov, z uporabo Microsoftovega pripomočka. Pythonov okvir za strojno učenje, Scikit-learn, ponuja podoben, vendar bolj podroben pripomoček, ki vam lahko dodatno pomaga zožiti izbiro vaših ocenjevalnikov (drugi izraz za klasifikatorje):\n", + "\n", + "

\n", + " \n", + "

\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "u1i3xRIVg7vG" + }, + "source": [ + "> Nasvet: [obiščite ta zemljevid na spletu](https://scikit-learn.org/stable/tutorial/machine_learning_map/) in kliknite po poti, da preberete dokumentacijo.\n", + ">\n", + "> [Referenčna stran Tidymodels](https://www.tidymodels.org/find/parsnip/#models) prav tako ponuja odlično dokumentacijo o različnih vrstah modelov.\n", + "\n", + "### **Načrt** 🗺️\n", + "\n", + "Ta zemljevid je zelo koristen, ko imate jasno predstavo o svojih podatkih, saj lahko 'hodite' po njegovih poteh do odločitve:\n", + "\n", + "- Imamo \\>50 vzorcev\n", + "\n", + "- Želimo napovedati kategorijo\n", + "\n", + "- Imamo označene podatke\n", + "\n", + "- Imamo manj kot 100K vzorcev\n", + "\n", + "- ✨ Lahko izberemo Linear SVC\n", + "\n", + "- Če to ne deluje, ker imamo numerične podatke\n", + "\n", + " - Lahko poskusimo ✨ KNeighbors Classifier\n", + "\n", + " - Če to ne deluje, poskusite ✨ SVC in ✨ Ensemble Classifiers\n", + "\n", + "To je zelo koristna pot, ki ji lahko sledite. Zdaj pa se lotimo dela z uporabo [tidymodels](https://www.tidymodels.org/) okvira za modeliranje: dosledne in prilagodljive zbirke paketov za R, razvitih za spodbujanje dobre statistične prakse 😊.\n", + "\n", + "## 2. Razdelite podatke in obravnavajte neuravnotežen nabor podatkov.\n", + "\n", + "Iz naših prejšnjih lekcij smo se naučili, da obstaja niz skupnih sestavin med našimi kuhinjami. 
Prav tako je bila precej neenakomerna porazdelitev števila kuhinj.\n", + "\n", + "To bomo obravnavali tako, da:\n", + "\n", + "- Odstranimo najpogostejše sestavine, ki povzročajo zmedo med različnimi kuhinjami, z uporabo `dplyr::select()`.\n", + "\n", + "- Uporabimo `recipe`, ki predhodno obdela podatke, da jih pripravi za modeliranje z uporabo algoritma za `over-sampling`.\n", + "\n", + "To smo že obravnavali v prejšnji lekciji, zato bo to enostavno 🥳!\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "6tj_rN00hClA" + }, + "source": [ + "# Load the core Tidyverse and Tidymodels packages\n", + "library(tidyverse)\n", + "library(tidymodels)\n", + "\n", + "# Load the original cuisines data\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/4-Classification/data/cuisines.csv\")\n", + "\n", + "# Drop id column, rice, garlic and ginger from our original data set\n", + "df_select <- df %>% \n", + " select(-c(1, rice, garlic, ginger)) %>%\n", + " # Encode cuisine column as categorical\n", + " mutate(cuisine = factor(cuisine))\n", + "\n", + "\n", + "# Create data split specification\n", + "set.seed(2056)\n", + "cuisines_split <- initial_split(data = df_select,\n", + " strata = cuisine,\n", + " prop = 0.7)\n", + "\n", + "# Extract the data in each split\n", + "cuisines_train <- training(cuisines_split)\n", + "cuisines_test <- testing(cuisines_split)\n", + "\n", + "# Display distribution of cuisines in the training set\n", + "cuisines_train %>% \n", + " count(cuisine) %>% \n", + " arrange(desc(n))" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zFin5yw3hHb1" + }, + "source": [ + "### Obdelava neuravnoteženih podatkov\n", + "\n", + "Neuravnoteženi podatki pogosto negativno vplivajo na delovanje modela. 
Veliko modelov deluje najbolje, ko je število opazovanj enako, zato imajo težave z neuravnoteženimi podatki.\n", + "\n", + "Obstajata predvsem dva načina za obravnavo neuravnoteženih podatkovnih nizov:\n", + "\n", + "- dodajanje opazovanj v manjšinsko skupino: `Prevzorčenje` (ang. Over-sampling), npr. z uporabo algoritma SMOTE, ki sintetično ustvari nove primere manjšinske skupine z uporabo najbližjih sosedov teh primerov.\n", + "\n", + "- odstranjevanje opazovanj iz večinske skupine: `Podvzorčenje` (ang. Under-sampling)\n", + "\n", + "V prejšnji lekciji smo prikazali, kako obravnavati neuravnotežene podatkovne nize z uporabo `recepta`. Recept si lahko predstavljamo kot načrt, ki opisuje, kateri koraki naj se uporabijo na podatkovnem nizu, da bo pripravljen za analizo podatkov. V našem primeru želimo doseči enakomerno porazdelitev števila naših kulinarik v `učnem naboru`. Pa začnimo!\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "cRzTnHolhLWd" + }, + "source": [ + "# Load themis package for dealing with imbalanced data\n", + "library(themis)\n", + "\n", + "# Create a recipe for preprocessing training data\n", + "cuisines_recipe <- recipe(cuisine ~ ., data = cuisines_train) %>%\n", + " step_smote(cuisine) \n", + "\n", + "# Print recipe\n", + "cuisines_recipe" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KxOQ2ORhhO81" + }, + "source": [ + "Zdaj smo pripravljeni na treniranje modelov 👩‍💻👨‍💻!\n", + "\n", + "## 3. Onkraj multinomnih regresijskih modelov\n", + "\n", + "V prejšnji lekciji smo obravnavali multinomne regresijske modele. Raziskali bomo nekaj bolj prilagodljivih modelov za klasifikacijo.\n", + "\n", + "### Podporni vektorski stroji\n", + "\n", + "V kontekstu klasifikacije so `Podporni vektorski stroji` tehnika strojnega učenja, ki poskuša najti *hiperploskev*, ki \"najbolje\" ločuje razrede. Poglejmo preprost primer:\n", + "\n", + "

\n", + " \n", + "

https://commons.wikimedia.org/w/index.php?curid=22877598
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "C4Wsd0vZhXYu" + }, + "source": [ + "H1 ne ločuje razredov. H2 jih ločuje, vendar le z majhno razdaljo. H3 jih ločuje z največjo razdaljo.\n", + "\n", + "#### Linearni klasifikator podpornih vektorjev\n", + "\n", + "Razvrščanje s podpornimi vektorji (SVC) je del družine tehnik strojnega učenja, imenovanih podporni vektorski stroji. Pri SVC je hiperploskev izbrana tako, da pravilno loči `večino` opazovanj v učnem naboru, vendar `lahko napačno razvrsti` nekaj opazovanj. Z dovoljenjem, da so nekatere podatkovne točke na napačni strani, postane SVM bolj odporen na odstopanja, kar omogoča boljšo posplošitev na nove podatke. Parameter, ki uravnava to kršitev, se imenuje `cost` in ima privzeto vrednost 1 (glej `help(\"svm_poly\")`).\n", + "\n", + "Ustvarimo linearni SVC tako, da nastavimo `degree = 1` v polinomskem modelu SVM.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "vJpp6nuChlBz" + }, + "source": [ + "# Make a linear SVC specification\n", + "svc_linear_spec <- svm_poly(degree = 1) %>% \n", + " set_engine(\"kernlab\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle specification and recipe into a worklow\n", + "svc_linear_wf <- workflow() %>% \n", + " add_recipe(cuisines_recipe) %>% \n", + " add_model(svc_linear_spec)\n", + "\n", + "# Print out workflow\n", + "svc_linear_wf" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rDs8cWNkhoqu" + }, + "source": [ + "Zdaj, ko smo zajeli korake predobdelave in specifikacijo modela v *workflow*, lahko nadaljujemo s treniranjem linearnega SVC in hkrati ocenimo rezultate. 
Za merjenje učinkovitosti ustvarimo nabor metrik, ki bo ocenjeval: `natančnost`, `občutljivost`, `pozitivno napovedno vrednost` in `F-mero`.\n", + "\n", + "> `augment()` bo dodal stolpec(-ce) za napovedi k danim podatkom.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "81wiqcwuhrnq" + }, + "source": [ + "# Train a linear SVC model\n", + "svc_linear_fit <- svc_linear_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "# Create a metric set\n", + "eval_metrics <- metric_set(ppv, sens, accuracy, f_meas)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "svc_linear_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0UFQvHf-huo3" + }, + "source": [ + "#### Podporni vektorski stroj\n", + "\n", + "Podporni vektorski stroj (SVM) je razširitev podpornega vektorskega klasifikatorja, ki omogoča uporabo nelinearne meje med razredi. V bistvu SVM-ji uporabljajo *trik z jedrom*, da razširijo prostor značilnosti in se prilagodijo nelinearnim odnosom med razredi. 
Ena izmed priljubljenih in izjemno prilagodljivih funkcij jedra, ki jih uporabljajo SVM-ji, je *funkcija radialne baze.* Poglejmo, kako se bo obnesla na naših podatkih.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "-KX4S8mzhzmp" + }, + "source": [ + "set.seed(2056)\n", + "\n", + "# Make an RBF SVM specification\n", + "svm_rbf_spec <- svm_rbf() %>% \n", + " set_engine(\"kernlab\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle specification and recipe into a worklow\n", + "svm_rbf_wf <- workflow() %>% \n", + " add_recipe(cuisines_recipe) %>% \n", + " add_model(svm_rbf_spec)\n", + "\n", + "\n", + "# Train an RBF model\n", + "svm_rbf_fit <- svm_rbf_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "svm_rbf_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QBFSa7WSh4HQ" + }, + "source": [ + "Veliko bolje 🤩!\n", + "\n", + "> ✅ Prosimo, poglejte:\n", + ">\n", + "> - [*Support Vector Machines*](https://bradleyboehmke.github.io/HOML/svm.html), Hands-on Machine Learning with R\n", + ">\n", + "> - [*Support Vector Machines*](https://www.statlearning.com/), An Introduction to Statistical Learning with Applications in R\n", + ">\n", + "> za dodatno branje.\n", + "\n", + "### Razvrščevalniki najbližjih sosedov\n", + "\n", + "Algoritem *K*-najbližjih sosedov (KNN) je metoda, pri kateri se vsaka opazovana vrednost napove na podlagi njene *podobnosti* z drugimi opazovanji.\n", + "\n", + "Poglejmo, kako ga lahko uporabimo na naših podatkih.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "k4BxxBcdh9Ka" + }, + "source": [ + "# Make a KNN specification\n", + "knn_spec <- nearest_neighbor() %>% \n", + " set_engine(\"kknn\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle 
recipe and model specification into a workflow\n", + "knn_wf <- workflow() %>% \n", + " add_recipe(cuisines_recipe) %>% \n", + " add_model(knn_spec)\n", + "\n", + "# Train a KNN model\n", + "knn_wf_fit <- knn_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "knn_wf_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HaegQseriAcj" + }, + "source": [ + "Zdi se, da ta model ne deluje najbolje. Verjetno bo izboljšanje delovanja modela mogoče s spremembo argumentov modela (glejte `help(\"nearest_neighbor\")`). Vsekakor poskusite to možnost.\n", + "\n", + "> ✅ Prosimo, poglejte:\n", + ">\n", + "> - [Hands-on Machine Learning with R](https://bradleyboehmke.github.io/HOML/)\n", + ">\n", + "> - [An Introduction to Statistical Learning with Applications in R](https://www.statlearning.com/)\n", + ">\n", + "> za več informacij o klasifikatorjih *K*-Najbližjih Sosedov.\n", + "\n", + "### Ensemble klasifikatorji\n", + "\n", + "Ensemble algoritmi delujejo tako, da kombinirajo več osnovnih ocenjevalnikov za izdelavo optimalnega modela bodisi z:\n", + "\n", + "`bagging`: uporabo *povprečne funkcije* na zbirki osnovnih modelov\n", + "\n", + "`boosting`: gradnjo zaporedja modelov, ki se medsebojno nadgrajujejo za izboljšanje napovedne zmogljivosti.\n", + "\n", + "Začnimo z uporabo modela Random Forest, ki gradi veliko zbirko odločitvenih dreves in nato uporabi povprečno funkcijo za boljši celotni model.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "49DPoVs6iK1M" + }, + "source": [ + "# Make a random forest specification\n", + "rf_spec <- rand_forest() %>% \n", + " set_engine(\"ranger\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle recipe and model specification into a workflow\n", + "rf_wf <- 
workflow() %>% \n", + " add_recipe(cuisines_recipe) %>% \n", + " add_model(rf_spec)\n", + "\n", + "# Train a random forest model\n", + "rf_wf_fit <- rf_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "rf_wf_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RGVYwC_aiUWc" + }, + "source": [ + "Odlično delo 👏!\n", + "\n", + "Poskusimo tudi z modelom Boosted Tree.\n", + "\n", + "Boosted Tree opredeljuje metodo ansambla, ki ustvari serijo zaporednih odločitvenih dreves, kjer vsako drevo temelji na rezultatih prejšnjih dreves, da postopoma zmanjša napako. Osredotoča se na uteži napačno razvrščenih elementov in prilagodi prileganje za naslednji klasifikator, da jih popravi.\n", + "\n", + "Obstajajo različni načini za prileganje tega modela (glejte `help(\"boost_tree\")`). 
V tem primeru bomo prilegali Boosted drevesa prek pogona `xgboost`.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Py1YWo-micWs" + }, + "source": [ + "# Make a boosted tree specification\n", + "boost_spec <- boost_tree(trees = 200) %>% \n", + " set_engine(\"xgboost\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle recipe and model specification into a workflow\n", + "boost_wf <- workflow() %>% \n", + " add_recipe(cuisines_recipe) %>% \n", + " add_model(boost_spec)\n", + "\n", + "# Train a boosted tree model\n", + "boost_wf_fit <- boost_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "boost_wf_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zNQnbuejigZM" + }, + "source": [ + "✅ Prosimo, da si ogledate:\n", + "\n", + "- [Strojno učenje za družboslovce](https://cimentadaj.github.io/ml_socsci/tree-based-methods.html#random-forests)\n", + "- [Praktično strojno učenje z R](https://bradleyboehmke.github.io/HOML/)\n", + "- [Uvod v statistično učenje z aplikacijami v R](https://www.statlearning.com/)\n", + "- - Raziskuje model AdaBoost, ki je dobra alternativa xgboost.\n", + "\n", + "za več informacij o Ensemble klasifikatorjih.\n", + "\n", + "## 4. Dodatno - primerjava več modelov\n", + "\n", + "V tem laboratoriju smo prilagodili kar nekaj modelov 🙌. Ustvarjanje številnih delovnih tokov iz različnih naborov predprocesorjev in/ali specifikacij modelov ter nato izračunavanje metrik zmogljivosti enega za drugim lahko postane zamudno ali naporno.\n", + "\n", + "Poglejmo, ali lahko to rešimo z ustvarjanjem funkcije, ki prilagodi seznam delovnih tokov na učnem naboru podatkov in nato vrne metrike zmogljivosti na podlagi testnega nabora. 
Uporabili bomo `map()` in `map_dfr()` iz paketa [purrr](https://purrr.tidyverse.org/), da funkcije uporabimo na vsakem elementu seznama.\n", + "\n", + "> Funkcije [`map()`](https://purrr.tidyverse.org/reference/map.html) vam omogočajo, da nadomestite številne for zanke s kodo, ki je bolj jedrnata in lažja za branje. Najboljše mesto za učenje o funkcijah [`map()`](https://purrr.tidyverse.org/reference/map.html) je [poglavje o iteraciji](http://r4ds.had.co.nz/iteration.html) v R za podatkovno znanost.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Qzb7LyZnimd2" + }, + "source": [ + "set.seed(2056)\n", + "\n", + "# Create a metric set\n", + "eval_metrics <- metric_set(ppv, sens, accuracy, f_meas)\n", + "\n", + "# Define a function that returns performance metrics\n", + "compare_models <- function(workflow_list, train_set, test_set){\n", + " \n", + " suppressWarnings(\n", + " # Fit each model to the train_set\n", + " map(workflow_list, fit, data = train_set) %>% \n", + " # Make predictions on the test set\n", + " map_dfr(augment, new_data = test_set, .id = \"model\") %>%\n", + " # Select desired columns\n", + " select(model, cuisine, .pred_class) %>% \n", + " # Evaluate model performance\n", + " group_by(model) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class) %>% \n", + " ungroup()\n", + " )\n", + " \n", + "} # End of function" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Fwa712sNisDA" + }, + "source": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "3i4VJOi2iu-a" + }, + "source": [ + "# Make a list of workflows\n", + "workflow_list <- list(\n", + " \"svc\" = svc_linear_wf,\n", + " \"svm\" = svm_rbf_wf,\n", + " \"knn\" = knn_wf,\n", + " \"random_forest\" = rf_wf,\n", + " \"xgboost\" = boost_wf)\n", + "\n", + "# Call the function\n", + "set.seed(2056)\n", + "perf_metrics <- compare_models(workflow_list = workflow_list, train_set = cuisines_train, test_set = 
cuisines_test)\n", + "\n", + "# Print out performance metrics\n", + "perf_metrics %>% \n", + " group_by(.metric) %>% \n", + " arrange(desc(.estimate)) %>% \n", + " slice_head(n=7)\n", + "\n", + "# Compare accuracy\n", + "perf_metrics %>% \n", + " filter(.metric == \"accuracy\") %>% \n", + " arrange(desc(.estimate))\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KuWK_lEli4nW" + }, + "source": [ + "[**workflowset**](https://workflowsets.tidymodels.org/) paket omogoča uporabnikom, da ustvarijo in enostavno prilegajo veliko število modelov, vendar je večinoma zasnovan za delo s tehnikami ponovnega vzorčenja, kot je `križno preverjanje`, pristop, ki ga bomo še obravnavali.\n", + "\n", + "## **🚀Izziv**\n", + "\n", + "Vsaka od teh tehnik ima veliko število parametrov, ki jih lahko prilagodite, na primer `cost` pri SVM, `neighbors` pri KNN, `mtry` (naključno izbrani prediktorji) pri Random Forest.\n", + "\n", + "Raziskujte privzete parametre za vsako od teh tehnik in razmislite, kaj bi prilagajanje teh parametrov pomenilo za kakovost modela.\n", + "\n", + "Če želite izvedeti več o določenem modelu in njegovih parametrih, uporabite: `help(\"model\")`, npr. `help(\"rand_forest\")`.\n", + "\n", + "> V praksi običajno *ocenimo* *najboljše vrednosti* teh parametrov tako, da treniramo veliko modelov na `simuliranem naboru podatkov` in merimo, kako dobro se vsi ti modeli obnesejo. 
Ta proces imenujemo **uglaševanje**.\n", + "\n", + "### [**Kvizi po predavanju**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/24/)\n", + "\n", + "### **Pregled in samostojno učenje**\n", + "\n", + "V teh lekcijah je veliko strokovnih izrazov, zato si vzemite trenutek za pregled [tega seznama](https://docs.microsoft.com/dotnet/machine-learning/resources/glossary?WT.mc_id=academic-77952-leestott) uporabne terminologije!\n", + "\n", + "#### HVALA:\n", + "\n", + "[`Allison Horst`](https://twitter.com/allison_horst/) za ustvarjanje neverjetnih ilustracij, ki naredijo R bolj prijazen in privlačen. Več ilustracij najdete v njeni [galeriji](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM).\n", + "\n", + "[Cassie Breviu](https://www.twitter.com/cassieview) in [Jen Looper](https://www.twitter.com/jenlooper) za ustvarjanje izvirne različice tega modula v Pythonu ♥️\n", + "\n", + "Veselo učenje,\n", + "\n", + "[Eric](https://twitter.com/ericntay), zlati Microsoft Learn študentski ambasador.\n", + "\n", + "

\n", + " \n", + "

Ilustracija @allison_horst
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za strojno prevajanje [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da upoštevate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo strokovno človeško prevajanje. Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki izhajajo iz uporabe tega prevoda.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sl/4-Classification/3-Classifiers-2/solution/notebook.ipynb b/translations/sl/4-Classification/3-Classifiers-2/solution/notebook.ipynb new file mode 100644 index 000000000..10d71d4dd --- /dev/null +++ b/translations/sl/4-Classification/3-Classifiers-2/solution/notebook.ipynb @@ -0,0 +1,304 @@ +{ + "cells": [ + { + "source": [ + "# Zgradite več klasifikacijskih modelov\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 0 indian 0 0 0 0 0 \n", + "1 1 indian 1 0 0 0 0 \n", + "2 2 indian 0 0 0 0 0 \n", + "3 3 indian 0 0 0 0 0 \n", + "4 4 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 382 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00indian00000000...0000000000
11indian10000000...0000000000
22indian00000000...0000000000
33indian00000000...0000000000
44indian00000000...0000000010
\n

5 rows × 382 columns

\n
" + }, + "metadata": {}, + "execution_count": 1 + } + ], + "source": [ + "import pandas as pd\n", + "cuisines_df = pd.read_csv(\"../../data/cleaned_cuisines.csv\")\n", + "cuisines_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 indian\n", + "1 indian\n", + "2 indian\n", + "3 indian\n", + "4 indian\n", + "Name: cuisine, dtype: object" + ] + }, + "metadata": {}, + "execution_count": 2 + } + ], + "source": [ + "cuisines_label_df = cuisines_df['cuisine']\n", + "cuisines_label_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 3 + } + ], + "source": [ + "cuisines_feature_df = cuisines_df.drop(['Unnamed: 0', 'cuisine'], axis=1)\n", + "cuisines_feature_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Poskusite različne klasifikatorje\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.neighbors import KNeighborsClassifier\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.svm import SVC\n", + "from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier\n", + "from sklearn.model_selection import train_test_split, cross_val_score\n", + "from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report, precision_recall_curve\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(cuisines_feature_df, cuisines_label_df, test_size=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "C = 10\n", + "# Create different classifiers.\n", + "classifiers = {\n", + " 'Linear SVC': SVC(kernel='linear', C=C, probability=True,random_state=0),\n", + " 'KNN classifier': KNeighborsClassifier(C),\n", + " 'SVC': SVC(),\n", + " 'RFST': RandomForestClassifier(n_estimators=100),\n", + " 'ADA': AdaBoostClassifier(n_estimators=100)\n", + " \n", + "}\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Accuracy (train) for Linear SVC: 76.4% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.64 0.66 0.65 242\n", + " indian 0.91 0.86 0.89 236\n", + " japanese 0.72 0.73 0.73 245\n", + " korean 0.83 0.75 0.79 234\n", + " thai 0.75 0.82 0.78 242\n", + "\n", + " 
accuracy 0.76 1199\n", + " macro avg 0.77 0.76 0.77 1199\n", + "weighted avg 0.77 0.76 0.77 1199\n", + "\n", + "Accuracy (train) for KNN classifier: 70.7% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.65 0.63 0.64 242\n", + " indian 0.84 0.81 0.82 236\n", + " japanese 0.60 0.81 0.69 245\n", + " korean 0.89 0.53 0.67 234\n", + " thai 0.69 0.75 0.72 242\n", + "\n", + " accuracy 0.71 1199\n", + " macro avg 0.73 0.71 0.71 1199\n", + "weighted avg 0.73 0.71 0.71 1199\n", + "\n", + "Accuracy (train) for SVC: 80.1% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.71 0.69 0.70 242\n", + " indian 0.92 0.92 0.92 236\n", + " japanese 0.77 0.78 0.77 245\n", + " korean 0.87 0.77 0.82 234\n", + " thai 0.75 0.86 0.80 242\n", + "\n", + " accuracy 0.80 1199\n", + " macro avg 0.80 0.80 0.80 1199\n", + "weighted avg 0.80 0.80 0.80 1199\n", + "\n", + "Accuracy (train) for RFST: 82.8% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.80 0.75 0.77 242\n", + " indian 0.90 0.91 0.90 236\n", + " japanese 0.82 0.78 0.80 245\n", + " korean 0.85 0.82 0.83 234\n", + " thai 0.78 0.89 0.83 242\n", + "\n", + " accuracy 0.83 1199\n", + " macro avg 0.83 0.83 0.83 1199\n", + "weighted avg 0.83 0.83 0.83 1199\n", + "\n", + "Accuracy (train) for ADA: 71.1% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.60 0.57 0.58 242\n", + " indian 0.87 0.84 0.86 236\n", + " japanese 0.71 0.60 0.65 245\n", + " korean 0.68 0.78 0.72 234\n", + " thai 0.70 0.78 0.74 242\n", + "\n", + " accuracy 0.71 1199\n", + " macro avg 0.71 0.71 0.71 1199\n", + "weighted avg 0.71 0.71 0.71 1199\n", + "\n" + ] + } + ], + "source": [ + "n_classifiers = len(classifiers)\n", + "\n", + "for index, (name, classifier) in enumerate(classifiers.items()):\n", + " classifier.fit(X_train, np.ravel(y_train))\n", + "\n", + " y_pred = classifier.predict(X_test)\n", + " accuracy = accuracy_score(y_test, y_pred)\n", + " print(\"Accuracy (train) for %s: %0.1f%% \" % 
(name, accuracy * 100))\n", + " print(classification_report(y_test,y_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za strojno prevajanje [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da upoštevate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo strokovno človeško prevajanje. Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki izhajajo iz uporabe tega prevoda.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "7ea2b714669c823a596d986ba2d5739f", + "translation_date": "2025-09-06T14:42:43+00:00", + "source_file": "4-Classification/3-Classifiers-2/solution/notebook.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/translations/sl/4-Classification/4-Applied/notebook.ipynb b/translations/sl/4-Classification/4-Applied/notebook.ipynb new file mode 100644 index 000000000..2b5774bd9 --- /dev/null +++ b/translations/sl/4-Classification/4-Applied/notebook.ipynb @@ -0,0 +1,41 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + 
"version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 4, + "coopTranslator": { + "original_hash": "2f3e0d9e9ac5c301558fb8bf733ac0cb", + "translation_date": "2025-09-06T14:41:26+00:00", + "source_file": "4-Classification/4-Applied/notebook.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# Ustvari priporočilnik za kulinariko\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za strojno prevajanje [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da upoštevate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo strokovno človeško prevajanje. 
Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki izhajajo iz uporabe tega prevoda.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sl/4-Classification/4-Applied/solution/notebook.ipynb b/translations/sl/4-Classification/4-Applied/solution/notebook.ipynb new file mode 100644 index 000000000..2b77ff4e4 --- /dev/null +++ b/translations/sl/4-Classification/4-Applied/solution/notebook.ipynb @@ -0,0 +1,292 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "49325d6dd12a3628fc64fa7ccb1a80ff", + "translation_date": "2025-09-06T14:41:51+00:00", + "source_file": "4-Classification/4-Applied/solution/notebook.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# Ustvari priporočilnik za kulinariko\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: skl2onnx in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (1.8.0)\n", + "Requirement already satisfied: protobuf in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (3.8.0)\n", + "Requirement already satisfied: numpy>=1.15 in 
/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (1.19.2)\n", + "Requirement already satisfied: onnx>=1.2.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (1.9.0)\n", + "Requirement already satisfied: six in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from skl2onnx) (1.12.0)\n", + "Requirement already satisfied: onnxconverter-common<1.9,>=1.6.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (1.8.1)\n", + "Requirement already satisfied: scikit-learn>=0.19 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (0.24.2)\n", + "Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (1.4.1)\n", + "Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from protobuf->skl2onnx) (45.1.0)\n", + "Requirement already satisfied: typing-extensions>=3.6.2.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from onnx>=1.2.1->skl2onnx) (3.10.0.0)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from scikit-learn>=0.19->skl2onnx) (2.1.0)\n", + "Requirement already satisfied: joblib>=0.11 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from scikit-learn>=0.19->skl2onnx) (0.16.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "!pip install skl2onnx" + ] + }, + { + 
"cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd \n" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 0 indian 0 0 0 0 0 \n", + "1 1 indian 1 0 0 0 0 \n", + "2 2 indian 0 0 0 0 0 \n", + "3 3 indian 0 0 0 0 0 \n", + "4 4 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 382 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00indian00000000...0000000000
11indian10000000...0000000000
22indian00000000...0000000000
33indian00000000...0000000000
44indian00000000...0000000010
\n

5 rows × 382 columns

\n
" + }, + "metadata": {}, + "execution_count": 60 + } + ], + "source": [ + "data = pd.read_csv('../../data/cleaned_cuisines.csv')\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 61 + } + ], + "source": [ + "X = data.iloc[:,2:]\n", + "X.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " cuisine\n", + "0 indian\n", + "1 indian\n", + "2 indian\n", + "3 indian\n", + "4 indian" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
cuisine
0indian
1indian
2indian
3indian
4indian
\n
" + }, + "metadata": {}, + "execution_count": 62 + } + ], + "source": [ + "y = data[['cuisine']]\n", + "y.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "from sklearn.svm import SVC\n", + "from sklearn.model_selection import cross_val_score\n", + "from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "SVC(C=10, kernel='linear', probability=True, random_state=0)" + ] + }, + "metadata": {}, + "execution_count": 65 + } + ], + "source": [ + "model = SVC(kernel='linear', C=10, probability=True,random_state=0)\n", + "model.fit(X_train,y_train.values.ravel())\n" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred = model.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " precision recall f1-score support\n\n chinese 0.72 0.70 0.71 236\n indian 0.91 0.88 0.89 243\n japanese 0.80 0.75 0.77 240\n korean 0.80 0.81 0.81 230\n thai 0.76 0.85 0.80 250\n\n accuracy 0.80 1199\n macro avg 0.80 0.80 0.80 1199\nweighted avg 0.80 0.80 0.80 1199\n\n" + ] + } + ], + "source": [ + "print(classification_report(y_test,y_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [], + "source": [ + "from skl2onnx import convert_sklearn\n", + "from skl2onnx.common.data_types import FloatTensorType\n", + "\n", + "initial_type = 
[('float_input', FloatTensorType([None, 380]))]\n", + "options = {id(model): {'nocl': True, 'zipmap': False}}\n", + "onx = convert_sklearn(model, initial_types=initial_type, options=options)\n", + "with open(\"./model.onnx\", \"wb\") as f:\n", + " f.write(onx.SerializeToString())\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za strojno prevajanje [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da upoštevate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo strokovno človeško prevajanje. Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki izhajajo iz uporabe tega prevoda.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sl/5-Clustering/1-Visualize/notebook.ipynb b/translations/sl/5-Clustering/1-Visualize/notebook.ipynb new file mode 100644 index 000000000..41000511d --- /dev/null +++ b/translations/sl/5-Clustering/1-Visualize/notebook.ipynb @@ -0,0 +1,50 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python383jvsc74a57bd0e134e05457d34029b6460cd73bbf1ed73f339b5b6d98c95be70b69eba114fe95", + "display_name": "Python 3.8.3 64-bit (conda)" + }, + "coopTranslator": { + "original_hash": "40e0707e96b3e1899a912776006264f9", + "translation_date": "2025-09-06T14:07:56+00:00", + "source_file": "5-Clustering/1-Visualize/notebook.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + 
{ + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za strojno prevajanje [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da se zavedate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo strokovno človeško prevajanje. Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki izhajajo iz uporabe tega prevoda.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sl/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb b/translations/sl/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb new file mode 100644 index 000000000..2124c0570 --- /dev/null +++ b/translations/sl/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb @@ -0,0 +1,488 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "## **Nigerijska glasba pridobljena s Spotifyja - analiza**\n", + "\n", + "Grozdanje je vrsta [nenadzorovanega učenja](https://wikipedia.org/wiki/Unsupervised_learning), ki predpostavlja, da je podatkovni niz neoznačen ali da njegovi vnosi niso povezani z vnaprej določenimi izhodi. Uporablja različne algoritme za razvrščanje neoznačenih podatkov in zagotavljanje skupin glede na vzorce, ki jih zazna v podatkih.\n", + "\n", + "[**Predhodni kviz**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/27/)\n", + "\n", + "### **Uvod**\n", + "\n", + "[Grozdanje](https://link.springer.com/referenceworkentry/10.1007%2F978-0-387-30164-8_124) je zelo uporabno za raziskovanje podatkov. 
Poglejmo, ali lahko pomaga odkriti trende in vzorce v načinu, kako nigerijsko občinstvo posluša glasbo.\n", + "\n", + "> ✅ Vzemite si trenutek in razmislite o uporabi grozdanja. V vsakdanjem življenju se grozdanje zgodi, kadar imate kup perila in morate razvrstiti oblačila družinskih članov 🧦👕👖🩲. V podatkovni znanosti se grozdanje zgodi, ko poskušate analizirati uporabnikove preference ali določiti značilnosti katerega koli neoznačenega podatkovnega niza. Grozdanje na nek način pomaga ustvariti red iz kaosa, kot je predal za nogavice.\n", + "\n", + "V profesionalnem okolju se grozdanje lahko uporablja za določanje stvari, kot je segmentacija trga, na primer za ugotavljanje, katere starostne skupine kupujejo določene izdelke. Druga uporaba bi bila odkrivanje anomalij, morda za zaznavanje goljufij v podatkovnem nizu transakcij s kreditnimi karticami. Lahko pa uporabite grozdanje za določanje tumorjev v seriji medicinskih skenov.\n", + "\n", + "✅ Razmislite za trenutek, kako ste morda naleteli na grozdanje 'v naravi', v bančništvu, e-trgovini ali poslovnem okolju.\n", + "\n", + "> 🎓 Zanimivo je, da analiza grozdov izvira iz področij antropologije in psihologije v 1930-ih. Si lahko predstavljate, kako bi jo takrat uporabljali?\n", + "\n", + "Alternativno bi jo lahko uporabili za razvrščanje rezultatov iskanja - na primer po nakupovalnih povezavah, slikah ali ocenah. Grozdanje je uporabno, kadar imate velik podatkovni niz, ki ga želite zmanjšati in na katerem želite opraviti bolj podrobno analizo, zato se tehnika lahko uporablja za spoznavanje podatkov, preden se zgradijo drugi modeli.\n", + "\n", + "✅ Ko so vaši podatki organizirani v grozde, jim dodelite ID grozda, kar je lahko uporabno pri ohranjanju zasebnosti podatkovnega niza; namesto bolj razkrivajočih identifikacijskih podatkov lahko uporabite ID grozda za sklicevanje na podatkovno točko. 
Ali lahko pomislite na druge razloge, zakaj bi za identifikacijo raje uporabili ID grozda kot druge elemente grozda?\n", + "\n", + "### Začetek z grozdanjem\n", + "\n", + "> 🎓 Način, kako ustvarimo grozde, je močno povezan s tem, kako združimo podatkovne točke v skupine. Razčistimo nekaj terminologije:\n", + ">\n", + "> 🎓 ['Transduktivno' vs. 'induktivno'](https://wikipedia.org/wiki/Transduction_(machine_learning))\n", + ">\n", + "> Transduktivno sklepanje izhaja iz opazovanih primerov usposabljanja, ki se preslikajo na specifične testne primere. Induktivno sklepanje izhaja iz primerov usposabljanja, ki se preslikajo na splošna pravila, ki se nato uporabijo na testnih primerih.\n", + ">\n", + "> Primer: Predstavljajte si, da imate podatkovni niz, ki je le delno označen. Nekatere stvari so 'plošče', nekatere 'CD-ji', nekatere pa so prazne. Vaša naloga je dodeliti oznake praznim. Če izberete induktivni pristop, bi usposobili model za iskanje 'plošč' in 'CD-jev' ter te oznake uporabili na neoznačenih podatkih. Ta pristop bo imel težave pri razvrščanju stvari, ki so dejansko 'kasete'. Transduktivni pristop pa bo to neznano podatkovno točko obravnaval bolj učinkovito, saj deluje tako, da združi podobne predmete in nato skupini dodeli oznako. V tem primeru bi grozdi lahko odražali 'okrogle glasbene stvari' in 'kvadratne glasbene stvari'.\n", + ">\n", + "> 🎓 ['Neploskovna' vs. 'ploskovna' geometrija](https://datascience.stackexchange.com/questions/52260/terminology-flat-geometry-in-the-context-of-clustering)\n", + ">\n", + "> Izpeljano iz matematične terminologije, neploskovna vs. 
ploskovna geometrija se nanaša na merjenje razdalj med točkami bodisi s 'ploskovnimi' ([Evklidskimi](https://wikipedia.org/wiki/Euclidean_geometry)) bodisi z neploskovnimi (neevklidskimi) geometrijskimi metodami.\n", + ">\n", + "> 'Ploskovna' v tem kontekstu se nanaša na evklidsko geometrijo (deli katere se učijo kot 'ravninska' geometrija), medtem ko se neploskovna nanaša na neevklidsko geometrijo. Kaj ima geometrija skupnega s strojnim učenjem? Kot dve področji, ki temeljita na matematiki, mora obstajati skupen način za merjenje razdalj med točkami v grozdih, kar se lahko izvede na 'ploskovni' ali 'neploskovni' način, odvisno od narave podatkov. [Evklidske razdalje](https://wikipedia.org/wiki/Euclidean_distance) se merijo kot dolžina odseka med dvema točkama. [Neevklidske razdalje](https://wikipedia.org/wiki/Non-Euclidean_geometry) se merijo vzdolž krivulje. Če se vaši podatki, vizualizirani, ne nahajajo na ravnini, boste morda morali uporabiti specializiran algoritem za obdelavo.\n", + ">\n", + "> 🎓 ['Razdalje'](https://web.stanford.edu/class/cs345a/slides/12-clustering.pdf)\n", + ">\n", + "> Grozdi so opredeljeni z matriko razdalj, npr. razdaljami med točkami. Te razdalje je mogoče meriti na več načinov. Evklidski grozdi so opredeljeni s povprečjem vrednosti točk in vsebujejo 'centroid' ali osrednjo točko. Razdalje se tako merijo glede na razdaljo do tega centroida. Neevklidske razdalje se nanašajo na 'clustroid', točko, ki je najbližja drugim točkam. Clustroidi so lahko opredeljeni na različne načine.\n", + ">\n", + "> 🎓 ['Omejeno'](https://wikipedia.org/wiki/Constrained_clustering)\n", + ">\n", + "> [Omejeno grozdanje](https://web.cs.ucdavis.edu/~davidson/Publications/ICDMTutorial.pdf) uvaja 'polnadzorovano' učenje v to nenadzorovano metodo. 
Razmerja med točkami so označena kot 'ne smejo se povezati' ali 'morajo se povezati', tako da se na podatkovni niz uvedejo določena pravila.\n", + ">\n", + "> Primer: Če je algoritem sproščen na serijo neoznačenih ali delno označenih podatkov, so lahko grozdi, ki jih ustvari, slabe kakovosti. V zgornjem primeru bi grozdi lahko združevali 'okrogle glasbene stvari', 'kvadratne glasbene stvari', 'trikotne stvari' in 'piškote'. Če se uvedejo določene omejitve ali pravila (\"predmet mora biti iz plastike\", \"predmet mora biti sposoben proizvajati glasbo\"), to lahko pomaga 'omejiti' algoritem, da sprejema boljše odločitve.\n", + ">\n", + "> 🎓 'Gostota'\n", + ">\n", + "> Podatki, ki so 'hrupni', se štejejo za 'goste'. Razdalje med točkami v vsakem od njegovih grozdov se lahko ob pregledu izkažejo za bolj ali manj goste ali 'natrpane', zato je treba te podatke analizirati z ustrezno metodo grozdanja. [Ta članek](https://www.kdnuggets.com/2020/02/understanding-density-based-clustering.html) prikazuje razliko med uporabo algoritmov K-Means grozdanja in HDBSCAN za raziskovanje hrupnega podatkovnega niza z neenakomerno gostoto grozdov.\n", + "\n", + "Poglobite svoje razumevanje tehnik grozdanja v tem [učnem modulu](https://docs.microsoft.com/learn/modules/train-evaluate-cluster-models?WT.mc_id=academic-77952-leestott)\n", + "\n", + "### **Algoritmi grozdanja**\n", + "\n", + "Obstaja več kot 100 algoritmov za grozdanje, njihova uporaba pa je odvisna od narave podatkov. Oglejmo si nekatere glavne:\n", + "\n", + "- **Hierarhično grozdanje**. Če je predmet razvrščen glede na svojo bližino bližnjemu predmetu, namesto bolj oddaljenemu, se grozdi oblikujejo na podlagi razdalje med člani. Hierarhično grozdanje je značilno po tem, da se dva grozda večkrat združita.\n", + "\n", + "

\n", + " \n", + "

Infografika: Dasani Madipalli
\n", + "\n", + "- **Centroidno grozdanje**. Ta priljubljeni algoritem zahteva izbiro 'k', ali število grozdov, ki jih je treba oblikovati, nato pa algoritem določi osrednjo točko grozda in zbira podatke okoli te točke. [K-means grozdanje](https://wikipedia.org/wiki/K-means_clustering) je priljubljena različica centroidnega grozdanja, ki razdeli podatkovni niz v vnaprej določene K skupine. Center je določen z najbližjim povprečjem, od tod tudi ime. Kvadratna razdalja od grozda je minimizirana.\n", + "\n", + "

\n", + " \n", + "

Infografika: Dasani Madipalli
\n", + "\n", + "- **Grozdanje na podlagi porazdelitve**. Temelji na statističnem modeliranju, grozdanje na podlagi porazdelitve se osredotoča na določanje verjetnosti, da podatkovna točka pripada grozdu, in ji ustrezno dodeli mesto. Metode Gaussove mešanice spadajo v to vrsto.\n", + "\n", + "- **Grozdanje na podlagi gostote**. Podatkovne točke so dodeljene grozdom glede na njihovo gostoto ali njihovo združevanje okoli drugih točk. Podatkovne točke, ki so daleč od skupine, se štejejo za odstopanja ali hrup. DBSCAN, Mean-shift in OPTICS spadajo v to vrsto grozdanja.\n", + "\n", + "- **Grozdanje na podlagi mreže**. Za večdimenzionalne podatkovne nize se ustvari mreža, podatki pa se razdelijo med celice mreže, s čimer se ustvarijo grozdi.\n", + "\n", + "Najboljši način za učenje o grozdanju je, da ga preizkusite sami, kar boste storili v tej vaji.\n", + "\n", + "Za dokončanje tega modula bomo potrebovali nekaj paketov. Namestite jih lahko z ukazom: `install.packages(c('tidyverse', 'tidymodels', 'DataExplorer', 'summarytools', 'plotly', 'paletteer', 'corrplot', 'patchwork'))`\n", + "\n", + "Alternativno spodnji skript preveri, ali imate potrebne pakete za dokončanje tega modula, in jih namesti, če manjkajo.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "suppressWarnings(if(!require(\"pacman\")) install.packages(\"pacman\"))\r\n", + "\r\n", + "pacman::p_load('tidyverse', 'tidymodels', 'DataExplorer', 'summarytools', 'plotly', 'paletteer', 'corrplot', 'patchwork')\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Naloga - razvrščanje podatkov v skupine\n", + "\n", + "Razvrščanje v skupine kot tehnika je močno podprto z ustrezno vizualizacijo, zato začnimo z vizualizacijo naših glasbenih podatkov. 
Ta naloga nam bo pomagala odločiti, katero metodo razvrščanja v skupine bi morali najučinkoviteje uporabiti glede na naravo teh podatkov.\n", + "\n", + "Začnimo z uvozom podatkov.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load the core tidyverse and make it available in your current R session\r\n", + "library(tidyverse)\r\n", + "\r\n", + "# Import the data into a tibble\r\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/5-Clustering/data/nigerian-songs.csv\")\r\n", + "\r\n", + "# View the first 5 rows of the data set\r\n", + "df %>% \r\n", + " slice_head(n = 5)\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Včasih si želimo pridobiti malo več informacij o naših podatkih. Podatke in njihovo strukturo si lahko ogledamo z uporabo funkcije [*glimpse()*](https://pillar.r-lib.org/reference/glimpse.html):\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Glimpse into the data set\r\n", + "df %>% \r\n", + " glimpse()\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Odlično delo!💪\n", + "\n", + "Opazimo lahko, da `glimpse()` prikaže skupno število vrstic (opazovanj) in stolpcev (spremenljivk), nato pa prvih nekaj vnosov vsake spremenljivke v vrstici za imenom spremenljivke. 
Poleg tega je *tip podatkov* spremenljivke prikazan takoj za imenom spremenljivke znotraj `< >`.\n", + "\n", + "`DataExplorer::introduce()` lahko to informacijo povzame na urejen način:\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Describe basic information for our data\r\n", + "df %>% \r\n", + " introduce()\r\n", + "\r\n", + "# A visual display of the same\r\n", + "df %>% \r\n", + " plot_intro()\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Odlično! Pravkar smo ugotovili, da naši podatki nimajo manjkajočih vrednosti.\n", + "\n", + "Medtem lahko raziščemo pogoste statistike centralne tendence (npr. [povprečje](https://en.wikipedia.org/wiki/Arithmetic_mean) in [mediano](https://en.wikipedia.org/wiki/Median)) ter mere razpršenosti (npr. [standardni odklon](https://en.wikipedia.org/wiki/Standard_deviation)) z uporabo `summarytools::descr()`\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Describe common statistics\r\n", + "df %>% \r\n", + " descr(stats = \"common\")\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Poglejmo splošne vrednosti podatkov. Upoštevajte, da lahko priljubljenost znaša `0`, kar pomeni pesmi, ki nimajo uvrstitve. Te bomo kmalu odstranili.\n", + "\n", + "> 🤔 Če delamo z gručenjem, nenadzorovano metodo, ki ne zahteva označenih podatkov, zakaj prikazujemo te podatke z oznakami? V fazi raziskovanja podatkov so koristni, vendar niso nujni za delovanje algoritmov za gručenje.\n", + "\n", + "### 1. 
Raziskovanje priljubljenih žanrov\n", + "\n", + "Pojdimo naprej in ugotovimo najbolj priljubljene žanre 🎶 tako, da preštejemo, kolikokrat se pojavijo.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Popular genres\r\n", + "top_genres <- df %>% \r\n", + " count(artist_top_genre, sort = TRUE) %>% \r\n", + "# Encode to categorical and reorder the according to count\r\n", + " mutate(artist_top_genre = factor(artist_top_genre) %>% fct_inorder())\r\n", + "\r\n", + "# Print the top genres\r\n", + "top_genres\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "To je šlo dobro! Pravijo, da slika pove več kot tisoč vrstic podatkovnega okvira (pravzaprav tega nihče nikoli ne reče 😅). Ampak razumete bistvo, kajne?\n", + "\n", + "Eden od načinov za vizualizacijo kategorijskih podatkov (znakovnih ali faktorskih spremenljivk) je uporaba stolpčnih grafov. Naredimo stolpčni graf za 10 najbolj priljubljenih žanrov:\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Change the default gray theme\r\n", + "theme_set(theme_light())\r\n", + "\r\n", + "# Visualize popular genres\r\n", + "top_genres %>%\r\n", + " slice(1:10) %>% \r\n", + " ggplot(mapping = aes(x = artist_top_genre, y = n,\r\n", + " fill = artist_top_genre)) +\r\n", + " geom_col(alpha = 0.8) +\r\n", + " paletteer::scale_fill_paletteer_d(\"rcartocolor::Vivid\") +\r\n", + " ggtitle(\"Top genres\") +\r\n", + " theme(plot.title = element_text(hjust = 0.5),\r\n", + " # Rotates the X markers (so we can read them)\r\n", + " axis.text.x = element_text(angle = 90))\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Zdaj je veliko lažje prepoznati, da imamo `manjkajoče` žanre 🧐!\n", + "\n", + "> Dobra vizualizacija vam bo pokazala stvari, ki jih niste pričakovali, ali pa sprožila nova vprašanja o podatkih - Hadley Wickham in Garrett 
Grolemund, [R For Data Science](https://r4ds.had.co.nz/introduction.html)\n", + "\n", + "Opomba: ko je glavni žanr opisan kot `Missing`, to pomeni, da ga Spotify ni klasificiral, zato ga odstranimo.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Visualize popular genres\r\n", + "top_genres %>%\r\n", + " filter(artist_top_genre != \"Missing\") %>% \r\n", + " slice(1:10) %>% \r\n", + " ggplot(mapping = aes(x = artist_top_genre, y = n,\r\n", + " fill = artist_top_genre)) +\r\n", + " geom_col(alpha = 0.8) +\r\n", + " paletteer::scale_fill_paletteer_d(\"rcartocolor::Vivid\") +\r\n", + " ggtitle(\"Top genres\") +\r\n", + " theme(plot.title = element_text(hjust = 0.5),\r\n", + " # Rotates the X markers (so we can read them)\r\n", + " axis.text.x = element_text(angle = 90))\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Iz osnovnega raziskovanja podatkov ugotovimo, da tri najbolj priljubljene zvrsti prevladujejo v tem naboru podatkov. 
Osredotočimo se na `afro dancehall`, `afropop` in `nigerian pop`, poleg tega pa filtrirajmo nabor podatkov, da odstranimo vse, kar ima vrednost priljubljenosti 0 (kar pomeni, da ni bilo razvrščeno glede na priljubljenost v naboru podatkov in se lahko za naše namene šteje kot šum):\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "nigerian_songs <- df %>% \r\n", + " # Concentrate on top 3 genres\r\n", + " filter(artist_top_genre %in% c(\"afro dancehall\", \"afropop\",\"nigerian pop\")) %>% \r\n", + " # Remove unclassified observations\r\n", + " filter(popularity != 0)\r\n", + "\r\n", + "\r\n", + "\r\n", + "# Visualize popular genres\r\n", + "nigerian_songs %>%\r\n", + " count(artist_top_genre) %>%\r\n", + " ggplot(mapping = aes(x = artist_top_genre, y = n,\r\n", + " fill = artist_top_genre)) +\r\n", + " geom_col(alpha = 0.8) +\r\n", + " paletteer::scale_fill_paletteer_d(\"ggsci::category10_d3\") +\r\n", + " ggtitle(\"Top genres\") +\r\n", + " theme(plot.title = element_text(hjust = 0.5))\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Poglejmo, ali obstaja kakšna očitna linearna povezava med številskimi spremenljivkami v našem naboru podatkov. To povezavo matematično kvantificira [statistika korelacije](https://en.wikipedia.org/wiki/Correlation).\n", + "\n", + "Statistika korelacije je vrednost med -1 in 1, ki kaže na moč povezave. 
Vrednosti nad 0 kažejo na *pozitivno* korelacijo (visoke vrednosti ene spremenljivke običajno sovpadajo z visokimi vrednostmi druge), medtem ko vrednosti pod 0 kažejo na *negativno* korelacijo (visoke vrednosti ene spremenljivke običajno sovpadajo z nizkimi vrednostmi druge).\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Narrow down to numeric variables and find correlation\r\n", + "corr_mat <- nigerian_songs %>% \r\n", + " select(where(is.numeric)) %>% \r\n", + " cor()\r\n", + "\r\n", + "# Visualize correlation matrix\r\n", + "corrplot(corr_mat, order = 'AOE', col = c('white', 'black'), bg = 'gold2') \r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Podatki niso močno povezani, razen med `energy` in `loudness`, kar je smiselno, saj je glasna glasba običajno precej energična. `Popularity` ima povezavo z `release date`, kar prav tako ni presenetljivo, saj so novejše pesmi verjetno bolj priljubljene. Dolžina in energija se zdita tudi povezani.\n", + "\n", + "Zanimivo bo videti, kaj lahko algoritem za razvrščanje naredi s temi podatki!\n", + "\n", + "> 🎓 Upoštevajte, da korelacija ne pomeni vzročne povezanosti! Imamo dokaz o korelaciji, vendar nimamo dokaza o vzročnosti. [Zabavna spletna stran](https://tylervigen.com/spurious-correlations) ponuja nekaj vizualizacij, ki poudarjajo to točko.\n", + "\n", + "### 2. Raziskovanje porazdelitve podatkov\n", + "\n", + "Postavimo si nekaj bolj subtilnih vprašanj. Ali se žanri bistveno razlikujejo v zaznavanju njihove plesnosti glede na njihovo priljubljenost? 
Preučimo porazdelitev podatkov naših treh najbolj priljubljenih žanrov glede priljubljenosti in plesnosti vzdolž določene osi x in y z uporabo [gostotnih grafov](https://www.khanacademy.org/math/ap-statistics/density-curves-normal-distribution-ap/density-curves/v/density-curves).\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Perform 2D kernel density estimation\r\n", + "density_estimate_2d <- nigerian_songs %>% \r\n", + " ggplot(mapping = aes(x = popularity, y = danceability, color = artist_top_genre)) +\r\n", + " geom_density_2d(bins = 5, size = 1) +\r\n", + " paletteer::scale_color_paletteer_d(\"RSkittleBrewer::wildberry\") +\r\n", + " xlim(-20, 80) +\r\n", + " ylim(0, 1.2)\r\n", + "\r\n", + "# Density plot based on the popularity\r\n", + "density_estimate_pop <- nigerian_songs %>% \r\n", + " ggplot(mapping = aes(x = popularity, fill = artist_top_genre, color = artist_top_genre)) +\r\n", + " geom_density(size = 1, alpha = 0.5) +\r\n", + " paletteer::scale_fill_paletteer_d(\"RSkittleBrewer::wildberry\") +\r\n", + " paletteer::scale_color_paletteer_d(\"RSkittleBrewer::wildberry\") +\r\n", + " theme(legend.position = \"none\")\r\n", + "\r\n", + "# Density plot based on the danceability\r\n", + "density_estimate_dance <- nigerian_songs %>% \r\n", + " ggplot(mapping = aes(x = danceability, fill = artist_top_genre, color = artist_top_genre)) +\r\n", + " geom_density(size = 1, alpha = 0.5) +\r\n", + " paletteer::scale_fill_paletteer_d(\"RSkittleBrewer::wildberry\") +\r\n", + " paletteer::scale_color_paletteer_d(\"RSkittleBrewer::wildberry\")\r\n", + "\r\n", + "\r\n", + "# Patch everything together\r\n", + "library(patchwork)\r\n", + "density_estimate_2d / (density_estimate_pop + density_estimate_dance)\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Vidimo, da so tu koncentrični krogi, ki se ujemajo, ne glede na žanr. 
Ali je mogoče, da se nigerijski okusi pri tem žanru zbližajo na določeni ravni plesnosti?\n", + "\n", + "Na splošno se trije žanri ujemajo glede na svojo priljubljenost in plesnost. Določanje skupkov v teh ohlapno poravnanih podatkih bo izziv. Poglejmo, ali lahko razpršeni diagram to podpre.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# A scatter plot of popularity and danceability\r\n", + "scatter_plot <- nigerian_songs %>% \r\n", + " ggplot(mapping = aes(x = popularity, y = danceability, color = artist_top_genre, shape = artist_top_genre)) +\r\n", + " geom_point(size = 2, alpha = 0.8) +\r\n", + " paletteer::scale_color_paletteer_d(\"futurevisions::mars\")\r\n", + "\r\n", + "# Add a touch of interactivity\r\n", + "ggplotly(scatter_plot)\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Razpršeni diagram istih osi prikazuje podoben vzorec konvergence.\n", + "\n", + "Na splošno lahko za razvrščanje uporabite razpršene diagrame, da prikažete skupine podatkov, zato je obvladovanje te vrste vizualizacije zelo koristno. V naslednji lekciji bomo uporabili te filtrirane podatke in s k-means razvrščanjem odkrili skupine v teh podatkih, ki se na zanimive načine prekrivajo.\n", + "\n", + "## **🚀 Izziv**\n", + "\n", + "V pripravi na naslednjo lekcijo naredite diagram o različnih algoritmih za razvrščanje, ki jih morda odkrijete in uporabite v produkcijskem okolju. Kakšne vrste težav poskuša razvrščanje rešiti?\n", + "\n", + "## [**Kvizi po predavanju**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/28/)\n", + "\n", + "## **Pregled & Samostojno učenje**\n", + "\n", + "Preden uporabite algoritme za razvrščanje, kot smo se naučili, je dobro razumeti naravo vašega nabora podatkov. 
Več o tej temi preberite [tukaj](https://www.kdnuggets.com/2019/10/right-clustering-algorithm.html).\n", + "\n", + "Poglobite svoje razumevanje tehnik razvrščanja:\n", + "\n", + "- [Usposabljanje in ocenjevanje modelov za razvrščanje z uporabo Tidymodels in prijateljev](https://rpubs.com/eR_ic/clustering)\n", + "\n", + "- Bradley Boehmke & Brandon Greenwell, [*Hands-On Machine Learning with R*](https://bradleyboehmke.github.io/HOML/)*.*\n", + "\n", + "## **Naloga**\n", + "\n", + "[Raziskujte druge vizualizacije za razvrščanje](https://github.com/microsoft/ML-For-Beginners/blob/main/5-Clustering/1-Visualize/assignment.md)\n", + "\n", + "## HVALA:\n", + "\n", + "[Jen Looper](https://www.twitter.com/jenlooper) za ustvarjanje izvirne Python različice tega modula ♥️\n", + "\n", + "[`Dasani Madipalli`](https://twitter.com/dasani_decoded) za ustvarjanje neverjetnih ilustracij, ki omogočajo boljše razumevanje konceptov strojnega učenja in jih naredijo bolj dostopne.\n", + "\n", + "Veselo učenje,\n", + "\n", + "[Eric](https://twitter.com/ericntay), Gold Microsoft Learn Student Ambassador.\n" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za strojno prevajanje [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da upoštevate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo strokovno človeško prevajanje. 
Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki izhajajo iz uporabe tega prevoda.\n" + ] + } + ], + "metadata": { + "anaconda-cloud": "", + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "3.4.1" + }, + "coopTranslator": { + "original_hash": "99c36449cad3708a435f6798cfa39972", + "translation_date": "2025-09-06T14:12:22+00:00", + "source_file": "5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/translations/sl/5-Clustering/1-Visualize/solution/notebook.ipynb b/translations/sl/5-Clustering/1-Visualize/solution/notebook.ipynb new file mode 100644 index 000000000..b4be6c082 --- /dev/null +++ b/translations/sl/5-Clustering/1-Visualize/solution/notebook.ipynb @@ -0,0 +1,892 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Defaulting to user installation because normal site-packages is not writeable\n", + "Requirement already satisfied: seaborn in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (0.11.2)\n", + "Requirement already satisfied: matplotlib>=2.2 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from seaborn) (3.5.0)\n", + "Requirement already satisfied: numpy>=1.15 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from seaborn) (1.21.4)\n", + "Requirement already satisfied: pandas>=0.23 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from seaborn) (1.3.4)\n", + "Requirement already satisfied: scipy>=1.0 in 
/Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from seaborn) (1.7.2)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (4.28.1)\n", + "Requirement already satisfied: pyparsing>=2.2.1 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (2.4.7)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (1.3.2)\n", + "Requirement already satisfied: pillow>=6.2.0 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (8.4.0)\n", + "Requirement already satisfied: cycler>=0.10 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (0.11.0)\n", + "Requirement already satisfied: packaging>=20.0 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (21.2)\n", + "Requirement already satisfied: setuptools-scm>=4 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (6.3.2)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (2.8.2)\n", + "Requirement already satisfied: pytz>=2017.3 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from pandas>=0.23->seaborn) (2021.3)\n", + "Requirement already satisfied: six>=1.5 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from python-dateutil>=2.7->matplotlib>=2.2->seaborn) (1.16.0)\n", + "Requirement already satisfied: tomli>=1.0.0 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from setuptools-scm>=4->matplotlib>=2.2->seaborn) (1.2.2)\n", + "Requirement already satisfied: setuptools in 
/Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from setuptools-scm>=4->matplotlib>=2.2->seaborn) (59.1.1)\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "!pip install seaborn" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
0SparkyMandy & The JungleCruel Santinoalternative r&b2019144000480.6660.85100.4200.5340000.1100-6.6990.0829133.0155
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
2LITT!LITT!AYLØindie r&b2018207758400.8360.27200.5640.0005370.1100-7.1270.0424130.0054
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
\n", + "
" + ], + "text/plain": [ + " name album \\\n", + "0 Sparky Mandy & The Jungle \n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "2 LITT! LITT! \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "0 Cruel Santino alternative r&b 2019 144000 48 \n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "2 AYLØ indie r&b 2018 207758 40 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "0 0.666 0.8510 0.420 0.534000 0.1100 -6.699 \n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "2 0.836 0.2720 0.564 0.000537 0.1100 -7.127 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "\n", + " speechiness tempo time_signature \n", + "0 0.0829 133.015 5 \n", + "1 0.3600 129.993 3 \n", + "2 0.0424 130.005 4 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv(\"../../data/nigerian-songs.csv\")\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Dobite informacije o podatkovnem okviru\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 530 entries, 0 to 529\n", + "Data columns (total 16 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 name 530 non-null object \n", + " 1 album 530 non-null object \n", + " 2 artist 530 non-null object \n", + " 3 artist_top_genre 530 non-null object \n", + " 4 release_date 530 non-null int64 \n", + " 5 length 530 non-null int64 \n", + " 6 popularity 530 non-null int64 \n", + " 7 
danceability 530 non-null float64\n", + " 8 acousticness 530 non-null float64\n", + " 9 energy 530 non-null float64\n", + " 10 instrumentalness 530 non-null float64\n", + " 11 liveness 530 non-null float64\n", + " 12 loudness 530 non-null float64\n", + " 13 speechiness 530 non-null float64\n", + " 14 tempo 530 non-null float64\n", + " 15 time_signature 530 non-null int64 \n", + "dtypes: float64(8), int64(4), object(4)\n", + "memory usage: 66.4+ KB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "name 0\n", + "album 0\n", + "artist 0\n", + "artist_top_genre 0\n", + "release_date 0\n", + "length 0\n", + "popularity 0\n", + "danceability 0\n", + "acousticness 0\n", + "energy 0\n", + "instrumentalness 0\n", + "liveness 0\n", + "loudness 0\n", + "speechiness 0\n", + "tempo 0\n", + "time_signature 0\n", + "dtype: int64" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.isnull().sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Oglejte si splošne vrednosti podatkov. Upoštevajte, da je priljubljenost lahko '0' - in obstaja veliko vrstic s to vrednostjo\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
release_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
count530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000
mean2015.390566222298.16981117.5075470.7416190.2654120.7606230.0163050.147308-4.9530110.130748116.4878643.986792
std3.13168839696.82225918.9922120.1175220.2083420.1485330.0903210.1235882.4641860.09293923.5186010.333701
min1998.00000089488.0000000.0000000.2550000.0006650.1110000.0000000.028300-19.3620000.02780061.6950003.000000
25%2014.000000199305.0000000.0000000.6810000.0895250.6690000.0000000.075650-6.2987500.059100102.9612504.000000
50%2016.000000218509.00000013.0000000.7610000.2205000.7845000.0000040.103500-4.5585000.097950112.7145004.000000
75%2017.000000242098.50000031.0000000.8295000.4030000.8757500.0002340.164000-3.3310000.177000125.0392504.000000
max2020.000000511738.00000073.0000000.9660000.9540000.9950000.9100000.8110000.5820000.514000206.0070005.000000
\n", + "
" + ], + "text/plain": [ + " release_date length popularity danceability acousticness \\\n", + "count 530.000000 530.000000 530.000000 530.000000 530.000000 \n", + "mean 2015.390566 222298.169811 17.507547 0.741619 0.265412 \n", + "std 3.131688 39696.822259 18.992212 0.117522 0.208342 \n", + "min 1998.000000 89488.000000 0.000000 0.255000 0.000665 \n", + "25% 2014.000000 199305.000000 0.000000 0.681000 0.089525 \n", + "50% 2016.000000 218509.000000 13.000000 0.761000 0.220500 \n", + "75% 2017.000000 242098.500000 31.000000 0.829500 0.403000 \n", + "max 2020.000000 511738.000000 73.000000 0.966000 0.954000 \n", + "\n", + " energy instrumentalness liveness loudness speechiness \\\n", + "count 530.000000 530.000000 530.000000 530.000000 530.000000 \n", + "mean 0.760623 0.016305 0.147308 -4.953011 0.130748 \n", + "std 0.148533 0.090321 0.123588 2.464186 0.092939 \n", + "min 0.111000 0.000000 0.028300 -19.362000 0.027800 \n", + "25% 0.669000 0.000000 0.075650 -6.298750 0.059100 \n", + "50% 0.784500 0.000004 0.103500 -4.558500 0.097950 \n", + "75% 0.875750 0.000234 0.164000 -3.331000 0.177000 \n", + "max 0.995000 0.910000 0.811000 0.582000 0.514000 \n", + "\n", + " tempo time_signature \n", + "count 530.000000 530.000000 \n", + "mean 116.487864 3.986792 \n", + "std 23.518601 0.333701 \n", + "min 61.695000 3.000000 \n", + "25% 102.961250 4.000000 \n", + "50% 112.714500 4.000000 \n", + "75% 125.039250 4.000000 \n", + "max 206.007000 5.000000 " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Poglejmo si žanre. 
Kar nekaj jih je označenih kot 'Missing' (manjkajoči), kar pomeni, da v podatkovnem naboru niso kategorizirani z žanrom\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "import seaborn as sns\n", + "\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top[:5].index,y=top[:5].values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Uvod\n", + "\n", + "Ta dokument opisuje, kako upravljati z glasbenimi žanri v aplikaciji. Žanri so ključni za organizacijo glasbe in omogočajo uporabnikom, da hitro najdejo skladbe, ki ustrezajo njihovemu okusu.\n", + "\n", + "## Podprti žanri\n", + "\n", + "Spodaj je seznam podprtih žanrov:\n", + "\n", + "- Pop\n", + "- Rock\n", + "- Jazz\n", + "- Hip Hop\n", + "- Klasična glasba\n", + "- Elektronska glasba\n", + "- Country\n", + "- Reggae\n", + "- Blues\n", + "- Funk\n", + "- Soul\n", + "- R&B\n", + "\n", + "## Dodajanje novega žanra\n", + "\n", + "Če želite dodati nov žanr, sledite tem korakom:\n", + "\n", + "1. Odprite nastavitve aplikacije.\n", + "2. Kliknite na \"Upravljanje žanrov\".\n", + "3. Vnesite ime novega žanra.\n", + "4. Kliknite \"Shrani\".\n", + "\n", + "[!TIP] Prepričajte se, da ime žanra ni podvojeno.\n", + "\n", + "## Urejanje obstoječega žanra\n", + "\n", + "Če želite urediti obstoječi žanr:\n", + "\n", + "1. Pojdite na seznam žanrov.\n", + "2. Kliknite na žanr, ki ga želite urediti.\n", + "3. Spremenite ime ali druge lastnosti.\n", + "4. Kliknite \"Shrani spremembe\".\n", + "\n", + "[!IMPORTANT] Urejanje žanrov lahko vpliva na obstoječe sezname predvajanja.\n", + "\n", + "## Brisanje žanra\n", + "\n", + "Če želite izbrisati žanr:\n", + "\n", + "1. Pojdite na seznam žanrov.\n", + "2. Izberite žanr, ki ga želite izbrisati.\n", + "3. 
Kliknite \"Izbriši\".\n", + "\n", + "[!WARNING] Brisanje žanra bo odstranilo vse povezave do skladb, ki so bile razvrščene pod tem žanrom.\n", + "\n", + "## Pogosta vprašanja\n", + "\n", + "### Kaj se zgodi, če izbrišem žanr?\n", + "\n", + "Ko izbrišete žanr, se skladbe, ki so bile razvrščene pod tem žanrom, ne bodo več prikazovale v kategoriji žanrov. Vendar pa bodo še vedno dostopne prek drugih filtrov ali iskanja.\n", + "\n", + "### Ali lahko obnovim izbrisan žanr?\n", + "\n", + "Ne, izbrisani žanri ni mogoče obnoviti. Priporočamo, da pred brisanjem ustvarite varnostno kopijo.\n", + "\n", + "### Ali lahko dodam več žanrov hkrati?\n", + "\n", + "Trenutno aplikacija podpira dodajanje žanrov enega za drugim. Funkcionalnost za množično dodajanje je v načrtu za prihodnje posodobitve.\n", + "\n", + "## Zaključek\n", + "\n", + "Upravljanje žanrov je pomemben del organizacije glasbe v aplikaciji. S pravilnim razvrščanjem skladb po žanrih lahko uporabniki uživajo v boljši izkušnji iskanja in poslušanja glasbe.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "df = df[df['artist_top_genre'] != 'Missing']\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "df = df[(df['artist_top_genre'] == 'afro dancehall') | (df['artist_top_genre'] == 'afropop') | (df['artist_top_genre'] == 'nigerian pop')]\n", + "df = df[(df['popularity'] > 0)]\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Podatki niso močno povezani, razen med energijo in glasnostjo, kar je smiselno. Priljubljenost ima povezavo z datumom izdaje, kar prav tako ima smisel, saj so novejše pesmi verjetno bolj priljubljene. Dolžina in energija se zdita povezani – morda so krajše pesmi bolj energične?\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "corrmat = df.corr()\n", + "f, ax = plt.subplots(figsize=(12, 9))\n", + "sns.heatmap(corrmat, vmax=.8, square=True);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.set_theme(style=\"ticks\")\n", + "\n", + "# Show the joint distribution using kernel density estimation\n", + "g = sns.jointplot(\n", + " data=df,\n", + " x=\"popularity\", y=\"danceability\", hue=\"artist_top_genre\",\n", + " kind=\"kde\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Na splošno se trije žanri ujemajo glede na svojo priljubljenost in plesnost. Raztreseni diagram z istimi osmi prikazuje podoben vzorec konvergence. Poskusite raztreseni diagram, da preverite porazdelitev podatkov po žanrih.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages/seaborn/axisgrid.py:337: UserWarning: The `size` parameter has been renamed to `height`; please update your code.\n", + " warnings.warn(msg, UserWarning)\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.FacetGrid(df, hue=\"artist_top_genre\", size=5) \\\n", + " .map(plt.scatter, \"popularity\", \"danceability\") \\\n", + " .add_legend()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za strojno prevajanje [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da upoštevate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo strokovno človeško prevajanje. Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki izhajajo iz uporabe tega prevoda.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" + }, + "kernelspec": { + "display_name": "Python 3.7.0 64-bit ('3.7')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.9" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "c61deff2839902ac8cb4ed411eb10fee", + "translation_date": "2025-09-06T14:08:48+00:00", + "source_file": "5-Clustering/1-Visualize/solution/notebook.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sl/5-Clustering/2-K-Means/notebook.ipynb b/translations/sl/5-Clustering/2-K-Means/notebook.ipynb new file mode 100644 index 000000000..465876a31 --- 
/dev/null +++ b/translations/sl/5-Clustering/2-K-Means/notebook.ipynb @@ -0,0 +1,231 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "3e5c8ab363e8d88f566d4365efc7e0bd", + "translation_date": "2025-09-06T14:19:28+00:00", + "source_file": "5-Clustering/2-K-Means/notebook.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: seaborn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.11.1)\n", + "Requirement already satisfied: numpy>=1.15 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.19.2)\n", + "Requirement already satisfied: pandas>=0.23 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.1.2)\n", + "Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.4.1)\n", + "Requirement already satisfied: matplotlib>=2.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (3.1.0)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in 
/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2.8.0)\n", + "Requirement already satisfied: pytz>=2017.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2019.1)\n", + "Requirement already satisfied: cycler>=0.10 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (1.1.0)\n", + "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (2.4.0)\n", + "Requirement already satisfied: six>=1.5 in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from python-dateutil>=2.7.3->pandas>=0.23->seaborn) (1.12.0)\n", + "Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from kiwisolver>=1.0.1->matplotlib>=2.2->seaborn) (45.1.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install seaborn" + ] + }, + { + "source": [ + "Začnite tam, kjer smo končali pri zadnji lekciji, z uvoženimi in filtriranimi podatki.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "0 Sparky Mandy & The Jungle \n", + "1 shuga rush EVERYTHING YOU 
HEARD IS TRUE \n", + "2 LITT! LITT! \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "0 Cruel Santino alternative r&b 2019 144000 48 \n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "2 AYLØ indie r&b 2018 207758 40 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "0 0.666 0.8510 0.420 0.534000 0.1100 -6.699 \n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "2 0.836 0.2720 0.564 0.000537 0.1100 -7.127 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "\n", + " speechiness tempo time_signature \n", + "0 0.0829 133.015 5 \n", + "1 0.3600 129.993 3 \n", + "2 0.0424 130.005 4 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
0SparkyMandy & The JungleCruel Santinoalternative r&b2019144000480.6660.85100.4200.5340000.1100-6.6990.0829133.0155
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
2LITT!LITT!AYLØindie r&b2018207758400.8360.27200.5640.0005370.1100-7.1270.0424130.0054
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
\n
" + }, + "metadata": {}, + "execution_count": 6 + } + ], + "source": [ + "\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "\n", + "\n", + "df = pd.read_csv(\"../data/nigerian-songs.csv\")\n", + "df.head()" + ] + }, + { + "source": [ + "Osredotočili se bomo le na 3 žanre. Morda lahko ustvarimo 3 gruče!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "metadata": {}, + "execution_count": 7 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlYAAAHbCAYAAAAJY9SEAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nO3de7ymc73/8dfbjNROhUwINR0msjvInk07hZLILofaiSJKTQfS+biT2NXu3O6oKL+0f6WURG0dpIOdnTJkO5UMEdNgoaQIw2f/cV1Td2ONGbO+y32vNa/n47Ee676/13Vf9yetWet9f09XqgpJkiRN3GrDLkCSJGm6MFhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiZFwh8Hvm5PuGng+fOHXZ8kTYa4QaikyZZwKfDiKr437FomImFmFYuHXYek0WWPlaShSLhXwicSFiVckfD+hNX7YzslLEg4NOG6hF8nPOdOrjUn4X8Sbkj4dsKnEz4zcPxJCT9N+H3CWQlbDxw7PeGQ/vsfEk5KWLs/tmnC4oSXJFwOnLQC13tJwqV9LZfcWd2Sph+DlaRhORR4DPBo4B+A7YA3DhyfDdwDWB94CXB0wkOWvkhCgGOBHwD3B94D7D1wfDbwdeBfgXWAtwFfXxKees8Dng9sAKwFvGrg2AxgK2ATYNc7u15/zfcD21dxH+CJwHl35T+KpKnNYCVpWJ4PHFLFNVVcBbwT2Gfg+GLg0Cpu6YcQvwf8yzjXmQNsChzWn/tD4FsDx/cFvlbF96q4vYqTgAuApw2cc2QVF1fxJ+CrwOZLvcfbq7ixiptW8HqPSrhnFb+t4hd36b+KpCnNYCXpbtf3Mq0PXDbQfBmw4cDzsSr+vNTxB45zuQf259480Hb5wOMHA3v3w3a/T/g9MHepa1058PhGYM2B57dX8dsVuV4Vv6MLjAcBVyacmPDwcWqWNE0ZrCTd7aooujDz4IHmBwELB
56vm3DPpY4PBpwlFgGzEtYYaNt44PHlwGeqWGvg695VfHhFy13q+Z1er4r/qmJ7uuD2G+DwFXwfSdOAwUrSsBwDHJJw/4QH0M1Z+v8Dx1cHDk64R8JTgB2A48a5zq+AC4G3JayesA2w08Dxo4HnJGyfMKOfNL99wvorWfcyr5ewYcI/J/wdcDPwR+D2lXwfSVOQwUrSsLydbm7S+cDZwGnA+waOX0o3z+pK4CjghVVcsvRF+t6v5wJPBX4HvBX4Cl2woX/Ns+kmy19DN6T4Klby999yrjcDeHNf87XAPwIHrsz7SJqa3MdK0shJ2An4eNXKzU9KOAE4vYp/b1uZJN05e6wkTXkJWyXMTlgt4Zl0Q4EnDLsuSauemcMuQJIa2Ihu/tXadJPLX1TFBcMtSdKqyKFASZKkRhwKlCRJamQkhgLXXXfdmj179rDLkCRJWq4zzzzzmqqaNd6xkQhWs2fPZv78+cMuQ5IkabmSXLasYw4FSpIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1MnPYBbT2D2/4/LBL0DRz5vtfMOwSJElThD1WkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqZHlBqskGyf5QZILkpyf5FV9+zpJTk5yUf997b49ST6aZEGSc5JsMdn/IyRJkkbBivRYLQZeV1WbAY8HDkiyGfBm4JSqmgOc0j8HeDowp/+aBxzevGpJkqQRtNxgVVWLquqs/vENwC+ADYFdgaP7044Gdusf7wp8vjqnA2sl2aB55ZIkSSPmLs2xSjIbeBzwU2C9qlrUH7oSWK9/vCFw+cDLrujblr7WvCTzk8wfGxu7i2VLkiSNnhUOVknWBI4DXl1Vfxg8VlUF1F1546o6oqrmVtXcWbNm3ZWXSpIkjaQVClZJVqcLVV+oqq/1zVctGeLrv1/dty8ENh54+UZ9myRJ0rS2IqsCA3wW+EVVfWjg0InAvv3jfYETBtpf0K8OfDxw/cCQoSRJ0rQ1cwXO2RrYBzg3ydl921uB9wDHJtkfuAzYoz92ErAzsAC4EXhh04olSZJG1HKDVVX9GMgyDm8/zvkFHDDBuiRJkqYcd16XJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqZHlBqskRyW5Osl5A21fTnJ2/3VpkrP79tlJbho49qnJLF6SJGmUzFyBcz4HfBz4/JKGqnruksdJPghcP3D+xVW1easCJUmSporlBquqOjXJ7PGOJQmwB/CUtmVJkiRNPROdY/Uk4Kqqumig7SFJfp7kR0metKwXJpmXZH6S+WNjYxMsQ5IkafgmGqz2Ao4ZeL4IeFBVPQ54LfDFJPcd74VVdURVza2qubNmzZpgGZIkScO30sEqyUzgWcCXl7RV1c1VdW3/+EzgYuAREy1SkiRpKphIj9VTgV9W1RVLGpLMSjKjf/xQYA5wycRKlCRJmhpWZLuFY4CfAJskuSLJ/v2hPfnbYUCAbYBz+u0Xvgq8rKqua1mwJEnSqFqRVYF7LaN9v3HajgOOm3hZkiRJU487r0uSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrS
ZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUyHKDVZKjklyd5LyBtnckWZjk7P5r54Fjb0myIMmFSXacrMIlSZJGzYr0WH0O2Gmc9g9X1eb910kASTYD9gT+vn/NJ5PMaFWsJEnSKFtusKqqU4HrVvB6uwJfqqqbq+rXwAJgywnUJ0mSNGVMZI7VgUnO6YcK1+7bNgQuHzjnir7tDpLMSzI/yfyxsbEJlCFJkjQaVjZYHQ48DNgcWAR88K5eoKqOqKq5VTV31qxZK1mGJEnS6FipYFVVV1XVbVV1O3Akfx3uWwhsPHDqRn2bJEnStLdSwSrJBgNPdweWrBg8EdgzyRpJHgLMAX42sRIlSZKmhpnLOyHJMcB2wLpJrgAOAbZLsjlQwKXASwGq6vwkxwIXAIuBA6rqtskpXZIkabQsN1hV1V7jNH/2Ts5/F/CuiRQlSZI0FbnzuiRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNLDdYJTkqydVJzhtoe3+SXyY5J8nxSdbq22cnuSnJ2f3XpyazeEmSpFGyIj1WnwN2WqrtZOBRVfUY4FfAWwaOXVxVm/dfL2tTpiRJ0uhbbrCqqlOB65Zq+25VLe6fng5sNAm1SZIkTSkt5li9CPjWwPOHJPl5kh8ledKyXpRkXpL5SeaPjY01KEOSJGm4JhSskvwrsBj4Qt+0CHhQVT0OeC3wxST3He+1VXVEVc2tqrmzZs2aSBmSJEkjYaWDVZL9gGcAz6+qAqiqm6vq2v7xmcDFwCMa1ClJkjTyVipYJdkJeCOwS1XdONA+K8mM/vFDgTnAJS0KlSRJGnUzl3dCkmOA7YB1k1wBHEK3CnAN4OQkAKf3KwC3AQ5LcitwO/Cyqrpu3AtLkiRNM8sNVlW11zjNn13GuccBx020KEmSpKnIndclSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJamSFglWSo5JcneS8gbZ1kpyc5KL++9p9e5J8NMmCJOck2WKyipckSRolK9pj9Tlgp6Xa3gycUlVzgFP65wBPB+b0X/OAwydepiRJ0uhboWBVVacC1y3VvCtwdP/4aGC3gfbPV+d0YK0kG7QoVpIkaZRNZI7VelW1qH98JbBe/3hD4PKB867o2/5GknlJ5ieZPzY2NoEyJEmSRkOTyetVVUDdxdccUVVzq2rurFmzWpQhSZI0VBMJVlctGeLrv1/dty8ENh44b6O+TZIkaVqbSLA6Edi3f7wvcMJA+wv61YGPB64fGDKUJEmatmauyElJjgG2A9ZNcgVwCPAe4Ngk+wOXAXv0p58E7AwsAG4EXti4ZkmSpJG0QsGqqvZaxqHtxzm3gAMmUpQkSdJU5M7rkiRJj
RisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDUyc2VfmGQT4MsDTQ8F3g6sBbwEGOvb31pVJ610hZIkSVPESgerqroQ2BwgyQxgIXA88ELgw1X1gSYVSpIkTRGthgK3By6uqssaXU+SJGnKaRWs9gSOGXh+YJJzkhyVZO3xXpBkXpL5SeaPjY2Nd4okSdKUMuFgleQewC7AV/qmw4GH0Q0TLgI+ON7rquqIqppbVXNnzZo10TIkSZKGrkWP1dOBs6rqKoCquqqqbquq24EjgS0bvIckSdLIaxGs9mJgGDDJBgPHdgfOa/AekiRJI2+lVwUCJLk3sAPw0oHm9yXZHCjg0qWOSZIkTVsTClZV9Sfg/ku17TOhiiRJkqYod16XJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKmRmcMuQNJd95vDHj3sEjTNPOjt5w67BGlasMdKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWpk5kQvkORS4AbgNmBxVc1Nsg7wZWA2cCmwR1X9bqLvJUmSNMpa9Vg9uao2r6q5/fM3A6dU1RzglP65JEnStDZZQ4G7Akf3j48Gdpuk95EkSRoZLYJVAd9NcmaSeX3belW1qH98JbDe0i9KMi/J/CTzx8bGGpQhSZI0XBOeYwU8saoWJnkAcHKSXw4erKpKUku/qKqOAI4AmDt37h2OS5IkTTUT7rGqqoX996uB44EtgauSbADQf796ou8jSZI06iYUrJLcO8l9ljwGngacB5wI7Nufti9wwkTeR5IkaSqY6FDgesDxSZZc64tV9e0kZwDHJtkfuAzYY4LvI0mSNPImFKyq6hLgseO0XwtsP5FrS5IkTTXuvC5JktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIysdrJJsnOQHSS5Icn6SV/Xt70iyMMnZ/dfO7cqVJEkaXTMn8NrFwOuq6qwk9wHOTHJyf+zDVfWBiZcnSZI0dax0sKqqRcCi/vENSX4BbNiqMEmSpKmmyRyrJLOBxwE/7ZsOTHJOkqOSrL2M18xLMj/J/LGxsRZlSJIkDdWEg1WSNYHjgFdX1R+Aw4GHAZvT9Wh9cLzXVdURVTW3qubOmjVromVIkiQN3YSCVZLV6ULVF6rqawBVdVVV3VZVtwNHAltOvExJkqTRN5FVgQE+C/yiqj400L7BwGm7A+etfHmSJElTx0RWBW4N7AOcm+Tsvu2twF5JNgcKuBR46YQqlCRJmiImsirwx0DGOXTSypcjSZI0dbnzuiRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDUyke0WJ
EmaNFt/bOthl6Bp5rRXnjbp72GPlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDUyacEqyU5JLkyyIMmbJ+t9JEmSRsWkBKskM4BPAE8HNgP2SrLZZLyXJEnSqJisHqstgQVVdUlV3QJ8Cdh1kt5LkiRpJKSq2l80+Rdgp6p6cf98H2Crqjpw4Jx5wLz+6SbAhc0L0Z1ZF7hm2EVIk8yfc60K/Dm/+z24qmaNd2Dm3V3JElV1BHDEsN5/VZdkflXNHXYd0mTy51yrAn/OR8tkDQUuBDYeeL5R3yZJkjRtTVawOgOYk+QhSe4B7AmcOEnvJUmSNBImZSiwqhYnORD4DjADOKqqzp+M99JKcxhWqwJ/zrUq8Od8hEzK5HVJkqRVkTuvS5IkNWKwkiRJasRgpSaSzE1yn2HXIUnSMBms1MpLgO8ariRp6kmSYdcwXRisNCFJtgCoqpcCZwLHG640VYz3x8Q/MFrVJElVVZKtk+yfZPt+qyStBFcFakKSnA7cWFVP6Z8fDswBdq+qG4ZanLQCkmxDt6HxH4Bv9n9gVquq24dcmnS3SfJk4LPAl4FnAEcDX6+qBUMtbAqyx0oTUlWPB2Yk+Ub//OXARdhzpRG2pFcqyVzgKGBrYG/g60tClT1XWlUk2QR4GfDqqnoLsC/dB+QdhlrYFGWw0l028EdpJkBVbQvMWipc/RL4fpI1h1aotAx9r9T2wFuAF1fVK4D9gKuBjyw5Z3gVSpMvPWAb4GHAjknuXVVnAccA85KsPdQipyCDle6SJWPx/dMNk8yBv/Rc3T/JN/vnBwKnAusMp1JpudYCdgf+sX9+C/BpwLklmtYGemPXBWZW1ZHAu4DQ3YIO4Erghr5Nd4FzrLRSkrwO2Bm4J/D9qjq4bz8VoKq2GWJ50h0MTNBdD7ihqm5M8s/A14Gdq+rkJDsA76MbArnWXitNV0l2Bg4DFgJ/AvYHnk03DLga3S3v3l9V3xxakVPUpNwrUNNbkhcBu1TVtkk+Brw2yd9V1euqapsk30mycVVdPuxapSX6UPVM4JVAJTmNrodqN+A7SY6l+4R+WFVdM8RSpUmV5JHAO4EDgbOBLwL/r6r2TPJnYEfg3CWhaqmRCi2HQ4FarnEm8S4A9knySmBD4DHA3kk+BVBVOxqqNGqSPIyuN+oNwAfoQtShwLfohgSfCfxPVR2/ZP6gNE3dDFwAnFVVN1bVbsAGSQ6g68H9KfDYJHsaqu46f3louZb8o+onot9cVacmuR+wLfC+qrq4/7S/VZJ1quq6YdYrDRr4w7A2cFlV/W/f/htgK+CpVXVCkn2BY5P8uqp+OLyKpbYGhsFn0HWoXAdsAMwFftyf9iW6X/eLkxwN3Ar8wFB119ljpWVK8rAkm/WPXwt8nm45+gOq6nrg18Czk7yZrufq2YYqjYqBntZ79d/PAxYnORCgqi4ELgc2659/FfgXYNHdXKo0qfpQtStwLN0+VY8EPgF8LMmBSV5MNyy4oD//1qo6uqquGlrRU5iT1zWuJPcCPgZcRddlPA94Od2ta3YHtqALU7sBTwYOqqrzhlOtNL4kO9H9zF4CnA4U3Z5Va9J9Qv80sF9V/Y9DHpqukmwKfAb4d7qVgO8A9qHrldoR2Aj4alV9d1g1TicGKy1Tv5XCa4H7AudX1bv79g8DOwFPqqprktyzqv48xFKlO0jyeOC9dB8QHkO3jcKtdJ/aX0230/r3q+obQytSmmRJHgV8ELiwqg7q23YEPkf3O9yd1RtzKFB/Y3CielVdBLwbuB54TJLH9O2vAf4b+EE/Zn/LM
GqVliXJhnQT1H/aD/G9D/gh3bySRVW1P/CGqvqGO6xrmvsV3Z5Uj0wyJ8kaVfUd4Dhg1nBLm54MVvqLwaGQJM9NshuwKV2v1fXA7gPhah7dpN/bvKeaRtBNdJNy90yyVVX9saq+DTyIrveKqlrcf7fbXtNSkhlVdQvwYrq5g68HdkmyLfAsYPEw65uuDFb6i4FQdSDdXj8A36D7Q/ReYH26bRb+vj929d1epDSOgdssPSrJdnRzqN5D11N1WJKn90PbGwO/H1qh0t2k/6B8W5KZVXUrXbhaDfhXulC1X1WdYY9tewYr/UWS1ZJsQDcZfXvgocApwM+r6hK6YcGZdBPa/aSvkdGvetoZOAF4Id1ePM+kG/47jW4DxE8AL6qqs/xjoulm4MPFnCTrL2nvt0+Y2fdcvQKYD/wdcJYLNiaHwWoVt9QfmBl0+5tcS7cr7zbAc6rq1iQv7895vbtSa9QkuTfdH419qmpfuo0/twXWo/tZPhj4I93PtzStDOxTtSNwIt0HiwOSPBz+JlzdSvfv5AF0NyB3L8tJYLBahS01p2pvYF5V3Uy3JP0gun2pbkzyPLr7SFVV3Ta8iqW/SrJa//0f6XaSvgbYBKCqTqDbt+oN/enH0n1SPyTJPe/+aqXJ04equXTDfc8EXgf8PbDbUuFqyZyr5wAf7IOWGjOtrsIGQtUBwIvo9jWhql6aZC3g1CQ/p9uder+qumJoxUq9JPeqqpuq6vYkTwQOp7tx7M+AjZPMrar5dCtXtwBmVNXVSY4AbndrEE03Se5DNwS+Rb99woL+g8dewHOTfKWqftXPuVqtD1e/HWbN05n7WK3ikqwNHAG8qaou6Zfi3twf24muJ+DSqvr1MOuU4C978vwH8Ay6rRMOp9vY8DNJHgocQLfIYjHwD8DBVXX8sOqVJsvS86OSbAJ8lG739Ff2Hzy2A54PvNvf4Xcfg9UqZrzJikm+Rrf673MDvVhbAedU1U1DKFO6gySr0wWpn9L9vD6NbthjbeAFVfXbJOvS7SK9KbCgquY7QVfTzcCcqh3othBJ/+HiEcCb6Ta/fW0frtauqt8NteBVjHOsViFLzama03/CAfgO8GDgn/pjzwXeRrdkXRolC+kmpX+Fbs7UYcDZwEFJ1q+qa6rq7Kr6Uj8c6OpVTSv9UF4l+We6HdWvoNtS5ANV9Su61dvrAx/vX3L9kEpdZdljtYpYKlS9lm5O1U3AfwH/RnfvqMfRDaE8DHheVZ07nGqlv7XUJ/T/BH5YVXv2x7amGxq8F92Qh/uradpJ8hBgtaq6uO+Z/U/gNcCSXqoNge9V1f79h+Y1quqc4VW86nLy+ipiIFQ9HngC8ERgDeAMYHFVHdzvYfVwuiGURUMrVhowEKoeSncLjmcBr07yTrqVTaf1E3V3oxsWNFhpOnoCcFGSK/p7tM6j+3k/lG4+4Wy6Ses3VdWBQ6xzlWewWoUkeSRwCF2v1GpVdVW/VP0nSR5YVa+gu+2BNDL6ULUL3bDfAuAS4NN0S8oPSvLRqvrvJOdWlbuqa1qqqi8kWRM4I8neVXVOkgcCZ/ZzqdYHPkQ3tUND5ByraWzp3aWr6hfAkXTBarsk61bVVcDWwBOTrOeO1Bo1fS/rwcCOwPF0Gxw+je4my9sCr+s3PzRUadoZ2FF9R+BRdEOAR/YrZC8F7pfkk3Q3VT6hqk729/hwOcdqmhpn88916O5y/h3g2XTDJl8HTu33+Jnh5p8aRUk2ottaYW26XdSfB3yKbhf1zwFjVXXG0AqUJlmSLYGPAK+pqtP7ebLPo/s9Dt39XP9UVT8aVo36K4cCp6mBUPUaYFe6VVRvotvs893AbcB+wK1JvgHcPpxKpTvXb0x7RZJ3AV+oqgVJPk93d4Dzq+qy4VYoTZ4kGwNvBM6tqtMBqupDfafUyXS3HTtpiCVqKQaraaa/fcE6VfWzfk7VFnQ3VX4D3f/f69NtpXAY3ZDgmVVlqNJUcC7w0n4/q2cBrzJUaRWwG
DgH2DXJTlX1bfhLuJoBrDXU6nQHBqtppL+twf7A6kluBf6Xbhnu04Gdge3ptlk4iO7WHocOq1ZpJZxEt5J1F+BdVXXakOuRmh/RBFIAAAVUSURBVBtYBftPdKtgf0N3t4HfA7snubWqTgGoqvcPsVQtg5PXp4l+07gb6HamXgzsCTyiqhYC9wN+1t8f6hbgW3SrqqQpo6r+UFVHA8+tqv9ygq6moz5UPQ04ClgPOJNugdGJdD1X+/XHNaLssZomBobzdgQeC2wC3DPJZ4CfAJ/t96naDtihqq4cSqHSxN0G7qiu6affj20t4KXA7nSLji4Azuq3x/kKXa+t2+KMMFcFTiNJngR8DNgSeDywE7A63XyqNek2kbugqi4ZWpGSpDuV5E10Iw1PAZ7f77a+H3AqcKnzYkebQ4HTy5rAtVV1S1WdSrevyVPo7hm1TlV901AlSaMnyeZJDumf3hvYB9i7D1WPpVvV/UBD1ehzKHB6+RmwMMmewFeq6swkp9EF6KuGW5okadDARPUnAc8BdkxydVW9PcmmwCFJFgObA2+qqh8PtWCtEIPV9HI98GO6vaqelmQ+3T0Bn11V1wy1MkkS8NdA1YeqbYAvAAcCC4EnJ1mjqvZI8kS6jXE/3n9QjnMLR59zrKaYfvXfMruCk9wL2JRu4uOawGer6vy7qz5J0rL19/d7JPDDqrqtvzPGhlX13v5egJsD7wWOraqPDLNWrRyD1RTV/2N8EHADcMx4PVL9/dMW3+3FSZLGlWRX4CLgCrqtcbYHPky3WvvX/crAo+nmWX21qr44tGK1Upy8PgUleRHdxp8X093376Akj+6PZcn+PoYqSRotVXUCcCXwSbp7/X2X7t6XH+nnVT2G7t6YFwEbDqtOrTznWE0B44yrbwe8vqq+neRU4GC6DUHPdfxdkkbP4O/xqrouyY+Ap9Ft2nw8EOA/6Xqx9qe7HdkO/S2cFvu7fepwKHAKGFg58jLgDLpb1NwT+FD/D/QhdLv07l5Vvx9mrZKk8SXZFng08P2quiDJXnS/z79eVV9Lcu/+1C3p7o6xu3Nkpx6HAkdYkk3gL7c4eBawB/BbunC1Ft3Kv7WAR9F9yrllWLVKku5oydSMJFvRDf9tC7wxyUuq6hjgm8DeSfYA/kz3ofkJwK6GqqnJocARlWRH4PAkW9CNt78YOK+qFgGLkmwMbNO33wN4ZVXdOLSCJUl30H8w3hI4FNirqs7p9xp8Qh+ujkwyA7iwqm4Drk3y/v7erpqCDFYjKMlMuq7gg4HN6Jbf/gDYNckz+h3UP5PkfnR7nPypqsaGV7Ek6U6sBTwV2IHuRspfBW6nn0NVVZ+Ev9nfylA1hRmsRlBVLU5yMfA2uhvOPpmui/gmYJcki6vq21V1Pd2moJKkEVVV3+2nc/x7kt9W1TFJvgrMAP534DwnPU8DBqvRdQ5wI/AH4H5VdU2Sr9F9ytk3ya1VdcpQK5QkrZCqOrG/Pc2/JblHVR0NHDPsutSeqwJHxOBS3CT3AG7rd+V9Pd2NlA+pqjOSbES3iuSb/XwrSdIUkWQX4D10Q4NXelPl6cdgNQKWClUH0s2r+gPwjqr6c5K30t3/7z1V9ZMkM/pJjpKkKSbJLOfFTl8GqxGS5BXAc4HnAWcB3wPeXlUXJ3kn8HBgv6r68xDLlCRJy2CwGhFJ7gt8iG4l4HOAnYGr6bZaeHlVLUhy/6q6dohlSpKkO2GwGiFJ1gA2Bf6jqp7cbyw3RrcD7zuq6tahFihJku6UqwJHSFXdnORGYGZ/U+UHA6cAnzZUSZI0+uyxGjF9r9Wr6VaMPBB4TlVdMNyqJEnSijBYjaD+bubrA7dX1cJh1yNJklaMwUqSJKmR1YZdgCRJ0nRhsJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmN/B/Djeb5PsBsCgAAAABJRU5ErkJggg==\n" + }, + "metadata": { + 
"needs_background": "light" + } + } + ], + "source": [ + "df = df[(df['artist_top_genre'] == 'afro dancehall') | (df['artist_top_genre'] == 'afropop') | (df['artist_top_genre'] == 'nigerian pop')]\n", + "df = df[(df['popularity'] > 0)]\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. \n", + "5 Kasala Pioneers \n", + "6 Pull Up Everything Pretty \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "5 DRB Lasgidi nigerian pop 2020 184800 26 \n", + "6 prettyboydo nigerian pop 2018 202648 29 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "5 0.803 0.1270 0.525 0.000007 0.1290 -10.034 \n", + "6 0.818 0.4520 0.587 0.004490 0.5900 -9.840 \n", + "\n", + " speechiness tempo time_signature \n", + "1 0.3600 129.993 3 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 \n", + "5 0.1970 100.103 4 \n", + "6 0.1990 95.842 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
5KasalaPioneersDRB Lasgidinigerian pop2020184800260.8030.12700.5250.0000070.1290-10.0340.1970100.1034
6Pull UpEverything Prettyprettyboydonigerian pop2018202648290.8180.45200.5870.0044900.5900-9.8400.199095.8424
\n
" + }, + "metadata": {}, + "execution_count": 8 + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za prevajanje z umetno inteligenco [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da upoštevate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo profesionalni človeški prevod. Ne prevzemamo odgovornosti za morebitna napačna razumevanja ali napačne interpretacije, ki bi nastale zaradi uporabe tega prevoda.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sl/5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb b/translations/sl/5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb new file mode 100644 index 000000000..1413736b9 --- /dev/null +++ b/translations/sl/5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb @@ -0,0 +1,640 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "anaconda-cloud": "", + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "3.4.1" + }, + "colab": { + "name": "lesson_14.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "coopTranslator": { + "original_hash": "ad65fb4aad0a156b42216e4929f490fc", + "translation_date": "2025-09-06T14:25:26+00:00", + "source_file": "5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb", + "language_code": "sl" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "GULATlQXLXyR" + }, + "source": [ + "## Raziskovanje gručenja K-Means z uporabo R in načel urejenih podatkov.\n", + "\n", + "### 
[**Predhodni kviz**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/29/)\n", + "\n", + "V tej lekciji se boste naučili, kako ustvariti gruče z uporabo paketa Tidymodels in drugih paketov v ekosistemu R (imenovali jih bomo prijatelji 🧑‍🤝‍🧑) ter nigerijskega glasbenega nabora podatkov, ki ste ga uvozili prej. Pokrili bomo osnove K-Means za gručenje. Upoštevajte, da, kot ste se naučili v prejšnji lekciji, obstaja veliko načinov za delo z gručenjem, metoda, ki jo uporabite, pa je odvisna od vaših podatkov. Poskusili bomo K-Means, saj je to najpogostejša tehnika gručenja. Začnimo!\n", + "\n", + "Pojmi, o katerih se boste učili:\n", + "\n", + "- Silhuetno ocenjevanje\n", + "\n", + "- Metoda komolca\n", + "\n", + "- Inercija\n", + "\n", + "- Varianca\n", + "\n", + "### **Uvod**\n", + "\n", + "[K-Means gručenje](https://wikipedia.org/wiki/K-means_clustering) je metoda, ki izhaja iz področja obdelave signalov. Uporablja se za razdelitev in razvrščanje skupin podatkov v `k gruče` na podlagi podobnosti njihovih značilnosti.\n", + "\n", + "Gruče je mogoče vizualizirati kot [Voronoijeve diagrame](https://wikipedia.org/wiki/Voronoi_diagram), ki vključujejo točko (ali 'seme') in njeno ustrezno regijo.\n", + "\n", + "

\n", + " \n", + "

Infografika avtorice Jen Looper
\n", + "\n", + "\n", + "Postopek K-Means gručenja vključuje naslednje korake:\n", + "\n", + "1. Podatkovni znanstvenik najprej določi želeno število gruč, ki jih želi ustvariti.\n", + "\n", + "2. Nato algoritem naključno izbere K opazovanj iz nabora podatkov, ki služijo kot začetna središča gruč (tj. centroidi).\n", + "\n", + "3. Nato se vsako preostalo opazovanje dodeli najbližjemu centroidu.\n", + "\n", + "4. Nato se izračunajo nove povprečne vrednosti vsake gruče, centroid pa se premakne na povprečje.\n", + "\n", + "5. Ko so središča ponovno izračunana, se vsako opazovanje ponovno preveri, ali bi lahko bilo bližje drugi gruči. Vsa opazovanja se ponovno prerazporedijo z uporabo posodobljenih povprečnih vrednosti gruč. Koraki dodeljevanja gruč in posodabljanja centroidov se ponavljajo, dokler se dodelitve gruč ne prenehajo spreminjati (tj. ko je dosežena konvergenca). Algoritem se običajno ustavi, ko vsaka nova iteracija povzroči zanemarljivo premikanje centroidov in gruče postanejo statične.\n", + "\n", + "
\n", + "\n", + "> Upoštevajte, da zaradi naključnosti začetnih k opazovanj, ki se uporabljajo kot začetni centroidi, lahko dobimo nekoliko različne rezultate vsakič, ko uporabimo postopek. Zaradi tega večina algoritmov uporablja več *naključnih začetkov* in izbere iteracijo z najnižjim WCSS. Zato je močno priporočljivo, da K-Means vedno izvajate z več vrednostmi *nstart*, da se izognete *nezaželenemu lokalnemu optimumu.*\n", + "\n", + "
\n", + "\n", + "Ta kratka animacija z uporabo [ilustracij](https://github.com/allisonhorst/stats-illustrations) Allison Horst pojasnjuje postopek gručenja:\n", + "\n", + "

\n", + " \n", + "

Ilustracija avtorice @allison_horst
\n", + "\n", + "\n", + "\n", + "Osnovno vprašanje, ki se pojavi pri gručenju, je naslednje: kako veste, na koliko gruč razdeliti svoje podatke? Ena od pomanjkljivosti uporabe K-Means je dejstvo, da morate določiti `k`, torej število `centroidov`. Na srečo metoda `komolca` pomaga oceniti dobro začetno vrednost za `k`. Kmalu jo boste preizkusili.\n", + "\n", + "### \n", + "\n", + "**Predpogoj**\n", + "\n", + "Nadaljevali bomo tam, kjer smo končali v [prejšnji lekciji](https://github.com/microsoft/ML-For-Beginners/blob/main/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb), kjer smo analizirali nabor podatkov, ustvarili veliko vizualizacij in filtrirali nabor podatkov na zanimiva opazovanja. Prepričajte se, da si jo ogledate!\n", + "\n", + "Za dokončanje tega modula bomo potrebovali nekaj paketov. Namestite jih lahko z ukazom: `install.packages(c('tidyverse', 'tidymodels', 'cluster', 'summarytools', 'plotly', 'paletteer', 'factoextra', 'patchwork'))`\n", + "\n", + "Alternativno spodnji skript preveri, ali imate potrebne pakete za dokončanje tega modula, in jih namesti, če manjkajo.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "ah_tBi58LXyi" + }, + "source": [ + "suppressWarnings(if(!require(\"pacman\")) install.packages(\"pacman\"))\n", + "\n", + "pacman::p_load('tidyverse', 'tidymodels', 'cluster', 'summarytools', 'plotly', 'paletteer', 'factoextra', 'patchwork')\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7e--UCUTLXym" + }, + "source": [ + "Začnimo!\n", + "\n", + "## 1. Ples s podatki: Omejimo se na 3 najbolj priljubljene glasbene zvrsti\n", + "\n", + "To je povzetek tega, kar smo naredili v prejšnji lekciji. 
Razčlenimo in analizirajmo nekaj podatkov!\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Ycamx7GGLXyn" + }, + "source": [ + "# Load the core tidyverse and make it available in your current R session\n", + "library(tidyverse)\n", + "\n", + "# Import the data into a tibble\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/5-Clustering/data/nigerian-songs.csv\", show_col_types = FALSE)\n", + "\n", + "# Narrow down to top 3 popular genres\n", + "nigerian_songs <- df %>% \n", + " # Concentrate on top 3 genres\n", + " filter(artist_top_genre %in% c(\"afro dancehall\", \"afropop\",\"nigerian pop\")) %>% \n", + " # Remove unclassified observations\n", + " filter(popularity != 0)\n", + "\n", + "\n", + "\n", + "# Visualize popular genres using bar plots\n", + "theme_set(theme_light())\n", + "nigerian_songs %>%\n", + " count(artist_top_genre) %>%\n", + " ggplot(mapping = aes(x = artist_top_genre, y = n,\n", + " fill = artist_top_genre)) +\n", + " geom_col(alpha = 0.8) +\n", + " paletteer::scale_fill_paletteer_d(\"ggsci::category10_d3\") +\n", + " ggtitle(\"Top genres\") +\n", + " theme(plot.title = element_text(hjust = 0.5))\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b5h5zmkPLXyp" + }, + "source": [ + "🤩 To je šlo odlično!\n", + "\n", + "## 2. Več raziskovanja podatkov.\n", + "\n", + "Kako čisti so ti podatki? Preverimo izstopajoče vrednosti z uporabo škatelnih diagramov. Osredotočili se bomo na številske stolpce z manj izstopajočimi vrednostmi (čeprav bi lahko odstranili izstopajoče vrednosti). Škatelni diagrami lahko pokažejo razpon podatkov in pomagajo pri izbiri, katere stolpce uporabiti. Upoštevajte, da škatelni diagrami ne prikazujejo variance, kar je pomemben element za dobro združljive podatke. 
Za več informacij si oglejte [to razpravo](https://stats.stackexchange.com/questions/91536/deduce-variance-from-boxplot).\n", + "\n", + "[Škatelni diagrami](https://en.wikipedia.org/wiki/Box_plot) se uporabljajo za grafično prikazovanje porazdelitve `številskih` podatkov, zato začnimo z *izbiro* vseh številskih stolpcev skupaj s priljubljenimi glasbenimi žanri.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "HhNreJKLLXyq" + }, + "source": [ + "# Select top genre column and all other numeric columns\n", + "df_numeric <- nigerian_songs %>% \n", + " select(artist_top_genre, where(is.numeric)) \n", + "\n", + "# Display the data\n", + "df_numeric %>% \n", + " slice_head(n = 5)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uYXrwJRaLXyq" + }, + "source": [ + "Opazite, kako funkcija za izbiro `where` to olajša 💁? Raziščite še druge podobne funkcije [tukaj](https://tidyselect.r-lib.org/).\n", + "\n", + "Ker bomo izdelali škatlaste diagrame za vsako številsko značilnost in se želimo izogniti uporabi zank, bomo preoblikovali naše podatke v *daljšo* obliko, ki nam bo omogočila uporabo `facets` - podgrafov, ki vsak prikazujejo en podnabor podatkov.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "gd5bR3f8LXys" + }, + "source": [ + "# Pivot data from wide to long\n", + "df_numeric_long <- df_numeric %>% \n", + " pivot_longer(!artist_top_genre, names_to = \"feature_names\", values_to = \"values\") \n", + "\n", + "# Print out data\n", + "df_numeric_long %>% \n", + " slice_head(n = 15)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-7tE1swnLXyv" + }, + "source": [ + "Zdaj pa nekaj daljšega! Čas je za nekaj `ggplotov`! 
Kateri `geom` bomo uporabili?\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "r88bIsyuLXyy" + }, + "source": [ + "# Make a box plot\n", + "df_numeric_long %>% \n", + " ggplot(mapping = aes(x = feature_names, y = values, fill = feature_names)) +\n", + " geom_boxplot() +\n", + " facet_wrap(~ feature_names, ncol = 4, scales = \"free\") +\n", + " theme(legend.position = \"none\")\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EYVyKIUELXyz" + }, + "source": [ + "Zdaj lahko vidimo, da so ti podatki nekoliko hrupni: če opazujemo vsak stolpec kot škatelni diagram, lahko vidimo odstopajoče vrednosti. Lahko bi pregledali podatkovni niz in odstranili te odstopajoče vrednosti, vendar bi to podatke precej zmanjšalo.\n", + "\n", + "Zaenkrat izberimo, katere stolpce bomo uporabili za našo nalogo gručenja. Izberimo številske stolpce s podobnimi razponi. Stolpec `artist_top_genre` bi lahko kodirali kot številskega, vendar ga bomo za zdaj izpustili.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "-wkpINyZLXy0" + }, + "source": [ + "# Select variables with similar ranges\n", + "df_numeric_select <- df_numeric %>% \n", + " select(popularity, danceability, acousticness, loudness, energy) \n", + "\n", + "# Normalize data\n", + "# df_numeric_select <- scale(df_numeric_select)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D7dLzgpqLXy1" + }, + "source": [ + "## 3. Izračun k-means gručenja v R\n", + "\n", + "K-means lahko izračunamo v R z vgrajeno funkcijo `kmeans`, glejte `help(\"kmeans()\")`. Funkcija `kmeans()` sprejme podatkovni okvir z vsemi številsko izraženimi stolpci kot svoj primarni argument.\n", + "\n", + "Prvi korak pri uporabi k-means gručenja je določitev števila gruč (k), ki bodo ustvarjene v končni rešitvi. 
Vemo, da obstajajo 3 glasbeni žanri, ki smo jih izluščili iz nabora podatkov, zato poskusimo s 3:\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "uC4EQ5w7LXy5" + }, + "source": [ + "set.seed(2056)\n", + "# Kmeans clustering for 3 clusters\n", + "kclust <- kmeans(\n", + " df_numeric_select,\n", + " # Specify the number of clusters\n", + " centers = 3,\n", + " # How many random initial configurations\n", + " nstart = 25\n", + ")\n", + "\n", + "# Display clustering object\n", + "kclust\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hzfhscWrLXy-" + }, + "source": [ + "Kmeans objekt vsebuje več informacij, ki so dobro razložene v `help(\"kmeans()\")`. Za zdaj se osredotočimo na nekaj ključnih točk. Vidimo, da so podatki razdeljeni v 3 skupine velikosti 65, 110, 111. Rezultat prav tako vsebuje središča skupin (povprečja) za 3 skupine glede na 5 spremenljivk.\n", + "\n", + "Vektor razvrščanja predstavlja dodelitev skupine za vsako opazovanje. Uporabimo funkcijo `augment`, da dodamo dodelitev skupine v originalni nabor podatkov.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "0XwwpFGQLXy_" + }, + "source": [ + "# Add predicted cluster assignment to data set\n", + "augment(kclust, df_numeric_select) %>% \n", + " relocate(.cluster) %>% \n", + " slice_head(n = 10)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NXIVXXACLXzA" + }, + "source": [ + "Odlično, pravkar smo razdelili naš nabor podatkov v 3 skupine. Kako dobra je torej naša razvrstitev 🤷? Poglejmo si `Silhouette score`.\n", + "\n", + "### **Silhouette score**\n", + "\n", + "[Silhouette analiza](https://en.wikipedia.org/wiki/Silhouette_(clustering)) se lahko uporabi za preučevanje razdalje med nastalimi grozdi. Ta ocena se giblje od -1 do 1, pri čemer vrednost blizu 1 pomeni, da je grozd gost in dobro ločen od drugih grozdov. 
Vrednost blizu 0 predstavlja prekrivajoče se grozde, kjer so vzorci zelo blizu odločitveni meji sosednjih grozdov. [vir](https://dzone.com/articles/kmeans-silhouette-score-explained-with-python-exam).\n", + "\n", + "Metoda povprečnega silhouette izračuna povprečno silhouette opazovanj za različne vrednosti *k*. Visoka povprečna silhouette ocena kaže na dobro razvrstitev.\n", + "\n", + "Funkcija `silhouette` v paketu `cluster` omogoča izračun povprečne širine silhouette.\n", + "\n", + "> Silhouette se lahko izračuna z uporabo katere koli [razdalje](https://en.wikipedia.org/wiki/Distance \"Distance\"), kot sta [Evklidska razdalja](https://en.wikipedia.org/wiki/Euclidean_distance \"Euclidean distance\") ali [Manhattanska razdalja](https://en.wikipedia.org/wiki/Manhattan_distance \"Manhattan distance\"), ki smo ju obravnavali v [prejšnji lekciji](https://github.com/microsoft/ML-For-Beginners/blob/main/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb).\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Jn0McL28LXzB" + }, + "source": [ + "# Load cluster package\n", + "library(cluster)\n", + "\n", + "# Compute average silhouette score\n", + "ss <- silhouette(kclust$cluster,\n", + " # Compute euclidean distance\n", + " dist = dist(df_numeric_select))\n", + "mean(ss[, 3])\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QyQRn97nLXzC" + }, + "source": [ + "Naš rezultat je **.549**, kar pomeni, da smo nekje na sredini. To kaže, da naši podatki niso posebej primerni za tovrstno razvrščanje v skupine. Poglejmo, ali lahko to domnevo vizualno potrdimo. 
Paket [factoextra](https://rpkgs.datanovia.com/factoextra/index.html) ponuja funkcije (`fviz_cluster()`), ki omogočajo vizualizacijo razvrščanja v skupine.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "7a6Km1_FLXzD" + }, + "source": [ + "library(factoextra)\n", + "\n", + "# Visualize clustering results\n", + "fviz_cluster(kclust, df_numeric_select)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IBwCWt-0LXzD" + }, + "source": [ + "Prekrivanje med grozdi kaže, da naši podatki niso posebej primerni za to vrsto grozdenja, vendar nadaljujmo.\n", + "\n", + "## 4. Določanje optimalnega števila grozdov\n", + "\n", + "Osnovno vprašanje, ki se pogosto pojavi pri K-Means grozdenju, je naslednje - brez znanih oznak razredov, kako veste, na koliko grozdov razdeliti svoje podatke?\n", + "\n", + "Eden od načinov, kako to ugotoviti, je uporaba vzorca podatkov za `ustvarjanje serije modelov grozdenja` z naraščajočim številom grozdov (npr. od 1 do 10) in ocenjevanje metrik grozdenja, kot je **Silhouette score.**\n", + "\n", + "Določimo optimalno število grozdov tako, da izvedemo algoritem grozdenja za različne vrednosti *k* in ocenimo **vsoto kvadratov znotraj grozda** (WCSS). Skupna vsota kvadratov znotraj grozda (WCSS) meri kompaktnost grozdenja, pri čemer želimo, da je čim manjša, saj nižje vrednosti pomenijo, da so podatkovne točke bližje skupaj.\n", + "\n", + "Raziskujmo učinek različnih izbir `k`, od 1 do 10, na to grozdenje.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "hSeIiylDLXzE" + }, + "source": [ + "# Create a series of clustering models\n", + "kclusts <- tibble(k = 1:10) %>% \n", + " # Perform kmeans clustering for 1,2,3 ... 
,10 clusters\n", + " mutate(model = map(k, ~ kmeans(df_numeric_select, centers = .x, nstart = 25)),\n", + " # Farm out clustering metrics eg WCSS\n", + " glanced = map(model, ~ glance(.x))) %>% \n", + " unnest(cols = glanced)\n", + " \n", + "\n", + "# View clustering results\n", + "kclusts\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "m7rS2U1eLXzE" + }, + "source": [ + "Zdaj, ko imamo skupno vsoto kvadratov znotraj grozdov (tot.withinss) za vsak algoritem grozdenja s središčem *k*, uporabimo [metodo komolca](https://en.wikipedia.org/wiki/Elbow_method_(clustering)), da najdemo optimalno število grozdov. Metoda vključuje risanje WCSS kot funkcije števila grozdov in izbiro [komolca krivulje](https://en.wikipedia.org/wiki/Elbow_of_the_curve \"Elbow of the curve\") kot števila grozdov, ki jih uporabimo.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "o_DjHGItLXzF" + }, + "source": [ + "set.seed(2056)\n", + "# Use elbow method to determine optimum number of clusters\n", + "kclusts %>% \n", + " ggplot(mapping = aes(x = k, y = tot.withinss)) +\n", + " geom_line(size = 1.2, alpha = 0.8, color = \"#FF7F0EFF\") +\n", + " geom_point(size = 2, color = \"#FF7F0EFF\")\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pLYyt5XSLXzG" + }, + "source": [ + "Graf prikazuje veliko zmanjšanje WCSS (torej večjo *kompaktnost*), ko se število grozdov poveča z enega na dva, in nadaljnje opazno zmanjšanje z dveh na tri grozde. Po tem je zmanjšanje manj izrazito, kar povzroči `komolec` 💪 na grafu pri približno treh grozdih. 
To je dober pokazatelj, da obstajata dva do trije razmeroma dobro ločeni grozdi podatkovnih točk.\n", + "\n", + "Zdaj lahko nadaljujemo in izluščimo model grozdenja, kjer je `k = 3`:\n", + "\n", + "> `pull()`: uporablja se za izvlečenje ene same kolone\n", + ">\n", + "> `pluck()`: uporablja se za indeksiranje podatkovnih struktur, kot so seznami\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "JP_JPKBILXzG" + }, + "source": [ + "# Extract k = 3 clustering\n", + "final_kmeans <- kclusts %>% \n", + " filter(k == 3) %>% \n", + " pull(model) %>% \n", + " pluck(1)\n", + "\n", + "\n", + "final_kmeans\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l_PDTu8tLXzI" + }, + "source": [ + "Odlično! Poglejmo si pridobljene grozde. Vas zanima nekaj interaktivnosti z uporabo `plotly`?\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "dNcleFe-LXzJ" + }, + "source": [ + "# Add predicted cluster assignment to data set\n", + "results <- augment(final_kmeans, df_numeric_select) %>% \n", + " bind_cols(df_numeric %>% select(artist_top_genre)) \n", + "\n", + "# Plot cluster assignments\n", + "clust_plt <- results %>% \n", + " ggplot(mapping = aes(x = popularity, y = danceability, color = .cluster, shape = artist_top_genre)) +\n", + " geom_point(size = 2, alpha = 0.8) +\n", + " paletteer::scale_color_paletteer_d(\"ggthemes::Tableau_10\")\n", + "\n", + "ggplotly(clust_plt)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6JUM_51VLXzK" + }, + "source": [ + "Morda bi pričakovali, da bo imel vsak grozd (predstavljen z različnimi barvami) različne žanre (predstavljene z različnimi oblikami).\n", + "\n", + "Poglejmo natančnost modela.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "HdIMUGq7LXzL" + }, + "source": [ + "# Assign genres to predefined integers\n", + "label_count <- results %>% \n", + " group_by(artist_top_genre) 
%>% \n", + " mutate(id = cur_group_id()) %>% \n", + " ungroup() %>% \n", + " summarise(correct_labels = sum(.cluster == id))\n", + "\n", + "\n", + "# Print results \n", + "cat(\"Result:\", label_count$correct_labels, \"out of\", nrow(results), \"samples were correctly labeled.\")\n", + "\n", + "cat(\"\\nAccuracy score:\", label_count$correct_labels/nrow(results))\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "C50wvaAOLXzM" + }, + "source": [ + "Točnost tega modela ni slaba, vendar ni odlična. Morda podatki niso primerni za K-Means razvrščanje. Ti podatki so preveč neuravnoteženi, premalo povezani in med vrednostmi stolpcev je preveč variance, da bi jih lahko dobro razvrstili. Pravzaprav so skupine, ki se oblikujejo, verjetno močno vplivane ali izkrivljene zaradi treh kategorij žanrov, ki smo jih opredelili zgoraj.\n", + "\n", + "Kljub temu je bil to precej poučen proces!\n", + "\n", + "V dokumentaciji Scikit-learn lahko vidite, da ima model, kot je ta, pri katerem skupine niso dobro opredeljene, težavo z 'varianco':\n", + "\n", + "

\n", + " \n", + "

Infografika iz Scikit-learn
\n", + "\n", + "\n", + "\n", + "## **Varianca**\n", + "\n", + "Varianca je opredeljena kot \"povprečje kvadratov razlik od povprečja\" [vir](https://www.mathsisfun.com/data/standard-deviation.html). V kontekstu te težave z razvrščanjem se nanaša na podatke, pri katerih se vrednosti našega nabora podatkov preveč oddaljujejo od povprečja.\n", + "\n", + "✅ To je odličen trenutek, da razmislite o vseh načinih, kako bi lahko odpravili to težavo. Bi lahko podatke še malo prilagodili? Uporabili različne stolpce? Uporabili drugačen algoritem? Namig: Poskusite [normalizirati podatke](https://www.mygreatlearning.com/blog/learning-data-science-with-k-means-clustering/) in preizkusiti druge stolpce.\n", + "\n", + "> Poskusite '[kalkulator variance](https://www.calculatorsoup.com/calculators/statistics/variance-calculator.php)', da bolje razumete koncept.\n", + "\n", + "------------------------------------------------------------------------\n", + "\n", + "## **🚀Izziv**\n", + "\n", + "Preživite nekaj časa s tem zvezkom in prilagodite parametre. Ali lahko izboljšate natančnost modela z dodatnim čiščenjem podatkov (na primer odstranjevanjem odstopanj)? Lahko uporabite uteži, da nekaterim vzorcem podatkov dodelite večjo težo. Kaj še lahko storite, da ustvarite boljše skupine?\n", + "\n", + "Namig: Poskusite normalizirati podatke. V zvezku je komentirana koda, ki dodaja standardno normalizacijo, da se stolpci podatkov bolj približajo drug drugemu glede na obseg. Ugotovili boste, da se medtem ko se silhuetni rezultat zniža, 'pregib' na grafu komolca zgladi. To je zato, ker nenormalizirani podatki omogočajo, da podatki z manjšo varianco nosijo večjo težo. 
Preberite več o tej težavi [tukaj](https://stats.stackexchange.com/questions/21222/are-mean-normalization-and-feature-scaling-needed-for-k-means-clustering/21226#21226).\n", + "\n", + "## [**Kvizi po predavanju**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/30/)\n", + "\n", + "## **Pregled in samostojno učenje**\n", + "\n", + "- Oglejte si simulator K-Means [kot je ta](https://user.ceng.metu.edu.tr/~akifakkus/courses/ceng574/k-means/). S tem orodjem lahko vizualizirate vzorčne podatkovne točke in določite njihove centroidne točke. Lahko urejate naključnost podatkov, število skupin in število centroidov. Ali vam to pomaga pridobiti idejo, kako se podatki lahko razvrstijo?\n", + "\n", + "- Prav tako si oglejte [ta priročnik o K-Means](https://stanford.edu/~cpiech/cs221/handouts/kmeans.html) iz Stanforda.\n", + "\n", + "Želite preizkusiti svoje novo pridobljene veščine razvrščanja na naborih podatkov, ki so primerni za K-Means razvrščanje? Oglejte si:\n", + "\n", + "- [Usposabljanje in ocenjevanje modelov razvrščanja](https://rpubs.com/eR_ic/clustering) z uporabo Tidymodels in podobnih orodij\n", + "\n", + "- [Analiza skupin K-Means](https://uc-r.github.io/kmeans_clustering), UC Business Analytics R Programming Guide\n", + "\n", + "- [K-Means razvrščanje z načeli urejenih podatkov](https://www.tidymodels.org/learn/statistics/k-means/)\n", + "\n", + "## **Naloga**\n", + "\n", + "[Preizkusite različne metode razvrščanja](https://github.com/microsoft/ML-For-Beginners/blob/main/5-Clustering/2-K-Means/assignment.md)\n", + "\n", + "## HVALA:\n", + "\n", + "[Jen Looper](https://www.twitter.com/jenlooper) za ustvarjanje izvirne Python različice tega modula ♥️\n", + "\n", + "[`Allison Horst`](https://twitter.com/allison_horst/) za ustvarjanje čudovitih ilustracij, ki naredijo R bolj prijazen in privlačen. 
Več ilustracij najdete v njeni [galeriji](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM).\n", + "\n", + "Veselo učenje,\n", + "\n", + "[Eric](https://twitter.com/ericntay), Gold Microsoft Learn Student Ambassador.\n", + "\n", + "

\n", + " \n", + "

Umetniško delo @allison_horst
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za strojno prevajanje [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da upoštevate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo strokovno človeško prevajanje. Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki izhajajo iz uporabe tega prevoda.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sl/5-Clustering/2-K-Means/solution/notebook.ipynb b/translations/sl/5-Clustering/2-K-Means/solution/notebook.ipynb new file mode 100644 index 000000000..f4d88a751 --- /dev/null +++ b/translations/sl/5-Clustering/2-K-Means/solution/notebook.ipynb @@ -0,0 +1,550 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "e867e87e3129c8875423a82945f4ad5e", + "translation_date": "2025-09-06T14:20:36+00:00", + "source_file": "5-Clustering/2-K-Means/solution/notebook.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + 
"name": "stdout", + "text": [ + "Requirement already satisfied: seaborn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.11.1)\n", + "Requirement already satisfied: pandas>=0.23 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.1.2)\n", + "Requirement already satisfied: matplotlib>=2.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (3.1.0)\n", + "Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.4.1)\n", + "Requirement already satisfied: numpy>=1.15 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.19.2)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2.8.0)\n", + "Requirement already satisfied: pytz>=2017.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2019.1)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (1.1.0)\n", + "Requirement already satisfied: cycler>=0.10 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n", + "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (2.4.0)\n", + "Requirement already satisfied: six>=1.5 in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from python-dateutil>=2.7.3->pandas>=0.23->seaborn) (1.12.0)\n", + "Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from 
kiwisolver>=1.0.1->matplotlib>=2.2->seaborn) (45.1.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install seaborn" + ] + }, + { + "source": [ + "Začnite tam, kjer smo končali pri zadnji lekciji, z uvoženimi in filtriranimi podatki.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "0 Sparky Mandy & The Jungle \n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "2 LITT! LITT! \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "0 Cruel Santino alternative r&b 2019 144000 48 \n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "2 AYLØ indie r&b 2018 207758 40 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "0 0.666 0.8510 0.420 0.534000 0.1100 -6.699 \n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "2 0.836 0.2720 0.564 0.000537 0.1100 -7.127 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "\n", + " speechiness tempo time_signature \n", + "0 0.0829 133.015 5 \n", + "1 0.3600 129.993 3 \n", + "2 0.0424 130.005 4 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
0SparkyMandy & The JungleCruel Santinoalternative r&b2019144000480.6660.85100.4200.5340000.1100-6.6990.0829133.0155
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
2LITT!LITT!AYLØindie r&b2018207758400.8360.27200.5640.0005370.1100-7.1270.0424130.0054
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
\n
" + }, + "metadata": {}, + "execution_count": 11 + } + ], + "source": [ + "\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "\n", + "\n", + "df = pd.read_csv(\"../../data/nigerian-songs.csv\")\n", + "df.head()" + ] + }, + { + "source": [ + "Osredotočili se bomo le na 3 žanre. Morda lahko ustvarimo 3 grozde!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "metadata": {}, + "execution_count": 12 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlYAAAHbCAYAAAAJY9SEAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nO3de7ymc73/8dfbjNROhUwINR0msjvInk07hZLILofaiSJKTQfS+biT2NXu3O6oKL+0f6WURG0dpIOdnTJkO5UMEdNgoaQIw2f/cV1Td2ONGbO+y32vNa/n47Ee676/13Vf9yetWet9f09XqgpJkiRN3GrDLkCSJGm6MFhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiZFwh8Hvm5PuGng+fOHXZ8kTYa4QaikyZZwKfDiKr437FomImFmFYuHXYek0WWPlaShSLhXwicSFiVckfD+hNX7YzslLEg4NOG6hF8nPOdOrjUn4X8Sbkj4dsKnEz4zcPxJCT9N+H3CWQlbDxw7PeGQ/vsfEk5KWLs/tmnC4oSXJFwOnLQC13tJwqV9LZfcWd2Sph+DlaRhORR4DPBo4B+A7YA3DhyfDdwDWB94CXB0wkOWvkhCgGOBHwD3B94D7D1wfDbwdeBfgXWAtwFfXxKees8Dng9sAKwFvGrg2AxgK2ATYNc7u15/zfcD21dxH+CJwHl35T+KpKnNYCVpWJ4PHFLFNVVcBbwT2Gfg+GLg0Cpu6YcQvwf8yzjXmQNsChzWn/tD4FsDx/cFvlbF96q4vYqTgAuApw2cc2QVF1fxJ+CrwOZLvcfbq7ixiptW8HqPSrhnFb+t4hd36b+KpCnNYCXpbtf3Mq0PXDbQfBmw4cDzsSr+vNTxB45zuQf259480Hb5wOMHA3v3w3a/T/g9MHepa1058PhGYM2B57dX8dsVuV4Vv6MLjAcBVyacmPDwcWqWNE0ZrCTd7aooujDz4IHmBwELB
56vm3DPpY4PBpwlFgGzEtYYaNt44PHlwGeqWGvg695VfHhFy13q+Z1er4r/qmJ7uuD2G+DwFXwfSdOAwUrSsBwDHJJw/4QH0M1Z+v8Dx1cHDk64R8JTgB2A48a5zq+AC4G3JayesA2w08Dxo4HnJGyfMKOfNL99wvorWfcyr5ewYcI/J/wdcDPwR+D2lXwfSVOQwUrSsLydbm7S+cDZwGnA+waOX0o3z+pK4CjghVVcsvRF+t6v5wJPBX4HvBX4Cl2woX/Ns+kmy19DN6T4Klby999yrjcDeHNf87XAPwIHrsz7SJqa3MdK0shJ2An4eNXKzU9KOAE4vYp/b1uZJN05e6wkTXkJWyXMTlgt4Zl0Q4EnDLsuSauemcMuQJIa2Ihu/tXadJPLX1TFBcMtSdKqyKFASZKkRhwKlCRJamQkhgLXXXfdmj179rDLkCRJWq4zzzzzmqqaNd6xkQhWs2fPZv78+cMuQ5IkabmSXLasYw4FSpIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1MnPYBbT2D2/4/LBL0DRz5vtfMOwSJElThD1WkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqZHlBqskGyf5QZILkpyf5FV9+zpJTk5yUf997b49ST6aZEGSc5JsMdn/IyRJkkbBivRYLQZeV1WbAY8HDkiyGfBm4JSqmgOc0j8HeDowp/+aBxzevGpJkqQRtNxgVVWLquqs/vENwC+ADYFdgaP7044Gdusf7wp8vjqnA2sl2aB55ZIkSSPmLs2xSjIbeBzwU2C9qlrUH7oSWK9/vCFw+cDLrujblr7WvCTzk8wfGxu7i2VLkiSNnhUOVknWBI4DXl1Vfxg8VlUF1F1546o6oqrmVtXcWbNm3ZWXSpIkjaQVClZJVqcLVV+oqq/1zVctGeLrv1/dty8ENh54+UZ9myRJ0rS2IqsCA3wW+EVVfWjg0InAvv3jfYETBtpf0K8OfDxw/cCQoSRJ0rQ1cwXO2RrYBzg3ydl921uB9wDHJtkfuAzYoz92ErAzsAC4EXhh04olSZJG1HKDVVX9GMgyDm8/zvkFHDDBuiRJkqYcd16XJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqZHlBqskRyW5Osl5A21fTnJ2/3VpkrP79tlJbho49qnJLF6SJGmUzFyBcz4HfBz4/JKGqnruksdJPghcP3D+xVW1easCJUmSporlBquqOjXJ7PGOJQmwB/CUtmVJkiRNPROdY/Uk4Kqqumig7SFJfp7kR0metKwXJpmXZH6S+WNjYxMsQ5IkafgmGqz2Ao4ZeL4IeFBVPQ54LfDFJPcd74VVdURVza2qubNmzZpgGZIkScO30sEqyUzgWcCXl7RV1c1VdW3/+EzgYuAREy1SkiRpKphIj9VTgV9W1RVLGpLMSjKjf/xQYA5wycRKlCRJmhpWZLuFY4CfAJskuSLJ/v2hPfnbYUCAbYBz+u0Xvgq8rKqua1mwJEnSqFqRVYF7LaN9v3HajgOOm3hZkiRJU487r0uSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrS
ZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUyHKDVZKjklyd5LyBtnckWZjk7P5r54Fjb0myIMmFSXacrMIlSZJGzYr0WH0O2Gmc9g9X1eb910kASTYD9gT+vn/NJ5PMaFWsJEnSKFtusKqqU4HrVvB6uwJfqqqbq+rXwAJgywnUJ0mSNGVMZI7VgUnO6YcK1+7bNgQuHzjnir7tDpLMSzI/yfyxsbEJlCFJkjQaVjZYHQ48DNgcWAR88K5eoKqOqKq5VTV31qxZK1mGJEnS6FipYFVVV1XVbVV1O3Akfx3uWwhsPHDqRn2bJEnStLdSwSrJBgNPdweWrBg8EdgzyRpJHgLMAX42sRIlSZKmhpnLOyHJMcB2wLpJrgAOAbZLsjlQwKXASwGq6vwkxwIXAIuBA6rqtskpXZIkabQsN1hV1V7jNH/2Ts5/F/CuiRQlSZI0FbnzuiRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNLDdYJTkqydVJzhtoe3+SXyY5J8nxSdbq22cnuSnJ2f3XpyazeEmSpFGyIj1WnwN2WqrtZOBRVfUY4FfAWwaOXVxVm/dfL2tTpiRJ0uhbbrCqqlOB65Zq+25VLe6fng5sNAm1SZIkTSkt5li9CPjWwPOHJPl5kh8ledKyXpRkXpL5SeaPjY01KEOSJGm4JhSskvwrsBj4Qt+0CHhQVT0OeC3wxST3He+1VXVEVc2tqrmzZs2aSBmSJEkjYaWDVZL9gGcAz6+qAqiqm6vq2v7xmcDFwCMa1ClJkjTyVipYJdkJeCOwS1XdONA+K8mM/vFDgTnAJS0KlSRJGnUzl3dCkmOA7YB1k1wBHEK3CnAN4OQkAKf3KwC3AQ5LcitwO/Cyqrpu3AtLkiRNM8sNVlW11zjNn13GuccBx020KEmSpKnIndclSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJamSFglWSo5JcneS8gbZ1kpyc5KL++9p9e5J8NMmCJOck2WKyipckSRolK9pj9Tlgp6Xa3gycUlVzgFP65wBPB+b0X/OAwydepiRJ0uhboWBVVacC1y3VvCtwdP/4aGC3gfbPV+d0YK0kG7QoVpIkaZRNZI7VelW1qH98JbBe/3hD4PKB867o2/5GknlJ5ieZPzY2NoEyJEmSRkOTyetVVUDdxdccUVVzq2rurFmzWpQhSZI0VBMJVlctGeLrv1/dty8ENh44b6O+TZIkaVqbSLA6Edi3f7wvcMJA+wv61YGPB64fGDKUJEmatmauyElJjgG2A9ZNcgVwCPAe4Ngk+wOXAXv0p58E7AwsAG4EXti4ZkmSpJG0QsGqqvZaxqHtxzm3gAMmUpQkSdJU5M7rkiRJj
RisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDUyc2VfmGQT4MsDTQ8F3g6sBbwEGOvb31pVJ610hZIkSVPESgerqroQ2BwgyQxgIXA88ELgw1X1gSYVSpIkTRGthgK3By6uqssaXU+SJGnKaRWs9gSOGXh+YJJzkhyVZO3xXpBkXpL5SeaPjY2Nd4okSdKUMuFgleQewC7AV/qmw4GH0Q0TLgI+ON7rquqIqppbVXNnzZo10TIkSZKGrkWP1dOBs6rqKoCquqqqbquq24EjgS0bvIckSdLIaxGs9mJgGDDJBgPHdgfOa/AekiRJI2+lVwUCJLk3sAPw0oHm9yXZHCjg0qWOSZIkTVsTClZV9Sfg/ku17TOhiiRJkqYod16XJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKmRmcMuQNJd95vDHj3sEjTNPOjt5w67BGlasMdKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWpk5kQvkORS4AbgNmBxVc1Nsg7wZWA2cCmwR1X9bqLvJUmSNMpa9Vg9uao2r6q5/fM3A6dU1RzglP65JEnStDZZQ4G7Akf3j48Gdpuk95EkSRoZLYJVAd9NcmaSeX3belW1qH98JbDe0i9KMi/J/CTzx8bGGpQhSZI0XBOeYwU8saoWJnkAcHKSXw4erKpKUku/qKqOAI4AmDt37h2OS5IkTTUT7rGqqoX996uB44EtgauSbADQf796ou8jSZI06iYUrJLcO8l9ljwGngacB5wI7Nufti9wwkTeR5IkaSqY6FDgesDxSZZc64tV9e0kZwDHJtkfuAzYY4LvI0mSNPImFKyq6hLgseO0XwtsP5FrS5IkTTXuvC5JktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIysdrJJsnOQHSS5Icn6SV/Xt70iyMMnZ/dfO7cqVJEkaXTMn8NrFwOuq6qwk9wHOTHJyf+zDVfWBiZcnSZI0dax0sKqqRcCi/vENSX4BbNiqMEmSpKmmyRyrJLOBxwE/7ZsOTHJOkqOSrL2M18xLMj/J/LGxsRZlSJIkDdWEg1WSNYHjgFdX1R+Aw4GHAZvT9Wh9cLzXVdURVTW3qubOmjVromVIkiQN3YSCVZLV6ULVF6rqawBVdVVV3VZVtwNHAltOvExJkqTRN5FVgQE+C/yiqj400L7BwGm7A+etfHmSJElTx0RWBW4N7AOcm+Tsvu2twF5JNgcKuBR46YQqlCRJmiImsirwx0DGOXTSypcjSZI0dbnzuiRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDUyke0WJ
EmaNFt/bOthl6Bp5rRXnjbp72GPlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDUyacEqyU5JLkyyIMmbJ+t9JEmSRsWkBKskM4BPAE8HNgP2SrLZZLyXJEnSqJisHqstgQVVdUlV3QJ8Cdh1kt5LkiRpJKSq2l80+Rdgp6p6cf98H2Crqjpw4Jx5wLz+6SbAhc0L0Z1ZF7hm2EVIk8yfc60K/Dm/+z24qmaNd2Dm3V3JElV1BHDEsN5/VZdkflXNHXYd0mTy51yrAn/OR8tkDQUuBDYeeL5R3yZJkjRtTVawOgOYk+QhSe4B7AmcOEnvJUmSNBImZSiwqhYnORD4DjADOKqqzp+M99JKcxhWqwJ/zrUq8Od8hEzK5HVJkqRVkTuvS5IkNWKwkiRJasRgpSaSzE1yn2HXIUnSMBms1MpLgO8ariRp6kmSYdcwXRisNCFJtgCoqpcCZwLHG640VYz3x8Q/MFrVJElVVZKtk+yfZPt+qyStBFcFakKSnA7cWFVP6Z8fDswBdq+qG4ZanLQCkmxDt6HxH4Bv9n9gVquq24dcmnS3SfJk4LPAl4FnAEcDX6+qBUMtbAqyx0oTUlWPB2Yk+Ub//OXARdhzpRG2pFcqyVzgKGBrYG/g60tClT1XWlUk2QR4GfDqqnoLsC/dB+QdhlrYFGWw0l028EdpJkBVbQvMWipc/RL4fpI1h1aotAx9r9T2wFuAF1fVK4D9gKuBjyw5Z3gVSpMvPWAb4GHAjknuXVVnAccA85KsPdQipyCDle6SJWPx/dMNk8yBv/Rc3T/JN/vnBwKnAusMp1JpudYCdgf+sX9+C/BpwLklmtYGemPXBWZW1ZHAu4DQ3YIO4Erghr5Nd4FzrLRSkrwO2Bm4J/D9qjq4bz8VoKq2GWJ50h0MTNBdD7ihqm5M8s/A14Gdq+rkJDsA76MbArnWXitNV0l2Bg4DFgJ/AvYHnk03DLga3S3v3l9V3xxakVPUpNwrUNNbkhcBu1TVtkk+Brw2yd9V1euqapsk30mycVVdPuxapSX6UPVM4JVAJTmNrodqN+A7SY6l+4R+WFVdM8RSpUmV5JHAO4EDgbOBLwL/r6r2TPJnYEfg3CWhaqmRCi2HQ4FarnEm8S4A9knySmBD4DHA3kk+BVBVOxqqNGqSPIyuN+oNwAfoQtShwLfohgSfCfxPVR2/ZP6gNE3dDFwAnFVVN1bVbsAGSQ6g68H9KfDYJHsaqu46f3louZb8o+onot9cVacmuR+wLfC+qrq4/7S/VZJ1quq6YdYrDRr4w7A2cFlV/W/f/htgK+CpVXVCkn2BY5P8uqp+OLyKpbYGhsFn0HWoXAdsAMwFftyf9iW6X/eLkxwN3Ar8wFB119ljpWVK8rAkm/WPXwt8nm45+gOq6nrg18Czk7yZrufq2YYqjYqBntZ79d/PAxYnORCgqi4ELgc2659/FfgXYNHdXKo0qfpQtStwLN0+VY8EPgF8LMmBSV5MNyy4oD//1qo6uqquGlrRU5iT1zWuJPcCPgZcRddlPA94Od2ta3YHtqALU7sBTwYOqqrzhlOtNL4kO9H9zF4CnA4U3Z5Va9J9Qv80sF9V/Y9DHpqukmwKfAb4d7qVgO8A9qHrldoR2Aj4alV9d1g1TicGKy1Tv5XCa4H7AudX1bv79g8DOwFPqqprktyzqv48xFKlO0jyeOC9dB8QHkO3jcKtdJ/aX0230/r3q+obQytSmmRJHgV8ELiwqg7q23YEPkf3O9yd1RtzKFB/Y3CielVdBLwbuB54TJLH9O2vAf4b+EE/Zn/LM
GqVliXJhnQT1H/aD/G9D/gh3bySRVW1P/CGqvqGO6xrmvsV3Z5Uj0wyJ8kaVfUd4Dhg1nBLm54MVvqLwaGQJM9NshuwKV2v1fXA7gPhah7dpN/bvKeaRtBNdJNy90yyVVX9saq+DTyIrveKqlrcf7fbXtNSkhlVdQvwYrq5g68HdkmyLfAsYPEw65uuDFb6i4FQdSDdXj8A36D7Q/ReYH26bRb+vj929d1epDSOgdssPSrJdnRzqN5D11N1WJKn90PbGwO/H1qh0t2k/6B8W5KZVXUrXbhaDfhXulC1X1WdYY9tewYr/UWS1ZJsQDcZfXvgocApwM+r6hK6YcGZdBPa/aSvkdGvetoZOAF4Id1ePM+kG/47jW4DxE8AL6qqs/xjoulm4MPFnCTrL2nvt0+Y2fdcvQKYD/wdcJYLNiaHwWoVt9QfmBl0+5tcS7cr7zbAc6rq1iQv7895vbtSa9QkuTfdH419qmpfuo0/twXWo/tZPhj4I93PtzStDOxTtSNwIt0HiwOSPBz+JlzdSvfv5AF0NyB3L8tJYLBahS01p2pvYF5V3Uy3JP0gun2pbkzyPLr7SFVV3Ta8iqW/SrJa//0f6XaSvgbYBKCqTqDbt+oN/enH0n1SPyTJPe/+aqXJ04equXTDfc8EXgf8PbDbUuFqyZyr5wAf7IOWGjOtrsIGQtUBwIvo9jWhql6aZC3g1CQ/p9uder+qumJoxUq9JPeqqpuq6vYkTwQOp7tx7M+AjZPMrar5dCtXtwBmVNXVSY4AbndrEE03Se5DNwS+Rb99woL+g8dewHOTfKWqftXPuVqtD1e/HWbN05n7WK3ikqwNHAG8qaou6Zfi3twf24muJ+DSqvr1MOuU4C978vwH8Ay6rRMOp9vY8DNJHgocQLfIYjHwD8DBVXX8sOqVJsvS86OSbAJ8lG739Ff2Hzy2A54PvNvf4Xcfg9UqZrzJikm+Rrf673MDvVhbAedU1U1DKFO6gySr0wWpn9L9vD6NbthjbeAFVfXbJOvS7SK9KbCgquY7QVfTzcCcqh3othBJ/+HiEcCb6Ta/fW0frtauqt8NteBVjHOsViFLzama03/CAfgO8GDgn/pjzwXeRrdkXRolC+kmpX+Fbs7UYcDZwEFJ1q+qa6rq7Kr6Uj8c6OpVTSv9UF4l+We6HdWvoNtS5ANV9Su61dvrAx/vX3L9kEpdZdljtYpYKlS9lm5O1U3AfwH/RnfvqMfRDaE8DHheVZ07nGqlv7XUJ/T/BH5YVXv2x7amGxq8F92Qh/uradpJ8hBgtaq6uO+Z/U/gNcCSXqoNge9V1f79h+Y1quqc4VW86nLy+ipiIFQ9HngC8ERgDeAMYHFVHdzvYfVwuiGURUMrVhowEKoeSncLjmcBr07yTrqVTaf1E3V3oxsWNFhpOnoCcFGSK/p7tM6j+3k/lG4+4Wy6Ses3VdWBQ6xzlWewWoUkeSRwCF2v1GpVdVW/VP0nSR5YVa+gu+2BNDL6ULUL3bDfAuAS4NN0S8oPSvLRqvrvJOdWlbuqa1qqqi8kWRM4I8neVXVOkgcCZ/ZzqdYHPkQ3tUND5ByraWzp3aWr6hfAkXTBarsk61bVVcDWwBOTrOeO1Bo1fS/rwcCOwPF0Gxw+je4my9sCr+s3PzRUadoZ2FF9R+BRdEOAR/YrZC8F7pfkk3Q3VT6hqk729/hwOcdqmhpn88916O5y/h3g2XTDJl8HTu33+Jnh5p8aRUk2ottaYW26XdSfB3yKbhf1zwFjVXXG0AqUJlmSLYGPAK+pqtP7ebLPo/s9Dt39XP9UVT8aVo36K4cCp6mBUPUaYFe6VVRvotvs893AbcB+wK1JvgHcPpxKpTvXb0x7RZJ3AV+oqgVJPk93d4Dzq+qy4VYoTZ4kGwNvBM6tqtMBqupDfafUyXS3HTtpiCVqKQaraaa/fcE6VfWzfk7VFnQ3VX4D3f/f69NtpXAY3ZDgmVVlqNJUcC7w0n4/q2cBrzJUaRWwG
DgH2DXJTlX1bfhLuJoBrDXU6nQHBqtppL+twf7A6kluBf6Xbhnu04Gdge3ptlk4iO7WHocOq1ZpJZxEt5J1F+BdVXXakOuRmh/RBFIAAAVUSURBVBtYBftPdKtgf0N3t4HfA7snubWqTgGoqvcPsVQtg5PXp4l+07gb6HamXgzsCTyiqhYC9wN+1t8f6hbgW3SrqqQpo6r+UFVHA8+tqv9ygq6moz5UPQ04ClgPOJNugdGJdD1X+/XHNaLssZomBobzdgQeC2wC3DPJZ4CfAJ/t96naDtihqq4cSqHSxN0G7qiu6affj20t4KXA7nSLji4Azuq3x/kKXa+t2+KMMFcFTiNJngR8DNgSeDywE7A63XyqNek2kbugqi4ZWpGSpDuV5E10Iw1PAZ7f77a+H3AqcKnzYkebQ4HTy5rAtVV1S1WdSrevyVPo7hm1TlV901AlSaMnyeZJDumf3hvYB9i7D1WPpVvV/UBD1ehzKHB6+RmwMMmewFeq6swkp9EF6KuGW5okadDARPUnAc8BdkxydVW9PcmmwCFJFgObA2+qqh8PtWCtEIPV9HI98GO6vaqelmQ+3T0Bn11V1wy1MkkS8NdA1YeqbYAvAAcCC4EnJ1mjqvZI8kS6jXE/3n9QjnMLR59zrKaYfvXfMruCk9wL2JRu4uOawGer6vy7qz5J0rL19/d7JPDDqrqtvzPGhlX13v5egJsD7wWOraqPDLNWrRyD1RTV/2N8EHADcMx4PVL9/dMW3+3FSZLGlWRX4CLgCrqtcbYHPky3WvvX/crAo+nmWX21qr44tGK1Upy8PgUleRHdxp8X093376Akj+6PZcn+PoYqSRotVXUCcCXwSbp7/X2X7t6XH+nnVT2G7t6YFwEbDqtOrTznWE0B44yrbwe8vqq+neRU4GC6DUHPdfxdkkbP4O/xqrouyY+Ap9Ft2nw8EOA/6Xqx9qe7HdkO/S2cFvu7fepwKHAKGFg58jLgDLpb1NwT+FD/D/QhdLv07l5Vvx9mrZKk8SXZFng08P2quiDJXnS/z79eVV9Lcu/+1C3p7o6xu3Nkpx6HAkdYkk3gL7c4eBawB/BbunC1Ft3Kv7WAR9F9yrllWLVKku5oydSMJFvRDf9tC7wxyUuq6hjgm8DeSfYA/kz3ofkJwK6GqqnJocARlWRH4PAkW9CNt78YOK+qFgGLkmwMbNO33wN4ZVXdOLSCJUl30H8w3hI4FNirqs7p9xp8Qh+ujkwyA7iwqm4Drk3y/v7erpqCDFYjKMlMuq7gg4HN6Jbf/gDYNckz+h3UP5PkfnR7nPypqsaGV7Ek6U6sBTwV2IHuRspfBW6nn0NVVZ+Ev9nfylA1hRmsRlBVLU5yMfA2uhvOPpmui/gmYJcki6vq21V1Pd2moJKkEVVV3+2nc/x7kt9W1TFJvgrMAP534DwnPU8DBqvRdQ5wI/AH4H5VdU2Sr9F9ytk3ya1VdcpQK5QkrZCqOrG/Pc2/JblHVR0NHDPsutSeqwJHxOBS3CT3AG7rd+V9Pd2NlA+pqjOSbES3iuSb/XwrSdIUkWQX4D10Q4NXelPl6cdgNQKWClUH0s2r+gPwjqr6c5K30t3/7z1V9ZMkM/pJjpKkKSbJLOfFTl8GqxGS5BXAc4HnAWcB3wPeXlUXJ3kn8HBgv6r68xDLlCRJy2CwGhFJ7gt8iG4l4HOAnYGr6bZaeHlVLUhy/6q6dohlSpKkO2GwGiFJ1gA2Bf6jqp7cbyw3RrcD7zuq6tahFihJku6UqwJHSFXdnORGYGZ/U+UHA6cAnzZUSZI0+uyxGjF9r9Wr6VaMPBB4TlVdMNyqJEnSijBYjaD+bubrA7dX1cJh1yNJklaMwUqSJKmR1YZdgCRJ0nRhsJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmN/B/Djeb5PsBsCgAAAABJRU5ErkJggg==\n" + }, + "metadata": { + 
"needs_background": "light" + } + } + ], + "source": [ + "df = df[(df['artist_top_genre'] == 'afro dancehall') | (df['artist_top_genre'] == 'afropop') | (df['artist_top_genre'] == 'nigerian pop')]\n", + "df = df[(df['popularity'] > 0)]\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. \n", + "5 Kasala Pioneers \n", + "6 Pull Up Everything Pretty \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "5 DRB Lasgidi nigerian pop 2020 184800 26 \n", + "6 prettyboydo nigerian pop 2018 202648 29 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "5 0.803 0.1270 0.525 0.000007 0.1290 -10.034 \n", + "6 0.818 0.4520 0.587 0.004490 0.5900 -9.840 \n", + "\n", + " speechiness tempo time_signature \n", + "1 0.3600 129.993 3 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 \n", + "5 0.1970 100.103 4 \n", + "6 0.1990 95.842 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
5KasalaPioneersDRB Lasgidinigerian pop2020184800260.8030.12700.5250.0000070.1290-10.0340.1970100.1034
6Pull UpEverything Prettyprettyboydonigerian pop2018202648290.8180.45200.5870.0044900.5900-9.8400.199095.8424
\n
" + }, + "metadata": {}, + "execution_count": 13 + } + ], + "source": [ + "df.head()" + ] + }, + { + "source": [ + "Kako čisti so ti podatki? Preverite odstopajoče vrednosti z uporabo škatlastih diagramov. Osredotočili se bomo na stolpce z manj odstopajočimi vrednostmi (čeprav bi lahko odstranili odstopajoče vrednosti). Škatlasti diagrami lahko pokažejo razpon podatkov in pomagajo izbrati, katere stolpce uporabiti. Upoštevajte, da škatlasti diagrami ne prikazujejo variance, ki je pomembna lastnost podatkov, primernih za razvrščanje v grozde (https://stats.stackexchange.com/questions/91536/deduce-variance-from-boxplot)\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 14 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.figure(figsize=(20,20), dpi=200)\n", + "\n", + "plt.subplot(4,3,1)\n", + "sns.boxplot(x = 'popularity', data = df)\n", + "\n", + "plt.subplot(4,3,2)\n", + "sns.boxplot(x = 'acousticness', data = df)\n", + "\n", + "plt.subplot(4,3,3)\n", + "sns.boxplot(x = 'energy', data = df)\n", + "\n", + "plt.subplot(4,3,4)\n", + "sns.boxplot(x = 'instrumentalness', data = df)\n", + "\n", + "plt.subplot(4,3,5)\n", + "sns.boxplot(x = 'liveness', data = df)\n", + "\n", + "plt.subplot(4,3,6)\n", + "sns.boxplot(x = 'loudness', data = df)\n", + "\n", + "plt.subplot(4,3,7)\n", + "sns.boxplot(x = 'speechiness', data = df)\n", + "\n", + "plt.subplot(4,3,8)\n", + "sns.boxplot(x = 'tempo', data = df)\n", + "\n", + "plt.subplot(4,3,9)\n", + "sns.boxplot(x = 
'time_signature', data = df)\n", + "\n", + "plt.subplot(4,3,10)\n", + "sns.boxplot(x = 'danceability', data = df)\n", + "\n", + "plt.subplot(4,3,11)\n", + "sns.boxplot(x = 'length', data = df)\n", + "\n", + "plt.subplot(4,3,12)\n", + "sns.boxplot(x = 'release_date', data = df)" + ] + }, + { + "source": [ + "Izberite več stolpcev s podobnimi razponi. Prepričajte se, da vključite stolpec artist_top_genre, da ohranimo naše žanre urejene.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import LabelEncoder, StandardScaler\n", + "le = LabelEncoder()\n", + "\n", + "# scaler = StandardScaler()\n", + "\n", + "X = df.loc[:, ('artist_top_genre','popularity','danceability','acousticness','loudness','energy')]\n", + "\n", + "y = df['artist_top_genre']\n", + "\n", + "X['artist_top_genre'] = le.fit_transform(X['artist_top_genre'])\n", + "\n", + "# X = scaler.fit_transform(X)\n", + "\n", + "y = le.transform(y)\n", + "\n" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 0, 2, 1, 1, 0, 1, 0, 0,\n", + " 0, 1, 0, 2, 0, 0, 2, 2, 1, 1, 0, 2, 2, 2, 2, 1, 1, 0, 2, 0, 2, 0,\n", + " 2, 0, 0, 1, 1, 2, 1, 0, 0, 2, 2, 2, 2, 1, 1, 0, 1, 2, 2, 1, 2, 2,\n", + " 1, 2, 1, 2, 2, 1, 1, 1, 1, 1, 2, 1, 2, 2, 0, 2, 1, 1, 1, 2, 2, 2,\n", + " 2, 1, 2, 2, 2, 2, 1, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 2, 1, 2, 0,\n", + " 1, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 0, 1, 1, 1, 1, 0, 1, 2, 1, 2,\n", + " 1, 2, 2, 2, 0, 2, 1, 1, 1, 2, 1, 0, 1, 2, 2, 1, 1, 1, 0, 1, 2, 2,\n", + " 2, 1, 1, 0, 1, 2, 1, 1, 1, 1, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2,\n", + " 0, 1, 0, 0, 1, 0, 0, 2, 0, 0, 1, 1, 2, 0, 2, 2, 0, 2, 2, 1, 1, 0,\n", + " 1, 1, 0, 0, 1, 0, 2, 0, 1, 0, 2, 
0, 0, 2, 2, 2, 1, 1, 1, 1, 1, 0,\n", + " 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2,\n", + " 1, 1, 0, 0, 1, 1, 2, 0, 0, 0, 0, 0, 2, 0, 0, 2, 1, 1, 1, 2, 2, 2,\n", + " 1, 2, 1, 2, 1, 1, 1, 0, 2, 2, 2, 1, 2, 1, 0, 1, 2, 1, 1, 1, 2, 1],\n", + " dtype=int32)" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ], + "source": [ + "\n", + "from sklearn.cluster import KMeans\n", + "\n", + "nclusters = 3 \n", + "seed = 0\n", + "\n", + "km = KMeans(n_clusters=nclusters, random_state=seed)\n", + "km.fit(X)\n", + "\n", + "# Predict the cluster for each data point\n", + "\n", + "y_cluster_kmeans = km.predict(X)\n", + "y_cluster_kmeans" + ] + }, + { + "source": [ + "Te številke nam ne pomenijo veliko, zato si oglejmo 'silhouette score', da preverimo natančnost. Naš rezultat je nekje v sredini.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.5466747351275563" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ], + "source": [ + "from sklearn import metrics\n", + "score = metrics.silhouette_score(X, y_cluster_kmeans)\n", + "score" + ] + }, + { + "source": [ + "Uvozite KMeans in zgradite model\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.cluster import KMeans\n", + "wcss = []\n", + "\n", + "for i in range(1, 11):\n", + " kmeans = KMeans(n_clusters = i, init = 'k-means++', random_state = 42)\n", + " kmeans.fit(X)\n", + " wcss.append(kmeans.inertia_)" + ] + }, + { + "source": [ + "S tem modelom in z metodo komolca določite, koliko grozdov je najbolje ustvariti.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name":
"stderr", + "text": [ + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.\n FutureWarning\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.figure(figsize=(10,5))\n", + "sns.lineplot(range(1, 11), wcss,marker='o',color='red')\n", + "plt.title('Elbow')\n", + "plt.xlabel('Number of clusters')\n", + "plt.ylabel('WCSS')\n", + "plt.show()" + ] + }, + { + "source": [ + "Looks like 3 is a good number after all. Fit the model again and create a scatterplot of your clusters. They do group in bunches, but they are pretty close together." + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "from sklearn.cluster import KMeans\n", + "kmeans = KMeans(n_clusters = 3)\n", + "kmeans.fit(X)\n", + "labels = kmeans.predict(X)\n", + "plt.scatter(df['popularity'],df['danceability'],c = labels)\n", + 
"plt.xlabel('popularity')\n", + "plt.ylabel('danceability')\n", + "plt.show()" + ] + }, + { + "source": [ + "Natančnost tega modela ni slaba, a tudi ne odlična. Morda podatki niso primerni za gručenje s K-sredinami. Poskusite lahko drugo metodo.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 811, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Result: 109 out of 286 samples were correctly labeled.\nAccuracy score: 0.38\n" + ] + } + ], + "source": [ + "labels = kmeans.labels_\n", + "\n", + "correct_labels = sum(y == labels)\n", + "\n", + "print(\"Result: %d out of %d samples were correctly labeled.\" % (correct_labels, y.size))\n", + "\n", + "print('Accuracy score: {0:0.2f}'. format(correct_labels/float(y.size)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za prevajanje z umetno inteligenco [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da upoštevate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem maternem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo profesionalni človeški prevod. 
Ne prevzemamo odgovornosti za morebitne nesporazume ali napačne razlage, ki bi nastale zaradi uporabe tega prevoda.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sl/5-Clustering/2-K-Means/solution/tester.ipynb b/translations/sl/5-Clustering/2-K-Means/solution/tester.ipynb new file mode 100644 index 000000000..d71ed4d7a --- /dev/null +++ b/translations/sl/5-Clustering/2-K-Means/solution/tester.ipynb @@ -0,0 +1,343 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "6f92868513e59d321245137c1c4c5311", + "translation_date": "2025-09-06T14:22:20+00:00", + "source_file": "5-Clustering/2-K-Means/solution/tester.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: seaborn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.11.1)\n", + "Requirement already satisfied: pandas>=0.23 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.1.2)\n", + "Requirement already satisfied: matplotlib>=2.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) 
(3.1.0)\n", + "Requirement already satisfied: numpy>=1.15 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.19.2)\n", + "Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.4.1)\n", + "Requirement already satisfied: pytz>=2017.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2019.1)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2.8.0)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (1.1.0)\n", + "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (2.4.0)\n", + "Requirement already satisfied: cycler>=0.10 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n", + "Requirement already satisfied: six>=1.5 in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from python-dateutil>=2.7.3->pandas>=0.23->seaborn) (1.12.0)\n", + "Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from kiwisolver>=1.0.1->matplotlib>=2.2->seaborn) (45.1.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install seaborn" + ] + }, + { + "source": [ + "Začnite 
tam, kjer smo končali pri zadnji lekciji, z uvoženimi in filtriranimi podatki.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "0 Sparky Mandy & The Jungle \n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "2 LITT! LITT! \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "0 Cruel Santino alternative r&b 2019 144000 48 \n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "2 AYLØ indie r&b 2018 207758 40 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "0 0.666 0.8510 0.420 0.534000 0.1100 -6.699 \n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "2 0.836 0.2720 0.564 0.000537 0.1100 -7.127 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "\n", + " speechiness tempo time_signature \n", + "0 0.0829 133.015 5 \n", + "1 0.3600 129.993 3 \n", + "2 0.0424 130.005 4 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
0SparkyMandy & The JungleCruel Santinoalternative r&b2019144000480.6660.85100.4200.5340000.1100-6.6990.0829133.0155
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
2LITT!LITT!AYLØindie r&b2018207758400.8360.27200.5640.0005370.1100-7.1270.0424130.0054
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
\n
" + }, + "metadata": {}, + "execution_count": 105 + } + ], + "source": [ + "\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "import numpy as np\n", + "\n", + "df = pd.read_csv(\"../../data/nigerian-songs.csv\")\n", + "df.head()" + ] + }, + { + "source": [ + "Osredotočili se bomo le na 3 žanre. Morda lahko ustvarimo 3 gruče!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "metadata": {}, + "execution_count": 106 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "df = df[(df['artist_top_genre'] == 'afro dancehall') | (df['artist_top_genre'] == 'afropop') | (df['artist_top_genre'] == 'nigerian pop')]\n", + "df = df[(df['popularity'] > 0)]\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. 
\n", + "5 Kasala Pioneers \n", + "6 Pull Up Everything Pretty \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "5 DRB Lasgidi nigerian pop 2020 184800 26 \n", + "6 prettyboydo nigerian pop 2018 202648 29 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "5 0.803 0.1270 0.525 0.000007 0.1290 -10.034 \n", + "6 0.818 0.4520 0.587 0.004490 0.5900 -9.840 \n", + "\n", + " speechiness tempo time_signature \n", + "1 0.3600 129.993 3 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 \n", + "5 0.1970 100.103 4 \n", + "6 0.1990 95.842 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
5KasalaPioneersDRB Lasgidinigerian pop2020184800260.8030.12700.5250.0000070.1290-10.0340.1970100.1034
6Pull UpEverything Prettyprettyboydonigerian pop2018202648290.8180.45200.5870.0044900.5900-9.8400.199095.8424
\n
" + }, + "metadata": {}, + "execution_count": 107 + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import StandardScaler\n", + "\n", + "scaler = StandardScaler()\n", + "\n", + "# X = df.loc[:, ('danceability','energy')]\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [ + { + "output_type": "error", + "ename": "ValueError", + "evalue": "Unknown label type: 'continuous'", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0;31m# we create an instance of SVM and fit out data. We do not scale our\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[0;31m# data since we want to plot the support vectors\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 22\u001b[0;31m \u001b[0mls30\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mLabelSpreading\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_30\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_30\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Label Spreading 30% data'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 23\u001b[0m \u001b[0mls50\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mLabelSpreading\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0my_50\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_50\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Label Spreading 50% data'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0mls100\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mLabelSpreading\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Label Spreading 100% data'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/semi_supervised/_label_propagation.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y)\u001b[0m\n\u001b[1;32m 228\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_validate_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 229\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mX_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 230\u001b[0;31m \u001b[0mcheck_classification_targets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 231\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 232\u001b[0m \u001b[0;31m# actual graph construction (implementations should override this)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/utils/multiclass.py\u001b[0m in 
\u001b[0;36mcheck_classification_targets\u001b[0;34m(y)\u001b[0m\n\u001b[1;32m 181\u001b[0m if y_type not in ['binary', 'multiclass', 'multiclass-multioutput',\n\u001b[1;32m 182\u001b[0m 'multilabel-indicator', 'multilabel-sequences']:\n\u001b[0;32m--> 183\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Unknown label type: %r\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0my_type\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 184\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 185\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: Unknown label type: 'continuous'" + ] + } + ], + "source": [ + "from sklearn.svm import SVC\n", + "from sklearn.semi_supervised import LabelSpreading\n", + "from sklearn.semi_supervised import SelfTrainingClassifier\n", + "from sklearn import datasets\n", + "\n", + "X = df[['danceability','acousticness']].values\n", + "y = df['energy'].values\n", + "\n", + "# X = scaler.fit_transform(X)\n", + "\n", + "# step size in the mesh\n", + "h = .02\n", + "\n", + "rng = np.random.RandomState(0)\n", + "y_rand = rng.rand(y.shape[0])\n", + "y_30 = np.copy(y)\n", + "y_30[y_rand < 0.3] = -1 # set random samples to be unlabeled\n", + "y_50 = np.copy(y)\n", + "y_50[y_rand < 0.5] = -1\n", + "# we create an instance of SVM and fit out data. 
We do not scale our\n", + "# data since we want to plot the support vectors\n", + "ls30 = (LabelSpreading().fit(X, y_30), y_30, 'Label Spreading 30% data')\n", + "ls50 = (LabelSpreading().fit(X, y_50), y_50, 'Label Spreading 50% data')\n", + "ls100 = (LabelSpreading().fit(X, y), y, 'Label Spreading 100% data')\n", + "\n", + "# the base classifier for self-training is identical to the SVC\n", + "base_classifier = SVC(kernel='rbf', gamma=.5, probability=True)\n", + "st30 = (SelfTrainingClassifier(base_classifier).fit(X, y_30),\n", + " y_30, 'Self-training 30% data')\n", + "st50 = (SelfTrainingClassifier(base_classifier).fit(X, y_50),\n", + " y_50, 'Self-training 50% data')\n", + "\n", + "rbf_svc = (SVC(kernel='rbf', gamma=.5).fit(X, y), y, 'SVC with rbf kernel')\n", + "\n", + "# create a mesh to plot in\n", + "x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1\n", + "y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1\n", + "xx, yy = np.meshgrid(np.arange(x_min, x_max, h),\n", + " np.arange(y_min, y_max, h))\n", + "\n", + "color_map = {-1: (1, 1, 1), 0: (0, 0, .9), 1: (1, 0, 0), 2: (.8, .6, 0)}\n", + "\n", + "classifiers = (ls30, st30, ls50, st50, ls100, rbf_svc)\n", + "for i, (clf, y_train, title) in enumerate(classifiers):\n", + " # Plot the decision boundary. 
For that, we will assign a color to each\n", + " # point in the mesh [x_min, x_max]x[y_min, y_max].\n", + " plt.subplot(3, 2, i + 1)\n", + " Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])\n", + "\n", + " # Put the result into a color plot\n", + " Z = Z.reshape(xx.shape)\n", + " plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)\n", + " plt.axis('off')\n", + "\n", + " # Plot also the training points\n", + " colors = [color_map[y] for y in y_train]\n", + " plt.scatter(X[:, 0], X[:, 1], c=colors, edgecolors='black')\n", + "\n", + " plt.title(title)\n", + "\n", + "plt.suptitle(\"Unlabeled points are colored white\", y=0.1)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za strojno prevajanje [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da upoštevate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo strokovno človeško prevajanje. 
Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki izhajajo iz uporabe tega prevoda.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sl/6-NLP/3-Translation-Sentiment/solution/notebook.ipynb b/translations/sl/6-NLP/3-Translation-Sentiment/solution/notebook.ipynb new file mode 100644 index 000000000..e17088bc3 --- /dev/null +++ b/translations/sl/6-NLP/3-Translation-Sentiment/solution/notebook.ipynb @@ -0,0 +1,100 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 4, + "coopTranslator": { + "original_hash": "27de2abc0235ebd22080fc8f1107454d", + "translation_date": "2025-09-06T15:22:07+00:00", + "source_file": "6-NLP/3-Translation-Sentiment/solution/notebook.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from textblob import TextBlob\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# You should download the book text, clean it, and import it here\n", + "with open(\"pride.txt\", encoding=\"utf8\") as f:\n", + " file_contents = f.read()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "book_pride = TextBlob(file_contents)\n", + "positive_sentiment_sentences = []\n", + "negative_sentiment_sentences = []" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for sentence in book_pride.sentences:\n", + " if sentence.sentiment.polarity == 1:\n", + " positive_sentiment_sentences.append(sentence)\n", + " if sentence.sentiment.polarity == -1:\n", 
+ " negative_sentiment_sentences.append(sentence)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The \" + str(len(positive_sentiment_sentences)) + \" most positive sentences:\")\n", + "for sentence in positive_sentiment_sentences:\n", + " print(\"+ \" + str(sentence.replace(\"\\n\", \"\").replace(\" \", \" \")))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The \" + str(len(negative_sentiment_sentences)) + \" most negative sentences:\")\n", + "for sentence in negative_sentiment_sentences:\n", + " print(\"- \" + str(sentence.replace(\"\\n\", \"\").replace(\" \", \" \")))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za prevajanje z umetno inteligenco [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da upoštevate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem maternem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo profesionalni človeški prevod. 
Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki bi nastale zaradi uporabe tega prevoda.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sl/6-NLP/4-Hotel-Reviews-1/notebook.ipynb b/translations/sl/6-NLP/4-Hotel-Reviews-1/notebook.ipynb new file mode 100644 index 000000000..e69de29bb diff --git a/translations/sl/6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb b/translations/sl/6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb new file mode 100644 index 000000000..29c50d414 --- /dev/null +++ b/translations/sl/6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb @@ -0,0 +1,174 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 4, + "coopTranslator": { + "original_hash": "2d05e7db439376aa824f4b387f8324ca", + "translation_date": "2025-09-06T15:21:47+00:00", + "source_file": "6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# EDA\n", + "import pandas as pd\n", + "import time" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_difference_review_avg(row):\n", + " return row[\"Average_Score\"] - row[\"Calc_Average_Score\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the hotel reviews from CSV\n", + "print(\"Loading data file now, this could take a while depending on file size\")\n", + "start = time.time()\n", + "df = pd.read_csv('../../data/Hotel_Reviews.csv')\n", + "end = time.time()\n", + "print(\"Loading took \" + str(round(end - start, 2)) + \" 
seconds\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# What shape is the data (rows, columns)?\n", + "print(\"The shape of the data (rows, cols) is \" + str(df.shape))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# value_counts() creates a Series object that has index and values\n", + "# in this case, the country and the frequency they occur in reviewer nationality\n", + "nationality_freq = df[\"Reviewer_Nationality\"].value_counts()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# What reviewer nationality is the most common in the dataset?\n", + "print(\"The highest frequency reviewer nationality is \" + str(nationality_freq.index[0]).strip() + \" with \" + str(nationality_freq[0]) + \" reviews.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# What is the top 10 most common nationalities and their frequencies?\n", + "print(\"The top 10 highest frequency reviewer nationalities are:\")\n", + "print(nationality_freq[0:10].to_string())\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# How many unique nationalities are there?\n", + "print(\"There are \" + str(nationality_freq.index.size) + \" unique nationalities in the dataset\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# What was the most frequently reviewed hotel for the top 10 nationalities - print the hotel and number of reviews\n", + "for nat in nationality_freq[:10].index:\n", + " # First, extract all the rows that match the criteria into a new dataframe\n", + " nat_df = df[df[\"Reviewer_Nationality\"] == nat] \n", + " # Now get the hotel freq\n", + " freq = 
nat_df[\"Hotel_Name\"].value_counts()\n", + " print(\"The most reviewed hotel for \" + str(nat).strip() + \" was \" + str(freq.index[0]) + \" with \" + str(freq[0]) + \" reviews.\") \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# How many reviews are there per hotel (frequency count of hotel) and do the results match the value in `Total_Number_of_Reviews`?\n", + "# First create a new dataframe based on the old one, removing the uneeded columns\n", + "hotel_freq_df = df.drop([\"Hotel_Address\", \"Additional_Number_of_Scoring\", \"Review_Date\", \"Average_Score\", \"Reviewer_Nationality\", \"Negative_Review\", \"Review_Total_Negative_Word_Counts\", \"Positive_Review\", \"Review_Total_Positive_Word_Counts\", \"Total_Number_of_Reviews_Reviewer_Has_Given\", \"Reviewer_Score\", \"Tags\", \"days_since_review\", \"lat\", \"lng\"], axis = 1)\n", + "# Group the rows by Hotel_Name, count them and put the result in a new column Total_Reviews_Found\n", + "hotel_freq_df['Total_Reviews_Found'] = hotel_freq_df.groupby('Hotel_Name').transform('count')\n", + "# Get rid of all the duplicated rows\n", + "hotel_freq_df = hotel_freq_df.drop_duplicates(subset = [\"Hotel_Name\"])\n", + "print()\n", + "print(hotel_freq_df.to_string())\n", + "print(str(hotel_freq_df.shape))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# While there is an `Average_Score` for each hotel according to the dataset, \n", + "# you can also calculate an average score (getting the average of all reviewer scores in the dataset for each hotel)\n", + "# Add a new column to your dataframe with the column header `Calc_Average_Score` that contains that calculated average. 
\n", + "df['Calc_Average_Score'] = round(df.groupby('Hotel_Name').Reviewer_Score.transform('mean'), 1)\n", + "# Add a new column with the difference between the two average scores\n", + "df[\"Average_Score_Difference\"] = df.apply(get_difference_review_avg, axis = 1)\n", + "# Create a df without all the duplicates of Hotel_Name (so only 1 row per hotel)\n", + "review_scores_df = df.drop_duplicates(subset = [\"Hotel_Name\"])\n", + "# Sort the dataframe to find the lowest and highest average score difference\n", + "review_scores_df = review_scores_df.sort_values(by=[\"Average_Score_Difference\"])\n", + "print(review_scores_df[[\"Average_Score_Difference\", \"Average_Score\", \"Calc_Average_Score\", \"Hotel_Name\"]])\n", + "# Do any hotels have the same (rounded to 1 decimal place) `Average_Score` and `Calc_Average_Score`?\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za prevajanje z umetno inteligenco [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da upoštevate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem maternem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo profesionalni človeški prevod. 
Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki bi nastale zaradi uporabe tega prevoda.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sl/6-NLP/5-Hotel-Reviews-2/notebook.ipynb b/translations/sl/6-NLP/5-Hotel-Reviews-2/notebook.ipynb new file mode 100644 index 000000000..e69de29bb diff --git a/translations/sl/6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb b/translations/sl/6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb new file mode 100644 index 000000000..c67b82b5b --- /dev/null +++ b/translations/sl/6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb @@ -0,0 +1,172 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "033cb89c85500224b3c63fd04f49b4aa", + "translation_date": "2025-09-06T15:22:29+00:00", + "source_file": "6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import time\n", + "import ast" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def replace_address(row):\n", + " if \"Netherlands\" in row[\"Hotel_Address\"]:\n", + " return \"Amsterdam, Netherlands\"\n", + " elif \"Barcelona\" in row[\"Hotel_Address\"]:\n", + " return \"Barcelona, Spain\"\n", + " elif \"United Kingdom\" in row[\"Hotel_Address\"]:\n", + " return \"London, United 
Kingdom\"\n", + " elif \"Milan\" in row[\"Hotel_Address\"]: \n", + " return \"Milan, Italy\"\n", + " elif \"France\" in row[\"Hotel_Address\"]:\n", + " return \"Paris, France\"\n", + " elif \"Vienna\" in row[\"Hotel_Address\"]:\n", + " return \"Vienna, Austria\" \n", + " else:\n", + " return row.Hotel_Address\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the hotel reviews from CSV\n", + "start = time.time()\n", + "df = pd.read_csv('../../data/Hotel_Reviews.csv')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# dropping columns we will not use:\n", + "df.drop([\"lat\", \"lng\"], axis = 1, inplace=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Replace all the addresses with a shortened, more useful form\n", + "df[\"Hotel_Address\"] = df.apply(replace_address, axis = 1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Drop `Additional_Number_of_Scoring`\n", + "df.drop([\"Additional_Number_of_Scoring\"], axis = 1, inplace=True)\n", + "# Replace `Total_Number_of_Reviews` and `Average_Score` with our own calculated values\n", + "df.Total_Number_of_Reviews = df.groupby('Hotel_Name').transform('count')\n", + "df.Average_Score = round(df.groupby('Hotel_Name').Reviewer_Score.transform('mean'), 1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Process the Tags into new columns\n", + "# The file Hotel_Reviews_Tags.py, identifies the most important tags\n", + "# Leisure trip, Couple, Solo traveler, Business trip, Group combined with Travelers with friends, \n", + "# Family with young children, Family with older children, With a pet\n", + "df[\"Leisure_trip\"] = df.Tags.apply(lambda tag: 1 if \"Leisure trip\" in tag else 
0)\n", + "df[\"Couple\"] = df.Tags.apply(lambda tag: 1 if \"Couple\" in tag else 0)\n", + "df[\"Solo_traveler\"] = df.Tags.apply(lambda tag: 1 if \"Solo traveler\" in tag else 0)\n", + "df[\"Business_trip\"] = df.Tags.apply(lambda tag: 1 if \"Business trip\" in tag else 0)\n", + "df[\"Group\"] = df.Tags.apply(lambda tag: 1 if \"Group\" in tag or \"Travelers with friends\" in tag else 0)\n", + "df[\"Family_with_young_children\"] = df.Tags.apply(lambda tag: 1 if \"Family with young children\" in tag else 0)\n", + "df[\"Family_with_older_children\"] = df.Tags.apply(lambda tag: 1 if \"Family with older children\" in tag else 0)\n", + "df[\"With_a_pet\"] = df.Tags.apply(lambda tag: 1 if \"With a pet\" in tag else 0)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# No longer need any of these columns\n", + "df.drop([\"Review_Date\", \"Review_Total_Negative_Word_Counts\", \"Review_Total_Positive_Word_Counts\", \"days_since_review\", \"Total_Number_of_Reviews_Reviewer_Has_Given\"], axis = 1, inplace=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Saving results to Hotel_Reviews_Filtered.csv\n", + "Filtering took 23.74 seconds\n" + ] + } + ], + "source": [ + "# Saving new data file with calculated columns\n", + "print(\"Saving results to Hotel_Reviews_Filtered.csv\")\n", + "df.to_csv(r'../../data/Hotel_Reviews_Filtered.csv', index = False)\n", + "end = time.time()\n", + "print(\"Filtering took \" + str(round(end - start, 2)) + \" seconds\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za prevajanje z umetno inteligenco [Co-op Translator](https://github.com/Azure/co-op-translator). 
Čeprav si prizadevamo za natančnost, vas prosimo, da upoštevate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem maternem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo profesionalni človeški prevod. Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki bi nastale zaradi uporabe tega prevoda.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sl/6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb b/translations/sl/6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb new file mode 100644 index 000000000..11cfd08ec --- /dev/null +++ b/translations/sl/6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb @@ -0,0 +1,137 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "341efc86325ec2a214f682f57a189dfd", + "translation_date": "2025-09-06T15:22:50+00:00", + "source_file": "6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the hotel reviews from CSV (you can )\n", + "import pandas as pd \n", + "\n", + "df = pd.read_csv('../../data/Hotel_Reviews_Filtered.csv')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# We want to find the most useful tags to keep\n", + "# Remove opening and closing brackets\n", + "df.Tags = 
df.Tags.str.strip(\"[']\")\n", + "# remove all quotes too\n", + "df.Tags = df.Tags.str.replace(\" ', '\", \",\", regex = False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# removing this to take advantage of the 'already a phrase' fact of the dataset \n", + "# Now split the strings into a list\n", + "tag_list_df = df.Tags.str.split(',', expand = True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove leading and trailing spaces\n", + "df[\"Tag_1\"] = tag_list_df[0].str.strip()\n", + "df[\"Tag_2\"] = tag_list_df[1].str.strip()\n", + "df[\"Tag_3\"] = tag_list_df[2].str.strip()\n", + "df[\"Tag_4\"] = tag_list_df[3].str.strip()\n", + "df[\"Tag_5\"] = tag_list_df[4].str.strip()\n", + "df[\"Tag_6\"] = tag_list_df[5].str.strip()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# Merge the 6 columns into one with melt\n", + "df_tags = df.melt(value_vars=[\"Tag_1\", \"Tag_2\", \"Tag_3\", \"Tag_4\", \"Tag_5\", \"Tag_6\"])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "The shape of the tags with no filtering: (2514684, 2)\n", + " index count\n", + "0 Leisure trip 338423\n", + "1 Couple 205305\n", + "2 Solo traveler 89779\n", + "3 Business trip 68176\n", + "4 Group 51593\n", + "5 Family with young children 49318\n", + "6 Family with older children 21509\n", + "7 Travelers with friends 1610\n", + "8 With a pet 1078\n" + ] + } + ], + "source": [ + "# Get the value counts\n", + "tag_vc = df_tags.value.value_counts()\n", + "# print(tag_vc)\n", + "print(\"The shape of the tags with no filtering:\", str(df_tags.shape))\n", + "# Drop rooms, suites, and length of stay, mobile device and anything with less count than a 1000\n", + "df_tags = 
df_tags[~df_tags.value.str.contains(\"Standard|room|Stayed|device|Beds|Suite|Studio|King|Superior|Double\", na=False, case=False)]\n", + "tag_vc = df_tags.value.value_counts().reset_index(name=\"count\").query(\"count > 1000\")\n", + "# Print the top 10 (there should only be 9 and we'll use these in the filtering section)\n", + "print(tag_vc[:10])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za prevajanje z umetno inteligenco [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da upoštevate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo profesionalni človeški prevod. Ne prevzemamo odgovornosti za morebitna napačna razumevanja ali napačne interpretacije, ki bi nastale zaradi uporabe tega prevoda.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sl/6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb b/translations/sl/6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb new file mode 100644 index 000000000..c764fbdf4 --- /dev/null +++ b/translations/sl/6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb @@ -0,0 +1,260 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "705bf02633759f689abc37b19749a16d", + "translation_date": "2025-09-06T15:23:09+00:00", + 
"source_file": "6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[nltk_data] Downloading package vader_lexicon to\n[nltk_data] /Users/jenlooper/nltk_data...\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ], + "source": [ + "import time\n", + "import pandas as pd\n", + "import nltk as nltk\n", + "from nltk.corpus import stopwords\n", + "from nltk.sentiment.vader import SentimentIntensityAnalyzer\n", + "nltk.download('vader_lexicon')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "vader_sentiment = SentimentIntensityAnalyzer()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# There are 3 possibilities of input for a review:\n", + "# It could be \"No Negative\", in which case, return 0\n", + "# It could be \"No Positive\", in which case, return 0\n", + "# It could be a review, in which case calculate the sentiment\n", + "def calc_sentiment(review): \n", + " if review == \"No Negative\" or review == \"No Positive\":\n", + " return 0\n", + " return vader_sentiment.polarity_scores(review)[\"compound\"] \n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the hotel reviews from CSV\n", + "df = pd.read_csv(\"../../data/Hotel_Reviews_Filtered.csv\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove stop words - can be slow for a lot of text!\n", + "# Ryan Han (ryanxjhan on Kaggle) has a great post measuring performance of different stop words removal 
approaches\n", + "# https://www.kaggle.com/ryanxjhan/fast-stop-words-removal # using the approach that Ryan recommends\n", + "start = time.time()\n", + "cache = set(stopwords.words(\"english\"))\n", + "def remove_stopwords(review):\n", + " text = \" \".join([word for word in review.split() if word not in cache])\n", + " return text\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove the stop words from both columns\n", + "df.Negative_Review = df.Negative_Review.apply(remove_stopwords) \n", + "df.Positive_Review = df.Positive_Review.apply(remove_stopwords)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Removing stop words took 5.77 seconds\n" + ] + } + ], + "source": [ + "end = time.time()\n", + "print(\"Removing stop words took \" + str(round(end - start, 2)) + \" seconds\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Calculating sentiment columns for both positive and negative reviews\n", + "Calculating sentiment took 201.07 seconds\n" + ] + } + ], + "source": [ + "# Add a negative sentiment and positive sentiment column\n", + "print(\"Calculating sentiment columns for both positive and negative reviews\")\n", + "start = time.time()\n", + "df[\"Negative_Sentiment\"] = df.Negative_Review.apply(calc_sentiment)\n", + "df[\"Positive_Sentiment\"] = df.Positive_Review.apply(calc_sentiment)\n", + "end = time.time()\n", + "print(\"Calculating sentiment took \" + str(round(end - start, 2)) + \" seconds\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Negative_Review Negative_Sentiment\n", + "186584 So bad experience memories I hotel The first n... 
-0.9920\n", + "129503 First charged twice room booked booking second... -0.9896\n", + "307286 The staff Had bad experience even booking Janu... -0.9889\n", + "452092 No WLAN room Incredibly rude restaurant staff ... -0.9884\n", + "201293 We usually traveling Paris 2 3 times year busi... -0.9873\n", + "... ... ...\n", + "26899 I would say however one night expensive even d... 0.9933\n", + "138365 Wifi terribly slow I speed test network upload... 0.9938\n", + "79215 I find anything hotel first I walked past hote... 0.9938\n", + "278506 The property great location There bakery next ... 0.9945\n", + "339189 Guys I like hotel I wish return next year Howe... 0.9948\n", + "\n", + "[515738 rows x 2 columns]\n", + " Positive_Review Positive_Sentiment\n", + "137893 Bathroom Shower We going stay twice hotel 2 ni... -0.9820\n", + "5839 I completely disappointed mad since reception ... -0.9780\n", + "64158 get everything extra internet parking breakfas... -0.9751\n", + "124178 I didnt like anythig Room small Asked upgrade ... -0.9721\n", + "489137 Very rude manager abusive staff reception Dirt... -0.9703\n", + "... ... ...\n", + "331570 Everything This recently renovated hotel class... 0.9984\n", + "322920 From moment stepped doors Guesthouse Hotel sta... 0.9985\n", + "293710 This place surprise expected good actually gre... 0.9985\n", + "417442 We celebrated wedding night Langham I commend ... 0.9985\n", + "132492 We arrived super cute boutique hotel area expl... 
0.9987\n", + "\n", + "[515738 rows x 2 columns]\n" + ] + } + ], + "source": [ + "df = df.sort_values(by=[\"Negative_Sentiment\"], ascending=True)\n", + "print(df[[\"Negative_Review\", \"Negative_Sentiment\"]])\n", + "df = df.sort_values(by=[\"Positive_Sentiment\"], ascending=True)\n", + "print(df[[\"Positive_Review\", \"Positive_Sentiment\"]])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "# Reorder the columns (This is cosmetic, but to make it easier to explore the data later)\n", + "df = df.reindex([\"Hotel_Name\", \"Hotel_Address\", \"Total_Number_of_Reviews\", \"Average_Score\", \"Reviewer_Score\", \"Negative_Sentiment\", \"Positive_Sentiment\", \"Reviewer_Nationality\", \"Leisure_trip\", \"Couple\", \"Solo_traveler\", \"Business_trip\", \"Group\", \"Family_with_young_children\", \"Family_with_older_children\", \"With_a_pet\", \"Negative_Review\", \"Positive_Review\"], axis=1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Saving results to Hotel_Reviews_NLP.csv\n" + ] + } + ], + "source": [ + "print(\"Saving results to Hotel_Reviews_NLP.csv\")\n", + "df.to_csv(r\"../../data/Hotel_Reviews_NLP.csv\", index = False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za prevajanje z umetno inteligenco [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da upoštevate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo profesionalni človeški prevod. 
Ne prevzemamo odgovornosti za morebitne nesporazume ali napačne razlage, ki bi nastale zaradi uporabe tega prevoda.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sl/7-TimeSeries/1-Introduction/solution/notebook.ipynb b/translations/sl/7-TimeSeries/1-Introduction/solution/notebook.ipynb new file mode 100644 index 000000000..86238287a --- /dev/null +++ b/translations/sl/7-TimeSeries/1-Introduction/solution/notebook.ipynb @@ -0,0 +1,162 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Tao Hong, Pierre Pinson, Shu Fan, Hamidreza Zareipour, Alberto Troccoli in Rob J. Hyndman, \"Probabilistic energy forecasting: Global Energy Forecasting Competition 2014 and beyond\", International Journal of Forecasting, vol.32, št.3, str. 896-913, julij-september, 2016.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import matplotlib.pyplot as plt\n", + "from common.utils import load_data\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Naložite podatke iz csv v Pandas dataframe\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " load\n", + "2012-01-01 00:00:00 2698.0\n", + "2012-01-01 01:00:00 2558.0\n", + "2012-01-01 02:00:00 2444.0\n", + "2012-01-01 03:00:00 2402.0\n", + "2012-01-01 04:00:00 2403.0" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
load
2012-01-01 00:00:002698.0
2012-01-01 01:00:002558.0
2012-01-01 02:00:002444.0
2012-01-01 03:00:002402.0
2012-01-01 04:00:002403.0
\n
" + }, + "metadata": {}, + "execution_count": 7 + } + ], + "source": [ + "data_dir = './data'\n", + "energy = load_data(data_dir)[['load']]\n", + "energy.head()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Prikaži vse razpoložljive podatke o obremenitvi (januar 2012 do december 2014)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "energy.plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + 
}, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "energy['2014-07-01':'2014-07-07'].plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za prevajanje z umetno inteligenco [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da upoštevate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo profesionalni prevod s strani človeka. 
Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki bi nastale zaradi uporabe tega prevoda.\n" + ] + } + ], + "metadata": { + "kernel_info": { + "name": "python3" + }, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "nteract": { + "version": "nteract-front-end@1.0.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "dddca9ad9e34435494e0933c218e1579", + "translation_date": "2025-09-06T14:01:11+00:00", + "source_file": "7-TimeSeries/1-Introduction/solution/notebook.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sl/7-TimeSeries/1-Introduction/working/notebook.ipynb b/translations/sl/7-TimeSeries/1-Introduction/working/notebook.ipynb new file mode 100644 index 000000000..7d93b28d2 --- /dev/null +++ b/translations/sl/7-TimeSeries/1-Introduction/working/notebook.ipynb @@ -0,0 +1,63 @@ +{ + "cells": [ + { + "source": [ + "# Nastavitev podatkov\n", + "\n", + "V tem zvezku prikazujemo, kako:\n", + "\n", + "nastaviti podatke časovnih vrst za ta modul\n", + "vizualizirati podatke\n", + "Podatki v tem primeru so vzeti iz tekmovanja GEFCom2014 za napovedovanje. Vključujejo 3 leta urnih vrednosti porabe električne energije in temperature med letoma 2012 in 2014.\n", + "\n", + "1Tao Hong, Pierre Pinson, Shu Fan, Hamidreza Zareipour, Alberto Troccoli in Rob J. 
Hyndman, \"Probabilistic energy forecasting: Global Energy Forecasting Competition 2014 and beyond\", International Journal of Forecasting, vol.32, št.3, str. 896-913, julij-september, 2016.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za prevajanje z umetno inteligenco [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da upoštevate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem maternem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo profesionalni človeški prevod. Ne prevzemamo odgovornosti za morebitna napačna razumevanja ali napačne interpretacije, ki bi nastale zaradi uporabe tega prevoda.\n" + ] + } + ], + "metadata": { + "kernel_info": { + "name": "python3" + }, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "nteract": { + "version": "nteract-front-end@1.0.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "5e2bbe594906dce3aaaa736d6dac6683", + "translation_date": "2025-09-06T14:02:14+00:00", + "source_file": "7-TimeSeries/1-Introduction/working/notebook.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sl/7-TimeSeries/2-ARIMA/solution/notebook.ipynb
b/translations/sl/7-TimeSeries/2-ARIMA/solution/notebook.ipynb new file mode 100644 index 000000000..73a13a939 --- /dev/null +++ b/translations/sl/7-TimeSeries/2-ARIMA/solution/notebook.ipynb @@ -0,0 +1,1149 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Napovedovanje časovnih vrst z ARIMA\n", + "\n", + "V tem zvezku bomo prikazali, kako:\n", + "- pripraviti podatke časovnih vrst za učenje napovednega modela ARIMA,\n", + "- implementirati preprost model ARIMA za napovedovanje naslednjih korakov HORIZON (od časa *t+1* do *t+HORIZON*) v časovni vrsti,\n", + "- oceniti model.\n", + "\n", + "Podatki v tem primeru so vzeti iz tekmovanja za napovedovanje GEFCom2014. Sestavljeni so iz 3 let urnih vrednosti porabe električne energije in temperature med letoma 2012 in 2014. Naloga je napovedati prihodnje vrednosti porabe električne energije. V tem primeru pokažemo, kako napovedati eno časovno točko naprej, pri čemer uporabimo samo zgodovinske podatke o porabi.\n", + "\n", + "Tao Hong, Pierre Pinson, Shu Fan, Hamidreza Zareipour, Alberto Troccoli in Rob J. Hyndman, \"Probabilistic energy forecasting: Global Energy Forecasting Competition 2014 and beyond\", International Journal of Forecasting, vol.32, št.3, str. 896-913, julij-september, 2016.\n" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Namestitev odvisnosti\n", + "Začnite z namestitvijo nekaterih potrebnih odvisnosti. 
Te knjižnice z ustreznimi različicami so preverjeno delujoče za rešitev:\n", + "\n", + "* `statsmodels == 0.12.2`\n", + "* `matplotlib == 3.4.2`\n", + "* `scikit-learn == 0.24.2`\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 16, + "source": [ + "!pip install statsmodels" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "/bin/sh: pip: command not found\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 17, + "source": [ + "import os\n", + "import warnings\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import datetime as dt\n", + "import math\n", + "\n", + "from pandas.plotting import autocorrelation_plot\n", + "from statsmodels.tsa.statespace.sarimax import SARIMAX\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from common.utils import load_data, mape\n", + "from IPython.display import Image\n", + "\n", + "%matplotlib inline\n", + "pd.options.display.float_format = '{:,.2f}'.format\n", + "np.set_printoptions(precision=2)\n", + "warnings.filterwarnings(\"ignore\") # specify to ignore warning messages\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 18, + "source": [ + "energy = load_data('./data')[['load']]\n", + "energy.head(10)" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2012-01-01 00:00:002,698.00
2012-01-01 01:00:002,558.00
2012-01-01 02:00:002,444.00
2012-01-01 03:00:002,402.00
2012-01-01 04:00:002,403.00
2012-01-01 05:00:002,453.00
2012-01-01 06:00:002,560.00
2012-01-01 07:00:002,719.00
2012-01-01 08:00:002,916.00
2012-01-01 09:00:003,105.00
\n", + "
" + ], + "text/plain": [ + " load\n", + "2012-01-01 00:00:00 2,698.00\n", + "2012-01-01 01:00:00 2,558.00\n", + "2012-01-01 02:00:00 2,444.00\n", + "2012-01-01 03:00:00 2,402.00\n", + "2012-01-01 04:00:00 2,403.00\n", + "2012-01-01 05:00:00 2,453.00\n", + "2012-01-01 06:00:00 2,560.00\n", + "2012-01-01 07:00:00 2,719.00\n", + "2012-01-01 08:00:00 2,916.00\n", + "2012-01-01 09:00:00 3,105.00" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Prikaži vse razpoložljive podatke o obremenitvi (januar 2012 do december 2014)\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 19, + "source": [ + "energy.plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Ustvarjanje učnih in testnih podatkovnih nizov\n", + "\n", + "### Razdelitev podatkov\n", + "Razdelitev podatkov na učni in testni niz je ključni korak pri gradnji modela strojnega učenja. Učni niz se uporablja za treniranje modela, medtem ko testni niz služi za ocenjevanje njegove zmogljivosti.\n", + "\n", + "[!NOTE] Pravilna razdelitev podatkov pomaga preprečiti pristranskost in zagotavlja bolj realistične ocene zmogljivosti modela.\n", + "\n", + "### Priporočena razmerja\n", + "Običajno se podatki razdelijo v naslednjih razmerjih:\n", + "- **Učni niz**: 70–80 % celotnih podatkov\n", + "- **Testni niz**: 20–30 % celotnih podatkov\n", + "\n", + "Razmerje je odvisno od velikosti podatkovnega nabora. Pri večjih naborih podatkov lahko uporabite manjši delež za testiranje.\n", + "\n", + "### Primer razdelitve\n", + "Spodaj je primer, kako razdeliti podatke z uporabo knjižnice Python `scikit-learn`:\n", + "\n", + "```python\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "# Razdelitev podatkov\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", + "```\n", + "\n", + "V zgornjem primeru:\n", + "- `X` predstavlja vhodne značilnosti.\n", + "- `y` predstavlja ciljne oznake.\n", + "- `test_size=0.2` pomeni, da bo 20 % podatkov uporabljenih za testiranje.\n", + "- `random_state=42` zagotavlja ponovljivost razdelitve.\n", + "\n", + "[!TIP] Vedno uporabite enak `random_state`, če želite zagotoviti, da bo razdelitev podatkov dosledna pri večkratnem zagonu.\n", + "\n", + "### Preverjanje uravnoteženosti\n", + "Po razdelitvi podatkov preverite, ali so razredi enakomerno porazdeljeni v učnem in testnem nizu. 
To je še posebej pomembno pri neuravnoteženih podatkovnih nizih.\n", + "\n", + "```python\n", + "# Preverjanje porazdelitve razredov\n", + "print(\"Porazdelitev v učnem nizu:\", y_train.value_counts())\n", + "print(\"Porazdelitev v testnem nizu:\", y_test.value_counts())\n", + "```\n", + "\n", + "[!WARNING] Če so razredi v testnem nizu močno neuravnoteženi, lahko to vpliva na oceno zmogljivosti modela. Razmislite o uporabi stratificirane razdelitve, če je to potrebno.\n", + "\n", + "### Stratificirana razdelitev\n", + "Za ohranitev razmerja razredov v učnem in testnem nizu uporabite parameter `stratify`:\n", + "\n", + "```python\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)\n", + "```\n", + "\n", + "[!IMPORTANT] Stratificirana razdelitev je še posebej pomembna pri delu z redkimi razredi ali neuravnoteženimi podatki.\n\n**Opomba:** V tem zvezku gre za časovne vrste, zato podatkov ne delimo naključno, temveč kronološko glede na datum (glejte `train_start_dt` in `test_start_dt` v naslednji celici).\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 20, + "source": [ + "train_start_dt = '2014-11-01 00:00:00'\n", + "test_start_dt = '2014-12-30 00:00:00' " + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 21, + "source": [ + "energy[(energy.index < test_start_dt) & (energy.index >= train_start_dt)][['load']].rename(columns={'load':'train'}) \\\n", + " .join(energy[test_start_dt:][['load']].rename(columns={'load':'test'}), how='outer') \\\n", + " .plot(y=['train', 'test'], figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 22, + "source": [ + "train = energy.copy()[(energy.index >= train_start_dt) & (energy.index < test_start_dt)][['load']]\n", + "test = energy.copy()[energy.index >= test_start_dt][['load']]\n", + "\n", + "print('Training data shape: ', train.shape)\n", + "print('Test data shape: ', test.shape)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Training data shape: (1416, 1)\n", + "Test data shape: (48, 1)\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 23, + "source": [ + "scaler = MinMaxScaler()\n", + "train['load'] = scaler.fit_transform(train)\n", + "train.head(10)" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2014-11-01 00:00:000.10
2014-11-01 01:00:000.07
2014-11-01 02:00:000.05
2014-11-01 03:00:000.04
2014-11-01 04:00:000.06
2014-11-01 05:00:000.10
2014-11-01 06:00:000.19
2014-11-01 07:00:000.31
2014-11-01 08:00:000.40
2014-11-01 09:00:000.48
\n", + "
" + ], + "text/plain": [ + " load\n", + "2014-11-01 00:00:00 0.10\n", + "2014-11-01 01:00:00 0.07\n", + "2014-11-01 02:00:00 0.05\n", + "2014-11-01 03:00:00 0.04\n", + "2014-11-01 04:00:00 0.06\n", + "2014-11-01 05:00:00 0.10\n", + "2014-11-01 06:00:00 0.19\n", + "2014-11-01 07:00:00 0.31\n", + "2014-11-01 08:00:00 0.40\n", + "2014-11-01 09:00:00 0.48" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Izvirni podatki proti skaliranim podatkom:\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 24, + "source": [ + "energy[(energy.index >= train_start_dt) & (energy.index < test_start_dt)][['load']].rename(columns={'load':'original load'}).plot.hist(bins=100, fontsize=12)\n", + "train.rename(columns={'load':'scaled load'}).plot.hist(bins=100, fontsize=12)\n", + "plt.show()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYEAAAD7CAYAAACMlyg3AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAZ+klEQVR4nO3df5BV5Z3n8fdHoOjwKyq0ZFYGOroRGBEDNNHEgJg4cUdXolIzi8YVzRiyZq1UyspkslZQRl3N7jBOyk00YWOUKJgfikw07tRIIok6M2rjChFtpSxFWX8UkAnQ/Ea/+8c5rZdL3+5z6T739u3zeVWdou95zjn3e56+fb8853nOcxQRmJlZMR1V7wDMzKx+nATMzArMScDMrMCcBMzMCsxJwMyswAbXO4BqjBkzJlpaWuodhplZQ1m7du3WiGjuqqyhkkBLSwttbW31DsPMrKFI2lSpzJeDzMwKzEnAzKzAnATMzAqsofoEzKz/OnDgAJs3b2bv3r31DqWwmpqaGDduHEOGDMm8j5OAmfWJzZs3M3LkSFpaWpBU73AKJyLYtm0bmzdv5qMf/Wjm/Xw5yMz6xN69exk9erQTQJ1IYvTo0VW3xJwEzKzPOAHU15HUv5OAmVmBuU/AzHLR8s1f9unxXvv2eX12rHPPPZcVK1Zw9NFHV9zmuuuuY/bs2Zx99tlVH3/NmjUsWbKEhx9+ONP6IzFnzhyWLFlCa2trr47jJGBWAKVfyH35ZdpoIoKI4JFHHulx2xtuuKEGEdWfLweZ2YBx6623MmXKFKZMmcJ3vvMdAF577TUmTpzIZZddxpQpU3jjjTdoaWlh69atANx4441MnDiRT3/601x88cUsWbIEgMsvv5z7778fSKasuf7665k+fTqnnHIK7e3tADz99NN88pOfZNq0aXzqU5/ipZdeyhzr73//ey644AKmTp3K6aefzvr167s95p49e5g/fz6TJ0/mwgsvZM+ePX1SZzVpCUj6GPA74P6IuD
RddwlwCzAGeBT4YkT8vhbxmNnAs3btWu666y6eeuopIoLTTjuNM888k2OOOYaNGzeybNkyTj/99EP2eeaZZ3jggQdYt24dBw4cYPr06cyYMaPL448ZM4Znn32W22+/nSVLlvDDH/6QSZMm8fjjjzN48GBWr17NtddeywMPPJAp3uuvv55p06axatUqfv3rX3PZZZfx3HPPVTzmHXfcwbBhw3jxxRdZv34906dP73WdQe0uB30PeKbzhaSTgR8A5wHPAkuB24H5NYrHzAaYJ554ggsvvJDhw4cDcNFFF/H4448zd+5cJkyYcFgCAHjyySf5/Oc/T1NTE01NTZx//vkVj3/RRRcBMGPGDFauXAnA9u3bWbBgARs3bkQSBw4cqCrezoTxmc98hm3btrFjx46Kx/ztb3/LV7/6VQCmTp3K1KlTM79Xd3K/HCRpPvAH4Fclq78APBQRv42IDmARcJGkkXnHY2bF05kYemPo0KEADBo0iIMHDwKwaNEizjrrLJ5//nkeeuihPrlbOo9jdifXJCBpFHADcE1Z0cnAus4XEfEKsB84qYtjLJTUJqlty5YteYZrVjgt3/zl+0ujmzVrFqtWrWL37t3s2rWLBx98kFmzZnW7zxlnnPH+F21HR0fVo3a2b9/O8ccfD8Ddd99ddbzLly8HklFDY8aMYdSoURWPOXv2bFasWAHA888//34fQm/lfTnoRuDOiNhcdhPDCGB72bbbgcNaAhGxlORyEa2trZFTnGbWx2o9Cmn69OlcfvnlfOITnwDgyiuvZNq0abz22msV95k5cyZz585l6tSpjB07llNOOYUPf/jDmd/zG9/4BgsWLOCmm27ivPOqO9/FixfzxS9+kalTpzJs2DCWLVvW7TGvuuoqrrjiCiZPnszkyZMr9l1USxH5fK9K+jiwHJgWEfslLQb+fURcKukfgCcj4n+WbL8TmBMRaysds7W1NfxQGbPqVRoi2pdDR1988UUmT57cq2PUQ0dHByNGjGD37t3Mnj2bpUuX9lmnaz109XuQtDYiuryhIM+WwBygBXg9bQWMAAZJ+hPgH4FTSwI8ARgKvJxjPGZmh1m4cCEvvPACe/fuZcGCBQ2dAI5EnklgKfCTktdfJ0kKVwHHAf8iaRbJ6KAbgJURsTPHeMzMDtN5nb2ocksCEbEb2N35WlIHsDcitgBbJP0XkstFo4HVwBV5xWJmtRERnkSujo7k8n7Npo2IiMVlr1cAxU7BZgNIU1MT27Zt83TSddL5PIGmpqaq9vPcQWbWJ8aNG8fmzZvxUO766XyyWDWcBMysTwwZMqSqJ1pZ/+AJ5MzMCsxJwMyswJwEzMwKzEnAzKzAnATMzArMScDMrMCcBMzMCsxJwMyswJwEzMwKzEnAzKzAnATMzArMcweZ9QN9+YQvs2q4JWBmVmC5JgFJ90p6S9IOSS9LujJd3yIpJHWULIvyjMXMzA6X9+WgW4C/jIh9kiYBayT9X2BbWn50RBzMOQYzM6sg15ZARGyIiH2dL9PlxDzf08zMssu9T0DS7ZJ2A+3AW8AjJcWbJG2WdJekMRX2XyipTVKbn1hkZta3ck8CEfEVYCQwC1gJ7AO2AjOBCcCMtHx5hf2XRkRrRLQ2NzfnHa6ZWaHUZHRQRLwbEU8A44CrIqIjItoi4mBEvANcDXxO0shaxGNmZolaDxEdTNd9ApH+6yGrZmY1lNuXrqTjJM2XNELSIEnnABcDv5J0mqSJko6SNBq4DVgTEdvzisfMzA6X5xDRAK4Cvk+SbDYBX4uIX0i6GLgZOA7YATxKkiDMrB8ovYMZfBfzQJZbEoiILcCZFcruA+7L673NzCwbX4M3MyswJwEzswJzEjAzKzBPJW1mPfJU1wOXWwJmZgXmJGBmVmBOAmZmBeYkYGZWYO4YNsuZO1WtP3NLwMyswJwEzMwKzEnAzKzAnATMzArMScDMrMCcBMzMCizXJCDpXklvSdoh6WVJV5aUfVZSu6Tdkh6TNC
HPWMzM7HB5twRuAVoiYhQwF7hJ0gxJY4CVwCLgWKAN+GnOsZiZWZlcbxaLiA2lL9PlRGAGsCEifg4gaTGwVdKkiGjPMyYzM/tA7n0Ckm6XtBtoB94CHgFOBtZ1bhMRu4BX0vXl+y+U1CapbcuWLXmHa2ZWKLkngYj4CjASmEVyCWgfMALYXrbp9nS78v2XRkRrRLQ2NzfnHa6ZWaHUZHRQRLwbEU8A44CrgA5gVNlmo4CdtYjHzMwStR4iOpikT2ADcGrnSknDS9abmVmN5JYEJB0nab6kEZIGSToHuBj4FfAgMEXSPElNwHXAencKm5nVVp6jg4Lk0s/3SZLNJuBrEfELAEnzgO8C9wJPAfNzjMWsX8h7WmlPW23Vyi0JRMQW4MxuylcDk/J6fzMz65mnjTAzKzAnATOzAnMSMDMrMD9j2Kyfceeu1ZJbAmZmBeYkYGZWYE4CZmYF5iRgZlZg7hg2s6q443pgcUvAzKzAnATMzArMScDMrMAyJQFJp+QdiJmZ1V7WjuHbJQ0F7gaWR0T5oyHNLAfuhLW8ZWoJRMQs4AvAHwNrJa2Q9Ke5RmZmZrnL3CcQERuBbwF/TfKcgNsktUu6qKvtJQ2VdKekTZJ2SnpO0p+lZS2SQlJHybKoL07IzMyyy3Q5SNJU4ArgPOBR4PyIeFbSvwP+BVhZ4dhvkCSM14FzgZ+V9S8cHREHexG/mZn1QtY+gf8F/BC4NiL2dK6MiDclfaurHSJiF7C4ZNXDkl4FZgBrjyxcMzPrS1mTwHnAnoh4F0DSUUBTROyOiHuyHEDSWOAkYEPJ6k2SgqR18VcRsTV76GZm1ltZk8Bq4GygI309DPgn4FNZdpY0BFgOLIuIdkkjgJnAc8Bo4Htp+Tld7LsQWAgwfvz4jOEWi0eQFFvp778321d7HBsYsnYMN0VEZwIg/XlYlh3TVsM9wH7g6s79I6ItIg5GxDvp+s9JGlm+f0QsjYjWiGhtbm7OGK6ZmWWRNQnskjS984WkGcCebrbv3E7AncBYYF5EHKiwaVQZj5mZ9YGsl4O+Bvxc0puAgI8A/ynDfncAk4GzSzuUJZ0G/AHYCBwD3Aas8U1oZma1lSkJRMQzkiYBE9NVL3Xzv3oAJE0AvgzsA95OGgWQrnsPuBk4DthB0jF8cdXRm5lZr1TzPIGZQEu6z3RJRMSPK20cEZtIWg2V3FfFe5sNOHl3xLqj17LIerPYPcCJJKN53k1XB1AxCZiZWf+XtSXQCvxJRESPW5qZWcPIOhrneZLOYDMzG0CytgTGAC9IepqkoxeAiJibS1RmZlYTWZPA4jyDKCrf6ds4/LuygSrrENHfpEM+PxYRqyUNAwblG5qZmeUt6+MlvwTcD/wgXXU8sCqvoMzMrDaydgz/V+AMkhu7Oh8wc1xeQZmZWW1kTQL7ImJ/5wtJg/lgvh8zM2tQWTuGfyPpWuBD6bOFvwI8lF9Y1hfcmVk/vlvXGkXWlsA3gS3A70jm/nmE5HnDZmbWwLKODnoP+N/pYmZmA0TWuYNepYs+gIg4oc8jMjOzmqlm7qBOTcCfA8f2fThmZlZLWS8HbStb9R1Ja4Hr+j4ks77Xl53k7nDvmuulMWW9HDS95OVRJC2Dap5FYGZm/VDWL/K/K/n5IPAa8Bfd7SBpKHA7cDbJpaNXgP8WEf8nLf8s8D1gPPAUcHn6IBozM6uRrJeDzjrCY78BnAm8DpwL/EzSKUAHsBK4kuR+gxuBnwKnH8H7mJnZEcp6Oeia7soj4tYu1u3i0NlHH05HGc0ARgMbIuLn6fEXA1slTYqI9myhm5lZb1UzOmgm8Iv09fnA08DGrG8kaSxwErABuApY11kWEbskvQKcDLSX7bcQWAgwfvz4rG9nNVL0zsCin781vqxJYBwwPSJ2wvv/c/9lRFyaZWdJQ4DlwLKIaJc0guQO5FLbgZHl+0bEUmApQGtrq+crMjPrQ1mnjRgL7C95vT9d1yNJRwH3pPtcna7uAEaVbT
oK2JkxHjMz6wNZWwI/Bp6W9GD6+gJgWU87SRJwJ0nCODciDqRFG4AFJdsNB05M15uZWY1kaglExH8HrgD+LV2uiIibM+x6BzAZOD8i9pSsfxCYImmepCaSm87Wu1PYzKy2qrnhaxiwIyLuktQs6aMR8WqljdPHUX6Z5MH0byeNAgC+HBHLJc0DvgvcS3KfwPwjOgMzqxtPmd34sg4RvZ5khNBE4C5gCMmX9xmV9klv/FI35auBSdUEa2ZmfStrx/CFwFxgF0BEvEkXI3nMzKyxZE0C+yMiSKeTTjtyzcyswWVNAj+T9APgaElfAlbjB8yYmTW8rHMHLUmfLbyDpF/guoh4NNfIrKH5Ttq+5zq1PPSYBCQNAlank8j5i9/MbADp8XJQRLwLvCfpwzWIx8zMaijrfQIdwO8kPUo6QgggIr6aS1RmZlYTWZPAynQxM7MBpNskIGl8RLweET3OE2S9404/q4bv1LW+0lOfwKrOHyQ9kHMsZmZWYz0lgdJpH07IMxAzM6u9npJAVPjZzMwGgJ46hk+VtIOkRfCh9GfS1xER5Q+GMTOzBtJtEoiIQbUKxGqrUsdiaad0lm36s952nla7vztrrRFlnTvIzMwGoFyTgKSrJbVJ2ifp7pL1LZJCUkfJsijPWMzM7HDVPFnsSLwJ3AScA3yoi/KjI+JgzjGYmVkFuSaBiFgJIKkVGJfne5mZWfXybgn0ZJOkIJmd9K8iYmv5BpIWAgsBxo8fX+Pw6iNLB2OjdM5aMVV7B7zvmK+fenUMbwVmAhOAGSSPqlze1YYRsTQiWiOitbm5uYYhmpkNfHVpCUREB9CWvnxH0tXAW5JGRsTOesRkZlZE/WWIaOfdyP0lHjOzQsi1JSBpcPoeg4BBkpqAgySXgP4AbASOAW4D1kTE9jzjMTOzQ+V9OehbwPUlry8F/gZ4CbgZOI7kucWPAhfnHEvduNPrA64L61RpAIQ7lWsr7yGii4HFFYrvy/O9zcysZ74Gb2ZWYE4CZmYF5iRgZlZg9b5j2I5Qlk61RuWOQbPacUvAzKzAnATMzArMScDMrMCcBMzMCswdw0egUkfkQOuUbcRjZnmv7jqSB8Lv0KwabgmYmRWYk4CZWYE5CZiZFZiTgJlZgbljuJfckfiB3tSF67EY+uoz4jvD+45bAmZmBZZrEpB0taQ2Sfsk3V1W9llJ7ZJ2S3pM0oQ8YzEzs8Pl3RJ4E7gJ+FHpSkljgJXAIuBYkofO/zTnWMzMrEzeTxZbCSCpFRhXUnQRsCEifp6WLwa2SpoUEe15xmRmZh+oV8fwycC6zhcRsUvSK+n6Q5KApIXAQoDx48fXMkbrZxrlbmbrO/795K9eHcMjgO1l67YDI8s3jIilEdEaEa3Nzc01Cc7MrCjqlQQ6gFFl60YBO+sQi5lZYdUrCWwATu18IWk4cGK63szMaiTvIaKDJTUBg4BBkpokDQYeBKZImpeWXwesd6ewmVlt5d0x/C3g+pLXlwJ/ExGLJc0DvgvcCzwFzM85ll5p9A6qRo/fzPKR9xDRxcDiCmWrgUl5vr+ZmXXP00aYmRWYk4CZWYE5CZiZFZinku6G71DNl+uiGPrD77lSDJ6S2i0BM7NCcxIwMyswJwEzswJzEjAzKzB3DJfpD51Y9gH/Pqwr/lz0HbcEzMwKzEnAzKzAnATMzArMScDMrMAK2zFc2rHkuwar404566/8d109twTMzAqsrklA0hpJeyV1pMtL9YzHzKxo+kNL4OqIGJEuE+sdjJlZkfSHJGBmZnXSH5LALZK2SnpS0px6B2NmViT1TgJ/DZwAHA8sBR6SdGLpBpIWSmqT1LZly5Z6xGhmNmDVNQlExFMRsTMi9kXEMuBJ4NyybZZGRGtEtDY3N9cnUDOzAareLYFyAajeQZiZFUXdkoCkoyWdI6lJ0mBJXwBmA/9Yr5jMzIqmnncMDwFuAiYB7wLtwAUR8XIdYzIzK5S6JYGI2ALMrNf7m9nAlmV6E08z0f
/6BMzMrIacBMzMCsxJwMyswJwEzMwKrLDPEzAzq6RIHcZuCZiZFZiTgJlZgTkJmJkVmJOAmVmBFapj2A9IN7PeyPod0kidyW4JmJkVmJOAmVmBOQmYmRWYk4CZWYEVqmPYzKySSp2+RzKgpNo7jittX4s7l90SMDMrsLomAUnHSnpQ0i5JmyRdUs94zMyKpt6Xg74H7AfGAh8HfilpXURsqG9YZmbFUM8HzQ8H5gGLIqIjIp4AfgH853rFZGZWNIqI+ryxNA14MiKGlaz7OnBmRJxfsm4hsDB9ORF4qaaB9t4YYGu9g+hHXB+Hcn0cyvVxqL6qjwkR0dxVQT0vB40AdpSt2w6MLF0REUuBpbUKqq9JaouI1nrH0V+4Pg7l+jiU6+NQtaiPenYMdwCjytaNAnbWIRYzs0KqZxJ4GRgs6WMl604F3ClsZlYjdUsCEbELWAncIGm4pDOAzwP31CumnDTspaycuD4O5fo4lOvjULnXR906hiG5TwD4EfCnwDbgmxGxom4BmZkVTF2TgJmZ1ZenjTAzKzAnATOzAnMSyEDSUEl3pvMb7ZT0nKQ/S8taJIWkjpJlUdm+P5K0Q9Lbkq4pO/ZnJbVL2i3pMUkTan1+R0LSvZLeSs/rZUlXlpRVPKeBWh9QuU6K+hkBkPQxSXsl3Vuy7pL0b2mXpFVp32BnWbfziXW3b6MorxNJcyS9V/b5WFCyfb51EhFeeliA4cBioIUkcf5HkvsZWtIlgMEV9r0FeBw4BpgMvA38h7RsDMkNcn8ONAF/C/xrvc83Y52cDAxNf56UnteMns5poNZHD3VSyM9IGv8/ped2b0kd7QRmk9wwugL4Scn29wE/Tcs+nZ77yVn2bZSlizqZA2zuZvtc66TuFdKoC7CeZO6jnv7A3wQ+V/L6xs5fEsl0GP9cUjYc2ANMqvf5VVkXE4G3gL/o6ZyKUB9d1EkhPyPAfOBnJP+B6vzCuxlYUbLNiSSTSI5Mz20/cFJJ+T3At3vat97n2ss6qZgEalEnvhx0BCSNBU7i0BvbNknaLOkuSWPS7Y4B/ghYV7LdOpLsTfrv+2WR3DvxSkl5vybpdkm7gXaSL7xH6OacBnp9QMU66VSYz4ikUcANwDVlReXn8wrpl1y6HIyIl0u2764uSvft97qpE4DjJL0j6VVJf69kgk2oQZ04CVRJ0hBgObAsItpJJneaCUwgafqPTMshaZ5B0nyj5OeRJeWlZeXl/VpEfIUk1lkkN/7to/tzGtD1ARXrpIifkRuBOyNic9n6nj4f3c0n1qh10alSnbSTTKX/R8BnSD4jt6ZludeJk0AVJB1F0hTbD1wNEMk02G0RcTAi3knXf07SSJL5keDQOZJK50dq+PmTIuLdSKYBHwdcRffnNODrAw6vk6J9RiR9HDgb+Psuinv6fHR3rg1XF526q5OIeDsiXoiI9yLiVeAbJJeaoQZ14iSQkSQBd5I8AGdeRByosGnn3XdHRcS/kVwSOLWkvHR+pA2lZWkT8EQac/6kwXwQe5fnVLD6gA/qpNxA/4zMIekHeV3S28DXgXmSnuXw8zkBGEoyl1hP84l1t29/N4fKdVIu+OC7Of86qXdHSaMswPeBfwVGlK0/jaQT8ChgNEkv/mMl5d8GfkMy8mMSyR9858iPZpKm2zySkR//gwYY+QEcR9LBNQIYBJwD7ALm9nROA7E+MtRJoT4jwDDgIyXLEuD+9FxOJrm8MYuk0/NeDh0d9BOS0TDDgTM4fCRMxX3789JDnZxFcqlQwB8DjwF31apO6l45jbCkv6AA9pI0vzqXLwAXA6+mf/BvAT8GPlKy71CS+ZF2AO8A15Qd+2ySa4J7gDVAS73PN0N9NKdfWn9Iz+t3wJeynNNArI+e6qSIn5Gy+BeTjoRJX18CvJ7Wxz8Ax5aUHQusSsteBy4pO1bFfRtp4dDRQdcA/w/YDbwB3EbJ6J6868RzB5mZFZj7BMzMCsxJwMyswJwEzMwKzEnAzKzAnA
TMzArMScDMrMCcBMzMCsxJwMyswP4/zu7dqmtpqTMAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Povečajmo tudi testne podatke\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 25, + "source": [ + "test['load'] = scaler.transform(test)\n", + "test.head()" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2014-12-30 00:00:000.33
2014-12-30 01:00:000.29
2014-12-30 02:00:000.27
2014-12-30 03:00:000.27
2014-12-30 04:00:000.30
\n", + "
" + ], + "text/plain": [ + " load\n", + "2014-12-30 00:00:00 0.33\n", + "2014-12-30 01:00:00 0.29\n", + "2014-12-30 02:00:00 0.27\n", + "2014-12-30 03:00:00 0.27\n", + "2014-12-30 04:00:00 0.30" + ] + }, + "metadata": {}, + "execution_count": 25 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 26, + "source": [ + "# Specify the number of steps to forecast ahead\n", + "HORIZON = 3\n", + "print('Forecasting horizon:', HORIZON, 'hours')" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Forecasting horizon: 3 hours\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 27, + "source": [ + "order = (4, 1, 0)\n", + "seasonal_order = (1, 1, 0, 24)\n", + "\n", + "model = SARIMAX(endog=train, order=order, seasonal_order=seasonal_order)\n", + "results = model.fit()\n", + "\n", + "print(results.summary())\n" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " SARIMAX Results \n", + "==========================================================================================\n", + "Dep. Variable: load No. 
Observations: 1416\n", + "Model: SARIMAX(4, 1, 0)x(1, 1, 0, 24) Log Likelihood 3477.239\n", + "Date: Thu, 30 Sep 2021 AIC -6942.477\n", + "Time: 14:36:28 BIC -6911.050\n", + "Sample: 11-01-2014 HQIC -6930.725\n", + " - 12-29-2014 \n", + "Covariance Type: opg \n", + "==============================================================================\n", + " coef std err z P>|z| [0.025 0.975]\n", + "------------------------------------------------------------------------------\n", + "ar.L1 0.8403 0.016 52.226 0.000 0.809 0.872\n", + "ar.L2 -0.5220 0.034 -15.388 0.000 -0.588 -0.456\n", + "ar.L3 0.1536 0.044 3.470 0.001 0.067 0.240\n", + "ar.L4 -0.0778 0.036 -2.158 0.031 -0.148 -0.007\n", + "ar.S.L24 -0.2327 0.024 -9.718 0.000 -0.280 -0.186\n", + "sigma2 0.0004 8.32e-06 47.358 0.000 0.000 0.000\n", + "===================================================================================\n", + "Ljung-Box (L1) (Q): 0.05 Jarque-Bera (JB): 1464.60\n", + "Prob(Q): 0.83 Prob(JB): 0.00\n", + "Heteroskedasticity (H): 0.84 Skew: 0.14\n", + "Prob(H) (two-sided): 0.07 Kurtosis: 8.02\n", + "===================================================================================\n", + "\n", + "Warnings:\n", + "[1] Covariance matrix calculated using the outer product of gradients (complex-step).\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Ocenite model\n" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Ustvari testno podatkovno točko za vsak korak HORIZON.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 28, + "source": [ + "test_shifted = test.copy()\n", + "\n", + "for t in range(1, HORIZON):\n", + " test_shifted['load+'+str(t)] = test_shifted['load'].shift(-t, freq='H')\n", + " \n", + "test_shifted = test_shifted.dropna(how='any')\n", + "test_shifted.head(5)" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
loadload+1load+2
2014-12-30 00:00:000.330.290.27
2014-12-30 01:00:000.290.270.27
2014-12-30 02:00:000.270.270.30
2014-12-30 03:00:000.270.300.41
2014-12-30 04:00:000.300.410.57
\n", + "
" + ], + "text/plain": [ + " load load+1 load+2\n", + "2014-12-30 00:00:00 0.33 0.29 0.27\n", + "2014-12-30 01:00:00 0.29 0.27 0.27\n", + "2014-12-30 02:00:00 0.27 0.27 0.30\n", + "2014-12-30 03:00:00 0.27 0.30 0.41\n", + "2014-12-30 04:00:00 0.30 0.41 0.57" + ] + }, + "metadata": {}, + "execution_count": 28 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Naredite napovedi na testnih podatkih\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 29, + "source": [ + "%%time\n", + "training_window = 720 # dedicate 30 days (720 hours) for training\n", + "\n", + "train_ts = train['load']\n", + "test_ts = test_shifted\n", + "\n", + "history = [x for x in train_ts]\n", + "history = history[(-training_window):]\n", + "\n", + "predictions = list()\n", + "\n", + "# let's user simpler model for demonstration\n", + "order = (2, 1, 0)\n", + "seasonal_order = (1, 1, 0, 24)\n", + "\n", + "for t in range(test_ts.shape[0]):\n", + " model = SARIMAX(endog=history, order=order, seasonal_order=seasonal_order)\n", + " model_fit = model.fit()\n", + " yhat = model_fit.forecast(steps = HORIZON)\n", + " predictions.append(yhat)\n", + " obs = list(test_ts.iloc[t])\n", + " # move the training window\n", + " history.append(obs[0])\n", + " history.pop(0)\n", + " print(test_ts.index[t])\n", + " print(t+1, ': predicted =', yhat, 'expected =', obs)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "2014-12-30 00:00:00\n", + "1 : predicted = [0.32 0.29 0.28] expected = [0.32945389435989236, 0.2900626678603402, 0.2739480752014323]\n", + "2014-12-30 01:00:00\n", + "2 : predicted = [0.3 0.29 0.3 ] expected = [0.2900626678603402, 0.2739480752014323, 0.26812891674127126]\n", + "2014-12-30 02:00:00\n", + "3 : predicted = [0.27 0.28 0.32] expected = [0.2739480752014323, 0.26812891674127126, 0.3025962399283795]\n", + "2014-12-30 03:00:00\n", + "4 : predicted = [0.28 0.32 0.42] expected = [0.26812891674127126, 
0.3025962399283795, 0.40823634735899716]\n", + "2014-12-30 04:00:00\n", + "5 : predicted = [0.3 0.39 0.54] expected = [0.3025962399283795, 0.40823634735899716, 0.5689346463742166]\n", + "2014-12-30 05:00:00\n", + "6 : predicted = [0.4 0.55 0.66] expected = [0.40823634735899716, 0.5689346463742166, 0.6799462846911368]\n", + "2014-12-30 06:00:00\n", + "7 : predicted = [0.57 0.68 0.75] expected = [0.5689346463742166, 0.6799462846911368, 0.7309758281110115]\n", + "2014-12-30 07:00:00\n", + "8 : predicted = [0.68 0.75 0.8 ] expected = [0.6799462846911368, 0.7309758281110115, 0.7511190689346463]\n", + "2014-12-30 08:00:00\n", + "9 : predicted = [0.75 0.8 0.82] expected = [0.7309758281110115, 0.7511190689346463, 0.7636526410026856]\n", + "2014-12-30 09:00:00\n", + "10 : predicted = [0.77 0.78 0.78] expected = [0.7511190689346463, 0.7636526410026856, 0.7381378692927483]\n", + "2014-12-30 10:00:00\n", + "11 : predicted = [0.76 0.75 0.74] expected = [0.7636526410026856, 0.7381378692927483, 0.7188898836168307]\n", + "2014-12-30 11:00:00\n", + "12 : predicted = [0.77 0.76 0.75] expected = [0.7381378692927483, 0.7188898836168307, 0.7090420769919425]\n", + "2014-12-30 12:00:00\n", + "13 : predicted = [0.7 0.68 0.69] expected = [0.7188898836168307, 0.7090420769919425, 0.7081468218442255]\n", + "2014-12-30 13:00:00\n", + "14 : predicted = [0.72 0.73 0.76] expected = [0.7090420769919425, 0.7081468218442255, 0.7385854968666068]\n", + "2014-12-30 14:00:00\n", + "15 : predicted = [0.71 0.73 0.86] expected = [0.7081468218442255, 0.7385854968666068, 0.8478066248880931]\n", + "2014-12-30 15:00:00\n", + "16 : predicted = [0.73 0.85 0.97] expected = [0.7385854968666068, 0.8478066248880931, 0.9516562220232765]\n", + "2014-12-30 16:00:00\n", + "17 : predicted = [0.87 0.99 0.97] expected = [0.8478066248880931, 0.9516562220232765, 0.934198746642793]\n", + "2014-12-30 17:00:00\n", + "18 : predicted = [0.94 0.92 0.86] expected = [0.9516562220232765, 0.934198746642793, 0.8876454789615038]\n", + 
"2014-12-30 18:00:00\n", + "19 : predicted = [0.94 0.89 0.82] expected = [0.934198746642793, 0.8876454789615038, 0.8294538943598924]\n", + "2014-12-30 19:00:00\n", + "20 : predicted = [0.88 0.82 0.71] expected = [0.8876454789615038, 0.8294538943598924, 0.7197851387645477]\n", + "2014-12-30 20:00:00\n", + "21 : predicted = [0.83 0.72 0.58] expected = [0.8294538943598924, 0.7197851387645477, 0.5747538048343777]\n", + "2014-12-30 21:00:00\n", + "22 : predicted = [0.72 0.58 0.47] expected = [0.7197851387645477, 0.5747538048343777, 0.4592658907788718]\n", + "2014-12-30 22:00:00\n", + "23 : predicted = [0.58 0.47 0.39] expected = [0.5747538048343777, 0.4592658907788718, 0.3858549686660697]\n", + "2014-12-30 23:00:00\n", + "24 : predicted = [0.46 0.38 0.34] expected = [0.4592658907788718, 0.3858549686660697, 0.34377797672336596]\n", + "2014-12-31 00:00:00\n", + "25 : predicted = [0.38 0.34 0.33] expected = [0.3858549686660697, 0.34377797672336596, 0.32542524619516544]\n", + "2014-12-31 01:00:00\n", + "26 : predicted = [0.36 0.34 0.34] expected = [0.34377797672336596, 0.32542524619516544, 0.33034914950760963]\n", + "2014-12-31 02:00:00\n", + "27 : predicted = [0.32 0.32 0.35] expected = [0.32542524619516544, 0.33034914950760963, 0.3706356311548791]\n", + "2014-12-31 03:00:00\n", + "28 : predicted = [0.32 0.36 0.47] expected = [0.33034914950760963, 0.3706356311548791, 0.470008952551477]\n", + "2014-12-31 04:00:00\n", + "29 : predicted = [0.37 0.48 0.65] expected = [0.3706356311548791, 0.470008952551477, 0.6145926589077886]\n", + "2014-12-31 05:00:00\n", + "30 : predicted = [0.48 0.64 0.75] expected = [0.470008952551477, 0.6145926589077886, 0.7247090420769919]\n", + "2014-12-31 06:00:00\n", + "31 : predicted = [0.63 0.73 0.79] expected = [0.6145926589077886, 0.7247090420769919, 0.786034019695613]\n", + "2014-12-31 07:00:00\n", + "32 : predicted = [0.71 0.76 0.79] expected = [0.7247090420769919, 0.786034019695613, 0.8012533572068039]\n", + "2014-12-31 08:00:00\n", + "33 : 
predicted = [0.79 0.82 0.83] expected = [0.786034019695613, 0.8012533572068039, 0.7994628469113696]\n", + "2014-12-31 09:00:00\n", + "34 : predicted = [0.82 0.83 0.81] expected = [0.8012533572068039, 0.7994628469113696, 0.780214861235452]\n", + "2014-12-31 10:00:00\n", + "35 : predicted = [0.8 0.78 0.76] expected = [0.7994628469113696, 0.780214861235452, 0.7587287376902416]\n", + "2014-12-31 11:00:00\n", + "36 : predicted = [0.77 0.75 0.74] expected = [0.780214861235452, 0.7587287376902416, 0.7367949865711727]\n", + "2014-12-31 12:00:00\n", + "37 : predicted = [0.77 0.76 0.76] expected = [0.7587287376902416, 0.7367949865711727, 0.7188898836168307]\n", + "2014-12-31 13:00:00\n", + "38 : predicted = [0.75 0.75 0.78] expected = [0.7367949865711727, 0.7188898836168307, 0.7273948075201431]\n", + "2014-12-31 14:00:00\n", + "39 : predicted = [0.73 0.75 0.87] expected = [0.7188898836168307, 0.7273948075201431, 0.8299015219337511]\n", + "2014-12-31 15:00:00\n", + "40 : predicted = [0.74 0.85 0.96] expected = [0.7273948075201431, 0.8299015219337511, 0.909579230080573]\n", + "2014-12-31 16:00:00\n", + "41 : predicted = [0.83 0.94 0.93] expected = [0.8299015219337511, 0.909579230080573, 0.855863921217547]\n", + "2014-12-31 17:00:00\n", + "42 : predicted = [0.94 0.93 0.88] expected = [0.909579230080573, 0.855863921217547, 0.7721575649059982]\n", + "2014-12-31 18:00:00\n", + "43 : predicted = [0.87 0.82 0.77] expected = [0.855863921217547, 0.7721575649059982, 0.7023276633840643]\n", + "2014-12-31 19:00:00\n", + "44 : predicted = [0.79 0.73 0.63] expected = [0.7721575649059982, 0.7023276633840643, 0.6195165622202325]\n", + "2014-12-31 20:00:00\n", + "45 : predicted = [0.7 0.59 0.46] expected = [0.7023276633840643, 0.6195165622202325, 0.5425246195165621]\n", + "2014-12-31 21:00:00\n", + "46 : predicted = [0.6 0.47 0.36] expected = [0.6195165622202325, 0.5425246195165621, 0.4735899731423454]\n", + "CPU times: user 12min 15s, sys: 2min 39s, total: 14min 54s\n", + "Wall time: 2min 
36s\n" + ] + } + ], + "metadata": { + "scrolled": true + } + }, + { + "cell_type": "markdown", + "source": [ + "Primerjajte napovedi z dejansko obremenitvijo\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 30, + "source": [ + "eval_df = pd.DataFrame(predictions, columns=['t+'+str(t) for t in range(1, HORIZON+1)])\n", + "eval_df['timestamp'] = test.index[0:len(test.index)-HORIZON+1]\n", + "eval_df = pd.melt(eval_df, id_vars='timestamp', value_name='prediction', var_name='h')\n", + "eval_df['actual'] = np.array(np.transpose(test_ts)).ravel()\n", + "eval_df[['prediction', 'actual']] = scaler.inverse_transform(eval_df[['prediction', 'actual']])\n", + "eval_df.head()" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
timestamphpredictionactual
02014-12-30 00:00:00t+13,008.743,023.00
12014-12-30 01:00:00t+12,955.532,935.00
22014-12-30 02:00:00t+12,900.172,899.00
32014-12-30 03:00:00t+12,917.692,886.00
42014-12-30 04:00:00t+12,946.992,963.00
\n", + "
" + ], + "text/plain": [ + " timestamp h prediction actual\n", + "0 2014-12-30 00:00:00 t+1 3,008.74 3,023.00\n", + "1 2014-12-30 01:00:00 t+1 2,955.53 2,935.00\n", + "2 2014-12-30 02:00:00 t+1 2,900.17 2,899.00\n", + "3 2014-12-30 03:00:00 t+1 2,917.69 2,886.00\n", + "4 2014-12-30 04:00:00 t+1 2,946.99 2,963.00" + ] + }, + "metadata": {}, + "execution_count": 30 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Izračunajte **povprečno absolutno odstotno napako (MAPE)** za vse napovedi\n", + "\n", + "$$MAPE = \\frac{1}{n} \\sum_{t=1}^{n}|\\frac{actual_t - predicted_t}{actual_t}|$$\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 31, + "source": [ + "if(HORIZON > 1):\n", + " eval_df['APE'] = (eval_df['prediction'] - eval_df['actual']).abs() / eval_df['actual']\n", + " print(eval_df.groupby('h')['APE'].mean())" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "h\n", + "t+1 0.01\n", + "t+2 0.01\n", + "t+3 0.02\n", + "Name: APE, dtype: float64\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 32, + "source": [ + "print('One step forecast MAPE: ', (mape(eval_df[eval_df['h'] == 't+1']['prediction'], eval_df[eval_df['h'] == 't+1']['actual']))*100, '%')" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "One step forecast MAPE: 0.5570581332313952 %\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 33, + "source": [ + "print('Multi-step forecast MAPE: ', mape(eval_df['prediction'], eval_df['actual'])*100, '%')" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Multi-step forecast MAPE: 1.1460048657704118 %\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Prikaži napovedi v primerjavi z dejanskimi vrednostmi za prvi teden testnega 
nabora\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 34, + "source": [ + "if(HORIZON == 1):\n", + " ## Plotting single step forecast\n", + " eval_df.plot(x='timestamp', y=['actual', 'prediction'], style=['r', 'b'], figsize=(15, 8))\n", + "\n", + "else:\n", + " ## Plotting multi step forecast\n", + " plot_df = eval_df[(eval_df.h=='t+1')][['timestamp', 'actual']]\n", + " for t in range(1, HORIZON+1):\n", + " plot_df['t+'+str(t)] = eval_df[(eval_df.h=='t+'+str(t))]['prediction'].values\n", + "\n", + " fig = plt.figure(figsize=(15, 8))\n", + " ax = plt.plot(plot_df['timestamp'], plot_df['actual'], color='red', linewidth=4.0)\n", + " ax = fig.add_subplot(111)\n", + " for t in range(1, HORIZON+1):\n", + " x = plot_df['timestamp'][(t-1):]\n", + " y = plot_df['t+'+str(t)][0:len(x)]\n", + " ax.plot(x, y, color='blue', linewidth=4*math.pow(.9,t), alpha=math.pow(0.8,t))\n", + " \n", + " ax.legend(loc='best')\n", + " \n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "No handles with labels found to put in legend.\n" + ] + }, + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za strojno prevajanje [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da se zavedate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo strokovno človeško prevajanje. Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki izhajajo iz uporabe tega prevoda.\n" + ] + } + ], + "metadata": { + "kernel_info": { + "name": "python3" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "nteract": { + "version": "nteract-front-end@1.0.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "c193140200b9684da27e3890211391b6", + "translation_date": "2025-09-06T13:57:24+00:00", + "source_file": "7-TimeSeries/2-ARIMA/solution/notebook.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sl/7-TimeSeries/2-ARIMA/working/notebook.ipynb b/translations/sl/7-TimeSeries/2-ARIMA/working/notebook.ipynb new file mode 100644 index 
000000000..f5c9e9867 --- /dev/null +++ b/translations/sl/7-TimeSeries/2-ARIMA/working/notebook.ipynb @@ -0,0 +1,59 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "523ec472196307b3c4235337353c9ceb", + "translation_date": "2025-09-06T14:00:17+00:00", + "source_file": "7-TimeSeries/2-ARIMA/working/notebook.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# Napovedovanje časovnih vrst z ARIMA\n", + "\n", + "V tem zvezku bomo prikazali, kako:\n", + "- pripraviti podatke časovnih vrst za učenje napovednega modela ARIMA,\n", + "- implementirati preprost model ARIMA za napovedovanje naslednjih korakov HORIZON (od časa *t+1* do *t+HORIZON*) v časovni vrsti,\n", + "- oceniti model.\n", + "\n", + "Podatki v tem primeru so vzeti iz tekmovanja za napovedovanje GEFCom2014. Sestavljeni so iz 3 let urnih vrednosti porabe električne energije in temperature med letoma 2012 in 2014. Naloga je napovedati prihodnje vrednosti porabe električne energije. V tem primeru pokažemo, kako napovedati eno časovno točko naprej, pri čemer uporabimo samo zgodovinske podatke o porabi.\n", + "\n", + "Tao Hong, Pierre Pinson, Shu Fan, Hamidreza Zareipour, Alberto Troccoli in Rob J. Hyndman, \"Probabilistic energy forecasting: Global Energy Forecasting Competition 2014 and beyond\", International Journal of Forecasting, vol.32, št.3, str. 
896-913, julij-september, 2016.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pip install statsmodels" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za strojno prevajanje [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da upoštevate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo strokovno človeško prevajanje. Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki izhajajo iz uporabe tega prevoda.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sl/7-TimeSeries/3-SVR/solution/notebook.ipynb b/translations/sl/7-TimeSeries/3-SVR/solution/notebook.ipynb new file mode 100644 index 000000000..490980636 --- /dev/null +++ b/translations/sl/7-TimeSeries/3-SVR/solution/notebook.ipynb @@ -0,0 +1,1029 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "fv9OoQsMFk5A" + }, + "source": [ + "# Napovedovanje časovnih vrst z uporabo regresorja podpornih vektorjev\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "V tem zvezku bomo prikazali, kako:\n", + "\n", + "- pripraviti 2D časovne vrste podatkov za učenje modela regresorja SVM\n", + "- implementirati SVR z uporabo RBF jedra\n", + "- oceniti model s pomočjo grafov in MAPE\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Uvažanje modulov\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append('../../')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + 
"id": "M687KNlQFp0-" + }, + "outputs": [], + "source": [ + "import os\n", + "import warnings\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import datetime as dt\n", + "import math\n", + "\n", + "from sklearn.svm import SVR\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from common.utils import load_data, mape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Cj-kfVdMGjWP" + }, + "source": [ + "## Priprava podatkov\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8fywSjC6GsRz" + }, + "source": [ + "### Naloži podatke\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "aBDkEB11Fumg", + "outputId": "99cf7987-0509-4b73-8cc2-75d7da0d2740" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2012-01-01 00:00:002698.0
2012-01-01 01:00:002558.0
2012-01-01 02:00:002444.0
2012-01-01 03:00:002402.0
2012-01-01 04:00:002403.0
\n", + "
" + ], + "text/plain": [ + " load\n", + "2012-01-01 00:00:00 2698.0\n", + "2012-01-01 01:00:00 2558.0\n", + "2012-01-01 02:00:00 2444.0\n", + "2012-01-01 03:00:00 2402.0\n", + "2012-01-01 04:00:00 2403.0" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "energy = load_data('../../data')[['load']]\n", + "energy.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "O0BWP13rGnh4" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 486 + }, + "id": "hGaNPKu_Gidk", + "outputId": "7f89b326-9057-4f49-efbe-cb100ebdf76d" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "energy.plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IPuNor4eGwYY" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "ysvsNyONGt0Q" + }, + "outputs": [], + "source": [ + "train_start_dt = '2014-11-01 00:00:00'\n", + "test_start_dt = '2014-12-30 00:00:00'" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 548 + }, + "id": "SsfdLoPyGy9w", + "outputId": "d6d6c25b-b1f4-47e5-91d1-707e043237d7" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "energy[(energy.index < test_start_dt) & (energy.index >= train_start_dt)][['load']].rename(columns={'load':'train'}) \\\n", + " .join(energy[test_start_dt:][['load']].rename(columns={'load':'test'}), how='outer') \\\n", + " .plot(y=['train', 'test'], figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XbFTqBw6G1Ch" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Zdaj morate pripraviti podatke za učenje z izvajanjem filtriranja in skaliranja vaših podatkov.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cYivRdQpHDj3", + "outputId": "a138f746-461c-4fd6-bfa6-0cee094c4aa1" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training data shape: (1416, 1)\n", + "Test data shape: (48, 1)\n" + ] + } + ], + "source": [ + "train = energy.copy()[(energy.index >= train_start_dt) & (energy.index < test_start_dt)][['load']]\n", + "test = energy.copy()[energy.index >= test_start_dt][['load']]\n", + "\n", + "print('Training data shape: ', train.shape)\n", + "print('Test data shape: ', test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Prilagodite podatke, da bodo v razponu (0, 1).\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "3DNntGQnZX8G", + "outputId": "210046bc-7a66-4ccd-d70d-aa4a7309949c" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2014-11-01 00:00:000.101611
2014-11-01 01:00:000.065801
2014-11-01 02:00:000.046106
2014-11-01 03:00:000.042525
2014-11-01 04:00:000.059087
\n", + "
" + ], + "text/plain": [ + " load\n", + "2014-11-01 00:00:00 0.101611\n", + "2014-11-01 01:00:00 0.065801\n", + "2014-11-01 02:00:00 0.046106\n", + "2014-11-01 03:00:00 0.042525\n", + "2014-11-01 04:00:00 0.059087" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "scaler = MinMaxScaler()\n", + "train['load'] = scaler.fit_transform(train)\n", + "train.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "26Yht-rzZexe", + "outputId": "20326077-a38a-4e78-cc5b-6fd7af95d301" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2014-12-30 00:00:000.329454
2014-12-30 01:00:000.290063
2014-12-30 02:00:000.273948
2014-12-30 03:00:000.268129
2014-12-30 04:00:000.302596
\n", + "
" + ], + "text/plain": [ + " load\n", + "2014-12-30 00:00:00 0.329454\n", + "2014-12-30 01:00:00 0.290063\n", + "2014-12-30 02:00:00 0.273948\n", + "2014-12-30 03:00:00 0.268129\n", + "2014-12-30 04:00:00 0.302596" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test['load'] = scaler.transform(test)\n", + "test.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x0n6jqxOQ41Z" + }, + "source": [ + "### Ustvarjanje podatkov s časovnimi koraki\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fdmxTZtOQ8xs" + }, + "source": [ + "Za naš SVR preoblikujemo vhodne podatke v obliko `[batch, timesteps]`. Tako preoblikujemo obstoječe `train_data` in `test_data`, da dodamo novo dimenzijo, ki se nanaša na časovne korake. V našem primeru vzamemo `timesteps = 5`. Tako so vhodi v model podatki za prve 4 časovne korake, izhod pa bodo podatki za 5. časovni korak.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "Rpju-Sc2HFm0" + }, + "outputs": [], + "source": [ + "# Converting to numpy arrays\n", + "\n", + "train_data = train.values\n", + "test_data = test.values" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# Selecting the timesteps\n", + "\n", + "timesteps=5" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "O-JrsrsVJhUQ", + "outputId": "c90dbe71-bacc-4ec4-b452-f82fe5aefaef" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(1412, 5)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Converting data to 2D tensor\n", + "\n", + "train_data_timesteps=np.array([[j for j in train_data[i:i+timesteps]] for i in range(0,len(train_data)-timesteps+1)])[:,:,0]\n", + "train_data_timesteps.shape" + ] + }, + { + "cell_type": 
"code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "exJD8AI7KE4g", + "outputId": "ce90260c-f327-427d-80f2-77307b5a6318" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(44, 5)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Converting test data to 2D tensor\n", + "\n", + "test_data_timesteps=np.array([[j for j in test_data[i:i+timesteps]] for i in range(0,len(test_data)-timesteps+1)])[:,:,0]\n", + "test_data_timesteps.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "2u0R2sIsLuq5" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(1412, 4) (1412, 1)\n", + "(44, 4) (44, 1)\n" + ] + } + ], + "source": [ + "x_train, y_train = train_data_timesteps[:,:timesteps-1],train_data_timesteps[:,[timesteps-1]]\n", + "x_test, y_test = test_data_timesteps[:,:timesteps-1],test_data_timesteps[:,[timesteps-1]]\n", + "\n", + "print(x_train.shape, y_train.shape)\n", + "print(x_test.shape, y_test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8wIPOtAGLZlh" + }, + "source": [ + "## Ustvarjanje SVR modela\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "id": "EhA403BEPEiD" + }, + "outputs": [], + "source": [ + "# Create model using RBF kernel\n", + "\n", + "model = SVR(kernel='rbf',gamma=0.5, C=10, epsilon = 0.05)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GS0UA3csMbqp", + "outputId": "d86b6f05-5742-4c1d-c2db-c40510bd4f0d" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "SVR(C=10, cache_size=200, coef0=0.0, degree=3, epsilon=0.05, gamma=0.5,\n", + " kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } 
+ ], + "source": [ + "# Fit model on training data\n", + "\n", + "model.fit(x_train, y_train[:,0])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Rz_x8S3UrlcF" + }, + "source": [ + "### Naredi napoved modela\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XR0gnt3MnuYS", + "outputId": "157e40ab-9a23-4b66-a885-0d52a24b2364" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(1412, 1) (44, 1)\n" + ] + } + ], + "source": [ + "# Making predictions\n", + "\n", + "y_train_pred = model.predict(x_train).reshape(-1,1)\n", + "y_test_pred = model.predict(x_test).reshape(-1,1)\n", + "\n", + "print(y_train_pred.shape, y_test_pred.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_2epncg-SGzr" + }, + "source": [ + "## Analiza učinkovitosti modela\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1412 44\n" + ] + } + ], + "source": [ + "# Scaling the predictions\n", + "\n", + "y_train_pred = scaler.inverse_transform(y_train_pred)\n", + "y_test_pred = scaler.inverse_transform(y_test_pred)\n", + "\n", + "print(len(y_train_pred), len(y_test_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xmm_YLXhq7gV", + "outputId": "18392f64-4029-49ac-c71a-a4e2411152a1" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1412 44\n" + ] + } + ], + "source": [ + "# Scaling the original values\n", + "\n", + "y_train = scaler.inverse_transform(y_train)\n", + "y_test = scaler.inverse_transform(y_test)\n", + "\n", + "print(len(y_train), len(y_test))" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + 
"id": "u3LBj93coHEi", + "outputId": "d4fd49e8-8c6e-4bb0-8ef9-ca0b26d725b4" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1412 44\n" + ] + } + ], + "source": [ + "# Extract the timesteps for x-axis\n", + "\n", + "train_timestamps = energy[(energy.index < test_start_dt) & (energy.index >= train_start_dt)].index[timesteps-1:]\n", + "test_timestamps = energy[test_start_dt:].index[timesteps-1:]\n", + "\n", + "print(len(train_timestamps), len(test_timestamps))" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(25,6))\n", + "plt.plot(train_timestamps, y_train, color = 'red', linewidth=2.0, alpha = 0.6)\n", + "plt.plot(train_timestamps, y_train_pred, color = 'blue', linewidth=0.8)\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.title(\"Training data prediction\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "LnhzcnYtXHCm", + "outputId": "f5f0d711-f18b-4788-ad21-d4470ea2c02b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MAPE for training data: 1.7195710200875551 %\n" + ] + } + ], + "source": [ + "print('MAPE for training data: ', mape(y_train_pred, y_train)*100, '%')" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 225 + }, + "id": "53Q02FoqQH4V", + "outputId": "53e2d59b-5075-4765-ad9e-aed56c966583" + }, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAmUAAADSCAYAAAAL37fDAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAABSYklEQVR4nO3dd3hU1dbA4d9Kofei0qSJ9B6KKIiIgKCoIIoFbAjYr+Varvez92vFhogNRJCqgCC9inRCB+kSpPciqfv7Y53AgAmkn0my3ueZh5kzM2f2HJKTdfZee21xzmGMMcYYY/wV4ncDjDHGGGOMBWXGGGOMMUHBgjJjjDHGmCBgQZkxxhhjTBCwoMwYY4wxJghYUGaMMcYYEwRSHJSJSKiILBOR8d7jb0Vki4hEercG3nYRkX4islFEVohIo4B93CUiG7zbXRn+bYwxxhhjsqmwVLz2MWAtUCRg27+dcyPPet21QDXv1gz4HGgmIiWAF4EIwAFLRGSsc+5gWhtvjDHGGJNTpCgoE5HyQCfgdeCJ87z8BmCQ06q080WkmIiUAVoDU5xzB7x9TgE6AEOT21GpUqVcpUqVUtJEY4wxxhhfLVmyZJ9zrnRa35/SnrIPgaeBwmdtf11EXgCmAc8656KBcsD2gNdEeduS256sSpUqsXjx4hQ20RhjjDHGPyKyLT3vP29OmYhcB+xxzi0566nngBpAE6AE8Ex6GhLweb1FZLGILN67d29G7NIYY4wxJuilJNH/cqCziGwFhgFtROR759xOp6KBb4Cm3ut3ABUC3l/e25bc9jM45wY45yKccxGlS6e5B9AYY4wxJls5b1DmnHvOOVfeOVcJ6A5Md87d6eWJISIC3Ais8t4yFujpzcJsDhx2zu0EJgHtRKS4iBQH2nnbjDHGGGNyvdTMvjzbEBEpDQgQCfT1tk8AOgIbgRPAPQDOuQMi8iqwyHvdK4lJ/6kRGxtLVFQUJ0+eTEfTTb58+Shfvjzh4eF+N8UYY4wxgOgkyeAUERHhzk7037JlC4ULF6ZkyZJoJ51JLecc+/fv5+jRo1SuXNnv5piz7NsH4eFQtKjfLTHGGJMaIrLEOReR1vdnu4r+J0+etIAsnUSEkiVLWm9jEFq7Flq2hCfOV3jGGGNMjpPtgjLAArIMYMcw+MybB7ffDqNGwcaNsH693y0yxhiTlbJlUBYsfvrpJ0SEdevWnfN1H374ISdOnEjz53z77bc8/PDDaX6/8dnBg7B6NcTEJPuSsWPhX/+CceOgVi145RV44YWsa6Ixxhj/WVCWDkOHDuWKK65g6NBkFyUA0h+UmWwsPh7eew/69YMnn4QBA2DJEoiOPvWSL7+EDz+ESZOgfHndduWVcPgwREb60mpjjDE+sKAsjY4dO8bcuXP56quvGDZsGADx8fE89dRT1KlTh3r16vHxxx/Tr18//vrrL6666iquuuoqAAoVKnRqPyNHjuTuu+8GYNy4cTRr1oyGDRvStm1bdu/eneXfy2SwefNg717N3I+J0YBswAB48klc/y94pc8Ofp0Qz4QJULz4mW999VX473/9abYxxpisl56SGP7r0ydz9vvFF+d9yc8//0yHDh249NJLKVmyJEuWLGHhwoVs3bqVyMhIwsLCOHDgACVKlOD9999nxowZlCpV6pz7vOKKK5g/fz4iwsCBA3nnnXd47733MupbmawWFwcTJuj9u++GypVh2TJYsoT4jVt4qF8NQmUDw698i9BvakHjxlCvHuTLB0CTJpAnj8Z1LVr49zWMMcZkjewdlPlo6NChPPbYYwB0796doUOHsmXLFvr27UtYmB7WEiVKpGqfUVFR3HrrrezcuZOYmBgrV5HdzZ0LBw5A2bIacIlA27b8fXlbbu8WQ0Tz7fyn+ihkc4yOU0ZGQlgY1K4NrVpBnTq8+io8+ihMnapvN8YYk3Nl76AsBT1ameHAgQNMnz6dlStXIiLEx8cjIjRp0iRF7w+c+RhYluKRRx7hiSe
eoHPnzsycOZOXXnopo5tuskpsLEycqPc7dz4VUR04AF26QM+eebj33qrA03Do0KkeNDZuhOXL9XbTTdRu356yZYWpU+Gaa3z7NsYYY7KA5ZSlwciRI+nRowfbtm1j69atbN++ncqVK1O/fn2++OIL4uLiAA3eAAoXLszRo0dPvf/CCy9k7dq1JCQkMGbMmFPbDx8+TLly5QD47rvvsvAbmQw3Z44GWxUqQIMGAGzfDh06wL//DffeG/DaYsXgqqvgqafg7bfhuus0iBszBoYO5aUXEnjpJQjiOs/GGGMygAVlaTB06FBuuummM7Z17dqVnTt3cvHFF1OvXj3q16/PDz/8AEDv3r3p0KHDqUT/t956i+uuu44WLVpQpkyZU/t46aWX6NatG40bNz5v/pkJYjExp3vJrr8eRFi1Su/26wedOp3jvUWL6gt799ahzFmzqDr5c+rVjufnn7Ok9cYYY3yS7ZZZWrt2LTVr1vSpRTmLHctMMmUKjBwJFSvCc8+xcZPQrRsMGwbVq6diPxs3wmefwfHj7ChehxsnP8j8RaGEhmZay40xxqRDrltmyZigFh2tBcfgVC7ZoEHw3HOpDMgALrkEnnkGSpak3MFVXJlnHsO+OJzhTTbG5C779uk8pIEDYcUKv1tjAmXvRH9jgs2MGXD0KFSpArVr4xyMH6+xVZpceCE8+yx88gnPnhzDNS9V5pY2+wivUTVDm22MyQEmToTffoOePYmveilbt8K6dXpbu1b/PXYMSpWCmjXh4ou1pvWUKX433CSyoMyYjHLyJEyerPe9XrIVy6FaNShYMB37LVIEnnySUgMG0HlVJF/3PUqfD45Aw4YZ0mxjTA6wcSNHR/xKn9l3sH5Aflzpk1SukY+aNaFGDV0lpHp1KFz4zLdNnKiTkCpU8KfZ5kwWlBmTUaZNg+PHNQqrUQOAH3+EW27JgH3nzQsPPcQTeUfQ6rkr6PnpO+S/o4vO2jTG5G4xMfDdd0z8szblS59k0FVvEJY/HB57DKqeu1f9zjthyBDtkDf+s5wyYzLCiROnxwC8XjLn9Cq0Y8cM+oyQEIrefwt33niMz1e30pkDI0darQxjcruff4Y9exi7pzndP7qMsMuaaH5rv36wZcs533rzzXYaCSYpDspEJFRElonIeO/xEBFZLyKrRORrEQn3trcWkcMiEundXgjYRwfvPRtFxOJyk3NMmwZ//609ZJdeCmg92Jo1IX/+DPwcER76uCbf7evI0bj8GggOHKjFao0xuc+mTTBtGnGEsTK2Bg2bhOuybk2aaErFhx/C1q3Jvr1IEZ1TtGxZVjXYnEtqesoeA9YGPB4C1ADqAvmBXgHPzXHONfBur4AGdcCnwLVALeA2EamVnsb7JTQ0lAYNGlCnTh26devGiRMn0ryvu+++m5EjRwLQq1cv1qxZk+xrZ86cybx581L9GZUqVWLfvn1pbqM5j+PHdR0k0F4yz48/wq23ZvzHFSgAfR4vyIcFn9d1Mhcvhk8/1bU2jTG5R2wsfPcdOMdvFbrTonUeXTwkJEQrVDdurIHZRx/Bn38mu5uePWHQoKxrtkleioIyESkPdAIGJm5zzk1wHmAhUP48u2kKbHTObXbOxQDDgBvS1mx/5c+fn8jISFatWkWePHno37//Gc/HpfGP48CBA6lVK/k4Na1BmclkU6boia927VP5G85pZYz27TPnI3v1gtFzSrO/1zN6qbt2LXzzjY1BGJObjB0Lu3dDmTKMO3A5118f8FxICNx3n04IOnECPvhAM/qT0K4dTJ9uHe7BIKU9ZR8CTwMJZz/hDVv2AH4N2HyZiCwXkYkiUtvbVg4I/ImI8rZlay1btmTjxo3MnDmTli1b0rlzZ2rVqkV8fDz//ve/adKkCfXq1eMLb51O5xwPP/ww1atXp23btuzZs+fUvlq3bk1isdxff/2VRo0aUb9+fa6++mq2bt1K//79+eCDD2jQoAFz5sxh7969dO3alSZNmtCkSRN
+++03APbv30+7du2oXbs2vXr1IpgLBGd7R4/q2QwIPCMuWgR162pHVmbIkwcefxze+b6srlie2GP2448WmBmTG2zerBeEInD33UyfGUKbNme9JjRUr+Dq1z8dmO3Y8Y9dhYXB1Vefnjxu/HPeoExErgP2OOeWJPOSz4DZzrk53uOlQEXnXH3gY+Cn1DRIRHqLyGIRWbx3797UvDXLxcXFMXHiROrWrQvA0qVL+eijj/jjjz/46quvKFq0KIsWLWLRokV8+eWXbNmyhTFjxrB+/XrWrFnDoEGDkuz52rt3L/fffz+jRo1i+fLljBgxgkqVKtG3b18ef/xxIiMjadmyJY899hiPP/44ixYtYtSoUfTqpSPIL7/8MldccQWrV6/mpptu4s9zdFubdJo8WRNq69aFypVPbR4+PHOGLgPdcYemsu0MqwAPPqhn1hkzTi/xZIzJmQKGLWnXjvXRlahYMZmLwLAwXbatbl1NtfjgA/jrr3+8rEcPGDw485tuzi0lJTEuBzqLSEcgH1BERL53zt0pIi8CpYE+iS92zh0JuD9BRD4TkVLADiCwEkp5b9sZnHMDgAGgyyydr3EdOmh14oxSqhT8+uu5X/P333/TwFtkumXLltx3333MmzePpk2bUtn7wzx58mRWrFhxKl/s8OHDbNiwgdmzZ3PbbbcRGhpK2bJlafOPSxuYP38+rVq1OrWvEiVKJNmOqVOnnpGDduTIEY4dO8bs2bMZPXo0AJ06daJ48eKpOgYmhY4c0SAIzsglS0jQC9jXX8/cjw8N1ZUCXn8dPvmkug5VDBigM7GKFIErrsjcBhhj/DFuHOzaBRddBNdfz9gPzzgF/VNYGPTpA59/DqtXw/vva9XYgLWXGzbUOQOHD+sSvMYf5w3KnHPPAc+BzqwEnvICsl5Ae+Bq59ypYU0RuQjY7ZxzItIU7Y3bDxwCqolIZTQY6w7cnt4vcL4AKjMk5pSdrWBAhVDnHB9//DHtz0oqmjBhQoa1IyEhgfnz55Mvs8bIzLn9+qtesTZooKWxPQsWQKNGWloss3XpAu+9p+fZ2o0awe23a9Gh77+HQoW0bcaYnGPrVu2hF4G77oLwcMaPhxEjzvO+8HB44AGdFLR2rQZmTz2lq4agu0ssj3HffZn+LUwy0lOnrD9wIfD7WaUvbgZWichyoB/Q3ZsPEAc8DExCZ3EOd86tTsfnB7X27dvz+eefE+tlTv7xxx8cP36cVq1a8eOPPxIfH8/OnTuZkdjTEqB58+bMnj2bLV59mQMHDgBQuHBhjh49eup17dq14+OPPz71ODFQbNWqFT/88AMAEydO5ODBg5nyHXO1Q4dg1iy9f0Z2rQ5dZkjB2BQQ0YoYPXrA3r1Aq1baHufgyy9hw4asaYgxJvPFxsK33+rv9zXXQJUq7NunvfMXXJCC94eHa6pDjRra0//eexCQ13zHHXpNZ/yTqqDMOTfTOXeddz/MOVf17NIXzrlPnHO1nXP1nXPNnXPzAt4/wTl3qfe+TB7c8VevXr2oVasWjRo1ok6dOvTp04e4uDhuuukmqlWrRq1atejZsyeXXXbZP95bunRpBgwYQJcuXahfvz63eslJ119/PWPGjDmV6N+vXz8WL15MvXr1qFWr1qlZoC+++CKzZ8+mdu3ajB49mosDenFMBpk4UUtQNG4M5U9PPE5I0Dyvq6/OuqbUqgVvvqmBYHQ00KmTBmdxcXpVHBWVdY0xxmSeX36BnTu1d8sbr5wwIZUFqvPk0cDs0kt1rPL993USAHoqCws7Z1kzk8kkmGfmRUREuMTZiInWrl1LzZo1fWpRzmLHMo0OHID/+z+Ij4cXXoCyZU89NXeuXsgOHJj82zPLp5/qrM9vvgFxCdpTtnSpJog8/bQmTBpjsqetW+Gtt/T+009DlSoAdOsGL72kFXlSJTpaA7KtW6FlS11vCZ0/sH07/Pe/GdXw3EVEljjnItL6fltmyZjUSuwla9LkjIAMMnCtyzR46CF
NI3v7bU7XKKpeXa+GP/pIy3cYY7KfuLjTsy3btj0VkEVHa4bCOcpbJi9vXq38HxoKc+acSnXo0gVGj7bKOn6xoMyY1Dh4EObN02SuTp3OeCo+XtPMkphQm2U+/FDbMHo0Og7x4INQoYLmjXz8sRa5NcZkL7/8omUsLrwQbjhdc33WLGjdWk9HaVKmDFx7rd4fPBji4ihcWIO8hQvT3WqTBhaUGZMaU6acziW76KIznpozBy67TGMhv4SF6Trlb74JS5aghYsefRRKl4Zt26B/f1uOyZjsZNs2nekdMNsy0dix5ymFkRLXXqvB3u7dp2oc9uxpNcv8ki2DsmDOg8su7BimwZEjMHu23k+8ugyQlbMuz6VoUR1Gve8+r3h3kSLw2GOnl2NKnL1ljAluicOWCQnaBe8t4wb6KzxnjqaDpUtYmE7fBg3Kdu7k6qv1VBcTk859m1TLdkFZvnz52L9/vwUV6eCcY//+/VbfLLWmTtUp6fXrnzHjEvTcOXcuXHmlT207S5Uq8MknmgR8/DjaU5a4HNOiRbowpzEmuE2dqldWpUvDjTee8dSKFTrMGNBxlnbVqmmx6fh4GDKE0BBH+/a2OIgffBxoSZvy5csTFRVFsC/BFOzy5ctH+bMCC3MOx4/DzJl6P4n557Nm6TnNz6HLs11xBfTtqyMew4dDSIUKcP/9mls2aZImo1hgbkxwOnHi9MXTHXdoKYsAY8f+o0Ri+nTtqpHehg0wdy49erTklVfOSGEzWSCI/oSkTHh4+Knlh4zJMtOn61SnWrWgUqV/PD18uBbTDzY9e8K6dVrB4/XX0XnzVavqeiqzZ0O7dn430RiTlKlTNTC79FIt9nqWiRM1/z/DFCigC/Z++SWMGkW9l+uxfXtRDh4EW6kv62S74UtjstzJkxqUQZK9ZLGx8PvvwbvU5GuvaWA2aBBnzhqdMkUbb4wJLseOaVAGOmx51vTKv/7STu4MD5YaN4Y6deDvv2H4cG65RS84TdaxoMyY85k5U69Yq1XT21lmzNCRwNDQLG9ZioSEaEDWv7/mvVGrFlSsqBMX5s71u3nGmLNNmqQ983XqnJHcn+iXXzJ46DKRiHb558kDixdze8O1tuxSFrOgzJhziY7WHiX4R12yRH4WjE2pggV1weKHH4bNW+T07NFJk6xEhjHB5PBhvdKDZOtdZHg+WaCSJU8lkpWZMogC+RLYtCmTPsv8Q7bLKTMmS82dq0MJlSolmdcRE6OTGb/8MuubllrlysHXX0P37jBrZgPyly2r4yALFsDll/vdPJPdOafrMq5cyfqZO4ncVJgLi0VzQdFoLigRR4mi8YTkDdfpguHh2hsTeL9KFf09S3Ml1Bxi4kRNK2jYUHu0z3LihP7aXnJJJrahTRs9L/z5Jz1qLGLw4Ga89FImfp45xYIyY5ITGwuTJ+v9Tp2S/GORuPh4SDbpc27USEsSvfqa8MaN18JXX+kfgcsuyz5fwgSP6GhNWFy5ElatgoMHWbG/HD1n3sOtVRaz8GRpdv9dhD1/F+ZAdEESCxmVyHucC/Mf5YL8R7kg/xEuyr+bJqXnUKt6PHJZc2jWLHeu1XrggBYfE0m2K2zqVF1pKVOFhOiJ4o03uPH4EK6Y1pAXX8yT6+PlrGBBmTHJ+f13OHRIa5LVrZvkS378Efr0ydpmpdeDD2o9tZW3RlC39FjYuxcWL4amTf1umgl2zsGuXbB6tQZiGzZobSvPmphL6LnwPkZ8+RfVIiL0wiYmRv+N3Q+xsSScjOHgAceefSHs3leQPfsK89fui3llcQPWTi9Fg+HbuabcWK6+/CRl29fV5PMCBXz80llowoTT6+qWK5fkS8aN0yUrM93FF8PVV1Nw6lTq513LvLl1ubylXbhlNgvKjElKfLwubQI64zKJS8ToaFi2DJo3z+K2pVNoKHz2GTz4UAgzX7+W0CGD9I9BkyY2dGSSFhWlPTgrV8L+/ae3i+iwY926/FGgAbf/qwxDJwnVapZ
IdlchQEnvVjNg+78SEkhYvZbIEbuZMrUEd317KQe/KMgVZefT9oporryjPIWb1gyuYoAZac8e+O23c/aSJSToqGL//lnUps6dYdkyepadxuC3SnB5ywpZ9MG5Vw796TYmnRYs0D8+F12kuR1JmDwZrrkme8Yx9eppGln/yOY8VHyc5gJFRib7XU0uFhUF77yjVyEAhQppvbu6dXUmb8GCbN4Mt3TRWb41a557d8kKCSGkbm0a1a1No/+c5JmlS/l7zlR+m5PA1N9q8NrIguQN30ab5ifo2KMkTTqXyZ6/fMn55ReNulq00LUok7BokaYgZNlM77x54fbbab3vE/41OoSTUfvIVz4XDitnoRT3RYpIqIgsE5Hx3uPKIrJARDaKyI8iksfbntd7vNF7vlLAPp7ztq8XkfYZ/m2MyQgJCafXF7n22mRzrYJlrcu0euEFGPhNKDsaeVflEybYmpjmTAcP6goQ0dG6vNhzz8H//gf33qs9qwUL8uefWgz+66812M8Q+fJBixbkf+ZR2g6+i7feFuY/OJhRV39KrcPzeOmR/Txx+QJip87SOoLZ3c6deiEYGgrXXZfsy8aNy4AFyFOrTh1CmkbQqcIKfnlhgZ0jMllqBogfA9YGPH4b+MA5dwlwELjP234fcNDb/oH3OkSkFtAdqA10AD4TkSCt7GRytSVLdCihVKlk86xOntSRnCZNsrhtGahAAXj7bXjkh8ugcGH4809Ys8bvZplgcfKkLqB66JDW5+vdW2dHBlyk7NihtU3799cenExRvDi0bw8vvECp1x6nW+8SjO/2HWUSdtCu50X89eBr8P33+vObXY0bp8FOy5ZakiIZkyf7tAjHLbfQo+5yBk0vp/mnJtOkKCgTkfJAJ2Cg91iANsBI7yXfATd692/wHuM9f7X3+huAYc65aOfcFmAjYJnFJrg4pz1GAB06JNtL9uuv+nciu4+etGsHBQuH8FPBO3TDL7/YlbDR3uIvv9ShywsugAce+Ecu1+7dWs6qXz+dLJnpRKBCBejWDXn7Lf79SUVevCGSjmP7MOvHXbqO2Jtval5W4lBrdrB9u14Ihoefrh+YhK1bNV4rVCjrmnZKkSLU6n0Fe/4uwr6vx2oPqskUKe0p+xB4GkjwHpcEDjnnEqtORgGJU0XKAdsBvOcPe68/tT2J95hgEh+vZ4B583RmXjLi4vTidN48nYU4dmzWNTHTLF+uRYCKF9cyEckYPlyXicsJ3n8fXhxTnyOhxXVNzA0b/G6S8ZNzMGyYlrgoVAgeeUSrDwfYt09z0d95x6flxcLCICKC1p/fyoSpeXlpc0/+t6YTbstWTWx75hn9Dn/95UPjUinxxHnllVCsWLIvGzcuEwvGpsTll3N36618uqgpDBhwxqxbk3HOm+gvItcBe5xzS0SkdWY3SER6A70BLr744sz+OAN6Vbl5M2zcqLfNmyEmht0nCrPp6AVElWnC9gsaE3WkCNu365BFdLSeF8uW1YoRFSro8pCxsZpfki0F9pK1b5/sLK+oKC3NlFNy4kuXhsefCOH5UX34uOxbegwuvdTvZhm/TJsGs2bpz/+DD2pPWYADBzTt6dVXtcao38o2uIDJkfDMvzvRNfJyvrlqEEX/WqdV8WfM0KHXVq10fDXYZm5u3gwrVmhCfYcO53zpuHFaVtA3ItzzeVOa1zzMA6tnccGYMXDzzT42KGdKyU/o5UBnEekI5AOKAB8BxUQkzOsNKw/s8F6/A6gARIlIGFAU2B+wPVHge05xzg0ABgBERETYOEpmOHr0dAC2YYN2nydoJ+j+kwUZsbk5w7a3QMLDqZV3ExV27qdCoZFENChFhX9FUDaiLPny/XO3ffroSbp27SSL3we/NWtg2zbNr0rm8v/wYQ06P/00+w9dBrrrLvj+u4tZcKg6zdau1Z7SSpX8bpbJasuWwUgvK+Wee/6x7uLhw5po/vzzet0SLMLD4f0PQxkxoiRt3nqcb97cRb290zR5fsM
GvQ0frr/X58nbylKJvWRt2uh5JxlHjuixr+BzRYo8JQrx7PPHePXLTnycf5gGvPXr+9uoHEZcKvJHvJ6yp5xz14nICGCUc26YiPQHVjjnPhORh4C6zrm+ItId6OKcu0VEagM/oHlkZYFpQDXnXLJ9oBEREW6xJRWmn3N6RTZ/Pqxfr8kgAU7E52XcsasYsrEZe2OK0u22UG7tmU9rF+7fr+sj/vbb6TUS69TR2l1JLJS7YgXcd59eoPqS+5BWzumssk2bNOpKIps2JkZ7CPr2hS5dfGhjJvvjD+hx3QHmXvk84Q3rai+JyT22boV339Xu7htv/Ed+09Gj+vP/6KPB3Ru+bp0Wo3/0UejR7SQsXKg9f1FR+gIRLefRurWW9PDr6uqPP+C99yB/fnjjjXMWyB0xQkeTX345C9uXjIQEuKLuIQbX+x9VLzqhEXpuXH0hGSKyxDkXkeb3pyMoqwIMA0oAy4A7nXPRIpIPGAw0BA4A3Z1zm733Pw/cC8QB/3LOTTzX51lQlk6HDumV4m+/nRmIhYcTV7Eq005cxpClNVixtQjXdw7h9tvPUWPo0CFdmHv2bI1OAKpX1+CsevUzTmzff69d7cOGZaPepMQTZMGCeoI8qyvQOa2i3agRPPaYP03MCq/9XzTh0ybyTN2J8H//p2PTJufbtw/eeksjr8sv16gm4Jf3+HHtIevVC267zcd2ptCxY3D//Zqi9eGHkDePd2E6c6Ym1SfmQ5UurUObl1/+j7y5TOWcBsAbN+qB7dTpnC/v0UPPOxFp/lOfsaZPc3z5zEaGNn5X1+d8+ungGxr2SZYGZVnNgrI0iIvT7qp58/TSKvH/t0gRXLPmLAptzpAZZZgxM4Qrr4Tbb9eK9CkOno4e1eSx6dNP1weqUkWvquvWPbWjRx7RzY8/nvFfMVN88IFeYidzgvy//9M/TO+/70PbslBMDFxe8wDDmr5H1TaV9C+bydlOnNCM/Z079arskUfOqE66fr0GBf/6l54vsgvntKLHsGHwww8Ba3sfPQpz5+oF5oEDui0sTOvbtG6dNcP2q1frtNVkLgIDxcXpxWBkZHAtT3ttu3heK/c5jfOshKuugu7d/W5SULCgzKioKA3EFizQy0TQ3+D69aFFCw6UrUO3W0MoUwbuuEMXtA0PT8fnnTihV51Tp2q0Atqr0rIlREQQk6cQ7drBK6/ohWjQSkjQk/PQoXpifPPNfwwjDByoJTCGDw+uk2JmmffrEV7uHcWv1/ZDXnk52eriJgeIi9PgYP16nbXz9NM6nOb58UetZff119CggX/NTI958zSgrFQJevbUXLjwcPR3f9UqPY+tXn36DRUrnp4YkBlrbjqn55lt25JNlQg0ezYMGQJffJHxTUmPyEh4+pG/mVTnSSQhXi/ggqUrz0cWlOVmJ05ovsRvv51ZOLFcOV2qo1kzKFyY7ds1B+qll87bS5560dG6Jt7kyZqJChq51K3LX1Vb0vHpOkyYKJQtm8GfmxE2bNC/Otu9Si033KDDsQEmTtQ/ShMnnvG3Ksd74Oo/uNzN5c67QnUWgMl5nNPyEfPmQZEiWq2/hK5ZGR0NTz2lnWdffQVFi/rc1nRyToOIQYN0cunVV+uP9alAc88ePY/99tvpi8zQUO05bNJEL24z6gSwfLkuPlukiNZWy5PnnC9/6intiMrwc3cG6NEDetSNpN2mz/Wi9j//yfUXcRaU5VY7dmiyxJEj+rhAAa0+36IFXHzxqWHE1au1Z6x//0xeODs2VmduzZ+vsxi9n6tZB+ry4qpuTB51lDw1qwZHktn+/TB69OnK1MWL6xVrRMQZ7Vu6VC/+Jk8OnslaWeXQxn1c2ewk067/kFLvPZf7DkBuMGEC/PyzBgVPPXVqfG/bNj1n3Hyz5jEFw69sRoqN1Z7vQYP0u956q37fiy7ynly8+PSkqMS/j2FhOq08IkLXkjrHcGOy4uI0t/err/T83b27RlvnsG2bXicuXhycF4Vbt0L
37o55d39JyLIlOlry7LPpHIbJ3iwoy422bYOPPtIrukqVdCyyQYN//CLMmwcPPaT5FGleJDgtjhzRHrz582H7dt5f0ZZtx0rwUefpGhk2a/aP2kdZIjpaz8ZTpujJNzxcxzLat//H1eq2bdpxNmpUkpNMc4WRD07nl2n5+ObFrdkrmcic35IlWgBURKv1e2UNJkzQzo7PPtPru5zuwAHtLP/hB+246tlTf+/z5UNzz5Yu1Yhow4bTAVp4uObPRkToTPS8ec/c6cmTsGuXdjMm3nbt0kLcifsoXhxee+2cyfEjRmhH2qef6jyEYPX449Ckfgy3b3pVexyvuEK70HIpC8pym82bNQfk77/1iq137ySvSsaO1d/50aN9nkD311+43+fT/fmq3FBuEbdfski3V6miJ7WKFbWBabnyTCnnNEgcPVpnkYIOSXTtqifHsxw8qGken36a7NKXuYL7ayc3ttjNY/Vn0ea7u85ZbdxkI3Fx8N//6g96t27Qti1xcfDiixqDDB6cOyscrF+v333cOL12vPde/f0XQVMzli6FRYu0bE6iPHn0PFykyOkALPEcczYRne1ZpozWFkmmOPrx45oDd+SI5pEF+6/dvn3aL7BgVBR5339Tf77uuSeTh2aClwVluckff+h0ouhoTUK9774kr7S++krLUowenWTM4YujhxNoc0U0X98yibp7p/9zbbrSpU8vDZD4b/Hi6R872bpVL4U3b9bHFSvqeEUy3V/R0Tpc8NhjOhEzt9v+xmBu+ehyfv96bXAmtZjU++03HbsrUwZefJFdu4U779Tc9uefP2PiZa6UkKB1Fr/+WgO17t214+dUqtTBg9rTuHgxbNnyzx2EhelY6EUX6TFOvF1wwXnLRixfrqf1Bx7QoDC7DB2//rrWpXys8VyNbPPk0S7XMmX8blqWs6Ast1izRscUYmN1+O/uu/8xFdA5nV29aJFOJgy2HIR16zR3Y9qEaIptjdQNUVG6Pl1iYdpABQqcGaiVKKF/MUJD9buffT9w28mTMH68DqGCXsnedJOuZ5nMmS4hAe68U3vfrW6qZ/VqulwXwyvt5lKn/8PZ56+ESVpCgnaJ7dkD997LrJPNePRRLdHXtq3fjQs+hw5pSY3Bg/W68d57tfrPqcGJ/ft1BkF8/OkgrGTJVE/Tdg4+/liHUb/9NvutiHL8uA53z57lKDrmWz3vlimjk0fOHt7N4Swoyw1WrNB+7Lg4TS64885//NLHx2uXd3S0xm7BWsdv1Ci9SB8zJuArxMdrAuz27Rqkbd+ut8TSHukRFgbXXKPryp1niPTZZ/Xk+Pbb6f/YHCMhgVE3fc/iP0vz5ugaULmy3y0y6bFoEQwciCtZineKvMrEX0MYMkQnbJtzW7kSvvlGqwC1b68BWkbk6u7dq/u65BKt35tdY5gBA/S0/ep/o7Xkx86dOoR599256mLOgrKcbulS+PJLvcJt3Vr70s/6AY+O1u71GjV0GY5g//l/+mnNk/jPf87xIuc0jyMxSIuK0iSLhAQN4hL/Tep+4q16dc0bO0eCjHM6EWrIEL3gHTIkd9QiS42TQ0bR7NGmLOs3l5A7skE5d5M053QV8R07mF7zIT6eVY/hw3P1RLk0iYmBX37R4c2DB7W0RocO2pmf2nPv1Kk68fWNN/5RjSfbiYvTQZzx46EMO/VLxcTo7IlgnqmQwdIblAVpf4oBtBDsN9/oybRdOy02dtZv/ZEjGnd06aJ5CNnBG2/oEEChQnqFHhZ2+hYamnhfCA0tRlhYMcIuqkNoOS1+XaKE1kxKbeCUkKCl3NasOX1bt07nS5QrBw0b6rCBBWT/lK9VU5qU3sbcn/fT6ta44O2GNee2YoVegRQrxvtT6vDGmxaQpUWePJoJcdNN2hk0eLCmO0RF6ZriderorW5dvSWVqB8bq6uELFumNRBzQupVWJjmJL7yCnz+uVel/JtvtOp27drBP2MhSFhPWbD67Tf9bXdOE6yvv/4fAdm
mTTqS+eSTWlMoO9m7V39fY2O1gysuTm+B9wMfx8ZqrdwDBzTPwzk9HGFhOh+gRIkzb8WK6Sz0NWs0WTc2VnP8a9U6fatePXMKduc4zjHznu8YuqAKXwwreqp8gslGnNOxsa1bWdvsbv7142VMmuR3o3KeI0d0kYCVK/W2apX2ppUrdzpIK19eJ7/ecIOeu3PShaBzOmFk4ECofqmDzz/X2Qv162uvQbAP42QA6ynLiWbO1Ex9gBtv1G4lz549Wr/mxx81kf+dd3Rlo+ymdGkdxkyv2Fg96R04oLfE+wcPavpTp05QrVrmVtzI8URo1b0sj42rTPTsieS1oCz7WbdOZyIXKsQHvzXliSf8blDOVKSIJrwH1nhzTucyJQZqs2bpUrtNmvjXzswiojMxn38eRo4UrW+4fr0GZsuWadUAc04WlAWbKVNg5Ei9f8stcPXVHDkCP/2kcdrRo7p5+HCvAnUuFx6uM839qEWbm4Q0b0qHCguYOMFx430nrIsxu5kwAYA9ER2J/CCULwb63J5cRER7ysqV09yznK5VK/jf/3QCZvPmxTS35ocf9A9YjRp27jiPHNRxmgNMnXoqIDt5852MOXI13bpBmzaa6/7JJzB3Ljz6qAVkJosVK8Yd7fby/fqI08tTmexh0yatcZg/P5+tapVbRpGMj958UydyOYdGaVWr6tju6NF+Ny3oWVAWLI4exf30M1OjanDvjldo9lRLFizQZNBFi7Q7OLcu92OCQ70ul7DtWEkOzVjmd1NMani9ZH+3uJox48NtxSyT6erU0RzeiRPRK4AePTQBeM4cvUAwybKgLFhMn86YP2rz7uYu3PvshSxbpnm59erZVa0JEg0bcvMlyxk1vbjO1DDBb/t2zTbPk4fBu67hlluybx0sk728/DK89JJXF7xMmdO50YMHazKwSdJ5gzIRySciC0VkuYisFpGXve1zRCTSu/0lIj9521uLyOGA514I2FcHEVkvIhtF5NlM+1bZzcmTMGMGn69pxSefaEX5nDQjx+QQefNyW5doftjYVMu1mOA3cSIACVe04ovv8tG3r8/tMbnGxRdr/nOfPt4wZocOULaszlb75Re/mxe0UvKnPxpo45yrDzQAOohIc+dcS+dcA+dcA+B3IHCweE7ic865VwBEJBT4FLgWqAXcJiK1MvC7ZF+zZrF+ZxHCCuXnkqsr+t0aY5J1cae6OCBq8hrvTGuC1q5dWnw6LIwJ7lqaN9cVgIzJKk8+qXUln30WHb7s0UOHfiZN0sJu5h/OG5Q5lbjeTbh3O3U2FpEiQBvgp/Psqimw0Tm32TkXAwwDbkhLo3OU2FiYMoX+a1rR96FcvhKwCX7Vq3Nb3dUMXVj19CLvJjj9+qsGzi1a8OHAQjz+uN8NMrmNCLz7rl4fvPsuUKWKrkyTkKDDmAkJfjcx6KRokExEQkUkEtgDTHHOBY5d3AhMc84dCdh2mTfcOVFEanvbygHbA14T5W3L3X77jRMHTzJ9X1069a3gd2uMObeQEG6+LZwRmxudXuzdBJ/9+3WIOSSEpRd1pHBhXVvRmKwWEqLFZOfM0VVTuPFGrfi9dSvMmOFv44JQioIy51y8N0xZHmgqInUCnr4NGBrweClQ0Rvu/Jjz96CdQUR6i8hiEVm8N6cnE8fHw6RJ/LipCTffGE9YuGX0m+BXvG1jyhc8xMpfd3hZvCboTJqkvRBNm/Le18V58km/G2Rys/BwGDYMvvsOfp6Uj1NTgH/+WS8gzCmpSid3zh0CZgAdAESkFDos+UvAa44kDnc65yYA4d7rdgCBXUHlvW1nf8YA51yEcy6idOnSqfs22c3ChXDgAAM3XUWv/8sBi5+Z3KFcOe5svpEhK+tpiXITXA4f1mXagO11O7JtW65aD9oEqfz5YcwYrWE262A9iIiA6GgYMsTyUwOkZPZlaREp5t3PD1wDrPOevhkY75w7GfD6i0S0iIOINPU+Yz+wCKgmIpVFJA/QHRibgd8le0l
IgIkTWbL3YspWK0iZstZLZrKPjj1KMnF7bRLm2RBm0JkyRXswGzak348X8uijVlbHBIdixTQwe/xxWFbjNq3uv3q1FuM0QMp6ysoAM0RkBRpYTXHOjfee686ZQ5eggdoqEVkO9AO6e5MF4oCHgUnAWmC4c251RnyJbCkyEnbv5vPN7XjguWJ+t8aYVMl3eWOaXrCNOVNOwvHjfjfHJDp+HGbPBuDIFR2ZMkVXuTEmWJQpo+s33/NIITY2v1M3/vgjHDt27jfmEimZfbnCOdfQOVfPOVcnscSF91xr59yvZ73+E+dcbedcfedcc+fcvIDnJjjnLnXOVXXOvZ6xXyUbcQ4mTuRQdH6WnajBVW1t1qXJZooW5Y62uxmyPsKucoPJtGk6JFS7Nl9NuZi77tJKBMYEk6pVNb/slrcbsbNsYw3IRozwu1lBwUqU+mHNGvjzTwZtb02PPgVsaMFkS63uqMD8PZWJ/s3WwgwKXhFqgLh2Hfn2W7jvPn+bZExy6teHjz4Sbvz5Hg7GF9HZ3GvW+N0s31lQ5oeJE3EOBm2/irvutV4ykz2FNGrAtZXWMWFWQdi92+/mmJkz4cQJqFaNUcsvoV07KFLE70YZk7yWLeG/L4dz08LnOBEXrkn/0dF+N8tXFpRltY0bYcMGZh2oS73LClK8uN8NMiaN8uThjptOMMSWXfJfTAxMnQqAu7Yj/frBo4/63CZjUuD66+HeJ4px69xHid1zAH76ye8m+cqCsqzmrUX3+a6beOBhS/Yw2Vu9my9l27GSHJoZadPa/fT773D0KFSsyNz9NalUCSpYLWqTTfS8O4SrbilNr9l34aZNh3Xrzv+mHMqCsqy0fTusWsWu2JJsj72IJk38bpAx6VS9OjfXWsuoJZW0F9j4Y543n6ptW957X6xYrMl2nni1OAnlyjN2W32dBfD33343yRcWlGWlX3Wi6ld/38Z9vS2XzOQAItx+h/CDDWH6Z9cuXbImXz42FGzA0aPQqJHfjTIm9d4dUpYXV3blxJ6jMHy4383xhQVlWWX3bliyhHgJY/jKWnTv7neDjMkYFa6rjwO2z9gIsbF+Nyf3SVyDtHFjPvg0jy08brKtC8uE0Ovh/Ly+/Hrt/Y2M9LtJWc6CsqwyaRI4x4QCN9P66lAKFvS7QcZkkDJluL3pJoauqgsrVvjdmtzFuVM9lPurt2DBAujY0ec2GZMODzxThOknmvPHoQvg++81VzIXsaAsKxw4oIm4IvRf2YK+ff1ukDEZ6+a7CzFyS6PTvTYma2zYoOeXEiX4fHJV+vSBEDurm2wsNBQ++KoIjy6/D3fkKAwenKsmEdmvb1aYMgUSEthSsTXRLi81a/rdIGMyVrE2jahQ6BArZx3IdVe2vvr9dwBiIy5j+AihRw+f22NMBmh+mVCh6UWMjGoOy5fnqos9C8oy29GjMGcOAF9EdaRPH5/bY0xmKFyYO9rsZMgfTWCxVfjPEjExsHQpAOMOt6J9e8if3+c2GZNB3nw/H69tuIWjMXlh2DDYv9/vJmUJC8oy27RpEBtLdM0GTJxbhBtv9LtBxmSOjvdcyMTttUmYl3uuan21fLkurVSpEgOGF+P++/1ukDEZp1QpeOipArwSda/+nH/3Xa4YxrSgLDP9/feptehG0YXOnSE83Oc2GZNJ8jWtR9My25kzPxyiovxuTs7nDV1uqdia2Fi49FKf22NMBut1v/D7sTqsjr4E1q+H6dP9blKms6AsM82erRF+9eoM+OlCevf2u0HGZKLwcO7oclJrlk2Z4ndrcrYjR3Tx5pAQBi5rbL1kJkcKCYGPPgnjkTV9tZNszBity5eDWVCWmRYuBGBlxesoWtSWPTE5X8uH6vH7nirEz18EBw/63Zyca+FCcI7YWvUZNykPN93kd4OMyRyNG0PNJoX5Qe7QOohffw3x8X43K9NYUJZZdu/WIZz8+ek/9RIeeMDvBhmT+UIvLMVltY8y968qmk9pMoc3dDkuuh3t20P
evD63x5hM9Npr8M7vV3C4QBnYtu3UGtI50XmDMhHJJyILRWS5iKwWkZe97d+KyBYRifRuDbztIiL9RGSjiKwQkUYB+7pLRDZ4t7sy7VsFA29W1NFLGzNvfgjt2vncHmOySNf7SzByS2OddZxL16/LVFFReitQgAFTKtnQpcnxiheHJ54M4YX9j+mGX37R4CwHSklPWTTQxjlXH2gAdBCR5t5z/3bONfBukd62a4Fq3q038DmAiJQAXgSaAU2BF0WkeEZ9kaCzZAkAQ/66ittus4KOJve4qvuFzDpQh4S/ozWv0mQsr4L/louvJDYuxBL8Ta7Qowcs/7M4kVW7QkICfPNNjlzW7byhglPHvIfh3u1c81JvAAZ575sPFBORMkB7YIpz7oBz7iAwBeiQvuYHqb17Yft2XN58fDO5LPfc43eDjMk64eHQuFkYC/ZU1iHMuDi/m5RzJCScCsoGbrjSeslMrhESAv36waPj2pJwYRnYuRN++snvZmW4FPXfiEioiEQCe9DAaoH31OveEOUHIpKY1VAO2B7w9ihvW3Lbcx5v6HJFyauoWCmE0qV9bo8xWazrvcUYtbcVHD58asKLyQDr1sHhw8SWuJBxc4tZgr/JVerVg8YRIXxb+GGN0qZOhbVr/W5WhkpRUOaci3fONQDKA01FpA7wHFADaAKUAJ7JiAaJSG8RWSwii/fu3ZsRu8x63tDl0K2XcdttPrfFGB9c006YureeTmOfPDlXFH3MEt5yM+NDOtOunViCv8l1Xn4ZPvq+FAdad9ENAwfmqJneqcp0cs4dAmYAHZxzO70hymjgGzRPDGAHEFj8oby3LbntZ3/GAOdchHMuonR27GLavx+2bcPlycuvSy/g2mv9bpAxWS9vXqjVOD/LYmrrMMOqVX43Kfs7eRKWLQNgwPx6VvfQ5EpFisCzz8LzM9pC7dpw7BgMGJBj0iRSMvuytIgU8+7nB64B1nl5YoiIADcCiWfdsUBPbxZmc+Cwc24nMAloJyLFvQT/dt62nMUbuvy94NU0bCTky+dze4zxSdebQxgVc50+mJTzftWz3LJlEBPDlhKNiSGPJfibXKt7d9i4SVhUv5dOzdy8GUaP9rtZGSIlPWVlgBkisgJYhOaUjQeGiMhKYCVQCnjNe/0EYDOwEfgSeBDAOXcAeNXbxyLgFW9bzmJDl8YA0KEDTFxbCZc3H2zYAFu3+t2k7M0buhy4o4Ml+JtcTUST/vs+UYCdNz4AoaE6qcj7+5udpWT25QrnXEPnXD3nXB3n3Cve9jbOubretjsTZ2h6Q5oPOeeqes8vDtjX1865S7zbN5n3tXxy4ABs2UJcWD5mrS5FmzZ+N8gY/xQsCFWqhrC6yvW6wXrL0u7gQVi/nljJw7il5S3B3+R6NWvCBx/AdQ9VZEfrO3Tjd99p4fZszKpnZSQv32Nm+DW0bBVCWJjP7THGZ127wqjdl+uV7LJlsGeP303KnhYsAOcYL9fTrkOIJfgbA7RqpT1m17/Vgu1VroToaOjfX//Npiwoy0iJQ5dbmtnQpTFAp07wy7T80KyZzsCcOtXvJmU/zp0auhywsrkl+BsT4PLL4bPPhM5Du7M1b3X46y/44YdsO+PbgrKMcugQbNpEdEh+Fm8pSYsWfjfIGP8VKQIXXggbLvGmIc+bB0eP+tuo7ObPP2HnTrYkVCQmT2FL8DfmLM2bw4AvQ7hp+iNs/ruMXsTMnet3s9LEgrKM4g1d/koH2ncIsWWVjPF07Qqj5lwAdevqsigzZ/rdpOzF6yX7av8N9LpffG6MMcGpSRP4alA4Xeb/m42HS8OwYdlyfUwLHTJK4tDlxiY2dGlMgM6dYexYoH173TBjBsTE+NqmbCM+HhYtIjYhhLFrq9Gli98NMiZ4NWoE340oyM2/P8H6fSXhiy/gxAm/m5UqFpRlhCNHYONGjrsC/LGvOA0a+N0gY4JHiRJQuDBsDbsEKlWC48d1GNOc3+rVcPQo449cSbuO4Zb
gb8x51K8Pg38qwq2zH2LtxnBduDwb5ZdZUJYRli4F5xgbdy2dbwhBbITBmDN07Qqjx8jp3rIpU3RxbXNuiQn+m6+mdx87sRiTEnUbhvHDqLzcNqM3q2buy1bleCwoywheFf+hGyJs6NKYJNx4I/z0E9CgAZQuDfv2ncrDNMk4cQKWL2frsVLEFChmCf7GpEKtFsUY9tVx7pxxLyu+WgR//OF3k1LEgrL0OnoU/viDA7GF2RNdlOrV/W6QMcHnggsgLAz+2hUCbdvqRluo/NyWLIG4OAbu6UyvvuF+t8aYbKdG50sZ8co6es64m2Wv/QKHD/vdpPOyoCy9IiPBOUb/fS1du4X63RpjgtZNN8GYMUCLFlCokC67tGGD380KXvPna4L/ljqW4G9MGlXrezWjek/m3gldWfzC2KBPm7CgLL28WZc/bmzMrbf63BZjgliXLt6awXnywFVX6cZslOuRpfbsgY0bGb+jEe2uy2sJ/sakVUgIVZ/txpiuQ4jbvhO2bPG7RedkQVl6HDsG69ez62QxYvIW5uKL/W6QMcGrXDld/WTvXqB1awgPh1WrtAK3OdP48QAMiLqW+x+w9dqMSZciRaj09C00f/dmqFrV79ackwVl6bF8OSQkMPxYR265zYYujTmfUwn/hQrp+iiguWXmtKgoWLiQrcdLE1PsQstTNSYjVK0KVar43YrzsqAsPbyhyxEbG9Ctm89tMSYb6NrVG8IETfgX0bIPllt22s8/g3MMOHY7vR6wBH9jchMLytLq+HFYu5Ytx0pT8IKCXHCB3w0yJvhVrqzLxB48iJbG6NBBZ2B+9VW2q7ydKTZuhBUrWHakKtN3VKdrV78bZIzJShaUpZU3dDns8LV0v8NyPoxJqeuvh3HjAh5UrqxR2uDBubtEhnMwejTHYvNy/+LefPd9KHny+N0oY0xWOm9QJiL5RGShiCwXkdUi8rK3fYiIrBeRVSLytYiEe9tbi8hhEYn0bi8E7KuD956NIvJs5n2tLOAVjB2zqS433uhvU4zJTrp2hVGjvAehodCrF+TLp79TuXn5pZUrYdMmHll4J488U8ByyYzJhVLSUxYNtHHO1QcaAB1EpDkwBKgB1AXyA70C3jPHOdfAu70CICKhwKfAtUAt4DYRqZVh3yQr/f03rFnDmkNlKXdJfooV87tBxmQf1avrhMujR70NpUrB7bfr/WHDYNcu39rmm4QEGDOGHzY2IfaiCvS8z7rIjMmNzhuUOXXMexju3ZxzboL3nAMWAuXPs6umwEbn3GbnXAwwDLghHW33z/LlEB/P0APtua2HJeIak1odO8IvvwRsaNZMbzExMHAgxMX51jZfLFrEpnUxvLv6Wj4fUdrWzzUml0pRTpmIhIpIJLAHmOKcWxDwXDjQA/g14C2XecOdE0WktretHLA94DVR3rbsZ+lSnIPxW2tz3XV+N8aY7OfmmwOGMBPdfrv2mm3f7pX+zyXi4ogZPZ6eM+5hwBv7KVzcclSNya1SFJQ55+Kdcw3Q3rCmIlIn4OnPgNnOuTne46VARW+482Pgp9Q0SER6i8hiEVm8d+/e1Lw1a5w8CatXs3hfJWo3ykuBAn43yJjsp04d2LTprAmX+fJpfllICEydCqtX+9a+LDVnDs//2pIu9TYRcXed87/eGJNjpWr2pXPuEDAD6AAgIi8CpYEnAl5zJHG40zk3AQgXkVLADqBCwO7Ke9vO/owBzrkI51xE6dKlU/dtssLKlRAXx7D919C9h+V9GJMWItC+fRKrLFWuDJ076/1vvw1IPMuhTp7k1082suZgGR5/+yINSI0xuVZKZl+WFpFi3v38wDXAOhHpBbQHbnPOJQS8/iIRzYgQkabeZ+wHFgHVRKSyiOQBugNjM/j7ZL4lS0hwwtSoGrRr53djjMm+zpiFGah9e7j0UjhyRAOzHFwmY9fIuTw7qwPf3DWTkAb1/G6OMcZnKbksKwPMEJEVaGA1xTk3HugPXAj8flbpi5uBVSK
yHOgHdPfmA8QBDwOTgLXAcOdc9hqfiI6GVauYs/MSmrUMtxpCxqRD48ba8RwdfdYTISFw771QoICujTl9ui/ty2wJh49y9/PleLfZSC7o2QHL7jfGnDej1Dm3AmiYxPYk3+uc+wT4JJnnJgATUtnG4LFsGcTGMnRvW257Iq/frTEmWxPRlZZGjIA77zzryeLFoWdP6N9f12WqXh3Kn2+Cd/byvwe30LDEDtp2CINq1fxujjEmCFgCQ0olJMDEicQmhDBvXzVatfK7QcZkf88/D59+ChOSulRr2BBatdLyGF9+qeUycogFkw/zy4wCvBIxFqs+bYxJZEFZSi1ZArt2MeVwM9p0yk9oqN8NMib7K1FC65W99tpZdcsSdesGZcpoQdkRI7K8fZnh8GHoe38c3135NeHNI6BChfO/yRiTK1hQlhIJCTB+PABDj3TkttvtsBmTUUqU0F+v118/9Wt2Wp48WiYjLAxmz9YUgmzMOejT82+eu3Q0lYsdPD3T1BhjsKAsZbxessUx9fjzRGmaNvW7QcbkLImB2RtvJBGYlS8PXbro/UGDdPHybOrrr6Hw/i3cUmWxDs0GY9kfY4xvLCg7H6+XLCY+lAcX9GTAl2KTpIzJBIlDmW++CePGnfVkmzZacfbECejXD/bs8aWN6bF8OfT/6CQfVf9MewA7dfK7ScaYIGNB2fksXgy7dvH6uq50u6sg1av73SBjcq7ixbWn7K23YGxgFUMRuPtuuOgiXc38zTe1XEY2cPgwPP009O3rGNRhKAXCYnXaaZEifjfNGBNkLCg7F6+XbPn+8kw70oQnnrLDZUxmSwzM3n4bfv454InCheG556BBA+0x++QTnbYZpMVl4+NhwAAdpaxRA+Z+sYaah+dDwYJY5WljTFIsyjiXxYuJ3bmXvvPv4ovvC9qMS2OySPHiOpT5zjtnBWb58kHfvnDDDfr455+1ltnJk760MzkzZkCLFrBlC8ydC/feepzQn7zlCzp0gPz5/W2gMSYonbd4bK7l9ZK9E9me6zo6ateziMyYrFSsmAZm112njxPjMESgY0ctJfHVVxAZqcOZDz4IF17oU2vVpk3w739DaCgMG6ZLebJ1K3zxBRw4ACVLwlVX+dpGY0zwsqAsOYsXs3ptCON3NmL2nHJ+t8aYXKlYMR3KvO46HaU8o85q3brwn//A559rntkbb+jyTPXrZ3k7jxzRWmuzZ+uw65VXog2eMVPrq8XHa4R2//0QHp7l7TPGZA82fJmUhATixk6gz5w76f/afsLzWS+ZMX5J7DF77z0YM+asJy+4AJ59Fho10iHMzz7TqZtZlGcWH6+LDbRsqStB/fabF5CdPKlPDBumL2rTBp56SnvKjDEmGeKCNEkWICIiwi1evDjrP3jBAv73WBTHwovz8swrsWQyY/x3+DB07aqrLtWpox1l9erp/cKFHEyerFGbc/rEPffoouaZ0I7ISFi6FH78UUcjn3suYDJlVJQOV+7ZozlwPXvq6uvGmBxPRJY45yLS+n4bvjxbQgLrv5vPiM2dmTt8pwVkxgSJokVh6lStHbtqFaxcqbVkV66EY8eEiy9uT90yjakbNYF6Bzdz6V9vE/ZQHyhbNs2fuWePLiKwdKn+u2GDTgJt2FBvw4fDxRcHvGHePPjhB4iN1aK3vXv7nudmjMk+rKfsLPHzFnD1zcV4t8M0Ir7sY0GZMdmAc/DnnxqgrZx/nJVjt/DHzsI4CaFQ6XwUvLAQBUvmo1AhoWBBKFSIM/5NvB8fDytWaBC2bZuOjjZqpAFYo0ZwySUQklTSR0wMDB2qQRnA5ZfDbbdZ/pgxuUx6e8osKAuUkMBH1/7Krj3Cm58U0ROrMSb7iYmB778n/veFnIjLw/G4vBwvXp5j1RpyvEodjoWX4PhxOH4cjh3j1H3Qkc+GDbUHLEWrd+zercOVO3ZoEHbHHXDZZZn69YwxwcmGLzPQpjErGLy0NnPv/Rqav+B3c4wxaZUnD9xzD6E
tW1J44UIKL1kCx9fAqjWwCqhYEZo0gdYRWhQtrRYv1jHU6GgdpuzTB8rZbG1jTNqct6dMRPIBs4G8aBA30jn3oohUBoYBJYElQA/nXIyI5AUGAY2B/cCtzrmt3r6eA+4D4oFHnXOTzvXZWdlTlhCXQLtLt/BqvZFc9nRLrfxojMkZ4uNh7VpYtEiTw6KjdbuIjkk2barjk4UKnX5PQgIcPar1Lg4f1n8Tb4cPa3Lbpk362ogI6NFDE/uNMblWVvSURQNtnHPHRCQcmCsiE4EngA+cc8NEpD8abH3u/XvQOXeJiHQH3gZuFZFaQHegNlAWmCoilzrn4tPa+Iz0xX+2Ub/QZi6rfQSaNfO7OcaYjBQaqtM069SBO+/U5LOFC/XfDRv0NnQoVKqkQ5+HD+u45vnSO0JD4ZZbtA5GisY6jTEmeecNypx2pR3zHoZ7Nwe0AW73tn8HvIQGZTd49wFGAp+IiHjbhznnooEtIrIRaAr8nhFfJD22bUngy8F5mdvxZ+h0uyX3G5OThYdrr1hibbPISA3Q1q6FzZtPv05Ep1oWKaK3okXP/LdIEShTRh8bY0wGSFFOmYiEokOUlwCfApuAQ865OO8lUUBiIkU5YDuAcy5ORA6jQ5zlgPkBuw18j2+cgz63H+HDJj9Q4KKi1ktmTG6SLx80b663o0e1xlihQhpwFS6czFRLY4zJHCkKyrwhxgYiUgwYA9TIrAaJSG+gN8DFZxQAyhxfD0ygWvw6WpXZAJ3usl4yY3KrwoWhZk2/W2GMycVSdRnonDsEzAAuA4qJSGJQVx7Y4d3fAVQA8J4viib8n9qexHsCP2OAcy7CORdRunTp1DQvTeqGrObNOj9AqVLWS2aMMcYY35w3KBOR0l4PGSKSH7gGWIsGZzd7L7sL+Nm7P9Z7jPf8dC8vbSzQXUTyejM3qwELM+h7pE1CAk23DqdQeDR06mS9ZMYYY4zxTUqGL8sA33l5ZSHAcOfceBFZAwwTkdeAZcBX3uu/AgZ7ifwH0BmXOOdWi8hwYA0QBzzk+8zLyEhdR6V0ac0pMcYYY4zxSe6u6J+QAEuWaKHJ+vUz73OMMcYYk+NZRf/0CAnRqt7GGGOMMT6z+d7GGGOMMUHAgjJjjDHGmCBgQZkxxhhjTBCwoMwYY4wxJghYUGaMMcYYEwSCuiSGiOwFtvndjnQqBezzuxFByo5N8uzYJM+OTfLs2CTNjkvy7NgkLy3HpqJzLs3LEQV1UJYTiMji9NQsycns2CTPjk3y7Ngkz45N0uy4JM+OTfL8ODY2fGmMMcYYEwQsKDPGGGOMCQIWlGW+AX43IIjZsUmeHZvk2bFJnh2bpNlxSZ4dm+Rl+bGxnDJjjDHGmCBgPWXGGGOMMUHAgrIAIvK1iOwRkVVnbf+fiKwTkRUiMkZEiiXz/le910SKyGQRKettFxHpJyIbvecbJfP+DiKy3nvdswHbRUReF5E/RGStiDyagV87RYLg2KTr8zNTEB+bBiIy39vvYhFpmkFfOUUy8bjUEJHfRSRaRJ46x+c3FpGV3vHrJyLibS8hIlNEZIP3b/EM/NopEqzHxnvuEa8Nq0XknQz6yikWBMfmdRHZLiLHztr+hIis8fY9TUQqZsDXTZUgPjYXi8gMEVnm7b9jBnzdVJHk/34+7G1zIlLqHO+vLCILvNf+KCJ5vO2tRGSpiMSJyM1p+Pwk95ss55zdvBvQCmgErDprezsgzLv/NvB2Mu8vEnD/UaC/d78jMBEQoDmwIIn3hgKbgCpAHmA5UMt77h5gEBDiPb4gNx2bjPj8XHpsJgPXBuxrZg45LhcATYDXgafO8fkLveMm3nFMPBbvAM9695/NYT8z6T02VwFTgbyJ+8uFx6Y5UAY4dtb2q4AC3v0HgB/t2JzaPgB4wLtfC9iaxcflXH8/GwKVgK1AqXPsYzjQ3bvfP+D7VALqoX+Db07D5ye53+Ru1lMWwDk3GziQxPbJzrk47+F8oHwy7z8S8LAgkJi
wdwMwyKn5QDERKXPW25sCG51zm51zMcAw732gJ4BXnHMJ3ufsSf23Sx+fj026Pz8zBeux8fZTxLtfFPgrBV8nw2TWcXHO7XHOLQJik/ts7zgVcc7Nd3o2HATc6D19A/Cdd/+7gO1ZJoiPzQPAW8656MT9peZ7ZQQ/j433uvnOuZ1JbJ/hnDtxvs/PTMF6bPD5XMM5/n4655Y557ae681eT3EbYKS36dR5wTm31Tm3AkhI7eefa7/JCTvXkyZJ9wI/JvekiLwO9AQOo1dWAOWA7QEvi/K2Bf5wJ/WaZt79qsCtInITsBd41Dm3IR3fIbNk1rHJkM/3mR/H5l/AJBF5F01VaJG6JmeJtByXlCiHHq9EiccO4MKAPyy7gAtTsd+s5MexuRRo6e37JNprsig1jc4imXVsUuo+tIcxGPlxbF4CJovII2iw1zaD9ptS5/r7mRIlgUMBgW3g70R6Pj/V+7WeslQQkeeBOGBIcq9xzj3vnKvgvebhDProvMBJp5WFvwS+zqD9Zhgfj02KP98vPh6bB4DHvf0+DnyVQfvNEH7/zHj7d5zumQwaPh6bMKAEOkz1b2B4YL5ZMPD750ZE7gQigP9l5H4zgo/H5jbgW+dceTRVYrCIWHyRBnbQUkhE7gauA+7wTuSIyDdewuSEJN4yBOjq3d8BVAh4rry3LdC5XhMFjPbuj0HHt4NGFhybVH9+sPD52NzF6Z+bEWgXe1BI53FJiR2cOYQTeOx2Jw4De/9m+RDdufh8bKKA0d6Q+UJ0yCbZ5OislgXH5nyf3xZ4HuicOMQbLHw+NvehuVM4534H8pG1PzepPleKyCTv2AwE9qPpIYmjh6k91yb3+anerwVlKSAiHYCn0V/ExJwCnHP3OOcaOOc6eq+rFvC2G4B13v2xQE9RzYHDSYzLLwKqeTM18gDdvfcB/MTpbuYrgT8y7tulTxYdm1R/fjDw+9igeR1XevfbAEEx5J0Bx+W8vON0RESaez09PYGfvafHogEr3r8/J7ELXwTBsfkJ71wjIpeiSctBsVh1Vhyb83x+Q+AL7/ODLZD39dgAfwJXe59REw3K9mbQvlPiXH8/k+Sca+8dm15eEDsDSJxdmdrzQpKfn6b9uiyePRLMN2Aomq8Ti14x3udt34iOF0d6t/7JvH8UsApYAYwDyiVesACforMzVgIRyby/IxpwbQKeD9heDPjFe+/vQP1ceGzS9fm59NhcASxBZwItABrnkONykbe/I8Ah736RJN4f4b1/E/AJp4tllwSmoUHqVKBEDvqZSe+xyQN87z23FGiTC4/NO95zCd6/L3nbpwK7Az5/rB2bU8emFvAbeq6JBNr5cGyS+/v5qNfWOPRCdWAy76+CzkreiI4sJM5AbuK9/zja87U6lZ+f5H6Tu1lFf2OMMcaYIGDDl8YYY4wxQcCCMmOMMcaYIGBBmTHGGGNMELCgzBhjjDEmCFhQZowxxhgTBCwoM8YEBREp6RVzjBSRXSKyw7t/TEQ+y8TPbS0iwbgElTEml7G1L40xQcE5tx9oACAiLwHHnHPvZsFHtwaOAfOy4LOMMSZZ1lNmjAlqXk/WeO/+SyLynYjMEZFtItJFRN4RkZUi8quIhHuvaywis0RkibecSuKySo+KyBoRWSEiw0SkEtAXeNzrlWspIteLyAIRWSYiU0XkwlR+9taA7QtF5BJfDpwxJtuxoMwYk91URZeN6oxWn5/hnKsL/A108oKjj4GbnXONga+B1733Pgs0dM7VA/o657YC/YEPnC65MgeYCzR3zjUEhqHL16ToswNed9jb/gnwYQZ/f2NMDmXDl8aY7Gaicy5WRFYCocCv3vaVQCWgOlAHmKJLOxKKLk0DurzMEBH5CV3nMSnlgR+93rU8wJZUfHaioQH/fpDqb2iMyZWsp8wYk91EAzjnEoBYd3qtuAT0QlPQ9ekaeLe6zrl23ms6oeuJNgIWiUhSF6YfA594PV190MWVU/rZiVwy940xJlkWlBl
jcpr1QGkRuQxARMJFpLaIhAAVnHMzgGeAokAh4ChQOOD9RYEd3v270tiGWwP+/T2N+zDG5DI2fGmMyVGcczEicjPQT0SKoue5D4E/gO+9bQL0c84dEpFxwEgRuQF4BHgJGCEiB4HpQOU0NKO4iKxAe9ZuS+93MsbkDnK6990YY0x6ichWIMI5t8/vthhjshcbvjTGGGOMCQLWU2aMMcYYEwSsp8wYY4wxJghYUGaMMcYYEwQsKDPGGGOMCQIWlBljjDHGBAELyowxxhhjgoAFZcYYY4wxQeD/AfLJXGzTpAfQAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(10,3))\n", + "plt.plot(test_timestamps, y_test, color = 'red', linewidth=2.0, alpha = 0.6)\n", + "plt.plot(test_timestamps, y_test_pred, color = 'blue', linewidth=0.8)\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "clOAUH-SXCJG", + "outputId": "a3aa85ff-126a-4a4a-cd9e-90b9cc465ef5" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MAPE for testing data: 1.2623790187854018 %\n" + ] + } + ], + "source": [ + "print('MAPE for testing data: ', mape(y_test_pred, y_test)*100, '%')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DHlKvVCId5ue" + }, + "source": [ + "## Napoved celotnega nabora podatkov\n" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cOFJ45vreO0N", + "outputId": "35628e33-ecf9-4966-8036-f7ea86db6f16" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tensor shape: (26300, 5)\n", + "X shape: (26300, 4) \n", + "Y shape: (26300, 1)\n" + ] + } + ], + "source": [ + "# Extracting load values as numpy array\n", + "data = energy.copy().values\n", + "\n", + "# Scaling\n", + "data = scaler.transform(data)\n", + "\n", + "# Transforming to 2D tensor as per model input requirement\n", + "data_timesteps=np.array([[j for j in data[i:i+timesteps]] for i in range(0,len(data)-timesteps+1)])[:,:,0]\n", + "print(\"Tensor shape: \", data_timesteps.shape)\n", + "\n", + "# Selecting inputs and outputs from data\n", + "X, Y = data_timesteps[:,:timesteps-1],data_timesteps[:,[timesteps-1]]\n", + "print(\"X shape: \", X.shape,\"\\nY shape: \", Y.shape)" + ] + }, + { + 
"cell_type": "code", + "execution_count": 26, + "metadata": { + "id": "ESSAdQgwexIi" + }, + "outputs": [], + "source": [ + "# Make model predictions\n", + "Y_pred = model.predict(X).reshape(-1,1)\n", + "\n", + "# Inverse scale and reshape\n", + "Y_pred = scaler.inverse_transform(Y_pred)\n", + "Y = scaler.inverse_transform(Y)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 328 + }, + "id": "M_qhihN0RVVX", + "outputId": "a89cb23e-1d35-437f-9d63-8b8907e12f80" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(30,8))\n", + "plt.plot(Y, color = 'red', linewidth=2.0, alpha = 0.6)\n", + "plt.plot(Y_pred, color = 'blue', linewidth=1)\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "AcN7pMYXVGTK", + "outputId": "7e1c2161-47ce-496c-9d86-7ad9ae0df770" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MAPE: 2.0572089029888656 %\n" + ] + } + ], + "source": [ + "print('MAPE: ', mape(Y_pred, Y)*100, '%')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za strojno prevajanje [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da upoštevate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo strokovno človeško prevajanje. 
Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki izhajajo iz uporabe tega prevoda.\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "Recurrent_Neural_Networks.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.1" + }, + "coopTranslator": { + "original_hash": "f8f3967282314d3995245835bdaa8418", + "translation_date": "2025-09-06T14:03:30+00:00", + "source_file": "7-TimeSeries/3-SVR/solution/notebook.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/translations/sl/7-TimeSeries/3-SVR/working/notebook.ipynb b/translations/sl/7-TimeSeries/3-SVR/working/notebook.ipynb new file mode 100644 index 000000000..e46fe7af6 --- /dev/null +++ b/translations/sl/7-TimeSeries/3-SVR/working/notebook.ipynb @@ -0,0 +1,705 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "fv9OoQsMFk5A" + }, + "source": [ + "# Napovedovanje časovnih vrst z uporabo regresorja podpornih vektorjev\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "V tem zvezku bomo prikazali, kako:\n", + "\n", + "- pripraviti 2D časovne vrste podatkov za učenje modela SVM regresorja\n", + "- implementirati SVR z uporabo RBF jedra\n", + "- oceniti model s pomočjo grafov in MAPE\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Uvažanje modulov\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append('../../')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + 
"metadata": { + "id": "M687KNlQFp0-" + }, + "outputs": [], + "source": [ + "import os\n", + "import warnings\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import datetime as dt\n", + "import math\n", + "\n", + "from sklearn.svm import SVR\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from common.utils import load_data, mape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Cj-kfVdMGjWP" + }, + "source": [ + "## Priprava podatkov\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8fywSjC6GsRz" + }, + "source": [ + "### Naloži podatke\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "aBDkEB11Fumg", + "outputId": "99cf7987-0509-4b73-8cc2-75d7da0d2740" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2012-01-01 00:00:002698.0
2012-01-01 01:00:002558.0
2012-01-01 02:00:002444.0
2012-01-01 03:00:002402.0
2012-01-01 04:00:002403.0
\n", + "
" + ], + "text/plain": [ + " load\n", + "2012-01-01 00:00:00 2698.0\n", + "2012-01-01 01:00:00 2558.0\n", + "2012-01-01 02:00:00 2444.0\n", + "2012-01-01 03:00:00 2402.0\n", + "2012-01-01 04:00:00 2403.0" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "energy = load_data('../../data')[['load']]\n", + "energy.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "O0BWP13rGnh4" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 486 + }, + "id": "hGaNPKu_Gidk", + "outputId": "7f89b326-9057-4f49-efbe-cb100ebdf76d" + }, + "outputs": [], + "source": [ + "energy.plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IPuNor4eGwYY" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ysvsNyONGt0Q" + }, + "outputs": [], + "source": [ + "train_start_dt = '2014-11-01 00:00:00'\n", + "test_start_dt = '2014-12-30 00:00:00'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 548 + }, + "id": "SsfdLoPyGy9w", + "outputId": "d6d6c25b-b1f4-47e5-91d1-707e043237d7" + }, + "outputs": [], + "source": [ + "energy[(energy.index < test_start_dt) & (energy.index >= train_start_dt)][['load']].rename(columns={'load':'train'}) \\\n", + " .join(energy[test_start_dt:][['load']].rename(columns={'load':'test'}), how='outer') \\\n", + " .plot(y=['train', 'test'], figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XbFTqBw6G1Ch" + }, + "source": [] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "Zdaj morate pripraviti podatke za učenje z izvajanjem filtriranja in skaliranja vaših podatkov.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cYivRdQpHDj3", + "outputId": "a138f746-461c-4fd6-bfa6-0cee094c4aa1" + }, + "outputs": [], + "source": [ + "train = energy.copy()[(energy.index >= train_start_dt) & (energy.index < test_start_dt)][['load']]\n", + "test = energy.copy()[energy.index >= test_start_dt][['load']]\n", + "\n", + "print('Training data shape: ', train.shape)\n", + "print('Test data shape: ', test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Prilagodite podatke, da bodo v razponu (0, 1).\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "3DNntGQnZX8G", + "outputId": "210046bc-7a66-4ccd-d70d-aa4a7309949c" + }, + "outputs": [], + "source": [ + "scaler = MinMaxScaler()\n", + "train['load'] = scaler.fit_transform(train)\n", + "train.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "26Yht-rzZexe", + "outputId": "20326077-a38a-4e78-cc5b-6fd7af95d301" + }, + "outputs": [], + "source": [ + "test['load'] = scaler.transform(test)\n", + "test.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x0n6jqxOQ41Z" + }, + "source": [ + "### Ustvarjanje podatkov s časovnimi koraki\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fdmxTZtOQ8xs" + }, + "source": [ + "Za naš SVR preoblikujemo vhodne podatke v obliko `[batch, timesteps]`. Tako preoblikujemo obstoječe `train_data` in `test_data`, da dodamo novo dimenzijo, ki se nanaša na časovne korake. V našem primeru vzamemo `timesteps = 5`. 
Tako so vhodi v model podatki za prve 4 časovne korake, izhod pa bodo podatki za 5. časovni korak.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Rpju-Sc2HFm0" + }, + "outputs": [], + "source": [ + "# Converting to numpy arrays\n", + "\n", + "train_data = train.values\n", + "test_data = test.values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Selecting the timesteps\n", + "\n", + "timesteps=None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "O-JrsrsVJhUQ", + "outputId": "c90dbe71-bacc-4ec4-b452-f82fe5aefaef" + }, + "outputs": [], + "source": [ + "# Converting data to 2D tensor\n", + "\n", + "train_data_timesteps=None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "exJD8AI7KE4g", + "outputId": "ce90260c-f327-427d-80f2-77307b5a6318" + }, + "outputs": [], + "source": [ + "# Converting test data to 2D tensor\n", + "\n", + "test_data_timesteps=None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2u0R2sIsLuq5" + }, + "outputs": [], + "source": [ + "x_train, y_train = None\n", + "x_test, y_test = None\n", + "\n", + "print(x_train.shape, y_train.shape)\n", + "print(x_test.shape, y_test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8wIPOtAGLZlh" + }, + "source": [ + "## Ustvarjanje SVR modela\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EhA403BEPEiD" + }, + "outputs": [], + "source": [ + "# Create model using RBF kernel\n", + "\n", + "model = None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GS0UA3csMbqp", + "outputId": 
"d86b6f05-5742-4c1d-c2db-c40510bd4f0d" + }, + "outputs": [], + "source": [ + "# Fit model on training data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Rz_x8S3UrlcF" + }, + "source": [ + "### Ustvari napoved modela\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XR0gnt3MnuYS", + "outputId": "157e40ab-9a23-4b66-a885-0d52a24b2364" + }, + "outputs": [], + "source": [ + "# Making predictions\n", + "\n", + "y_train_pred = None\n", + "y_test_pred = None" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_2epncg-SGzr" + }, + "source": [ + "## Analiza učinkovitosti modela\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Scaling the predictions\n", + "\n", + "y_train_pred = scaler.inverse_transform(y_train_pred)\n", + "y_test_pred = scaler.inverse_transform(y_test_pred)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xmm_YLXhq7gV", + "outputId": "18392f64-4029-49ac-c71a-a4e2411152a1" + }, + "outputs": [], + "source": [ + "# Scaling the original values\n", + "\n", + "y_train = scaler.inverse_transform(y_train)\n", + "y_test = scaler.inverse_transform(y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "u3LBj93coHEi", + "outputId": "d4fd49e8-8c6e-4bb0-8ef9-ca0b26d725b4" + }, + "outputs": [], + "source": [ + "# Extract the timesteps for x-axis\n", + "\n", + "train_timestamps = None\n", + "test_timestamps = None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(25,6))\n", + "# plot original output\n", + "# plot predicted output\n", + "plt.legend(['Actual','Predicted'])\n", + 
"plt.xlabel('Timestamp')\n", + "plt.title(\"Training data prediction\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "LnhzcnYtXHCm", + "outputId": "f5f0d711-f18b-4788-ad21-d4470ea2c02b" + }, + "outputs": [], + "source": [ + "print('MAPE for training data: ', mape(y_train_pred, y_train)*100, '%')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 225 + }, + "id": "53Q02FoqQH4V", + "outputId": "53e2d59b-5075-4765-ad9e-aed56c966583" + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(10,3))\n", + "# plot original output\n", + "# plot predicted output\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "clOAUH-SXCJG", + "outputId": "a3aa85ff-126a-4a4a-cd9e-90b9cc465ef5" + }, + "outputs": [], + "source": [ + "print('MAPE for testing data: ', mape(y_test_pred, y_test)*100, '%')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DHlKvVCId5ue" + }, + "source": [ + "## Napoved celotnega nabora podatkov\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cOFJ45vreO0N", + "outputId": "35628e33-ecf9-4966-8036-f7ea86db6f16" + }, + "outputs": [], + "source": [ + "# Extracting load values as numpy array\n", + "data = None\n", + "\n", + "# Scaling\n", + "data = None\n", + "\n", + "# Transforming to 2D tensor as per model input requirement\n", + "data_timesteps=None\n", + "\n", + "# Selecting inputs and outputs from data\n", + "X, Y = None, None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ESSAdQgwexIi" + }, + "outputs": [], + 
"source": [ + "# Make model predictions\n", + "\n", + "# Inverse scale and reshape\n", + "Y_pred = None\n", + "Y = None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 328 + }, + "id": "M_qhihN0RVVX", + "outputId": "a89cb23e-1d35-437f-9d63-8b8907e12f80" + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(30,8))\n", + "# plot original output\n", + "# plot predicted output\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "AcN7pMYXVGTK", + "outputId": "7e1c2161-47ce-496c-9d86-7ad9ae0df770" + }, + "outputs": [], + "source": [ + "print('MAPE: ', mape(Y_pred, Y)*100, '%')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za strojno prevajanje [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da upoštevate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo strokovno človeško prevajanje. 
Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki izhajajo iz uporabe tega prevoda.\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "Recurrent_Neural_Networks.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.1" + }, + "coopTranslator": { + "original_hash": "e86ce102239a14c44585623b9b924a74", + "translation_date": "2025-09-06T14:06:00+00:00", + "source_file": "7-TimeSeries/3-SVR/working/notebook.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/translations/sl/8-Reinforcement/1-QLearning/notebook.ipynb b/translations/sl/8-Reinforcement/1-QLearning/notebook.ipynb new file mode 100644 index 000000000..d3147c5c5 --- /dev/null +++ b/translations/sl/8-Reinforcement/1-QLearning/notebook.ipynb @@ -0,0 +1,411 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "17e5a668646eabf5aabd0e9bfcf17876", + "translation_date": "2025-09-06T15:03:25+00:00", + "source_file": "8-Reinforcement/1-QLearning/notebook.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# Peter 
in volk: Uvod v okrepljeno učenje\n", + "\n", + "V tem vodiču se bomo naučili, kako uporabiti okrepljeno učenje za reševanje problema iskanja poti. Zgodba je navdihnjena z glasbeno pravljico [Peter in volk](https://en.wikipedia.org/wiki/Peter_and_the_Wolf) ruskega skladatelja [Sergeja Prokofjeva](https://en.wikipedia.org/wiki/Sergei_Prokofiev). Gre za zgodbo o mladem pionirju Petru, ki pogumno zapusti svojo hišo in se odpravi na gozdno jaso, da bi lovil volka. Naučili bomo algoritme strojnega učenja, ki bodo Petru pomagali raziskati okolico in zgraditi optimalen navigacijski zemljevid.\n", + "\n", + "Najprej uvozimo nekaj uporabnih knjižnic:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import random\n", + "import math" + ] + }, + { + "source": [ + "## Pregled učenja z okrepitvijo\n", + "\n", + "**Učenje z okrepitvijo** (RL) je tehnika učenja, ki nam omogoča, da se naučimo optimalnega vedenja **agenta** v nekem **okolju** z izvajanjem številnih poskusov. Agent v tem okolju mora imeti določen **cilj**, ki ga opredeljuje **funkcija nagrajevanja**.\n", + "\n", + "## Okolje\n", + "\n", + "Za enostavnost si predstavljajmo Peterjev svet kot kvadratno ploščo velikosti `width` x `height`. Vsaka celica na tej plošči je lahko:\n", + "* **zemlja**, po kateri lahko Peter in druga bitja hodijo\n", + "* **voda**, po kateri se seveda ne more hoditi\n", + "* **drevo** ali **trava** - mesto, kjer se lahko spočiješ\n", + "* **jabolko**, ki predstavlja nekaj, kar bi Peter z veseljem našel, da se nahrani\n", + "* **volk**, ki je nevaren in se mu je treba izogniti\n", + "\n", + "Za delo z okoljem bomo definirali razred `Board`. Da ne bi preveč obremenili tega zvezka, smo vso kodo za delo s ploščo premaknili v ločen modul `rlboard`, ki ga bomo zdaj uvozili. 
Več podrobnosti o notranji implementaciji si lahko ogledate znotraj tega modula.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "Zdaj ustvarimo naključno ploščo in si oglejmo, kako izgleda:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 1" + ] + }, + { + "source": [ + "## Dejanja in politika\n", + "\n", + "V našem primeru bi bil Peterjev cilj najti jabolko, medtem ko se izogiba volku in drugim oviram. Določite ta dejanja kot slovar in jih povežite s pari ustreznih sprememb koordinat.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 2" + ] + }, + { + "source": [ + "Strategija našega agenta (Peter) je določena s tako imenovano **politiko**. Oglejmo si najpreprostejšo politiko, imenovano **naključna hoja**.\n", + "\n", + "## Naključna hoja\n", + "\n", + "Najprej rešimo naš problem z implementacijo strategije naključne hoje.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "# Let's run a random walk experiment several times and see the average number of steps taken: code block 3" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 4" + ] + }, + { + "source": [ + "## Funkcija nagrajevanja\n", + "\n", + "Da bi naša politika postala bolj inteligentna, moramo razumeti, kateri premiki so \"boljši\" od drugih.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 5" + ] + }, + { + "source": [ + "## Q-Učenje\n", + "\n", + "Ustvarite Q-tabelo ali večdimenzionalno matriko. 
Ker ima naša plošča dimenzije `width` x `height`, lahko Q-tabelo predstavimo z numpy matriko oblike `width` x `height` x `len(actions)`:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 6" + ] + }, + { + "source": [ + "Podajte Q-tabelo funkciji `plot`, da vizualizirate tabelo na plošči:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "error", + "ename": "NameError", + "evalue": "name 'm' is not defined", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mQ\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mNameError\u001b[0m: name 'm' is not defined" + ] + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## Bistvo Q-Učenja: Bellmanova enačba in učni algoritem\n", + "\n", + "Napišite psevdokodo za naš učni algoritem:\n", + "\n", + "* Inicializirajte Q-tabelo Q z enakimi vrednostmi za vsa stanja in akcije\n", + "* Nastavite hitrost učenja $\\alpha\\leftarrow 1$\n", + "* Večkrat ponovite simulacijo\n", + " 1. Začnite na naključni poziciji\n", + " 1. Ponavljajte\n", + " 1. Izberite akcijo $a$ v stanju $s$\n", + " 2. Izvedite akcijo s premikom v novo stanje $s'$\n", + " 3. Če naletimo na pogoj konca igre ali je skupna nagrada premajhna - zaključite simulacijo \n", + " 4. Izračunajte nagrado $r$ v novem stanju\n", + " 5. Posodobite Q-funkcijo v skladu z Bellmanovo enačbo: $Q(s,a)\\leftarrow (1-\\alpha)Q(s,a)+\\alpha(r+\\gamma\\max_{a'}Q(s',a'))$\n", + " 6. 
$s\\leftarrow s'$\n", + " 7. Posodobite skupno nagrado in zmanjšajte $\\alpha$.\n", + "\n", + "## Izkoriščanje vs. Raziskovanje\n", + "\n", + "Najboljši pristop je najti ravnovesje med raziskovanjem in izkoriščanjem. Ko se več naučimo o našem okolju, bomo bolj verjetno sledili optimalni poti, vendar se občasno odločimo za nepreizkušeno pot.\n", + "\n", + "## Python Implementacija\n", + "\n", + "Sedaj smo pripravljeni implementirati učni algoritem. Pred tem potrebujemo tudi funkcijo, ki bo poljubne številke v Q-tabeli pretvorila v vektor verjetnosti za ustrezne akcije:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 7" + ] + }, + { + "source": [ + "Dodamo majhno količino `eps` k prvotnemu vektorju, da se izognemo deljenju z 0 v začetnem primeru, ko so vse komponente vektorja enake.\n", + "\n", + "Dejanski učni algoritem bomo izvedli za 5000 poskusov, imenovanih tudi **epohe**:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "" + ] + } + ], + "source": [ + "\n", + "from IPython.display import clear_output\n", + "\n", + "lpath = []\n", + "\n", + "# code block 8" + ] + }, + { + "source": [ + "Po izvedbi tega algoritma bi morala biti Q-tabela posodobljena z vrednostmi, ki določajo privlačnost različnih dejanj na vsakem koraku. Vizualizirajte tabelo tukaj:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## Preverjanje politike\n", + "\n", + "Ker Q-Table navaja \"privlačnost\" vsakega dejanja v vsakem stanju, je zelo enostavno uporabiti to tabelo za določanje učinkovite navigacije v 
našem svetu. V najpreprostejšem primeru lahko preprosto izberemo dejanje, ki ustreza najvišji vrednosti v Q-Table:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "2" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ], + "source": [ + "# code block 9" + ] + }, + { + "source": [ + "Če večkrat preizkusite zgornjo kodo, boste opazili, da se včasih preprosto \"zatakne\" in morate pritisniti gumb STOP v zvezku, da jo prekinete.\n", + "\n", + "> **Naloga 1:** Spremenite funkcijo `walk`, da omejite največjo dolžino poti na določeno število korakov (recimo 100), in opazujte, kako zgornja koda občasno vrne to vrednost.\n", + "\n", + "> **Naloga 2:** Spremenite funkcijo `walk`, da se ne vrača na mesta, kjer je že bila prej. To bo preprečilo, da bi se funkcija `walk` ujela v zanko, vendar se agent še vedno lahko \"ujame\" na lokaciji, iz katere ne more pobegniti.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Average path length = 5.31, eaten by wolf: 0 times\n" + ] + } + ], + "source": [ + "\n", + "# code block 10" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 57 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nO3de5wU5Z3v8c8vEk1islETkuPtlcFdT3LMvjbRsF5iTnajibdkQ5KjOeRKjKsnWT3rms1mwVw8q/EWL6gJXlAwxBsqQSWCIgJeuDPc5TrDfQBhhoFhYBiYgef80U8PPT19qe7p7qrp+r5fL5jup6qrnuqq/tVTTz31POacQ0RE4uE9YWdAREQqR0FfRCRGFPRFRGJEQV9EJEYU9EVEYqRf2BnI5aMf/airqakJOxsiIn3KggULmpxz/TNNi3TQr6mpoba2NuxsiIj0KWa2Mds0Ve+IiMSIgr6ISIwo6IuIxIiCvohIjCjoi4jEiIK+iEiMKOiLiMSIgr5IlXr1nW007T0QdjYkYhT0RapQy/4OfvLkQn78x/lhZ0UiRkFfpAp1HjoMQMOu/SHnRKJGQV9EJEYU9EVEYkRBX0QkRhT0RURiREFfRCRGFPRFRGJEQV9EJEYU9EVEYkRBX0QkRhT0RURiREFfRCRGFPRFRGJEQV9EJEYU9EVEYkRBX0QkRhT0RURiREFfRCRGFPRFRGJEQV9EJEYU9EVEYkRBX0QkRhT0RURiREFfRCRGFPRFRGJEQV9EJEYCBX0zu8HMlpvZO2b2jJm9z8wGmNlcM6szs2fN7Gg/7zH+fb2fXpOynGE+fbWZXVyeTRIRkWzyBn0zOxn4V2Cgc+5vgaOAwcCdwHDn3OnALuAq/5GrgF3Oub8Bhvv5MLMz/Oc+DVwCPGhmR5V2c0REJJeg1Tv9gPebWT/gA8A24AJgnJ8+BviGfz3Iv8dPv9DMzKePdc4dcM6tB+qBs3u/CSIiElTeoO+c2wLcDWwiEexbgAXAbudcp5+tATjZvz4Z2Ow/2+nn/0hqeobPdDGza8ys1sxqGxsbi9kmERHJIkj1zvEkSukDgJOAY4FLM8zqkh/JMi1bevcE50Y65wY65wb2798/X/ZEpACz1+7k0bfWhZ0NCVG/APN8GVjvnGsEMLPxwOeB48ysny/NnwJs9fM3AKcCDb466MNAc0p6UupnRKQCvvPoHACu/uJpIedEwhKkTn8TcK6ZfcDXzV8IrACmA5f7eYYAL/nXE/x7/PRpzjnn0wf71j0DgNOBeaXZDBERCSJvSd85N9fMxgELgU5gETASmAiMNbPf+rRR/iOjgCfMrJ5ECX+wX85yM3uOxAmjE7jWOXeoxNsjIiI5BKnewTl3E3BTWvI6MrS+cc61A1dkWc6twK0F5lFEREpET+SKiMSIgr6ISIwo6IuIxEigOn0R6du27t7PrLU7w86GRICCvkgMDB45h03NbWFnQyKgqqt3duxp56xbprBme2vYWREJVWPrgbCzIAENHjmb+1+vK9vyqzroT1m5neZ9B3l85oawsyIiEsicdc0Mf31N2ZZf1UFfRES6U9AXEYkRBX0RkRhR0BcRiREFfZEq1GOgChFPQV+kimUauUjiTUFfpIqpxC/pFPRFqpBK+JJNTIL+kfLOV+59k/ELG0LMi4hIeKo66FuG8k7djr387LklIeRGRCR8VR30RUSkOwV9EZEYUdAXEYkRBX2RGHBqvCmegr6ISIwo6IuIxIiCvohIjMQq6Dunek2Jp0zPrEg8xSLoK9aLiCRUddA3FW5ERLqp6qAvIiLdVXXQV7WOiEh3VR30k1TNIyJRc+iw47/+spyGXW0VXW8sgr6ISNQs3ryLx2du4IZnF1d0vQr6IiIhSFY/H65wNbSCvkiVeLuukUfeXBt2NiTiAgV9MzvOzMaZ2SozW2lm55nZCWY2xczq/N/j/bxmZg+YWb2ZLTWzs1KWM8TPX2dmQ8q1USJ
x9INR87j9lVVhZ0MiLmhJ/37gVefcp4DPACuBocBU59zpwFT/HuBS4HT/7xrgIQAzOwG4CTgHOBu4KXmiqBS15hGRuMsb9M3sr4AvAqMAnHMHnXO7gUHAGD/bGOAb/vUg4E8uYQ5wnJmdCFwMTHHONTvndgFTgEtKujVZKNhL3OiQl2yClPRPAxqBx81skZk9ZmbHAh93zm0D8H8/5uc/Gdic8vkGn5YtvRszu8bMas2strGxseAN6r6sXn1cpM9L/gTUn74kBQn6/YCzgIecc2cC+zhSlZNJplDrcqR3T3BupHNuoHNuYP/+/QNkT0REggoS9BuABufcXP9+HImTwHZfbYP/uyNl/lNTPn8KsDVHuoiUicr3ki5v0HfOvQtsNrNP+qQLgRXABCDZAmcI8JJ/PQH4oW/Fcy7Q4qt/JgMXmdnx/gbuRT5NREpMNZuSTb+A8/1f4CkzOxpYB1xJ4oTxnJldBWwCrvDzTgIuA+qBNj8vzrlmM7sFmO/nu9k511ySrRARkUACBX3n3GJgYIZJF2aY1wHXZlnOaGB0IRkUEZHS0RO5IiIxEqugr5taIhJVlRrONRZBXw9nSdxpjNxo++sbJ/Gth2ZVZF1VHfR1mItIVKWWRQ8ddizatLsi663qoC8iIt0p6IuIhCCsmggFfRGRGFHQFxGJkaoO+mq0IyLSXVUH/SR1sSxx5Zzj91Pr2N9xKOysSETEIugnVerhB5Go2NXWwT1T1oSdDYmQWAV9kUrae6CTZQ0trNy2hzteWaVCh0RC0F42+zT91iQMV4+pZfa6nRzT7z0c6DzMtV/6az70vveGnS2Juaou6asqX8K0cNMuADoPJ0odpptLEgFVHfRFRKQ7BX2REL2xegf7D5a2ZY3uHUguCvoiIVn9bis/enw+v3rxnZIu97ZJK0u6PKkuCvoiIWlt7wBgw859JV3uk3M26cFEyUpBX0SkAhpbD1AzdCK1GxJDg4d1Yo5V0FfpRyopebwl2+yorj3e5q1PBPvRM9d3S690m65YBX2RMKilpkSJgr6ISIgqff0Xi6DvVLEjMaOLi+jTICploMtqEZHuqjroi4hIdwr6ImWmRjsSJQr6IiIxEqugf+iwilxSebq3JFESq6B/9+TVYWdBpAc9tCWVFKug/+ryd8POgkgXXQFIGKo66KsAJVFWruNTJ5PqsHPvgbIst6qDfpLpURWpgB2t7cxZt7Pgz5V6RC0VdqIt1/7pPHS463W5bkHGIujriVyphG+OmMXgkXPCzob0YXe+uqrs6wgc9M3sKDNbZGYv+/cDzGyumdWZ2bNmdrRPP8a/r/fTa1KWMcynrzazi0u9MT3zXO41iByxZff+nNNV9BA4EpcyHQ9zfU+cienlOWIKKelfD6QOyXMnMNw5dzqwC7jKp18F7HLO/Q0w3M+HmZ0BDAY+DVwCPGhmR/Uu+yIR5n+zql6UXCLZtbKZnQJ8FXjMvzfgAmCcn2UM8A3/epB/j59+oZ9/EDDWOXfAObceqAfOLsVGBKWSv4RCx50UI+Q6/fuAXwDJuwwfAXY75zr9+wbgZP/6ZGAzgJ/e4ufvSs/wmS5mdo2Z1ZpZbWNjYwGbIiIi+eQN+mb2NWCHc25BanKGWdMHCkqfluszRxKcG+mcG+icG9i/f/982SuIWjVI2DoOHWZWfVPY2ZAIyRqWynSFGKSkfz7wdTPbAIwlUa1zH3CcmfXz85wCbPWvG4BTAfz0DwPNqekZPiNSvVJ+1fdOWcN3H5vbNU5qqbR3HCrp8qT88sb0sKp3nHPDnHOnOOdqSNyIneac+x4wHbjczzYEeMm/nuDf46dPc4nnzCcAg33rngHA6cC8km1JACrpS5gMWLtjLwBNew+WdNnN+0q7PCmf1vbOrNMqcfunN+30/xP4mZnVk6izH+XTRwEf8ek/A4YCOOeWA88BK4BXgWudcyqexMg9r61mWUNL2NkI7K7Jq3hhUUPJlucobeHtF+OWUDN0YgmXKJX
wdl336r0FG3dVdP398s9yhHPuDeAN/3odGVrfOOfagSuyfP5W4NZCM9lbyRK+Wu+E6/fT6vn9tHo23PHVsLMSyIjpawH45pmn9G5BeY67Yjtce642+wkp6BI7Dh3m1y++w79eeDonHff+ovIh5VGuiomqfiJX7aMlalKPyPRCyN4DnSzc1PtSXyGFm7frGhk7fzM3vrCs1+uVvqGqg75I1OQqvV371EK+9eAs9rR3VCw/SSoeRUPq8VGufaKgLxICs54NC5ZtSdzv6Og8nOET0tcVWr2s6h2RKpIa8EvRy6buV1WHqLfeEYmFGXVN7G7rO00inVN1TRRFpcm4gr5IHt8fNZerxtT2ejmpP/rmfQfZvKut18sslYjEo9hbktKkuVwnCQV9kQDWbG8t+rOZSt03vrCMG55dknH+Yn7rqQFif4Cncy9/aJbPm64JwhLWiVZBXySICvxCexN+t+bpyz9dbYUfCJLCles+jYK+VESxDyBFRVGl7wqW5fIN4CJ9j6p3eiH53fXxuBOqhZt20XGo+psSjphezx2v9ByyrvNwdW67hhINx4qte3hyzsZQ1l1QNwx9jqorS2Lltj1868FZXP0/B/DLr54RdnbK6q7JqwEYeumnuqW3dwQP+sPGL2XHngNd74OGVYXf+LjsgbdDW3d1B/20X5HaMhenaW8igK3cVvzNzGp36PCRg+2ZeZszzpPt+OvrVV9SHlEYI7fPUqyXchs2fmmg+XLF9zCO02ufWhTCWiVMsQj65eacU2kt5nL1eJkUtUPkzTWNgZp3SjjK1Zw2VkG/HD+6toOdDBg2iT9Mqy/9wiNGN/2KU4qf7oqte/iHu6bT0pa5M7b0Y3vKiu15l7moBD16SnCF972j6p1I2u1/hE/P2xRyTnqvdkMzO/ce6JGuB3h6pxQ/3Qem1rFxZxuz1gYbX/eXL76Td577Xq/reh21qxApHwX9Eho+ZQ33vLY67GwU7fKHZ3PFI7NLvtz7X69jwLBJJV9utSpFAFZ1o2QTq6Bf7tY790+t4/d9vJpnXeO+ki9zzOwNJV9mNUn2sqnWZZJKD2dJn1Xtpc69B7IPdF1q2b5J3W+Jvqj8DGIR9CPyXcdWNX//k5Zt429vmlySZeVszlnmATh0lRE96nunGClf2uHDrixn2moOaNLTo2+t4+a/rOh6/9aaxuAfDniwhBGAo1IKlSMmLXu3LMut7qCf4rQbJ2XslOqRN9fy0uItgZbxz2Pm89mbX8s4bVtLe6/yJ6WxoWkfNUMnsrRhd1mWf+uklYyeuR6APe0dge6BFBrDcwXgbNPS0xXE+75bXl6Rf6YixCboZ3P7K6u4fuziQPO+vnJHVxPNOEoGknELGlj17p6M83QeOsySzd0DbiUD0LRVOwAYvzDYiTyb1G4Vsvn2w7OZt6G5wCVnX25qCb9ue2ugtvYihYp90O+tOFSFplc3/Pz5JVxyX+YOo+5+bQ2DRsxk+daWjNP7iuZ9+YdHXPVusL6IijnnfWX4W1z9p56jdanuXXpLQT/mWto6WNu4t6jPDhk9r0fVWDLYN+3tO2PKVox1/ddNtbdukmhR0I+5r4+YwYX3vFlU4HlzTWPgqrFstu9pp2V/6arMol8S7l2AL9f5Qaed+FDQ76W+/mPZuDP44NzFBpxcJ5RzbpvKF+6cVtyCC9TecYimvQdoj3gnY+lt7mfUNfUYo7fQ4RFFkqq7P/0y29kHAkhQ5axhyLfo1vbgDzftbjvIuAUNXPWFAV1PsmZcZ4YN+tSvXwXg7075MBOu+0LgdZaMSzQGyC7z9nx/1NweaZ+/o/uJUjVE0fDtR2bzoWP6MepHf99jWlSuQhX0e+Fzv32dE449OuxsVEwUDtrP3jwl8ffU4xhYc0KP6UGyuLShuJvM/3DX9KI+F4GvLa++kMe+YN76QltzVV4sqnfKWQoK0sqjL0j9ivpC1cHBEMbrLaQqLFXwwy//nOpuQXqrqoO+Si+5ZRvoPL3qIKn
ok2c54lQBy2xsPcDztZmHMIyChZt2d6uzT+3KumboxIrkQaeS+FD1Toz95qUjfa7nutlazMkzSs0Q/88TtSzcVJ4ndEvlouFv8dEPJqoKc5Xms41tEJ1vW6Kuqkv6kluuJz63tezPWHUVpWCeS2oud7T2HBgmmvKfXh2OXwcYIEUSxs7bxKvvlKcPm0JF5aeTN+ib2almNt3MVprZcjO73qefYGZTzKzO/z3ep5uZPWBm9Wa21MzOSlnWED9/nZkNKd9mJUTkO+4T0r+r826fxlm3TOkx33MBq0neWH2kI7JK1u7katFTDa57ehFPzNkYdjb6jKHjl/GTJxeEnY1ICVLS7wT+3Tn3P4BzgWvN7AxgKDDVOXc6MNW/B7gUON3/uwZ4CBInCeAm4BzgbOCm5Imi3MKIA+ffMa3qSmQOR932zE/vvrx0K7tSrgz+OGtDhXIVfQc7E/dOdBNWoiBv0HfObXPOLfSvW4GVwMnAIGCMn20M8A3/ehDwJ5cwBzjOzE4ELgamOOeanXO7gCnAJSXdmgppyjCObLotu/cXVCL77csrutWxl0vDrjbWbG/11TRHzoa9ufTc1rKf655exE+fCl6iembeJs7M0mNpKaRuT5UX/gGYsGRrrz7fm2q78++Yxr1T1hT12T3tHRVvLfbKsm1sbk60xNqyez81QyeyMEaDxBdUp29mNcCZwFzg4865bZA4MQAf87OdDKTWATT4tGzp6eu4xsxqzay2sbGAvsorKFlyK6XHZqznT7PLf9n+hTunc9Hwt3hm3mYKrXgxLGMATX4fW3cH71562Phl7Cqwx9Ig33scAnwmBfXrX2Jbdu/ngal1+WfM4OLhb2VtLVYuP31qIZfdn+gwcEZd4nsbO29TSdfR0tbB6BnrI3kPLHDQN7MPAn8G/s05l7lfXT9rhjSXI717gnMjnXMDnXMD+/fvHzR7FRW93RjM9FVHngZNDxLlrHoo1YG/cluuwy65rpKsiu17QhwfIYQDLKx7IWGNQ9Fa5iEuh72wlJtfXkHtxuhdQQQK+mb2XhIB/ynn3HifvN1X2+D/JiNKA3BqysdPAbbmSC+7CJ5sQzFtVXoXAIX90B2uqOBQlhu5Zd6nC8rwY83W3DLdna+uLvm6o6alrYPdbdXxYGMmyaveYh/oK6cgrXcMGAWsdM7dmzJpApBsgTMEeCkl/Ye+Fc+5QIuv/pkMXGRmx/sbuBf5tLJJ/sTmbdjZq+V0HjrMw2+u7XofxUu2pBcWNXDFw7MK/lzOTUqJVZm2PUpfRzVU7/x5YUPF17k6y6A45fKZm1/r6lIjTOU6dhf7gYTumryqKy0qx2aQh7POB34ALDOzZD+6NwJ3AM+Z2VXAJuAKP20ScBlQD7QBVwI455rN7BZgvp/vZudcRTqq2Nxc/I2izkOHOfu2qX2mu4Ubnl2SdVpqFc6ry4O1XT4cYASpsI2esZ7jj30v3zzzlK601G3NVsLuOHSY68cu4voL/zuf/G8fKns+o2z7nr7yLEPpjJm1gZsmLC/rOqJUIErKG/SdczPIXg9wYYb5HXBtlmWNBkYXksGwvbmmMWPAzzZcYLVJ7+MmU/VOvhJM+oH/qxeX9TZb3dzsxxL95pmn8Nry4EMMrti6h0nL3qVh1/6K9Lp54FDuHlmDtAqrViOm1/P9cz7Bhz/w3oqt8+m5pb15m0lUSvep9ERuHpnGSnWOijzl19rewYqtxZ1cnHPM39DM7LW9q9pKDdj5Si2bmttYmFYXvqe9g/1p3U8/Oafnj62zRB2ozahv6vb+3ZZ2NjX3rFd1zvGbHKW8Fxf3bozdTK4e03P4w1LoC1djSTv3HmBHa8+bt3dNXs2vKtBkWRT08ypFq4ZtLfsZMb2+4HsBVz4+n8seyDwWbT7OwRUPz+Y7j84JPH82qdUj+b6NfQe7B/gfPz4/y5zdFTp6Vr7WRsnt+drvZ2Sc/lzt5h4DuKdaVIa+epYU2aVzPoWMR1Col5duZUPTvkD
zBjn5fO63r3P2rVMzTmsrc4uaMESxekdBv0hBW2IA/PTJhdw1eTV1Owobi7Y3zb0OpRxt+w/2bqCXoAE2k8U5AmsQW3bv59aJK4ouzWarMvnPP5e2iilMo2asK9uyr3t6ERfd91bZlp8qzPhYyO+5EBGM+fEN+hOXbqvYupJB93CRp/1iWgulVkt9+5HZrGvcmzM4z12fvxrIrPCD+D0Br5Q6DmVe8vXPLOLRt9ezpKH7yeOFhVtwzmUthT5V5vraJ2ZvKOvyC/HAtPqyLr8cDyNWQiGNL54tU9fbhVSPVkpsg/61Ty+s+DorudNTb8Au29LCBfe8yZYcj7v/KEA1jHMw8q2epcpccf09AY+wc2/vfsm/o7Wd7Xva6chSwh+/aAuTl7/LzLVNGaeX269fKm+rj76ot4f37raD1AydyMtLjzy+s2Z7K/uKrPZpzNC76gV3v1Fs9qpGbIN+ujezPMZeiou+Qm4LzN/QsxVr0JPFz58/0lwzU510as+XhejNpW/Qkn6q1vYOzr51KufclrnuN6llf0dkSk/VYH3Auvtsevv8yjq//lEz1nct76Lhb3HVmGD3hQpZRy6z1jbxwqLKPytRKQr63pDR8zI+hZkpZjlXmqZYre0dfOvBmaxrPFLXf8XDs3uUyA8eOsyNLyzLe7k6bsGRA/W495em6Vtqff7qlNGdgiom6N82aVX+maTkdgZsMvrioi3UDJ3Yo6O0XCF/4859jJheWDVU8hwyZ11xj/PkuxeV7Xj+7qNzuz3v8vLSrTya4Qq3r6rqoF9oy5v/9dCswKWVl3I06UvvtyVZJ/r4zPXd0qet2sHCTbsZ/nr3zqrOT+uAasLirTw9dxO3T1oZKG8Ag0bMDDxvLtv3HGCXf1x+d5YO0nK2/Cni5Higo+eN50zNLqF7oPl/RT5ok8zijLpwqoqiImg5PfnEcCENE74/ai53Tc7dvUSxFwqz1jZ161eq1K57ehG3FvDb6y56l6JVG/Snr97BnHWFt1F/fkH+yzqHY21j9svE9GqJPe2JYPlcbXGXjMkbwNkasMysb6LtYHmau33p7jf4l6eKv/9RTEk/U23S9WMX93hQbOe+g7yR8mNP78O/vsDWUoti1L1uPqve3ZO1yjOb13OMxBakBVl6s90g4XJ90z6+++hcrvxjzyqgYk4iuQpzNUMnsnFn4ne/YGMzNUMnUuevFkZMr+ebD5amoFVuVTtG7pUB24enW76lBQaemn/GgvSMYm0HO3lweqI/n3yllGxx0znHym2tfO+xuXz9Myf1OpfFSg/G5XLFw7O7vf9dno7Jvnzvm+XMTlW75L78z4ekH5Y/fWohG+74Kks272bnvgNc8KmP9yoPQa66v5RyY3b+hmb+vuYEnHNFD5F5/djFOacvbWjhEx85lr8sSbT+e6uuidM//qG8VzEQnadzq7akXyrFPhGbz/Apa7rqFPfmaZ2Q7dj/3eTVXQ9vFfoMQCllC77rGvcW9NDVlBXbmVvE1VmpRO9CvLJyxdgOf2Lfvqedt/NUgw0aMZMf/zHx9PH6pn28sXpHUaXu9I+s3LYnY0OHpCsens3m5jYeeWsd59w2tceN6VJ0lJhcwtKGYM+fpK4yW/VopVVtSb9YyX00d91OTj3hA9yTYUSgH4ya16t17G47yMz63gW3WfVNPPTGkZ4/g/Q1Xy6vr8x8WX/BPYWVtK/+UyJQfOusI2Pr5HpqtpSmrNjeZ9ujV8L3HpvLoM+elHW4zKT2tPsxXyphE8lL/cAntwz6ND84rybjPHvaO7ruzWxOuQ/0wqIGBn2mx5hNef3kicyjwS30reMKOZFsbO5d66hSUdDP4n+PzN59Qbabipn8/PklPZ4KHTxyDqveLbwljMOxo7WdptaDLNtSnkf6o6C3TQcLtaShpeuEE2cz6xNNFedv6HlvY976Zuat717KzlRd8alfv1qSvHz/sbl8+qS/yjjtpgnL+d45n+A97+mZgQUbd3XlMzUc3/D
sEv7p7wqvAg3SG+2yMnWvUS4K+mlKXe02Lu3G8KX3vx0o4Kc2H92YcpL5x7veoO3gIf7j4k+WLpMRU45+byS/sfM3RaaL5Rn1Td06z0ttrnzYwR2vrmJthirN3+R4aK5c1Xf/9IfM/TtBorEBJJrD5rtKqhQF/TRjZm8sawdWQathxqS0REmtxmnzrSCC3DgSKUSh3RuVq7+aTM66pfuAK5meDM9nXY4Wd5DorqRQnQG/tC/f+2bBY0KXS6xv5KaXwpPGLyp9t7oiUVfojc58fUlNydGEsxLS85evSW569VUmvxy/jJqhE7veBy18RSXgQ8yDfmq3BVGT2v9Il7g3L5Gyatpb2Ohw/zEu9++n2PskpareS29VVujPZ1tLz76q0gdUzzTeRtTFOuhHWR88liRmolL/H1ShLTbPu31a/pn6IAX9PkTVTiLFu/GFcMZQuPkvK0JZbzYK+iJStFINc1nNRqf1uRU2BX0RKdq/R/i+mGSmoC8iRXtpcYYGBxJpCvoiIjGioC8iEiMK+iIiMaKgLyISIwr6IiIxoqAvIhIjCvoiIjGioC8iEiMK+iIiMaKgLyISIwr6IiIxUvGgb2aXmNlqM6s3s6GVXr+ISJxVNOib2VHACOBS4AzgO2Z2RqnX07I/OkOTiYhESaVL+mcD9c65dc65g8BYYFCpV7KuMRqjzouIRE2lg/7JwOaU9w0+rYuZXWNmtWZW29jYWNRKPnvqccXnUEQkAp695tyyLLdfWZaanWVI6zZypXNuJDASYODAgUWNFGtmbLjjq8V8VESkqlW6pN8AnJry/hRAozCIiFRIpYP+fOB0MxtgZkcDg4EJFc6DiEhsVbR6xznXaWbXAZOBo4DRzrnllcyDiEicVbpOH+fcJGBSpdcrIiJ6IldEJFYU9EVEYkRBX0QkRhT0RURixJwr6vmnijCzRmBjLxbxUaCpRNnpC+K2vaBtjgttc2E+4Zzrn2lCpIN+b5lZrXNuYNj5qJS4bS9om+NC21w6qt4REYkRBX0RkRip9qA/MuwMVFjcthe0zXGhbS6Rqq7TFxGR7qq9pC8iIikU9EVEYqQqg341Db5uZqea2XQzW2lmy83sep9+gplNMbM6/y+8XRcAAAQhSURBVPd4n25m9oDf9qVmdlbKsob4+evMbEhY2xSEmR1lZovM7GX/foCZzfV5f9Z3zY2ZHePf1/vpNSnLGObTV5vZxeFsSTBmdpyZjTOzVX5fnxeDfXyDP6bfMbNnzOx91bafzWy0me0ws3dS0kq2X83sc2a2zH/mATPLNFBVd865qvpHosvmtcBpwNHAEuCMsPPVi+05ETjLv/4QsIbEoPK/A4b69KHAnf71ZcArJEYpOxeY69NPANb5v8f718eHvX05tvtnwNPAy/79c8Bg//ph4Kf+9b8AD/vXg4Fn/esz/L4/Bhjgj4mjwt6uHNs7Bvhn//po4Lhq3sckhkldD7w/Zf/+qNr2M/BF4CzgnZS0ku1XYB5wnv/MK8ClefMU9pdShi/5PGByyvthwLCw81XC7XsJ+AqwGjjRp50IrPavHwG+kzL/aj/9O8AjKend5ovSPxIjqk0FLgBe9gd0E9AvfR+TGJvhPP+6n5/P0vd76nxR+wf8lQ+AlpZezfs4OV72CX6/vQxcXI37GahJC/ol2a9+2qqU9G7zZftXjdU7eQdf76v8Je2ZwFzg4865bQD+78f8bNm2vy99L/cBvwAO+/cfAXY75zr9+9S8d22Xn97i5+9L23sa0Ag87qu0HjOzY6nifeyc2wLcDWwCtpHYbwuo7v2cVKr9erJ/nZ6eUzUG/byDr/dFZvZB4M/Avznn9uSaNUOay5EeKWb2NWCHc25BanKGWV2eaX1ie71+JKoAHnLOnQnsI3HZn02f32Zfjz2IRJXMScCxwKUZZq2m/ZxPodtY1LZXY9CvusHXzey9JAL+U8658T55u5md6KefCOzw6dm2v698L+cDXzezDcBYElU89wHHmVl
ypLfUvHdtl5/+YaCZvrO9kMhrg3Nurn8/jsRJoFr3McCXgfXOuUbnXAcwHvg81b2fk0q1Xxv86/T0nKox6FfV4Ov+bvwoYKVz7t6USROA5F38ISTq+pPpP/QtAc4FWvwl5GTgIjM73peyLvJpkeKcG+acO8U5V0Ni301zzn0PmA5c7mdL397k93C5n9/59MG+1ccA4HQSN70ixzn3LrDZzD7pky4EVlCl+9jbBJxrZh/wx3hym6t2P6coyX7101rN7Fz/Hf4wZVnZhX2To0w3Ti4j0cplLfDLsPPTy235AolLtqXAYv/vMhL1mVOBOv/3BD+/ASP8ti8DBqYs68dAvf93ZdjbFmDb/5EjrXdOI/FjrgeeB47x6e/z7+v99NNSPv9L/z2sJkCrhpC39bNArd/PL5JopVHV+xj4L2AV8A7wBIkWOFW1n4FnSNyz6CBRMr+qlPsVGOi/v7XAH0hrDJDpn7phEBGJkWqs3hERkSwU9EVEYkRBX0QkRhT0RURiREFfRCRGFPRFRGJEQV9EJEb+P5qkdQkuhnG4AAAAAElFTkSuQmCC\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(lpath)" + ] + }, + { + "source": [ + "## Vaja\n", + "## Bolj realističen svet Petra in volka\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za strojno prevajanje [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da upoštevate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo strokovno človeško prevajanje. 
Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki izhajajo iz uporabe tega prevoda.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sl/8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb b/translations/sl/8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb new file mode 100644 index 000000000..378b5b57f --- /dev/null +++ b/translations/sl/8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb @@ -0,0 +1,466 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "eadbd20d2a075efb602615ad90b1e97a", + "translation_date": "2025-09-06T15:13:57+00:00", + "source_file": "8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# Peter in volk: Realistično okolje\n", + "\n", + "V naši situaciji se je Peter lahko premikal skoraj brez utrujenosti ali lakote. V bolj realističnem svetu se mora občasno usesti in spočiti, pa tudi nahraniti. Naredimo naš svet bolj realističen z uvedbo naslednjih pravil:\n", + "\n", + "1. Z gibanjem iz enega kraja v drugega Peter izgublja **energijo** in pridobiva **utrujenost**.\n", + "2. Peter lahko pridobi več energije z uživanjem jabolk.\n", + "3. Peter se lahko znebi utrujenosti s počitkom pod drevesom ali na travi (tj. ko stopi na polje z drevesom ali travo - zeleno polje).\n", + "4. Peter mora najti in ubiti volka.\n", + "5. 
Da bi ubil volka, mora Peter imeti določene ravni energije in utrujenosti, sicer izgubi boj.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import random\n", + "import math\n", + "from rlboard import *" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "width, height = 8,8\n", + "m = Board(width,height)\n", + "m.randomize(seed=13)\n", + "m.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "actions = { \"U\" : (0,-1), \"D\" : (0,1), \"L\" : (-1,0), \"R\" : (1,0) }\n", + "action_idx = { a : i for i,a in enumerate(actions.keys()) }" + ] + }, + { + "source": [ + "## Določanje stanja\n", + "\n", + "V naših novih pravilih igre moramo spremljati energijo in utrujenost v vsakem stanju plošče. 
Zato bomo ustvarili objekt `state`, ki bo nosil vse potrebne informacije o trenutnem stanju problema, vključno s stanjem plošče, trenutnimi ravnmi energije in utrujenosti ter ali lahko premagamo volka v končnem stanju:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "class state:\n", + " def __init__(self,board,energy=10,fatigue=0,init=True):\n", + " self.board = board\n", + " self.energy = energy\n", + " self.fatigue = fatigue\n", + " self.dead = False\n", + " if init:\n", + " self.board.random_start()\n", + " self.update()\n", + "\n", + " def at(self):\n", + " return self.board.at()\n", + "\n", + " def update(self):\n", + " if self.at() == Board.Cell.water:\n", + " self.dead = True\n", + " return\n", + " if self.at() == Board.Cell.tree:\n", + " self.fatigue = 0\n", + " if self.at() == Board.Cell.apple:\n", + " self.energy = 10\n", + "\n", + " def move(self,a):\n", + " self.board.move(a)\n", + " self.energy -= 1\n", + " self.fatigue += 1\n", + " self.update()\n", + "\n", + " def is_winning(self):\n", + " return self.energy > self.fatigue" + ] + }, + { + "source": [ + "Poskusimo rešiti težavo z naključnim sprehodom in preverimo, ali nam uspe:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ], + "source": [ + "def random_policy(state):\n", + " return random.choice(list(actions))\n", + "\n", + "def walk(board,policy):\n", + " n = 0 # number of steps\n", + " s = state(board)\n", + " while True:\n", + " if s.at() == Board.Cell.wolf:\n", + " if s.is_winning():\n", + " return n # success!\n", + " else:\n", + " return -n # failure!\n", + " if s.at() == Board.Cell.water:\n", + " return 0 # died\n", + " a = actions[policy(m)]\n", 
+ " s.move(a)\n", + " n+=1\n", + "\n", + "walk(m,random_policy)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Killed by wolf = 5, won: 1 times, drown: 94 times\n" + ] + } + ], + "source": [ + "def print_statistics(policy):\n", + " s,w,n = 0,0,0\n", + " for _ in range(100):\n", + " z = walk(m,policy)\n", + " if z<0:\n", + " w+=1\n", + " elif z==0:\n", + " n+=1\n", + " else:\n", + " s+=1\n", + " print(f\"Killed by wolf = {w}, won: {s} times, drown: {n} times\")\n", + "\n", + "print_statistics(random_policy)" + ] + }, + { + "source": [ + "## Funkcija nagrajevanja\n", + "\n", + "### Uvod\n", + "Funkcija nagrajevanja je ključni del sistema za kreiranje inteligentnih agentov. Omogoča agentu, da oceni svoje delovanje in se prilagodi za dosego optimalnih rezultatov.\n", + "\n", + "### Cilji\n", + "- Definirati jasne kriterije za uspešnost.\n", + "- Spodbujati želeno vedenje.\n", + "- Preprečiti neželene stranske učinke.\n", + "\n", + "### Struktura funkcije nagrajevanja\n", + "Funkcija nagrajevanja je običajno sestavljena iz več komponent, ki se združijo v eno samo vrednost. 
Te komponente lahko vključujejo:\n", + "- **Osnovne nagrade**: Nagrade za dosego osnovnih ciljev.\n", + "- **Kazni**: Kazni za neželena dejanja ali vedenje.\n", + "- **Bonusne nagrade**: Dodatne nagrade za preseganje pričakovanj.\n", + "\n", + "### Primer\n", + "Spodaj je primer funkcije nagrajevanja:\n", + "\n", + "```python\n", + "def reward_function(state, action):\n", + " if state == \"goal_reached\":\n", + " return 100 # Osnovna nagrada\n", + " elif action == \"undesired_action\":\n", + " return -10 # Kazen\n", + " else:\n", + " return 0 # Nevtralna vrednost\n", + "```\n", + "\n", + "### Najboljše prakse\n", + "- **Jasnost**: Poskrbite, da je funkcija nagrajevanja enostavna za razumevanje in implementacijo.\n", + "- **Ravnovesje**: Zagotovite, da nagrade in kazni niso preveč ekstremne, saj lahko to vodi do nepredvidenega vedenja.\n", + "- **Testiranje**: Funkcijo nagrajevanja je treba temeljito testirati v različnih scenarijih.\n", + "\n", + "### Pogoste napake\n", + "- **Preveč kompleksna funkcija**: Kompleksne funkcije nagrajevanja lahko otežijo učenje.\n", + "- **Nejasni cilji**: Če cilji niso jasno definirani, agent morda ne bo deloval optimalno.\n", + "- **Neupoštevanje stranskih učinkov**: Funkcija nagrajevanja mora upoštevati morebitne neželene posledice.\n", + "\n", + "### Zaključek\n", + "Dobro zasnovana funkcija nagrajevanja je bistvenega pomena za uspeh inteligentnih agentov. 
Z upoštevanjem najboljših praks in izogibanjem pogostim napakam lahko ustvarite sistem, ki učinkovito dosega svoje cilje.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "def reward(s):\n", + " r = s.energy-s.fatigue\n", + " if s.at()==Board.Cell.wolf:\n", + " return 100 if s.is_winning() else -100\n", + " if s.at()==Board.Cell.water:\n", + " return -100\n", + " return r" + ] + }, + { + "source": [ + "## Algoritem Q-Learning\n", + "\n", + "Dejanski učni algoritem ostaja skoraj nespremenjen, le da namesto samo položaja na plošči uporabljamo `state`.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "Q = np.ones((width,height,len(actions)),dtype=np.float)*1.0/len(actions)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "def probs(v,eps=1e-4):\n", + " v = v-v.min()+eps\n", + " v = v/v.sum()\n", + " return v" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "" + ] + } + ], + "source": [ + "\n", + "from IPython.display import clear_output\n", + "\n", + "lpath = []\n", + "\n", + "for epoch in range(10000):\n", + " clear_output(wait=True)\n", + " print(f\"Epoch = {epoch}\",end='')\n", + "\n", + " # Pick initial point\n", + " s = state(m)\n", + " \n", + " # Start travelling\n", + " n=0\n", + " cum_reward = 0\n", + " while True:\n", + " x,y = s.board.human\n", + " v = probs(Q[x,y])\n", + " while True:\n", + " a = random.choices(list(actions),weights=v)[0]\n", + " dpos = actions[a]\n", + " if s.board.is_valid(s.board.move_pos(s.board.human,dpos)):\n", + " break \n", + " s.move(dpos)\n", + " r = reward(s)\n", + " if abs(r)==100: # end of game\n", + " print(f\" {n} steps\",end='\\r')\n", + 
" lpath.append(n)\n", + " break\n", + " alpha = np.exp(-n / 3000)\n", + " gamma = 0.5\n", + " ai = action_idx[a]\n", + " Q[x,y,ai] = (1 - alpha) * Q[x,y,ai] + alpha * (r + gamma * Q[x+dpos[0], y+dpos[1]].max())\n", + " n+=1" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## Rezultati\n", + "\n", + "Poglejmo, ali smo bili uspešni pri treniranju Petra, da se bori proti volku!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Killed by wolf = 1, won: 9 times, drown: 90 times\n" + ] + } + ], + "source": [ + "def qpolicy(m):\n", + " x,y = m.human\n", + " v = probs(Q[x,y])\n", + " a = random.choices(list(actions),weights=v)[0]\n", + " return a\n", + "\n", + "print_statistics(qpolicy)" + ] + }, + { + "source": [ + "Zdaj vidimo veliko manj primerov utopitve, vendar Peter še vedno ne more vedno ubiti volka. 
Poskusite eksperimentirati in preveriti, ali lahko izboljšate ta rezultat z igranjem s hiperparametri.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 13 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(lpath)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za prevajanje z umetno inteligenco [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da upoštevate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem maternem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo profesionalni človeški prevod. 
Ne prevzemamo odgovornosti za morebitna napačna razumevanja ali napačne interpretacije, ki izhajajo iz uporabe tega prevoda.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sl/8-Reinforcement/1-QLearning/solution/notebook.ipynb b/translations/sl/8-Reinforcement/1-QLearning/solution/notebook.ipynb new file mode 100644 index 000000000..4e40d2109 --- /dev/null +++ b/translations/sl/8-Reinforcement/1-QLearning/solution/notebook.ipynb @@ -0,0 +1,577 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "488431336543f71f14d4aaf0399e3381", + "translation_date": "2025-09-06T15:08:38+00:00", + "source_file": "8-Reinforcement/1-QLearning/solution/notebook.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# Peter in volk: Uvod v okrepljeno učenje\n", + "\n", + "V tem vodiču se bomo naučili, kako uporabiti okrepljeno učenje za reševanje problema iskanja poti. Zgodba je navdihnjena z glasbeno pravljico [Peter in volk](https://en.wikipedia.org/wiki/Peter_and_the_Wolf) ruskega skladatelja [Sergeja Prokofjeva](https://en.wikipedia.org/wiki/Sergei_Prokofiev). Gre za zgodbo o mladem pionirju Petru, ki pogumno zapusti svojo hišo in se odpravi na gozdno jaso, da bi lovil volka. 
Naučili bomo algoritme strojnega učenja, ki bodo Petru pomagali raziskati okolico in zgraditi optimalen navigacijski zemljevid.\n", + "\n", + "Najprej uvozimo nekaj uporabnih knjižnic:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import random\n", + "import math" + ] + }, + { + "source": [ + "## Pregled učenja z okrepitvijo\n", + "\n", + "**Učenje z okrepitvijo** (RL) je tehnika učenja, ki nam omogoča, da se naučimo optimalnega vedenja **agenta** v nekem **okolju** z izvajanjem številnih poskusov. Agent v tem okolju mora imeti določen **cilj**, ki ga opredeljuje **funkcija nagrajevanja**.\n", + "\n", + "## Okolje\n", + "\n", + "Za enostavnost si predstavljajmo Petrov svet kot kvadratno ploščo velikosti `width` x `height`. Vsaka celica na tej plošči je lahko:\n", + "* **zemlja**, po kateri lahko Peter in druga bitja hodijo\n", + "* **voda**, po kateri se seveda ne more hoditi\n", + "* **drevo** ali **trava** - mesto, kjer se lahko spočiješ\n", + "* **jabolko**, ki predstavlja nekaj, kar bi Peter z veseljem našel, da se nahrani\n", + "* **volk**, ki je nevaren in se mu je treba izogniti\n", + "\n", + "Za delo z okoljem bomo definirali razred `Board`. Da ne bi preveč obremenili tega zvezka, smo vso kodo za delo s ploščo premaknili v ločen modul `rlboard`, ki ga bomo zdaj uvozili. 
Več podrobnosti o notranji implementaciji si lahko ogledate znotraj tega modula.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from rlboard import *" + ] + }, + { + "source": [ + "Zdaj ustvarimo naključno ploščo in si oglejmo, kako izgleda:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "width, height = 8,8\n", + "m = Board(width,height)\n", + "m.randomize(seed=13)\n", + "m.plot()" + ] + }, + { + "source": [ + "## Dejanja in Pravila\n", + "\n", + "V našem primeru je Peterjev cilj najti jabolko, medtem ko se izogiba volku in drugim oviram. Da bi to dosegel, se lahko preprosto sprehaja, dokler ne najde jabolka. Tako lahko na katerem koli položaju izbere eno od naslednjih dejanj: gor, dol, levo in desno. Ta dejanja bomo definirali kot slovar in jih preslikali v pare ustreznih sprememb koordinat. 
Na primer, premik v desno (`R`) bi ustrezal paru `(1,0)`.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "actions = { \"U\" : (0,-1), \"D\" : (0,1), \"L\" : (-1,0), \"R\" : (1,0) }\n", + "action_idx = { a : i for i,a in enumerate(actions.keys()) }" + ] + }, + { + "source": [ + "Strategija našega agenta (Peter) je določena s tako imenovano **politiko**. Poglejmo najpreprostejšo politiko, imenovano **naključna hoja**.\n", + "\n", + "## Naključna hoja\n", + "\n", + "Najprej rešimo naš problem z implementacijo strategije naključne hoje.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "18" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ], + "source": [ + "def random_policy(m):\n", + " return random.choice(list(actions))\n", + "\n", + "def walk(m,policy,start_position=None):\n", + " n = 0 # number of steps\n", + " # set initial position\n", + " if start_position:\n", + " m.human = start_position \n", + " else:\n", + " m.random_start()\n", + " while True:\n", + " if m.at() == Board.Cell.apple:\n", + " return n # success!\n", + " if m.at() in [Board.Cell.wolf, Board.Cell.water]:\n", + " return -1 # eaten by wolf or drowned\n", + " while True:\n", + " a = actions[policy(m)]\n", + " new_pos = m.move_pos(m.human,a)\n", + " if m.is_valid(new_pos) and m.at(new_pos)!=Board.Cell.water:\n", + " m.move(a) # do the actual move\n", + " break\n", + " n+=1\n", + "\n", + "walk(m,random_policy)" + ] + }, + { + "source": [ + "Večkrat izvedimo poskus naključne hoje in si oglejmo povprečno število opravljenih korakov:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": 
"stream", + "name": "stdout", + "text": [ + "Average path length = 32.87096774193548, eaten by wolf: 7 times\n" + ] + } + ], + "source": [ + "def print_statistics(policy):\n", + " s,w,n = 0,0,0\n", + " for _ in range(100):\n", + " z = walk(m,policy)\n", + " if z<0:\n", + " w+=1\n", + " else:\n", + " s += z\n", + " n += 1\n", + " print(f\"Average path length = {s/n}, eaten by wolf: {w} times\")\n", + "\n", + "print_statistics(random_policy)" + ] + }, + { + "source": [ + "## Funkcija nagrajevanja\n", + "\n", + "Da bi naša politika postala bolj inteligentna, moramo razumeti, kateri premiki so \"boljši\" od drugih.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "move_reward = -0.1\n", + "goal_reward = 10\n", + "end_reward = -10\n", + "\n", + "def reward(m,pos=None):\n", + " pos = pos or m.human\n", + " if not m.is_valid(pos):\n", + " return end_reward\n", + " x = m.at(pos)\n", + " if x==Board.Cell.water or x == Board.Cell.wolf:\n", + " return end_reward\n", + " if x==Board.Cell.apple:\n", + " return goal_reward\n", + " return move_reward" + ] + }, + { + "source": [ + "## Q-Učenje\n", + "\n", + "Ustvarite Q-tabelo ali večdimenzionalno matriko. Ker ima naša plošča dimenzije `width` x `height`, lahko Q-tabelo predstavimo z numpy matriko oblike `width` x `height` x `len(actions)`:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "Q = np.ones((width,height,len(actions)),dtype=np.float)*1.0/len(actions)" + ] + }, + { + "source": [ + "Podajte Q-tabelo funkciji za risanje, da vizualizirate tabelo na plošči:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## Bistvo Q-Učenja: Bellmanova enačba in učni algoritem\n", + "\n", + "Napišite psevdokodo za naš učni algoritem:\n", + "\n", + "* Inicializirajte Q-tabelo Q z enakimi vrednostmi za vsa stanja in akcije\n", + "* Nastavite stopnjo učenja $\\alpha\\leftarrow 1$\n", + "* Večkrat ponovite simulacijo\n", + " 1. Začnite na naključni poziciji\n", + " 1. Ponavljajte\n", + " 1. Izberite akcijo $a$ v stanju $s$\n", + " 2. Izvedite akcijo z premikom v novo stanje $s'$\n", + " 3. Če naletimo na pogoj konca igre ali je skupna nagrada premajhna - zaključite simulacijo \n", + " 4. Izračunajte nagrado $r$ v novem stanju\n", + " 5. Posodobite Q-funkcijo v skladu z Bellmanovo enačbo: $Q(s,a)\\leftarrow (1-\\alpha)Q(s,a)+\\alpha(r+\\gamma\\max_{a'}Q(s',a'))$\n", + " 6. $s\\leftarrow s'$\n", + " 7. 
Posodobite skupno nagrado in zmanjšajte $\\alpha$.\n", + "\n", + "## Izkoriščanje vs. Raziskovanje\n", + "\n", + "Najboljši pristop je uravnotežiti med raziskovanjem in izkoriščanjem. Ko se več naučimo o našem okolju, bomo bolj verjetno sledili optimalni poti, vendar se občasno odločimo za nepreizkušeno pot.\n", + "\n", + "## Python Implementacija\n", + "\n", + "Zdaj smo pripravljeni implementirati učni algoritem. Pred tem potrebujemo tudi funkcijo, ki bo poljubne številke v Q-tabeli pretvorila v vektor verjetnosti za ustrezne akcije:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "def probs(v,eps=1e-4):\n", + " v = v-v.min()+eps\n", + " v = v/v.sum()\n", + " return v" + ] + }, + { + "source": [ + "Dodamo majhno količino `eps` k prvotnemu vektorju, da se izognemo deljenju z 0 v začetnem primeru, ko so vse komponente vektorja enake.\n", + "\n", + "Dejanski učni algoritem bomo izvedli za 10000 poskusov, imenovanih tudi **epohe**:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "" + ] + } + ], + "source": [ + "\n", + "from IPython.display import clear_output\n", + "\n", + "lpath = []\n", + "\n", + "for epoch in range(10000):\n", + " clear_output(wait=True)\n", + " print(f\"Epoch = {epoch}\",end='')\n", + "\n", + " # Pick initial point\n", + " m.random_start()\n", + " \n", + " # Start travelling\n", + " n=0\n", + " cum_reward = 0\n", + " while True:\n", + " x,y = m.human\n", + " v = probs(Q[x,y])\n", + " a = random.choices(list(actions),weights=v)[0]\n", + " dpos = actions[a]\n", + " m.move(dpos,check_correctness=False) # we allow player to move outside the board, which terminates episode\n", + " r = reward(m)\n", + " cum_reward += r\n", + " if r==end_reward or cum_reward < -1000:\n", + " print(f\" {n} 
steps\",end='\\r')\n", + " lpath.append(n)\n", + " break\n", + " alpha = np.exp(-n / 3000)\n", + " gamma = 0.5\n", + " ai = action_idx[a]\n", + " Q[x,y,ai] = (1 - alpha) * Q[x,y,ai] + alpha * (r + gamma * Q[x+dpos[0], y+dpos[1]].max())\n", + " n+=1" + ] + }, + { + "source": [ + "Po izvedbi tega algoritma bi morala biti Q-tabela posodobljena z vrednostmi, ki določajo privlačnost različnih dejanj na vsakem koraku. Vizualizirajte tabelo tukaj:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## Preverjanje politike\n", + "\n", + "Ker Q-Table prikazuje \"privlačnost\" vsakega dejanja v vsakem stanju, ga je zelo enostavno uporabiti za določanje učinkovite navigacije v našem svetu. 
V najpreprostejšem primeru lahko preprosto izberemo dejanje, ki ustreza najvišji vrednosti v Q-Table:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "2" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ], + "source": [ + "def qpolicy_strict(m):\n", + " x,y = m.human\n", + " v = probs(Q[x,y])\n", + " a = list(actions)[np.argmax(v)]\n", + " return a\n", + "\n", + "walk(m,qpolicy_strict)" + ] + }, + { + "source": [ + "Če večkrat preizkusite zgornjo kodo, boste opazili, da se včasih preprosto \"zatakne\" in morate pritisniti gumb STOP v beležnici, da jo prekinete.\n", + "\n", + "> **Naloga 1:** Spremenite funkcijo `walk`, da omejite največjo dolžino poti na določeno število korakov (recimo 100), in opazujte, kako zgornja koda občasno vrne to vrednost.\n", + "\n", + "> **Naloga 2:** Spremenite funkcijo `walk`, da se ne vrača na mesta, kjer je že bila prej. 
To bo preprečilo, da bi se `walk` zaciklal, vendar se agent še vedno lahko \"ujame\" na lokaciji, iz katere ne more pobegniti.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Average path length = 3.45, eaten by wolf: 0 times\n" + ] + } + ], + "source": [ + "\n", + "def qpolicy(m):\n", + " x,y = m.human\n", + " v = probs(Q[x,y])\n", + " a = random.choices(list(actions),weights=v)[0]\n", + " return a\n", + "\n", + "print_statistics(qpolicy)" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 15 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(lpath)" + ] + }, + { + "source": [ + "Kar vidimo tukaj, je, da se je povprečna dolžina poti sprva povečala. To je verjetno posledica dejstva, da ko o okolju ne vemo ničesar, se zlahka ujamemo v slaba stanja, kot so voda ali volk. Ko se naučimo več in začnemo uporabljati to znanje, lahko okolje raziskujemo dlje, vendar še vedno ne vemo dobro, kje so jabolka.\n", + "\n", + "Ko se dovolj naučimo, postane agentu lažje doseči cilj, dolžina poti pa začne upadati. Vendar smo še vedno odprti za raziskovanje, zato pogosto odstopamo od najboljše poti in raziskujemo nove možnosti, kar podaljša pot nad optimalno dolžino.\n", + "\n", + "Na tem grafu opazimo tudi, da se je dolžina na neki točki nenadoma povečala. To kaže na stohastično naravo procesa in na to, da lahko na neki točki \"pokvarimo\" koeficiente Q-tabele, tako da jih prepišemo z novimi vrednostmi. To bi morali idealno zmanjšati z znižanjem učne stopnje (tj. 
proti koncu učenja prilagajamo vrednosti Q-tabele le za majhno vrednost).\n", + "\n", + "Na splošno je pomembno vedeti, da sta uspeh in kakovost učnega procesa močno odvisna od parametrov, kot so učna stopnja, zmanjševanje učne stopnje in faktor diskonta. Ti se pogosto imenujejo **hiperparametri**, da jih ločimo od **parametrov**, ki jih optimiziramo med učenjem (npr. koeficienti Q-tabele). Proces iskanja najboljših vrednosti hiperparametrov se imenuje **optimizacija hiperparametrov**, in si zasluži ločeno obravnavo.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "## Vaja\n", + "#### Bolj realističen svet Petra in volka\n", + "\n", + "V naši situaciji se je Peter lahko premikal skoraj brez utrujenosti ali lakote. V bolj realističnem svetu se mora občasno ustaviti, da si odpočije, in se tudi nahraniti. Naredimo naš svet bolj realističen z uvedbo naslednjih pravil:\n", + "\n", + "1. Ko se Peter premika iz enega kraja v drugega, izgublja **energijo** in pridobiva **utrujenost**.\n", + "2. Peter lahko pridobi več energije z uživanjem jabolk.\n", + "3. Peter se lahko znebi utrujenosti tako, da počiva pod drevesom ali na travi (tj. ko stopi na polje z drevesom ali travo - zeleno polje).\n", + "4. Peter mora najti in ubiti volka.\n", + "5. Da bi ubil volka, mora Peter imeti določene ravni energije in utrujenosti, sicer izgubi boj.\n", + "\n", + "Spremenite zgornjo funkcijo nagrajevanja v skladu s pravili igre, zaženite algoritem spodbujevalnega učenja, da se naučite najboljše strategije za zmago v igri, in primerjajte rezultate naključnega premikanja z vašim algoritmom glede na število zmag in porazov.\n", + "\n", + "> **Opomba**: Morda boste morali prilagoditi hiperparametre, da bo delovalo, še posebej število epoh. 
Ker je uspeh igre (boj z volkom) redek dogodek, lahko pričakujete precej daljši čas učenja.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za strojno prevajanje [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da se zavedate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo strokovno človeško prevajanje. Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki izhajajo iz uporabe tega prevoda.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sl/8-Reinforcement/2-Gym/notebook.ipynb b/translations/sl/8-Reinforcement/2-Gym/notebook.ipynb new file mode 100644 index 000000000..c04a3c13b --- /dev/null +++ b/translations/sl/8-Reinforcement/2-Gym/notebook.ipynb @@ -0,0 +1,394 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.4 64-bit ('base': conda)" + }, + "interpreter": { + "hash": "86193a1ab0ba47eac1c69c1756090baa3b420b3eea7d4aafab8b85f8b312f0c5" + }, + "coopTranslator": { + "original_hash": "f22f8f3daed4b6d34648d1254763105b", + "translation_date": "2025-09-06T15:16:34+00:00", + "source_file": "8-Reinforcement/2-Gym/notebook.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "## Drsanje na CartPole\n", + "\n", + "> **Problem**: Če želi Peter pobegniti volku, se mora premikati hitreje od njega. 
Videli bomo, kako se Peter lahko nauči drsati, predvsem pa ohranjati ravnotežje, z uporabo Q-Learninga.\n", + "\n", + "Najprej namestimo knjižnico gym in uvozimo potrebne knjižnice:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 1" + ] + }, + { + "source": [ + "## Ustvari okolje cartpole\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "#code block 2" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "source": [ + "Da vidimo, kako deluje okolje, izvedimo kratko simulacijo za 100 korakov.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "#code block 3" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "source": [ + "Med simulacijo moramo pridobiti opazovanja, da se lahko odločimo, kako ukrepati. Pravzaprav nam funkcija `step` vrne trenutna opazovanja, funkcijo nagrajevanja in zastavico `done`, ki označuje, ali ima smisel nadaljevati simulacijo ali ne:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "#code block 4" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "source": [ + "Lahko dobimo najmanjšo in največjo vrednost teh števil:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38]\n[4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38]\n" + ] + } + ], + "source": [ + "#code block 5" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 6" + ] + }, + { + 
"source": [ + "Raziskujmo tudi druge metode diskretizacije z uporabo binov:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Sample bins for interval (-5,5) with 10 bins\n [-5. -4. -3. -2. -1. 0. 1. 2. 3. 4. 5.]\n" + ] + } + ], + "source": [ + "#code block 7" + ] + }, + { + "source": [ + "Zdaj izvedimo kratko simulacijo in opazujmo te diskretne vrednosti okolja.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(0, 0, -2, -2)\n(0, 1, -2, -5)\n(0, 2, -3, -8)\n(0, 3, -5, -11)\n(0, 3, -7, -14)\n(0, 4, -10, -17)\n(0, 3, -14, -15)\n(0, 3, -17, -12)\n(0, 3, -20, -16)\n(0, 4, -23, -19)\n" + ] + } + ], + "source": [ + "#code block 8" + ] + }, + { + "source": [ + "## Struktura Q-tabele\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 9" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 10" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0: 22.0, alpha=0.3, epsilon=0.9\n", + "5000: 70.1384, alpha=0.3, epsilon=0.9\n", + "10000: 121.8586, alpha=0.3, epsilon=0.9\n", + "15000: 149.6368, alpha=0.3, epsilon=0.9\n", + "20000: 168.2782, alpha=0.3, epsilon=0.9\n", + "25000: 196.7356, alpha=0.3, epsilon=0.9\n", + "30000: 220.7614, alpha=0.3, epsilon=0.9\n", + "35000: 233.2138, alpha=0.3, epsilon=0.9\n", + "40000: 248.22, alpha=0.3, epsilon=0.9\n", + "45000: 264.636, alpha=0.3, epsilon=0.9\n", + "50000: 
276.926, alpha=0.3, epsilon=0.9\n", + "55000: 277.9438, alpha=0.3, epsilon=0.9\n", + "60000: 248.881, alpha=0.3, epsilon=0.9\n", + "65000: 272.529, alpha=0.3, epsilon=0.9\n", + "70000: 281.7972, alpha=0.3, epsilon=0.9\n", + "75000: 284.2844, alpha=0.3, epsilon=0.9\n", + "80000: 269.667, alpha=0.3, epsilon=0.9\n", + "85000: 273.8652, alpha=0.3, epsilon=0.9\n", + "90000: 278.2466, alpha=0.3, epsilon=0.9\n", + "95000: 269.1736, alpha=0.3, epsilon=0.9\n" + ] + } + ], + "source": [ + "#code block 11" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 20 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(rewards)" + ] + }, + { + "source": [ + "Iz tega grafa ni mogoče ničesar razbrati, saj se zaradi narave stohastičnega procesa učenja dolžina učnih sej močno razlikuje. Da bi ta graf imel več smisla, lahko izračunamo **tekoče povprečje** preko serije poskusov, recimo 100. 
To lahko priročno izvedemo z uporabo `np.convolve`:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 22 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "#code block 12" + ] + }, + { + "source": [ + "## Spreminjanje hiperparametrov in opazovanje rezultatov v praksi\n", + "\n", + "Zdaj bi bilo zanimivo dejansko videti, kako se obnaša trenirani model. 
Zaženimo simulacijo, pri čemer bomo sledili isti strategiji izbire akcij kot med treningom: vzorčenje glede na porazdelitev verjetnosti v Q-tabeli:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 13" + ] + }, + { + "source": [ + "## Shranjevanje rezultata v animiran GIF\n", + "\n", + "Če želite navdušiti svoje prijatelje, jim lahko pošljete animiran GIF slike ravnotežne palice. Za to lahko uporabimo `env.render` za ustvarjanje slikovnega okvirja in nato te okvirje shranimo v animiran GIF z uporabo knjižnice PIL:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "360\n" + ] + } + ], + "source": [ + "from PIL import Image\n", + "obs = env.reset()\n", + "done = False\n", + "i=0\n", + "ims = []\n", + "while not done:\n", + " s = discretize(obs)\n", + " img=env.render(mode='rgb_array')\n", + " ims.append(Image.fromarray(img))\n", + " v = probs(np.array([Qbest.get((s,a),0) for a in actions]))\n", + " a = random.choices(actions,weights=v)[0]\n", + " obs,_,done,_ = env.step(a)\n", + " i+=1\n", + "env.close()\n", + "ims[0].save('images/cartpole-balance.gif',save_all=True,append_images=ims[1::2],loop=0,duration=5)\n", + "print(i)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve AI za prevajanje [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da upoštevate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo profesionalni človeški prevod. 
Ne prevzemamo odgovornosti za morebitna napačna razumevanja ali napačne interpretacije, ki bi nastale zaradi uporabe tega prevoda.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sl/8-Reinforcement/2-Gym/solution/notebook.ipynb b/translations/sl/8-Reinforcement/2-Gym/solution/notebook.ipynb new file mode 100644 index 000000000..dfd958fc6 --- /dev/null +++ b/translations/sl/8-Reinforcement/2-Gym/solution/notebook.ipynb @@ -0,0 +1,526 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "5c0e485e58d63c506f1791c4dbf990ce", + "translation_date": "2025-09-06T15:19:25+00:00", + "source_file": "8-Reinforcement/2-Gym/solution/notebook.ipynb", + "language_code": "sl" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "## Drsanje na CartPole\n", + "\n", + "> **Problem**: Če želi Peter pobegniti volku, mora biti sposoben premikati se hitreje od njega. 
Videli bomo, kako se lahko Peter nauči drsati, predvsem ohranjati ravnotežje, z uporabo Q-Learninga.\n", + "\n", + "Najprej namestimo knjižnico gym in uvozimo potrebne knjižnice:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: gym in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.18.3)\n", + "Requirement already satisfied: Pillow<=8.2.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (7.0.0)\n", + "Requirement already satisfied: scipy in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (1.4.1)\n", + "Requirement already satisfied: numpy>=1.10.4 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (1.19.2)\n", + "Requirement already satisfied: cloudpickle<1.7.0,>=1.2.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (1.6.0)\n", + "Requirement already satisfied: pyglet<=1.5.15,>=1.4.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (1.5.15)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n" + ] + } + ], + "source": [ + "import sys\n", + "!pip install gym \n", + "\n", + "import gym\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import random" + ] + }, + { + "source": [ + "## Ustvari okolje cartpole\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "env = gym.make(\"CartPole-v1\")\n", + "print(env.action_space)\n", + "print(env.observation_space)\n", + "print(env.action_space.sample())" 
+ ], + "cell_type": "code", + "metadata": {}, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Discrete(2)\nBox(-3.4028234663852886e+38, 3.4028234663852886e+38, (4,), float32)\n0\n" + ] + } + ] + }, + { + "source": [ + "Da vidimo, kako deluje okolje, izvedimo kratko simulacijo za 100 korakov.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "env.reset()\n", + "\n", + "for i in range(100):\n", + " env.render()\n", + " env.step(env.action_space.sample())\n", + "env.close()" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/gym/logger.py:30: UserWarning: \u001b[33mWARN: You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.\u001b[0m\n warnings.warn(colorize('%s: %s'%('WARN', msg % args), 'yellow'))\n" + ] + } + ] + }, + { + "source": [ + "Med simulacijo moramo pridobiti opazovanja, da se lahko odločimo, kako ukrepati. 
Pravzaprav nam funkcija `step` vrne trenutna opazovanja, funkcijo nagrajevanja in zastavico `done`, ki označuje, ali ima smisel nadaljevati simulacijo ali ne:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "env.reset()\n", + "\n", + "done = False\n", + "while not done:\n", + " env.render()\n", + " obs, rew, done, info = env.step(env.action_space.sample())\n", + " print(f\"{obs} -> {rew}\")\n", + "env.close()" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[ 0.03044442 -0.19543914 -0.04496216 0.28125618] -> 1.0\n", + "[ 0.02653564 -0.38989186 -0.03933704 0.55942606] -> 1.0\n", + "[ 0.0187378 -0.19424049 -0.02814852 0.25461393] -> 1.0\n", + "[ 0.01485299 -0.38894946 -0.02305624 0.53828712] -> 1.0\n", + "[ 0.007074 -0.19351108 -0.0122905 0.23842953] -> 1.0\n", + "[ 0.00320378 0.00178427 -0.00752191 -0.05810469] -> 1.0\n", + "[ 0.00323946 0.19701326 -0.008684 -0.35315131] -> 1.0\n", + "[ 0.00717973 0.00201587 -0.01574703 -0.06321931] -> 1.0\n", + "[ 0.00722005 0.19736001 -0.01701141 -0.36082863] -> 1.0\n", + "[ 0.01116725 0.39271958 -0.02422798 -0.65882671] -> 1.0\n", + "[ 0.01902164 0.19794307 -0.03740452 -0.37387001] -> 1.0\n", + "[ 0.0229805 0.39357584 -0.04488192 -0.67810827] -> 1.0\n", + "[ 0.03085202 0.58929164 -0.05844408 -0.98457719] -> 1.0\n", + "[ 0.04263785 0.78514572 -0.07813563 -1.2950295 ] -> 1.0\n", + "[ 0.05834076 0.98116859 -0.10403622 -1.61111521] -> 1.0\n", + "[ 0.07796413 0.78741784 -0.13625852 -1.35259196] -> 1.0\n", + "[ 0.09371249 0.98396202 -0.16331036 -1.68461179] -> 1.0\n", + "[ 0.11339173 0.79106371 -0.1970026 -1.44691436] -> 1.0\n", + "[ 0.12921301 0.59883361 -0.22594088 -1.22169133] -> 1.0\n" + ] + } + ] + }, + { + "source": [ + "Lahko dobimo najmanjšo in največjo vrednost teh števil:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, 
+ "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38]\n[4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38]\n" + ] + } + ], + "source": [ + "print(env.observation_space.low)\n", + "print(env.observation_space.high)" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "def discretize(x):\n", + " return tuple((x/np.array([0.25, 0.25, 0.01, 0.1])).astype(np.int))" + ] + }, + { + "source": [ + "Raziskujmo tudi druge metode diskretizacije z uporabo binov:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Sample bins for interval (-5,5) with 10 bins\n [-5. -4. -3. -2. -1. 0. 1. 2. 3. 4. 5.]\n" + ] + } + ], + "source": [ + "def create_bins(i,num):\n", + " return np.arange(num+1)*(i[1]-i[0])/num+i[0]\n", + "\n", + "print(\"Sample bins for interval (-5,5) with 10 bins\\n\",create_bins((-5,5),10))\n", + "\n", + "ints = [(-5,5),(-2,2),(-0.5,0.5),(-2,2)] # intervals of values for each parameter\n", + "nbins = [20,20,10,10] # number of bins for each parameter\n", + "bins = [create_bins(ints[i],nbins[i]) for i in range(4)]\n", + "\n", + "def discretize_bins(x):\n", + " return tuple(np.digitize(x[i],bins[i]) for i in range(4))" + ] + }, + { + "source": [ + "Zdaj izvedimo kratko simulacijo in opazujmo te diskretne vrednosti okolja.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(0, 0, -1, -3)\n(0, 0, -2, 0)\n(0, 0, -2, -3)\n(0, 1, -3, -6)\n(0, 2, -4, -9)\n(0, 3, -6, -12)\n(0, 2, -8, -9)\n(0, 3, -10, -13)\n(0, 4, -13, -16)\n(0, 4, -16, -19)\n(0, 4, -20, 
-17)\n(0, 4, -24, -20)\n" + ] + } + ], + "source": [ + "env.reset()\n", + "\n", + "done = False\n", + "while not done:\n", + " #env.render()\n", + " obs, rew, done, info = env.step(env.action_space.sample())\n", + " #print(discretize_bins(obs))\n", + " print(discretize(obs))\n", + "env.close()" + ] + }, + { + "source": [ + "## Struktura Q-tabele\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "Q = {}\n", + "actions = (0,1)\n", + "\n", + "def qvalues(state):\n", + " return [Q.get((state,a),0) for a in actions]" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# hyperparameters\n", + "alpha = 0.3\n", + "gamma = 0.9\n", + "epsilon = 0.90" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0: 108.0, alpha=0.3, epsilon=0.9\n" + ] + } + ], + "source": [ + "def probs(v,eps=1e-4):\n", + " v = v-v.min()+eps\n", + " v = v/v.sum()\n", + " return v\n", + "\n", + "Qmax = 0\n", + "cum_rewards = []\n", + "rewards = []\n", + "for epoch in range(100000):\n", + " obs = env.reset()\n", + " done = False\n", + " cum_reward=0\n", + " # == do the simulation ==\n", + " while not done:\n", + " s = discretize(obs)\n", + " if random.random() Qmax:\n", + " Qmax = np.average(cum_rewards)\n", + " Qbest = Q\n", + " cum_rewards=[]" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 20 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(rewards)" + ] + }, + { + "source": [ + "Iz tega grafa ni mogoče ničesar razbrati, saj se zaradi narave stohastičnega procesa učenja dolžina učnih sej močno razlikuje. Da bi ta graf bolje razumeli, lahko izračunamo **tekoče povprečje** preko serije poskusov, recimo 100. 
To lahko enostavno izvedemo z uporabo `np.convolve`:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 22 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "def running_average(x,window):\n", + " return np.convolve(x,np.ones(window)/window,mode='valid')\n", + "\n", + "plt.plot(running_average(rewards,100))" + ] + }, + { + "source": [ + "## Spreminjanje hiperparametrov in opazovanje rezultatov v praksi\n", + "\n", + "Zdaj bi bilo zanimivo dejansko videti, kako se obnaša trenirani model. 
Zaženimo simulacijo, pri čemer bomo sledili isti strategiji izbire akcij kot med treningom: vzorčenje glede na porazdelitev verjetnosti v Q-tabeli:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "obs = env.reset()\n", + "done = False\n", + "while not done:\n", + " s = discretize(obs)\n", + " env.render()\n", + " v = probs(np.array(qvalues(s)))\n", + " a = random.choices(actions,weights=v)[0]\n", + " obs,_,done,_ = env.step(a)\n", + "env.close()" + ] + }, + { + "source": [ + "## Shranjevanje rezultata v animiran GIF\n", + "\n", + "Če želite navdušiti svoje prijatelje, jim lahko pošljete animiran GIF slike ravnotežne palice. Za to lahko uporabimo `env.render` za ustvarjanje slikovnega okvirja in nato te okvirje shranimo v animiran GIF z uporabo knjižnice PIL:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "360\n" + ] + } + ], + "source": [ + "from PIL import Image\n", + "obs = env.reset()\n", + "done = False\n", + "i=0\n", + "ims = []\n", + "while not done:\n", + " s = discretize(obs)\n", + " img=env.render(mode='rgb_array')\n", + " ims.append(Image.fromarray(img))\n", + " v = probs(np.array([Qbest.get((s,a),0) for a in actions]))\n", + " a = random.choices(actions,weights=v)[0]\n", + " obs,_,done,_ = env.step(a)\n", + " i+=1\n", + "env.close()\n", + "ims[0].save('images/cartpole-balance.gif',save_all=True,append_images=ims[1::2],loop=0,duration=5)\n", + "print(i)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za prevajanje z umetno inteligenco [Co-op Translator](https://github.com/Azure/co-op-translator). 
Čeprav si prizadevamo za natančnost, vas prosimo, da upoštevate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo profesionalni človeški prevod. Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki bi nastale zaradi uporabe tega prevoda.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sl/PyTorch_Fundamentals.ipynb b/translations/sl/PyTorch_Fundamentals.ipynb new file mode 100644 index 000000000..04eb2331b --- /dev/null +++ b/translations/sl/PyTorch_Fundamentals.ipynb @@ -0,0 +1,2830 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4", + "authorship_tag": "ABX9TyOgv0AozH1FKQBD+RkgT2bV", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU", + "coopTranslator": { + "original_hash": "0ca21b6ee62904d616f2e36dc1cf0da7", + "translation_date": "2025-09-06T13:07:55+00:00", + "source_file": "PyTorch_Fundamentals.ipynb", + "language_code": "sl" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EHh5JllMh1rG", + "outputId": "f55755ad-c369-414c-85ec-6e9d4f061a02", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'2.2.1+cu121'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 1 + } + ], + "source": [ + "import torch\n", + "torch.__version__" + ] + }, + { + "cell_type": "code", + "source": [ + "print(\"I am excited to run this\")" + ], + "metadata": { 
+ "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "UPlb-duwXAfz", + "outputId": "cfd687e4-1238-49f4-ab6b-ee1305b740d2" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "I am excited to run this\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "print(torch.__version__)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "byWVlJ9wXDSk", + "outputId": "fd74a5c4-4d4a-41b2-ef3c-562ea3e4811f" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "2.2.1+cu121\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "Osm80zoEYklS" + } + }, + { + "cell_type": "code", + "source": [ + "# scalar\n", + "scalar = torch.tensor(7)\n", + "scalar" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-o8wvJ-VXZmI", + "outputId": "558816f5-1205-4de1-fe1f-2f96e9bd79e6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(7)" + ] + }, + "metadata": {}, + "execution_count": 4 + } + ] + }, + { + "cell_type": "code", + "source": [ + "scalar.ndim" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mCZ2tXC4Y_Sg", + "outputId": "2d86dbdc-56e1-45c6-d3dd-14515f2a457a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "source": [ + "scalar.item()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ssN00By0ZQgS", + "outputId": "490f40d1-5135-4969-a6d3-c8c902cdc473" + }, + "execution_count": null, + "outputs": [ + { + 
"output_type": "execute_result", + "data": { + "text/plain": [ + "7" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# vector\n", + "vector = torch.tensor([7, 7])\n", + "vector\n", + "#vector.ndim\n", + "#vector.item()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Bws__5wlZnmF", + "outputId": "944e38f9-5ba1-4ddc-a9c6-cfb6a19bb488" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([7, 7])" + ] + }, + "metadata": {}, + "execution_count": 7 + } + ] + }, + { + "cell_type": "code", + "source": [ + "vector.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "9pjCvnsZZzNG", + "outputId": "e030a4da-8f81-4858-fbce-86da2aaafe52" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([2])" + ] + }, + "metadata": {}, + "execution_count": 8 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Matrix\n", + "MATRIX = torch.tensor([[7, 8],[9, 10]])\n", + "MATRIX" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "a747hI9SaBGW", + "outputId": "af835ddb-81ff-4981-badb-441567194d15" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[ 7, 8],\n", + " [ 9, 10]])" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ] + }, + { + "cell_type": "code", + "source": [ + "MATRIX.ndim" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XdTfFa7vaRUj", + "outputId": "0fbbab9c-8263-4cad-a380-0d2a16ca499e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "2" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "code", + "source": [ + "MATRIX[0]\n", + 
"MATRIX[1]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TFeD3jSDafm7", + "outputId": "69b44ab3-5ba7-451a-c6b2-f019a03d0c96" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 9, 10])" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Tensor\n", + "TENSOR = torch.tensor([[[1, 2, 3],[3,6,9], [2,4,5]]])\n", + "TENSOR" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ic3cE47tah42", + "outputId": "f250e295-91de-43ec-9d80-588a6fe0abde" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[1, 2, 3],\n", + " [3, 6, 9],\n", + " [2, 4, 5]]])" + ] + }, + "metadata": {}, + "execution_count": 12 + } + ] + }, + { + "cell_type": "code", + "source": [ + "TENSOR.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Wvjf5fczbAM1", + "outputId": "9c72b5b8-bafe-4ae7-9883-b051e209eada" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([1, 3, 3])" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ] + }, + { + "cell_type": "code", + "source": [ + "TENSOR.ndim" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mwtXZwiMbN3m", + "outputId": "331a5e36-b1b0-4a5f-a9b8-e7049cbaa8f9" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "3" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ] + }, + { + "cell_type": "code", + "source": [ + "TENSOR[0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vzdZu_IfbP3J", + "outputId": "e24e7e71-e365-412d-ff50-fc094b56d2f3" + }, + "execution_count": null, + "outputs": [ + { + "output_type": 
"execute_result", + "data": { + "text/plain": [ + "tensor([[1, 2, 3],\n", + " [3, 6, 9],\n", + " [2, 4, 5]])" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "A8OL9eWfcRrJ" + } + }, + { + "cell_type": "code", + "source": [ + "random_tensor = torch.rand(3,4)\n", + "random_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hAqSDE1EcVS_", + "outputId": "946171c3-d054-400c-f893-79110356888c" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0.4414, 0.7681, 0.8385, 0.3166],\n", + " [0.0468, 0.5812, 0.0670, 0.9173],\n", + " [0.2959, 0.3276, 0.7411, 0.4643]])" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_tensor.ndim" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "g4fvPE5GcwzP", + "outputId": "8737f36b-6864-4059-eaed-6f9156c22306" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "2" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_tensor.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XsAg99QmdAU6", + "outputId": "35467c11-257c-4f16-99aa-eca930bcbc36" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([3, 4])" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_tensor.size()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cii1pNdVdB68", + "outputId": "fc8d2de6-9215-43de-99f7-7b0d7f7d20fa" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": 
[ + "torch.Size([3, 4])" + ] + }, + "metadata": {}, + "execution_count": 19 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_image_tensor = torch.rand(size=(3, 224, 224)) #color channels, height, width\n", + "random_image_tensor.ndim, random_image_tensor.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "aTKq2j0cdDjb", + "outputId": "6be42057-20b9-4faf-d79d-8b65c42cc27e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(3, torch.Size([3, 224, 224]))" + ] + }, + "metadata": {}, + "execution_count": 20 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_tensor_ofownsize = torch.rand(size=(5,10,10))\n", + "random_tensor_ofownsize.ndim, random_tensor_ofownsize.shape\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IyhDdj-Pd6nC", + "outputId": "43e5e334-6d4d-4b67-f87d-7d364c6d8c67" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(3, torch.Size([5, 10, 10]))" + ] + }, + "metadata": {}, + "execution_count": 21 + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "UOJW08uOert_" + } + }, + { + "cell_type": "code", + "source": [ + "zero = torch.zeros(size=(3, 4))\n", + "zero" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "uGvXtaXyefie", + "outputId": "d40d3e28-8667-4d2f-8b62-f0829c6162ad" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0., 0., 0., 0.],\n", + " [0., 0., 0., 0.],\n", + " [0., 0., 0., 0.]])" + ] + }, + "metadata": {}, + "execution_count": 22 + } + ] + }, + { + "cell_type": "code", + "source": [ + "zero*random_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "OyUkUPkDe0uH", + "outputId": 
"26c2e4be-36ba-4c6c-9a90-2704ec135828" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0., 0., 0., 0.],\n", + " [0., 0., 0., 0.],\n", + " [0., 0., 0., 0.]])" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ones = torch.ones(size=(3, 4))\n", + "ones\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "y_Ac62Aqe82G", + "outputId": "291de5d9-b9df-49de-c9d1-d098e3e9f4d8" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1., 1., 1., 1.],\n", + " [1., 1., 1., 1.],\n", + " [1., 1., 1., 1.]])" + ] + }, + "metadata": {}, + "execution_count": 24 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ones.dtype" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TvGOA9odfIEO", + "outputId": "45949ef4-6649-4b6c-d6af-2d4bfb8de832" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.float32" + ] + }, + "metadata": {}, + "execution_count": 25 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ones*zero" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "--pTyge-fI-8", + "outputId": "c4d9bb7e-829b-43db-e2db-b1a2d64e61f0" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0., 0., 0., 0.],\n", + " [0., 0., 0., 0.],\n", + " [0., 0., 0., 0.]])" + ] + }, + "metadata": {}, + "execution_count": 26 + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "qDcc7Z36fSJF" + } + }, + { + "cell_type": "code", + "source": [ + "one_to_ten = torch.arange(start = 1, end = 11, step = 1)\n", + "one_to_ten" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": 
"w3CZB4zUfR1s", + "outputId": "197fcba1-da0a-4b4a-ed11-3974bd6c01aa" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])" + ] + }, + "metadata": {}, + "execution_count": 27 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ten_zeros = torch.zeros_like(one_to_ten)\n", + "ten_zeros" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "WZh99BwVfRy8", + "outputId": "51ef8bfb-6fa0-4099-ff66-b97d65b2ddea" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])" + ] + }, + "metadata": {}, + "execution_count": 28 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Vrste podatkov Tensor\n" + ], + "metadata": { + "id": "pGGhgsbUgqbW" + } + }, + { + "cell_type": "code", + "source": [ + "float_32_tensor = torch.tensor([3.0, 6.0,9.0], dtype = None, device = None, requires_grad = False)\n", + "float_32_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JORJl4XkfRsx", + "outputId": "71114171-0f49-481f-b6fc-6cb48e2fb895" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([3., 6., 9.])" + ] + }, + "metadata": {}, + "execution_count": 29 + } + ] + }, + { + "cell_type": "code", + "source": [ + "float_32_tensor.dtype" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "6wOPPwGyfRLn", + "outputId": "f23776a1-b682-404a-9f67-d5bcb0402666" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.float32" + ] + }, + "metadata": {}, + "execution_count": 30 + } + ] + }, + { + "cell_type": "code", + "source": [ + "float_16_tensor = float_32_tensor.type(torch.float16)\n", + "float_16_tensor.dtype" + ], + "metadata": { 
+ "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "tFsHCvmZfOYe", + "outputId": "d3aa305a-7591-47f5-97fd-61bff60b44bd" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.float16" + ] + }, + "metadata": {}, + "execution_count": 31 + } + ] + }, + { + "cell_type": "code", + "source": [ + "float_16_tensor*float_32_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TQiCGTPuwq0q", + "outputId": "98750fce-1ca3-4889-e269-8b753efdea96" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 9., 36., 81.])" + ] + }, + "metadata": {}, + "execution_count": 32 + } + ] + }, + { + "cell_type": "code", + "source": [ + "int_32_tensor = torch.tensor([3, 6, 9], dtype = torch.int32)\n", + "int_32_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5hlrLvGUw5D_", + "outputId": "41d890a0-9aee-446c-d906-631ce2ab0995" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([3, 6, 9], dtype=torch.int32)" + ] + }, + "metadata": {}, + "execution_count": 33 + } + ] + }, + { + "cell_type": "code", + "source": [ + "int_32_tensor*float_32_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ihApD9u3xTNW", + "outputId": "d295eed0-6996-4e0f-8502-ff4b55cd1373" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 9., 36., 81.])" + ] + }, + "metadata": {}, + "execution_count": 34 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x = torch.arange(0,100,10)" + ], + "metadata": { + "id": "utKhlb_KxWDQ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "x" + ], + "metadata": { + "colab": { + "base_uri": 
"https://localhost:8080/" + }, + "id": "p78D74E9Rj7Y", + "outputId": "781a1614-a900-41f5-9e5d-358f0b2390aa" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])" + ] + }, + "metadata": {}, + "execution_count": 36 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.min()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4BcSs5NeRkcj", + "outputId": "3f24a8dc-58e9-4a5f-9834-e85856a34f9d" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0)" + ] + }, + "metadata": {}, + "execution_count": 37 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.max()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hinqvXVLRm4q", + "outputId": "5c7d8a53-3913-4ac1-bba3-5ba8ff68250a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(90)" + ] + }, + "metadata": {}, + "execution_count": 38 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.mean(x.type(torch.float32))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "k7okc0_vRpnB", + "outputId": "91e5494f-dc57-417c-ea4d-25dbc547c893" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(45.)" + ] + }, + "metadata": {}, + "execution_count": 39 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.type(torch.float32).mean()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "29QcDTjHRq10", + "outputId": "62937c6c-78e0-49f2-dde3-1543ee8f7907" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(45.)" + ] + }, + "metadata": {}, + "execution_count": 40 + } + ] 
+ }, + { + "cell_type": "code", + "source": [ + "x.sum()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wlpY_G_sbdKF", + "outputId": "475d8258-af65-4011-a258-b93d4d8142d4" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(450)" + ] + }, + "metadata": {}, + "execution_count": 41 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.argmax()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GT6HJzwhbk4n", + "outputId": "2e455c20-c322-4bcf-d07c-1259d3ccefc6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(9)" + ] + }, + "metadata": {}, + "execution_count": 42 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.argmin()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "egL3oi2Mb19P", + "outputId": "f71fb32f-6338-44a3-b377-75bea0a3ab54" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0)" + ] + }, + "metadata": {}, + "execution_count": 43 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "p2U8DZKib3DP", + "outputId": "b9f613b9-74e9-45f4-ed01-05babb6a6793" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0)" + ] + }, + "metadata": {}, + "execution_count": 44 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[9]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "24qBFlGYcABe", + "outputId": "5813cfcb-7f63-4bd7-ee46-f95ccbfda939" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(90)" + ] + }, + "metadata": {}, + 
"execution_count": 45 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x = torch.arange(1, 10)\n", + "x.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "0GPOxEzkcBHO", + "outputId": "aefbd903-4f4c-4d2c-c90f-eccd682fe018" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([9])" + ] + }, + "metadata": {}, + "execution_count": 46 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_reshaped = x.reshape(1,9)\n", + "x_reshaped, x_reshaped.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "spmRgQjwddgp", + "outputId": "85a7c55c-2909-4ea2-fc68-386dddc65742" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9]]), torch.Size([1, 9]))" + ] + }, + "metadata": {}, + "execution_count": 47 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_reshaped.view(1,9)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "tH2ahWGydqqP", + "outputId": "65d92263-4fc4-434a-c06d-c5e08436f7fe" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9]])" + ] + }, + "metadata": {}, + "execution_count": 48 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_stacked = torch.stack([x, x, x, x], dim = 1)\n", + "x_stacked" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "jgCeJcaud_-1", + "outputId": "7f293a37-6ef1-43b6-aee5-9d6d91c94f9e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 1, 1, 1],\n", + " [2, 2, 2, 2],\n", + " [3, 3, 3, 3],\n", + " [4, 4, 4, 4],\n", + " [5, 5, 5, 5],\n", + " [6, 6, 6, 6],\n", + " [7, 7, 7, 7],\n", + " [8, 8, 8, 8],\n", + " [9, 
9, 9, 9]])" + ] + }, + "metadata": {}, + "execution_count": 49 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_stacked.squeeze()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XhJHIK6cfPse", + "outputId": "06c47b89-3a9e-453e-bcc3-00cbcb0b8b49" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 1, 1, 1],\n", + " [2, 2, 2, 2],\n", + " [3, 3, 3, 3],\n", + " [4, 4, 4, 4],\n", + " [5, 5, 5, 5],\n", + " [6, 6, 6, 6],\n", + " [7, 7, 7, 7],\n", + " [8, 8, 8, 8],\n", + " [9, 9, 9, 9]])" + ] + }, + "metadata": {}, + "execution_count": 50 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_stacked.unsqueeze(dim=1)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ej2c3Xxzf0tq", + "outputId": "94024061-eb37-446d-c4a8-e4d16cb6de81" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[1, 1, 1, 1]],\n", + "\n", + " [[2, 2, 2, 2]],\n", + "\n", + " [[3, 3, 3, 3]],\n", + "\n", + " [[4, 4, 4, 4]],\n", + "\n", + " [[5, 5, 5, 5]],\n", + "\n", + " [[6, 6, 6, 6]],\n", + "\n", + " [[7, 7, 7, 7]],\n", + "\n", + " [[8, 8, 8, 8]],\n", + "\n", + " [[9, 9, 9, 9]]])" + ] + }, + "metadata": {}, + "execution_count": 52 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_stacked.squeeze()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4DJYo1a0f5M0", + "outputId": "efca2b47-1b14-44de-9a9a-2c83629d153f" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 1, 1, 1],\n", + " [2, 2, 2, 2],\n", + " [3, 3, 3, 3],\n", + " [4, 4, 4, 4],\n", + " [5, 5, 5, 5],\n", + " [6, 6, 6, 6],\n", + " [7, 7, 7, 7],\n", + " [8, 8, 8, 8],\n", + " [9, 9, 9, 9]])" + ] + }, + "metadata": {}, + "execution_count": 53 + } + ] + }, + { + "cell_type": 
"code", + "source": [ + "x_stacked.unsqueeze(dim=-2)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "J4iEjn2ah2HL", + "outputId": "22395593-7c16-4162-beae-dd2bbe7bda35" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[1, 1, 1, 1]],\n", + "\n", + " [[2, 2, 2, 2]],\n", + "\n", + " [[3, 3, 3, 3]],\n", + "\n", + " [[4, 4, 4, 4]],\n", + "\n", + " [[5, 5, 5, 5]],\n", + "\n", + " [[6, 6, 6, 6]],\n", + "\n", + " [[7, 7, 7, 7]],\n", + "\n", + " [[8, 8, 8, 8]],\n", + "\n", + " [[9, 9, 9, 9]]])" + ] + }, + "metadata": {}, + "execution_count": 55 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "tensor = torch.tensor([1, 2, 3])\n", + "tensor = tensor - 10\n", + "tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cFfiD7Nth7Z_", + "outputId": "1139e1f8-fc1a-46ca-d636-f2bc4fd2eef6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([-9, -8, -7])" + ] + }, + "metadata": {}, + "execution_count": 7 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.mul(tensor, 10)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dyA7BM_GHhqE", + "outputId": "0e3b9671-d9e8-4a32-87bb-59bc05986142" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([-90, -80, -70])" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.sub(tensor, 100)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "owtUsZ1KNegI", + "outputId": "189b7b23-0041-4e09-b991-cd209a48506a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([-109, -108, -107])" + ] + }, + "metadata": 
{}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.add(tensor, 100)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "K5STXlQONsyc", + "outputId": "00cbb79a-0a1d-4e21-86ec-5c91c37a2d01" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([91, 92, 93])" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.divide(tensor, 2)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xqMGnzIUNvp0", + "outputId": "c894cf3e-f148-45f8-cfc8-d78740735306" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([-4.5000, -4.0000, -3.5000])" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.matmul(tensor, tensor)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ruGzKpV8NyBc", + "outputId": "fddb63bf-006f-48b6-ae28-287fbcda8bc5" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(194)" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor@tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "8GS3r9yTeGfD", + "outputId": "c80b12ac-30b5-4f3d-c38c-9e41ba511b0e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(194)" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ] + }, + { + "cell_type": "code", + "source": [ + "%%time\n", + "tensor@tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QmuYHqXTemC0", + "outputId": "402fe3ba-70b5-4bb2-c83b-254db84ff810" + }, + 
"execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "CPU times: user 622 µs, sys: 0 ns, total: 622 µs\n", + "Wall time: 516 µs\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(194)" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ] + }, + { + "cell_type": "code", + "source": [ + "%%time\n", + "torch.matmul(tensor,tensor)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dGr1fzdNepd8", + "outputId": "97bd6c91-bc25-4b38-cdf5-f22dcdef243e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "CPU times: user 424 µs, sys: 998 µs, total: 1.42 ms\n", + "Wall time: 1.43 ms\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(194)" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.rand(3,2)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "pGYDoK2gevfo", + "outputId": "2c8783d5-0453-47c5-c7ed-af10d25d6989" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0.5999, 0.0073],\n", + " [0.9321, 0.3026],\n", + " [0.3463, 0.3872]])" + ] + }, + "metadata": {}, + "execution_count": 20 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.matmul(torch.rand(3,2), torch.rand(2,3))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "KGBGQoB8e2DP", + "outputId": "4c2ef361-a2d0-41ee-c328-3992cbbc138d" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0.3528, 0.1893, 0.0714],\n", + " [1.2791, 0.7110, 0.2563],\n", + " [0.8812, 0.4553, 0.1803]])" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ] + }, + { + "cell_type": "code", + "source": [ + 
"import torch" + ], + "metadata": { + "id": "ib8DMtkBe_LJ" + }, + "execution_count": 1, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "x = torch.rand(2,9)" + ], + "metadata": { + "id": "nJo8ZBdrQY1b" + }, + "execution_count": 2, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "x" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wi6oRv4MQfgf", + "outputId": "55c99f55-31f6-4cf5-ba4e-19a47c3a0167" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0.5894, 0.4391, 0.2018, 0.5417, 0.3844, 0.3592, 0.9209, 0.9269, 0.0681],\n", + " [0.0746, 0.1740, 0.6821, 0.6890, 0.0999, 0.7444, 0.2391, 0.4625, 0.8302]])" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ] + }, + { + "cell_type": "code", + "source": [ + "y=torch.randn(2,3,5)\n", + "y" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Zpx8myAUQgoc", + "outputId": "07756d70-56bd-437c-c74e-9aecc1a77311" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[ 1.5552, -0.4877, 0.5175, -1.7958, -0.6187],\n", + " [-0.3359, -1.9710, 0.0112, -1.7578, -1.5295],\n", + " [ 0.0932, 1.4079, 0.9108, 0.3328, -0.6978]],\n", + "\n", + " [[-0.9406, -1.0809, -0.2595, 0.1282, 1.6605],\n", + " [ 1.1624, 1.0902, 1.7092, -0.2842, -1.3780],\n", + " [-0.1534, -1.2795, -0.5495, 0.9902, 0.1822]]])" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_original = torch.rand(size=(224,224,3))\n", + "x_original" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "s4U-X9bJQnWe", + "outputId": "657a7a76-962c-4b41-a76b-902d0482266c" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[0.4549, 0.6809, 0.2118],\n", + " [0.4824, 0.9008, 
0.8741],\n", + " [0.1715, 0.1757, 0.1845],\n", + " ...,\n", + " [0.8741, 0.6594, 0.2610],\n", + " [0.0092, 0.1984, 0.1955],\n", + " [0.4236, 0.4182, 0.0251]],\n", + "\n", + " [[0.9174, 0.1661, 0.5852],\n", + " [0.1837, 0.2351, 0.3810],\n", + " [0.3726, 0.4808, 0.8732],\n", + " ...,\n", + " [0.6794, 0.0554, 0.9202],\n", + " [0.0864, 0.8750, 0.3558],\n", + " [0.8445, 0.9759, 0.4934]],\n", + "\n", + " [[0.1600, 0.2635, 0.7194],\n", + " [0.9488, 0.3405, 0.3647],\n", + " [0.6683, 0.5168, 0.9592],\n", + " ...,\n", + " [0.0521, 0.0140, 0.2445],\n", + " [0.3596, 0.3999, 0.2730],\n", + " [0.5926, 0.9877, 0.7784]],\n", + "\n", + " ...,\n", + "\n", + " [[0.4794, 0.5635, 0.3764],\n", + " [0.9124, 0.6094, 0.5059],\n", + " [0.4528, 0.4447, 0.5021],\n", + " ...,\n", + " [0.0089, 0.4816, 0.8727],\n", + " [0.2173, 0.6296, 0.2347],\n", + " [0.2028, 0.9931, 0.7201]],\n", + "\n", + " [[0.3116, 0.6459, 0.4703],\n", + " [0.0148, 0.2345, 0.7149],\n", + " [0.8393, 0.5804, 0.6691],\n", + " ...,\n", + " [0.2105, 0.9460, 0.2696],\n", + " [0.5918, 0.9295, 0.2616],\n", + " [0.2537, 0.7819, 0.4700]],\n", + "\n", + " [[0.6654, 0.1200, 0.5841],\n", + " [0.9147, 0.5522, 0.6529],\n", + " [0.1799, 0.5276, 0.5415],\n", + " ...,\n", + " [0.7536, 0.4346, 0.8793],\n", + " [0.3793, 0.1750, 0.7792],\n", + " [0.9266, 0.8325, 0.9974]]])" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_permuted=x_original.permute(2, 0, 1)\n", + "print(x_original.shape)\n", + "print(x_permuted.shape)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "DD19_zvbQzHo", + "outputId": "1d64ce1b-eb48-47e3-90b6-7f1340e7f2b2" + }, + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "torch.Size([224, 224, 3])\n", + "torch.Size([3, 224, 224])\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_original[0,0,0]" + ], + "metadata": { + "colab": { + "base_uri": 
"https://localhost:8080/" + }, + "id": "NnPmMk4ZRF7w", + "outputId": "2cd5da7f-4a23-4a76-8c4a-bb982113f2a4" + }, + "execution_count": 10, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0.4549)" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_permuted[0,0,0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Z0ylNoAARgTo", + "outputId": "ddca0298-cddf-4048-9b71-a791655e5bed" + }, + "execution_count": 11, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0.4549)" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_original[0,0,0]=0.989" + ], + "metadata": { + "id": "RXw0xXsDRi4L" + }, + "execution_count": 13, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "x_original[0,0,0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "1sFdV6wzRo3f", + "outputId": "1cf87d2c-6d88-453a-d136-0f625a2800f1" + }, + "execution_count": 14, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0.9890)" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_permuted[0,0,0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xTX-hx2SR1wp", + "outputId": "0d4908c4-c3bc-44e3-8ec6-1487104cc209" + }, + "execution_count": 15, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0.9890)" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x=torch.arange(1,10).reshape(1,3,3)\n", + "x, x.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mZomOe7gR4Q8", + "outputId": "0b3c922f-ec11-46de-b8a5-9f9533d866ad" + }, + "execution_count": 
18, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(tensor([[[1, 2, 3],\n", + " [4, 5, 6],\n", + " [7, 8, 9]]]),\n", + " torch.Size([1, 3, 3]))" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "3y7v4SQvSBs1", + "outputId": "8c53307d-e628-404d-db66-56c6bdffab7c" + }, + "execution_count": 19, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 2, 3],\n", + " [4, 5, 6],\n", + " [7, 8, 9]])" + ] + }, + "metadata": {}, + "execution_count": 19 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0][0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hf9uG4xLSNya", + "outputId": "3075bc42-9ffa-426b-8a86-95628ffcd824" + }, + "execution_count": 21, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1, 2, 3])" + ] + }, + "metadata": {}, + "execution_count": 21 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0][0][0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "zA4G2Se4SRB3", + "outputId": "324312d2-ed0a-49eb-f81f-e904e53992fe" + }, + "execution_count": 22, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(1)" + ] + }, + "metadata": {}, + "execution_count": 22 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0][2][2]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Mwy3zmKKSdbk", + "outputId": "d35172c3-b099-40a6-ddf1-a453c2adfa44" + }, + "execution_count": 23, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(9)" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[:,1,1]" + ], + "metadata": { + "colab": { 
+ "base_uri": "https://localhost:8080/" + }, + "id": "fE3nCM1KS7XT", + "outputId": "01f5d755-9737-4235-9f73-dce89ff6ba16" + }, + "execution_count": 24, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([5])" + ] + }, + "metadata": {}, + "execution_count": 24 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0,0,:]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "luNDINKNTTxp", + "outputId": "091195ef-2f71-4602-e95f-529a69193150" + }, + "execution_count": 25, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1, 2, 3])" + ] + }, + "metadata": {}, + "execution_count": 25 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0,:,2]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "KG8A4xbfThCL", + "outputId": "5866bc41-9241-4619-be7b-e9206b3f80ab" + }, + "execution_count": 26, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([3, 6, 9])" + ] + }, + "metadata": {}, + "execution_count": 26 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import numpy as np" + ], + "metadata": { + "id": "CZ3PX0qlTwHJ" + }, + "execution_count": 27, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "array = np.arange(1.0, 8.0)" + ], + "metadata": { + "id": "UOBeTumiT3Lf" + }, + "execution_count": 28, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "array" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "RzcO32E9UCQl", + "outputId": "430def24-c42c-461f-e5e7-398544c695d3" + }, + "execution_count": 29, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([1., 2., 3., 4., 5., 6., 7.])" + ] + }, + "metadata": {}, + "execution_count": 29 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor = torch.from_numpy(array)\n", + "tensor" + ], + "metadata": 
{ + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JJIL0q1DUC6O", + "outputId": "8a3b1d7c-4482-4d32-f34f-9212d9d3a177" + }, + "execution_count": 32, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64)" + ] + }, + "metadata": {}, + "execution_count": 32 + } + ] + }, + { + "cell_type": "code", + "source": [ + "array[3]=11.0" + ], + "metadata": { + "id": "j3Ce6q3DUIEK" + }, + "execution_count": 33, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "array" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dc_BCVdjUsCc", + "outputId": "65537325-8b11-4f36-fc73-e56f30d6a036" + }, + "execution_count": 34, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([ 1., 2., 3., 11., 5., 6., 7.])" + ] + }, + "metadata": {}, + "execution_count": 34 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "VG1e_eITUta2", + "outputId": "a26c5198-23b6-4a6d-d73a-ba20cd9782b8" + }, + "execution_count": 35, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 1., 2., 3., 11., 5., 6., 7.], dtype=torch.float64)" + ] + }, + "metadata": {}, + "execution_count": 35 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor = torch.ones(7)\n", + "tensor, tensor.dtype\n", + "numpy_tensor = tensor.numpy()\n", + "numpy_tensor, numpy_tensor.dtype" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Swt8JF8vUuev", + "outputId": "c9e5bf6a-6d2c-41d6-8327-366867ffdd2d" + }, + "execution_count": 37, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(array([1., 1., 1., 1., 1., 1., 1.], dtype=float32), dtype('float32'))" + ] + }, + "metadata": {}, + "execution_count": 37 + } + ] + }, + { + 
"cell_type": "code", + "source": [ + "import torch\n", + "random_tensor_A = torch.rand(3,4)\n", + "random_tensor_B = torch.rand(3,4)\n", + "print(random_tensor_A)\n", + "print(random_tensor_B)\n", + "print(random_tensor_A == random_tensor_B)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "uGcagTteVFTD", + "outputId": "49405790-08e7-4210-b7f1-f00b904c7eb9" + }, + "execution_count": 38, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tensor([[0.9870, 0.6636, 0.6873, 0.8863],\n", + " [0.8386, 0.4169, 0.3587, 0.0265],\n", + " [0.2981, 0.6025, 0.5652, 0.5840]])\n", + "tensor([[0.9821, 0.3481, 0.0913, 0.4940],\n", + " [0.7495, 0.4387, 0.9582, 0.8659],\n", + " [0.5064, 0.6919, 0.0809, 0.9771]])\n", + "tensor([[False, False, False, False],\n", + " [False, False, False, False],\n", + " [False, False, False, False]])\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "RANDOM_SEED = 42\n", + "torch.manual_seed(RANDOM_SEED)\n", + "random_tensor_C = torch.rand(3,4)\n", + "torch.manual_seed(RANDOM_SEED)\n", + "random_tensor_D = torch.rand(3,4)\n", + "print(random_tensor_C)\n", + "print(random_tensor_D)\n", + "print(random_tensor_C == random_tensor_D)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "HznyXyEaWjLM", + "outputId": "25956434-01b6-4059-9054-c9978884ddc1" + }, + "execution_count": 46, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tensor([[0.8823, 0.9150, 0.3829, 0.9593],\n", + " [0.3904, 0.6009, 0.2566, 0.7936],\n", + " [0.9408, 0.1332, 0.9346, 0.5936]])\n", + "tensor([[0.8823, 0.9150, 0.3829, 0.9593],\n", + " [0.3904, 0.6009, 0.2566, 0.7936],\n", + " [0.9408, 0.1332, 0.9346, 0.5936]])\n", + "tensor([[True, True, True, True],\n", + " [True, True, True, True],\n", + " [True, True, True, True]])\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!nvidia-smi" + ], + "metadata": { + "colab": { + 
"base_uri": "https://localhost:8080/" + }, + "id": "vltPTh0YXJSt", + "outputId": "807af6dc-a9ca-4301-ec32-b688dbde8be8" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Thu May 23 02:57:59 2024 \n", + "+---------------------------------------------------------------------------------------+\n", + "| NVIDIA-SMI 535.104.05 Driver Version: 535.104.05 CUDA Version: 12.2 |\n", + "|-----------------------------------------+----------------------+----------------------+\n", + "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n", + "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n", + "| | | MIG M. |\n", + "|=========================================+======================+======================|\n", + "| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n", + "| N/A 60C P8 11W / 70W | 0MiB / 15360MiB | 0% Default |\n", + "| | | N/A |\n", + "+-----------------------------------------+----------------------+----------------------+\n", + " \n", + "+---------------------------------------------------------------------------------------+\n", + "| Processes: |\n", + "| GPU GI CI PID Type Process name GPU Memory |\n", + "| ID ID Usage |\n", + "|=======================================================================================|\n", + "| No running processes found |\n", + "+---------------------------------------------------------------------------------------+\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "torch.cuda.is_available()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "L6mMyPDyYh1j", + "outputId": "279c5dd8-c2a8-4fbd-f321-2f5d7c6e90e6" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ] + }, + { + "cell_type": "code", + "source": [ + "device = \"cuda\" if 
torch.cuda.is_available() else \"cpu\"\n", + "device" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "oOdiYa7ZYytx", + "outputId": "d73b04fc-8963-4826-9722-08d118d5ab91" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'cuda'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.cuda.device_count()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vOdsazLqZFM5", + "outputId": "8189cd6a-9017-4663-a652-3e15c517d9c3" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "1" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor = torch.tensor([1,2,3], device = \"cpu\")\n", + "print(tensor, tensor.device)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cdik9Vw3ZMv0", + "outputId": "044a68fd-83a1-409d-8e3b-655142ca0270" + }, + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tensor([1, 2, 3]) cpu\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor_on_gpu = tensor.to(device)\n", + "tensor_on_gpu" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Zmp835rrZp-z", + "outputId": "37fa3413-18a3-47bf-ae51-5b36ff85a3ef" + }, + "execution_count": 8, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1, 2, 3], device='cuda:0')" + ] + }, + "metadata": {}, + "execution_count": 8 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor_on_gpu.numpy()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 159 + }, + "id": "jhriaa8uZ1yM", 
+ "outputId": "bc5a3226-1a12-4fea-8769-a44f21cdc323" + }, + "execution_count": 10, + "outputs": [ + { + "output_type": "error", + "ename": "TypeError", + "evalue": "can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtensor_on_gpu\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnumpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first." + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor_on_cpu = tensor_on_gpu.cpu().numpy()" + ], + "metadata": { + "id": "LHGXK3GgaOzL" + }, + "execution_count": 12, + "outputs": [] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "j-El4LlCajfq" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Omejitev odgovornosti**: \nTa dokument je bil preveden z uporabo storitve za strojno prevajanje [Co-op Translator](https://github.com/Azure/co-op-translator). Čeprav si prizadevamo za natančnost, vas prosimo, da se zavedate, da lahko avtomatizirani prevodi vsebujejo napake ali netočnosti. Izvirni dokument v njegovem izvirnem jeziku je treba obravnavati kot avtoritativni vir. Za ključne informacije priporočamo strokovno človeško prevajanje. 
Ne prevzemamo odgovornosti za morebitna nesporazumevanja ali napačne razlage, ki izhajajo iz uporabe tega prevoda.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sr/2-Regression/1-Tools/notebook.ipynb b/translations/sr/2-Regression/1-Tools/notebook.ipynb new file mode 100644 index 000000000..e69de29bb diff --git a/translations/sr/2-Regression/1-Tools/solution/R/lesson_1-R.ipynb b/translations/sr/2-Regression/1-Tools/solution/R/lesson_1-R.ipynb new file mode 100644 index 000000000..620ab39c4 --- /dev/null +++ b/translations/sr/2-Regression/1-Tools/solution/R/lesson_1-R.ipynb @@ -0,0 +1,447 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_1-R.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "c18d3bd0bd8ae3878597e89dcd1fa5c1", + "translation_date": "2025-09-06T13:42:12+00:00", + "source_file": "2-Regression/1-Tools/solution/R/lesson_1-R.ipynb", + "language_code": "sr" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "YJUHCXqK57yz" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Увод у регресију - Лекција 1\n", + "\n", + "#### Стављање у перспективу\n", + "\n", + "✅ Постоји много врста метода регресије, а коју ћете изабрати зависи од одговора који тражите. Ако желите да предвидите вероватну висину особе одређеног узраста, користили бисте `линеарну регресију`, јер тражите **нумеричку вредност**. Ако вас занима да ли одређена врста кухиње треба да се сматра веганском или не, тражите **категоријску класификацију**, па бисте користили `логистичку регресију`. О логистичкој регресији ћете више научити касније. 
Размислите мало о питањима која можете поставити подацима и који од ових метода би био прикладнији.\n", + "\n", + "У овом делу, радићете са [малим сетом података о дијабетесу](https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html). Замислите да желите да тестирате третман за пацијенте са дијабетесом. Модели машинског учења могу вам помоћи да одредите који пацијенти би боље реаговали на третман, на основу комбинација променљивих. Чак и веома основни модел регресије, када се визуализује, може показати информације о променљивим које би вам помогле да организујете теоријске клиничке студије.\n", + "\n", + "С тим речима, хајде да започнемо овај задатак!\n", + "\n", + "

\n", + " \n", + "

Илустрација: @allison_horst
\n", + "\n", + "\n" + ], + "metadata": { + "id": "LWNNzfqd6feZ" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 1. Учитавање нашег алата\n", + "\n", + "За овај задатак биће нам потребни следећи пакети:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) је [збирка R пакета](https://www.tidyverse.org/packages) осмишљена да учини науку о подацима бржом, лакшом и забавнијом!\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) је [оквир пакета](https://www.tidymodels.org/packages/) за моделирање и машинско учење.\n", + "\n", + "Можете их инсталирати на следећи начин:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\"))`\n", + "\n", + "Скрипта испод проверава да ли имате инсталиране пакете потребне за завршетак овог модула и инсталира их уколико неки недостају.\n" + ], + "metadata": { + "id": "FIo2YhO26wI9" + } + }, + { + "cell_type": "code", + "execution_count": 2, + "source": [ + "suppressWarnings(if(!require(\"pacman\")) install.packages(\"pacman\"))\n", + "pacman::p_load(tidyverse, tidymodels)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Loading required package: pacman\n", + "\n" + ] + } + ], + "metadata": { + "id": "cIA9fz9v7Dss", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "2df7073b-86b2-4b32-cb86-0da605a0dc11" + } + }, + { + "cell_type": "markdown", + "source": [ + "Сада, хајде да учитамо ове сјајне пакете и учинимо их доступним у нашој тренутној R сесији. (Ово је само за илустрацију, `pacman::p_load()` је то већ урадио за вас)\n" + ], + "metadata": { + "id": "gpO_P_6f9WUG" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# load the core Tidyverse packages\r\n", + "library(tidyverse)\r\n", + "\r\n", + "# load the core Tidymodels packages\r\n", + "library(tidymodels)\r\n" + ], + "outputs": [], + "metadata": { + "id": "NLMycgG-9ezO" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 2. 
Дијабетес скуп података\n", + "\n", + "У овом задатку, применићемо наше вештине регресије правећи предвиђања на скупу података о дијабетесу. [Скуп података о дијабетесу](https://www4.stat.ncsu.edu/~boos/var.select/diabetes.rwrite1.txt) садржи `442 узорка` података о дијабетесу, са 10 предикторских променљивих: `године`, `пол`, `индекс телесне масе`, `просечан крвни притисак` и `шест мерења крвног серума`, као и излазну променљиву `y`: квантитативну меру напретка болести годину дана након почетног стања.\n", + "\n", + "|Број опсервација|442|\n", + "|----------------|:---|\n", + "|Број предиктора|Првих 10 колона су нумерички предиктори|\n", + "|Излаз/Циљ|Колона 11 је квантитативна мера напретка болести годину дана након почетног стања|\n", + "|Информације о предикторима|- године живота\n", + "||- пол\n", + "||- bmi индекс телесне масе\n", + "||- bp просечан крвни притисак\n", + "||- s1 tc, укупни серумски холестерол\n", + "||- s2 ldl, липопротеини ниске густине\n", + "||- s3 hdl, липопротеини високе густине\n", + "||- s4 tch, укупни холестерол / HDL\n", + "||- s5 ltg, могуће логаритам нивоа триглицерида у серуму\n", + "||- s6 glu, ниво шећера у крви|\n", + "\n", + "> 🎓 Запамтите, ово је надгледано учење, и потребан нам је именовани циљ 'y'.\n", + "\n", + "Пре него што можете манипулисати подацима у R-у, потребно је да увезете податке у меморију R-а или успоставите везу са подацима коју R може користити за приступ подацима на даљину.\n", + "\n", + "> Пакет [readr](https://readr.tidyverse.org/), који је део Tidyverse-а, пружа брз и једноставан начин за читање правоугаоних података у R.\n", + "\n", + "Сада, учитајмо скуп података о дијабетесу који је доступан на овом URL-у: https://www4.stat.ncsu.edu/~boos/var.select/diabetes.rwrite1.txt\n", + "\n", + "Такође, извршићемо проверу исправности наших података користећи `glimpse()` и приказати првих 5 редова користећи `slice()`.\n", + "\n", + "Пре него што наставимо даље, представићемо нешто што ћете често сусретати у R коду 🥁🥁: оператор цеви `%>%`\n", + "\n", + "Оператор цеви (`%>%`) 
извршава операције у логичком низу тако што прослеђује објекат функцији или изразу. Можете замислити оператор цеви као да у коду каже \"и онда\".\n" + ], + "metadata": { + "id": "KM6iXLH996Cl" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Import the data set\r\n", + "diabetes <- read_table2(file = \"https://www4.stat.ncsu.edu/~boos/var.select/diabetes.rwrite1.txt\")\r\n", + "\r\n", + "\r\n", + "# Get a glimpse and dimensions of the data\r\n", + "glimpse(diabetes)\r\n", + "\r\n", + "\r\n", + "# Select the first 5 rows of the data\r\n", + "diabetes %>% \r\n", + " slice(1:5)" + ], + "outputs": [], + "metadata": { + "id": "Z1geAMhM-bSP" + } + }, + { + "cell_type": "markdown", + "source": [ + "`glimpse()` нам показује да овај податак има 442 реда и 11 колона, при чему су све колоне типа податка `double`.\n", + "\n", + "
\n", + "\n", + "> glimpse() и slice() су функције у [`dplyr`](https://dplyr.tidyverse.org/). Dplyr, део Tidyverse-а, је граматика за манипулацију подацима која пружа конзистентан сет глагола који вам помажу да решите најчешће изазове у манипулацији подацима.\n", + "\n", + "
\n", + "\n", + "Сада када имамо податке, усмерићемо се на једну карактеристику (`bmi`) коју ћемо циљати за ову вежбу. Ово ће захтевати да изаберемо жељене колоне. Па, како то можемо урадити?\n", + "\n", + "[`dplyr::select()`](https://dplyr.tidyverse.org/reference/select.html) нам омогућава да *изаберемо* (и опционално преименујемо) колоне у оквиру података.\n" + ], + "metadata": { + "id": "UwjVT1Hz-c3Z" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Select predictor feature `bmi` and outcome `y`\r\n", + "diabetes_select <- diabetes %>% \r\n", + " select(c(bmi, y))\r\n", + "\r\n", + "# Print the first 10 rows\r\n", + "diabetes_select %>% \r\n", + " slice(1:10)" + ], + "outputs": [], + "metadata": { + "id": "RDY1oAKI-m80" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 3. Тренинг и тестирање података\n", + "\n", + "У надгледаном учењу је уобичајена пракса да се подаци *поделе* на два подскупа; (обично већи) скуп за тренирање модела и мањи \"резервни\" скуп за проверу како је модел функционисао.\n", + "\n", + "Сада када имамо припремљене податке, можемо видети да ли машина може помоћи у одређивању логичне поделе између бројева у овом скупу података. 
Можемо користити пакет [rsample](https://tidymodels.github.io/rsample/), који је део оквира Tidymodels, да креирамо објекат који садржи информације о *начину* поделе података, а затим још две rsample функције за издвајање креираних скупова за тренирање и тестирање:\n" + ], + "metadata": { + "id": "SDk668xK-tc3" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "set.seed(2056)\r\n", + "# Split 67% of the data for training and the rest for testing\r\n", + "diabetes_split <- diabetes_select %>% \r\n", + " initial_split(prop = 0.67)\r\n", + "\r\n", + "# Extract the resulting train and test sets\r\n", + "diabetes_train <- training(diabetes_split)\r\n", + "diabetes_test <- testing(diabetes_split)\r\n", + "\r\n", + "# Print the first 10 rows of the training set\r\n", + "diabetes_train %>% \r\n", + " slice(1:10)" + ], + "outputs": [], + "metadata": { + "id": "EqtHx129-1h-" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 4. Обучите модел линеарне регресије помоћу Tidymodels\n", + "\n", + "Сада смо спремни да обучимо наш модел!\n", + "\n", + "У Tidymodels-у, модели се специфицирају помоћу `parsnip()` кроз дефинисање три концепта:\n", + "\n", + "- **Тип модела** разликује моделе као што су линеарна регресија, логистичка регресија, модели стабла одлучивања и тако даље.\n", + "\n", + "- **Режим модела** укључује уобичајене опције као што су регресија и класификација; неки типови модела подржавају обе опције, док неки имају само један режим.\n", + "\n", + "- **Енџин модела** је рачунарски алат који ће се користити за прилагођавање модела. 
Често су то R пакети, као што су **`\"lm\"`** или **`\"ranger\"`**\n", + "\n", + "Ове информације о моделу се чувају у спецификацији модела, па хајде да направимо једну!\n" + ], + "metadata": { + "id": "sBOS-XhB-6v7" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Build a linear model specification\r\n", + "lm_spec <- \r\n", + " # Type\r\n", + " linear_reg() %>% \r\n", + " # Engine\r\n", + " set_engine(\"lm\") %>% \r\n", + " # Mode\r\n", + " set_mode(\"regression\")\r\n", + "\r\n", + "\r\n", + "# Print the model specification\r\n", + "lm_spec" + ], + "outputs": [], + "metadata": { + "id": "20OwEw20--t3" + } + }, + { + "cell_type": "markdown", + "source": [ + "Након што је модел *одређен*, модел може бити `процењен` или `трениран` коришћењем функције [`fit()`](https://parsnip.tidymodels.org/reference/fit.html), обично уз помоћ формуле и неких података.\n", + "\n", + "`y ~ .` значи да ћемо прилагодити `y` као предвиђену вредност/циљ, објашњену свим предикторима/карактеристикама, односно `.` (у овом случају, имамо само један предиктор: `bmi`).\n" + ], + "metadata": { + "id": "_oDHs89k_CJj" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Build a linear model specification\r\n", + "lm_spec <- linear_reg() %>% \r\n", + " set_engine(\"lm\") %>%\r\n", + " set_mode(\"regression\")\r\n", + "\r\n", + "\r\n", + "# Train a linear regression model\r\n", + "lm_mod <- lm_spec %>% \r\n", + " fit(y ~ ., data = diabetes_train)\r\n", + "\r\n", + "# Print the model\r\n", + "lm_mod" + ], + "outputs": [], + "metadata": { + "id": "YlsHqd-q_GJQ" + } + }, + { + "cell_type": "markdown", + "source": [ + "Из модела можемо видети коефицијенте који су научени током тренинга. Они представљају коефицијенте линије најбољег прилагођавања која нам даје најмању укупну грешку између стварне и предвиђене променљиве.\n", + "
\n", + "\n", + "## 5. Направите предвиђања на тест сету\n", + "\n", + "Сада када смо обучили модел, можемо га користити за предвиђање прогресије болести y за тестни скуп података користећи [parsnip::predict()](https://parsnip.tidymodels.org/reference/predict.model_fit.html). Ово ће се користити за цртање линије између група података.\n" + ], + "metadata": { + "id": "kGZ22RQj_Olu" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make predictions for the test set\r\n", + "predictions <- lm_mod %>% \r\n", + " predict(new_data = diabetes_test)\r\n", + "\r\n", + "# Print out some of the predictions\r\n", + "predictions %>% \r\n", + " slice(1:5)" + ], + "outputs": [], + "metadata": { + "id": "nXHbY7M2_aao" + } + }, + { + "cell_type": "markdown", + "source": [ + "Ура! 💃🕺 Управо смо обучили модел и користили га за прављење предвиђања!\n", + "\n", + "Када правимо предвиђања, конвенција tidymodels-а је да увек производимо tibble/data frame резултата са стандардизованим именима колона. Ово олакшава комбиновање оригиналних података и предвиђања у употребљивом формату за наредне операције као што је креирање графикона.\n", + "\n", + "`dplyr::bind_cols()` ефикасно спаја више data frame-ова по колонама.\n" + ], + "metadata": { + "id": "R_JstwUY_bIs" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Combine the predictions and the original test set\r\n", + "results <- diabetes_test %>% \r\n", + " bind_cols(predictions)\r\n", + "\r\n", + "\r\n", + "results %>% \r\n", + " slice(1:5)" + ], + "outputs": [], + "metadata": { + "id": "RybsMJR7_iI8" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 6. Приказивање резултата модела\n", + "\n", + "Сада је време да ово видимо визуелно 📈. 
Направићемо расејани графикон свих `y` и `bmi` вредности из тест скупа, а затим ћемо користити предвиђања да нацртамо линију на најприкладнијем месту, између група података модела.\n", + "\n", + "R има неколико система за прављење графикона, али `ggplot2` је један од најелегантнијих и најсвестранијих. Овај пакет вам омогућава да креирате графиконе **комбиновањем независних компоненти**.\n" + ], + "metadata": { + "id": "XJbYbMZW_n_s" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Set a theme for the plot\r\n", + "theme_set(theme_light())\r\n", + "# Create a scatter plot\r\n", + "results %>% \r\n", + " ggplot(aes(x = bmi)) +\r\n", + " # Add a scatter plot\r\n", + " geom_point(aes(y = y), size = 1.6) +\r\n", + " # Add a line plot\r\n", + " geom_line(aes(y = .pred), color = \"blue\", size = 1.5)" + ], + "outputs": [], + "metadata": { + "id": "R9tYp3VW_sTn" + } + }, + { + "cell_type": "markdown", + "source": [ + "✅ Размислите мало о томе шта се овде дешава. Права линија пролази кроз много малих тачака података, али шта она заправо ради? Можете ли да видите како би требало да будете у могућности да користите ову линију за предвиђање где би нова, непозната тачка података требало да се уклопи у односу на y осу графикона? Покушајте да речима опишете практичну употребу овог модела.\n", + "\n", + "Честитамо, направили сте свој први модел линеарне регресије, креирали предвиђање са њим и приказали га на графикону!\n" + ], + "metadata": { + "id": "zrPtHIxx_tNI" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако се трудимо да обезбедимо тачност, молимо вас да имате у виду да аутоматски преводи могу садржати грешке или нетачности. Оригинални документ на његовом изворном језику треба сматрати ауторитативним извором. 
За критичне информације препоручује се професионални превод од стране људи. Не преузимамо одговорност за било каква погрешна тумачења или неспоразуме који могу настати услед коришћења овог превода.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sr/2-Regression/1-Tools/solution/notebook.ipynb b/translations/sr/2-Regression/1-Tools/solution/notebook.ipynb new file mode 100644 index 000000000..e2a00d691 --- /dev/null +++ b/translations/sr/2-Regression/1-Tools/solution/notebook.ipynb @@ -0,0 +1,677 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Линеарна регресија за дијабетес скуп података - Лекција 1\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Увезите потребне библиотеке\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from sklearn import datasets, linear_model, model_selection\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Учитајте скуп података о дијабетесу, подељен на `X` податке и `y` карактеристике\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(442, 10)\n", + "[ 0.03807591 0.05068012 0.06169621 0.02187239 -0.0442235 -0.03482076\n", + " -0.04340085 -0.00259226 0.01990749 -0.01764613]\n" + ] + } + ], + "source": [ + "X, y = datasets.load_diabetes(return_X_y=True)\n", + "print(X.shape)\n", + "print(X[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Изаберите само једну карактеристику за потребе ове вежбе\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(442,)\n" + ] + } + ], + "source": [ + "# Selecting the 3rd feature\n", + "X = X[:, 2]\n", + "print(X.shape)\n" + ] 
+ }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(442, 1)\n", + "[[ 0.06169621]\n", + " [-0.05147406]\n", + " [ 0.04445121]\n", + " [-0.01159501]\n", + " [-0.03638469]\n", + " [-0.04069594]\n", + " [-0.04716281]\n", + " [-0.00189471]\n", + " [ 0.06169621]\n", + " [ 0.03906215]\n", + " [-0.08380842]\n", + " [ 0.01750591]\n", + " [-0.02884001]\n", + " [-0.00189471]\n", + " [-0.02560657]\n", + " [-0.01806189]\n", + " [ 0.04229559]\n", + " [ 0.01211685]\n", + " [-0.0105172 ]\n", + " [-0.01806189]\n", + " [-0.05686312]\n", + " [-0.02237314]\n", + " [-0.00405033]\n", + " [ 0.06061839]\n", + " [ 0.03582872]\n", + " [-0.01267283]\n", + " [-0.07734155]\n", + " [ 0.05954058]\n", + " [-0.02129532]\n", + " [-0.00620595]\n", + " [ 0.04445121]\n", + " [-0.06548562]\n", + " [ 0.12528712]\n", + " [-0.05039625]\n", + " [-0.06332999]\n", + " [-0.03099563]\n", + " [ 0.02289497]\n", + " [ 0.01103904]\n", + " [ 0.07139652]\n", + " [ 0.01427248]\n", + " [-0.00836158]\n", + " [-0.06764124]\n", + " [-0.0105172 ]\n", + " [-0.02345095]\n", + " [ 0.06816308]\n", + " [-0.03530688]\n", + " [-0.01159501]\n", + " [-0.0730303 ]\n", + " [-0.04177375]\n", + " [ 0.01427248]\n", + " [-0.00728377]\n", + " [ 0.0164281 ]\n", + " [-0.00943939]\n", + " [-0.01590626]\n", + " [ 0.0250506 ]\n", + " [-0.04931844]\n", + " [ 0.04121778]\n", + " [-0.06332999]\n", + " [-0.06440781]\n", + " [-0.02560657]\n", + " [-0.00405033]\n", + " [ 0.00457217]\n", + " [-0.00728377]\n", + " [-0.0374625 ]\n", + " [-0.02560657]\n", + " [-0.02452876]\n", + " [-0.01806189]\n", + " [-0.01482845]\n", + " [-0.02991782]\n", + " [-0.046085 ]\n", + " [-0.06979687]\n", + " [ 0.03367309]\n", + " [-0.00405033]\n", + " [-0.02021751]\n", + " [ 0.00241654]\n", + " [-0.03099563]\n", + " [ 0.02828403]\n", + " [-0.03638469]\n", + " [-0.05794093]\n", + " [-0.0374625 ]\n", + " [ 0.01211685]\n", + " [-0.02237314]\n", + " [-0.03530688]\n", + " [ 
0.00996123]\n", + " [-0.03961813]\n", + " [ 0.07139652]\n", + " [-0.07518593]\n", + " [-0.00620595]\n", + " [-0.04069594]\n", + " [-0.04824063]\n", + " [-0.02560657]\n", + " [ 0.0519959 ]\n", + " [ 0.00457217]\n", + " [-0.06440781]\n", + " [-0.01698407]\n", + " [-0.05794093]\n", + " [ 0.00996123]\n", + " [ 0.08864151]\n", + " [-0.00512814]\n", + " [-0.06440781]\n", + " [ 0.01750591]\n", + " [-0.04500719]\n", + " [ 0.02828403]\n", + " [ 0.04121778]\n", + " [ 0.06492964]\n", + " [-0.03207344]\n", + " [-0.07626374]\n", + " [ 0.04984027]\n", + " [ 0.04552903]\n", + " [-0.00943939]\n", + " [-0.03207344]\n", + " [ 0.00457217]\n", + " [ 0.02073935]\n", + " [ 0.01427248]\n", + " [ 0.11019775]\n", + " [ 0.00133873]\n", + " [ 0.05846277]\n", + " [-0.02129532]\n", + " [-0.0105172 ]\n", + " [-0.04716281]\n", + " [ 0.00457217]\n", + " [ 0.01750591]\n", + " [ 0.08109682]\n", + " [ 0.0347509 ]\n", + " [ 0.02397278]\n", + " [-0.00836158]\n", + " [-0.06117437]\n", + " [-0.00189471]\n", + " [-0.06225218]\n", + " [ 0.0164281 ]\n", + " [ 0.09618619]\n", + " [-0.06979687]\n", + " [-0.02129532]\n", + " [-0.05362969]\n", + " [ 0.0433734 ]\n", + " [ 0.05630715]\n", + " [-0.0816528 ]\n", + " [ 0.04984027]\n", + " [ 0.11127556]\n", + " [ 0.06169621]\n", + " [ 0.01427248]\n", + " [ 0.04768465]\n", + " [ 0.01211685]\n", + " [ 0.00564998]\n", + " [ 0.04660684]\n", + " [ 0.12852056]\n", + " [ 0.05954058]\n", + " [ 0.09295276]\n", + " [ 0.01535029]\n", + " [-0.00512814]\n", + " [ 0.0703187 ]\n", + " [-0.00405033]\n", + " [-0.00081689]\n", + " [-0.04392938]\n", + " [ 0.02073935]\n", + " [ 0.06061839]\n", + " [-0.0105172 ]\n", + " [-0.03315126]\n", + " [-0.06548562]\n", + " [ 0.0433734 ]\n", + " [-0.06225218]\n", + " [ 0.06385183]\n", + " [ 0.03043966]\n", + " [ 0.07247433]\n", + " [-0.0191397 ]\n", + " [-0.06656343]\n", + " [-0.06009656]\n", + " [ 0.06924089]\n", + " [ 0.05954058]\n", + " [-0.02668438]\n", + " [-0.02021751]\n", + " [-0.046085 ]\n", + " [ 0.07139652]\n", + " [-0.07949718]\n", + " 
[ 0.00996123]\n", + " [-0.03854032]\n", + " [ 0.01966154]\n", + " [ 0.02720622]\n", + " [-0.00836158]\n", + " [-0.01590626]\n", + " [ 0.00457217]\n", + " [-0.04285156]\n", + " [ 0.00564998]\n", + " [-0.03530688]\n", + " [ 0.02397278]\n", + " [-0.01806189]\n", + " [ 0.04229559]\n", + " [-0.0547075 ]\n", + " [-0.00297252]\n", + " [-0.06656343]\n", + " [-0.01267283]\n", + " [-0.04177375]\n", + " [-0.03099563]\n", + " [-0.00512814]\n", + " [-0.05901875]\n", + " [ 0.0250506 ]\n", + " [-0.046085 ]\n", + " [ 0.00349435]\n", + " [ 0.05415152]\n", + " [-0.04500719]\n", + " [-0.05794093]\n", + " [-0.05578531]\n", + " [ 0.00133873]\n", + " [ 0.03043966]\n", + " [ 0.00672779]\n", + " [ 0.04660684]\n", + " [ 0.02612841]\n", + " [ 0.04552903]\n", + " [ 0.04013997]\n", + " [-0.01806189]\n", + " [ 0.01427248]\n", + " [ 0.03690653]\n", + " [ 0.00349435]\n", + " [-0.07087468]\n", + " [-0.03315126]\n", + " [ 0.09403057]\n", + " [ 0.03582872]\n", + " [ 0.03151747]\n", + " [-0.06548562]\n", + " [-0.04177375]\n", + " [-0.03961813]\n", + " [-0.03854032]\n", + " [-0.02560657]\n", + " [-0.02345095]\n", + " [-0.06656343]\n", + " [ 0.03259528]\n", + " [-0.046085 ]\n", + " [-0.02991782]\n", + " [-0.01267283]\n", + " [-0.01590626]\n", + " [ 0.07139652]\n", + " [-0.03099563]\n", + " [ 0.00026092]\n", + " [ 0.03690653]\n", + " [ 0.03906215]\n", + " [-0.01482845]\n", + " [ 0.00672779]\n", + " [-0.06871905]\n", + " [-0.00943939]\n", + " [ 0.01966154]\n", + " [ 0.07462995]\n", + " [-0.00836158]\n", + " [-0.02345095]\n", + " [-0.046085 ]\n", + " [ 0.05415152]\n", + " [-0.03530688]\n", + " [-0.03207344]\n", + " [-0.0816528 ]\n", + " [ 0.04768465]\n", + " [ 0.06061839]\n", + " [ 0.05630715]\n", + " [ 0.09834182]\n", + " [ 0.05954058]\n", + " [ 0.03367309]\n", + " [ 0.05630715]\n", + " [-0.06548562]\n", + " [ 0.16085492]\n", + " [-0.05578531]\n", + " [-0.02452876]\n", + " [-0.03638469]\n", + " [-0.00836158]\n", + " [-0.04177375]\n", + " [ 0.12744274]\n", + " [-0.07734155]\n", + " [ 0.02828403]\n", + " 
[-0.02560657]\n", + " [-0.06225218]\n", + " [-0.00081689]\n", + " [ 0.08864151]\n", + " [-0.03207344]\n", + " [ 0.03043966]\n", + " [ 0.00888341]\n", + " [ 0.00672779]\n", + " [-0.02021751]\n", + " [-0.02452876]\n", + " [-0.01159501]\n", + " [ 0.02612841]\n", + " [-0.05901875]\n", + " [-0.03638469]\n", + " [-0.02452876]\n", + " [ 0.01858372]\n", + " [-0.0902753 ]\n", + " [-0.00512814]\n", + " [-0.05255187]\n", + " [-0.02237314]\n", + " [-0.02021751]\n", + " [-0.0547075 ]\n", + " [-0.00620595]\n", + " [-0.01698407]\n", + " [ 0.05522933]\n", + " [ 0.07678558]\n", + " [ 0.01858372]\n", + " [-0.02237314]\n", + " [ 0.09295276]\n", + " [-0.03099563]\n", + " [ 0.03906215]\n", + " [-0.06117437]\n", + " [-0.00836158]\n", + " [-0.0374625 ]\n", + " [-0.01375064]\n", + " [ 0.07355214]\n", + " [-0.02452876]\n", + " [ 0.03367309]\n", + " [ 0.0347509 ]\n", + " [-0.03854032]\n", + " [-0.03961813]\n", + " [-0.00189471]\n", + " [-0.03099563]\n", + " [-0.046085 ]\n", + " [ 0.00133873]\n", + " [ 0.06492964]\n", + " [ 0.04013997]\n", + " [-0.02345095]\n", + " [ 0.05307371]\n", + " [ 0.04013997]\n", + " [-0.02021751]\n", + " [ 0.01427248]\n", + " [-0.03422907]\n", + " [ 0.00672779]\n", + " [ 0.00457217]\n", + " [ 0.03043966]\n", + " [ 0.0519959 ]\n", + " [ 0.06169621]\n", + " [-0.00728377]\n", + " [ 0.00564998]\n", + " [ 0.05415152]\n", + " [-0.00836158]\n", + " [ 0.114509 ]\n", + " [ 0.06708527]\n", + " [-0.05578531]\n", + " [ 0.03043966]\n", + " [-0.02560657]\n", + " [ 0.10480869]\n", + " [-0.00620595]\n", + " [-0.04716281]\n", + " [-0.04824063]\n", + " [ 0.08540807]\n", + " [-0.01267283]\n", + " [-0.03315126]\n", + " [-0.00728377]\n", + " [-0.01375064]\n", + " [ 0.05954058]\n", + " [ 0.02181716]\n", + " [ 0.01858372]\n", + " [-0.01159501]\n", + " [-0.00297252]\n", + " [ 0.01750591]\n", + " [-0.02991782]\n", + " [-0.02021751]\n", + " [-0.05794093]\n", + " [ 0.06061839]\n", + " [-0.04069594]\n", + " [-0.07195249]\n", + " [-0.05578531]\n", + " [ 0.04552903]\n", + " [-0.00943939]\n", + " 
[-0.03315126]\n", + " [ 0.04984027]\n", + " [-0.08488624]\n", + " [ 0.00564998]\n", + " [ 0.02073935]\n", + " [-0.00728377]\n", + " [ 0.10480869]\n", + " [-0.02452876]\n", + " [-0.00620595]\n", + " [-0.03854032]\n", + " [ 0.13714305]\n", + " [ 0.17055523]\n", + " [ 0.00241654]\n", + " [ 0.03798434]\n", + " [-0.05794093]\n", + " [-0.00943939]\n", + " [-0.02345095]\n", + " [-0.0105172 ]\n", + " [-0.03422907]\n", + " [-0.00297252]\n", + " [ 0.06816308]\n", + " [ 0.00996123]\n", + " [ 0.00241654]\n", + " [-0.03854032]\n", + " [ 0.02612841]\n", + " [-0.08919748]\n", + " [ 0.06061839]\n", + " [-0.02884001]\n", + " [-0.02991782]\n", + " [-0.0191397 ]\n", + " [-0.04069594]\n", + " [ 0.01535029]\n", + " [-0.02452876]\n", + " [ 0.00133873]\n", + " [ 0.06924089]\n", + " [-0.06979687]\n", + " [-0.02991782]\n", + " [-0.046085 ]\n", + " [ 0.01858372]\n", + " [ 0.00133873]\n", + " [-0.03099563]\n", + " [-0.00405033]\n", + " [ 0.01535029]\n", + " [ 0.02289497]\n", + " [ 0.04552903]\n", + " [-0.04500719]\n", + " [-0.03315126]\n", + " [ 0.097264 ]\n", + " [ 0.05415152]\n", + " [ 0.12313149]\n", + " [-0.08057499]\n", + " [ 0.09295276]\n", + " [-0.05039625]\n", + " [-0.01159501]\n", + " [-0.0277622 ]\n", + " [ 0.05846277]\n", + " [ 0.08540807]\n", + " [-0.00081689]\n", + " [ 0.00672779]\n", + " [ 0.00888341]\n", + " [ 0.08001901]\n", + " [ 0.07139652]\n", + " [-0.02452876]\n", + " [-0.0547075 ]\n", + " [-0.03638469]\n", + " [ 0.0164281 ]\n", + " [ 0.07786339]\n", + " [-0.03961813]\n", + " [ 0.01103904]\n", + " [-0.04069594]\n", + " [-0.03422907]\n", + " [ 0.00564998]\n", + " [ 0.08864151]\n", + " [-0.03315126]\n", + " [-0.05686312]\n", + " [-0.03099563]\n", + " [ 0.05522933]\n", + " [-0.06009656]\n", + " [ 0.00133873]\n", + " [-0.02345095]\n", + " [-0.07410811]\n", + " [ 0.01966154]\n", + " [-0.01590626]\n", + " [-0.01590626]\n", + " [ 0.03906215]\n", + " [-0.0730303 ]]\n" + ] + } + ], + "source": [ + "#Reshaping to get a 2D array\n", + "X = X.reshape(-1, 1)\n", + "print(X.shape)\n", 
+ "print(X)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Раздвојите податке за обуку и тестирање за `X` и `y`\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.33)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Изаберите модел и обучите га са подацима за тренирање\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "LinearRegression()" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = linear_model.LinearRegression()\n", + "model.fit(X_train, y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Користите тест податке да предвидите линију\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred = model.predict(X_test)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Прикажи резултате на графикону\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.scatter(X_test, y_test, color='black')\n", + "plt.plot(X_test, y_pred, color='blue', linewidth=3)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако настојимо да обезбедимо тачност, молимо вас да имате у виду да аутоматизовани преводи могу садржати грешке или нетачности. Оригинални документ на изворном језику треба сматрати ауторитативним извором. За критичне информације препоручује се професионални превод од стране људи. Не сносимо одговорност за било каква погрешна тумачења или неспоразуме који могу произаћи из коришћења овог превода.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.1" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "16ff1a974f6e4348e869e4a7d366b86a", + "translation_date": "2025-09-06T13:38:54+00:00", + "source_file": "2-Regression/1-Tools/solution/notebook.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sr/2-Regression/2-Data/notebook.ipynb b/translations/sr/2-Regression/2-Data/notebook.ipynb new file mode 100644 index 000000000..1b5138e30 --- /dev/null +++ b/translations/sr/2-Regression/2-Data/notebook.ipynb @@ -0,0 +1,46 @@ +{ + "metadata": { + "language_info": { + 
"codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3-final" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3", + "language": "python" + }, + "coopTranslator": { + "original_hash": "1b2ab303ac6c604a34c6ca7a49077fc7", + "translation_date": "2025-09-06T13:45:55+00:00", + "source_file": "2-Regression/2-Data/notebook.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако настојимо да обезбедимо тачност, молимо вас да имате у виду да аутоматски преводи могу садржати грешке или нетачности. Оригинални документ на изворном језику треба сматрати ауторитативним извором. За критичне информације препоручује се професионални превод од стране људи. 
Не сносимо одговорност за било каква погрешна тумачења или неспоразуме који могу произаћи из коришћења овог превода.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sr/2-Regression/2-Data/solution/R/lesson_2-R.ipynb b/translations/sr/2-Regression/2-Data/solution/R/lesson_2-R.ipynb new file mode 100644 index 000000000..042aa1010 --- /dev/null +++ b/translations/sr/2-Regression/2-Data/solution/R/lesson_2-R.ipynb @@ -0,0 +1,672 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_2-R.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "f3c335f9940cfd76528b3ef918b9b342", + "translation_date": "2025-09-06T13:50:30+00:00", + "source_file": "2-Regression/2-Data/solution/R/lesson_2-R.ipynb", + "language_code": "sr" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Изградња регресионог модела: припрема и визуализација података\n", + "\n", + "## **Линеарна регресија за бундеве - Лекција 2**\n", + "#### Увод\n", + "\n", + "Сада када сте опремљени алатима који су вам потребни за почетак изградње модела машинског учења уз помоћ Tidymodels и Tidyverse-а, спремни сте да почнете постављати питања о вашим подацима. Док радите са подацима и примењујете решења машинског учења, веома је важно разумети како поставити право питање како бисте на прави начин искористили потенцијале вашег скупа података.\n", + "\n", + "У овој лекцији ћете научити:\n", + "\n", + "- Како припремити податке за изградњу модела.\n", + "\n", + "- Како користити `ggplot2` за визуализацију података.\n", + "\n", + "Питање на које желите одговор одредиће који тип алгоритама машинског учења ћете користити. Квалитет одговора који добијете у великој мери ће зависити од природе ваших података.\n", + "\n", + "Хајде да ово видимо кроз практичну вежбу.\n", + "\n", + "

\n", + " \n", + "

Илустрација: @allison_horst
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "Pg5aexcOPqAZ" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 1. Увоз података о бундевама и позивање Tidyverse-а\n", + "\n", + "Биће нам потребни следећи пакети за обраду и анализу података у овој лекцији:\n", + "\n", + "- `tidyverse`: [Tidyverse](https://www.tidyverse.org/) је [збирка R пакета](https://www.tidyverse.org/packages) осмишљена да учини науку о подацима бржом, једноставнијом и забавнијом!\n", + "\n", + "Можете их инсталирати на следећи начин:\n", + "\n", + "`install.packages(c(\"tidyverse\"))`\n", + "\n", + "Скрипта испод проверава да ли имате инсталиране пакете потребне за завршетак овог модула и инсталира их уколико неки недостају.\n" + ], + "metadata": { + "id": "dc5WhyVdXAjR" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "suppressWarnings(if(!require(\"pacman\")) install.packages(\"pacman\"))\n", + "pacman::p_load(tidyverse)" + ], + "outputs": [], + "metadata": { + "id": "GqPYUZgfXOBt" + } + }, + { + "cell_type": "markdown", + "source": [ + "Сада, хајде да покренемо неке пакете и учитамо [податке](https://github.com/microsoft/ML-For-Beginners/blob/main/2-Regression/data/US-pumpkins.csv) који су обезбеђени за ову лекцију!\n" + ], + "metadata": { + "id": "kvjDTPDSXRr2" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load the core Tidyverse packages\n", + "library(tidyverse)\n", + "\n", + "# Import the pumpkins data\n", + "pumpkins <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/2-Regression/data/US-pumpkins.csv\")\n", + "\n", + "\n", + "# Get a glimpse and dimensions of the data\n", + "glimpse(pumpkins)\n", + "\n", + "\n", + "# Print the first 50 rows of the data set\n", + "pumpkins %>% \n", + " slice_head(n =50)" + ], + "outputs": [], + "metadata": { + "id": "VMri-t2zXqgD" + } + }, + { + "cell_type": "markdown", + "source": [ + "Брзи `glimpse()` одмах показује да постоје 
празнине и мешавина стрингова (`chr`) и нумеричких података (`dbl`). `Date` је типа карактер, а ту је и чудна колона названа `Package` где су подаци мешавина између `sacks`, `bins` и других вредности. Подаци су, у ствари, прилично неуредни 😤.\n", + "\n", + "Заправо, није баш уобичајено добити скуп података који је потпуно спреман за употребу и креирање ML модела одмах. Али не брините, у овој лекцији ћете научити како да припремите необрађени скуп података користећи стандардне R библиотеке 🧑‍🔧. Такође ћете научити различите технике за визуализацију података.📈📊\n", + "
\n", + "\n", + "> Подсетник: Оператор цеви (`%>%`) извршава операције у логичком низу тако што прослеђује објекат функцији или изразу. Можете замислити оператор цеви као да у вашем коду каже \"и онда\".\n" + ], + "metadata": { + "id": "REWcIv9yX29v" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 2. Провера недостајућих података\n", + "\n", + "Један од најчешћих проблема са којима се научници за податке сусрећу јесте непотпуни или недостајући подаци. У R-у се недостајуће или непознате вредности представљају посебном вредношћу: `NA` (Not Available).\n", + "\n", + "Како бисмо знали да ли оквир података садржи недостајуће вредности? \n", + "
\n", + "- Један једноставан начин био би коришћење основне R функције `anyNA`, која враћа логичке вредности `TRUE` или `FALSE`.\n" + ], + "metadata": { + "id": "Zxfb3AM5YbUe" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "pumpkins %>% \n", + " anyNA()" + ], + "outputs": [], + "metadata": { + "id": "G--DQutAYltj" + } + }, + { + "cell_type": "markdown", + "source": [ + "Одлично, чини се да недостају неки подаци! То је добро место за почетак.\n", + "\n", + "- Други начин би био да се користи функција `is.na()` која указује на то који појединачни елементи у колони недостају логичком вредношћу `TRUE`.\n" + ], + "metadata": { + "id": "mU-7-SB6YokF" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "pumpkins %>% \n", + " is.na() %>% \n", + " head(n = 7)" + ], + "outputs": [], + "metadata": { + "id": "W-DxDOR4YxSW" + } + }, + { + "cell_type": "markdown", + "source": [ + "У реду, завршен посао, али са овако великим оквиром података, било би неефикасно и практично немогуће прегледати све редове и колоне појединачно😴.\n", + "\n", + "- Интуитивнији начин био би да израчунате збир недостајућих вредности за сваку колону:\n" + ], + "metadata": { + "id": "xUWxipKYY0o7" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "pumpkins %>% \n", + " is.na() %>% \n", + " colSums()" + ], + "outputs": [], + "metadata": { + "id": "ZRBWV6P9ZArL" + } + }, + { + "cell_type": "markdown", + "source": [ + "Много боље! Недостају неки подаци, али можда то неће бити важно за задатак који је пред нама. Да видимо шта ће даља анализа донети.\n", + "\n", + "> Поред сјајних сетова пакета и функција, R има веома добру документацију. На пример, користите `help(colSums)` или `?colSums` да бисте сазнали више о функцији.\n" + ], + "metadata": { + "id": "9gv-crB6ZD1Y" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 3. Dplyr: Граматика за манипулацију подацима\n", + "\n", + "

\n", + " \n", + "

Илустрација: @allison_horst
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "o4jLY5-VZO2C" + } + }, + { + "cell_type": "markdown", + "source": [ + "[`dplyr`](https://dplyr.tidyverse.org/), пакет у оквиру Tidyverse-а, представља граматику за манипулацију подацима која пружа конзистентан сет глагола који вам помажу да решите најчешће изазове у манипулацији подацима. У овом делу, истражићемо неке од глагола из dplyr-а!\n", + "
\n" + ], + "metadata": { + "id": "i5o33MQBZWWw" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### dplyr::select()\n", + "\n", + "`select()` је функција у пакету `dplyr` која вам помаже да изаберете колоне које желите да задржите или изузмете.\n", + "\n", + "Да бисте свој рад са подацима учинили једноставнијим, уклоните неколико колона из вашег оквира података користећи `select()`, задржавајући само оне колоне које су вам потребне.\n", + "\n", + "На пример, у овој вежби, наша анализа ће обухватати колоне `Package`, `Low Price`, `High Price` и `Date`. Хајде да изаберемо те колоне.\n" + ], + "metadata": { + "id": "x3VGMAGBZiUr" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Select desired columns\n", + "pumpkins <- pumpkins %>% \n", + " select(Package, `Low Price`, `High Price`, Date)\n", + "\n", + "\n", + "# Print data set\n", + "pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "F_FgxQnVZnM0" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### dplyr::mutate()\n", + "\n", + "`mutate()` је функција у пакету `dplyr` која вам помаже да креирате или модификујете колоне, а да притом задржите постојеће колоне.\n", + "\n", + "Општа структура функције `mutate` је:\n", + "\n", + "`data %>% mutate(new_column_name = what_it_contains)`\n", + "\n", + "Хајде да испробамо `mutate` користећи колону `Date` и извршимо следеће операције:\n", + "\n", + "1. Претворимо датуме (који су тренутно типа карактер) у формат месеца (ово су датуми у америчком формату, дакле `MM/DD/YYYY`).\n", + "\n", + "2. Извуцимо месец из датума у нову колону.\n", + "\n", + "У програмском језику R, пакет [lubridate](https://lubridate.tidyverse.org/) олакшава рад са подацима типа датум-време. Дакле, хајде да користимо `dplyr::mutate()`, `lubridate::mdy()`, `lubridate::month()` и видимо како да постигнемо горе наведене циљеве. 
Можемо избацити колону `Date` јер нам више неће бити потребна у наредним операцијама.\n" + ], + "metadata": { + "id": "2KKo0Ed9Z1VB" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load lubridate\n", + "library(lubridate)\n", + "\n", + "pumpkins <- pumpkins %>% \n", + " # Convert the Date column to a date object\n", + " mutate(Date = mdy(Date)) %>% \n", + " # Extract month from Date\n", + " mutate(Month = month(Date)) %>% \n", + " # Drop Date column\n", + " select(-Date)\n", + "\n", + "# View the first few rows\n", + "pumpkins %>% \n", + " slice_head(n = 7)" + ], + "outputs": [], + "metadata": { + "id": "5joszIVSZ6xe" + } + }, + { + "cell_type": "markdown", + "source": [ + "Ура! 🤩\n", + "\n", + "Следеће, хајде да направимо нову колону `Price`, која представља просечну цену бундеве. Сада, хајде да узмемо просек из колона `Low Price` и `High Price` како бисмо попунили нову колону Price.\n", + "
\n" + ], + "metadata": { + "id": "nIgLjNMCZ-6Y" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Create a new column Price\n", + "pumpkins <- pumpkins %>% \n", + " mutate(Price = (`Low Price` + `High Price`)/2)\n", + "\n", + "# View the first few rows of the data\n", + "pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "Zo0BsqqtaJw2" + } + }, + { + "cell_type": "markdown", + "source": [ + "Дааа!💪\n", + "\n", + "\"Али чекај!\", рећи ћеш након што прелетиш цео скуп података помоћу `View(pumpkins)`, \"Овде нешто није у реду!\"🤔\n", + "\n", + "Ако погледаш колону `Package`, бундеве се продају у различитим конфигурацијама. Неке се продају у мерама `1 1/9 bushel`, неке у мерама `1/2 bushel`, неке по бундеви, неке по фунти, а неке у великим кутијама различитих ширина.\n", + "\n", + "Хајде да ово проверимо:\n" + ], + "metadata": { + "id": "p77WZr-9aQAR" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Verify the distinct observations in Package column\n", + "pumpkins %>% \n", + " distinct(Package)" + ], + "outputs": [], + "metadata": { + "id": "XISGfh0IaUy6" + } + }, + { + "cell_type": "markdown", + "source": [ + "Невероватно!👏\n", + "\n", + "Чини се да је веома тешко доследно измерити бундеве, па хајде да их филтрирамо тако што ћемо изабрати само бундеве које у колони `Package` садрже реч *bushel* и ставимо их у нови оквир података `new_pumpkins`.\n" + ], + "metadata": { + "id": "7sMjiVujaZxY" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### dplyr::filter() и stringr::str_detect()\n", + "\n", + "[`dplyr::filter()`](https://dplyr.tidyverse.org/reference/filter.html): креира подскуп података који садржи само **редове** који испуњавају ваше услове, у овом случају, бундеве са низом *bushel* у колони `Package`.\n", + "\n", + "[stringr::str_detect()](https://stringr.tidyverse.org/reference/str_detect.html): открива присуство или одсуство шаблона у низу.\n", + "\n", + 
"Пакет [`stringr`](https://github.com/tidyverse/stringr) пружа једноставне функције за уобичајене операције са низовима.\n" + ], + "metadata": { + "id": "L8Qfcs92ageF" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Retain only pumpkins with \"bushel\"\n", + "new_pumpkins <- pumpkins %>% \n", + " filter(str_detect(Package, \"bushel\"))\n", + "\n", + "# Get the dimensions of the new data\n", + "dim(new_pumpkins)\n", + "\n", + "# View a few rows of the new data\n", + "new_pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "hy_SGYREampd" + } + }, + { + "cell_type": "markdown", + "source": [ + "Можете видети да смо сузили избор на око 415 редова података који садрже бундеве по бушелу.🤩 \n", + "
\n" + ], + "metadata": { + "id": "VrDwF031avlR" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### dplyr::case_when()\n", + "\n", + "**Али сачекајте! Постоји још нешто што треба урадити**\n", + "\n", + "Да ли сте приметили да количина у бушелима варира по реду? Потребно је нормализовати цене тако да приказујете цену по бушелу, а не по 1 1/9 или 1/2 бушела. Време је за мало математике како бисмо то стандардизовали.\n", + "\n", + "Користићемо функцију [`case_when()`](https://dplyr.tidyverse.org/reference/case_when.html) да *изменимо* колону Price у зависности од одређених услова. `case_when` нам омогућава да векторизујемо више `if_else()` изјава.\n" + ], + "metadata": { + "id": "mLpw2jH4a0tx" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Convert the price if the Package contains fractional bushel values\n", + "new_pumpkins <- new_pumpkins %>% \n", + " mutate(Price = case_when(\n", + " str_detect(Package, \"1 1/9\") ~ Price/(1 + 1/9),\n", + " str_detect(Package, \"1/2\") ~ Price/(1/2),\n", + " TRUE ~ Price))\n", + "\n", + "# View the first few rows of the data\n", + "new_pumpkins %>% \n", + " slice_head(n = 30)" + ], + "outputs": [], + "metadata": { + "id": "P68kLVQmbM6I" + } + }, + { + "cell_type": "markdown", + "source": [ + "Сада можемо анализирати цену по јединици на основу мерења по бушелу. Све ово проучавање бушела бундева, међутим, показује колико је `важно` да `разумете природу ваших података`!\n", + "\n", + "> ✅ Према [The Spruce Eats](https://www.thespruceeats.com/how-much-is-a-bushel-1389308), тежина бушела зависи од врсте производа, јер је то мера запремине. \"Бушел парадајза, на пример, треба да тежи 56 фунти... Листови и зелено поврће заузимају више простора са мање тежине, па бушел спанаћа тежи само 20 фунти.\" Све је то прилично компликовано! Нећемо се мучити са конверзијом бушела у фунте, већ ћемо одредити цену по бушелу. 
Све ово проучавање бушела бундева, међутим, показује колико је важно да разумете природу ваших података!\n", + ">\n", + "> ✅ Да ли сте приметили да су бундеве које се продају по пола бушела веома скупе? Можете ли схватити зашто? Савет: мале бундеве су много скупље од великих, вероватно зато што их има много више по бушелу, с обзиром на неискоришћени простор који заузима једна велика шупља бундева за питу.\n" + ], + "metadata": { + "id": "pS2GNPagbSdb" + } + }, + { + "cell_type": "markdown", + "source": [ + "Сада, зарад авантуре 💁‍♀️, хајде да преместимо колону \"Month\" на прву позицију, односно `пре` колоне `Package`.\n", + "\n", + "`dplyr::relocate()` се користи за промену позиција колона.\n" + ], + "metadata": { + "id": "qql1SowfbdnP" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Create a new data frame new_pumpkins\n", + "new_pumpkins <- new_pumpkins %>% \n", + " relocate(Month, .before = Package)\n", + "\n", + "new_pumpkins %>% \n", + " slice_head(n = 7)" + ], + "outputs": [], + "metadata": { + "id": "JJ1x6kw8bixF" + } + }, + { + "cell_type": "markdown", + "source": [ + "Сјајно!👌 Сада имате чист и уредан скуп података на коме можете изградити свој нови регресиони модел! \n", + "
\n" + ], + "metadata": { + "id": "y8TJ0Za_bn5Y" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 4. Визуелизација података са ggplot2\n", + "\n", + "

\n", + " \n", + "

Инфографика од Дасани Мадипали
\n", + "\n", + "\n", + "\n", + "\n", + "Постоји једна *мудра* изрека која гласи:\n", + "\n", + "> \"Једноставан графикон је донео више информација у ум аналитичара података него било који други уређај.\" --- Џон Туки\n", + "\n", + "Део улоге научника за податке је да демонстрира квалитет и природу података са којима ради. Да би то урадили, често креирају занимљиве визуелизације, или графиконе, дијаграме и табеле, које приказују различите аспекте података. На овај начин могу визуелно приказати односе и празнине које је иначе тешко открити.\n", + "\n", + "Визуелизације такође могу помоћи у одређивању технике машинског учења која је најприкладнија за податке. На пример, расејани графикон који изгледа као да прати линију указује на то да су подаци добар кандидат за вежбу линеарне регресије.\n", + "\n", + "R нуди неколико система за креирање графикона, али [`ggplot2`](https://ggplot2.tidyverse.org/index.html) је један од најелегантнијих и најсвестранијих. `ggplot2` вам омогућава да саставите графиконе **комбинујући независне компоненте**.\n", + "\n", + "Хајде да почнемо са једноставним расејаним графиконом за колоне Price и Month.\n", + "\n", + "У овом случају, почећемо са [`ggplot()`](https://ggplot2.tidyverse.org/reference/ggplot.html), додаћемо скуп података и естетско мапирање (са [`aes()`](https://ggplot2.tidyverse.org/reference/aes.html)), а затим додати слојеве (као [`geom_point()`](https://ggplot2.tidyverse.org/reference/geom_point.html)) за расејане графиконе.\n" + ], + "metadata": { + "id": "mYSH6-EtbvNa" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Set a theme for the plots\n", + "theme_set(theme_light())\n", + "\n", + "# Create a scatter plot\n", + "p <- ggplot(data = new_pumpkins, aes(x = Price, y = Month))\n", + "p + geom_point()" + ], + "outputs": [], + "metadata": { + "id": "g2YjnGeOcLo4" + } + }, + { + "cell_type": "markdown", + "source": [ + "Да ли је ово користан графикон 🤷? 
Да ли вас нешто на њему изненађује?\n", + "\n", + "Није нарочито користан јер све што ради јесте да приказује ваше податке као распоред тачака у датом месецу. \n", + "
\n" + ], + "metadata": { + "id": "Ml7SDCLQcPvE" + } + }, + { + "cell_type": "markdown", + "source": [ + "### **Како да учинимо да буде корисно?**\n", + "\n", + "Да би графикони приказивали корисне податке, обично је потребно некако груписати податке. На пример, у нашем случају, проналажење просечне цене бундева за сваки месец би пружило више увида у основне обрасце у нашим подацима. Ово нас доводи до још једног брзог прегледа функција пакета **dplyr**:\n", + "\n", + "#### `dplyr::group_by() %>% summarize()`\n", + "\n", + "Груписана агрегација у R-у може се лако израчунати помоћу\n", + "\n", + "`dplyr::group_by() %>% summarize()`\n", + "\n", + "- `dplyr::group_by()` мења јединицу анализе са целокупног скупа података на појединачне групе, као што су групе по месецима.\n", + "\n", + "- `dplyr::summarize()` креира нови скуп података са једном колоном за сваку променљиву груписања и једном колоном за сваку од статистика које сте навели.\n", + "\n", + "На пример, можемо користити `dplyr::group_by() %>% summarize()` да групишемо бундеве у групе на основу колоне **Month** и затим израчунамо **просечну цену** за сваки месец.\n" + ], + "metadata": { + "id": "jMakvJZIcVkh" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Find the average price of pumpkins per month\r\n", + "new_pumpkins %>%\r\n", + " group_by(Month) %>% \r\n", + " summarise(mean_price = mean(Price))" + ], + "outputs": [], + "metadata": { + "id": "6kVSUa2Bcilf" + } + }, + { + "cell_type": "markdown", + "source": [ + "Сажето!✨\n", + "\n", + "Категоријске карактеристике, као што су месеци, боље се представљају помоћу стубичастог графикона 📊. Слојеви који су одговорни за стубичасте графиконе су `geom_bar()` и `geom_col()`. 
Погледајте `?geom_bar` за више информација.\n", + "\n", + "Хајде да направимо један!\n" + ], + "metadata": { + "id": "Kds48GUBcj3W" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Find the average price of pumpkins per month then plot a bar chart\r\n", + "new_pumpkins %>%\r\n", + " group_by(Month) %>% \r\n", + " summarise(mean_price = mean(Price)) %>% \r\n", + " ggplot(aes(x = Month, y = mean_price)) +\r\n", + " geom_col(fill = \"midnightblue\", alpha = 0.7) +\r\n", + " ylab(\"Pumpkin Price\")" + ], + "outputs": [], + "metadata": { + "id": "VNbU1S3BcrxO" + } + }, + { + "cell_type": "markdown", + "source": [ + "🤩🤩Ово је кориснија визуализација података! Чини се да указује на то да се највиша цена бундева јавља у септембру и октобру. Да ли то одговара вашим очекивањима? Зашто или зашто не?\n", + "\n", + "Честитамо на завршетку друге лекције 👏! Припремили сте своје податке за изградњу модела, а затим открили више увида користећи визуализације!\n" + ], + "metadata": { + "id": "zDm0VOzzcuzR" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако се трудимо да обезбедимо тачност, молимо вас да имате у виду да аутоматски преводи могу садржати грешке или нетачности. Оригинални документ на његовом изворном језику треба сматрати ауторитативним извором. За критичне информације препоручује се професионални превод од стране људи. 
Не преузимамо одговорност за било каква погрешна тумачења или неспоразуме који могу настати услед коришћења овог превода.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sr/2-Regression/2-Data/solution/notebook.ipynb b/translations/sr/2-Regression/2-Data/solution/notebook.ipynb new file mode 100644 index 000000000..c2fae168d --- /dev/null +++ b/translations/sr/2-Regression/2-Data/solution/notebook.ipynb @@ -0,0 +1,437 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NameTypePackageVarietySub VarietyGradeDateLow PriceHigh PriceMostly Low...Unit of SaleQualityConditionAppearanceStorageCropRepackTrans ModeUnnamed: 24Unnamed: 25
70BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN9/24/1615.015.015.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
71BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN9/24/1618.018.018.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
72BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN10/1/1618.018.018.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
73BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN10/1/1617.017.017.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
74BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN10/8/1615.015.015.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " City Name Type Package Variety Sub Variety Grade \\\n", + "70 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "71 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "72 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "73 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "74 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "\n", + " Date Low Price High Price Mostly Low ... Unit of Sale Quality \\\n", + "70 9/24/16 15.0 15.0 15.0 ... NaN NaN \n", + "71 9/24/16 18.0 18.0 18.0 ... NaN NaN \n", + "72 10/1/16 18.0 18.0 18.0 ... NaN NaN \n", + "73 10/1/16 17.0 17.0 17.0 ... NaN NaN \n", + "74 10/8/16 15.0 15.0 15.0 ... NaN NaN \n", + "\n", + " Condition Appearance Storage Crop Repack Trans Mode Unnamed: 24 \\\n", + "70 NaN NaN NaN NaN N NaN NaN \n", + "71 NaN NaN NaN NaN N NaN NaN \n", + "72 NaN NaN NaN NaN N NaN NaN \n", + "73 NaN NaN NaN NaN N NaN NaN \n", + "74 NaN NaN NaN NaN N NaN NaN \n", + "\n", + " Unnamed: 25 \n", + "70 NaN \n", + "71 NaN \n", + "72 NaN \n", + "73 NaN \n", + "74 NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "pumpkins = pd.read_csv('../../data/US-pumpkins.csv')\n", + "\n", + "pumpkins = pumpkins[pumpkins['Package'].str.contains('bushel', case=True, regex=True)]\n", + "\n", + "pumpkins.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "City Name 0\n", + "Type 406\n", + "Package 0\n", + "Variety 0\n", + "Sub Variety 167\n", + "Grade 415\n", + "Date 0\n", + "Low Price 0\n", + "High Price 0\n", + "Mostly Low 24\n", + "Mostly High 24\n", + "Origin 0\n", + "Origin District 396\n", + "Item Size 114\n", + "Color 145\n", + "Environment 415\n", + "Unit of Sale 404\n", + "Quality 415\n", + "Condition 415\n", + "Appearance 
415\n", + "Storage 415\n", + "Crop 415\n", + "Repack 0\n", + "Trans Mode 415\n", + "Unnamed: 24 415\n", + "Unnamed: 25 391\n", + "dtype: int64" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pumpkins.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Month Package Low Price High Price Price\n", + "70 9 1 1/9 bushel cartons 15.00 15.0 13.50\n", + "71 9 1 1/9 bushel cartons 18.00 18.0 16.20\n", + "72 10 1 1/9 bushel cartons 18.00 18.0 16.20\n", + "73 10 1 1/9 bushel cartons 17.00 17.0 15.30\n", + "74 10 1 1/9 bushel cartons 15.00 15.0 13.50\n", + "... ... ... ... ... ...\n", + "1738 9 1/2 bushel cartons 15.00 15.0 30.00\n", + "1739 9 1/2 bushel cartons 13.75 15.0 28.75\n", + "1740 9 1/2 bushel cartons 10.75 15.0 25.75\n", + "1741 9 1/2 bushel cartons 12.00 12.0 24.00\n", + "1742 9 1/2 bushel cartons 12.00 12.0 24.00\n", + "\n", + "[415 rows x 5 columns]\n" + ] + } + ], + "source": [ + "\n", + "# A set of new columns for a new dataframe. 
Filter out nonmatching columns\n", + "columns_to_select = ['Package', 'Low Price', 'High Price', 'Date']\n", + "pumpkins = pumpkins.loc[:, columns_to_select]\n", + "\n", + "# Get an average between low and high price for the base pumpkin price\n", + "price = (pumpkins['Low Price'] + pumpkins['High Price']) / 2\n", + "\n", + "# Convert the date to its month only\n", + "month = pd.DatetimeIndex(pumpkins['Date']).month\n", + "\n", + "# Create a new dataframe with this basic data\n", + "new_pumpkins = pd.DataFrame({'Month': month, 'Package': pumpkins['Package'], 'Low Price': pumpkins['Low Price'],'High Price': pumpkins['High Price'], 'Price': price})\n", + "\n", + "# Convert the price if the Package contains fractional bushel values\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1 1/9'), 'Price'] = price/(1 + 1/9)\n", + "\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1/2'), 'Price'] = price/(1/2)\n", + "\n", + "print(new_pumpkins)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "price = new_pumpkins.Price\n", + "month = new_pumpkins.Month\n", + "plt.scatter(price, month)\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0, 0.5, 'Pumpkin Price')" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEJCAYAAACT/UyFAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAAsTAAALEwEAmpwYAAARAElEQVR4nO3de5AlZX3G8e8joKigiIwbVNYVQ6ErwcVaiRW0CgUNikEQKxFTijHJahlUSsvUqknE/LVE0KoYNVkDigloNCoQLt5AxUuCLrrhIhqUQgMiLBGE0goR+OWP0+sMszOzZ8ft0zO830/VqTndfc7phwae6XlPX1JVSJLa8aChA0iSJsvil6TGWPyS1BiLX5IaY/FLUmMsfklqzK5DBxjHPvvsU6tWrRo6hiQtK1dcccVtVTU1e/6yKP5Vq1axadOmoWNI0rKS5IdzzXeoR5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktSYZXECl3auVesvHDoCN2w4eugIUrMsfjXNX4JqkUM9ktQYi1+SGmPxS1JjLH5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqTG/Fn2S/JF9M8p0k1yR5Yzf/lCQ3JdncPV7YVwZJ0rZ27fGz7wHeXFXfSrIncEWSz3fL3lNVp/W4bknSPHor/qq6Gbi5e35XkmuBx/W1PknSePrc4/+VJKuAQ4DLgcOAk5K8EtjE6K+C2yeRQ9L8Vq2/cOgI3LDh6KEjNKH3L3eT7AF8Eji5qu4EPgA8CVjD6C+C0+d537okm5Js2rJlS98xJakZvRZ/kt0Ylf7ZVfUpgKq6parurar7gA8Ch8713qraWFVrq2rt1NRUnzElqSl9HtUT4Azg2qp694z5+8542XHA1X1lkCRtq88x/sOAVwBXJdnczXsbcEKSNUABNwCv6TGDJGmWPo/q+SqQORZd1Nc6F+IXV5I04pm7ktQYi1+SGmPxS1JjLH5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktQYi1+SGmPxS1JjLH5JakxvxZ9kvyRfTPKdJNckeWM3f+8kn09yXffzUX1lkCRtq889/nuAN1fVauCZwJ8lWQ2sBy6pqgOAS7ppSdKE9Fb8VXVzVX2re34XcC3wOODFwFndy84Cju0rgyRpWxMZ40+yCjgEuBxYUVU3d4t+AqyY5z3rkmxKsmnLli2TiClJTei9+JPsAXwSOLmq7py5rKoKqLneV1Ubq2ptVa2dmprqO6YkNWOs4k/y0CQH7uiHJ9mNUemfXVWf6mbfkmTfbvm+wK07+rmSpM
XbbvEn+T1gM/CZbnpNkvPHeF+AM4Brq+rdMxadD5zYPT8ROG8HM0uSfg3j7PGfAhwK3AFQVZuBJ47xvsOAVwDPTbK5e7wQ2AA8L8l1wJHdtCRpQnYd4zW/rKqfjXbgf2XOcfn7vaDqq0DmWXzEGOuVJPVgnOK/JsnLgV2SHAC8Afh6v7EkSX0ZZ6jn9cBTgbuBc4CfASf3mEmS1KPt7vFX1S+At3cPSdIyN85RPZ9PsteM6Ucl+WyvqSRJvRlnqGefqrpj60RV3Q48prdEkqRejVP89yVZuXUiyRMY46geSdLSNM5RPW8Hvprky4wOz3w2sK7XVJKk3ozz5e5nkjyd0aWVYXTNndv6jSVJ6su8Qz1Jntz9fDqwEvhx91jZzZMkLUML7fG/idGQzulzLCvgub0kkiT1at7ir6p1SR4E/EVVfW2CmSRJPVrwqJ6qug/4uwllkSRNwDiHc16S5PjMukqbJGl5Gqf4XwN8Arg7yZ1J7kpy5/beJElamsY5nHPPSQSRJE3GQodzHpDkvCRXJzknyeMmGUyS1I+FhnrOBC4Ajge+Dbx3IokkSb1aaKhnz6r6YPf8XUm+NYlAkqR+LVT8uyc5hOnbJz505nRV+YtAkpahhYr/ZuDdM6Z/MmPaM3claZla6Mzd50wyiCRpMsY5jl+S9ABi8UtSYyx+SWrMOHfgojt56wkzX19Vl/UVSpLUn+0Wf5JTgT8AvgPc280uwOKXpGVonD3+Y4EDq+runrNIkiZgnOK/HtgN2KHiT3Im8CLg1qo6qJt3CvCnwJbuZW+rqot25HMlqW+r1l84dARu2HB0b589TvH/Atic5BJmlH9VvWE77/swo5u4fGTW/PdU1Wk7ElKStPOMU/znd48dUlWXJVm1w4kkSb0a53r8Z+3kdZ6U5JXAJuDNVXX7XC9Kso7Rzd5ZuXLlTo4gSe1a6Hr8H+9+XpXkytmPRa7vA8CTgDWMrgV0+nwvrKqNVbW2qtZOTU0tcnWSpNkW2uN/Y/fzRTtrZVV1y9bnST7I6Hr/kqQJmnePv6pu7p6urqofznwAL1jMypLsO2PyOODqxXyOJGnxxvly9y+T3F1VlwIk+XPgOcDfL/SmJB8FDgf2SXIj8A7g8CRrGJ0AdgOjG7lLkiZonOI/BrggyVuAo4AnAy/e3puq6oQ5Zp+xY/EkSTvbOEf13JbkGOALwBXAS6uqek8mSerFvMWf5C5GQzJbPRjYH3hpkqqqR/QdTpK08y10B649JxlEkjQZ416W+SXAsxj9BfCVqjq3z1CSpP5s90YsSd4PvBa4itHhl69N8r6+g0mS+jHOHv9zgads/UI3yVnANb2mkiT1ZpxbL34fmHmxnP26eZKkZWicPf49gWuTfKObfgawKcn5AFV1TF/hJEk73zjF/1e9p5AkTcw4J3B9GSDJI7j/zdZ/2mMuSVJPxrnZ+jrgr4H/Be4Dwuiwzv37jSZJ6sM4Qz1vAQ6qqtv6DiNJ6t84R/X8gNF9dyVJDwDj7PG/Ffh6ksvZsZutS5KWoHGK/x+ASxmduXtfv3EkSX0bp/h3q6o39Z5EkjQR44zxX5xkXZJ9k+y99dF7MklSL8bZ4996J623zpjn4ZyStEyNcwLXEycRRJI0GeOcwPXKueZX1Ud2fhxJUt/GGep5xoznuwNHAN8CLH5JWobGGep5/czpJHsBH+srkCSpX+Mc1TPbzwHH/SVpmRpnjP/fGB3FA6NfFKuBj/cZSpLUn3HG+E+b8fwe4IdVdWNPeSRJPZu3+JPszugm67/J6HINZ1TVPZMKJknqx0Jj/GcBaxmV/guA0yeSSJLUq4WGelZX1W8BJDkD+MYCr91GkjOBFwG3VtVB3by9gX8BVgE3AL9fVbfveGxJ0mIttMf/y61PFjnE82HgqFnz1gOXVNUBwCXdtCRpghYq/qclubN73AUcvPV5kju398FVdRkw+768L2Y0hET389jFhJYkLd68Qz1VtUsP61tRVTd3z38CrO
hhHZKkBSzmBK6doqqK6fMDttFdCnpTkk1btmyZYDJJemCbdPHfkmRfgO7nrfO9sKo2VtXaqlo7NTU1sYCS9EA36eI/Hzixe34icN6E1y9Jzeut+JN8FPh34MAkNyb5Y2AD8Lwk1wFHdtOSpAka55INi1JVJ8yz6Ii+1ilJ2r7BvtyVJA3D4pekxlj8ktQYi1+SGmPxS1JjLH5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktSYXYdYaZIbgLuAe4F7qmrtEDkkqUWDFH/nOVV124Drl6QmOdQjSY0ZqvgL+FySK5KsGyiDJDVpqKGeZ1XVTUkeA3w+yXer6rKZL+h+IawDWLly5RAZJekBaZA9/qq6qft5K/Bp4NA5XrOxqtZW1dqpqalJR5SkB6yJF3+ShyfZc+tz4PnA1ZPOIUmtGmKoZwXw6SRb139OVX1mgByS1KSJF39VXQ88bdLrlSSNeDinJDXG4pekxlj8ktQYi1+SGmPxS1JjLH5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktQYi1+SGjNI8Sc5Ksn3knw/yfohMkhSqyZe/El2Ad4HvABYDZyQZPWkc0hSq4bY4z8U+H5VXV9V/wd8DHjxADkkqUmpqsmuMHkpcFRV/Uk3/Qrgt6vqpFmvWwes6yYPBL430aDb2ge4beAMS4XbYprbYprbYtpS2RZPqKqp2TN3HSLJOKpqI7Bx6BxbJdlUVWuHzrEUuC2muS2muS2mLfVtMcRQz03AfjOmH9/NkyRNwBDF/03ggCRPTPJg4GXA+QPkkKQmTXyop6ruSXIS8FlgF+DMqrpm0jkWYckMOy0BbotpbotpbotpS3pbTPzLXUnSsDxzV5IaY/FLUmMsfklqzJI9jn9IM442+nFVfSHJy4HfAa4FNlbVLwcNOGFJ9gdewugw3HuB/wLOqao7Bw0maVH8cncOSc5m9EvxYcAdwB7Ap4AjGG2zE4dLN1lJ3gC8CLgMeCHwbUbb5DjgdVX1pcHCSVoUi38OSa6sqoOT7Mro5LLHVtW9SQL8Z1UdPHDEiUlyFbCm++d/GHBRVR2eZCVwXlUdMnDEiUnySOCtwLHAY4ACbgXOAzZU1R2DhVtCklxcVS8YOsekJHkEo/8uHg9cXFXnzFj2/qp63WDh5uFQz9we1A33PJzRXv8jgZ8CDwF2GzLYQHZlNMTzEEZ//VBVP0rS2rb4OHApcHhV/QQgyW8AJ3bLnj9gtolK8vT5FgFrJhhlKfgQcB3wSeDVSY4HXl5VdwPPHDTZPCz+uZ0BfJfRCWZvBz6R5HpG/xI/NmSwAfwj8M0klwPPBk4FSDLF6JdhS1ZV1akzZ3S/AE5N8uqBMg3lm8CXGRX9bHtNNsrgnlRVx3fPz03yduDSJMcMGWohDvXMI8ljAarqx0n2Ao4EflRV3xg02ACSPBV4CnB1VX136DxDSfI54AvAWVV1SzdvBfAq4HlVdeSA8SYqydXAcVV13RzL/ruq9pvjbQ9ISa4FnlpV982Y9yrgLcAeVfWEobLNx+KXxpTkUcB6RvePeEw3+xZG15raUFW3D5Vt0rrLq19VVdtcLj3JsVV17uRTDSPJ3wCfq6ovzJp/FPDeqjpgmGTzs/ilnSDJH1XVh4bOsRS4LaYt1W1h8Us7QZIfVdXKoXMsBW6LaUt1W/jlrjSmJFfOtwhYMcksQ3NbTFuO28Lil8a3AvhdYPZYfoCvTz7OoNwW05bdtrD4pfFdwOgojc2zFyT50sTTDMttMW3ZbQvH+CWpMV6dU5IaY/FLUmMsfglIUkn+ecb0rkm2JL
lgkZ+3V5LXzZg+fLGfJe1sFr808nPgoCQP7aafx+jKrIu1F7DkrsoogcUvzXQRcHT3/ATgo1sXJNk7yblJrkzyH0kO7uafkuTMJF9Kcn13/wKADcCTkmxO8q5u3h5J/jXJd5Oc3V3mW5o4i1+a9jHgZUl2Bw4GLp+x7J3At7t7MbwN+MiMZU9mdBz3ocA7ustVrwd+UFVrquot3esOAU4GVgP7A4f1+M8izcvilzpVdSWwitHe/kWzFj8L+KfudZcCj+5uwAFwYVXdXVW3Mboxy3xna36jqm7sruK4uVuXNHGewCXd3/nAacDhwKPHfM/dM57fy/z/X437OqlX7vFL93cm8M6qumrW/K8AfwijI3SA27Zzs/m7gD37CCj9utzjkGaoqhuBv51j0SnAmd0FuX7B6HaLC33O/yT5WnfDkouBC3d2VmmxvGSDJDXGoR5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSY/4fZDFW+b6+4WkAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "new_pumpkins.groupby(['Month'])['Price'].mean().plot(kind='bar')\n", + "plt.ylabel(\"Pumpkin Price\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако настојимо да обезбедимо тачност, молимо вас да имате у виду да аутоматизовани преводи могу садржати грешке или нетачности. Оригинални документ на изворном језику треба сматрати ауторитативним извором. За критичне информације препоручује се професионални превод од стране људи. Не сносимо одговорност за било каква погрешна тумачења или неспоразуме који могу произаћи из коришћења овог превода.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" + }, + "kernelspec": { + "display_name": "Python 3.7.0 64-bit ('3.7')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.1" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "95726f0b8283628d5356a4f8eb8b4b76", + "translation_date": "2025-09-06T13:46:19+00:00", + "source_file": "2-Regression/2-Data/solution/notebook.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git 
a/translations/sr/2-Regression/3-Linear/notebook.ipynb b/translations/sr/2-Regression/3-Linear/notebook.ipynb new file mode 100644 index 000000000..b765139e7 --- /dev/null +++ b/translations/sr/2-Regression/3-Linear/notebook.ipynb @@ -0,0 +1,128 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Цене за тикве\n", + "\n", + "Учитајте потребне библиотеке и скуп података. Претворите податке у датафрејм који садржи подскуп података:\n", + "\n", + "- Узмите само тикве чија је цена дата по бушелу\n", + "- Претворите датум у месец\n", + "- Израчунајте цену као просек високих и ниских цена\n", + "- Претворите цену тако да одражава цену по количини бушела\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from datetime import datetime\n", + "\n", + "pumpkins = pd.read_csv('../data/US-pumpkins.csv')\n", + "\n", + "pumpkins.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pumpkins = pumpkins[pumpkins['Package'].str.contains('bushel', case=True, regex=True)]\n", + "\n", + "columns_to_select = ['Package', 'Variety', 'City Name', 'Low Price', 'High Price', 'Date']\n", + "pumpkins = pumpkins.loc[:, columns_to_select]\n", + "\n", + "price = (pumpkins['Low Price'] + pumpkins['High Price']) / 2\n", + "\n", + "month = pd.DatetimeIndex(pumpkins['Date']).month\n", + "day_of_year = pd.to_datetime(pumpkins['Date']).apply(lambda dt: (dt-datetime(dt.year,1,1)).days)\n", + "\n", + "new_pumpkins = pd.DataFrame(\n", + " {'Month': month, \n", + " 'DayOfYear' : day_of_year, \n", + " 'Variety': pumpkins['Variety'], \n", + " 'City': pumpkins['City Name'], \n", + " 'Package': pumpkins['Package'], \n", + " 'Low Price': pumpkins['Low Price'],\n", + " 'High Price': pumpkins['High Price'], \n", + " 'Price': price})\n", + "\n", + 
"new_pumpkins.loc[new_pumpkins['Package'].str.contains('1 1/9'), 'Price'] = price/1.1\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1/2'), 'Price'] = price*2\n", + "\n", + "new_pumpkins.head()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Основни дијаграм расејања нас подсећа да имамо податке само за месеце од августа до децембра. Вероватно нам је потребно више података како бисмо могли да извучемо закључке на линеаран начин.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "plt.scatter('Month','Price',data=new_pumpkins)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "plt.scatter('DayOfYear','Price',data=new_pumpkins)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако се трудимо да обезбедимо тачност, молимо вас да имате у виду да аутоматски преводи могу садржати грешке или нетачности. Оригинални документ на његовом изворном језику треба сматрати меродавним извором. За критичне информације препоручује се професионални превод од стране људи. 
Не преузимамо одговорност за било каква погрешна тумачења или неспоразуме који могу настати услед коришћења овог превода.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3-final" + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "b032d371c75279373507f003439a577e", + "translation_date": "2025-09-06T13:08:47+00:00", + "source_file": "2-Regression/3-Linear/notebook.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sr/2-Regression/3-Linear/solution/R/lesson_3-R.ipynb b/translations/sr/2-Regression/3-Linear/solution/R/lesson_3-R.ipynb new file mode 100644 index 000000000..47935c975 --- /dev/null +++ b/translations/sr/2-Regression/3-Linear/solution/R/lesson_3-R.ipynb @@ -0,0 +1,1084 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_3-R.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "5015d65d61ba75a223bfc56c273aa174", + "translation_date": "2025-09-06T13:18:01+00:00", + "source_file": "2-Regression/3-Linear/solution/R/lesson_3-R.ipynb", + "language_code": "sr" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "EgQw8osnsUV-" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Линеарна и полиномијална регресија за одређивање цене бундева - Лекција 3\n", + "

\n", + " \n", + "

Инфографика: Дасани Мадипали
\n", + "\n", + "\n", + "#### Увод\n", + "\n", + "До сада сте истраживали шта је регресија користећи пример података из скупа података о ценама бундева који ћемо користити током ове лекције. Такође сте је визуализовали помоћу `ggplot2`. 💪\n", + "\n", + "Сада сте спремни да дубље уђете у регресију за машинско учење. У овој лекцији ћете научити више о две врсте регресије: *основна линеарна регресија* и *полиномијална регресија*, као и нешто од математике која стоји иза ових техника.\n", + "\n", + "> Кроз овај курикулум претпостављамо минимално знање математике и настојимо да га учинимо доступним студентима из других области, па обратите пажњу на напомене, 🧮 дијаграме и друге алате за учење који ће вам помоћи у разумевању.\n", + "\n", + "#### Припрема\n", + "\n", + "Подсећамо, учитавате ове податке како бисте постављали питања о њима.\n", + "\n", + "- Када је најбоље време за куповину бундева?\n", + "\n", + "- Коју цену могу да очекујем за кутију минијатурних бундева?\n", + "\n", + "- Да ли да их купим у корпама од пола бушела или у кутијама од 1 1/9 бушела? Хајде да наставимо са истраживањем ових података.\n", + "\n", + "У претходној лекцији, креирали сте `tibble` (модерно преобликовање оквира података) и попунили га делом оригиналног скупа података, стандардизујући цене по бушелу. Међутим, на тај начин сте успели да сакупите само око 400 података и то само за јесење месеце. Можда можемо добити мало више детаља о природи података ако их боље очистимо? Видећемо... 
🕵️‍♀️\n", + "\n", + "За овај задатак биће нам потребни следећи пакети:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) је [збирка R пакета](https://www.tidyverse.org/packages) дизајнирана да учини науку о подацима бржом, лакшом и забавнијом!\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) је оквир који представља [збирку пакета](https://www.tidymodels.org/packages/) за моделирање и машинско учење.\n", + "\n", + "- `janitor`: [janitor пакет](https://github.com/sfirke/janitor) пружа једноставне алате за испитивање и чишћење \"прљавих\" података.\n", + "\n", + "- `corrplot`: [corrplot пакет](https://cran.r-project.org/web/packages/corrplot/vignettes/corrplot-intro.html) пружа визуелни алат за истраживање корелационе матрице који подржава аутоматско ређање променљивих како би се открили скривени обрасци међу променљивима.\n", + "\n", + "Можете их инсталирати на следећи начин:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\", \"janitor\", \"corrplot\"))`\n", + "\n", + "Скрипта испод проверава да ли имате потребне пакете за завршетак овог модула и инсталира их за вас у случају да недостају.\n" + ], + "metadata": { + "id": "WqQPS1OAsg3H" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "suppressWarnings(if (!require(\"pacman\")) install.packages(\"pacman\"))\n", + "\n", + "pacman::p_load(tidyverse, tidymodels, janitor, corrplot)" + ], + "outputs": [], + "metadata": { + "id": "tA4C2WN3skCf", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "c06cd805-5534-4edc-f72b-d0d1dab96ac0" + } + }, + { + "cell_type": "markdown", + "source": [ + "Учитаћемо ове сјајне пакете и учинити их доступним у нашој тренутној R сесији. (Ово је само за илустрацију, `pacman::p_load()` је то већ урадио за вас)\n", + "\n", + "## 1. 
Линија линеарне регресије\n", + "\n", + "Као што сте научили у Лекцији 1, циљ вежбе линеарне регресије је да се нацрта *линија* *најбољег* *прилагођавања* како би се:\n", + "\n", + "- **Приказали односи између променљивих**. Приказао однос између променљивих.\n", + "\n", + "- **Направиле предикције**. Направиле тачне предикције о томе где би нова тачка података могла пасти у односу на ту линију.\n", + "\n", + "Да бисмо нацртали ову врсту линије, користимо статистичку технику која се зове **Регресија најмањих квадрата**. Термин `најмањи квадрати` значи да су све тачке података које окружују регресиону линију подигнуте на квадрат и затим сабране. Идеално, та коначна сума је што је могуће мања, јер желимо низак број грешака, односно `најмање квадрате`. Као таква, линија најбољег прилагођавања је линија која нам даје најнижу вредност за збир квадрата грешака - отуда назив *регресија најмањих квадрата*.\n", + "\n", + "Ово радимо јер желимо да моделирамо линију која има најмању кумулативну удаљеност од свих наших тачака података. Такође подижемо термине на квадрат пре него што их саберемо, јер нас занима њихова величина, а не правац.\n", + "\n", + "> **🧮 Покажите ми математику**\n", + ">\n", + "> Ова линија, названа *линија најбољег прилагођавања*, може се изразити [једначином](https://en.wikipedia.org/wiki/Simple_linear_regression):\n", + ">\n", + "> Y = a + bX\n", + ">\n", + "> `X` је '`објашњавајућа променљива` или `предиктор`'. `Y` је '`зависна променљива` или `исход`'. 
Нагиб линије је `b`, а `a` је пресек са Y-осом, што се односи на вредност `Y` када је `X = 0`.\n", + ">\n", + "\n", + "> ![](../../../../../../2-Regression/3-Linear/solution/images/slope.png \"нагиб = $y/x$\")\n", + " Инфографика: Џен Лупер\n", + ">\n", + "> Прво, израчунајте нагиб `b`.\n", + ">\n", + "> Другим речима, и позивајући се на оригинално питање о подацима о бундевама: \"предвидите цену бундеве по бушелу по месецу\", `X` би се односио на цену, а `Y` на месец продаје.\n", + ">\n", + "> ![](../../../../../../2-Regression/3-Linear/solution/images/calculation.png)\n", + " Инфографика: Џен Лупер\n", + "> \n", + "> Израчунајте вредност Y. Ако плаћате око 4 долара, мора да је април!\n", + ">\n", + "> Математика која израчунава линију мора показати нагиб линије, који такође зависи од пресека, односно где се `Y` налази када је `X = 0`.\n", + ">\n", + "> Можете видети метод израчунавања ових вредности на веб сајту [Math is Fun](https://www.mathsisfun.com/data/least-squares-regression.html). Такође посетите [овај калкулатор најмањих квадрата](https://www.mathsisfun.com/data/least-squares-calculator.html) да бисте видели како вредности бројева утичу на линију.\n", + "\n", + "Није тако страшно, зар не? 🤓\n", + "\n", + "#### Корелација\n", + "\n", + "Још један термин који треба разумети је **Коефицијент корелације** између датих X и Y променљивих. Користећи дијаграм расејања, можете брзо визуализовати овај коефицијент. Дијаграм са тачкама података распоређеним у уредну линију има високу корелацију, али дијаграм са тачкама података распршеним свуда између X и Y има ниску корелацију.\n", + "\n", + "Добар модел линеарне регресије биће онај који има висок (ближи 1 него 0) Коефицијент корелације користећи метод Регресије најмањих квадрата са линијом регресије.\n" + ], + "metadata": { + "id": "cdX5FRpvsoP5" + } + }, + { + "cell_type": "markdown", + "source": [ + "## **2. Плес са подацима: креирање дата фрејма који ће се користити за моделирање**\n", + "\n", + "

\n", + " \n", + "

Илустрација: @allison_horst
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "WdUKXk7Bs8-V" + } + }, + { + "cell_type": "markdown", + "source": [ + "Учитајте потребне библиотеке и скуп података. Претворите податке у оквир података који садржи подскуп података:\n", + "\n", + "- Узмите само тикве чија је цена одређена по бушелу\n", + "\n", + "- Претворите датум у месец\n", + "\n", + "- Израчунајте цену као просек високих и ниских цена\n", + "\n", + "- Претворите цену тако да одражава цену по количини бушела\n", + "\n", + "> Ове кораке смо обрадили у [претходној лекцији](https://github.com/microsoft/ML-For-Beginners/blob/main/2-Regression/2-Data/solution/lesson_2-R.ipynb).\n" + ], + "metadata": { + "id": "fMCtu2G2s-p8" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load the core Tidyverse packages\n", + "library(tidyverse)\n", + "library(lubridate)\n", + "\n", + "# Import the pumpkins data\n", + "pumpkins <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/2-Regression/data/US-pumpkins.csv\")\n", + "\n", + "\n", + "# Get a glimpse and dimensions of the data\n", + "glimpse(pumpkins)\n", + "\n", + "\n", + "# Print the first 50 rows of the data set\n", + "pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "ryMVZEEPtERn" + } + }, + { + "cell_type": "markdown", + "source": [ + "У духу чисте авантуре, хајде да истражимо [`janitor package`](https://github.com/sfirke/janitor) који пружа једноставне функције за испитивање и чишћење неуређених података. На пример, хајде да погледамо називе колона за наше податке:\n" + ], + "metadata": { + "id": "xcNxM70EtJjb" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Return column names\n", + "pumpkins %>% \n", + " names()" + ], + "outputs": [], + "metadata": { + "id": "5XtpaIigtPfW" + } + }, + { + "cell_type": "markdown", + "source": [ + "🤔 Можемо боље. 
Хајде да направимо ова имена колона `friendR` тако што ћемо их претворити у [snake_case](https://en.wikipedia.org/wiki/Snake_case) конвенцију користећи `janitor::clean_names`. Да бисте сазнали више о овој функцији: `?clean_names`\n" + ], + "metadata": { + "id": "IbIqrMINtSHe" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Clean names to the snake_case convention\n", + "pumpkins <- pumpkins %>% \n", + " clean_names(case = \"snake\")\n", + "\n", + "# Return column names\n", + "pumpkins %>% \n", + " names()" + ], + "outputs": [], + "metadata": { + "id": "a2uYvclYtWvX" + } + }, + { + "cell_type": "markdown", + "source": [ + "Толико tidyR 🧹! Сада, плес са подацима користећи `dplyr`, као у претходној лекцији! 💃\n" + ], + "metadata": { + "id": "HfhnuzDDtaDd" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Select desired columns\n", + "pumpkins <- pumpkins %>% \n", + " select(variety, city_name, package, low_price, high_price, date)\n", + "\n", + "\n", + "\n", + "# Extract the month from the dates to a new column\n", + "pumpkins <- pumpkins %>%\n", + " mutate(date = mdy(date),\n", + " month = month(date)) %>% \n", + " select(-date)\n", + "\n", + "\n", + "\n", + "# Create a new column for average Price\n", + "pumpkins <- pumpkins %>% \n", + " mutate(price = (low_price + high_price)/2)\n", + "\n", + "\n", + "# Retain only pumpkins with the string \"bushel\"\n", + "new_pumpkins <- pumpkins %>% \n", + " filter(str_detect(string = package, pattern = \"bushel\"))\n", + "\n", + "\n", + "# Normalize the pricing so that you show the pricing per bushel, not per 1 1/9 or 1/2 bushel\n", + "new_pumpkins <- new_pumpkins %>% \n", + " mutate(price = case_when(\n", + " str_detect(package, \"1 1/9\") ~ price/(1.1),\n", + " str_detect(package, \"1/2\") ~ price*2,\n", + " TRUE ~ price))\n", + "\n", + "# Relocate column positions\n", + "new_pumpkins <- new_pumpkins %>% \n", + " relocate(month, .before = variety)\n", + "\n", + 
"\n", + "# Display the first 5 rows\n", + "new_pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "X0wU3gQvtd9f" + } + }, + { + "cell_type": "markdown", + "source": [ + "Одличан посао!👌 Сада имате чист, уредан скуп података на којем можете изградити свој нови регресиони модел!\n", + "\n", + "Шта кажете на расејани графикон?\n" + ], + "metadata": { + "id": "UpaIwaxqth82" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Set theme\n", + "theme_set(theme_light())\n", + "\n", + "# Make a scatter plot of month and price\n", + "new_pumpkins %>% \n", + " ggplot(mapping = aes(x = month, y = price)) +\n", + " geom_point(size = 1.6)\n" + ], + "outputs": [], + "metadata": { + "id": "DXgU-j37tl5K" + } + }, + { + "cell_type": "markdown", + "source": [ + "Дијаграм распршивања нас подсећа да имамо податке о месецима само од августа до децембра. Вероватно нам је потребно више података како бисмо могли да извучемо закључке на линеаран начин.\n", + "\n", + "Хајде да поново погледамо наше податке за моделирање:\n" + ], + "metadata": { + "id": "Ve64wVbwtobI" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Display first 5 rows\n", + "new_pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "HFQX2ng1tuSJ" + } + }, + { + "cell_type": "markdown", + "source": [ + "Шта ако бисмо желели да предвидимо `price` тикве на основу колона `city` или `package`, које су типа карактер? Или још једноставније, како бисмо могли да пронађемо корелацију (која захтева да оба уноса буду нумеричка) између, рецимо, `package` и `price`? 
🤷🤷\n", + "\n", + "Модели машинског учења најбоље функционишу са нумеричким карактеристикама уместо текстуалних вредности, тако да је генерално потребно претворити категоријалне карактеристике у нумеричке репрезентације.\n", + "\n", + "То значи да морамо пронаћи начин да преобликујемо наше предикторе како бисмо их учинили погоднијим за ефикасно коришћење у моделу, процес познат као `feature engineering`.\n" + ], + "metadata": { + "id": "7hsHoxsStyjJ" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 3. Предобрада података за моделирање са рецептурама 👩‍🍳👨‍🍳\n", + "\n", + "Активности које преобликују вредности предиктора како би их учиниле лакшим за ефикасно коришћење модела називају се `инжењеринг карактеристика`.\n", + "\n", + "Различити модели имају различите захтеве за предобраду. На пример, метода најмањих квадрата захтева `кодирање категоријалних променљивих` као што су месец, сорта и назив града. Ово једноставно подразумева `превод` колоне са `категоријалним вредностима` у једну или више `нумеричких колона` које замењују оригиналну.\n", + "\n", + "На пример, претпоставимо да ваши подаци укључују следећу категоријалну карактеристику:\n", + "\n", + "| град |\n", + "|:-------:|\n", + "| Денвер |\n", + "| Најроби |\n", + "| Токио |\n", + "\n", + "Можете применити *ординално кодирање* да замените јединствену целобројну вредност за сваку категорију, овако:\n", + "\n", + "| град |\n", + "|:----:|\n", + "| 0 |\n", + "| 1 |\n", + "| 2 |\n", + "\n", + "И то је оно што ћемо урадити са нашим подацима!\n", + "\n", + "У овом одељку, истражићемо још један невероватан пакет из Tidymodels-а: [recipes](https://tidymodels.github.io/recipes/) - који је осмишљен да вам помогне да предобрадите своје податке **пре** тренинга модела. 
У својој суштини, рецепт је објекат који дефинише које кораке треба применити на скуп података како би био спреман за моделирање.\n", + "\n", + "Сада, хајде да направимо рецепт који припрема наше податке за моделирање заменом јединственог целог броја за све опсервације у колонама предиктора:\n" + ], + "metadata": { + "id": "AD5kQbcvt3Xl" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Specify a recipe\n", + "pumpkins_recipe <- recipe(price ~ ., data = new_pumpkins) %>% \n", + " step_integer(all_predictors(), zero_based = TRUE)\n", + "\n", + "\n", + "# Print out the recipe\n", + "pumpkins_recipe" + ], + "outputs": [], + "metadata": { + "id": "BNaFKXfRt9TU" + } + }, + { + "cell_type": "markdown", + "source": [ + "Сјајно! 👏 Управо смо направили наш први рецепт који одређује исход (цену) и његове одговарајуће предикторе, а такође и да све колоне предиктора треба да буду кодиране у скуп целих бројева 🙌! Хајде да брзо разложимо шта смо урадили:\n", + "\n", + "- Позив функције `recipe()` са формулом говори рецепту *улоге* променљивих користећи податке `new_pumpkins` као референцу. На пример, колона `price` је добила улогу `outcome`, док су остале колоне добиле улогу `predictor`.\n", + "\n", + "- `step_integer(all_predictors(), zero_based = TRUE)` одређује да сви предиктори треба да буду претворени у скуп целих бројева, при чему нумерација почиње од 0.\n", + "\n", + "Сигурни смо да вам можда падају на памет мисли попут: \"Ово је тако кул!! Али шта ако треба да проверим да ли рецепти раде тачно оно што очекујем? 🤔\"\n", + "\n", + "То је сјајна мисао! Видите, када једном дефинишете рецепт, можете проценити параметре који су потребни за стварну предобраду података, а затим извући обрађене податке. 
Обично ово није потребно када користите Tidymodels (ускоро ћемо видети уобичајену конвенцију -\\> `workflows`), али може бити корисно када желите да урадите неку врсту провере како бисте потврдили да рецепти раде оно што очекујете.\n", + "\n", + "За то ће вам бити потребна још два глагола: `prep()` и `bake()`, а као и увек, наши мали пријатељи из R-а од стране [`Allison Horst`](https://github.com/allisonhorst/stats-illustrations) помоћи ће вам да ово боље разумете!\n", + "\n", + "

\n", + " \n", + "

Илустрација: @allison_horst
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "KEiO0v7kuC9O" + } + }, + { + "cell_type": "markdown", + "source": [ + "[`prep()`](https://recipes.tidymodels.org/reference/prep.html): процењује потребне параметре из тренинг скупа који касније могу бити примењени на друге скупове података. На пример, за дату колону предиктора, којој опсервацији ће бити додељен цео број 0, 1, 2 итд.\n", + "\n", + "[`bake()`](https://recipes.tidymodels.org/reference/bake.html): узима припремљен рецепт и примењује операције на било који скуп података.\n", + "\n", + "С тим речено, хајде да припремимо и применимо наше рецепте како бисмо заиста потврдили да ће, у позадини, колоне предиктора прво бити кодиране пре него што се модел прилагоди.\n" + ], + "metadata": { + "id": "Q1xtzebuuTCP" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Prep the recipe\n", + "pumpkins_prep <- prep(pumpkins_recipe)\n", + "\n", + "# Bake the recipe to extract a preprocessed new_pumpkins data\n", + "baked_pumpkins <- bake(pumpkins_prep, new_data = NULL)\n", + "\n", + "# Print out the baked data set\n", + "baked_pumpkins %>% \n", + " slice_head(n = 10)" + ], + "outputs": [], + "metadata": { + "id": "FGBbJbP_uUUn" + } + }, + { + "cell_type": "markdown", + "source": [ + "Ура! 🥳 Обрађени подаци `baked_pumpkins` имају све своје предикторе кодиране, што потврђује да ће кораци предобраде дефинисани као наш рецепт функционисати како је очекивано. Ово чини податке теже читљивим за вас, али много разумљивијим за Tidymodels! Одвојите мало времена да откријете која је опсервација мапирана на одговарајућу целобројну вредност.\n", + "\n", + "Такође је вредно напоменути да је `baked_pumpkins` оквир података на којем можемо изводити прорачуне.\n", + "\n", + "На пример, хајде да покушамо да пронађемо добру корелацију између две тачке ваших података како бисмо потенцијално изградили добар предиктивни модел. Користићемо функцију `cor()` да то урадимо. 
Укуцајте `?cor()` да бисте сазнали више о функцији.\n" + ], + "metadata": { + "id": "1dvP0LBUueAW" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Find the correlation between the city_name and the price\n", + "cor(baked_pumpkins$city_name, baked_pumpkins$price)\n", + "\n", + "# Find the correlation between the package and the price\n", + "cor(baked_pumpkins$package, baked_pumpkins$price)\n" + ], + "outputs": [], + "metadata": { + "id": "3bQzXCjFuiSV" + } + }, + { + "cell_type": "markdown", + "source": [ + "Испоставило се да постоји само слаба корелација између Града и Цене. Међутим, постоји нешто боља корелација између Пакета и његове Цене. То има смисла, зар не? Обично, што је већа кутија са производима, то је виша цена.\n", + "\n", + "Док смо већ код тога, хајде да покушамо и да визуализујемо матрицу корелације свих колона користећи пакет `corrplot`.\n" + ], + "metadata": { + "id": "BToPWbgjuoZw" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load the corrplot package\n", + "library(corrplot)\n", + "\n", + "# Obtain correlation matrix\n", + "corr_mat <- cor(baked_pumpkins %>% \n", + " # Drop columns that are not really informative\n", + " select(-c(low_price, high_price)))\n", + "\n", + "# Make a correlation plot between the variables\n", + "corrplot(corr_mat, method = \"shade\", shade.col = NA, tl.col = \"black\", tl.srt = 45, addCoef.col = \"black\", cl.pos = \"n\", order = \"original\")" + ], + "outputs": [], + "metadata": { + "id": "ZwAL3ksmutVR" + } + }, + { + "cell_type": "markdown", + "source": [ + "🤩🤩 Много боље.\n", + "\n", + "Добро питање које сада можемо поставити о овим подацима је: '`Коју цену могу очекивати за одређени пакет бундева?`' Хајде да одмах пређемо на то!\n", + "\n", + "> Напомена: Када **`bake()`** припремљени рецепт **`pumpkins_prep`** са **`new_data = NULL`**, добијате обрађене (тј. кодиране) податке за обуку. 
Ако имате други скуп података, на пример тест сет, и желите да видите како би рецепт обрадио те податке, једноставно бисте испекли **`pumpkins_prep`** са **`new_data = test_set`**.\n", + "\n", + "## 4. Изградња модела линеарне регресије\n", + "\n", + "

\n", + " \n", + "

Инфографика: Дасани Мадипали
\n" + ], + "metadata": { + "id": "YqXjLuWavNxW" + } + }, + { + "cell_type": "markdown", + "source": [ + "Сада када смо направили рецепт и заправо потврдили да ће подаци бити одговарајуће претходно обрађени, хајде сада да направимо регресиони модел како бисмо одговорили на питање: `Коју цену могу очекивати за одређени пакет бундеве?`\n", + "\n", + "#### Обучавање линеарног регресионог модела користећи тренинг сет\n", + "\n", + "Као што сте вероватно већ закључили, колона *price* је `излазна` променљива, док је колона *package* `предикторска` променљива.\n", + "\n", + "Да бисмо то урадили, прво ћемо поделити податке тако да 80% иде у тренинг сет, а 20% у тест сет, затим ћемо дефинисати рецепт који ће кодирати предикторску колону у скуп целих бројева, а потом ћемо направити спецификацију модела. Нећемо припремати и обрађивати наш рецепт јер већ знамо да ће он претходно обрадити податке како је очекивано.\n" + ], + "metadata": { + "id": "Pq0bSzCevW-h" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "set.seed(2056)\n", + "# Split the data into training and test sets\n", + "pumpkins_split <- new_pumpkins %>% \n", + " initial_split(prop = 0.8)\n", + "\n", + "\n", + "# Extract training and test data\n", + "pumpkins_train <- training(pumpkins_split)\n", + "pumpkins_test <- testing(pumpkins_split)\n", + "\n", + "\n", + "\n", + "# Create a recipe for preprocessing the data\n", + "lm_pumpkins_recipe <- recipe(price ~ package, data = pumpkins_train) %>% \n", + " step_integer(all_predictors(), zero_based = TRUE)\n", + "\n", + "\n", + "\n", + "# Create a linear model specification\n", + "lm_spec <- linear_reg() %>% \n", + " set_engine(\"lm\") %>% \n", + " set_mode(\"regression\")" + ], + "outputs": [], + "metadata": { + "id": "CyoEh_wuvcLv" + } + }, + { + "cell_type": "markdown", + "source": [ + "Одлично! 
Сада када имамо рецепт и спецификацију модела, потребно је да пронађемо начин да их спојимо у један објекат који ће прво обрадити податке (припрема + обрада у позадини), затим обучити модел на обрађеним подацима, а такође омогућити потенцијалне активности пост-обраде. Како ти се то чини за мир у души!🤩\n", + "\n", + "У оквиру Tidymodels-а, овај практични објекат се зове [`workflow`](https://workflows.tidymodels.org/) и згодно чува све компоненте твог модела! Ово је оно што бисмо у *Python*-у назвали *pipelines*.\n", + "\n", + "Хајде да све спакујемо у један workflow!📦\n" + ], + "metadata": { + "id": "G3zF_3DqviFJ" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Hold modelling components in a workflow\n", + "lm_wf <- workflow() %>% \n", + " add_recipe(lm_pumpkins_recipe) %>% \n", + " add_model(lm_spec)\n", + "\n", + "# Print out the workflow\n", + "lm_wf" + ], + "outputs": [], + "metadata": { + "id": "T3olroU3v-WX" + } + }, + { + "cell_type": "markdown", + "source": [ + "Узгред, ток рада може бити прилагођен/трениран на сличан начин као што се тренира модел.\n" + ], + "metadata": { + "id": "zd1A5tgOwEPX" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Train the model\n", + "lm_wf_fit <- lm_wf %>% \n", + " fit(data = pumpkins_train)\n", + "\n", + "# Print the model coefficients learned \n", + "lm_wf_fit" + ], + "outputs": [], + "metadata": { + "id": "NhJagFumwFHf" + } + }, + { + "cell_type": "markdown", + "source": [ + "Из излаза модела можемо видети коефицијенте који су научени током тренинга. Они представљају коефицијенте праве најбољег прилагођавања која нам даје најмању укупну грешку између стварне и предвиђене променљиве.\n", + "\n", + "#### Процена перформанси модела коришћењем тест скупа\n", + "\n", + "Време је да видимо како се модел показао 📏! Како то радимо?\n", + "\n", + "Сада када смо обучили модел, можемо га користити за прављење предвиђања за test_set користећи `parsnip::predict()`. 
Затим можемо упоредити ова предвиђања са стварним вредностима ознака како бисмо проценили колико добро (или не!) модел функционише.\n", + "\n", + "Хајде да почнемо са прављењем предвиђања за тест скуп, а затим да повежемо колоне са тест скупом.\n" + ], + "metadata": { + "id": "_4QkGtBTwItF" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make predictions for the test set\n", + "predictions <- lm_wf_fit %>% \n", + " predict(new_data = pumpkins_test)\n", + "\n", + "\n", + "# Bind predictions to the test set\n", + "lm_results <- pumpkins_test %>% \n", + " select(c(package, price)) %>% \n", + " bind_cols(predictions)\n", + "\n", + "\n", + "# Print the first ten rows of the tibble\n", + "lm_results %>% \n", + " slice_head(n = 10)" + ], + "outputs": [], + "metadata": { + "id": "UFZzTG0gwTs9" + } + }, + { + "cell_type": "markdown", + "source": [ + "Да, управо сте обучили модел и користили га за прављење предикција! 🔮 Да ли је добар? Хајде да проценимо перформансе модела!\n", + "\n", + "У оквиру Tidymodels-а, ово радимо помоћу `yardstick::metrics()`! За линеарну регресију, фокусираћемо се на следеће метрике:\n", + "\n", + "- `Root Mean Square Error (RMSE)`: Квадратни корен од [MSE](https://en.wikipedia.org/wiki/Mean_squared_error). Ова метрика даје апсолутну вредност у истој јединици као и ознака (у овом случају, цена бундеве). Што је вредност мања, то је модел бољи (у поједностављеном смислу, представља просечну цену за коју су предикције погрешне!).\n", + "\n", + "- `Coefficient of Determination (обично познат као R-squared или R2)`: Релативна метрика код које је већа вредност боља за модел. 
У суштини, ова метрика представља колико варијансе између предвиђених и стварних вредности ознака модел може да објасни.\n" + ], + "metadata": { + "id": "0A5MjzM7wW9M" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Evaluate performance of linear regression\n", + "metrics(data = lm_results,\n", + " truth = price,\n", + " estimate = .pred)" + ], + "outputs": [], + "metadata": { + "id": "reJ0UIhQwcEH" + } + }, + { + "cell_type": "markdown", + "source": [ + "Ту иде перформанс модела. Хајде да видимо да ли можемо добити бољу индикацију визуелизацијом расејаног графикона пакета и цене, а затим користити предвиђања за преклапање линије најбољег уклапања.\n", + "\n", + "То значи да ћемо морати припремити и обрадити тестни сет како бисмо кодирали колону пакета, а затим је повезати са предвиђањима која је наш модел направио.\n" + ], + "metadata": { + "id": "fdgjzjkBwfWt" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Encode package column\n", + "package_encode <- lm_pumpkins_recipe %>% \n", + " prep() %>% \n", + " bake(new_data = pumpkins_test) %>% \n", + " select(package)\n", + "\n", + "\n", + "# Bind encoded package column to the results\n", + "lm_results <- lm_results %>% \n", + " bind_cols(package_encode %>% \n", + " rename(package_integer = package)) %>% \n", + " relocate(package_integer, .after = package)\n", + "\n", + "\n", + "# Print new results data frame\n", + "lm_results %>% \n", + " slice_head(n = 5)\n", + "\n", + "\n", + "# Make a scatter plot\n", + "lm_results %>% \n", + " ggplot(mapping = aes(x = package_integer, y = price)) +\n", + " geom_point(size = 1.6) +\n", + " # Overlay a line of best fit\n", + " geom_line(aes(y = .pred), color = \"orange\", size = 1.2) +\n", + " xlab(\"package\")\n", + " \n" + ], + "outputs": [], + "metadata": { + "id": "R0nw719lwkHE" + } + }, + { + "cell_type": "markdown", + "source": [ + "Одлично! 
Као што можете видети, линеарни регресиони модел не успева добро да генерализује однос између пакета и његове одговарајуће цене.\n", + "\n", + "🎃 Честитамо, управо сте направили модел који може помоћи у предвиђању цене неколико врста бундева. Ваш празнични бундева врт ће бити прелеп. Али вероватно можете направити бољи модел!\n", + "\n", + "## 5. Направите полиномијални регресиони модел\n", + "\n", + "

\n", + " \n", + "

Инфографика: Дасани Мадипали
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "HOCqJXLTwtWI" + } + }, + { + "cell_type": "markdown", + "source": [ + "Понекад наши подаци можда немају линеарну везу, али и даље желимо да предвидимо исход. Полиномијална регресија може нам помоћи да направимо предвиђања за сложеније нелинеарне односе.\n", + "\n", + "Узмимо, на пример, однос између паковања и цене у нашем скупу података о бундевама. Иако понекад постоји линеарна веза између променљивих - што је већа бундева по запремини, то је већа цена - понекад се ти односи не могу приказати као раван или права линија.\n", + "\n", + "> ✅ Ево [још неких примера](https://online.stat.psu.edu/stat501/lesson/9/9.8) података који би могли користити полиномијалну регресију\n", + ">\n", + "> Поново погледајте однос између сорте и цене на претходном графикону. Да ли овај распршени графикон изгледа као да би нужно требало да се анализира правом линијом? Можда не. У овом случају, можете пробати полиномијалну регресију.\n", + ">\n", + "> ✅ Полиноми су математички изрази који могу садржати једну или више променљивих и коефицијената\n", + "\n", + "#### Тренирајте модел полиномијалне регресије користећи скуп за тренирање\n", + "\n", + "Полиномијална регресија креира *закривљену линију* како би боље одговарала нелинеарним подацима.\n", + "\n", + "Хајде да видимо да ли ће полиномијални модел боље функционисати у прављењу предвиђања. Следићемо донекле сличан поступак као што смо радили раније:\n", + "\n", + "- Направите рецепт који одређује кораке предобраде који треба да се спроведу на нашим подацима како би били спремни за моделирање, тј. 
кодирање предиктора и израчунавање полинома степена *n*\n", + "\n", + "- Направите спецификацију модела\n", + "\n", + "- Спојите рецепт и спецификацију модела у радни ток\n", + "\n", + "- Креирајте модел тако што ћете прилагодити радни ток\n", + "\n", + "- Процените колико добро модел функционише на тест подацима\n", + "\n", + "Хајде да почнемо!\n" + ], + "metadata": { + "id": "VcEIpRV9wzYr" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Specify a recipe\r\n", + "poly_pumpkins_recipe <-\r\n", + " recipe(price ~ package, data = pumpkins_train) %>%\r\n", + " step_integer(all_predictors(), zero_based = TRUE) %>% \r\n", + " step_poly(all_predictors(), degree = 4)\r\n", + "\r\n", + "\r\n", + "# Create a model specification\r\n", + "poly_spec <- linear_reg() %>% \r\n", + " set_engine(\"lm\") %>% \r\n", + " set_mode(\"regression\")\r\n", + "\r\n", + "\r\n", + "# Bundle recipe and model spec into a workflow\r\n", + "poly_wf <- workflow() %>% \r\n", + " add_recipe(poly_pumpkins_recipe) %>% \r\n", + " add_model(poly_spec)\r\n", + "\r\n", + "\r\n", + "# Create a model\r\n", + "poly_wf_fit <- poly_wf %>% \r\n", + " fit(data = pumpkins_train)\r\n", + "\r\n", + "\r\n", + "# Print learned model coefficients\r\n", + "poly_wf_fit\r\n", + "\r\n", + " " + ], + "outputs": [], + "metadata": { + "id": "63n_YyRXw3CC" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### Оцена перформанси модела\n", + "\n", + "👏👏 Направили сте полиномни модел, хајде да направимо предвиђања на тест скупу!\n" + ], + "metadata": { + "id": "-LHZtztSxDP0" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make price predictions on test data\r\n", + "poly_results <- poly_wf_fit %>% predict(new_data = pumpkins_test) %>% \r\n", + " bind_cols(pumpkins_test %>% select(c(package, price))) %>% \r\n", + " relocate(.pred, .after = last_col())\r\n", + "\r\n", + "\r\n", + "# Print the results\r\n", + "poly_results %>% \r\n", + " slice_head(n = 10)" 
+ ], + "outputs": [], + "metadata": { + "id": "YUFpQ_dKxJGx" + } + }, + { + "cell_type": "markdown", + "source": [ + "Ву-ху, хајде да проценимо како је модел извршио на test_set користећи `yardstick::metrics()`.\n" + ], + "metadata": { + "id": "qxdyj86bxNGZ" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "metrics(data = poly_results, truth = price, estimate = .pred)" + ], + "outputs": [], + "metadata": { + "id": "8AW5ltkBxXDm" + } + }, + { + "cell_type": "markdown", + "source": [ + "🤩🤩 Много бољи учинак.\n", + "\n", + "`rmse` се смањио са отприлике 7 на отприлике 3, што указује на смањење грешке између стварне цене и предвиђене цене. Ово можете *слободно* тумачити као да су у просеку нетачне прогнозе погрешне за око \\$3. `rsq` се повећао са отприлике 0.4 на 0.8.\n", + "\n", + "Сви ови показатељи указују на то да полиномски модел ради много боље од линеарног модела. Одличан посао!\n", + "\n", + "Хајде да видимо да ли можемо ово да визуализујемо!\n" + ], + "metadata": { + "id": "6gLHNZDwxYaS" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Bind encoded package column to the results\r\n", + "poly_results <- poly_results %>% \r\n", + " bind_cols(package_encode %>% \r\n", + " rename(package_integer = package)) %>% \r\n", + " relocate(package_integer, .after = package)\r\n", + "\r\n", + "\r\n", + "# Print new results data frame\r\n", + "poly_results %>% \r\n", + " slice_head(n = 5)\r\n", + "\r\n", + "\r\n", + "# Make a scatter plot\r\n", + "poly_results %>% \r\n", + " ggplot(mapping = aes(x = package_integer, y = price)) +\r\n", + " geom_point(size = 1.6) +\r\n", + " # Overlay a line of best fit\r\n", + " geom_line(aes(y = .pred), color = \"midnightblue\", size = 1.2) +\r\n", + " xlab(\"package\")\r\n" + ], + "outputs": [], + "metadata": { + "id": "A83U16frxdF1" + } + }, + { + "cell_type": "markdown", + "source": [ + "Можете видети закривљену линију која боље одговара вашим подацима! 
🤩\n", + "\n", + "Можете је учинити још глаткијом тако што ћете проследити полиномску формулу функцији `geom_smooth` на следећи начин:\n" + ], + "metadata": { + "id": "4U-7aHOVxlGU" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make a scatter plot\r\n", + "poly_results %>% \r\n", + " ggplot(mapping = aes(x = package_integer, y = price)) +\r\n", + " geom_point(size = 1.6) +\r\n", + " # Overlay a line of best fit\r\n", + " geom_smooth(method = lm, formula = y ~ poly(x, degree = 4), color = \"midnightblue\", size = 1.2, se = FALSE) +\r\n", + " xlab(\"package\")" + ], + "outputs": [], + "metadata": { + "id": "5vzNT0Uexm-w" + } + }, + { + "cell_type": "markdown", + "source": [ + "Баш као глатка крива!🤩\n", + "\n", + "Ево како можете направити нову прогнозу:\n" + ], + "metadata": { + "id": "v9u-wwyLxq4G" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make a hypothetical data frame\r\n", + "hypo_tibble <- tibble(package = \"bushel baskets\")\r\n", + "\r\n", + "# Make predictions using linear model\r\n", + "lm_pred <- lm_wf_fit %>% predict(new_data = hypo_tibble)\r\n", + "\r\n", + "# Make predictions using polynomial model\r\n", + "poly_pred <- poly_wf_fit %>% predict(new_data = hypo_tibble)\r\n", + "\r\n", + "# Return predictions in a list\r\n", + "list(\"linear model prediction\" = lm_pred, \r\n", + " \"polynomial model prediction\" = poly_pred)\r\n" + ], + "outputs": [], + "metadata": { + "id": "jRPSyfQGxuQv" + } + }, + { + "cell_type": "markdown", + "source": [ + "Предвиђање помоћу `полиномског модела` има смисла, с обзиром на распршене графике `цена` и `пакета`! И, ако је ово бољи модел од претходног, гледајући исте податке, потребно је да планирате буџет за ове скупље тикве!\n", + "\n", + "🏆 Браво! Направили сте два модела регресије у једном часу. 
У завршном делу о регресији, научићете о логистичкој регресији за одређивање категорија.\n", + "\n", + "## **🚀Изазов**\n", + "\n", + "Тестирајте неколико различитих променљивих у овом нотебуку да видите како корелација утиче на тачност модела.\n", + "\n", + "## [**Квиз након предавања**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/14/)\n", + "\n", + "## **Преглед и Самостално учење**\n", + "\n", + "У овом часу смо научили о Линеарној регресији. Постоје и други важни типови регресије. Прочитајте о техникама Stepwise, Ridge, Lasso и Elasticnet. Добар курс за даље учење је [Stanford Statistical Learning course](https://online.stanford.edu/courses/sohs-ystatslearning-statistical-learning).\n", + "\n", + "Ако желите да научите више о коришћењу невероватног Tidymodels оквира, погледајте следеће ресурсе:\n", + "\n", + "- Веб-сајт Tidymodels: [Почните са Tidymodels](https://www.tidymodels.org/start/)\n", + "\n", + "- Мак Кун и Џулија Силџ, [*Tidy Modeling with R*](https://www.tmwr.org/)*.*\n", + "\n", + "###### **ХВАЛА:**\n", + "\n", + "[Елисон Хорст](https://twitter.com/allison_horst?lang=en) за креирање невероватних илустрација које чине R приступачнијим и занимљивијим. Пронађите више илустрација у њеној [галерији](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM).\n" + ], + "metadata": { + "id": "8zOLOWqMxzk5" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако тежимо тачности, молимо вас да имате у виду да аутоматски преводи могу садржати грешке или нетачности. Оригинални документ на изворном језику треба сматрати ауторитативним извором. За критичне информације препоручује се професионални превод од стране људи. 
Не сносимо одговорност за било каква погрешна тумачења или неспоразуме који могу произаћи из коришћења овог превода.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sr/2-Regression/3-Linear/solution/notebook.ipynb b/translations/sr/2-Regression/3-Linear/solution/notebook.ipynb new file mode 100644 index 000000000..8ccddf415 --- /dev/null +++ b/translations/sr/2-Regression/3-Linear/solution/notebook.ipynb @@ -0,0 +1,1113 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Линеарна и полиномијална регресија за одређивање цене бундева - Лекција 3\n", + "\n", + "Учитајте потребне библиотеке и скуп података. Претворите податке у датафрејм који садржи подскуп података:\n", + "\n", + "- Узмите само бундеве чија је цена дата по бушелу \n", + "- Претворите датум у месец \n", + "- Израчунајте цену као просек између највише и најниже цене \n", + "- Претворите цену тако да одражава цену по количини у бушелима \n" + ] + }, + { + "cell_type": "code", + "execution_count": 167, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NameTypePackageVarietySub VarietyGradeDateLow PriceHigh PriceMostly Low...Unit of SaleQualityConditionAppearanceStorageCropRepackTrans ModeUnnamed: 24Unnamed: 25
0BALTIMORENaN24 inch binsNaNNaNNaN4/29/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
1BALTIMORENaN24 inch binsNaNNaNNaN5/6/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
2BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
3BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
4BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN11/5/1690.0100.090.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " City Name Type Package Variety Sub Variety Grade Date \\\n", + "0 BALTIMORE NaN 24 inch bins NaN NaN NaN 4/29/17 \n", + "1 BALTIMORE NaN 24 inch bins NaN NaN NaN 5/6/17 \n", + "2 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "3 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "4 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 11/5/16 \n", + "\n", + " Low Price High Price Mostly Low ... Unit of Sale Quality Condition \\\n", + "0 270.0 280.0 270.0 ... NaN NaN NaN \n", + "1 270.0 280.0 270.0 ... NaN NaN NaN \n", + "2 160.0 160.0 160.0 ... NaN NaN NaN \n", + "3 160.0 160.0 160.0 ... NaN NaN NaN \n", + "4 90.0 100.0 90.0 ... NaN NaN NaN \n", + "\n", + " Appearance Storage Crop Repack Trans Mode Unnamed: 24 Unnamed: 25 \n", + "0 NaN NaN NaN E NaN NaN NaN \n", + "1 NaN NaN NaN E NaN NaN NaN \n", + "2 NaN NaN NaN N NaN NaN NaN \n", + "3 NaN NaN NaN N NaN NaN NaN \n", + "4 NaN NaN NaN N NaN NaN NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 167, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from datetime import datetime\n", + "\n", + "pumpkins = pd.read_csv('../../data/US-pumpkins.csv')\n", + "pumpkins.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 168, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MonthDayOfYearVarietyCityPackageLow PriceHigh PricePrice
709267PIE TYPEBALTIMORE1 1/9 bushel cartons15.015.013.636364
719267PIE TYPEBALTIMORE1 1/9 bushel cartons18.018.016.363636
7210274PIE TYPEBALTIMORE1 1/9 bushel cartons18.018.016.363636
7310274PIE TYPEBALTIMORE1 1/9 bushel cartons17.017.015.454545
7410281PIE TYPEBALTIMORE1 1/9 bushel cartons15.015.013.636364
\n", + "
" + ], + "text/plain": [ + " Month DayOfYear Variety City Package Low Price \\\n", + "70 9 267 PIE TYPE BALTIMORE 1 1/9 bushel cartons 15.0 \n", + "71 9 267 PIE TYPE BALTIMORE 1 1/9 bushel cartons 18.0 \n", + "72 10 274 PIE TYPE BALTIMORE 1 1/9 bushel cartons 18.0 \n", + "73 10 274 PIE TYPE BALTIMORE 1 1/9 bushel cartons 17.0 \n", + "74 10 281 PIE TYPE BALTIMORE 1 1/9 bushel cartons 15.0 \n", + "\n", + " High Price Price \n", + "70 15.0 13.636364 \n", + "71 18.0 16.363636 \n", + "72 18.0 16.363636 \n", + "73 17.0 15.454545 \n", + "74 15.0 13.636364 " + ] + }, + "execution_count": 168, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pumpkins = pumpkins[pumpkins['Package'].str.contains('bushel', case=True, regex=True)]\n", + "\n", + "new_columns = ['Package', 'Variety', 'City Name', 'Month', 'Low Price', 'High Price', 'Date']\n", + "pumpkins = pumpkins.drop([c for c in pumpkins.columns if c not in new_columns], axis=1)\n", + "\n", + "price = (pumpkins['Low Price'] + pumpkins['High Price']) / 2\n", + "\n", + "month = pd.DatetimeIndex(pumpkins['Date']).month\n", + "day_of_year = pd.to_datetime(pumpkins['Date']).apply(lambda dt: (dt-datetime(dt.year,1,1)).days)\n", + "\n", + "new_pumpkins = pd.DataFrame(\n", + " {'Month': month, \n", + " 'DayOfYear' : day_of_year, \n", + " 'Variety': pumpkins['Variety'], \n", + " 'City': pumpkins['City Name'], \n", + " 'Package': pumpkins['Package'], \n", + " 'Low Price': pumpkins['Low Price'],\n", + " 'High Price': pumpkins['High Price'], \n", + " 'Price': price})\n", + "\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1 1/9'), 'Price'] = price/1.1\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1/2'), 'Price'] = price*2\n", + "\n", + "new_pumpkins.head()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Скатерплот нас подсећа да имамо податке само за месеце од августа до децембра. 
Вероватно нам је потребно више података како бисмо могли да извучемо закључке на линеаран начин.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 169, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 169, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "new_pumpkins.plot.scatter('Month','Price')" + ] + }, + { + "cell_type": "code", + "execution_count": 170, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 170, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEGCAYAAABiq/5QAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAshElEQVR4nO3dfZyU5Xno8d+1y7IgiwLLunJkV2xWSangRreKIVoVQ03qEdKon74YTKMlzan9pCatmLTHGtvaiE3M6UlOq4k59SVNJJiA9ZgIEqxRwThQXhQ0bAOyGFhwBd01sO7Ldf6YZ5aZ3ZndmWfuZ+aemev7+Sw7c+8z19zP7HDtM/erqCrGGGMqR1WxK2CMMaawLPEbY0yFscRvjDEVxhK/McZUGEv8xhhTYcYVuwLZmD59us6aNavY1TDGmJKyefPmN1W1YXh5SST+WbNmEYvFil0NY4wpKSLyerpya+oxxpgKY4nfGGMqjCV+Y4ypMJb4jTGmwljiN8aYCmOJ32TU1dPLto6jdPX0FrsqxhiHSmI4pym8NVvfYPlj26mpqqJvcJAVH5/H1a2nF7taxhgH7IrfjNDV08vyx7ZzvG+Q7t5+jvcNcutj2+3K35gyYYnfjLD/yDFqqlLfGjVVVew/cqxINTLGuGSJ34wwc+pE+gYHU8r6BgeZOXVikWpkjHHJEr8Zob6ulhUfn8eEmiom145jQk0VKz4+j/q62rziWmexMX6wzl2T1tWtp7OgZTr7jxxj5tSJeSd96yw2xh+RJn4R2Qt0AwNAv6q2icg04FFgFrAXuE5Vj0RZDxNOfV1t3gkfUjuLjxNvQrr1se0saJnuJL4xJjeFaOq5TFVbVbUtuH8bsF5VzwLWB/dNGbPOYmP8Uow2/sXAg8HtB4ElRaiDKSDrLDbGL1EnfgXWishmEVkWlDWq6gGA4PupEdfBFFlUncXGmHCi7txdoKq/FJFTgXUi8mq2Dwz+UCwDaG5ujqp+pkBcdxYbY8KL9IpfVX8ZfD8E/BC4AOgUkRkAwfdDGR57v6q2qWpbQ8OIncNMCaqvq+XcpimW9I0pssgSv4hMEpHJidvAIuBl4HHghuCwG4A1UdXBGGPMSFE29TQCPxSRxPP8m6r+WEReAlaKyI3APuDaCOtgjDFmmMgSv6r+Ajg3TXkXsDCq5zXGGDM6W7LBGGMqjCX+InK9do3v8YwxfrC1eorE9do1vsczxvjDrviLwPVGJ77HM8b4xRJ/Ebheu8b3eMYYv1jiLwLXa9f4Hs8Y4xdL/EXgeu0a3+MlWGexMX4QVS12HcbU1tamsVis2NVwrqun1+naNT7Hs85iYwpPRDYnLYk/xEb1FJGrjU58j2cbsRjjF2vqMZEr5c5i181T7Z3drIp10N7Z7SSeNZ+ZMOyK30SuVDuLXTdP3b56Bw9t2jd0f+lFzdy5eK439TOVw674Tc5yvWotxY1YXM9laO/sTkn6AA9t3Bf6yt/mWph82BW/yUnYq9ZS24gl0TyV6JOAE81TYeq+teNoxvKWxslFr5+pLHbFb7KW71VrKW3E4rp5qrVpSk7lYynV5jPjB0v8Jm
ujXbWWG9fNUy2Nk1l6UeoWoksvag51tR9F/UxlsaYekzXXV62+c908defiuSydP4utHUdpbZoSOulHVT9TOSJP/CJSDcSAN1T1KhG5A/hj4HBwyBdV9cmo62Hyl7hqfWhjaht/vgnMZ67nRrQ0Tnb6ermun+tJgMZPhbji/yywCzg5qexeVf3HAjy3ccz1Vavxhw0PrRyRtvGLyEzgd4BvRfk8prBaGidzTVuTJf0yYsNDK0vUnbtfA24FBoeV3ywi20Xk2yIyNd0DRWSZiMREJHb48OF0hxhjHCnl2dUmd5ElfhG5CjikqpuH/eifgfcBrcAB4CvpHq+q96tqm6q2NTQ0RFVNYww2PLTSRHnFvwC4WkT2At8DLheRR1S1U1UHVHUQ+CZwQYR1MMZkwYaHVpbIOndV9QvAFwBE5FLgL1T1ehGZoaoHgsM+BrwcVR2MMdmz4aGVoxjj+FeISCugwF7g00WogzEmDdfDQ42fCpL4VfUZ4Jng9icK8ZzGGGPSsyUbjDGmwljiN8aYCmOJ3xhjKowlfmOMqTCW+I0xpsJY4jfGmApjid8UTFdPL9s6jka68Nf6nQdZvmob63cerIh4rl9T3+MZN0RVi12HMbW1tWksFit2NUweCrHk76J7n+Hnne8O3Z/dOImnbrm0bOO5fk19j2dyJyKbVbVteLld8ZvIFWLJ3/U7D6YkVYDXOt8NfWXtezzXr6nv8YxblvhN5Aqx5O/anZ05lZd6PNevqe/xjFuW+E3kCrHk76I5jTmVl3o816+p7/GMW5b4TeQKseTvwjmnMbtxUkrZ7MZJLJxzWlnGc/2a+h7PuGWdu6ZgCrGR9/qdB1m7s5NFcxpDJ9VSiuf6NfU9nslNps5dS/zGGFOmbFSPMcYYoACJX0SqReQ/ReSJ4P40EVknIruD72k3Wzf+cjkpZ/WWDm568CVWb+lwULPKmzAU29PFV9e+RmxPl5fx2ju7WRXroL2z20k840bkTT0i8jmgDThZVa8SkRXAW6r6ZRG5DZiqqstHi2FNPf5wOSln/l3rOPjOe0P3Z5w8no1f/LAXdSsF139rE8+1n0jQF7fU8/BN872Jd/vqHTy0ad/Q/aUXNXPn4rmh45ncFaWpR0RmAr8DfCupeDHwYHD7QWBJlHUw7riclLN6S0dK0gc48M57oa/8K23CUGxPV0qSBvhpe1foK3XX8do7u1OSPsBDG/fZlb8nom7q+RpwK5A8oLcxsdl68P3UdA8UkWUiEhOR2OHDhyOupsmGy0k5T+xIP2M1U3kh61YKnt39Zk7lhY63teNoTuWmsCJL/CJyFXBIVTeHebyq3q+qbara1tDQ4Lh2JgyXk3Kumpt+KGOm8kLWrRRcctb0nMoLHa+1aUpO5aaworziXwBcLSJ7ge8Bl4vII0CniMwACL4firAOxiGXk3KWnNfEjJPHp5TNOHk8S85rKnrdSkHbmfVc3FKfUnZxSz1tZ9ZneERh47U0TmbpRc0pZUsvaqalcXKoeMatgozjF5FLgb8IOnfvAbqSOnenqeqtoz3eOnf94nJSzuotHTyx4yBXzT0tdNKPqm6lILani2d3v8klZ00PnaSjjNfe2c3WjqO0Nk2xpF8ERZ3ANSzx1wMrgWZgH3Ctqr412uMt8RtjTO4yJf5xhXhyVX0GeCa43QUsLMTzGmOMGclm7hpjTIUp68Tv+yxO1/VzPUvS93iFYFsRmnJUkKaeYvB9Fqfr+rmeJel7vEKwrQhNuSrLK37fZ3G6rp/rWZK+xysE24rQlLOyTPy+z+J0XT/XsyR9j1cIthWhKWdlmfh9n8Xpun6uZ0n6Hq8QbCtCU87KMvH7PovTdf1cz5L0PV4h2FaEppyV9Q5cvs/idF0/17MkfY9XCLYVoSlltvWiqQiWWI05oagzd40pBBsuaUx2yrKN31QeGy5pTPYs8ReR61mc63ceZPmqbazfGW4zk2zjhX2edI9zteduVMMlfd/T1vXv3GYWVwZr4y
8S180Si+59hp93vjt0f3bjJJ665VLn8cI+T7rHvX2sz9meu109vbT93dMkv5sFiP31FaHb+n3f09b179yayspPUfbcNem5bpZYv/NgSgIAeK3z3dBXgZni3fvUrlDPkymeyz13t+47wvBLGA3Kw/B9T1vXv3NrKqsslviLwHWzxNqdnTmVh423Znv6pDLW8+RSj7B77rp+DXzf09b1+drM4soS5Z67E0TkZyKyTUReEZEvBeV3iMgbIrI1+PpoVHXwletZnIvmNOZUHjbe4nnp98Md63lyqUfYPXddvwa+72nr+nxtZnFlifKKvxe4XFXPBVqBK0Uk0aB5r6q2Bl9PRlgHL7mexblwzmnMbpyUUja7cRIL54RLopni3fLbvx7qeTLFc7nnruvXwPc9bV2fb1Qzi21Zaz9FNo5f473GPcHdmuDL/57kArm69XQWtEx3NtnoqVsuZf3Og6zd2cmiOY2hE8BY8RomT+C1pLblUydPCB1v/l3r8qrjcK7fXJ3dx1PuHxp2P1cP3zTf6Z62rn/nrt+Ttqy1vyId1SMi1cBmoAX4hqouF5E7gE8C7wAx4POqOmoPXDmO6ilFsT1dXHPfphHlqz49P+cktnpLB3++cvuI8q9dNy/UVf/6nQe58aHNI8ofWHp+qIToOl6l6erpZcHdP+F434nmowk1VTy//PJQf1Bcx6sURRnVo6oDqtoKzAQuEJFzgH8G3ke8+ecA8JV0jxWRZSISE5HY4cOHo6ymyZLLDspMnbi+dO66jldpbFlrvxVkVI+qHiW+2fqVqtoZ/EEYBL4JXJDhMferapuqtjU0NBSimmPyfTKPa8PbU/PtoEyOl6kT15fOXdfxEh55YQ/X/ssLPPLCnrziRBXP1YQ6W9bab5G18YtIA9CnqkdFZCJwBXC3iMxQ1QPBYR8DXo6qDi4lT775p5+0O53M4yKea5naU0+ZUM3bxweGjjtlQnVWzTzp4qWLlU/n7oyTx3Ng2ISwfDq4XcYDOPeOHw+d70t7j3DP2tfYdseV3sSbf9e6obkVT+86xN0/fjX0hLr6ulrOmDYxpT/ojGnh+w3q62ppnjYxZe5CPvEqXZRX/DOADSKyHXgJWKeqTwArRGRHUH4ZcEuEdXDC98k8rmWazLN+58GURA3w9vGBMeudLt5frtrO8f7UK7jeAc1ra8Mjx/pTyo4c6/cm3iMv7En72oW9Uncdb/WWDqcT6mJ7ulKSPsQnmOXzfybdhDVf/s+UmsgSv6puV9UPqOo8VT1HVe8Myj+hqnOD8quTrv695ftkHtcytadmat8eq97p4lVXCdXibxuw63hrtqd/m2cqL3Q8130ulfZ/ptTYzN0s+D6Zx7VM7amZ2rfHqne6eAODyoD62wbsOt7ieTNyKi90PNd9LpX2f6bUWOLPgu+TeVzLNJln4ZzTQtU7Ea92nHBSTTW144R7rpnHPdecSzXxN2E1ONnasJr44myu4klwX/KMd/0Hz+SUCdUpZadMqOb6D57pRbwl5zU5nVBXaf9nSo1txJIl15NvXMdzLdNknt2HulOOax92P5P4bBGJZ1CNp9O7ntxJciv1Pzy5M68JOf9z9Y6heAPB/XziLV+1bWhSmAb3fZowNHF8auf4SeOrRzl6bK5n9FTa/5lSYlf8OWg7s57PLZrt7A3nOp5r9XW1nNs0ZSjph+0ATHTu9vYP8qv3BujtH+Tz39/mtDPRdWfnfRt2c6w/NRUe61fu27Dbi/q57ox1HS+h0v7PlIqsEr+InC0i60Xk5eD+PBH562irZnwTtgMwXUfpYIbLy7Cdia47O1dneFym8rH43hnrOp7xW7ZX/N8EvgD0QXzEDvB7UVXK+ClsB2C6jtIqSX9s2M5E152dSzI8LlP5WHzvjHUdz/gt28R/kqr+bFhZf9ojTdZKbeXCsB2A6TqLv3LtuU47E113dn76srOYOC71r9PEccKnLzvLi/q57ox1Hc/4LatF2kTkR8DNwPdV9TwRuQ
a4UVU/EnUFoTwXaSvllQtXb+ngiR0HuWruaTklhq6e3hGdxWFjZfLIC3tYs/0Ai+fNCJ1Uk923YTertx9gybwZoZN+VPVbs/UNPve9rSjxPvN7f68179+569+HKa5Mi7Rlm/h/Dbgf+CBwBNgDXK+qex3XM61yS/y2cqHJl/3OTTbyWp1TVX+hqlcADcD7VfVDhUr65cj3WaZh2AYZheXD79yUrqzG8YvIXcCKYJVNRGQq8XX0bWRPCL7PMs2VbZBReMX+nZvSlm3n7kcSSR8g2Dil4vbKdcX1NneF3jYvuTzTgm7ZXvn73sHta7zE73x8NdRWVzG+Or+ZxQntnd2sinXQ3pndxDxTmrKduVstIrWq2gsQLLNsDYl5cL3NXaG2zRte/qeXtlBTVcVxTlx9Vouw4dVDXPb+UwEy1sn3Dm7f48X2vsV7A0Dw2sdefyuveLev3sFDm/YN3V96UTN3Lp4bOp7xV7adu7cCVwP/l/jM7k8Bj6vqimirF1dunbu+y9Rx+MTNH+Kqrz+XUl47TgChd9gSy3W11RzvG0BEmDCuekSi872D2/d47Z3dXHHvsyPKn77lEloaJxc9nvFDvp27K4C/B34d+A3gbwuV9E3hZeo43NpxdET5+Opqbr6shQk1VUxKWiump3eA/kHoG9C0TUC+d3D7Hm9rx9Gcygsdz/gt60XaVPVHwI8irIvxRKaOw9amKWnL/+DCZv7gwmY2vHqIO/79FXp6U9ekSUgkuvq6Wu87uH2P19o0JafyQsczfhv1il9Engu+d4vIO0lf3SLyzhiPnSAiPxORbSLyioh8KSifJiLrRGR38H2qu9OpbK728D3RcVjF+GphfHW8s7ilcXKwvHLV0FeiQ/HIu+/R09tP30DmpsNj7/XzlbWvsXpLB0fefY+Pf2Am4wRqqoXx1eKkg7umCqqroKbKzbLM46vFaf1cxWtpnMzSi5pTypZe1By6WcZ1POO3Ua/4VfVDwfcwv/1e4HJV7RGRGuC5YAbw7wLrVfXLInIbcBuwPER8k8T1Hr4rYx28N5C4QlW+H+vg6tbTie19K6U9P/b6W8T2vpXSKZhJv8Z3TBqxa1LwxyLxHGF9fcNuEk3oA8A3NuzOK178NTjxhyzf+rmO53od5TsXz2Xp/Fls7ThKa9MUS/plbMw2fhGpSqzKmQuN6wnu1gRfCiwGHgzKHwSW5BrbpCrUnsCrt3SMSPAPbdyXVdLPRj51Xr/zYNo9WdfvDLe6pO/7LLd3dqf9XeQ7DLOlcTLXtDVZ0i9zYyZ+VR0EtolI81jHDici1SKyFThEfLP1F4HGxD67wfdTMzx2mYjERCR2+PDhXJ+6ohRqf9NCLNEbts6Z9gPOVB62Hr7sGWudsSYf2U7gmgG8EqzJ/3jia6wHqeqAqrYCM4ELROScbCumqverapuqtjU0NGT7sIpUqP1NC7FEb9g6Z9oPOFN52Hr4smesdcaafGSb+L8EXAXcCXwl6SsrwazfZ4ArgU4RmQEQfD+UfXVNOoXa33TJeU1pOwCHl4WVT50XzjmN2Y2TUspmN05i4Zxwf6x83zPWOmNNPkadwCUiE4A/AVqAHcADqprVOvwi0gD0qerRYKbvWuBu4LeArqTO3WmqeutosWwCV3Zc70eaKV57Z/eIDsDkMiDt7U3/9ebQksTvn3Eyz+5+k1PrxnOo5z1ndV6/8yBrd3ayaE5j6KSfrFCvaVjpfhfGJIRalllEHiW+69ZPgY8Ar6vqZ7N8wnnEO2+riX+yWKmqd4pIPbASaAb2Adeq6lujxbLEX/qSlys41tefcUavMcadTIl/rAlcc1R1bhDgAWD4LlwZBdszfiBNeRewMNs4pvQlL+R2Yk0fpW8g/uHx1se2s6Bluq0jb0yBjNXG35e4kW0TjzHDpVuuIJmtI29MYY2V+M9Nnq0LzMt25q4pX7ku3ZtuuYJkx/sH6OtPv8xD1HWzeKYSjTVzt3q0n5vKE2bp3sRyBbcOa+OH+C
JufQPKNfdtynsZYNfLCldaPFM5sh3OaUzWs0WHb9SyreMoC1qm8/zyy3nkpgt58YtX8N2bLhyxrk8+M09dz2SttHimsmS9Oqcxo80WTQwlzHb0zoZX00/fSI7lum4Wz5g4u+I3WRtrtujwbRhHW4/f92WFKy2eqSyW+E3WxpotmsvoHd+XFa60eKayZLX1YrHZBC6/ZJotmm57wWTpthp0PfPU4hlzQqiZu76wxF86Ht/6xojROzZD15jiCDtz15icXN16OgtaprP/yLGhbQUTt21mrjF+sMRvnKuvq01J8pbwjfGLde4aY0yFsSt+UzBdPb3WBGSMByzxm4KwZZmN8Yc19ZjI5TKxyxgTvcgSv4g0icgGEdklIq+IyGeD8jtE5A0R2Rp8fTSqOpjCSV6fZzhbltkYv0TZ1NMPfF5Vt4jIZGCziKwLfnavqv5jhM9tCii5GSdd081YyzL3DQ4OtfsbY6IX2RW/qh5Q1S3B7W5gF2ANuWVmeDNOuqabxLLME2qqmFw7jnFVUFMtTK4dx4SaKlZ8fJ518BpTQAXp3BWRWcS3YXwRWADcLCJLgRjxTwVH0jxmGbAMoLm5efiPjScSzTgntlQ80XSTnMxtYpcx/oi8c1dE6oDHgD9X1XeAfwbeB7QCB4CvpHucqt6vqm2q2tbQ0BB1NU1I6ZpxMjXd1NfVcm7TlKEJXonbxpjCijTxi0gN8aT/HVX9AYCqdqrqgKoOAt8ELoiyDiZaw5txRmu6Sd4m0LYMNKZ4Imvqkfjeeg8Au1T1q0nlM1T1QHD3Y8DLUdXBFMbwZpx0SX/4NoHJbMtAYworyjb+BcAngB0isjUo+yLw+yLSCiiwF/h0hHUwBTJ8fZ5k6bYJTPbQxn0snT/LlhU2pkAiS/yq+hwgaX70ZFTPafyUaZvA4cdY4jemMGzmrolcNtsB2paBxhSOJX4TuXTbBCazLQONKSxbpM0UxJ2L57J0/qyhbQIB2zLQmCKxxG8KpqVxckqSt4RvTHFYU48xxlQYS/zGGFNhLPGbjEZbatmHeMaYcKyN36Q11lLLxY5njAnPrvjNCNkstVzMeMaY/FjiNyOk2zErn12yXMczxuTHEr8ZIZellosRzxiTH0v8ZoRcllouRjxjTH5EVYtdhzG1tbVpLBYrdjUqTldPr9NdslzHM8aMTkQ2q2rb8HIb1WMyGm2pZR/ipeP7HyuLZ/F8YInflA3fh6BaPIvni8ja+EWkSUQ2iMguEXlFRD4blE8TkXUisjv4PjWqOpjK4fsQVItn8XwSZeduP/B5Vf11YD7wpyIyB7gNWK+qZwHrg/sVyfeZsaU009b3IagWz+L5JModuA4AB4Lb3SKyCzgdWAxcGhz2IPAMsDyqevjK94+ZpfSxFfwfgmrxLJ5PCjKcU0RmAR8AXgQaE5utB99PLUQdfOL7x8xS+9gK/g9BtXgWzyeRD+cUkTrgP4C/V9UfiMhRVZ2S9PMjqjqinV9ElgHLAJqbm89//fXXI61nIW3rOMr133qR7t7+obLJteN45KYLOTfEFoS+xysk30dpWDyLV0hFGc4pIjXAY8B3VPUHQXGniMxQ1QMiMgM4lO6xqno/cD/Ex/FHWc9C8/1jZql9bE3m+xBUi2fxfBDlqB4BHgB2qepXk370OHBDcPsGYE1UdfBV4mNh7bgqThpfTe04Nx8z3ccTTqqppnacpMQL2+mb7nG+d0hbPFOOorziXwB8AtghIluDsi8CXwZWisiNwD7g2gjr4C1N/KsydM+/eAJCEDMubKdvuscpeN0hbfH87tA34dmSDUXQ1dPLgrt/wvG+E80pE2qqeH755aGu0gsV74mbP8RVX38u5+dJF692XBWg9PafeP+Vwmtg8UwpydTGX9aLtPn6Mdj3McSZ4m3tOBrqedLFq64SqqX0XgOLZ8pB2S7Z4PPH4JlTJ3K8fyCl7Hj/QF6dsT1JI3AAenr7ncdrbZoSqtM33fn2DQxSJanH+dQhbfFKp0
Pf5K4sr/hLYVz78Ca2fJrcjrz73ohWfQ3KXcYDQo9VTne+t//33/B2HLXF83scuslPWV7xJz62HufEFUziY2uYN3IU8SbWjEsZJz+xZlzoeFs7jmYsb2mc7DTeNW1NLGiZntNY5Uzne85/O4Xnl1/ubNzz1a2n51w3ixddPOOvskz8vn8Mdh2vNcOkqkzl+cbLdazyaOfr+zhqi2fKUVk29Yw1Dj1sPF8/Vrc0TmbpRc0pZUsvag51tR9FPNfzDIwx+SnLK37IPA49LN8/Vp9/xjS+97N9CFUog7SdMc2reK7nGRSK71P6Ky2ecaMsx/FX2phk38d0l+rvw+eRYZUYz+SuosbxV9qYZN/HdJfi78P3kWGVFs+4VZaJP6oxyb5OCCt053Ou9R4tnuvXdPWWDm568CVWb+nIK47vf/wqLZ5xqyzb+BOdibcO+5iZT7OCzx+D6+tqaZ42kZ93vjtUdsa08G2q9XW1tJ0xlefau4bKfvOMqdTX1Yaqd31dLdedP5OHNu0bKruubSbPtb/p9DWdf9c6Dr4Tn7vw9K5D3P3jV9n4xQ+HijVz6kR+1Zc66exXfflNsvN5ZJjv8YxbZXnFD/HO0+eXX84jN13I88svzyuh+P4xOLanKyXpA7zW+S6xPV0ZHjG69s7ulKQP8NP2LmJ7ukLVu6unl5Wb96eUPfrSfm5dtc3Za7B6S8dQ0k848M57oa/8j7z7HgODqf1fA4MaelJcfV0t17XNTCm7rm2mNyPDfI9n3CrLK/4EV2OSfZ8Q9uzuNzOWt51Zn3O8TBO4nt39Zqh6pzvf6ioJRvicuKrO5zX44dZfZixfcl5TzvGeaz+csTzMsNaunl5WxlL/+K2M7eezC88O/R71faSZTQjzV9le8bvk+8fgS86anlP5WDJN4LrkrOkc60tdw+dY39hrAqU734FBHdGU0p3H+kIfyFDnTOVjmV43IafysUTV5l1fV8u5TVOcJVXf4xk3LPFnwfePwW1n1nNxS+qV/cUt9aGu9gGmThofvyJPUl0lTDlpPPH9dU4Yfj+ddM0cl53dkPbYPYd7cqxt3NzTT8mpfCwTa9L/18hUPpao2rxddWYnrN95kOWrtrF+50En8WJ7uvjq2tdCNztGHa+9s5tVsQ7aO7srIl5CWTf1uOT7x+CHb5pPbE8Xz+5+k0vOmh466UP86vSkmuqUtXVOqqlma8fRtO3eYzXPpGvmWLerM+2xYZuntu1/O2P5wjmnFT1efV0ttdXC8b4TZbXVktfv3WVnNsCie58Z6it6NLaf2Y2TeOqWS0PHu/5bm4b6iv7pJ+1c3FLPwzfN9ybe7at3pAw4WHpRM3cunlu28ZJFufXit0XkkIi8nFR2h4i8ISJbg6+PRvX8UfD9Y3DbmfV8btHsvJI+ZL46HVcFw/I+gwpvdh8fNV66Zo5xGd55vzb9pJzrO9rjfIm3eksHbx9Pbdp6+/hA6Ct1153Z63ceTDtAIOyVf2xPV8YBAj7Ea+/sTkmqAA9t3Bf6ytr3eMNF2dTzr8CVacrvVdXW4OvJCJ/fhJSpKeoXb/4q7fGZro4T0rbxk76JqH8wbfGYMj3Ol3hP7EifQDOVFzre2p3pP4FlKh/LaAMOfIg32gq05RhvuMgSv6o+C7wVVXwTratbT+eRT13AHy2YxSOfuoCrW08P3YmcvEhb4uvW356d9tioVhQtdryr5qZvHspUPpZLM7zmmcrHsmhOY07lY3E94KBQAxh8eb+4jjdcMTp3bxaR7UFT0NRMB4nIMhGJiUjs8OH0Q+tMdG5fvYNr7tvEP/2knWvu28Tta3bk1Ykc2/sWvf2DQ1/7j/zK6xVFXcdbcl4TM04en1I24+TxoYaaAsxtSv9fJ1P5WBbOOY3ZjZNSymY3TgrVnwHuBxy4juf7+8V1vOEiXaRNRGYBT6jqOcH9RuBN4ssz/i0wQ1U/NVacctts3Xftnd1cce+zI8qfvuUSWhons3pLB0/sOMhVc0/LKn
GNFu/1rndZu7OTRXMaQyeZZOt3HvQ63n0bdrN6+wGWzJvBpy87K3ScqBa+e+SFPazZfoDF82Zw/QfPDB0nIdf3SqHj+f5+yXfARqZF2go6qkdVhxoMReSbwBOFfH6TndHaFx/auHeo0+npXYfY0nF0zJEGmeLdvuZlXvhFvDXw0dh+p6MgfI+360A3b7xzPHS80ZbVcFG/l/Ye4eeHe5ydb7bvlWLF8/H9krw8yv0//YXT1U0L2tQjIjOS7n4MeDnTsaZ4MrUjTj2pJtRIg0zxEkk/l1iZ+D6qIop46Ua5+FQ/ixc+XtSrm0Y5nPO7wEZgtojsF5EbgRUiskNEtgOXAbdE9fwmvEzti0d+1Zf2+LFGGqSL98H3pd/YxZdREBbP4hUzXtSrm0bW1KOqv5+m+IGons+4defiuSydP4utHUdpbZpCS+PkjFcv2Yw0OP+MaTz60n4S+29devapvPBfIwd9+TIKwuJZvGLGi3p1U1uywWTU0jiZa9qahkYSTJ00fsToewnKR5P42NrbP8jxYFTPV5/+Ode1pbZX5jNqIWzdLF5cS+PktKNm8hmVUmnxotinOqrVTW3JBpO1/UeOUVc7LmUph7racaFW56ypquIPL5zFsovfl/KpotB1s3hxXT29vPT6kZSyl14/QldPr8XLUrpPyfmIcnVTS/wma2E/fo72uPq6Widjk31fQdX3eL4vPe57vISWxsnOxtqDu6Xlh7OmHpO1sB8/C7Eph+8rqPoez/c/TL7HKzWRTuByxSZw+aWrpzfUx8+wjytE3SwePL71jRHbleYzbrzS4vko0wQuS/zGmCE+/2EqhXi+8WLmrjGlptwTw3Cu25QrLV6psMRvTAbJU+bLtSnAVCbr3DUmjainzBtTTJb4jUkj6inzxhSTJX5j0qj04X6mvFniNyaNqOYetHd2syrW4WzvVNfxunp62dZx1Jq0ypx17hqTgesp88nrtQNO1393Ec86syuHXfEbM4r6ulrObZri5Eq/ktd/N36xxG9MAVT6+u/GL1FuxPJtETkkIi8nlU0TkXUisjv4Hm5naGNKTKWv/278EuUV/78CVw4ruw1Yr6pnAeuD+8aUPdfrtZfa+u/GL5Gu1SMis4AnVPWc4P5rwKWqeiDYf/cZVZ09Vhxbq8eUi/bObmfrtUcRr9KWqCh3vqzV06iqBwCC5H9qpgNFZBmwDKC5uTnTYcaUFNfrtZfK+u/GL9527qrq/arapqptDQ0Nxa6OMcaUjUIn/s6giYfg+6ECP78xxlS8Qif+x4Ebgts3AGsK/PzGGFPxohzO+V1gIzBbRPaLyI3Al4EPi8hu4MPBfWOMMQUUWeeuqv5+hh8tjOo5jTHGjK0ktl4UkcPA6xE+xXTgzQjj+6Dcz7Hczw/sHMtFIc/xDFUdMTqmJBJ/1EQklm6sazkp93Ms9/MDO8dy4cM5ejuc0xhjTDQs8RtjTIWxxB93f7ErUADlfo7lfn5g51guin6O1sZvjDEVxq74jTGmwljiN8aYClP2iV9EmkRkg4jsEpFXROSzw37+FyKiIjI9qewLItIuIq+JyG8Xvta5Ge0cReTPgvN4RURWJJWXxTmKSKuIbBKRrSISE5ELkh5Tauc4QUR+JiLbgnP8UlCecQOjUjrHUc7vHhF5VUS2i8gPRWRK0mNK5vwg8zkm/dyPfKOqZf0FzADOC25PBn4OzAnuNwFPEZ8cNj0omwNsA2qBM4H/AqqLfR5hzhG4DHgaqA1+dmoZnuNa4CNB+UeJ7/FQqucoQF1wuwZ4EZgPrABuC8pvA+4uxXMc5fwWAeOC8rtL9fxGO8fgvjf5puyv+FX1gKpuCW53A7uA04Mf3wvcCiT3cC8Gvqeqvaq6B2gHLsBjo5zjZ4Avq2pv8LPEaqjldI4KnBwcdgrwy+B2KZ6jqmpPcLcm+FLi5/JgUP4gsCS4XVLnmOn8VHWtqvYH5ZuAmcHtkjo/GPV3CB7lm7JP/MmCHcE+ALwoIlcDb6
jqtmGHnQ50JN3fz4k/FN5LPkfgbOBiEXlRRP5DRH4zOKyczvHPgXtEpAP4R+ALwWEleY4iUi0iW4kvWb5OVV9k2AZGQGIDo5I7xwznl+xTwI+C2yV3fpD+HH3LNxWT+EWkDniMeKLoB/4KuD3doWnKSmLMa/I5quo7xBfhm0r84/RfAitFRCivc/wMcIuqNgG3AA8kDk3zcO/PUVUHVLWV+FXvBSJyziiHl9w5jnZ+IvJXxP9vfidRlC5E5JXMU5pznIdn+aYiEr+I1BBPFt9R1R8A7yPenrZNRPYS/wVtEZHTiP/FbUp6+ExONB94K805QvxcfhB8/PwZMEh8gahyOscbgMTt73PiY3JJnmOCqh4FngGuJPMGRiV7jsPODxG5AbgK+EMNGr8p4fODlHNcjG/5ppgdIYX4Iv4X9SHga6Mcs5cTnS2/QWpnyy8ojQ6lEecI/AlwZ3D7bOIfKaXMznEXcGlweyGwuYR/jw3AlOD2ROCnxJPhPaR27q4oxXMc5fyuBHYCDcOOL6nzG+0chx1T9HxT6M3Wi2EB8AlgR9DuBvBFVX0y3cGq+oqIrCT+RuwH/lRVBwpS0/DSniPwbeDbIvIy8B5wg8bfbeV0jn8M/C8RGQccB5ZByf4eZwAPikg18U/jK1X1CRHZSLyZ7kZgH3AtlOQ5Zjq/duKJb128JZJNqvonJXh+kOEcMx1crHO0JRuMMabCVEQbvzHGmBMs8RtjTIWxxG+MMRXGEr8xxlQYS/zGGFNhLPGbsiUiA8Gqna8EqyV+TkRCv+dF5EPByouvBl/Lkn7WECyN8Z8SX0X0M0k/uzBYebIShk+bEmBvRFPOjml86jwicirwb8QXcvubXAMFsyz/DViiqluCZXWfEpE3VPX/EZ889qqq3iAijcBGEVkFdAFfB/6HnliILNfnFuJDrwfDPN6Y4WwcvylbItKjqnVJ938NeIn4shVnAA8Dk4If36yqL4jIw8AqVV0TPOY7wKPAbxJffPH2pHgLgTuAPwMeJz5T8w3gIuCPgse8BJxPfGLZl4FLiU9W+oaq3hesPbSG+JpKNcBfq+qaYCG6HwEbgnhLVPV1l6+PqVyW+E3ZGp74g7IjwPuBbmBQVY+LyFnAd1W1TUR+i/iib0tE5BRgK3AWsBJ4MPEHIYh1CrBHVaeJyCeBNlW9OfhZFbCR+EqabcDHie+H8HciUgs8T3wGbgdwkqq+E3yK2BQ83xnEp+9/UFU3RfICmYplTT2m0iRWQ6wBvi4ircAA8bWMUNX/EJFvBE1Dvws8pqr9QXNLuquktFdOqjooIvcR/2PQJSKLgHkick1wyCnEE/x+4C4RuYT4InqnA43BMa9b0jdRsMRvKkbQ1DNAfHXLvwE6gXOJD3I4nnTow8AfAr9HfH14gFeIX7k/nnTc+cTXWMlkMPiC+B+cP1PVp4bV6ZPEF/Y6X1X7gtUbJwQ/fjf7szMmezaqx1QEEWkA/gX4erBQ3SnAgaDD9BNAddLh/0p83wZU9ZWg7BvAJ4NPCIhIPfFtAleQnaeAzwRLSyMiZ4vIpKAeh4KkfxnxJh5jImVX/KacTQxW8qwhvvLhw8BXg5/9H+AxEbmWeAfq0NW1qnaKyC5gdVLZARG5HvimiEwmfgX/NVX99yzr8i1gFvF12AU4THwLxe8A/y4iMeL9Ca+GOVFjcmGdu8YMIyInATuIb+7+drHrY4xr1tRjTBIRuYL4Vff/tqRvypVd8RtjTIWxK35jjKkwlviNMabCWOI3xpgKY4nfGGMqjCV+Y4ypMP8fFF03YlhPduQAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "new_pumpkins.plot.scatter('DayOfYear','Price')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 171, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-0.14878293554077535\n", + "-0.16673322492745407\n" + ] + } + ], + "source": [ + "print(new_pumpkins['Month'].corr(new_pumpkins['Price']))\n", + "print(new_pumpkins['DayOfYear'].corr(new_pumpkins['Price']))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Изгледа да је корелација прилично мала, али постоји нека друга важнија веза - јер цене на графикону изнад изгледају као да имају неколико различитих кластера. Хајде да направимо графикон који ће приказати различите врсте бундева:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 172, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "ax=None\n", + "colors = ['red','blue','green','yellow']\n", + "for i,var in enumerate(new_pumpkins['Variety'].unique()):\n", + " ax = new_pumpkins[new_pumpkins['Variety']==var].plot.scatter('DayOfYear','Price',ax=ax,c=colors[i],label=var)" + ] + }, + { + "cell_type": "code", + "execution_count": 173, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 173, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "new_pumpkins.groupby('Variety')['Price'].mean().plot(kind='bar')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 174, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-0.2669192282197318\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 174, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "pie_pumpkins = new_pumpkins[new_pumpkins['Variety']=='PIE TYPE']\n", + "print(pie_pumpkins['DayOfYear'].corr(pie_pumpkins['Price']))\n", + "pie_pumpkins.plot.scatter('DayOfYear','Price')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Линеарна регресија\n", + "\n", + "Користићемо Scikit Learn за тренирање модела линеарне регресије:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 175, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error\n", + "from sklearn.model_selection import train_test_split" + ] + }, + { + "cell_type": "code", + "execution_count": 176, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 2.77 (17.2%)\n" + ] + } + ], + "source": [ + "X = pie_pumpkins['DayOfYear'].to_numpy().reshape(-1,1)\n", + "y = pie_pumpkins['Price']\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n", + "lin_reg = LinearRegression()\n", + "lin_reg.fit(X_train,y_train)\n", + "\n", + "pred = lin_reg.predict(X_test)\n", + "\n", + "mse = np.sqrt(mean_squared_error(y_test,pred))\n", + "print(f'Mean error: {mse:3.3} ({mse/np.mean(pred)*100:3.3}%)')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 177, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 177, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.scatter(X_test,y_test)\n", + "plt.plot(X_test,pred)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Нагиб праве може се одредити из коефицијената линеарне регресије:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 178, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([-0.01751876]), 21.133734359909326)" + ] + }, + "execution_count": 178, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lin_reg.coef_, lin_reg.intercept_" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Можемо користити обучени модел да предвидимо цену:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 179, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([16.64893156])" + ] + }, + "execution_count": 179, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Pumpkin price on programmer's day\n", + "\n", + "lin_reg.predict([[256]])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Полиномна регресија\n", + "\n", + "Понекад је однос између карактеристика и резултата природно нелинеаран. На пример, цене бундева могу бити високе током зиме (месеци=1,2), затим падати током лета (месеци=5-7), а потом поново расти. Линеарна регресија није у стању да тачно ухвати овај однос.\n", + "\n", + "У овом случају можемо размотрити додавање додатних карактеристика. Једноставан начин је коришћење полинома из улазних карактеристика, што би довело до **полиномне регресије**. 
У Scikit Learn-у, можемо аутоматски претходно израчунати полиномне карактеристике користећи пайплайне:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 180, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 2.73 (17.0%)\n", + "Model determination: 0.07639977655280217\n" + ] + }, + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 180, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXUAAAD4CAYAAAATpHZ6AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAbw0lEQVR4nO3de3Cc1Znn8e+jm93ClmRb8kWyjYBgDb6ATQQhFwIhFzu7meBQNVOVyu5Sm9RQSWWnJlMTZ3BIZWq2dpcMnprZzM5WTbEDFVLDsJOZOM4USTAEkkBYMJExjOwYY8AXkGRLsi35otb92T+6JbfurXa3ut+j36eqS2+ffvvto0f2T6/Oe/q0uTsiIhKGonx3QEREskehLiISEIW6iEhAFOoiIgFRqIuIBKRkLl+surra6+vr5/IlRUQib//+/Z3uXpPOvnMa6vX19TQ1Nc3lS4qIRJ6ZnUh3Xw2/iIgERKEuIhIQhbqISEAU6iIiAVGoi4gEZMbZL2a2Bvg+sBIYBh529++a2S7gd4F+4G3gP7t7Vw77KnNgz4EWdu09QmtXnNqqGDu2NrB9S12+uyUiaUrnTH0Q+BN3vwG4Dfiqma0HngE2uvuNwJvAztx1U+bCngMt7NzdTEtXHAdauuLs3N3MngMt+e6aiKRpxlB39zZ3fzW5fQE4DNS5+9PuPpjc7WVgde66KXNh194jxAeGxrTFB4bYtfdInnokIrM1qzF1M6sHtgD7xj30ReBnUzznPjNrMrOmjo6OjDopc6O1Kz6rdhEpPGmHupktAn4IfM3dz6e0P0BiiObxyZ7n7g+7e6O7N9bUpPUuV8mT2qrYrNpFpPCkFepmVkoi0B93990p7fcCnwG+4PoIpcjbsbWBWGnxmLZYaTE7tjbkqUciMlvpzH4x4BHgsLv/VUr7NuBPgTvcvSd3XZS5MjLLRbNfRKLLZjrBNrOPAC8AzSSmNAJ8E/gbYAFwJtn2srt/ebpjNTY2uhb0EhGZHTPb7+6N6ew745m6u/8asEke+ulsOyYiIrmld5SKiAREoS4iEhCFuohIQBTqIiIBUaiLiAREoS4iEhCFuohIQBTqIiIBUaiLiAREoS4iEhCFuohIQBTqIiIBUaiLiAREoS4iEhCFuohIQBTqIiIBUaiLiAREoS4iEhCFuohIQGb8jFKRXNtzoIVde4/Q2hWntirGjq0NbN9SV/DHlolU7/xTqEte7TnQws7dzcQHhgBo6Yqzc3czwBWHQS6PLROp3oVBwy+SV7v2HhkNgRHxgSF27T1S0MeWiVTvwqBQl7xq7YrPqr1Qji0Tqd6FQaEueVVbFZtVe6EcWyZSvQuDQl3yasfWBmKlxWPaYqXF7NjaUNDHlolU78KgC6WSVyMX0HIxYyKXx5aJVO/CYO4+Zy/W2NjoTU1Nc/Z6IiIhMLP97t6Yzr4znqmb2Rrg+8BKYBh42N2/a2ZLgX8C6oHjwO+7+7lMO50PmlM
rIqFJZ0x9EPgTd78BuA34qpmtB+4HnnX364Fnk/cjY2RObUtXHOfynNo9B1ry3TURkYzNGOru3uburya3LwCHgTrgbuCx5G6PAdtz1Mec0JxaEQnRrGa/mFk9sAXYB6xw9zZIBD+wfIrn3GdmTWbW1NHRcYXdzR7NqRWREKUd6ma2CPgh8DV3P5/u89z9YXdvdPfGmpqaTPqYE5pTKyIhSivUzayURKA/7u67k82nzWxV8vFVQHtuupgbmlMrIiGaMdTNzIBHgMPu/lcpD/0rcG9y+17gx9nvXu5s31LHg/dsoq4qhgF1VTEevGeTZr+ISKTNOE/dzD4CvAA0k5jSCPBNEuPqPwDWAieB33P3s9MdS/PURURmL6vz1N3914BN8fDHZ9MxERHJLa39IiISEIW6iEhAFOoiIgFRqIuIBEShLiISEIW6iEhAFOoiIgFRqIuIBEShLiISEH1GqUiG9MlZUogU6iIZGPnkrJEPWhn55CxAwS55peEXkQzok7OkUCnURTKgT86SQqVQF8mAPjlLCpVCXSQD+uQsKVS6UCqSgZGLoZr9IoVGoS6Soe1b6hTiUnA0/CIiEhCFuohIQBTqIiIBUaiLiAREoS4iEhCFuohIQAp+SmNUV8KLar9FJNoKOtSjuhJeVPstItFX0MMvUV0JL6r9FpHomzHUzexRM2s3s4MpbZvN7GUze83Mmszs1lx0Lqor4UW13yISfemcqX8P2Dau7SHgz919M/Dt5P2si+pKeFHtt4hE34yh7u7PA2fHNwMVye1KoDXL/QKiuxJeVPstItGX6YXSrwF7zewvSfxi+FDWepQiqivhRbXfIhJ95u4z72RWDzzp7huT9/8G+JW7/9DMfh+4z90/McVz7wPuA1i7du37T5w4ka2+i4jMC2a2390b09k309kv9wK7k9v/DEx5odTdH3b3RndvrKmpyejFuuMD9A8OZ/RcEZH5JNNQbwXuSG7fBRzNTncm97+ePcqt/+PnfGtPM/tPnCWdvy5EROajGcfUzewJ4E6g2szeA/4M+APgu2ZWAvSSHF7JlbtuWE77hT7+Zf97/MPLJ1m7tDzxAQWba7m2ZlEuX1pEJFLSGlPPlsbGRm9qasr4+Rf7Btl78BR7Xmvhxbc6GXa4aU0Vn9tcy+/eVMuyRQuy2FsRkcIwmzH1SIV6qtPne/nX11r50YEWftt2nuIi4451NWzfUscnb1hBrKx45oOIiETAvAj1VEdOXWDPay38+EALrd29XFVWzLaNq/jcljo+eN0yioss668pIjJX5l2ojxgedvYdO8ueAy38tLmNC32DrKhYwN2b69i+uY4bVi3GTAEvItEyb0M9Ve/AEM+90c6PDrTwyyPtDAw5DSsWs31LHXdvrtVb9kUkMhTq45y71M+TzW3sOdDC/hPnMIPbrlnG57bUsW3TSioWls55n0RE0qVQn8aJM5f4cfIC67HOS5SVFHFXw3I+vWklH/ud5Qp4ESk4CvU0uDuvv9c9Ov7efqGP0mLjw++rZtuGlXxi/QqqNUVSRAqAQn2WhoedA+92sffQKZ46eIqTZ3soMrilfinbNq5k64aVGoMXkbxRqF8Bd+dw2wWeOnSKvQdPceT0BQBuWl3JpzasZNvGlVynd7GKyBxSqGfROx0X2XvoNE8dOsXr73YBcP3yRaNn8BtqKzRNUkRySqGeI23dcZ4+dJqnDp5i37EzDDusXhJjW/IM/ua1SyjSG51EJMsU6nPgzMU+nj3czlOHTvHro530Dw1Ts3gBn1q/gm0bV3LbtcsoLS7oz/UWkYhQqM+xC70D/OJIB3sPnuIXR9rp6R+iYmEJn7hhBZ9Yv4IPX1dNZbmmSopIZhTqedQ7MMQLRzt56uApfn74NN3xAYossZrk7dfX8NHrq9m8pooSncWLSJoU6gVicGiY197t4vmjnbxwtIPX3+1i2GHxghI+9L5lyZCvYe2y8nx3VUQKmEK9QHX3DPDi24mAf/7NTlq64gB
cvaycj15fw+3XV/PB65axWO9qFZEUCvUIcHeOdV7ihaOdPP9mBy+9c4ae/iGKi4yb11YlQn5dDZvqKrV0sMg8p1CPoP7BYV49eY4XjnbwwtFOmlu6cYfKWCkfeV81t19fze3raqjTO1tF5h2FegDOXOzjxbfP8MKbiZA/db4XgOtqrkqMxa+r5rZrl1FeNuPHzIpIxCnUA+PuvNV+kV8lA37fsTP0DgxTWmw0Xr2U29dV89Hra1i/qkJvfhIJkEI9cL0DQ+w/cY7nkxdcD7edB2DpVWWjQzUfuGYZa5bGtISBSAAU6mnac6CFXXuP0NoVp7Yqxo6tDWzfUpfvbs1a+4VeXnyrkxfe7OT5o510XuwDYPniBdxSv5TG+iXcUr+U31m5WPPjRSJIoZ6GPQda2Lm7mfjA0GhbrLSYB+/ZFMlgHzE87LzZfoGm4+doOn6W3xw/Nzp18qqyYm6+egmNVyeCfvOaKq5aoDF5kUKnUE/Dh7/z3GjYpaqrivHi/XfloUe509oVp+nE5ZB/49R53KG4yNhQW0Hj1Uu5pX4J769fwvLFC/PdXREZZzahPm9P01onCfTp2qOstirGZ6tifPamWgDO9w5w4GRXMuTP8o+vnODRF48BUL+snMb6pdy0poqNtRXcsKqChaXF+ey+iMzCvA312qrYpGfq8+ETjioWlnLHuhruWFcDJObIH2rtpun4OX5z/CzPvdHOv+x/D4Aig/ctX8TG2krW11awsS7xNZuf5RrKtQ2RQjBvh19CHVPPBnentbuXgy3dHGrp5lDreQ62dnP6fN/oPlcvK2djbSUb6irYUFvJxtoKlmXwma76OYjMLKvDL2b2KPAZoN3dN6a0/yHwX4BB4Cfu/o0M+5sXI4GhM8SJzIy6qhh1VTG2blg52t5xoY9DrcmQb+mmuaWbnzS3jT6+qnIhG2qTIV9XyYbaClZVLpx2WuWuvUfGBDpAfGCIXXuP6GchkoF0hl++B/wt8P2RBjP7GHA3cKO795nZ8tx0L7e2b6lTcMxCzeIF3NmwnDsbLv+4u3sGONTWzW+TQX+w9TzPvdHOcPIPwKVXlaUEfeLr1UvLR98kNZ+ubYjMhRlD3d2fN7P6cc1fAb7j7n3Jfdpz0DeZpXyMTVeWl/Kh66r50HXVo209/YMcbruQOKtvSQzdPPLrdxgYSiT9ogUlrK+tYENtBVXlpZzrGZhw3Gxd29B4vcw3mV4oXQfcbmb/HegFvu7uv5lsRzO7D7gPYO3atRm+nMxk/Nh0S1ecnbubAeY8xMrLSnj/1Ut4/9VLRtv6Boc4evoih1q7OZgM+ideOUnvwPCkx7imupyDLd3UV1/Fogzn0hdSTUTmSloXSpNn6k+OjKmb2UHgOeCPgFuAfwKu9RkOVkgXSkMTxXn3Q8POBx98lvYLfdPut3zxAq6pvmrM7dqaq1iztJwFJVNPt4xiTSQcQ8NOW3eck2d7ePdsDx9rWM7yiszeBzIX89TfA3YnQ/wVMxsGqoGODI8nVyiKY9PFRUbHNIH+d//hZt7pvMSxjksc67zEM789zZlL/aOPFxmsXlI+JuhHtmsrY5GsiUTL+d4BTp5JhPbJlNu7Z3to6YqPDjkC/J//1Mgn1+f+zX2Zhvoe4C7gl2a2DigDOrPVKZm9qM67n6rfdVUxtm1cNaG9u2eAY2cucazzIsc6LvFO5yWOn7lE0/GzXOq/PIumrKSI4iJjcHjiH48rKxbi7lrsTGY0MDRMW1fv5bA+dzm0T57toWvc9aCq8lLWLi1nQ10ln960irVLy0dvqyrn5t3a6UxpfAK4E6g2s/eAPwMeBR5NDsP0A/fONPQiubVja8Ok8713bG3IY69mNtt+V5aXsrm8is1rqsa0uzsdF/oSZ/bJ2/97q5NDrecZ/w+z7Xwv67+9l5WVC1lZsZBVlQtZUZn4urJiYaK9ciHVVy3QUsaBcncu9Q/R1dNPV88AXT0DnO3p571zY8+6W7t6GUo5MSgtNlY
vKWfN0nJuXF05GthrkrdsvikvU/P2zUchiupMj1z2e8+BFh566g1au3upXlTGv9+0ijVLyznV3Uvb+V5OdSdup8/3TjirLykyVqSE/MgvgJUp4b988ULKSrTyZT71Dgxxrqefc5cG6IpfDulzPf10xwc4d6mfrvjAaICf6xmgO94/ZmgkVfWiskRILykfE9prl5WzsmJhXj5eUgt6iczS8LDTeamP0919tHXHOZUS+G3J0G/tjk+YrWMGVbFSqsrLqIiVUhUrpTJ5qyq/vH25rWz0Ma2pM1b/4HAieKcJ4smCu29w8hlUAAtLi6iKlVFVnqj5kvKR7TKqYon7lcn2JeWl1FbFCnLlUi3oNU/pTD1zRUXG8sWJM+9Nqysn3cfdOR8fpO18fEzgd17sozs+kDgr7Onn+JlLo/enO2cqKylKBHzKL4GK5HZbdy8vvX2G7vgAS8pL2b6ljtuvr6asuJiykqLErTjxdUHylto+3br5uaq3uzM07Ay5c7F3cFwQpwb05cA+d+ly3Xr6h6Y8dmmxjQnixPBHMpxHwjo27v48/cWpM/VARHUNlaj2Ox3Dw86FvkG6ewZGQ74r3j+6PdLelfJ4d3yAzot90559pqPIGA34BaXFia8lRcQHhjh1vnfMLxszWF0VY/HCUoZHgjkZzkPDzvDoNqOPj7QNpmynEyVFxmjwTnamXJn8Ov7suryseF5f2NaZ+jwU1TVUotrvdBQV2eiwy2xMNb++ZtEC/vcXbqZ/cJj+oSH6BobpHxqmb3A40TaYuJ+63TcwNGafnx8+PSF83aH9Qh8NKxdTZEZxUcrNjKLUr0WMaSsuTn4tsjHPXbSgZNJhjsULSnTxOccU6oGI6pzsqPY7l6b63jsv9nHrNUuv6NjX3P+TSdv7B4f5+3tvuaJjS2HQZftATDUfPQrz1GfTPh/ksiaqd/gU6oHYsbWB2LiLQlGZpx7FfudSLmuieodPwy+BiOr68FHtdy7lsiaqd/g0+0VEpMDNZvaLhl9ERAKiUBcRCYhCXUQkIAp1EZGAKNRFRAKiUBcRCYhCXUQkIAp1EZGA6B2lMkYhrG0uIplTqMuo8Wubt3TF2bm7GUDBLhIRGn6RUdOtbS4i0aBQl1Fa21wk+hTqMkprbYtEn0JdRmmtbZHo04VSGaW1tkWiT6EuY2zfUqcQF4kwhbrknebGT5TLmkT12FE11zVRqEteaW78RLmsSVSPHVX5qMmMF0rN7FEzazezg5M89nUzczOrzknvJHiaGz9RLmsS1WNHVT5qks7sl+8B28Y3mtka4JPAySz3SeYRzY2fKJc1ieqxoyofNZkx1N39eeDsJA/9NfANYO4+uVqCo7nxE+WyJlE9dlTloyYZzVM3s88CLe7+ehr73mdmTWbW1NHRkcnLScA0N36iXNYkqseOqnzUZNYXSs2sHHgA+FQ6+7v7w8DDAI2NjTqrlzE0N36iXNYkqseOqnzUxNxnzlkzqweedPeNZrYJeBboST68GmgFbnX3U9Mdp7Gx0Zuamq6sxyIi84yZ7Xf3xnT2nfWZurs3A8tTXuw40OjunbM9lkiuad60zDfpTGl8AngJaDCz98zsS7nvlsiVG5kj3NIVx7k8R3jPgZZ8d00kZ2Y8U3f3z8/weH3WeiOSRdPNEdbZuoRKqzRKsDRvWuYjLRMgwaqtitEySYDP53nTuRbVaxhR7fdkdKYuwdK86bkV1WsYUe33VBTqEqztW+p48J5N1FXFMKCuKsaD92yK7BlYoYvq2i9R7fdUNPwiQdP68HMnqtcwotrvqehMXUSyIqprv0S131NRqItIVkT1GkZU+z0VDb+ISFZEde2XqPZ7Kmmt/ZItWvtFRGT2ZrP2i4ZfREQColAXEQmIQl1EJCAKdRGRgCjURUQColAXEQmIQl1EJCAKdRGRgCjURUQColAXEQmIQl1EJCAKdRGRgCjURUQColAXEQmIQl1EJCAKdRGRgCjURUQCMmOom9mjZtZuZgd
T2naZ2Rtm9m9m9iMzq8ppL0VEJC3pnKl/D9g2ru0ZYKO73wi8CezMcr9ERCQDM4a6uz8PnB3X9rS7DybvvgyszkHfRERklrIxpv5F4GdTPWhm95lZk5k1dXR0ZOHlRERkKlcU6mb2ADAIPD7VPu7+sLs3untjTU3NlbyciIjMoCTTJ5rZvcBngI+7u2evSyIikqmMQt3MtgF/Ctzh7j3Z7ZKIiGQqnSmNTwAvAQ1m9p6ZfQn4W2Ax8IyZvWZmf5fjfoqISBpmPFN3989P0vxIDvoiIiJXSO8oFREJiEJdRCQgCnURkYAo1EVEAqJQFxEJiEJdRCQgCnURkYAo1EVEAqJQFxEJiEJdRCQgCnURkYBkvPSuiOTOt/Y088S+dxlyp9iMz39gDf9t+6asHHvPgRZ27T1Ca1ec2qoYO7Y2sH1LXVaOLfmnUBcpMN/a08w/vHxy9P6Q++j9Kw32PQda2Lm7mfjAEAAtXXF27m4GULAHQsMvIgXmiX3vzqp9NnbtPTIa6CPiA0Ps2nvkio8thUGhLlJghqb4ILGp2mejtSs+q3aJHoW6SIEpNptV+2zUVsVm1S7Ro1AXKTCf/8CaWbXPxo6tDcRKi8e0xUqL2bG14YqPLYVBF0pFCszIxdBczH4ZuRiq2S/hMs/COF26Ghsbvampac5eT0QkBGa2390b09lXwy8iIgFRqIuIBEShLiISEIW6iEhAFOoiIgGZ09kvZtYBnACqgc45e+HCpTqoBqAajFAdpq7B1e5ek84B5jTUR1/UrCnd6TkhUx1UA1ANRqgO2amBhl9ERAKiUBcRCUi+Qv3hPL1uoVEdVANQDUaoDlmoQV7G1EVEJDc0/CIiEhCFuohIQLIe6ma2xsx+YWaHzeyQmf3RuMe/bmZuZtUpbTvN7C0zO2JmW7Pdp3yYrg5m9ofJ7/WQmT2U0h5UHaaqgZltNrOXzew1M2sys1tTnhNUDQDMbKGZvWJmryfr8OfJ9qVm9oyZHU1+XZLynKDqME0NdpnZG2b2b2b2IzOrSnlOUDWAqeuQ8viV56O7Z/UGrAJuTm4vBt4E1ifvrwH2knwDUrJtPfA6sAC4BngbKM52v+b6NlUdgI8BPwcWJB9bHmodpqnB08Cnk+3/DvhlqDVIfl8GLEpulwL7gNuAh4D7k+33A38Rah2mqcGngJJk+1+EXIPp6pC8n5V8zPqZuru3ufurye0LwGFgZAX+vwa+AaRenb0b+L/u3ufux4C3gFuJuGnq8BXgO+7el3ysPfmU4OowTQ0cqEjuVgm0JreDqwGAJ1xM3i1N3pzE9/tYsv0xYHtyO7g6TFUDd3/a3QeT7S8Dq5PbwdUApv23AFnKx5yOqZtZPbAF2GdmnwVa3P31cbvVAakfk/4el38JBCG1DsA64HYz22dmvzKzW5K7BV2HcTX4GrDLzN4F/hLYmdwt2BqYWbGZvQa0A8+4+z5ghbu3QeIXILA8uXuQdZiiBqm+CPwsuR1kDWDyOmQzH3MW6ma2CPghif/Ag8ADwLcn23WStmDmWabWwd3Pk/gIwSUk/vTcAfzAzIyA6zBJDb4C/LG7rwH+GHhkZNdJnh5EDdx9yN03kzgTvdXMNk6ze5B1mK4GZvYAiZx4fKRpskPkvJNzYJI63EgW8zEnoW5mpST+Ez/u7ruB60iMB71uZsdJfDOvmtlKEr95Uj9RdzWX/xyPtEnqAInvd3fyz7BXgGESi/gEWYcpanAvMLL9z1z+czLIGqRy9y7gl8A24LSZrQJIfh0Zigu6DuNqgJndC3wG+IInB5IJvAYwpg53k818zNGFgO8D/3OafY5z+ULABsZeCHiHcC6ITKgD8GXgvya315H408pCrMM0NTgM3Jnc/jiwP/B/CzVAVXI7BrxAIsR2MfZC6UOh1mGaGmwDfgvUjNs/uBpMV4dx+1xRPpZMk/eZ+jDwH4Hm5LgRwDfd/aeT7ezuh8zsByR+sIPAV919KAf9mmuT1gF4FHjUzA4C/cC
9nvjphViHqWrwB8B3zawE6AXug6D/LawCHjOzYhJ/Hf/A3Z80s5dIDL99CTgJ/B4EW4epavAWicB6JjEKycvu/uVAawBT1GGqnTOpg5YJEBEJiN5RKiISEIW6iEhAFOoiIgFRqIuIBEShLiISEIW6iEhAFOoiIgH5/+EaqS+WjFbpAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "from sklearn.preprocessing import PolynomialFeatures\n", + "from sklearn.pipeline import make_pipeline\n", + "\n", + "pipeline = make_pipeline(PolynomialFeatures(2), LinearRegression())\n", + "\n", + "pipeline.fit(X_train,y_train)\n", + "\n", + "pred = pipeline.predict(X_test)\n", + "\n", + "mse = np.sqrt(mean_squared_error(y_test,pred))\n", + "print(f'Mean error: {mse:3.3} ({mse/np.mean(pred)*100:3.3}%)')\n", + "\n", + "score = pipeline.score(X_train,y_train)\n", + "print('Model determination: ', score)\n", + "\n", + "plt.scatter(X_test,y_test)\n", + "plt.plot(sorted(X_test),pipeline.predict(sorted(X_test)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Кодирање сорти\n", + "\n", + "У идеалном свету, желимо да будемо у могућности да предвидимо цене за различите сорте бундева користећи исти модел. Да бисмо узели у обзир сорту, прво је потребно да је претворимо у нумерички облик, или **кодирање**. Постоји неколико начина на које то можемо урадити:\n", + "\n", + "* Једноставно нумеричко кодирање које ће направити табелу различитих сорти, а затим заменити име сорте индексом у тој табели. Ово није најбоља идеја за линеарну регресију, јер линеарна регресија узима у обзир нумеричку вредност индекса, а нумеричка вредност вероватно неће нумерички корелирати са ценом.\n", + "* One-hot кодирање, које ће заменити колону `Variety` са 4 различите колоне, по једну за сваку сорту, које ће садржати 1 ако одговарајући ред припада датој сорти, а 0 у супротном.\n", + "\n", + "Код испод показује како можемо да применимо one-hot кодирање на сорту:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 181, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
FAIRYTALEMINIATUREMIXED HEIRLOOM VARIETIESPIE TYPE
700001
710001
720001
730001
740001
...............
17380100
17390100
17400100
17410100
17420100
\n", + "

415 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " FAIRYTALE MINIATURE MIXED HEIRLOOM VARIETIES PIE TYPE\n", + "70 0 0 0 1\n", + "71 0 0 0 1\n", + "72 0 0 0 1\n", + "73 0 0 0 1\n", + "74 0 0 0 1\n", + "... ... ... ... ...\n", + "1738 0 1 0 0\n", + "1739 0 1 0 0\n", + "1740 0 1 0 0\n", + "1741 0 1 0 0\n", + "1742 0 1 0 0\n", + "\n", + "[415 rows x 4 columns]" + ] + }, + "execution_count": 181, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.get_dummies(new_pumpkins['Variety'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Линеарна регресија на сорти\n", + "\n", + "Сада ћемо користити исти код као горе, али уместо `DayOfYear` користићемо нашу једнохот-енкодовану сорту као улаз:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 182, + "metadata": {}, + "outputs": [], + "source": [ + "X = pd.get_dummies(new_pumpkins['Variety'])\n", + "y = new_pumpkins['Price']" + ] + }, + { + "cell_type": "code", + "execution_count": 183, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 5.24 (19.7%)\n", + "Model determination: 0.774085281105197\n" + ] + } + ], + "source": [ + "def run_linear_regression(X,y):\n", + " X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n", + " lin_reg = LinearRegression()\n", + " lin_reg.fit(X_train,y_train)\n", + "\n", + " pred = lin_reg.predict(X_test)\n", + "\n", + " mse = np.sqrt(mean_squared_error(y_test,pred))\n", + " print(f'Mean error: {mse:3.3} ({mse/np.mean(pred)*100:3.3}%)')\n", + "\n", + " score = lin_reg.score(X_train,y_train)\n", + " print('Model determination: ', score)\n", + "\n", + "run_linear_regression(X,y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Можемо такође покушати да користимо друге функције на исти начин и комбинујемо их са нумеричким функцијама, као што су `Month` или `DayOfYear`:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 
184, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 2.84 (10.5%)\n", + "Model determination: 0.9401096672643048\n" + ] + } + ], + "source": [ + "X = pd.get_dummies(new_pumpkins['Variety']) \\\n", + " .join(new_pumpkins['Month']) \\\n", + " .join(pd.get_dummies(new_pumpkins['City'])) \\\n", + " .join(pd.get_dummies(new_pumpkins['Package']))\n", + "y = new_pumpkins['Price']\n", + "\n", + "run_linear_regression(X,y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Полиномна регресија\n", + "\n", + "Полиномна регресија може се користити и са категоријалним карактеристикама које су једнохот-кодиране. Код за тренирање полиномне регресије би у суштини био исти као што смо видели горе.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 185, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 2.23 (8.25%)\n", + "Model determination: 0.9652870784724543\n" + ] + } + ], + "source": [ + "from sklearn.preprocessing import PolynomialFeatures\n", + "from sklearn.pipeline import make_pipeline\n", + "\n", + "pipeline = make_pipeline(PolynomialFeatures(2), LinearRegression())\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n", + "\n", + "pipeline.fit(X_train,y_train)\n", + "\n", + "pred = pipeline.predict(X_test)\n", + "\n", + "mse = np.sqrt(mean_squared_error(y_test,pred))\n", + "print(f'Mean error: {mse:3.3} ({mse/np.mean(pred)*100:3.3}%)')\n", + "\n", + "score = pipeline.score(X_train,y_train)\n", + "print('Model determination: ', score)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op 
Translator](https://github.com/Azure/co-op-translator). Иако тежимо тачности, молимо вас да имате у виду да аутоматски преводи могу садржати грешке или нетачности. Оригинални документ на изворном језику треба сматрати ауторитативним извором. За критичне информације препоручује се професионални превод од стране људи. Не сносимо одговорност за било каква погрешна тумачења или неспоразуме који могу произаћи из коришћења овог превода.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "86193a1ab0ba47eac1c69c1756090baa3b420b3eea7d4aafab8b85f8b312f0c5" + }, + "kernelspec": { + "display_name": "Python 3.7.0 64-bit ('3.7')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.5" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "d77bd89ae7e79780c68c58bab91f13f8", + "translation_date": "2025-09-06T13:10:36+00:00", + "source_file": "2-Regression/3-Linear/solution/notebook.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sr/2-Regression/4-Logistic/notebook.ipynb b/translations/sr/2-Regression/4-Logistic/notebook.ipynb new file mode 100644 index 000000000..a5db6bb9b --- /dev/null +++ b/translations/sr/2-Regression/4-Logistic/notebook.ipynb @@ -0,0 +1,269 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Врсте бундева и боја\n", + "\n", + "Учитајте потребне библиотеке и скуп података. 
Претворите податке у датафрејм који садржи подскуп података:\n", + "\n", + "Хајде да погледамо однос између боје и врсте\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NameTypePackageVarietySub VarietyGradeDateLow PriceHigh PriceMostly Low...Unit of SaleQualityConditionAppearanceStorageCropRepackTrans ModeUnnamed: 24Unnamed: 25
0BALTIMORENaN24 inch binsNaNNaNNaN4/29/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
1BALTIMORENaN24 inch binsNaNNaNNaN5/6/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
2BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
3BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
4BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN11/5/1690.0100.090.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " City Name Type Package Variety Sub Variety Grade Date \\\n", + "0 BALTIMORE NaN 24 inch bins NaN NaN NaN 4/29/17 \n", + "1 BALTIMORE NaN 24 inch bins NaN NaN NaN 5/6/17 \n", + "2 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "3 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "4 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 11/5/16 \n", + "\n", + " Low Price High Price Mostly Low ... Unit of Sale Quality Condition \\\n", + "0 270.0 280.0 270.0 ... NaN NaN NaN \n", + "1 270.0 280.0 270.0 ... NaN NaN NaN \n", + "2 160.0 160.0 160.0 ... NaN NaN NaN \n", + "3 160.0 160.0 160.0 ... NaN NaN NaN \n", + "4 90.0 100.0 90.0 ... NaN NaN NaN \n", + "\n", + " Appearance Storage Crop Repack Trans Mode Unnamed: 24 Unnamed: 25 \n", + "0 NaN NaN NaN E NaN NaN NaN \n", + "1 NaN NaN NaN E NaN NaN NaN \n", + "2 NaN NaN NaN N NaN NaN NaN \n", + "3 NaN NaN NaN N NaN NaN NaN \n", + "4 NaN NaN NaN N NaN NaN NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "full_pumpkins = pd.read_csv('../data/US-pumpkins.csv')\n", + "\n", + "full_pumpkins.head()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако настојимо да обезбедимо тачност, молимо вас да имате у виду да аутоматизовани преводи могу садржати грешке или нетачности. Оригинални документ на изворном језику треба сматрати меродавним извором. За критичне информације препоручује се професионални превод од стране људи. 
Не сносимо одговорност за било каква погрешна тумачења или неспоразуме који могу произаћи из коришћења овог превода.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.1" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "dee08c2b49057b0de8b6752c4dbca368", + "translation_date": "2025-09-06T13:26:31+00:00", + "source_file": "2-Regression/4-Logistic/notebook.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sr/2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb b/translations/sr/2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb new file mode 100644 index 000000000..8b5df505c --- /dev/null +++ b/translations/sr/2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb @@ -0,0 +1,685 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Изградња модела логистичке регресије - Лекција 4\n", + "\n", + "![Инфографика: Логистичка vs. линеарна регресија](../../../../../../2-Regression/4-Logistic/images/linear-vs-logistic.png)\n", + "\n", + "#### **[Квиз пре предавања](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/15/)**\n", + "\n", + "#### Увод\n", + "\n", + "У овој последњој лекцији о регресији, једној од основних *класичних* техника машинског учења, погледаћемо логистичку регресију. Ову технику користите да бисте открили обрасце за предвиђање бинарних категорија. Да ли је овај слаткиш чоколада или не? Да ли је ова болест заразна или не? 
Да ли ће овај купац изабрати овај производ или не?\n", + "\n", + "У овој лекцији ћете научити:\n", + "\n", + "- Технике за логистичку регресију\n", + "\n", + "✅ Продубите своје разумевање рада са овом врстом регресије у овом [модулу за учење](https://learn.microsoft.com/training/modules/introduction-classification-models/?WT.mc_id=academic-77952-leestott)\n", + "\n", + "## Предуслов\n", + "\n", + "Радећи са подацима о бундевама, сада смо довољно упознати са њима да схватимо да постоји једна бинарна категорија са којом можемо радити: `Color`.\n", + "\n", + "Хајде да изградимо модел логистичке регресије да бисмо предвидели, на основу неких променљивих, *која је вероватно боја дате бундеве* (наранџаста 🎃 или бела 👻).\n", + "\n", + "> Зашто говоримо о бинарној класификацији у лекцији која се бави регресијом? Само ради језичке погодности, јер је логистичка регресија [заправо метода класификације](https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression), иако заснована на линеарним принципима. 
Сазнајте више о другим начинима класификације података у следећој групи лекција.\n", + "\n", + "За ову лекцију биће нам потребни следећи пакети:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) је [збирка R пакета](https://www.tidyverse.org/packages) дизајнирана да учини науку о подацима бржом, лакшом и забавнијом!\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) оквир је [збирка пакета](https://www.tidymodels.org/packages/) за моделирање и машинско учење.\n", + "\n", + "- `janitor`: [janitor пакет](https://github.com/sfirke/janitor) пружа једноставне алате за испитивање и чишћење \"прљавих\" података.\n", + "\n", + "- `ggbeeswarm`: [ggbeeswarm пакет](https://github.com/eclarke/ggbeeswarm) пружа методе за креирање графикона у стилу \"роја пчела\" користећи ggplot2.\n", + "\n", + "Можете их инсталирати помоћу:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\", \"janitor\", \"ggbeeswarm\"))`\n", + "\n", + "Алтернативно, скрипта испод проверава да ли имате потребне пакете за завршетак овог модула и инсталира их ако недостају.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "suppressWarnings(if (!require(\"pacman\"))install.packages(\"pacman\"))\n", + "\n", + "pacman::p_load(tidyverse, tidymodels, janitor, ggbeeswarm)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## **Дефинишите питање**\n", + "\n", + "За наше потребе, изразићемо ово као бинарно: 'Бело' или 'Није бело'. У нашем сету података постоји и категорија 'пругасто', али има мало примерака, па је нећемо користити. Ионако нестаје када уклонимо вредности null из сета података.\n", + "\n", + "> 🎃 Забавна чињеница, понекад беле тикве називамо 'духовима' тиквама. Нису баш лаке за резбарење, па нису толико популарне као наранџасте, али изгледају занимљиво! 
Тако да бисмо могли да преобликујемо наше питање као: 'Дух' или 'Није дух'. 👻\n", + "\n", + "## **О логистичкој регресији**\n", + "\n", + "Логистичка регресија се разликује од линеарне регресије, коју сте раније учили, на неколико важних начина.\n", + "\n", + "#### **Бинарна класификација**\n", + "\n", + "Логистичка регресија не нуди исте могућности као линеарна регресија. Прва нуди предвиђање о `бинарној категорији` (\"наранџасто или није наранџасто\"), док је друга способна да предвиђа `континуалне вредности`, на пример, на основу порекла тикве и времена жетве, *колико ће њена цена порасти*.\n", + "\n", + "![Инфографик од Дасани Мадипали](../../../../../../2-Regression/4-Logistic/images/pumpkin-classifier.png)\n", + "\n", + "### Друге класификације\n", + "\n", + "Постоје и други типови логистичке регресије, укључујући мултиномијалну и ординалну:\n", + "\n", + "- **Мултиномијална**, која укључује више од једне категорије - \"Наранџасто, Бело и Пругасто\".\n", + "\n", + "- **Ординална**, која укључује уређене категорије, корисна ако желимо да логички поређамо наше исходе, као тикве које су поређане по ограниченом броју величина (мини, мало, средње, велико, веома велико, екстра велико).\n", + "\n", + "![Мултиномијална vs ординална регресија](../../../../../../2-Regression/4-Logistic/images/multinomial-vs-ordinal.png)\n", + "\n", + "#### **Променљиве НЕ морају да буду корелисане**\n", + "\n", + "Сећате се како је линеарна регресија боље функционисала са више корелисаних променљивих? Логистичка регресија је супротна - променљиве не морају да буду усклађене. 
То одговара овим подацима који имају релативно слабе корелације.\n", + "\n", + "#### **Потребно је много чистих података**\n", + "\n", + "Логистичка регресија ће дати прецизније резултате ако користите више података; наш мали сет података није оптималан за овај задатак, па то имајте на уму.\n", + "\n", + "✅ Размислите о типовима података који би били погодни за логистичку регресију\n", + "\n", + "## Вежба - уредите податке\n", + "\n", + "Прво, мало очистите податке, уклањајући null вредности и бирајући само неке од колона:\n", + "\n", + "1. Додајте следећи код:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Load the core tidyverse packages\n", + "library(tidyverse)\n", + "\n", + "# Import the data and clean column names\n", + "pumpkins <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/2-Regression/data/US-pumpkins.csv\") %>% \n", + " clean_names()\n", + "\n", + "# Select desired columns\n", + "pumpkins_select <- pumpkins %>% \n", + " select(c(city_name, package, variety, origin, item_size, color)) \n", + "\n", + "# Drop rows containing missing values and encode color as factor (category)\n", + "pumpkins_select <- pumpkins_select %>% \n", + " drop_na() %>% \n", + " mutate(color = factor(color))\n", + "\n", + "# View the first few rows\n", + "pumpkins_select %>% \n", + " slice_head(n = 5)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Увек можете бацити поглед на ваш нови датафрејм користећи функцију [*glimpse()*](https://pillar.r-lib.org/reference/glimpse.html) као што је приказано испод:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "pumpkins_select %>% \n", + " glimpse()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Хајде да потврдимо да ћемо 
заправо радити на проблему бинарне класификације:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Subset distinct observations in outcome column\n", + "pumpkins_select %>% \n", + " distinct(color)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Визуелизација - категоријални графикон\n", + "До сада сте поново учитали податке о бундевама и очистили их тако да задржите скуп података који садржи неколико променљивих, укључујући боју. Хајде да визуелизујемо dataframe у бележници користећи библиотеку ggplot.\n", + "\n", + "Библиотека ggplot нуди неке сјајне начине за визуелизацију ваших података. На пример, можете упоредити расподеле података за сваку сорту и боју у категоријалном графикону.\n", + "\n", + "1. Направите такав графикон користећи функцију `geom_bar`, користећи наше податке о бундевама, и одредите мапирање боја за сваку категорију бундеве (наранџаста или бела):\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "python" + } + }, + "outputs": [], + "source": [ + "# Specify colors for each value of the hue variable\n", + "palette <- c(ORANGE = \"orange\", WHITE = \"wheat\")\n", + "\n", + "# Create the bar plot\n", + "ggplot(pumpkins_select, aes(y = variety, fill = color)) +\n", + " geom_bar(position = \"dodge\") +\n", + " scale_fill_manual(values = palette) +\n", + " labs(y = \"Variety\", fill = \"Color\") +\n", + " theme_minimal()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Посматрајући податке, можете видети како се подаци о Боји односе на Врсту.\n", + "\n", + "✅ С обзиром на овај категоријални графикон, која занимљива истраживања можете замислити?\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Предобрада података: кодирање карактеристика\n", + "\n", + "Наш скуп података о бундевама садржи 
стринг вредности за све своје колоне. Рад са категоријским подацима је интуитиван за људе, али не и за машине. Алгоритми машинског учења добро функционишу са бројевима. Зато је кодирање веома важан корак у фази предобраде података, јер нам омогућава да категоријске податке претворимо у нумеричке, без губитка информација. Добро кодирање води ка изградњи доброг модела.\n", + "\n", + "За кодирање карактеристика постоје два главна типа кодера:\n", + "\n", + "1. Ординални кодер: добро одговара ординалним променљивама, које су категоријске променљиве где њихови подаци следе логички редослед, као што је колона `item_size` у нашем скупу података. Он креира мапирање тако да је свака категорија представљена бројем, који је редослед категорије у колони.\n", + "\n", + "2. Категоријски кодер: добро одговара номиналним променљивама, које су категоријске променљиве где њихови подаци не следе логички редослед, као што су све карактеристике различите од `item_size` у нашем скупу података. То је једно-хот кодирање, што значи да је свака категорија представљена бинарном колоном: кодирана променљива је једнака 1 ако бундева припада тој врсти, а 0 у супротном.\n", + "\n", + "Tidymodels пружа још један користан пакет: [recipes](https://recipes.tidymodels.org/) - пакет за предобраду података. Дефинисаћемо `recipe` који спецификује да све колоне предиктора треба да буду кодиране у сет целих бројева, `prep` да процени потребне количине и статистике за било коју операцију и на крају `bake` да примени израчунавања на нове податке.\n", + "\n", + "> Уобичајено је да се recipes користи као предобрада за моделирање, где дефинише које кораке треба применити на скуп података како би био спреман за моделирање. У том случају је **топло препоручено** да користите `workflow()` уместо ручног процењивања рецепта помоћу prep и bake. 
Све ово ћемо видети ускоро.\n", + ">\n", + "> Међутим, за сада користимо recipes + prep + bake да спецификујемо које кораке треба применити на скуп података како би био спреман за анализу података и затим извучемо предобрађене податке са примењеним корацима.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Preprocess and extract data to allow some data analysis\n", + "baked_pumpkins <- recipe(color ~ ., data = pumpkins_select) %>%\n", + " # Define ordering for item_size column\n", + " step_mutate(item_size = ordered(item_size, levels = c('sml', 'med', 'med-lge', 'lge', 'xlge', 'jbo', 'exjbo'))) %>%\n", + " # Convert factors to numbers using the order defined above (Ordinal encoding)\n", + " step_integer(item_size, zero_based = F) %>%\n", + " # Encode all other predictors using one hot encoding\n", + " step_dummy(all_nominal(), -all_outcomes(), one_hot = TRUE) %>%\n", + " prep(data = pumpkin_select) %>%\n", + " bake(new_data = NULL)\n", + "\n", + "# Display the first few rows of preprocessed data\n", + "baked_pumpkins %>% \n", + " slice_head(n = 5)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "✅ Које су предности коришћења ординалног енкодера за колону величине артикла?\n", + "\n", + "### Анализа односа између променљивих\n", + "\n", + "Сада када смо претходно обрадили наше податке, можемо анализирати односе између карактеристика и ознаке како бисмо стекли идеју о томе колико добро модел може предвидети ознаку на основу карактеристика. Најбољи начин за спровођење овакве анализе је кроз визуализацију података. \n", + "Поново ћемо користити ggplot функцију `geom_boxplot`, како бисмо визуализовали односе између величине артикла, сорте и боје у категоријалном графику. 
За бољу визуализацију података користићемо енкодовану колону величине артикла и неенкодовану колону сорте.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Define the color palette\n", + "palette <- c(ORANGE = \"orange\", WHITE = \"wheat\")\n", + "\n", + "# We need the encoded Item Size column to use it as the x-axis values in the plot\n", + "pumpkins_select_plot<-pumpkins_select\n", + "pumpkins_select_plot$item_size <- baked_pumpkins$item_size\n", + "\n", + "# Create the grouped box plot\n", + "ggplot(pumpkins_select_plot, aes(x = `item_size`, y = color, fill = color)) +\n", + " geom_boxplot() +\n", + " facet_grid(variety ~ ., scales = \"free_x\") +\n", + " scale_fill_manual(values = palette) +\n", + " labs(x = \"Item Size\", y = \"\") +\n", + " theme_minimal() +\n", + " theme(strip.text = element_text(size = 12)) +\n", + " theme(axis.text.x = element_text(size = 10)) +\n", + " theme(axis.title.x = element_text(size = 12)) +\n", + " theme(axis.title.y = element_blank()) +\n", + " theme(legend.position = \"bottom\") +\n", + " guides(fill = guide_legend(title = \"Color\")) +\n", + " theme(panel.spacing = unit(0.5, \"lines\"))+\n", + " theme(strip.text.y = element_text(size = 4, hjust = 0)) \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Користите swarm plot\n", + "\n", + "Пошто је Color бинарна категорија (Бела или Не), потребан је 'специјализовани приступ [визуализацији](https://github.com/rstudio/cheatsheets/blob/main/data-visualization.pdf)'.\n", + "\n", + "Пробајте `swarm plot` да бисте приказали расподелу боје у односу на item_size.\n", + "\n", + "Користићемо [ggbeeswarm пакет](https://github.com/eclarke/ggbeeswarm) који пружа методе за креирање beeswarm-стил графикона користећи ggplot2. 
Beeswarm графикони су начин приказивања тачака које би се иначе преклапале тако да падају једна поред друге.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Create beeswarm plots of color and item_size\n", + "baked_pumpkins %>% \n", + " mutate(color = factor(color)) %>% \n", + " ggplot(mapping = aes(x = color, y = item_size, color = color)) +\n", + " geom_quasirandom() +\n", + " scale_color_brewer(palette = \"Dark2\", direction = -1) +\n", + " theme(legend.position = \"none\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Сада када имамо идеју о односу између бинарних категорија боје и веће групе величина, хајде да истражимо логистичку регресију како бисмо одредили вероватну боју одређене тикве.\n", + "\n", + "## Направите свој модел\n", + "\n", + "Изаберите променљиве које желите да користите у свом класификационом моделу и поделите податке на сетове за обуку и тестирање. 
[rsample](https://rsample.tidymodels.org/), пакет у оквиру Tidymodels, пружа инфраструктуру за ефикасно раздвајање и ресемплинг података:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Split data into 80% for training and 20% for testing\n", + "set.seed(2056)\n", + "pumpkins_split <- pumpkins_select %>% \n", + " initial_split(prop = 0.8)\n", + "\n", + "# Extract the data in each split\n", + "pumpkins_train <- training(pumpkins_split)\n", + "pumpkins_test <- testing(pumpkins_split)\n", + "\n", + "# Print out the first 5 rows of the training set\n", + "pumpkins_train %>% \n", + " slice_head(n = 5)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "🙌 Сада смо спремни да обучимо модел тако што ћемо прилагодити карактеристике за обуку ознаци за обуку (боји).\n", + "\n", + "Почећемо креирањем рецепта који одређује кораке предобраде који треба да се спроведу на нашим подацима како би били спремни за моделирање, тј. кодирање категоријских променљивих у скуп целих бројева. Баш као `baked_pumpkins`, креирамо `pumpkins_recipe`, али не `prep` и `bake`, јер ће бити укључено у радни ток, што ћете видети за само неколико корака.\n", + "\n", + "Постоји прилично велики број начина за спецификацију модела логистичке регресије у Tidymodels. Погледајте `?logistic_reg()`. 
За сада ћемо специфицирати модел логистичке регресије преко подразумеваног `stats::glm()` механизма.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Create a recipe that specifies preprocessing steps for modelling\n", + "pumpkins_recipe <- recipe(color ~ ., data = pumpkins_train) %>% \n", + " step_mutate(item_size = ordered(item_size, levels = c('sml', 'med', 'med-lge', 'lge', 'xlge', 'jbo', 'exjbo'))) %>%\n", + " step_integer(item_size, zero_based = F) %>% \n", + " step_dummy(all_nominal(), -all_outcomes(), one_hot = TRUE)\n", + "\n", + "# Create a logistic model specification\n", + "log_reg <- logistic_reg() %>% \n", + " set_engine(\"glm\") %>% \n", + " set_mode(\"classification\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Сада када имамо рецепт и спецификацију модела, потребно је да пронађемо начин да их спојимо у један објекат који ће прво обрадити податке (припрема+печење у позадини), затим обучити модел на обрађеним подацима, а такође омогућити потенцијалне активности пост-обраде.\n", + "\n", + "У Tidymodels-у, овај практични објекат се назива [`workflow`](https://workflows.tidymodels.org/) и згодно чува ваше компоненте за моделирање.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Bundle modelling components in a workflow\n", + "log_reg_wf <- workflow() %>% \n", + " add_recipe(pumpkins_recipe) %>% \n", + " add_model(log_reg)\n", + "\n", + "# Print out the workflow\n", + "log_reg_wf\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Након што је радни ток *одређен*, модел може бити `обучен` коришћењем функције [`fit()`](https://tidymodels.github.io/parsnip/reference/fit.html). 
Радни ток ће проценити рецепт и претходно обрадити податке пре обуке, тако да нећемо морати то ручно да радимо користећи prep и bake.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Train the model\n", + "wf_fit <- log_reg_wf %>% \n", + " fit(data = pumpkins_train)\n", + "\n", + "# Print the trained workflow\n", + "wf_fit\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Модел приказује коефицијенте научене током тренинга.\n", + "\n", + "Сада када смо обучили модел користећи податке за тренинг, можемо направити предвиђања на тест подацима користећи [parsnip::predict()](https://parsnip.tidymodels.org/reference/predict.model_fit.html). Хајде да започнемо тако што ћемо користити модел за предвиђање ознака за наш тест сет и вероватноћа за сваку ознаку. Када је вероватноћа већа од 0.5, предвиђена класа је `WHITE`, иначе је `ORANGE`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Make predictions for color and corresponding probabilities\n", + "results <- pumpkins_test %>% select(color) %>% \n", + " bind_cols(wf_fit %>% \n", + " predict(new_data = pumpkins_test)) %>%\n", + " bind_cols(wf_fit %>%\n", + " predict(new_data = pumpkins_test, type = \"prob\"))\n", + "\n", + "# Compare predictions\n", + "results %>% \n", + " slice_head(n = 10)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Врло лепо! Ово пружа додатне увиде у то како логистичка регресија функционише.\n", + "\n", + "### Боље разумевање кроз матрицу конфузије\n", + "\n", + "Поређење сваке предикције са њеном одговарајућом \"истинском\" вредношћу није баш ефикасан начин да се утврди колико добро модел предвиђа. 
Срећом, Tidymodels има још неке трикове у рукаву: [`yardstick`](https://yardstick.tidymodels.org/) - пакет који се користи за мерење ефикасности модела помоћу метрика перформанси.\n", + "\n", + "Једна од метрика перформанси која се повезује са проблемима класификације је [`матрица конфузије`](https://wikipedia.org/wiki/Confusion_matrix). Матрица конфузије описује колико добро модел класификације функционише. Матрица конфузије приказује колико примера у свакој класи је модел исправно класификовао. У нашем случају, показаће вам колико наранџастих бундева је класификовано као наранџасте и колико белих бундева је класификовано као беле; матрица конфузије такође показује колико је њих класификовано у **погрешне** категорије.\n", + "\n", + "Функција [**`conf_mat()`**](https://tidymodels.github.io/yardstick/reference/conf_mat.html) из yardstick-а израчунава ову крос-табелацију посматраних и предвиђених класа.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Confusion matrix for prediction results\n", + "conf_mat(data = results, truth = color, estimate = .pred_class)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Хајде да интерпретирамо матрицу конфузије. 
Наш модел треба да класификује бундеве у две бинарне категорије, категорију `бела` и категорију `није бела`.\n", + "\n", + "- Ако ваш модел предвиди да је бундева бела и она заиста припада категорији „бела“, то називамо `истински позитивно`, што је приказано горњим левим бројем.\n", + "\n", + "- Ако ваш модел предвиди да бундева није бела, а она заиста припада категорији „бела“, то називамо `лажно негативно`, што је приказано доњим левим бројем.\n", + "\n", + "- Ако ваш модел предвиди да је бундева бела, а она заиста припада категорији „није бела“, то називамо `лажно позитивно`, што је приказано горњим десним бројем.\n", + "\n", + "- Ако ваш модел предвиди да бундева није бела, а она заиста припада категорији „није бела“, то називамо `истински негативно`, што је приказано доњим десним бројем.\n", + "\n", + "| Истина |\n", + "|:-----:|\n", + "\n", + "| | | |\n", + "|---------------|--------|-------|\n", + "| **Предвиђено** | БЕЛА | НАРАНЏАСТА |\n", + "| БЕЛА | TP | FP |\n", + "| НАРАНЏАСТА | FN | TN |\n", + "\n", + "Као што сте можда претпоставили, пожељно је имати већи број истински позитивних и истински негативних резултата, а мањи број лажно позитивних и лажно негативних резултата, што имплицира да модел боље ради.\n", + "\n", + "Матрица конфузије је корисна јер омогућава израчунавање других метрика које нам могу помоћи да боље проценимо перформансе класификационог модела. Хајде да их размотримо:\n", + "\n", + "🎓 Прецизност: `TP/(TP + FP)` дефинисана као однос предвиђених позитивних резултата који су заиста позитивни. Такође се назива [позитивна предиктивна вредност](https://en.wikipedia.org/wiki/Positive_predictive_value \"Positive predictive value\").\n", + "\n", + "🎓 Осетљивост: `TP/(TP + FN)` дефинисана као однос позитивних резултата у односу на број узорака који су заиста позитивни. 
Такође позната као `одзив` (recall).\n", + "\n", + "🎓 Специфичност: `TN/(TN + FP)` дефинисана као однос негативних резултата у односу на број узорака који су заиста негативни.\n", + "\n", + "🎓 Тачност: `(TP + TN)/(TP + TN + FP + FN)` Проценат етикета које су тачно предвиђене за узорак.\n", + "\n", + "🎓 F мера: Тежински просек прецизности и осетљивости, где је најбољи резултат 1, а најгори 0.\n", + "\n", + "Хајде да израчунамо ове метрике!\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Combine metric functions and calculate them all at once\n", + "eval_metrics <- metric_set(ppv, recall, spec, f_meas, accuracy)\n", + "eval_metrics(data = results, truth = color, estimate = .pred_class)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Прикажи РОЦ криву за овај модел\n", + "\n", + "Хајде да направимо још једну визуализацију како бисмо видели такозвану [`РОЦ криву`](https://en.wikipedia.org/wiki/Receiver_operating_characteristic):\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Make a roc_curve\n", + "results %>% \n", + " roc_curve(color, .pred_ORANGE) %>% \n", + " autoplot()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ROC криве се често користе за добијање увида у резултате класификатора у смислу његових истинских и лажних позитивних резултата. ROC криве обично приказују `True Positive Rate`/Сензитивност на Y оси и `False Positive Rate`/1-Специфичност на X оси. Због тога су стрмина криве и простор између средишње линије и криве важни: желите криву која брзо иде нагоре и прелази линију. 
У нашем случају, постоје лажни позитивни резултати на почетку, а затим линија правилно иде нагоре и прелази.\n", + "\n", + "На крају, хајде да користимо `yardstick::roc_auc()` за израчунавање стварне Површине испод криве (Area Under the Curve). Један од начина тумачења AUC је као вероватноћа да модел рангира насумични позитиван пример више него насумични негативан пример.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Calculate area under curve\n", + "results %>% \n", + " roc_auc(color, .pred_ORANGE)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Резултат је око `0.975`. С обзиром на то да AUC варира од 0 до 1, желите висок резултат, јер модел који је 100% тачан у својим предикцијама има AUC од 1; у овом случају, модел је *прилично добар*.\n", + "\n", + "У будућим лекцијама о класификацијама, научићете како да побољшате резултате свог модела (на пример, како да се носите са неуравнотеженим подацима у овом случају).\n", + "\n", + "## 🚀Изазов\n", + "\n", + "Постоји много тога што треба истражити у вези са логистичком регресијом! Али најбољи начин да научите јесте да експериментишете. Пронађите скуп података који се може анализирати овим типом модела и направите модел користећи га. Шта сте научили? Савет: пробајте [Kaggle](https://www.kaggle.com/search?q=logistic+regression+datasets) за занимљиве скупове података.\n", + "\n", + "## Преглед и Самостално Учење\n", + "\n", + "Прочитајте првих неколико страница [овог рада са Стенфорда](https://web.stanford.edu/~jurafsky/slp3/5.pdf) о неким практичним применама логистичке регресије. Размислите о задацима који су боље прилагођени једном или другом типу регресије које смо до сада проучавали. 
Шта би најбоље функционисало?\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако настојимо да обезбедимо тачност, молимо вас да имате у виду да аутоматизовани преводи могу садржати грешке или нетачности. Оригинални документ на изворном језику треба сматрати ауторитативним извором. За критичне информације препоручује се професионални превод од стране људи. Не сносимо одговорност за било каква погрешна тумачења или неспоразуме који могу произаћи из коришћења овог превода.\n" + ] + } + ], + "metadata": { + "anaconda-cloud": "", + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "3.4.1" + }, + "coopTranslator": { + "original_hash": "feaf125f481a89c468fa115bf2aed580", + "translation_date": "2025-09-06T13:32:16+00:00", + "source_file": "2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/translations/sr/2-Regression/4-Logistic/solution/notebook.ipynb b/translations/sr/2-Regression/4-Logistic/solution/notebook.ipynb new file mode 100644 index 000000000..7da6ba756 --- /dev/null +++ b/translations/sr/2-Regression/4-Logistic/solution/notebook.ipynb @@ -0,0 +1,1255 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Логистичка регресија - Лекција 4\n", + "\n", + "Учитајте потребне библиотеке и скуп података. Претворите податке у датафрејм који садржи подскуп података:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NameTypePackageVarietySub VarietyGradeDateLow PriceHigh PriceMostly Low...Unit of SaleQualityConditionAppearanceStorageCropRepackTrans ModeUnnamed: 24Unnamed: 25
0BALTIMORENaN24 inch binsNaNNaNNaN4/29/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
1BALTIMORENaN24 inch binsNaNNaNNaN5/6/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
2BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
3BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
4BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN11/5/1690.0100.090.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " City Name Type Package Variety Sub Variety Grade Date \n", + "0 BALTIMORE NaN 24 inch bins NaN NaN NaN 4/29/17 \\\n", + "1 BALTIMORE NaN 24 inch bins NaN NaN NaN 5/6/17 \n", + "2 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "3 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "4 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 11/5/16 \n", + "\n", + " Low Price High Price Mostly Low ... Unit of Sale Quality Condition \n", + "0 270.0 280.0 270.0 ... NaN NaN NaN \\\n", + "1 270.0 280.0 270.0 ... NaN NaN NaN \n", + "2 160.0 160.0 160.0 ... NaN NaN NaN \n", + "3 160.0 160.0 160.0 ... NaN NaN NaN \n", + "4 90.0 100.0 90.0 ... NaN NaN NaN \n", + "\n", + " Appearance Storage Crop Repack Trans Mode Unnamed: 24 Unnamed: 25 \n", + "0 NaN NaN NaN E NaN NaN NaN \n", + "1 NaN NaN NaN E NaN NaN NaN \n", + "2 NaN NaN NaN N NaN NaN NaN \n", + "3 NaN NaN NaN N NaN NaN NaN \n", + "4 NaN NaN NaN N NaN NaN NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "full_pumpkins = pd.read_csv('../../data/US-pumpkins.csv')\n", + "\n", + "full_pumpkins.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NamePackageVarietyOriginItem SizeColor
2BALTIMORE24 inch binsHOWDEN TYPEDELAWAREmedORANGE
3BALTIMORE24 inch binsHOWDEN TYPEVIRGINIAmedORANGE
4BALTIMORE24 inch binsHOWDEN TYPEMARYLANDlgeORANGE
5BALTIMORE24 inch binsHOWDEN TYPEMARYLANDlgeORANGE
6BALTIMORE36 inch binsHOWDEN TYPEMARYLANDmedORANGE
\n", + "
" + ], + "text/plain": [ + " City Name Package Variety Origin Item Size Color\n", + "2 BALTIMORE 24 inch bins HOWDEN TYPE DELAWARE med ORANGE\n", + "3 BALTIMORE 24 inch bins HOWDEN TYPE VIRGINIA med ORANGE\n", + "4 BALTIMORE 24 inch bins HOWDEN TYPE MARYLAND lge ORANGE\n", + "5 BALTIMORE 24 inch bins HOWDEN TYPE MARYLAND lge ORANGE\n", + "6 BALTIMORE 36 inch bins HOWDEN TYPE MARYLAND med ORANGE" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Select the columns we want to use\n", + "columns_to_select = ['City Name','Package','Variety', 'Origin','Item Size', 'Color']\n", + "pumpkins = full_pumpkins.loc[:, columns_to_select]\n", + "\n", + "# Drop rows with missing values\n", + "pumpkins.dropna(inplace=True)\n", + "\n", + "pumpkins.head()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Хајде да погледамо наше податке!\n", + "\n", + "Кроз визуализацију са Seaborn\n" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAjQAAAHpCAYAAACVw6ZvAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABU3klEQVR4nO3deVRU5f8H8PeFkQFZZXNQ2RQBwy3NNRVGMTCz3JW0JJcyjdwXLJcwBSszTcU0wKxccl9KyoVxS0VTEhXXRM1A+7qwmOz394eH+/M6A7IKV9+vc+7Jee6zfO7IkXfP3JkRRFEUQURERKRgRlVdABEREVF5MdAQERGR4jHQEBERkeIx0BAREZHiMdAQERGR4jHQEBERkeIx0BAREZHiMdDQc0sURaSnp4MfxUREpHwMNPTcysjIgLW1NTIyMqq6FCIiKicGGiIiIlI8BhoiIiJSPAYaIiIiUjwGGiIiIlI8VVUXQFTVrq5qAkszZnsipXIbdqWqS6BqgP+KExERkeIx0BAREZHiMdAQERGR4jHQEBERkeIx0BAREZHiMdAQERGR4jHQEBERkeIx0BAREZHiMdAQERGR4jHQEBERkeIx0BAREZHiMdAQERGR4jHQEBERkeIx0BAREZHiMdCUU3BwMHr27KnXrtPpIAgC7t27J7Xl5+djwYIFaNKkCUxNTVGrVi1069YNhw4dkvqcO3cOgiDgyJEjsvnatm0LU1NTZGVlSW1ZWVkwNTVFVFSUVIsgCBAEATVq1EDt2rXRtWtXREdHo6CgQDafm5ub1PfRIyIiAgCQnJwMQRDg6OiIjIwM2djmzZtj1qxZBp8PPz8/g/MWHo0bN4ZGo8HcuXP1xvbv3x9t27ZFfn4+Zs2aJY1RqVRwc3PDuHHjkJmZKavP0PH4c0dERM8+BpqnRBRFDBw4EGFhYRgzZgySkpKg0+ng7OwMPz8/bNmyBQDg7e0NjUYDnU4njc3IyMCJEyfg4OAg+2V9+PBhZGdno3PnzlJbYGAgUlJSkJycjJ07d0Kr1WLMmDF47bXXkJeXJ6spLCwMKSkpsiMkJETWJyMjA1988UWJr3PTpk3SXPHx8QCA3bt3S2379+/H8uXL8cknnyAxMVEat379euzYsQPfffcdjI2NAQA+Pj7StcybNw/Lly/HhAkTZOs9Onfh0bJlyxLXS0REzwZVVRfwvPjpp5+wYcMGbNu2DT169JDaly9fjtu3b2P48OHo2rUrzM3NodVqodPpMHXqVADAwYMH4enpiU6dOkGn08HPzw/Aw10gV1dXuLu7S/Op1WpoNBoAQN26ddGiRQu0bdsWXbp0wcqVKzF8+HCpr6WlpdS3KCEhIfjyyy8xevRoODo6PvE6bW1tpT8X7ibZ2dnJ1nn99dfx5ptvYsiQITh69Cju3buH0aNHIyIiAl5eXlI/lUoljRswYAD27NmDbdu24ZtvvpH6PD43ERE9n7hD85SsXr0anp6esjBTaMKECbh9+zZ27doFANBqtTh48KC0oxIXFwc/Pz/4+voiLi5OGhcXFwetVvvEtTt37oxmzZph06ZNpa47KCgIHh4eCAsLK/XY4ixcuBC3b9/G7NmzMWrUKDRu3Fhvd+hxZmZmyMnJKfOa2dnZSE9Plx1ERPRs4A5NBdixYwcsLCxkbfn5+bLHFy5cQKNGjQyOL2y/cOECgIeB5v79+zh27BjatWsHnU6HSZMmoUOHDhgyZAiysrIgiiLi4+NlOy7F8fb2xqlTp2RtU6ZMwccffyxr27lzJzp27Cg9LryvpkePHhg3bhwaNGhQovWexMrKCjExMXjllVdgbm6OU6dOQRCEIvv/8ccfWL16tezlNQBo3749jIzkubzwPpvHhYeH45NPPtFrd307EVZWVmW4CiIiqi4YaCqAVqtFZGSkrO3o0aMYPHiwrE0UxRLN5+HhgXr16kGn08HHxwcnT56Er68vHB0d4eLigsOHD0MURWRnZ5doh6Zw7ccDw6RJkxA
cHCxrq1u3rt7YgIAAdOjQAdOnT8fq1atLtF5JdO7cGW3btkXz5s3h6uqqdz4xMREWFhbIz89HTk4OunfvjsWLF8v6rFu3rsig+LjQ0FCMHz9eepyeng5nZ+fyXQQREVULDDQVwNzcHB4eHrK2v//+W/bY09MTSUlJBscXtnt6ekptfn5+iIuLQ9OmTdGwYUPp/pXCl51EUYSHh0eJfyEnJSXJ7rUBAHt7e726ixIREYF27dph0qRJJepfUiqVCiqV4R9DLy8vbNu2DSqVCnXq1IGJiYleH2dn5xJfg1qthlqtLle9RERUPfEemqdk4MCBuHjxIrZv3653bv78+bCzs0PXrl2lNq1Wi99//x27du2SbgIGIN0YrNPpSrw7s3fvXiQmJqJPnz5lrr9169bo3bu3dKPy02BiYgIPDw+4ubkZDDNERESFuEPzlAwcOBDr16/HkCFD8Pnnn6NLly5IT0/HkiVLsG3bNqxfvx7m5uZS/8L7aKKjo7FixQqp3dfXV7pvZtSoUXrrZGdnIzU1Ffn5+bh58yZiY2MRHh6O1157DW+//basb0ZGBlJTU2VtNWvWLPJ+kjlz5sDHx6fIHZWqcPv2bb1rsLGxgampaRVVREREVYE7NE+JIAj46aefMG3aNCxYsABeXl7o2LEjrl69Cp1Op/fhfO7u7nB1dUVGRgZ8fX2ldhcXF9SpUwc5OTmynZtCsbGxcHJygpubGwIDAxEXF4dFixZh69at0ue7FJoxYwacnJxkx+TJk4u8Bk9PTwwdOlT24X5Vzd/fX+8aCj/Th4iInh+CWNI7VYmeMenp6bC2tkZaWhrf5UREpHDcoSEiIiLFY6AhIiIixWOgISIiIsVjoCEiIiLFY6AhIiIixWOgISIiIsVjoCEiIiLFY6AhIiIixWOgISIiIsVjoCEiIiLFY6AhIiIixWOgISIiIsVjoCEiIiLFY6AhIiIixWOgISIiIsVjoCEiIiLFY6AhIiIixWOgISIiIsVjoCEiIiLFY6AhIiIixWOgISIiIsVTVXUBRFXt6qomsDRjtq8obsOuVHUJRPQc4r/iREREpHgMNERERKR4DDRERESkeAw0REREpHgMNERERKR4DDRERESkeAw0REREpHgMNERERKR4DDRERESkeAw0REREpHgMNERERKR4DDRERESkeAw0REREpHgMNERERKR4DDRERESkeAw0z5ng4GAIggBBEGBiYgIPDw+EhYUhLy8PAKDT6SAIAu7duyd7bOhITU3Vm3/WrFlF9i88/P39ERAQoDd26dKlsLGxwd9//623bu3atdGnTx/89ddfUn83NzeD80dERFTOk0dERNWWqqoLoKcvMDAQMTExyM7Oxi+//ILRo0ejRo0aCA0NLXLM+fPnYWVlJWtzdHTU6zdx4kSMHDlSetyqVSu8++67GDFihNSWm5uLJk2a4JtvvsF7770HALhy5QomT56MyMhI1KtXD5cuXZLWtbS0xMWLF/Huu++iR48eOHXqFIyNjQEAYWFhsrkBwNLSspTPCBERKR0DzXNIrVZDo9EAAN5//31s3rwZ27ZtKzbQODo6wsbG5olzW1hYwMLCQnpsbGwMS0tLab1CCxcuxAcffIBXXnkFbm5uGDZsGF555RW89dZbBtd1cnLCjBkzMGjQIFy6dAleXl4AYHDuomRnZyM7O1t6nJ6eXqJxRERU/THQEMzMzHD79u2nuuaQIUOwefNmDB06FL1798bp06dx5syZYseYmZkBAHJycsq0Znh4OD755BO9dssOi2BlaQ7bBv5lmpeIiKoe76F5jomiiN27d+PXX39F586di+1br149affFwsICPj4+5V5/+fLlOH36NMaOHYvly5fDwcGhyL4pKSn44osvULduXWl3BgCmTJkiq8vCwgIHDhwwOEdoaCjS0tKk4/r16+W+BiIiqh64Q/Mc2rFjBywsLJCbm4uCggK8+eabmDVrVrFjDhw4ILs3pUaNGuWuw9HREe+
99x62bNmCnj17GuxTr149iKKI//77D82aNcPGjRthYmIinZ80aRKCg4NlY+rWrWtwLrVaDbVaXe66iYio+mGgeQ5ptVpERkbCxMQEderUgUr15B8Dd3f3Et1DU1oqlarY9Q8cOAArKys4OjoavNnX3t4eHh4eFV4XEREpCwPNc8jc3FwxIaCyghQRET1bGGioRG7duoWsrCxZm52dXYW89FQeGRkZep+HU7NmTb23mBMR0bONNwVTiXh5ecHJyUl2/PHHH1VdFmbMmKFX1+TJk6u6LCIiesoEURTFqi6CqCqkp6fD2toaV05u4du2iYgUjjs0REREpHgMNERERKR4DDRERESkeAw0REREpHgMNERERKR4DDRERESkeAw0REREpHgMNERERKR4DDRERESkeAw0REREpHgMNERERKR4/LZteu7Z1tfy27mJiBSOOzRERESkeAw0REREpHgMNERERKR4DDRERESkeAw0REREpHgMNERERKR4DDRERESkeAw0REREpHgMNERERKR4/KRgeu5dXdUElmYlz/Zuw65UYjVERFQW3KEhIiIixWOgISIiIsVjoCEiIiLFY6AhIiIixWOgISIiIsVjoCEiIiLFY6AhIiIixWOgISIiIsVjoCEiIiLFY6AhIiIixWOgISIiIsVjoCEiIiLFY6AhIiIixWOgISIiIsVjoCml4OBgCIIgHXZ2dggMDMSpU6dk/QRBwJYtW2RtcXFxeO211+Dg4ABTU1M0aNAAAwYMwP79+4td083NDYIgYO3atXrnfHx8IAgCVq5cqXcuPDwcxsbG+Pzzzw3Om5qaipCQENSvXx9qtRrOzs7o0aMH9uzZI1v7q6++kh6LooiJEyfCysoKOp2uyFqLOt58803UrFkTq1evlo0rKChA+/bt0bdvXwDy59nExAQeHh4ICwtDXl4eAECn0xW5RmpqarHPJxERPXsYaMogMDAQKSkpSElJwZ49e6BSqfDaa68VO2bp0qXo0qUL7OzssG7dOpw/fx6bN29G+/btMW7cuCeu6ezsjJiYGFnbkSNHkJqaCnNzc4NjoqOjMXnyZERHR+udS05ORsuWLbF37158/vnnSExMRGxsLLRaLUaPHm1wvvz8fAwbNgyrVq1CXFwc/Pz89PocO3ZMem42btwIADh//rzUFhkZiYiICISEhCAlJUUaN3/+fPz1119YtmyZ1Fb4PF+8eBETJkzArFmz9MLZo3MXHo6OjoafRCIiemapqroAJVKr1dBoNAAAjUaDqVOnomPHjvj333/h4OCg1//atWsYO3Ysxo4diy+//FJ2rmnTpvjwww+fuOagQYOwYMECXL9+Hc7OzgAeBpZBgwZh1apVev337duHBw8eICwsDKtWrcLvv/+O9u3bS+dHjRoFQRAQHx8vC0Q+Pj4YOnSo3nzZ2dkICgrC8ePHceDAAXh5eRms89Hrt7W1BQA4OjrCxsZGag8JCcGWLVswYsQI7NixA+fOncOMGTOwbt062NvbS/0efZ7ff/99bN68Gdu2bUNoaKjU5/G5iYjo+cQdmnLKzMzEDz/8AA8PD9jZ2Rnss3HjRuTm5mLy5MkGzwuC8MR1ateujYCAAHz33XcAgP/++w/r1q0zGD4AICoqCkFBQahRowaCgoIQFRUlnbtz5w5iY2MxevRog7s7jweEzMxMdO/eHWfPnsWhQ4eKDDMlJQgCYmJicODAAaxYsQLBwcEYOHAgXn/99WLHmZmZIScnp8zrZmdnIz09XXYQEdGzgTs0ZbBjxw5YWFgAAO7fvw8nJyfs2LEDRkaG8+GFCxdgZWUl7TYAD0POkCFDpMeHDx9GkyZNil136NChmDBhAj766CNs2LABDRo0QPPmzfX6paenY8OGDTh8+DAAYPDgwejYsSMWLlwICwsLXLp0CaIowtvbu0TXO3v2bFhaWiIpKcngDlRZuLq64quvvsLw4cNRr149/Pbbb0X2FUURe/bswa+//oqQkBDZuXr16un
Ne+bMGYPzhIeH45NPPtFrt+ywCFaW+sHOtoF/SS6FiIiqAe7QlIFWq0VCQgISEhIQHx+PgIAAdOvWDVevXi1yzOO7MAEBAUhISMDPP/+M+/fvIz8//4nrdu/eHZmZmdi/fz+io6OL3J1Zs2YNGjRogGbNmgEAmjdvDldXV6xbtw7Aw4BQGq+88gru37+PuXPnlmrck7zzzjtwcnJCSEgIrKys9M4XBkdTU1N069YNAwYMwKxZs2R9Dhw4IP1dJCQk4JdffilyvdDQUKSlpUnH9evXK/R6iIio6nCHpgzMzc3h4eEhPf72229hbW2NFStW4NNPP9Xr37BhQ6SlpSE1NVXapbGwsICHhwdUqpL/FahUKrz11luYOXMmjh49is2bNxvsFxUVhTNnzsjmLigoQHR0NIYNG4aGDRtCEAScO3euROt26dIFISEheOONN1BQUICFCxeWuOYnUalURT4HWq0WkZGRMDExQZ06dQz2c3d3L/E9NGq1Gmq1ujzlEhFRNcUdmgogCAKMjIzw4MEDg+f79u2LGjVqYN68eeVea+jQodi3bx/eeOMN1KpVS+98YmIijh8/Dp1OJ9u50Ol0OHz4MM6dOwdbW1sEBARgyZIluH//vt4c9+7d02t75ZVXsH37dqxYsaJENzFXhMLg6OLiUqrgR0REzx/+liiD7Oxs6bNO7t69i8WLFyMzMxM9evQw2N/FxQXz58/HmDFjcOfOHQQHB8Pd3R137tzBDz/8AAAwNjYu0dqNGjXC//73P9SsWdPg+aioKLRu3RqdOnXSO9eqVStERUXh888/x5IlS/Dyyy+jdevWCAsLQ9OmTZGXl4ddu3YhMjISSUlJeuP9/f2xY8cO9OjRAwUFBVi8eHGJaq5Mt27dQlZWlqzNzs4ONWrUqKKKiIioKnCHpgxiY2Ph5OQEJycntGnTBseOHcP69esNfi5LoZCQEPz222/4999/0bdvXzRs2BCvvvoqrly5gtjY2CfeEPwoOzs7mJmZ6bXn5OTghx9+QJ8+fQyO69OnD1atWoXc3FzUr18fJ06cgFarxYQJE9C4cWN07doVe/bsQWRkZJFrd+7cGT///DNWrlyJ0aNHl/p+nIrm5eUl/V0UHn/88UeV1kRERE+fIFb1bySiKpKeng5ra2tcObmF73IiIlI47tAQERGR4jHQEBERkeIx0BAREZHiMdAQERGR4jHQEBERkeIx0BAREZHiMdAQERGR4jHQEBERkeIx0BAREZHiMdAQERGR4jHQEBERkeLx27bpuWdbXwsrK6uqLoOIiMqBOzRERESkeAw0REREpHgMNERERKR4DDRERESkeAw0REREpHgMNERERKR4DDRERESkeAw0REREpHgMNERERKR4/KRgeu5dXdUElmaVl+3dhl2ptLmJiOgh7tAQERGR4jHQEBERkeIx0BAREZHiMdAQERGR4jHQEBERkeIx0BAREZHiMdAQERGR4jHQEBERkeIx0BAREZHiMdAQERGR4jHQEBERkeIx0BAREZHiMdAQERGR4jHQEBERkeIx0ChMcHAwBEHAyJEj9c6NHj0agiAgODhY6tuzZ0+9sREREbJxW7ZsgSAI0mOdTgdBEHDv3j29Nby9vaFWq5GamirrW9yh0+kwa9YsNG/eXG++5ORkCIKAhIQEg/M5ODjg1VdfRWJiosHn4fEjMDCwBM8iERE9axhoFMjZ2Rlr167FgwcPpLasrCysXr0aLi4uxY41NTXFvHnzcPfu3VKve/DgQTx48AB9+/bFd999BwBo3749UlJSpKN///4IDAyUtbVv377Ua50/fx4pKSn49ddfkZ2dje7duyMnJ0fW5/F1UlJSsGbNmlKvRUREysdAo0AtWrSAs7MzNm3aJLVt2rQJLi4uePHFF4sd6+/vD41Gg/Dw8FKvGxUVhTfffBNvvfUWoqOjAQAmJibQaDTSYWZmBrVaLWszMTEp9VqOjo7QaDRo0aIFxo4di+vXr+PcuXOyPo+
vo9FoUKtWrVKvRUREysdAo1BDhw5FTEyM9Dg6OhrvvPPOE8cZGxtj7ty5+Prrr/H333+XeL2MjAysX78egwcPRteuXZGWloYDBw6UqfbSSEtLw9q1awGgTMHoUdnZ2UhPT5cdRET0bFBVdQFUNoMHD0ZoaCiuXr0KADh06BDWrl0LnU73xLG9evVC8+bNMXPmTERFRZVovbVr16Jhw4bw8fEBAAwcOBBRUVHo2LFjma+hOPXq1QMA3L9/HwDw+uuvw9vbW9Znx44dsLCwkLVNmzYN06ZNMzhneHg4PvnkE712yw6LYGVpXhFlG3Tn8u4n9rFt4F9p6xMRPQ8YaBTKwcEB3bt3x8qVKyGKIrp37w57e/sSj583bx46d+6MiRMnlqh/dHQ0Bg8eLD0ePHgwfH198fXXX8PS0rLU9T/JgQMHULNmTRw5cgRz587FsmXL9PpotVpERkbK2mxtbYucMzQ0FOPHj5cep6enw9nZueKKJiKiKsNAo2BDhw7FBx98AABYsmRJqcZ26tQJAQEBCA0Nld4VVZSzZ8/iyJEjiI+Px5QpU6T2/Px8rF27FiNGjHjielZWVkhLS9NrL3wnlbW1tazd3d0dNjY28PLywq1btzBgwADs379f1sfc3BweHh5PXLuQWq2GWq0ucX8iIlIO3kOjYIGBgcjJyUFubi4CAgJKPT4iIgLbt2/H4cOHi+0XFRWFTp064c8//0RCQoJ0jB8/vsQvWXl5eeHvv//GzZs3Ze0nTpyAqalpse/OGj16NE6fPo3NmzeXaC0iInr+cIdGwYyNjZGUlCT9ubSaNGmCQYMGYdGiRUX2yc3Nxffff4+wsDA0btxYdm748OH48ssvcebMGenemqIEBATAy8sLQUFB+PTTT6HRaHDixAl8/PHHGDNmTLH116xZEyNGjMDMmTPRs2dP6TNzsrOzpc/DKaRSqUr10hsRET0buEOjcFZWVrCysirz+LCwMBQUFBR5ftu2bbh9+zZ69eqld65Ro0Zo1KhRiXZpVCoVfvvtN7i4uCAoKAiNGzfGzJkzMWbMGMyePfuJ4z/44AMkJSVh/fr1UltsbCycnJxkR4cOHZ44FxERPXsEURTFqi6CqCqkp6fD2toaV05uqdR3OZUE3+VERFQ+3KEhIiIixWOgISIiIsVjoCEiIiLFY6AhIiIixWOgISIiIsVjoCEiIiLFY6AhIiIixWOgISIiIsVjoCEiIiLFY6AhIiIixWOgISIiIsVjoCEiIiLFU1V1AURVzba+tlzfWE5ERFWPOzRERESkeAw0REREpHgMNERERKR4DDRERESkeAw0REREpHgMNERERKR4pQ40cXFxlVEHERERUZmVOtAEBgaiQYMG+PTTT3H9+vXKqImIiIioVEodaG7cuIEPPvgAGzZsQP369REQEICffvoJOTk5lVEfUaW7uqoJkqPcq7oMIiIqh1IHGnt7e4wbNw4JCQk4evQoPD09MWrUKNSpUwcffvgh/vzzz8qok4iIiKhI5bopuEWLFggNDcUHH3yAzMxMREdHo2XLlujYsSPOnDlTUTUSERERFatMgSY3NxcbNmzAq6++CldXV/z6669YvHgxbt68iUuXLsHV1RX9+vWr6FqJiIiIDCr1l1OGhIRgzZo1EEURb731Fj777DM0btxYOm9ubo4vvvgCderUqdBCiYiIiIpS6kBz9uxZfP311+jduzfUarXBPvb29nx7NxERET01pX7JaebMmejXr59emMnLy8P+/fsBACqVCr6+vhVTIREREdETlDrQaLVa3LlzR689LS0NWq22QooiIiIiKo1SBxpRFCEIgl777du3YW5uXiFFEREREZVGie+h6d27NwBAEAQEBwfLXnLKz8/HqVOn0L59+4qvkIiIiOgJShxorK2tATzcobG0tISZmZl0zsTEBG3btsWIESMqvkIiIiKiJyhxoImJiQEAuLm5YeLEiXx5iYiIiKqNMr3LSa1WY/fu3fjmm2+QkZE
BAPjnn3+QmZlZ4QUSERERPUmpP4fm6tWrCAwMxLVr15CdnY2uXbvC0tIS8+bNQ3Z2NpYtW1YZdRIREREVqdQ7NGPGjMFLL72Eu3fvyu6j6dWrF/bs2VOhxREREVHpzZo1C82bN6/qMp6qUgeaAwcO4OOPP4aJiYms3c3NDTdu3KiwwoiIiJ5XqampCAkJQf369aFWq+Hs7IwePXpw46AYpQ40BQUFyM/P12v/+++/YWlpWSFFPY+e9MPr5uaGr776Surv5uYGQRBw5MgR2Txjx46Fn5+f9HjWrFkQBAGCIEClUsHe3h6dOnXCV199hezsbNlYPz8/qe+jx8iRI6U+j7ZbWVmhVatW2Lp1q2yelStXGpzH1NRU6hMcHIyePXsW+Xw8fr1FWbNmDYyNjTF69Ogn9iUiUoLk5GS0bNkSe/fuxeeff47ExETExsZCq9U+1X/rcnNzn9paFaHUgeaVV16R/aIRBAGZmZmYOXMmXn311Yqs7blR1h9eU1NTTJky5Ynz+/j4ICUlBdeuXUNcXBz69euH8PBwtG/fXrqpu9CIESOQkpIiOz777DNZn5iYGKSkpOD48eN4+eWX0bdvXyQmJsr6WFlZ6c1z9erVUjwrJRMVFYXJkydjzZo1yMrKqvD5iYietlGjRkEQBMTHx6NPnz7w9PSEj48Pxo8fL/1P7LVr1/DGG2/AwsICVlZW6N+/P27evFnknAUFBQgLC0O9evWgVqvRvHlzxMbGSueTk5MhCALWrVsHX19fmJqa4scff6z0a61IpQ408+fPx6FDh/DCCy8gKysLb775pvRy07x58yqjxmdeSX54DXn33Xdx5MgR/PLLL8XOr1KpoNFoUKdOHTRp0gQhISHYt28fTp8+rfd3VrNmTWg0GtlhZWUl62NjYwONRgNPT0/Mnj0beXl5el9GKgiC3jy1a9cu5TNTvCtXruD333/H1KlT4enpiU2bNhXbPzs7G+np6bKDiKg6uXPnDmJjYzF69GiDH49iY2ODgoICvPHGG7hz5w727duHXbt24a+//sKAAQOKnHfhwoWYP38+vvjiC5w6dQoBAQF4/fXXcfHiRVm/qVOnYsyYMUhKSkJAQECFX19lKnWgqVevHv78809MmzYN48aNw4svvoiIiAicPHkSjo6OlVHjM60kP7xFcXd3x8iRIxEaGoqCgoJSrevt7Y1u3bo9MQQUJy8vD1FRUQCgd0/V0xATE4Pu3bvD2toagwcPlmopSnh4OKytraXD2dkZAGDZYRGs/FY8jZKJiIp16dIliKIIb2/vIvvs2bMHiYmJWL16NVq2bIk2bdpg1apV2LdvH44dO2ZwzBdffIEpU6Zg4MCB8PLywrx589C8eXO9l/bHjh2L3r17w93dHU5OThV5aZWu1IEGePh//IMHD8Znn32GpUuXYvjw4bJ3PFHJleSHtzgff/wxrly5UqatQW9vbyQnJ8vali5dCgsLC9nx+NxBQUGwsLCAWq3GuHHj4Obmhv79+8v6pKWl6c3TrVu3UtdYlIKCAqxcuRKDBw8GAAwcOBAHDx7ElStXihwTGhqKtLQ06bh+/XqF1UNEVBFEUXxin6SkJDg7O0v/UwYAL7zwAmxsbJCUlKTXPz09Hf/88w9efvllWfvLL7+s1/+ll14qY+VVr0SfQ7Nt2zZ069YNNWrUwLZt24rt+/rrr1dIYc+LkvzwFsfBwQETJ07EjBkzit1uLGrtx79odNCgQfjoo49kbY+/VLRgwQL4+/vjr7/+wrhx47Bo0SLY2trK+lhaWuLEiROytooMvbt27cL9+/el+7bs7e3RtWtXREdHY/bs2QbHqNVq2XeQERFVNw0bNoQgCDh37lyVrK/kbwEoUaDp2bMnUlNT4ejoWOw7UwRBMPgOKCpaRfzwjh8/HkuXLsXSpUtLNS4pKQnu7u6yNmtra3h4eBQ7TqPRwMPDAx4eHoiJicGrr76Ks2fPyl5yNDIyeuI85REVFYU7d+7IQlJBQQF
OnTqFTz75BEZGZdp8JCKqUra2tggICMCSJUvw4Ycf6gWMe/fuoVGjRrh+/TquX78u7dKcPXsW9+7dwwsvvKA3p5WVFerUqYNDhw7B19dXaj906BBat25duRf0FJXoX/2CggLpl1VBQUGRB8NM6T36w3v//n298/fu3XviHBYWFpg+fTrmzJmj966lopw7dw6xsbHo06dPaUuWad26NVq2bIk5c+aUa57SuH37NrZu3Yq1a9ciISFBOk6ePIm7d+/it99+e2q1EBFVtCVLliA/Px+tW7fGxo0bcfHiRSQlJWHRokVo164d/P390aRJEwwaNAgnTpxAfHw83n77bfj6+hb5ktGkSZMwb948rFu3DufPn8fUqVORkJCAMWPGPOWrqzyl+uqD3NxcBAYGYtmyZWjYsGFl1fTcWbJkCV5++WW0bt0aYWFhaNq0KfLy8rBr1y5ERkYafE30ce+++y4WLFiA1atXo02bNrJzeXl5SE1NRUFBAW7fvg2dTodPP/0UzZs3x6RJk2R9//vvP6Smpsra1Go1atWqVeTaY8eORa9evTB58mTUrVsXwMOXsx6fBwAcHR2l3ZO0tDQkJCTIztvZ2Un/x3Hjxg29866urvj+++9hZ2eH/v37671k9uqrryIqKgqBgYFF1ktEVJ3Vr18fJ06cwJw5czBhwgSkpKTAwcEBLVu2RGRkJARBwNatWxESEoJOnTrByMgIgYGB+Prrr4uc88MPP0RaWhomTJiAW7du4YUXXsC2bduerd/lYinZ29uLFy5cKO0weoJ//vlHHD16tOjq6iqamJiIdevWFV9//XUxLi5OFEVRdHV1FRcsWCD1f/yxKIri6tWrRQCir6+v1DZz5kwRgAhANDY2Fm1tbcUOHTqICxYsELOysmTjfX19pb6PHgEBAVIfAOLmzZtl4woKCkRvb2/x/fffF0VRFGNiYgzOA0BMSUkRRVEUhwwZYvD8sGHDpOszdP77778XmzRpIo4aNcrg87hu3TrRxMRE/Pfff5/4nKelpYkAxCsnt4i3L+16Yn8iIqq+BFEs3V2p48aNg1qtRkRERPmSFFEVS09Ph7W1Na6c3AIrS3PYNvCv6pKIiKiMSv1t23l5eYiOjsbu3bvRsmVLvRuWvvzyyworjoiIiKgkSh1oTp8+jRYtWgAALly4IDv3+P0MRERERE9DqQPN4x9xT0RERFTV+GEdREREpHil3qEBgOPHj+Onn37CtWvXkJOTIztXnu8GIiIiIiqLUu/QrF27Fu3bt0dSUhI2b96M3NxcnDlzBnv37oW1tXVl1EhERERUrFIHmrlz52LBggXYvn07TExMsHDhQpw7dw79+/eHi4tLZdRIREREVKxSB5rLly+je/fuAAATExPcv38fgiBg3LhxWL58eYUXSERERPQkpb6HplatWtL3BdWtWxenT59GkyZNcO/ePfz3338VXiAREdHTlBzl/uROFcRt2JWnttazrsQ7NKdPnwYAdOrUCbt27QIA9OvXD2PGjMGIESMQFBSELl26VE6VREREJLl+/TqGDh2KOnXqwMTEBK6urhgzZgxu374t9fHz84MgCBAEAaampvD09ER4eDgMfUHA4cOHYWxsLL0C86jk5GQIggBHR0e9L0Bu3rw5Zs2aJWu7dOkShg4dChcXF6jVatStWxddunTBjz/+iLy8PKlfYW2PH2vXri3Tc1LiQNO0aVO0adMGTZo0Qb9+/QAAH330EcaPH4+bN2+iT58+iIqKKlMRREREVDJ//fUXXnrpJVy8eBFr1qzBpUuXsGzZMuzZswft2rXDnTt3pL4jRoxASkoKzp8/j9DQUMyYMQPLli3TmzMqKgohISHYv38//vnnH4PrZmRk4Isvvii2tvj4eLRo0QJJSUlYsmQJTp8+DZ1Oh+HDhyMyMhJnzpyR9Y+JiUFKSors6NmzZ+mfFAAl/i6nAwcOICYmBhs2bEBBQQH69OmD4cOHo2PHjmVamKiqFX6XU1paGqy
srKq6HCKqJqr7S07dunXD6dOnceHCBZiZmUntqampaNCgAd5++21ERkbCz88PzZs3x1dffSX1admyJVxdXWUfsZKZmQknJyccP34cM2fORNOmTTFt2jTpfHJyMtzd3TFp0iRERkbi8uXLcHR0BPBwh6Znz56YNWsWRFGEj48Patasifj4eBgZ6e+ZiKIofauAIAjYvHlzmQPM40q8Q9OxY0dER0cjJSUFX3/9NZKTk+Hr6wtPT0/MmzcPqampFVIQERERGXbnzh38+uuvGDVqlCzMAIBGo8GgQYOwbt06vZeVRFHEgQMHcO7cOZiYmMjO/fTTT/D29oaXlxcGDx6M6Ohogy9LBQUFwcPDA2FhYQZrS0hIQFJSEiZOnGgwzACV+xVJpX6Xk7m5Od555x3s27cPFy5cQL9+/bBkyRK4uLjg9ddfr4waiYiICMDFixchiiIaNWpk8HyjRo1w9+5d/PvvvwCApUuXwsLCAmq1Gp06dUJBQQE+/PBD2ZioqCgMHjwYABAYGIi0tDTs27dPb25BEBAREYHly5fj8uXLeucLv9/Ry8tLart16xYsLCykY+nSpbIxQUFBsvMWFha4du1aKZ6R/1eurz7w8PDAtGnT8PHHH8PS0hI///xzeaYjIiKiEijh3SIYNGgQEhIScOjQIXTr1g0fffQR2rdvL50/f/484uPjERQUBABQqVQYMGBAkffEBgQEoEOHDpg+fXqJ1rezs0NCQgISEhJgY2Oj9+0CCxYskM4XHnXq1CnR3I8r01cfAMD+/fsRHR2NjRs3wsjICP3798ewYcPKOh0RERE9gYeHBwRBQFJSEnr16qV3PikpCbVq1YKDgwMAwNraGh4eHgAevrTk4eGBtm3bwt/fH8DD3Zm8vDxZiBBFEWq1GosXLzb4DQARERFo164dJk2aJGtv2LAhgIch6cUXXwQAGBsbS+urVPqRQ6PRSOfLq1Q7NP/88w/mzp0LT09P+Pn54dKlS1i0aBH++ecfrFixAm3btq2QooiIiEifnZ0dunbtiqVLl+LBgweyc6mpqfjxxx8xYMAAg/eqWFhYYMyYMZg4cSJEUUReXh5WrVqF+fPny3ZI/vzzT9SpUwdr1qwxWEPr1q3Ru3dvTJ06Vdb+4osvwtvbG1988QUKCgoq7qJLqMQ7NN26dcPu3bthb2+Pt99+G0OHDpW9TkZERESVb/HixWjfvj0CAgLw6aefwt3dHWfOnMGkSZNQt25dzJkzp8ix7733HmbPno2NGzdCpVLh7t27GDZsmN5OTOFHsYwcOdLgPHPmzIGPj49s10UQBMTExKBr1654+eWXERoaikaNGiE3Nxf79+/Hv//+C2NjY9k89+7d03tTkaWlJczNzUv7tABiCfXo0UPcsmWLmJeXV9IhRNVaWlqaCEBMS0ur6lKIiEolOTlZHDJkiFi7dm2xRo0aorOzsxgSEiL+73//k/r4+vqKY8aM0Rv73nvviT4+PuJrr70mvvrqqwbnP3r0qAhA/PPPP8UrV66IAMSTJ0/K+rz77rsiAHHmzJmy9vPnz4tDhgwR69WrJ6pUKtHa2lrs1KmT+M0334i5ublSPwAGj/Dw8DI9JyX+HBqiZw0/h4aI6NlRrnc5EREREVUHZX6XE9Gz4uqqJrA008/2/NI4IiLl4A4NERERKR4DDRERESkeAw0REREpHgMNERERKR4DDRERESkeAw0REREpHgMNERERKR4/h4aIiOgRdy7vfmpr2Tbwf2prPeu4Q0NERKQQy5Ytg6WlJfLy8qS2zMxM1KhRA35+frK+Op0OgiDg8uXLcHNzw1dffaU336xZs9C8eXODj93c3CAIQpFHcHAwABR5fu3atRV89cXjDg0REZFCaLVaZGZm4vjx42jbti0A4MCBA9BoNDh69CiysrJgamoKAIiLi4OLiwsaNGhQprWOHTuG/Px8AMDvv/+OPn364Pz589J335mZmUl9Y2JiEBgYKBtvY2NTpnXLioGGiIh
IIby8vODk5ASdTicFGp1OhzfeeAN79+7FkSNHpJ0anU4HrVZb5rUcHBykP9va2gIAHB0dDQYVGxsbaDSaMq9VEfiSExERkYJotVrExcVJj+Pi4uDn5wdfX1+p/cGDBzh69Gi5Ao3SMNAQEREpiFarxaFDh5CXl4eMjAycPHkSvr6+6NSpE3Q6HQDg8OHDyM7OlgWaKVOmwMLCQnbMnTu3QmoKCgrSm/vatWsVMndJ8SUnIiIiBfHz88P9+/dx7Ngx3L17F56ennBwcICvry/eeecdZGVlQafToX79+nBxcZHGTZo0SbqRt9CiRYuwf//+cte0YMEC+PvL37FVp06dcs9bGtyheQYEBwcbvMP80qVLAIDw8HAYGxvj888/1xu7cuVK2euhK1eulMYbGRnByckJAwYMwLVr1yCKIvz9/REQEKA3z9KlS2FjYwN/f/9i74p3c3OTxpSmrpJe8+M3pRERPWs8PDxQr149xMXFIS4uDr6+vgAeBghnZ2f8/vvviIuLQ+fOnWXj7O3t4eHhITsK740pL41Goze3SvV090wYaJ4RgYGBSElJkR3u7u4AgOjoaEyePBnR0dElmsvKygopKSm4ceMGNm7ciPPnz6Nfv34QBAExMTE4evQovvnmG6n/lStXMHnyZHz99dfYuHGjrAbg4d3vhY+PHTsmjSttXSW55jVr1pRpLiIiJdFqtdDpdNDpdLK3a3fq1Ak7d+5EfHz8c3X/DMBA88xQq9XQaDSyw9jYGPv27cODBw8QFhaG9PR0/P7770+cSxAEaDQaODk5oX379hg2bBji4+ORnp4OZ2dnLFy4EBMnTsSVK1cgiiKGDRuGV155BW+99Rasra1lNQD/f/e7RqOR7povS10lueZatWqVeh4iIqXRarU4ePAgEhISpB0aAPD19cU333yDnJycpxpo7t27h9TUVNlx//79p7Y+wHtonnlRUVEICgpCjRo1EBQUhKioKLRv377E42/duoXNmzfD2NgYxsbGAIAhQ4Zg8+bNGDp0KHr37o3Tp0/jzJkzT7WussjOzkZ2drb0OD09vVLXIyJlUsKn92q1Wjx48ADe3t6oXbu21O7r64uMjAzp7d1PyzvvvKPXFh4ejqlTpz61GiCS4g0ZMkQ0NjYWzc3NpaNv375iWlqaaGZmJiYkJIiiKIonT54ULSwsxIyMDGlsTEyMaG1tLXsMQDQ3Nxdr1qwpAhABiB9++KFszZs3b4r29vaikZGRuHnz5iJrA6B3vix1leSazc3NxTlz5hQ5ZubMmdL1PHpcOblFvH1pV5HjiIio+uMOzTNCq9UiMjJSemxubo41a9agQYMGaNasGQCgefPmcHV1xbp16zBs2LAi57K0tMSJEyeQm5uLnTt34scff8ScOXNkfRwdHfHee+9hy5Yt6NmzZ6lqLWtdj3v8mgEUe4NbaGgoxo8fLz0ufAmNiIiUj4HmGWFubg4PDw9ZW1RUFM6cOSO707ygoADR0dHFBgcjIyNprkaNGuHy5ct4//338f3338v6qVSqMt3FXta6HmfomoujVquhVqtLVSsRESkDA80zKjExEcePH4dOp5PtWty5cwd+fn44d+4cvL29SzTX1KlT0aBBA4wbNw4tWrSoNnUREREVYqB5RkVFRaF169bo1KmT3rlWrVohKirK4Oe/GOLs7IxevXphxowZ2LFjx1OrKz8/HwkJCbI+arUajRo1AvDwJt/U1FTZeZVKBXt7+3LVSEREysO3bT+DcnJy8MMPP6BPnz4Gz/fp0werVq1Cbm5uieccN24cfv75Z8THxz+1ujIzM/Hiiy/Kjh49ekj9Y2Nj4eTkJDs6dOhQ5vqIiEi5BFEUxaougqgqpKenw9raGldOboGVpbki3qpJRESGcYeGiIiIFI+BhoiIiBSPgYaIiIgUj4GGiIiIFI+BhoiIiBSPgYaIiIgUj4GGiIiIFI+BhoiIiBSPgYaIiIgUj4GGiIiIFI+BhoiIiBSPgYaIiIg
UT1XVBRBVNdv6WlhZWVV1GUREVA7coSEiIiLFY6AhIiIixWOgISIiIsVjoCEiIiLFY6AhIiIixWOgISIiIsVjoCEiIiLFY6AhIiIixWOgoefe1VVNkBzlXtVlEBFROTDQEBERkeIx0BAREZHiMdAQERGR4jHQEBERkeIx0BAREZHiMdAQERGR4jHQEBERkeIx0BAREZHiMdAQERGR4jHQEBERkeIx0BAREZHiMdAQERGR4jHQEBERkeIx0BAREZHiMdAQERGR4jHQKFRwcDAEQZAOOzs7BAYG4tSpU7J+giBgy5YteuOTk5MhCAISEhL0zvn5+WHs2LHSYzc3N9lahUdERESR8xZ3zJ49G+bm5rh06ZJs7D///INatWph8eLFeuuam5ujRYsWWL9+vdR/1qxZBuf39vYuxTNJRETPAgYaBQsMDERKSgpSUlKwZ88eqFQqvPbaa5WyVlhYmLRW4RESEqLXz9nZWdZnwoQJ8PHxkbVNnDgRAQEBCA4ORkFBgTR2xIgRaNmyJUaPHq237smTJ9GqVSsMGDAAv//+u3T+8blTUlJw8ODBSnkOiIio+lJVdQFUdmq1GhqNBgCg0WgwdepUdOzYEf/++y8cHBwqdC1LS0tpreIYGxvL+llYWEClUumN/eabb+Dj44Mvv/wSEydOxMqVK3Ho0CEkJiZCEAS9dTUaDZYsWYIffvgB27dvR/v27QHA4NxFyc7ORnZ2tvQ4PT29ROOIiKj6Y6B5RmRmZuKHH36Ah4cH7OzsqrqcJ3JwcMDy5csRFBSEZs2aYdy4cVi4cCGcnZ2LHKNSqVCjRg3k5OSUac3w8HB88skneu2ubyfCysqqTHMSEVH1wJecFGzHjh2wsLCAhYUFLC0tsW3bNqxbtw5GRhX/1zplyhRprcLjwIED5ZqzZ8+e6N+/PwIDA+Hr64shQ4YU2TcnJwfh4eFIS0tD586dpfbExES9ukaOHGlwjtDQUKSlpUnH9evXy1U/ERFVH9yhUTCtVovIyEgAwN27d7F06VJ069YN8fHxcHV1rdC1Jk2ahODgYFlb3bp1yz3v9OnTsWrVKnz88ccGz0+ZMgUff/wxsrKyYGFhgYiICHTv3l067+XlhW3btsnGFLXbolaroVary10zERFVPww0CmZubg4PDw/p8bfffgtra2usWLECn376abFjC3/pp6Wl6Z27d+8erK2tZW329vaytSqKSqWS/fdxhUHKwsICtWvXlt1fAwAmJiaVUhcRESkLX3J6hgiCACMjIzx48OCJfW1tbWFvb48//vhD1p6eno5Lly7B09OzssoslcIgpdFo9MIMERFRIe7QKFh2djZSU1MBPHzJafHixcjMzESPHj1k/a5cuaL3eTMNGzbE+PHjMXfuXNSuXRtt27bF7du3MXv2bDg4OKB3796y/hkZGdJahWrWrFnlN9Pm5eXp1SUIAmrXrl1FFRERUVVgoFGw2NhYODk5AXj49mZvb2+sX78efn5+sn7jx4/XG3vgwAFMnjwZFhYWmDdvHi5fvgxbW1u8/PLLiIuLg5mZmaz/jBkzMGPGDFnbe++9h2XLllXsRZXSmTNnpOegkFqtRlZWVhVVREREVUEQRVGs6iKIqkJ6ejqsra2RlpZW5TtNRERUPryHhoiIiBSPgYaIiIgUj4GGiIiIFI+BhoiIiBSPgYaIiIgUj4GGiIiIFI+BhoiIiBSPgYaIiIgUj4GGiIiIFI+BhoiIiBSPgYaIiIgUj4GGiIiIFI+BhoiIiBSPgYaIiIgUj4GGiIiIFI+BhoiIiBSPgYaIiIgUj4GGiIiIFI+BhoiIiBSPgYaIiIgUj4GGiIiIFI+Bhp57V1c1QXKUe1WXQURE5cBAQ0RERIrHQENERESKx0BDREREisdAQ0RERIrHQENERESKx0BDREREisdAQ0RERIrHQENERESKx0BDREREisdAQ0RERIrHQENERESKx0BDREREisdAQ0RERIrHQENERESKx0BTCsH
BwejZs6esbcOGDTA1NcX8+fOlPoIgICIiQtZvy5YtEARBerxy5UrY2NgYXEcQBGzZskXWtnHjRvj5+cHa2hoWFhZo2rQpwsLCcOfOnSLnS0pKgrOzM/r164ecnJxi13y09sePwMDAIsfMmjULzZs312tPTk6GIAhISEiQPTZ0HDlyRDb2wYMHsLW1hb29PbKzs/XmdnNzk8aamZnBzc0N/fv3x969e4usk4iInm0MNOXw7bffYtCgQYiMjMSECROkdlNTU8ybNw93796tkHU++ugjDBgwAK1atcLOnTtx+vRpzJ8/H3/++Se+//57g2OOHTuGjh07IjAwEOvWrYOJiUmJ1goMDERKSorsWLNmTYVcBwDs3r1bb/6WLVvK+mzcuBE+Pj7w9vbWC3aFwsLCkJKSgvPnz2PVqlWwsbGBv78/5syZU2G1EhGRcqiqugCl+uyzzzBz5kysXbsWvXr1kp3z9/fHpUuXEB4ejs8++6xc68THx2Pu3Ln46quvMGbMGKndzc0NXbt2xb179/TG7N27F2+88QZGjRqFefPmlWo9tVoNjUZTrpqLY2dn98T5o6KiMHjwYIiiiKioKAwYMECvj6WlpTSPi4sLOnXqBCcnJ8yYMQN9+/aFl5dXpdRPRETVE3doymDKlCmYPXs2duzYoRdmAMDY2Bhz587F119/jb///rtca/3444+wsLDAqFGjDJ5//CWkzZs3o3v37vj4449LHWaqg8uXL+Pw4cPo378/+vfvjwMHDuDq1aslGjtmzBiIooitW7caPJ+dnY309HTZQUREzwYGmlLauXMnPvvsM2zduhVdunQpsl+vXr3QvHlzzJw5s1zrXbx4EfXr10eNGjWe2DczMxP9+vXDpEmTMGXKlDKtt2PHDlhYWMiOuXPnFjsmMTFRb4yPj4/Bvu3bt9fr+6jo6Gh069YNtWrVgq2tLQICAhATE1Oi2m1tbeHo6Ijk5GSD58PDw2FtbS0dzs7OAADXtxPhNuxKidYgIqLqiS85lVLTpk3xv//9DzNnzkTr1q31fiE/at68eejcuTMmTpxY5vVEUSxxXzMzM3To0AErVqxAUFAQGjVqVOr1tFotIiMjZW22trbFjvHy8sK2bdtkbTdu3ICfn59e33Xr1hVZV35+Pr777jssXLhQahs8eDAmTpyIGTNmwMjoyflbFEXZzdePCg0Nxfjx46XH6enpUqghIiJlY6Appbp162LDhg3QarUIDAzEzp07YWlpabBvp06dEBAQgNDQUAQHB8vOWVlZ4f79+ygoKJD9oi68J8ba2hoA4OnpiYMHDyI3N/eJuzTGxsbYsmULevfuDa1Wi7i4uFKHGnNzc3h4eJRqjImJid4Ylcrwj5azs3OR8//666+4ceOG3j0z+fn52LNnD7p27VpsHbdv38a///4Ld3d3g+fVajXUanWxcxARkTLxJacycHV1xb59+5CamorAwEBkZGQU2TciIgLbt2/H4cOHZe1eXl7Iy8uT3tZc6MSJEwAeBhkAePPNN5GZmYmlS5canP/xm4LVajU2bdqEVq1aQavV4uzZs6W8uqoTFRWFgQMHIiEhQXYMHDgQUVFRTxy/cOFCGBkZ6b21noiInn3coSkjZ2dn6HQ6aLVaBAQEIDY2FlZWVnr9mjRpgkGDBmHRokWydh8fH7zyyisYOnQo5s+fj/r16+P8+fMYO3YsBgwYgLp16wIA2rRpg8mTJ2PChAm4ceMGevXqhTp16uDSpUtYtmwZOnToIHv3E/Aw1GzcuBH9+vWDVqvF3r17pXta8vPz9UKUWq2WdnKys7ORmpoqO69SqWBvb1+u56vQ7du39ea3sbFBRkYGtm/fjm3btqFx48ay82+//TZ69eqFO3fuSC9/ZWRkIDU1Fbm5ubhy5Qp++OEHfPvttwgPDy/1DhMRESkfA0051KtXTxZqfv31V4P9wsLCsG7dOr32devWYebMmXjvvffwzz//oF69eujVqxemT58u6zdv3jy
0bNkSS5YswbJly1BQUIAGDRqgb9++GDJkiME1TUxMsGHDBvTv318KNcDDG4dffPFFWd8GDRrg0qVLAIDY2Fg4OTnJznt5eeHcuXMle1KewN/fX69tzZo1uHHjBszNzQ3eaN2lSxeYmZnhhx9+wIcffggAmDFjBmbMmAETExNoNBq0bdsWe/bsgVarrZA6iYhIWQSxNHedEj1D0tPTYW1tjbS0NIO7a0REpBy8h4aIiIgUj4GGiIiIFI+BhoiIiBSPgYaIiIgUj4GGiIiIFI+BhoiIiBSPgYaIiIgUj4GGiIiIFI+BhoiIiBSPgYaIiIgUj4GGiIiIFI+BhoiIiBSPgYaIiIgUj4GGiIiIFI+BhoiIiBSPgYaIiIgUj4GGiIiIFI+BhoiIiBSPgYaIiIgUj4GGiIiIFI+BhoiIiBSPgYaIiIgUj4GGiIiIFI+BhoiIiBSPgYaIiIgUj4GGiIiIFI+BhoiIiBSPgYaIiIgUj4GGiIiIFI+BhoiIiBSPgYaIiIgUj4GGiIiIFI+BhoiIiBSPgYaIiIgUj4GGiIiIFI+BhoiIiBSPgYaIiIgUr0oDTXBwMHr27KnXrtPpIAgC7t27J7Xl5+djwYIFaNKkCUxNTVGrVi1069YNhw4dkvqcO3cOgiDgyJEjsvnatm0LU1NTZGVlSW1ZWVkwNTVFVFSUVIsgCBAEATVq1EDt2rXRtWtXREdHo6CgQDafm5ub1PfRIyIiAgCQnJwMQRDg6OiIjIwM2djmzZtj1qxZBp+P8tZfkufy0cePXrOhw83NDQDg5+dn8PzIkSMNXkdxcwqCAK1Wixo1auDgwYOycffv30f9+vUxceJEvXVNTU3xwgsvYOnSpVL/lStXGpzf1NTUYF1ERPTsUsQOjSiKGDhwIMLCwjBmzBgkJSVBp9PB2dkZfn5+2LJlCwDA29sbGo0GOp1OGpuRkYETJ07AwcFBFhQOHz6M7OxsdO7cWWoLDAxESkoKkpOTsXPnTmi1WowZMwavvfYa8vLyZDWFhYUhJSVFdoSEhMj6ZGRk4IsvvijxdZa3/tJauHChrH4AiImJkR4fO3ZM6jtixAi96/3ss88Mzvton6+++gpWVlaytu3btyMkJATBwcG4f/++NG7y5MkwMzPDp59+qrfu2bNn0b9/f4wePRpr1qyRzj8+d0pKCq5evVrm54SIiJRJEYHmp59+woYNG7Bq1SoMHz4c7u7uaNasGZYvX47XX38dw4cPl34xarVaWSA4ePAgPD090aNHD1m7TqeDq6sr3N3dpTa1Wg2NRoO6deuiRYsWmDZtGrZu3YqdO3di5cqVsposLS2h0Whkh7m5uaxPSEgIvvzyS9y6davE11qe+kvL2tpaVj8A2NjYSI8dHBykvjVr1tS7XisrK4PzPtrH2toagiDI2iwsLDB37lyYmJhgypQpAIC4uDh8++23WLVqlWyHpXDd+vXrY9asWWjYsCG2bdsmnX98bo1Gg9q1axusKzs7G+np6bKDiIieDYoINKtXr5Z+qT9uwoQJuH37Nnbt2gXgYSA4ePCgtKMSFxcHPz8/+Pr6Ii4uThoXFxcHrVb7xLU7d+6MZs2aYdOmTaWuOygoCB4eHggLCyvxmIquv7oyNTXFqlWrsHz5cmzduhVDhw7FtGnT0LJly2LHmZmZIScnp0xrhoeHw9raWjqcnZ0BAHf+isOdy7sr9CAioqerygPNjh07YGFhITu6desm63PhwgU0atTI4PjC9gsXLgB4GAju378vvVyi0+ng6+uLTp064ejRo8jKysKDBw8QHx9f4kDg7e2N5ORkWduUKVP06j5w4ICsT+F9NcuXL8fly5dLtFZ56i/Jc1lWS5cu1Zv7xx9/LNecL730EkJDQ9G7d2/Y2dnho48+KrJvfn4+fvjhB5w6dUr2MltaWlqJrzk0NBRpaWnScf369XLVT0RE1YeqqgvQarWIjIyUtR09ehSDBw+WtYmiWKL5PDw8UK9ePeh0Ovj4+OD
kyZPw9fWFo6MjXFxccPjwYYiiiOzs7BIHGlEUIQiCrG3SpEkIDg6WtdWtW1dvbEBAADp06IDp06dj9erVlVp/SZ/Lshg0aJBe4CjqpZ3SmD59OsLCwjB16lSoVPo/jkuXLsW3336LnJwcGBsbY9y4cXj//fel85aWljhx4oRsjJmZmcG11Go11Gp1uWsmIqLqp8oDjbm5OTw8PGRtf//9t+yxp6cnkpKSDI4vbPf09JTa/Pz8EBcXh6ZNm6Jhw4ZwdHQEAOllG1EU4eHhIb3k8CRJSUl696rY29vr1V2UiIgItGvXDpMmTSpR/7LWX5Lnsqysra1LfL2lURhiDIUZ4P+DlJmZGZycnGBkJN9UNDIyqpS6iIhIWar8JaeSGDhwIC5evIjt27frnZs/fz7s7OzQtWtXqU2r1eL333/Hrl274OfnJ7V36tQJOp0OOp2uxLsze/fuRWJiIvr06VPm+lu3bo3evXtj6tSpJepfkfUrXWGQqlu3rl6YISIiKlTlOzQlMXDgQKxfvx5DhgzB559/ji5duiA9PR1LlizBtm3bsH79etk7jArvQ4mOjsaKFSukdl9fXwwfPhwAMGrUKL11srOzkZqaivz8fNy8eROxsbEIDw/Ha6+9hrffflvWNyMjA6mpqbK2mjVrFvnOnzlz5sDHx6fInYhHlbX+yvTff//pXa9arUatWrWeah2PE0VRry4AcHR0ZAAiInqOKOJffEEQ8NNPP2HatGlYsGABvLy80LFjR1y9ehU6nU7vA+Xc3d3h6uqKjIwM+Pr6Su0uLi6oU6cOcnJyZDsfhWJjY+Hk5AQ3NzcEBgYiLi4OixYtwtatW2FsbCzrO2PGDDg5OcmOyZMnF3kNnp6eGDp0qOzD8YpS1vor04oVK/SuNygo6KnWYEh6erpeXU5OTqV6qzwRESmfIJb0bluiZ0x6ejqsra1x5eQWWFmaP3lAKdg28K/Q+YiIqHiK2KEhIiIiKg4DDRERESkeAw0REREpHgMNERERKR4DDRERESkeAw0REREpHgMNERERKR4DDRERESkeAw0REREpHgMNERERKR4DDRERESmeIr5tm6gy2dbXFvkt6UREpAzcoSEiIiLFY6AhIiIixWOgISIiIsXjPTT03BJFEQCQnp5exZUQ0dNmaWkJQRCqugyqQAw09Ny6ffs2AMDZ2bmKKyGipy0tLY1vBnjGMNDQc8vW1hYAcO3aNVhbW1dxNSWXnp4OZ2dnXL9+XTH/ICuxZkCZdSuxZuDp121paVnpa9DTxUBDzy0jo4e3kFlbWyvqH/5CVlZWiqtbiTUDyqxbiTUDyq2bqh5vCiYiIiLFY6AhIiIixWOgoeeWWq3GzJkzoVarq7qUUlFi3UqsGVBm3UqsGVBu3VR9CGLhe1eJiIiIFIo7NERERKR4DDRERESkeAw0REREpHgMNERERKR4DDT03FqyZAnc3NxgamqKNm3aID4+vspq2b9/P3r06IE6depAEARs2bJFdl4URcyYMQNOTk4wMzODv78/Ll68KOtz584dDBo0CFZWVrCxscGwYcOQmZlZaTWHh4ejVatWsLS0hKOjI3r27Inz58/L+mRlZWH06NGws7ODhYUF+vTpg5s3b8r6XLt2Dd27d0fNmjXh6OiISZMmIS8vr9LqjoyMRNOmTaUPcGvXrh127txZrWt+XEREBARBwNixY6t13bNmzYIgCLLD29u7WtdMCiYSPYfWrl0rmpiYiNHR0eKZM2fEESNGiDY2NuLNmzerpJ5ffvlF/Oijj8RNmzaJAMTNmzfLzkdERIjW1tbili1bxD///FN8/fXXRXd3d/HBgwdSn8DAQLFZs2bikSNHxAMHDogeHh5iUFBQpdUcEBAgxsTEiKdPnxYTEhLEV199VXRxcREzMzOlPiNHjhSdnZ3FPXv2iMePHxfbtm0rtm/fXjqfl5cnNm7cWPT39xdPnjwp/vLLL6K9vb0YGhpaaXVv27ZN/Pnnn8ULFy6I58+
fF6dNmybWqFFDPH36dLWt+VHx8fGim5ub2LRpU3HMmDFSe3Wse+bMmaKPj4+YkpIiHf/++2+1rpmUi4GGnkutW7cWR48eLT3Oz88X69SpI4aHh1dhVQ89HmgKCgpEjUYjfv7551LbvXv3RLVaLa5Zs0YURVE8e/asCEA8duyY1Gfnzp2iIAjijRs3nkrdt27dEgGI+/btk2qsUaOGuH79eqlPUlKSCEA8fPiwKIoPg5yRkZGYmpoq9YmMjBStrKzE7Ozsp1K3KIpirVq1xG+//bba15yRkSE2bNhQ3LVrl+jr6ysFmupa98yZM8VmzZoZPFddaybl4ktO9NzJycnBH3/8AX9/f6nNyMgI/v7+OHz4cBVWZtiVK1eQmpoqq9fa2hpt2rSR6j18+DBsbGzw0ksvSX38/f1hZGSEo0ePPpU609LSAPz/l37+8ccfyM3NldXt7e0NFxcXWd1NmjRB7dq1pT4BAQFIT0/HmTNnKr3m/Px8rF27Fvfv30e7du2qfc2jR49G9+7dZfUB1fu5vnjxIurUqYP69etj0KBBuHbtWrWvmZSJX05Jz53//e9/yM/Pl/0jCQC1a9fGuXPnqqiqoqWmpgKAwXoLz6WmpsLR0VF2XqVSwdbWVupTmQoKCjB27Fi8/PLLaNy4sVSTiYkJbGxsiq3b0HUVnqssiYmJaNeuHbKysmBhYYHNmzfjhRdeQEJCQrWtee3atThx4gSOHTumd666Ptdt2rTBypUr4eXlhZSUFHzyySfo2LEjTp8+XW1rJuVioCGichs9ejROnz6NgwcPVnUpJeLl5YWEhASkpaVhw4YNGDJkCPbt21fVZRXp+vXrGDNmDHbt2gVTU9OqLqfEunXrJv25adOmaNOmDVxdXfHTTz/BzMysCiujZxFfcqLnjr29PYyNjfXeTXHz5k1oNJoqqqpohTUVV69Go8GtW7dk5/Py8nDnzp1Kv6YPPvgAO3bsQFxcHOrVqyerOycnB/fu3Su2bkPXVXiuspiYmMDDwwMtW7ZEeHg4mjVrhoULF1bbmv/44w/cunULLVq0gEqlgkqlwr59+7Bo0SKoVCrUrl27Wtb9OBsbG3h6euLSpUvV9rkm5WKgoeeOiYkJWrZsiT179khtBQUF2LNnD9q1a1eFlRnm7u4OjUYjqzc9PR1Hjx6V6m3Xrh3u3buHP/74Q+qzd+9eFBQUoE2bNpVSlyiK+OCDD7B582bs3bsX7u7usvMtW7ZEjRo1ZHWfP38e165dk9WdmJgoC2O7du2ClZUVXnjhhUqp25CCggJkZ2dX25q7dOmCxMREJCQkSMdLL72EQYMGSX+ujnU/LjMzE5cvX4aTk1O1fa5Jwar6rmSiqrB27VpRrVaLK1euFM+ePSu+++67oo2NjezdFE9TRkaGePLkSfHkyZMiAPHLL78UT548KV69elUUxYdv27axsRG3bt0qnjp1SnzjjTcMvm37xRdfFI8ePSoePHhQbNiwYaW+bfv9998Xra2tRZ1OJ3tb7n///Sf1GTlypOji4iLu3btXPH78uNiuXTuxXbt20vnCt+W+8sorYkJCghgbGys6ODhU6ttyp06dKu7bt0+8cuWKeOrUKXHq1KmiIAjib7/9Vm1rNuTRdzlV17onTJgg6nQ68cqVK+KhQ4dEf39/0d7eXrx161a1rZmUi4GGnltff/216OLiIpqYmIitW7cWjxw5UmW1xMXFiQD0jiFDhoii+PCt29OnTxdr164tqtVqsUuXLuL58+dlc9y+fVsMCgoSLSwsRCsrK/Gdd94RMzIyKq1mQ/UCEGNiYqQ+Dx48EEeNGiXWqlVLrFmzptirVy8xJSVFNk9ycrLYrVs30czMTLS3txcnTJgg5ubmVlrdQ4cOFV1dXUUTExPRwcFB7NKlixRmqmvNhjweaKpj3QMGDBCdnJxEExMTsW7duuKAAQPES5cuVeuaSbkEURTFqtkbIiIiIqoYvIeGiIiIFI+BhoiIiBSPgYaIiIg
Uj4GGiIiIFI+BhoiIiBSPgYaIiIgUj4GGiIiIFI+BhoiIiBSPgYaIiIgUj4GGiKgUkpOTIQgCEhISqroUInoEAw0REREpHgMNESlKQUEBPvvsM3h4eECtVsPFxQVz5swBACQmJqJz584wMzODnZ0d3n33XWRmZkpj/fz8MHbsWNl8PXv2RHBwsPTYzc0Nc+fOxdChQ2FpaQkXFxcsX75cOu/u7g4AePHFFyEIAvz8/CrtWomo5BhoiEhRQkNDERERgenTp+Ps2bNYvXo1ateujfv37yMgIAC1atXCsWPHsH79euzevRsffPBBqdeYP38+XnrpJZw8eRKjRo3C+++/j/PnzwMA4uPjAQC7d+9GSkoKNm3aVKHXR0Rlo6rqAoiISiojIwMLFy7E4sWLMWTIEABAgwYN0KFDB6xYsQJZWVlYtWoVzM3NAQCLFy9Gjx49MG/ePNSuXbvE67z66qsYNWoUAGDKlClYsGAB4uLi4OXlBQcHBwCAnZ0dNBpNBV8hEZUVd2iISDGSkpKQnZ2NLl26GDzXrFkzKcwAwMsvv4yCggJpd6WkmjZtKv1ZEARoNBrcunWr7IUTUaVjoCEixTAzMyvXeCMjI4iiKGvLzc3V61ejRg3ZY0EQUFBQUK61iahyMdAQkWI0bNgQZmZm2LNnj965Ro0a4c8//8T9+/eltkOHDsHIyAheXl4AAAcHB6SkpEjn8/Pzcfr06VLVYGJiIo0louqDgYaIFMPU1BRTpkzB5MmTsWrVKly+fBlHjhxBVFQUBg0aBFNTUwwZMgSnT59GXFwcQkJC8NZbb0n3z3Tu3Bk///wzfv75Z5w7dw7vv/8+7t27V6oaHB0dYWZmhtjYWNy8eRNpaWmVcKVEVFoMNESkKNOnT8eECRMwY8YMNGrUCAMGDMCtW7dQs2ZN/Prrr7hz5w5atWqFvn37okuXLli8eLE0dujQoRgyZAjefvtt+Pr6on79+tBqtaVaX6VSYdGiRfjmm29Qp04dvPHGGxV9iURUBoL4+AvKRERERArDHRoiIiJSPAYaIiIiUjwGGiIiIlI8BhoiIiJSPAYaIiIiUjwGGiIiIlI8BhoiIiJSPAYaIiIiUjwGGiIiIlI8BhoiIiJSPAYaIiIiUrz/A+sUfVTiRBWAAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import seaborn as sns\n", + "# Specify colors for each values of the hue variable\n", + "palette = {\n", + " 'ORANGE': 'orange',\n", + " 'WHITE': 'wheat',\n", + "}\n", + "# Plot a bar plot to visualize how many pumpkins of each variety are orange or white\n", + "sns.catplot(\n", + " data=pumpkins, y=\"Variety\", hue=\"Color\", kind=\"count\",\n", + " palette=palette, \n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Предобрада података\n", + "\n", + "Хајде да кодирујемо карактеристике и ознаке како бисмо боље приказали податке и обучили модел.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['med', 'lge', 'sml', 'xlge', 'med-lge', 'jbo', 'exjbo'],\n", + " dtype=object)" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Let's look at the different values of the 'Item Size' column\n", + "pumpkins['Item Size'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import OrdinalEncoder\n", + "# Encode the 'Item Size' column using ordinal encoding\n", + "item_size_categories = [['sml', 'med', 'med-lge', 'lge', 'xlge', 'jbo', 'exjbo']]\n", + "ordinal_features = ['Item Size']\n", + "ordinal_encoder = OrdinalEncoder(categories=item_size_categories)" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import OneHotEncoder\n", + "# Encode all the other features using one-hot encoding\n", + "categorical_features = ['City Name', 'Package', 'Variety', 'Origin']\n", + "categorical_encoder = OneHotEncoder(sparse_output=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": 
{}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ord__Item Sizecat__City Name_ATLANTAcat__City Name_BALTIMOREcat__City Name_BOSTONcat__City Name_CHICAGOcat__City Name_COLUMBIAcat__City Name_DALLAScat__City Name_DETROITcat__City Name_LOS ANGELEScat__City Name_MIAMI...cat__Origin_MICHIGANcat__Origin_NEW JERSEYcat__Origin_NEW YORKcat__Origin_NORTH CAROLINAcat__Origin_OHIOcat__Origin_PENNSYLVANIAcat__Origin_TENNESSEEcat__Origin_TEXAScat__Origin_VERMONTcat__Origin_VIRGINIA
21.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
31.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.01.0
43.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
53.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
61.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
\n", + "

5 rows × 48 columns

\n", + "
" + ], + "text/plain": [ + " ord__Item Size cat__City Name_ATLANTA cat__City Name_BALTIMORE \n", + "2 1.0 0.0 1.0 \\\n", + "3 1.0 0.0 1.0 \n", + "4 3.0 0.0 1.0 \n", + "5 3.0 0.0 1.0 \n", + "6 1.0 0.0 1.0 \n", + "\n", + " cat__City Name_BOSTON cat__City Name_CHICAGO cat__City Name_COLUMBIA \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__City Name_DALLAS cat__City Name_DETROIT cat__City Name_LOS ANGELES \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__City Name_MIAMI ... cat__Origin_MICHIGAN cat__Origin_NEW JERSEY \n", + "2 0.0 ... 0.0 0.0 \\\n", + "3 0.0 ... 0.0 0.0 \n", + "4 0.0 ... 0.0 0.0 \n", + "5 0.0 ... 0.0 0.0 \n", + "6 0.0 ... 0.0 0.0 \n", + "\n", + " cat__Origin_NEW YORK cat__Origin_NORTH CAROLINA cat__Origin_OHIO \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__Origin_PENNSYLVANIA cat__Origin_TENNESSEE cat__Origin_TEXAS \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__Origin_VERMONT cat__Origin_VIRGINIA \n", + "2 0.0 0.0 \n", + "3 0.0 1.0 \n", + "4 0.0 0.0 \n", + "5 0.0 0.0 \n", + "6 0.0 0.0 \n", + "\n", + "[5 rows x 48 columns]" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.compose import ColumnTransformer\n", + "ct = ColumnTransformer(transformers=[\n", + " ('ord', ordinal_encoder, ordinal_features),\n", + " ('cat', categorical_encoder, categorical_features)\n", + " ])\n", + "# Get the encoded features as a pandas DataFrame\n", + "ct.set_output(transform='pandas')\n", + "encoded_features = ct.fit_transform(pumpkins)\n", + "encoded_features.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + 
{ + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ord__Item Sizecat__City Name_ATLANTAcat__City Name_BALTIMOREcat__City Name_BOSTONcat__City Name_CHICAGOcat__City Name_COLUMBIAcat__City Name_DALLAScat__City Name_DETROITcat__City Name_LOS ANGELEScat__City Name_MIAMI...cat__Origin_NEW JERSEYcat__Origin_NEW YORKcat__Origin_NORTH CAROLINAcat__Origin_OHIOcat__Origin_PENNSYLVANIAcat__Origin_TENNESSEEcat__Origin_TEXAScat__Origin_VERMONTcat__Origin_VIRGINIAColor
21.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00
31.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00
43.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00
53.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00
61.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00
\n", + "

5 rows × 49 columns

\n", + "
" + ], + "text/plain": [ + " ord__Item Size cat__City Name_ATLANTA cat__City Name_BALTIMORE \n", + "2 1.0 0.0 1.0 \\\n", + "3 1.0 0.0 1.0 \n", + "4 3.0 0.0 1.0 \n", + "5 3.0 0.0 1.0 \n", + "6 1.0 0.0 1.0 \n", + "\n", + " cat__City Name_BOSTON cat__City Name_CHICAGO cat__City Name_COLUMBIA \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__City Name_DALLAS cat__City Name_DETROIT cat__City Name_LOS ANGELES \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__City Name_MIAMI ... cat__Origin_NEW JERSEY cat__Origin_NEW YORK \n", + "2 0.0 ... 0.0 0.0 \\\n", + "3 0.0 ... 0.0 0.0 \n", + "4 0.0 ... 0.0 0.0 \n", + "5 0.0 ... 0.0 0.0 \n", + "6 0.0 ... 0.0 0.0 \n", + "\n", + " cat__Origin_NORTH CAROLINA cat__Origin_OHIO cat__Origin_PENNSYLVANIA \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__Origin_TENNESSEE cat__Origin_TEXAS cat__Origin_VERMONT \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__Origin_VIRGINIA Color \n", + "2 0.0 0 \n", + "3 1.0 0 \n", + "4 0.0 0 \n", + "5 0.0 0 \n", + "6 0.0 0 \n", + "\n", + "[5 rows x 49 columns]" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.preprocessing import LabelEncoder\n", + "# Encode the 'Color' column using label encoding\n", + "label_encoder = LabelEncoder()\n", + "encoded_label = label_encoder.fit_transform(pumpkins['Color'])\n", + "encoded_pumpkins = encoded_features.assign(Color=encoded_label)\n", + "encoded_pumpkins.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['ORANGE', 'WHITE']" + ] + }, + "execution_count": 71, + "metadata": 
{}, + "output_type": "execute_result" + } + ], + "source": [ + "# Let's look at the mapping between the encoded values and the original values\n", + "list(label_encoder.inverse_transform([0, 1]))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "palette = {\n", + " 'ORANGE': 'orange',\n", + " 'WHITE': 'wheat',\n", + "}\n", + "# We need the encoded Item Size column to use it as the x-axis values in the plot\n", + "pumpkins['Item Size'] = encoded_pumpkins['ord__Item Size']\n", + "\n", + "g = sns.catplot(\n", + " data=pumpkins,\n", + " x=\"Item Size\", y=\"Color\", row='Variety',\n", + " kind=\"box\", orient=\"h\",\n", + " sharex=False, margin_titles=True,\n", + " height=1.8, aspect=4, palette=palette,\n", + ")\n", + "# Defining axis labels \n", + "g.set(xlabel=\"Item Size\", ylabel=\"\").set(xlim=(0,6))\n", + "g.set_titles(row_template=\"{row_name}\")\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "warnings.filterwarnings(action='ignore', category=UserWarning, module='seaborn')" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Suppressing warning message claiming that a portion of points cannot be placed into the plot due to the high number of data points\n", + "import warnings\n", + "warnings.filterwarnings(action='ignore', category=UserWarning, module='seaborn')\n", + "\n", + "palette = {\n", + " 0: 'orange',\n", + " 1: 'wheat'\n", + "}\n", + "sns.swarmplot(x=\"Color\", y=\"ord__Item Size\", hue=\"Color\", data=encoded_pumpkins, palette=palette)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Пазите**: Игнорисање упозорења НИЈЕ најбоља пракса и треба га избегавати кад год је то могуће. Упозорења често садрже корисне поруке које нам омогућавају да побољшамо наш код и решимо проблем. \n", + "Разлог због којег игноришемо ово конкретно упозорење је да обезбедимо читљивост графикона. Приказивање свих тачака података са смањеном величином маркера, уз задржавање конзистентности са бојама палете, ствара нејасну визуализацију.\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "# X is the encoded features\n", + "X = encoded_pumpkins[encoded_pumpkins.columns.difference(['Color'])]\n", + "# y is the encoded label\n", + "y = encoded_pumpkins['Color']\n", + "\n", + "# Split the data into training and test sets\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.94 0.98 0.96 166\n", + " 1 0.85 0.67 0.75 33\n", + "\n", + " accuracy 0.92 199\n", + " macro avg 0.89 0.82 0.85 199\n", + "weighted avg 
0.92 0.92 0.92 199\n", + "\n", + "Predicted labels: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0\n", + " 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 1 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 1 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0\n", + " 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1\n", + " 0 0 0 1 0 0 0 0 0 0 0 0 1 1]\n", + "F1-score: 0.7457627118644068\n" + ] + } + ], + "source": [ + "from sklearn.metrics import f1_score, classification_report \n", + "from sklearn.linear_model import LogisticRegression\n", + "\n", + "# Train a logistic regression model on the pumpkin dataset\n", + "model = LogisticRegression()\n", + "model.fit(X_train, y_train)\n", + "predictions = model.predict(X_test)\n", + "\n", + "# Evaluate the model and print the results\n", + "print(classification_report(y_test, predictions))\n", + "print('Predicted labels: ', predictions)\n", + "print('F1-score: ', f1_score(y_test, predictions))" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[162, 4],\n", + " [ 11, 22]])" + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.metrics import confusion_matrix\n", + "confusion_matrix(y_test, predictions)" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAhgAAAIjCAYAAABBOWJ+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABgUElEQVR4nO3dd1gUV8MF8LOUZelqsItBjTV2jcYKKgoWFDWKJUKIvUc0tqjYoibWxKDGFqwRNGqMjQiC3WhE7CX2BiixgHR27/eHL/tJBGVxl9lyfs/DE3aY2T07ETjcuTMjE0IIEBEREWmRmdQBiIiIyPiwYBAREZHWsWAQERGR1rFgEBERkdaxYBAREZHWsWAQERGR1rFgEBERkdaxYBAREZHWsWAQERGR1rFgEBERkdaxYBCZgODgYMhkMvWHhYUFypYtiy+++AIPHz7MdRshBDZs2ICWLVuiSJEisLGxQa1atTBz5kwkJyfn+Vo7duxA+/bt4eTkBLlcjjJlyqBnz544ePBgvrKmpaVh8eLFaNy4MRwdHaFQKFClShWMGDEC169fL9D7J6LCJ+O9SIiMX3BwMPz9/TFz5kxUqFABaWlpOHnyJIKDg+Hi4oKLFy9CoVCo11cqlejTpw9CQ0PRokULdOvWDTY2Njhy5Ag2b96MGjVqIDw8HCVLllRvI4TAl19+ieDgYNSrVw+fffYZSpUqhdjYWOzYsQNnzpzBsWPH0LRp0zxzJiQkwNPTE2fOnEGnTp3g7u4OOzs7XLt2DVu2bEFcXBwyMjJ0uq+ISEsEERm9X375RQAQp0+fzrF8woQJAoAICQnJsXzOnDkCgBg3btwbz7Vr1y5hZmYmPD09cyyfP3++ACC++uoroVKp3thu/fr14q+//nprzo4dOwozMzOxbdu2N76WlpYmxo4d+9bt8yszM1Okp6dr5bmIKHcsGEQmIK+CsXv3bgFAzJkzR70sJSVFFC1aVFSpUkVkZmbm+nz+/v4CgDhx4oR6m2LFiolq1aqJrKysAmU8efKkACAGDhyYr/VdXV2Fq6vrG8v9/PzEhx9+qH58+/ZtAUDMnz9fLF68WFSsWFGYmZmJkydPCnNzczF9+vQ3nuPq1asCgFi6dKl62bNnz8To0aNFuXLlhFwuF5UqVRLz5s0TSqVS4/dKZAo4B4PIhN25cwcAULRoUfWyo0eP4tmzZ+jTpw8sLCxy3c7X1xcAsHv3bvU2T58+RZ8+fWBubl6gLLt27QIA9OvXr0Dbv8svv/yCpUuXYtCgQVi4cCFKly4NV1dXhIaGvrFuSEgIzM3N0aNHDwBASkoKXF1dsXHjRvj6+uLHH39Es2bNMGnSJAQEBOgkL5Ghy/2nBxEZpRcvXiAhIQFpaWn466+/MGPGDFhZWaFTp07qdS5fvgwAqFOnTp7Pk/21K1eu5PhvrVq1CpxNG8/xNg8ePMCNGzdQvHhx9TIfHx8MHjwYFy9eRM2aNdXLQ0JC4Orqqp5jsmjRIty8eRNnz55F5cqVAQCDBw9GmTJlMH/+fIwdOxbOzs46yU1kqDiCQWRC3N3dUbx4cTg7O+Ozzz6Dra0tdu3ahXLlyqnXSUpKAgDY29vn+TzZX0tMTMzx37dt8y7aeI636d69e45yAQDdunWDhYUFQkJC1MsuXryIy5cvw8fHR71s69ataNGiBYoWLYqEhAT1h7u7O5RKJQ4fPqyTzESGjCMYRCYkKCgIVapUwYsXL7B27VocPnwYVlZWOdbJ/gWfXTRy898S4uDg8M5t3uX15yhSpEiBnycvFSpUeGOZk5MT2rRpg9DQUMyaNQvAq9ELCwsLdOvWTb3eP//8g/Pnz79RULI9fvxY63mJDB0LBpEJadSoERo2bAgA8Pb2RvPmzdGnTx9cu3YNdnZ2AIDq1asDAM6fPw9vb+9cn+f8+fMAgBo1agAAqlWrBgC4cOFCntu8y+vP0aJFi3euL5PJIHI5y16pVOa6vrW1da7Le/XqBX9/f8TExKB
u3boIDQ1FmzZt4OTkpF5HpVKhbdu2GD9+fK7PUaVKlXfmJTI1PERCZKLMzc0xd+5cPHr0CD/99JN6efPmzVGkSBFs3rw5z1/W69evBwD13I3mzZujaNGi+PXXX/Pc5l28vLwAABs3bszX+kWLFsXz58/fWH737l2NXtfb2xtyuRwhISGIiYnB9evX0atXrxzrVKpUCS9fvoS7u3uuH+XLl9foNYlMAQsGkQlzc3NDo0aNsGTJEqSlpQEAbGxsMG7cOFy7dg3ffPPNG9vs2bMHwcHB8PDwwKeffqreZsKECbhy5QomTJiQ68jCxo0bcerUqTyzNGnSBJ6enli9ejV27tz5xtczMjIwbtw49eNKlSrh6tWrePLkiXrZuXPncOzYsXy/fwAoUqQIPDw8EBoaii1btkAul78xCtOzZ0+cOHECYWFhb2z//PlzZGVlafSaRKaAV/IkMgHZV/I8ffq0+hBJtm3btqFHjx5Yvnw5hgwZAuDVYQYfHx/89ttvaNmyJbp37w5ra2scPXoUGzduRPXq1REREZHjSp4qlQpffPEFNmzYgPr166uv5BkXF4edO3fi1KlTOH78OJo0aZJnzidPnqBdu3Y4d+4cvLy80KZNG9ja2uKff/7Bli1bEBsbi/T0dACvzjqpWbMm6tSpg/79++Px48dYsWIFSpYsicTERPUpuHfu3EGFChUwf/78HAXldZs2bcLnn38Oe3t7uLm5qU+ZzZaSkoIWLVrg/Pnz+OKLL9CgQQMkJyfjwoUL2LZtG+7cuZPjkAoRgVfyJDIFeV1oSwghlEqlqFSpkqhUqVKOi2QplUrxyy+/iGbNmgkHBwehUCjExx9/LGbMmCFevnyZ52tt27ZNtGvXThQrVkxYWFiI0qVLCx8fHxEVFZWvrCkpKWLBggXik08+EXZ2dkIul4vKlSuLkSNHihs3buRYd+PGjaJixYpCLpeLunXrirCwsLdeaCsviYmJwtraWgAQGzduzHWdpKQkMWnSJPHRRx8JuVwunJycRNOmTcWCBQtERkZGvt4bkSnhCAYRERFpHedgEBERkdaxYBAREZHWsWAQERGR1rFgEBERkdaxYBAREZHWsWAQERGR1pncvUhUKhUePXoEe3t7yGQyqeMQEREZDCEEkpKSUKZMGZiZvX2MwuQKxqNHj+Ds7Cx1DCIiIoN1//59lCtX7q3rmFzByL699P3799W3hyYiIqJ3S0xMhLOzs/p36duYXMHIPizi4ODAgkFERFQA+ZliwEmeREREpHUsGERERKR1LBhERESkdSwYREREpHUsGERERKR1LBhERESkdSwYREREpHUsGERERKR1LBhERESkdSwYREREpHUsGERERKR1LBhERESkdSwYREREpHUsGERERKR1khaMw4cPw8vLC2XKlIFMJsPOnTvfuU1UVBTq168PKysrfPTRRwgODtZ5TiIiItKMpAUjOTkZderUQVBQUL7Wv337Njp27IhWrVohJiYGX331FQYMGICwsDAdJyUiIiJNWEj54u3bt0f79u3zvf6KFStQoUIFLFy4EABQvXp1HD16FIsXL4aHh4euYhoVIQSi7z3Dk6R0qaMQEVEhaFLJCY7WloX+upIWDE2dOHEC7u7uOZZ5eHjgq6++ynOb9PR0pKf//y/TxMREXcUzCCdvPUXvVSeljkFERIVk76gWLBjvEhcXh5IlS+ZYVrJkSSQmJiI1NRXW1tZvbDN37lzMmDGjsCLqvQsPnwMAnOys4PKBjbRhiIhI52zk5pK8rkEVjIKYNGkSAgIC1I8TExPh7OwsYSJp3XuaAgDo9YkzxnlUlTgNERFp05kzZ/DTTz9h5cqVsLQs/FGL1xlUwShVqhTi4+NzLIuPj4eDg0OuoxcAYGVlBSsrq8KIZxDuPU0FAJQvxtELIiJjcvr0abRr1w7Pnz9H+fLlJR+9N6jrYDRp0gQRERE5lh04cABNmjSRKJHhuf+/EQxnFgwiIqNx6tQptG3bFs+
fP0ezZs0wbtw4qSNJWzBevnyJmJgYxMTEAHh1GmpMTAzu3bsH4NXhDV9fX/X6Q4YMwa1btzB+/HhcvXoVy5YtQ2hoKMaMGSNFfIOjVAk8ePaqYJTn/AsiIqPw119/oW3btnjx4gWaN2+Offv2wd7eXupY0haMv//+G/Xq1UO9evUAAAEBAahXrx6mTZsGAIiNjVWXDQCoUKEC9uzZgwMHDqBOnTpYuHAhVq9ezVNU8yn2RSoylQKW5jKUclBIHYeIiN7TyZMn0a5dOyQmJqJFixZ6Uy4AQCaEEFKHKEyJiYlwdHTEixcv4ODgIHWcQnX8ZgL6rPoLFZxsETnOTeo4RET0HlJTU1GpUiXExsbC1dUVu3fvhp2dnU5fU5PfoQY1B4PeD+dfEBEZD2tra2zevBkdO3bEnj17dF4uNGVQZ5HQ+8k+RfVDFgwiIoOVmZmpPgXVzc0Nbm5u0gbKA0cwTAhPUSUiMmxHjx5FtWrVcPHiRamjvBMLhgm5x0MkREQG68iRI/D09MStW7fw7bffSh3nnVgwTEj2HAyOYBARGZbDhw+jffv2SE5Ohru7O9auXSt1pHdiwTARSWmZeJqcAQBwLpb7VU+JiEj/HDp0SF0u2rZti127duV59Wp9woJhIrIPjxSzlcNeIe316YmIKH8iIyPRoUMHpKSkwMPDA7///rtBlAuABcNk8BRVIiLDIoTA3LlzkZKSAk9PT+zcudNgygXAgmEyeIoqEZFhkclk2LZtGyZMmIAdO3ZAoTCsKzCzYJiIe5zgSURkEO7evav+3MHBAfPmzTO4cgGwYJgMXgODiEj/HThwANWqVcO8efOkjvLeWDBMBOdgEBHptz///BNeXl5IS0vD8ePHoVQqpY70XlgwTABv005EpN/CwsLQuXNnpKeno0uXLti2bRvMzc2ljvVeWDBMAG/TTkSkv/bt24cuXbogPT0d3t7eCA0NhVwulzrWe2PBMAHZEzzLFbWBuZlM4jRERJRt79698Pb2Rnp6Orp27YqQkBCjKBcAC4ZJ4CXCiYj0040bN5CRkYHu3bsbVbkAeLt2k8BTVImI9NOoUaNQoUIFeHp6qm/Bbiw4gmECeIoqEZH+OHjwIJ49e6Z+7OXlZXTlAmDBMAn3/k0GwFNUiYik9vvvv8PT0xPt2rVDYmKi1HF0igXDBPAQCRGR9Hbs2IHPPvsMmZmZqFy5MmxsjPtnMguGkUtMy8SzlEwAvAYGEZFUtm/fjp49eyIrKwt9+vTB+vXrYWFh3NMgWTCMXPYZJB/YymFnZdz/mImI9NFvv/0GHx8fZGVloW/fviZRLgAWDKPHS4QTEUln586d6nLRr18/rFu3zuCv0Jlfxl+hTBznXxARSad69eooUaIE2rZti7Vr15pMuQBYMIweCwYRkXSqVq2KU6dOoXTp0iZVLgAeIjF6d/9lwSAiKky//vor/vzzT/XjcuXKmVy5ADiCYfQ4B4OIqPBs2rQJvr6+kMvlOHXqFGrVqiV1JMlwBMOIvbpN+/+u4slTVImIdGrjxo3w9fWFSqXC559/jo8//ljqSJJiwTBisS9SkaUSkJub8TbtREQ6tGHDBnW5GDhwIH7++WeYmZn2r1geIjEiT5LSkZKRpX4cc/85AKBcUWvepp2ISEfWrVsHf39/CCEwaNAgLF++3OTLBcCCYTR2n3+EEZvP5vo1zr8gItKNqKgodbkYMmQIgoKCWC7+hwXDSFx+9OqmOZbmMlhZ/P9sZbmFGbrVLytVLCIio9a8eXP4+PigaNGiCAoKgkzG0eJsLBhG5vNPP0Sgl2lPLCIiKiwWFhbYsGEDzM3NWS7+g+M4REREGli9ejUGDBgAlUoF4FXJYLl4E0cwiIiI8mnlypUYPHgwAMDd3R29evWSOJH+4ggGERFRPvz888/qcjF69Gj4+PhInEi/sWAQERG9w/LlyzFkyBAAwJgxY7B48WIeFnkHFgwiIqK3CAoKwrBhwwAAY8eOxcKFC1ku8oE
Fg4iIKA93795FQEAAAGDcuHGYP38+y0U+cZInERFRHj788EOEhobi1KlTmD17NsuFBlgwiIiI/iMpKQn29vYAgC5duqBLly4SJzI8PERCRET0msWLF6NmzZq4ffu21FEMGgsGERHR/yxatAgBAQG4d+8efvvtN6njGDQWDCIiIgALFy7E2LFjAQBTp05Vf04Fw4JBREQmb/78+Rg3bhwAYNq0aZgxYwYndL4nFgwiIjJp33//PcaPHw8AmD59OsuFlvAsEiIiMllpaWnYtGkTAGDGjBmYNm2axImMBwsGERGZLIVCgfDwcGzfvl19nxHSDh4iISIik3P27Fn158WLF2e50AEWDCIiMimzZs1C/fr1sXLlSqmjGDUWDCIiMhmvz7N4+vSpxGmMG+dgEBGRScg+QwQA5s2bhwkTJkicyLixYBARkVETQmD69OmYOXMmgFenpX799dcSpzJ+LBhERGS0hBCYNm0aZs+eDQBYsGABr9BZSFgwiIjIJCxatAhjxoyROobJYMEgIiKjJZPJMHPmTLRv3x5NmzaVOo5J4VkkRERkVIQQWLVqFVJSUgC8KhksF4WPBYOIiIyGEAITJkzAoEGD0LlzZyiVSqkjmSweIiEiIqMghMD48eOxYMECAEDXrl1hbm4ucSrTxYJBREQGTwiBcePGYdGiRQCAoKAgDBs2TOJUpo0Fg4iIDJoQAgEBAViyZAkAYPny5RgyZIi0oYgFg4iIDNvUqVPV5WLFihW8cZme4CRPIiIyaN26dUOxYsXw888/s1zoEY5gEBGRQatfvz7++ecfFCtWTOoo9BqOYBARkUHJPlvk5MmT6mUsF/qHBYOIiAyGSqXC8OHDMX/+fLRv3563XNdjPERCREQGQaVSYdiwYfj5558hk8mwZMkSjlzoMRYMIiLSeyqVCkOHDsXKlSshk8kQHBwMX19fqWPRW7BgGAkhdQAiIh1RqVQYPHgwVq9eDTMzM6xbtw6ff/651LHoHVgwjMSp26+OQ5Z2VEichIhIu4KCgtTlYv369ejbt6/UkSgfWDCMwLW4JJy5+wwWZjJ41ysrdRwiIq0aOHAgwsLC0KdPH/Tp00fqOJRPLBhG4NdT9wAA7tVLooQ9RzCIyPCpVCrIZDLIZDIoFAr88ccfkMlkUsciDfA0VQOXmqHEb9EPAAC9G5eXOA0R0ftTKpXw9/fH119/DSFezTBjuTA8kheMoKAguLi4QKFQoHHjxjh16tRb11+yZAmqVq0Ka2trODs7Y8yYMUhLSyuktPpnz4VYJKVloVxRa7T4yEnqOERE7yW7XKxfvx5LlizB+fPnpY5EBSRpwQgJCUFAQAACAwMRHR2NOnXqwMPDA48fP851/c2bN2PixIkIDAzElStXsGbNGoSEhGDy5MmFnFx/ZB8e6d2oPMzM2PCJyHAplUr4+flhw4YNMDc3x5YtW1CnTh2pY1EBSVowFi1ahIEDB8Lf3x81atTAihUrYGNjg7Vr1+a6/vHjx9GsWTP06dMHLi4uaNeuHXr37v3OUQ9j9frkzh4Ny0kdh4iowLKysuDr64tNmzbBwsICISEh+Oyzz6SORe9BsoKRkZGBM2fOwN3d/f/DmJnB3d0dJ06cyHWbpk2b4syZM+pCcevWLezduxcdOnTI83XS09ORmJiY48NYcHInERmD7HKxefNmWFhYIDQ0FN27d5c6Fr0nyc4iSUhIgFKpRMmSJXMsL1myJK5evZrrNn369EFCQgKaN28OIQSysrIwZMiQtx4imTt3LmbMmKHV7Prg9cmdfTi5k4gM2LFjx7BlyxZYWFhg69at8Pb2ljoSaYHkkzw1ERUVhTlz5mDZsmWIjo7G9u3bsWfPHsyaNSvPbSZNmoQXL16oP+7fv1+IiXUne3KnczFrNOfkTiIyYK6urggODsa2bdtYLoyIZCMYTk5OMDc3R3x8fI7l8fHxKFWqVK7bTJ06Ff369cOAAQMAALVq1UJycjIGDRqEb775BmZmb/Y
lKysrWFlZaf8NSCz78EivTzi5k4gMT2ZmJp4/f47ixYsDAO8rYoQkG8GQy+Vo0KABIiIi1MtUKhUiIiLQpEmTXLdJSUl5o0SYm5sDgPpcaVPAyZ1EZMgyMzPRu3dvtGzZEnFxcVLHIR2R9EqeAQEB8PPzQ8OGDdGoUSMsWbIEycnJ8Pf3B/Cq0ZYtWxZz584FAHh5eWHRokWoV68eGjdujBs3bmDq1Knw8vJSFw1TsO3Mq8M8bWtwcicRGZbMzEz06tUL27dvh1wux8WLF/MctSbDJmnB8PHxwZMnTzBt2jTExcWhbt262L9/v3ri571793KMWEyZMgUymQxTpkzBw4cPUbx4cXh5eeHbb7+V6i1I4u+7zwAAHh/zm5KIDEdGRgZ69eqFHTt2QC6XY8eOHTnOJCTjIhOmdGwBQGJiIhwdHfHixQs4ODhIHUdjWUoVak4PQ1qmChFjXVGpuJ3UkYiI3ikjIwM9e/bE77//DisrK+zcuROenp5SxyINafI7lDc7MzA3nrxEWqYKdlYWqPCBrdRxiIjeKSMjAz169MCuXbtgZWWF33//HR4eHlLHIh0zqNNUCbjw4AUA4OMyDjx7hIgMwtOnT3Hp0iUoFArs2rWL5cJEcATDwFx8+Kpg1CrrKHESIqL8KVWqFCIjI3Hjxg20atVK6jhUSDiCYWDOZxeMciwYRKS/0tPTERUVpX7s7OzMcmFiWDAMSJZShSuxr+6lUpMjGESkp9LS0tCtWze4u7tj69atUschibBgGBBO8CQifZeWloauXbti7969kMvl+OCDD6SORBLhHAwDwgmeRKTPUlNT4e3tjT///BM2NjbYs2cP3NzcpI5FEmHBMCAXOMGTiPRUamoqunTpggMHDsDGxgZ79+6Fq6ur1LFIQiwYBuQCJ3gSkR5KT09H586dER4eDltbW+zduxctW7aUOhZJjHMwDAQneBKRvpLL5ahcuTJsbW2xb98+lgsCwIJhMDjBk4j0lUwmw08//YTo6Gi0aNFC6jikJ1gwDAQneBKRPklOTsbMmTORmZkJADAzM0OVKlUkTkX6hHMwDAQneBKRvkhOTkbHjh1x6NAh3Lp1C8HBwVJHIj3EEQwDwQmeRKQPXr58iQ4dOuDQoUNwcHDAkCFDpI5EeoojGAbg9QmeHMEgIqlkl4sjR47AwcEBf/75Jxo3bix1LNJTHMEwAK9P8HThBE8ikkBSUhLat2+PI0eOwNHREQcOHGC5oLfiCIYBOM8JnkQkISEEevTogaNHj6JIkSI4cOAAGjZsKHUs0nMcwTAAvEU7EUlJJpNh0qRJKFu2LMLDw1kuKF84gmEAOMGTiKTm6uqKGzduQKFQSB2FDARHMPQcJ3gSkRRevHgBLy8vXLx4Ub2M5YI0wREMPccJnkRU2J4/fw4PDw+cOnUKN27cwMWLF2Fubi51LDIwLBh6jhM8iagwPX/+HO3atcPp06fxwQcfYMuWLSwXVCA8RKLnOMGTiArLs2fP0LZtW5w+fRpOTk44ePAg6tSpI3UsMlAcwdBjSpVAxJXHAIB65YtKnIaIjNnTp0/Rtm1bREdHq8tFrVq1pI5FBowjGHrs8D9P8PB5KhytLdGmegmp4xCREZs8eTKio6NRvHhxREZGslzQe2PB0GO//nUPANCtflkoLHkMlIh0Z/78+fD29sbBgwdRs2ZNqeOQEeAhEj0Vn5iGiKuvDo/0aVRe4jREZIxSU1NhbW0NALC3t8eOHTskTkTGhCMYeir09H0oVQKfuBRF5ZL2UschIiOTkJCATz/9FHPnzpU6ChkpFgw9pFQJbDl9HwDQpzFHL4hIu548eYLWrVvj/Pnz+OGHH/D06VOpI5ERYsHQQ69P7mxfs7TUcYjIiDx+/BitW7fGhQsXUKpUKURFRaFYsWJSxyIjxDkYeoiTO4lIF7LLxaVLl1C6dGlERkaiatWqUsciI8URDD3DyZ1EpAvx8fFo1aoVLl26hDJlyiAqKorlgnSKIxh
6hpM7iUgXwsLCcPnyZXW5qFy5stSRyMixYOgRTu4kIl3x9fVFWloaWrVqxXJBhYIFQ49wcicRaVNcXBysrKxQtOirWw0MGjRI4kRkSjgHQ49s/t/kzu71y3FyJxG9l9jYWLi5uaFdu3Z4/vy51HHIBLFg6Im4F2k4mD25s7GzxGmIyJA9evQIbm5uuHbtGuLj4/Hs2TOpI5EJYsHQE6F/v5rc2cilGD4qwcmdRFQwDx8+hJubG65fv44PP/wQhw4dQoUKFaSORSaIczD0gFIlEPK/yZ29OXpBRAX04MEDtGrVCjdu3MCHH36IqKgouLi4SB2LTBRHMPTA4euc3ElE7+f+/ftwc3PDjRs34OLiwnJBkmPB0AObT3FyJxG9n9TUVKSkpKBChQosF6QXeIhEYpzcSUTaUKVKFURGRsLa2hrly/M6OiQ9jmBIjJM7iaig7t69i4iICPXjqlWrslyQ3mDBkNDrkzt55U4i0sSdO3fg5uaGjh074uDBg1LHIXoDC4aEXp/c6VmzlNRxiMhAZJeLO3fuwNnZmTctI73EgiEhTu4kIk3dvn0brq6uuHv3LipXroyoqCiULVtW6lhEb2DBkAgndxKRpm7dugU3Nzfcu3cPVapUYbkgvcazSAqRSiVw/Oa/eJmeicirTzi5k4jyLfvy3/fv30fVqlURGRmJ0qV53RzSXywYhWjTX3cx9fdLOZZxcicR5UeJEiXQrFkzxMTE4ODBgywXpPdYMAqJEALrTtwFAFQpaQcHhSVcnGzRoRZ/SBDRu1lYWGDDhg14/vw5nJycpI5D9E4sGIXk77vPcOPxS1hbmuO3oU1hr7CUOhIR6bnr169j1apV+O6772BmZgYLCwuWCzIYLBiFZPNfr84Y6VynDMsFEb3TtWvX0KpVK8TGxsLOzg6BgYFSRyLSCM8iKQTPUzKw50IsAKA351wQ0TtcvXpVXS5q1qyJoUOHSh2JSGMcwSgEv0U/REaWCjVKO6BOOUep4xCRHssuF3FxcahVqxYiIiJQvHhxqWMRaYwjGDomhMCv/7ugVu/G5SGTySRORET66sqVK3Bzc0NcXBxq166NgwcPslyQwWLB0LHTd/5/cqd33TJSxyEiPZWWlgYPDw/Ex8ejbt26OHjwICd0kkFjwdCx7NELTu4kordRKBQICgpC48aNER4ejg8++EDqSETvhQVDh54lc3InEb2dEEL9uZeXF44fP85yQUbhvQpGWlqatnIYpe1nObmTiPJ2/vx5NGzYELdu3VIvMzPj331kHDT+l6xSqTBr1iyULVsWdnZ26m+MqVOnYs2aNVoPaKiEENj816srd3JyJxH917lz59C6dWtER0dj3LhxUsch0jqNC8bs2bMRHByM77//HnK5XL28Zs2aWL16tVbDGbLTd57h5pNkTu4kojfExMSgTZs2+Pfff9GwYUP+cUZGSeOCsX79eqxcuRJ9+/aFubm5enmdOnVw9epVrYYzZNmjF5zcSUSvO3v2rLpcfPLJJzhw4ACKFi0qdSwirdO4YDx8+BAfffTRG8tVKhUyMzO1EsrQPUvOwN6LcQB4t1Qi+n/R0dFo06YNnj59ikaNGuHAgQMoUqSI1LGIdELjglGjRg0cOXLkjeXbtm1DvXr1tBLK0L0+ubM2J3cSEV7Nyxo7diyePXuGxo0b488//4SjI38+kPHS+FLh06ZNg5+fHx4+fAiVSoXt27fj2rVrWL9+PXbv3q2LjAaFkzuJKDcymQxbt27FhAkTsHjxYjg4OEgdiUinNB7B6NKlC/744w+Eh4fD1tYW06ZNw5UrV/DHH3+gbdu2ushoUDi5k4he9++//6o/d3Jywpo1a1guyCQU6GZnLVq0wIEDB7SdxShwcicRZTt9+jQ8PDwwb948DBo0SOo4RIVK4xGMihUr5mjk2Z4/f46KFStqJZSh4uROIsp26tQpuLu749mzZ9i0aROUSqXUkYgKlcYF486dO7l+o6Snp+Phw4daCWWofot
+wMmdRIS//voLbdu2RWJiIlq0aIE9e/bkOK2fyBTk+xDJrl271J+HhYXlmP2sVCoREREBFxcXrYYzJK/flr0PJ3cSmawTJ07Aw8MDSUlJaNmyJfbs2QM7OzupYxEVunwXDG9vbwCvZkL7+fnl+JqlpSVcXFywcOFCrYYzJKduP1VP7uzCyZ1EJun48ePw9PREUlIS3NzcsHv3btja2kodi0gS+S4YKpUKAFChQgWcPn0aTk5OOgtliHhbdiKKjIxEUlISWrVqhT/++IPlgkyaxmeR3L59Wxc5DBondxIRAEyePBllypSBj48PbGxspI5DJKkC3Rc4OTkZe/fuxYoVK/Djjz/m+NBUUFAQXFxcoFAo0LhxY5w6deqt6z9//hzDhw9H6dKlYWVlhSpVqmDv3r0FeRtawyt3Epmu6OhoJCcnA3h1CNnf35/lgggFGME4e/YsOnTogJSUFCQnJ6NYsWJISEiAjY0NSpQogVGjRuX7uUJCQhAQEIAVK1agcePGWLJkCTw8PHDt2jWUKFHijfUzMjLQtm1blChRAtu2bUPZsmVx9+5dya/lf+nRCwCAZ81SnNxJZEIOHz6MDh06oFGjRti9ezeLBdFrNB7BGDNmDLy8vPDs2TNYW1vj5MmTuHv3Lho0aIAFCxZo9FyLFi3CwIED4e/vjxo1amDFihWwsbHB2rVrc11/7dq1ePr0KXbu3IlmzZrBxcUFrq6uqFOnjqZvQyesLAo0IEREBujQoUNo3749kpOTYWlpyT8uiP5D49+IMTExGDt2LMzMzGBubo709HQ4Ozvj+++/x+TJk/P9PBkZGThz5gzc3d3/P4yZGdzd3XHixIlct9m1axeaNGmC4cOHo2TJkqhZsybmzJnz1gvYpKenIzExMccHEdH7iIqKUo/kenh4YOfOnbC2tpY6FpFe0bhgWFpawszs1WYlSpTAvXuvzp5wdHTE/fv38/08CQkJUCqVKFmyZI7lJUuWRFxcXK7b3Lp1C9u2bYNSqcTevXsxdepULFy4ELNnz87zdebOnQtHR0f1h7Ozc74zEhH918GDB9XlwtPTk+WCKA8az8GoV68eTp8+jcqVK8PV1RXTpk1DQkICNmzYgJo1a+oio5pKpUKJEiWwcuVKmJubo0GDBnj48CHmz5+PwMDAXLeZNGkSAgIC1I8TExNZMoioQA4ePIhOnTohNTUV7du3x/bt26FQKKSORaSXNC4Yc+bMQVJSEgDg22+/ha+vL4YOHYrKlStjzZo1+X4eJycnmJubIz4+Psfy+Ph4lCpVKtdtSpcuDUtLyxyX3K1evTri4uKQkZEBuVz+xjZWVlawsrLKdy4iorwUKVIECoUCrVu3xm+//cafLURvoXHBaNiwofrzEiVKYP/+/QV6YblcjgYNGiAiIkJ9lVCVSoWIiAiMGDEi122aNWuGzZs3Q6VSqQ/TXL9+HaVLl861XBARaVP9+vVx/PhxVKhQgeWC6B20dtpDdHQ0OnXqpNE2AQEBWLVqFdatW4crV65g6NChSE5Ohr+/PwDA19cXkyZNUq8/dOhQPH36FKNHj8b169exZ88ezJkzB8OHD9fW2yAiyuHPP//E8ePH1Y+rVavGckGUDxqNYISFheHAgQOQy+UYMGAAKlasiKtXr2LixIn4448/4OHhodGL+/j44MmTJ5g2bRri4uJQt25d7N+/Xz3x8969e+qRCgBwdnZGWFgYxowZg9q1a6Ns2bIYPXo0JkyYoNHrEhHlx/79++Ht7Q25XI4TJ07g448/ljoSkcHId8FYs2YNBg4ciGLFiuHZs2dYvXo1Fi1ahJEjR8LHxwcXL15E9erVNQ4wYsSIPA+JREVFvbGsSZMmOHnypMavQ0SkiX379qFr165IT09H+/btUblyZakjERmUfB8i+eGHH/Ddd98hISEBoaGhSEhIwLJly3DhwgWsWLGiQOWCiEgf7d27F97e3khPT0fXrl0RGhrKeV5EGsp3wbh58yZ69OgBAOj
WrRssLCwwf/58lCtXTmfhiIgK2+7du9G1a1dkZGSge/fuCAkJgaUl75BMpKl8F4zU1FT1dfZlMhmsrKxQunRpnQUjIipsx48fR7du3ZCRkYHPPvsMv/76K8sFUQFpNMlz9erVsLOzAwBkZWUhODgYTk5OOdbR5GZnRET6pH79+nB3d4ednR02bdrEckH0HvJdMMqXL49Vq1apH5cqVQobNmzIsY5MJmPBICKDpVAosH37dlhYWMDCQuPLBBHRa/L9HXTnzh0dxiAiksaOHTtw8uRJzJs3DzKZjJf+JtISVnQiMlnbt2+Hj48PsrKyULduXfTu3VvqSERGQ2tX8iQiMiTbtm1Dz549kZWVhb59+6rPkiMi7WDBICKTs3XrVvTq1QtKpRL9+vXDunXrOOeCSMtYMIjIpISEhKB3795QKpXw9fXFL7/8kuMOzUSkHSwYRGQy7t+/j379+kGpVMLPzw9r165luSDSkQIVjJs3b2LKlCno3bs3Hj9+DODVdfsvXbqk1XBERNrk7OyM1atXo3///lizZg3LBZEOaVwwDh06hFq1auGvv/7C9u3b8fLlSwDAuXPnEBgYqPWARETvKzMzU/25r68vVq9ezXJBpGMaF4yJEydi9uzZ6tu2Z2vdujXvckpEemfjxo2oV68e4uLipI5CZFI0LhgXLlxA165d31heokQJJCQkaCUUEZE2bNiwAX5+frh06RJWrlwpdRwik6JxwShSpAhiY2PfWH727FmULVtWK6GIiN7XunXr4OfnB5VKhcGDB2PKlClSRyIyKRoXjF69emHChAmIi4uDTCaDSqXCsWPHMG7cOPj6+uoiIxGRRoKDg+Hv7w8hBIYMGYJly5bBzIwnzREVJo2/4+bMmYNq1arB2dkZL1++RI0aNdCyZUs0bdqUfyEQkeR++eUXfPnllxBCYNiwYSwXRBLR+NJ1crkcq1atwtSpU3Hx4kW8fPkS9erVQ+XKlXWRj4go39LS0jB37lwIITB8+HAsXboUMplM6lhEJknjgnH06FE0b94c5cuXR/ny5XWRiYioQBQKBSIiIrBu3Tp88803LBdEEtJ43LB169aoUKECJk+ejMuXL+siExGRRm7fvq3+3NnZGVOmTGG5IJKYxgXj0aNHGDt2LA4dOoSaNWuibt26mD9/Ph48eKCLfEREb/Xzzz+jSpUqCA0NlToKEb1G44Lh5OSEESNG4NixY7h58yZ69OiBdevWwcXFBa1bt9ZFRiKiXC1fvhxDhgxBVlYWTp8+LXUcInrNe02trlChAiZOnIh58+ahVq1aOHTokLZyERG91bJlyzBs2DAAwNixY/H9999LnIiIXlfggnHs2DEMGzYMpUuXRp8+fVCzZk3s2bNHm9mIiHL1008/Yfjw4QCAr7/+GvPnz+ecCyI9o/FZJJMmTcKWLVvw6NEjtG3bFj/88AO6dOkCGxsbXeQjIsph6dKlGDVqFABg/PjxmDdvHssFkR7SuGAcPnwYX3/9NXr27AknJyddZCIiytO1a9cAvLrx4pw5c1guiPSUxgXj2LFjushBRJQvS5cuRbt27eDl5cVyQaTH8lUwdu3ahfbt28PS0hK7du1667qdO3fWSjAiomy///472rdvD7lcDplMxp8zRAYgXwXD29sbcXFxKFGiBLy9vfNcTyaTQalUaisbEREWLlyIcePGwdvbG9u2bYO5ubnUkYgoH/JVMFQqVa6fExHp0vz58zF+/HgAQO3atXnTMiIDovF36/r165Genv7G8oyMDKxfv14roYiIvvvuO3W5CAwMxIwZMzjngsiAaFww/P398eLFizeWJyUlwd/fXyuhiMi0zZs3DxMnTgQATJ8+HdOnT5c2EBFpTOOzSIQQuf4V8eDBAzg6OmolFBGZrvnz52PSpEkAgJkzZ2Lq1KkSJyKigsh3wahXrx5kMhlkMhnatGkDC4v/31SpVOL27dvw9PTUSUgiMh2NGjWCjY0NJk2ahClTpkgdh4gKKN8
FI/vskZiYGHh4eMDOzk79NblcDhcXF3Tv3l3rAYnItLi6uuLKlSsoX7681FGI6D3ku2AEBgYCAFxcXODj4wOFQqGzUERkWhYsWABPT0/UrFkTAFguiIyAxpM8/fz8WC6ISGumT5+Or7/+Gq1bt8a///4rdRwi0pJ8jWAUK1YM169fh5OTE4oWLfrWU8WePn2qtXBEZLyEEJg+fTpmzpwJ4NWNyz744AOJUxGRtuSrYCxevBj29vbqz3kuOhG9DyEEpk2bhtmzZwN4dYhk7NixEqciIm3KV8Hw8/NTf/7FF1/oKgsRmQAhBKZOnYpvv/0WALBo0SKMGTNG4lREpG0az8GIjo7GhQsX1I9///13eHt7Y/LkycjIyNBqOCIyPqtXr1aXi8WLF7NcEBkpjQvG4MGDcf36dQDArVu34OPjAxsbG2zdulV9WV8iorz06tULzZo1w5IlS/DVV19JHYeIdETjK3lev34ddevWBQBs3boVrq6u2Lx5M44dO4ZevXphyZIlWo5IRIbu9SsA29vbIyoqKsfF+ojI+Gg8giGEUN9RNTw8HB06dAAAODs7IyEhQbvpiMjgCSHw9ddfY+7cueplLBdExk/j7/KGDRti9uzZcHd3x6FDh7B8+XIAwO3bt1GyZEmtByQiwyWEwLhx47Bo0SIAgKenJ+rVqydxKiIqDBqPYCxZsgTR0dEYMWIEvvnmG3z00UcAgG3btqFp06ZaD0hEhkkIgYCAAHW5WL58OcsFkQnReASjdu3aOc4iyTZ//nyYm5trJRQRGTYhBMaMGYMffvgBAPDzzz9j0KBBEqciosJU4AOhZ86cwZUrVwAANWrUQP369bUWiogMlxACo0ePxtKlSwEAK1euxMCBAyVORUSFTeOC8fjxY/j4+ODQoUMoUqQIAOD58+do1aoVtmzZguLFi2s7IxEZkEOHDmHp0qWQyWRYtWoV+vfvL3UkIpKAxnMwRo4ciZcvX+LSpUt4+vQpnj59iosXLyIxMRGjRo3SRUYiMiBubm5YsmQJVq9ezXJBZMI0HsHYv38/wsPDUb16dfWyGjVqICgoCO3atdNqOCIyDCqVCsnJyep7Fo0ePVriREQkNY1HMFQqFSwtLd9Ybmlpqb4+BhGZDpVKhWHDhqFVq1Z4/vy51HGISE9oXDBat26N0aNH49GjR+plDx8+xJgxY9CmTRuthiMi/aZSqTBkyBD8/PPPiI6OxuHDh6WORER6QuOC8dNPPyExMREuLi6oVKkSKlWqhAoVKiAxMVE9a5yIjJ9KpcLgwYOxatUqmJmZYf369ejcubPUsYhIT2g8B8PZ2RnR0dGIiIhQn6ZavXp1uLu7az0cEeknlUqFgQMHYu3atepy0bdvX6ljEZEe0ahghISEYNeuXcjIyECbNm0wcuRIXeUiIj2lUqkwYMAA/PLLLzAzM8OGDRvQp08fqWMRkZ7Jd8FYvnw5hg8fjsqVK8Pa2hrbt2/HzZs3MX/+fF3mIyI9Exsbi/3798PMzAybNm1Cr169pI5ERHoo33MwfvrpJwQGBuLatWuIiYnBunXrsGzZMl1mIyI9VLZsWURGRmLr1q0sF0SUp3wXjFu3bsHPz0/9uE+fPsjKykJsbKxOghGR/lAqlYiJiVE/rlq1Krp16yZdICLSe/kuGOnp6bC1tf3/Dc3MIJfLkZqaqpNgRKQflEolvvjiC3z66acICwuTOg4RGQiNJnlOnToVNjY26scZGRn49ttv4ejoqF6WfWtmIjJ8WVlZ8PPzw+bNm2FhYYGXL19KHYmIDES+C0bLli1x7dq1HMuaNm2KW7duqR/LZDLtJSMiSWVlZcHX1xe//vorLCwsEBISwsMiRJRv+S4YUVFROoxBRPokKysLn3/+OUJCQmBhYYHQ0FB07dpV6lhEZEA0vtAWERm3rKws9O3bF6GhobC0tMTWrVvRpUsXqWMRkYFhwSCiN5ibm8PS0hLbtm3j5b+JqEA0vhcJERk3CwsLrF+/Hse
OHWO5IKICY8EgImRmZmLZsmVQKpUAXpWMTz75ROJURGTIWDCITFxGRgZ8fHwwfPhwDB8+XOo4RGQkClQwjhw5gs8//xxNmjTBw4cPAQAbNmzA0aNHtRqOiHQru1zs2LEDVlZWnMxJRFqjccH47bff4OHhAWtra5w9exbp6ekAgBcvXmDOnDlaD0hEupGRkYEePXpg586dsLKyws6dO9G+fXupYxGRkdC4YMyePRsrVqzAqlWrYGlpqV7erFkzREdHazUcEelGeno6PvvsM+zatQsKhQK7du2Cp6en1LGIyIhofJrqtWvX0LJlyzeWOzo64vnz59rIREQ61rdvX/zxxx/qctG2bVupIxGRkdF4BKNUqVK4cePGG8uPHj2KihUrFihEUFAQXFxcoFAo0LhxY5w6dSpf223ZsgUymQze3t4Fel0iU+Xn5wdHR0f88ccfLBdEpBMaF4yBAwdi9OjR+OuvvyCTyfDo0SNs2rQJ48aNw9ChQzUOEBISgoCAAAQGBiI6Ohp16tSBh4cHHj9+/Nbt7ty5g3HjxqFFixYavyaRqfPy8sKdO3fg7u4udRQiMlIaF4yJEyeiT58+aNOmDV6+fImWLVtiwIABGDx4MEaOHKlxgEWLFmHgwIHw9/dHjRo1sGLFCtjY2GDt2rV5bqNUKtG3b1/MmDGjwKMmRKYkLS0N/fv3z3FzwiJFikgXiIiMnsYFQyaT4ZtvvsHTp09x8eJFnDx5Ek+ePMGsWbM0fvGMjAycOXMmx19RZmZmcHd3x4kTJ/LcbubMmShRogT69+//ztdIT09HYmJijg8iU5KamoouXbpg7dq16NSpk/piWkREulTge5HI5XLUqFHjvV48ISEBSqUSJUuWzLG8ZMmSuHr1aq7bHD16FGvWrEFMTEy+XmPu3LmYMWPGe+UkMlTZ5eLAgQOwtbXFihUrYG5uLnUsIjIBGheMVq1aQSaT5fn1gwcPvlegt0lKSkK/fv2watUqODk55WubSZMmISAgQP04MTERzs7OuopIpDdSUlLQpUsXhIeHw9bWFvv27eOcJSIqNBoXjLp16+Z4nJmZiZiYGFy8eBF+fn4aPZeTkxPMzc0RHx+fY3l8fDxKlSr1xvo3b97EnTt34OXlpV6mUqkAvLp3wrVr11CpUqUc21hZWcHKykqjXESGLiUlBZ07d0ZERATs7Oywb98+NG/eXOpYRGRCNC4YixcvznX59OnT8fLlS42eSy6Xo0GDBoiIiFCfaqpSqRAREYERI0a8sX61atVw4cKFHMumTJmCpKQk/PDDDxyZIPqf8ePHq8vF/v370axZM6kjEZGJKfAcjP/6/PPP0ahRIyxYsECj7QICAuDn54eGDRuiUaNGWLJkCZKTk+Hv7w8A8PX1RdmyZTF37lwoFArUrFkzx/bZM+H/u5zIlE2fPh3nzp3Dd999h6ZNm0odh4hMkNYKxokTJ6BQKDTezsfHB0+ePMG0adMQFxeHunXrYv/+/eqJn/fu3YOZGW/6SvQuSqVSPYHTyckJhw8ffut8KSIiXdK4YHTr1i3HYyEEYmNj8ffff2Pq1KkFCjFixIhcD4kAQFRU1Fu3DQ4OLtBrEhmTly9folOnTujduzcGDx4MACwXRCQpjQuGo6NjjsdmZmaoWrUqZs6ciXbt2mktGBHlT1JSEjp06ICjR4/i3Llz6N69e77PsiIi0hWNCoZSqYS/vz9q1aqFokWL6ioTEeVTUlIS2rdvj2PHjsHR0RFhYWEsF0SkFzSa3GBubo527drxrqlEeiAxMRGenp7qcnHgwAE0atRI6lhERAAKcKnwmjVr5rifAREVvuxycfz4cRQpUgTh4eH45JNPpI5FRKSmccGYPXs2xo0bh927dyM2Npb3+SCSQGhoKE6cOIGiRYsiPDwcDRs2lDoSEVEO+Z6DMXPmTIwdOxYdOnQAAHTu3DnHLHUhBGQyGW+kRFQI+vfvjydPnsDDwwP169eXOg4R0RvyXTBmzJiBIUO
GIDIyUpd5iCgPL168gIWFBWxtbSGTyTBp0iSpIxER5SnfBUMIAQBwdXXVWRgiyt3z58/Rrl072NnZYffu3bCxsZE6EhHRW2k0B4MX7iEqfM+ePUPbtm1x+vRpnD9/Hvfu3ZM6EhHRO2l0HYwqVaq8s2Q8ffr0vQIR0f97+vQp2rZti+joaDg5OSEiIgLVqlWTOhYR0TtpVDBmzJjxxpU8iUg3nj59Cnd3d5w9exZOTk44ePAgatWqJXUsIqJ80ahg9OrVCyVKlNBVFiL6n3///Rfu7u6IiYlB8eLFcfDgQd4xmIgMSr7nYHD+BVHhefToEe7evYsSJUogMjKS5YKIDI7GZ5EQke7VqlUL4eHhUCgUqFGjhtRxiIg0lu+CoVKpdJmDyOQlJCTg9u3b6kt+8wJaRGTINL5UOBFp35MnT9C6dWu0adMGJ0+elDoOEdF7Y8Egktjjx4/RunVrXLhwAXZ2dihatKjUkYiI3ptGZ5EQkXZll4tLly6hTJkyiIyMRJUqVaSORUT03jiCQSSR+Ph4tGrVCpcuXULZsmURFRXFckFERoMjGEQSePLkCVq1aoUrV66oy8VHH30kdSwiIq1hwSCSgL29PVxcXJCUlITIyEiWCyIyOiwYRBJQKBTYvn07Hj9+jPLly0sdh4hI6zgHg6iQPHr0CN999536onUKhYLlgoiMFkcwiArBw4cP0apVK/zzzz9QqVSYNGmS1JGIiHSKIxhEOvbgwQO4ubnhn3/+wYcffojevXtLHYmISOdYMIh06P79+3Bzc8ONGzfg4uKCQ4cOwcXFRepYREQ6x4JBpCPZ5eLmzZuoUKECoqKi8OGHH0odi4ioULBgEOlAeno62rRpg1u3bqFixYosF0RkclgwiHTAysoK06ZNQ5UqVRAVFcWzRYjI5LBgEOnI559/jvPnz8PZ2VnqKEREhY4Fg0hLbt++DU9PT8TGxqqXWVlZSZiIiEg6LBhEWnDr1i24ubkhLCwMQ4YMkToOEZHkWDCI3tPNmzfh5uaGe/fuoUqVKli+fLnUkYiIJMcreRK9h+xy8eDBA1StWhWRkZEoXbq01LGIiCTHEQyiArpx4wZcXV3x4MEDVKtWDVFRUSwXRET/w4JBVEADBgzAw4cPUb16dURGRqJUqVJSRyIi0hssGEQFtGHDBnh5ebFcEBHlgnMwiDSQmpoKa2trAICzszN27dolcSIiIv3EEQyifLp27RqqVq2K0NBQqaMQEek9FgyifLh69Src3Nxw//59zJs3D1lZWVJHIiLSaywYRO9w5coVuLm5IS4uDrVr18aff/4JCwseXSQiehsWDKK3uHz5Mtzc3BAfH486deogIiICTk5OUsciItJ7LBhEebh06RJatWqFx48fo27duiwXREQaYMEgysPmzZvx+PFj1KtXDxEREfjggw+kjkREZDB4IJkoD7Nnz0aRIkXQv39/FCtWTOo4REQGhSMYRK+5ceMGMjIyAAAymQxff/01ywURUQGwYBD9z7lz5/Dpp5+iZ8+e6pJBREQFw4JBBCAmJgatW7fGv//+i0ePHiE1NVXqSEREBo0Fg0ze2bNn0aZNGzx9+hSNGzfGgQMH4OjoKHUsIiKDxoJBJi06OlpdLj799FOEhYWxXBARaQELBpmsM2fOoE2bNnj27BmaNGnCckFEpEUsGGSykpOTkZGRgaZNm2L//v1wcHCQOhIRkdHgdTDIZLVs2RKRkZGoXr067O3tpY5DRGRUWDDIpJw6dQoKhQK1a9cGADRq1EjiRERExomHSMhknDx5Em3btkWbNm1w9epVqeMQERk1FgwyCSdOnEC7du2QmJiIGjVqoFy5clJHIiIyaiwYZPSOHz8ODw8PJCUlwdXVFXv37oWdnZ3UsYiIjBoLBhm1Y8eOqcuFm5sb9uzZA1tbW6ljEREZPRYMMlpnzpyBp6cnXr58idatW7NcEBEVIp5FQkarSpUqqFOnDhQKBXbt2gUbGxupIxERmQwWDDJ
a9vb22LdvH8zNzVkuiIgKGQ+RkFE5dOgQ5s+fr35sb2/PckFEJAGOYJDRiIyMRKdOnZCSkoLy5cvDx8dH6khERCaLIxhkFA4ePIiOHTsiJSUFnp6e6NKli9SRiIhMGgsGGbyIiAh06tQJqamp6NChA3bs2AGFQiF1LCIik8ZDJGTQwsPD4eXlhbS0NHTo0AHbt2+HlZWV1LGIiEweRzDIYD18+BCdO3dGWloaOnbsyHJBRKRHOIJBBqts2bKYN28ewsPDsXXrVpYLIiI9whEMMjhCCPXno0aNws6dO1kuiIj0DAsGGZR9+/ahRYsWePbsmXqZmRn/GRMR6Rv+ZCaDsXfvXnh7e+PYsWM5LqZFRET6hwWDDMLu3bvRtWtXZGRkoHv37pgxY4bUkYiI6C1YMEjv/fHHH+jWrRsyMjLw2Wef4ddff4WlpaXUsYiI6C30omAEBQXBxcUFCoUCjRs3xqlTp/Jcd9WqVWjRogWKFi2KokWLwt3d/a3rk2HbtWsXunfvjszMTPTo0QObN29muSAiMgCSF4yQkBAEBAQgMDAQ0dHRqFOnDjw8PPD48eNc14+KikLv3r0RGRmJEydOwNnZGe3atcPDhw8LOTnpWnp6OkaPHo3MzEz4+PiwXBARGRDJC8aiRYswcOBA+Pv7o0aNGlixYgVsbGywdu3aXNfftGkThg0bhrp166JatWpYvXo1VCoVIiIiCjk56ZqVlRXCwsIwcuRIbNy4ERYWvGwLEZGhkLRgZGRk4MyZM3B3d1cvMzMzg7u7O06cOJGv50hJSUFmZiaKFSuW69fT09ORmJiY44P0W0JCgvrzKlWq4Mcff2S5ICIyMJIWjISEBCiVSpQsWTLH8pIlSyIuLi5fzzFhwgSUKVMmR0l53dy5c+Ho6Kj+cHZ2fu/cpDvbtm1DhQoVEBYWJnUUIiJ6D5IfInkf8+bNw5YtW95698xJkybhxYsX6o/79+8XckrKr61bt6JXr154+fIltm3bJnUcIiJ6D5KOOzs5OcHc3Bzx8fE5lsfHx6NUqVJv3XbBggXq+1DUrl07z/WsrKx4GWkDEBoaij59+kCpVMLX1xcrVqyQOhIREb0HSUcw5HI5GjRokGOCZvaEzSZNmuS53ffff49Zs2Zh//79aNiwYWFEJR3asmWLulz4+flh7dq1MDc3lzoWERG9B8lnzgUEBMDPzw8NGzZEo0aNsGTJEiQnJ8Pf3x8A4Ovri7Jly2Lu3LkAgO+++w7Tpk3D5s2b4eLiop6rYWdnBzs7O8neBxXMr7/+is8//xwqlQr+/v5YtWoVywURkRGQvGD4+PjgyZMnmDZtGuLi4lC3bl3s379fPfHz3r17OW5mtXz5cvUVHV8XGBiI6dOnF2Z00oJ9+/ZBpVLhyy+/xKpVq3jjMiIiIyF5wQCAESNGYMSIEbl+LSoqKsfjO3fu6D4QFZq1a9fC1dUV/v7+LBdEREaEP9Gp0B09ehRKpRIAYGFhgf79+7NcEBEZGf5Up0K1bt06tGzZEv3791eXDCIiMj4sGFRogoOD4e/vDyEErK2tIZPJpI5EREQ6woJBhWLt2rX48ssvIYTA0KFDERQUxMMiRERGjD/hSefWrFmDAQMGQAiBYcOGsVwQEZkA/pQnnXq9XIwYMQI//fQTD40QEZkAvThNlYxXiRIlYGlpiaFDh2LJkiUsF0REJoIFg3TKy8sLZ86cQc2aNVkuiIhMCA+RkNatW7cON2/eVD+uVasWywURkYlhwSCtWrZsGb744gu0atUKCQkJUschIiKJsGCQ1gQFBWH48OEAXt1j5oMPPpA4ERERSYUFg7Ri6dKl6vvJjB8/Ht9//z0PixARmTAWDHpvP/74I0aNGgUAmDBhAubNm8dyQURk4lgw6L1s3LgRo0ePBgBMmjQJc+fOZbkgIiKepkrvx9PTE7Vr14aXlxdmzZrFckFERABYMOg9OTk54fjx47CxsWG5ICIiNR4iIY3Nnz8fK1asUD+2tbV
luSAiohw4gkEa+e677zBx4kQAwCeffIIGDRpInIiIiPQRRzAo3+bNm6cuFzNmzGC5ICKiPLFgUL7MmTMHkyZNAgDMmjUL06ZNkzgRERHpMx4ioXf69ttvMWXKFPXnkydPljgRERHpOxYMeqvDhw+ry8XroxhERERvw4JBb9WyZUtMmzYNNjY2mDBhgtRxiIjIQLBg0BuEEMjMzIRcLgfwakInERGRJjjJk3IQQiAwMBAeHh5ISUmROg4RERkoFgxSE0Jg2rRpmDVrFqKiorB7926pIxERkYHiIRIC8KpcTJkyBXPmzAEALFq0CD179pQ4FRERGSoWDIIQApMnT8a8efMAAIsXL8ZXX30lbSgiIjJoLBgmTgiBSZMm4bvvvgMA/PDDDxg1apTEqYiIyNCxYJi4R48eYeXKlQCApUuXYsSIERInIiIiY8CCYeLKli2LiIgI/P333xg4cKDUcYiIyEiwYJggIQTu3LmDChUqAADq1auHevXqSZyKiIiMCU9TNTFCCIwdOxZ16tTBiRMnpI5DRERGigXDhAghMGbMGCxevBhJSUm4dOmS1JGIiMhI8RCJiRBCYPTo0Vi6dCkAYOXKlRgwYIDEqYiIyFixYJgAIQRGjhyJoKAgAMCqVatYLoiISKdYMIycEAIjRozAsmXLIJPJsHr1anz55ZdSxyIiIiPHgmHkMjMzcefOHchkMqxZswb+/v5SRyIiIhPAgmHk5HI5fvvtNxw6dAgeHh5SxyEiIhPBs0iMkEqlwtatWyGEAAAoFAqWCyIiKlQsGEZGpVJhyJAh6NmzJ8aPHy91HCIiMlE8RGJEVCoVBg0ahDVr1sDMzAx169aVOhIREZkoFgwjoVKpMHDgQKxduxZmZmbYsGED+vTpI3UsIiIyUSwYRkCpVGLAgAEIDg6GmZkZNm3ahF69ekkdi4iITBjnYBiBQYMGITg4GObm5ti8eTPLBRERSY4Fwwi0atUKcrkcmzdvho+Pj9RxiIiIeIjEGHz++edwdXWFs7Oz1FGIiIgAcATDIGVlZWHixImIjY1VL2O5ICIifcKCYWCysrLg6+uL7777Dh4eHsjKypI6EhER0Rt4iMSAZGVloV+/ftiyZQssLCwwc+ZMWFjwfyEREekf/nYyEFlZWejbty9CQ0NhaWmJrVu3okuXLlLHIiIiyhULhgHIzMxE3759sXXrVlhaWuK3336Dl5eX1LGIiIjyxDkYBmDChAnYunUr5HI5tm/fznJBRER6jwXDAAQEBODjjz/G9u3b0alTJ6njEBERvRMPkegpIQRkMhkAoFy5coiJieGETiIiMhgcwdBDGRkZ6NGjB0JCQtTLWC6IiMiQsGDomfT0dHz22Wf47bff0L9/fzx58kTqSERERBrjn8V6JLtc7N69GwqFAtu3b0fx4sWljkVERKQxFgw9kZ6eju7du2PPnj1QKBTYtWsX2rZtK3UsIiKiAmHB0ANpaWno3r079u7dC4VCgT/++APu7u5SxyIiIiowzsHQA+vWrcPevXthbW2N3bt3s1wQEZHB4wiGHhg0aBCuX7+Ojh07onXr1lLHISIiem8sGBJJTU2Fubk55HI5ZDIZFi5cKHUkIiIireEhEgmkpqaiS5cu6NmzJzIyMqSOQ0REpHUcwShkKSkp6NKlC8LDw2Fra4urV6+idu3aUsciIiLSKhaMQpSSkgIvLy8cPHgQtra22LdvH8sFEREZJR4iKSTJycno1KkTDh48CDs7O+zfvx8tWrSQOhYREZFOcASjEGSXi6ioKNjb22P//v1o2rSp1LGIiIh0hgWjEFy9ehWnT5+Gvb09wsLC0KRJE6kjERER6RQLRiFo0KAB9uzZA7lcznJBREQmgQVDR16+fIkHDx6gWrVqAABXV1eJExERERUeTvLUgaSkJLRv3x4tWrTAhQsXpI5DRERU6FgwtCwxMRGenp44evQoMjMzkZaWJnUkIiKiQqcXBSMoKAguLi5QKBRo3LgxTp069db
1t27dimrVqkGhUKBWrVrYu3dvISV9u7T0dHh6euL48eMoUqQIwsPD8cknn0gdi4iIqNBJXjBCQkIQEBCAwMBAREdHo06dOvDw8MDjx49zXf/48ePo3bs3+vfvj7Nnz8Lb2xve3t64ePFiISd/0+pVq3DixAkULVoU4eHhaNiwodSRiIiIJCETQggpAzRu3BiffPIJfvrpJwCASqWCs7MzRo4ciYkTJ76xvo+PD5KTk7F79271sk8//RR169bFihUr3vl6iYmJcHR0xIsXL+Dg4KCV9zBy42n8cfExnkWuhfk/kQgPD0f9+vW18txERET6QpPfoZKOYGRkZODMmTNwd3dXLzMzM4O7uztOnDiR6zYnTpzIsT4AeHh45Ll+eno6EhMTc3xom0z26r/WNjaIiIhguSAiIpMnacFISEiAUqlEyZIlcywvWbIk4uLict0mLi5Oo/Xnzp0LR0dH9Yezs7N2wr+mcqkiqFfOAVPHjkS9evW0/vxERESGxuivgzFp0iQEBASoHycmJmq9ZIxsUxkj21TW6nMSEREZMkkLhpOTE8zNzREfH59jeXx8PEqVKpXrNqVKldJofSsrK1hZWWknMBEREeWLpIdI5HI5GjRogIiICPUylUqFiIiIPC+p3aRJkxzrA8CBAwd4CW4iIiI9IvkhkoCAAPj5+aFhw4Zo1KgRlixZguTkZPj7+wMAfH19UbZsWcydOxcAMHr0aLi6umLhwoXo2LEjtmzZgr///hsrV66U8m0QERHRayQvGD4+Pnjy5AmmTZuGuLg41K1bF/v371dP5Lx37x7MzP5/oKVp06bYvHkzpkyZgsmTJ6Ny5crYuXMnatasKdVbICIiov+Q/DoYhU0X18EgIiIyBQZzHQwiIiIyTiwYREREpHUsGERERKR1LBhERESkdSwYREREpHUsGERERKR1LBhERESkdSwYREREpHUsGERERKR1LBhERESkdSwYREREpHUsGERERKR1LBhERESkdZLfrr2wZd88NjExUeIkREREhiX7d2d+bsRucgUjKSkJAODs7CxxEiIiIsOUlJQER0fHt64jE/mpIUZEpVLh0aNHsLe3h0wm08pzJiYmwtnZGffv34eDg4NWntPUcZ9qH/epdnF/ah/3qXbpYn8KIZCUlIQyZcrAzOztsyxMbgTDzMwM5cqV08lzOzg48JtCy7hPtY/7VLu4P7WP+1S7tL0/3zVykY2TPImIiEjrWDCIiIhI61gwtMDKygqBgYGwsrKSOorR4D7VPu5T7eL+1D7uU+2Sen+a3CRPIiIi0j2OYBAREZHWsWAQERGR1rFgEBERkdaxYBAREZHWsWDkU1BQEFxcXKBQKNC4cWOcOnXqretv3boV1apVg0KhQK1atbB3795CSmo4NNmnq1atQosWLVC0aFEULVoU7u7u7/x/YGo0/TeabcuWLZDJZPD29tZtQAOk6T59/vw5hg8fjtKlS8PKygpVqlTh9/5rNN2fS5YsQdWqVWFtbQ1nZ2eMGTMGaWlphZRW/x0+fBheXl4oU6YMZDIZdu7c+c5toqKiUL9+fVhZWeGjjz5CcHCw7gIKeqctW7YIuVwu1q5dKy5duiQGDhwoihQpIuLj43Nd/9ixY8Lc3Fx8//334vLly2LKlCnC0tJSXLhwoZCT6y9N92mfPn1EUFCQOHv2rLhy5Yr44osvhKOjo3jw4EEhJ9dPmu7PbLdv3xZly5YVLVq0EF26dCmcsAZC032anp4uGjZsKDp06CCOHj0qbt++LaKiokRMTEwhJ9dPmu7PTZs2CSsrK7Fp0yZx+/ZtERYWJkqXLi3GjBlTyMn11969e8U333wjtm/fLgCIHTt2vHX9W7duCRsbGxEQECAuX74sli5dKszNzcX+/ft1ko8FIx8aNWokhg8frn6sVCpFmTJlxNy5c3Ndv2fPnqJjx445ljVu3FgMHjxYpzkNiab79L+ysrKEvb29WLduna4iGpSC7M+srCzRtGlTsXr
1auHn58eC8R+a7tPly5eLihUrioyMjMKKaFA03Z/Dhw8XrVu3zrEsICBANGvWTKc5DVV+Csb48ePFxx9/nGOZj4+P8PDw0EkmHiJ5h4yMDJw5cwbu7u7qZWZmZnB3d8eJEydy3ebEiRM51gcADw+PPNc3NQXZp/+VkpKCzMxMFCtWTFcxDUZB9+fMmTNRokQJ9O/fvzBiGpSC7NNdu3ahSZMmGD58OEqWLImaNWtizpw5UCqVhRVbbxVkfzZt2hRnzpxRH0a5desW9u7diw4dOhRKZmNU2L+bTO5mZ5pKSEiAUqlEyZIlcywvWbIkrl69mus2cXFxua4fFxens5yGpCD79L8mTJiAMmXKvPHNYooKsj+PHj2KNWvWICYmphASGp6C7NNbt27h4MGD6Nu3L/bu3YsbN25g2LBhyMzMRGBgYGHE1lsF2Z99+vRBQkICmjdvDiEEsrKyMGTIEEyePLkwIhulvH43JSYmIjU1FdbW1lp9PY5gkMGZN28etmzZgh07dkChUEgdx+AkJSWhX79+WLVqFZycnKSOYzRUKhVKlCiBlStXokGDBvDx8cE333yDFStWSB3NIEVFRWHOnDlYtmwZoqOjsX37duzZswezZs2SOhrlE0cw3sHJyQnm5uaIj4/PsTw+Ph6lSpXKdZtSpUpptL6pKcg+zbZgwQLMmzcP4eHhqF27ti5jGgxN9+fNmzdx584deHl5qZepVCoAgIWFBa5du4ZKlSrpNrSeK8i/0dKlS8PS0hLm5ubqZdWrV0dcXBwyMjIgl8t1mlmfFWR/Tp06Ff369cOAAQMAALVq1UJycjIGDRqEb775BmZm/PtYU3n9bnJwcND66AXAEYx3ksvlaNCgASIiItTLVCoVIiIi0KRJk1y3adKkSY71AeDAgQN5rm9qCrJPAeD777/HrFmzsH//fjRs2LAwohoETfdntWrVcOHCBcTExKg/OnfujFatWiEmJgbOzs6FGV8vFeTfaLNmzXDjxg11WQOA69evo3Tp0iZdLoCC7c+UlJQ3SkR2eRO8hVaBFPrvJp1MHTUyW7ZsEVZWViI4OFhcvnxZDBo0SBQpUkTExcUJIYTo16+fmDhxonr9Y8eOCQsLC7FgwQJx5coVERgYyNNU/0PTfTpv3jwhl8vFtm3bRGxsrPojKSlJqregVzTdn//Fs0jepOk+vXfvnrC3txcjRowQ165dE7t37xYlSpQQs2fPluot6BVN92dgYKCwt7cXv/76q7h165b4888/RaVKlUTPnj2legt6JykpSZw9e1acPXtWABCLFi0SZ8+eFXfv3hVCCDFx4kTRr18/9frZp6l+/fXX4sqVKyIoKIinqeqDpUuXivLlywu5XC4aNWokTp48qf6aq6ur8PPzy7F+aGioqFKlipDL5eLjjz8We/bsKeTE+k+Tffrhhx8KAG98BAYGFn5wPaXpv9HXsWDkTtN9evz4cdG4cWNhZWUlKlasKL799luRlZVVyKn1lyb7MzMzU0yfPl1UqlRJKBQK4ezsLIYNGyaePXtW+MH1VGRkZK4/F7P3o5+fn3B1dX1jm7p16wq5XC4qVqwofvnlF53l4+3aiYiISOs4B4OIiIi0jgWDiIiItI4Fg4iIiLSOBYOIiIi0jgWDiIiItI4Fg4iIiLSOBYOIiIi0jgWDiIiItI4Fg8jIBAcHo0iRIlLHKDCZTIadO3e+dZ0vvvgC3t7ehZKHiAqGBYNID33xxReQyWRvfNy4cUPqaAgODlbnMTMzQ7ly5eDv74/Hjx9r5fljY2PRvn17AMCdO3cgk8kQExOTY50ffvgBwcHBWnm9vEyfPl39Ps3NzeHs7IxBgwbh6dOnGj0PyxCZKt6unUhPeXp64pdffsmxrHjx4hKlycnBwQHXrl2DSqXCuXPn4O/vj0ePHiEsLOy9nzuv23e/ztHR8b1fJz8+/vhjhIeHQ6lU4sqVK/jyyy/x4sULhISEFMrrExkyjmAQ6SkrKyuUKlUqx4e
5uTkWLVqEWrVqwdbWFs7Ozhg2bBhevnyZ5/OcO3cOrVq1gr29PRwcHNCgQQP8/fff6q8fPXoULVq0gLW1NZydnTFq1CgkJye/NZtMJkOpUqVQpkwZtG/fHqNGjUJ4eDhSU1OhUqkwc+ZMlCtXDlZWVqhbty7279+v3jYjIwMjRoxA6dKloVAo8OGHH2Lu3Lk5njv7EEmFChUAAPXq1YNMJoObmxuAnKMCK1euRJkyZXLcJh0AunTpgi+//FL9+Pfff0f9+vWhUChQsWJFzJgxA1lZWW99nxYWFihVqhTKli0Ld3d39OjRAwcOHFB/XalUon///qhQoQKsra1RtWpV/PDDD+qvT58+HevWrcPvv/+uHg2JiooCANy/fx89e/ZEkSJFUKxYMXTp0gV37tx5ax4iQ8KCQWRgzMzM8OOPP+LSpUtYt24dDh48iPHjx+e5ft++fVGuXDmcPn0aZ86cwcSJE2FpaQkAuHnzJjw9PdG9e3ecP38eISEhOHr0KEaMGKFRJmtra6hUKmRlZeGHH37AwoULsWDBApw/fx4eHh7o3Lkz/vnnHwDAjz/+iF27diE0NBTXrl3Dpk2b4OLikuvznjp1CgAQHh6O2NhYbN++/Y11evTogX///ReRkZHqZU+fPsX+/fvRt29fAMCRI0fg6+uL0aNH4/Lly/j5558RHByMb7/9Nt/v8c6dOwgLC4NcLlcvU6lUKFeuHLZu3YrLly9j2rRpmDx5MkJDQwEA48aNQ8+ePeHp6YnY2FjExsaiadOmyMzMhIeHB+zt7XHkyBEcO3YMdnZ28PT0REZGRr4zEek1nd2nlYgKzM/PT5ibmwtbW1v1x2effZbrulu3bhUffPCB+vEvv/wiHB0d1Y/t7e1FcHBwrtv2799fDBo0KMeyI0eOCDMzM5GamprrNv99/uvXr4sqVaqIhg0bCiGEKFOmjPj2229zbPPJJ5+IYcOGCSGEGDlypGjdurVQqVS5Pj8AsWPHDiGEELdv3xYAxNmzZ3Os89/by3fp0kV8+eWX6sc///yzKFOmjFAqlUIIIdq0aSPmzJmT4zk2bNggSpcunWsGIYQIDAwUZmZmwtbWVigUCvWtsBctWpTnNkIIMXz4cNG9e/c8s2a/dtWqVXPsg/T0dGFtbS3CwsLe+vxEhoJzMIj0VKtWrbB8+XL1Y1tbWwCv/pqfO3curl69isTERGRlZSEtLQ0pKSmwsbF543kCAgIwYMAAbNiwQT3MX6lSJQCvDp+cP38emzZtUq8vhIBKpcLt27dRvXr1XLO9ePECdnZ2UKlUSEtLQ/PmzbF69WokJibi0aNHaNasWY71mzVrhnPnzgF4dXijbdu2qFq1Kjw9PdGpUye0a9fuvfZV3759MXDgQCxbtgxWVlbYtGkTevXqBTMzM/X7PHbsWI4RC6VS+db9BgBVq1bFrl27kJaWho0bNyImJgYjR47MsU5QUBDWrl2Le/fuITU1FRkZGahbt+5b8547dw43btyAvb19juVpaWm4efNmAfYAkf5hwSDSU7a2tvjoo49yLLtz5w46deqEoUOH4ttvv0WxYsVw9OhR9O/fHxkZGbn+opw+fTr69OmDPXv2YN++fQgMDMSWLVvQtWtXvHz5EoMHD8aoUaPe2K58+fJ5ZrO3t0d0dDTMzMxQunRpWFtbAwASExPf+b7q16+P27dvY9++fQgPD0fPnj3h7u6Obdu2vXPbvHh5eUEIgT179uCTTz7BkSNHsHjxYvXXX758iRkzZqBbt25vbKtQKPJ8Xrlcrv5/MG/ePHTs2BEzZszArFmzAABbtmzBuHHjsHDhQjRp0gT29vaYP38+/vrrr7fmffnyJRo0aJCj2GXTl4m8RO+LBYPIgJw5cwYqlQoLFy5U/3Wefbz/bapUqYIqVapgzJgx6N27N3755Rd07doV9evXx+XLl98oMu9iZmaW6zYODg4oU6YMjh07BldXV/XyY8eOoVGjRjnW8/HxgY+PDz777DN
4enri6dOnKFasWI7ny57voFQq35pHoVCgW7du2LRpE27cuIGqVauifv366q/Xr18f165d0/h9/teUKVPQunVrDB06VP0+mzZtimHDhqnX+e8IhFwufyN//fr1ERISghIlSsDBweG9MhHpK07yJDIgH330ETIzM7F06VLcunULGzZswIoVK/JcPzU1FSNGjEBUVBTu3r2LY8eO4fTp0+pDHxMmTMDx48cxYsQIxMTE4J9//sHvv/+u8STP13399df47rvvEBISgmvXrmHixImIiYnB6NGjAQCLFi3Cr7/+iqtXr+L69evYunUrSpUqlevFwUqUKAFra2vs378f8fHxePHiRZ6v27dvX+zZswdr165VT+7MNm3aNKxfvx4zZszApUuXcOXKFWzZsgVTpkzR6L01adIEtWvXxpw5cwAAlStXxt9//42wsDBcv34dU6dOxenTp3Ns4+LigvPnz+PatWtISEhAZmYm+vbtCycnJ3Tp0gVHjhzB7du3ERUVhVGjRuHBgwcaZSLSW1JPAiGiN+U2MTDbokWLROnSpYW1tbXw8PAQ69evFwDEs2fPhBA5J2Gmp6eLXr16CWdnZyGXy0WZMmXEiBEjckzgPHXqlGjbtq2ws7MTtra2onbt2m9M0nzdfyd5/pdSqRTTp08XZcuWFZaWlqJOnTpi37596q+vXLlS1K1bV9ja2goHBwfRpk0bER0drf46XpvkKYQQq1atEs7OzsLMzEy4urrmuX+USqUoXbq0ACBu3rz5Rq79+/eLpk2bCmtra+Hg4CAaNWokVq5cmef7CAwMFHXq1Hlj+a+//iqsrKzEvXv3RFpamvjiiy+Eo6OjKFKkiBg6dKiYOHFiju0eP36s3r8ARGRkpBBCiNjYWOHr6yucnJyElZWVqFixohg4cKB48eJFnpmIDIlMCCGkrThERERkbHiIhIiIiLSOBYOIiIi0jgWDiIiItI4Fg4iIiLSOBYOIiIi0jgWDiIiItI4Fg4iIiLSOBYOIiIi0jgWDiIiItI4Fg4iIiLSOBYOIiIi07v8A1k+hWenkwRsAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from sklearn.metrics import roc_curve, roc_auc_score\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "\n", + "y_scores = model.predict_proba(X_test)\n", + "# calculate ROC curve\n", + "fpr, tpr, thresholds = roc_curve(y_test, y_scores[:,1])\n", + "\n", + "# plot ROC curve\n", + "fig = plt.figure(figsize=(6, 6))\n", + "# Plot the diagonal 50% line\n", + "plt.plot([0, 1], [0, 1], 'k--')\n", + "# Plot the FPR and TPR achieved by our model\n", + "plt.plot(fpr, tpr)\n", + "plt.xlabel('False Positive Rate')\n", + "plt.ylabel('True Positive Rate')\n", + "plt.title('ROC Curve')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.9749908725812341\n" + ] + } + ], + "source": [ + "# Calculate AUC score\n", + "auc = roc_auc_score(y_test,y_scores[:,1])\n", + "print(auc)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако настојимо да обезбедимо тачност, молимо вас да имате у виду да аутоматски преводи могу садржати грешке или нетачности. Оригинални документ на изворном језику треба сматрати ауторитативним извором. За критичне информације препоручује се професионални превод од стране људи. 
Не сносимо одговорност за било каква погрешна тумачења или неспоразуме који могу произаћи из коришћења овог превода.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "vscode": { + "interpreter": { + "hash": "949777d72b0d2535278d3dc13498b2535136f6dfe0678499012e853ee9abcab1" + } + }, + "coopTranslator": { + "original_hash": "ef50cc584e0b79412610cc7da15e1f86", + "translation_date": "2025-09-06T13:27:28+00:00", + "source_file": "2-Regression/4-Logistic/solution/notebook.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sr/3-Web-App/1-Web-App/notebook.ipynb b/translations/sr/3-Web-App/1-Web-App/notebook.ipynb new file mode 100644 index 000000000..e69de29bb diff --git a/translations/sr/3-Web-App/1-Web-App/solution/notebook.ipynb b/translations/sr/3-Web-App/1-Web-App/solution/notebook.ipynb new file mode 100644 index 000000000..b53d90bf7 --- /dev/null +++ b/translations/sr/3-Web-App/1-Web-App/solution/notebook.ipynb @@ -0,0 +1,267 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": 
"70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "5fa2e8f4584c78250ca9729b46562ceb", + "translation_date": "2025-09-06T14:32:08+00:00", + "source_file": "3-Web-App/1-Web-App/solution/notebook.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " datetime city state country shape \\\n", + "0 10/10/1949 20:30 san marcos tx us cylinder \n", + "1 10/10/1949 21:00 lackland afb tx NaN light \n", + "2 10/10/1955 17:00 chester (uk/england) NaN gb circle \n", + "3 10/10/1956 21:00 edna tx us circle \n", + "4 10/10/1960 20:00 kaneohe hi us light \n", + "\n", + " duration (seconds) duration (hours/min) \\\n", + "0 2700.0 45 minutes \n", + "1 7200.0 1-2 hrs \n", + "2 20.0 20 seconds \n", + "3 20.0 1/2 hour \n", + "4 900.0 15 minutes \n", + "\n", + " comments date posted latitude \\\n", + "0 This event took place in early fall around 194... 4/27/2004 29.883056 \n", + "1 1949 Lackland AFB, TX. Lights racing acros... 12/16/2005 29.384210 \n", + "2 Green/Orange circular disc over Chester, En... 1/21/2008 53.200000 \n", + "3 My older brother and twin sister were leaving ... 1/17/2004 28.978333 \n", + "4 AS a Marine 1st Lt. flying an FJ4B fighter/att... 1/22/2004 21.418056 \n", + "\n", + " longitude \n", + "0 -97.941111 \n", + "1 -98.581082 \n", + "2 -2.916667 \n", + "3 -96.645833 \n", + "4 -157.803611 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
datetimecitystatecountryshapeduration (seconds)duration (hours/min)commentsdate postedlatitudelongitude
010/10/1949 20:30san marcostxuscylinder2700.045 minutesThis event took place in early fall around 194...4/27/200429.883056-97.941111
110/10/1949 21:00lackland afbtxNaNlight7200.01-2 hrs1949 Lackland AFB&#44 TX. Lights racing acros...12/16/200529.384210-98.581082
210/10/1955 17:00chester (uk/england)NaNgbcircle20.020 secondsGreen/Orange circular disc over Chester&#44 En...1/21/200853.200000-2.916667
310/10/1956 21:00ednatxuscircle20.01/2 hourMy older brother and twin sister were leaving ...1/17/200428.978333-96.645833
410/10/1960 20:00kaneohehiuslight900.015 minutesAS a Marine 1st Lt. flying an FJ4B fighter/att...1/22/200421.418056-157.803611
\n
" + }, + "metadata": {}, + "execution_count": 23 + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "ufos = pd.read_csv('../data/ufos.csv')\n", + "ufos.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array(['us', nan, 'gb', 'ca', 'au', 'de'], dtype=object)" + ] + }, + "metadata": {}, + "execution_count": 24 + } + ], + "source": [ + "\n", + "ufos = pd.DataFrame({'Seconds': ufos['duration (seconds)'], 'Country': ufos['country'],'Latitude': ufos['latitude'],'Longitude': ufos['longitude']})\n", + "\n", + "ufos.Country.unique()\n", + "\n", + "# 0 au, 1 ca, 2 de, 3 gb, 4 us" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\nInt64Index: 25863 entries, 2 to 80330\nData columns (total 4 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Seconds 25863 non-null float64\n 1 Country 25863 non-null object \n 2 Latitude 25863 non-null float64\n 3 Longitude 25863 non-null float64\ndtypes: float64(3), object(1)\nmemory usage: 1010.3+ KB\n" + ] + } + ], + "source": [ + "ufos.dropna(inplace=True)\n", + "\n", + "ufos = ufos[(ufos['Seconds'] >= 1) & (ufos['Seconds'] <= 60)]\n", + "\n", + "ufos.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Seconds Country Latitude Longitude\n", + "2 20.0 3 53.200000 -2.916667\n", + "3 20.0 4 28.978333 -96.645833\n", + "14 30.0 4 35.823889 -80.253611\n", + "23 60.0 4 45.582778 -122.352222\n", + "24 3.0 3 51.783333 -0.783333" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
SecondsCountryLatitudeLongitude
220.0353.200000-2.916667
320.0428.978333-96.645833
1430.0435.823889-80.253611
2360.0445.582778-122.352222
243.0351.783333-0.783333
\n
" + }, + "metadata": {}, + "execution_count": 26 + } + ], + "source": [ + "from sklearn.preprocessing import LabelEncoder\n", + "\n", + "ufos['Country'] = LabelEncoder().fit_transform(ufos['Country'])\n", + "\n", + "ufos.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "\n", + "Selected_features = ['Seconds','Latitude','Longitude']\n", + "\n", + "X = ufos[Selected_features]\n", + "y = ufos['Country']\n", + "\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", + " FutureWarning)\n", + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n", + " \"this warning.\", FutureWarning)\n", + " precision recall f1-score support\n", + "\n", + " 0 1.00 1.00 1.00 41\n", + " 1 1.00 0.02 0.05 250\n", + " 2 0.00 0.00 0.00 8\n", + " 3 0.94 1.00 0.97 131\n", + " 4 0.95 1.00 0.97 4743\n", + "\n", + " accuracy 0.95 5173\n", + " macro avg 0.78 0.60 0.60 5173\n", + "weighted avg 0.95 0.95 0.93 5173\n", + "\n", + "Predicted labels: [4 4 4 ... 
3 4 4]\n", + "Accuracy: 0.9512855209742895\n", + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/metrics/classification.py:1437: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.\n", + " 'precision', 'predicted', average, warn_for)\n" + ] + } + ], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import accuracy_score, classification_report \n", + "from sklearn.linear_model import LogisticRegression\n", + "model = LogisticRegression()\n", + "model.fit(X_train, y_train)\n", + "predictions = model.predict(X_test)\n", + "\n", + "print(classification_report(y_test, predictions))\n", + "print('Predicted labels: ', predictions)\n", + "print('Accuracy: ', accuracy_score(y_test, predictions))\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[3]\n" + ] + } + ], + "source": [ + "import pickle\n", + "model_filename = 'ufo-model.pkl'\n", + "pickle.dump(model, open(model_filename,'wb'))\n", + "\n", + "model = pickle.load(open('ufo-model.pkl','rb'))\n", + "print(model.predict([[50,44,-12]]))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако се трудимо да обезбедимо тачност, молимо вас да имате у виду да аутоматски преводи могу садржати грешке или нетачности. Оригинални документ на његовом изворном језику треба сматрати меродавним извором. За критичне информације препоручује се професионални превод од стране људи. 
Не преузимамо одговорност за било каква погрешна тумачења или неспоразуме који могу настати услед коришћења овог превода.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sr/4-Classification/1-Introduction/notebook.ipynb b/translations/sr/4-Classification/1-Introduction/notebook.ipynb new file mode 100644 index 000000000..5fb76844f --- /dev/null +++ b/translations/sr/4-Classification/1-Introduction/notebook.ipynb @@ -0,0 +1,39 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "d544ef384b7ba73757d830a72372a7f2", + "translation_date": "2025-09-06T14:50:46+00:00", + "source_file": "4-Classification/1-Introduction/notebook.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако настојимо да обезбедимо тачност, молимо вас да имате у виду да аутоматски преводи могу садржати грешке или нетачности. Оригинални документ на изворном језику треба сматрати ауторитативним извором. За критичне информације препоручује се професионални превод од стране људи. 
Не сносимо одговорност за било каква неспоразумевања или погрешна тумачења која могу произаћи из коришћења овог превода.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sr/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb b/translations/sr/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb new file mode 100644 index 000000000..ad1bfd5e3 --- /dev/null +++ b/translations/sr/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb @@ -0,0 +1,725 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_10-R.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "2621e24705e8100893c9bf84e0fc8aef", + "translation_date": "2025-09-06T14:56:46+00:00", + "source_file": "4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb", + "language_code": "sr" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Направите класификациони модел: Укусна азијска и индијска јела\n" + ], + "metadata": { + "id": "ItETB4tSFprR" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Увод у класификацију: Чишћење, припрема и визуализација података\n", + "\n", + "У ове четири лекције истражићете један од основних аспеката класичног машинског учења - *класификацију*. Проћи ћемо кроз употребу различитих алгоритама класификације са скупом података о свим сјајним кухињама Азије и Индије. Надамо се да сте гладни!\n", + "\n", + "

\n", + " \n", + "

Прославите паназијске кухиње у овим лекцијама! Слика: Џен Лупер
\n", + "\n", + "\n", + "\n", + "\n", + "Класификација је облик [надгледаног учења](https://wikipedia.org/wiki/Supervised_learning) који има доста сличности са техникама регресије. У класификацији, обучавате модел да предвиди којој `категорији` неки елемент припада. Ако је машинско учење усмерено на предвиђање вредности или имена ствари коришћењем скупова података, онда се класификација углавном дели на две групе: *бинарна класификација* и *вишекласна класификација*.\n", + "\n", + "Запамтите:\n", + "\n", + "- **Линеарна регресија** вам је помогла да предвидите односе између променљивих и направите тачна предвиђања о томе где би нова тачка података пала у односу на ту линију. На пример, могли сте да предвидите нумеричке вредности као што је *која би цена бундеве била у септембру у односу на децембар*.\n", + "\n", + "- **Логистичка регресија** вам је помогла да откријете \"бинарне категорије\": на овој цени, *да ли је ова бундева наранџаста или није-наранџаста*?\n", + "\n", + "Класификација користи различите алгоритме за одређивање других начина за идентификацију ознаке или класе неке тачке података. Хајде да радимо са овим подацима о кухињама како бисмо видели да ли можемо, посматрајући групу састојака, одредити њихово порекло.\n", + "\n", + "### [**Квиз пре предавања**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/19/)\n", + "\n", + "### **Увод**\n", + "\n", + "Класификација је једна од основних активности истраживача машинског учења и научника за податке. Од основне класификације бинарне вредности (\"да ли је овај имејл спам или није?\"), до сложене класификације слика и сегментације коришћењем рачунарског вида, увек је корисно бити у могућности да сортирате податке у класе и постављате питања о њима.\n", + "\n", + "Научно речено, ваш метод класификације креира предиктивни модел који вам омогућава да мапирате однос између улазних променљивих и излазних променљивих.\n", + "\n", + "

\n", + " \n", + "

Бинарни наспрам вишекласних проблема за алгоритме класификације. Инфографика: Џен Лупер
\n", + "\n", + "\n", + "\n", + "Пре него што започнемо процес чишћења наших података, њихове визуализације и припреме за задатке машинског учења, хајде да научимо мало више о различитим начинима на које се машинско учење може користити за класификацију података.\n", + "\n", + "Изведена из [статистике](https://wikipedia.org/wiki/Statistical_classification), класификација коришћењем класичног машинског учења користи карактеристике, као што су `пушач`, `тежина` и `године`, како би одредила *вероватноћу развоја X болести*. Као техника надгледаног учења слична вежбама регресије које сте раније радили, ваши подаци су означени, а алгоритми машинског учења користе те ознаке да класификују и предвиђају класе (или 'карактеристике') скупа података и додељују их групи или исходу.\n", + "\n", + "✅ Одвојите тренутак да замислите скуп података о кухињама. На шта би вишекласни модел могао да одговори? На шта би бинарни модел могао да одговори? Шта ако желите да утврдите да ли је одређена кухиња вероватно користила пискавац? Шта ако желите да видите да ли, уз поклон у виду кесе са намирницама пуне звездастог аниса, артичока, карфиола и рена, можете направити типично индијско јело?\n", + "\n", + "### **Здраво, 'класификаторе'**\n", + "\n", + "Питање које желимо да поставимо овом скупу података о кухињама је заправо **вишекласно питање**, јер имамо неколико потенцијалних националних кухиња са којима радимо. С обзиром на групу састојака, којој од ових многих класа ће подаци припадати?\n", + "\n", + "Tidymodels нуди неколико различитих алгоритама за класификацију података, у зависности од врсте проблема који желите да решите. 
У наредне две лекције, научићете о неколико ових алгоритама.\n", + "\n", + "#### **Предуслови**\n", + "\n", + "За ову лекцију, биће нам потребни следећи пакети за чишћење, припрему и визуализацију наших података:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) је [збирка R пакета](https://www.tidyverse.org/packages) дизајнирана да учини науку о подацима бржом, лакшом и забавнијом!\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) је оквир који представља [збирку пакета](https://www.tidymodels.org/packages/) за моделирање и машинско учење.\n", + "\n", + "- `DataExplorer`: [DataExplorer пакет](https://cran.r-project.org/web/packages/DataExplorer/vignettes/dataexplorer-intro.html) је намењен поједностављењу и аутоматизацији процеса истраживачке анализе података и генерисању извештаја.\n", + "\n", + "- `themis`: [themis пакет](https://themis.tidymodels.org/) пружа додатне кораке за рецепте за рад са неуравнотеженим подацима.\n", + "\n", + "Можете их инсталирати помоћу:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\", \"DataExplorer\", \"themis\", \"here\"))`\n", + "\n", + "Алтернативно, скрипта испод проверава да ли имате пакете потребне за завршетак овог модула и инсталира их у случају да недостају.\n" + ], + "metadata": { + "id": "ri5bQxZ-Fz_0" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "suppressWarnings(if (!require(\"pacman\"))install.packages(\"pacman\"))\r\n", + "\r\n", + "pacman::p_load(tidyverse, tidymodels, DataExplorer, themis, here)" + ], + "outputs": [], + "metadata": { + "id": "KIPxa4elGAPI" + } + }, + { + "cell_type": "markdown", + "source": [ + "Касније ћемо учитати ове сјајне пакете и учинити их доступним у нашој тренутној R сесији. 
(Ово је само за илустрацију, `pacman::p_load()` је то већ урадио за вас)\n" + ], + "metadata": { + "id": "YkKAxOJvGD4C" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Вежба - очистите и избалансирајте своје податке\n", + "\n", + "Први задатак који треба обавити пре почетка овог пројекта је да очистите и **избалансирате** своје податке како бисте добили боље резултате.\n", + "\n", + "Хајде да упознамо податке! 🕵️\n" + ], + "metadata": { + "id": "PFkQDlk0GN5O" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Import data\r\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/4-Classification/data/cuisines.csv\")\r\n", + "\r\n", + "# View the first 5 rows\r\n", + "df %>% \r\n", + " slice_head(n = 5)\r\n" + ], + "outputs": [], + "metadata": { + "id": "Qccw7okxGT0S" + } + }, + { + "cell_type": "markdown", + "source": [ + "Интересантно! По изгледу, прва колона је нека врста `id` колоне. Хајде да добијемо мало више информација о подацима.\n" + ], + "metadata": { + "id": "XrWnlgSrGVmR" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Basic information about the data\r\n", + "df %>%\r\n", + " introduce()\r\n", + "\r\n", + "# Visualize basic information above\r\n", + "df %>% \r\n", + " plot_intro(ggtheme = theme_light())" + ], + "outputs": [], + "metadata": { + "id": "4UcGmxRxGieA" + } + }, + { + "cell_type": "markdown", + "source": [ + "Из резултата одмах можемо видети да имамо `2448` редова и `385` колона, као и `0` недостајућих вредности. Такође имамо 1 дискретну колону, *cuisine*.\n", + "\n", + "## Вежба - упознавање са кухињама\n", + "\n", + "Сада посао постаје занимљивији. 
Хајде да откријемо расподелу података по кухињама.\n" + ], + "metadata": { + "id": "AaPubl__GmH5" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Count observations per cuisine\r\n", + "df %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(n)\r\n", + "\r\n", + "# Plot the distribution\r\n", + "theme_set(theme_light())\r\n", + "df %>% \r\n", + " count(cuisine) %>% \r\n", + " ggplot(mapping = aes(x = n, y = reorder(cuisine, -n))) +\r\n", + " geom_col(fill = \"midnightblue\", alpha = 0.7) +\r\n", + " ylab(\"cuisine\")" + ], + "outputs": [], + "metadata": { + "id": "FRsBVy5eGrrv" + } + }, + { + "cell_type": "markdown", + "source": [ + "Постоји коначан број кухиња, али је расподела података неравномерна. Можете то поправити! Пре него што то урадите, истражите мало више.\n", + "\n", + "Следеће, хајде да сваку кухињу доделимо њеној појединачној табели (tibble) и сазнамо колико је података доступно (редови, колоне) по кухињи.\n", + "\n", + "> [Tibble](https://tibble.tidyverse.org/) је модеран облик података (data frame).\n", + "\n", + "

\n", + " \n", + "

Илустрација: @allison_horst
\n" + ], + "metadata": { + "id": "vVvyDb1kG2in" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Create individual tibble for the cuisines\r\n", + "thai_df <- df %>% \r\n", + " filter(cuisine == \"thai\")\r\n", + "japanese_df <- df %>% \r\n", + " filter(cuisine == \"japanese\")\r\n", + "chinese_df <- df %>% \r\n", + " filter(cuisine == \"chinese\")\r\n", + "indian_df <- df %>% \r\n", + " filter(cuisine == \"indian\")\r\n", + "korean_df <- df %>% \r\n", + " filter(cuisine == \"korean\")\r\n", + "\r\n", + "\r\n", + "# Find out how much data is available per cuisine\r\n", + "cat(\" thai df:\", dim(thai_df), \"\\n\",\r\n", + " \"japanese df:\", dim(japanese_df), \"\\n\",\r\n", + " \"chinese_df:\", dim(chinese_df), \"\\n\",\r\n", + " \"indian_df:\", dim(indian_df), \"\\n\",\r\n", + " \"korean_df:\", dim(korean_df))" + ], + "outputs": [], + "metadata": { + "id": "0TvXUxD3G8Bk" + } + }, + { + "cell_type": "markdown", + "source": [ + "## **Вежба - Откривање најбољих састојака по кухињи уз помоћ dplyr**\n", + "\n", + "Сада можете дубље истражити податке и сазнати који су типични састојци за сваку кухињу. Треба да очистите поновљене податке који стварају забуну између кухиња, па хајде да научимо више о овом проблему.\n", + "\n", + "Направите функцију `create_ingredient()` у R-у која враћа датафрејм са састојцима. Ова функција ће започети уклањањем некорисне колоне и сортирањем састојака према њиховом броју.\n", + "\n", + "Основна структура функције у R-у је:\n", + "\n", + "`myFunction <- function(arglist){`\n", + "\n", + "**`...`**\n", + "\n", + "**`return`**`(value)`\n", + "\n", + "`}`\n", + "\n", + "Уредан увод у функције у R-у можете пронаћи [овде](https://skirmer.github.io/presentations/functions_with_r.html#1).\n", + "\n", + "Хајде да кренемо! Користићемо [глаголе из dplyr-а](https://dplyr.tidyverse.org/) које смо учили у претходним лекцијама. 
Као подсетник:\n", + "\n", + "- `dplyr::select()`: помаже вам да изаберете које **колоне** желите да задржите или искључите.\n", + "\n", + "- `tidyr::pivot_longer()`: помаже вам да \"продужите\" податке, повећавајући број редова и смањујући број колона.\n", + "\n", + "- `dplyr::group_by()` и `dplyr::summarise()`: помажу вам да пронађете статистику за различите групе и представите је у лепој табели.\n", + "\n", + "- `dplyr::filter()`: креира подскуп података који садржи само редове који задовољавају ваше услове.\n", + "\n", + "- `dplyr::mutate()`: помаже вам да креирате или измените колоне.\n", + "\n", + "Погледајте овај [*уметнички*-испуњен learnr туторијал](https://allisonhorst.shinyapps.io/dplyr-learnr/#section-welcome) од Алисон Хорст, који представља неке корисне функције за обраду података у dplyr-у *(део Tidyverse-а)*.\n" + ], + "metadata": { + "id": "K3RF5bSCHC76" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Creates a functions that returns the top ingredients by class\r\n", + "\r\n", + "create_ingredient <- function(df){\r\n", + " \r\n", + " # Drop the id column which is the first colum\r\n", + " ingredient_df = df %>% select(-1) %>% \r\n", + " # Transpose data to a long format\r\n", + " pivot_longer(!cuisine, names_to = \"ingredients\", values_to = \"count\") %>% \r\n", + " # Find the top most ingredients for a particular cuisine\r\n", + " group_by(ingredients) %>% \r\n", + " summarise(n_instances = sum(count)) %>% \r\n", + " filter(n_instances != 0) %>% \r\n", + " # Arrange by descending order\r\n", + " arrange(desc(n_instances)) %>% \r\n", + " mutate(ingredients = factor(ingredients) %>% fct_inorder())\r\n", + " \r\n", + " \r\n", + " return(ingredient_df)\r\n", + "} # End of function" + ], + "outputs": [], + "metadata": { + "id": "uB_0JR82HTPa" + } + }, + { + "cell_type": "markdown", + "source": [ + "Сада можемо користити функцију да добијемо идеју о десет најпопуларнијих састојака по кухињи. 
Хајде да је испробамо са `thai_df`.\n" + ], + "metadata": { + "id": "h9794WF8HWmc" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Call create_ingredient and display popular ingredients\r\n", + "thai_ingredient_df <- create_ingredient(df = thai_df)\r\n", + "\r\n", + "thai_ingredient_df %>% \r\n", + " slice_head(n = 10)" + ], + "outputs": [], + "metadata": { + "id": "agQ-1HrcHaEA" + } + }, + { + "cell_type": "markdown", + "source": [ + "У претходном одељку користили смо `geom_col()`, хајде да видимо како можете користити и `geom_bar` за креирање стубичастих графикона. Користите `?geom_bar` за додатно читање.\n" + ], + "metadata": { + "id": "kHu9ffGjHdcX" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make a bar chart for popular thai cuisines\r\n", + "thai_ingredient_df %>% \r\n", + " slice_head(n = 10) %>% \r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"steelblue\") +\r\n", + " xlab(\"\") + ylab(\"\")" + ], + "outputs": [], + "metadata": { + "id": "fb3Bx_3DHj6e" + } + }, + { + "cell_type": "markdown", + "source": [ + "Хајде да урадимо исто за јапанске податке\n" + ], + "metadata": { + "id": "RHP_xgdkHnvM" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Get popular ingredients for Japanese cuisines and make bar chart\r\n", + "create_ingredient(df = japanese_df) %>% \r\n", + " slice_head(n = 10) %>%\r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"darkorange\", alpha = 0.8) +\r\n", + " xlab(\"\") + ylab(\"\")\r\n" + ], + "outputs": [], + "metadata": { + "id": "019v8F0XHrRU" + } + }, + { + "cell_type": "markdown", + "source": [ + "Шта је са кинеском кухињом?\n" + ], + "metadata": { + "id": "iIGM7vO8Hu3v" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Get popular ingredients for Chinese 
cuisines and make bar chart\r\n", + "create_ingredient(df = chinese_df) %>% \r\n", + " slice_head(n = 10) %>%\r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"cyan4\", alpha = 0.8) +\r\n", + " xlab(\"\") + ylab(\"\")" + ], + "outputs": [], + "metadata": { + "id": "lHd9_gd2HyzU" + } + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "ir8qyQbNH1c7" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Get popular ingredients for Indian cuisines and make bar chart\r\n", + "create_ingredient(df = indian_df) %>% \r\n", + " slice_head(n = 10) %>%\r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"#041E42FF\", alpha = 0.8) +\r\n", + " xlab(\"\") + ylab(\"\")" + ], + "outputs": [], + "metadata": { + "id": "ApukQtKjH5FO" + } + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "qv30cwY1H-FM" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Get popular ingredients for Korean cuisines and make bar chart\r\n", + "create_ingredient(df = korean_df) %>% \r\n", + " slice_head(n = 10) %>%\r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"#852419FF\", alpha = 0.8) +\r\n", + " xlab(\"\") + ylab(\"\")" + ], + "outputs": [], + "metadata": { + "id": "lumgk9cHIBie" + } + }, + { + "cell_type": "markdown", + "source": [ + "Из визуализација података, сада можемо изоставити најчешће састојке који стварају забуну између различитих кухиња, користећи `dplyr::select()`.\n", + "\n", + "Сви воле пиринач, бели лук и ђумбир!\n" + ], + "metadata": { + "id": "iO4veMXuIEta" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Drop id column, rice, garlic and ginger from our original data set\r\n", + "df_select <- df %>% \r\n", + " select(-c(1, rice, garlic, 
ginger))\r\n", + "\r\n", + "# Display new data set\r\n", + "df_select %>% \r\n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "iHJPiG6rIUcK" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Предобрада података уз помоћ рецепата 👩‍🍳👨‍🍳 - Рад са неуравнотеженим подацима ⚖️\n", + "\n", + "

\n", + " \n", + "

Илустрација: @allison_horst
\n", + "\n", + "С обзиром на то да је ова лекција о кухињама, морамо ставити `recipes` у контекст.\n", + "\n", + "Tidymodels пружа још један користан пакет: `recipes` - пакет за предобраду података.\n" + ], + "metadata": { + "id": "kkFd-JxdIaL6" + } + }, + { + "cell_type": "markdown", + "source": [ + "Хајде да поново погледамо расподелу наших кухиња.\n" + ], + "metadata": { + "id": "6l2ubtTPJAhY" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Distribution of cuisines\r\n", + "old_label_count <- df_select %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(desc(n))\r\n", + "\r\n", + "old_label_count" + ], + "outputs": [], + "metadata": { + "id": "1e-E9cb7JDVi" + } + }, + { + "cell_type": "markdown", + "source": [ + "Као што можете видети, постоји прилично неједнака расподела у броју кухиња. Корејске кухиње су скоро три пута бројније од тајландских. Неуравнотежени подаци често имају негативан утицај на перформансе модела. Размислите о бинарној класификацији. Ако већина ваших података припада једној класи, модел машинског учења ће чешће предвиђати ту класу, једноставно зато што за њу има више података. Уравнотежавање података узима било какве искривљене податке и помаже у уклањању те неравнотеже. Многи модели најбоље функционишу када је број посматрања једнак и, самим тим, имају потешкоћа са неуравнотеженим подацима.\n", + "\n", + "Постоје углавном два начина за решавање проблема са неуравнотеженим скуповима података:\n", + "\n", + "- додавање посматрања мањинској класи: `Over-sampling`, на пример, коришћењем SMOTE алгоритма\n", + "\n", + "- уклањање посматрања из већинске класе: `Under-sampling`\n", + "\n", + "Хајде сада да покажемо како се носити са неуравнотеженим скуповима података користећи `recipe`. 
Рецепт се може посматрати као план који описује које кораке треба применити на скуп података како би био спреман за анализу података.\n" + ], + "metadata": { + "id": "soAw6826JKx9" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load themis package for dealing with imbalanced data\r\n", + "library(themis)\r\n", + "\r\n", + "# Create a recipe for preprocessing data\r\n", + "cuisines_recipe <- recipe(cuisine ~ ., data = df_select) %>% \r\n", + " step_smote(cuisine)\r\n", + "\r\n", + "cuisines_recipe" + ], + "outputs": [], + "metadata": { + "id": "HS41brUIJVJy" + } + }, + { + "cell_type": "markdown", + "source": [ + "Хајде да разложимо кораке претпроцесирања.\n", + "\n", + "- Позив функције `recipe()` са формулом говори рецепту *улоге* променљивих користећи `df_select` податке као референцу. На пример, колони `cuisine` је додељена улога `outcome`, док су остале колоне добиле улогу `predictor`.\n", + "\n", + "- [`step_smote(cuisine)`](https://themis.tidymodels.org/reference/step_smote.html) креира *спецификацију* корака рецепта који синтетички генерише нове примере мањинске класе користећи најближе суседе тих случајева.\n", + "\n", + "Сада, ако желимо да видимо претпроцесиране податке, морамо применити [**`prep()`**](https://recipes.tidymodels.org/reference/prep.html) и [**`bake()`**](https://recipes.tidymodels.org/reference/bake.html) на наш рецепт.\n", + "\n", + "`prep()`: процењује потребне параметре из тренинг скупа који се касније могу применити на друге скупове података.\n", + "\n", + "`bake()`: узима припремљен рецепт и примењује операције на било који скуп података.\n" + ], + "metadata": { + "id": "Yb-7t7XcJaC8" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Prep and bake the recipe\r\n", + "preprocessed_df <- cuisines_recipe %>% \r\n", + " prep() %>% \r\n", + " bake(new_data = NULL) %>% \r\n", + " relocate(cuisine)\r\n", + "\r\n", + "# Display data\r\n", + "preprocessed_df %>% \r\n", + " slice_head(n = 
5)\r\n", + "\r\n", + "# Quick summary stats\r\n", + "preprocessed_df %>% \r\n", + " introduce()" + ], + "outputs": [], + "metadata": { + "id": "9QhSgdpxJl44" + } + }, + { + "cell_type": "markdown", + "source": [ + "Хајде сада да проверимо расподелу наших кухиња и упоредимо их са неуравнотеженим подацима.\n" + ], + "metadata": { + "id": "dmidELh_LdV7" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Distribution of cuisines\r\n", + "new_label_count <- preprocessed_df %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(desc(n))\r\n", + "\r\n", + "list(new_label_count = new_label_count,\r\n", + " old_label_count = old_label_count)" + ], + "outputs": [], + "metadata": { + "id": "aSh23klBLwDz" + } + }, + { + "cell_type": "markdown", + "source": [ + "Ммм! Подаци су лепо уређени, избалансирани и веома укусни 😋!\n", + "\n", + "> Обично се рецепт користи као претпроцесор за моделирање, где дефинише које кораке треба применити на скуп података како би био спреман за моделирање. 
У том случају, `workflow()` се обично користи (као што смо већ видели у претходним лекцијама) уместо ручног процењивања рецепта.\n", + ">\n", + "> Сходно томе, обично не морате да користите **`prep()`** и **`bake()`** рецепте када користите tidymodels, али то су корисне функције које можете имати у свом алату за потврду да рецепти раде оно што очекујете, као у нашем случају.\n", + ">\n", + "> Када користите **`bake()`** на припремљеном рецепту са **`new_data = NULL`**, добијате назад податке које сте дали приликом дефинисања рецепта, али који су прошли кроз кораке претпроцесирања.\n", + "\n", + "Хајде сада да сачувамо копију ових података за употребу у будућим лекцијама:\n" + ], + "metadata": { + "id": "HEu80HZ8L7ae" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Save preprocessed data\r\n", + "write_csv(preprocessed_df, \"../../../data/cleaned_cuisines_R.csv\")" + ], + "outputs": [], + "metadata": { + "id": "cBmCbIgrMOI6" + } + }, + { + "cell_type": "markdown", + "source": [ + "Овај нови CSV сада се налази у главном фолдеру за податке.\n", + "\n", + "**🚀Изазов**\n", + "\n", + "Овај курикулум садржи неколико занимљивих скупова података. Претражите `data` фолдере и видите да ли неки садрже скупове података који би били погодни за бинарну или мултикласну класификацију? Која питања бисте поставили о овом скупу података?\n", + "\n", + "## [**Квиз након предавања**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/20/)\n", + "\n", + "## **Преглед и Самостално учење**\n", + "\n", + "- Погледајте [пакет themis](https://github.com/tidymodels/themis). Које друге технике можемо користити за решавање проблема са неуравнотеженим подацима?\n", + "\n", + "- Референтни сајт за Tidy models [вебсајт](https://www.tidymodels.org/start/).\n", + "\n", + "- Х. Викам и Г. 
Гролемунд, [*R за науку о подацима: Визуализација, Моделовање, Трансформација, Уређивање и Увоз података*](https://r4ds.had.co.nz/).\n", + "\n", + "#### ХВАЛА:\n", + "\n", + "[`Елисон Хорст`](https://twitter.com/allison_horst/) за креирање невероватних илустрација које чине R приступачнијим и занимљивијим. Пронађите више илустрација у њеној [галерији](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM).\n", + "\n", + "[Кеси Бревиу](https://www.twitter.com/cassieview) и [Џен Лупер](https://www.twitter.com/jenlooper) за креирање оригиналне Python верзије овог модула ♥️\n", + "\n", + "

\n", + " \n", + "

Илустрација: @allison_horst
\n" + ], + "metadata": { + "id": "WQs5621pMGwf" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако тежимо тачности, молимо вас да имате у виду да аутоматски преводи могу садржати грешке или нетачности. Оригинални документ на изворном језику треба сматрати ауторитативним извором. За критичне информације препоручује се професионални превод од стране људи. Не сносимо одговорност за било каква неспоразумевања или погрешна тумачења која могу произаћи из коришћења овог превода.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sr/4-Classification/1-Introduction/solution/notebook.ipynb b/translations/sr/4-Classification/1-Introduction/solution/notebook.ipynb new file mode 100644 index 000000000..2059f5a87 --- /dev/null +++ b/translations/sr/4-Classification/1-Introduction/solution/notebook.ipynb @@ -0,0 +1,737 @@ +{ + "cells": [ + { + "source": [ + "# Укусна азијска и индијска јела\n", + "\n", + "## Увод\n", + "\n", + "Азијска и индијска кухиња су познате по својим богатим укусима, разноврсним зачинима и јединственим техникама кувања. У овом водичу, истражићемо неке од најпопуларнијих јела и савете за њихову припрему.\n", + "\n", + "## Популарна јела\n", + "\n", + "### 1. Пилећи кари\n", + "\n", + "Пилећи кари је класично јело које комбинује нежно месо са ароматичним сосом од зачина. \n", + "\n", + "#### Састојци:\n", + "- 500 г пилећег меса\n", + "- 2 кашике уља\n", + "- 1 главица црног лука, ситно сецкана\n", + "- 2 чена белог лука, уситњена\n", + "- 1 кашичица куркуме\n", + "- 1 кашичица куминa\n", + "- 1 кашичица гарам масале\n", + "- 200 мл кокосовог млека\n", + "\n", + "#### Упутство:\n", + "1. Загрејте уље у тигању и пропржите црни лук док не постане стакласт.\n", + "2. 
Додајте бели лук и зачине, па мешајте док не осетите њихов мирис.\n", + "3. Додајте пилеће месо и пржите док не порумени.\n", + "4. Сипајте кокосово млеко и кувајте на лаганој ватри 20 минута.\n", + "\n", + "[!TIP] Послужите са куваним пиринчем или нааном за потпуни оброк.\n", + "\n", + "### 2. Пад Таи\n", + "\n", + "Пад Таи је популарно тајландско јело од резанаца које је савршено за брз и укусан оброк.\n", + "\n", + "#### Састојци:\n", + "- 200 г пиринчаних резанаца\n", + "- 2 кашике тамаринд пасте\n", + "- 1 кашика рибљег соса\n", + "- 1 кашика шећера\n", + "- 2 јаја\n", + "- 100 г шкампа или тофуа\n", + "- 2 чена белог лука, уситњена\n", + "- 50 г кикирикија, уситњених\n", + "\n", + "#### Упутство:\n", + "1. Скувајте резанце према упутству на паковању и оставите их са стране.\n", + "2. У тигању загрејте мало уља и пропржите бели лук.\n", + "3. Додајте јаја и мешајте док се не испрже.\n", + "4. Додајте шкампе или тофу, а затим резанце.\n", + "5. Умешајте тамаринд пасту, рибљи сос и шећер.\n", + "6. Послужите са кикирикијем и лиметом.\n", + "\n", + "[!NOTE] Ово јело можете прилагодити додавањем поврћа по вашем избору.\n", + "\n", + "## Савети за кување\n", + "\n", + "- Увек користите свеже зачине за најбољи укус.\n", + "- Не плашите се да експериментишете са различитим комбинацијама зачина.\n", + "- За аутентичан укус, користите традиционалне састојке као што су гарам масала, тамаринд или кокосово млеко.\n", + "\n", + "[!IMPORTANT] Увек пробајте јело током кувања како бисте прилагодили зачине по укусу.\n", + "\n", + "## Закључак\n", + "\n", + "Азијска и индијска јела нуде невероватну разноврсност укуса и текстура. Уз мало праксе, можете припремити ова укусна јела у удобности свог дома. Испробајте рецепте из овог водича и уживајте у гастрономском путовању!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "Инсталирајте Imblearn који ће омогућити SMOTE. 
Ово је Scikit-learn пакет који помаже у руковању неуравнотеженим подацима приликом извођења класификације. (https://imbalanced-learn.org/stable/)\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: imblearn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.0)\n", + "Requirement already satisfied: imbalanced-learn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imblearn) (0.8.0)\n", + "Requirement already satisfied: numpy>=1.13.3 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (1.19.2)\n", + "Requirement already satisfied: scipy>=0.19.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (1.4.1)\n", + "Requirement already satisfied: scikit-learn>=0.24 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (0.24.2)\n", + "Requirement already satisfied: joblib>=0.11 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (0.16.0)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from scikit-learn>=0.24->imbalanced-learn->imblearn) (2.1.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install imblearn" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + 
"metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib as mpl\n", + "import numpy as np\n", + "from imblearn.over_sampling import SMOTE" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('../../data/cuisines.csv')" + ] + }, + { + "source": [ + "Овај скуп података укључује 385 колона које указују на све врсте састојака у разним кухињама из датог скупа кухиња.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 65 indian 0 0 0 0 0 \n", + "1 66 indian 1 0 0 0 0 \n", + "2 67 indian 0 0 0 0 0 \n", + "3 68 indian 0 0 0 0 0 \n", + "4 69 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 385 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
065indian00000000...0000000000
166indian10000000...0000000000
267indian00000000...0000000000
368indian00000000...0000000000
469indian00000000...0000000010
\n

5 rows × 385 columns

\n
" + }, + "metadata": {}, + "execution_count": 4 + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\nRangeIndex: 2448 entries, 0 to 2447\nColumns: 385 entries, Unnamed: 0 to zucchini\ndtypes: int64(384), object(1)\nmemory usage: 7.2+ MB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "korean 799\n", + "indian 598\n", + "chinese 442\n", + "japanese 320\n", + "thai 289\n", + "Name: cuisine, dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ], + "source": [ + "df.cuisine.value_counts()" + ] + }, + { + "source": [ + "Прикажи кухиње у стубичастом графикону\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 7 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAZEAAAD4CAYAAAAtrdtxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAASY0lEQVR4nO3df7TldV3v8eerGZkRRoeAiXtE5UgNIkUCjlwQIzAiC7NscdcSbcmsfkxl5SXX0juuyzK9d3UvlXnpplajma0kMtCUhluImNcr8msGBmb4pZaTQCFQOYom0fi+f+zPkd14hpnzOWefvYfzfKy113z35/vde7/22fvMa3++3733SVUhSVKPbxt3AEnSgcsSkSR1s0QkSd0sEUlSN0tEktRt+bgDLKYjjjiipqenxx1Dkg4oW7dufbiq1sy2bkmVyPT0NFu2bBl3DEk6oCT5u72tc3eWJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqduS+sT69vt3Mb3xqnHH0ALZefG5444gLXnORCRJ3SwRSVI3S0SS1M0SkSR1s0QkSd0sEUlSN0tEktRtIkokyaFJXtuWz0yyeY6X/29Jzh5NOknS3kxEiQCHAq/tvXBVvbmqPraAeSRJ+2FSSuRi4DuTbAN+E1iV5Iokdye5NEkAkrw5yc1JdiTZNDT+viTnjTG/JC1Jk1IiG4G/qaoTgTcAJwEXAscDxwCnt+3eUVUvrKrvAZ4KvGxfV5xkQ5ItSbbs/tqu0aSXpCVqUkpkTzdV1X1V9Q1gGzDdxs9KcmOS7cBLgO/e1xVV1aaqWldV65YdvHp0iSVpCZrUL2B8dGh5N7A8yUrgXcC6qro3yVuAleMIJ0kamJSZyFeAp+1jm5nCeDjJKsBjIJI0ZhMxE6mqf0xyXZIdwL8AX5xlmy8leTewA3gAuHmRY0qS9jARJQJQVa/ay/gvDS1fBFw0yzbrR5dMkrQ3k7I7S5J0ALJEJEndLBFJUjdLRJLUzRKRJHWbmHdnLYYTjlrNlovPHXcMSXrScCYiSepmiUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6rZ83AEW0/b7dzG98apxx9CY7Lz43HFHkJ50nIlIkrpZIpKkbpaIJKmbJSJJ6maJSJK6WSKSpG77VSJJPj3qIJKkA89+lUhVvWjUQSRJB579nYk8kmRVkmuT3JJke5Ifa+umk9yd5NIkdyW5IsnBbd2bk9ycZEeSTUnSxj+R5NeT3JTkM0m+r40vS/Kb7TK3J/m5Nj6V5JNJtrXrmtn+nCTXt0yXJ1k1ih+SJGl2czkm8nXgFVV1MnAW8FszpQA8F3hXVT0P+DLw2jb+jqp6YVV9D/BU4GVD17e8qk4BLgR+tY39NLCrql4IvBD42STPAV4FXF1VJwLPB7YlOQK4CDi7ZdoCvH4ud16SND9z+dqTAP8jyRnAN4CjgCPbunur6rq2/H7gdcDbgLOSvBE4GDgMuAP4i7bdh9q/W4HptnwO8L1JzmvnVwNrgZuB9yZ5CvDhqtqW5PuB44HrWpcdBFz/LaGTDcAGgGVPXzOHuytJ2pe5lMirgTXAC6rqsSQ7gZVtXe2xbSVZCbwLWFdV9yZ5y9D2AI+2f3cP5Qjwy1V19Z433srrXOB9Sd4O/DNwTVWd/0Shq2oTsAlgxdTaPXNKkuZhLruzVgMPtgI5Czh6aN2zk5zWll8FfIrHC+PhdqziPPbtauAX2oyDJMcmOSTJ0cAXq+rdwHuAk4EbgNOTfFfb9pAkx87h/kiS5ml/ZyIFXAr8RZLtDI4/3D20/h7gF5O8F7gT+N2q+lqSdwM7gAcY7JLal/cw2LV1Szve8hDw48CZwBuSPAY8Arymqh5Ksh6
4LMmKdvmLgM/s532SJM1Tqp54D0+Sw4FbqurovayfBja3g+cTbcXU2pq64JJxx9CY+FXwUp8kW6tq3WzrnnB3VpJnMDhY/bZRBJMkHdiecHdWVf098ITHGapqJzDxsxBJ0sLzu7MkSd0sEUlSN0tEktRtLh82POCdcNRqtvgOHUlaMM5EJEndLBFJUjdLRJLUzRKRJHWzRCRJ3SwRSVI3S0SS1M0SkSR1s0QkSd0sEUlSN0tEktTNEpEkdbNEJEndLBFJUjdLRJLUzRKRJHWzRCRJ3SwRSVI3S0SS1M0SkSR1s0QkSd2WjzvAYtp+/y6mN1417hhSt50XnzvuCNK/40xEktTNEpEkdbNEJEndLBFJUjdLRJLUzRKRJHWzRCRJ3Ra0RJK8L8l5s4w/I8kVC3lbkqTxW5QPG1bV3wPfUi6SpAPbvGYiSV6T5PYktyX54zZ8RpJPJ/nbmVlJkukkO9ry+iQfSvJXST6b5DeGru+cJNcnuSXJ5UlWtfGLk9zZbuttbWxNkg8mubmdTp/PfZEkzV33TCTJdwMXAS+qqoeTHAa8HZgCXgwcB1wJzLYb60TgJOBR4J4kvwP8S7u+s6vqq0n+C/D6JO8EXgEcV1WV5NB2Hb8N/K+q+lSSZwNXA8+bJecGYAPAsqev6b27kqRZzGd31kuAy6vqYYCq+qckAB+uqm8AdyY5ci+XvbaqdgEkuRM4GjgUOB64rl3PQcD1wC7g68AfJNkMbG7XcTZwfNsW4OlJVlXVI8M3VFWbgE0AK6bW1jzuryRpD6M4JvLo0HL2Y5vdLUeAa6rq/D03TnIK8AMMjqv8EoMC+zbg1Kr6+kKEliTN3XyOiXwc+E9JDgdou7Pm4wbg9CTf1a7vkCTHtuMiq6vq/wC/Ajy/bf9R4JdnLpzkxHneviRpjrpnIlV1R5JfA/5vkt3ArfMJUlUPJVkPXJZkRRu+CPgK8JEkKxnMVl7f1r0OeGeS2xncj08CPz+fDJKkuUnV0jlMsGJqbU1dcMm4Y0jd/HsiGockW6tq3Wzr/MS6JKmbJSJJ6maJSJK6WSKSpG6WiCSp26J8AeOkOOGo1Wzx3S2StGCciUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6maJSJK6LR93gMW0/f5dTG+8atwxJM3RzovPHXcE7YUzEUlSN0tEktTNEpEkdbNEJEndLBFJUjdLRJLUbWQlkuTTc9z+zCSb2/LLk2wcTTJJ0kIZ2edEqupF87jslcCVCxhHkjQCo5yJPNL+PTPJJ5JckeTuJJcmSVv30jZ2C/ATQ5ddn+QdbflHk9yY5NYkH0tyZBt/S5L3tuv+2ySvG9V9kSTNbrGOiZwEXAgcDxwDnJ5kJfBu4EeBFwD/YS+X/RRwalWdBPwp8MahdccBPwScAvxqkqeMJr4kaTaL9bUnN1XVfQBJtgHTwCPA56vqs238/cCGWS77TOADSaaAg4DPD627qqoeBR5N8iBwJHDf8IWTbJi53mVPX7OQ90mSlrzFmok8OrS8m7mV1+8A76iqE4CfA1bO5XqralNVrauqdcsOXj2Hm5Uk7cs43+J7NzCd5Dvb+fP3st1q4P62fMHIU0mS9tvYSqSqvs5gN9NV7cD6g3vZ9C3A5Um2Ag8vUjxJ0n5IVY07w6JZMbW2pi64ZNwxJM2RXwU/Xkm2VtW62db5iXVJUjdLRJLUzRKRJHWzRCRJ3SwRSVK3xfrE+kQ44ajVbPFdHpK0YJyJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqZolIkrotH3eAxbT9/l1Mb7xq3DEkaVHtvPjckV23MxF
JUjdLRJLUzRKRJHWzRCRJ3SwRSVI3S0SS1M0SkSR1W9ASSTKdZMdCXqckaXJNxEwkyZL60KMkPVmMrESSHJPk1iTfl+QPk2xv589q69cnuTLJx4Fr29gbktyc5PYkbx26rg8n2ZrkjiQbhsYfSfJrSW5LckOSI0d1fyRJ32okJZLkucAHgfXAKUBV1QnA+cAfJVnZNj0ZOK+qvj/JOcDatv2JwAuSnNG2+6mqegGwDnhdksPb+CHADVX1fOCTwM/OkmVDki1Jtuz+2q5R3F1JWrJGUSJrgI8Ar66q24AXA+8HqKq7gb8Djm3bXlNV/9SWz2mnW4FbgOMYlAoMiuM24AbgWUPj/wpsbstbgek9w1TVpqpaV1Xrlh28eqHuoySJ0XwB4y7gCwzK4859bPvVoeUA/7Oqfn94gyRnAmcDp1XV15J8ApiZyTxWVdWWd7PEvlBSksZtFDORfwVeAbwmyauA/we8GiDJscCzgXtmudzVwE8lWdW2PSrJdwCrgX9uBXIccOoIMkuSOozklXtVfTXJy4BrgP8OnJBkO/BvwPqqejTJnpf5aJLnAde3dY8APwn8FfDzSe5iUD43jCKzJGnu8vjeoCe/FVNra+qCS8YdQ5IW1Xz/nkiSrVW1brZ1E/E5EUnSgckSkSR1s0QkSd0sEUlSN0tEktRtSX0474SjVrNlnu9SkCQ9zpmIJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqtqT+KFWSrzD7n+adFEcAD487xBMw3/yYb37MNz/zyXd0Va2ZbcWS+u4s4J69/XWuSZBki/n6mW9+zDc/SzWfu7MkSd0sEUlSt6VWIpvGHWAfzDc/5psf883Pksy3pA6sS5IW1lKbiUiSFpAlIknqtmRKJMlLk9yT5HNJNo4pw3uTPJhkx9DYYUmuSfLZ9u+3t/Ek+d8t7+1JTl6EfM9K8tdJ7kxyR5L/PEkZk6xMclOS21q+t7bx5yS5seX4QJKD2viKdv5zbf30KPO121yW5NYkmycw284k25NsS7KljU3EY9tu89AkVyS5O8ldSU6blHxJntt+bjOnLye5cFLytdv8lfZ7sSPJZe33ZfTPv6p60p+AZcDfAMcABwG3AcePIccZwMnAjqGx3wA2tuWNwK+35R8B/hIIcCpw4yLkmwJObstPAz4DHD8pGdvtrGrLTwFubLf7Z8Ar2/jvAb/Qll8L/F5bfiXwgUX4Gb4e+BNgczs/Sdl2AkfsMTYRj227zT8CfqYtHwQcOkn5hnIuAx4Ajp6UfMBRwOeBpw4979YvxvNvUX7o4z4BpwFXD51/E/CmMWWZ5t+XyD3AVFueYvCBSIDfB86fbbtFzPoR4AcnMSNwMHAL8B8ZfAp3+Z6PNXA1cFpbXt62ywgzPRO4FngJsLn9BzIR2drt7ORbS2QiHltgdftPMJOYb49M5wDXTVI+BiVyL3BYez5tBn5oMZ5/S2V31swPeMZ9bWwSHFlV/9CWHwCObMtjzdymtycxeLU/MRnb7qJtwIPANQxmmF+qqn+bJcM387X1u4DDRxjvEuCNwDfa+cMnKBtAAR9NsjXJhjY2KY/tc4CHgD9suwPfk+SQCco37JXAZW15IvJV1f3A24AvAP/A4Pm0lUV4/i2VEjkg1OBlwdjfc51kFfBB4MKq+vLwunFnrKrdVXUig1f9pwDHjSvLsCQvAx6sqq3jzvIEXlxVJwM/DPxikjOGV475sV3OYFfv71bVScBXGewe+qZxP/cA2jGFlwOX77lunPnasZgfY1DGzwAOAV66GLe9VErkfuBZQ+ef2cYmwReTTAG0fx9s42PJnOQpDArk0qr60CRmBKiqLwF/zWCKfmiSme+BG87wzXxt/WrgH0cU6XTg5Ul2An/KYJfWb09INuCbr1apqgeBP2dQwpPy2N4H3FdVN7bzVzAolUnJN+OHgVuq6ovt/KTkOxv4fFU9VFWPAR9i8Jwc+fNvqZT
IzcDa9k6FgxhMR68cc6YZVwIXtOULGByHmBl/TXuXx6nArqFp80gkCfAHwF1V9fZJy5hkTZJD2/JTGRyvuYtBmZy3l3wzuc8DPt5eLS64qnpTVT2zqqYZPL8+XlWvnoRsAEkOSfK0mWUG+/V3MCGPbVU9ANyb5Llt6AeAOycl35DzeXxX1kyOScj3BeDUJAe33+OZn9/on3+LcSBqEk4M3i3xGQb70P/rmDJcxmB/5WMMXnn9NIP9kNcCnwU+BhzWtg3wzpZ3O7BuEfK9mMF0/HZgWzv9yKRkBL4XuLXl2wG8uY0fA9wEfI7BboYVbXxlO/+5tv6YRXqcz+Txd2dNRLaW47Z2umPmd2BSHtt2mycCW9rj+2Hg2ycs3yEMXq2vHhqbpHxvBe5uvxt/DKxYjOefX3siSeq2VHZnSZJGwBKRJHWzRCRJ3SwRSVI3S0SS1M0SkSR1s0QkSd3+PxNFbW14TY8fAAAAAElFTkSuQmCC\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "df.cuisine.value_counts().plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "thai df: (289, 385)\njapanese df: (320, 385)\nchinese df: (442, 385)\nindian df: (598, 385)\nkorean df: (799, 385)\n" + ] + } + ], + "source": [ + "\n", + "thai_df = df[(df.cuisine == \"thai\")]\n", + "japanese_df = df[(df.cuisine == \"japanese\")]\n", + "chinese_df = df[(df.cuisine == \"chinese\")]\n", + "indian_df = df[(df.cuisine == \"indian\")]\n", + "korean_df = df[(df.cuisine == \"korean\")]\n", + "\n", + "print(f'thai df: {thai_df.shape}')\n", + "print(f'japanese df: {japanese_df.shape}')\n", + "print(f'chinese df: {chinese_df.shape}')\n", + "print(f'indian df: {indian_df.shape}')\n", + "print(f'korean df: {korean_df.shape}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "def create_ingredient_df(df):\n", + " # transpose df, drop cuisine and unnamed rows, sum the row to get total for ingredient and add value header to new df\n", + " ingredient_df = df.T.drop(['cuisine','Unnamed: 0']).sum(axis=1).to_frame('value')\n", + " # drop ingredients that have a 0 sum\n", + " ingredient_df = ingredient_df[(ingredient_df.T != 0).any()]\n", + " # sort df\n", + " ingredient_df = 
ingredient_df.sort_values(by='value', ascending=False, inplace=False)\n", + " return ingredient_df\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 10 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "thai_ingredient_df = create_ingredient_df(thai_df)\r\n", + "thai_ingredient_df.head(10).plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 11 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAaYAAAD4CAYAAACngkIwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAdqUlEQVR4nO3de5xXdb3v8dcbREhBvICJeBkvKAocbmN57eb9btsLFpmkD3nUMT1mdg4dtWNnu/fW3NXOtHQ6JewyE1DTorwcNDVF5TfcBi9gKZ5AVLAaRcQQP+eP9Z36McwMg/5m1vrN7/18PObBWt91+X3WmoE337XWrK8iAjMzs6LolXcBZmZm5RxMZmZWKA4mMzMrFAeTmZkVioPJzMwKZau8C+gJBg0aFHV1dXmXYWZWNQYNGsR99913X0Qc13qZg6kC6urqKJVKeZdhZlZVJA1qq92X8szMrFAcTGZmVigOJjMzKxTfYzIz6wbr169n+fLlrFu3Lu9Sul2/fv3Ybbfd6NOnT6fWdzBVQNOKZuqmzMq7DHufll1zYt4lWA1Yvnw5AwYMoK6uDkl5l9NtIoLXX3+d5cuXs9dee3VqG1/KMzPrBuvWrWOnnXaqqVACkMROO+20RT3FmgomSb+RtH3edZhZbaq1UGqxpcddU5fyIuKEvGswM7OO9ahgkvQ14J2IuF7Sd4HREfEpSZ8CzgcOA+qB/sBvgd8DhwIrgFMj4m1J+wA3AoOBtcAFEfFcDodjZj1Ype9LV/peaf/+/VmzZk1F99lZPe1S3qPAEWm6HugvqU9qe6TVusOAGyNiBPBX4PTU3gBcFBHjgcuAH7T1QZImSypJKm1Y21zhwzAzq109LZgagfGStgPeAeaQBdQRZKFV7sWIWFC2XZ2k/mQ9qBmSFgA3A0Pa+qCIaIiI+oio773NwC44FDOzypkyZQo33njj3+evuuoqrr76ao488kjGjRvHqFGjuPvuuzfZ7ne/+x0nnXTS3+e//OUvM3XqVAAaGxv5+Mc/zvjx4zn22GNZuXJlRWrtUcEUEeuBF4FJwONkYfRJYF/g2Varv1M2vYHssmYv4K8RMabs64AuL9zMrItNmDCB6dOn/31++vTpnHvuudx1113MmzePhx56iK9+9atERKf2t379ei666CJmzpxJY2Mj5513HpdffnlFau1R95iSR8kuwZ0HNAHfARojIjb3ZEhEvCHpRUlnRsQMZRv8l4hY2OVVm5l1obFjx/Laa6/x8ssvs2rVKnbYYQd22WUXvvKVr/DII4/Qq1cvVqxYwauvvsouu+yy2f0tWbKExYsXc/TRRwOwYcMGhgxp8wLTFuupwXQ5MCci3pK0jk0v43VkIvBDSVcAfYBfAA4mM6t6Z555JjNnzuSVV15hwoQJ3HrrraxatYrGxkb69OlDXV3dJr9vtNVWW/Hee+/9fb5leUQwYsQI5syZU/E6e1wwRcRsskBpmd+vbLouTa4GRpa1/3vZ9IvAJuODmJlVuwkTJnDBBRewevVqHn74YaZPn87OO+9Mnz59eOihh3jppZc22WbPPffkmWee4Z133uHtt99m9uzZHH744ey///6sWrWKOXPmcMghh7B+/XqWLl3KiBEjPnCdPS6Y8jBq6EBKfq2NmW2BPF6FNWLECN58802GDh3KkCFDmDhxIieffDKjRo2ivr6e4cOHb7LN7rvvzllnncXIkSPZa6+9GDt2LABbb701M2fO5OKLL6a5uZl3332XSy65pCLBpM7e6LL21dfXhwcKNLOOPPvssxxwQO0+S9XW8UtqjIj61uv2qKfyzMys+jmYzMysUBxMZmbdpFZvnWzpcTuYzMy6Qb9+/Xj99ddrLpxaxmPq169fp7fxU3lmZt1gt912Y/ny5axatSrvUrpdywi2neVgMjPrBn369On0CK61zpfyzMysUBxMZmZWKA4mMzMrFN9jqoCmFc0VH43SiiuPV8mY1RL3mMz
MrFBqNpgkTZJ0Q5r+oqTP512TmZnV6KU8SRsdd0TclFctZma2saoOJklXAp8DVgF/AhqBZmAysDXwB+CciFgraSqwDhgLPAYsKtvPVcCaiPh3SfsCNwGDyYZcPzMi/thdx2RmVuuq9lKepIOA04HRwPFAy6vT74yIgyJiNPAscH7ZZrsBh0bEpR3s+lbgxrT9ocDKdj5/sqSSpNKGtc0f8GjMzKxFNfeYDgPujoh1wDpJv0rtIyVdDWwP9AfuK9tmRkRsaG+HkgYAQyPiLoC07zZFRAPQANB3yLDaevmVmVkXqtoeUwemAl+OiFHAN4HyNwe+lUtFZmbWadUcTI8BJ0vqJ6k/cFJqHwCslNQHmLglO4yIN4Hlkk4DkNRX0jaVLNrMzDpWtcEUEXOBe8geYvgt0ET24MOVwJNkwfXc+9j1OcDFkhYBjwO7VKRgMzPrFFXz2CCS+kfEmtSreQSYHBHzuruO+vr6KJVK3f2xZmZVTVJjRNS3bq/mhx8AGiQdSHYfaVoeoWRmZpVV1cEUEZ/NuwYzM6usqr3HZGZmPZODyczMCsXBZGZmheJgMjOzQnEwmZlZoTiYzMysUBxMZmZWKA4mMzMrlKr+BduiaFrRTN2UWXmXYVVo2TUn5l2CWeG4x2RmZoXiYDIzs0Kp+mCS9L8lHZV3HWZmVhlVf48pIr7R1Z8hqXdHQ7KbmVnlVFWPSdKVkpZI+r2k2yRdJmmqpDPS8mWSvilpnqQmScNT+2BJD0h6WtL/kfSSpEFp2eckPSVpgaSbJfVO7WskfVvSQuCQ3A7azKzGVE0wSToIOB0YDRwPbDK4VLI6IsYBPwQuS23/C3gwIkYAM4E90j4PACYAh0XEGGAD/xiOfVvgyYgYHRG/b6OeyZJKkkob1jZX5BjNzKy6LuUdBtwdEeuAdZJ+1c56d6Y/G4F/StOHA58GiIh7Jf0ltR8JjAfmSgL4EPBaWrYBuKO9YiKiAWgA6DtkWPUOA2xmVjDVFEyd9U76cwObPz6RjXz79TaWrfN9JTOz7lc1l/KAx4CTJfWT1B84aQu3PQtA0jHADql9NnCGpJ3Tsh0l7VnBms3MbAtVTY8pIuZKugdYBLwKNAGdvbnzTeA2SecAc4BXgDcjYrWkK4D7JfUC1gMXAi9V/ADMzKxTFFE9t0ck9Y+INZK2AR4BJkfEvE5s1xfYEBHvSjoE+GF62KEi6uvro1QqVWp3ZmY1QVJjRGzyIFvV9JiSBkkHAv3I7g1tNpSSPYDpqVf0N+CCrirQzMw+mKoKpoj47Pvc7nlgbIXLMTOzLlBNDz+YmVkNcDCZmVmhOJjMzKxQHExmZlYoDiYzMysUB5OZmRWKg8nMzArFwWRmZoVSVb9gW1RNK5qpmzIr7zKsB1l2zYl5l2CWG/eYzMysUHpcMEmqk7Q4TX9C0q/T9CmSpuRbnZmZbU7NXMqLiHuAe/Kuw8zMOla4HpOkbSXNkrRQ0mJJEyQdJOnx1PaUpAGpZ/SopHnp69DN7HeSpBvSdJ2kByUtkjRb0h6pfaqk69NnvSDpjO44ZjMz+4ci9piOA16OiBMBJA0E5gMT0mCB2wFvA68BR0fEOknDgNuATcb1aMf3yYbNmCbpPOB64LS0bAhwODCcrIc1s60dSJoMTAbovd3gLT9KMzNrU+F6TGQj0x4t6VpJR5CNpbQyIuYCRMQbEfEu0Af4kaQmYAZw4BZ8xiHAz9P0T8mCqMUvI+K9iHgG+HB7O4iIhoioj4j63tsM3IKPNjOzjhSuxxQRSyWNA04ArgYebGfVr5ANsT6aLGDXVaiEd8qmVaF9mplZJxWuxyRpV2BtRPwMuA74KDBE0kFp+QBJWwEDyXpS7wHnAL234GMeB85O0xOBRytVv5mZfTCF6zEBo4DrJL0HrAe+RNZz+b6kD5HdXzoK+AFwh6TPA/cCb23BZ1wE3CLpa8Aq4AsVrN/MzD4ARUTeNVS9vkOGxZBz/yPvMqwH8ZsfrBZIaoyITR5
aK2KPqeqMGjqQkv8hMTOriMLdYzIzs9rmYDIzs0JxMJmZWaE4mMzMrFAcTGZmVigOJjMzKxQHk5mZFYqDyczMCsXBZGZmheJgMjOzQvEriSqgaUUzdVNm5V2G9XB+f57VCveYzMysUBxMZmZWKA4mMzMrlB4fTJI+J+kpSQsk3Sypt6TzJS1N7T+SdENadx9JT0hqknS1pDV5129mVmt6dDBJOgCYABwWEWOADWRDqV8JHAwcBgwv2+R7wPciYhSwfDP7niypJKm0YW1zl9RvZlaLenQwAUcC44G5khak+UuBhyPizxGxHphRtv4hZfM/72jHEdEQEfURUd97m4FdULqZWW3q6cEkYFpEjElf+wNX5VyTmZl1oKcH02zgDEk7A0jaEZgPfFzSDpK2Ak4vW/+Jsvmzu7VSMzMDengwRcQzwBXA/ZIWAQ8AQ4B/BZ4CHgOWAS03iS4BLk3r7lvWbmZm3aTHv/khIm4Hbi9vk7Q4IhpSj+ku4Jdp0Qrg4IgISWcD+3dvtWZm1uODqR1XSToK6Afczz+CaTxwgyQBfwXO68zORg0dSMmvizEzq4iaDKaIuKyd9keB0d1cjpmZlenR95jMzKz6OJjMzKxQHExmZlYoDiYzMysUB5OZmRWKg8nMzArFwWRmZoXiYDIzs0JxMJmZWaHU5JsfKq1pRTN1U2blXYbViGV+/ZX1cO4xmZlZodRsMEn6naT6vOswM7ON1WwwmZlZMfWoYJK0raRZkhZKWixpgqRvSJqb5hvSkBbl2/SSNFXS1Wn+GElzJM2TNENS/3yOxsysNvWoYAKOA16OiNERMRK4F7ghIg5K8x8CTipbfyvgVuD5iLhC0iCyEW+PiohxQAm4tK0PkjRZUklSacNaD3RrZlYpPS2YmoCjJV0r6YiIaAY+KelJSU3Ap4ARZevfDCyOiH9J8wcDBwKPSVoAnAvs2dYHRURDRNRHRH3vbQZ22QGZmdWaHvW4eEQslTQOOAG4WtJs4EKgPiL+JOkqslFrWzxOFlzfjoh1gIAHIuIz3V27mZllelSPSdKuwNqI+BlwHTAuLVqd7hWd0WqTHwO/AaZL2gp4AjhM0r5pf9tK2q97qjczM+hhPSZgFHCdpPeA9cCXgNOAxcArwNzWG0TEdyQNBH4KTAQmAbdJ6ptWuQJY2vWlm5kZgCIi7xqqXn19fZRKpbzLMDOrKpIaI2KT3yftUZfyzMys+jmYzMysUBxMZmZWKA4mMzMrFAeTmZkVioPJzMwKxcFkZmaF4mAyM7NCcTCZmVmhOJjMzKxQetq78nLRtKKZuimz8i7DrBCWXXNi3iVYlXOPyczMCsXBZGZmhZJrMEk6TdKBnVhvqqTWYykh6ROSfl3BeuolXZ+mJ0m6oVL7NjOzzsm7x3Qa2VDmhRARpYi4OO86zMxqWYfBJOkaSReWzV8l6TJJX5M0V9IiSd8sW36lpCWSfi/pNkmXpfZ9JN0rqVHSo5KGSzoUOIVsYL8FaZ0L0n4XSrpD0jZl5RwlqSRpqaST2qh1W0k/kfSUpPmSTu3guPpJukVSU1r3k6m9oj0wMzPbcpvrMd0OnFU2fxawChgGfAQYA4yX9DFJBwGnA6OB44HywZ8agIsiYjxwGfCDiHgcuAf4WkSMiYg/AndGxEERMRp4Fji/bB916TNPBG6S1K9VrZcDD0bER4BPkgXetu0c14VARMQo4DPAtDb21yFJk1NQljasbd6STc3MrAMdPi4eEfMl7SxpV2Aw8Bey4cuPAean1fqTBdUA4O6IWAesk/QrAEn9gUOBGZJadt2Xto2UdDWwfdrvfWXLpkfEe8Dzkl4Ahrfa9hjglJZeGtAP2IMs4Fo7HPh+OsbnJL0E7NfRuWgtIhrIApe+Q4Z5GGAzswrpzO8xzQDOAHYh60HtCfxbRNxcvpKkS9rZvhfw14gY04nPmgqcFhELJU0CPlG2rPU//q3nBZweEUs68TlmZlZQnXn44XbgbLJwmkHWizkv9YS
QNFTSzsBjwMnp/k1/4CSAiHgDeFHSmWl9SRqd9v0mWU+rxQBgpaQ+wMRWdZwpqZekfYC9gdYBdB9wkVK3TNLYDo7p0Zb9S9qPrGflQDMzK4DNBlNEPE0WGCsiYmVE3A/8HJgjqQmYCQyIiLlk94wWAb8FmoCWmy8TgfMlLQSeBloeTPgF8LX0AMI+wJXAk2Qh91yrUv4f8FTa9xfTJcNy/wz0ARZJejrNt+cHQK9U/+3ApIh4Z3PnwszMup4iKnd7RFL/iFiTnqZ7BJgcEfMq9gEFVV9fH6VSKe8yzMyqiqTGiKhv3V7pd+U1pF+Y7QdMq4VQMjOzyqpoMEXEZyu5vw9K0rHAta2aX4yIT+dRj5mZbV6Pfrt4RNzHxo+cm5lZweX9SiIzM7ONOJjMzKxQHExmZlYoDiYzMysUB5OZmRWKg8nMzArFwWRmZoXSo3+Pqbs0rWimbsqsvMswqyrLrjkx7xKsoNxjMjOzQqmKYJK0q6SZeddhZmZdryqCKSJejogz8vhsSb7caWbWjQoXTJKukXRh2fxVki6TtDjNT5J0p6R7JT0v6Vtl6x4jaY6keZJmlA1meIKk5yQ1Srpe0q9T+0fS+vMlPS5p/7LPuEfSg8Dsbj0BZmY1rnDBRDZw31ll82eRDR5YbgwwARgFTJC0u6RBwBXAURExDigBl0rqB9wMHB8R44HBZft5DjgiIsYC3wD+tWzZOOCMiPh4W0VKmiypJKm0YW1zW6uYmdn7ULjLVBExX9LOknYlC5G/AH9qtdrsiGgGkPQMsCewPXAg8FgaXX1rYA4wHHghIl5M294GTE7TA4FpkoYBQTYCbosHIuLPHdTZADQA9B0yrHKjLZqZ1bjCBVMyAzgD2IWsB9Va+TDoG8iOQ2Rh8pnyFSWN6eBz/hl4KCI+LakO+F3Zsre2uGozM/vAingpD7IwOpssnGZ0cpsngMMk7QsgaVtJ+wFLgL1T8EB2CbDFQGBFmp70wUo2M7NKKGQwRcTTwABgRUSs7OQ2q8jC5TZJi0iX8SLibeC/AvdKagTeBFpuCn0L+DdJ8ylu79HMrKYoouffHpHUPyLWKLv5dCPwfER8t1L77ztkWAw59z8qtTuzmuA3P5ikxoiob91eK72ECySdS/ZAxHyyp/QqZtTQgZT8l8zMrCJqIphS76hiPSQzM+s6hbzHZGZmtcvBZGZmheJgMjOzQnEwmZlZoTiYzMysUBxMZmZWKA4mMzMrFAeTmZkVioPJzMwKpSbe/NDVmlY0UzdlVt5lmFkX8Dv9up97TGZmVig1F0ySfiNp+7zrMDOzttXUpbw07MVJEfFe3rWYmVnbenyPSVKdpCWS/hNYDGyQNCgt+7ykRZIWSvppahss6Q5Jc9PXYXnWb2ZWa2qlxzQMODcinpC0DEDSCOAK4NCIWC1px7Tu94DvRsTvJe0B3Acc0HqHkiYDkwF6bze4Gw7BzKw21EowvRQRT7Rq+xQwIyJWA0TEn1P7UcCB2VU/ALZrGQG3fOOIaAAaIBvBtssqNzOrMbUSTG9twbq9gIMjYl1XFWNmZu3r8feYOvAgcKaknQDKLuXdD1zUspKkMTnUZmZWs2o2mCLiaeBfgIclLQS+kxZdDNSnhyKeAb6YV41mZrWox1/Ki4hlwMiy+bqy6WnAtFbrrwYmdFN5ZmbWSo8Ppu4wauhASn5tiZlZRdTspTwzMysmB5OZmRWKg8nMzArFwWRmZoXiYDIzs0JxMJmZWaE4mMzMrFAcTGZmVigOJjMzKxS/+aECmlY0UzdlVt5lmJl1q2Vd9MYb95jMzKxQHExmZlYoDiYzMysUB5OZmRVKlwaTpG0lzZK0UNJiSRMkHSlpvqQmST+R1FfSpyT9smy7oyXd1c4+e0uamvbXJOkrqf0CSXPTZ90haZvUPlXSGWXbrymb/h9pHwslXZPa9pF0r6RGSY9KGt5V58fMzDbV1T2m44CXI2J0RIwE7gWmAhMiYhTZU4FfAh4ChksanLb
7AvCTdvY5BhgaESPTPm5J7XdGxEERMRp4Fji/o8IkHQ+cCnw0bfOttKgBuCgixgOXAT9oZ/vJkkqSShvWNnd8FszMrNO6OpiagKMlXSvpCKAOeDEilqbl04CPRUQAPwU+J2l74BDgt+3s8wVgb0nfl3Qc8EZqH5l6OE3ARGDEZmo7CrglItYCRMSfJfUHDgVmSFoA3AwMaWvjiGiIiPqIqO+9zcDNnQczM+ukLv09pohYKmkccAJwNfBgB6vfAvwKWAfMiIh329nnXySNBo4FvgicBZxH1hM7LSIWSpoEfCJt8i4pgCX1ArbuoIZewF8jYkxnjs/MzCqvq+8x7QqsjYifAdeR9YTqJO2bVjkHeBggIl4GXgau4B+X59ra5yCgV0TckdYdlxYNAFZK6kPWY2qxDBifpk8B+qTpB4AvlN2L2jEi3gBelHRmalMKQTMz6yZd/eaHUcB1kt4D1pPdTxpIdqlsK2AucFPZ+rcCgyPi2Q72ORS4JfV+AL6e/rwSeBJYlf4ckNp/BNwtaSHZPa63ACLiXkljgJKkvwG/Af4nWaj9UNIVZCH2C2Dh+zx+MzPbQspu7xSDpBuA+RHx47xr2RL19fVRKpXyLsPMrKpIaoyI+tbthXlXnqRGst7MV/OuxczM8lOYYEqPZ29E0pNA31bN50REU/dUZWZm3a0wwdSWiPho3jWYmVn38iuJzMysUBxMZmZWKIV6Kq9aSXoTWJJ3HZ00CFiddxFbwPV2LdfbdaqpVuj+elcDRMRxrRcU+h5TFVnS1iOPRSSpVC21guvtaq6361RTrVCsen0pz8zMCsXBZGZmheJgqoyGvAvYAtVUK7jeruZ6u0411QoFqtcPP5iZWaG4x2RmZoXiYDIzs0JxMH0Ako6TtETSHyRNybue1iTtLukhSc9IelrSf0vtV0laIWlB+joh71pbSFomqSnVVUptO0p6QNLz6c8d8q4TQNL+ZedwgaQ3JF1SpPMr6SeSXpO0uKytzfOZxh+7Pv08L0qDfOZd63WSnkv13JVGuEZSnaS3y87xTe3vuVvrbfd7L+nr6dwukXRsQeq9vazWZWnk7vzPb0T46318Ab2BPwJ7k42KuxA4MO+6WtU4BBiXpgcAS4EDgauAy/Kur52alwGDWrV9C5iSpqcA1+ZdZzs/D68Aexbp/AIfIxtMc/HmzifZSNO/BQQcDDxZgFqPAbZK09eW1VpXvl6Bzm2b3/v0924h2Uup90r/dvTOu95Wy78NfKMI59c9pvfvI8AfIuKFiPgb2YCCp+Zc00YiYmVEzEvTbwLPkg20WG1OBaal6WnAaTnW0p4jgT9GxEt5F1IuIh4B/tyqub3zeSrwn5F5Athe0pDuqbTtWiPi/oh4N80+AezWXfVsTjvntj2nAr+IiHci4kXgD2T/hnSbjuqVJOAs4LburKk9Dqb3byjwp7L55RT4H31JdcBYstF9Ab6cLo/8pCiXxpIA7pfUKGlyavtwRKxM068AH86ntA6dzcZ/qYt6fqH981n0n+nzyHp0LfaSNF/Sw5KOyKuoNrT1vS/6uT0CeDUini9ry+38OphqgKT+wB3AJRHxBvBDYB9gDLCSrAtfFIdHxDjgeOBCSR8rXxjZdYZC/Y6DpK2BU4AZqanI53cjRTyfbZF0OfAucGtqWgnsERFjgUuBn0vaLq/6ylTN976Vz7Dxf6xyPb8OpvdvBbB72fxuqa1QJPUhC6VbI+JOgIh4NSI2RMR7wI/o5ksKHYmIFenP14C7yGp7teWSUvrztfwqbNPxwLyIeBWKfX6T9s5nIX+mJU0CTgImpiAlXRJ7PU03kt2z2S+3IpMOvveFPLcAkrYC/gm4vaUt7/PrYHr/5gLDJO2V/sd8NnBPzjVtJF03/jHwbER8p6y9/L7Bp4HFrbfNg6RtJQ1omSa78b2Y7Lyem1Y7F7g7nwrbtdH/Not6fsu0dz7vAT6fns47GGguu+SXC0nHAf8dOCUi1pa1D5bUO03vDQw
DXsinyn/o4Ht/D3C2pL6S9iKr96nurq8dRwHPRcTylobcz29eT130hC+yp5iWkv1v4vK862mjvsPJLtMsAhakrxOAnwJNqf0eYEjetaZ69yZ7cmkh8HTLOQV2AmYDzwP/F9gx71rLat4WeB0YWNZWmPNLFpgrgfVk9zXOb+98kj2Nd2P6eW4C6gtQ6x/I7s20/PzelNY9Pf2MLADmAScX5Ny2+70HLk/ndglwfBHqTe1TgS+2WjfX8+tXEpmZWaH4Up6ZmRWKg8nMzArFwWRmZoXiYDIzs0JxMJmZWaE4mMzMrFAcTGZmVij/H6ovFeU7ywQTAAAAAElFTkSuQmCC\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "japanese_ingredient_df = create_ingredient_df(japanese_df)\r\n", + "japanese_ingredient_df.head(10).plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 12 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "chinese_ingredient_df = create_ingredient_df(chinese_df)\r\n", + "chinese_ingredient_df.head(10).plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 13 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "indian_ingredient_df = create_ingredient_df(indian_df)\r\n", + "indian_ingredient_df.head(10).plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 14 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "korean_ingredient_df = create_ingredient_df(korean_df)\r\n", + "korean_ingredient_df.head(10).plot.barh()" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... 
whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 15 + } + ], + "source": [ + "feature_df= df.drop(['cuisine','Unnamed: 0','rice','garlic','ginger'], axis=1)\n", + "labels_df = df.cuisine #.unique()\n", + "feature_df.head()\n" + ] + }, + { + "source": [ + "Уравнотежите податке са SMOTE прекомерним узорковањем до највеће класе. Прочитајте више овде: https://imbalanced-learn.org/dev/references/generated/imblearn.over_sampling.SMOTE.html\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "oversample = SMOTE()\n", + "transformed_feature_df, transformed_label_df = oversample.fit_resample(feature_df, labels_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "new label count: korean 799\nchinese 799\njapanese 799\nindian 799\nthai 799\nName: cuisine, dtype: int64\nold label count: korean 799\nindian 598\nchinese 442\njapanese 320\nthai 289\nName: cuisine, dtype: int64\n" + ] + } + ], + "source": [ + "print(f'new label count: {transformed_label_df.value_counts()}')\r\n", + "print(f'old label count: {df.cuisine.value_counts()}')" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 
0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 18 + } + ], + "source": [ + "transformed_feature_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " cuisine almond angelica anise anise_seed apple apple_brandy \\\n", + "0 indian 0 0 0 0 0 0 \n", + "1 indian 1 0 0 0 0 0 \n", + "2 indian 0 0 0 0 0 0 \n", + "3 indian 0 0 0 0 0 0 \n", + "4 indian 0 0 0 0 0 0 \n", + "... ... ... ... ... ... ... ... \n", + "3990 thai 0 0 0 0 0 0 \n", + "3991 thai 0 0 0 0 0 0 \n", + "3992 thai 0 0 0 0 0 0 \n", + "3993 thai 0 0 0 0 0 0 \n", + "3994 thai 0 0 0 0 0 0 \n", + "\n", + " apricot armagnac artemisia ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "... ... ... ... ... ... ... ... \n", + "3990 0 0 0 ... 0 0 0 \n", + "3991 0 0 0 ... 0 0 0 \n", + "3992 0 0 0 ... 0 0 0 \n", + "3993 0 0 0 ... 0 0 0 \n", + "3994 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "... ... ... ... ... ... ... ... \n", + "3990 0 0 0 0 0 0 0 \n", + "3991 0 0 0 0 0 0 0 \n", + "3992 0 0 0 0 0 0 0 \n", + "3993 0 0 0 0 0 0 0 \n", + "3994 0 0 0 0 0 0 0 \n", + "\n", + "[3995 rows x 381 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisia...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
0indian000000000...0000000000
1indian100000000...0000000000
2indian000000000...0000000000
3indian000000000...0000000000
4indian000000000...0000000010
..................................................................
3990thai000000000...0000000000
3991thai000000000...0000000000
3992thai000000000...0000000000
3993thai000000000...0000000000
3994thai000000000...0000000000
\n

3995 rows × 381 columns

\n
" + }, + "metadata": {}, + "execution_count": 19 + } + ], + "source": [ + "# export transformed data to new df for classification\n", + "transformed_df = pd.concat([transformed_label_df,transformed_feature_df],axis=1, join='outer')\n", + "transformed_df" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\nRangeIndex: 3995 entries, 0 to 3994\nColumns: 381 entries, cuisine to zucchini\ndtypes: int64(380), object(1)\nmemory usage: 11.6+ MB\n" + ] + } + ], + "source": [ + "transformed_df.info()" + ] + }, + { + "source": [ + "Сачувај датотеку за будућу употребу\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "transformed_df.to_csv(\"../../data/cleaned_cuisines.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен помоћу услуге за превођење уз помоћ вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако се трудимо да обезбедимо тачност, молимо вас да имате у виду да аутоматски преводи могу садржати грешке или нетачности. Оригинални документ на његовом изворном језику треба сматрати меродавним извором. За критичне информације препоручује се професионални превод од стране људи. 
Не преузимамо одговорност за било каква погрешна тумачења или неспоразуме који могу настати услед коришћења овог превода.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "1da12ed6d238756959b8de9cac2a35a2", + "translation_date": "2025-09-06T14:51:50+00:00", + "source_file": "4-Classification/1-Introduction/solution/notebook.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/translations/sr/4-Classification/2-Classifiers-1/notebook.ipynb b/translations/sr/4-Classification/2-Classifiers-1/notebook.ipynb new file mode 100644 index 000000000..803ecb1bf --- /dev/null +++ b/translations/sr/4-Classification/2-Classifiers-1/notebook.ipynb @@ -0,0 +1,41 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "68829b06b4dcd512d3327849191f4d7f", + "translation_date": "2025-09-06T14:32:36+00:00", + "source_file": "4-Classification/2-Classifiers-1/notebook.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# Изградња модела класификације\n" + ], + "cell_type": "markdown", + "metadata": {} 
+ }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако се трудимо да обезбедимо тачност, молимо вас да имате у виду да аутоматски преводи могу садржати грешке или нетачности. Оригинални документ на његовом изворном језику треба сматрати меродавним извором. За критичне информације препоручује се професионални превод од стране људи. Не преузимамо одговорност за било каква погрешна тумачења или неспоразуме који могу настати услед коришћења овог превода.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sr/4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb b/translations/sr/4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb new file mode 100644 index 000000000..bce33895c --- /dev/null +++ b/translations/sr/4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb @@ -0,0 +1,1298 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_11-R.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "6ea6a5171b1b99b7b5a55f7469c048d2", + "translation_date": "2025-09-06T14:36:23+00:00", + "source_file": "4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb", + "language_code": "sr" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Направите модел класификације: Укусна азијска и индијска јела\n" + ], + "metadata": { + "id": "zs2woWv_HoE8" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Класификатори кухиња 1\n", + "\n", + "У овом часу, истражићемо различите класификаторе како бисмо *предвидели одређену националну кухињу на основу групе састојака.* Док то радимо, научићемо више о 
начинима на које алгоритми могу бити коришћени за задатке класификације.\n", + "\n", + "### [**Квиз пре предавања**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/21/)\n", + "\n", + "### **Припрема**\n", + "\n", + "Овај час се надовезује на наш [претходни час](https://github.com/microsoft/ML-For-Beginners/blob/main/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb) где смо:\n", + "\n", + "- Направили лаган увод у класификације користећи скуп података о свим сјајним кухињама Азије и Индије 😋.\n", + "\n", + "- Истражили неке [dplyr глаголе](https://dplyr.tidyverse.org/) за припрему и чишћење наших података.\n", + "\n", + "- Направили прелепе визуализације користећи ggplot2.\n", + "\n", + "- Демонстрирали како се носити са неуравнотеженим подацима кроз њихову претходну обраду користећи [recipes](https://recipes.tidymodels.org/articles/Simple_Example.html).\n", + "\n", + "- Показали како да `prep` и `bake` наш рецепт како бисмо потврдили да ће функционисати како је предвиђено.\n", + "\n", + "#### **Предуслов**\n", + "\n", + "За овај час, биће нам потребни следећи пакети за чишћење, припрему и визуализацију наших података:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) је [колекција R пакета](https://www.tidyverse.org/packages) дизајнирана да учини науку о подацима бржом, лакшом и забавнијом!\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) оквир је [колекција пакета](https://www.tidymodels.org/packages/) за моделирање и машинско учење.\n", + "\n", + "- `themis`: [themis пакет](https://themis.tidymodels.org/) пружа додатне кораке за рецепте за рад са неуравнотеженим подацима.\n", + "\n", + "- `nnet`: [nnet пакет](https://cran.r-project.org/web/packages/nnet/nnet.pdf) пружа функције за процену неуронских мрежа са једним скривеним слојем, као и за моделе мултиномијалне логистичке регресије.\n", + "\n", + "Можете их инсталирати као:\n" + ], + "metadata": { + "id": "iDFOb3ebHwQC" + } + }, + { + "cell_type": 
"markdown", + "source": [ + "`install.packages(c(\"tidyverse\", \"tidymodels\", \"themis\", \"nnet\"))`\n", + "\n", + "Алтернативно, следећи скрипт проверава да ли имате пакете који су потребни за завршетак овог модула и инсталира их уколико недостају.\n" + ], + "metadata": { + "id": "4V85BGCjII7F" + } + }, + { + "cell_type": "code", + "execution_count": 2, + "source": [ + "suppressWarnings(if (!require(\"pacman\"))install.packages(\"pacman\"))\r\n", + "\r\n", + "pacman::p_load(tidyverse, tidymodels, themis, here)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Loading required package: pacman\n", + "\n" + ] + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "an5NPyyKIKNR", + "outputId": "834d5e74-f4b8-49f9-8ab5-4c52ff2d7bc8" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 1. Поделите податке на сетове за обуку и тестирање.\n", + "\n", + "Почећемо са неколико корака из нашег претходног часа.\n", + "\n", + "### Избаците најчешће састојке који стварају забуну између различитих кухиња, користећи `dplyr::select()`.\n", + "\n", + "Сви воле пиринач, бели лук и ђумбир!\n" + ], + "metadata": { + "id": "0ax9GQLBINVv" + } + }, + { + "cell_type": "code", + "execution_count": 3, + "source": [ + "# Load the original cuisines data\r\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/4-Classification/data/cuisines.csv\")\r\n", + "\r\n", + "# Drop id column, rice, garlic and ginger from our original data set\r\n", + "df_select <- df %>% \r\n", + " select(-c(1, rice, garlic, ginger)) %>%\r\n", + " # Encode cuisine column as categorical\r\n", + " mutate(cuisine = factor(cuisine))\r\n", + "\r\n", + "# Display new data set\r\n", + "df_select %>% \r\n", + " slice_head(n = 5)\r\n", + "\r\n", + "# Display distribution of cuisines\r\n", + "df_select %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(desc(n))" + ], + "outputs": [ + { + 
"output_type": "stream", + "name": "stderr", + "text": [ + "New names:\n", + "* `` -> ...1\n", + "\n", + "\u001b[1m\u001b[1mRows: \u001b[1m\u001b[22m\u001b[34m\u001b[34m2448\u001b[34m\u001b[39m \u001b[1m\u001b[1mColumns: \u001b[1m\u001b[22m\u001b[34m\u001b[34m385\u001b[34m\u001b[39m\n", + "\n", + "\u001b[36m──\u001b[39m \u001b[1m\u001b[1mColumn specification\u001b[1m\u001b[22m \u001b[36m────────────────────────────────────────────────────────\u001b[39m\n", + "\u001b[1mDelimiter:\u001b[22m \",\"\n", + "\u001b[31mchr\u001b[39m (1): cuisine\n", + "\u001b[32mdbl\u001b[39m (384): ...1, almond, angelica, anise, anise_seed, apple, apple_brandy, a...\n", + "\n", + "\n", + "\u001b[36mℹ\u001b[39m Use \u001b[30m\u001b[47m\u001b[30m\u001b[47m`spec()`\u001b[47m\u001b[30m\u001b[49m\u001b[39m to retrieve the full column specification for this data.\n", + "\u001b[36mℹ\u001b[39m Specify the column types or set \u001b[30m\u001b[47m\u001b[30m\u001b[47m`show_col_types = FALSE`\u001b[47m\u001b[30m\u001b[49m\u001b[39m to quiet this message.\n", + "\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine almond angelica anise anise_seed apple apple_brandy apricot armagnac\n", + "1 indian 0 0 0 0 0 0 0 0 \n", + "2 indian 1 0 0 0 0 0 0 0 \n", + "3 indian 0 0 0 0 0 0 0 0 \n", + "4 indian 0 0 0 0 0 0 0 0 \n", + "5 indian 0 0 0 0 0 0 0 0 \n", + " artemisia ⋯ whiskey white_bread white_wine whole_grain_wheat_flour wine wood\n", + "1 0 ⋯ 0 0 0 0 0 0 \n", + "2 0 ⋯ 0 0 0 0 0 0 \n", + "3 0 ⋯ 0 0 0 0 0 0 \n", + "4 0 ⋯ 0 0 0 0 0 0 \n", + "5 0 ⋯ 0 0 0 0 0 0 \n", + " yam yeast yogurt zucchini\n", + "1 0 0 0 0 \n", + "2 0 0 0 0 \n", + "3 0 0 0 0 \n", + "4 0 0 0 0 \n", + "5 0 0 1 0 " + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 381\n", + "\n", + "| cuisine <fct> | almond <dbl> | angelica <dbl> | anise <dbl> | anise_seed <dbl> | apple <dbl> | apple_brandy <dbl> | apricot <dbl> | armagnac <dbl> | artemisia <dbl> | ⋯ ⋯ | whiskey <dbl> | white_bread <dbl> | white_wine 
<dbl> | whole_grain_wheat_flour <dbl> | wine <dbl> | wood <dbl> | yam <dbl> | yeast <dbl> | yogurt <dbl> | zucchini <dbl> |\n", + "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", + "| indian | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| indian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| indian | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| indian | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| indian | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 381\n", + "\\begin{tabular}{lllllllllllllllllllll}\n", + " cuisine & almond & angelica & anise & anise\\_seed & apple & apple\\_brandy & apricot & armagnac & artemisia & ⋯ & whiskey & white\\_bread & white\\_wine & whole\\_grain\\_wheat\\_flour & wine & wood & yam & yeast & yogurt & zucchini\\\\\n", + " & & & & & & & & & & ⋯ & & & & & & & & & & \\\\\n", + "\\hline\n", + "\t indian & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t indian & 1 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t indian & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t indian & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t indian & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 1 & 0\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 381
cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiawhiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
<fct><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
indian0000000000000000000
indian1000000000000000000
indian0000000000000000000
indian0000000000000000000
indian0000000000000000010
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine n \n", + "1 korean 799\n", + "2 indian 598\n", + "3 chinese 442\n", + "4 japanese 320\n", + "5 thai 289" + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 2\n", + "\n", + "| cuisine <fct> | n <int> |\n", + "|---|---|\n", + "| korean | 799 |\n", + "| indian | 598 |\n", + "| chinese | 442 |\n", + "| japanese | 320 |\n", + "| thai | 289 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 2\n", + "\\begin{tabular}{ll}\n", + " cuisine & n\\\\\n", + " & \\\\\n", + "\\hline\n", + "\t korean & 799\\\\\n", + "\t indian & 598\\\\\n", + "\t chinese & 442\\\\\n", + "\t japanese & 320\\\\\n", + "\t thai & 289\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 2
cuisinen
<fct><int>
korean 799
indian 598
chinese 442
japanese320
thai 289
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 735 + }, + "id": "jhCrrH22IWVR", + "outputId": "d444a85c-1d8b-485f-bc4f-8be2e8f8217c" + } + }, + { + "cell_type": "markdown", + "source": [ + "Савршено! Сада је време да поделимо податке тако да 70% података иде за тренинг, а 30% за тестирање. Такође ћемо применити технику `стратификације` приликом дељења података како бисмо `задржали пропорцију сваке кухиње` у тренинг и валидационим сетовима података.\n", + "\n", + "[rsample](https://rsample.tidymodels.org/), пакет у оквиру Tidymodels-а, пружа инфраструктуру за ефикасно дељење и ресемплинг података:\n" + ], + "metadata": { + "id": "AYTjVyajIdny" + } + }, + { + "cell_type": "code", + "execution_count": 4, + "source": [ + "# Load the core Tidymodels packages into R session\r\n", + "library(tidymodels)\r\n", + "\r\n", + "# Create split specification\r\n", + "set.seed(2056)\r\n", + "cuisines_split <- initial_split(data = df_select,\r\n", + " strata = cuisine,\r\n", + " prop = 0.7)\r\n", + "\r\n", + "# Extract the data in each split\r\n", + "cuisines_train <- training(cuisines_split)\r\n", + "cuisines_test <- testing(cuisines_split)\r\n", + "\r\n", + "# Print the number of cases in each split\r\n", + "cat(\"Training cases: \", nrow(cuisines_train), \"\\n\",\r\n", + " \"Test cases: \", nrow(cuisines_test), sep = \"\")\r\n", + "\r\n", + "# Display the first few rows of the training set\r\n", + "cuisines_train %>% \r\n", + " slice_head(n = 5)\r\n", + "\r\n", + "\r\n", + "# Display distribution of cuisines in the training set\r\n", + "cuisines_train %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(desc(n))" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Training cases: 1712\n", + "Test cases: 736" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine almond angelica anise anise_seed apple apple_brandy apricot armagnac\n", + "1 
chinese 0 0 0 0 0 0 0 0 \n", + "2 chinese 0 0 0 0 0 0 0 0 \n", + "3 chinese 0 0 0 0 0 0 0 0 \n", + "4 chinese 0 0 0 0 0 0 0 0 \n", + "5 chinese 0 0 0 0 0 0 0 0 \n", + " artemisia ⋯ whiskey white_bread white_wine whole_grain_wheat_flour wine wood\n", + "1 0 ⋯ 0 0 0 0 1 0 \n", + "2 0 ⋯ 0 0 0 0 1 0 \n", + "3 0 ⋯ 0 0 0 0 0 0 \n", + "4 0 ⋯ 0 0 0 0 0 0 \n", + "5 0 ⋯ 0 0 0 0 0 0 \n", + " yam yeast yogurt zucchini\n", + "1 0 0 0 0 \n", + "2 0 0 0 0 \n", + "3 0 0 0 0 \n", + "4 0 0 0 0 \n", + "5 0 0 0 0 " + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 381\n", + "\n", + "| cuisine <fct> | almond <dbl> | angelica <dbl> | anise <dbl> | anise_seed <dbl> | apple <dbl> | apple_brandy <dbl> | apricot <dbl> | armagnac <dbl> | artemisia <dbl> | ⋯ ⋯ | whiskey <dbl> | white_bread <dbl> | white_wine <dbl> | whole_grain_wheat_flour <dbl> | wine <dbl> | wood <dbl> | yam <dbl> | yeast <dbl> | yogurt <dbl> | zucchini <dbl> |\n", + "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 381\n", + "\\begin{tabular}{lllllllllllllllllllll}\n", + " cuisine & almond & angelica & anise & anise\\_seed & apple & apple\\_brandy & apricot & armagnac & artemisia & ⋯ & whiskey & white\\_bread & white\\_wine & whole\\_grain\\_wheat\\_flour & wine & wood & yam & yeast & yogurt & zucchini\\\\\n", + " & & & & & & & & & & ⋯ & & & & & & & & & & \\\\\n", + "\\hline\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 
1 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 1 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 381
cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiawhiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
<fct><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
chinese0000000000000100000
chinese0000000000000100000
chinese0000000000000000000
chinese0000000000000000000
chinese0000000000000000000
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine n \n", + "1 korean 559\n", + "2 indian 418\n", + "3 chinese 309\n", + "4 japanese 224\n", + "5 thai 202" + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 2\n", + "\n", + "| cuisine <fct> | n <int> |\n", + "|---|---|\n", + "| korean | 559 |\n", + "| indian | 418 |\n", + "| chinese | 309 |\n", + "| japanese | 224 |\n", + "| thai | 202 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 2\n", + "\\begin{tabular}{ll}\n", + " cuisine & n\\\\\n", + " & \\\\\n", + "\\hline\n", + "\t korean & 559\\\\\n", + "\t indian & 418\\\\\n", + "\t chinese & 309\\\\\n", + "\t japanese & 224\\\\\n", + "\t thai & 202\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 2
cuisinen
<fct><int>
korean 559
indian 418
chinese 309
japanese224
thai 202
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 535 + }, + "id": "w5FWIkEiIjdN", + "outputId": "2e195fd9-1a8f-4b91-9573-cce5582242df" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 2. Рад са неуравнотеженим подацима\n", + "\n", + "Као што сте можда приметили у оригиналном скупу података, као и у нашем скупу за обуку, постоји прилично неједнака расподела броја кухиња. Корејске кухиње су *готово* три пута бројније од тајландских кухиња. Неуравнотежени подаци често имају негативан утицај на перформансе модела. Многи модели најбоље функционишу када је број посматрања једнак и, самим тим, имају потешкоћа са неуравнотеженим подацима.\n", + "\n", + "Постоје два главна начина за рад са неуравнотеженим скуповима података:\n", + "\n", + "- додавање посматрања мањинској класи: `Прекомерно узорковање` (Over-sampling), на пример, коришћењем SMOTE алгоритма који синтетички генерише нове примере мањинске класе користећи најближе суседе тих случајева.\n", + "\n", + "- уклањање посматрања из већинске класе: `Подузорковање` (Under-sampling)\n", + "\n", + "У нашем претходном часу, демонстрирали смо како се ради са неуравнотеженим скуповима података користећи `recipe`. Рецепт се може сматрати као план који описује које кораке треба применити на скуп података како би био спреман за анализу. У нашем случају, желимо да имамо једнаку расподелу броја наших кухиња за наш `скуп за обуку`. 
Хајде да почнемо.\n" + ], + "metadata": { + "id": "daBi9qJNIwqW" + } + }, + { + "cell_type": "code", + "execution_count": 5, + "source": [ + "# Load themis package for dealing with imbalanced data\r\n", + "library(themis)\r\n", + "\r\n", + "# Create a recipe for preprocessing training data\r\n", + "cuisines_recipe <- recipe(cuisine ~ ., data = cuisines_train) %>% \r\n", + " step_smote(cuisine)\r\n", + "\r\n", + "# Print recipe\r\n", + "cuisines_recipe" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Data Recipe\n", + "\n", + "Inputs:\n", + "\n", + " role #variables\n", + " outcome 1\n", + " predictor 380\n", + "\n", + "Operations:\n", + "\n", + "SMOTE based on cuisine" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 200 + }, + "id": "Az6LFBGxI1X0", + "outputId": "29d71d85-64b0-4e62-871e-bcd5398573b6" + } + }, + { + "cell_type": "markdown", + "source": [ + "Можете, наравно, да потврдите (коришћењем prep+bake) да ће рецепт функционисати како очекујете - све ознаке кухиње имају `559` посматрања.\n", + "\n", + "Пошто ћемо користити овај рецепт као претпроцесор за моделирање, `workflow()` ће обавити сав припремни и завршни посао за нас, тако да нећемо морати ручно да процењујемо рецепт.\n", + "\n", + "Сада смо спремни да обучимо модел 👩‍💻👨‍💻!\n", + "\n", + "## 3. Избор класификатора\n", + "\n", + "

\n", + " \n", + "

Илустрација од @allison_horst
\n" + ], + "metadata": { + "id": "NBL3PqIWJBBB" + } + }, + { + "cell_type": "markdown", + "source": [ + "Сада морамо да одлучимо који алгоритам да користимо за овај задатак 🤔.\n", + "\n", + "У оквиру Tidymodels-а, [`parsnip пакет`](https://parsnip.tidymodels.org/index.html) пружа конзистентан интерфејс за рад са моделима кроз различите механизме (пакете). Погледајте документацију за parsnip како бисте истражили [типове модела и механизме](https://www.tidymodels.org/find/parsnip/#models) и њихове одговарајуће [аргументе модела](https://www.tidymodels.org/find/parsnip/#model-args). Разноврсност може изгледати прилично збуњујуће на први поглед. На пример, следеће методе укључују технике класификације:\n", + "\n", + "- C5.0 модели класификације засновани на правилима\n", + "\n", + "- Флексибилни дискриминантни модели\n", + "\n", + "- Линеарни дискриминантни модели\n", + "\n", + "- Регуларизовани дискриминантни модели\n", + "\n", + "- Модели логистичке регресије\n", + "\n", + "- Модели мултиномијалне регресије\n", + "\n", + "- Модели наивног Бајеса\n", + "\n", + "- Машине потпорних вектора\n", + "\n", + "- Најближи суседи\n", + "\n", + "- Дрвеће одлуке\n", + "\n", + "- Методе ансамбла\n", + "\n", + "- Неуронске мреже\n", + "\n", + "Списак се наставља!\n", + "\n", + "### **Који класификатор одабрати?**\n", + "\n", + "Па, који класификатор треба да изаберете? Често је тестирање неколико њих и тражење доброг резултата начин да се испроба.\n", + "\n", + "> AutoML решава овај проблем елегантно тако што врши ова поређења у облаку, омогућавајући вам да изаберете најбољи алгоритам за ваше податке. Пробајте [овде](https://docs.microsoft.com/learn/modules/automate-model-selection-with-azure-automl/?WT.mc_id=academic-77952-leestott)\n", + "\n", + "Такође, избор класификатора зависи од нашег проблема. 
На пример, када се исход може категоризовати у `више од две класе`, као у нашем случају, морамо користити `алгоритам за мултикласну класификацију` уместо `бинарне класификације.`\n", + "\n", + "### **Бољи приступ**\n", + "\n", + "Бољи начин од насумичног погађања је да следите идеје из овог преузимљивог [ML Cheat sheet](https://docs.microsoft.com/azure/machine-learning/algorithm-cheat-sheet?WT.mc_id=academic-77952-leestott). Овде откривамо да, за наш мултикласни проблем, имамо неке опције:\n", + "\n", + "

\n", + " \n", + "

Део Microsoft-овог Cheat Sheet-а за алгоритме, који приказује опције за мултикласну класификацију
\n" + ], + "metadata": { + "id": "a6DLAZ3vJZ14" + } + }, + { + "cell_type": "markdown", + "source": [ + "### **Размишљање**\n", + "\n", + "Хајде да видимо како можемо да приступимо проблему узимајући у обзир ограничења која имамо:\n", + "\n", + "- **Дубоке неуронске мреже су превише захтевне**. С обзиром на наш чист, али минималан скуп података, и чињеницу да обуку изводимо локално преко нотебука, дубоке неуронске мреже су превише захтевне за овај задатак.\n", + "\n", + "- **Нема класификатора са две класе**. Не користимо класификатор са две класе, што искључује приступ један-наспрам-свих.\n", + "\n", + "- **Дрво одлуке или логистичка регресија би могли да функционишу**. Дрво одлуке би могло да ради, или мултиномијална регресија/логистичка регресија за више класа.\n", + "\n", + "- **Дрво одлуке са више класа и побољшањем решава другачији проблем**. Дрво одлуке са више класа и побољшањем је најпогодније за непараметарске задатке, нпр. задатке који су дизајнирани за креирање рангирања, тако да нам није корисно.\n", + "\n", + "Такође, обично је добра идеја да се пре него што се крене са сложенијим моделима машинског учења, као што су ансамбл методе, изгради најједноставнији могући модел како би се стекла идеја о томе шта се дешава. Зато ћемо у овој лекцији почети са моделом `мултиномијалне регресије`.\n", + "\n", + "> Логистичка регресија је техника која се користи када је излазна променљива категоријална (или номинална). За бинарну логистичку регресију број излазних променљивих је два, док је број излазних променљивих за мултиномијалну логистичку регресију више од два. Погледајте [Напредне методе регресије](https://bookdown.org/chua/ber642_advanced_regression/multinomial-logistic-regression.html) за додатно читање.\n", + "\n", + "## 4. 
Обучите и процените модел мултиномијалне логистичке регресије.\n", + "\n", + "У Tidymodels-у, `parsnip::multinom_reg()`, дефинише модел који користи линеарне предикторе за предвиђање података са више класа користећи мултиномијалну дистрибуцију. Погледајте `?multinom_reg()` за различите начине/моторе које можете користити за обуку овог модела.\n", + "\n", + "За овај пример, обучићемо модел мултиномијалне регресије преко подразумеваног [nnet](https://cran.r-project.org/web/packages/nnet/nnet.pdf) мотора.\n", + "\n", + "> Изабрао сам вредност за `penalty` прилично насумично. Постоје бољи начини за избор ове вредности, као што је коришћење `ресемплинг`-а и `тјунинг`-а модела, о чему ћемо говорити касније.\n", + ">\n", + "> Погледајте [Tidymodels: Почетак](https://www.tidymodels.org/start/tuning/) ако желите да научите више о томе како да подесите хиперпараметре модела.\n" + ], + "metadata": { + "id": "gWMsVcbBJemu" + } + }, + { + "cell_type": "code", + "execution_count": 6, + "source": [ + "# Create a multinomial regression model specification\r\n", + "mr_spec <- multinom_reg(penalty = 1) %>% \r\n", + " set_engine(\"nnet\", MaxNWts = 2086) %>% \r\n", + " set_mode(\"classification\")\r\n", + "\r\n", + "# Print model specification\r\n", + "mr_spec" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Multinomial Regression Model Specification (classification)\n", + "\n", + "Main Arguments:\n", + " penalty = 1\n", + "\n", + "Engine-Specific Arguments:\n", + " MaxNWts = 2086\n", + "\n", + "Computational engine: nnet \n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 166 + }, + "id": "Wq_fcyQiJvfG", + "outputId": "c30449c7-3864-4be7-f810-72a003743e2d" + } + }, + { + "cell_type": "markdown", + "source": [ + "Одличан посао 🥳! 
Сада када имамо рецепт и спецификацију модела, потребно је да пронађемо начин да их спојимо у један објекат који ће прво обрадити податке, затим применити модел на обрађене податке, а такође омогућити потенцијалне активности након обраде. У Tidymodels-у, овај практични објекат се назива [`workflow`](https://workflows.tidymodels.org/) и згодно чува ваше компоненте за моделирање! Ово је оно што бисмо у *Python*-у назвали *pipelines*.\n", + "\n", + "Хајде да све спакујемо у workflow!📦\n" + ], + "metadata": { + "id": "NlSbzDfgJ0zh" + } + }, + { + "cell_type": "code", + "execution_count": 7, + "source": [ + "# Bundle recipe and model specification\r\n", + "mr_wf <- workflow() %>% \r\n", + " add_recipe(cuisines_recipe) %>% \r\n", + " add_model(mr_spec)\r\n", + "\r\n", + "# Print out workflow\r\n", + "mr_wf" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "══ Workflow ════════════════════════════════════════════════════════════════════\n", + "\u001b[3mPreprocessor:\u001b[23m Recipe\n", + "\u001b[3mModel:\u001b[23m multinom_reg()\n", + "\n", + "── Preprocessor ────────────────────────────────────────────────────────────────\n", + "1 Recipe Step\n", + "\n", + "• step_smote()\n", + "\n", + "── Model ───────────────────────────────────────────────────────────────────────\n", + "Multinomial Regression Model Specification (classification)\n", + "\n", + "Main Arguments:\n", + " penalty = 1\n", + "\n", + "Engine-Specific Arguments:\n", + " MaxNWts = 2086\n", + "\n", + "Computational engine: nnet \n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 333 + }, + "id": "Sc1TfPA4Ke3_", + "outputId": "82c70013-e431-4e7e-cef6-9fcf8aad4a6c" + } + }, + { + "cell_type": "markdown", + "source": [ + "Воркфлови 👌👌! **`workflow()`** може се прилагодити на сличан начин као и модел. 
Дакле, време је да обучимо модел!\n" + ], + "metadata": { + "id": "TNQ8i85aKf9L" + } + }, + { + "cell_type": "code", + "execution_count": 8, + "source": [ + "# Train a multinomial regression model\n", + "mr_fit <- fit(object = mr_wf, data = cuisines_train)\n", + "\n", + "mr_fit" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "══ Workflow [trained] ══════════════════════════════════════════════════════════\n", + "\u001b[3mPreprocessor:\u001b[23m Recipe\n", + "\u001b[3mModel:\u001b[23m multinom_reg()\n", + "\n", + "── Preprocessor ────────────────────────────────────────────────────────────────\n", + "1 Recipe Step\n", + "\n", + "• step_smote()\n", + "\n", + "── Model ───────────────────────────────────────────────────────────────────────\n", + "Call:\n", + "nnet::multinom(formula = ..y ~ ., data = data, decay = ~1, MaxNWts = ~2086, \n", + " trace = FALSE)\n", + "\n", + "Coefficients:\n", + " (Intercept) almond angelica anise anise_seed apple\n", + "indian 0.19723325 0.2409661 0 -5.004955e-05 -0.1657635 -0.05769734\n", + "japanese 0.13961959 -0.6262400 0 -1.169155e-04 -0.4893596 -0.08585717\n", + "korean 0.22377347 -0.1833485 0 -5.560395e-05 -0.2489401 -0.15657804\n", + "thai -0.04336577 -0.6106258 0 4.903828e-04 -0.5782866 0.63451105\n", + " apple_brandy apricot armagnac artemisia artichoke asparagus\n", + "indian 0 0.37042636 0 -0.09122797 0 -0.27181970\n", + "japanese 0 0.28895643 0 -0.12651100 0 0.14054037\n", + "korean 0 -0.07981259 0 0.55756709 0 -0.66979948\n", + "thai 0 -0.33160904 0 -0.10725182 0 -0.02602152\n", + " avocado bacon baked_potato balm banana barley\n", + "indian -0.46624197 0.16008055 0 0 -0.2838796 0.2230625\n", + "japanese 0.90341344 0.02932727 0 0 -0.4142787 2.0953906\n", + "korean -0.06925382 -0.35804134 0 0 -0.2686963 -0.7233404\n", + "thai -0.21473955 -0.75594439 0 0 0.6784880 -0.4363320\n", + " bartlett_pear basil bay bean beech\n", + "indian 0 -0.7128756 0.1011587 -0.8777275 -0.0004380795\n", + 
"japanese 0 0.1288697 0.9425626 -0.2380748 0.3373437611\n", + "korean 0 -0.2445193 -0.4744318 -0.8957870 -0.0048784496\n", + "thai 0 1.5365848 0.1333256 0.2196970 -0.0113078024\n", + " beef beef_broth beef_liver beer beet\n", + "indian -0.7985278 0.2430186 -0.035598065 -0.002173738 0.01005813\n", + "japanese 0.2241875 -0.3653020 -0.139551027 0.128905553 0.04923911\n", + "korean 0.5366515 -0.6153237 0.213455197 -0.010828645 0.27325423\n", + "thai 0.1570012 -0.9364154 -0.008032213 -0.035063746 -0.28279823\n", + " bell_pepper bergamot berry bitter_orange black_bean\n", + "indian 0.49074330 0 0.58947607 0.191256164 -0.1945233\n", + "japanese 0.09074167 0 -0.25917977 -0.118915977 -0.3442400\n", + "korean -0.57876763 0 -0.07874180 -0.007729435 -0.5220672\n", + "thai 0.92554006 0 -0.07210196 -0.002983296 -0.4614426\n", + " black_currant black_mustard_seed_oil black_pepper black_raspberry\n", + "indian 0 0.38935801 -0.4453495 0\n", + "japanese 0 -0.05452887 -0.5440869 0\n", + "korean 0 -0.03929970 0.8025454 0\n", + "thai 0 -0.21498372 -0.9854806 0\n", + " black_sesame_seed black_tea blackberry blackberry_brandy\n", + "indian -0.2759246 0.3079977 0.191256164 0\n", + "japanese -0.6101687 -0.1671913 -0.118915977 0\n", + "korean 1.5197674 -0.3036261 -0.007729435 0\n", + "thai -0.1755656 -0.1487033 -0.002983296 0\n", + " blue_cheese blueberry bone_oil bourbon_whiskey brandy\n", + "indian 0 0.216164294 -0.2276744 0 0.22427587\n", + "japanese 0 -0.119186087 0.3913019 0 -0.15595599\n", + "korean 0 -0.007821986 0.2854487 0 -0.02562342\n", + "thai 0 -0.004947048 -0.0253658 0 -0.05715244\n", + "\n", + "...\n", + "and 308 more lines." 
+ ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "GMbdfVmTKkJI", + "outputId": "adf9ebdf-d69d-4a64-e9fd-e06e5322292e" + } + }, + { + "cell_type": "markdown", + "source": [ + "Излаз приказује коефицијенте које је модел научио током обуке.\n", + "\n", + "### Процена обученог модела\n", + "\n", + "Време је да видимо како се модел показао 📏 процењујући га на тестном скупу! Хајде да почнемо са прављењем предвиђања на тестном скупу.\n" + ], + "metadata": { + "id": "tt2BfOxrKmcJ" + } + }, + { + "cell_type": "code", + "execution_count": 9, + "source": [ + "# Make predictions on the test set\n", + "results <- cuisines_test %>% select(cuisine) %>% \n", + " bind_cols(mr_fit %>% predict(new_data = cuisines_test))\n", + "\n", + "# Print out results\n", + "results %>% \n", + " slice_head(n = 5)" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine .pred_class\n", + "1 indian thai \n", + "2 indian indian \n", + "3 indian indian \n", + "4 indian indian \n", + "5 indian indian " + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 2\n", + "\n", + "| cuisine <fct> | .pred_class <fct> |\n", + "|---|---|\n", + "| indian | thai |\n", + "| indian | indian |\n", + "| indian | indian |\n", + "| indian | indian |\n", + "| indian | indian |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 2\n", + "\\begin{tabular}{ll}\n", + " cuisine & .pred\\_class\\\\\n", + " & \\\\\n", + "\\hline\n", + "\t indian & thai \\\\\n", + "\t indian & indian\\\\\n", + "\t indian & indian\\\\\n", + "\t indian & indian\\\\\n", + "\t indian & indian\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 2
cuisine.pred_class
<fct><fct>
indianthai
indianindian
indianindian
indianindian
indianindian
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 248 + }, + "id": "CqtckvtsKqax", + "outputId": "e57fe557-6a68-4217-fe82-173328c5436d" + } + }, + { + "cell_type": "markdown", + "source": [ + "Одличан посао! У Tidymodels-у, процена перформанси модела може се обавити коришћењем [yardstick](https://yardstick.tidymodels.org/) - пакета који се користи за мерење ефикасности модела помоћу метрика перформанси. Као што смо радили у нашој лекцији о логистичкој регресији, хајде да почнемо израчунавањем матрице конфузије.\n" + ], + "metadata": { + "id": "8w5N6XsBKss7" + } + }, + { + "cell_type": "code", + "execution_count": 10, + "source": [ + "# Confusion matrix for categorical data\n", + "conf_mat(data = results, truth = cuisine, estimate = .pred_class)\n" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " Truth\n", + "Prediction chinese indian japanese korean thai\n", + " chinese 83 1 8 15 10\n", + " indian 4 163 1 2 6\n", + " japanese 21 5 73 25 1\n", + " korean 15 0 11 191 0\n", + " thai 10 11 3 7 70" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 133 + }, + "id": "YvODvsLkK0iG", + "outputId": "bb69da84-1266-47ad-b174-d43b88ca2988" + } + }, + { + "cell_type": "markdown", + "source": [ + "Када се ради са више класа, генерално је интуитивније ово визуализовати као топлотну мапу, овако:\n" + ], + "metadata": { + "id": "c0HfPL16Lr6U" + } + }, + { + "cell_type": "code", + "execution_count": 11, + "source": [ + "update_geom_defaults(geom = \"tile\", new = list(color = \"black\", alpha = 0.7))\n", + "# Visualize confusion matrix\n", + "results %>% \n", + " conf_mat(cuisine, .pred_class) %>% \n", + " autoplot(type = \"heatmap\")" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "plot without title" + ], + "image/png": "" + }, + "metadata": { + "image/png": { + 
"width": 420, + "height": 420 + } + } + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 436 + }, + "id": "HsAtwukyLsvt", + "outputId": "3032a224-a2c8-4270-b4f2-7bb620317400" + } + }, + { + "cell_type": "markdown", + "source": [ + "Тамнији квадрати на графикону матрице конфузије указују на велики број случајева, и надамо се да можете видети дијагоналну линију тамнијих квадрата која указује на случајеве где су предвиђена и стварна ознака исте.\n", + "\n", + "Сада ћемо израчунати резиме статистике за матрицу конфузије.\n" + ], + "metadata": { + "id": "oOJC87dkLwPr" + } + }, + { + "cell_type": "code", + "execution_count": 12, + "source": [ + "# Summary stats for confusion matrix\n", + "conf_mat(data = results, truth = cuisine, estimate = .pred_class) %>% \n", + "summary()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " .metric .estimator .estimate\n", + "1 accuracy multiclass 0.7880435\n", + "2 kap multiclass 0.7276583\n", + "3 sens macro 0.7780927\n", + "4 spec macro 0.9477598\n", + "5 ppv macro 0.7585583\n", + "6 npv macro 0.9460080\n", + "7 mcc multiclass 0.7292724\n", + "8 j_index macro 0.7258524\n", + "9 bal_accuracy macro 0.8629262\n", + "10 detection_prevalence macro 0.2000000\n", + "11 precision macro 0.7585583\n", + "12 recall macro 0.7780927\n", + "13 f_meas macro 0.7641862" + ], + "text/markdown": [ + "\n", + "A tibble: 13 × 3\n", + "\n", + "| .metric <chr> | .estimator <chr> | .estimate <dbl> |\n", + "|---|---|---|\n", + "| accuracy | multiclass | 0.7880435 |\n", + "| kap | multiclass | 0.7276583 |\n", + "| sens | macro | 0.7780927 |\n", + "| spec | macro | 0.9477598 |\n", + "| ppv | macro | 0.7585583 |\n", + "| npv | macro | 0.9460080 |\n", + "| mcc | multiclass | 0.7292724 |\n", + "| j_index | macro | 0.7258524 |\n", + "| bal_accuracy | macro | 0.8629262 |\n", + "| detection_prevalence | macro | 0.2000000 |\n", + "| precision | macro | 0.7585583 |\n", + "| recall | macro 
| 0.7780927 |\n", + "| f_meas | macro | 0.7641862 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 13 × 3\n", + "\\begin{tabular}{lll}\n", + " .metric & .estimator & .estimate\\\\\n", + " & & \\\\\n", + "\\hline\n", + "\t accuracy & multiclass & 0.7880435\\\\\n", + "\t kap & multiclass & 0.7276583\\\\\n", + "\t sens & macro & 0.7780927\\\\\n", + "\t spec & macro & 0.9477598\\\\\n", + "\t ppv & macro & 0.7585583\\\\\n", + "\t npv & macro & 0.9460080\\\\\n", + "\t mcc & multiclass & 0.7292724\\\\\n", + "\t j\\_index & macro & 0.7258524\\\\\n", + "\t bal\\_accuracy & macro & 0.8629262\\\\\n", + "\t detection\\_prevalence & macro & 0.2000000\\\\\n", + "\t precision & macro & 0.7585583\\\\\n", + "\t recall & macro & 0.7780927\\\\\n", + "\t f\\_meas & macro & 0.7641862\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 13 × 3
.metric.estimator.estimate
<chr><chr><dbl>
accuracy multiclass0.7880435
kap multiclass0.7276583
sens macro 0.7780927
spec macro 0.9477598
ppv macro 0.7585583
npv macro 0.9460080
mcc multiclass0.7292724
j_index macro 0.7258524
bal_accuracy macro 0.8629262
detection_prevalencemacro 0.2000000
precision macro 0.7585583
recall macro 0.7780927
f_meas macro 0.7641862
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 494 + }, + "id": "OYqetUyzL5Wz", + "outputId": "6a84d65e-113d-4281-dfc1-16e8b70f37e6" + } + }, + { + "cell_type": "markdown", + "source": [ + "Ако се фокусирамо на неке метрике као што су тачност, сензитивност, ppv, нисмо лоше започели 🥳!\n", + "\n", + "## 5. Дубље истраживање\n", + "\n", + "Хајде да поставимо једно суптилно питање: Који критеријум се користи да се одреди одређени тип кухиње као предвиђени исход?\n", + "\n", + "Па, статистички алгоритми машинског учења, као што је логистичка регресија, засновани су на `вероватноћи`; дакле, оно што класификатор заправо предвиђа је расподела вероватноће за скуп могућих исхода. Класа са највишом вероватноћом се затим бира као највероватнији исход за дате опсервације.\n", + "\n", + "Хајде да видимо како ово функционише тако што ћемо направити и тврда предвиђања класе и вероватноће.\n" + ], + "metadata": { + "id": "43t7vz8vMJtW" + } + }, + { + "cell_type": "code", + "execution_count": 13, + "source": [ + "# Make hard class prediction and probabilities\n", + "results_prob <- cuisines_test %>%\n", + " select(cuisine) %>% \n", + " bind_cols(mr_fit %>% predict(new_data = cuisines_test)) %>% \n", + " bind_cols(mr_fit %>% predict(new_data = cuisines_test, type = \"prob\"))\n", + "\n", + "# Print out results\n", + "results_prob %>% \n", + " slice_head(n = 5)" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine .pred_class .pred_chinese .pred_indian .pred_japanese .pred_korean\n", + "1 indian thai 1.551259e-03 0.4587877 5.988039e-04 2.428503e-04\n", + "2 indian indian 2.637133e-05 0.9999488 6.648651e-07 2.259993e-05\n", + "3 indian indian 1.049433e-03 0.9909982 1.060937e-03 1.644947e-05\n", + "4 indian indian 6.237482e-02 0.4763035 9.136702e-02 3.660913e-01\n", + "5 indian indian 1.431745e-02 0.9418551 2.945239e-02 8.721782e-03\n", + " .pred_thai \n", + "1 
5.388194e-01\n", + "2 1.577948e-06\n", + "3 6.874989e-03\n", + "4 3.863391e-03\n", + "5 5.653283e-03" + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 7\n", + "\n", + "| cuisine <fct> | .pred_class <fct> | .pred_chinese <dbl> | .pred_indian <dbl> | .pred_japanese <dbl> | .pred_korean <dbl> | .pred_thai <dbl> |\n", + "|---|---|---|---|---|---|---|\n", + "| indian | thai | 1.551259e-03 | 0.4587877 | 5.988039e-04 | 2.428503e-04 | 5.388194e-01 |\n", + "| indian | indian | 2.637133e-05 | 0.9999488 | 6.648651e-07 | 2.259993e-05 | 1.577948e-06 |\n", + "| indian | indian | 1.049433e-03 | 0.9909982 | 1.060937e-03 | 1.644947e-05 | 6.874989e-03 |\n", + "| indian | indian | 6.237482e-02 | 0.4763035 | 9.136702e-02 | 3.660913e-01 | 3.863391e-03 |\n", + "| indian | indian | 1.431745e-02 | 0.9418551 | 2.945239e-02 | 8.721782e-03 | 5.653283e-03 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 7\n", + "\\begin{tabular}{lllllll}\n", + " cuisine & .pred\\_class & .pred\\_chinese & .pred\\_indian & .pred\\_japanese & .pred\\_korean & .pred\\_thai\\\\\n", + " & & & & & & \\\\\n", + "\\hline\n", + "\t indian & thai & 1.551259e-03 & 0.4587877 & 5.988039e-04 & 2.428503e-04 & 5.388194e-01\\\\\n", + "\t indian & indian & 2.637133e-05 & 0.9999488 & 6.648651e-07 & 2.259993e-05 & 1.577948e-06\\\\\n", + "\t indian & indian & 1.049433e-03 & 0.9909982 & 1.060937e-03 & 1.644947e-05 & 6.874989e-03\\\\\n", + "\t indian & indian & 6.237482e-02 & 0.4763035 & 9.136702e-02 & 3.660913e-01 & 3.863391e-03\\\\\n", + "\t indian & indian & 1.431745e-02 & 0.9418551 & 2.945239e-02 & 8.721782e-03 & 5.653283e-03\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 7
cuisine.pred_class.pred_chinese.pred_indian.pred_japanese.pred_korean.pred_thai
<fct><fct><dbl><dbl><dbl><dbl><dbl>
indianthai 1.551259e-030.45878775.988039e-042.428503e-045.388194e-01
indianindian2.637133e-050.99994886.648651e-072.259993e-051.577948e-06
indianindian1.049433e-030.99099821.060937e-031.644947e-056.874989e-03
indianindian6.237482e-020.47630359.136702e-023.660913e-013.863391e-03
indianindian1.431745e-020.94185512.945239e-028.721782e-035.653283e-03
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 248 + }, + "id": "xdKNs-ZPMTJL", + "outputId": "68f6ac5a-725a-4eff-9ea6-481fef00e008" + } + }, + { + "cell_type": "markdown", + "source": [ + "✅ Можете ли да објасните зашто је модел прилично сигуран да је прва опсервација тајландска?\n", + "\n", + "## **🚀Изазов**\n", + "\n", + "У овој лекцији, користили сте своје очишћене податке да изградите модел машинског учења који може да предвиди националну кухињу на основу серије састојака. Одвојите мало времена да прочитате [многе опције](https://www.tidymodels.org/find/parsnip/#models) које Tidymodels пружа за класификацију података и [друге начине](https://parsnip.tidymodels.org/articles/articles/Examples.html#multinom_reg-models) за примену мултиномијалне регресије.\n", + "\n", + "#### ХВАЛА:\n", + "\n", + "[`Елисон Хорст`](https://twitter.com/allison_horst/) за креирање невероватних илустрација које чине R приступачнијим и занимљивијим. Пронађите више илустрација у њеној [галерији](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM).\n", + "\n", + "[Каси Бревиу](https://www.twitter.com/cassieview) и [Џен Лупер](https://www.twitter.com/jenlooper) за креирање оригиналне Python верзије овог модула ♥️\n", + "\n", + "
\n", + "Додао бих неке шале, али не разумем баш добро игре речи о храни 😅.\n", + "\n", + "
\n", + "\n", + "Срећно учење,\n", + "\n", + "[Ерик](https://twitter.com/ericntay), Златни амбасадор за Microsoft Learn студенте.\n" + ], + "metadata": { + "id": "2tWVHMeLMYdM" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако настојимо да обезбедимо тачност, молимо вас да имате у виду да аутоматизовани преводи могу садржати грешке или нетачности. Оригинални документ на изворном језику треба сматрати ауторитативним извором. За критичне информације препоручује се професионални превод од стране људи. Не сносимо одговорност за било каква погрешна тумачења или неспоразуме који могу произаћи из коришћења овог превода.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sr/4-Classification/2-Classifiers-1/solution/notebook.ipynb b/translations/sr/4-Classification/2-Classifiers-1/solution/notebook.ipynb new file mode 100644 index 000000000..fb91d4078 --- /dev/null +++ b/translations/sr/4-Classification/2-Classifiers-1/solution/notebook.ipynb @@ -0,0 +1,281 @@ +{ + "cells": [ + { + "source": [ + "# Изградња модела класификације\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 0 indian 0 0 0 0 0 \n", + "1 1 indian 1 0 0 0 0 \n", + "2 2 indian 0 0 0 0 0 \n", + "3 3 indian 0 0 0 0 0 \n", + "4 4 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 
0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 382 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00indian00000000...0000000000
11indian10000000...0000000000
22indian00000000...0000000000
33indian00000000...0000000000
44indian00000000...0000000010
\n

5 rows × 382 columns

\n
" + }, + "metadata": {}, + "execution_count": 1 + } + ], + "source": [ + "import pandas as pd\n", + "cuisines_df = pd.read_csv(\"../../data/cleaned_cuisines.csv\")\n", + "cuisines_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.model_selection import train_test_split, cross_val_score\n", + "from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report, precision_recall_curve\n", + "from sklearn.svm import SVC\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 indian\n", + "1 indian\n", + "2 indian\n", + "3 indian\n", + "4 indian\n", + "Name: cuisine, dtype: object" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ], + "source": [ + "cuisines_label_df = cuisines_df['cuisine']\n", + "cuisines_label_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 4 + } + ], + "source": [ + "cuisines_feature_df = cuisines_df.drop(['Unnamed: 0', 'cuisine'], axis=1)\n", + "cuisines_feature_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(cuisines_feature_df, cuisines_label_df, test_size=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Accuracy is 0.8181818181818182\n" + ] + } + ], + "source": [ + "lr = LogisticRegression(multi_class='ovr',solver='liblinear')\n", + "model = lr.fit(X_train, np.ravel(y_train))\n", + "\n", + "accuracy = model.score(X_test, y_test)\n", + "print (\"Accuracy is {}\".format(accuracy))" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "ingredients: Index(['artemisia', 'black_pepper', 'mushroom', 'shiitake', 'soy_sauce',\n 'vegetable_oil'],\n dtype='object')\ncuisine: korean\n" + ] + } + ], + "source": [ + "# test an item\n", + "print(f'ingredients: {X_test.iloc[50][X_test.iloc[50]!=0].keys()}')\n", + "print(f'cuisine: {y_test.iloc[50]}')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " 0\n", + "korean 0.392231\n", + "chinese 0.372872\n", + "japanese 0.218825\n", + "thai 0.013427\n", + "indian 0.002645" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
0
korean0.392231
chinese0.372872
japanese0.218825
thai0.013427
indian0.002645
\n
" + }, + "metadata": {}, + "execution_count": 8 + } + ], + "source": [ + "#rehsape to 2d array and transpose\n", + "test= X_test.iloc[50].values.reshape(-1, 1).T\n", + "# predict with score\n", + "proba = model.predict_proba(test)\n", + "classes = model.classes_\n", + "# create df with classes and scores\n", + "resultdf = pd.DataFrame(data=proba, columns=classes)\n", + "\n", + "# create df to show results\n", + "topPrediction = resultdf.T.sort_values(by=[0], ascending = [False])\n", + "topPrediction.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " precision recall f1-score support\n\n chinese 0.75 0.73 0.74 223\n indian 0.93 0.88 0.90 255\n japanese 0.78 0.78 0.78 253\n korean 0.87 0.86 0.86 236\n thai 0.76 0.84 0.80 232\n\n accuracy 0.82 1199\n macro avg 0.82 0.82 0.82 1199\nweighted avg 0.82 0.82 0.82 1199\n\n" + ] + } + ], + "source": [ + "y_pred = model.predict(X_test)\r\n", + "print(classification_report(y_test,y_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако тежимо тачности, молимо вас да имате у виду да аутоматизовани преводи могу садржати грешке или нетачности. Оригинални документ на изворном језику треба сматрати ауторитативним извором. За критичне информације препоручује се професионални превод од стране људи. 
Не сносимо одговорност за било каква неспоразумевања или погрешна тумачења која могу произаћи из коришћења овог превода.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "9408506dd864f2b6e334c62f80c0cfcc", + "translation_date": "2025-09-06T14:33:05+00:00", + "source_file": "4-Classification/2-Classifiers-1/solution/notebook.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/translations/sr/4-Classification/3-Classifiers-2/notebook.ipynb b/translations/sr/4-Classification/3-Classifiers-2/notebook.ipynb new file mode 100644 index 000000000..6df7eafbf --- /dev/null +++ b/translations/sr/4-Classification/3-Classifiers-2/notebook.ipynb @@ -0,0 +1,163 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 0 indian 0 0 0 0 0 \n", + "1 1 indian 1 0 0 0 0 \n", + "2 2 indian 0 0 0 0 0 \n", + "3 3 indian 0 0 0 0 0 \n", + "4 4 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 
0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 382 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00indian00000000...0000000000
11indian10000000...0000000000
22indian00000000...0000000000
33indian00000000...0000000000
44indian00000000...0000000010
\n

5 rows × 382 columns

\n
" + }, + "metadata": {}, + "execution_count": 9 + } + ], + "source": [ + "import pandas as pd\n", + "cuisines_df = pd.read_csv(\"../data/cleaned_cuisines.csv\")\n", + "cuisines_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 indian\n", + "1 indian\n", + "2 indian\n", + "3 indian\n", + "4 indian\n", + "Name: cuisine, dtype: object" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ], + "source": [ + "cuisines_label_df = cuisines_df['cuisine']\n", + "cuisines_label_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 11 + } + ], + "source": [ + "cuisines_feature_df = cuisines_df.drop(['Unnamed: 0', 'cuisine'], axis=1)\n", + "cuisines_feature_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако тежимо тачности, молимо вас да имате у виду да аутоматски преводи могу садржати грешке или нетачности. Оригинални документ на изворном језику треба сматрати ауторитативним извором. За критичне информације препоручује се професионални превод од стране људи. Не сносимо одговорност за било каква погрешна тумачења или неспоразуме који могу произаћи из коришћења овог превода.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "15a83277036572e0773229b5f21c1e12", + "translation_date": "2025-09-06T14:42:19+00:00", + "source_file": "4-Classification/3-Classifiers-2/notebook.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/translations/sr/4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb b/translations/sr/4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb new file mode 100644 index 000000000..6129989c4 --- /dev/null +++ 
b/translations/sr/4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb @@ -0,0 +1,650 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "lesson_12-R.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "fab50046ca413a38939d579f8432274f", + "translation_date": "2025-09-06T14:46:04+00:00", + "source_file": "4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb", + "language_code": "sr" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "jsFutf_ygqSx" + }, + "source": [ + "# Направите класификациони модел: Укусна азијска и индијска јела\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HD54bEefgtNO" + }, + "source": [ + "## Класификатори кухиња 2\n", + "\n", + "У овом другом часу о класификацији, истражићемо `више начина` за класификацију категоријских података. 
Такође ћемо научити о последицама избора једног класификатора уместо другог.\n", + "\n", + "### [**Квиз пре предавања**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/23/)\n", + "\n", + "### **Предуслови**\n", + "\n", + "Претпостављамо да сте завршили претходне лекције, јер ћемо наставити са неким концептима које смо раније научили.\n", + "\n", + "За ову лекцију биће нам потребни следећи пакети:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) је [збирка R пакета](https://www.tidyverse.org/packages) дизајнирана да учини науку о подацима бржом, лакшом и забавнијом!\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) је [оквир](https://www.tidymodels.org/packages/) који обухвата пакете за моделирање и машинско учење.\n", + "\n", + "- `themis`: [themis пакет](https://themis.tidymodels.org/) пружа додатне кораке за рецепте који се баве небалансираним подацима.\n", + "\n", + "Можете их инсталирати на следећи начин:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\", \"kernlab\", \"themis\", \"ranger\", \"xgboost\", \"kknn\"))`\n", + "\n", + "Алтернативно, скрипта испод проверава да ли имате потребне пакете за завршетак овог модула и инсталира их ако недостају.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "vZ57IuUxgyQt" + }, + "source": [ + "suppressWarnings(if (!require(\"pacman\"))install.packages(\"pacman\"))\n", + "\n", + "pacman::p_load(tidyverse, tidymodels, themis, kernlab, ranger, xgboost, kknn)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "z22M-pj4g07x" + }, + "source": [ + "## **1. Мапа класификације**\n", + "\n", + "У нашој [претходној лекцији](https://github.com/microsoft/ML-For-Beginners/tree/main/4-Classification/2-Classifiers-1), покушали смо да одговоримо на питање: како да изаберемо између више модела? 
У великој мери, то зависи од карактеристика података и типа проблема који желимо да решимо (на пример, класификација или регресија?).\n", + "\n", + "Раније смо научили о различитим опцијама које имате када класификујете податке користећи Microsoft-ов подсетник. Python-ов оквир за машинско учење, Scikit-learn, нуди сличан, али детаљнији подсетник који може додатно помоћи у сужењу избора проценитеља (други термин за класификаторе):\n", + "\n", + "

\n", + " \n", + "

\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "u1i3xRIVg7vG" + }, + "source": [ + "> Савет: [погледајте ову мапу онлајн](https://scikit-learn.org/stable/tutorial/machine_learning_map/) и кликните дуж путање да бисте прочитали документацију. \n", + "> \n", + "> [Референтни сајт за Tidymodels](https://www.tidymodels.org/find/parsnip/#models) такође пружа одличну документацију о различитим типовима модела.\n", + "\n", + "### **План** 🗺️\n", + "\n", + "Ова мапа је веома корисна када имате јасно разумевање ваших података, јер можете „шетати“ дуж њених путања до доношења одлуке:\n", + "\n", + "- Имамо више од 50 узорака\n", + "\n", + "- Желимо да предвидимо категорију\n", + "\n", + "- Имамо означене податке\n", + "\n", + "- Имамо мање од 100.000 узорака\n", + "\n", + "- ✨ Можемо изабрати Linear SVC\n", + "\n", + "- Ако то не функционише, пошто имамо нумеричке податке\n", + "\n", + " - Можемо пробати ✨ KNeighbors Classifier\n", + "\n", + " - Ако ни то не функционише, пробајте ✨ SVC и ✨ Ensemble Classifiers\n", + "\n", + "Ово је веома корисна путања коју треба пратити. Сада, хајде да одмах пређемо на то користећи [tidymodels](https://www.tidymodels.org/) оквир за моделирање: конзистентну и флексибилну колекцију R пакета развијених да подстакну добру статистичку праксу 😊.\n", + "\n", + "## 2. Поделите податке и решите проблем неуравнотеженог скупа података.\n", + "\n", + "Из претходних лекција смо научили да постоји скуп заједничких састојака у нашим кухињама. 
Такође, постојала је прилично неравномерна расподела у броју кухиња.\n", + "\n", + "Ово ћемо решити на следећи начин:\n", + "\n", + "- Избацивањем најчешћих састојака који стварају конфузију између различитих кухиња, користећи `dplyr::select()`.\n", + "\n", + "- Коришћењем `recipe` који претходно обрађује податке како би их припремио за моделирање применом алгоритма `over-sampling`.\n", + "\n", + "Ово смо већ обрадили у претходној лекцији, тако да би ово требало да буде лако 🥳!\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "6tj_rN00hClA" + }, + "source": [ + "# Load the core Tidyverse and Tidymodels packages\n", + "library(tidyverse)\n", + "library(tidymodels)\n", + "\n", + "# Load the original cuisines data\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/4-Classification/data/cuisines.csv\")\n", + "\n", + "# Drop id column, rice, garlic and ginger from our original data set\n", + "df_select <- df %>% \n", + " select(-c(1, rice, garlic, ginger)) %>%\n", + " # Encode cuisine column as categorical\n", + " mutate(cuisine = factor(cuisine))\n", + "\n", + "\n", + "# Create data split specification\n", + "set.seed(2056)\n", + "cuisines_split <- initial_split(data = df_select,\n", + " strata = cuisine,\n", + " prop = 0.7)\n", + "\n", + "# Extract the data in each split\n", + "cuisines_train <- training(cuisines_split)\n", + "cuisines_test <- testing(cuisines_split)\n", + "\n", + "# Display distribution of cuisines in the training set\n", + "cuisines_train %>% \n", + " count(cuisine) %>% \n", + " arrange(desc(n))" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zFin5yw3hHb1" + }, + "source": [ + "### Рад са неуравнотеженим подацима\n", + "\n", + "Неуравнотежени подаци често имају негативан утицај на перформансе модела. 
Многи модели најбоље функционишу када је број опсервација једнак и, самим тим, имају потешкоћа са неуравнотеженим подацима.\n", + "\n", + "Постоје два главна начина за рад са неуравнотеженим скуповима података:\n", + "\n", + "- додавање опсервација мањинској класи: `Прекомерно узорковање` (Over-sampling), на пример, коришћењем SMOTE алгоритма који синтетички генерише нове примере мањинске класе користећи најближе суседе тих случајева.\n", + "\n", + "- уклањање опсервација из већинске класе: `Потцењено узорковање` (Under-sampling)\n", + "\n", + "У претходном часу, демонстрирали смо како да радимо са неуравнотеженим скуповима података користећи `рецепт`. Рецепт се може сматрати планом који описује које кораке треба применити на скуп података како би био спреман за анализу података. У нашем случају, желимо да имамо једнаку дистрибуцију броја наших кухиња за наш `тренинг скуп`. Хајде да почнемо.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "cRzTnHolhLWd" + }, + "source": [ + "# Load themis package for dealing with imbalanced data\n", + "library(themis)\n", + "\n", + "# Create a recipe for preprocessing training data\n", + "cuisines_recipe <- recipe(cuisine ~ ., data = cuisines_train) %>%\n", + " step_smote(cuisine) \n", + "\n", + "# Print recipe\n", + "cuisines_recipe" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KxOQ2ORhhO81" + }, + "source": [ + "Сада смо спремни да обучавамо моделе 👩‍💻👨‍💻!\n", + "\n", + "## 3. Изван модела мултиномијалне регресије\n", + "\n", + "У претходној лекцији, разматрали смо моделе мултиномијалне регресије. Хајде да истражимо неке флексибилније моделе за класификацију.\n", + "\n", + "### Машине за подршку векторима\n", + "\n", + "У контексту класификације, `Машине за подршку векторима` су техника машинског учења која настоји да пронађе *хиперплан* који \"најбоље\" раздваја класе. Хајде да погледамо једноставан пример:\n", + "\n", + "

\n", + " \n", + "

https://commons.wikimedia.org/w/index.php?curid=22877598
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "C4Wsd0vZhXYu" + }, + "source": [ + "H1~ не раздваја класе. H2~ их раздваја, али само са малом маргином. H3~ их раздваја са максималном маргином.\n", + "\n", + "#### Линеарни класификатор подршке векторима\n", + "\n", + "Кластерисање подршке векторима (SVC) је део породице техника машинског учења заснованих на машинама подршке векторима. У SVC-у, хиперплан се бира тако да правилно раздвоји `већину` посматрања из тренинг скупа, али `може погрешно класификовати` нека посматрања. Дозвољавањем да неке тачке буду на погрешној страни, SVM постаје отпорнији на изузетке, чиме се побољшава генерализација на нове податке. Параметар који регулише ово кршење назива се `cost`, који има подразумевану вредност 1 (погледајте `help(\"svm_poly\")`).\n", + "\n", + "Хајде да направимо линеарни SVC тако што ћемо поставити `degree = 1` у полиномском SVM моделу.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "vJpp6nuChlBz" + }, + "source": [ + "# Make a linear SVC specification\n", + "svc_linear_spec <- svm_poly(degree = 1) %>% \n", + " set_engine(\"kernlab\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle specification and recipe into a worklow\n", + "svc_linear_wf <- workflow() %>% \n", + " add_recipe(cuisines_recipe) %>% \n", + " add_model(svc_linear_spec)\n", + "\n", + "# Print out workflow\n", + "svc_linear_wf" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rDs8cWNkhoqu" + }, + "source": [ + "Сада када смо обухватили кораке предобраде и спецификацију модела у *workflow*-у, можемо наставити са тренингом линеарног SVC-а и проценом резултата у истом процесу. 
За метрике перформанси, хајде да направимо сет метрика који ће процењивати: `тачност`, `осетљивост`, `позитивну предиктивну вредност` и `F меру`.\n", + "\n", + "> `augment()` ће додати колону(е) за предикције у дате податке.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "81wiqcwuhrnq" + }, + "source": [ + "# Train a linear SVC model\n", + "svc_linear_fit <- svc_linear_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "# Create a metric set\n", + "eval_metrics <- metric_set(ppv, sens, accuracy, f_meas)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "svc_linear_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0UFQvHf-huo3" + }, + "source": [ + "#### Машина за подршку вектора\n", + "\n", + "Машина за подршку вектора (SVM) је проширење класификатора за подршку вектора како би се омогућила нелинеарна граница између класа. У суштини, SVM користи *трик са језгром* да прошири простор карактеристика и прилагоди се нелинеарним односима између класа. 
Једна популарна и изузетно флексибилна функција језгра коју SVM користи је *функција радијалне основе.* Хајде да видимо како ће се она показати на нашим подацима.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "-KX4S8mzhzmp" + }, + "source": [ + "set.seed(2056)\n", + "\n", + "# Make an RBF SVM specification\n", + "svm_rbf_spec <- svm_rbf() %>% \n", + " set_engine(\"kernlab\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle specification and recipe into a worklow\n", + "svm_rbf_wf <- workflow() %>% \n", + " add_recipe(cuisines_recipe) %>% \n", + " add_model(svm_rbf_spec)\n", + "\n", + "\n", + "# Train an RBF model\n", + "svm_rbf_fit <- svm_rbf_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "svm_rbf_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QBFSa7WSh4HQ" + }, + "source": [ + "Много боље 🤩!\n", + "\n", + "> ✅ Молимо вас да погледате:\n", + ">\n", + "> - [*Support Vector Machines*](https://bradleyboehmke.github.io/HOML/svm.html), Практично машинско учење са R\n", + ">\n", + "> - [*Support Vector Machines*](https://www.statlearning.com/), Увод у статистичко учење са апликацијама у R\n", + ">\n", + "> за додатно читање.\n", + "\n", + "### Класификатори најближих суседа\n", + "\n", + "*K*-најближи сусед (KNN) је алгоритам у којем се свака опсервација предвиђа на основу њене *сличности* са другим опсервацијама.\n", + "\n", + "Хајде да га применимо на наше податке.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "k4BxxBcdh9Ka" + }, + "source": [ + "# Make a KNN specification\n", + "knn_spec <- nearest_neighbor() %>% \n", + " set_engine(\"kknn\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle recipe and model specification into a workflow\n", + "knn_wf 
<- workflow() %>% \n", + " add_recipe(cuisines_recipe) %>% \n", + " add_model(knn_spec)\n", + "\n", + "# Train a KNN model\n", + "knn_wf_fit <- knn_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "knn_wf_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HaegQseriAcj" + }, + "source": [ + "Чини се да овај модел не ради баш најбоље. Вероватно ће промена аргумената модела (погледајте `help(\"nearest_neighbor\")`) побољшати перформансе модела. Обавезно пробајте.\n", + "\n", + "> ✅ Погледајте:\n", + ">\n", + "> - [Hands-on Machine Learning with R](https://bradleyboehmke.github.io/HOML/)\n", + ">\n", + "> - [An Introduction to Statistical Learning with Applications in R](https://www.statlearning.com/)\n", + ">\n", + "> да бисте сазнали више о класификаторима *K*-најближих суседа.\n", + "\n", + "### Енсембл класификатори\n", + "\n", + "Енсембл алгоритми функционишу тако што комбинују више основних проценитеља како би произвели оптималан модел, било кроз:\n", + "\n", + "`bagging`: примену *функције просека* на колекцију основних модела\n", + "\n", + "`boosting`: изградњу секвенце модела који се надовезују један на други ради побољшања предиктивних перформанси.\n", + "\n", + "Хајде да започнемо са испробавањем модела Случајне шуме (Random Forest), који гради велику колекцију стабала одлучивања, а затим примењује функцију просека ради добијања бољег укупног модела.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "49DPoVs6iK1M" + }, + "source": [ + "# Make a random forest specification\n", + "rf_spec <- rand_forest() %>% \n", + " set_engine(\"ranger\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle recipe and model specification into a workflow\n", + "rf_wf <- workflow() %>% \n", + " 
add_recipe(cuisines_recipe) %>% \n", + " add_model(rf_spec)\n", + "\n", + "# Train a random forest model\n", + "rf_wf_fit <- rf_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "rf_wf_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RGVYwC_aiUWc" + }, + "source": [ + "Добар посао 👏!\n", + "\n", + "Хајде да експериментишемо са моделом Boosted Tree.\n", + "\n", + "Boosted Tree представља метод ансамбла који креира серију секвенцијалних стабала одлучивања где свако стабло зависи од резултата претходних стабала у настојању да постепено смањи грешку. Фокусира се на тежине погрешно класификованих ставки и прилагођава модел за следећи класификатор како би исправио грешке.\n", + "\n", + "Постоје различити начини за подешавање овог модела (погледајте `help(\"boost_tree\")`). 
У овом примеру, подешаваћемо Boosted стабла преко `xgboost` механизма.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Py1YWo-micWs" + }, + "source": [ + "# Make a boosted tree specification\n", + "boost_spec <- boost_tree(trees = 200) %>% \n", + " set_engine(\"xgboost\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle recipe and model specification into a workflow\n", + "boost_wf <- workflow() %>% \n", + " add_recipe(cuisines_recipe) %>% \n", + " add_model(boost_spec)\n", + "\n", + "# Train a boosted tree model\n", + "boost_wf_fit <- boost_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "boost_wf_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zNQnbuejigZM" + }, + "source": [ + "> ✅ Молимо вас да погледате:\n", + ">\n", + "> - [Машинско учење за друштвене научнике](https://cimentadaj.github.io/ml_socsci/tree-based-methods.html#random-forests)\n", + ">\n", + "> - [Практично машинско учење са R](https://bradleyboehmke.github.io/HOML/)\n", + ">\n", + "> - [Увод у статистичко учење са апликацијама у R](https://www.statlearning.com/)\n", + ">\n", + "> - - Истражује модел AdaBoost који је добра алтернатива за xgboost.\n", + ">\n", + "> за више информација о класификаторима ансамбла.\n", + "\n", + "## 4. Додатно - поређење више модела\n", + "\n", + "У овом лабораторијском раду смо применили прилично велики број модела 🙌. Може постати заморно или напорно креирати много радних токова из различитих сетова претпроцесора и/или спецификација модела, а затим израчунати метрике перформанси једну по једну.\n", + "\n", + "Хајде да видимо да ли можемо да решимо ово креирањем функције која примењује листу радних токова на сет за обуку, а затим враћа метрике перформанси на основу тест сета. 
Користићемо `map()` и `map_dfr()` из пакета [purrr](https://purrr.tidyverse.org/) да применимо функције на сваки елемент у листи.\n", + "\n", + "> [`map()`](https://purrr.tidyverse.org/reference/map.html) функције вам омогућавају да замените многе for петље кодом који је и сажетији и лакши за читање. Најбоље место за учење о [`map()`](https://purrr.tidyverse.org/reference/map.html) функцијама је [поглавље о итерацији](http://r4ds.had.co.nz/iteration.html) у R за науку о подацима.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Qzb7LyZnimd2" + }, + "source": [ + "set.seed(2056)\n", + "\n", + "# Create a metric set\n", + "eval_metrics <- metric_set(ppv, sens, accuracy, f_meas)\n", + "\n", + "# Define a function that returns performance metrics\n", + "compare_models <- function(workflow_list, train_set, test_set){\n", + " \n", + " suppressWarnings(\n", + " # Fit each model to the train_set\n", + " map(workflow_list, fit, data = train_set) %>% \n", + " # Make predictions on the test set\n", + " map_dfr(augment, new_data = test_set, .id = \"model\") %>%\n", + " # Select desired columns\n", + " select(model, cuisine, .pred_class) %>% \n", + " # Evaluate model performance\n", + " group_by(model) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class) %>% \n", + " ungroup()\n", + " )\n", + " \n", + "} # End of function" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Fwa712sNisDA" + }, + "source": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "3i4VJOi2iu-a" + }, + "source": [ + "# Make a list of workflows\n", + "workflow_list <- list(\n", + " \"svc\" = svc_linear_wf,\n", + " \"svm\" = svm_rbf_wf,\n", + " \"knn\" = knn_wf,\n", + " \"random_forest\" = rf_wf,\n", + " \"xgboost\" = boost_wf)\n", + "\n", + "# Call the function\n", + "set.seed(2056)\n", + "perf_metrics <- compare_models(workflow_list = workflow_list, train_set = cuisines_train, test_set = cuisines_test)\n", + 
"\n", + "# Print out performance metrics\n", + "perf_metrics %>% \n", + " group_by(.metric) %>% \n", + " arrange(desc(.estimate)) %>% \n", + " slice_head(n=7)\n", + "\n", + "# Compare accuracy\n", + "perf_metrics %>% \n", + " filter(.metric == \"accuracy\") %>% \n", + " arrange(desc(.estimate))\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KuWK_lEli4nW" + }, + "source": [ + "Пакет [**workflowset**](https://workflowsets.tidymodels.org/) омогућава корисницима да креирају и лако прилагоде велики број модела, али је углавном дизајниран за рад са техникама ресемплирања као што је `унакрсна валидација`, приступ који тек треба да обрадимо.\n", + "\n", + "## **🚀Изазов**\n", + "\n", + "Свака од ових техника има велики број параметара које можете прилагодити, на пример `cost` у SVM-у, `neighbors` у KNN-у, `mtry` (случајно изабрани предиктори) у Random Forest-у.\n", + "\n", + "Истражите подразумеване вредности параметара за сваки модел и размислите шта би значило прилагођавање ових параметара за квалитет модела.\n", + "\n", + "Да бисте сазнали више о одређеном моделу и његовим параметрима, користите: `help(\"model\")`, на пример `help(\"rand_forest\")`.\n", + "\n", + "> У пракси, обично *процењујемо* *најбоље вредности* за ове параметре тако што тренирамо многе моделе на `симулираном скупу података` и меримо колико добро ти модели раде. 
Овај процес се назива **тјунинг**.\n", + "\n", + "### [**Квиз након предавања**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/24/)\n", + "\n", + "### **Преглед и самостално учење**\n", + "\n", + "У овим лекцијама има доста стручних термина, па одвојите минут да прегледате [ову листу](https://docs.microsoft.com/dotnet/machine-learning/resources/glossary?WT.mc_id=academic-77952-leestott) корисне терминологије!\n", + "\n", + "#### ХВАЛА:\n", + "\n", + "[`Allison Horst`](https://twitter.com/allison_horst/) за креирање невероватних илустрација које чине R приступачнијим и занимљивијим. Пронађите више илустрација у њеној [галерији](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM).\n", + "\n", + "[Cassie Breviu](https://www.twitter.com/cassieview) и [Jen Looper](https://www.twitter.com/jenlooper) за креирање оригиналне Python верзије овог модула ♥️\n", + "\n", + "Срећно учење,\n", + "\n", + "[Eric](https://twitter.com/ericntay), Златни амбасадор студената Microsoft Learn.\n", + "\n", + "

\n", + " \n", + "

Илустрација од @allison_horst
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако тежимо тачности, молимо вас да имате у виду да аутоматски преводи могу садржати грешке или нетачности. Оригинални документ на изворном језику треба сматрати ауторитативним извором. За критичне информације препоручује се професионални превод од стране људи. Не сносимо одговорност за било каква погрешна тумачења или неспоразуме који могу произаћи из коришћења овог превода.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sr/4-Classification/3-Classifiers-2/solution/notebook.ipynb b/translations/sr/4-Classification/3-Classifiers-2/solution/notebook.ipynb new file mode 100644 index 000000000..72550a064 --- /dev/null +++ b/translations/sr/4-Classification/3-Classifiers-2/solution/notebook.ipynb @@ -0,0 +1,302 @@ +{ + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 0 indian 0 0 0 0 0 \n", + "1 1 indian 1 0 0 0 0 \n", + "2 2 indian 0 0 0 0 0 \n", + "3 3 indian 0 0 0 0 0 \n", + "4 4 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 382 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00indian00000000...0000000000
11indian10000000...0000000000
22indian00000000...0000000000
33indian00000000...0000000000
44indian00000000...0000000010
\n

5 rows × 382 columns

\n
" + }, + "metadata": {}, + "execution_count": 1 + } + ], + "source": [ + "import pandas as pd\n", + "cuisines_df = pd.read_csv(\"../../data/cleaned_cuisines.csv\")\n", + "cuisines_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 indian\n", + "1 indian\n", + "2 indian\n", + "3 indian\n", + "4 indian\n", + "Name: cuisine, dtype: object" + ] + }, + "metadata": {}, + "execution_count": 2 + } + ], + "source": [ + "cuisines_label_df = cuisines_df['cuisine']\n", + "cuisines_label_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 3 + } + ], + "source": [ + "cuisines_feature_df = cuisines_df.drop(['Unnamed: 0', 'cuisine'], axis=1)\n", + "cuisines_feature_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Пробајте различите класификаторе\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.neighbors import KNeighborsClassifier\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.svm import SVC\n", + "from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier\n", + "from sklearn.model_selection import train_test_split, cross_val_score\n", + "from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report, precision_recall_curve\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(cuisines_feature_df, cuisines_label_df, test_size=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "C = 10\n", + "# Create different classifiers.\n", + "classifiers = {\n", + " 'Linear SVC': SVC(kernel='linear', C=C, probability=True,random_state=0),\n", + " 'KNN classifier': KNeighborsClassifier(C),\n", + " 'SVC': SVC(),\n", + " 'RFST': RandomForestClassifier(n_estimators=100),\n", + " 'ADA': AdaBoostClassifier(n_estimators=100)\n", + " \n", + "}\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Accuracy (train) for Linear SVC: 76.4% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.64 0.66 0.65 242\n", + " indian 0.91 0.86 0.89 236\n", + " japanese 0.72 0.73 0.73 245\n", + " korean 0.83 0.75 0.79 234\n", + " thai 0.75 0.82 0.78 242\n", + "\n", + " 
accuracy 0.76 1199\n", + " macro avg 0.77 0.76 0.77 1199\n", + "weighted avg 0.77 0.76 0.77 1199\n", + "\n", + "Accuracy (train) for KNN classifier: 70.7% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.65 0.63 0.64 242\n", + " indian 0.84 0.81 0.82 236\n", + " japanese 0.60 0.81 0.69 245\n", + " korean 0.89 0.53 0.67 234\n", + " thai 0.69 0.75 0.72 242\n", + "\n", + " accuracy 0.71 1199\n", + " macro avg 0.73 0.71 0.71 1199\n", + "weighted avg 0.73 0.71 0.71 1199\n", + "\n", + "Accuracy (train) for SVC: 80.1% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.71 0.69 0.70 242\n", + " indian 0.92 0.92 0.92 236\n", + " japanese 0.77 0.78 0.77 245\n", + " korean 0.87 0.77 0.82 234\n", + " thai 0.75 0.86 0.80 242\n", + "\n", + " accuracy 0.80 1199\n", + " macro avg 0.80 0.80 0.80 1199\n", + "weighted avg 0.80 0.80 0.80 1199\n", + "\n", + "Accuracy (train) for RFST: 82.8% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.80 0.75 0.77 242\n", + " indian 0.90 0.91 0.90 236\n", + " japanese 0.82 0.78 0.80 245\n", + " korean 0.85 0.82 0.83 234\n", + " thai 0.78 0.89 0.83 242\n", + "\n", + " accuracy 0.83 1199\n", + " macro avg 0.83 0.83 0.83 1199\n", + "weighted avg 0.83 0.83 0.83 1199\n", + "\n", + "Accuracy (train) for ADA: 71.1% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.60 0.57 0.58 242\n", + " indian 0.87 0.84 0.86 236\n", + " japanese 0.71 0.60 0.65 245\n", + " korean 0.68 0.78 0.72 234\n", + " thai 0.70 0.78 0.74 242\n", + "\n", + " accuracy 0.71 1199\n", + " macro avg 0.71 0.71 0.71 1199\n", + "weighted avg 0.71 0.71 0.71 1199\n", + "\n" + ] + } + ], + "source": [ + "n_classifiers = len(classifiers)\n", + "\n", + "for index, (name, classifier) in enumerate(classifiers.items()):\n", + " classifier.fit(X_train, np.ravel(y_train))\n", + "\n", + " y_pred = classifier.predict(X_test)\n", + " accuracy = accuracy_score(y_test, y_pred)\n", + " print(\"Accuracy (train) for %s: %0.1f%% \" % 
(name, accuracy * 100))\n", + " print(classification_report(y_test,y_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако тежимо тачности, молимо вас да имате у виду да аутоматски преводи могу садржати грешке или нетачности. Оригинални документ на изворном језику треба сматрати ауторитативним извором. За критичне информације препоручује се професионални превод од стране људи. Не сносимо одговорност за било каква погрешна тумачења или неспоразуме који могу произаћи из коришћења овог превода.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "7ea2b714669c823a596d986ba2d5739f", + "translation_date": "2025-09-06T14:42:48+00:00", + "source_file": "4-Classification/3-Classifiers-2/solution/notebook.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/translations/sr/4-Classification/4-Applied/notebook.ipynb b/translations/sr/4-Classification/4-Applied/notebook.ipynb new file mode 100644 index 000000000..aac5cbed1 --- /dev/null +++ b/translations/sr/4-Classification/4-Applied/notebook.ipynb @@ -0,0 +1,39 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + 
"version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 4, + "coopTranslator": { + "original_hash": "2f3e0d9e9ac5c301558fb8bf733ac0cb", + "translation_date": "2025-09-06T14:41:29+00:00", + "source_file": "4-Classification/4-Applied/notebook.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако се трудимо да превод буде тачан, молимо вас да имате у виду да аутоматизовани преводи могу садржати грешке или нетачности. Оригинални документ на његовом изворном језику треба сматрати меродавним извором. За критичне информације препоручује се професионални превод од стране људи. 
Не преузимамо одговорност за било каква погрешна тумачења или неспоразуме који могу настати услед коришћења овог превода.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sr/4-Classification/4-Applied/solution/notebook.ipynb b/translations/sr/4-Classification/4-Applied/solution/notebook.ipynb new file mode 100644 index 000000000..206efb042 --- /dev/null +++ b/translations/sr/4-Classification/4-Applied/solution/notebook.ipynb @@ -0,0 +1,290 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "49325d6dd12a3628fc64fa7ccb1a80ff", + "translation_date": "2025-09-06T14:41:54+00:00", + "source_file": "4-Classification/4-Applied/solution/notebook.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: skl2onnx in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (1.8.0)\n", + "Requirement already satisfied: protobuf in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (3.8.0)\n", + "Requirement already satisfied: numpy>=1.15 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages 
(from skl2onnx) (1.19.2)\n", + "Requirement already satisfied: onnx>=1.2.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (1.9.0)\n", + "Requirement already satisfied: six in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from skl2onnx) (1.12.0)\n", + "Requirement already satisfied: onnxconverter-common<1.9,>=1.6.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (1.8.1)\n", + "Requirement already satisfied: scikit-learn>=0.19 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (0.24.2)\n", + "Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (1.4.1)\n", + "Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from protobuf->skl2onnx) (45.1.0)\n", + "Requirement already satisfied: typing-extensions>=3.6.2.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from onnx>=1.2.1->skl2onnx) (3.10.0.0)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from scikit-learn>=0.19->skl2onnx) (2.1.0)\n", + "Requirement already satisfied: joblib>=0.11 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from scikit-learn>=0.19->skl2onnx) (0.16.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "!pip install skl2onnx" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [], + 
"source": [ + "import pandas as pd \n" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 0 indian 0 0 0 0 0 \n", + "1 1 indian 1 0 0 0 0 \n", + "2 2 indian 0 0 0 0 0 \n", + "3 3 indian 0 0 0 0 0 \n", + "4 4 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 382 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00indian00000000...0000000000
11indian10000000...0000000000
22indian00000000...0000000000
33indian00000000...0000000000
44indian00000000...0000000010
\n

5 rows × 382 columns

\n
" + }, + "metadata": {}, + "execution_count": 60 + } + ], + "source": [ + "data = pd.read_csv('../../data/cleaned_cuisines.csv')\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 61 + } + ], + "source": [ + "X = data.iloc[:,2:]\n", + "X.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " cuisine\n", + "0 indian\n", + "1 indian\n", + "2 indian\n", + "3 indian\n", + "4 indian" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
cuisine
0indian
1indian
2indian
3indian
4indian
\n
" + }, + "metadata": {}, + "execution_count": 62 + } + ], + "source": [ + "y = data[['cuisine']]\n", + "y.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "from sklearn.svm import SVC\n", + "from sklearn.model_selection import cross_val_score\n", + "from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "SVC(C=10, kernel='linear', probability=True, random_state=0)" + ] + }, + "metadata": {}, + "execution_count": 65 + } + ], + "source": [ + "model = SVC(kernel='linear', C=10, probability=True,random_state=0)\n", + "model.fit(X_train,y_train.values.ravel())\n" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred = model.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " precision recall f1-score support\n\n chinese 0.72 0.70 0.71 236\n indian 0.91 0.88 0.89 243\n japanese 0.80 0.75 0.77 240\n korean 0.80 0.81 0.81 230\n thai 0.76 0.85 0.80 250\n\n accuracy 0.80 1199\n macro avg 0.80 0.80 0.80 1199\nweighted avg 0.80 0.80 0.80 1199\n\n" + ] + } + ], + "source": [ + "print(classification_report(y_test,y_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [], + "source": [ + "from skl2onnx import convert_sklearn\n", + "from skl2onnx.common.data_types import FloatTensorType\n", + "\n", + "initial_type = 
[('float_input', FloatTensorType([None, 380]))]\n", + "options = {id(model): {'nocl': True, 'zipmap': False}}\n", + "onx = convert_sklearn(model, initial_types=initial_type, options=options)\n", + "with open(\"./model.onnx\", \"wb\") as f:\n", + " f.write(onx.SerializeToString())\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако тежимо тачности, молимо вас да имате у виду да аутоматски преводи могу садржати грешке или нетачности. Оригинални документ на изворном језику треба сматрати ауторитативним извором. За критичне информације препоручује се професионални превод од стране људи. Не сносимо одговорност за било каква погрешна тумачења или неспоразуме који могу произаћи из коришћења овог превода.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sr/5-Clustering/1-Visualize/notebook.ipynb b/translations/sr/5-Clustering/1-Visualize/notebook.ipynb new file mode 100644 index 000000000..e699f99ca --- /dev/null +++ b/translations/sr/5-Clustering/1-Visualize/notebook.ipynb @@ -0,0 +1,50 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python383jvsc74a57bd0e134e05457d34029b6460cd73bbf1ed73f339b5b6d98c95be70b69eba114fe95", + "display_name": "Python 3.8.3 64-bit (conda)" + }, + "coopTranslator": { + "original_hash": "40e0707e96b3e1899a912776006264f9", + "translation_date": "2025-09-06T14:07:59+00:00", + "source_file": "5-Clustering/1-Visualize/notebook.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ 
+ { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако настојимо да обезбедимо тачност, молимо вас да имате у виду да аутоматизовани преводи могу садржати грешке или нетачности. Оригинални документ на изворном језику треба сматрати ауторитативним извором. За критичне информације препоручује се професионални превод од стране људи. Не сносимо одговорност за било каква погрешна тумачења или неспоразуме који могу произаћи из коришћења овог превода.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sr/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb b/translations/sr/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb new file mode 100644 index 000000000..b0576f099 --- /dev/null +++ b/translations/sr/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb @@ -0,0 +1,500 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "## **Нигеријска музика преузета са Spotify - анализа**\n", + "\n", + "Кластеризација је врста [ненаџираног учења](https://wikipedia.org/wiki/Unsupervised_learning) која претпоставља да је скуп података необележен или да његови уноси нису повезани са унапред дефинисаним излазима. Користи различите алгоритме за сортирање необележених података и пружа груписања на основу образаца које препознаје у подацима.\n", + "\n", + "[**Квиз пре предавања**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/27/)\n", + "\n", + "### **Увод**\n", + "\n", + "[Кластеризација](https://link.springer.com/referenceworkentry/10.1007%2F978-0-387-30164-8_124) је веома корисна за истраживање података. 
Хајде да видимо да ли може помоћи у откривању трендова и образаца у начину на који нигеријска публика конзумира музику.\n", + "\n", + "> ✅ Одвојите минут да размислите о употреби кластеризације. У стварном животу, кластеризација се дешава кад год имате гомилу веша и треба да сортирате одећу чланова породице 🧦👕👖🩲. У науци о подацима, кластеризација се дешава када покушавате да анализирате корисничке преференције или одредите карактеристике било ког необележеног скупа података. Кластеризација, на неки начин, помаже да се уведе ред у хаос, као у фиоци за чарапе.\n", + "\n", + "У професионалном окружењу, кластеризација се може користити за одређивање сегментације тржишта, на пример, за утврђивање које старосне групе купују које производе. Друга употреба би била откривање аномалија, можда за откривање преваре из скупа података о трансакцијама кредитним картицама. Или бисте могли да користите кластеризацију за одређивање тумора у серији медицинских снимака.\n", + "\n", + "✅ Одвојите минут да размислите о томе како сте можда наишли на кластеризацију „у природи“, у банкарству, е-трговини или пословном окружењу.\n", + "\n", + "> 🎓 Занимљиво је да је анализа кластера настала у областима антропологије и психологије 1930-их. Можете ли замислити како је могла бити коришћена?\n", + "\n", + "Алтернативно, могли бисте је користити за груписање резултата претраге - на пример, по куповним линковима, сликама или рецензијама. Кластеризација је корисна када имате велики скуп података који желите да смањите и на којем желите да извршите детаљнију анализу, па се техника може користити за учење о подацима пре него што се изграде други модели.\n", + "\n", + "✅ Када су ваши подаци организовани у кластере, додељујете им идентификатор кластера, а ова техника може бити корисна када желите да сачувате приватност скупа података; можете се уместо тога позивати на тачку података преко њеног идентификатора кластера, а не преко откривенијих идентификационих података. 
Можете ли смислити друге разлоге зашто бисте се позивали на идентификатор кластера уместо на друге елементе кластера да бисте га идентификовали?\n", + "\n", + "### Почетак рада са кластеризацијом\n", + "\n", + "> 🎓 Начин на који креирамо кластере има много везе са начином на који групишемо тачке података у групе. Хајде да разјаснимо неке термине:\n", + ">\n", + "> 🎓 ['Трансдуктивно' наспрам 'индуктивно'](https://wikipedia.org/wiki/Transduction_(machine_learning))\n", + ">\n", + "> Трансдуктивно закључивање се изводи из посматраних случајева обуке који се мапирају на одређене тест случајеве. Индуктивно закључивање се изводи из случајева обуке који се мапирају на општа правила која се тек онда примењују на тест случајеве.\n", + ">\n", + "> Пример: Замислите да имате скуп података који је само делимично обележен. Неке ствари су „плоче“, неке „ЦД-ови“, а неке су празне. Ваш задатак је да обезбедите ознаке за празнине. Ако изаберете индуктивни приступ, обучили бисте модел тражећи „плоче“ и „ЦД-ове“ и применили те ознаке на необележене податке. Овај приступ ће имати потешкоћа у класификовању ствари које су заправо „касете“. Трансдуктивни приступ, с друге стране, ефикасније обрађује ове непознате податке јер ради на груписању сличних ставки и затим примењује ознаку на групу. 
У овом случају, кластери би могли одражавати „округле музичке ствари“ и „квадратне музичке ствари“.\n", + ">\n", + "> 🎓 ['Нефлатна' наспрам 'флатна' геометрија](https://datascience.stackexchange.com/questions/52260/terminology-flat-geometry-in-the-context-of-clustering)\n", + ">\n", + "> Изведено из математичке терминологије, нефлатна наспрам флатна геометрија односи се на мерење удаљености између тачака било „флатним“ ([Еуклидским](https://wikipedia.org/wiki/Euclidean_geometry)) или „нефлатним“ (не-Еуклидским) геометријским методама.\n", + ">\n", + "> „Флатна“ у овом контексту се односи на Еуклидску геометрију (чији се делови уче као „планарна“ геометрија), а нефлатна се односи на не-Еуклидску геометрију. Какве везе геометрија има са машинским учењем? Па, као две области које су укорењене у математици, мора постојати заједнички начин мерења удаљености између тачака у кластерима, а то се може урадити на „флатан“ или „нефлатан“ начин, у зависности од природе података. [Еуклидске удаљености](https://wikipedia.org/wiki/Euclidean_distance) се мере као дужина сегмента линије између две тачке. [Не-Еуклидске удаљености](https://wikipedia.org/wiki/Non-Euclidean_geometry) се мере дуж криве. Ако ваши подаци, визуализовани, изгледају као да не постоје на равни, можда ћете морати да користите специјализовани алгоритам за њихово обрађивање.\n", + "\n", + "

\n", + " \n", + "

Инфографик, аутор: Дасани Мадипали
\n", + "\n", + "\n", + "\n", + "> 🎓 ['Удаљености'](https://web.stanford.edu/class/cs345a/slides/12-clustering.pdf)\n", + ">\n", + "> Кластери се дефинишу њиховом матрицом удаљености, нпр. удаљеностима између тачака. Ова удаљеност се може мерити на неколико начина. Еуклидски кластери се дефинишу просеком вредности тачака и садрже „центроид“ или централну тачку. Удаљености се стога мере удаљеношћу до тог центроида. Не-Еуклидске удаљености се односе на „кластроиде“, тачку најближу другим тачкама. Кластроиди се могу дефинисати на различите начине.\n", + ">\n", + "> 🎓 ['Ограничена'](https://wikipedia.org/wiki/Constrained_clustering)\n", + ">\n", + "> [Ограничена кластеризација](https://web.cs.ucdavis.edu/~davidson/Publications/ICDMTutorial.pdf) уводи „полунадзирано“ учење у овај ненадзирани метод. Односи између тачака су означени као „не могу се повезати“ или „морају се повезати“, тако да се нека правила намећу на скуп података.\n", + ">\n", + "> Пример: Ако је алгоритам пуштен на серију необележених или полу-обележених података, кластери које производи могу бити лошег квалитета. У горњем примеру, кластери би могли груписати „округле музичке ствари“, „квадратне музичке ствари“, „троугласте ствари“ и „колачиће“. Ако се дају нека ограничења или правила која треба да се прате („ставка мора бити направљена од пластике“, „ставка мора бити у стању да производи музику“), то може помоћи да се „ограничи“ алгоритам да прави боље изборе.\n", + ">\n", + "> 🎓 'Густина'\n", + ">\n", + "> Подаци који су „шумни“ сматрају се „густим“. Удаљености између тачака у сваком од њихових кластера могу се показати, при испитивању, као више или мање густе, или „претрпане“, и стога ови подаци треба да се анализирају одговарајућим методом кластеризације. 
[Овај чланак](https://www.kdnuggets.com/2020/02/understanding-density-based-clustering.html) демонстрира разлику између коришћења K-Means кластеризације и HDBSCAN алгоритама за истраживање шумног скупа података са неравномерном густином кластера.\n", + "\n", + "Продубите своје разумевање техника кластеризације у овом [Learn модулу](https://docs.microsoft.com/learn/modules/train-evaluate-cluster-models?WT.mc_id=academic-77952-leestott)\n", + "\n", + "### **Алгоритми кластеризације**\n", + "\n", + "Постоји преко 100 алгоритама кластеризације, а њихова употреба зависи од природе података. Хајде да разговарамо о неким од главних:\n", + "\n", + "- **Хијерархијска кластеризација**. Ако се објекат класификује према његовој близини другом објекту, а не оном који је удаљенији, кластери се формирају на основу удаљености њихових чланова од других објеката. Хијерархијска кластеризација се карактерише узастопним спајањем по два кластера.\n", + "\n", + "\n", + "

\n", + " \n", + "

Инфографик, аутор: Дасани Мадипали
\n", + "\n", + "\n", + "\n", + "- **Центроидна кластеризација**. Овај популарни алгоритам захтева избор „k“, или броја кластера који треба формирати, након чега алгоритам одређује централну тачку кластера и окупља податке око те тачке. [K-Means кластеризација](https://wikipedia.org/wiki/K-means_clustering) је популарна верзија центроидне кластеризације која раздваја скуп података у унапред дефинисаних K група. Центар се одређује најближим просеком, отуда и назив. Квадратна удаљеност од кластера се минимизира.\n", + "\n", + "

\n", + " \n", + "

Инфографик, аутор: Дасани Мадипали
\n", + "\n", + "\n", + "\n", + "- **Кластеризација заснована на дистрибуцији**. Заснована на статистичком моделовању, кластеризација заснована на дистрибуцији се фокусира на одређивање вероватноће да тачка података припада кластеру и њено додељивање у складу с тим. Методи Гаусове мешавине припадају овом типу.\n", + "\n", + "- **Кластеризација заснована на густини**. Тачке података се додељују кластерима на основу њихове густине, или њиховог груписања једна око друге. Тачке података далеко од групе се сматрају изузецима или шумом. DBSCAN, Mean-shift и OPTICS припадају овом типу кластеризације.\n", + "\n", + "- **Кластеризација заснована на мрежи**. За мултидимензионалне скупове података, креира се мрежа и подаци се деле међу ћелијама мреже, чиме се стварају кластери.\n", + "\n", + "Најбољи начин да научите о кластеризацији је да је сами испробате, па ћете то урадити у овом задатку.\n", + "\n", + "Биће нам потребни неки пакети за завршетак овог модула. Можете их инсталирати као: `install.packages(c('tidyverse', 'tidymodels', 'DataExplorer', 'summarytools', 'plotly', 'paletteer', 'corrplot', 'patchwork'))`\n", + "\n", + "Алтернативно, скрипта испод проверава да ли имате потребне пакете за завршетак овог модула и инсталира их за вас у случају да неки недостају.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "suppressWarnings(if(!require(\"pacman\")) install.packages(\"pacman\"))\r\n", + "\r\n", + "pacman::p_load('tidyverse', 'tidymodels', 'DataExplorer', 'summarytools', 'plotly', 'paletteer', 'corrplot', 'patchwork')\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Вежба - кластерисање ваших података\n", + "\n", + "Кластерисање као техника је знатно олакшано правилном визуализацијом, па хајде да почнемо са визуализацијом наших музичких података. 
Ова вежба ће нам помоћи да одлучимо који метод кластерисања би био најделотворнији за природу ових података.\n", + "\n", + "Хајде да одмах кренемо тако што ћемо увезти податке.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load the core tidyverse and make it available in your current R session\r\n", + "library(tidyverse)\r\n", + "\r\n", + "# Import the data into a tibble\r\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/5-Clustering/data/nigerian-songs.csv\")\r\n", + "\r\n", + "# View the first 5 rows of the data set\r\n", + "df %>% \r\n", + " slice_head(n = 5)\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Понекад желимо да сазнамо мало више информација о нашим подацима. Можемо погледати `податке` и `њихову структуру` користећи функцију [*glimpse()*](https://pillar.r-lib.org/reference/glimpse.html):\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Glimpse into the data set\r\n", + "df %>% \r\n", + " glimpse()\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Браво!💪\n", + "\n", + "Можемо приметити да `glimpse()` приказује укупан број редова (посматрања) и колона (променљивих), затим првих неколико уноса сваке променљиве у реду након имена променљиве. 
Поред тога, *тип података* променљиве је наведен одмах након имена променљиве унутар `< >`.\n", + "\n", + "`DataExplorer::introduce()` може сажети ове информације на уредан начин:\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Describe basic information for our data\r\n", + "df %>% \r\n", + " introduce()\r\n", + "\r\n", + "# A visual display of the same\r\n", + "df %>% \r\n", + " plot_intro()\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Сјајно! 
Управо смо сазнали да наши подаци немају недостајуће вредности.\n", + "\n", + "Док смо ту, можемо истражити уобичајене статистике централне тенденције (нпр. [аритметичка средина](https://en.wikipedia.org/wiki/Arithmetic_mean) и [медијана](https://en.wikipedia.org/wiki/Median)) и мере распршености (нпр. [стандардна девијација](https://en.wikipedia.org/wiki/Standard_deviation)) користећи `summarytools::descr()`.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Describe common statistics\r\n", + "df %>% \r\n", + " descr(stats = \"common\")\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Хајде да погледамо опште вредности података. Имајте на уму да популарност може бити `0`, што указује на песме које немају ранг. Ускоро ћемо их уклонити.\n", + "\n", + "> 🤔 Ако радимо са кластерисањем, ненадзираним методом која не захтева означене податке, зашто приказујемо ове податке са ознакама? У фази истраживања података, оне су корисне, али нису неопходне за рад алгоритама кластерисања.\n", + "\n", + "### 1. Истражите популарне жанрове\n", + "\n", + "Хајде да сазнамо који су најпопуларнији жанрови 🎶 тако што ћемо пребројати колико се пута појављују.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Popular genres\r\n", + "top_genres <- df %>% \r\n", + " count(artist_top_genre, sort = TRUE) %>% \r\n", + "# Encode to categorical and reorder the according to count\r\n", + " mutate(artist_top_genre = factor(artist_top_genre) %>% fct_inorder())\r\n", + "\r\n", + "# Print the top genres\r\n", + "top_genres\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "То је прошло добро! Кажу да слика вреди хиљаду редова података у оквиру (заправо нико то никада не каже 😅). 
Али разумете суштину, зар не?\n", + "\n", + "Један од начина да визуализујете категоријске податке (знаковне или факторске променљиве) је коришћењем стубичастих графикона. Хајде да направимо стубичасти графикон за топ 10 жанрова:\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Change the default gray theme\r\n", + "theme_set(theme_light())\r\n", + "\r\n", + "# Visualize popular genres\r\n", + "top_genres %>%\r\n", + " slice(1:10) %>% \r\n", + " ggplot(mapping = aes(x = artist_top_genre, y = n,\r\n", + " fill = artist_top_genre)) +\r\n", + " geom_col(alpha = 0.8) +\r\n", + " paletteer::scale_fill_paletteer_d(\"rcartocolor::Vivid\") +\r\n", + " ggtitle(\"Top genres\") +\r\n", + " theme(plot.title = element_text(hjust = 0.5),\r\n", + " # Rotates the X markers (so we can read them)\r\n", + " axis.text.x = element_text(angle = 90))\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Сада је много лакше уочити да имамо `недостајуће` жанрове 🧐!\n", + "\n", + "> Добра визуализација ће вам показати ствари које нисте очекивали, или ће покренути нова питања о подацима - Хадли Викхем и Гарет Гролемунд, [R For Data Science](https://r4ds.had.co.nz/introduction.html)\n", + "\n", + "Имајте на уму, када је главни жанр описан као `Missing`, то значи да га Spotify није класификовао, па хајде да га уклонимо.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Visualize popular genres\r\n", + "top_genres %>%\r\n", + " filter(artist_top_genre != \"Missing\") %>% \r\n", + " slice(1:10) %>% \r\n", + " ggplot(mapping = aes(x = artist_top_genre, y = n,\r\n", + " fill = artist_top_genre)) +\r\n", + " geom_col(alpha = 0.8) +\r\n", + " paletteer::scale_fill_paletteer_d(\"rcartocolor::Vivid\") +\r\n", + " ggtitle(\"Top genres\") +\r\n", + " theme(plot.title = element_text(hjust = 0.5),\r\n", + " # Rotates the X markers (so we can read 
them)\r\n", + " axis.text.x = element_text(angle = 90))\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Из малог истраживања података, сазнајемо да три најпопуларнија жанра доминирају овим скупом података. Хајде да се усредсредимо на `afro dancehall`, `afropop` и `nigerian pop`, и додатно филтрирамо скуп података како бисмо уклонили све што има вредност популарности 0 (што значи да није класификовано са популарношћу у скупу података и може се сматрати шумом за наше потребе):\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "nigerian_songs <- df %>% \r\n", + " # Concentrate on top 3 genres\r\n", + " filter(artist_top_genre %in% c(\"afro dancehall\", \"afropop\",\"nigerian pop\")) %>% \r\n", + " # Remove unclassified observations\r\n", + " filter(popularity != 0)\r\n", + "\r\n", + "\r\n", + "\r\n", + "# Visualize popular genres\r\n", + "nigerian_songs %>%\r\n", + " count(artist_top_genre) %>%\r\n", + " ggplot(mapping = aes(x = artist_top_genre, y = n,\r\n", + " fill = artist_top_genre)) +\r\n", + " geom_col(alpha = 0.8) +\r\n", + " paletteer::scale_fill_paletteer_d(\"ggsci::category10_d3\") +\r\n", + " ggtitle(\"Top genres\") +\r\n", + " theme(plot.title = element_text(hjust = 0.5))\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Хајде да видимо да ли постоји очигледна линеарна веза између нумеричких променљивих у нашем скупу података. Ова веза се математички квантитативно изражава помоћу [статистике корелације](https://en.wikipedia.org/wiki/Correlation).\n", + "\n", + "Статистика корелације је вредност између -1 и 1 која указује на јачину везе. 
Вредности изнад 0 указују на *позитивну* корелацију (високе вредности једне променљиве обично се поклапају са високим вредностима друге), док вредности испод 0 указују на *негативну* корелацију (високе вредности једне променљиве обично се поклапају са ниским вредностима друге).\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Narrow down to numeric variables and find correlation\r\n", + "corr_mat <- nigerian_songs %>% \r\n", + " select(where(is.numeric)) %>% \r\n", + " cor()\r\n", + "\r\n", + "# Visualize correlation matrix\r\n", + "corrplot(corr_mat, order = 'AOE', col = c('white', 'black'), bg = 'gold2') \r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Подаци нису јако повезани, осим између `energy` и `loudness`, што има смисла, с обзиром на то да је гласна музика обично прилично енергична. `Popularity` има везу са `release date`, што такође има смисла, јер су новије песме вероватно популарније. Дужина и енергија такође изгледају као да имају корелацију.\n", + "\n", + "Биће занимљиво видети шта алгоритам за кластеризацију може да уради са овим подацима!\n", + "\n", + "> 🎓 Имајте на уму да корелација не подразумева узрочност! Имамо доказ корелације, али немамо доказ узрочности. Један [забаван веб сајт](https://tylervigen.com/spurious-correlations) има неке визуализације које наглашавају ову тачку.\n", + "\n", + "### 2. Истражите расподелу података\n", + "\n", + "Хајде да поставимо нека суптилнија питања. Да ли се жанрови значајно разликују у перцепцији њихове плесности, на основу њихове популарности? 
Хајде да испитамо расподелу података за наша три најпопуларнија жанра у погледу популарности и плесности дуж задате x и y осе користећи [графиконе густине](https://www.khanacademy.org/math/ap-statistics/density-curves-normal-distribution-ap/density-curves/v/density-curves).\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Perform 2D kernel density estimation\r\n", + "density_estimate_2d <- nigerian_songs %>% \r\n", + " ggplot(mapping = aes(x = popularity, y = danceability, color = artist_top_genre)) +\r\n", + " geom_density_2d(bins = 5, size = 1) +\r\n", + " paletteer::scale_color_paletteer_d(\"RSkittleBrewer::wildberry\") +\r\n", + " xlim(-20, 80) +\r\n", + " ylim(0, 1.2)\r\n", + "\r\n", + "# Density plot based on the popularity\r\n", + "density_estimate_pop <- nigerian_songs %>% \r\n", + " ggplot(mapping = aes(x = popularity, fill = artist_top_genre, color = artist_top_genre)) +\r\n", + " geom_density(size = 1, alpha = 0.5) +\r\n", + " paletteer::scale_fill_paletteer_d(\"RSkittleBrewer::wildberry\") +\r\n", + " paletteer::scale_color_paletteer_d(\"RSkittleBrewer::wildberry\") +\r\n", + " theme(legend.position = \"none\")\r\n", + "\r\n", + "# Density plot based on the danceability\r\n", + "density_estimate_dance <- nigerian_songs %>% \r\n", + " ggplot(mapping = aes(x = danceability, fill = artist_top_genre, color = artist_top_genre)) +\r\n", + " geom_density(size = 1, alpha = 0.5) +\r\n", + " paletteer::scale_fill_paletteer_d(\"RSkittleBrewer::wildberry\") +\r\n", + " paletteer::scale_color_paletteer_d(\"RSkittleBrewer::wildberry\")\r\n", + "\r\n", + "\r\n", + "# Patch everything together\r\n", + "library(patchwork)\r\n", + "density_estimate_2d / (density_estimate_pop + density_estimate_dance)\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Видимо да постоје концентрични кругови који се поклапају, без обзира на жанр. 
Да ли је могуће да се укуси у Нигерији конвергирају на одређеном нивоу плесности за овај жанр?\n", + "\n", + "Уопштено, три жанра се поклапају у смислу популарности и плесности. Одређивање кластера у овим лабаво поравнатим подацима биће изазов. Хајде да видимо да ли дијаграм расејања може да подржи ово.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# A scatter plot of popularity and danceability\r\n", + "scatter_plot <- nigerian_songs %>% \r\n", + " ggplot(mapping = aes(x = popularity, y = danceability, color = artist_top_genre, shape = artist_top_genre)) +\r\n", + " geom_point(size = 2, alpha = 0.8) +\r\n", + " paletteer::scale_color_paletteer_d(\"futurevisions::mars\")\r\n", + "\r\n", + "# Add a touch of interactivity\r\n", + "ggplotly(scatter_plot)\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Дијаграм расејања истих оса показује сличан образац конвергенције.\n", + "\n", + "Уопштено, за кластеризацију можете користити дијаграме расејања да бисте приказали кластере података, па је овладавање овом врстом визуализације веома корисно. У наредној лекцији, узећемо ове филтриране податке и користити k-means кластеризацију да бисмо открили групе у овим подацима које се на занимљив начин преклапају.\n", + "\n", + "## **🚀 Изазов**\n", + "\n", + "У припреми за наредну лекцију, направите графикон о различитим алгоритмима кластеризације које можете открити и користити у производном окружењу. Које врсте проблема кластеризација покушава да реши?\n", + "\n", + "## [**Квиз након предавања**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/28/)\n", + "\n", + "## **Преглед и самостално учење**\n", + "\n", + "Пре него што примените алгоритме кластеризације, као што смо научили, добро је разумети природу вашег скупа података. 
Прочитајте више о овој теми [овде](https://www.kdnuggets.com/2019/10/right-clustering-algorithm.html).\n", + "\n", + "Продубите своје разумевање техника кластеризације:\n", + "\n", + "- [Тренирање и евалуација модела кластеризације користећи Tidymodels и пријатеље](https://rpubs.com/eR_ic/clustering)\n", + "\n", + "- Бредли Бемке и Брендон Гринвел, [*Практично машинско учење са R*](https://bradleyboehmke.github.io/HOML/)*.*\n", + "\n", + "## **Задатак**\n", + "\n", + "[Истражите друге визуализације за кластеризацију](https://github.com/microsoft/ML-For-Beginners/blob/main/5-Clustering/1-Visualize/assignment.md)\n", + "\n", + "## ХВАЛА:\n", + "\n", + "[Џен Лупер](https://www.twitter.com/jenlooper) за креирање оригиналне Python верзије овог модула ♥️\n", + "\n", + "[`Дасани Мадипали`](https://twitter.com/dasani_decoded) за креирање невероватних илустрација које чине концепте машинског учења интерпретативнијим и лакшим за разумевање.\n", + "\n", + "Срећно учење,\n", + "\n", + "[Ерик](https://twitter.com/ericntay), Златни амбасадор Microsoft Learn програма.\n" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако тежимо тачности, молимо вас да имате у виду да аутоматски преводи могу садржати грешке или нетачности. Оригинални документ на изворном језику треба сматрати ауторитативним извором. За критичне информације препоручује се професионални превод од стране људи. 
Не сносимо одговорност за било каква неспоразумевања или погрешна тумачења која могу произаћи из коришћења овог превода.\n" + ] + } + ], + "metadata": { + "anaconda-cloud": "", + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "3.4.1" + }, + "coopTranslator": { + "original_hash": "99c36449cad3708a435f6798cfa39972", + "translation_date": "2025-09-06T14:13:36+00:00", + "source_file": "5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/translations/sr/5-Clustering/1-Visualize/solution/notebook.ipynb b/translations/sr/5-Clustering/1-Visualize/solution/notebook.ipynb new file mode 100644 index 000000000..636e875a8 --- /dev/null +++ b/translations/sr/5-Clustering/1-Visualize/solution/notebook.ipynb @@ -0,0 +1,882 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Defaulting to user installation because normal site-packages is not writeable\n", + "Requirement already satisfied: seaborn in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (0.11.2)\n", + "Requirement already satisfied: matplotlib>=2.2 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from seaborn) (3.5.0)\n", + "Requirement already satisfied: numpy>=1.15 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from seaborn) (1.21.4)\n", + "Requirement already satisfied: pandas>=0.23 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from seaborn) (1.3.4)\n", + "Requirement already satisfied: scipy>=1.0 in 
/Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from seaborn) (1.7.2)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (4.28.1)\n", + "Requirement already satisfied: pyparsing>=2.2.1 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (2.4.7)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (1.3.2)\n", + "Requirement already satisfied: pillow>=6.2.0 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (8.4.0)\n", + "Requirement already satisfied: cycler>=0.10 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (0.11.0)\n", + "Requirement already satisfied: packaging>=20.0 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (21.2)\n", + "Requirement already satisfied: setuptools-scm>=4 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (6.3.2)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (2.8.2)\n", + "Requirement already satisfied: pytz>=2017.3 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from pandas>=0.23->seaborn) (2021.3)\n", + "Requirement already satisfied: six>=1.5 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from python-dateutil>=2.7->matplotlib>=2.2->seaborn) (1.16.0)\n", + "Requirement already satisfied: tomli>=1.0.0 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from setuptools-scm>=4->matplotlib>=2.2->seaborn) (1.2.2)\n", + "Requirement already satisfied: setuptools in 
/Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from setuptools-scm>=4->matplotlib>=2.2->seaborn) (59.1.1)\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "!pip install seaborn" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
0SparkyMandy & The JungleCruel Santinoalternative r&b2019144000480.6660.85100.4200.5340000.1100-6.6990.0829133.0155
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
2LITT!LITT!AYLØindie r&b2018207758400.8360.27200.5640.0005370.1100-7.1270.0424130.0054
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
\n", + "
" + ], + "text/plain": [ + " name album \\\n", + "0 Sparky Mandy & The Jungle \n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "2 LITT! LITT! \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "0 Cruel Santino alternative r&b 2019 144000 48 \n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "2 AYLØ indie r&b 2018 207758 40 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "0 0.666 0.8510 0.420 0.534000 0.1100 -6.699 \n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "2 0.836 0.2720 0.564 0.000537 0.1100 -7.127 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "\n", + " speechiness tempo time_signature \n", + "0 0.0829 133.015 5 \n", + "1 0.3600 129.993 3 \n", + "2 0.0424 130.005 4 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv(\"../../data/nigerian-songs.csv\")\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Добиј информације о датафрејму\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 530 entries, 0 to 529\n", + "Data columns (total 16 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 name 530 non-null object \n", + " 1 album 530 non-null object \n", + " 2 artist 530 non-null object \n", + " 3 artist_top_genre 530 non-null object \n", + " 4 release_date 530 non-null int64 \n", + " 5 length 530 non-null int64 \n", + " 6 popularity 530 non-null int64 \n", + " 7 danceability 
530 non-null float64\n", + " 8 acousticness 530 non-null float64\n", + " 9 energy 530 non-null float64\n", + " 10 instrumentalness 530 non-null float64\n", + " 11 liveness 530 non-null float64\n", + " 12 loudness 530 non-null float64\n", + " 13 speechiness 530 non-null float64\n", + " 14 tempo 530 non-null float64\n", + " 15 time_signature 530 non-null int64 \n", + "dtypes: float64(8), int64(4), object(4)\n", + "memory usage: 66.4+ KB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "name 0\n", + "album 0\n", + "artist 0\n", + "artist_top_genre 0\n", + "release_date 0\n", + "length 0\n", + "popularity 0\n", + "danceability 0\n", + "acousticness 0\n", + "energy 0\n", + "instrumentalness 0\n", + "liveness 0\n", + "loudness 0\n", + "speechiness 0\n", + "tempo 0\n", + "time_signature 0\n", + "dtype: int64" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.isnull().sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Погледајте опште вредности података. Имајте на уму да популарност може бити „0“ - и постоји много редова са том вредношћу\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
release_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
count530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000
mean2015.390566222298.16981117.5075470.7416190.2654120.7606230.0163050.147308-4.9530110.130748116.4878643.986792
std3.13168839696.82225918.9922120.1175220.2083420.1485330.0903210.1235882.4641860.09293923.5186010.333701
min1998.00000089488.0000000.0000000.2550000.0006650.1110000.0000000.028300-19.3620000.02780061.6950003.000000
25%2014.000000199305.0000000.0000000.6810000.0895250.6690000.0000000.075650-6.2987500.059100102.9612504.000000
50%2016.000000218509.00000013.0000000.7610000.2205000.7845000.0000040.103500-4.5585000.097950112.7145004.000000
75%2017.000000242098.50000031.0000000.8295000.4030000.8757500.0002340.164000-3.3310000.177000125.0392504.000000
max2020.000000511738.00000073.0000000.9660000.9540000.9950000.9100000.8110000.5820000.514000206.0070005.000000
\n", + "
" + ], + "text/plain": [ + " release_date length popularity danceability acousticness \\\n", + "count 530.000000 530.000000 530.000000 530.000000 530.000000 \n", + "mean 2015.390566 222298.169811 17.507547 0.741619 0.265412 \n", + "std 3.131688 39696.822259 18.992212 0.117522 0.208342 \n", + "min 1998.000000 89488.000000 0.000000 0.255000 0.000665 \n", + "25% 2014.000000 199305.000000 0.000000 0.681000 0.089525 \n", + "50% 2016.000000 218509.000000 13.000000 0.761000 0.220500 \n", + "75% 2017.000000 242098.500000 31.000000 0.829500 0.403000 \n", + "max 2020.000000 511738.000000 73.000000 0.966000 0.954000 \n", + "\n", + " energy instrumentalness liveness loudness speechiness \\\n", + "count 530.000000 530.000000 530.000000 530.000000 530.000000 \n", + "mean 0.760623 0.016305 0.147308 -4.953011 0.130748 \n", + "std 0.148533 0.090321 0.123588 2.464186 0.092939 \n", + "min 0.111000 0.000000 0.028300 -19.362000 0.027800 \n", + "25% 0.669000 0.000000 0.075650 -6.298750 0.059100 \n", + "50% 0.784500 0.000004 0.103500 -4.558500 0.097950 \n", + "75% 0.875750 0.000234 0.164000 -3.331000 0.177000 \n", + "max 0.995000 0.910000 0.811000 0.582000 0.514000 \n", + "\n", + " tempo time_signature \n", + "count 530.000000 530.000000 \n", + "mean 116.487864 3.986792 \n", + "std 23.518601 0.333701 \n", + "min 61.695000 3.000000 \n", + "25% 102.961250 4.000000 \n", + "50% 112.714500 4.000000 \n", + "75% 125.039250 4.000000 \n", + "max 206.007000 5.000000 " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Хајде да испитамо жанрове. 
Прилично много њих је наведено као „Missing“, што значи да им у скупу података није додељен жанр.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "import seaborn as sns\n", + "\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top[:5].index,y=top[:5].values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Уклоните жанрове означене као „Missing“, јер нису класификовани на Spotify-у.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "df = df[df['artist_top_genre'] != 'Missing']\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "df = df[(df['artist_top_genre'] == 'afro dancehall') | (df['artist_top_genre'] == 'afropop') | (df['artist_top_genre'] == 'nigerian pop')]\n", + "df = df[(df['popularity'] > 0)]\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "corrmat = df.corr()\n", + "f, ax = plt.subplots(figsize=(12, 9))\n", + "sns.heatmap(corrmat, vmax=.8, square=True);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAaQAAAGkCAYAAAB+TFE1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAAsTAAALEwEAmpwYAAEAAElEQVR4nOydd3gc1dWH39m+q9Xuqvde3eTeG7bBdAwm9A6BBEJJvtASSggJCYQk1IQSCBBIqKaYjgvuvTfZktV71/Y+8/2x0tpCkrst28z7PPvM7MydmbNl7m/uveeeI0iSJCEjIyMjIzPAKAbaABkZGRkZGZAFSUZGRkbmJEEWJBkZGRmZkwJZkGRkZGRkTgpkQZKRkZGROSmQBUlGRkZG5qRAFiQZGRkZmZMC1UAbICNzuIiSiD8YwBf04Q368AX9+AKhZfd7b8CHL9j98qNRqjFpIzHrIkmOTMCkjUQQhIH+KDIyMvshC5LMgODyuWlyttLhttLpsdLhtuL0uXAHvLgDHjx+D+6AF4/fgyfg3U9sQgJztBg1EeREZzAsoZCihEFkWFJkgZKRGWAEOVKDzPGm1dXO7pa9lLZVUmtroNbWQIfb2qucVqVFr9KiV+nQqbXoVDr0Ki06lRaNSoNWqUGjVKNVadB0rytD6z22qfbbp9KgUajxBX3YvA7a3Vbq7Y3UWBvY3bqXOlsjAKmmJM7ImsD0zAmYdaYT/RXJyMggC5LMccDld7O1cReb6newq6WUFmcbEBKc1MhEUsyJpJqSSDTGEa23YNGbsehMaJTqE25ru6uTjfXbWVq5hpK2ctRKNWflTGVO4Wyi9OYTbo+MzI8ZWZBkjgl2r4PVNZtYX7eVHc17CIpBIjURDI7PZ1BcLoWxuWRaUlEoTl4/mlpbA/OLF7Csai1KhZI5hbO5uHA2GpVmoE2TkflRIAuSzBHjC/rZVL+dZZVr2dywg6AkkmSMZ0xKEWNThpMfk31SC1B/NDpaeG/7fFZVbyAhIpZbRl/FiKTBA22WjMxpjyxIMoeFJEkUt+xledU6VtdsxOV3E6UzMyVjLFMzxp9WzgE7mnbz+sb3qbM3MjtnGteNuBSt3FqSkTluyIIkc0jYvA6WVqxhYflyGuzNaFVaxqeOYFrGeIbGF5ySLaFDwR/08972+Xy+ZyHJkQn8cuJPyYxKHWizZGROS2RBkukXSZLY3bqXBWUrWFOziYAYoCA2h7NypjIudQQ6lXagTTxh7GjazQtr38Tpc/GzMdcyNXPcQJskI3PaIQuSTC9EUWRt3WY+Lf6Wio4aDGo90zLGc2bOFNItKQNt3oDR6bHxzKrXKG4p5by8GVw74lJUCuVAmyUjc9ogC5JMmIAYZFnlGj7b/R0N9maSjPFcWHgWUzLG/qhaQwciIAZ5Z+vHfFWymEFxufxq0q1Y5HlLMjLHBFmQZJAkic0NO3h7y8fU2RvJikrjkkHnMC5lxGk7NnS0rKhax8vr3yFSY+S+KT8jOzpjoE2SkTnlkQXpR06rs51XN/yXLY27SIqM57rhcxmdXHTaeModTyo6anh6xctYvXZuH3st
UzLkcSUZmaNBFqQfKZIk8X3FKt7a/BEiElcOvZCzc6ejUsrhDQ8Hq8fG31f9i+KWvVxUeBZXD7tYblXKyBwhsiD9CPEEvLy07m1W12xkSHw+t4+9jnhj7ECbdcoSCAZ4c/OHfFe2jBGJg7l74s0YNREDbZaMzCmHLEg/MhrtzTy98hVqbQ1cNWwOFxWehUKQn+iPBQvLlvP6pveJM0Rz/5TbSTUnDbRJMjKnFLIg/YjY21bJn5a9CMAvJ95CUeKgAbbo9GN3y17+tvJVfEE/d064kbEpwwfaJBmZUwZZkH4k7GjazV9WvIxJa+ThM+4h0Rg30CadtrS62vnrilco76jmnLwzuHb43AGJZC4jc6ohC9KPgE31O/jbyldIjIznoel3Ea23DLRJpz2+oJ//bfuUr0oWk25O4c7xN5AZlTbQZsnInNTIgnSas6u5hCeWvUiqKZFHpt+DUSsPtp9INjfs4J/r3sbudXBR4Vn8ZPB5cjoLGZl+kAXpNKasvYrHv3+WaIOF38/8NSatcaBN+lHi8Dp5e+vHfF+xiviIGK4ZfgkTUkfJc71kZH6ALEinKY2OFh5a+Bd0Ki1/mHkv0QbLQJv0o2dH0x7e3Pwh1dY68mOyuXLYhQyJL5CFSUamC1mQTkOcPhcPL3waq9fOE2feT1Jk/ECbJNOFKIosqVzD+9vn0+GxkhudycWDzmZ08jCUcqBWmR85siCdZgTEIE8u+wc7W0p4ZPrdDI7PH2iTZPrAF/SzpGI183d/R7OzjWi9hZnZk5iZNZnYiOiBNk9GZkCQBek049+b3ueb0iXcPvY6ZmRPGmhzZA5CUAyysX47i8pXsKVhFwgwInEw0zLHMzZ5uOwAIfOjQhak04hllWt5ce2bnJ8/ixtG/mSgzZE5TFqcbSwqX8nSyjW0uTrQq3VMSB3F9MzxFMblyhE1ZE57ZEE6TajsqOXhRX8hNzqTR864Rx6POIURJZHilr0srVzDmppNeAJe4gzRTM0cx7SM8SSbEgfaRBmZ44IsSKcBDp+T33z3JD7Rz1OzfysnjDuN8AZ8rK/bwrLKtWxtKkaSJPKiM5maOZ7J6WOIlF35ZU4jZEE6xRElkaeWv8S2pmJ+P+P/yI/NHmiTZI4THW4rK6rWs6xyDVXWOtQKFVMzxnF+wSzSzMkDbZ6MzFEjC9Ipzkc7v+SDHV9w86grOCfvjIE2R+YEUdlRy4KyZSytXIMv6Gd44mAuLDiTYQmF8rwmmVMWWZBOYbY07OTPy/7BlIyx3Dn+Rrki+hFi9zpYULacb0qX0OmxURibw2VDL2CoPOFW5hREFqRTlGZnGw989ydi9VH88cz70cruwT9q/EE/i8tX8UnxN7S7OxkUl8tlQy5gaELBQJsmI3PIyIJ0CuIL+nl00V9pdLTw5FkPkihHYpDpwhf0s7h8JZ8Uf0OH28qQ+HyuGHoRhXE5A22ajMxBkQXpFOTl9e+wuHwl90/5OWPkBHAyfeAL+llYtpxPir/F6rExMmkIVwy9iOzo9IE2TUamX2RBOsVYXL6Kl9e/zSWDzuGqojkDbY7MSY4n4OWb0iV8tvs7nD4X41NHcsXQC+X06jInJbIgnUKUt1fxyKK/UhiXy0PT7kKhkGfuyxwaLp+bL0oW8sWeRXiDPqZmjOOyIeeTIGcOljmJkAXpFKHd1clvFj6JUlDy5OzfyLmNZI4Im9fBZ8Xf8s3epYhikJnZk7l08HlyehKZkwJZkE4BPAEvv1v8Nxrszfxx1n2kW1IG2iSZU5x2dycf7/qaReUrUQgKzs6ZxsWDzsakixxo02R+xMiCdJIjiiJ/X/0v1tdt5YEptzMqedhAmyRzGtHsaOWjnV+xtGoNWqWGM7OncF7BTGINcgoMmROPLEgnMZIk8eqG/7GofAU3jryM8/JnDrRJMqcpdbZG5u38ilU1GxGAieljuLDgTLKi0gbaNJkfEbIgnaRIksQ7Wz/m8z0LmTv4HK4cdnw86oJBkU6Hlzar
h3abhw6bB5vLh9sTwOUN4PYG8PqCiKLUZReIXX8ZhSCgUSvQqJVdLwUaVWhd27Vdq1ai1SjD65qu99of7NOolSgVB48sIIoSgaCIPyASCIrh9e6XLxDE7xfx+oP4A0F8fhGfP4gvIPZ+7w92lQsd173PH+h5vD8QRBQP7ftUKEClVIReqtBSHV4Xwvu6P7tOq0Kn6VrXdK+r0He912qV6DUqIvRqjHo1Wo3yuEZgaHW281XJYhaWr8AT8DIsoYBz8mYwKmmoHEFe5rgjC9JJiCRJfLDjC+bt+oqzc6dz86grjroS8ngDVDbYqGiwUd/ioLbZQX2Lg8Z2V1hs9ketUqDXqtBrVWg1ShSCgCCA0L0ERJEeFXf3eiB4ZH8phUIgpEn7rkHX9QACAZFgH7YeLoIAalVINNUq5T5RVSm6titRh8U1tO1QxBJCYr2/WAYCUg/h3CegQTy+rpc3cMifS6UUwuJk1GuIMITWTREaok06oiK1RJl0REXqiDJpMUVoD9n2/XH6XCwsW8HXpd/T7u4kRh/FrJzJzMyeTLTectjnk5E5FGRBOskQJZE3N33IN3uXMDNrEreNveawE7OJokRVo40dZW2UVHdQVtdJXbOD7jpPo1aSEhdBcpyR5NgI4qIMxJh0oQqtqxJTq47cpTwoSqFWiD+I1xdqhXi71n1d6/vv6173B8Ww/RBqjUmEBBpCItnd2lB3tz66liqlIiwePxQUjSr0XqtWolYpUSmFky7Omz8g4vUF8PiC4VapZ7/3Trcfh9uPw+XD4fbve+/243T5sTq9uDyBXudVKASiIrXERxlIiDGQEG0gMTqChBgDybERRJt0B/wuujPaLihbxtbGYhSCgjEpRczMmszwxEFyq0nmmCIL0kmEL+jn5XVvs6J6PRcWnMm1w+ceUsUZFCUq6q3sKGtjR1kruyrasLv8AMSYdeSkWMhJNZOTYiYr2UysRY/iCJ6aZU5uPL4AnXYvHTYvHfZQ92uH3Uur1U1Tu4umdhdtnW72b4xF6NWkJ0SS1vVKT4wkPSGSGHNvoWq0N7OwfAXfl6/C7nNi1pmYkj6WaZnjybSknnQiL3PqIQvSSUKzs42/r3yV8o5qri66mDmFs/u9wYNBkbI6KzvKWtle1kZxRRvOrqfjpJgIhubEhF7ZscRHG07kx5A5yfEHRFo6XTS1uahvcVDVZKemyU51ox2b0xcuF6FTkZNqITvFTE6qhdxUM8mxRhQKgUAwwObGnSytXMPG+u0ExSBp5mSmZ45ncvpYYgxRA/gJZU5lZEE6CVhft5WX1r2NKIncOf6GXvHp/AGRvTWd7ChvZUdZG8WVbbi9QQBS4oxdAhTLsJwYYsz6gfgIB0QK+Am6bIg+N5Lfi+j3IPm6lx5EvxcpGABJBFFE+sESQFAoQaEILZXK0FJQIigUoFCiUGsR1FoUGl1o2fU+tK5DUGtCx8j0i9XhpbpLnCobbJTVdlLZYMMfCP0Geq2SrOSQQOWkmMlNtWA2w7r6zSyrXEdJWzkAeTFZjE8dyfjUEXIkCJnDQhakAaTd1cm/N7/PutotZFhS+fWkW0mMjMfl8bO7qoNdFW3sKm9nT3UHPn9IgNITIxmaHRKgodkxRJl0A2K7JEmIHicBa0voZWshYG0l6LKFxMdlI+gOrUs+z1FcqbuVeAycGdRaFFoDCl0ECm0ECp0h/F7Zva17v26/dW2onKDW/ui6pQJBkZomO2W1VspqOymrs1Jeb8XrC/0fNSoFWclmslPNxMYo6FRUU+raRpW1GoBMSyrjUkcwPHEwOVEZcrgrmQMiC9IAYPPY+aJkEd+ULiEgipybcS7p6qGUVHWys6KNijorogQKAbJTzAzOimFIduhlNmpPiI2SJCG6bPg7m7vEpuvV9d5vbUXyunocI6g0KCPMKPQmlIZIlAYTCoMJpcGEUh+JQqtHUOtCrReNLtRy0YRaMChVCIIi1ArqWiIowgIQai0FkcTuZTDUihKDSMEA
UsC3r7Xl94aXkt+D6PeFlj43oseF6HEieruXToIeJ6LHBWJvp4AeKJQotPougdonVGFh2/8VFrKe5QXNgZ0ITgWCokRds53yOitldVb21nZSXmcNO1WolAIp8Qb0Ji92ZR2tVCAYbBh1OoYlFDI8YRDDEgcRZ4g+5b8LmWOLLEgnCEmS2NNcwVc717G2pByf3UBkMAWvw4DLHbqRNWolhRlRDM6KYXBWNAUZURh06uNjjxgkaG8nYGsjYG3Bb23eJzrWZgLWVqSAr8cxgtaA2hyHyhyHyhzftYwLb1MYTKdsBSNJElLAFxIs736C1f3yunq++trmdYe6HQ+EoAh1K2r0IbHqWu57b0Ch0XWJlx6FVhfa1lVW2P8Y9ckjbqIo0dju7NGSKqvtDDvXAOgjgohaG0FNJ4LegckE+alxDE7KJD8mi+zoDHSqE/PAJXNyIgvSMUSSJJxuP21WD21WD9XNneysqaOisZO2Dh8Bt5buLiiVUiAzOeT51t0nn51iRqU8ui4NSZKQfO5Q15nTSsDRTtDWRsDW2vUKrQcdnb0qT4XBhMoUh9oSFxab/YVHqYs4KttOd0LfvacPofqhgLlD42k+d3hd9LoQfZ7wtoMKGwBCl0CFBEyhCa0L3evdIvYD8eu9zYCg0YZapsf4+2jpdFNWa6Wy3kpNs4OaJju1zfaec9VUXgStG4XWQ0SERKxFR3KMiYz4WLLj48mMjSc2wiJ39/0IkAWJru4pKfSUFwyK4TkzP1x6fAEcbj82p5dOhwery4PN6cXqCLna2hwBAj/s9VEEUOjcWCwK8pJjGZOVQ0F6LGkJkaiUitAcGzGIFPQjBQKhZdDf1Q3lD73CFZerRwUmda13j9sEnVZElw0p6O/1GQWVBpUpBqUpFpUpBlVk19IUGxYchWZgxqNkehJurXldXb+9p8dvL4X/A/uWkq/7/+HpKhMSONHrOkRxIyRS+7XKBK0ehUoTcghR7XspfvBeUGtQqLTh9X371AgKJYJSCQpV17qKIAKtNj+1rW5qW11UNlmpbu6gpcONwyEhin20+pR+VJoAWp2EwaAkQq/AoFMTodNg1GswGbSY9AZMBi1GvQa9Roteo+5aatCp1aE5a11z2eRpDycnp6QgBQIBGhsbD1jG6vDywgdbcHr8BEWJYFBCEiWCkoQohl5BUULsen+4CIjoFD50gh+d4MOocBOpcGNUeDEpvZiVXqJUoaU6/OQpgtQ10VOSIBgAMcgRDdgLyq6nYy1KnRFBF4nSYAyN1eiMoTEcfSQKgxlVZBSCNuKk6d6ROXFIkgRBf2hczedG9HlDLTG/p2tczbOvZeb3dnk/uhG9XiS/e99DUdCHFPAjBvwQ8IMUPIZWCiAoQBnynHQKejoDeqyiDoeowhnU4BA1OEUNrqAGp6TBJWrwSWpEjrDVJIihCCDCfvde13r39q6gISgEAZPOiFJQ7BetZN+91L3avWV4fhxzz8g9qAmJiYmoVKojs/805ZQUpNraWmbNmjXQZsjIyMgcMYsWLSI1NXWgzTipOCUF6VBaSEdKY2Mj11xzDf/9739JTEw8Ltc4Xsi2Dwyy7QPDqWw7yC2kvjglvw2VSnXcnywSExNP2acX2faBQbZ9YDiVbZfpiey2IiMjIyNzUiALkoyMjIzMSYEsSDIyMjIyJwWyIP0Ak8nEnXfeiclkGmhTDhvZ9oFBtn1gOJVtl+mbU9LLTkZGRkbm9ENuIcnIyMjInBTIgiQjIyMjc1JwSgpSIBCgtraWQK/AcTIyMjKnDz+2uu6UFKTGxkZmzZp13KI1yMjIyJwM/NjquuMuSA6HgwsuuIDa2tpe+xYuXMicOXO46KKLuOOOO7BarcfbHBkZGRmZk5TjKkhbt27lqquuorKystc+h8PBY489xquvvsr8+fMpKCjghRdeOJ7myMjIyMicxBzXWHYffPABv/vd77j//vt77fP7/Tz22GMkJCQAUFBQwOeff96rnM1mw2az9dj2
Y2m+ysjI/HiQ67rjLEhPPPFEv/uioqI488wzAfB4PLz66qtcd911vcq99dZbvPjii8fNRhkZGZmTAbmuOwmifdvtdu644w4KCwu55JJLeu2/4YYbem3vDjsvI/Njw+/3U1tbi8fjGWhTZA4BnU5HamoqarX6oGXlum6ABam5uZlbbrmFCRMm8Nvf/rbPMiaTSQ4NIiPTRW1tLZGRkWRmZsoZgE9yJEmira2N2tpasrKyDlperusG0O07GAzy85//nHPPPZeHHnpIvrlkZA4Bj8dDTEyMfL+cAgiCQExMjNyaPQxOeAvp1ltv5e6776axsZFdu3YRDAb59ttvARg6dOgBx51kZGSQxegUQv6tDo8TIkiLFy8Or//rX/8CYNiwYezevftEXF5GRkZG5hTglIzUICMjc+zYtm0bjz76KADbt2/n7rvvPuTyx6KcjEw3siDJyPzI2bt3L01NTUCo5+L5558/5PLHopyMTDcD7vYtIyNzfBBFkT/96U9s3boVp9OJJEn88Y9/5MMPP6Szs5OamhqGDx/OqlWrsNvt/OY3v+Hiiy/mD3/4A1988QUbNmzgySefRBRFAH72s59RVFTE888/Hy7/5z//uc9rNzQ09Cr3/vvv8/bbb6NQKIiNjeWRRx4hKyuLBx98EEEQKCsro729ncmTJ/Pwww8f0FU6GAzyl7/8hcWLFxMZGUlRURFlZWW8/fbb2O12nnjiCUpKSvD7/UycOJH7778flUrFsGHDuO2221i5ciXNzc1cf/313HjjjXz88cd89NFHuN1ujEYjb7/9Nh9++CHvvvsuoihisVh45JFHyMnJOS6/lUwX0ilITU2NlJ+fL9XU1Ay0KTIyJ5Rdu3YdctlNmzZJd911lxQMBiVJkqRXXnlF+tnPfiY98MAD0g033BAuN2/ePOm2226TJEmS1qxZI51//vmSJEnS9ddfL33xxReSJElScXGx9Nhjj/UqfyD2L7dq1SrpzDPPlNra2sL7zj33XEkURemBBx6QLr74YsnhcEher1e65pprpLfffvuA53733Xela665RvJ4PJLX65Vuvvlm6dprr5UkSZIefPBB6T//+Y8kSZIUCASke++9V3r11VclSZKk/Pz88Lm3b98uDR06VPJ4PNK8efOksWPHSna7XZIkSVq7dq109dVXSy6XS5IkSVq+fLl07rnnHvQz98Xh/GY/5MdW18ktJBmZ05SRI0diNpt57733qKmpYe3atURERGCxWBg9evRBjz/33HN5/PHHWbx4MZMmTeL//u//jtiW5cuXc9555xEdHQ3A3LlzeeKJJ8JBly+55BIiIiIAmDNnDosWLeLaa6/t93xLly5lzpw5aLVaAK644grefvttAJYsWcL27dv56KOPAHq5Xc+aNQuAIUOG4PP5cLlcQCh8mdFoDJ+jqqqKK6+8Mnyc1Wqls7MTi8VyxN+DzIGRBUlG5jRlyZIlPPHEE9x0003MmjWL7Oxs5s+fD4DBYDjo8VdeeSUzZsxg5cqVLF++nBdffDF8/OEiSVKf27rz/CiVyh7bFYoDD2+rVD2rrv3Li6LIc889F+5es9lsPdyvu0Wse1u3bft/J6IoMmfOHO67777w++bmZsxm80E+qczRIDs1yMicpqxcuZIZM2Zw9dVXM2zYMBYuXEgwGOxVTqlU9pkA7sorr6S4uJi5c+fyhz/8AZvNhtVq7bf8gc47ZcoUvvrqK9rb2wGYN28eFouFjIwMAL7++mt8Ph9er5dPPvmEGTNmHPDc06dPZ/78+fh8PgKBAJ988kl435QpU3jzzTeRJAmfz8ftt9/OO++8c1B792fy5Ml8+eWXNDc3A/Duu+9yww03HNY5ZA4fWZBkZE5TrrzyStavX8+FF17IFVdcQVpaGrW1tWEnhW5GjhxJeXk5v/jFL3psv/fee3n++ee5+OKLuf7667nzzjtJTU3tt/wP2b/c5MmTufHGG7nhhhs4//zz+fTTT3nllVfCLRudTsfV
V1/NhRdeyJgxY7j00ksPeO65c+dSVFTExRdfzJVXXolarUav1wPw0EMP4XK5uPDCC7nwwgvJz8/npz/96WF9d1OnTuXWW2/l5ptv5sILL+SLL77gxRdflCe6HmcEqa+29ElObW0ts2bNYtGiRaSmpg60OTIyJ4zi4mIGDRo00GYcUx588EHy8vK45ZZbDvmYFStW0NbWxpw5cwD44x//iFarDXexnUwczW/2Y6vr5DEkGRmZI6K8vJxf/epXfe7Lysri2WefParzX3311Tidzj73/fOf/+T111/n9ddfJxgMUlhYyGOPPXZU15MZeGRBkpGROSKys7P57LPPjvo8Tz75ZJ/b//e//x3wuDfeeOOory1zciGPIcnIyMjInBTIgiQjIyMjc1IgC5KMjIyMzEmBLEgyMjIyMicFsiDJyMjIyJwUyIIkIyNzTHn++eeZNWuW7AUnc9jIbt8yMjLHlM8++4zXXnuNrKysgTZF5hRDFiQZmVOUxRuqWbCu+ric+6xx6cwck37AMoFAgMcee4zS0lJaW1vJysoiOTmZpqYmfvGLX/C3v/2Nm266iSFDhtDa2spHH33E66+/zvz581EqlUyePJn77ruPhoYGbr/9dtLS0qiqqiI5OZmnn34ai8XC999/z7PPPosoiqSlpfH4448TGxvLzJkzmTlzJhs2bADgT3/6E4MHDz4u34XMiUPuspORkTkiNm/ejFqt5v3332fBggV4vV4mT55MfHw8r776KoMGDaKjo4PbbruNzz77jFWrVrF48WI+/vhjPvnkE6qqqnjvvfcAKCkp4YYbbuDLL78kJyeHF198kba2Nh599FH+8Y9/8PnnnzNq1Cgef/zx8PUtFguffvopd999Nw888MBAfQ0yxxC5hSQjc4oyc8zBWzHHk7Fjx2KxWPjvf/9LeXk5lZWV4dxC+zN8+HAA1qxZw/nnn49OpwPg0ksv5dNPP2X69OlkZmYyfvx4AC6++GLuvfdeJk+eTFFRUTiG2xVXXMGrr74aPu/ll18OwMyZM3nwwQdpb28P51uSOTWRW0gyMjJHxKJFi7j33nvR6XTMnTuXsWPH9pn3qFuAfhhlHAinp9g/v5EkSSiVyl7l98+f9MNjRFHskVNJ5tREFiQZGZkjYvXq1Zx77rlceumlxMbGsn79+j7zLXUzYcIEvvzySzweD4FAgHnz5jFhwgQAKioqKC4uBkK5kqZNm8bw4cPZunVrOKvs+++/H25FAXz55ZcALFiwgJycHDl53mmA3GUnIyNzRFx22WXce++9fPPNN2g0GkaMGBEWj76YMWMGxcXFXHrppQQCAaZOncq1115LY2MjZrOZ559/nurqagoKCvjjH/+IwWDg8ccf584778Tv95OcnMwTTzwRPt+mTZv46KOP0Ov1/QZolTm1kAVJRkbmiCgoKODzzz/vtX3/lBR79uzpse+OO+7gjjvu6HWMXq/npZde6rW925uuL37961//KHIE/ZiQu+xkZGRkZE4K5BaSjIzMgJKamsrixYsP65jDLS9zaiC3kGRkZGRkTgpkQZKRkZGROSmQBUlGRkZG5qRAFiQZGRkZmZMCWZBkZGSOKU1NTdx6663H5FzPPfccixYtOibnkjn5kb3sZGRkjikJCQn861//Oibnuueee47JeWRODWRBkpGROSLWrl3LK6+8gk6no6ysjIKCAv7617/S3NzM9ddfz+LFi2lsbOTee+/FarWSn5/P+vXrWbZsGU6nk8cff5zS0lKCwSC33norF1xwQTgSeGdnJzNmzKC5uZlx48Yxd+5cnnnmGVavXo3VaiUqKooXXniBuLg4pkyZwtlnn83GjRtRKpU8++yzpKWl9bC1v3QVFRUVPProo3R2dmIwGHjooYcoKiriwQcfRBAESkpKcDgc3H777Vx88cUD8C3/uJAFSUbmFMW+bQn2rcdnPk7k8JlEFp1x0HKbN2/m66+/Jj4+nssvv5wVK1aQn58f3v/EE09w7rnn
cs0117BgwQK++OILAF566SWGDBnCU089hcPh4MorrwxHBW9qauKrr75CpVLx4IMPAlBVVUV5eTnvvfceCoWC+++/n88//5ybb76ZlpYWJk6cyCOPPMKTTz7Jf//73/Bx+9OdrmLx4sU88MADfP7559x3333cdtttzJ49my1btnDPPffw7bffhu147733aGtrY+7cuUyePJm4uLij/WqPiL6C1p6OyGNIMjIyR0xeXh6JiYkoFApycnKwWq099q9cuZI5c+YAcNZZZ2EymQBYtWoV7733HnPmzOGaa67B5XJRWloKwODBg3tE8gbIyMjggQce4MMPP+TJJ59ky5YtPVJdTJ06NWzPD23oZv90FU1NTTQ2NlJdXc3s2bMBGDFiBGazmfLycgDmzp2LWq0mMTGRUaNGsXHjxqP6ro6OH4cgyS0kGZlTlMiiMw6pFXM80Wq14XVBEHo9ySuVyj6f7kVR5Omnn2bIkCEAtLa2Yjab+fzzz8PpKvZnx44d/PrXv+bGG2/k7LPPRqFQ9Dhvtx192dDND9NVBIPBXmUlSQpHLN8/nYUoir1E8oQS8A/ctU8gcgtJRkbmuDFp0qRwANalS5dis9mAUCqKd999F4Dm5mYuuugiGhoa+j3P+vXrGTduHFdddRW5ubmsXLnygKku+uKH6SpSUlJIS0vju+++A2DLli20traSl5cHwNdff40kSdTV1bFt2zZGjx59eB/+GCIGfxyCJLeQZGRkjhu//e1veeCBB/jggw8oLCwMd9ndeeedPPbYY1xwwQUEg0Huu+8+0tPTw04HP+S8887jzjvv5MILL0StVlNQUHDAVBd90Ve6iqeffprHHnuMF154AbVazQsvvIBGowHA4/Fw6aWX4vP5ePzxx4mKijqKb+LocDl6Z+I9LZFOQWpqaqT8/HyppqZmoE2RkTmh7Nq1a6BNOCzeeustqbS0VJIkSdqxY4d0ySWXDIgdM2bMOKz64oEHHpDmzZt3TK59NL9Zd123esnSY2LLyc5xbyF1e9C8/PLLvXKXFBcX8/DDD+NwOBgzZgy///3vB7afVua0R5IkvAEvLr8HT8CDN+jHH/QTlLrHEwRUCiUqhRKNSoNepSNCrUer0iIIwkCbf8qRkZHB//3f/6FQKNBqtfzhD38YaJNOSaxW50CbcEI4rrX/1q1befjhh6msrOxz/3333ccf//hHRowYwW9/+1s++OADrr766uNpksxpjCRJWD02Gh2tNDtbaXW10+7qpNXdQYe7k06PDbvXSUAMHPa51QoVJl0k0XoLcYZo4o2xxEfEkhwZT4opEbPOdBw+0anP9OnTmT59+kCbcdjpKk62DLStVvtAm3BCOK6C9MEHH/C73/2O+++/v9e+uro6PB4PI0aMAEIuls8//3wvQbLZbOGB0G4aGxuPm80yJz+iKNLkbKXGWk+drZE6eyN11kbq7U24A54eZY2aCGIMUUTrzWRYUjFpI4nURBCh0aNTadEoNWiUahSCAoUgIAFBUSQg+vEGfbj9Xlx+Fzavg063jXZ3J+Ud1ayt3UxQEsPXMetMZEelkR2VQU50OnkxWSdUpCRJQpQkRDG0HnrtcxYWAEEIeaEpFAKKrqXMycOB6rqmDlmQjponnnii333Nzc09JpnFxcXR1NTUq9xbb73Fiy++eFzskzn56fTYqLHWU2Otp9paT1VnLTXWenz7eR1F6c2kmpKYnjmBpMh4kiLjiYuIIdYQjValOS52iaJIq7uDelsTtbYGqjprKe+oZkvjrrArcVJkPIPi8hgcl8fg+DxiDdFHdC2vP0hjm5Omdhcqb4DWTjeBoEgwKBEQQ0tRlA57pooggEohoFIqUKkUqLteGpUSjVopC9YJ5kB1XWOnp8/tpxsDNmAj9TFXoK8++htuuIFLLrmkx7bGxkauueaa42abzIlFlETa3Z002JuptzVRbw9V8tWddVi9+54MIzURZFhSOStnGmnmZNLNySSbEjCo9SfcZoVC
QXxEDPERMYxIGhze7g34qOiopqStnF0te1lTs4nF5SsBSDTGMTShkCHxeQyJy8eiN4ePkySJDruX6kYb1U126pod1LU4qGt20GrdVxnde2kqnQ4vKoWAUhkSD6VGQKEUUO7X+uluDe1PqBXVNddG7HoFRQJBCY8viMPl79GiUqsV6DQq9BolOq0KjVqJzPHjQHVdu0Ps56jTiwETpISEBFpbW8PvW1paiI+P71XOZDKFXUVlTi0CwQBOvwunz4Xd58TudWD12On02OhwW2l1d9DiaKXJ2dqjxaNVaUmNTGRk8lDSzSlkWFJIMydj1kae9I4FWpWGwrhcCuNyuahwNqIkUt1Zz87mPWxv3sPK6vUsLFsOgEUdQ0QwgYDVQmu9DodVTUgKIEKnIiXeyNDcWFLijCTFRJAQYyDoaCQnxXxcvgdJkvAHRHz+IF5/EK8viNPtx+b0AaBWKYjQqYjQq9FrVSf9b3GqcaC6rsP943gYGDBBSklJQavVsnHjRkaPHs2nn37KtGnTBsocmX5w+z20uTpod3di9dixee04fC6cfhcuvxtPwIs34MXj9+IJeHEHPKGl34P/AM4DkZoIovUWEoxxDE8cTGJXV1tyZALRestpU9lZHT7amtS46tJQ1pvR1+fT6axHEdlGm6mdzsgSiAxAAUQpDKQaU8mPyyQ/PpF0SzKJxnhUin2VUXFx03H7bgRBQKMOddcZu7ZJkoQvIOL2BHB5/FidPjodPpQKgUiDmsgIDTqN7Bl7vHEGdDTZOkkwWQbalOPKCf8n3Xrrrdx9990MGzaMv/71rzz88MM4nU4GDx7M9ddff6LNkemi022lvKOGamsdtbYGGuzNNDpasHsdvcoKCOjVIXdonUqLTqVFq9Ji0kWiU2nRq3To1DoMah0GtZ4ItYFIbQSRWiMmrRGLzoRaqT7un0mSJFx+Nx0eKzaPHbvPidPnxhPw4Au7e4uAhEJQoFKo0Cg16FVaIjSGkK16MzH6qIOORYmiRGO7k/I6K+V1VirqbZTXWWm37etui4/Sk5VsZtqIcWQlm8hKNhNr0VFra6CkrYyS1grKOqr4unwBX5WFOs+UgoJEY3yXV18MI9T5OLzOLtd0FUqF8riKtyAIaNVKtGollkgtoijh8vixu/aJk06jxGzUEmlQh215/vnn+eyzz7j22mu56aabjsqGF154AYC77rrrqD/PgaitrQ1HKT9UZs6cyX/+8x/WrVvHunXrjqN3nsCCbdu4dsrp/dB+QgRp/x94/zwphYWFfPTRRyfCBJn9kCSJBnsTO5r3UNyyl5LWclpc7eH90XoLSZHxjE8ZQbwxllhDNNF6MxadCZM2EoNGj0LoHXVKCvrxNlXha64i0N5AwF6D6HYgBkJdPoJShag1YI0wozLForYkoo5NQR2dhKA4si4JURJpcbZRZ2uk3t5Mo6OZFmcbLc52WlzteAPeAx4vIIBw8GjKJq2RRGOoFZcYkYA2GIXfbqSpOUhFfUiA3N5Qi1CpEEiNN1KUF0tOioWcFDNZKWaM+r5FODMqlcyoVGbnhtyjfQEftbZGam0N1NoaqLc10eRspaS1jLz0VJqc+7q6BUClUIUFSqVQoVaqUCvUqJUhwTqWKBQCRoMGo0FDMCh2CZOXpnYXbVaBqEgdpggNn332Ga+99hpZWVnH9Po/Zlbu2SsLkszpQSAYYEfzHjbUbWNL406anW1AyEOtICaHc/NnkB2VQYYlhQiN4ZDPG3TZcBavxlmyHk/1TqQu8UGhQmW0oNBHIqhDk0pFr4tARyMBpxXJuy8UiqDSoEnMRpeSjy5tELr0wSj1xl7XcvicVHbUUNlZR3VnHTW2emqtDXiDvnAZg1pPQkQsyZEJFCUOIkYfRZTejKmrdWbQGDCodGhUGlQKZVhYQwP9wZCrd8CDw+ui02OnqrWZypYmaq0t1De3Utq0BUm1n8eTT48hMo68kWkMSchnTGYumUnmo3IA0Kg0ZEenkx2d3mvf
zl27SDUlERCDLK9cy/LqdSEXb6Twcn8EBBSCIuTu3eXaLnDwFtWMrElMz5pwwDKSJPLXp/5ASUkJra1tJKem8+Cjf+b1l5+lsbGRX/ziF/ztb3/jpptuYsiQIbS2tvLRRx/x+uuvM3/+fJRKJZMnT+a+++7rEcgU4LXXXuODDz4gKioKk8lEUVERAO+88w6fffYZbrcbQRB49tlnycnJYebMmVx00UWsWLECt9vNU089xdChQykuLubRRx/F4/FgNpv561//SmJiIq+++ipff/01wWCQKVOmcN999wGhcEG/+tWvKC0txWQy8Y9//IOoqKh+r3uiMCq9NDRCVWctGZbUgx9wiiIL0mmMKInsbC5hedU61tdtxelzoVVpGRZfwEWFsylKHERCROxBu3yCotTlZizS3ZAINFfg2vgF7j1rQQygjk4icsSZ6NIHoU3IQmWJP2CrJ+hxEmhvwNdag7epEm9dKdYNX2FdOx9JUOBJzqI5KZ0mk4maoIvKzlpaukQUQvN+0s1JzMqeTJo5mRRTEsmmBCI1EYfdheUPiDS2OalttlPT5KCm2U5NU2jd5w8CWgQhlcToAoqSIkmO1hIR5SKg6aDRXc/e9kpKXSspbVjJgjYDQ+LyGZpQQFFCIUmRCce0S00hCGhVGrSAXq1Dreh9C4thgRJD65JEUNoXiFSBAoVCgVJQ9NnSPVQ2b96MWq3mgw8+QBRFrr/+esp3b+L/7nuIDevW8Ps/PUN2Tg4dHR3cdtttjB8/nqVLl7J48WI+/vhjVCoVd911F++9914Pr9nt27czb948PvnkEwRB4IorrqCoqAiHw8HChQt5++230el0PPfcc/zvf//jkUceAUL5jj766CPefvttXnnlFV544QXuvfde7r33XmbMmMH//vc/3nrrLSZOnMiOHTv46KOPEASB++67j/nz5zN69Gja29u56aabKCoq4u677+arr75izpw5B7zuiSBbZ2W3LZt5O7/h/yb/9IRd90QjC9JpSKurncXlK1lSsYZWVzt6tY4xyUVMTBtNUeIgxKBAU5uL6ioXGzsqaLN56LR76XR4sTt9ONx+XJ4AHl8Ary9IUNz31B2jsHORYSMjNNV4JDVrvbms9+fR5opF16xCu8mFXrsHg66MCJ2aCL2aSIOGSIMao0GDKWLfy2xMRR2dgjU1m+qcAio7qqlqKaPG3oRdtEPnToQOidggpOksTE8eQ172WLJjMg866VSURGweO+1uK50eG802Ky02G+1OJ1anC7vLg8Ptx+H243QHkYIKpKAKAipMWiNJ5mhmZSWSlxRHRpKJ9IRIdNr+b5dWZzu7WkrZ0byHnU17WFe3BYBYQ3RYnIYmFGI5hpNlp2dNOGgrphtREvEGQq2/bgcUCQmloCBCY8CoiUB3mOGRxo4di8Vi4b///S/l5eVUVVUR9HtJjTeiVAgEgiI1TaExyGFdLZw1a9Zw/vnnh1NMXHrppXz66ac9BGndunVMnz6diIgIAM455xxEUcRoNPK3v/2NL7/8ksrKSpYvX86gQYPCx+2fE+m7776jvb2dlpYWZsyYARCedP/UU0+xbds25s6dC4RaRcnJyYwePZr4+Phwayw3N5eOjo6DXvdEkK1ppdhbyKqSPZyXX0Zh3IlrnZ1IZEE6TZAkieKWvXxVupgNdduQJImhCYWclXY2OncKNY0uPtti5x/Ni3vMa4HQuIDFqMVi1GKK0BBj1mPQqdBrVWg1StQqJWqlRGL9chJrFiIpFDQkn0ljwmR0aBjf5Srs8QXx+AK4vQFcngBN7S7sbh8unwsPDgSNG4XOhaB1I+icoZfGQ3cdKIhKNKKZCDLJVsYRrzST57WR5CjDXLMH5e5SxJWLqIwZhDN+GPaofDokL62eFjp8LXT627EHOnGLdrw4QTjA3A1JAK0AWglVVM9uLg9QAVR4YUN9BAm2WFLqE0kzJ5MVlUZ2VDpGbUSPY2IjopkWMZ5pmeMBaHK0sLWxmG1N
xayv28qSitUApJqSGBSXS0FsDvmx2YfUQj0WKAQFerUOvVoH+tDEXlfAHXbJt3kdqJWqrkgWRpSKg7ecFi1axPPPP8/111/P3Llz6ejoQJKkcDSIlDgjWkNo3KzVFiBRFUQUe/8mgUBPb0xBEHqUU6lU+Hw+GhoauO6667j22muZNm0asbGxFBcXh8vtnxMJQK3uOWbn9Xppbm4mGAxyww03hJ0tbDYbSqWSjo6OHrE0u3MrHey6J4IsQqk5dO40Xtv4Ln8+64ET4hh0opEF6RRHlEQ21G3jk+JvKGuvQqfUk6Ecga8xlS2bAqwLdAAd6DRK0hIiGZYbS3KckeTYCBKiDcRFGbAYtQeclR+wt9P8yd/x1BQTUTiRmLNuIivCTKfXRqfbRqfHhtVjw+q1d63bET023B4rQbcVAl72T7mmVeowq6OIUMSjk8yo/WYErwm/S4fLHcTh9tPo8VPhDbDUG4kojUCpGEx6ZCWxhgZUUiVtnVU0uVT49rNb8msQfBGoRTNGIRWjMhKT1kS0wUy8yUJSlJnkKDMpcWZMhn2J5URJxB8M4Pa7cfhd2DwOOj1WWl3tNDlaaXS0sKN5D8uq1oaPSTTGkR+bzaDYXIbE55NgjOshLAnGOGbnxjE7dxqiKFLRWcP2pt0Ut5Syono9C7rmIkVqIsiMSiPdnEKaOYkUUyJJkUfW9Xg4KBQKjJoIjJoIREnE6QuFR2pzddDhtoa9IQ/kFLF69WrOPfdcLr30Upqamli/fj0TJ07c7xoCCTEh4Q4ERGqa7YwYNYY3Xv8XV1xxBSqVinnz5jFhQs9W3sSJE7nnnnu466670Gg0LFiwgOnTp7N9+3YyMjK48cYb8fl8vPzyy0RH9x/9IjIyksTERFauXMnkyZP57LPPWLduHeeddx7PP/88l19+OVqtll/84hdccskljBs3rs/zHO51jwcmyUp6QiR48qm2fsb/tn3GDSN/ckJtOBHIgnSKIkoiq6s38v62L2l0NaEKGPHXDcHdnIxdoSY3NYJzJ0WTl2YhN81CUkzEYYWCCYpBGh0tVFZtYe+aebQJIs4Rw7EpPbQvfrJPd3AIjWtYtCYsehOZljRGJg0lWm8hPiKGuK7IBsaDVLa+gI/KzlrK2qsoa6+ivKOaensTjZJIIxChtpCssTDeFyC+vZmEznbi/EHM8VkYcoZhyB2FOikbp9+D1WvH7nXi9Lvw+DtoCDZTUx+K7B1y9VaiVWkxqPVEaiOw6Ewkxcb3WRF3O1Xsba+itK2CrQ27WFYZEql9XXODKEochEm7zylDoVCQE51BTnQGFw86G1EUqbbWU9JWTnlHNZUdNXxXtgz/fpOD9SodsYaoUBw+QxTReksosGswEm/AF3bIOBaipRAURGqNRGqNeANeOj1dDxleOxatCbPO1GeL6bLLLuPee+/lm2++QaPRMGLEiH5zFKUlGGloc5E7eCzjJxZz6aWXEggEmDp1Ktdee22PsoMGDeKGG27gJz/5CSaTieTkZAAmT57Mu+++y3nnnYdGo6GoqCic9rw/uvMd/eUvfyEqKoq//OUvxMfHs3v3bi6//HKCwSBTp07lkksuoa6urs9zHMl1jwfTiuJ4Z0E5s0fP4MuSRRTEZjMhbdQJt+N4IkgH83c9CamtrWXWrFksWrSoV0qL0x1RFPlqxzo+2f0FdqkN0R1BoD6HTEMho/ITGJEfR0FGNNrD8PIKikGqOmspaatgb3slVR211NmbekTFjlDpiDfGEW0IVYxRejMWXcgV3KIzYdZFYtZGojnM2HEun5vKzhoqOmqo6FrW2RoRuwKXmnUmcqLSyezqKsuOTidGHxXu1mmwN1NZt4Pq6q3UtdfQ4nPQqVJgUykIHmFlLQgC0ToLCcZYkk2JpJuTybCkkGVJQ6fe19aTJIl6exM7mvawozn0cvpcCAhkRqWGxakgNgfNQbpXRFGk2dlKvb2JenvIdb3V1R6KWO4OzaOSkLgz5xpSstNCdnan
ylCqUCtUqJVqNEo1GqWmx2TaI8EX9NPh7sThc6EUlEQbLEfdahNFiaZ2Fw63H4tRQ6xFf9pMgD4QxcXFRzzm1F3XvTk3n/TbXuTnL27hytl5FCu/pKKzht+d8UvyY7OPscUDh9xCOkWwOrzMW72JRXXf4Nc3I3kMpElTmV0wifGXJhIVqTv4SbqQJIk6WyObGnawvWk3e1rL8HTN1+mOWj3EEI9x2wqS9FEMveQBzNHJR2W/w+ekwd5MXdf8mhprA9XWOtpcHeEyFp2JrKh0xqYUdUXNzghHbfAEvFR21LKudgsVHTVUd4Ym8O4fDcISYSIhLokCEYwOB4b2ZiJcdiKCIgatEVNiNsakPAzJ+Wjj0pAUCgLBAJ6u/Eh2376I3i2uNprsLayu2RgO9SMgkGZOJj8mi0FxeQyJzyfFlEiKKZGz86YjiiLlHdVsbdzFtqbdfLFnIZ/t/g61QkVe1zGFcTnkRWdh0PSMv6dQKEiMjCcxMp6+nnkDYpBOj5X68joSjLEExCBBMYhfDBAQAzh8rrCIA6GWn1KDTq1Dr9KhUaoPq/LXKNUkGOMwB7y0uTpocbZh9zqIi4g5qLj2h0IhkBhjoNXqptPuQ5IgLurHIUrHglh9kKE5MSzbVM9ffvkzHl78V55c/k8eOeMesqLSBtq8Y4IsSCc5e2s7mbdsJ+valqGIq0ap1TA+6kxunnQeUcZDDyoqSRKlbRWsqtnI+rqtYRfqFFMi0zLHUxibS2FsDjGGKHxNFdS//QgqcxzJVz2O0nBwzzBPV8XV6mqnxdlGs7ON5q44dU2OVhy+fQnGVAoVKaZEBsXmkmZOJjMqlSxLWjjYaCAYoNpax8b6bextq6KsvZJae2N48qpZG0lmVCrnJJxBmjmZ1C6X7x8GWZUkiUBHA+7KHXhqivFU7yKweyMuwK3SoEnMQpuUQ1RSDolJuaiTMnu5qkuSRIfbSkVnDWVdXXWrajaysHwFEIroPSyhkOGJgxkSn09uTCa5MZlcOuQ83H4PxS2lbG/aw66WEj4u/hppV1ckcGM8GVGpZJhDcfqSIxOIj4jpt4WpUiiJNUTTomzCqInos0xQDOIL+vAG/XgDPrwBL06/O3y8Qa0nQmNAr9IdsgjoVFqSIxOw+5y0uzqotTZ0tY5NRyQkgiAQaw6JUIfNiyDwo2kpHS2i18WM0Wm88MEWGpsDPDz9Lh77/hkeX/Isj0y/m+zojIE28aiRu+xOQiRJYntZKx8sLGFH+1Y0GXsQVH4mp0zi5rGX9PLwOhBNjha+r1jN8sq1tLjaUSlUFCUOYnTSMEYmD+mVEiHg6KDu3/eDoCDlxj+jigzt9/g9NDpaaXK20ORo3a9LKSRCTp+rx3mUgoLYrjGjBGMcica4rmR2ScRHxITHaERRpN7eFB4vKmuvpLKzNtzyMWmN5ERnkhOd3tVll0GU7siDiwZsrXjqSvDW7sFTvxdfY3l4Mq+g1qFJyESbmI02KQdtUjbqmJReIiWKIpWdtexsLmFH8x52tZTiDXhRCgryYrIoShxMUUIhOdEZPcaiXH43e9sqKW2roLyjmqrO2vAE5W4sOhMxhihi9FHhrtHwS2fGUW+lsLAQhUKxX86j0C0s0DvKdyAYwBXw4PK7cfndSJKESqEMhXHSGFEpD/2ZNCAGw7+1TqUlPiLmiD29JEkKt5RizTqiTIfewj+VkCSJ3bt3H5Muu6IbH4b0UVz/2DfMHp/Bz+YW0exo5fffP4Pd5+T/Jt3WI/L8qYgsSCcZu6va+c+XxeyoqcGQuwvR2EJOVCY/H3fNIc/QFkWRDfXb+HbvErY37UEQBIoSBjElfSxjU4f3m65BEoPU/Pd3VLdW4ppxOXWiJ5QEz95Ih9vao6xBrSfOEE1MRDSx+tDge6whmtiIKOIMMUTrLSh+MBDuCXiptYZyB1V21nZFXagNR1rQqrRkR6WTE51BbnQmudEZxEXEHNenZ0kM4m+tw9tYjrexDG9DOb6mCiR/
qAtTUGtDApWShy6lAG1qASpjVI9zBIIB9rSVh7rqGoup6KhBQkKv0jEoLpfB8XkUxuaSFZXWqwL3+D3U2ZuotzXR7GylxdlOg72FVkcHNp8dr9jTRf/y1HMoTM5HZ4wAlCAqQFKAqETqWlcoQKlQoFKGch2pVUq0agUajQJf0Ivd58TldyMQSmBo0ZsPuRtOkiQcPhetrnYkJGIN0Uc8tiRJoTElu8tPcmwEEf2EVjpVkSSJtrY27Hb7EYdQ2l+Qhl71f0QWzeCp/6xn295W3vrd2aiUCtpdnfx5+T+osdbz09FXcmbO1GP8SU4ccpfdSUJLh5s3vtjJ8i21RKY2EzlyJwoFXDP8CmbnTjukGfWegJfF5Sv5qmQxzc42YgxRXD70QmZkTSTGENXnMaFupb3sailhR9laqlVWAsmRsOdrtCotaaYkihIGhWK4GeNJNMYSb4ztt9tIkiTsXgel7RXU25qoszdSa2ukztpAs7Mt/DSvV+nIsKQwM3syWVFp5ERnkBKZ2EvEDgdPwEurqz3sumzzOnD6XLj9HnxBH4GuaAUKQYFGoUan1mLURGDSGomKiyUuoyD01K9Q4m+r7xKoMrz1e7Gu/wrrmvkAqKOT0WUMQZ9VhD5zGCp9JEPi8xkSn8/VRRdj8zrY0bSbHU2h1tOmhh0AKBVKMswpZEalkRaZjE6KwufQ0dEGdS0KalsMNLSKuL2R+z6UIojBGMBkEdFFBNlQY0VHExatDoRQNIb9wwUJKFCgROgSK1Gkx8RmpSIU0VutEgjgoy5QiyRJ6FRaItSGQ/7+g6KI3eegNliNVqXBqIk4oqgP3XmgGmolok3aQ5r/dCqh0+mO2UOz2BVua8aYNFZsrWfT7mbGDUkk2mDh8Zm/5tnVr/Hqhv+xt62Sm0dfecRjfQOJLEgDTFCU+Hx5Ge98sxsJH7mTq6jzlzAoNpdfjLuBeGPsQc/hCXj5tnQp8/cswO51UBCbw7XD5zI2ZXif7sv19ibW125lU8N2SlrLCUoiKkFJitvDFH0cKYXnEqNORC+Y8AdCidzwA53Q5lBgUznwCy24JSsO0UqHt51mZyuNjmaaHK24usYtIDRelBQZT050BtOzJpBmTibDnEK8MfaIw9aEPPNqqbbWUd2VxrzB3kSnx9arbPeEUI1SjUro6iaUJHyiH08/KTLiDNGkmpPJtKSSM3g0uVMvJVkdgbepAk/1LjzVu3DsWol98wIQFOhSCzDkjyNi0ATU5nhMWiOT0scwKX0MAFWtLazcu4Pi5nIa2uqpaF2PpNwXf08SFSiJQJ8QSXKamRiDhURzDKmWGNJj4kg0R2PWmfr0nJMkCavHRrW1nsrOGva2VbGntYwOT6hFm25OYWzyCNK1hbQ0CWwva2VraT1ubxCzUcPUMbFIcWUsq1iJUqHkJ4PP4/yCWYfkpSeKIvP3LOD97e9i1pm4c/wNDE0oPLQfcT8a25zc8/clZCaZ+NMdU1DKmWr7RPSEBGlUQTymCA2LN9YwbkgiEJpu8cCUO/hg5+d8vOsbKjpquGfizSSbEgfS5MNG7rIbQOpbHPz93U3sqepg6BA1ttjVtHnauXzoBVxcePZBn1YDYpCFZcuZt+trrB4bwxMHc+ngcymMy+1VttnZxoqqdaysWk+NLTTr26KKI8KfjNgZxcVt3xMlOviz9SJcUtekUUFE0DlR6B0IekdoqXMi6FwIin0eXZIooAgY0EkmIlUW4gyxpEclUpiYzrD0NIx6bS97DhV/0E9FRw2lXS7p5e3VNDiaw/sj1HpSTUkkmRJINMYRHxFDjCGKKJ0Zky7yoAP43oAPm9dOm6uTVlcbjY5W6m2NVFnrqLc1dqWnCIlUYVxoEuzQ+ALiDFF46/fiKtuEq2QDvuZKAJRJ+XTEj2aXkEtpg4fyuk7abfsijsda9GQmR5IYr8Rg8aDUu/EKNto87bQ622lzd4ZdvPdHQMCsiyTGEEVcREzXmFwCqaYk0szJPdJjdHtR
bm7Yyfq6LexuLQNgWEIB5+bNZFj8YLaWtLJwfTVrdzSgUCiYPiEKZ/RWtjbtIMOcwu3jrjvkQfLy9iqeW/NvGuzNnJ8/i6uGXXTY7v+LN1TzzLubue3iYVw49fRxYz5awl12lw8jf/oFxJx5IwCvfLyNb9dW8Z/HzukVRX5j/XZeXPsm/qCfa4fPPeQelpMBWZAGiO831vDPj7aiUiqYdZaapS1fYFDr+eWkWxgUl3fQ4zfV7+CtLR/SYG9mSHw+Vw67iILYnvGtAsEA6+q28vWepexpD03kU7qjcTfHI3YkIPn0REVqmWkqZ6p7EWszzqc5Lgar1ESrr5FWT3M4KKeAQIw+hhhdLFGaaIxKC1rJjDIQgd+lpdPuo7XTTVO7i5YOF/v1EoXzAOWkmMlJs5CfFoUlsm+Rsnkd7GktC71ayijrqA7Ph4rWW8iNziQ7Op1MSyoZltTjmszPF/RT2SWGe1rLKW4pDadUj4+IYXBsATHKNALWaNoqGzA0bGawVEKi0opHUlGsHERL8mQSM7PJSTWTlWTCaDh4Rd3t4h2KgmGlw22jw9NJu6uT1i4X7GZXG0Gx67cRBFJNSRTE5jA0Pp9hCYVE7jcxt9XZztLKNSwsW0Gbu4NUUxI/GXIeE9JG0drh4YNFJSxYV41Rr+asszSsbl+AzWvn0iHnc8mgsw8phYUn4OWdrR/z3d5lJEcmcMe46w9rfowkSfzu1dXsqe7glQfP7Pf/8WOju677z7XjyB45kbgL7gCgpLqDXz+3jDsvG8HZE3o/OLS7O3l53dtsadxFUcIgbh93Xb/d9icTsiCdYAJBkdc/28EXKysYnB3NsImdzC/9ioKYbH49+baw63N/tLk6+Pem91lft5WkyHiuH/ETRiUN7VEpO3xOPtq6kMUVy/BILkSvnmBLChGeLIamplGQEUVumgWLRWRP207WrnqXMp0KhxD6KxjUenKiM8iKSg+7JaeYEg7Zo8ofEGlqd4YiZzfZqWywUVFvpa7FEY4WHh9toCDDQkqKgMrUSXuwgZLWcursjUCoqy87Kp382GwKYrPJi84i2mA5/C/8GNE9AL+qZA+baoupdlbgVjchqAJIEqh8FuJUaQyKzWOiyUBCw3rce1ZDMEjE4ElETbkMTdyxmysSFIM0OVuptTZ0ddVVsqetHLffg4BAYVwOE9NGMyl9TDhqRFAMsrpmI5/s+oYaWwPZUencOPJyCuNyqGqw8cIHW9hT3cGZE5OQkrezqmYDhbE53D3x5l7emP2xrbGYl9a/Tburk3PyzuDKYReF4ucdAjVNdu786/ecNzGTn80tOuLv5nSiu65755ZppOcUkHDpvUDo/3j7U4uwROp48hdT+jxWkiQWlC3n7S3zUCqUXDnsIs7KmXrMc2QdS2RBOoG4PH6e+s8GNu1p5qKpWUgp21lYvoIpGeO4fey1B6zwJUliUflK3t4yj6AU5CdDzueC/Fk93HbbnFZeWfEpW9s3ICkCBK2xJElDmJ47krGDk8hIjKTJ0cKqmo2sq91CeUc1AKZAkKFJQxiWMZqCuBySIxOOSxO/3WFnZWkxW+tLqbJWYZWaQNU1lhJQEyHFk2XKZHT6IKYVDiFSN3CuwMGgSEW9jV0VbRRXtlNc2U5bV1BavVZJXloUBRkWTHEunKpGSjpKKWkrJygGEQSBTEsqeaYUUtrbiN6zmRi3B3PRGURNvxpV5PF5UhVFkbKOKjY37GBtzWZqbA2oFCompo3i/PxZ4fxKoiiyono97277jDZ3BzOzJ3Pt8EvQK/W8/XUx877fy5hBCUybAW9seQ+VQsWd429kVPLQQ7LD5Xfz7rbP+G7vMqL0Zm4Y+RMmpI46pJbsix9uYdH6Gl5/+CyiT1NX8MOhu6773+3nkBwfTdLVvwvve3/BHt75ZjevP3QW8dH95zBrtDfz6ob/saN5D2nmZG4aeTlDEwpOhPmHjSxIJwib08fv/rWa8jort186lGJxMatrNnLxoLO5
aticA96sVo+Nl9a/w6b67QyNL+BnY68hwRi3b7/TyXPfz2OHbT2SIoDGmcaMtDOYM3YkcVF63H4PK6rW833FKva2VwKQF5PFmITBJH7/EZkJeSRd8dAx/bwev4fKzjoqOqop66iivL2aOltjeGwkOTKB/Jhskgyp4Iiivk6guKKd6iZ7qMWhFMhNtTAoK4aCjCgKM6KIMR/aRGBvwIfVa+/ysHPjFwOIktgVakcV8ijTGDBrI9GrQ2NMHXYPJVUd7KnuYE9VByXVHXh8oS6xuCg9gzKiGZwVzaCsGDKSTH0OvHsCXkpayylu2cvu1r3sbasMu7SrUZDg8REfEElPHUpG4WQSjPHERURj0kYel27Hqs5aFpWvZGnFGtwBDyMSB3PlsDlhYfIEvHy080u+2LMIi87EHeOupyhxEF+vquCf87YxqSiJ6y5O57k1r1PVWcvcwedw+ZALD9kTr6S1nNc2vktlZy1D4vO5YcRlZEYd+H6tb3Xw8ycXccWZBVxzzuE7SJxudNd1795zCQk6iZSbnwrva2xzcuufFnLTBUOYO6P3uPH+SJLEurot/GfzR7S42pmQNorrh19KbMSJDRJ7MGRBOgE43H4eemklNU127r9+NCs7v2Bt7WauHX4JFxXOPuCxxS2lPLvqdRw+J9cMv4Rz8s4It14CgSAvL/mWZY0LQO3B6EvnimEXMnv4EARBoN7exFcli1lWuRZPwEuaOZnpmROYlD6aWEM01nVf0LbgDZJv/DO6lPwj+mzdKcSrrfVUd9ZRZa2jqqOWRkdLWHzMOlMouGhUOnkxWeRGZ/Y7udfh8rGrop2d5aGWSWlNJ4FgyLEgxqwjJ8VCVrKJtEQjmkgXLtppcjVRb2+iydFKq7MtHJ3gUFBIKiSfnoBLj+iOQPBEkmRMYmhKJoMzYxmcFUNc1KFHxNifoBikztYYCqDaWUt1awU1bdVYhWCPcmqFiii9uWsirCW8HmOwEKMPOTFE6c1H5ZX4XdkyPt+9ALvPyRmZE7lm+MXhnFJl7VW8uPZN6myNXFR4FlcOm8MXyyt4ff5Orjt3EBefkcnrm97n+4pVDEso5J4JN2PSRR7kqiFEUWRh+Qre3z4fh8/FjKyJXD7sQqL1ln6Peexfq6lssPH6w7N/9B533XXd+/dfTYyvlbTbX+ix/5fPLEGpEPjbPdMP6Xy+gI/5exbwSfG3CMDFg87mooKzDtsJ5XghC9JxxusP8ugrqyip7uChm8ay1vYdSyvXcP2In3BBwawDHvtN6RLe2vwh8RGx/GrSrT2eLteVlvP8qv/g0zWh8Udz3fCfcPawkUDoyXjezq9ZW7sZpULJlPSxzM6dRk50RvhJXBKD1PzzTlSmGJKv/+MhfRarx0ZVZx011nqqrfXUWOupsTXgDezzIkuIiCXDkkpmVCqZllBA1Cj9kUdW8AeClNVZ2VJRw7b6Emqd1bgUrQgG2z5PP0lAGTCilUwYFJHoBCMaQY9S1CIGlXi94HIHsDq8uHxeUAYRlH4UGh9GcxCd0Y+kduAQO8NOHBqlumsMK4fC2BwK43L6nXt1OEiSRNv2JZQueYs2gviHTsYRHUeH20q7uzO83D8tO4REK9EYR4o5iQxzCjnRGeTFZB1WunmXz83Hxd/wZcki9CodN468jKkZ4xAEAV/Ax5tbPmJh2XIKYnP45cRbeH1eGau21fPMr6aTlWxmcflKXt/4HiZtJL+a9NPDclpw+Jx8vPNrvt67BJWg5IKCM7mo8Kw+x5eWba7l6Xc28uc7JjM05+DTHk5nuuu6Dx+5BXPrbjJ++e8e+z9cVMJ/vio+aLfdD2lxtvH21o9ZU7OJOEM014/8CeNSRgx4CCdZkI4jkiTxt/9uYunmWu6/bgxVwjrm7/6Oy4acz2VDL+j3OFEUeXPLh3xTuoRRycO4e/xN4WCcgaDI0199yib7YgQEzkg+k59NvQClUkmzo5X/bf+MVdUb0Kt1nJN7Bufmz+gzS6mrdCON
H/yJ+Ln3Yhw0sdf+dlcne9srKWuvCkfj3n+eT6TWGHZ4SDMnk9710h3iAPbB6PTYwlG0dzWX0OhoAUJCkWVJJ06bhEGMQXKbcNu02BwBbE4fTo8ff0AkKEooBQGtRoleqyLSoCHarCPOoicpNoKUOCOp8UY0+0VFD4pBGuzNVHTUUNZeSWl7JeUd1aFxIQTSLSkMictjcHw+g+PzjkqgAvYOWj5/HnfFNoxFZxB77s9QdD2lSpKE2++h3d1Jq6udZmcbTY4W6u1N1FobaHK2AoSjio9MGsK4lJFkRaUdUoVSa2vglXXvsKetnAlpo/jZmGvCwraiaj2vbPgvepWWn4+6mb/9q5ysZBN//PlkACo6avj7yldpdbVz7fC5nJc/87AqsUZHC//b9ilrajZh0hq5dPB5nJUztcdYqMvj5+pHvubi6TnceMGQQz736Uh3XTfvj3cSUbaSrAff67G/odXJbX9eyK1zhnLRtMPPIruzuYQ3Nn1AtbWOYQkF3DjyctLMRxdI+WiQBek4Mn9ZGf/6bAfXnltIfHYbL61/m9k507hl9JX93sQBMciLa95gVc1Gzs+fxXXD54b77Fttdh747J/YNZWYxCQenv0zMmMS8AV8fFL8LfN3f4cgCJyfP4sLC888YIXZ+OGTeOtKSb/rFVAoQ0nomvaws7mE3a1ltLtDUbgVKIg3xJNmSiE7Oo3c2HQyo1IOmkL8cPEEvOxu2cu2xmK2Ne2m2hrKTWNQ6xkcl8eguDwGxeWSGZV21KkVDgdfwMfe9ip2tZSyq7mEPW3l+IN+BAQyLCkh2+LzKIjNOez05JIk0rHsAzpXfIg2JZ+EnzyAymg56HEuv5vy9iqKW/ayvWk3JW0ViJJIcmQCM7MnMTN78kHFct+k1vnEGqL5v8m3hSNGV3fW8fTKV2hzdTDGOIvvFwo8fddUCjND4w1On4t/rPsPG+q2MjZlOLePu+6wxXlvWyX/3fYJO5tLSDTGcXXRxYxPHRm+L+5/YTmSJPH03dMO67ynG9113cd/uQ/9jq/JeuA9BFVP56efP7mQpFgjv/vpoaWz/yFBMciCsuW8v+Nz3H4P5+RO57KhFxxW6/tYIQvScaKqwcYvn1nKqIJ4rro4gUcX/41Bcbn8dtqd/bpdBsQgz61+vc/xpd31tfx+0QsE1DbGRE3jvtlXoBAU7G4p46X1/6HB3syU9LFcO3zuQd2jg04r5c/fSvOIKawzWNjesgtHsCtWnV9LwBaF6LAgOixIrkiQ9tmrUgrERRlIiTOSlWyiID2KIdkxhzS/Zn+6UzVsayruSoFRTkAMoFaoKIjNoShxEEPjC8iOSj+qcELHGn/Qz972SnY2l7CzuYSStopwYr0EYxx50ZnkRGeQHZ1Ohjm1V5qJvnDuXkPzZ8+hjIwm6apHUEcd3ux6u9fButotLK1cw+7WMrRKDWflTuPiwtkHHespaS3nmVWvYfc5uGPc9eHoEg6vk2dW/4vtTXugOZvxMTP49dVjwsdJksSXJYv479ZPiNZbuGfiLYedl0eSJLY07uSdLR9TY2tgcFweN468nMyoVP716Xa+WVPFh386/7ASS55udNd1nz77CNpNn5Dxy3+jjOg5NeSf87by/YYa3v3jeaiUR36v2LwO3t8+n4VlKzBqI7h62BxmZE06ofefLEjHgaAocd/zy2jucPH0PZP406q/EpCC/GX2b3tMWNwfURJ5ce1brKhaxw0jfsL5+40vbaou4anl/0RC5OpBV3PxqAmIoshHu75k3q6viTVE87Mx11CUePCIwhUdNXy69B22OytxqBRIogLRGoPel0x6RBaZ0UnEWgyYIjTotSqUSgFRlPD4gjhcPtptHpraXdQ2O6husiOKEoIA+elRTByaxPRRqcRaelfCoiRSY63vipBdwq7mknCIoQxLKkUJhRQlDmJQbO5RD7D6gn46PTZsHjvugAdf0B/2stMo1ejVulBQUZ3pkOfI9EcgGKC8o5rdrXsp
aQ1Fk2h3d4b3xxqiSTMnkRSZQHJXPMD4iBhiDdE9uqk8dSU0vv8EglJD0rWPoYlJOSJ7Kjtq+WLPQpZXr0On0nLZkPM5N2/GAeeedHps/H3lq+xuLePyoRdy6eBzEQSBgBjkjU3vh9Ktdybw+vUPEKnv+duWtlXw3OrXaXV1cMXQC5kzaPZhO18ExSCLy1fx3o75OHxOzs6dTqx7BP/6eDevP3wW8VEn/kn9ZKG7rpv/0p9Rr/4vqT9/AU1Mzy61lVvrefI/6/nr3VMpyDh6r7mKjhre2PQ+u1vLyI5K56ZRl/eadH+8kAXpOPDtmipe/HALv75mNMWB71lSuZrHZ/76gD/qf7d+wme7v+PKYRcxd/C54e2b63bz5LJ/IPnV3D3uZ0wpLMDudfDs6tfZ3rSb6ZkTuHnUFQesWEVRZE3tJt7f8i0N7loUIuQ4g4i6s5hRMJrR+UmH7FK9P15/kJLqDraVtrJhdxN7azoRhFCsrQumZhAZ42ZPWzm7W/ZS3Lo3nKIiISKWoQmFDE0IheE50u6/7lTn3V5sddYGGh0t4WgKh4JBrSc+IoakyATSzElkWEK5mWIMUUc8wNvptlLeUUNVZ23Y8aPB3oRvvxTlAgIWvYlYQzRxhmhiI2KIFgWUaz4nNigw5KrH0cUceV9+ra2Bt7fMY3PDTrKj0rlrwk2kHCCumT/o55X1/2VZ1VpmZk/m1tFXoVQokSSJV5Z/xqL6b0mOSOH3Z97Va/K2y+fm1Q3/ZVXNRobGF3DnhBsP6EXXHw6vk/d2zGfB3uVEqI207cjnz9ddwuCsmMM+1+lCd133xevPolz6Gsk3PYUuuaeLd1O7i58+sYA7Li3i3ElHFlX8h0iSxMrqDbyz9WPa3Z1MyxjPNcMvIeogE/ePFlmQjjE+f5Db/ryQWIue6y6P44mlzzOncDbXDL+k32NWVK3n+TX/5sycqdw6+qpwRbintZzfLXyGgEfLXaN/zvRhuTQ5WvjT0hdpcbXz09FXMTN7Ur/nFSWRlVUbeHfb57S6WxE9BizOLO6xLSN+9AUknHX9MfnMoiTS5GhlQ9UeVpTupNJajajtRFDsS0ZXGJfLoK5YcHERR1bBePweilv3sqNpD8Ute6noqA7HmovURJBqTibJGNflJm3BpDViUOvRqjQoBAWiJOIP+nH5PTh8Tjrc1i6ngdau9A/7opGbtEZyozPJj82mMDaH3OjMo2q5iZJIh9tKk6OVJkcLLa5Q7LruNOWtro4eKeOVUmiuVmZMKHNuYWwOmZa0w+o+kSSJNbWbeG3je/gCPm4dczXTMscfsPz7O+bz8a5vGJcygnsm3oxaqcbl8XPNX99Al7edaIOJ30y7k1RzUq9jv69YzRub3kejVPOL8TcwKnnY4X9RdLW6Vr5Js7uZ4dGjuW/G9SeNW/KJpruu+/LtV1AseIHEqx7FkD28RxlJkrj6ka+ZPDyZOy8bcUyv7/F7+HT3t8zfvRCVQsllQy7g3PwZx20cV472fYxZtL6aNquHu68czr83vUSiMe6AHnV1tkZeWf8OhbE53DzqirAY1dub+OP3LxLwargo5WqmD8ul1tbA498/S0AM8ugZv6Qwrv8WV0lrOa9vei+Um8cVibptDDdMmcnEyHpaP/0ec+G4I/p8oiTS6GihoqOa8vZqyjuqqeioCXe/aZRq8tPSUXvzKNkN1uYIskfkcf3MoYed70aSJKo6a9ncsJMtjbvCkRBUChV5MZlcWHhWV96kzKNyLe/GE/BS3VnX5WUXyg67f+qInKgMBsXlMiguj4LY7MMa9FUIilDiPUMUg+N7xyoUJZFOt41mZys1NTso3TCf5mArOwJulletA0K5i0YkDmZS+mhGJA09aKUgCAIT00ZTEJvD86v/zYtr36TaWs/VRXP67FYTBIErh83BpI3kzc0f8pcVL3Hv5J9j0GlI0eZidKbQplvGw4ue5v4pt/f4HIIgMDN7EgWx2Ty7+nWeXP5PLsifxdXD
LznsyisvJotfjb6Hez/8F1vZyMOLmrh/yu0n3STOE4lCE+oBEX+QCBNC331GkomapkPvGThUdGodVw6bwxlZk3hz0we8vXUeSyvX8NPRVx2w/jlSZEE6hkiSxBcrK8hNNdNEMQ32Zh6ceke/eUmCYpAX176JRqnml5N+Gr5xXT43Ty79J15fkBTHbK6ZNZJGRwuPf/8sAI/P/HWvJ9RuvAEf/9v2KV+Xfo+WCHxlRYyIH8Gvfj4Ks1FLy5ffoNAa0KYcPICrJEm0uToo6Yq0XdZeRWVHDe5AKISOWqEi3ZLCpPQx5EZnkB2VQZo5KTxe4Z4V4P0Fe/hkyV62lbXymxvGkptqOeA1nT4X25t2s6lhB1sadoZdzTMtqZyfP4uihEIKYnN6RLc+VuhUWvJjs3sMztu9DkraKkLdji17+aJkEZ/t/g4BgTRzMoWxORTE5pAXk0mCMe6IRVEhKIg2WIg2WCiMy2VyZBqNHz6JIX8sqgvuZ3drGVsbi9lUv50V1esx60zMzpnKufkzDurhFq238PAZ9/DGpveZv/s7Oj1Wbh97Xb/jSuflz0Sv0vHy+nd4cvk/eHDqL8hMMrOzIsBfrrifPy17kT8ufZ67J9zEhLRRPY5NMSXyxJn38/aWeXxRsojStgp+NfnWw+7CUylUBGoKuWTMeBY1f8ZvFjzJb6bdGY4y8WNDodEhApK370nfCdEGtpW2HLfrJxrjeGDqHayv28obmz/g0cV/ZWbWJK4efkk4VuKxQBakY0hZrZXqRjs/u3QQH+96jSHx+YxM6j/+15cliyhrr+KXE38avmElSeLlDe/Q5GzBWzqWO2+ZhNPv5ImlLxAUg/z+AGJUZ2vkbytfpdbWQLZ2BDtXxnLBxDx+evGw8Ix3d9UOdOlDeqXl7qbZ0cq2pt3saN7DnpYy2rrcv9UKFZmWVKZmjiM7KoPsqHRSzUkHfPrVa1XceMEQJgxN4qm3N/DAiyt44PoxjBu8bywjKAYpa69iW1MxWxt2UdJegSRJRKj1FCUOZmTSEIYnDj7ufdf9Eak1Mjp5GKO7up+8AR+lbRXsbt3L7pYyllWt5buyZUCoBZMTHcp4m2lJI8OSSkJE7BF5KRnyRhM963raF75JVFIuUybPZUrGOAJikK2Nu1hQtpwPd37JFyWL+Mng8zkv/8COCyqFkp+OvooovYUPdnyOKEncOf6Gfh0QZmRPQqVQ8eK6N3lq+T/JTTiLpZvdRKrM/GHmvTy14iWeWfUat465qleGUo1SzS2jr2RQXB4vrX+bB7/7M/dN+Tl5MYc+vuELhCYoF1gKmFl0P39a+iK/X/IMD0+/+7DOc7rQLUiiz9Pn/oRoA202D/6AiFp1fLziBEFgXOoIihIK+WjX13y5ZyEb67dz29hrGJsy/OAnOARkQTqGrNhah1IhEDTXYK2x8+uht/X7xNzpsfHRzq8YlTyMifs9Za6oWs+amk0omwYxKnUQOSlmnlj2PO2uDn4341f9itG2xmL+tvJV1EoVV+Vcz7/fbWbmmDRuu2RY2IaAvYNARyOm0WeHjwt1i9WxqmYD6+u2UmcLRduO0pkZFJ9HYWwOeTFZZJhTeniFHQ6FmdH8/ZfTePz1tfzpzTXceFkqisgOdjWXUNyyF3cgFKE6OyqdSwadw4jEIeTFZJ6UUYm1Kg1DEwrCwSmDYpBaW0NXvqYqytqr+LT4O8SusS21Uk1aV86iVFMSKaZEUk2JxB+CUJnHXYC3vpSOpe+iTx+ELm0QKoUyLJBVnbX8b9tnvL11HquqN3DPxJtJjIzv93yCIPCTIeehEATe2z4fo8bAzaOu6Lf81MxxSEi8uPZNnEYJyKShzUlWspmHp9/N31f9i1c3/A9f0M95+TN7HT8pfTSppkT+suIlHvv+GX458ZZDrrhc7tB4WoReTaoplj/MupfHvn+GPy19gT+eef8BHTRORwR1d5dd3y0ks1GLJIHT7T/uqTt0ah3XDr+EaRnj+Mfat3h6
xctMyxzPzSOvOKRpDgdCFqRjyMbdzQzOjub7qoXkRmf2mSivm492fok/6OeGET8JC4bD6+TNLR+SpE+hvCqd83+axae7v2V70x5+Pva6fud5rKnZxHOrXyfFlMT9k2/n4Rc3kxIXwR0/Gd5DEL11ewDQpRbi8XtYUrmGRWUrqLLWoRAUDInP48zsKQxPGkxKZOIxCSNi8zrY21ZJSVs5pmFlaBPK+V95qLJJioxnSsbYUAUfX9CvS/yR4Al4aXa00ubuwOoJBVr1Bf1ISCgFZTjtdpTeTKwhihhD9BEN1CoVSjK6cjN1txR8QT811vpwmKUaaz3bGotZWrkmfJxaoSLVnESWJY3s6HRyozPJsKT2EGFBEIg77+d4G8po/vRZUm/9Owrdvu65DEsqv5n2C9bUbOKVDf/lwQVP9hrb6YtLBp2Dw+vki5JFpEQmcnZe/3HQpmWOx+V38+9N76POdFHXPI6sZDNalYb7Jv+MZ9e8zpubP0QhKDgn74xex6dbUvjTmQ/w5PJ/8reVr3Ln+BuZkjH2oN9rpyMUjsoUEeqajTFE8cgZ9/DbBU/y9IqXefKsB49ZVJBTAUGpAoUqnMb8h+g0of+NxxcATkwuqXRLCk+ceT/zdn3NJ8XfUNpawa8n30a65cimLIAsSMcMp9tPVaONM2caWGFv4s7xN/Zbtt3dyeLyVczInkzSfk+0H+36CofPSZ50Dk06D7GJAf628EsmpY1mRlbv8D4Amxt28Nzq18mJzuS30+5kU3E7Da1OfnvjOLTqnhWst6EMr1LJ5+17+Grtyzj9brKi0vjp6CuZkDb6qPuCXX43lR01lLWHInyXtVWGw9woBAUZ5hTOyJrImrVeJHs0j//yHMzGo795XD43e9sr9wt1VEuLs+2wzqFUKEmNTCSrSxwKYrNJMycfUUBTjVIdCib7g4yrTp+LOlsjtbZGam0NVHfWsb5uK4srVgGhMayihEGMSx3BmJQiDGo9Cq2B+Dn3UP/WQ7QtfJO4C37R63oT0kaRHZ3Bn5e9yBNLn+eh6XcxOL7/YLmCIHDt8LnU25t4c8uH5ERnkBuT2W/5c/LOoMXRyed8y5KaZUwZcRUAKqWKX078KX9f9S/+vel9jJqIPsXGpIvk0TPu4cnl/+SFtW+gU2kYc5CWUoc91DW1fwqK+IgYfjXpVh7//lne2foJPx1z1QHPcbqh0OqQ+umy02lDVbnbG+hz//FCpVRxxbALKUos5JlVr/HQwr9w+7jrmZQ++sjOd4zt+9FSXm9FksCqKkev0jEhdWS/ZReWLScoBrmo8KzwtnZ3Jwv2LuOMzIls+x6GZMfy9taP0Km0Pbzv9qfGWs/fV71GujmF306/E4Naz7LNdUSbdIwf0rNLQ5IkVjTt5LOMGOy7vmJMchEXDzr7sGfXd+P2e0KpJbrEp6Kjmgb7vtTisYZosqPTmZUzhfyYLLKjM9CpQuJzdpqVXz+3jH98tJXf3DD2sFpioiTSaG+mtCsh3Z7WMmqtDWF37SRjPHnRmczMmkRiZByxhmgsOhMRGgMapQYFAkFJxB3w4PA66fBYaXG202Bvoqqzlo3121lSsRoIjQkNTxzEqKRhDE8afNSCHaEx9HKakCSJFlc7e9sq2NFcwqb67ayr24JWqeHMnKnMKTwLS0o+lolz6Fz1Ccah09Bn9nanjo+I4fGZv+bRxX/jLyte5snZvyFxvxQlP0ShUHDnhBu579sneGHtGzw9+6EDulZfM+Iivtq4nW3ScopbxoSzGqsUSn458RaeWPoCL637D4nGuD7FTafW8eDUO/j9kmd5bvW/eeLM+w/4JN3a6UGjVvbyzBwSn885eWfwTekSZudOO6qn8VMNhcbQb5edXhOqyj3eYJ/7jzeD4vJ4avZv+fvKV3lu9es4fA5m5x5aBPL9kQXpGFHf4gQkKhyljE4p6vfmFkWRReUrGZE0uEeF8VXJ9wSkIBcVzOabD9eSPzjI6qbd3Djysj7Dv/gC
Pv6+8l/oVFoemHoHBnWo73ZXRRtjBiX0CLdi9zp4ad3bbBA6yFTo+M2sew74RPxDJEmi3t7E7pa97GkrZ29bZY/cRjGGKLKj0pmWMZ7s6HSyo9IPONk1O8XMdecW8sYXu1i5rZ4pw/uuVDwBL/W2RmqsDVRZQ7mV9ncx16t05MdmMTFtNPkxWeREZxySK7aK0FiQRWfqcz5Nk7OVPS1lbG/ezdaGXays3oBCUDA8cRBT0scxNqXomHUXCYJAfEQM8RExTEofgyiJlLZVsKBsOV+Xfs/i8pX8dPRVTJ5yGY7i1bR+/Sqpt/0doQ/PzUitkd9Mu5MHvn2CF9e8yeOzfn3AFp5RE8Ed467nD0ue49Pd33L50Av7LasQFOSI0ykLfs7za97g7+c8Gp6MrVGquXfybTz43Z95ZtW/ePqch8P/x/3RqXU8MOV27vvuTzy35t88ddZv+h2XbOl0EWfR9/mwctmQ81lSsZpPir/hnom39Gvz6YZCq0Psx8tO26PLbmCI0pt55Ix7+Pvq10Jz34J+Lig487DOIQvSMaLd6kaIsOLwOxl1AM+6nS0ldLit3DTy8vC2QDDA9+UrGZsyHK1kQhQlKoObiNKbe3kwdfPBzi+oszfy8PS7w7HrPL4AVoePlLh9T/K1tgaeWvZP2twdnN9i54LhM4k5BDFy+JxsadjJpoadbG/ajbXL/TpSE0FeTBaT0keHYrYdRHz6Y860HJZuruPV+ZuITvBi83fS7Gyl0dFKk6OZenszba6OcHm1QkW6eZ+LeW50JqmmpGMeZ0sQBBKNcSQa45ieNQFREilvr2Zd3RZWVK3nhbVvoFfrmJU9hXPyziD+CCf59odCUFDQ5Up+6eDzeHn927yw9g06hs/lzNm30Pj+E1jXf4Vlwpw+j4+PiOGGkZfxz3X/YVX1xoOO1wxLKGRS2mjm717A7NzpBwwQm5MUw65Nw2gvXMOHO77g+pE/Ce+L1Bq5Z+ItPLL4r7y3bT43j+7bWcKiN/OzMdfwlxUv8XXpEi4s7LvCau5wE99PHiqjNoIzsiayoGw5Lr+7T/E7HRE0+v5bSF1ddgMpSAAalYZ7J/+M59f8m/9smUdcRAzjD9Bb9ENkQTpGONx+dNGdABQl9J/pcn3dVtRKdQ938K1Nxdh9TmZmTcLtDSDoHDR4q7hq2Jw+5zDV25v4cs8iZmZN6hG/LhgMtVi63T6rO+v4/ZJnUSDw0PCr0H/4d7QHCEfjD/pZX7eNpZVr2Na4i6AkEqk1UpRQyJD4AgbH5ZIUmXBYXWxBMUirq51GRwtNjpZQpAJnK82OVjrSW/EEPTy29Ktw+UhNBInGOIbE5ZMUGR/ySjMnkWSMHxCvO4WgIDcmk9yYTK4cdhF7Wsv4bu8yvipZzNel33N+/ix+Mvjc4zLAnhQZz6Nn/JLn17zBO1s/Jn/mvZhzRtK5ch6Rw2ei1PcdOHVa5ng+2/0dX5YsOiQHgiuGXcTq2k18XfI9VxX1LXQAeekW/EvMTIgfzdd7l3Bu/oweUTfyY7M5K2cq35Ut47z8Gf16/I1JKaIoYRDzd3/HuXln9NlKam53kTOsb49SgPGpI/i69Ht2NpccM5fjkx2FSoMY8PW5T6ftmvs3QF12+6NSKLlz/I20Odt5cc2bpM5OOmSvSFmQjhG+gIjC2ElSZOIBIyzvbNrD4LjcHhM719duQa/WUZQwiKZ2D8qYhlCuo34cGebt/AqVQsWVP6g89FoVKqVAp91Lq6udPy59HrVCxWMzfkVkfSVNgMqS0Ot8br+Hb/cu5auSxXR6bMQYoji/YBbjU0eSE51xSAP7/qCfOlsj1dZ6am0N1NoaabA10ehsISjuu0nUSjXxhhjijbEUxOawu9TD3jIf910xjRHpGUftNno8UQiKrjQYeVwz/BI+2P4F83d/x9bGXTx6xj3H1EuwG6VCyR3jrmdXSylf7FnIXTOupe61e+lc/SkxM6/r184ZWRN5Z+sntLk6iDFEHfAa
SZHxjEwaypLK1Vwx7MJ+f++C9FCkhDRGsVnaxNelS7h+xKU9ylw6+DwWl6/im71LuXHkZf1e89z8GTy1/J9sayruFWLI4w3ltjpQUNWc6EwEBKo6a380goRSjeTp28suPIY0wC2kbjRKNfdO+Tn/9/XveX3jezxyxj2H9CArC9IxQgBEnZWsqP6765w+F7W2xnCIfwiNWWxtKqYoYRAqpQqjXo0yqok4TUqfk0E73FZWVW/g7LwzenWvKBQC6QkmSmvbeWblF3gDPp44834SI+Ox2jcBoDLte6KVJIklFav537ZPsXrtDE8cxB3511OUMOiAXWGiKFJtraekrZy97ZWUt1dTZ2sIx5VTCgoSI+NJNiUwJqWIpK4o14nGOCx6U48Kz1rg5fanFvHxV82Mv7PgwF/ySUSsIZo7xoe8iZ5e8TJvb/2YO8YdXmxASZJweUKZbO0uH15/EFGUUCoUGHQqTBFaok1atCoNg+PyKO+oRpuQiXHIFGwbvsEy/qJeqQi6KYwNTTmo6Kg5qCABTE4bw6b67VR01PTyDuwmLkpPfLSB8kofI3OHsrJ6PdcNn9ujoonSmxmZNIQ1NZt6TGn4IcMSClEr1exo2tNLkFo6Q91S/XXZQWgMMFIbQbvbetDPdrogqNRI+wXo3Z/wGNJJ0ELqJkpv5qqiOby28T021m9nTErRQY+RBekYoVKLEHQfsGla0VGDhNTDoaDN3UGbq4M5XbmPRIUbhcGBWew7MOXyqrUEJZHZuX0nLivKi+WrvQtQtlfyq0k/DQ/aBx0dIChQGkIiZvM6+MfaN9ncsJOC2Bzun3r7AWfAt7ra2VS/nS2NxT1SR0RqjeREpTM6eRjplmQyzKkkRsYf8pwes1HLbZcU8bf/buSdr4u54fzBh3TcycKIpCHkx2ZT1lbZbxlJkmjpdFNWa6WiPhTNo67FQVO786BdLAqFQFJMBL6MWrRaJTVNduInX4pj5wqsG74ienrfrs/mrla6w+c8pM8xKD4kYHvbKvsVJIDhubGs2t7ALVOHsaF+Gw32JpJ/8J8fnjiY9XVbaXG2EW/sOwW5RqkmOTKBentTr32tXYIUd5C0EwpBgSiePBXw8UZQqqAfQdJ1tZBc3r73DxSzsqcwb9fXLChbLgvSiUTQecADUbr+n0ZrbQ0APVIEV3bUAJAdFYrRVdpeCYC7ve/B5bU1m8mJyiA5snfXG8CooWa+cZSRpstjYtq+uQBBtw2F3oigUNJgb+aPS5+n023l5lFXMDt3Wp/dNE6fi+VV61hetY7StgoA4iJimJA2isFxoSgOcRExRz2B9oxRqewoa+WjxaUkxhg4e0LmUZ3vRLKudgu7Wko5P29fpAKXx09JdQfFlR3sqWqntKYTmzPU9y8IkBgTSqE+LDeWWLMeS6QWU4QGrUaJQhAIBEVcngCdDi8tHS52teyhXNFER3ked6xcTFJMBLeaC2H911gmzUWh7j2XqzvVx6Hme4rRR6FSqGhxtR+w3Ij8OBasq0bpDbXMam2NvQSp+6GsydnaryBBKPWHJ+Dttb3NGhKkGPOBU6o4fC6Mx6Gb9GRFUKqRgn13ySkUAnqt6oTPQzoYSoWS6ZkT+Gz3dzh8zoPGXTyugvT555/z0ksv4ff7ufHGG7nmmmt67N+5cyePPvoofr+fpKQknn76aUymY5sa+0Sh1YWe1BTB/m+iFmcbaqWaKN2+bpb6rrk73Tdxd+rumkp6xaVy+Jzsba/i0iHn9XuNnfb1CAqRll0ZeM4JhCfMiR4nSl0EzY5WHvv+7+G4eH25f3e4rXy2+zsWla/EG/CSYU7hqmFzGJ864rCdGkRRxOl34faHEuV1p1hQCAq0Kg0GtZ4IjYGfXVJEa6ebFz/cSpvVwxVnFYTj752MBMUgnxR/y4c7vyA9Mo143whe/HALuyvbqW6yI0kh8UlLiGTc4ERy0yzkpJrJTDKFn2YPhZLWcr5ftpwUfSL/d8PNbC/tYPW2Bv5Xmc5d
kcW8+/JbjDj/UoZk9/T229NaDoSiORwKgiCgVqh6pMDoi+F5cQgCVNSEhMTudfQqo1WGxkd9/TzNd+PyuYjpI4J3m7X3pNgf0uhoJiAGSD5AqKTTDUGpQgr0/50adCrcnpNLkACGxhfwafG3lLdXHzSJ6HETpKamJp555hk+/vhjNBoNV155JePHjyc3d184nSeeeIK7776b6dOn8+STT/L666/zq1/96niZdFyJMIY83Jy9788wnR4bFp2pR4Xe5upAr9KF58802luIVJto9gjsKm9jeP6+uUpl7VVISAzqJySRP+hncfkqBkcPYeN6Fe8t2MONFwwBQPJ58Ku1PLviJXxBP7+f8X+9JhUGxSCf71nIvJ1fERADTE4fy3n5M8g+QBcOhFKv11obqOysoc7WSIO9mRZnG+3uTmxeR3i+Un8IgoBFayI2M5r0SBUfbqtgZfk2fn7ONIZlnTwxyyRJorXTw5q9u5lf+SlWsRmhI4Xd6/PZLe4kQqeiICOayUXJFGREU5ARddgpN7oJBAPM37OAD3d8QWxEDL+ddidxERbS4iycNymL1s4RNPx7C+mdG3nwHwmMyIvjpguHkJ1iRpREFpavIMOccsDJsfvjC/hwBzwHfYI1G7XkplrYXtYEsfTp+ej0h1pnhgO0zgLBAA2OZob24ZHabvNg1KvRqPvv9t3RXAJwwjKZngyExpD6FxyDToXrJBSkzK6Holpbw8AJ0qpVq5gwYQIWiwWAs88+m2+++YY777wzXEYURZzOUB+32+3GbO49QGuz2bDZbD22NTY2Hi+zjxhTZOjmaW7t2y0TwOl3Y1T37Be3ee09vPI6PJ3EG6OxapQs31rXQ5CqO+sByLKk9Xn+7U27cficXDR+OtEdfj5ZspdxQxIZnBWDFPTzpT5IjbWe3067q5cYtbk6+Puqf1HaVsHYlOFcN3xuv267kiRR0VHD5oYd7GjeQ2lbRfhpWKVQkWCMJT4ilpzoDMw6E5HaCAxqPRqlGqVCiYBAUAriDfhw+d3YvA463Faana0EDE2o0620sIfH1yxBvzKGooQhXDJiKjlxJ25Wvs3po67ZQU2znaoGG5UNNsob2/FG70SZUAUBDVG2iQyLHUb+iGgKM6NIi4/sMSH5SAiIQVZVb+DDnV/S5GhhQtoobht9NUZtT6GItRhQTz2fiO/+zS9mRvGfNZ386pklXDAlm/i8Jmqs9fzyMCaNVnbWApDWT/De/RlVEM9H68rQxNKnw0StNdQ1nWTsv/VS0vWf6SunTofdS9QBWkcAK6s3kGSM77fr+lTkYHXdgbrsAAw6NU73yTWGBIQftp195HL6IcdNkJqbm4mL21eZxsfHs23bth5lHnzwQW666Sb+9Kc/odfr+eCDD3qd56233uLFF188XmYeMzTqrrk/jf03kfxBX695RW6/B4Nq383n8LqIMliYOCyJFVvq+OlFQ8Pdbk3OFiI0hl6VUzebG3aiVWkZllBA4RzYUdbGU/9Zz99/OZ0G0csKlZezc6czIqmn40CdrZE/LHkOl9/NLyfe0sMLcH9aXe0sKlvJ8qq1NDvbEBDItKQyK3sKeTFZZEWlkWiMO+r5Qjavg12NZXyzbRO720tY176UdYuXovHFkBcxgklpY8hIMBMXpcccoT0sEZAkCY8viM3pw+rw0m7z0NbppqXTTXOHm4Y2J42tThz73dhajZL4NAeqwRsJ4mBM/DhuGXspMcZj171s9dj4vmI135Yupc3dQYYlld9Ou5MRSUP6PcY4ZBptC99irK6SKb+5kv98XcyXW9ej8WygIKqgxxjiwVhXtxWFoGBw3MHzZA3Pj2Pezk5g39Pv/uxo3kN8REyvVOf7s6ZmE2qFiqKE3k/MnXYvUQeIWF3ZUUtxSynXFF1yTAIAnywcrK4TlKp+vewg1MVZ23zsk/QdLYIgdD2Eigcte9wEqa/M6Pv/eTweDw899BBvvfUWRUVFvPHGGzzwwAO8+uqrPY654YYb
uOSSnum/Gxsbe41HDTTdn620phNJkvq8UURJ6uVOHRCDPSYGeoJetEoNsydksmRjLd9vquXciZkAdLptROv6v8n3tJaRH5OFWqlGrYSHbhrHfS8s4/evrSElyYUaemWvbXd18sclzxOURP4w694+xxxanG18sOMLVlStQ5QkihILuXTweYxKHnpEURoOhklrZELGcCZkDEcUJdaVVvJN8SpKA9vY6V/E9uJVBBbmEGxNQaFQYIrQEKFTo9cqUauUKBQCCkFAlCQCARFfIIjHF8TtCeBw+wkEe98YKqVAXJSBhGgDU0ekkBxnJDkuguRYAwtrv+Wr0qWkmBK5feztRxz/74cExSBbG4v5vmIVG+q2EpREhsTnc8voKxmVPPSg87+UhkgM2SNw7lpJ9MzrGDsBVgQ3I3oi2f59Gt+aqjin679zIDwBL9+Xr2RU8rBDmktVkB6FKqqFSCG2V3mX3822pt3Myprc7/Fuv4elVWsYnzqyT6eLdpuHQZn9Z4edt+sr9CodZ+ZMOaitpxKHVNf1Ua92E2fRs6Wkud/6Z6Bw+z1ISBgPMazXcSEhIYENGzaE3zc3NxMfv68JX1JSglarpago5Ap4xRVX8Nxzz/U6j8lkOiUcHZRdQmNzeahuspOR2NtmhSDg/0FlKAj0+JOJkoRSUDA4K5rcVDOfLNnL7PEZKBUCTr+r31htoihSa2vkvPwZ4W0ZSSYevH4cf3hrOc0pXiYGND0ChIqSyPNr/o3T7+Lxmb/uJUaiJPL57oV8sPMLBODs3OmcVzDrmIfLORAKhcCEgiwmFGQhSVezoW477237ghrtDmIKWhmingmeSJxuPx5fEH8gSFCUQuIvCBh0KixqLVqNEoNOTYROhdGgwRyhwWzUEmXSEmPWYzH2bml5Az7+uvJltjYWc07eGVw7fG6/2X8Ph0Z7M4srVrG0Yg0dHiuRWiPn5s1gZs5kUk0H7zLbn4jCCVjLNvLWqjf4qnY9OVEZ3DX2Nl7x7uEfH22lucPFdecOOmAF9Vnxd9h9Ti7umnpwMFrcLQjGTlT2Eb32rahahz/oZ2rmuH6P/3bvUtx+D+cXzOq1T5Ik2m2efrvsdrfsZW3tZi4bcv5hpZA/FTikuu4Av2OsRY/bG8Tp9mM0HPuMykdKd1ddhHoABWnSpEm88MILtLe3o9fr+e677/jDH/4Q3p+RkUFjYyPl5eVkZ2ezaNEihg3re+7NqYBW2dXFoAyyeU9zn4KkVmqwe3vOC1EqVAT2m0uhFBSIkoggCFw2K58/v7WepZtqmDkmHX8w0G/q7nZ3JwExQOIP+u1HFcZz4XkRfFMPGc0ebE5fOMfM9+Wr2NVSyu1jryMzque4lMfv4ZnVr7O5YQfjUkZw46jLiDX0/9R6IhAEgbGpRYxJGcbyqnW8vWUeqzwfcMOInzA7d9oxfSoUJZFnV7/Gtsbd/HzstczM7v+J/1DPt6l+B1+Xfs/2pt0IgsDIpKHcnDWR0UnDjij5oSRJlJgjeT0tmtba9ZyZM5UbR16GRqnm0ZvH89LH2/hwUSkA15/X9/yu/2fvvMOjKtM+fJ9pmZmUmfTeeyAJCaH3KoioKApi1/WzrH11i72srrruuq517b03RJDeWyChJJCekN57Mpk+5/tjkkBIIaEISO7r8pLMnHPmzWTmPO/7vM/z+xU1lvBTzhomB40Z9MpvRe56JEhpLPbAZhO7A7nNZmNl7kbCXO0WHn3RYdLzc846knxH9Nnv1NZhxmyx9VnybbFZeS/9K9zVriw8Rin/wkHE3oLfN56djcR1zfpzKiDVdlrQePRRUXk8Z3SF9MADD3DDDTdgNptZvHgxCQkJ3Hbbbdx7773Ex8fzj3/8g/vvvx9RFHF3d+f5558/U8M543QJPHp7yNlzuIbLp/WuhFPLVZSZK3s8ppQ5oLcc9ThRSOUYrfbCiPEjfQkP0PD56px+FbG7aO4UP+1L3cHkUIdSlOCr0/PXN7bx1B8m4KZ14Lus
VUS5h/WSKDJaTDy/9XXyGo7wh9FLmRN+em/2p4ogCEwNGcconzje2PMJ7+/7ipKWCv6QvPS0ia2uyd9CemUmtyQvOaVgZLPZ2FGaxg9Zv1LRVo27ypUlIxcyI3RityjuUBFFkey6fL49vJLDtXl4yBTcYVQzM2VZ9zFSqYQ/LrZL6ny7IR8vV3Wv9F2zoZWXd7yD1sGFm5OvZjBUt9ex5cguopwS2d8ho6HF0H0j3FmWRlV7LQ9OvK3fz8uP2atpN+lY0o+yeF2TfTbtoe2t0vBT9hpKWyp4ePId3VYmFxKiKA4Uj7rfs/pmPaF+/af2f2sqWu3Nz/7OJ66YPaN9SAsXLmThwp4fvHfffbf739OmTWPatKF7ZpyLOHcWGkSEqNm5tZ7mNmMvK2EXhSPtx62QnOTqHtUnKrmKDrM9QEkkAjcvGMFj/9vJ8q2FyCTSfvtEuoKaStZ7ZnmkuYwQuSMeylYamw089N+tLF6koaGjiZuTru5183h/31fk1Bdy/4Q/nLTR1m+Bi9KZv0y5ky8zlrM8Zy06Uwf3jL/5pJxfj8VsNfND1q/Ee0dz0Ul4unSRW1/Ie+lfUdJcTrDGn3vH38L4wOSTHp/VZmVPxQF+yd1AfsMRNEoXbk66mtHl5bSnrsBm1CNxOHojFwSBO69MpK5Jzzs/ZRIT4kaI71Gljr9v/i9txnaenvngoHX4PjnwPTKpnFlBM9nPYbtNhKsKi9XC15krCNb4MzZgVJ/n1uoaWJW3sdOmpO9WgupG+3fB261neqe4qZzvD69kYlDKhaNd1wfCQCsk7dEV0rlERVs1SplDn5Pl4zm92v0XMF2b+wF+CmwibD9Y0ecxeosB4zGKvS5KZ9pNHd1pO2cHxx7NholRnowb4cM36/OQIEdv7tsx0mqz7031dbOraa/HR+GCYOrgxbsmIJdL+WT7JhQSh146YhnV2Ww+sotFsfPO6WDUhUSQcG3iIq5LvIJdZem8uecTbIOo5hmI3PoiWoxtzIuccVIrQ6vNyucHf+SJDf+i3aTj3vG38OJFjzA5eMxJBaNmQys/ZP3K3b88zis736PV0MatyUt5Y8GzzI+agVPwSLBZMVbm9zpXKhF4cFkyjko5r39zAFEUqdU18OTGf1HVXsvDk+84YZ9ZF3srDpJWcZAr4+bj72pXYOhSoFhTsIUaXT3LEi/vtxjj84M/IgjCgIrilXX2z76fx9FKUrPVzOupH+Hk4MStyX3bWlwQdHVb94PWWYlUInRLL50rVLbW4O/sM6jv0nBAOk04KdTIJDJEuYFQPxc2ppX1OsZNpQWg4Rh5FjeVBhGRZoNdJFKrdKHJ0FMw8rbL47GJUFFlpK0fbbKuogrLcdpeFquFDrO+uwTX38nKv++bioO2BX2DC9+sy+9REfnd4ZV4qt1YPIAaxLnIpTFzWBp/KdtL9vB15opTulZFq733o799kIEwWky8sO1NluesZVbYJP497wkmB485KSv0goZi/rv7Q+5c8QhfZf6Mn4s3f558B69e/DQXRU7rNoF08LfblRsq8vq8jsbJgesvjiW3tInv9uzkb+teoFnfwqNT7z5ho2IXbcZ23k37gmCNP5dEz0bRqSBiNttoM7bz3eGVJHjH9rBVOZacugJ2laVzWczcAcVeS6vb8NAoUSuPFo98e3glpS0V3DnmujOiqH7+MHCDuVQi4K5VnZMrJD+XwfWLDWvZnSYkggR3lZb6jkZmjI7ngxWHKatpI9D7aNNrl3dMXUdjt/5X15ezXteEh9oNd5UrOlMHBouxO0/u7aZm6ZwovsjIQilvxSbaet3guo49Xhusq2FVpbKv4Cyt9Tj5u2GVtxOkDePLtblU1LZz39IkmoyN5NQXcm3CIuSnoZrst2ZR7DzqdI38mL2aUNdAxgcmn9R1zDb7ezbUijqrzcrLO/5HRk02t6dcy6yTKEsWRZF9VYf4KXsNufWFqGRK5oRPYV7EtF6acV1IlY7IXH0w1RT3e93J
Sd58lJ7Pt0dWE6Dx5aFJ/9fv9foa09t7P6PNpOORqXfbU8ed3ltSqcDXmSvQW4zceIxh3/Hnf37wR1yVmhMWIxSUNxPmr+3+uaixlJ9z1jE9dEKv1fwFh8iAKySwp+3qms6dgGQwG2joaBq0H9LwCuk04uXkTl17PdOTA5BIBDbsLe3xvHen0GRNe93Rcxztj3VVonQHLV1Dj3MXTY/ATaXFKlqpbm7ieJw7JV9ajcc1xnV+fgWVfWZpbqxCZ+rAJtqYkxzJjQvi2Hqggife2UV6eTbAoFR5z0UEQeDW5CVEuoXw1t5PqT3uPRwsLg72SUSXS+5g+T5rFQers7ht9LKTCkYZ1dn8dd0/eHHbmzR2NHFT0lW8fek/uCV5yQmDh8IrGFNtSZ/PHag6zF/WPY/FvRBbfRBPT3940MEIYFXeRvZWHOTahMu7qzHbOlN1HTSyrmgbcyOm9hANPpb9VYfJbShi8YgFAxYjtOpMlNe2Ex1sn6TZRBvvpn2Bi4NTL9+lC5OBq+wAPDSqcypl16XVOVhFjeGAdBrxcvSkWlePq4uSsXHebNhbhtlydD/DVaVBLpVT1XZsQHJHQKC63f6H69Ieqz4maAHIpBIun2APFB+vT+N4XDvTgY365h6PKzvL0Y0yGUhkmBoqutN6CqmcxTMjefi60eSWNPLN9n3IJLLzWo5FJpVx38Q/gAhv7fmkzwbtExHQebMubu69D9gf1W21/Ji9hinBY4fcsNmob+al7W/z9y3/RWfq4K6xN/Dqgme4OGrmoNW6Fe7+mJtrekjLlDSX84+tr/P81teRCAKXB1yL8UgcdY39y1sdz+HaPD49+ANj/BNZEHW0b6iqQQeIbK5Zg6NczdUjLun3Gj9mr8ZT7caMsIkDvlZGgf0zPzLcPinbXrKXwqYSrk+88oQaexcCNrMRQT5wObenq4qGFj0229A/92eCyjZ7+nvYMfYs4OvsSZuxHZ2pg4vGh7D7UDWph6u6S7YlggQfR48eKyS5VI6noxuVnaWRvp36cZWtNXBcpXdKWBif5kBqQQFZR1KICz3aoOogU6BxcKa2vb7HORKJBBcHJ5qN7cjd/TDXleEks6eiDJ3FFVOTAnBSKXh+wyFkZhk6gwWnkxQFPRfwcnTnusQreDf9C7YWpzItdPyQzg/U+KOQKjhQmUO4UywmsxWzxYbFasMmit0TValEQCaVIJdJ+LFgDQIC1yYsOuH1jyWt4iBvpH6M2WZhWcLlLIiaeVLpUpnWG2xWLG2N1EisfHd4FbtK01HLlVyfeCXzIqeRX9rKl2ynua235UNfVLXV8q8d7+Dr7MUfx93YY1O6oLwZtVcjeY0F3JK8pF85q6LGEnLrC7kp6aoTFnSkZ9fiqJITHeSK1Wblm0MrCHUNZFJw31JWFxqi2YjkBBMUd40Si1WkrcOExunsl8ZXtNYgCMKgRX6HA9JpxLdzZVHVVktSdDAeWhXrUkt79BD5OHt1B58u/F18Ke/cSHdUqNEqXbq9k47Fy9EduUSG3NXAG98d5NUHpyOTHl3k+rn4dG/IH4u3owfV7bU4eIegLz6Et1yNXCrvsZpKjvEipdyX9JoqXvx4L0/dNh6p9PxdQM8Kn8SmIzv5IvMnJgQmIyCjoUVPXZOe+hY9jS0GmtqMNLcZadHZHVvbOszo9Gb0BjOySBc2tu/j128HsYku2FAm7cHa7MUtT25F46TAzUWJl5saf08ngnxciArU4uvh2H1TF0WRn7LX8GXmcsJdg7l3wi3dk5GTQabxoFoh5ce0z9jTUIBCpuCy2LlcGjOne3Vh7Zw1D8bWo8XQyvNbX0cQBP4y5a7uPruusR/Ir0ERmoe7szezw6f0e50NRTtQSOVMD5nQ7zEAVquNPVnVjI7xQiqVkFq+n1pdAw+NWnxSBSG/R2wmA4Ji4IDUZdnR0GI4RwJSNd6OHoOeZA0HpNNIV6qrsq2GCPcQZo0J5Jv1edQ2deDV6X7p6+zNvqpDWG3W
bhHSABcfMmtysNlsSCQSAjV+3YrJxyKVSAlw8cWmspGzqY2ftxZxxYyjDbhBGj+2FO/uVfQQqPFjb8VBFH7TaD+0FWtbI35OXpS39GzSjQvwZ1/DHg4UVfD5mpx+u/vPZdo6TBRX2tW5HZvjKbCt4Na3PqClxLeXDJiDQoqrswMaRwdcnZUEejvjpJSjUsootxk4oNvMLVeE4q52Qy6TIJVKkAgCgmCvwO3SyitoKeCXSgszw8fhEhpAc5uRhhYDJVVtpB6q7g4EWmcHkqI8mZYcQIF5D99nrWJy0BjuGHv9KUkSFTWW8m3RRtKD3HFoOsJlsXO5JGpWDxV5gJoGe4WmxwDW4GD33Xpuy2s06Zt5Yvr9vWa3RRUt1AsFKCQtXJOwpN+Vj81mY3f5flL8E1ErBn7N9NxaWnUmpo6yT962FKfiqtSQ4nd+7meeCUSzAcmJAlKnwkVjq4Ew/7PfHFvcXEaQZvAq/cMB6TTi7ejRYz9o9pggvl6Xx4a9ZVwzNxoAf2dvrDYrtbqG7hmxv4svFpuFWl09Ps5eBGn8WVe4tTtAHUuwawD7KjMZHTuZr9blMjMlsLsBN8w1iDUFW6hsq+mhiRbuFsLGIztpcbcHTENFLuFuwaSW7+/xGl2utaNHy/l+Yz4T4n2JDOy/RPdsI4oiFXXtZBTUc7iogdySJmoajzYZq1Uy5DGuSD3LWRI5HW83NZ5aFe5aJW4uPUuLj6e0WcOBNZtx8+tgaujAN8UjB1ORVkn4w+xpvTbtzRYb5bVt5JU2kVnQwN6sGraW7EYRdohIx3juGnsjMunJNcqWNJfzdeYK0iozUMuUzGrUsXDsUvwS+i7ZP1TUgJNKjo97//sx7UYdz219jfLWav48+Y4+5YR+3VWE3L+QEE0gY/1H9XutI81ltBnbSRlEddyvO4vROjswOtYbk9XMweosZoVOOm3KG78HbCZDn+7Ax3LsCuls027SUd1e10sJZiCGA9JpRCaV4a52pbpzH8fH3ZGECA82pZWxdE4UgiB0b+5VtFZ3B6SuTfTy1qrOgOSHyWqmRlffK40T5hrE5iO7uHJuAPtfq+PrdbncfoX9hhndefPIrSvsEZDivOyWArliB5FyJYaSw8THjWLjkZ3kNRzp9qSJ9ghHJVPi6tuES7Yv7y0/xAt/nHxOyQaJokhuSRNb9peTeri6u8TVzUVJbIgb8yaEEOanIdjXGTcXJSty5Xx28EcWTPdDOwRl8gAXXxykCo40lzGNgfegcuoKCHML7rOCTC6TEOqnIdRPw0XjQ8irO8KTm1YhN/iQsceXx0t38bcbx3brCw6GRn0zX2YsZ2txKiq5kiUjFzI3IIW61+/EsZ/NbKPZyu5D1YyJ8+43Zdesb+G5ra9T0VrNgxNv69P6oqnVwOaiVCQhepYkLBzws5FbXwhAnGfUgL9PWU0badk1XDM3GplUQnZdIWarmQSf3uZ9FzKiyYCgHvgz3GXb0dI+uH3CM0lho73qsy/Nwv4YDkinGW8njx6FBTNGB/Dq1wfIL2smKsi1R0DqKq8O6DRFK2upIsU/sds8r6S5vFdA6vrjdkjqmTM2iNW7S7hyZiQeWhW+zt5olC4crs3rUXbs5+yNl6M76VWHiQ+KRX8kg6RZ1yGXyNhRurc7IMmlciYGpbCtJJVFs8bw0U+FZBTUkxg5uA3JM4nRbGVdagkrthVRWa9DIZOQFO3FVbOiSIz0wNfdsc+bY1cpcnVb3ZACkkQiwcvRvVf5/fGYLCYKmkpYEDXzhNc0WUy8ufdjtEoXXrz0QfbENPHm9wd55M3tvHzf1BNam9tsNn7N38RXh1ZgtVlZGDOby2MvwknhiM2gow4QbX2rVKxPLUGnN3PRuL5vDmUtlbyw9Q1aje38ZcqdJPr0na79Yk0OeBfh6+hDcj9NsF2UtlTi7OB0Qs2+7zbmo5BLuXhiKAAlndWNg1WQuFCwmY1ITpD6lMukyKQCeuPZd449UJWFXCIbkqvv
8Hr4NOPp6N7dUwR2gVSpRGBnhn2/pqtooaLtaPGBWq7CXe1KWWchQ6CLL4IgUNrSu+w4RBuATCIjr76Iq2dFYRNFlm+1z0QFQSDeO4aMmuwe8jmCIDAuIMn+eHAc5sZK5O0tjAtIYmtxKh2mo30LC6NnYbFZaVBmoHFS8Mv2otP7Bg0RURTZsLeU255bx/9+zMTZUcF9S5L49Ol5PHbLOOZPCMHPw6nfmbrebJ8pnswejVLmgMk6cIl0Vl0BVpuVEV4DrwLArjhQ2VbDnWOvx0XpzOyxQTx681hKqtv4bkNv2Z9jqW6v44mN/+LjA98R5xnBK/Of4LrEK7oLFrrK2/t6H3R6M1+tzyMu1I0RYb2tQ3aX7ePR9S9htll4auaD/Qaj/LIm1mWnIVG1c8WIi064cq7T1Z+wuqq4qpVN6WVcPDGkO/Vcq2tAIZXjOoD314WIaDIgnCBlB6BykJ31gGSz2dhTvp8RXlFDEsIdDkinGS9HD5oNrd0KCU5qBfERHuw+dLRIwc/Zu1elXaCLb3eRgUKmwM/Ju88+GLlUTphrEHn1RXi5qZmU4Me6PaUYTPYPYLLvCFqN7RQ0FPc4b2rIOKw2K+lK+02kIz+NS6Jno7cYWJW/8ejYXHy4KGIaG4q2kzhKQlp2DW0dg+9bOZ3o9Gae/SCV/3y1Hy83Nf+4axIv3zuV2WODBtz/6UIURTYf2YmTwpGgfpo2B6LVpDuhh0taxUEUUvkJ01LFTeWsyF3PjNCJPeR6Rsd4M26ED+v2lPbbM7W7bB9/WfM85a1V3DPuZv465Y94H3ejFztL+IU+7Ek+/OUwre1G/nDZyJ4mmWYD7+z9nH/vfJdAjR8vzPlbv+kVo9nKK1/uR+lfjouDM5P6cRU+lmZ964BBRRRF3v0pEyeVnKtnH33/Wg1taBycz6lU8bmAbRBFDXBuBKQD1VnUdTSesPfseIYD0mnGs9MzqKHjqJrCmDhvKup0VHdWOfk6e1PVWfjQRYDGj4q2GmydKZdgrT8lTb318ABiPMMpbCrFZDExf0IIOr2Z3Zn2gJfkOxKpRMru8v09zgnWBhDlHsa6in1IPAPR5ewmzC2Isf6jWJ6zjvpj9PWuib8UX2cvsmzrsMraST3Uu5T8TNNhMPPImzvYl1PLbZeP5KW7pzAy3GPQ59tEG58e+J4D1VlcGTd/yH5DTfoWatrr+nTQ7cJoMbGjLI0Uv4R+farAPlt8J+1znBRqrk+8otfzsSFuNLYaet1EbKKNrzNX8O+d7xLg4sPLFz3GlJCxfbsRG+yipBJlz4KF7QcrWLO7hEXTI3oUqByszuJPa/7OhqIdXBozl6dnPNhvak0URf73QwblTTXYnGqZHT55UO9n+wCGkgCb0svIKKjn+vmxOB/j39NhMaD+nZnvnSqiKCKajINaIUkkdrfks4VNtPH94ZW4qbSMGaDopS+GA9Jpxr3zS32sgOqozj2YjAJ7Ks/byYM2Y3sP5e4AF1/MVnN3ui/ENZC6jkba+xBTjfWMxGKzkN9YzIgwdzw0SrYfPJoSTPSOZVdpei/V60tj5lCjqycrOAxDWTaW1nq7JIso8vaez7qPV8qV/HnKnUgkoIpLY2t2zml6dwbPf77aT0l1K4/dMo5Lp4T3cnMdiIKGYh5f/09+ydvAvIjpzD/GRXewrM7fDMC4fqwUADYUbbc3QUcObFHxS94GChqLuSnp6j4bSHUGMxIBFPKj1XYWq4XXd3/E91mrmBE6kadn/mlAgzNLu30CJHXUdj9WVNHCq1/tJzrYlWvn2Vdlte31/GvHOzy35TVkgpSnZj7AdYmLBgwwK3ccYd2eUkaMbUciCMwZoO/oWI7VYzye+mY97/x0iNgQNy4aH9LjOZPFhIP03DGYOyewmADxhHtIYO83G0yv2Zlie8le8huLWRp/6ZDV7YcD0mmmS9G7SX9UBy3Q2xknlZzcEvtNw7PzxnLshrl/pxpuRZs9lRfcWdhQ2kfaLsYjHEEQ
OFybZ7f4HunL/rw6TGa7JNDUkHE06Js4VJPb47wU/wTCXINYYajEJED7oW14OXlwY9JiMmqy+ebQL93H+jl78/j0+5DJIUe+gu3FveWKzhQ5xY3syqxi2UUxpMQOTsbIYDawtTiVJzf+i0fWv0htRyN3j7uJm5OvHnJj5ZGmMn7JXc/EoJR+Nd9aDW18f3gVI7yiiPHobcbYhb00+2dS/BP7THOJokhadg2Rga7dTc4Gi5EXt7/J9tK9LI2/lDvGXHfCFYml2b7ilmk6pacadDz93m6c1Ar+duMYDNYOPjnwPff/+jT7qw5x9ciF/HPeY8R6Rg543V2Zlbz7UyYpI9ypFrMZGzBqQLXuY3+v/gKS1Sby7y/2YbXauP+apF6TDZPVhPwkHHR/z3SnZAchJWW1ikjOUrqzur2O9/d9RaRbCFNDxg35/OGAdJrp8kVqMR4NSIIgEBGgpaiiGQBPtX1juf6YtN6xKg9Ad6qopI+A5KhQE6YN4lCNfeWSFO2FyWztDnhj/BNxVjiytmBrj/MkgoQbkxbTYGhhY3AQbQc3Ioois8ImMzN0Ij9k/cqqvKP7SSGugdwQeRs2vSP/TX2ff25/u5fG3plgZ2YVMqmEhVMGttSu1TWwvnAbL2x7k1uX/5nXUz+iUd/CdYlX8OrFTzE1ZNyQ9yFKmyt4fuvrODs4cXPSVX0eYxNtvLX3UzosBm5JXtLva3SY9byy8z0cFWpuT1nW53F7s2s4UtnKnHH2HjCdqYPnNv+XjJoc7hhzPVfEzR/U72CqK0OQOyDTeFDb1MFjb+/EZLby15uS2Fy+ibtXPs7KvA1MDhrDqxc/zeIRF5+w0GNfTi0vfZpOVJArCWN16Mx6LjlGz24gjFYToij2qcX3+epsMgvruX1RAn4evZUwjFYTiuEVUg9spk7TTsWJU3YWq62HgstvRYdZz793vIMEgfsm/uGkFDYGNQ1ZtGgRy5Yt45JLLkGlOvGS8UJGLVchFSS0HmOyBxDk68zqXSXYbGJ3rv7YfSZnhSOOclV3U62rUoOTwpHS49QUuoj3iWFFzjo6zHriQu0rrqziBuIj7DIds8InszxnLdXtdT0qnWI9I5kTPoX1hdsI1zfhUXoYVfBI/pCyjHZzBx/t/5YmfQvXxF+GRCJh2ohI3vlmAuGjG8moPsD9q55iQmAyF0VMI7pzpXa6aWjR46FVonI4+vG02KyUt1SR33CE7PoCcuoKuve9PB3dmR02mXEBo4jxjDipL4JNtLH5yC4+3P8tarmSx6bf2z25OBZRFPki4yfSKzO5KemqfhWurTYr/9n5nr06bvr9fV6rpd3IW99nEOjtxMyUIJoNrTy35TXKW6t4YMIfhmSfYawqQOEVQlVDB4+/vZN2g4F5lwi8lP6SvTnVP5GlIxd2txSciLTsGp7/aA9B3s48eEM8j216jnjvmD4bZfui1WBXndc49FSL2La/gm835HPR+GBmjw3q81y92dA9QRvGTvcK6QRFDVabSHuHaUh9bacDg8XIP7a+QVlLJX+ecidejr2rOQfDoALS448/ztdff82rr77K3Llzueaaa4iMHHipf6EiCAKOCjXtx9iSA/h7OmEyW2lqM+DqrEFA6KElJwgC3k6e3cKrgiAQpPHrs/QbYJRPHD9lr+FQTS5jA0bh7+lIfunR682PnMHK3A38mLWaO8de3+Pc60ddSXZdPl/YRHz2LicxeCQyiZQHJvyBD/Z9zfKcteQ1HOGusdfj7eTJ6Bhfsg858O+HL2N1wSbWF25nR2kaHmo3UvwTSPCOIco9rJdUzckgiiJqZyuN1gp+ydlIZVsVR5rLKG2uwNxp365RuhDrEcGlMXMY4RVFQGeZ/Mlgs9k4UH2Ybw+tpLCphJFe0fxx3I19pqVsoo3PD/7Iitz1zI2YyvzIvvembDYbb+z5hAPVWfxfyrXdjcnHYjRb+cfHe2lpN/LITZNpMjTy9y3/pVHfzF8m38Uo38HLNtlMeoyV
hYixc3jy9a1YXEpwHlHMryWtxHvHsDT+UiLdQwd9va37y3nly30E+7rw9G0T+Pzw1+jM+iFZQHTthR6775V9pJFXvtpHbIgbty/qX72h1diOywVtxNcbm9mIACcUV23vMGET+U117FqN7by8/W3yGoq4f8Kt/Zo0DoZBBaTk5GSSk5NpbW1lxYoV3HnnnXh5eXH99dczf/78k37x3ytquYoOc09Pki4tu7omPe4aFS5K517OsD5OnhQ2HfVQCtL6s/nIrj4N+aI8wlHLVaRXZjI2YBRh/lpyS44WUriqNMwJn8KvBZtZEDWzx8xYKXPgr1P+yCOrnuG/xhIeL8skJDAeqUTKH0ZfQ5R7GB/s+5oHf7VbIExMTmRXZhXZ+R1cn3QlV41YwO7y/aSW72dj0Y7uAgB3lSsBGh+8HT1xU2vRODjjqFCjkCq6Nzdtog2T1YzBYkRn6qDN1E6zvpUGfTP1ugZqdQ0YrSakUfDJwVQcFWpCtYFcFDmdMNcgItyC8XbyPKWVmdVmpaiplL0VB9lRspe6jkY81G7cNfYGpoaM63OF1W7S8WbqJ6RVZnBRxDRuTr66zzFYrBZe3/MxO0vTuCb+sj6tKDoMZp7/aA9ZRxp46NrRSBxbeXTDm1hsFp6Yfv+gVyHd1yvcj2iz8O/cZszhuYiKdnycw3gg4dZB9Ud1IXb2tH2w4jBxoe48dss4dpTvZHPxLq6Imz9gxeHxFDeXA/Z2BoDS6lae/WA3nloVj948Frms783uDpOeDrMej0HsU11IiF0B6QQrpC4vpC5NuzNNZWs1/+j077pv/K1MCBx9Stcb9M5ha2sry5cv57vvvsPZ2Zn58+ezfPlyNm3axEsvvXRKg/i9oZIpMZh7akl1SXo0ttofd1W60HycAZyXkwe7y/d3C68Ga/wxWIzU6hp6NRjKJFKSfEeQVpmBzWYj1M+FbQcq0OnNOHZaR1w54mK2lKTywb6veWLG/T1utF5OHjw26Q6e2fwqT+38H/dPtc/KBUFgWuh44r1j+CLjJ37OWYdcuglNrD+fbTMyIf4KlHIl00MnMD10AiarmYKGYgoaiylpLqeitZqCxhJ0x60Q+0NAwNnBETeVFm8nT+K9Y/B28mT99kZKim389dZZRAf3X102EFablWZDK3W6Bqrb6yhvraKosZSCxmIMFiMSQUKCdwzXJi5irP+oPgsHRFEkrTKD99K/pNXQxs1JVzMvcnqfwajdqONfO9/hcG0e1yYs4rLYub2OqarX8fxHeyitaeP+pcnIPWp4cuPHaByceXLG/T0knwaDzSayeevPbPJzo05diI+jF9cnXUeKX8KQgrbFauPtHzJYs7uESQl+PLAsmW2lO/lg/9ck+8UP6HfUF5k1uXg7eaJVaaiq1/H4/3Yhk0p4+v8mDDh7L2u1p6jPZ0+uM4HY2eB9oqKGslr7VkGA55lfYe4sTeN/aZ8jl8h4csYDQ55I9cWgAtKf/vQntm7dyvTp03nqqadISkoC4JprrmHixKE1Pl0IOMgUGI/r8O/qQm/pdNrUKF16OZL6OHlhE23UdzTi7eTZ7c5Z3FTWZ8f7uIAkdpSmcag2lxBf+4yyuKq1uxvf2cGJ6xOv4O29n7EqbyOXRM/ucX6IfxwPu8TyVvNhnt/6GvMiprM04VLUchVuai13j7+Jy+MuYmXuRrbYUml2LubWH/cwPjiBkV7RRLmH4u3kSZxXZK+0lMliotXYjs7cgdFiwipaERAQBAG5RI5S7oCTXI2jQt2ten4s430MPPzaNp58Zxd/vXEMo6K8MFnNtBrbaDPqaDO202psp91k/3ebUUebyf7/FmMbLYZWWoxtPZpNZRIZQRo/poWMJ9YzgnjvGJwHSA0VNpbwRcZPZNbkEKjx4y+T7+xXzqagoZhXdr5Lk6GVu8fd1KvCyK44Uca7yzORSgQeu3UMucZd/LxzHdHuYTw0+fY+95kGorShhhdWvUe9tgWlzYGbR13F3MjJfb6f
A9HcZuTFT/dyqLCBxTMjWTw7lI8PfMX6ou0k+Y7ggQl/GJLIaauhjYyabOZHTKemsYNH396B2WLjH3dNGlDYFSC/4QgwLBt0PDazESknLmoor2lDIoCf55kzNDRYjHy8/zs2FG0nyj2M+ybc0u10faoMKiBFRkby6KOP4ubWc6Yqk8n48ssvT8tAfk8opAr0lp4rJKfOxr/2jq6A5NzL86gr6FS11eHt5Emgxg+pIKGoqbTPDe5k35Go5Sq2FO9mWexSAAormnvIw8wInci+ykN8fvBHQrSBjPSO7nGNiMlLuet/97IxOoY1BVvYUZbGZTFzmR02GbVCRYCLL7ePuZbrE6/g8S9/oqQjn93CfjYf2QXYV4MBLj74Onvj6eiOm0qLRumMk8IRtVyFUqawBx1BigDYELHZbJhtFpoMLVS116I3G+kw69GZOtCZO7oDTOD4ZnIranhu1wZk+y1YMff7njvKVTg5OOGicMRD7Uq4axBalQY3lRYvR3e8nDzwdvQ44c3aarOyv+oQq/I2cag2FyeFIzclXcXciGl99lRYrBZ+zF7N91m/4qbS8szMPxHhHtLjmNLqVv73YyYZBfWMCHPnhstD+CL7M/IaipgbPpUbkxYPyZTPYDbw3u7lbC3fikRmY3qTnmuveg6N++Bl/rvIKWnkhY/30tZh5v5rEhHcKnlwzdM061u5LGYuS+MvHXKAW12wGavNyki3JP725nb0Bgt/v2Miwb4nDrgHqrLwd/bpbp8Yxs7Rsu+Bi8ryy5sJ8HbuNyV6quyrPMT76V9S39HE5bEXcfXIhUPuNRqIQQWktLQ07rjjjh6PXX311XzzzTeEhw9eOO9CQS6V0WrsefNUyCTIpAI6vf1xrVJDi8E+g+9KrXQJqVa11TDKNw6FVE6Q1p/CxuI+X0chUzApKIXNxbu5cdRiXJ0dyC9r7nGMIAjcNfYGHtvwT17a/haPTbu3x9Ja7uqDe8Is5h3cyOxlf+GbI9v57OAPfHd4JRODUpgclEKMZyRqhYqnrrqSP726ldYKI/dcF4ZZ0ciRpjLKW6s4XJdHY2nzSVmG937/5LgonHBxcCI6wJO6OiuV1WaUUjVjo4MYGxWEm9oFJwdHnBWOOCkch3zTPBaL1UJOfSF7Kw6ysyydFkMrrioN1yYsYk7ElB7mdMeSVZvHe+lfUd5axeTgsdyavKSHMkFVvY6v1+eyKa0MlVLO7YviUfpW8sLulxEEgfsm3MKkoDGDHqfVZmVj4S4+2f8jRrEDrc6d2xtyCR17+ZCDkSiK/LytiI9+OYyrG1w818Z3le/QUNhEhFsIf5r4fyeVgqnTNbAiZz0JnvG8+lE+ZovI3++YSHiA9oTnthraOFyb22slPwzYTPaU3UB7SFabSG5xI1OSBr/XN1ia9S18uP9bdpWlE+Diy9MzHyTGs//+u5NlwIB07733cuTIEcrKyli4cGH34xaLZdinZADkEnl3RVgXgiCgcpDT0SkPo1W6YLFZaDfputNGWqULarmqh+trpFso20r29OmNBHBRxDTWFW5jbeE24kI9OVTY0CPIAagVKh6bdi9Pbvo3z25+lXvG38zYYxQIXKdcRXvmZjT7NvPYogcpbCxhdf5mdpSmsbFoB2q5ihFeUcR6RnDj1T6893UJr31SyAPXjObW0ZO6r2OxWe2pMkMb7SYdBosRg8WI2WrGeoxqhEwiRS6RI5fKUMocUMmVqOUqHDtTeH3J8GQdaeCjX7LYsLqR9B21zJ+gZs5YLRrl0NoQRFGkQd9EaXMlR5pKya0vJLu+EKPFiFwiI8lvJNNCxpPkO7LfmV9FazVfZi5nT/kBPNVu/GXKXYzu9Pyx2UQyC+tZueMIuw9VIZdKWDglnGnjtXyV/T2ZaTmM8IrirrE3DDrNIYoi+6oO8WH699R21GBr1zBGOYcl7auQanzRTl48pPegpd3Iy99sI7P+MK6jmtBJa1lTIjLCK4o/pFxDsu/IkyoasdisvJ76
MaIIWTu8kYrwjz9OIthncKnIjUd2YhVtTAkeO+TX/r1zdA+p/3Lu0upWdAYLsSEnt+faFzbRxsaiHXx+8EdMVjNLRi7kspi5Q5biGiwDXvXPf/4zFRUVPP744zz++OPdj0ul0uGy7wGQS2WYrb3TS04qOR16e0A6KjHU3B2QBEEgwMW3RyovxjOctYVbKW4u6zOvHqT1J9kvnpV5G7gs4jZ2ZFRSWtPW6ybgptby7KyHeGnbW7y843/MjZjKsoTLUctVyJzd0Iy/jObt32JImU94YCx/HHcjt45eSkZ1NvuqDnG4Jpe9FQftFwsHqVXFy7t34ZflybioULyd3bpTdY4KFT5OnihlDsilcuQSGVKJtNdNThRFRFHEIlqx2uz/6c162oztWGwWLDZr9/+lzlZuXOJNXqmCnZkVfJ26nW/2bCfQx4m4EFfCAjQ4OUqx2uxVfCarCb3ZQIdZT5tRR5OhhcaOpu4qvi4CXHyZFjKOBO9YErxjUA6waVzeWsVPWWvYVroHhVTBkpELuSR6NgqpnMLyZnZkVLJlXzm1TXqc1XIWz4xkznh/tlVu5amta5FJZPxh9FJmh08ZdK9UVm0enx9cTn5jEaJBjUPDGO6ZMhG/fW9hMenxuuaxE5q2gb1p8XBtHpty9pNeeQjRUYfCEVxdfLk48GKmBI/F5xQs1G2ijXfTviC7Lh9KRuEsOPPsHyfiN8jNdYPFyMrcDcR7xwy6V+pCQrQYEWQKhAEyAXuz7CoviZGD13wciPKWKt5J+5yc+kJGeEVxW8qyM15sMmBACggIICAggDVr1gwr7w4BB6kCYx8ByVEtp11vvxm6q+xFCI36JkJcjy6xg7T+7CpN617ljPSy7/lk1OT0u9G7dOSl/GXt81RJ9yERHNmyr7xP+3Gt0oWnZj7IlxnLWZW3kdSy/SyKm8essMloJy6iLWMT9avfxf/WfyJIpChlDowNGNW9mmrWt1DUVEpxczllLVUcLi+j2lDMz7m5MIiPhyAICJ0Hiognn95Tg0PnVlgNUNMImxr7PrQr/adROuPt7EWCTxy+zl4EuPgSog04obW2TbRxqCaXVfmb2FeZiUIqZ0HkTCZ4T6G8ysTb3x1mf14tDS0GJAKMivLi+vmxjIv3YW/lPp7Z/iIN+iYmBaVww6jFuKoGZ6mQU1fIt4d+IbM2B8xKTOVxzAqfxHVTHWlb+R8sumZ8lj6Gg3dIn+dbbVbyG4rJqMkiozqHgsZibKIN0SpBYfHiougZzI0dc0pB6NjXejf9SzYd2Ym1MgJvInn2ngm4awa/el2evZYWYxtLRi488cEXIKLJeMKm2D1Z1UQGaof0vveFyWrmx6zV/JSzBpVMyV1jb2BayPjfJAYMGJCuueYavvzyS5KTk3sMputmuW/fvjM+wPMRpcyhV9k3gIujorvKritdU3ucAVyYaxDrC7dR3V6Hr7MXWpWGMNcg9lYc5PLYi/p8vRDXAOZGTGVtwVaiR8xlXWopS+dE9xDr7EIhlXNj0mImB4/hkwPf89H+b/nu8CqmBo8leeIlqFd/RMuelWjHX9rrXK1KQ7IqnuRjLKmPVLbw7vIMMksq0Whg/Cg34iI0CFILRqsJk9XUudKx2m+Ix/j2SAQBiSBBKkiRSqTIuv+TIZPIkEokR/8tSJFKJJ3H2/8vdJ5f26gnt7iZ3JImCsta0RsAmwS1QkmAj5ZAb2f8XZzwclbjrlWidXLAxVGBSt7/x7+ypY5NhbvZXpZKg6EBB4maUEkK1IXw6wEj3+p3A/ZVb2KkJymx3qTEeqNxUrC/6hBPbPqEkuZywlyDuG/CLYPKt4uiyKHaXH7I+tWuU2h1wFwRTbA8njuujMarYitNXy1H6qjB97qnUfr37DFqNrSyv/IQ+6oOkVmTQ4dZb3cpdvTHoTmK5kpn5o4cxa2LE05oBjhYWg1t/Hf3h2TUZGOpDCNUksJTd0/ood59IkqbK/gpZw2Tg8acltLh3yM2i2nAptj6
Zj15pU0su+jUXHYP1+bxTtrnVLXVMjV4HDeMuvK0NLwPlgE/la+++ioAv/zyy0CHDXMcaoUaY+eN+Nh9CFdnB0qq7KXeWqULSplDt3ZdFzGd7orZdfndRQ7jA5P5IuMnatrrevngdHFtwuUcrM6iXraDZmMKq3Ye4fJp/d8Ew92CeXrmg+TUFbAqfxNrC7exymbBOcKHiMPLGaUWiA5IIFDjN2AFWKifhufumExGQT3fb8xnzfo6NmwyMDHBl9ljIkiI9EB6hnW1wt1gQuevarXaOFLVSkFZM0UVLZRUt7Izo6pPTyeJAA4KGXKZxC7ZL2vH4liNzaUSwcku62RtdcVal4C+0QejQkGQj5xJie5EBGiJDnYlyMcFqURAFEUya3L4JvUX8hqK8Hb04L4JtzAhcPQJ03M20UZaRQbLs9eQ31iM1KbCVBaDxhjBH6d4EWvJpv3nj2k26HAaORX3OTcjVbsgiiIVbdXsLT9IWsVB8juLX9xUWsYHJjPCM4acTIFfNlfg5uzAk9ckkRx96iuiLtIqMng37QtaDO2YikaS4JbEIzeNRekw+GBnMBt4ddf73dWMw/SNzWwYcIW0fm8pogjTk0+uoEFn6uDTgz+wsWgH3o4ePDbt3h6+Xb8VA35yDhw4MODJ/v7Dud6+cO508WwztvdI0XhoVTS1GjBbbMhlEgJcfHtJA/m7+OCq1HCgOouZYfaCgcnBY/gyczkbi3ZyTcJlfb6mUq7koUm38/iGl9EmHOCLDTImxvvh5Tawr0yMZwQxnhHoTB2kV2ayr3QfGeUH2J+9CrJXIREkeDt54OvkhZejBx6ObriptLiqNGgcnHF2sFe5JUZ6khjpSUlVK6t3F7MpvZyt+yvQOjkwMcGXiQl+jAhzP+Oij1KphIgALRHHVXW1d5iobdLT0KKnuc1IW4eZxo4WaozlNFjKaRDLMdIMgLPgRrBqIrGakQTEeuOhVeGpVaF1duhzH2xfZSbfZ/1KfsMR3FWu3DZ6GTPCJp6wHNZkNbO1eDcrctdT1VaLzOqEvDyMOL2SOYFmfExrsOyooFWQ4Bg9Fu3EK1H4hFLUVEpqwUb2lB+gslMdPtwtmCUjFzLaL55gbQCHihp446sDVNTpmDM2iFsuHYmTauiuuX1R3VbLpwd/YG/FQZwl7nRkjmNSVAwPLhuNXDb4v6/NZuP11I8pb6vm0an3/KYz8fMN0WxEoup7r9BmE1mXWsKoSM8T9nn1xd6Kg7yX9iUtxjYujZnLVSMWDOjvdSYZMCB9+umn/T4nCAJz5/buRB8GtCp7QUFzZ/lwF77ujthEuzVAoLczoa6BbC/d20MaSBAEkv3i2VG6F5PVjEIqt2vG+SWwrnAbi2Iv6nfjPUjrz8OT7+CFrW9CxE6e/ULOv25f0Gfq7ngcFWqmhoxjasg4Wg9sIHfN27SkzKbBy4/y1iqq22rJrivo1V/VxbGVcioXJfGzHejogMZmMxsqD7K2RIJCqiDQU0u4rzuR/h64OTmilDngIHOwV9vJlJ0/K06pjPt4DBYjjaZ66sRaysUqSiwVFOlKutOlCqmcWM8IRvnMZrRf/KD2VSw2K7vL0lmes46S5nI81W7cNnoZ00PH91hRilYzNkMHVoMOm7EDm0FHS3s9G6sPsbG5kDbRjIdBYF6DhfEdJSiFIpCBUKdAHhSLJnku6piJHDE1s6ZsH6lp71PX0YhEkDDCK5L5kTMY45/YLdjb0m7kv18fYP3eUrzd1Dx7+wRGRZ2eVVFNex3Ls9ey6chOZFI5kbLxZOxyYe7YUO5anDgkDx5RFHl/31fsqTjATUlXnZXZ+PmEaDYh0Wj7fO5gfh21TXpuWjBiSNdsMbTywb5v2FWWTrA2gL9M6b/x+7fipAPSMP3joT7qdxTaqbYAdFe+lVS3EujtTLRHOOsKt1HaXNGtygAwITCZDUXbSavIYGKQXRtqUew89la8yC95G1g8YkG/rz3SO5onZtzH85vfpFq2loe/
aOTFa65FqRj87Ng5cSaBBem479nIlJuexyHevtEsiiJ6s4FGfTPNhhZajG20GtppM+nQmTroMNt1yPRmA+0mHXoM2JwMODrYy79FREqB0lrYVDvwGORSOapjgpRS3hmspAoUUjlyqRyZRNodyLuq9UxWMwazAZ25g1ZjO82G1l4yRt6OHoS6BjE3YirRHuGEuQYNujG1w6xnY9FOVuVtpL6jEV+llpu9kkgRVYhZ+6nfswmrrgWrvg2bob27XBegXi5lm1ZFurMKi0QgWmdkTKMZb7MaR3dfPEZOROkZiIN3CHKvIIpaKllXmsauLS/T0NGETCIjwTuGxSMWMMY/sYfZn9VqY01qCZ+uykZvtHDljAiWzo0+5b0iURTJqS/g1/zNpJbvRypImR0+BUNZKKu3VXPJpFBuuzx+SAaKNpuN9/d9xbrCbVwWM5eLo2ae0hgvBESzsV/ZoLWpJTir5YyP79u7qy8OVB3mjdSP0Zn1LI2/lEtj5p7WBteTZcBP63PPPcejjz7aqym2i7fffvuMDOp856jiQs+7brCvMzKphNySJiYn+nerJhyozuoRkEZ6ReOhdmNd4dbugBThHsL4gGR+yl7DlOCx/e4lAUR5hPHvBY/x9Jq3qTKmctu3+dw+4SomhSYOqlJGEAQ8F9xJ+XsPUfPjvwm45SUkSkcEQUCtUNkVHDRD01wTRRGzzUKHyUBeeR0ZRdVkldRRXNOIFQsSmRUvdzleHg64aWU4O0uwCRYMZgMGixGj1UiHqYNGSxMmqxmzzYLFZum2ahYEAZkgRSGVo5QrcZSr8HfxYaRXNO5qVzwd3fBx8sLf2XvA0u7+qGyqYNXB5WytzcIgWgk1WLm4sY2Yjlok5NEmkSJzckXq5IrMxR2FTyhSlTMSpSNFNgPLGwrIMdYgiAKyBk/UjaH4xMQTOyesh7V4aXMF20v3smPfR9TpGpBKpIzyieOa+MsY7RffpyX4gbxa3lt+iJLqNhIiPLh9UTxBg+z96Y9mQyvbS/awqWgnZa1VOMpVLIyezcVRM1m1uYqftuWxcEoYt102tJ4lo8XEG6kfs7t8H5fFzGVZwuWnNM4LBZvF2GdTbEu7kd2Hqrh4Uuig1BksVgufZ/zEyrwNBGr8eHz6fedUmf2AAWnChAkAXHRR39Vdw/SNo0KNm0pLyXH7Q3KZlJgQVzLy7dL8biqtvYKu/ECPCjqJRML8yBl8evB78uqLuiuPbkxazMGaLF5P/ZgnZzww4IzGTaXlP5f9hXe3rGFd6Rr+u/d/fJnpxYLYaUwMSkF7At00qcoZ78sfoPLTx6ld8Treix9GOAmfoS4EQUAhlaNQyRkb6czYSPvvZDBZyC1uIrOwnszCeg7tbsZitTfR+rq7ER3iSlyQK5GhWkL9NINKP54urGYje/b9zNriXWSJHQhAQruJGTJXIr2jUYwMRu4RgNzVB6mTtkePSE2jjuUZqWwv20q7pAbRIkesi2CkJplZk6MYE+fdvXpp0rewvWQvW4p3U9pSgUSQEO8dw1WdK6G+ghBAYXkzn6zKZl9uLV5uav56wxgmJpy8FYfO1GFXQC9NI7MmB5toI9IthDvGXMfEoBSUMgdWbi/i6/V5zB0XPORgVK9r5F873qGoqZQbRl05rMgwBPor+96UXobFKjJ37IlTbS2GVv69812y6wqYFzGd6xIXoThLe0X9MWBAmjnTvpRetGgRTU1NHDhwAJlMRmJiIi4upzYD+70T7hZMQadQ5LGMjvHm45VZ1DZ14OWq7q6gq2qr7a6qA5gTPpmfc9by2cEfeHrmnxAEAXe1K/+XsoxXd33AR/u+4dbRSwe8IQiCwP9Nn8eM8jH8Y/mP1Kjy+Wj/t3y8/zsi3EOI944hxiOCcLegPkVGlYExuM++kYZ1H9K8/Xtcp5z+KiilQkZilCeJUfYVn8lsJb+smZziRnJKGjmYV8fmdLuVgVQiEOjtTJi/hlA/DSG+zgT5uODaR7HBqdDUVMnq
nZ+xpbmQRpkEJ6vIfLUfc6Jm4BuRguS43iWj2UpReSv5pU1klzaQUXuIDk02EnUbWFWEMIGLR0xlbIw/aqU9NWi1WUmrOMj6oh3srzqEKIpEuoVwS/ISJgQmDyi0WlzVyldrc9mRUYmTSs6tl47g4omhJxWsW43tpFVkkFq+n4yabKw2K56O7lwaM4epweN6rIT35dTyzk+ZjBvhw11XDk1NfF/lId7Y8zEWm4WHJ99Oin/ikMd6IWMv++5Z1CCKImtTS4gOdj2hTmBpcwX/2PYGbcZ27h1/C5ODBy9Z9VsyqATz5s2b+ctf/kJkZCQ2m43S0lJeeeUVxow5N3+pc4FYzwj2VhykoaOph9nbpAQ/Pl6ZxZZ95Vw1K4ppIeP5OvNn1hRs6VH2qpQrWRJ/Ke+kfc7mI7uYEWZXVZ8UNIYjTWX8nLMOR4WapfGXnvDGEBngzv9uv4UfNxfw3c50TE6VVNhaKGhYg8ivwFEvIx8nL7ydPPBQ26vptHHjcajMp2nrVyg8A3GMGX8G3q2jKORSRoS5dwvEiqJIfbOBgvIm8suaKaxoYX9uLRvTyrrPcVTJCfB0wtfTET93R7zd1Xi6qvHQqHDTKHEYxI3aJtrIrDzM6vTvONBRg1UQCJepuDpsElOSLkMQZDS2Gsgu76C6oY6qeh3lte2UVLdSWa/DZrMhdavGIbAQ0bcdrdSNeeFXc1nCFOTHyKy0GtpYX7SdtQVbadQ3o1W6cFnMXKaHjMfPZeA9gLzSJr7dkMfuQ9WoHGQsmRPF5dMihlw9V9tez96Kg+ypOEhOfQGiKOKpdmN+5AwmBCYT4RbS6zNV16Tn5c/TCPJx4U/Xjh50Kb/RYuKLjJ/4NX8TQRp/Hpx027C1xMlg7r1CyiluoqymnXuuHjXgqXn1Rfxj6+s4yBx4ZuZDhLn17dR7LjCogPTqq6/y2WefdcsFHT58mMcff5wffvjhjA7ufKZLYSGzJofpoRO6H/f1cGREmDtrU0u4YkYkrioNE4NS2FC0gyvi5vdwypwZNpFtJXv4aP+3xHhGdK+gliVcjs6k58fs1bSZdNySvOSEG5JymYSrZ0dx0fhglm8tZNXOYjqMejz8jASF2VA6d9BibCCvIRV9H0296nBvHPd+gPuRDbhpvNEqXdCqNLgqNbirtbir3XBXu6IYgmr1YBAEAU9XFZ6uKibEH7ULb24zUlLdSml1G2W1bVTWtXO4qIEt+8o5XgBCrZShcXTASS3HUSVH5SBDqZCikEsxCW3UkE+N5RAdgh6V1Ua8XgXWcbSYAviuxMT7KzbQ1mHqcV2JAN7ujgR6OxEea6TIlkq9sRZ/F18Wj1jK+ICkHtqDNe11rMhZz6biXZitZhK8Y7kleQnJfvED/u0sVhu7D1Xx89YisosbcVTJWTInisumhg+6+VQURY40lZFWeZC95Qe7U8mBLr5cETufsQGjCNEG9DuxEUWR177Zj9li4283julhLT8QOXUFvLX3U6raapkXOZ3rEq847Z+PCwex1wpp3Z4SVA5Spozqfw8op66Q57a+hlbpwuPT7ztpa/HfikF9sgRB6KFdN2LEiNOi6vx7Jkjrj5tKy96Kgz0CEsDCyWG88MledmZUMmWUP4vi5rG9dC8/Zq3mxqSjQpkSQcI9427iL2uf5+Xtb/PMrIdwVKiRCBJuS7kGZwdHfspeQ1lLJfeMuwkvpxNrWGmcHLjh4jiunhXFjoxKNqaVcWBbPTbRGXdNMImRHkSFOOHpBTKlkRZjG036Fhrb6qjJ201rXSmF5g5azHbx1ONxU2nxcfLEz8WHABcfgjR+BGkDTrsltdbZAa2zvffpWMwWK3XNemobO6hvNtDUZqCpzUhLu5H2DjM6vZm61jY6HEoxOZVic6wHEcL1JuJaRIraJlAiC0XpIMNJJSHAy5mRYQ5onR1w1yjxdFXj7abGy1VNaWsZnxz4jj11Bfg4eXJv0i1MDBzdIxDV6Rr49vBKthTvRipImRoy
jkuiZp2wKKSmsYN1e0pYl1pKY6sBbzc1t102ktljg7rTfgNhsVrIqssnrTKje6UuCALR7mHcMOpKUvwT+/TY6ovdh6rYn1fH/10ePyhtug6zni8yfmJdwTY8HN14fPp9xHufmoLAMD3N+QxGC9sPVjA50b/fCUJ5SxUvbn8TN5WGp2Y8OGjZqrPJgAGpubkZgJEjR/L++++zdOlSJBIJP/zwA+PHn9nUzfmORJAwPiCJtYXbaDO299ijGR/vS6C3E5+vzmFCvC8BLr7MCJ3I6vxNzAid0KPqxcPRjQcm/oHntr7Oi9ve5JGpd6OUK5EIEpYlXE6Qxp9307/gT2v+zlUjLubiyJmDUuJVOsiYNSaIWWOCaG4zsjermvScWtKyatmUZt+zcVTJCffXEOoXRKjvCKaOGoNi3UtI2xrxu/7vWNRONOlbaOhooqGjibqORmra66hqq2VXWXqPcmtPtRsR7qFEuYcS7RFOiGvgGSkzlcuk+Hk44efR88ZpsBg5UHWYHaWZ7KvMxGyz4OPkSYrBkxEF2QREjMNz2V1IHAZuJAb75vB7+z5n05GdaByc+cPoa5gZNqnH76M3G/gxezW/5G5AAOZHzuDSmDkD+vy0683szKhkc3o5mYX1CAIkR3vxx8WJjI71PmGfT4dZz8HqLPZWZLCvMpMOsx65VE6idyxLRi4k2XfkkJtPRVHkq3V5+Hs6cfHEkBMeu7t8Hx/t+5ZmQyvzIqdzTfylJ1XVOExvjlX63plZhd5oZdaYvtNv7UYd/9j6OnKJjEen3nNeBCMAQRxgqRMTE4MgCH2uhgRBIDs7+4wOrj/Ky8uZNWsWGzZsICDg9Ht/nC5Kmyt4aM3fuS7xCi6NmdPjuT2Hq3n2g1RuWTiCRdMjaDW288CvT+OhduW5WX/uFVR2laXz6q4PCHcN4i9T/9hjxVGna+D9fV+zrzITT0d3roybz5TgsUMyfevCZhMpq20jp7iJgvJmCsqbKa1uw2S2AhAkredul3XopM6kBt6Am5cXvu5qvN3sezddBQaiKNJiaKW0pZLi5nIKGospaCimvsOuguogcyDWI5wRXtGM8Ioi1DXwtDbDgn2/5kB1FnsrDrK/6hAmqxmN0oUJgclMDkjGecsP6PP24Dp1CdrJV51wL04URTYf2cUnB77DYDGyIHoWV8TN7+WXtK/yEO+mfUGDvompweNYmnBpd2/a8TS3GdmTVc2uzCoO5NVisYr4eTgyMyWQGSmBeLkOHCCb9C2kV2ayt+IgmTU5WGwWnBWOjPZLYExAIgnesafUdV9S1crdL2/ijkXxLJjcv85cTXsd76d/xYHqLEK1gdyWsqyXUeEwQ6frXvfRFVHEX/dnnEZMAeCxt3dQ09jBO3+b3ad6yD+3v83+6sM8O/Oh8+rvMOBUOicn55QuvmLFCt566y3MZjM33XQT1157bY/ni4qKePLJJ2lpacHT05N///vfaDTnRyQfDEFaf2I8wllbsIUFUTN73HDHxHkzJs6bz1bnMHaED/6eTtyeci0v7/gfnx38gZuSr+5xrQmBo5EKUl7d/QGPrHuBhybd3t275Onozl+n3MWBqiy+ylzO23s/48uM5UwPncC0kPFD6hmSSASCfVwI9nHhIuylpFabSE2DjtKaNipq29lX6sGYis9JLv6EVw/MRicenQErZBI8XdX4ejji46bG290RH/eRJISOw2e0mg5bOzl1hWTV5XG4No/PM34E7M6zMZ4RxHpGEOkeSphrEKohzqxbDW0UNJaQU19AZnUORU2liIi4KjVMD53A+IBk4jwjEQSoXf4qurw9uM+9Bc2Y/huNu2jsaOatvZ9ysDqLGI9w/m/MtQS49HxfzVYznxz4njUFWwhw8eXvEx/uJRZqtdrIL29mf04t6Tm15JU1IYrg5aZm4ZRwJif6ERmoHTA41ukaSC0/QGr5fvLqixAR8XJ0Z17ENMYEJBLlHnbagvuhIruaxZi4vgsuLFYLv+Rt4NvD
K5EKEm5KuoqLIqad9snFMCDI7HtITW0GMgrqWTonus/PyeYju0irzODGUYvPq2AEg9xDMplMbNmyBZ1OB4DVaqW0tJQHHnig33Nqamp45ZVX+OGHH1AoFCxdupRx48YREWFXwRRFkTvvvJNHH32UqVOn8vLLL/POO+/w8MMPn4Zf69xhYcwc/rn9bbaV7OmxlyQIAn9cnMjd/9zEy5+l8eLdUxgbMIr5kTNYlb+JQI0fs8In97jW2IBRPDXjAf614x0eXf8S1yRcxsVRM7vVCkb5xpHoE0tmTQ6r8zezInc9y3PWEujiy2j/BEb5xBHlHjZkcy2pRMDP0+mY/YNIOo6EIP/mBV6K3AVzH6LWIKemQUd1Ywc1jR3UNHSQfaQBnaGnUaGrswO+Ho74ekQzwWM02gjokFVTZSglr6GQ/VWH7O8PAl5OHvi7+OCldkercsFJoUYukSMCJqsJnamDJn0LNbp6yluqaNDbBVGlgoQItxCuGrmAJN+RhLoG9hA4bdz0GbrD23Gbce2ggtH+qkO8vvsjTFYztyQvYW7E1F6CqW3Gdv65/W1y6gtZEDWLZQmXIZfKMVusFFa0kFXUSGZhPVlHGugwWBAEiAzUsuyiGMbG+RDq5zJgEDJYjOwqTWdL8W6y6vIBCNL4c9XISxjrn0igxu+M2AM0tRoQBPB07W1pUNRYwlt7PqWkpYKxAaO4JWlJt4TRMKcfobOoIS2rBlGECfG9J5rtJh2fZfxItEf4eamAMag70wMPPEBZWRl1dXXExcVx8OBBxo4d2NVx586djB8/Hq1WC9iba1evXs3dd98N2Cv11Go1U6dOBeCOO+6gtbW113VaW1t7PV5dXd3ruHOV0X7xhLsG882hX5gYlNKjyshdo+L+pUn8/cM9vPV9BvcuGcX1o66ksq2Gd9O/xMnBkXEBST2uF+keyotz/8b/9n7OJwe+Z0dpGrcmL+2eCQmCQIJPLAk+sTTrW9hVto89FQf4OWcdP2WvQS6VE+kWQpRHGGGuQYS5BuHp6D7km5k6NBGfq/9G9bcvIFv9IqOWPYEstndKp73DRFWDjur6DqobdVTV66is17E/t5YNe8uOOdIZd814wrxkOLrrENStGKVNVLXUkV2b36+GnqNCjbejB7GeEYS4BhDhFkK4W0i/aar2nF007/wR56Q5aCYsGvB3FEWRH7NX81XmzwRr/Hmgn5LldpOOZza/SkVrNctiluFiDubDFTnklTZRWN7S3ejr7+nIlFH+jIryJCHCExfHE6fSDGYDq/I3sTJ3A20mHb5OXiyNv5SJgaNPi5fRiVArZYiifX+rq6rParPyU/Yavj28Eo2DMw9PvoMxw31Fp8yJ7nVdVXaph6vxdFUR0kfv0c8562g36rh12pLz0sNuUAEpOzubtWvX8tRTT3HzzTcjiiJPP/30gOfU1tbi6Xm0isfLy4uMjIzun0tLS/Hw8OAvf/kLWVlZREVF9XCl7eLjjz/m9ddfH+zvc84hESRcm3g5z2x+lRU567hyxMU9nh830pelc6L5al0uvh6OXD07ij9NvI2/b3mN/+x8j/sm3Mr4wOQe52iULjw8+Q52lO7l4wPf88j6F5kYOJrFIxf0SCNpVRrmR81gftQMdKYOsuryOVybR05dASty1nXbijvKVQRpA+wVcRp/grR+BGsDUMoGdiJVhSbge83jVH/9PBUfP4rvNY+h8Oy5yeqkVhCpVvSQx+lCb7RQWddOZb2Oyrp2Kjr/O3xAis7gDDgDQUgkAl5uDnh6yvB0dcDLVY2PqzMB7q74urvgrJYP6stnbq6l7pc3cfCLxGPurQOeY7PZeCf9CzYW7WBy0BjuGHMdCpkCURRp6zBT0xlcy2vb2Nj4Ha1CNZb8ZN7f3Qg04qCQEhGg5ZLJocSEuBEX4oary9BSkEWNJbyy631q2utI8h3J5bFzifGI+E1vNNHB9r2vtOwaZowOpNXQxqu73yezJpeJQSn8YfRSnBRDV5ge
pjcnutcJcgVGs5X9eXXMGRvU63OgM3WwpmAL4wOTe0iRnU8MKiB5eXkhk8kICQkhLy+P+fPno9frBzynv0KILiwWC3v27OGzzz4jPj6e//znP7zwwgu88MILPc658cYbWbSo50y2urq6137UucxI7xjGBybzQ9avTAhM7tUAec3caKobdXz6azZOajkXTwzlkal3849tb/DKzve4Kekq5kfN6HGOIAhMDh5Lsl88P+esZWXuRnaV7SPFP4EFUTOJ9Yzs8X47KtSM8U/snsmarGZKmysobi6jqKmM0uYKthandq9EBAT8XLwJdw0mwj2EGI9wgrT+vVJVysBYfK9/luovn6Xyk8fwXvxnVMEjB/W+qBxkhAdoCT/OKkIURVp1JirrdFTUtVPVoOsOXEVHWtAZepoaKuRS3DVK3DVKXJ2VaJwUuDg64NLZd6RWyVHJJKi2/BuJzYZl8v9R0WhAEIzYbCI2m4jZasNktmIwWekwmFlV9iMFusOEy1KwFCfybEYaDS0G6ps70But3a8t8ypFHlKJn2ECo5JGE+LrQpi/Bn8v5yGpXx9PZVsNT2/+D2q5iqdmPECcV9SJTzoDxIS4EeDlxLcb8ggPk/LPHW/RaGjhjjHXM7OzWXuY08OJ7nWC3IG80iZMZmufvlbbS/aiNxu47LgCqvOJQQUktVrNihUriImJ4ZtvviEsLKy7JLw/vL29SUtL6/65trYWL6+jb6KnpyfBwcHEx9vdRy+55BLuvffeXtdxcXH5XcgU3ZJ0NZnV2byR+jHPzHqox6avRCJw35IkOvQW3vo+A0EQmD8hhMem3curu97nw/3fUNZSyc3JV/eqnFPLVSyNv4yLo2axKm8Dawu2sbfiYHcp+ZTgMWj7KPlUSOVEuIf02PQURZH6jkZKmsspairjSFMpGTXZbC1JBcDZwYkE7xhG+9ldY7uqyxy8Q/C7+R9Uf/UcVV88i+fFt+OcePL5a0EQ0Dg5oHFyIDa0Z3WaKIq0683UNHRQ29RBXbOe+mY9DS0GGlsNFJY309Ju7LV3Nc0hmysc8/msfRJ7384c8PVlgbnIfY9gLo8kt9aLGucGXJ0dCPByYlSUJ16uqs6qQiUv7H0BP+dInpxx/WlduXxy4HukgpRnZv6p2134bCCVCNx66Uie+WwDj6xdidJBytMzHjzvNsvPB050r5PIHcgpse+TRgf3zjhsK9lDoMbvrFtInAqDCkhPPPEE33zzDQ8//DDfffcd1113HQ8++OCA50ycOJHXXnuNxsZGVCoVa9eu5dlnn+1+PikpicbGRnJycoiJiWHjxo2MGDE0P4/zCa1Kw20py/jPrvf55tAvvYz2ZFIJf70xhec/2sub3x1Eb7BwxYwIHpp0O18d+pmfstdQ1FTK/RNu7XPvwMXBiaXxl7Eodj47S9NYX7iNTw9+z2cZP5DgHcP4gGTGBozqU7OuC0EQ8HR0x9PRvVtrrCtIZdXmk1mTw8HqLHaUpiGXyhnjl8CMsInEe8cg13jhd+Pz1P7wMnW/vIGptgS3WTf0EBw9HQiCgLNagbNaQUSgtt/jLFYbbR0mOgwWdLWVSFd8hclzJLNTljDDBlZRBFG0q4RLJUilAgq5lIK2LL4rPMKUwInccukS1Mr+04E5dYU0G1q5Ofnq0xqMrDYrB6oOc3HkjLMajLqICXfCNeEgHWYr872Hy7nPFoLMgZziRnw9HNE49Uyn17TXkddQdN6rpw8qIIWEhPDnP/+Z1tZW/vOf/wzqwt7e3jzwwAPccMMNmM1mFi9eTEJCArfddhv33nsv8fHxvPHGGzz22GPo9Xp8fHx46aWXTuV3OeeZGJRCRnU2P2avJtI9lBT/hB7Py2VSHrlpLK98uY8PfzlMc7uRmxbEsSzhciLdQ3kz9WMeXvs8N41azMywSX3eBB1kCmaETWRG2EQqWqvZWpzKztI0/pf2Oe+mf8kIryjG+CcyNmDUgE2aXXQFqWmh7kwLHY9NtJHfcITt
JXvZUZrGzrJ0/Jy9uThqJtNDJ+Cz9DEa1n1Ey55fMNYcwevyB5E5nfh1TjcyqQRXZyVaJ5Hq9V9jkEqIuOoeZC79q1k0G1p57dcVRLqHcuf4ZSds3O1yaw1zPb3aYHqzAZto66GBeDb5eP93mIR2ws3z+PqXCnydvJiZcu7qof1eEeQK8suaeqmTAByszgLoVQR1vjGogFRUVMQ999xDa2sr3333HTfddBOvv/464eHhA563cOFCFi5c2OOxd999t/vfiYmJfPfddycx7POXW5KXUNxczmu7P+S5OX/u1csil0n407WjcXFU8OPmAuqb9dy/NIkx/on8c95jvJH6Mf9L+5w9FQe4bfQyPBz7brgEux36NQmXsTT+Uo40lbG7fB+p5fv5YN/XfLjvGyLdQxkXkMSEoOR+GzePRyJIiPYIJ9ojnBtGXUlq+X5W5m7kvfQv+f7wKi6Lncvs2Tfg4BtO/a//o+L9h/C6/P5B7yudbnTZO9EXHcB97i0DBiOAbzJXYLAYuWvsDYNSkZB27qdZbdYTHDk0lHIlAgI688D7tL8F5S1VbCnezWUxc1kcdzHPvr+b/3y1H4lEwvTkc7cp/feIzizQ2GrsNvo8lqza/G7ZrvOZQUn2/v3vf+eRRx7B3d0db29vrrvuOp544okzPbbfJQqZgocm345CpuCFrW/QYuhd6i6VCNy+KJ6bFsSx7UAFj729k5Z2Ix5quy7YLclLyKrN50+rn2VN/hZsndVy/SEIAmFuQSxLuJxXL36af89/gqtGXoLJauLTg9/zxxWP8dTGf7OxaEef+nT9IZfKmRw8lufn/IUnpt+Pn4s3H+3/lvtWPkmqowzvG55DolBR9dlTNG7+AtFqOfFFTyNWfRsNa9/HwTccl9HzBjy2Sd/CpiM7mRU2Cf8TqG53Edwp8ZTXh83IqSCTSHFVaajTNZz44DPM9tI9SAQJC6Nn4yCX8tgt4xgZ5sErX6SzM6PybA/vwkGmoKre3gfal55gdl1BZ9P3+VfqfSyDCkjNzc1MmjSp++drr72W9vb2Mzao3zseajf+MvlOmg2tvLjtLYwWU69jBEHgypmR/PWGMRSWN/Pwf7dRUdeORJAwL3I6/5r3OBHuIby/7yue3vQKla2D780KcPFl8YiLeemiR/nvgme4auQlNBtaeXvvZ9z+81/5YN/X1LbXD/p6giAw0juaJ2c8wBPT78NNreWdtM/5276PKbnoGtTx02ne8T0VH/0NU23poK97qjSs+whrRxseC+464V7WluLdWEUbl0TNGvT1g7UBeDt6sK5w22kXG/ZUu3XLLJ1NipsrCHDx7dbAUypkPH7rOCKDXPnnZ+lkHTn7QfNCQJApqOwMSP6ePcvsWwytNBlaCD+Pixm6GLQFqNFo7I6+dXV12GwDz8qHGZgI9xDuHX8LhY0lvLrr/X7TPpMS/XjurknoDGYe/u9Wso/Yb1JeTh48Nu1e7hxzPaUtlTy89nlW5Kw/4WrpeHycPFk84mJemf8kz8x8iNF+Cawr3Ma9q57kjdSPhxSYwF7i/vdZD/PnyXcgl8r5795PeVnRTNmsqzC11lP+wcM0bf8O0Woe0nWHii5nN+2Zm9FOvAIH75ATHp9emUm4a/CQmk0FQeDSmLnkNxwhtXz/yQ+2D1yUzrQZdaf1mieDVJBgsfVc2aocZDxx63i8XFU8/9EeGlrOfmrx944gtSuhAPi49wxIpS32lWqgxq/XeecbgwpIy5Yt49Zbb6WhoYF//etfLFmyhGuuueZMj+13z9iAUdycfDVplRl8uO+bfmfZMcFuvHzvVJzUCh57ewd7s+yrIUEQmBE2kX/Pe4JEnzg+Pfg9z25+lSZ9y5DHIggCMZ7h3Dv+Zt5Y8HfmR85gZ1k69//6NJ8d/AFDHx5JA10rxT+Rl+Y+wr3jb8FsM/NGyRbeiA4mKyKW+i1fUv7un9AXD1x+fbKYm2uoW/UWCp/wQbnc2mw2ihpLiPWMGPJrzQyb
SLA2gA/3fUO76fQFEJlE1isQnA1iPSOobKshr76ox+Mujgoeu2UcBpOV1789OGxHc4YRZDKa242olbJezsAVndmRoWhWnqsMKiAtXryY++67j4ULF2KxWHj22WdZtmzZmR7bBcG8yOlcGjOXtYVb+TV/U7/H+Xo48s97phDk48xzH+5hx8Gj+XtXlYaHJ93OHWOuo6ChmL+u/QcFDcUnPSY3tZYbkxbz2sXPMDloDD/nrOOBX58hrSLjxCcfg0QiYXLwGF6Z9yT3jLsZBAmfWGt4OTaE9Qoz+V8+Tc33/8TcePr2ImxGPTXfvgiiiPeiBxAGodvXZGjBbLPgexJOplKJlDvHXE+rsY330r86mSH3icFiOCWV7tPFrLDJuKm0/HPH/yhqLOnxXKC3M9fPjyUtu4a07JqzNMILA0EipbXdhMaxt3pKi6ENAQGtw/nfrzmogNTe3s6+fft4+OGHue6669i8eTMdHR0nPnGYQbEs4TLG+CfyyYHvOVST2+9xGicHnrtzElFBrrz0WRq7D1V1PycIAjPDJvHc7D8jl8p4evN/yKw5NbV2N7WWu8bdwLOzHkKtUPHS9rd4ffdHQ14JSCQSpoSM5Z/zHuOvU+4iwC2QXx3hH2FevNuSy/pPHqZm5dtYWupOabw2k57qr5/DVFeG1+UPIHcb3IyxyyHXUdFbQHQwhLkFsXjEAnaWprGzNP2krnE89bpG3FVnv+xbJVfy+PT7kAlSHln/Eu+lf9kjjbtgUihebmp+2FxwFkf5+0eQymnRGXFx6j1JaTG04uzg2MMY8nxlUL/B3/72t25lBhcXuypxX7pzw5wcEkHC3eNuwsfJk9d2f0ibsf+CEbVSzlO3jSciQMNLn6Z17yl1EaT15++zHsbL0Z2Xtr9NcVP5KY8v2iOcF+f8jSvjLmZ76V4eXv1cd9/DUJAIEpL94nl8+v38e94TXBQ5nSKtCx/6uvBI837e+Pphdv/4AvqK/CFf29xUTeXHj2Eoz8Xr8vtRhw++H6Pri2w5hfLty2MvItw1mA/3fX3KqTuDxUhFWw1B2nNjT8DfxYd/znuU2WGT2VC4nXtWPcFL294irSIDQRCZPSaIQ4UNtLQPvkJzmKEhSGW09LdCMrahcRia8eK5yqD6kIqLi3nttdcAcHZ25pFHHuHSSy89owO70FDJldw/4Vb+tu4FPj3wA3eNu6HfY9VKOU/cOp6HX9vG8x/t4ZUHpuGhPTq716o0PDrtHv627gX+s+s9Xrro0R4q4yeDTCpjSfxCUvwTeD31I57b8hpzwqdwXeIVQ/YtAnu++6bkq7k2cRHplZlsK9xOak0OO0wluGz+JyNtDozxTyRx5FwcPQP7LWe16lpoSVtFy+6fEWQKfJY8MqRgBODcKQ460ERgIAxGC01tRub4XcLbh9/glQ3fEOcwGZ3BjN5gwWCyYrJYsVhtiCIIgr1xV6mQ4aSSo3FywNNVRaC3M/6eTuTUFWATbcR4RJ7UeM4ETgpH/pByDVfEzWdNwRY2HtlJWmUGrkoNI7SjQC5wpLKFUVFnXoH8QkSQymltMRLZhzpJq6ENjfL8T9fBIAOSxWKhvb0dJyd7/btOpxvexDwDhLgGsiB6Fity1nNJ9KweVubHo3Fy4PFbxvGnV7fw8ufpPHfnpB5inm4qLXeOuYHnt77GuoKtLIgefDnzQIS7BfPinL/x1aEVrMzdwP6qw9w6eimj/eJP6npyqZzxgcmMD0ymw6xnb/FeduVuJq29ip116cg3pBFphpFKDxJcg/Fy8gRRxKprxlh9BEN5DtisOMZOwH32zchchi6146RwxEHmQO1xfT8ms5XGVoNdJ6/FQEPrUc28xlYDTa0GGluN6I1Hiw/kYX5kWNPZc8AZiahA7SBDqZAil0uRSSVIBLCJdmkjg9FCu96M2XK0MlIuk6CNzUWqkqM0eSJ2yhudK7iptVyTcBlXjbyE/VWH2FC4nR1VW1Emwk9FzQQGXH3OKEz8rpBI
adWZ+rQsaTG2ndf6dccyqIB0+eWXc9VVVzFv3jwEQWDdunVcccUVZ3psFySXx1zEmvwtrMrbyB1jrx/w2EBvZ25flMB/vtrPyu1FXDq1p3LGKN84oj3CWV+4/bQFJLA3994w6krGBYzinb2f8+K2N0nxS+CGUVeekkePWq5iWuRUpkVOxWQxcfBIKnsLdpLRWk6W2MA3jQ14VFuI7DARZRKJVnvjOm4hzgkzUHgMXjXAbLHS2GqkscVAY5s92ChsTuzOK6A4bVd3EGrr6N0fppBLcXNxwM1FSaifhtExSrTO9p+1zg60ieG8mfEGd93qybyoqYOyRe8wWKht6qC0uo2csjo2dqzDUu/Jw//dSYivC/MmhDBrTCBKxdCMFc8kMom0Wz1+Y0YOr2/+kVxpJvetOszS+Et7GEcOc+oYkWOxir007MBe1KC9kFJ2t99+OxEREezatQuZTMZDDz3EtGnTzvTYLkicHBwZF5hEavl+bktZdkIr6JkpgWw7UMFnq7OZPMoft+M8dyYEJvPR/m9p6Gg67TPXaI9wXpz7CCvzNvJ91ioe+PVpZodPYVHcvEHp5A2EQqZgTOQUxkROQRRFqtpq2F91mIyqLPbVF7DLakIqdBBFLaNqD5EkFQnWBmA0Walt6qC2SU9dpxp4V4Bp6lzZtHX07oFSREmRKVtQd5jwdlMTG+qGm4sSdxcl7hqV3d5Cq8JRKRswyIiiF1/nu5LTkM984cTfEUEQcFTJCVVpCPXTYHQpREw388ilV1Fb5sDaPaW8/UMGX63N5erZUcybEIJcdm7d6EtLrVjLRvD8spv5NvtHPjnwPVl1Bdw/4dZTThUPY0cn2gOR5riiBpPFhN5i6G5cPt8Z9JRrypQppKSkdKfqmpubu91ghzm9JHjHsrU4laq22hP2FgiCwP8tiuePL23k89U53HP1qB7PdzXLVbfXnZFUikwq47LYuUwLGce3h1eyvnAbG4t2MCtsMpfGzBlQa2+wCIKAn4sPfi4+LIiehdlqZl9ZLjuLD5LdkEt23XK+zFwOZgcsTZ5Ym7yxtbqBKEUiEXB1dsBdo8TXw5G4MHfcXZS4uijtAafTQ+nDzDqKm8t45cZTm2gJgkCoWxDlLUMvZbfarKzI3UC4WzDJAdEIgQLzJ4ZyuKiBz1fn8M5PmazccYT/WxTfpx/O2aCtw8Sa3SWMifUm1MOHhyffwaq8jXx84Dv+s+t9Hpr0f8MrpdNAh80eiFyOK2po7dz3vKCKGj7++GP+9a9/YTbbZ5Zdee3s7OwzOrgLlS6BxLqOhkE1u/l5ODF/YigrtxexaHo4AV5HP5xyiX2Gaj7DOnJd9hqXxszhh6zVrCvcytrCrUwMHM2C6FmnJGsiiiJV9ToO5NdxuKiB3JImaho7ACdgNBqtiMa3BZtzDS1e5Vi8ylFIFIz0imNKSAqj/UagHKDwQhRFKttqTnlV14WLgxMFJ6Fvt61kDzXtdVw/6fYeq7ARYe48d+dE0rJreHf5IZ58ZxeTE/34w2UjcdecXKn66UAURV7/9gB6o4Vr58UA9oDclR7++MB3bCrayazwyWdtjL8X2q327/HxK6QWY5v98QupqOHTTz/lyy+//F37FZ1LdKU5hhJErp4VxbrUEr5ck8vD16d0P95uss+gnBTq0zvIfvB28uTOsddz1YgFrMzbyMaiHWwv3Uu0RzjzI2cwNmDUoJS0bTaRnJJGdmRUsudwNdUN9r43NxclsSFuXDwxhHB/LSF+Lj3y6marmUO1ueytyGBP+X72VR9ALpWT6BNHil88CT6xPZTNbaKNX/M2UdJczi3JS07Le2Cz2U6Yaj0ek9XMN4d+Idw1uNvV91gEQWBMnA+jojz5YVMBX6/PIz2nhmvmxnDJ5LDfPI1ns4m8+1MmOzOquPmSOEL9eppAXhw1kx2laazIXT8ckE4DXQFJ69RzYtUlzqy5kFJ2np6ew8HoN6Sj03ZgKOXUWmcHFk4J47uN+SyeFdl9
gyjvlBX5rWXpPRzduDFpMVeNXMCmop2szt/Mf3a9h6tKw9zwqcwOn9znrK6uSc/a1BI2ppVS26RHLpOQGOnJ5dMiSIryxNfDccA9HLlUTpLvSJJ8R/KH5KVk1xeQWr6fvRUHSas4CICrUoOPsycyiZTKtloaOppI9IllVtikfq87FOo7GnFT9nbpHYhf8zZR39HInWMHdp6Vy6QsmRPN1KQA3vkpkw9WHGb1rmJuWBDHhJG+SE7BNn2wtLQb+e/XB9iTVc3l08JZNL235JIgCEwOHsNH+7+lSd+Cax+uxcMMHp3FfqvWOh+3QjJ0rpAupJTdpEmT+OKLL5g1axYODkdno8N7SGeGms5O+MF6FHVxxfQIft1ZzAc/H+aZ2ycgCAL5DUfwdvTAycHxxBc4A6jlKhZEz2J+5Az2Vx9mdf4mvj60gu+zfmVSUAoXR80k1DWQrCMN/LSlkNRDVYhAYqQn186LZfxIH9TKk9sYl0gkjPCKYoRXFDcnXU1ZSyWHanM50lRGna4Bg8VEtHsYKQkJTAoac1rKq202G0eayxgfkDzoc5r1LfyQ9SvJfvHEe8cM6hxfD0eeuHUc6Tm1fLDiEC98vJcwPw1XzIhgUqIfMunpXzGZLTbW7y3ls1+z6TBYuH1RPAsmhfb7vjl19nfpLQZcGQ5Ip0KbRYqTSo5c1nPl3dy9QrqAUnbvvPMOJpOJZ555pvux4T2kM8eR5jIcpAq8HQc2lDseJ7WCay6K5t2fDrEjo5KJCb7k1BUw2i/hxCefYSQSCaP94hntF09FazW/5m9iS3EqW4p342DyorU4ALXJj0XTI5g/MRRvt9ObYhQEgSCt/4C9XaeDgsZidKYORngNvqn184yfMNnM3Dhq8ZBeSxAEUmK9SYr2Yuv+cr5el8vLn6fz/s+HmDUmiKlJ/oT4upxyoG1o0bMpvZxVO49Q16QnNsSNP16V2KdR3LEUNBYjk8iGPLEapjdtZila594l3436ZhzlqnNC9/B0MKiAlJExNFHNYU6N7LoCIt1DT0qbasHEUDallfHW9xk4uXXQZtIx0jv6DIzy5PFz9ibeYTqZZe6UmrMQfEtxiNqHt3MlIbFuuGt7f/HOF7aX7kUmkZHkOziH3Oy6fLYU7+by2IvwPckeLqlEYMboQKYlBZCWU8PqXcX8sLmA7zbl4uZrxC/QjMrZhEJpw1Elx1Xlgr+zDzGeEb1eUxRFmtuMFFa0kFPSyMG8OnJKmgAYGe7OXVcmMjrG64RBrrGjmS3Fuxnrnzhc+n0aaDcLuDr3TuE36Jtx+x01Ig8qIJlMJrZs2YJOZ9foslqtlJaW8sADD5zRwV2ItBhaKWkuZ2n8yUkzSaUSHlw2mgf/s4U3Vm8EF86ZgGSziew+VMXX6/MoqmjBx13NXbMWMSXZj7TK/SzPWcebez7h20O/cFnsXGaETkR+Ht3MOkx6thTvZlzAKBwHUURisVl5L/0rPNRuXBE3/5RfXyIRSIn1QuHaiCoqgwNVh9GLJgpFEJukiBY5AiDITSCxq0MorC64mMNQd4TS0SalvlmP3mjX9JMIEB6g5bp5MUwZ5d+nU2lftJt0vLT9LWyiyJKT/BwP05N2o0BoHyukpo7m01Ydei4wqID0wAMPUFZWRl1dHXFxcRw8eJCxY8ee6bFdkHQpdCd4x570NQK9nXlw2Whe3rEXpdUZpXB29o+6sFhtbDtQwXcb8ymtbsPXw5H7lyYxPTkAaedex+TgsUwKGsP+qsP8kPUr76V/xY9Za1gUN4+ZoRORDcJG4myzKn8TerOBhdGzB3X8L7nrKWup5M+T70ApO7VVoc1mY3vpXn7MWk1FWzVOCkemho4lyXcE4a4htLcKFFe1UVbbZm8cbq+lyVaBzqGMeuUBcDiIRhPAqPAERniOIMxfS7i/Zsj7d7n1hbye+jENHU08NOn2k171DdOTNpPQZ8quQd90xtPQvyWD+pZnZ2ezdu1annrqKW6++WZEUeTpp58+
02O7IDlYnY2zwpEw16BTus6EeF+0eSaaa5346xvb+fP1KT36k34LWnUm1qWW8MuOI9Q36wnyceZP145myij/Hrp7XQiCQLLfSJJ8R5BZk8O3h37hvfQv+TlnLVePXMjkoDHnrMR+Y0czP+esZYx/4qB0xarbavn28ErGBowipY8y76GQXZfP++lfU9pSQbA2gHvG3cz4wKQeq0t3Rwj27buwoKa9jg1FO9hYtIMDxpWUNe9kkiYFpS6JcEXwCd9zq83Kodpcfs3fzL7KTDwd3Xl8+r3Eep474rDnOwYLvVRYOkx6mg2tv3kF7ZlkUAHJy8sLmUxGSEgIeXl5zJ8/H71+2Lb4dCOKIpk1OYzwjj7lG68oinRYW5kUM5K0jXru+/cWls6J4rKp4b0cJ08nVpvIocJ6NuwtZcfBSkwWG/HhHtx1ZQKjY7wHVZYsCAIJPrHEe8dwoPowX2X8zOupH/FzzjqWJVxGku/Ic0pwVBRF3k3/Aqto44ZRV57weJto439pnyOTSE+p98lgMfLZwR9YW7AVD7Ub90+4lfGByUNWRvB28mRZwuVcNWIBeysOsqU4lZW5G/g5Zx1quYpwtyACXPzwdHTDUa5GEAT0ZgMN+mbKWirIqS9EbzbgrHBkyciFXBw186QU4IcZGHdNz/e0vNXuh/Z7sC7vYlABSa1Ws2LFCmJiYvjmm28ICwvr9kca5vRR015Ho76ZkV6nvucjImIVbQR4arjxoRm89X0Gn6zKZuWOI1wyOYzZY4L6TAGcDGaLlcNFDaQermZXZhUNLQbUShmzxgRx8aRQQnxPriRVEASSfEeS6BPH7rJ9fJn5My9se5MRXlFcn3jFOaNwvKZgC+mVmdwwajHeg5itri/czuHaPG5Pufak8//lrVX8a8c7VLbWsCBqFkvjLz3lSiu5VM7EoBQmBqXQbtRxoDqLrLp8ihpL2HhkJ0ZLT78jmUSGn7M3k4LGkOgTS7LvyPNqz+984/iAVNopTxX4O7Au72JQAemJJ57g22+/5eGHH+b777/n+uuvHy5oOAPkdcrNxHiEn+DIEyMRJDg7OFHf0Yi7RsVjt4wjo6COr9fl8fHKLD79NZuECA+SoryIC3MjzE8zqJWTKIrUNesprmqloKyZ7OJGsosbMZqsKGQSkqK9uHVhAGNH+uBwmlZiEkHCxKAUxvqPYn3Rdr49vJK/rnuByUFjWJpwGV6OQ7ecOF1k1uTw8f5vSfYdycVRM054fE17HZ8d/IEE71hmnmQj7v6qQ7yy8z0cpAoen34vIwfZuzQUnBwcmRw8hsnBYwD7311n7qDDbABRxEGmwFnhdM6mUH+PHC8TVdJcjoPMAc+z+Pk/3QwYkK6/vmfX+A033IAoikRHR/Prr79yzTXXnPEBXkiUNJcjl8jwd/E5LdeL84xkX2UmJosJhUxBQoQnCRGelFS3sjm9nNTDVXz4y2HAXlHl4arGQ6NE4+SAykGGRBCwiSJGk5W2DhNNbQZqm/QYTfYqLEGAYB8X5owJIinGi4RwD5QOZ674QCaVMS9yOlNDxrE8ey0r8zawu3w/8yKmsShuHs4Og6sCO13k1BXw0va38XP25t7xt5wwVWaz2Xg99WMEQeCOsdedVNpxU9FO3k77jBBNAH+Zchduau1Jjn5oCIKAk8Kxu9l1mN8e9+P2kHLqC4lyD/ldidcOePe47rrrAFi3bh3t7e1ceeWVSKVSli9fjovL76Mz+FyiRlePl6PHkHXQ+mN+5HRSy/fzZebP3DDqyu4bYLCPCzcuiOPGBXE0tOjJK22iqKKVqnodDa16ymvbMZos2GwigkRAqZDipFIQ4OVMcrQ3fp6OBPu4EOrnctIqCqeCWq7imoTLmBsxla8PrWBl/kY2FO1gYcxsLo6aiVp+5gVH95Qf4LXdH+Km1vLY9PtQK078mt9lrSK3vpB7xt18Us2iq/M388G+r0n0ieVPE/9vQMHYYX5fKOVCj8leh0lPaXMFi0dcfBZH
dfoZMCBddNFFALz//vt89dVX3cvz6dOns2TJ6RGiHOYobcZ2tKrTF+jjvKK4KGIaK/M20GHWc/2oK3rNcN01KibEq5gQf/5tjLqrXblr7A0sjJ7NV5k/882hX1iVt4kFUTOZFzl9UL1AQ8VsNfP1oRX8nLOOCLcQ/jzlTrSDkG3JqM7m+8OrmBoyjikhQ2+ZWFewjQ/2fU2KfyIPTLh1eK/mAkPr2PNWndtQiIhIjGdvHcHzmUHlV5qamjAajahU9lmgTqejpaXljA7sQsRitQxqpj0Ubk6+GkeFmh+yfmVXWTpTg8cxyncEYa5BaFUufS73bTYbbaZ2WgxtNBlaaNK30GxopVnfQrOxjTZjO+0mHQazEaPVhMVmVyWXSqQopArUciUuDk64KrV4OLrh6+RFgMYXfxefM9K1H6jx4+HJd1DYWMK3h1fy9aEV/JSzlpmhE5kbMfW0pEBFUWR/1WE+OfAdlW01zA6fwk2jFqMYRCFBra6BV3e9j7+LD38YPfQ0987SdN5L/5Jkv3genPCH86Ina5jTi1bd829+sCoLuURGpHvoWRrRmWFQn+xLLrmEq6++mjlz5iCKIqtXr+bqq68+02O74FDIFBjMxhMfOAQkgoSl8ZcyITCZ5Tnr2FK8m7WFWwF7lZSzgyMOUvtN1Wy1oLcY0JsNiIi9rqWSKdEonXFxcEar1KByVuIgVXTbSVhFGyaLCZ1ZT6uxjbKWKpr0Ld3XkgoSgjT+RHmEEecVyQivaFxO475PuFswf51yF8VNZfySu4G1hVv5NX8T0R7hTAkeQ4p/4pCr2vRmA6nl+/k1fxNHmsrwdfLikan3MMo3blDn60wdvLD1DWyijYdPogE2qzaP11M/ItojbDgYXcBonY7+3UVRJK0yg3jvmFNuqD7XGNSn+7777mPEiBHs3r0bgL/+9a/DFuZnAHeVK5m1OWfk2sHaAO4dfzMmi4nCphJKmiuo72ik3ajDaDUB9rJfpcwBJ4UaFwdnNEpntEoXXJUatEqXk9qzMFnN1LTXUdZSRXFzGYWNxWwu3s2agi0ICIS7BZPin8BY/1GDMiMcDCGugdw9/iauG3UFm4/sYmtxKu+lf8V76V8RqPEjxiOcUNcg/Jy9cVNpUMtVSCVSzDYLrYY2anX1lLZUkl2Xz+HafCw2C37O3twx5jqmBo8bdFAwWky8tP0tqtpreXTq3UNWLShvreKf29/G28mDP0+5c1CrsWF+n2jVRzMLZS2V1OoauDz2orM4ojPDoKdbs2fPZvbswUmiDHNyhLgGsrUklYaOpjNiNw72VVisZ+Rv1kWvkMoJ1PgRqPFjYtBowK7hVtRYQkZNNvsqD/FV5s98lfkzgS6+TA4ey5SQsadFIVqrdOHy2Iu4LGYuFa3VpFVmcKgml+2le1lXuO2E5we4+DIvYhpjA5KI9ggbUlWcwWLkn9vfIqeukHsn3Dzk0uxWYzsvbn0TmUTG36bePVzddoHjfExA2tvp63UuqPifbobX/+cQo3zi+ARILd/PxVEzz/ZwzhgyiZQojzCiPMJYPGIBjfpm9pQfYEdpGl9mLuerzJ9J8IlldvhkRvslDMphdiAEQSBA40uAxpfLYy/CJtqo1zVS1V5Lk74FvdmAVbQil8hxdnDCQ+1KgIvvSe/ntRnbeXHbW+Q3HuGusTcwKWjMkM43W828vP1tGvXNPDnjgbPaZzXMuYGz6uitOq0yg0i3kN+l6eFwQDqHCND4Eu4azJqCLcyLmH7BNB26qbTMi5zOvMjp1LTXsaV4N5uKdvGvHe/gqtIwJ3wKs8OnDKqabTBIBAleTh54OQ3Nb2owlDZX8M/OYPLAhD8wPnDwRn1g3x/4X9rn5NQXcv+EW4nyCDvtYxzm/EOttqdrG/XNFDaWnLQbwLnOhXHHO4+4LHYuVW21g0op/R7xdvLk6pELeeOSv/PnyXcSpPHnm0O/cNeKR3kj9WOKm8rO9hD7RBRF1hdu55H1L2KwmnhyxgND
DkYA3x1eydbiVK4eeQkTg1LOwEiHOR9xUtlTdukVmQCMOUVB3nOV4RXSOca4gCTivWP47OAPxHlF/q6EE4eCRCIhxT+BFP8EKttq+DVvE5uLd7OleDcjvKKYHzmDFL+Ec2IVWdlWwwfpX5NRk028dwz3jLsJ7UmkU9YX2mWRpodM4Mq431fD4zCnhrPKvkJKqzyIt6MHAS6/H/26Yzn73+ZheiAIAn8ceyMquZJ/bH2DWl3D2R7SWcfP2ZtbRy/l7YXPc13iFdS01/Pyjv9xz6on+Cl7DS2G1rMyrmZ9Cx/u+4Y/rX6W/IYj3Jq8lEen3XNSwWh32T7eTf+CJN8R/N+Ya88pNfNhzj5OjgoMZgOZNbmk+Cf+bj8fwyukcxA3tZa/Tb2bZza9wuMb/smfJ99J+DmibH02cVSouTRmDguiZrK34iBrCrbwRcZPfH1oBSl+CUwLGcconxFnvFfnSFMZawq2sK04FatoY0boRJbELzzpPa69FQd5ddf7RLqF8sDE2065iGOY3x9KBzkZNTlYbBZG+8Wf7eGcMYYD0jlKqGsgT8/8Ey9se5PHNvyTJSMXcknUrOHGSOyKEOMDkxkfmEx5axUbC3ewtSSV1PL9OMpVjPZLYLR/PPFeMTg5nHq5tCiKlLZUkF6Zya7SdEpaKpBL5UwLGc+lMXPwOQVX1J2laby2+0PCXIN4ZOrdv7tGx2FOD4JExr7KdFRy5e9OLuhYhu9u5zBBWn9enPs33kn7gi8yfmLTkZ0sjlvAxKDRp02A9XwnwMWXG5IWsyxxERnVWewsS2df5SG2lqQiIBCk8SPCPZRQ1wACXHzxcvJAq9T0uwoxWIzUdzRS3VZHaUsFRY2l5NYX0mJsAyDKPYxbkpcwOXjMKfcG/Zq3iY/2f0uMZzh/mXLXbyIKO8x5iiBhf9VhEr3jftcr6OGAdI7j7ODEnyb9H/sqD/H5wR94LfVDvsj8iRmhE5gQOJoAF98zmk82Wc006ptp7GimxdhKi6ENnakDvcWAyWrGZrMBdmsIlUyJs4MjWqULHmo3/Jy9T8sKZTDIJFKS/eJJ9ovHarNS2FhCRk0OOXUF7C5LZ0PR9h7HO8pVKOVKZBIZomjDbLPQYdJ3q1Z04ePkSYJPLCO8oknyHXFaej8sNisf7/+WNQVbSPFP5P7xtwyrMAwzIBW6WpoMLST7jTzbQzmjnNGAtGLFCt566y3MZjM33XQT1157bZ/Hbd68mWeeeYaNGzeeyeGc1yT7jWSUbxz7KjNZU7CF7w//yneHV+Hp6M4Irygi3UIJ1vrj6+yFk8JxUEHKJtpoN+po1LfQqG+ivqOROl0jdboG6nQN1OoaulcGxyOXyFDIFN3irGarGaPF1EsDT6t0IcwtmGj3MEZ4RRHuFnzGV3fSYxpvwZ5ya9A3UdlaQ62uniZ9C21GHQaLEYvNgiAIyCUy1HIVLkpn3FRafJw8T6k5tj/qOxp5ddcH5NYXckn0bK5LWHROVAoOc26TVV8IwCjfEWd5JGeWMxaQampqeOWVV/jhhx9QKBQsXbqUcePGERHRM/9ZX1/Piy++eKaG8btCIkhI8U8kxT+RJn0LeysOcKA6m/SKDDYf2dV9nIPMAa3SBSe5GgeZAqlEioCAxWbBZDWjNxtoN+loM+mwibYeryGVSPFQu+Hl6MZo/wQ81G54qF1xU2nRKl1wcXDCSeHY516WTbShM3XQpG+hVtdAVVstJS3lFDaUsK/S3j+hkitJ8h3JuIBRJPmO/E32TARB6Pw9Tl2O6GQRRZFtJXv4cN/XWEUb90+4dbjPaJhBk99QTJDG/7Q1h5+rnLGAtHPnTsaPH49WqwXs3kqrV6/m7rvv7nHcY489xt13382//vWvPq/T2tpKa2vPst7q6uozMubzCVeVhrkR05gbMQ1RFKnV1VPWUkl1ez0NHU00G1rQmTowHbNykUmkOCrUeKjd7AKqSic0Di64qjS4qexWEVoHl5OesXfZpjs7OBGk
9e/xXKuxnazaPPZXHSa9MoOdpWk4yBwY5z+K6aHjifOK+l05Xx5LdVstH+7/hv1Vh4lyD+Pu8Tfh4+R5toc1zDnGQPe6Iy1lzAmbcTaG9ZtyxgJSbW0tnp5Hv3ReXl5kZGT0OOaTTz4hLi6OxMT+u44//vhjXn/99TM1zN8FgiDg7eSJ9zl8k3NxcOqujLPZbGTV5bO9dC+7ytLZWpKKl6M7s8ImMyN0wkn18ZyLtBnb+TF7DavzNyOTSLkp6aoLShJqmKEx0L3OaDER4/H7ra7r4owFJFHs7adz7L5GXl4ea9eu5aOPPhpwxXPjjTeyaNGiHo9VV1f3ux81zLmPRCJhpHc0I72juSXpavZUHGB94Xa+zFzON4dWMDYgibkRU4nzjDwvGwBbDK2sytvE6vzNGCxGpoWM55qEy36XYpjDnD5OdK+L8Qg/G8P6TTljAcnb25u0tLTun2tra/HyOtqvsXr1aurq6rjyyisxm83U1taybNkyvvjiix7XcXFxwcXl9503vZBRyBRMDh7L5OCxVLZWs65wO5uLd7GrLB0/Z29mhk1iasi4cz53LooihY0lrCvcxvaSPVhsVsYFJrE47uJe6cthhumLge51KrnqjFnSnEucsYA0ceJEXnvtNRobG1GpVKxdu5Znn322+/l7772Xe++9F4Dy8nJuuOGGXsFomAsLPxcfbkxazDXxl7KrbB/ri7bz2cEf+DLjJ0b5jmBK8FiS/eLPqebRmvY6dpXtY1vJHspaKnGQOTA9dAILombidxqs04cZBuzfjfMxWzBUzugK6YEHHuCGG27AbDazePFiEhISuO2227j33nuJj//9yl8Mc2ooZAqmhY5nWuh4ylur2HxkN9tKUkmvzEQhlZPoE8dovwRG+cYN2ZL8VDFZTOQ2FJFZk8O+ykOUtlQA9obZ20YvY1JQymkvFR9mGP9TUAM5nxDEvjZ7znHKy8uZNWsWGzZsICAg4GwPZ5jfAJvNRk59ATvL0kmryKBR3wyAv4sPsR4RRLiHEuYaRICLz2mTVzJbzVS21VDSXEFRUymFDcUUNpVisVmQChKiPcJJ8U9kbMCoYRO9Yc4IXfe6J95/jmsnLz7bwznjDCs1DHNeIJFIiPOKIs4riluTl1LaUsHB6mwO1+ayqyyd9Z1KDFJBgreTJz5Onnio3dCqNLg4OOKoUOMgdUAhlSMRBETo0ZelM3XQamynydBCQ0cjte0N1HY0dBfnyKVywrSBXBw1g7hOC3iVXHkW35FhLiR+60zA2WI4IA1z3iEIAsHa/2/vTmOjKhs2jl8DbdlKKYUuPKC8AbENYYuC7CU8ULpRdkNZLAqCgCyWSNjEQMSASFKIJGyi4UNRSkGwBAHZZGmDghrW+kLCIn0ZSi1SaEs7Mz3vBx4m1hateRznHvr/JU2Yc8+cueZOOdec6cw9rdQ6uJWGRMWowqqQ/X6+rv76s278+n/Ku29X/oMC/e8vV/WgvLjG+61jq6Mm9RurWYOmatvsf9Sn9Ut6pkkLPdvk0QoYrB8Ib2lav3a8Q5NCgs+rY6ujfwVF6F9BEer9bOUxp8up++XFKnaUqMxZLofLoYr/nPX41amrgLoBauBfT438G6phQIOn9sO58G215SMDFBKean51/dS0QZNa8x8aT6fashI8TwcBwHC14S3fEoUEADAEhQQAMAKFBAAwAoUEADAChQQAMAKFBAAwAoUEADAChQQAMAKFBAAwAoUEADAChQQAMAKFBAAwAoUEADAChQQAMAKFBAAwAoUEADAChQQAMAKFBAAwAoUEADAChQQAMAKFBAAwAoUEADAChQQAMAKFBAAwAoUEADAChQQAMAKFBAAwAoUEADAChQQAMAKFBAAwAoUEADAChQQAMAKFBAAwAoUEADAChQQAMIJHCykrK0sJCQmKiYlRenp6lfGDBw9q6NChGjJkiKZPn6579+55Mg4AwGAeK6Tbt28rLS1NW7du1e7du7VtEpZtFQAADjxJ
REFU2zZduXLFPf7gwQMtWbJEGzdu1JdffqnIyEh99NFHnooDADCcn6d2nJ2drR49eig4OFiSFBsbq3379mnGjBmSJIfDoSVLlig8PFySFBkZqaysrCr7KSoqUlFRUaVtdrvdU7EBwCs41nmwkPLz8xUaGuq+HBYWprNnz7ovN23aVAMHDpQkPXz4UBs3btQrr7xSZT9btmzR2rVrPRUTAIzAsc6DhWRZVpVtNputyrb79+9r+vTpioqK0vDhw6uMT5gwocp2u92ucePG/X1hAcDLONZ5sJDCw8N1+vRp9+X8/HyFhYVVuk5+fr4mTZqkHj16aOHChdXuJygoSEFBQZ6KCQBG4FjnwTc19OrVSzk5OSosLFRpaakOHDig6Oho97jL5dLUqVMVHx+vRYsWVXv2BACoPTx6hpSamqqUlBQ5HA6NGjVKnTp10uTJkzVr1izZ7XZdvHhRLpdL+/fvlyR16NBB77//vqciAQAM5rFCkqSkpCQlJSVV2rZp0yZJUseOHZWbm+vJuwcA+BBWagAAGIFCAgAYgUICABiBQgIAGIFCAgAYgUICABiBQgIAGIFCAgAYgUICABiBQgIAGIFCAgAYgUICABiBQgIAGIFCAgAYgUICABiBQgIAGIFCAgAYgUICABiBQgIAGIFCAgAYgUICABiBQgIAGIFCAgAYgUICABiBQgIAGIFCAgAYgUICABiBQgIAGIFCAgAYgUICABiBQgIAGIFCAgAYgUICABiBQgIAGIFCAgAYgUICABiBQgIAGIFCAgAYgUICABiBQgIAGMGjhZSVlaWEhATFxMQoPT29yvilS5c0cuRIxcbGatGiRXI6nZ6MAwAwmMcK6fbt20pLS9PWrVu1e/dubdu2TVeuXKl0nblz52rx4sXav3+/LMtSRkaGp+IAAAznsULKzs5Wjx49FBwcrIYNGyo2Nlb79u1zj+fl5enhw4fq0qWLJGnEiBGVxh8rKirSzZs3K/3Y7XZPxQYAr+BYJ/l5asf5+fkKDQ11Xw4LC9PZs2efOB4aGqrbt29X2c+WLVu0du1aT8UEACNwrPNgIVmWVWWbzWar8fhjEyZM0PDhwytts9vtGjdu3N+QEgDMwLHOg4UUHh6u06dPuy/n5+crLCys0nhBQYH78p07dyqNPxYUFKSgoCBPxQQAI3Cs8+DfkHr16qWcnBwVFhaqtLRUBw4cUHR0tHu8ZcuWqlevns6cOSNJ2rVrV6VxAEDt4rFCCg8PV2pqqlJSUjRs2DANHjxYnTp10uTJk3Xu3DlJ0qpVq7R8+XLFx8ertLRUKSkpnooDADCcx16yk6SkpCQlJSVV2rZp0yb3v6OiopSZmenJCAAAH8FKDQAAI1BIAAAjUEgAACN49G9InuJyuSSp1n2KGcDTIyIiQn5+PnkI9hifnI07d+5IUq36wBiAp8uhQ4fUqlUrb8cwis2qbskEwz18+FDnz59XaGio6tat+7fu+/Eno9PT0xUREfG37tvTyO4dZPcOX84u1ewMyel0ym6315qzKZ98hPXr11fXrl09eh8RERE+++yF7N5Bdu/w5ex/xs/P76l9bNXhTQ0AACNQSAAAI1BIAAAjUEi/ExQUpBkzZvjkqrtk9w6ye4cvZ0f1fPJddgCApw9nSAAAI1BIAAAjUEj/cebMGY0cOVJDhw7VhAkTlJeXJ0kqKirSlClTFB8fr3HjxrlXiTBNVlaWEhISFBMTo/T0dG/H+VNr165VYmKiEhMTtXLlSklSdna2kpKSNGjQIKWlpXk54Z/74IMPNH/+fEnSpUuXNHLkSMXGxmrRokVyOp1eTle9w4cPa8SIEYqLi9OyZcsk+c6879692/0788EHH0jynXlHDVmwLMuy+vfvb126dMmyLMvavn27NXXqVMuyLGvp0qXWhg0bLMuyrC+++MKaPXu2tyI+kd1ut/r372/dvXvXKi4utpKSkqzLly97O9YTnTx50ho9erRVVlZmlZeXWykpKVZWVpbV
r18/68aNG5bD4bAmTpxoHT161NtRnyg7O9vq3r27NW/ePMuyLCsxMdH64YcfLMuyrAULFljp6eleTFe9GzduWH369LFu3bpllZeXW2PGjLGOHj3qE/NeUlJidevWzfrll18sh8NhjRo1yjp58qRPzDtqjjMkSeXl5Zo9e7aioqIkSZGRkbp165Yk6ejRo+4vGRw8eLCOHTsmh8PhtazVyc7OVo8ePRQcHKyGDRsqNjZW+/bt83asJwoNDdX8+fMVEBAgf39/tW3bVteuXVPr1q31zDPPyM/PT0lJScY+hl9//VVpaWmaOnWqJCkvL08PHz5Uly5dJEkjRowwMvvXX3+thIQERUREyN/fX2lpaWrQoIFPzLvL5VJFRYVKS0vldDrldDrl5+fnE/OOmqOQJAUEBGjo0KGSpIqKCq1du1YDBw6UJOXn5ys0NFTSo2U8AgMDVVhY6LWs1fltRkkKCwvT7du3vZjoj7Vr1859ELl27Zr27t0rm83mM4/h3XffVWpqqvvtxr+f/9DQUCOzX79+XS6XS5MmTdKQIUO0detWn/ndCQwM1OzZsxUfH6/o6Gi1bNlS/v7+PjHvqLlaV0hfffWVoqOjK/28+uqrkh6dKb399ttyOp164403nriPOnXMmjarmnfu22w2LyT5ay5fvqyJEydq3rx5evbZZ6uMm/gYtm/frhYtWqhnz57ubb4y/y6XSzk5Ofrwww+VkZGhc+fO6ebNm1WuZ2L23Nxc7dixQ0eOHNGJEydUp04dnTx5ssr1TMyOmvPJxVX/G/Hx8YqPj6+yvbi4WNOmTVNwcLDWrVsnf39/SY+eMRYUFCgiIkJOp1MPHjxQcHDwP5z6j4WHh+v06dPuy/n5+QoLC/Nioj935swZzZo1SwsXLlRiYqK+/fZbFRQUuMdNfQx79+7VnTt3NHToUN27d08lJSWy2WyVst+5c8fI7M2bN1fPnj0VEhIiSRowYID27dtXacV8U+f9xIkT6tmzp5o1aybp0ctzmzdv9ol5R82Z9VTfi+bOnavWrVtrzZo1CggIcG/v16+fdu3aJenRwahr167usjJFr169lJOTo8LCQpWWlurAgQOKjo72dqwnunXrlt58802tWrVKiYmJkqTOnTvr6tWr7peV9uzZY+Rj+PTTT7Vnzx7t3r1bs2bN0r///W8tX75c9erV05kzZyRJu3btMjJ7//79deLECRUVFcnlcun48eOKi4vziXmPiopSdna2SkpKZFmWDh8+rJdeeskn5h01V+vOkKpz8eJFHTp0SM8995yGDRsm6dGZ0aZNmzR79mzNnz9fiYmJaty4sVatWuXdsNUIDw9XamqqUlJS5HA4NGrUKHXq1MnbsZ5o8+bNKisr04oVK9zbkpOTtWLFCs2cOVNlZWXq16+f4uLivJjyr1m1apXeeecdFRcXq3379kpJSfF2pCo6d+6s119/XWPHjpXD4VDv3r01ZswYtWnTxvh579Onjy5evKgRI0bI399fHTt21JQpUxQTE2P8vKPmWDoIAGAEXrIDABiBQgIAGIFCAgAYgUICABiBQgIAGIFCAp5g/vz52rx581+6zaFDh9yraB89elRr1qzxRDTgqcTnkIC/0YABAzRgwABJ0rlz53Tv3j0vJwJ8B4UEn3Pq1CmtXLlS4eHh+vnnn1W/fn2tWLFCYWFhWrp0qXJzc2Wz2dS3b1/NmTNHfn5+at++vSZMmKBTp06ppKREc+bM0aBBg7Rz507t379fGzZskKQqlx/LzMzUtm3b5HA4dO/ePU2ePFljx47Vzp07lZmZqdLSUgUGBmr48OHav3+/pk+frs8//1wul0uNGzfW2bNnFRcXp9GjR0uS1q1bp7t372rhwoX/+PwBpqKQ4JMuXryoBQsWqGvXrvrss880d+5ctWvXTsHBwcrKypLD4dC0adP0ySefaMqUKXK5XGrSpIl27typ3NxcjR8/Xl27dq3RfRUXF2v79u3auHGjmjZtqh9//FGv
vfaaxo4dK0m6cuWKDh8+rMDAQO3cuVPSo1URkpOTdffuXaWmpurgwYNav369Ro8erYqKCm3fvl0ff/yxx+YH8EX8DQk+KSoqyl0oI0eO1KVLl7Rnzx6NHz9eNptNAQEBSk5O1rFjx9y3GT9+vPu2zz//vL777rsa3VejRo20fv16ffPNN1q9erXWr1+vkpIS93hkZKQCAwP/cB/9+/dXQUGBcnNzdfz4cbVq1Upt2rT5qw8beKpRSPBJv12hWnr0FRC/XwWroqKi0lda//Y2FRUVqlu3rmw2W6XbVffli3a7XcOGDVNeXp5efPFFvfXWW5XGGzZsWKO8ycnJyszM1I4dO5ScnPyntwFqGwoJPik3N1e5ubmSpG3btumFF15QfHy80tPTZVmWysvLlZGRoV69erlv83jV9gsXLujq1avq1q2bQkJCdPnyZZWVlcnpdOrIkSNV7uv8+fMKCQnR9OnT1bdvX/d1XC7XH2asW7dupUJ8+eWXdfDgQV24cEExMTH/7RQATx3+hgSf1Lx5c61evVp5eXkKCQnRypUr1ahRIy1btkxJSUlyOBzq27ev+2vGJen7779XRkaGKioqlJaWpiZNmqh3797q1q2b4uPjFRoaqu7du+unn36qdF+9e/dWZmam4uLi1KBBA3Xq1EkhISG6fv36H2bs2bOnZs6cKX9/fy1evFjNmjVThw4d1LZtW+O+wgQwAat9w+ecOnVK7733nvbs2VPj20RGRionJ8f95XTeUFhYqFGjRik9PV0tWrTwWg7AVLxkB/wDMjIylJCQoJSUFMoIeALOkAAARuAMCQBgBAoJAGAECgkAYAQKCQBgBAoJAGAECgkAYIT/B5VRJ992K6JTAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.set_theme(style=\"ticks\")\n", + "\n", + "# Show the joint distribution using kernel density estimation\n", + "g = sns.jointplot(\n", + " data=df,\n", + " x=\"popularity\", y=\"danceability\", hue=\"artist_top_genre\",\n", + " kind=\"kde\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Уопштено, три жанра се поклапају у погледу њихове популарности и плесности. Дијаграм распршивања истих оса показује сличан образац конвергенције. Пробајте дијаграм распршивања да проверите расподелу података по жанру.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages/seaborn/axisgrid.py:337: UserWarning: The `size` parameter has been renamed to `height`; please update your code.\n", + " warnings.warn(msg, UserWarning)\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.FacetGrid(df, hue=\"artist_top_genre\", size=5) \\\n", + " .map(plt.scatter, \"popularity\", \"danceability\") \\\n", + " .add_legend()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако тежимо тачности, молимо вас да имате у виду да аутоматски преводи могу садржати грешке или нетачности. Оригинални документ на изворном језику треба сматрати ауторитативним извором. За критичне информације препоручује се професионални превод од стране људи. Не сносимо одговорност за било каква погрешна тумачења или неспоразуме који могу произаћи из коришћења овог превода.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" + }, + "kernelspec": { + "display_name": "Python 3.7.0 64-bit ('3.7')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.9" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "c61deff2839902ac8cb4ed411eb10fee", + "translation_date": "2025-09-06T14:09:08+00:00", + "source_file": "5-Clustering/1-Visualize/solution/notebook.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sr/5-Clustering/2-K-Means/notebook.ipynb b/translations/sr/5-Clustering/2-K-Means/notebook.ipynb new file mode 100644 index 000000000..d05924372 --- 
/dev/null +++ b/translations/sr/5-Clustering/2-K-Means/notebook.ipynb @@ -0,0 +1,231 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "3e5c8ab363e8d88f566d4365efc7e0bd", + "translation_date": "2025-09-06T14:19:34+00:00", + "source_file": "5-Clustering/2-K-Means/notebook.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: seaborn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.11.1)\n", + "Requirement already satisfied: numpy>=1.15 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.19.2)\n", + "Requirement already satisfied: pandas>=0.23 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.1.2)\n", + "Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.4.1)\n", + "Requirement already satisfied: matplotlib>=2.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (3.1.0)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in 
/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2.8.0)\n", + "Requirement already satisfied: pytz>=2017.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2019.1)\n", + "Requirement already satisfied: cycler>=0.10 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (1.1.0)\n", + "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (2.4.0)\n", + "Requirement already satisfied: six>=1.5 in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from python-dateutil>=2.7.3->pandas>=0.23->seaborn) (1.12.0)\n", + "Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from kiwisolver>=1.0.1->matplotlib>=2.2->seaborn) (45.1.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install seaborn" + ] + }, + { + "source": [ + "Почните тамо где смо завршили у претходној лекцији, са увезеним и филтрираним подацима.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "0 Sparky Mandy & The Jungle \n", + "1 shuga rush EVERYTHING YOU 
HEARD IS TRUE \n", + "2 LITT! LITT! \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "0 Cruel Santino alternative r&b 2019 144000 48 \n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "2 AYLØ indie r&b 2018 207758 40 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "0 0.666 0.8510 0.420 0.534000 0.1100 -6.699 \n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "2 0.836 0.2720 0.564 0.000537 0.1100 -7.127 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "\n", + " speechiness tempo time_signature \n", + "0 0.0829 133.015 5 \n", + "1 0.3600 129.993 3 \n", + "2 0.0424 130.005 4 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
0SparkyMandy & The JungleCruel Santinoalternative r&b2019144000480.6660.85100.4200.5340000.1100-6.6990.0829133.0155
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
2LITT!LITT!AYLØindie r&b2018207758400.8360.27200.5640.0005370.1100-7.1270.0424130.0054
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
\n
" + }, + "metadata": {}, + "execution_count": 6 + } + ], + "source": [ + "\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "\n", + "\n", + "df = pd.read_csv(\"../data/nigerian-songs.csv\")\n", + "df.head()" + ] + }, + { + "source": [ + "Фокусираћемо се само на 3 жанра. Можда можемо направити 3 кластера!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "metadata": {}, + "execution_count": 7 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "df = df[(df['artist_top_genre'] == 'afro dancehall') | (df['artist_top_genre'] == 'afropop') | (df['artist_top_genre'] == 'nigerian pop')]\n", + "df = df[(df['popularity'] > 0)]\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. 
\n", + "5 Kasala Pioneers \n", + "6 Pull Up Everything Pretty \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "5 DRB Lasgidi nigerian pop 2020 184800 26 \n", + "6 prettyboydo nigerian pop 2018 202648 29 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "5 0.803 0.1270 0.525 0.000007 0.1290 -10.034 \n", + "6 0.818 0.4520 0.587 0.004490 0.5900 -9.840 \n", + "\n", + " speechiness tempo time_signature \n", + "1 0.3600 129.993 3 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 \n", + "5 0.1970 100.103 4 \n", + "6 0.1990 95.842 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
5KasalaPioneersDRB Lasgidinigerian pop2020184800260.8030.12700.5250.0000070.1290-10.0340.1970100.1034
6Pull UpEverything Prettyprettyboydonigerian pop2018202648290.8180.45200.5870.0044900.5900-9.8400.199095.8424
\n
" + }, + "metadata": {}, + "execution_count": 8 + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако настојимо да обезбедимо тачност, молимо вас да имате у виду да аутоматизовани преводи могу садржати грешке или нетачности. Оригинални документ на изворном језику треба сматрати ауторитативним извором. За критичне информације препоручује се професионални превод од стране људи. Не сносимо одговорност за било каква неспоразумевања или погрешна тумачења која могу произаћи из коришћења овог превода.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sr/5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb b/translations/sr/5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb new file mode 100644 index 000000000..8bd5900d0 --- /dev/null +++ b/translations/sr/5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb @@ -0,0 +1,642 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "anaconda-cloud": "", + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "3.4.1" + }, + "colab": { + "name": "lesson_14.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "coopTranslator": { + "original_hash": "ad65fb4aad0a156b42216e4929f490fc", + "translation_date": "2025-09-06T14:26:36+00:00", + "source_file": "5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb", + "language_code": "sr" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "GULATlQXLXyR" + }, + "source": [ + "## Истражите K-Means кластерисање користећи R и принципе уређених података.\n", + "\n", + "### [**Квиз 
пре предавања**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/29/)\n", + "\n", + "У овом часу ћете научити како да креирате кластере користећи пакет Tidymodels и друге пакете из R екосистема (назваћемо их пријатељи 🧑‍🤝‍🧑), као и нигеријски музички скуп података који сте раније увезли. Покрићемо основе K-Means кластерисања. Имајте на уму да, као што сте научили у претходном часу, постоји много начина за рад са кластерима, а метод који користите зависи од ваших података. Пробаћемо K-Means јер је то најчешћа техника кластерисања. Хајде да почнемо!\n", + "\n", + "Термини које ћете научити:\n", + "\n", + "- Силуетно оцењивање\n", + "\n", + "- Метода лакта\n", + "\n", + "- Инерција\n", + "\n", + "- Варијанса\n", + "\n", + "### **Увод**\n", + "\n", + "[K-Means кластерисање](https://wikipedia.org/wiki/K-means_clustering) је метода која потиче из области обраде сигнала. Користи се за поделу и груписање података у `k кластера` на основу сличности њихових карактеристика.\n", + "\n", + "Кластери се могу визуализовати као [Воронојеви дијаграми](https://wikipedia.org/wiki/Voronoi_diagram), који укључују тачку (или 'семе') и њен одговарајући регион.\n", + "\n", + "

\n", + " \n", + "

Инфографика од Џен Лупер
\n", + "\n", + "\n", + "K-Means кластерисање има следеће кораке:\n", + "\n", + "1. Научник за податке започиње одређивањем жељеног броја кластера који ће бити креирани.\n", + "\n", + "2. Затим алгоритам насумично бира K опсервација из скупа података које ће служити као почетни центри за кластере (тј. центроиде).\n", + "\n", + "3. Затим се свака од преосталих опсервација додељује најближем центроиду.\n", + "\n", + "4. Затим се рачуна нова средња вредност сваког кластера и центроид се помера на ту средњу вредност.\n", + "\n", + "5. Сада када су центри поново израчунати, свака опсервација се поново проверава да ли би могла бити ближа другом кластеру. Сви објекти се поново додељују користећи ажуриране средње вредности кластера. Кораци доделе кластера и ажурирања центроида се итеративно понављају док се доделе кластера не престану мењати (тј. када се постигне конвергенција). Типично, алгоритам се завршава када свака нова итерација резултира занемарљивим померањем центроида и кластери постану статични.\n", + "\n", + "
\n", + "\n", + "> Имајте на уму да због насумичности почетних K опсервација које се користе као почетни центроиди, можемо добити благо различите резултате сваки пут када применимо процедуру. Из тог разлога, већина алгоритама користи неколико *насумичних почетака* и бира итерацију са најнижом WCSS. Због тога се снажно препоручује да увек покрећете K-Means са неколико вредности *nstart* како бисте избегли *непожељан локални оптимум.*\n", + "\n", + "
\n", + "\n", + "Ова кратка анимација користећи [уметничке радове](https://github.com/allisonhorst/stats-illustrations) Алисон Хорст објашњава процес кластерисања:\n", + "\n", + "

\n", + " \n", + "

Уметнички рад од @allison_horst
\n", + "\n", + "\n", + "\n", + "Основно питање које се јавља у кластерисању је следеће: како знате на колико кластера да поделите своје податке? Један недостатак коришћења K-Means укључује чињеницу да ћете морати да одредите `k`, односно број `центроида`. Срећом, `метода лакта` помаже да се процени добра почетна вредност за `k`. Ускоро ћете је испробати.\n", + "\n", + "### \n", + "\n", + "**Предуслов**\n", + "\n", + "Наставићемо тачно тамо где смо стали у [претходном часу](https://github.com/microsoft/ML-For-Beginners/blob/main/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb), где смо анализирали скуп података, направили много визуализација и филтрирали скуп података на опсервације од интереса. Обавезно погледајте!\n", + "\n", + "Биће нам потребни неки пакети да завршимо овај модул. Можете их инсталирати као: `install.packages(c('tidyverse', 'tidymodels', 'cluster', 'summarytools', 'plotly', 'paletteer', 'factoextra', 'patchwork'))`\n", + "\n", + "Алтернативно, скрипта испод проверава да ли имате пакете потребне за завршетак овог модула и инсталира их за вас у случају да неки недостају.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "ah_tBi58LXyi" + }, + "source": [ + "suppressWarnings(if(!require(\"pacman\")) install.packages(\"pacman\"))\n", + "\n", + "pacman::p_load('tidyverse', 'tidymodels', 'cluster', 'summarytools', 'plotly', 'paletteer', 'factoextra', 'patchwork')\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7e--UCUTLXym" + }, + "source": [ + "Хајде да кренемо одмах!\n", + "\n", + "## 1. Плес са подацима: Сузимо избор на 3 најпопуларнија музичка жанра\n", + "\n", + "Ово је преглед онога што смо радили у претходној лекцији. 
Хајде да мало анализирамо податке!\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Ycamx7GGLXyn" + }, + "source": [ + "# Load the core tidyverse and make it available in your current R session\n", + "library(tidyverse)\n", + "\n", + "# Import the data into a tibble\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/5-Clustering/data/nigerian-songs.csv\", show_col_types = FALSE)\n", + "\n", + "# Narrow down to top 3 popular genres\n", + "nigerian_songs <- df %>% \n", + " # Concentrate on top 3 genres\n", + " filter(artist_top_genre %in% c(\"afro dancehall\", \"afropop\",\"nigerian pop\")) %>% \n", + " # Remove unclassified observations\n", + " filter(popularity != 0)\n", + "\n", + "\n", + "\n", + "# Visualize popular genres using bar plots\n", + "theme_set(theme_light())\n", + "nigerian_songs %>%\n", + " count(artist_top_genre) %>%\n", + " ggplot(mapping = aes(x = artist_top_genre, y = n,\n", + " fill = artist_top_genre)) +\n", + " geom_col(alpha = 0.8) +\n", + " paletteer::scale_fill_paletteer_d(\"ggsci::category10_d3\") +\n", + " ggtitle(\"Top genres\") +\n", + " theme(plot.title = element_text(hjust = 0.5))\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b5h5zmkPLXyp" + }, + "source": [ + "🤩 То је прошло добро!\n", + "\n", + "## 2. Више истраживања података.\n", + "\n", + "Колико су ови подаци чисти? Хајде да проверимо постојање екстремних вредности користећи box plot графиконе. Фокусираћемо се на нумеричке колоне са мање екстремних вредности (иако можете уклонити екстремне вредности). Box plot графикони могу показати опсег података и помоћи у избору колона које ћемо користити. Напомена: Box plot графикони не приказују варијансу, што је важан елемент за добро кластерисане податке. 
Молимо вас да погледате [ову дискусију](https://stats.stackexchange.com/questions/91536/deduce-variance-from-boxplot) за више информација.\n", + "\n", + "[Box plot графикони](https://en.wikipedia.org/wiki/Box_plot) се користе за графичко приказивање расподеле `нумеричких` података, па хајде да почнемо са *избором* свих нумеричких колона заједно са популарним музичким жанровима.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "HhNreJKLLXyq" + }, + "source": [ + "# Select top genre column and all other numeric columns\n", + "df_numeric <- nigerian_songs %>% \n", + " select(artist_top_genre, where(is.numeric)) \n", + "\n", + "# Display the data\n", + "df_numeric %>% \n", + " slice_head(n = 5)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uYXrwJRaLXyq" + }, + "source": [ + "Погледајте како селектор `where` олакшава овај процес 💁? Истражите и друге сличне функције [овде](https://tidyselect.r-lib.org/).\n", + "\n", + "Пошто ћемо правити boxplot за сваку нумеричку карактеристику и желимо да избегнемо коришћење петљи, хајде да преобликујемо наше податке у *дужи* формат који ће нам омогућити да искористимо `facets` - подграфиконе који приказују појединачне подскупове података.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "gd5bR3f8LXys" + }, + "source": [ + "# Pivot data from wide to long\n", + "df_numeric_long <- df_numeric %>% \n", + " pivot_longer(!artist_top_genre, names_to = \"feature_names\", values_to = \"values\") \n", + "\n", + "# Print out data\n", + "df_numeric_long %>% \n", + " slice_head(n = 15)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-7tE1swnLXyv" + }, + "source": [ + "Сада много дужи! Време је за мало `ggplots`! 
Па који `geom` ћемо користити?\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "r88bIsyuLXyy" + }, + "source": [ + "# Make a box plot\n", + "df_numeric_long %>% \n", + " ggplot(mapping = aes(x = feature_names, y = values, fill = feature_names)) +\n", + " geom_boxplot() +\n", + " facet_wrap(~ feature_names, ncol = 4, scales = \"free\") +\n", + " theme(legend.position = \"none\")\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EYVyKIUELXyz" + }, + "source": [ + "Лако-gg!\n", + "\n", + "Сада можемо видети да су подаци мало \"бучни\": посматрајући сваку колону као boxplot, можете уочити екстремне вредности. Могли бисте проћи кроз скуп података и уклонити те екстремне вредности, али то би учинило податке прилично минималним.\n", + "\n", + "За сада, хајде да изаберемо које ћемо колоне користити за наш кластеринг задатак. Изабраћемо нумеричке колоне са сличним опсезима. Могли бисмо да енкодирамо `artist_top_genre` као нумеричку вредност, али ћемо је за сада изоставити.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "-wkpINyZLXy0" + }, + "source": [ + "# Select variables with similar ranges\n", + "df_numeric_select <- df_numeric %>% \n", + " select(popularity, danceability, acousticness, loudness, energy) \n", + "\n", + "# Normalize data\n", + "# df_numeric_select <- scale(df_numeric_select)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D7dLzgpqLXy1" + }, + "source": [ + "## 3. Израчунавање k-means кластеризације у R-у\n", + "\n", + "Можемо израчунати k-means кластеризацију у R-у помоћу уграђене функције `kmeans`, погледајте `help(\"kmeans()\")`. Функција `kmeans()` прихвата податке у облику табеле са свим нумеричким колонама као свој примарни аргумент.\n", + "\n", + "Први корак при коришћењу k-means кластеризације је да се одреди број кластера (k) који ће бити генерисани у коначном решењу. 
Знамо да постоје 3 жанра песама које смо издвојили из скупа података, па хајде да пробамо са 3:\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "uC4EQ5w7LXy5" + }, + "source": [ + "set.seed(2056)\n", + "# Kmeans clustering for 3 clusters\n", + "kclust <- kmeans(\n", + " df_numeric_select,\n", + " # Specify the number of clusters\n", + " centers = 3,\n", + " # How many random initial configurations\n", + " nstart = 25\n", + ")\n", + "\n", + "# Display clustering object\n", + "kclust\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hzfhscWrLXy-" + }, + "source": [ + "Објекат kmeans садржи неколико информација које су добро објашњене у `help(\"kmeans()\")`. За сада, хајде да се фокусирамо на неколико њих. Видимо да су подаци груписани у 3 кластера величина 65, 110, 111. Излаз такође садржи центаре кластера (средине) за 3 групе кроз 5 променљивих.\n", + "\n", + "Вектор кластерисања представља доделу кластера за сваку опсервацију. Хајде да користимо функцију `augment` како бисмо додали доделу кластера оригиналном скупу података.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "0XwwpFGQLXy_" + }, + "source": [ + "# Add predicted cluster assignment to data set\n", + "augment(kclust, df_numeric_select) %>% \n", + " relocate(.cluster) %>% \n", + " slice_head(n = 10)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NXIVXXACLXzA" + }, + "source": [ + "Одлично, управо смо поделили наш скуп података у 3 групе. Па, колико је добро наше кластерисање 🤷? Хајде да погледамо `Silhouette score`.\n", + "\n", + "### **Silhouette score**\n", + "\n", + "[Silhouette анализа](https://en.wikipedia.org/wiki/Silhouette_(clustering)) може се користити за проучавање удаљености раздвајања између добијених кластера. Овај скор варира од -1 до 1, и ако је скор близу 1, кластер је густ и добро одвојен од других кластера. 
Вредност близу 0 представља преклапајуће кластере са узорцима који су веома близу граници одлуке суседних кластера. [извор](https://dzone.com/articles/kmeans-silhouette-score-explained-with-python-exam).\n", + "\n", + "Метод просечног silhouette-а израчунава просечан silhouette за различите вредности *k*. Висок просечан silhouette скор указује на добро кластерисање.\n", + "\n", + "Функција `silhouette` у пакету за кластерисање користи се за израчунавање просечне ширине silhouette-а.\n", + "\n", + "> Silhouette се може израчунати помоћу било које [метрике удаљености](https://en.wikipedia.org/wiki/Distance \"Distance\"), као што су [Еуклидска удаљеност](https://en.wikipedia.org/wiki/Euclidean_distance \"Euclidean distance\") или [Манхатен удаљеност](https://en.wikipedia.org/wiki/Manhattan_distance \"Manhattan distance\") коју смо разматрали у [претходном часу](https://github.com/microsoft/ML-For-Beginners/blob/main/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb).\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Jn0McL28LXzB" + }, + "source": [ + "# Load cluster package\n", + "library(cluster)\n", + "\n", + "# Compute average silhouette score\n", + "ss <- silhouette(kclust$cluster,\n", + " # Compute euclidean distance\n", + " dist = dist(df_numeric_select))\n", + "mean(ss[, 3])\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QyQRn97nLXzC" + }, + "source": [ + "Наш резултат је **0.549**, што је тачно на средини. Ово указује да наши подаци нису нарочито погодни за ову врсту кластерисања. Хајде да видимо да ли можемо визуелно да потврдимо ову претпоставку. 
[factoextra пакет](https://rpkgs.datanovia.com/factoextra/index.html) пружа функције (`fviz_cluster()`) за визуелизацију кластерисања.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "7a6Km1_FLXzD" + }, + "source": [ + "library(factoextra)\n", + "\n", + "# Visualize clustering results\n", + "fviz_cluster(kclust, df_numeric_select)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IBwCWt-0LXzD" + }, + "source": [ + "Преклапање у кластерима указује на то да наши подаци нису нарочито погодни за ову врсту кластерисања, али хајде да наставимо.\n", + "\n", + "## 4. Одређивање оптималног броја кластера\n", + "\n", + "Основно питање које се често јавља код K-Means кластерисања је следеће - без познатих ознака класа, како знати на колико кластера треба поделити податке?\n", + "\n", + "Један од начина да то сазнамо је да користимо узорак података за `креирање серије модела кластерисања` са повећавајућим бројем кластера (нпр. од 1 до 10), и да проценимо метрике кластерисања као што је **Silhouette score.**\n", + "\n", + "Хајде да одредимо оптималан број кластера тако што ћемо израчунати алгоритам кластерисања за различите вредности *k* и проценити **Within Cluster Sum of Squares** (WCSS). Укупна сума квадрата унутар кластера (WCSS) мери компактност кластерисања, и желимо да она буде што мања, јер ниже вредности значе да су тачке података ближе једна другој.\n", + "\n", + "Хајде да истражимо ефекат различитих избора `k`, од 1 до 10, на ово кластерисање.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "hSeIiylDLXzE" + }, + "source": [ + "# Create a series of clustering models\n", + "kclusts <- tibble(k = 1:10) %>% \n", + " # Perform kmeans clustering for 1,2,3 ... 
,10 clusters\n", + " mutate(model = map(k, ~ kmeans(df_numeric_select, centers = .x, nstart = 25)),\n", + " # Farm out clustering metrics eg WCSS\n", + " glanced = map(model, ~ glance(.x))) %>% \n", + " unnest(cols = glanced)\n", + " \n", + "\n", + "# View clustering results\n", + "kclusts\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "m7rS2U1eLXzE" + }, + "source": [ + "Сада када имамо укупну суму квадрата унутар кластера (tot.withinss) за сваки алгоритам кластерисања са центром *k*, користимо [метод лакта](https://en.wikipedia.org/wiki/Elbow_method_(clustering)) да бисмо пронашли оптималан број кластера. Овај метод се састоји од графичког приказивања WCSS као функције броја кластера и одабирања [лакта криве](https://en.wikipedia.org/wiki/Elbow_of_the_curve \"Elbow of the curve\") као броја кластера који ће се користити.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "o_DjHGItLXzF" + }, + "source": [ + "set.seed(2056)\n", + "# Use elbow method to determine optimum number of clusters\n", + "kclusts %>% \n", + " ggplot(mapping = aes(x = k, y = tot.withinss)) +\n", + " geom_line(size = 1.2, alpha = 0.8, color = \"#FF7F0EFF\") +\n", + " geom_point(size = 2, color = \"#FF7F0EFF\")\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pLYyt5XSLXzG" + }, + "source": [ + "График приказује значајно смањење WCSS (што значи већу *збијеност*) како се број кластера повећава са један на два, и још једно приметно смањење са два на три кластера. Након тога, смањење је мање изражено, што резултира `елбоу` 💪 на графику око три кластера.
Ово је добар показатељ да постоје два до три разумно добро одвојена кластера тачака података.\n", + "\n", + "Сада можемо наставити и издвојити модел кластерисања где је `k = 3`:\n", + "\n", + "> `pull()`: користи се за издвајање једне колоне\n", + ">\n", + "> `pluck()`: користи се за индексирање структура података као што су листе\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "JP_JPKBILXzG" + }, + "source": [ + "# Extract k = 3 clustering\n", + "final_kmeans <- kclusts %>% \n", + " filter(k == 3) %>% \n", + " pull(model) %>% \n", + " pluck(1)\n", + "\n", + "\n", + "final_kmeans\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l_PDTu8tLXzI" + }, + "source": [ + "Одлично! Хајде да визуализујемо добијене кластере. Желите ли мало интерактивности уз помоћ `plotly`?\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "dNcleFe-LXzJ" + }, + "source": [ + "# Add predicted cluster assignment to data set\n", + "results <- augment(final_kmeans, df_numeric_select) %>% \n", + " bind_cols(df_numeric %>% select(artist_top_genre)) \n", + "\n", + "# Plot cluster assignments\n", + "clust_plt <- results %>% \n", + " ggplot(mapping = aes(x = popularity, y = danceability, color = .cluster, shape = artist_top_genre)) +\n", + " geom_point(size = 2, alpha = 0.8) +\n", + " paletteer::scale_color_paletteer_d(\"ggthemes::Tableau_10\")\n", + "\n", + "ggplotly(clust_plt)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6JUM_51VLXzK" + }, + "source": [ + "Можда бисмо очекивали да сваки кластер (представљен различитим бојама) има јасно одвојене жанрове (представљене различитим облицима).\n", + "\n", + "Хајде да погледамо тачност модела.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "HdIMUGq7LXzL" + }, + "source": [ + "# Assign genres to predefined integers\n", + "label_count <- results %>% \n", + " group_by(artist_top_genre) %>% 
\n", + " mutate(id = cur_group_id()) %>% \n", + " ungroup() %>% \n", + " summarise(correct_labels = sum(.cluster == id))\n", + "\n", + "\n", + "# Print results \n", + "cat(\"Result:\", label_count$correct_labels, \"out of\", nrow(results), \"samples were correctly labeled.\")\n", + "\n", + "cat(\"\\nAccuracy score:\", label_count$correct_labels/nrow(results))\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "C50wvaAOLXzM" + }, + "source": [ + "Тачност овог модела није лоша, али није ни сјајна. Могуће је да подаци нису погодни за K-Means кластеризацију. Ови подаци су превише неуравнотежени, слабо повезани и постоји превелика варијанса између вредности колона да би се добро груписали. Заправо, кластери који се формирају вероватно су значајно под утицајем или искривљени због три категорије жанрова које смо горе дефинисали.\n", + "\n", + "Ипак, ово је био прилично поучан процес!\n", + "\n", + "У документацији Scikit-learn-а можете видети да модел попут овог, са кластерима који нису добро дефинисани, има проблем са 'варијансом':\n", + "\n", + "

\n", + " \n", + "

Инфографика из Scikit-learn-а
\n", + "\n", + "\n", + "\n", + "## **Варијанса**\n", + "\n", + "Варијанса се дефинише као \"просек квадрата разлика од средње вредности\" [извор](https://www.mathsisfun.com/data/standard-deviation.html). У контексту овог проблема кластеризације, односи се на податке код којих вредности нашег скупа података имају тенденцију да се превише удаље од средње вредности.\n", + "\n", + "✅ Ово је одличан тренутак да размислите о свим начинима на које бисте могли да решите овај проблем. Да ли да мало прилагодите податке? Користите различите колоне? Примените другачији алгоритам? Савет: Пробајте [скалирање података](https://www.mygreatlearning.com/blog/learning-data-science-with-k-means-clustering/) како бисте их нормализовали и тестирали друге колоне.\n", + "\n", + "> Пробајте овај '[калкулатор варијансе](https://www.calculatorsoup.com/calculators/statistics/variance-calculator.php)' да бисте боље разумели концепт.\n", + "\n", + "------------------------------------------------------------------------\n", + "\n", + "## **🚀Изазов**\n", + "\n", + "Проведите неко време са овом бележницом, подешавајући параметре. Можете ли побољшати тачност модела чишћењем података (на пример, уклањањем екстремних вредности)? Можете користити тежине како бисте одређеним узорцима података дали већу важност. Шта још можете учинити да креирате боље кластере?\n", + "\n", + "Савет: Пробајте да скалирате податке. У бележници постоји коментарисани код који додаје стандардно скалирање како би колоне података биле сличније у смислу опсега. Видећете да, иако се silhouette скор смањује, 'прелом' у графику лакта постаје глађи. То је зато што остављање података нескалираних омогућава подацима са мањом варијансом да имају већу тежину.
Прочитајте више о овом проблему [овде](https://stats.stackexchange.com/questions/21222/are-mean-normalization-and-feature-scaling-needed-for-k-means-clustering/21226#21226).\n", + "\n", + "## [**Квиз након предавања**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/30/)\n", + "\n", + "## **Преглед и самостално учење**\n", + "\n", + "- Погледајте симулатор за K-Means [као што је овај](https://user.ceng.metu.edu.tr/~akifakkus/courses/ceng574/k-means/). Можете користити овај алат за визуализацију узорака података и одређивање њихових центара. Можете уређивати насумичност података, број кластера и број центара. Да ли вам ово помаже да стекнете идеју о томе како се подаци могу груписати?\n", + "\n", + "- Такође, погледајте [овај материјал о K-Means](https://stanford.edu/~cpiech/cs221/handouts/kmeans.html) са Станфорда.\n", + "\n", + "Желите да тестирате своје новостечене вештине кластеризације на скуповима података који су погодни за K-Means кластеризацију? Погледајте:\n", + "\n", + "- [Тренирање и евалуација модела кластеризације](https://rpubs.com/eR_ic/clustering) користећи Tidymodels и сличне алате\n", + "\n", + "- [K-Means анализа кластера](https://uc-r.github.io/kmeans_clustering), UC Business Analytics R Programming Guide\n", + "\n", + "- [K-Means кластеризација са принципима уредних података](https://www.tidymodels.org/learn/statistics/k-means/)\n", + "\n", + "## **Задатак**\n", + "\n", + "[Пробајте различите методе кластеризације](https://github.com/microsoft/ML-For-Beginners/blob/main/5-Clustering/2-K-Means/assignment.md)\n", + "\n", + "## ХВАЛА:\n", + "\n", + "[Jen Looper](https://www.twitter.com/jenlooper) за креирање оригиналне Python верзије овог модула ♥️\n", + "\n", + "[`Allison Horst`](https://twitter.com/allison_horst/) за креирање невероватних илустрација које чине R приступачнијим и занимљивијим.
Пронађите више илустрација у њеној [галерији](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM).\n", + "\n", + "Срећно учење,\n", + "\n", + "[Eric](https://twitter.com/ericntay), Gold Microsoft Learn Student Ambassador.\n", + "\n", + "

\n", + " \n", + "

Илустрација @allison_horst
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако настојимо да обезбедимо тачност, молимо вас да имате у виду да аутоматизовани преводи могу садржати грешке или нетачности. Оригинални документ на изворном језику треба сматрати ауторитативним извором. За критичне информације препоручује се професионални превод од стране људи. Не сносимо одговорност за било каква погрешна тумачења или неспоразуме који могу произаћи из коришћења овог превода.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sr/5-Clustering/2-K-Means/solution/notebook.ipynb b/translations/sr/5-Clustering/2-K-Means/solution/notebook.ipynb new file mode 100644 index 000000000..b90573479 --- /dev/null +++ b/translations/sr/5-Clustering/2-K-Means/solution/notebook.ipynb @@ -0,0 +1,548 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "e867e87e3129c8875423a82945f4ad5e", + "translation_date": "2025-09-06T14:20:53+00:00", + "source_file": "5-Clustering/2-K-Means/solution/notebook.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + 
"output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: seaborn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.11.1)\n", + "Requirement already satisfied: pandas>=0.23 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.1.2)\n", + "Requirement already satisfied: matplotlib>=2.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (3.1.0)\n", + "Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.4.1)\n", + "Requirement already satisfied: numpy>=1.15 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.19.2)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2.8.0)\n", + "Requirement already satisfied: pytz>=2017.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2019.1)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (1.1.0)\n", + "Requirement already satisfied: cycler>=0.10 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n", + "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (2.4.0)\n", + "Requirement already satisfied: six>=1.5 in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from python-dateutil>=2.7.3->pandas>=0.23->seaborn) (1.12.0)\n", + "Requirement already satisfied: setuptools in 
/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from kiwisolver>=1.0.1->matplotlib>=2.2->seaborn) (45.1.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install seaborn" + ] + }, + { + "source": [ + "Почните тамо где смо завршили у последњем часу, са увезеним и филтрираним подацима.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "0 Sparky Mandy & The Jungle \n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "2 LITT! LITT! \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "0 Cruel Santino alternative r&b 2019 144000 48 \n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "2 AYLØ indie r&b 2018 207758 40 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "0 0.666 0.8510 0.420 0.534000 0.1100 -6.699 \n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "2 0.836 0.2720 0.564 0.000537 0.1100 -7.127 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "\n", + " speechiness tempo time_signature \n", + "0 0.0829 133.015 5 \n", + "1 0.3600 129.993 3 \n", + "2 0.0424 130.005 4 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
0SparkyMandy & The JungleCruel Santinoalternative r&b2019144000480.6660.85100.4200.5340000.1100-6.6990.0829133.0155
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
2LITT!LITT!AYLØindie r&b2018207758400.8360.27200.5640.0005370.1100-7.1270.0424130.0054
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
\n
" + }, + "metadata": {}, + "execution_count": 11 + } + ], + "source": [ + "\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "\n", + "\n", + "df = pd.read_csv(\"../../data/nigerian-songs.csv\")\n", + "df.head()" + ] + }, + { + "source": [ + "Фокусираћемо се само на 3 жанра. Можда можемо направити 3 кластера!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "metadata": {}, + "execution_count": 12 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "df = df[(df['artist_top_genre'] == 'afro dancehall') | (df['artist_top_genre'] == 'afropop') | (df['artist_top_genre'] == 'nigerian pop')]\n", + "df = df[(df['popularity'] > 0)]\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. 
\n", + "5 Kasala Pioneers \n", + "6 Pull Up Everything Pretty \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "5 DRB Lasgidi nigerian pop 2020 184800 26 \n", + "6 prettyboydo nigerian pop 2018 202648 29 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "5 0.803 0.1270 0.525 0.000007 0.1290 -10.034 \n", + "6 0.818 0.4520 0.587 0.004490 0.5900 -9.840 \n", + "\n", + " speechiness tempo time_signature \n", + "1 0.3600 129.993 3 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 \n", + "5 0.1970 100.103 4 \n", + "6 0.1990 95.842 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
5KasalaPioneersDRB Lasgidinigerian pop2020184800260.8030.12700.5250.0000070.1290-10.0340.1970100.1034
6Pull UpEverything Prettyprettyboydonigerian pop2018202648290.8180.45200.5870.0044900.5900-9.8400.199095.8424
\n
" + }, + "metadata": {}, + "execution_count": 13 + } + ], + "source": [ + "df.head()" + ] + }, + { + "source": [ + "Колико су чисти ови подаци? Проверите за одступања користећи боксплотове. Фокусираћемо се на колоне са мање одступања (иако можете уклонити одступања). Боксплотови могу показати опсег података и помоћи ће у одабиру колона за употребу. Имајте на уму, боксплотови не показују варијансу, важан елемент добрих података за кластеровање (https://stats.stackexchange.com/questions/91536/deduce-variance-from-boxplot)\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 14 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.figure(figsize=(20,20), dpi=200)\n", + "\n", + "plt.subplot(4,3,1)\n", + "sns.boxplot(x = 'popularity', data = df)\n", + "\n", + "plt.subplot(4,3,2)\n", + "sns.boxplot(x = 'acousticness', data = df)\n", + "\n", + "plt.subplot(4,3,3)\n", + "sns.boxplot(x = 'energy', data = df)\n", + "\n", + "plt.subplot(4,3,4)\n", + "sns.boxplot(x = 'instrumentalness', data = df)\n", + "\n", + "plt.subplot(4,3,5)\n", + "sns.boxplot(x = 'liveness', data = df)\n", + "\n", + "plt.subplot(4,3,6)\n", + "sns.boxplot(x = 'loudness', data = df)\n", + "\n", + "plt.subplot(4,3,7)\n", + "sns.boxplot(x = 'speechiness', data = df)\n", + "\n", + "plt.subplot(4,3,8)\n", + "sns.boxplot(x = 'tempo', data = df)\n", + "\n", + "plt.subplot(4,3,9)\n", + "sns.boxplot(x = 
'time_signature', data = df)\n", + "\n", + "plt.subplot(4,3,10)\n", + "sns.boxplot(x = 'danceability', data = df)\n", + "\n", + "plt.subplot(4,3,11)\n", + "sns.boxplot(x = 'length', data = df)\n", + "\n", + "plt.subplot(4,3,12)\n", + "sns.boxplot(x = 'release_date', data = df)" + ] + }, + { + "source": [ + "Изаберите неколико колона са сличним опсезима. Обавезно укључите колону artist_top_genre како бисмо задржали жанрове исправним.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import LabelEncoder, StandardScaler\n", + "le = LabelEncoder()\n", + "\n", + "# scaler = StandardScaler()\n", + "\n", + "X = df.loc[:, ('artist_top_genre','popularity','danceability','acousticness','loudness','energy')]\n", + "\n", + "y = df['artist_top_genre']\n", + "\n", + "X['artist_top_genre'] = le.fit_transform(X['artist_top_genre'])\n", + "\n", + "# X = scaler.fit_transform(X)\n", + "\n", + "y = le.transform(y)\n", + "\n" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 0, 2, 1, 1, 0, 1, 0, 0,\n", + " 0, 1, 0, 2, 0, 0, 2, 2, 1, 1, 0, 2, 2, 2, 2, 1, 1, 0, 2, 0, 2, 0,\n", + " 2, 0, 0, 1, 1, 2, 1, 0, 0, 2, 2, 2, 2, 1, 1, 0, 1, 2, 2, 1, 2, 2,\n", + " 1, 2, 1, 2, 2, 1, 1, 1, 1, 1, 2, 1, 2, 2, 0, 2, 1, 1, 1, 2, 2, 2,\n", + " 2, 1, 2, 2, 2, 2, 1, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 2, 1, 2, 0,\n", + " 1, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 0, 1, 1, 1, 1, 0, 1, 2, 1, 2,\n", + " 1, 2, 2, 2, 0, 2, 1, 1, 1, 2, 1, 0, 1, 2, 2, 1, 1, 1, 0, 1, 2, 2,\n", + " 2, 1, 1, 0, 1, 2, 1, 1, 1, 1, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2,\n", + " 0, 1, 0, 0, 1, 0, 0, 2, 0, 0, 1, 1, 2, 0, 2, 2, 0, 2, 2, 1, 1, 0,\n", + " 1, 1, 0, 0, 1, 0, 2, 0, 1, 0, 2, 
0, 0, 2, 2, 2, 1, 1, 1, 1, 1, 0,\n", + " 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2,\n", + " 1, 1, 0, 0, 1, 1, 2, 0, 0, 0, 0, 0, 2, 0, 0, 2, 1, 1, 1, 2, 2, 2,\n", + " 1, 2, 1, 2, 1, 1, 1, 0, 2, 2, 2, 1, 2, 1, 0, 1, 2, 1, 1, 1, 2, 1],\n", + " dtype=int32)" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ], + "source": [ + "\n", + "from sklearn.cluster import KMeans\n", + "\n", + "nclusters = 3 \n", + "seed = 0\n", + "\n", + "km = KMeans(n_clusters=nclusters, random_state=seed)\n", + "km.fit(X)\n", + "\n", + "# Predict the cluster for each data point\n", + "\n", + "y_cluster_kmeans = km.predict(X)\n", + "y_cluster_kmeans" + ] + }, + { + "source": [ + "Ти бројеви нам не значе много, па хајде да добијемо „силуетни скор“ како бисмо видели тачност. Наш скор је у средини.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.5466747351275563" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ], + "source": [ + "from sklearn import metrics\n", + "score = metrics.silhouette_score(X, y_cluster_kmeans)\n", + "score" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.cluster import KMeans\n", + "wcss = []\n", + "\n", + "for i in range(1, 11):\n", + " kmeans = KMeans(n_clusters = i, init = 'k-means++', random_state = 42)\n", + " kmeans.fit(X)\n", + " wcss.append(kmeans.inertia_)" + ] + }, + { + "source": [ + "Користите тај модел да одлучите, користећи Елбо метод, најбољи број кластера за израду\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + 
"/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.\n FutureWarning\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.figure(figsize=(10,5))\n", + "sns.lineplot(range(1, 11), wcss,marker='o',color='red')\n", + "plt.title('Elbow')\n", + "plt.xlabel('Number of clusters')\n", + "plt.ylabel('WCSS')\n", + "plt.show()" + ] + }, + { + "source": [ + "Looks like 3 is a good number after all. Fit the model again and create a scatterplot of your clusters. They do group in bunches, but they are pretty close together." + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "from sklearn.cluster import KMeans\n", + "kmeans = KMeans(n_clusters = 3)\n", + "kmeans.fit(X)\n", + "labels = kmeans.predict(X)\n", + "plt.scatter(df['popularity'],df['danceability'],c = labels)\n", + 
"plt.xlabel('popularity')\n", + "plt.ylabel('danceability')\n", + "plt.show()" + ] + }, + { + "source": [ + "Тачност овог модела није лоша, али није ни сјајна. Могуће је да подаци нису погодни за К-Меанс кластерисање. Можете покушати са другом методом.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 811, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Result: 109 out of 286 samples were correctly labeled.\nAccuracy score: 0.38\n" + ] + } + ], + "source": [ + "labels = kmeans.labels_\n", + "\n", + "correct_labels = sum(y == labels)\n", + "\n", + "print(\"Result: %d out of %d samples were correctly labeled.\" % (correct_labels, y.size))\n", + "\n", + "print('Accuracy score: {0:0.2f}'. format(correct_labels/float(y.size)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако тежимо тачности, молимо вас да имате у виду да аутоматски преводи могу садржати грешке или нетачности. Оригинални документ на изворном језику треба сматрати ауторитативним извором. За критичне информације препоручује се професионални превод од стране људи. 
Не сносимо одговорност за било каква погрешна тумачења или неспоразуме који могу произаћи из коришћења овог превода.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sr/5-Clustering/2-K-Means/solution/tester.ipynb b/translations/sr/5-Clustering/2-K-Means/solution/tester.ipynb new file mode 100644 index 000000000..8d79b7eef --- /dev/null +++ b/translations/sr/5-Clustering/2-K-Means/solution/tester.ipynb @@ -0,0 +1,343 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "6f92868513e59d321245137c1c4c5311", + "translation_date": "2025-09-06T14:22:27+00:00", + "source_file": "5-Clustering/2-K-Means/solution/tester.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: seaborn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.11.1)\n", + "Requirement already satisfied: pandas>=0.23 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.1.2)\n", + "Requirement already satisfied: matplotlib>=2.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) 
(3.1.0)\n", + "Requirement already satisfied: numpy>=1.15 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.19.2)\n", + "Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.4.1)\n", + "Requirement already satisfied: pytz>=2017.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2019.1)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2.8.0)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (1.1.0)\n", + "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (2.4.0)\n", + "Requirement already satisfied: cycler>=0.10 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n", + "Requirement already satisfied: six>=1.5 in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from python-dateutil>=2.7.3->pandas>=0.23->seaborn) (1.12.0)\n", + "Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from kiwisolver>=1.0.1->matplotlib>=2.2->seaborn) (45.1.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install seaborn" + ] + }, + { + "source": [ + "Почните 
там где смо завршили у последњем часу, са увезеним и филтрираним подацима.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "0 Sparky Mandy & The Jungle \n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "2 LITT! LITT! \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "0 Cruel Santino alternative r&b 2019 144000 48 \n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "2 AYLØ indie r&b 2018 207758 40 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "0 0.666 0.8510 0.420 0.534000 0.1100 -6.699 \n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "2 0.836 0.2720 0.564 0.000537 0.1100 -7.127 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "\n", + " speechiness tempo time_signature \n", + "0 0.0829 133.015 5 \n", + "1 0.3600 129.993 3 \n", + "2 0.0424 130.005 4 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
0SparkyMandy & The JungleCruel Santinoalternative r&b2019144000480.6660.85100.4200.5340000.1100-6.6990.0829133.0155
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
2LITT!LITT!AYLØindie r&b2018207758400.8360.27200.5640.0005370.1100-7.1270.0424130.0054
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
\n
" + }, + "metadata": {}, + "execution_count": 105 + } + ], + "source": [ + "\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "import numpy as np\n", + "\n", + "df = pd.read_csv(\"../../data/nigerian-songs.csv\")\n", + "df.head()" + ] + }, + { + "source": [ + "Фокусираћемо се само на 3 жанра. Можда можемо направити 3 кластера!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "metadata": {}, + "execution_count": 106 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "df = df[(df['artist_top_genre'] == 'afro dancehall') | (df['artist_top_genre'] == 'afropop') | (df['artist_top_genre'] == 'nigerian pop')]\n", + "df = df[(df['popularity'] > 0)]\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. 
\n", + "5 Kasala Pioneers \n", + "6 Pull Up Everything Pretty \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "5 DRB Lasgidi nigerian pop 2020 184800 26 \n", + "6 prettyboydo nigerian pop 2018 202648 29 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "5 0.803 0.1270 0.525 0.000007 0.1290 -10.034 \n", + "6 0.818 0.4520 0.587 0.004490 0.5900 -9.840 \n", + "\n", + " speechiness tempo time_signature \n", + "1 0.3600 129.993 3 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 \n", + "5 0.1970 100.103 4 \n", + "6 0.1990 95.842 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
5KasalaPioneersDRB Lasgidinigerian pop2020184800260.8030.12700.5250.0000070.1290-10.0340.1970100.1034
6Pull UpEverything Prettyprettyboydonigerian pop2018202648290.8180.45200.5870.0044900.5900-9.8400.199095.8424
\n
" + }, + "metadata": {}, + "execution_count": 107 + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import StandardScaler\n", + "\n", + "scaler = StandardScaler()\n", + "\n", + "# X = df.loc[:, ('danceability','energy')]\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [ + { + "output_type": "error", + "ename": "ValueError", + "evalue": "Unknown label type: 'continuous'", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0;31m# we create an instance of SVM and fit out data. We do not scale our\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[0;31m# data since we want to plot the support vectors\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 22\u001b[0;31m \u001b[0mls30\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mLabelSpreading\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_30\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_30\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Label Spreading 30% data'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 23\u001b[0m \u001b[0mls50\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mLabelSpreading\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0my_50\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_50\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Label Spreading 50% data'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0mls100\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mLabelSpreading\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Label Spreading 100% data'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/semi_supervised/_label_propagation.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y)\u001b[0m\n\u001b[1;32m 228\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_validate_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 229\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mX_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 230\u001b[0;31m \u001b[0mcheck_classification_targets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 231\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 232\u001b[0m \u001b[0;31m# actual graph construction (implementations should override this)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/utils/multiclass.py\u001b[0m in 
\u001b[0;36mcheck_classification_targets\u001b[0;34m(y)\u001b[0m\n\u001b[1;32m 181\u001b[0m if y_type not in ['binary', 'multiclass', 'multiclass-multioutput',\n\u001b[1;32m 182\u001b[0m 'multilabel-indicator', 'multilabel-sequences']:\n\u001b[0;32m--> 183\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Unknown label type: %r\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0my_type\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 184\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 185\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: Unknown label type: 'continuous'" + ] + } + ], + "source": [ + "from sklearn.svm import SVC\n", + "from sklearn.semi_supervised import LabelSpreading\n", + "from sklearn.semi_supervised import SelfTrainingClassifier\n", + "from sklearn import datasets\n", + "\n", + "X = df[['danceability','acousticness']].values\n", + "y = df['energy'].values\n", + "\n", + "# X = scaler.fit_transform(X)\n", + "\n", + "# step size in the mesh\n", + "h = .02\n", + "\n", + "rng = np.random.RandomState(0)\n", + "y_rand = rng.rand(y.shape[0])\n", + "y_30 = np.copy(y)\n", + "y_30[y_rand < 0.3] = -1 # set random samples to be unlabeled\n", + "y_50 = np.copy(y)\n", + "y_50[y_rand < 0.5] = -1\n", + "# we create an instance of SVM and fit out data. 
We do not scale our\n", + "# data since we want to plot the support vectors\n", + "ls30 = (LabelSpreading().fit(X, y_30), y_30, 'Label Spreading 30% data')\n", + "ls50 = (LabelSpreading().fit(X, y_50), y_50, 'Label Spreading 50% data')\n", + "ls100 = (LabelSpreading().fit(X, y), y, 'Label Spreading 100% data')\n", + "\n", + "# the base classifier for self-training is identical to the SVC\n", + "base_classifier = SVC(kernel='rbf', gamma=.5, probability=True)\n", + "st30 = (SelfTrainingClassifier(base_classifier).fit(X, y_30),\n", + " y_30, 'Self-training 30% data')\n", + "st50 = (SelfTrainingClassifier(base_classifier).fit(X, y_50),\n", + " y_50, 'Self-training 50% data')\n", + "\n", + "rbf_svc = (SVC(kernel='rbf', gamma=.5).fit(X, y), y, 'SVC with rbf kernel')\n", + "\n", + "# create a mesh to plot in\n", + "x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1\n", + "y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1\n", + "xx, yy = np.meshgrid(np.arange(x_min, x_max, h),\n", + " np.arange(y_min, y_max, h))\n", + "\n", + "color_map = {-1: (1, 1, 1), 0: (0, 0, .9), 1: (1, 0, 0), 2: (.8, .6, 0)}\n", + "\n", + "classifiers = (ls30, st30, ls50, st50, ls100, rbf_svc)\n", + "for i, (clf, y_train, title) in enumerate(classifiers):\n", + " # Plot the decision boundary. 
For that, we will assign a color to each\n", + " # point in the mesh [x_min, x_max]x[y_min, y_max].\n", + " plt.subplot(3, 2, i + 1)\n", + " Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])\n", + "\n", + " # Put the result into a color plot\n", + " Z = Z.reshape(xx.shape)\n", + " plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)\n", + " plt.axis('off')\n", + "\n", + " # Plot also the training points\n", + " colors = [color_map[y] for y in y_train]\n", + " plt.scatter(X[:, 0], X[:, 1], c=colors, edgecolors='black')\n", + "\n", + " plt.title(title)\n", + "\n", + "plt.suptitle(\"Unlabeled points are colored white\", y=0.1)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако тежимо тачности, молимо вас да имате у виду да аутоматски преводи могу садржати грешке или нетачности. Оригинални документ на изворном језику треба сматрати ауторитативним извором. За критичне информације препоручује се професионални превод од стране људи. 
Не сносимо одговорност за било каква погрешна тумачења или неспоразуме који могу произаћи из коришћења овог превода.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sr/6-NLP/3-Translation-Sentiment/solution/notebook.ipynb b/translations/sr/6-NLP/3-Translation-Sentiment/solution/notebook.ipynb new file mode 100644 index 000000000..b2d02248b --- /dev/null +++ b/translations/sr/6-NLP/3-Translation-Sentiment/solution/notebook.ipynb @@ -0,0 +1,100 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 4, + "coopTranslator": { + "original_hash": "27de2abc0235ebd22080fc8f1107454d", + "translation_date": "2025-09-06T15:22:10+00:00", + "source_file": "6-NLP/3-Translation-Sentiment/solution/notebook.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from textblob import TextBlob\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# You should download the book text, clean it, and import it here\n", + "with open(\"pride.txt\", encoding=\"utf8\") as f:\n", + " file_contents = f.read()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "book_pride = TextBlob(file_contents)\n", + "positive_sentiment_sentences = []\n", + "negative_sentiment_sentences = []" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for sentence in book_pride.sentences:\n", + " if sentence.sentiment.polarity == 1:\n", + " positive_sentiment_sentences.append(sentence)\n", + " if sentence.sentiment.polarity == 
-1:\n", + " negative_sentiment_sentences.append(sentence)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The \" + str(len(positive_sentiment_sentences)) + \" most positive sentences:\")\n", + "for sentence in positive_sentiment_sentences:\n", + " print(\"+ \" + str(sentence.replace(\"\\n\", \"\").replace(\" \", \" \")))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The \" + str(len(negative_sentiment_sentences)) + \" most negative sentences:\")\n", + "for sentence in negative_sentiment_sentences:\n", + " print(\"- \" + str(sentence.replace(\"\\n\", \"\").replace(\" \", \" \")))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако се трудимо да обезбедимо тачност, молимо вас да имате у виду да аутоматски преводи могу садржати грешке или нетачности. Оригинални документ на његовом изворном језику треба сматрати ауторитативним извором. За критичне информације препоручује се професионални превод од стране људи. 
Не преузимамо одговорност за било каква погрешна тумачења или неспоразуме који могу настати услед коришћења овог превода.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sr/6-NLP/4-Hotel-Reviews-1/notebook.ipynb b/translations/sr/6-NLP/4-Hotel-Reviews-1/notebook.ipynb new file mode 100644 index 000000000..e69de29bb diff --git a/translations/sr/6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb b/translations/sr/6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb new file mode 100644 index 000000000..c4caec531 --- /dev/null +++ b/translations/sr/6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb @@ -0,0 +1,174 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 4, + "coopTranslator": { + "original_hash": "2d05e7db439376aa824f4b387f8324ca", + "translation_date": "2025-09-06T15:21:49+00:00", + "source_file": "6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# EDA\n", + "import pandas as pd\n", + "import time" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_difference_review_avg(row):\n", + " return row[\"Average_Score\"] - row[\"Calc_Average_Score\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the hotel reviews from CSV\n", + "print(\"Loading data file now, this could take a while depending on file size\")\n", + "start = time.time()\n", + "df = pd.read_csv('../../data/Hotel_Reviews.csv')\n", + "end = time.time()\n", + "print(\"Loading took \" + str(round(end - start, 2)) + \" 
seconds\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# What shape is the data (rows, columns)?\n", + "print(\"The shape of the data (rows, cols) is \" + str(df.shape))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# value_counts() creates a Series object that has index and values\n", + "# in this case, the country and the frequency they occur in reviewer nationality\n", + "nationality_freq = df[\"Reviewer_Nationality\"].value_counts()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# What reviewer nationality is the most common in the dataset?\n", + "print(\"The highest frequency reviewer nationality is \" + str(nationality_freq.index[0]).strip() + \" with \" + str(nationality_freq[0]) + \" reviews.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# What is the top 10 most common nationalities and their frequencies?\n", + "print(\"The top 10 highest frequency reviewer nationalities are:\")\n", + "print(nationality_freq[0:10].to_string())\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# How many unique nationalities are there?\n", + "print(\"There are \" + str(nationality_freq.index.size) + \" unique nationalities in the dataset\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# What was the most frequently reviewed hotel for the top 10 nationalities - print the hotel and number of reviews\n", + "for nat in nationality_freq[:10].index:\n", + " # First, extract all the rows that match the criteria into a new dataframe\n", + " nat_df = df[df[\"Reviewer_Nationality\"] == nat] \n", + " # Now get the hotel freq\n", + " freq = 
nat_df[\"Hotel_Name\"].value_counts()\n", + " print(\"The most reviewed hotel for \" + str(nat).strip() + \" was \" + str(freq.index[0]) + \" with \" + str(freq[0]) + \" reviews.\") \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# How many reviews are there per hotel (frequency count of hotel) and do the results match the value in `Total_Number_of_Reviews`?\n", + "# First create a new dataframe based on the old one, removing the uneeded columns\n", + "hotel_freq_df = df.drop([\"Hotel_Address\", \"Additional_Number_of_Scoring\", \"Review_Date\", \"Average_Score\", \"Reviewer_Nationality\", \"Negative_Review\", \"Review_Total_Negative_Word_Counts\", \"Positive_Review\", \"Review_Total_Positive_Word_Counts\", \"Total_Number_of_Reviews_Reviewer_Has_Given\", \"Reviewer_Score\", \"Tags\", \"days_since_review\", \"lat\", \"lng\"], axis = 1)\n", + "# Group the rows by Hotel_Name, count them and put the result in a new column Total_Reviews_Found\n", + "hotel_freq_df['Total_Reviews_Found'] = hotel_freq_df.groupby('Hotel_Name').transform('count')\n", + "# Get rid of all the duplicated rows\n", + "hotel_freq_df = hotel_freq_df.drop_duplicates(subset = [\"Hotel_Name\"])\n", + "print()\n", + "print(hotel_freq_df.to_string())\n", + "print(str(hotel_freq_df.shape))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# While there is an `Average_Score` for each hotel according to the dataset, \n", + "# you can also calculate an average score (getting the average of all reviewer scores in the dataset for each hotel)\n", + "# Add a new column to your dataframe with the column header `Calc_Average_Score` that contains that calculated average. 
\n", + "df['Calc_Average_Score'] = round(df.groupby('Hotel_Name').Reviewer_Score.transform('mean'), 1)\n", + "# Add a new column with the difference between the two average scores\n", + "df[\"Average_Score_Difference\"] = df.apply(get_difference_review_avg, axis = 1)\n", + "# Create a df without all the duplicates of Hotel_Name (so only 1 row per hotel)\n", + "review_scores_df = df.drop_duplicates(subset = [\"Hotel_Name\"])\n", + "# Sort the dataframe to find the lowest and highest average score difference\n", + "review_scores_df = review_scores_df.sort_values(by=[\"Average_Score_Difference\"])\n", + "print(review_scores_df[[\"Average_Score_Difference\", \"Average_Score\", \"Calc_Average_Score\", \"Hotel_Name\"]])\n", + "# Do any hotels have the same (rounded to 1 decimal place) `Average_Score` and `Calc_Average_Score`?\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако се трудимо да превод буде тачан, молимо вас да имате у виду да аутоматски преводи могу садржати грешке или нетачности. Оригинални документ на изворном језику треба сматрати ауторитативним извором. За критичне информације препоручује се професионални превод од стране људи. 
Не преузимамо одговорност за било каква погрешна тумачења или неспоразуме који могу настати услед коришћења овог превода.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sr/6-NLP/5-Hotel-Reviews-2/notebook.ipynb b/translations/sr/6-NLP/5-Hotel-Reviews-2/notebook.ipynb new file mode 100644 index 000000000..e69de29bb diff --git a/translations/sr/6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb b/translations/sr/6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb new file mode 100644 index 000000000..d7189cfa6 --- /dev/null +++ b/translations/sr/6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb @@ -0,0 +1,172 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "033cb89c85500224b3c63fd04f49b4aa", + "translation_date": "2025-09-06T15:22:32+00:00", + "source_file": "6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import time\n", + "import ast" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def replace_address(row):\n", + " if \"Netherlands\" in row[\"Hotel_Address\"]:\n", + " return \"Amsterdam, Netherlands\"\n", + " elif \"Barcelona\" in row[\"Hotel_Address\"]:\n", + " return \"Barcelona, Spain\"\n", + " elif \"United Kingdom\" in row[\"Hotel_Address\"]:\n", + " return \"London, United 
Kingdom\"\n", + " elif \"Milan\" in row[\"Hotel_Address\"]: \n", + " return \"Milan, Italy\"\n", + " elif \"France\" in row[\"Hotel_Address\"]:\n", + " return \"Paris, France\"\n", + " elif \"Vienna\" in row[\"Hotel_Address\"]:\n", + " return \"Vienna, Austria\" \n", + " else:\n", + " return row.Hotel_Address\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the hotel reviews from CSV\n", + "start = time.time()\n", + "df = pd.read_csv('../../data/Hotel_Reviews.csv')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# dropping columns we will not use:\n", + "df.drop([\"lat\", \"lng\"], axis = 1, inplace=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Replace all the addresses with a shortened, more useful form\n", + "df[\"Hotel_Address\"] = df.apply(replace_address, axis = 1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Drop `Additional_Number_of_Scoring`\n", + "df.drop([\"Additional_Number_of_Scoring\"], axis = 1, inplace=True)\n", + "# Replace `Total_Number_of_Reviews` and `Average_Score` with our own calculated values\n", + "df.Total_Number_of_Reviews = df.groupby('Hotel_Name').transform('count')\n", + "df.Average_Score = round(df.groupby('Hotel_Name').Reviewer_Score.transform('mean'), 1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Process the Tags into new columns\n", + "# The file Hotel_Reviews_Tags.py, identifies the most important tags\n", + "# Leisure trip, Couple, Solo traveler, Business trip, Group combined with Travelers with friends, \n", + "# Family with young children, Family with older children, With a pet\n", + "df[\"Leisure_trip\"] = df.Tags.apply(lambda tag: 1 if \"Leisure trip\" in tag else 
0)\n", + "df[\"Couple\"] = df.Tags.apply(lambda tag: 1 if \"Couple\" in tag else 0)\n", + "df[\"Solo_traveler\"] = df.Tags.apply(lambda tag: 1 if \"Solo traveler\" in tag else 0)\n", + "df[\"Business_trip\"] = df.Tags.apply(lambda tag: 1 if \"Business trip\" in tag else 0)\n", + "df[\"Group\"] = df.Tags.apply(lambda tag: 1 if \"Group\" in tag or \"Travelers with friends\" in tag else 0)\n", + "df[\"Family_with_young_children\"] = df.Tags.apply(lambda tag: 1 if \"Family with young children\" in tag else 0)\n", + "df[\"Family_with_older_children\"] = df.Tags.apply(lambda tag: 1 if \"Family with older children\" in tag else 0)\n", + "df[\"With_a_pet\"] = df.Tags.apply(lambda tag: 1 if \"With a pet\" in tag else 0)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# No longer need any of these columns\n", + "df.drop([\"Review_Date\", \"Review_Total_Negative_Word_Counts\", \"Review_Total_Positive_Word_Counts\", \"days_since_review\", \"Total_Number_of_Reviews_Reviewer_Has_Given\"], axis = 1, inplace=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Saving results to Hotel_Reviews_Filtered.csv\n", + "Filtering took 23.74 seconds\n" + ] + } + ], + "source": [ + "# Saving new data file with calculated columns\n", + "print(\"Saving results to Hotel_Reviews_Filtered.csv\")\n", + "df.to_csv(r'../../data/Hotel_Reviews_Filtered.csv', index = False)\n", + "end = time.time()\n", + "print(\"Filtering took \" + str(round(end - start, 2)) + \" seconds\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). 
Иако се трудимо да обезбедимо тачност, молимо вас да имате у виду да аутоматски преводи могу садржати грешке или нетачности. Оригинални документ на његовом изворном језику треба сматрати ауторитативним извором. За критичне информације препоручује се професионални превод од стране људи. Не преузимамо одговорност за било каква погрешна тумачења или неспоразуме који могу настати услед коришћења овог превода.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sr/6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb b/translations/sr/6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb new file mode 100644 index 000000000..901808791 --- /dev/null +++ b/translations/sr/6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb @@ -0,0 +1,137 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "341efc86325ec2a214f682f57a189dfd", + "translation_date": "2025-09-06T15:22:52+00:00", + "source_file": "6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the hotel reviews from CSV (you can )\n", + "import pandas as pd \n", + "\n", + "df = pd.read_csv('../../data/Hotel_Reviews_Filtered.csv')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# We want to find the most useful tags to keep\n", + "# Remove opening and closing brackets\n", + "df.Tags = 
df.Tags.str.strip(\"[']\")\n", + "# remove all quotes too\n", + "df.Tags = df.Tags.str.replace(\" ', '\", \",\", regex = False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# removing this to take advantage of the 'already a phrase' fact of the dataset \n", + "# Now split the strings into a list\n", + "tag_list_df = df.Tags.str.split(',', expand = True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove leading and trailing spaces\n", + "df[\"Tag_1\"] = tag_list_df[0].str.strip()\n", + "df[\"Tag_2\"] = tag_list_df[1].str.strip()\n", + "df[\"Tag_3\"] = tag_list_df[2].str.strip()\n", + "df[\"Tag_4\"] = tag_list_df[3].str.strip()\n", + "df[\"Tag_5\"] = tag_list_df[4].str.strip()\n", + "df[\"Tag_6\"] = tag_list_df[5].str.strip()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# Merge the 6 columns into one with melt\n", + "df_tags = df.melt(value_vars=[\"Tag_1\", \"Tag_2\", \"Tag_3\", \"Tag_4\", \"Tag_5\", \"Tag_6\"])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "The shape of the tags with no filtering: (2514684, 2)\n", + " index count\n", + "0 Leisure trip 338423\n", + "1 Couple 205305\n", + "2 Solo traveler 89779\n", + "3 Business trip 68176\n", + "4 Group 51593\n", + "5 Family with young children 49318\n", + "6 Family with older children 21509\n", + "7 Travelers with friends 1610\n", + "8 With a pet 1078\n" + ] + } + ], + "source": [ + "# Get the value counts\n", + "tag_vc = df_tags.value.value_counts()\n", + "# print(tag_vc)\n", + "print(\"The shape of the tags with no filtering:\", str(df_tags.shape))\n", + "# Drop rooms, suites, and length of stay, mobile device and anything with less count than a 1000\n", + "df_tags = 
df_tags[~df_tags.value.str.contains(\"Standard|room|Stayed|device|Beds|Suite|Studio|King|Superior|Double\", na=False, case=False)]\n", + "tag_vc = df_tags.value.value_counts().reset_index(name=\"count\").query(\"count > 1000\")\n", + "# Print the top 10 (there should only be 9 and we'll use these in the filtering section)\n", + "print(tag_vc[:10])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако се трудимо да превод буде тачан, молимо вас да имате у виду да аутоматизовани преводи могу садржати грешке или нетачности. Оригинални документ на његовом изворном језику треба сматрати меродавним извором. За критичне информације препоручује се професионални превод од стране људи. Не преузимамо одговорност за било каква погрешна тумачења или неспоразуме који могу настати услед коришћења овог превода.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sr/6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb b/translations/sr/6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb new file mode 100644 index 000000000..28518ef28 --- /dev/null +++ b/translations/sr/6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb @@ -0,0 +1,260 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "705bf02633759f689abc37b19749a16d", + "translation_date": "2025-09-06T15:23:12+00:00", + 
"source_file": "6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[nltk_data] Downloading package vader_lexicon to\n[nltk_data] /Users/jenlooper/nltk_data...\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ], + "source": [ + "import time\n", + "import pandas as pd\n", + "import nltk as nltk\n", + "from nltk.corpus import stopwords\n", + "from nltk.sentiment.vader import SentimentIntensityAnalyzer\n", + "nltk.download('vader_lexicon')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "vader_sentiment = SentimentIntensityAnalyzer()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# There are 3 possibilities of input for a review:\n", + "# It could be \"No Negative\", in which case, return 0\n", + "# It could be \"No Positive\", in which case, return 0\n", + "# It could be a review, in which case calculate the sentiment\n", + "def calc_sentiment(review): \n", + " if review == \"No Negative\" or review == \"No Positive\":\n", + " return 0\n", + " return vader_sentiment.polarity_scores(review)[\"compound\"] \n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the hotel reviews from CSV\n", + "df = pd.read_csv(\"../../data/Hotel_Reviews_Filtered.csv\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove stop words - can be slow for a lot of text!\n", + "# Ryan Han (ryanxjhan on Kaggle) has a great post measuring performance of different stop words removal 
approaches\n", + "# https://www.kaggle.com/ryanxjhan/fast-stop-words-removal # using the approach that Ryan recommends\n", + "start = time.time()\n", + "cache = set(stopwords.words(\"english\"))\n", + "def remove_stopwords(review):\n", + " text = \" \".join([word for word in review.split() if word not in cache])\n", + " return text\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove the stop words from both columns\n", + "df.Negative_Review = df.Negative_Review.apply(remove_stopwords) \n", + "df.Positive_Review = df.Positive_Review.apply(remove_stopwords)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Removing stop words took 5.77 seconds\n" + ] + } + ], + "source": [ + "end = time.time()\n", + "print(\"Removing stop words took \" + str(round(end - start, 2)) + \" seconds\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Calculating sentiment columns for both positive and negative reviews\n", + "Calculating sentiment took 201.07 seconds\n" + ] + } + ], + "source": [ + "# Add a negative sentiment and positive sentiment column\n", + "print(\"Calculating sentiment columns for both positive and negative reviews\")\n", + "start = time.time()\n", + "df[\"Negative_Sentiment\"] = df.Negative_Review.apply(calc_sentiment)\n", + "df[\"Positive_Sentiment\"] = df.Positive_Review.apply(calc_sentiment)\n", + "end = time.time()\n", + "print(\"Calculating sentiment took \" + str(round(end - start, 2)) + \" seconds\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Negative_Review Negative_Sentiment\n", + "186584 So bad experience memories I hotel The first n... 
-0.9920\n", + "129503 First charged twice room booked booking second... -0.9896\n", + "307286 The staff Had bad experience even booking Janu... -0.9889\n", + "452092 No WLAN room Incredibly rude restaurant staff ... -0.9884\n", + "201293 We usually traveling Paris 2 3 times year busi... -0.9873\n", + "... ... ...\n", + "26899 I would say however one night expensive even d... 0.9933\n", + "138365 Wifi terribly slow I speed test network upload... 0.9938\n", + "79215 I find anything hotel first I walked past hote... 0.9938\n", + "278506 The property great location There bakery next ... 0.9945\n", + "339189 Guys I like hotel I wish return next year Howe... 0.9948\n", + "\n", + "[515738 rows x 2 columns]\n", + " Positive_Review Positive_Sentiment\n", + "137893 Bathroom Shower We going stay twice hotel 2 ni... -0.9820\n", + "5839 I completely disappointed mad since reception ... -0.9780\n", + "64158 get everything extra internet parking breakfas... -0.9751\n", + "124178 I didnt like anythig Room small Asked upgrade ... -0.9721\n", + "489137 Very rude manager abusive staff reception Dirt... -0.9703\n", + "... ... ...\n", + "331570 Everything This recently renovated hotel class... 0.9984\n", + "322920 From moment stepped doors Guesthouse Hotel sta... 0.9985\n", + "293710 This place surprise expected good actually gre... 0.9985\n", + "417442 We celebrated wedding night Langham I commend ... 0.9985\n", + "132492 We arrived super cute boutique hotel area expl... 
0.9987\n", + "\n", + "[515738 rows x 2 columns]\n" + ] + } + ], + "source": [ + "df = df.sort_values(by=[\"Negative_Sentiment\"], ascending=True)\n", + "print(df[[\"Negative_Review\", \"Negative_Sentiment\"]])\n", + "df = df.sort_values(by=[\"Positive_Sentiment\"], ascending=True)\n", + "print(df[[\"Positive_Review\", \"Positive_Sentiment\"]])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "# Reorder the columns (This is cosmetic, but to make it easier to explore the data later)\n", + "df = df.reindex([\"Hotel_Name\", \"Hotel_Address\", \"Total_Number_of_Reviews\", \"Average_Score\", \"Reviewer_Score\", \"Negative_Sentiment\", \"Positive_Sentiment\", \"Reviewer_Nationality\", \"Leisure_trip\", \"Couple\", \"Solo_traveler\", \"Business_trip\", \"Group\", \"Family_with_young_children\", \"Family_with_older_children\", \"With_a_pet\", \"Negative_Review\", \"Positive_Review\"], axis=1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Saving results to Hotel_Reviews_NLP.csv\n" + ] + } + ], + "source": [ + "print(\"Saving results to Hotel_Reviews_NLP.csv\")\n", + "df.to_csv(r\"../../data/Hotel_Reviews_NLP.csv\", index = False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако се трудимо да обезбедимо тачност, молимо вас да имате у виду да аутоматизовани преводи могу садржати грешке или нетачности. Оригинални документ на његовом изворном језику треба сматрати меродавним извором. За критичне информације, препоручује се професионални превод од стране људи. 
Не преузимамо одговорност за било каква погрешна тумачења или неспоразуме који могу настати услед коришћења овог превода.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sr/7-TimeSeries/1-Introduction/solution/notebook.ipynb b/translations/sr/7-TimeSeries/1-Introduction/solution/notebook.ipynb new file mode 100644 index 000000000..9848c7909 --- /dev/null +++ b/translations/sr/7-TimeSeries/1-Introduction/solution/notebook.ipynb @@ -0,0 +1,164 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Три године података о часовном електричном оптерећењу и температури између 2012. и 2014. године.\n", + "\n", + "Тао Хонг, Пјер Пинсон, Шу Фан, Хамидреза Зареипур, Алберто Троколи и Роб Џ. Хајндман, \"Прогностичко предвиђање енергије: Глобално такмичење у предвиђању енергије 2014 и даље\", International Journal of Forecasting, vol.32, no.3, стр. 896-913, јул-септембар, 2016.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import matplotlib.pyplot as plt\n", + "from common.utils import load_data\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Учитај податке из csv у Pandas dataframe\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " load\n", + "2012-01-01 00:00:00 2698.0\n", + "2012-01-01 01:00:00 2558.0\n", + "2012-01-01 02:00:00 2444.0\n", + "2012-01-01 03:00:00 2402.0\n", + "2012-01-01 04:00:00 2403.0" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
load
2012-01-01 00:00:002698.0
2012-01-01 01:00:002558.0
2012-01-01 02:00:002444.0
2012-01-01 03:00:002402.0
2012-01-01 04:00:002403.0
\n
" + }, + "metadata": {}, + "execution_count": 7 + } + ], + "source": [ + "data_dir = './data'\n", + "energy = load_data(data_dir)[['load']]\n", + "energy.head()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Прикажи све доступне податке о оптерећењу (јануар 2012. до децембар 2014.)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "energy.plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + 
}, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "energy['2014-07-01':'2014-07-07'].plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако настојимо да обезбедимо тачност, молимо вас да имате у виду да аутоматски преводи могу садржати грешке или нетачности. Оригинални документ на изворном језику треба сматрати меродавним извором. За критичне информације препоручује се професионални превод од стране људи. 
Не сносимо одговорност за било каква погрешна тумачења или неспоразуме који могу произаћи из коришћења овог превода.\n" + ] + } + ], + "metadata": { + "kernel_info": { + "name": "python3" + }, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "nteract": { + "version": "nteract-front-end@1.0.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "dddca9ad9e34435494e0933c218e1579", + "translation_date": "2025-09-06T14:01:21+00:00", + "source_file": "7-TimeSeries/1-Introduction/solution/notebook.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sr/7-TimeSeries/1-Introduction/working/notebook.ipynb b/translations/sr/7-TimeSeries/1-Introduction/working/notebook.ipynb new file mode 100644 index 000000000..ff63ca998 --- /dev/null +++ b/translations/sr/7-TimeSeries/1-Introduction/working/notebook.ipynb @@ -0,0 +1,63 @@ +{ + "cells": [ + { + "source": [ + "# Подешавање података\n", + "\n", + "У овом нотебуку, показујемо како да:\n", + "\n", + "припремите временске серије података за овај модул \n", + "визуелизујете податке \n", + "Подаци у овом примеру преузети су из такмичења за прогнозирање GEFCom2014. Они обухватају 3 године података о сатној потрошњи електричне енергије и вредностима температуре у периоду од 2012. до 2014. године.\n", + "\n", + "1Таао Хонг, Пјер Пинсон, Шу Фан, Хамидреза Зареипур, Алберто Троколи и Роб Џ. 
Хајндман, \"Прогнозирање енергије са вероватноћом: Глобално такмичење за прогнозирање енергије 2014 и даље\", International Journal of Forecasting, вол.32, бр.3, стр. 896-913, јул-септембар, 2016. \n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако тежимо тачности, молимо вас да имате у виду да аутоматски преводи могу садржати грешке или нетачности. Оригинални документ на изворном језику треба сматрати ауторитативним извором. За критичне информације препоручује се професионални превод од стране људи. Не сносимо одговорност за било каква погрешна тумачења или неспоразуме који могу произаћи из коришћења овог превода.\n" + ] + } + ], + "metadata": { + "kernel_info": { + "name": "python3" + }, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "nteract": { + "version": "nteract-front-end@1.0.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "5e2bbe594906dce3aaaa736d6dac6683", + "translation_date": "2025-09-06T14:02:20+00:00", + "source_file": "7-TimeSeries/1-Introduction/working/notebook.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sr/7-TimeSeries/2-ARIMA/solution/notebook.ipynb b/translations/sr/7-TimeSeries/2-ARIMA/solution/notebook.ipynb new file mode 100644 index 
000000000..54c4c4f46 --- /dev/null +++ b/translations/sr/7-TimeSeries/2-ARIMA/solution/notebook.ipynb @@ -0,0 +1,1140 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "Тао Хонг, Пјер Пинсон, Шу Фан, Хамидреза Зареипур, Алберто Троколи и Роб Џ. Хајндман, \"Прогностичко предвиђање енергије: Глобално такмичење у предвиђању енергије 2014 и даље\", International Journal of Forecasting, вол.32, бр.3, стр. 896-913, јул-септембар, 2016.\n" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Инсталирајте зависности\n", + "Започните инсталирањем неких потребних зависности. Познато је да ове библиотеке са одговарајућим верзијама функционишу за решење:\n", + "\n", + "* `statsmodels == 0.12.2`\n", + "* `matplotlib == 3.4.2`\n", + "* `scikit-learn == 0.24.2`\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 16, + "source": [ + "!pip install statsmodels" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "/bin/sh: pip: command not found\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 17, + "source": [ + "import os\n", + "import warnings\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import datetime as dt\n", + "import math\n", + "\n", + "from pandas.plotting import autocorrelation_plot\n", + "from statsmodels.tsa.statespace.sarimax import SARIMAX\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from common.utils import load_data, mape\n", + "from IPython.display import Image\n", + "\n", + "%matplotlib inline\n", + "pd.options.display.float_format = '{:,.2f}'.format\n", + "np.set_printoptions(precision=2)\n", + "warnings.filterwarnings(\"ignore\") # specify to ignore warning messages\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 18, + "source": [ + "energy = load_data('./data')[['load']]\n", + "energy.head(10)" + ], + "outputs": 
[ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2012-01-01 00:00:002,698.00
2012-01-01 01:00:002,558.00
2012-01-01 02:00:002,444.00
2012-01-01 03:00:002,402.00
2012-01-01 04:00:002,403.00
2012-01-01 05:00:002,453.00
2012-01-01 06:00:002,560.00
2012-01-01 07:00:002,719.00
2012-01-01 08:00:002,916.00
2012-01-01 09:00:003,105.00
\n", + "
" + ], + "text/plain": [ + " load\n", + "2012-01-01 00:00:00 2,698.00\n", + "2012-01-01 01:00:00 2,558.00\n", + "2012-01-01 02:00:00 2,444.00\n", + "2012-01-01 03:00:00 2,402.00\n", + "2012-01-01 04:00:00 2,403.00\n", + "2012-01-01 05:00:00 2,453.00\n", + "2012-01-01 06:00:00 2,560.00\n", + "2012-01-01 07:00:00 2,719.00\n", + "2012-01-01 08:00:00 2,916.00\n", + "2012-01-01 09:00:00 3,105.00" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Прикажи све доступне податке о оптерећењу (јануар 2012. до децембар 2014.)\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 19, + "source": [ + "energy.plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA4kAAAHVCAYAAABc/b7wAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nOy9d5xfVZ3//zopEBGwIOiu7Bp0bYuIBXdtYMOKosh3VwHLuqv+lNXVdReNuCgdpBuahNBNQkASIKQnpPdJnfRkJtOSTO8zmfb5nN8fn8+duZ/7Obff+7n3fu7r6SMyc8u5Z255n/M+7yaklCCEEEIIIYQQQgBgXNQdIIQQQgghhBASH6gkEkIIIYQQQggZhUoiIYQQQgghhJBRqCQSQgghhBBCCBmFSiIhhBBCCCGEkFGoJBJCCCGEEEIIGWVC1B2Igje96U1y8uTJUXeDEEIIIYQQQiJh69atrVLKM1X7UqkkTp48GRUVFVF3gxBCCCGEEEIiQQhRa7aP7qaEEEIIIYQQQkahkkgIIYQQQgghZBQqiYQQQgghhBBCRkllTCIhhBBCCCGEAMDw8DAaGhowMDAQdVdCYdKkSTj77LMxceJEx+dQSSSEEEIIIYSkloaGBpx22mmYPHkyhBBRdydQpJRoa2tDQ0MDzjnnHMfn0d2UEEIIIYQQkloGBgZwxhlnlJ2CCABCCJxxxhmuraRUEgkhhBBCCCGpphwVRA0vfxuVREIIIYQQQgiJkFNPPTWQdq6//nrcddddvtuhkkgIIYQQQgghZBQqiYQQQgghhBASA6SUuOaaa/C+970P5513HmbPng0A6O3txec+9zl86EMfwnnnnYeXXnpp9JxbbrkF73rXu/DJT34SBw4cCKQfzG5KCCGEEEIIIQBumLcHe491B9rmP/7t6fjD1851dOycOXOwY8cO7Ny5E62trfjIRz6Ciy66CGeeeSbmzp2L008/Ha2trfjoRz+KSy+9FNu2bcOzzz6LHTt2YGRkBB/60Ifw4Q9/2HefaUk
khBBCCCGEkBiwdu1aXHHFFRg/fjze/OY341Of+hS2bNkCKSWuvfZavP/978fFF1+Mo0ePoqmpCWvWrMFll12GU045BaeffjouvfTSQPpBSyIhhBBCCCGEAI4tfqVmxowZaGlpwdatWzFx4kRMnjzZdVkLN9CSSAghhBBCCCEx4MILL8Ts2bORyWTQ0tKC1atX45/+6Z/Q1dWFs846CxMnTsSKFStQW1sLALjooovw4osv4sSJE+jp6cG8efMC6QctiYQQQkgCGMlkcbxrAH/3xlOi7gohhJCQuOyyy7Bhwwacf/75EELgjjvuwFve8hZcddVV+NrXvobzzjsPF1xwAd7znvcAAD70oQ/hW9/6Fs4//3ycddZZ+MhHPhJIP4SUMpCGksQFF1wgKyoqou4GIYQQ4pjrX96DJ9fXoOL/LsabTj056u4QQkjZsG/fPrz3ve+NuhuhovobhRBbpZQXqI6nuykhhBCSAJbvbwIA9A2ORNwTQggh5Q6VREIIISQBDI1kAQAnTeDQTQghJFw40hBCCCEJYDiTCw8ZP05E3BNCCCHlDpVEQgghJAGM5hBIXyoBQggJnXLO0+Llb6OSSAghhMScq6ZvREf/MADqiIQQEjSTJk1CW1tbWSqKUkq0tbVh0qRJrs5jCQxCCCEk5qw73Db6cxnOYQghJFLOPvtsNDQ0oKWlJequhMKkSZNw9tlnuzqHSiIhhBASU04MZXCouadgm6QtkRBCAmXixIk455xzou5GrKCSSAghhMSU/569A4v2NBZsoyWREEJI2DAmkRBCCIkpW+s6irZRRySEEBI2VBIJIYSQmJLJFquE5ZhYgRBCSLygkkgIIYTEFLWSGEFHCCGEpAoqiYQQQkhMUSmJhBBCSNhQSSSEEEJiykg2W7SNlkRCCCFhQyWREEIIiSkKHZElMAghhIQOlURCCCEkpmQUZkNaEgkhhIQNlURCCCEkQVBHJIQQEjZUEgkhhJAEwRIYhBBCwoZKIiGEEBJThGIbVURCCCFhQyWREEIISRA0JBJCCAkbKomEEEJITBEqUyJtiYQQE1YdbMGK/c1Rd4OUAROi7gAhhBBC1AgIGJVCWhIJIWZ8//HNAICa2y+JuCck6ZTMkiiEWCmEGBBC9Ob/HdDtu1IIUSuE6BNCvCiEeKNu3xuFEHPz+2qFEFca2jU9lxBCCCk3qCMSQggJm1K7m/5MSnlq/t+7AUAIcS6ARwB8F8CbAfQDeEh3zoMAhvL7rgLwcP4cJ+cSQgghyUXhbkpLIiGEkLCJg7vpVQDmSSlXA4AQ4joA+4QQpwHIArgcwPuklL0A1gohXkZOKZxida6UsieCv4UQQggJDHV2U2qJhBBCwqXUlsTbhBCtQoh1QohP57edC2CndoCUsgo5y+G78v9GpJQHdW3szJ9jd24BQogfCyEqhBAVLS0tAf5JhBBCSDioEtfQkkgIISRsSqkk/gbA2wG8FcA0APOEEO8AcCqALsOxXQBOy+/rNtkHm3MLkFJOk1JeIKW84Mwzz/TzdxBCCCGRQSWREEJI2JTM3VRKuUn361NCiCsAfAVAL4DTDYefDqAHOXdTs32wOZcQQghJNELhcEp3U0IIIWETZZ1EiVy4xR4A52sbhRBvB3AygIP5fxOEEO/UnXd+/hzYnEsIIYQkGrqbEkIIiYKSKIlCiNcLIb4ohJgkhJgghLgKwEUAFgGYAeBrQogLhRCvBXAjgDlSyh4pZR+AOQBuFEK8VgjxCQBfB/BMvmnTc0vxdxFCCCGEEEJIuVEqS+JEADcDaAHQCuDnAL4hpTwopdwD4CfIKXzNyMUTXq0792oAr8nvmwXgp/lz4OBcQgghJLEos5vSkkgICRFJIUNQophEKWULgI9Y7J8JYKbJvnYA3/ByLiGEEFJuZDmBI4SExJfuW4269n7svfFLUXeFREwc6iQSQgghxCFUEQkhYbG/kRFbJEeUiWsIIYQQYoFQZa4hhBBCQoZKIiEBcaS1D3O
2Nbg+79vTNuBL960OoUeEkKRDFZEQQkgUUEkkJCC+dN9q/Oq5na7P21jdTvcOQlJKNitx0yt70dDRH3VXCCGEkFGoJBISEIMj2ai7QAhJGJVHu/DY2iP4z5nb1QfQlEgIISQCqCQSQgghEbG1tgMAsLO+U32AIksN09Onk8auAXT1D0fdDUJISqCSSEjAHKDrKCHEIeec+VoAwIff9oaIe0LizkdvW46P3rY86m4QQlIClURCAuaLTEJDCHGI5k16yknjrQ8gBMCJ4UzUXSCEpAQqiYQQQkhI1Lb1YUHlcdP9LHFBCCEkjkyIugOEEEJIufL5e1djaCSLqlu/gvHjPCiEqphE/90ihBBCLKElkRBCCAmJoXzW4y/cuyrinhBCCCHOoZJIUk1T9wAON/dG3Q1CSJlT1dKn3G5rW6Q3KiGEkAigkkhSzRfuXY2L7+EKPyEkWkyrWtC3lBBCSARQSSSppusEa04RQqLDS94alkkkhKiYvaUu6i6QMoJKIiGEEBJX6G5KCHHI0xtqi7b1DY7gnN/Ox5I9jRH0iCQZKomEEEJIxEgzv1JaDcuSgeEMJk+Zjxe3H426K6SMyCrkxZHWPkgJ3LfsUOk7RBINlURCANwwb0/UXSCEpBCRNxXShTRdtPQMAgDuXHwg4p6QciKTzUbdBVJGUEkkqUXqZmVPrKuJriOEEOIKapSEkGIyKlNiHkoN4hYqiSS1PFdRH3UXCCGEEEICwUpJZHgzccuEqDtASFRsPtIRSDs76jtx6sn8lAgh7tGym9LdNF14yWpLiB0ZChISIJzZktRimijCJd94cF0g7RBCCEkXkpN6UiL4phG30N2UpBaOzYSQ2EOLU1kiaEokISAUAmOc0JJjjU167l5yAJOnzC9Zv0gyoZJIUkspVnAzWYnrX96Dho5+5f7VB1tC7wMhJL7YqgoKMRXnBa7m7gFMnjIf2+qCcecvd2L8KEkCUa09jMvP9LM6wXH/q4dL1COSZKgkktRSisF5W10Hnlxfg1/N3qnc/6OnK0rQC0JI3AnK/T1q1le1AQCeZMZoS7S5vBeFf0tNe6B9IeWNZklU5bShuzOxgkoiSS2llI0MJieEKLEzJSbMK1GzZFhlWST+Etf8dk4lAOCVXcfQ3D0QUI9IOaB6rcYWJIq/SX6mxAoqiSS1BC0bxymkc8Lmd4SQuJGwSVxr7xAAYH7l8Yh7kgy8WJCllOgbHMHPZm7HVdM3hdArklRUsa5Wb1iWC9jEAiqJJLUELRz1wrl3cCS/Lfe7mUsHxTMhBHDn2RC23Njf2I3pa6o9nXv6JCZNd4IqwYgbtPHrWOeJILpDUgYt/sQJVBJJeglRNv7LnzfkfxKWl1IpjyOZLKavqcbQSDaczhFCYoOwkRFRcMnUtbh5/j5P55552skAgAvf+aYgu1S2eF2r1BYlnZ7edWIYB5t6vF2MlB1anOLKA80R94TEGSqJJLUEbknU/bzveHfBvu11nSZ9KN42a0s9bp6/D4+sqgqwd4QQ4gzNuuAnqQVLPFjj5/ZIuE98861HNuAL9672flGSCFSvleod0Y77yV+2hdkdknCoJBISEKpBX79tYDhTtF+lqPbnXVV78v8lhBA9e452leQ6XjzR4mQRTQKe7pfUhTI4bGF/I62IZAyu4RAnUEkkqSVoIamKMdFvUSmEqhW+0XTVjBVIJZmsRENHPzr6hqLuCikBo3LIxed+/by9ofTFyEjWu8s756DW+L0/o27KHCaIHn54JEAYYU5Si9/EAYoGizfpNFGng/nYCjFJI//17HbM35XLDFlz+yUR94aETRzndONEzoroSUek4HKFFyVPQp8Uzf744Qzj29OC0t1U8VHm5j/8WIk1tCQSEhBW9YkA9zGQXCFOJ5qCSEhUaN4MXuq7ahNSurNZ0zdUHH7gFCmlq/HhT8sOeb4WSS7Xzq2Mugsk4VBJJOklaEOiTUyiU+9RzfrI+kWEpAcv9fLCwq50j6M2AupLufK
Zu1YC8K9MO3lvqlt7/V2EJAa999LMTXUR9oSUA1QSSWoJehKjjkl0f5VxnF0RkhrioxoGA9e23OHV3VTDyeJj4KEVJLY4zW5qBnMhED1UEklqCTpFu50l0SnjaEkkJHXE8XP30iXt72AJDGd4uU36d8VRMXQ+CuKQB1ccLun1nttSj81H2kt6TeIcKomEBIR+HHaTVKConfy5VBIJIaXmeNcJDGe0Oone26FeUsz2ug7UtPYF0pbezXRhJeOYiT1VLX22LuSVJSqvo/HrF3bhXx/ZUNJrEudQSSSpJWi3Ti+ZTJXtBNAGISQZxO07f2WnTuFgncRAueyh9fh0PhbRD8Y4xB31nZbHU2FPDyrLtF7GLNzdmD9QfT69TYkeKokktQQfkxhQO3kpT1lNSHqIy/c+4nOWqFkq6G0aHm4XFuj6SzRaewct9/tJVkXKDyqJhMSMIDILEkKSxdbaDgyOeC+LEBQZXXHEOGVcJYTYY5ekaLyNC5X+i+cchFBJJKml5KurDuWtlriG8pmQ8keviBW4ekZEUG7zJDykLHw2do9JP9L98tntYXSJxBi9jBmf/77NZj/6XAj8/gmVRJJa4uqAo/VLE9b7G7uZljpBVLf0YtXBlqi7QYhv/EmduErY9KFfD31xx7HoOkJCx27te5ydJdHF4kMYSCnR0TcUwZWJCiqJhMQMvZA/2NSDL923BvctOxhdh4grPnv3Knz/8c0YGI7edZDEn3k66+G2uo4IexIMXM4iJBqklNjf2GN5zAQbJbHQklj6r/mhlVX44E1LcbzrRMmvTYqhkkjSSwAL3UMjWfuDXPKbFyoB5Fb0tBW1ebuid0Mj7vhsAFkMSfkza3Pd6M8zNtUV7JNSomdwpKT9GVfgbup+kjhWJzGoHhG/8FGkA7N6g/rP2C4mseA8vx3ywJK9TQCA410DEVydGCm5kiiEeKcQYkAI8Zf8758WQmSFEL26f9/XHf9GIcRcIUSfEKJWCHGlob0r89v7hBAvCiHeWOq/iSQTuwBvJ/x81jbHx7pNAiEBnDQh94keae1Dcw+FZpI4xkGOeGDvse7Rn5ftay759fXKnZ9JIhUTZ7T0DNoq47saOjGSKVyQdJNghNlN00HfkP2CkqYkmr0SUddnZgkw5wxngjdSGInCkvgggC2GbceklKfq/j1lOH4IwJsBXAXgYSHEuQCQ/+8jAL6b398P4KGw/wBCNBbvaQq1ff2qX2sP/fQJKXe+MnXN6M/9DiZ9QeNfneDsTsX6w62m+zTriYp9x7tx6QPr8IX7Vpse8/zWBl99I+lhvM2CQTZ8vcMRYXhplROHmnrwzt8txILKcL3MSqokCiG+DaATwHKHx78WwOUArpNS9kop1wJ4GTmlEMgpjfOklKullL0ArgPwTSHEaU7a39/YHfoNJvGl5MlNbeZOBwyxBKsOthRYO6Ne4SOElD8FlkQPIofupmqunL5p9Offza0s2NfWa74A2NKTq2tX3dJnekxn/zC6B4ZN9/NRpINymCLsqO8EADxXUY+B4QyT9plQebQLALBkT2Oo1ymZkiiEOB3AjQB+pdh9lhCiSQhxRAhxb145BIB3ARiRUuqzduwEcG7+53PzvwMApJRVyFkd3+WkT1+6bw2unuHcXZCUF3EbOP/fn9cX/K5NDjSoJBJCwka/MKV3kd9Q1Ya6tv4oulR2GGNPrWS7mbJtdDG1nEybtLHnWBc6++mhUi4EMUXwu0gUFFUtvXjPdYtw28J90XUixpRqEa6UlsSbADwmpTT6RewH8AEAfwPgswA+DOCe/L5TAXQbju8CcJpuf5fF/lGEED8WQlQIISpaWpienpQeO3mrcq/QT9K4oEZIuohiklYw+chff2gkiyse3YiL7lxhe77W5SBivklhIqGguWTqWnzzofX2B5JEkAlAYMTFA0CbDz27uT7insSbsIeIkiiJQogPALgYwL3GfVLKRinlXillVkp5BMCvkXMxBYBeAKcbTjkdQI/
D/frrTJNSXiClvODMM8/EvuNG3ZOkjVIIQzfJagZHsrjg5mWF5+tOpyWREBIF33x4neNj6W7qHqNkP9Z5ApOnzMeO+k7HqrbV8GClsFe3mruxkmRhlsBItdnsnYjL4s6E8bl+cN6jplTPqVSWxE8DmAygTgjRCOB/AVwuhFD5ekpdvw4CmCCEeKdu//kA9uR/3pP/HQAghHg7gJPz51ly+8L97v4CUnaUWhiqBLhxItXaW+hiqj+DvvmEkLDRZ8LUJM7uo+4XVakkusAwNqw5lPN2mrGxNorekITiJNml3SyiMLtxdHOOscUmCpIoKZWSOA3AO5BzK/0AgD8DmA/gi0KIzwgh3iZy/B2A2wG8BABSyj4AcwDcKIR4rRDiEwC+DuCZfLszAHxNCHFhPo7xRgBzpJTW1UTBAYwkD6qI5U8UxYsJ0TPOb+IaSirXGNf/tAVMCfNJsvEu+7nrPRZJb0hyCMLqVkqlLOykK2kg7ClDSZREKWV/3q20UUrZiJyb6ICUsgXABwGsB9CX/28lgP/SnX41gNcAaAYwC8BPpZR78u3uAfAT5JTFZuRiEa920ifqiMQLe451eU7N7OVb1isN1B/Kn1mMvyA6olC4CkMS3V9/1ALAUdY7ultXijn7HYsOhH8REjpmSqLqOw7ivWrqHkCjj3rAP35mq+k+znesKZUuP6E0lylESnm97ud7MJaoRnVsO4BvWOyfCWCm2z44XS3ZXteB889+PcaN44CXdura+nHJ1LX4/sfehhu+/r6i/SqZZjdREibnaehXmGllKn/WV5nXUiPpI8ykJWYEZkngkOkYq1gy1W082nki0OsPDGcCbY9EQ9Dxe3bN/fOtuWp2NbdfEuh1AX0CLGJFWSSuSSrrDrfisofW4zuPbbI/mJQ9rX25eMGdDcaEuuboV/C8ye+xkxbutnfN+OFTFfjq/WtsjyPxhPEXRE8U74PvOonBdaUsGM5k8Z3p1nOIYnfTHBLS8TvARUSSNXFy4qtBvJJaJdGJ2G3oyNWEWl/V5sukTsoDLXGMmVE5jOmcXrg/ub7G9vhl+5o8JZkg8YAqItET9fvgx0U+6r7HhcV7GrH2sLWHgPE+6xVDMx1xQ1WbZRuEqEiKwjj5jFNyP1CQWBL24lB6lUQHq3N6V0G6Y5CRvJI4YZz6sxmxyT6qjguwfg8TIs9JQNCQmC6qW3ot90fx/RdkN/UxAaFVPEfGR1bqxbsbTRcljxlcTt3c7U3VbfYHkbKhpWewaFtcv85vXfB3AID3vfV1AOLbz6gJQr5mshK1bdYlcFKsJDo5aOxHTtaJNtiPNxm1T4SwkJCUVT8SDFHEoJHo+Ozdqyz3R+FCyDcwWJxM5ozPWTujbyiD4yF4MV09Yxv2HHMeNkGSzQ+froi6C47RPheW/HKGn7t037KD+NSdKy2PSa2SSIhbtHHcxJDooAEv16SgTBNeJujDmSzWHmLCm3Li2c11uOb5nZFcuz4fZgF4jEkczW5KvKLXKweGvWXTNmsPyA1Fl0xdW/C7FRU17Zg8ZT52H6VimXSSUqKGOqI1o5+0j/u00YFHQWqVRA5gxCtB6m1276GbS/11a4OfrpCIGRzJYM72o67Pu2vxAXznsU3YWtseQq9IFEyZU4nntzbYypq23sHA4+UfWVUdSDs0iud44NVDvs63ynzqvA1fXcDSfU0AgDVcjCobnFm4S9ARs2vnZz90W1cTxG1xUqYovUqigxtcUC+KFh3iE29JIJwfe/vC/R6uQKyYuakOP3zK2lVn99GuQFxjqpqtYwNMz2vJndfWO+S7DyRe2K36f/jmZfjobcvDu76n7KYcK/UcbLKOO1XhZH7ip9yB1/kMny0pFZxylwAHcia9SqKDu2NcwWjqHqCySEqKm0GZpTyD59q5lViWX0VXsam6DV+9fy0eX3ekhL1SQ8lUfpiltI8zaXE3zWZlYHFT7X2FCzz6+UkQlhRjE72DIwW/23mhmM2XZm6qw476Tl9
9I+lkJGMt3EbrJJa7IIk5qVUS3XKktQ//fOtyTFsdjCsOiTcnhjJFgf1ehJV+TcG4vjCcydpmRHWDWUIdEh71HbkMg3uP+y87YvZ+XTFtI/71zxsKth1p7cNP/7IVgyPMukzCw4vlaFRJLPPZ3duvXYArp28MpK2HVlYV/F5YqzL45Z/hTHGbRw3ZUp1w7dxKfOPBdUF0icSQMC3Hdyw+YH1tltJxxPzK456rLzi5t6lVEt2OX/+Rdzmzq3dEkoPVO/CbF3bhkqlri1Z4g+S3cypdnzNzUx2au9UxSMyMGSEhmvE2VLdhc01hvOHv5lZi4e5GVNR0hHdhEjhuB/OorcN+9JM0SKON1dHFAQetOw6P2Jut6UhVPkT9fe40sUBr7xjfNWv01v0GXbIxV23Q3dQctzGJGnxxywerZ7mtLjf57h0YMT/IAQUrwoYp32tPGm/fgKGP186txPce32x7LVIagrjlmaxEfXu/Z0s1n3s4DAxnMGzjEuWWG+btdXV81OENnuKoA+9F+iioVemzraGRrO+kZpQxJGjs3mvGv8aD1CqJTlAJRr646UL5DgT0Ckwcb//5qS7VrCiMC9CSGCbXv7wntLbvW3YQF96xAnVtxauBbmKeuIAVLO+5bhH+xeDm6xe3K76JfqQURyXl57O2K7cfaOxx3dblD6/HPUus3QGHHFgeSfwoZVZc68bVm0frJCZa+MWfjr4hR54QVBIBLNtrnpjCCCdi6UD1nP3OeYxtOtHpVP0wsy5QRwyPJ9fXWO63EgurD7Zg9cEW0/3rq3K1ilTK/83z91led2A4g6Uu5BdxR+RJOSIeb3xZMjlWeqZAlJvcR+OCtSZHitryMC5sre3A1FcPWx7z2btXum+YEBs4xy4N2+udhaqkVknU+/P+8GnrFPd6+AITN1i9L04SO6jSnJs1SUtiPPne45tNXYQB68WHF7ZZu4m5kV0ketyOH5kEupsS/zi576WytAxnsnjYkFgHABo63Ce6IeGSpCkAvfLc8ak7V+CL964e/d3v/XNS4QFIsZLo5AbT3ZQAuYLVQaB/c25dsC/wTLlJGiDKkfr2fnSdGHZ9ntVzc2fJoWyKO27HDz+18ILA1+Upj2KB03HB7FGf0CVb2ljdhhaTcAeSMJx4MrlsUkqJW+bvdTQOmsmW2RX1lvvTSm1bPw40uXcdN8WhXEitkugEp5o2KT+0yfnqQy348M3LsPJAc6DtO1UQVXLSKDx7B0fQNzhCS2IEaLdcSokL71iBz9+zytX5O+o7sSWfoVT1rAeG1XE/bh61lDLyBCgkR2FJHPtnkui4nCT3PQL074P+847TwvSaQ634yC3LXJ93y/y92FobXSZYMkaYb9OcbUfx6JojOP+GJZ76cWKoOPtzfN7+ePGzmeoYZKc4nUKkVkn0qgByrpUuttflYpL0sUleB20vE3Wlu6lh2/v+sBjn37AELJMYDJuq2xxnA/zVczsLfjdLKmRGQY0xxbMeCiC75qUPrMM5v13gux3iH+0Rrz7Y4uiZuC3W/u7/W4hvPhRk3ToOeCrCKI3kdkGg26HXgp/F7t5Bf9m9NR5dcwSXPxxsEigSP5p61OW5VDidD3X2u/fOIcGRWiXRK1QS04GTx1zZ0BV6P1Qd0W9aWHkcADCSlbR8B8S3pm3E/z6/0/7AhFB5tATvKXGEtsC0wqFnglt308GRLLbVeU+2Y5y40d1UzYV/fDXwNt0uIj6kiBNU4cfB5Fezd3g/mcQaJ6+F23fy3W8+zfGxSi8p3Vb9z6rM38QfTnJiAFQSC1h3uBVbdEWrGZOYXjTZOE4U/q5n3q5j4ffDRku85q+7Rn+mt6l7DjX1YPKU+aN1MZMMF7Dij/aMJjg0+0f9TP1cflOEhebDpk/hFueXqD9flUJQ3dpXzrp+Kgkz9ODUkycAAD4y+Q0O+uFsGwC09TEONmjobuqBq6ZvCrwuFkk2mnVO5n4pwK2w9SKblYJU97N+rsmYRPesype
mmL8rZ5Ft9ZikKIhhN+pJIgkf7Rk7XcUtdeKaIC6n/WVHO5n90g1mz9rvM4liWPjUnSssMz7OJVwAACAASURBVDqT4NhW14GB4eAXLexQXXNcfkLiNZba7LSoE3iVI07lQnqVRIsb1NIziGV7m9RBtHxXU0GRBU8ZG+ikHZ/9sKmTOGH82Cc8Lr1fsy09A8N4aOXhohgvbbKuDUK/eNZfMLgfKFtSQP4ZO8446fGd2He8Gy/tOOrtZJ/X52scLH7vp5swBCklegb8xYDVt/ejtq3fsjYsCYZjnSfwzYfW49o5laG0b/Xu+b2mOimfVB4QQGg+8ciEqDsQR66avhEHm3qV+zgApgNpMZnT9pUi86Dab3+M8TpTIi2J5tz0yl48V9GAd551Gj7/j28e3W50Jw46SP7pDTWBtkeSjbb45HTi7jW84ct/WgMA+PoH3urqPOPV0hhesXxfE045aQI+9o4zou5KILgpgfHEuhrc+MresW0eVgk+c9dK1+cQb/TlEwvt8hB37tSbwYw5230uQrmoAZ1JdJrnYOk6MYzXvWai73ZYJ9EHNa3mQbJMJZ8uavMB089srMWqA4Uro05cIPy+L3bn6z9zv0K/nNGy9A2OFHoHaHdMe5b6W/jY2iOO2zd7TE+uq3Hchhu8JCkayWRxuDnAOkvENVaLTyqinht5EV9Jl0L/8VQFrnh0Yyonpov2NPpuY8Tivmlu/SQYRt07Y/Kuuvn2nZT30qC76Rjn37AEK/b7L8lGd1M/WNw8TsTTgSaSNEvd4EgWjzisbWjapgc5p5L9Zu2wBIY5o7GlhnunDbKjk3fdx3+TbkXdK1EPbXO3j5XyuHPxAVx8z2rUtPZF2KN0o70PTr/VoCdHaw61KMMoNIJYBI36nQ+KP7y8u6TXC2seHPWwoH+n/nPmNkeF1okzNO+hTEgvj1Wzp08K3hFR/67oL53GBRsrNh0xTwq2/nAr9h3vtm2DiWtCYmttB5q61bVgjrT2YX+j/cMh8UeTVef+7ekAgG9+qNhty8mEKowpl94FTL9mEfVkINbkb87PZ23Hz2ZuM25WWhLd8PJO+0y3VpPzIFC9a3ctPjj6c0VtLoNri8fkPGmlti04pVqTGY7dTQOcGx1q6sF3H9uM9/5+kfn1Qrx+0nhpR/jZq8Oie2AYn75zBXYf7Yo067VqjOSEPzjGa0qiz3s6ogv6U8WSZrOy6FmecpK5kuhkbqSuAa0+NiwlOGqe3VyHyVPmY2gkuKDLK6dvwpf/tAZTlx/CuRay3ilUEj1Q3aKeNHzmrpX40n1rStwb4hU3g6dKRrmVy17ie9ykiSZqVhxoLnBzekX3s+YZMJp1MuBr6wfLq2dstTzWr9WILjnh8P0AszS6tSQGGd7gxYLjRWaVzWJVgj+njVVtqGnrx33LDtofnMfPq9beN6Tcft+yQ94bJbZMGJ/72rTYRK9ocahVLepcHG+/dgHO+e2Cgm16GTbsIbNMVnGK/hXUy75MJsEfowV3LD4AwH0maCdy+Z6lB61L9dDd1Dt2985vMP9IJsvVtBjgd/7l5D3wfw1n24g5P3hii+k+zV1ndEAKcdl9Q3Wb5X67K1ebDOAamazEZ+9eiVdKUL8zTQwGuMqrvWaHmq2fpfH4IHAy5Biv5ze7aZJj+PWLLjvrO/HLZ7eXLPZLL4b+uGi/h/P1bvTOZZrxSKd/7e9fUrvmzthUy0XNENFurd/XcuneJgBAt3EhyaJdfejV/zy30/U1bbOb6ijXBVDtDrpO9hTA7WDiGh/YTQpae9WrZk459w+L8ck/vuqrDRI2Uvf/homP9l+3lkQPH/acbQ3FG8tTXkaCNs5pq5pWFp7fzqn0tbgzMJxFXdtYUiy3qeY/d8+q0Z9Vuuwvnt2B6pY+XPP8LuX5ZWPhKTHjAwz21d4eM8uLkSB1kigUtnKZ2/3w6Qq8uOOYrat298Cwo3ggO/T3zUvG5VJ/60G6yxHnaN/0qBu7xwc/brQUlLf
z51ceL7j+trpOW+umqqtmly9bJVF3EyZPmY+VB/wnpDFy1fSNvs5PrZLoR4j+1yx/tdQGR7I43qWOayTxwIlMciZQ/Qm3ZfuKhUZBTCKn/r4w3j2ruzlrcx3ece0CiyOKMT79F3W16z5+m7uFIilzMQy3Ldzn6jxVO8Q5DR0n0D/kz51rlPzNd1quJsjJkSNLYsArUEl+1fR9156W3eO46tFcPJCUEhuq2iK3pPq9enVLH4Z9uPpJqYpzLdzSOziCP6+qik2GziTiZuFadcz4cQaPGgfoRZhKTtnNcdWWxLGf9xwbW2yxypibbArHgdlb6h2d5eZurDvchsGRTJFLK7Obhsx3pm/C9rqOqLtBIsTtBCAoMedn3rG1tsN3/EI5cyTkzJ/6Z9fj4TlMmVOJR1b5y7JL3BPUe+E6jtniY3frVlyQOdChEPHtLp/gFQkvXa/M16ubX3kcVzy6EbM2O5v0GfHr9V6Q0MxnWwsq7ctWmF2jrW/INl7t9oX7cPvC/VgcQPmN1OLzM9OURDfySb/QpfpWvHjd6Bep1leNhWcwPKuQaS4z7f9i1g584vZXC+4js5uGzNrDrfj1X9VuXST5GAe9AiEoFdtKiNfLdvUP4/KH1+PnPi3hUdDeN4SHVh4ObdIpIbGroRMdHly7koKW3ZS4R5VkwQ+fe+9Zjo6zett/NtPdd6yfZ5lNuurbC2sEW1kWpZTosvleegaSuyClyiLd3OPMA6i+Pbdq7yYzbpCiTeuvO3kZTjzY8xXWirL2jgQZ+5sWjI/G64KA9p4Yn/VDKw+bnqO+1thGO8VO6W5qckq5lp4L6s+yKokBjNU/9aJsU0n0Adc2yp+xFZvip+02cU1QCo7XdobyK7o76zsD6UcpmfLCLtyx6AA22whDI8e7nGcNqzNMkIOgKBEIpUYiCcrtU3v+kyaOD6Q9N+j/BrO5wsX3rHbc3rTV1Tj/xiU4ZpGZL8kLE6pHfukD64q2zdpcV7RtxqbaXBserz047F1hqm/vx6GmXGKkOBhgBgx/S1N3YVxngo3NscHNLZy+NjenUc0jjJusakPbuczbyUw7d9PC7eX5khQlivL4Z/5urrOarl7mH1QSCfFIVANwQaxMeS6wFdGbd810Gx/zMYdxfyL/v7AJaqzzmzyLWGOclASlJAZtkbRDX/OsUEn0726quQcalUT9V5RkN7HCmERz2TBzU7GS2NCRuyeVDV2Or6d3ab5+3h7H5xm58I4VuG2h+4yoZqjeAWMtPav7Y5yYfmWqukxYWsayMHCjRO0+WpxYqSafUM1VTKLjI53DRVTvuFkQdwOVRB/4Xd1YtNve159ERTAiUJUVNSo0AZzEwTisPk+ZUwkgvMEprHaDyKBInBOUruO2meWKxFVu+J6uxqN+uAozW6C+5QATw5Ye3R/iVf7Ylb3Ro1eevLrp7j1WKBdcOZu6sOJc81d1yYPO/uLFK6evWpkai0pCELcum5Xu5JzNN+FFxqTtHSgKa/LxJD9+u/2CuJf7m1olUV9QOyr+a9aOqLtAHBIn4WU6mNueqP2Q5Jlb9Axnsnhs7RFHad/bXFj8SvWKlavrTpgE7SrutL29AS4G6CcgbiaDxvfcTeHsJMcSFWaRTgY/eHKz/UEmBPGorn/ZuwWUeCcI8TRrS13kpSbSNjIF6b0U1qNLpZLYF1Q6c5+4XTV4vqI+NJMyscbrB1gYkxhMX/R4ETEJnreF6o7i9L78ZWMtbnplLx5be8T22P6hTMHvVr0P6rHY/R36iftLO45ixf7gazMlHeO3GrTX5HUvlX4yrf+bnLqBSgC/eq5wMfOdv1uIGotsr8Lk56Th2AIWo6mtcdIZxOKGmxaM8Yduzyfu8D4vKT7xWOeJAJREbwtRGoPDGfuDiClVLb2W+6evcZ8ZPZVKokpqLd3b5LqZqpY+/HlVVQAdsqdnYBjX/HUXrpq+qSTXI+Y4nRSEbbFZsqcR3Tq3JLsJWZIH61LECzq9guYK1j2
Q/Eyov3h2B37w5JaouxF7AktcE+FHmC1YsHLeEZXXzeFm88lIkuWMnsLYb2vpIKXEPUsP+r7m5CnzXcUxGlF1043kVJ2velXCyESa5MXLUvDo6mpc//IedcIZl1+d6pmOE8JVK3aPy63MrG/vVy4ylDNGd3y/48O1+fAZM+5aMiajnHp5pFNJVHDTK3s9nXe7IkD8Bh9B52ZoCQ9aegatDySB4MRX3O4TW7i70SDQg5s+tfcN4cfPbB1N6OKkdU1ocyz2h/ZIvQj0qcsPmU7Q6zvoJRBXgs5uGgVOsps6Zfx4MZrd0Kqtkycmd4rhRpFu6R3E1OWHArnu0r3e6wX6yZZoduyrCk+DThelgujd7p/Dzb24ZcE+PLm+RpmF23X9VcU2gVxcop82Cva7fPDzdh3DSROSKy+8kAR3/HQ9EQuCHLyfWFeDHgdWBgrP5ODlWRlrjq077DyJgR1erJSa/E+AXDIlrG9GSuf3ZWeDvxIiZq5+TtxXSTQE9d5FKfP1MsOpu6lV3O2Ykmje1htfe5LD3pWe3/x1F+ZsazDdr5/A2ckG1WKxVxoCXCwKYl6z9nBrAD0xh9Mge07owhYKPAJGiza7a085fxDCkbJ5uLnHtOxNYXIsd30aGMog7W+D378+jLkdlcSQOO/6JWjvY5r6cqHzhH/Xwj8EGNTvxRKgrRKWwnUzaOKk2Gor614nYOkeBpOB8RkFZ0mMB04Xmf71kQ2m+8blZw9G64O+7TgvhM6uqMevnlNn6QSc3yMpgTnbjgbVLczZ7r0to2Vi3eE2fPbuVY7O/da0jZ6va0WcYjaTiv6xKmWRy/HRzJLo5J2/+J7VoWTSHE5wuZy4EIa8TaeSqPig6tuDd/UK1DU0RpPkNKKKWXWiuIQh9oSwF+ZWhXLjpHC5JaxhxNM9CSCZURj0D2WYwTRgAiuBEelzGXvJM377IYHx+YAa320lACv5UC5/vp9FbavySnb3Z97OY46OIzmUr6JrS6KiXeFfzulPb+oecN2ntL0DR00sskbCMDg5nfOkUkmMyxzZ0/eQP2nR7uNo6Cj2TSfO8aosaUkborTIeXl30jCZ80ru1rh7nl6tS6VYWX/Bo2XjPdctxKOr3WdAK3fcxOroOdTUU/B7pCpigTXCf3vHu3KTwC01Hf4bizlJ8b5wOukMmiDuT5IXL0vJxPFj03bP2U0hC5LehcHPZ2237oOh82mzOKuyips9z//38PqQe2NOKpXEUiOlxOqDLdh91DprWW1bn+0xGj/5yzZcMnVtEN1LLVYC1mq86ugfRr/DMiphxtC5Yf3hVnz+npzbUZLH4p31/uIB40ApdPXjHieLA8NZ3LJgX8C9SR5BWfwW7TYkIYlwHqT/7odGsvjSfaux6mCL7Xn/cNapRdskJKpbcmUwnt1cZ9hXfugVmIESpOm/8J1vCv0aSWPR7uOYPGU+ugII/Uga+vcvCGVaNc+MfA056uuXGLtyFXqqLUoO6QnjFlJJLAE/enorvvf4Znz1fmul7lN3rrQ9Rk8ahWVcGBrJYjhrna45Tiuj//fSbozkzQdJlsVBpJkHcq4wxpqjbp9X5IMqiT3FWZKjQx+vdrzzBPY39timTAeASQnOUBoGpShD9f6zXxf6NUpFUAsuD6/KeThUu5hclyNB3M4DjcX3MIin5KZvxvjZrEyXLVGd2dTfHTB6rgTBhMBbTCGNXQN4y+smFW3XXvll+9Q1GN0IT+19StNHFGcGhrOOkhWUUuzpRY5Vts4kKjdBp4r+3mObccCnQPV6G5N4/9OO1+9YNRGKA4fzE22v7onTdC7Jp5w0PpA+xRn9U9xaW+heG48nmnxsZXxMvp2o0d8G4x1x6varkmdm81Enmfq9UORuysfrmw4XpWmcUvIlQiHEO4UQA0KIv+i2XSmEqBVC9AkhXhRCvFG3741CiLn5fbVCiCsN7ZmeWyo+etvyUl+ShIzdeNXn0N00LJRCXvdzc88gntlYW7oOJYzmHnd
B9SriHJN43GXSAFJIOc5X9CJt77FuX21trG4f/fmnn36Hr7bihL7UgFPKKUmU19hCt4l9NJfdsBSQcqNQMfT/vqmeidnCsmlsobIN875lshLTVleZhupkLRLXlNEnZon2dx7vOoFbF+zzHAsfJFH4kTwIYIv2ixDiXACPAPgugDcD6AfwkOH4ofy+qwA8nD/HybmREuSL3d7LchpxwmmdMaMgnfLCLnSXYGD88TMVuO7F3WPJjQIeZErJdS/uxmoHsVN+eH5rA4Yz1u7DQSFlcQ3NoJm5qc7XAPOHl3bjh09VBNijdFLkbhplTKKuLx/4u9cDcOZKatfnSRMNlsRkiZcCrpoeThkIt5TTpFj1pzxXUQ8A2N9Y6M1x37KD+PKf1qgbilP8RoQU1En0vFCp2iaVSwRaoj6/vLLrGG5dsB93L8mFjBgtx0mbl/jF6m3+n+d2Ytrqamytiz4pWEmVRCHEtwF0AtCb3q4CME9KuVpK2QvgOgDfFEKcJoR4LYDLAVwnpeyVUq4F8DJySqHluVb9iPuruKGquOj6p+9aWfqOEFNUmamc8OyWerz/+iW+r283NrTlFxW0sMkjbc4Cn+NIqSyifYPurMNeJ3JZKfGzmdu8newCu+5ZxTQ/taHW1E0+jXh91kbLTJQTIf2cbEI+Q+I7zixOShMkSVN2ttWNJcbSdz1od3c7EnbbsNCYoEmH6h0YyeRLZhi237fsEPYd7w4s9rxckA4VQ8fyRdHGgyuqQn3vevPjq2ZJVLmbpklRtPpLtQVrKYEn1x0J5fpOJVrJlEQhxOkAbgTwK8OucwGMVrSVUlYhZzl8V/7fiJRSLzF25s+xOzexXPFoPFYz04yd281tC/c7aiMqkdfWl6vROTiSc+tJ2mQtCnoHw89aCACv7m/GcKb4gZRyHvp8RT3Ov8H/YkW5EtT3EitLok6maa7STvpDAw5xwv5GtQuzXWiEhv41m7r8UDCdShiZrLT1mlHdu6FMFq29zutym332QyP+vGmsxIlmAR1nIlDiEq8dJ5q6B3D9vL2R9qGUlsSbADwmpWwwbD8VgDEfbxeA0/L7jJJH22d3bgFCiB8LISqEEBVdnc7KTPgljHfeuPri1vpB7OnsH0JjADFdYa6KaTXKzBgYzgn7Odu91ctLKm29g45ccFRH3PRKaYTxL57dodxuNniGwcqQ3XfLjR6PNcWMTzQu8yCtX04yZNv12bi/XKwBUo5ZP6K4dtLodxHPubG6DdvrOvDSjmOj26zEX317P3pTEL84bXU1vvf4Ziw3eHHovyn9+KZ/TY609vmuVxmmoqb122ycS+I77werJ6U9R8dhTSFSEiVRCPEBABcDuFexuxfA6YZtpwPosdlnd24BUsppUsoLpJQXvO71waeXVrlHhDHnM7q9/CmlK25hsnRvcG52aw61BtaWhgBwucPiquUueDdWt6Ern9Fr77FufPjmZaPxLlZ0hpAFzC9BDwjllFAjan45W63Y26EaA0ZKFPtqpGCimf/v0c4TofYn6Qrj0xtqbI8JZTE4ofdN9b6r7s/SvU247KH1jkMJLrxjBapakhsy4RQth8AxQ9bhQndT9bluxg+zNlQeLm6w+ha0/o0zmRdnpSz7+QqQy458m8NaxHHw4iiVJfHTACYDqBNCNAL4XwCXCyG2AdgD4HztQCHE2wGcDOBg/t8EIcQ7dW2dnz8HNueWlCfWuvcbzkrYJsuQUqLJwqpVisK+aSOoybqAwJ9XVQXSllcyNrUck8zAcAbfnrYR59+4BFJKHGrOrQ2tPVwc00vGMC+N4u69v/mVvZg8ZX5ZKqNBTdKLYhKlRCYO90vXhSGbMchuohKHiYwfVhxoxs9nbceDKw4X7YvDoyolu4+VxsvKD+X8SDQrm9UUxGyfGyug2RxH1UZQ3/eokmiiJUoA23UxwYX7yuepX/7wejyiKyEUFU6fa6mUxGkA3gHgA/l/fwYwH8AXAcwA8DUhxIX5RDU3Apgjpey
RUvYBmAPgRiHEa4UQnwDwdQDP5Ns1PbdEf5clTpSNp9bXWO5/dks9/vlWltgoJSMhm/g/+vbSVWl56+tfU7JrlRr99/V8xZgXezkqLV74rUmh9KBuz3QPC2NpwzgQW6V594Ofd96v9cBI0j6/J9bVYN7OY7hz8QHTY6zmU37rrSqJ6B56dau24qkNNY6OW+bQgyfhaxKWaPpTUVKXgp/1rqdj292sB9eaJLKzkyOP+5D5WtPjTd1NJa6dqx6zygXH7usxeslLoiRKKfullI3aP+TcRAeklC1Syj0AfoKcwteMXDzh1brTrwbwmvy+WQB+mj8HDs4tGapP66v3r7U9z04ob6ymVaTUhO0H7ncSZXa6fhXuda+ZCAA4+w2n+LtYjNGPNU3dAyXPQOiHUnT1+a3G8G9r7N7LbFZia2279UEkEpzIFLNaa3GIewmbxq4BzN5Sp9znNYY5TMrpiTh17X9RF58IjLleGmtXJknOu0WYWBL176heGXx+61hohRtL4lMbvGUMv9Embt/K4qf1z9SSWE4vvQl6bwUnf24c3vUo6iRCSnm9lPI7ut9nSin/Xkr5Winl16WU7bp97VLKb+T3/b2UcqahLdNz44BdjZk/LT/ka5BOw4dVasK2JJbikWkJKVTXKpd3pjBbY+mvn1RXb6/jzrQ11bj84Q1Yf7gwzrZc3qdSIBFO3I2fJkdsTBB++huXd+Mzd63Eb16oRHufu3rDUXklmFl60sQn/7gCQK52op7op83hoclm41un/10fc//IqjG3xSDc2FVNOG12tCazCVr/zMafNGQ31cd/P7zSPBRp85H2ouOjIhIlMU3cMG9Pwe/GVTHAfpAmpSX0OD6fstDNIJkGwQtEE7Pw7Bb7BDlJob1vyPYOHsy71R01JFUoR7x+Np+7eyUes3DJCmsxw5E1zCT5hV9L4pM2IRNRs+ZQC07kF3TcjrXarSm1YrJ4T/JqlLb2DPrOrqmix8RFr769H+sOB58YLkq0+6eqIahh9r1lAxAuflr45B9XWHrG2bub+rh4QtBbBp2ULOlzkTHYQ28cHUUlscT8z/PusuSV86pZXAnfklg6aVjOglc/1uj/zld2HY/FClyS+METm22PMZsAlvEr5pqqlj7LUiotPYOobrX2LvGCW5GVdaEk2lmet9d1FiwcxO19ONBoHTNYzjKylPz4ma2htGtUKlp6cpPrT9+1EldN3xTKNaNC88Q0Lu46WQQKwm1cdRnTRGeKbVZKota/uSZluVLg9R4znN3wdCqJESYOqDxanD1sYMj7hLacsj6VGjPhlwkokYN5FslAmic6JAoXVJp6nBcWThuq96+2vd+xDEvD69sS4vuztbYj8DbdjgP640ds5J2T18LMOvDI6mizOzvB6t5RVkfPeEMM2w+frgBQnrG02t9q/NOc/KlBeA35baN/yFxJ1No2q/FsNf6Uy3doZ/SpajEsIMbgD0+nkhgCZmZhu1VMADj/xiVBd4c4wPj97TvejfVVraEnFYn+sy8PCupZelh5jZIYxKO7ZrTP8b61gXDhHSuKtsX9nbJD33v9pHPqq/5r7U4cr36hF1Q2+m47UFzGXHERNnqSKCs948OSmJXe67kGRd+guXuknaIbty9ta20HXjSxenrG5l2uaesv8MoIdyGE7qaxoNnjivT/Pr/TtoYiCZYv/2kNrnx0U+gZpdISJxg2P5+1ffTnokB/3mJThADm7zpesE1K+0Ha7KtIuvLklKAG7CBih7zwo7wFBih8ZnO2+Z8I6f+icnodyulvSSrjUqQlan+r8b1zIjICcTdVbXPR7Kknj/d87bjNiy5/eH0kSndlw1iW+oCrExXQPeAs6zCVxJjy160NSrckozk/Zt9Voohq7In6mZXjKxO3ASbOvLq/2df5ErJA0fngTUv9dilVjB8f/LDr9vXXP78L3vaGkl+/lNgt+llbErVGAusOcYnR3bScGXXW8GRJVB/jZhEvzAU/u6cYZxnihRN
DGVQ2dKFNl6DGSWKnn/xl2+jPVvHtfvnBE1scHUclMcaoYkXK0A0/MsyEUtjK4476TvuDSsSm6jZMnjIfdW3W6atJ+WCWettukB5Nzy6BGZvG6myFUYC7nInDnFf/qP/jk+dYHutWHoaeHdoHD68qfvc3WNUiLreZawKhJdHZwm4QlkSVl4Ob2++0CypltFwWem9buA9L9zbhq/evwdceWIuP3f7q6L4kvspUEkuMm+/gO4/ZZ+4qj88qXoSRxjuuaAXXNx6xmCglACnNs52SYNDuqRDA7qPdrs6ta+tHo0nCgqQQ1CsVhrtpVkq8vPOY+X7DNfW/zrGJu3HyLelj9657aY/FkdHyxLoa1+dIKVHdwrqFURGHRZVSIUZjEu2P3WRY3AhizHPj3qhS9JxaIlV/X7kM2Y+sqsaPnq5AVV5mDI3Ed9HMCVQSY47KVUb/Ie5qiI9VipCoMA46f90a7xqGpVqIcFrTUAjnSToEBGZXuLu/F925Ah+9bbmrc8oBlfwOwxvksTVH8F+6GF0jxkLb+jGkIAGUV2I2w5NSYmN1G6SUvr40CWCeIX6XlJYkWl+8oskLoyxW6V7fmrbRUZulXDBt6xtydNzVM4rLpcQpu2lYCWOS+CpTSUwg+kFr99FudPY7+zCJM9I0KGnCt7492e6mKwxxdlNfPRxRT0i5EtRERe+qGxRNPdZWWuOkJ2jXrpjpiJixqQ7fnrYRC3f7y656tOME2h0UvSbhUNnQlSp3U+0v9aKjBPEN+o1JvN9i3NU/xsV7ihem4uSlfveSA4G219DRj7WHws+cHwZUEmPOlpr2om3GCf3AcIy+rjIggd+xb6yEexI40NSDhg5nVjOihi66peFgU6/9QS6xs0w/urq64Pegn3Xc3p2a1pyr11GfMmHO9qO4fl54ySOINV97YG2qxuNx+sBvl5gpeFF+mtp3CNjLKCeeLK29g7h2bqVrF84jrX1Yd7jV8fHrqorDbzJZie8+tgnrq5y3o/HZP5TmDwAAIABJREFUu1Y5Ch+LI1QSY4JZnIqTiS9rOQVLUKs9S4Jw4wqZJK5sAWrL5+0L90fQE2/E7Zs1m5OoEho5TZ1N4sPdSw8W/J6WBGhx+86IB5I6SHlAi780esI4eY/nV/p3i3ajmzopFfbpu1Y6bs+JTLpx3l7M3FSHRXvceQh85q6VuGq6PyWtvW8Iaw61Wrr1mzGUL2f34Ap10rg4QyUxJtyxOFjzNvFOUPFim48UW4GDIMw6jltrO3Dn4mCVratnbA3cxe7z964KtL1S4zbxS9h0nRhG32BxltKL7swVlZ+38xg2570abp6/r6R9iwtuFY7GrgG098XTVTFJmQSPdp7AZ+9e6SrxkZ2IZIhGSIQwNKVHRfSnD6880OL7+m5kXNDlMpw0p7nNR5HM6LoXdwMAWnuHEp+Mxg1UEmPCggBWgUgwlNvC5coDhfF6VsL48ofX48EVVYEOAAsqG/G7ubsDaw+gi3UYzNhUZ7rv57O2o5ZlUlzx0duWl2zl+JmN7hZhjN/3h29a6qsMTpgWuxkba1Hd0uc4GdVv51Ti0TVHLI+58I8rgugaKQHlNh5bYbYA7Gc4dlcn0ft1SoG2uBVFnKreeuk2cVuceGRVFSZPme/4eCqJJSaIj7C40Kr/Nkn50txTaM1o7R3EwHAGAHC4WR0fFcY7FWahXkLCppxeX+Pf0tY3hF+/sDOazjjEqQfFrM2Fix2q03oUVnMST9JUkirqMTLMqweh140pif7bcoPxuQwn2JLodkGRSmKJGQzg5SqnyUqUpGWFUhXv+t18EPWO+uBLqPQMDKPrRHHc2jRD8gwSL8zkSkOHvYUp6skN8U9Yad+9sOdYF04MZaLuBokB9y47aH9QmdDZH228t0qMt/YE45p9jyEuen9jj+s2djV0AQg35AYodnH+z5nblMc9vaEGk6fMT5T7qdtbRyWxxLQynXZsMJvXllvKbdXcb0tNh+U5fqaL512/BOf
fsKRo+1oX2cVI6TFzGfykA9e8WxekM04xqaie9IgPJTHINYKegWFcMnUtfvFsLkGEWdPvuW7haJxQKfpF4kO5Lkq99Q2vUW7389f6vVMnhkuzWGM17fr1C7swecp8HM/HJZd6jragsjBRjnZ5TfFVxfPHFbeWeSqJMUFleSHRUGY6Ymoz/O07Hq/kMHHHz7zLLgaMRMdIpniVW/Ws42JJ3FSdS5C0rc56IWtgOFvkOqWyPpaZOCcAHl1THl4pzT0DoSsYAy6UvLDmCgccWA2txh+jbIoicY2RE0OZUctvPCRnOFBJjAlulETjC/lcgoNoo2TjkeJaOOWH9FaYN8YrtTe/4qxu2bVzK0PuSXnhpo6UGSeGMkqlhESHKnO2ajLox2UqSGnxw6cr8j+5nwl++9GNAfaEuGX+rtIk4Asrc3ip+adbluMrU9eEeo3zri/26jEjrGH/i/etDrQ9r5ZEVd1xr+gztu8MIWwnLOhumgKM6cvvW3Yoop4kl/VVrabZGmOwSBUocVb4vDB9rTOrVbk9x7CpqLW23Djhvb9fhH97YkvBNv2EzsmKclxJ6me0SpEaX/W3BBEv75dhh1ZPPVUtY8m3jJM1pVstFzEST0yM3oHgJGt0uY3hKnpc1N/16u31L3/e4O1EBXrr5g+e3GJxZLKhkphA0pTty4z+oRHcs/Sg59Xvlh7z2NCwg6JLiZTqxDUaH3v7GY7a2dXQiUsfWJuoZBLl9ByThDH29F8fGRuYr1BYevqHRrChKg1W/Wg40ORMMY9D7cR/t5hsmX3On7t7FWpa+xxfwywJBUkOr+5vtj8o4UShGLoqlxHwtTe5sA67sSS6UT6BnHwII6FfXGjvdZeIiEpiAklrjJmeh1dWYeryQ3h2i3ltNz1Pra/BQl0tylaLD6W9r7wKLde1nyjadtqkCQDMJ4bGrde/vAe7Grqw51iX536Uesyjihg/VFac/31+J654dCOOdxW/p3GinOSu6i8pVS02K9YcGltgcLPG4yYh3OI9TW66REgk6Nd2SyV5YrBO5Ag3SqJdcisjpXKZjgq35X+oJCaQID7kHz61Bc9sqPHfUEQMZ3I3oWfA2Qv/h5f34KczxlaQ3/XmU02P/dPy8nHflQAeX1fsnvm9j70tt9/hu6QdZpTNaw+1FqzUOZ0s/s9zOx0VdE2S5ZLYozJqa6nQ+wb5rEtGDGaDgyMZ/PfsHTjaab044EQ5j/6vISRYIrEklvyK3pg43rmSaDQI6I0A7X1D2FTt3otFoHgu1Nw94LqdJEAlMYEEocQs29eM617aE0BvouGkvJB4vqIe020yne09VpzlstzKXLhF+/v1lkS9W6r5+DR235q7B/CdxzbhF8/uGN1228L9jq7/wrYGy/0zNtViR30nPnPXSkftqUj5I44l1o8kKVOU5KO0JJb4/q860IK524/iojvUJVaM74rXMAu6nZMkov8aX9p+tHBfSApkGM2G0dcJLpRE4+d/9YytAHL9+tBNS/GtacEku/pCwMl54kIqlcQjLuIXSEzJf/k1bf24eb51jbaws4clmYxOgM/YbO66q5LzWv0kfXzIk+tqzNuAREffUEHK71f3q12/fjd3N77x4Do0+lidY+xudOxqcB7ToT2lGBi3LIl7/9wQ9N/itL31h1vR3JP7prVTMlmJQw7jJs2YZSK7yumZkXShX8B9ccexCHuixknCHQB4wmJO4JWJ472rLsc6c/JnzrYxxTuImHitHEa5kUolkeNG8mlzEYOiIi3qg90kSe/+19Q1ppCZWRXMFuXXHCrOoGhk3eE2fPCmpQXWwX9/siJRhWiJMy59YB2qdVknrdCs2pTLpUP1fZdCobpy+iZc9uD6ou29VjJA16/9jd3oUkzG9BM+QlQMjmQwb+exxGQKtepmWH9CRW3wpUX2NwZfr9iPJ5h2alPP2HxHlVDNC9ttarsmkVQqieVI/9BIwc8Pr6yKTXHkMOh2GItYYVIXhy5IOZzW97F
7kxo6iuOKzFLNNxsyy577h8XhJAviI46UDtXKquKZaJ9iHLJrWhHv3plz5mknF217dI2zMjJhoMUgzts5Zh1xWr/sS/etwbemOU9j/8dF++l2TgAA9yw5iJ/P2o5VB+0XNONAFOJQnzgqKEZKPA8dGM6gqqXXdjEgjPt72UPFC2BJh0pimfBN3ct51+KD+OOi/QWDcFpZvKcx6i4EjhvlX0Li6x/4W+U+Y8IIy5ikvETVVvCue3E3PnXnytHdoxN83YTsR6NFse2ZvaUeT2+oweQp8wsWPPzAuWG0DAw7S0Qzakm0eP3aegfRkaCsw3G3VqhKB5W6y6/osgjeuqA4lrm5ZxAnhjJFSbP2u6y1uSIF5RKIPdp413UiGW6BVuNxvKVLIWEYK6xk1XuuW4TP3b0Kty7Yh4MKN/batn40dDhzlTVDCJGacBYqiWWCfuDsHcwJwcER+0nakj2NsU89DwCzt9Rh0e6xSYXTz/M1E8eH06EywDiJ1wte/c8X37MKOxtypS+0+/7MxtqCc7NZicGRTMHkc4WiiLcZEhLTVucSELX2BKMM0IJQWuoMMSpXTd9UdIwqG7FQJFEy8uGbl+GDNy312cPS8det1omZSkmSP4OZuljDXofeI0bcyCFS/iTFi6hcHMFKbUnUeHTNEXzhXnUymWV7i3Mh+A1hKlcmWO0UQjwDB4sWUsrvBdYj4ovhTBbPVeQmKE5Whn/8TC7TU83tl4TZLd/85oVKAMDqaz6Dvz/jFN/tlVPNMyvM3gHVMGl2Rw43O4ste3xtjaPjzNDG7rQ8m3Ljz6urPJ2XnMQ1zjtYr3C/LmfCenZSytH7/sCKw+FchCSa7XUdmDRxPN77N6dbHhdz8VKElbyJu6eCnqT09av3r8WG334u6m7EDjtL4mEAVfl/XQC+AWA8gIb8uV8H4DyNHQmd+nZ/ZvS4820X8ShA8gaGMHC6bupElpvWNBNiNNupV+rbTzjuhxPS4g4SF/w+NyeeD3FGmpniI8YYAxwVXiaLQog43UoSQy57aD2+/Kc1WHGgGb2DI/jV7B3o7Fd4oxRHRMQa7bXPKixxWp3oJBBKWQ1InBjK4M7F+/Hx25bjE7e/6up8lTX5eJc6k7pKbs2vPJ4aTyVLJVFKeYP2D8C7AFwipbxKSnmtlPI7AC4B8O5SdJS458Udyc/41tg1gF88u33092MmH7IZxu9Yy6T5WISJG0qNmYz2IryvnrHNdJ8fmRnGQJIWIR4XvK4Y7z2ey34XRqr0IHHz1019NXlWL7ui9l7Ze6wbk6fMxyGHHgmEeKGquRd/2ViLOduPmroZAsBShathFEx5YVfRtoJ1pnzkxqAiflir9ZcEwkpI9tDKw3hwRRWOdQ2EJrsAYMam4vI6m48EnwU2rriJSfwoAGOe2E0APhZcd0iQbKxO/ot88/y9eElRI8ix2DFoCuf+YTG6TgxjeUqSGVjdJ+OkXu/mGWVm3OSskRI9ficDh5qSrUSk2eJl5SI+b1dOfj/gUXEul9gsEi76Mau5ZxAfvXV5wX7tHX05Jgn9nt1Sb7lf6+84xSw9SXG2YX2/ThOjqeSymwXktSFkfE0SbpTE7QBuFUK8BgDy/70FwI4wOkYIUFwP5/RJlmG0Rahkwdxt8UkqESWft1htff8NS1y1JaAWvF5qJAUVw+CnlhJxj0nFE8eMGxfv55XJSGwrwzpYduw55vwb3n20y3Sf18n54+vS4/VBvGMsw9TY7c7rKA7oh6zuEzmvp6QvPoURk6hqUuWWa91Gwm9siXCjJP4bgE8A6BJCNCEXo/hJAExaEyPK7bUfb5g4Thife2X/7g2vcXS+Sk+IKttWFEgp0dztMCZJd1usLIl7XUwan/CQzCY9T6e88JtwaHzMc23fvfQAvvnQeuw+2oVN1W2YPGU+qluSbf0Miu11udQEX71/bdE+P3OxeC8bkDhhzLitJ5uVWFCZrHJYF925AusOJ9+KVarp1m/nVJbmQnlunr+
vpNeLCsfDspSyRkr5cQD/AOBSAP8gpfy4lLImrM4Rd7y04yh+/ddiP/ckY7QGaS5t737LaY7OVyUvGfJr8kgYG6rbijf6sLJ9Zeoax8d6URz6B5OdwCS1OHzU+lI2esbH3PJbmS8D0943hBfy3gh6l/40L278cjYdikj80MrybHXpAXDrgn34/uObw+iSElXdUiBXRmjfcffeOHEijJhEVYuzK6zdd4k3XK/dSinrAGwG0CCEGCeEiPn6b3r4xbM7sLW2vNyhJhgsiW5dCpSWxARlBislxrtyuNld0eqgsol+7YFia4QXYq5zlB1Ovyoz98Uvvu8twXXGBc09A/j9S7sxbLN4pGUUnDBOjJYZqmnrC71/SYclbUhUVNTmFnHsvm0j01ZXY9XB0sX93bYwZ5VSJUSpbUt2xvpSOm6psvurvD0EnHs4DCQ867ZfHCt4Qoi/FULMFUK0ARgBMKz7RxLAzvpOtPcFU6i8VIwfXzjTdytvVHrCSMosiSrWK9xYjD76VhninOJUEDM+IPk4fYZmuvtZp03y1a4batv68IeXdiOTlfj9i3vw9IZavGqTzEpzwR43TuAT/3AGgMIYab7DznH8rgjgg3//+pB7Q8oVTUHJxnzIP9iUW5BduLvYJTbp4TGllIsX3rGiaJtZRnynvVqZoCRBYeDGCvgIgCEAnwPQC+BDAF4G8JMQ+kVC4OsPrsM3H1oXdTdcMdFgSQxC3gwnXOgGQYXC4my8t25uE612pKXXYeyr7mWpc7BK3pMvWxMk/z17B57aUIvKo12jli67yYzmNjVOiNG43LuWHMSjq6sD71+5M9sms6OerhNchybe0OolhlWGISjGq1KY5nHrPRU3QnE39dumyYQlyqzuccWNkvhxAP8updwBQEopdwL4DwD/E0rPSKDM3a65RyXLdcEoPHsHR9DVr540fPX+NTjv+sUF21SyYNjE/78ccSNL/YjH383dHXibJFmsO6yIfbXhojuLV35LgVZMeTiTHXUrs5sfaPHRmawsOPaWBTlXMb7rznGaMXU4k0V1C116iTe05CJNMc90apXYORNzBdeOuFtx9UxfwwU/I27qCWSQczMFgE4hxJkAugG8NfBekcD579k7bY+RUkLKeKWinzC+uC9rTTJ+7T5aPPEQCi0x6e4bcUWlkNcyZosYiIN00UTc/a8exsBwbhajrXibrVJrclFKWZR12W3MU6pQ3E6nE1/KauKGJ0zKpVwT84R+Kw+0oM/EYyLp30BYVly/zarOr+9IlhGlFLixJG4C8JX8z4sBzAYwB0BF0J0i0fDL2Tvw9msXBN7uusOto5ZMtzQrVgCdTDCyWWkae5imCZ2bpBG+PTgU27bUOEuklPDFUuKB70zfZLrv1gX78H6DV0CQaEreal1yCu0dPOe3ahmoLXhkpCzKunzN8zv5Dpugui1+MzY6LaRN0kWT03JPMaROkXQFSH6scxjdT/YdSRZulMTvAliV//mXAFYA2A3gyqA7RaLhpR3qYsdba9t9Caqrpm/Cf8/eiZ4BZ7ElN87bi6V7mwAAzT3FQt9JX741bQP+4XcLldYt+p3Hj7nbj0bdBVIi5uWLqpt5BAC57ILdA/li0gGt6TR3D2Awn6nOaAkE7Cce/UO5czNZWVTT8UUT2UnUGJVst9y1+EDRtn97onQlC0h5kM1KTJ4yH4+sqjLdXyruXnJQuT3p85U4xoMeanKXuT3NuKmT2CmlbM//fEJKeZOU8jdSSnXRK1IWLN/XhMsf3mBZqNYpxzrt4wKGRrJ4fN0R/OjpnIH69EkTi45xInQ0C5aqLEMMZVYsiDJVfXVr8G6ptDZEz/GuE0Xb3D7r37+sjnd1yz/duhw/n7kdgDpRhNOFMCnVNR27HS6CpQ3V4qDTmphm9ePmKBaV0p6FkLhjz7Gu0djYOxWLDgDwSAmTUi3b16TcTiUxeJ7eUKuc78Swq5HjpgTGRCHEDUKII0KIASFEdf73k8LsIPGPn8m
yVnfmcHNxrRkjU5cfsgz8dTIveHV/oaBUpT/PZtWxhk6vmaa6XU5iUTXKTUA6dXUl4fHAq4d9t2Hm4aAim5X44VNbsKm6MImOpgAuyXsotCo9FBxeQ0pl3PZft3pzqS93Zm0uzmRqkcyxgPuWHVJuH+QCEPHJJVPXjtbkNfv0Dzq0OO091o3Khq6AelZIHJUsN4Sh4yb8liQKN+6mdwC4GMD/B+B85EpffBbAH52cLIT4ixDiuBCiWwhxUAjxw/z2yUIIKYTo1f27TnfeyUKIx/PnNQohfmVo93NCiP1CiH4hxAohxNtc/E1lTyYr8Z7rFnk+36kyBgD3LD04mk1MhZN8OA0dxZYHI3YxiUn34Y8K1X3bfdT5wMcyGMSIl1Vwo4Lnho7+ISzb14xvTdtYsN34ah9TWDidLh5lJfDevzm9aHuasib7RSWiZ26qc3x+3xCVRBI+ThW0r0xdM6pwBt+HUJotGTvqO0Np1+9tUcVFtygWD9OOGyXxXwBcKqVcIqU8IKVcAuAyAP/q8PzbAEyWUp4O4FIANwshPqzb/3op5an5fzfptl8P4J0A3gbgMwB+LYT4EgAIId6EXPKc6wC8EbkkOrNd/E1lz188uIn+fNZ2TJ4y33JlrH9oZDS+xzn2WoSVkqlhpwTqd6sUU+qQalS3ZYaLiRvvKzFipiRq9cv0SClR1dJbpOC5QX81vZww9kIVE+f0/X1y/RF8ZPIbirYnPVV9KVHdqWvnVpa8H4QA5nIqDp900t1Nw+B41wnfz2bxnmL33g0+FijLFTdKotkM35H9QEq5R0qpqeky/+8dDk79PoCbpJQdUsp9AB4F8G/5fd8EsEdK+byUcgA5hfJ8IcR7nPQpDbT1FU/G7NASS6w+VJz5T+Mff78YX7h3tat27SxNTi2AdslJfzZr29g1VTGJjq6SPhbtboy6C6TMMJvgfOm+NcrtxsLpZhmKzdCLkL26lWKjbFEmrnEoGMzqQXIu5xyzWENCSsHaQ+qkWVLKAot2HFw9S5k8Jyn8bOb2UMKG/CbUKkfcKInPA5gnhPiiEOK9eWvei/ntjhBCPCSE6AewH8BxAPpc47VCiAYhxBN5CyGEEG8A8DcA9IFVOwGcm//5XP0+KWUfgCrdfuKDTFZaKna1be5qyuibymYl7l16EK29Y+Z9NzFBRqpbxmImF1RS2fHCyRPdiANC7DGzrjWaFLc2ihujZ8Gqg86Tk1wydcz9yzjPUnoYOG5ZDSdzzgnLBY0QJzyyWp3NdPGepgKLttUXXdPah2km7aiob+/Hot3u8zzSQ6F0xKhEeGxwMyv8NYBlAB4EsBXA/ciVwbjGaQNSyqsBnAbgQuTcRAcBtAL4CHLupB/O75+RP+XU/H/1fo9d+WO0/UafSP3+UYQQPxZCVAghUl/XUTVAm1nxhjP5AtMBrNoMjmSx8kAzAKCitgN/Wn4I1zzvPLGKhqqvn717leJIk8Q1lLlK/lERZ8WFNeIHv9/aRoP7z5EW6wRaZnJKv31gOKPMrimlxPoq87IcdsTB6kAIsWcko15oPjFcWNDeyrvpykc34tYF+9HVb5/VeGttBy68YwV+8pdttsca4eKTmjDEbYeDZ5k2JljtFEJ81rBpZf6fwNgiyycBvOr0glLKDIC1QojvAPiplHIqcrGEANAkhPgZgONCiNMAaDOC0wEM6H7WUk715n/Xo9+vv+40ANMA4OS/eWeqvzpVxq5pilTPAsBNr+zN/+xfW/jyn3IuZiv/99OjE6q+wbG4RtVDUW3z66OfpuymbhhWDJxu4F0lRtwqTsajjae7/fS7+ofxulMmFrSz6Ui7MjuplLkshV7hXI6Q5PL0hlq8/+zXFWzLWnhF9wzmFMotNe22bd++0D7XghmUKyRKLJVEAI+ZbNdeW01ZfLvHa6tiErW2x0kpO4QQx5HLpro0v/18AHvyP+9BLmYx1xkhXptvU9tPFKj8rlWFrfWHHe20zzrqlN7BEUzIT9LMXCn
OOu1k0/PdCE032VnTzrDL+C8j9yxVFwMm6cXt/OZHTxU6ehgXdOza21BVaHn87uOb8MJPP47GrjH31l//dScmTRxfdK6EVMYqOoWWRELih8oaqFooFqJ4UWrEYrLRM5BTEo+Y1H0dGM7g5AnjIIQYPRYAatv68LYzXuuk6wDobkqixdLdVEp5jsm/t+f/nSOltFUQhRBnCSG+LYQ4VQgxXgjxRQBXAFguhPhnIcS7hRDjhBBnAJgKYKWUUnMjfRrA/wkh3pBPSPMjAE/m980F8D4hxOVCiEkAfg9gl5Ryv6e7kRK8ZPx8dX/zaFFkv+4P+jpj2+s6cKAxZ9lcqPPXt9LtslI6tmsqj6PMVaJSEjk+ET/M3+UuBseYaMsoaroUWVH1rD5YuNi1/3gPrp1TiU/ftXJ021mnTcLrTyku7yulwyxsJjALISHxQ/VZqsY1KYu9rLQC91JK3L3kAKoU7u63LCi2EjZ2DeA91y3Cn1flPLROmjA21X6+wl09VbqbqmGps9JQqkwVEsBPATQA6ABwF4BfSilfRs4KuQg5F9HdyMUpXqE79w/IJaOpBbAKwJ1SykUAIKVsAXA5gFvy7f4zgG+X4O9JNMr07w40p968e4U+2YwXsrrJWFYCX7wvlyX1ZzO3OzyfbpFhsLG62G1m8xGmhCbRYfzWp7562NX5GSlHJ3oaEhIXvK24hIWEOuupU2hJJCR+fHvahqJtytAWKfGIIuwGAB5bewT3v3oY33tss6NrNnTkkvr9cdF+1LT24TU6z4UHVhx25dbOxSc1vCulwc7dNBDyytynTPbNAjDL4txBAP+e/6favwwAS164wGsylxX7W/CFc9/s20deSn9RgUFcnzhD7yZDSJioJ27u2nhhW+EqfSZbLGvMvKql9OeezhV/QuLHlpqOom2bjxQviJp9vXuPdY9mWR6xClLUoY95rmrpxYTxhXLl8ofXO2oHoLupGbwtpYE571OI00LSxgnTtXMrcfE9qxwLSjP8zqXcrNirjqVscY7fOEVCnLJEUdw4iAWdTkPGOmnirp6V/mIS0z6Z67RxBSYkzph9vvqMp04T+J00fmxqnZXFcy431sGUixUSMVQSU4iykLRCderoKx70O/uHAxBaEgNDGcsjmrrNXVq5Yl86VKnCCQmD3sHi9ONhfOpmEzQJKEtjOCXtYukDNy61P4iQmGLu3zQmE/5/9u47TK6y7B/4957tve8mm81ms8luyibZ9E3vCamUhIRAKCHEEIo0aUpCkRZpgl2UpqIvivhTRFGxoLzWCK8FRTESQARF6R2S5/fHzJk9M3POzDlnTpuZ7+e6cmV35pwzz0555qn3/dwrb1kaOC1J6CRaC5xjhstNjX3rd/8MuggFgZ3EAtNYVWoYuMZoP9rnH3rC8eM88Kd/4Yd/Tp0ZAKKNqY/96PGM1zDLP5Qu4ljqYxlU0KxzLSv02RHyj9XBqzlX/zDh9888uA/ff/Q5y49j+p5WKqu8oBy8IspdZtVCcp3w1b1Pm17j10+8gEM++lO8895gR9JoNYSdqoLfwcZefpM5Df3ATmKBqa8ocTUthNkG7O1f2IuTksLZa5Qy7pQme9Fk+dKNDzyOf79qLXgO69fsvJFhxpfILUZLuYxWtv9Tl84CAPZ89zHs+OJvs378g4qBa4gK1Q///G/D25NrhG8+Yj6Ddem3HsVf/vVqQhTUAwdTl5va2jLDwScKEDuJhUaAR595OfNxaejrtx//xbhi1fzt36+m3KaUwsRhdQZHJ7o6TQLay7/9p4znAybhry2dSUR+MsrVmqkx9T+/fsr24/z9eeO8ZirLPYlsyxHlrudeecvw9uRB9V/vNx/g1uqPd3RLUg8ohRV9bQnH2RlP4uATBYmdxDz39rupM0F2w8ink2m9/JP/fSPlNqsj9maNOTuMl5uy0iXKBc++bNxw01x4zx88qc2IAAAgAElEQVRceywF46BeVnHEnyj/2KkR/hAbgD//7t/Hbzt
w8GDCHkW77GyvIXIbO4l5zizvTzb0+4Tefi+1E/pv3YicYdJaWNv749WGbVa5RPnnlbey26OiFFBZWpT5QBPcO0SUf4zaKl+xsYIh2wDhHHyiILGTWGDcmJ3T++SP96XcNvOqwcAShtWbxTrv7/95Pes9haxgifLPC6+/g56LvpNw29MvpK5asEMBCUmv07nqO4+l3Maqhij/GO2V/uyDqe0eMwcCThlGlA12EslTZpG9kqtdsyWgH7k/tTFm6/Et30hEueI3+1/Au0npWbIdUHr0ny9nVTVwQIoo/xjNJNpZNZDtTCJTPVCQ2Ekk2+w0xowOVVBoqSlzrTzpZJujiIjCx6gOeifL1tg9Dz+T1fkMMEFUGOxMDnIZOuUydhLJU0YpMpQChjdUJtz2lV+b5x7Kxo0PpOZjZJ1NlH/qK0qyvkY2dYM+NxoR5QejmUS7KSy++htv2jdEXmMnkWxLrh7TRQu96YcGnTQAn3/oiYTbrrjPWkoLIiKjNQrFkWC/zn74WPp0QESUe4z2JNqJOPreQYW9T77oZpGIfMNOImVt5Ae/k/kgHaNROD+XanEmkSi37TMIwJVF9oo4LkUnIr1/v5qahsdOGi3uVaZcxk4i2fbSG+9kdf57Bww6iVypRUQWXfu9v6Tc9vSL2UU3JSJK9tIbqal17PT7XsyyvUQUJHYSKa2e1uqU2954JzU3oh1/+/drKbf5OpPI2QKivPPcy6kj/nb95okXXCgJEeULo/bCC68bd/xmdTem3Papn1hPl0EUNuwkUlpdzVUptxkluX/TRsfxXYMohH5GAHvrXU5bEuUbN1Z1PW4wgEVEhctO02RIbbl3BSEKADuJlNYb77yXcpvRrN8HvvZ/lq/55V89lXKbn/sEH/zr8/49GBH54s13s1vhAABFERc2NhJ5aMnY1qCLUFD+89rblo/lGiXKN+wkkm1GHbqHn3zJ+vmsSonIZVd823qE5P+aLBdjJ5HCbmBk6pJG8s5V33ks6CIQBYadRErLKPyz0UziS29a35y9bFxbVmUiIkr2to08hf96xXj/IjuJFHZuRPElbxgNoG+bO9L/ghC5hJ1Ess1o74+dfX6cRySiMCpiC5yIHDKKsMxxJ8pl7CRSWg/97T8ptxkFrrGDeQqJKIw4k0hhZ7S6h8Lhkaesb7shygXsJJJtdhLJEhGFzVMvGOdUFM4kUh6rKCkKughElEPYSSTbsg81z04mEQXnHy++aXj7K2+mJs4mCpNsxjG+fcY89wpCRHmPnUSy7Rd/T12CSkSU6+77w7NBF4HIM6NaqoMuQsFxIzUPUVDYSSTbvvTL1DyHRERERDToToO80ES5gp1E8t1jz70adBGIQq2qlHuH0uHzQ4WK+2aJyC/sJJLvGAGMKD02BNOrKisOughEgWDNQER+YSeRiChk2BBM79+vvh10EYgCwfEjIvILO4lERGHDhiARGWDVQER+YSeRiIiIKAdwKToR+YWdRCKikGEzkIiMJPcRT5g9IpiCEFHeYyeRiIiIKAckDyCdtawXE4fVBVIWIspv7CQSEYVMJMK5RCIywOWmROQTdhKJiEKGzUAiMpJcN7DPSEReYSeRiIiIKAewU0hEfmEnkYgoZAZGNgVdBCLy2PDGCtvnRJJ6icJ1B0TkEXYSiYhC5sbNk4MuAhF5bOuckbbPYZeQiPzCTiIRUciUlxQFXQQKme+eOR+XrhuPzsbKoItCAUpZbspeIxF5pDjoAhAREVF644bWYtzQWvzz5bdw80//HnRxyAVO+ndcXkpEfuFMIhERUY5gF6GwdTVXJfxuNZDNl04a8KA0RJTP2EkkIiIi8sENm/rjPzuJVDqls97R484c2ejoPCIqXOwkEhEREflAqcGf7fYRW2rKUm6zeg2zDun1G/uN7yCigsdOIhEREZHPxOZU4uiW6pROod1rpJYhq9OJKI8xcA0RERGRD3QTibY6aLedOANTOxtSbrc8kxj7f+7oJvzv3/5r/YG
JqGBxJpGIiIjIB0q33tTOJN7iMa2oqyhJmTm0OxN4x4kz7Z0Q8/4lox2dR0S5y7dOooh8SUSeFZFXROSvIrJdd99SEXlMRN4QkR+LyAjdfWUicmvsvOdE5Jyk65qeS0QUhMpS5jkkj3B5YE5TmQ+xxW5KDKedzLmjm209DhHlPj9nEq8G0KWUqgVwKIArRGSaiDQDuAfAbgCNAPYCuEt33qUAegCMALAYwPkishIALJxLROS7hsrSoItARCFUpO+VOdgQmLon0eJ5sQNTzrfYyZza2YCmKtZrRIXEt06iUupRpdTb2q+xf6MArAfwqFLqa0qptxDtFPaLyNjYsScAuFwp9aJS6s8APgdga+y+TOcSERERhUJnU2X853yfFL7s0L6gi0BEWfB1T6KIfEpE3gDwGIBnAXwHQB+A32nHKKVeB7APQJ+INAAYqr8/9rNW85ie61aZL1473q1LERERpRg3tDboIpBPZnQ1Yl1/u+Pzk2cOLc8kmhxfUmStGcgoqESFx9dOolLqVAA1AOYjukz0bQDVAF5OOvTl2HHVut+T70OGcxOIyA4R2Ssie7P5G4iIiNy0ZGxL0EUgH0zprAcAVJdFA8u70fGyuydRb/LwelSWcf80ERnzPbqpUuqAUuohAB0ATgHwGoDkYdRaAK/G7kPS/dp9yHBu8uPerJSarpSant1fQEREROQ/EcH0EQ263+2fr9E6rZbOs/cwOHtZr80ziChsgkyBUYzonsRHAfRrN4pIlXa7UupFRJel9uvO64+dg3TnelpyIqI0uDSL3HTXjlnxn7OZOaKwyC7G6cLewZlny3kSDQ708r3UVluWkO6DiHKPL51EEWkVkc0iUi0iRSJyCICjAfwQwDcATBCRDSJSDuBiAL9XSj0WO/0LAHaJSEMsIM37ANweuy/TuUREOenrp8y2lJvsisMn+FAaCtJAd1PQRSAXJPeZBIK+9uD2o3Iwi4jS8WsmUSG6tPQfAF4EcB2As5RS31JKPQ9gA4ArY/cNANisO/cSRIPRPAngQQDXKqXuBwAL5xIR5aRpIxrxgRVjUFqcvppmQ48oNxRFoh9WfWfxsMnOg9gAqXkP7Rxnp+qw+jhElD+K/XiQWGduYZr7HwBgmLYiljZjW+yfrXPdwMUSRBSkoXXlePK/bwRdDAoJttVz19C6cgCDnUR3Atc4F4l492Yq8vDaROSPIPck5gRWc0Rkl58Nee5RI8oNV6+fmPC7G5/cbOoaATBvdLPp/WVJqxjsDJozrQtR7mMnkQpeR0NF0EUgIqI8V1Ne4vo1RQTK4ZqnUxePRklRxLSjePKC7sHHsXHd2vJiTBhW56hMRBQe7CRmwOWm+a++0v0vbips2czuPbx7ua3jnTYQKTw4G1xY3P7MOg0iWlfhzXdfQ1WpJ9clIn+xk5gBv7qJyK5sloA1soFFlNeOn92F4ohg0ZjWrK5z+uLM0Y+tMOu06m8VAVNaEBUYXwLX5DJWiUQUZmy3EeWWCcPq8LerVjs+P/kj71UdwLqFqLBxJpGIyGV2JxLXThrq+LHYjissXN2SX9xYapxtHWDUGTxyWofrj0O5I1PqJSoMfBdkwC9kIvKaFhrfidndjS6WhIjCYl2/tRyKXiwDXT6+LWEZqoj4MrNYUVLk/YMQEQDg+o39ae9nJzED5qQiO+7eOTvoIlAI+JV4ev+eNRjdWoMvnTTgy+MRUf4x6vwplXq71c5oNp3JjdNTZzCJyBsbDFYM6BV8J7GlpizoIlAemd7FWR2yvwIh2xH6eT3muc6IKPfcvXO2aT3i9hCU1WiryfXUJhsdutLiCC5YOdZOsYgojZV9Qzx/jILvJH7ztLlp7+dEIhEFiXUQUeFJF+XY7cA1RueLGDxO0u/XHGm8VM1oP9ufLjsEpywalbEsrO+IrJnf6/3gcMF3Etvr0ydST142tnVOl4elIaK84GJLh8EiiArP8MZKy8d6kSs1m+WmNx83LeU2v5bgE+WiGzal3xs
YlILvJGair9duOWE6Lj20L7jCEBFRQWNbO78YvZ5rJg5FSVHEctcv65lE09uVpeOSdbdUOy4LO5NE4cFOog2su8gLs7ubgi4CuWyEjVmATKxWO1WljApIVIi8WG0gknrhbDqjbD755wdnLwi6COQDN9LnZMJOIlHAvnjSzKCLQC67cfMU1661a814S8fd+/55rj0mEQXH7vJRJykwDp+sS69hFt009rO2PzKbZa1WB9kPHEx9jIW9LY4ftxD1tNUEXQSyqbgonN2xcJaKqICEtXIg5+oqSly71rLxbZaOy2aJF+UOP0aPKTOrn/Fl41o9K4PW8Tpy2nBb5/31ilW4YdPkjMdtnjEcdRUl8QGobKIpWl1G2j+8PuW2bPLIEoVde105Rof0+5utU6IQuHvnbOxcmDnyGxERBa+h0upAkHedem0CcefCblvnlRZHEIkMlstohrC2vBjdLdX43SUrMCwW4M8smmkms7qtp4ZaO2moo8eg9Ni+CK+ff3ApKhxsF/EiYFUydhKJQmB6VyOOmdkZdDEoh3xt5+ygi0AuiXByMKfVlhf7+nha51B732Qb7KW7OXEW44rDJ2D2qNS98kapLawYY2P5I2M/EFnjx6oSf2s2IiIyNTAy84j70TM70dFQgRld1kfnKdwY0TH36F8zv9PUHNR6iS69by47rA937X06/vuxs0a4cl0KDz9mnci5sH4DsJNoA/eCEJEXtK/vZeMy7z+8ev1EbwtDvouwk5i3vHhptfrCrUuXl3gbGdlO98SonZVtig9i+zUfcbkpEcWVOVzqQ0ThtrZ/KB66YDGKuO604Ew2CNSSUazXFMTgQnssiMz8nuaMxxp1Zu8/a777hSJH1vW3Zz6IfBHWcUK2OpOcsbQn4ff6ytKASkKFRj8q1N9Rl3DfRzZMRHsseAAR5Y+mqlKMaqlGR0MlKi0ELwhrY6LQTBhWl/mgmKKI4LDJxg3y6V2NpjlOzVJbHLS52nSYi98dPz5vER7ZvRy3bp2R8djBVbGDBR07pDbtOXx/++fjR7uXqonCaccCe0GtkrGTmCS5fpoyvB41Pm9KJzpyWkfC74dPGRZQSfLfKYuCjfp24tyu+M9eN5CWW0ynQdTTGs6Q7GFxzYZJlo/dd9Vq3JQmd2pNuXGkVLPFZNqAotXq4mSb0U/TKSsuQkNVKUospG6yuhjOyqwkZc9p4CHynpPlwFaWYW9MakvaxXdMkvVTBxvjwxsrMLyxEtNGNARYIipISb0F7ifwzsimqkAff9UE/0K+V5dxwCvUQrT3aqKNmbJ8YyUap9WQ9V7U3Hbj1gT17aHNhGYqZ5euDjY79NcfWmorSiqlum6jsxQmXrKeSoaSZVpuXlYcQU+Wnxl2EpOM0FVW/R3RvQLcNJ3fDh4MugSpkrcmcQmOd4w2f+tn97zm52trtnyNvPW546e7di1WBd5bOcFm0nivPlYm143v9UtTebTVlg3+4mElk7zqxYgbg5ytteUod5BLjsKNkZ2d8+OpYyeRCt5BFxrOt26djod3LwcAlJc4+1jpi1ESiSTMagvYwHfD/j1rLB33odXjPC7JIMYqyX9uLvMttrDMj7LjZuMrm2uZRS+0MpN47ooxg2VwXoSMtgyY5/d1kqmDnQZ33HfGvKCLQB7L9Elxo8XIb5s0vKyszlrWk/kg8kVrbXnW11gytg2NVdEgRz8+d1HW1wMSlzwxRL6/9Ptt3OjEvX/JaNP7Ohv9W+7KYYbw0X+0qywsBz5hTpd3hdHheyV4ZuOC6/qjS9RXpBl8KNOltQj664PfXv7ra09dLs6B5vzCmcSApTz/Lr4gDYyaGho3HTXZ1esNrXMnktwRU4ahu7kKPzt/MSKcbsppZq/e1M56tNSUcUl7ATp3RW/Kbf+zY1bG8+oqgtnD01xdlvkgcpVZvdDXXof9e9ZgdKv5fqNh9eXxtElB7WmPB9ixM5NodBu//rLG5zD/+PG5ZicxDS/b5cfOGuHdxcmWhqp
wdNiT2wOtteX40bmLMLyxMpDyUHasRE31e2kVO6PhYfRadDUHG0SJnPFuS2I2VxYcPjm6ZSGoDoJRCgzD45L+zn4nuSMpJxm9M46eOdz3cuQki5/r5JRqdrCTmIZZxXbByrFZXhdMmkyU5y5YOZbLyslTayb6Fxm3voCiENodoU93dDaj/dkM6ojYT5XhNqvFP6g7UAS43UIORnLHvacHu3fR6D0yqYODBEbWJ6VCy/i5jj25x6TZN5wJO4lpaC9A8ps46LxqVAA4hpBzPnpUanhxqyPpfuFEYn4Z354+Mbmb7tg207fHykUzRzYa3p5d4BrnosHOsi9DNhpj22paMixVTu4Mm63u4di6+yZmMcvkhuSXdOnYVsfB//LdDUdNdjVSthUF/UpkSiwaloYd5ba5o5tM77twVXaz0mTNyQu68f2zFzg6N11DrbGqFFM76/GB5b04YkpqKHgtcm6QgYdmdDHPaxgNvq9y43tmWL07e63z1e0nuj/7ld1MogymygjoPXbktA589Kh+bJs3MuW+Kl06iym65aXp2l1Xr5/obgHJE3ZWOCS/3txek15Hg7/1cEF3En9+4RLD26/ZMAkAN/qSO8YPNR/tXzbOvdD4ZO7khaPQa5JUNpuG2NTOetxz6ly8f6nxstKWmugIemut8Uj64GqF9IXoy2LG6IsnDcR/ZnS78NvqU/TSdO6yEECHdAQoNUlNkk074pgB53uzhtVXxNMoDXQbz3J6LRIRHDGlI2V7zQPnLMCD5y+O/75xeuZciwAwdoh/M+f5yK/q384KB7az09u1JjEd17ihtaiMDbBY3eubzSBRQXcSq03CjZut4w/be3kPR9VygpMZaaMzDrJ978iQ2vJ4ehK3XBTPo5j+tT12YAQ+ccwUHDU9u43438pi30h5CRNQ55JLD+3Dnz58SKBlYDRl+/T1fLrUFHYsGduG+T3Nts9b0NuClpoyzBnVjP171mBEU7gCIo1urUmIlstVW94TZBsIyRt85dMz+mxoqzr8qKYLupNoRhtt0ZaIaaPvQVdkycsWWa/mPv1rmGmWJ4wVfC7w4nNiNbR7JCJYO6k960a31UBXx87qxOqJQ9BeZ5z70+130LD6Cpy8oNvlqxaGdB/3ytJiTLYQ4dGrmeFM7zY/A+bkIv2g1JqJ7Vld644TZ2LfVattnWM2q+mFCcOc72m7+bhpOHtZaioYAPjVh5bijDT5ZSk/MAd0lJ32ndGRRjER3FDQnUSz9+bc0dGRu00zoksg4pu/s3is0a3V8Z+dfq/ftjUxcEBQ+wzIPdpyRCu4UjAYbj3vpy32PuDVFYdPxKe2TMPPP7jU88f6+NFT8LWds/HB1eMyH5yjVk0YEthje/Vx15YgppOp3fZRl3PLho2TATmzp2zNpOw61JGI2I6G7me7u6QogpV9zj4nK/qG4EyTCNBtteUYyn2wee8cg3yxhcisnWH0UR6cuBq87cBB69e0o6A7iWaGN1Zi/541mDYicR1/kAMeNeXFqYF22Ef0nZNGo9nLtH7qMNSWG4eVN5q1ZifRGS8+JlM7o8FgTrSxf+y8Q8ZikkkkOd+ioLr4HlrX3472PG/E9ZjsY3WD1dlot6OK9qRJwK4RkbSzlJmCvgHh2Fvpl5StKTY/xgddrNx7WqtxznI2vCk4dt7/m7LcilGIjGqLBo9SFBV0J9HqTJwbmz+9EK7SFIbrN3kzpQ8kfvCNIqL6Ge4+nzjteH1kw0R88piphvc1V5dh/541mDPa3n6hTG1BfqbDJ+jgUrXlqXvnp3ZGl6J6NXCULthWIbD7XZ/6MmR3/shm53sIf3DOQozL4vVzknibWyFyg9X64mNHT/HlcWiQ2VNm1HzR9iTq80lGRPCjDyzE2CHuDmwWdCdRkykAzMHYNK7dtuaXtw8kjKZuySKhpZGg90gWmqrSIlSWGgc7SsvmyzSyuQpD61JnaG48ajK+tnO2/ccnR46a0Wm6VMzpR8+sMWW0fMQLbMz
ZIwCWjWv15NoZG1Jp9sJ/afsAfqaLDummJWNbCz7YkZPPYVb5EHXvhY3TOkz36fnhazvnBPbYyTJ9Rm47cQYuXjvel6X8heTQ/uz20R7MIsqeUqogO5l29pd//Ogp+MQxUzCqpRoLe1vit3e3VOP+swZTfcWvmEXdVNCdRK1SP3xK5j0agP3nec7oZhwT6xgKEN+7w74dAfZHq6vKijGjK5hQ5vksyO8jbcR/MJ+Zt5aO9WZW7KbNmfeoHT2Ty4rcUllaHHg+sWNcHvTMZdkuN7143fj4z9du7Eexj4FnkllZSuyFm4+bltI5eS82Ql9i8nwsHtOKbfNGFmSnwi4/2518OdKbOTK1HWfnOauvLMXaSdY78vqYKHYVdifR4nF2Rt9/8cHE3IuDe42yawAancu+pr+cztxa7QxWxEbvRzQxmayRGoNldwBwzZGT0p6X7Zej/vrDG6MzvE6Xnic3ZnatHZdwu1erA+7YNhN71k/EhmnW8pGlM21Eg6Pzupudf1GRsSAbY1cdkb8pmATAeoPB43vTpKLJ5rOb7cxNPljRNyRlmeM770U7ifqO64PnLUo5t8oknRkl8qszfYD5utK640R395lnMrWzId6+tKugO4lW2WnAJS8T1Dak6xuVTr5KjB6bM5K5QQT47HHTsC5DQ6C9vgKfP3561vsB8tWK8cZBg7L9GGT64tRvrHf7S7asOFpxWw1i4tTC3hZsnunOzM+XThrAry/yPnpqGIh41xHza/aYnJnfm7rfeGLSfr0dTP8CwLvOx8Zpw7GgtyUhzY5Rzsft80d6UwDK6PPHT0+5bblLeUKPsLjKL9dUlKZ22Mw+Q2bBDdOpMRg0cbrPuaA7iVZH/uJf5g6+zRNmEtkayGlmL983T5ub8dxD+obg40mdP6MZ6mXj2xxVCpqTF+Zno+WE2SMsrdm//PAJKbdl+tw5Cndvcs0LVo7FNRvMZzbN/gQ30uzY1ddei08eMxUzbS5hrigtQmuNcR7GdFYGmE4ilDLmRc3OrG4uTXfK6nf1qYsS98LN7o4GHDOahcxnc0YNBlq70cX0KHWVJfjCtplorU1f35QVF6HYj8ziOaw44n5z//6z5mNZUofw8MntmDCsDkMyvGZWeJUHNkweOGdh7CfjvzXTdjijuurhi5dj4rA6S9tAMuEcPSx8IaR5n1aVFuH1dw6kOTW7N/molirse/71rK5B3uq3kPTaL0V5OhKxbHwbvvHwM4b36Qd7jP76yhL3qrlM31mnLHIWQGFwg7l/r9+9p89DJCJYM2koui68L+X+lpoyPP/q2wm3mUV7zaSvvbbgl4Rl+q4w4/QdEbZo3LnE7tJR7fiv7Jjl+DG/edpcvPb2e47Pz9bdO2fj7w7bGifM6cKl9/4JgPUYD+SNK49IHSgFgJ0LR+E7f3jW+oUsNF3HDkmNomsn9zMN7hc0aluUFkUy5kg1Oq+kKIJ732++NF5z3cbM0foLeybR4nHa1HBJkdGST2svoIg4+tI+OmmJmH4Dap72B8LLaURL0ySp7r+A+TzuZhoiOsN5t2xNXQ6TcF0fnzSzhwpiJjGS4cvnp+elRs90mhicdRXw0AVLMh9kwO5z96ktU7FrzThLA5S71owzvc/Kx4Kzle7pH16PuTZT6rhpelcjNs1wFlyKkdbDr6K0KOEzfdysEfGf6w1y7IUhEraImMYioChtS1umzqTRR7S+IvOqtYLuJGq0xrrZc3zdxn6cs7w3nkA78Vyrj6H7OfZqWQnbXBbbsD0v9uVxgi6lhrafibylreXO9CG0Il/X2PvBLOG0vvLraEhNHdLRkD4QkJOvQvfbRP6kwLDDaN+EU7m6asjNgZyGqtKE3y1mwLB9zOqJQ7F9frfhfTO6nAUdMpM8iJlP3HjPHsVE4b4x+37IJ06Dhmn0HS59ELPvnDE/5VinT6cbL8OYWK6/uaOb8zZGw/qpwwyjnNo1ObaSbWhd9st7jfjSSRSRMhG5RUSeFJFXReT
/RGRV7L4uEVEi8pru3+6kc28VkVdE5DkROSfp2ktF5DEReUNEfiwiI5If37xcKeU0PK6lpgxnLO1JuL9O64Fn2u8U+8BEIqmPZ2XvWXlJEX587qKUJO4MP+6OxqSGW7K22rJ4bsKICy34j7q4X6OQKAWcsbTH8D7tc3TSvJHO8lja+FbLdo+E2fmDM4nudUru2jELnznW2vLQ3rbEyKOruH8wzuu2p1cDA0bFttKpC9E4Reile2/s37MGH8kQeZncUwgBNVf2ZVcvrzAJKGP01Dl9OjPVZ5m+WxSAvvY6PLJ7OY50IRp3WN2waTK+evJg3utRLc6if5+1rBc/OHsBetpq3CpaAr9mEosBPA1gIYA6ALsAfFVEunTH1CulqmP/LtfdfimAHgAjACwGcL6IrAQAEWkGcA+A3QAaAewFcJfdwmnT6nYmih7ZvRwLe1vw6S3TLF3bKLqp1Q/hyOaqgk9u7BWjcNp6I5urUFdRgpqyYly8dnzaY3PB4Cbp3GNWiS4d14rrNvbjvEPGOLqus5lEd5vSWs4jN0YWNQPdTVg5wdry0C0D0bG19VOG4Y+XHZISZInc58bKhOQlYfr9QAMuvpfsujXDEu9cUAATU67aPm+kK+9pp4y2A+Ubo68dLdeutfMF/UkRes2cvni05esmP4ZVPzs/dUuDJnnlRb6LRATLxrXaPq8oIpY6iEYvi5UqzpdOolLqdaXUpUqp/Uqpg0qpbwN4AkD6HlbUCQAuV0q9qJT6M4DPAdgau289gEeVUl9TSr2FaIeyX0TGWimX9mbWvgzsLN+MRAR3bJuJeT3p9xDoo5ua3UfBSVehbRnoxGeOnYaSogj+cNkh8Q356fbxhF02SVXDqKasGCKCI6d1+DKQ4hN5q48AACAASURBVNVHdl5PM/bvWRP461NdXozqsuJAk3kXirWx/Z0Le1sM7zcaYMzk/jMHl42dtaw34/HpUivlf5PbXJiWfeeKXWvH429Xrgrs8f/faXNxxhJnHZtc1p2U2sCt1ShnL+/F/j1rXLmWnr7da7REMqX0bCe7wml/I5CWgIi0AegF8Kju5idF5B8icltshhAi0gBgKIDf6Y77HYC+2M99+vuUUq8D2Ke7X/+YO0Rkr4jsTb6vvKQIZy/rxddPmePo79FPGQPApI46fGh1tJ86mCdRX5bo/5uTNomfv3KMq+GjKTtXHjER9ZW5NZpVaAMPRyV9hqwsB71odWIn38lz5vRrONvXZ55HgS2yXUZbCPuB3DR2SA1Gt9bgoQsW44rDjZPSX7ByLNpqyzCq1Xp+q6bqwZlE57M6scFTh2eHVXeLszxhVoShU/nDDyx03IZxi18BbPasn4gvbEtMSN7XXoczLQyMkH2HT06f41kvc7KAwZpFe7/cduIM3f2Fy8uv0ZzpJIpICYA7AdyhlHoMwH8AzEB0Oek0ADWx+wFAG1Z/WXeJl2PHaPfr70u+P04pdbNSarpSynAdzJnLeuKbZe3Slog1xCJEfev0edixIBqUpq+9DivGt+EjR05K+SJpqCqNnwMApy4anRI+Ol2lG4LvpZznx3NoFiXMre/Tn5y7yPCx9qxPbHyeNC//Ew5bqQe3JT0PdjpI2VbiNxyVOeS0mb9esQp3JDWM3KLt53G67/bgwcGfL1hpaSFHTih2eQnb8bMTt8x3NFSitNj4a3h+Twt+9aFlaffZZvt+9LL+C0PHSa/NRm5Pq7MxYWrQjmqpzjqwSa7YPLMTC0xm4POZG50Iu5f4yxUrcf2mwcmL5DoshWiPk/mRtHGsuaOadbclBwuxVMyc8f2zF5jeF6b6RONrJ1FEIgC+COAdAKcDgFLqNaXUXqXUe0qpf8VuXyEiNQBei52qX3RdC+DV2M+vJd2XfL9v7toxC/eflfrilxZHcPPx0xPyyei/gML4pigkXjVk9MmFk22Y6u5m7C79chPdGyr5b9udB3sqg9JWW5YwoOP0fdPXbm0/iJHS4sw5k6xYMzF1n6I
2E+i0k7hwzGCDrbEqc0CuXLFtrrsDK8kz336z8vKmO+Z7Zy3A9wy+51KuEcKWnZ3E0iLm383fOn1ufGBOy0s71uEAM7krfO867xmlr3BTWXFRwvfOhw8zzsNolUpoo0js/3QnZPVwoTOsPjUCuybdgHXyykO7DIMTWRh18K2TKNF3wy0A2gBsUEq9a3KoVuqIUupFAM8C0A+/92Nwmeqj+vtEpArAKCQuY/XFQHcT2mrTj1QavR4HY0P4i8dkNypWCJu2c8kn0iQdH/Awt1ie1aeeSP6kWHnOfnHhUvx21/JQ5I7K1ieOmYJ9V61OuG3x2OiG+cOnJC4rshoMqLm6DLecMB29bdU5t0TbTETcTQMSRnbHBMYMqXG84iZorUnfz/2x0PFmmqqN38eTOurjA3N1lSX44kkz8dnjcj9QTz4ZbyOYS667yIUYCVY6C2ct67G07FQbIDK7ZKZHciOKfK46NPn5jT0V+65ajavXG29LsMrplhI/ZxI/DWAcgHVKqTe1G0VkQETGiEhERJoAfAzAT5RS2jLSLwDYJSINsYA07wNwe+y+bwCYICIbRKQcwMUAfh9bxhpeus/AjZsno7+jDp8/YUamQ9PiLJEz2Y54/+aiZY7P9WL9ub4iCONovteSN/G7JRIRRCKCoXXRUcDiSO4GdhGRlBnJUS3V2L9nDSZ1JDac+zvSN6T1lo5rw/fPXohig9lOblmMUlku6025nitXSTTSo89Q2Bi9TzWlRREssriccX5Py2BKLApUJCK4a8cs3Ll9wPQYp1E7w6DNINCL3bRPg6tGBm+zUj+ftawXN262HvXa6JKXH9aX8bHyvY+Yru4/YorxKrOiiGS933dGl7PJCb/yJI4AcDKAyQCe0+VD3AKgG8D9iC4R/SOAtwEcrTv9EkSD0TwJ4EEA1yql7gcApdTzADYAuBLAiwAGAGz2429yy5Kxbfjm6fNMl5Elvy+0TkCRCOZmiKxK3lBplnTGb0/42b9az6/AAWHVWluORRlm5bN5ij5/wnTctHlyQqoBsua9AwczHxRCbs4eV5VFG3R2wtZ7yeijcO6K1NnjNZOMU6ks6rUfsj0XbJnVCRFBT55Fgy4EA91NaKgqNVwFccXhE3Cuw1RJYbDO5HNoR6aBqh/rYhw4oV3WqDM4vr0OUzpTBx71JQkwi4ovgmqiOZ1I8isFxpNKKVFKletyIVYrpe5USn1FKTVSKVWllBqqlDpeKfWc7ty3lVLblFK1Sqk2pdQNSdd+QCk1VilVoZRapJTa78fflI16G6OOyW8obXlqRKJJxPkllh0nH9jyEnsfm+RGppb8PduORmdjZcqI6Wn6UdI8qmztNNPtzlrZOb65ugyHTR6W+cACdsAkq3U2y1DzZaZmZHMVvvy+AVx5RHb7ejTZblMweqWMAuncsMk44FJdZUne7cerKSuOp8PiBHjuOs1gxvDYWRmCroSAWfChjdM6ICJp97RZoVXPRm2fYfUVjlYS7Fk/EX3t0YGvdBE3RIBTFo5Ke618D1xji4sVkFG9Hpo8iRRVXlKEyw+fgLt3Wg9TnTwLNSm2j2KuC6HwV00YkvU1CtHmmZ3xn53UX4f0teHaIyfh7OU9WZVjVEtVyvsgXxrTfhrWkN2Xbj474GCd6OKxrTh2VmfK7aXFEWx3GGG3KoC9gW7Nyn/lfbMSfp8zqtm1nJ5TOhts5TIrxCXo6WgRoLuaKgdvTFiGN/j+T7c8lcgtZkHvpnS6E7l2WH10yWpN2WBbQUtcv3Nht6VrXLpuPKbqZgQ3z+yMtye1anOhwSoDQXRJcMrturo2pd7N4ZEaoxWCubbn0t5iZsracTZHsuqSIldN7WzAHy87BNVl2b90c0Y14bt/fC7zgZSgpCiChsoSvPjGu6YNyXT1gIhg4/TsoxzqE553NlbiqRfeyPqahSD5Nettq0F7XTn++fJb2DqnC4fZyAmV7+aOasLWOV3YsaAbjVXWZgJLiiK44vCJ+NIvn/K
4dLmhtdb7pcm71ozD628fsHRsQp4yi9fP187ljFj6qoRGqsmx2S7DI8pGpqjDh9z4UwCZVzpdv3Eyfr7vP+jUDYxUlxXbGmzaOncktmaI/Hz1+on4+sP/SLjNysDbIX1tlssRdqNbqvGXfyUmW8ixPiI7iWF00ryRGDOkBuUlRYab593oIJK3/AjUoV+y9s3T5uLfr76dVIYcHoLzWWdTJf758ltYMb7NtRHbfFBcFMGlh/a5dr21/e34/ENPuHY9ito+39oMwJC6cjzzUjxuHIY3VqY5Ojthr34Eg2U0a7fp/wQvnysK1pi2mpTGfC4ZM6QGj12+Erf9734c2p9+kLOusgSrDNIgZWvwsxT9NJnlfzWi//zlw/fv2ct68dEH/mrYIcyxPiKXm4bR7rXjsWn6cBza355x5CXk38Oh53RUJ9Pz7sfr0qpLDt1QVZqzoendZve5F9NfyC3aazJ5eD0uXDXW9vlBBmQa3pBd5yBMy4tmdQ8uZYtINCKtGSsBunJfai8xIe0Jv2ALwsbp7uYuzobTj1p5SRFOWTQqYYWRn7SgXDXl5pMYblQjWwZStzKEyTdOnYPl483rVVvfBx7Xu1YG8thJpILmdBnVsQPRZcOVJnulDoZgGD0ERXBNkY2K1ckMavIoKGVvl0n+rp0GgQs2TE3fSAsy48gCi6kQzITlHaUFOdM+H9rrYCVQRZg6um4ymknU7yFtZhRj8tjpi0dbXsofZsfNHoFda8bhxAzLUPPdlM6GhCX9v7tkBf5w6Yr477lWlbKTmONy7P2WNz6wohf7rlptGoAinzpoYaAtXZlqED7bDd0tseTYDPzjGv0SyEz1lFmwBs2WAf+jEuoHG4Y3Og9uFLYOVrxjFCvWHSfOzHiOk78g7MvdRQYnCvUz1d0tgxHDP71lqs+lIj+EqZ4/95Ax+MTRmfMPhqsWSVVSFMH2+d1pl5mapgxL88fpO1ybpnfkxOT+YB0rqKsoQU354Pst19KUsZOY47Tolk4qvdoQVZRBMfu8plsuED0vNSG5np31+F7JhcrUqumxsOBfPMk8SbJTbbXluGRdH24/cQbGt4cjf12+yfa9ePICa3vu3OTWl3mQs6DfOn0uzlneC2AwSEu8YxRrdnY2VeLB8xalnKuPbJtj7Zq4IbWpycf1Mu1JbKrmTGI++tbpc4Mugm3aZzBXP4sA0BFbum+2cmSHQT2vb9tedmh2qYO2zunK6vxMlo1LjOjq5KWa0eXnnszM38yMgJLjPrh6LLYMdOL5197OfHCSKcMbcMe2mTioFE687TcelC533HfGPLz61nvYfPMvAUSXgGSjrqIEG6Z2pET3csNFq8ehp815fswtA52481e5E3ly/NDaeNjsKgtBm46c1oGfPf4fS9fWR3RbNMbfxODdDvJRFaogRl/1j5jNpFimjoqXJnXUY1JHPVZPHILOxuj7LXkmEQBGNKW+Fy9eOx73/f7Z2LH2n/9cGKTSZilyueFN9iWncg160ltfHu3H8UNr8adnX0k5NuiyOvXE1avj9cj1m/px3cZJ8ftExDS66rQRjbj9xBmYO7oZJVnst9y/Zw2eeelN3P7z/Sn3nbm0Bzf98HFL1ykpErx7IPVF0P99RnWsVV/aPoCX3ngXA1f90P7JNlmp14Of7qCslBUXoaetJmXZ4/GzrS3PWtjbgsU+N47DRPuI9LXXYVZ3E/buWoYLVo7FpI66rK+tjQg1ubzf4H0LutN2aOb3RGeXzb5M1kxyP7JZmFhJdr9kbCtuPm6aD6VJ9fMLl+DWrdPxzRwczfbLeYeMCboIrtg6pyuwQBJ6o1tr4qsb4h2jDOe0Zdm5zYXGLPciF6Z0KS9Ptpgr0E3KoyGV31y0zJPrWvXI7uXxn5M7JHYGnhaNac2qg6hpM9hjPKWzHmfHVltY8YEVid9Nd2ybib9ftTrh72mriz7Oal0U2ZuPm4YlYzO3tcuKiwaD/3hch1rJ1xv8txe5YmpS2ODKUk4
SO9FcXYZTFo1yZeZi0/ThuPbISdjm80buzx0/HT87f7Gvj5lrbt06Ayv6hgTy2O31FVgyti1hn0K+2r12vKPzTtPN5A+tC2YmLmx7CV3n5d8n3jV8nTh/pfGgg/YUFBfl+WtNCUY0VSXUTfr3aksAS4z1M4na8kqzrQ92guJpA9SdAaVvaQhZQJ7iokjKIMAd24z3ZH/zNGuDuAt7W+IrnTStNeX4/aUrcOqiwSBtK/qG4NatMyxd049Bq/NXjsH82Ha1dNiTyCPze5rjy+zOXNqDzzy4L+AShccRU4bhG488k3K7l8vYIhHBxunDPbu+mfKSIgxvrEz44ls/NfPsGpHbBmL74KyqKS/GtUf2J9z24HkBDXjkab8h3T68Y9wKLx+e/iEA4PDY6oIvbJuJ42/9dfz23tYa7FjQjeNmjcB7B1Uo9pKTe6Z01uORp14yvO+keSOxasIQ7Hv+NfzZYFmnn/Qdv46GSnz9lNnoa6/D3b8d3K6idRzszNBHIoJbTpiOiS6sjMqGlRk0v3QlLa2v1Q3W6peS9g9PDJJ3z6lz8JsnXrBctdW6MQjs4XfQqYusbalijZinKkxSM1h12uLUMPVhdPYya8sESgp8pDiXl+/l+4ROPtMaP8Xp1nfpTB5ej5UTEmd4c7nhPqolfPtOByN6Jt6+f88aXHXERPceJ2QdRSA1nUkkIvjQ6nEY3liJkc1VGFbvPIothc/JC9K3Y9rrKzC/pyXhvaqtIFg9MbEeuuzQPtfLF5f0WZk2otF0KaDd9FpLx7Ul5FT222OXr8Tnjp/u2vWybQ5snmE8cP/9sxfgVx8yX547tbMBJy8cFcp6zUu5++1LnogI8MljpqIoyJB8HvjQauOcbfnc/yiEysysAzmr294MFnljaF200Z08k33JuvFZpZXwg34WtNnhErRjZ/mfuiMTLTWF10uagq5+Lna41DnZA+csxL2nz3PlWhROm2cMzqBrA8rJbSAvByutdPy0aOpBf67sKi8pShsJ3ksVBh1tETHck9rbVmMpX+XI5sGlu17tXw3TwHh+9QTIsYtinajRrdV5GdikvrIUzdWpFUCYPoxuC9uXybr+dleuU2ZhZunL22dlPIa811JThsevXJUSevzEuSNx98458d+tjEnduX0Ad+2Y5Uu00K+fMhuf1QU2+tzx03HNhklpzjAWxpxYI2NRdXuziJCsZ9a+DXKQateacdg2z5294KNbqwNfrkfZqS4rxg2b+k3vr6sswS8/uBTXbezHnNg+rZVJe9anDPcuNYHZZ+XnFy6J/3zo5PbYsYMHX36Yh7ObOeTYWYOd/PVTBgckT5o3Etdt7MeX35eYOkt7Br9zxnzbj7VywmD7+CSX6hi/tdvY589OIgEA5oyOJrPOtxlEve+cmVoh5PVsW8B/3EyL+9EqbS6N/rWFiG3JG8kpOCVFEcPOUltteXy2rrw483tg7uhmDHQ3OSyD4I+XHWL5+GkjGhOCf7XUlGGTyTIlfaMkFxwxZRi+edpcrJro7WDgxunGudDCIowdeHJb9DtwzqgmrDfJzacZUleOI6d1YFRLNfZdtTplsHxiR517e3aTNFQl7ovTaEFsSosj8eie+q/142Z3eVKeXGO20uOsZT04cloH5oxKDNCiPYdOB8qcriyxSptZ9qoZ890zF1g+Nn97BJSRUUSv5NGzsLMTQS95Xf6iMalRqfJJkKH35/c044wlPQm3TRqWOhp/2aF9+OCqsbauXV4y+HexoZfbbj5+Ov5nxyyUxwYKvHo9BYJqCzk27br5uGm44ajJCbctDVGQBiMikhKUwQvJEbeDkG4/ax5X/RTz9nsHAdiP0ZC8PLLf45nkaSMacd4hY9DVVGk4uKovznVpZkQLgVGLb5NJgMBMbSCj75tbTpgeb5MEtcxci3Zb7NGkTV2l9aA6BdlJtBpEoZC01pbjkd3L8f4l0YhH6/JwyWmyGV35vW9tQ4aRU80mD0b8ZyY9t3+5YqVrSzP
KiotwyqLcCKxE6dVVlGBWd1N8CZVnVbNL101uwBmlUbnFYpjzvGbx+d46pwtfOmkg84ExOxZY2wO0bFwbAOCzx03DtrkjDdOo5H2KE8IhfUOwcVoHdq1xtj81+b2Z7eKcdN9bpy0ejZ+ctzghNZL2cPr3aiHntQYSl9sCwK8+tBTt9RU4Y+ngoPQ5y3vxPzsybzkx+r5ZOq4NJy+Mvk7my8y9XaVVVVqEHQu6cdfJwW+bKchOYk9bTV5uRE9umNvVUFUan1nraatBVZYRUt20MBaV7oKVibNO+d7Ry0ZpcSS+fyvdl9s1SSkH3Hjdd+q+DOeMakJZcZH5rG2GxprR3doyHMoPVbGlnVqgG7ele4fZCXK0M4BE20FqNUg+bcXYITWWjtuxoBvzejLn6gKiS8fOWtaT8bhh9RXoiu27HDe0FhevG284Y8BOYv4rLynCtRv70eLwfVxdnrj6YJLLM4q3bk0f9VOb0DjKZKl7IUpuy7TF2jj6XJBnLO3BrDRbE7SPvtOVK1p6i1KPVmuJRKMu97W7+377wdkL8JX32et4FmQnsTgiebkR/dTFo3HXjlm4/yz7m3HD7qI14zB9RAPW9SfOcM4d3Zwx+Izd5YyFbrHN5XKfOXZaym0lJpXncUnRHkVg2oPVZg38SCxLwZo5shEfPao/ISLl/WfNTxtsIp2+pETUWh2RbZQ9fcMjUwNPvyw6V33nzPm47wx7A6rzRjdn1dkfP9Q4ifhZy3oT9olqTpjtLIIs+4hkl1n6BKeWjG1Le395SRH+cOkK7HY4E5qPzAa8tY+zlTm+e0+fhzOXZh5wMnPHtpm4/LA+1FdmjoYaJj1tNZg9yt6+/tz/FqO4oohgoLsJY4dEv2QzzQjZ2bPmViQ8p3rbanD3KXMMGwnJe43uOXVOQr6rIbqlRv938XLvCpknrt/Uj4cusJ7APDmvXTqXHz4h4XcRMa3UtdnjdI05tvPyg4jgiCkdCXuHxg6pzRhswsz0EYn74bSBhhuT9g/aVVlajEVjWlJuT+5IXbex31ZwgLBqri7LOJp92BTrUYu17QyZ3LCpH+NMOot6zdWluOywCRmPM2IlSjKRXhB74GvKS/I6doJdh002rm8OxLcsZH6uJgyrw9nLreXYNjK8sbJgggaxlsxj92UI72snlPztJ87E1evdS7TsqqRextTOBlxvMgOhD2SwJOQBJrzSkGH0q6y4CB0NlWmPSUeLAKcFFUpXZxeJ4ODB1G7iSfNGoipNoJG8jkpLWUt+e2jvQa/eNlpaCc2R0zpSbstXpyx0f3/w+qkdWD4uc/18zylzHT/GVWH9PqOcc9URfC/5RUtRkkzrHJblwQqOMOGzmce6khop+un1VROG2Fp61V5fgaNnZhf++eNHT7F87Ke3TDW8XV/iyw6N5giy0/DTL120MlKd67RQyvrX2s2/+55T56TcdkVsZF9bcqbNCBpZ1z/U8PU7d8WY+M9p36UcYCUDyYMI8aVIHF1wnYjgy9sHLOVBtbPH0WDsKEVnU+pgltWAgMnRronMZHorDmswXl59RdLKmfkmHRzK3qH97Thp3siUuBXZ+ul5i/HrDy119Zq5hJ3EAuLHRv2fX7gEf7tyVfz3c1dEp/Tn9zRbTqZ+0+bJlpZ1xGcHdA0/bc+b6br1AutUaJ3E5L/7W6fbG4FfPXFIQnJxjRbmXp/MVlsaM7q1Gr/44BK8b75xsI9h9RWoKS9BV1PqjEtFaVHaBv0xA504pK8NJy9glNNCkyntzdY5XViaNAvl5jIxoysV+r7ZOaObM0ZJvnTdeGwZGIG/XbkKbbXmncUVfdF9WgczdOgXmAw+zexKv+fmmg2T8NWTZ6c9hgjQ56tL//meM6rJsG2R/A6eM7oZj+vaR06t629P2XddSJqro6uh9AGySosj2L12vOtB7TqbKtFqY9VdvnE/cRSF1oLeZnz0gb9aPl6r4H5lYxSlXbcXMCLA6Ut6sGVgBCrLrEfMPGzyMNz
/x2fjv+vzjmWqiD98WHR2savZ+XLJfDJmSA3+87f/oiwpWfmkjnqMaKrEk/99w9J1zlnei9GtNVgxvg3f/9O/ACRGE7v8sAn40i+fSjkvXQAL7bU0C5Sj7SVdOi51c39dRQk+e1z6wCEAMKXT+3xw5K9MM0xLxramdCAGZxJTj180phVD6yowqqUK133fev1IiTJN0m6dG02BE4Ggt60G/3rl7XhU20kddfj9P14GgHh+VX2exU8ek7qypMik3X7V+vR7FDcxUiRZpEWvzLQ1JyKC/o56/N/TLyXeYfChMAvqZoedVVn56P+dNhePPPUS1hZAqragsZNYIJ64erXt0XRtFK2m3P7b5MOH9cUjATZUWY8AVRL75tc3BHea5BYyavhpf+PQugqsnTQU3/79s6knFpBPHzsNjz7ziuHo2r3vn4eXXn/X1vVuPn46ui68L+V2JzM1+lOG1VfgmZfeTLh/aF0FfrtrWcY9lGb271nj6DwKt+QZ5qOmD8dde59Of1J8T2LiuXt3LUNTVWn8/eu0k1hoKxSy9cktU6P1Uiyp81dPno2xu+8HMLgSYdn4wcEho/3jbbqG+7ffPw9rP/4QAKQMiBE5NWFYHa7ZMAkrJ5oHZ9sy0ImiiHEANv0gqdFKHHKmo6Eyq7gJZB2XmxYIJ414rS3mZJnq8bO70NtmLVeWnrZsy2xUOuH2WLnYQDNXW15iGvK4trzEcE+PVdk+7+cdMrh3wGxpWVN1GSO7UYIbj0ocRR/RXJkxIfsRU4YBSK1XmqvLEupGK1GcuavRWLr64Le7liX8nlwvlZcYd+y0QGP6az9x9Wpcs2ESLl43mBZgwrD8S2lF4bBpxvB4XjwjjWkGwStLi/DY5SuxZ/1ErNANeowdUoPhjd7khCVyEzuJZGrxmOjobba5xdJJaViYjPhr9JGrtFPv3pkaPCV6bXYu7Hp492CKkE8c482SlpMXduPCVWNxqG6P6gErUSqIgJTk6ycvGIV5Pc3xVQiaS3SdiEvWRZehz80QOOKeU+fify9ckvYY7b1apIuQUlYcQWVpEW7anF2KjVymDfAZfV80VTtLZm70NSAi2DRjuGE6JCKvrUla4rhlIJan02Cgs6GqFOUlRdg8szOhPXL/WQvws/PT1zNEYcBOYoGy0n+6cfNk/Oz8xaZr6I+c1oHbT5yRVTn+b/eKhA3Y6fYOAdE8Zdo6dO1vGG+ygdso8InWgJk8nHvVjDRUOt/0fe/p8/Dt92dOvP3BVeOwMyls/ieOmYrFBvnniDLRPtMzuhoBDNYLJ8b2wOmPaastx6QO81mn6rLihByrRuKdRF0lKiL404dX4rDJw+z/AXliVncjjps1AtccOcm1a9bH6iM39nERueEmXa7V4ogk5GHWa6wqLYgI6pTfWPOSqfKSIgxvNF+OuGWgE4vG2Ms1OD9pFqCusgRf3j4r/ruVfGbaHslMEQWrY3n29HsqS4oi+POHV+KeU4xnHwvVhGHRLzP9aGemQBQXrR6X8PvEjjrHy75mjmzEbSfOdHQuERBNibN4TEu8s2hG6+Q5HZQYnEnkSgW94qIILj98QsJewWzdvXMOrtkwic81hUaxxQGLgZHp6yGiXMD1GgUqqHRhH1w1Dj97/GcJt9XpZq+0jt/UNFEptVHl4gwNh4vWjMPo1ur4sllNRSkDGyS7c/ss/DMpcIy2H8jMij7zzfxEfutpq7E00KCtbP6ALhenHWsnDcWvnniBEZR90NlUmdW+aSIvlem+I5ObVEcxii7lAXYS89zHjp6CKoNOkdOciXduH8CWz//KcXky5b7Slm50NFRi2ogGT3xSTgAAE95JREFU/PbJF1OO+cCKMSgtiuDwKYNLuxb0tuDlN95JOK6mvATbTXL0UaK6ipKUCKjLDVJPEOU6ZTH3mZljZ43AphnDGUWTqMB947TUfMNLxrbi1q3ZbcMhCgt2EvPcoSYJ7IOK6ZJpBvPL7xuMUqg15pKLWldRgl1rxyf
c9oVtXKrotkhEMKqlCvuefz3oohAZOmtZT9r7P3PstJQVB/EE2Q43W4gIO4gWdTRU4B8vvpn5QAB3bJuJJhvpkpLduX0gvsWAyA9GEdzPWJq+TiLKJdyTWKCanUabM7BhaoflY/VRSy87tC/lfn1eIe79CZ72had/XYjC4qxlvWnvXzlhSEK+PQCoiEXFLHbaSyTLvnHqXHxt52xLxy7sbckqlcXc0c3oZ0AyCsjuteMxurUaYxyk/iIKKw67FZjm6jIURwQXrhqb+WCLrt/Uj68//A9Lx45srgIQHeFfOSH9nrayWO4s7iEMzmGTh6VEbLx03XgMYaeRctRnjp2Kex5+BqNaqoIuSt5rqSlDS417A5JEYTWjqxEPnLMw6GIQuYqdxAKzNympcTYaKu0vDaopL8H+PWssHfvxo6fgnoef4chcyGzVpRYgCkJfey1eeuNdR+cOravAaYtHu1wiIiKi/MJOIjlSHBF0NQ+OxP/momUZo43a1VZbjlMWjcp8IBEVlPvOmB90EYiIiPIaO4nkyMykHEBcUkRERERElB/YSSRHgsqzSERERBSUR3YvDyxCPJGf2EkkW7INH29m15pxENa6REREFGINWaRqIcol7CSSLbGsFI4TUZth0nsiIiIionBgoiiypSjWOawoYVoKIiIiIqJ8xJlEsmXOqCacsWQ0TpjTFXRRiIiIiIjIA+wkki2RiOCcFWOCLgYREREREXmEy02JiIiIiIgojp1EIiIiIiIiivOlkygiZSJyi4g8KSKvisj/icgq3f1LReQxEXlDRH4sIiOSzr1VRF4RkedE5Jyka5ueS0RERERERPb4NZNYDOBpAAsB1AHYBeCrItIlIs0A7gGwG0AjgL0A7tKdeymAHgAjACwGcL6IrAQAC+cSERERERGRDb4ErlFKvY5oZ0/zbRF5AsA0AE0AHlVKfQ0ARORSAP8RkbFKqccAnABgq1LqRQAvisjnAGwFcD+A9RnOJSIiIiIiIhsC2ZMoIm0AegE8CqAPwO+0+2Idyn0A+kSkAcBQ/f2xn/tiP5uea/CYO0Rkr4jsff755939g4iIiIiIiPKE751EESkBcCeAO2KzfdUAXk467GUANbH7kHS/dh8ynJtAKXWzUmq6Ump6S0tLdn8EERERERFRnvK1kygiEQBfBPAOgNNjN78GoDbp0FoAr8buQ9L92n2ZziWiHDSqpSroIhAREREVNF/2JAKAiAiAWwC0AVitlHo3dtejiO471I6rAjAK0b2GL4rIswD6Afwgdkh/7Jy053r4pxCRR352/mLUVZYEXQwiIiKigubnTOKnAYwDsE4p9abu9m8AmCAiG0SkHMDFAH6vCzzzBQC7RKRBRMYCeB+A2y2eS0Q5ZHhjJWrL2UkkIiIiCpJfeRJHADgZwGQAz4nIa7F/W5RSzwPYAOBKAC8CGACwWXf6JYgGo3kSwIMArlVK3Q8AFs4lIiIiIiIiG0QpFXQZfDd9+nS1d+/eoItBREREREQUCBH5rVJqutF9gaTAICIiIiIionBiJ5GIiIiIiIji2EkkIiIiIiKiOHYSiYiIiIiIKI6dRCIiIiIiIopjJ5GIiIiIiIji2EkkIiIiIiKiOHYSiYiIiIiIKI6dRCIiIiIiIopjJ5GIiIiIiIji2EkkIiIiIiKiOHYSiYiIiIiIKI6dRCIiIiIiIooTpVTQZfCdiLwK4C8ePkQdgJdz6Lp+XJ/Xzr/r89rGmgH8x8Pre1H+XH0f5mq5vb6219fP1Wt7ff1crlty8TOaq9f2+vosu//X9vr6Xl57jFKqxvAepVTB/QOw1+Pr35xL1/Xj+rx2/l2f1za9fs7VL7n6PszVcrPsfF4cXt+zuiUXP6O5em2WPf+unctlT1evcLmpN+7Nsev6cX1eO/+uz2sHw4vy5+r7MFfL7fW1vb5+rl7b6+vnct2Si5/RXL2219dn2f2/ttfXD6RuKdTlpnuVUtODLgcR5R/WL0TkBdYtROS2dPV
Koc4k3hx0AYgob7F+ISIvsG4hIreZ1isFOZNIRERERERExgp1JpHIERG5XUSuCLocRJRfWLcQkRdYt5BT7CQSARCRn4jI9qDLQUT5hXULEXmBdQt5jZ1EIiIiIiIiimMnkUhHRLaKyENJtykRGR1UmYgo97FuISIvsG4hr+RcJ1FEcq7MRJQbWL8QkdtYrxBRLsqpiktEipRSB4MuBxHlH9YvROQ21itElKtyopMoIkUAoJQ6ICLNIvIxETlbRPqCLhsR5TbWL0TkNtYrRJTrcqKTqJQ6AAAiMhfAgwDaABwK4FoRmRy7Lyf+Fgq91wFUar+IyJAAy0I+YP1CPmHdUkBYr5CPWLeQJ0JZQYmIJP1eJiJfBnAJgI8rpY4CcDqAfQDOBwAu5yCX/A5An4hMFpFyAJcGXB5yGesXCgjrljzGeoUCxLqFPBGqTqJEFSmllP52pdTbAH4KYCKAmthtjwL4LoDhInJk7PxQ/T2Uc5RS6q8APgzgAQCPA3go/SmUK1i/UIBYt+Qp1isUMNYt5BlJqteCKYRIRD+iJiLVAC4C8CqA3yqlvhcbpfsmgMcA3KSUekZEWgCcCmABgDVKqbcCKD7lARF5GMCHlVL/L+iykLtYv1CQWLfkJ9YrFDTWLeS1wEewRGQlgCtFpDP2+3YAfwcwDkA/gI+LyHGxUbpbAMyK/YNS6nkAPwYgAOYFUHzKA7FAAuMAPBJ0WchdrF8oSKxb8hPrFQoa6xbyQ+CdRADFAJYBmCkilQCmA3i/Uurw2Br+HwG4EgCUUt8E8FcAK0VkfOz8XwPYoJR6wP+iU64TkY8A+D6AC5RSTwZdHnId6xcKBOuWvMZ6hQLDuoX8Epblpp8EUAvgCgCvKqX+KSI9AD4PoAPR9fxfUUqdKSJTAXwF0c3gd2n7ALRN48n7AoiosLF+ISK3sV4honwX6EyiLhrYTQC6ACwB8IKIdAP4KoBfKKVGAbgZwOkiMlIp9TCA7Uqp/9FXrCrG37+AiMKK9QsRuY31ChEVikA7iUopJSISi8z0XQBrEF1jPQrAC0qpC2OHliG68XtD7LyfAakhp4mINKxfiMhtrFeIqFCEYrkpEI8M9g1E1/K/BWA9ohXsAgB7AZyqlHo5uBISUa5i/UJEbmO9QkT5LAyBa7RQ0q8B+CKAuQCeQ3SdfwmA65RSW5RSL8fyEaUts4iU6q/rZbmJKPxcrl+q9df1stxEFF5u1iux63WLSG3sZ842ElHgQjOTqBGRuwA8D+ASpdR/dbcXKaUOpDmvE8AeAO8A+IdSapfnhSWinJJl/XITgPcQzYO2Qyn1ntflJaLwc1qv6I47DcC1AI5XSt3tXUmJiKwLzUi4buTsYwBmILq+HyJSBAAZGnA7EV3a8Syiyz6OEpFbY/eF5m8komBkWb9cBOBhAE8D+DCA5QA+mXRdIiow2dQrSfoBvIhoSo0et8tJROREaDpQsc3gEaXU/yKaZPaQ2O1pK1kRqQfQA+B0pdQHlFJfALARwHoRqVVKHfS67EQUblnULwLgIICVSqkzlFJ/APAQgNpY8IpwLcUgIt84rVc0WmcSwOMA7gIwAGCeiJR5UV4iIjtC00kEAKXUwVhi2jcB/MXsOK0CjTXg3kY0/9D9sdsiAOoB/BnRSpuIyEn9UhzrBN6glNorItNE5C8ADgXwdwCH6/dAE1HhsVqvAAl1S/JM42wAtwH4NoDDAIz0rMBERBaFqpMYcziARwDck3yHiDTElpF+BojnGHpTKbVXKfVKbGT/IKKhp18F8JqfBSei0LNTv7wX+//t2CHtAD6hlKoCcAOiibEvEpEaPwpORKFlWq8AhnXLgdjtWhvsaQDDAdwCoBzA0SJyhYhM8rrgRERmwhi4xnAJl4hMBPBxAE0AXgFwvVLqHqON4SLyaQDvKqXO8KXQRJQTXKpfJLbM7EgA1wMYr5R63Y/yE1H4pFt6nqZuiWjbYUTkpwBOVErtE5F7AawCcB+ALbEIqkREvgv
dTGKaPT6liIaa3grghwDeJyKlSqkD2uZxEYnElnFMQ3R9P0Rku4ic4n3JiSjssqlfdIpj/7+KaLCJWi/KSkS5IcPeZLO65aBuufqvAFwmIn9AtD55CMB+AFWeFZqIKIPQdRI1IjJWRBaKSGvspj8AuFsp9VsA3wOgAJyuHR77XyFawf4bQIeI/AjAlYgu5SAiAuC4ftFC2r8rIuMQzYn2XaXUs36WnYjCy07dopR6J7bkdCiAPgA3KqUWAvgIgEb/S09ENCiMy02LEF23vwnAbxGtPM9XSt2rO6YawEkANgA4Tin1pLZ0Q0SWI1oRvwDgY0qpD/v+RxBRKGVRvwiAakQDTJwGYAGAa5VSV/n8JxBRCDmtW2K3jwTwL6XUG74XnIjIRBhnEvsAjEY039AKALcDuElEFmgHxNbo/xDAPwGcHbvtYKySfgnApQC62EEkoiRO6xeFaCCsxxHdK9TJDiIR6TiqW2KeVkq9oQWyYf5VIgqDUHQSRaROF+VrFoARSqn/ADiolPoIouv1TxCRbt1pf0U09cUEEblKRH4BYKFS6jdKqQ9zszcRAa7WL8uUUk8opW5WSr3q6x9BRKHjUt3yvwCWAtHB7tj/4VriRUQFKdBOooj0iMj3ANwJ4OsiMgLAnwA8JSKTtQoTwNUA+gHEw0Erpd4BcADRivkEAJ9VSv3I1z+AiELLg/rlB77+AUQUSi7XLZ9TSn3P1z+AiMiCwDqJInISgB8hmlvofEQ3ae9GNHLgvxBdrgEAUEr9HtHN38fFzi2K7T28G8CnlFLDlFK3+/oHEFFosX4hIi+wbiGiQhFY4BoRuQLAk0qpz8V+7wDwGIBeRCvUqdDNDorIOgB7AMyIrd0fBuB1pdRLgfwBRBRarF+IyAusW4ioUBRnPsQznwHwNgCISBmANwDsA1AB4GuIbv4+S0T2xSKAzQDwfS36l1LqmUBKTUS5gPULEXmBdQsRFYTAOolKqX8A0SheSqm3RWQ8ostfn47lDvoYonnI7hORlwCMAbAlqPISUe5g/UJEXmDdQkSFIsiZRAAJUbwWAfhLbFM3lFJ/FJENAKYA6FNK3RFQEYkoR7F+ISIvsG4honwXeCdRRIqUUgcAzARwf+y2UxAdfbtSKbUXwN4Ai0hEOYr1CxF5gXULEeW7wDuJSqkDIlKMaISwVhH5KYAuANuUUs8HWjgiymmsX4jIC6xbiCjfBRbdNKEQIhMB/A7R8NHXK6WuC7hIRJQnWL8QkRdYtxBRPgtLJ7EUwOmI5g16K+jyEFH+YP1CRF5g3UJE+SwUnUQiIiIiIiIKh0jQBSAiIiIiIqLwYCeRiIiIiIiI4thJJCIiIiIiojh2EomIiIiIiCiOnUQiIiIiIiKKYyeRiIgIgIh0ishrIlIUdFmIiIiCxE4iEREVLBHZLyLLAEAp9ZRSqlopdcDHx18kIv/w6/GIiIisYCeRiIiIiIiI4thJJCKigiQiXwTQCeDe2DLT80VEiUhx7P6fiMgVIvLz2P33ikiTiNwpIq+IyG9EpEt3vbEi8gMReUFE/iIim3T3rRaRP4nIqyLyjIicKyJVAL4LoD12/ddEpF1EZorIL0TkJRF5VkQ+ISKlumspETlVRB6PXe9yERkVK+crIvJV7XhtplJEPiQi/4nNnG7x5xkmIqJcxU4iEREVJKXUcQCeArBOKVUN4KsGh20GcByAYQBGAfgFgNsANAL4M4BLACDW4fsBgC8DaI2d9ykRGR+7zi0ATlZK1QCYAOBHSqnXAawC8M/YMtdqpdQ/ARwAcDaAZgCzASwFcGpSuQ4BMA3ALADnA7gZwLEAhseuf7Tu2CGxaw0DcAKAm0VkjK0ni4iICgo7iUREROZuU0rtU0q9jOis3z6l1ANKqfcAfA3AlNhxawHsV0rdppR6Tyn1CICvA9gYu/9dAONFpFYp9aJS6mGzB1RK/VYp9cvYdfYD+CyAhUmHXaOUekUp9SiAPwL4vlLq77pyTkk6frdS6m2
l1IMA7gOwCURERCbYSSQiIjL3L93Pbxr8Xh37eQSAgdgS0ZdE5CUAWxCdxQOA/9/OHbJmGUZhHP9fwVnUKbYhBsExP4DBIJgMFoMmZX3rJllZUfwEBqsiYjHsCyz7BZbEIYzXNNhsgsfw3Lt9w1bePaDu/f/gbg/nnHo4F88j4AGwm2Q7yZ2TGiZZTrKVZJLkAHjBcAmcZS6A/Xa1PLILLJ3UX5Ikl0RJ0jyrkep8A7ar6vLUu1BV6wBV9bmqHjJEUT/xJ9p6XP/XwA5ws6ouAc+BnGK2Ky0Oe+Q6sHeKepKkM84lUZI0z74DN0aoswUsJ1lNcq6920luJVlI8jTJYlX9BA6AX1P9ryZZnKp1sX3zI8kKsD7CfJttjrsM0diPI9SUJJ1RLomSpHn2Etho8dDHsxapqkPgPsMPa/aACfAKON8+WQW+tvjoGkMUlaraAd4DX1pMdQl4BjwBDoE3wIdZ52omwH6b6x2w1vpKknSsVI2VtJEkSf+SJPeAt1V17W/PIkn6f3hJlCRJkiR1LomSJEmSpM64qSRJkiSp85IoSZIkSepcEiVJkiRJnUuiJEmSJKlzSZQkSZIkdS6JkiRJkqTOJVGSJEmS1P0G8XdW8bXHiqkAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Креирајте скупове података за обуку и тестирање\n", + "\n", + "### Подела података на скупове за обуку и тестирање\n", + "\n", + "Када радите са машинским учењем, важно је да поделите своје податке на два дела: скуп за обуку и скуп за тестирање. Ова подела омогућава да се модел обучи на једном делу података, а затим процени његова тачност на другом, независном скупу.\n", + "\n", + "### Зашто је ова подела важна?\n", + "\n", + "Подела података помаже у процени како ће модел функционисати на новим, невидљивим подацима. Ако модел тестирате на истим подацима на којима је обучен, резултати могу бити пристрасни и неће одражавати стварну перформансу модела.\n", + "\n", + "### Како поделити податке?\n", + "\n", + "Ево неколико корака које можете пратити:\n", + "\n", + "1. **Случајно мешање података** \n", + " Пре него што поделите податке, уверите се да су добро измешани. Ово осигурава да подаци буду равномерно распоређени између скупа за обуку и скупа за тестирање.\n", + "\n", + "2. **Одређивање пропорције** \n", + " Уобичајена пракса је да се 70-80% података користи за обуку, а преосталих 20-30% за тестирање. На пример, ако имате 1000 узорака, можете користити 800 за обуку и 200 за тестирање.\n", + "\n", + "3. **Коришћење библиотека** \n", + " Многе библиотеке за машинско учење, као што је scikit-learn, нуде уграђене функције за поделу података. 
На пример, функција `train_test_split` може аутоматски поделити ваше податке.\n", + "\n", + "### Пример кода\n", + "\n", + "Ево примера како можете поделити податке користећи Python и scikit-learn:\n", + "\n", + "```python\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "# Претпоставимо да имате податке X и ознаке y\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", + "\n", + "print(\"Број узорака у скупу за обуку:\", len(X_train))\n", + "print(\"Број узорака у скупу за тестирање:\", len(X_test))\n", + "```\n", + "\n", + "### Савети\n", + "\n", + "- **Чување репродуцибилности** \n", + " Користите параметар `random_state` у функцији `train_test_split` како бисте осигурали да је подела података увек иста сваки пут када покренете код.\n", + "\n", + "- **Проверите расподелу класа** \n", + " Ако радите са класификационим проблемом, уверите се да је расподела класа слична у оба скупа. Ово можете постићи коришћењем параметра `stratify` у функцији `train_test_split`.\n", + "\n", + "- **Избегавајте цурење података** \n", + " Уверите се да подаци из скупа за тестирање не утичу на процес обуке. На пример, ако радите са временским серијама, подаци треба да буду подељени хронолошки, а не случајно.\n", + "\n", + "### Закључак\n", + "\n", + "Подела података на скуп за обуку и скуп за тестирање је кључни корак у машинском учењу. 
Правилна подела осигурава да ваш модел буде добро обучен и да се његова перформанса реално процени.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 20, + "source": [ + "train_start_dt = '2014-11-01 00:00:00'\n", + "test_start_dt = '2014-12-30 00:00:00' " + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 21, + "source": [ + "energy[(energy.index < test_start_dt) & (energy.index >= train_start_dt)][['load']].rename(columns={'load':'train'}) \\\n", + " .join(energy[test_start_dt:][['load']].rename(columns={'load':'test'}), how='outer') \\\n", + " .plot(y=['train', 'test'], figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 22, + "source": [ + "train = energy.copy()[(energy.index >= train_start_dt) & (energy.index < test_start_dt)][['load']]\n", + "test = energy.copy()[energy.index >= test_start_dt][['load']]\n", + "\n", + "print('Training data shape: ', train.shape)\n", + "print('Test data shape: ', test.shape)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Training data shape: (1416, 1)\n", + "Test data shape: (48, 1)\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 23, + "source": [ + "scaler = MinMaxScaler()\n", + "train['load'] = scaler.fit_transform(train)\n", + "train.head(10)" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2014-11-01 00:00:000.10
2014-11-01 01:00:000.07
2014-11-01 02:00:000.05
2014-11-01 03:00:000.04
2014-11-01 04:00:000.06
2014-11-01 05:00:000.10
2014-11-01 06:00:000.19
2014-11-01 07:00:000.31
2014-11-01 08:00:000.40
2014-11-01 09:00:000.48
\n", + "
" + ], + "text/plain": [ + " load\n", + "2014-11-01 00:00:00 0.10\n", + "2014-11-01 01:00:00 0.07\n", + "2014-11-01 02:00:00 0.05\n", + "2014-11-01 03:00:00 0.04\n", + "2014-11-01 04:00:00 0.06\n", + "2014-11-01 05:00:00 0.10\n", + "2014-11-01 06:00:00 0.19\n", + "2014-11-01 07:00:00 0.31\n", + "2014-11-01 08:00:00 0.40\n", + "2014-11-01 09:00:00 0.48" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Оригинални наспрам скалираних података:\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 24, + "source": [ + "energy[(energy.index >= train_start_dt) & (energy.index < test_start_dt)][['load']].rename(columns={'load':'original load'}).plot.hist(bins=100, fontsize=12)\n", + "train.rename(columns={'load':'scaled load'}).plot.hist(bins=100, fontsize=12)\n", + "plt.show()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Хајде да такође скалирамо тест податке\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 25, + "source": [ + "test['load'] = scaler.transform(test)\n", + "test.head()" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2014-12-30 00:00:000.33
2014-12-30 01:00:000.29
2014-12-30 02:00:000.27
2014-12-30 03:00:000.27
2014-12-30 04:00:000.30
\n", + "
" + ], + "text/plain": [ + " load\n", + "2014-12-30 00:00:00 0.33\n", + "2014-12-30 01:00:00 0.29\n", + "2014-12-30 02:00:00 0.27\n", + "2014-12-30 03:00:00 0.27\n", + "2014-12-30 04:00:00 0.30" + ] + }, + "metadata": {}, + "execution_count": 25 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 26, + "source": [ + "# Specify the number of steps to forecast ahead\n", + "HORIZON = 3\n", + "print('Forecasting horizon:', HORIZON, 'hours')" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Forecasting horizon: 3 hours\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 27, + "source": [ + "order = (4, 1, 0)\n", + "seasonal_order = (1, 1, 0, 24)\n", + "\n", + "model = SARIMAX(endog=train, order=order, seasonal_order=seasonal_order)\n", + "results = model.fit()\n", + "\n", + "print(results.summary())\n" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " SARIMAX Results \n", + "==========================================================================================\n", + "Dep. Variable: load No. 
Observations: 1416\n", + "Model: SARIMAX(4, 1, 0)x(1, 1, 0, 24) Log Likelihood 3477.239\n", + "Date: Thu, 30 Sep 2021 AIC -6942.477\n", + "Time: 14:36:28 BIC -6911.050\n", + "Sample: 11-01-2014 HQIC -6930.725\n", + " - 12-29-2014 \n", + "Covariance Type: opg \n", + "==============================================================================\n", + " coef std err z P>|z| [0.025 0.975]\n", + "------------------------------------------------------------------------------\n", + "ar.L1 0.8403 0.016 52.226 0.000 0.809 0.872\n", + "ar.L2 -0.5220 0.034 -15.388 0.000 -0.588 -0.456\n", + "ar.L3 0.1536 0.044 3.470 0.001 0.067 0.240\n", + "ar.L4 -0.0778 0.036 -2.158 0.031 -0.148 -0.007\n", + "ar.S.L24 -0.2327 0.024 -9.718 0.000 -0.280 -0.186\n", + "sigma2 0.0004 8.32e-06 47.358 0.000 0.000 0.000\n", + "===================================================================================\n", + "Ljung-Box (L1) (Q): 0.05 Jarque-Bera (JB): 1464.60\n", + "Prob(Q): 0.83 Prob(JB): 0.00\n", + "Heteroskedasticity (H): 0.84 Skew: 0.14\n", + "Prob(H) (two-sided): 0.07 Kurtosis: 8.02\n", + "===================================================================================\n", + "\n", + "Warnings:\n", + "[1] Covariance matrix calculated using the outer product of gradients (complex-step).\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Процените модел\n" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Креирајте тестну тачку података за сваки корак HORIZON.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 28, + "source": [ + "test_shifted = test.copy()\n", + "\n", + "for t in range(1, HORIZON):\n", + " test_shifted['load+'+str(t)] = test_shifted['load'].shift(-t, freq='H')\n", + " \n", + "test_shifted = test_shifted.dropna(how='any')\n", + "test_shifted.head(5)" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
loadload+1load+2
2014-12-30 00:00:000.330.290.27
2014-12-30 01:00:000.290.270.27
2014-12-30 02:00:000.270.270.30
2014-12-30 03:00:000.270.300.41
2014-12-30 04:00:000.300.410.57
\n", + "
" + ], + "text/plain": [ + " load load+1 load+2\n", + "2014-12-30 00:00:00 0.33 0.29 0.27\n", + "2014-12-30 01:00:00 0.29 0.27 0.27\n", + "2014-12-30 02:00:00 0.27 0.27 0.30\n", + "2014-12-30 03:00:00 0.27 0.30 0.41\n", + "2014-12-30 04:00:00 0.30 0.41 0.57" + ] + }, + "metadata": {}, + "execution_count": 28 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Направите предвиђања на тест подацима\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 29, + "source": [ + "%%time\n", + "training_window = 720 # dedicate 30 days (720 hours) for training\n", + "\n", + "train_ts = train['load']\n", + "test_ts = test_shifted\n", + "\n", + "history = [x for x in train_ts]\n", + "history = history[(-training_window):]\n", + "\n", + "predictions = list()\n", + "\n", + "# let's user simpler model for demonstration\n", + "order = (2, 1, 0)\n", + "seasonal_order = (1, 1, 0, 24)\n", + "\n", + "for t in range(test_ts.shape[0]):\n", + " model = SARIMAX(endog=history, order=order, seasonal_order=seasonal_order)\n", + " model_fit = model.fit()\n", + " yhat = model_fit.forecast(steps = HORIZON)\n", + " predictions.append(yhat)\n", + " obs = list(test_ts.iloc[t])\n", + " # move the training window\n", + " history.append(obs[0])\n", + " history.pop(0)\n", + " print(test_ts.index[t])\n", + " print(t+1, ': predicted =', yhat, 'expected =', obs)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "2014-12-30 00:00:00\n", + "1 : predicted = [0.32 0.29 0.28] expected = [0.32945389435989236, 0.2900626678603402, 0.2739480752014323]\n", + "2014-12-30 01:00:00\n", + "2 : predicted = [0.3 0.29 0.3 ] expected = [0.2900626678603402, 0.2739480752014323, 0.26812891674127126]\n", + "2014-12-30 02:00:00\n", + "3 : predicted = [0.27 0.28 0.32] expected = [0.2739480752014323, 0.26812891674127126, 0.3025962399283795]\n", + "2014-12-30 03:00:00\n", + "4 : predicted = [0.28 0.32 0.42] expected = [0.26812891674127126, 
0.3025962399283795, 0.40823634735899716]\n", + "2014-12-30 04:00:00\n", + "5 : predicted = [0.3 0.39 0.54] expected = [0.3025962399283795, 0.40823634735899716, 0.5689346463742166]\n", + "2014-12-30 05:00:00\n", + "6 : predicted = [0.4 0.55 0.66] expected = [0.40823634735899716, 0.5689346463742166, 0.6799462846911368]\n", + "2014-12-30 06:00:00\n", + "7 : predicted = [0.57 0.68 0.75] expected = [0.5689346463742166, 0.6799462846911368, 0.7309758281110115]\n", + "2014-12-30 07:00:00\n", + "8 : predicted = [0.68 0.75 0.8 ] expected = [0.6799462846911368, 0.7309758281110115, 0.7511190689346463]\n", + "2014-12-30 08:00:00\n", + "9 : predicted = [0.75 0.8 0.82] expected = [0.7309758281110115, 0.7511190689346463, 0.7636526410026856]\n", + "2014-12-30 09:00:00\n", + "10 : predicted = [0.77 0.78 0.78] expected = [0.7511190689346463, 0.7636526410026856, 0.7381378692927483]\n", + "2014-12-30 10:00:00\n", + "11 : predicted = [0.76 0.75 0.74] expected = [0.7636526410026856, 0.7381378692927483, 0.7188898836168307]\n", + "2014-12-30 11:00:00\n", + "12 : predicted = [0.77 0.76 0.75] expected = [0.7381378692927483, 0.7188898836168307, 0.7090420769919425]\n", + "2014-12-30 12:00:00\n", + "13 : predicted = [0.7 0.68 0.69] expected = [0.7188898836168307, 0.7090420769919425, 0.7081468218442255]\n", + "2014-12-30 13:00:00\n", + "14 : predicted = [0.72 0.73 0.76] expected = [0.7090420769919425, 0.7081468218442255, 0.7385854968666068]\n", + "2014-12-30 14:00:00\n", + "15 : predicted = [0.71 0.73 0.86] expected = [0.7081468218442255, 0.7385854968666068, 0.8478066248880931]\n", + "2014-12-30 15:00:00\n", + "16 : predicted = [0.73 0.85 0.97] expected = [0.7385854968666068, 0.8478066248880931, 0.9516562220232765]\n", + "2014-12-30 16:00:00\n", + "17 : predicted = [0.87 0.99 0.97] expected = [0.8478066248880931, 0.9516562220232765, 0.934198746642793]\n", + "2014-12-30 17:00:00\n", + "18 : predicted = [0.94 0.92 0.86] expected = [0.9516562220232765, 0.934198746642793, 0.8876454789615038]\n", + 
"2014-12-30 18:00:00\n", + "19 : predicted = [0.94 0.89 0.82] expected = [0.934198746642793, 0.8876454789615038, 0.8294538943598924]\n", + "2014-12-30 19:00:00\n", + "20 : predicted = [0.88 0.82 0.71] expected = [0.8876454789615038, 0.8294538943598924, 0.7197851387645477]\n", + "2014-12-30 20:00:00\n", + "21 : predicted = [0.83 0.72 0.58] expected = [0.8294538943598924, 0.7197851387645477, 0.5747538048343777]\n", + "2014-12-30 21:00:00\n", + "22 : predicted = [0.72 0.58 0.47] expected = [0.7197851387645477, 0.5747538048343777, 0.4592658907788718]\n", + "2014-12-30 22:00:00\n", + "23 : predicted = [0.58 0.47 0.39] expected = [0.5747538048343777, 0.4592658907788718, 0.3858549686660697]\n", + "2014-12-30 23:00:00\n", + "24 : predicted = [0.46 0.38 0.34] expected = [0.4592658907788718, 0.3858549686660697, 0.34377797672336596]\n", + "2014-12-31 00:00:00\n", + "25 : predicted = [0.38 0.34 0.33] expected = [0.3858549686660697, 0.34377797672336596, 0.32542524619516544]\n", + "2014-12-31 01:00:00\n", + "26 : predicted = [0.36 0.34 0.34] expected = [0.34377797672336596, 0.32542524619516544, 0.33034914950760963]\n", + "2014-12-31 02:00:00\n", + "27 : predicted = [0.32 0.32 0.35] expected = [0.32542524619516544, 0.33034914950760963, 0.3706356311548791]\n", + "2014-12-31 03:00:00\n", + "28 : predicted = [0.32 0.36 0.47] expected = [0.33034914950760963, 0.3706356311548791, 0.470008952551477]\n", + "2014-12-31 04:00:00\n", + "29 : predicted = [0.37 0.48 0.65] expected = [0.3706356311548791, 0.470008952551477, 0.6145926589077886]\n", + "2014-12-31 05:00:00\n", + "30 : predicted = [0.48 0.64 0.75] expected = [0.470008952551477, 0.6145926589077886, 0.7247090420769919]\n", + "2014-12-31 06:00:00\n", + "31 : predicted = [0.63 0.73 0.79] expected = [0.6145926589077886, 0.7247090420769919, 0.786034019695613]\n", + "2014-12-31 07:00:00\n", + "32 : predicted = [0.71 0.76 0.79] expected = [0.7247090420769919, 0.786034019695613, 0.8012533572068039]\n", + "2014-12-31 08:00:00\n", + "33 : 
predicted = [0.79 0.82 0.83] expected = [0.786034019695613, 0.8012533572068039, 0.7994628469113696]\n", + "2014-12-31 09:00:00\n", + "34 : predicted = [0.82 0.83 0.81] expected = [0.8012533572068039, 0.7994628469113696, 0.780214861235452]\n", + "2014-12-31 10:00:00\n", + "35 : predicted = [0.8 0.78 0.76] expected = [0.7994628469113696, 0.780214861235452, 0.7587287376902416]\n", + "2014-12-31 11:00:00\n", + "36 : predicted = [0.77 0.75 0.74] expected = [0.780214861235452, 0.7587287376902416, 0.7367949865711727]\n", + "2014-12-31 12:00:00\n", + "37 : predicted = [0.77 0.76 0.76] expected = [0.7587287376902416, 0.7367949865711727, 0.7188898836168307]\n", + "2014-12-31 13:00:00\n", + "38 : predicted = [0.75 0.75 0.78] expected = [0.7367949865711727, 0.7188898836168307, 0.7273948075201431]\n", + "2014-12-31 14:00:00\n", + "39 : predicted = [0.73 0.75 0.87] expected = [0.7188898836168307, 0.7273948075201431, 0.8299015219337511]\n", + "2014-12-31 15:00:00\n", + "40 : predicted = [0.74 0.85 0.96] expected = [0.7273948075201431, 0.8299015219337511, 0.909579230080573]\n", + "2014-12-31 16:00:00\n", + "41 : predicted = [0.83 0.94 0.93] expected = [0.8299015219337511, 0.909579230080573, 0.855863921217547]\n", + "2014-12-31 17:00:00\n", + "42 : predicted = [0.94 0.93 0.88] expected = [0.909579230080573, 0.855863921217547, 0.7721575649059982]\n", + "2014-12-31 18:00:00\n", + "43 : predicted = [0.87 0.82 0.77] expected = [0.855863921217547, 0.7721575649059982, 0.7023276633840643]\n", + "2014-12-31 19:00:00\n", + "44 : predicted = [0.79 0.73 0.63] expected = [0.7721575649059982, 0.7023276633840643, 0.6195165622202325]\n", + "2014-12-31 20:00:00\n", + "45 : predicted = [0.7 0.59 0.46] expected = [0.7023276633840643, 0.6195165622202325, 0.5425246195165621]\n", + "2014-12-31 21:00:00\n", + "46 : predicted = [0.6 0.47 0.36] expected = [0.6195165622202325, 0.5425246195165621, 0.4735899731423454]\n", + "CPU times: user 12min 15s, sys: 2min 39s, total: 14min 54s\n", + "Wall time: 2min 
36s\n" + ] + } + ], + "metadata": { + "scrolled": true + } + }, + { + "cell_type": "markdown", + "source": [ + "Упоредите предвиђања са стварним оптерећењем\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 30, + "source": [ + "eval_df = pd.DataFrame(predictions, columns=['t+'+str(t) for t in range(1, HORIZON+1)])\n", + "eval_df['timestamp'] = test.index[0:len(test.index)-HORIZON+1]\n", + "eval_df = pd.melt(eval_df, id_vars='timestamp', value_name='prediction', var_name='h')\n", + "eval_df['actual'] = np.array(np.transpose(test_ts)).ravel()\n", + "eval_df[['prediction', 'actual']] = scaler.inverse_transform(eval_df[['prediction', 'actual']])\n", + "eval_df.head()" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
timestamphpredictionactual
02014-12-30 00:00:00t+13,008.743,023.00
12014-12-30 01:00:00t+12,955.532,935.00
22014-12-30 02:00:00t+12,900.172,899.00
32014-12-30 03:00:00t+12,917.692,886.00
42014-12-30 04:00:00t+12,946.992,963.00
\n", + "
" + ], + "text/plain": [ + " timestamp h prediction actual\n", + "0 2014-12-30 00:00:00 t+1 3,008.74 3,023.00\n", + "1 2014-12-30 01:00:00 t+1 2,955.53 2,935.00\n", + "2 2014-12-30 02:00:00 t+1 2,900.17 2,899.00\n", + "3 2014-12-30 03:00:00 t+1 2,917.69 2,886.00\n", + "4 2014-12-30 04:00:00 t+1 2,946.99 2,963.00" + ] + }, + "metadata": {}, + "execution_count": 30 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Израчунајте **средњу апсолутну процентуалну грешку (MAPE)** за сва предвиђања\n", + "\n", + "$$MAPE = \\frac{1}{n} \\sum_{t=1}^{n}|\\frac{actual_t - predicted_t}{actual_t}|$$\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 31, + "source": [ + "if(HORIZON > 1):\n", + " eval_df['APE'] = (eval_df['prediction'] - eval_df['actual']).abs() / eval_df['actual']\n", + " print(eval_df.groupby('h')['APE'].mean())" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "h\n", + "t+1 0.01\n", + "t+2 0.01\n", + "t+3 0.02\n", + "Name: APE, dtype: float64\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 32, + "source": [ + "print('One step forecast MAPE: ', (mape(eval_df[eval_df['h'] == 't+1']['prediction'], eval_df[eval_df['h'] == 't+1']['actual']))*100, '%')" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "One step forecast MAPE: 0.5570581332313952 %\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 33, + "source": [ + "print('Multi-step forecast MAPE: ', mape(eval_df['prediction'], eval_df['actual'])*100, '%')" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Multi-step forecast MAPE: 1.1460048657704118 %\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Прикажите предвиђања у односу на стварне вредности за прву недељу тестног скупа\n" 
+ ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 34, + "source": [ + "if(HORIZON == 1):\n", + " ## Plotting single step forecast\n", + " eval_df.plot(x='timestamp', y=['actual', 'prediction'], style=['r', 'b'], figsize=(15, 8))\n", + "\n", + "else:\n", + " ## Plotting multi step forecast\n", + " plot_df = eval_df[(eval_df.h=='t+1')][['timestamp', 'actual']]\n", + " for t in range(1, HORIZON+1):\n", + " plot_df['t+'+str(t)] = eval_df[(eval_df.h=='t+'+str(t))]['prediction'].values\n", + "\n", + " fig = plt.figure(figsize=(15, 8))\n", + " ax = plt.plot(plot_df['timestamp'], plot_df['actual'], color='red', linewidth=4.0)\n", + " ax = fig.add_subplot(111)\n", + " for t in range(1, HORIZON+1):\n", + " x = plot_df['timestamp'][(t-1):]\n", + " y = plot_df['t+'+str(t)][0:len(x)]\n", + " ax.plot(x, y, color='blue', linewidth=4*math.pow(.9,t), alpha=math.pow(0.8,t))\n", + " \n", + " ax.legend(loc='best')\n", + " \n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "No handles with labels found to put in legend.\n" + ] + }, + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако настојимо да обезбедимо тачност, молимо вас да имате у виду да аутоматски преводи могу садржати грешке или нетачности. Оригинални документ на изворном језику треба сматрати ауторитативним извором. За критичне информације препоручује се професионални превод од стране људи. Не сносимо одговорност за било каква погрешна тумачења или неспоразуме који могу произаћи из коришћења овог превода.\n" + ] + } + ], + "metadata": { + "kernel_info": { + "name": "python3" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "nteract": { + "version": "nteract-front-end@1.0.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "c193140200b9684da27e3890211391b6", + "translation_date": "2025-09-06T13:57:54+00:00", + "source_file": "7-TimeSeries/2-ARIMA/solution/notebook.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sr/7-TimeSeries/2-ARIMA/working/notebook.ipynb b/translations/sr/7-TimeSeries/2-ARIMA/working/notebook.ipynb new file mode 
100644 index 000000000..b1e27df85 --- /dev/null +++ b/translations/sr/7-TimeSeries/2-ARIMA/working/notebook.ipynb @@ -0,0 +1,50 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "523ec472196307b3c4235337353c9ceb", + "translation_date": "2025-09-06T14:00:22+00:00", + "source_file": "7-TimeSeries/2-ARIMA/working/notebook.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "Тао Хонг, Пјер Пинсон, Шу Фан, Хамидреза Зареипур, Алберто Троколи и Роб Џ. Хиндман, \"Вероватносно предвиђање енергије: Глобално такмичење у предвиђању енергије 2014 и даље\", International Journal of Forecasting, вол.32, бр.3, стр. 896-913, јул-септембар, 2016.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pip install statsmodels" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако настојимо да обезбедимо тачност, молимо вас да имате у виду да аутоматизовани преводи могу садржати грешке или нетачности. Оригинални документ на изворном језику треба сматрати меродавним извором. За критичне информације препоручује се професионални превод од стране људи. 
Не сносимо одговорност за било каква погрешна тумачења или неспоразуме који могу произаћи из коришћења овог превода.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sr/7-TimeSeries/3-SVR/solution/notebook.ipynb b/translations/sr/7-TimeSeries/3-SVR/solution/notebook.ipynb new file mode 100644 index 000000000..10b1990bd --- /dev/null +++ b/translations/sr/7-TimeSeries/3-SVR/solution/notebook.ipynb @@ -0,0 +1,1023 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "fv9OoQsMFk5A" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "У овом бележнику демонстрирамо како да:\n", + "\n", + "- припремите 2Д временске серије за тренирање модела СВМ регресора \n", + "- имплементирате СВР користећи РБФ језгро \n", + "- евалуирате модел користећи графиконе и МАПЕ \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Увоз модула\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append('../../')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "M687KNlQFp0-" + }, + "outputs": [], + "source": [ + "import os\n", + "import warnings\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import datetime as dt\n", + "import math\n", + "\n", + "from sklearn.svm import SVR\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from common.utils import load_data, mape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Cj-kfVdMGjWP" + }, + "source": [ + "## Припрема података\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8fywSjC6GsRz" + }, + "source": [ + "### Учитај податке\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "aBDkEB11Fumg", + "outputId": 
"99cf7987-0509-4b73-8cc2-75d7da0d2740" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2012-01-01 00:00:002698.0
2012-01-01 01:00:002558.0
2012-01-01 02:00:002444.0
2012-01-01 03:00:002402.0
2012-01-01 04:00:002403.0
\n", + "
" + ], + "text/plain": [ + " load\n", + "2012-01-01 00:00:00 2698.0\n", + "2012-01-01 01:00:00 2558.0\n", + "2012-01-01 02:00:00 2444.0\n", + "2012-01-01 03:00:00 2402.0\n", + "2012-01-01 04:00:00 2403.0" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "energy = load_data('../../data')[['load']]\n", + "energy.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "O0BWP13rGnh4" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 486 + }, + "id": "hGaNPKu_Gidk", + "outputId": "7f89b326-9057-4f49-efbe-cb100ebdf76d" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "energy.plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IPuNor4eGwYY" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "ysvsNyONGt0Q" + }, + "outputs": [], + "source": [ + "train_start_dt = '2014-11-01 00:00:00'\n", + "test_start_dt = '2014-12-30 00:00:00'" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 548 + }, + "id": "SsfdLoPyGy9w", + "outputId": "d6d6c25b-b1f4-47e5-91d1-707e043237d7" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "energy[(energy.index < test_start_dt) & (energy.index >= train_start_dt)][['load']].rename(columns={'load':'train'}) \\\n", + " .join(energy[test_start_dt:][['load']].rename(columns={'load':'test'}), how='outer') \\\n", + " .plot(y=['train', 'test'], figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XbFTqBw6G1Ch" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Сада треба да припремите податке за обуку извршавањем филтрирања и скалирања ваших података.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cYivRdQpHDj3", + "outputId": "a138f746-461c-4fd6-bfa6-0cee094c4aa1" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training data shape: (1416, 1)\n", + "Test data shape: (48, 1)\n" + ] + } + ], + "source": [ + "train = energy.copy()[(energy.index >= train_start_dt) & (energy.index < test_start_dt)][['load']]\n", + "test = energy.copy()[energy.index >= test_start_dt][['load']]\n", + "\n", + "print('Training data shape: ', train.shape)\n", + "print('Test data shape: ', test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Скалирајте податке да буду у опсегу (0, 1).\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "3DNntGQnZX8G", + "outputId": "210046bc-7a66-4ccd-d70d-aa4a7309949c" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2014-11-01 00:00:000.101611
2014-11-01 01:00:000.065801
2014-11-01 02:00:000.046106
2014-11-01 03:00:000.042525
2014-11-01 04:00:000.059087
\n", + "
" + ], + "text/plain": [ + " load\n", + "2014-11-01 00:00:00 0.101611\n", + "2014-11-01 01:00:00 0.065801\n", + "2014-11-01 02:00:00 0.046106\n", + "2014-11-01 03:00:00 0.042525\n", + "2014-11-01 04:00:00 0.059087" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "scaler = MinMaxScaler()\n", + "train['load'] = scaler.fit_transform(train)\n", + "train.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "26Yht-rzZexe", + "outputId": "20326077-a38a-4e78-cc5b-6fd7af95d301" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2014-12-30 00:00:000.329454
2014-12-30 01:00:000.290063
2014-12-30 02:00:000.273948
2014-12-30 03:00:000.268129
2014-12-30 04:00:000.302596
\n", + "
" + ], + "text/plain": [ + " load\n", + "2014-12-30 00:00:00 0.329454\n", + "2014-12-30 01:00:00 0.290063\n", + "2014-12-30 02:00:00 0.273948\n", + "2014-12-30 03:00:00 0.268129\n", + "2014-12-30 04:00:00 0.302596" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test['load'] = scaler.transform(test)\n", + "test.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x0n6jqxOQ41Z" + }, + "source": [ + "### Креирање података са временским корацима\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fdmxTZtOQ8xs" + }, + "source": [ + "За наш СВР, трансформишемо улазне податке у облик `[batch, timesteps]`. Дакле, преобликујемо постојеће `train_data` и `test_data` тако да постоји нова димензија која се односи на временске кораке. У нашем примеру, узимамо `timesteps = 5`. Дакле, улази у модел су подаци за прва 4 временска корака, а излаз ће бити подаци за 5. временски корак.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "Rpju-Sc2HFm0" + }, + "outputs": [], + "source": [ + "# Converting to numpy arrays\n", + "\n", + "train_data = train.values\n", + "test_data = test.values" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# Selecting the timesteps\n", + "\n", + "timesteps=5" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "O-JrsrsVJhUQ", + "outputId": "c90dbe71-bacc-4ec4-b452-f82fe5aefaef" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(1412, 5)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Converting data to 2D tensor\n", + "\n", + "train_data_timesteps=np.array([[j for j in train_data[i:i+timesteps]] for i in range(0,len(train_data)-timesteps+1)])[:,:,0]\n", + "train_data_timesteps.shape" + ] + }, + { + 
"cell_type": 
"code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "exJD8AI7KE4g", + "outputId": "ce90260c-f327-427d-80f2-77307b5a6318" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(44, 5)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Converting test data to 2D tensor\n", + "\n", + "test_data_timesteps=np.array([[j for j in test_data[i:i+timesteps]] for i in range(0,len(test_data)-timesteps+1)])[:,:,0]\n", + "test_data_timesteps.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "2u0R2sIsLuq5" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(1412, 4) (1412, 1)\n", + "(44, 4) (44, 1)\n" + ] + } + ], + "source": [ + "x_train, y_train = train_data_timesteps[:,:timesteps-1],train_data_timesteps[:,[timesteps-1]]\n", + "x_test, y_test = test_data_timesteps[:,:timesteps-1],test_data_timesteps[:,[timesteps-1]]\n", + "\n", + "print(x_train.shape, y_train.shape)\n", + "print(x_test.shape, y_test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8wIPOtAGLZlh" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "id": "EhA403BEPEiD" + }, + "outputs": [], + "source": [ + "# Create model using RBF kernel\n", + "\n", + "model = SVR(kernel='rbf',gamma=0.5, C=10, epsilon = 0.05)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GS0UA3csMbqp", + "outputId": "d86b6f05-5742-4c1d-c2db-c40510bd4f0d" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "SVR(C=10, cache_size=200, coef0=0.0, degree=3, epsilon=0.05, gamma=0.5,\n", + " kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Fit model 
on training data\n", + "\n", + "model.fit(x_train, y_train[:,0])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Rz_x8S3UrlcF" + }, + "source": [ + "### Направи предвиђање модела\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XR0gnt3MnuYS", + "outputId": "157e40ab-9a23-4b66-a885-0d52a24b2364" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(1412, 1) (44, 1)\n" + ] + } + ], + "source": [ + "# Making predictions\n", + "\n", + "y_train_pred = model.predict(x_train).reshape(-1,1)\n", + "y_test_pred = model.predict(x_test).reshape(-1,1)\n", + "\n", + "print(y_train_pred.shape, y_test_pred.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_2epncg-SGzr" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1412 44\n" + ] + } + ], + "source": [ + "# Scaling the predictions\n", + "\n", + "y_train_pred = scaler.inverse_transform(y_train_pred)\n", + "y_test_pred = scaler.inverse_transform(y_test_pred)\n", + "\n", + "print(len(y_train_pred), len(y_test_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xmm_YLXhq7gV", + "outputId": "18392f64-4029-49ac-c71a-a4e2411152a1" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1412 44\n" + ] + } + ], + "source": [ + "# Scaling the original values\n", + "\n", + "y_train = scaler.inverse_transform(y_train)\n", + "y_test = scaler.inverse_transform(y_test)\n", + "\n", + "print(len(y_train), len(y_test))" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "u3LBj93coHEi", + "outputId": 
"d4fd49e8-8c6e-4bb0-8ef9-ca0b26d725b4" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1412 44\n" + ] + } + ], + "source": [ + "# Extract the timesteps for x-axis\n", + "\n", + "train_timestamps = energy[(energy.index < test_start_dt) & (energy.index >= train_start_dt)].index[timesteps-1:]\n", + "test_timestamps = energy[test_start_dt:].index[timesteps-1:]\n", + "\n", + "print(len(train_timestamps), len(test_timestamps))" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(25,6))\n", + "plt.plot(train_timestamps, y_train, color = 'red', linewidth=2.0, alpha = 0.6)\n", + "plt.plot(train_timestamps, y_train_pred, color = 'blue', linewidth=0.8)\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.title(\"Training data prediction\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "LnhzcnYtXHCm", + "outputId": "f5f0d711-f18b-4788-ad21-d4470ea2c02b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MAPE for training data: 1.7195710200875551 %\n" + ] + } + ], + "source": [ + "print('MAPE for training data: ', mape(y_train_pred, y_train)*100, '%')" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 225 + }, + "id": "53Q02FoqQH4V", + "outputId": "53e2d59b-5075-4765-ad9e-aed56c966583" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(10,3))\n", + "plt.plot(test_timestamps, y_test, color = 'red', linewidth=2.0, alpha = 0.6)\n", + "plt.plot(test_timestamps, y_test_pred, color = 'blue', linewidth=0.8)\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "clOAUH-SXCJG", + "outputId": "a3aa85ff-126a-4a4a-cd9e-90b9cc465ef5" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MAPE for testing data: 1.2623790187854018 %\n" + ] + } + ], + "source": [ + "print('MAPE for testing data: ', mape(y_test_pred, y_test)*100, '%')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DHlKvVCId5ue" + }, + "source": [ + "## Предвиђање целокупног скупа података\n" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cOFJ45vreO0N", + "outputId": "35628e33-ecf9-4966-8036-f7ea86db6f16" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tensor shape: (26300, 5)\n", + "X shape: (26300, 4) \n", + "Y shape: (26300, 1)\n" + ] + } + ], + "source": [ + "# Extracting load values as numpy array\n", + "data = energy.copy().values\n", + "\n", + "# Scaling\n", + "data = scaler.transform(data)\n", + "\n", + "# Transforming to 2D tensor as per model input requirement\n", + "data_timesteps=np.array([[j for j in data[i:i+timesteps]] for i in range(0,len(data)-timesteps+1)])[:,:,0]\n", + "print(\"Tensor shape: \", data_timesteps.shape)\n", + "\n", + "# Selecting inputs and outputs from data\n", + "X, Y = data_timesteps[:,:timesteps-1],data_timesteps[:,[timesteps-1]]\n", + "print(\"X shape: \", X.shape,\"\\nY shape: \", Y.shape)" + ] + }, + { + 
"cell_type": "code", + "execution_count": 26, + "metadata": { + "id": "ESSAdQgwexIi" + }, + "outputs": [], + "source": [ + "# Make model predictions\n", + "Y_pred = model.predict(X).reshape(-1,1)\n", + "\n", + "# Inverse scale and reshape\n", + "Y_pred = scaler.inverse_transform(Y_pred)\n", + "Y = scaler.inverse_transform(Y)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 328 + }, + "id": "M_qhihN0RVVX", + "outputId": "a89cb23e-1d35-437f-9d63-8b8907e12f80" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(30,8))\n", + "plt.plot(Y, color = 'red', linewidth=2.0, alpha = 0.6)\n", + "plt.plot(Y_pred, color = 'blue', linewidth=1)\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "AcN7pMYXVGTK", + "outputId": "7e1c2161-47ce-496c-9d86-7ad9ae0df770" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MAPE: 2.0572089029888656 %\n" + ] + } + ], + "source": [ + "print('MAPE: ', mape(Y_pred, Y)*100, '%')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако тежимо тачности, молимо вас да имате у виду да аутоматски преводи могу садржати грешке или нетачности. Оригинални документ на изворном језику треба сматрати ауторитативним извором. За критичне информације препоручује се професионални превод од стране људи. 
Не сносимо одговорност за било каква погрешна тумачења или неспоразуме који могу произаћи из коришћења овог превода.\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "Recurrent_Neural_Networks.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.1" + }, + "coopTranslator": { + "original_hash": "f8f3967282314d3995245835bdaa8418", + "translation_date": "2025-09-06T14:03:51+00:00", + "source_file": "7-TimeSeries/3-SVR/solution/notebook.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/translations/sr/7-TimeSeries/3-SVR/working/notebook.ipynb b/translations/sr/7-TimeSeries/3-SVR/working/notebook.ipynb new file mode 100644 index 000000000..934543ee6 --- /dev/null +++ b/translations/sr/7-TimeSeries/3-SVR/working/notebook.ipynb @@ -0,0 +1,699 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "fv9OoQsMFk5A" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "У овом бележнику демонстрирамо како да:\n", + "\n", + "- припремите 2Д временске серије за тренирање модела СВМ регресора \n", + "- имплементирате СВР користећи РБФ језгро \n", + "- евалуирате модел користећи графике и МАПЕ \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Увоз модула\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append('../../')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "M687KNlQFp0-" + }, + "outputs": [], + "source": [ + 
"import os\n", + "import warnings\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import datetime as dt\n", + "import math\n", + "\n", + "from sklearn.svm import SVR\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from common.utils import load_data, mape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Cj-kfVdMGjWP" + }, + "source": [ + "## Припрема података\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8fywSjC6GsRz" + }, + "source": [ + "### Учитај податке\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "aBDkEB11Fumg", + "outputId": "99cf7987-0509-4b73-8cc2-75d7da0d2740" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2012-01-01 00:00:002698.0
2012-01-01 01:00:002558.0
2012-01-01 02:00:002444.0
2012-01-01 03:00:002402.0
2012-01-01 04:00:002403.0
\n", + "
" + ], + "text/plain": [ + " load\n", + "2012-01-01 00:00:00 2698.0\n", + "2012-01-01 01:00:00 2558.0\n", + "2012-01-01 02:00:00 2444.0\n", + "2012-01-01 03:00:00 2402.0\n", + "2012-01-01 04:00:00 2403.0" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "energy = load_data('../../data')[['load']]\n", + "energy.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "O0BWP13rGnh4" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 486 + }, + "id": "hGaNPKu_Gidk", + "outputId": "7f89b326-9057-4f49-efbe-cb100ebdf76d" + }, + "outputs": [], + "source": [ + "energy.plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IPuNor4eGwYY" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ysvsNyONGt0Q" + }, + "outputs": [], + "source": [ + "train_start_dt = '2014-11-01 00:00:00'\n", + "test_start_dt = '2014-12-30 00:00:00'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 548 + }, + "id": "SsfdLoPyGy9w", + "outputId": "d6d6c25b-b1f4-47e5-91d1-707e043237d7" + }, + "outputs": [], + "source": [ + "energy[(energy.index < test_start_dt) & (energy.index >= train_start_dt)][['load']].rename(columns={'load':'train'}) \\\n", + " .join(energy[test_start_dt:][['load']].rename(columns={'load':'test'}), how='outer') \\\n", + " .plot(y=['train', 'test'], figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XbFTqBw6G1Ch" + }, + "source": [] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "Сада треба да припремите податке за обуку извршавањем филтрирања и скалирања ваших података.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cYivRdQpHDj3", + "outputId": "a138f746-461c-4fd6-bfa6-0cee094c4aa1" + }, + "outputs": [], + "source": [ + "train = energy.copy()[(energy.index >= train_start_dt) & (energy.index < test_start_dt)][['load']]\n", + "test = energy.copy()[energy.index >= test_start_dt][['load']]\n", + "\n", + "print('Training data shape: ', train.shape)\n", + "print('Test data shape: ', test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Скалирајте податке да буду у опсегу (0, 1).\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "3DNntGQnZX8G", + "outputId": "210046bc-7a66-4ccd-d70d-aa4a7309949c" + }, + "outputs": [], + "source": [ + "scaler = MinMaxScaler()\n", + "train['load'] = scaler.fit_transform(train)\n", + "train.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "26Yht-rzZexe", + "outputId": "20326077-a38a-4e78-cc5b-6fd7af95d301" + }, + "outputs": [], + "source": [ + "test['load'] = scaler.transform(test)\n", + "test.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x0n6jqxOQ41Z" + }, + "source": [ + "### Креирање података са временским корацима\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fdmxTZtOQ8xs" + }, + "source": [ + "За наш SVR, трансформишемо улазне податке у облик `[batch, timesteps]`. Дакле, преобликујемо постојеће `train_data` и `test_data` тако да постоји нова димензија која се односи на временске кораке. У нашем примеру, узимамо `timesteps = 5`. 
Дакле, улази у модел су подаци за прва 4 временска корака, а излаз ће бити подаци за 5. временски корак.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Rpju-Sc2HFm0" + }, + "outputs": [], + "source": [ + "# Converting to numpy arrays\n", + "\n", + "train_data = train.values\n", + "test_data = test.values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Selecting the timesteps\n", + "\n", + "timesteps=None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "O-JrsrsVJhUQ", + "outputId": "c90dbe71-bacc-4ec4-b452-f82fe5aefaef" + }, + "outputs": [], + "source": [ + "# Converting data to 2D tensor\n", + "\n", + "train_data_timesteps=None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "exJD8AI7KE4g", + "outputId": "ce90260c-f327-427d-80f2-77307b5a6318" + }, + "outputs": [], + "source": [ + "# Converting test data to 2D tensor\n", + "\n", + "test_data_timesteps=None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2u0R2sIsLuq5" + }, + "outputs": [], + "source": [ + "x_train, y_train = None\n", + "x_test, y_test = None\n", + "\n", + "print(x_train.shape, y_train.shape)\n", + "print(x_test.shape, y_test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8wIPOtAGLZlh" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EhA403BEPEiD" + }, + "outputs": [], + "source": [ + "# Create model using RBF kernel\n", + "\n", + "model = None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GS0UA3csMbqp", + "outputId": "d86b6f05-5742-4c1d-c2db-c40510bd4f0d" + }, + "outputs": [], 
+ "source": [ + "# Fit model on training data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Rz_x8S3UrlcF" + }, + "source": [ + "### Направи предвиђање модела\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XR0gnt3MnuYS", + "outputId": "157e40ab-9a23-4b66-a885-0d52a24b2364" + }, + "outputs": [], + "source": [ + "# Making predictions\n", + "\n", + "y_train_pred = None\n", + "y_test_pred = None" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_2epncg-SGzr" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Scaling the predictions\n", + "\n", + "y_train_pred = scaler.inverse_transform(y_train_pred)\n", + "y_test_pred = scaler.inverse_transform(y_test_pred)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xmm_YLXhq7gV", + "outputId": "18392f64-4029-49ac-c71a-a4e2411152a1" + }, + "outputs": [], + "source": [ + "# Scaling the original values\n", + "\n", + "y_train = scaler.inverse_transform(y_train)\n", + "y_test = scaler.inverse_transform(y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "u3LBj93coHEi", + "outputId": "d4fd49e8-8c6e-4bb0-8ef9-ca0b26d725b4" + }, + "outputs": [], + "source": [ + "# Extract the timesteps for x-axis\n", + "\n", + "train_timestamps = None\n", + "test_timestamps = None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(25,6))\n", + "# plot original output\n", + "# plot predicted output\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.title(\"Training data prediction\")\n", + "plt.show()" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "LnhzcnYtXHCm", + "outputId": "f5f0d711-f18b-4788-ad21-d4470ea2c02b" + }, + "outputs": [], + "source": [ + "print('MAPE for training data: ', mape(y_train_pred, y_train)*100, '%')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 225 + }, + "id": "53Q02FoqQH4V", + "outputId": "53e2d59b-5075-4765-ad9e-aed56c966583" + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(10,3))\n", + "# plot original output\n", + "# plot predicted output\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "clOAUH-SXCJG", + "outputId": "a3aa85ff-126a-4a4a-cd9e-90b9cc465ef5" + }, + "outputs": [], + "source": [ + "print('MAPE for testing data: ', mape(y_test_pred, y_test)*100, '%')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DHlKvVCId5ue" + }, + "source": [ + "## Предвиђање целокупног скупа података\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cOFJ45vreO0N", + "outputId": "35628e33-ecf9-4966-8036-f7ea86db6f16" + }, + "outputs": [], + "source": [ + "# Extracting load values as numpy array\n", + "data = None\n", + "\n", + "# Scaling\n", + "data = None\n", + "\n", + "# Transforming to 2D tensor as per model input requirement\n", + "data_timesteps=None\n", + "\n", + "# Selecting inputs and outputs from data\n", + "X, Y = None, None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ESSAdQgwexIi" + }, + "outputs": [], + "source": [ + "# Make model predictions\n", + "\n", + "# Inverse scale and reshape\n", + "Y_pred = 
None\n", + "Y = None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 328 + }, + "id": "M_qhihN0RVVX", + "outputId": "a89cb23e-1d35-437f-9d63-8b8907e12f80" + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(30,8))\n", + "# plot original output\n", + "# plot predicted output\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "AcN7pMYXVGTK", + "outputId": "7e1c2161-47ce-496c-9d86-7ad9ae0df770" + }, + "outputs": [], + "source": [ + "print('MAPE: ', mape(Y_pred, Y)*100, '%')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако тежимо тачности, молимо вас да имате у виду да аутоматски преводи могу садржати грешке или нетачности. Оригинални документ на изворном језику треба сматрати ауторитативним извором. За критичне информације препоручује се професионални превод од стране људи. 
Не сносимо одговорност за било каква неспоразумевања или погрешна тумачења која могу произаћи из коришћења овог превода.\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "Recurrent_Neural_Networks.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.1" + }, + "coopTranslator": { + "original_hash": "e86ce102239a14c44585623b9b924a74", + "translation_date": "2025-09-06T14:06:21+00:00", + "source_file": "7-TimeSeries/3-SVR/working/notebook.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/translations/sr/8-Reinforcement/1-QLearning/notebook.ipynb b/translations/sr/8-Reinforcement/1-QLearning/notebook.ipynb new file mode 100644 index 000000000..aaf55ee51 --- /dev/null +++ b/translations/sr/8-Reinforcement/1-QLearning/notebook.ipynb @@ -0,0 +1,411 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "17e5a668646eabf5aabd0e9bfcf17876", + "translation_date": "2025-09-06T15:04:04+00:00", + "source_file": "8-Reinforcement/1-QLearning/notebook.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# 
Петар и вук: Увод у учење путем појачања\n", + "\n", + "У овом туторијалу ћемо научити како да применимо учење путем појачања на проблем проналажења пута. Овај сценарио је инспирисан музичком бајком [Петар и вук](https://en.wikipedia.org/wiki/Peter_and_the_Wolf) руског композитора [Сергеја Прокофјева](https://en.wikipedia.org/wiki/Sergei_Prokofiev). То је прича о младом пиониру Петру, који храбро излази из своје куће на шумску чистину да би јурио вука. Тренираћемо алгоритме машинског учења који ће помоћи Петру да истражи околину и направи оптималну навигациону мапу.\n", + "\n", + "Прво, хајде да увеземо неколико корисних библиотека:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import random\n", + "import math" + ] + }, + { + "source": [ + "## Преглед учења путем појачања\n", + "\n", + "**Учење путем појачања** (RL) је техника учења која нам омогућава да научимо оптимално понашање **агента** у неком **окружењу** кроз извођење многих експеримената. Агент у овом окружењу треба да има неки **циљ**, који је дефинисан **функцијом награде**.\n", + "\n", + "## Окружење\n", + "\n", + "Ради једноставности, замислимо да је Петров свет квадратна табла димензија `width` x `height`. Свака ћелија на овој табли може бити:\n", + "* **тло**, по коме Петар и друга створења могу ходати\n", + "* **вода**, по којој, наравно, не можете ходати\n", + "* **дрво** или **трава** - место где се можете одморити\n", + "* **јабука**, која представља нешто што би Петар радо пронашао како би се прехранио\n", + "* **вук**, који је опасан и треба га избегавати\n", + "\n", + "Да бисмо радили са окружењем, дефинисаћемо класу под називом `Board`. Да не бисмо превише оптеретили овај бележник, сав код за рад са таблом преместили смо у посебан модул `rlboard`, који ћемо сада увозити. 
Можете погледати унутар овог модула за више детаља о унутрашњости имплементације.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "Хајде сада да направимо насумичну таблу и видимо како изгледа:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 1" + ] + }, + { + "source": [ + "## Акције и Политика\n", + "\n", + "У нашем примеру, циљ Петра би био да пронађе јабуку, избегавајући вука и друге препреке. Дефинишите те акције као речник и повежите их са паровима одговарајућих промена координата.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 2" + ] + }, + { + "source": [ + "Стратегија нашег агента (Петар) дефинисана је такозваном **политиком**. Хајде да размотримо најједноставнију политику која се зове **случајна шетња**.\n", + "\n", + "## Случајна шетња\n", + "\n", + "Прво ћемо решити наш проблем применом стратегије случајне шетње.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "# Let's run a random walk experiment several times and see the average number of steps taken: code block 3" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 4" + ] + }, + { + "source": [ + "## Функција награде\n", + "\n", + "Да бисмо нашу политику учинили интелигентнијом, потребно је да разумемо који потези су \"бољи\" од других.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 5" + ] + }, + { + "source": [ + "## Q-Лернинг\n", + "\n", + "Направите Q-Табелу, или вишедимензионални низ. 
Пошто наша табла има димензије `width` x `height`, можемо представити Q-Табелу помоћу numpy низа са обликом `width` x `height` x `len(actions)`:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 6" + ] + }, + { + "source": [ + "Проследите Q-табелу функцији `plot` како бисте визуализовали табелу на табли:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "error", + "ename": "NameError", + "evalue": "name 'm' is not defined", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mQ\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mNameError\u001b[0m: name 'm' is not defined" + ] + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## Суштина Q-Learning-а: Белманова једначина и алгоритам учења\n", + "\n", + "Напишите псеудо-код за наш алгоритам учења:\n", + "\n", + "* Иницијализујте Q-табелу Q са истим вредностима за сва стања и акције\n", + "* Поставите стопу учења $\\alpha\\leftarrow 1$\n", + "* Поновите симулацију много пута\n", + " 1. Почните са случајне позиције\n", + " 1. Понављајте\n", + " 1. Изаберите акцију $a$ у стању $s$\n", + " 2. Извршите акцију преласком у ново стање $s'$\n", + " 3. Ако наиђемо на услов краја игре или је укупна награда сувише мала - изађите из симулације \n", + " 4. Израчунајте награду $r$ у новом стању\n", + " 5. 
Ажурирајте Q-функцију према Белмановој једначини: $Q(s,a)\\leftarrow (1-\\alpha)Q(s,a)+\\alpha(r+\\gamma\\max_{a'}Q(s',a'))$\n", + " 6. $s\\leftarrow s'$\n", + " 7. Ажурирајте укупну награду и смањите $\\alpha$.\n", + "\n", + "## Експлоатација vs. Експлорација\n", + "\n", + "Најбољи приступ је пронаћи равнотежу између истраживања и искоришћавања. Како више учимо о нашем окружењу, вероватније је да ћемо следити оптималну путању, али је важно повремено изабрати неистражену путању.\n", + "\n", + "## Python имплементација\n", + "\n", + "Сада смо спремни да имплементирамо алгоритам учења. Пре тога, потребна нам је и функција која ће произвољне бројеве из Q-табеле претворити у вектор вероватноћа за одговарајуће акције:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 7" + ] + }, + { + "source": [ + "Малу количину `eps` додајемо оригиналном вектору како бисмо избегли дељење са 0 у почетном случају, када су све компоненте вектора идентичне.\n", + "\n", + "Стварни алгоритам учења ћемо покренути за 5000 експеримената, који се такође називају **епохе**:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "" + ] + } + ], + "source": [ + "\n", + "from IPython.display import clear_output\n", + "\n", + "lpath = []\n", + "\n", + "# code block 8" + ] + }, + { + "source": [ + "Након извршавања овог алгоритма, Q-табела би требало да буде ажурирана вредностима које дефинишу привлачност различитих акција у сваком кораку. Визуализујте табелу овде:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## Провера политике\n", + "\n", + "Пошто Q-табела наводи „атрактивност“ сваке акције у сваком стању, прилично је једноставно користити је за дефинисање ефикасне навигације у нашем свету. 
У најједноставнијем случају, можемо једноставно изабрати акцију која одговара највишој вредности у Q-табели:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "2" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ], + "source": [ + "# code block 9" + ] + }, + { + "source": [ + "Ако пробате код изнад неколико пута, можда ћете приметити да понекад само \"заглави\", и потребно је да притиснете дугме STOP у бележнику како бисте га прекинули.\n", + "\n", + "> **Задатак 1:** Измените функцију `walk` тако да ограничи максималну дужину пута на одређени број корака (рецимо, 100), и посматрајте како код изнад повремено враћа ову вредност.\n", + "\n", + "> **Задатак 2:** Измените функцију `walk` тако да не иде назад на места на којима је већ био раније. Ово ће спречити `walk` да уђе у петљу, али агент и даље може завршити \"заробљен\" на локацији са које не може да побегне.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Average path length = 5.31, eaten by wolf: 0 times\n" + ] + } + ], + "source": [ + "\n", + "# code block 10" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 57 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(lpath)" + ] + }, + { + "source": [ + "## Вежба\n", + "## Реалистичнији свет Петра и вука\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако тежимо тачности, молимо вас да имате у виду да аутоматски преводи могу садржати грешке или нетачности. Оригинални документ на изворном језику треба сматрати ауторитативним извором. За критичне информације препоручује се професионални превод од стране људи. 
Не сносимо одговорност за било каква неспоразумевања или погрешна тумачења која могу произаћи из коришћења овог превода.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sr/8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb b/translations/sr/8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb new file mode 100644 index 000000000..849658320 --- /dev/null +++ b/translations/sr/8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb @@ -0,0 +1,469 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "eadbd20d2a075efb602615ad90b1e97a", + "translation_date": "2025-09-06T15:14:18+00:00", + "source_file": "8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# Петар и вук: Реалистично окружење\n", + "\n", + "У нашој ситуацији, Петар је могао да се креће готово без умора или глади. У реалистичнијем свету, морао би с времена на време да седне и одмори се, као и да се нахрани. Хајде да учинимо наш свет реалистичнијим, применом следећих правила:\n", + "\n", + "1. Крећући се са једног места на друго, Петар губи **енергију** и добија одређени ниво **умора**.\n", + "2. Петар може да повећа енергију једући јабуке.\n", + "3. Петар може да се ослободи умора одмарајући се испод дрвета или на трави (тј. уласком на локацију табле са дрветом или травом - зелено поље).\n", + "4. Петар мора да пронађе и убије вука.\n", + "5. 
Да би убио вука, Петар мора да има одређене нивое енергије и умора, иначе губи битку.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import random\n", + "import math\n", + "from rlboard import *" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "width, height = 8,8\n", + "m = Board(width,height)\n", + "m.randomize(seed=13)\n", + "m.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "actions = { \"U\" : (0,-1), \"D\" : (0,1), \"L\" : (-1,0), \"R\" : (1,0) }\n", + "action_idx = { a : i for i,a in enumerate(actions.keys()) }" + ] + }, + { + "source": [ + "## Дефинисање стања\n", + "\n", + "У нашим новим правилима игре, потребно је да пратимо енергију и умор у сваком стању табле. 
Због тога ћемо креирати објекат `state` који ће носити све потребне информације о тренутном стању проблема, укључујући стање табле, тренутне нивое енергије и умора, и да ли можемо победити вука у терминалном стању:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "class state:\n", + " def __init__(self,board,energy=10,fatigue=0,init=True):\n", + " self.board = board\n", + " self.energy = energy\n", + " self.fatigue = fatigue\n", + " self.dead = False\n", + " if init:\n", + " self.board.random_start()\n", + " self.update()\n", + "\n", + " def at(self):\n", + " return self.board.at()\n", + "\n", + " def update(self):\n", + " if self.at() == Board.Cell.water:\n", + " self.dead = True\n", + " return\n", + " if self.at() == Board.Cell.tree:\n", + " self.fatigue = 0\n", + " if self.at() == Board.Cell.apple:\n", + " self.energy = 10\n", + "\n", + " def move(self,a):\n", + " self.board.move(a)\n", + " self.energy -= 1\n", + " self.fatigue += 1\n", + " self.update()\n", + "\n", + " def is_winning(self):\n", + " return self.energy > self.fatigue" + ] + }, + { + "source": [ + "Хајде да покушамо да решимо проблем користећи случајну шетњу и видимо да ли ћемо успети:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ], + "source": [ + "def random_policy(state):\n", + " return random.choice(list(actions))\n", + "\n", + "def walk(board,policy):\n", + " n = 0 # number of steps\n", + " s = state(board)\n", + " while True:\n", + " if s.at() == Board.Cell.wolf:\n", + " if s.is_winning():\n", + " return n # success!\n", + " else:\n", + " return -n # failure!\n", + " if s.at() == Board.Cell.water:\n", + " return 0 # died\n", + " a = 
actions[policy(m)]\n", + " s.move(a)\n", + " n+=1\n", + "\n", + "walk(m,random_policy)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Killed by wolf = 5, won: 1 times, drown: 94 times\n" + ] + } + ], + "source": [ + "def print_statistics(policy):\n", + " s,w,n = 0,0,0\n", + " for _ in range(100):\n", + " z = walk(m,policy)\n", + " if z<0:\n", + " w+=1\n", + " elif z==0:\n", + " n+=1\n", + " else:\n", + " s+=1\n", + " print(f\"Killed by wolf = {w}, won: {s} times, drown: {n} times\")\n", + "\n", + "print_statistics(random_policy)" + ] + }, + { + "source": [ + "## Функција награде\n", + "\n", + "Функција награде је кључни део система за учење заснованог на награђивању. Она дефинише како агент добија повратне информације за своје акције у окружењу.\n", + "\n", + "### Основни принципи\n", + "\n", + "- Функција награде треба да буде једноставна и интуитивна.\n", + "- Треба да подстиче жељено понашање агента.\n", + "- Избегавајте сложене математичке изразе који могу довести до неочекиваних резултата.\n", + "\n", + "[!NOTE] Увек тестирајте функцију награде у симулацији пре него што је примените у стварном окружењу.\n", + "\n", + "### Пример\n", + "\n", + "Ево једноставног примера функције награде:\n", + "\n", + "```python\n", + "def reward_function(params):\n", + " if params['is_off_track']:\n", + " return -10 # Казна за излазак са стазе\n", + " elif params['is_near_center']:\n", + " return 5 # Награда за кретање близу центра\n", + " else:\n", + " return 1 # Мала награда за кретање по стази\n", + "```\n", + "\n", + "### Савети за креирање функције награде\n", + "\n", + "[!TIP] Почните са основним правилима и постепено додајте сложеност.\n", + "\n", + "- Користите параметре као што су брзина, позиција и оријентација за израчунавање награде.\n", + "- Увек узмите у обзир крајњи циљ агента.\n", + "\n", + "[!WARNING] Превише сложена функција награде може довести до 
проблема са учењем.\n", + "\n", + "### Честа питања\n", + "\n", + "#### Шта ако агент не учи како треба?\n", + "\n", + "Проверите да ли функција награде правилно одражава жељено понашање. Можда је потребно да прилагодите вредности награда и казни.\n", + "\n", + "#### Могу ли користити више функција награде?\n", + "\n", + "Да, можете комбиновати више функција награде како бисте постигли сложеније циљеве. Међутим, будите опрезни да не створите конфликте између различитих функција.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "def reward(s):\n", + " r = s.energy-s.fatigue\n", + " if s.at()==Board.Cell.wolf:\n", + " return 100 if s.is_winning() else -100\n", + " if s.at()==Board.Cell.water:\n", + " return -100\n", + " return r" + ] + }, + { + "source": [ + "## Q-Learning алгоритам\n", + "\n", + "Сам алгоритам учења остаје углавном непромењен, само користимо `state` уместо само позиције на табли.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "Q = np.ones((width,height,len(actions)),dtype=np.float)*1.0/len(actions)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "def probs(v,eps=1e-4):\n", + " v = v-v.min()+eps\n", + " v = v/v.sum()\n", + " return v" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "" + ] + } + ], + "source": [ + "\n", + "from IPython.display import clear_output\n", + "\n", + "lpath = []\n", + "\n", + "for epoch in range(10000):\n", + " clear_output(wait=True)\n", + " print(f\"Epoch = {epoch}\",end='')\n", + "\n", + " # Pick initial point\n", + " s = state(m)\n", + " \n", + " # Start travelling\n", + " n=0\n", + " cum_reward = 0\n", + " while True:\n", + " x,y 
= s.board.human\n", + " v = probs(Q[x,y])\n", + " while True:\n", + " a = random.choices(list(actions),weights=v)[0]\n", + " dpos = actions[a]\n", + " if s.board.is_valid(s.board.move_pos(s.board.human,dpos)):\n", + " break \n", + " s.move(dpos)\n", + " r = reward(s)\n", + " if abs(r)==100: # end of game\n", + " print(f\" {n} steps\",end='\\r')\n", + " lpath.append(n)\n", + " break\n", + " alpha = np.exp(-n / 3000)\n", + " gamma = 0.5\n", + " ai = action_idx[a]\n", + " Q[x,y,ai] = (1 - alpha) * Q[x,y,ai] + alpha * (r + gamma * Q[x+dpos[0], y+dpos[1]].max())\n", + " n+=1" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## Резултати\n", + "\n", + "Хајде да видимо да ли смо успешно обучили Петра да се бори против вука!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Killed by wolf = 1, won: 9 times, drown: 90 times\n" + ] + } + ], + "source": [ + "def qpolicy(m):\n", + " x,y = m.human\n", + " v = probs(Q[x,y])\n", + " a = random.choices(list(actions),weights=v)[0]\n", + " return a\n", + "\n", + "print_statistics(qpolicy)" + ] + }, + { + "source": [ + "Сада видимо много мање случајева дављења, али Петар још увек није увек у стању да убије вука. 
Покушајте да експериментишете и видите да ли можете побољшати овај резултат играјући се са хиперпараметрима.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 13 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(lpath)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако се трудимо да обезбедимо тачност, молимо вас да имате у виду да аутоматски преводи могу садржати грешке или нетачности. Оригинални документ на његовом изворном језику треба сматрати меродавним извором. За критичне информације препоручује се професионални превод од стране људи. 
Не преузимамо одговорност за било каква погрешна тумачења или неспоразуме који могу настати услед коришћења овог превода.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sr/8-Reinforcement/1-QLearning/solution/notebook.ipynb b/translations/sr/8-Reinforcement/1-QLearning/solution/notebook.ipynb new file mode 100644 index 000000000..f27461799 --- /dev/null +++ b/translations/sr/8-Reinforcement/1-QLearning/solution/notebook.ipynb @@ -0,0 +1,577 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "488431336543f71f14d4aaf0399e3381", + "translation_date": "2025-09-06T15:09:26+00:00", + "source_file": "8-Reinforcement/1-QLearning/solution/notebook.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# Петар и вук: Увод у учење путем појачања\n", + "\n", + "У овом туторијалу ћемо научити како применити учење путем појачања на проблем проналажења пута. Овај сценарио је инспирисан музичком бајком [Петар и вук](https://en.wikipedia.org/wiki/Peter_and_the_Wolf) руског композитора [Сергеја Прокофјева](https://en.wikipedia.org/wiki/Sergei_Prokofiev). То је прича о младом пиониру Петру, који храбро излази из своје куће на шумску чистину да би јурио вука. 
Тренираћемо алгоритме машинског учења који ће помоћи Петру да истражи околину и направи оптималну навигациону мапу.\n", + "\n", + "Прво, хајде да увеземо неколико корисних библиотека:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import random\n", + "import math" + ] + }, + { + "source": [ + "## Преглед учења путем појачања\n", + "\n", + "**Учење путем појачања** (RL) је техника учења која нам омогућава да научимо оптимално понашање **агента** у неком **окружењу** кроз извођење многих експеримената. Агент у овом окружењу треба да има неки **циљ**, који је дефинисан помоћу **функције награде**.\n", + "\n", + "## Окружење\n", + "\n", + "За једноставност, хајде да замислимо Петров свет као квадратну таблу величине `width` x `height`. Свака ћелија на овој табли може бити:\n", + "* **земља**, по којој Петар и друга створења могу ходати\n", + "* **вода**, по којој, очигледно, не можете ходати\n", + "* **дрво** или **трава** - место где можете мало да се одморите\n", + "* **јабука**, која представља нешто што би Петар радо пронашао да би се нахранио\n", + "* **вук**, који је опасан и треба га избегавати\n", + "\n", + "Да бисмо радили са окружењем, дефинисаћемо класу под називом `Board`. Да не бисмо превише оптеретили овај нотебук, сав код за рад са таблом преместили смо у посебан модул `rlboard`, који ћемо сада увести. 
Можете погледати унутар овог модула за више детаља о интерним аспектима имплементације.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from rlboard import *" + ] + }, + { + "source": [ + "Хајде сада да направимо насумичну таблу и видимо како изгледа:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "width, height = 8,8\n", + "m = Board(width,height)\n", + "m.randomize(seed=13)\n", + "m.plot()" + ] + }, + { + "source": [ + "## Акције и Политика\n", + "\n", + "У нашем примеру, циљ Петра би био да пронађе јабуку, избегавајући вука и друге препреке. Да би то урадио, он може суштински да се креће док не пронађе јабуку. Због тога, на било којој позицији може да изабере једну од следећих акција: горе, доле, лево и десно. Те акције ћемо дефинисати као речник и повезати их са паровима одговарајућих промена координата. 
На пример, кретање десно (`R`) би одговарало пару `(1,0)`.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "actions = { \"U\" : (0,-1), \"D\" : (0,1), \"L\" : (-1,0), \"R\" : (1,0) }\n", + "action_idx = { a : i for i,a in enumerate(actions.keys()) }" + ] + }, + { + "source": [ + "Стратегија нашег агента (Петар) дефинисана је такозваном **политиком**. Хајде да размотримо најједноставнију политику која се зове **случајно кретање**.\n", + "\n", + "## Случајно кретање\n", + "\n", + "Хајде прво да решимо наш проблем применом стратегије случајног кретања.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "18" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ], + "source": [ + "def random_policy(m):\n", + " return random.choice(list(actions))\n", + "\n", + "def walk(m,policy,start_position=None):\n", + " n = 0 # number of steps\n", + " # set initial position\n", + " if start_position:\n", + " m.human = start_position \n", + " else:\n", + " m.random_start()\n", + " while True:\n", + " if m.at() == Board.Cell.apple:\n", + " return n # success!\n", + " if m.at() in [Board.Cell.wolf, Board.Cell.water]:\n", + " return -1 # eaten by wolf or drowned\n", + " while True:\n", + " a = actions[policy(m)]\n", + " new_pos = m.move_pos(m.human,a)\n", + " if m.is_valid(new_pos) and m.at(new_pos)!=Board.Cell.water:\n", + " m.move(a) # do the actual move\n", + " break\n", + " n+=1\n", + "\n", + "walk(m,random_policy)" + ] + }, + { + "source": [ + "Хајде да спроведемо експеримент случајног хода неколико пута и видимо просечан број корака:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": 
"stream", + "name": "stdout", + "text": [ + "Average path length = 32.87096774193548, eaten by wolf: 7 times\n" + ] + } + ], + "source": [ + "def print_statistics(policy):\n", + " s,w,n = 0,0,0\n", + " for _ in range(100):\n", + " z = walk(m,policy)\n", + " if z<0:\n", + " w+=1\n", + " else:\n", + " s += z\n", + " n += 1\n", + " print(f\"Average path length = {s/n}, eaten by wolf: {w} times\")\n", + "\n", + "print_statistics(random_policy)" + ] + }, + { + "source": [ + "## Функција награђивања\n", + "\n", + "Да бисмо нашу политику учинили интелигентнијом, потребно је да разумемо који потези су \"бољи\" од других.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "move_reward = -0.1\n", + "goal_reward = 10\n", + "end_reward = -10\n", + "\n", + "def reward(m,pos=None):\n", + " pos = pos or m.human\n", + " if not m.is_valid(pos):\n", + " return end_reward\n", + " x = m.at(pos)\n", + " if x==Board.Cell.water or x == Board.Cell.wolf:\n", + " return end_reward\n", + " if x==Board.Cell.apple:\n", + " return goal_reward\n", + " return move_reward" + ] + }, + { + "source": [ + "## Q-Лернинг\n", + "\n", + "Направите Q-Табелу, или вишедимензионални низ. Пошто наша табла има димензије `width` x `height`, можемо представити Q-Табелу помоћу numpy низа са обликом `width` x `height` x `len(actions)`:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "Q = np.ones((width,height,len(actions)),dtype=np.float)*1.0/len(actions)" + ] + }, + { + "source": [ + "Проследите Q-табелу функцији за цртање како бисте визуализовали табелу на табли:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## Суштина Q-Learning-а: Белманова једначина и алгоритам учења\n", + "\n", + "Напишите псеудо-код за наш алгоритам учења:\n", + "\n", + "* Иницијализујте Q-табелу Q са једнаким вредностима за сва стања и акције\n", + "* Поставите стопу учења $\\alpha\\leftarrow 1$\n", + "* Поновите симулацију више пута\n", + " 1. Почните са случајне позиције\n", + " 1. Понављајте\n", + " 1. Изаберите акцију $a$ у стању $s$\n", + " 2. Извршите акцију преласком у ново стање $s'$\n", + " 3. Ако наиђемо на услов за крај игре, или је укупна награда сувише мала - изађите из симулације \n", + " 4. Израчунајте награду $r$ у новом стању\n", + " 5. Ажурирајте Q-функцију према Белмановој једначини: $Q(s,a)\\leftarrow (1-\\alpha)Q(s,a)+\\alpha(r+\\gamma\\max_{a'}Q(s',a'))$\n", + " 6. $s\\leftarrow s'$\n", + " 7. 
Ажурирајте укупну награду и смањите $\\alpha$.\n", + "\n", + "## Експлоатација vs. Експлорација\n", + "\n", + "Најбољи приступ је пронаћи баланс између експлорације и експлоатације. Како више учимо о нашем окружењу, вероватније је да ћемо следити оптималну путању, али је важно да повремено изаберемо непознат пут.\n", + "\n", + "## Python имплементација\n", + "\n", + "Сада смо спремни да имплементирамо алгоритам учења. Пре тога, потребна нам је и функција која ће произвољне бројеве у Q-табели претворити у вектор вероватноћа за одговарајуће акције:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "def probs(v,eps=1e-4):\n", + " v = v-v.min()+eps\n", + " v = v/v.sum()\n", + " return v" + ] + }, + { + "source": [ + "Малу количину `eps` додајемо оригиналном вектору како бисмо избегли дељење са 0 у почетном случају, када су све компоненте вектора идентичне.\n", + "\n", + "Стварни алгоритам учења покренућемо за 10000 експеримената, који се такође називају **епохе**:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "" + ] + } + ], + "source": [ + "\n", + "from IPython.display import clear_output\n", + "\n", + "lpath = []\n", + "\n", + "for epoch in range(10000):\n", + " clear_output(wait=True)\n", + " print(f\"Epoch = {epoch}\",end='')\n", + "\n", + " # Pick initial point\n", + " m.random_start()\n", + " \n", + " # Start travelling\n", + " n=0\n", + " cum_reward = 0\n", + " while True:\n", + " x,y = m.human\n", + " v = probs(Q[x,y])\n", + " a = random.choices(list(actions),weights=v)[0]\n", + " dpos = actions[a]\n", + " m.move(dpos,check_correctness=False) # we allow player to move outside the board, which terminates episode\n", + " r = reward(m)\n", + " cum_reward += r\n", + " if r==end_reward or cum_reward 
< -1000:\n", + " print(f\" {n} steps\",end='\\r')\n", + " lpath.append(n)\n", + " break\n", + " alpha = np.exp(-n / 3000)\n", + " gamma = 0.5\n", + " ai = action_idx[a]\n", + " Q[x,y,ai] = (1 - alpha) * Q[x,y,ai] + alpha * (r + gamma * Q[x+dpos[0], y+dpos[1]].max())\n", + " n+=1" + ] + }, + { + "source": [ + "Након извршавања овог алгоритма, Q-табела би требало да буде ажурирана вредностима које дефинишу привлачност различитих акција у сваком кораку. Визуелизујте табелу овде:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## Провера политике\n", + "\n", + "Пошто Q-табела приказује „атрактивност“ сваке акције у сваком стању, веома је једноставно користити је за дефинисање ефикасне навигације у нашем свету. 
У најједноставнијем случају, можемо једноставно изабрати акцију која одговара највишој вредности у Q-табели:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "2" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ], + "source": [ + "def qpolicy_strict(m):\n", + " x,y = m.human\n", + " v = probs(Q[x,y])\n", + " a = list(actions)[np.argmax(v)]\n", + " return a\n", + "\n", + "walk(m,qpolicy_strict)" + ] + }, + { + "source": [ + "Ако покренете горњи код неколико пута, можда ћете приметити да понекад само \"заглави\", и потребно је да притиснете дугме STOP у бележници како бисте га прекинули.\n", + "\n", + "> **Задатак 1:** Измените функцију `walk` тако да ограничи максималну дужину пута на одређени број корака (на пример, 100), и посматрајте како горњи код повремено враћа ову вредност.\n", + "\n", + "> **Задатак 2:** Измените функцију `walk` тако да се не враћа на места на којима је већ била раније. 
Ово ће спречити да `walk` упадне у петљу, али агент и даље може завршити \"заробљен\" на локацији са које не може да побегне.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Average path length = 3.45, eaten by wolf: 0 times\n" + ] + } + ], + "source": [ + "\n", + "def qpolicy(m):\n", + " x,y = m.human\n", + " v = probs(Q[x,y])\n", + " a = random.choices(list(actions),weights=v)[0]\n", + " return a\n", + "\n", + "print_statistics(qpolicy)" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 15 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(lpath)" + ] + }, + { + "source": [ + "Оно што овде видимо је да је на почетку просечна дужина пута порасла. Ово је вероватно због чињенице да када ништа не знамо о окружењу – вероватно ћемо се заглавити у лошим стањима, као што су вода или вук. Како учимо више и почнемо да користимо то знање, можемо истраживати окружење дуже, али и даље не знамо добро где се налазе јабуке.\n", + "\n", + "Када научимо довољно, агенту постаје лакше да постигне циљ, и дужина пута почиње да се смањује. Међутим, и даље смо отворени за истраживање, па често одступамо од најбољег пута и истражујемо нове опције, што чини пут дужим од оптималног.\n", + "\n", + "Оно што такође примећујемо на овом графику је да је у једном тренутку дужина нагло порасла. Ово указује на стохастичку природу процеса и на то да у неком тренутку можемо „покварити“ коефицијенте Q-табеле, тако што ћемо их преписати новим вредностима. Ово би идеално требало минимизовати смањењем стопе учења (тј. 
пред крај обуке подешавамо вредности Q-табеле само за малу вредност).\n", + "\n", + "Уопштено, важно је запамтити да успех и квалитет процеса учења значајно зависе од параметара, као што су стопа учења, смањење стопе учења и фактор дисконтовања. Ови параметри се често називају **хиперпараметри**, како би се разликовали од **параметара** које оптимизујемо током обуке (нпр. коефицијенти Q-табеле). Процес проналажења најбољих вредности хиперпараметара назива се **оптимизација хиперпараметара**, и заслужује посебну тему.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "## Вежба\n", + "#### Реалнији свет Петра и вука\n", + "\n", + "У нашој ситуацији, Петар је могао да се креће готово без умарања или глади. У реалнијем свету, он мора с времена на време да седне и одмори се, као и да се храни. Хајде да наш свет учинимо реалнијим применом следећих правила:\n", + "\n", + "1. Крећући се са једног места на друго, Петар губи **енергију** и добија одређени ниво **умора**.\n", + "2. Петар може да повећа енергију једући јабуке.\n", + "3. Петар може да се ослободи умора одмарајући се испод дрвета или на трави (тј. уласком на локацију табле са дрветом или травом - зелено поље).\n", + "4. Петар мора да пронађе и убије вука.\n", + "5. Да би убио вука, Петар мора да има одређене нивое енергије и умора, иначе губи битку.\n", + "\n", + "Измените функцију награде изнад у складу са правилима игре, покрените алгоритам за учење појачања како бисте научили најбољу стратегију за победу у игри, и упоредите резултате насумичног кретања са вашим алгоритмом у смислу броја добијених и изгубљених игара.\n", + "\n", + "> **Напомена**: Можда ћете морати да прилагодите хиперпараметре да би све функционисало, посебно број епоха. 
Пошто је успех у игри (борба са вуком) редак догађај, можете очекивати много дуже време тренинга.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако тежимо тачности, молимо вас да имате у виду да аутоматски преводи могу садржати грешке или нетачности. Оригинални документ на изворном језику треба сматрати ауторитативним извором. За критичне информације препоручује се професионални превод од стране људи. Не сносимо одговорност за било каква неспоразумевања или погрешна тумачења која могу произаћи из коришћења овог превода.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sr/8-Reinforcement/2-Gym/notebook.ipynb b/translations/sr/8-Reinforcement/2-Gym/notebook.ipynb new file mode 100644 index 000000000..96ad81053 --- /dev/null +++ b/translations/sr/8-Reinforcement/2-Gym/notebook.ipynb @@ -0,0 +1,394 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.4 64-bit ('base': conda)" + }, + "interpreter": { + "hash": "86193a1ab0ba47eac1c69c1756090baa3b420b3eea7d4aafab8b85f8b312f0c5" + }, + "coopTranslator": { + "original_hash": "f22f8f3daed4b6d34648d1254763105b", + "translation_date": "2025-09-06T15:16:57+00:00", + "source_file": "8-Reinforcement/2-Gym/notebook.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "## Скејтовање на CartPole-у\n", + "\n", + "> **Проблем**: Ако Петар жели да побегне од вука, мора да се креће 
брже од њега. Видећемо како Петар може да научи да скејтује, конкретно, да одржава равнотежу, користећи Q-Learning.\n", + "\n", + "Прво, хајде да инсталирамо gym и увеземо потребне библиотеке:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 1" + ] + }, + { + "source": [ + "## Направите окружење за колица и шипку\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "#code block 2" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "source": [ + "Да бисмо видели како окружење функционише, хајде да покренемо кратку симулацију од 100 корака.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "#code block 3" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "source": [ + "Током симулације, потребно је добити опсервације како бисмо одлучили како да поступимо. 
У ствари, функција `step` нам враћа тренутне опсервације, награду и заставицу `done` која указује да ли има смисла наставити симулацију или не:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "#code block 4" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "source": [ + "Можемо добити минималну и максималну вредност тих бројева:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38]\n[4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38]\n" + ] + } + ], + "source": [ + "#code block 5" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 6" + ] + }, + { + "source": [ + "Хајде да истражимо и друге методе дискретизације користећи бинове:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Sample bins for interval (-5,5) with 10 bins\n [-5. -4. -3. -2. -1. 0. 1. 2. 3. 4. 
5.]\n" + ] + } + ], + "source": [ + "#code block 7" + ] + }, + { + "source": [ + "Хајде сада да покренемо кратку симулацију и посматрамо те дискретне вредности окружења.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(0, 0, -2, -2)\n(0, 1, -2, -5)\n(0, 2, -3, -8)\n(0, 3, -5, -11)\n(0, 3, -7, -14)\n(0, 4, -10, -17)\n(0, 3, -14, -15)\n(0, 3, -17, -12)\n(0, 3, -20, -16)\n(0, 4, -23, -19)\n" + ] + } + ], + "source": [ + "#code block 8" + ] + }, + { + "source": [ + "## Q-Table Структура\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 9" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 10" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0: 22.0, alpha=0.3, epsilon=0.9\n", + "5000: 70.1384, alpha=0.3, epsilon=0.9\n", + "10000: 121.8586, alpha=0.3, epsilon=0.9\n", + "15000: 149.6368, alpha=0.3, epsilon=0.9\n", + "20000: 168.2782, alpha=0.3, epsilon=0.9\n", + "25000: 196.7356, alpha=0.3, epsilon=0.9\n", + "30000: 220.7614, alpha=0.3, epsilon=0.9\n", + "35000: 233.2138, alpha=0.3, epsilon=0.9\n", + "40000: 248.22, alpha=0.3, epsilon=0.9\n", + "45000: 264.636, alpha=0.3, epsilon=0.9\n", + "50000: 276.926, alpha=0.3, epsilon=0.9\n", + "55000: 277.9438, alpha=0.3, epsilon=0.9\n", + "60000: 248.881, alpha=0.3, epsilon=0.9\n", + "65000: 272.529, alpha=0.3, epsilon=0.9\n", + "70000: 281.7972, alpha=0.3, epsilon=0.9\n", + "75000: 284.2844, alpha=0.3, epsilon=0.9\n", + "80000: 269.667, alpha=0.3, epsilon=0.9\n", + "85000: 273.8652, alpha=0.3, 
epsilon=0.9\n", + "90000: 278.2466, alpha=0.3, epsilon=0.9\n", + "95000: 269.1736, alpha=0.3, epsilon=0.9\n" + ] + } + ], + "source": [ + "#code block 11" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 20 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(rewards)" + ] + }, + { + "source": [ + "Са овог графикона није могуће ништа закључити, јер због природе стохастичког процеса тренинга дужина сесија тренинга веома варира. Да би овај графикон имао више смисла, можемо израчунати **покретни просек** преко серије експеримената, рецимо 100. 
Ово се може лако урадити користећи `np.convolve`:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 22 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "#code block 12" + ] + }, + { + "source": [ + "## Променљиви хиперпараметри и посматрање резултата у пракси\n", + "\n", + "Сада би било занимљиво видети како се обучени модел заправо понаша. 
Покренимо симулацију, и пратићемо исту стратегију избора акција као током обуке: узорковање према расподели вероватноће у Q-табели:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 13" + ] + }, + { + "source": [ + "## Чување резултата у анимираном GIF-у\n", + "\n", + "Ако желите да импресионирате своје пријатеље, можда ћете желети да им пошаљете анимирану GIF слику балансирајуће шипке. Да бисмо то урадили, можемо позвати `env.render` да произведемо слику кадра, а затим их сачувати као анимиран GIF користећи PIL библиотеку:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "360\n" + ] + } + ], + "source": [ + "from PIL import Image\n", + "obs = env.reset()\n", + "done = False\n", + "i=0\n", + "ims = []\n", + "while not done:\n", + " s = discretize(obs)\n", + " img=env.render(mode='rgb_array')\n", + " ims.append(Image.fromarray(img))\n", + " v = probs(np.array([Qbest.get((s,a),0) for a in actions]))\n", + " a = random.choices(actions,weights=v)[0]\n", + " obs,_,done,_ = env.step(a)\n", + " i+=1\n", + "env.close()\n", + "ims[0].save('images/cartpole-balance.gif',save_all=True,append_images=ims[1::2],loop=0,duration=5)\n", + "print(i)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако се трудимо да превод буде тачан, молимо вас да имате у виду да аутоматизовани преводи могу садржати грешке или нетачности. Оригинални документ на његовом изворном језику треба сматрати меродавним извором. За критичне информације препоручује се професионални превод од стране људи. 
Не преузимамо одговорност за било каква погрешна тумачења или неспоразуме који могу настати услед коришћења овог превода.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sr/8-Reinforcement/2-Gym/solution/notebook.ipynb b/translations/sr/8-Reinforcement/2-Gym/solution/notebook.ipynb new file mode 100644 index 000000000..82d6e622b --- /dev/null +++ b/translations/sr/8-Reinforcement/2-Gym/solution/notebook.ipynb @@ -0,0 +1,526 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "5c0e485e58d63c506f1791c4dbf990ce", + "translation_date": "2025-09-06T15:19:48+00:00", + "source_file": "8-Reinforcement/2-Gym/solution/notebook.ipynb", + "language_code": "sr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "## CartPole клизање\n", + "\n", + "> **Проблем**: Ако Петар жели да побегне од вука, мора бити бржи од њега. 
Видећемо како Петар може научити да клиза, посебно како да одржи равнотежу, користећи Q-Learning.\n", + "\n", + "Прво, хајде да инсталирамо gym и увеземо потребне библиотеке:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: gym in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.18.3)\n", + "Requirement already satisfied: Pillow<=8.2.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (7.0.0)\n", + "Requirement already satisfied: scipy in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (1.4.1)\n", + "Requirement already satisfied: numpy>=1.10.4 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (1.19.2)\n", + "Requirement already satisfied: cloudpickle<1.7.0,>=1.2.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (1.6.0)\n", + "Requirement already satisfied: pyglet<=1.5.15,>=1.4.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (1.5.15)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n" + ] + } + ], + "source": [ + "import sys\n", + "!pip install gym \n", + "\n", + "import gym\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import random" + ] + }, + { + "source": [ + "## Направите окружење за колица и шипку\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "env = gym.make(\"CartPole-v1\")\n", + "print(env.action_space)\n", + "print(env.observation_space)\n", + 
"print(env.action_space.sample())" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Discrete(2)\nBox(-3.4028234663852886e+38, 3.4028234663852886e+38, (4,), float32)\n0\n" + ] + } + ] + }, + { + "source": [ + "Да бисмо видели како окружење функционише, хајде да покренемо кратку симулацију од 100 корака.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "env.reset()\n", + "\n", + "for i in range(100):\n", + " env.render()\n", + " env.step(env.action_space.sample())\n", + "env.close()" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/gym/logger.py:30: UserWarning: \u001b[33mWARN: You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.\u001b[0m\n warnings.warn(colorize('%s: %s'%('WARN', msg % args), 'yellow'))\n" + ] + } + ] + }, + { + "source": [ + "Током симулације, потребно је добити опсервације како бисмо одлучили како да поступимо. 
У ствари, функција `step` нам враћа тренутне опсервације, функцију награде и заставицу `done` која указује да ли има смисла наставити симулацију или не:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "env.reset()\n", + "\n", + "done = False\n", + "while not done:\n", + " env.render()\n", + " obs, rew, done, info = env.step(env.action_space.sample())\n", + " print(f\"{obs} -> {rew}\")\n", + "env.close()" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[ 0.03044442 -0.19543914 -0.04496216 0.28125618] -> 1.0\n", + "[ 0.02653564 -0.38989186 -0.03933704 0.55942606] -> 1.0\n", + "[ 0.0187378 -0.19424049 -0.02814852 0.25461393] -> 1.0\n", + "[ 0.01485299 -0.38894946 -0.02305624 0.53828712] -> 1.0\n", + "[ 0.007074 -0.19351108 -0.0122905 0.23842953] -> 1.0\n", + "[ 0.00320378 0.00178427 -0.00752191 -0.05810469] -> 1.0\n", + "[ 0.00323946 0.19701326 -0.008684 -0.35315131] -> 1.0\n", + "[ 0.00717973 0.00201587 -0.01574703 -0.06321931] -> 1.0\n", + "[ 0.00722005 0.19736001 -0.01701141 -0.36082863] -> 1.0\n", + "[ 0.01116725 0.39271958 -0.02422798 -0.65882671] -> 1.0\n", + "[ 0.01902164 0.19794307 -0.03740452 -0.37387001] -> 1.0\n", + "[ 0.0229805 0.39357584 -0.04488192 -0.67810827] -> 1.0\n", + "[ 0.03085202 0.58929164 -0.05844408 -0.98457719] -> 1.0\n", + "[ 0.04263785 0.78514572 -0.07813563 -1.2950295 ] -> 1.0\n", + "[ 0.05834076 0.98116859 -0.10403622 -1.61111521] -> 1.0\n", + "[ 0.07796413 0.78741784 -0.13625852 -1.35259196] -> 1.0\n", + "[ 0.09371249 0.98396202 -0.16331036 -1.68461179] -> 1.0\n", + "[ 0.11339173 0.79106371 -0.1970026 -1.44691436] -> 1.0\n", + "[ 0.12921301 0.59883361 -0.22594088 -1.22169133] -> 1.0\n" + ] + } + ] + }, + { + "source": [ + "Можемо добити минималну и максималну вредност тих бројева:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + 
"outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38]\n[4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38]\n" + ] + } + ], + "source": [ + "print(env.observation_space.low)\n", + "print(env.observation_space.high)" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "def discretize(x):\n", + " return tuple((x/np.array([0.25, 0.25, 0.01, 0.1])).astype(np.int))" + ] + }, + { + "source": [ + "Хајде да истражимо и друге методе дискретизације користећи бинове:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Sample bins for interval (-5,5) with 10 bins\n [-5. -4. -3. -2. -1. 0. 1. 2. 3. 4. 5.]\n" + ] + } + ], + "source": [ + "def create_bins(i,num):\n", + " return np.arange(num+1)*(i[1]-i[0])/num+i[0]\n", + "\n", + "print(\"Sample bins for interval (-5,5) with 10 bins\\n\",create_bins((-5,5),10))\n", + "\n", + "ints = [(-5,5),(-2,2),(-0.5,0.5),(-2,2)] # intervals of values for each parameter\n", + "nbins = [20,20,10,10] # number of bins for each parameter\n", + "bins = [create_bins(ints[i],nbins[i]) for i in range(4)]\n", + "\n", + "def discretize_bins(x):\n", + " return tuple(np.digitize(x[i],bins[i]) for i in range(4))" + ] + }, + { + "source": [ + "Хајде сада да покренемо кратку симулацију и посматрамо те дискретне вредности окружења.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(0, 0, -1, -3)\n(0, 0, -2, 0)\n(0, 0, -2, -3)\n(0, 1, -3, -6)\n(0, 2, -4, -9)\n(0, 3, -6, -12)\n(0, 2, -8, -9)\n(0, 3, -10, -13)\n(0, 4, -13, -16)\n(0, 4, -16, 
-19)\n(0, 4, -20, -17)\n(0, 4, -24, -20)\n" + ] + } + ], + "source": [ + "env.reset()\n", + "\n", + "done = False\n", + "while not done:\n", + " #env.render()\n", + " obs, rew, done, info = env.step(env.action_space.sample())\n", + " #print(discretize_bins(obs))\n", + " print(discretize(obs))\n", + "env.close()" + ] + }, + { + "source": [ + "## Структура Q-Табеле\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "Q = {}\n", + "actions = (0,1)\n", + "\n", + "def qvalues(state):\n", + " return [Q.get((state,a),0) for a in actions]" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# hyperparameters\n", + "alpha = 0.3\n", + "gamma = 0.9\n", + "epsilon = 0.90" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0: 108.0, alpha=0.3, epsilon=0.9\n" + ] + } + ], + "source": [ + "def probs(v,eps=1e-4):\n", + " v = v-v.min()+eps\n", + " v = v/v.sum()\n", + " return v\n", + "\n", + "Qmax = 0\n", + "cum_rewards = []\n", + "rewards = []\n", + "for epoch in range(100000):\n", + " obs = env.reset()\n", + " done = False\n", + " cum_reward=0\n", + " # == do the simulation ==\n", + " while not done:\n", + " s = discretize(obs)\n", + " if random.random()<epsilon:\n", + " # exploitation - chose the action according to Q-Table probabilities\n", + " v = probs(np.array(qvalues(s)))\n", + " a = random.choices(actions,weights=v)[0]\n", + " else:\n", + " # exploration - randomly chose the action\n", + " a = np.random.randint(env.action_space.n)\n", + "\n", + " obs, rew, done, info = env.step(a)\n", + " cum_reward+=rew\n", + " ns = discretize(obs)\n", + " Q[(s,a)] = (1 - alpha) * Q.get((s,a),0) + alpha * (rew + gamma * max(qvalues(ns)))\n", + " cum_rewards.append(cum_reward)\n", + " rewards.append(cum_reward)\n", + " # == Periodically print results and calculate average reward ==\n", + " if epoch%5000==0:\n", + " print(f\"{epoch}: {np.average(cum_rewards)}, alpha={alpha}, epsilon={epsilon}\")\n", + " if np.average(cum_rewards) > Qmax:\n", + " Qmax = np.average(cum_rewards)\n", + " Qbest = Q\n", + " cum_rewards=[]" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 20 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXoAAAD4CAYAAADiry33AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nO3deXxU9b3/8dcnCSTsa8CQgAEJIKIIBGSXTUWiYqu0Lq2o3MvV6nWhVlGrtbdasddq9dqfy9W2tr22WpdKXYu4W0VBRVBAQFACCEF2kCXk+/tjvkkm+yTMZCZn3s/HI4+c853vzPmenMl7vud7zpxjzjlERCS4UuLdABERiS0FvYhIwCnoRUQCTkEvIhJwCnoRkYBLi3cDADp37uxyc3Pj3QwRkSZl0aJFW5xzmXXVS4igz83NZeHChfFuhohIk2JmX0ZST0M3IiIBp6AXEQk4Bb2ISMAp6EVEAk5BLyIScBEFvZmtNbMlZvaxmS30ZR3NbJ6ZrfS/O/hyM7N7zWyVmX1iZoNjuQIiIlK7+vToxzvnjnfO5fv52cB851weMN/PA5wK5PmfmcD90WqsiIjU3+GcRz8VGOenHwVeB67z5X90oesfv2dm7c0syzm38XAa2pjWbd3Lj/+2mG7tMvjpaf3p3DqdbXsO8K/V31BwXBbOOf7+8XpO6NmJj77azsSjuzD5N2/yo3G9eXbxev484wTumb+S/lltOaJdBobxwBureePzIpbccjJmBsCLSzby+ooiphyXxYl9Kn7nYUnhDv7+8Xq6tk1n5tijyso/XredtBRjQHY7nHM8uaiQCf26cOvzy+jYqjnLNu7kX6u/4d5zB5HdvgWri3bz3UHZpKWm8PKnX3NMt7bc/sJybig4mpPueoNHpg+leZrxwdptzHlxOU9dOoIN2/dx+sBuPLbgK254Zglt0tO4cFQu//PqKm46rT+/eO4zlv9iMjf9fSl/W1RIn66tGdazI51apXPP/JX87PT+9OzcivfXbGXJ+h28tXJLg7fFJScexQNvrKZ3l9as2ry7rNwMUs0oLonfZbbbZKSxa19xxPVHHtWJpet3sHNfMcdmt2PJ+h306tyKL7bsAeDu7w/k6scXx6q53HJ6f7buPcgTH6zj6537YraccENzO/DB2m38YHgP1m/7luZpKbz86aZ6vcb5J/TgolG5TLrrzXov/4qJeaSacfcrn9da79YzB/DlN3v437fW8B9je/Hgm19UqfPdwdk8/eH6Gl9jcI/2PHXpSNZs2cOEX78RUftyOrTglVknktEsNaL6DWWRXI/ezNYA2wAHPOice8jMtjvn2ofV2eac62BmzwFznHNv+/L5wHXOuYWVXnMmoR4/PXr0GPLllxGd998ocmc/Xzad06EFb183ge8/+C4L1mzlvesnsm7bXqY98G5ZnYn9ujB/+eay+cqhFO73Fw1lfN8u7Nx3kONu+WdZ+do5BTW2Ifyx0vK1cwp4aelGLvnzh3Wuz7WT+3LxqJ70u+mlOuuWKv0HrUnz1BQOHCqJ+PVEksFvzxvMZY/V/T8Z7ofDj+QXZw5o0PLMbFHYKEuNIu3Rj3LObTCzLsA8M1te27KrKavyaeKcewh4CCA/Pz9h735SuO1bANZvD/0+eKiE3fsr9uLWbdtbYb6mkAfKeoDFhw5/lXd+G1lvcuvuA5TU8wYztYU8oJAXqcaufQfr/ZyiXftj0JKKIhqjd85t8L83A88Aw4BNZpYF4H+XdmkLge5hT88BNkSrwUGVO/t5LvnTong3Q0QCqM6gN7NWZtamdBo4GVgKzAWm+2rTgWf99FzgAn/2zXBgR1Man4+nlz79Ot5NEJEAimTopivwjD+AmAY85px7ycw+AJ4wsxnAV8A0X/8FYAqwCtgLXBT1VouIBETR7tgP3dQZ9M65L4CB1ZR/A0ysptwBl0WldQG0/+Ah5i7ewKijOjXaMuN4YoqI1OHLb/bWXekwJcR
lipPJr15eQdGu/dxzzvGNtkyr7vC4iETdI2+viXcTqqVLIHhffrOHRV/WfqZJNJQeYd/5bf2PzotIYltZyxl38aQevXfif78OVD2fXUQklrY0whi9evRRYNV+dSBxOJf4bRSR2FHQi4gEnIZuoqAhBzsb80SY372zhjYZ2tQiyUo9+giMv/P1CvNNcRDknvkr490EEYkTBX0E1virC4qINEUK+nqq57XBouqv738Vv4WLSJOloI9QInzpaPbTS+LdBBFpghT0IiIBp6AH9h6o+7ru0R6yiecQkIgkFwU9sOCLrXXWKb0BSayHcL49cCi2CxCRpKOgr2TfwbqDNpad8UPq6otIlCnoK9l/sHFukVfbnkFjXPtCRJKHgr6eHv9gXUzv8bh19wHyb30lZq8vIslHQV9P9722it+/szZmr79174EqZbmzn+eQ7h4iIg2koAdcPUfdDxQf/gHTmobiP9uws9ryg4caZ0hJRIJHQd8Aq4sO/5IIroakX7l512G/tohIOAV9gvmwhrtcrdyUmHeuEZHEp6CvpL7DONG2uHBHteWn3/d2I7dERIJCQR8nlggXzxGRpKCgFxEJOAU98bnuTE0HY0VEok1BHwUahhGRRKagr8Sa5I0CRURqpqCvpCFn3SzbWP2XnEREEoGCnviM0Wu4R0Qai4JeRCTgFPTE9vryNS5TZ92ISCNR0MfJLf/4LN5NEJEkEXHQm1mqmX1kZs/5+Z5mtsDMVprZ42bW3Jen+/lV/vHc2DQ9etS7FpEgq0+P/kpgWdj8HcDdzrk8YBsww5fPALY553oDd/t6Ce3xD9bFuwn11uenL/L655vj3QwRaQIiCnozywEKgIf9vAETgCd9lUeBM/30VD+Pf3yiJfgpJis3l18Zsql07g8Ul/DCkq/j3QwRaQIi7dH/BrgWKL37RSdgu3Ou2M8XAtl+OhtYB+Af3+HrV2BmM81soZktLCoqamDzRUSkLnUGvZmdBmx2zi0KL66mqovgsfIC5x5yzuU75/IzMzMjamysxPvSxCIisZQWQZ1RwBlmNgXIANoS6uG3N7M032vPATb4+oVAd6DQzNKAdsDWqLc8SkpKHCVhd+lL7EEmEZH6q7NH75y73jmX45zLBc4BXnXOnQ+8Bpztq00HnvXTc/08/vFXXQKf1nLqPW+xfvu38W6GiEjMHM559NcBs8xsFaEx+Ed8+SNAJ18+C5h9eE2MrRWbdI9WEQm2SIZuyjjnXgde99NfAMOqqbMPmBaFtsVF4u57iIg0jL4ZKyIScAp6EZGAU9CLiARcvcbog+JAcQl9fvoiV0zoHe+miIjEXFL26PcVHwLg9++sjW9DREQaQVIGvYhIMlHQV6KzK0UkaBT0legKCCISNAp6EZGAU9CLiAScgr4SjdGLSNAo6CuZ95nu2iQiwZLUQb9rf3GVsi27D8ShJSIisZPUQS8ikgwU9JX8+p8r4t0EEZGoUtBXUqKjsSISMEkX9J9v2sW0+9+NdzNERBpN0gX9L19YptsHikhSSbqgFxFJNgp6EZGAU9CLiAScgl5EJOAU9CIiAaegFxEJuKQLet1YRESSTdIFvYhIslHQi4gEnIJeRCTgFPQiIgGnoBcRCbikC3oznXcjIsmlzqA3swwze9/MFpvZp2b2c1/e08wWmNlKM3vczJr78nQ/v8o/nhvbVRARkdpE0qPfD0xwzg0Ejgcmm9lw4A7gbudcHrANmOHrzwC2Oed6A3f7eglD/XkRSTZ1Br0L2e1nm/kfB0wAnvTljwJn+umpfh7/+ETTeImISNxENEZvZqlm9jGwGZgHrAa2O+eKfZVCINtPZwPrAPzjO4BO0Wy0iIhELqKgd84dcs4dD+QAw4Cjq6vmf1fXe69yJ1Yzm2lmC81sYVFRUaTtFRGReqrXWTfOue3A68BwoL2ZpfmHcoANfroQ6A7gH28HbK3mtR5yzuU75/IzMzMb1voG0CCSiCSbSM66yTSz9n66BTAJWAa8Bpz
tq00HnvXTc/08/vFXnXNVevQiItI40uquQhbwqJmlEvpgeMI595yZfQb81cxuBT4CHvH1HwH+ZGarCPXkz4lBu0VEJEJ1Br1z7hNgUDXlXxAar69cvg+YFpXWiYjIYUuKb8YeKnHcMvdTNmz/Nt5NERFpdJEM3TR576/Zyh/+tZbPN+2iZfPUeDdHRKRRJUWP3vmzO0t0TFhEklBSBH1FOr9SRJJLEga9iEhyUdCLiARcUgW9huhFJBklRdBb2Li8LoEgIskm0KdXOudYXbQn3s0QEYmrQPfoH3l7DZPueoPFhdvLyg6VaPxGRJJLoIP+o3WhgF+3dW9Z2avLN8erOSIicRHooBcRkSQLeg3aiEgySoqg15k2IpLMgh306sKLiAQ86D3T9W1EJIkF9jz63NnPl00/vnBdHFsiIhJfSdGjP1BcEu8miIjETVIEfRmN2YtIEgpU0O/eX8zU377D55t2xbspIiIJI1BB/69VW1i8bju/emlFvJsiIpIwAhX0dXl/7dZ4N0FEpNElVdCLiCSjQAW9jrWKiFQVqKAvpUseiIiUC2TQi4hIuUAFve4JKyJSVaCCvpRGbkREygUy6EVEpFzAgl5jNyIilQUs6EN01o2ISLlABr2IiJSrM+jNrLuZvWZmy8zsUzO70pd3NLN5ZrbS/+7gy83M7jWzVWb2iZkNjvVKlNJZNyIiVUXSoy8GfuycOxoYDlxmZv2B2cB851weMN/PA5wK5PmfmcD9UW91HXRHKRGRcnUGvXNuo3PuQz+9C1gGZANTgUd9tUeBM/30VOCPLuQ9oL2ZZUW95dXYrxuMiIhUUa8xejPLBQYBC4CuzrmNEPowALr4atlA+L37Cn1Z5deaaWYLzWxhUVFR/Vtejase/zgqryMiEiQRB72ZtQaeAq5yzu2srWo1ZVVGz51zDznn8p1z+ZmZmZE2IyI660ZEpFxEQW9mzQiF/P855572xZtKh2T8782+vBDoHvb0HGBDdJorIiL1FclZNwY8Aixzzt0V9tBcYLqfng48G1Z+gT/7Zjiwo3SIR0REGl9aBHVGAT8ElphZ6SD4DcAc4AkzmwF8BUzzj70ATAFWAXuBi6La4gho6EZEpFydQe+ce5uarxM2sZr6DrjsMNslIiJRom/GiogEXCCDXl+YEhEpF8igFxGRcgp6EZGAC2TQry7aHe8miIgkjEAG/fKvd8W7CSIiCSOQQS8iIuUU9CIiAaegFxEJOAW9iEjAKehFRAJOQS8iEnAKehGRgFPQi4gEnIJeRCTgFPQiIgGnoBcRCTgFvYhIwCnoRUQCTkEvIhJwCnoRkYBT0IuIBFxggr5w2954N0FEJCEFIujfXf0No+94Ld7NEBFJSIEI+uVf74x3E0REElYggt65eLdARCRxBSPo490AEZEEFoigFxGRmgUi6J3GbkREahSIoBcRkZo1+aDfd/AQtz6/LN7NEBFJWHUGvZn9zsw2m9nSsLKOZjbPzFb63x18uZnZvWa2ysw+MbPBsWw8wINvfBHrRYiINGmR9Oj/AEyuVDYbmO+cywPm+3mAU4E8/zMTuD86zazZ3oPFsV6EiEiTVmfQO+feBLZWKp4KPOqnHwXODCv/owt5D2hvZlnRamz1DYzpq4uINHkNHaPv6pzbCOB/d/Hl2cC6sHqFvqwKM5tpZgvNbGFRUVEDmwH/WLyhwc8VEUkG0T4Ya9WUVdvnds495JzLd87lZ2ZmNniBG3bsa/BzRUSSQUODflPpkIz/vdmXFwLdw+rlAOpyi4jEUUODfi4w3U9PB54NK7/An30zHNhROsQjIiLxkVZXBTP7CzAO6GxmhcDPgDnAE2Y2A/gKmOarvwBMAVYBe4GLYtBmERGphzqD3jl3bg0PTaymrgMuO9xGiYhI9DT5b8aKiEjtFPQiIgHXpIP+/TWVv8clIiKVNemg/8HDC+LdBBGRhNekg/7AoZJ4N0FEJOE16aAXEZG6KehFRAJOQS8iEnAKehGRgFP
Qi4gEnIJeRCTgFPQiInHUPC32MaygFxGJo7OH5MR8GQp6EZE4Om9Yj5gvQ0EvIhJHA7LbxXwZCnoRkTjp3aV1oyxHQS8iEidj8zIbZTkKehGROLnmlD6NshwFvYhInLRsXufdXKNCQS8iEnAKeklIfbo2zkEqaTzpjfDFIKme/vKSkJyLdwsk2sbkdY53E5KWgl6kEfzyO8dGXPfYRjivuql69OJhUX29Fs1So/p63Tu2iOrrRYuCXqSBxveNzalxd04bGPUAasq+Oyi7bPrEPuV/87F9av/7FxyXVedr5+d2qHd7Tu7ftcJ8r8xWjIvReyFaFPRS5vwTYv9V7Ibo27VNteV/nnFClbJOrZrHtC3nDO1eNn36wG4AXDDiSNbOKaj1ec1SrV7LuXh0br3bVpe1cwq45uTGOZ0vmobkdqBV86offJUDd3CP9hXmczrU3btOT0vh0nFHlQ0rRXKBsXF9u1SYP2twDtntQ8tqlpqYkZqYrZKo6pXZKqJ6t9VjeKHyP1ldLh/fO+K6o3tXHMt9+eqxVercd94gRsdhzLe095bZJp0zBnbjigm9+ckpfWt9zohenfhOWK80El3bZgBUG3CljvB1EtWpA4447Nd49rJRnDesBwtunMTim08GKgb43y4Zwc2n9WfBDRN57N+H07LS36tTq+ZMOrrm9+pPTunHdZNDPwBnDc7m/RsncvGonmV1hlbq9We2Secfl48um+/SJp3Zp/bjigm9Oe3YmvcirpjQm/wj678HEQ0K+iZkUKUeS6T+3/mDK7xxo2FafndunHI0AMNyO9ZZ/5pKYTiqd6da6885K/Sh819Tj6n28dOO6xZJMyOWF/ZV9PvOG8S8aj5cSv3homF8cOMk0lJTmHVyX9pkNAPgwpG5NK+mR/eXmcNJS03hhin9uHBkbll5Zpt0js5qW+0y+vi9mP+aOqDC3kL3ji147ZpxADzxHyPKyksD5KcFR5eVfS+/6lURv5ffvcK6Ho7OrdOrLX/1xyfy4pVjKqxruJqWX10IDuzeHjOjdXoa7VqG/s5XTepT9jpDczty8eiedG2bQUazVF798ThOCuuELLrpJB6enl82f+e0gfz7mPL/hTYZofPYB2S345Hp+fzs9GPo0iajQs/+4QuGcsvp/cvmT+rflWNz2vHFL6fwwA+GcPaQHNpkNGPWyX0Z2L3m/9ExfTI55ZjyD7+nLh1RY91oU9AniLp2Gbu0SeepS0Y26LWz2rXg5tP7c1xO+UG+W88cUOtzurWrvbc4rm8mk32P7ZJxvSJqx2XjjyqbPmdo7cNEQ47syNo5BVwwIheAD26cVGub75w2kGd+NLLsHzcSFjaaMm/WiRSE9cbyahguqs0tZxzD57edWuPjM8cexS1nlH9wDc3twItXjikbAgo3vFcn3r5uPGf5S9iWhvbLV42lZ+dWrJ1TQI9OLcvq//b8wVxzch9mjC4Psdu+cyzvXT+xwut2aZvBvFkncvNp/XllVs0fZvURvmdxZKeW9MpsXeMHWG2evDSy9/dZg7N569rxnNCramfhiHYZDO5R9QOjTXoak47uwtlDcrixoD9PXjKCn5zSl27ty/cOJh7dlQx/bCR8CLBdy2ZcOKon78yewJrbp5SVp6QYkwccgYW9kSaG7T3MOqkPr10zjldmjeXqSX3IP7JD2V7oHy4aypAj6+4gRYuCPgZKd+XrMx561/cG1vr4+zdOIiXFuOec4+vVlrVzCmjXItQTmnv5aOZ8N9RT7ndE7UFWenbDhSNzeSSsRwSw5vYpNEtNoXvHlqydU8CEflV3jR/4wZAqZT85pR9r5xSwdk5BhXBb/ovJQGg4qH9WW2af2q/KczPbpJeFeAffswt39pAcBvXoUHYlwNJeY/hxh/Drfg/s3p41txfwyqyxvHjlmBr+CiGDe7RniO9tRnMM9r/PDm3z/zl3EGvnFPC3S0YwMKcduZ1DAZ7ToTzIf3X2QNbOKajxm5Rd22Zw+YQ8zKy
s09AsNYUj2mUw7+qxvHRVxXW8eHRPenWuX8++8tlAE/t1oW/XNtz2ndAH8IhenXiyls5Ih5ah8ByTl1n2Plg7p4B7zjm+wvBKm/TaP6zNjO4dW9b4+FmDs8nr0pofDj+yrGzJz0/h4elDy+bzcztyWS3DiWP6hAK5Z+fyYc/s9i0qhHpNXrpqDPOuHssVE/Po2bkVvbu04cpJoW1zdFZb1s4pqDLOH2uN8/3bAMjr0pqnfjSS4275Z511Z4zuya59xfzbmF7c+c/PAUhLMYpLKp4cfnL/rvTLasuQIzsw6qjqhzLeu34i7cOC7YyB3fh80y4y0lJplpbCWYNzuP2FZTz90XpG9e7ElRP70KVNOu+s3sJx2VV3I78/tDsjj+pMj04tueOsY8nr2oY9+4tZun4nEOoZp1ioR/v2dePJateC1JSKb+7q3uxzLx/FkvU7uPGZpQBMHnAE/bPa8tnGndUeNAX4xZkDGNS9PRnNUnn3+gl0apVe655NwbFZLP96F5ecWL5n8OAPh1QYLik9KHbd5H4s/HIbV07MY8/+Yv7+8QbunDaQJxcVAnD/+YMB6N2l5g+8966fSIdWzUhPS2XvgWLunb+K8yI4YD0mrzOL123nzWvHY1T9Ww3IbsvufcW0qhRoQ3M78mzY2G8k/jRjGDu/La5Q9tx/juatlVvK5mvaOyndjOP6ZnJS/65l2y6rXQYbd+yjR8eWfLV1LwA/P+MYpo/MJXf287RJT2PX/mLyurbmjrOPA+Cta8eT3b4FKWHvlaP8h+2IXp1494tv6N+tLW9PGl/l2MLU47OZenzoGMaHN51Es1TjpLveJDuCg6nVKd1rORyl262+B9EB+h1R/72ZWDOXAN9Myc/PdwsXLqz380bcPp+NO/bV+3nXTu5L6/Q0bn72U1o0S+XpH40kNcW455WVPL9kIytvO5UHXl/Nr+eFQvrm0/pzsd8lds5x07NLuXBkLt978D227jnA9/JzSE1J4cqJeXz5zZ4Ku5TTf/c+Zw7qxstLN/HSp1+z+pdTqgRnuE8Kt3PGfe9w7rDuXDAiN6Jd4H0HD/H6is1MHlD36WSH465/rmDX/mJ+dnr14+YAS9fvoEXzVI7KbM3WPQf4dMMOxjTSFfoADhSXMH/Zpiq71KWeWlTIc59s4PcXVT0fe8XXu7jssQ956tKRZXtBySR39vMAvH/DRFYV7eaYrHZ8+NU29hwoZsqArLIQd87xwpKvmTzgiFrfy6VKShwvLo28fiLYX3yIs+7/Fz8t6M/waoaIEoWZLXLO5ddZLxZBb2aTgXuAVOBh59yc2uo3NOiLD5XwzEfr2VdcQtuMNIbmdqR5WgqpZlz+lw+57cxjSU0x9heXRHTd5/3Fh9iy+wDZ7VtQUuJYt20vR3aK7IyVSF77m90HKowJ1uTLb/bQo2PLiHYTRaJlz/5i9uwvpkuCn80j5eIW9GaWCnwOnAQUAh8A5zrnPqvpOQ0NehGRZBZp0MfiYOwwYJVz7gvn3AHgr8DUGCxHREQiEIugzwbWhc0X+rIKzGymmS00s4VFRUUxaIaIiEBsgr66geUq40POuYecc/nOufzMzMS+ToSISFMWi6AvBLqHzecAG2KwHBERiUAsgv4DIM/MeppZc+AcYG4MliMiIhGI+hemnHPFZnY58DKh0yt/55z7NNrLERGRyMTkm7HOuReAF2Lx2iIiUj+61o2ISMAlxCUQzKwI+LKBT+8MbKmzVrBonZOD1jk5HM46H+mcq/O0xYQI+sNhZgsj+WZYkGidk4PWOTk0xjpr6EZEJOAU9CIiAReEoH8o3g2IA61zctA6J4eYr3OTH6MXEZHaBaFHLyIitVDQi4gEXJMOejObbGYrzGyVmc2Od3vqw8y6m9lrZrbMzD41syt9eUczm2dmK/3vDr7czOxev66fmNngsNea7uuvNLPpYeVDzGyJf869liC3rDKzVDP7yMye8/M9zWyBb//j/hpJmFm6n1/lH88Ne43
rffkKMzslrDzh3hNm1t7MnjSz5X57jwj6djazq/37eqmZ/cXMMoK2nc3sd2a22cyWhpXFfLvWtIxaOeea5A+h6+isBnoBzYHFQP94t6se7c8CBvvpNoTuytUf+BUw25fPBu7w01OAFwldBno4sMCXdwS+8L87+OkO/rH3gRH+OS8Cp8Z7vX27ZgGPAc/5+SeAc/z0A8ClfvpHwAN++hzgcT/d32/vdKCnfx+kJup7AngU+Dc/3RxoH+TtTOj+E2uAFmHb98KgbWdgLDAYWBpWFvPtWtMyam1rvP8JDuOPPAJ4OWz+euD6eLfrMNbnWUK3X1wBZPmyLGCFn36Q0C0ZS+uv8I+fCzwYVv6gL8sCloeVV6gXx/XMAeYDE4Dn/Jt4C5BWebsSujDeCD+d5utZ5W1dWi8R3xNAWx96Vqk8sNuZ8psPdfTb7TnglCBuZyCXikEf8+1a0zJq+2nKQzcR3cmqKfC7qoOABUBX59xGAP+7i69W0/rWVl5YTXm8/Qa4Fijx852A7c65Yj8f3s6ydfOP7/D16/u3iKdeQBHwez9c9bCZtSLA29k5tx64E/gK2Ehouy0i2Nu5VGNs15qWUaOmHPQR3ckq0ZlZa+Ap4Crn3M7aqlZT5hpQHjdmdhqw2Tm3KLy4mqqujseazDoT6qEOBu53zg0C9hDa3a5Jk19nP2Y8ldBwSzegFXBqNVWDtJ3rEtd1bMpB3+TvZGVmzQiF/P855572xZvMLMs/ngVs9uU1rW9t5TnVlMfTKOAMM1tL6KbxEwj18NubWekls8PbWbZu/vF2wFbq/7eIp0Kg0Dm3wM8/SSj4g7ydJwFrnHNFzrmDwNPASIK9nUs1xnataRk1aspB36TvZOWPoD8CLHPO3RX20Fyg9Mj7dEJj96XlF/ij98OBHX637WXgZDPr4HtSJxMav9wI7DKz4X5ZF4S9Vlw45653zuU453IJba9XnXPnA68BZ/tqlde59G9xtq/vfPk5/myNnkAeoQNXCfeecM59Dawzs76+aCLwGQHezoSGbIabWUvfptJ1Dux2DtMY27WmZdQsngdtonAgZAqhs1VWAzfGuz31bPtoQrtinwAf+58phMYm5wMr/e+Ovr4Bv9wf3t8AAACjSURBVPXrugTID3uti4FV/ueisPJ8YKl/zn1UOiAY5/UfR/lZN70I/QOvAv4GpPvyDD+/yj/eK+z5N/r1WkHYWSaJ+J4AjgcW+m39d0JnVwR6OwM/B5b7dv2J0JkzgdrOwF8IHYM4SKgHPqMxtmtNy6jtR5dAEBEJuKY8dCMiIhFQ0IuIBJyCXkQk4BT0IiIBp6AXEQk4Bb2ISMAp6EVEAu7/A6SijxMjKxrLAAAAAElFTkSuQmCC\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(rewards)" + ] + }, + { + "source": [ + "Са овог графикона није могуће рећи ништа, јер због природе стохастичког процеса тренинга дужина сесија тренинга веома варира. Да би овај графикон имао више смисла, можемо израчунати **покретни просек** преко серије експеримената, рецимо 100. 
Ово се може лако урадити користећи `np.convolve`:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 22 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "def running_average(x,window):\n", + " return np.convolve(x,np.ones(window)/window,mode='valid')\n", + "\n", + "plt.plot(running_average(rewards,100))" + ] + }, + { + "source": [ + "## Променљиви хиперпараметри и посматрање резултата у пракси\n", + "\n", + "Сада би било занимљиво видети како се обучени модел заправо понаша. 
Покренимо симулацију, и пратићемо исту стратегију избора акција као током обуке: узорковање према расподели вероватноће у Q-табели:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "obs = env.reset()\n", + "done = False\n", + "while not done:\n", + " s = discretize(obs)\n", + " env.render()\n", + " v = probs(np.array(qvalues(s)))\n", + " a = random.choices(actions,weights=v)[0]\n", + " obs,_,done,_ = env.step(a)\n", + "env.close()" + ] + }, + { + "source": [ + "## Чување резултата у анимираном GIF-у\n", + "\n", + "Ако желите да импресионирате своје пријатеље, можда ћете желети да им пошаљете анимирану GIF слику балансирајуће шипке. Да бисмо то урадили, можемо позвати `env.render` да произведемо слику кадра, а затим их сачувати као анимирани GIF користећи PIL библиотеку:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "360\n" + ] + } + ], + "source": [ + "from PIL import Image\n", + "obs = env.reset()\n", + "done = False\n", + "i=0\n", + "ims = []\n", + "while not done:\n", + " s = discretize(obs)\n", + " img=env.render(mode='rgb_array')\n", + " ims.append(Image.fromarray(img))\n", + " v = probs(np.array([Qbest.get((s,a),0) for a in actions]))\n", + " a = random.choices(actions,weights=v)[0]\n", + " obs,_,done,_ = env.step(a)\n", + " i+=1\n", + "env.close()\n", + "ims[0].save('images/cartpole-balance.gif',save_all=True,append_images=ims[1::2],loop=0,duration=5)\n", + "print(i)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). 
Иако се трудимо да обезбедимо тачност, молимо вас да имате у виду да аутоматски преводи могу садржати грешке или нетачности. Оригинални документ на његовом изворном језику треба сматрати меродавним извором. За критичне информације, препоручује се професионални превод од стране људи. Не преузимамо одговорност за било каква погрешна тумачења или неспоразуме који могу настати услед коришћења овог превода.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sr/PyTorch_Fundamentals.ipynb b/translations/sr/PyTorch_Fundamentals.ipynb new file mode 100644 index 000000000..15b8c22c9 --- /dev/null +++ b/translations/sr/PyTorch_Fundamentals.ipynb @@ -0,0 +1,2830 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4", + "authorship_tag": "ABX9TyOgv0AozH1FKQBD+RkgT2bV", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU", + "coopTranslator": { + "original_hash": "0ca21b6ee62904d616f2e36dc1cf0da7", + "translation_date": "2025-09-06T13:08:01+00:00", + "source_file": "PyTorch_Fundamentals.ipynb", + "language_code": "sr" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EHh5JllMh1rG", + "outputId": "f55755ad-c369-414c-85ec-6e9d4f061a02", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'2.2.1+cu121'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 1 + } + ], + "source": [ + "import torch\n", + "torch.__version__" + ] + }, + { + "cell_type": "code", + "source": [ + "print(\"I am excited to run this\")" + ], + "metadata": { 
+ "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "UPlb-duwXAfz", + "outputId": "cfd687e4-1238-49f4-ab6b-ee1305b740d2" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "I am excited to run this\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "print(torch.__version__)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "byWVlJ9wXDSk", + "outputId": "fd74a5c4-4d4a-41b2-ef3c-562ea3e4811f" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "2.2.1+cu121\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "Osm80zoEYklS" + } + }, + { + "cell_type": "code", + "source": [ + "# scalar\n", + "scalar = torch.tensor(7)\n", + "scalar" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-o8wvJ-VXZmI", + "outputId": "558816f5-1205-4de1-fe1f-2f96e9bd79e6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(7)" + ] + }, + "metadata": {}, + "execution_count": 4 + } + ] + }, + { + "cell_type": "code", + "source": [ + "scalar.ndim" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mCZ2tXC4Y_Sg", + "outputId": "2d86dbdc-56e1-45c6-d3dd-14515f2a457a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "source": [ + "scalar.item()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ssN00By0ZQgS", + "outputId": "490f40d1-5135-4969-a6d3-c8c902cdc473" + }, + "execution_count": null, + "outputs": [ + { + 
"output_type": "execute_result", + "data": { + "text/plain": [ + "7" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# vector\n", + "vector = torch.tensor([7, 7])\n", + "vector\n", + "#vector.ndim\n", + "#vector.item()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Bws__5wlZnmF", + "outputId": "944e38f9-5ba1-4ddc-a9c6-cfb6a19bb488" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([7, 7])" + ] + }, + "metadata": {}, + "execution_count": 7 + } + ] + }, + { + "cell_type": "code", + "source": [ + "vector.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "9pjCvnsZZzNG", + "outputId": "e030a4da-8f81-4858-fbce-86da2aaafe52" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([2])" + ] + }, + "metadata": {}, + "execution_count": 8 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Matrix\n", + "MATRIX = torch.tensor([[7, 8],[9, 10]])\n", + "MATRIX" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "a747hI9SaBGW", + "outputId": "af835ddb-81ff-4981-badb-441567194d15" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[ 7, 8],\n", + " [ 9, 10]])" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ] + }, + { + "cell_type": "code", + "source": [ + "MATRIX.ndim" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XdTfFa7vaRUj", + "outputId": "0fbbab9c-8263-4cad-a380-0d2a16ca499e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "2" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "code", + "source": [ + "MATRIX[0]\n", + 
"MATRIX[1]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TFeD3jSDafm7", + "outputId": "69b44ab3-5ba7-451a-c6b2-f019a03d0c96" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 9, 10])" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Tensor\n", + "TENSOR = torch.tensor([[[1, 2, 3],[3,6,9], [2,4,5]]])\n", + "TENSOR" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ic3cE47tah42", + "outputId": "f250e295-91de-43ec-9d80-588a6fe0abde" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[1, 2, 3],\n", + " [3, 6, 9],\n", + " [2, 4, 5]]])" + ] + }, + "metadata": {}, + "execution_count": 12 + } + ] + }, + { + "cell_type": "code", + "source": [ + "TENSOR.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Wvjf5fczbAM1", + "outputId": "9c72b5b8-bafe-4ae7-9883-b051e209eada" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([1, 3, 3])" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ] + }, + { + "cell_type": "code", + "source": [ + "TENSOR.ndim" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mwtXZwiMbN3m", + "outputId": "331a5e36-b1b0-4a5f-a9b8-e7049cbaa8f9" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "3" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ] + }, + { + "cell_type": "code", + "source": [ + "TENSOR[0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vzdZu_IfbP3J", + "outputId": "e24e7e71-e365-412d-ff50-fc094b56d2f3" + }, + "execution_count": null, + "outputs": [ + { + "output_type": 
"execute_result", + "data": { + "text/plain": [ + "tensor([[1, 2, 3],\n", + " [3, 6, 9],\n", + " [2, 4, 5]])" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "A8OL9eWfcRrJ" + } + }, + { + "cell_type": "code", + "source": [ + "random_tensor = torch.rand(3,4)\n", + "random_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hAqSDE1EcVS_", + "outputId": "946171c3-d054-400c-f893-79110356888c" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0.4414, 0.7681, 0.8385, 0.3166],\n", + " [0.0468, 0.5812, 0.0670, 0.9173],\n", + " [0.2959, 0.3276, 0.7411, 0.4643]])" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_tensor.ndim" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "g4fvPE5GcwzP", + "outputId": "8737f36b-6864-4059-eaed-6f9156c22306" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "2" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_tensor.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XsAg99QmdAU6", + "outputId": "35467c11-257c-4f16-99aa-eca930bcbc36" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([3, 4])" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_tensor.size()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cii1pNdVdB68", + "outputId": "fc8d2de6-9215-43de-99f7-7b0d7f7d20fa" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": 
[ + "torch.Size([3, 4])" + ] + }, + "metadata": {}, + "execution_count": 19 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_image_tensor = torch.rand(size=(3, 224, 224)) #color channels, height, width\n", + "random_image_tensor.ndim, random_image_tensor.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "aTKq2j0cdDjb", + "outputId": "6be42057-20b9-4faf-d79d-8b65c42cc27e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(3, torch.Size([3, 224, 224]))" + ] + }, + "metadata": {}, + "execution_count": 20 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_tensor_ofownsize = torch.rand(size=(5,10,10))\n", + "random_tensor_ofownsize.ndim, random_tensor_ofownsize.shape\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IyhDdj-Pd6nC", + "outputId": "43e5e334-6d4d-4b67-f87d-7d364c6d8c67" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(3, torch.Size([5, 10, 10]))" + ] + }, + "metadata": {}, + "execution_count": 21 + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "UOJW08uOert_" + } + }, + { + "cell_type": "code", + "source": [ + "zero = torch.zeros(size=(3, 4))\n", + "zero" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "uGvXtaXyefie", + "outputId": "d40d3e28-8667-4d2f-8b62-f0829c6162ad" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0., 0., 0., 0.],\n", + " [0., 0., 0., 0.],\n", + " [0., 0., 0., 0.]])" + ] + }, + "metadata": {}, + "execution_count": 22 + } + ] + }, + { + "cell_type": "code", + "source": [ + "zero*random_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "OyUkUPkDe0uH", + "outputId": 
"26c2e4be-36ba-4c6c-9a90-2704ec135828" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0., 0., 0., 0.],\n", + " [0., 0., 0., 0.],\n", + " [0., 0., 0., 0.]])" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ones = torch.ones(size=(3, 4))\n", + "ones\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "y_Ac62Aqe82G", + "outputId": "291de5d9-b9df-49de-c9d1-d098e3e9f4d8" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1., 1., 1., 1.],\n", + " [1., 1., 1., 1.],\n", + " [1., 1., 1., 1.]])" + ] + }, + "metadata": {}, + "execution_count": 24 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ones.dtype" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TvGOA9odfIEO", + "outputId": "45949ef4-6649-4b6c-d6af-2d4bfb8de832" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.float32" + ] + }, + "metadata": {}, + "execution_count": 25 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ones*zero" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "--pTyge-fI-8", + "outputId": "c4d9bb7e-829b-43db-e2db-b1a2d64e61f0" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0., 0., 0., 0.],\n", + " [0., 0., 0., 0.],\n", + " [0., 0., 0., 0.]])" + ] + }, + "metadata": {}, + "execution_count": 26 + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "qDcc7Z36fSJF" + } + }, + { + "cell_type": "code", + "source": [ + "one_to_ten = torch.arange(start = 1, end = 11, step = 1)\n", + "one_to_ten" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": 
"w3CZB4zUfR1s", + "outputId": "197fcba1-da0a-4b4a-ed11-3974bd6c01aa" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])" + ] + }, + "metadata": {}, + "execution_count": 27 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ten_zeros = torch.zeros_like(one_to_ten)\n", + "ten_zeros" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "WZh99BwVfRy8", + "outputId": "51ef8bfb-6fa0-4099-ff66-b97d65b2ddea" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])" + ] + }, + "metadata": {}, + "execution_count": 28 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Тензорски типови података\n" + ], + "metadata": { + "id": "pGGhgsbUgqbW" + } + }, + { + "cell_type": "code", + "source": [ + "float_32_tensor = torch.tensor([3.0, 6.0,9.0], dtype = None, device = None, requires_grad = False)\n", + "float_32_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JORJl4XkfRsx", + "outputId": "71114171-0f49-481f-b6fc-6cb48e2fb895" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([3., 6., 9.])" + ] + }, + "metadata": {}, + "execution_count": 29 + } + ] + }, + { + "cell_type": "code", + "source": [ + "float_32_tensor.dtype" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "6wOPPwGyfRLn", + "outputId": "f23776a1-b682-404a-9f67-d5bcb0402666" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.float32" + ] + }, + "metadata": {}, + "execution_count": 30 + } + ] + }, + { + "cell_type": "code", + "source": [ + "float_16_tensor = float_32_tensor.type(torch.float16)\n", + "float_16_tensor.dtype" + ], + 
"metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "tFsHCvmZfOYe", + "outputId": "d3aa305a-7591-47f5-97fd-61bff60b44bd" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.float16" + ] + }, + "metadata": {}, + "execution_count": 31 + } + ] + }, + { + "cell_type": "code", + "source": [ + "float_16_tensor*float_32_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TQiCGTPuwq0q", + "outputId": "98750fce-1ca3-4889-e269-8b753efdea96" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 9., 36., 81.])" + ] + }, + "metadata": {}, + "execution_count": 32 + } + ] + }, + { + "cell_type": "code", + "source": [ + "int_32_tensor = torch.tensor([3, 6, 9], dtype = torch.int32)\n", + "int_32_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5hlrLvGUw5D_", + "outputId": "41d890a0-9aee-446c-d906-631ce2ab0995" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([3, 6, 9], dtype=torch.int32)" + ] + }, + "metadata": {}, + "execution_count": 33 + } + ] + }, + { + "cell_type": "code", + "source": [ + "int_32_tensor*float_32_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ihApD9u3xTNW", + "outputId": "d295eed0-6996-4e0f-8502-ff4b55cd1373" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 9., 36., 81.])" + ] + }, + "metadata": {}, + "execution_count": 34 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x = torch.arange(0,100,10)" + ], + "metadata": { + "id": "utKhlb_KxWDQ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "x" + ], + "metadata": { + "colab": { + "base_uri": 
"https://localhost:8080/" + }, + "id": "p78D74E9Rj7Y", + "outputId": "781a1614-a900-41f5-9e5d-358f0b2390aa" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])" + ] + }, + "metadata": {}, + "execution_count": 36 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.min()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4BcSs5NeRkcj", + "outputId": "3f24a8dc-58e9-4a5f-9834-e85856a34f9d" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0)" + ] + }, + "metadata": {}, + "execution_count": 37 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.max()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hinqvXVLRm4q", + "outputId": "5c7d8a53-3913-4ac1-bba3-5ba8ff68250a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(90)" + ] + }, + "metadata": {}, + "execution_count": 38 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.mean(x.type(torch.float32))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "k7okc0_vRpnB", + "outputId": "91e5494f-dc57-417c-ea4d-25dbc547c893" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(45.)" + ] + }, + "metadata": {}, + "execution_count": 39 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.type(torch.float32).mean()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "29QcDTjHRq10", + "outputId": "62937c6c-78e0-49f2-dde3-1543ee8f7907" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(45.)" + ] + }, + "metadata": {}, + "execution_count": 40 + } + ] 
+ }, + { + "cell_type": "code", + "source": [ + "x.sum()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wlpY_G_sbdKF", + "outputId": "475d8258-af65-4011-a258-b93d4d8142d4" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(450)" + ] + }, + "metadata": {}, + "execution_count": 41 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.argmax()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GT6HJzwhbk4n", + "outputId": "2e455c20-c322-4bcf-d07c-1259d3ccefc6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(9)" + ] + }, + "metadata": {}, + "execution_count": 42 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.argmin()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "egL3oi2Mb19P", + "outputId": "f71fb32f-6338-44a3-b377-75bea0a3ab54" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0)" + ] + }, + "metadata": {}, + "execution_count": 43 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "p2U8DZKib3DP", + "outputId": "b9f613b9-74e9-45f4-ed01-05babb6a6793" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0)" + ] + }, + "metadata": {}, + "execution_count": 44 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[9]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "24qBFlGYcABe", + "outputId": "5813cfcb-7f63-4bd7-ee46-f95ccbfda939" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(90)" + ] + }, + "metadata": {}, + 
"execution_count": 45 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x = torch.arange(1, 10)\n", + "x.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "0GPOxEzkcBHO", + "outputId": "aefbd903-4f4c-4d2c-c90f-eccd682fe018" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([9])" + ] + }, + "metadata": {}, + "execution_count": 46 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_reshaped = x.reshape(1,9)\n", + "x_reshaped, x_reshaped.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "spmRgQjwddgp", + "outputId": "85a7c55c-2909-4ea2-fc68-386dddc65742" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9]]), torch.Size([1, 9]))" + ] + }, + "metadata": {}, + "execution_count": 47 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_reshaped.view(1,9)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "tH2ahWGydqqP", + "outputId": "65d92263-4fc4-434a-c06d-c5e08436f7fe" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9]])" + ] + }, + "metadata": {}, + "execution_count": 48 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_stacked = torch.stack([x, x, x, x], dim = 1)\n", + "x_stacked" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "jgCeJcaud_-1", + "outputId": "7f293a37-6ef1-43b6-aee5-9d6d91c94f9e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 1, 1, 1],\n", + " [2, 2, 2, 2],\n", + " [3, 3, 3, 3],\n", + " [4, 4, 4, 4],\n", + " [5, 5, 5, 5],\n", + " [6, 6, 6, 6],\n", + " [7, 7, 7, 7],\n", + " [8, 8, 8, 8],\n", + " [9, 
9, 9, 9]])" + ] + }, + "metadata": {}, + "execution_count": 49 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_stacked.squeeze()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XhJHIK6cfPse", + "outputId": "06c47b89-3a9e-453e-bcc3-00cbcb0b8b49" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 1, 1, 1],\n", + " [2, 2, 2, 2],\n", + " [3, 3, 3, 3],\n", + " [4, 4, 4, 4],\n", + " [5, 5, 5, 5],\n", + " [6, 6, 6, 6],\n", + " [7, 7, 7, 7],\n", + " [8, 8, 8, 8],\n", + " [9, 9, 9, 9]])" + ] + }, + "metadata": {}, + "execution_count": 50 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_stacked.unsqueeze(dim=1)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ej2c3Xxzf0tq", + "outputId": "94024061-eb37-446d-c4a8-e4d16cb6de81" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[1, 1, 1, 1]],\n", + "\n", + " [[2, 2, 2, 2]],\n", + "\n", + " [[3, 3, 3, 3]],\n", + "\n", + " [[4, 4, 4, 4]],\n", + "\n", + " [[5, 5, 5, 5]],\n", + "\n", + " [[6, 6, 6, 6]],\n", + "\n", + " [[7, 7, 7, 7]],\n", + "\n", + " [[8, 8, 8, 8]],\n", + "\n", + " [[9, 9, 9, 9]]])" + ] + }, + "metadata": {}, + "execution_count": 52 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_stacked.squeeze()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4DJYo1a0f5M0", + "outputId": "efca2b47-1b14-44de-9a9a-2c83629d153f" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 1, 1, 1],\n", + " [2, 2, 2, 2],\n", + " [3, 3, 3, 3],\n", + " [4, 4, 4, 4],\n", + " [5, 5, 5, 5],\n", + " [6, 6, 6, 6],\n", + " [7, 7, 7, 7],\n", + " [8, 8, 8, 8],\n", + " [9, 9, 9, 9]])" + ] + }, + "metadata": {}, + "execution_count": 53 + } + ] + }, + { + "cell_type": 
"code", + "source": [ + "x_stacked.unsqueeze(dim=-2)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "J4iEjn2ah2HL", + "outputId": "22395593-7c16-4162-beae-dd2bbe7bda35" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[1, 1, 1, 1]],\n", + "\n", + " [[2, 2, 2, 2]],\n", + "\n", + " [[3, 3, 3, 3]],\n", + "\n", + " [[4, 4, 4, 4]],\n", + "\n", + " [[5, 5, 5, 5]],\n", + "\n", + " [[6, 6, 6, 6]],\n", + "\n", + " [[7, 7, 7, 7]],\n", + "\n", + " [[8, 8, 8, 8]],\n", + "\n", + " [[9, 9, 9, 9]]])" + ] + }, + "metadata": {}, + "execution_count": 55 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "tensor = torch.tensor([1, 2, 3])\n", + "tensor = tensor - 10\n", + "tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cFfiD7Nth7Z_", + "outputId": "1139e1f8-fc1a-46ca-d636-f2bc4fd2eef6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([-9, -8, -7])" + ] + }, + "metadata": {}, + "execution_count": 7 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.mul(tensor, 10)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dyA7BM_GHhqE", + "outputId": "0e3b9671-d9e8-4a32-87bb-59bc05986142" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([-90, -80, -70])" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.sub(tensor, 100)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "owtUsZ1KNegI", + "outputId": "189b7b23-0041-4e09-b991-cd209a48506a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([-109, -108, -107])" + ] + }, + "metadata": 
{}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.add(tensor, 100)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "K5STXlQONsyc", + "outputId": "00cbb79a-0a1d-4e21-86ec-5c91c37a2d01" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([91, 92, 93])" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.divide(tensor, 2)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xqMGnzIUNvp0", + "outputId": "c894cf3e-f148-45f8-cfc8-d78740735306" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([-4.5000, -4.0000, -3.5000])" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.matmul(tensor, tensor)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ruGzKpV8NyBc", + "outputId": "fddb63bf-006f-48b6-ae28-287fbcda8bc5" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(194)" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor@tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "8GS3r9yTeGfD", + "outputId": "c80b12ac-30b5-4f3d-c38c-9e41ba511b0e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(194)" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ] + }, + { + "cell_type": "code", + "source": [ + "%%time\n", + "tensor@tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QmuYHqXTemC0", + "outputId": "402fe3ba-70b5-4bb2-c83b-254db84ff810" + }, + 
"execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "CPU times: user 622 µs, sys: 0 ns, total: 622 µs\n", + "Wall time: 516 µs\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(194)" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ] + }, + { + "cell_type": "code", + "source": [ + "%%time\n", + "torch.matmul(tensor,tensor)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dGr1fzdNepd8", + "outputId": "97bd6c91-bc25-4b38-cdf5-f22dcdef243e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "CPU times: user 424 µs, sys: 998 µs, total: 1.42 ms\n", + "Wall time: 1.43 ms\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(194)" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.rand(3,2)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "pGYDoK2gevfo", + "outputId": "2c8783d5-0453-47c5-c7ed-af10d25d6989" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0.5999, 0.0073],\n", + " [0.9321, 0.3026],\n", + " [0.3463, 0.3872]])" + ] + }, + "metadata": {}, + "execution_count": 20 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.matmul(torch.rand(3,2), torch.rand(2,3))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "KGBGQoB8e2DP", + "outputId": "4c2ef361-a2d0-41ee-c328-3992cbbc138d" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0.3528, 0.1893, 0.0714],\n", + " [1.2791, 0.7110, 0.2563],\n", + " [0.8812, 0.4553, 0.1803]])" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ] + }, + { + "cell_type": "code", + "source": [ + 
"import torch" + ], + "metadata": { + "id": "ib8DMtkBe_LJ" + }, + "execution_count": 1, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "x = torch.rand(2,9)" + ], + "metadata": { + "id": "nJo8ZBdrQY1b" + }, + "execution_count": 2, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "x" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wi6oRv4MQfgf", + "outputId": "55c99f55-31f6-4cf5-ba4e-19a47c3a0167" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0.5894, 0.4391, 0.2018, 0.5417, 0.3844, 0.3592, 0.9209, 0.9269, 0.0681],\n", + " [0.0746, 0.1740, 0.6821, 0.6890, 0.0999, 0.7444, 0.2391, 0.4625, 0.8302]])" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ] + }, + { + "cell_type": "code", + "source": [ + "y=torch.randn(2,3,5)\n", + "y" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Zpx8myAUQgoc", + "outputId": "07756d70-56bd-437c-c74e-9aecc1a77311" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[ 1.5552, -0.4877, 0.5175, -1.7958, -0.6187],\n", + " [-0.3359, -1.9710, 0.0112, -1.7578, -1.5295],\n", + " [ 0.0932, 1.4079, 0.9108, 0.3328, -0.6978]],\n", + "\n", + " [[-0.9406, -1.0809, -0.2595, 0.1282, 1.6605],\n", + " [ 1.1624, 1.0902, 1.7092, -0.2842, -1.3780],\n", + " [-0.1534, -1.2795, -0.5495, 0.9902, 0.1822]]])" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_original = torch.rand(size=(224,224,3))\n", + "x_original" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "s4U-X9bJQnWe", + "outputId": "657a7a76-962c-4b41-a76b-902d0482266c" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[0.4549, 0.6809, 0.2118],\n", + " [0.4824, 0.9008, 
0.8741],\n", + " [0.1715, 0.1757, 0.1845],\n", + " ...,\n", + " [0.8741, 0.6594, 0.2610],\n", + " [0.0092, 0.1984, 0.1955],\n", + " [0.4236, 0.4182, 0.0251]],\n", + "\n", + " [[0.9174, 0.1661, 0.5852],\n", + " [0.1837, 0.2351, 0.3810],\n", + " [0.3726, 0.4808, 0.8732],\n", + " ...,\n", + " [0.6794, 0.0554, 0.9202],\n", + " [0.0864, 0.8750, 0.3558],\n", + " [0.8445, 0.9759, 0.4934]],\n", + "\n", + " [[0.1600, 0.2635, 0.7194],\n", + " [0.9488, 0.3405, 0.3647],\n", + " [0.6683, 0.5168, 0.9592],\n", + " ...,\n", + " [0.0521, 0.0140, 0.2445],\n", + " [0.3596, 0.3999, 0.2730],\n", + " [0.5926, 0.9877, 0.7784]],\n", + "\n", + " ...,\n", + "\n", + " [[0.4794, 0.5635, 0.3764],\n", + " [0.9124, 0.6094, 0.5059],\n", + " [0.4528, 0.4447, 0.5021],\n", + " ...,\n", + " [0.0089, 0.4816, 0.8727],\n", + " [0.2173, 0.6296, 0.2347],\n", + " [0.2028, 0.9931, 0.7201]],\n", + "\n", + " [[0.3116, 0.6459, 0.4703],\n", + " [0.0148, 0.2345, 0.7149],\n", + " [0.8393, 0.5804, 0.6691],\n", + " ...,\n", + " [0.2105, 0.9460, 0.2696],\n", + " [0.5918, 0.9295, 0.2616],\n", + " [0.2537, 0.7819, 0.4700]],\n", + "\n", + " [[0.6654, 0.1200, 0.5841],\n", + " [0.9147, 0.5522, 0.6529],\n", + " [0.1799, 0.5276, 0.5415],\n", + " ...,\n", + " [0.7536, 0.4346, 0.8793],\n", + " [0.3793, 0.1750, 0.7792],\n", + " [0.9266, 0.8325, 0.9974]]])" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_permuted=x_original.permute(2, 0, 1)\n", + "print(x_original.shape)\n", + "print(x_permuted.shape)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "DD19_zvbQzHo", + "outputId": "1d64ce1b-eb48-47e3-90b6-7f1340e7f2b2" + }, + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "torch.Size([224, 224, 3])\n", + "torch.Size([3, 224, 224])\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_original[0,0,0]" + ], + "metadata": { + "colab": { + "base_uri": 
"https://localhost:8080/" + }, + "id": "NnPmMk4ZRF7w", + "outputId": "2cd5da7f-4a23-4a76-8c4a-bb982113f2a4" + }, + "execution_count": 10, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0.4549)" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_permuted[0,0,0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Z0ylNoAARgTo", + "outputId": "ddca0298-cddf-4048-9b71-a791655e5bed" + }, + "execution_count": 11, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0.4549)" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_original[0,0,0]=0.989" + ], + "metadata": { + "id": "RXw0xXsDRi4L" + }, + "execution_count": 13, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "x_original[0,0,0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "1sFdV6wzRo3f", + "outputId": "1cf87d2c-6d88-453a-d136-0f625a2800f1" + }, + "execution_count": 14, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0.9890)" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_permuted[0,0,0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xTX-hx2SR1wp", + "outputId": "0d4908c4-c3bc-44e3-8ec6-1487104cc209" + }, + "execution_count": 15, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0.9890)" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x=torch.arange(1,10).reshape(1,3,3)\n", + "x, x.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mZomOe7gR4Q8", + "outputId": "0b3c922f-ec11-46de-b8a5-9f9533d866ad" + }, + "execution_count": 
18, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(tensor([[[1, 2, 3],\n", + " [4, 5, 6],\n", + " [7, 8, 9]]]),\n", + " torch.Size([1, 3, 3]))" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "3y7v4SQvSBs1", + "outputId": "8c53307d-e628-404d-db66-56c6bdffab7c" + }, + "execution_count": 19, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 2, 3],\n", + " [4, 5, 6],\n", + " [7, 8, 9]])" + ] + }, + "metadata": {}, + "execution_count": 19 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0][0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hf9uG4xLSNya", + "outputId": "3075bc42-9ffa-426b-8a86-95628ffcd824" + }, + "execution_count": 21, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1, 2, 3])" + ] + }, + "metadata": {}, + "execution_count": 21 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0][0][0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "zA4G2Se4SRB3", + "outputId": "324312d2-ed0a-49eb-f81f-e904e53992fe" + }, + "execution_count": 22, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(1)" + ] + }, + "metadata": {}, + "execution_count": 22 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0][2][2]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Mwy3zmKKSdbk", + "outputId": "d35172c3-b099-40a6-ddf1-a453c2adfa44" + }, + "execution_count": 23, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(9)" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[:,1,1]" + ], + "metadata": { + "colab": { 
+ "base_uri": "https://localhost:8080/" + }, + "id": "fE3nCM1KS7XT", + "outputId": "01f5d755-9737-4235-9f73-dce89ff6ba16" + }, + "execution_count": 24, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([5])" + ] + }, + "metadata": {}, + "execution_count": 24 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0,0,:]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "luNDINKNTTxp", + "outputId": "091195ef-2f71-4602-e95f-529a69193150" + }, + "execution_count": 25, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1, 2, 3])" + ] + }, + "metadata": {}, + "execution_count": 25 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0,:,2]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "KG8A4xbfThCL", + "outputId": "5866bc41-9241-4619-be7b-e9206b3f80ab" + }, + "execution_count": 26, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([3, 6, 9])" + ] + }, + "metadata": {}, + "execution_count": 26 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import numpy as np" + ], + "metadata": { + "id": "CZ3PX0qlTwHJ" + }, + "execution_count": 27, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "array = np.arange(1.0, 8.0)" + ], + "metadata": { + "id": "UOBeTumiT3Lf" + }, + "execution_count": 28, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "array" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "RzcO32E9UCQl", + "outputId": "430def24-c42c-461f-e5e7-398544c695d3" + }, + "execution_count": 29, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([1., 2., 3., 4., 5., 6., 7.])" + ] + }, + "metadata": {}, + "execution_count": 29 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor = torch.from_numpy(array)\n", + "tensor" + ], + "metadata": 
{ + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JJIL0q1DUC6O", + "outputId": "8a3b1d7c-4482-4d32-f34f-9212d9d3a177" + }, + "execution_count": 32, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64)" + ] + }, + "metadata": {}, + "execution_count": 32 + } + ] + }, + { + "cell_type": "code", + "source": [ + "array[3]=11.0" + ], + "metadata": { + "id": "j3Ce6q3DUIEK" + }, + "execution_count": 33, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "array" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dc_BCVdjUsCc", + "outputId": "65537325-8b11-4f36-fc73-e56f30d6a036" + }, + "execution_count": 34, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([ 1., 2., 3., 11., 5., 6., 7.])" + ] + }, + "metadata": {}, + "execution_count": 34 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "VG1e_eITUta2", + "outputId": "a26c5198-23b6-4a6d-d73a-ba20cd9782b8" + }, + "execution_count": 35, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 1., 2., 3., 11., 5., 6., 7.], dtype=torch.float64)" + ] + }, + "metadata": {}, + "execution_count": 35 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor = torch.ones(7)\n", + "tensor, tensor.dtype\n", + "numpy_tensor = tensor.numpy()\n", + "numpy_tensor, numpy_tensor.dtype" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Swt8JF8vUuev", + "outputId": "c9e5bf6a-6d2c-41d6-8327-366867ffdd2d" + }, + "execution_count": 37, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(array([1., 1., 1., 1., 1., 1., 1.], dtype=float32), dtype('float32'))" + ] + }, + "metadata": {}, + "execution_count": 37 + } + ] + }, + { + 
"cell_type": "code", + "source": [ + "import torch\n", + "random_tensor_A = torch.rand(3,4)\n", + "random_tensor_B = torch.rand(3,4)\n", + "print(random_tensor_A)\n", + "print(random_tensor_B)\n", + "print(random_tensor_A == random_tensor_B)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "uGcagTteVFTD", + "outputId": "49405790-08e7-4210-b7f1-f00b904c7eb9" + }, + "execution_count": 38, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tensor([[0.9870, 0.6636, 0.6873, 0.8863],\n", + " [0.8386, 0.4169, 0.3587, 0.0265],\n", + " [0.2981, 0.6025, 0.5652, 0.5840]])\n", + "tensor([[0.9821, 0.3481, 0.0913, 0.4940],\n", + " [0.7495, 0.4387, 0.9582, 0.8659],\n", + " [0.5064, 0.6919, 0.0809, 0.9771]])\n", + "tensor([[False, False, False, False],\n", + " [False, False, False, False],\n", + " [False, False, False, False]])\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "RANDOM_SEED = 42\n", + "torch.manual_seed(RANDOM_SEED)\n", + "random_tensor_C = torch.rand(3,4)\n", + "torch.manual_seed(RANDOM_SEED)\n", + "random_tensor_D = torch.rand(3,4)\n", + "print(random_tensor_C)\n", + "print(random_tensor_D)\n", + "print(random_tensor_C == random_tensor_D)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "HznyXyEaWjLM", + "outputId": "25956434-01b6-4059-9054-c9978884ddc1" + }, + "execution_count": 46, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tensor([[0.8823, 0.9150, 0.3829, 0.9593],\n", + " [0.3904, 0.6009, 0.2566, 0.7936],\n", + " [0.9408, 0.1332, 0.9346, 0.5936]])\n", + "tensor([[0.8823, 0.9150, 0.3829, 0.9593],\n", + " [0.3904, 0.6009, 0.2566, 0.7936],\n", + " [0.9408, 0.1332, 0.9346, 0.5936]])\n", + "tensor([[True, True, True, True],\n", + " [True, True, True, True],\n", + " [True, True, True, True]])\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!nvidia-smi" + ], + "metadata": { + "colab": { + 
"base_uri": "https://localhost:8080/" + }, + "id": "vltPTh0YXJSt", + "outputId": "807af6dc-a9ca-4301-ec32-b688dbde8be8" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Thu May 23 02:57:59 2024 \n", + "+---------------------------------------------------------------------------------------+\n", + "| NVIDIA-SMI 535.104.05 Driver Version: 535.104.05 CUDA Version: 12.2 |\n", + "|-----------------------------------------+----------------------+----------------------+\n", + "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n", + "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n", + "| | | MIG M. |\n", + "|=========================================+======================+======================|\n", + "| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n", + "| N/A 60C P8 11W / 70W | 0MiB / 15360MiB | 0% Default |\n", + "| | | N/A |\n", + "+-----------------------------------------+----------------------+----------------------+\n", + " \n", + "+---------------------------------------------------------------------------------------+\n", + "| Processes: |\n", + "| GPU GI CI PID Type Process name GPU Memory |\n", + "| ID ID Usage |\n", + "|=======================================================================================|\n", + "| No running processes found |\n", + "+---------------------------------------------------------------------------------------+\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "torch.cuda.is_available()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "L6mMyPDyYh1j", + "outputId": "279c5dd8-c2a8-4fbd-f321-2f5d7c6e90e6" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ] + }, + { + "cell_type": "code", + "source": [ + "device = \"cuda\" if 
torch.cuda.is_available() else \"cpu\"\n", + "device" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "oOdiYa7ZYytx", + "outputId": "d73b04fc-8963-4826-9722-08d118d5ab91" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'cuda'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.cuda.device_count()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vOdsazLqZFM5", + "outputId": "8189cd6a-9017-4663-a652-3e15c517d9c3" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "1" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor = torch.tensor([1,2,3], device = \"cpu\")\n", + "print(tensor, tensor.device)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cdik9Vw3ZMv0", + "outputId": "044a68fd-83a1-409d-8e3b-655142ca0270" + }, + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tensor([1, 2, 3]) cpu\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor_on_gpu = tensor.to(device)\n", + "tensor_on_gpu" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Zmp835rrZp-z", + "outputId": "37fa3413-18a3-47bf-ae51-5b36ff85a3ef" + }, + "execution_count": 8, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1, 2, 3], device='cuda:0')" + ] + }, + "metadata": {}, + "execution_count": 8 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor_on_gpu.numpy()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 159 + }, + "id": "jhriaa8uZ1yM", 
+ "outputId": "bc5a3226-1a12-4fea-8769-a44f21cdc323" + }, + "execution_count": 10, + "outputs": [ + { + "output_type": "error", + "ename": "TypeError", + "evalue": "can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtensor_on_gpu\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnumpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first." + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor_on_cpu = tensor_on_gpu.cpu().numpy()" + ], + "metadata": { + "id": "LHGXK3GgaOzL" + }, + "execution_count": 12, + "outputs": [] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "j-El4LlCajfq" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Одрицање од одговорности**: \nОвај документ је преведен коришћењем услуге за превођење помоћу вештачке интелигенције [Co-op Translator](https://github.com/Azure/co-op-translator). Иако тежимо тачности, молимо вас да имате у виду да аутоматски преводи могу садржати грешке или нетачности. Оригинални документ на изворном језику треба сматрати ауторитативним извором. За критичне информације препоручује се професионални превод од стране људи. 
Не сносимо одговорност за било каква погрешна тумачења или неспоразуме који могу произаћи из коришћења овог превода.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sv/2-Regression/1-Tools/notebook.ipynb b/translations/sv/2-Regression/1-Tools/notebook.ipynb new file mode 100644 index 000000000..e69de29bb diff --git a/translations/sv/2-Regression/1-Tools/solution/R/lesson_1-R.ipynb b/translations/sv/2-Regression/1-Tools/solution/R/lesson_1-R.ipynb new file mode 100644 index 000000000..fc88b9af9 --- /dev/null +++ b/translations/sv/2-Regression/1-Tools/solution/R/lesson_1-R.ipynb @@ -0,0 +1,447 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_1-R.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "c18d3bd0bd8ae3878597e89dcd1fa5c1", + "translation_date": "2025-09-06T13:42:52+00:00", + "source_file": "2-Regression/1-Tools/solution/R/lesson_1-R.ipynb", + "language_code": "sv" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "YJUHCXqK57yz" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Introduktion till regression - Lektion 1\n", + "\n", + "#### Sätta det i perspektiv\n", + "\n", + "✅ Det finns många typer av regressionsmetoder, och vilken du väljer beror på vilken typ av svar du söker. Om du vill förutsäga den sannolika längden för en person i en viss ålder, skulle du använda `linjär regression`, eftersom du söker ett **numeriskt värde**. Om du är intresserad av att avgöra om en viss typ av mat ska betraktas som vegansk eller inte, söker du en **kategoriindelning** och skulle använda `logistisk regression`. Du kommer att lära dig mer om logistisk regression senare. 
Fundera lite på några frågor du kan ställa till data, och vilken av dessa metoder som skulle vara mest lämplig.\n", + "\n", + "I det här avsnittet kommer du att arbeta med ett [litet dataset om diabetes](https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html). Föreställ dig att du ville testa en behandling för diabetiker. Maskininlärningsmodeller kan hjälpa dig att avgöra vilka patienter som skulle svara bättre på behandlingen, baserat på kombinationer av variabler. Även en mycket enkel regressionsmodell, när den visualiseras, kan visa information om variabler som kan hjälpa dig att organisera dina teoretiska kliniska studier.\n", + "\n", + "Med det sagt, låt oss sätta igång med denna uppgift!\n", + "\n", + "

\n", + " \n", + "

Konstverk av @allison_horst
\n", + "\n", + "\n" + ], + "metadata": { + "id": "LWNNzfqd6feZ" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 1. Ladda upp vårt verktygspaket\n", + "\n", + "För den här uppgiften behöver vi följande paket:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) är en [samling av R-paket](https://www.tidyverse.org/packages) som är utformade för att göra dataanalys snabbare, enklare och roligare!\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) är ett ramverk som består av en [samling paket](https://www.tidymodels.org/packages/) för modellering och maskininlärning.\n", + "\n", + "Du kan installera dem med följande kommando:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\"))`\n", + "\n", + "Skriptet nedan kontrollerar om du har de paket som krävs för att slutföra denna modul och installerar dem åt dig om några saknas.\n" + ], + "metadata": { + "id": "FIo2YhO26wI9" + } + }, + { + "cell_type": "code", + "execution_count": 2, + "source": [ + "suppressWarnings(if(!require(\"pacman\")) install.packages(\"pacman\"))\n", + "pacman::p_load(tidyverse, tidymodels)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Loading required package: pacman\n", + "\n" + ] + } + ], + "metadata": { + "id": "cIA9fz9v7Dss", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "2df7073b-86b2-4b32-cb86-0da605a0dc11" + } + }, + { + "cell_type": "markdown", + "source": [ + "Nu, låt oss ladda dessa fantastiska paket och göra dem tillgängliga i vår nuvarande R-session. 
(Detta är bara för illustration, `pacman::p_load()` har redan gjort det åt dig)\n" + ], + "metadata": { + "id": "gpO_P_6f9WUG" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# load the core Tidyverse packages\r\n", + "library(tidyverse)\r\n", + "\r\n", + "# load the core Tidymodels packages\r\n", + "library(tidymodels)\r\n" + ], + "outputs": [], + "metadata": { + "id": "NLMycgG-9ezO" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 2. Diabetes-datasetet\n", + "\n", + "I den här övningen ska vi använda våra regressionskunskaper för att göra förutsägelser på ett diabetes-dataset. [Diabetes-datasetet](https://www4.stat.ncsu.edu/~boos/var.select/diabetes.rwrite1.txt) innehåller `442 prover` med data relaterad till diabetes, med 10 prediktorvariabler: `ålder`, `kön`, `kroppsmasseindex`, `genomsnittligt blodtryck` och `sex blodserummätningar` samt en utfallsvariabel `y`: ett kvantitativt mått på sjukdomsprogression ett år efter baslinjen.\n", + "\n", + "|Antal observationer|442|\n", + "|-------------------|:---|\n", + "|Antal prediktorer|De första 10 kolumnerna är numeriska prediktorer|\n", + "|Utfall/Mål|Kolumn 11 är ett kvantitativt mått på sjukdomsprogression ett år efter baslinjen|\n", + "|Information om prediktorer|- ålder i år\n", + "||- kön\n", + "||- bmi kroppsmasseindex\n", + "||- bp genomsnittligt blodtryck\n", + "||- s1 tc, totalt serumkolesterol\n", + "||- s2 ldl, lågdensitetslipoproteiner\n", + "||- s3 hdl, högdensitetslipoproteiner\n", + "||- s4 tch, totalt kolesterol / HDL\n", + "||- s5 ltg, möjligen logaritmen av serumtriglyceridnivå\n", + "||- s6 glu, blodsockernivå|\n", + "\n", + "> 🎓 Kom ihåg, detta är övervakad inlärning, och vi behöver ett namngivet mål 'y'.\n", + "\n", + "Innan du kan manipulera data med R, måste du importera data till R:s minne eller skapa en anslutning till data som R kan använda för att komma åt den på distans.\n", + "\n", + "> Paketet [readr](https://readr.tidyverse.org/), som är en 
del av Tidyverse, erbjuder ett snabbt och användarvänligt sätt att läsa in rektangulära data i R.\n", + "\n", + "Nu ska vi ladda diabetes-datasetet från denna käll-URL: <https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html>\n", + "\n", + "Vi ska också göra en snabb kontroll av vår data med hjälp av `glimpse()` och visa de första 5 raderna med `slice()`.\n", + "\n", + "Innan vi går vidare, låt oss introducera något du ofta kommer att stöta på i R-kod 🥁🥁: pipe-operatorn `%>%`\n", + "\n", + "Pipe-operatorn (`%>%`) utför operationer i logisk sekvens genom att skicka ett objekt vidare till en funktion eller ett uttryck. Du kan tänka på pipe-operatorn som att säga \"och sedan\" i din kod.\n" + ], + "metadata": { + "id": "KM6iXLH996Cl" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Import the data set\r\n", + "diabetes <- read_table2(file = \"https://www4.stat.ncsu.edu/~boos/var.select/diabetes.rwrite1.txt\")\r\n", + "\r\n", + "\r\n", + "# Get a glimpse and dimensions of the data\r\n", + "glimpse(diabetes)\r\n", + "\r\n", + "\r\n", + "# Select the first 5 rows of the data\r\n", + "diabetes %>% \r\n", + " slice(1:5)" + ], + "outputs": [], + "metadata": { + "id": "Z1geAMhM-bSP" + } + }, + { + "cell_type": "markdown", + "source": [ + "`glimpse()` visar oss att denna data har 442 rader och 11 kolumner, där alla kolumner är av datatypen `double`.\n", + "\n", + "
\n", + "\n", + "> glimpse() och slice() är funktioner i [`dplyr`](https://dplyr.tidyverse.org/). Dplyr, som är en del av Tidyverse, är en grammatik för datamanipulation som erbjuder en konsekvent uppsättning verb för att lösa de vanligaste utmaningarna inom datamanipulation.\n", + "\n", + "
\n", + "\n", + "Nu när vi har datan, låt oss fokusera på en specifik variabel (`bmi`) som mål för denna övning. Detta kräver att vi väljer ut de önskade kolumnerna. Så, hur gör vi detta?\n", + "\n", + "[`dplyr::select()`](https://dplyr.tidyverse.org/reference/select.html) låter oss *välja* (och eventuellt byta namn på) kolumner i en data frame.\n" + ], + "metadata": { + "id": "UwjVT1Hz-c3Z" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Select predictor feature `bmi` and outcome `y`\r\n", + "diabetes_select <- diabetes %>% \r\n", + " select(c(bmi, y))\r\n", + "\r\n", + "# Print the first 5 rows\r\n", + "diabetes_select %>% \r\n", + " slice(1:10)" + ], + "outputs": [], + "metadata": { + "id": "RDY1oAKI-m80" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 3. Tränings- och testdata\n", + "\n", + "Det är vanligt inom övervakad inlärning att *dela upp* data i två delmängder; en (vanligtvis större) uppsättning för att träna modellen, och en mindre \"håll-ut\" uppsättning för att se hur modellen presterade.\n", + "\n", + "Nu när vi har data redo kan vi se om en maskin kan hjälpa till att avgöra en logisk uppdelning mellan siffrorna i detta dataset. 
Vi kan använda paketet [rsample](https://tidymodels.github.io/rsample/), som är en del av Tidymodels-ramverket, för att skapa ett objekt som innehåller information om *hur* man delar upp data, och sedan två ytterligare rsample-funktioner för att extrahera de skapade tränings- och testuppsättningarna:\n" + ], + "metadata": { + "id": "SDk668xK-tc3" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "set.seed(2056)\r\n", + "# Split 67% of the data for training and the rest for testing\r\n", + "diabetes_split <- diabetes_select %>% \r\n", + " initial_split(prop = 0.67)\r\n", + "\r\n", + "# Extract the resulting train and test sets\r\n", + "diabetes_train <- training(diabetes_split)\r\n", + "diabetes_test <- testing(diabetes_split)\r\n", + "\r\n", + "# Print the first 10 rows of the training set\r\n", + "diabetes_train %>% \r\n", + " slice(1:10)" + ], + "outputs": [], + "metadata": { + "id": "EqtHx129-1h-" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 4. Träna en linjär regressionsmodell med Tidymodels\n", + "\n", + "Nu är vi redo att träna vår modell!\n", + "\n", + "I Tidymodels specificerar du modeller med `parsnip()` genom att ange tre koncept:\n", + "\n", + "- Modellens **typ** skiljer mellan olika modeller som linjär regression, logistisk regression, beslutsträd och så vidare.\n", + "\n", + "- Modellens **läge** inkluderar vanliga alternativ som regression och klassificering; vissa modelltyper stödjer båda dessa medan andra bara har ett läge.\n", + "\n", + "- Modellens **motor** är det beräkningsverktyg som kommer att användas för att anpassa modellen.
Ofta är dessa R-paket, såsom **`\"lm\"`** eller **`\"ranger\"`**\n", + "\n", + "Denna modellinformation fångas i en modellspecifikation, så låt oss skapa en!\n" + ], + "metadata": { + "id": "sBOS-XhB-6v7" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Build a linear model specification\r\n", + "lm_spec <- \r\n", + " # Type\r\n", + " linear_reg() %>% \r\n", + " # Engine\r\n", + " set_engine(\"lm\") %>% \r\n", + " # Mode\r\n", + " set_mode(\"regression\")\r\n", + "\r\n", + "\r\n", + "# Print the model specification\r\n", + "lm_spec" + ], + "outputs": [], + "metadata": { + "id": "20OwEw20--t3" + } + }, + { + "cell_type": "markdown", + "source": [ + "Efter att en modell har *specificerats* kan modellen `estimeras` eller `tränas` med hjälp av funktionen [`fit()`](https://parsnip.tidymodels.org/reference/fit.html), vanligtvis med en formel och lite data.\n", + "\n", + "`y ~ .` betyder att vi kommer att anpassa `y` som den förutsagda kvantiteten/målet, förklarad av alla prediktorer/egenskaper, dvs. `.` (i det här fallet har vi bara en prediktor: `bmi`).\n" + ], + "metadata": { + "id": "_oDHs89k_CJj" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Build a linear model specification\r\n", + "lm_spec <- linear_reg() %>% \r\n", + " set_engine(\"lm\") %>%\r\n", + " set_mode(\"regression\")\r\n", + "\r\n", + "\r\n", + "# Train a linear regression model\r\n", + "lm_mod <- lm_spec %>% \r\n", + " fit(y ~ ., data = diabetes_train)\r\n", + "\r\n", + "# Print the model\r\n", + "lm_mod" + ], + "outputs": [], + "metadata": { + "id": "YlsHqd-q_GJQ" + } + }, + { + "cell_type": "markdown", + "source": [ + "Från modellens output kan vi se de koefficienter som lärdes in under träningen. Dessa representerar koefficienterna för den bästa anpassade linjen som ger oss det lägsta totala felet mellan den faktiska och den förutsagda variabeln.\n", + "
\n", + "\n", + "## 5. Gör förutsägelser på testuppsättningen\n", + "\n", + "Nu när vi har tränat en modell kan vi använda den för att förutsäga sjukdomsprogressionen y för testdatamängden med hjälp av [parsnip::predict()](https://parsnip.tidymodels.org/reference/predict.model_fit.html). Detta kommer att användas för att dra linjen mellan datagrupper.\n" + ], + "metadata": { + "id": "kGZ22RQj_Olu" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make predictions for the test set\r\n", + "predictions <- lm_mod %>% \r\n", + " predict(new_data = diabetes_test)\r\n", + "\r\n", + "# Print out some of the predictions\r\n", + "predictions %>% \r\n", + " slice(1:5)" + ], + "outputs": [], + "metadata": { + "id": "nXHbY7M2_aao" + } + }, + { + "cell_type": "markdown", + "source": [ + "Woohoo! 💃🕺 Vi har precis tränat en modell och använt den för att göra förutsägelser!\n", + "\n", + "När man gör förutsägelser är tidymodels-konventionen att alltid skapa en tibble/data frame med resultat och standardiserade kolumnnamn. Detta gör det enkelt att kombinera den ursprungliga datan med förutsägelserna i ett användbart format för efterföljande operationer, såsom att skapa diagram.\n", + "\n", + "`dplyr::bind_cols()` binder effektivt flera data frames kolumnvis.\n" + ], + "metadata": { + "id": "R_JstwUY_bIs" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Combine the predictions and the original test set\r\n", + "results <- diabetes_test %>% \r\n", + " bind_cols(predictions)\r\n", + "\r\n", + "\r\n", + "results %>% \r\n", + " slice(1:5)" + ], + "outputs": [], + "metadata": { + "id": "RybsMJR7_iI8" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 6. Visa modellresultat\n", + "\n", + "Nu är det dags att se detta visuellt 📈. 
Vi ska skapa ett spridningsdiagram med alla `y`- och `bmi`-värden från testuppsättningen och sedan använda förutsägelserna för att rita en linje på den mest lämpliga platsen, mellan modellens datagrupperingar.\n", + "\n", + "R har flera system för att skapa grafer, men `ggplot2` är ett av de mest eleganta och mångsidiga. Det gör det möjligt att komponera grafer genom att **kombinera oberoende komponenter**.\n" + ], + "metadata": { + "id": "XJbYbMZW_n_s" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Set a theme for the plot\r\n", + "theme_set(theme_light())\r\n", + "# Create a scatter plot\r\n", + "results %>% \r\n", + " ggplot(aes(x = bmi)) +\r\n", + " # Add a scatter plot\r\n", + " geom_point(aes(y = y), size = 1.6) +\r\n", + " # Add a line plot\r\n", + " geom_line(aes(y = .pred), color = \"blue\", size = 1.5)" + ], + "outputs": [], + "metadata": { + "id": "R9tYp3VW_sTn" + } + }, + { + "cell_type": "markdown", + "source": [ + "✅ Fundera lite på vad som händer här. En rak linje går genom många små datapunkter, men vad gör den egentligen? Kan du se hur du borde kunna använda denna linje för att förutsäga var en ny, osedd datapunkt borde passa i förhållande till diagrammets y-axel? Försök att formulera den praktiska användningen av denna modell.\n", + "\n", + "Grattis, du har byggt din första linjära regressionsmodell, gjort en förutsägelse med den och visat den i ett diagram!\n" + ], + "metadata": { + "id": "zrPtHIxx_tNI" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, vänligen notera att automatiska översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på sitt originalspråk bör betraktas som den auktoritativa källan. 
För kritisk information rekommenderas professionell mänsklig översättning. Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sv/2-Regression/1-Tools/solution/notebook.ipynb b/translations/sv/2-Regression/1-Tools/solution/notebook.ipynb new file mode 100644 index 000000000..53be42a79 --- /dev/null +++ b/translations/sv/2-Regression/1-Tools/solution/notebook.ipynb @@ -0,0 +1,677 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Linjär regression för Diabetes-datasetet - Lektion 1\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Importera nödvändiga bibliotek\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from sklearn import datasets, linear_model, model_selection\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Ladda diabetesdatasetet, uppdelat i `X`-data och `y`-målvärden\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(442, 10)\n", + "[ 0.03807591 0.05068012 0.06169621 0.02187239 -0.0442235 -0.03482076\n", + " -0.04340085 -0.00259226 0.01990749 -0.01764613]\n" + ] + } + ], + "source": [ + "X, y = datasets.load_diabetes(return_X_y=True)\n", + "print(X.shape)\n", + "print(X[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Välj bara en egenskap (feature) att rikta in dig på för denna övning\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(442,)\n" + ] + } + ], + "source": [ + "# Selecting the 3rd feature\n", + "X = X[:, 2]\n", + "print(X.shape)\n" + ] + }, + { +
"cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(442, 1)\n", + "[[ 0.06169621]\n", + " [-0.05147406]\n", + " [ 0.04445121]\n", + " [-0.01159501]\n", + " [-0.03638469]\n", + " [-0.04069594]\n", + " [-0.04716281]\n", + " [-0.00189471]\n", + " [ 0.06169621]\n", + " [ 0.03906215]\n", + " [-0.08380842]\n", + " [ 0.01750591]\n", + " [-0.02884001]\n", + " [-0.00189471]\n", + " [-0.02560657]\n", + " [-0.01806189]\n", + " [ 0.04229559]\n", + " [ 0.01211685]\n", + " [-0.0105172 ]\n", + " [-0.01806189]\n", + " [-0.05686312]\n", + " [-0.02237314]\n", + " [-0.00405033]\n", + " [ 0.06061839]\n", + " [ 0.03582872]\n", + " [-0.01267283]\n", + " [-0.07734155]\n", + " [ 0.05954058]\n", + " [-0.02129532]\n", + " [-0.00620595]\n", + " [ 0.04445121]\n", + " [-0.06548562]\n", + " [ 0.12528712]\n", + " [-0.05039625]\n", + " [-0.06332999]\n", + " [-0.03099563]\n", + " [ 0.02289497]\n", + " [ 0.01103904]\n", + " [ 0.07139652]\n", + " [ 0.01427248]\n", + " [-0.00836158]\n", + " [-0.06764124]\n", + " [-0.0105172 ]\n", + " [-0.02345095]\n", + " [ 0.06816308]\n", + " [-0.03530688]\n", + " [-0.01159501]\n", + " [-0.0730303 ]\n", + " [-0.04177375]\n", + " [ 0.01427248]\n", + " [-0.00728377]\n", + " [ 0.0164281 ]\n", + " [-0.00943939]\n", + " [-0.01590626]\n", + " [ 0.0250506 ]\n", + " [-0.04931844]\n", + " [ 0.04121778]\n", + " [-0.06332999]\n", + " [-0.06440781]\n", + " [-0.02560657]\n", + " [-0.00405033]\n", + " [ 0.00457217]\n", + " [-0.00728377]\n", + " [-0.0374625 ]\n", + " [-0.02560657]\n", + " [-0.02452876]\n", + " [-0.01806189]\n", + " [-0.01482845]\n", + " [-0.02991782]\n", + " [-0.046085 ]\n", + " [-0.06979687]\n", + " [ 0.03367309]\n", + " [-0.00405033]\n", + " [-0.02021751]\n", + " [ 0.00241654]\n", + " [-0.03099563]\n", + " [ 0.02828403]\n", + " [-0.03638469]\n", + " [-0.05794093]\n", + " [-0.0374625 ]\n", + " [ 0.01211685]\n", + " [-0.02237314]\n", + " [-0.03530688]\n", + " [ 
0.00996123]\n", + " [-0.03961813]\n", + " [ 0.07139652]\n", + " [-0.07518593]\n", + " [-0.00620595]\n", + " [-0.04069594]\n", + " [-0.04824063]\n", + " [-0.02560657]\n", + " [ 0.0519959 ]\n", + " [ 0.00457217]\n", + " [-0.06440781]\n", + " [-0.01698407]\n", + " [-0.05794093]\n", + " [ 0.00996123]\n", + " [ 0.08864151]\n", + " [-0.00512814]\n", + " [-0.06440781]\n", + " [ 0.01750591]\n", + " [-0.04500719]\n", + " [ 0.02828403]\n", + " [ 0.04121778]\n", + " [ 0.06492964]\n", + " [-0.03207344]\n", + " [-0.07626374]\n", + " [ 0.04984027]\n", + " [ 0.04552903]\n", + " [-0.00943939]\n", + " [-0.03207344]\n", + " [ 0.00457217]\n", + " [ 0.02073935]\n", + " [ 0.01427248]\n", + " [ 0.11019775]\n", + " [ 0.00133873]\n", + " [ 0.05846277]\n", + " [-0.02129532]\n", + " [-0.0105172 ]\n", + " [-0.04716281]\n", + " [ 0.00457217]\n", + " [ 0.01750591]\n", + " [ 0.08109682]\n", + " [ 0.0347509 ]\n", + " [ 0.02397278]\n", + " [-0.00836158]\n", + " [-0.06117437]\n", + " [-0.00189471]\n", + " [-0.06225218]\n", + " [ 0.0164281 ]\n", + " [ 0.09618619]\n", + " [-0.06979687]\n", + " [-0.02129532]\n", + " [-0.05362969]\n", + " [ 0.0433734 ]\n", + " [ 0.05630715]\n", + " [-0.0816528 ]\n", + " [ 0.04984027]\n", + " [ 0.11127556]\n", + " [ 0.06169621]\n", + " [ 0.01427248]\n", + " [ 0.04768465]\n", + " [ 0.01211685]\n", + " [ 0.00564998]\n", + " [ 0.04660684]\n", + " [ 0.12852056]\n", + " [ 0.05954058]\n", + " [ 0.09295276]\n", + " [ 0.01535029]\n", + " [-0.00512814]\n", + " [ 0.0703187 ]\n", + " [-0.00405033]\n", + " [-0.00081689]\n", + " [-0.04392938]\n", + " [ 0.02073935]\n", + " [ 0.06061839]\n", + " [-0.0105172 ]\n", + " [-0.03315126]\n", + " [-0.06548562]\n", + " [ 0.0433734 ]\n", + " [-0.06225218]\n", + " [ 0.06385183]\n", + " [ 0.03043966]\n", + " [ 0.07247433]\n", + " [-0.0191397 ]\n", + " [-0.06656343]\n", + " [-0.06009656]\n", + " [ 0.06924089]\n", + " [ 0.05954058]\n", + " [-0.02668438]\n", + " [-0.02021751]\n", + " [-0.046085 ]\n", + " [ 0.07139652]\n", + " [-0.07949718]\n", + " 
[ 0.00996123]\n", + " [-0.03854032]\n", + " [ 0.01966154]\n", + " [ 0.02720622]\n", + " [-0.00836158]\n", + " [-0.01590626]\n", + " [ 0.00457217]\n", + " [-0.04285156]\n", + " [ 0.00564998]\n", + " [-0.03530688]\n", + " [ 0.02397278]\n", + " [-0.01806189]\n", + " [ 0.04229559]\n", + " [-0.0547075 ]\n", + " [-0.00297252]\n", + " [-0.06656343]\n", + " [-0.01267283]\n", + " [-0.04177375]\n", + " [-0.03099563]\n", + " [-0.00512814]\n", + " [-0.05901875]\n", + " [ 0.0250506 ]\n", + " [-0.046085 ]\n", + " [ 0.00349435]\n", + " [ 0.05415152]\n", + " [-0.04500719]\n", + " [-0.05794093]\n", + " [-0.05578531]\n", + " [ 0.00133873]\n", + " [ 0.03043966]\n", + " [ 0.00672779]\n", + " [ 0.04660684]\n", + " [ 0.02612841]\n", + " [ 0.04552903]\n", + " [ 0.04013997]\n", + " [-0.01806189]\n", + " [ 0.01427248]\n", + " [ 0.03690653]\n", + " [ 0.00349435]\n", + " [-0.07087468]\n", + " [-0.03315126]\n", + " [ 0.09403057]\n", + " [ 0.03582872]\n", + " [ 0.03151747]\n", + " [-0.06548562]\n", + " [-0.04177375]\n", + " [-0.03961813]\n", + " [-0.03854032]\n", + " [-0.02560657]\n", + " [-0.02345095]\n", + " [-0.06656343]\n", + " [ 0.03259528]\n", + " [-0.046085 ]\n", + " [-0.02991782]\n", + " [-0.01267283]\n", + " [-0.01590626]\n", + " [ 0.07139652]\n", + " [-0.03099563]\n", + " [ 0.00026092]\n", + " [ 0.03690653]\n", + " [ 0.03906215]\n", + " [-0.01482845]\n", + " [ 0.00672779]\n", + " [-0.06871905]\n", + " [-0.00943939]\n", + " [ 0.01966154]\n", + " [ 0.07462995]\n", + " [-0.00836158]\n", + " [-0.02345095]\n", + " [-0.046085 ]\n", + " [ 0.05415152]\n", + " [-0.03530688]\n", + " [-0.03207344]\n", + " [-0.0816528 ]\n", + " [ 0.04768465]\n", + " [ 0.06061839]\n", + " [ 0.05630715]\n", + " [ 0.09834182]\n", + " [ 0.05954058]\n", + " [ 0.03367309]\n", + " [ 0.05630715]\n", + " [-0.06548562]\n", + " [ 0.16085492]\n", + " [-0.05578531]\n", + " [-0.02452876]\n", + " [-0.03638469]\n", + " [-0.00836158]\n", + " [-0.04177375]\n", + " [ 0.12744274]\n", + " [-0.07734155]\n", + " [ 0.02828403]\n", + " 
[-0.02560657]\n", + " [-0.06225218]\n", + " [-0.00081689]\n", + " [ 0.08864151]\n", + " [-0.03207344]\n", + " [ 0.03043966]\n", + " [ 0.00888341]\n", + " [ 0.00672779]\n", + " [-0.02021751]\n", + " [-0.02452876]\n", + " [-0.01159501]\n", + " [ 0.02612841]\n", + " [-0.05901875]\n", + " [-0.03638469]\n", + " [-0.02452876]\n", + " [ 0.01858372]\n", + " [-0.0902753 ]\n", + " [-0.00512814]\n", + " [-0.05255187]\n", + " [-0.02237314]\n", + " [-0.02021751]\n", + " [-0.0547075 ]\n", + " [-0.00620595]\n", + " [-0.01698407]\n", + " [ 0.05522933]\n", + " [ 0.07678558]\n", + " [ 0.01858372]\n", + " [-0.02237314]\n", + " [ 0.09295276]\n", + " [-0.03099563]\n", + " [ 0.03906215]\n", + " [-0.06117437]\n", + " [-0.00836158]\n", + " [-0.0374625 ]\n", + " [-0.01375064]\n", + " [ 0.07355214]\n", + " [-0.02452876]\n", + " [ 0.03367309]\n", + " [ 0.0347509 ]\n", + " [-0.03854032]\n", + " [-0.03961813]\n", + " [-0.00189471]\n", + " [-0.03099563]\n", + " [-0.046085 ]\n", + " [ 0.00133873]\n", + " [ 0.06492964]\n", + " [ 0.04013997]\n", + " [-0.02345095]\n", + " [ 0.05307371]\n", + " [ 0.04013997]\n", + " [-0.02021751]\n", + " [ 0.01427248]\n", + " [-0.03422907]\n", + " [ 0.00672779]\n", + " [ 0.00457217]\n", + " [ 0.03043966]\n", + " [ 0.0519959 ]\n", + " [ 0.06169621]\n", + " [-0.00728377]\n", + " [ 0.00564998]\n", + " [ 0.05415152]\n", + " [-0.00836158]\n", + " [ 0.114509 ]\n", + " [ 0.06708527]\n", + " [-0.05578531]\n", + " [ 0.03043966]\n", + " [-0.02560657]\n", + " [ 0.10480869]\n", + " [-0.00620595]\n", + " [-0.04716281]\n", + " [-0.04824063]\n", + " [ 0.08540807]\n", + " [-0.01267283]\n", + " [-0.03315126]\n", + " [-0.00728377]\n", + " [-0.01375064]\n", + " [ 0.05954058]\n", + " [ 0.02181716]\n", + " [ 0.01858372]\n", + " [-0.01159501]\n", + " [-0.00297252]\n", + " [ 0.01750591]\n", + " [-0.02991782]\n", + " [-0.02021751]\n", + " [-0.05794093]\n", + " [ 0.06061839]\n", + " [-0.04069594]\n", + " [-0.07195249]\n", + " [-0.05578531]\n", + " [ 0.04552903]\n", + " [-0.00943939]\n", + " 
[-0.03315126]\n", + " [ 0.04984027]\n", + " [-0.08488624]\n", + " [ 0.00564998]\n", + " [ 0.02073935]\n", + " [-0.00728377]\n", + " [ 0.10480869]\n", + " [-0.02452876]\n", + " [-0.00620595]\n", + " [-0.03854032]\n", + " [ 0.13714305]\n", + " [ 0.17055523]\n", + " [ 0.00241654]\n", + " [ 0.03798434]\n", + " [-0.05794093]\n", + " [-0.00943939]\n", + " [-0.02345095]\n", + " [-0.0105172 ]\n", + " [-0.03422907]\n", + " [-0.00297252]\n", + " [ 0.06816308]\n", + " [ 0.00996123]\n", + " [ 0.00241654]\n", + " [-0.03854032]\n", + " [ 0.02612841]\n", + " [-0.08919748]\n", + " [ 0.06061839]\n", + " [-0.02884001]\n", + " [-0.02991782]\n", + " [-0.0191397 ]\n", + " [-0.04069594]\n", + " [ 0.01535029]\n", + " [-0.02452876]\n", + " [ 0.00133873]\n", + " [ 0.06924089]\n", + " [-0.06979687]\n", + " [-0.02991782]\n", + " [-0.046085 ]\n", + " [ 0.01858372]\n", + " [ 0.00133873]\n", + " [-0.03099563]\n", + " [-0.00405033]\n", + " [ 0.01535029]\n", + " [ 0.02289497]\n", + " [ 0.04552903]\n", + " [-0.04500719]\n", + " [-0.03315126]\n", + " [ 0.097264 ]\n", + " [ 0.05415152]\n", + " [ 0.12313149]\n", + " [-0.08057499]\n", + " [ 0.09295276]\n", + " [-0.05039625]\n", + " [-0.01159501]\n", + " [-0.0277622 ]\n", + " [ 0.05846277]\n", + " [ 0.08540807]\n", + " [-0.00081689]\n", + " [ 0.00672779]\n", + " [ 0.00888341]\n", + " [ 0.08001901]\n", + " [ 0.07139652]\n", + " [-0.02452876]\n", + " [-0.0547075 ]\n", + " [-0.03638469]\n", + " [ 0.0164281 ]\n", + " [ 0.07786339]\n", + " [-0.03961813]\n", + " [ 0.01103904]\n", + " [-0.04069594]\n", + " [-0.03422907]\n", + " [ 0.00564998]\n", + " [ 0.08864151]\n", + " [-0.03315126]\n", + " [-0.05686312]\n", + " [-0.03099563]\n", + " [ 0.05522933]\n", + " [-0.06009656]\n", + " [ 0.00133873]\n", + " [-0.02345095]\n", + " [-0.07410811]\n", + " [ 0.01966154]\n", + " [-0.01590626]\n", + " [-0.01590626]\n", + " [ 0.03906215]\n", + " [-0.0730303 ]]\n" + ] + } + ], + "source": [ + "#Reshaping to get a 2D array\n", + "X = X.reshape(-1, 1)\n", + "print(X.shape)\n", 
+ "print(X)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Dela upp tränings- och testdata för både `X` och `y`\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.33)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Välj modellen och anpassa den med träningsdatan\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "LinearRegression()" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = linear_model.LinearRegression()\n", + "model.fit(X_train, y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Använd testdata för att förutsäga en linje\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred = model.predict(X_test)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Visa resultaten i ett diagram\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.scatter(X_test, y_test, color='black')\n", + "plt.plot(X_test, y_pred, color='blue', linewidth=3)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, vänligen notera att automatiska översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på dess originalspråk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.1" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "16ff1a974f6e4348e869e4a7d366b86a", + "translation_date": "2025-09-06T13:39:08+00:00", + "source_file": "2-Regression/1-Tools/solution/notebook.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sv/2-Regression/2-Data/notebook.ipynb b/translations/sv/2-Regression/2-Data/notebook.ipynb new file mode 100644 index 000000000..b923d18b6 --- /dev/null +++ b/translations/sv/2-Regression/2-Data/notebook.ipynb @@ -0,0 +1,46 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + 
"name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3-final" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3", + "language": "python" + }, + "coopTranslator": { + "original_hash": "1b2ab303ac6c604a34c6ca7a49077fc7", + "translation_date": "2025-09-06T13:45:57+00:00", + "source_file": "2-Regression/2-Data/notebook.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, vänligen notera att automatiska översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på dess originalspråk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. 
Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sv/2-Regression/2-Data/solution/R/lesson_2-R.ipynb b/translations/sv/2-Regression/2-Data/solution/R/lesson_2-R.ipynb new file mode 100644 index 000000000..2c5ba848d --- /dev/null +++ b/translations/sv/2-Regression/2-Data/solution/R/lesson_2-R.ipynb @@ -0,0 +1,671 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_2-R.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "f3c335f9940cfd76528b3ef918b9b342", + "translation_date": "2025-09-06T13:51:44+00:00", + "source_file": "2-Regression/2-Data/solution/R/lesson_2-R.ipynb", + "language_code": "sv" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Bygg en regressionsmodell: förbered och visualisera data\n", + "\n", + "## **Linjär regression för pumpor - Lektion 2**\n", + "#### Introduktion\n", + "\n", + "Nu när du har verktygen du behöver för att börja bygga maskininlärningsmodeller med Tidymodels och Tidyverse, är du redo att börja ställa frågor till dina data. När du arbetar med data och tillämpar ML-lösningar är det mycket viktigt att förstå hur man ställer rätt fråga för att verkligen utnyttja potentialen i din dataset.\n", + "\n", + "I den här lektionen kommer du att lära dig:\n", + "\n", + "- Hur du förbereder dina data för modellbyggande.\n", + "\n", + "- Hur du använder `ggplot2` för datavisualisering.\n", + "\n", + "Frågan du behöver svar på avgör vilken typ av ML-algoritmer du kommer att använda. Och kvaliteten på svaret du får tillbaka kommer att vara starkt beroende av datans natur.\n", + "\n", + "Låt oss se detta genom att arbeta igenom en praktisk övning.\n", + "\n", + "\n", + "

\n", + " \n", + "

Konstverk av @allison_horst
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "Pg5aexcOPqAZ" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 1. Importera pumpadata och kalla på Tidyverse\n", + "\n", + "Vi kommer att behöva följande paket för att hantera denna lektion:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) är en [samling av R-paket](https://www.tidyverse.org/packages) som är utformade för att göra datavetenskap snabbare, enklare och roligare!\n", + "\n", + "Du kan installera dem med:\n", + "\n", + "`install.packages(c(\"tidyverse\"))`\n", + "\n", + "Skriptet nedan kontrollerar om du har de paket som krävs för att slutföra denna modul och installerar dem åt dig om några saknas.\n" + ], + "metadata": { + "id": "dc5WhyVdXAjR" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "suppressWarnings(if(!require(\"pacman\")) install.packages(\"pacman\"))\n", + "pacman::p_load(tidyverse)" + ], + "outputs": [], + "metadata": { + "id": "GqPYUZgfXOBt" + } + }, + { + "cell_type": "markdown", + "source": [ + "Nu sätter vi igång några paket och laddar [data](https://github.com/microsoft/ML-For-Beginners/blob/main/2-Regression/data/US-pumpkins.csv) som tillhandahålls för denna lektion!\n" + ], + "metadata": { + "id": "kvjDTPDSXRr2" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load the core Tidyverse packages\n", + "library(tidyverse)\n", + "\n", + "# Import the pumpkins data\n", + "pumpkins <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/2-Regression/data/US-pumpkins.csv\")\n", + "\n", + "\n", + "# Get a glimpse and dimensions of the data\n", + "glimpse(pumpkins)\n", + "\n", + "\n", + "# Print the first 50 rows of the data set\n", + "pumpkins %>% \n", + " slice_head(n =50)" + ], + "outputs": [], + "metadata": { + "id": "VMri-t2zXqgD" + } + }, + { + "cell_type": "markdown", + "source": [ + "En snabb `glimpse()` visar direkt att det finns tomma värden och en 
blandning av strängar (`chr`) och numeriska data (`dbl`). `Date` är av typen tecken och det finns också en märklig kolumn kallad `Package` där datan är en blandning av `sacks`, `bins` och andra värden. Datan är faktiskt lite av en röra 😤.\n", + "\n", + "Det är faktiskt inte särskilt vanligt att få ett dataset som är helt redo att användas för att skapa en ML-modell direkt. Men oroa dig inte, i denna lektion kommer du att lära dig hur man förbereder ett rått dataset med hjälp av standardbibliotek i R 🧑‍🔧. Du kommer också att lära dig olika tekniker för att visualisera datan.📈📊\n", + "
\n", + "\n", + "> En påminnelse: Pipe-operatorn (`%>%`) utför operationer i logisk sekvens genom att skicka ett objekt framåt in i en funktion eller ett uttryck. Du kan tänka på pipe-operatorn som att säga \"och sedan\" i din kod.\n" + ], + "metadata": { + "id": "REWcIv9yX29v" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 2. Kontrollera för saknade data\n", + "\n", + "Ett av de vanligaste problemen som dataanalytiker måste hantera är ofullständig eller saknad data. R representerar saknade eller okända värden med ett speciellt sentinelvärde: `NA` (Not Available).\n", + "\n", + "Så hur kan vi veta att dataframen innehåller saknade värden?\n", + "
\n", + "- Ett direkt sätt skulle vara att använda R:s inbyggda funktion `anyNA`, som returnerar de logiska objekten `TRUE` eller `FALSE`.\n" + ], + "metadata": { + "id": "Zxfb3AM5YbUe" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "pumpkins %>% \n", + " anyNA()" + ], + "outputs": [], + "metadata": { + "id": "G--DQutAYltj" + } + }, + { + "cell_type": "markdown", + "source": [ + "Bra, det verkar som att det saknas viss data! Det är en bra utgångspunkt.\n", + "\n", + "- Ett annat sätt skulle vara att använda funktionen `is.na()` som visar vilka enskilda kolumnelement som saknas med ett logiskt värde `TRUE`.\n" + ], + "metadata": { + "id": "mU-7-SB6YokF" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "pumpkins %>% \n", + " is.na() %>% \n", + " head(n = 7)" + ], + "outputs": [], + "metadata": { + "id": "W-DxDOR4YxSW" + } + }, + { + "cell_type": "markdown", + "source": [ + "Okej, jobbet är klart men med en så stor dataram som denna skulle det vara ineffektivt och praktiskt taget omöjligt att granska alla rader och kolumner individuellt😴.\n", + "\n", + "- Ett mer intuitivt sätt skulle vara att beräkna summan av de saknade värdena för varje kolumn:\n" + ], + "metadata": { + "id": "xUWxipKYY0o7" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "pumpkins %>% \n", + " is.na() %>% \n", + " colSums()" + ], + "outputs": [], + "metadata": { + "id": "ZRBWV6P9ZArL" + } + }, + { + "cell_type": "markdown", + "source": [ + "Mycket bättre! Det saknas data, men kanske spelar det ingen roll för uppgiften. Låt oss se vad vidare analys ger.\n", + "\n", + "> Tillsammans med de fantastiska uppsättningarna av paket och funktioner har R en mycket bra dokumentation. Till exempel, använd `help(colSums)` eller `?colSums` för att ta reda på mer om funktionen.\n" + ], + "metadata": { + "id": "9gv-crB6ZD1Y" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 3. 
Dplyr: En grammatik för datamanipulation\n", + "\n", + "

\n", + " \n", + "

Konstverk av @allison_horst
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "o4jLY5-VZO2C" + } + }, + { + "cell_type": "markdown", + "source": [ + "[`dplyr`](https://dplyr.tidyverse.org/), ett paket i Tidyverse, är en grammatik för datamanipulation som erbjuder en konsekvent uppsättning verb som hjälper dig att lösa de vanligaste utmaningarna inom datamanipulation. I det här avsnittet ska vi utforska några av dplyrs verb! \n", + "
\n" + ], + "metadata": { + "id": "i5o33MQBZWWw" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### dplyr::select()\n", + "\n", + "`select()` är en funktion i paketet `dplyr` som hjälper dig att välja vilka kolumner du vill behålla eller exkludera.\n", + "\n", + "För att göra din data frame enklare att arbeta med, ta bort flera av dess kolumner med hjälp av `select()` och behåll endast de kolumner du behöver.\n", + "\n", + "Till exempel, i denna övning kommer vår analys att involvera kolumnerna `Package`, `Low Price`, `High Price` och `Date`. Låt oss välja dessa kolumner.\n" + ], + "metadata": { + "id": "x3VGMAGBZiUr" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Select desired columns\n", + "pumpkins <- pumpkins %>% \n", + " select(Package, `Low Price`, `High Price`, Date)\n", + "\n", + "\n", + "# Print data set\n", + "pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "F_FgxQnVZnM0" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### dplyr::mutate()\n", + "\n", + "`mutate()` är en funktion i paketet `dplyr` som hjälper dig att skapa eller ändra kolumner, samtidigt som de befintliga kolumnerna behålls.\n", + "\n", + "Den generella strukturen för mutate är:\n", + "\n", + "`data %>% mutate(new_column_name = what_it_contains)`\n", + "\n", + "Låt oss testa `mutate` med hjälp av kolumnen `Date` genom att utföra följande operationer:\n", + "\n", + "1. Konvertera datumen (som för närvarande är av typen tecken) till ett månadsformat (det här är amerikanska datum, så formatet är `MM/DD/YYYY`).\n", + "\n", + "2. Extrahera månaden från datumen till en ny kolumn.\n", + "\n", + "I R gör paketet [lubridate](https://lubridate.tidyverse.org/) det enklare att arbeta med datum- och tidsdata. Så låt oss använda `dplyr::mutate()`, `lubridate::mdy()`, `lubridate::month()` och se hur vi kan uppnå ovanstående mål. 
Vi kan ta bort kolumnen Date eftersom vi inte kommer att behöva den igen i efterföljande operationer.\n" + ], + "metadata": { + "id": "2KKo0Ed9Z1VB" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load lubridate\n", + "library(lubridate)\n", + "\n", + "pumpkins <- pumpkins %>% \n", + " # Convert the Date column to a date object\n", + " mutate(Date = mdy(Date)) %>% \n", + " # Extract month from Date\n", + " mutate(Month = month(Date)) %>% \n", + " # Drop Date column\n", + " select(-Date)\n", + "\n", + "# View the first few rows\n", + "pumpkins %>% \n", + " slice_head(n = 7)" + ], + "outputs": [], + "metadata": { + "id": "5joszIVSZ6xe" + } + }, + { + "cell_type": "markdown", + "source": [ + "Woohoo! 🤩\n", + "\n", + "Nu ska vi skapa en ny kolumn `Price`, som representerar det genomsnittliga priset på en pumpa. Låt oss nu ta genomsnittet av kolumnerna `Low Price` och `High Price` för att fylla i den nya kolumnen Price.\n", + "
\n" + ], + "metadata": { + "id": "nIgLjNMCZ-6Y" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Create a new column Price\n", + "pumpkins <- pumpkins %>% \n", + " mutate(Price = (`Low Price` + `High Price`)/2)\n", + "\n", + "# View the first few rows of the data\n", + "pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "Zo0BsqqtaJw2" + } + }, + { + "cell_type": "markdown", + "source": [ + "Jaaa!💪\n", + "\n", + "\"Men vänta lite!\", säger du efter att ha skummat igenom hela datasettet med `View(pumpkins)`, \"Det är något konstigt här!\"🤔\n", + "\n", + "Om du tittar på kolumnen `Package`, säljs pumpor i många olika konfigurationer. Vissa säljs i mått som `1 1/9 bushel`, andra i `1/2 bushel`, några per pumpa, några per pund, och vissa i stora lådor med varierande bredder.\n", + "\n", + "Låt oss kontrollera detta:\n" + ], + "metadata": { + "id": "p77WZr-9aQAR" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Verify the distinct observations in Package column\n", + "pumpkins %>% \n", + " distinct(Package)" + ], + "outputs": [], + "metadata": { + "id": "XISGfh0IaUy6" + } + }, + { + "cell_type": "markdown", + "source": [ + "Fantastiskt!👏\n", + "\n", + "Pumpor verkar vara väldigt svåra att väga konsekvent, så låt oss filtrera dem genom att välja endast pumpor med strängen *bushel* i kolumnen `Package` och lägga detta i en ny data frame `new_pumpkins`.\n" + ], + "metadata": { + "id": "7sMjiVujaZxY" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### dplyr::filter() och stringr::str_detect()\n", + "\n", + "[`dplyr::filter()`](https://dplyr.tidyverse.org/reference/filter.html): skapar en delmängd av data som endast innehåller **rader** som uppfyller dina villkor, i detta fall pumpor med strängen *bushel* i kolumnen `Package`.\n", + "\n", + "[stringr::str_detect()](https://stringr.tidyverse.org/reference/str_detect.html): identifierar förekomsten eller frånvaron av 
ett mönster i en sträng.\n", + "\n", + "Paketet [`stringr`](https://github.com/tidyverse/stringr) erbjuder enkla funktioner för vanliga strängoperationer.\n" + ], + "metadata": { + "id": "L8Qfcs92ageF" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Retain only pumpkins with \"bushel\"\n", + "new_pumpkins <- pumpkins %>% \n", + " filter(str_detect(Package, \"bushel\"))\n", + "\n", + "# Get the dimensions of the new data\n", + "dim(new_pumpkins)\n", + "\n", + "# View a few rows of the new data\n", + "new_pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "hy_SGYREampd" + } + }, + { + "cell_type": "markdown", + "source": [ + "Du kan se att vi har begränsat oss till ungefär 415 rader med data som innehåller pumpor i skäppor.🤩 \n", + "
\n" + ], + "metadata": { + "id": "VrDwF031avlR" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### dplyr::case_when()\n", + "\n", + "**Men vänta! Det finns en sak till att göra**\n", + "\n", + "Märkte du att mängden per skäppa varierar per rad? Du behöver normalisera prissättningen så att du visar priset per skäppa, inte per 1 1/9 eller 1/2 skäppa. Dags att göra lite matematik för att standardisera det.\n", + "\n", + "Vi kommer att använda funktionen [`case_when()`](https://dplyr.tidyverse.org/reference/case_when.html) för att *mutera* kolumnen Price beroende på vissa villkor. `case_when` gör det möjligt att vektorisera flera `if_else()`-satser.\n" + ], + "metadata": { + "id": "mLpw2jH4a0tx" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Convert the price if the Package contains fractional bushel values\n", + "new_pumpkins <- new_pumpkins %>% \n", + " mutate(Price = case_when(\n", + " str_detect(Package, \"1 1/9\") ~ Price/(1 + 1/9),\n", + " str_detect(Package, \"1/2\") ~ Price/(1/2),\n", + " TRUE ~ Price))\n", + "\n", + "# View the first few rows of the data\n", + "new_pumpkins %>% \n", + " slice_head(n = 30)" + ], + "outputs": [], + "metadata": { + "id": "P68kLVQmbM6I" + } + }, + { + "cell_type": "markdown", + "source": [ + "Nu kan vi analysera priset per enhet baserat på deras mått i bushels. All denna undersökning av pumpors bushels visar dock hur `viktigt` det är att `förstå din datas natur`!\n", + "\n", + "> ✅ Enligt [The Spruce Eats](https://www.thespruceeats.com/how-much-is-a-bushel-1389308) beror en bushels vikt på typen av gröda, eftersom det är en volymmätning. \"En bushel tomater, till exempel, ska väga 56 pund... Blad och gröna tar upp mer plats med mindre vikt, så en bushel spenat väger bara 20 pund.\" Det är ganska komplicerat! Låt oss inte bry oss om att göra en omvandling från bushel till pund, utan istället prissätta per bushel. 
All denna undersökning av pumpors bushels visar dock hur viktigt det är att förstå din datas natur!\n", + ">\n", + "> ✅ Lade du märke till att pumpor som säljs per halv-bushel är väldigt dyra? Kan du lista ut varför? Tips: små pumpor är mycket dyrare än stora, antagligen eftersom det finns så många fler av dem per bushel, med tanke på det outnyttjade utrymmet som en stor ihålig pajpumpa tar upp.\n" + ], + "metadata": { + "id": "pS2GNPagbSdb" + } + }, + { + "cell_type": "markdown", + "source": [ + "Nu slutligen, för äventyrets skull 💁‍♀️, låt oss också flytta kolumnen `Month` till första positionen, dvs `före` kolumnen `Package`.\n", + "\n", + "`dplyr::relocate()` används för att ändra kolumnpositioner.\n" + ], + "metadata": { + "id": "qql1SowfbdnP" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Create a new data frame new_pumpkins\n", + "new_pumpkins <- new_pumpkins %>% \n", + " relocate(Month, .before = Package)\n", + "\n", + "new_pumpkins %>% \n", + " slice_head(n = 7)" + ], + "outputs": [], + "metadata": { + "id": "JJ1x6kw8bixF" + } + }, + { + "cell_type": "markdown", + "source": [ + "Bra jobbat! 👌 Du har nu ett rent och prydligt dataset som du kan använda för att bygga din nya regressionsmodell! \n", + "
\n" + ], + "metadata": { + "id": "y8TJ0Za_bn5Y" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 4. Datavisualisering med ggplot2\n", + "\n", + "

\n", + " \n", + "

Infografik av Dasani Madipalli
\n", + "\n", + "\n", + "\n", + "\n", + "Det finns ett *klokt* ordspråk som lyder så här:\n", + "\n", + "> \"Den enkla grafen har fört mer information till dataanalytikerns sinne än någon annan metod.\" --- John Tukey\n", + "\n", + "En del av dataforskarens roll är att visa kvaliteten och karaktären hos de data de arbetar med. För att göra detta skapar de ofta intressanta visualiseringar, eller diagram, grafer och tabeller, som visar olika aspekter av data. På så sätt kan de visuellt visa relationer och luckor som annars är svåra att upptäcka.\n", + "\n", + "Visualiseringar kan också hjälpa till att avgöra vilken maskininlärningsteknik som är mest lämplig för datan. Ett spridningsdiagram som verkar följa en linje, till exempel, indikerar att datan är en bra kandidat för en linjär regressionsanalys.\n", + "\n", + "R erbjuder flera system för att skapa grafer, men [`ggplot2`](https://ggplot2.tidyverse.org/index.html) är ett av de mest eleganta och mångsidiga. `ggplot2` gör det möjligt att skapa grafer genom att **kombinera oberoende komponenter**.\n", + "\n", + "Låt oss börja med ett enkelt spridningsdiagram för kolumnerna Price och Month.\n", + "\n", + "I det här fallet börjar vi med [`ggplot()`](https://ggplot2.tidyverse.org/reference/ggplot.html), tillhandahåller ett dataset och estetisk mappning (med [`aes()`](https://ggplot2.tidyverse.org/reference/aes.html)) och lägger sedan till lager (som [`geom_point()`](https://ggplot2.tidyverse.org/reference/geom_point.html)) för spridningsdiagram.\n" + ], + "metadata": { + "id": "mYSH6-EtbvNa" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Set a theme for the plots\n", + "theme_set(theme_light())\n", + "\n", + "# Create a scatter plot\n", + "p <- ggplot(data = new_pumpkins, aes(x = Price, y = Month))\n", + "p + geom_point()" + ], + "outputs": [], + "metadata": { + "id": "g2YjnGeOcLo4" + } + }, + { + "cell_type": "markdown", + "source": [ + "Är detta en användbar graf 🤷? 
Är det något med den som förvånar dig?\n", + "\n", + "Den är inte särskilt användbar eftersom allt den gör är att visa dina data som en spridning av punkter under en viss månad.\n", + "
\n" + ], + "metadata": { + "id": "Ml7SDCLQcPvE" + } + }, + { + "cell_type": "markdown", + "source": [ + "### **Hur gör vi det användbart?**\n", + "\n", + "För att få diagram att visa användbar data behöver du oftast gruppera datan på något sätt. Till exempel, i vårt fall skulle det ge mer insikt i de underliggande mönstren i vår data om vi hittar det genomsnittliga priset på pumpor för varje månad. Detta leder oss till ännu en snabbgenomgång av **dplyr**:\n", + "\n", + "#### `dplyr::group_by() %>% summarize()`\n", + "\n", + "Grupperad aggregering i R kan enkelt beräknas med\n", + "\n", + "`dplyr::group_by() %>% summarize()`\n", + "\n", + "- `dplyr::group_by()` ändrar analysenheten från hela datasetet till individuella grupper, som per månad.\n", + "\n", + "- `dplyr::summarize()` skapar en ny dataram med en kolumn för varje grupperingsvariabel och en kolumn för varje sammanfattningsstatistik som du har specificerat.\n", + "\n", + "Till exempel kan vi använda `dplyr::group_by() %>% summarize()` för att gruppera pumporna i grupper baserade på **Month**-kolumnen och sedan hitta **medelpriset** för varje månad.\n" + ], + "metadata": { + "id": "jMakvJZIcVkh" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Find the average price of pumpkins per month\r\n", + "new_pumpkins %>%\r\n", + " group_by(Month) %>% \r\n", + " summarise(mean_price = mean(Price))" + ], + "outputs": [], + "metadata": { + "id": "6kVSUa2Bcilf" + } + }, + { + "cell_type": "markdown", + "source": [ + "Kategoriska variabler, såsom månader, representeras bäst med ett stapeldiagram 📊. De lager som används för stapeldiagram är `geom_bar()` och `geom_col()`. 
Konsultera `?geom_bar` för att lära dig mer.\n", + "\n", + "Låt oss skapa ett!\n" + ], + "metadata": { + "id": "Kds48GUBcj3W" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Find the average price of pumpkins per month then plot a bar chart\r\n", + "new_pumpkins %>%\r\n", + " group_by(Month) %>% \r\n", + " summarise(mean_price = mean(Price)) %>% \r\n", + " ggplot(aes(x = Month, y = mean_price)) +\r\n", + " geom_col(fill = \"midnightblue\", alpha = 0.7) +\r\n", + " ylab(\"Pumpkin Price\")" + ], + "outputs": [], + "metadata": { + "id": "VNbU1S3BcrxO" + } + }, + { + "cell_type": "markdown", + "source": [ + "🤩🤩Det här är en mer användbar datavisualisering! Det verkar indikera att det högsta priset för pumpor inträffar i september och oktober. Stämmer det med dina förväntningar? Varför eller varför inte?\n", + "\n", + "Grattis till att ha avslutat den andra lektionen 👏! Du förberedde dina data för modellbyggande och upptäckte sedan fler insikter med hjälp av visualiseringar!\n" + ], + "metadata": { + "id": "zDm0VOzzcuzR" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, vänligen notera att automatiska översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på sitt ursprungliga språk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. 
Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sv/2-Regression/2-Data/solution/notebook.ipynb b/translations/sv/2-Regression/2-Data/solution/notebook.ipynb new file mode 100644 index 000000000..41c6128d6 --- /dev/null +++ b/translations/sv/2-Regression/2-Data/solution/notebook.ipynb @@ -0,0 +1,437 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NameTypePackageVarietySub VarietyGradeDateLow PriceHigh PriceMostly Low...Unit of SaleQualityConditionAppearanceStorageCropRepackTrans ModeUnnamed: 24Unnamed: 25
70BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN9/24/1615.015.015.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
71BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN9/24/1618.018.018.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
72BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN10/1/1618.018.018.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
73BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN10/1/1617.017.017.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
74BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN10/8/1615.015.015.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " City Name Type Package Variety Sub Variety Grade \\\n", + "70 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "71 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "72 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "73 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "74 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "\n", + " Date Low Price High Price Mostly Low ... Unit of Sale Quality \\\n", + "70 9/24/16 15.0 15.0 15.0 ... NaN NaN \n", + "71 9/24/16 18.0 18.0 18.0 ... NaN NaN \n", + "72 10/1/16 18.0 18.0 18.0 ... NaN NaN \n", + "73 10/1/16 17.0 17.0 17.0 ... NaN NaN \n", + "74 10/8/16 15.0 15.0 15.0 ... NaN NaN \n", + "\n", + " Condition Appearance Storage Crop Repack Trans Mode Unnamed: 24 \\\n", + "70 NaN NaN NaN NaN N NaN NaN \n", + "71 NaN NaN NaN NaN N NaN NaN \n", + "72 NaN NaN NaN NaN N NaN NaN \n", + "73 NaN NaN NaN NaN N NaN NaN \n", + "74 NaN NaN NaN NaN N NaN NaN \n", + "\n", + " Unnamed: 25 \n", + "70 NaN \n", + "71 NaN \n", + "72 NaN \n", + "73 NaN \n", + "74 NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "pumpkins = pd.read_csv('../../data/US-pumpkins.csv')\n", + "\n", + "pumpkins = pumpkins[pumpkins['Package'].str.contains('bushel', case=True, regex=True)]\n", + "\n", + "pumpkins.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "City Name 0\n", + "Type 406\n", + "Package 0\n", + "Variety 0\n", + "Sub Variety 167\n", + "Grade 415\n", + "Date 0\n", + "Low Price 0\n", + "High Price 0\n", + "Mostly Low 24\n", + "Mostly High 24\n", + "Origin 0\n", + "Origin District 396\n", + "Item Size 114\n", + "Color 145\n", + "Environment 415\n", + "Unit of Sale 404\n", + "Quality 415\n", + "Condition 415\n", + "Appearance 
415\n", + "Storage 415\n", + "Crop 415\n", + "Repack 0\n", + "Trans Mode 415\n", + "Unnamed: 24 415\n", + "Unnamed: 25 391\n", + "dtype: int64" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pumpkins.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Month Package Low Price High Price Price\n", + "70 9 1 1/9 bushel cartons 15.00 15.0 13.50\n", + "71 9 1 1/9 bushel cartons 18.00 18.0 16.20\n", + "72 10 1 1/9 bushel cartons 18.00 18.0 16.20\n", + "73 10 1 1/9 bushel cartons 17.00 17.0 15.30\n", + "74 10 1 1/9 bushel cartons 15.00 15.0 13.50\n", + "... ... ... ... ... ...\n", + "1738 9 1/2 bushel cartons 15.00 15.0 30.00\n", + "1739 9 1/2 bushel cartons 13.75 15.0 28.75\n", + "1740 9 1/2 bushel cartons 10.75 15.0 25.75\n", + "1741 9 1/2 bushel cartons 12.00 12.0 24.00\n", + "1742 9 1/2 bushel cartons 12.00 12.0 24.00\n", + "\n", + "[415 rows x 5 columns]\n" + ] + } + ], + "source": [ + "\n", + "# A set of new columns for a new dataframe. 
Filter out nonmatching columns\n", + "columns_to_select = ['Package', 'Low Price', 'High Price', 'Date']\n", + "pumpkins = pumpkins.loc[:, columns_to_select]\n", + "\n", + "# Get an average between low and high price for the base pumpkin price\n", + "price = (pumpkins['Low Price'] + pumpkins['High Price']) / 2\n", + "\n", + "# Convert the date to its month only\n", + "month = pd.DatetimeIndex(pumpkins['Date']).month\n", + "\n", + "# Create a new dataframe with this basic data\n", + "new_pumpkins = pd.DataFrame({'Month': month, 'Package': pumpkins['Package'], 'Low Price': pumpkins['Low Price'],'High Price': pumpkins['High Price'], 'Price': price})\n", + "\n", + "# Convert the price if the Package contains fractional bushel values\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1 1/9'), 'Price'] = price/(1 + 1/9)\n", + "\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1/2'), 'Price'] = price/(1/2)\n", + "\n", + "print(new_pumpkins)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "price = new_pumpkins.Price\n", + "month = new_pumpkins.Month\n", + "plt.scatter(price, month)\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0, 0.5, 'Pumpkin Price')" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEJCAYAAACT/UyFAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAAsTAAALEwEAmpwYAAARAElEQVR4nO3de5AlZX3G8e8joKigiIwbVNYVQ6ErwcVaiRW0CgUNikEQKxFTijHJahlUSsvUqknE/LVE0KoYNVkDigloNCoQLt5AxUuCLrrhIhqUQgMiLBGE0goR+OWP0+sMszOzZ8ft0zO830/VqTndfc7phwae6XlPX1JVSJLa8aChA0iSJsvil6TGWPyS1BiLX5IaY/FLUmMsfklqzK5DBxjHPvvsU6tWrRo6hiQtK1dcccVtVTU1e/6yKP5Vq1axadOmoWNI0rKS5IdzzXeoR5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktSYZXECl3auVesvHDoCN2w4eugIUrMsfjXNX4JqkUM9ktQYi1+SGmPxS1JjLH5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqTG/Fn2S/JF9M8p0k1yR5Yzf/lCQ3JdncPV7YVwZJ0rZ27fGz7wHeXFXfSrIncEWSz3fL3lNVp/W4bknSPHor/qq6Gbi5e35XkmuBx/W1PknSePrc4/+VJKuAQ4DLgcOAk5K8EtjE6K+C2yeRQ9L8Vq2/cOgI3LDh6KEjNKH3L3eT7AF8Eji5qu4EPgA8CVjD6C+C0+d537okm5Js2rJlS98xJakZvRZ/kt0Ylf7ZVfUpgKq6parurar7gA8Ch8713qraWFVrq2rt1NRUnzElqSl9HtUT4Azg2qp694z5+8542XHA1X1lkCRtq88x/sOAVwBXJdnczXsbcEKSNUABNwCv6TGDJGmWPo/q+SqQORZd1Nc6F+IXV5I04pm7ktQYi1+SGmPxS1JjLH5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktQYi1+SGmPxS1JjLH5JakxvxZ9kvyRfTPKdJNckeWM3f+8kn09yXffzUX1lkCRtq889/nuAN1fVauCZwJ8lWQ2sBy6pqgOAS7ppSdKE9Fb8VXVzVX2re34XcC3wOODFwFndy84Cju0rgyRpWxMZ40+yCjgEuBxYUVU3d4t+AqyY5z3rkmxKsmnLli2TiClJTei9+JPsAXwSOLmq7py5rKoKqLneV1Ubq2ptVa2dmprqO6YkNWOs4k/y0CQH7uiHJ9mNUemfXVWf6mbfkmTfbvm+wK07+rmSpM
XbbvEn+T1gM/CZbnpNkvPHeF+AM4Brq+rdMxadD5zYPT8ROG8HM0uSfg3j7PGfAhwK3AFQVZuBJ47xvsOAVwDPTbK5e7wQ2AA8L8l1wJHdtCRpQnYd4zW/rKqfjXbgf2XOcfn7vaDqq0DmWXzEGOuVJPVgnOK/JsnLgV2SHAC8Afh6v7EkSX0ZZ6jn9cBTgbuBc4CfASf3mEmS1KPt7vFX1S+At3cPSdIyN85RPZ9PsteM6Ucl+WyvqSRJvRlnqGefqrpj60RV3Q48prdEkqRejVP89yVZuXUiyRMY46geSdLSNM5RPW8Hvprky4wOz3w2sK7XVJKk3ozz5e5nkjyd0aWVYXTNndv6jSVJ6su8Qz1Jntz9fDqwEvhx91jZzZMkLUML7fG/idGQzulzLCvgub0kkiT1at7ir6p1SR4E/EVVfW2CmSRJPVrwqJ6qug/4uwllkSRNwDiHc16S5PjMukqbJGl5Gqf4XwN8Arg7yZ1J7kpy5/beJElamsY5nHPPSQSRJE3GQodzHpDkvCRXJzknyeMmGUyS1I+FhnrOBC4Ajge+Dbx3IokkSb1aaKhnz6r6YPf8XUm+NYlAkqR+LVT8uyc5hOnbJz505nRV+YtAkpahhYr/ZuDdM6Z/MmPaM3claZla6Mzd50wyiCRpMsY5jl+S9ABi8UtSYyx+SWrMOHfgojt56wkzX19Vl/UVSpLUn+0Wf5JTgT8AvgPc280uwOKXpGVonD3+Y4EDq+runrNIkiZgnOK/HtgN2KHiT3Im8CLg1qo6qJt3CvCnwJbuZW+rqot25HMlqW+r1l84dARu2HB0b589TvH/Atic5BJmlH9VvWE77/swo5u4fGTW/PdU1Wk7ElKStPOMU/znd48dUlWXJVm1w4kkSb0a53r8Z+3kdZ6U5JXAJuDNVXX7XC9Kso7Rzd5ZuXLlTo4gSe1a6Hr8H+9+XpXkytmPRa7vA8CTgDWMrgV0+nwvrKqNVbW2qtZOTU0tcnWSpNkW2uN/Y/fzRTtrZVV1y9bnST7I6Hr/kqQJmnePv6pu7p6urqofznwAL1jMypLsO2PyOODqxXyOJGnxxvly9y+T3F1VlwIk+XPgOcDfL/SmJB8FDgf2SXIj8A7g8CRrGJ0AdgOjG7lLkiZonOI/BrggyVuAo4AnAy/e3puq6oQ5Zp+xY/EkSTvbOEf13JbkGOALwBXAS6uqek8mSerFvMWf5C5GQzJbPRjYH3hpkqqqR/QdTpK08y10B649JxlEkjQZ416W+SXAsxj9BfCVqjq3z1CSpP5s90YsSd4PvBa4itHhl69N8r6+g0mS+jHOHv9zgads/UI3yVnANb2mkiT1ZpxbL34fmHmxnP26eZKkZWicPf49gWuTfKObfgawKcn5AFV1TF/hJEk73zjF/1e9p5AkTcw4J3B9GSDJI7j/zdZ/2mMuSVJPxrnZ+jrgr4H/Be4Dwuiwzv37jSZJ6sM4Qz1vAQ6qqtv6DiNJ6t84R/X8gNF9dyVJDwDj7PG/Ffh6ksvZsZutS5KWoHGK/x+ASxmduXtfv3EkSX0bp/h3q6o39Z5EkjQR44zxX5xkXZJ9k+y99dF7MklSL8bZ4996J623zpjn4ZyStEyNcwLXEycRRJI0GeOcwPXKueZX1Ud2fhxJUt/GGep5xoznuwNHAN8CLH5JWobGGep5/czpJHsBH+srkCSpX+Mc1TPbzwHH/SVpmRpnjP/fGB3FA6NfFKuBj/cZSpLUn3HG+E+b8fwe4IdVdWNPeSRJPZu3+JPszugm67/J6HINZ1TVPZMKJknqx0Jj/GcBaxmV/guA0yeSSJLUq4WGelZX1W8BJDkD+MYCr91GkjOBFwG3VtVB3by9gX8BVgE3AL9fVbfveGxJ0mIttMf/y61PFjnE82HgqFnz1gOXVNUBwCXdtCRpghYq/qclubN73AUcvPV5kju398FVdRkw+768L2Y0hET389jFhJYkLd68Qz1VtUsP61tRVTd3z38CrO
hhHZKkBSzmBK6doqqK6fMDttFdCnpTkk1btmyZYDJJemCbdPHfkmRfgO7nrfO9sKo2VtXaqlo7NTU1sYCS9EA36eI/Hzixe34icN6E1y9Jzeut+JN8FPh34MAkNyb5Y2AD8Lwk1wFHdtOSpAka55INi1JVJ8yz6Ii+1ilJ2r7BvtyVJA3D4pekxlj8ktQYi1+SGmPxS1JjLH5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktSYXYdYaZIbgLuAe4F7qmrtEDkkqUWDFH/nOVV124Drl6QmOdQjSY0ZqvgL+FySK5KsGyiDJDVpqKGeZ1XVTUkeA3w+yXer6rKZL+h+IawDWLly5RAZJekBaZA9/qq6qft5K/Bp4NA5XrOxqtZW1dqpqalJR5SkB6yJF3+ShyfZc+tz4PnA1ZPOIUmtGmKoZwXw6SRb139OVX1mgByS1KSJF39VXQ88bdLrlSSNeDinJDXG4pekxlj8ktQYi1+SGmPxS1JjLH5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktQYi1+SGjNI8Sc5Ksn3knw/yfohMkhSqyZe/El2Ad4HvABYDZyQZPWkc0hSq4bY4z8U+H5VXV9V/wd8DHjxADkkqUmpqsmuMHkpcFRV/Uk3/Qrgt6vqpFmvWwes6yYPBL430aDb2ge4beAMS4XbYprbYprbYtpS2RZPqKqp2TN3HSLJOKpqI7Bx6BxbJdlUVWuHzrEUuC2muS2muS2mLfVtMcRQz03AfjOmH9/NkyRNwBDF/03ggCRPTPJg4GXA+QPkkKQmTXyop6ruSXIS8FlgF+DMqrpm0jkWYckMOy0BbotpbotpbotpS3pbTPzLXUnSsDxzV5IaY/FLUmMsfklqzJI9jn9IM442+nFVfSHJy4HfAa4FNlbVLwcNOGFJ9gdewugw3HuB/wLOqao7Bw0maVH8cncOSc5m9EvxYcAdwB7Ap4AjGG2zE4dLN1lJ3gC8CLgMeCHwbUbb5DjgdVX1pcHCSVoUi38OSa6sqoOT7Mro5LLHVtW9SQL8Z1UdPHDEiUlyFbCm++d/GHBRVR2eZCVwXlUdMnDEiUnySOCtwLHAY4ACbgXOAzZU1R2DhVtCklxcVS8YOsekJHkEo/8uHg9cXFXnzFj2/qp63WDh5uFQz9we1A33PJzRXv8jgZ8CDwF2GzLYQHZlNMTzEEZ//VBVP0rS2rb4OHApcHhV/QQgyW8AJ3bLnj9gtolK8vT5FgFrJhhlKfgQcB3wSeDVSY4HXl5VdwPPHDTZPCz+uZ0BfJfRCWZvBz6R5HpG/xI/NmSwAfwj8M0klwPPBk4FSDLF6JdhS1ZV1akzZ3S/AE5N8uqBMg3lm8CXGRX9bHtNNsrgnlRVx3fPz03yduDSJMcMGWohDvXMI8ljAarqx0n2Ao4EflRV3xg02ACSPBV4CnB1VX136DxDSfI54AvAWVV1SzdvBfAq4HlVdeSA8SYqydXAcVV13RzL/ruq9pvjbQ9ISa4FnlpV982Y9yrgLcAeVfWEobLNx+KXxpTkUcB6RvePeEw3+xZG15raUFW3D5Vt0rrLq19VVdtcLj3JsVV17uRTDSPJ3wCfq6ovzJp/FPDeqjpgmGTzs/ilnSDJH1XVh4bOsRS4LaYt1W1h8Us7QZIfVdXKoXMsBW6LaUt1W/jlrjSmJFfOtwhYMcksQ3NbTFuO28Lil8a3AvhdYPZYfoCvTz7OoNwW05bdtrD4pfFdwOgojc2zFyT50sTTDMttMW3ZbQvH+CWpMV6dU5IaY/FLUmMsfglIUkn+ecb0rkm2JL
lgkZ+3V5LXzZg+fLGfJe1sFr808nPgoCQP7aafx+jKrIu1F7DkrsoogcUvzXQRcHT3/ATgo1sXJNk7yblJrkzyH0kO7uafkuTMJF9Kcn13/wKADcCTkmxO8q5u3h5J/jXJd5Oc3V3mW5o4i1+a9jHgZUl2Bw4GLp+x7J3At7t7MbwN+MiMZU9mdBz3ocA7ustVrwd+UFVrquot3esOAU4GVgP7A4f1+M8izcvilzpVdSWwitHe/kWzFj8L+KfudZcCj+5uwAFwYVXdXVW3Mboxy3xna36jqm7sruK4uVuXNHGewCXd3/nAacDhwKPHfM/dM57fy/z/X437OqlX7vFL93cm8M6qumrW/K8AfwijI3SA27Zzs/m7gD37CCj9utzjkGaoqhuBv51j0SnAmd0FuX7B6HaLC33O/yT5WnfDkouBC3d2VmmxvGSDJDXGoR5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSY/4fZDFW+b6+4WkAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "new_pumpkins.groupby(['Month'])['Price'].mean().plot(kind='bar')\n", + "plt.ylabel(\"Pumpkin Price\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, vänligen notera att automatiska översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på dess originalspråk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" + }, + "kernelspec": { + "display_name": "Python 3.7.0 64-bit ('3.7')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.1" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "95726f0b8283628d5356a4f8eb8b4b76", + "translation_date": "2025-09-06T13:46:22+00:00", + "source_file": "2-Regression/2-Data/solution/notebook.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sv/2-Regression/3-Linear/notebook.ipynb 
b/translations/sv/2-Regression/3-Linear/notebook.ipynb new file mode 100644 index 000000000..dc64f79ad --- /dev/null +++ b/translations/sv/2-Regression/3-Linear/notebook.ipynb @@ -0,0 +1,128 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prissättning av pumpor\n", + "\n", + "Ladda in nödvändiga bibliotek och dataset. Konvertera data till en dataframe som innehåller ett urval av data:\n", + "\n", + "- Ta endast med pumpor som är prissatta per skäppa\n", + "- Konvertera datumet till en månad\n", + "- Beräkna priset som ett genomsnitt av högsta och lägsta priser\n", + "- Konvertera priset så att det återspeglar prissättningen per skäppa\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from datetime import datetime\n", + "\n", + "pumpkins = pd.read_csv('../data/US-pumpkins.csv')\n", + "\n", + "pumpkins.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pumpkins = pumpkins[pumpkins['Package'].str.contains('bushel', case=True, regex=True)]\n", + "\n", + "columns_to_select = ['Package', 'Variety', 'City Name', 'Low Price', 'High Price', 'Date']\n", + "pumpkins = pumpkins.loc[:, columns_to_select]\n", + "\n", + "price = (pumpkins['Low Price'] + pumpkins['High Price']) / 2\n", + "\n", + "month = pd.DatetimeIndex(pumpkins['Date']).month\n", + "day_of_year = pd.to_datetime(pumpkins['Date']).apply(lambda dt: (dt-datetime(dt.year,1,1)).days)\n", + "\n", + "new_pumpkins = pd.DataFrame(\n", + " {'Month': month, \n", + " 'DayOfYear' : day_of_year, \n", + " 'Variety': pumpkins['Variety'], \n", + " 'City': pumpkins['City Name'], \n", + " 'Package': pumpkins['Package'], \n", + " 'Low Price': pumpkins['Low Price'],\n", + " 'High Price': pumpkins['High Price'], \n", + " 'Price': price})\n", + "\n", + 
"new_pumpkins.loc[new_pumpkins['Package'].str.contains('1 1/9'), 'Price'] = price/1.1\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1/2'), 'Price'] = price*2\n", + "\n", + "new_pumpkins.head()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Ett grundläggande spridningsdiagram påminner oss om att vi bara har månadsdata från augusti till december. Vi behöver förmodligen mer data för att kunna dra slutsatser på ett linjärt sätt.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "plt.scatter('Month','Price',data=new_pumpkins)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "plt.scatter('DayOfYear','Price',data=new_pumpkins)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, vänligen notera att automatiska översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på dess originalspråk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. 
Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3-final" + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "b032d371c75279373507f003439a577e", + "translation_date": "2025-09-06T13:08:53+00:00", + "source_file": "2-Regression/3-Linear/notebook.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sv/2-Regression/3-Linear/solution/R/lesson_3-R.ipynb b/translations/sv/2-Regression/3-Linear/solution/R/lesson_3-R.ipynb new file mode 100644 index 000000000..14d4df1e7 --- /dev/null +++ b/translations/sv/2-Regression/3-Linear/solution/R/lesson_3-R.ipynb @@ -0,0 +1,1089 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_3-R.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "5015d65d61ba75a223bfc56c273aa174", + "translation_date": "2025-09-06T13:19:40+00:00", + "source_file": "2-Regression/3-Linear/solution/R/lesson_3-R.ipynb", + "language_code": "sv" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "EgQw8osnsUV-" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Linjär och Polynomisk Regression för Pumpapris - Lektion 3\n", + "

\n", + " \n", + "

Infografik av Dasani Madipalli
\n", + "\n", + "\n", + "\n", + "\n", + "#### Introduktion\n", + "\n", + "Hittills har du utforskat vad regression är med hjälp av exempeldata från pumpapris-datasetet som vi kommer att använda genom hela denna lektion. Du har också visualiserat det med hjälp av `ggplot2`.💪\n", + "\n", + "Nu är du redo att fördjupa dig i regression för maskininlärning. I denna lektion kommer du att lära dig mer om två typer av regression: *grundläggande linjär regression* och *polynomisk regression*, tillsammans med lite av matematiken bakom dessa tekniker.\n", + "\n", + "> Genom hela denna kurs antar vi minimal kunskap om matematik och strävar efter att göra det tillgängligt för studenter från andra områden. Håll utkik efter anteckningar, 🧮 matematiska inslag, diagram och andra lärverktyg som hjälper till att förstå.\n", + "\n", + "#### Förberedelse\n", + "\n", + "Som en påminnelse, du laddar denna data för att kunna ställa frågor om den.\n", + "\n", + "- När är den bästa tiden att köpa pumpor?\n", + "\n", + "- Vilket pris kan jag förvänta mig för en låda med miniatyrpumpor?\n", + "\n", + "- Bör jag köpa dem i halv-bushelkorgar eller i 1 1/9 bushel-lådor? Låt oss gräva djupare i denna data.\n", + "\n", + "I föregående lektion skapade du en `tibble` (en modern omarbetning av data frame) och fyllde den med en del av det ursprungliga datasetet, där du standardiserade prissättningen per bushel. Genom att göra det kunde du dock bara samla in cirka 400 datapunkter och endast för höstmånaderna. Kanske kan vi få lite mer detaljer om datans natur genom att städa upp den mer? Vi får se... 
🕵️‍♀️\n", + "\n", + "För denna uppgift behöver vi följande paket:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) är en [samling av R-paket](https://www.tidyverse.org/packages) designade för att göra dataanalys snabbare, enklare och roligare!\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) är ett [ramverk av paket](https://www.tidymodels.org/packages/) för modellering och maskininlärning.\n", + "\n", + "- `janitor`: [janitor-paketet](https://github.com/sfirke/janitor) erbjuder enkla verktyg för att undersöka och städa smutsig data.\n", + "\n", + "- `corrplot`: [corrplot-paketet](https://cran.r-project.org/web/packages/corrplot/vignettes/corrplot-intro.html) erbjuder ett visuellt utforskande verktyg för korrelationsmatriser som stödjer automatisk omordning av variabler för att hjälpa till att upptäcka dolda mönster bland variabler.\n", + "\n", + "Du kan installera dem med:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\", \"janitor\", \"corrplot\"))`\n", + "\n", + "Skriptet nedan kontrollerar om du har de paket som krävs för att slutföra denna modul och installerar dem åt dig om de saknas.\n" + ], + "metadata": { + "id": "WqQPS1OAsg3H" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "suppressWarnings(if (!require(\"pacman\")) install.packages(\"pacman\"))\n", + "\n", + "pacman::p_load(tidyverse, tidymodels, janitor, corrplot)" + ], + "outputs": [], + "metadata": { + "id": "tA4C2WN3skCf", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "c06cd805-5534-4edc-f72b-d0d1dab96ac0" + } + }, + { + "cell_type": "markdown", + "source": [ + "Vi kommer senare att ladda dessa fantastiska paket och göra dem tillgängliga i vår nuvarande R-session. (Detta är bara för illustration, `pacman::p_load()` har redan gjort det åt dig)\n", + "\n", + "## 1. 
En linjär regressionslinje\n", + "\n", + "Som du lärde dig i Lektion 1 är målet med en linjär regressionsövning att kunna rita en *linje* *som* *passar bäst* för att:\n", + "\n", + "- **Visa samband mellan variabler**. Visa relationen mellan variabler.\n", + "\n", + "- **Göra förutsägelser**. Göra exakta förutsägelser om var en ny datapunkt skulle hamna i förhållande till den linjen.\n", + "\n", + "För att rita denna typ av linje använder vi en statistisk teknik som kallas **Minsta kvadratmetoden**. Termen `minsta kvadrat` betyder att alla datapunkter runt regressionslinjen kvadreras och sedan summeras. Idealiskt sett är den slutliga summan så liten som möjligt, eftersom vi vill ha ett lågt antal fel, eller `minsta kvadrat`. Därför är den linje som passar bäst den linje som ger oss det lägsta värdet för summan av de kvadrerade felen - därav namnet *minsta kvadratmetoden*.\n", + "\n", + "Vi gör detta eftersom vi vill modellera en linje som har den minsta kumulativa avvikelsen från alla våra datapunkter. Vi kvadrerar också termerna innan vi summerar dem eftersom vi är intresserade av deras storlek snarare än deras riktning.\n", + "\n", + "> **🧮 Visa mig matematiken**\n", + ">\n", + "> Denna linje, kallad *linjen som passar bäst*, kan uttryckas med [en ekvation](https://en.wikipedia.org/wiki/Simple_linear_regression):\n", + ">\n", + "> Y = a + bX\n", + ">\n", + "> `X` är den '`förklarande variabeln` eller `prediktorn`'. `Y` är den '`beroende variabeln` eller `utfallet`'. 
Lutningen på linjen är `b` och `a` är skärningspunkten med y-axeln, vilket hänvisar till värdet av `Y` när `X = 0`.\n", + ">\n", + "\n", + "> ![](../../../../../../2-Regression/3-Linear/solution/images/slope.png \"lutning = $y/x$\")\n", + " Infografik av Jen Looper\n", + ">\n", + "> Först, beräkna lutningen `b`.\n", + ">\n", + "> Med andra ord, och med hänvisning till vår pumpadata och den ursprungliga frågan: \"förutsäg priset på en pumpa per skäppa beroende på månad\", skulle `X` hänvisa till priset och `Y` hänvisa till försäljningsmånaden.\n", + ">\n", + "> ![](../../../../../../2-Regression/3-Linear/solution/images/calculation.png)\n", + " Infografik av Jen Looper\n", + "> \n", + "> Beräkna värdet av Y. Om du betalar runt \\$4, måste det vara april!\n", + ">\n", + "> Matematiken som beräknar linjen måste visa lutningen på linjen, som också beror på skärningspunkten, eller var `Y` befinner sig när `X = 0`.\n", + ">\n", + "> Du kan se metoden för att beräkna dessa värden på webbplatsen [Math is Fun](https://www.mathsisfun.com/data/least-squares-regression.html). Besök också [denna Minsta kvadrat-kalkylator](https://www.mathsisfun.com/data/least-squares-calculator.html) för att se hur värdena påverkar linjen.\n", + "\n", + "Inte så skrämmande, eller hur? 🤓\n", + "\n", + "#### Korrelation\n", + "\n", + "Ett annat begrepp att förstå är **Korrelationskoefficienten** mellan givna X- och Y-variabler. Med hjälp av ett spridningsdiagram kan du snabbt visualisera denna koefficient. Ett diagram med datapunkter som är snyggt uppradade har hög korrelation, men ett diagram med datapunkter spridda överallt mellan X och Y har låg korrelation.\n", + "\n", + "En bra linjär regressionsmodell är en som har en hög (närmare 1 än 0) korrelationskoefficient med hjälp av Minsta kvadratmetoden och en regressionslinje.\n" + ], + "metadata": { + "id": "cdX5FRpvsoP5" + } + }, + { + "cell_type": "markdown", + "source": [ + "## **2. 
En dans med data: skapa en data frame som ska användas för modellering**\n", + "\n", + "

\n", + " \n", + "

Konstverk av @allison_horst
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "WdUKXk7Bs8-V" + } + }, + { + "cell_type": "markdown", + "source": [ + "Ladda upp nödvändiga bibliotek och dataset. Konvertera data till en data frame som innehåller ett urval av data:\n", + "\n", + "- Ta endast med pumpor som är prissatta per skäppa\n", + "\n", + "- Konvertera datumet till en månad\n", + "\n", + "- Beräkna priset som ett genomsnitt av högsta och lägsta pris\n", + "\n", + "- Konvertera priset för att återspegla prissättningen per skäppmängd\n", + "\n", + "> Vi gick igenom dessa steg i [föregående lektion](https://github.com/microsoft/ML-For-Beginners/blob/main/2-Regression/2-Data/solution/lesson_2-R.ipynb).\n" + ], + "metadata": { + "id": "fMCtu2G2s-p8" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load the core Tidyverse packages\n", + "library(tidyverse)\n", + "library(lubridate)\n", + "\n", + "# Import the pumpkins data\n", + "pumpkins <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/2-Regression/data/US-pumpkins.csv\")\n", + "\n", + "\n", + "# Get a glimpse and dimensions of the data\n", + "glimpse(pumpkins)\n", + "\n", + "\n", + "# Print the first 50 rows of the data set\n", + "pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "ryMVZEEPtERn" + } + }, + { + "cell_type": "markdown", + "source": [ + "I äkta äventyrsanda, låt oss utforska [`janitor-paketet`](https://github.com/sfirke/janitor) som erbjuder enkla funktioner för att undersöka och rengöra smutsiga data. 
Till exempel, låt oss titta på kolumnnamnen för våra data:\n" + ], + "metadata": { + "id": "xcNxM70EtJjb" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Return column names\n", + "pumpkins %>% \n", + " names()" + ], + "outputs": [], + "metadata": { + "id": "5XtpaIigtPfW" + } + }, + { + "cell_type": "markdown", + "source": [ + "🤔 Vi kan göra bättre. Låt oss göra dessa kolumnnamn `friendR` genom att konvertera dem till [snake_case](https://en.wikipedia.org/wiki/Snake_case)-konventionen med hjälp av `janitor::clean_names`. För att ta reda på mer om denna funktion: `?clean_names`\n" + ], + "metadata": { + "id": "IbIqrMINtSHe" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Clean names to the snake_case convention\n", + "pumpkins <- pumpkins %>% \n", + " clean_names(case = \"snake\")\n", + "\n", + "# Return column names\n", + "pumpkins %>% \n", + " names()" + ], + "outputs": [], + "metadata": { + "id": "a2uYvclYtWvX" + } + }, + { + "cell_type": "markdown", + "source": [ + "Mycket tidyR 🧹! Nu, en dans med datan med hjälp av `dplyr`, precis som i den förra lektionen! 
💃\n" + ], + "metadata": { + "id": "HfhnuzDDtaDd" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Select desired columns\n", + "pumpkins <- pumpkins %>% \n", + " select(variety, city_name, package, low_price, high_price, date)\n", + "\n", + "\n", + "\n", + "# Extract the month from the dates to a new column\n", + "pumpkins <- pumpkins %>%\n", + " mutate(date = mdy(date),\n", + " month = month(date)) %>% \n", + " select(-date)\n", + "\n", + "\n", + "\n", + "# Create a new column for average Price\n", + "pumpkins <- pumpkins %>% \n", + " mutate(price = (low_price + high_price)/2)\n", + "\n", + "\n", + "# Retain only pumpkins with the string \"bushel\"\n", + "new_pumpkins <- pumpkins %>% \n", + " filter(str_detect(string = package, pattern = \"bushel\"))\n", + "\n", + "\n", + "# Normalize the pricing so that you show the pricing per bushel, not per 1 1/9 or 1/2 bushel\n", + "new_pumpkins <- new_pumpkins %>% \n", + " mutate(price = case_when(\n", + " str_detect(package, \"1 1/9\") ~ price/(1.1),\n", + " str_detect(package, \"1/2\") ~ price*2,\n", + " TRUE ~ price))\n", + "\n", + "# Relocate column positions\n", + "new_pumpkins <- new_pumpkins %>% \n", + " relocate(month, .before = variety)\n", + "\n", + "\n", + "# Display the first 5 rows\n", + "new_pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "X0wU3gQvtd9f" + } + }, + { + "cell_type": "markdown", + "source": [ + "Bra jobbat! 
👌 Du har nu en ren och prydlig datamängd som du kan använda för att bygga din nya regressionsmodell!\n", + "\n", + "Vad sägs om ett spridningsdiagram?\n" + ], + "metadata": { + "id": "UpaIwaxqth82" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Set theme\n", + "theme_set(theme_light())\n", + "\n", + "# Make a scatter plot of month and price\n", + "new_pumpkins %>% \n", + " ggplot(mapping = aes(x = month, y = price)) +\n", + " geom_point(size = 1.6)\n" + ], + "outputs": [], + "metadata": { + "id": "DXgU-j37tl5K" + } + }, + { + "cell_type": "markdown", + "source": [ + "Ett spridningsdiagram påminner oss om att vi bara har månadsdata från augusti till december. Vi behöver troligen mer data för att kunna dra slutsatser på ett linjärt sätt.\n", + "\n", + "Låt oss titta på våra modelleringsdata igen:\n" + ], + "metadata": { + "id": "Ve64wVbwtobI" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Display first 5 rows\n", + "new_pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "HFQX2ng1tuSJ" + } + }, + { + "cell_type": "markdown", + "source": [ + "Vad händer om vi ville förutsäga `price` för en pumpa baserat på kolumnerna `city` eller `package`, som är av typen text? Eller ännu enklare, hur skulle vi kunna hitta korrelationen (som kräver att båda dess indata är numeriska) mellan till exempel `package` och `price`? 🤷🤷\n", + "\n", + "Maskininlärningsmodeller fungerar bäst med numeriska egenskaper snarare än textvärden, så du behöver vanligtvis konvertera kategoriska egenskaper till numeriska representationer.\n", + "\n", + "Detta innebär att vi måste hitta ett sätt att omforma våra prediktorer för att göra dem enklare för en modell att använda effektivt, en process som kallas `feature engineering`.\n" + ], + "metadata": { + "id": "7hsHoxsStyjJ" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 3. 
Förbereda data för modellering med recipes 👩‍🍳👨‍🍳\n", + "\n", + "Aktiviteter som omformar prediktorvärden för att göra dem enklare för en modell att använda effektivt kallas `feature engineering`.\n", + "\n", + "Olika modeller har olika krav på förbehandling. Till exempel kräver minsta kvadratmetoden `kodning av kategoriska variabler` såsom månad, sort och stad_namn. Detta innebär helt enkelt att `översätta` en kolumn med `kategoriska värden` till en eller flera `numeriska kolumner` som ersätter den ursprungliga.\n", + "\n", + "Till exempel, anta att din data innehåller följande kategoriska variabel:\n", + "\n", + "| stad |\n", + "|:-------:|\n", + "| Denver |\n", + "| Nairobi |\n", + "| Tokyo |\n", + "\n", + "Du kan använda *ordinal kodning* för att ersätta varje kategori med ett unikt heltalsvärde, som detta:\n", + "\n", + "| stad |\n", + "|:----:|\n", + "| 0 |\n", + "| 1 |\n", + "| 2 |\n", + "\n", + "Och det är precis vad vi ska göra med vår data!\n", + "\n", + "I det här avsnittet ska vi utforska ett annat fantastiskt Tidymodels-paket: [recipes](https://tidymodels.github.io/recipes/) - som är utformat för att hjälpa dig att förbehandla din data **innan** du tränar din modell. Kärnan i en recipe är ett objekt som definierar vilka steg som ska tillämpas på en dataset för att göra den redo för modellering.\n", + "\n", + "Nu ska vi skapa en recipe som förbereder vår data för modellering genom att ersätta varje observation i prediktorkolumnerna med ett unikt heltal:\n" + ], + "metadata": { + "id": "AD5kQbcvt3Xl" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Specify a recipe\n", + "pumpkins_recipe <- recipe(price ~ ., data = new_pumpkins) %>% \n", + " step_integer(all_predictors(), zero_based = TRUE)\n", + "\n", + "\n", + "# Print out the recipe\n", + "pumpkins_recipe" + ], + "outputs": [], + "metadata": { + "id": "BNaFKXfRt9TU" + } + }, + { + "cell_type": "markdown", + "source": [ + "Fantastiskt! 
👏 Vi har precis skapat vårt första recept som specificerar ett utfall (pris) och dess motsvarande prediktorer, och att alla prediktorkolumner ska kodas om till en uppsättning heltal 🙌! Låt oss snabbt bryta ner det:\n", + "\n", + "- Anropet till `recipe()` med en formel berättar för receptet vilka *roller* variablerna har, med hjälp av `new_pumpkins`-data som referens. Till exempel har kolumnen `price` tilldelats rollen `outcome`, medan resten av kolumnerna har tilldelats rollen `predictor`.\n", + "\n", + "- `step_integer(all_predictors(), zero_based = TRUE)` specificerar att alla prediktorer ska konverteras till en uppsättning heltal, där numreringen börjar på 0.\n", + "\n", + "Vi är säkra på att du kanske tänker något i stil med: \"Det här är så häftigt!! Men vad händer om jag behöver bekräfta att receptet gör exakt det jag förväntar mig? 🤔\"\n", + "\n", + "Det är en fantastisk tanke! Du förstår, när ditt recept är definierat kan du uppskatta de parametrar som krävs för att faktiskt förbehandla datan och sedan extrahera den bearbetade datan. Du behöver vanligtvis inte göra detta när du använder Tidymodels (vi kommer att se den normala konventionen om en liten stund -> `workflows`), men det kan vara användbart när du vill göra någon form av rimlighetskontroll för att bekräfta att recepten gör det du förväntar dig.\n", + "\n", + "För detta behöver du två ytterligare verb: `prep()` och `bake()`, och som alltid hjälper våra små R-vänner från [`Allison Horst`](https://github.com/allisonhorst/stats-illustrations) dig att förstå detta bättre!\n", + "\n", + "

\n", + " \n", + "

Konstverk av @allison_horst
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "KEiO0v7kuC9O" + } + }, + { + "cell_type": "markdown", + "source": [ + "[`prep()`](https://recipes.tidymodels.org/reference/prep.html): beräknar de nödvändiga parametrarna från en träningsuppsättning som senare kan tillämpas på andra datamängder. Till exempel, för en given prediktorkolumn, vilken observation kommer att tilldelas heltalet 0, 1, 2 osv.\n", + "\n", + "[`bake()`](https://recipes.tidymodels.org/reference/bake.html): tar ett förberett recept och tillämpar operationerna på vilken datamängd som helst.\n", + "\n", + "Med det sagt, låt oss förbereda och tillämpa våra recept för att verkligen bekräfta att bakom kulisserna kommer prediktorkolumnerna först att kodas innan en modell anpassas.\n" + ], + "metadata": { + "id": "Q1xtzebuuTCP" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Prep the recipe\n", + "pumpkins_prep <- prep(pumpkins_recipe)\n", + "\n", + "# Bake the recipe to extract a preprocessed new_pumpkins data\n", + "baked_pumpkins <- bake(pumpkins_prep, new_data = NULL)\n", + "\n", + "# Print out the baked data set\n", + "baked_pumpkins %>% \n", + " slice_head(n = 10)" + ], + "outputs": [], + "metadata": { + "id": "FGBbJbP_uUUn" + } + }, + { + "cell_type": "markdown", + "source": [ + "Woo-hoo!🥳 Den bearbetade datan `baked_pumpkins` har alla sina prediktorer kodade, vilket bekräftar att de förbehandlingssteg som definierats som vårt recept fungerar som förväntat. Detta gör det svårare för dig att läsa men mycket mer begripligt för Tidymodels! Ta lite tid att ta reda på vilken observation som har mappats till ett motsvarande heltal.\n", + "\n", + "Det är också värt att nämna att `baked_pumpkins` är en data frame som vi kan utföra beräkningar på.\n", + "\n", + "Till exempel, låt oss försöka hitta en bra korrelation mellan två punkter i din data för att potentiellt bygga en bra prediktiv modell. Vi kommer att använda funktionen `cor()` för detta. 
Skriv `?cor()` för att ta reda på mer om funktionen.\n" + ], + "metadata": { + "id": "1dvP0LBUueAW" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Find the correlation between the city_name and the price\n", + "cor(baked_pumpkins$city_name, baked_pumpkins$price)\n", + "\n", + "# Find the correlation between the package and the price\n", + "cor(baked_pumpkins$package, baked_pumpkins$price)\n" + ], + "outputs": [], + "metadata": { + "id": "3bQzXCjFuiSV" + } + }, + { + "cell_type": "markdown", + "source": [ + "Som det visar sig finns det bara en svag korrelation mellan Stad och Pris. Däremot finns det en något bättre korrelation mellan Paket och dess Pris. Det är väl logiskt, eller hur? Vanligtvis, ju större lådan med varor, desto högre pris.\n", + "\n", + "När vi ändå håller på, låt oss också försöka visualisera en korrelationsmatris för alla kolumner med hjälp av paketet `corrplot`.\n" + ], + "metadata": { + "id": "BToPWbgjuoZw" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load the corrplot package\n", + "library(corrplot)\n", + "\n", + "# Obtain correlation matrix\n", + "corr_mat <- cor(baked_pumpkins %>% \n", + " # Drop columns that are not really informative\n", + " select(-c(low_price, high_price)))\n", + "\n", + "# Make a correlation plot between the variables\n", + "corrplot(corr_mat, method = \"shade\", shade.col = NA, tl.col = \"black\", tl.srt = 45, addCoef.col = \"black\", cl.pos = \"n\", order = \"original\")" + ], + "outputs": [], + "metadata": { + "id": "ZwAL3ksmutVR" + } + }, + { + "cell_type": "markdown", + "source": [ + "🤩🤩 Mycket bättre.\n", + "\n", + "En bra fråga att ställa om dessa data är: '`Vilket pris kan jag förvänta mig för ett visst pumpapaket?`' Låt oss sätta igång!\n", + "\n", + "> Note: När du **`bake()`** det förberedda receptet **`pumpkins_prep`** med **`new_data = NULL`**, extraherar du den bearbetade (dvs. kodade) träningsdatan. 
Om du hade en annan dataset, till exempel en testuppsättning, och ville se hur ett recept skulle förbehandla den, skulle du helt enkelt baka **`pumpkins_prep`** med **`new_data = test_set`**\n", + "\n", + "## 4. Bygg en linjär regressionsmodell\n", + "\n", + "

\n", + " \n", + "

Infografik av Dasani Madipalli
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "YqXjLuWavNxW" + } + }, + { + "cell_type": "markdown", + "source": [ + "Nu när vi har skapat ett recept och faktiskt bekräftat att datan kommer att förbehandlas korrekt, låt oss nu bygga en regressionsmodell för att besvara frågan: `Vilket pris kan jag förvänta mig för ett givet pumpapaket?`\n", + "\n", + "#### Träna en linjär regressionsmodell med träningsuppsättningen\n", + "\n", + "Som du kanske redan har räknat ut är kolumnen *price* den `beroende` variabeln medan kolumnen *package* är den `oberoende` variabeln.\n", + "\n", + "För att göra detta kommer vi först att dela upp datan så att 80% går till träningsuppsättningen och 20% till testuppsättningen, sedan definiera ett recept som kommer att koda den oberoende kolumnen till en uppsättning heltal, och därefter bygga en modellspecifikation. Vi kommer inte att förbereda och baka vårt recept eftersom vi redan vet att det kommer att förbehandla datan som förväntat.\n" + ], + "metadata": { + "id": "Pq0bSzCevW-h" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "set.seed(2056)\n", + "# Split the data into training and test sets\n", + "pumpkins_split <- new_pumpkins %>% \n", + " initial_split(prop = 0.8)\n", + "\n", + "\n", + "# Extract training and test data\n", + "pumpkins_train <- training(pumpkins_split)\n", + "pumpkins_test <- testing(pumpkins_split)\n", + "\n", + "\n", + "\n", + "# Create a recipe for preprocessing the data\n", + "lm_pumpkins_recipe <- recipe(price ~ package, data = pumpkins_train) %>% \n", + " step_integer(all_predictors(), zero_based = TRUE)\n", + "\n", + "\n", + "\n", + "# Create a linear model specification\n", + "lm_spec <- linear_reg() %>% \n", + " set_engine(\"lm\") %>% \n", + " set_mode(\"regression\")" + ], + "outputs": [], + "metadata": { + "id": "CyoEh_wuvcLv" + } + }, + { + "cell_type": "markdown", + "source": [ + "Bra jobbat! 
Nu när vi har ett recept och en modellspecifikation behöver vi hitta ett sätt att kombinera dem till ett objekt som först förbehandlar data (prep+bake bakom kulisserna), tränar modellen på den förbehandlade datan och även möjliggör eventuella efterbehandlingsaktiviteter. Hur låter det för din sinnesro!🤩\n", + "\n", + "I Tidymodels kallas detta praktiska objekt för en [`workflow`](https://workflows.tidymodels.org/) och innehåller smidigt dina modelleringskomponenter! Det är vad vi skulle kalla *pipelines* i *Python*.\n", + "\n", + "Så låt oss paketera allt i ett workflow!📦\n" + ], + "metadata": { + "id": "G3zF_3DqviFJ" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Hold modelling components in a workflow\n", + "lm_wf <- workflow() %>% \n", + " add_recipe(lm_pumpkins_recipe) %>% \n", + " add_model(lm_spec)\n", + "\n", + "# Print out the workflow\n", + "lm_wf" + ], + "outputs": [], + "metadata": { + "id": "T3olroU3v-WX" + } + }, + { + "cell_type": "markdown", + "source": [ + "Dessutom kan ett arbetsflöde anpassas/tränas på ungefär samma sätt som en modell kan.\n" + ], + "metadata": { + "id": "zd1A5tgOwEPX" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Train the model\n", + "lm_wf_fit <- lm_wf %>% \n", + " fit(data = pumpkins_train)\n", + "\n", + "# Print the model coefficients learned \n", + "lm_wf_fit" + ], + "outputs": [], + "metadata": { + "id": "NhJagFumwFHf" + } + }, + { + "cell_type": "markdown", + "source": [ + "Från modellens output kan vi se de koefficienter som lärdes under träningen. De representerar koefficienterna för den bästa linjen som ger oss det lägsta totala felet mellan den faktiska och den förutspådda variabeln.\n", + "\n", + "#### Utvärdera modellens prestanda med hjälp av testuppsättningen\n", + "\n", + "Det är dags att se hur modellen presterade 📏! 
Hur gör vi detta?\n", + "\n", + "Nu när vi har tränat modellen kan vi använda den för att göra förutsägelser för test_set med `parsnip::predict()`. Därefter kan vi jämföra dessa förutsägelser med de faktiska etikettvärdena för att utvärdera hur bra (eller inte!) modellen fungerar.\n", + "\n", + "Låt oss börja med att göra förutsägelser för testuppsättningen och sedan binda kolumnerna till testuppsättningen.\n" + ], + "metadata": { + "id": "_4QkGtBTwItF" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make predictions for the test set\n", + "predictions <- lm_wf_fit %>% \n", + " predict(new_data = pumpkins_test)\n", + "\n", + "\n", + "# Bind predictions to the test set\n", + "lm_results <- pumpkins_test %>% \n", + " select(c(package, price)) %>% \n", + " bind_cols(predictions)\n", + "\n", + "\n", + "# Print the first ten rows of the tibble\n", + "lm_results %>% \n", + " slice_head(n = 10)" + ], + "outputs": [], + "metadata": { + "id": "UFZzTG0gwTs9" + } + }, + { + "cell_type": "markdown", + "source": [ + "Ja, du har precis tränat en modell och använt den för att göra förutsägelser! 🔮 Är den bra? Låt oss utvärdera modellens prestanda!\n", + "\n", + "I Tidymodels gör vi detta med `yardstick::metrics()`! För linjär regression fokuserar vi på följande mått:\n", + "\n", + "- `Root Mean Square Error (RMSE)`: Kvadratroten av [MSE](https://en.wikipedia.org/wiki/Mean_squared_error). Detta ger ett absolut mått i samma enhet som etiketten (i detta fall priset på en pumpa). Ju mindre värde, desto bättre modell (förenklat sett representerar det det genomsnittliga priset med vilket förutsägelserna är fel!)\n", + "\n", + "- `Coefficient of Determination (vanligtvis känt som R-squared eller R2)`: Ett relativt mått där ett högre värde innebär en bättre passform för modellen. 
I grund och botten representerar detta mått hur mycket av variansen mellan förutsagda och faktiska etikettvärden modellen kan förklara.\n" + ], + "metadata": { + "id": "0A5MjzM7wW9M" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Evaluate performance of linear regression\n", + "metrics(data = lm_results,\n", + " truth = price,\n", + " estimate = .pred)" + ], + "outputs": [], + "metadata": { + "id": "reJ0UIhQwcEH" + } + }, + { + "cell_type": "markdown", + "source": [ + "Där går modellens prestanda. Låt oss se om vi kan få en bättre indikation genom att visualisera ett spridningsdiagram av paketet och priset, och sedan använda de gjorda förutsägelserna för att lägga till en linje som bäst passar.\n", + "\n", + "Detta innebär att vi måste förbereda och bearbeta testuppsättningen för att koda paketkolumnen och sedan binda detta till de förutsägelser som vår modell har gjort.\n" + ], + "metadata": { + "id": "fdgjzjkBwfWt" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Encode package column\n", + "package_encode <- lm_pumpkins_recipe %>% \n", + " prep() %>% \n", + " bake(new_data = pumpkins_test) %>% \n", + " select(package)\n", + "\n", + "\n", + "# Bind encoded package column to the results\n", + "lm_results <- lm_results %>% \n", + " bind_cols(package_encode %>% \n", + " rename(package_integer = package)) %>% \n", + " relocate(package_integer, .after = package)\n", + "\n", + "\n", + "# Print new results data frame\n", + "lm_results %>% \n", + " slice_head(n = 5)\n", + "\n", + "\n", + "# Make a scatter plot\n", + "lm_results %>% \n", + " ggplot(mapping = aes(x = package_integer, y = price)) +\n", + " geom_point(size = 1.6) +\n", + " # Overlay a line of best fit\n", + " geom_line(aes(y = .pred), color = \"orange\", size = 1.2) +\n", + " xlab(\"package\")\n", + " \n" + ], + "outputs": [], + "metadata": { + "id": "R0nw719lwkHE" + } + }, + { + "cell_type": "markdown", + "source": [ + "Bra! 
Som du kan se generaliserar den linjära regressionsmodellen inte särskilt väl relationen mellan ett paket och dess motsvarande pris.\n", + "\n", + "🎃 Grattis, du har precis skapat en modell som kan hjälpa till att förutsäga priset på några olika sorters pumpor. Din pumpaplantage inför högtiden kommer att bli fantastisk. Men du kan förmodligen skapa en ännu bättre modell!\n", + "\n", + "## 5. Bygg en polynomregressionsmodell\n", + "\n", + "

\n", + " \n", + "

Infografik av Dasani Madipalli
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "HOCqJXLTwtWI" + } + }, + { + "cell_type": "markdown", + "source": [ + "Ibland kanske våra data inte har en linjär relation, men vi vill ändå förutsäga ett resultat. Polynomregression kan hjälpa oss att göra förutsägelser för mer komplexa icke-linjära samband.\n", + "\n", + "Ta till exempel sambandet mellan förpackning och pris i vår dataset med pumpor. Även om det ibland finns en linjär relation mellan variabler - ju större pumpan är i volym, desto högre pris - kan dessa samband ibland inte plottas som ett plan eller en rak linje.\n", + "\n", + "> ✅ Här är [några fler exempel](https://online.stat.psu.edu/stat501/lesson/9/9.8) på data som kan använda polynomregression\n", + ">\n", + "> Titta en gång till på sambandet mellan Sort och Pris i det tidigare diagrammet. Verkar det som att detta spridningsdiagram nödvändigtvis bör analyseras med en rak linje? Kanske inte. I detta fall kan du prova polynomregression.\n", + ">\n", + "> ✅ Polynom är matematiska uttryck som kan bestå av en eller flera variabler och koefficienter\n", + "\n", + "#### Träna en polynomregressionsmodell med hjälp av träningsdata\n", + "\n", + "Polynomregression skapar en *kurvad linje* för att bättre passa icke-linjär data.\n", + "\n", + "Låt oss se om en polynommodell presterar bättre när det gäller att göra förutsägelser. 
Vi kommer att följa en liknande procedur som vi gjorde tidigare:\n", + "\n", + "- Skapa ett recept som specificerar de förbehandlingssteg som ska utföras på våra data för att göra dem redo för modellering, dvs: kodning av prediktorer och beräkning av polynom av grad *n*\n", + "\n", + "- Bygga en modellspecifikation\n", + "\n", + "- Paketera receptet och modellspecifikationen i ett arbetsflöde\n", + "\n", + "- Skapa en modell genom att passa arbetsflödet\n", + "\n", + "- Utvärdera hur väl modellen presterar på testdata\n", + "\n", + "Nu kör vi igång!\n" + ], + "metadata": { + "id": "VcEIpRV9wzYr" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Specify a recipe\r\n", + "poly_pumpkins_recipe <-\r\n", + " recipe(price ~ package, data = pumpkins_train) %>%\r\n", + " step_integer(all_predictors(), zero_based = TRUE) %>% \r\n", + " step_poly(all_predictors(), degree = 4)\r\n", + "\r\n", + "\r\n", + "# Create a model specification\r\n", + "poly_spec <- linear_reg() %>% \r\n", + " set_engine(\"lm\") %>% \r\n", + " set_mode(\"regression\")\r\n", + "\r\n", + "\r\n", + "# Bundle recipe and model spec into a workflow\r\n", + "poly_wf <- workflow() %>% \r\n", + " add_recipe(poly_pumpkins_recipe) %>% \r\n", + " add_model(poly_spec)\r\n", + "\r\n", + "\r\n", + "# Create a model\r\n", + "poly_wf_fit <- poly_wf %>% \r\n", + " fit(data = pumpkins_train)\r\n", + "\r\n", + "\r\n", + "# Print learned model coefficients\r\n", + "poly_wf_fit\r\n", + "\r\n", + " " + ], + "outputs": [], + "metadata": { + "id": "63n_YyRXw3CC" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### Utvärdera modellens prestanda\n", + "\n", + "👏👏Du har skapat en polynommodell, låt oss göra förutsägelser på testuppsättningen!\n" + ], + "metadata": { + "id": "-LHZtztSxDP0" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make price predictions on test data\r\n", + "poly_results <- poly_wf_fit %>% predict(new_data = pumpkins_test) %>% \r\n", 
+ " bind_cols(pumpkins_test %>% select(c(package, price))) %>% \r\n", + " relocate(.pred, .after = last_col())\r\n", + "\r\n", + "\r\n", + "# Print the results\r\n", + "poly_results %>% \r\n", + " slice_head(n = 10)" + ], + "outputs": [], + "metadata": { + "id": "YUFpQ_dKxJGx" + } + }, + { + "cell_type": "markdown", + "source": [ + "Woo-hoo, låt oss utvärdera hur modellen presterade på test_set med hjälp av `yardstick::metrics()`.\n" + ], + "metadata": { + "id": "qxdyj86bxNGZ" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "metrics(data = poly_results, truth = price, estimate = .pred)" + ], + "outputs": [], + "metadata": { + "id": "8AW5ltkBxXDm" + } + }, + { + "cell_type": "markdown", + "source": [ + "🤩🤩 Mycket bättre prestanda.\n", + "\n", + "`rmse` minskade från cirka 7 till cirka 3, vilket indikerar en minskad felmarginal mellan det faktiska priset och det förutspådda priset. Du kan *ungefärligt* tolka detta som att felaktiga förutsägelser i genomsnitt är fel med cirka 3 dollar. `rsq` ökade från cirka 0,4 till 0,8.\n", + "\n", + "Alla dessa mätvärden visar att den polynomiska modellen presterar mycket bättre än den linjära modellen. 
Bra jobbat!\n", + "\n", + "Låt oss se om vi kan visualisera detta!\n" + ], + "metadata": { + "id": "6gLHNZDwxYaS" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Bind encoded package column to the results\r\n", + "poly_results <- poly_results %>% \r\n", + " bind_cols(package_encode %>% \r\n", + " rename(package_integer = package)) %>% \r\n", + " relocate(package_integer, .after = package)\r\n", + "\r\n", + "\r\n", + "# Print new results data frame\r\n", + "poly_results %>% \r\n", + " slice_head(n = 5)\r\n", + "\r\n", + "\r\n", + "# Make a scatter plot\r\n", + "poly_results %>% \r\n", + " ggplot(mapping = aes(x = package_integer, y = price)) +\r\n", + " geom_point(size = 1.6) +\r\n", + " # Overlay a line of best fit\r\n", + " geom_line(aes(y = .pred), color = \"midnightblue\", size = 1.2) +\r\n", + " xlab(\"package\")\r\n" + ], + "outputs": [], + "metadata": { + "id": "A83U16frxdF1" + } + }, + { + "cell_type": "markdown", + "source": [ + "Du kan se en kurva som passar dina data bättre! 
🤩\n", + "\n", + "Du kan göra detta ännu mjukare genom att skicka en polynomformel till `geom_smooth` så här:\n" + ], + "metadata": { + "id": "4U-7aHOVxlGU" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make a scatter plot\r\n", + "poly_results %>% \r\n", + " ggplot(mapping = aes(x = package_integer, y = price)) +\r\n", + " geom_point(size = 1.6) +\r\n", + " # Overlay a line of best fit\r\n", + " geom_smooth(method = lm, formula = y ~ poly(x, degree = 4), color = \"midnightblue\", size = 1.2, se = FALSE) +\r\n", + " xlab(\"package\")" + ], + "outputs": [], + "metadata": { + "id": "5vzNT0Uexm-w" + } + }, + { + "cell_type": "markdown", + "source": [ + "Precis som en jämn kurva!🤩\n", + "\n", + "Så här gör du en ny förutsägelse:\n" + ], + "metadata": { + "id": "v9u-wwyLxq4G" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make a hypothetical data frame\r\n", + "hypo_tibble <- tibble(package = \"bushel baskets\")\r\n", + "\r\n", + "# Make predictions using linear model\r\n", + "lm_pred <- lm_wf_fit %>% predict(new_data = hypo_tibble)\r\n", + "\r\n", + "# Make predictions using polynomial model\r\n", + "poly_pred <- poly_wf_fit %>% predict(new_data = hypo_tibble)\r\n", + "\r\n", + "# Return predictions in a list\r\n", + "list(\"linear model prediction\" = lm_pred, \r\n", + " \"polynomial model prediction\" = poly_pred)\r\n" + ], + "outputs": [], + "metadata": { + "id": "jRPSyfQGxuQv" + } + }, + { + "cell_type": "markdown", + "source": [ + "Prediktionen med `polynomial model` är rimlig, med tanke på spridningsdiagrammen för `price` och `package`! Och om detta är en bättre modell än den tidigare, baserat på samma data, behöver du planera för dessa dyrare pumpor!\n", + "\n", + "🏆 Bra jobbat! Du skapade två regressionsmodeller under en lektion. 
I den sista delen om regression kommer du att lära dig om logistisk regression för att bestämma kategorier.\n", + "\n", + "## **🚀Utmaning**\n", + "\n", + "Testa flera olika variabler i denna notebook för att se hur korrelationen påverkar modellens noggrannhet.\n", + "\n", + "## [**Quiz efter föreläsningen**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/14/)\n", + "\n", + "## **Granskning & Självstudier**\n", + "\n", + "I denna lektion lärde vi oss om linjär regression. Det finns andra viktiga typer av regression. Läs om Stepwise, Ridge, Lasso och Elasticnet-tekniker. En bra kurs att studera för att lära dig mer är [Stanford Statistical Learning course](https://online.stanford.edu/courses/sohs-ystatslearning-statistical-learning).\n", + "\n", + "Om du vill lära dig mer om hur du använder det fantastiska Tidymodels-ramverket, kolla in följande resurser:\n", + "\n", + "- Tidymodels webbplats: [Kom igång med Tidymodels](https://www.tidymodels.org/start/)\n", + "\n", + "- Max Kuhn och Julia Silge, [*Tidy Modeling with R*](https://www.tmwr.org/)*.*\n", + "\n", + "###### **TACK TILL:**\n", + "\n", + "[Allison Horst](https://twitter.com/allison_horst?lang=en) för att ha skapat de fantastiska illustrationerna som gör R mer välkomnande och engagerande. Hitta fler illustrationer i hennes [galleri](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM).\n" + ], + "metadata": { + "id": "8zOLOWqMxzk5" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, vänligen notera att automatiska översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på dess originalspråk bör betraktas som den auktoritativa källan. 
För kritisk information rekommenderas professionell mänsklig översättning. Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sv/2-Regression/3-Linear/solution/notebook.ipynb b/translations/sv/2-Regression/3-Linear/solution/notebook.ipynb new file mode 100644 index 000000000..073f488b4 --- /dev/null +++ b/translations/sv/2-Regression/3-Linear/solution/notebook.ipynb @@ -0,0 +1,1113 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Linjär och Polynomisk Regression för Pumpapris - Lektion 3\n", + "\n", + "Ladda in nödvändiga bibliotek och dataset. Konvertera data till en dataram som innehåller ett urval av data:\n", + "\n", + "- Ta endast med pumpor som är prissatta per skäppa\n", + "- Konvertera datumet till en månad\n", + "- Beräkna priset som ett genomsnitt av högsta och lägsta priser\n", + "- Konvertera priset för att återspegla prissättningen per skäppmängd\n" + ] + }, + { + "cell_type": "code", + "execution_count": 167, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NameTypePackageVarietySub VarietyGradeDateLow PriceHigh PriceMostly Low...Unit of SaleQualityConditionAppearanceStorageCropRepackTrans ModeUnnamed: 24Unnamed: 25
0BALTIMORENaN24 inch binsNaNNaNNaN4/29/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
1BALTIMORENaN24 inch binsNaNNaNNaN5/6/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
2BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
3BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
4BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN11/5/1690.0100.090.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " City Name Type Package Variety Sub Variety Grade Date \\\n", + "0 BALTIMORE NaN 24 inch bins NaN NaN NaN 4/29/17 \n", + "1 BALTIMORE NaN 24 inch bins NaN NaN NaN 5/6/17 \n", + "2 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "3 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "4 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 11/5/16 \n", + "\n", + " Low Price High Price Mostly Low ... Unit of Sale Quality Condition \\\n", + "0 270.0 280.0 270.0 ... NaN NaN NaN \n", + "1 270.0 280.0 270.0 ... NaN NaN NaN \n", + "2 160.0 160.0 160.0 ... NaN NaN NaN \n", + "3 160.0 160.0 160.0 ... NaN NaN NaN \n", + "4 90.0 100.0 90.0 ... NaN NaN NaN \n", + "\n", + " Appearance Storage Crop Repack Trans Mode Unnamed: 24 Unnamed: 25 \n", + "0 NaN NaN NaN E NaN NaN NaN \n", + "1 NaN NaN NaN E NaN NaN NaN \n", + "2 NaN NaN NaN N NaN NaN NaN \n", + "3 NaN NaN NaN N NaN NaN NaN \n", + "4 NaN NaN NaN N NaN NaN NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 167, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from datetime import datetime\n", + "\n", + "pumpkins = pd.read_csv('../../data/US-pumpkins.csv')\n", + "pumpkins.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 168, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MonthDayOfYearVarietyCityPackageLow PriceHigh PricePrice
709267PIE TYPEBALTIMORE1 1/9 bushel cartons15.015.013.636364
719267PIE TYPEBALTIMORE1 1/9 bushel cartons18.018.016.363636
7210274PIE TYPEBALTIMORE1 1/9 bushel cartons18.018.016.363636
7310274PIE TYPEBALTIMORE1 1/9 bushel cartons17.017.015.454545
7410281PIE TYPEBALTIMORE1 1/9 bushel cartons15.015.013.636364
\n", + "
" + ], + "text/plain": [ + " Month DayOfYear Variety City Package Low Price \\\n", + "70 9 267 PIE TYPE BALTIMORE 1 1/9 bushel cartons 15.0 \n", + "71 9 267 PIE TYPE BALTIMORE 1 1/9 bushel cartons 18.0 \n", + "72 10 274 PIE TYPE BALTIMORE 1 1/9 bushel cartons 18.0 \n", + "73 10 274 PIE TYPE BALTIMORE 1 1/9 bushel cartons 17.0 \n", + "74 10 281 PIE TYPE BALTIMORE 1 1/9 bushel cartons 15.0 \n", + "\n", + " High Price Price \n", + "70 15.0 13.636364 \n", + "71 18.0 16.363636 \n", + "72 18.0 16.363636 \n", + "73 17.0 15.454545 \n", + "74 15.0 13.636364 " + ] + }, + "execution_count": 168, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pumpkins = pumpkins[pumpkins['Package'].str.contains('bushel', case=True, regex=True)]\n", + "\n", + "new_columns = ['Package', 'Variety', 'City Name', 'Month', 'Low Price', 'High Price', 'Date']\n", + "pumpkins = pumpkins.drop([c for c in pumpkins.columns if c not in new_columns], axis=1)\n", + "\n", + "price = (pumpkins['Low Price'] + pumpkins['High Price']) / 2\n", + "\n", + "month = pd.DatetimeIndex(pumpkins['Date']).month\n", + "day_of_year = pd.to_datetime(pumpkins['Date']).apply(lambda dt: (dt-datetime(dt.year,1,1)).days)\n", + "\n", + "new_pumpkins = pd.DataFrame(\n", + " {'Month': month, \n", + " 'DayOfYear' : day_of_year, \n", + " 'Variety': pumpkins['Variety'], \n", + " 'City': pumpkins['City Name'], \n", + " 'Package': pumpkins['Package'], \n", + " 'Low Price': pumpkins['Low Price'],\n", + " 'High Price': pumpkins['High Price'], \n", + " 'Price': price})\n", + "\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1 1/9'), 'Price'] = price/1.1\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1/2'), 'Price'] = price*2\n", + "\n", + "new_pumpkins.head()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Ett spridningsdiagram påminner oss om att vi bara har månadsdata från augusti till december. 
Vi behöver förmodligen mer data för att kunna dra slutsatser på ett linjärt sätt.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 169, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 169, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "new_pumpkins.plot.scatter('Month','Price')" + ] + }, + { + "cell_type": "code", + "execution_count": 170, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 170, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEGCAYAAABiq/5QAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAshElEQVR4nO3dfZyU5Xno8d+1y7IgiwLLunJkV2xWSangRreKIVoVQ03qEdKon74YTKMlzan9pCatmLTHGtvaiE3M6UlOq4k59SVNJJiA9ZgIEqxRwThQXhQ0bAOyGFhwBd01sO7Ldf6YZ5aZ3ZndmWfuZ+aemev7+Sw7c+8z19zP7HDtM/erqCrGGGMqR1WxK2CMMaawLPEbY0yFscRvjDEVxhK/McZUGEv8xhhTYcYVuwLZmD59us6aNavY1TDGmJKyefPmN1W1YXh5SST+WbNmEYvFil0NY4wpKSLyerpya+oxxpgKY4nfGGMqjCV+Y4ypMJb4jTGmwljiN8aYCmOJ32TU1dPLto6jdPX0FrsqxhiHSmI4pym8NVvfYPlj26mpqqJvcJAVH5/H1a2nF7taxhgH7IrfjNDV08vyx7ZzvG+Q7t5+jvcNcutj2+3K35gyYYnfjLD/yDFqqlLfGjVVVew/cqxINTLGuGSJ34wwc+pE+gYHU8r6BgeZOXVikWpkjHHJEr8Zob6ulhUfn8eEmiom145jQk0VKz4+j/q62rziWmexMX6wzl2T1tWtp7OgZTr7jxxj5tSJeSd96yw2xh+RJn4R2Qt0AwNAv6q2icg04FFgFrAXuE5Vj0RZDxNOfV1t3gkfUjuLjxNvQrr1se0saJnuJL4xJjeFaOq5TFVbVbUtuH8bsF5VzwLWB/dNGbPOYmP8Uow2/sXAg8HtB4ElRaiDKSDrLDbGL1EnfgXWishmEVkWlDWq6gGA4PupEdfBFFlUncXGmHCi7txdoKq/FJFTgXUi8mq2Dwz+UCwDaG5ujqp+pkBcdxYbY8KL9IpfVX8ZfD8E/BC4AOgUkRkAwfdDGR57v6q2qWpbQ8OIncNMCaqvq+XcpimW9I0pssgSv4hMEpHJidvAIuBl4HHghuCwG4A1UdXBGGPMSFE29TQCPxSRxPP8m6r+WEReAlaKyI3APuDaCOtgjDFmmMgSv6r+Ajg3TXkXsDCq5zXGGDM6W7LBGGMqjCX+InK9do3v8YwxfrC1eorE9do1vsczxvjDrviLwPVGJ77HM8b4xRJ/Ebheu8b3eMYYv1jiLwLXa9f4Hs8Y4xdL/EXgeu0a3+MlWGexMX4QVS12HcbU1tamsVis2NVwrqun1+naNT7Hs85iYwpPRDYnLYk/xEb1FJGrjU58j2cbsRjjF2vqMZEr5c5i181T7Z3drIp10N7Z7SSeNZ+ZMOyK30SuVDuLXTdP3b56Bw9t2jd0f+lFzdy5eK439TOVw674Tc5yvWotxY1YXM9laO/sTkn6AA9t3Bf6yt/mWph82BW/yUnYq9ZS24gl0TyV6JOAE81TYeq+teNoxvKWxslFr5+pLHbFb7KW71VrKW3E4rp5qrVpSk7lYynV5jPjB0v8Jm
ujXbWWG9fNUy2Nk1l6UeoWoksvag51tR9F/UxlsaYekzXXV62+c908defiuSydP4utHUdpbZoSOulHVT9TOSJP/CJSDcSAN1T1KhG5A/hj4HBwyBdV9cmo62Hyl7hqfWhjaht/vgnMZ67nRrQ0Tnb6ermun+tJgMZPhbji/yywCzg5qexeVf3HAjy3ccz1Vavxhw0PrRyRtvGLyEzgd4BvRfk8prBaGidzTVuTJf0yYsNDK0vUnbtfA24FBoeV3ywi20Xk2yIyNd0DRWSZiMREJHb48OF0hxhjHCnl2dUmd5ElfhG5CjikqpuH/eifgfcBrcAB4CvpHq+q96tqm6q2NTQ0RFVNYww2PLTSRHnFvwC4WkT2At8DLheRR1S1U1UHVHUQ+CZwQYR1MMZkwYaHVpbIOndV9QvAFwBE5FLgL1T1ehGZoaoHgsM+BrwcVR2MMdmz4aGVoxjj+FeISCugwF7g00WogzEmDdfDQ42fCpL4VfUZ4Jng9icK8ZzGGGPSsyUbjDGmwljiN8aYCmOJ3xhjKowlfmOMqTCW+I0xpsJY4jfGmApjid8UTFdPL9s6jka68Nf6nQdZvmob63cerIh4rl9T3+MZN0RVi12HMbW1tWksFit2NUweCrHk76J7n+Hnne8O3Z/dOImnbrm0bOO5fk19j2dyJyKbVbVteLld8ZvIFWLJ3/U7D6YkVYDXOt8NfWXtezzXr6nv8YxblvhN5Aqx5O/anZ05lZd6PNevqe/xjFuW+E3kCrHk76I5jTmVl3o816+p7/GMW5b4TeQKseTvwjmnMbtxUkrZ7MZJLJxzWlnGc/2a+h7PuGWdu6ZgCrGR9/qdB1m7s5NFcxpDJ9VSiuf6NfU9nslNps5dS/zGGFOmbFSPMcYYoACJX0SqReQ/ReSJ4P40EVknIruD72k3Wzf+cjkpZ/WWDm568CVWb+lwULPKmzAU29PFV9e+RmxPl5fx2ju7WRXroL2z20k840bkTT0i8jmgDThZVa8SkRXAW6r6ZRG5DZiqqstHi2FNPf5wOSln/l3rOPjOe0P3Z5w8no1f/LAXdSsF139rE8+1n0jQF7fU8/BN872Jd/vqHTy0ad/Q/aUXNXPn4rmh45ncFaWpR0RmAr8DfCupeDHwYHD7QWBJlHUw7riclLN6S0dK0gc48M57oa/8K23CUGxPV0qSBvhpe1foK3XX8do7u1OSPsBDG/fZlb8nom7q+RpwK5A8oLcxsdl68P3UdA8UkWUiEhOR2OHDhyOupsmGy0k5T+xIP2M1U3kh61YKnt39Zk7lhY63teNoTuWmsCJL/CJyFXBIVTeHebyq3q+qbara1tDQ4Lh2JgyXk3Kumpt+KGOm8kLWrRRcctb0nMoLHa+1aUpO5aaworziXwBcLSJ7ge8Bl4vII0CniMwACL4firAOxiGXk3KWnNfEjJPHp5TNOHk8S85rKnrdSkHbmfVc3FKfUnZxSz1tZ9ZneERh47U0TmbpRc0pZUsvaqalcXKoeMatgozjF5FLgb8IOnfvAbqSOnenqeqtoz3eOnf94nJSzuotHTyx4yBXzT0tdNKPqm6lILani2d3v8klZ00PnaSjjNfe2c3WjqO0Nk2xpF8ERZ3ANSzx1wMrgWZgH3Ctqr412uMt8RtjTO4yJf5xhXhyVX0GeCa43QUsLMTzGmOMGclm7hpjTIUp68Tv+yxO1/VzPUvS93iFYFsRmnJUkKaeYvB9Fqfr+rmeJel7vEKwrQhNuSrLK37fZ3G6rp/rWZK+xysE24rQlLOyTPy+z+J0XT/XsyR9j1cIthWhKWdlmfh9n8Xpun6uZ0n6Hq8QbCtCU87KMvH7PovTdf1cz5L0PV4h2FaEppyV9Q5cvs/idF0/17MkfY9XCLYVoSlltvWiqQiWWI05oagzd40pBBsuaUx2yrKN31QeGy5pTPYs8ReR61mc63ceZPmqbazfGW4zk2zjhX2edI9zteduVMMlfd/T1vXv3GYWVwZr4y
8S180Si+59hp93vjt0f3bjJJ665VLn8cI+T7rHvX2sz9meu109vbT93dMkv5sFiP31FaHb+n3f09b179yayspPUfbcNem5bpZYv/NgSgIAeK3z3dBXgZni3fvUrlDPkymeyz13t+47wvBLGA3Kw/B9T1vXv3NrKqsslviLwHWzxNqdnTmVh423Znv6pDLW8+RSj7B77rp+DXzf09b1+drM4soS5Z67E0TkZyKyTUReEZEvBeV3iMgbIrI1+PpoVHXwletZnIvmNOZUHjbe4nnp98Md63lyqUfYPXddvwa+72nr+nxtZnFlifKKvxe4XFXPBVqBK0Uk0aB5r6q2Bl9PRlgHL7mexblwzmnMbpyUUja7cRIL54RLopni3fLbvx7qeTLFc7nnruvXwPc9bV2fb1Qzi21Zaz9FNo5f473GPcHdmuDL/57kArm69XQWtEx3NtnoqVsuZf3Og6zd2cmiOY2hE8BY8RomT+C1pLblUydPCB1v/l3r8qrjcK7fXJ3dx1PuHxp2P1cP3zTf6Z62rn/nrt+Ttqy1vyId1SMi1cBmoAX4hqouF5E7gE8C7wAx4POqOmoPXDmO6ilFsT1dXHPfphHlqz49P+cktnpLB3++cvuI8q9dNy/UVf/6nQe58aHNI8ofWHp+qIToOl6l6erpZcHdP+F434nmowk1VTy//PJQf1Bcx6sURRnVo6oDqtoKzAQuEJFzgH8G3ke8+ecA8JV0jxWRZSISE5HY4cOHo6ymyZLLDspMnbi+dO66jldpbFlrvxVkVI+qHiW+2fqVqtoZ/EEYBL4JXJDhMferapuqtjU0NBSimmPyfTKPa8PbU/PtoEyOl6kT15fOXdfxEh55YQ/X/ssLPPLCnrziRBXP1YQ6W9bab5G18YtIA9CnqkdFZCJwBXC3iMxQ1QPBYR8DXo6qDi4lT775p5+0O53M4yKea5naU0+ZUM3bxweGjjtlQnVWzTzp4qWLlU/n7oyTx3Ng2ISwfDq4XcYDOPeOHw+d70t7j3DP2tfYdseV3sSbf9e6obkVT+86xN0/fjX0hLr6ulrOmDYxpT/ojGnh+w3q62ppnjYxZe5CPvEqXZRX/DOADSKyHXgJWKeqTwArRGRHUH4ZcEuEdXDC98k8rmWazLN+58GURA3w9vGBMeudLt5frtrO8f7UK7jeAc1ra8Mjx/pTyo4c6/cm3iMv7En72oW9Uncdb/WWDqcT6mJ7ulKSPsQnmOXzfybdhDVf/s+UmsgSv6puV9UPqOo8VT1HVe8Myj+hqnOD8quTrv695ftkHtcytadmat8eq97p4lVXCdXibxuw63hrtqd/m2cqL3Q8130ulfZ/ptTYzN0s+D6Zx7VM7amZ2rfHqne6eAODyoD62wbsOt7ieTNyKi90PNd9LpX2f6bUWOLPgu+TeVzLNJln4ZzTQtU7Ea92nHBSTTW144R7rpnHPdecSzXxN2E1ONnasJr44myu4klwX/KMd/0Hz+SUCdUpZadMqOb6D57pRbwl5zU5nVBXaf9nSo1txJIl15NvXMdzLdNknt2HulOOax92P5P4bBGJZ1CNp9O7ntxJciv1Pzy5M68JOf9z9Y6heAPB/XziLV+1bWhSmAb3fZowNHF8auf4SeOrRzl6bK5n9FTa/5lSYlf8OWg7s57PLZrt7A3nOp5r9XW1nNs0ZSjph+0ATHTu9vYP8qv3BujtH+Tz39/mtDPRdWfnfRt2c6w/NRUe61fu27Dbi/q57ox1HS+h0v7PlIqsEr+InC0i60Xk5eD+PBH562irZnwTtgMwXUfpYIbLy7Cdia47O1dneFym8rH43hnrOp7xW7ZX/N8EvgD0QXzEDvB7UVXK+ClsB2C6jtIqSX9s2M5E152dSzI8LlP5WHzvjHUdz/gt28R/kqr+bFhZf9ojTdZKbeXCsB2A6TqLv3LtuU47E113dn76srOYOC71r9PEccKnLzvLi/q57ox1Hc/4LatF2kTkR8DNwPdV9TwRuQ
a4UVU/EnUFoTwXaSvllQtXb+ngiR0HuWruaTklhq6e3hGdxWFjZfLIC3tYs/0Ai+fNCJ1Uk923YTertx9gybwZoZN+VPVbs/UNPve9rSjxPvN7f68179+569+HKa5Mi7Rlm/h/Dbgf+CBwBNgDXK+qex3XM61yS/y2cqHJl/3OTTbyWp1TVX+hqlcADcD7VfVDhUr65cj3WaZh2AYZheXD79yUrqzG8YvIXcCKYJVNRGQq8XX0bWRPCL7PMs2VbZBReMX+nZvSlm3n7kcSSR8g2Dil4vbKdcX1NneF3jYvuTzTgm7ZXvn73sHta7zE73x8NdRWVzG+Or+ZxQntnd2sinXQ3pndxDxTmrKduVstIrWq2gsQLLNsDYl5cL3NXaG2zRte/qeXtlBTVcVxTlx9Vouw4dVDXPb+UwEy1sn3Dm7f48X2vsV7A0Dw2sdefyuveLev3sFDm/YN3V96UTN3Lp4bOp7xV7adu7cCVwP/l/jM7k8Bj6vqimirF1dunbu+y9Rx+MTNH+Kqrz+XUl47TgChd9gSy3W11RzvG0BEmDCuekSi872D2/d47Z3dXHHvsyPKn77lEloaJxc9nvFDvp27K4C/B34d+A3gbwuV9E3hZeo43NpxdET5+Opqbr6shQk1VUxKWiump3eA/kHoG9C0TUC+d3D7Hm9rx9Gcygsdz/gt60XaVPVHwI8irIvxRKaOw9amKWnL/+DCZv7gwmY2vHqIO/79FXp6U9ekSUgkuvq6Wu87uH2P19o0JafyQsczfhv1il9Engu+d4vIO0lf3SLyzhiPnSAiPxORbSLyioh8KSifJiLrRGR38H2qu9OpbK728D3RcVjF+GphfHW8s7ilcXKwvHLV0FeiQ/HIu+/R09tP30DmpsNj7/XzlbWvsXpLB0fefY+Pf2Am4wRqqoXx1eKkg7umCqqroKbKzbLM46vFaf1cxWtpnMzSi5pTypZe1By6WcZ1POO3Ua/4VfVDwfcwv/1e4HJV7RGRGuC5YAbw7wLrVfXLInIbcBuwPER8k8T1Hr4rYx28N5C4QlW+H+vg6tbTie19K6U9P/b6W8T2vpXSKZhJv8Z3TBqxa1LwxyLxHGF9fcNuEk3oA8A3NuzOK178NTjxhyzf+rmO53od5TsXz2Xp/Fls7ThKa9MUS/plbMw2fhGpSqzKmQuN6wnu1gRfCiwGHgzKHwSW5BrbpCrUnsCrt3SMSPAPbdyXVdLPRj51Xr/zYNo9WdfvDLe6pO/7LLd3dqf9XeQ7DLOlcTLXtDVZ0i9zYyZ+VR0EtolI81jHDici1SKyFThEfLP1F4HGxD67wfdTMzx2mYjERCR2+PDhXJ+6ohRqf9NCLNEbts6Z9gPOVB62Hr7sGWudsSYf2U7gmgG8EqzJ/3jia6wHqeqAqrYCM4ELROScbCumqverapuqtjU0NGT7sIpUqP1NC7FEb9g6Z9oPOFN52Hr4smesdcaafGSb+L8EXAXcCXwl6SsrwazfZ4ArgU4RmQEQfD+UfXVNOoXa33TJeU1pOwCHl4WVT50XzjmN2Y2TUspmN05i4Zxwf6x83zPWOmNNPkadwCUiE4A/AVqAHcADqprVOvwi0gD0qerRYKbvWuBu4LeArqTO3WmqeutosWwCV3Zc70eaKV57Z/eIDsDkMiDt7U3/9ebQksTvn3Eyz+5+k1PrxnOo5z1ndV6/8yBrd3ayaE5j6KSfrFCvaVjpfhfGJIRalllEHiW+69ZPgY8Ar6vqZ7N8wnnEO2+riX+yWKmqd4pIPbASaAb2Adeq6lujxbLEX/qSlys41tefcUavMcadTIl/rAlcc1R1bhDgAWD4LlwZBdszfiBNeRewMNs4pvQlL+R2Yk0fpW8g/uHx1se2s6Bluq0jb0yBjNXG35e4kW0TjzHDpVuuIJmtI29MYY2V+M9Nnq0LzMt25q4pX7ku3ZtuuYJkx/sH6OtPv8xD1HWzeKYSjTVzt3q0n5vKE2bp3sRyBbcOa+OH+C
JufQPKNfdtynsZYNfLCldaPFM5sh3OaUzWs0WHb9SyreMoC1qm8/zyy3nkpgt58YtX8N2bLhyxrk8+M09dz2SttHimsmS9Oqcxo80WTQwlzHb0zoZX00/fSI7lum4Wz5g4u+I3WRtrtujwbRhHW4/f92WFKy2eqSyW+E3WxpotmsvoHd+XFa60eKayZLX1YrHZBC6/ZJotmm57wWTpthp0PfPU4hlzQqiZu76wxF86Ht/6xojROzZD15jiCDtz15icXN16OgtaprP/yLGhbQUTt21mrjF+sMRvnKuvq01J8pbwjfGLde4aY0yFsSt+UzBdPb3WBGSMByzxm4KwZZmN8Yc19ZjI5TKxyxgTvcgSv4g0icgGEdklIq+IyGeD8jtE5A0R2Rp8fTSqOpjCSV6fZzhbltkYv0TZ1NMPfF5Vt4jIZGCziKwLfnavqv5jhM9tCii5GSdd081YyzL3DQ4OtfsbY6IX2RW/qh5Q1S3B7W5gF2ANuWVmeDNOuqabxLLME2qqmFw7jnFVUFMtTK4dx4SaKlZ8fJ518BpTQAXp3BWRWcS3YXwRWADcLCJLgRjxTwVH0jxmGbAMoLm5efiPjScSzTgntlQ80XSTnMxtYpcx/oi8c1dE6oDHgD9X1XeAfwbeB7QCB4CvpHucqt6vqm2q2tbQ0BB1NU1I6ZpxMjXd1NfVcm7TlKEJXonbxpjCijTxi0gN8aT/HVX9AYCqdqrqgKoOAt8ELoiyDiZaw5txRmu6Sd4m0LYMNKZ4Imvqkfjeeg8Au1T1q0nlM1T1QHD3Y8DLUdXBFMbwZpx0SX/4NoHJbMtAYworyjb+BcAngB0isjUo+yLw+yLSCiiwF/h0hHUwBTJ8fZ5k6bYJTPbQxn0snT/LlhU2pkAiS/yq+hwgaX70ZFTPafyUaZvA4cdY4jemMGzmrolcNtsB2paBxhSOJX4TuXTbBCazLQONKSxbpM0UxJ2L57J0/qyhbQIB2zLQmCKxxG8KpqVxckqSt4RvTHFYU48xxlQYS/zGGFNhLPGbjEZbatmHeMaYcKyN36Q11lLLxY5njAnPrvjNCNkstVzMeMaY/FjiNyOk2zErn12yXMczxuTHEr8ZIZellosRzxiTH0v8ZoRcllouRjxjTH5EVYtdhzG1tbVpLBYrdjUqTldPr9NdslzHM8aMTkQ2q2rb8HIb1WMyGm2pZR/ipeP7HyuLZ/F8YInflA3fh6BaPIvni8ja+EWkSUQ2iMguEXlFRD4blE8TkXUisjv4PjWqOpjK4fsQVItn8XwSZeduP/B5Vf11YD7wpyIyB7gNWK+qZwHrg/sVyfeZsaU009b3IagWz+L5JModuA4AB4Lb3SKyCzgdWAxcGhz2IPAMsDyqevjK94+ZpfSxFfwfgmrxLJ5PCjKcU0RmAR8AXgQaE5utB99PLUQdfOL7x8xS+9gK/g9BtXgWzyeRD+cUkTrgP4C/V9UfiMhRVZ2S9PMjqjqinV9ElgHLAJqbm89//fXXI61nIW3rOMr133qR7t7+obLJteN45KYLOTfEFoS+xysk30dpWDyLV0hFGc4pIjXAY8B3VPUHQXGniMxQ1QMiMgM4lO6xqno/cD/Ex/FHWc9C8/1jZql9bE3m+xBUi2fxfBDlqB4BHgB2qepXk370OHBDcPsGYE1UdfBV4mNh7bgqThpfTe04Nx8z3ccTTqqppnacpMQL2+mb7nG+d0hbPFOOorziXwB8AtghIluDsi8CXwZWisiNwD7g2gjr4C1N/KsydM+/eAJCEDMubKdvuscpeN0hbfH87tA34dmSDUXQ1dPLgrt/wvG+E80pE2qqeH755aGu0gsV74mbP8RVX38u5+dJF692XBWg9PafeP+Vwmtg8UwpydTGX9aLtPn6Mdj3McSZ4m3tOBrqedLFq64SqqX0XgOLZ8pB2S7Z4PPH4JlTJ3K8fyCl7Hj/QF6dsT1JI3AAenr7ncdrbZoSqtM33fn2DQxSJanH+dQhbfFKp0
Pf5K4sr/hLYVz78Ca2fJrcjrz73ohWfQ3KXcYDQo9VTne+t//33/B2HLXF83scuslPWV7xJz62HufEFUziY2uYN3IU8SbWjEsZJz+xZlzoeFs7jmYsb2mc7DTeNW1NLGiZntNY5Uzne85/O4Xnl1/ubNzz1a2n51w3ixddPOOvskz8vn8Mdh2vNcOkqkzl+cbLdazyaOfr+zhqi2fKUVk29Yw1Dj1sPF8/Vrc0TmbpRc0pZUsvag51tR9FPNfzDIwx+SnLK37IPA49LN8/Vp9/xjS+97N9CFUog7SdMc2reK7nGRSK71P6Ky2ecaMsx/FX2phk38d0l+rvw+eRYZUYz+SuosbxV9qYZN/HdJfi78P3kWGVFs+4VZaJP6oxyb5OCCt053Ou9R4tnuvXdPWWDm568CVWb+nIK47vf/wqLZ5xqyzb+BOdibcO+5iZT7OCzx+D6+tqaZ42kZ93vjtUdsa08G2q9XW1tJ0xlefau4bKfvOMqdTX1Yaqd31dLdedP5OHNu0bKruubSbPtb/p9DWdf9c6Dr4Tn7vw9K5D3P3jV9n4xQ+HijVz6kR+1Zc66exXfflNsvN5ZJjv8YxbZXnFD/HO0+eXX84jN13I88svzyuh+P4xOLanKyXpA7zW+S6xPV0ZHjG69s7ulKQP8NP2LmJ7ukLVu6unl5Wb96eUPfrSfm5dtc3Za7B6S8dQ0k848M57oa/8j7z7HgODqf1fA4MaelJcfV0t17XNTCm7rm2mNyPDfI9n3CrLK/4EV2OSfZ8Q9uzuNzOWt51Zn3O8TBO4nt39Zqh6pzvf6ioJRvicuKrO5zX44dZfZixfcl5TzvGeaz+csTzMsNaunl5WxlL/+K2M7eezC88O/R71faSZTQjzV9le8bvk+8fgS86anlP5WDJN4LrkrOkc60tdw+dY39hrAqU734FBHdGU0p3H+kIfyFDnTOVjmV43IafysUTV5l1fV8u5TVOcJVXf4xk3LPFnwfePwW1n1nNxS+qV/cUt9aGu9gGmThofvyJPUl0lTDlpPPH9dU4Yfj+ddM0cl53dkPbYPYd7cqxt3NzTT8mpfCwTa9L/18hUPpao2rxddWYnrN95kOWrtrF+50En8WJ7uvjq2tdCNztGHa+9s5tVsQ7aO7srIl5CWTf1uOT7x+CHb5pPbE8Xz+5+k0vOmh466UP86vSkmuqUtXVOqqlma8fRtO3eYzXPpGvmWLerM+2xYZuntu1/O2P5wjmnFT1efV0ttdXC8b4TZbXVktfv3WVnNsCie58Z6it6NLaf2Y2TeOqWS0PHu/5bm4b6iv7pJ+1c3FLPwzfN9ybe7at3pAw4WHpRM3cunlu28ZJFufXit0XkkIi8nFR2h4i8ISJbg6+PRvX8UfD9Y3DbmfV8btHsvJI+ZL46HVcFw/I+gwpvdh8fNV66Zo5xGd55vzb9pJzrO9rjfIm3eksHbx9Pbdp6+/hA6Ct1153Z63ceTDtAIOyVf2xPV8YBAj7Ea+/sTkmqAA9t3Bf6ytr3eMNF2dTzr8CVacrvVdXW4OvJCJ/fhJSpKeoXb/4q7fGZro4T0rbxk76JqH8wbfGYMj3Ol3hP7EifQDOVFzre2p3pP4FlKh/LaAMOfIg32gq05RhvuMgSv6o+C7wVVXwTratbT+eRT13AHy2YxSOfuoCrW08P3YmcvEhb4uvW356d9tioVhQtdryr5qZvHspUPpZLM7zmmcrHsmhOY07lY3E94KBQAxh8eb+4jjdcMTp3bxaR7UFT0NRMB4nIMhGJiUjs8OH0Q+tMdG5fvYNr7tvEP/2knWvu28Tta3bk1Ykc2/sWvf2DQ1/7j/zK6xVFXcdbcl4TM04en1I24+TxoYaaAsxtSv9fJ1P5WBbOOY3ZjZNSymY3TgrVnwHuBxy4juf7+8V1vOEiXaRNRGYBT6jqOcH9RuBN4ssz/i0wQ1U/NVacctts3Xftnd1cce+zI8qfvuUSWhons3pLB0/sOMhVc0/LKn
GNFu/1rndZu7OTRXMaQyeZZOt3HvQ63n0bdrN6+wGWzJvBpy87K3ScqBa+e+SFPazZfoDF82Zw/QfPDB0nIdf3SqHj+f5+yXfARqZF2go6qkdVhxoMReSbwBOFfH6TndHaFx/auHeo0+npXYfY0nF0zJEGmeLdvuZlXvhFvDXw0dh+p6MgfI+360A3b7xzPHS80ZbVcFG/l/Ye4eeHe5ydb7bvlWLF8/H9krw8yv0//YXT1U0L2tQjIjOS7n4MeDnTsaZ4MrUjTj2pJtRIg0zxEkk/l1iZ+D6qIop46Ua5+FQ/ixc+XtSrm0Y5nPO7wEZgtojsF5EbgRUiskNEtgOXAbdE9fwmvEzti0d+1Zf2+LFGGqSL98H3pd/YxZdREBbP4hUzXtSrm0bW1KOqv5+m+IGons+4defiuSydP4utHUdpbZpCS+PkjFcv2Yw0OP+MaTz60n4S+29devapvPBfIwd9+TIKwuJZvGLGi3p1U1uywWTU0jiZa9qahkYSTJ00fsToewnKR5P42NrbP8jxYFTPV5/+Ode1pbZX5jNqIWzdLF5cS+PktKNm8hmVUmnxotinOqrVTW3JBpO1/UeOUVc7LmUph7racaFW56ypquIPL5zFsovfl/KpotB1s3hxXT29vPT6kZSyl14/QldPr8XLUrpPyfmIcnVTS/wma2E/fo72uPq6Widjk31fQdX3eL4vPe57vISWxsnOxtqDu6Xlh7OmHpO1sB8/C7Eph+8rqPoez/c/TL7HKzWRTuByxSZw+aWrpzfUx8+wjytE3SwePL71jRHbleYzbrzS4vko0wQuS/zGmCE+/2EqhXi+8WLmrjGlptwTw3Cu25QrLV6psMRvTAbJU+bLtSnAVCbr3DUmjainzBtTTJb4jUkj6inzxhSTJX5j0qj04X6mvFniNyaNqOYetHd2syrW4WzvVNfxunp62dZx1Jq0ypx17hqTgesp88nrtQNO1393Ec86syuHXfEbM4r6ulrObZri5Eq/ktd/N36xxG9MAVT6+u/GL1FuxPJtETkkIi8nlU0TkXUisjv4Hm5naGNKTKWv/278EuUV/78CVw4ruw1Yr6pnAeuD+8aUPdfrtZfa+u/GL5Gu1SMis4AnVPWc4P5rwKWqeiDYf/cZVZ09Vhxbq8eUi/bObmfrtUcRr9KWqCh3vqzV06iqBwCC5H9qpgNFZBmwDKC5uTnTYcaUFNfrtZfK+u/GL9527qrq/arapqptDQ0Nxa6OMcaUjUIn/s6giYfg+6ECP78xxlS8Qif+x4Ebgts3AGsK/PzGGFPxohzO+V1gIzBbRPaLyI3Al4EPi8hu4MPBfWOMMQUUWeeuqv5+hh8tjOo5jTHGjK0ktl4UkcPA6xE+xXTgzQjj+6Dcz7Hczw/sHMtFIc/xDFUdMTqmJBJ/1EQklm6sazkp93Ms9/MDO8dy4cM5ejuc0xhjTDQs8RtjTIWxxB93f7ErUADlfo7lfn5g51guin6O1sZvjDEVxq74jTGmwljiN8aYClP2iV9EmkRkg4jsEpFXROSzw37+FyKiIjI9qewLItIuIq+JyG8Xvta5Ge0cReTPgvN4RURWJJWXxTmKSKuIbBKRrSISE5ELkh5Tauc4QUR+JiLbgnP8UlCecQOjUjrHUc7vHhF5VUS2i8gPRWRK0mNK5vwg8zkm/dyPfKOqZf0FzADOC25PBn4OzAnuNwFPEZ8cNj0omwNsA2qBM4H/AqqLfR5hzhG4DHgaqA1+dmoZnuNa4CNB+UeJ7/FQqucoQF1wuwZ4EZgPrABuC8pvA+4uxXMc5fwWAeOC8rtL9fxGO8fgvjf5puyv+FX1gKpuCW53A7uA04Mf3wvcCiT3cC8Gvqeqvaq6B2gHLsBjo5zjZ4Avq2pv8LPEaqjldI4KnBwcdgrwy+B2KZ6jqmpPcLcm+FLi5/JgUP4gsCS4XVLnmOn8VHWtqvYH5ZuAmcHtkjo/GPV3CB7lm7JP/MmCHcE+ALwoIlcDb6
jqtmGHnQ50JN3fz4k/FN5LPkfgbOBiEXlRRP5DRH4zOKyczvHPgXtEpAP4R+ALwWEleY4iUi0iW4kvWb5OVV9k2AZGQGIDo5I7xwznl+xTwI+C2yV3fpD+HH3LNxWT+EWkDniMeKLoB/4KuD3doWnKSmLMa/I5quo7xBfhm0r84/RfAitFRCivc/wMcIuqNgG3AA8kDk3zcO/PUVUHVLWV+FXvBSJyziiHl9w5jnZ+IvJXxP9vfidRlC5E5JXMU5pznIdn+aYiEr+I1BBPFt9R1R8A7yPenrZNRPYS/wVtEZHTiP/FbUp6+ExONB94K805QvxcfhB8/PwZMEh8gahyOscbgMTt73PiY3JJnmOCqh4FngGuJPMGRiV7jsPODxG5AbgK+EMNGr8p4fODlHNcjG/5ppgdIYX4Iv4X9SHga6Mcs5cTnS2/QWpnyy8ojQ6lEecI/AlwZ3D7bOIfKaXMznEXcGlweyGwuYR/jw3AlOD2ROCnxJPhPaR27q4oxXMc5fyuBHYCDcOOL6nzG+0chx1T9HxT6M3Wi2EB8AlgR9DuBvBFVX0y3cGq+oqIrCT+RuwH/lRVBwpS0/DSniPwbeDbIvIy8B5wg8bfbeV0jn8M/C8RGQccB5ZByf4eZwAPikg18U/jK1X1CRHZSLyZ7kZgH3AtlOQ5Zjq/duKJb128JZJNqvonJXh+kOEcMx1crHO0JRuMMabCVEQbvzHGmBMs8RtjTIWxxG+MMRXGEr8xxlQYS/zGGFNhLPGbsiUiA8Gqna8EqyV+TkRCv+dF5EPByouvBl/Lkn7WECyN8Z8SX0X0M0k/uzBYebIShk+bEmBvRFPOjml86jwicirwb8QXcvubXAMFsyz/DViiqluCZXWfEpE3VPX/EZ889qqq3iAijcBGEVkFdAFfB/6HnliILNfnFuJDrwfDPN6Y4WwcvylbItKjqnVJ938NeIn4shVnAA8Dk4If36yqL4jIw8AqVV0TPOY7wKPAbxJffPH2pHgLgTuAPwMeJz5T8w3gIuCPgse8BJxPfGLZl4FLiU9W+oaq3hesPbSG+JpKNcBfq+qaYCG6HwEbgnhLVPV1l6+PqVyW+E3ZGp74g7IjwPuBbmBQVY+LyFnAd1W1TUR+i/iib0tE5BRgK3AWsBJ4MPEHIYh1CrBHVaeJyCeBNlW9OfhZFbCR+EqabcDHie+H8HciUgs8T3wGbgdwkqq+E3yK2BQ83xnEp+9/UFU3RfICmYplTT2m0iRWQ6wBvi4ircAA8bWMUNX/EJFvBE1Dvws8pqr9QXNLuquktFdOqjooIvcR/2PQJSKLgHkick1wyCnEE/x+4C4RuYT4InqnA43BMa9b0jdRsMRvKkbQ1DNAfHXLvwE6gXOJD3I4nnTow8AfAr9HfH14gFeIX7k/nnTc+cTXWMlkMPiC+B+cP1PVp4bV6ZPEF/Y6X1X7gtUbJwQ/fjf7szMmezaqx1QEEWkA/gX4erBQ3SnAgaDD9BNAddLh/0p83wZU9ZWg7BvAJ4NPCIhIPfFtAleQnaeAzwRLSyMiZ4vIpKAeh4KkfxnxJh5jImVX/KacTQxW8qwhvvLhw8BXg5/9H+AxEbmWeAfq0NW1qnaKyC5gdVLZARG5HvimiEwmfgX/NVX99yzr8i1gFvF12AU4THwLxe8A/y4iMeL9Ca+GOVFjcmGdu8YMIyInATuIb+7+drHrY4xr1tRjTBIRuYL4Vff/tqRvypVd8RtjTIWxK35jjKkwlviNMabCWOI3xpgKY4nfGGMqjCV+Y4ypMP8fFF03YlhPduQAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "new_pumpkins.plot.scatter('DayOfYear','Price')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 171, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-0.14878293554077535\n", + "-0.16673322492745407\n" + ] + } + ], + "source": [ + "print(new_pumpkins['Month'].corr(new_pumpkins['Price']))\n", + "print(new_pumpkins['DayOfYear'].corr(new_pumpkins['Price']))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Det verkar som att korrelationen är ganska liten, men det finns någon annan viktigare relation - eftersom prisnivåerna i diagrammet ovan verkar ha flera distinkta kluster. Låt oss skapa ett diagram som visar olika pumpasorter:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 172, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "ax=None\n", + "colors = ['red','blue','green','yellow']\n", + "for i,var in enumerate(new_pumpkins['Variety'].unique()):\n", + " ax = new_pumpkins[new_pumpkins['Variety']==var].plot.scatter('DayOfYear','Price',ax=ax,c=colors[i],label=var)" + ] + }, + { + "cell_type": "code", + "execution_count": 173, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 173, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "new_pumpkins.groupby('Variety')['Price'].mean().plot(kind='bar')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 174, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-0.2669192282197318\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 174, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "pie_pumpkins = new_pumpkins[new_pumpkins['Variety']=='PIE TYPE']\n", + "print(pie_pumpkins['DayOfYear'].corr(pie_pumpkins['Price']))\n", + "pie_pumpkins.plot.scatter('DayOfYear','Price')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Linjär regression\n", + "\n", + "Vi kommer att använda Scikit Learn för att träna en linjär regressionsmodell:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 175, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error\n", + "from sklearn.model_selection import train_test_split" + ] + }, + { + "cell_type": "code", + "execution_count": 176, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 2.77 (17.2%)\n" + ] + } + ], + "source": [ + "X = pie_pumpkins['DayOfYear'].to_numpy().reshape(-1,1)\n", + "y = pie_pumpkins['Price']\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n", + "lin_reg = LinearRegression()\n", + "lin_reg.fit(X_train,y_train)\n", + "\n", + "pred = lin_reg.predict(X_test)\n", + "\n", + "mse = np.sqrt(mean_squared_error(y_test,pred))\n", + "print(f'Mean error: {mse:3.3} ({mse/np.mean(pred)*100:3.3}%)')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 177, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 177, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.scatter(X_test,y_test)\n", + "plt.plot(X_test,pred)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Linjens lutning kan bestämmas utifrån den linjära regressionens koefficienter:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 178, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([-0.01751876]), 21.133734359909326)" + ] + }, + "execution_count": 178, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lin_reg.coef_, lin_reg.intercept_" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Vi kan använda den tränade modellen för att förutsäga pris:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 179, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([16.64893156])" + ] + }, + "execution_count": 179, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Pumpkin price on programmer's day\n", + "\n", + "lin_reg.predict([[256]])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Polynomregression\n", + "\n", + "Ibland är förhållandet mellan särdragen (features) och resultatet i grunden icke-linjärt. Till exempel kan pumpapriser vara höga på vintern (månader=1,2), sedan sjunka under sommaren (månader=5-7) och sedan stiga igen. Linjär regression kan inte fånga detta förhållande korrekt.\n", + "\n", + "I sådana fall kan vi överväga att lägga till extra särdrag. Ett enkelt sätt är att använda polynom av indatasärdragen, vilket resulterar i **polynomregression**. 
I Scikit Learn kan vi automatiskt förberäkna polynomsärdrag med hjälp av pipelines:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 180, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 2.73 (17.0%)\n", + "Model determination: 0.07639977655280217\n" + ] + }, + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 180, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "from sklearn.preprocessing import PolynomialFeatures\n", + "from sklearn.pipeline import make_pipeline\n", + "\n", + "pipeline = make_pipeline(PolynomialFeatures(2), LinearRegression())\n", + "\n", + "pipeline.fit(X_train,y_train)\n", + "\n", + "pred = pipeline.predict(X_test)\n", + "\n", + "mse = np.sqrt(mean_squared_error(y_test,pred))\n", + "print(f'Mean error: {mse:3.3} ({mse/np.mean(pred)*100:3.3}%)')\n", + "\n", + "score = pipeline.score(X_train,y_train)\n", + "print('Model determination: ', score)\n", + "\n", + "plt.scatter(X_test,y_test)\n", + "plt.plot(sorted(X_test),pipeline.predict(sorted(X_test)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Kodning av sorter\n", + "\n", + "I en idealisk värld vill vi kunna förutsäga priser för olika pumpasorter med samma modell. För att ta hänsyn till sorten måste vi först konvertera den till numerisk form, eller **koda**. Det finns flera sätt att göra detta:\n", + "\n", + "* Enkel numerisk kodning som skapar en tabell över olika sorter och sedan ersätter sortnamnet med ett index i den tabellen. Detta är inte den bästa idén för linjär regression, eftersom linjär regression tar hänsyn till det numeriska värdet av indexet, och det numeriska värdet sannolikt inte korrelerar numeriskt med priset.\n", + "* One-hot-kodning, som ersätter `Variety`-kolumnen med 4 olika kolumner, en för varje sort, som innehåller 1 om den motsvarande raden är av en viss sort, och 0 annars.\n", + "\n", + "Koden nedan visar hur vi kan one-hot-koda en sort:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 181, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
FAIRYTALEMINIATUREMIXED HEIRLOOM VARIETIESPIE TYPE
700001
710001
720001
730001
740001
...............
17380100
17390100
17400100
17410100
17420100
\n", + "

415 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " FAIRYTALE MINIATURE MIXED HEIRLOOM VARIETIES PIE TYPE\n", + "70 0 0 0 1\n", + "71 0 0 0 1\n", + "72 0 0 0 1\n", + "73 0 0 0 1\n", + "74 0 0 0 1\n", + "... ... ... ... ...\n", + "1738 0 1 0 0\n", + "1739 0 1 0 0\n", + "1740 0 1 0 0\n", + "1741 0 1 0 0\n", + "1742 0 1 0 0\n", + "\n", + "[415 rows x 4 columns]" + ] + }, + "execution_count": 181, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.get_dummies(new_pumpkins['Variety'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Linjär regression på sort\n", + "\n", + "Vi kommer nu att använda samma kod som ovan, men istället för `DayOfYear` kommer vi att använda vår one-hot-kodade sort som indata:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 182, + "metadata": {}, + "outputs": [], + "source": [ + "X = pd.get_dummies(new_pumpkins['Variety'])\n", + "y = new_pumpkins['Price']" + ] + }, + { + "cell_type": "code", + "execution_count": 183, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 5.24 (19.7%)\n", + "Model determination: 0.774085281105197\n" + ] + } + ], + "source": [ + "def run_linear_regression(X,y):\n", + " X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n", + " lin_reg = LinearRegression()\n", + " lin_reg.fit(X_train,y_train)\n", + "\n", + " pred = lin_reg.predict(X_test)\n", + "\n", + " mse = np.sqrt(mean_squared_error(y_test,pred))\n", + " print(f'Mean error: {mse:3.3} ({mse/np.mean(pred)*100:3.3}%)')\n", + "\n", + " score = lin_reg.score(X_train,y_train)\n", + " print('Model determination: ', score)\n", + "\n", + "run_linear_regression(X,y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Vi kan också försöka använda andra funktioner på samma sätt och kombinera dem med numeriska funktioner, såsom `Month` eller `DayOfYear`:\n" + ] + }, + { + "cell_type": "code", + 
"execution_count": 184, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 2.84 (10.5%)\n", + "Model determination: 0.9401096672643048\n" + ] + } + ], + "source": [ + "X = pd.get_dummies(new_pumpkins['Variety']) \\\n", + " .join(new_pumpkins['Month']) \\\n", + " .join(pd.get_dummies(new_pumpkins['City'])) \\\n", + " .join(pd.get_dummies(new_pumpkins['Package']))\n", + "y = new_pumpkins['Price']\n", + "\n", + "run_linear_regression(X,y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Polynomregression\n", + "\n", + "Polynomregression kan också användas med kategoriska funktioner som är one-hot-kodade. Koden för att träna polynomregression skulle i princip vara densamma som vi har sett ovan.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 185, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 2.23 (8.25%)\n", + "Model determination: 0.9652870784724543\n" + ] + } + ], + "source": [ + "from sklearn.preprocessing import PolynomialFeatures\n", + "from sklearn.pipeline import make_pipeline\n", + "\n", + "pipeline = make_pipeline(PolynomialFeatures(2), LinearRegression())\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n", + "\n", + "pipeline.fit(X_train,y_train)\n", + "\n", + "pred = pipeline.predict(X_test)\n", + "\n", + "mse = np.sqrt(mean_squared_error(y_test,pred))\n", + "print(f'Mean error: {mse:3.3} ({mse/np.mean(pred)*100:3.3}%)')\n", + "\n", + "score = pipeline.score(X_train,y_train)\n", + "print('Model determination: ', score)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op 
Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, vänligen notera att automatiska översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på sitt originalspråk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "86193a1ab0ba47eac1c69c1756090baa3b420b3eea7d4aafab8b85f8b312f0c5" + }, + "kernelspec": { + "display_name": "Python 3.7.0 64-bit ('3.7')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.5" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "d77bd89ae7e79780c68c58bab91f13f8", + "translation_date": "2025-09-06T13:11:01+00:00", + "source_file": "2-Regression/3-Linear/solution/notebook.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sv/2-Regression/4-Logistic/notebook.ipynb b/translations/sv/2-Regression/4-Logistic/notebook.ipynb new file mode 100644 index 000000000..150d4345d --- /dev/null +++ b/translations/sv/2-Regression/4-Logistic/notebook.ipynb @@ -0,0 +1,269 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Pumpasorter och färg\n", + "\n", + "Ladda in nödvändiga bibliotek och dataset. 
Konvertera data till en dataframe som innehåller ett urval av data:\n", + "\n", + "Låt oss titta på sambandet mellan färg och sort\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NameTypePackageVarietySub VarietyGradeDateLow PriceHigh PriceMostly Low...Unit of SaleQualityConditionAppearanceStorageCropRepackTrans ModeUnnamed: 24Unnamed: 25
0BALTIMORENaN24 inch binsNaNNaNNaN4/29/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
1BALTIMORENaN24 inch binsNaNNaNNaN5/6/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
2BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
3BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
4BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN11/5/1690.0100.090.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " City Name Type Package Variety Sub Variety Grade Date \\\n", + "0 BALTIMORE NaN 24 inch bins NaN NaN NaN 4/29/17 \n", + "1 BALTIMORE NaN 24 inch bins NaN NaN NaN 5/6/17 \n", + "2 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "3 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "4 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 11/5/16 \n", + "\n", + " Low Price High Price Mostly Low ... Unit of Sale Quality Condition \\\n", + "0 270.0 280.0 270.0 ... NaN NaN NaN \n", + "1 270.0 280.0 270.0 ... NaN NaN NaN \n", + "2 160.0 160.0 160.0 ... NaN NaN NaN \n", + "3 160.0 160.0 160.0 ... NaN NaN NaN \n", + "4 90.0 100.0 90.0 ... NaN NaN NaN \n", + "\n", + " Appearance Storage Crop Repack Trans Mode Unnamed: 24 Unnamed: 25 \n", + "0 NaN NaN NaN E NaN NaN NaN \n", + "1 NaN NaN NaN E NaN NaN NaN \n", + "2 NaN NaN NaN N NaN NaN NaN \n", + "3 NaN NaN NaN N NaN NaN NaN \n", + "4 NaN NaN NaN N NaN NaN NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "full_pumpkins = pd.read_csv('../data/US-pumpkins.csv')\n", + "\n", + "full_pumpkins.head()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, bör du vara medveten om att automatiska översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på dess ursprungliga språk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. 
Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.1" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "dee08c2b49057b0de8b6752c4dbca368", + "translation_date": "2025-09-06T13:26:35+00:00", + "source_file": "2-Regression/4-Logistic/notebook.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sv/2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb b/translations/sv/2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb new file mode 100644 index 000000000..0e420ddd5 --- /dev/null +++ b/translations/sv/2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb @@ -0,0 +1,686 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Bygg en logistisk regressionsmodell - Lektion 4\n", + "\n", + "![Infografik om logistisk vs. linjär regression](../../../../../../2-Regression/4-Logistic/images/linear-vs-logistic.png)\n", + "\n", + "#### **[Quiz före föreläsningen](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/15/)**\n", + "\n", + "#### Introduktion\n", + "\n", + "I denna sista lektion om regression, en av de grundläggande *klassiska* ML-teknikerna, ska vi titta på logistisk regression. Du kan använda denna teknik för att upptäcka mönster och förutsäga binära kategorier. Är detta godis choklad eller inte? Är denna sjukdom smittsam eller inte? 
Kommer denna kund att välja denna produkt eller inte?\n", + "\n", + "I denna lektion kommer du att lära dig:\n", + "\n", + "- Tekniker för logistisk regression\n", + "\n", + "✅ Fördjupa din förståelse för att arbeta med denna typ av regression i denna [Learn-modul](https://learn.microsoft.com/training/modules/introduction-classification-models/?WT.mc_id=academic-77952-leestott)\n", + "\n", + "## Förkunskaper\n", + "\n", + "Efter att ha arbetat med pumpadatan är vi nu tillräckligt bekanta med den för att inse att det finns en binär kategori som vi kan arbeta med: `Color`.\n", + "\n", + "Låt oss bygga en logistisk regressionsmodell för att förutsäga, baserat på vissa variabler, *vilken färg en given pumpa sannolikt har* (orange 🎃 eller vit 👻).\n", + "\n", + "> Varför pratar vi om binär klassificering i en lektion som handlar om regression? Endast av språklig bekvämlighet, eftersom logistisk regression [egentligen är en klassificeringsmetod](https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression), om än en linjärbaserad sådan. 
Lär dig om andra sätt att klassificera data i nästa lektionsgrupp.\n", + "\n", + "För denna lektion behöver vi följande paket:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) är en [samling av R-paket](https://www.tidyverse.org/packages) som är utformade för att göra datavetenskap snabbare, enklare och roligare!\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) är ett [ramverk av paket](https://www.tidymodels.org/packages/) för modellering och maskininlärning.\n", + "\n", + "- `janitor`: [janitor-paketet](https://github.com/sfirke/janitor) erbjuder enkla verktyg för att undersöka och rengöra smutsiga data.\n", + "\n", + "- `ggbeeswarm`: [ggbeeswarm-paketet](https://github.com/eclarke/ggbeeswarm) tillhandahåller metoder för att skapa beeswarm-stil diagram med ggplot2.\n", + "\n", + "Du kan installera dem med:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\", \"janitor\", \"ggbeeswarm\"))`\n", + "\n", + "Alternativt kan skriptet nedan kontrollera om du har de paket som krävs för att slutföra denna modul och installera dem åt dig om de saknas.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "suppressWarnings(if (!require(\"pacman\"))install.packages(\"pacman\"))\n", + "\n", + "pacman::p_load(tidyverse, tidymodels, janitor, ggbeeswarm)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## **Definiera frågan**\n", + "\n", + "För våra ändamål kommer vi att uttrycka detta som en binär: 'Vit' eller 'Inte Vit'. Det finns också en kategori 'randig' i vår dataset, men det finns få exempel på den, så vi kommer inte att använda den. Den försvinner ändå när vi tar bort nullvärden från datasetet.\n", + "\n", + "> 🎃 Rolig fakta, vi kallar ibland vita pumpor för 'spök'-pumpor. De är inte så lätta att skära i, så de är inte lika populära som de orangea, men de ser häftiga ut! 
Så vi skulle också kunna omformulera vår fråga som: 'Spök' eller 'Inte Spök'. 👻\n", + "\n", + "## **Om logistisk regression**\n", + "\n", + "Logistisk regression skiljer sig från linjär regression, som du lärde dig om tidigare, på några viktiga sätt.\n", + "\n", + "#### **Binär klassificering**\n", + "\n", + "Logistisk regression erbjuder inte samma funktioner som linjär regression. Den förstnämnda ger en förutsägelse om en `binär kategori` (\"orange eller inte orange\") medan den senare kan förutsäga `kontinuerliga värden`, till exempel givet ursprunget av en pumpa och skördetiden, *hur mycket priset kommer att stiga*.\n", + "\n", + "![Infografik av Dasani Madipalli](../../../../../../2-Regression/4-Logistic/images/pumpkin-classifier.png)\n", + "\n", + "### Andra klassificeringar\n", + "\n", + "Det finns andra typer av logistisk regression, inklusive multinomial och ordinal:\n", + "\n", + "- **Multinomial**, som innebär att ha mer än en kategori - \"Orange, Vit och Randig\".\n", + "\n", + "- **Ordinal**, som innebär ordnade kategorier, användbart om vi vill ordna våra resultat logiskt, som våra pumpor som är ordnade efter ett begränsat antal storlekar (mini,sm,med,lg,xl,xxl).\n", + "\n", + "![Multinomial vs ordinal regression](../../../../../../2-Regression/4-Logistic/images/multinomial-vs-ordinal.png)\n", + "\n", + "#### **Variabler BEHÖVER INTE korrelera**\n", + "\n", + "Kommer du ihåg hur linjär regression fungerade bättre med mer korrelerade variabler? Logistisk regression är motsatsen - variablerna behöver inte stämma överens. 
Det fungerar för denna data som har ganska svaga korrelationer.\n", + "\n", + "#### **Du behöver mycket ren data**\n", + "\n", + "Logistisk regression ger mer exakta resultat om du använder mer data; vår lilla dataset är inte optimal för denna uppgift, så ha det i åtanke.\n", + "\n", + "✅ Fundera på vilka typer av data som skulle passa bra för logistisk regression\n", + "\n", + "## Övning - städa upp datan\n", + "\n", + "Först, städa upp datan lite, ta bort nullvärden och välj endast några av kolumnerna:\n", + "\n", + "1. Lägg till följande kod:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Load the core tidyverse packages\n", + "library(tidyverse)\n", + "\n", + "# Import the data and clean column names\n", + "pumpkins <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/2-Regression/data/US-pumpkins.csv\") %>% \n", + " clean_names()\n", + "\n", + "# Select desired columns\n", + "pumpkins_select <- pumpkins %>% \n", + " select(c(city_name, package, variety, origin, item_size, color)) \n", + "\n", + "# Drop rows containing missing values and encode color as factor (category)\n", + "pumpkins_select <- pumpkins_select %>% \n", + " drop_na() %>% \n", + " mutate(color = factor(color))\n", + "\n", + "# View the first few rows\n", + "pumpkins_select %>% \n", + " slice_head(n = 5)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Du kan alltid ta en titt på din nya dataframe genom att använda funktionen [*glimpse()*](https://pillar.r-lib.org/reference/glimpse.html) enligt nedan:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "pumpkins_select %>% \n", + " glimpse()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Låt oss bekräfta att vi faktiskt kommer att arbeta 
med ett binärt klassificeringsproblem:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Subset distinct observations in outcome column\n", + "pumpkins_select %>% \n", + " distinct(color)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Visualisering - kategoriskt diagram\n", + "Nu har du laddat in pumpadatan igen och rensat den för att bevara ett dataset som innehåller några variabler, inklusive Färg. Låt oss visualisera dataframen i notebooken med hjälp av ggplot-biblioteket.\n", + "\n", + "Biblioteket ggplot erbjuder några smarta sätt att visualisera din data. Till exempel kan du jämföra distributionerna av data för varje Sort och Färg i ett kategoriskt diagram.\n", + "\n", + "1. Skapa ett sådant diagram genom att använda funktionen `geom_bar`, med vår pumpadata, och specificera en färgkartläggning för varje pumpakategori (orange eller vit):\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Specify colors for each value of the hue variable\n", + "palette <- c(ORANGE = \"orange\", WHITE = \"wheat\")\n", + "\n", + "# Create the bar plot\n", + "ggplot(pumpkins_select, aes(y = variety, fill = color)) +\n", + " geom_bar(position = \"dodge\") +\n", + " scale_fill_manual(values = palette) +\n", + " labs(y = \"Variety\", fill = \"Color\") +\n", + " theme_minimal()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Genom att observera data kan du se hur färgdata relaterar till sort.\n", + "\n", + "✅ Givet detta kategoriska diagram, vilka intressanta undersökningar kan du föreställa dig?\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Datapreparation: funktionskodning\n", + "\n", + "Vårt pumpadataset innehåller strängvärden för alla sina kolumner. 
Att arbeta med kategoriska data är intuitivt för människor men inte för maskiner. Maskininlärningsalgoritmer fungerar bra med siffror. Därför är kodning ett mycket viktigt steg i datapreparationsfasen, eftersom det gör det möjligt för oss att omvandla kategoriska data till numeriska data utan att förlora någon information. Bra kodning leder till att bygga en bra modell.\n", + "\n", + "För funktionskodning finns det två huvudsakliga typer av kodare:\n", + "\n", + "1. Ordinal kodare: den passar bra för ordnade variabler, som är kategoriska variabler där deras data följer en logisk ordning, som kolumnen `item_size` i vårt dataset. Den skapar en mappning där varje kategori representeras av ett nummer, vilket är ordningen för kategorin i kolumnen.\n", + "\n", + "2. Kategorisk kodare: den passar bra för nominella variabler, som är kategoriska variabler där deras data inte följer en logisk ordning, som alla funktioner utom `item_size` i vårt dataset. Det är en one-hot-kodning, vilket innebär att varje kategori representeras av en binär kolumn: den kodade variabeln är lika med 1 om pumpan tillhör den sorten och 0 annars.\n", + "\n", + "Tidymodels erbjuder ytterligare ett smidigt paket: [recipes](https://recipes.tidymodels.org/) - ett paket för datapreparation. Vi kommer att definiera en `recipe` som specificerar att alla prediktorkolumner ska kodas till en uppsättning heltal, `prep` för att uppskatta de nödvändiga mängderna och statistiken som behövs för alla operationer och slutligen `bake` för att tillämpa beräkningarna på ny data.\n", + "\n", + "> Vanligtvis används recipes som en förprocessor för modellering där den definierar vilka steg som ska tillämpas på en dataset för att göra den redo för modellering. I det fallet är det **starkt rekommenderat** att du använder en `workflow()` istället för att manuellt uppskatta en recipe med prep och bake. 
Vi kommer att se allt detta om en liten stund.\n", + ">\n", + "> Men för tillfället använder vi recipes + prep + bake för att specificera vilka steg som ska tillämpas på en dataset för att göra den redo för dataanalys och sedan extrahera den förprocessade datan med de tillämpade stegen.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Preprocess and extract data to allow some data analysis\n", + "baked_pumpkins <- recipe(color ~ ., data = pumpkins_select) %>%\n", + " # Define ordering for item_size column\n", + " step_mutate(item_size = ordered(item_size, levels = c('sml', 'med', 'med-lge', 'lge', 'xlge', 'jbo', 'exjbo'))) %>%\n", + " # Convert factors to numbers using the order defined above (Ordinal encoding)\n", + " step_integer(item_size, zero_based = F) %>%\n", + " # Encode all other predictors using one hot encoding\n", + " step_dummy(all_nominal(), -all_outcomes(), one_hot = TRUE) %>%\n", + " prep(training = pumpkins_select) %>%\n", + " bake(new_data = NULL)\n", + "\n", + "# Display the first few rows of preprocessed data\n", + "baked_pumpkins %>% \n", + " slice_head(n = 5)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "✅ Vilka är fördelarna med att använda en ordinal encoder för kolumnen Item Size?\n", + "\n", + "### Analysera relationer mellan variabler\n", + "\n", + "Nu när vi har förbehandlat vår data kan vi analysera relationerna mellan funktionerna och etiketten för att få en uppfattning om hur väl modellen kommer att kunna förutsäga etiketten baserat på funktionerna. Det bästa sättet att utföra denna typ av analys är att visualisera datan. \n", + "Vi kommer återigen att använda ggplot-funktionen `geom_boxplot` för att visualisera relationerna mellan Item Size, Variety och Color i ett kategoriskt diagram. 
För att bättre kunna plotta datan kommer vi att använda den kodade kolumnen Item Size och den okodade kolumnen Variety.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Define the color palette\n", + "palette <- c(ORANGE = \"orange\", WHITE = \"wheat\")\n", + "\n", + "# We need the encoded Item Size column to use it as the x-axis values in the plot\n", + "pumpkins_select_plot<-pumpkins_select\n", + "pumpkins_select_plot$item_size <- baked_pumpkins$item_size\n", + "\n", + "# Create the grouped box plot\n", + "ggplot(pumpkins_select_plot, aes(x = `item_size`, y = color, fill = color)) +\n", + " geom_boxplot() +\n", + " facet_grid(variety ~ ., scales = \"free_x\") +\n", + " scale_fill_manual(values = palette) +\n", + " labs(x = \"Item Size\", y = \"\") +\n", + " theme_minimal() +\n", + " theme(strip.text = element_text(size = 12)) +\n", + " theme(axis.text.x = element_text(size = 10)) +\n", + " theme(axis.title.x = element_text(size = 12)) +\n", + " theme(axis.title.y = element_blank()) +\n", + " theme(legend.position = \"bottom\") +\n", + " guides(fill = guide_legend(title = \"Color\")) +\n", + " theme(panel.spacing = unit(0.5, \"lines\"))+\n", + " theme(strip.text.y = element_text(size = 4, hjust = 0)) \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Använd ett swarm-diagram\n", + "\n", + "Eftersom Color är en binär kategori (Vit eller Inte), krävs 'ett [specialiserat tillvägagångssätt](https://github.com/rstudio/cheatsheets/blob/main/data-visualization.pdf) för visualisering'.\n", + "\n", + "Prova ett `swarm-diagram` för att visa fördelningen av färg i förhållande till item_size.\n", + "\n", + "Vi kommer att använda [ggbeeswarm-paketet](https://github.com/eclarke/ggbeeswarm) som erbjuder metoder för att skapa beeswarm-stil diagram med ggplot2. 
Beeswarm-diagram är ett sätt att plotta punkter som normalt skulle överlappa varandra så att de istället placeras bredvid varandra.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Create beeswarm plots of color and item_size\n", + "baked_pumpkins %>% \n", + " mutate(color = factor(color)) %>% \n", + " ggplot(mapping = aes(x = color, y = item_size, color = color)) +\n", + " geom_quasirandom() +\n", + " scale_color_brewer(palette = \"Dark2\", direction = -1) +\n", + " theme(legend.position = \"none\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Nu när vi har en uppfattning om sambandet mellan de binära färgkategorierna och den större gruppen av storlekar, låt oss utforska logistisk regression för att avgöra en pumpas sannolika färg.\n", + "\n", + "## Bygg din modell\n", + "\n", + "Välj de variabler du vill använda i din klassificeringsmodell och dela upp data i tränings- och testuppsättningar. 
[rsample](https://rsample.tidymodels.org/), ett paket i Tidymodels, erbjuder infrastruktur för effektiv datadelning och återprovtagning:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Split data into 80% for training and 20% for testing\n", + "set.seed(2056)\n", + "pumpkins_split <- pumpkins_select %>% \n", + " initial_split(prop = 0.8)\n", + "\n", + "# Extract the data in each split\n", + "pumpkins_train <- training(pumpkins_split)\n", + "pumpkins_test <- testing(pumpkins_split)\n", + "\n", + "# Print out the first 5 rows of the training set\n", + "pumpkins_train %>% \n", + " slice_head(n = 5)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "🙌 Vi är nu redo att träna en modell genom att passa träningsfunktionerna till träningsetiketten (färg).\n", + "\n", + "Vi börjar med att skapa ett recept som anger de förbehandlingssteg som ska utföras på vår data för att göra den redo för modellering, dvs: koda kategoriska variabler till en uppsättning heltal. Precis som `baked_pumpkins` skapar vi ett `pumpkins_recipe` men vi `prep` och `bake` inte eftersom det kommer att paketeras i ett arbetsflöde, vilket du kommer att se om bara några steg.\n", + "\n", + "Det finns ganska många sätt att specificera en logistisk regressionsmodell i Tidymodels. 
Se `?logistic_reg()`. För tillfället kommer vi att specificera en logistisk regressionsmodell via den förvalda `stats::glm()`-motorn.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Create a recipe that specifies preprocessing steps for modelling\n", + "pumpkins_recipe <- recipe(color ~ ., data = pumpkins_train) %>% \n", + " step_mutate(item_size = ordered(item_size, levels = c('sml', 'med', 'med-lge', 'lge', 'xlge', 'jbo', 'exjbo'))) %>%\n", + " step_integer(item_size, zero_based = F) %>% \n", + " step_dummy(all_nominal(), -all_outcomes(), one_hot = TRUE)\n", + "\n", + "# Create a logistic model specification\n", + "log_reg <- logistic_reg() %>% \n", + " set_engine(\"glm\") %>% \n", + " set_mode(\"classification\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Nu när vi har ett recept och en modellspecifikation behöver vi hitta ett sätt att kombinera dem i ett objekt som först förbehandlar data (prep+bake i bakgrunden), anpassar modellen på den förbehandlade datan och även möjliggör eventuella efterbehandlingsaktiviteter.\n", + "\n", + "I Tidymodels kallas detta praktiska objekt för en [`workflow`](https://workflows.tidymodels.org/) och det håller smidigt dina modellkomponenter.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Bundle modelling components in a workflow\n", + "log_reg_wf <- workflow() %>% \n", + " add_recipe(pumpkins_recipe) %>% \n", + " add_model(log_reg)\n", + "\n", + "# Print out the workflow\n", + "log_reg_wf\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Efter att ett arbetsflöde har *specificerats* kan en modell `tränas` med hjälp av [`fit()`](https://tidymodels.github.io/parsnip/reference/fit.html)-funktionen. 
Arbetsflödet kommer att uppskatta ett recept och förbehandla data innan träningen, så vi behöver inte göra det manuellt med prep och bake.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Train the model\n", + "wf_fit <- log_reg_wf %>% \n", + " fit(data = pumpkins_train)\n", + "\n", + "# Print the trained workflow\n", + "wf_fit\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Modellen visar de koefficienter som lärts in under träningen.\n", + "\n", + "Nu när vi har tränat modellen med träningsdata kan vi göra förutsägelser på testdata med [parsnip::predict()](https://parsnip.tidymodels.org/reference/predict.model_fit.html). Låt oss börja med att använda modellen för att förutsäga etiketter för vårt testset och sannolikheterna för varje etikett. När sannolikheten är mer än 0.5 är den förutsagda klassen `WHITE`, annars `ORANGE`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Make predictions for color and corresponding probabilities\n", + "results <- pumpkins_test %>% select(color) %>% \n", + " bind_cols(wf_fit %>% \n", + " predict(new_data = pumpkins_test)) %>%\n", + " bind_cols(wf_fit %>%\n", + " predict(new_data = pumpkins_test, type = \"prob\"))\n", + "\n", + "# Compare predictions\n", + "results %>% \n", + " slice_head(n = 10)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Väldigt bra! Detta ger några fler insikter i hur logistisk regression fungerar.\n", + "\n", + "### Bättre förståelse via en förvirringsmatris\n", + "\n", + "Att jämföra varje förutsägelse med dess motsvarande \"ground truth\"-värde är inte ett särskilt effektivt sätt att avgöra hur väl modellen förutspår. 
Lyckligtvis har Tidymodels några fler knep i rockärmen: [`yardstick`](https://yardstick.tidymodels.org/) - ett paket som används för att mäta modellers effektivitet med hjälp av prestationsmått.\n", + "\n", + "Ett prestationsmått som är kopplat till klassificeringsproblem är [`förvirringsmatrisen`](https://wikipedia.org/wiki/Confusion_matrix). En förvirringsmatris beskriver hur väl en klassificeringsmodell presterar. En förvirringsmatris sammanställer hur många exempel i varje klass som korrekt klassificerades av en modell. I vårt fall kommer den att visa hur många orangea pumpor som klassificerades som orangea och hur många vita pumpor som klassificerades som vita; förvirringsmatrisen visar också hur många som klassificerades i de **felaktiga** kategorierna.\n", + "\n", + "Funktionen [**`conf_mat()`**](https://tidymodels.github.io/yardstick/reference/conf_mat.html) från yardstick beräknar denna kors-tabell över observerade och förutspådda klasser.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Confusion matrix for prediction results\n", + "conf_mat(data = results, truth = color, estimate = .pred_class)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Låt oss tolka förvirringsmatrisen. 
Vår modell ska klassificera pumpor mellan två binära kategorier, kategori `vit` och kategori `inte-vit`.\n", + "\n", + "- Om din modell förutspår att en pumpa är vit och den faktiskt tillhör kategorin 'vit' kallar vi det en `sann positiv`, vilket visas av siffran längst upp till vänster.\n", + "\n", + "- Om din modell förutspår att en pumpa inte är vit och den faktiskt tillhör kategorin 'vit' kallar vi det en `falsk negativ`, vilket visas av siffran längst ner till vänster.\n", + "\n", + "- Om din modell förutspår att en pumpa är vit och den faktiskt tillhör kategorin 'inte-vit' kallar vi det en `falsk positiv`, vilket visas av siffran längst upp till höger.\n", + "\n", + "- Om din modell förutspår att en pumpa inte är vit och den faktiskt tillhör kategorin 'inte-vit' kallar vi det en `sann negativ`, vilket visas av siffran längst ner till höger.\n", + "\n", + "| Sanning |\n", + "|:-----:|\n", + "\n", + "\n", + "| | | |\n", + "|---------------|--------|-------|\n", + "| **Förutspådd** | VIT | ORANGE |\n", + "| VIT | TP | FP |\n", + "| ORANGE | FN | TN |\n", + "\n", + "Som du kanske har gissat är det att föredra att ha ett större antal sanna positiva och sanna negativa samt ett lägre antal falska positiva och falska negativa, vilket innebär att modellen presterar bättre.\n", + "\n", + "Förvirringsmatrisen är användbar eftersom den ger upphov till andra mått som kan hjälpa oss att bättre utvärdera prestandan hos en klassificeringsmodell. Låt oss gå igenom några av dem:\n", + "\n", + "🎓 Precision: `TP/(TP + FP)` definieras som andelen förutspådda positiva som faktiskt är positiva. Kallas också [positivt prediktivt värde](https://en.wikipedia.org/wiki/Positive_predictive_value \"Positive predictive value\").\n", + "\n", + "🎓 Recall: `TP/(TP + FN)` definieras som andelen positiva resultat av antalet prover som faktiskt var positiva. 
Kallas också `sensitivitet`.\n", + "\n", + "🎓 Specificitet: `TN/(TN + FP)` definieras som andelen negativa resultat av antalet prover som faktiskt var negativa.\n", + "\n", + "🎓 Noggrannhet: `(TP + TN)/(TP + TN + FP + FN)` Den procentandel av etiketter som förutspåtts korrekt för ett prov.\n", + "\n", + "🎓 F-mått: Ett viktat genomsnitt av precision och recall, där det bästa är 1 och det sämsta är 0.\n", + "\n", + "Låt oss beräkna dessa mått!\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Combine metric functions and calculate them all at once\n", + "eval_metrics <- metric_set(ppv, recall, spec, f_meas, accuracy)\n", + "eval_metrics(data = results, truth = color, estimate = .pred_class)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Visualisera ROC-kurvan för den här modellen\n", + "\n", + "Låt oss göra en ytterligare visualisering för att se den så kallade [`ROC-kurvan`](https://en.wikipedia.org/wiki/Receiver_operating_characteristic):\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Make a roc_curve\n", + "results %>% \n", + " roc_curve(color, .pred_ORANGE) %>% \n", + " autoplot()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ROC-kurvor används ofta för att få en överblick över en klassificerares resultat i termer av dess sanna respektive falska positiva. ROC-kurvor visar vanligtvis `True Positive Rate`/Sensitivitet på Y-axeln och `False Positive Rate`/1-Specificitet på X-axeln. Därför spelar kurvans branthet och avståndet mellan mittlinjen och kurvan roll: du vill ha en kurva som snabbt går upp och över linjen. 
I vårt fall finns det falska positiva i början, och sedan går linjen upp och över på rätt sätt.\n", + "\n", + "Slutligen, låt oss använda `yardstick::roc_auc()` för att beräkna det faktiska Area Under the Curve. Ett sätt att tolka AUC är som sannolikheten att modellen rankar ett slumpmässigt positivt exempel högre än ett slumpmässigt negativt exempel.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Calculate area under curve\n", + "results %>% \n", + " roc_auc(color, .pred_ORANGE)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Resultatet är cirka `0.975`. Eftersom AUC sträcker sig från 0 till 1 vill du ha ett högt värde, eftersom en modell som är 100 % korrekt i sina förutsägelser kommer att ha en AUC på 1; i det här fallet är modellen *ganska bra*.\n", + "\n", + "I framtida lektioner om klassificering kommer du att lära dig hur du kan förbättra modellens resultat (till exempel hantering av obalanserad data i detta fall).\n", + "\n", + "## 🚀Utmaning\n", + "\n", + "Det finns mycket mer att utforska kring logistisk regression! Men det bästa sättet att lära sig är att experimentera. Hitta ett dataset som lämpar sig för denna typ av analys och bygg en modell med den. Vad lär du dig? Tips: prova [Kaggle](https://www.kaggle.com/search?q=logistic+regression+datasets) för intressanta dataset.\n", + "\n", + "## Granskning & Självstudier\n", + "\n", + "Läs de första sidorna av [denna artikel från Stanford](https://web.stanford.edu/~jurafsky/slp3/5.pdf) om några praktiska användningsområden för logistisk regression. Fundera på uppgifter som passar bättre för den ena eller andra typen av regressionsuppgifter som vi har studerat hittills. 
Vad skulle fungera bäst?\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, vänligen notera att automatiska översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på sitt ursprungliga språk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ], + "metadata": { + "anaconda-cloud": "", + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "3.4.1" + }, + "coopTranslator": { + "original_hash": "feaf125f481a89c468fa115bf2aed580", + "translation_date": "2025-09-06T13:33:24+00:00", + "source_file": "2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/translations/sv/2-Regression/4-Logistic/solution/notebook.ipynb b/translations/sv/2-Regression/4-Logistic/solution/notebook.ipynb new file mode 100644 index 000000000..26f3545f3 --- /dev/null +++ b/translations/sv/2-Regression/4-Logistic/solution/notebook.ipynb @@ -0,0 +1,1257 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Logistisk regression - Lektion 4\n", + "\n", + "Ladda in nödvändiga bibliotek och dataset. Konvertera data till en dataframe som innehåller ett urval av datan:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NameTypePackageVarietySub VarietyGradeDateLow PriceHigh PriceMostly Low...Unit of SaleQualityConditionAppearanceStorageCropRepackTrans ModeUnnamed: 24Unnamed: 25
0BALTIMORENaN24 inch binsNaNNaNNaN4/29/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
1BALTIMORENaN24 inch binsNaNNaNNaN5/6/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
2BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
3BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
4BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN11/5/1690.0100.090.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " City Name Type Package Variety Sub Variety Grade Date \n", + "0 BALTIMORE NaN 24 inch bins NaN NaN NaN 4/29/17 \\\n", + "1 BALTIMORE NaN 24 inch bins NaN NaN NaN 5/6/17 \n", + "2 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "3 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "4 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 11/5/16 \n", + "\n", + " Low Price High Price Mostly Low ... Unit of Sale Quality Condition \n", + "0 270.0 280.0 270.0 ... NaN NaN NaN \\\n", + "1 270.0 280.0 270.0 ... NaN NaN NaN \n", + "2 160.0 160.0 160.0 ... NaN NaN NaN \n", + "3 160.0 160.0 160.0 ... NaN NaN NaN \n", + "4 90.0 100.0 90.0 ... NaN NaN NaN \n", + "\n", + " Appearance Storage Crop Repack Trans Mode Unnamed: 24 Unnamed: 25 \n", + "0 NaN NaN NaN E NaN NaN NaN \n", + "1 NaN NaN NaN E NaN NaN NaN \n", + "2 NaN NaN NaN N NaN NaN NaN \n", + "3 NaN NaN NaN N NaN NaN NaN \n", + "4 NaN NaN NaN N NaN NaN NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "full_pumpkins = pd.read_csv('../../data/US-pumpkins.csv')\n", + "\n", + "full_pumpkins.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NamePackageVarietyOriginItem SizeColor
2BALTIMORE24 inch binsHOWDEN TYPEDELAWAREmedORANGE
3BALTIMORE24 inch binsHOWDEN TYPEVIRGINIAmedORANGE
4BALTIMORE24 inch binsHOWDEN TYPEMARYLANDlgeORANGE
5BALTIMORE24 inch binsHOWDEN TYPEMARYLANDlgeORANGE
6BALTIMORE36 inch binsHOWDEN TYPEMARYLANDmedORANGE
\n", + "
" + ], + "text/plain": [ + " City Name Package Variety Origin Item Size Color\n", + "2 BALTIMORE 24 inch bins HOWDEN TYPE DELAWARE med ORANGE\n", + "3 BALTIMORE 24 inch bins HOWDEN TYPE VIRGINIA med ORANGE\n", + "4 BALTIMORE 24 inch bins HOWDEN TYPE MARYLAND lge ORANGE\n", + "5 BALTIMORE 24 inch bins HOWDEN TYPE MARYLAND lge ORANGE\n", + "6 BALTIMORE 36 inch bins HOWDEN TYPE MARYLAND med ORANGE" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Select the columns we want to use\n", + "columns_to_select = ['City Name','Package','Variety', 'Origin','Item Size', 'Color']\n", + "pumpkins = full_pumpkins.loc[:, columns_to_select]\n", + "\n", + "# Drop rows with missing values\n", + "pumpkins.dropna(inplace=True)\n", + "\n", + "pumpkins.head()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Låt oss ta en titt på vår data!\n", + "\n", + "Genom att visualisera den med Seaborn\n" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import seaborn as sns\n", + "# Specify colors for each values of the hue variable\n", + "palette = {\n", + " 'ORANGE': 'orange',\n", + " 'WHITE': 'wheat',\n", + "}\n", + "# Plot a bar plot to visualize how many pumpkins of each variety are orange or white\n", + "sns.catplot(\n", + " data=pumpkins, y=\"Variety\", hue=\"Color\", kind=\"count\",\n", + " palette=palette, \n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Databehandling\n", + "\n", + "Låt oss koda funktioner och etiketter för att bättre visualisera data och träna modellen\n" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['med', 'lge', 'sml', 'xlge', 'med-lge', 'jbo', 'exjbo'],\n", + " dtype=object)" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Let's look at the different values of the 'Item Size' column\n", + "pumpkins['Item Size'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import OrdinalEncoder\n", + "# Encode the 'Item Size' column using ordinal encoding\n", + "item_size_categories = [['sml', 'med', 'med-lge', 'lge', 'xlge', 'jbo', 'exjbo']]\n", + "ordinal_features = ['Item Size']\n", + "ordinal_encoder = OrdinalEncoder(categories=item_size_categories)" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import OneHotEncoder\n", + "# Encode all the other features using one-hot encoding\n", + "categorical_features = ['City Name', 'Package', 'Variety', 'Origin']\n", + "categorical_encoder = OneHotEncoder(sparse_output=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + 
"outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ord__Item Sizecat__City Name_ATLANTAcat__City Name_BALTIMOREcat__City Name_BOSTONcat__City Name_CHICAGOcat__City Name_COLUMBIAcat__City Name_DALLAScat__City Name_DETROITcat__City Name_LOS ANGELEScat__City Name_MIAMI...cat__Origin_MICHIGANcat__Origin_NEW JERSEYcat__Origin_NEW YORKcat__Origin_NORTH CAROLINAcat__Origin_OHIOcat__Origin_PENNSYLVANIAcat__Origin_TENNESSEEcat__Origin_TEXAScat__Origin_VERMONTcat__Origin_VIRGINIA
21.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
31.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.01.0
43.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
53.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
61.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
\n", + "

5 rows × 48 columns

\n", + "
" + ], + "text/plain": [ + " ord__Item Size cat__City Name_ATLANTA cat__City Name_BALTIMORE \n", + "2 1.0 0.0 1.0 \\\n", + "3 1.0 0.0 1.0 \n", + "4 3.0 0.0 1.0 \n", + "5 3.0 0.0 1.0 \n", + "6 1.0 0.0 1.0 \n", + "\n", + " cat__City Name_BOSTON cat__City Name_CHICAGO cat__City Name_COLUMBIA \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__City Name_DALLAS cat__City Name_DETROIT cat__City Name_LOS ANGELES \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__City Name_MIAMI ... cat__Origin_MICHIGAN cat__Origin_NEW JERSEY \n", + "2 0.0 ... 0.0 0.0 \\\n", + "3 0.0 ... 0.0 0.0 \n", + "4 0.0 ... 0.0 0.0 \n", + "5 0.0 ... 0.0 0.0 \n", + "6 0.0 ... 0.0 0.0 \n", + "\n", + " cat__Origin_NEW YORK cat__Origin_NORTH CAROLINA cat__Origin_OHIO \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__Origin_PENNSYLVANIA cat__Origin_TENNESSEE cat__Origin_TEXAS \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__Origin_VERMONT cat__Origin_VIRGINIA \n", + "2 0.0 0.0 \n", + "3 0.0 1.0 \n", + "4 0.0 0.0 \n", + "5 0.0 0.0 \n", + "6 0.0 0.0 \n", + "\n", + "[5 rows x 48 columns]" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.compose import ColumnTransformer\n", + "ct = ColumnTransformer(transformers=[\n", + " ('ord', ordinal_encoder, ordinal_features),\n", + " ('cat', categorical_encoder, categorical_features)\n", + " ])\n", + "# Get the encoded features as a pandas DataFrame\n", + "ct.set_output(transform='pandas')\n", + "encoded_features = ct.fit_transform(pumpkins)\n", + "encoded_features.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + 
{ + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ord__Item Sizecat__City Name_ATLANTAcat__City Name_BALTIMOREcat__City Name_BOSTONcat__City Name_CHICAGOcat__City Name_COLUMBIAcat__City Name_DALLAScat__City Name_DETROITcat__City Name_LOS ANGELEScat__City Name_MIAMI...cat__Origin_NEW JERSEYcat__Origin_NEW YORKcat__Origin_NORTH CAROLINAcat__Origin_OHIOcat__Origin_PENNSYLVANIAcat__Origin_TENNESSEEcat__Origin_TEXAScat__Origin_VERMONTcat__Origin_VIRGINIAColor
21.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00
31.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00
43.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00
53.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00
61.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00
\n", + "

5 rows × 49 columns

\n", + "
" + ], + "text/plain": [ + " ord__Item Size cat__City Name_ATLANTA cat__City Name_BALTIMORE \n", + "2 1.0 0.0 1.0 \\\n", + "3 1.0 0.0 1.0 \n", + "4 3.0 0.0 1.0 \n", + "5 3.0 0.0 1.0 \n", + "6 1.0 0.0 1.0 \n", + "\n", + " cat__City Name_BOSTON cat__City Name_CHICAGO cat__City Name_COLUMBIA \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__City Name_DALLAS cat__City Name_DETROIT cat__City Name_LOS ANGELES \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__City Name_MIAMI ... cat__Origin_NEW JERSEY cat__Origin_NEW YORK \n", + "2 0.0 ... 0.0 0.0 \\\n", + "3 0.0 ... 0.0 0.0 \n", + "4 0.0 ... 0.0 0.0 \n", + "5 0.0 ... 0.0 0.0 \n", + "6 0.0 ... 0.0 0.0 \n", + "\n", + " cat__Origin_NORTH CAROLINA cat__Origin_OHIO cat__Origin_PENNSYLVANIA \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__Origin_TENNESSEE cat__Origin_TEXAS cat__Origin_VERMONT \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__Origin_VIRGINIA Color \n", + "2 0.0 0 \n", + "3 1.0 0 \n", + "4 0.0 0 \n", + "5 0.0 0 \n", + "6 0.0 0 \n", + "\n", + "[5 rows x 49 columns]" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.preprocessing import LabelEncoder\n", + "# Encode the 'Color' column using label encoding\n", + "label_encoder = LabelEncoder()\n", + "encoded_label = label_encoder.fit_transform(pumpkins['Color'])\n", + "encoded_pumpkins = encoded_features.assign(Color=encoded_label)\n", + "encoded_pumpkins.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['ORANGE', 'WHITE']" + ] + }, + "execution_count": 71, + "metadata": 
{}, + "output_type": "execute_result" + } + ], + "source": [ + "# Let's look at the mapping between the encoded values and the original values\n", + "list(label_encoder.inverse_transform([0, 1]))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "palette = {\n", + " 'ORANGE': 'orange',\n", + " 'WHITE': 'wheat',\n", + "}\n", + "# We need the encoded Item Size column to use it as the x-axis values in the plot\n", + "pumpkins['Item Size'] = encoded_pumpkins['ord__Item Size']\n", + "\n", + "g = sns.catplot(\n", + " data=pumpkins,\n", + " x=\"Item Size\", y=\"Color\", row='Variety',\n", + " kind=\"box\", orient=\"h\",\n", + " sharex=False, margin_titles=True,\n", + " height=1.8, aspect=4, palette=palette,\n", + ")\n", + "# Defining axis labels \n", + "g.set(xlabel=\"Item Size\", ylabel=\"\").set(xlim=(0,6))\n", + "g.set_titles(row_template=\"{row_name}\")\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "warnings.filterwarnings(action='ignore', category=UserWarning, module='seaborn')" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Suppressing warning message claiming that a portion of points cannot be placed into the plot due to the high number of data points\n", + "import warnings\n", + "warnings.filterwarnings(action='ignore', category=UserWarning, module='seaborn')\n", + "\n", + "palette = {\n", + " 0: 'orange',\n", + " 1: 'wheat'\n", + "}\n", + "sns.swarmplot(x=\"Color\", y=\"ord__Item Size\", hue=\"Color\", data=encoded_pumpkins, palette=palette)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Var uppmärksam**: Att ignorera varningar är INTE en bra praxis och bör undvikas när det är möjligt. Varningar innehåller ofta användbara meddelanden som hjälper oss att förbättra vår kod och lösa ett problem. \n", + "Anledningen till att vi ignorerar just denna varning är för att säkerställa läsbarheten av diagrammet. Att plotta alla datapunkter med en reducerad markörstorlek, samtidigt som vi behåller konsekvensen i palettens färger, skapar en otydlig visualisering.\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Bygg din modell\n" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "# X is the encoded features\n", + "X = encoded_pumpkins[encoded_pumpkins.columns.difference(['Color'])]\n", + "# y is the encoded label\n", + "y = encoded_pumpkins['Color']\n", + "\n", + "# Split the data into training and test sets\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.94 0.98 0.96 166\n", + " 1 0.85 0.67 0.75 33\n", + "\n", + " accuracy 
0.92 199\n", + " macro avg 0.89 0.82 0.85 199\n", + "weighted avg 0.92 0.92 0.92 199\n", + "\n", + "Predicted labels: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0\n", + " 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 1 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 1 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0\n", + " 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1\n", + " 0 0 0 1 0 0 0 0 0 0 0 0 1 1]\n", + "F1-score: 0.7457627118644068\n" + ] + } + ], + "source": [ + "from sklearn.metrics import f1_score, classification_report \n", + "from sklearn.linear_model import LogisticRegression\n", + "\n", + "# Train a logistic regression model on the pumpkin dataset\n", + "model = LogisticRegression()\n", + "model.fit(X_train, y_train)\n", + "predictions = model.predict(X_test)\n", + "\n", + "# Evaluate the model and print the results\n", + "print(classification_report(y_test, predictions))\n", + "print('Predicted labels: ', predictions)\n", + "print('F1-score: ', f1_score(y_test, predictions))" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[162, 4],\n", + " [ 11, 22]])" + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.metrics import confusion_matrix\n", + "confusion_matrix(y_test, predictions)" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from sklearn.metrics import roc_curve, roc_auc_score\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "\n", + "y_scores = model.predict_proba(X_test)\n", + "# calculate ROC curve\n", + "fpr, tpr, thresholds = roc_curve(y_test, y_scores[:,1])\n", + "\n", + "# plot ROC curve\n", + "fig = plt.figure(figsize=(6, 6))\n", + "# Plot the diagonal 50% line\n", + "plt.plot([0, 1], [0, 1], 'k--')\n", + "# Plot the FPR and TPR achieved by our model\n", + "plt.plot(fpr, tpr)\n", + "plt.xlabel('False Positive Rate')\n", + "plt.ylabel('True Positive Rate')\n", + "plt.title('ROC Curve')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.9749908725812341\n" + ] + } + ], + "source": [ + "# Calculate AUC score\n", + "auc = roc_auc_score(y_test,y_scores[:,1])\n", + "print(auc)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, vänligen notera att automatiska översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på sitt ursprungliga språk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. 
Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "vscode": { + "interpreter": { + "hash": "949777d72b0d2535278d3dc13498b2535136f6dfe0678499012e853ee9abcab1" + } + }, + "coopTranslator": { + "original_hash": "ef50cc584e0b79412610cc7da15e1f86", + "translation_date": "2025-09-06T13:27:40+00:00", + "source_file": "2-Regression/4-Logistic/solution/notebook.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sv/3-Web-App/1-Web-App/notebook.ipynb b/translations/sv/3-Web-App/1-Web-App/notebook.ipynb new file mode 100644 index 000000000..e69de29bb diff --git a/translations/sv/3-Web-App/1-Web-App/solution/notebook.ipynb b/translations/sv/3-Web-App/1-Web-App/solution/notebook.ipynb new file mode 100644 index 000000000..92963b2a4 --- /dev/null +++ b/translations/sv/3-Web-App/1-Web-App/solution/notebook.ipynb @@ -0,0 +1,267 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": 
"70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "5fa2e8f4584c78250ca9729b46562ceb", + "translation_date": "2025-09-06T14:32:11+00:00", + "source_file": "3-Web-App/1-Web-App/solution/notebook.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " datetime city state country shape \\\n", + "0 10/10/1949 20:30 san marcos tx us cylinder \n", + "1 10/10/1949 21:00 lackland afb tx NaN light \n", + "2 10/10/1955 17:00 chester (uk/england) NaN gb circle \n", + "3 10/10/1956 21:00 edna tx us circle \n", + "4 10/10/1960 20:00 kaneohe hi us light \n", + "\n", + " duration (seconds) duration (hours/min) \\\n", + "0 2700.0 45 minutes \n", + "1 7200.0 1-2 hrs \n", + "2 20.0 20 seconds \n", + "3 20.0 1/2 hour \n", + "4 900.0 15 minutes \n", + "\n", + " comments date posted latitude \\\n", + "0 This event took place in early fall around 194... 4/27/2004 29.883056 \n", + "1 1949 Lackland AFB, TX. Lights racing acros... 12/16/2005 29.384210 \n", + "2 Green/Orange circular disc over Chester, En... 1/21/2008 53.200000 \n", + "3 My older brother and twin sister were leaving ... 1/17/2004 28.978333 \n", + "4 AS a Marine 1st Lt. flying an FJ4B fighter/att... 1/22/2004 21.418056 \n", + "\n", + " longitude \n", + "0 -97.941111 \n", + "1 -98.581082 \n", + "2 -2.916667 \n", + "3 -96.645833 \n", + "4 -157.803611 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
datetimecitystatecountryshapeduration (seconds)duration (hours/min)commentsdate postedlatitudelongitude
010/10/1949 20:30san marcostxuscylinder2700.045 minutesThis event took place in early fall around 194...4/27/200429.883056-97.941111
110/10/1949 21:00lackland afbtxNaNlight7200.01-2 hrs1949 Lackland AFB&#44 TX. Lights racing acros...12/16/200529.384210-98.581082
210/10/1955 17:00chester (uk/england)NaNgbcircle20.020 secondsGreen/Orange circular disc over Chester&#44 En...1/21/200853.200000-2.916667
310/10/1956 21:00ednatxuscircle20.01/2 hourMy older brother and twin sister were leaving ...1/17/200428.978333-96.645833
410/10/1960 20:00kaneohehiuslight900.015 minutesAS a Marine 1st Lt. flying an FJ4B fighter/att...1/22/200421.418056-157.803611
\n
" + }, + "metadata": {}, + "execution_count": 23 + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "ufos = pd.read_csv('../data/ufos.csv')\n", + "ufos.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array(['us', nan, 'gb', 'ca', 'au', 'de'], dtype=object)" + ] + }, + "metadata": {}, + "execution_count": 24 + } + ], + "source": [ + "\n", + "ufos = pd.DataFrame({'Seconds': ufos['duration (seconds)'], 'Country': ufos['country'],'Latitude': ufos['latitude'],'Longitude': ufos['longitude']})\n", + "\n", + "ufos.Country.unique()\n", + "\n", + "# 0 au, 1 ca, 2 de, 3 gb, 4 us" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\nInt64Index: 25863 entries, 2 to 80330\nData columns (total 4 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Seconds 25863 non-null float64\n 1 Country 25863 non-null object \n 2 Latitude 25863 non-null float64\n 3 Longitude 25863 non-null float64\ndtypes: float64(3), object(1)\nmemory usage: 1010.3+ KB\n" + ] + } + ], + "source": [ + "ufos.dropna(inplace=True)\n", + "\n", + "ufos = ufos[(ufos['Seconds'] >= 1) & (ufos['Seconds'] <= 60)]\n", + "\n", + "ufos.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Seconds Country Latitude Longitude\n", + "2 20.0 3 53.200000 -2.916667\n", + "3 20.0 4 28.978333 -96.645833\n", + "14 30.0 4 35.823889 -80.253611\n", + "23 60.0 4 45.582778 -122.352222\n", + "24 3.0 3 51.783333 -0.783333" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
SecondsCountryLatitudeLongitude
220.0353.200000-2.916667
320.0428.978333-96.645833
1430.0435.823889-80.253611
2360.0445.582778-122.352222
243.0351.783333-0.783333
\n
" + }, + "metadata": {}, + "execution_count": 26 + } + ], + "source": [ + "from sklearn.preprocessing import LabelEncoder\n", + "\n", + "ufos['Country'] = LabelEncoder().fit_transform(ufos['Country'])\n", + "\n", + "ufos.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "\n", + "Selected_features = ['Seconds','Latitude','Longitude']\n", + "\n", + "X = ufos[Selected_features]\n", + "y = ufos['Country']\n", + "\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", + " FutureWarning)\n", + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n", + " \"this warning.\", FutureWarning)\n", + " precision recall f1-score support\n", + "\n", + " 0 1.00 1.00 1.00 41\n", + " 1 1.00 0.02 0.05 250\n", + " 2 0.00 0.00 0.00 8\n", + " 3 0.94 1.00 0.97 131\n", + " 4 0.95 1.00 0.97 4743\n", + "\n", + " accuracy 0.95 5173\n", + " macro avg 0.78 0.60 0.60 5173\n", + "weighted avg 0.95 0.95 0.93 5173\n", + "\n", + "Predicted labels: [4 4 4 ... 
3 4 4]\n", + "Accuracy: 0.9512855209742895\n", + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/metrics/classification.py:1437: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.\n", + " 'precision', 'predicted', average, warn_for)\n" + ] + } + ], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import accuracy_score, classification_report \n", + "from sklearn.linear_model import LogisticRegression\n", + "model = LogisticRegression()\n", + "model.fit(X_train, y_train)\n", + "predictions = model.predict(X_test)\n", + "\n", + "print(classification_report(y_test, predictions))\n", + "print('Predicted labels: ', predictions)\n", + "print('Accuracy: ', accuracy_score(y_test, predictions))\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[3]\n" + ] + } + ], + "source": [ + "import pickle\n", + "model_filename = 'ufo-model.pkl'\n", + "pickle.dump(model, open(model_filename,'wb'))\n", + "\n", + "model = pickle.load(open('ufo-model.pkl','rb'))\n", + "print(model.predict([[50,44,-12]]))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, vänligen notera att automatiska översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på sitt originalspråk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. 
Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sv/4-Classification/1-Introduction/notebook.ipynb b/translations/sv/4-Classification/1-Introduction/notebook.ipynb new file mode 100644 index 000000000..79c3c6aff --- /dev/null +++ b/translations/sv/4-Classification/1-Introduction/notebook.ipynb @@ -0,0 +1,39 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "d544ef384b7ba73757d830a72372a7f2", + "translation_date": "2025-09-06T14:50:49+00:00", + "source_file": "4-Classification/1-Introduction/notebook.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, bör du vara medveten om att automatiserade översättningar kan innehålla fel eller inexaktheter. Det ursprungliga dokumentet på dess originalspråk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. 
Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sv/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb b/translations/sv/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb new file mode 100644 index 000000000..ebe6f38c1 --- /dev/null +++ b/translations/sv/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb @@ -0,0 +1,727 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_10-R.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "2621e24705e8100893c9bf84e0fc8aef", + "translation_date": "2025-09-06T14:57:53+00:00", + "source_file": "4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb", + "language_code": "sv" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Bygg en klassificeringsmodell: Utsökta asiatiska och indiska rätter\n" + ], + "metadata": { + "id": "ItETB4tSFprR" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Introduktion till klassificering: Rensa, förbered och visualisera din data\n", + "\n", + "I dessa fyra lektioner kommer du att utforska ett grundläggande fokus inom klassisk maskininlärning - *klassificering*. Vi kommer att gå igenom hur man använder olika klassificeringsalgoritmer med en dataset om alla de fantastiska köken i Asien och Indien. Hoppas du är hungrig!\n", + "\n", + "

\n", + " \n", + "

Fira pan-asiatiska kök i dessa lektioner! Bild av Jen Looper
\n", + "\n", + "\n", + "\n", + "\n", + "Klassificering är en form av [övervakad inlärning](https://wikipedia.org/wiki/Supervised_learning) som har mycket gemensamt med regressionstekniker. Vid klassificering tränar du en modell för att förutsäga vilken `kategori` ett objekt tillhör. Om maskininlärning handlar om att förutsäga värden eller namn på saker med hjälp av datasets, så faller klassificering generellt sett in i två grupper: *binär klassificering* och *multiklassklassificering*.\n", + "\n", + "Kom ihåg:\n", + "\n", + "- **Linjär regression** hjälpte dig att förutsäga relationer mellan variabler och göra exakta förutsägelser om var en ny datapunkt skulle hamna i förhållande till den linjen. Så du kunde förutsäga numeriska värden, som *vad priset på en pumpa skulle vara i september jämfört med december*, till exempel.\n", + "\n", + "- **Logistisk regression** hjälpte dig att upptäcka \"binära kategorier\": vid denna prisnivå, *är denna pumpa orange eller inte-orange*?\n", + "\n", + "Klassificering använder olika algoritmer för att avgöra andra sätt att bestämma en datapunkts etikett eller klass. Låt oss arbeta med denna matlagningsdata för att se om vi, genom att observera en grupp ingredienser, kan avgöra dess ursprungskök.\n", + "\n", + "### [**Quiz före föreläsningen**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/19/)\n", + "\n", + "### **Introduktion**\n", + "\n", + "Klassificering är en av de grundläggande aktiviteterna för forskare inom maskininlärning och dataanalytiker. Från grundläggande klassificering av ett binärt värde (\"är detta e-postmeddelande skräppost eller inte?\") till komplex bildklassificering och segmentering med hjälp av datorseende, är det alltid användbart att kunna sortera data i klasser och ställa frågor om den.\n", + "\n", + "För att uttrycka processen på ett mer vetenskapligt sätt skapar din klassificeringsmetod en prediktiv modell som gör det möjligt att kartlägga relationen mellan indata och utdata.\n", + "\n", + "

\n", + " \n", + "

Binära problem kontra multiklassproblem som klassificeringsalgoritmer ska hantera. Infografik av Jen Looper
\n", + "\n", + "\n", + "\n", + "Innan vi börjar processen med att rensa vår data, visualisera den och förbereda den för våra ML-uppgifter, låt oss lära oss lite om de olika sätt som maskininlärning kan användas för att klassificera data.\n", + "\n", + "Härledd från [statistik](https://wikipedia.org/wiki/Statistical_classification), klassificering med klassisk maskininlärning använder egenskaper, såsom `rökare`, `vikt` och `ålder` för att avgöra *sannolikheten att utveckla X sjukdom*. Som en övervakad inlärningsteknik liknande de regressionsexempel du utförde tidigare, är din data märkt och ML-algoritmerna använder dessa etiketter för att klassificera och förutsäga klasser (eller 'egenskaper') i en dataset och tilldela dem till en grupp eller ett resultat.\n", + "\n", + "✅ Ta en stund och föreställ dig en dataset om matlagning. Vad skulle en multiklassmodell kunna svara på? Vad skulle en binär modell kunna svara på? Vad händer om du ville avgöra om ett visst kök sannolikt använder bockhornsklöver? Vad händer om du ville se om du, med en present av en matkasse full av stjärnanis, kronärtskockor, blomkål och pepparrot, kunde skapa en typisk indisk maträtt?\n", + "\n", + "### **Hej 'klassificerare'**\n", + "\n", + "Frågan vi vill ställa om denna matlagningsdataset är faktiskt en **multiklassfråga**, eftersom vi har flera potentiella nationella kök att arbeta med. Givet en grupp ingredienser, vilken av dessa många klasser passar datan in i?\n", + "\n", + "Tidymodels erbjuder flera olika algoritmer att använda för att klassificera data, beroende på vilken typ av problem du vill lösa. 
Under de kommande två lektionerna kommer du att lära dig om flera av dessa algoritmer.\n", + "\n", + "#### **Förkunskaper**\n", + "\n", + "För denna lektion behöver vi följande paket för att rensa, förbereda och visualisera vår data:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) är en [samling av R-paket](https://www.tidyverse.org/packages) designade för att göra dataanalys snabbare, enklare och roligare!\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) är ett [ramverk av paket](https://www.tidymodels.org/packages/) för modellering och maskininlärning.\n", + "\n", + "- `DataExplorer`: [DataExplorer-paketet](https://cran.r-project.org/web/packages/DataExplorer/vignettes/dataexplorer-intro.html) är avsett att förenkla och automatisera EDA-processen och rapportgenerering.\n", + "\n", + "- `themis`: [themis-paketet](https://themis.tidymodels.org/) erbjuder extra receptsteg för att hantera obalanserad data.\n", + "\n", + "Du kan installera dem med:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\", \"DataExplorer\", \"themis\", \"here\"))`\n", + "\n", + "Alternativt kontrollerar skriptet nedan om du har de paket som krävs för att slutföra denna modul och installerar dem åt dig om de saknas.\n" + ], + "metadata": { + "id": "ri5bQxZ-Fz_0" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "suppressWarnings(if (!require(\"pacman\"))install.packages(\"pacman\"))\r\n", + "\r\n", + "pacman::p_load(tidyverse, tidymodels, DataExplorer, themis, here)" + ], + "outputs": [], + "metadata": { + "id": "KIPxa4elGAPI" + } + }, + { + "cell_type": "markdown", + "source": [ + "Vi kommer senare att ladda dessa fantastiska paket och göra dem tillgängliga i vår nuvarande R-session. 
(Detta är bara för illustration, `pacman::p_load()` har redan gjort det åt dig)\n" + ], + "metadata": { + "id": "YkKAxOJvGD4C" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Övning - rensa och balansera din data\n", + "\n", + "Den första uppgiften, innan du börjar med detta projekt, är att rensa och **balansera** din data för att få bättre resultat.\n", + "\n", + "Låt oss bekanta oss med datan!🕵️\n" + ], + "metadata": { + "id": "PFkQDlk0GN5O" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Import data\r\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/4-Classification/data/cuisines.csv\")\r\n", + "\r\n", + "# View the first 5 rows\r\n", + "df %>% \r\n", + " slice_head(n = 5)\r\n" + ], + "outputs": [], + "metadata": { + "id": "Qccw7okxGT0S" + } + }, + { + "cell_type": "markdown", + "source": [ + "Intressant! Det verkar som att den första kolumnen är en slags `id`-kolumn. Låt oss ta reda på lite mer information om data.\n" + ], + "metadata": { + "id": "XrWnlgSrGVmR" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Basic information about the data\r\n", + "df %>%\r\n", + " introduce()\r\n", + "\r\n", + "# Visualize basic information above\r\n", + "df %>% \r\n", + " plot_intro(ggtheme = theme_light())" + ], + "outputs": [], + "metadata": { + "id": "4UcGmxRxGieA" + } + }, + { + "cell_type": "markdown", + "source": [ + "Från resultatet kan vi direkt se att vi har `2448` rader och `385` kolumner samt `0` saknade värden. Vi har också 1 diskret kolumn, *cuisine*.\n", + "\n", + "## Övning - lära känna kökstyper\n", + "\n", + "Nu börjar arbetet bli mer intressant. 
Låt oss utforska datafördelningen per kökstyp.\n" + ], + "metadata": { + "id": "AaPubl__GmH5" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Count observations per cuisine\r\n", + "df %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(n)\r\n", + "\r\n", + "# Plot the distribution\r\n", + "theme_set(theme_light())\r\n", + "df %>% \r\n", + " count(cuisine) %>% \r\n", + " ggplot(mapping = aes(x = n, y = reorder(cuisine, -n))) +\r\n", + " geom_col(fill = \"midnightblue\", alpha = 0.7) +\r\n", + " ylab(\"cuisine\")" + ], + "outputs": [], + "metadata": { + "id": "FRsBVy5eGrrv" + } + }, + { + "cell_type": "markdown", + "source": [ + "Det finns ett begränsat antal kök, men fördelningen av data är ojämn. Du kan fixa det! Utforska lite mer innan du gör det.\n", + "\n", + "Låt oss nu tilldela varje kök till sin egen tibble och ta reda på hur mycket data som finns tillgängligt (rader, kolumner) per kök.\n", + "\n", + "> En [tibble](https://tibble.tidyverse.org/) är en modern dataram.\n", + "\n", + "

\n", + " \n", + "

Konstverk av @allison_horst
\n" + ], + "metadata": { + "id": "vVvyDb1kG2in" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Create individual tibble for the cuisines\r\n", + "thai_df <- df %>% \r\n", + " filter(cuisine == \"thai\")\r\n", + "japanese_df <- df %>% \r\n", + " filter(cuisine == \"japanese\")\r\n", + "chinese_df <- df %>% \r\n", + " filter(cuisine == \"chinese\")\r\n", + "indian_df <- df %>% \r\n", + " filter(cuisine == \"indian\")\r\n", + "korean_df <- df %>% \r\n", + " filter(cuisine == \"korean\")\r\n", + "\r\n", + "\r\n", + "# Find out how much data is available per cuisine\r\n", + "cat(\" thai df:\", dim(thai_df), \"\\n\",\r\n", + " \"japanese df:\", dim(japanese_df), \"\\n\",\r\n", + " \"chinese_df:\", dim(chinese_df), \"\\n\",\r\n", + " \"indian_df:\", dim(indian_df), \"\\n\",\r\n", + " \"korean_df:\", dim(korean_df))" + ], + "outputs": [], + "metadata": { + "id": "0TvXUxD3G8Bk" + } + }, + { + "cell_type": "markdown", + "source": [ + "## **Övning - Upptäcka toppingredienser per kök med hjälp av dplyr**\n", + "\n", + "Nu kan du gräva djupare i datan och ta reda på vilka som är de typiska ingredienserna för varje kök. Du bör rensa bort återkommande data som skapar förvirring mellan köken, så låt oss lära oss mer om detta problem.\n", + "\n", + "Skapa en funktion `create_ingredient()` i R som returnerar en ingrediens-dataram. Denna funktion börjar med att ta bort en oanvändbar kolumn och sortera ingredienser baserat på deras antal.\n", + "\n", + "Den grundläggande strukturen för en funktion i R är:\n", + "\n", + "`myFunction <- function(arglist){`\n", + "\n", + "**`...`**\n", + "\n", + "**`return`**`(value)`\n", + "\n", + "`}`\n", + "\n", + "En enkel introduktion till R-funktioner finns [här](https://skirmer.github.io/presentations/functions_with_r.html#1).\n", + "\n", + "Låt oss sätta igång! Vi kommer att använda [dplyr-verb](https://dplyr.tidyverse.org/) som vi har lärt oss i våra tidigare lektioner. 
Som en sammanfattning:\n", + "\n", + "- `dplyr::select()`: hjälper dig att välja vilka **kolumner** du vill behålla eller utesluta.\n", + "\n", + "- `tidyr::pivot_longer()`: hjälper dig att \"förlänga\" data, vilket ökar antalet rader och minskar antalet kolumner.\n", + "\n", + "- `dplyr::group_by()` och `dplyr::summarise()`: hjälper dig att hitta sammanfattande statistik för olika grupper och presentera dem i en snygg tabell.\n", + "\n", + "- `dplyr::filter()`: skapar en delmängd av datan som endast innehåller rader som uppfyller dina villkor.\n", + "\n", + "- `dplyr::mutate()`: hjälper dig att skapa eller ändra kolumner.\n", + "\n", + "Kolla in denna [*konst*-fyllda learnr-tutorial](https://allisonhorst.shinyapps.io/dplyr-learnr/#section-welcome) av Allison Horst, som introducerar några användbara datahanteringsfunktioner i dplyr *(en del av Tidyverse)*.\n" + ], + "metadata": { + "id": "K3RF5bSCHC76" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Creates a functions that returns the top ingredients by class\r\n", + "\r\n", + "create_ingredient <- function(df){\r\n", + " \r\n", + " # Drop the id column which is the first colum\r\n", + " ingredient_df = df %>% select(-1) %>% \r\n", + " # Transpose data to a long format\r\n", + " pivot_longer(!cuisine, names_to = \"ingredients\", values_to = \"count\") %>% \r\n", + " # Find the top most ingredients for a particular cuisine\r\n", + " group_by(ingredients) %>% \r\n", + " summarise(n_instances = sum(count)) %>% \r\n", + " filter(n_instances != 0) %>% \r\n", + " # Arrange by descending order\r\n", + " arrange(desc(n_instances)) %>% \r\n", + " mutate(ingredients = factor(ingredients) %>% fct_inorder())\r\n", + " \r\n", + " \r\n", + " return(ingredient_df)\r\n", + "} # End of function" + ], + "outputs": [], + "metadata": { + "id": "uB_0JR82HTPa" + } + }, + { + "cell_type": "markdown", + "source": [ + "Nu kan vi använda funktionen för att få en uppfattning om de tio mest populära 
ingredienserna per kök. Låt oss testa den med `thai_df`.\n" + ], + "metadata": { + "id": "h9794WF8HWmc" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Call create_ingredient and display popular ingredients\r\n", + "thai_ingredient_df <- create_ingredient(df = thai_df)\r\n", + "\r\n", + "thai_ingredient_df %>% \r\n", + " slice_head(n = 10)" + ], + "outputs": [], + "metadata": { + "id": "agQ-1HrcHaEA" + } + }, + { + "cell_type": "markdown", + "source": [ + "I föregående avsnitt använde vi `geom_col()`, låt oss se hur du också kan använda `geom_bar` för att skapa stapeldiagram. Använd `?geom_bar` för vidare läsning.\n" + ], + "metadata": { + "id": "kHu9ffGjHdcX" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make a bar chart for popular thai cuisines\r\n", + "thai_ingredient_df %>% \r\n", + " slice_head(n = 10) %>% \r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"steelblue\") +\r\n", + " xlab(\"\") + ylab(\"\")" + ], + "outputs": [], + "metadata": { + "id": "fb3Bx_3DHj6e" + } + }, + { + "cell_type": "markdown", + "source": [ + "Låt oss göra samma sak för de japanska uppgifterna\n" + ], + "metadata": { + "id": "RHP_xgdkHnvM" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Get popular ingredients for Japanese cuisines and make bar chart\r\n", + "create_ingredient(df = japanese_df) %>% \r\n", + " slice_head(n = 10) %>%\r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"darkorange\", alpha = 0.8) +\r\n", + " xlab(\"\") + ylab(\"\")\r\n" + ], + "outputs": [], + "metadata": { + "id": "019v8F0XHrRU" + } + }, + { + "cell_type": "markdown", + "source": [ + "Vad sägs om de kinesiska köken?\n" + ], + "metadata": { + "id": "iIGM7vO8Hu3v" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Get popular 
ingredients for Chinese cuisines and make bar chart\r\n", + "create_ingredient(df = chinese_df) %>% \r\n", + " slice_head(n = 10) %>%\r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"cyan4\", alpha = 0.8) +\r\n", + " xlab(\"\") + ylab(\"\")" + ], + "outputs": [], + "metadata": { + "id": "lHd9_gd2HyzU" + } + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "ir8qyQbNH1c7" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Get popular ingredients for Indian cuisines and make bar chart\r\n", + "create_ingredient(df = indian_df) %>% \r\n", + " slice_head(n = 10) %>%\r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"#041E42FF\", alpha = 0.8) +\r\n", + " xlab(\"\") + ylab(\"\")" + ], + "outputs": [], + "metadata": { + "id": "ApukQtKjH5FO" + } + }, + { + "cell_type": "markdown", + "source": [ + "Slutligen, plotta de koreanska ingredienserna.\n" + ], + "metadata": { + "id": "qv30cwY1H-FM" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Get popular ingredients for Korean cuisines and make bar chart\r\n", + "create_ingredient(df = korean_df) %>% \r\n", + " slice_head(n = 10) %>%\r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"#852419FF\", alpha = 0.8) +\r\n", + " xlab(\"\") + ylab(\"\")" + ], + "outputs": [], + "metadata": { + "id": "lumgk9cHIBie" + } + }, + { + "cell_type": "markdown", + "source": [ + "Från datavisualiseringarna kan vi nu ta bort de vanligaste ingredienserna som skapar förvirring mellan olika kök, med hjälp av `dplyr::select()`.\n", + "\n", + "Alla älskar ris, vitlök och ingefära!\n" + ], + "metadata": { + "id": "iO4veMXuIEta" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Drop id column, rice, garlic and ginger from our 
original data set\r\n", + "df_select <- df %>% \r\n", + " select(-c(1, rice, garlic, ginger))\r\n", + "\r\n", + "# Display new data set\r\n", + "df_select %>% \r\n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "iHJPiG6rIUcK" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Förbehandling av data med recept 👩‍🍳👨‍🍳 - Hantera obalanserad data ⚖️\n", + "\n", + "

\n", + " \n", + "

Konstverk av @allison_horst
\n", + "\n", + "Eftersom den här lektionen handlar om kök, måste vi sätta `recept` i rätt sammanhang.\n", + "\n", + "Tidymodels erbjuder ännu ett smidigt paket: `recipes` - ett paket för förbehandling av data.\n" + ], + "metadata": { + "id": "kkFd-JxdIaL6" + } + }, + { + "cell_type": "markdown", + "source": [ + "Låt oss titta på fördelningen av våra kök igen.\n" + ], + "metadata": { + "id": "6l2ubtTPJAhY" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Distribution of cuisines\r\n", + "old_label_count <- df_select %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(desc(n))\r\n", + "\r\n", + "old_label_count" + ], + "outputs": [], + "metadata": { + "id": "1e-E9cb7JDVi" + } + }, + { + "cell_type": "markdown", + "source": [ + "Som du kan se, finns det en ganska ojämn fördelning i antalet kök. Koreanska kök är nästan tre gånger fler än thailändska kök. Obalanserad data har ofta negativa effekter på modellens prestanda. Tänk på en binär klassificering. Om majoriteten av din data tillhör en klass, kommer en ML-modell att förutsäga den klassen oftare, bara för att det finns mer data för den. Att balansera data tar bort snedvridningar och hjälper till att eliminera denna obalans. Många modeller presterar bäst när antalet observationer är lika och har därför svårt med obalanserad data.\n", + "\n", + "Det finns huvudsakligen två sätt att hantera obalanserade datasätt:\n", + "\n", + "- lägga till observationer till minoritetsklassen: `Över-sampling`, t.ex. med hjälp av en SMOTE-algoritm\n", + "\n", + "- ta bort observationer från majoritetsklassen: `Under-sampling`\n", + "\n", + "Låt oss nu demonstrera hur man hanterar obalanserade datasätt med hjälp av ett `recept`. 
Ett recept kan ses som en ritning som beskriver vilka steg som ska tillämpas på ett datasätt för att göra det redo för dataanalys.\n" + ], + "metadata": { + "id": "soAw6826JKx9" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load themis package for dealing with imbalanced data\r\n", + "library(themis)\r\n", + "\r\n", + "# Create a recipe for preprocessing data\r\n", + "cuisines_recipe <- recipe(cuisine ~ ., data = df_select) %>% \r\n", + " step_smote(cuisine)\r\n", + "\r\n", + "cuisines_recipe" + ], + "outputs": [], + "metadata": { + "id": "HS41brUIJVJy" + } + }, + { + "cell_type": "markdown", + "source": [ + "Låt oss gå igenom våra förbehandlingssteg.\n", + "\n", + "- Anropet till `recipe()` med en formel talar om för receptet vilka *roller* variablerna har, med hjälp av `df_select`-data som referens. Till exempel har kolumnen `cuisine` tilldelats rollen `outcome`, medan resten av kolumnerna har tilldelats rollen `predictor`.\n", + "\n", + "- [`step_smote(cuisine)`](https://themis.tidymodels.org/reference/step_smote.html) skapar en *specifikation* för ett receptsteg som syntetiskt genererar nya exempel för minoritetsklassen med hjälp av närmaste grannar till dessa fall.\n", + "\n", + "Nu, om vi vill se den förbehandlade datan, måste vi [**`prep()`**](https://recipes.tidymodels.org/reference/prep.html) och [**`bake()`**](https://recipes.tidymodels.org/reference/bake.html) vårt recept.\n", + "\n", + "`prep()`: uppskattar de nödvändiga parametrarna från en träningsuppsättning som senare kan tillämpas på andra dataset.\n", + "\n", + "`bake()`: tar ett förberett recept och tillämpar operationerna på valfritt dataset.\n" + ], + "metadata": { + "id": "Yb-7t7XcJaC8" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Prep and bake the recipe\r\n", + "preprocessed_df <- cuisines_recipe %>% \r\n", + " prep() %>% \r\n", + " bake(new_data = NULL) %>% \r\n", + " relocate(cuisine)\r\n", + "\r\n", + "# Display 
data\r\n", + "preprocessed_df %>% \r\n", + " slice_head(n = 5)\r\n", + "\r\n", + "# Quick summary stats\r\n", + "preprocessed_df %>% \r\n", + " introduce()" + ], + "outputs": [], + "metadata": { + "id": "9QhSgdpxJl44" + } + }, + { + "cell_type": "markdown", + "source": [ + "Låt oss nu kontrollera fördelningen av våra kök och jämföra dem med den obalanserade datan.\n" + ], + "metadata": { + "id": "dmidELh_LdV7" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Distribution of cuisines\r\n", + "new_label_count <- preprocessed_df %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(desc(n))\r\n", + "\r\n", + "list(new_label_count = new_label_count,\r\n", + " old_label_count = old_label_count)" + ], + "outputs": [], + "metadata": { + "id": "aSh23klBLwDz" + } + }, + { + "cell_type": "markdown", + "source": [ + "Mums! Datan är ren, balanserad och väldigt läcker 😋!\n", + "\n", + "> Vanligtvis används ett recept som en förprocessor för modellering där det definierar vilka steg som ska tillämpas på ett dataset för att göra det redo för modellering. 
I sådana fall används vanligtvis en `workflow()` (som vi redan har sett i våra tidigare lektioner) istället för att manuellt uppskatta ett recept.\n", + ">\n", + "> Därför behöver du vanligtvis inte **`prep()`** och **`bake()`** recept när du använder tidymodels, men de är användbara funktioner att ha i din verktygslåda för att bekräfta att recepten gör det du förväntar dig, som i vårt fall.\n", + ">\n", + "> När du **`bake()`** ett förberett recept med **`new_data = NULL`**, får du tillbaka den data som du angav när du definierade receptet, men som har genomgått förbearbetningsstegen.\n", + "\n", + "Låt oss nu spara en kopia av denna data för användning i framtida lektioner:\n" + ], + "metadata": { + "id": "HEu80HZ8L7ae" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Save preprocessed data\r\n", + "write_csv(preprocessed_df, \"../../../data/cleaned_cuisines_R.csv\")" + ], + "outputs": [], + "metadata": { + "id": "cBmCbIgrMOI6" + } + }, + { + "cell_type": "markdown", + "source": [ + "Denna nya CSV-fil finns nu i rotmappen för data.\n", + "\n", + "**🚀Utmaning**\n", + "\n", + "Detta kursmaterial innehåller flera intressanta dataset. Gå igenom `data`-mapparna och se om någon innehåller dataset som skulle passa för binär eller multi-klassklassificering? Vilka frågor skulle du ställa till detta dataset?\n", + "\n", + "## [**Quiz efter föreläsningen**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/20/)\n", + "\n", + "## **Granskning & Självstudier**\n", + "\n", + "- Kolla in [paketet themis](https://github.com/tidymodels/themis). Vilka andra tekniker kan vi använda för att hantera obalanserad data?\n", + "\n", + "- Tidy models [referenswebbplats](https://www.tidymodels.org/start/).\n", + "\n", + "- H. Wickham och G. 
Grolemund, [*R for Data Science: Visualize, Model, Transform, Tidy, and Import Data*](https://r4ds.had.co.nz/).\n", + "\n", + "#### TACK TILL:\n", + "\n", + "[`Allison Horst`](https://twitter.com/allison_horst/) för att ha skapat de fantastiska illustrationerna som gör R mer välkomnande och engagerande. Hitta fler illustrationer i hennes [galleri](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM).\n", + "\n", + "[Cassie Breviu](https://www.twitter.com/cassieview) och [Jen Looper](https://www.twitter.com/jenlooper) för att ha skapat den ursprungliga Python-versionen av denna modul ♥️\n", + "\n", + "

\n", + " \n", + "

Konstverk av @allison_horst
\n" + ], + "metadata": { + "id": "WQs5621pMGwf" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, vänligen notera att automatiska översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på dess originalspråk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sv/4-Classification/1-Introduction/solution/notebook.ipynb b/translations/sv/4-Classification/1-Introduction/solution/notebook.ipynb new file mode 100644 index 000000000..5b1cf058b --- /dev/null +++ b/translations/sv/4-Classification/1-Introduction/solution/notebook.ipynb @@ -0,0 +1,711 @@ +{ + "cells": [ + { + "source": [ + "# Utsökta asiatiska och indiska maträtter\n", + "\n", + "## Introduktion\n", + "\n", + "Asiatisk och indisk mat är känd för sina rika smaker, aromatiska kryddor och mångsidiga ingredienser. Den här guiden ger dig en inblick i några av de mest populära rätterna och deras unika egenskaper.\n", + "\n", + "## Populära asiatiska rätter\n", + "\n", + "### Sushi\n", + "Sushi är en japansk rätt som består av vinägersmaksatt ris kombinerat med olika ingredienser som rå fisk, grönsaker och ibland frukt. Det finns flera olika typer av sushi, inklusive maki, nigiri och sashimi.\n", + "\n", + "### Pad Thai\n", + "Pad Thai är en klassisk thailändsk nudelrätt som vanligtvis tillagas med risnudlar, ägg, tofu, räkor eller kyckling, och smaksätts med tamarind, fisksås och lime. 
Den toppas ofta med hackade jordnötter och färska örter.\n", + "\n", + "### Pekinganka\n", + "Pekinganka är en kinesisk specialitet där ankan tillagas tills skinnet blir krispigt. Den serveras vanligtvis med tunna pannkakor, hoisinsås och skivade grönsaker.\n", + "\n", + "## Populära indiska rätter\n", + "\n", + "### Butter Chicken\n", + "Butter Chicken, eller smörkyckling, är en krämig och smakrik curry som görs med kyckling tillagad i en tomatbaserad sås med smör och grädde. Den serveras ofta med naanbröd eller basmatiris.\n", + "\n", + "### Biryani\n", + "Biryani är en aromatisk risrätt som tillagas med kryddor, kött (som kyckling, lamm eller get) och ibland grönsaker. Den är populär i hela Indien och finns i många regionala variationer.\n", + "\n", + "### Samosa\n", + "Samosa är ett friterat eller bakat bakverk fyllt med kryddad potatis, ärtor och ibland kött. Det är en populär indisk snacksrätt som ofta serveras med chutney.\n", + "\n", + "## Tips för att laga mat hemma\n", + "\n", + "> **Tips:** Börja med att samla alla ingredienser innan du börjar laga mat. Många asiatiska och indiska rätter kräver specifika kryddor och såser som kan vara svåra att hitta i vanliga mataffärer.\n", + "\n", + "> **Obs:** Om du är nybörjare, börja med enklare recept som inte kräver för många steg eller ovanliga ingredienser.\n", + "\n", + "> **Varning:** Var försiktig med mängden chili och kryddor om du inte är van vid stark mat.\n", + "\n", + "## Avslutning\n", + "\n", + "Att laga asiatisk och indisk mat hemma kan vara både roligt och givande. Med rätt ingredienser och lite övning kan du skapa autentiska smaker som tar dig på en kulinarisk resa utan att lämna ditt kök.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "Installera Imblearn som möjliggör SMOTE. Detta är ett Scikit-learn-paket som hjälper till att hantera obalanserad data vid klassificering. 
(https://imbalanced-learn.org/stable/)\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: imblearn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.0)\n", + "Requirement already satisfied: imbalanced-learn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imblearn) (0.8.0)\n", + "Requirement already satisfied: numpy>=1.13.3 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (1.19.2)\n", + "Requirement already satisfied: scipy>=0.19.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (1.4.1)\n", + "Requirement already satisfied: scikit-learn>=0.24 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (0.24.2)\n", + "Requirement already satisfied: joblib>=0.11 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (0.16.0)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from scikit-learn>=0.24->imbalanced-learn->imblearn) (2.1.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install imblearn" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as 
plt\n", + "import matplotlib as mpl\n", + "import numpy as np\n", + "from imblearn.over_sampling import SMOTE" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('../../data/cuisines.csv')" + ] + }, + { + "source": [ + "Denna datamängd inkluderar 385 kolumner som anger alla typer av ingredienser i olika kök från en given uppsättning kök.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 65 indian 0 0 0 0 0 \n", + "1 66 indian 1 0 0 0 0 \n", + "2 67 indian 0 0 0 0 0 \n", + "3 68 indian 0 0 0 0 0 \n", + "4 69 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 385 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
065indian00000000...0000000000
166indian10000000...0000000000
267indian00000000...0000000000
368indian00000000...0000000000
469indian00000000...0000000010
\n

5 rows × 385 columns

\n
" + }, + "metadata": {}, + "execution_count": 4 + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\nRangeIndex: 2448 entries, 0 to 2447\nColumns: 385 entries, Unnamed: 0 to zucchini\ndtypes: int64(384), object(1)\nmemory usage: 7.2+ MB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "korean 799\n", + "indian 598\n", + "chinese 442\n", + "japanese 320\n", + "thai 289\n", + "Name: cuisine, dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ], + "source": [ + "df.cuisine.value_counts()" + ] + }, + { + "source": [ + "Visa köken i ett stapeldiagram\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 7 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAZEAAAD4CAYAAAAtrdtxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAASY0lEQVR4nO3df7TldV3v8eerGZkRRoeAiXtE5UgNIkUCjlwQIzAiC7NscdcSbcmsfkxl5SXX0juuyzK9d3UvlXnpplajma0kMtCUhluImNcr8msGBmb4pZaTQCFQOYom0fi+f+zPkd14hpnzOWefvYfzfKy113z35/vde7/22fvMa3++3733SVUhSVKPbxt3AEnSgcsSkSR1s0QkSd0sEUlSN0tEktRt+bgDLKYjjjiipqenxx1Dkg4oW7dufbiq1sy2bkmVyPT0NFu2bBl3DEk6oCT5u72tc3eWJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqduS+sT69vt3Mb3xqnHH0ALZefG5444gLXnORCRJ3SwRSVI3S0SS1M0SkSR1s0QkSd0sEUlSN0tEktRtIkokyaFJXtuWz0yyeY6X/29Jzh5NOknS3kxEiQCHAq/tvXBVvbmqPraAeSRJ+2FSSuRi4DuTbAN+E1iV5Iokdye5NEkAkrw5yc1JdiTZNDT+viTnjTG/JC1Jk1IiG4G/qaoTgTcAJwEXAscDxwCnt+3eUVUvrKrvAZ4KvGxfV5xkQ5ItSbbs/tqu0aSXpCVqUkpkTzdV1X1V9Q1gGzDdxs9KcmOS7cBLgO/e1xVV1aaqWldV65YdvHp0iSVpCZrUL2B8dGh5N7A8yUrgXcC6qro3yVuAleMIJ0kamJSZyFeAp+1jm5nCeDjJKsBjIJI0ZhMxE6mqf0xyXZIdwL8AX5xlmy8leTewA3gAuHmRY0qS9jARJQJQVa/ay/gvDS1fBFw0yzbrR5dMkrQ3k7I7S5J0ALJEJEndLBFJUjdLRJLUzRKRJHWbmHdnLYYTjlrNlovPHXcMSXrScCYiSepmiUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6rZ83AEW0/b7dzG98apxx9CY7Lz43HFHkJ50nIlIkrpZIpKkbpaIJKmbJSJJ6maJSJK6WSKSpG77VSJJPj3qIJKkA89+lUhVvWjUQSRJB579nYk8kmRVkmuT3JJke5Ifa+umk9yd5NIkdyW5IsnBbd2bk9ycZEeSTUnSxj+R5NeT3JTkM0m+r40vS/Kb7TK3J/m5Nj6V5JNJtrXrmtn+nCTXt0yXJ1k1ih+SJGl2czkm8nXgFVV1MnAW8FszpQA8F3hXVT0P+DLw2jb+jqp6YVV9D/BU4GVD17e8qk4BLgR+tY39NLCrql4IvBD42STPAV4FXF1VJwLPB7YlOQK4CDi7ZdoCvH4ud16SND9z+dqTAP8jyRnAN4CjgCPbunur6rq2/H7gdcDbgLOSvBE4GDgMuAP4i7bdh9q/W4HptnwO8L1JzmvnVwNrgZuB9yZ5CvDhqtqW5PuB44HrWpcdBFz/LaGTDcAGgGVPXzOHuytJ2pe5lMirgTXAC6rqsSQ7gZVtXe2xbSVZCbwLWFdV9yZ5y9D2AI+2f3cP5Qjwy1V19Z433srrXOB9Sd4O/DNwTVWd/0Shq2oTsAlgxdTaPXNKkuZhLruzVgMPtgI5Czh6aN2zk5zWll8FfIrHC+PhdqziPPbtauAX2oyDJMcmOSTJ0cAXq+rdwHuAk4EbgNOTfFfb9pAkx87h/kiS5ml/ZyIFXAr8RZLtDI4/3D20/h7gF5O8F7gT+N2q+lqSdwM7gAcY7JLal/cw2LV1Szve8hDw48CZwBuSPAY8Arymqh5Ksh6
4LMmKdvmLgM/s532SJM1Tqp54D0+Sw4FbqurovayfBja3g+cTbcXU2pq64JJxx9CY+FXwUp8kW6tq3WzrnnB3VpJnMDhY/bZRBJMkHdiecHdWVf098ITHGapqJzDxsxBJ0sLzu7MkSd0sEUlSN0tEktRtLh82POCdcNRqtvgOHUlaMM5EJEndLBFJUjdLRJLUzRKRJHWzRCRJ3SwRSVI3S0SS1M0SkSR1s0QkSd0sEUlSN0tEktTNEpEkdbNEJEndLBFJUjdLRJLUzRKRJHWzRCRJ3SwRSVI3S0SS1M0SkSR1s0QkSd2WjzvAYtp+/y6mN1417hhSt50XnzvuCNK/40xEktTNEpEkdbNEJEndLBFJUjdLRJLUzRKRJHWzRCRJ3Ra0RJK8L8l5s4w/I8kVC3lbkqTxW5QPG1bV3wPfUi6SpAPbvGYiSV6T5PYktyX54zZ8RpJPJ/nbmVlJkukkO9ry+iQfSvJXST6b5DeGru+cJNcnuSXJ5UlWtfGLk9zZbuttbWxNkg8mubmdTp/PfZEkzV33TCTJdwMXAS+qqoeTHAa8HZgCXgwcB1wJzLYb60TgJOBR4J4kvwP8S7u+s6vqq0n+C/D6JO8EXgEcV1WV5NB2Hb8N/K+q+lSSZwNXA8+bJecGYAPAsqev6b27kqRZzGd31kuAy6vqYYCq+qckAB+uqm8AdyY5ci+XvbaqdgEkuRM4GjgUOB64rl3PQcD1wC7g68AfJNkMbG7XcTZwfNsW4OlJVlXVI8M3VFWbgE0AK6bW1jzuryRpD6M4JvLo0HL2Y5vdLUeAa6rq/D03TnIK8AMMjqv8EoMC+zbg1Kr6+kKEliTN3XyOiXwc+E9JDgdou7Pm4wbg9CTf1a7vkCTHtuMiq6vq/wC/Ajy/bf9R4JdnLpzkxHneviRpjrpnIlV1R5JfA/5vkt3ArfMJUlUPJVkPXJZkRRu+CPgK8JEkKxnMVl7f1r0OeGeS2xncj08CPz+fDJKkuUnV0jlMsGJqbU1dcMm4Y0jd/HsiGockW6tq3Wzr/MS6JKmbJSJJ6maJSJK6WSKSpG6WiCSp26J8AeOkOOGo1Wzx3S2StGCciUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6maJSJK6LR93gMW0/f5dTG+8atwxJM3RzovPHXcE7YUzEUlSN0tEktTNEpEkdbNEJEndLBFJUjdLRJLUbWQlkuTTc9z+zCSb2/LLk2wcTTJJ0kIZ2edEqupF87jslcCVCxhHkjQCo5yJPNL+PTPJJ5JckeTuJJcmSVv30jZ2C/ATQ5ddn+QdbflHk9yY5NYkH0tyZBt/S5L3tuv+2ySvG9V9kSTNbrGOiZwEXAgcDxwDnJ5kJfBu4EeBFwD/YS+X/RRwalWdBPwp8MahdccBPwScAvxqkqeMJr4kaTaL9bUnN1XVfQBJtgHTwCPA56vqs238/cCGWS77TOADSaaAg4DPD627qqoeBR5N8iBwJHDf8IWTbJi53mVPX7OQ90mSlrzFmok8OrS8m7mV1+8A76iqE4CfA1bO5XqralNVrauqdcsOXj2Hm5Uk7cs43+J7NzCd5Dvb+fP3st1q4P62fMHIU0mS9tvYSqSqvs5gN9NV7cD6g3vZ9C3A5Um2Ag8vUjxJ0n5IVY07w6JZMbW2pi64ZNwxJM2RXwU/Xkm2VtW62db5iXVJUjdLRJLUzRKRJHWzRCRJ3SwRSVK3xfrE+kQ44ajVbPFdHpK0YJyJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqZolIkrotH3eAxbT9/l1Mb7xq3DEkaVHtvPjckV23MxF
JUjdLRJLUzRKRJHWzRCRJ3SwRSVI3S0SS1M0SkSR1W9ASSTKdZMdCXqckaXJNxEwkyZL60KMkPVmMrESSHJPk1iTfl+QPk2xv589q69cnuTLJx4Fr29gbktyc5PYkbx26rg8n2ZrkjiQbhsYfSfJrSW5LckOSI0d1fyRJ32okJZLkucAHgfXAKUBV1QnA+cAfJVnZNj0ZOK+qvj/JOcDatv2JwAuSnNG2+6mqegGwDnhdksPb+CHADVX1fOCTwM/OkmVDki1Jtuz+2q5R3F1JWrJGUSJrgI8Ar66q24AXA+8HqKq7gb8Djm3bXlNV/9SWz2mnW4FbgOMYlAoMiuM24AbgWUPj/wpsbstbgek9w1TVpqpaV1Xrlh28eqHuoySJ0XwB4y7gCwzK4859bPvVoeUA/7Oqfn94gyRnAmcDp1XV15J8ApiZyTxWVdWWd7PEvlBSksZtFDORfwVeAbwmyauA/we8GiDJscCzgXtmudzVwE8lWdW2PSrJdwCrgX9uBXIccOoIMkuSOozklXtVfTXJy4BrgP8OnJBkO/BvwPqqejTJnpf5aJLnAde3dY8APwn8FfDzSe5iUD43jCKzJGnu8vjeoCe/FVNra+qCS8YdQ5IW1Xz/nkiSrVW1brZ1E/E5EUnSgckSkSR1s0QkSd0sEUlSN0tEktRtSX0474SjVrNlnu9SkCQ9zpmIJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqtqT+KFWSrzD7n+adFEcAD487xBMw3/yYb37MNz/zyXd0Va2ZbcWS+u4s4J69/XWuSZBki/n6mW9+zDc/SzWfu7MkSd0sEUlSt6VWIpvGHWAfzDc/5psf883Pksy3pA6sS5IW1lKbiUiSFpAlIknqtmRKJMlLk9yT5HNJNo4pw3uTPJhkx9DYYUmuSfLZ9u+3t/Ek+d8t7+1JTl6EfM9K8tdJ7kxyR5L/PEkZk6xMclOS21q+t7bx5yS5seX4QJKD2viKdv5zbf30KPO121yW5NYkmycw284k25NsS7KljU3EY9tu89AkVyS5O8ldSU6blHxJntt+bjOnLye5cFLytdv8lfZ7sSPJZe33ZfTPv6p60p+AZcDfAMcABwG3AcePIccZwMnAjqGx3wA2tuWNwK+35R8B/hIIcCpw4yLkmwJObstPAz4DHD8pGdvtrGrLTwFubLf7Z8Ar2/jvAb/Qll8L/F5bfiXwgUX4Gb4e+BNgczs/Sdl2AkfsMTYRj227zT8CfqYtHwQcOkn5hnIuAx4Ajp6UfMBRwOeBpw4979YvxvNvUX7o4z4BpwFXD51/E/CmMWWZ5t+XyD3AVFueYvCBSIDfB86fbbtFzPoR4AcnMSNwMHAL8B8ZfAp3+Z6PNXA1cFpbXt62ywgzPRO4FngJsLn9BzIR2drt7ORbS2QiHltgdftPMJOYb49M5wDXTVI+BiVyL3BYez5tBn5oMZ5/S2V31swPeMZ9bWwSHFlV/9CWHwCObMtjzdymtycxeLU/MRnb7qJtwIPANQxmmF+qqn+bJcM387X1u4DDRxjvEuCNwDfa+cMnKBtAAR9NsjXJhjY2KY/tc4CHgD9suwPfk+SQCco37JXAZW15IvJV1f3A24AvAP/A4Pm0lUV4/i2VEjkg1OBlwdjfc51kFfBB4MKq+vLwunFnrKrdVXUig1f9pwDHjSvLsCQvAx6sqq3jzvIEXlxVJwM/DPxikjOGV475sV3OYFfv71bVScBXGewe+qZxP/cA2jGFlwOX77lunPnasZgfY1DGzwAOAV66GLe9VErkfuBZQ+ef2cYmwReTTAG0fx9s42PJnOQpDArk0qr60CRmBKiqLwF/zWCKfmiSme+BG87wzXxt/WrgH0cU6XTg5Ul2An/KYJfWb09INuCbr1apqgeBP2dQwpPy2N4H3FdVN7bzVzAolUnJN+OHgVuq6ovt/KTkOxv4fFU9VFWPAR9i8Jwc+fNvqZT
IzcDa9k6FgxhMR68cc6YZVwIXtOULGByHmBl/TXuXx6nArqFp80gkCfAHwF1V9fZJy5hkTZJD2/JTGRyvuYtBmZy3l3wzuc8DPt5eLS64qnpTVT2zqqYZPL8+XlWvnoRsAEkOSfK0mWUG+/V3MCGPbVU9ANyb5Llt6AeAOycl35DzeXxX1kyOScj3BeDUJAe33+OZn9/on3+LcSBqEk4M3i3xGQb70P/rmDJcxmB/5WMMXnn9NIP9kNcCnwU+BhzWtg3wzpZ3O7BuEfK9mMF0/HZgWzv9yKRkBL4XuLXl2wG8uY0fA9wEfI7BboYVbXxlO/+5tv6YRXqcz+Txd2dNRLaW47Z2umPmd2BSHtt2mycCW9rj+2Hg2ycs3yEMXq2vHhqbpHxvBe5uvxt/DKxYjOefX3siSeq2VHZnSZJGwBKRJHWzRCRJ3SwRSVI3S0SS1M0SkSR1s0QkSd3+PxNFbW14TY8fAAAAAElFTkSuQmCC\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "df.cuisine.value_counts().plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "thai df: (289, 385)\njapanese df: (320, 385)\nchinese df: (442, 385)\nindian df: (598, 385)\nkorean df: (799, 385)\n" + ] + } + ], + "source": [ + "\n", + "thai_df = df[(df.cuisine == \"thai\")]\n", + "japanese_df = df[(df.cuisine == \"japanese\")]\n", + "chinese_df = df[(df.cuisine == \"chinese\")]\n", + "indian_df = df[(df.cuisine == \"indian\")]\n", + "korean_df = df[(df.cuisine == \"korean\")]\n", + "\n", + "print(f'thai df: {thai_df.shape}')\n", + "print(f'japanese df: {japanese_df.shape}')\n", + "print(f'chinese df: {chinese_df.shape}')\n", + "print(f'indian df: {indian_df.shape}')\n", + "print(f'korean df: {korean_df.shape}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "def create_ingredient_df(df):\n", + " # transpose df, drop cuisine and unnamed rows, sum the row to get total for ingredient and add value header to new df\n", + " ingredient_df = df.T.drop(['cuisine','Unnamed: 0']).sum(axis=1).to_frame('value')\n", + " # drop ingredients that have a 0 sum\n", + " ingredient_df = ingredient_df[(ingredient_df.T != 0).any()]\n", + " # sort df\n", + " ingredient_df = 
ingredient_df.sort_values(by='value', ascending=False, inplace=False)\n", + " return ingredient_df\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 10 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "thai_ingredient_df = create_ingredient_df(thai_df)\r\n", + "thai_ingredient_df.head(10).plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 11 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "japanese_ingredient_df = create_ingredient_df(japanese_df)\r\n", + "japanese_ingredient_df.head(10).plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 12 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "chinese_ingredient_df = create_ingredient_df(chinese_df)\r\n", + "chinese_ingredient_df.head(10).plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 13 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "indian_ingredient_df = create_ingredient_df(indian_df)\r\n", + "indian_ingredient_df.head(10).plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 14 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAdIAAAD4CAYAAABYIGfSAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nO3de5xXVb3/8dcbHCEFp1SyCbQho0hAQAYTNPV4KfOSdhTpRF7SXxz9HbFOWnHSftFJfz/NfuZdo1JILQXKS3KCTPBOyowIwyWsRI8gKd5GkSCEz/ljr5Ev48www56ZLzPf9/PxmMfs79prr/XZCx58WGvv796KCMzMzGz7dCt2AGZmZp2ZE6mZmVkOTqRmZmY5OJGamZnl4ERqZmaWw07FDsA61p577hmVlZXFDsPMrFOpqal5JSL6NLbPibTEVFZWUl1dXewwzMw6FUnPN7XPS7tmZmY5OJGamZnl4ERqZmaWg6+RmpnZe2zcuJGVK1eyfv36YofSoXr27Em/fv0oKytr8TFOpCWmdlUdlRNnFjsM68Keu+y4YodgbWDlypX07t2byspKJBU7nA4REbz66qusXLmS/v37t/g4L+2amdl7rF+/nj322KNkkiiAJPbYY49Wz8KdSDuYpJMk7deCelMkndJI+eGS7muf6MzMtiilJFpve87ZibTjnQRsM5GamVnn4GukDUi6DHghIq5PnycBawEBpwI9gLsi4ntp/3eBLwNrgBeAmoj4kaR9geuBPsA64KvA7sDngcMkXQycDBwBjAd2Bv4CnBYR61I4R0maCOwGfCMitpqJStoVuBYYDJQBkyLinjYfFDMreW19b0VbX0vv1asXa9eubdM2W8oz0ve6kyxh1juVLEkOAA4EhgEjJB0qaSRZMhwKfA6oKjhuMjAhIkYAFwI3RMTjwL3ANyNiWET8FfhNRIyMiKHAMuDsgjYqU5/HATdJ6tkg1ouAORFxIPBPwBUpuW5F0nhJ1ZKqN62r244hMTOzpjiRNhARC4APSvqwpKHA68AQ4DPAAuApYCBZYj0YuCci1kfEW8BvAST1AkYD0yU9DfwEqGiiy8GSHpFUC4wDBhXsmxYRmyPiz8Czqd9CnwEmpj4eBHoC+zRyTpMjoioiqrrvUt7KETEz63gTJ07k+uuvf/fzpEmTuOSSSzjyyCM54IADGDJkCPfc894FuAcffJDjjz/+3c/nnXceU6ZMAaCmpobDDjuMESNG8NnPfpbVq1e3SaxOpI2bDpwCjCWboQr4f2kWOSwiPhYRP2/m+G7AGwX1h0XEJ5uoOwU4LyKGAN8nS4b1okHdhp8FnFzQxz4Rsaxlp2hmtuMaO3Ys06ZNe/fztGnTOOOMM7jrrrt46qmnmDt3LhdccAERDf9ZbNzGjRuZMGECM2bMoKamhrPOOouLLrqoTWJ1Im3cncAXyZLpdGA2cFaaaSKpr6QPAo8BJ0jqmfYdDxARbwIrJI1J9ZVmtwBvAb0L+uoNrJZURjYjLTRGUrd0vfWjwPIG+2cDE5RuM5M0vA3O3cys6IYPH87LL7/Miy++yMKFC/nABz7Ahz70Ib7zne+w//77c9RRR7Fq1SpeeumlFrW3fPlyFi9ezNFHH82wYcO45JJLWLlyZZvE6puNGhERSyT1BlZFxGqyRPdJYF7KWWuBL0fEfEn3AouAl4BaoP4i5DjgxnRTURlwB7Aw/f6ppPPJEvV3gSfIrsM+wdZJ9r+BJ8luNjonItY3uDX7B8BVwCJJ3YAVpGRuZtbZjRkzhhkzZvC3v/2NsWPHcvvtt7NmzRpqamooKyujsrLyPd/53Gmnndi8efO7n+v3RwSDBg1i3rx5bR6nE2kT0lJr4eergasbqfqjiJgkaRfgYaAm1V8BHNNIu4+x9ddfbkw/Deud2URcD5JdDyUi/g786zZPxsysExo7dixf/epXeeWVV3jooYeYNm0aH/zgBykrK2Pu3Lk8//x732z
2kY98hKVLl7Jhwwb+/ve/88ADD3DIIYfwiU98gjVr1jBv3jxGjRrFxo0beeaZZxg0aFAjPbeOE2l+k9MDFnoCUyPiqWIH1Jwhfcup9iPczKyVivHox0GDBvHWW2/Rt29fKioqGDduHCeccAJDhgyhqqqKgQMb3n8Je++9N6eeeiqDBw+mf//+DB+eXfHaeeedmTFjBueffz51dXW88847fP3rX2+TRKqWXqi1rqGqqir8Ym8z25Zly5bxyU82dY9k19bYuUuqiYiqxur7ZiMzM7McnEjNzMxycCI1M7NGleKlv+05ZydSMzN7j549e/Lqq6+WVDKtfx9pz54Nn8baPN+1a2Zm79GvXz9WrlzJmjVrih1Kh+rZsyf9+vVr1TFOpGZm9h5lZWX079+/2GF0Cl7aNTMzy8GJ1MzMLAcnUjMzsxx8jbTE1K6qa/M33Zu1VDEeM2fW3jwjNTMzy8GJ1MzMLIeiJFJJJ6U3prT2uLXtEU9XIWmKpFOKHYeZWSlpVSJVpi2S70ls/U5OMzOzTmmbSVFSpaTlkn4BLAZOk1QrabGkywvq3SipWtISSd8vKL9M0lJJiyT9SNJo4PPAFZKelrRv+pklqUbSI5IGpmP7S5qX+rtkG3FWSHo4tblY0qdT+WdSG09Jmi6pV2NxpbITJD0haYGkP0jaK5VPkjQ1xfa8pH+W9MMU1yxJZaneCEkPpfOYLamimXjPL+j/jlS2q6SbJT2ZYjgxlXeXdIWk+an+v6ZySbou/fn8AfhgE32NT3821ZvW1TX7521mZq3T0rt2BwBnAP8N/BEYAbwO/F7SSRFxN3BRRLwmqTvwgKT9gVXAF4CBERGS3h8Rb0i6F7gvImYASHoAOCci/izpU8ANwBHA1cCNEfELSf+2jRi/BMyOiEtTDLtI2hO4GDgqIt6W9G3gG5KubxhXauNR4KBU9r+AbwEXpH37Av9ENpOeB5wcEd+SdBdwnKSZwLXAiRGxRtJY4FLgrCbinQj0j4gNBf1fBMyJiLNS2ZMpQY4D6iJipKQewGOSfg8MBz6RYtoLWArc3LCjiJgMTAboUTGgdB6caWbWAVqaSJ+PiD+mGdKDEbEGQNLtwKHA3cCpksanNivI/nFfCqwHfi7pPuC+hg2nGeJoYLqk+uIe6ffBwMlp+1bgcpo2H7g5zQ7vjoinJR2W4ngstb0zWRKsayKufsCdaSa5M7CioP3fRcRGSbVAd2BWKq8FKskS2mDg/tRXd2B1M/EuAm6XdDfZ+AF8Bvi8pAvT557APql8f225/llO9p+bQ4FfRcQm4EVJc5rpz8zM2kFLE+nbze2U1B+4EBgZEa9LmgL0jIh3JB0IHAmcApxHNtMs1A14IyKGNdF8i2ZQEfGwpEOB44Apkq4kmzXfHxH/0kjMjcV1LXBlRNwr6XBgUsEhG1I/myVtjC2vRNhMNo4ClkTEqJbEm+I8FDgBuEjSkNTGyRGxvEGsAiZExOwG5ce2sC8zM2snrb1x6EngMEl7puXTfwEeAnYjS7Z16bri5+Dd2WZ5RPwX8O/A0NTOW0BvgIh4E1ghaUw6RpLq6z0GfDFtj2suMEkfAV6KiJ8CPwMOIFuGPljSx1KdXSV9vJm4ysmWoyFbym6N5UAfSaNSX2WSBjURazdg74iYC3w79dsLmA1MSIkTScPTIbOBcwuuxX5c0q7Aw8DYdA21gmzp2czMOlCrnmwUEaslTQTmks2eZkbEPQCSFgB/Al4gS4CQJct7JPVM9b+Ryu8AfirpfLIZ4TjgRkkXA2Vp/0Lga8Av07XNe7YR3uHANyVtBNYCp6drlWcCv0rXFiG7ZvpWE3FNIltifh2YA7T41QcR8Y+09HqNpHKysb0KWNJI9e7AbamegGvSteMfpGMWpWS7Ajie7D8GlcBTKcmuIbvz+S6ymfRSsuvX87YV55C+5VT76TJmZm1GpfTSVoOqqqqorq4udhhmZp2KpJqIqGpsn59sZGZmlkOne2h9uinn1gb
FGyLiU8WIZ1vSV20OblB8dUTcUox4zMysbXW6RBoRtUBTd/jucCJiW99/NTOzTsxLu2ZmZjk4kZqZmeXgRGpmZpaDE6mZmVkOTqRmZmY5OJGamZnl0Om+/mL51K6qo3LizGKHYbaV5/zYSuvEPCM1MzPLwYnUzMwsByfSIpC0ttgxmJlZ23AiNTMzy8GJtJXSy8FnSlooabGksZKOlLRAUq2kmyX1kHSEpLsLjjta0l0Fn38saYmkByT1SWX7SpolqUbSI5IGpvITJD2R+vhDenk6kial/h6U9Gx6v6uZmXUgJ9LWOwZ4MSKGRsRgYBYwBRgbEUPI7oQ+l+zl5wPrkyTwFeDmtL0rUB0Rg4CHgO+l8snAhIgYAVwI3JDKHwUOiojhZC89/1ZBPAOBzwIHAt+TVNYwYEnjJVVLqt60ri73AJiZ2RZOpK1XCxwt6XJJnwYqgRUR8UzaPxU4NLI3pt8KfFnS+4FRwO9Snc3AnWn7NuAQSb2A0cB0SU8DPwEqUp1+wGxJtcA3gUEF8cyMiA0R8QrwMrBXw4AjYnJEVEVEVfddyttgCMzMrJ6/R9pKEfGMpAOAY4FLgDnNVL8F+C2wHpgeEe801SzZf2reiIjGXhF3LXBlRNwr6XBgUsG+DQXbm/CfqZlZh/KMtJUkfRhYFxG3AVeQzTQrJX0sVTmNbLmWiHgReBG4mCyp1usGnJK2vwQ8GhFvAiskjUn9SNLQVKccWJW2z2iXEzMzs+3i2UvrDQGukLQZ2Eh2PbScbEl2J2A+cFNB/duBPhGxrKDsbeBASReTLceOTeXjgBtTeRnZ9dCFZDPQ6ZJeJ5sB92+nczMzs1ZSdinP2ouk64AFEfHzYscCUFVVFdXV1cUOw8ysU5FUExFVje3zjLQdSaohm31eUOxYzMysfTiRtqP0NRYzM+vCfLORmZlZDk6kZmZmOTiRmpmZ5eBEamZmloMTqZmZWQ5OpGZmZjk4kZqZmeXgRGpmZpaDH8hQYmpX1VE5cWaxwzBrV89ddlyxQ7AS4hmpmZlZDk6kOzhJ/5VeDG5mZjsgL+3uwCQJOD4iNhc7FjMza5xnpDsYSZWSlkv6BbAY2CRpz7TvdEmLJC2UdGsq6yPp15Lmp5+Dixm/mVmp8Yx0xzQAOCMi/ijpOQBJg4CLgdER8Yqk3VPdq4EfR8SjkvYBZgOfLGxM0nhgPED33fp00CmYmZUGJ9Id0/MR8ccGZUcA0yPiFYCIeC2VHwXsl60CA7CbpF4Rsba+ICImA5MBelQM8JvczczakBPpjuntVtTtBhwUEevbKxgzM2uar5F2HnOAMZL2AChY2v09MKG+kqRhRYjNzKxkOZF2EhGxBLgUeEjSQuDKtOt8oCrdhLQUOKdYMZqZlSIv7e5gIuI5YHDB58qC7anA1Ab1XwHGdlB4ZmbWgBNpiRnSt5xqPz7NzKzNeGnXzMwsBydSMzOzHJxIzczMcnAiNTMzy8GJ1MzMLAcnUjMzsxycSM3MzHJwIjUzM8vBidTMzCwHJ1IzM7Mc/IjAElO7qo7KiTOLHYZZUT3nx2RaG/KM1MzMLAcnUjMzsxy6XCKVVClpcSPlD0qq2o72zpR0XdtEZ2ZmXU2XS6QGknzt28ysg3TVRLqTpNslLZM0Q9IuhTsl3SipWtISSd8vKB8p6XFJCyU9Kal3g+OOkzRP0p6NdSppiqSbUtvPSDo+lXeXdIWk+ZIWSfrXVH64pIclzZS0PB3bLe1bK+nHKcYHJPVJ5ftKmiWpRtIjkgY26PsJ4IcN4hqfYqretK4u9+CamdkWXTWRfgK4ISI+CbwJ/O8G+y+KiCpgf+AwSftL2hm4E/haRAwFjgL+Xn+ApC8AE4FjI+KVZvquBA4EjgNuktQTOBuoi4iRwEjgq5L6p/oHAhOA/YB9gX9O5bsC1RExCHgI+F4qnwxMiIgRwIXADQV99wNGR8Q3CgOKiMkRURURVd13KW8mdDMza62uugT4QkQ8lrZvA85vsP9
USePJzr+CLIkFsDoi5gNExJsAkgCOAKqAz9SXN2NaRGwG/izpWWAg8Blgf0mnpDrlwADgH8CTEfFs6utXwCHADGAzWWKvP4ffSOoFjAamp7gAehT0PT0iNm0jPjMza0NdNZFGU5/TTPBCYGREvC5pCtBzG+39Ffgo8HGgejv6FtkscnbhDkmHNxdrI+XdgDciYlgTdd7eRmxmZtbGuurS7j6SRqXtLwGPFuzbjSzh1EnaC/hcKl8OVEgaCSCpd8FNO88DJwO/kDRoG32PkdRN0r5kyXc5MBs4V1JZavvjknZN9Q+U1D9dGx1bEGs3oH4G+yXg0TQbXiFpTGpHkoa2dFDMzKztddVEuhz4N0nLgA8AN9bviIiFwALgT8AvgcdS+T/IEtm1khYC91MwU42IPwHjyJZV922m7/8GngR+B5wTEeuBnwFLgafSV3N+wpbVgPnAdcAyYAVwVyp/myzJLiZbWv7PVD4OODvFuAQ4sVUjY2ZmbUoRTa0kWmulZeL7ImJGC+sfDlwYEcc3sm9tRPRq2wihqqoqqqu3tTptZmaFJNWkm1Tfo6vOSM3MzDpEV73ZqF1JuggY06B4ekSc2Zp2IuJB4MEm9rX5bNTMzNqeE+l2iIhLgUuLHYeZmRWfl3bNzMxycCI1MzPLwYnUzMwsBydSMzOzHJxIzczMcnAiNTMzy8GJ1MzMLAd/j7TE1K6qo3LizGKHYWbNeO6y44odgrWCZ6RmZmY5OJGamZnl4ETayUk6R9LpaXuKpFO2dYyZmbUdXyPt5CLipmLHYGZWykp6RippV0kzJS2UtFjSWEkjJD0kqUbSbEkVqe75kpZKWiTpjlR2oKR5khZIelzSJ1L5mZLulnS/pOcknSfpG6neHyXtnurtK2lW6usRSQObibVS0pzU/wOS9knlkyRduI3zHC+pWlL1pnV1bTV8ZmZGiSdS4BjgxYgYGhGDgVnAtcApETECuJktb3mZCAyPiP2Bc1LZn4BPR8Rw4P8A/7eg7cHAPwMjUxvrUr15wOmpzmRgQurrQuCGZmK9Fpia+r8duKalJxkRkyOiKiKquu9S3tLDzMysBUp9abcW+P+SLgfuA14nS4D3SwLoDqxOdRcBt0u6G7g7lZUDUyUNAAIoK2h7bkS8BbwlqQ74bUGf+0vqBYwGpqe+AHo0E+sossQMcCvww9afrpmZtbWSTqQR8YykA4BjgUuAOcCSiBjVSPXjgEOBE4CLJA0BfkCWML8gqZKtX9K9oWB7c8HnzWTj3g14IyKGtdkJmZlZhyvppV1JHyZbcr0NuAL4FNBH0qi0v0zSIEndgL0jYi7wbbKZaK/0e1Vq7szW9B0RbwIrJI1JfUnS0GYOeRz4YtoeBzzSmv7MzKx9lPSMFBgCXCFpM7AROBd4B7hGUjnZ+FwFPAPclsoEXBMRb0j6IdnS7sXA9jwuaBxwYzq+DLgDWNhE3QnALZK+CawBvrId/TGkbznVfmqKmVmbUUQUOwbrQFVVVVFdXV3sMMzMOhVJNRFR1di+kl7aNTMzy6vUl3Z3OJIuAsY0KJ4eEZc2Vt/MzIrLiXQHkxKmk6aZWSfhpV0zM7McnEjNzMxycCI1MzPLwYnUzMwsBydSMzOzHJxIzczMcvDXX0pM7ao6Kiduz9MMzayUPOdHibaYZ6RmZmY5OJGamZnl4ERqZmaWgxOpmZlZDk6kBSTtKmmmpIWSFksaK+lISQsk1Uq6WVIPSUdIurvguKMl3dVEm90lTUnt1Ur691T+VUnzU1+/lrRLKp8i6ZSC49cWbH87tbFQ0mWpbF9JsyTVSHpE0sD2Gh8zM3svJ9KtHQO8GBFDI2IwMAuYAoyNiCFkdzmfC8wFBkrqk477CnBzE20OA/pGxODUxi2p/DcRMTIihgLLgLObC0zS54ATgU+lY36Ydk0GJkTECOBC4IZGjh0vqVpS9aZ1ddseBTMzazEn0q3VAkdLulzSp4FKYEVEPJP2TwUOjext6LcCX5b0fmA
U8Lsm2nwW+KikayUdA7yZygenGWQtMA4YtI3YjgJuiYh1ABHxmqRewGhguqSngZ8AFQ0PjIjJEVEVEVXddylvyTiYmVkL+XukBSLiGUkHAMcClwBzmql+C/BbYD3Z+0LfaaLN1yUNBT4LnAOcCpxFNtM9KSIWSjoTODwd8g7pPziSugE7NxNDN+CNiBjWkvMzM7O25xlpAUkfBtZFxG3AFWQzzUpJH0tVTgMeAoiIF4EXgYvZslzbWJt7At0i4tep7gFpV29gtaQyshlpveeAEWn780BZ2r4f+ErBtdTdI+JNYIWkMalMKWmbmVkH8Yx0a0OAKyRtBjaSXQ8tJ1s63QmYD9xUUP92oE9ELGumzb7ALWl2CfAf6fd3gSeANel371T+U+AeSQvJrtG+DRARsyQNA6ol/QP4L+A7ZEn4RkkXkyXdO4CF23n+ZmbWSsou99n2kHQdsCAifl7sWFqqqqoqqqurix2GmVmnIqkmIqoa2+cZ6XaSVEM2W7yg2LGYmVnxOJFup/R1k61IegLo0aD4tIio7ZiozMysozmRtqGI+FSxYzAzs47lu3bNzMxycCI1MzPLwYnUzMwsBydSMzOzHJxIzczMcnAiNTMzy8GJ1MzMLAd/j7TE1K6qo3LizGKHYWZdzHOXHVfsEIrGM1IzM7McnEjNzMxycCI1MzPLwYm0lSSdLmmRpIWSbpV0gqQnJC2Q9AdJe0nqJunPkvqkY7pJ+oukPunn15Lmp5+DU51Jkm6W9KCkZyWdn8orJS2T9FNJSyT9XtL70r59Jc2SVCPpEUkDizcyZmalyYm0FSQNAi4GjoiIocDXgEeBgyJiONlLtb8VEZuB28heug1wFLAwItYAVwM/joiRwMnAzwq6GAh8FjgQ+J6kslQ+ALg+IgYBb6TjACYDE9KbaC4Ebmgi7vGSqiVVb1pXl3sczMxsC9+12zpHANMj4hWAiHhN0hDgTkkVwM7AilT3ZuAe4CrgLOCWVH4UsJ+k+jZ3k9Qrbc+MiA3ABkkvA3ul8hUR8XTargEq0zGjgekFbTV8hRspzslkSZceFQP8JnczszbkRJrftcCVEXGvpMOBSQAR8YKklyQdQTbDrJ+ddiObwa4vbCQlww0FRZvY8ufTsPx9qZ03ImJYm56NmZm1ipd2W2cOMEbSHgCSdgfKgVVp/xkN6v+MbIl3ekRsSmW/BybUV5C0XYkwIt4EVkgak9qRpKHb05aZmW0/J9JWiIglwKXAQ5IWAleSzUCnS6oBXmlwyL1AL7Ys6wKcD1SlG5aWAufkCGkccHaKZQlwYo62zMxsOyjCl8zai6QqshuLPl3sWOr1qBgQFWdcVewwzKyL6epPNpJUExFVje3zNdJ2ImkicC5bro3uEIb0Lae6i/+FNzPrSF7abScRcVlEfCQiHi12LGZm1n6cSM3MzHJwIjUzM8vBidTMzCwHJ1IzM7McnEjNzMxycCI1MzPLwYnUzMwsBydSMzOzHJxIzczMcvAjAktM7ao6KifOLHYYZlYCuvrzd+t5RmpmZpaDE2mRSaqUtDhtHy7pvrT9+fTgezMz24F5aXcHFRH3kr3P1MzMdmCekeYkaVdJMyUtlLRY0lhJIyU9nsqelNQ7zTwfkfRU+hm9jXbPlHRd2q6UNCe9DPwBSfuk8imSrkl9PSvplI44ZzMz28Iz0vyOAV6MiOMAJJUDC4CxETFf0m7A34GXgaMjYr2kAcCvgEZfEtuIa4GpETFV0lnANcBJaV8FcAgwkGwGO6PhwZLGA+MBuu/WZ/vO0szMGuUZaX61wNGSLpf0aWAfYHVEzAeIiDcj4h2gDPippFpgOrBfK/oYBfwybd9Kljjr3R0RmyNiKbBXYwdHxOSIqIqIqu67lLfq5MzMrHmekeYUEc9IOgA4FrgEmNNE1X8HXgKGkv0HZn0bhbChYFtt1KaZmbWQZ6Q5SfowsC4ibgOuAD4FVEgamfb3lrQTUE42U90MnAZ0b0U3jwNfTNvjgEf
aKn4zM8vHM9L8hgBXSNoMbATOJZsZXivpfWTXR48CbgB+Lel0YBbwdiv6mADcIumbwBrgK20Yv5mZ5aCIKHYM1oF6VAyIijOuKnYYZlYCutKTjSTVRESjN4h6RlpihvQtp7oL/eU2Mys2XyM1MzPLwYnUzMwsBydSMzOzHJxIzczMcnAiNTMzy8GJ1MzMLAcnUjMzsxycSM3MzHJwIjUzM8vBTzYqMbWr6qicOLPYYZiZdaj2fFyhZ6RmZmY5OJGamZnl4ETaSUg6U9J1afuc9Do2MzMrMl8j7QTSi8HfFRE3FSsWMzPbmhNpB5L0XeDLZC/nfgGoAeqA8cDOwF+A0yJinaQpwHpgOPAYsKignUnA2oj4kaSPATcBfYBNwJiI+GtHnZOZWanz0m4HkTQSOBkYCnwOqH9B7G8iYmREDAWWAWcXHNYPGB0R32im6duB69Pxo4HVjfQ9XlK1pOpN6+ra4GzMzKyeZ6Qd52DgnohYD6yX9NtUPljSJcD7gV7A7IJjpkfEpqYalNQb6BsRdwGktt8jIiYDkwF6VAyI3GdiZmbv8oy0+KYA50XEEOD7QM+CfW8XJSIzM2sxJ9KO8xhwgqSeknoBx6fy3sBqSWXAuNY0GBFvASslnQQgqYekXdoyaDMza54TaQeJiPnAvWQ3Df0OqCW70ei7wBNkifZP29H0acD5khYBjwMfapOAzcysRRThS2YdRVKviFibZo0PA+Mj4qmOjKGqqiqqq6s7skszs05PUk1EVDW2zzcbdazJkvYjuw46taOTqJmZtT0n0g4UEV8qdgxmZta2fI3UzMwsBydSMzOzHJxIzczMcvBduyVG0lvA8mLHsQPZE3il2EHsQDweW3gstlbq4/GRiOjT2A7fbFR6ljd1C3cpklTt8djC47GFx2JrHo+meWnXzMwsBydSMzOzHJxIS8/kYgewg/F4bM3jsYXHYmsejyb4ZiMzM7McPCM1MzPLwYnUzMwsByfSEiLpGEnLJf1F0sRix9MRJN0s6WVJiwvKdpd0v6Q/p98fSOWSdE0an0WSDihe5G1P0t6S5kpaKmmJpK+l8pIbj/Re4CclLUxj8f1U3l/SE+mc75S0cyrvkT7/Je2vLLtjiJIAAALmSURBVGb87UVSd0kLJN2XPpf0eLSUE2mJkNQduB74HLAf8C/pTTRd3RTgmAZlE4EHImIA8ED6DNnYDEg/44EbOyjGjvIOcEFE7AccBPxb+jtQiuOxATgiIoYCw4BjJB0EXA78OCI+BrwOnJ3qnw28nsp/nOp1RV8DlhV8LvXxaBEn0tJxIPCXiHg2Iv4B3AGcWOSY2l1EPAy81qD4RGBq2p4KnFRQ/ovI/BF4v6SKjom0/UXE6vpX90XEW2T/YPalBMcjndPa9LEs/QRwBDAjlTcci/oxmgEcKUkdFG6HkNQPOA74WfosSng8WsOJtHT0BV4o+LwylZWivSJiddr+G7BX2i6ZMUpLccOBJyjR8UjLmE8DLwP3A38F3oiId1KVwvN9dyzS/jpgj46NuN1dBXwL2Jw+70Fpj0eLOZFaSYvs+18l9R0wSb2AXwNfj4g3C/eV0nhExKaIGAb0I1uxGVjkkIpG0vHAyxFRU+xYOiMn0tKxCti74HO/VFaKXqpfoky/X07lXX6MJJWRJdHbI+I3qbhkxwMgIt4A5gKjyJav659BXni+745F2l8OvNrBobang4HPS3qO7LLPEcDVlO54tIoTaemYDwxId+HtDHwRuLfIMRXLvcAZafsM4J6C8tPT3aoHAXUFS56dXrqG9XNgWURcWbCr5MZDUh9J70/b7wOOJrtmPBc4JVVrOBb1Y3QKMCe60NNsIuI/IqJfRFSS/dswJyLGUaLj0Vp+slEJkXQs2XWQ7sDNEXFpkUNqd5J+BRxO9gqol4DvAXcD04B9gOeBUyPitZRoriO7y3cd8JWIqC5G3O1B0iHAI0AtW66DfYfsOmlJjYek/clululONqGYFhH/KemjZDO
y3YEFwJcjYoOknsCtZNeVXwO+GBHPFif69iXpcODCiDje49EyTqRmZmY5eGnXzMwsBydSMzOzHJxIzczMcnAiNTMzy8GJ1MzMLAcnUjMzsxycSM3MzHL4Hx9S2FkSapv/AAAAAElFTkSuQmCC\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "korean_ingredient_df = create_ingredient_df(korean_df)\r\n", + "korean_ingredient_df.head(10).plot.barh()" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 15 + } + ], + "source": [ + "feature_df= df.drop(['cuisine','Unnamed: 0','rice','garlic','ginger'], axis=1)\n", + "labels_df = df.cuisine #.unique()\n", + "feature_df.head()\n" + ] + }, + { + "source": [ + "Balansera data med SMOTE-översampling till den högsta klassen. Läs mer här: https://imbalanced-learn.org/dev/references/generated/imblearn.over_sampling.SMOTE.html\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "oversample = SMOTE()\n", + "transformed_feature_df, transformed_label_df = oversample.fit_resample(feature_df, labels_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "new label count: korean 799\nchinese 799\njapanese 799\nindian 799\nthai 799\nName: cuisine, dtype: int64\nold label count: korean 799\nindian 598\nchinese 442\njapanese 320\nthai 289\nName: cuisine, dtype: int64\n" + ] + } + ], + "source": [ + "print(f'new label count: {transformed_label_df.value_counts()}')\r\n", + "print(f'old label count: {df.cuisine.value_counts()}')" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 
0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 18 + } + ], + "source": [ + "transformed_feature_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " cuisine almond angelica anise anise_seed apple apple_brandy \\\n", + "0 indian 0 0 0 0 0 0 \n", + "1 indian 1 0 0 0 0 0 \n", + "2 indian 0 0 0 0 0 0 \n", + "3 indian 0 0 0 0 0 0 \n", + "4 indian 0 0 0 0 0 0 \n", + "... ... ... ... ... ... ... ... \n", + "3990 thai 0 0 0 0 0 0 \n", + "3991 thai 0 0 0 0 0 0 \n", + "3992 thai 0 0 0 0 0 0 \n", + "3993 thai 0 0 0 0 0 0 \n", + "3994 thai 0 0 0 0 0 0 \n", + "\n", + " apricot armagnac artemisia ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "... ... ... ... ... ... ... ... \n", + "3990 0 0 0 ... 0 0 0 \n", + "3991 0 0 0 ... 0 0 0 \n", + "3992 0 0 0 ... 0 0 0 \n", + "3993 0 0 0 ... 0 0 0 \n", + "3994 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "... ... ... ... ... ... ... ... \n", + "3990 0 0 0 0 0 0 0 \n", + "3991 0 0 0 0 0 0 0 \n", + "3992 0 0 0 0 0 0 0 \n", + "3993 0 0 0 0 0 0 0 \n", + "3994 0 0 0 0 0 0 0 \n", + "\n", + "[3995 rows x 381 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisia...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
0indian000000000...0000000000
1indian100000000...0000000000
2indian000000000...0000000000
3indian000000000...0000000000
4indian000000000...0000000010
..................................................................
3990thai000000000...0000000000
3991thai000000000...0000000000
3992thai000000000...0000000000
3993thai000000000...0000000000
3994thai000000000...0000000000
\n

3995 rows × 381 columns

\n
" + }, + "metadata": {}, + "execution_count": 19 + } + ], + "source": [ + "# export transformed data to new df for classification\n", + "transformed_df = pd.concat([transformed_label_df,transformed_feature_df],axis=1, join='outer')\n", + "transformed_df" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\nRangeIndex: 3995 entries, 0 to 3994\nColumns: 381 entries, cuisine to zucchini\ndtypes: int64(380), object(1)\nmemory usage: 11.6+ MB\n" + ] + } + ], + "source": [ + "transformed_df.info()" + ] + }, + { + "source": [ + "Spara filen för framtida användning\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "transformed_df.to_csv(\"../../data/cleaned_cuisines.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, vänligen notera att automatiska översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på dess originalspråk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. 
Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "1da12ed6d238756959b8de9cac2a35a2", + "translation_date": "2025-09-06T14:52:08+00:00", + "source_file": "4-Classification/1-Introduction/solution/notebook.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/translations/sv/4-Classification/2-Classifiers-1/notebook.ipynb b/translations/sv/4-Classification/2-Classifiers-1/notebook.ipynb new file mode 100644 index 000000000..8119e7f54 --- /dev/null +++ b/translations/sv/4-Classification/2-Classifiers-1/notebook.ipynb @@ -0,0 +1,41 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "68829b06b4dcd512d3327849191f4d7f", + "translation_date": "2025-09-06T14:32:40+00:00", + "source_file": "4-Classification/2-Classifiers-1/notebook.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# Bygg klassificeringsmodeller\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { 
+ "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, vänligen notera att automatiska översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på dess originalspråk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sv/4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb b/translations/sv/4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb new file mode 100644 index 000000000..521b277d9 --- /dev/null +++ b/translations/sv/4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb @@ -0,0 +1,1298 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_11-R.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "6ea6a5171b1b99b7b5a55f7469c048d2", + "translation_date": "2025-09-06T14:37:24+00:00", + "source_file": "4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb", + "language_code": "sv" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Bygg en klassificeringsmodell: Utsökta asiatiska och indiska rätter\n" + ], + "metadata": { + "id": "zs2woWv_HoE8" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Klassificering av kök 1\n", + "\n", + "I den här lektionen ska vi utforska olika klassificeringsmetoder för att *förutsäga ett visst nationellt kök baserat på en grupp ingredienser.* Samtidigt kommer vi att lära oss mer 
om hur algoritmer kan användas för klassificeringsuppgifter.\n", + "\n", + "### [**Quiz före lektionen**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/21/)\n", + "\n", + "### **Förberedelse**\n", + "\n", + "Den här lektionen bygger vidare på vår [föregående lektion](https://github.com/microsoft/ML-For-Beginners/blob/main/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb) där vi:\n", + "\n", + "- Gjorde en mjuk introduktion till klassificeringar med hjälp av ett dataset om alla fantastiska kök från Asien och Indien 😋.\n", + "\n", + "- Utforskade några [dplyr-verb](https://dplyr.tidyverse.org/) för att förbereda och städa vår data.\n", + "\n", + "- Skapade vackra visualiseringar med ggplot2.\n", + "\n", + "- Visade hur man hanterar obalanserad data genom att förbehandla den med [recipes](https://recipes.tidymodels.org/articles/Simple_Example.html).\n", + "\n", + "- Demonstrerade hur man använder `prep` och `bake` på vår recipe för att säkerställa att den fungerar som den ska.\n", + "\n", + "#### **Förkunskaper**\n", + "\n", + "För den här lektionen behöver vi följande paket för att städa, förbereda och visualisera vår data:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) är en [samling av R-paket](https://www.tidyverse.org/packages) som är utformade för att göra datavetenskap snabbare, enklare och roligare!\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) är ett [ramverk av paket](https://www.tidymodels.org/packages/) för modellering och maskininlärning.\n", + "\n", + "- `themis`: [themis-paketet](https://themis.tidymodels.org/) erbjuder extra steg för att hantera obalanserad data.\n", + "\n", + "- `nnet`: [nnet-paketet](https://cran.r-project.org/web/packages/nnet/nnet.pdf) tillhandahåller funktioner för att uppskatta feed-forward neurala nätverk med ett enda dolt lager, samt för multinomiala logistiska regressionsmodeller.\n", + "\n", + "Du kan installera dem som:\n" + ], + "metadata": { + "id": "iDFOb3ebHwQC" + } 
+ }, + { + "cell_type": "markdown", + "source": [ + "`install.packages(c(\"tidyverse\", \"tidymodels\", \"themis\", \"nnet\", \"DataExplorer\", \"here\"))`\n", + "\n", + "Alternativt kontrollerar skriptet nedan om du har de paket som krävs för att slutföra denna modul och installerar dem åt dig om de saknas.\n" + ], + "metadata": { + "id": "4V85BGCjII7F" + } + }, + { + "cell_type": "code", + "execution_count": 2, + "source": [ + "suppressWarnings(if (!require(\"pacman\"))install.packages(\"pacman\"))\r\n", + "\r\n", + "pacman::p_load(tidyverse, tidymodels, themis, here)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Loading required package: pacman\n", + "\n" + ] + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "an5NPyyKIKNR", + "outputId": "834d5e74-f4b8-49f9-8ab5-4c52ff2d7bc8" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 1. Dela upp data i tränings- och testuppsättningar.\n", + "\n", + "Vi börjar med att välja några steg från vår tidigare lektion.\n", + "\n", + "### Ta bort de vanligaste ingredienserna som skapar förvirring mellan olika kök, med hjälp av `dplyr::select()`.\n", + "\n", + "Alla älskar ris, vitlök och ingefära!\n" + ], + "metadata": { + "id": "0ax9GQLBINVv" + } + }, + { + "cell_type": "code", + "execution_count": 3, + "source": [ + "# Load the original cuisines data\r\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/4-Classification/data/cuisines.csv\")\r\n", + "\r\n", + "# Drop id column, rice, garlic and ginger from our original data set\r\n", + "df_select <- df %>% \r\n", + " select(-c(1, rice, garlic, ginger)) %>%\r\n", + " # Encode cuisine column as categorical\r\n", + " mutate(cuisine = factor(cuisine))\r\n", + "\r\n", + "# Display new data set\r\n", + "df_select %>% \r\n", + " slice_head(n = 5)\r\n", + "\r\n", + "# Display distribution of cuisines\r\n", + "df_select %>% \r\n", + " count(cuisine) %>% \r\n", + " 
arrange(desc(n))" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "New names:\n", + "* `` -> ...1\n", + "\n", + "\u001b[1m\u001b[1mRows: \u001b[1m\u001b[22m\u001b[34m\u001b[34m2448\u001b[34m\u001b[39m \u001b[1m\u001b[1mColumns: \u001b[1m\u001b[22m\u001b[34m\u001b[34m385\u001b[34m\u001b[39m\n", + "\n", + "\u001b[36m──\u001b[39m \u001b[1m\u001b[1mColumn specification\u001b[1m\u001b[22m \u001b[36m────────────────────────────────────────────────────────\u001b[39m\n", + "\u001b[1mDelimiter:\u001b[22m \",\"\n", + "\u001b[31mchr\u001b[39m (1): cuisine\n", + "\u001b[32mdbl\u001b[39m (384): ...1, almond, angelica, anise, anise_seed, apple, apple_brandy, a...\n", + "\n", + "\n", + "\u001b[36mℹ\u001b[39m Use \u001b[30m\u001b[47m\u001b[30m\u001b[47m`spec()`\u001b[47m\u001b[30m\u001b[49m\u001b[39m to retrieve the full column specification for this data.\n", + "\u001b[36mℹ\u001b[39m Specify the column types or set \u001b[30m\u001b[47m\u001b[30m\u001b[47m`show_col_types = FALSE`\u001b[47m\u001b[30m\u001b[49m\u001b[39m to quiet this message.\n", + "\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine almond angelica anise anise_seed apple apple_brandy apricot armagnac\n", + "1 indian 0 0 0 0 0 0 0 0 \n", + "2 indian 1 0 0 0 0 0 0 0 \n", + "3 indian 0 0 0 0 0 0 0 0 \n", + "4 indian 0 0 0 0 0 0 0 0 \n", + "5 indian 0 0 0 0 0 0 0 0 \n", + " artemisia ⋯ whiskey white_bread white_wine whole_grain_wheat_flour wine wood\n", + "1 0 ⋯ 0 0 0 0 0 0 \n", + "2 0 ⋯ 0 0 0 0 0 0 \n", + "3 0 ⋯ 0 0 0 0 0 0 \n", + "4 0 ⋯ 0 0 0 0 0 0 \n", + "5 0 ⋯ 0 0 0 0 0 0 \n", + " yam yeast yogurt zucchini\n", + "1 0 0 0 0 \n", + "2 0 0 0 0 \n", + "3 0 0 0 0 \n", + "4 0 0 0 0 \n", + "5 0 0 1 0 " + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 381\n", + "\n", + "| cuisine <fct> | almond <dbl> | angelica <dbl> | anise <dbl> | anise_seed <dbl> | apple <dbl> | apple_brandy <dbl> | apricot <dbl> | armagnac <dbl> | artemisia <dbl> | ⋯ ⋯ | 
whiskey <dbl> | white_bread <dbl> | white_wine <dbl> | whole_grain_wheat_flour <dbl> | wine <dbl> | wood <dbl> | yam <dbl> | yeast <dbl> | yogurt <dbl> | zucchini <dbl> |\n", + "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", + "| indian | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| indian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| indian | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| indian | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| indian | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 381\n", + "\\begin{tabular}{lllllllllllllllllllll}\n", + " cuisine & almond & angelica & anise & anise\\_seed & apple & apple\\_brandy & apricot & armagnac & artemisia & ⋯ & whiskey & white\\_bread & white\\_wine & whole\\_grain\\_wheat\\_flour & wine & wood & yam & yeast & yogurt & zucchini\\\\\n", + " & & & & & & & & & & ⋯ & & & & & & & & & & \\\\\n", + "\\hline\n", + "\t indian & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t indian & 1 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t indian & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t indian & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t indian & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 1 & 0\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 381
cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiawhiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
<fct><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
indian0000000000000000000
indian1000000000000000000
indian0000000000000000000
indian0000000000000000000
indian0000000000000000010
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine n \n", + "1 korean 799\n", + "2 indian 598\n", + "3 chinese 442\n", + "4 japanese 320\n", + "5 thai 289" + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 2\n", + "\n", + "| cuisine <fct> | n <int> |\n", + "|---|---|\n", + "| korean | 799 |\n", + "| indian | 598 |\n", + "| chinese | 442 |\n", + "| japanese | 320 |\n", + "| thai | 289 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 2\n", + "\\begin{tabular}{ll}\n", + " cuisine & n\\\\\n", + " & \\\\\n", + "\\hline\n", + "\t korean & 799\\\\\n", + "\t indian & 598\\\\\n", + "\t chinese & 442\\\\\n", + "\t japanese & 320\\\\\n", + "\t thai & 289\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 2
cuisinen
<fct><int>
korean 799
indian 598
chinese 442
japanese320
thai 289
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 735 + }, + "id": "jhCrrH22IWVR", + "outputId": "d444a85c-1d8b-485f-bc4f-8be2e8f8217c" + } + }, + { + "cell_type": "markdown", + "source": [ + "Perfekt! Nu är det dags att dela upp datan så att 70 % går till träning och 30 % till testning. Vi kommer också att använda en `stratifieringsteknik` vid uppdelningen för att `behålla proportionen av varje kök` i tränings- och valideringsdatan.\n", + "\n", + "[rsample](https://rsample.tidymodels.org/), ett paket i Tidymodels, erbjuder infrastruktur för effektiv datauppdelning och resampling:\n" + ], + "metadata": { + "id": "AYTjVyajIdny" + } + }, + { + "cell_type": "code", + "execution_count": 4, + "source": [ + "# Load the core Tidymodels packages into R session\r\n", + "library(tidymodels)\r\n", + "\r\n", + "# Create split specification\r\n", + "set.seed(2056)\r\n", + "cuisines_split <- initial_split(data = df_select,\r\n", + " strata = cuisine,\r\n", + " prop = 0.7)\r\n", + "\r\n", + "# Extract the data in each split\r\n", + "cuisines_train <- training(cuisines_split)\r\n", + "cuisines_test <- testing(cuisines_split)\r\n", + "\r\n", + "# Print the number of cases in each split\r\n", + "cat(\"Training cases: \", nrow(cuisines_train), \"\\n\",\r\n", + " \"Test cases: \", nrow(cuisines_test), sep = \"\")\r\n", + "\r\n", + "# Display the first few rows of the training set\r\n", + "cuisines_train %>% \r\n", + " slice_head(n = 5)\r\n", + "\r\n", + "\r\n", + "# Display distribution of cuisines in the training set\r\n", + "cuisines_train %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(desc(n))" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Training cases: 1712\n", + "Test cases: 736" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine almond angelica anise anise_seed apple apple_brandy apricot armagnac\n", + "1 chinese 0 0 0 0 0 0 
0 0 \n", + "2 chinese 0 0 0 0 0 0 0 0 \n", + "3 chinese 0 0 0 0 0 0 0 0 \n", + "4 chinese 0 0 0 0 0 0 0 0 \n", + "5 chinese 0 0 0 0 0 0 0 0 \n", + " artemisia ⋯ whiskey white_bread white_wine whole_grain_wheat_flour wine wood\n", + "1 0 ⋯ 0 0 0 0 1 0 \n", + "2 0 ⋯ 0 0 0 0 1 0 \n", + "3 0 ⋯ 0 0 0 0 0 0 \n", + "4 0 ⋯ 0 0 0 0 0 0 \n", + "5 0 ⋯ 0 0 0 0 0 0 \n", + " yam yeast yogurt zucchini\n", + "1 0 0 0 0 \n", + "2 0 0 0 0 \n", + "3 0 0 0 0 \n", + "4 0 0 0 0 \n", + "5 0 0 0 0 " + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 381\n", + "\n", + "| cuisine <fct> | almond <dbl> | angelica <dbl> | anise <dbl> | anise_seed <dbl> | apple <dbl> | apple_brandy <dbl> | apricot <dbl> | armagnac <dbl> | artemisia <dbl> | ⋯ ⋯ | whiskey <dbl> | white_bread <dbl> | white_wine <dbl> | whole_grain_wheat_flour <dbl> | wine <dbl> | wood <dbl> | yam <dbl> | yeast <dbl> | yogurt <dbl> | zucchini <dbl> |\n", + "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 381\n", + "\\begin{tabular}{lllllllllllllllllllll}\n", + " cuisine & almond & angelica & anise & anise\\_seed & apple & apple\\_brandy & apricot & armagnac & artemisia & ⋯ & whiskey & white\\_bread & white\\_wine & whole\\_grain\\_wheat\\_flour & wine & wood & yam & yeast & yogurt & zucchini\\\\\n", + " & & & & & & & & & & ⋯ & & & & & & & & & & \\\\\n", + "\\hline\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 1 & 0 & 0 & 0 & 0 & 
0\\\\\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 1 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 381
cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiawhiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
<fct><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
chinese0000000000000100000
chinese0000000000000100000
chinese0000000000000000000
chinese0000000000000000000
chinese0000000000000000000
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine n \n", + "1 korean 559\n", + "2 indian 418\n", + "3 chinese 309\n", + "4 japanese 224\n", + "5 thai 202" + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 2\n", + "\n", + "| cuisine <fct> | n <int> |\n", + "|---|---|\n", + "| korean | 559 |\n", + "| indian | 418 |\n", + "| chinese | 309 |\n", + "| japanese | 224 |\n", + "| thai | 202 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 2\n", + "\\begin{tabular}{ll}\n", + " cuisine & n\\\\\n", + " & \\\\\n", + "\\hline\n", + "\t korean & 559\\\\\n", + "\t indian & 418\\\\\n", + "\t chinese & 309\\\\\n", + "\t japanese & 224\\\\\n", + "\t thai & 202\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 2
cuisinen
<fct><int>
korean 559
indian 418
chinese 309
japanese224
thai 202
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 535 + }, + "id": "w5FWIkEiIjdN", + "outputId": "2e195fd9-1a8f-4b91-9573-cce5582242df" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 2. Hantera obalanserad data\n", + "\n", + "Som du kanske har märkt i den ursprungliga datamängden såväl som i vår träningsuppsättning, finns det en ganska ojämn fördelning i antalet kök. Koreanska kök är *nästan* tre gånger fler än thailändska kök. Obalanserad data har ofta negativa effekter på modellens prestanda. Många modeller fungerar bäst när antalet observationer är lika och har därför svårt att hantera obalanserad data.\n", + "\n", + "Det finns huvudsakligen två sätt att hantera obalanserade datamängder:\n", + "\n", + "- lägga till observationer till minoritetsklassen: `Över-sampling`, t.ex. med en SMOTE-algoritm som syntetiskt genererar nya exempel för minoritetsklassen med hjälp av närmaste grannar till dessa fall.\n", + "\n", + "- ta bort observationer från majoritetsklassen: `Under-sampling`\n", + "\n", + "I vår tidigare lektion visade vi hur man hanterar obalanserade datamängder med hjälp av ett `recept`. Ett recept kan ses som en ritning som beskriver vilka steg som ska tillämpas på en datamängd för att göra den redo för dataanalys. I vårt fall vill vi ha en jämn fördelning av antalet kök i vår `träningsuppsättning`. 
Låt oss sätta igång direkt.\n" + ], + "metadata": { + "id": "daBi9qJNIwqW" + } + }, + { + "cell_type": "code", + "execution_count": 5, + "source": [ + "# Load themis package for dealing with imbalanced data\r\n", + "library(themis)\r\n", + "\r\n", + "# Create a recipe for preprocessing training data\r\n", + "cuisines_recipe <- recipe(cuisine ~ ., data = cuisines_train) %>% \r\n", + " step_smote(cuisine)\r\n", + "\r\n", + "# Print recipe\r\n", + "cuisines_recipe" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Data Recipe\n", + "\n", + "Inputs:\n", + "\n", + " role #variables\n", + " outcome 1\n", + " predictor 380\n", + "\n", + "Operations:\n", + "\n", + "SMOTE based on cuisine" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 200 + }, + "id": "Az6LFBGxI1X0", + "outputId": "29d71d85-64b0-4e62-871e-bcd5398573b6" + } + }, + { + "cell_type": "markdown", + "source": [ + "Du kan självklart gå vidare och bekräfta (genom att förbereda och baka) att receptet fungerar som du förväntar dig - alla kökskategorier har `559` observationer.\n", + "\n", + "Eftersom vi kommer att använda detta recept som en förprocessor för modellering, kommer en `workflow()` att hantera all förberedelse och bakning åt oss, så vi behöver inte manuellt uppskatta receptet.\n", + "\n", + "Nu är vi redo att träna en modell 👩‍💻👨‍💻!\n", + "\n", + "## 3. Välja din klassificerare\n", + "\n", + "

\n", + " \n", + "

Konstverk av @allison_horst
\n" + ], + "metadata": { + "id": "NBL3PqIWJBBB" + } + }, + { + "cell_type": "markdown", + "source": [ + "Nu måste vi bestämma vilken algoritm vi ska använda för uppgiften 🤔.\n", + "\n", + "I Tidymodels erbjuder [`parsnip-paketet`](https://parsnip.tidymodels.org/index.html) ett konsekvent gränssnitt för att arbeta med modeller över olika motorer (paket). Se gärna dokumentationen för parsnip för att utforska [modelltyper och motorer](https://www.tidymodels.org/find/parsnip/#models) samt deras motsvarande [modellargument](https://www.tidymodels.org/find/parsnip/#model-args). Utbudet kan verka överväldigande vid första anblicken. Till exempel inkluderar följande metoder alla klassificeringstekniker:\n", + "\n", + "- C5.0 Regelbaserade klassificeringsmodeller\n", + "\n", + "- Flexibla diskriminantmodeller\n", + "\n", + "- Linjära diskriminantmodeller\n", + "\n", + "- Regulariserade diskriminantmodeller\n", + "\n", + "- Logistiska regressionsmodeller\n", + "\n", + "- Multinomiala regressionsmodeller\n", + "\n", + "- Naiva Bayes-modeller\n", + "\n", + "- Supportvektormaskiner\n", + "\n", + "- Närmaste grannar\n", + "\n", + "- Beslutsträd\n", + "\n", + "- Ensemblemetoder\n", + "\n", + "- Neurala nätverk\n", + "\n", + "Listan fortsätter!\n", + "\n", + "### **Vilken klassificerare ska man välja?**\n", + "\n", + "Så, vilken klassificerare ska du välja? Ofta är det en bra idé att testa flera och leta efter ett bra resultat.\n", + "\n", + "> AutoML löser detta problem smidigt genom att köra dessa jämförelser i molnet, vilket gör att du kan välja den bästa algoritmen för dina data. Prova det [här](https://docs.microsoft.com/learn/modules/automate-model-selection-with-azure-automl/?WT.mc_id=academic-77952-leestott)\n", + "\n", + "Valet av klassificerare beror också på vårt problem. 
Till exempel, när resultatet kan kategoriseras i `fler än två klasser`, som i vårt fall, måste du använda en `multiklassklassificeringsalgoritm` istället för `binär klassificering.`\n", + "\n", + "### **En bättre metod**\n", + "\n", + "En bättre metod än att gissa vilt är att följa idéerna i detta nedladdningsbara [ML Cheat Sheet](https://docs.microsoft.com/azure/machine-learning/algorithm-cheat-sheet?WT.mc_id=academic-77952-leestott). Här upptäcker vi att, för vårt multiklassproblem, har vi några alternativ:\n", + "\n", + "

\n", + " \n", + "

En del av Microsofts algoritm-översikt, som beskriver alternativ för multiklassklassificering
\n" + ], + "metadata": { + "id": "a6DLAZ3vJZ14" + } + }, + { + "cell_type": "markdown", + "source": [ + "### **Resonemang**\n", + "\n", + "Låt oss se om vi kan resonera oss fram till olika tillvägagångssätt med de begränsningar vi har:\n", + "\n", + "- **Djupa neurala nätverk är för tunga**. Med tanke på vårt rena, men minimala dataset, och det faktum att vi kör träningen lokalt via notebooks, är djupa neurala nätverk för resurskrävande för denna uppgift.\n", + "\n", + "- **Ingen tvåklassklassificerare**. Vi använder inte en tvåklassklassificerare, vilket utesluter one-vs-all.\n", + "\n", + "- **Beslutsträd eller logistisk regression kan fungera**. Ett beslutsträd kan fungera, eller multinomial regression/multiklass logistisk regression för multiklassdata.\n", + "\n", + "- **Multiklass Boosted Decision Trees löser ett annat problem**. Multiklass Boosted Decision Trees är mest lämpliga för icke-parametriska uppgifter, t.ex. uppgifter som är utformade för att skapa rankningar, så de är inte användbara för oss.\n", + "\n", + "Dessutom, innan man vanligtvis ger sig in på mer komplexa maskininlärningsmodeller, t.ex. ensemblemetoder, är det en bra idé att bygga den enklaste möjliga modellen för att få en uppfattning om vad som händer. Så för denna lektion börjar vi med en `multinomial regression`-modell.\n", + "\n", + "> Logistisk regression är en teknik som används när utfallsvariabeln är kategorisk (eller nominell). För binär logistisk regression är antalet utfallsvariabler två, medan antalet utfallsvariabler för multinomial logistisk regression är fler än två. Se [Avancerade regressionsmetoder](https://bookdown.org/chua/ber642_advanced_regression/multinomial-logistic-regression.html) för vidare läsning.\n", + "\n", + "## 4. Träna och utvärdera en multinomial logistisk regressionsmodell.\n", + "\n", + "I Tidymodels definierar `parsnip::multinom_reg()` en modell som använder linjära prediktorer för att förutsäga multiklassdata med hjälp av multinomialfördelningen. 
Se `?multinom_reg()` för de olika sätt/motorer du kan använda för att anpassa denna modell.\n", + "\n", + "För detta exempel kommer vi att anpassa en multinomial regressionsmodell via den förvalda [nnet](https://cran.r-project.org/web/packages/nnet/nnet.pdf)-motorn.\n", + "\n", + "> Jag valde ett värde för `penalty` lite slumpmässigt. Det finns bättre sätt att välja detta värde, nämligen genom att använda `resampling` och `tuning` av modellen, vilket vi kommer att diskutera senare.\n", + ">\n", + "> Se [Tidymodels: Kom igång](https://www.tidymodels.org/start/tuning/) om du vill lära dig mer om hur man finjusterar modellens hyperparametrar.\n" + ], + "metadata": { + "id": "gWMsVcbBJemu" + } + }, + { + "cell_type": "code", + "execution_count": 6, + "source": [ + "# Create a multinomial regression model specification\r\n", + "mr_spec <- multinom_reg(penalty = 1) %>% \r\n", + " set_engine(\"nnet\", MaxNWts = 2086) %>% \r\n", + " set_mode(\"classification\")\r\n", + "\r\n", + "# Print model specification\r\n", + "mr_spec" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Multinomial Regression Model Specification (classification)\n", + "\n", + "Main Arguments:\n", + " penalty = 1\n", + "\n", + "Engine-Specific Arguments:\n", + " MaxNWts = 2086\n", + "\n", + "Computational engine: nnet \n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 166 + }, + "id": "Wq_fcyQiJvfG", + "outputId": "c30449c7-3864-4be7-f810-72a003743e2d" + } + }, + { + "cell_type": "markdown", + "source": [ + "Bra jobbat 🥳! Nu när vi har ett recept och en modellspecifikation behöver vi hitta ett sätt att kombinera dem till ett objekt som först förbehandlar data, sedan anpassar modellen på den förbehandlade datan och även möjliggör potentiella efterbehandlingsaktiviteter. 
I Tidymodels kallas detta praktiska objekt för en [`workflow`](https://workflows.tidymodels.org/) och håller smidigt dina modellkomponenter! Detta är vad vi skulle kalla *pipelines* i *Python*.\n", + "\n", + "Så låt oss samla allt i en workflow!📦\n" + ], + "metadata": { + "id": "NlSbzDfgJ0zh" + } + }, + { + "cell_type": "code", + "execution_count": 7, + "source": [ + "# Bundle recipe and model specification\r\n", + "mr_wf <- workflow() %>% \r\n", + " add_recipe(cuisines_recipe) %>% \r\n", + " add_model(mr_spec)\r\n", + "\r\n", + "# Print out workflow\r\n", + "mr_wf" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "══ Workflow ════════════════════════════════════════════════════════════════════\n", + "\u001b[3mPreprocessor:\u001b[23m Recipe\n", + "\u001b[3mModel:\u001b[23m multinom_reg()\n", + "\n", + "── Preprocessor ────────────────────────────────────────────────────────────────\n", + "1 Recipe Step\n", + "\n", + "• step_smote()\n", + "\n", + "── Model ───────────────────────────────────────────────────────────────────────\n", + "Multinomial Regression Model Specification (classification)\n", + "\n", + "Main Arguments:\n", + " penalty = 1\n", + "\n", + "Engine-Specific Arguments:\n", + " MaxNWts = 2086\n", + "\n", + "Computational engine: nnet \n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 333 + }, + "id": "Sc1TfPA4Ke3_", + "outputId": "82c70013-e431-4e7e-cef6-9fcf8aad4a6c" + } + }, + { + "cell_type": "markdown", + "source": [ + "Arbetsflöden 👌👌! En **`workflow()`** kan anpassas på ungefär samma sätt som en modell kan. 
Så, dags att träna en modell!\n" + ], + "metadata": { + "id": "TNQ8i85aKf9L" + } + }, + { + "cell_type": "code", + "execution_count": 8, + "source": [ + "# Train a multinomial regression model\n", + "mr_fit <- fit(object = mr_wf, data = cuisines_train)\n", + "\n", + "mr_fit" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "══ Workflow [trained] ══════════════════════════════════════════════════════════\n", + "\u001b[3mPreprocessor:\u001b[23m Recipe\n", + "\u001b[3mModel:\u001b[23m multinom_reg()\n", + "\n", + "── Preprocessor ────────────────────────────────────────────────────────────────\n", + "1 Recipe Step\n", + "\n", + "• step_smote()\n", + "\n", + "── Model ───────────────────────────────────────────────────────────────────────\n", + "Call:\n", + "nnet::multinom(formula = ..y ~ ., data = data, decay = ~1, MaxNWts = ~2086, \n", + " trace = FALSE)\n", + "\n", + "Coefficients:\n", + " (Intercept) almond angelica anise anise_seed apple\n", + "indian 0.19723325 0.2409661 0 -5.004955e-05 -0.1657635 -0.05769734\n", + "japanese 0.13961959 -0.6262400 0 -1.169155e-04 -0.4893596 -0.08585717\n", + "korean 0.22377347 -0.1833485 0 -5.560395e-05 -0.2489401 -0.15657804\n", + "thai -0.04336577 -0.6106258 0 4.903828e-04 -0.5782866 0.63451105\n", + " apple_brandy apricot armagnac artemisia artichoke asparagus\n", + "indian 0 0.37042636 0 -0.09122797 0 -0.27181970\n", + "japanese 0 0.28895643 0 -0.12651100 0 0.14054037\n", + "korean 0 -0.07981259 0 0.55756709 0 -0.66979948\n", + "thai 0 -0.33160904 0 -0.10725182 0 -0.02602152\n", + " avocado bacon baked_potato balm banana barley\n", + "indian -0.46624197 0.16008055 0 0 -0.2838796 0.2230625\n", + "japanese 0.90341344 0.02932727 0 0 -0.4142787 2.0953906\n", + "korean -0.06925382 -0.35804134 0 0 -0.2686963 -0.7233404\n", + "thai -0.21473955 -0.75594439 0 0 0.6784880 -0.4363320\n", + " bartlett_pear basil bay bean beech\n", + "indian 0 -0.7128756 0.1011587 -0.8777275 -0.0004380795\n", + 
"japanese 0 0.1288697 0.9425626 -0.2380748 0.3373437611\n", + "korean 0 -0.2445193 -0.4744318 -0.8957870 -0.0048784496\n", + "thai 0 1.5365848 0.1333256 0.2196970 -0.0113078024\n", + " beef beef_broth beef_liver beer beet\n", + "indian -0.7985278 0.2430186 -0.035598065 -0.002173738 0.01005813\n", + "japanese 0.2241875 -0.3653020 -0.139551027 0.128905553 0.04923911\n", + "korean 0.5366515 -0.6153237 0.213455197 -0.010828645 0.27325423\n", + "thai 0.1570012 -0.9364154 -0.008032213 -0.035063746 -0.28279823\n", + " bell_pepper bergamot berry bitter_orange black_bean\n", + "indian 0.49074330 0 0.58947607 0.191256164 -0.1945233\n", + "japanese 0.09074167 0 -0.25917977 -0.118915977 -0.3442400\n", + "korean -0.57876763 0 -0.07874180 -0.007729435 -0.5220672\n", + "thai 0.92554006 0 -0.07210196 -0.002983296 -0.4614426\n", + " black_currant black_mustard_seed_oil black_pepper black_raspberry\n", + "indian 0 0.38935801 -0.4453495 0\n", + "japanese 0 -0.05452887 -0.5440869 0\n", + "korean 0 -0.03929970 0.8025454 0\n", + "thai 0 -0.21498372 -0.9854806 0\n", + " black_sesame_seed black_tea blackberry blackberry_brandy\n", + "indian -0.2759246 0.3079977 0.191256164 0\n", + "japanese -0.6101687 -0.1671913 -0.118915977 0\n", + "korean 1.5197674 -0.3036261 -0.007729435 0\n", + "thai -0.1755656 -0.1487033 -0.002983296 0\n", + " blue_cheese blueberry bone_oil bourbon_whiskey brandy\n", + "indian 0 0.216164294 -0.2276744 0 0.22427587\n", + "japanese 0 -0.119186087 0.3913019 0 -0.15595599\n", + "korean 0 -0.007821986 0.2854487 0 -0.02562342\n", + "thai 0 -0.004947048 -0.0253658 0 -0.05715244\n", + "\n", + "...\n", + "and 308 more lines." 
+ ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "GMbdfVmTKkJI", + "outputId": "adf9ebdf-d69d-4a64-e9fd-e06e5322292e" + } + }, + { + "cell_type": "markdown", + "source": [ + "Utdata visar de koefficienter som modellen lärde sig under träningen.\n", + "\n", + "### Utvärdera den tränade modellen\n", + "\n", + "Det är dags att se hur modellen presterade 📏 genom att utvärdera den på en testuppsättning! Låt oss börja med att göra förutsägelser på testuppsättningen.\n" + ], + "metadata": { + "id": "tt2BfOxrKmcJ" + } + }, + { + "cell_type": "code", + "execution_count": 9, + "source": [ + "# Make predictions on the test set\n", + "results <- cuisines_test %>% select(cuisine) %>% \n", + " bind_cols(mr_fit %>% predict(new_data = cuisines_test))\n", + "\n", + "# Print out results\n", + "results %>% \n", + " slice_head(n = 5)" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine .pred_class\n", + "1 indian thai \n", + "2 indian indian \n", + "3 indian indian \n", + "4 indian indian \n", + "5 indian indian " + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 2\n", + "\n", + "| cuisine <fct> | .pred_class <fct> |\n", + "|---|---|\n", + "| indian | thai |\n", + "| indian | indian |\n", + "| indian | indian |\n", + "| indian | indian |\n", + "| indian | indian |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 2\n", + "\\begin{tabular}{ll}\n", + " cuisine & .pred\\_class\\\\\n", + " & \\\\\n", + "\\hline\n", + "\t indian & thai \\\\\n", + "\t indian & indian\\\\\n", + "\t indian & indian\\\\\n", + "\t indian & indian\\\\\n", + "\t indian & indian\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 2
cuisine.pred_class
<fct><fct>
indianthai
indianindian
indianindian
indianindian
indianindian
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 248 + }, + "id": "CqtckvtsKqax", + "outputId": "e57fe557-6a68-4217-fe82-173328c5436d" + } + }, + { + "cell_type": "markdown", + "source": [ + "Bra jobbat! I Tidymodels kan utvärdering av modellprestanda göras med [yardstick](https://yardstick.tidymodels.org/) - ett paket som används för att mäta modellers effektivitet med hjälp av prestandamått. Som vi gjorde i vår lektion om logistisk regression, låt oss börja med att beräkna en förvirringsmatris.\n" + ], + "metadata": { + "id": "8w5N6XsBKss7" + } + }, + { + "cell_type": "code", + "execution_count": 10, + "source": [ + "# Confusion matrix for categorical data\n", + "conf_mat(data = results, truth = cuisine, estimate = .pred_class)\n" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " Truth\n", + "Prediction chinese indian japanese korean thai\n", + " chinese 83 1 8 15 10\n", + " indian 4 163 1 2 6\n", + " japanese 21 5 73 25 1\n", + " korean 15 0 11 191 0\n", + " thai 10 11 3 7 70" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 133 + }, + "id": "YvODvsLkK0iG", + "outputId": "bb69da84-1266-47ad-b174-d43b88ca2988" + } + }, + { + "cell_type": "markdown", + "source": [ + "När man arbetar med flera klasser är det generellt mer intuitivt att visualisera detta som en värmekarta, så här:\n" + ], + "metadata": { + "id": "c0HfPL16Lr6U" + } + }, + { + "cell_type": "code", + "execution_count": 11, + "source": [ + "update_geom_defaults(geom = \"tile\", new = list(color = \"black\", alpha = 0.7))\n", + "# Visualize confusion matrix\n", + "results %>% \n", + " conf_mat(cuisine, .pred_class) %>% \n", + " autoplot(type = \"heatmap\")" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "plot without title" + ], + "image/png": "" + }, + "metadata": { + "image/png": { + 
"width": 420, + "height": 420 + } + } + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 436 + }, + "id": "HsAtwukyLsvt", + "outputId": "3032a224-a2c8-4270-b4f2-7bb620317400" + } + }, + { + "cell_type": "markdown", + "source": [ + "De mörkare rutorna i förvirringsmatrisens diagram indikerar ett högt antal fall, och förhoppningsvis kan du se en diagonal linje av mörkare rutor som visar fall där den förutspådda och faktiska etiketten är densamma.\n", + "\n", + "Låt oss nu beräkna sammanfattande statistik för förvirringsmatrisen.\n" + ], + "metadata": { + "id": "oOJC87dkLwPr" + } + }, + { + "cell_type": "code", + "execution_count": 12, + "source": [ + "# Summary stats for confusion matrix\n", + "conf_mat(data = results, truth = cuisine, estimate = .pred_class) %>% \n", + "summary()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " .metric .estimator .estimate\n", + "1 accuracy multiclass 0.7880435\n", + "2 kap multiclass 0.7276583\n", + "3 sens macro 0.7780927\n", + "4 spec macro 0.9477598\n", + "5 ppv macro 0.7585583\n", + "6 npv macro 0.9460080\n", + "7 mcc multiclass 0.7292724\n", + "8 j_index macro 0.7258524\n", + "9 bal_accuracy macro 0.8629262\n", + "10 detection_prevalence macro 0.2000000\n", + "11 precision macro 0.7585583\n", + "12 recall macro 0.7780927\n", + "13 f_meas macro 0.7641862" + ], + "text/markdown": [ + "\n", + "A tibble: 13 × 3\n", + "\n", + "| .metric <chr> | .estimator <chr> | .estimate <dbl> |\n", + "|---|---|---|\n", + "| accuracy | multiclass | 0.7880435 |\n", + "| kap | multiclass | 0.7276583 |\n", + "| sens | macro | 0.7780927 |\n", + "| spec | macro | 0.9477598 |\n", + "| ppv | macro | 0.7585583 |\n", + "| npv | macro | 0.9460080 |\n", + "| mcc | multiclass | 0.7292724 |\n", + "| j_index | macro | 0.7258524 |\n", + "| bal_accuracy | macro | 0.8629262 |\n", + "| detection_prevalence | macro | 0.2000000 |\n", + "| precision | macro | 0.7585583 |\n", + "| 
recall | macro | 0.7780927 |\n", + "| f_meas | macro | 0.7641862 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 13 × 3\n", + "\\begin{tabular}{lll}\n", + " .metric & .estimator & .estimate\\\\\n", + " & & \\\\\n", + "\\hline\n", + "\t accuracy & multiclass & 0.7880435\\\\\n", + "\t kap & multiclass & 0.7276583\\\\\n", + "\t sens & macro & 0.7780927\\\\\n", + "\t spec & macro & 0.9477598\\\\\n", + "\t ppv & macro & 0.7585583\\\\\n", + "\t npv & macro & 0.9460080\\\\\n", + "\t mcc & multiclass & 0.7292724\\\\\n", + "\t j\\_index & macro & 0.7258524\\\\\n", + "\t bal\\_accuracy & macro & 0.8629262\\\\\n", + "\t detection\\_prevalence & macro & 0.2000000\\\\\n", + "\t precision & macro & 0.7585583\\\\\n", + "\t recall & macro & 0.7780927\\\\\n", + "\t f\\_meas & macro & 0.7641862\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 13 × 3
.metric.estimator.estimate
<chr><chr><dbl>
accuracy multiclass0.7880435
kap multiclass0.7276583
sens macro 0.7780927
spec macro 0.9477598
ppv macro 0.7585583
npv macro 0.9460080
mcc multiclass0.7292724
j_index macro 0.7258524
bal_accuracy macro 0.8629262
detection_prevalencemacro 0.2000000
precision macro 0.7585583
recall macro 0.7780927
f_meas macro 0.7641862
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 494 + }, + "id": "OYqetUyzL5Wz", + "outputId": "6a84d65e-113d-4281-dfc1-16e8b70f37e6" + } + }, + { + "cell_type": "markdown", + "source": [ + "Om vi fokuserar på några mått som noggrannhet, sensitivitet, ppv, så är vi inte helt fel ute för en början 🥳!\n", + "\n", + "## 5. Gå djupare\n", + "\n", + "Låt oss ställa en subtil fråga: Vilka kriterier används för att välja en viss typ av kök som det förutspådda resultatet?\n", + "\n", + "Statistiska maskininlärningsalgoritmer, som logistisk regression, baseras på `sannolikhet`; så det som faktiskt förutspås av en klassificerare är en sannolikhetsfördelning över en uppsättning möjliga utfall. Klassen med högst sannolikhet väljs sedan som det mest sannolika resultatet för de givna observationerna.\n", + "\n", + "Låt oss se detta i praktiken genom att göra både hårda klassförutsägelser och sannolikheter.\n" + ], + "metadata": { + "id": "43t7vz8vMJtW" + } + }, + { + "cell_type": "code", + "execution_count": 13, + "source": [ + "# Make hard class prediction and probabilities\n", + "results_prob <- cuisines_test %>%\n", + " select(cuisine) %>% \n", + " bind_cols(mr_fit %>% predict(new_data = cuisines_test)) %>% \n", + " bind_cols(mr_fit %>% predict(new_data = cuisines_test, type = \"prob\"))\n", + "\n", + "# Print out results\n", + "results_prob %>% \n", + " slice_head(n = 5)" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine .pred_class .pred_chinese .pred_indian .pred_japanese .pred_korean\n", + "1 indian thai 1.551259e-03 0.4587877 5.988039e-04 2.428503e-04\n", + "2 indian indian 2.637133e-05 0.9999488 6.648651e-07 2.259993e-05\n", + "3 indian indian 1.049433e-03 0.9909982 1.060937e-03 1.644947e-05\n", + "4 indian indian 6.237482e-02 0.4763035 9.136702e-02 3.660913e-01\n", + "5 indian indian 1.431745e-02 0.9418551 2.945239e-02 8.721782e-03\n", + " 
.pred_thai \n", + "1 5.388194e-01\n", + "2 1.577948e-06\n", + "3 6.874989e-03\n", + "4 3.863391e-03\n", + "5 5.653283e-03" + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 7\n", + "\n", + "| cuisine <fct> | .pred_class <fct> | .pred_chinese <dbl> | .pred_indian <dbl> | .pred_japanese <dbl> | .pred_korean <dbl> | .pred_thai <dbl> |\n", + "|---|---|---|---|---|---|---|\n", + "| indian | thai | 1.551259e-03 | 0.4587877 | 5.988039e-04 | 2.428503e-04 | 5.388194e-01 |\n", + "| indian | indian | 2.637133e-05 | 0.9999488 | 6.648651e-07 | 2.259993e-05 | 1.577948e-06 |\n", + "| indian | indian | 1.049433e-03 | 0.9909982 | 1.060937e-03 | 1.644947e-05 | 6.874989e-03 |\n", + "| indian | indian | 6.237482e-02 | 0.4763035 | 9.136702e-02 | 3.660913e-01 | 3.863391e-03 |\n", + "| indian | indian | 1.431745e-02 | 0.9418551 | 2.945239e-02 | 8.721782e-03 | 5.653283e-03 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 7\n", + "\\begin{tabular}{lllllll}\n", + " cuisine & .pred\\_class & .pred\\_chinese & .pred\\_indian & .pred\\_japanese & .pred\\_korean & .pred\\_thai\\\\\n", + " & & & & & & \\\\\n", + "\\hline\n", + "\t indian & thai & 1.551259e-03 & 0.4587877 & 5.988039e-04 & 2.428503e-04 & 5.388194e-01\\\\\n", + "\t indian & indian & 2.637133e-05 & 0.9999488 & 6.648651e-07 & 2.259993e-05 & 1.577948e-06\\\\\n", + "\t indian & indian & 1.049433e-03 & 0.9909982 & 1.060937e-03 & 1.644947e-05 & 6.874989e-03\\\\\n", + "\t indian & indian & 6.237482e-02 & 0.4763035 & 9.136702e-02 & 3.660913e-01 & 3.863391e-03\\\\\n", + "\t indian & indian & 1.431745e-02 & 0.9418551 & 2.945239e-02 & 8.721782e-03 & 5.653283e-03\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 7
cuisine.pred_class.pred_chinese.pred_indian.pred_japanese.pred_korean.pred_thai
<fct><fct><dbl><dbl><dbl><dbl><dbl>
indianthai 1.551259e-030.45878775.988039e-042.428503e-045.388194e-01
indianindian2.637133e-050.99994886.648651e-072.259993e-051.577948e-06
indianindian1.049433e-030.99099821.060937e-031.644947e-056.874989e-03
indianindian6.237482e-020.47630359.136702e-023.660913e-013.863391e-03
indianindian1.431745e-020.94185512.945239e-028.721782e-035.653283e-03
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 248 + }, + "id": "xdKNs-ZPMTJL", + "outputId": "68f6ac5a-725a-4eff-9ea6-481fef00e008" + } + }, + { + "cell_type": "markdown", + "source": [ + "✅ Kan du förklara varför modellen är ganska säker på att den första observationen är thailändsk?\n", + "\n", + "## **🚀Utmaning**\n", + "\n", + "I den här lektionen använde du dina rensade data för att bygga en maskininlärningsmodell som kan förutsäga ett nationellt kök baserat på en serie ingredienser. Ta dig tid att läsa igenom de [många alternativen](https://www.tidymodels.org/find/parsnip/#models) som Tidymodels erbjuder för att klassificera data och [andra sätt](https://parsnip.tidymodels.org/articles/articles/Examples.html#multinom_reg-models) att anpassa multinomial regression.\n", + "\n", + "#### TACK TILL:\n", + "\n", + "[`Allison Horst`](https://twitter.com/allison_horst/) för att ha skapat de fantastiska illustrationerna som gör R mer välkomnande och engagerande. Hitta fler illustrationer i hennes [galleri](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM).\n", + "\n", + "[Cassie Breviu](https://www.twitter.com/cassieview) och [Jen Looper](https://www.twitter.com/jenlooper) för att ha skapat den ursprungliga Python-versionen av denna modul ♥️\n", + "\n", + "
\n", + "Skulle ha slängt in några skämt, men jag fattar inte matvitsar 😅.\n", + "\n", + "
\n", + "\n", + "Lycka till med lärandet,\n", + "\n", + "[Eric](https://twitter.com/ericntay), Gold Microsoft Learn Student Ambassador.\n" + ], + "metadata": { + "id": "2tWVHMeLMYdM" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, vänligen notera att automatiska översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på dess originalspråk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sv/4-Classification/2-Classifiers-1/solution/notebook.ipynb b/translations/sv/4-Classification/2-Classifiers-1/solution/notebook.ipynb new file mode 100644 index 000000000..8e884cc43 --- /dev/null +++ b/translations/sv/4-Classification/2-Classifiers-1/solution/notebook.ipynb @@ -0,0 +1,281 @@ +{ + "cells": [ + { + "source": [ + "# Bygg klassificeringsmodeller\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 0 indian 0 0 0 0 0 \n", + "1 1 indian 1 0 0 0 0 \n", + "2 2 indian 0 0 0 0 0 \n", + "3 3 indian 0 0 0 0 0 \n", + "4 4 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 
0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 382 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00indian00000000...0000000000
11indian10000000...0000000000
22indian00000000...0000000000
33indian00000000...0000000000
44indian00000000...0000000010
\n

5 rows × 382 columns

\n
" + }, + "metadata": {}, + "execution_count": 1 + } + ], + "source": [ + "import pandas as pd\n", + "cuisines_df = pd.read_csv(\"../../data/cleaned_cuisines.csv\")\n", + "cuisines_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.model_selection import train_test_split, cross_val_score\n", + "from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report, precision_recall_curve\n", + "from sklearn.svm import SVC\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 indian\n", + "1 indian\n", + "2 indian\n", + "3 indian\n", + "4 indian\n", + "Name: cuisine, dtype: object" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ], + "source": [ + "cuisines_label_df = cuisines_df['cuisine']\n", + "cuisines_label_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 4 + } + ], + "source": [ + "cuisines_feature_df = cuisines_df.drop(['Unnamed: 0', 'cuisine'], axis=1)\n", + "cuisines_feature_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(cuisines_feature_df, cuisines_label_df, test_size=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Accuracy is 0.8181818181818182\n" + ] + } + ], + "source": [ + "lr = LogisticRegression(multi_class='ovr',solver='liblinear')\n", + "model = lr.fit(X_train, np.ravel(y_train))\n", + "\n", + "accuracy = model.score(X_test, y_test)\n", + "print (\"Accuracy is {}\".format(accuracy))" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "ingredients: Index(['artemisia', 'black_pepper', 'mushroom', 'shiitake', 'soy_sauce',\n 'vegetable_oil'],\n dtype='object')\ncuisine: korean\n" + ] + } + ], + "source": [ + "# test an item\n", + "print(f'ingredients: {X_test.iloc[50][X_test.iloc[50]!=0].keys()}')\n", + "print(f'cuisine: {y_test.iloc[50]}')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " 0\n", + "korean 0.392231\n", + "chinese 0.372872\n", + "japanese 0.218825\n", + "thai 0.013427\n", + "indian 0.002645" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
0
korean0.392231
chinese0.372872
japanese0.218825
thai0.013427
indian0.002645
\n
" + }, + "metadata": {}, + "execution_count": 8 + } + ], + "source": [ + "#rehsape to 2d array and transpose\n", + "test= X_test.iloc[50].values.reshape(-1, 1).T\n", + "# predict with score\n", + "proba = model.predict_proba(test)\n", + "classes = model.classes_\n", + "# create df with classes and scores\n", + "resultdf = pd.DataFrame(data=proba, columns=classes)\n", + "\n", + "# create df to show results\n", + "topPrediction = resultdf.T.sort_values(by=[0], ascending = [False])\n", + "topPrediction.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " precision recall f1-score support\n\n chinese 0.75 0.73 0.74 223\n indian 0.93 0.88 0.90 255\n japanese 0.78 0.78 0.78 253\n korean 0.87 0.86 0.86 236\n thai 0.76 0.84 0.80 232\n\n accuracy 0.82 1199\n macro avg 0.82 0.82 0.82 1199\nweighted avg 0.82 0.82 0.82 1199\n\n" + ] + } + ], + "source": [ + "y_pred = model.predict(X_test)\r\n", + "print(classification_report(y_test,y_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, vänligen notera att automatiska översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på dess originalspråk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. 
Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "9408506dd864f2b6e334c62f80c0cfcc", + "translation_date": "2025-09-06T14:33:09+00:00", + "source_file": "4-Classification/2-Classifiers-1/solution/notebook.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/translations/sv/4-Classification/3-Classifiers-2/notebook.ipynb b/translations/sv/4-Classification/3-Classifiers-2/notebook.ipynb new file mode 100644 index 000000000..67a191a8f --- /dev/null +++ b/translations/sv/4-Classification/3-Classifiers-2/notebook.ipynb @@ -0,0 +1,163 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 0 indian 0 0 0 0 0 \n", + "1 1 indian 1 0 0 0 0 \n", + "2 2 indian 0 0 0 0 0 \n", + "3 3 indian 0 0 0 0 0 \n", + "4 4 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 
0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 382 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00indian00000000...0000000000
11indian10000000...0000000000
22indian00000000...0000000000
33indian00000000...0000000000
44indian00000000...0000000010
\n

5 rows × 382 columns

\n
" + }, + "metadata": {}, + "execution_count": 9 + } + ], + "source": [ + "import pandas as pd\n", + "cuisines_df = pd.read_csv(\"../data/cleaned_cuisines.csv\")\n", + "cuisines_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 indian\n", + "1 indian\n", + "2 indian\n", + "3 indian\n", + "4 indian\n", + "Name: cuisine, dtype: object" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ], + "source": [ + "cuisines_label_df = cuisines_df['cuisine']\n", + "cuisines_label_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 11 + } + ], + "source": [ + "cuisines_feature_df = cuisines_df.drop(['Unnamed: 0', 'cuisine'], axis=1)\n", + "cuisines_feature_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, vänligen notera att automatiska översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på dess originalspråk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "15a83277036572e0773229b5f21c1e12", + "translation_date": "2025-09-06T14:42:22+00:00", + "source_file": "4-Classification/3-Classifiers-2/notebook.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/translations/sv/4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb b/translations/sv/4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb new file mode 100644 index 000000000..5bbb4ac96 --- /dev/null +++ 
b/translations/sv/4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb @@ -0,0 +1,650 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "lesson_12-R.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "fab50046ca413a38939d579f8432274f", + "translation_date": "2025-09-06T14:46:56+00:00", + "source_file": "4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb", + "language_code": "sv" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "jsFutf_ygqSx" + }, + "source": [ + "# Bygg en klassificeringsmodell: Utsökta asiatiska och indiska rätter\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HD54bEefgtNO" + }, + "source": [ + "## Klassificerare för kök 2\n", + "\n", + "I denna andra lektion om klassificering kommer vi att utforska `fler sätt` att klassificera kategoriska data. 
Vi kommer också att lära oss om konsekvenserna av att välja en klassificerare framför en annan.\n", + "\n", + "### [**Quiz före föreläsningen**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/23/)\n", + "\n", + "### **Förkunskaper**\n", + "\n", + "Vi antar att du har slutfört de tidigare lektionerna eftersom vi kommer att bygga vidare på några koncept vi lärde oss tidigare.\n", + "\n", + "För denna lektion behöver vi följande paket:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) är en [samling av R-paket](https://www.tidyverse.org/packages) som är utformade för att göra datavetenskap snabbare, enklare och roligare!\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) är ett [ramverk av paket](https://www.tidymodels.org/packages/) för modellering och maskininlärning.\n", + "\n", + "- `themis`: [themis-paketet](https://themis.tidymodels.org/) tillhandahåller extra receptsteg för att hantera obalanserad data.\n", + "\n", + "Du kan installera dem med:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\", \"kernlab\", \"themis\", \"ranger\", \"xgboost\", \"kknn\"))`\n", + "\n", + "Alternativt kan skriptet nedan kontrollera om du har de paket som krävs för att slutföra denna modul och installera dem åt dig om de saknas.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "vZ57IuUxgyQt" + }, + "source": [ + "suppressWarnings(if (!require(\"pacman\"))install.packages(\"pacman\"))\n", + "\n", + "pacman::p_load(tidyverse, tidymodels, themis, kernlab, ranger, xgboost, kknn)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "z22M-pj4g07x" + }, + "source": [ + "## **1. En klassificeringskarta**\n", + "\n", + "I vår [föregående lektion](https://github.com/microsoft/ML-For-Beginners/tree/main/4-Classification/2-Classifiers-1) försökte vi besvara frågan: hur väljer vi mellan flera modeller? 
Till stor del beror det på egenskaperna hos datan och typen av problem vi vill lösa (till exempel klassificering eller regression).\n", + "\n", + "Tidigare lärde vi oss om de olika alternativen du har när du klassificerar data med hjälp av Microsofts fusklapp. Pythons Machine Learning-ramverk, Scikit-learn, erbjuder en liknande men mer detaljerad fusklapp som kan hjälpa dig att ytterligare begränsa dina estimatorer (ett annat ord för klassificerare):\n", + "\n", + "

\n", + " \n", + "

\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "u1i3xRIVg7vG" + }, + "source": [ + "> Tips: [besök den här kartan online](https://scikit-learn.org/stable/tutorial/machine_learning_map/) och klicka längs vägen för att läsa dokumentationen.\n", + ">\n", + "> [Tidymodels referenssida](https://www.tidymodels.org/find/parsnip/#models) erbjuder också utmärkt dokumentation om olika typer av modeller.\n", + "\n", + "### **Planen** 🗺️\n", + "\n", + "Den här kartan är väldigt användbar när du har en tydlig förståelse för din data, eftersom du kan \"vandra\" längs dess vägar mot ett beslut:\n", + "\n", + "- Vi har \\>50 prover\n", + "\n", + "- Vi vill förutsäga en kategori\n", + "\n", + "- Vi har märkt data\n", + "\n", + "- Vi har färre än 100K prover\n", + "\n", + "- ✨ Vi kan välja en Linear SVC\n", + "\n", + "- Om det inte fungerar, eftersom vi har numerisk data\n", + "\n", + " - Kan vi prova en ✨ KNeighbors Classifier\n", + "\n", + " - Om det inte fungerar, prova ✨ SVC och ✨ Ensemble Classifiers\n", + "\n", + "Det här är en väldigt användbar väg att följa. Nu ska vi dyka rakt in i det med [tidymodels](https://www.tidymodels.org/) modelleringsramverket: en konsekvent och flexibel samling av R-paket utvecklade för att främja god statistisk praxis 😊.\n", + "\n", + "## 2. Dela upp data och hantera obalanserade dataset.\n", + "\n", + "Från våra tidigare lektioner lärde vi oss att det fanns en uppsättning vanliga ingredienser över våra kök. 
Dessutom fanns det en ganska ojämn fördelning i antalet kök.\n", + "\n", + "Vi kommer att hantera detta genom att\n", + "\n", + "- Ta bort de vanligaste ingredienserna som skapar förvirring mellan olika kök, med hjälp av `dplyr::select()`.\n", + "\n", + "- Använda ett `recipe` som förbehandlar data för att göra den redo för modellering genom att tillämpa en `over-sampling`-algoritm.\n", + "\n", + "Vi tittade redan på detta i den tidigare lektionen, så det här borde gå som en dans 🥳!\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "6tj_rN00hClA" + }, + "source": [ + "# Load the core Tidyverse and Tidymodels packages\n", + "library(tidyverse)\n", + "library(tidymodels)\n", + "\n", + "# Load the original cuisines data\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/4-Classification/data/cuisines.csv\")\n", + "\n", + "# Drop id column, rice, garlic and ginger from our original data set\n", + "df_select <- df %>% \n", + " select(-c(1, rice, garlic, ginger)) %>%\n", + " # Encode cuisine column as categorical\n", + " mutate(cuisine = factor(cuisine))\n", + "\n", + "\n", + "# Create data split specification\n", + "set.seed(2056)\n", + "cuisines_split <- initial_split(data = df_select,\n", + " strata = cuisine,\n", + " prop = 0.7)\n", + "\n", + "# Extract the data in each split\n", + "cuisines_train <- training(cuisines_split)\n", + "cuisines_test <- testing(cuisines_split)\n", + "\n", + "# Display distribution of cuisines in the training set\n", + "cuisines_train %>% \n", + " count(cuisine) %>% \n", + " arrange(desc(n))" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zFin5yw3hHb1" + }, + "source": [ + "### Hantera obalanserad data\n", + "\n", + "Obalanserad data påverkar ofta modellens prestanda negativt. 
Många modeller presterar bäst när antalet observationer är lika, och har därför en tendens att ha svårt med obalanserad data.\n", + "\n", + "Det finns huvudsakligen två sätt att hantera obalanserade datasätt:\n", + "\n", + "- lägga till observationer till minoritetsklassen: `Över-sampling`, t.ex. med hjälp av en SMOTE-algoritm som syntetiskt genererar nya exempel av minoritetsklassen genom att använda närmaste grannar till dessa fall.\n", + "\n", + "- ta bort observationer från majoritetsklassen: `Under-sampling`\n", + "\n", + "I vår tidigare lektion visade vi hur man hanterar obalanserade datasätt med hjälp av ett `recept`. Ett recept kan ses som en ritning som beskriver vilka steg som ska tillämpas på ett datasätt för att göra det redo för dataanalys. I vårt fall vill vi ha en jämn fördelning av antalet kök i vår `träningsuppsättning`. Låt oss sätta igång!\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "cRzTnHolhLWd" + }, + "source": [ + "# Load themis package for dealing with imbalanced data\n", + "library(themis)\n", + "\n", + "# Create a recipe for preprocessing training data\n", + "cuisines_recipe <- recipe(cuisine ~ ., data = cuisines_train) %>%\n", + " step_smote(cuisine) \n", + "\n", + "# Print recipe\n", + "cuisines_recipe" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KxOQ2ORhhO81" + }, + "source": [ + "Nu är vi redo att träna modeller 👩‍💻👨‍💻!\n", + "\n", + "## 3. Utöver multinomiala regressionsmodeller\n", + "\n", + "I vår tidigare lektion tittade vi på multinomiala regressionsmodeller. Låt oss utforska några mer flexibla modeller för klassificering.\n", + "\n", + "### Support Vector Machines\n", + "\n", + "I klassificeringssammanhang är `Support Vector Machines` en maskininlärningsteknik som försöker hitta ett *hyperplan* som \"bäst\" separerar klasserna. Låt oss titta på ett enkelt exempel:\n", + "\n", + "

\n", + " \n", + "

https://commons.wikimedia.org/w/index.php?curid=22877598
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "C4Wsd0vZhXYu" + }, + "source": [ + "H1~ separerar inte klasserna. H2~ gör det, men endast med en liten marginal. H3~ separerar dem med maximal marginal.\n", + "\n", + "#### Linjär Support Vector Classifier\n", + "\n", + "Support Vector Classifier (SVC) är en del av Support-Vector-maskinerna inom ML-tekniker. I SVC väljs hyperplanet för att korrekt separera `de flesta` av träningsobservationerna, men `kan felklassificera` några observationer. Genom att tillåta vissa punkter att vara på fel sida blir SVM mer robust mot avvikelser och därmed bättre på att generalisera till ny data. Parametern som reglerar denna överträdelse kallas `cost` och har ett standardvärde på 1 (se `help(\"svm_poly\")`).\n", + "\n", + "Låt oss skapa en linjär SVC genom att sätta `degree = 1` i en polynomisk SVM-modell.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "vJpp6nuChlBz" + }, + "source": [ + "# Make a linear SVC specification\n", + "svc_linear_spec <- svm_poly(degree = 1) %>% \n", + " set_engine(\"kernlab\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle specification and recipe into a workflow\n", + "svc_linear_wf <- workflow() %>% \n", + " add_recipe(cuisines_recipe) %>% \n", + " add_model(svc_linear_spec)\n", + "\n", + "# Print out workflow\n", + "svc_linear_wf" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rDs8cWNkhoqu" + }, + "source": [ + "Nu när vi har sammanställt förbehandlingsstegen och modellspecifikationen i ett *arbetsflöde*, kan vi gå vidare och träna den linjära SVC och samtidigt utvärdera resultaten. 
För prestandamått, låt oss skapa en uppsättning mått som kommer att utvärdera: `accuracy`, `sensitivity`, `Positive Predicted Value` och `F Measure`.\n", + "\n", + "> `augment()` kommer att lägga till kolumn(er) för förutsägelser till den angivna datan.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "81wiqcwuhrnq" + }, + "source": [ + "# Train a linear SVC model\n", + "svc_linear_fit <- svc_linear_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "# Create a metric set\n", + "eval_metrics <- metric_set(ppv, sens, accuracy, f_meas)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "svc_linear_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0UFQvHf-huo3" + }, + "source": [ + "#### Support Vector Machine\n", + "\n", + "Support Vector Machine (SVM) är en vidareutveckling av support vector classifier för att hantera en icke-linjär gräns mellan klasserna. I grund och botten använder SVMs *kernel-tricket* för att utöka funktionsutrymmet och anpassa sig till icke-linjära relationer mellan klasser. 
En populär och mycket flexibel kernel-funktion som används av SVMs är *Radial basis function.* Låt oss se hur den presterar på vår data.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "-KX4S8mzhzmp" + }, + "source": [ + "set.seed(2056)\n", + "\n", + "# Make an RBF SVM specification\n", + "svm_rbf_spec <- svm_rbf() %>% \n", + " set_engine(\"kernlab\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle specification and recipe into a workflow\n", + "svm_rbf_wf <- workflow() %>% \n", + " add_recipe(cuisines_recipe) %>% \n", + " add_model(svm_rbf_spec)\n", + "\n", + "\n", + "# Train an RBF model\n", + "svm_rbf_fit <- svm_rbf_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "svm_rbf_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QBFSa7WSh4HQ" + }, + "source": [ + "Mycket bättre 🤩!\n", + "\n", + "> ✅ Vänligen se:\n", + ">\n", + "> - [*Support Vector Machines*](https://bradleyboehmke.github.io/HOML/svm.html), Hands-on Machine Learning with R\n", + ">\n", + "> - [*Support Vector Machines*](https://www.statlearning.com/), An Introduction to Statistical Learning with Applications in R\n", + ">\n", + "> för vidare läsning.\n", + "\n", + "### Närmaste granne-klassificerare\n", + "\n", + "*K*-närmsta granne (KNN) är en algoritm där varje observation förutsägs baserat på dess *likhet* med andra observationer.\n", + "\n", + "Låt oss anpassa en till vår data.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "k4BxxBcdh9Ka" + }, + "source": [ + "# Make a KNN specification\n", + "knn_spec <- nearest_neighbor() %>% \n", + " set_engine(\"kknn\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle recipe and model specification into a workflow\n", + "knn_wf <- workflow() %>% \n", 
+ " add_recipe(cuisines_recipe) %>% \n", + " add_model(knn_spec)\n", + "\n", + "# Train a KNN model\n", + "knn_wf_fit <- knn_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "knn_wf_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HaegQseriAcj" + }, + "source": [ + "Det verkar som att den här modellen inte presterar särskilt bra. Förmodligen kan modellens prestanda förbättras genom att ändra argumenten (se `help(\"nearest_neighbor\")`). Se till att testa detta.\n", + "\n", + "> ✅ Vänligen se:\n", + ">\n", + "> - [Hands-on Machine Learning with R](https://bradleyboehmke.github.io/HOML/)\n", + ">\n", + "> - [An Introduction to Statistical Learning with Applications in R](https://www.statlearning.com/)\n", + ">\n", + "> för att lära dig mer om *K*-Närmaste Grannar-klassificerare.\n", + "\n", + "### Ensembleklassificerare\n", + "\n", + "Ensemblealgoritmer fungerar genom att kombinera flera basmodeller för att skapa en optimal modell antingen genom:\n", + "\n", + "`bagging`: att använda en *medelvärdesfunktion* på en samling av basmodeller\n", + "\n", + "`boosting`: att bygga en sekvens av modeller som bygger på varandra för att förbättra den prediktiva prestandan.\n", + "\n", + "Låt oss börja med att testa en Random Forest-modell, som bygger en stor samling beslutsträd och sedan använder en medelvärdesfunktion för att skapa en bättre övergripande modell.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "49DPoVs6iK1M" + }, + "source": [ + "# Make a random forest specification\n", + "rf_spec <- rand_forest() %>% \n", + " set_engine(\"ranger\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle recipe and model specification into a workflow\n", + "rf_wf <- workflow() %>% \n", + " 
add_recipe(cuisines_recipe) %>% \n", + " add_model(rf_spec)\n", + "\n", + "# Train a random forest model\n", + "rf_wf_fit <- rf_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "rf_wf_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RGVYwC_aiUWc" + }, + "source": [ + "Bra jobbat 👏!\n", + "\n", + "Låt oss också experimentera med en Boosted Tree-modell.\n", + "\n", + "Boosted Tree definierar en ensemblemetod som skapar en serie sekventiella beslutsträd där varje träd beror på resultaten från tidigare träd i ett försök att gradvis minska felet. Den fokuserar på vikterna för felklassificerade objekt och justerar anpassningen av nästa klassificerare för att korrigera felen.\n", + "\n", + "Det finns olika sätt att anpassa denna modell (se `help(\"boost_tree\")`). 
I detta exempel kommer vi att anpassa Boosted trees via `xgboost`-motorn.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Py1YWo-micWs" + }, + "source": [ + "# Make a boosted tree specification\n", + "boost_spec <- boost_tree(trees = 200) %>% \n", + " set_engine(\"xgboost\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle recipe and model specification into a workflow\n", + "boost_wf <- workflow() %>% \n", + " add_recipe(cuisines_recipe) %>% \n", + " add_model(boost_spec)\n", + "\n", + "# Train a boosted tree model\n", + "boost_wf_fit <- boost_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "boost_wf_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zNQnbuejigZM" + }, + "source": [ + "> ✅ Vänligen se:\n", + ">\n", + "> - [Machine Learning for Social Scientists](https://cimentadaj.github.io/ml_socsci/tree-based-methods.html#random-forests)\n", + ">\n", + "> - [Hands-on Machine Learning with R](https://bradleyboehmke.github.io/HOML/)\n", + ">\n", + "> - [An Introduction to Statistical Learning with Applications in R](https://www.statlearning.com/)\n", + ">\n", + "> - <https://algotech.netlify.app/blog/xgboost/> - Utforskar AdaBoost-modellen som är ett bra alternativ till xgboost.\n", + ">\n", + "> för att lära dig mer om ensembleklassificerare.\n", + "\n", + "## 4. Extra - jämföra flera modeller\n", + "\n", + "Vi har anpassat ganska många modeller i denna labb 🙌. 
Det kan bli tröttsamt eller jobbigt att skapa många arbetsflöden från olika uppsättningar av förbehandlingsmetoder och/eller modellspecifikationer och sedan beräkna prestandamåtten ett i taget.\n", + "\n", + "Låt oss se om vi kan lösa detta genom att skapa en funktion som anpassar en lista med arbetsflöden på träningsuppsättningen och sedan returnerar prestandamåtten baserat på testuppsättningen. Vi kommer att använda `map()` och `map_dfr()` från paketet [purrr](https://purrr.tidyverse.org/) för att tillämpa funktioner på varje element i en lista.\n", + "\n", + "> [`map()`](https://purrr.tidyverse.org/reference/map.html)-funktioner låter dig ersätta många for-loopar med kod som både är mer kortfattad och lättare att läsa. Det bästa stället att lära sig om [`map()`](https://purrr.tidyverse.org/reference/map.html)-funktionerna är [kapitlet om iteration](http://r4ds.had.co.nz/iteration.html) i *R for Data Science*.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Qzb7LyZnimd2" + }, + "source": [ + "set.seed(2056)\n", + "\n", + "# Create a metric set\n", + "eval_metrics <- metric_set(ppv, sens, accuracy, f_meas)\n", + "\n", + "# Define a function that returns performance metrics\n", + "compare_models <- function(workflow_list, train_set, test_set){\n", + " \n", + " suppressWarnings(\n", + " # Fit each model to the train_set\n", + " map(workflow_list, fit, data = train_set) %>% \n", + " # Make predictions on the test set\n", + " map_dfr(augment, new_data = test_set, .id = \"model\") %>%\n", + " # Select desired columns\n", + " select(model, cuisine, .pred_class) %>% \n", + " # Evaluate model performance\n", + " group_by(model) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class) %>% \n", + " ungroup()\n", + " )\n", + " \n", + "} # End of function" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Fwa712sNisDA" + }, + "source": [] + }, + { + "cell_type": "code", + "metadata": { + "id": 
"3i4VJOi2iu-a" + }, + "source": [ + "# Make a list of workflows\n", + "workflow_list <- list(\n", + " \"svc\" = svc_linear_wf,\n", + " \"svm\" = svm_rbf_wf,\n", + " \"knn\" = knn_wf,\n", + " \"random_forest\" = rf_wf,\n", + " \"xgboost\" = boost_wf)\n", + "\n", + "# Call the function\n", + "set.seed(2056)\n", + "perf_metrics <- compare_models(workflow_list = workflow_list, train_set = cuisines_train, test_set = cuisines_test)\n", + "\n", + "# Print out performance metrics\n", + "perf_metrics %>% \n", + " group_by(.metric) %>% \n", + " arrange(desc(.estimate)) %>% \n", + " slice_head(n=7)\n", + "\n", + "# Compare accuracy\n", + "perf_metrics %>% \n", + " filter(.metric == \"accuracy\") %>% \n", + " arrange(desc(.estimate))\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KuWK_lEli4nW" + }, + "source": [ + "[**workflowset**](https://workflowsets.tidymodels.org/) paketet gör det möjligt för användare att skapa och enkelt anpassa ett stort antal modeller, men är främst utformat för att fungera med omprovningstekniker som `cross-validation`, en metod vi ännu inte har täckt.\n", + "\n", + "## **🚀Utmaning**\n", + "\n", + "Var och en av dessa tekniker har ett stort antal parametrar som du kan justera, till exempel `cost` i SVMs, `neighbors` i KNN, `mtry` (Slumpmässigt Valda Prediktorer) i Random Forest.\n", + "\n", + "Undersök standardparametrarna för var och en och fundera på vad justering av dessa parametrar skulle innebära för modellens kvalitet.\n", + "\n", + "För att ta reda på mer om en specifik modell och dess parametrar, använd: `help(\"model\")` t.ex. `help(\"rand_forest\")`\n", + "\n", + "> I praktiken brukar vi *estimera* de *bästa värdena* för dessa genom att träna många modeller på en `simulerad datamängd` och mäta hur bra alla dessa modeller presterar. 
Denna process kallas **tuning**.\n", + "\n", + "### [**Quiz efter föreläsningen**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/24/)\n", + "\n", + "### **Granskning & Självstudier**\n", + "\n", + "Det finns mycket facktermer i dessa lektioner, så ta en stund att gå igenom [denna lista](https://docs.microsoft.com/dotnet/machine-learning/resources/glossary?WT.mc_id=academic-77952-leestott) med användbar terminologi!\n", + "\n", + "#### TACK TILL:\n", + "\n", + "[`Allison Horst`](https://twitter.com/allison_horst/) för att ha skapat de fantastiska illustrationerna som gör R mer välkomnande och engagerande. Hitta fler illustrationer i hennes [galleri](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM).\n", + "\n", + "[Cassie Breviu](https://www.twitter.com/cassieview) och [Jen Looper](https://www.twitter.com/jenlooper) för att ha skapat den ursprungliga Python-versionen av denna modul ♥️\n", + "\n", + "Lycka till med lärandet,\n", + "\n", + "[Eric](https://twitter.com/ericntay), Gold Microsoft Learn Student Ambassador.\n", + "\n", + "

\n", + " \n", + "

Konstverk av @allison_horst
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, bör det noteras att automatiserade översättningar kan innehålla fel eller brister. Det ursprungliga dokumentet på dess originalspråk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. Vi ansvarar inte för eventuella missförstånd eller feltolkningar som kan uppstå vid användning av denna översättning.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sv/4-Classification/3-Classifiers-2/solution/notebook.ipynb b/translations/sv/4-Classification/3-Classifiers-2/solution/notebook.ipynb new file mode 100644 index 000000000..c0a0c0c42 --- /dev/null +++ b/translations/sv/4-Classification/3-Classifiers-2/solution/notebook.ipynb @@ -0,0 +1,302 @@ +{ + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 0 indian 0 0 0 0 0 \n", + "1 1 indian 1 0 0 0 0 \n", + "2 2 indian 0 0 0 0 0 \n", + "3 3 indian 0 0 0 0 0 \n", + "4 4 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 382 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00indian00000000...0000000000
11indian10000000...0000000000
22indian00000000...0000000000
33indian00000000...0000000000
44indian00000000...0000000010
\n

5 rows × 382 columns

\n
" + }, + "metadata": {}, + "execution_count": 1 + } + ], + "source": [ + "import pandas as pd\n", + "cuisines_df = pd.read_csv(\"../../data/cleaned_cuisines.csv\")\n", + "cuisines_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 indian\n", + "1 indian\n", + "2 indian\n", + "3 indian\n", + "4 indian\n", + "Name: cuisine, dtype: object" + ] + }, + "metadata": {}, + "execution_count": 2 + } + ], + "source": [ + "cuisines_label_df = cuisines_df['cuisine']\n", + "cuisines_label_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 3 + } + ], + "source": [ + "cuisines_feature_df = cuisines_df.drop(['Unnamed: 0', 'cuisine'], axis=1)\n", + "cuisines_feature_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Prova olika klassificerare\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.neighbors import KNeighborsClassifier\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.svm import SVC\n", + "from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier\n", + "from sklearn.model_selection import train_test_split, cross_val_score\n", + "from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report, precision_recall_curve\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(cuisines_feature_df, cuisines_label_df, test_size=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "C = 10\n", + "# Create different classifiers.\n", + "classifiers = {\n", + " 'Linear SVC': SVC(kernel='linear', C=C, probability=True,random_state=0),\n", + " 'KNN classifier': KNeighborsClassifier(C),\n", + " 'SVC': SVC(),\n", + " 'RFST': RandomForestClassifier(n_estimators=100),\n", + " 'ADA': AdaBoostClassifier(n_estimators=100)\n", + " \n", + "}\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Accuracy (train) for Linear SVC: 76.4% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.64 0.66 0.65 242\n", + " indian 0.91 0.86 0.89 236\n", + " japanese 0.72 0.73 0.73 245\n", + " korean 0.83 0.75 0.79 234\n", + " thai 0.75 0.82 0.78 242\n", + "\n", + " accuracy 
0.76 1199\n", + " macro avg 0.77 0.76 0.77 1199\n", + "weighted avg 0.77 0.76 0.77 1199\n", + "\n", + "Accuracy (train) for KNN classifier: 70.7% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.65 0.63 0.64 242\n", + " indian 0.84 0.81 0.82 236\n", + " japanese 0.60 0.81 0.69 245\n", + " korean 0.89 0.53 0.67 234\n", + " thai 0.69 0.75 0.72 242\n", + "\n", + " accuracy 0.71 1199\n", + " macro avg 0.73 0.71 0.71 1199\n", + "weighted avg 0.73 0.71 0.71 1199\n", + "\n", + "Accuracy (train) for SVC: 80.1% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.71 0.69 0.70 242\n", + " indian 0.92 0.92 0.92 236\n", + " japanese 0.77 0.78 0.77 245\n", + " korean 0.87 0.77 0.82 234\n", + " thai 0.75 0.86 0.80 242\n", + "\n", + " accuracy 0.80 1199\n", + " macro avg 0.80 0.80 0.80 1199\n", + "weighted avg 0.80 0.80 0.80 1199\n", + "\n", + "Accuracy (train) for RFST: 82.8% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.80 0.75 0.77 242\n", + " indian 0.90 0.91 0.90 236\n", + " japanese 0.82 0.78 0.80 245\n", + " korean 0.85 0.82 0.83 234\n", + " thai 0.78 0.89 0.83 242\n", + "\n", + " accuracy 0.83 1199\n", + " macro avg 0.83 0.83 0.83 1199\n", + "weighted avg 0.83 0.83 0.83 1199\n", + "\n", + "Accuracy (train) for ADA: 71.1% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.60 0.57 0.58 242\n", + " indian 0.87 0.84 0.86 236\n", + " japanese 0.71 0.60 0.65 245\n", + " korean 0.68 0.78 0.72 234\n", + " thai 0.70 0.78 0.74 242\n", + "\n", + " accuracy 0.71 1199\n", + " macro avg 0.71 0.71 0.71 1199\n", + "weighted avg 0.71 0.71 0.71 1199\n", + "\n" + ] + } + ], + "source": [ + "n_classifiers = len(classifiers)\n", + "\n", + "for index, (name, classifier) in enumerate(classifiers.items()):\n", + " classifier.fit(X_train, np.ravel(y_train))\n", + "\n", + " y_pred = classifier.predict(X_test)\n", + " accuracy = accuracy_score(y_test, y_pred)\n", + " print(\"Accuracy (train) for %s: %0.1f%% \" % (name, 
accuracy * 100))\n", + " print(classification_report(y_test,y_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, vänligen notera att automatiska översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på sitt originalspråk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "7ea2b714669c823a596d986ba2d5739f", + "translation_date": "2025-09-06T14:42:52+00:00", + "source_file": "4-Classification/3-Classifiers-2/solution/notebook.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/translations/sv/4-Classification/4-Applied/notebook.ipynb b/translations/sv/4-Classification/4-Applied/notebook.ipynb new file mode 100644 index 000000000..2490b7253 --- /dev/null +++ b/translations/sv/4-Classification/4-Applied/notebook.ipynb @@ -0,0 +1,39 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 
3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 4, + "coopTranslator": { + "original_hash": "2f3e0d9e9ac5c301558fb8bf733ac0cb", + "translation_date": "2025-09-06T14:41:32+00:00", + "source_file": "4-Classification/4-Applied/notebook.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, vänligen notera att automatiska översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på sitt ursprungliga språk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. 
Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sv/4-Classification/4-Applied/solution/notebook.ipynb b/translations/sv/4-Classification/4-Applied/solution/notebook.ipynb new file mode 100644 index 000000000..7241b9530 --- /dev/null +++ b/translations/sv/4-Classification/4-Applied/solution/notebook.ipynb @@ -0,0 +1,290 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "49325d6dd12a3628fc64fa7ccb1a80ff", + "translation_date": "2025-09-06T14:41:57+00:00", + "source_file": "4-Classification/4-Applied/solution/notebook.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: skl2onnx in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (1.8.0)\n", + "Requirement already satisfied: protobuf in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (3.8.0)\n", + "Requirement already satisfied: numpy>=1.15 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from 
skl2onnx) (1.19.2)\n", + "Requirement already satisfied: onnx>=1.2.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (1.9.0)\n", + "Requirement already satisfied: six in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from skl2onnx) (1.12.0)\n", + "Requirement already satisfied: onnxconverter-common<1.9,>=1.6.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (1.8.1)\n", + "Requirement already satisfied: scikit-learn>=0.19 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (0.24.2)\n", + "Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (1.4.1)\n", + "Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from protobuf->skl2onnx) (45.1.0)\n", + "Requirement already satisfied: typing-extensions>=3.6.2.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from onnx>=1.2.1->skl2onnx) (3.10.0.0)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from scikit-learn>=0.19->skl2onnx) (2.1.0)\n", + "Requirement already satisfied: joblib>=0.11 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from scikit-learn>=0.19->skl2onnx) (0.16.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "!pip install skl2onnx" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [], + 
"source": [ + "import pandas as pd \n" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 0 indian 0 0 0 0 0 \n", + "1 1 indian 1 0 0 0 0 \n", + "2 2 indian 0 0 0 0 0 \n", + "3 3 indian 0 0 0 0 0 \n", + "4 4 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 382 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00indian00000000...0000000000
11indian10000000...0000000000
22indian00000000...0000000000
33indian00000000...0000000000
44indian00000000...0000000010
\n

5 rows × 382 columns

\n
" + }, + "metadata": {}, + "execution_count": 60 + } + ], + "source": [ + "data = pd.read_csv('../../data/cleaned_cuisines.csv')\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 61 + } + ], + "source": [ + "X = data.iloc[:,2:]\n", + "X.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " cuisine\n", + "0 indian\n", + "1 indian\n", + "2 indian\n", + "3 indian\n", + "4 indian" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
cuisine
0indian
1indian
2indian
3indian
4indian
\n
" + }, + "metadata": {}, + "execution_count": 62 + } + ], + "source": [ + "y = data[['cuisine']]\n", + "y.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "from sklearn.svm import SVC\n", + "from sklearn.model_selection import cross_val_score\n", + "from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "SVC(C=10, kernel='linear', probability=True, random_state=0)" + ] + }, + "metadata": {}, + "execution_count": 65 + } + ], + "source": [ + "model = SVC(kernel='linear', C=10, probability=True,random_state=0)\n", + "model.fit(X_train,y_train.values.ravel())\n" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred = model.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " precision recall f1-score support\n\n chinese 0.72 0.70 0.71 236\n indian 0.91 0.88 0.89 243\n japanese 0.80 0.75 0.77 240\n korean 0.80 0.81 0.81 230\n thai 0.76 0.85 0.80 250\n\n accuracy 0.80 1199\n macro avg 0.80 0.80 0.80 1199\nweighted avg 0.80 0.80 0.80 1199\n\n" + ] + } + ], + "source": [ + "print(classification_report(y_test,y_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [], + "source": [ + "from skl2onnx import convert_sklearn\n", + "from skl2onnx.common.data_types import FloatTensorType\n", + "\n", + "initial_type = 
[('float_input', FloatTensorType([None, 380]))]\n", + "options = {id(model): {'nocl': True, 'zipmap': False}}\n", + "onx = convert_sklearn(model, initial_types=initial_type, options=options)\n", + "with open(\"./model.onnx\", \"wb\") as f:\n", + " f.write(onx.SerializeToString())\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, vänligen notera att automatiska översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på dess originalspråk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sv/5-Clustering/1-Visualize/notebook.ipynb b/translations/sv/5-Clustering/1-Visualize/notebook.ipynb new file mode 100644 index 000000000..1ce436d9e --- /dev/null +++ b/translations/sv/5-Clustering/1-Visualize/notebook.ipynb @@ -0,0 +1,50 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python383jvsc74a57bd0e134e05457d34029b6460cd73bbf1ed73f339b5b6d98c95be70b69eba114fe95", + "display_name": "Python 3.8.3 64-bit (conda)" + }, + "coopTranslator": { + "original_hash": "40e0707e96b3e1899a912776006264f9", + "translation_date": "2025-09-06T14:08:02+00:00", + "source_file": "5-Clustering/1-Visualize/notebook.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + 
"cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, vänligen notera att automatiska översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på dess originalspråk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sv/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb b/translations/sv/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb new file mode 100644 index 000000000..08e3bc095 --- /dev/null +++ b/translations/sv/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb @@ -0,0 +1,500 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "## **Nigeriansk musik hämtad från Spotify - en analys**\n", + "\n", + "Klustring är en typ av [Oövervakad inlärning](https://wikipedia.org/wiki/Unsupervised_learning) som förutsätter att en dataset är oetiketterad eller att dess indata inte matchas med fördefinierade utdata. Den använder olika algoritmer för att sortera igenom oetiketterad data och skapa grupper baserat på mönster den identifierar i datan.\n", + "\n", + "[**Quiz före föreläsningen**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/27/)\n", + "\n", + "### **Introduktion**\n", + "\n", + "[Klustring](https://link.springer.com/referenceworkentry/10.1007%2F978-0-387-30164-8_124) är mycket användbart för datautforskning. 
Låt oss se om det kan hjälpa oss att upptäcka trender och mönster i hur nigerianska lyssnare konsumerar musik.\n", + "\n", + "> ✅ Ta en minut och fundera över användningsområden för klustring. I vardagen sker klustring när du har en hög med tvätt och behöver sortera ut familjemedlemmarnas kläder 🧦👕👖🩲. Inom datavetenskap sker klustring när man försöker analysera en användares preferenser eller bestämma egenskaperna hos en oetiketterad dataset. Klustring hjälper på sätt och vis att skapa ordning i kaos, som en strumplåda.\n", + "\n", + "I en professionell miljö kan klustring användas för att bestämma saker som marknadssegmentering, till exempel vilka åldersgrupper som köper vilka produkter. Ett annat användningsområde kan vara att upptäcka avvikelser, kanske för att identifiera bedrägerier i en dataset med kreditkortstransaktioner. Eller så kan du använda klustring för att identifiera tumörer i en samling medicinska skanningar.\n", + "\n", + "✅ Fundera en minut på hur du kan ha stött på klustring \"i det vilda\", inom bank, e-handel eller affärsverksamhet.\n", + "\n", + "> 🎓 Intressant nog har klusteranalys sitt ursprung inom antropologi och psykologi på 1930-talet. Kan du föreställa dig hur det kan ha använts?\n", + "\n", + "Alternativt kan du använda det för att gruppera sökresultat - till exempel shoppinglänkar, bilder eller recensioner. Klustring är användbart när du har en stor dataset som du vill reducera och analysera mer detaljerat, så tekniken kan användas för att förstå data innan andra modeller konstrueras.\n", + "\n", + "✅ När din data är organiserad i kluster tilldelar du den ett kluster-ID, och denna teknik kan vara användbar för att bevara en datasets integritet; du kan istället referera till en datapunkt med dess kluster-ID, snarare än med mer avslöjande identifierbar data. 
Kan du tänka dig andra anledningar till varför du skulle referera till ett kluster-ID istället för andra element i klustret för att identifiera det?\n", + "\n", + "### Kom igång med klustring\n", + "\n", + "> 🎓 Hur vi skapar kluster har mycket att göra med hur vi samlar datapunkterna i grupper. Låt oss packa upp lite terminologi:\n", + ">\n", + "> 🎓 ['Transduktiv' vs. 'induktiv'](https://wikipedia.org/wiki/Transduction_(machine_learning))\n", + ">\n", + "> Transduktiv inferens härleds från observerade träningsfall som kartläggs till specifika testfall. Induktiv inferens härleds från träningsfall som kartläggs till generella regler som sedan tillämpas på testfall.\n", + ">\n", + "> Ett exempel: Föreställ dig att du har en dataset som bara är delvis etiketterad. Vissa saker är \"skivor\", vissa \"cd-skivor\" och vissa är tomma. Din uppgift är att tilldela etiketter till de tomma. Om du väljer en induktiv metod skulle du träna en modell som letar efter \"skivor\" och \"cd-skivor\" och tillämpa dessa etiketter på din oetiketterade data. Denna metod skulle ha svårt att klassificera saker som faktiskt är \"kassetter\". En transduktiv metod, å andra sidan, hanterar denna okända data mer effektivt eftersom den arbetar för att gruppera liknande objekt och sedan tilldelar en etikett till en grupp. I detta fall kan kluster reflektera \"runda musikföremål\" och \"fyrkantiga musikföremål\".\n", + ">\n", + "> 🎓 ['Icke-platt' vs. 'platt' geometri](https://datascience.stackexchange.com/questions/52260/terminology-flat-geometry-in-the-context-of-clustering)\n", + ">\n", + "> Härstammar från matematisk terminologi, icke-platt vs. 
platt geometri hänvisar till mätningen av avstånd mellan punkter antingen med \"platt\" ([Euklidisk](https://wikipedia.org/wiki/Euclidean_geometry)) eller \"icke-platt\" (icke-Euklidisk) geometriska metoder.\n", + ">\n", + "> \"Platt\" i detta sammanhang hänvisar till Euklidisk geometri (delar av vilken lärs ut som \"plan\" geometri), och icke-platt hänvisar till icke-Euklidisk geometri. Vad har geometri med maskininlärning att göra? Tja, som två områden som är rotade i matematik måste det finnas ett gemensamt sätt att mäta avstånd mellan punkter i kluster, och det kan göras på ett \"platt\" eller \"icke-platt\" sätt, beroende på datans natur. [Euklidiska avstånd](https://wikipedia.org/wiki/Euclidean_distance) mäts som längden på ett linjesegment mellan två punkter. [Icke-Euklidiska avstånd](https://wikipedia.org/wiki/Non-Euclidean_geometry) mäts längs en kurva. Om din data, visualiserad, inte verkar existera på ett plan, kan du behöva använda en specialiserad algoritm för att hantera den.\n", + "\n", + "

\n", + " \n", + "

Infografik av Dasani Madipalli
\n", + "\n", + "\n", + "\n", + "> 🎓 ['Avstånd'](https://web.stanford.edu/class/cs345a/slides/12-clustering.pdf)\n", + ">\n", + "> Kluster definieras av deras avståndsmatris, t.ex. avstånden mellan punkter. Detta avstånd kan mätas på några sätt. Euklidiska kluster definieras av medelvärdet av punktvärdena och innehåller en \"centroid\" eller mittpunkt. Avstånd mäts således genom avståndet till den centroiden. Icke-Euklidiska avstånd hänvisar till \"clustroids\", den punkt som är närmast andra punkter. Clustroids kan i sin tur definieras på olika sätt.\n", + ">\n", + "> 🎓 ['Begränsad'](https://wikipedia.org/wiki/Constrained_clustering)\n", + ">\n", + "> [Begränsad klustring](https://web.cs.ucdavis.edu/~davidson/Publications/ICDMTutorial.pdf) introducerar \"semi-övervakad\" inlärning i denna oövervakade metod. Relationerna mellan punkter flaggas som \"kan inte länka\" eller \"måste länka\" så att vissa regler tvingas på datasetet.\n", + ">\n", + "> Ett exempel: Om en algoritm släpps fri på en samling oetiketterad eller semi-etiketterad data kan klustren den producerar vara av dålig kvalitet. I exemplet ovan kan klustren gruppera \"runda musikföremål\" och \"fyrkantiga musikföremål\" och \"triangulära föremål\" och \"kakor\". Om algoritmen ges vissa begränsningar, eller regler att följa (\"föremålet måste vara gjort av plast\", \"föremålet måste kunna producera musik\") kan detta hjälpa till att \"begränsa\" algoritmen att göra bättre val.\n", + ">\n", + "> 🎓 'Densitet'\n", + ">\n", + "> Data som är \"brusig\" anses vara \"tät\". Avstånden mellan punkter i varje kluster kan vid undersökning visa sig vara mer eller mindre täta, eller \"trånga\", och denna data behöver analyseras med lämplig klustringsmetod. 
[Denna artikel](https://www.kdnuggets.com/2020/02/understanding-density-based-clustering.html) demonstrerar skillnaden mellan att använda K-Means klustring och HDBSCAN-algoritmer för att utforska en brusig dataset med ojämn klusterdensitet.\n", + "\n", + "Fördjupa din förståelse av klustringstekniker i denna [Learn-modul](https://docs.microsoft.com/learn/modules/train-evaluate-cluster-models?WT.mc_id=academic-77952-leestott)\n", + "\n", + "### **Klustringsalgoritmer**\n", + "\n", + "Det finns över 100 klustringsalgoritmer, och deras användning beror på datans natur. Låt oss diskutera några av de viktigaste:\n", + "\n", + "- **Hierarkisk klustring**. Om ett objekt klassificeras baserat på dess närhet till ett närliggande objekt, snarare än till ett längre bort, bildas kluster baserat på medlemmarnas avstånd till och från andra objekt. Hierarkisk klustring kännetecknas av att två kluster upprepade gånger kombineras.\n", + "\n", + "\n", + "

\n", + " \n", + "

Infografik av Dasani Madipalli
\n", + "\n", + "\n", + "\n", + "- **Centroid-klustring**. Denna populära algoritm kräver valet av \"k\", eller antalet kluster som ska bildas, varefter algoritmen bestämmer mittpunkten för ett kluster och samlar data runt den punkten. [K-means klustring](https://wikipedia.org/wiki/K-means_clustering) är en populär version av centroid-klustring som separerar en dataset i fördefinierade K-grupper. Centret bestäms av det närmaste medelvärdet, därav namnet. Det kvadrerade avståndet från klustret minimeras.\n", + "\n", + "

\n", + " \n", + "

Infografik av Dasani Madipalli
\n", + "\n", + "\n", + "\n", + "- **Fördelningsbaserad klustring**. Baserad på statistisk modellering fokuserar fördelningsbaserad klustring på att bestämma sannolikheten att en datapunkt tillhör ett kluster och tilldelar den därefter. Gaussiska blandningsmetoder tillhör denna typ.\n", + "\n", + "- **Densitetsbaserad klustring**. Datapunkter tilldelas kluster baserat på deras densitet, eller deras gruppering runt varandra. Datapunkter långt från gruppen anses vara avvikelser eller brus. DBSCAN, Mean-shift och OPTICS tillhör denna typ av klustring.\n", + "\n", + "- **Rutbaserad klustring**. För multidimensionella datasets skapas ett rutnät och datan delas upp mellan rutnätets celler, vilket skapar kluster.\n", + "\n", + "Det bästa sättet att lära sig om klustring är att prova det själv, så det är vad du kommer att göra i denna övning.\n", + "\n", + "Vi behöver några paket för att genomföra denna modul. Du kan installera dem med: `install.packages(c('tidyverse', 'tidymodels', 'DataExplorer', 'summarytools', 'plotly', 'paletteer', 'corrplot', 'patchwork'))`\n", + "\n", + "Alternativt kontrollerar skriptet nedan om du har de paket som krävs för att slutföra denna modul och installerar dem åt dig om några saknas.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "suppressWarnings(if(!require(\"pacman\")) install.packages(\"pacman\"))\r\n", + "\r\n", + "pacman::p_load('tidyverse', 'tidymodels', 'DataExplorer', 'summarytools', 'plotly', 'paletteer', 'corrplot', 'patchwork')\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Övning - klustra din data\n", + "\n", + "Klustring som teknik underlättas mycket av korrekt visualisering, så låt oss börja med att visualisera vår musikdata. 
Denna övning kommer att hjälpa oss att avgöra vilken av klustringsmetoderna vi bör använda mest effektivt för denna datas natur.\n", + "\n", + "Låt oss sätta igång genom att importera datan.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load the core tidyverse and make it available in your current R session\r\n", + "library(tidyverse)\r\n", + "\r\n", + "# Import the data into a tibble\r\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/5-Clustering/data/nigerian-songs.csv\")\r\n", + "\r\n", + "# View the first 5 rows of the data set\r\n", + "df %>% \r\n", + " slice_head(n = 5)\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Ibland kan vi vilja ha lite mer information om vår data. Vi kan titta på `data` och `dess struktur` genom att använda funktionen [*glimpse()*](https://pillar.r-lib.org/reference/glimpse.html):\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Glimpse into the data set\r\n", + "df %>% \r\n", + " glimpse()\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Bra jobbat!💪\n", + "\n", + "Vi kan se att `glimpse()` visar det totala antalet rader (observationer) och kolumner (variabler), samt de första få värdena för varje variabel i en rad efter variabelns namn. 
Dessutom anges *datatypen* för variabeln direkt efter variabelns namn inom `< >`.\n", + "\n", + "`DataExplorer::introduce()` kan sammanfatta denna information på ett snyggt sätt:\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Describe basic information for our data\r\n", + "df %>% \r\n", + " introduce()\r\n", + "\r\n", + "# A visual display of the same\r\n", + "df %>% \r\n", + " plot_intro()\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Fantastiskt! Vi har precis fått veta att vår data inte har några saknade värden.\n", + "\n", + "När vi ändå håller på kan vi utforska vanliga mått för central tendens (t.ex. [medelvärde](https://en.wikipedia.org/wiki/Arithmetic_mean) och [median](https://en.wikipedia.org/wiki/Median)) samt spridningsmått (t.ex. [standardavvikelse](https://en.wikipedia.org/wiki/Standard_deviation)) med hjälp av `summarytools::descr()`.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Describe common statistics\r\n", + "df %>% \r\n", + " descr(stats = \"common\")\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Låt oss titta på de generella värdena i datan. Observera att popularitet kan vara `0`, vilket visar låtar som inte har någon ranking. Vi kommer att ta bort dessa snart.\n", + "\n", + "> 🤔 Om vi arbetar med klustring, en oövervakad metod som inte kräver märkt data, varför visar vi då denna data med etiketter? Under datautforskningsfasen är de användbara, men de är inte nödvändiga för att klustringsalgoritmerna ska fungera.\n", + "\n", + "### 1. 
Utforska populära genrer\n", + "\n", + "Låt oss gå vidare och ta reda på de mest populära genrerna 🎶 genom att räkna antalet förekomster.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Popular genres\r\n", + "top_genres <- df %>% \r\n", + " count(artist_top_genre, sort = TRUE) %>% \r\n", + "# Encode to categorical and reorder the according to count\r\n", + " mutate(artist_top_genre = factor(artist_top_genre) %>% fct_inorder())\r\n", + "\r\n", + "# Print the top genres\r\n", + "top_genres\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Det gick bra! De säger att en bild säger mer än tusen rader i en data frame (fast egentligen säger ingen det 😅). Men du förstår poängen, eller hur?\n", + "\n", + "Ett sätt att visualisera kategoriska data (tecken- eller faktorvariabler) är att använda stapeldiagram. Låt oss skapa ett stapeldiagram över de 10 populäraste genrerna:\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Change the default gray theme\r\n", + "theme_set(theme_light())\r\n", + "\r\n", + "# Visualize popular genres\r\n", + "top_genres %>%\r\n", + " slice(1:10) %>% \r\n", + " ggplot(mapping = aes(x = artist_top_genre, y = n,\r\n", + " fill = artist_top_genre)) +\r\n", + " geom_col(alpha = 0.8) +\r\n", + " paletteer::scale_fill_paletteer_d(\"rcartocolor::Vivid\") +\r\n", + " ggtitle(\"Top genres\") +\r\n", + " theme(plot.title = element_text(hjust = 0.5),\r\n", + " # Rotates the X markers (so we can read them)\r\n", + " axis.text.x = element_text(angle = 90))\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Nu är det mycket enklare att identifiera att vi har `saknade` genrer 🧐!\n", + "\n", + "> En bra visualisering visar dig saker som du inte förväntade dig, eller väcker nya frågor om datan - Hadley Wickham och Garrett Grolemund, [R For Data 
Science](https://r4ds.had.co.nz/introduction.html)\n", + "\n", + "Observera, när den främsta genren beskrivs som `Missing`, betyder det att Spotify inte har klassificerat den, så låt oss ta bort den.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Visualize popular genres\r\n", + "top_genres %>%\r\n", + " filter(artist_top_genre != \"Missing\") %>% \r\n", + " slice(1:10) %>% \r\n", + " ggplot(mapping = aes(x = artist_top_genre, y = n,\r\n", + " fill = artist_top_genre)) +\r\n", + " geom_col(alpha = 0.8) +\r\n", + " paletteer::scale_fill_paletteer_d(\"rcartocolor::Vivid\") +\r\n", + " ggtitle(\"Top genres\") +\r\n", + " theme(plot.title = element_text(hjust = 0.5),\r\n", + " # Rotates the X markers (so we can read them)\r\n", + " axis.text.x = element_text(angle = 90))\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Från den lilla datautforskningen lär vi oss att de tre främsta genrerna dominerar denna dataset. 
Låt oss fokusera på `afro dancehall`, `afropop` och `nigerian pop`, och dessutom filtrera datasetet för att ta bort allt med ett popularitetsvärde på 0 (vilket innebär att det inte klassificerades med en popularitet i datasetet och kan betraktas som brus för våra syften):\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "nigerian_songs <- df %>% \r\n", + " # Concentrate on top 3 genres\r\n", + " filter(artist_top_genre %in% c(\"afro dancehall\", \"afropop\",\"nigerian pop\")) %>% \r\n", + " # Remove unclassified observations\r\n", + " filter(popularity != 0)\r\n", + "\r\n", + "\r\n", + "\r\n", + "# Visualize popular genres\r\n", + "nigerian_songs %>%\r\n", + " count(artist_top_genre) %>%\r\n", + " ggplot(mapping = aes(x = artist_top_genre, y = n,\r\n", + " fill = artist_top_genre)) +\r\n", + " geom_col(alpha = 0.8) +\r\n", + " paletteer::scale_fill_paletteer_d(\"ggsci::category10_d3\") +\r\n", + " ggtitle(\"Top genres\") +\r\n", + " theme(plot.title = element_text(hjust = 0.5))\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Låt oss undersöka om det finns någon tydlig linjär relation mellan de numeriska variablerna i vår datamängd. Denna relation kvantifieras matematiskt med [korrelationsstatistiken](https://en.wikipedia.org/wiki/Correlation).\n", + "\n", + "Korrelationsstatistiken är ett värde mellan -1 och 1 som anger styrkan i en relation. 
Värden över 0 indikerar en *positiv* korrelation (höga värden för en variabel tenderar att sammanfalla med höga värden för den andra), medan värden under 0 indikerar en *negativ* korrelation (höga värden för en variabel tenderar att sammanfalla med låga värden för den andra).\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Narrow down to numeric variables and find correlation\r\n", + "corr_mat <- nigerian_songs %>% \r\n", + " select(where(is.numeric)) %>% \r\n", + " cor()\r\n", + "\r\n", + "# Visualize correlation matrix\r\n", + "corrplot(corr_mat, order = 'AOE', col = c('white', 'black'), bg = 'gold2') \r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Data är inte starkt korrelerad förutom mellan `energy` och `loudness`, vilket är logiskt eftersom hög musik oftast är ganska energisk. `Popularity` har en koppling till `release date`, vilket också är rimligt, eftersom nyare låtar förmodligen är mer populära. Längd och energi verkar också ha en korrelation.\n", + "\n", + "Det ska bli intressant att se vad en klustringsalgoritm kan göra med denna data!\n", + "\n", + "> 🎓 Observera att korrelation inte innebär kausalitet! Vi har bevis på korrelation men inget bevis på kausalitet. En [underhållande webbplats](https://tylervigen.com/spurious-correlations) har några visuella exempel som betonar denna poäng.\n", + "\n", + "### 2. Utforska datadistribution\n", + "\n", + "Låt oss ställa några mer subtila frågor. Är genrerna signifikant olika i uppfattningen av deras dansvänlighet, baserat på deras popularitet? 
Låt oss undersöka datadistributionen för våra tre främsta genrer när det gäller popularitet och dansvänlighet längs en given x- och y-axel med hjälp av [täthetsdiagram](https://www.khanacademy.org/math/ap-statistics/density-curves-normal-distribution-ap/density-curves/v/density-curves).\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Perform 2D kernel density estimation\r\n", + "density_estimate_2d <- nigerian_songs %>% \r\n", + " ggplot(mapping = aes(x = popularity, y = danceability, color = artist_top_genre)) +\r\n", + " geom_density_2d(bins = 5, size = 1) +\r\n", + " paletteer::scale_color_paletteer_d(\"RSkittleBrewer::wildberry\") +\r\n", + " xlim(-20, 80) +\r\n", + " ylim(0, 1.2)\r\n", + "\r\n", + "# Density plot based on the popularity\r\n", + "density_estimate_pop <- nigerian_songs %>% \r\n", + " ggplot(mapping = aes(x = popularity, fill = artist_top_genre, color = artist_top_genre)) +\r\n", + " geom_density(size = 1, alpha = 0.5) +\r\n", + " paletteer::scale_fill_paletteer_d(\"RSkittleBrewer::wildberry\") +\r\n", + " paletteer::scale_color_paletteer_d(\"RSkittleBrewer::wildberry\") +\r\n", + " theme(legend.position = \"none\")\r\n", + "\r\n", + "# Density plot based on the danceability\r\n", + "density_estimate_dance <- nigerian_songs %>% \r\n", + " ggplot(mapping = aes(x = danceability, fill = artist_top_genre, color = artist_top_genre)) +\r\n", + " geom_density(size = 1, alpha = 0.5) +\r\n", + " paletteer::scale_fill_paletteer_d(\"RSkittleBrewer::wildberry\") +\r\n", + " paletteer::scale_color_paletteer_d(\"RSkittleBrewer::wildberry\")\r\n", + "\r\n", + "\r\n", + "# Patch everything together\r\n", + "library(patchwork)\r\n", + "density_estimate_2d / (density_estimate_pop + density_estimate_dance)\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Vi ser att det finns koncentriska cirklar som stämmer överens, oavsett genre. 
Kan det vara så att nigerianska smakpreferenser möts vid en viss nivå av dansvänlighet för denna genre?\n", + "\n", + "Generellt sett är de tre genrerna i linje när det gäller deras popularitet och dansvänlighet. Att identifiera kluster i dessa löst sammanhängande data kommer att vara en utmaning. Låt oss se om ett spridningsdiagram kan ge stöd för detta.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# A scatter plot of popularity and danceability\r\n", + "scatter_plot <- nigerian_songs %>% \r\n", + " ggplot(mapping = aes(x = popularity, y = danceability, color = artist_top_genre, shape = artist_top_genre)) +\r\n", + " geom_point(size = 2, alpha = 0.8) +\r\n", + " paletteer::scale_color_paletteer_d(\"futurevisions::mars\")\r\n", + "\r\n", + "# Add a touch of interactivity\r\n", + "ggplotly(scatter_plot)\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Ett spridningsdiagram med samma axlar visar ett liknande mönster av konvergens.\n", + "\n", + "Generellt sett, för klustring, kan du använda spridningsdiagram för att visa datakluster, så att bemästra denna typ av visualisering är mycket användbart. I nästa lektion kommer vi att ta denna filtrerade data och använda k-means klustring för att upptäcka grupper i denna data som verkar överlappa på intressanta sätt.\n", + "\n", + "## **🚀 Utmaning**\n", + "\n", + "Som förberedelse inför nästa lektion, skapa ett diagram över de olika klustringsalgoritmer du kan upptäcka och använda i en produktionsmiljö. Vilka typer av problem försöker klustringen lösa?\n", + "\n", + "## [**Quiz efter föreläsningen**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/28/)\n", + "\n", + "## **Granskning & Självstudier**\n", + "\n", + "Innan du tillämpar klustringsalgoritmer, som vi har lärt oss, är det en bra idé att förstå naturen av din dataset. 
Läs mer om detta ämne [här](https://www.kdnuggets.com/2019/10/right-clustering-algorithm.html)\n", + "\n", + "Fördjupa din förståelse för klustringstekniker:\n", + "\n", + "- [Träna och utvärdera klustringsmodeller med Tidymodels och vänner](https://rpubs.com/eR_ic/clustering)\n", + "\n", + "- Bradley Boehmke & Brandon Greenwell, [*Hands-On Machine Learning with R*](https://bradleyboehmke.github.io/HOML/)*.*\n", + "\n", + "## **Uppgift**\n", + "\n", + "[Utforska andra visualiseringar för klustring](https://github.com/microsoft/ML-For-Beginners/blob/main/5-Clustering/1-Visualize/assignment.md)\n", + "\n", + "## TACK TILL:\n", + "\n", + "[Jen Looper](https://www.twitter.com/jenlooper) för att ha skapat den ursprungliga Python-versionen av denna modul ♥️\n", + "\n", + "[`Dasani Madipalli`](https://twitter.com/dasani_decoded) för att ha skapat de fantastiska illustrationerna som gör maskininlärningskoncept mer begripliga och lättare att förstå.\n", + "\n", + "Lycka till med lärandet,\n", + "\n", + "[Eric](https://twitter.com/ericntay), Gold Microsoft Learn Student Ambassador.\n" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, vänligen notera att automatiska översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på dess originalspråk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. 
Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ], + "metadata": { + "anaconda-cloud": "", + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "3.4.1" + }, + "coopTranslator": { + "original_hash": "99c36449cad3708a435f6798cfa39972", + "translation_date": "2025-09-06T14:14:37+00:00", + "source_file": "5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/translations/sv/5-Clustering/1-Visualize/solution/notebook.ipynb b/translations/sv/5-Clustering/1-Visualize/solution/notebook.ipynb new file mode 100644 index 000000000..65f097b6e --- /dev/null +++ b/translations/sv/5-Clustering/1-Visualize/solution/notebook.ipynb @@ -0,0 +1,821 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Defaulting to user installation because normal site-packages is not writeable\n", + "Requirement already satisfied: seaborn in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (0.11.2)\n", + "Requirement already satisfied: matplotlib>=2.2 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from seaborn) (3.5.0)\n", + "Requirement already satisfied: numpy>=1.15 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from seaborn) (1.21.4)\n", + "Requirement already satisfied: pandas>=0.23 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from seaborn) (1.3.4)\n", + "Requirement already satisfied: scipy>=1.0 in 
/Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from seaborn) (1.7.2)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (4.28.1)\n", + "Requirement already satisfied: pyparsing>=2.2.1 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (2.4.7)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (1.3.2)\n", + "Requirement already satisfied: pillow>=6.2.0 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (8.4.0)\n", + "Requirement already satisfied: cycler>=0.10 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (0.11.0)\n", + "Requirement already satisfied: packaging>=20.0 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (21.2)\n", + "Requirement already satisfied: setuptools-scm>=4 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (6.3.2)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (2.8.2)\n", + "Requirement already satisfied: pytz>=2017.3 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from pandas>=0.23->seaborn) (2021.3)\n", + "Requirement already satisfied: six>=1.5 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from python-dateutil>=2.7->matplotlib>=2.2->seaborn) (1.16.0)\n", + "Requirement already satisfied: tomli>=1.0.0 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from setuptools-scm>=4->matplotlib>=2.2->seaborn) (1.2.2)\n", + "Requirement already satisfied: setuptools in 
/Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from setuptools-scm>=4->matplotlib>=2.2->seaborn) (59.1.1)\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "!pip install seaborn" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
0SparkyMandy & The JungleCruel Santinoalternative r&b2019144000480.6660.85100.4200.5340000.1100-6.6990.0829133.0155
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
2LITT!LITT!AYLØindie r&b2018207758400.8360.27200.5640.0005370.1100-7.1270.0424130.0054
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
\n", + "
" + ], + "text/plain": [ + " name album \\\n", + "0 Sparky Mandy & The Jungle \n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "2 LITT! LITT! \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "0 Cruel Santino alternative r&b 2019 144000 48 \n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "2 AYLØ indie r&b 2018 207758 40 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "0 0.666 0.8510 0.420 0.534000 0.1100 -6.699 \n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "2 0.836 0.2720 0.564 0.000537 0.1100 -7.127 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "\n", + " speechiness tempo time_signature \n", + "0 0.0829 133.015 5 \n", + "1 0.3600 129.993 3 \n", + "2 0.0424 130.005 4 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv(\"../../data/nigerian-songs.csv\")\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Få information om dataframen\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 530 entries, 0 to 529\n", + "Data columns (total 16 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 name 530 non-null object \n", + " 1 album 530 non-null object \n", + " 2 artist 530 non-null object \n", + " 3 artist_top_genre 530 non-null object \n", + " 4 release_date 530 non-null int64 \n", + " 5 length 530 non-null int64 \n", + " 6 popularity 530 non-null int64 \n", + " 7 danceability 530 
non-null float64\n", + " 8 acousticness 530 non-null float64\n", + " 9 energy 530 non-null float64\n", + " 10 instrumentalness 530 non-null float64\n", + " 11 liveness 530 non-null float64\n", + " 12 loudness 530 non-null float64\n", + " 13 speechiness 530 non-null float64\n", + " 14 tempo 530 non-null float64\n", + " 15 time_signature 530 non-null int64 \n", + "dtypes: float64(8), int64(4), object(4)\n", + "memory usage: 66.4+ KB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "name 0\n", + "album 0\n", + "artist 0\n", + "artist_top_genre 0\n", + "release_date 0\n", + "length 0\n", + "popularity 0\n", + "danceability 0\n", + "acousticness 0\n", + "energy 0\n", + "instrumentalness 0\n", + "liveness 0\n", + "loudness 0\n", + "speechiness 0\n", + "tempo 0\n", + "time_signature 0\n", + "dtype: int64" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.isnull().sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Titta på de allmänna värdena för data. Observera att popularitet kan vara '0' - och det finns många rader med det värdet\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
release_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
count530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000
mean2015.390566222298.16981117.5075470.7416190.2654120.7606230.0163050.147308-4.9530110.130748116.4878643.986792
std3.13168839696.82225918.9922120.1175220.2083420.1485330.0903210.1235882.4641860.09293923.5186010.333701
min1998.00000089488.0000000.0000000.2550000.0006650.1110000.0000000.028300-19.3620000.02780061.6950003.000000
25%2014.000000199305.0000000.0000000.6810000.0895250.6690000.0000000.075650-6.2987500.059100102.9612504.000000
50%2016.000000218509.00000013.0000000.7610000.2205000.7845000.0000040.103500-4.5585000.097950112.7145004.000000
75%2017.000000242098.50000031.0000000.8295000.4030000.8757500.0002340.164000-3.3310000.177000125.0392504.000000
max2020.000000511738.00000073.0000000.9660000.9540000.9950000.9100000.8110000.5820000.514000206.0070005.000000
\n", + "
" + ], + "text/plain": [ + " release_date length popularity danceability acousticness \\\n", + "count 530.000000 530.000000 530.000000 530.000000 530.000000 \n", + "mean 2015.390566 222298.169811 17.507547 0.741619 0.265412 \n", + "std 3.131688 39696.822259 18.992212 0.117522 0.208342 \n", + "min 1998.000000 89488.000000 0.000000 0.255000 0.000665 \n", + "25% 2014.000000 199305.000000 0.000000 0.681000 0.089525 \n", + "50% 2016.000000 218509.000000 13.000000 0.761000 0.220500 \n", + "75% 2017.000000 242098.500000 31.000000 0.829500 0.403000 \n", + "max 2020.000000 511738.000000 73.000000 0.966000 0.954000 \n", + "\n", + " energy instrumentalness liveness loudness speechiness \\\n", + "count 530.000000 530.000000 530.000000 530.000000 530.000000 \n", + "mean 0.760623 0.016305 0.147308 -4.953011 0.130748 \n", + "std 0.148533 0.090321 0.123588 2.464186 0.092939 \n", + "min 0.111000 0.000000 0.028300 -19.362000 0.027800 \n", + "25% 0.669000 0.000000 0.075650 -6.298750 0.059100 \n", + "50% 0.784500 0.000004 0.103500 -4.558500 0.097950 \n", + "75% 0.875750 0.000234 0.164000 -3.331000 0.177000 \n", + "max 0.995000 0.910000 0.811000 0.582000 0.514000 \n", + "\n", + " tempo time_signature \n", + "count 530.000000 530.000000 \n", + "mean 116.487864 3.986792 \n", + "std 23.518601 0.333701 \n", + "min 61.695000 3.000000 \n", + "25% 102.961250 4.000000 \n", + "50% 112.714500 4.000000 \n", + "75% 125.039250 4.000000 \n", + "max 206.007000 5.000000 " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Låt oss undersöka genrerna. 
Ganska många är listade som 'Missing', vilket betyder att de inte är kategoriserade i datasetet med en genre.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "import seaborn as sns\n", + "\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top[:5].index,y=top[:5].values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Ta bort raderna med genren 'Missing', eftersom de låtarna inte är kategoriserade med någon genre i datasetet.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "df = df[df['artist_top_genre'] != 'Missing']\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "df = df[(df['artist_top_genre'] == 'afro dancehall') | (df['artist_top_genre'] == 'afropop') | (df['artist_top_genre'] == 'nigerian pop')]\n", + "df = df[(df['popularity'] > 0)]\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "corrmat = df.corr()\n", + "f, ax = plt.subplots(figsize=(12, 9))\n", + "sns.heatmap(corrmat, vmax=.8, square=True);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.set_theme(style=\"ticks\")\n", + "\n", + "# Show the joint distribution using kernel density estimation\n", + "g = sns.jointplot(\n", + " data=df,\n", + " x=\"popularity\", y=\"danceability\", hue=\"artist_top_genre\",\n", + " kind=\"kde\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Generellt sett överensstämmer de tre genrerna när det gäller deras popularitet och dansbarhet. Ett spridningsdiagram med samma axlar visar ett liknande mönster av konvergens. Prova ett spridningsdiagram för att kontrollera fördelningen av data per genre.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages/seaborn/axisgrid.py:337: UserWarning: The `size` parameter has been renamed to `height`; please update your code.\n", + " warnings.warn(msg, UserWarning)\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.FacetGrid(df, hue=\"artist_top_genre\", size=5) \\\n", + " .map(plt.scatter, \"popularity\", \"danceability\") \\\n", + " .add_legend()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, bör det noteras att automatiserade översättningar kan innehålla fel eller brister. Det ursprungliga dokumentet på dess originalspråk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" + }, + "kernelspec": { + "display_name": "Python 3.7.0 64-bit ('3.7')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.9" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "c61deff2839902ac8cb4ed411eb10fee", + "translation_date": "2025-09-06T14:09:22+00:00", + "source_file": "5-Clustering/1-Visualize/solution/notebook.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sv/5-Clustering/2-K-Means/notebook.ipynb b/translations/sv/5-Clustering/2-K-Means/notebook.ipynb new file mode 100644 index 000000000..5eea38f2b --- 
/dev/null +++ b/translations/sv/5-Clustering/2-K-Means/notebook.ipynb @@ -0,0 +1,231 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "3e5c8ab363e8d88f566d4365efc7e0bd", + "translation_date": "2025-09-06T14:19:41+00:00", + "source_file": "5-Clustering/2-K-Means/notebook.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: seaborn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.11.1)\n", + "Requirement already satisfied: numpy>=1.15 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.19.2)\n", + "Requirement already satisfied: pandas>=0.23 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.1.2)\n", + "Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.4.1)\n", + "Requirement already satisfied: matplotlib>=2.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (3.1.0)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in 
/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2.8.0)\n", + "Requirement already satisfied: pytz>=2017.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2019.1)\n", + "Requirement already satisfied: cycler>=0.10 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (1.1.0)\n", + "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (2.4.0)\n", + "Requirement already satisfied: six>=1.5 in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from python-dateutil>=2.7.3->pandas>=0.23->seaborn) (1.12.0)\n", + "Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from kiwisolver>=1.0.1->matplotlib>=2.2->seaborn) (45.1.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install seaborn" + ] + }, + { + "source": [ + "Börja där vi avslutade i förra lektionen, med data importerad och filtrerad.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "0 Sparky Mandy & The Jungle \n", + "1 shuga rush EVERYTHING YOU HEARD IS 
TRUE \n", + "2 LITT! LITT! \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "0 Cruel Santino alternative r&b 2019 144000 48 \n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "2 AYLØ indie r&b 2018 207758 40 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "0 0.666 0.8510 0.420 0.534000 0.1100 -6.699 \n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "2 0.836 0.2720 0.564 0.000537 0.1100 -7.127 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "\n", + " speechiness tempo time_signature \n", + "0 0.0829 133.015 5 \n", + "1 0.3600 129.993 3 \n", + "2 0.0424 130.005 4 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
0SparkyMandy & The JungleCruel Santinoalternative r&b2019144000480.6660.85100.4200.5340000.1100-6.6990.0829133.0155
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
2LITT!LITT!AYLØindie r&b2018207758400.8360.27200.5640.0005370.1100-7.1270.0424130.0054
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
\n
" + }, + "metadata": {}, + "execution_count": 6 + } + ], + "source": [ + "\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "\n", + "\n", + "df = pd.read_csv(\"../data/nigerian-songs.csv\")\n", + "df.head()" + ] + }, + { + "source": [ + "Vi kommer att fokusera på endast 3 genrer. Kanske kan vi få 3 kluster skapade!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "metadata": {}, + "execution_count": 7 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlYAAAHbCAYAAAAJY9SEAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nO3de7ymc73/8dfbjNROhUwINR0msjvInk07hZLILofaiSJKTQfS+biT2NXu3O6oKL+0f6WURG0dpIOdnTJkO5UMEdNgoaQIw2f/cV1Td2ONGbO+y32vNa/n47Ee676/13Vf9yetWet9f09XqgpJkiRN3GrDLkCSJGm6MFhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiZFwh8Hvm5PuGng+fOHXZ8kTYa4QaikyZZwKfDiKr437FomImFmFYuHXYek0WWPlaShSLhXwicSFiVckfD+hNX7YzslLEg4NOG6hF8nPOdOrjUn4X8Sbkj4dsKnEz4zcPxJCT9N+H3CWQlbDxw7PeGQ/vsfEk5KWLs/tmnC4oSXJFwOnLQC13tJwqV9LZfcWd2Sph+DlaRhORR4DPBo4B+A7YA3DhyfDdwDWB94CXB0wkOWvkhCgGOBHwD3B94D7D1wfDbwdeBfgXWAtwFfXxKees8Dng9sAKwFvGrg2AxgK2ATYNc7u15/zfcD21dxH+CJwHl35T+KpKnNYCVpWJ4PHFLFNVVcBbwT2Gfg+GLg0Cpu6YcQvwf8yzjXmQNsChzWn/tD4FsDx/cFvlbF96q4vYqTgAuApw2cc2QVF1fxJ+CrwOZLvcfbq7ixiptW8HqPSrhnFb+t4hd36b+KpCnNYCXpbtf3Mq0PXDbQfBmw4cDzsSr+vNTxB45zuQf259480Hb5wOMHA3v3w3a/T/g9MHepa1058PhGYM2B57dX8dsVuV4Vv6MLjAcBVyacmPDwcWqWNE0ZrCTd7aooujDz4IHmBwELB
56vm3DPpY4PBpwlFgGzEtYYaNt44PHlwGeqWGvg695VfHhFy13q+Z1er4r/qmJ7uuD2G+DwFXwfSdOAwUrSsBwDHJJw/4QH0M1Z+v8Dx1cHDk64R8JTgB2A48a5zq+AC4G3JayesA2w08Dxo4HnJGyfMKOfNL99wvorWfcyr5ewYcI/J/wdcDPwR+D2lXwfSVOQwUrSsLydbm7S+cDZwGnA+waOX0o3z+pK4CjghVVcsvRF+t6v5wJPBX4HvBX4Cl2woX/Ns+kmy19DN6T4Klby999yrjcDeHNf87XAPwIHrsz7SJqa3MdK0shJ2An4eNXKzU9KOAE4vYp/b1uZJN05e6wkTXkJWyXMTlgt4Zl0Q4EnDLsuSauemcMuQJIa2Ihu/tXadJPLX1TFBcMtSdKqyKFASZKkRhwKlCRJamQkhgLXXXfdmj179rDLkCRJWq4zzzzzmqqaNd6xkQhWs2fPZv78+cMuQ5IkabmSXLasYw4FSpIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1MnPYBbT2D2/4/LBL0DRz5vtfMOwSJElThD1WkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqZHlBqskGyf5QZILkpyf5FV9+zpJTk5yUf997b49ST6aZEGSc5JsMdn/IyRJkkbBivRYLQZeV1WbAY8HDkiyGfBm4JSqmgOc0j8HeDowp/+aBxzevGpJkqQRtNxgVVWLquqs/vENwC+ADYFdgaP7044Gdusf7wp8vjqnA2sl2aB55ZIkSSPmLs2xSjIbeBzwU2C9qlrUH7oSWK9/vCFw+cDLrujblr7WvCTzk8wfGxu7i2VLkiSNnhUOVknWBI4DXl1Vfxg8VlUF1F1546o6oqrmVtXcWbNm3ZWXSpIkjaQVClZJVqcLVV+oqq/1zVctGeLrv1/dty8ENh54+UZ9myRJ0rS2IqsCA3wW+EVVfWjg0InAvv3jfYETBtpf0K8OfDxw/cCQoSRJ0rQ1cwXO2RrYBzg3ydl921uB9wDHJtkfuAzYoz92ErAzsAC4EXhh04olSZJG1HKDVVX9GMgyDm8/zvkFHDDBuiRJkqYcd16XJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqZHlBqskRyW5Osl5A21fTnJ2/3VpkrP79tlJbho49qnJLF6SJGmUzFyBcz4HfBz4/JKGqnruksdJPghcP3D+xVW1easCJUmSporlBquqOjXJ7PGOJQmwB/CUtmVJkiRNPROdY/Uk4Kqqumig7SFJfp7kR0metKwXJpmXZH6S+WNjYxMsQ5IkafgmGqz2Ao4ZeL4IeFBVPQ54LfDFJPcd74VVdURVza2qubNmzZpgGZIkScO30sEqyUzgWcCXl7RV1c1VdW3/+EzgYuAREy1SkiRpKphIj9VTgV9W1RVLGpLMSjKjf/xQYA5wycRKlCRJmhpWZLuFY4CfAJskuSLJ/v2hPfnbYUCAbYBz+u0Xvgq8rKqua1mwJEnSqFqRVYF7LaN9v3HajgOOm3hZkiRJU487r0uSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrS
ZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUyHKDVZKjklyd5LyBtnckWZjk7P5r54Fjb0myIMmFSXacrMIlSZJGzYr0WH0O2Gmc9g9X1eb910kASTYD9gT+vn/NJ5PMaFWsJEnSKFtusKqqU4HrVvB6uwJfqqqbq+rXwAJgywnUJ0mSNGVMZI7VgUnO6YcK1+7bNgQuHzjnir7tDpLMSzI/yfyxsbEJlCFJkjQaVjZYHQ48DNgcWAR88K5eoKqOqKq5VTV31qxZK1mGJEnS6FipYFVVV1XVbVV1O3Akfx3uWwhsPHDqRn2bJEnStLdSwSrJBgNPdweWrBg8EdgzyRpJHgLMAX42sRIlSZKmhpnLOyHJMcB2wLpJrgAOAbZLsjlQwKXASwGq6vwkxwIXAIuBA6rqtskpXZIkabQsN1hV1V7jNH/2Ts5/F/CuiRQlSZI0FbnzuiRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNLDdYJTkqydVJzhtoe3+SXyY5J8nxSdbq22cnuSnJ2f3XpyazeEmSpFGyIj1WnwN2WqrtZOBRVfUY4FfAWwaOXVxVm/dfL2tTpiRJ0uhbbrCqqlOB65Zq+25VLe6fng5sNAm1SZIkTSkt5li9CPjWwPOHJPl5kh8ledKyXpRkXpL5SeaPjY01KEOSJGm4JhSskvwrsBj4Qt+0CHhQVT0OeC3wxST3He+1VXVEVc2tqrmzZs2aSBmSJEkjYaWDVZL9gGcAz6+qAqiqm6vq2v7xmcDFwCMa1ClJkjTyVipYJdkJeCOwS1XdONA+K8mM/vFDgTnAJS0KlSRJGnUzl3dCkmOA7YB1k1wBHEK3CnAN4OQkAKf3KwC3AQ5LcitwO/Cyqrpu3AtLkiRNM8sNVlW11zjNn13GuccBx020KEmSpKnIndclSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJamSFglWSo5JcneS8gbZ1kpyc5KL++9p9e5J8NMmCJOck2WKyipckSRolK9pj9Tlgp6Xa3gycUlVzgFP65wBPB+b0X/OAwydepiRJ0uhboWBVVacC1y3VvCtwdP/4aGC3gfbPV+d0YK0kG7QoVpIkaZRNZI7VelW1qH98JbBe/3hD4PKB867o2/5GknlJ5ieZPzY2NoEyJEmSRkOTyetVVUDdxdccUVVzq2rurFmzWpQhSZI0VBMJVlctGeLrv1/dty8ENh44b6O+TZIkaVqbSLA6Edi3f7wvcMJA+wv61YGPB64fGDKUJEmatmauyElJjgG2A9ZNcgVwCPAe4Ngk+wOXAXv0p58E7AwsAG4EXti4ZkmSpJG0QsGqqvZaxqHtxzm3gAMmUpQkSdJU5M7rkiRJj
RisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDUyc2VfmGQT4MsDTQ8F3g6sBbwEGOvb31pVJ610hZIkSVPESgerqroQ2BwgyQxgIXA88ELgw1X1gSYVSpIkTRGthgK3By6uqssaXU+SJGnKaRWs9gSOGXh+YJJzkhyVZO3xXpBkXpL5SeaPjY2Nd4okSdKUMuFgleQewC7AV/qmw4GH0Q0TLgI+ON7rquqIqppbVXNnzZo10TIkSZKGrkWP1dOBs6rqKoCquqqqbquq24EjgS0bvIckSdLIaxGs9mJgGDDJBgPHdgfOa/AekiRJI2+lVwUCJLk3sAPw0oHm9yXZHCjg0qWOSZIkTVsTClZV9Sfg/ku17TOhiiRJkqYod16XJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKmRmcMuQNJd95vDHj3sEjTNPOjt5w67BGlasMdKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWpk5kQvkORS4AbgNmBxVc1Nsg7wZWA2cCmwR1X9bqLvJUmSNMpa9Vg9uao2r6q5/fM3A6dU1RzglP65JEnStDZZQ4G7Akf3j48Gdpuk95EkSRoZLYJVAd9NcmaSeX3belW1qH98JbDe0i9KMi/J/CTzx8bGGpQhSZI0XBOeYwU8saoWJnkAcHKSXw4erKpKUku/qKqOAI4AmDt37h2OS5IkTTUT7rGqqoX996uB44EtgauSbADQf796ou8jSZI06iYUrJLcO8l9ljwGngacB5wI7Nufti9wwkTeR5IkaSqY6FDgesDxSZZc64tV9e0kZwDHJtkfuAzYY4LvI0mSNPImFKyq6hLgseO0XwtsP5FrS5IkTTXuvC5JktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIysdrJJsnOQHSS5Icn6SV/Xt70iyMMnZ/dfO7cqVJEkaXTMn8NrFwOuq6qwk9wHOTHJyf+zDVfWBiZcnSZI0dax0sKqqRcCi/vENSX4BbNiqMEmSpKmmyRyrJLOBxwE/7ZsOTHJOkqOSrL2M18xLMj/J/LGxsRZlSJIkDdWEg1WSNYHjgFdX1R+Aw4GHAZvT9Wh9cLzXVdURVTW3qubOmjVromVIkiQN3YSCVZLV6ULVF6rqawBVdVVV3VZVtwNHAltOvExJkqTRN5FVgQE+C/yiqj400L7BwGm7A+etfHmSJElTx0RWBW4N7AOcm+Tsvu2twF5JNgcKuBR46YQqlCRJmiImsirwx0DGOXTSypcjSZI0dbnzuiRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDUyke0WJ
EmaNFt/bOthl6Bp5rRXnjbp72GPlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDUyacEqyU5JLkyyIMmbJ+t9JEmSRsWkBKskM4BPAE8HNgP2SrLZZLyXJEnSqJisHqstgQVVdUlV3QJ8Cdh1kt5LkiRpJKSq2l80+Rdgp6p6cf98H2Crqjpw4Jx5wLz+6SbAhc0L0Z1ZF7hm2EVIk8yfc60K/Dm/+z24qmaNd2Dm3V3JElV1BHDEsN5/VZdkflXNHXYd0mTy51yrAn/OR8tkDQUuBDYeeL5R3yZJkjRtTVawOgOYk+QhSe4B7AmcOEnvJUmSNBImZSiwqhYnORD4DjADOKqqzp+M99JKcxhWqwJ/zrUq8Od8hEzK5HVJkqRVkTuvS5IkNWKwkiRJasRgpSaSzE1yn2HXIUnSMBms1MpLgO8ariRp6kmSYdcwXRisNCFJtgCoqpcCZwLHG640VYz3x8Q/MFrVJElVVZKtk+yfZPt+qyStBFcFakKSnA7cWFVP6Z8fDswBdq+qG4ZanLQCkmxDt6HxH4Bv9n9gVquq24dcmnS3SfJk4LPAl4FnAEcDX6+qBUMtbAqyx0oTUlWPB2Yk+Ub//OXARdhzpRG2pFcqyVzgKGBrYG/g60tClT1XWlUk2QR4GfDqqnoLsC/dB+QdhlrYFGWw0l028EdpJkBVbQvMWipc/RL4fpI1h1aotAx9r9T2wFuAF1fVK4D9gKuBjyw5Z3gVSpMvPWAb4GHAjknuXVVnAccA85KsPdQipyCDle6SJWPx/dMNk8yBv/Rc3T/JN/vnBwKnAusMp1JpudYCdgf+sX9+C/BpwLklmtYGemPXBWZW1ZHAu4DQ3YIO4Erghr5Nd4FzrLRSkrwO2Bm4J/D9qjq4bz8VoKq2GWJ50h0MTNBdD7ihqm5M8s/A14Gdq+rkJDsA76MbArnWXitNV0l2Bg4DFgJ/AvYHnk03DLga3S3v3l9V3xxakVPUpNwrUNNbkhcBu1TVtkk+Brw2yd9V1euqapsk30mycVVdPuxapSX6UPVM4JVAJTmNrodqN+A7SY6l+4R+WFVdM8RSpUmV5JHAO4EDgbOBLwL/r6r2TPJnYEfg3CWhaqmRCi2HQ4FarnEm8S4A9knySmBD4DHA3kk+BVBVOxqqNGqSPIyuN+oNwAfoQtShwLfohgSfCfxPVR2/ZP6gNE3dDFwAnFVVN1bVbsAGSQ6g68H9KfDYJHsaqu46f3louZb8o+onot9cVacmuR+wLfC+qrq4/7S/VZJ1quq6YdYrDRr4w7A2cFlV/W/f/htgK+CpVXVCkn2BY5P8uqp+OLyKpbYGhsFn0HWoXAdsAMwFftyf9iW6X/eLkxwN3Ar8wFB119ljpWVK8rAkm/WPXwt8nm45+gOq6nrg18Czk7yZrufq2YYqjYqBntZ79d/PAxYnORCgqi4ELgc2659/FfgXYNHdXKo0qfpQtStwLN0+VY8EPgF8LMmBSV5MNyy4oD//1qo6uqquGlrRU5iT1zWuJPcCPgZcRddlPA94Od2ta3YHtqALU7sBTwYOqqrzhlOtNL4kO9H9zF4CnA4U3Z5Va9J9Qv80sF9V/Y9DHpqukmwKfAb4d7qVgO8A9qHrldoR2Aj4alV9d1g1TicGKy1Tv5XCa4H7AudX1bv79g8DOwFPqqprktyzqv48xFKlO0jyeOC9dB8QHkO3jcKtdJ/aX0230/r3q+obQytSmmRJHgV8ELiwqg7q23YEPkf3O9yd1RtzKFB/Y3CielVdBLwbuB54TJLH9O2vAf4b+EE/Zn/LM
GqVliXJhnQT1H/aD/G9D/gh3bySRVW1P/CGqvqGO6xrmvsV3Z5Uj0wyJ8kaVfUd4Dhg1nBLm54MVvqLwaGQJM9NshuwKV2v1fXA7gPhah7dpN/bvKeaRtBNdJNy90yyVVX9saq+DTyIrveKqlrcf7fbXtNSkhlVdQvwYrq5g68HdkmyLfAsYPEw65uuDFb6i4FQdSDdXj8A36D7Q/ReYH26bRb+vj929d1epDSOgdssPSrJdnRzqN5D11N1WJKn90PbGwO/H1qh0t2k/6B8W5KZVXUrXbhaDfhXulC1X1WdYY9tewYr/UWS1ZJsQDcZfXvgocApwM+r6hK6YcGZdBPa/aSvkdGvetoZOAF4Id1ePM+kG/47jW4DxE8AL6qqs/xjoulm4MPFnCTrL2nvt0+Y2fdcvQKYD/wdcJYLNiaHwWoVt9QfmBl0+5tcS7cr7zbAc6rq1iQv7895vbtSa9QkuTfdH419qmpfuo0/twXWo/tZPhj4I93PtzStDOxTtSNwIt0HiwOSPBz+JlzdSvfv5AF0NyB3L8tJYLBahS01p2pvYF5V3Uy3JP0gun2pbkzyPLr7SFVV3Ta8iqW/SrJa//0f6XaSvgbYBKCqTqDbt+oN/enH0n1SPyTJPe/+aqXJ04equXTDfc8EXgf8PbDbUuFqyZyr5wAf7IOWGjOtrsIGQtUBwIvo9jWhql6aZC3g1CQ/p9uder+qumJoxUq9JPeqqpuq6vYkTwQOp7tx7M+AjZPMrar5dCtXtwBmVNXVSY4AbndrEE03Se5DNwS+Rb99woL+g8dewHOTfKWqftXPuVqtD1e/HWbN05n7WK3ikqwNHAG8qaou6Zfi3twf24muJ+DSqvr1MOuU4C978vwH8Ay6rRMOp9vY8DNJHgocQLfIYjHwD8DBVXX8sOqVJsvS86OSbAJ8lG739Ff2Hzy2A54PvNvf4Xcfg9UqZrzJikm+Rrf673MDvVhbAedU1U1DKFO6gySr0wWpn9L9vD6NbthjbeAFVfXbJOvS7SK9KbCgquY7QVfTzcCcqh3othBJ/+HiEcCb6Ta/fW0frtauqt8NteBVjHOsViFLzama03/CAfgO8GDgn/pjzwXeRrdkXRolC+kmpX+Fbs7UYcDZwEFJ1q+qa6rq7Kr6Uj8c6OpVTSv9UF4l+We6HdWvoNtS5ANV9Su61dvrAx/vX3L9kEpdZdljtYpYKlS9lm5O1U3AfwH/RnfvqMfRDaE8DHheVZ07nGqlv7XUJ/T/BH5YVXv2x7amGxq8F92Qh/uradpJ8hBgtaq6uO+Z/U/gNcCSXqoNge9V1f79h+Y1quqc4VW86nLy+ipiIFQ9HngC8ERgDeAMYHFVHdzvYfVwuiGURUMrVhowEKoeSncLjmcBr07yTrqVTaf1E3V3oxsWNFhpOnoCcFGSK/p7tM6j+3k/lG4+4Wy6Ses3VdWBQ6xzlWewWoUkeSRwCF2v1GpVdVW/VP0nSR5YVa+gu+2BNDL6ULUL3bDfAuAS4NN0S8oPSvLRqvrvJOdWlbuqa1qqqi8kWRM4I8neVXVOkgcCZ/ZzqdYHPkQ3tUND5ByraWzp3aWr6hfAkXTBarsk61bVVcDWwBOTrOeO1Bo1fS/rwcCOwPF0Gxw+je4my9sCr+s3PzRUadoZ2FF9R+BRdEOAR/YrZC8F7pfkk3Q3VT6hqk729/hwOcdqmhpn88916O5y/h3g2XTDJl8HTu33+Jnh5p8aRUk2ottaYW26XdSfB3yKbhf1zwFjVXXG0AqUJlmSLYGPAK+pqtP7ebLPo/s9Dt39XP9UVT8aVo36K4cCp6mBUPUaYFe6VVRvotvs893AbcB+wK1JvgHcPpxKpTvXb0x7RZJ3AV+oqgVJPk93d4Dzq+qy4VYoTZ4kGwNvBM6tqtMBqupDfafUyXS3HTtpiCVqKQaraaa/fcE6VfWzfk7VFnQ3VX4D3f/f69NtpXAY3ZDgmVVlqNJUcC7w0n4/q2cBrzJUaRWwG
DgH2DXJTlX1bfhLuJoBrDXU6nQHBqtppL+twf7A6kluBf6Xbhnu04Gdge3ptlk4iO7WHocOq1ZpJZxEt5J1F+BdVXXakOuRmh/RBFIAAAVUSURBVBtYBftPdKtgf0N3t4HfA7snubWqTgGoqvcPsVQtg5PXp4l+07gb6HamXgzsCTyiqhYC9wN+1t8f6hbgW3SrqqQpo6r+UFVHA8+tqv9ygq6moz5UPQ04ClgPOJNugdGJdD1X+/XHNaLssZomBobzdgQeC2wC3DPJZ4CfAJ/t96naDtihqq4cSqHSxN0G7qiu6affj20t4KXA7nSLji4Azuq3x/kKXa+t2+KMMFcFTiNJngR8DNgSeDywE7A63XyqNek2kbugqi4ZWpGSpDuV5E10Iw1PAZ7f77a+H3AqcKnzYkebQ4HTy5rAtVV1S1WdSrevyVPo7hm1TlV901AlSaMnyeZJDumf3hvYB9i7D1WPpVvV/UBD1ehzKHB6+RmwMMmewFeq6swkp9EF6KuGW5okadDARPUnAc8BdkxydVW9PcmmwCFJFgObA2+qqh8PtWCtEIPV9HI98GO6vaqelmQ+3T0Bn11V1wy1MkkS8NdA1YeqbYAvAAcCC4EnJ1mjqvZI8kS6jXE/3n9QjnMLR59zrKaYfvXfMruCk9wL2JRu4uOawGer6vy7qz5J0rL19/d7JPDDqrqtvzPGhlX13v5egJsD7wWOraqPDLNWrRyD1RTV/2N8EHADcMx4PVL9/dMW3+3FSZLGlWRX4CLgCrqtcbYHPky3WvvX/crAo+nmWX21qr44tGK1Upy8PgUleRHdxp8X093376Akj+6PZcn+PoYqSRotVXUCcCXwSbp7/X2X7t6XH+nnVT2G7t6YFwEbDqtOrTznWE0B44yrbwe8vqq+neRU4GC6DUHPdfxdkkbP4O/xqrouyY+Ap9Ft2nw8EOA/6Xqx9qe7HdkO/S2cFvu7fepwKHAKGFg58jLgDLpb1NwT+FD/D/QhdLv07l5Vvx9mrZKk8SXZFng08P2quiDJXnS/z79eVV9Lcu/+1C3p7o6xu3Nkpx6HAkdYkk3gL7c4eBawB/BbunC1Ft3Kv7WAR9F9yrllWLVKku5oydSMJFvRDf9tC7wxyUuq6hjgm8DeSfYA/kz3ofkJwK6GqqnJocARlWRH4PAkW9CNt78YOK+qFgGLkmwMbNO33wN4ZVXdOLSCJUl30H8w3hI4FNirqs7p9xp8Qh+ujkwyA7iwqm4Drk3y/v7erpqCDFYjKMlMuq7gg4HN6Jbf/gDYNckz+h3UP5PkfnR7nPypqsaGV7Ek6U6sBTwV2IHuRspfBW6nn0NVVZ+Ev9nfylA1hRmsRlBVLU5yMfA2uhvOPpmui/gmYJcki6vq21V1Pd2moJKkEVVV3+2nc/x7kt9W1TFJvgrMAP534DwnPU8DBqvRdQ5wI/AH4H5VdU2Sr9F9ytk3ya1VdcpQK5QkrZCqOrG/Pc2/JblHVR0NHDPsutSeqwJHxOBS3CT3AG7rd+V9Pd2NlA+pqjOSbES3iuSb/XwrSdIUkWQX4D10Q4NXelPl6cdgNQKWClUH0s2r+gPwjqr6c5K30t3/7z1V9ZMkM/pJjpKkKSbJLOfFTl8GqxGS5BXAc4HnAWcB3wPeXlUXJ3kn8HBgv6r68xDLlCRJy2CwGhFJ7gt8iG4l4HOAnYGr6bZaeHlVLUhy/6q6dohlSpKkO2GwGiFJ1gA2Bf6jqp7cbyw3RrcD7zuq6tahFihJku6UqwJHSFXdnORGYGZ/U+UHA6cAnzZUSZI0+uyxGjF9r9Wr6VaMPBB4TlVdMNyqJEnSijBYjaD+bubrA7dX1cJh1yNJklaMwUqSJKmR1YZdgCRJ0nRhsJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmN/B/Djeb5PsBsCgAAAABJRU5ErkJggg==\n" + }, + "metadata": { + 
"needs_background": "light" + } + } + ], + "source": [ + "df = df[(df['artist_top_genre'] == 'afro dancehall') | (df['artist_top_genre'] == 'afropop') | (df['artist_top_genre'] == 'nigerian pop')]\n", + "df = df[(df['popularity'] > 0)]\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. \n", + "5 Kasala Pioneers \n", + "6 Pull Up Everything Pretty \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "5 DRB Lasgidi nigerian pop 2020 184800 26 \n", + "6 prettyboydo nigerian pop 2018 202648 29 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "5 0.803 0.1270 0.525 0.000007 0.1290 -10.034 \n", + "6 0.818 0.4520 0.587 0.004490 0.5900 -9.840 \n", + "\n", + " speechiness tempo time_signature \n", + "1 0.3600 129.993 3 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 \n", + "5 0.1970 100.103 4 \n", + "6 0.1990 95.842 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
5KasalaPioneersDRB Lasgidinigerian pop2020184800260.8030.12700.5250.0000070.1290-10.0340.1970100.1034
6Pull UpEverything Prettyprettyboydonigerian pop2018202648290.8180.45200.5870.0044900.5900-9.8400.199095.8424
\n
" + }, + "metadata": {}, + "execution_count": 8 + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, vänligen notera att automatiska översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på sitt originalspråk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sv/5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb b/translations/sv/5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb new file mode 100644 index 000000000..df725287e --- /dev/null +++ b/translations/sv/5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb @@ -0,0 +1,640 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "anaconda-cloud": "", + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "3.4.1" + }, + "colab": { + "name": "lesson_14.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "coopTranslator": { + "original_hash": "ad65fb4aad0a156b42216e4929f490fc", + "translation_date": "2025-09-06T14:27:39+00:00", + "source_file": "5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb", + "language_code": "sv" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "GULATlQXLXyR" + }, + "source": [ + "## Utforska K-Means-klustring med R och principer för Tidy-data.\n", + "\n", + "### [**Quiz före 
föreläsningen**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/29/)\n", + "\n", + "I den här lektionen kommer du att lära dig hur man skapar kluster med hjälp av Tidymodels-paketet och andra paket i R-ekosystemet (vi kallar dem vänner 🧑‍🤝‍🧑) samt den nigerianska musikdatamängden som du importerade tidigare. Vi kommer att gå igenom grunderna i K-Means för klustring. Kom ihåg att, som du lärde dig i den tidigare lektionen, finns det många sätt att arbeta med kluster, och metoden du använder beror på din data. Vi kommer att prova K-Means eftersom det är den vanligaste klustringstekniken. Nu kör vi!\n", + "\n", + "Begrepp du kommer att lära dig om:\n", + "\n", + "- Silhuettvärdering\n", + "\n", + "- Armbågmetoden\n", + "\n", + "- Tröghet\n", + "\n", + "- Varians\n", + "\n", + "### **Introduktion**\n", + "\n", + "[K-Means-klustring](https://wikipedia.org/wiki/K-means_clustering) är en metod som härstammar från signalbehandlingsområdet. Den används för att dela upp och gruppera data i `k kluster` baserat på likheter i deras egenskaper.\n", + "\n", + "Klustrerna kan visualiseras som [Voronoi-diagram](https://wikipedia.org/wiki/Voronoi_diagram), som inkluderar en punkt (eller 'frö') och dess motsvarande område.\n", + "\n", + "

\n", + " \n", + "

Infografik av Jen Looper
\n", + "\n", + "\n", + "K-Means-klustring har följande steg:\n", + "\n", + "1. Dataanalytikern börjar med att specificera det önskade antalet kluster som ska skapas.\n", + "\n", + "2. Därefter väljer algoritmen slumpmässigt K observationer från datamängden som ska fungera som de initiala centren för klustren (dvs. centroiderna).\n", + "\n", + "3. Sedan tilldelas varje återstående observation till sin närmaste centroid.\n", + "\n", + "4. Därefter beräknas de nya medelvärdena för varje kluster och centroiden flyttas till medelvärdet.\n", + "\n", + "5. Nu när centren har räknats om kontrolleras varje observation igen för att se om den kanske är närmare ett annat kluster. Alla objekt tilldelas på nytt med hjälp av de uppdaterade klustermedelvärdena. Stegen för klustertilldelning och centroiduppdatering upprepas iterativt tills klustertilldelningarna slutar förändras (dvs. när konvergens uppnås). Vanligtvis avslutas algoritmen när varje ny iteration resulterar i försumbar rörelse av centroiderna och klustren blir statiska.\n", + "\n", + "
\n", + "\n", + "> Observera att på grund av slumpmässigheten i de initiala k observationerna som används som startcentroider kan vi få något olika resultat varje gång vi tillämpar proceduren. Av denna anledning använder de flesta algoritmer flera *slumpmässiga starter* och väljer iterationen med lägst WCSS. Därför rekommenderas det starkt att alltid köra K-Means med flera värden på *nstart* för att undvika ett *oönskat lokalt optimum.*\n", + "\n", + "
\n", + "\n", + "Den här korta animationen med [illustrationer](https://github.com/allisonhorst/stats-illustrations) av Allison Horst förklarar klustringsprocessen:\n", + "\n", + "

\n", + " \n", + "

Illustration av @allison_horst
\n", + "\n", + "\n", + "En grundläggande fråga som uppstår vid klustring är denna: hur vet du hur många kluster du ska dela upp din data i? En nackdel med att använda K-Means är att du måste fastställa `k`, det vill säga antalet `centroider`. Lyckligtvis hjälper `armbågmetoden` till att uppskatta ett bra startvärde för `k`. Du kommer att prova det om en stund.\n", + "\n", + "### **Förkunskaper**\n", + "\n", + "\n", + "\n", + "Vi fortsätter precis där vi slutade i [föregående lektion](https://github.com/microsoft/ML-For-Beginners/blob/main/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb), där vi analyserade datamängden, skapade många visualiseringar och filtrerade datamängden till intressanta observationer. Se till att kolla in den!\n", + "\n", + "Vi kommer att behöva några paket för att klara av den här modulen. Du kan installera dem med: `install.packages(c('tidyverse', 'tidymodels', 'cluster', 'summarytools', 'plotly', 'paletteer', 'factoextra', 'patchwork'))`\n", + "\n", + "Alternativt kontrollerar skriptet nedan om du har de paket som krävs för att slutföra den här modulen och installerar dem åt dig om några saknas.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "ah_tBi58LXyi" + }, + "source": [ + "suppressWarnings(if(!require(\"pacman\")) install.packages(\"pacman\"))\n", + "\n", + "pacman::p_load('tidyverse', 'tidymodels', 'cluster', 'summarytools', 'plotly', 'paletteer', 'factoextra', 'patchwork')\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7e--UCUTLXym" + }, + "source": [ + "## 1. En dans med data: Begränsa till de 3 mest populära musikgenrerna\n", + "\n", + "Det här är en sammanfattning av vad vi gjorde i föregående lektion. 
Låt oss analysera och bearbeta lite data!\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Ycamx7GGLXyn" + }, + "source": [ + "# Load the core tidyverse and make it available in your current R session\n", + "library(tidyverse)\n", + "\n", + "# Import the data into a tibble\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/5-Clustering/data/nigerian-songs.csv\", show_col_types = FALSE)\n", + "\n", + "# Narrow down to top 3 popular genres\n", + "nigerian_songs <- df %>% \n", + " # Concentrate on top 3 genres\n", + " filter(artist_top_genre %in% c(\"afro dancehall\", \"afropop\",\"nigerian pop\")) %>% \n", + " # Remove unclassified observations\n", + " filter(popularity != 0)\n", + "\n", + "\n", + "\n", + "# Visualize popular genres using bar plots\n", + "theme_set(theme_light())\n", + "nigerian_songs %>%\n", + " count(artist_top_genre) %>%\n", + " ggplot(mapping = aes(x = artist_top_genre, y = n,\n", + " fill = artist_top_genre)) +\n", + " geom_col(alpha = 0.8) +\n", + " paletteer::scale_fill_paletteer_d(\"ggsci::category10_d3\") +\n", + " ggtitle(\"Top genres\") +\n", + " theme(plot.title = element_text(hjust = 0.5))\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b5h5zmkPLXyp" + }, + "source": [ + "🤩 Det gick bra!\n", + "\n", + "## 2. Mer datautforskning.\n", + "\n", + "Hur ren är denna data? Låt oss kontrollera för avvikare med hjälp av låddiagram. Vi kommer att fokusera på numeriska kolumner med färre avvikare (även om du skulle kunna rensa bort avvikarna). Låddiagram kan visa datans intervall och hjälpa till att välja vilka kolumner som ska användas. Observera att låddiagram inte visar varians, en viktig aspekt av bra data som kan klustras. 
Se gärna [denna diskussion](https://stats.stackexchange.com/questions/91536/deduce-variance-from-boxplot) för mer information.\n", + "\n", + "[Låddiagram](https://en.wikipedia.org/wiki/Box_plot) används för att grafiskt visa fördelningen av `numerisk` data, så låt oss börja med att *välja* alla numeriska kolumner tillsammans med de populära musikgenrerna.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "HhNreJKLLXyq" + }, + "source": [ + "# Select top genre column and all other numeric columns\n", + "df_numeric <- nigerian_songs %>% \n", + " select(artist_top_genre, where(is.numeric)) \n", + "\n", + "# Display the data\n", + "df_numeric %>% \n", + " slice_head(n = 5)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uYXrwJRaLXyq" + }, + "source": [ + "Se hur urvalshjälparen `where` gör detta enkelt 💁? Utforska fler sådana funktioner [här](https://tidyselect.r-lib.org/).\n", + "\n", + "Eftersom vi ska skapa ett låddiagram för varje numerisk egenskap och vill undvika att använda loopar, låt oss omformatera våra data till ett *längre* format som gör det möjligt för oss att dra nytta av `facets` - delgrafer som var och en visar en delmängd av data.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "gd5bR3f8LXys" + }, + "source": [ + "# Pivot data from wide to long\n", + "df_numeric_long <- df_numeric %>% \n", + " pivot_longer(!artist_top_genre, names_to = \"feature_names\", values_to = \"values\") \n", + "\n", + "# Print out data\n", + "df_numeric_long %>% \n", + " slice_head(n = 15)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-7tE1swnLXyv" + }, + "source": [ + "Mycket längre! Nu är det dags för några `ggplots`! 
Så vilken `geom` ska vi använda?\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "r88bIsyuLXyy" + }, + "source": [ + "# Make a box plot\n", + "df_numeric_long %>% \n", + " ggplot(mapping = aes(x = feature_names, y = values, fill = feature_names)) +\n", + " geom_boxplot() +\n", + " facet_wrap(~ feature_names, ncol = 4, scales = \"free\") +\n", + " theme(legend.position = \"none\")\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EYVyKIUELXyz" + }, + "source": [ + "Easy-gg!\n", + "\n", + "Nu kan vi se att dessa data är lite brusiga: genom att observera varje kolumn som ett låddiagram kan du se avvikare. Du skulle kunna gå igenom datasetet och ta bort dessa avvikare, men det skulle göra datan ganska minimal.\n", + "\n", + "För tillfället, låt oss välja vilka kolumner vi ska använda för vår klusterövning. Låt oss välja de numeriska kolumnerna med liknande intervall. Vi skulle kunna koda `artist_top_genre` som numerisk, men vi hoppar över det för tillfället.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "-wkpINyZLXy0" + }, + "source": [ + "# Select variables with similar ranges\n", + "df_numeric_select <- df_numeric %>% \n", + " select(popularity, danceability, acousticness, loudness, energy) \n", + "\n", + "# Normalize data\n", + "# df_numeric_select <- scale(df_numeric_select)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D7dLzgpqLXy1" + }, + "source": [ + "## 3. Beräkning av k-means-klustring i R\n", + "\n", + "Vi kan beräkna k-means i R med den inbyggda funktionen `kmeans`, se `help(\"kmeans()\")`. Funktionen `kmeans()` accepterar en data frame med enbart numeriska kolumner som sitt primära argument.\n", + "\n", + "Det första steget när man använder k-means-klustring är att ange antalet kluster (k) som ska genereras i den slutliga lösningen. 
Vi vet att det finns 3 musikgenrer som vi har identifierat från datasetet, så låt oss prova med 3:\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "uC4EQ5w7LXy5" + }, + "source": [ + "set.seed(2056)\n", + "# Kmeans clustering for 3 clusters\n", + "kclust <- kmeans(\n", + " df_numeric_select,\n", + " # Specify the number of clusters\n", + " centers = 3,\n", + " # How many random initial configurations\n", + " nstart = 25\n", + ")\n", + "\n", + "# Display clustering object\n", + "kclust\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hzfhscWrLXy-" + }, + "source": [ + "Kmeans-objektet innehåller flera delar av information som förklaras väl i `help(\"kmeans()\")`. För tillfället fokuserar vi på några få. Vi ser att data har delats in i 3 kluster med storlekarna 65, 110, 111. Utdata innehåller också klustercentra (medelvärden) för de 3 grupperna över de 5 variablerna.\n", + "\n", + "Klustervektorn är klustertilldelningen för varje observation. Låt oss använda funktionen `augment` för att lägga till klustertilldelningen till den ursprungliga datamängden.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "0XwwpFGQLXy_" + }, + "source": [ + "# Add predicted cluster assignment to data set\n", + "augment(kclust, df_numeric_select) %>% \n", + " relocate(.cluster) %>% \n", + " slice_head(n = 10)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NXIVXXACLXzA" + }, + "source": [ + "Perfekt, vi har precis delat upp vårt dataset i en uppsättning av 3 grupper. Så, hur bra är vår klustring 🤷? Låt oss ta en titt på `Silhouette score`.\n", + "\n", + "### **Silhouette score**\n", + "\n", + "[Silhouette-analys](https://en.wikipedia.org/wiki/Silhouette_(clustering)) kan användas för att studera separationsavståndet mellan de resulterande klustren. 
Denna poäng varierar från -1 till 1, och om poängen är nära 1 är klustret tätt och väl separerat från andra kluster. Ett värde nära 0 representerar överlappande kluster med prover som ligger mycket nära beslutsgränsen för de närliggande klustren. [källa](https://dzone.com/articles/kmeans-silhouette-score-explained-with-python-exam).\n", + "\n", + "Metoden för genomsnittlig silhouette beräknar den genomsnittliga silhouette för observationer för olika värden av *k*. En hög genomsnittlig silhouette-poäng indikerar en bra klustring.\n", + "\n", + "Funktionen `silhouette` i klusterpaketet används för att beräkna den genomsnittliga silhouette-bredden.\n", + "\n", + "> Silhouetten kan beräknas med vilken [avståndsmetrik](https://en.wikipedia.org/wiki/Distance \"Distance\") som helst, såsom [Euklidiskt avstånd](https://en.wikipedia.org/wiki/Euclidean_distance \"Euclidean distance\") eller [Manhattan-avstånd](https://en.wikipedia.org/wiki/Manhattan_distance \"Manhattan distance\") som vi diskuterade i [föregående lektion](https://github.com/microsoft/ML-For-Beginners/blob/main/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb).\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Jn0McL28LXzB" + }, + "source": [ + "# Load cluster package\n", + "library(cluster)\n", + "\n", + "# Compute average silhouette score\n", + "ss <- silhouette(kclust$cluster,\n", + " # Compute euclidean distance\n", + " dist = dist(df_numeric_select))\n", + "mean(ss[, 3])\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QyQRn97nLXzC" + }, + "source": [ + "Vårt resultat är **0,549**, alltså precis i mitten. Detta tyder på att våra data inte är särskilt väl lämpade för den här typen av klustring. Låt oss se om vi kan bekräfta denna misstanke visuellt. 
Paketet [factoextra](https://rpkgs.datanovia.com/factoextra/index.html) erbjuder funktioner (`fviz_cluster()`) för att visualisera klustring.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "7a6Km1_FLXzD" + }, + "source": [ + "library(factoextra)\n", + "\n", + "# Visualize clustering results\n", + "fviz_cluster(kclust, df_numeric_select)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IBwCWt-0LXzD" + }, + "source": [ + "Överlappningen i kluster indikerar att vår data inte är särskilt väl lämpad för denna typ av klustring, men låt oss fortsätta.\n", + "\n", + "## 4. Bestämma optimalt antal kluster\n", + "\n", + "En grundläggande fråga som ofta uppstår vid K-Means-klustring är denna - utan kända klassetiketter, hur vet du hur många kluster du ska dela upp din data i?\n", + "\n", + "Ett sätt att försöka ta reda på det är att använda ett dataurval för att `skapa en serie klustringsmodeller` med ett ökande antal kluster (t.ex. från 1-10) och utvärdera klustringsmått som **Silhouette-poängen.**\n", + "\n", + "Låt oss bestämma det optimala antalet kluster genom att beräkna klustringsalgoritmen för olika värden av *k* och utvärdera **Within Cluster Sum of Squares** (WCSS). Den totala inom-kluster-summan av kvadrater (WCSS) mäter klustringens kompakthet, och vi vill att den ska vara så liten som möjligt, där lägre värden innebär att datapunkterna är närmare varandra.\n", + "\n", + "Låt oss undersöka effekten av olika val av `k`, från 1 till 10, på denna klustring.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "hSeIiylDLXzE" + }, + "source": [ + "# Create a series of clustering models\n", + "kclusts <- tibble(k = 1:10) %>% \n", + " # Perform kmeans clustering for 1,2,3 ... 
,10 clusters\n", + " mutate(model = map(k, ~ kmeans(df_numeric_select, centers = .x, nstart = 25)),\n", + " # Farm out clustering metrics eg WCSS\n", + " glanced = map(model, ~ glance(.x))) %>% \n", + " unnest(cols = glanced)\n", + " \n", + "\n", + "# View clustering results\n", + "kclusts\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "m7rS2U1eLXzE" + }, + "source": [ + "Nu när vi har den totala inomklustersumman av kvadrater (tot.withinss) för varje klustringsalgoritm med centrum *k*, använder vi [elbow-metoden](https://en.wikipedia.org/wiki/Elbow_method_(clustering)) för att hitta det optimala antalet kluster. Metoden innebär att man plottar WCSS som en funktion av antalet kluster och väljer [kurvans \"armbåge\"](https://en.wikipedia.org/wiki/Elbow_of_the_curve \"Elbow of the curve\") som det antal kluster som ska användas.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "o_DjHGItLXzF" + }, + "source": [ + "set.seed(2056)\n", + "# Use elbow method to determine optimum number of clusters\n", + "kclusts %>% \n", + " ggplot(mapping = aes(x = k, y = tot.withinss)) +\n", + " geom_line(size = 1.2, alpha = 0.8, color = \"#FF7F0EFF\") +\n", + " geom_point(size = 2, color = \"#FF7F0EFF\")\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pLYyt5XSLXzG" + }, + "source": [ + "Diagrammet visar en stor minskning i WCSS (alltså större *sammanhållning*) när antalet kluster ökar från ett till två, och ytterligare en märkbar minskning från två till tre kluster. Därefter blir minskningen mindre framträdande, vilket resulterar i en `armbåge` 💪 i diagrammet vid ungefär tre kluster. 
Detta är en bra indikation på att det finns två till tre rimligt väl separerade kluster av datapunkter.\n", + "\n", + "Vi kan nu gå vidare och extrahera klustermodellen där `k = 3`:\n", + "\n", + "> `pull()`: används för att extrahera en enskild kolumn\n", + ">\n", + "> `pluck()`: används för att indexera datastrukturer såsom listor\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "JP_JPKBILXzG" + }, + "source": [ + "# Extract k = 3 clustering\n", + "final_kmeans <- kclusts %>% \n", + " filter(k == 3) %>% \n", + " pull(model) %>% \n", + " pluck(1)\n", + "\n", + "\n", + "final_kmeans\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l_PDTu8tLXzI" + }, + "source": [ + "Bra! Låt oss visualisera de kluster vi fått fram. Är du sugen på lite interaktivitet med hjälp av `plotly`?\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "dNcleFe-LXzJ" + }, + "source": [ + "# Add predicted cluster assignment to data set\n", + "results <- augment(final_kmeans, df_numeric_select) %>% \n", + " bind_cols(df_numeric %>% select(artist_top_genre)) \n", + "\n", + "# Plot cluster assignments\n", + "clust_plt <- results %>% \n", + " ggplot(mapping = aes(x = popularity, y = danceability, color = .cluster, shape = artist_top_genre)) +\n", + " geom_point(size = 2, alpha = 0.8) +\n", + " paletteer::scale_color_paletteer_d(\"ggthemes::Tableau_10\")\n", + "\n", + "ggplotly(clust_plt)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6JUM_51VLXzK" + }, + "source": [ + "Kanske hade vi förväntat oss att varje kluster (representerat av olika färger) skulle ha distinkta genrer (representerat av olika former).\n", + "\n", + "Låt oss ta en titt på modellens noggrannhet.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "HdIMUGq7LXzL" + }, + "source": [ + "# Assign genres to predefined integers\n", + "label_count <- results %>% \n", + " 
group_by(artist_top_genre) %>% \n", + " mutate(id = cur_group_id()) %>% \n", + " ungroup() %>% \n", + " summarise(correct_labels = sum(.cluster == id))\n", + "\n", + "\n", + "# Print results \n", + "cat(\"Result:\", label_count$correct_labels, \"out of\", nrow(results), \"samples were correctly labeled.\")\n", + "\n", + "cat(\"\\nAccuracy score:\", label_count$correct_labels/nrow(results))\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "C50wvaAOLXzM" + }, + "source": [ + "Den här modellens noggrannhet är inte dålig, men inte heller fantastisk. Det kan vara så att datan inte lämpar sig särskilt väl för K-Means-klustring. Datan är för obalanserad, har för låg korrelation och det finns för mycket variation mellan kolumnvärdena för att klustringen ska fungera bra. Faktum är att de kluster som bildas troligen är starkt påverkade eller snedvridna av de tre genrekategorier vi definierade ovan.\n", + "\n", + "Trots det var det en lärorik process!\n", + "\n", + "I Scikit-learns dokumentation kan du se att en modell som denna, med kluster som inte är särskilt väl avgränsade, har ett \"varians\"-problem:\n", + "\n", + "

\n", + " \n", + "

Infografik från Scikit-learn
\n", + "\n", + "\n", + "\n", + "## **Varians**\n", + "\n", + "Varians definieras som \"medelvärdet av de kvadrerade avvikelserna från medelvärdet\" [källa](https://www.mathsisfun.com/data/standard-deviation.html). I kontexten av detta klustringsproblem syftar det på att siffrorna i vår dataset tenderar att avvika lite för mycket från medelvärdet.\n", + "\n", + "✅ Det här är ett bra tillfälle att fundera över alla sätt du kan lösa detta problem. Justera datan lite mer? Använda andra kolumner? Testa en annan algoritm? Tips: Prova att [skala din data](https://www.mygreatlearning.com/blog/learning-data-science-with-k-means-clustering/) för att normalisera den och testa andra kolumner.\n", + "\n", + "> Prova denna '[varianskalkylator](https://www.calculatorsoup.com/calculators/statistics/variance-calculator.php)' för att förstå konceptet lite bättre.\n", + "\n", + "------------------------------------------------------------------------\n", + "\n", + "## **🚀Utmaning**\n", + "\n", + "Tillbringa lite tid med denna notebook och justera parametrarna. Kan du förbättra modellens noggrannhet genom att rensa datan mer (till exempel ta bort outliers)? Du kan använda vikter för att ge större vikt åt vissa dataprover. Vad mer kan du göra för att skapa bättre kluster?\n", + "\n", + "Tips: Försök att skala din data. Det finns kommenterad kod i notebooken som lägger till standardisering för att få datakolumnerna att likna varandra mer i termer av intervall. Du kommer att märka att även om silhuettpoängen sjunker, så jämnas \"knäet\" i armbågsgrafen ut. Detta beror på att om datan lämnas oskalad, får data med mindre varians större vikt. 
Läs mer om detta problem [här](https://stats.stackexchange.com/questions/21222/are-mean-normalization-and-feature-scaling-needed-for-k-means-clustering/21226#21226).\n", + "\n", + "## [**Efterföreläsningsquiz**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/30/)\n", + "\n", + "## **Granskning & Självstudier**\n", + "\n", + "- Ta en titt på en K-Means-simulator [som denna](https://user.ceng.metu.edu.tr/~akifakkus/courses/ceng574/k-means/). Du kan använda detta verktyg för att visualisera exempeldata och bestämma dess centroider. Du kan redigera datans slumpmässighet, antal kluster och antal centroider. Hjälper detta dig att få en bättre förståelse för hur datan kan grupperas?\n", + "\n", + "- Ta också en titt på [detta handout om K-Means](https://stanford.edu/~cpiech/cs221/handouts/kmeans.html) från Stanford.\n", + "\n", + "Vill du testa dina nyförvärvade klustringskunskaper på dataset som lämpar sig väl för K-Means-klustring? Se:\n", + "\n", + "- [Träna och utvärdera klustringsmodeller](https://rpubs.com/eR_ic/clustering) med hjälp av Tidymodels och vänner\n", + "\n", + "- [K-means Cluster Analysis](https://uc-r.github.io/kmeans_clustering), UC Business Analytics R Programming Guide\n", + "\n", + "- [K-means-klustring med principer för \"tidy data\"](https://www.tidymodels.org/learn/statistics/k-means/)\n", + "\n", + "## **Uppgift**\n", + "\n", + "[Prova olika klustringsmetoder](https://github.com/microsoft/ML-For-Beginners/blob/main/5-Clustering/2-K-Means/assignment.md)\n", + "\n", + "## TACK TILL:\n", + "\n", + "[Jen Looper](https://www.twitter.com/jenlooper) för att ha skapat den ursprungliga Python-versionen av denna modul ♥️\n", + "\n", + "[`Allison Horst`](https://twitter.com/allison_horst/) för att ha skapat de fantastiska illustrationerna som gör R mer välkomnande och engagerande. 
Hitta fler illustrationer i hennes [galleri](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM).\n", + "\n", + "Lycka till med lärandet,\n", + "\n", + "[Eric](https://twitter.com/ericntay), Gold Microsoft Learn Student Ambassador.\n", + "\n", + "

\n", + " \n", + "

Konstverk av @allison_horst
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, vänligen notera att automatiska översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på dess originalspråk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sv/5-Clustering/2-K-Means/solution/notebook.ipynb b/translations/sv/5-Clustering/2-K-Means/solution/notebook.ipynb new file mode 100644 index 000000000..6642b38f0 --- /dev/null +++ b/translations/sv/5-Clustering/2-K-Means/solution/notebook.ipynb @@ -0,0 +1,550 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "e867e87e3129c8875423a82945f4ad5e", + "translation_date": "2025-09-06T14:21:11+00:00", + "source_file": "5-Clustering/2-K-Means/solution/notebook.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": 
"stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: seaborn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.11.1)\n", + "Requirement already satisfied: pandas>=0.23 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.1.2)\n", + "Requirement already satisfied: matplotlib>=2.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (3.1.0)\n", + "Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.4.1)\n", + "Requirement already satisfied: numpy>=1.15 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.19.2)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2.8.0)\n", + "Requirement already satisfied: pytz>=2017.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2019.1)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (1.1.0)\n", + "Requirement already satisfied: cycler>=0.10 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n", + "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (2.4.0)\n", + "Requirement already satisfied: six>=1.5 in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from python-dateutil>=2.7.3->pandas>=0.23->seaborn) (1.12.0)\n", + "Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages 
(from kiwisolver>=1.0.1->matplotlib>=2.2->seaborn) (45.1.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install seaborn" + ] + }, + { + "source": [ + "Börja där vi slutade i förra lektionen, med data importerad och filtrerad.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "0 Sparky Mandy & The Jungle \n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "2 LITT! LITT! \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "0 Cruel Santino alternative r&b 2019 144000 48 \n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "2 AYLØ indie r&b 2018 207758 40 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "0 0.666 0.8510 0.420 0.534000 0.1100 -6.699 \n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "2 0.836 0.2720 0.564 0.000537 0.1100 -7.127 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "\n", + " speechiness tempo time_signature \n", + "0 0.0829 133.015 5 \n", + "1 0.3600 129.993 3 \n", + "2 0.0424 130.005 4 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
0SparkyMandy & The JungleCruel Santinoalternative r&b2019144000480.6660.85100.4200.5340000.1100-6.6990.0829133.0155
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
2LITT!LITT!AYLØindie r&b2018207758400.8360.27200.5640.0005370.1100-7.1270.0424130.0054
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
\n
" + }, + "metadata": {}, + "execution_count": 11 + } + ], + "source": [ + "\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "\n", + "\n", + "df = pd.read_csv(\"../../data/nigerian-songs.csv\")\n", + "df.head()" + ] + }, + { + "source": [ + "Vi kommer att fokusera på endast 3 genrer. Kanske kan vi få 3 kluster skapade!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "metadata": {}, + "execution_count": 12 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "df = df[(df['artist_top_genre'] == 'afro dancehall') | (df['artist_top_genre'] == 'afropop') | (df['artist_top_genre'] == 'nigerian pop')]\n", + "df = df[(df['popularity'] > 0)]\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. 
\n", + "5 Kasala Pioneers \n", + "6 Pull Up Everything Pretty \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "5 DRB Lasgidi nigerian pop 2020 184800 26 \n", + "6 prettyboydo nigerian pop 2018 202648 29 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "5 0.803 0.1270 0.525 0.000007 0.1290 -10.034 \n", + "6 0.818 0.4520 0.587 0.004490 0.5900 -9.840 \n", + "\n", + " speechiness tempo time_signature \n", + "1 0.3600 129.993 3 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 \n", + "5 0.1970 100.103 4 \n", + "6 0.1990 95.842 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
5KasalaPioneersDRB Lasgidinigerian pop2020184800260.8030.12700.5250.0000070.1290-10.0340.1970100.1034
6Pull UpEverything Prettyprettyboydonigerian pop2018202648290.8180.45200.5870.0044900.5900-9.8400.199095.8424
\n
" + }, + "metadata": {}, + "execution_count": 13 + } + ], + "source": [ + "df.head()" + ] + }, + { + "source": [ + "Hur ren är denna data? Kontrollera efter avvikare med hjälp av lådagram. Vi kommer att fokusera på kolumner med färre avvikare (även om du skulle kunna rensa bort avvikelserna). Lådagram kan visa datans omfång och hjälpa till att välja vilka kolumner som ska användas. Observera att lådagram inte visar varians, en viktig komponent för bra klusterbar data (https://stats.stackexchange.com/questions/91536/deduce-variance-from-boxplot)\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 14 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.figure(figsize=(20,20), dpi=200)\n", + "\n", + "plt.subplot(4,3,1)\n", + "sns.boxplot(x = 'popularity', data = df)\n", + "\n", + "plt.subplot(4,3,2)\n", + "sns.boxplot(x = 'acousticness', data = df)\n", + "\n", + "plt.subplot(4,3,3)\n", + "sns.boxplot(x = 'energy', data = df)\n", + "\n", + "plt.subplot(4,3,4)\n", + "sns.boxplot(x = 'instrumentalness', data = df)\n", + "\n", + "plt.subplot(4,3,5)\n", + "sns.boxplot(x = 'liveness', data = df)\n", + "\n", + "plt.subplot(4,3,6)\n", + "sns.boxplot(x = 'loudness', data = df)\n", + "\n", + "plt.subplot(4,3,7)\n", + "sns.boxplot(x = 'speechiness', data = df)\n", + "\n", + "plt.subplot(4,3,8)\n", + "sns.boxplot(x = 'tempo', data = df)\n", + "\n", + "plt.subplot(4,3,9)\n", + "sns.boxplot(x = 
'time_signature', data = df)\n", + "\n", + "plt.subplot(4,3,10)\n", + "sns.boxplot(x = 'danceability', data = df)\n", + "\n", + "plt.subplot(4,3,11)\n", + "sns.boxplot(x = 'length', data = df)\n", + "\n", + "plt.subplot(4,3,12)\n", + "sns.boxplot(x = 'release_date', data = df)" + ] + }, + { + "source": [ + "Välj flera kolumner med liknande intervall. Se till att inkludera kolumnen artist_top_genre för att hålla våra genrer tydliga.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import LabelEncoder, StandardScaler\n", + "le = LabelEncoder()\n", + "\n", + "# scaler = StandardScaler()\n", + "\n", + "X = df.loc[:, ('artist_top_genre','popularity','danceability','acousticness','loudness','energy')]\n", + "\n", + "y = df['artist_top_genre']\n", + "\n", + "X['artist_top_genre'] = le.fit_transform(X['artist_top_genre'])\n", + "\n", + "# X = scaler.fit_transform(X)\n", + "\n", + "y = le.transform(y)\n", + "\n" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 0, 2, 1, 1, 0, 1, 0, 0,\n", + " 0, 1, 0, 2, 0, 0, 2, 2, 1, 1, 0, 2, 2, 2, 2, 1, 1, 0, 2, 0, 2, 0,\n", + " 2, 0, 0, 1, 1, 2, 1, 0, 0, 2, 2, 2, 2, 1, 1, 0, 1, 2, 2, 1, 2, 2,\n", + " 1, 2, 1, 2, 2, 1, 1, 1, 1, 1, 2, 1, 2, 2, 0, 2, 1, 1, 1, 2, 2, 2,\n", + " 2, 1, 2, 2, 2, 2, 1, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 2, 1, 2, 0,\n", + " 1, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 0, 1, 1, 1, 1, 0, 1, 2, 1, 2,\n", + " 1, 2, 2, 2, 0, 2, 1, 1, 1, 2, 1, 0, 1, 2, 2, 1, 1, 1, 0, 1, 2, 2,\n", + " 2, 1, 1, 0, 1, 2, 1, 1, 1, 1, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2,\n", + " 0, 1, 0, 0, 1, 0, 0, 2, 0, 0, 1, 1, 2, 0, 2, 2, 0, 2, 2, 1, 1, 0,\n", + " 1, 1, 0, 0, 1, 0, 2, 0, 1, 0, 2, 0, 
0, 2, 2, 2, 1, 1, 1, 1, 1, 0,\n", + " 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2,\n", + " 1, 1, 0, 0, 1, 1, 2, 0, 0, 0, 0, 0, 2, 0, 0, 2, 1, 1, 1, 2, 2, 2,\n", + " 1, 2, 1, 2, 1, 1, 1, 0, 2, 2, 2, 1, 2, 1, 0, 1, 2, 1, 1, 1, 2, 1],\n", + " dtype=int32)" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ], + "source": [ + "\n", + "from sklearn.cluster import KMeans\n", + "\n", + "nclusters = 3 \n", + "seed = 0\n", + "\n", + "km = KMeans(n_clusters=nclusters, random_state=seed)\n", + "km.fit(X)\n", + "\n", + "# Predict the cluster for each data point\n", + "\n", + "y_cluster_kmeans = km.predict(X)\n", + "y_cluster_kmeans" + ] + }, + { + "source": [ + "De där siffrorna betyder inte mycket för oss, så låt oss ta ett 'silhuettvärde' för att se noggrannheten. Vårt värde är i mitten.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.5466747351275563" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ], + "source": [ + "from sklearn import metrics\n", + "score = metrics.silhouette_score(X, y_cluster_kmeans)\n", + "score" + ] + }, + { + "source": [ + "Importera KMeans och bygg en modell\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.cluster import KMeans\n", + "wcss = []\n", + "\n", + "for i in range(1, 11):\n", + " kmeans = KMeans(n_clusters = i, init = 'k-means++', random_state = 42)\n", + " kmeans.fit(X)\n", + " wcss.append(kmeans.inertia_)" + ] + }, + { + "source": [ + "Använd den modellen för att bestämma, med hjälp av armbågmetoden, det bästa antalet kluster att skapa\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + 
"name": "stderr", + "text": [ + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.\n FutureWarning\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.figure(figsize=(10,5))\n", + "sns.lineplot(range(1, 11), wcss,marker='o',color='red')\n", + "plt.title('Elbow')\n", + "plt.xlabel('Number of clusters')\n", + "plt.ylabel('WCSS')\n", + "plt.show()" + ] + }, + { + "source": [ + "Looks like 3 is a good number after all. Fit the model again and create a scatterplot of your clusters. They do group in bunches, but they are pretty close together." + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "from sklearn.cluster import KMeans\n", + "kmeans = KMeans(n_clusters = 3)\n", + "kmeans.fit(X)\n", + "labels = kmeans.predict(X)\n", + "plt.scatter(df['popularity'],df['danceability'],c = labels)\n", + 
"plt.xlabel('popularity')\n", + "plt.ylabel('danceability')\n", + "plt.show()" + ] + }, + { + "source": [ + "Den här modellens noggrannhet är inte dålig, men inte heller fantastisk. Det kan vara så att datan inte lämpar sig väl för K-Means-klustring. Du kanske kan prova en annan metod.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 811, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Result: 109 out of 286 samples were correctly labeled.\nAccuracy score: 0.38\n" + ] + } + ], + "source": [ + "labels = kmeans.labels_\n", + "\n", + "correct_labels = sum(y == labels)\n", + "\n", + "print(\"Result: %d out of %d samples were correctly labeled.\" % (correct_labels, y.size))\n", + "\n", + "print('Accuracy score: {0:0.2f}'. format(correct_labels/float(y.size)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, vänligen notera att automatiska översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på dess originalspråk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. 
Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sv/5-Clustering/2-K-Means/solution/tester.ipynb b/translations/sv/5-Clustering/2-K-Means/solution/tester.ipynb new file mode 100644 index 000000000..5e5d45d59 --- /dev/null +++ b/translations/sv/5-Clustering/2-K-Means/solution/tester.ipynb @@ -0,0 +1,343 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "6f92868513e59d321245137c1c4c5311", + "translation_date": "2025-09-06T14:22:33+00:00", + "source_file": "5-Clustering/2-K-Means/solution/tester.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: seaborn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.11.1)\n", + "Requirement already satisfied: pandas>=0.23 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.1.2)\n", + "Requirement already satisfied: matplotlib>=2.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) 
(3.1.0)\n", + "Requirement already satisfied: numpy>=1.15 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.19.2)\n", + "Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.4.1)\n", + "Requirement already satisfied: pytz>=2017.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2019.1)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2.8.0)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (1.1.0)\n", + "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (2.4.0)\n", + "Requirement already satisfied: cycler>=0.10 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n", + "Requirement already satisfied: six>=1.5 in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from python-dateutil>=2.7.3->pandas>=0.23->seaborn) (1.12.0)\n", + "Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from kiwisolver>=1.0.1->matplotlib>=2.2->seaborn) (45.1.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install seaborn" + ] + }, + { + "source": [ + "Börja 
där vi slutade i förra lektionen, med data importerad och filtrerad.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "0 Sparky Mandy & The Jungle \n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "2 LITT! LITT! \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "0 Cruel Santino alternative r&b 2019 144000 48 \n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "2 AYLØ indie r&b 2018 207758 40 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "0 0.666 0.8510 0.420 0.534000 0.1100 -6.699 \n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "2 0.836 0.2720 0.564 0.000537 0.1100 -7.127 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "\n", + " speechiness tempo time_signature \n", + "0 0.0829 133.015 5 \n", + "1 0.3600 129.993 3 \n", + "2 0.0424 130.005 4 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
0SparkyMandy & The JungleCruel Santinoalternative r&b2019144000480.6660.85100.4200.5340000.1100-6.6990.0829133.0155
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
2LITT!LITT!AYLØindie r&b2018207758400.8360.27200.5640.0005370.1100-7.1270.0424130.0054
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
\n
" + }, + "metadata": {}, + "execution_count": 105 + } + ], + "source": [ + "\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "import numpy as np\n", + "\n", + "df = pd.read_csv(\"../../data/nigerian-songs.csv\")\n", + "df.head()" + ] + }, + { + "source": [ + "Vi kommer att fokusera på endast 3 genrer. Kanske kan vi få 3 kluster byggda!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "metadata": {}, + "execution_count": 106 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "df = df[(df['artist_top_genre'] == 'afro dancehall') | (df['artist_top_genre'] == 'afropop') | (df['artist_top_genre'] == 'nigerian pop')]\n", + "df = df[(df['popularity'] > 0)]\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. 
\n", + "5 Kasala Pioneers \n", + "6 Pull Up Everything Pretty \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "5 DRB Lasgidi nigerian pop 2020 184800 26 \n", + "6 prettyboydo nigerian pop 2018 202648 29 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "5 0.803 0.1270 0.525 0.000007 0.1290 -10.034 \n", + "6 0.818 0.4520 0.587 0.004490 0.5900 -9.840 \n", + "\n", + " speechiness tempo time_signature \n", + "1 0.3600 129.993 3 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 \n", + "5 0.1970 100.103 4 \n", + "6 0.1990 95.842 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
5KasalaPioneersDRB Lasgidinigerian pop2020184800260.8030.12700.5250.0000070.1290-10.0340.1970100.1034
6Pull UpEverything Prettyprettyboydonigerian pop2018202648290.8180.45200.5870.0044900.5900-9.8400.199095.8424
\n
" + }, + "metadata": {}, + "execution_count": 107 + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import StandardScaler\n", + "\n", + "scaler = StandardScaler()\n", + "\n", + "# X = df.loc[:, ('danceability','energy')]\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [ + { + "output_type": "error", + "ename": "ValueError", + "evalue": "Unknown label type: 'continuous'", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0;31m# we create an instance of SVM and fit out data. We do not scale our\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[0;31m# data since we want to plot the support vectors\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 22\u001b[0;31m \u001b[0mls30\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mLabelSpreading\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_30\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_30\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Label Spreading 30% data'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 23\u001b[0m \u001b[0mls50\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mLabelSpreading\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0my_50\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_50\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Label Spreading 50% data'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0mls100\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mLabelSpreading\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Label Spreading 100% data'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/semi_supervised/_label_propagation.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y)\u001b[0m\n\u001b[1;32m 228\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_validate_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 229\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mX_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 230\u001b[0;31m \u001b[0mcheck_classification_targets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 231\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 232\u001b[0m \u001b[0;31m# actual graph construction (implementations should override this)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/utils/multiclass.py\u001b[0m in 
\u001b[0;36mcheck_classification_targets\u001b[0;34m(y)\u001b[0m\n\u001b[1;32m 181\u001b[0m if y_type not in ['binary', 'multiclass', 'multiclass-multioutput',\n\u001b[1;32m 182\u001b[0m 'multilabel-indicator', 'multilabel-sequences']:\n\u001b[0;32m--> 183\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Unknown label type: %r\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0my_type\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 184\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 185\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: Unknown label type: 'continuous'" + ] + } + ], + "source": [ + "from sklearn.svm import SVC\n", + "from sklearn.semi_supervised import LabelSpreading\n", + "from sklearn.semi_supervised import SelfTrainingClassifier\n", + "from sklearn import datasets\n", + "\n", + "X = df[['danceability','acousticness']].values\n", + "y = df['energy'].values\n", + "\n", + "# X = scaler.fit_transform(X)\n", + "\n", + "# step size in the mesh\n", + "h = .02\n", + "\n", + "rng = np.random.RandomState(0)\n", + "y_rand = rng.rand(y.shape[0])\n", + "y_30 = np.copy(y)\n", + "y_30[y_rand < 0.3] = -1 # set random samples to be unlabeled\n", + "y_50 = np.copy(y)\n", + "y_50[y_rand < 0.5] = -1\n", + "# we create an instance of SVM and fit out data. 
We do not scale our\n", + "# data since we want to plot the support vectors\n", + "ls30 = (LabelSpreading().fit(X, y_30), y_30, 'Label Spreading 30% data')\n", + "ls50 = (LabelSpreading().fit(X, y_50), y_50, 'Label Spreading 50% data')\n", + "ls100 = (LabelSpreading().fit(X, y), y, 'Label Spreading 100% data')\n", + "\n", + "# the base classifier for self-training is identical to the SVC\n", + "base_classifier = SVC(kernel='rbf', gamma=.5, probability=True)\n", + "st30 = (SelfTrainingClassifier(base_classifier).fit(X, y_30),\n", + " y_30, 'Self-training 30% data')\n", + "st50 = (SelfTrainingClassifier(base_classifier).fit(X, y_50),\n", + " y_50, 'Self-training 50% data')\n", + "\n", + "rbf_svc = (SVC(kernel='rbf', gamma=.5).fit(X, y), y, 'SVC with rbf kernel')\n", + "\n", + "# create a mesh to plot in\n", + "x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1\n", + "y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1\n", + "xx, yy = np.meshgrid(np.arange(x_min, x_max, h),\n", + " np.arange(y_min, y_max, h))\n", + "\n", + "color_map = {-1: (1, 1, 1), 0: (0, 0, .9), 1: (1, 0, 0), 2: (.8, .6, 0)}\n", + "\n", + "classifiers = (ls30, st30, ls50, st50, ls100, rbf_svc)\n", + "for i, (clf, y_train, title) in enumerate(classifiers):\n", + " # Plot the decision boundary. 
For that, we will assign a color to each\n", + " # point in the mesh [x_min, x_max]x[y_min, y_max].\n", + " plt.subplot(3, 2, i + 1)\n", + " Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])\n", + "\n", + " # Put the result into a color plot\n", + " Z = Z.reshape(xx.shape)\n", + " plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)\n", + " plt.axis('off')\n", + "\n", + " # Plot also the training points\n", + " colors = [color_map[y] for y in y_train]\n", + " plt.scatter(X[:, 0], X[:, 1], c=colors, edgecolors='black')\n", + "\n", + " plt.title(title)\n", + "\n", + "plt.suptitle(\"Unlabeled points are colored white\", y=0.1)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, vänligen notera att automatiska översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på dess originalspråk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. 
Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sv/6-NLP/3-Translation-Sentiment/solution/notebook.ipynb b/translations/sv/6-NLP/3-Translation-Sentiment/solution/notebook.ipynb new file mode 100644 index 000000000..7e2f77e7b --- /dev/null +++ b/translations/sv/6-NLP/3-Translation-Sentiment/solution/notebook.ipynb @@ -0,0 +1,100 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 4, + "coopTranslator": { + "original_hash": "27de2abc0235ebd22080fc8f1107454d", + "translation_date": "2025-09-06T15:22:13+00:00", + "source_file": "6-NLP/3-Translation-Sentiment/solution/notebook.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from textblob import TextBlob\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# You should download the book text, clean it, and import it here\n", + "with open(\"pride.txt\", encoding=\"utf8\") as f:\n", + " file_contents = f.read()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "book_pride = TextBlob(file_contents)\n", + "positive_sentiment_sentences = []\n", + "negative_sentiment_sentences = []" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for sentence in book_pride.sentences:\n", + " if sentence.sentiment.polarity == 1:\n", + " positive_sentiment_sentences.append(sentence)\n", + " if sentence.sentiment.polarity == -1:\n", 
+ " negative_sentiment_sentences.append(sentence)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The \" + str(len(positive_sentiment_sentences)) + \" most positive sentences:\")\n", + "for sentence in positive_sentiment_sentences:\n", + " print(\"+ \" + str(sentence.replace(\"\\n\", \"\").replace(\" \", \" \")))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The \" + str(len(negative_sentiment_sentences)) + \" most negative sentences:\")\n", + "for sentence in negative_sentiment_sentences:\n", + " print(\"- \" + str(sentence.replace(\"\\n\", \"\").replace(\" \", \" \")))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, bör du vara medveten om att automatiska översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på dess originalspråk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. 
Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sv/6-NLP/4-Hotel-Reviews-1/notebook.ipynb b/translations/sv/6-NLP/4-Hotel-Reviews-1/notebook.ipynb new file mode 100644 index 000000000..e69de29bb diff --git a/translations/sv/6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb b/translations/sv/6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb new file mode 100644 index 000000000..2f7cf9051 --- /dev/null +++ b/translations/sv/6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb @@ -0,0 +1,174 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 4, + "coopTranslator": { + "original_hash": "2d05e7db439376aa824f4b387f8324ca", + "translation_date": "2025-09-06T15:21:52+00:00", + "source_file": "6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# EDA\n", + "import pandas as pd\n", + "import time" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_difference_review_avg(row):\n", + " return row[\"Average_Score\"] - row[\"Calc_Average_Score\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the hotel reviews from CSV\n", + "print(\"Loading data file now, this could take a while depending on file size\")\n", + "start = time.time()\n", + "df = pd.read_csv('../../data/Hotel_Reviews.csv')\n", + "end = time.time()\n", + "print(\"Loading took \" + str(round(end - start, 2)) + \" 
seconds\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# What shape is the data (rows, columns)?\n", + "print(\"The shape of the data (rows, cols) is \" + str(df.shape))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# value_counts() creates a Series object that has index and values\n", + "# in this case, the country and the frequency they occur in reviewer nationality\n", + "nationality_freq = df[\"Reviewer_Nationality\"].value_counts()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# What reviewer nationality is the most common in the dataset?\n", + "print(\"The highest frequency reviewer nationality is \" + str(nationality_freq.index[0]).strip() + \" with \" + str(nationality_freq[0]) + \" reviews.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# What is the top 10 most common nationalities and their frequencies?\n", + "print(\"The top 10 highest frequency reviewer nationalities are:\")\n", + "print(nationality_freq[0:10].to_string())\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# How many unique nationalities are there?\n", + "print(\"There are \" + str(nationality_freq.index.size) + \" unique nationalities in the dataset\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# What was the most frequently reviewed hotel for the top 10 nationalities - print the hotel and number of reviews\n", + "for nat in nationality_freq[:10].index:\n", + " # First, extract all the rows that match the criteria into a new dataframe\n", + " nat_df = df[df[\"Reviewer_Nationality\"] == nat] \n", + " # Now get the hotel freq\n", + " freq = 
nat_df[\"Hotel_Name\"].value_counts()\n", + " print(\"The most reviewed hotel for \" + str(nat).strip() + \" was \" + str(freq.index[0]) + \" with \" + str(freq[0]) + \" reviews.\") \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# How many reviews are there per hotel (frequency count of hotel) and do the results match the value in `Total_Number_of_Reviews`?\n", + "# First create a new dataframe based on the old one, removing the uneeded columns\n", + "hotel_freq_df = df.drop([\"Hotel_Address\", \"Additional_Number_of_Scoring\", \"Review_Date\", \"Average_Score\", \"Reviewer_Nationality\", \"Negative_Review\", \"Review_Total_Negative_Word_Counts\", \"Positive_Review\", \"Review_Total_Positive_Word_Counts\", \"Total_Number_of_Reviews_Reviewer_Has_Given\", \"Reviewer_Score\", \"Tags\", \"days_since_review\", \"lat\", \"lng\"], axis = 1)\n", + "# Group the rows by Hotel_Name, count them and put the result in a new column Total_Reviews_Found\n", + "hotel_freq_df['Total_Reviews_Found'] = hotel_freq_df.groupby('Hotel_Name').transform('count')\n", + "# Get rid of all the duplicated rows\n", + "hotel_freq_df = hotel_freq_df.drop_duplicates(subset = [\"Hotel_Name\"])\n", + "print()\n", + "print(hotel_freq_df.to_string())\n", + "print(str(hotel_freq_df.shape))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# While there is an `Average_Score` for each hotel according to the dataset, \n", + "# you can also calculate an average score (getting the average of all reviewer scores in the dataset for each hotel)\n", + "# Add a new column to your dataframe with the column header `Calc_Average_Score` that contains that calculated average. 
\n", + "df['Calc_Average_Score'] = round(df.groupby('Hotel_Name').Reviewer_Score.transform('mean'), 1)\n", + "# Add a new column with the difference between the two average scores\n", + "df[\"Average_Score_Difference\"] = df.apply(get_difference_review_avg, axis = 1)\n", + "# Create a df without all the duplicates of Hotel_Name (so only 1 row per hotel)\n", + "review_scores_df = df.drop_duplicates(subset = [\"Hotel_Name\"])\n", + "# Sort the dataframe to find the lowest and highest average score difference\n", + "review_scores_df = review_scores_df.sort_values(by=[\"Average_Score_Difference\"])\n", + "print(review_scores_df[[\"Average_Score_Difference\", \"Average_Score\", \"Calc_Average_Score\", \"Hotel_Name\"]])\n", + "# Do any hotels have the same (rounded to 1 decimal place) `Average_Score` and `Calc_Average_Score`?\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, bör det noteras att automatiserade översättningar kan innehålla fel eller brister. Det ursprungliga dokumentet på dess originalspråk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. 
Vi ansvarar inte för eventuella missförstånd eller feltolkningar som kan uppstå vid användning av denna översättning.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sv/6-NLP/5-Hotel-Reviews-2/notebook.ipynb b/translations/sv/6-NLP/5-Hotel-Reviews-2/notebook.ipynb new file mode 100644 index 000000000..e69de29bb diff --git a/translations/sv/6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb b/translations/sv/6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb new file mode 100644 index 000000000..14fbcdff1 --- /dev/null +++ b/translations/sv/6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb @@ -0,0 +1,172 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "033cb89c85500224b3c63fd04f49b4aa", + "translation_date": "2025-09-06T15:22:34+00:00", + "source_file": "6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import time\n", + "import ast" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def replace_address(row):\n", + " if \"Netherlands\" in row[\"Hotel_Address\"]:\n", + " return \"Amsterdam, Netherlands\"\n", + " elif \"Barcelona\" in row[\"Hotel_Address\"]:\n", + " return \"Barcelona, Spain\"\n", + " elif \"United Kingdom\" in row[\"Hotel_Address\"]:\n", + " return \"London, United Kingdom\"\n", 
+ " elif \"Milan\" in row[\"Hotel_Address\"]: \n", + " return \"Milan, Italy\"\n", + " elif \"France\" in row[\"Hotel_Address\"]:\n", + " return \"Paris, France\"\n", + " elif \"Vienna\" in row[\"Hotel_Address\"]:\n", + " return \"Vienna, Austria\" \n", + " else:\n", + " return row.Hotel_Address\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the hotel reviews from CSV\n", + "start = time.time()\n", + "df = pd.read_csv('../../data/Hotel_Reviews.csv')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# dropping columns we will not use:\n", + "df.drop([\"lat\", \"lng\"], axis = 1, inplace=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Replace all the addresses with a shortened, more useful form\n", + "df[\"Hotel_Address\"] = df.apply(replace_address, axis = 1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Drop `Additional_Number_of_Scoring`\n", + "df.drop([\"Additional_Number_of_Scoring\"], axis = 1, inplace=True)\n", + "# Replace `Total_Number_of_Reviews` and `Average_Score` with our own calculated values\n", + "df.Total_Number_of_Reviews = df.groupby('Hotel_Name').transform('count')\n", + "df.Average_Score = round(df.groupby('Hotel_Name').Reviewer_Score.transform('mean'), 1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Process the Tags into new columns\n", + "# The file Hotel_Reviews_Tags.py, identifies the most important tags\n", + "# Leisure trip, Couple, Solo traveler, Business trip, Group combined with Travelers with friends, \n", + "# Family with young children, Family with older children, With a pet\n", + "df[\"Leisure_trip\"] = df.Tags.apply(lambda tag: 1 if \"Leisure trip\" in tag else 0)\n", + 
"df[\"Couple\"] = df.Tags.apply(lambda tag: 1 if \"Couple\" in tag else 0)\n", + "df[\"Solo_traveler\"] = df.Tags.apply(lambda tag: 1 if \"Solo traveler\" in tag else 0)\n", + "df[\"Business_trip\"] = df.Tags.apply(lambda tag: 1 if \"Business trip\" in tag else 0)\n", + "df[\"Group\"] = df.Tags.apply(lambda tag: 1 if \"Group\" in tag or \"Travelers with friends\" in tag else 0)\n", + "df[\"Family_with_young_children\"] = df.Tags.apply(lambda tag: 1 if \"Family with young children\" in tag else 0)\n", + "df[\"Family_with_older_children\"] = df.Tags.apply(lambda tag: 1 if \"Family with older children\" in tag else 0)\n", + "df[\"With_a_pet\"] = df.Tags.apply(lambda tag: 1 if \"With a pet\" in tag else 0)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# No longer need any of these columns\n", + "df.drop([\"Review_Date\", \"Review_Total_Negative_Word_Counts\", \"Review_Total_Positive_Word_Counts\", \"days_since_review\", \"Total_Number_of_Reviews_Reviewer_Has_Given\"], axis = 1, inplace=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Saving results to Hotel_Reviews_Filtered.csv\n", + "Filtering took 23.74 seconds\n" + ] + } + ], + "source": [ + "# Saving new data file with calculated columns\n", + "print(\"Saving results to Hotel_Reviews_Filtered.csv\")\n", + "df.to_csv(r'../../data/Hotel_Reviews_Filtered.csv', index = False)\n", + "end = time.time()\n", + "print(\"Filtering took \" + str(round(end - start, 2)) + \" seconds\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). 
Även om vi strävar efter noggrannhet, bör du vara medveten om att automatiserade översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på dess originalspråk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sv/6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb b/translations/sv/6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb new file mode 100644 index 000000000..57f9f688f --- /dev/null +++ b/translations/sv/6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb @@ -0,0 +1,137 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "341efc86325ec2a214f682f57a189dfd", + "translation_date": "2025-09-06T15:22:55+00:00", + "source_file": "6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the hotel reviews from CSV (you can )\n", + "import pandas as pd \n", + "\n", + "df = pd.read_csv('../../data/Hotel_Reviews_Filtered.csv')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# We want to find the most useful tags to keep\n", + "# Remove opening and closing brackets\n", + 
"df.Tags = df.Tags.str.strip(\"[']\")\n", + "# remove all quotes too\n", + "df.Tags = df.Tags.str.replace(\" ', '\", \",\", regex = False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# removing this to take advantage of the 'already a phrase' fact of the dataset \n", + "# Now split the strings into a list\n", + "tag_list_df = df.Tags.str.split(',', expand = True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove leading and trailing spaces\n", + "df[\"Tag_1\"] = tag_list_df[0].str.strip()\n", + "df[\"Tag_2\"] = tag_list_df[1].str.strip()\n", + "df[\"Tag_3\"] = tag_list_df[2].str.strip()\n", + "df[\"Tag_4\"] = tag_list_df[3].str.strip()\n", + "df[\"Tag_5\"] = tag_list_df[4].str.strip()\n", + "df[\"Tag_6\"] = tag_list_df[5].str.strip()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# Merge the 6 columns into one with melt\n", + "df_tags = df.melt(value_vars=[\"Tag_1\", \"Tag_2\", \"Tag_3\", \"Tag_4\", \"Tag_5\", \"Tag_6\"])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "The shape of the tags with no filtering: (2514684, 2)\n", + " index count\n", + "0 Leisure trip 338423\n", + "1 Couple 205305\n", + "2 Solo traveler 89779\n", + "3 Business trip 68176\n", + "4 Group 51593\n", + "5 Family with young children 49318\n", + "6 Family with older children 21509\n", + "7 Travelers with friends 1610\n", + "8 With a pet 1078\n" + ] + } + ], + "source": [ + "# Get the value counts\n", + "tag_vc = df_tags.value.value_counts()\n", + "# print(tag_vc)\n", + "print(\"The shape of the tags with no filtering:\", str(df_tags.shape))\n", + "# Drop rooms, suites, and length of stay, mobile device and anything with less count than a 1000\n", + "df_tags = 
df_tags[~df_tags.value.str.contains(\"Standard|room|Stayed|device|Beds|Suite|Studio|King|Superior|Double\", na=False, case=False)]\n", + "tag_vc = df_tags.value.value_counts().reset_index(name=\"count\").query(\"count > 1000\")\n", + "# Print the top 10 (there should only be 9 and we'll use these in the filtering section)\n", + "print(tag_vc[:10])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, bör det noteras att automatiserade översättningar kan innehålla fel eller brister. Det ursprungliga dokumentet på dess originalspråk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. Vi ansvarar inte för eventuella missförstånd eller feltolkningar som kan uppstå vid användning av denna översättning.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sv/6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb b/translations/sv/6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb new file mode 100644 index 000000000..f837e1619 --- /dev/null +++ b/translations/sv/6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb @@ -0,0 +1,260 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "705bf02633759f689abc37b19749a16d", + "translation_date": "2025-09-06T15:23:15+00:00", + "source_file": 
"6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[nltk_data] Downloading package vader_lexicon to\n[nltk_data] /Users/jenlooper/nltk_data...\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ], + "source": [ + "import time\n", + "import pandas as pd\n", + "import nltk as nltk\n", + "from nltk.corpus import stopwords\n", + "from nltk.sentiment.vader import SentimentIntensityAnalyzer\n", + "nltk.download('vader_lexicon')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "vader_sentiment = SentimentIntensityAnalyzer()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# There are 3 possibilities of input for a review:\n", + "# It could be \"No Negative\", in which case, return 0\n", + "# It could be \"No Positive\", in which case, return 0\n", + "# It could be a review, in which case calculate the sentiment\n", + "def calc_sentiment(review): \n", + " if review == \"No Negative\" or review == \"No Positive\":\n", + " return 0\n", + " return vader_sentiment.polarity_scores(review)[\"compound\"] \n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the hotel reviews from CSV\n", + "df = pd.read_csv(\"../../data/Hotel_Reviews_Filtered.csv\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove stop words - can be slow for a lot of text!\n", + "# Ryan Han (ryanxjhan on Kaggle) has a great post measuring performance of different stop words removal approaches\n", + "# 
https://www.kaggle.com/ryanxjhan/fast-stop-words-removal # using the approach that Ryan recommends\n", + "start = time.time()\n", + "cache = set(stopwords.words(\"english\"))\n", + "def remove_stopwords(review):\n", + " text = \" \".join([word for word in review.split() if word not in cache])\n", + " return text\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove the stop words from both columns\n", + "df.Negative_Review = df.Negative_Review.apply(remove_stopwords) \n", + "df.Positive_Review = df.Positive_Review.apply(remove_stopwords)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Removing stop words took 5.77 seconds\n" + ] + } + ], + "source": [ + "end = time.time()\n", + "print(\"Removing stop words took \" + str(round(end - start, 2)) + \" seconds\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Calculating sentiment columns for both positive and negative reviews\n", + "Calculating sentiment took 201.07 seconds\n" + ] + } + ], + "source": [ + "# Add a negative sentiment and positive sentiment column\n", + "print(\"Calculating sentiment columns for both positive and negative reviews\")\n", + "start = time.time()\n", + "df[\"Negative_Sentiment\"] = df.Negative_Review.apply(calc_sentiment)\n", + "df[\"Positive_Sentiment\"] = df.Positive_Review.apply(calc_sentiment)\n", + "end = time.time()\n", + "print(\"Calculating sentiment took \" + str(round(end - start, 2)) + \" seconds\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Negative_Review Negative_Sentiment\n", + "186584 So bad experience memories I hotel The first n... 
-0.9920\n", + "129503 First charged twice room booked booking second... -0.9896\n", + "307286 The staff Had bad experience even booking Janu... -0.9889\n", + "452092 No WLAN room Incredibly rude restaurant staff ... -0.9884\n", + "201293 We usually traveling Paris 2 3 times year busi... -0.9873\n", + "... ... ...\n", + "26899 I would say however one night expensive even d... 0.9933\n", + "138365 Wifi terribly slow I speed test network upload... 0.9938\n", + "79215 I find anything hotel first I walked past hote... 0.9938\n", + "278506 The property great location There bakery next ... 0.9945\n", + "339189 Guys I like hotel I wish return next year Howe... 0.9948\n", + "\n", + "[515738 rows x 2 columns]\n", + " Positive_Review Positive_Sentiment\n", + "137893 Bathroom Shower We going stay twice hotel 2 ni... -0.9820\n", + "5839 I completely disappointed mad since reception ... -0.9780\n", + "64158 get everything extra internet parking breakfas... -0.9751\n", + "124178 I didnt like anythig Room small Asked upgrade ... -0.9721\n", + "489137 Very rude manager abusive staff reception Dirt... -0.9703\n", + "... ... ...\n", + "331570 Everything This recently renovated hotel class... 0.9984\n", + "322920 From moment stepped doors Guesthouse Hotel sta... 0.9985\n", + "293710 This place surprise expected good actually gre... 0.9985\n", + "417442 We celebrated wedding night Langham I commend ... 0.9985\n", + "132492 We arrived super cute boutique hotel area expl... 
0.9987\n", + "\n", + "[515738 rows x 2 columns]\n" + ] + } + ], + "source": [ + "df = df.sort_values(by=[\"Negative_Sentiment\"], ascending=True)\n", + "print(df[[\"Negative_Review\", \"Negative_Sentiment\"]])\n", + "df = df.sort_values(by=[\"Positive_Sentiment\"], ascending=True)\n", + "print(df[[\"Positive_Review\", \"Positive_Sentiment\"]])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "# Reorder the columns (This is cosmetic, but to make it easier to explore the data later)\n", + "df = df.reindex([\"Hotel_Name\", \"Hotel_Address\", \"Total_Number_of_Reviews\", \"Average_Score\", \"Reviewer_Score\", \"Negative_Sentiment\", \"Positive_Sentiment\", \"Reviewer_Nationality\", \"Leisure_trip\", \"Couple\", \"Solo_traveler\", \"Business_trip\", \"Group\", \"Family_with_young_children\", \"Family_with_older_children\", \"With_a_pet\", \"Negative_Review\", \"Positive_Review\"], axis=1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Saving results to Hotel_Reviews_NLP.csv\n" + ] + } + ], + "source": [ + "print(\"Saving results to Hotel_Reviews_NLP.csv\")\n", + "df.to_csv(r\"../../data/Hotel_Reviews_NLP.csv\", index = False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, bör du vara medveten om att automatiserade översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på dess originalspråk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. 
Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sv/7-TimeSeries/1-Introduction/solution/notebook.ipynb b/translations/sv/7-TimeSeries/1-Introduction/solution/notebook.ipynb new file mode 100644 index 000000000..12ba423ea --- /dev/null +++ b/translations/sv/7-TimeSeries/1-Introduction/solution/notebook.ipynb @@ -0,0 +1,170 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Datainställning\n", + "\n", + "I den här notebooken visar vi hur man:\n", + "- ställer in tidsseriedata för den här modulen\n", + "- visualiserar data\n", + "\n", + "Datan i det här exemplet är hämtad från GEFCom2014 prognostävling. Den består av 3 års timvisa elförbruknings- och temperaturvärden mellan 2012 och 2014.\n", + "\n", + "Tao Hong, Pierre Pinson, Shu Fan, Hamidreza Zareipour, Alberto Troccoli och Rob J. Hyndman, \"Probabilistic energy forecasting: Global Energy Forecasting Competition 2014 and beyond\", International Journal of Forecasting, vol.32, no.3, s. 896-913, juli-september, 2016.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import matplotlib.pyplot as plt\n", + "from common.utils import load_data\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Ladda data från csv till en Pandas-dataram\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " load\n", + "2012-01-01 00:00:00 2698.0\n", + "2012-01-01 01:00:00 2558.0\n", + "2012-01-01 02:00:00 2444.0\n", + "2012-01-01 03:00:00 2402.0\n", + "2012-01-01 04:00:00 2403.0" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
load
2012-01-01 00:00:002698.0
2012-01-01 01:00:002558.0
2012-01-01 02:00:002444.0
2012-01-01 03:00:002402.0
2012-01-01 04:00:002403.0
\n
" + }, + "metadata": {}, + "execution_count": 7 + } + ], + "source": [ + "data_dir = './data'\n", + "energy = load_data(data_dir)[['load']]\n", + "energy.head()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Plotta all tillgänglig lastdata (januari 2012 till december 2014)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "energy.plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + 
}, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "energy['2014-07-01':'2014-07-07'].plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, bör du vara medveten om att automatiserade översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på dess ursprungliga språk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. 
Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ], + "metadata": { + "kernel_info": { + "name": "python3" + }, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "nteract": { + "version": "nteract-front-end@1.0.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "dddca9ad9e34435494e0933c218e1579", + "translation_date": "2025-09-06T14:01:29+00:00", + "source_file": "7-TimeSeries/1-Introduction/solution/notebook.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sv/7-TimeSeries/1-Introduction/working/notebook.ipynb b/translations/sv/7-TimeSeries/1-Introduction/working/notebook.ipynb new file mode 100644 index 000000000..8ac017716 --- /dev/null +++ b/translations/sv/7-TimeSeries/1-Introduction/working/notebook.ipynb @@ -0,0 +1,63 @@ +{ + "cells": [ + { + "source": [ + "# Datainställning\n", + "\n", + "I den här notebooken visar vi hur man:\n", + "\n", + "ställer in tidsseriedata för denna modul \n", + "visualiserar data \n", + "Datan i detta exempel är hämtad från GEFCom2014 prognostävling¹. Den består av 3 års timvisa värden för elförbrukning och temperatur mellan 2012 och 2014.\n", + "\n", + "¹ Tao Hong, Pierre Pinson, Shu Fan, Hamidreza Zareipour, Alberto Troccoli och Rob J. Hyndman, \"Probabilistic energy forecasting: Global Energy Forecasting Competition 2014 and beyond\", International Journal of Forecasting, vol.32, no.3, s. 
896-913, juli-september, 2016.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, vänligen notera att automatiska översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på sitt ursprungliga språk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ], + "metadata": { + "kernel_info": { + "name": "python3" + }, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "nteract": { + "version": "nteract-front-end@1.0.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "5e2bbe594906dce3aaaa736d6dac6683", + "translation_date": "2025-09-06T14:02:25+00:00", + "source_file": "7-TimeSeries/1-Introduction/working/notebook.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sv/7-TimeSeries/2-ARIMA/solution/notebook.ipynb b/translations/sv/7-TimeSeries/2-ARIMA/solution/notebook.ipynb new file mode 100644 index 000000000..0cd7e9887 --- /dev/null +++ b/translations/sv/7-TimeSeries/2-ARIMA/solution/notebook.ipynb @@ -0,0 +1,1132 @@ +{ + "cells": [ + { + "cell_type": 
"markdown", + "source": [ + "# Tidsserieprognos med ARIMA\n", + "\n", + "I denna notebook demonstrerar vi hur man:\n", + "- förbereder tidsseriedata för att träna en ARIMA-modell för tidsserieprognos\n", + "- implementerar en enkel ARIMA-modell för att prognostisera de kommande HORIZON-stegen framåt (tid *t+1* till *t+HORIZON*) i tidsserien\n", + "- utvärderar modellen\n", + "\n", + "Data i detta exempel är hämtad från GEFCom2014 prognostävling. Det består av 3 års timvisa värden för elförbrukning och temperatur mellan 2012 och 2014. Uppgiften är att prognostisera framtida värden för elförbrukning. I detta exempel visar vi hur man prognostiserar ett tidssteg framåt, med endast historiska data för elförbrukning.\n", + "\n", + "Tao Hong, Pierre Pinson, Shu Fan, Hamidreza Zareipour, Alberto Troccoli och Rob J. Hyndman, \"Probabilistic energy forecasting: Global Energy Forecasting Competition 2014 and beyond\", International Journal of Forecasting, vol.32, no.3, s. 896-913, juli-september, 2016.\n" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Installera beroenden\n", + "Kom igång genom att installera några av de nödvändiga beroendena. 
Dessa bibliotek med sina motsvarande versioner är kända för att fungera för lösningen:\n", + "\n", + "* `statsmodels == 0.12.2`\n", + "* `matplotlib == 3.4.2`\n", + "* `scikit-learn == 0.24.2`\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 16, + "source": [ + "!pip install statsmodels" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "/bin/sh: pip: command not found\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 17, + "source": [ + "import os\n", + "import warnings\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import datetime as dt\n", + "import math\n", + "\n", + "from pandas.plotting import autocorrelation_plot\n", + "from statsmodels.tsa.statespace.sarimax import SARIMAX\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from common.utils import load_data, mape\n", + "from IPython.display import Image\n", + "\n", + "%matplotlib inline\n", + "pd.options.display.float_format = '{:,.2f}'.format\n", + "np.set_printoptions(precision=2)\n", + "warnings.filterwarnings(\"ignore\") # specify to ignore warning messages\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 18, + "source": [ + "energy = load_data('./data')[['load']]\n", + "energy.head(10)" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2012-01-01 00:00:002,698.00
2012-01-01 01:00:002,558.00
2012-01-01 02:00:002,444.00
2012-01-01 03:00:002,402.00
2012-01-01 04:00:002,403.00
2012-01-01 05:00:002,453.00
2012-01-01 06:00:002,560.00
2012-01-01 07:00:002,719.00
2012-01-01 08:00:002,916.00
2012-01-01 09:00:003,105.00
\n", + "
" + ], + "text/plain": [ + " load\n", + "2012-01-01 00:00:00 2,698.00\n", + "2012-01-01 01:00:00 2,558.00\n", + "2012-01-01 02:00:00 2,444.00\n", + "2012-01-01 03:00:00 2,402.00\n", + "2012-01-01 04:00:00 2,403.00\n", + "2012-01-01 05:00:00 2,453.00\n", + "2012-01-01 06:00:00 2,560.00\n", + "2012-01-01 07:00:00 2,719.00\n", + "2012-01-01 08:00:00 2,916.00\n", + "2012-01-01 09:00:00 3,105.00" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Plotta all tillgänglig belastningsdata (januari 2012 till december 2014)\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 19, + "source": [ + "energy.plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Skapa tränings- och testdatauppsättningar\n", + "\n", + "### Introduktion\n", + "\n", + "Att dela upp data i tränings- och testuppsättningar är en viktig del av maskininlärningsprocessen. Träningsuppsättningen används för att lära modellen, medan testuppsättningen används för att utvärdera modellens prestanda på ny, osedd data.\n", + "\n", + "### Steg för att skapa datauppsättningar\n", + "\n", + "1. **Samla in och förbered data** \n", + " Se till att din data är ren och välstrukturerad innan du delar upp den. Detta inkluderar att hantera saknade värden, normalisera data och ta bort irrelevanta funktioner.\n", + "\n", + "2. **Dela upp data** \n", + " Använd en metod som `train_test_split` för att dela upp data i tränings- och testuppsättningar. En vanlig fördelning är 80 % för träning och 20 % för testning, men detta kan variera beroende på datasetets storlek och problemets natur.\n", + "\n", + " ```python\n", + " # Exempel på att dela upp data\n", + " from sklearn.model_selection import train_test_split\n", + " X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", + " ```\n", + "\n", + "3. **Verifiera fördelningen** \n", + " Kontrollera att både tränings- och testuppsättningarna representerar hela datasetet på ett rättvist sätt. Detta är särskilt viktigt för obalanserade dataset.\n", + "\n", + "### Vanliga misstag att undvika\n", + "\n", + "- **Överträning** \n", + " Om testdata används under träningen kan modellen överanpassas och prestera dåligt på ny data. Håll testdata strikt åtskild från träningsdata.\n", + "\n", + "- **För liten testuppsättning** \n", + " En för liten testuppsättning kan leda till opålitliga utvärderingar. 
Se till att testuppsättningen är tillräckligt stor för att ge en rättvis bedömning av modellens prestanda.\n", + "\n", + "### Slutsats\n", + "\n", + "Att korrekt dela upp data i tränings- och testuppsättningar är avgörande för att bygga robusta maskininlärningsmodeller. Följ bästa praxis och undvik vanliga fallgropar för att säkerställa att din modell presterar bra på verkliga data.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 20, + "source": [ + "train_start_dt = '2014-11-01 00:00:00'\n", + "test_start_dt = '2014-12-30 00:00:00' " + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 21, + "source": [ + "energy[(energy.index < test_start_dt) & (energy.index >= train_start_dt)][['load']].rename(columns={'load':'train'}) \\\n", + " .join(energy[test_start_dt:][['load']].rename(columns={'load':'test'}), how='outer') \\\n", + " .plot(y=['train', 'test'], figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 22, + "source": [ + "train = energy.copy()[(energy.index >= train_start_dt) & (energy.index < test_start_dt)][['load']]\n", + "test = energy.copy()[energy.index >= test_start_dt][['load']]\n", + "\n", + "print('Training data shape: ', train.shape)\n", + "print('Test data shape: ', test.shape)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Training data shape: (1416, 1)\n", + "Test data shape: (48, 1)\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 23, + "source": [ + "scaler = MinMaxScaler()\n", + "train['load'] = scaler.fit_transform(train)\n", + "train.head(10)" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2014-11-01 00:00:000.10
2014-11-01 01:00:000.07
2014-11-01 02:00:000.05
2014-11-01 03:00:000.04
2014-11-01 04:00:000.06
2014-11-01 05:00:000.10
2014-11-01 06:00:000.19
2014-11-01 07:00:000.31
2014-11-01 08:00:000.40
2014-11-01 09:00:000.48
\n", + "
" + ], + "text/plain": [ + " load\n", + "2014-11-01 00:00:00 0.10\n", + "2014-11-01 01:00:00 0.07\n", + "2014-11-01 02:00:00 0.05\n", + "2014-11-01 03:00:00 0.04\n", + "2014-11-01 04:00:00 0.06\n", + "2014-11-01 05:00:00 0.10\n", + "2014-11-01 06:00:00 0.19\n", + "2014-11-01 07:00:00 0.31\n", + "2014-11-01 08:00:00 0.40\n", + "2014-11-01 09:00:00 0.48" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Original vs skalad data:\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 24, + "source": [ + "energy[(energy.index >= train_start_dt) & (energy.index < test_start_dt)][['load']].rename(columns={'load':'original load'}).plot.hist(bins=100, fontsize=12)\n", + "train.rename(columns={'load':'scaled load'}).plot.hist(bins=100, fontsize=12)\n", + "plt.show()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Låt oss också skala testdatan\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 25, + "source": [ + "test['load'] = scaler.transform(test)\n", + "test.head()" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2014-12-30 00:00:000.33
2014-12-30 01:00:000.29
2014-12-30 02:00:000.27
2014-12-30 03:00:000.27
2014-12-30 04:00:000.30
\n", + "
" + ], + "text/plain": [ + " load\n", + "2014-12-30 00:00:00 0.33\n", + "2014-12-30 01:00:00 0.29\n", + "2014-12-30 02:00:00 0.27\n", + "2014-12-30 03:00:00 0.27\n", + "2014-12-30 04:00:00 0.30" + ] + }, + "metadata": {}, + "execution_count": 25 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 26, + "source": [ + "# Specify the number of steps to forecast ahead\n", + "HORIZON = 3\n", + "print('Forecasting horizon:', HORIZON, 'hours')" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Forecasting horizon: 3 hours\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 27, + "source": [ + "order = (4, 1, 0)\n", + "seasonal_order = (1, 1, 0, 24)\n", + "\n", + "model = SARIMAX(endog=train, order=order, seasonal_order=seasonal_order)\n", + "results = model.fit()\n", + "\n", + "print(results.summary())\n" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " SARIMAX Results \n", + "==========================================================================================\n", + "Dep. Variable: load No. 
Observations: 1416\n", + "Model: SARIMAX(4, 1, 0)x(1, 1, 0, 24) Log Likelihood 3477.239\n", + "Date: Thu, 30 Sep 2021 AIC -6942.477\n", + "Time: 14:36:28 BIC -6911.050\n", + "Sample: 11-01-2014 HQIC -6930.725\n", + " - 12-29-2014 \n", + "Covariance Type: opg \n", + "==============================================================================\n", + " coef std err z P>|z| [0.025 0.975]\n", + "------------------------------------------------------------------------------\n", + "ar.L1 0.8403 0.016 52.226 0.000 0.809 0.872\n", + "ar.L2 -0.5220 0.034 -15.388 0.000 -0.588 -0.456\n", + "ar.L3 0.1536 0.044 3.470 0.001 0.067 0.240\n", + "ar.L4 -0.0778 0.036 -2.158 0.031 -0.148 -0.007\n", + "ar.S.L24 -0.2327 0.024 -9.718 0.000 -0.280 -0.186\n", + "sigma2 0.0004 8.32e-06 47.358 0.000 0.000 0.000\n", + "===================================================================================\n", + "Ljung-Box (L1) (Q): 0.05 Jarque-Bera (JB): 1464.60\n", + "Prob(Q): 0.83 Prob(JB): 0.00\n", + "Heteroskedasticity (H): 0.84 Skew: 0.14\n", + "Prob(H) (two-sided): 0.07 Kurtosis: 8.02\n", + "===================================================================================\n", + "\n", + "Warnings:\n", + "[1] Covariance matrix calculated using the outer product of gradients (complex-step).\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Utvärdera modellen\n" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Skapa en testdatapunkt för varje HORIZON-steg.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 28, + "source": [ + "test_shifted = test.copy()\n", + "\n", + "for t in range(1, HORIZON):\n", + " test_shifted['load+'+str(t)] = test_shifted['load'].shift(-t, freq='H')\n", + " \n", + "test_shifted = test_shifted.dropna(how='any')\n", + "test_shifted.head(5)" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
loadload+1load+2
2014-12-30 00:00:000.330.290.27
2014-12-30 01:00:000.290.270.27
2014-12-30 02:00:000.270.270.30
2014-12-30 03:00:000.270.300.41
2014-12-30 04:00:000.300.410.57
\n", + "
" + ], + "text/plain": [ + " load load+1 load+2\n", + "2014-12-30 00:00:00 0.33 0.29 0.27\n", + "2014-12-30 01:00:00 0.29 0.27 0.27\n", + "2014-12-30 02:00:00 0.27 0.27 0.30\n", + "2014-12-30 03:00:00 0.27 0.30 0.41\n", + "2014-12-30 04:00:00 0.30 0.41 0.57" + ] + }, + "metadata": {}, + "execution_count": 28 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Gör förutsägelser på testdatan\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 29, + "source": [ + "%%time\n", + "training_window = 720 # dedicate 30 days (720 hours) for training\n", + "\n", + "train_ts = train['load']\n", + "test_ts = test_shifted\n", + "\n", + "history = [x for x in train_ts]\n", + "history = history[(-training_window):]\n", + "\n", + "predictions = list()\n", + "\n", + "# let's user simpler model for demonstration\n", + "order = (2, 1, 0)\n", + "seasonal_order = (1, 1, 0, 24)\n", + "\n", + "for t in range(test_ts.shape[0]):\n", + " model = SARIMAX(endog=history, order=order, seasonal_order=seasonal_order)\n", + " model_fit = model.fit()\n", + " yhat = model_fit.forecast(steps = HORIZON)\n", + " predictions.append(yhat)\n", + " obs = list(test_ts.iloc[t])\n", + " # move the training window\n", + " history.append(obs[0])\n", + " history.pop(0)\n", + " print(test_ts.index[t])\n", + " print(t+1, ': predicted =', yhat, 'expected =', obs)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "2014-12-30 00:00:00\n", + "1 : predicted = [0.32 0.29 0.28] expected = [0.32945389435989236, 0.2900626678603402, 0.2739480752014323]\n", + "2014-12-30 01:00:00\n", + "2 : predicted = [0.3 0.29 0.3 ] expected = [0.2900626678603402, 0.2739480752014323, 0.26812891674127126]\n", + "2014-12-30 02:00:00\n", + "3 : predicted = [0.27 0.28 0.32] expected = [0.2739480752014323, 0.26812891674127126, 0.3025962399283795]\n", + "2014-12-30 03:00:00\n", + "4 : predicted = [0.28 0.32 0.42] expected = [0.26812891674127126, 
0.3025962399283795, 0.40823634735899716]\n", + "2014-12-30 04:00:00\n", + "5 : predicted = [0.3 0.39 0.54] expected = [0.3025962399283795, 0.40823634735899716, 0.5689346463742166]\n", + "2014-12-30 05:00:00\n", + "6 : predicted = [0.4 0.55 0.66] expected = [0.40823634735899716, 0.5689346463742166, 0.6799462846911368]\n", + "2014-12-30 06:00:00\n", + "7 : predicted = [0.57 0.68 0.75] expected = [0.5689346463742166, 0.6799462846911368, 0.7309758281110115]\n", + "2014-12-30 07:00:00\n", + "8 : predicted = [0.68 0.75 0.8 ] expected = [0.6799462846911368, 0.7309758281110115, 0.7511190689346463]\n", + "2014-12-30 08:00:00\n", + "9 : predicted = [0.75 0.8 0.82] expected = [0.7309758281110115, 0.7511190689346463, 0.7636526410026856]\n", + "2014-12-30 09:00:00\n", + "10 : predicted = [0.77 0.78 0.78] expected = [0.7511190689346463, 0.7636526410026856, 0.7381378692927483]\n", + "2014-12-30 10:00:00\n", + "11 : predicted = [0.76 0.75 0.74] expected = [0.7636526410026856, 0.7381378692927483, 0.7188898836168307]\n", + "2014-12-30 11:00:00\n", + "12 : predicted = [0.77 0.76 0.75] expected = [0.7381378692927483, 0.7188898836168307, 0.7090420769919425]\n", + "2014-12-30 12:00:00\n", + "13 : predicted = [0.7 0.68 0.69] expected = [0.7188898836168307, 0.7090420769919425, 0.7081468218442255]\n", + "2014-12-30 13:00:00\n", + "14 : predicted = [0.72 0.73 0.76] expected = [0.7090420769919425, 0.7081468218442255, 0.7385854968666068]\n", + "2014-12-30 14:00:00\n", + "15 : predicted = [0.71 0.73 0.86] expected = [0.7081468218442255, 0.7385854968666068, 0.8478066248880931]\n", + "2014-12-30 15:00:00\n", + "16 : predicted = [0.73 0.85 0.97] expected = [0.7385854968666068, 0.8478066248880931, 0.9516562220232765]\n", + "2014-12-30 16:00:00\n", + "17 : predicted = [0.87 0.99 0.97] expected = [0.8478066248880931, 0.9516562220232765, 0.934198746642793]\n", + "2014-12-30 17:00:00\n", + "18 : predicted = [0.94 0.92 0.86] expected = [0.9516562220232765, 0.934198746642793, 0.8876454789615038]\n", + 
"2014-12-30 18:00:00\n", + "19 : predicted = [0.94 0.89 0.82] expected = [0.934198746642793, 0.8876454789615038, 0.8294538943598924]\n", + "2014-12-30 19:00:00\n", + "20 : predicted = [0.88 0.82 0.71] expected = [0.8876454789615038, 0.8294538943598924, 0.7197851387645477]\n", + "2014-12-30 20:00:00\n", + "21 : predicted = [0.83 0.72 0.58] expected = [0.8294538943598924, 0.7197851387645477, 0.5747538048343777]\n", + "2014-12-30 21:00:00\n", + "22 : predicted = [0.72 0.58 0.47] expected = [0.7197851387645477, 0.5747538048343777, 0.4592658907788718]\n", + "2014-12-30 22:00:00\n", + "23 : predicted = [0.58 0.47 0.39] expected = [0.5747538048343777, 0.4592658907788718, 0.3858549686660697]\n", + "2014-12-30 23:00:00\n", + "24 : predicted = [0.46 0.38 0.34] expected = [0.4592658907788718, 0.3858549686660697, 0.34377797672336596]\n", + "2014-12-31 00:00:00\n", + "25 : predicted = [0.38 0.34 0.33] expected = [0.3858549686660697, 0.34377797672336596, 0.32542524619516544]\n", + "2014-12-31 01:00:00\n", + "26 : predicted = [0.36 0.34 0.34] expected = [0.34377797672336596, 0.32542524619516544, 0.33034914950760963]\n", + "2014-12-31 02:00:00\n", + "27 : predicted = [0.32 0.32 0.35] expected = [0.32542524619516544, 0.33034914950760963, 0.3706356311548791]\n", + "2014-12-31 03:00:00\n", + "28 : predicted = [0.32 0.36 0.47] expected = [0.33034914950760963, 0.3706356311548791, 0.470008952551477]\n", + "2014-12-31 04:00:00\n", + "29 : predicted = [0.37 0.48 0.65] expected = [0.3706356311548791, 0.470008952551477, 0.6145926589077886]\n", + "2014-12-31 05:00:00\n", + "30 : predicted = [0.48 0.64 0.75] expected = [0.470008952551477, 0.6145926589077886, 0.7247090420769919]\n", + "2014-12-31 06:00:00\n", + "31 : predicted = [0.63 0.73 0.79] expected = [0.6145926589077886, 0.7247090420769919, 0.786034019695613]\n", + "2014-12-31 07:00:00\n", + "32 : predicted = [0.71 0.76 0.79] expected = [0.7247090420769919, 0.786034019695613, 0.8012533572068039]\n", + "2014-12-31 08:00:00\n", + "33 : 
predicted = [0.79 0.82 0.83] expected = [0.786034019695613, 0.8012533572068039, 0.7994628469113696]\n", + "2014-12-31 09:00:00\n", + "34 : predicted = [0.82 0.83 0.81] expected = [0.8012533572068039, 0.7994628469113696, 0.780214861235452]\n", + "2014-12-31 10:00:00\n", + "35 : predicted = [0.8 0.78 0.76] expected = [0.7994628469113696, 0.780214861235452, 0.7587287376902416]\n", + "2014-12-31 11:00:00\n", + "36 : predicted = [0.77 0.75 0.74] expected = [0.780214861235452, 0.7587287376902416, 0.7367949865711727]\n", + "2014-12-31 12:00:00\n", + "37 : predicted = [0.77 0.76 0.76] expected = [0.7587287376902416, 0.7367949865711727, 0.7188898836168307]\n", + "2014-12-31 13:00:00\n", + "38 : predicted = [0.75 0.75 0.78] expected = [0.7367949865711727, 0.7188898836168307, 0.7273948075201431]\n", + "2014-12-31 14:00:00\n", + "39 : predicted = [0.73 0.75 0.87] expected = [0.7188898836168307, 0.7273948075201431, 0.8299015219337511]\n", + "2014-12-31 15:00:00\n", + "40 : predicted = [0.74 0.85 0.96] expected = [0.7273948075201431, 0.8299015219337511, 0.909579230080573]\n", + "2014-12-31 16:00:00\n", + "41 : predicted = [0.83 0.94 0.93] expected = [0.8299015219337511, 0.909579230080573, 0.855863921217547]\n", + "2014-12-31 17:00:00\n", + "42 : predicted = [0.94 0.93 0.88] expected = [0.909579230080573, 0.855863921217547, 0.7721575649059982]\n", + "2014-12-31 18:00:00\n", + "43 : predicted = [0.87 0.82 0.77] expected = [0.855863921217547, 0.7721575649059982, 0.7023276633840643]\n", + "2014-12-31 19:00:00\n", + "44 : predicted = [0.79 0.73 0.63] expected = [0.7721575649059982, 0.7023276633840643, 0.6195165622202325]\n", + "2014-12-31 20:00:00\n", + "45 : predicted = [0.7 0.59 0.46] expected = [0.7023276633840643, 0.6195165622202325, 0.5425246195165621]\n", + "2014-12-31 21:00:00\n", + "46 : predicted = [0.6 0.47 0.36] expected = [0.6195165622202325, 0.5425246195165621, 0.4735899731423454]\n", + "CPU times: user 12min 15s, sys: 2min 39s, total: 14min 54s\n", + "Wall time: 2min 
36s\n" + ] + } + ], + "metadata": { + "scrolled": true + } + }, + { + "cell_type": "markdown", + "source": [ + "Jämför prognoser med faktisk belastning\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 30, + "source": [ + "eval_df = pd.DataFrame(predictions, columns=['t+'+str(t) for t in range(1, HORIZON+1)])\n", + "eval_df['timestamp'] = test.index[0:len(test.index)-HORIZON+1]\n", + "eval_df = pd.melt(eval_df, id_vars='timestamp', value_name='prediction', var_name='h')\n", + "eval_df['actual'] = np.array(np.transpose(test_ts)).ravel()\n", + "eval_df[['prediction', 'actual']] = scaler.inverse_transform(eval_df[['prediction', 'actual']])\n", + "eval_df.head()" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
timestamphpredictionactual
02014-12-30 00:00:00t+13,008.743,023.00
12014-12-30 01:00:00t+12,955.532,935.00
22014-12-30 02:00:00t+12,900.172,899.00
32014-12-30 03:00:00t+12,917.692,886.00
42014-12-30 04:00:00t+12,946.992,963.00
\n", + "
" + ], + "text/plain": [ + " timestamp h prediction actual\n", + "0 2014-12-30 00:00:00 t+1 3,008.74 3,023.00\n", + "1 2014-12-30 01:00:00 t+1 2,955.53 2,935.00\n", + "2 2014-12-30 02:00:00 t+1 2,900.17 2,899.00\n", + "3 2014-12-30 03:00:00 t+1 2,917.69 2,886.00\n", + "4 2014-12-30 04:00:00 t+1 2,946.99 2,963.00" + ] + }, + "metadata": {}, + "execution_count": 30 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Beräkna **genomsnittligt absolut procentuellt fel (MAPE)** för alla förutsägelser\n", + "\n", + "$$MAPE = \\frac{1}{n} \\sum_{t=1}^{n}|\\frac{actual_t - predicted_t}{actual_t}|$$\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 31, + "source": [ + "if(HORIZON > 1):\n", + " eval_df['APE'] = (eval_df['prediction'] - eval_df['actual']).abs() / eval_df['actual']\n", + " print(eval_df.groupby('h')['APE'].mean())" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "h\n", + "t+1 0.01\n", + "t+2 0.01\n", + "t+3 0.02\n", + "Name: APE, dtype: float64\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 32, + "source": [ + "print('One step forecast MAPE: ', (mape(eval_df[eval_df['h'] == 't+1']['prediction'], eval_df[eval_df['h'] == 't+1']['actual']))*100, '%')" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "One step forecast MAPE: 0.5570581332313952 %\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 33, + "source": [ + "print('Multi-step forecast MAPE: ', mape(eval_df['prediction'], eval_df['actual'])*100, '%')" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Multi-step forecast MAPE: 1.1460048657704118 %\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Plotta förutsägelserna mot de faktiska värdena för den första veckan av 
testuppsättningen\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 34, + "source": [ + "if(HORIZON == 1):\n", + " ## Plotting single step forecast\n", + " eval_df.plot(x='timestamp', y=['actual', 'prediction'], style=['r', 'b'], figsize=(15, 8))\n", + "\n", + "else:\n", + " ## Plotting multi step forecast\n", + " plot_df = eval_df[(eval_df.h=='t+1')][['timestamp', 'actual']]\n", + " for t in range(1, HORIZON+1):\n", + " plot_df['t+'+str(t)] = eval_df[(eval_df.h=='t+'+str(t))]['prediction'].values\n", + "\n", + " fig = plt.figure(figsize=(15, 8))\n", + " ax = plt.plot(plot_df['timestamp'], plot_df['actual'], color='red', linewidth=4.0)\n", + " ax = fig.add_subplot(111)\n", + " for t in range(1, HORIZON+1):\n", + " x = plot_df['timestamp'][(t-1):]\n", + " y = plot_df['t+'+str(t)][0:len(x)]\n", + " ax.plot(x, y, color='blue', linewidth=4*math.pow(.9,t), alpha=math.pow(0.8,t))\n", + " \n", + " ax.legend(loc='best')\n", + " \n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "No handles with labels found to put in legend.\n" + ] + }, + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, bör du vara medveten om att automatiserade översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på dess originalspråk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ], + "metadata": { + "kernel_info": { + "name": "python3" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "nteract": { + "version": "nteract-front-end@1.0.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "c193140200b9684da27e3890211391b6", + "translation_date": "2025-09-06T13:58:21+00:00", + "source_file": "7-TimeSeries/2-ARIMA/solution/notebook.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sv/7-TimeSeries/2-ARIMA/working/notebook.ipynb b/translations/sv/7-TimeSeries/2-ARIMA/working/notebook.ipynb new file mode 100644 
index 000000000..4b24d6429 --- /dev/null +++ b/translations/sv/7-TimeSeries/2-ARIMA/working/notebook.ipynb @@ -0,0 +1,59 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "523ec472196307b3c4235337353c9ceb", + "translation_date": "2025-09-06T14:00:29+00:00", + "source_file": "7-TimeSeries/2-ARIMA/working/notebook.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# Tidsserieprognos med ARIMA\n", + "\n", + "I denna notebook demonstrerar vi hur man:\n", + "- förbereder tidsseriedata för att träna en ARIMA-modell för tidsserieprognos\n", + "- implementerar en enkel ARIMA-modell för att prognostisera de kommande HORIZON stegen framåt (tid *t+1* till *t+HORIZON*) i tidsserien\n", + "- utvärderar modellen\n", + "\n", + "Datan i detta exempel är hämtad från prognostävlingen GEFCom2014. Den består av 3 års timvisa värden för elförbrukning och temperatur mellan 2012 och 2014. Uppgiften är att prognostisera framtida värden för elförbrukning. I detta exempel visar vi hur man prognostiserar ett tidssteg framåt, med hjälp av historiska data för elförbrukning.\n", + "\n", + "Tao Hong, Pierre Pinson, Shu Fan, Hamidreza Zareipour, Alberto Troccoli och Rob J. Hyndman, \"Probabilistic energy forecasting: Global Energy Forecasting Competition 2014 and beyond\", International Journal of Forecasting, vol.32, no.3, s. 
896-913, juli-september, 2016.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pip install statsmodels" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, vänligen notera att automatiska översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på dess originalspråk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sv/7-TimeSeries/3-SVR/solution/notebook.ipynb b/translations/sv/7-TimeSeries/3-SVR/solution/notebook.ipynb new file mode 100644 index 000000000..8399e3128 --- /dev/null +++ b/translations/sv/7-TimeSeries/3-SVR/solution/notebook.ipynb @@ -0,0 +1,1025 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "fv9OoQsMFk5A" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "I den här anteckningsboken demonstrerar vi hur man:\n", + "\n", + "- förbereder 2D-tidsseriedata för att träna en SVM-regressormodell\n", + "- implementerar SVR med hjälp av RBF-kärna\n", + "- utvärderar modellen med hjälp av diagram och MAPE\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Importera moduler\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append('../../')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "M687KNlQFp0-" + }, + "outputs": [], + 
"source": [ + "import os\n", + "import warnings\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import datetime as dt\n", + "import math\n", + "\n", + "from sklearn.svm import SVR\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from common.utils import load_data, mape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Cj-kfVdMGjWP" + }, + "source": [ + "## Förbereder data\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8fywSjC6GsRz" + }, + "source": [ + "### Ladda data\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "aBDkEB11Fumg", + "outputId": "99cf7987-0509-4b73-8cc2-75d7da0d2740" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2012-01-01 00:00:002698.0
2012-01-01 01:00:002558.0
2012-01-01 02:00:002444.0
2012-01-01 03:00:002402.0
2012-01-01 04:00:002403.0
\n", + "
" + ], + "text/plain": [ + " load\n", + "2012-01-01 00:00:00 2698.0\n", + "2012-01-01 01:00:00 2558.0\n", + "2012-01-01 02:00:00 2444.0\n", + "2012-01-01 03:00:00 2402.0\n", + "2012-01-01 04:00:00 2403.0" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "energy = load_data('../../data')[['load']]\n", + "energy.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "O0BWP13rGnh4" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 486 + }, + "id": "hGaNPKu_Gidk", + "outputId": "7f89b326-9057-4f49-efbe-cb100ebdf76d" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "energy.plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IPuNor4eGwYY" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "ysvsNyONGt0Q" + }, + "outputs": [], + "source": [ + "train_start_dt = '2014-11-01 00:00:00'\n", + "test_start_dt = '2014-12-30 00:00:00'" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 548 + }, + "id": "SsfdLoPyGy9w", + "outputId": "d6d6c25b-b1f4-47e5-91d1-707e043237d7" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "energy[(energy.index < test_start_dt) & (energy.index >= train_start_dt)][['load']].rename(columns={'load':'train'}) \\\n", + " .join(energy[test_start_dt:][['load']].rename(columns={'load':'test'}), how='outer') \\\n", + " .plot(y=['train', 'test'], figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XbFTqBw6G1Ch" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Nu behöver du förbereda data för träning genom att utföra filtrering och skalning av din data.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cYivRdQpHDj3", + "outputId": "a138f746-461c-4fd6-bfa6-0cee094c4aa1" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training data shape: (1416, 1)\n", + "Test data shape: (48, 1)\n" + ] + } + ], + "source": [ + "train = energy.copy()[(energy.index >= train_start_dt) & (energy.index < test_start_dt)][['load']]\n", + "test = energy.copy()[energy.index >= test_start_dt][['load']]\n", + "\n", + "print('Training data shape: ', train.shape)\n", + "print('Test data shape: ', test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Skala data för att vara inom intervallet (0, 1).\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "3DNntGQnZX8G", + "outputId": "210046bc-7a66-4ccd-d70d-aa4a7309949c" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2014-11-01 00:00:000.101611
2014-11-01 01:00:000.065801
2014-11-01 02:00:000.046106
2014-11-01 03:00:000.042525
2014-11-01 04:00:000.059087
\n", + "
" + ], + "text/plain": [ + " load\n", + "2014-11-01 00:00:00 0.101611\n", + "2014-11-01 01:00:00 0.065801\n", + "2014-11-01 02:00:00 0.046106\n", + "2014-11-01 03:00:00 0.042525\n", + "2014-11-01 04:00:00 0.059087" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "scaler = MinMaxScaler()\n", + "train['load'] = scaler.fit_transform(train)\n", + "train.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "26Yht-rzZexe", + "outputId": "20326077-a38a-4e78-cc5b-6fd7af95d301" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2014-12-30 00:00:000.329454
2014-12-30 01:00:000.290063
2014-12-30 02:00:000.273948
2014-12-30 03:00:000.268129
2014-12-30 04:00:000.302596
\n", + "
" + ], + "text/plain": [ + " load\n", + "2014-12-30 00:00:00 0.329454\n", + "2014-12-30 01:00:00 0.290063\n", + "2014-12-30 02:00:00 0.273948\n", + "2014-12-30 03:00:00 0.268129\n", + "2014-12-30 04:00:00 0.302596" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test['load'] = scaler.transform(test)\n", + "test.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x0n6jqxOQ41Z" + }, + "source": [ + "### Skapa data med tidssteg\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fdmxTZtOQ8xs" + }, + "source": [ + "För vår SVR transformerar vi indata till formen `[batch, timesteps]`. Så vi omformar de befintliga `train_data` och `test_data` så att det finns en ny dimension som hänvisar till tidsstegen. För vårt exempel tar vi `timesteps = 5`. Så indata till modellen är data för de första 4 tidsstegen, och utdata kommer att vara data för det 5:e tidssteget.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "Rpju-Sc2HFm0" + }, + "outputs": [], + "source": [ + "# Converting to numpy arrays\n", + "\n", + "train_data = train.values\n", + "test_data = test.values" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# Selecting the timesteps\n", + "\n", + "timesteps=5" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "O-JrsrsVJhUQ", + "outputId": "c90dbe71-bacc-4ec4-b452-f82fe5aefaef" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(1412, 5)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Converting data to 2D tensor\n", + "\n", + "train_data_timesteps=np.array([[j for j in train_data[i:i+timesteps]] for i in range(0,len(train_data)-timesteps+1)])[:,:,0]\n", + "train_data_timesteps.shape" + ] + }, + { + 
"cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "exJD8AI7KE4g", + "outputId": "ce90260c-f327-427d-80f2-77307b5a6318" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(44, 5)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Converting test data to 2D tensor\n", + "\n", + "test_data_timesteps=np.array([[j for j in test_data[i:i+timesteps]] for i in range(0,len(test_data)-timesteps+1)])[:,:,0]\n", + "test_data_timesteps.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "2u0R2sIsLuq5" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(1412, 4) (1412, 1)\n", + "(44, 4) (44, 1)\n" + ] + } + ], + "source": [ + "x_train, y_train = train_data_timesteps[:,:timesteps-1],train_data_timesteps[:,[timesteps-1]]\n", + "x_test, y_test = test_data_timesteps[:,:timesteps-1],test_data_timesteps[:,[timesteps-1]]\n", + "\n", + "print(x_train.shape, y_train.shape)\n", + "print(x_test.shape, y_test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8wIPOtAGLZlh" + }, + "source": [ + "## Skapa SVR-modell\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "id": "EhA403BEPEiD" + }, + "outputs": [], + "source": [ + "# Create model using RBF kernel\n", + "\n", + "model = SVR(kernel='rbf',gamma=0.5, C=10, epsilon = 0.05)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GS0UA3csMbqp", + "outputId": "d86b6f05-5742-4c1d-c2db-c40510bd4f0d" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "SVR(C=10, cache_size=200, coef0=0.0, degree=3, epsilon=0.05, gamma=0.5,\n", + " kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "# Fit model on training data\n", + "\n", + "model.fit(x_train, y_train[:,0])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Rz_x8S3UrlcF" + }, + "source": [ + "### Gör modellförutsägelse\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XR0gnt3MnuYS", + "outputId": "157e40ab-9a23-4b66-a885-0d52a24b2364" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(1412, 1) (44, 1)\n" + ] + } + ], + "source": [ + "# Making predictions\n", + "\n", + "y_train_pred = model.predict(x_train).reshape(-1,1)\n", + "y_test_pred = model.predict(x_test).reshape(-1,1)\n", + "\n", + "print(y_train_pred.shape, y_test_pred.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_2epncg-SGzr" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1412 44\n" + ] + } + ], + "source": [ + "# Scaling the predictions\n", + "\n", + "y_train_pred = scaler.inverse_transform(y_train_pred)\n", + "y_test_pred = scaler.inverse_transform(y_test_pred)\n", + "\n", + "print(len(y_train_pred), len(y_test_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xmm_YLXhq7gV", + "outputId": "18392f64-4029-49ac-c71a-a4e2411152a1" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1412 44\n" + ] + } + ], + "source": [ + "# Scaling the original values\n", + "\n", + "y_train = scaler.inverse_transform(y_train)\n", + "y_test = scaler.inverse_transform(y_test)\n", + "\n", + "print(len(y_train), len(y_test))" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": 
"u3LBj93coHEi", + "outputId": "d4fd49e8-8c6e-4bb0-8ef9-ca0b26d725b4" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1412 44\n" + ] + } + ], + "source": [ + "# Extract the timesteps for x-axis\n", + "\n", + "train_timestamps = energy[(energy.index < test_start_dt) & (energy.index >= train_start_dt)].index[timesteps-1:]\n", + "test_timestamps = energy[test_start_dt:].index[timesteps-1:]\n", + "\n", + "print(len(train_timestamps), len(test_timestamps))" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(25,6))\n", + "plt.plot(train_timestamps, y_train, color = 'red', linewidth=2.0, alpha = 0.6)\n", + "plt.plot(train_timestamps, y_train_pred, color = 'blue', linewidth=0.8)\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.title(\"Training data prediction\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "LnhzcnYtXHCm", + "outputId": "f5f0d711-f18b-4788-ad21-d4470ea2c02b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MAPE for training data: 1.7195710200875551 %\n" + ] + } + ], + "source": [ + "print('MAPE for training data: ', mape(y_train_pred, y_train)*100, '%')" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 225 + }, + "id": "53Q02FoqQH4V", + "outputId": "53e2d59b-5075-4765-ad9e-aed56c966583" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(10,3))\n", + "plt.plot(test_timestamps, y_test, color = 'red', linewidth=2.0, alpha = 0.6)\n", + "plt.plot(test_timestamps, y_test_pred, color = 'blue', linewidth=0.8)\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "clOAUH-SXCJG", + "outputId": "a3aa85ff-126a-4a4a-cd9e-90b9cc465ef5" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MAPE for testing data: 1.2623790187854018 %\n" + ] + } + ], + "source": [ + "print('MAPE for testing data: ', mape(y_test_pred, y_test)*100, '%')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DHlKvVCId5ue" + }, + "source": [ + "## Full dataset prediction\n" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cOFJ45vreO0N", + "outputId": "35628e33-ecf9-4966-8036-f7ea86db6f16" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tensor shape: (26300, 5)\n", + "X shape: (26300, 4) \n", + "Y shape: (26300, 1)\n" + ] + } + ], + "source": [ + "# Extracting load values as numpy array\n", + "data = energy.copy().values\n", + "\n", + "# Scaling\n", + "data = scaler.transform(data)\n", + "\n", + "# Transforming to 2D tensor as per model input requirement\n", + "data_timesteps=np.array([[j for j in data[i:i+timesteps]] for i in range(0,len(data)-timesteps+1)])[:,:,0]\n", + "print(\"Tensor shape: \", data_timesteps.shape)\n", + "\n", + "# Selecting inputs and outputs from data\n", + "X, Y = data_timesteps[:,:timesteps-1],data_timesteps[:,[timesteps-1]]\n", + "print(\"X shape: \", X.shape,\"\\nY shape: \", Y.shape)" + ] + }, + { + "cell_type": "code", 
+ "execution_count": 26, + "metadata": { + "id": "ESSAdQgwexIi" + }, + "outputs": [], + "source": [ + "# Make model predictions\n", + "Y_pred = model.predict(X).reshape(-1,1)\n", + "\n", + "# Inverse scale and reshape\n", + "Y_pred = scaler.inverse_transform(Y_pred)\n", + "Y = scaler.inverse_transform(Y)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 328 + }, + "id": "M_qhihN0RVVX", + "outputId": "a89cb23e-1d35-437f-9d63-8b8907e12f80" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(30,8))\n", + "plt.plot(Y, color = 'red', linewidth=2.0, alpha = 0.6)\n", + "plt.plot(Y_pred, color = 'blue', linewidth=1)\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "AcN7pMYXVGTK", + "outputId": "7e1c2161-47ce-496c-9d86-7ad9ae0df770" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MAPE: 2.0572089029888656 %\n" + ] + } + ], + "source": [ + "print('MAPE: ', mape(Y_pred, Y)*100, '%')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, bör du vara medveten om att automatiserade översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på dess ursprungliga språk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. 
Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "Recurrent_Neural_Networks.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.1" + }, + "coopTranslator": { + "original_hash": "f8f3967282314d3995245835bdaa8418", + "translation_date": "2025-09-06T14:04:11+00:00", + "source_file": "7-TimeSeries/3-SVR/solution/notebook.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/translations/sv/7-TimeSeries/3-SVR/working/notebook.ipynb b/translations/sv/7-TimeSeries/3-SVR/working/notebook.ipynb new file mode 100644 index 000000000..57cf84aef --- /dev/null +++ b/translations/sv/7-TimeSeries/3-SVR/working/notebook.ipynb @@ -0,0 +1,701 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "fv9OoQsMFk5A" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "I den här anteckningsboken demonstrerar vi hur man:\n", + "\n", + "- förbereder 2D-tidsseriedata för att träna en SVM-regressormodell\n", + "- implementerar SVR med hjälp av RBF-kärna\n", + "- utvärderar modellen med hjälp av diagram och MAPE\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Importera moduler\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append('../../')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "M687KNlQFp0-" + }, + "outputs": 
[], + "source": [ + "import os\n", + "import warnings\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import datetime as dt\n", + "import math\n", + "\n", + "from sklearn.svm import SVR\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from common.utils import load_data, mape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Cj-kfVdMGjWP" + }, + "source": [ + "## Förbereder data\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8fywSjC6GsRz" + }, + "source": [ + "### Ladda data\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "aBDkEB11Fumg", + "outputId": "99cf7987-0509-4b73-8cc2-75d7da0d2740" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2012-01-01 00:00:002698.0
2012-01-01 01:00:002558.0
2012-01-01 02:00:002444.0
2012-01-01 03:00:002402.0
2012-01-01 04:00:002403.0
\n", + "
" + ], + "text/plain": [ + " load\n", + "2012-01-01 00:00:00 2698.0\n", + "2012-01-01 01:00:00 2558.0\n", + "2012-01-01 02:00:00 2444.0\n", + "2012-01-01 03:00:00 2402.0\n", + "2012-01-01 04:00:00 2403.0" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "energy = load_data('../../data')[['load']]\n", + "energy.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "O0BWP13rGnh4" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 486 + }, + "id": "hGaNPKu_Gidk", + "outputId": "7f89b326-9057-4f49-efbe-cb100ebdf76d" + }, + "outputs": [], + "source": [ + "energy.plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IPuNor4eGwYY" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ysvsNyONGt0Q" + }, + "outputs": [], + "source": [ + "train_start_dt = '2014-11-01 00:00:00'\n", + "test_start_dt = '2014-12-30 00:00:00'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 548 + }, + "id": "SsfdLoPyGy9w", + "outputId": "d6d6c25b-b1f4-47e5-91d1-707e043237d7" + }, + "outputs": [], + "source": [ + "energy[(energy.index < test_start_dt) & (energy.index >= train_start_dt)][['load']].rename(columns={'load':'train'}) \\\n", + " .join(energy[test_start_dt:][['load']].rename(columns={'load':'test'}), how='outer') \\\n", + " .plot(y=['train', 'test'], figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XbFTqBw6G1Ch" + }, + "source": [] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "Nu behöver du förbereda data för träning genom att utföra filtrering och skalning av din data.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cYivRdQpHDj3", + "outputId": "a138f746-461c-4fd6-bfa6-0cee094c4aa1" + }, + "outputs": [], + "source": [ + "train = energy.copy()[(energy.index >= train_start_dt) & (energy.index < test_start_dt)][['load']]\n", + "test = energy.copy()[energy.index >= test_start_dt][['load']]\n", + "\n", + "print('Training data shape: ', train.shape)\n", + "print('Test data shape: ', test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Skala data för att vara inom intervallet (0, 1).\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "3DNntGQnZX8G", + "outputId": "210046bc-7a66-4ccd-d70d-aa4a7309949c" + }, + "outputs": [], + "source": [ + "scaler = MinMaxScaler()\n", + "train['load'] = scaler.fit_transform(train)\n", + "train.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "26Yht-rzZexe", + "outputId": "20326077-a38a-4e78-cc5b-6fd7af95d301" + }, + "outputs": [], + "source": [ + "test['load'] = scaler.transform(test)\n", + "test.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x0n6jqxOQ41Z" + }, + "source": [ + "### Skapa data med tidssteg\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fdmxTZtOQ8xs" + }, + "source": [ + "För vår SVR transformerar vi indata till formen `[batch, timesteps]`. Så vi omformar den befintliga `train_data` och `test_data` så att det finns en ny dimension som hänvisar till tidsstegen. I vårt exempel tar vi `timesteps = 5`. 
Så indata till modellen är data för de första 4 tidsstegen, och utdata kommer att vara data för det 5:e tidssteget.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Rpju-Sc2HFm0" + }, + "outputs": [], + "source": [ + "# Converting to numpy arrays\n", + "\n", + "train_data = train.values\n", + "test_data = test.values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Selecting the timesteps\n", + "\n", + "timesteps=None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "O-JrsrsVJhUQ", + "outputId": "c90dbe71-bacc-4ec4-b452-f82fe5aefaef" + }, + "outputs": [], + "source": [ + "# Converting data to 2D tensor\n", + "\n", + "train_data_timesteps=None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "exJD8AI7KE4g", + "outputId": "ce90260c-f327-427d-80f2-77307b5a6318" + }, + "outputs": [], + "source": [ + "# Converting test data to 2D tensor\n", + "\n", + "test_data_timesteps=None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2u0R2sIsLuq5" + }, + "outputs": [], + "source": [ + "x_train, y_train = None\n", + "x_test, y_test = None\n", + "\n", + "print(x_train.shape, y_train.shape)\n", + "print(x_test.shape, y_test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8wIPOtAGLZlh" + }, + "source": [ + "## Skapa SVR-modell\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EhA403BEPEiD" + }, + "outputs": [], + "source": [ + "# Create model using RBF kernel\n", + "\n", + "model = None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GS0UA3csMbqp", + "outputId": 
"d86b6f05-5742-4c1d-c2db-c40510bd4f0d" + }, + "outputs": [], + "source": [ + "# Fit model on training data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Rz_x8S3UrlcF" + }, + "source": [ + "### Gör modellförutsägelse\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XR0gnt3MnuYS", + "outputId": "157e40ab-9a23-4b66-a885-0d52a24b2364" + }, + "outputs": [], + "source": [ + "# Making predictions\n", + "\n", + "y_train_pred = None\n", + "y_test_pred = None" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_2epncg-SGzr" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Scaling the predictions\n", + "\n", + "y_train_pred = scaler.inverse_transform(y_train_pred)\n", + "y_test_pred = scaler.inverse_transform(y_test_pred)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xmm_YLXhq7gV", + "outputId": "18392f64-4029-49ac-c71a-a4e2411152a1" + }, + "outputs": [], + "source": [ + "# Scaling the original values\n", + "\n", + "y_train = scaler.inverse_transform(y_train)\n", + "y_test = scaler.inverse_transform(y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "u3LBj93coHEi", + "outputId": "d4fd49e8-8c6e-4bb0-8ef9-ca0b26d725b4" + }, + "outputs": [], + "source": [ + "# Extract the timesteps for x-axis\n", + "\n", + "train_timestamps = None\n", + "test_timestamps = None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(25,6))\n", + "# plot original output\n", + "# plot predicted output\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.title(\"Training data 
prediction\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "LnhzcnYtXHCm", + "outputId": "f5f0d711-f18b-4788-ad21-d4470ea2c02b" + }, + "outputs": [], + "source": [ + "print('MAPE for training data: ', mape(y_train_pred, y_train)*100, '%')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 225 + }, + "id": "53Q02FoqQH4V", + "outputId": "53e2d59b-5075-4765-ad9e-aed56c966583" + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(10,3))\n", + "# plot original output\n", + "# plot predicted output\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "clOAUH-SXCJG", + "outputId": "a3aa85ff-126a-4a4a-cd9e-90b9cc465ef5" + }, + "outputs": [], + "source": [ + "print('MAPE for testing data: ', mape(y_test_pred, y_test)*100, '%')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DHlKvVCId5ue" + }, + "source": [ + "## Fullständig datasetförutsägelse\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cOFJ45vreO0N", + "outputId": "35628e33-ecf9-4966-8036-f7ea86db6f16" + }, + "outputs": [], + "source": [ + "# Extracting load values as numpy array\n", + "data = None\n", + "\n", + "# Scaling\n", + "data = None\n", + "\n", + "# Transforming to 2D tensor as per model input requirement\n", + "data_timesteps=None\n", + "\n", + "# Selecting inputs and outputs from data\n", + "X, Y = None, None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ESSAdQgwexIi" + }, + "outputs": [], + "source": [ + "# Make model predictions\n", + "\n", + "# 
Inverse scale and reshape\n", + "Y_pred = None\n", + "Y = None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 328 + }, + "id": "M_qhihN0RVVX", + "outputId": "a89cb23e-1d35-437f-9d63-8b8907e12f80" + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(30,8))\n", + "# plot original output\n", + "# plot predicted output\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "AcN7pMYXVGTK", + "outputId": "7e1c2161-47ce-496c-9d86-7ad9ae0df770" + }, + "outputs": [], + "source": [ + "print('MAPE: ', mape(Y_pred, Y)*100, '%')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, vänligen notera att automatiska översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på dess originalspråk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. 
Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "Recurrent_Neural_Networks.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.1" + }, + "coopTranslator": { + "original_hash": "e86ce102239a14c44585623b9b924a74", + "translation_date": "2025-09-06T14:06:41+00:00", + "source_file": "7-TimeSeries/3-SVR/working/notebook.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/translations/sv/8-Reinforcement/1-QLearning/notebook.ipynb b/translations/sv/8-Reinforcement/1-QLearning/notebook.ipynb new file mode 100644 index 000000000..5849bf6cb --- /dev/null +++ b/translations/sv/8-Reinforcement/1-QLearning/notebook.ipynb @@ -0,0 +1,411 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "17e5a668646eabf5aabd0e9bfcf17876", + "translation_date": "2025-09-06T15:04:39+00:00", + "source_file": "8-Reinforcement/1-QLearning/notebook.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# Peter 
och vargen: Introduktion till förstärkningsinlärning\n", + "\n", + "I denna handledning kommer vi att lära oss hur man tillämpar förstärkningsinlärning på ett problem med att hitta vägar. Miljön är inspirerad av den musikaliska sagan [Peter och vargen](https://en.wikipedia.org/wiki/Peter_and_the_Wolf) av den ryske kompositören [Sergei Prokofiev](https://en.wikipedia.org/wiki/Sergei_Prokofiev). Det är en berättelse om den unge pionjären Peter, som modigt lämnar sitt hus och går till skogsgläntan för att jaga en varg. Vi kommer att träna maskininlärningsalgoritmer som hjälper Peter att utforska området och skapa en optimal navigeringskarta.\n", + "\n", + "Först, låt oss importera en mängd användbara bibliotek:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import random\n", + "import math" + ] + }, + { + "source": [ + "## Översikt av Förstärkningsinlärning\n", + "\n", + "**Förstärkningsinlärning** (RL) är en inlärningsteknik som låter oss lära oss ett optimalt beteende för en **agent** i en viss **miljö** genom att utföra många experiment. En agent i denna miljö bör ha ett **mål**, definierat av en **belöningsfunktion**.\n", + "\n", + "## Miljön\n", + "\n", + "För enkelhetens skull, låt oss betrakta Peters värld som en kvadratisk spelplan med storleken `width` x `height`. Varje ruta på denna spelplan kan vara:\n", + "* **mark**, där Peter och andra varelser kan gå\n", + "* **vatten**, där man uppenbarligen inte kan gå\n", + "* **ett träd** eller **gräs** – en plats där man kan vila\n", + "* **ett äpple**, som representerar något Peter gärna skulle vilja hitta för att mätta sig\n", + "* **en varg**, som är farlig och bör undvikas\n", + "\n", + "För att arbeta med miljön kommer vi att definiera en klass som heter `Board`. 
För att inte göra denna anteckningsbok för rörig har vi flyttat all kod för att arbeta med spelplanen till en separat modul som heter `rlboard`, vilken vi nu kommer att importera. Du kan titta in i denna modul för att få mer information om implementationens detaljer.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "Låt oss nu skapa en slumpmässig spelplan och se hur den ser ut:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 1" + ] + }, + { + "source": [ + "## Åtgärder och Policy\n", + "\n", + "I vårt exempel är Peters mål att hitta ett äpple, samtidigt som han undviker vargen och andra hinder. Definiera dessa åtgärder som en ordbok och koppla dem till par av motsvarande koordinatförändringar.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 2" + ] + }, + { + "source": [ + "Strategin för vår agent (Peter) definieras av en så kallad **policy**.
Låt oss titta på den enklaste policyn som kallas **slumpvandring**.\n", + "\n", + "## Slumpvandring\n", + "\n", + "Låt oss först lösa vårt problem genom att implementera en strategi för slumpvandring.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "# Let's run a random walk experiment several times and see the average number of steps taken: code block 3" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 4" + ] + }, + { + "source": [ + "## Belöningsfunktion\n", + "\n", + "För att göra vår policy mer intelligent behöver vi förstå vilka drag som är \"bättre\" än andra.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 5" + ] + }, + { + "source": [ + "## Q-Learning\n", + "\n", + "Bygg en Q-Tabell, eller en flerdimensionell matris. 
Eftersom vår spelplan har dimensionerna `width` x `height`, kan vi representera Q-Tabellen med en numpy-array med formen `width` x `height` x `len(actions)`:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 6" + ] + }, + { + "source": [ + "Skicka Q-tabellen till `plot`-funktionen för att visualisera tabellen på spelplanen:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "error", + "ename": "NameError", + "evalue": "name 'm' is not defined", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mQ\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mNameError\u001b[0m: name 'm' is not defined" + ] + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## Essensen av Q-Learning: Bellman-ekvationen och inlärningsalgoritmen\n", + "\n", + "Skriv en pseudokod för vår inlärningsalgoritm:\n", + "\n", + "* Initiera Q-Tabell Q med lika värden för alla tillstånd och handlingar\n", + "* Sätt inlärningshastighet $\\alpha\\leftarrow 1$\n", + "* Upprepa simuleringen många gånger\n", + " 1. Börja på en slumpmässig position\n", + " 1. Upprepa\n", + " 1. Välj en handling $a$ vid tillstånd $s$\n", + " 2. Utför handlingen genom att flytta till ett nytt tillstånd $s'$\n", + " 3. Om vi stöter på ett slutspelsvillkor, eller den totala belöningen är för liten - avsluta simuleringen \n", + " 4. Beräkna belöningen $r$ vid det nya tillståndet\n", + " 5.
Uppdatera Q-Funktionen enligt Bellman-ekvationen: $Q(s,a)\\leftarrow (1-\\alpha)Q(s,a)+\\alpha(r+\\gamma\\max_{a'}Q(s',a'))$\n", + " 6. $s\\leftarrow s'$\n", + " 7. Uppdatera total belöning och minska $\\alpha$.\n", + "\n", + "## Exploatera vs. Utforska\n", + "\n", + "Den bästa metoden är att balansera mellan utforskning och exploatering. När vi lär oss mer om vår miljö, kommer vi vara mer benägna att följa den optimala vägen, men ändå välja den outforskade vägen då och då.\n", + "\n", + "## Python-implementation\n", + "\n", + "Nu är vi redo att implementera inlärningsalgoritmen. Innan dess behöver vi också en funktion som kan konvertera godtyckliga värden i Q-Tabellen till en sannolikhetsvektor för motsvarande handlingar:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 7" + ] + }, + { + "source": [ + "Vi lägger till en liten mängd `eps` till den ursprungliga vektorn för att undvika division med 0 i det initiala fallet, när alla komponenter i vektorn är identiska.\n", + "\n", + "Den faktiska inlärningsalgoritmen kommer vi att köra i 5000 experiment, även kallade **epoker**:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "" + ] + } + ], + "source": [ + "\n", + "from IPython.display import clear_output\n", + "\n", + "lpath = []\n", + "\n", + "# code block 8" + ] + }, + { + "source": [ + "Efter att ha kört denna algoritm bör Q-tabellen uppdateras med värden som definierar attraktiviteten hos olika handlingar vid varje steg. Visualisera tabellen här:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## Kontrollera policyn\n", + "\n", + "Eftersom Q-Tabellen listar \"attraktiviteten\" för varje handling i varje tillstånd, är det ganska enkelt att använda den för att definiera den 
effektiva navigeringen i vår värld. I det enklaste fallet kan vi helt enkelt välja den handling som motsvarar det högsta värdet i Q-Tabellen:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "2" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ], + "source": [ + "# code block 9" + ] + }, + { + "source": [ + "Om du testar koden ovan flera gånger, kanske du märker att den ibland bara \"fastnar\", och du måste trycka på STOP-knappen i notebooken för att avbryta den.\n", + "\n", + "> **Uppgift 1:** Ändra `walk`-funktionen så att den begränsar den maximala längden på vägen till ett visst antal steg (säg, 100), och observera hur koden ovan returnerar detta värde då och då.\n", + "\n", + "> **Uppgift 2:** Ändra `walk`-funktionen så att den inte återvänder till platser där den redan har varit tidigare. Detta kommer att förhindra att `walk` hamnar i en loop, men agenten kan fortfarande bli \"fast\" på en plats där den inte kan ta sig vidare.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Average path length = 5.31, eaten by wolf: 0 times\n" + ] + } + ], + "source": [ + "\n", + "# code block 10" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 57 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(lpath)" + ] + }, + { + "source": [ + "## Övning\n", + "## En mer realistisk värld för Peter och vargen\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, vänligen notera att automatiska översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på dess originalspråk bör betraktas som den auktoritativa källan. 
För kritisk information rekommenderas professionell mänsklig översättning. Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sv/8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb b/translations/sv/8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb new file mode 100644 index 000000000..1a5c41e60 --- /dev/null +++ b/translations/sv/8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb @@ -0,0 +1,469 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "eadbd20d2a075efb602615ad90b1e97a", + "translation_date": "2025-09-06T15:14:38+00:00", + "source_file": "8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# Peter och vargen: Realistisk miljö\n", + "\n", + "I vår situation kunde Peter röra sig nästan utan att bli trött eller hungrig. I en mer realistisk värld måste han sätta sig ner och vila då och då, och även äta för att hålla sig mätt. Låt oss göra vår värld mer realistisk genom att implementera följande regler:\n", + "\n", + "1. När Peter rör sig från en plats till en annan förlorar han **energi** och får en viss **trötthet**.\n", + "2. Peter kan få mer energi genom att äta äpplen.\n", + "3. Peter kan bli av med trötthet genom att vila under ett träd eller på gräset (dvs. 
gå in på en plats på spelplanen med ett träd eller gräs - grönt fält).\n", + "4. Peter måste hitta och döda vargen.\n", + "5. För att kunna döda vargen måste Peter ha vissa nivåer av energi och trötthet, annars förlorar han striden.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import random\n", + "import math\n", + "from rlboard import *" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "width, height = 8,8\n", + "m = Board(width,height)\n", + "m.randomize(seed=13)\n", + "m.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "actions = { \"U\" : (0,-1), \"D\" : (0,1), \"L\" : (-1,0), \"R\" : (1,0) }\n", + "action_idx = { a : i for i,a in enumerate(actions.keys()) }" + ] + }, + { + "source": [ + "## Definiera tillstånd\n", + "\n", + "I våra nya spelregler behöver vi hålla koll på energi och trötthet vid varje brädposition. 
Därför kommer vi att skapa ett objekt `state` som innehåller all nödvändig information om det aktuella problemtillståndet, inklusive brädets tillstånd, aktuella nivåer av energi och trötthet, samt om vi kan besegra vargen vid slutläget:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "class state:\n", + " def __init__(self,board,energy=10,fatigue=0,init=True):\n", + " self.board = board\n", + " self.energy = energy\n", + " self.fatigue = fatigue\n", + " self.dead = False\n", + " if init:\n", + " self.board.random_start()\n", + " self.update()\n", + "\n", + " def at(self):\n", + " return self.board.at()\n", + "\n", + " def update(self):\n", + " if self.at() == Board.Cell.water:\n", + " self.dead = True\n", + " return\n", + " if self.at() == Board.Cell.tree:\n", + " self.fatigue = 0\n", + " if self.at() == Board.Cell.apple:\n", + " self.energy = 10\n", + "\n", + " def move(self,a):\n", + " self.board.move(a)\n", + " self.energy -= 1\n", + " self.fatigue += 1\n", + " self.update()\n", + "\n", + " def is_winning(self):\n", + " return self.energy > self.fatigue" + ] + }, + { + "source": [ + "Låt oss försöka lösa problemet med hjälp av slumpvandring och se om vi lyckas:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ], + "source": [ + "def random_policy(state):\n", + " return random.choice(list(actions))\n", + "\n", + "def walk(board,policy):\n", + " n = 0 # number of steps\n", + " s = state(board)\n", + " while True:\n", + " if s.at() == Board.Cell.wolf:\n", + " if s.is_winning():\n", + " return n # success!\n", + " else:\n", + " return -n # failure!\n", + " if s.at() == Board.Cell.water:\n", + " return 0 # died\n", + " a = 
actions[policy(m)]\n", + " s.move(a)\n", + " n+=1\n", + "\n", + "walk(m,random_policy)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Killed by wolf = 5, won: 1 times, drown: 94 times\n" + ] + } + ], + "source": [ + "def print_statistics(policy):\n", + " s,w,n = 0,0,0\n", + " for _ in range(100):\n", + " z = walk(m,policy)\n", + " if z<0:\n", + " w+=1\n", + " elif z==0:\n", + " n+=1\n", + " else:\n", + " s+=1\n", + " print(f\"Killed by wolf = {w}, won: {s} times, drown: {n} times\")\n", + "\n", + "print_statistics(random_policy)" + ] + }, + { + "source": [ + "## Belöningsfunktion\n", + "\n", + "### Introduktion\n", + "Belöningsfunktionen är en viktig komponent i att definiera hur en agent ska bete sig i en given miljö. Den hjälper till att styra agentens beslut genom att tilldela poäng baserat på dess handlingar.\n", + "\n", + "### Grundläggande principer\n", + "- Belöningar bör vara utformade för att uppmuntra önskade beteenden.\n", + "- Straff kan användas för att avskräcka oönskade beteenden.\n", + "- En välbalanserad belöningsfunktion är avgörande för att uppnå optimala resultat.\n", + "\n", + "### Exempel på belöningsfunktion\n", + "Nedan följer ett exempel på hur en belöningsfunktion kan implementeras:\n", + "\n", + "```python\n", + "def reward_function(params):\n", + " # Extrahera relevanta parametrar\n", + " speed = params['speed']\n", + " distance_from_center = params['distance_from_center']\n", + " track_width = params['track_width']\n", + "\n", + " # Beräkna belöning baserat på position på banan\n", + " if distance_from_center < 0.1 * track_width:\n", + " reward = 1.0 # Hög belöning för att hålla sig nära mitten\n", + " else:\n", + " reward = 0.5 # Lägre belöning för att vara längre från mitten\n", + "\n", + " # Justera belöning baserat på hastighet\n", + " reward *= speed\n", + "\n", + " return reward\n", + "```\n", + "\n", + "### Vanliga 
misstag\n", + "- **Överkomplicerade belöningsfunktioner**: Försök att hålla belöningsfunktionen enkel och lätt att förstå.\n", + "- **Felaktiga parametrar**: Se till att använda rätt parametrar för att undvika oväntade beteenden.\n", + "- **Obalanserade belöningar**: Om belöningarna är för höga eller för låga kan det leda till suboptimala resultat.\n", + "\n", + "### Tips för att designa en effektiv belöningsfunktion\n", + "- Testa belöningsfunktionen i olika scenarier för att säkerställa att den fungerar som avsett.\n", + "- Analysera agentens beteende och justera belöningsfunktionen vid behov.\n", + "- Dokumentera tydligt hur belöningsfunktionen är utformad och vilka parametrar den använder.\n", + "\n", + "### Slutsats\n", + "En välutformad belöningsfunktion är avgörande för att styra agentens beteende och uppnå önskade mål. Genom att följa bästa praxis och undvika vanliga misstag kan du skapa en belöningsfunktion som är både effektiv och robust.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "def reward(s):\n", + " r = s.energy-s.fatigue\n", + " if s.at()==Board.Cell.wolf:\n", + " return 100 if s.is_winning() else -100\n", + " if s.at()==Board.Cell.water:\n", + " return -100\n", + " return r" + ] + }, + { + "source": [ + "## Q-Learning-algoritm\n", + "\n", + "Själva inlärningsalgoritmen förblir i stort sett oförändrad, vi använder bara `state` istället för enbart brädposition.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "Q = np.ones((width,height,len(actions)),dtype=np.float)*1.0/len(actions)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "def probs(v,eps=1e-4):\n", + " v = v-v.min()+eps\n", + " v = v/v.sum()\n", + " return v" + ] + }, + { + "cell_type": "code", + 
"execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "" + ] + } + ], + "source": [ + "\n", + "from IPython.display import clear_output\n", + "\n", + "lpath = []\n", + "\n", + "for epoch in range(10000):\n", + " clear_output(wait=True)\n", + " print(f\"Epoch = {epoch}\",end='')\n", + "\n", + " # Pick initial point\n", + " s = state(m)\n", + " \n", + " # Start travelling\n", + " n=0\n", + " cum_reward = 0\n", + " while True:\n", + " x,y = s.board.human\n", + " v = probs(Q[x,y])\n", + " while True:\n", + " a = random.choices(list(actions),weights=v)[0]\n", + " dpos = actions[a]\n", + " if s.board.is_valid(s.board.move_pos(s.board.human,dpos)):\n", + " break \n", + " s.move(dpos)\n", + " r = reward(s)\n", + " if abs(r)==100: # end of game\n", + " print(f\" {n} steps\",end='\\r')\n", + " lpath.append(n)\n", + " break\n", + " alpha = np.exp(-n / 3000)\n", + " gamma = 0.5\n", + " ai = action_idx[a]\n", + " Q[x,y,ai] = (1 - alpha) * Q[x,y,ai] + alpha * (r + gamma * Q[x+dpos[0], y+dpos[1]].max())\n", + " n+=1" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## Resultat\n", + "\n", + "Låt oss se om vi lyckades träna Peter att bekämpa vargen!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Killed by wolf = 1, won: 9 times, drown: 90 times\n" + ] + } + ], + "source": [ + "def qpolicy(m):\n", + " x,y = m.human\n", + " v = probs(Q[x,y])\n", + " a = random.choices(list(actions),weights=v)[0]\n", + " return a\n", + "\n", + "print_statistics(qpolicy)" + ] + }, + { + "source": [ + "Vi ser nu mycket färre fall av drunkning, men Peter kan fortfarande inte alltid döda vargen. 
Försök att experimentera och se om du kan förbättra detta resultat genom att justera hyperparametrar.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 13 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(lpath)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, bör du vara medveten om att automatiserade översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på dess originalspråk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. 
Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sv/8-Reinforcement/1-QLearning/solution/notebook.ipynb b/translations/sv/8-Reinforcement/1-QLearning/solution/notebook.ipynb new file mode 100644 index 000000000..acef3ec70 --- /dev/null +++ b/translations/sv/8-Reinforcement/1-QLearning/solution/notebook.ipynb @@ -0,0 +1,577 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "488431336543f71f14d4aaf0399e3381", + "translation_date": "2025-09-06T15:10:12+00:00", + "source_file": "8-Reinforcement/1-QLearning/solution/notebook.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# Peter och vargen: Introduktion till förstärkningsinlärning\n", + "\n", + "I denna handledning kommer vi att lära oss hur man tillämpar förstärkningsinlärning på ett problem med att hitta vägar. Miljön är inspirerad av den musikaliska sagan [Peter och vargen](https://en.wikipedia.org/wiki/Peter_and_the_Wolf) av den ryske kompositören [Sergei Prokofiev](https://en.wikipedia.org/wiki/Sergei_Prokofiev). Det är en berättelse om den unge pionjären Peter, som modigt lämnar sitt hus och går till skogsgläntan för att jaga vargen. 
Vi kommer att träna maskininlärningsalgoritmer som hjälper Peter att utforska området och skapa en optimal navigeringskarta.\n", + "\n", + "Först, låt oss importera några användbara bibliotek:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import random\n", + "import math" + ] + }, + { + "source": [ + "## Översikt över förstärkningsinlärning\n", + "\n", + "**Förstärkningsinlärning** (RL) är en inlärningsteknik som låter oss lära oss ett optimalt beteende hos en **agent** i en viss **miljö** genom att utföra många experiment. En agent i denna miljö bör ha ett **mål**, definierat av en **belöningsfunktion**.\n", + "\n", + "## Miljön\n", + "\n", + "För enkelhetens skull, låt oss anta att Peters värld är ett kvadratiskt spelbräde med storleken `width` x `height`. Varje ruta på detta bräde kan vara:\n", + "* **mark**, där Peter och andra varelser kan gå\n", + "* **vatten**, där man uppenbarligen inte kan gå\n", + "* **ett träd** eller **gräs** - en plats där man kan vila\n", + "* **ett äpple**, som representerar något Peter gärna vill hitta för att äta\n", + "* **en varg**, som är farlig och bör undvikas\n", + "\n", + "För att arbeta med miljön kommer vi att definiera en klass som heter `Board`. För att undvika att överbelasta denna notebook med kod har vi flyttat all kod för att arbeta med brädet till en separat modul som heter `rlboard`, vilken vi nu kommer att importera. 
Du kan titta inuti denna modul för att få mer detaljer om implementeringens interna funktioner.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from rlboard import *" + ] + }, + { + "source": [ + "Låt oss nu skapa ett slumpmässigt bräde och se hur det ser ut:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "width, height = 8,8\n", + "m = Board(width,height)\n", + "m.randomize(seed=13)\n", + "m.plot()" + ] + }, + { + "source": [ + "## Åtgärder och Policy\n", + "\n", + "I vårt exempel är Peters mål att hitta ett äpple, samtidigt som han undviker vargen och andra hinder. För att göra detta kan han i princip gå runt tills han hittar ett äpple. Därför kan han vid varje position välja mellan en av följande åtgärder: upp, ner, vänster och höger. Vi kommer att definiera dessa åtgärder som en ordbok och koppla dem till par av motsvarande koordinatförändringar. 
Till exempel motsvarar en förflyttning åt höger (`R`) paret `(1,0)`.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "actions = { \"U\" : (0,-1), \"D\" : (0,1), \"L\" : (-1,0), \"R\" : (1,0) }\n", + "action_idx = { a : i for i,a in enumerate(actions.keys()) }" + ] + }, + { + "source": [ + "Strategin för vår agent (Peter) definieras av en så kallad **policy**. Låt oss titta på den enklaste policyn, som kallas **slumpvandring**.\n", + "\n", + "## Slumpvandring\n", + "\n", + "Låt oss först lösa vårt problem genom att implementera en strategi för slumpvandring.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "18" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ], + "source": [ + "def random_policy(m):\n", + " return random.choice(list(actions))\n", + "\n", + "def walk(m,policy,start_position=None):\n", + " n = 0 # number of steps\n", + " # set initial position\n", + " if start_position:\n", + " m.human = start_position \n", + " else:\n", + " m.random_start()\n", + " while True:\n", + " if m.at() == Board.Cell.apple:\n", + " return n # success!\n", + " if m.at() in [Board.Cell.wolf, Board.Cell.water]:\n", + " return -1 # eaten by wolf or drowned\n", + " while True:\n", + " a = actions[policy(m)]\n", + " new_pos = m.move_pos(m.human,a)\n", + " if m.is_valid(new_pos) and m.at(new_pos)!=Board.Cell.water:\n", + " m.move(a) # do the actual move\n", + " break\n", + " n+=1\n", + "\n", + "walk(m,random_policy)" + ] + }, + { + "source": [ + "Låt oss köra slumpvandringsexperimentet flera gånger och se det genomsnittliga antalet steg som tas:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": 
{}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Average path length = 32.87096774193548, eaten by wolf: 7 times\n" + ] + } + ], + "source": [ + "def print_statistics(policy):\n", + " s,w,n = 0,0,0\n", + " for _ in range(100):\n", + " z = walk(m,policy)\n", + " if z<0:\n", + " w+=1\n", + " else:\n", + " s += z\n", + " n += 1\n", + " print(f\"Average path length = {s/n}, eaten by wolf: {w} times\")\n", + "\n", + "print_statistics(random_policy)" + ] + }, + { + "source": [ + "## Belöningsfunktion\n", + "\n", + "För att göra vår policy mer intelligent behöver vi förstå vilka drag som är \"bättre\" än andra.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "move_reward = -0.1\n", + "goal_reward = 10\n", + "end_reward = -10\n", + "\n", + "def reward(m,pos=None):\n", + " pos = pos or m.human\n", + " if not m.is_valid(pos):\n", + " return end_reward\n", + " x = m.at(pos)\n", + " if x==Board.Cell.water or x == Board.Cell.wolf:\n", + " return end_reward\n", + " if x==Board.Cell.apple:\n", + " return goal_reward\n", + " return move_reward" + ] + }, + { + "source": [ + "## Q-Learning\n", + "\n", + "Bygg en Q-Tabell, eller en flerdimensionell matris. 
Eftersom vår spelplan har dimensionerna `width` x `height`, kan vi representera Q-Tabellen med en numpy-matris med formen `width` x `height` x `len(actions)`:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "Q = np.ones((width,height,len(actions)),dtype=np.float)*1.0/len(actions)" + ] + }, + { + "source": [ + "Skicka Q-tabellen till plotfunktionen för att visualisera tabellen på brädet:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## Essensen av Q-Learning: Bellman-ekvationen och inlärningsalgoritmen\n", + "\n", + "Skriv en pseudokod för vår inlärningsalgoritm:\n", + "\n", + "* Initiera Q-Tabell Q med lika värden för alla tillstånd och handlingar\n", + "* Sätt inlärningshastighet $\\alpha\\leftarrow 1$\n", + "* Upprepa simuleringen många gånger\n", + " 1. Börja på en slumpmässig position\n", + " 1. Upprepa\n", + " 1. Välj en handling $a$ vid tillstånd $s$\n", + " 2. Utför handlingen genom att flytta till ett nytt tillstånd $s'$\n", + " 3. Om vi stöter på ett slutspelsvillkor, eller den totala belöningen är för liten - avsluta simuleringen \n", + " 4. Beräkna belöningen $r$ vid det nya tillståndet\n", + " 5. Uppdatera Q-Funktionen enligt Bellman-ekvationen: $Q(s,a)\\leftarrow (1-\\alpha)Q(s,a)+\\alpha(r+\\gamma\\max_{a'}Q(s',a'))$\n", + " 6. 
$s\\leftarrow s'$\n", + " 7. Uppdatera total belöning och minska $\\alpha$.\n", + "\n", + "## Exploatera vs. Utforska\n", + "\n", + "Det bästa tillvägagångssättet är att balansera mellan utforskning och exploatering. När vi lär oss mer om vår miljö, kommer vi att vara mer benägna att följa den optimala vägen, men det är också viktigt att välja den outforskade vägen då och då.\n", + "\n", + "## Python-implementation\n", + "\n", + "Nu är vi redo att implementera inlärningsalgoritmen. Innan dess behöver vi också en funktion som kan konvertera godtyckliga värden i Q-Tabellen till en sannolikhetsvektor för motsvarande handlingar:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "def probs(v,eps=1e-4):\n", + " v = v-v.min()+eps\n", + " v = v/v.sum()\n", + " return v" + ] + }, + { + "source": [ + "Vi lägger till en liten mängd `eps` till den ursprungliga vektorn för att undvika division med 0 i det initiala fallet, när alla komponenter i vektorn är identiska.\n", + "\n", + "Den faktiska inlärningsalgoritmen kommer vi att köra i 10000 experiment, även kallade **epoker**:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "" + ] + } + ], + "source": [ + "\n", + "from IPython.display import clear_output\n", + "\n", + "lpath = []\n", + "\n", + "for epoch in range(10000):\n", + " clear_output(wait=True)\n", + " print(f\"Epoch = {epoch}\",end='')\n", + "\n", + " # Pick initial point\n", + " m.random_start()\n", + " \n", + " # Start travelling\n", + " n=0\n", + " cum_reward = 0\n", + " while True:\n", + " x,y = m.human\n", + " v = probs(Q[x,y])\n", + " a = random.choices(list(actions),weights=v)[0]\n", + " dpos = actions[a]\n", + " m.move(dpos,check_correctness=False) # we allow player to move outside the board, which 
terminates episode\n", + " r = reward(m)\n", + " cum_reward += r\n", + " if r==end_reward or cum_reward < -1000:\n", + " print(f\" {n} steps\",end='\\r')\n", + " lpath.append(n)\n", + " break\n", + " alpha = np.exp(-n / 3000)\n", + " gamma = 0.5\n", + " ai = action_idx[a]\n", + " Q[x,y,ai] = (1 - alpha) * Q[x,y,ai] + alpha * (r + gamma * Q[x+dpos[0], y+dpos[1]].max())\n", + " n+=1" + ] + }, + { + "source": [ + "Efter att ha kört denna algoritm bör Q-Tabellen uppdateras med värden som definierar attraktiviteten hos olika åtgärder vid varje steg. Visualisera tabellen här:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## Kontrollera policyn\n", + "\n", + "Eftersom Q-Tabellen listar \"attraktiviteten\" för varje handling i varje tillstånd, är det ganska enkelt att använda den för att definiera den effektiva navigeringen i vår värld. 
I det enklaste fallet kan vi helt enkelt välja den handling som motsvarar det högsta värdet i Q-Tabellen:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "2" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ], + "source": [ + "def qpolicy_strict(m):\n", + " x,y = m.human\n", + " v = probs(Q[x,y])\n", + " a = list(actions)[np.argmax(v)]\n", + " return a\n", + "\n", + "walk(m,qpolicy_strict)" + ] + }, + { + "source": [ + "Om du testar koden ovan flera gånger, kanske du märker att den ibland bara \"fastnar\", och du måste trycka på STOP-knappen i notebooken för att avbryta den.\n", + "\n", + "> **Uppgift 1:** Ändra `walk`-funktionen så att den begränsar den maximala längden på vägen till ett visst antal steg (till exempel 100), och observera hur koden ovan returnerar detta värde då och då.\n", + "\n", + "> **Uppgift 2:** Ändra `walk`-funktionen så att den inte går tillbaka till platser där den redan har varit tidigare. 
Detta kommer att förhindra att `walk` hamnar i en loop, men agenten kan fortfarande bli \"fast\" på en plats där den inte kan ta sig vidare.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Average path length = 3.45, eaten by wolf: 0 times\n" + ] + } + ], + "source": [ + "\n", + "def qpolicy(m):\n", + " x,y = m.human\n", + " v = probs(Q[x,y])\n", + " a = random.choices(list(actions),weights=v)[0]\n", + " return a\n", + "\n", + "print_statistics(qpolicy)" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 15 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(lpath)" + ] + }, + { + "source": [ + "Vad vi ser här är att den genomsnittliga längden på vägen först ökade. Detta beror troligen på att när vi inte vet något om miljön – är det troligt att vi fastnar i dåliga tillstånd, vatten eller varg. När vi lär oss mer och börjar använda denna kunskap kan vi utforska miljön längre, men vi vet fortfarande inte riktigt var äpplena finns.\n", + "\n", + "När vi har lärt oss tillräckligt blir det lättare för agenten att nå målet, och vägens längd börjar minska. Dock är vi fortfarande öppna för utforskning, så vi avviker ofta från den bästa vägen och utforskar nya alternativ, vilket gör vägen längre än optimal.\n", + "\n", + "Vad vi också observerar på denna graf är att längden vid något tillfälle ökade abrupt. Detta indikerar den stokastiska naturen hos processen, och att vi vid något tillfälle kan \"förstöra\" Q-Tabellens koefficienter genom att skriva över dem med nya värden. Detta bör idealt minimeras genom att minska inlärningshastigheten (dvs. 
mot slutet av träningen justerar vi endast Q-Tabellens värden med ett litet värde).\n", + "\n", + "Överlag är det viktigt att komma ihåg att framgången och kvaliteten på inlärningsprocessen beror avsevärt på parametrar, såsom inlärningshastighet, minskning av inlärningshastighet och diskonteringsfaktor. Dessa kallas ofta **hyperparametrar**, för att skilja dem från **parametrar** som vi optimerar under träningen (t.ex. Q-Tabellens koefficienter). Processen att hitta de bästa värdena för hyperparametrar kallas **hyperparameteroptimering**, och det förtjänar ett eget ämne.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "## Övning\n", + "#### En Mer Realistisk Värld för Peter och Vargen\n", + "\n", + "I vår situation kunde Peter röra sig nästan utan att bli trött eller hungrig. I en mer realistisk värld måste han sätta sig ner och vila då och då, samt äta för att hålla sig vid liv. Låt oss göra vår värld mer realistisk genom att implementera följande regler:\n", + "\n", + "1. När Peter rör sig från en plats till en annan förlorar han **energi** och blir mer **trött**.\n", + "2. Peter kan få mer energi genom att äta äpplen.\n", + "3. Peter kan bli av med trötthet genom att vila under trädet eller på gräset (dvs. gå till en plats på spelplanen med ett träd eller gräs - grönt fält).\n", + "4. Peter måste hitta och döda vargen.\n", + "5. För att kunna döda vargen behöver Peter ha vissa nivåer av energi och trötthet, annars förlorar han striden.\n", + "\n", + "Modifiera belöningsfunktionen ovan enligt spelets regler, kör förstärkningsinlärningsalgoritmen för att lära dig den bästa strategin för att vinna spelet, och jämför resultaten av slumpmässiga rörelser med din algoritm i termer av antal vunna och förlorade spel.\n", + "\n", + "> **Note**: Du kan behöva justera hyperparametrar för att få det att fungera, särskilt antalet epoker. 
Eftersom spelets framgång (att slåss mot vargen) är en sällsynt händelse, kan du förvänta dig mycket längre träningstid.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, bör du vara medveten om att automatiserade översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på dess ursprungliga språk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sv/8-Reinforcement/2-Gym/notebook.ipynb b/translations/sv/8-Reinforcement/2-Gym/notebook.ipynb new file mode 100644 index 000000000..ec8c9323d --- /dev/null +++ b/translations/sv/8-Reinforcement/2-Gym/notebook.ipynb @@ -0,0 +1,394 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.4 64-bit ('base': conda)" + }, + "interpreter": { + "hash": "86193a1ab0ba47eac1c69c1756090baa3b420b3eea7d4aafab8b85f8b312f0c5" + }, + "coopTranslator": { + "original_hash": "f22f8f3daed4b6d34648d1254763105b", + "translation_date": "2025-09-06T15:17:18+00:00", + "source_file": "8-Reinforcement/2-Gym/notebook.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "## CartPole Skridskoåkning\n", + "\n", + "> **Problem**: Om Peter vill fly 
från vargen måste han kunna röra sig snabbare än den. Vi ska se hur Peter kan lära sig att åka skridskor, särskilt att hålla balansen, med hjälp av Q-Learning.\n", + "\n", + "Först, låt oss installera gym och importera nödvändiga bibliotek:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 1" + ] + }, + { + "source": [ + "## Skapa en cartpole-miljö\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "#code block 2" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "source": [ + "För att se hur miljön fungerar, låt oss köra en kort simulering i 100 steg.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "#code block 3" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "source": [ + "Under simulering behöver vi få observationer för att kunna bestämma hur vi ska agera. 
Faktum är att `step`-funktionen ger oss aktuella observationer, belöningen och `done`-flaggan som indikerar om det är meningsfullt att fortsätta simuleringen eller inte:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "#code block 4" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "source": [ + "Vi kan få min- och maxvärdena för dessa tal:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38]\n[4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38]\n" + ] + } + ], + "source": [ + "#code block 5" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 6" + ] + }, + { + "source": [ + "Låt oss också utforska en annan diskretiseringsmetod med hjälp av bin:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Sample bins for interval (-5,5) with 10 bins\n [-5. -4. -3. -2. -1. 0. 1. 2. 3. 4. 
5.]\n" + ] + } + ], + "source": [ + "#code block 7" + ] + }, + { + "source": [ + "Låt oss nu köra en kort simulering och observera dessa diskreta miljövärden.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(0, 0, -2, -2)\n(0, 1, -2, -5)\n(0, 2, -3, -8)\n(0, 3, -5, -11)\n(0, 3, -7, -14)\n(0, 4, -10, -17)\n(0, 3, -14, -15)\n(0, 3, -17, -12)\n(0, 3, -20, -16)\n(0, 4, -23, -19)\n" + ] + } + ], + "source": [ + "#code block 8" + ] + }, + { + "source": [ + "## Q-Tabellstruktur\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 9" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 10" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0: 22.0, alpha=0.3, epsilon=0.9\n", + "5000: 70.1384, alpha=0.3, epsilon=0.9\n", + "10000: 121.8586, alpha=0.3, epsilon=0.9\n", + "15000: 149.6368, alpha=0.3, epsilon=0.9\n", + "20000: 168.2782, alpha=0.3, epsilon=0.9\n", + "25000: 196.7356, alpha=0.3, epsilon=0.9\n", + "30000: 220.7614, alpha=0.3, epsilon=0.9\n", + "35000: 233.2138, alpha=0.3, epsilon=0.9\n", + "40000: 248.22, alpha=0.3, epsilon=0.9\n", + "45000: 264.636, alpha=0.3, epsilon=0.9\n", + "50000: 276.926, alpha=0.3, epsilon=0.9\n", + "55000: 277.9438, alpha=0.3, epsilon=0.9\n", + "60000: 248.881, alpha=0.3, epsilon=0.9\n", + "65000: 272.529, alpha=0.3, epsilon=0.9\n", + "70000: 281.7972, alpha=0.3, epsilon=0.9\n", + "75000: 284.2844, alpha=0.3, epsilon=0.9\n", + "80000: 269.667, alpha=0.3, epsilon=0.9\n", + "85000: 273.8652, alpha=0.3, epsilon=0.9\n", + 
"90000: 278.2466, alpha=0.3, epsilon=0.9\n", + "95000: 269.1736, alpha=0.3, epsilon=0.9\n" + ] + } + ], + "source": [ + "#code block 11" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 20 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(rewards)" + ] + }, + { + "source": [ + "Från denna graf är det inte möjligt att säga något, eftersom längden på träningssessionerna varierar kraftigt på grund av den stokastiska träningsprocessens natur. För att göra denna graf mer meningsfull kan vi beräkna **glidande medelvärde** över en serie experiment, låt oss säga 100. 
Detta kan göras enkelt med hjälp av `np.convolve`:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 22 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "#code block 12" + ] + }, + { + "source": [ + "## Variera hyperparametrar och se resultatet i praktiken\n", + "\n", + "Nu skulle det vara intressant att faktiskt se hur den tränade modellen beter sig. 
Låt oss köra simuleringen, och vi kommer att följa samma strategi för val av åtgärder som under träningen: sampling enligt sannolikhetsfördelningen i Q-Tabellen:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 13" + ] + }, + { + "source": [ + "## Spara resultatet som en animerad GIF\n", + "\n", + "Om du vill imponera på dina vänner kanske du vill skicka den animerade GIF-bilden av balansstången till dem. För att göra detta kan vi anropa `env.render` för att skapa en bildruta och sedan spara dessa som en animerad GIF med hjälp av PIL-biblioteket:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "360\n" + ] + } + ], + "source": [ + "from PIL import Image\n", + "obs = env.reset()\n", + "done = False\n", + "i=0\n", + "ims = []\n", + "while not done:\n", + " s = discretize(obs)\n", + " img=env.render(mode='rgb_array')\n", + " ims.append(Image.fromarray(img))\n", + " v = probs(np.array([Qbest.get((s,a),0) for a in actions]))\n", + " a = random.choices(actions,weights=v)[0]\n", + " obs,_,done,_ = env.step(a)\n", + " i+=1\n", + "env.close()\n", + "ims[0].save('images/cartpole-balance.gif',save_all=True,append_images=ims[1::2],loop=0,duration=5)\n", + "print(i)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, bör det noteras att automatiserade översättningar kan innehålla fel eller brister. Det ursprungliga dokumentet på dess originalspråk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. 
Vi ansvarar inte för eventuella missförstånd eller feltolkningar som kan uppstå vid användning av denna översättning.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sv/8-Reinforcement/2-Gym/solution/notebook.ipynb b/translations/sv/8-Reinforcement/2-Gym/solution/notebook.ipynb new file mode 100644 index 000000000..e43d44e50 --- /dev/null +++ b/translations/sv/8-Reinforcement/2-Gym/solution/notebook.ipynb @@ -0,0 +1,526 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "5c0e485e58d63c506f1791c4dbf990ce", + "translation_date": "2025-09-06T15:20:10+00:00", + "source_file": "8-Reinforcement/2-Gym/solution/notebook.ipynb", + "language_code": "sv" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "## CartPole Skridskoåkning\n", + "\n", + "> **Problem**: Om Peter vill fly från vargen måste han kunna röra sig snabbare än den. 
Vi ska se hur Peter kan lära sig att åka skridskor, särskilt att hålla balansen, med hjälp av Q-Learning.\n", + "\n", + "Först, låt oss installera gym och importera nödvändiga bibliotek:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: gym in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.18.3)\n", + "Requirement already satisfied: Pillow<=8.2.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (7.0.0)\n", + "Requirement already satisfied: scipy in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (1.4.1)\n", + "Requirement already satisfied: numpy>=1.10.4 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (1.19.2)\n", + "Requirement already satisfied: cloudpickle<1.7.0,>=1.2.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (1.6.0)\n", + "Requirement already satisfied: pyglet<=1.5.15,>=1.4.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (1.5.15)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n" + ] + } + ], + "source": [ + "import sys\n", + "!pip install gym \n", + "\n", + "import gym\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import random" + ] + }, + { + "source": [ + "## Skapa en cartpole-miljö\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "env = gym.make(\"CartPole-v1\")\n", + "print(env.action_space)\n", + "print(env.observation_space)\n", + 
"print(env.action_space.sample())" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Discrete(2)\nBox(-3.4028234663852886e+38, 3.4028234663852886e+38, (4,), float32)\n0\n" + ] + } + ] + }, + { + "source": [ + "För att se hur miljön fungerar, låt oss köra en kort simulering i 100 steg.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "env.reset()\n", + "\n", + "for i in range(100):\n", + " env.render()\n", + " env.step(env.action_space.sample())\n", + "env.close()" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/gym/logger.py:30: UserWarning: \u001b[33mWARN: You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.\u001b[0m\n warnings.warn(colorize('%s: %s'%('WARN', msg % args), 'yellow'))\n" + ] + } + ] + }, + { + "source": [ + "Under simulering behöver vi få observationer för att kunna bestämma hur vi ska agera. 
Faktum är att `step`-funktionen ger oss aktuella observationer, belöningsfunktionen och `done`-flaggan som indikerar om det är meningsfullt att fortsätta simuleringen eller inte:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "env.reset()\n", + "\n", + "done = False\n", + "while not done:\n", + " env.render()\n", + " obs, rew, done, info = env.step(env.action_space.sample())\n", + " print(f\"{obs} -> {rew}\")\n", + "env.close()" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[ 0.03044442 -0.19543914 -0.04496216 0.28125618] -> 1.0\n", + "[ 0.02653564 -0.38989186 -0.03933704 0.55942606] -> 1.0\n", + "[ 0.0187378 -0.19424049 -0.02814852 0.25461393] -> 1.0\n", + "[ 0.01485299 -0.38894946 -0.02305624 0.53828712] -> 1.0\n", + "[ 0.007074 -0.19351108 -0.0122905 0.23842953] -> 1.0\n", + "[ 0.00320378 0.00178427 -0.00752191 -0.05810469] -> 1.0\n", + "[ 0.00323946 0.19701326 -0.008684 -0.35315131] -> 1.0\n", + "[ 0.00717973 0.00201587 -0.01574703 -0.06321931] -> 1.0\n", + "[ 0.00722005 0.19736001 -0.01701141 -0.36082863] -> 1.0\n", + "[ 0.01116725 0.39271958 -0.02422798 -0.65882671] -> 1.0\n", + "[ 0.01902164 0.19794307 -0.03740452 -0.37387001] -> 1.0\n", + "[ 0.0229805 0.39357584 -0.04488192 -0.67810827] -> 1.0\n", + "[ 0.03085202 0.58929164 -0.05844408 -0.98457719] -> 1.0\n", + "[ 0.04263785 0.78514572 -0.07813563 -1.2950295 ] -> 1.0\n", + "[ 0.05834076 0.98116859 -0.10403622 -1.61111521] -> 1.0\n", + "[ 0.07796413 0.78741784 -0.13625852 -1.35259196] -> 1.0\n", + "[ 0.09371249 0.98396202 -0.16331036 -1.68461179] -> 1.0\n", + "[ 0.11339173 0.79106371 -0.1970026 -1.44691436] -> 1.0\n", + "[ 0.12921301 0.59883361 -0.22594088 -1.22169133] -> 1.0\n" + ] + } + ] + }, + { + "source": [ + "Vi kan få min- och maxvärde för dessa nummer:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + 
"metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38]\n[4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38]\n" + ] + } + ], + "source": [ + "print(env.observation_space.low)\n", + "print(env.observation_space.high)" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "def discretize(x):\n", + " return tuple((x/np.array([0.25, 0.25, 0.01, 0.1])).astype(np.int))" + ] + }, + { + "source": [ + "Låt oss också utforska en annan diskretiseringsmetod med hjälp av bin:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Sample bins for interval (-5,5) with 10 bins\n [-5. -4. -3. -2. -1. 0. 1. 2. 3. 4. 5.]\n" + ] + } + ], + "source": [ + "def create_bins(i,num):\n", + " return np.arange(num+1)*(i[1]-i[0])/num+i[0]\n", + "\n", + "print(\"Sample bins for interval (-5,5) with 10 bins\\n\",create_bins((-5,5),10))\n", + "\n", + "ints = [(-5,5),(-2,2),(-0.5,0.5),(-2,2)] # intervals of values for each parameter\n", + "nbins = [20,20,10,10] # number of bins for each parameter\n", + "bins = [create_bins(ints[i],nbins[i]) for i in range(4)]\n", + "\n", + "def discretize_bins(x):\n", + " return tuple(np.digitize(x[i],bins[i]) for i in range(4))" + ] + }, + { + "source": [ + "Låt oss nu köra en kort simulering och observera dessa diskreta miljövärden.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(0, 0, -1, -3)\n(0, 0, -2, 0)\n(0, 0, -2, -3)\n(0, 1, -3, -6)\n(0, 2, -4, -9)\n(0, 3, -6, -12)\n(0, 2, -8, -9)\n(0, 3, -10, -13)\n(0, 4, -13, 
-16)\n(0, 4, -16, -19)\n(0, 4, -20, -17)\n(0, 4, -24, -20)\n" + ] + } + ], + "source": [ + "env.reset()\n", + "\n", + "done = False\n", + "while not done:\n", + " #env.render()\n", + " obs, rew, done, info = env.step(env.action_space.sample())\n", + " #print(discretize_bins(obs))\n", + " print(discretize(obs))\n", + "env.close()" + ] + }, + { + "source": [ + "## Q-Tabellstruktur\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "Q = {}\n", + "actions = (0,1)\n", + "\n", + "def qvalues(state):\n", + " return [Q.get((state,a),0) for a in actions]" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# hyperparameters\n", + "alpha = 0.3\n", + "gamma = 0.9\n", + "epsilon = 0.90" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0: 108.0, alpha=0.3, epsilon=0.9\n" + ] + } + ], + "source": [ + "def probs(v,eps=1e-4):\n", + " v = v-v.min()+eps\n", + " v = v/v.sum()\n", + " return v\n", + "\n", + "Qmax = 0\n", + "cum_rewards = []\n", + "rewards = []\n", + "for epoch in range(100000):\n", + " obs = env.reset()\n", + " done = False\n", + " cum_reward=0\n", + " # == do the simulation ==\n", + " while not done:\n", + " s = discretize(obs)\n", + " if random.random() Qmax:\n", + " Qmax = np.average(cum_rewards)\n", + " Qbest = Q\n", + " cum_rewards=[]" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 20 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(rewards)" + ] + }, + { + "source": [ + "Från denna graf är det inte möjligt att säga något, eftersom längden på träningssessionerna varierar kraftigt på grund av den stokastiska träningsprocessens natur. För att göra denna graf mer meningsfull kan vi beräkna **rullande medelvärde** över en serie experiment, låt oss säga 100. 
Detta kan enkelt göras med `np.convolve`:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 22 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "def running_average(x,window):\n", + " return np.convolve(x,np.ones(window)/window,mode='valid')\n", + "\n", + "plt.plot(running_average(rewards,100))" + ] + }, + { + "source": [ + "## Variera hyperparametrar och se resultatet i praktiken\n", + "\n", + "Nu skulle det vara intressant att faktiskt se hur den tränade modellen beter sig. 
Låt oss köra simuleringen, och vi kommer att följa samma strategi för val av åtgärder som under träningen: sampling enligt sannolikhetsfördelningen i Q-Tabellen:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "obs = env.reset()\n", + "done = False\n", + "while not done:\n", + " s = discretize(obs)\n", + " env.render()\n", + " v = probs(np.array(qvalues(s)))\n", + " a = random.choices(actions,weights=v)[0]\n", + " obs,_,done,_ = env.step(a)\n", + "env.close()" + ] + }, + { + "source": [ + "## Spara resultatet som en animerad GIF\n", + "\n", + "Om du vill imponera på dina vänner kanske du vill skicka den animerade GIF-bilden av balansstången till dem. För att göra detta kan vi anropa `env.render` för att skapa en bildruta och sedan spara dessa som en animerad GIF med hjälp av PIL-biblioteket:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "360\n" + ] + } + ], + "source": [ + "from PIL import Image\n", + "obs = env.reset()\n", + "done = False\n", + "i=0\n", + "ims = []\n", + "while not done:\n", + " s = discretize(obs)\n", + " img=env.render(mode='rgb_array')\n", + " ims.append(Image.fromarray(img))\n", + " v = probs(np.array([Qbest.get((s,a),0) for a in actions]))\n", + " a = random.choices(actions,weights=v)[0]\n", + " obs,_,done,_ = env.step(a)\n", + " i+=1\n", + "env.close()\n", + "ims[0].save('images/cartpole-balance.gif',save_all=True,append_images=ims[1::2],loop=0,duration=5)\n", + "print(i)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). 
Även om vi strävar efter noggrannhet, bör du vara medveten om att automatiserade översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på dess ursprungliga språk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sv/PyTorch_Fundamentals.ipynb b/translations/sv/PyTorch_Fundamentals.ipynb new file mode 100644 index 000000000..57bf9dd39 --- /dev/null +++ b/translations/sv/PyTorch_Fundamentals.ipynb @@ -0,0 +1,2830 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4", + "authorship_tag": "ABX9TyOgv0AozH1FKQBD+RkgT2bV", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU", + "coopTranslator": { + "original_hash": "0ca21b6ee62904d616f2e36dc1cf0da7", + "translation_date": "2025-09-06T13:08:06+00:00", + "source_file": "PyTorch_Fundamentals.ipynb", + "language_code": "sv" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EHh5JllMh1rG", + "outputId": "f55755ad-c369-414c-85ec-6e9d4f061a02", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'2.2.1+cu121'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 1 + } + ], + "source": [ + "import torch\n", + "torch.__version__" + ] + }, + { + "cell_type": "code", + "source": [ + "print(\"I am excited to run this\")" + 
], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "UPlb-duwXAfz", + "outputId": "cfd687e4-1238-49f4-ab6b-ee1305b740d2" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "I am excited to run this\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "print(torch.__version__)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "byWVlJ9wXDSk", + "outputId": "fd74a5c4-4d4a-41b2-ef3c-562ea3e4811f" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "2.2.1+cu121\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "Osm80zoEYklS" + } + }, + { + "cell_type": "code", + "source": [ + "# scalar\n", + "scalar = torch.tensor(7)\n", + "scalar" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-o8wvJ-VXZmI", + "outputId": "558816f5-1205-4de1-fe1f-2f96e9bd79e6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(7)" + ] + }, + "metadata": {}, + "execution_count": 4 + } + ] + }, + { + "cell_type": "code", + "source": [ + "scalar.ndim" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mCZ2tXC4Y_Sg", + "outputId": "2d86dbdc-56e1-45c6-d3dd-14515f2a457a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "source": [ + "scalar.item()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ssN00By0ZQgS", + "outputId": "490f40d1-5135-4969-a6d3-c8c902cdc473" + }, + "execution_count": null, + "outputs": [ 
+ { + "output_type": "execute_result", + "data": { + "text/plain": [ + "7" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# vector\n", + "vector = torch.tensor([7, 7])\n", + "vector\n", + "#vector.ndim\n", + "#vector.item()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Bws__5wlZnmF", + "outputId": "944e38f9-5ba1-4ddc-a9c6-cfb6a19bb488" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([7, 7])" + ] + }, + "metadata": {}, + "execution_count": 7 + } + ] + }, + { + "cell_type": "code", + "source": [ + "vector.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "9pjCvnsZZzNG", + "outputId": "e030a4da-8f81-4858-fbce-86da2aaafe52" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([2])" + ] + }, + "metadata": {}, + "execution_count": 8 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Matrix\n", + "MATRIX = torch.tensor([[7, 8],[9, 10]])\n", + "MATRIX" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "a747hI9SaBGW", + "outputId": "af835ddb-81ff-4981-badb-441567194d15" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[ 7, 8],\n", + " [ 9, 10]])" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ] + }, + { + "cell_type": "code", + "source": [ + "MATRIX.ndim" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XdTfFa7vaRUj", + "outputId": "0fbbab9c-8263-4cad-a380-0d2a16ca499e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "2" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "code", + "source": [ + "MATRIX[0]\n", 
+ "MATRIX[1]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TFeD3jSDafm7", + "outputId": "69b44ab3-5ba7-451a-c6b2-f019a03d0c96" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 9, 10])" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Tensor\n", + "TENSOR = torch.tensor([[[1, 2, 3],[3,6,9], [2,4,5]]])\n", + "TENSOR" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ic3cE47tah42", + "outputId": "f250e295-91de-43ec-9d80-588a6fe0abde" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[1, 2, 3],\n", + " [3, 6, 9],\n", + " [2, 4, 5]]])" + ] + }, + "metadata": {}, + "execution_count": 12 + } + ] + }, + { + "cell_type": "code", + "source": [ + "TENSOR.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Wvjf5fczbAM1", + "outputId": "9c72b5b8-bafe-4ae7-9883-b051e209eada" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([1, 3, 3])" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ] + }, + { + "cell_type": "code", + "source": [ + "TENSOR.ndim" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mwtXZwiMbN3m", + "outputId": "331a5e36-b1b0-4a5f-a9b8-e7049cbaa8f9" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "3" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ] + }, + { + "cell_type": "code", + "source": [ + "TENSOR[0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vzdZu_IfbP3J", + "outputId": "e24e7e71-e365-412d-ff50-fc094b56d2f3" + }, + "execution_count": null, + "outputs": [ + { + "output_type": 
"execute_result", + "data": { + "text/plain": [ + "tensor([[1, 2, 3],\n", + " [3, 6, 9],\n", + " [2, 4, 5]])" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "A8OL9eWfcRrJ" + } + }, + { + "cell_type": "code", + "source": [ + "random_tensor = torch.rand(3,4)\n", + "random_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hAqSDE1EcVS_", + "outputId": "946171c3-d054-400c-f893-79110356888c" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0.4414, 0.7681, 0.8385, 0.3166],\n", + " [0.0468, 0.5812, 0.0670, 0.9173],\n", + " [0.2959, 0.3276, 0.7411, 0.4643]])" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_tensor.ndim" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "g4fvPE5GcwzP", + "outputId": "8737f36b-6864-4059-eaed-6f9156c22306" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "2" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_tensor.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XsAg99QmdAU6", + "outputId": "35467c11-257c-4f16-99aa-eca930bcbc36" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([3, 4])" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_tensor.size()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cii1pNdVdB68", + "outputId": "fc8d2de6-9215-43de-99f7-7b0d7f7d20fa" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": 
[ + "torch.Size([3, 4])" + ] + }, + "metadata": {}, + "execution_count": 19 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_image_tensor = torch.rand(size=(3, 224, 224)) #color channels, height, width\n", + "random_image_tensor.ndim, random_image_tensor.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "aTKq2j0cdDjb", + "outputId": "6be42057-20b9-4faf-d79d-8b65c42cc27e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(3, torch.Size([3, 224, 224]))" + ] + }, + "metadata": {}, + "execution_count": 20 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_tensor_ofownsize = torch.rand(size=(5,10,10))\n", + "random_tensor_ofownsize.ndim, random_tensor_ofownsize.shape\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IyhDdj-Pd6nC", + "outputId": "43e5e334-6d4d-4b67-f87d-7d364c6d8c67" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(3, torch.Size([5, 10, 10]))" + ] + }, + "metadata": {}, + "execution_count": 21 + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "UOJW08uOert_" + } + }, + { + "cell_type": "code", + "source": [ + "zero = torch.zeros(size=(3, 4))\n", + "zero" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "uGvXtaXyefie", + "outputId": "d40d3e28-8667-4d2f-8b62-f0829c6162ad" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0., 0., 0., 0.],\n", + " [0., 0., 0., 0.],\n", + " [0., 0., 0., 0.]])" + ] + }, + "metadata": {}, + "execution_count": 22 + } + ] + }, + { + "cell_type": "code", + "source": [ + "zero*random_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "OyUkUPkDe0uH", + "outputId": 
"26c2e4be-36ba-4c6c-9a90-2704ec135828" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0., 0., 0., 0.],\n", + " [0., 0., 0., 0.],\n", + " [0., 0., 0., 0.]])" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ones = torch.ones(size=(3, 4))\n", + "ones\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "y_Ac62Aqe82G", + "outputId": "291de5d9-b9df-49de-c9d1-d098e3e9f4d8" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1., 1., 1., 1.],\n", + " [1., 1., 1., 1.],\n", + " [1., 1., 1., 1.]])" + ] + }, + "metadata": {}, + "execution_count": 24 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ones.dtype" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TvGOA9odfIEO", + "outputId": "45949ef4-6649-4b6c-d6af-2d4bfb8de832" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.float32" + ] + }, + "metadata": {}, + "execution_count": 25 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ones*zero" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "--pTyge-fI-8", + "outputId": "c4d9bb7e-829b-43db-e2db-b1a2d64e61f0" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0., 0., 0., 0.],\n", + " [0., 0., 0., 0.],\n", + " [0., 0., 0., 0.]])" + ] + }, + "metadata": {}, + "execution_count": 26 + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "qDcc7Z36fSJF" + } + }, + { + "cell_type": "code", + "source": [ + "one_to_ten = torch.arange(start = 1, end = 11, step = 1)\n", + "one_to_ten" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": 
"w3CZB4zUfR1s", + "outputId": "197fcba1-da0a-4b4a-ed11-3974bd6c01aa" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])" + ] + }, + "metadata": {}, + "execution_count": 27 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ten_zeros = torch.zeros_like(one_to_ten)\n", + "ten_zeros" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "WZh99BwVfRy8", + "outputId": "51ef8bfb-6fa0-4099-ff66-b97d65b2ddea" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])" + ] + }, + "metadata": {}, + "execution_count": 28 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Tensor Datatyper\n" + ], + "metadata": { + "id": "pGGhgsbUgqbW" + } + }, + { + "cell_type": "code", + "source": [ + "float_32_tensor = torch.tensor([3.0, 6.0,9.0], dtype = None, device = None, requires_grad = False)\n", + "float_32_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JORJl4XkfRsx", + "outputId": "71114171-0f49-481f-b6fc-6cb48e2fb895" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([3., 6., 9.])" + ] + }, + "metadata": {}, + "execution_count": 29 + } + ] + }, + { + "cell_type": "code", + "source": [ + "float_32_tensor.dtype" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "6wOPPwGyfRLn", + "outputId": "f23776a1-b682-404a-9f67-d5bcb0402666" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.float32" + ] + }, + "metadata": {}, + "execution_count": 30 + } + ] + }, + { + "cell_type": "code", + "source": [ + "float_16_tensor = float_32_tensor.type(torch.float16)\n", + "float_16_tensor.dtype" + ], + "metadata": { + 
"colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "tFsHCvmZfOYe", + "outputId": "d3aa305a-7591-47f5-97fd-61bff60b44bd" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.float16" + ] + }, + "metadata": {}, + "execution_count": 31 + } + ] + }, + { + "cell_type": "code", + "source": [ + "float_16_tensor*float_32_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TQiCGTPuwq0q", + "outputId": "98750fce-1ca3-4889-e269-8b753efdea96" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 9., 36., 81.])" + ] + }, + "metadata": {}, + "execution_count": 32 + } + ] + }, + { + "cell_type": "code", + "source": [ + "int_32_tensor = torch.tensor([3, 6, 9], dtype = torch.int32)\n", + "int_32_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5hlrLvGUw5D_", + "outputId": "41d890a0-9aee-446c-d906-631ce2ab0995" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([3, 6, 9], dtype=torch.int32)" + ] + }, + "metadata": {}, + "execution_count": 33 + } + ] + }, + { + "cell_type": "code", + "source": [ + "int_32_tensor*float_32_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ihApD9u3xTNW", + "outputId": "d295eed0-6996-4e0f-8502-ff4b55cd1373" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 9., 36., 81.])" + ] + }, + "metadata": {}, + "execution_count": 34 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x = torch.arange(0,100,10)" + ], + "metadata": { + "id": "utKhlb_KxWDQ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "x" + ], + "metadata": { + "colab": { + "base_uri": 
"https://localhost:8080/" + }, + "id": "p78D74E9Rj7Y", + "outputId": "781a1614-a900-41f5-9e5d-358f0b2390aa" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])" + ] + }, + "metadata": {}, + "execution_count": 36 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.min()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4BcSs5NeRkcj", + "outputId": "3f24a8dc-58e9-4a5f-9834-e85856a34f9d" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0)" + ] + }, + "metadata": {}, + "execution_count": 37 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.max()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hinqvXVLRm4q", + "outputId": "5c7d8a53-3913-4ac1-bba3-5ba8ff68250a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(90)" + ] + }, + "metadata": {}, + "execution_count": 38 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.mean(x.type(torch.float32))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "k7okc0_vRpnB", + "outputId": "91e5494f-dc57-417c-ea4d-25dbc547c893" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(45.)" + ] + }, + "metadata": {}, + "execution_count": 39 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.type(torch.float32).mean()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "29QcDTjHRq10", + "outputId": "62937c6c-78e0-49f2-dde3-1543ee8f7907" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(45.)" + ] + }, + "metadata": {}, + "execution_count": 40 + } + ] 
+ }, + { + "cell_type": "code", + "source": [ + "x.sum()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wlpY_G_sbdKF", + "outputId": "475d8258-af65-4011-a258-b93d4d8142d4" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(450)" + ] + }, + "metadata": {}, + "execution_count": 41 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.argmax()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GT6HJzwhbk4n", + "outputId": "2e455c20-c322-4bcf-d07c-1259d3ccefc6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(9)" + ] + }, + "metadata": {}, + "execution_count": 42 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.argmin()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "egL3oi2Mb19P", + "outputId": "f71fb32f-6338-44a3-b377-75bea0a3ab54" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0)" + ] + }, + "metadata": {}, + "execution_count": 43 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "p2U8DZKib3DP", + "outputId": "b9f613b9-74e9-45f4-ed01-05babb6a6793" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0)" + ] + }, + "metadata": {}, + "execution_count": 44 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[9]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "24qBFlGYcABe", + "outputId": "5813cfcb-7f63-4bd7-ee46-f95ccbfda939" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(90)" + ] + }, + "metadata": {}, + 
"execution_count": 45 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x = torch.arange(1, 10)\n", + "x.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "0GPOxEzkcBHO", + "outputId": "aefbd903-4f4c-4d2c-c90f-eccd682fe018" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([9])" + ] + }, + "metadata": {}, + "execution_count": 46 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_reshaped = x.reshape(1,9)\n", + "x_reshaped, x_reshaped.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "spmRgQjwddgp", + "outputId": "85a7c55c-2909-4ea2-fc68-386dddc65742" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9]]), torch.Size([1, 9]))" + ] + }, + "metadata": {}, + "execution_count": 47 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_reshaped.view(1,9)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "tH2ahWGydqqP", + "outputId": "65d92263-4fc4-434a-c06d-c5e08436f7fe" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9]])" + ] + }, + "metadata": {}, + "execution_count": 48 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_stacked = torch.stack([x, x, x, x], dim = 1)\n", + "x_stacked" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "jgCeJcaud_-1", + "outputId": "7f293a37-6ef1-43b6-aee5-9d6d91c94f9e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 1, 1, 1],\n", + " [2, 2, 2, 2],\n", + " [3, 3, 3, 3],\n", + " [4, 4, 4, 4],\n", + " [5, 5, 5, 5],\n", + " [6, 6, 6, 6],\n", + " [7, 7, 7, 7],\n", + " [8, 8, 8, 8],\n", + " [9, 
9, 9, 9]])" + ] + }, + "metadata": {}, + "execution_count": 49 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_stacked.squeeze()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XhJHIK6cfPse", + "outputId": "06c47b89-3a9e-453e-bcc3-00cbcb0b8b49" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 1, 1, 1],\n", + " [2, 2, 2, 2],\n", + " [3, 3, 3, 3],\n", + " [4, 4, 4, 4],\n", + " [5, 5, 5, 5],\n", + " [6, 6, 6, 6],\n", + " [7, 7, 7, 7],\n", + " [8, 8, 8, 8],\n", + " [9, 9, 9, 9]])" + ] + }, + "metadata": {}, + "execution_count": 50 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_stacked.unsqueeze(dim=1)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ej2c3Xxzf0tq", + "outputId": "94024061-eb37-446d-c4a8-e4d16cb6de81" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[1, 1, 1, 1]],\n", + "\n", + " [[2, 2, 2, 2]],\n", + "\n", + " [[3, 3, 3, 3]],\n", + "\n", + " [[4, 4, 4, 4]],\n", + "\n", + " [[5, 5, 5, 5]],\n", + "\n", + " [[6, 6, 6, 6]],\n", + "\n", + " [[7, 7, 7, 7]],\n", + "\n", + " [[8, 8, 8, 8]],\n", + "\n", + " [[9, 9, 9, 9]]])" + ] + }, + "metadata": {}, + "execution_count": 52 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_stacked.squeeze()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4DJYo1a0f5M0", + "outputId": "efca2b47-1b14-44de-9a9a-2c83629d153f" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 1, 1, 1],\n", + " [2, 2, 2, 2],\n", + " [3, 3, 3, 3],\n", + " [4, 4, 4, 4],\n", + " [5, 5, 5, 5],\n", + " [6, 6, 6, 6],\n", + " [7, 7, 7, 7],\n", + " [8, 8, 8, 8],\n", + " [9, 9, 9, 9]])" + ] + }, + "metadata": {}, + "execution_count": 53 + } + ] + }, + { + "cell_type": 
"code", + "source": [ + "x_stacked.unsqueeze(dim=-2)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "J4iEjn2ah2HL", + "outputId": "22395593-7c16-4162-beae-dd2bbe7bda35" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[1, 1, 1, 1]],\n", + "\n", + " [[2, 2, 2, 2]],\n", + "\n", + " [[3, 3, 3, 3]],\n", + "\n", + " [[4, 4, 4, 4]],\n", + "\n", + " [[5, 5, 5, 5]],\n", + "\n", + " [[6, 6, 6, 6]],\n", + "\n", + " [[7, 7, 7, 7]],\n", + "\n", + " [[8, 8, 8, 8]],\n", + "\n", + " [[9, 9, 9, 9]]])" + ] + }, + "metadata": {}, + "execution_count": 55 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "tensor = torch.tensor([1, 2, 3])\n", + "tensor = tensor - 10\n", + "tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cFfiD7Nth7Z_", + "outputId": "1139e1f8-fc1a-46ca-d636-f2bc4fd2eef6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([-9, -8, -7])" + ] + }, + "metadata": {}, + "execution_count": 7 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.mul(tensor, 10)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dyA7BM_GHhqE", + "outputId": "0e3b9671-d9e8-4a32-87bb-59bc05986142" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([-90, -80, -70])" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.sub(tensor, 100)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "owtUsZ1KNegI", + "outputId": "189b7b23-0041-4e09-b991-cd209a48506a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([-109, -108, -107])" + ] + }, + "metadata": 
{}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.add(tensor, 100)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "K5STXlQONsyc", + "outputId": "00cbb79a-0a1d-4e21-86ec-5c91c37a2d01" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([91, 92, 93])" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.divide(tensor, 2)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xqMGnzIUNvp0", + "outputId": "c894cf3e-f148-45f8-cfc8-d78740735306" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([-4.5000, -4.0000, -3.5000])" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.matmul(tensor, tensor)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ruGzKpV8NyBc", + "outputId": "fddb63bf-006f-48b6-ae28-287fbcda8bc5" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(194)" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor@tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "8GS3r9yTeGfD", + "outputId": "c80b12ac-30b5-4f3d-c38c-9e41ba511b0e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(194)" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ] + }, + { + "cell_type": "code", + "source": [ + "%%time\n", + "tensor@tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QmuYHqXTemC0", + "outputId": "402fe3ba-70b5-4bb2-c83b-254db84ff810" + }, + 
"execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "CPU times: user 622 µs, sys: 0 ns, total: 622 µs\n", + "Wall time: 516 µs\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(194)" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ] + }, + { + "cell_type": "code", + "source": [ + "%%time\n", + "torch.matmul(tensor,tensor)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dGr1fzdNepd8", + "outputId": "97bd6c91-bc25-4b38-cdf5-f22dcdef243e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "CPU times: user 424 µs, sys: 998 µs, total: 1.42 ms\n", + "Wall time: 1.43 ms\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(194)" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.rand(3,2)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "pGYDoK2gevfo", + "outputId": "2c8783d5-0453-47c5-c7ed-af10d25d6989" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0.5999, 0.0073],\n", + " [0.9321, 0.3026],\n", + " [0.3463, 0.3872]])" + ] + }, + "metadata": {}, + "execution_count": 20 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.matmul(torch.rand(3,2), torch.rand(2,3))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "KGBGQoB8e2DP", + "outputId": "4c2ef361-a2d0-41ee-c328-3992cbbc138d" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0.3528, 0.1893, 0.0714],\n", + " [1.2791, 0.7110, 0.2563],\n", + " [0.8812, 0.4553, 0.1803]])" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ] + }, + { + "cell_type": "code", + "source": [ + 
"import torch" + ], + "metadata": { + "id": "ib8DMtkBe_LJ" + }, + "execution_count": 1, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "x = torch.rand(2,9)" + ], + "metadata": { + "id": "nJo8ZBdrQY1b" + }, + "execution_count": 2, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "x" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wi6oRv4MQfgf", + "outputId": "55c99f55-31f6-4cf5-ba4e-19a47c3a0167" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0.5894, 0.4391, 0.2018, 0.5417, 0.3844, 0.3592, 0.9209, 0.9269, 0.0681],\n", + " [0.0746, 0.1740, 0.6821, 0.6890, 0.0999, 0.7444, 0.2391, 0.4625, 0.8302]])" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ] + }, + { + "cell_type": "code", + "source": [ + "y=torch.randn(2,3,5)\n", + "y" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Zpx8myAUQgoc", + "outputId": "07756d70-56bd-437c-c74e-9aecc1a77311" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[ 1.5552, -0.4877, 0.5175, -1.7958, -0.6187],\n", + " [-0.3359, -1.9710, 0.0112, -1.7578, -1.5295],\n", + " [ 0.0932, 1.4079, 0.9108, 0.3328, -0.6978]],\n", + "\n", + " [[-0.9406, -1.0809, -0.2595, 0.1282, 1.6605],\n", + " [ 1.1624, 1.0902, 1.7092, -0.2842, -1.3780],\n", + " [-0.1534, -1.2795, -0.5495, 0.9902, 0.1822]]])" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_original = torch.rand(size=(224,224,3))\n", + "x_original" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "s4U-X9bJQnWe", + "outputId": "657a7a76-962c-4b41-a76b-902d0482266c" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[0.4549, 0.6809, 0.2118],\n", + " [0.4824, 0.9008, 
0.8741],\n", + " [0.1715, 0.1757, 0.1845],\n", + " ...,\n", + " [0.8741, 0.6594, 0.2610],\n", + " [0.0092, 0.1984, 0.1955],\n", + " [0.4236, 0.4182, 0.0251]],\n", + "\n", + " [[0.9174, 0.1661, 0.5852],\n", + " [0.1837, 0.2351, 0.3810],\n", + " [0.3726, 0.4808, 0.8732],\n", + " ...,\n", + " [0.6794, 0.0554, 0.9202],\n", + " [0.0864, 0.8750, 0.3558],\n", + " [0.8445, 0.9759, 0.4934]],\n", + "\n", + " [[0.1600, 0.2635, 0.7194],\n", + " [0.9488, 0.3405, 0.3647],\n", + " [0.6683, 0.5168, 0.9592],\n", + " ...,\n", + " [0.0521, 0.0140, 0.2445],\n", + " [0.3596, 0.3999, 0.2730],\n", + " [0.5926, 0.9877, 0.7784]],\n", + "\n", + " ...,\n", + "\n", + " [[0.4794, 0.5635, 0.3764],\n", + " [0.9124, 0.6094, 0.5059],\n", + " [0.4528, 0.4447, 0.5021],\n", + " ...,\n", + " [0.0089, 0.4816, 0.8727],\n", + " [0.2173, 0.6296, 0.2347],\n", + " [0.2028, 0.9931, 0.7201]],\n", + "\n", + " [[0.3116, 0.6459, 0.4703],\n", + " [0.0148, 0.2345, 0.7149],\n", + " [0.8393, 0.5804, 0.6691],\n", + " ...,\n", + " [0.2105, 0.9460, 0.2696],\n", + " [0.5918, 0.9295, 0.2616],\n", + " [0.2537, 0.7819, 0.4700]],\n", + "\n", + " [[0.6654, 0.1200, 0.5841],\n", + " [0.9147, 0.5522, 0.6529],\n", + " [0.1799, 0.5276, 0.5415],\n", + " ...,\n", + " [0.7536, 0.4346, 0.8793],\n", + " [0.3793, 0.1750, 0.7792],\n", + " [0.9266, 0.8325, 0.9974]]])" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_permuted=x_original.permute(2, 0, 1)\n", + "print(x_original.shape)\n", + "print(x_permuted.shape)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "DD19_zvbQzHo", + "outputId": "1d64ce1b-eb48-47e3-90b6-7f1340e7f2b2" + }, + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "torch.Size([224, 224, 3])\n", + "torch.Size([3, 224, 224])\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_original[0,0,0]" + ], + "metadata": { + "colab": { + "base_uri": 
"https://localhost:8080/" + }, + "id": "NnPmMk4ZRF7w", + "outputId": "2cd5da7f-4a23-4a76-8c4a-bb982113f2a4" + }, + "execution_count": 10, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0.4549)" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_permuted[0,0,0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Z0ylNoAARgTo", + "outputId": "ddca0298-cddf-4048-9b71-a791655e5bed" + }, + "execution_count": 11, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0.4549)" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_original[0,0,0]=0.989" + ], + "metadata": { + "id": "RXw0xXsDRi4L" + }, + "execution_count": 13, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "x_original[0,0,0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "1sFdV6wzRo3f", + "outputId": "1cf87d2c-6d88-453a-d136-0f625a2800f1" + }, + "execution_count": 14, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0.9890)" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_permuted[0,0,0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xTX-hx2SR1wp", + "outputId": "0d4908c4-c3bc-44e3-8ec6-1487104cc209" + }, + "execution_count": 15, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0.9890)" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x=torch.arange(1,10).reshape(1,3,3)\n", + "x, x.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mZomOe7gR4Q8", + "outputId": "0b3c922f-ec11-46de-b8a5-9f9533d866ad" + }, + "execution_count": 
18, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(tensor([[[1, 2, 3],\n", + " [4, 5, 6],\n", + " [7, 8, 9]]]),\n", + " torch.Size([1, 3, 3]))" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "3y7v4SQvSBs1", + "outputId": "8c53307d-e628-404d-db66-56c6bdffab7c" + }, + "execution_count": 19, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 2, 3],\n", + " [4, 5, 6],\n", + " [7, 8, 9]])" + ] + }, + "metadata": {}, + "execution_count": 19 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0][0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hf9uG4xLSNya", + "outputId": "3075bc42-9ffa-426b-8a86-95628ffcd824" + }, + "execution_count": 21, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1, 2, 3])" + ] + }, + "metadata": {}, + "execution_count": 21 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0][0][0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "zA4G2Se4SRB3", + "outputId": "324312d2-ed0a-49eb-f81f-e904e53992fe" + }, + "execution_count": 22, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(1)" + ] + }, + "metadata": {}, + "execution_count": 22 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0][2][2]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Mwy3zmKKSdbk", + "outputId": "d35172c3-b099-40a6-ddf1-a453c2adfa44" + }, + "execution_count": 23, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(9)" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[:,1,1]" + ], + "metadata": { + "colab": { 
+ "base_uri": "https://localhost:8080/" + }, + "id": "fE3nCM1KS7XT", + "outputId": "01f5d755-9737-4235-9f73-dce89ff6ba16" + }, + "execution_count": 24, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([5])" + ] + }, + "metadata": {}, + "execution_count": 24 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0,0,:]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "luNDINKNTTxp", + "outputId": "091195ef-2f71-4602-e95f-529a69193150" + }, + "execution_count": 25, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1, 2, 3])" + ] + }, + "metadata": {}, + "execution_count": 25 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0,:,2]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "KG8A4xbfThCL", + "outputId": "5866bc41-9241-4619-be7b-e9206b3f80ab" + }, + "execution_count": 26, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([3, 6, 9])" + ] + }, + "metadata": {}, + "execution_count": 26 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import numpy as np" + ], + "metadata": { + "id": "CZ3PX0qlTwHJ" + }, + "execution_count": 27, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "array = np.arange(1.0, 8.0)" + ], + "metadata": { + "id": "UOBeTumiT3Lf" + }, + "execution_count": 28, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "array" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "RzcO32E9UCQl", + "outputId": "430def24-c42c-461f-e5e7-398544c695d3" + }, + "execution_count": 29, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([1., 2., 3., 4., 5., 6., 7.])" + ] + }, + "metadata": {}, + "execution_count": 29 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor = torch.from_numpy(array)\n", + "tensor" + ], + "metadata": 
{ + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JJIL0q1DUC6O", + "outputId": "8a3b1d7c-4482-4d32-f34f-9212d9d3a177" + }, + "execution_count": 32, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64)" + ] + }, + "metadata": {}, + "execution_count": 32 + } + ] + }, + { + "cell_type": "code", + "source": [ + "array[3]=11.0" + ], + "metadata": { + "id": "j3Ce6q3DUIEK" + }, + "execution_count": 33, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "array" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dc_BCVdjUsCc", + "outputId": "65537325-8b11-4f36-fc73-e56f30d6a036" + }, + "execution_count": 34, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([ 1., 2., 3., 11., 5., 6., 7.])" + ] + }, + "metadata": {}, + "execution_count": 34 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "VG1e_eITUta2", + "outputId": "a26c5198-23b6-4a6d-d73a-ba20cd9782b8" + }, + "execution_count": 35, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 1., 2., 3., 11., 5., 6., 7.], dtype=torch.float64)" + ] + }, + "metadata": {}, + "execution_count": 35 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor = torch.ones(7)\n", + "tensor, tensor.dtype\n", + "numpy_tensor = tensor.numpy()\n", + "numpy_tensor, numpy_tensor.dtype" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Swt8JF8vUuev", + "outputId": "c9e5bf6a-6d2c-41d6-8327-366867ffdd2d" + }, + "execution_count": 37, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(array([1., 1., 1., 1., 1., 1., 1.], dtype=float32), dtype('float32'))" + ] + }, + "metadata": {}, + "execution_count": 37 + } + ] + }, + { + 
"cell_type": "code", + "source": [ + "import torch\n", + "random_tensor_A = torch.rand(3,4)\n", + "random_tensor_B = torch.rand(3,4)\n", + "print(random_tensor_A)\n", + "print(random_tensor_B)\n", + "print(random_tensor_A == random_tensor_B)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "uGcagTteVFTD", + "outputId": "49405790-08e7-4210-b7f1-f00b904c7eb9" + }, + "execution_count": 38, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tensor([[0.9870, 0.6636, 0.6873, 0.8863],\n", + " [0.8386, 0.4169, 0.3587, 0.0265],\n", + " [0.2981, 0.6025, 0.5652, 0.5840]])\n", + "tensor([[0.9821, 0.3481, 0.0913, 0.4940],\n", + " [0.7495, 0.4387, 0.9582, 0.8659],\n", + " [0.5064, 0.6919, 0.0809, 0.9771]])\n", + "tensor([[False, False, False, False],\n", + " [False, False, False, False],\n", + " [False, False, False, False]])\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "RANDOM_SEED = 42\n", + "torch.manual_seed(RANDOM_SEED)\n", + "random_tensor_C = torch.rand(3,4)\n", + "torch.manual_seed(RANDOM_SEED)\n", + "random_tensor_D = torch.rand(3,4)\n", + "print(random_tensor_C)\n", + "print(random_tensor_D)\n", + "print(random_tensor_C == random_tensor_D)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "HznyXyEaWjLM", + "outputId": "25956434-01b6-4059-9054-c9978884ddc1" + }, + "execution_count": 46, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tensor([[0.8823, 0.9150, 0.3829, 0.9593],\n", + " [0.3904, 0.6009, 0.2566, 0.7936],\n", + " [0.9408, 0.1332, 0.9346, 0.5936]])\n", + "tensor([[0.8823, 0.9150, 0.3829, 0.9593],\n", + " [0.3904, 0.6009, 0.2566, 0.7936],\n", + " [0.9408, 0.1332, 0.9346, 0.5936]])\n", + "tensor([[True, True, True, True],\n", + " [True, True, True, True],\n", + " [True, True, True, True]])\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!nvidia-smi" + ], + "metadata": { + "colab": { + 
"base_uri": "https://localhost:8080/" + }, + "id": "vltPTh0YXJSt", + "outputId": "807af6dc-a9ca-4301-ec32-b688dbde8be8" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Thu May 23 02:57:59 2024 \n", + "+---------------------------------------------------------------------------------------+\n", + "| NVIDIA-SMI 535.104.05 Driver Version: 535.104.05 CUDA Version: 12.2 |\n", + "|-----------------------------------------+----------------------+----------------------+\n", + "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n", + "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n", + "| | | MIG M. |\n", + "|=========================================+======================+======================|\n", + "| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n", + "| N/A 60C P8 11W / 70W | 0MiB / 15360MiB | 0% Default |\n", + "| | | N/A |\n", + "+-----------------------------------------+----------------------+----------------------+\n", + " \n", + "+---------------------------------------------------------------------------------------+\n", + "| Processes: |\n", + "| GPU GI CI PID Type Process name GPU Memory |\n", + "| ID ID Usage |\n", + "|=======================================================================================|\n", + "| No running processes found |\n", + "+---------------------------------------------------------------------------------------+\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "torch.cuda.is_available()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "L6mMyPDyYh1j", + "outputId": "279c5dd8-c2a8-4fbd-f321-2f5d7c6e90e6" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ] + }, + { + "cell_type": "code", + "source": [ + "device = \"cuda\" if 
torch.cuda.is_available() else \"cpu\"\n", + "device" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "oOdiYa7ZYytx", + "outputId": "d73b04fc-8963-4826-9722-08d118d5ab91" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'cuda'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.cuda.device_count()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vOdsazLqZFM5", + "outputId": "8189cd6a-9017-4663-a652-3e15c517d9c3" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "1" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor = torch.tensor([1,2,3], device = \"cpu\")\n", + "print(tensor, tensor.device)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cdik9Vw3ZMv0", + "outputId": "044a68fd-83a1-409d-8e3b-655142ca0270" + }, + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tensor([1, 2, 3]) cpu\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor_on_gpu = tensor.to(device)\n", + "tensor_on_gpu" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Zmp835rrZp-z", + "outputId": "37fa3413-18a3-47bf-ae51-5b36ff85a3ef" + }, + "execution_count": 8, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1, 2, 3], device='cuda:0')" + ] + }, + "metadata": {}, + "execution_count": 8 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor_on_gpu.numpy()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 159 + }, + "id": "jhriaa8uZ1yM", 
+ "outputId": "bc5a3226-1a12-4fea-8769-a44f21cdc323" + }, + "execution_count": 10, + "outputs": [ + { + "output_type": "error", + "ename": "TypeError", + "evalue": "can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtensor_on_gpu\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnumpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first." + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor_on_cpu = tensor_on_gpu.cpu().numpy()" + ], + "metadata": { + "id": "LHGXK3GgaOzL" + }, + "execution_count": 12, + "outputs": [] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "j-El4LlCajfq" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Ansvarsfriskrivning**: \nDetta dokument har översatts med hjälp av AI-översättningstjänsten [Co-op Translator](https://github.com/Azure/co-op-translator). Även om vi strävar efter noggrannhet, vänligen notera att automatiska översättningar kan innehålla fel eller felaktigheter. Det ursprungliga dokumentet på dess originalspråk bör betraktas som den auktoritativa källan. För kritisk information rekommenderas professionell mänsklig översättning. 
Vi ansvarar inte för eventuella missförstånd eller feltolkningar som uppstår vid användning av denna översättning.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sw/2-Regression/1-Tools/notebook.ipynb b/translations/sw/2-Regression/1-Tools/notebook.ipynb new file mode 100644 index 000000000..e69de29bb diff --git a/translations/sw/2-Regression/1-Tools/solution/R/lesson_1-R.ipynb b/translations/sw/2-Regression/1-Tools/solution/R/lesson_1-R.ipynb new file mode 100644 index 000000000..75632799f --- /dev/null +++ b/translations/sw/2-Regression/1-Tools/solution/R/lesson_1-R.ipynb @@ -0,0 +1,448 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_1-R.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "c18d3bd0bd8ae3878597e89dcd1fa5c1", + "translation_date": "2025-09-06T13:43:37+00:00", + "source_file": "2-Regression/1-Tools/solution/R/lesson_1-R.ipynb", + "language_code": "sw" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "YJUHCXqK57yz" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Utangulizi wa Urejeleaji - Somo la 1\n", + "\n", + "#### Kuweka katika muktadha\n", + "\n", + "✅ Kuna aina nyingi za mbinu za urejeleaji, na chaguo lako linategemea jibu unalotafuta. Ikiwa unataka kutabiri urefu unaowezekana wa mtu wa umri fulani, ungetumia `urejeleaji wa mstari`, kwa kuwa unatafuta **thamani ya nambari**. Ikiwa unavutiwa na kugundua kama aina fulani ya chakula inapaswa kuzingatiwa kuwa cha mboga au la, unatafuta **ugawaji wa kategoria**, kwa hivyo ungetumia `urejeleaji wa kimantiki`. Utajifunza zaidi kuhusu urejeleaji wa kimantiki baadaye. 
Fikiria kidogo kuhusu maswali unayoweza kuuliza kutoka kwa data, na ni mbinu ipi kati ya hizi ingefaa zaidi.\n", + "\n", + "Katika sehemu hii, utatumia [seti ndogo ya data kuhusu kisukari](https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html). Fikiria kwamba unataka kujaribu matibabu kwa wagonjwa wa kisukari. Miundo ya Kujifunza kwa Mashine inaweza kukusaidia kubaini ni wagonjwa gani wangepokea matibabu vizuri zaidi, kulingana na mchanganyiko wa vigezo. Hata mfano wa urejeleaji wa msingi kabisa, unapowekwa kwenye taswira, unaweza kuonyesha taarifa kuhusu vigezo ambavyo vingekusaidia kupanga majaribio yako ya kinadharia ya kliniki.\n", + "\n", + "Kwa hayo, hebu tuanze kazi hii!\n", + "\n", + "

\n", + " \n", + "

Uchoraji na @allison_horst
\n", + "\n", + "\n" + ], + "metadata": { + "id": "LWNNzfqd6feZ" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 1. Kuandaa zana zetu\n", + "\n", + "Kwa kazi hii, tutahitaji vifurushi vifuatavyo:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) ni [mkusanyiko wa vifurushi vya R](https://www.tidyverse.org/packages) ulioundwa kufanya sayansi ya data kuwa ya haraka, rahisi, na ya kufurahisha!\n", + "\n", + "- `tidymodels`: Mfumo wa [tidymodels](https://www.tidymodels.org/) ni [mkusanyiko wa vifurushi](https://www.tidymodels.org/packages/) kwa ajili ya uundaji wa mifano na ujifunzaji wa mashine.\n", + "\n", + "Unaweza kuvifunga kwa kutumia:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\"))`\n", + "\n", + "Skripti iliyo hapa chini inakagua kama una vifurushi vinavyohitajika kukamilisha moduli hii na inavifunga kwa ajili yako endapo baadhi havipo.\n" + ], + "metadata": { + "id": "FIo2YhO26wI9" + } + }, + { + "cell_type": "code", + "execution_count": 2, + "source": [ + "suppressWarnings(if(!require(\"pacman\")) install.packages(\"pacman\"))\n", + "pacman::p_load(tidyverse, tidymodels)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Loading required package: pacman\n", + "\n" + ] + } + ], + "metadata": { + "id": "cIA9fz9v7Dss", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "2df7073b-86b2-4b32-cb86-0da605a0dc11" + } + }, + { + "cell_type": "markdown", + "source": [ + "Sasa, hebu tupakie vifurushi hivi vya kushangaza na kuvifanya vipatikane katika kikao chetu cha sasa cha R. 
(Hii ni kwa maelezo tu, `pacman::p_load()` tayari ilifanya hivyo kwa ajili yako)\n" + ], + "metadata": { + "id": "gpO_P_6f9WUG" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# load the core Tidyverse packages\r\n", + "library(tidyverse)\r\n", + "\r\n", + "# load the core Tidymodels packages\r\n", + "library(tidymodels)\r\n" + ], + "outputs": [], + "metadata": { + "id": "NLMycgG-9ezO" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 2. Seti ya data ya kisukari\n", + "\n", + "Katika zoezi hili, tutatumia ujuzi wetu wa regression kufanya utabiri kwenye seti ya data ya kisukari. [Seti ya data ya kisukari](https://www4.stat.ncsu.edu/~boos/var.select/diabetes.rwrite1.txt) ina `442 sampuli` za data zinazohusiana na kisukari, ikiwa na vipengele 10 vya utabiri, `umri`, `jinsia`, `kiwango cha uzito wa mwili`, `shinikizo la damu la wastani`, na `vipimo sita vya damu` pamoja na kipengele cha matokeo `y`: kipimo cha kiasi cha maendeleo ya ugonjwa mwaka mmoja baada ya msingi.\n", + "\n", + "|Idadi ya uchunguzi|442|\n", + "|------------------|:---|\n", + "|Idadi ya vipengele vya utabiri|Safu 10 za kwanza ni za nambari za utabiri|\n", + "|Matokeo/Lengo|Safu ya 11 ni kipimo cha kiasi cha maendeleo ya ugonjwa mwaka mmoja baada ya msingi|\n", + "|Maelezo ya vipengele vya utabiri|- umri kwa miaka\n", + "||- jinsia\n", + "||- bmi kiwango cha uzito wa mwili\n", + "||- bp shinikizo la damu la wastani\n", + "||- s1 tc, jumla ya cholesterol ya damu\n", + "||- s2 ldl, lipoproteini zenye msongamano mdogo\n", + "||- s3 hdl, lipoproteini zenye msongamano mkubwa\n", + "||- s4 tch, jumla ya cholesterol / HDL\n", + "||- s5 ltg, labda logi ya kiwango cha triglycerides ya damu\n", + "||- s6 glu, kiwango cha sukari kwenye damu|\n", + "\n", + "> 🎓 Kumbuka, huu ni ujifunzaji unaosimamiwa, na tunahitaji lengo lililopewa jina 'y'.\n", + "\n", + "Kabla ya kuweza kushughulikia data na R, unahitaji kuingiza data kwenye kumbukumbu ya R, au kujenga muunganisho na 
data ambayo R inaweza kutumia kufikia data hiyo kwa mbali.\n", + "\n", + "> Kifurushi cha [readr](https://readr.tidyverse.org/), ambacho ni sehemu ya Tidyverse, kinatoa njia ya haraka na rafiki ya kusoma data ya mstatili kwenye R.\n", + "\n", + "Sasa, hebu tuingize seti ya data ya kisukari iliyotolewa kwenye URL hii ya chanzo: <https://www4.stat.ncsu.edu/~boos/var.select/diabetes.rwrite1.txt>\n", + "\n", + "Pia, tutafanya ukaguzi wa hali ya data yetu kwa kutumia `glimpse()` na kuonyesha safu 5 za kwanza kwa kutumia `slice()`.\n", + "\n", + "Kabla ya kuendelea zaidi, hebu pia tuanzishe kitu ambacho utakutana nacho mara nyingi katika msimbo wa R 🥁🥁: opereta wa bomba `%>%`\n", + "\n", + "Opereta wa bomba (`%>%`) hufanya operesheni kwa mpangilio wa kimantiki kwa kupitisha kitu mbele kwenye kazi au usemi wa wito. Unaweza kufikiria opereta wa bomba kama kusema \"na kisha\" katika msimbo wako.\n" + ], + "metadata": { + "id": "KM6iXLH996Cl" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Import the data set\r\n", + "diabetes <- read_table2(file = \"https://www4.stat.ncsu.edu/~boos/var.select/diabetes.rwrite1.txt\")\r\n", + "\r\n", + "\r\n", + "# Get a glimpse and dimensions of the data\r\n", + "glimpse(diabetes)\r\n", + "\r\n", + "\r\n", + "# Select the first 5 rows of the data\r\n", + "diabetes %>% \r\n", + " slice(1:5)" + ], + "outputs": [], + "metadata": { + "id": "Z1geAMhM-bSP" + } + }, + { + "cell_type": "markdown", + "source": [ + "`glimpse()` inaonyesha kuwa data hii ina safu 442 na nguzo 11, ambapo nguzo zote zina aina ya data `double`\n", + "\n", + "
\n", + "\n", + "> glimpse() na slice() ni kazi katika [`dplyr`](https://dplyr.tidyverse.org/). Dplyr, sehemu ya Tidyverse, ni sarufi ya uendeshaji wa data inayotoa seti thabiti ya vitenzi vinavyokusaidia kutatua changamoto za kawaida za uendeshaji wa data.\n", + "\n", + "
\n", + "\n", + "Sasa kwa kuwa tuna data, hebu tuzingatie kipengele kimoja (`bmi`) kwa lengo la zoezi hili. Hii itahitaji kuchagua nguzo zinazohitajika. Kwa hivyo, tunafanyaje hili?\n", + "\n", + "[`dplyr::select()`](https://dplyr.tidyverse.org/reference/select.html) inatuwezesha *kuchagua* (na kwa hiari kubadilisha jina) nguzo katika fremu ya data.\n" + ], + "metadata": { + "id": "UwjVT1Hz-c3Z" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Select predictor feature `bmi` and outcome `y`\r\n", + "diabetes_select <- diabetes %>% \r\n", + " select(c(bmi, y))\r\n", + "\r\n", + "# Print the first 10 rows\r\n", + "diabetes_select %>% \r\n", + " slice(1:10)" + ], + "outputs": [], + "metadata": { + "id": "RDY1oAKI-m80" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 3. Mafunzo na Data ya Kupima\n", + "\n", + "Ni jambo la kawaida katika kujifunza kwa usimamizi *kugawanya* data katika sehemu mbili; seti moja (ambayo kwa kawaida ni kubwa zaidi) ya kufundishia modeli, na seti ndogo ya \"kuhifadhi\" ili kuona jinsi modeli ilivyofanya kazi.\n", + "\n", + "Sasa kwa kuwa tuna data tayari, tunaweza kuona kama mashine inaweza kusaidia kuamua mgawanyo wa kimantiki kati ya nambari katika seti hii ya data. 
Tunaweza kutumia kifurushi cha [rsample](https://tidymodels.github.io/rsample/), ambacho ni sehemu ya mfumo wa Tidymodels, kuunda kitu kinachobeba taarifa juu ya *jinsi* ya kugawanya data, na kisha kutumia kazi mbili zaidi za rsample kutoa seti za mafunzo na kupima zilizoundwa:\n" + ], + "metadata": { + "id": "SDk668xK-tc3" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "set.seed(2056)\r\n", + "# Split 67% of the data for training and the rest for testing\r\n", + "diabetes_split <- diabetes_select %>% \r\n", + " initial_split(prop = 0.67)\r\n", + "\r\n", + "# Extract the resulting train and test sets\r\n", + "diabetes_train <- training(diabetes_split)\r\n", + "diabetes_test <- testing(diabetes_split)\r\n", + "\r\n", + "# Print the first 10 rows of the training set\r\n", + "diabetes_train %>% \r\n", + " slice(1:10)" + ], + "outputs": [], + "metadata": { + "id": "EqtHx129-1h-" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 4. Fundisha mfano wa regression ya mstari kwa kutumia Tidymodels\n", + "\n", + "Sasa tuko tayari kufundisha mfano wetu!\n", + "\n", + "Katika Tidymodels, unataja mifano kwa kutumia `parsnip()` kwa kufafanua dhana tatu:\n", + "\n", + "- **Aina ya mfano** inatofautisha mifano kama regression ya mstari, regression ya logistic, mifano ya mti wa maamuzi, na kadhalika.\n", + "\n", + "- **Hali ya mfano** inajumuisha chaguo za kawaida kama regression na uainishaji; baadhi ya aina za mifano zinaunga mkono mojawapo ya hizi au zote mbili, wakati nyingine zina hali moja tu.\n", + "\n", + "- **Injini ya mfano** ni zana ya kihesabu ambayo itatumika kufanikisha mfano. 
Mara nyingi hizi ni pakiti za R, kama **`\"lm\"`** au **`\"ranger\"`**\n", + "\n", + "Taarifa hii ya uundaji wa mfano inahifadhiwa katika maelezo ya mfano, kwa hivyo hebu tuunde moja!\n" + ], + "metadata": { + "id": "sBOS-XhB-6v7" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Build a linear model specification\r\n", + "lm_spec <- \r\n", + " # Type\r\n", + " linear_reg() %>% \r\n", + " # Engine\r\n", + " set_engine(\"lm\") %>% \r\n", + " # Mode\r\n", + " set_mode(\"regression\")\r\n", + "\r\n", + "\r\n", + "# Print the model specification\r\n", + "lm_spec" + ], + "outputs": [], + "metadata": { + "id": "20OwEw20--t3" + } + }, + { + "cell_type": "markdown", + "source": [ + "Baada ya mfano *kuelezwa*, mfano unaweza `kukadiriwa` au `kufunzwa` kwa kutumia kazi ya [`fit()`](https://parsnip.tidymodels.org/reference/fit.html), kwa kawaida kwa kutumia fomula na data fulani.\n", + "\n", + "`y ~ .` inamaanisha tutafitisha `y` kama kiasi kinachotabiriwa/lengo, kinachoelezewa na vihisishi/vipengele vyote yaani, `.` (katika kesi hii, tuna kihisishi kimoja tu: `bmi`)\n" + ], + "metadata": { + "id": "_oDHs89k_CJj" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Build a linear model specification\r\n", + "lm_spec <- linear_reg() %>% \r\n", + " set_engine(\"lm\") %>%\r\n", + " set_mode(\"regression\")\r\n", + "\r\n", + "\r\n", + "# Train a linear regression model\r\n", + "lm_mod <- lm_spec %>% \r\n", + " fit(y ~ ., data = diabetes_train)\r\n", + "\r\n", + "# Print the model\r\n", + "lm_mod" + ], + "outputs": [], + "metadata": { + "id": "YlsHqd-q_GJQ" + } + }, + { + "cell_type": "markdown", + "source": [ + "Kutoka kwenye matokeo ya modeli, tunaweza kuona vigezo vilivyojifunzwa wakati wa mafunzo. Vigezo hivi vinawakilisha vigezo vya mstari wa kufaa bora ambao hutupatia makosa ya chini kabisa kati ya thamani halisi na iliyotabiriwa.\n", + "\n", + "
\n", + "\n", + "## 5. Fanya utabiri kwenye seti ya majaribio\n", + "\n", + "Sasa kwa kuwa tumefundisha modeli, tunaweza kuitumia kutabiri maendeleo ya ugonjwa y kwa kutumia seti ya data ya majaribio kwa kutumia [parsnip::predict()](https://parsnip.tidymodels.org/reference/predict.model_fit.html). Hii itatumika kuchora mstari kati ya makundi ya data.\n" + ], + "metadata": { + "id": "kGZ22RQj_Olu" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make predictions for the test set\r\n", + "predictions <- lm_mod %>% \r\n", + " predict(new_data = diabetes_test)\r\n", + "\r\n", + "# Print out some of the predictions\r\n", + "predictions %>% \r\n", + " slice(1:5)" + ], + "outputs": [], + "metadata": { + "id": "nXHbY7M2_aao" + } + }, + { + "cell_type": "markdown", + "source": [ + "Woohoo! 💃🕺 Tumefanikiwa kufundisha modeli na kuitumia kufanya utabiri!\n", + "\n", + "Wakati wa kufanya utabiri, utaratibu wa tidymodels ni daima kutoa tibble/data frame ya matokeo yenye majina ya safu yaliyosanifishwa. Hii hufanya iwe rahisi kuunganisha data ya awali na utabiri katika muundo unaoweza kutumika kwa shughuli zinazofuata kama vile kuchora grafu.\n", + "\n", + "`dplyr::bind_cols()` inaunganisha kwa ufanisi data frame nyingi kwa safu.\n" + ], + "metadata": { + "id": "R_JstwUY_bIs" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Combine the predictions and the original test set\r\n", + "results <- diabetes_test %>% \r\n", + " bind_cols(predictions)\r\n", + "\r\n", + "\r\n", + "results %>% \r\n", + " slice(1:5)" + ], + "outputs": [], + "metadata": { + "id": "RybsMJR7_iI8" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 6. Onyesha matokeo ya uundaji wa modeli\n", + "\n", + "Sasa ni wakati wa kuona hili kwa njia ya picha 📈. 
Tutatengeneza mchoro wa alama za kutawanyika wa thamani zote za `y` na `bmi` kutoka kwenye seti ya majaribio, kisha tutatumia utabiri kuonyesha mstari mahali panapofaa zaidi, kati ya makundi ya data ya modeli.\n", + "\n", + "R ina mifumo kadhaa ya kutengeneza grafu, lakini `ggplot2` ni mojawapo ya mifumo maridadi na yenye matumizi mengi zaidi. Hii inakuwezesha kuunda grafu kwa **kuunganisha vipengele huru**.\n" + ], + "metadata": { + "id": "XJbYbMZW_n_s" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Set a theme for the plot\r\n", + "theme_set(theme_light())\r\n", + "# Create a scatter plot\r\n", + "results %>% \r\n", + " ggplot(aes(x = bmi)) +\r\n", + " # Add a scatter plot\r\n", + " geom_point(aes(y = y), size = 1.6) +\r\n", + " # Add a line plot\r\n", + " geom_line(aes(y = .pred), color = \"blue\", size = 1.5)" + ], + "outputs": [], + "metadata": { + "id": "R9tYp3VW_sTn" + } + }, + { + "cell_type": "markdown", + "source": [ + "> ✅ Fikiria kidogo kuhusu kinachoendelea hapa. Mstari wa moja kwa moja unapita katikati ya nukta nyingi ndogo za data, lakini unafanya nini hasa? Je, unaweza kuona jinsi unavyoweza kutumia mstari huu kutabiri mahali ambapo nukta mpya ya data isiyoonekana inapaswa kuwekwa kuhusiana na mhimili wa y wa mchoro? Jaribu kuelezea kwa maneno matumizi ya vitendo ya mfano huu.\n", + "\n", + "Hongera, umeunda mfano wako wa kwanza wa regression ya mstari, ukatengeneza utabiri kwa kutumia huo, na ukaonyesha katika mchoro!\n" + ], + "metadata": { + "id": "zrPtHIxx_tNI" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya tafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kwa usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. 
Kwa taarifa muhimu, inashauriwa kutumia tafsiri ya kitaalamu ya binadamu. Hatutawajibika kwa maelewano mabaya au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sw/2-Regression/1-Tools/solution/notebook.ipynb b/translations/sw/2-Regression/1-Tools/solution/notebook.ipynb new file mode 100644 index 000000000..bfd389c43 --- /dev/null +++ b/translations/sw/2-Regression/1-Tools/solution/notebook.ipynb @@ -0,0 +1,671 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from sklearn import datasets, linear_model, model_selection\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Pakia seti ya data ya kisukari, gawanya kuwa data za `X` na sifa za `y`\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(442, 10)\n", + "[ 0.03807591 0.05068012 0.06169621 0.02187239 -0.0442235 -0.03482076\n", + " -0.04340085 -0.00259226 0.01990749 -0.01764613]\n" + ] + } + ], + "source": [ + "X, y = datasets.load_diabetes(return_X_y=True)\n", + "print(X.shape)\n", + "print(X[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Chagua kipengele kimoja tu kulenga kwa zoezi hili\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(442,)\n" + ] + } + ], + "source": [ + "# Selecting the 3rd feature\n", + "X = X[:, 2]\n", + "print(X.shape)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "(442, 1)\n", + "[[ 0.06169621]\n", + " [-0.05147406]\n", + " [ 0.04445121]\n", + " [-0.01159501]\n", + " [-0.03638469]\n", + " [-0.04069594]\n", + " [-0.04716281]\n", + " [-0.00189471]\n", + " [ 0.06169621]\n", + " [ 0.03906215]\n", + " [-0.08380842]\n", + " [ 0.01750591]\n", + " [-0.02884001]\n", + " [-0.00189471]\n", + " [-0.02560657]\n", + " [-0.01806189]\n", + " [ 0.04229559]\n", + " [ 0.01211685]\n", + " [-0.0105172 ]\n", + " [-0.01806189]\n", + " [-0.05686312]\n", + " [-0.02237314]\n", + " [-0.00405033]\n", + " [ 0.06061839]\n", + " [ 0.03582872]\n", + " [-0.01267283]\n", + " [-0.07734155]\n", + " [ 0.05954058]\n", + " [-0.02129532]\n", + " [-0.00620595]\n", + " [ 0.04445121]\n", + " [-0.06548562]\n", + " [ 0.12528712]\n", + " [-0.05039625]\n", + " [-0.06332999]\n", + " [-0.03099563]\n", + " [ 0.02289497]\n", + " [ 0.01103904]\n", + " [ 0.07139652]\n", + " [ 0.01427248]\n", + " [-0.00836158]\n", + " [-0.06764124]\n", + " [-0.0105172 ]\n", + " [-0.02345095]\n", + " [ 0.06816308]\n", + " [-0.03530688]\n", + " [-0.01159501]\n", + " [-0.0730303 ]\n", + " [-0.04177375]\n", + " [ 0.01427248]\n", + " [-0.00728377]\n", + " [ 0.0164281 ]\n", + " [-0.00943939]\n", + " [-0.01590626]\n", + " [ 0.0250506 ]\n", + " [-0.04931844]\n", + " [ 0.04121778]\n", + " [-0.06332999]\n", + " [-0.06440781]\n", + " [-0.02560657]\n", + " [-0.00405033]\n", + " [ 0.00457217]\n", + " [-0.00728377]\n", + " [-0.0374625 ]\n", + " [-0.02560657]\n", + " [-0.02452876]\n", + " [-0.01806189]\n", + " [-0.01482845]\n", + " [-0.02991782]\n", + " [-0.046085 ]\n", + " [-0.06979687]\n", + " [ 0.03367309]\n", + " [-0.00405033]\n", + " [-0.02021751]\n", + " [ 0.00241654]\n", + " [-0.03099563]\n", + " [ 0.02828403]\n", + " [-0.03638469]\n", + " [-0.05794093]\n", + " [-0.0374625 ]\n", + " [ 0.01211685]\n", + " [-0.02237314]\n", + " [-0.03530688]\n", + " [ 0.00996123]\n", + " [-0.03961813]\n", + " [ 0.07139652]\n", + " [-0.07518593]\n", + " [-0.00620595]\n", + " [-0.04069594]\n", + " 
[-0.04824063]\n", + " [-0.02560657]\n", + " [ 0.0519959 ]\n", + " [ 0.00457217]\n", + " [-0.06440781]\n", + " [-0.01698407]\n", + " [-0.05794093]\n", + " [ 0.00996123]\n", + " [ 0.08864151]\n", + " [-0.00512814]\n", + " [-0.06440781]\n", + " [ 0.01750591]\n", + " [-0.04500719]\n", + " [ 0.02828403]\n", + " [ 0.04121778]\n", + " [ 0.06492964]\n", + " [-0.03207344]\n", + " [-0.07626374]\n", + " [ 0.04984027]\n", + " [ 0.04552903]\n", + " [-0.00943939]\n", + " [-0.03207344]\n", + " [ 0.00457217]\n", + " [ 0.02073935]\n", + " [ 0.01427248]\n", + " [ 0.11019775]\n", + " [ 0.00133873]\n", + " [ 0.05846277]\n", + " [-0.02129532]\n", + " [-0.0105172 ]\n", + " [-0.04716281]\n", + " [ 0.00457217]\n", + " [ 0.01750591]\n", + " [ 0.08109682]\n", + " [ 0.0347509 ]\n", + " [ 0.02397278]\n", + " [-0.00836158]\n", + " [-0.06117437]\n", + " [-0.00189471]\n", + " [-0.06225218]\n", + " [ 0.0164281 ]\n", + " [ 0.09618619]\n", + " [-0.06979687]\n", + " [-0.02129532]\n", + " [-0.05362969]\n", + " [ 0.0433734 ]\n", + " [ 0.05630715]\n", + " [-0.0816528 ]\n", + " [ 0.04984027]\n", + " [ 0.11127556]\n", + " [ 0.06169621]\n", + " [ 0.01427248]\n", + " [ 0.04768465]\n", + " [ 0.01211685]\n", + " [ 0.00564998]\n", + " [ 0.04660684]\n", + " [ 0.12852056]\n", + " [ 0.05954058]\n", + " [ 0.09295276]\n", + " [ 0.01535029]\n", + " [-0.00512814]\n", + " [ 0.0703187 ]\n", + " [-0.00405033]\n", + " [-0.00081689]\n", + " [-0.04392938]\n", + " [ 0.02073935]\n", + " [ 0.06061839]\n", + " [-0.0105172 ]\n", + " [-0.03315126]\n", + " [-0.06548562]\n", + " [ 0.0433734 ]\n", + " [-0.06225218]\n", + " [ 0.06385183]\n", + " [ 0.03043966]\n", + " [ 0.07247433]\n", + " [-0.0191397 ]\n", + " [-0.06656343]\n", + " [-0.06009656]\n", + " [ 0.06924089]\n", + " [ 0.05954058]\n", + " [-0.02668438]\n", + " [-0.02021751]\n", + " [-0.046085 ]\n", + " [ 0.07139652]\n", + " [-0.07949718]\n", + " [ 0.00996123]\n", + " [-0.03854032]\n", + " [ 0.01966154]\n", + " [ 0.02720622]\n", + " [-0.00836158]\n", + " [-0.01590626]\n", + 
" [ 0.00457217]\n", + " [-0.04285156]\n", + " [ 0.00564998]\n", + " [-0.03530688]\n", + " [ 0.02397278]\n", + " [-0.01806189]\n", + " [ 0.04229559]\n", + " [-0.0547075 ]\n", + " [-0.00297252]\n", + " [-0.06656343]\n", + " [-0.01267283]\n", + " [-0.04177375]\n", + " [-0.03099563]\n", + " [-0.00512814]\n", + " [-0.05901875]\n", + " [ 0.0250506 ]\n", + " [-0.046085 ]\n", + " [ 0.00349435]\n", + " [ 0.05415152]\n", + " [-0.04500719]\n", + " [-0.05794093]\n", + " [-0.05578531]\n", + " [ 0.00133873]\n", + " [ 0.03043966]\n", + " [ 0.00672779]\n", + " [ 0.04660684]\n", + " [ 0.02612841]\n", + " [ 0.04552903]\n", + " [ 0.04013997]\n", + " [-0.01806189]\n", + " [ 0.01427248]\n", + " [ 0.03690653]\n", + " [ 0.00349435]\n", + " [-0.07087468]\n", + " [-0.03315126]\n", + " [ 0.09403057]\n", + " [ 0.03582872]\n", + " [ 0.03151747]\n", + " [-0.06548562]\n", + " [-0.04177375]\n", + " [-0.03961813]\n", + " [-0.03854032]\n", + " [-0.02560657]\n", + " [-0.02345095]\n", + " [-0.06656343]\n", + " [ 0.03259528]\n", + " [-0.046085 ]\n", + " [-0.02991782]\n", + " [-0.01267283]\n", + " [-0.01590626]\n", + " [ 0.07139652]\n", + " [-0.03099563]\n", + " [ 0.00026092]\n", + " [ 0.03690653]\n", + " [ 0.03906215]\n", + " [-0.01482845]\n", + " [ 0.00672779]\n", + " [-0.06871905]\n", + " [-0.00943939]\n", + " [ 0.01966154]\n", + " [ 0.07462995]\n", + " [-0.00836158]\n", + " [-0.02345095]\n", + " [-0.046085 ]\n", + " [ 0.05415152]\n", + " [-0.03530688]\n", + " [-0.03207344]\n", + " [-0.0816528 ]\n", + " [ 0.04768465]\n", + " [ 0.06061839]\n", + " [ 0.05630715]\n", + " [ 0.09834182]\n", + " [ 0.05954058]\n", + " [ 0.03367309]\n", + " [ 0.05630715]\n", + " [-0.06548562]\n", + " [ 0.16085492]\n", + " [-0.05578531]\n", + " [-0.02452876]\n", + " [-0.03638469]\n", + " [-0.00836158]\n", + " [-0.04177375]\n", + " [ 0.12744274]\n", + " [-0.07734155]\n", + " [ 0.02828403]\n", + " [-0.02560657]\n", + " [-0.06225218]\n", + " [-0.00081689]\n", + " [ 0.08864151]\n", + " [-0.03207344]\n", + " [ 0.03043966]\n", + 
" [ 0.00888341]\n", + " [ 0.00672779]\n", + " [-0.02021751]\n", + " [-0.02452876]\n", + " [-0.01159501]\n", + " [ 0.02612841]\n", + " [-0.05901875]\n", + " [-0.03638469]\n", + " [-0.02452876]\n", + " [ 0.01858372]\n", + " [-0.0902753 ]\n", + " [-0.00512814]\n", + " [-0.05255187]\n", + " [-0.02237314]\n", + " [-0.02021751]\n", + " [-0.0547075 ]\n", + " [-0.00620595]\n", + " [-0.01698407]\n", + " [ 0.05522933]\n", + " [ 0.07678558]\n", + " [ 0.01858372]\n", + " [-0.02237314]\n", + " [ 0.09295276]\n", + " [-0.03099563]\n", + " [ 0.03906215]\n", + " [-0.06117437]\n", + " [-0.00836158]\n", + " [-0.0374625 ]\n", + " [-0.01375064]\n", + " [ 0.07355214]\n", + " [-0.02452876]\n", + " [ 0.03367309]\n", + " [ 0.0347509 ]\n", + " [-0.03854032]\n", + " [-0.03961813]\n", + " [-0.00189471]\n", + " [-0.03099563]\n", + " [-0.046085 ]\n", + " [ 0.00133873]\n", + " [ 0.06492964]\n", + " [ 0.04013997]\n", + " [-0.02345095]\n", + " [ 0.05307371]\n", + " [ 0.04013997]\n", + " [-0.02021751]\n", + " [ 0.01427248]\n", + " [-0.03422907]\n", + " [ 0.00672779]\n", + " [ 0.00457217]\n", + " [ 0.03043966]\n", + " [ 0.0519959 ]\n", + " [ 0.06169621]\n", + " [-0.00728377]\n", + " [ 0.00564998]\n", + " [ 0.05415152]\n", + " [-0.00836158]\n", + " [ 0.114509 ]\n", + " [ 0.06708527]\n", + " [-0.05578531]\n", + " [ 0.03043966]\n", + " [-0.02560657]\n", + " [ 0.10480869]\n", + " [-0.00620595]\n", + " [-0.04716281]\n", + " [-0.04824063]\n", + " [ 0.08540807]\n", + " [-0.01267283]\n", + " [-0.03315126]\n", + " [-0.00728377]\n", + " [-0.01375064]\n", + " [ 0.05954058]\n", + " [ 0.02181716]\n", + " [ 0.01858372]\n", + " [-0.01159501]\n", + " [-0.00297252]\n", + " [ 0.01750591]\n", + " [-0.02991782]\n", + " [-0.02021751]\n", + " [-0.05794093]\n", + " [ 0.06061839]\n", + " [-0.04069594]\n", + " [-0.07195249]\n", + " [-0.05578531]\n", + " [ 0.04552903]\n", + " [-0.00943939]\n", + " [-0.03315126]\n", + " [ 0.04984027]\n", + " [-0.08488624]\n", + " [ 0.00564998]\n", + " [ 0.02073935]\n", + " [-0.00728377]\n", + 
" [ 0.10480869]\n", + " [-0.02452876]\n", + " [-0.00620595]\n", + " [-0.03854032]\n", + " [ 0.13714305]\n", + " [ 0.17055523]\n", + " [ 0.00241654]\n", + " [ 0.03798434]\n", + " [-0.05794093]\n", + " [-0.00943939]\n", + " [-0.02345095]\n", + " [-0.0105172 ]\n", + " [-0.03422907]\n", + " [-0.00297252]\n", + " [ 0.06816308]\n", + " [ 0.00996123]\n", + " [ 0.00241654]\n", + " [-0.03854032]\n", + " [ 0.02612841]\n", + " [-0.08919748]\n", + " [ 0.06061839]\n", + " [-0.02884001]\n", + " [-0.02991782]\n", + " [-0.0191397 ]\n", + " [-0.04069594]\n", + " [ 0.01535029]\n", + " [-0.02452876]\n", + " [ 0.00133873]\n", + " [ 0.06924089]\n", + " [-0.06979687]\n", + " [-0.02991782]\n", + " [-0.046085 ]\n", + " [ 0.01858372]\n", + " [ 0.00133873]\n", + " [-0.03099563]\n", + " [-0.00405033]\n", + " [ 0.01535029]\n", + " [ 0.02289497]\n", + " [ 0.04552903]\n", + " [-0.04500719]\n", + " [-0.03315126]\n", + " [ 0.097264 ]\n", + " [ 0.05415152]\n", + " [ 0.12313149]\n", + " [-0.08057499]\n", + " [ 0.09295276]\n", + " [-0.05039625]\n", + " [-0.01159501]\n", + " [-0.0277622 ]\n", + " [ 0.05846277]\n", + " [ 0.08540807]\n", + " [-0.00081689]\n", + " [ 0.00672779]\n", + " [ 0.00888341]\n", + " [ 0.08001901]\n", + " [ 0.07139652]\n", + " [-0.02452876]\n", + " [-0.0547075 ]\n", + " [-0.03638469]\n", + " [ 0.0164281 ]\n", + " [ 0.07786339]\n", + " [-0.03961813]\n", + " [ 0.01103904]\n", + " [-0.04069594]\n", + " [-0.03422907]\n", + " [ 0.00564998]\n", + " [ 0.08864151]\n", + " [-0.03315126]\n", + " [-0.05686312]\n", + " [-0.03099563]\n", + " [ 0.05522933]\n", + " [-0.06009656]\n", + " [ 0.00133873]\n", + " [-0.02345095]\n", + " [-0.07410811]\n", + " [ 0.01966154]\n", + " [-0.01590626]\n", + " [-0.01590626]\n", + " [ 0.03906215]\n", + " [-0.0730303 ]]\n" + ] + } + ], + "source": [ + "#Reshaping to get a 2D array\n", + "X = X.reshape(-1, 1)\n", + "print(X.shape)\n", + "print(X)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + 
"execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.33)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Chagua mfano na uufanyie mazoezi kwa data ya mafunzo\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "LinearRegression()" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = linear_model.LinearRegression()\n", + "model.fit(X_train, y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Tumia data ya majaribio kutabiri mstari\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred = model.predict(X_test)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Onyesha matokeo katika mchoro\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.scatter(X_test, y_test, color='black')\n", + "plt.plot(X_test, y_pred, color='blue', linewidth=3)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya tafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kwa usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, inashauriwa kutumia huduma ya tafsiri ya kitaalamu ya binadamu. Hatutawajibika kwa maelewano mabaya au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.1" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "16ff1a974f6e4348e869e4a7d366b86a", + "translation_date": "2025-09-06T13:39:19+00:00", + "source_file": "2-Regression/1-Tools/solution/notebook.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sw/2-Regression/2-Data/notebook.ipynb b/translations/sw/2-Regression/2-Data/notebook.ipynb new file mode 100644 index 000000000..8f35fe4af --- /dev/null +++ b/translations/sw/2-Regression/2-Data/notebook.ipynb @@ -0,0 +1,46 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 
+ }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3-final" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3", + "language": "python" + }, + "coopTranslator": { + "original_hash": "1b2ab303ac6c604a34c6ca7a49077fc7", + "translation_date": "2025-09-06T13:46:00+00:00", + "source_file": "2-Regression/2-Data/notebook.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya tafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kwa usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, inashauriwa kutumia huduma ya tafsiri ya kitaalamu ya binadamu. 
Hatutawajibika kwa maelewano mabaya au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sw/2-Regression/2-Data/solution/R/lesson_2-R.ipynb b/translations/sw/2-Regression/2-Data/solution/R/lesson_2-R.ipynb new file mode 100644 index 000000000..e96c6cf16 --- /dev/null +++ b/translations/sw/2-Regression/2-Data/solution/R/lesson_2-R.ipynb @@ -0,0 +1,671 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_2-R.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "f3c335f9940cfd76528b3ef918b9b342", + "translation_date": "2025-09-06T13:52:52+00:00", + "source_file": "2-Regression/2-Data/solution/R/lesson_2-R.ipynb", + "language_code": "sw" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Jenga mfano wa regression: andaa na onyesha data\n", + "\n", + "## **Regression ya Mstari kwa Malenge - Somo la 2**\n", + "#### Utangulizi\n", + "\n", + "Sasa kwa kuwa umejiandaa na zana unazohitaji kuanza kujenga mifano ya kujifunza mashine kwa kutumia Tidymodels na Tidyverse, uko tayari kuanza kuuliza maswali kuhusu data yako. Unapofanya kazi na data na kutumia suluhisho za ML, ni muhimu sana kuelewa jinsi ya kuuliza swali sahihi ili kufungua uwezo wa dataset yako ipasavyo.\n", + "\n", + "Katika somo hili, utajifunza:\n", + "\n", + "- Jinsi ya kuandaa data yako kwa ajili ya kujenga mifano.\n", + "\n", + "- Jinsi ya kutumia `ggplot2` kwa uonyeshaji wa data.\n", + "\n", + "Swali unalotaka kujibiwa litaamua ni aina gani ya algorithmi za ML utatumia. Na ubora wa jibu unalopata utategemea sana asili ya data yako.\n", + "\n", + "Hebu tuone hili kwa kufanya zoezi la vitendo.\n", + "\n", + "\n", + "

\n", + " \n", + "

Sanaa na @allison_horst
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "Pg5aexcOPqAZ" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 1. Kuleta data za malenge na kuitumia Tidyverse\n", + "\n", + "Tutahitaji vifurushi vifuatavyo ili kuchambua somo hili:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) ni [mkusanyiko wa vifurushi vya R](https://www.tidyverse.org/packages) vilivyoundwa ili kufanya sayansi ya data kuwa ya haraka, rahisi, na ya kufurahisha!\n", + "\n", + "Unaweza kuvifunga kwa kutumia:\n", + "\n", + "`install.packages(c(\"tidyverse\"))`\n", + "\n", + "Skripti iliyo hapa chini inakagua kama una vifurushi vinavyohitajika kukamilisha moduli hii na kuvifunga kwako endapo baadhi havipo.\n" + ], + "metadata": { + "id": "dc5WhyVdXAjR" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "suppressWarnings(if(!require(\"pacman\")) install.packages(\"pacman\"))\n", + "pacman::p_load(tidyverse)" + ], + "outputs": [], + "metadata": { + "id": "GqPYUZgfXOBt" + } + }, + { + "cell_type": "markdown", + "source": [ + "Sasa, wacha tuwashe baadhi ya vifurushi na kupakia [data](https://github.com/microsoft/ML-For-Beginners/blob/main/2-Regression/data/US-pumpkins.csv) iliyotolewa kwa somo hili!\n" + ], + "metadata": { + "id": "kvjDTPDSXRr2" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load the core Tidyverse packages\n", + "library(tidyverse)\n", + "\n", + "# Import the pumpkins data\n", + "pumpkins <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/2-Regression/data/US-pumpkins.csv\")\n", + "\n", + "\n", + "# Get a glimpse and dimensions of the data\n", + "glimpse(pumpkins)\n", + "\n", + "\n", + "# Print the first 50 rows of the data set\n", + "pumpkins %>% \n", + " slice_head(n =50)" + ], + "outputs": [], + "metadata": { + "id": "VMri-t2zXqgD" + } + }, + { + "cell_type": "markdown", + "source": [ + "`glimpse()` ya haraka inaonyesha mara moja kwamba kuna 
nafasi tupu na mchanganyiko wa maandishi (`chr`) na data ya nambari (`dbl`). `Date` ni aina ya herufi, na pia kuna safu ya ajabu inayoitwa `Package` ambapo data ni mchanganyiko wa `sacks`, `bins`, na thamani nyinginezo. Kwa kweli, data hii ni fujo kidogo 😤.\n", + "\n", + "Kwa kweli, si jambo la kawaida kupewa seti ya data ambayo iko tayari kabisa kutumika kuunda mfano wa ML moja kwa moja. Lakini usiwe na wasiwasi, katika somo hili, utajifunza jinsi ya kuandaa seti ya data mbichi kwa kutumia maktaba za kawaida za R 🧑‍🔧. Pia utajifunza mbinu mbalimbali za kuona data. 📈📊\n", + "
\n", + "\n", + "> Kumbusho: Opereta wa bomba (`%>%`) hufanya shughuli kwa mpangilio wa kimantiki kwa kupitisha kitu mbele kwenye kazi au usemi wa wito. Unaweza kufikiria opereta wa bomba kama kusema \"na kisha\" katika msimbo wako.\n" + ], + "metadata": { + "id": "REWcIv9yX29v" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 2. Angalia data iliyokosekana\n", + "\n", + "Moja ya changamoto za kawaida ambazo wanasayansi wa data hukutana nazo ni data isiyokamilika au iliyokosekana. R inawakilisha thamani zilizokosekana, au zisizojulikana, kwa thamani maalum: `NA` (Not Available).\n", + "\n", + "Kwa hivyo, tunawezaje kujua kwamba fremu ya data ina thamani zilizokosekana?\n", + "
\n", + "- Njia moja rahisi ni kutumia kazi ya msingi ya R `anyNA` ambayo inarejesha vitu vya kimantiki `TRUE` au `FALSE`.\n" + ], + "metadata": { + "id": "Zxfb3AM5YbUe" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "pumpkins %>% \n", + " anyNA()" + ], + "outputs": [], + "metadata": { + "id": "G--DQutAYltj" + } + }, + { + "cell_type": "markdown", + "source": [ + "Kunaonekana kuna baadhi ya data zinazokosekana! Hapo ndipo mahali pazuri pa kuanzia.\n", + "\n", + "- Njia nyingine ni kutumia kazi `is.na()` ambayo inaonyesha ni vipengele vipi vya safu wima vinavyokosekana kwa kutumia mantiki ya `TRUE`.\n" + ], + "metadata": { + "id": "mU-7-SB6YokF" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "pumpkins %>% \n", + " is.na() %>% \n", + " head(n = 7)" + ], + "outputs": [], + "metadata": { + "id": "W-DxDOR4YxSW" + } + }, + { + "cell_type": "markdown", + "source": [ + "Sawa, kazi imekamilika lakini kwa fremu kubwa ya data kama hii, itakuwa isiyo na ufanisi na karibu haiwezekani kupitia safu zote na nguzo moja baada ya nyingine😴.\n", + "\n", + "- Njia ya kueleweka zaidi itakuwa ni kuhesabu jumla ya thamani zinazokosekana kwa kila nguzo:\n" + ], + "metadata": { + "id": "xUWxipKYY0o7" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "pumpkins %>% \n", + " is.na() %>% \n", + " colSums()" + ], + "outputs": [], + "metadata": { + "id": "ZRBWV6P9ZArL" + } + }, + { + "cell_type": "markdown", + "source": [ + "Ni bora zaidi! Kuna data inayokosekana, lakini labda haitakuwa na umuhimu kwa kazi inayofanyika. Hebu tuone uchambuzi zaidi utakavyoleta matokeo.\n", + "\n", + "> Pamoja na seti nzuri za pakiti na kazi, R ina nyaraka bora sana. Kwa mfano, tumia `help(colSums)` au `?colSums` ili kujifunza zaidi kuhusu kazi hiyo.\n" + ], + "metadata": { + "id": "9gv-crB6ZD1Y" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 3. Dplyr: Sarufi ya Uendeshaji wa Takwimu\n", + "\n", + "

\n", + " \n", + "

Sanaa na @allison_horst
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "o4jLY5-VZO2C" + } + }, + { + "cell_type": "markdown", + "source": [ + "[`dplyr`](https://dplyr.tidyverse.org/), kifurushi katika Tidyverse, ni sarufi ya uendeshaji wa data inayotoa seti thabiti ya vitenzi vinavyokusaidia kutatua changamoto za kawaida za uendeshaji wa data. Katika sehemu hii, tutachunguza baadhi ya vitenzi vya dplyr!\n" + ], + "metadata": { + "id": "i5o33MQBZWWw" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### dplyr::select()\n", + "\n", + "`select()` ni kazi katika kifurushi cha `dplyr` ambayo hukusaidia kuchagua safu za kuweka au kuondoa.\n", + "\n", + "Ili kufanya fremu yako ya data iwe rahisi kufanya kazi nayo, ondoa safu kadhaa zake, ukitumia `select()`, ukihifadhi tu safu unazohitaji.\n", + "\n", + "Kwa mfano, katika zoezi hili, uchambuzi wetu utahusisha safu za `Package`, `Low Price`, `High Price` na `Date`. Hebu tuchague safu hizi.\n" + ], + "metadata": { + "id": "x3VGMAGBZiUr" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Select desired columns\n", + "pumpkins <- pumpkins %>% \n", + " select(Package, `Low Price`, `High Price`, Date)\n", + "\n", + "\n", + "# Print data set\n", + "pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "F_FgxQnVZnM0" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### dplyr::mutate()\n", + "\n", + "`mutate()` ni kazi katika kifurushi cha `dplyr` ambayo husaidia kuunda au kurekebisha safu, huku ukihifadhi safu zilizopo.\n", + "\n", + "Muundo wa jumla wa `mutate` ni:\n", + "\n", + "`data %>% mutate(new_column_name = what_it_contains)`\n", + "\n", + "Hebu tujaribu `mutate` kwa kutumia safu ya `Date` kwa kufanya shughuli zifuatazo:\n", + "\n", + "1. Badilisha tarehe (ambazo kwa sasa ni aina ya herufi) kuwa muundo wa mwezi (hizi ni tarehe za Marekani, kwa hivyo muundo ni `MM/DD/YYYY`).\n", + "\n", + "2. 
Toa mwezi kutoka kwa tarehe na uweke kwenye safu mpya.\n", + "\n", + "Katika R, kifurushi [lubridate](https://lubridate.tidyverse.org/) hufanya iwe rahisi kufanya kazi na data ya tarehe na muda. Kwa hivyo, hebu tutumie `dplyr::mutate()`, `lubridate::mdy()`, `lubridate::month()` na tuone jinsi ya kufanikisha malengo haya. Tunaweza kuondoa safu ya Date kwa kuwa hatutaihitaji tena katika shughuli zinazofuata.\n" + ], + "metadata": { + "id": "2KKo0Ed9Z1VB" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load lubridate\n", + "library(lubridate)\n", + "\n", + "pumpkins <- pumpkins %>% \n", + " # Convert the Date column to a date object\n", + " mutate(Date = mdy(Date)) %>% \n", + " # Extract month from Date\n", + " mutate(Month = month(Date)) %>% \n", + " # Drop Date column\n", + " select(-Date)\n", + "\n", + "# View the first few rows\n", + "pumpkins %>% \n", + " slice_head(n = 7)" + ], + "outputs": [], + "metadata": { + "id": "5joszIVSZ6xe" + } + }, + { + "cell_type": "markdown", + "source": [ + "Woohoo! 🤩\n", + "\n", + "Sasa, hebu tuunde safu mpya `Price`, inayowakilisha bei ya wastani ya malenge. Sasa, chukua wastani wa safu za `Low Price` na `High Price` ili kujaza safu mpya ya Price.\n" + ], + "metadata": { + "id": "nIgLjNMCZ-6Y" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Create a new column Price\n", + "pumpkins <- pumpkins %>% \n", + " mutate(Price = (`Low Price` + `High Price`)/2)\n", + "\n", + "# View the first few rows of the data\n", + "pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "Zo0BsqqtaJw2" + } + }, + { + "cell_type": "markdown", + "source": [ + "Ndio!💪\n", + "\n", + "\"Lakini subiri kidogo!\", utasema baada ya kuangalia haraka seti nzima ya data kwa kutumia `View(pumpkins)`, \"Kuna kitu cha ajabu hapa!\"🤔\n", + "\n", + "Ukichunguza safu ya `Package`, malenge yanauzwa katika mipangilio mbalimbali. 
Baadhi yanauzwa kwa kipimo cha `1 1/9 bushel`, mengine kwa kipimo cha `1/2 bushel`, mengine kwa kila malenge, mengine kwa paundi, na mengine katika masanduku makubwa yenye upana tofauti.\n", + "\n", + "Hebu tuhakikishe hili:\n" + ], + "metadata": { + "id": "p77WZr-9aQAR" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Verify the distinct observations in Package column\n", + "pumpkins %>% \n", + " distinct(Package)" + ], + "outputs": [], + "metadata": { + "id": "XISGfh0IaUy6" + } + }, + { + "cell_type": "markdown", + "source": [ + "Ajabu!👏\n", + "\n", + "Malenge yanaonekana kuwa magumu sana kupima kwa uthabiti, kwa hivyo hebu tuyachuje kwa kuchagua malenge tu yenye neno *bushel* katika safu ya `Package` na kuweka haya kwenye fremu mpya ya data `new_pumpkins`.\n" + ], + "metadata": { + "id": "7sMjiVujaZxY" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### dplyr::filter() na stringr::str_detect()\n", + "\n", + "[`dplyr::filter()`](https://dplyr.tidyverse.org/reference/filter.html): huunda sehemu ndogo ya data inayojumuisha tu **mistari** inayokidhi masharti yako, katika hali hii, maboga yenye neno *bushel* katika safu ya `Package`.\n", + "\n", + "[stringr::str_detect()](https://stringr.tidyverse.org/reference/str_detect.html): hutambua uwepo au kutokuwepo kwa muundo fulani ndani ya maandishi.\n", + "\n", + "Kifurushi cha [`stringr`](https://github.com/tidyverse/stringr) kinatoa kazi rahisi kwa operesheni za kawaida za maandishi.\n" + ], + "metadata": { + "id": "L8Qfcs92ageF" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Retain only pumpkins with \"bushel\"\n", + "new_pumpkins <- pumpkins %>% \n", + " filter(str_detect(Package, \"bushel\"))\n", + "\n", + "# Get the dimensions of the new data\n", + "dim(new_pumpkins)\n", + "\n", + "# View a few rows of the new data\n", + "new_pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "hy_SGYREampd" + } + }, + { 
+ "cell_type": "markdown", + "source": [ + "Unaweza kuona kwamba tumepunguza hadi takriban mistari 415 ya data inayohusu maboga yanayouzwa kwa busheli.🤩\n", + "
\n" + ], + "metadata": { + "id": "VrDwF031avlR" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### dplyr::case_when()\n", + "\n", + "**Lakini subiri! Kuna jambo moja zaidi la kufanya**\n", + "\n", + "Je, uliona kwamba kiasi cha bushel kinatofautiana kwa kila safu? Unahitaji kuweka bei sawa ili kuonyesha bei kwa bushel moja, si kwa 1 1/9 au 1/2 bushel. Ni wakati wa kufanya hesabu ili kuifanya iwe ya kawaida.\n", + "\n", + "Tutatumia kazi [`case_when()`](https://dplyr.tidyverse.org/reference/case_when.html) kubadilisha safu ya Bei kulingana na masharti fulani. `case_when` inakuruhusu kuunganisha taarifa nyingi za `if_else()` kwa urahisi.\n" + ], + "metadata": { + "id": "mLpw2jH4a0tx" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Convert the price if the Package contains fractional bushel values\n", + "new_pumpkins <- new_pumpkins %>% \n", + " mutate(Price = case_when(\n", + " str_detect(Package, \"1 1/9\") ~ Price/(1 + 1/9),\n", + " str_detect(Package, \"1/2\") ~ Price/(1/2),\n", + " TRUE ~ Price))\n", + "\n", + "# View the first few rows of the data\n", + "new_pumpkins %>% \n", + " slice_head(n = 30)" + ], + "outputs": [], + "metadata": { + "id": "P68kLVQmbM6I" + } + }, + { + "cell_type": "markdown", + "source": [ + "Sasa, tunaweza kuchambua bei kwa kila kipimo kulingana na kipimo chao cha busheli. Hata hivyo, utafiti huu wote wa busheli za maboga unaonyesha jinsi ilivyo `muhimu` sana `kuelewa asili ya data yako`!\n", + "\n", + "> ✅ Kulingana na [The Spruce Eats](https://www.thespruceeats.com/how-much-is-a-bushel-1389308), uzito wa busheli hutegemea aina ya mazao, kwa kuwa ni kipimo cha ujazo. \"Busheli ya nyanya, kwa mfano, inapaswa kuwa na uzito wa pauni 56... Majani na mboga za majani huchukua nafasi zaidi na uzito mdogo, hivyo busheli ya mchicha ni pauni 20 tu.\" Ni jambo gumu kidogo! Tusijisumbue na kufanya ubadilishaji wa busheli hadi pauni, badala yake tuweke bei kwa busheli. 
Hata hivyo, utafiti huu wote wa busheli za maboga unaonyesha jinsi ilivyo muhimu sana kuelewa asili ya data yako!\n", + "\n", + "> ✅ Je, uliona kwamba maboga yanayouzwa kwa nusu busheli ni ghali sana? Je, unaweza kubaini kwa nini? Dokezo: maboga madogo ni ghali zaidi kuliko makubwa, labda kwa sababu kuna mengi zaidi yao kwa busheli, ikizingatiwa nafasi isiyotumika inayochukuliwa na boga moja kubwa la pie lenye uwazi.\n" + ], + "metadata": { + "id": "pS2GNPagbSdb" + } + }, + { + "cell_type": "markdown", + "source": [ + "Sasa mwisho, kwa ajili ya kujifurahisha tu 💁‍♀️, hebu pia tuhamishe safu ya `Month` kwenye nafasi ya kwanza yaani `kabla` ya safu ya `Package`.\n", + "\n", + "`dplyr::relocate()` inatumika kubadilisha nafasi za safu.\n" + ], + "metadata": { + "id": "qql1SowfbdnP" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Create a new data frame new_pumpkins\n", + "new_pumpkins <- new_pumpkins %>% \n", + " relocate(Month, .before = Package)\n", + "\n", + "new_pumpkins %>% \n", + " slice_head(n = 7)" + ], + "outputs": [], + "metadata": { + "id": "JJ1x6kw8bixF" + } + }, + { + "cell_type": "markdown", + "source": [ + "Kazi nzuri!👌 Sasa una seti safi na nadhifu ya data ambayo unaweza kutumia kujenga mfano wako mpya wa regression! \n", + "
\n" + ], + "metadata": { + "id": "y8TJ0Za_bn5Y" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 4. Uonyeshaji wa data kwa ggplot2\n", + "\n", + "

\n", + " \n", + "

Infographic na Dasani Madipalli
\n", + "\n", + "\n", + "\n", + "\n", + "Kuna msemo *mwenye busara* unaosema hivi:\n", + "\n", + "> \"Grafu rahisi imeleta taarifa zaidi kwa akili ya mchambuzi wa data kuliko kifaa kingine chochote.\" --- John Tukey\n", + "\n", + "Sehemu ya jukumu la mwanasayansi wa data ni kuonyesha ubora na asili ya data wanayofanyia kazi. Ili kufanya hivyo, mara nyingi huunda uonyeshaji wa kuvutia, au michoro, grafu, na chati, zinazoonyesha vipengele tofauti vya data. Kwa njia hii, wanaweza kuonyesha kwa taswira mahusiano na mapungufu ambayo vinginevyo ni vigumu kugundua.\n", + "\n", + "Uonyeshaji pia unaweza kusaidia kuamua mbinu ya kujifunza kwa mashine inayofaa zaidi kwa data. Kwa mfano, grafu ya alama inayofuata mstari inaweza kuonyesha kuwa data ni mgombea mzuri kwa zoezi la regression ya mstari.\n", + "\n", + "R inatoa mifumo kadhaa ya kutengeneza grafu, lakini [`ggplot2`](https://ggplot2.tidyverse.org/index.html) ni mojawapo ya mifumo maridadi na yenye uwezo mkubwa. `ggplot2` hukuruhusu kuunda grafu kwa **kuunganisha vipengele huru**.\n", + "\n", + "Tuanzie na grafu rahisi ya alama kwa safu za Price na Month.\n", + "\n", + "Kwa hivyo, katika hali hii, tutaanza na [`ggplot()`](https://ggplot2.tidyverse.org/reference/ggplot.html), tutaweka dataset na ramani ya esthetiki (kwa [`aes()`](https://ggplot2.tidyverse.org/reference/aes.html)) kisha tutaongeza tabaka (kama [`geom_point()`](https://ggplot2.tidyverse.org/reference/geom_point.html)) kwa grafu za alama.\n" + ], + "metadata": { + "id": "mYSH6-EtbvNa" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Set a theme for the plots\n", + "theme_set(theme_light())\n", + "\n", + "# Create a scatter plot\n", + "p <- ggplot(data = new_pumpkins, aes(x = Price, y = Month))\n", + "p + geom_point()" + ], + "outputs": [], + "metadata": { + "id": "g2YjnGeOcLo4" + } + }, + { + "cell_type": "markdown", + "source": [ + "Je, huu ni mchoro wa maana 🤷? 
Kuna chochote kinachokushangaza kuhusu huu mchoro?\n", + "\n", + "Sio wa maana sana kwani unachofanya tu ni kuonyesha data yako kama usambazaji wa alama katika mwezi fulani. \n", + "
\n" + ], + "metadata": { + "id": "Ml7SDCLQcPvE" + } + }, + { + "cell_type": "markdown", + "source": [ + "### **Je, tunafanyaje iwe ya manufaa?**\n", + "\n", + "Ili kupata chati zinazoonyesha data ya manufaa, mara nyingi unahitaji kuunganisha data kwa namna fulani. Kwa mfano, katika hali yetu, kupata wastani wa bei ya maboga kwa kila mwezi kungeweza kutoa ufahamu zaidi kuhusu mifumo ya msingi katika data yetu. Hii inatupeleka kwenye kipengele kingine cha **dplyr**:\n", + "\n", + "#### `dplyr::group_by() %>% summarize()`\n", + "\n", + "Uchanganuzi wa vikundi katika R unaweza kufanywa kwa urahisi kwa kutumia\n", + "\n", + "`dplyr::group_by() %>% summarize()`\n", + "\n", + "- `dplyr::group_by()` hubadilisha kitengo cha uchambuzi kutoka seti nzima ya data hadi kikundi kimoja kimoja, kama vile kila mwezi.\n", + "\n", + "- `dplyr::summarize()` huunda fremu mpya ya data yenye safu moja kwa kila kigezo cha kikundi na safu moja kwa kila takwimu ya muhtasari uliyoainisha.\n", + "\n", + "Kwa mfano, tunaweza kutumia `dplyr::group_by() %>% summarize()` kuunganisha maboga katika vikundi kulingana na safu ya **Month** na kisha kupata **wastani wa bei** kwa kila mwezi.\n" + ], + "metadata": { + "id": "jMakvJZIcVkh" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Find the average price of pumpkins per month\r\n", + "new_pumpkins %>%\r\n", + " group_by(Month) %>% \r\n", + " summarise(mean_price = mean(Price))" + ], + "outputs": [], + "metadata": { + "id": "6kVSUa2Bcilf" + } + }, + { + "cell_type": "markdown", + "source": [ + "Fupi!✨\n", + "\n", + "Vipengele vya kategoria kama miezi vinaonyeshwa vyema kwa kutumia chati ya pau 📊. Tabaka zinazohusika na chati za pau ni `geom_bar()` na `geom_col()`. 
Tazama `?geom_bar` ili kujifunza zaidi.\n", + "\n", + "Hebu tuunde moja!\n" + ], + "metadata": { + "id": "Kds48GUBcj3W" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Find the average price of pumpkins per month then plot a bar chart\r\n", + "new_pumpkins %>%\r\n", + " group_by(Month) %>% \r\n", + " summarise(mean_price = mean(Price)) %>% \r\n", + " ggplot(aes(x = Month, y = mean_price)) +\r\n", + " geom_col(fill = \"midnightblue\", alpha = 0.7) +\r\n", + " ylab(\"Pumpkin Price\")" + ], + "outputs": [], + "metadata": { + "id": "VNbU1S3BcrxO" + } + }, + { + "cell_type": "markdown", + "source": [ + "🤩🤩Huu ni uwasilishaji wa data unaofaa zaidi! Inaonekana inaonyesha kwamba bei ya juu zaidi ya maboga hutokea mwezi wa Septemba na Oktoba. Je, hilo linakubaliana na matarajio yako? Kwa nini au kwa nini siyo?\n", + "\n", + "Hongera kwa kumaliza somo la pili 👏! Uliandaa data yako kwa ajili ya kujenga modeli, kisha ukagundua maarifa zaidi kwa kutumia uwasilishaji wa data!\n" + ], + "metadata": { + "id": "zDm0VOzzcuzR" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya tafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kwa usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, inashauriwa kutumia tafsiri ya kitaalamu ya binadamu. 
Hatutawajibika kwa maelewano mabaya au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sw/2-Regression/2-Data/solution/notebook.ipynb b/translations/sw/2-Regression/2-Data/solution/notebook.ipynb new file mode 100644 index 000000000..36f1d0a54 --- /dev/null +++ b/translations/sw/2-Regression/2-Data/solution/notebook.ipynb @@ -0,0 +1,437 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NameTypePackageVarietySub VarietyGradeDateLow PriceHigh PriceMostly Low...Unit of SaleQualityConditionAppearanceStorageCropRepackTrans ModeUnnamed: 24Unnamed: 25
70BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN9/24/1615.015.015.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
71BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN9/24/1618.018.018.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
72BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN10/1/1618.018.018.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
73BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN10/1/1617.017.017.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
74BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN10/8/1615.015.015.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " City Name Type Package Variety Sub Variety Grade \\\n", + "70 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "71 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "72 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "73 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "74 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "\n", + " Date Low Price High Price Mostly Low ... Unit of Sale Quality \\\n", + "70 9/24/16 15.0 15.0 15.0 ... NaN NaN \n", + "71 9/24/16 18.0 18.0 18.0 ... NaN NaN \n", + "72 10/1/16 18.0 18.0 18.0 ... NaN NaN \n", + "73 10/1/16 17.0 17.0 17.0 ... NaN NaN \n", + "74 10/8/16 15.0 15.0 15.0 ... NaN NaN \n", + "\n", + " Condition Appearance Storage Crop Repack Trans Mode Unnamed: 24 \\\n", + "70 NaN NaN NaN NaN N NaN NaN \n", + "71 NaN NaN NaN NaN N NaN NaN \n", + "72 NaN NaN NaN NaN N NaN NaN \n", + "73 NaN NaN NaN NaN N NaN NaN \n", + "74 NaN NaN NaN NaN N NaN NaN \n", + "\n", + " Unnamed: 25 \n", + "70 NaN \n", + "71 NaN \n", + "72 NaN \n", + "73 NaN \n", + "74 NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "pumpkins = pd.read_csv('../../data/US-pumpkins.csv')\n", + "\n", + "pumpkins = pumpkins[pumpkins['Package'].str.contains('bushel', case=True, regex=True)]\n", + "\n", + "pumpkins.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "City Name 0\n", + "Type 406\n", + "Package 0\n", + "Variety 0\n", + "Sub Variety 167\n", + "Grade 415\n", + "Date 0\n", + "Low Price 0\n", + "High Price 0\n", + "Mostly Low 24\n", + "Mostly High 24\n", + "Origin 0\n", + "Origin District 396\n", + "Item Size 114\n", + "Color 145\n", + "Environment 415\n", + "Unit of Sale 404\n", + "Quality 415\n", + "Condition 415\n", + "Appearance 
415\n", + "Storage 415\n", + "Crop 415\n", + "Repack 0\n", + "Trans Mode 415\n", + "Unnamed: 24 415\n", + "Unnamed: 25 391\n", + "dtype: int64" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pumpkins.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Month Package Low Price High Price Price\n", + "70 9 1 1/9 bushel cartons 15.00 15.0 13.50\n", + "71 9 1 1/9 bushel cartons 18.00 18.0 16.20\n", + "72 10 1 1/9 bushel cartons 18.00 18.0 16.20\n", + "73 10 1 1/9 bushel cartons 17.00 17.0 15.30\n", + "74 10 1 1/9 bushel cartons 15.00 15.0 13.50\n", + "... ... ... ... ... ...\n", + "1738 9 1/2 bushel cartons 15.00 15.0 30.00\n", + "1739 9 1/2 bushel cartons 13.75 15.0 28.75\n", + "1740 9 1/2 bushel cartons 10.75 15.0 25.75\n", + "1741 9 1/2 bushel cartons 12.00 12.0 24.00\n", + "1742 9 1/2 bushel cartons 12.00 12.0 24.00\n", + "\n", + "[415 rows x 5 columns]\n" + ] + } + ], + "source": [ + "\n", + "# A set of new columns for a new dataframe. 
Filter out nonmatching columns\n", + "columns_to_select = ['Package', 'Low Price', 'High Price', 'Date']\n", + "pumpkins = pumpkins.loc[:, columns_to_select]\n", + "\n", + "# Get an average between low and high price for the base pumpkin price\n", + "price = (pumpkins['Low Price'] + pumpkins['High Price']) / 2\n", + "\n", + "# Convert the date to its month only\n", + "month = pd.DatetimeIndex(pumpkins['Date']).month\n", + "\n", + "# Create a new dataframe with this basic data\n", + "new_pumpkins = pd.DataFrame({'Month': month, 'Package': pumpkins['Package'], 'Low Price': pumpkins['Low Price'],'High Price': pumpkins['High Price'], 'Price': price})\n", + "\n", + "# Convert the price if the Package contains fractional bushel values\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1 1/9'), 'Price'] = price/(1 + 1/9)\n", + "\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1/2'), 'Price'] = price/(1/2)\n", + "\n", + "print(new_pumpkins)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "price = new_pumpkins.Price\n", + "month = new_pumpkins.Month\n", + "plt.scatter(price, month)\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0, 0.5, 'Pumpkin Price')" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEJCAYAAACT/UyFAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAAsTAAALEwEAmpwYAAARAElEQVR4nO3de5AlZX3G8e8joKigiIwbVNYVQ6ErwcVaiRW0CgUNikEQKxFTijHJahlUSsvUqknE/LVE0KoYNVkDigloNCoQLt5AxUuCLrrhIhqUQgMiLBGE0goR+OWP0+sMszOzZ8ft0zO830/VqTndfc7phwae6XlPX1JVSJLa8aChA0iSJsvil6TGWPyS1BiLX5IaY/FLUmMsfklqzK5DBxjHPvvsU6tWrRo6hiQtK1dcccVtVTU1e/6yKP5Vq1axadOmoWNI0rKS5IdzzXeoR5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktSYZXECl3auVesvHDoCN2w4eugIUrMsfjXNX4JqkUM9ktQYi1+SGmPxS1JjLH5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqTG/Fn2S/JF9M8p0k1yR5Yzf/lCQ3JdncPV7YVwZJ0rZ27fGz7wHeXFXfSrIncEWSz3fL3lNVp/W4bknSPHor/qq6Gbi5e35XkmuBx/W1PknSePrc4/+VJKuAQ4DLgcOAk5K8EtjE6K+C2yeRQ9L8Vq2/cOgI3LDh6KEjNKH3L3eT7AF8Eji5qu4EPgA8CVjD6C+C0+d537okm5Js2rJlS98xJakZvRZ/kt0Ylf7ZVfUpgKq6parurar7gA8Ch8713qraWFVrq2rt1NRUnzElqSl9HtUT4Azg2qp694z5+8542XHA1X1lkCRtq88x/sOAVwBXJdnczXsbcEKSNUABNwCv6TGDJGmWPo/q+SqQORZd1Nc6F+IXV5I04pm7ktQYi1+SGmPxS1JjLH5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktQYi1+SGmPxS1JjLH5JakxvxZ9kvyRfTPKdJNckeWM3f+8kn09yXffzUX1lkCRtq889/nuAN1fVauCZwJ8lWQ2sBy6pqgOAS7ppSdKE9Fb8VXVzVX2re34XcC3wOODFwFndy84Cju0rgyRpWxMZ40+yCjgEuBxYUVU3d4t+AqyY5z3rkmxKsmnLli2TiClJTei9+JPsAXwSOLmq7py5rKoKqLneV1Ubq2ptVa2dmprqO6YkNWOs4k/y0CQH7uiHJ9mNUemfXVWf6mbfkmTfbvm+wK07+rmSpM
XbbvEn+T1gM/CZbnpNkvPHeF+AM4Brq+rdMxadD5zYPT8ROG8HM0uSfg3j7PGfAhwK3AFQVZuBJ47xvsOAVwDPTbK5e7wQ2AA8L8l1wJHdtCRpQnYd4zW/rKqfjXbgf2XOcfn7vaDqq0DmWXzEGOuVJPVgnOK/JsnLgV2SHAC8Afh6v7EkSX0ZZ6jn9cBTgbuBc4CfASf3mEmS1KPt7vFX1S+At3cPSdIyN85RPZ9PsteM6Ucl+WyvqSRJvRlnqGefqrpj60RV3Q48prdEkqRejVP89yVZuXUiyRMY46geSdLSNM5RPW8Hvprky4wOz3w2sK7XVJKk3ozz5e5nkjyd0aWVYXTNndv6jSVJ6su8Qz1Jntz9fDqwEvhx91jZzZMkLUML7fG/idGQzulzLCvgub0kkiT1at7ir6p1SR4E/EVVfW2CmSRJPVrwqJ6qug/4uwllkSRNwDiHc16S5PjMukqbJGl5Gqf4XwN8Arg7yZ1J7kpy5/beJElamsY5nHPPSQSRJE3GQodzHpDkvCRXJzknyeMmGUyS1I+FhnrOBC4Ajge+Dbx3IokkSb1aaKhnz6r6YPf8XUm+NYlAkqR+LVT8uyc5hOnbJz505nRV+YtAkpahhYr/ZuDdM6Z/MmPaM3claZla6Mzd50wyiCRpMsY5jl+S9ABi8UtSYyx+SWrMOHfgojt56wkzX19Vl/UVSpLUn+0Wf5JTgT8AvgPc280uwOKXpGVonD3+Y4EDq+runrNIkiZgnOK/HtgN2KHiT3Im8CLg1qo6qJt3CvCnwJbuZW+rqot25HMlqW+r1l84dARu2HB0b589TvH/Atic5BJmlH9VvWE77/swo5u4fGTW/PdU1Wk7ElKStPOMU/znd48dUlWXJVm1w4kkSb0a53r8Z+3kdZ6U5JXAJuDNVXX7XC9Kso7Rzd5ZuXLlTo4gSe1a6Hr8H+9+XpXkytmPRa7vA8CTgDWMrgV0+nwvrKqNVbW2qtZOTU0tcnWSpNkW2uN/Y/fzRTtrZVV1y9bnST7I6Hr/kqQJmnePv6pu7p6urqofznwAL1jMypLsO2PyOODqxXyOJGnxxvly9y+T3F1VlwIk+XPgOcDfL/SmJB8FDgf2SXIj8A7g8CRrGJ0AdgOjG7lLkiZonOI/BrggyVuAo4AnAy/e3puq6oQ5Zp+xY/EkSTvbOEf13JbkGOALwBXAS6uqek8mSerFvMWf5C5GQzJbPRjYH3hpkqqqR/QdTpK08y10B649JxlEkjQZ416W+SXAsxj9BfCVqjq3z1CSpP5s90YsSd4PvBa4itHhl69N8r6+g0mS+jHOHv9zgads/UI3yVnANb2mkiT1ZpxbL34fmHmxnP26eZKkZWicPf49gWuTfKObfgawKcn5AFV1TF/hJEk73zjF/1e9p5AkTcw4J3B9GSDJI7j/zdZ/2mMuSVJPxrnZ+jrgr4H/Be4Dwuiwzv37jSZJ6sM4Qz1vAQ6qqtv6DiNJ6t84R/X8gNF9dyVJDwDj7PG/Ffh6ksvZsZutS5KWoHGK/x+ASxmduXtfv3EkSX0bp/h3q6o39Z5EkjQR44zxX5xkXZJ9k+y99dF7MklSL8bZ4996J623zpjn4ZyStEyNcwLXEycRRJI0GeOcwPXKueZX1Ud2fhxJUt/GGep5xoznuwNHAN8CLH5JWobGGep5/czpJHsBH+srkCSpX+Mc1TPbzwHH/SVpmRpnjP/fGB3FA6NfFKuBj/cZSpLUn3HG+E+b8fwe4IdVdWNPeSRJPZu3+JPszugm67/J6HINZ1TVPZMKJknqx0Jj/GcBaxmV/guA0yeSSJLUq4WGelZX1W8BJDkD+MYCr91GkjOBFwG3VtVB3by9gX8BVgE3AL9fVbfveGxJ0mIttMf/y61PFjnE82HgqFnz1gOXVNUBwCXdtCRpghYq/qclubN73AUcvPV5kju398FVdRkw+768L2Y0hET389jFhJYkLd68Qz1VtUsP61tRVTd3z38CrO
hhHZKkBSzmBK6doqqK6fMDttFdCnpTkk1btmyZYDJJemCbdPHfkmRfgO7nrfO9sKo2VtXaqlo7NTU1sYCS9EA36eI/Hzixe34icN6E1y9Jzeut+JN8FPh34MAkNyb5Y2AD8Lwk1wFHdtOSpAka55INi1JVJ8yz6Ii+1ilJ2r7BvtyVJA3D4pekxlj8ktQYi1+SGmPxS1JjLH5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktSYXYdYaZIbgLuAe4F7qmrtEDkkqUWDFH/nOVV124Drl6QmOdQjSY0ZqvgL+FySK5KsGyiDJDVpqKGeZ1XVTUkeA3w+yXer6rKZL+h+IawDWLly5RAZJekBaZA9/qq6qft5K/Bp4NA5XrOxqtZW1dqpqalJR5SkB6yJF3+ShyfZc+tz4PnA1ZPOIUmtGmKoZwXw6SRb139OVX1mgByS1KSJF39VXQ88bdLrlSSNeDinJDXG4pekxlj8ktQYi1+SGmPxS1JjLH5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktQYi1+SGjNI8Sc5Ksn3knw/yfohMkhSqyZe/El2Ad4HvABYDZyQZPWkc0hSq4bY4z8U+H5VXV9V/wd8DHjxADkkqUmpqsmuMHkpcFRV/Uk3/Qrgt6vqpFmvWwes6yYPBL430aDb2ge4beAMS4XbYprbYprbYtpS2RZPqKqp2TN3HSLJOKpqI7Bx6BxbJdlUVWuHzrEUuC2muS2muS2mLfVtMcRQz03AfjOmH9/NkyRNwBDF/03ggCRPTPJg4GXA+QPkkKQmTXyop6ruSXIS8FlgF+DMqrpm0jkWYckMOy0BbotpbotpbotpS3pbTPzLXUnSsDxzV5IaY/FLUmMsfklqzJI9jn9IM442+nFVfSHJy4HfAa4FNlbVLwcNOGFJ9gdewugw3HuB/wLOqao7Bw0maVH8cncOSc5m9EvxYcAdwB7Ap4AjGG2zE4dLN1lJ3gC8CLgMeCHwbUbb5DjgdVX1pcHCSVoUi38OSa6sqoOT7Mro5LLHVtW9SQL8Z1UdPHDEiUlyFbCm++d/GHBRVR2eZCVwXlUdMnDEiUnySOCtwLHAY4ACbgXOAzZU1R2DhVtCklxcVS8YOsekJHkEo/8uHg9cXFXnzFj2/qp63WDh5uFQz9we1A33PJzRXv8jgZ8CDwF2GzLYQHZlNMTzEEZ//VBVP0rS2rb4OHApcHhV/QQgyW8AJ3bLnj9gtolK8vT5FgFrJhhlKfgQcB3wSeDVSY4HXl5VdwPPHDTZPCz+uZ0BfJfRCWZvBz6R5HpG/xI/NmSwAfwj8M0klwPPBk4FSDLF6JdhS1ZV1akzZ3S/AE5N8uqBMg3lm8CXGRX9bHtNNsrgnlRVx3fPz03yduDSJMcMGWohDvXMI8ljAarqx0n2Ao4EflRV3xg02ACSPBV4CnB1VX136DxDSfI54AvAWVV1SzdvBfAq4HlVdeSA8SYqydXAcVV13RzL/ruq9pvjbQ9ISa4FnlpV982Y9yrgLcAeVfWEobLNx+KXxpTkUcB6RvePeEw3+xZG15raUFW3D5Vt0rrLq19VVdtcLj3JsVV17uRTDSPJ3wCfq6ovzJp/FPDeqjpgmGTzs/ilnSDJH1XVh4bOsRS4LaYt1W1h8Us7QZIfVdXKoXMsBW6LaUt1W/jlrjSmJFfOtwhYMcksQ3NbTFuO28Lil8a3AvhdYPZYfoCvTz7OoNwW05bdtrD4pfFdwOgojc2zFyT50sTTDMttMW3ZbQvH+CWpMV6dU5IaY/FLUmMsfglIUkn+ecb0rkm2JL
lgkZ+3V5LXzZg+fLGfJe1sFr808nPgoCQP7aafx+jKrIu1F7DkrsoogcUvzXQRcHT3/ATgo1sXJNk7yblJrkzyH0kO7uafkuTMJF9Kcn13/wKADcCTkmxO8q5u3h5J/jXJd5Oc3V3mW5o4i1+a9jHgZUl2Bw4GLp+x7J3At7t7MbwN+MiMZU9mdBz3ocA7ustVrwd+UFVrquot3esOAU4GVgP7A4f1+M8izcvilzpVdSWwitHe/kWzFj8L+KfudZcCj+5uwAFwYVXdXVW3Mboxy3xna36jqm7sruK4uVuXNHGewCXd3/nAacDhwKPHfM/dM57fy/z/X437OqlX7vFL93cm8M6qumrW/K8AfwijI3SA27Zzs/m7gD37CCj9utzjkGaoqhuBv51j0SnAmd0FuX7B6HaLC33O/yT5WnfDkouBC3d2VmmxvGSDJDXGoR5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSY/4fZDFW+b6+4WkAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "new_pumpkins.groupby(['Month'])['Price'].mean().plot(kind='bar')\n", + "plt.ylabel(\"Pumpkin Price\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya tafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kwa usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, inashauriwa kutumia huduma ya tafsiri ya kitaalamu ya binadamu. Hatutawajibika kwa maelewano mabaya au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" + }, + "kernelspec": { + "display_name": "Python 3.7.0 64-bit ('3.7')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.1" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "95726f0b8283628d5356a4f8eb8b4b76", + "translation_date": "2025-09-06T13:46:25+00:00", + "source_file": "2-Regression/2-Data/solution/notebook.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sw/2-Regression/3-Linear/notebook.ipynb 
b/translations/sw/2-Regression/3-Linear/notebook.ipynb new file mode 100644 index 000000000..0b8c37fc5 --- /dev/null +++ b/translations/sw/2-Regression/3-Linear/notebook.ipynb @@ -0,0 +1,128 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Bei za Maboga\n", + "\n", + "Pakia maktaba zinazohitajika na seti ya data. Badilisha data kuwa dataframe inayoonyesha sehemu ndogo ya data:\n", + "\n", + "- Chagua tu maboga yaliyo na bei kwa kipimo cha bushel\n", + "- Badilisha tarehe kuwa mwezi\n", + "- Hesabu bei kuwa wastani wa bei ya juu na ya chini\n", + "- Badilisha bei ili kuonyesha bei kulingana na idadi ya bushel\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from datetime import datetime\n", + "\n", + "pumpkins = pd.read_csv('../data/US-pumpkins.csv')\n", + "\n", + "pumpkins.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pumpkins = pumpkins[pumpkins['Package'].str.contains('bushel', case=True, regex=True)]\n", + "\n", + "columns_to_select = ['Package', 'Variety', 'City Name', 'Low Price', 'High Price', 'Date']\n", + "pumpkins = pumpkins.loc[:, columns_to_select]\n", + "\n", + "price = (pumpkins['Low Price'] + pumpkins['High Price']) / 2\n", + "\n", + "month = pd.DatetimeIndex(pumpkins['Date']).month\n", + "day_of_year = pd.to_datetime(pumpkins['Date']).apply(lambda dt: (dt-datetime(dt.year,1,1)).days)\n", + "\n", + "new_pumpkins = pd.DataFrame(\n", + " {'Month': month, \n", + " 'DayOfYear' : day_of_year, \n", + " 'Variety': pumpkins['Variety'], \n", + " 'City': pumpkins['City Name'], \n", + " 'Package': pumpkins['Package'], \n", + " 'Low Price': pumpkins['Low Price'],\n", + " 'High Price': pumpkins['High Price'], \n", + " 'Price': price})\n", + "\n", + 
"new_pumpkins.loc[new_pumpkins['Package'].str.contains('1 1/9'), 'Price'] = price/1.1\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1/2'), 'Price'] = price*2\n", + "\n", + "new_pumpkins.head()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Mchoro wa msingi wa kutawanya unatukumbusha kwamba tuna data ya miezi kutoka Agosti hadi Desemba tu. Huenda tunahitaji data zaidi ili kuweza kutoa hitimisho kwa mtindo wa mstari.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "plt.scatter('Month','Price',data=new_pumpkins)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "plt.scatter('DayOfYear','Price',data=new_pumpkins)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya tafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kwa usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, inashauriwa kutumia tafsiri ya kitaalamu ya binadamu. 
Hatutawajibika kwa maelewano mabaya au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3-final" + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "b032d371c75279373507f003439a577e", + "translation_date": "2025-09-06T13:09:00+00:00", + "source_file": "2-Regression/3-Linear/notebook.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sw/2-Regression/3-Linear/solution/R/lesson_3-R.ipynb b/translations/sw/2-Regression/3-Linear/solution/R/lesson_3-R.ipynb new file mode 100644 index 000000000..bb7e99d76 --- /dev/null +++ b/translations/sw/2-Regression/3-Linear/solution/R/lesson_3-R.ipynb @@ -0,0 +1,1084 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_3-R.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "5015d65d61ba75a223bfc56c273aa174", + "translation_date": "2025-09-06T13:21:17+00:00", + "source_file": "2-Regression/3-Linear/solution/R/lesson_3-R.ipynb", + "language_code": "sw" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "EgQw8osnsUV-" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Urejeleaji wa Linear na Polynomial kwa Bei ya Maboga - Somo la 3\n", + "

\n", + " \n", + "

Picha ya Dasani Madipalli
\n", + "\n", + "\n", + "#### Utangulizi\n", + "\n", + "Hadi sasa umechunguza maana ya urejeleaji (regression) kwa kutumia data ya mfano iliyokusanywa kutoka kwenye seti ya data ya bei ya maboga ambayo tutatumia katika somo hili. Pia umeweza kuiona kwa kutumia `ggplot2`.💪\n", + "\n", + "Sasa uko tayari kuingia kwa undani zaidi katika urejeleaji kwa ML. Katika somo hili, utajifunza zaidi kuhusu aina mbili za urejeleaji: *urejeleaji wa msingi wa linear* na *urejeleaji wa polynomial*, pamoja na baadhi ya hesabu zinazohusiana na mbinu hizi.\n", + "\n", + "> Katika mtaala huu, tunadhani ujuzi mdogo wa hesabu, na tunalenga kuifanya iwe rahisi kwa wanafunzi kutoka nyanja nyingine, kwa hivyo angalia maelezo, 🧮 vidokezo, michoro, na zana nyingine za kujifunza ili kusaidia kuelewa.\n", + "\n", + "#### Maandalizi\n", + "\n", + "Kama ukumbusho, unachukua data hii ili kuuliza maswali kuhusu data hiyo.\n", + "\n", + "- Ni wakati gani bora wa kununua maboga?\n", + "\n", + "- Ni bei gani ninayoweza kutarajia kwa sanduku la maboga madogo?\n", + "\n", + "- Je, ninunue maboga kwa vikapu vya nusu-bushel au kwa sanduku la bushel 1 1/9? Hebu tuendelee kuchimba data hii.\n", + "\n", + "Katika somo lililopita, uliunda `tibble` (mabadiliko ya kisasa ya fremu ya data) na kuijaza na sehemu ya seti ya data ya awali, ukistandardisha bei kwa bushel. Kwa kufanya hivyo, hata hivyo, uliweza tu kukusanya takriban alama 400 za data na tu kwa miezi ya vuli. Labda tunaweza kupata maelezo zaidi kuhusu asili ya data kwa kuisafisha zaidi? Tutaona... 
🕵️‍♀️\n", + "\n", + "Kwa kazi hii, tutahitaji vifurushi vifuatavyo:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) ni [mkusanyiko wa vifurushi vya R](https://www.tidyverse.org/packages) iliyoundwa kufanya sayansi ya data kuwa ya haraka, rahisi na ya kufurahisha!\n", + "\n", + "- `tidymodels`: Mfumo wa [tidymodels](https://www.tidymodels.org/) ni [mkusanyiko wa vifurushi](https://www.tidymodels.org/packages/) kwa ajili ya uundaji wa mifano na ujifunzaji wa mashine.\n", + "\n", + "- `janitor`: Kifurushi cha [janitor](https://github.com/sfirke/janitor) kinatoa zana rahisi za kuchunguza na kusafisha data chafu.\n", + "\n", + "- `corrplot`: Kifurushi cha [corrplot](https://cran.r-project.org/web/packages/corrplot/vignettes/corrplot-intro.html) kinatoa zana ya kuona kwa uchunguzi wa matriki ya uhusiano ambayo inaunga mkono upangaji upya wa kiotomatiki wa vigezo ili kusaidia kugundua mifumo iliyofichwa kati ya vigezo.\n", + "\n", + "Unaweza kuvifunga kwa kutumia:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\", \"janitor\", \"corrplot\"))`\n", + "\n", + "Skripti iliyo hapa chini inakagua kama una vifurushi vinavyohitajika kukamilisha moduli hii na kuvifunga kwako endapo havipo.\n" + ], + "metadata": { + "id": "WqQPS1OAsg3H" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "suppressWarnings(if (!require(\"pacman\")) install.packages(\"pacman\"))\n", + "\n", + "pacman::p_load(tidyverse, tidymodels, janitor, corrplot)" + ], + "outputs": [], + "metadata": { + "id": "tA4C2WN3skCf", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "c06cd805-5534-4edc-f72b-d0d1dab96ac0" + } + }, + { + "cell_type": "markdown", + "source": [ + "Tutatumia baadaye vifurushi hivi vya ajabu na kuvifanya viweze kupatikana katika kikao chetu cha sasa cha R. (Hii ni kwa madhumuni ya maelezo tu, `pacman::p_load()` tayari imefanya hivyo kwako)\n", + "\n", + "## 1. 
Mstari wa regression ya mstari\n", + "\n", + "Kama ulivyojifunza katika Somo la 1, lengo la zoezi la regression ya mstari ni kuweza kuchora *mstari* *wa* *ufanisi bora* ili:\n", + "\n", + "- **Kuonyesha uhusiano wa vigezo**. Kuonyesha uhusiano kati ya vigezo\n", + "\n", + "- **Kutabiri**. Kufanya utabiri sahihi kuhusu mahali ambapo data mpya itakuwa ikilinganishwa na mstari huo.\n", + "\n", + "Ili kuchora aina hii ya mstari, tunatumia mbinu ya takwimu inayoitwa **Regression ya Least-Squares**. Neno `least-squares` linamaanisha kwamba alama zote za data zinazozunguka mstari wa regression zinapigwa mraba na kisha kuongezwa. Kwa kawaida, jumla ya mwisho inapaswa kuwa ndogo iwezekanavyo, kwa sababu tunataka idadi ndogo ya makosa, au `least-squares`. Kwa hivyo, mstari wa ufanisi bora ni mstari unaotupa thamani ya chini zaidi kwa jumla ya makosa yaliyopigwa mraba - hivyo jina *least squares regression*.\n", + "\n", + "Tunafanya hivyo kwa sababu tunataka kuunda mstari ambao una umbali wa chini zaidi wa jumla kutoka kwa alama zote za data. Pia tunapiga mraba maneno kabla ya kuyaongeza kwa sababu tunajali ukubwa wake badala ya mwelekeo wake.\n", + "\n", + "> **🧮 Nionyeshe hesabu**\n", + ">\n", + "> Mstari huu, unaoitwa *mstari wa ufanisi bora* unaweza kuonyeshwa na [mlinganyo wa hesabu](https://en.wikipedia.org/wiki/Simple_linear_regression):\n", + ">\n", + "> Y = a + bX\n", + ">\n", + "> `X` ni '`kigezo cha kuelezea` au `kigezo cha kutabiri`'. `Y` ni '`kigezo kinachotegemea` au `matokeo`'. 
Mwelekeo wa mstari ni `b` na `a` ni y-intercept, ambayo inahusu thamani ya `Y` wakati `X = 0`.\n", + ">\n", + "\n", + "> ![](../../../../../../2-Regression/3-Linear/solution/images/slope.png \"mwelekeo = $y/x$\")\n", + " Picha ya maelezo na Jen Looper\n", + ">\n", + "> Kwanza, hesabu mwelekeo `b`.\n", + ">\n", + "> Kwa maneno mengine, na tukirejelea swali la awali la data ya malenge: \"tabiri bei ya malenge kwa gunia kwa mwezi\", `X` ingekuwa inahusu bei na `Y` ingekuwa inahusu mwezi wa mauzo.\n", + ">\n", + "> ![](../../../../../../2-Regression/3-Linear/solution/images/calculation.png)\n", + " Picha ya maelezo na Jen Looper\n", + "> \n", + "> Hesabu thamani ya Y. Ikiwa unalipa karibu \\$4, lazima iwe Aprili!\n", + ">\n", + "> Hesabu inayochora mstari lazima ionyeshe mwelekeo wa mstari, ambao pia unategemea intercept, au mahali ambapo `Y` iko wakati `X = 0`.\n", + ">\n", + "> Unaweza kuona mbinu ya hesabu ya thamani hizi kwenye tovuti ya [Math is Fun](https://www.mathsisfun.com/data/least-squares-regression.html). Pia tembelea [hii Calculator ya Least-Squares](https://www.mathsisfun.com/data/least-squares-calculator.html) ili kuona jinsi thamani za namba zinavyoathiri mstari.\n", + "\n", + "Si ya kutisha sana, sivyo? 🤓\n", + "\n", + "#### Uhusiano\n", + "\n", + "Neno moja zaidi la kuelewa ni **Coefficient ya Uhusiano** kati ya vigezo X na Y vilivyotolewa. Kwa kutumia scatterplot, unaweza kuona haraka coefficient hii. Mchoro wenye alama za data zilizopangwa kwa mstari mzuri una uhusiano wa juu, lakini mchoro wenye alama za data zilizotawanyika kila mahali kati ya X na Y una uhusiano wa chini.\n", + "\n", + "Mfano mzuri wa regression ya mstari utakuwa ule ambao una Coefficient ya Uhusiano ya juu (karibu na 1 kuliko 0) kwa kutumia mbinu ya Regression ya Least-Squares na mstari wa regression.\n" + ], + "metadata": { + "id": "cdX5FRpvsoP5" + } + }, + { + "cell_type": "markdown", + "source": [ + "## **2. 
Mchezo na data: kuunda fremu ya data itakayotumika kwa uundaji wa mifano**\n", + "\n", + "

\n", + " \n", + "

Sanaa na @allison_horst
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "WdUKXk7Bs8-V" + } + }, + { + "cell_type": "markdown", + "source": [ + "Pakia maktaba zinazohitajika na seti ya data. Badilisha data kuwa fremu ya data inayojumuisha sehemu ndogo ya data:\n", + "\n", + "- Chagua tu maboga yanayouzwa kwa bei ya bushel\n", + "\n", + "- Badilisha tarehe kuwa mwezi\n", + "\n", + "- Hesabu bei kuwa wastani wa bei ya juu na ya chini\n", + "\n", + "- Badilisha bei ili kuonyesha bei kulingana na idadi ya bushel\n", + "\n", + "> Tulifunika hatua hizi katika [somo la awali](https://github.com/microsoft/ML-For-Beginners/blob/main/2-Regression/2-Data/solution/lesson_2-R.ipynb).\n" + ], + "metadata": { + "id": "fMCtu2G2s-p8" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load the core Tidyverse packages\n", + "library(tidyverse)\n", + "library(lubridate)\n", + "\n", + "# Import the pumpkins data\n", + "pumpkins <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/2-Regression/data/US-pumpkins.csv\")\n", + "\n", + "\n", + "# Get a glimpse and dimensions of the data\n", + "glimpse(pumpkins)\n", + "\n", + "\n", + "# Print the first 50 rows of the data set\n", + "pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "ryMVZEEPtERn" + } + }, + { + "cell_type": "markdown", + "source": [ + "Kwa roho ya ujasiri wa kweli, hebu tuchunguze [`janitor package`](https://github.com/sfirke/janitor) ambayo inatoa kazi rahisi za kuchunguza na kusafisha data chafu. Kwa mfano, hebu tuangalie majina ya safu kwa data yetu:\n" + ], + "metadata": { + "id": "xcNxM70EtJjb" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Return column names\n", + "pumpkins %>% \n", + " names()" + ], + "outputs": [], + "metadata": { + "id": "5XtpaIigtPfW" + } + }, + { + "cell_type": "markdown", + "source": [ + "🤔 Tunaweza kufanya vizuri zaidi. 
Hebu tufanye majina haya ya safu `friendR` kwa kuyabadilisha kuwa muundo wa [snake_case](https://en.wikipedia.org/wiki/Snake_case) kwa kutumia `janitor::clean_names`. Ili kujifunza zaidi kuhusu kazi hii: `?clean_names`\n" + ], + "metadata": { + "id": "IbIqrMINtSHe" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Clean names to the snake_case convention\n", + "pumpkins <- pumpkins %>% \n", + " clean_names(case = \"snake\")\n", + "\n", + "# Return column names\n", + "pumpkins %>% \n", + " names()" + ], + "outputs": [], + "metadata": { + "id": "a2uYvclYtWvX" + } + }, + { + "cell_type": "markdown", + "source": [ + "Safi sana tidyR 🧹! Sasa, dansi na data ukitumia `dplyr` kama kwenye somo lililopita! 💃\n" + ], + "metadata": { + "id": "HfhnuzDDtaDd" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Select desired columns\n", + "pumpkins <- pumpkins %>% \n", + " select(variety, city_name, package, low_price, high_price, date)\n", + "\n", + "\n", + "\n", + "# Extract the month from the dates to a new column\n", + "pumpkins <- pumpkins %>%\n", + " mutate(date = mdy(date),\n", + " month = month(date)) %>% \n", + " select(-date)\n", + "\n", + "\n", + "\n", + "# Create a new column for average Price\n", + "pumpkins <- pumpkins %>% \n", + " mutate(price = (low_price + high_price)/2)\n", + "\n", + "\n", + "# Retain only pumpkins with the string \"bushel\"\n", + "new_pumpkins <- pumpkins %>% \n", + " filter(str_detect(string = package, pattern = \"bushel\"))\n", + "\n", + "\n", + "# Normalize the pricing so that you show the pricing per bushel, not per 1 1/9 or 1/2 bushel\n", + "new_pumpkins <- new_pumpkins %>% \n", + " mutate(price = case_when(\n", + " str_detect(package, \"1 1/9\") ~ price/(1.1),\n", + " str_detect(package, \"1/2\") ~ price*2,\n", + " TRUE ~ price))\n", + "\n", + "# Relocate column positions\n", + "new_pumpkins <- new_pumpkins %>% \n", + " relocate(month, .before = variety)\n", + "\n", + "\n", + 
"# Display the first 5 rows\n", + "new_pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "X0wU3gQvtd9f" + } + }, + { + "cell_type": "markdown", + "source": [ + "Kazi nzuri!👌 Sasa una seti ya data safi na nadhifu ambayo unaweza kutumia kujenga mfano wako mpya wa regression!\n", + "\n", + "Ungependa mchoro wa kutawanyika?\n" + ], + "metadata": { + "id": "UpaIwaxqth82" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Set theme\n", + "theme_set(theme_light())\n", + "\n", + "# Make a scatter plot of month and price\n", + "new_pumpkins %>% \n", + " ggplot(mapping = aes(x = month, y = price)) +\n", + " geom_point(size = 1.6)\n" + ], + "outputs": [], + "metadata": { + "id": "DXgU-j37tl5K" + } + }, + { + "cell_type": "markdown", + "source": [ + "Mchoro wa kutawanyika unatukumbusha kwamba tuna data ya miezi kuanzia Agosti hadi Desemba tu. Huenda tunahitaji data zaidi ili kuweza kutoa hitimisho kwa mtindo wa mstari.\n", + "\n", + "Hebu tuangalie tena data yetu ya uundaji modeli:\n" + ], + "metadata": { + "id": "Ve64wVbwtobI" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Display first 5 rows\n", + "new_pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "HFQX2ng1tuSJ" + } + }, + { + "cell_type": "markdown", + "source": [ + "Je, tungependa kutabiri `bei` ya boga kwa kuzingatia safu za `jiji` au `kifurushi` ambazo ni za aina ya herufi? Au hata kwa urahisi zaidi, tunawezaje kupata uhusiano (ambao unahitaji viingizo vyake vyote viwe vya namba) kati ya, kwa mfano, `kifurushi` na `bei`? 
🤷🤷\n", + "\n", + "Mifano ya kujifunza kwa mashine hufanya kazi vizuri zaidi na vipengele vya namba badala ya thamani za maandishi, kwa hivyo kwa kawaida unahitaji kubadilisha vipengele vya kategoria kuwa uwakilishi wa namba.\n", + "\n", + "Hii inamaanisha kuwa tunapaswa kupata njia ya kurekebisha vigezo vyetu ili kuzifanya ziwe rahisi kwa mfano kutumia kwa ufanisi, mchakato unaojulikana kama `ufundi wa vipengele` (feature engineering).\n" + ], + "metadata": { + "id": "7hsHoxsStyjJ" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 3. Kuchakata data kwa ajili ya modeli kwa kutumia recipes 👩‍🍳👨‍🍳\n", + "\n", + "Shughuli zinazobadilisha maadili ya utabiri ili kuyafanya rahisi kwa modeli kutumia kwa ufanisi zimepewa jina `feature engineering`.\n", + "\n", + "Modeli tofauti zina mahitaji tofauti ya uchakataji wa awali. Kwa mfano, least squares inahitaji `encoding categorical variables` kama mwezi, aina, na city_name. Hii inahusisha tu `kutafsiri` safu yenye `categorical values` kuwa moja au zaidi ya `numeric columns` zinazochukua nafasi ya ile ya awali.\n", + "\n", + "Kwa mfano, fikiria data yako ina kipengele cha kategoria kama ifuatavyo:\n", + "\n", + "| city |\n", + "|:-------:|\n", + "| Denver |\n", + "| Nairobi |\n", + "| Tokyo |\n", + "\n", + "Unaweza kutumia *ordinal encoding* kubadilisha kila kategoria kuwa thamani ya kipekee ya nambari, kama hivi:\n", + "\n", + "| city |\n", + "|:----:|\n", + "| 0 |\n", + "| 1 |\n", + "| 2 |\n", + "\n", + "Na hivyo ndivyo tutakavyofanya kwa data yetu!\n", + "\n", + "Katika sehemu hii, tutachunguza kifurushi kingine cha ajabu cha Tidymodels: [recipes](https://tidymodels.github.io/recipes/) - ambacho kimeundwa kusaidia kuchakata data yako **kabla** ya kufundisha modeli yako. 
Kwa msingi wake, recipe ni kitu kinachofafanua hatua gani zinapaswa kutumika kwenye seti ya data ili kuifanya iwe tayari kwa modeli.\n", + "\n", + "Sasa, hebu tuunde recipe inayotayarisha data yetu kwa modeli kwa kubadilisha nambari ya kipekee kwa uchunguzi wote katika safu za utabiri:\n" + ], + "metadata": { + "id": "AD5kQbcvt3Xl" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Specify a recipe\n", + "pumpkins_recipe <- recipe(price ~ ., data = new_pumpkins) %>% \n", + " step_integer(all_predictors(), zero_based = TRUE)\n", + "\n", + "\n", + "# Print out the recipe\n", + "pumpkins_recipe" + ], + "outputs": [], + "metadata": { + "id": "BNaFKXfRt9TU" + } + }, + { + "cell_type": "markdown", + "source": [ + "Hongera! 👏 Tumetengeneza mapishi yetu ya kwanza yanayobainisha matokeo (bei) na viashiria vyake vinavyolingana, na kwamba safu zote za viashiria zinapaswa kubadilishwa kuwa seti ya nambari za mzima 🙌! Hebu tuichambue haraka:\n", + "\n", + "- Wito kwa `recipe()` na fomula unaeleza mapishi kuhusu *majukumu* ya vigezo kwa kutumia data ya `new_pumpkins` kama rejeleo. Kwa mfano, safu ya `price` imepewa jukumu la `outcome` huku safu nyingine zote zikipewa jukumu la `predictor`.\n", + "\n", + "- `step_integer(all_predictors(), zero_based = TRUE)` inaeleza kwamba viashiria vyote vinapaswa kubadilishwa kuwa seti ya nambari za mzima, na kuhesabu kuanzia 0.\n", + "\n", + "Tuna hakika unaweza kuwa na mawazo kama: \"Hii ni ya kuvutia sana!! Lakini vipi kama ningehitaji kuthibitisha kwamba mapishi yanatekeleza kile ninachotarajia? 🤔\"\n", + "\n", + "Hilo ni wazo zuri sana! Unaona, mara mapishi yako yanapobainishwa, unaweza kukadiria vigezo vinavyohitajika ili kuchakata data, kisha kutoa data iliyochakatwa. 
Kwa kawaida huhitaji kufanya hivi unapotumia Tidymodels (tutaona utaratibu wa kawaida muda si muda-\\> `workflows`) lakini inaweza kuwa muhimu unapohitaji kufanya ukaguzi wa haraka ili kuthibitisha kwamba mapishi yanatekeleza kile unachotarajia.\n", + "\n", + "Kwa hilo, utahitaji vitenzi viwili zaidi: `prep()` na `bake()` na kama kawaida, marafiki wetu wadogo wa R kutoka kwa [`Allison Horst`](https://github.com/allisonhorst/stats-illustrations) wanakusaidia kuelewa hili vyema zaidi!\n", + "\n", + "

\n", + " \n", + "

Uchoraji na @allison_horst
\n" + ], + "metadata": { + "id": "KEiO0v7kuC9O" + } + }, + { + "cell_type": "markdown", + "source": [ + "[`prep()`](https://recipes.tidymodels.org/reference/prep.html): inakadiria vigezo vinavyohitajika kutoka kwenye seti ya mafunzo ambayo inaweza kutumika baadaye kwenye seti nyingine za data. Kwa mfano, kwa safu fulani ya utabiri, ni uchunguzi gani utakaotolewa namba ya mzima 0 au 1 au 2 na kadhalika.\n", + "\n", + "[`bake()`](https://recipes.tidymodels.org/reference/bake.html): inachukua mapishi yaliyotayarishwa na kutekeleza operesheni kwenye seti yoyote ya data.\n", + "\n", + "Kwa kusema hivyo, hebu tuandae na kutekeleza mapishi yetu ili kuthibitisha kweli kwamba ndani ya mfumo, safu za utabiri zitakuwa zimekodishwa kwanza kabla ya modeli kufanyiwa kazi.\n" + ], + "metadata": { + "id": "Q1xtzebuuTCP" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Prep the recipe\n", + "pumpkins_prep <- prep(pumpkins_recipe)\n", + "\n", + "# Bake the recipe to extract a preprocessed new_pumpkins data\n", + "baked_pumpkins <- bake(pumpkins_prep, new_data = NULL)\n", + "\n", + "# Print out the baked data set\n", + "baked_pumpkins %>% \n", + " slice_head(n = 10)" + ], + "outputs": [], + "metadata": { + "id": "FGBbJbP_uUUn" + } + }, + { + "cell_type": "markdown", + "source": [ + "Woo-hoo!🥳 Data iliyochakatwa `baked_pumpkins` ina vigezo vyake vyote vimekodishwa, ikithibitisha kwamba hatua za awali za uchakataji zilizofafanuliwa kama mapishi yetu zitafanya kazi kama ilivyotarajiwa. Hii inafanya iwe ngumu kwako kusoma lakini rahisi zaidi kueleweka kwa Tidymodels! Chukua muda kidogo kugundua ni uchunguzi gani umebadilishwa kuwa nambari inayolingana.\n", + "\n", + "Pia ni muhimu kutaja kwamba `baked_pumpkins` ni fremu ya data ambayo tunaweza kufanya mahesabu juu yake.\n", + "\n", + "Kwa mfano, hebu jaribu kutafuta uhusiano mzuri kati ya alama mbili za data yako ili kujenga mfano mzuri wa utabiri. Tutatumia kazi `cor()` kufanya hivyo. 
Andika `?cor()` ili kujifunza zaidi kuhusu kazi hiyo.\n" + ], + "metadata": { + "id": "1dvP0LBUueAW" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Find the correlation between the city_name and the price\n", + "cor(baked_pumpkins$city_name, baked_pumpkins$price)\n", + "\n", + "# Find the correlation between the package and the price\n", + "cor(baked_pumpkins$package, baked_pumpkins$price)\n" + ], + "outputs": [], + "metadata": { + "id": "3bQzXCjFuiSV" + } + }, + { + "cell_type": "markdown", + "source": [ + "Kama inavyotokea, kuna uhusiano dhaifu tu kati ya Jiji na Bei. Hata hivyo, kuna uhusiano bora kidogo kati ya Kifurushi na Bei yake. Hilo lina mantiki, sivyo? Kwa kawaida, kadri sanduku la mazao linavyokuwa kubwa, ndivyo bei inavyokuwa juu.\n", + "\n", + "Wakati tuko hapa, hebu pia tujaribu kuonyesha matriki ya uhusiano wa safu zote kwa kutumia pakiti ya `corrplot`.\n" + ], + "metadata": { + "id": "BToPWbgjuoZw" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load the corrplot package\n", + "library(corrplot)\n", + "\n", + "# Obtain correlation matrix\n", + "corr_mat <- cor(baked_pumpkins %>% \n", + " # Drop columns that are not really informative\n", + " select(-c(low_price, high_price)))\n", + "\n", + "# Make a correlation plot between the variables\n", + "corrplot(corr_mat, method = \"shade\", shade.col = NA, tl.col = \"black\", tl.srt = 45, addCoef.col = \"black\", cl.pos = \"n\", order = \"original\")" + ], + "outputs": [], + "metadata": { + "id": "ZwAL3ksmutVR" + } + }, + { + "cell_type": "markdown", + "source": [ + "🤩🤩 Bora zaidi.\n", + "\n", + "Swali zuri la kuuliza kuhusu data hii sasa ni: '`Ni bei gani ninayoweza kutarajia kwa kifurushi fulani cha malenge?`' Hebu tuanze moja kwa moja!\n", + "\n", + "> Note: Unapobake **`bake()`** mapishi yaliyoandaliwa **`pumpkins_prep`** na **`new_data = NULL`**, unatoa data ya mafunzo iliyosindikwa (yaani, iliyosimbwa). 
Ikiwa ulikuwa na seti nyingine ya data, kwa mfano seti ya majaribio, na ungependa kuona jinsi mapishi yanavyoweza kuisindika, ungebake tu **`pumpkins_prep`** na **`new_data = test_set`**\n", + "\n", + "## 4. Tengeneza modeli ya regression ya mstari\n", + "\n", + "

\n", + " \n", + "

Picha ya Dasani Madipalli
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "YqXjLuWavNxW" + } + }, + { + "cell_type": "markdown", + "source": [ + "Sasa kwa kuwa tumetengeneza mapishi, na tumethibitisha kuwa data itachakatwa ipasavyo, hebu sasa tujenge mfano wa regression ili kujibu swali: `Ni bei gani ninayoweza kutarajia kwa kifurushi fulani cha malenge?`\n", + "\n", + "#### Fundisha mfano wa regression ya mstari ukitumia seti ya mafunzo\n", + "\n", + "Kama unavyoweza kuwa umeshagundua, safu ya *price* ni `kigezo cha matokeo` wakati safu ya *package* ni `kigezo cha utabiri`.\n", + "\n", + "Ili kufanya hivi, tutagawanya data kwanza ili asilimia 80 iingie kwenye seti ya mafunzo na asilimia 20 kwenye seti ya majaribio, kisha tutaelezea mapishi ambayo yataweka safu ya utabiri katika seti ya namba nzima, kisha tujenge maelezo ya mfano. Hatutapika na kuandaa mapishi yetu kwa sababu tayari tunajua yatachakata data kama inavyotarajiwa.\n" + ], + "metadata": { + "id": "Pq0bSzCevW-h" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "set.seed(2056)\n", + "# Split the data into training and test sets\n", + "pumpkins_split <- new_pumpkins %>% \n", + " initial_split(prop = 0.8)\n", + "\n", + "\n", + "# Extract training and test data\n", + "pumpkins_train <- training(pumpkins_split)\n", + "pumpkins_test <- testing(pumpkins_split)\n", + "\n", + "\n", + "\n", + "# Create a recipe for preprocessing the data\n", + "lm_pumpkins_recipe <- recipe(price ~ package, data = pumpkins_train) %>% \n", + " step_integer(all_predictors(), zero_based = TRUE)\n", + "\n", + "\n", + "\n", + "# Create a linear model specification\n", + "lm_spec <- linear_reg() %>% \n", + " set_engine(\"lm\") %>% \n", + " set_mode(\"regression\")" + ], + "outputs": [], + "metadata": { + "id": "CyoEh_wuvcLv" + } + }, + { + "cell_type": "markdown", + "source": [ + "Kazi nzuri! 
Sasa kwa kuwa tuna mapishi na maelezo ya mfano, tunahitaji kupata njia ya kuyafungamanisha pamoja katika kitu ambacho kitafanya kazi ya kwanza ya kuchakata data (prep+bake nyuma ya pazia), kufundisha mfano kwenye data iliyochakatwa, na pia kuruhusu shughuli za baada ya uchakataji. Hiyo inakupa utulivu wa akili, sivyo!🤩\n", + "\n", + "Katika Tidymodels, kitu hiki rahisi kinaitwa [`workflow`](https://workflows.tidymodels.org/) na kwa urahisi kinashikilia vipengele vyako vya uundaji wa mifano! Hiki ndicho tunachokiita *pipelines* katika *Python*.\n", + "\n", + "Sasa hebu tufungamanishe kila kitu katika workflow!📦\n" + ], + "metadata": { + "id": "G3zF_3DqviFJ" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Hold modelling components in a workflow\n", + "lm_wf <- workflow() %>% \n", + " add_recipe(lm_pumpkins_recipe) %>% \n", + " add_model(lm_spec)\n", + "\n", + "# Print out the workflow\n", + "lm_wf" + ], + "outputs": [], + "metadata": { + "id": "T3olroU3v-WX" + } + }, + { + "cell_type": "markdown", + "source": [ + "Pia, mchakato wa kazi unaweza kufaa/kufunzwa kwa njia sawa na jinsi mfano unavyoweza kufanywa.\n" + ], + "metadata": { + "id": "zd1A5tgOwEPX" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Train the model\n", + "lm_wf_fit <- lm_wf %>% \n", + " fit(data = pumpkins_train)\n", + "\n", + "# Print the model coefficients learned \n", + "lm_wf_fit" + ], + "outputs": [], + "metadata": { + "id": "NhJagFumwFHf" + } + }, + { + "cell_type": "markdown", + "source": [ + "Kutoka kwenye matokeo ya modeli, tunaweza kuona vigezo vilivyojifunzwa wakati wa mafunzo. Vigezo hivi vinawakilisha vigezo vya mstari wa kufaa bora ambao hutupatia makosa ya chini kabisa kati ya thamani halisi na ile iliyotabiriwa.\n", + "\n", + "#### Kutathmini utendaji wa modeli kwa kutumia seti ya majaribio\n", + "\n", + "Ni wakati wa kuona jinsi modeli ilivyofanya kazi 📏! 
Tunafanyaje hivi?\n", + "\n", + "Sasa kwa kuwa tumefundisha modeli, tunaweza kuitumia kutabiri kwa seti ya majaribio (`test_set`) kwa kutumia `parsnip::predict()`. Kisha tunaweza kulinganisha utabiri huu na thamani halisi za lebo ili kutathmini jinsi modeli inavyofanya kazi (au haifanyi kazi!).\n", + "\n", + "Hebu tuanze kwa kufanya utabiri kwa seti ya majaribio kisha tuunganishe safu kwenye seti ya majaribio.\n" + ], + "metadata": { + "id": "_4QkGtBTwItF" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make predictions for the test set\n", + "predictions <- lm_wf_fit %>% \n", + " predict(new_data = pumpkins_test)\n", + "\n", + "\n", + "# Bind predictions to the test set\n", + "lm_results <- pumpkins_test %>% \n", + " select(c(package, price)) %>% \n", + " bind_cols(predictions)\n", + "\n", + "\n", + "# Print the first ten rows of the tibble\n", + "lm_results %>% \n", + " slice_head(n = 10)" + ], + "outputs": [], + "metadata": { + "id": "UFZzTG0gwTs9" + } + }, + { + "cell_type": "markdown", + "source": [ + "Ndio, umeshafundisha modeli na kuitumia kufanya utabiri!🔮 Je, ni nzuri? Hebu tathmini utendaji wa modeli!\n", + "\n", + "Katika Tidymodels, tunafanya hivi kwa kutumia `yardstick::metrics()`! Kwa regression ya mstari, hebu tuzingatie vipimo vifuatavyo:\n", + "\n", + "- `Root Mean Square Error (RMSE)`: Mizizi ya mraba ya [MSE](https://en.wikipedia.org/wiki/Mean_squared_error). Hii inatoa kipimo cha moja kwa moja katika kipimo sawa na lebo (katika kesi hii, bei ya malenge). Thamani ndogo zaidi, ndivyo modeli inavyokuwa bora (kwa mtazamo rahisi, inawakilisha wastani wa bei ambayo utabiri uko makosa!)\n", + "\n", + "- `Coefficient of Determination (maarufu kama R-squared au R2)`: Kipimo cha kulinganisha ambapo thamani ya juu zaidi inaonyesha modeli inayofaa zaidi. 
Kimsingi, kipimo hiki kinaonyesha ni kiasi gani cha tofauti kati ya thamani za lebo zilizotabiriwa na halisi ambacho modeli inaweza kuelezea.\n" + ], + "metadata": { + "id": "0A5MjzM7wW9M" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Evaluate performance of linear regression\n", + "metrics(data = lm_results,\n", + " truth = price,\n", + " estimate = .pred)" + ], + "outputs": [], + "metadata": { + "id": "reJ0UIhQwcEH" + } + }, + { + "cell_type": "markdown", + "source": [ + "Hapo ndipo utendaji wa modeli unaporudi chini. Hebu tuone kama tunaweza kupata dalili bora kwa kuonyesha mchoro wa kutawanyika wa kifurushi na bei kisha kutumia utabiri uliofanywa kuweka mstari wa kufaa bora.\n", + "\n", + "Hii inamaanisha tutalazimika kuandaa na kuchakata seti ya majaribio ili kusimba safu ya kifurushi kisha kuunganisha hii na utabiri uliofanywa na modeli yetu.\n" + ], + "metadata": { + "id": "fdgjzjkBwfWt" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Encode package column\n", + "package_encode <- lm_pumpkins_recipe %>% \n", + " prep() %>% \n", + " bake(new_data = pumpkins_test) %>% \n", + " select(package)\n", + "\n", + "\n", + "# Bind encoded package column to the results\n", + "lm_results <- lm_results %>% \n", + " bind_cols(package_encode %>% \n", + " rename(package_integer = package)) %>% \n", + " relocate(package_integer, .after = package)\n", + "\n", + "\n", + "# Print new results data frame\n", + "lm_results %>% \n", + " slice_head(n = 5)\n", + "\n", + "\n", + "# Make a scatter plot\n", + "lm_results %>% \n", + " ggplot(mapping = aes(x = package_integer, y = price)) +\n", + " geom_point(size = 1.6) +\n", + " # Overlay a line of best fit\n", + " geom_line(aes(y = .pred), color = \"orange\", size = 1.2) +\n", + " xlab(\"package\")\n", + " \n" + ], + "outputs": [], + "metadata": { + "id": "R0nw719lwkHE" + } + }, + { + "cell_type": "markdown", + "source": [ + "Nzuri! 
Kama unavyoona, mfano wa linear regression hauwezi kwa kweli kujumlisha uhusiano kati ya kifurushi na bei yake inayolingana.\n", + "\n", + "🎃 Hongera, umeunda mfano ambao unaweza kusaidia kutabiri bei ya aina chache za maboga. Shamba lako la maboga kwa ajili ya sikukuu litakuwa zuri. Lakini pengine unaweza kuunda mfano bora zaidi!\n", + "\n", + "## 5. Jenga mfano wa polynomial regression\n", + "\n", + "

\n", + " \n", + "

Picha ya maelezo na Dasani Madipalli
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "HOCqJXLTwtWI" + } + }, + { + "cell_type": "markdown", + "source": [ + "Wakati mwingine data zetu zinaweza kuwa hazina uhusiano wa moja kwa moja, lakini bado tunataka kutabiri matokeo. Urejeleaji wa polinomu unaweza kutusaidia kufanya utabiri kwa uhusiano mgumu zaidi usio wa moja kwa moja.\n", + "\n", + "Chukua kwa mfano uhusiano kati ya kifurushi na bei katika seti yetu ya data ya maboga. Ingawa wakati mwingine kuna uhusiano wa moja kwa moja kati ya vigezo - boga kubwa zaidi kwa ujazo, bei ya juu zaidi - wakati mwingine uhusiano huu hauwezi kuchorwa kama bapa (plane) au mstari wa moja kwa moja.\n", + "\n", + "> ✅ Hapa kuna [mifano zaidi](https://online.stat.psu.edu/stat501/lesson/9/9.8) ya data ambayo inaweza kutumia urejeleaji wa polinomu \n", + "> \n", + "> Angalia tena uhusiano kati ya Aina na Bei katika mchoro wa awali. Je, mchoro huu wa alama unaonekana kama unapaswa kuchambuliwa kwa mstari wa moja kwa moja? Labda hapana. Katika hali hii, unaweza kujaribu urejeleaji wa polinomu. \n", + "> \n", + "> ✅ Polinomu ni maelezo ya kihisabati ambayo yanaweza kuwa na moja au zaidi ya vigezo na viwango \n", + "\n", + "#### Fundisha mfano wa urejeleaji wa polinomu kwa kutumia seti ya mafunzo\n", + "\n", + "Urejeleaji wa polinomu huunda *mstari uliopinda* ili kuendana vyema na data isiyo ya moja kwa moja.\n", + "\n", + "Hebu tuone kama mfano wa polinomu utaonyesha utendaji bora katika kufanya utabiri. 
Tutafuata utaratibu unaofanana kidogo na tulivyofanya awali:\n", + "\n", + "- Unda mapishi yanayobainisha hatua za awali za uchakataji ambazo zinapaswa kufanywa kwenye data yetu ili kuifanya iwe tayari kwa uundaji wa mifano, yaani: kuweka vigezo na kuhesabu polinomu za kiwango *n*\n", + "\n", + "- Tengeneza maelezo ya mfano\n", + "\n", + "- Unganisha mapishi na maelezo ya mfano katika mtiririko wa kazi\n", + "\n", + "- Unda mfano kwa kufanikisha mtiririko wa kazi\n", + "\n", + "- Tathmini jinsi mfano unavyofanya kazi kwenye data ya majaribio\n", + "\n", + "Hebu tuanze moja kwa moja!\n" + ], + "metadata": { + "id": "VcEIpRV9wzYr" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Specify a recipe\r\n", + "poly_pumpkins_recipe <-\r\n", + " recipe(price ~ package, data = pumpkins_train) %>%\r\n", + " step_integer(all_predictors(), zero_based = TRUE) %>% \r\n", + " step_poly(all_predictors(), degree = 4)\r\n", + "\r\n", + "\r\n", + "# Create a model specification\r\n", + "poly_spec <- linear_reg() %>% \r\n", + " set_engine(\"lm\") %>% \r\n", + " set_mode(\"regression\")\r\n", + "\r\n", + "\r\n", + "# Bundle recipe and model spec into a workflow\r\n", + "poly_wf <- workflow() %>% \r\n", + " add_recipe(poly_pumpkins_recipe) %>% \r\n", + " add_model(poly_spec)\r\n", + "\r\n", + "\r\n", + "# Create a model\r\n", + "poly_wf_fit <- poly_wf %>% \r\n", + " fit(data = pumpkins_train)\r\n", + "\r\n", + "\r\n", + "# Print learned model coefficients\r\n", + "poly_wf_fit\r\n", + "\r\n", + " " + ], + "outputs": [], + "metadata": { + "id": "63n_YyRXw3CC" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### Tathmini Utendaji wa Modeli\n", + "\n", + "👏👏Umeunda modeli ya polinomial, hebu tufanye utabiri kwenye seti ya majaribio!\n" + ], + "metadata": { + "id": "-LHZtztSxDP0" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make price predictions on test data\r\n", + "poly_results <- poly_wf_fit %>% predict(new_data 
= pumpkins_test) %>% \r\n", + " bind_cols(pumpkins_test %>% select(c(package, price))) %>% \r\n", + " relocate(.pred, .after = last_col())\r\n", + "\r\n", + "\r\n", + "# Print the results\r\n", + "poly_results %>% \r\n", + " slice_head(n = 10)" + ], + "outputs": [], + "metadata": { + "id": "YUFpQ_dKxJGx" + } + }, + { + "cell_type": "markdown", + "source": [ + "Woo-hoo, wacha tupime jinsi mfano ulivyofanya kazi kwenye test_set kwa kutumia `yardstick::metrics()`.\n" + ], + "metadata": { + "id": "qxdyj86bxNGZ" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "metrics(data = poly_results, truth = price, estimate = .pred)" + ], + "outputs": [], + "metadata": { + "id": "8AW5ltkBxXDm" + } + }, + { + "cell_type": "markdown", + "source": [ + "🤩🤩 Utendaji bora zaidi.\n", + "\n", + "`rmse` ilipungua kutoka takriban 7 hadi takriban 3, ikionyesha kupungua kwa makosa kati ya bei halisi na bei iliyotabiriwa. Unaweza *kwa makadirio* kufasiri hili kama kwamba kwa wastani, utabiri usio sahihi unakosea kwa takriban \\$3. `rsq` iliongezeka kutoka takriban 0.4 hadi 0.8.\n", + "\n", + "Vipimo vyote hivi vinaonyesha kwamba modeli ya polynomial inafanya kazi vizuri zaidi kuliko modeli ya mstari. 
Kazi nzuri!\n", + "\n", + "Hebu tuone kama tunaweza kuonyesha hili!\n" + ], + "metadata": { + "id": "6gLHNZDwxYaS" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Bind encoded package column to the results\r\n", + "poly_results <- poly_results %>% \r\n", + " bind_cols(package_encode %>% \r\n", + " rename(package_integer = package)) %>% \r\n", + " relocate(package_integer, .after = package)\r\n", + "\r\n", + "\r\n", + "# Print new results data frame\r\n", + "poly_results %>% \r\n", + " slice_head(n = 5)\r\n", + "\r\n", + "\r\n", + "# Make a scatter plot\r\n", + "poly_results %>% \r\n", + " ggplot(mapping = aes(x = package_integer, y = price)) +\r\n", + " geom_point(size = 1.6) +\r\n", + " # Overlay a line of best fit\r\n", + " geom_line(aes(y = .pred), color = \"midnightblue\", size = 1.2) +\r\n", + " xlab(\"package\")\r\n" + ], + "outputs": [], + "metadata": { + "id": "A83U16frxdF1" + } + }, + { + "cell_type": "markdown", + "source": [ + "Unaweza kuona mstari uliopinda unaofaa data yako vizuri zaidi! 
🤩\n", + "\n", + "Unaweza kufanya hii kuwa laini zaidi kwa kupitisha fomula ya polinomial kwa `geom_smooth` kama hivi:\n" + ], + "metadata": { + "id": "4U-7aHOVxlGU" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make a scatter plot\r\n", + "poly_results %>% \r\n", + " ggplot(mapping = aes(x = package_integer, y = price)) +\r\n", + " geom_point(size = 1.6) +\r\n", + " # Overlay a line of best fit\r\n", + " geom_smooth(method = lm, formula = y ~ poly(x, degree = 4), color = \"midnightblue\", size = 1.2, se = FALSE) +\r\n", + " xlab(\"package\")" + ], + "outputs": [], + "metadata": { + "id": "5vzNT0Uexm-w" + } + }, + { + "cell_type": "markdown", + "source": [ + "Kama vile mwelekeo laini!🤩\n", + "\n", + "Hivi ndivyo unavyoweza kufanya utabiri mpya:\n" + ], + "metadata": { + "id": "v9u-wwyLxq4G" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make a hypothetical data frame\r\n", + "hypo_tibble <- tibble(package = \"bushel baskets\")\r\n", + "\r\n", + "# Make predictions using linear model\r\n", + "lm_pred <- lm_wf_fit %>% predict(new_data = hypo_tibble)\r\n", + "\r\n", + "# Make predictions using polynomial model\r\n", + "poly_pred <- poly_wf_fit %>% predict(new_data = hypo_tibble)\r\n", + "\r\n", + "# Return predictions in a list\r\n", + "list(\"linear model prediction\" = lm_pred, \r\n", + " \"polynomial model prediction\" = poly_pred)\r\n" + ], + "outputs": [], + "metadata": { + "id": "jRPSyfQGxuQv" + } + }, + { + "cell_type": "markdown", + "source": [ + "Utabiri wa `polynomial model` una mantiki, ukizingatia grafu za kutawanyika za `price` na `package`! Na, ikiwa huu ni mfano bora kuliko ule wa awali, ukitazama data hiyo hiyo, unahitaji kupanga bajeti kwa ajili ya malenge haya ya gharama kubwa zaidi!\n", + "\n", + "🏆 Hongera! Umeunda mifano miwili ya regression katika somo moja. 
Katika sehemu ya mwisho ya regression, utajifunza kuhusu logistic regression ili kubaini kategoria.\n", + "\n", + "## **🚀Changamoto**\n", + "\n", + "Jaribu vigezo kadhaa tofauti katika daftari hili ili kuona jinsi uhusiano unavyolingana na usahihi wa mfano.\n", + "\n", + "## [**Jaribio baada ya somo**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/14/)\n", + "\n", + "## **Mapitio na Kujisomea**\n", + "\n", + "Katika somo hili tulijifunza kuhusu Linear Regression. Kuna aina nyingine muhimu za Regression. Soma kuhusu mbinu za Stepwise, Ridge, Lasso na Elasticnet. Kozi nzuri ya kusoma ili kujifunza zaidi ni [Kozi ya Stanford ya Statistical Learning](https://online.stanford.edu/courses/sohs-ystatslearning-statistical-learning).\n", + "\n", + "Ikiwa unataka kujifunza zaidi kuhusu jinsi ya kutumia mfumo wa ajabu wa Tidymodels, tafadhali angalia rasilimali zifuatazo:\n", + "\n", + "- Tovuti ya Tidymodels: [Anza na Tidymodels](https://www.tidymodels.org/start/)\n", + "\n", + "- Max Kuhn na Julia Silge, [*Tidy Modeling with R*](https://www.tmwr.org/)*.*\n", + "\n", + "###### **ASANTE KWA:**\n", + "\n", + "[Allison Horst](https://twitter.com/allison_horst?lang=en) kwa kuunda michoro ya ajabu inayofanya R kuwa ya kuvutia na ya kupendeza zaidi. Pata michoro zaidi kwenye [matunzio yake](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM).\n" + ], + "metadata": { + "id": "8zOLOWqMxzk5" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya tafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kwa usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. 
Kwa taarifa muhimu, inashauriwa kutumia huduma ya tafsiri ya kitaalamu ya binadamu. Hatutawajibika kwa maelewano mabaya au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sw/2-Regression/3-Linear/solution/notebook.ipynb b/translations/sw/2-Regression/3-Linear/solution/notebook.ipynb new file mode 100644 index 000000000..adcdc9a55 --- /dev/null +++ b/translations/sw/2-Regression/3-Linear/solution/notebook.ipynb @@ -0,0 +1,1111 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Usawazishaji wa Mstari na Usawazishaji wa Polynomial kwa Bei ya Maboga - Somo la 3\n", + "\n", + "Pakia maktaba zinazohitajika na seti ya data. Badilisha data kuwa fremu ya data inayoonyesha sehemu ndogo ya data:\n", + "\n", + "- Chagua tu maboga yaliyo na bei kwa kipimo cha bushel\n", + "- Badilisha tarehe kuwa mwezi\n", + "- Hesabu bei kuwa wastani wa bei ya juu na ya chini\n", + "- Badilisha bei ili kuonyesha upimaji kwa idadi ya bushel\n" + ] + }, + { + "cell_type": "code", + "execution_count": 167, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NameTypePackageVarietySub VarietyGradeDateLow PriceHigh PriceMostly Low...Unit of SaleQualityConditionAppearanceStorageCropRepackTrans ModeUnnamed: 24Unnamed: 25
0BALTIMORENaN24 inch binsNaNNaNNaN4/29/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
1BALTIMORENaN24 inch binsNaNNaNNaN5/6/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
2BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
3BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
4BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN11/5/1690.0100.090.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " City Name Type Package Variety Sub Variety Grade Date \\\n", + "0 BALTIMORE NaN 24 inch bins NaN NaN NaN 4/29/17 \n", + "1 BALTIMORE NaN 24 inch bins NaN NaN NaN 5/6/17 \n", + "2 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "3 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "4 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 11/5/16 \n", + "\n", + " Low Price High Price Mostly Low ... Unit of Sale Quality Condition \\\n", + "0 270.0 280.0 270.0 ... NaN NaN NaN \n", + "1 270.0 280.0 270.0 ... NaN NaN NaN \n", + "2 160.0 160.0 160.0 ... NaN NaN NaN \n", + "3 160.0 160.0 160.0 ... NaN NaN NaN \n", + "4 90.0 100.0 90.0 ... NaN NaN NaN \n", + "\n", + " Appearance Storage Crop Repack Trans Mode Unnamed: 24 Unnamed: 25 \n", + "0 NaN NaN NaN E NaN NaN NaN \n", + "1 NaN NaN NaN E NaN NaN NaN \n", + "2 NaN NaN NaN N NaN NaN NaN \n", + "3 NaN NaN NaN N NaN NaN NaN \n", + "4 NaN NaN NaN N NaN NaN NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 167, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from datetime import datetime\n", + "\n", + "pumpkins = pd.read_csv('../../data/US-pumpkins.csv')\n", + "pumpkins.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 168, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MonthDayOfYearVarietyCityPackageLow PriceHigh PricePrice
709267PIE TYPEBALTIMORE1 1/9 bushel cartons15.015.013.636364
719267PIE TYPEBALTIMORE1 1/9 bushel cartons18.018.016.363636
7210274PIE TYPEBALTIMORE1 1/9 bushel cartons18.018.016.363636
7310274PIE TYPEBALTIMORE1 1/9 bushel cartons17.017.015.454545
7410281PIE TYPEBALTIMORE1 1/9 bushel cartons15.015.013.636364
\n", + "
" + ], + "text/plain": [ + " Month DayOfYear Variety City Package Low Price \\\n", + "70 9 267 PIE TYPE BALTIMORE 1 1/9 bushel cartons 15.0 \n", + "71 9 267 PIE TYPE BALTIMORE 1 1/9 bushel cartons 18.0 \n", + "72 10 274 PIE TYPE BALTIMORE 1 1/9 bushel cartons 18.0 \n", + "73 10 274 PIE TYPE BALTIMORE 1 1/9 bushel cartons 17.0 \n", + "74 10 281 PIE TYPE BALTIMORE 1 1/9 bushel cartons 15.0 \n", + "\n", + " High Price Price \n", + "70 15.0 13.636364 \n", + "71 18.0 16.363636 \n", + "72 18.0 16.363636 \n", + "73 17.0 15.454545 \n", + "74 15.0 13.636364 " + ] + }, + "execution_count": 168, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pumpkins = pumpkins[pumpkins['Package'].str.contains('bushel', case=True, regex=True)]\n", + "\n", + "new_columns = ['Package', 'Variety', 'City Name', 'Month', 'Low Price', 'High Price', 'Date']\n", + "pumpkins = pumpkins.drop([c for c in pumpkins.columns if c not in new_columns], axis=1)\n", + "\n", + "price = (pumpkins['Low Price'] + pumpkins['High Price']) / 2\n", + "\n", + "month = pd.DatetimeIndex(pumpkins['Date']).month\n", + "day_of_year = pd.to_datetime(pumpkins['Date']).apply(lambda dt: (dt-datetime(dt.year,1,1)).days)\n", + "\n", + "new_pumpkins = pd.DataFrame(\n", + " {'Month': month, \n", + " 'DayOfYear' : day_of_year, \n", + " 'Variety': pumpkins['Variety'], \n", + " 'City': pumpkins['City Name'], \n", + " 'Package': pumpkins['Package'], \n", + " 'Low Price': pumpkins['Low Price'],\n", + " 'High Price': pumpkins['High Price'], \n", + " 'Price': price})\n", + "\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1 1/9'), 'Price'] = price/1.1\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1/2'), 'Price'] = price*2\n", + "\n", + "new_pumpkins.head()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Mchoro wa kutawanyika unatukumbusha kwamba tuna data ya miezi kutoka Agosti hadi Desemba tu. 
Huenda tunahitaji data zaidi ili tuweze kutoa hitimisho kwa mtindo wa mstari.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 169, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 169, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "new_pumpkins.plot.scatter('Month','Price')" + ] + }, + { + "cell_type": "code", + "execution_count": 170, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 170, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "new_pumpkins.plot.scatter('DayOfYear','Price')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 171, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-0.14878293554077535\n", + "-0.16673322492745407\n" + ] + } + ], + "source": [ + "print(new_pumpkins['Month'].corr(new_pumpkins['Price']))\n", + "print(new_pumpkins['DayOfYear'].corr(new_pumpkins['Price']))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Inaonekana kama uhusiano ni mdogo sana, lakini kuna uhusiano mwingine muhimu zaidi - kwa sababu bei katika mchoro hapo juu zinaonekana kuwa na makundi tofauti tofauti. Hebu tufanye mchoro ambao utaonyesha aina mbalimbali za maboga:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 172, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "ax=None\n", + "colors = ['red','blue','green','yellow']\n", + "for i,var in enumerate(new_pumpkins['Variety'].unique()):\n", + " ax = new_pumpkins[new_pumpkins['Variety']==var].plot.scatter('DayOfYear','Price',ax=ax,c=colors[i],label=var)" + ] + }, + { + "cell_type": "code", + "execution_count": 173, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 173, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXAAAAGKCAYAAAAVEBpAAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAcxklEQVR4nO3df5h3dV3n8ecrwEBEARm4bhW8UxEDfwDekqztpiiFmiGGCW5GZhduLRVpJaX5a7e9bP25musurAiRgqaopGEiF+TiKnrzQ4SQIEMSEG615BbzB/DeP8538sswv+c7c85n5vm4rrnm/PjOzIvx/r488znnfE6qCklSe36s7wCSpOWxwCWpURa4JDXKApekRlngktSoHdfyh+211161efPmtfyRktS8yy677BtVNTVz+5oW+ObNm9m6deta/khJal6Sr862fcEhlCQ7J/l8ki8muSbJ60bbX5vk5iRXjj6eNenQkqS5LeYI/PvAEVX1nSQ7AZckOX+0761V9abViydJmsuCBV7drZrfGa3uNPrw9k1J6tmirkJJskOSK4HbgQuq6tLRrpOSXJXk9CR7rFZISdJ9LarAq+ruqjoYeBhwWJLHAu8CHgkcDNwKvHm2r01yYpKtSbZu27ZtIqElSUu8Dryq/gW4GDiqqm4bFfs9wGnAYXN8zalVtaWqtkxN3ecqGEnSMi3mKpSpJLuPlncBngF8OcmmsZcdA1y9KgklSbNazFUom4Azk+xAV/gfqKqPJTkrycF0JzRvBF66aiklSfexmKtQrgIOmWX7i1YlkSRpUdb0TkwN3+ZTPt53hAXd+IZn9x1BGgQns5KkRlngktQoC1ySGmWBS1KjLHBJapQFLkmNssAlqVEWuCQ1ygKXpEZZ4JLUKAtckhplgUtSoyxwSWqUBS5JjbLAJalRFrgkNcoCl6RGWeCS1KjmH6nWwiPAwMeASZo8j8AlqVEWuCQ1asECT7Jzks8n+WKSa5K8brR9zyQXJLl+9HmP1Y8rSZq2mCPw7wNHVNUTgIOBo5I8GTgFuLCq9gcuHK1LktbIggVene+MVncafRRwNHDmaPuZwHNXI6AkaXaLGgNPskOSK4HbgQuq6lJgn6q6FWD0ee85vvbEJFuTbN22bduEYkuSFlXgVXV3VR0MPAw4LMljF/sDqurUqtpSVVumpqaWGVOSNNOSrkKpqn8BLgaOAm5Lsglg9Pn2SYeTJM1tMVehTCXZfbS8C/AM4MvAecAJo5edAHx0lTJKkmaxmDsxNwFnJtmBrvA/UFUfS/JZ4ANJXgLcBDx/FXNKkmZYsMCr6irgkFm2fxN4+mqEkiQtzDsxJalRFrgkNcoCl6RGWeCS1CgLXJIaZYFLUqMscElqlAUuSY
2ywCWpURa4JDXKApekRlngktQoC1ySGmWBS1KjLHBJapQFLkmNssAlqVEWuCQ1ygKXpEZZ4JLUKAtckhq1YIEn2TfJRUmuTXJNkt8ZbX9tkpuTXDn6eNbqx5UkTdtxEa+5C3h5VV2eZDfgsiQXjPa9taretHrxJElzWbDAq+pW4NbR8vYk1wIPXe1gkqT5LWkMPMlm4BDg0tGmk5JcleT0JHvM8TUnJtmaZOu2bdtWllaS9G8WXeBJHgB8CDi5qu4A3gU8EjiY7gj9zbN9XVWdWlVbqmrL1NTUyhNLkoBFFniSnejK+71VdS5AVd1WVXdX1T3AacBhqxdTkjTTYq5CCfBu4NqqesvY9k1jLzsGuHry8SRJc1nMVShPAV4EfCnJlaNtfwQcn+RgoIAbgZeuQj5J0hwWcxXKJUBm2fXXk48jSVos78SUpEZZ4JLUKAtckhplgUtSoyxwSWqUBS5JjbLAJalRFrgkNcoCl6RGWeCS1CgLXJIaZYFLUqMscElqlAUuSY2ywCWpURa4JDXKApekRlngktQoC1ySGmWBS1KjLHBJatSCBZ5k3yQXJbk2yTVJfme0fc8kFyS5fvR5j9WPK0matpgj8LuAl1fVTwJPBv5zkgOBU4ALq2p/4MLRuiRpjSxY4FV1a1VdPlreDlwLPBQ4Gjhz9LIzgeeuUkZJ0iyWNAaeZDNwCHApsE9V3QpdyQN7TzydJGlOiy7wJA8APgScXFV3LOHrTkyyNcnWbdu2LSejJGkWiyrwJDvRlfd7q+rc0ebbkmwa7d8E3D7b11bVqVW1paq2TE1NTSKzJInFXYUS4N3AtVX1lrFd5wEnjJZPAD46+XiSpLnsuIjXPAV4EfClJFeOtv0R8AbgA0leAtwEPH9VEkqSZrVggVfVJUDm2P30ycaRJC2Wd2JKUqMscElqlAUuSY2ywCWpURa4JDXKApekRi3mOnBJy7D5lI/3HWFRbnzDs/uOoGXyCFySGmWBS1KjLHBJapQFLkmNssAlqVEWuCQ1ygKXpEZZ4JLUKAtckhplgUtSoyxwSWqUBS5JjXIyK0lNcHKw+/IIXJIaZYFLUqMWLPAkpye5PcnVY9tem+TmJFeOPp61ujElSTMt5gj8DOCoWba/taoOHn389WRjSZIWsmCBV9WngW+tQRZJ0hKsZAz8pCRXjYZY9pjrRUlOTLI1ydZt27at4MdJksYtt8DfBTwSOBi4FXjzXC+sqlOraktVbZmamlrmj5MkzbSsAq+q26rq7qq6BzgNOGyysSRJC1lWgSfZNLZ6DHD1XK+VJK2OBe/ETHI28FRgryRfA14DPDXJwUABNwIvXb2IkqTZLFjgVXX8LJvfvQpZJElL4J2YktQoC1ySGmWBS1KjLHBJapQFLkmNssAlqVEWuCQ1ygKXpEZZ4JLUKAtckhplgUtSoyxwSWqUBS5JjbLAJalRFrgkNcoCl6RGWeCS1CgLXJIaZYFLUqMscElqlAUuSY1asMCTnJ7k9iRXj23bM8kFSa4ffd5jdWNKkmZazBH4GcBRM7adAlxYVfsDF47WJUlraMECr6pPA9+asflo4MzR8pnAcycbS5K0kOWOge9TVbcCjD7vPdcLk5yYZGuSrdu2bVvmj5MkzbTqJzGr6tSq2lJVW6amplb7x0nShrHcAr8tySaA0efbJxdJkrQYyy3w84ATRssnAB+dTBxJ0mIt5jLCs4HPAgck+VqSlwBvAI5Mcj1w5GhdkrSGdlzoBVV1/By7nj7hLJKkJfBOTElqlAUuSY2ywCWpURa4JDXKApekRlngktQoC1ySGmWBS1KjLHBJapQFLkmNssAlqVEWuCQ1ygKXpEZZ4JLUKAtckhplgUtSoyxwSWqUBS5JjbLAJalRFrgkNcoCl6RGLfhU+vkkuRHYDtwN3FVVWyYRSpK0sBUV+MjTquobE/g+kqQlcAhFkhq10gIv4JNJLkty4iQCSZIWZ6VDKE+pqluS7A1ckOTLVfXp8ReMiv
1EgP3222+FP06SNG1FR+BVdcvo8+3Ah4HDZnnNqVW1paq2TE1NreTHSZLGLLvAk+yaZLfpZeBngasnFUySNL+VDKHsA3w4yfT3eV9VfWIiqSRJC1p2gVfVV4AnTDCLJGkJvIxQkhplgUtSoyxwSWqUBS5JjbLAJalRFrgkNcoCl6RGWeCS1CgLXJIaZYFLUqMscElqlAUuSY2ywCWpURa4JDXKApekRlngktQoC1ySGmWBS1KjLHBJapQFLkmNssAlqVEWuCQ1akUFnuSoJNcluSHJKZMKJUla2LILPMkOwDuBZwIHAscnOXBSwSRJ81vJEfhhwA1V9ZWq+gFwDnD0ZGJJkhaSqlreFybHAkdV1a+P1l8E/FRVnTTjdScCJ45WDwCuW37cNbMX8I2+Q6wj/j4nx9/lZLXy+3x4VU3N3LjjCr5hZtl2n/83qKpTgVNX8HPWXJKtVbWl7xzrhb/PyfF3OVmt/z5XMoTyNWDfsfWHAbesLI4kabFWUuBfAPZP8hNJ7gccB5w3mViSpIUsewilqu5KchLwN8AOwOlVdc3EkvWrqSGfBvj7nBx/l5PV9O9z2ScxJUn98k5MSWqUBS5JjbLAJalRG77Akzxwnn37rWUWaS5JHpzkmCRP7DtLi5J8YGz5T2fs++TaJ5qMDV/gwMXTC0kunLHvI2uapHHr9U3ShyQfS/LY0fIm4Grg14CzkpzcZ7ZG7T+2fOSMffe5w7EVFvi97yjdc559Wti6fJP05Ceq6urR8ouBC6rqOcBP0RW5lma+y+2avRRvJbfSrxc1x/Js65rfunyT9OSHY8tPB04DqKrtSe7pJ1LT7p/kELqD1l1Gyxl97NJrshWwwGHvJC+j+x9yepnRukeNS7Mu3yQ9+ackv0U3ZcWhwCcAkuwC7NRnsEZ9HXjLLMvT603a8DfyJHnNfPur6nVrlaV1SS6ab39VPW2tsrQuyd7A64FNwDur6pOj7U8DnlhVb+ozn4Zhwxf4fJKcXFVv6zuHNC7JjlV1V985WpJkf+CNwKOALwG/V1U395tq5TyJOb+XLfwSLSTJkUku6DtHS5JcMrZ81ozdn1/jOOvB6cDHgV8ELgfe0W+cybDA5+dVKEuQ5Igkf5/kO0n+IsmBSbYCbwDe1Xe+xuw6tnzQjH3+u1y63arqtKq6rqreCGzuO9AkeBJzfo4vLc2b6Z6+9Fm6Z6V+DvjjqvofvaZqk1f0TNbOYyfV4d4n2amqy3tLtgIbvsCTbGf2N0SA+69xnNZVVV08Wv5Ikm2W97LtnuQYur+Sd0/yvNH2AA/qL1azZrvyZHq9gCPWPNEEeBJTE5PkK8DvjW160/h6VZ275qEaleQ98+2vqhevVZb1IMkDq+qOvnNMmgU+iyS7As8FXlhVz+45TjMWKJ2qKu8gVC+S/APwyqo6p+8sk2SBj4weC/cs4IXAUcCHgHOr6q96DaYNaeyGsllV1Vvm2697S/Jw4G3AA4DfqKob+k00GY6BJ0cCxwM/B1wEnAUc5p+oSzdL6RTwDeCSqvrHHiK1bLe+A6wnVfVV4JgkRwGfSfIF4J6x/b/QW7gV2PAFTvdMz/8L/PR0ySTxxNvyzFY6m4FXJnntevvzdZV9s6r+rO8Q60mSA4A/oHu/v5OxAm/Vhh9CGV1KdBxwLPAV4Bzg1VX18F6DrSNJ9gQ+VVWH9p2lFUku9/c1OUneAPwC8PKqOr/vPJOy4W/kqaorquoVVfVI4LXAIcD9kpyf5MR+060PVfUtvPlE/doCHLqeyhss8Hs9aKCqPlNVJwEPpTvhcXhfudaTJEcA/9x3jsY8Pskds3xsT7LuLodbA3tW1ff6DjFpjoHPMmVsVd1DNzb+N2sfp11JvsR9b4raE7gF+JW1T9S0L1XVIX2H0LBZ4PCgsbvc7sObT5bkecAPxtaL7mTcnT3lkaY9Isl5c+30KpR2PQj4eWYfoy3AAl+893vibWL+su8A68w2url61hULHG7yDsGJ8UTl5B
w8vZDkT6vqFWPrn6yqn+0lVbu2V9Xf9h1i0ixwZ3abpKn57iD07sEledTY8pHAK8bWfdTf0t3Yd4DVYIHDt9brRDc92IHuVmWPxFeXBx1LVFVznudqmQUOnwQuS/Kaqnpf32Ead2tVvb7vEOuED4jWgjb8nZgASR5KNzfwXnRPjhmfI8GTmIuU5IrZLn1Lsi9w3OhJKFqEJBczz5G2D4gWeAQOQFXdnOTjwJ8Az+FHBe5VKEvz9OmFJHsBz6ebKOxh+Htckqp6at8Z1pMkv1xVfzFafkpVfWZs30mtzjuz4Y/AkxxEd9R9C/C7VXVrz5GalWQ34Bi6KXkfDXwYeEFVPazXYA2a794E8C/DpRqfW2bmPDMtzzvjETh8EDi5qu5z12WSXb0JZUlup3ti+qvoppCt0WPBtHTPmWeffxkuXeZYnm29GRZ4d73tXkm2AFdV1Q+S7A2cDPwq8JD+ojXnj+hmdnwX8L4k7+85T7Ocj37iao7l2dabseEnswL+E3Al8A7gc0lOAK6lO9P/xB5zNaeq3lpVP0U3bWeAjwAPSfKKJI/uNVyDkuwwOpcwvX6/JCcmubbPXI16TJKrRvP1TC9Prx/Qd7jlcgw8+Tu6hzl8K8l+wA3Af6iqz/UcbV1I8ji6MfFfGk3Zq0VIchzwv4E7gevppjo+C/gC8F+q6vL+0rVn9Ei1OY2e2NMcC/y+JzSurqrH9plJSnI18NyquiHJocBn6S7F/HDP0TQgFnhyO91TeKYdN75eVb+95qEalWQ7s48nhu6p9A9c40jNmuXA4stV9Zg+M7Vs7N/m9AnL6X+nTf/b9CQm/P6M9ct6SbEOVJUP4p2cvWfMK/OA8XXnlVma9fpvc8MfgUtDlOQ18+wupyxYmiQ7012w8CjgKuD0qrqr31Qrt+ELPMlfMf8ty01O9N6HWf5MZbS+I3C/qvIvvglI8qSq+kLfOVoyuqT1h3RPpH8m8NWq+p1+U62cbyh4U98B1ouZf6aO7sz8TeCldHdlapmSHEh3fuZ44Nt0D+nV4h1YVY8DSPJuuhvOmrfhC3yuSd6nJ2AC1t0k8Kstye50N0L9CvA+4ElV9c0+M7VodOnb8aOPu4CHA1uq6sY+czXqh9MLVXVX0uzNl/ey4Qt83IwJmB6KR41LMvr9vRx4AXA6cEhVfbvfVG1K8v/oHvd3DnBsVV2f5B8t72V7QpLpOf9DN0XvHXgVStvmmIDpEU7AtCxfpXv24HuA7wIvGT/S8cqJJdlGN4vjPnRP4Lmehm/57ltV7dB3htWw4QscJ2CapDfyo5JZl5dtrZWqOjrJg4BfBF6X5FHA7kkOq6p1MX6rlfMqlOR36ca6d6Ubr30/cEFVPaLXYNrQkuxUVT8cW9+bbmjqeGDfqtq3t3AajA1f4NOSPILuzXEcsD/wGuDDVfX3vQZrSJK3z7ffu1oXb3SH8EeBs4GLauyNmuThrc7docna8AWeZL+qumnGtsfRlfkLnIBp8UYzOc6pqs5cqyytS/Jg4Fh+dEDxQeDsqrq012AaFAv83k/q+FBV/WLfmaRxSR5Cd3XUccDewDlV9cp+U2kILPCxB/HO9VBeLU6S8+bb712ty5fkAcDzgJcBm6pqn54jaQC8CmX+J3VoaQ4H/olu3PZSGn5U1RCM5u94Dt1w3lOATwB/CHyyz1waDo/Ak7vpJs0P3VN4vju9i4Yv8O9Dkh2AI+kK5/HAx+nGba/pNViDkrwPeAbwabqbeT5WVd/rN5WGZsMXuFZHkh+nK/I3Aq+vqnf0HKkpoxPC51bV9r6zaLgscE3UqLifTVfem4Hz6KbuvLnPXNJ6ZIFrYpKcCTwWOJ/uSomre44krWsWuCYmyT105xPg3ieEPZ8grQILXBqgJM+bb39VnbtWWTRcFrg0QKO/Zq4cfcCMpxxV1a+tdSYNjwUuDdBoRswX0D3D8aN0l2Pe0G8qDY0FLg1Ykl2Bo+nK/MHAK+d6ipQ2nh/rO4CkeX2P7hmYd9
BNebxzv3E0JB6BSwOU5Gl019IfBnyK7rLMrf2m0tBY4NIAjU5iXgVcQndJ5r3eqM6tLnAyK2moXtx3AA2fR+DSwI2mkq2qunPBF2tD8SSmNFBJfiPJTcBXgZuSfDXJb/adS8NhgUsDlORVdHOBP7WqHlxVDwaeBjxztE9yCEUaoiTXAU+YOQd4kl2AL1bVo/tJpiHxCFwaqNke4FBV/wrc00McDZAFLg3T15I8febGJEcAt/aQRwPkEIo0QEkOopsD5RLgMrrrwJ9E92zMo31MncAClwZr9FDjFwIH0c1GeA3wXp+NqWneyCMNVFV9L8lFwO10R+DXWt4a5xG4NEBJHgj8H+CJdHOC/xjwBLrhlJdU1R39pdNQWODSACU5A7gReH1V3TPaFuCPgUdV1a/0l05DYYFLA5Tk+qraf6n7tLF4GaE0TFn4JdroLHBpmD6T5NWjYZN/k+SPgc/1lEkD4xCKNECjk5jvBg6lO4lZwCHAFXQnMb/dXzoNhQUuDViSRwIHMroOvKr+IcnJVfW2fpNpCCxwqTFJbqqq/frOof45Bi61xxOcAixwqUX+2SzAW+mlQUqyndmLOsAuaxxHA+UYuCQ1yiEUqSFJdk/yyr5zaBgscGmAkuyb5NQkH0vy60nun+TNwPXA3n3n0zA4Bi4N058Dfwt8CDiK7u7La4DHVdXX+wym4XAMXBqgJF+sqieMrd8G7FdV3+8xlgbGI3BpoJLswY+u+f46cP8kuwJU1bd6C6bB8AhcGqAkN9I9fX62m3aqqh6xtok0RBa4JDXKIRRpgJIcOt/+qrp8rbJouDwClwZo9DDjuVRVHbFmYTRYFrgkNcobeaQBSvIHY8vPn7Hvv619Ig2RBS4N03Fjy384Y99RaxlEw2WBS8OUOZZnW9cGZYFLw1RzLM+2rg3Kk5jSACW5G7iTH83//d3pXcDOVbVTX9k0HBa4JDXKG3mkAUqy53z7nQtF4BG4NEhJ/pFurDvAJuAWfnTy0rlQBFjg0uAluaKqDuk7h4bHq1Ck4fMoS7OywCWpUZ7ElAYoycvGVveesU5VvWWNI2mALHBpmHYbWz5txroEeBJTkprlGLgkNcoCl6RGWeCS1ChPYkoDleQA4ETgMaNN1wKnVdV1/aXSkHgELg1QksOBi4HtwKl0V6LcCVyU5Mk9RtOAeBWKNEBJzgf+tKounrH9Z4BTquqZvQTToFjg0gAl+fuqevQc+66rqgPWOpOGxyEUaZi2z7PvzjVLoUHzJKY0TPsmefss2wM8dK3DaJgscGmYfn+efVvXLIUGzTFwqTFJdqyqu/rOof45Bi4NUJJLxpbPmrH782scRwNlgUvDtOvY8kEz9gUJC1waqvnGNh33FOBJTGmodk9yDN1B1u5JnjfaHuBB/cXSkHgSUxqgJGcwz5F2Vb147dJoqCxwSWqUY+DSACV5R5L7PEYtyWOSfKqPTBoeC1wapq8DVyZ5IUCS+yf578B5wDt7TabBcAhFGqgkPwH8Gd0DjR8CfAD4r1X13V6DaTA8ApeGa/roake69+q1lrfGWeDSACV5FfAp4M+r6t8B/x44OsnfJjmw33QaCq8Dl4ZpCjikqrYDVNXNwLFJngl8CPjJPsNpGBwDlxqT5Mer6vt951D/PAKXBmiOucDH/faaBNGgWeDSMF3WdwANn0MoktQoj8ClAUpy3nz7q+oX1iqLhssCl4bpcOCfgLOBS3EOcM3CIRRpgJLsABwJHA88Hvg4cHZVXdNrMA2KN/JIA1RVd1fVJ6rqBODJwA3AxUl+q+doGhCHUKSBSvLjwLPpjsI3A28Hzu0zk4bFIRRpgJKcCTwWOB84p6qu7jmSBsgClwYoyT3AnaPV8TdpgKqqB659Kg2NBS5JjfIkpiQ1ygKXpEZZ4JLUKAtczUtycZKfm7Ht5CT/c5Ff//okz1jgNb+a5CErySlNmgWu9eBs4LgZ244bbZ9Xkh2q6tVVtdCT3n+V7rmU0mBY4FoPPgj8/O
jGF5JspivbFybZmuSaJK+bfnGSG5O8OsklwPOTnJHk2NG+J44eW3ZZkr9Jsmm0bwvw3iRXJnl2kg+Pfb8jk3iDjdacBa7mVdU3gc8DR402HQe8H3hlVW2hm0vkZ5I8fuzLvldVP11V50xvSLIT8A7g2Kp6InA68CdV9UFgK/Afq+pg4K+Bn0wyNfrSFwPvWbX/QGkOFrjWi/FhlOnhk19KcjlwBXAQMP4w4PfP8j0OoLv78YIkVwKvAh4280XV3TxxFvDLSXanmznw/In8V0hL4FwoWi8+ArwlyaHALsA/A78HPKmq/jnJGcDOY6+/8z7fobvL8ZqqOnwRP+89wF8B3wP+sqruWkF2aVk8Ate6UFXfAS6mG/Y4G3ggXUl/O8k+wDMX8W2uA6aSHA7dkEqSg0b7tgO7jf28W4Bb6I7Sz5jMf4W0NB6Baz05m262vuOq6stJrgCuAb4CfGahL66qH4xOWL49yYPo3h9vG32PM4D/leRfgcOr6l+B9wJTVfV3q/EfIy3EuVCkZUryZ8AVVfXuvrNoY7LApWVIchndEM2RVfX9vvNoY7LAJalRnsSUpEZZ4JLUKAtckhplgUtSoyxwSWrU/wdO32Yxjk19aAAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "new_pumpkins.groupby('Variety')['Price'].mean().plot(kind='bar')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 174, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-0.2669192282197318\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 174, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "pie_pumpkins = new_pumpkins[new_pumpkins['Variety']=='PIE TYPE']\n", + "print(pie_pumpkins['DayOfYear'].corr(pie_pumpkins['Price']))\n", + "pie_pumpkins.plot.scatter('DayOfYear','Price')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Urejeleaji wa Mstari\n", + "\n", + "Tutatumia Scikit Learn kufundisha mfano wa urejeleaji wa mstari:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 175, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error\n", + "from sklearn.model_selection import train_test_split" + ] + }, + { + "cell_type": "code", + "execution_count": 176, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 2.77 (17.2%)\n" + ] + } + ], + "source": [ + "X = pie_pumpkins['DayOfYear'].to_numpy().reshape(-1,1)\n", + "y = pie_pumpkins['Price']\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n", + "lin_reg = LinearRegression()\n", + "lin_reg.fit(X_train,y_train)\n", + "\n", + "pred = lin_reg.predict(X_test)\n", + "\n", + "mse = np.sqrt(mean_squared_error(y_test,pred))\n", + "print(f'Mean error: {mse:3.3} ({mse/np.mean(pred)*100:3.3}%)')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 177, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 177, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.scatter(X_test,y_test)\n", + "plt.plot(X_test,pred)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Mwelekeo wa mstari unaweza kuamuliwa kutoka kwa vigezo vya usawa wa mstari:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 178, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([-0.01751876]), 21.133734359909326)" + ] + }, + "execution_count": 178, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lin_reg.coef_, lin_reg.intercept_" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 179, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([16.64893156])" + ] + }, + "execution_count": 179, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Pumpkin price on programmer's day\n", + "\n", + "lin_reg.predict([[256]])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Usawazishaji wa Polynomial\n", + "\n", + "Wakati mwingine uhusiano kati ya vipengele na matokeo ni wa asili usio wa mstari. Kwa mfano, bei za maboga zinaweza kuwa juu wakati wa baridi (miezi=1,2), kisha zishuke wakati wa kiangazi (miezi=5-7), na kisha zipande tena. Usawazishaji wa mstari hauwezi kupata uhusiano huu kwa usahihi.\n", + "\n", + "Katika hali hii, tunaweza kufikiria kuongeza vipengele vya ziada. Njia rahisi ni kutumia polinomiali kutoka kwa vipengele vya ingizo, ambayo itasababisha **usawazishaji wa polynomial**. 
Katika Scikit Learn, tunaweza kuhesabu kiotomatiki vipengele vya polynomial kwa kutumia pipelines:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 180, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 2.73 (17.0%)\n", + "Model determination: 0.07639977655280217\n" + ] + }, + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 180, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "from sklearn.preprocessing import PolynomialFeatures\n", + "from sklearn.pipeline import make_pipeline\n", + "\n", + "pipeline = make_pipeline(PolynomialFeatures(2), LinearRegression())\n", + "\n", + "pipeline.fit(X_train,y_train)\n", + "\n", + "pred = pipeline.predict(X_test)\n", + "\n", + "mse = np.sqrt(mean_squared_error(y_test,pred))\n", + "print(f'Mean error: {mse:3.3} ({mse/np.mean(pred)*100:3.3}%)')\n", + "\n", + "score = pipeline.score(X_train,y_train)\n", + "print('Model determination: ', score)\n", + "\n", + "plt.scatter(X_test,y_test)\n", + "plt.plot(sorted(X_test),pipeline.predict(sorted(X_test)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Aina za usimbaji\n", + "\n", + "Katika ulimwengu bora, tunataka kuwa na uwezo wa kutabiri bei za aina tofauti za maboga kwa kutumia modeli moja. Ili kuzingatia aina, tunahitaji kwanza kuibadilisha kuwa fomu ya nambari, au **kusimba**. Kuna njia kadhaa tunazoweza kutumia:\n", + "\n", + "* Usimbaji rahisi wa nambari ambao utajenga jedwali la aina tofauti, kisha kubadilisha jina la aina kwa kiashiria katika jedwali hilo. Hii si wazo bora kwa urarukaji wa mstari, kwa sababu urarukaji wa mstari unazingatia thamani ya nambari ya kiashiria, na thamani ya nambari huenda isiwe na uhusiano wa moja kwa moja na bei.\n", + "* Usimbaji wa one-hot, ambao utabadilisha safu ya `Variety` kuwa safu 4 tofauti, moja kwa kila aina, ambayo itakuwa na 1 ikiwa safu husika ni ya aina fulani, na 0 vinginevyo.\n", + "\n", + "Nambari iliyo hapa chini inaonyesha jinsi tunavyoweza kusimba aina kwa one-hot:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 181, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
FAIRYTALEMINIATUREMIXED HEIRLOOM VARIETIESPIE TYPE
700001
710001
720001
730001
740001
...............
17380100
17390100
17400100
17410100
17420100
\n", + "

415 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " FAIRYTALE MINIATURE MIXED HEIRLOOM VARIETIES PIE TYPE\n", + "70 0 0 0 1\n", + "71 0 0 0 1\n", + "72 0 0 0 1\n", + "73 0 0 0 1\n", + "74 0 0 0 1\n", + "... ... ... ... ...\n", + "1738 0 1 0 0\n", + "1739 0 1 0 0\n", + "1740 0 1 0 0\n", + "1741 0 1 0 0\n", + "1742 0 1 0 0\n", + "\n", + "[415 rows x 4 columns]" + ] + }, + "execution_count": 181, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.get_dummies(new_pumpkins['Variety'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Regression ya Linear kwenye Aina\n", + "\n", + "Sasa tutatumia msimbo ule ule kama hapo juu, lakini badala ya `DayOfYear` tutatumia aina yetu iliyowekwa kwa njia ya one-hot-encoding kama ingizo:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 182, + "metadata": {}, + "outputs": [], + "source": [ + "X = pd.get_dummies(new_pumpkins['Variety'])\n", + "y = new_pumpkins['Price']" + ] + }, + { + "cell_type": "code", + "execution_count": 183, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 5.24 (19.7%)\n", + "Model determination: 0.774085281105197\n" + ] + } + ], + "source": [ + "def run_linear_regression(X,y):\n", + " X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n", + " lin_reg = LinearRegression()\n", + " lin_reg.fit(X_train,y_train)\n", + "\n", + " pred = lin_reg.predict(X_test)\n", + "\n", + " mse = np.sqrt(mean_squared_error(y_test,pred))\n", + " print(f'Mean error: {mse:3.3} ({mse/np.mean(pred)*100:3.3}%)')\n", + "\n", + " score = lin_reg.score(X_train,y_train)\n", + " print('Model determination: ', score)\n", + "\n", + "run_linear_regression(X,y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Tunaweza pia kujaribu kutumia vipengele vingine kwa njia hiyo hiyo, na kuviunganisha na vipengele vya nambari, kama vile `Month` au `DayOfYear`:\n" + ] + }, + { + 
"cell_type": "code", + "execution_count": 184, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 2.84 (10.5%)\n", + "Model determination: 0.9401096672643048\n" + ] + } + ], + "source": [ + "X = pd.get_dummies(new_pumpkins['Variety']) \\\n", + " .join(new_pumpkins['Month']) \\\n", + " .join(pd.get_dummies(new_pumpkins['City'])) \\\n", + " .join(pd.get_dummies(new_pumpkins['Package']))\n", + "y = new_pumpkins['Price']\n", + "\n", + "run_linear_regression(X,y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Regression ya Polynomial\n", + "\n", + "Regression ya polynomial inaweza pia kutumika na vipengele vya kategoria ambavyo vimesimbwa kwa njia ya one-hot. Msimbo wa kufundisha regression ya polynomial kimsingi utakuwa sawa na tulivyoona hapo juu.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 185, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 2.23 (8.25%)\n", + "Model determination: 0.9652870784724543\n" + ] + } + ], + "source": [ + "from sklearn.preprocessing import PolynomialFeatures\n", + "from sklearn.pipeline import make_pipeline\n", + "\n", + "pipeline = make_pipeline(PolynomialFeatures(2), LinearRegression())\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n", + "\n", + "pipeline.fit(X_train,y_train)\n", + "\n", + "pred = pipeline.predict(X_test)\n", + "\n", + "mse = np.sqrt(mean_squared_error(y_test,pred))\n", + "print(f'Mean error: {mse:3.3} ({mse/np.mean(pred)*100:3.3}%)')\n", + "\n", + "score = pipeline.score(X_train,y_train)\n", + "print('Model determination: ', score)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya tafsiri ya AI [Co-op 
Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kwa usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, inashauriwa kutumia huduma ya tafsiri ya kitaalamu ya binadamu. Hatutawajibika kwa maelewano mabaya au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "86193a1ab0ba47eac1c69c1756090baa3b420b3eea7d4aafab8b85f8b312f0c5" + }, + "kernelspec": { + "display_name": "Python 3.7.0 64-bit ('3.7')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.5" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "d77bd89ae7e79780c68c58bab91f13f8", + "translation_date": "2025-09-06T13:11:26+00:00", + "source_file": "2-Regression/3-Linear/solution/notebook.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sw/2-Regression/4-Logistic/notebook.ipynb b/translations/sw/2-Regression/4-Logistic/notebook.ipynb new file mode 100644 index 000000000..4862612b7 --- /dev/null +++ b/translations/sw/2-Regression/4-Logistic/notebook.ipynb @@ -0,0 +1,269 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Aina za Malenge na Rangi\n", + "\n", + "Pakia maktaba zinazohitajika na seti ya data. 
Badilisha data kuwa dataframe inayojumuisha sehemu ya data:\n", + "\n", + "Hebu tuangalie uhusiano kati ya rangi na aina\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NameTypePackageVarietySub VarietyGradeDateLow PriceHigh PriceMostly Low...Unit of SaleQualityConditionAppearanceStorageCropRepackTrans ModeUnnamed: 24Unnamed: 25
0BALTIMORENaN24 inch binsNaNNaNNaN4/29/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
1BALTIMORENaN24 inch binsNaNNaNNaN5/6/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
2BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
3BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
4BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN11/5/1690.0100.090.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " City Name Type Package Variety Sub Variety Grade Date \\\n", + "0 BALTIMORE NaN 24 inch bins NaN NaN NaN 4/29/17 \n", + "1 BALTIMORE NaN 24 inch bins NaN NaN NaN 5/6/17 \n", + "2 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "3 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "4 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 11/5/16 \n", + "\n", + " Low Price High Price Mostly Low ... Unit of Sale Quality Condition \\\n", + "0 270.0 280.0 270.0 ... NaN NaN NaN \n", + "1 270.0 280.0 270.0 ... NaN NaN NaN \n", + "2 160.0 160.0 160.0 ... NaN NaN NaN \n", + "3 160.0 160.0 160.0 ... NaN NaN NaN \n", + "4 90.0 100.0 90.0 ... NaN NaN NaN \n", + "\n", + " Appearance Storage Crop Repack Trans Mode Unnamed: 24 Unnamed: 25 \n", + "0 NaN NaN NaN E NaN NaN NaN \n", + "1 NaN NaN NaN E NaN NaN NaN \n", + "2 NaN NaN NaN N NaN NaN NaN \n", + "3 NaN NaN NaN N NaN NaN NaN \n", + "4 NaN NaN NaN N NaN NaN NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "full_pumpkins = pd.read_csv('../data/US-pumpkins.csv')\n", + "\n", + "full_pumpkins.head()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya tafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kwa usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, inashauriwa kutumia huduma ya tafsiri ya kitaalamu ya binadamu. 
Hatutawajibika kwa maelewano mabaya au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.1" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "dee08c2b49057b0de8b6752c4dbca368", + "translation_date": "2025-09-06T13:26:39+00:00", + "source_file": "2-Regression/4-Logistic/notebook.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sw/2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb b/translations/sw/2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb new file mode 100644 index 000000000..192980941 --- /dev/null +++ b/translations/sw/2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb @@ -0,0 +1,686 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Jenga mfano wa logistic regression - Somo la 4\n", + "\n", + "![Picha ya kulinganisha logistic na linear regression](../../../../../../2-Regression/4-Logistic/images/linear-vs-logistic.png)\n", + "\n", + "#### **[Jaribio la kabla ya somo](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/15/)**\n", + "\n", + "#### Utangulizi\n", + "\n", + "Katika somo hili la mwisho kuhusu Regression, mojawapo ya mbinu za msingi za *klasiki* za ML, tutachunguza Logistic Regression. Ungetumia mbinu hii kugundua mifumo ya kutabiri makundi mawili. Je, pipi hii ni ya chokoleti au la? Je, ugonjwa huu unaambukiza au la? 
Je, mteja huyu atachagua bidhaa hii au la?\n", + "\n", + "Katika somo hili, utajifunza:\n", + "\n", + "- Mbinu za logistic regression\n", + "\n", + "✅ Imarisha uelewa wako wa kufanya kazi na aina hii ya regression katika [moduli ya kujifunza](https://learn.microsoft.com/training/modules/introduction-classification-models/?WT.mc_id=academic-77952-leestott)\n", + "\n", + "## Mahitaji ya awali\n", + "\n", + "Baada ya kufanya kazi na data ya malenge, sasa tunajua vya kutosha kutambua kwamba kuna kundi moja la binary ambalo tunaweza kufanya kazi nalo: `Color`.\n", + "\n", + "Hebu tujenge mfano wa logistic regression ili kutabiri, kwa kuzingatia baadhi ya vigezo, *rangi ya malenge fulani itakuwa nini* (machungwa 🎃 au nyeupe 👻).\n", + "\n", + "> Kwa nini tunazungumzia binary classification katika somo lililojumuishwa kuhusu regression? Ni kwa urahisi wa lugha tu, kwani logistic regression kwa kweli ni [mbinu ya classification](https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression), ingawa inategemea linear. 
Jifunze kuhusu njia nyingine za kuainisha data katika kundi la somo linalofuata.\n", + "\n", + "Kwa somo hili, tutahitaji vifurushi vifuatavyo:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) ni [mkusanyiko wa vifurushi vya R](https://www.tidyverse.org/packages) vilivyoundwa kufanya sayansi ya data kuwa ya haraka, rahisi na ya kufurahisha!\n", + "\n", + "- `tidymodels`: Mfumo wa [tidymodels](https://www.tidymodels.org/) ni [mkusanyiko wa vifurushi](https://www.tidymodels.org/packages/) kwa ajili ya uundaji wa mifano na machine learning.\n", + "\n", + "- `janitor`: Kifurushi cha [janitor](https://github.com/sfirke/janitor) kinatoa zana rahisi za kuchunguza na kusafisha data chafu.\n", + "\n", + "- `ggbeeswarm`: Kifurushi cha [ggbeeswarm](https://github.com/eclarke/ggbeeswarm) kinatoa mbinu za kuunda michoro ya mtindo wa beeswarm kwa kutumia ggplot2.\n", + "\n", + "Unaweza kuvifunga kwa kutumia:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\", \"janitor\", \"ggbeeswarm\"))`\n", + "\n", + "Vinginevyo, script hapa chini itakagua kama una vifurushi vinavyohitajika kukamilisha moduli hii na kuvifunga kwako endapo havipo.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "suppressWarnings(if (!require(\"pacman\"))install.packages(\"pacman\"))\n", + "\n", + "pacman::p_load(tidyverse, tidymodels, janitor, ggbeeswarm)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## **Tafsiri swali**\n", + "\n", + "Kwa madhumuni yetu, tutalielezea kama binary: 'Nyeupe' au 'Sio Nyeupe'. Pia kuna kategoria ya 'mistari' katika seti yetu ya data lakini kuna mifano michache sana ya hiyo, kwa hivyo hatutaitumia. Inatoweka mara tu tunapoondoa thamani za null kutoka kwenye seti ya data, hata hivyo.\n", + "\n", + "> 🎃 Ukweli wa kufurahisha, wakati mwingine tunaita maboga meupe 'maboga ya roho'. 
Hayachongwi kwa urahisi, kwa hivyo si maarufu kama yale ya rangi ya machungwa lakini yanaonekana ya kuvutia! Kwa hivyo tunaweza pia kuunda upya swali letu kama: 'Roho' au 'Sio Roho'. 👻\n", + "\n", + "## **Kuhusu regression ya logistic**\n", + "\n", + "Regression ya logistic inatofautiana na regression ya linear, ambayo ulijifunza hapo awali, kwa njia kadhaa muhimu.\n", + "\n", + "#### **Uainishaji wa binary**\n", + "\n", + "Regression ya logistic haitoi vipengele sawa na regression ya linear. Ya kwanza inatoa utabiri kuhusu `kategoria ya binary` (\"machungwa au sio machungwa\") ilhali ya pili ina uwezo wa kutabiri `thamani zinazoendelea`, kwa mfano ukizingatia asili ya boga na wakati wa kuvuna, *bei yake itaongezeka kwa kiasi gani*.\n", + "\n", + "![Infographic na Dasani Madipalli](../../../../../../2-Regression/4-Logistic/images/pumpkin-classifier.png)\n", + "\n", + "### Uainishaji mwingine\n", + "\n", + "Kuna aina nyingine za regression ya logistic, ikiwa ni pamoja na multinomial na ordinal:\n", + "\n", + "- **Multinomial**, ambayo inahusisha kuwa na zaidi ya kategoria moja - \"Machungwa, Nyeupe, na Mistari\".\n", + "\n", + "- **Ordinal**, ambayo inahusisha kategoria zilizo na mpangilio, muhimu ikiwa tungependa kupanga matokeo yetu kwa mantiki, kama maboga yetu ambayo yamepangwa kwa idadi finyu ya ukubwa (mini,sm,med,lg,xl,xxl).\n", + "\n", + "![Multinomial vs ordinal regression](../../../../../../2-Regression/4-Logistic/images/multinomial-vs-ordinal.png)\n", + "\n", + "#### **Vigezo HAVIHITAJI kuhusiana**\n", + "\n", + "Unakumbuka jinsi regression ya linear ilivyofanya kazi vizuri zaidi na vigezo vilivyohusiana? Regression ya logistic ni kinyume - vigezo havihitaji kuhusiana. 
Hii inafaa kwa data hii ambayo ina uhusiano dhaifu kiasi.\n", + "\n", + "#### **Unahitaji data safi nyingi**\n", + "\n", + "Regression ya logistic itatoa matokeo sahihi zaidi ikiwa utatumia data nyingi; seti yetu ndogo ya data si bora kwa kazi hii, kwa hivyo kumbuka hilo.\n", + "\n", + "✅ Fikiria aina za data ambazo zinaweza kufaa kwa regression ya logistic\n", + "\n", + "## Zoezi - safisha data\n", + "\n", + "Kwanza, safisha data kidogo, ukiondoa thamani za null na kuchagua baadhi tu ya safu:\n", + "\n", + "1. Ongeza msimbo ufuatao:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Load the core tidyverse packages\n", + "library(tidyverse)\n", + "\n", + "# Import the data and clean column names\n", + "pumpkins <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/2-Regression/data/US-pumpkins.csv\") %>% \n", + " clean_names()\n", + "\n", + "# Select desired columns\n", + "pumpkins_select <- pumpkins %>% \n", + " select(c(city_name, package, variety, origin, item_size, color)) \n", + "\n", + "# Drop rows containing missing values and encode color as factor (category)\n", + "pumpkins_select <- pumpkins_select %>% \n", + " drop_na() %>% \n", + " mutate(color = factor(color))\n", + "\n", + "# View the first few rows\n", + "pumpkins_select %>% \n", + " slice_head(n = 5)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Unaweza kila wakati kutazama kwa haraka dataframe yako mpya, kwa kutumia [*glimpse()*](https://pillar.r-lib.org/reference/glimpse.html) kama ilivyo hapa chini:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "pumpkins_select %>% \n", + " glimpse()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Tuthibitishe kwamba tutakuwa tunashughulikia tatizo la 
uainishaji wa binary:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Subset distinct observations in outcome column\n", + "pumpkins_select %>% \n", + " distinct(color)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Uonyeshaji - mchoro wa kategoria\n", + "Hadi sasa umepakia tena data ya malenge na kuisafisha ili kuhifadhi seti ya data inayojumuisha vigezo kadhaa, ikiwemo Rangi. Hebu tuonyeshe dataframe kwenye daftari kwa kutumia maktaba ya ggplot.\n", + "\n", + "Maktaba ya ggplot inatoa njia nzuri za kuonyesha data yako. Kwa mfano, unaweza kulinganisha usambazaji wa data kwa kila Aina na Rangi katika mchoro wa kategoria.\n", + "\n", + "1. Tengeneza mchoro kama huo kwa kutumia kazi ya `geom_bar`, ukitumia data yetu ya malenge, na kubainisha ramani ya rangi kwa kila kategoria ya malenge (machungwa au nyeupe):\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Specify colors for each value of the hue variable\n", + "palette <- c(ORANGE = \"orange\", WHITE = \"wheat\")\n", + "\n", + "# Create the bar plot\n", + "ggplot(pumpkins_select, aes(y = variety, fill = color)) +\n", + " geom_bar(position = \"dodge\") +\n", + " scale_fill_manual(values = palette) +\n", + " labs(y = \"Variety\", fill = \"Color\") +\n", + " theme_minimal()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Kwa kuangalia data, unaweza kuona jinsi data ya Rangi inavyohusiana na Aina.\n", + "\n", + "✅ Kwa kuzingatia mchoro huu wa kategoria, ni uchunguzi gani wa kuvutia unaweza kufikiria?\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Usindikaji wa data: usimbaji wa vipengele\n", + "\n", + "Seti yetu ya data ya malenge ina thamani za maandishi kwa safu zake zote. 
Kufanya kazi na data ya kategoria ni rahisi kwa binadamu lakini si kwa mashine. Algorithimu za kujifunza kwa mashine hufanya kazi vizuri na nambari. Ndiyo sababu usimbaji ni hatua muhimu sana katika awamu ya usindikaji wa data, kwani inatuwezesha kubadilisha data ya kategoria kuwa data ya nambari bila kupoteza taarifa yoyote. Usimbaji mzuri huchangia kujenga modeli nzuri.\n", + "\n", + "Kwa usimbaji wa vipengele kuna aina mbili kuu za usimbaji:\n", + "\n", + "1. **Ordinal encoder**: Inafaa vizuri kwa vigezo vya ordinal, ambavyo ni vigezo vya kategoria ambapo data yake inafuata mpangilio wa kimantiki, kama safu ya `item_size` katika seti yetu ya data. Inaunda ramani ambapo kila kategoria inawakilishwa na nambari, ambayo ni mpangilio wa kategoria katika safu.\n", + "\n", + "2. **Categorical encoder**: Inafaa vizuri kwa vigezo vya nominal, ambavyo ni vigezo vya kategoria ambapo data yake haifuati mpangilio wa kimantiki, kama vipengele vyote tofauti na `item_size` katika seti yetu ya data. Hii ni usimbaji wa one-hot, ambayo inamaanisha kwamba kila kategoria inawakilishwa na safu ya binary: kigezo kilichosimbwa ni sawa na 1 ikiwa malenge yanahusiana na Aina hiyo na 0 vinginevyo.\n", + "\n", + "Tidymodels inatoa kifurushi kingine kizuri: [recipes](https://recipes.tidymodels.org/) - kifurushi cha kusindika data. Tutafafanua `recipe` inayobainisha kwamba safu zote za utabiri zinapaswa kusimbwa kuwa seti ya nambari, `prep` ili kukadiria kiasi na takwimu zinazohitajika kwa operesheni yoyote, na hatimaye `bake` ili kutumia hesabu kwa data mpya.\n", + "\n", + "> Kwa kawaida, recipes hutumika kama usindikaji wa awali kwa uundaji wa modeli ambapo inabainisha hatua gani zinapaswa kutumika kwa seti ya data ili kuifanya iwe tayari kwa uundaji wa modeli. Katika hali hiyo, **inapendekezwa sana** kwamba utumie `workflow()` badala ya kukadiria recipe kwa mikono ukitumia prep na bake. 
Tutaliona hili kwa undani muda si mrefu.\n", + ">\n", + "> Hata hivyo, kwa sasa tunatumia recipes + prep + bake kubainisha hatua gani zinapaswa kutumika kwa seti ya data ili kuifanya iwe tayari kwa uchambuzi wa data na kisha kutoa data iliyosindikwa na hatua zilizotumika.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Preprocess and extract data to allow some data analysis\n", + "baked_pumpkins <- recipe(color ~ ., data = pumpkins_select) %>%\n", + " # Define ordering for item_size column\n", + " step_mutate(item_size = ordered(item_size, levels = c('sml', 'med', 'med-lge', 'lge', 'xlge', 'jbo', 'exjbo'))) %>%\n", + " # Convert factors to numbers using the order defined above (Ordinal encoding)\n", + " step_integer(item_size, zero_based = F) %>%\n", + " # Encode all other predictors using one hot encoding\n", + " step_dummy(all_nominal(), -all_outcomes(), one_hot = TRUE) %>%\n", + " prep(data = pumpkin_select) %>%\n", + " bake(new_data = NULL)\n", + "\n", + "# Display the first few rows of preprocessed data\n", + "baked_pumpkins %>% \n", + " slice_head(n = 5)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "✅ Je, ni faida gani za kutumia ordinal encoder kwa safu ya Item Size?\n", + "\n", + "### Kuchambua uhusiano kati ya vigezo\n", + "\n", + "Sasa kwa kuwa tumeshughulikia data yetu, tunaweza kuchambua uhusiano kati ya vipengele na lebo ili kupata wazo la jinsi ambavyo modeli itaweza kutabiri lebo kwa kuzingatia vipengele. Njia bora ya kufanya uchambuzi wa aina hii ni kwa kuchora data. \n", + "Tutatumia tena kipengele cha ggplot `geom_boxplot` ili kuonyesha uhusiano kati ya Item Size, Variety, na Color katika mchoro wa kategoria. 
Ili kuchora data vizuri zaidi, tutatumia safu ya Item Size iliyosimbwa na safu ya Variety ambayo haijasimbwa.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Define the color palette\n", + "palette <- c(ORANGE = \"orange\", WHITE = \"wheat\")\n", + "\n", + "# We need the encoded Item Size column to use it as the x-axis values in the plot\n", + "pumpkins_select_plot<-pumpkins_select\n", + "pumpkins_select_plot$item_size <- baked_pumpkins$item_size\n", + "\n", + "# Create the grouped box plot\n", + "ggplot(pumpkins_select_plot, aes(x = `item_size`, y = color, fill = color)) +\n", + " geom_boxplot() +\n", + " facet_grid(variety ~ ., scales = \"free_x\") +\n", + " scale_fill_manual(values = palette) +\n", + " labs(x = \"Item Size\", y = \"\") +\n", + " theme_minimal() +\n", + " theme(strip.text = element_text(size = 12)) +\n", + " theme(axis.text.x = element_text(size = 10)) +\n", + " theme(axis.title.x = element_text(size = 12)) +\n", + " theme(axis.title.y = element_blank()) +\n", + " theme(legend.position = \"bottom\") +\n", + " guides(fill = guide_legend(title = \"Color\")) +\n", + " theme(panel.spacing = unit(0.5, \"lines\"))+\n", + " theme(strip.text.y = element_text(size = 4, hjust = 0)) \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Tumia mchoro wa kundi\n", + "\n", + "Kwa kuwa Rangi ni kategoria ya binary (Nyeupe au Sio Nyeupe), inahitaji '[mbinu maalum](https://github.com/rstudio/cheatsheets/blob/main/data-visualization.pdf)' kwa ajili ya uonyeshaji.\n", + "\n", + "Jaribu `mchoro wa kundi` kuonyesha usambazaji wa rangi kulingana na ukubwa wa kipengee.\n", + "\n", + "Tutatumia [pakiti ya ggbeeswarm](https://github.com/eclarke/ggbeeswarm) ambayo inatoa mbinu za kuunda michoro ya mtindo wa nyuki kwa kutumia ggplot2. 
Michoro ya nyuki ni njia ya kuchora alama ambazo kwa kawaida zingekuwa zinagongana ili ziweze kuangukia karibu na kila moja badala yake.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Create beeswarm plots of color and item_size\n", + "baked_pumpkins %>% \n", + " mutate(color = factor(color)) %>% \n", + " ggplot(mapping = aes(x = color, y = item_size, color = color)) +\n", + " geom_quasirandom() +\n", + " scale_color_brewer(palette = \"Dark2\", direction = -1) +\n", + " theme(legend.position = \"none\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Sasa kwa kuwa tuna wazo la uhusiano kati ya makundi mawili ya rangi na kundi kubwa la ukubwa, hebu tuchunguze logistic regression ili kubaini rangi inayowezekana ya malenge fulani.\n", + "\n", + "## Tengeneza modeli yako\n", + "\n", + "Chagua vigezo unavyotaka kutumia katika modeli yako ya uainishaji na gawanya data katika seti za mafunzo na majaribio. 
[rsample](https://rsample.tidymodels.org/), kifurushi katika Tidymodels, kinatoa miundombinu ya kugawanya data kwa ufanisi na kufanya resampling:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Split data into 80% for training and 20% for testing\n", + "set.seed(2056)\n", + "pumpkins_split <- pumpkins_select %>% \n", + " initial_split(prop = 0.8)\n", + "\n", + "# Extract the data in each split\n", + "pumpkins_train <- training(pumpkins_split)\n", + "pumpkins_test <- testing(pumpkins_split)\n", + "\n", + "# Print out the first 5 rows of the training set\n", + "pumpkins_train %>% \n", + " slice_head(n = 5)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "🙌 Sasa tuko tayari kufundisha modeli kwa kuoanisha vipengele vya mafunzo na lebo ya mafunzo (rangi).\n", + "\n", + "Tutaanza kwa kuunda mapishi yanayobainisha hatua za awali za uchakataji ambazo zinapaswa kufanywa kwenye data yetu ili kujiandaa kwa uundaji wa modeli, yaani: kubadilisha vigezo vya kategoria kuwa seti ya nambari. Kama vile `baked_pumpkins`, tunaunda `pumpkins_recipe` lakini hatufanyi `prep` na `bake` kwa sababu itajumuishwa katika mtiririko wa kazi, ambao utaona katika hatua chache zijazo.\n", + "\n", + "Kuna njia kadhaa za kubainisha modeli ya regression ya logistic katika Tidymodels. 
Tazama `?logistic_reg()`. Kwa sasa, tutabainisha modeli ya regression ya logistic kupitia injini ya msingi `stats::glm()`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Create a recipe that specifies preprocessing steps for modelling\n", + "pumpkins_recipe <- recipe(color ~ ., data = pumpkins_train) %>% \n", + " step_mutate(item_size = ordered(item_size, levels = c('sml', 'med', 'med-lge', 'lge', 'xlge', 'jbo', 'exjbo'))) %>%\n", + " step_integer(item_size, zero_based = F) %>% \n", + " step_dummy(all_nominal(), -all_outcomes(), one_hot = TRUE)\n", + "\n", + "# Create a logistic model specification\n", + "log_reg <- logistic_reg() %>% \n", + " set_engine(\"glm\") %>% \n", + " set_mode(\"classification\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Sasa kwa kuwa tuna mapishi na maelezo ya mfano, tunahitaji kupata njia ya kuyafungasha pamoja kuwa kitu kimoja ambacho kwanza kitachakata data (prep+bake kwa nyuma ya pazia), kufundisha mfano kwa data iliyochakatwa, na pia kuruhusu shughuli za baada ya uchakataji ikiwa zitahitajika.\n", + "\n", + "Katika Tidymodels, kitu hiki rahisi kinaitwa [`workflow`](https://workflows.tidymodels.org/) na kwa urahisi kinashikilia vipengele vyako vya uundaji wa mifano.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Bundle modelling components in a workflow\n", + "log_reg_wf <- workflow() %>% \n", + " add_recipe(pumpkins_recipe) %>% \n", + " add_model(log_reg)\n", + "\n", + "# Print out the workflow\n", + "log_reg_wf\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Baada ya mtiririko wa kazi kuwa *umeainishwa*, modeli inaweza `kufundishwa` kwa kutumia kazi ya [`fit()`](https://tidymodels.github.io/parsnip/reference/fit.html). 
Mtiririko wa kazi utatathmini mapishi na kuchakata data kabla ya mafunzo, kwa hivyo hatutalazimika kufanya hivyo kwa mikono kwa kutumia prep na bake.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Train the model\n", + "wf_fit <- log_reg_wf %>% \n", + " fit(data = pumpkins_train)\n", + "\n", + "# Print the trained workflow\n", + "wf_fit\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Mfano unatoa viwango vilivyojifunzwa wakati wa mafunzo.\n", + "\n", + "Sasa tumefundisha mfano kwa kutumia data ya mafunzo, tunaweza kufanya utabiri kwenye data ya majaribio kwa kutumia [parsnip::predict()](https://parsnip.tidymodels.org/reference/predict.model_fit.html). Hebu tuanze kwa kutumia mfano kutabiri lebo za seti yetu ya majaribio na uwezekano wa kila lebo. Wakati uwezekano ni zaidi ya 0.5, darasa linalotabiriwa ni `WHITE` vinginevyo ni `ORANGE`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Make predictions for color and corresponding probabilities\n", + "results <- pumpkins_test %>% select(color) %>% \n", + " bind_cols(wf_fit %>% \n", + " predict(new_data = pumpkins_test)) %>%\n", + " bind_cols(wf_fit %>%\n", + " predict(new_data = pumpkins_test, type = \"prob\"))\n", + "\n", + "# Compare predictions\n", + "results %>% \n", + " slice_head(n = 10)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Hii ni nzuri sana! Inatoa ufahamu zaidi kuhusu jinsi logistic regression inavyofanya kazi.\n", + "\n", + "### Uelewa bora kupitia matriki ya kuchanganya\n", + "\n", + "Kulinganisha kila utabiri na thamani yake halisi ya \"ground truth\" si njia bora sana ya kuamua jinsi mfano unavyotabiri kwa usahihi. 
Kwa bahati nzuri, Tidymodels ina mbinu nyingine chache za kusaidia: [`yardstick`](https://yardstick.tidymodels.org/) - kifurushi kinachotumika kupima ufanisi wa mifano kwa kutumia vipimo vya utendaji.\n", + "\n", + "Kipimo kimoja cha utendaji kinachohusiana na matatizo ya uainishaji ni [`confusion matrix`](https://wikipedia.org/wiki/Confusion_matrix). Matriki ya mkanganyiko inaelezea jinsi mfano wa uainishaji unavyofanya kazi. Matriki ya mkanganyiko huonyesha ni mifano mingapi katika kila darasa iliyoainishwa kwa usahihi na mfano. Katika hali yetu, itakuonyesha ni maboga ya rangi ya machungwa mangapi yaliyoainishwa kama machungwa na ni maboga meupe mangapi yaliyoainishwa kama meupe; matriki ya mkanganyiko pia inaonyesha ni mangapi yaliyoainishwa katika makundi **yasiyo sahihi**.\n", + "\n", + "Kazi ya [**`conf_mat()`**](https://tidymodels.github.io/yardstick/reference/conf_mat.html) kutoka yardstick huhesabu jedwali hili mtambuka (cross-tabulation) la madarasa halisi na yaliyotabiriwa.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Confusion matrix for prediction results\n", + "conf_mat(data = results, truth = color, estimate = .pred_class)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Tuchambue matriki ya mkanganyiko. 
Modeli yetu imepewa jukumu la kuainisha maboga kati ya makundi mawili ya binary, kundi `nyeupe` na kundi `sio-nyeupe`.\n", + "\n", + "- Ikiwa modeli yako inatabiri boga kuwa nyeupe na kwa kweli linahusiana na kundi 'nyeupe', tunaliita `chanya halisi`, linaonyeshwa na namba ya juu kushoto.\n", + "\n", + "- Ikiwa modeli yako inatabiri boga kuwa sio nyeupe na kwa kweli linahusiana na kundi 'nyeupe', tunaliita `hasi ya uongo`, linaonyeshwa na namba ya chini kushoto.\n", + "\n", + "- Ikiwa modeli yako inatabiri boga kuwa nyeupe na kwa kweli linahusiana na kundi 'sio-nyeupe', tunaliita `chanya ya uongo`, linaonyeshwa na namba ya juu kulia.\n", + "\n", + "- Ikiwa modeli yako inatabiri boga kuwa sio nyeupe na kwa kweli linahusiana na kundi 'sio-nyeupe', tunaliita `hasi halisi`, linaonyeshwa na namba ya chini kulia.\n", + "\n", + "| Ukweli |\n", + "|:-----:|\n", + "\n", + "\n", + "| | | |\n", + "|---------------|--------|-------|\n", + "| **Iliyotabiriwa** | NYEUPE | MACHUNGWA |\n", + "| NYEUPE | TP | FP |\n", + "| MACHUNGWA | FN | TN |\n", + "\n", + "Kama ulivyotambua, ni bora kuwa na idadi kubwa ya chanya halisi na hasi halisi, na idadi ndogo ya chanya ya uongo na hasi ya uongo, ambayo inaonyesha kuwa modeli inafanya kazi vizuri zaidi.\n", + "\n", + "Matriki ya mkanganyiko ni muhimu kwa sababu inazalisha vipimo vingine ambavyo vinaweza kutusaidia kutathmini utendaji wa modeli ya uainishaji kwa usahihi zaidi. Hebu tuzipitie:\n", + "\n", + "🎓 Usahihi: `TP/(TP + FP)` inafafanuliwa kama uwiano wa chanya zilizotabiriwa ambazo kwa kweli ni chanya. Pia huitwa [thamani ya utabiri chanya](https://en.wikipedia.org/wiki/Positive_predictive_value \"Positive predictive value\").\n", + "\n", + "🎓 Urejeshaji: `TP/(TP + FN)` inafafanuliwa kama uwiano wa matokeo chanya kati ya idadi ya sampuli ambazo kwa kweli ni chanya. 
Pia inajulikana kama `unyeti`.\n", + "\n", + "🎓 Umaalumu: `TN/(TN + FP)` inafafanuliwa kama uwiano wa matokeo hasi kati ya idadi ya sampuli ambazo kwa kweli ni hasi.\n", + "\n", + "🎓 Usahihi wa jumla: `(TP + TN)/(TP + TN + FP + FN)` Asilimia ya lebo zilizotabiriwa kwa usahihi kwa sampuli.\n", + "\n", + "🎓 Kipimo cha F: Wastani wa uzito wa usahihi na urejeshaji, bora ikiwa ni 1 na mbaya ikiwa ni 0.\n", + "\n", + "Hebu tuhesabu vipimo hivi!\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Combine metric functions and calculate them all at once\n", + "eval_metrics <- metric_set(ppv, recall, spec, f_meas, accuracy)\n", + "eval_metrics(data = results, truth = color, estimate = .pred_class)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Kuonyesha Mchoro wa ROC wa mfano huu\n", + "\n", + "Hebu tufanye uonyeshaji mwingine ili kuona kinachoitwa [`Mchoro wa ROC`](https://en.wikipedia.org/wiki/Receiver_operating_characteristic):\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Make a roc_curve\n", + "results %>% \n", + " roc_curve(color, .pred_ORANGE) %>% \n", + " autoplot()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Mikondo ya ROC mara nyingi hutumika kupata mtazamo wa matokeo ya kiainishi kwa kuzingatia chanya halisi dhidi ya chanya za uongo. Mikondo ya ROC kwa kawaida huonyesha `True Positive Rate`/Unyeti kwenye mhimili wa Y, na `False Positive Rate`/1-Umaalumu kwenye mhimili wa X. Kwa hivyo, mwinuko wa mkondo na nafasi kati ya mstari wa katikati na mkondo ni muhimu: unataka mkondo unaopanda haraka na kuvuka mstari. 
Katika hali yetu, kuna chanya za uongo mwanzoni, kisha mstari unapanda na kuvuka vizuri.\n", + "\n", + "Hatimaye, hebu tutumie `yardstick::roc_auc()` kuhesabu eneo halisi chini ya mkondo. Njia moja ya kufasiri AUC ni kama uwezekano kwamba modeli itaweka mfano chanya wa nasibu juu zaidi kuliko mfano hasi wa nasibu.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Calculate area under curve\n", + "results %>% \n", + " roc_auc(color, .pred_ORANGE)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Matokeo ni karibu `0.975`. Kwa kuwa AUC inatoka 0 hadi 1, unataka alama kubwa, kwani modeli ambayo ni sahihi kwa 100% katika utabiri wake itakuwa na AUC ya 1; katika hali hii, modeli ni *nzuri sana*.\n", + "\n", + "Katika masomo ya baadaye kuhusu uainishaji, utajifunza jinsi ya kuboresha alama za modeli yako (kama kushughulikia data isiyo na uwiano katika hali hii).\n", + "\n", + "## 🚀Changamoto\n", + "\n", + "Kuna mengi zaidi ya kuchunguza kuhusu regression ya logistic! Lakini njia bora ya kujifunza ni kujaribu. Tafuta seti ya data inayofaa kwa aina hii ya uchambuzi na tengeneza modeli nayo. Unajifunza nini? kidokezo: jaribu [Kaggle](https://www.kaggle.com/search?q=logistic+regression+datasets) kwa seti za data za kuvutia.\n", + "\n", + "## Mapitio na Kujisomea\n", + "\n", + "Soma kurasa chache za mwanzo za [karatasi hii kutoka Stanford](https://web.stanford.edu/~jurafsky/slp3/5.pdf) kuhusu matumizi ya vitendo ya regression ya logistic. Fikiria kuhusu kazi ambazo zinafaa zaidi kwa aina moja au nyingine ya kazi za regression ambazo tumejifunza hadi sasa. Nini kingefanya kazi vizuri zaidi?\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya tafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). 
Ingawa tunajitahidi kwa usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, inashauriwa kutumia tafsiri ya kitaalamu ya binadamu. Hatutawajibika kwa maelewano mabaya au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ], + "metadata": { + "anaconda-cloud": "", + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "3.4.1" + }, + "coopTranslator": { + "original_hash": "feaf125f481a89c468fa115bf2aed580", + "translation_date": "2025-09-06T13:34:32+00:00", + "source_file": "2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/translations/sw/2-Regression/4-Logistic/solution/notebook.ipynb b/translations/sw/2-Regression/4-Logistic/solution/notebook.ipynb new file mode 100644 index 000000000..343d3a9ab --- /dev/null +++ b/translations/sw/2-Regression/4-Logistic/solution/notebook.ipynb @@ -0,0 +1,1255 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Regression ya Logistic - Somo la 4\n", + "\n", + "Pakia maktaba zinazohitajika na seti ya data. Badilisha data kuwa fremu ya data inayojumuisha sehemu ndogo ya data:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NameTypePackageVarietySub VarietyGradeDateLow PriceHigh PriceMostly Low...Unit of SaleQualityConditionAppearanceStorageCropRepackTrans ModeUnnamed: 24Unnamed: 25
0BALTIMORENaN24 inch binsNaNNaNNaN4/29/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
1BALTIMORENaN24 inch binsNaNNaNNaN5/6/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
2BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
3BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
4BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN11/5/1690.0100.090.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " City Name Type Package Variety Sub Variety Grade Date \n", + "0 BALTIMORE NaN 24 inch bins NaN NaN NaN 4/29/17 \\\n", + "1 BALTIMORE NaN 24 inch bins NaN NaN NaN 5/6/17 \n", + "2 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "3 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "4 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 11/5/16 \n", + "\n", + " Low Price High Price Mostly Low ... Unit of Sale Quality Condition \n", + "0 270.0 280.0 270.0 ... NaN NaN NaN \\\n", + "1 270.0 280.0 270.0 ... NaN NaN NaN \n", + "2 160.0 160.0 160.0 ... NaN NaN NaN \n", + "3 160.0 160.0 160.0 ... NaN NaN NaN \n", + "4 90.0 100.0 90.0 ... NaN NaN NaN \n", + "\n", + " Appearance Storage Crop Repack Trans Mode Unnamed: 24 Unnamed: 25 \n", + "0 NaN NaN NaN E NaN NaN NaN \n", + "1 NaN NaN NaN E NaN NaN NaN \n", + "2 NaN NaN NaN N NaN NaN NaN \n", + "3 NaN NaN NaN N NaN NaN NaN \n", + "4 NaN NaN NaN N NaN NaN NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "full_pumpkins = pd.read_csv('../../data/US-pumpkins.csv')\n", + "\n", + "full_pumpkins.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NamePackageVarietyOriginItem SizeColor
2BALTIMORE24 inch binsHOWDEN TYPEDELAWAREmedORANGE
3BALTIMORE24 inch binsHOWDEN TYPEVIRGINIAmedORANGE
4BALTIMORE24 inch binsHOWDEN TYPEMARYLANDlgeORANGE
5BALTIMORE24 inch binsHOWDEN TYPEMARYLANDlgeORANGE
6BALTIMORE36 inch binsHOWDEN TYPEMARYLANDmedORANGE
\n", + "
" + ], + "text/plain": [ + " City Name Package Variety Origin Item Size Color\n", + "2 BALTIMORE 24 inch bins HOWDEN TYPE DELAWARE med ORANGE\n", + "3 BALTIMORE 24 inch bins HOWDEN TYPE VIRGINIA med ORANGE\n", + "4 BALTIMORE 24 inch bins HOWDEN TYPE MARYLAND lge ORANGE\n", + "5 BALTIMORE 24 inch bins HOWDEN TYPE MARYLAND lge ORANGE\n", + "6 BALTIMORE 36 inch bins HOWDEN TYPE MARYLAND med ORANGE" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Select the columns we want to use\n", + "columns_to_select = ['City Name','Package','Variety', 'Origin','Item Size', 'Color']\n", + "pumpkins = full_pumpkins.loc[:, columns_to_select]\n", + "\n", + "# Drop rows with missing values\n", + "pumpkins.dropna(inplace=True)\n", + "\n", + "pumpkins.head()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Hebu tuangalie data yetu!\n", + "\n", + "Kwa kuitazama kwa kutumia Seaborn\n" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import seaborn as sns\n", + "# Specify colors for each values of the hue variable\n", + "palette = {\n", + " 'ORANGE': 'orange',\n", + " 'WHITE': 'wheat',\n", + "}\n", + "# Plot a bar plot to visualize how many pumpkins of each variety are orange or white\n", + "sns.catplot(\n", + " data=pumpkins, y=\"Variety\", hue=\"Color\", kind=\"count\",\n", + " palette=palette, \n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Usindikaji wa Awali wa Data\n", + "\n", + "Hebu tusimbue sifa na lebo ili kuweza kuchora data vizuri na kufundisha modeli.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['med', 'lge', 'sml', 'xlge', 'med-lge', 'jbo', 'exjbo'],\n", + " dtype=object)" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Let's look at the different values of the 'Item Size' column\n", + "pumpkins['Item Size'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import OrdinalEncoder\n", + "# Encode the 'Item Size' column using ordinal encoding\n", + "item_size_categories = [['sml', 'med', 'med-lge', 'lge', 'xlge', 'jbo', 'exjbo']]\n", + "ordinal_features = ['Item Size']\n", + "ordinal_encoder = OrdinalEncoder(categories=item_size_categories)" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import OneHotEncoder\n", + "# Encode all the other features using one-hot encoding\n", + "categorical_features = ['City Name', 'Package', 'Variety', 'Origin']\n", + "categorical_encoder = OneHotEncoder(sparse_output=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + 
"outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ord__Item Sizecat__City Name_ATLANTAcat__City Name_BALTIMOREcat__City Name_BOSTONcat__City Name_CHICAGOcat__City Name_COLUMBIAcat__City Name_DALLAScat__City Name_DETROITcat__City Name_LOS ANGELEScat__City Name_MIAMI...cat__Origin_MICHIGANcat__Origin_NEW JERSEYcat__Origin_NEW YORKcat__Origin_NORTH CAROLINAcat__Origin_OHIOcat__Origin_PENNSYLVANIAcat__Origin_TENNESSEEcat__Origin_TEXAScat__Origin_VERMONTcat__Origin_VIRGINIA
21.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
31.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.01.0
43.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
53.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
61.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
\n", + "

5 rows × 48 columns

\n", + "
" + ], + "text/plain": [ + " ord__Item Size cat__City Name_ATLANTA cat__City Name_BALTIMORE \n", + "2 1.0 0.0 1.0 \\\n", + "3 1.0 0.0 1.0 \n", + "4 3.0 0.0 1.0 \n", + "5 3.0 0.0 1.0 \n", + "6 1.0 0.0 1.0 \n", + "\n", + " cat__City Name_BOSTON cat__City Name_CHICAGO cat__City Name_COLUMBIA \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__City Name_DALLAS cat__City Name_DETROIT cat__City Name_LOS ANGELES \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__City Name_MIAMI ... cat__Origin_MICHIGAN cat__Origin_NEW JERSEY \n", + "2 0.0 ... 0.0 0.0 \\\n", + "3 0.0 ... 0.0 0.0 \n", + "4 0.0 ... 0.0 0.0 \n", + "5 0.0 ... 0.0 0.0 \n", + "6 0.0 ... 0.0 0.0 \n", + "\n", + " cat__Origin_NEW YORK cat__Origin_NORTH CAROLINA cat__Origin_OHIO \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__Origin_PENNSYLVANIA cat__Origin_TENNESSEE cat__Origin_TEXAS \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__Origin_VERMONT cat__Origin_VIRGINIA \n", + "2 0.0 0.0 \n", + "3 0.0 1.0 \n", + "4 0.0 0.0 \n", + "5 0.0 0.0 \n", + "6 0.0 0.0 \n", + "\n", + "[5 rows x 48 columns]" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.compose import ColumnTransformer\n", + "ct = ColumnTransformer(transformers=[\n", + " ('ord', ordinal_encoder, ordinal_features),\n", + " ('cat', categorical_encoder, categorical_features)\n", + " ])\n", + "# Get the encoded features as a pandas DataFrame\n", + "ct.set_output(transform='pandas')\n", + "encoded_features = ct.fit_transform(pumpkins)\n", + "encoded_features.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + 
{ + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ord__Item Sizecat__City Name_ATLANTAcat__City Name_BALTIMOREcat__City Name_BOSTONcat__City Name_CHICAGOcat__City Name_COLUMBIAcat__City Name_DALLAScat__City Name_DETROITcat__City Name_LOS ANGELEScat__City Name_MIAMI...cat__Origin_NEW JERSEYcat__Origin_NEW YORKcat__Origin_NORTH CAROLINAcat__Origin_OHIOcat__Origin_PENNSYLVANIAcat__Origin_TENNESSEEcat__Origin_TEXAScat__Origin_VERMONTcat__Origin_VIRGINIAColor
21.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00
31.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00
43.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00
53.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00
61.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00
\n", + "

5 rows × 49 columns

\n", + "
" + ], + "text/plain": [ + " ord__Item Size cat__City Name_ATLANTA cat__City Name_BALTIMORE \n", + "2 1.0 0.0 1.0 \\\n", + "3 1.0 0.0 1.0 \n", + "4 3.0 0.0 1.0 \n", + "5 3.0 0.0 1.0 \n", + "6 1.0 0.0 1.0 \n", + "\n", + " cat__City Name_BOSTON cat__City Name_CHICAGO cat__City Name_COLUMBIA \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__City Name_DALLAS cat__City Name_DETROIT cat__City Name_LOS ANGELES \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__City Name_MIAMI ... cat__Origin_NEW JERSEY cat__Origin_NEW YORK \n", + "2 0.0 ... 0.0 0.0 \\\n", + "3 0.0 ... 0.0 0.0 \n", + "4 0.0 ... 0.0 0.0 \n", + "5 0.0 ... 0.0 0.0 \n", + "6 0.0 ... 0.0 0.0 \n", + "\n", + " cat__Origin_NORTH CAROLINA cat__Origin_OHIO cat__Origin_PENNSYLVANIA \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__Origin_TENNESSEE cat__Origin_TEXAS cat__Origin_VERMONT \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__Origin_VIRGINIA Color \n", + "2 0.0 0 \n", + "3 1.0 0 \n", + "4 0.0 0 \n", + "5 0.0 0 \n", + "6 0.0 0 \n", + "\n", + "[5 rows x 49 columns]" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.preprocessing import LabelEncoder\n", + "# Encode the 'Color' column using label encoding\n", + "label_encoder = LabelEncoder()\n", + "encoded_label = label_encoder.fit_transform(pumpkins['Color'])\n", + "encoded_pumpkins = encoded_features.assign(Color=encoded_label)\n", + "encoded_pumpkins.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['ORANGE', 'WHITE']" + ] + }, + "execution_count": 71, + "metadata": 
{}, + "output_type": "execute_result" + } + ], + "source": [ + "# Let's look at the mapping between the encoded values and the original values\n", + "list(label_encoder.inverse_transform([0, 1]))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "palette = {\n", + " 'ORANGE': 'orange',\n", + " 'WHITE': 'wheat',\n", + "}\n", + "# We need the encoded Item Size column to use it as the x-axis values in the plot\n", + "pumpkins['Item Size'] = encoded_pumpkins['ord__Item Size']\n", + "\n", + "g = sns.catplot(\n", + " data=pumpkins,\n", + " x=\"Item Size\", y=\"Color\", row='Variety',\n", + " kind=\"box\", orient=\"h\",\n", + " sharex=False, margin_titles=True,\n", + " height=1.8, aspect=4, palette=palette,\n", + ")\n", + "# Defining axis labels \n", + "g.set(xlabel=\"Item Size\", ylabel=\"\").set(xlim=(0,6))\n", + "g.set_titles(row_template=\"{row_name}\")\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "warnings.filterwarnings(action='ignore', category=UserWarning, module='seaborn')" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Suppressing warning message claiming that a portion of points cannot be placed into the plot due to the high number of data points\n", + "import warnings\n", + "warnings.filterwarnings(action='ignore', category=UserWarning, module='seaborn')\n", + "\n", + "palette = {\n", + " 0: 'orange',\n", + " 1: 'wheat'\n", + "}\n", + "sns.swarmplot(x=\"Color\", y=\"ord__Item Size\", hue=\"Color\", data=encoded_pumpkins, palette=palette)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Angalia**: Kupuuza maonyo SIYO mbinu bora na inapaswa kuepukwa, inapowezekana. Maonyo mara nyingi huwa na ujumbe muhimu unaotusaidia kuboresha msimbo wetu na kutatua tatizo. \n", + "Sababu ya kupuuza onyo hili maalum ni kuhakikisha usomaji wa mchoro. Kuchora alama zote za data kwa ukubwa mdogo wa alama, huku tukidumisha uthabiti na rangi ya palette, kunasababisha taswira isiyo wazi.\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "# X is the encoded features\n", + "X = encoded_pumpkins[encoded_pumpkins.columns.difference(['Color'])]\n", + "# y is the encoded label\n", + "y = encoded_pumpkins['Color']\n", + "\n", + "# Split the data into training and test sets\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.94 0.98 0.96 166\n", + " 1 0.85 0.67 0.75 33\n", + "\n", + " accuracy 0.92 199\n", + " macro avg 0.89 0.82 0.85 199\n", + "weighted avg 0.92 0.92 0.92 199\n", + "\n", + "Predicted 
labels: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0\n", + " 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 1 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 1 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0\n", + " 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1\n", + " 0 0 0 1 0 0 0 0 0 0 0 0 1 1]\n", + "F1-score: 0.7457627118644068\n" + ] + } + ], + "source": [ + "from sklearn.metrics import f1_score, classification_report \n", + "from sklearn.linear_model import LogisticRegression\n", + "\n", + "# Train a logistic regression model on the pumpkin dataset\n", + "model = LogisticRegression()\n", + "model.fit(X_train, y_train)\n", + "predictions = model.predict(X_test)\n", + "\n", + "# Evaluate the model and print the results\n", + "print(classification_report(y_test, predictions))\n", + "print('Predicted labels: ', predictions)\n", + "print('F1-score: ', f1_score(y_test, predictions))" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[162, 4],\n", + " [ 11, 22]])" + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.metrics import confusion_matrix\n", + "confusion_matrix(y_test, predictions)" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from sklearn.metrics import roc_curve, roc_auc_score\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "\n", + "y_scores = model.predict_proba(X_test)\n", + "# calculate ROC curve\n", + "fpr, tpr, thresholds = roc_curve(y_test, y_scores[:,1])\n", + "\n", + "# plot ROC curve\n", + "fig = plt.figure(figsize=(6, 6))\n", + "# Plot the diagonal 50% line\n", + "plt.plot([0, 1], [0, 1], 'k--')\n", + "# Plot the FPR and TPR achieved by our model\n", + "plt.plot(fpr, tpr)\n", + "plt.xlabel('False Positive Rate')\n", + "plt.ylabel('True Positive Rate')\n", + "plt.title('ROC Curve')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.9749908725812341\n" + ] + } + ], + "source": [ + "# Calculate AUC score\n", + "auc = roc_auc_score(y_test,y_scores[:,1])\n", + "print(auc)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya tafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kwa usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, inashauriwa kutumia tafsiri ya kitaalamu ya binadamu. 
Hatutawajibika kwa maelewano mabaya au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "vscode": { + "interpreter": { + "hash": "949777d72b0d2535278d3dc13498b2535136f6dfe0678499012e853ee9abcab1" + } + }, + "coopTranslator": { + "original_hash": "ef50cc584e0b79412610cc7da15e1f86", + "translation_date": "2025-09-06T13:27:50+00:00", + "source_file": "2-Regression/4-Logistic/solution/notebook.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sw/3-Web-App/1-Web-App/notebook.ipynb b/translations/sw/3-Web-App/1-Web-App/notebook.ipynb new file mode 100644 index 000000000..e69de29bb diff --git a/translations/sw/3-Web-App/1-Web-App/solution/notebook.ipynb b/translations/sw/3-Web-App/1-Web-App/solution/notebook.ipynb new file mode 100644 index 000000000..03b48afaf --- /dev/null +++ b/translations/sw/3-Web-App/1-Web-App/solution/notebook.ipynb @@ -0,0 +1,267 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": 
"70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "5fa2e8f4584c78250ca9729b46562ceb", + "translation_date": "2025-09-06T14:32:14+00:00", + "source_file": "3-Web-App/1-Web-App/solution/notebook.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " datetime city state country shape \\\n", + "0 10/10/1949 20:30 san marcos tx us cylinder \n", + "1 10/10/1949 21:00 lackland afb tx NaN light \n", + "2 10/10/1955 17:00 chester (uk/england) NaN gb circle \n", + "3 10/10/1956 21:00 edna tx us circle \n", + "4 10/10/1960 20:00 kaneohe hi us light \n", + "\n", + " duration (seconds) duration (hours/min) \\\n", + "0 2700.0 45 minutes \n", + "1 7200.0 1-2 hrs \n", + "2 20.0 20 seconds \n", + "3 20.0 1/2 hour \n", + "4 900.0 15 minutes \n", + "\n", + " comments date posted latitude \\\n", + "0 This event took place in early fall around 194... 4/27/2004 29.883056 \n", + "1 1949 Lackland AFB, TX. Lights racing acros... 12/16/2005 29.384210 \n", + "2 Green/Orange circular disc over Chester, En... 1/21/2008 53.200000 \n", + "3 My older brother and twin sister were leaving ... 1/17/2004 28.978333 \n", + "4 AS a Marine 1st Lt. flying an FJ4B fighter/att... 1/22/2004 21.418056 \n", + "\n", + " longitude \n", + "0 -97.941111 \n", + "1 -98.581082 \n", + "2 -2.916667 \n", + "3 -96.645833 \n", + "4 -157.803611 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
datetimecitystatecountryshapeduration (seconds)duration (hours/min)commentsdate postedlatitudelongitude
010/10/1949 20:30san marcostxuscylinder2700.045 minutesThis event took place in early fall around 194...4/27/200429.883056-97.941111
110/10/1949 21:00lackland afbtxNaNlight7200.01-2 hrs1949 Lackland AFB&#44 TX. Lights racing acros...12/16/200529.384210-98.581082
210/10/1955 17:00chester (uk/england)NaNgbcircle20.020 secondsGreen/Orange circular disc over Chester&#44 En...1/21/200853.200000-2.916667
310/10/1956 21:00ednatxuscircle20.01/2 hourMy older brother and twin sister were leaving ...1/17/200428.978333-96.645833
410/10/1960 20:00kaneohehiuslight900.015 minutesAS a Marine 1st Lt. flying an FJ4B fighter/att...1/22/200421.418056-157.803611
\n
" + }, + "metadata": {}, + "execution_count": 23 + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "ufos = pd.read_csv('../data/ufos.csv')\n", + "ufos.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array(['us', nan, 'gb', 'ca', 'au', 'de'], dtype=object)" + ] + }, + "metadata": {}, + "execution_count": 24 + } + ], + "source": [ + "\n", + "ufos = pd.DataFrame({'Seconds': ufos['duration (seconds)'], 'Country': ufos['country'],'Latitude': ufos['latitude'],'Longitude': ufos['longitude']})\n", + "\n", + "ufos.Country.unique()\n", + "\n", + "# 0 au, 1 ca, 2 de, 3 gb, 4 us" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\nInt64Index: 25863 entries, 2 to 80330\nData columns (total 4 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Seconds 25863 non-null float64\n 1 Country 25863 non-null object \n 2 Latitude 25863 non-null float64\n 3 Longitude 25863 non-null float64\ndtypes: float64(3), object(1)\nmemory usage: 1010.3+ KB\n" + ] + } + ], + "source": [ + "ufos.dropna(inplace=True)\n", + "\n", + "ufos = ufos[(ufos['Seconds'] >= 1) & (ufos['Seconds'] <= 60)]\n", + "\n", + "ufos.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Seconds Country Latitude Longitude\n", + "2 20.0 3 53.200000 -2.916667\n", + "3 20.0 4 28.978333 -96.645833\n", + "14 30.0 4 35.823889 -80.253611\n", + "23 60.0 4 45.582778 -122.352222\n", + "24 3.0 3 51.783333 -0.783333" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
SecondsCountryLatitudeLongitude
220.0353.200000-2.916667
320.0428.978333-96.645833
1430.0435.823889-80.253611
2360.0445.582778-122.352222
243.0351.783333-0.783333
\n
" + }, + "metadata": {}, + "execution_count": 26 + } + ], + "source": [ + "from sklearn.preprocessing import LabelEncoder\n", + "\n", + "ufos['Country'] = LabelEncoder().fit_transform(ufos['Country'])\n", + "\n", + "ufos.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "\n", + "Selected_features = ['Seconds','Latitude','Longitude']\n", + "\n", + "X = ufos[Selected_features]\n", + "y = ufos['Country']\n", + "\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", + " FutureWarning)\n", + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n", + " \"this warning.\", FutureWarning)\n", + " precision recall f1-score support\n", + "\n", + " 0 1.00 1.00 1.00 41\n", + " 1 1.00 0.02 0.05 250\n", + " 2 0.00 0.00 0.00 8\n", + " 3 0.94 1.00 0.97 131\n", + " 4 0.95 1.00 0.97 4743\n", + "\n", + " accuracy 0.95 5173\n", + " macro avg 0.78 0.60 0.60 5173\n", + "weighted avg 0.95 0.95 0.93 5173\n", + "\n", + "Predicted labels: [4 4 4 ... 
3 4 4]\n", + "Accuracy: 0.9512855209742895\n", + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/metrics/classification.py:1437: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.\n", + " 'precision', 'predicted', average, warn_for)\n" + ] + } + ], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import accuracy_score, classification_report \n", + "from sklearn.linear_model import LogisticRegression\n", + "model = LogisticRegression()\n", + "model.fit(X_train, y_train)\n", + "predictions = model.predict(X_test)\n", + "\n", + "print(classification_report(y_test, predictions))\n", + "print('Predicted labels: ', predictions)\n", + "print('Accuracy: ', accuracy_score(y_test, predictions))\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[3]\n" + ] + } + ], + "source": [ + "import pickle\n", + "model_filename = 'ufo-model.pkl'\n", + "pickle.dump(model, open(model_filename,'wb'))\n", + "\n", + "model = pickle.load(open('ufo-model.pkl','rb'))\n", + "print(model.predict([[50,44,-12]]))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya tafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kwa usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, inashauriwa kutumia tafsiri ya kitaalamu ya binadamu. 
Hatutawajibika kwa maelewano mabaya au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sw/4-Classification/1-Introduction/notebook.ipynb b/translations/sw/4-Classification/1-Introduction/notebook.ipynb new file mode 100644 index 000000000..c5095a128 --- /dev/null +++ b/translations/sw/4-Classification/1-Introduction/notebook.ipynb @@ -0,0 +1,39 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "d544ef384b7ba73757d830a72372a7f2", + "translation_date": "2025-09-06T14:50:53+00:00", + "source_file": "4-Classification/1-Introduction/notebook.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya tafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kwa usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, inashauriwa kutumia huduma ya tafsiri ya kitaalamu ya binadamu. 
Hatutawajibika kwa maelewano mabaya au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sw/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb b/translations/sw/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb new file mode 100644 index 000000000..4afca5939 --- /dev/null +++ b/translations/sw/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb @@ -0,0 +1,721 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_10-R.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "2621e24705e8100893c9bf84e0fc8aef", + "translation_date": "2025-09-06T14:58:56+00:00", + "source_file": "4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb", + "language_code": "sw" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "ItETB4tSFprR" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Utangulizi wa uainishaji: Kusafisha, kuandaa, na kuonyesha data yako\n", + "\n", + "Katika masomo haya manne, utachunguza kipengele muhimu cha ujifunzaji wa mashine wa kawaida - *uainishaji*. Tutapitia matumizi ya algoriti mbalimbali za uainishaji kwa kutumia seti ya data kuhusu vyakula vya ajabu vya Asia na India. Tumaini una njaa!\n", + "\n", + "

\n", + " \n", + "

Sherehekea vyakula vya pan-Asia katika masomo haya! Picha na Jen Looper
\n", + "\n", + "\n", + "\n", + "\n", + "Uainishaji ni aina ya [ujifunzaji unaosimamiwa](https://wikipedia.org/wiki/Supervised_learning) ambao una mfanano mkubwa na mbinu za regression. Katika uainishaji, unafundisha modeli kutabiri ni `kategoria` gani kipengele fulani kinachohusiana nacho. Ikiwa ujifunzaji wa mashine unahusu kutabiri thamani au majina ya vitu kwa kutumia seti za data, basi uainishaji kwa ujumla unagawanyika katika makundi mawili: *uainishaji wa binary* na *uainishaji wa multiclass*.\n", + "\n", + "Kumbuka:\n", + "\n", + "- **Linear regression** ilikusaidia kutabiri uhusiano kati ya vigezo na kufanya utabiri sahihi kuhusu mahali ambapo kipengele kipya kingeangukia kwa uhusiano na mstari huo. Kwa mfano, ungeweza kutabiri thamani ya nambari kama vile *bei ya malenge itakuwa kiasi gani mwezi wa Septemba dhidi ya Desemba*.\n", + "\n", + "- **Logistic regression** ilikusaidia kugundua \"kategoria za binary\": kwa kiwango hiki cha bei, *je, malenge hili ni la rangi ya machungwa au si la machungwa*?\n", + "\n", + "Uainishaji hutumia algoriti mbalimbali kuamua njia nyingine za kutambua lebo au darasa la kipengele cha data. Hebu tufanye kazi na data hii ya vyakula ili kuona kama, kwa kuangalia kikundi cha viungo, tunaweza kuamua asili ya vyakula hivyo.\n", + "\n", + "### [**Jaribio la awali la somo**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/19/)\n", + "\n", + "### **Utangulizi**\n", + "\n", + "Uainishaji ni moja ya shughuli za msingi za mtafiti wa ujifunzaji wa mashine na mwanasayansi wa data. 
Kutoka uainishaji wa msingi wa thamani ya binary (\"je, barua pepe hii ni spam au si spam?\"), hadi uainishaji wa picha ngumu na kugawanya kwa kutumia maono ya kompyuta, daima ni muhimu kuwa na uwezo wa kupanga data katika madarasa na kuuliza maswali kuhusu data hiyo.\n", + "\n", + "Kwa kusema mchakato kwa njia ya kisayansi zaidi, mbinu yako ya uainishaji inaunda modeli ya utabiri inayokuwezesha kuonyesha uhusiano kati ya vigezo vya ingizo na vigezo vya matokeo.\n", + "\n", + "

\n", + " \n", + "

Masuala ya binary dhidi ya multiclass kwa algoriti za uainishaji kushughulikia. Infographic na Jen Looper
\n", + "\n", + "\n", + "\n", + "Kabla ya kuanza mchakato wa kusafisha data yetu, kuionyesha, na kuandaa kwa kazi zetu za ML, hebu tujifunze kidogo kuhusu njia mbalimbali ambazo ujifunzaji wa mashine unaweza kutumika kuainisha data.\n", + "\n", + "Iliyotokana na [takwimu](https://wikipedia.org/wiki/Statistical_classification), uainishaji kwa kutumia ujifunzaji wa mashine wa kawaida hutumia vipengele, kama vile `smoker`, `weight`, na `age` kuamua *uwezekano wa kupata ugonjwa X*. Kama mbinu ya ujifunzaji unaosimamiwa inayofanana na mazoezi ya regression uliyofanya awali, data yako ina lebo na algoriti za ML hutumia lebo hizo kuainisha na kutabiri madarasa (au 'vipengele') vya seti ya data na kuyapanga katika kikundi au matokeo.\n", + "\n", + "✅ Chukua muda kufikiria seti ya data kuhusu vyakula. Je, modeli ya multiclass ingeweza kujibu nini? Je, modeli ya binary ingeweza kujibu nini? Je, ungependa kuamua kama chakula fulani kina uwezekano wa kutumia fenugreek? Je, ungependa kuona kama, ukipewa zawadi ya mfuko wa mboga uliojaa star anise, artichokes, cauliflower, na horseradish, ungeweza kuunda chakula cha kawaida cha Kihindi?\n", + "\n", + "### **Habari 'classifier'**\n", + "\n", + "Swali tunalotaka kuuliza kuhusu seti hii ya data ya vyakula ni swali la **multiclass**, kwa kuwa tuna vyakula vya kitaifa kadhaa vya kufanya kazi navyo. Ukipewa kundi la viungo, ni darasa gani kati ya haya mengi data itafaa?\n", + "\n", + "Tidymodels inatoa algoriti mbalimbali za kutumia kuainisha data, kulingana na aina ya tatizo unalotaka kutatua. 
Katika masomo mawili yajayo, utajifunza kuhusu baadhi ya algoriti hizi.\n", + "\n", + "#### **Mahitaji ya awali**\n", + "\n", + "Kwa somo hili, tutahitaji vifurushi vifuatavyo kusafisha, kuandaa na kuonyesha data yetu:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) ni [mkusanyiko wa vifurushi vya R](https://www.tidyverse.org/packages) iliyoundwa kufanya sayansi ya data kuwa ya haraka, rahisi na ya kufurahisha!\n", + "\n", + "- `tidymodels`: Mfumo wa [tidymodels](https://www.tidymodels.org/) ni [mkusanyiko wa vifurushi](https://www.tidymodels.org/packages/) kwa ajili ya uundaji wa modeli na ujifunzaji wa mashine.\n", + "\n", + "- `DataExplorer`: Kifurushi cha [DataExplorer](https://cran.r-project.org/web/packages/DataExplorer/vignettes/dataexplorer-intro.html) kinalenga kurahisisha na kuendesha mchakato wa EDA na uzalishaji wa ripoti.\n", + "\n", + "- `themis`: Kifurushi cha [themis](https://themis.tidymodels.org/) kinatoa Hatua za Ziada za Mapishi kwa Kushughulikia Data Isiyosawazishwa.\n", + "\n", + "Unaweza kuvisakinisha kwa:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\", \"DataExplorer\", \"themis\", \"here\"))`\n", + "\n", + "Vinginevyo, script iliyo hapa chini inakagua kama una vifurushi vinavyohitajika kukamilisha moduli hii na kuvisakinisha kwa niaba yako endapo vinakosekana.\n" + ], + "metadata": { + "id": "ri5bQxZ-Fz_0" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "suppressWarnings(if (!require(\"pacman\"))install.packages(\"pacman\"))\r\n", + "\r\n", + "pacman::p_load(tidyverse, tidymodels, DataExplorer, themis, here)" + ], + "outputs": [], + "metadata": { + "id": "KIPxa4elGAPI" + } + }, + { + "cell_type": "markdown", + "source": [ + "Tutapakia baadaye vifurushi hivi vya ajabu na kuvifanya viweze kupatikana katika kikao chetu cha sasa cha R.
(Hii ni kwa madhumuni ya maelezo tu, `pacman::p_load()` tayari ilifanya hivyo kwako)\n" + ], + "metadata": { + "id": "YkKAxOJvGD4C" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Zoezi - safisha na sawazisha data yako\n", + "\n", + "Kazi ya kwanza kabla ya kuanza mradi huu ni kusafisha na **kusawazisha** data yako ili kupata matokeo bora.\n", + "\n", + "Hebu tukutane na data!🕵️\n" + ], + "metadata": { + "id": "PFkQDlk0GN5O" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Import data\r\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/4-Classification/data/cuisines.csv\")\r\n", + "\r\n", + "# View the first 5 rows\r\n", + "df %>% \r\n", + " slice_head(n = 5)\r\n" + ], + "outputs": [], + "metadata": { + "id": "Qccw7okxGT0S" + } + }, + { + "cell_type": "markdown", + "source": [ + "Inavutia! Kwa kuangalia, safu wima ya kwanza ni aina ya safu wima ya `id`. Hebu tupate maelezo zaidi kuhusu data.\n" + ], + "metadata": { + "id": "XrWnlgSrGVmR" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Basic information about the data\r\n", + "df %>%\r\n", + " introduce()\r\n", + "\r\n", + "# Visualize basic information above\r\n", + "df %>% \r\n", + " plot_intro(ggtheme = theme_light())" + ], + "outputs": [], + "metadata": { + "id": "4UcGmxRxGieA" + } + }, + { + "cell_type": "markdown", + "source": [ + "Kutoka kwa matokeo, tunaweza kuona mara moja kwamba tuna safu `2448` na safu wima `385` na thamani `0` zilizokosekana. Pia tuna safu wima moja ya kategoria (discrete), *cuisine*.\n", + "\n", + "## Zoezi - kujifunza kuhusu vyakula\n", + "\n", + "Sasa kazi inaanza kuwa ya kuvutia zaidi.
Hebu tugundue usambazaji wa data, kwa kila aina ya chakula.\n" + ], + "metadata": { + "id": "AaPubl__GmH5" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Count observations per cuisine\r\n", + "df %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(n)\r\n", + "\r\n", + "# Plot the distribution\r\n", + "theme_set(theme_light())\r\n", + "df %>% \r\n", + " count(cuisine) %>% \r\n", + " ggplot(mapping = aes(x = n, y = reorder(cuisine, -n))) +\r\n", + " geom_col(fill = \"midnightblue\", alpha = 0.7) +\r\n", + " ylab(\"cuisine\")" + ], + "outputs": [], + "metadata": { + "id": "FRsBVy5eGrrv" + } + }, + { + "cell_type": "markdown", + "source": [ + "Kuna idadi ndogo ya vyakula, lakini usambazaji wa data hauko sawa. Unaweza kurekebisha hilo! Kabla ya kufanya hivyo, chunguza kidogo zaidi.\n", + "\n", + "Sasa, wacha tugawanye kila aina ya chakula kwenye tibble yake binafsi na tujue ni kiasi gani cha data kinapatikana (safu, nguzo) kwa kila aina ya chakula.\n", + "\n", + "> [Tibble](https://tibble.tidyverse.org/) ni fremu ya data ya kisasa.\n", + "\n", + "

\n", + " \n", + "

Sanaa na @allison_horst
\n" + ], + "metadata": { + "id": "vVvyDb1kG2in" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Create individual tibble for the cuisines\r\n", + "thai_df <- df %>% \r\n", + " filter(cuisine == \"thai\")\r\n", + "japanese_df <- df %>% \r\n", + " filter(cuisine == \"japanese\")\r\n", + "chinese_df <- df %>% \r\n", + " filter(cuisine == \"chinese\")\r\n", + "indian_df <- df %>% \r\n", + " filter(cuisine == \"indian\")\r\n", + "korean_df <- df %>% \r\n", + " filter(cuisine == \"korean\")\r\n", + "\r\n", + "\r\n", + "# Find out how much data is available per cuisine\r\n", + "cat(\" thai df:\", dim(thai_df), \"\\n\",\r\n", + " \"japanese df:\", dim(japanese_df), \"\\n\",\r\n", + " \"chinese_df:\", dim(chinese_df), \"\\n\",\r\n", + " \"indian_df:\", dim(indian_df), \"\\n\",\r\n", + " \"korean_df:\", dim(korean_df))" + ], + "outputs": [], + "metadata": { + "id": "0TvXUxD3G8Bk" + } + }, + { + "cell_type": "markdown", + "source": [ + "## **Zoezi - Kugundua viungo bora kwa kila aina ya vyakula kwa kutumia dplyr**\n", + "\n", + "Sasa unaweza kuchunguza zaidi data na kujifunza viungo vya kawaida kwa kila aina ya vyakula. Unapaswa kuondoa data inayojirudia ambayo inasababisha mkanganyiko kati ya aina za vyakula, kwa hivyo hebu tujifunze kuhusu tatizo hili.\n", + "\n", + "Tengeneza kazi `create_ingredient()` katika R ambayo inarudisha dataframe ya viungo. Kazi hii itaanza kwa kuondoa safu isiyo ya msaada na kuchambua viungo kulingana na idadi yao.\n", + "\n", + "Muundo wa msingi wa kazi katika R ni:\n", + "\n", + "`myFunction <- function(arglist){`\n", + "\n", + "**`...`**\n", + "\n", + "**`return`**`(value)`\n", + "\n", + "`}`\n", + "\n", + "Utangulizi mzuri wa kazi za R unaweza kupatikana [hapa](https://skirmer.github.io/presentations/functions_with_r.html#1).\n", + "\n", + "Hebu tuanze moja kwa moja! Tutatumia [vitenzi vya dplyr](https://dplyr.tidyverse.org/) ambavyo tumekuwa tukijifunza katika masomo yetu ya awali. 
Kama ukumbusho:\n", + "\n", + "- `dplyr::select()`: hukusaidia kuchagua ni **safu wima** zipi za kuweka au kuondoa.\n", + "\n", + "- `tidyr::pivot_longer()`: hukusaidia \"kurefusha\" data, kuongeza idadi ya safu na kupunguza idadi ya safu wima.\n", + "\n", + "- `dplyr::group_by()` na `dplyr::summarise()`: hukusaidia kupata takwimu za muhtasari kwa vikundi tofauti, na kuziweka katika jedwali zuri.\n", + "\n", + "- `dplyr::filter()`: huunda subset ya data inayojumuisha tu safu zinazokidhi masharti yako.\n", + "\n", + "- `dplyr::mutate()`: hukusaidia kuunda au kurekebisha safu wima.\n", + "\n", + "Angalia [mafunzo ya learnr yaliyojaa *sanaa*](https://allisonhorst.shinyapps.io/dplyr-learnr/#section-welcome) na Allison Horst, ambayo yanatambulisha baadhi ya kazi muhimu za kushughulikia data katika dplyr *(sehemu ya Tidyverse)*\n" + ], + "metadata": { + "id": "K3RF5bSCHC76" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Creates a functions that returns the top ingredients by class\r\n", + "\r\n", + "create_ingredient <- function(df){\r\n", + " \r\n", + " # Drop the id column which is the first colum\r\n", + " ingredient_df = df %>% select(-1) %>% \r\n", + " # Transpose data to a long format\r\n", + " pivot_longer(!cuisine, names_to = \"ingredients\", values_to = \"count\") %>% \r\n", + " # Find the top most ingredients for a particular cuisine\r\n", + " group_by(ingredients) %>% \r\n", + " summarise(n_instances = sum(count)) %>% \r\n", + " filter(n_instances != 0) %>% \r\n", + " # Arrange by descending order\r\n", + " arrange(desc(n_instances)) %>% \r\n", + " mutate(ingredients = factor(ingredients) %>% fct_inorder())\r\n", + " \r\n", + " \r\n", + " return(ingredient_df)\r\n", + "} # End of function" + ], + "outputs": [], + "metadata": { + "id": "uB_0JR82HTPa" + } + }, + { + "cell_type": "markdown", + "source": [ + "Sasa tunaweza kutumia kazi hii kupata wazo la viungo kumi maarufu zaidi kwa kila aina ya vyakula.
Hebu tuijaribu na `thai_df`.\n" + ], + "metadata": { + "id": "h9794WF8HWmc" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Call create_ingredient and display popular ingredients\r\n", + "thai_ingredient_df <- create_ingredient(df = thai_df)\r\n", + "\r\n", + "thai_ingredient_df %>% \r\n", + " slice_head(n = 10)" + ], + "outputs": [], + "metadata": { + "id": "agQ-1HrcHaEA" + } + }, + { + "cell_type": "markdown", + "source": [ + "Katika sehemu iliyopita, tulitumia `geom_col()`, hebu tuone jinsi unavyoweza kutumia `geom_bar` pia, kuunda chati za pau (bar charts). Tumia `?geom_bar` kwa kusoma zaidi.\n" + ], + "metadata": { + "id": "kHu9ffGjHdcX" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make a bar chart for popular thai cuisines\r\n", + "thai_ingredient_df %>% \r\n", + " slice_head(n = 10) %>% \r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"steelblue\") +\r\n", + " xlab(\"\") + ylab(\"\")" + ], + "outputs": [], + "metadata": { + "id": "fb3Bx_3DHj6e" + } + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "RHP_xgdkHnvM" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Get popular ingredients for Japanese cuisines and make bar chart\r\n", + "create_ingredient(df = japanese_df) %>% \r\n", + " slice_head(n = 10) %>%\r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"darkorange\", alpha = 0.8) +\r\n", + " xlab(\"\") + ylab(\"\")\r\n" + ], + "outputs": [], + "metadata": { + "id": "019v8F0XHrRU" + } + }, + { + "cell_type": "markdown", + "source": [ + "Vipi kuhusu vyakula vya Kichina?\n" + ], + "metadata": { + "id": "iIGM7vO8Hu3v" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Get popular ingredients for Chinese cuisines and make bar chart\r\n", + "create_ingredient(df = chinese_df)
%>% \r\n", + " slice_head(n = 10) %>%\r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"cyan4\", alpha = 0.8) +\r\n", + " xlab(\"\") + ylab(\"\")" + ], + "outputs": [], + "metadata": { + "id": "lHd9_gd2HyzU" + } + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "ir8qyQbNH1c7" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Get popular ingredients for Indian cuisines and make bar chart\r\n", + "create_ingredient(df = indian_df) %>% \r\n", + " slice_head(n = 10) %>%\r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"#041E42FF\", alpha = 0.8) +\r\n", + " xlab(\"\") + ylab(\"\")" + ], + "outputs": [], + "metadata": { + "id": "ApukQtKjH5FO" + } + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "qv30cwY1H-FM" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Get popular ingredients for Korean cuisines and make bar chart\r\n", + "create_ingredient(df = korean_df) %>% \r\n", + " slice_head(n = 10) %>%\r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"#852419FF\", alpha = 0.8) +\r\n", + " xlab(\"\") + ylab(\"\")" + ], + "outputs": [], + "metadata": { + "id": "lumgk9cHIBie" + } + }, + { + "cell_type": "markdown", + "source": [ + "Kutokana na taswira za data, sasa tunaweza kuondoa viungo vya kawaida zaidi vinavyosababisha mkanganyiko kati ya vyakula tofauti, kwa kutumia `dplyr::select()`.\n", + "\n", + "Kila mtu anapenda mchele, vitunguu saumu na tangawizi!\n" + ], + "metadata": { + "id": "iO4veMXuIEta" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Drop id column, rice, garlic and ginger from our original data set\r\n", + "df_select <- df %>% \r\n", + " select(-c(1, rice, garlic, ginger))\r\n", + "\r\n", + "# Display 
new data set\r\n", + "df_select %>% \r\n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "iHJPiG6rIUcK" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Kuchakata data kwa kutumia mapishi 👩‍🍳👨‍🍳 - Kushughulikia data isiyo na uwiano ⚖️\n", + "\n", + "

\n", + " \n", + "

Uchoraji na @allison_horst
\n", + "\n", + "Kwa kuwa somo hili linahusu vyakula, tunapaswa kuweka `mapishi` katika muktadha.\n", + "\n", + "Tidymodels inatoa kifurushi kingine kizuri: `recipes` - kifurushi cha kuchakata data.\n" + ], + "metadata": { + "id": "kkFd-JxdIaL6" + } + }, + { + "cell_type": "markdown", + "source": [ + "Tuchunguze tena usambazaji wa vyakula vyetu.\n" + ], + "metadata": { + "id": "6l2ubtTPJAhY" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Distribution of cuisines\r\n", + "old_label_count <- df_select %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(desc(n))\r\n", + "\r\n", + "old_label_count" + ], + "outputs": [], + "metadata": { + "id": "1e-E9cb7JDVi" + } + }, + { + "cell_type": "markdown", + "source": [ + "Kama unavyoona, kuna usambazaji usio sawa kabisa katika idadi ya vyakula. Vyakula vya Kikorea ni karibu mara tatu ya vyakula vya Kithai. Data isiyo na uwiano mara nyingi ina athari mbaya kwenye utendaji wa modeli. Fikiria kuhusu uainishaji wa binary. Ikiwa data yako nyingi ni ya darasa moja, modeli ya ML itaelekea kutabiri darasa hilo mara kwa mara, kwa sababu kuna data zaidi kwa ajili yake. Kusawazisha data huchukua data iliyopotoshwa na husaidia kuondoa uwiano huu. Modeli nyingi hufanya kazi vizuri zaidi wakati idadi ya uchunguzi ni sawa na, kwa hivyo, huwa na changamoto na data isiyo na uwiano.\n", + "\n", + "Kuna njia kuu mbili za kushughulikia seti za data zisizo na uwiano:\n", + "\n", + "- kuongeza uchunguzi kwenye darasa la wachache: `Over-sampling` mfano kutumia algorithimu ya SMOTE\n", + "\n", + "- kuondoa uchunguzi kutoka darasa la walio wengi: `Under-sampling`\n", + "\n", + "Sasa wacha tuonyeshe jinsi ya kushughulikia seti za data zisizo na uwiano kwa kutumia `recipe`. 
Recipe inaweza kufikiriwa kama ramani inayoelezea hatua gani zinapaswa kutumika kwenye seti ya data ili kuifanya iwe tayari kwa uchambuzi wa data.\n" + ], + "metadata": { + "id": "soAw6826JKx9" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load themis package for dealing with imbalanced data\r\n", + "library(themis)\r\n", + "\r\n", + "# Create a recipe for preprocessing data\r\n", + "cuisines_recipe <- recipe(cuisine ~ ., data = df_select) %>% \r\n", + " step_smote(cuisine)\r\n", + "\r\n", + "cuisines_recipe" + ], + "outputs": [], + "metadata": { + "id": "HS41brUIJVJy" + } + }, + { + "cell_type": "markdown", + "source": [ + "Tuchambue hatua zetu za awali za usindikaji.\n", + "\n", + "- Wito wa `recipe()` wenye fomula unaiambia recipe *majukumu* ya vigezo kwa kutumia data ya `df_select` kama rejeleo. Kwa mfano, safu wima ya `cuisine` imepewa jukumu la `outcome` huku safu wima nyingine zimepewa jukumu la `predictor`.\n", + "\n", + "- [`step_smote(cuisine)`](https://themis.tidymodels.org/reference/step_smote.html) inaunda *maelezo maalum* ya hatua ya recipe ambayo huzalisha mifano mipya ya darasa la wachache kwa kutumia majirani wa karibu wa kesi hizi.\n", + "\n", + "Sasa, ikiwa tungependa kuona data iliyosindikwa, tungehitaji [**`prep()`**](https://recipes.tidymodels.org/reference/prep.html) na [**`bake()`**](https://recipes.tidymodels.org/reference/bake.html) recipe yetu.\n", + "\n", + "`prep()`: inakadiria vigezo vinavyohitajika kutoka kwenye seti ya mafunzo ambavyo vinaweza kutumika baadaye kwenye seti nyingine za data.\n", + "\n", + "`bake()`: inachukua recipe iliyosindikwa na kutumia operesheni kwenye seti yoyote ya data.\n" + ], + "metadata": { + "id": "Yb-7t7XcJaC8" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Prep and bake the recipe\r\n", + "preprocessed_df <- cuisines_recipe %>% \r\n", + " prep() %>% \r\n", + " bake(new_data = NULL) %>% \r\n", + " relocate(cuisine)\r\n", + "\r\n", + "# Display
data\r\n", + "preprocessed_df %>% \r\n", + " slice_head(n = 5)\r\n", + "\r\n", + "# Quick summary stats\r\n", + "preprocessed_df %>% \r\n", + " introduce()" + ], + "outputs": [], + "metadata": { + "id": "9QhSgdpxJl44" + } + }, + { + "cell_type": "markdown", + "source": [ + "Hebu sasa tukague usambazaji wa vyakula vyetu na tuvilinganishe na data isiyo na uwiano.\n" + ], + "metadata": { + "id": "dmidELh_LdV7" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Distribution of cuisines\r\n", + "new_label_count <- preprocessed_df %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(desc(n))\r\n", + "\r\n", + "list(new_label_count = new_label_count,\r\n", + " old_label_count = old_label_count)" + ], + "outputs": [], + "metadata": { + "id": "aSh23klBLwDz" + } + }, + { + "cell_type": "markdown", + "source": [ + "Yum! Takwimu ni safi, zenye uwiano mzuri, na ladha tamu sana 😋!\n", + "\n", + "> Kwa kawaida, mapishi hutumika kama mchakato wa awali wa kuandaa modeli ambapo hufafanua hatua gani zinapaswa kutekelezwa kwenye seti ya data ili kuifanya iwe tayari kwa modeli. 
Katika hali hiyo, `workflow()` kwa kawaida hutumika (kama tulivyoona katika masomo yetu ya awali) badala ya kukadiria mapishi kwa mikono.\n", + ">\n", + "> Kwa hivyo, kwa kawaida huhitaji kutumia **`prep()`** na **`bake()`** mapishi unapotumia tidymodels, lakini ni kazi muhimu kuwa nazo kwenye zana zako ili kuthibitisha kwamba mapishi yanafanya kile unachotarajia kama ilivyo katika hali yetu.\n", + ">\n", + "> Unapofanya **`bake()`** kwenye mapishi yaliyotayarishwa kwa **`new_data = NULL`**, unapata data uliyotoa wakati wa kufafanua mapishi, lakini ikiwa imepitia hatua za usindikaji wa awali.\n", + "\n", + "Sasa tuweke nakala ya data hii kwa matumizi ya masomo ya baadaye:\n" + ], + "metadata": { + "id": "HEu80HZ8L7ae" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Save preprocessed data\r\n", + "write_csv(preprocessed_df, \"../../../data/cleaned_cuisines_R.csv\")" + ], + "outputs": [], + "metadata": { + "id": "cBmCbIgrMOI6" + } + }, + { + "cell_type": "markdown", + "source": [ + "CSV mpya sasa inaweza kupatikana katika folda kuu ya data.\n", + "\n", + "**🚀Changamoto**\n", + "\n", + "Mtaala huu una seti kadhaa za data za kuvutia. Chunguza folda za `data` na uone kama kuna seti za data zinazofaa kwa uainishaji wa binary au wa darasa nyingi? Ni maswali gani ungeuliza kuhusu seti hii ya data?\n", + "\n", + "## [**Jaribio baada ya somo**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/20/)\n", + "\n", + "## **Mapitio na Kujisomea**\n", + "\n", + "- Angalia [pakiti ya themis](https://github.com/tidymodels/themis). Ni mbinu gani nyingine tunaweza kutumia kushughulikia data isiyo na uwiano?\n", + "\n", + "- Tovuti ya marejeleo ya [Tidy models](https://www.tidymodels.org/start/).\n", + "\n", + "- H. Wickham na G. 
Grolemund, [*R kwa Sayansi ya Data: Kuonyesha, Kuunda Modeli, Kubadilisha, Kupanga, na Kuingiza Data*](https://r4ds.had.co.nz/).\n", + "\n", + "#### ASANTE KWA:\n", + "\n", + "[`Allison Horst`](https://twitter.com/allison_horst/) kwa kuunda michoro ya kushangaza inayofanya R kuwa ya kuvutia na ya kupendeza zaidi. Pata michoro zaidi katika [galeria yake](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM).\n", + "\n", + "[Cassie Breviu](https://www.twitter.com/cassieview) na [Jen Looper](https://www.twitter.com/jenlooper) kwa kuunda toleo la awali la moduli hii kwa Python ♥️\n", + "\n", + "

\n", + " \n", + "

Sanaa na @allison_horst
\n" + ], + "metadata": { + "id": "WQs5621pMGwf" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya tafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kuhakikisha usahihi, tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuchukuliwa kama chanzo cha mamlaka. Kwa taarifa muhimu, tafsiri ya kitaalamu ya binadamu inapendekezwa. Hatutawajibika kwa kutokuelewana au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sw/4-Classification/1-Introduction/solution/notebook.ipynb b/translations/sw/4-Classification/1-Introduction/solution/notebook.ipynb new file mode 100644 index 000000000..2259f301d --- /dev/null +++ b/translations/sw/4-Classification/1-Introduction/solution/notebook.ipynb @@ -0,0 +1,739 @@ +{ + "cells": [ + { + "source": [ + "# Mapishi Matamu ya Kiasia na Kihindi\n", + "\n", + "## Utangulizi\n", + "\n", + "Chakula cha Kiasia na Kihindi ni maarufu kwa ladha zake za kipekee, matumizi ya viungo mbalimbali, na mbinu za kupika zinazovutia. Katika mwongozo huu, tutachunguza baadhi ya mapishi maarufu na vidokezo vya jinsi ya kuandaa vyakula hivi nyumbani.\n", + "\n", + "[!NOTE] Mapishi haya yanaweza kubadilishwa kulingana na ladha yako au upatikanaji wa viungo.\n", + "\n", + "---\n", + "\n", + "## Mapishi Maarufu\n", + "\n", + "### 1. 
**Chakula cha Kiasia: Noodles za Mboga**\n", + "Noodles za mboga ni rahisi kuandaa na zinaweza kubadilishwa kulingana na mboga unazopenda.\n", + "\n", + "#### Viungo:\n", + "- Noodles zilizopikwa tayari\n", + "- Mafuta ya ufuta\n", + "- Vitunguu saumu, iliyokatwa vizuri\n", + "- Karoti, zilizokatwa nyembamba\n", + "- Pilipili hoho, zilizokatwa vipande vidogo\n", + "- Mchuzi wa soya\n", + "- Mchuzi wa oyster (hiari)\n", + "\n", + "#### Maelekezo:\n", + "1. Katika sufuria kubwa, ongeza mafuta ya ufuta na kaanga vitunguu saumu hadi viwe vya dhahabu.\n", + "2. Ongeza karoti na pilipili hoho, pika kwa dakika chache.\n", + "3. Changanya noodles zilizopikwa tayari na ongeza mchuzi wa soya na mchuzi wa oyster.\n", + "4. Koroga vizuri na pika kwa dakika 2-3 zaidi.\n", + "5. Tumikia moto na ufurahie!\n", + "\n", + "---\n", + "\n", + "### 2. **Chakula cha Kihindi: Kuku wa Butter**\n", + "Kuku wa butter ni moja ya vyakula maarufu vya Kihindi, maarufu kwa ladha yake laini na mchuzi wake wa krimu.\n", + "\n", + "#### Viungo:\n", + "- Vipande vya kuku, vilivyopikwa\n", + "- Siagi\n", + "- Vitunguu, vilivyokatwa vizuri\n", + "- Nyanya, zilizopondwa\n", + "- Cream nzito\n", + "- Viungo: Garam masala, unga wa coriander, unga wa manjano, na pilipili nyekundu\n", + "\n", + "#### Maelekezo:\n", + "1. Katika sufuria, yayusha siagi na kaanga vitunguu hadi viwe laini.\n", + "2. Ongeza nyanya zilizopondwa na pika hadi mafuta yatengane.\n", + "3. Changanya viungo na pika kwa dakika chache.\n", + "4. Ongeza vipande vya kuku na cream nzito, koroga vizuri.\n", + "5. Pika kwa moto mdogo kwa dakika 10-15.\n", + "6. 
Tumikia na wali au chapati.\n", + "\n", + "[!TIP] Unaweza kuongeza kiasi cha pilipili kulingana na upendeleo wako wa viwango vya ukali.\n", + "\n", + "---\n", + "\n", + "## Vidokezo vya Jumla\n", + "\n", + "- **Tumia Viungo Safi:** Ladha ya chakula chako itakuwa bora zaidi unapochagua viungo safi na vya hali ya juu.\n", + "- **Usiogope Kucheza na Viungo:** Mapishi haya ni mwongozo tu; unaweza kubadilisha viungo ili kufanikisha ladha unayoipenda.\n", + "- **Andaa Mapema:** Hakikisha viungo vyote vimeandaliwa kabla ya kuanza kupika ili kurahisisha mchakato.\n", + "\n", + "[!IMPORTANT] Hakikisha unafuata hatua za usalama wa chakula, kama vile kuosha mikono na kuhifadhi vyakula kwa njia sahihi.\n", + "\n", + "---\n", + "\n", + "## Hitimisho\n", + "\n", + "Kupika vyakula vya Kiasia na Kihindi nyumbani ni njia nzuri ya kufurahia ladha za kipekee na za kitamaduni. Kwa kufuata mapishi haya na vidokezo, utaweza kuandaa chakula kitamu ambacho familia na marafiki watafurahia. Jaribu leo na ugundue ladha mpya!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "Sakinisha Imblearn ambayo itawezesha SMOTE. Hii ni kifurushi cha Scikit-learn kinachosaidia kushughulikia data isiyo na uwiano wakati wa kufanya uainishaji. 
(https://imbalanced-learn.org/stable/)\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: imblearn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.0)\n", + "Requirement already satisfied: imbalanced-learn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imblearn) (0.8.0)\n", + "Requirement already satisfied: numpy>=1.13.3 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (1.19.2)\n", + "Requirement already satisfied: scipy>=0.19.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (1.4.1)\n", + "Requirement already satisfied: scikit-learn>=0.24 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (0.24.2)\n", + "Requirement already satisfied: joblib>=0.11 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (0.16.0)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from scikit-learn>=0.24->imbalanced-learn->imblearn) (2.1.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install imblearn" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as 
plt\n", + "import matplotlib as mpl\n", + "import numpy as np\n", + "from imblearn.over_sampling import SMOTE" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('../../data/cuisines.csv')" + ] + }, + { + "source": [ + "Seti hii ya data inajumuisha safu 385 zinazoonyesha aina zote za viungo katika vyakula mbalimbali kutoka kwa seti fulani ya vyakula.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 65 indian 0 0 0 0 0 \n", + "1 66 indian 1 0 0 0 0 \n", + "2 67 indian 0 0 0 0 0 \n", + "3 68 indian 0 0 0 0 0 \n", + "4 69 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 385 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
065indian00000000...0000000000
166indian10000000...0000000000
267indian00000000...0000000000
368indian00000000...0000000000
469indian00000000...0000000010
\n

5 rows × 385 columns

\n
" + }, + "metadata": {}, + "execution_count": 4 + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\nRangeIndex: 2448 entries, 0 to 2447\nColumns: 385 entries, Unnamed: 0 to zucchini\ndtypes: int64(384), object(1)\nmemory usage: 7.2+ MB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "korean 799\n", + "indian 598\n", + "chinese 442\n", + "japanese 320\n", + "thai 289\n", + "Name: cuisine, dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ], + "source": [ + "df.cuisine.value_counts()" + ] + }, + { + "source": [ + "Onyesha vyakula katika grafu ya pau.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 7 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAZEAAAD4CAYAAAAtrdtxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAASY0lEQVR4nO3df7TldV3v8eerGZkRRoeAiXtE5UgNIkUCjlwQIzAiC7NscdcSbcmsfkxl5SXX0juuyzK9d3UvlXnpplajma0kMtCUhluImNcr8msGBmb4pZaTQCFQOYom0fi+f+zPkd14hpnzOWefvYfzfKy113z35/vde7/22fvMa3++3733SVUhSVKPbxt3AEnSgcsSkSR1s0QkSd0sEUlSN0tEktRt+bgDLKYjjjiipqenxx1Dkg4oW7dufbiq1sy2bkmVyPT0NFu2bBl3DEk6oCT5u72tc3eWJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqduS+sT69vt3Mb3xqnHH0ALZefG5444gLXnORCRJ3SwRSVI3S0SS1M0SkSR1s0QkSd0sEUlSN0tEktRtIkokyaFJXtuWz0yyeY6X/29Jzh5NOknS3kxEiQCHAq/tvXBVvbmqPraAeSRJ+2FSSuRi4DuTbAN+E1iV5Iokdye5NEkAkrw5yc1JdiTZNDT+viTnjTG/JC1Jk1IiG4G/qaoTgTcAJwEXAscDxwCnt+3eUVUvrKrvAZ4KvGxfV5xkQ5ItSbbs/tqu0aSXpCVqUkpkTzdV1X1V9Q1gGzDdxs9KcmOS7cBLgO/e1xVV1aaqWldV65YdvHp0iSVpCZrUL2B8dGh5N7A8yUrgXcC6qro3yVuAleMIJ0kamJSZyFeAp+1jm5nCeDjJKsBjIJI0ZhMxE6mqf0xyXZIdwL8AX5xlmy8leTewA3gAuHmRY0qS9jARJQJQVa/ay/gvDS1fBFw0yzbrR5dMkrQ3k7I7S5J0ALJEJEndLBFJUjdLRJLUzRKRJHWbmHdnLYYTjlrNlovPHXcMSXrScCYiSepmiUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6rZ83AEW0/b7dzG98apxx9CY7Lz43HFHkJ50nIlIkrpZIpKkbpaIJKmbJSJJ6maJSJK6WSKSpG77VSJJPj3qIJKkA89+lUhVvWjUQSRJB579nYk8kmRVkmuT3JJke5Ifa+umk9yd5NIkdyW5IsnBbd2bk9ycZEeSTUnSxj+R5NeT3JTkM0m+r40vS/Kb7TK3J/m5Nj6V5JNJtrXrmtn+nCTXt0yXJ1k1ih+SJGl2czkm8nXgFVV1MnAW8FszpQA8F3hXVT0P+DLw2jb+jqp6YVV9D/BU4GVD17e8qk4BLgR+tY39NLCrql4IvBD42STPAV4FXF1VJwLPB7YlOQK4CDi7ZdoCvH4ud16SND9z+dqTAP8jyRnAN4CjgCPbunur6rq2/H7gdcDbgLOSvBE4GDgMuAP4i7bdh9q/W4HptnwO8L1JzmvnVwNrgZuB9yZ5CvDhqtqW5PuB44HrWpcdBFz/LaGTDcAGgGVPXzOHuytJ2pe5lMirgTXAC6rqsSQ7gZVtXe2xbSVZCbwLWFdV9yZ5y9D2AI+2f3cP5Qjwy1V19Z433srrXOB9Sd4O/DNwTVWd/0Shq2oTsAlgxdTaPXNKkuZhLruzVgMPtgI5Czh6aN2zk5zWll8FfIrHC+PhdqziPPbtauAX2oyDJMcmOSTJ0cAXq+rdwHuAk4EbgNOTfFfb9pAkx87h/kiS5ml/ZyIFXAr8RZLtDI4/3D20/h7gF5O8F7gT+N2q+lqSdwM7gAcY7JLal/cw2LV1Szve8hDw48CZwBuSPAY8Arymqh5Ksh6
4LMmKdvmLgM/s532SJM1Tqp54D0+Sw4FbqurovayfBja3g+cTbcXU2pq64JJxx9CY+FXwUp8kW6tq3WzrnnB3VpJnMDhY/bZRBJMkHdiecHdWVf098ITHGapqJzDxsxBJ0sLzu7MkSd0sEUlSN0tEktRtLh82POCdcNRqtvgOHUlaMM5EJEndLBFJUjdLRJLUzRKRJHWzRCRJ3SwRSVI3S0SS1M0SkSR1s0QkSd0sEUlSN0tEktTNEpEkdbNEJEndLBFJUjdLRJLUzRKRJHWzRCRJ3SwRSVI3S0SS1M0SkSR1s0QkSd2WjzvAYtp+/y6mN1417hhSt50XnzvuCNK/40xEktTNEpEkdbNEJEndLBFJUjdLRJLUzRKRJHWzRCRJ3Ra0RJK8L8l5s4w/I8kVC3lbkqTxW5QPG1bV3wPfUi6SpAPbvGYiSV6T5PYktyX54zZ8RpJPJ/nbmVlJkukkO9ry+iQfSvJXST6b5DeGru+cJNcnuSXJ5UlWtfGLk9zZbuttbWxNkg8mubmdTp/PfZEkzV33TCTJdwMXAS+qqoeTHAa8HZgCXgwcB1wJzLYb60TgJOBR4J4kvwP8S7u+s6vqq0n+C/D6JO8EXgEcV1WV5NB2Hb8N/K+q+lSSZwNXA8+bJecGYAPAsqev6b27kqRZzGd31kuAy6vqYYCq+qckAB+uqm8AdyY5ci+XvbaqdgEkuRM4GjgUOB64rl3PQcD1wC7g68AfJNkMbG7XcTZwfNsW4OlJVlXVI8M3VFWbgE0AK6bW1jzuryRpD6M4JvLo0HL2Y5vdLUeAa6rq/D03TnIK8AMMjqv8EoMC+zbg1Kr6+kKEliTN3XyOiXwc+E9JDgdou7Pm4wbg9CTf1a7vkCTHtuMiq6vq/wC/Ajy/bf9R4JdnLpzkxHneviRpjrpnIlV1R5JfA/5vkt3ArfMJUlUPJVkPXJZkRRu+CPgK8JEkKxnMVl7f1r0OeGeS2xncj08CPz+fDJKkuUnV0jlMsGJqbU1dcMm4Y0jd/HsiGockW6tq3Wzr/MS6JKmbJSJJ6maJSJK6WSKSpG6WiCSp26J8AeOkOOGo1Wzx3S2StGCciUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6maJSJK6LR93gMW0/f5dTG+8atwxJM3RzovPHXcE7YUzEUlSN0tEktTNEpEkdbNEJEndLBFJUjdLRJLUbWQlkuTTc9z+zCSb2/LLk2wcTTJJ0kIZ2edEqupF87jslcCVCxhHkjQCo5yJPNL+PTPJJ5JckeTuJJcmSVv30jZ2C/ATQ5ddn+QdbflHk9yY5NYkH0tyZBt/S5L3tuv+2ySvG9V9kSTNbrGOiZwEXAgcDxwDnJ5kJfBu4EeBFwD/YS+X/RRwalWdBPwp8MahdccBPwScAvxqkqeMJr4kaTaL9bUnN1XVfQBJtgHTwCPA56vqs238/cCGWS77TOADSaaAg4DPD627qqoeBR5N8iBwJHDf8IWTbJi53mVPX7OQ90mSlrzFmok8OrS8m7mV1+8A76iqE4CfA1bO5XqralNVrauqdcsOXj2Hm5Uk7cs43+J7NzCd5Dvb+fP3st1q4P62fMHIU0mS9tvYSqSqvs5gN9NV7cD6g3vZ9C3A5Um2Ag8vUjxJ0n5IVY07w6JZMbW2pi64ZNwxJM2RXwU/Xkm2VtW62db5iXVJUjdLRJLUzRKRJHWzRCRJ3SwRSVK3xfrE+kQ44ajVbPFdHpK0YJyJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqZolIkrotH3eAxbT9/l1Mb7xq3DEkaVHtvPjckV23MxF
JUjdLRJLUzRKRJHWzRCRJ3SwRSVI3S0SS1M0SkSR1W9ASSTKdZMdCXqckaXJNxEwkyZL60KMkPVmMrESSHJPk1iTfl+QPk2xv589q69cnuTLJx4Fr29gbktyc5PYkbx26rg8n2ZrkjiQbhsYfSfJrSW5LckOSI0d1fyRJ32okJZLkucAHgfXAKUBV1QnA+cAfJVnZNj0ZOK+qvj/JOcDatv2JwAuSnNG2+6mqegGwDnhdksPb+CHADVX1fOCTwM/OkmVDki1Jtuz+2q5R3F1JWrJGUSJrgI8Ar66q24AXA+8HqKq7gb8Djm3bXlNV/9SWz2mnW4FbgOMYlAoMiuM24AbgWUPj/wpsbstbgek9w1TVpqpaV1Xrlh28eqHuoySJ0XwB4y7gCwzK4859bPvVoeUA/7Oqfn94gyRnAmcDp1XV15J8ApiZyTxWVdWWd7PEvlBSksZtFDORfwVeAbwmyauA/we8GiDJscCzgXtmudzVwE8lWdW2PSrJdwCrgX9uBXIccOoIMkuSOozklXtVfTXJy4BrgP8OnJBkO/BvwPqqejTJnpf5aJLnAde3dY8APwn8FfDzSe5iUD43jCKzJGnu8vjeoCe/FVNra+qCS8YdQ5IW1Xz/nkiSrVW1brZ1E/E5EUnSgckSkSR1s0QkSd0sEUlSN0tEktRtSX0474SjVrNlnu9SkCQ9zpmIJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqtqT+KFWSrzD7n+adFEcAD487xBMw3/yYb37MNz/zyXd0Va2ZbcWS+u4s4J69/XWuSZBki/n6mW9+zDc/SzWfu7MkSd0sEUlSt6VWIpvGHWAfzDc/5psf883Pksy3pA6sS5IW1lKbiUiSFpAlIknqtmRKJMlLk9yT5HNJNo4pw3uTPJhkx9DYYUmuSfLZ9u+3t/Ek+d8t7+1JTl6EfM9K8tdJ7kxyR5L/PEkZk6xMclOS21q+t7bx5yS5seX4QJKD2viKdv5zbf30KPO121yW5NYkmycw284k25NsS7KljU3EY9tu89AkVyS5O8ldSU6blHxJntt+bjOnLye5cFLytdv8lfZ7sSPJZe33ZfTPv6p60p+AZcDfAMcABwG3AcePIccZwMnAjqGx3wA2tuWNwK+35R8B/hIIcCpw4yLkmwJObstPAz4DHD8pGdvtrGrLTwFubLf7Z8Ar2/jvAb/Qll8L/F5bfiXwgUX4Gb4e+BNgczs/Sdl2AkfsMTYRj227zT8CfqYtHwQcOkn5hnIuAx4Ajp6UfMBRwOeBpw4979YvxvNvUX7o4z4BpwFXD51/E/CmMWWZ5t+XyD3AVFueYvCBSIDfB86fbbtFzPoR4AcnMSNwMHAL8B8ZfAp3+Z6PNXA1cFpbXt62ywgzPRO4FngJsLn9BzIR2drt7ORbS2QiHltgdftPMJOYb49M5wDXTVI+BiVyL3BYez5tBn5oMZ5/S2V31swPeMZ9bWwSHFlV/9CWHwCObMtjzdymtycxeLU/MRnb7qJtwIPANQxmmF+qqn+bJcM387X1u4DDRxjvEuCNwDfa+cMnKBtAAR9NsjXJhjY2KY/tc4CHgD9suwPfk+SQCco37JXAZW15IvJV1f3A24AvAP/A4Pm0lUV4/i2VEjkg1OBlwdjfc51kFfBB4MKq+vLwunFnrKrdVXUig1f9pwDHjSvLsCQvAx6sqq3jzvIEXlxVJwM/DPxikjOGV475sV3OYFfv71bVScBXGewe+qZxP/cA2jGFlwOX77lunPnasZgfY1DGzwAOAV66GLe9VErkfuBZQ+ef2cYmwReTTAG0fx9s42PJnOQpDArk0qr60CRmBKiqLwF/zWCKfmiSme+BG87wzXxt/WrgH0cU6XTg5Ul2An/KYJfWb09INuCbr1apqgeBP2dQwpPy2N4H3FdVN7bzVzAolUnJN+OHgVuq6ovt/KTkOxv4fFU9VFWPAR9i8Jwc+fNvqZT
IzcDa9k6FgxhMR68cc6YZVwIXtOULGByHmBl/TXuXx6nArqFp80gkCfAHwF1V9fZJy5hkTZJD2/JTGRyvuYtBmZy3l3wzuc8DPt5eLS64qnpTVT2zqqYZPL8+XlWvnoRsAEkOSfK0mWUG+/V3MCGPbVU9ANyb5Llt6AeAOycl35DzeXxX1kyOScj3BeDUJAe33+OZn9/on3+LcSBqEk4M3i3xGQb70P/rmDJcxmB/5WMMXnn9NIP9kNcCnwU+BhzWtg3wzpZ3O7BuEfK9mMF0/HZgWzv9yKRkBL4XuLXl2wG8uY0fA9wEfI7BboYVbXxlO/+5tv6YRXqcz+Txd2dNRLaW47Z2umPmd2BSHtt2mycCW9rj+2Hg2ycs3yEMXq2vHhqbpHxvBe5uvxt/DKxYjOefX3siSeq2VHZnSZJGwBKRJHWzRCRJ3SwRSVI3S0SS1M0SkSR1s0QkSd3+PxNFbW14TY8fAAAAAElFTkSuQmCC\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "df.cuisine.value_counts().plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "thai df: (289, 385)\njapanese df: (320, 385)\nchinese df: (442, 385)\nindian df: (598, 385)\nkorean df: (799, 385)\n" + ] + } + ], + "source": [ + "\n", + "thai_df = df[(df.cuisine == \"thai\")]\n", + "japanese_df = df[(df.cuisine == \"japanese\")]\n", + "chinese_df = df[(df.cuisine == \"chinese\")]\n", + "indian_df = df[(df.cuisine == \"indian\")]\n", + "korean_df = df[(df.cuisine == \"korean\")]\n", + "\n", + "print(f'thai df: {thai_df.shape}')\n", + "print(f'japanese df: {japanese_df.shape}')\n", + "print(f'chinese df: {chinese_df.shape}')\n", + "print(f'indian df: {indian_df.shape}')\n", + "print(f'korean df: {korean_df.shape}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "def create_ingredient_df(df):\n", + " # transpose df, drop cuisine and unnamed rows, sum the row to get total for ingredient and add value header to new df\n", + " ingredient_df = df.T.drop(['cuisine','Unnamed: 0']).sum(axis=1).to_frame('value')\n", + " # drop ingredients that have a 0 sum\n", + " ingredient_df = ingredient_df[(ingredient_df.T != 0).any()]\n", + " # sort df\n", + " ingredient_df = 
ingredient_df.sort_values(by='value', ascending=False, inplace=False)\n", + " return ingredient_df\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 10 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAaYAAAD4CAYAAACngkIwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAeAklEQVR4nO3de7RVdb338fcHRFAhTPBCeNnogzckQJcdb11ES01QSxGLUtNHjl1EM2twhmbUo+PY5dhN07DHII+nuKSPFKe0FPEasTcCG1TUEk8QCmjuJIRg+33+WL+ti82+AWvvORfr8xpjjT3Xb/7mnN85x2J/+M0595qKCMzMzPKiW9YFmJmZlXIwmZlZrjiYzMwsVxxMZmaWKw4mMzPLlV2yLmBn0L9//6ipqcm6DDOzitG/f3/uv//++yPi9ObzHExlUFNTQ21tbdZlmJlVFEn9W2r3qTwzM8sVB5OZmeWKg8nMzHLF15jMzLrApk2bWLFiBRs2bMi6lC7Xq1cv9t9/f3r06NGh/g6mMqhf2UDNxNlZl2G2Q5bfdGbWJezUVqxYQZ8+faipqUFS1uV0mYjg1VdfZcWKFQwaNKhDy/hUnplZF9iwYQP9+vWrqlACkES/fv22aaRYEcEkaYqk81pof4+kme0su7y1WxLNzLpStYVSk23d74o+lRcRfwW2CiwzM6tcuQwmSRcC1wABLAYagQ9IuhrYD/hKRMyUVAP8OiKOktQd+CZwOvAWcEdE/LBknbsB9wD3RMQdkj4FTAB2BeYBn4uIRknrgO8Do4A3gbMj4pWu2G8zqx7lvi5d7muEvXv3Zt26dWVdZ0fl7lSepCHAdcDIiBgGXJlmDQBOohgYN7Ww6HigBhgeEe8F7i6Z1xv4FfDzFEpHAGOBEyNiOMXgG5f67gH8IW37EeCyVuocL6lWUm3j+obt3l8zM9tS7oIJGAnMiIi1ABHxWmr/fxHxVkQ8DezbwnKnAj+OiM3NlgO4D/hpRPwsvT8FOAaYL2lhen9wmvdP4Ndpuo5i2G0lIiZHRCEiCt1377sdu2lm1nUmTpzIrbfe+vb7SZMmccMNN3DKKadw9NFHM3ToUO67776tlnv44YcZNWrU2++/8IUvMGXKFADq6ur44Ac/yDHHHMNpp53GqlWrylJrHoOpNRtLprf1CuLjwOl65wqcgKkRMTy9DouISWnepnjnefON5PR0p5nZthg7dizTp09/+/306dO56KKLuPfee1mwYAFz5szhS1/6Eu/8+mvbpk2buOKKK5g5cyZ1dXVccsklXHvttWWpNY+/dB8C7pV0c0S8KmmvDi73O+BfJc2JiM2S9ioZNV2fXrcCnwMeBO6T9N2IWJ220SciXir3zpiZ5cGIESNYvXo1f/3rX1mzZg3vfve72W+//fjiF7/II488Qrdu3Vi5ciWvvPIK++23X7vrW7ZsGUuWLOHDH/4wAI2NjQwYMKAsteYumCJiqaQbgbmSGoGnOrjoT4BDgcWSNgF3ALeUzL8SuFPStyLiK5KuAx6Q1A3YBHwecDCZ2U5rzJgxzJw5k5dffpmxY8dy9913s2bNGurq6ujRowc1NTVb/b3RLrvswltvvfX2+6b5EcGQIUN48skny15n7oIJICKmAlPbmN87/VwOHJWmNwNXp1dp35qSt58paZ8GTGtt3Wl6JtDm30mZmVWKsWPHctlll7F27Vrmzp3L9OnT2WeffejRowdz5szhpZe2/r/5QQcdxNNPP83GjRt58803efDBBznppJM47LDDWLNmDU8++STHH388mzZt4rnnnmPIkCE7XGcug6nSDB3Yl1p/nYuZbYMsvgJqyJAhvPHGGwwcOJABAwYwbtw4Ro8ezdChQykUChx++OFbLXPAAQdw/vnnc9RRRzFo0CBGjBgBwK677srMmTOZMGECDQ0NbN68mauuuqoswaSOXuiy1hUKhfCDAs2sLc888wxHHHFE1mVkpqX9l1Q
XEYXmfSvprjwzM6sCDiYzM8sVB5OZWRep1ksn27rfDiYzsy7Qq1cvXn311aoLp6bnMfXq1avDy/iuPDOzLrD//vuzYsUK1qxZk3UpXa7pCbYd5WAyM+sCPXr06PATXKudT+WZmVmuOJjMzCxXHExmZpYrvsZUBvUrG8r+NEqzvMriq3SsunjEZGZmuVJ1wSTpvyXtmXUdZmbWsqo6lZeeYDsqIt5qt7OZmWVipx8xSaqRtEzSz4AlQKOk/mnehZIWS1ok6a7UtrekX0qan14nZlm/mVm1qZYR02Dgooj4g6TlAJKGANcBJ0TE2pJHuH8f+G5EPCbpQOB+YKvvqpc0HhgP0P1de3fBLpiZVYdqCaaXIuIPzdpGAjMiYi1ARLyW2k8Fjiye9QPgXZJ6R8S60oUjYjIwGaDngMHV9eVXZmadqFqC6R/b0LcbcFxEbGi3p5mZld1Of42pDQ8BYyT1Ayg5lfcAcEVTJ0nDM6jNzKxqVW0wRcRS4EZgrqRFwM1p1gSgkG6KeBq4PKsazcyqkart2SCdoVAoRG1tbdZlmJlVFEl1EVFo3l61IyYzM8snB5OZmeWKg8nMzHLFwWRmZrniYDIzs1xxMJmZWa44mMzMLFccTGZmlisOJjMzyxUHk5mZ5Uq1fLt4p6pf2UDNxNlZl2GWmeU3nZl1CbYT8YjJzMxyxcFkZma5UjHBJOksSRPLtK5Jkq4px7rMzKy8KuIak6RdImIWMCvD7W/OYttmZtWmy0dMki5MD+FbJOkuSTWSHkptD0o6MPWbIul2SfOAb0m6WNItad5oSfMkPSXp95L2Te2TJN0p6WFJf5Y0oWS710p6TtJjwGEl7YdI+q2kOkmPSjq8pe134SEyM6tqXTpikjQEuA44ISLWpseZTwWmRsRUSZcAPwDOSYvsn/o2Srq4ZFWPAcdFREj638BXgC+leYcDJwN9gGWSbgPeC1wADKe4zwuAutR/MnB5RDwv6V+AHwEjm2+/hX0ZD4wH6P6uvXfksJiZWYmuPpU3EpgREWsBIuI1SccDH0/z72LL0cmMlkKBYmBMkzQA2BV4sWTe7IjYCGyUtBrYF3g/cG9ErAeQNCv97A2cAMyQ1LR8zw5sn4iYTDHU6DlgsB8DbGZWJnm/xvSPVtp/CNwcEbMkfQiYVDJvY8l0I23vYzfg9YgYvo3bNzOzTtLV15geAsZI6geQTuU9QfE0G8A44NEOrKcvsDJNX9SB/o8A50jaTVIfYDRARPwdeFHSmFSPJA3r6M6YmVn5demIKSKWSroRmCupEXgKuAL4qaQvA2uAz3RgVZMonn77G8WwG9TOdhdImgYsAlYD80tmjwNuk3Qd0AP4RepnZmYZUIQvj+yoQqEQtbW1WZdhZlZRJNVFRKF5e8X8ga2ZmVUHB5OZmeWKg8nMzHLFwWRmZrniYDIzs1xxMJmZWa44mMzMLFccTGZmlisOJjMzyxUHk5mZ5Urev128ItSvbKBm4uysyzDL3PKbzsy6BNsJeMRkZma54mAyM7NcyTSYJJ0j6cgO9Jsi6bwW2j8k6ddlrKcg6Qdp+mJJt5Rr3WZm1jFZj5jOAdoNpq4SEbURMSHrOszMqlmbwSTpJkmfL3k/SdI1kr4sab6kxZK+XjL/q5KWSXpM0s8lXZPaD5H0W0l1kh6VdLikE4CzgG9LWpj6XJbWu0jSLyXtXlLOqZJqJT0naVQLte4h6U5Jf5T0lKSz29ivXpJ+Kqk+9T05tXd4BCZpfKqntnF9Q0cWMTOzDmhvxDQNOL/k/fkUnzI7GHgfMBw4RtIHJB0LnAsMA84ASh/+NBm4IiKOAa4BfhQRTwCzgC9HxPCI+BNwT0QcGxHDgGeAS0vWUZO2eSZwu6RezWq9FngoIt4HnEwx8PZoZb8+D0REDAU+AUxtYX1tiojJEVGIiEL33ftuy6JmZtaGNm8Xj4inJO0j6T3A3sDfgKHARyg+Fh2gN8Wg6gPcFxEbgA2SfgUgqTdwAsVHoTe
tumcrmzxK0g3Anmm995fMmx4RbwHPS/ozcHizZT8CnNU0SgN6AQdSDLjmTgJ+mPbxWUkvAYe2dSzMzKxrdOTvmGYA5wH7URxBHQT8e0T8uLSTpKtaWb4b8HpEDO/AtqYA50TEIkkXAx8qmdf8GfDN3ws4NyKWdWA7ZmaWUx25+WEacAHFcJpBcRRzSRoJIWmgpH2Ax4HR6fpNb2AUQET8HXhR0pjUX5KGpXW/QXGk1aQPsEpSD2BcszrGSOom6RDgYKB5AN0PXKE0LJM0oo19erRp/ZIOpTiycqCZmeVAu8EUEUspBsbKiFgVEQ8A/wU8KakemAn0iYj5FK8ZLQZ+A9QDTXcFjAMulbQIWAo03ZjwC+DL6QaEQ4CvAvMohtyzzUr5H+CPad2Xp1OGpf4P0ANYLGlpet+aHwHdUv3TgIsjYmN7x8LMzDqfIpqfEduBlUm9I2JdupvuEWB8RCwo2wZyqlAoRG1tbdZlmJlVFEl1EVFo3l7u78qbnP5gthcwtRpCyczMyquswRQRnyzn+naUpNOAbzZrfjEiPpZFPWZm1r6d+tvFI+J+trzl3MzMci7rryQyMzPbgoPJzMxyxcFkZma54mAyM7NccTCZmVmuOJjMzCxXHExmZpYrO/XfMXWV+pUN1EycnXUZZhVh+U1nZl2C5ZxHTGZmlisOplZIGi7po1nXYWZWbRxMrRsOOJjMzLpY7oJJ0oWSFktaJOkuSTWSHkptD0o6MPXbV9K9qd8iSSek9qslLUmvq1JbjaRnJN0haamkByTtluY9LKmQpvtLWi5pV+AbwFhJCyWNzeZomJlVn1wFk6QhwHXAyIgYBlwJ/JDiIzTeC9wN/CB1/wEwN/U7Glgq6RjgM8C/AMcBl5U8yXYwcGtEDAFeB85trY6I+CdwPTAtIoZHxLQWah0vqVZSbeP6hq1XYmZm2yVXwQSMBGZExFqAiHgNOJ7iE3MB7gJOKul7W+rXGBENad69EfGPiFgH3AO8P/V/MSIWpuk6oGZHCo2IyRFRiIhC99377siqzMysRN6CqTOVPjq9kXduld/MO8ehV5dWZGZmW8lbMD0EjJHUD0DSXsATwAVp/jjg0TT9IPDZ1K+7pL5p3jmSdpe0B/Cxkv6tWQ4ck6bPK2l/A+izQ3tjZmbbLFfBFBFLgRuBuZIWATcDVwCfkbQY+DTF606knydLqqd4au7I9Cj3KcAfgXnATyLiqXY2+x3gs5KeAvqXtM8BjvTND2ZmXUsRkXUNFa/ngMEx4KLvZV2GWUXwNz9YE0l1EVFo3u6vJCqDoQP7Uut/bGZmZZGrU3lmZmYOJjMzyxUHk5mZ5YqDyczMcsXBZGZmueJgMjOzXHEwmZlZrjiYzMwsVxxMZmaWKw4mMzPLFX8lURnUr2ygZuLsrMswq2j+Dj1r4hGTmZnlSsUHk6RvSDo16zrMzKw8Kv5UXkRc39nbkNQ9Iho7eztmZlZhIyZJX5W0TNJjkn4u6RpJUySdl+Yvl/R1SQsk1Us6PLXvLel3kpZK+omklyT1T/M+JemP6YGAP5bUPbWvk/Qf6YGFx2e202ZmVaZigknSscC5wDDgDGCrh0slayPiaOA24JrU9jXgoYgYAswEDkzrPAIYC5wYEcOBRoqPbwfYA5gXEcMi4rEW6hkvqVZSbeP6hrLso5mZVdapvBOB+yJiA7BB0q9a6XdP+lkHfDxNnwR8DCAifivpb6n9FOAYYL4kgN2A1WleI/DL1oqJiMnAZCg+wXZ7dsjMzLZWScHUURvTz0ba3z8BUyPi31qYt8HXlczMul7FnMoDHgdGS+olqTcwahuXPR9A0keAd6f2B4HzJO2T5u0l6aAy1mxmZtuoYkZMETFf0ixgMfAKUA909OLO14GfS/o08CTwMvBGRKyVdB3wgKRuwCbg88BLZd8BMzPrEEVUzuURSb0jYp2k3YFHgPERsaADy/UEGiNis6TjgdvSzQ5lUSgUora2tlyrMzOrCpLqImKrG9k
qZsSUTJZ0JNCL4rWhdkMpORCYnkZF/wQu66wCzcxsx1RUMEXEJ7dzueeBEWUux8zMOkEl3fxgZmZVwMFkZma54mAyM7NccTCZmVmuOJjMzCxXHExmZpYrDiYzM8sVB5OZmeVKRf2BbV7Vr2ygZuLsrMswqzrLbzoz6xKsE3jEZGZmueJgMjOzXKmIYJJ0uaQL0/QUSedt53qGS/poeaszM7NyqohrTBFxe5lWNRwoAP/dfIakXSJic5m2Y2Zm2ymXwZRGR9cAQfHBgH8C1kXEd5r1ux4YDewGPAH8a0SEpIeBecDJwJ7Apen9N4DdJJ0E/DtwBHAIcDDwP5I+A9xGMbw2A1dHxJzO3VszMyuVu1N5koYA1wEjI2IYcGUb3W+JiGMj4iiK4VT6uPVdIuJ9wFXA1yLin8D1wLSIGB4R01K/I4FTI+ITFJ9eGxExFPgEMFVSr1bqHC+pVlJt4/qOPkjXzMzak7tgAkYCMyJiLUBEvNZG35MlzZNUn5YbUjLvnvSzDqhpYx2zIuLNNH0S8J9pu89SfMT6oS0tFBGTI6IQEYXuu/dtZ5fMzKyjcnkqryPSSOZHQCEi/iJpEsUn2zbZmH420vZ+/qNzKjQzs+2RxxHTQ8AYSf0AJO3VSr+mEForqTfQkTv13gD6tDH/UWBc2u6hFB/JvqwjRZuZWXnkLpgiYilwIzBX0iLg5lb6vQ7cASwB7gfmd2D1c4AjJS2UNLaF+T8CuqVTg9OAiyNiYwv9zMyskygisq6h4hUKhaitrc26DDOziiKpLiIKzdtzN2IyM7Pq5mAyM7NccTCZmVmuOJjMzCxXHExmZpYrDiYzM8sVB5OZmeWKg8nMzHLFwWRmZrniYDIzs1yp2G8Xz5P6lQ3UTJyddRlm1omW33Rm1iVUDY+YzMwsVxxMZmaWKw4mMzPLlVwEk6QLJS2WtEjSXZJGp0emPyXp95L2ldRN0vOS9k7LdJP0gqS90+uXkuan14mpzyRJd0p6WNKfJU1I7TWSnpF0h6Slkh6QtFuad4ik30qqk/SopMOzOzJmZtUn82CSNAS4DhgZEcOAK4HHgOMiYgTwC+ArEfEW8J+kJ8wCpwKLImIN8H3guxFxLHAu8JOSTRwOnAa8D/iapB6pfTBwa0QMAV5PywFMBq6IiGOAayg+PLClusdLqpVU27i+YYePg5mZFeXhrryRwIyIWAsQEa9JGgpMkzQA2BV4MfW9E7gP+B5wCfDT1H4qxSfTNq3zXelx6wCz01NoN0paDeyb2l+MiIVpug6oScucAMwoWVfPloqOiMkUQ4yeAwb7aYtmZmWSh2BqyQ+BmyNilqQPAZMAIuIvkl6RNJLiCKhp9NSN4ghrQ+lKUriUPhq9kXf2uXn7bmk9r0fE8LLujZmZdVjmp/KAh4AxkvoBSNoL6AusTPMvatb/JxRP6c2IiMbU9gBwRVMHSdsVLBHxd+BFSWPSeiRp2Pasy8zMtk/mwRQRS4EbgbmSFgE3UxwhzZBUB6xttsgsoDfvnMYDmAAU0g0UTwOX70BJ44BLUy1LgbN3YF1mZraNFFFZl0ckFSje6PD+rGtp0nPA4Bhw0feyLsPMOpG/+aH8JNVFRKF5e16vMbVI0kTgs7xzbSkXhg7sS60/tGZmZZH5qbxtERE3RcRBEfFY1rWYmVnnqKhgMjOznZ+DyczMcsXBZGZmueJgMjOzXHEwmZlZrjiYzMwsVxxMZmaWKw4mMzPLFQeTmZnlSkV9JVFe1a9soGbi7KzLMLOdWDV9V59HTGZmlitVEUySJkh6RtLf0hfBttbvYkm3dGVtZma2pWo5lfc54NSIWJF1IWZm1radfsQk6XbgYOA3kr7YNCKSNEbSEkmLJD1Sssh7JP1W0vOSvpVJ0WZmVWynD6aIuBz4K3Ay8LeSWdcDp0XEMOCskvbhwFhgKDBW0gEtrVfSeEm1kmob1zd0TvFmZlVopw+mNjwOTJF0GdC9pP3BiGiIiA3A08BBLS0cEZM
johARhe679+2Ccs3MqkPVBlMaSV0HHADUSeqXZm0s6dZI9VyHMzPLhar9pSvpkIiYB8yTdAbFgDIzs4xV7YgJ+LakeklLgCeARVkXZGZmVTJiioiaNDklvYiIj7fQ9e35qc+oTi3MzMy2UhXB1NmGDuxLbRV9XYiZWWeq5lN5ZmaWQw4mMzPLFQeTmZnlioPJzMxyxcFkZma54mAyM7NccTCZmVmuOJjMzCxXHExmZpYr/uaHMqhf2UDNxNlZl2Fm1qWWd9I33njEZGZmueJgMjOzXKnaYJJ0saRb0vTlki7MuiYzM6vSa0ySttjviLg9q1rMzGxLFR1Mkr4KfApYA/wFqAMagPHArsALwKcjYr2kKcAGYATwOLC4ZD2TgHUR8R1J/wu4Hdib4qPVx0TEn7pqn8zMql3FnsqTdCxwLjAMOAMopFn3RMSxETEMeAa4tGSx/YETIuLqNlZ9N3BrWv4EYFUr2x8vqVZSbeP6hh3cGzMza1LJI6YTgfsiYgOwQdKvUvtRkm4A9gR6A/eXLDMjIhpbW6GkPsDAiLgXIK27RRExGZgM0HPA4NihPTEzs7dV7IipDVOAL0TEUODrQK+Sef/IpCIzM+uwSg6mx4HRknpJ6g2MSu19gFWSegDjtmWFEfEGsELSOQCSekravZxFm5lZ2yo2mCJiPjCL4k0MvwHqKd748FVgHsXgenY7Vv1pYIKkxcATwH5lKdjMzDpEEZV7eURS74hYl0Y1jwDjI2JBV9dRKBSitra2qzdrZlbRJNVFRKF5eyXf/AAwWdKRFK8jTc0ilMzMrLwqOpgi4pNZ12BmZuVVsdeYzMxs5+RgMjOzXHEwmZlZrlT0XXl5IekNYFnWdWyH/sDarIvYDq67a7nurlUtda8FiIjTm8+o6JsfcmRZS7c85p2kWtfddVx313LdXaucdftUnpmZ5YqDyczMcsXBVB6Tsy5gO7nuruW6u5br7lplq9s3P5iZWa54xGRmZrniYDIzs1xxMO0ASadLWibpBUkTs66nNZIOkDRH0tOSlkq6MrVPkrRS0sL0+mjWtTYnabmk+lRfbWrbS9LvJD2ffr476zpLSTqs5JgulPR3SVfl9XhLulPSaklLStpaPMYq+kH6zC+WdHTO6v62pGdTbfdK2jO110h6s+TY356zulv9bEj6t3S8l0k6LZuqW617WknNyyUtTO07drwjwq/teAHdgT8BBwO7AouAI7Ouq5VaBwBHp+k+wHPAkcAk4Jqs62un9uVA/2Zt3wImpumJwDezrrOdz8nLwEF5Pd7AB4CjgSXtHWPgoxSffybgOGBezur+CLBLmv5mSd01pf1yeLxb/Gykf6eLgJ7AoPQ7p3te6m42/z+A68txvD1i2n7vA16IiD9HxD+BXwBnZ1xTiyJiVaRHgkTxKb3PAAOzrWqHnA1MTdNTgXMyrKU9pwB/ioiXsi6kNRHxCPBas+bWjvHZwM+i6A/AnpIGdE2lW2qp7oh4ICI2p7d/APbv8sLa0crxbs3ZwC8iYmNEvAi8QPF3T5drq25JAs4Hfl6ObTmYtt9A4C8l71dQAb/sJdUAIyg+5RfgC+m0x515OyWWBPCApDpJ41PbvhGxKk2/DOybTWkdcgFb/mPN+/Fu0toxrqTP/SUUR3dNBkl6StJcSe/Pqqg2tPTZqJTj/X7glYh4vqRtu4+3g6mKSOoN/BK4KiL+DtwGHAIMB1ZRHIrnzUkRcTRwBvB5SR8onRnF8wa5/JsHSbsCZwEzUlMlHO+t5PkYt0bStcBm4O7UtAo4MCJGAFcD/yXpXVnV14KK/GyU+ARb/gdsh463g2n7rQQOKHm/f2rLJUk9KIbS3RFxD0BEvBIRjRHxFnAHGZ0iaEtErEw/VwP3UqzxlabTR+nn6uwqbNMZwIKIeAUq43iXaO0Y5/5zL+liYBQwLoUq6VTYq2m6juK1mkMzK7KZNj4blXC8dwE+DkxratvR4+1g2n7zgcGSBqX/GV8AzMq4pha
l87//F3gmIm4uaS+9NvAxYEnzZbMkaQ9JfZqmKV7YXkLxOF+Uul0E3JdNhe3a4n+ReT/ezbR2jGcBF6a7844DGkpO+WVO0unAV4CzImJ9Sfvekrqn6YOBwcCfs6lya218NmYBF0jqKWkQxbr/2NX1teNU4NmIWNHUsMPHO4u7O3aWF8U7lJ6j+L+Ba7Oup406T6J4KmYxsDC9PgrcBdSn9lnAgKxrbVb3wRTvSFoELG06xkA/4EHgeeD3wF5Z19pC7XsArwJ9S9pyebwphucqYBPFaxiXtnaMKd6Nd2v6zNcDhZzV/QLFazJNn/PbU99z02doIbAAGJ2zulv9bADXpuO9DDgjT3Wn9inA5c367tDx9lcSmZlZrvhUnpmZ5YqDyczMcsXBZGZmueJgMjOzXHEwmZlZrjiYzMwsVxxMZmaWK/8fnSxrKwF+wYgAAAAASUVORK5CYII=\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "thai_ingredient_df = create_ingredient_df(thai_df)\r\n", + "thai_ingredient_df.head(10).plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 11 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "japanese_ingredient_df = create_ingredient_df(japanese_df)\r\n", + "japanese_ingredient_df.head(10).plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 12 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "chinese_ingredient_df = create_ingredient_df(chinese_df)\r\n", + "chinese_ingredient_df.head(10).plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 13 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "indian_ingredient_df = create_ingredient_df(indian_df)\r\n", + "indian_ingredient_df.head(10).plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 14 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "korean_ingredient_df = create_ingredient_df(korean_df)\r\n", + "korean_ingredient_df.head(10).plot.barh()" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... 
whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 15 + } + ], + "source": [ + "feature_df= df.drop(['cuisine','Unnamed: 0','rice','garlic','ginger'], axis=1)\n", + "labels_df = df.cuisine #.unique()\n", + "feature_df.head()\n" + ] + }, + { + "source": [ + "Sawazisha data kwa kutumia SMOTE oversampling hadi darasa la juu zaidi. Soma zaidi hapa: https://imbalanced-learn.org/dev/references/generated/imblearn.over_sampling.SMOTE.html\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "oversample = SMOTE()\n", + "transformed_feature_df, transformed_label_df = oversample.fit_resample(feature_df, labels_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "new label count: korean 799\nchinese 799\njapanese 799\nindian 799\nthai 799\nName: cuisine, dtype: int64\nold label count: korean 799\nindian 598\nchinese 442\njapanese 320\nthai 289\nName: cuisine, dtype: int64\n" + ] + } + ], + "source": [ + "print(f'new label count: {transformed_label_df.value_counts()}')\r\n", + "print(f'old label count: {df.cuisine.value_counts()}')" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 
0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 18 + } + ], + "source": [ + "transformed_feature_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " cuisine almond angelica anise anise_seed apple apple_brandy \\\n", + "0 indian 0 0 0 0 0 0 \n", + "1 indian 1 0 0 0 0 0 \n", + "2 indian 0 0 0 0 0 0 \n", + "3 indian 0 0 0 0 0 0 \n", + "4 indian 0 0 0 0 0 0 \n", + "... ... ... ... ... ... ... ... \n", + "3990 thai 0 0 0 0 0 0 \n", + "3991 thai 0 0 0 0 0 0 \n", + "3992 thai 0 0 0 0 0 0 \n", + "3993 thai 0 0 0 0 0 0 \n", + "3994 thai 0 0 0 0 0 0 \n", + "\n", + " apricot armagnac artemisia ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "... ... ... ... ... ... ... ... \n", + "3990 0 0 0 ... 0 0 0 \n", + "3991 0 0 0 ... 0 0 0 \n", + "3992 0 0 0 ... 0 0 0 \n", + "3993 0 0 0 ... 0 0 0 \n", + "3994 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "... ... ... ... ... ... ... ... \n", + "3990 0 0 0 0 0 0 0 \n", + "3991 0 0 0 0 0 0 0 \n", + "3992 0 0 0 0 0 0 0 \n", + "3993 0 0 0 0 0 0 0 \n", + "3994 0 0 0 0 0 0 0 \n", + "\n", + "[3995 rows x 381 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisia...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
0indian000000000...0000000000
1indian100000000...0000000000
2indian000000000...0000000000
3indian000000000...0000000000
4indian000000000...0000000010
..................................................................
3990thai000000000...0000000000
3991thai000000000...0000000000
3992thai000000000...0000000000
3993thai000000000...0000000000
3994thai000000000...0000000000
\n

3995 rows × 381 columns

\n
" + }, + "metadata": {}, + "execution_count": 19 + } + ], + "source": [ + "# export transformed data to new df for classification\n", + "transformed_df = pd.concat([transformed_label_df,transformed_feature_df],axis=1, join='outer')\n", + "transformed_df" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\nRangeIndex: 3995 entries, 0 to 3994\nColumns: 381 entries, cuisine to zucchini\ndtypes: int64(380), object(1)\nmemory usage: 11.6+ MB\n" + ] + } + ], + "source": [ + "transformed_df.info()" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "transformed_df.to_csv(\"../../data/cleaned_cuisines.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya tafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kuhakikisha usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, tafsiri ya kitaalamu ya binadamu inapendekezwa. 
Hatutawajibika kwa kutoelewana au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "1da12ed6d238756959b8de9cac2a35a2", + "translation_date": "2025-09-06T14:52:26+00:00", + "source_file": "4-Classification/1-Introduction/solution/notebook.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/translations/sw/4-Classification/2-Classifiers-1/notebook.ipynb b/translations/sw/4-Classification/2-Classifiers-1/notebook.ipynb new file mode 100644 index 000000000..9435cd6ce --- /dev/null +++ b/translations/sw/4-Classification/2-Classifiers-1/notebook.ipynb @@ -0,0 +1,39 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "68829b06b4dcd512d3327849191f4d7f", + "translation_date": "2025-09-06T14:32:43+00:00", + "source_file": "4-Classification/2-Classifiers-1/notebook.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": 
[ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya tafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kwa usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, inashauriwa kutumia tafsiri ya kitaalamu ya binadamu. Hatutawajibika kwa maelewano mabaya au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sw/4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb b/translations/sw/4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb new file mode 100644 index 000000000..def29434d --- /dev/null +++ b/translations/sw/4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb @@ -0,0 +1,1294 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_11-R.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "6ea6a5171b1b99b7b5a55f7469c048d2", + "translation_date": "2025-09-06T14:38:21+00:00", + "source_file": "4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb", + "language_code": "sw" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "zs2woWv_HoE8" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Vainishi vya vyakula 1\n", + "\n", + "Katika somo hili, tutachunguza aina mbalimbali za vainishi ili *kutabiri aina ya chakula cha kitaifa kulingana na kikundi cha viungo.* Wakati wa kufanya hivyo, tutajifunza zaidi kuhusu baadhi ya njia ambazo algorithimu zinaweza kutumika kwa kazi za uainishaji.\n", + "\n", + "### [**Jaribio la kabla ya 
somo**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/21/)\n", + "\n", + "### **Maandalizi**\n", + "\n", + "Somo hili linajengwa juu ya [somo letu la awali](https://github.com/microsoft/ML-For-Beginners/blob/main/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb) ambapo tulifanya yafuatayo:\n", + "\n", + "- Tulifanya utangulizi rahisi wa uainishaji kwa kutumia seti ya data kuhusu vyakula vyote vya kuvutia vya Asia na India 😋.\n", + "\n", + "- Tulichunguza baadhi ya [vitenzi vya dplyr](https://dplyr.tidyverse.org/) ili kuandaa na kusafisha data yetu.\n", + "\n", + "- Tulitengeneza taswira nzuri kwa kutumia ggplot2.\n", + "\n", + "- Tulionyesha jinsi ya kushughulikia data isiyo na uwiano kwa kuisafisha kwa kutumia [recipes](https://recipes.tidymodels.org/articles/Simple_Example.html).\n", + "\n", + "- Tulionyesha jinsi ya `prep` na `bake` mapishi yetu ili kuthibitisha kuwa yatafanya kazi kama inavyotarajiwa.\n", + "\n", + "#### **Mahitaji ya awali**\n", + "\n", + "Kwa somo hili, tutahitaji vifurushi vifuatavyo ili kusafisha, kuandaa, na kutazama data yetu:\n", + "\n", + "- `tidyverse`: [Tidyverse](https://www.tidyverse.org/) ni [mkusanyiko wa vifurushi vya R](https://www.tidyverse.org/packages) vilivyoundwa kufanya sayansi ya data kuwa ya haraka, rahisi, na ya kufurahisha!\n", + "\n", + "- `tidymodels`: Mfumo wa [tidymodels](https://www.tidymodels.org/) ni [mkusanyiko wa vifurushi](https://www.tidymodels.org/packages/) kwa ajili ya uundaji wa mifano na ujifunzaji wa mashine.\n", + "\n", + "- `themis`: [Themis package](https://themis.tidymodels.org/) hutoa Hatua za Ziada za Mapishi kwa Kushughulikia Data Isiyo na Uwiano.\n", + "\n", + "- `nnet`: [Nnet package](https://cran.r-project.org/web/packages/nnet/nnet.pdf) hutoa kazi za kukadiria mitandao ya neva ya kulisha mbele yenye safu moja ya siri, na kwa mifano ya uratibu wa kimantiki wa multinomial.\n", + "\n", + "Unaweza kuvisakinisha kama:\n" + ], + "metadata": { + "id": "iDFOb3ebHwQC" + } + }, + {
+ "cell_type": "markdown", + "source": [ + "`install.packages(c(\"tidyverse\", \"tidymodels\", \"themis\", \"nnet\", \"here\"))`\n", + "\n", + "Vinginevyo, maandishi yaliyo hapa chini yanakagua kama una pakiti zinazohitajika kukamilisha moduli hii na kuzisakinisha kwako endapo hazipo.\n" + ], + "metadata": { + "id": "4V85BGCjII7F" + } + }, + { + "cell_type": "code", + "execution_count": 2, + "source": [ + "suppressWarnings(if (!require(\"pacman\"))install.packages(\"pacman\"))\r\n", + "\r\n", + "pacman::p_load(tidyverse, tidymodels, themis, here)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Loading required package: pacman\n", + "\n" + ] + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "an5NPyyKIKNR", + "outputId": "834d5e74-f4b8-49f9-8ab5-4c52ff2d7bc8" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 1. Gawanya data kuwa seti za mafunzo na majaribio.\n", + "\n", + "Tutaanza kwa kuchagua hatua chache kutoka somo letu la awali.\n", + "\n", + "### Ondoa viungo vya kawaida zaidi vinavyosababisha mkanganyiko kati ya vyakula tofauti, kwa kutumia `dplyr::select()`.\n", + "\n", + "Kila mtu anapenda mchele, vitunguu saumu na tangawizi!\n" + ], + "metadata": { + "id": "0ax9GQLBINVv" + } + }, + { + "cell_type": "code", + "execution_count": 3, + "source": [ + "# Load the original cuisines data\r\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/4-Classification/data/cuisines.csv\")\r\n", + "\r\n", + "# Drop id column, rice, garlic and ginger from our original data set\r\n", + "df_select <- df %>% \r\n", + " select(-c(1, rice, garlic, ginger)) %>%\r\n", + " # Encode cuisine column as categorical\r\n", + " mutate(cuisine = factor(cuisine))\r\n", + "\r\n", + "# Display new data set\r\n", + "df_select %>% \r\n", + " slice_head(n = 5)\r\n", + "\r\n", + "# Display distribution of cuisines\r\n", + "df_select %>% \r\n", + " count(cuisine) %>%
\r\n", + " arrange(desc(n))" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "New names:\n", + "* `` -> ...1\n", + "\n", + "\u001b[1m\u001b[1mRows: \u001b[1m\u001b[22m\u001b[34m\u001b[34m2448\u001b[34m\u001b[39m \u001b[1m\u001b[1mColumns: \u001b[1m\u001b[22m\u001b[34m\u001b[34m385\u001b[34m\u001b[39m\n", + "\n", + "\u001b[36m──\u001b[39m \u001b[1m\u001b[1mColumn specification\u001b[1m\u001b[22m \u001b[36m────────────────────────────────────────────────────────\u001b[39m\n", + "\u001b[1mDelimiter:\u001b[22m \",\"\n", + "\u001b[31mchr\u001b[39m (1): cuisine\n", + "\u001b[32mdbl\u001b[39m (384): ...1, almond, angelica, anise, anise_seed, apple, apple_brandy, a...\n", + "\n", + "\n", + "\u001b[36mℹ\u001b[39m Use \u001b[30m\u001b[47m\u001b[30m\u001b[47m`spec()`\u001b[47m\u001b[30m\u001b[49m\u001b[39m to retrieve the full column specification for this data.\n", + "\u001b[36mℹ\u001b[39m Specify the column types or set \u001b[30m\u001b[47m\u001b[30m\u001b[47m`show_col_types = FALSE`\u001b[47m\u001b[30m\u001b[49m\u001b[39m to quiet this message.\n", + "\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine almond angelica anise anise_seed apple apple_brandy apricot armagnac\n", + "1 indian 0 0 0 0 0 0 0 0 \n", + "2 indian 1 0 0 0 0 0 0 0 \n", + "3 indian 0 0 0 0 0 0 0 0 \n", + "4 indian 0 0 0 0 0 0 0 0 \n", + "5 indian 0 0 0 0 0 0 0 0 \n", + " artemisia ⋯ whiskey white_bread white_wine whole_grain_wheat_flour wine wood\n", + "1 0 ⋯ 0 0 0 0 0 0 \n", + "2 0 ⋯ 0 0 0 0 0 0 \n", + "3 0 ⋯ 0 0 0 0 0 0 \n", + "4 0 ⋯ 0 0 0 0 0 0 \n", + "5 0 ⋯ 0 0 0 0 0 0 \n", + " yam yeast yogurt zucchini\n", + "1 0 0 0 0 \n", + "2 0 0 0 0 \n", + "3 0 0 0 0 \n", + "4 0 0 0 0 \n", + "5 0 0 1 0 " + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 381\n", + "\n", + "| cuisine <fct> | almond <dbl> | angelica <dbl> | anise <dbl> | anise_seed <dbl> | apple <dbl> | apple_brandy <dbl> | apricot <dbl> | armagnac <dbl> | artemisia <dbl> 
| ⋯ ⋯ | whiskey <dbl> | white_bread <dbl> | white_wine <dbl> | whole_grain_wheat_flour <dbl> | wine <dbl> | wood <dbl> | yam <dbl> | yeast <dbl> | yogurt <dbl> | zucchini <dbl> |\n", + "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", + "| indian | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| indian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| indian | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| indian | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| indian | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 381\n", + "\\begin{tabular}{lllllllllllllllllllll}\n", + " cuisine & almond & angelica & anise & anise\\_seed & apple & apple\\_brandy & apricot & armagnac & artemisia & ⋯ & whiskey & white\\_bread & white\\_wine & whole\\_grain\\_wheat\\_flour & wine & wood & yam & yeast & yogurt & zucchini\\\\\n", + " & & & & & & & & & & ⋯ & & & & & & & & & & \\\\\n", + "\\hline\n", + "\t indian & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t indian & 1 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t indian & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t indian & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t indian & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 1 & 0\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 381
cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiawhiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
<fct><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
indian0000000000000000000
indian1000000000000000000
indian0000000000000000000
indian0000000000000000000
indian0000000000000000010
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine n \n", + "1 korean 799\n", + "2 indian 598\n", + "3 chinese 442\n", + "4 japanese 320\n", + "5 thai 289" + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 2\n", + "\n", + "| cuisine <fct> | n <int> |\n", + "|---|---|\n", + "| korean | 799 |\n", + "| indian | 598 |\n", + "| chinese | 442 |\n", + "| japanese | 320 |\n", + "| thai | 289 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 2\n", + "\\begin{tabular}{ll}\n", + " cuisine & n\\\\\n", + " & \\\\\n", + "\\hline\n", + "\t korean & 799\\\\\n", + "\t indian & 598\\\\\n", + "\t chinese & 442\\\\\n", + "\t japanese & 320\\\\\n", + "\t thai & 289\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 2
cuisinen
<fct><int>
korean 799
indian 598
chinese 442
japanese320
thai 289
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 735 + }, + "id": "jhCrrH22IWVR", + "outputId": "d444a85c-1d8b-485f-bc4f-8be2e8f8217c" + } + }, + { + "cell_type": "markdown", + "source": [ + "Sawa! Sasa, ni wakati wa kugawanya data ambapo 70% ya data itaenda kwa mafunzo na 30% itaenda kwa majaribio. Pia tutatumia mbinu ya `stratification` wakati wa kugawanya data ili `kuweka uwiano wa kila aina ya chakula` katika seti za mafunzo na uthibitisho.\n", + "\n", + "[rsample](https://rsample.tidymodels.org/), kifurushi katika Tidymodels, kinatoa miundombinu kwa ajili ya kugawanya data na kurudia kwa ufanisi:\n" + ], + "metadata": { + "id": "AYTjVyajIdny" + } + }, + { + "cell_type": "code", + "execution_count": 4, + "source": [ + "# Load the core Tidymodels packages into R session\r\n", + "library(tidymodels)\r\n", + "\r\n", + "# Create split specification\r\n", + "set.seed(2056)\r\n", + "cuisines_split <- initial_split(data = df_select,\r\n", + " strata = cuisine,\r\n", + " prop = 0.7)\r\n", + "\r\n", + "# Extract the data in each split\r\n", + "cuisines_train <- training(cuisines_split)\r\n", + "cuisines_test <- testing(cuisines_split)\r\n", + "\r\n", + "# Print the number of cases in each split\r\n", + "cat(\"Training cases: \", nrow(cuisines_train), \"\\n\",\r\n", + " \"Test cases: \", nrow(cuisines_test), sep = \"\")\r\n", + "\r\n", + "# Display the first few rows of the training set\r\n", + "cuisines_train %>% \r\n", + " slice_head(n = 5)\r\n", + "\r\n", + "\r\n", + "# Display distribution of cuisines in the training set\r\n", + "cuisines_train %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(desc(n))" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Training cases: 1712\n", + "Test cases: 736" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine almond angelica anise anise_seed apple apple_brandy apricot armagnac\n", + 
"1 chinese 0 0 0 0 0 0 0 0 \n", + "2 chinese 0 0 0 0 0 0 0 0 \n", + "3 chinese 0 0 0 0 0 0 0 0 \n", + "4 chinese 0 0 0 0 0 0 0 0 \n", + "5 chinese 0 0 0 0 0 0 0 0 \n", + " artemisia ⋯ whiskey white_bread white_wine whole_grain_wheat_flour wine wood\n", + "1 0 ⋯ 0 0 0 0 1 0 \n", + "2 0 ⋯ 0 0 0 0 1 0 \n", + "3 0 ⋯ 0 0 0 0 0 0 \n", + "4 0 ⋯ 0 0 0 0 0 0 \n", + "5 0 ⋯ 0 0 0 0 0 0 \n", + " yam yeast yogurt zucchini\n", + "1 0 0 0 0 \n", + "2 0 0 0 0 \n", + "3 0 0 0 0 \n", + "4 0 0 0 0 \n", + "5 0 0 0 0 " + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 381\n", + "\n", + "| cuisine <fct> | almond <dbl> | angelica <dbl> | anise <dbl> | anise_seed <dbl> | apple <dbl> | apple_brandy <dbl> | apricot <dbl> | armagnac <dbl> | artemisia <dbl> | ⋯ ⋯ | whiskey <dbl> | white_bread <dbl> | white_wine <dbl> | whole_grain_wheat_flour <dbl> | wine <dbl> | wood <dbl> | yam <dbl> | yeast <dbl> | yogurt <dbl> | zucchini <dbl> |\n", + "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 381\n", + "\\begin{tabular}{lllllllllllllllllllll}\n", + " cuisine & almond & angelica & anise & anise\\_seed & apple & apple\\_brandy & apricot & armagnac & artemisia & ⋯ & whiskey & white\\_bread & white\\_wine & whole\\_grain\\_wheat\\_flour & wine & wood & yam & yeast & yogurt & zucchini\\\\\n", + " & & & & & & & & & & ⋯ & & & & & & & & & & \\\\\n", + "\\hline\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 
& 1 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 1 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 381
cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiawhiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
<fct><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
chinese0000000000000100000
chinese0000000000000100000
chinese0000000000000000000
chinese0000000000000000000
chinese0000000000000000000
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine n \n", + "1 korean 559\n", + "2 indian 418\n", + "3 chinese 309\n", + "4 japanese 224\n", + "5 thai 202" + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 2\n", + "\n", + "| cuisine <fct> | n <int> |\n", + "|---|---|\n", + "| korean | 559 |\n", + "| indian | 418 |\n", + "| chinese | 309 |\n", + "| japanese | 224 |\n", + "| thai | 202 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 2\n", + "\\begin{tabular}{ll}\n", + " cuisine & n\\\\\n", + " & \\\\\n", + "\\hline\n", + "\t korean & 559\\\\\n", + "\t indian & 418\\\\\n", + "\t chinese & 309\\\\\n", + "\t japanese & 224\\\\\n", + "\t thai & 202\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 2
cuisinen
<fct><int>
korean 559
indian 418
chinese 309
japanese224
thai 202
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 535 + }, + "id": "w5FWIkEiIjdN", + "outputId": "2e195fd9-1a8f-4b91-9573-cce5582242df" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 2. Kushughulikia data isiyo na uwiano\n", + "\n", + "Kama ulivyogundua kwenye seti ya data ya awali pamoja na seti yetu ya mafunzo, kuna usambazaji usio sawa kabisa katika idadi ya vyakula. Vyakula vya Kikorea ni *karibu* mara 3 ya vyakula vya Kithai. Data isiyo na uwiano mara nyingi ina athari mbaya kwenye utendaji wa modeli. Modeli nyingi hufanya kazi vizuri zaidi pale ambapo idadi ya uchunguzi ni sawa, na kwa hivyo huwa na changamoto wanapokutana na data isiyo na uwiano.\n", + "\n", + "Kuna njia kuu mbili za kushughulikia seti za data zisizo na uwiano:\n", + "\n", + "- kuongeza uchunguzi kwenye darasa lenye idadi ndogo: `Over-sampling` kwa mfano kutumia algoriti ya SMOTE ambayo huzalisha mifano mipya ya darasa lenye idadi ndogo kwa kutumia majirani wa karibu wa kesi hizo.\n", + "\n", + "- kuondoa uchunguzi kutoka darasa lenye idadi kubwa: `Under-sampling`\n", + "\n", + "Katika somo letu la awali, tulionyesha jinsi ya kushughulikia seti za data zisizo na uwiano kwa kutumia `recipe`. Recipe inaweza kufikiriwa kama mpango unaoelezea hatua gani zinapaswa kutumika kwenye seti ya data ili kuifanya iwe tayari kwa uchambuzi wa data. Katika hali yetu, tunataka kuwa na usambazaji sawa wa idadi ya vyakula vyetu kwa `training set` yetu. 
Hebu tuingie moja kwa moja.\n" + ], + "metadata": { + "id": "daBi9qJNIwqW" + } + }, + { + "cell_type": "code", + "execution_count": 5, + "source": [ + "# Load themis package for dealing with imbalanced data\r\n", + "library(themis)\r\n", + "\r\n", + "# Create a recipe for preprocessing training data\r\n", + "cuisines_recipe <- recipe(cuisine ~ ., data = cuisines_train) %>% \r\n", + " step_smote(cuisine)\r\n", + "\r\n", + "# Print recipe\r\n", + "cuisines_recipe" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Data Recipe\n", + "\n", + "Inputs:\n", + "\n", + " role #variables\n", + " outcome 1\n", + " predictor 380\n", + "\n", + "Operations:\n", + "\n", + "SMOTE based on cuisine" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 200 + }, + "id": "Az6LFBGxI1X0", + "outputId": "29d71d85-64b0-4e62-871e-bcd5398573b6" + } + }, + { + "cell_type": "markdown", + "source": [ + "Unaweza kuthibitisha (kwa kutumia prep+bake) kwamba mapishi yatafanya kazi kama unavyotarajia - lebo zote za vyakula zikiwa na uchunguzi `559`.\n", + "\n", + "Kwa kuwa tutatumia mapishi haya kama sehemu ya maandalizi ya uundaji wa modeli, `workflow()` itafanya maandalizi na kuoka kwa ajili yetu, kwa hivyo hatutalazimika kukadiria mapishi kwa mikono.\n", + "\n", + "Sasa tuko tayari kufundisha modeli 👩‍💻👨‍💻!\n", + "\n", + "## 3. Kuchagua kiainishi chako (classifier)\n", + "\n", + "

\n", + " \n", + "

Sanaa na @allison_horst
\n" + ], + "metadata": { + "id": "NBL3PqIWJBBB" + } + }, + { + "cell_type": "markdown", + "source": [ + "Sasa tunapaswa kuamua ni algoriti gani ya kutumia kwa kazi hii 🤔.\n", + "\n", + "Katika Tidymodels, [`parsnip package`](https://parsnip.tidymodels.org/index.html) hutoa kiolesura thabiti cha kufanya kazi na mifano kupitia injini tofauti (pakiti). Tafadhali angalia nyaraka za parsnip ili kuchunguza [aina za mifano na injini](https://www.tidymodels.org/find/parsnip/#models) pamoja na [hoja za mifano](https://www.tidymodels.org/find/parsnip/#model-args) zinazohusiana. Aina mbalimbali zinaweza kuonekana kuwa nyingi mwanzoni. Kwa mfano, mbinu zifuatazo zote zinajumuisha mbinu za uainishaji:\n", + "\n", + "- C5.0 Mifano ya Uainishaji Inayotegemea Kanuni\n", + "\n", + "- Mifano ya Uainishaji Inayobadilika\n", + "\n", + "- Mifano ya Uainishaji wa Mstari\n", + "\n", + "- Mifano ya Uainishaji wa Kawaida\n", + "\n", + "- Mifano ya Usawazishaji wa Kilogistiki\n", + "\n", + "- Mifano ya Usawazishaji wa Multinomial\n", + "\n", + "- Mifano ya Naive Bayes\n", + "\n", + "- Mashine za Msaada wa Vector\n", + "\n", + "- Majirani wa Karibu\n", + "\n", + "- Miti ya Maamuzi\n", + "\n", + "- Mbinu za Ensemble\n", + "\n", + "- Mitandao ya Neva\n", + "\n", + "Orodha inaendelea!\n", + "\n", + "### **Ni uainishaji gani wa kuchagua?**\n", + "\n", + "Kwa hivyo, ni uainishaji gani unapaswa kuchagua? Mara nyingi, kujaribu kadhaa na kutafuta matokeo mazuri ni njia ya kupima.\n", + "\n", + "> AutoML hutatua tatizo hili kwa urahisi kwa kuendesha kulinganisha hizi mtandaoni, ikikuruhusu kuchagua algoriti bora kwa data yako. Jaribu [hapa](https://docs.microsoft.com/learn/modules/automate-model-selection-with-azure-automl/?WT.mc_id=academic-77952-leestott)\n", + "\n", + "Pia, chaguo la uainishaji linategemea tatizo letu. 
Kwa mfano, wakati matokeo yanaweza kugawanywa katika `madaraja zaidi ya mawili`, kama ilivyo katika kesi yetu, lazima utumie `algoriti ya uainishaji wa madaraja mengi` badala ya `uainishaji wa binary.`\n", + "\n", + "### **Njia bora zaidi**\n", + "\n", + "Njia bora zaidi kuliko kubahatisha kiholela, hata hivyo, ni kufuata mawazo yaliyo kwenye [ML Cheat sheet](https://docs.microsoft.com/azure/machine-learning/algorithm-cheat-sheet?WT.mc_id=academic-77952-leestott) inayoweza kupakuliwa. Hapa, tunagundua kwamba, kwa tatizo letu la madaraja mengi, tuna chaguo kadhaa:\n", + "\n", + "

\n", + " \n", + "

Sehemu ya Algorithm Cheat Sheet ya Microsoft (mwongozo wa haraka wa algoriti), ikionyesha chaguo za uainishaji wa madaraja mengi
\n" + ], + "metadata": { + "id": "a6DLAZ3vJZ14" + } + }, + { + "cell_type": "markdown", + "source": [ + "### **Mantiki**\n", + "\n", + "Tujaribu kufikiria njia tofauti za kutatua tatizo hili tukizingatia vikwazo tulivyo navyo:\n", + "\n", + "- **Mitandao ya neva yenye kina ni nzito sana**. Kwa dataset yetu safi lakini ndogo, na kwa kuwa tunafanya mafunzo kwa ndani kupitia notebooks, mitandao ya neva yenye kina ni nzito sana kwa kazi hii.\n", + "\n", + "- **Hakuna classifier ya darasa mbili**. Hatutumii classifier ya darasa mbili, kwa hivyo hiyo inatupilia mbali mbinu ya one-vs-all.\n", + "\n", + "- **Mti wa maamuzi au regression ya logistic inaweza kufanya kazi**. Mti wa maamuzi unaweza kufanya kazi, au regression ya multinomial/regression ya logistic ya darasa nyingi kwa data ya darasa nyingi.\n", + "\n", + "- **Multiclass Boosted Decision Trees hutatua tatizo tofauti**. Multiclass boosted decision tree inafaa zaidi kwa kazi zisizo za parametric, kwa mfano kazi zinazolenga kujenga rankings, kwa hivyo siyo muhimu kwetu.\n", + "\n", + "Pia, kwa kawaida kabla ya kuanza kutumia mifano ya machine learning yenye ugumu zaidi kama ensemble methods, ni wazo zuri kujenga mfano rahisi iwezekanavyo ili kupata wazo la kinachoendelea. Kwa somo hili, tutaanza na mfano wa `multinomial regression`.\n", + "\n", + "> Regression ya logistic ni mbinu inayotumika pale ambapo variable ya matokeo ni ya kikundi (au nominal). Kwa Binary logistic regression idadi ya variable za matokeo ni mbili, ilhali idadi ya variable za matokeo kwa multinomial logistic regression ni zaidi ya mbili. Tazama [Advanced Regression Methods](https://bookdown.org/chua/ber642_advanced_regression/multinomial-logistic-regression.html) kwa maelezo zaidi.\n", + "\n", + "## 4. Kufundisha na kutathmini mfano wa Multinomial logistic regression.\n", + "\n", + "Katika Tidymodels, `parsnip::multinom_reg()`, inafafanua mfano unaotumia predictors za linear kutabiri data ya darasa nyingi kwa kutumia usambazaji wa multinomial. 
Tazama `?multinom_reg()` kwa njia tofauti/engines unazoweza kutumia kufit mfano huu.\n", + "\n", + "Kwa mfano huu, tutafit mfano wa Multinomial regression kupitia engine ya default [nnet](https://cran.r-project.org/web/packages/nnet/nnet.pdf).\n", + "\n", + "> Nilichagua thamani ya `penalty` kwa njia ya nasibu. Kuna njia bora za kuchagua thamani hii, yaani, kwa kutumia `resampling` na `tuning` ya mfano ambayo tutajadili baadaye.\n", + ">\n", + "> Tazama [Tidymodels: Get Started](https://www.tidymodels.org/start/tuning/) ikiwa unataka kujifunza zaidi kuhusu jinsi ya kurekebisha hyperparameters za mfano.\n" + ], + "metadata": { + "id": "gWMsVcbBJemu" + } + }, + { + "cell_type": "code", + "execution_count": 6, + "source": [ + "# Create a multinomial regression model specification\r\n", + "mr_spec <- multinom_reg(penalty = 1) %>% \r\n", + " set_engine(\"nnet\", MaxNWts = 2086) %>% \r\n", + " set_mode(\"classification\")\r\n", + "\r\n", + "# Print model specification\r\n", + "mr_spec" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Multinomial Regression Model Specification (classification)\n", + "\n", + "Main Arguments:\n", + " penalty = 1\n", + "\n", + "Engine-Specific Arguments:\n", + " MaxNWts = 2086\n", + "\n", + "Computational engine: nnet \n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 166 + }, + "id": "Wq_fcyQiJvfG", + "outputId": "c30449c7-3864-4be7-f810-72a003743e2d" + } + }, + { + "cell_type": "markdown", + "source": [ + "Kazi nzuri 🥳! Sasa kwa kuwa tuna mapishi na maelezo ya mfano, tunahitaji kupata njia ya kuyafunga pamoja katika kitu ambacho kwanza kitaandaa data, kisha kufitisha mfano kwenye data iliyotayarishwa, na pia kuruhusu shughuli za baada ya usindikaji. Katika Tidymodels, kitu hiki rahisi kinaitwa [`workflow`](https://workflows.tidymodels.org/) na kwa urahisi huhifadhi vipengele vyako vya uundaji wa mifano! 
Hiki ndicho tungeita *pipelines* katika *Python*.\n", + "\n", + "Sasa hebu tufunge kila kitu kwenye workflow!📦\n" + ], + "metadata": { + "id": "NlSbzDfgJ0zh" + } + }, + { + "cell_type": "code", + "execution_count": 7, + "source": [ + "# Bundle recipe and model specification\r\n", + "mr_wf <- workflow() %>% \r\n", + " add_recipe(cuisines_recipe) %>% \r\n", + " add_model(mr_spec)\r\n", + "\r\n", + "# Print out workflow\r\n", + "mr_wf" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "══ Workflow ════════════════════════════════════════════════════════════════════\n", + "\u001b[3mPreprocessor:\u001b[23m Recipe\n", + "\u001b[3mModel:\u001b[23m multinom_reg()\n", + "\n", + "── Preprocessor ────────────────────────────────────────────────────────────────\n", + "1 Recipe Step\n", + "\n", + "• step_smote()\n", + "\n", + "── Model ───────────────────────────────────────────────────────────────────────\n", + "Multinomial Regression Model Specification (classification)\n", + "\n", + "Main Arguments:\n", + " penalty = 1\n", + "\n", + "Engine-Specific Arguments:\n", + " MaxNWts = 2086\n", + "\n", + "Computational engine: nnet \n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 333 + }, + "id": "Sc1TfPA4Ke3_", + "outputId": "82c70013-e431-4e7e-cef6-9fcf8aad4a6c" + } + }, + { + "cell_type": "markdown", + "source": [ + "Mtiririko wa kazi 👌👌! **`workflow()`** inaweza kutumika kwa njia sawa na jinsi mfano unavyoweza kutumika. 
Kwa hivyo, ni wakati wa kufundisha mfano!\n" + ], + "metadata": { + "id": "TNQ8i85aKf9L" + } + }, + { + "cell_type": "code", + "execution_count": 8, + "source": [ + "# Train a multinomial regression model\n", + "mr_fit <- fit(object = mr_wf, data = cuisines_train)\n", + "\n", + "mr_fit" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "══ Workflow [trained] ══════════════════════════════════════════════════════════\n", + "\u001b[3mPreprocessor:\u001b[23m Recipe\n", + "\u001b[3mModel:\u001b[23m multinom_reg()\n", + "\n", + "── Preprocessor ────────────────────────────────────────────────────────────────\n", + "1 Recipe Step\n", + "\n", + "• step_smote()\n", + "\n", + "── Model ───────────────────────────────────────────────────────────────────────\n", + "Call:\n", + "nnet::multinom(formula = ..y ~ ., data = data, decay = ~1, MaxNWts = ~2086, \n", + " trace = FALSE)\n", + "\n", + "Coefficients:\n", + " (Intercept) almond angelica anise anise_seed apple\n", + "indian 0.19723325 0.2409661 0 -5.004955e-05 -0.1657635 -0.05769734\n", + "japanese 0.13961959 -0.6262400 0 -1.169155e-04 -0.4893596 -0.08585717\n", + "korean 0.22377347 -0.1833485 0 -5.560395e-05 -0.2489401 -0.15657804\n", + "thai -0.04336577 -0.6106258 0 4.903828e-04 -0.5782866 0.63451105\n", + " apple_brandy apricot armagnac artemisia artichoke asparagus\n", + "indian 0 0.37042636 0 -0.09122797 0 -0.27181970\n", + "japanese 0 0.28895643 0 -0.12651100 0 0.14054037\n", + "korean 0 -0.07981259 0 0.55756709 0 -0.66979948\n", + "thai 0 -0.33160904 0 -0.10725182 0 -0.02602152\n", + " avocado bacon baked_potato balm banana barley\n", + "indian -0.46624197 0.16008055 0 0 -0.2838796 0.2230625\n", + "japanese 0.90341344 0.02932727 0 0 -0.4142787 2.0953906\n", + "korean -0.06925382 -0.35804134 0 0 -0.2686963 -0.7233404\n", + "thai -0.21473955 -0.75594439 0 0 0.6784880 -0.4363320\n", + " bartlett_pear basil bay bean beech\n", + "indian 0 -0.7128756 0.1011587 -0.8777275 
-0.0004380795\n", + "japanese 0 0.1288697 0.9425626 -0.2380748 0.3373437611\n", + "korean 0 -0.2445193 -0.4744318 -0.8957870 -0.0048784496\n", + "thai 0 1.5365848 0.1333256 0.2196970 -0.0113078024\n", + " beef beef_broth beef_liver beer beet\n", + "indian -0.7985278 0.2430186 -0.035598065 -0.002173738 0.01005813\n", + "japanese 0.2241875 -0.3653020 -0.139551027 0.128905553 0.04923911\n", + "korean 0.5366515 -0.6153237 0.213455197 -0.010828645 0.27325423\n", + "thai 0.1570012 -0.9364154 -0.008032213 -0.035063746 -0.28279823\n", + " bell_pepper bergamot berry bitter_orange black_bean\n", + "indian 0.49074330 0 0.58947607 0.191256164 -0.1945233\n", + "japanese 0.09074167 0 -0.25917977 -0.118915977 -0.3442400\n", + "korean -0.57876763 0 -0.07874180 -0.007729435 -0.5220672\n", + "thai 0.92554006 0 -0.07210196 -0.002983296 -0.4614426\n", + " black_currant black_mustard_seed_oil black_pepper black_raspberry\n", + "indian 0 0.38935801 -0.4453495 0\n", + "japanese 0 -0.05452887 -0.5440869 0\n", + "korean 0 -0.03929970 0.8025454 0\n", + "thai 0 -0.21498372 -0.9854806 0\n", + " black_sesame_seed black_tea blackberry blackberry_brandy\n", + "indian -0.2759246 0.3079977 0.191256164 0\n", + "japanese -0.6101687 -0.1671913 -0.118915977 0\n", + "korean 1.5197674 -0.3036261 -0.007729435 0\n", + "thai -0.1755656 -0.1487033 -0.002983296 0\n", + " blue_cheese blueberry bone_oil bourbon_whiskey brandy\n", + "indian 0 0.216164294 -0.2276744 0 0.22427587\n", + "japanese 0 -0.119186087 0.3913019 0 -0.15595599\n", + "korean 0 -0.007821986 0.2854487 0 -0.02562342\n", + "thai 0 -0.004947048 -0.0253658 0 -0.05715244\n", + "\n", + "...\n", + "and 308 more lines." 
+ ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "GMbdfVmTKkJI", + "outputId": "adf9ebdf-d69d-4a64-e9fd-e06e5322292e" + } + }, + { + "cell_type": "markdown", + "source": [ + "Matokeo yanaonyesha vigezo ambavyo modeli ilijifunza wakati wa mafunzo.\n", + "\n", + "### Tathmini Modeli Iliyofunzwa\n", + "\n", + "Ni wakati wa kuona jinsi modeli ilivyofanya kazi 📏 kwa kuitathmini kwenye seti ya majaribio! Hebu tuanze kwa kufanya utabiri kwenye seti ya majaribio.\n" + ], + "metadata": { + "id": "tt2BfOxrKmcJ" + } + }, + { + "cell_type": "code", + "execution_count": 9, + "source": [ + "# Make predictions on the test set\n", + "results <- cuisines_test %>% select(cuisine) %>% \n", + " bind_cols(mr_fit %>% predict(new_data = cuisines_test))\n", + "\n", + "# Print out results\n", + "results %>% \n", + " slice_head(n = 5)" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine .pred_class\n", + "1 indian thai \n", + "2 indian indian \n", + "3 indian indian \n", + "4 indian indian \n", + "5 indian indian " + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 2\n", + "\n", + "| cuisine <fct> | .pred_class <fct> |\n", + "|---|---|\n", + "| indian | thai |\n", + "| indian | indian |\n", + "| indian | indian |\n", + "| indian | indian |\n", + "| indian | indian |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 2\n", + "\\begin{tabular}{ll}\n", + " cuisine & .pred\\_class\\\\\n", + " & \\\\\n", + "\\hline\n", + "\t indian & thai \\\\\n", + "\t indian & indian\\\\\n", + "\t indian & indian\\\\\n", + "\t indian & indian\\\\\n", + "\t indian & indian\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 2
cuisine.pred_class
<fct><fct>
indianthai
indianindian
indianindian
indianindian
indianindian
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 248 + }, + "id": "CqtckvtsKqax", + "outputId": "e57fe557-6a68-4217-fe82-173328c5436d" + } + }, + { + "cell_type": "markdown", + "source": [ + "Kazi nzuri! Katika Tidymodels, kutathmini utendaji wa modeli kunaweza kufanywa kwa kutumia [yardstick](https://yardstick.tidymodels.org/) - kifurushi kinachotumika kupima ufanisi wa modeli kwa kutumia vipimo vya utendaji. Kama tulivyofanya katika somo letu la usanjari wa kimantiki, hebu tuanze kwa kuhesabu matriki ya kuchanganya.\n" + ], + "metadata": { + "id": "8w5N6XsBKss7" + } + }, + { + "cell_type": "code", + "execution_count": 10, + "source": [ + "# Confusion matrix for categorical data\n", + "conf_mat(data = results, truth = cuisine, estimate = .pred_class)\n" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " Truth\n", + "Prediction chinese indian japanese korean thai\n", + " chinese 83 1 8 15 10\n", + " indian 4 163 1 2 6\n", + " japanese 21 5 73 25 1\n", + " korean 15 0 11 191 0\n", + " thai 10 11 3 7 70" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 133 + }, + "id": "YvODvsLkK0iG", + "outputId": "bb69da84-1266-47ad-b174-d43b88ca2988" + } + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "c0HfPL16Lr6U" + } + }, + { + "cell_type": "code", + "execution_count": 11, + "source": [ + "update_geom_defaults(geom = \"tile\", new = list(color = \"black\", alpha = 0.7))\n", + "# Visualize confusion matrix\n", + "results %>% \n", + " conf_mat(cuisine, .pred_class) %>% \n", + " autoplot(type = \"heatmap\")" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "plot without title" + ], + "image/png": "" + }, + "metadata": { + "image/png": { + "width": 420, + "height": 420 + } + } + } + ], + "metadata": { + "colab": { + "base_uri": 
"https://localhost:8080/", + "height": 436 + }, + "id": "HsAtwukyLsvt", + "outputId": "3032a224-a2c8-4270-b4f2-7bb620317400" + } + }, + { + "cell_type": "markdown", + "source": [ + "Mraba za giza kwenye mchoro wa matriki ya mkanganyiko zinaonyesha idadi kubwa ya kesi, na unaweza kuona mstari wa diagonal wa mraba za giza unaoonyesha kesi ambapo lebo iliyotabiriwa na lebo halisi ni sawa.\n", + "\n", + "Sasa hebu tuhifadhi takwimu za muhtasari kwa matriki ya mkanganyiko.\n" + ], + "metadata": { + "id": "oOJC87dkLwPr" + } + }, + { + "cell_type": "code", + "execution_count": 12, + "source": [ + "# Summary stats for confusion matrix\n", + "conf_mat(data = results, truth = cuisine, estimate = .pred_class) %>% \n", + "summary()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " .metric .estimator .estimate\n", + "1 accuracy multiclass 0.7880435\n", + "2 kap multiclass 0.7276583\n", + "3 sens macro 0.7780927\n", + "4 spec macro 0.9477598\n", + "5 ppv macro 0.7585583\n", + "6 npv macro 0.9460080\n", + "7 mcc multiclass 0.7292724\n", + "8 j_index macro 0.7258524\n", + "9 bal_accuracy macro 0.8629262\n", + "10 detection_prevalence macro 0.2000000\n", + "11 precision macro 0.7585583\n", + "12 recall macro 0.7780927\n", + "13 f_meas macro 0.7641862" + ], + "text/markdown": [ + "\n", + "A tibble: 13 × 3\n", + "\n", + "| .metric <chr> | .estimator <chr> | .estimate <dbl> |\n", + "|---|---|---|\n", + "| accuracy | multiclass | 0.7880435 |\n", + "| kap | multiclass | 0.7276583 |\n", + "| sens | macro | 0.7780927 |\n", + "| spec | macro | 0.9477598 |\n", + "| ppv | macro | 0.7585583 |\n", + "| npv | macro | 0.9460080 |\n", + "| mcc | multiclass | 0.7292724 |\n", + "| j_index | macro | 0.7258524 |\n", + "| bal_accuracy | macro | 0.8629262 |\n", + "| detection_prevalence | macro | 0.2000000 |\n", + "| precision | macro | 0.7585583 |\n", + "| recall | macro | 0.7780927 |\n", + "| f_meas | macro | 0.7641862 |\n", + "\n" + ], + "text/latex": [ + "A 
tibble: 13 × 3\n", + "\\begin{tabular}{lll}\n", + " .metric & .estimator & .estimate\\\\\n", + " & & \\\\\n", + "\\hline\n", + "\t accuracy & multiclass & 0.7880435\\\\\n", + "\t kap & multiclass & 0.7276583\\\\\n", + "\t sens & macro & 0.7780927\\\\\n", + "\t spec & macro & 0.9477598\\\\\n", + "\t ppv & macro & 0.7585583\\\\\n", + "\t npv & macro & 0.9460080\\\\\n", + "\t mcc & multiclass & 0.7292724\\\\\n", + "\t j\\_index & macro & 0.7258524\\\\\n", + "\t bal\\_accuracy & macro & 0.8629262\\\\\n", + "\t detection\\_prevalence & macro & 0.2000000\\\\\n", + "\t precision & macro & 0.7585583\\\\\n", + "\t recall & macro & 0.7780927\\\\\n", + "\t f\\_meas & macro & 0.7641862\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 13 × 3
.metric.estimator.estimate
<chr><chr><dbl>
accuracy multiclass0.7880435
kap multiclass0.7276583
sens macro 0.7780927
spec macro 0.9477598
ppv macro 0.7585583
npv macro 0.9460080
mcc multiclass0.7292724
j_index macro 0.7258524
bal_accuracy macro 0.8629262
detection_prevalencemacro 0.2000000
precision macro 0.7585583
recall macro 0.7780927
f_meas macro 0.7641862
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 494 + }, + "id": "OYqetUyzL5Wz", + "outputId": "6a84d65e-113d-4281-dfc1-16e8b70f37e6" + } + }, + { + "cell_type": "markdown", + "source": [ + "Ikiwa tutazingatia vipimo kama vile usahihi, unyeti, na ppv, hatuko mbali sana kwa mwanzo mzuri 🥳!\n", + "\n", + "## 4. Kuchunguza Kwa Undani\n", + "\n", + "Hebu tujiulize swali moja la kina: Ni vigezo gani vinavyotumika kuamua aina fulani ya chakula kama matokeo yaliyotabiriwa?\n", + "\n", + "Kweli, algoriti za kujifunza kwa mashine za takwimu, kama vile logistic regression, zinategemea `uwezekano`; kwa hivyo kinachotabiriwa na classifier kwa kweli ni mgawanyo wa uwezekano juu ya seti ya matokeo yanayowezekana. Darasa lenye uwezekano wa juu zaidi ndilo huchaguliwa kama matokeo yanayowezekana zaidi kwa uchunguzi uliotolewa.\n", + "\n", + "Hebu tuone hili likifanyika kwa kufanya utabiri wa madarasa madhubuti na uwezekano.\n" + ], + "metadata": { + "id": "43t7vz8vMJtW" + } + }, + { + "cell_type": "code", + "execution_count": 13, + "source": [ + "# Make hard class prediction and probabilities\n", + "results_prob <- cuisines_test %>%\n", + " select(cuisine) %>% \n", + " bind_cols(mr_fit %>% predict(new_data = cuisines_test)) %>% \n", + " bind_cols(mr_fit %>% predict(new_data = cuisines_test, type = \"prob\"))\n", + "\n", + "# Print out results\n", + "results_prob %>% \n", + " slice_head(n = 5)" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine .pred_class .pred_chinese .pred_indian .pred_japanese .pred_korean\n", + "1 indian thai 1.551259e-03 0.4587877 5.988039e-04 2.428503e-04\n", + "2 indian indian 2.637133e-05 0.9999488 6.648651e-07 2.259993e-05\n", + "3 indian indian 1.049433e-03 0.9909982 1.060937e-03 1.644947e-05\n", + "4 indian indian 6.237482e-02 0.4763035 9.136702e-02 3.660913e-01\n", + "5 indian indian 1.431745e-02 0.9418551 2.945239e-02 
8.721782e-03\n", + " .pred_thai \n", + "1 5.388194e-01\n", + "2 1.577948e-06\n", + "3 6.874989e-03\n", + "4 3.863391e-03\n", + "5 5.653283e-03" + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 7\n", + "\n", + "| cuisine <fct> | .pred_class <fct> | .pred_chinese <dbl> | .pred_indian <dbl> | .pred_japanese <dbl> | .pred_korean <dbl> | .pred_thai <dbl> |\n", + "|---|---|---|---|---|---|---|\n", + "| indian | thai | 1.551259e-03 | 0.4587877 | 5.988039e-04 | 2.428503e-04 | 5.388194e-01 |\n", + "| indian | indian | 2.637133e-05 | 0.9999488 | 6.648651e-07 | 2.259993e-05 | 1.577948e-06 |\n", + "| indian | indian | 1.049433e-03 | 0.9909982 | 1.060937e-03 | 1.644947e-05 | 6.874989e-03 |\n", + "| indian | indian | 6.237482e-02 | 0.4763035 | 9.136702e-02 | 3.660913e-01 | 3.863391e-03 |\n", + "| indian | indian | 1.431745e-02 | 0.9418551 | 2.945239e-02 | 8.721782e-03 | 5.653283e-03 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 7\n", + "\\begin{tabular}{lllllll}\n", + " cuisine & .pred\\_class & .pred\\_chinese & .pred\\_indian & .pred\\_japanese & .pred\\_korean & .pred\\_thai\\\\\n", + " & & & & & & \\\\\n", + "\\hline\n", + "\t indian & thai & 1.551259e-03 & 0.4587877 & 5.988039e-04 & 2.428503e-04 & 5.388194e-01\\\\\n", + "\t indian & indian & 2.637133e-05 & 0.9999488 & 6.648651e-07 & 2.259993e-05 & 1.577948e-06\\\\\n", + "\t indian & indian & 1.049433e-03 & 0.9909982 & 1.060937e-03 & 1.644947e-05 & 6.874989e-03\\\\\n", + "\t indian & indian & 6.237482e-02 & 0.4763035 & 9.136702e-02 & 3.660913e-01 & 3.863391e-03\\\\\n", + "\t indian & indian & 1.431745e-02 & 0.9418551 & 2.945239e-02 & 8.721782e-03 & 5.653283e-03\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 7
cuisine.pred_class.pred_chinese.pred_indian.pred_japanese.pred_korean.pred_thai
<fct><fct><dbl><dbl><dbl><dbl><dbl>
indianthai 1.551259e-030.45878775.988039e-042.428503e-045.388194e-01
indianindian2.637133e-050.99994886.648651e-072.259993e-051.577948e-06
indianindian1.049433e-030.99099821.060937e-031.644947e-056.874989e-03
indianindian6.237482e-020.47630359.136702e-023.660913e-013.863391e-03
indianindian1.431745e-020.94185512.945239e-028.721782e-035.653283e-03
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 248 + }, + "id": "xdKNs-ZPMTJL", + "outputId": "68f6ac5a-725a-4eff-9ea6-481fef00e008" + } + }, + { + "cell_type": "markdown", + "source": [ + "Kwa nini mfano una uhakika kwamba uchunguzi wa kwanza ni wa Kithai?\n", + "\n", + "## **🚀Changamoto**\n", + "\n", + "Katika somo hili, ulitumia data yako iliyosafishwa kujenga mfano wa mashine unaoweza kutabiri aina ya chakula cha kitaifa kulingana na mfululizo wa viungo. Chukua muda kusoma [chaguzi nyingi](https://www.tidymodels.org/find/parsnip/#models) zinazotolewa na Tidymodels ili kuainisha data na [njia nyingine](https://parsnip.tidymodels.org/articles/articles/Examples.html#multinom_reg-models) za kufanikisha multinomial regression.\n", + "\n", + "#### SHUKRANI KWA:\n", + "\n", + "[`Allison Horst`](https://twitter.com/allison_horst/) kwa kuunda michoro ya kushangaza inayofanya R kuwa ya kuvutia na ya kirafiki zaidi. Tafuta michoro zaidi kwenye [galeria yake](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM).\n", + "\n", + "[Cassie Breviu](https://www.twitter.com/cassieview) na [Jen Looper](https://www.twitter.com/jenlooper) kwa kuunda toleo la awali la Python la moduli hii ♥️\n", + "\n", + "
\n", + "Ningetupa vichekesho lakini sijaelewa utani wa chakula 😅.\n", + "\n", + "
\n", + "\n", + "Jifunze kwa furaha,\n", + "\n", + "[Eric](https://twitter.com/ericntay), Balozi wa Dhahabu wa Wanafunzi wa Microsoft Learn.\n" + ], + "metadata": { + "id": "2tWVHMeLMYdM" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya tafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kwa usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, inashauriwa kutumia tafsiri ya kitaalamu ya binadamu. Hatutawajibika kwa maelewano mabaya au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sw/4-Classification/2-Classifiers-1/solution/notebook.ipynb b/translations/sw/4-Classification/2-Classifiers-1/solution/notebook.ipynb new file mode 100644 index 000000000..b5232ef1b --- /dev/null +++ b/translations/sw/4-Classification/2-Classifiers-1/solution/notebook.ipynb @@ -0,0 +1,279 @@ +{ + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 0 indian 0 0 0 0 0 \n", + "1 1 indian 1 0 0 0 0 \n", + "2 2 indian 0 0 0 0 0 \n", + "3 3 indian 0 0 0 0 0 \n", + "4 4 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 
0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 382 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00indian00000000...0000000000
11indian10000000...0000000000
22indian00000000...0000000000
33indian00000000...0000000000
44indian00000000...0000000010
\n

5 rows × 382 columns

\n
" + }, + "metadata": {}, + "execution_count": 1 + } + ], + "source": [ + "import pandas as pd\n", + "cuisines_df = pd.read_csv(\"../../data/cleaned_cuisines.csv\")\n", + "cuisines_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.model_selection import train_test_split, cross_val_score\n", + "from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report, precision_recall_curve\n", + "from sklearn.svm import SVC\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 indian\n", + "1 indian\n", + "2 indian\n", + "3 indian\n", + "4 indian\n", + "Name: cuisine, dtype: object" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ], + "source": [ + "cuisines_label_df = cuisines_df['cuisine']\n", + "cuisines_label_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 4 + } + ], + "source": [ + "cuisines_feature_df = cuisines_df.drop(['Unnamed: 0', 'cuisine'], axis=1)\n", + "cuisines_feature_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(cuisines_feature_df, cuisines_label_df, test_size=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Accuracy is 0.8181818181818182\n" + ] + } + ], + "source": [ + "lr = LogisticRegression(multi_class='ovr',solver='liblinear')\n", + "model = lr.fit(X_train, np.ravel(y_train))\n", + "\n", + "accuracy = model.score(X_test, y_test)\n", + "print (\"Accuracy is {}\".format(accuracy))" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "ingredients: Index(['artemisia', 'black_pepper', 'mushroom', 'shiitake', 'soy_sauce',\n 'vegetable_oil'],\n dtype='object')\ncuisine: korean\n" + ] + } + ], + "source": [ + "# test an item\n", + "print(f'ingredients: {X_test.iloc[50][X_test.iloc[50]!=0].keys()}')\n", + "print(f'cuisine: {y_test.iloc[50]}')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " 0\n", + "korean 0.392231\n", + "chinese 0.372872\n", + "japanese 0.218825\n", + "thai 0.013427\n", + "indian 0.002645" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
0
korean0.392231
chinese0.372872
japanese0.218825
thai0.013427
indian0.002645
\n
" + }, + "metadata": {}, + "execution_count": 8 + } + ], + "source": [ + "#rehsape to 2d array and transpose\n", + "test= X_test.iloc[50].values.reshape(-1, 1).T\n", + "# predict with score\n", + "proba = model.predict_proba(test)\n", + "classes = model.classes_\n", + "# create df with classes and scores\n", + "resultdf = pd.DataFrame(data=proba, columns=classes)\n", + "\n", + "# create df to show results\n", + "topPrediction = resultdf.T.sort_values(by=[0], ascending = [False])\n", + "topPrediction.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " precision recall f1-score support\n\n chinese 0.75 0.73 0.74 223\n indian 0.93 0.88 0.90 255\n japanese 0.78 0.78 0.78 253\n korean 0.87 0.86 0.86 236\n thai 0.76 0.84 0.80 232\n\n accuracy 0.82 1199\n macro avg 0.82 0.82 0.82 1199\nweighted avg 0.82 0.82 0.82 1199\n\n" + ] + } + ], + "source": [ + "y_pred = model.predict(X_test)\r\n", + "print(classification_report(y_test,y_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya tafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kwa usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, inashauriwa kutumia huduma ya tafsiri ya kitaalamu ya binadamu. 
Hatutawajibika kwa maelewano mabaya au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "9408506dd864f2b6e334c62f80c0cfcc", + "translation_date": "2025-09-06T14:33:11+00:00", + "source_file": "4-Classification/2-Classifiers-1/solution/notebook.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/translations/sw/4-Classification/3-Classifiers-2/notebook.ipynb b/translations/sw/4-Classification/3-Classifiers-2/notebook.ipynb new file mode 100644 index 000000000..e4fa9d14a --- /dev/null +++ b/translations/sw/4-Classification/3-Classifiers-2/notebook.ipynb @@ -0,0 +1,163 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 0 indian 0 0 0 0 0 \n", + "1 1 indian 1 0 0 0 0 \n", + "2 2 indian 0 0 0 0 0 \n", + "3 3 indian 0 0 0 0 0 \n", + "4 4 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 
0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 382 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00indian00000000...0000000000
11indian10000000...0000000000
22indian00000000...0000000000
33indian00000000...0000000000
44indian00000000...0000000010
\n

5 rows × 382 columns

\n
" + }, + "metadata": {}, + "execution_count": 9 + } + ], + "source": [ + "import pandas as pd\n", + "cuisines_df = pd.read_csv(\"../data/cleaned_cuisines.csv\")\n", + "cuisines_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 indian\n", + "1 indian\n", + "2 indian\n", + "3 indian\n", + "4 indian\n", + "Name: cuisine, dtype: object" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ], + "source": [ + "cuisines_label_df = cuisines_df['cuisine']\n", + "cuisines_label_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 11 + } + ], + "source": [ + "cuisines_feature_df = cuisines_df.drop(['Unnamed: 0', 'cuisine'], axis=1)\n", + "cuisines_feature_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya kutafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kuhakikisha usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, tafsiri ya kitaalamu ya binadamu inapendekezwa. Hatutawajibika kwa kutoelewana au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "15a83277036572e0773229b5f21c1e12", + "translation_date": "2025-09-06T14:42:25+00:00", + "source_file": "4-Classification/3-Classifiers-2/notebook.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/translations/sw/4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb b/translations/sw/4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb new file mode 100644 index 000000000..ae56f0909 --- /dev/null +++ 
b/translations/sw/4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb @@ -0,0 +1,648 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "lesson_12-R.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "fab50046ca413a38939d579f8432274f", + "translation_date": "2025-09-06T14:47:49+00:00", + "source_file": "4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb", + "language_code": "sw" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "jsFutf_ygqSx" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HD54bEefgtNO" + }, + "source": [ + "## Waainishaji wa vyakula 2\n", + "\n", + "Katika somo hili la pili la uainishaji, tutachunguza `njia zaidi` za kuainisha data ya kategoria. Pia tutajifunza kuhusu athari za kuchagua mwainishaji mmoja badala ya mwingine.\n", + "\n", + "### [**Jaribio la awali la somo**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/23/)\n", + "\n", + "### **Mahitaji ya awali**\n", + "\n", + "Tunadhani kuwa umekamilisha masomo ya awali kwa kuwa tutatumia baadhi ya dhana tulizojifunza hapo kabla.\n", + "\n", + "Kwa somo hili, tutahitaji vifurushi vifuatavyo:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) ni [mkusanyiko wa vifurushi vya R](https://www.tidyverse.org/packages) vilivyoundwa ili kufanya sayansi ya data kuwa ya haraka, rahisi, na ya kufurahisha!\n", + "\n", + "- `tidymodels`: Mfumo wa [tidymodels](https://www.tidymodels.org/) ni [mkusanyiko wa vifurushi](https://www.tidymodels.org/packages/) kwa ajili ya uundaji wa mifano na ujifunzaji wa mashine.\n", + "\n", + "- `themis`: [Kifurushi cha themis](https://themis.tidymodels.org/) kinatoa Hatua za Ziada za Mapishi kwa Kushughulikia Data Isiyosawazishwa.\n", + "\n", + "Unaweza kuvifunga kwa kutumia:\n", + "\n", + 
"`install.packages(c(\"tidyverse\", \"tidymodels\", \"kernlab\", \"themis\", \"ranger\", \"xgboost\", \"kknn\"))`\n", + "\n", + "Vinginevyo, script iliyo hapa chini hukagua kama una vifurushi vinavyohitajika kukamilisha moduli hii na kuvifunga kwako endapo havipo.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "vZ57IuUxgyQt" + }, + "source": [ + "suppressWarnings(if (!require(\"pacman\"))install.packages(\"pacman\"))\n", + "\n", + "pacman::p_load(tidyverse, tidymodels, themis, kernlab, ranger, xgboost, kknn)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "z22M-pj4g07x" + }, + "source": [ + "## **1. Ramani ya uainishaji**\n", + "\n", + "Katika [somo letu la awali](https://github.com/microsoft/ML-For-Beginners/tree/main/4-Classification/2-Classifiers-1), tulijaribu kujibu swali: tunachaguaje kati ya mifano mbalimbali? Kwa kiasi kikubwa, inategemea sifa za data na aina ya tatizo tunalotaka kutatua (kwa mfano, uainishaji au regression?)\n", + "\n", + "Hapo awali, tulijifunza kuhusu chaguo mbalimbali unazoweza kutumia unapouainisha data kwa kutumia karatasi ya msaada ya Microsoft. Mfumo wa Kujifunza kwa Mashine wa Python, Scikit-learn, unatoa karatasi ya msaada inayofanana lakini ya kina zaidi ambayo inaweza kusaidia zaidi kupunguza chaguo zako za estimators (neno lingine kwa classifiers):\n", + "\n", + "

\n", + " \n", + "

\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "u1i3xRIVg7vG" + }, + "source": [ + "> Kidokezo: [tembelea ramani hii mtandaoni](https://scikit-learn.org/stable/tutorial/machine_learning_map/) na bonyeza kwenye njia ili kusoma nyaraka.\n", + ">\n", + "> Tovuti ya [Tidymodels reference](https://www.tidymodels.org/find/parsnip/#models) pia inatoa nyaraka bora kuhusu aina tofauti za modeli.\n", + "\n", + "### **Mpango** 🗺️\n", + "\n", + "Ramani hii ni muhimu sana mara tu unapokuwa na uelewa mzuri wa data yako, kwani unaweza 'kutembea' kwenye njia zake kuelekea uamuzi:\n", + "\n", + "- Tuna sampuli \\>50\n", + "\n", + "- Tunataka kutabiri kategoria\n", + "\n", + "- Tuna data yenye lebo\n", + "\n", + "- Tuna sampuli chini ya 100K\n", + "\n", + "- ✨ Tunaweza kuchagua Linear SVC\n", + "\n", + "- Ikiwa hiyo haifanyi kazi, kwa kuwa tuna data ya nambari\n", + "\n", + " - Tunaweza kujaribu ✨ KNeighbors Classifier\n", + "\n", + " - Ikiwa hiyo haifanyi kazi, jaribu ✨ SVC na ✨ Ensemble Classifiers\n", + "\n", + "Hii ni njia muhimu sana ya kufuata. Sasa, hebu tuanze moja kwa moja kwa kutumia mfumo wa modeli wa [tidymodels](https://www.tidymodels.org/): mkusanyiko thabiti na rahisi wa pakiti za R zilizotengenezwa ili kuhimiza mazoea mazuri ya takwimu 😊.\n", + "\n", + "## 2. Gawanya data na kushughulikia seti ya data isiyo na uwiano.\n", + "\n", + "Kutoka kwenye masomo yetu ya awali, tulijifunza kuwa kulikuwa na seti ya viungo vya kawaida katika vyakula vyetu. 
Pia, kulikuwa na usambazaji usio sawa katika idadi ya vyakula.\n", + "\n", + "Tutashughulikia haya kwa:\n", + "\n", + "- Kuondoa viungo vya kawaida zaidi vinavyosababisha mkanganyiko kati ya vyakula tofauti, kwa kutumia `dplyr::select()`.\n", + "\n", + "- Kutumia `recipe` inayosindika data ili kuifanya iwe tayari kwa modeli kwa kutumia algoriti ya `over-sampling`.\n", + "\n", + "Tayari tulitazama haya katika somo la awali kwa hivyo hili linapaswa kuwa rahisi 🥳!\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "6tj_rN00hClA" + }, + "source": [ + "# Load the core Tidyverse and Tidymodels packages\n", + "library(tidyverse)\n", + "library(tidymodels)\n", + "\n", + "# Load the original cuisines data\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/4-Classification/data/cuisines.csv\")\n", + "\n", + "# Drop id column, rice, garlic and ginger from our original data set\n", + "df_select <- df %>% \n", + " select(-c(1, rice, garlic, ginger)) %>%\n", + " # Encode cuisine column as categorical\n", + " mutate(cuisine = factor(cuisine))\n", + "\n", + "\n", + "# Create data split specification\n", + "set.seed(2056)\n", + "cuisines_split <- initial_split(data = df_select,\n", + " strata = cuisine,\n", + " prop = 0.7)\n", + "\n", + "# Extract the data in each split\n", + "cuisines_train <- training(cuisines_split)\n", + "cuisines_test <- testing(cuisines_split)\n", + "\n", + "# Display distribution of cuisines in the training set\n", + "cuisines_train %>% \n", + " count(cuisine) %>% \n", + " arrange(desc(n))" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zFin5yw3hHb1" + }, + "source": [ + "### Kushughulikia Data Isiyosawazishwa\n", + "\n", + "Data isiyosawazishwa mara nyingi ina athari mbaya kwenye utendaji wa modeli. 
Modeli nyingi hufanya kazi vizuri zaidi pale idadi ya uchunguzi ni sawa, na kwa hivyo huwa zinapata changamoto na data isiyosawazishwa.\n", + "\n", + "Kuna njia kuu mbili za kushughulikia seti za data isiyosawazishwa:\n", + "\n", + "- kuongeza uchunguzi kwenye darasa lenye idadi ndogo: `Over-sampling` kwa mfano kutumia algoriti ya SMOTE ambayo huzalisha mifano mipya ya darasa lenye idadi ndogo kwa kutumia majirani wa karibu wa kesi hizo.\n", + "\n", + "- kuondoa uchunguzi kutoka darasa lenye idadi kubwa: `Under-sampling`\n", + "\n", + "Katika somo letu la awali, tulionyesha jinsi ya kushughulikia seti za data isiyosawazishwa kwa kutumia `recipe`. Recipe inaweza kufikiriwa kama mpango unaoelezea hatua gani zinapaswa kutumika kwenye seti ya data ili kuifanya iwe tayari kwa uchambuzi wa data. Katika hali yetu, tunataka kuwa na usambazaji sawa wa idadi ya vyakula vyetu kwa `training set` yetu. Hebu tuingie moja kwa moja.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "cRzTnHolhLWd" + }, + "source": [ + "# Load themis package for dealing with imbalanced data\n", + "library(themis)\n", + "\n", + "# Create a recipe for preprocessing training data\n", + "cuisines_recipe <- recipe(cuisine ~ ., data = cuisines_train) %>%\n", + " step_smote(cuisine) \n", + "\n", + "# Print recipe\n", + "cuisines_recipe" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KxOQ2ORhhO81" + }, + "source": [ + "Sasa tuko tayari kufundisha mifano 👩‍💻👨‍💻!\n", + "\n", + "## 3. Zaidi ya mifano ya regression ya multinomial\n", + "\n", + "Katika somo letu la awali, tulichunguza mifano ya regression ya multinomial. Hebu tuangalie mifano mingine yenye kubadilika zaidi kwa ajili ya uainishaji.\n", + "\n", + "### Support Vector Machines\n", + "\n", + "Katika muktadha wa uainishaji, `Support Vector Machines` ni mbinu ya kujifunza kwa mashine inayojaribu kutafuta *hyperplane* inayotenganisha darasa kwa \"ubora\" zaidi. 
Hebu tuangalie mfano rahisi:\n", + "\n", + "

\n", + " \n", + "

https://commons.wikimedia.org/w/index.php?curid=22877598
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "C4Wsd0vZhXYu" + }, + "source": [ + "H1~ haigawanyi madarasa. H2~ inagawanya, lakini kwa pengo dogo tu. H3~ inagawanya kwa pengo kubwa zaidi.\n", + "\n", + "#### Klasifaya ya Msaada wa Vector ya Mstari\n", + "\n", + "Kuweka vikundi kwa kutumia Support-Vector (SVC) ni sehemu ya familia ya mbinu za ML za Support-Vector machines. Katika SVC, hyperplane huchaguliwa ili kutenganisha kwa usahihi `sehemu kubwa` ya uchunguzi wa mafunzo, lakini `inaweza kukosea` uchunguzi kadhaa. Kwa kuruhusu baadhi ya alama kuwa upande usio sahihi, SVM inakuwa thabiti zaidi kwa data isiyo ya kawaida na hivyo kuboresha uwezo wa kujumlisha data mpya. Kigezo kinachosimamia ukiukaji huu kinaitwa `cost` (gharama), ambacho kina thamani ya msingi ya 1 (tazama `help(\"svm_poly\")`).\n", + "\n", + "Hebu tuunde SVC ya mstari kwa kuweka `degree = 1` katika mfano wa polynomial SVM.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "vJpp6nuChlBz" + }, + "source": [ + "# Make a linear SVC specification\n", + "svc_linear_spec <- svm_poly(degree = 1) %>% \n", + " set_engine(\"kernlab\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle specification and recipe into a workflow\n", + "svc_linear_wf <- workflow() %>% \n", + " add_recipe(cuisines_recipe) %>% \n", + " add_model(svc_linear_spec)\n", + "\n", + "# Print out workflow\n", + "svc_linear_wf" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rDs8cWNkhoqu" + }, + "source": [ + "Sasa kwa kuwa tumeshakamata hatua za awali za uchakataji na maelezo ya modeli ndani ya *workflow*, tunaweza kuendelea na kufundisha SVC ya mstari na kutathmini matokeo wakati huo huo. 
Kwa vipimo vya utendaji, hebu tuunde seti ya vipimo ambayo itatathmini: `accuracy`, `sensitivity`, `Positive Predicted Value` na `F Measure`.\n", + "\n", + "> `augment()` itaongeza safu(safu) za utabiri kwenye data iliyotolewa.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "81wiqcwuhrnq" + }, + "source": [ + "# Train a linear SVC model\n", + "svc_linear_fit <- svc_linear_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "# Create a metric set\n", + "eval_metrics <- metric_set(ppv, sens, accuracy, f_meas)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "svc_linear_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0UFQvHf-huo3" + }, + "source": [ + "#### Mashine ya Msaada wa Vector\n", + "\n", + "Mashine ya msaada wa vector (SVM) ni upanuzi wa mclasifia wa msaada wa vector ili kuweza kushughulikia mpaka usio wa mstari kati ya madarasa. Kimsingi, SVM hutumia *mbinu ya kernel* kupanua nafasi ya sifa ili kuendana na uhusiano usio wa mstari kati ya madarasa. 
Mojawapo ya kazi maarufu na yenye kubadilika sana ya kernel inayotumiwa na SVM ni *Radial basis function.* Hebu tuone jinsi itakavyofanya kazi kwenye data yetu.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "-KX4S8mzhzmp" + }, + "source": [ + "set.seed(2056)\n", + "\n", + "# Make an RBF SVM specification\n", + "svm_rbf_spec <- svm_rbf() %>% \n", + " set_engine(\"kernlab\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle specification and recipe into a workflow\n", + "svm_rbf_wf <- workflow() %>% \n", + " add_recipe(cuisines_recipe) %>% \n", + " add_model(svm_rbf_spec)\n", + "\n", + "\n", + "# Train an RBF model\n", + "svm_rbf_fit <- svm_rbf_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "svm_rbf_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QBFSa7WSh4HQ" + }, + "source": [ + "Bora zaidi 🤩!\n", + "\n", + "> ✅ Tafadhali angalia:\n", + ">\n", + "> - [*Support Vector Machines*](https://bradleyboehmke.github.io/HOML/svm.html), Hands-on Machine Learning with R\n", + ">\n", + "> - [*Support Vector Machines*](https://www.statlearning.com/), An Introduction to Statistical Learning with Applications in R\n", + ">\n", + "> kwa kusoma zaidi.\n", + "\n", + "### Vainishi vya Jirani wa Karibu\n", + "\n", + "*K*-jirani wa karibu (KNN) ni algorithimu ambapo kila uchunguzi unatabiriwa kulingana na *ufanano* wake na uchunguzi mwingine.\n", + "\n", + "Hebu tuifanyie data yetu.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "k4BxxBcdh9Ka" + }, + "source": [ + "# Make a KNN specification\n", + "knn_spec <- nearest_neighbor() %>% \n", + " set_engine(\"kknn\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle recipe and model specification into a workflow\n", + "knn_wf <- 
workflow() %>% \n", + " add_recipe(cuisines_recipe) %>% \n", + " add_model(knn_spec)\n", + "\n", + "# Train a KNN model\n", + "knn_wf_fit <- knn_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "knn_wf_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HaegQseriAcj" + }, + "source": [ + "Inaonekana kwamba modeli hii haifanyi kazi vizuri sana. Huenda kubadilisha vigezo vya modeli (tazama `help(\"nearest_neighbor\")`) kutaboresha utendaji wa modeli. Hakikisha kujaribu.\n", + "\n", + "> ✅ Tafadhali angalia:\n", + ">\n", + "> - [Hands-on Machine Learning with R](https://bradleyboehmke.github.io/HOML/)\n", + ">\n", + "> - [An Introduction to Statistical Learning with Applications in R](https://www.statlearning.com/)\n", + ">\n", + "> ili kujifunza zaidi kuhusu *K*-Nearest Neighbors classifiers.\n", + "\n", + "### Waainishaji wa Ensemble\n", + "\n", + "Algoriti za ensemble hufanya kazi kwa kuunganisha makadirio kadhaa ya msingi ili kuzalisha modeli bora kwa kutumia:\n", + "\n", + "`bagging`: kutumia *kazi ya wastani* kwa mkusanyiko wa modeli za msingi\n", + "\n", + "`boosting`: kujenga mfululizo wa modeli zinazojenga juu ya kila moja ili kuboresha utendaji wa utabiri.\n", + "\n", + "Hebu tuanze kwa kujaribu modeli ya Random Forest, ambayo hujenga mkusanyiko mkubwa wa miti ya maamuzi kisha hutumia kazi ya wastani ili kupata modeli bora zaidi kwa ujumla.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "49DPoVs6iK1M" + }, + "source": [ + "# Make a random forest specification\n", + "rf_spec <- rand_forest() %>% \n", + " set_engine(\"ranger\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle recipe and model specification into a workflow\n", + "rf_wf <- workflow() %>% \n", + " 
add_recipe(cuisines_recipe) %>% \n", + " add_model(rf_spec)\n", + "\n", + "# Train a random forest model\n", + "rf_wf_fit <- rf_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "rf_wf_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RGVYwC_aiUWc" + }, + "source": [ + "Kazi nzuri 👏!\n", + "\n", + "Hebu pia tujaribu na mfano wa Boosted Tree.\n", + "\n", + "Boosted Tree hufafanua mbinu ya ensemble inayounda mfululizo wa miti ya maamuzi ya mfululizo ambapo kila mti unategemea matokeo ya miti ya awali kwa lengo la kupunguza makosa hatua kwa hatua. Inalenga uzito wa vitu vilivyokosewa kuainishwa na kurekebisha mwelekeo wa classifier inayofuata ili kusahihisha.\n", + "\n", + "Kuna njia tofauti za kufanikisha mfano huu (tazama `help(\"boost_tree\")`). 
Katika mfano huu, tutafanikisha Boosted trees kupitia injini ya `xgboost`.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Py1YWo-micWs" + }, + "source": [ + "# Make a boosted tree specification\n", + "boost_spec <- boost_tree(trees = 200) %>% \n", + " set_engine(\"xgboost\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle recipe and model specification into a workflow\n", + "boost_wf <- workflow() %>% \n", + " add_recipe(cuisines_recipe) %>% \n", + " add_model(boost_spec)\n", + "\n", + "# Train a boosted tree model\n", + "boost_wf_fit <- boost_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "boost_wf_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zNQnbuejigZM" + }, + "source": [ + "✅ Tafadhali angalia:\n", + "\n", + "- [Machine Learning for Social Scientists](https://cimentadaj.github.io/ml_socsci/tree-based-methods.html#random-forests)\n", + "\n", + "- [Hands-on Machine Learning with R](https://bradleyboehmke.github.io/HOML/)\n", + "\n", + "- [An Introduction to Statistical Learning with Applications in R](https://www.statlearning.com/)\n", + "\n", + "- - Inachunguza mfano wa AdaBoost ambao ni mbadala mzuri kwa xgboost.\n", + "\n", + "kujifunza zaidi kuhusu waainishaji wa Ensemble.\n", + "\n", + "## 4. Ziada - kulinganisha mifano mingi\n", + "\n", + "Tumetengeneza idadi kubwa ya mifano katika maabara hii 🙌. 
Inaweza kuwa kazi ngumu au ya kuchosha kuunda mtiririko wa kazi nyingi kutoka kwa seti tofauti za preprocessors na/au maelezo ya mifano kisha kuhesabu vipimo vya utendaji moja baada ya nyingine.\n", + "\n", + "Hebu tuone kama tunaweza kushughulikia hili kwa kuunda kazi ambayo inafaa orodha ya mtiririko wa kazi kwenye seti ya mafunzo kisha inarudisha vipimo vya utendaji kulingana na seti ya majaribio. Tutatumia `map()` na `map_dfr()` kutoka kwenye kifurushi cha [purrr](https://purrr.tidyverse.org/) ili kutumia kazi kwa kila kipengele katika orodha.\n", + "\n", + "> [`map()`](https://purrr.tidyverse.org/reference/map.html) kazi zinakuruhusu kubadilisha mikondo mingi ya for na msimbo ambao ni mfupi zaidi na rahisi kusoma. Sehemu bora ya kujifunza kuhusu [`map()`](https://purrr.tidyverse.org/reference/map.html) kazi ni sura ya [iteration](http://r4ds.had.co.nz/iteration.html) katika R kwa data science.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Qzb7LyZnimd2" + }, + "source": [ + "set.seed(2056)\n", + "\n", + "# Create a metric set\n", + "eval_metrics <- metric_set(ppv, sens, accuracy, f_meas)\n", + "\n", + "# Define a function that returns performance metrics\n", + "compare_models <- function(workflow_list, train_set, test_set){\n", + " \n", + " suppressWarnings(\n", + " # Fit each model to the train_set\n", + " map(workflow_list, fit, data = train_set) %>% \n", + " # Make predictions on the test set\n", + " map_dfr(augment, new_data = test_set, .id = \"model\") %>%\n", + " # Select desired columns\n", + " select(model, cuisine, .pred_class) %>% \n", + " # Evaluate model performance\n", + " group_by(model) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class) %>% \n", + " ungroup()\n", + " )\n", + " \n", + "} # End of function" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Fwa712sNisDA" + }, + "source": [] + }, + { + "cell_type": "code", + "metadata": { + "id": 
"3i4VJOi2iu-a" + }, + "source": [ + "# Make a list of workflows\n", + "workflow_list <- list(\n", + " \"svc\" = svc_linear_wf,\n", + " \"svm\" = svm_rbf_wf,\n", + " \"knn\" = knn_wf,\n", + " \"random_forest\" = rf_wf,\n", + " \"xgboost\" = boost_wf)\n", + "\n", + "# Call the function\n", + "set.seed(2056)\n", + "perf_metrics <- compare_models(workflow_list = workflow_list, train_set = cuisines_train, test_set = cuisines_test)\n", + "\n", + "# Print out performance metrics\n", + "perf_metrics %>% \n", + " group_by(.metric) %>% \n", + " arrange(desc(.estimate)) %>% \n", + " slice_head(n=7)\n", + "\n", + "# Compare accuracy\n", + "perf_metrics %>% \n", + " filter(.metric == \"accuracy\") %>% \n", + " arrange(desc(.estimate))\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KuWK_lEli4nW" + }, + "source": [ + "Kifurushi cha [**workflowset**](https://workflowsets.tidymodels.org/) kinawawezesha watumiaji kuunda na kufanikisha urahisi idadi kubwa ya mifano ya modeli, lakini kimeundwa hasa kufanya kazi na mbinu za sampuli kama `cross-validation`, mbinu ambayo bado hatujafikia.\n", + "\n", + "## **🚀Changamoto**\n", + "\n", + "Kila moja ya mbinu hizi ina idadi kubwa ya vigezo ambavyo unaweza kurekebisha, kwa mfano `cost` katika SVMs, `neighbors` katika KNN, `mtry` (Vitabiri Vilivyochaguliwa kwa Nasibu) katika Random Forest.\n", + "\n", + "Fanya utafiti kuhusu vigezo vya msingi vya kila moja na fikiria maana ya kurekebisha vigezo hivi kwa ubora wa modeli.\n", + "\n", + "Ili kupata maelezo zaidi kuhusu modeli fulani na vigezo vyake, tumia: `help(\"model\")` mfano `help(\"rand_forest\")`\n", + "\n", + "> Kwa vitendo, mara nyingi tunafanya *makadirio* ya *thamani bora* kwa kufundisha modeli nyingi kwenye `seti ya data iliyosimuliwa` na kupima jinsi modeli hizi zinavyofanya kazi. 
Mchakato huu unaitwa **tuning**.\n", + "\n", + "### [**Jaribio la baada ya somo**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/24/)\n", + "\n", + "### **Mapitio na Kujifunza Binafsi**\n", + "\n", + "Kuna maneno mengi ya kitaalamu katika masomo haya, kwa hivyo chukua muda kupitia [orodha hii](https://docs.microsoft.com/dotnet/machine-learning/resources/glossary?WT.mc_id=academic-77952-leestott) ya istilahi muhimu!\n", + "\n", + "#### SHUKRANI KWA:\n", + "\n", + "[`Allison Horst`](https://twitter.com/allison_horst/) kwa kuunda michoro ya kuvutia inayofanya R kuwa ya kupendeza na ya kuvutia zaidi. Pata michoro zaidi kwenye [galeria yake](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM).\n", + "\n", + "[Cassie Breviu](https://www.twitter.com/cassieview) na [Jen Looper](https://www.twitter.com/jenlooper) kwa kuunda toleo la awali la moduli hii kwa Python ♥️\n", + "\n", + "Jifunze kwa furaha,\n", + "\n", + "[Eric](https://twitter.com/ericntay), Balozi wa Wanafunzi wa Microsoft Learn wa Dhahabu.\n", + "\n", + "

\n", + " \n", + "

Michoro na @allison_horst
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya tafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kwa usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, inashauriwa kutumia tafsiri ya kitaalamu ya binadamu. Hatutawajibika kwa maelewano mabaya au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sw/4-Classification/3-Classifiers-2/solution/notebook.ipynb b/translations/sw/4-Classification/3-Classifiers-2/solution/notebook.ipynb new file mode 100644 index 000000000..7341987c3 --- /dev/null +++ b/translations/sw/4-Classification/3-Classifiers-2/solution/notebook.ipynb @@ -0,0 +1,302 @@ +{ + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 0 indian 0 0 0 0 0 \n", + "1 1 indian 1 0 0 0 0 \n", + "2 2 indian 0 0 0 0 0 \n", + "3 3 indian 0 0 0 0 0 \n", + "4 4 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 382 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00indian00000000...0000000000
11indian10000000...0000000000
22indian00000000...0000000000
33indian00000000...0000000000
44indian00000000...0000000010
\n

5 rows × 382 columns

\n
" + }, + "metadata": {}, + "execution_count": 1 + } + ], + "source": [ + "import pandas as pd\n", + "cuisines_df = pd.read_csv(\"../../data/cleaned_cuisines.csv\")\n", + "cuisines_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 indian\n", + "1 indian\n", + "2 indian\n", + "3 indian\n", + "4 indian\n", + "Name: cuisine, dtype: object" + ] + }, + "metadata": {}, + "execution_count": 2 + } + ], + "source": [ + "cuisines_label_df = cuisines_df['cuisine']\n", + "cuisines_label_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 3 + } + ], + "source": [ + "cuisines_feature_df = cuisines_df.drop(['Unnamed: 0', 'cuisine'], axis=1)\n", + "cuisines_feature_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Jaribu aina tofauti za viainishi\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.neighbors import KNeighborsClassifier\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.svm import SVC\n", + "from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier\n", + "from sklearn.model_selection import train_test_split, cross_val_score\n", + "from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report, precision_recall_curve\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(cuisines_feature_df, cuisines_label_df, test_size=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "C = 10\n", + "# Create different classifiers.\n", + "classifiers = {\n", + " 'Linear SVC': SVC(kernel='linear', C=C, probability=True,random_state=0),\n", + " 'KNN classifier': KNeighborsClassifier(C),\n", + " 'SVC': SVC(),\n", + " 'RFST': RandomForestClassifier(n_estimators=100),\n", + " 'ADA': AdaBoostClassifier(n_estimators=100)\n", + " \n", + "}\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Accuracy (train) for Linear SVC: 76.4% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.64 0.66 0.65 242\n", + " indian 0.91 0.86 0.89 236\n", + " japanese 0.72 0.73 0.73 245\n", + " korean 0.83 0.75 0.79 234\n", + " thai 0.75 0.82 0.78 242\n", + "\n", + " 
accuracy 0.76 1199\n", + " macro avg 0.77 0.76 0.77 1199\n", + "weighted avg 0.77 0.76 0.77 1199\n", + "\n", + "Accuracy (train) for KNN classifier: 70.7% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.65 0.63 0.64 242\n", + " indian 0.84 0.81 0.82 236\n", + " japanese 0.60 0.81 0.69 245\n", + " korean 0.89 0.53 0.67 234\n", + " thai 0.69 0.75 0.72 242\n", + "\n", + " accuracy 0.71 1199\n", + " macro avg 0.73 0.71 0.71 1199\n", + "weighted avg 0.73 0.71 0.71 1199\n", + "\n", + "Accuracy (train) for SVC: 80.1% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.71 0.69 0.70 242\n", + " indian 0.92 0.92 0.92 236\n", + " japanese 0.77 0.78 0.77 245\n", + " korean 0.87 0.77 0.82 234\n", + " thai 0.75 0.86 0.80 242\n", + "\n", + " accuracy 0.80 1199\n", + " macro avg 0.80 0.80 0.80 1199\n", + "weighted avg 0.80 0.80 0.80 1199\n", + "\n", + "Accuracy (train) for RFST: 82.8% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.80 0.75 0.77 242\n", + " indian 0.90 0.91 0.90 236\n", + " japanese 0.82 0.78 0.80 245\n", + " korean 0.85 0.82 0.83 234\n", + " thai 0.78 0.89 0.83 242\n", + "\n", + " accuracy 0.83 1199\n", + " macro avg 0.83 0.83 0.83 1199\n", + "weighted avg 0.83 0.83 0.83 1199\n", + "\n", + "Accuracy (train) for ADA: 71.1% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.60 0.57 0.58 242\n", + " indian 0.87 0.84 0.86 236\n", + " japanese 0.71 0.60 0.65 245\n", + " korean 0.68 0.78 0.72 234\n", + " thai 0.70 0.78 0.74 242\n", + "\n", + " accuracy 0.71 1199\n", + " macro avg 0.71 0.71 0.71 1199\n", + "weighted avg 0.71 0.71 0.71 1199\n", + "\n" + ] + } + ], + "source": [ + "n_classifiers = len(classifiers)\n", + "\n", + "for index, (name, classifier) in enumerate(classifiers.items()):\n", + " classifier.fit(X_train, np.ravel(y_train))\n", + "\n", + " y_pred = classifier.predict(X_test)\n", + " accuracy = accuracy_score(y_test, y_pred)\n", + " print(\"Accuracy (train) for %s: %0.1f%% \" % 
(name, accuracy * 100))\n", + " print(classification_report(y_test,y_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya kutafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kuhakikisha usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, tafsiri ya kitaalamu ya binadamu inapendekezwa. Hatutawajibika kwa kutoelewana au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "7ea2b714669c823a596d986ba2d5739f", + "translation_date": "2025-09-06T14:42:56+00:00", + "source_file": "4-Classification/3-Classifiers-2/solution/notebook.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/translations/sw/4-Classification/4-Applied/notebook.ipynb b/translations/sw/4-Classification/4-Applied/notebook.ipynb new file mode 100644 index 000000000..ef586c536 --- /dev/null +++ b/translations/sw/4-Classification/4-Applied/notebook.ipynb @@ -0,0 +1,39 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + 
"mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 4, + "coopTranslator": { + "original_hash": "2f3e0d9e9ac5c301558fb8bf733ac0cb", + "translation_date": "2025-09-06T14:41:35+00:00", + "source_file": "4-Classification/4-Applied/notebook.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya tafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kwa usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, inashauriwa kutumia tafsiri ya kitaalamu ya binadamu. Hatutawajibika kwa maelewano mabaya au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sw/4-Classification/4-Applied/solution/notebook.ipynb b/translations/sw/4-Classification/4-Applied/solution/notebook.ipynb new file mode 100644 index 000000000..697253ac9 --- /dev/null +++ b/translations/sw/4-Classification/4-Applied/solution/notebook.ipynb @@ -0,0 +1,290 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "interpreter": { + "hash": 
"70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "49325d6dd12a3628fc64fa7ccb1a80ff", + "translation_date": "2025-09-06T14:42:00+00:00", + "source_file": "4-Classification/4-Applied/solution/notebook.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: skl2onnx in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (1.8.0)\n", + "Requirement already satisfied: protobuf in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (3.8.0)\n", + "Requirement already satisfied: numpy>=1.15 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (1.19.2)\n", + "Requirement already satisfied: onnx>=1.2.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (1.9.0)\n", + "Requirement already satisfied: six in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from skl2onnx) (1.12.0)\n", + "Requirement already satisfied: onnxconverter-common<1.9,>=1.6.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (1.8.1)\n", + "Requirement already satisfied: scikit-learn>=0.19 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (0.24.2)\n", + "Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (1.4.1)\n", + "Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from protobuf->skl2onnx) (45.1.0)\n", + "Requirement already satisfied: 
typing-extensions>=3.6.2.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from onnx>=1.2.1->skl2onnx) (3.10.0.0)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from scikit-learn>=0.19->skl2onnx) (2.1.0)\n", + "Requirement already satisfied: joblib>=0.11 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from scikit-learn>=0.19->skl2onnx) (0.16.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "!pip install skl2onnx" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd \n" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 0 indian 0 0 0 0 0 \n", + "1 1 indian 1 0 0 0 0 \n", + "2 2 indian 0 0 0 0 0 \n", + "3 3 indian 0 0 0 0 0 \n", + "4 4 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 382 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00indian00000000...0000000000
11indian10000000...0000000000
22indian00000000...0000000000
33indian00000000...0000000000
44indian00000000...0000000010
\n

5 rows × 382 columns

\n
" + }, + "metadata": {}, + "execution_count": 60 + } + ], + "source": [ + "data = pd.read_csv('../../data/cleaned_cuisines.csv')\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 61 + } + ], + "source": [ + "X = data.iloc[:,2:]\n", + "X.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " cuisine\n", + "0 indian\n", + "1 indian\n", + "2 indian\n", + "3 indian\n", + "4 indian" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
cuisine
0indian
1indian
2indian
3indian
4indian
\n
" + }, + "metadata": {}, + "execution_count": 62 + } + ], + "source": [ + "y = data[['cuisine']]\n", + "y.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "from sklearn.svm import SVC\n", + "from sklearn.model_selection import cross_val_score\n", + "from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "SVC(C=10, kernel='linear', probability=True, random_state=0)" + ] + }, + "metadata": {}, + "execution_count": 65 + } + ], + "source": [ + "model = SVC(kernel='linear', C=10, probability=True,random_state=0)\n", + "model.fit(X_train,y_train.values.ravel())\n" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred = model.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " precision recall f1-score support\n\n chinese 0.72 0.70 0.71 236\n indian 0.91 0.88 0.89 243\n japanese 0.80 0.75 0.77 240\n korean 0.80 0.81 0.81 230\n thai 0.76 0.85 0.80 250\n\n accuracy 0.80 1199\n macro avg 0.80 0.80 0.80 1199\nweighted avg 0.80 0.80 0.80 1199\n\n" + ] + } + ], + "source": [ + "print(classification_report(y_test,y_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [], + "source": [ + "from skl2onnx import convert_sklearn\n", + "from skl2onnx.common.data_types import FloatTensorType\n", + "\n", + "initial_type = 
[('float_input', FloatTensorType([None, 380]))]\n", + "options = {id(model): {'nocl': True, 'zipmap': False}}\n", + "onx = convert_sklearn(model, initial_types=initial_type, options=options)\n", + "with open(\"./model.onnx\", \"wb\") as f:\n", + " f.write(onx.SerializeToString())\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya tafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kwa usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, inashauriwa kutumia huduma ya tafsiri ya kitaalamu ya binadamu. Hatutawajibika kwa maelewano mabaya au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sw/5-Clustering/1-Visualize/notebook.ipynb b/translations/sw/5-Clustering/1-Visualize/notebook.ipynb new file mode 100644 index 000000000..0a62ecdb9 --- /dev/null +++ b/translations/sw/5-Clustering/1-Visualize/notebook.ipynb @@ -0,0 +1,50 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python383jvsc74a57bd0e134e05457d34029b6460cd73bbf1ed73f339b5b6d98c95be70b69eba114fe95", + "display_name": "Python 3.8.3 64-bit (conda)" + }, + "coopTranslator": { + "original_hash": "40e0707e96b3e1899a912776006264f9", + "translation_date": "2025-09-06T14:08:05+00:00", + "source_file": "5-Clustering/1-Visualize/notebook.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + 
"cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya kutafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kuhakikisha usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, tafsiri ya kitaalamu ya binadamu inapendekezwa. Hatutawajibika kwa kutoelewana au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sw/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb b/translations/sw/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb new file mode 100644 index 000000000..1dc7d8623 --- /dev/null +++ b/translations/sw/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb @@ -0,0 +1,500 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "## **Muziki wa Nigeria uliokusanywa kutoka Spotify - uchambuzi**\n", + "\n", + "Clustering ni aina ya [Unsupervised Learning](https://wikipedia.org/wiki/Unsupervised_learning) inayodhani kuwa seti ya data haina lebo au kwamba maingizo yake hayajafungamanishwa na matokeo yaliyoainishwa. Inatumia algorithmi mbalimbali kuchambua data isiyo na lebo na kutoa makundi kulingana na mifumo inayotambua kwenye data.\n", + "\n", + "[**Maswali ya awali ya somo**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/27/)\n", + "\n", + "### **Utangulizi**\n", + "\n", + "[Clustering](https://link.springer.com/referenceworkentry/10.1007%2F978-0-387-30164-8_124) ni muhimu sana kwa uchunguzi wa data. 
Hebu tuone kama inaweza kusaidia kugundua mitindo na mifumo katika jinsi hadhira ya Nigeria inavyotumia muziki.\n", + "\n", + "> ✅ Chukua dakika moja kufikiria matumizi ya clustering. Katika maisha ya kila siku, clustering hutokea unapokuwa na rundo la nguo na unahitaji kupanga nguo za wanafamilia wako 🧦👕👖🩲. Katika sayansi ya data, clustering hutokea unapojaribu kuchambua mapendeleo ya mtumiaji, au kubaini sifa za seti yoyote ya data isiyo na lebo. Kwa namna fulani, clustering husaidia kuleta mpangilio katika hali ya fujo, kama droo ya soksi.\n", + "\n", + "Katika mazingira ya kitaalamu, clustering inaweza kutumika kubaini mambo kama mgawanyiko wa soko, kubaini ni makundi ya umri gani yanayonunua bidhaa fulani, kwa mfano. Matumizi mengine yanaweza kuwa kugundua hali zisizo za kawaida, labda kugundua udanganyifu kutoka kwa seti ya data ya miamala ya kadi za mkopo. Au unaweza kutumia clustering kubaini uvimbe katika kundi la skani za matibabu.\n", + "\n", + "✅ Fikiria kwa dakika moja jinsi unavyoweza kuwa umekutana na clustering 'katika mazingira halisi', katika benki, biashara ya mtandaoni, au mazingira ya kibiashara.\n", + "\n", + "> 🎓 Kwa kushangaza, uchambuzi wa makundi ulianzia katika nyanja za Anthropolojia na Saikolojia katika miaka ya 1930. Je, unaweza kufikiria jinsi ulivyotumika?\n", + "\n", + "Vinginevyo, unaweza kuitumia kwa kupanga matokeo ya utafutaji - kwa viungo vya ununuzi, picha, au hakiki, kwa mfano. Clustering ni muhimu unapokuwa na seti kubwa ya data unayotaka kupunguza na ambayo unataka kufanya uchambuzi wa kina zaidi, hivyo mbinu hii inaweza kutumika kujifunza kuhusu data kabla ya kujenga mifano mingine.\n", + "\n", + "✅ Mara data yako inapopangwa katika makundi, unaiwekea kitambulisho cha kundi, na mbinu hii inaweza kuwa muhimu katika kuhifadhi faragha ya seti ya data; badala yake unaweza kurejelea kipengele cha data kwa kitambulisho cha kundi, badala ya data inayofichua zaidi. 
Je, unaweza kufikiria sababu nyingine za kurejelea kitambulisho cha kundi badala ya vipengele vingine vya kundi ili kukitambua?\n", + "\n", + "### Kuanza na clustering\n", + "\n", + "> 🎓 Jinsi tunavyounda makundi inahusiana sana na jinsi tunavyokusanya vipengele vya data katika vikundi. Hebu tuchambue baadhi ya istilahi:\n", + ">\n", + "> 🎓 ['Transductive' vs. 'inductive'](https://wikipedia.org/wiki/Transduction_(machine_learning))\n", + ">\n", + "> Utoaji wa hitimisho wa transductive hutokana na kesi za mafunzo zilizotazamwa ambazo zinahusiana na kesi maalum za majaribio. Utoaji wa hitimisho wa inductive hutokana na kesi za mafunzo ambazo zinahusiana na sheria za jumla ambazo baadaye tu zinatumika kwa kesi za majaribio.\n", + ">\n", + "> Mfano: Fikiria una seti ya data ambayo imewekwa lebo kwa sehemu tu. Baadhi ya vitu ni 'rekodi', baadhi ni 'cds', na baadhi havina lebo. Kazi yako ni kutoa lebo kwa vile visivyo na lebo. Ukichagua mbinu ya inductive, ungefundisha mfano kutafuta 'rekodi' na 'cds', na kutumia lebo hizo kwa data isiyo na lebo. Mbinu hii itakuwa na shida kuainisha vitu ambavyo kwa kweli ni 'kanda za kaseti'. Mbinu ya transductive, kwa upande mwingine, hushughulikia data isiyojulikana kwa ufanisi zaidi kwani inafanya kazi ya kuunda vikundi vya vitu vinavyofanana na kisha kutumia lebo kwa kundi. Katika kesi hii, makundi yanaweza kuonyesha 'vitu vya muziki vya mviringo' na 'vitu vya muziki vya mraba'.\n", + ">\n", + "> 🎓 ['Non-flat' vs. 'flat' geometry](https://datascience.stackexchange.com/questions/52260/terminology-flat-geometry-in-the-context-of-clustering)\n", + ">\n", + "> Imetokana na istilahi za hisabati, 'non-flat' vs. 
'flat' geometry inahusu kipimo cha umbali kati ya vipengele kwa kutumia mbinu za kijiometri za 'flat' ([Euclidean](https://wikipedia.org/wiki/Euclidean_geometry)) au 'non-flat' (non-Euclidean).\n", + ">\n", + "> 'Flat' katika muktadha huu inahusu jiometri ya Euclidean (sehemu zake hufundishwa kama jiometri ya 'plane'), na 'non-flat' inahusu jiometri isiyo ya Euclidean. Jiometri inahusiana vipi na ujifunzaji wa mashine? Kweli, kama nyanja mbili zinazotokana na hisabati, lazima kuwe na njia ya kawaida ya kupima umbali kati ya vipengele katika makundi, na hiyo inaweza kufanywa kwa njia ya 'flat' au 'non-flat', kulingana na asili ya data. [Umbali wa Euclidean](https://wikipedia.org/wiki/Euclidean_distance) hupimwa kama urefu wa sehemu ya mstari kati ya vipengele viwili. [Umbali usio wa Euclidean](https://wikipedia.org/wiki/Non-Euclidean_geometry) hupimwa kwa kufuata mstari uliopinda (curve). Ikiwa data yako, ikionyeshwa, inaonekana haipo kwenye uso bapa (plane), unaweza kuhitaji kutumia algorithmi maalum kuishughulikia.\n", + "\n", + "

\n", + " \n", + "

Infographic na Dasani Madipalli
\n", + "\n", + "\n", + "\n", + "> 🎓 ['Umbali'](https://web.stanford.edu/class/cs345a/slides/12-clustering.pdf)\n", + ">\n", + "> Makundi yanafafanuliwa na matrix ya umbali, yaani umbali kati ya vipengele. Umbali huu unaweza kupimwa kwa njia kadhaa. Makundi ya Euclidean yanafafanuliwa na wastani wa thamani za vipengele, na yana 'centroid' au kipengele cha katikati. Umbali hupimwa kwa umbali hadi centroid hiyo. Umbali usio wa Euclidean unahusu 'clustroids', kipengele kilicho karibu zaidi na vipengele vingine. Clustroids kwa upande wake vinaweza kufafanuliwa kwa njia mbalimbali.\n", + ">\n", + "> 🎓 ['Constrained'](https://wikipedia.org/wiki/Constrained_clustering)\n", + ">\n", + "> [Constrained Clustering](https://web.cs.ucdavis.edu/~davidson/Publications/ICDMTutorial.pdf) huanzisha 'semi-supervised' learning katika mbinu hii isiyo na usimamizi. Mahusiano kati ya vipengele yanawekwa alama kama 'cannot link' au 'must-link' ili sheria fulani zifuatwe kwenye seti ya data.\n", + ">\n", + "> Mfano: Ikiwa algorithmi inaruhusiwa kuchambua kundi la data isiyo na lebo au yenye lebo kwa sehemu, makundi inayozalisha yanaweza kuwa ya ubora duni. Katika mfano hapo juu, makundi yanaweza kuunda 'vitu vya muziki vya mviringo' na 'vitu vya muziki vya mraba' na 'vitu vya pembetatu' na 'biskuti'. Ikiwa algorithmi inapewa vikwazo, au sheria za kufuata (\"kipengele lazima kiwe cha plastiki\", \"kipengele kinahitaji kuwa na uwezo wa kutoa muziki\") hii inaweza kusaidia 'kuiwekea mipaka' algorithmi ili ifanye chaguo bora zaidi.\n", + ">\n", + "> 🎓 'Density'\n", + ">\n", + "> Data iliyo na 'kelele' inachukuliwa kuwa 'dense'. Umbali kati ya vipengele katika kila moja ya makundi yake unaweza kuonyesha, kwa uchunguzi, kuwa ni zaidi au chini ya 'dense', au 'imejaa' na hivyo data hii inahitaji kuchambuliwa kwa mbinu sahihi ya clustering. [Makala hii](https://www.kdnuggets.com/2020/02/understanding-density-based-clustering.html) inaonyesha tofauti kati ya kutumia algorithmi za K-Means clustering vs. 
HDBSCAN kuchunguza seti ya data yenye kelele na density isiyo sawa.\n", + "\n", + "Panua uelewa wako wa mbinu za clustering katika [Learn module](https://docs.microsoft.com/learn/modules/train-evaluate-cluster-models?WT.mc_id=academic-77952-leestott)\n", + "\n", + "### **Algorithmi za clustering**\n", + "\n", + "Kuna zaidi ya algorithmi 100 za clustering, na matumizi yake yanategemea asili ya data inayoshughulikiwa. Hebu tujadili baadhi ya zile kuu:\n", + "\n", + "- **Hierarchical clustering**. Ikiwa kipengele kinaainishwa kwa ukaribu wake na kipengele kilicho karibu, badala ya kile kilicho mbali zaidi, makundi yanaundwa kulingana na umbali wa wanachama wake kutoka na kwenda kwa vipengele vingine. Hierarchical clustering inajulikana kwa kuunganisha makundi mawili mara kwa mara.\n", + "\n", + "\n", + "

\n", + " \n", + "

Infographic na Dasani Madipalli
\n", + "\n", + "\n", + "\n", + "- **Centroid clustering**. Algorithmi hii maarufu inahitaji kuchagua 'k', au idadi ya makundi ya kuunda, baada ya hapo algorithmi huamua kipengele cha katikati cha kundi na kukusanya data karibu na kipengele hicho. [K-means clustering](https://wikipedia.org/wiki/K-means_clustering) ni toleo maarufu la centroid clustering ambalo linatenganisha seti ya data katika makundi ya K yaliyoainishwa awali. Kipengele cha katikati kinaamuliwa na wastani wa karibu zaidi, hivyo jina hilo. Umbali wa mraba kutoka kwa kundi hupunguzwa.\n", + "\n", + "

\n", + " \n", + "

Infographic na Dasani Madipalli
\n", + "\n", + "\n", + "\n", + "- **Distribution-based clustering**. Ikitokana na uundaji wa takwimu, distribution-based clustering inazingatia kubaini uwezekano wa kipengele cha data kuwa sehemu ya kundi, na kukipa kundi ipasavyo. Mbinu za Gaussian mixture zinahusiana na aina hii.\n", + "\n", + "- **Density-based clustering**. Vipengele vya data vinapewa makundi kulingana na density yao, au jinsi vinavyokusanyika karibu na kila kimoja. Vipengele vya data vilivyo mbali na kundi vinachukuliwa kuwa outliers au kelele. DBSCAN, Mean-shift na OPTICS vinahusiana na aina hii ya clustering.\n", + "\n", + "- **Grid-based clustering**. Kwa seti za data za vipimo vingi, gridi huundwa na data hugawanywa kati ya seli za gridi hiyo, hivyo kuunda makundi.\n", + "\n", + "Njia bora ya kujifunza kuhusu clustering ni kuijaribu mwenyewe, hivyo ndivyo utakavyofanya katika zoezi hili.\n", + "\n", + "Tutahitaji baadhi ya pakiti ili kukamilisha moduli hii. Unaweza kuzisakinisha kama: `install.packages(c('tidyverse', 'tidymodels', 'DataExplorer', 'summarytools', 'plotly', 'paletteer', 'corrplot', 'patchwork'))`\n", + "\n", + "Vinginevyo, script hapa chini hukagua ikiwa una pakiti zinazohitajika kukamilisha moduli hii na kuzisakinisha kwako ikiwa baadhi zinakosekana.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "suppressWarnings(if(!require(\"pacman\")) install.packages(\"pacman\"))\r\n", + "\r\n", + "pacman::p_load('tidyverse', 'tidymodels', 'DataExplorer', 'summarytools', 'plotly', 'paletteer', 'corrplot', 'patchwork')\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Zoezi - pangisha data yako katika makundi\n", + "\n", + "Upangishaji katika makundi kama mbinu husaidiwa sana na uonyeshaji sahihi wa data, kwa hivyo hebu tuanze kwa kuonyesha data yetu ya muziki. 
Zoezi hili litatusaidia kuamua ni mbinu gani ya upangishaji katika makundi tunapaswa kutumia kwa ufanisi zaidi kulingana na asili ya data hii.\n", + "\n", + "Hebu tuanze mara moja kwa kuingiza data.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load the core tidyverse and make it available in your current R session\r\n", + "library(tidyverse)\r\n", + "\r\n", + "# Import the data into a tibble\r\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/5-Clustering/data/nigerian-songs.csv\")\r\n", + "\r\n", + "# View the first 5 rows of the data set\r\n", + "df %>% \r\n", + " slice_head(n = 5)\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Wakati mwingine, tunaweza kutaka maelezo zaidi kuhusu data yetu. Tunaweza kuangalia `data` na `muundo wake` kwa kutumia [*glimpse()*](https://pillar.r-lib.org/reference/glimpse.html) kazi:\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Glimpse into the data set\r\n", + "df %>% \r\n", + " glimpse()\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Kazi nzuri!💪\n", + "\n", + "Tunaweza kuona kwamba `glimpse()` itakupa jumla ya idadi ya safu (uchunguzi) na safu wima (vigezo), kisha, maingizo machache ya kwanza ya kila kigezo katika safu baada ya jina la kigezo. 
Zaidi ya hayo, *aina ya data* ya kigezo inatolewa mara moja baada ya jina la kila kigezo ndani ya `< >`.\n", + "\n", + "`DataExplorer::introduce()` inaweza kufupisha taarifa hii kwa urahisi:\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Describe basic information for our data\r\n", + "df %>% \r\n", + " introduce()\r\n", + "\r\n", + "# A visual display of the same\r\n", + "df %>% \r\n", + " plot_intro()\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Nzuri sana! Tumegundua kuwa data yetu haina thamani zilizokosekana.\n", + "\n", + "Wakati tukiendelea, tunaweza kuchunguza takwimu za kawaida za mwelekeo wa kati (mfano [wastani](https://en.wikipedia.org/wiki/Arithmetic_mean) na [median](https://en.wikipedia.org/wiki/Median)) na vipimo vya mtawanyiko (mfano [mkengeuko wa kawaida](https://en.wikipedia.org/wiki/Standard_deviation)) kwa kutumia `summarytools::descr()`\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Describe common statistics\r\n", + "df %>% \r\n", + " descr(stats = \"common\")\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Hebu tuangalie maadili ya jumla ya data. Kumbuka kuwa umaarufu unaweza kuwa `0`, ambayo inaonyesha nyimbo ambazo hazina daraja. Tutaziondoa hivi karibuni.\n", + "\n", + "> 🤔 Ikiwa tunafanya kazi na clustering, mbinu isiyo ya kusimamiwa ambayo haihitaji data yenye lebo, kwa nini tunaonyesha data hii ikiwa na lebo? Katika awamu ya uchunguzi wa data, zinaweza kuwa muhimu, lakini hazihitajiki kwa algorithimu za clustering kufanya kazi.\n", + "\n", + "### 1. 
Chunguza aina maarufu za muziki\n", + "\n", + "Twende mbele na tujue aina za muziki maarufu 🎶 kwa kuhesabu idadi ya mara zinavyoonekana.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Popular genres\r\n", + "top_genres <- df %>% \r\n", + " count(artist_top_genre, sort = TRUE) %>% \r\n", + "# Encode to categorical and reorder the according to count\r\n", + " mutate(artist_top_genre = factor(artist_top_genre) %>% fct_inorder())\r\n", + "\r\n", + "# Print the top genres\r\n", + "top_genres\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Hiyo imeenda vizuri! Wanasema picha ina thamani ya mistari elfu moja ya fremu ya data (kwa kweli hakuna mtu anayesema hivyo 😅). Lakini unaelewa maana yake, sivyo?\n", + "\n", + "Njia moja ya kuonyesha data ya kategoria (vigezo vya aina ya character au factor) ni kutumia chati za pau. Hebu tufanye chati ya pau ya aina 10 bora za muziki:\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Change the default gray theme\r\n", + "theme_set(theme_light())\r\n", + "\r\n", + "# Visualize popular genres\r\n", + "top_genres %>%\r\n", + " slice(1:10) %>% \r\n", + " ggplot(mapping = aes(x = artist_top_genre, y = n,\r\n", + " fill = artist_top_genre)) +\r\n", + " geom_col(alpha = 0.8) +\r\n", + " paletteer::scale_fill_paletteer_d(\"rcartocolor::Vivid\") +\r\n", + " ggtitle(\"Top genres\") +\r\n", + " theme(plot.title = element_text(hjust = 0.5),\r\n", + " # Rotates the X markers (so we can read them)\r\n", + " axis.text.x = element_text(angle = 90))\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Sasa ni rahisi zaidi kutambua kwamba tuna `missing` aina za muziki 🧐!\n", + "\n", + "> Uwasilishaji mzuri wa data utaonyesha mambo ambayo hukutarajia, au kuibua maswali mapya kuhusu data - Hadley Wickham na Garrett Grolemund, [R For Data 
Science](https://r4ds.had.co.nz/introduction.html)\n", + "\n", + "Kumbuka, pale ambapo aina kuu ya muziki imeelezwa kama `Missing`, inamaanisha kwamba Spotify haikuigawa, kwa hivyo tuiondoe.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Visualize popular genres\r\n", + "top_genres %>%\r\n", + " filter(artist_top_genre != \"Missing\") %>% \r\n", + " slice(1:10) %>% \r\n", + " ggplot(mapping = aes(x = artist_top_genre, y = n,\r\n", + " fill = artist_top_genre)) +\r\n", + " geom_col(alpha = 0.8) +\r\n", + " paletteer::scale_fill_paletteer_d(\"rcartocolor::Vivid\") +\r\n", + " ggtitle(\"Top genres\") +\r\n", + " theme(plot.title = element_text(hjust = 0.5),\r\n", + " # Rotates the X markers (so we can read them)\r\n", + " axis.text.x = element_text(angle = 90))\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Kutokana na uchunguzi mdogo wa data, tunajifunza kwamba aina tatu kuu za muziki zinatawala dataset hii. 
Hebu tuzingatie `afro dancehall`, `afropop`, na `nigerian pop`, na pia tuchuje dataset ili kuondoa chochote chenye thamani ya umaarufu ya 0 (ikimaanisha hakikuainishwa na umaarufu katika dataset na kinaweza kuchukuliwa kama kelele kwa madhumuni yetu):\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "nigerian_songs <- df %>% \r\n", + " # Concentrate on top 3 genres\r\n", + " filter(artist_top_genre %in% c(\"afro dancehall\", \"afropop\",\"nigerian pop\")) %>% \r\n", + " # Remove unclassified observations\r\n", + " filter(popularity != 0)\r\n", + "\r\n", + "\r\n", + "\r\n", + "# Visualize popular genres\r\n", + "nigerian_songs %>%\r\n", + " count(artist_top_genre) %>%\r\n", + " ggplot(mapping = aes(x = artist_top_genre, y = n,\r\n", + " fill = artist_top_genre)) +\r\n", + " geom_col(alpha = 0.8) +\r\n", + " paletteer::scale_fill_paletteer_d(\"ggsci::category10_d3\") +\r\n", + " ggtitle(\"Top genres\") +\r\n", + " theme(plot.title = element_text(hjust = 0.5))\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Hebu tuone kama kuna uhusiano wa moja kwa moja kati ya vigezo vya namba katika seti yetu ya data. Uhusiano huu hupimwa kihisabati kwa kutumia [takwimu ya uhusiano](https://en.wikipedia.org/wiki/Correlation).\n", + "\n", + "Takwimu ya uhusiano ni thamani kati ya -1 na 1 inayoonyesha nguvu ya uhusiano. 
Thamani zilizo juu ya 0 zinaonyesha uhusiano *chanya* (thamani za juu za kigezo kimoja huwa sambamba na thamani za juu za kigezo kingine), wakati thamani zilizo chini ya 0 zinaonyesha uhusiano *hasi* (thamani za juu za kigezo kimoja huwa sambamba na thamani za chini za kigezo kingine).\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Narrow down to numeric variables and find correlation\r\n", + "corr_mat <- nigerian_songs %>% \r\n", + " select(where(is.numeric)) %>% \r\n", + " cor()\r\n", + "\r\n", + "# Visualize correlation matrix\r\n", + "corrplot(corr_mat, order = 'AOE', col = c('white', 'black'), bg = 'gold2') \r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Data haijaonyesha uhusiano mkubwa isipokuwa kati ya `energy` na `loudness`, jambo ambalo linaeleweka, kwa kuwa muziki wenye sauti kubwa mara nyingi huwa na nguvu nyingi. `Popularity` ina uhusiano na `release date`, jambo ambalo pia lina mantiki, kwa kuwa nyimbo za hivi karibuni huenda zikawa maarufu zaidi. Urefu na nguvu pia vinaonekana kuwa na uhusiano.\n", + "\n", + "Itakuwa ya kuvutia kuona kile ambacho algorithimu ya kugawanya (clustering algorithm) inaweza kufanya na data hii!\n", + "\n", + "> 🎓 Kumbuka kwamba uhusiano hauimaanishi sababu! Tuna ushahidi wa uhusiano lakini hatuna ushahidi wa sababu. [Tovuti ya kufurahisha](https://tylervigen.com/spurious-correlations) ina michoro inayoangazia hoja hii.\n", + "\n", + "### 2. Chunguza usambazaji wa data\n", + "\n", + "Hebu tujiulize maswali ya kina zaidi. Je, aina za muziki (genres) zinatofautiana sana katika mtazamo wa uwezo wa kuchezeka (danceability), kulingana na umaarufu wao? 
Hebu tuchunguze usambazaji wa data wa aina zetu tatu kuu za muziki kwa umaarufu na uwezo wa kuchezeka kwenye mhimili wa x na y kwa kutumia [density plots](https://www.khanacademy.org/math/ap-statistics/density-curves-normal-distribution-ap/density-curves/v/density-curves).\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Perform 2D kernel density estimation\r\n", + "density_estimate_2d <- nigerian_songs %>% \r\n", + " ggplot(mapping = aes(x = popularity, y = danceability, color = artist_top_genre)) +\r\n", + " geom_density_2d(bins = 5, size = 1) +\r\n", + " paletteer::scale_color_paletteer_d(\"RSkittleBrewer::wildberry\") +\r\n", + " xlim(-20, 80) +\r\n", + " ylim(0, 1.2)\r\n", + "\r\n", + "# Density plot based on the popularity\r\n", + "density_estimate_pop <- nigerian_songs %>% \r\n", + " ggplot(mapping = aes(x = popularity, fill = artist_top_genre, color = artist_top_genre)) +\r\n", + " geom_density(size = 1, alpha = 0.5) +\r\n", + " paletteer::scale_fill_paletteer_d(\"RSkittleBrewer::wildberry\") +\r\n", + " paletteer::scale_color_paletteer_d(\"RSkittleBrewer::wildberry\") +\r\n", + " theme(legend.position = \"none\")\r\n", + "\r\n", + "# Density plot based on the danceability\r\n", + "density_estimate_dance <- nigerian_songs %>% \r\n", + " ggplot(mapping = aes(x = danceability, fill = artist_top_genre, color = artist_top_genre)) +\r\n", + " geom_density(size = 1, alpha = 0.5) +\r\n", + " paletteer::scale_fill_paletteer_d(\"RSkittleBrewer::wildberry\") +\r\n", + " paletteer::scale_color_paletteer_d(\"RSkittleBrewer::wildberry\")\r\n", + "\r\n", + "\r\n", + "# Patch everything together\r\n", + "library(patchwork)\r\n", + "density_estimate_2d / (density_estimate_pop + density_estimate_dance)\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Tunaona kwamba kuna miduara inayozunguka kwa mduara mmoja ndani ya mwingine ambayo inalingana, bila kujali aina ya muziki. 
Inawezekana kwamba ladha za Wanigeria zinakubaliana kwa kiwango fulani cha uwezo wa kuchezeka kwa aina hii ya muziki?\n", + "\n", + "Kwa ujumla, aina hizi tatu za muziki zinaendana kwa umaarufu na uwezo wa kuchezeka. Kuamua makundi katika data hii isiyo na mpangilio wa moja kwa moja itakuwa changamoto. Hebu tuone kama mchoro wa kutawanyika unaweza kusaidia katika hili.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# A scatter plot of popularity and danceability\r\n", + "scatter_plot <- nigerian_songs %>% \r\n", + " ggplot(mapping = aes(x = popularity, y = danceability, color = artist_top_genre, shape = artist_top_genre)) +\r\n", + " geom_point(size = 2, alpha = 0.8) +\r\n", + " paletteer::scale_color_paletteer_d(\"futurevisions::mars\")\r\n", + "\r\n", + "# Add a touch of interactivity\r\n", + "ggplotly(scatter_plot)\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Grafu ya kutawanyika ya mhimili sawa inaonyesha mtindo unaofanana wa muunganiko.\n", + "\n", + "Kwa ujumla, kwa ajili ya kugawanya data katika makundi, unaweza kutumia grafu za kutawanyika kuonyesha makundi ya data, hivyo kujifunza aina hii ya uwasilishaji ni muhimu sana. Katika somo lijalo, tutachukua data hii iliyochujwa na kutumia k-means clustering kugundua makundi katika data hii ambayo yanaonekana kuingiliana kwa njia za kuvutia.\n", + "\n", + "## **🚀 Changamoto**\n", + "\n", + "Kwa maandalizi ya somo lijalo, tengeneza chati kuhusu mbinu mbalimbali za kugawanya data katika makundi ambazo unaweza kugundua na kutumia katika mazingira ya uzalishaji. Ni aina gani za matatizo mbinu za kugawanya data zinajaribu kutatua?\n", + "\n", + "## [**Jaribio la baada ya somo**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/28/)\n", + "\n", + "## **Mapitio na Kujisomea**\n", + "\n", + "Kabla ya kutumia mbinu za kugawanya data, kama tulivyojifunza, ni wazo zuri kuelewa asili ya seti yako ya data. 
Soma zaidi kuhusu mada hii [hapa](https://www.kdnuggets.com/2019/10/right-clustering-algorithm.html)\n", + "\n", + "Kuimarisha uelewa wako wa mbinu za kugawanya data:\n", + "\n", + "- [Fanya mafunzo na tathmini ya mifano ya kugawanya data kwa kutumia Tidymodels na marafiki](https://rpubs.com/eR_ic/clustering)\n", + "\n", + "- Bradley Boehmke & Brandon Greenwell, [*Hands-On Machine Learning with R*](https://bradleyboehmke.github.io/HOML/)*.*\n", + "\n", + "## **Kazi ya Nyumbani**\n", + "\n", + "[Chunguza uwasilishaji mwingine wa kugawanya data katika makundi](https://github.com/microsoft/ML-For-Beginners/blob/main/5-Clustering/1-Visualize/assignment.md)\n", + "\n", + "## ASANTE KWA:\n", + "\n", + "[Jen Looper](https://www.twitter.com/jenlooper) kwa kuunda toleo la awali la moduli hii kwa Python ♥️\n", + "\n", + "[`Dasani Madipalli`](https://twitter.com/dasani_decoded) kwa kuunda michoro ya kuvutia ambayo hufanya dhana za kujifunza kwa mashine kueleweka zaidi na rahisi kufuatilia.\n", + "\n", + "Jifunze kwa furaha,\n", + "\n", + "[Eric](https://twitter.com/ericntay), Balozi wa Dhahabu wa Wanafunzi wa Microsoft Learn.\n" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya tafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kwa usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, inashauriwa kutumia huduma ya tafsiri ya kitaalamu ya binadamu. 
Hatutawajibika kwa maelewano mabaya au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ], + "metadata": { + "anaconda-cloud": "", + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "3.4.1" + }, + "coopTranslator": { + "original_hash": "99c36449cad3708a435f6798cfa39972", + "translation_date": "2025-09-06T14:15:48+00:00", + "source_file": "5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/translations/sw/5-Clustering/1-Visualize/solution/notebook.ipynb b/translations/sw/5-Clustering/1-Visualize/solution/notebook.ipynb new file mode 100644 index 000000000..413773301 --- /dev/null +++ b/translations/sw/5-Clustering/1-Visualize/solution/notebook.ipynb @@ -0,0 +1,817 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Defaulting to user installation because normal site-packages is not writeable\n", + "Requirement already satisfied: seaborn in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (0.11.2)\n", + "Requirement already satisfied: matplotlib>=2.2 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from seaborn) (3.5.0)\n", + "Requirement already satisfied: numpy>=1.15 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from seaborn) (1.21.4)\n", + "Requirement already satisfied: pandas>=0.23 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from seaborn) (1.3.4)\n", + "Requirement already satisfied: scipy>=1.0 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages 
(from seaborn) (1.7.2)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (4.28.1)\n", + "Requirement already satisfied: pyparsing>=2.2.1 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (2.4.7)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (1.3.2)\n", + "Requirement already satisfied: pillow>=6.2.0 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (8.4.0)\n", + "Requirement already satisfied: cycler>=0.10 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (0.11.0)\n", + "Requirement already satisfied: packaging>=20.0 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (21.2)\n", + "Requirement already satisfied: setuptools-scm>=4 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (6.3.2)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (2.8.2)\n", + "Requirement already satisfied: pytz>=2017.3 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from pandas>=0.23->seaborn) (2021.3)\n", + "Requirement already satisfied: six>=1.5 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from python-dateutil>=2.7->matplotlib>=2.2->seaborn) (1.16.0)\n", + "Requirement already satisfied: tomli>=1.0.0 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from setuptools-scm>=4->matplotlib>=2.2->seaborn) (1.2.2)\n", + "Requirement already satisfied: setuptools in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from 
setuptools-scm>=4->matplotlib>=2.2->seaborn) (59.1.1)\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "!pip install seaborn" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
0SparkyMandy & The JungleCruel Santinoalternative r&b2019144000480.6660.85100.4200.5340000.1100-6.6990.0829133.0155
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
2LITT!LITT!AYLØindie r&b2018207758400.8360.27200.5640.0005370.1100-7.1270.0424130.0054
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
\n", + "
" + ], + "text/plain": [ + " name album \\\n", + "0 Sparky Mandy & The Jungle \n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "2 LITT! LITT! \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "0 Cruel Santino alternative r&b 2019 144000 48 \n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "2 AYLØ indie r&b 2018 207758 40 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "0 0.666 0.8510 0.420 0.534000 0.1100 -6.699 \n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "2 0.836 0.2720 0.564 0.000537 0.1100 -7.127 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "\n", + " speechiness tempo time_signature \n", + "0 0.0829 133.015 5 \n", + "1 0.3600 129.993 3 \n", + "2 0.0424 130.005 4 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv(\"../../data/nigerian-songs.csv\")\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Pata taarifa kuhusu dataframe\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 530 entries, 0 to 529\n", + "Data columns (total 16 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 name 530 non-null object \n", + " 1 album 530 non-null object \n", + " 2 artist 530 non-null object \n", + " 3 artist_top_genre 530 non-null object \n", + " 4 release_date 530 non-null int64 \n", + " 5 length 530 non-null int64 \n", + " 6 popularity 530 non-null int64 \n", + " 7 danceability 
530 non-null float64\n", + " 8 acousticness 530 non-null float64\n", + " 9 energy 530 non-null float64\n", + " 10 instrumentalness 530 non-null float64\n", + " 11 liveness 530 non-null float64\n", + " 12 loudness 530 non-null float64\n", + " 13 speechiness 530 non-null float64\n", + " 14 tempo 530 non-null float64\n", + " 15 time_signature 530 non-null int64 \n", + "dtypes: float64(8), int64(4), object(4)\n", + "memory usage: 66.4+ KB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "name 0\n", + "album 0\n", + "artist 0\n", + "artist_top_genre 0\n", + "release_date 0\n", + "length 0\n", + "popularity 0\n", + "danceability 0\n", + "acousticness 0\n", + "energy 0\n", + "instrumentalness 0\n", + "liveness 0\n", + "loudness 0\n", + "speechiness 0\n", + "tempo 0\n", + "time_signature 0\n", + "dtype: int64" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.isnull().sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Angalia thamani za jumla za data. Kumbuka kuwa umaarufu unaweza kuwa '0' - na kuna safu nyingi zenye thamani hiyo\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
release_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
count530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000
mean2015.390566222298.16981117.5075470.7416190.2654120.7606230.0163050.147308-4.9530110.130748116.4878643.986792
std3.13168839696.82225918.9922120.1175220.2083420.1485330.0903210.1235882.4641860.09293923.5186010.333701
min1998.00000089488.0000000.0000000.2550000.0006650.1110000.0000000.028300-19.3620000.02780061.6950003.000000
25%2014.000000199305.0000000.0000000.6810000.0895250.6690000.0000000.075650-6.2987500.059100102.9612504.000000
50%2016.000000218509.00000013.0000000.7610000.2205000.7845000.0000040.103500-4.5585000.097950112.7145004.000000
75%2017.000000242098.50000031.0000000.8295000.4030000.8757500.0002340.164000-3.3310000.177000125.0392504.000000
max2020.000000511738.00000073.0000000.9660000.9540000.9950000.9100000.8110000.5820000.514000206.0070005.000000
\n", + "
" + ], + "text/plain": [ + " release_date length popularity danceability acousticness \\\n", + "count 530.000000 530.000000 530.000000 530.000000 530.000000 \n", + "mean 2015.390566 222298.169811 17.507547 0.741619 0.265412 \n", + "std 3.131688 39696.822259 18.992212 0.117522 0.208342 \n", + "min 1998.000000 89488.000000 0.000000 0.255000 0.000665 \n", + "25% 2014.000000 199305.000000 0.000000 0.681000 0.089525 \n", + "50% 2016.000000 218509.000000 13.000000 0.761000 0.220500 \n", + "75% 2017.000000 242098.500000 31.000000 0.829500 0.403000 \n", + "max 2020.000000 511738.000000 73.000000 0.966000 0.954000 \n", + "\n", + " energy instrumentalness liveness loudness speechiness \\\n", + "count 530.000000 530.000000 530.000000 530.000000 530.000000 \n", + "mean 0.760623 0.016305 0.147308 -4.953011 0.130748 \n", + "std 0.148533 0.090321 0.123588 2.464186 0.092939 \n", + "min 0.111000 0.000000 0.028300 -19.362000 0.027800 \n", + "25% 0.669000 0.000000 0.075650 -6.298750 0.059100 \n", + "50% 0.784500 0.000004 0.103500 -4.558500 0.097950 \n", + "75% 0.875750 0.000234 0.164000 -3.331000 0.177000 \n", + "max 0.995000 0.910000 0.811000 0.582000 0.514000 \n", + "\n", + " tempo time_signature \n", + "count 530.000000 530.000000 \n", + "mean 116.487864 3.986792 \n", + "std 23.518601 0.333701 \n", + "min 61.695000 3.000000 \n", + "25% 102.961250 4.000000 \n", + "50% 112.714500 4.000000 \n", + "75% 125.039250 4.000000 \n", + "max 206.007000 5.000000 " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "import seaborn as sns\n", + "\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top[:5].index,y=top[:5].values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Maudhui ya faili ya Markdown hayakutolewa. Tafadhali weka maudhui ya faili ili niweze kutafsiri kwa Kiswahili kulingana na sheria ulizotoa.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "df = df[df['artist_top_genre'] != 'Missing']\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "df = df[(df['artist_top_genre'] == 'afro dancehall') | (df['artist_top_genre'] == 'afropop') | (df['artist_top_genre'] == 'nigerian pop')]\n", + "df = df[(df['popularity'] > 0)]\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "corrmat = df.corr()\n", + "f, ax = plt.subplots(figsize=(12, 9))\n", + "sns.heatmap(corrmat, vmax=.8, square=True);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAaQAAAGkCAYAAAB+TFE1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAAsTAAALEwEAmpwYAAEAAElEQVR4nOydd3gc1dWH39m+q9Xuqvde3eTeG7bBdAwm9A6BBEJJvtASSggJCYQk1IQSCBBIqKaYjgvuvTfZktV71/Y+8/2x0tpCkrst28z7PPvM7MydmbNl7m/uveeeI0iSJCEjIyMjIzPAKAbaABkZGRkZGZAFSUZGRkbmJEEWJBkZGRmZkwJZkGRkZGRkTgpkQZKRkZGROSmQBUlGRkZG5qRAFiQZGRkZmZMC1UAbICNzuIiSiD8YwBf04Q368AX9+AKhZfd7b8CHL9j98qNRqjFpIzHrIkmOTMCkjUQQhIH+KDIyMvshC5LMgODyuWlyttLhttLpsdLhtuL0uXAHvLgDHjx+D+6AF4/fgyfg3U9sQgJztBg1EeREZzAsoZCihEFkWFJkgZKRGWAEOVKDzPGm1dXO7pa9lLZVUmtroNbWQIfb2qucVqVFr9KiV+nQqbXoVDr0Ki06lRaNSoNWqUGjVKNVadB0rytD6z22qfbbp9KgUajxBX3YvA7a3Vbq7Y3UWBvY3bqXOlsjAKmmJM7ImsD0zAmYdaYT/RXJyMggC5LMccDld7O1cReb6newq6WUFmcbEBKc1MhEUsyJpJqSSDTGEa23YNGbsehMaJTqE25ru6uTjfXbWVq5hpK2ctRKNWflTGVO4Wyi9OYTbo+MzI8ZWZBkjgl2r4PVNZtYX7eVHc17CIpBIjURDI7PZ1BcLoWxuWRaUlEoTl4/mlpbA/OLF7Csai1KhZI5hbO5uHA2GpVmoE2TkflRIAuSzBHjC/rZVL+dZZVr2dywg6AkkmSMZ0xKEWNThpMfk31SC1B/NDpaeG/7fFZVbyAhIpZbRl/FiKTBA22WjMxpjyxIMoeFJEkUt+xledU6VtdsxOV3E6UzMyVjLFMzxp9WzgE7mnbz+sb3qbM3MjtnGteNuBSt3FqSkTluyIIkc0jYvA6WVqxhYflyGuzNaFVaxqeOYFrGeIbGF5ySLaFDwR/08972+Xy+ZyHJkQn8cuJPyYxKHWizZGROS2RBkukXSZLY3bqXBWUrWFOziYAYoCA2h7NypjIudQQ6lXagTTxh7GjazQtr38Tpc/GzMdcyNXPcQJskI3PaIQuSTC9EUWRt3WY+Lf6Wio4aDGo90zLGc2bOFNItKQNt3oDR6bHxzKrXKG4p5by8GVw74lJUCuVAmyUjc9ogC5JMmIAYZFnlGj7b/R0N9maSjPFcWHgWUzLG/qhaQwciIAZ5Z+vHfFWymEFxufxq0q1Y5HlLMjLHBFmQZJAkic0NO3h7y8fU2RvJikrjkkHnMC5lxGk7NnS0rKhax8vr3yFSY+S+KT8jOzpjoE2SkTnlkQXpR06rs51XN/yXLY27SIqM57rhcxmdXHTaeModTyo6anh6xctYvXZuH3st
UzLkcSUZmaNBFqQfKZIk8X3FKt7a/BEiElcOvZCzc6ejUsrhDQ8Hq8fG31f9i+KWvVxUeBZXD7tYblXKyBwhsiD9CPEEvLy07m1W12xkSHw+t4+9jnhj7ECbdcoSCAZ4c/OHfFe2jBGJg7l74s0YNREDbZaMzCmHLEg/MhrtzTy98hVqbQ1cNWwOFxWehUKQn+iPBQvLlvP6pveJM0Rz/5TbSTUnDbRJMjKnFLIg/YjY21bJn5a9CMAvJ95CUeKgAbbo9GN3y17+tvJVfEE/d064kbEpwwfaJBmZUwZZkH4k7GjazV9WvIxJa+ThM+4h0Rg30CadtrS62vnrilco76jmnLwzuHb43AGJZC4jc6ohC9KPgE31O/jbyldIjIznoel3Ea23DLRJpz2+oJ//bfuUr0oWk25O4c7xN5AZlTbQZsnInNTIgnSas6u5hCeWvUiqKZFHpt+DUSsPtp9INjfs4J/r3sbudXBR4Vn8ZPB5cjoLGZl+kAXpNKasvYrHv3+WaIOF38/8NSatcaBN+lHi8Dp5e+vHfF+xiviIGK4ZfgkTUkfJc71kZH6ALEinKY2OFh5a+Bd0Ki1/mHkv0QbLQJv0o2dH0x7e3Pwh1dY68mOyuXLYhQyJL5CFSUamC1mQTkOcPhcPL3waq9fOE2feT1Jk/ECbJNOFKIosqVzD+9vn0+GxkhudycWDzmZ08jCUcqBWmR85siCdZgTEIE8u+wc7W0p4ZPrdDI7PH2iTZPrAF/SzpGI183d/R7OzjWi9hZnZk5iZNZnYiOiBNk9GZkCQBek049+b3ueb0iXcPvY6ZmRPGmhzZA5CUAyysX47i8pXsKVhFwgwInEw0zLHMzZ5uOwAIfOjQhak04hllWt5ce2bnJ8/ixtG/mSgzZE5TFqcbSwqX8nSyjW0uTrQq3VMSB3F9MzxFMblyhE1ZE57ZEE6TajsqOXhRX8hNzqTR864Rx6POIURJZHilr0srVzDmppNeAJe4gzRTM0cx7SM8SSbEgfaRBmZ44IsSKcBDp+T33z3JD7Rz1OzfysnjDuN8AZ8rK/bwrLKtWxtKkaSJPKiM5maOZ7J6WOIlF35ZU4jZEE6xRElkaeWv8S2pmJ+P+P/yI/NHmiTZI4THW4rK6rWs6xyDVXWOtQKFVMzxnF+wSzSzMkDbZ6MzFEjC9Ipzkc7v+SDHV9w86grOCfvjIE2R+YEUdlRy4KyZSytXIMv6Gd44mAuLDiTYQmF8rwmmVMWWZBOYbY07OTPy/7BlIyx3Dn+Rrki+hFi9zpYULacb0qX0OmxURibw2VDL2CoPOFW5hREFqRTlGZnGw989ydi9VH88cz70cruwT9q/EE/i8tX8UnxN7S7OxkUl8tlQy5gaELBQJsmI3PIyIJ0CuIL+nl00V9pdLTw5FkPkihHYpDpwhf0s7h8JZ8Uf0OH28qQ+HyuGHoRhXE5A22ajMxBkQXpFOTl9e+wuHwl90/5OWPkBHAyfeAL+llYtpxPir/F6rExMmkIVwy9iOzo9IE2TUamX2RBOsVYXL6Kl9e/zSWDzuGqojkDbY7MSY4n4OWb0iV8tvs7nD4X41NHcsXQC+X06jInJbIgnUKUt1fxyKK/UhiXy0PT7kKhkGfuyxwaLp+bL0oW8sWeRXiDPqZmjOOyIeeTIGcOljmJkAXpFKHd1clvFj6JUlDy5OzfyLmNZI4Im9fBZ8Xf8s3epYhikJnZk7l08HlyehKZkwJZkE4BPAEvv1v8Nxrszfxx1n2kW1IG2iSZU5x2dycf7/qaReUrUQgKzs6ZxsWDzsakixxo02R+xMiCdJIjiiJ/X/0v1tdt5YEptzMqedhAmyRzGtHsaOWjnV+xtGoNWqWGM7OncF7BTGINcgoMmROPLEgnMZIk8eqG/7GofAU3jryM8/JnDrRJMqcpdbZG5u38ilU1GxGAieljuLDgTLKi0gbaNJkfEbIgnaRIksQ7Wz/m8z0LmTv4HK4cdnw86oJBkU6Hlzar
h3abhw6bB5vLh9sTwOUN4PYG8PqCiKLUZReIXX8ZhSCgUSvQqJVdLwUaVWhd27Vdq1ai1SjD65qu99of7NOolSgVB48sIIoSgaCIPyASCIrh9e6XLxDE7xfx+oP4A0F8fhGfP4gvIPZ+7w92lQsd173PH+h5vD8QRBQP7ftUKEClVIReqtBSHV4Xwvu6P7tOq0Kn6VrXdK+r0He912qV6DUqIvRqjHo1Wo3yuEZgaHW281XJYhaWr8AT8DIsoYBz8mYwKmmoHEFe5rgjC9JJiCRJfLDjC+bt+oqzc6dz86grjroS8ngDVDbYqGiwUd/ioLbZQX2Lg8Z2V1hs9ketUqDXqtBrVWg1ShSCgCCA0L0ERJEeFXf3eiB4ZH8phUIgpEn7rkHX9QACAZFgH7YeLoIAalVINNUq5T5RVSm6titRh8U1tO1QxBJCYr2/WAYCUg/h3CegQTy+rpc3cMifS6UUwuJk1GuIMITWTREaok06oiK1RJl0REXqiDJpMUVoD9n2/XH6XCwsW8HXpd/T7u4kRh/FrJzJzMyeTLTectjnk5E5FGRBOskQJZE3N33IN3uXMDNrEreNveawE7OJokRVo40dZW2UVHdQVtdJXbOD7jpPo1aSEhdBcpyR5NgI4qIMxJh0oQqtqxJTq47cpTwoSqFWiD+I1xdqhXi71n1d6/vv6173B8Ww/RBqjUmEBBpCItnd2lB3tz66liqlIiwePxQUjSr0XqtWolYpUSmFky7Omz8g4vUF8PiC4VapZ7/3Trcfh9uPw+XD4fbve+/243T5sTq9uDyBXudVKASiIrXERxlIiDGQEG0gMTqChBgDybERRJt0B/wuujPaLihbxtbGYhSCgjEpRczMmszwxEFyq0nmmCIL0kmEL+jn5XVvs6J6PRcWnMm1w+ceUsUZFCUq6q3sKGtjR1kruyrasLv8AMSYdeSkWMhJNZOTYiYr2UysRY/iCJ6aZU5uPL4AnXYvHTYvHfZQ92uH3Uur1U1Tu4umdhdtnW72b4xF6NWkJ0SS1vVKT4wkPSGSGHNvoWq0N7OwfAXfl6/C7nNi1pmYkj6WaZnjybSknnQiL3PqIQvSSUKzs42/r3yV8o5qri66mDmFs/u9wYNBkbI6KzvKWtle1kZxRRvOrqfjpJgIhubEhF7ZscRHG07kx5A5yfEHRFo6XTS1uahvcVDVZKemyU51ox2b0xcuF6FTkZNqITvFTE6qhdxUM8mxRhQKgUAwwObGnSytXMPG+u0ExSBp5mSmZ45ncvpYYgxRA/gJZU5lZEE6CVhft5WX1r2NKIncOf6GXvHp/AGRvTWd7ChvZUdZG8WVbbi9QQBS4oxdAhTLsJwYYsz6gfgIB0QK+Am6bIg+N5Lfi+j3IPm6lx5EvxcpGABJBFFE+sESQFAoQaEILZXK0FJQIigUoFCiUGsR1FoUGl1o2fU+tK5DUGtCx8j0i9XhpbpLnCobbJTVdlLZYMMfCP0Geq2SrOSQQOWkmMlNtWA2w7r6zSyrXEdJWzkAeTFZjE8dyfjUEXIkCJnDQhakAaTd1cm/N7/PutotZFhS+fWkW0mMjMfl8bO7qoNdFW3sKm9nT3UHPn9IgNITIxmaHRKgodkxRJl0A2K7JEmIHicBa0voZWshYG0l6LKFxMdlI+gOrUs+z1FcqbuVeAycGdRaFFoDCl0ECm0ECp0h/F7Zva17v26/dW2onKDW/ui6pQJBkZomO2W1VspqOymrs1Jeb8XrC/0fNSoFWclmslPNxMYo6FRUU+raRpW1GoBMSyrjUkcwPHEwOVEZcrgrmQMiC9IAYPPY+aJkEd+ULiEgipybcS7p6qGUVHWys6KNijorogQKAbJTzAzOimFIduhlNmpPiI2SJCG6bPg7m7vEpuvV9d5vbUXyunocI6g0KCPMKPQmlIZIlAYTCoMJpcGEUh+JQqtHUOtCrReNLtRy0YRaMChVCIIi1ArqWiIowgIQai0FkcTuZTDUihKDSMEA
UsC3r7Xl94aXkt+D6PeFlj43oseF6HEieruXToIeJ6LHBWJvp4AeKJQotPougdonVGFh2/8VFrKe5QXNgZ0ITgWCokRds53yOitldVb21nZSXmcNO1WolAIp8Qb0Ji92ZR2tVCAYbBh1OoYlFDI8YRDDEgcRZ4g+5b8LmWOLLEgnCEmS2NNcwVc717G2pByf3UBkMAWvw4DLHbqRNWolhRlRDM6KYXBWNAUZURh06uNjjxgkaG8nYGsjYG3Bb23eJzrWZgLWVqSAr8cxgtaA2hyHyhyHyhzftYwLb1MYTKdsBSNJElLAFxIs736C1f3yunq++trmdYe6HQ+EoAh1K2r0IbHqWu57b0Ch0XWJlx6FVhfa1lVW2P8Y9ckjbqIo0dju7NGSKqvtDDvXAOgjgohaG0FNJ4LegckE+alxDE7KJD8mi+zoDHSqE/PAJXNyIgvSMUSSJJxuP21WD21WD9XNneysqaOisZO2Dh8Bt5buLiiVUiAzOeT51t0nn51iRqU8ui4NSZKQfO5Q15nTSsDRTtDWRsDW2vUKrQcdnb0qT4XBhMoUh9oSFxab/YVHqYs4KttOd0LfvacPofqhgLlD42k+d3hd9LoQfZ7wtoMKGwBCl0CFBEyhCa0L3evdIvYD8eu9zYCg0YZapsf4+2jpdFNWa6Wy3kpNs4OaJju1zfaec9VUXgStG4XWQ0SERKxFR3KMiYz4WLLj48mMjSc2wiJ39/0IkAWJru4pKfSUFwyK4TkzP1x6fAEcbj82p5dOhwery4PN6cXqCLna2hwBAj/s9VEEUOjcWCwK8pJjGZOVQ0F6LGkJkaiUitAcGzGIFPQjBQKhZdDf1Q3lD73CFZerRwUmda13j9sEnVZElw0p6O/1GQWVBpUpBqUpFpUpBlVk19IUGxYchWZgxqNkehJurXldXb+9p8dvL4X/A/uWkq/7/+HpKhMSONHrOkRxIyRS+7XKBK0ehUoTcghR7XspfvBeUGtQqLTh9X371AgKJYJSCQpV17qKIAKtNj+1rW5qW11UNlmpbu6gpcONwyEhin20+pR+VJoAWp2EwaAkQq/AoFMTodNg1GswGbSY9AZMBi1GvQa9Roteo+5aatCp1aE5a11z2eRpDycnp6QgBQIBGhsbD1jG6vDywgdbcHr8BEWJYFBCEiWCkoQohl5BUULsen+4CIjoFD50gh+d4MOocBOpcGNUeDEpvZiVXqJUoaU6/OQpgtQ10VOSIBgAMcgRDdgLyq6nYy1KnRFBF4nSYAyN1eiMoTEcfSQKgxlVZBSCNuKk6d6ROXFIkgRBf2hczedG9HlDLTG/p2tczbOvZeb3dnk/uhG9XiS/e99DUdCHFPAjBvwQ8IMUPIZWCiAoQBnynHQKejoDeqyiDoeowhnU4BA1OEUNrqAGp6TBJWrwSWpEjrDVJIihCCDCfvde13r39q6gISgEAZPOiFJQ7BetZN+91L3avWV4fhxzz8g9qAmJiYmoVKojs/805ZQUpNraWmbNmjXQZsjIyMgcMYsWLSI1NXWgzTipOCUF6VBaSEdKY2Mj11xzDf/9739JTEw8Ltc4Xsi2Dwyy7QPDqWw7yC2kvjglvw2VSnXcnywSExNP2acX2faBQbZ9YDiVbZfpiey2IiMjIyNzUiALkoyMjIzMSYEsSDIyMjIyJwWyIP0Ak8nEnXfeiclkGmhTDhvZ9oFBtn1gOJVtl+mbU9LLTkZGRkbm9ENuIcnIyMjInBTIgiQjIyMjc1JwSgpSIBCgtraWQK/AcTIyMjKnDz+2uu6UFKTGxkZmzZp13KI1yMjIyJwM/NjquuMuSA6HgwsuuIDa2tpe+xYuXMicOXO46KKLuOOOO7BarcfbHBkZGRmZk5TjKkhbt27lqquuorKystc+h8PBY489xquvvsr8+fMpKCjghRdeOJ7myMjIyMicxBzXWHYffPABv/vd77j//vt77fP7/Tz22GMkJCQAUFBQwOeff96rnM1mw2az9dj2
Y2m+ysjI/HiQ67rjLEhPPPFEv/uioqI488wzAfB4PLz66qtcd911vcq99dZbvPjii8fNRhkZGZmTAbmuOwmifdvtdu644w4KCwu55JJLeu2/4YYbem3vDjsvI/Njw+/3U1tbi8fjGWhTZA4BnU5HamoqarX6oGXlum6ABam5uZlbbrmFCRMm8Nvf/rbPMiaTSQ4NIiPTRW1tLZGRkWRmZsoZgE9yJEmira2N2tpasrKyDlperusG0O07GAzy85//nHPPPZeHHnpIvrlkZA4Bj8dDTEyMfL+cAgiCQExMjNyaPQxOeAvp1ltv5e6776axsZFdu3YRDAb59ttvARg6dOgBx51kZGSQxegUQv6tDo8TIkiLFy8Or//rX/8CYNiwYezevftEXF5GRkZG5hTglIzUICMjc+zYtm0bjz76KADbt2/n7rvvPuTyx6KcjEw3siDJyPzI2bt3L01NTUCo5+L5558/5PLHopyMTDcD7vYtIyNzfBBFkT/96U9s3boVp9OJJEn88Y9/5MMPP6Szs5OamhqGDx/OqlWrsNvt/OY3v+Hiiy/mD3/4A1988QUbNmzgySefRBRFAH72s59RVFTE888/Hy7/5z//uc9rNzQ09Cr3/vvv8/bbb6NQKIiNjeWRRx4hKyuLBx98EEEQKCsro729ncmTJ/Pwww8f0FU6GAzyl7/8hcWLFxMZGUlRURFlZWW8/fbb2O12nnjiCUpKSvD7/UycOJH7778flUrFsGHDuO2221i5ciXNzc1cf/313HjjjXz88cd89NFHuN1ujEYjb7/9Nh9++CHvvvsuoihisVh45JFHyMnJOS6/lUwX0ilITU2NlJ+fL9XU1Ay0KTIyJ5Rdu3YdctlNmzZJd911lxQMBiVJkqRXXnlF+tnPfiY98MAD0g033BAuN2/ePOm2226TJEmS1qxZI51//vmSJEnS9ddfL33xxReSJElScXGx9Nhjj/UqfyD2L7dq1SrpzDPPlNra2sL7zj33XEkURemBBx6QLr74YsnhcEher1e65pprpLfffvuA53733Xela665RvJ4PJLX65Vuvvlm6dprr5UkSZIefPBB6T//+Y8kSZIUCASke++9V3r11VclSZKk/Pz88Lm3b98uDR06VPJ4PNK8efOksWPHSna7XZIkSVq7dq109dVXSy6XS5IkSVq+fLl07rnnHvQz98Xh/GY/5MdW18ktJBmZ05SRI0diNpt57733qKmpYe3atURERGCxWBg9evRBjz/33HN5/PHHWbx4MZMmTeL//u//jtiW5cuXc9555xEdHQ3A3LlzeeKJJ8JBly+55BIiIiIAmDNnDosWLeLaa6/t93xLly5lzpw5aLVaAK644grefvttAJYsWcL27dv56KOPAHq5Xc+aNQuAIUOG4PP5cLlcQCh8mdFoDJ+jqqqKK6+8Mnyc1Wqls7MTi8VyxN+DzIGRBUlG5jRlyZIlPPHEE9x0003MmjWL7Oxs5s+fD4DBYDjo8VdeeSUzZsxg5cqVLF++nBdffDF8/OEiSVKf27rz/CiVyh7bFYoDD2+rVD2rrv3Li6LIc889F+5es9lsPdyvu0Wse1u3bft/J6IoMmfOHO67777w++bmZsxm80E+qczRIDs1yMicpqxcuZIZM2Zw9dVXM2zYMBYuXEgwGOxVTqlU9pkA7sorr6S4uJi5c+fyhz/8AZvNhtVq7bf8gc47ZcoUvvrqK9rb2wGYN28eFouFjIwMAL7++mt8Ph9er5dPPvmEGTNmHPDc06dPZ/78+fh8PgKBAJ988kl435QpU3jzzTeRJAmfz8ftt9/OO++8c1B792fy5Ml8+eWXNDc3A/Duu+9yww03HNY5ZA4fWZBkZE5TrrzyStavX8+FF17IFVdcQVpaGrW1tWEnhW5GjhxJeXk5v/jFL3psv/fee3n++ee5+OKLuf7667nzzjtJTU3tt/wP2b/c5MmTufHGG7nhhhs4//zz+fTTT3nllVfCLRudTsfV
V1/NhRdeyJgxY7j00ksPeO65c+dSVFTExRdfzJVXXolarUav1wPw0EMP4XK5uPDCC7nwwgvJz8/npz/96WF9d1OnTuXWW2/l5ptv5sILL+SLL77gxRdflCe6HmcEqa+29ElObW0ts2bNYtGiRaSmpg60OTIyJ4zi4mIGDRo00GYcUx588EHy8vK45ZZbDvmYFStW0NbWxpw5cwD44x//iFarDXexnUwczW/2Y6vr5DEkGRmZI6K8vJxf/epXfe7Lysri2WefParzX3311Tidzj73/fOf/+T111/n9ddfJxgMUlhYyGOPPXZU15MZeGRBkpGROSKys7P57LPPjvo8Tz75ZJ/b//e//x3wuDfeeOOory1zciGPIcnIyMjInBTIgiQjIyMjc1IgC5KMjIyMzEmBLEgyMjIyMicFsiDJyMjIyJwUyIIkIyNzTHn++eeZNWuW7AUnc9jIbt8yMjLHlM8++4zXXnuNrKysgTZF5hRDFiQZmVOUxRuqWbCu+ric+6xx6cwck37AMoFAgMcee4zS0lJaW1vJysoiOTmZpqYmfvGLX/C3v/2Nm266iSFDhtDa2spHH33E66+/zvz581EqlUyePJn77ruPhoYGbr/9dtLS0qiqqiI5OZmnn34ai8XC999/z7PPPosoiqSlpfH4448TGxvLzJkzmTlzJhs2bADgT3/6E4MHDz4u34XMiUPuspORkTkiNm/ejFqt5v3332fBggV4vV4mT55MfHw8r776KoMGDaKjo4PbbruNzz77jFWrVrF48WI+/vhjPvnkE6qqqnjvvfcAKCkp4YYbbuDLL78kJyeHF198kba2Nh599FH+8Y9/8PnnnzNq1Cgef/zx8PUtFguffvopd999Nw888MBAfQ0yxxC5hSQjc4oyc8zBWzHHk7Fjx2KxWPjvf/9LeXk5lZWV4dxC+zN8+HAA1qxZw/nnn49OpwPg0ksv5dNPP2X69OlkZmYyfvx4AC6++GLuvfdeJk+eTFFRUTiG2xVXXMGrr74aPu/ll18OwMyZM3nwwQdpb28P51uSOTWRW0gyMjJHxKJFi7j33nvR6XTMnTuXsWPH9pn3qFuAfhhlHAinp9g/v5EkSSiVyl7l98+f9MNjRFHskVNJ5tREFiQZGZkjYvXq1Zx77rlceumlxMbGsn79+j7zLXUzYcIEvvzySzweD4FAgHnz5jFhwgQAKioqKC4uBkK5kqZNm8bw4cPZunVrOKvs+++/H25FAXz55ZcALFiwgJycHDl53mmA3GUnIyNzRFx22WXce++9fPPNN2g0GkaMGBEWj76YMWMGxcXFXHrppQQCAaZOncq1115LY2MjZrOZ559/nurqagoKCvjjH/+IwWDg8ccf584778Tv95OcnMwTTzwRPt+mTZv46KOP0Ov1/QZolTm1kAVJRkbmiCgoKODzzz/vtX3/lBR79uzpse+OO+7gjjvu6HWMXq/npZde6rW925uuL37961//KHIE/ZiQu+xkZGRkZE4K5BaSjIzMgJKamsrixYsP65jDLS9zaiC3kGRkZGRkTgpkQZKRkZGROSmQBUlGRkZG5qRAFiQZGRkZmZMCWZBkZGSOKU1NTdx6663H5FzPPfccixYtOibnkjn5kb3sZGRkjikJCQn861//Oibnuueee47JeWRODWRBkpGROSLWrl3LK6+8gk6no6ysjIKCAv7617/S3NzM9ddfz+LFi2lsbOTee+/FarWSn5/P+vXrWbZsGU6nk8cff5zS0lKCwSC33norF1xwQTgSeGdnJzNmzKC5uZlx48Yxd+5cnnnmGVavXo3VaiUqKooXXniBuLg4pkyZwtlnn83GjRtRKpU8++yzpKWl9bC1v3QVFRUVPProo3R2dmIwGHjooYcoKiriwQcfRBAESkpKcDgc3H777Vx88cUD8C3/uJAFSUbmFMW+bQn2rcdnPk7k8JlEFp1x0HKbN2/m66+/Jj4+nssvv5wVK1aQn58f3v/EE09w7rnn
cs0117BgwQK++OILAF566SWGDBnCU089hcPh4MorrwxHBW9qauKrr75CpVLx4IMPAlBVVUV5eTnvvfceCoWC+++/n88//5ybb76ZlpYWJk6cyCOPPMKTTz7Jf//73/Bx+9OdrmLx4sU88MADfP7559x3333cdtttzJ49my1btnDPPffw7bffhu147733aGtrY+7cuUyePJm4uLij/WqPiL6C1p6OyGNIMjIyR0xeXh6JiYkoFApycnKwWq099q9cuZI5c+YAcNZZZ2EymQBYtWoV7733HnPmzOGaa67B5XJRWloKwODBg3tE8gbIyMjggQce4MMPP+TJJ59ky5YtPVJdTJ06NWzPD23oZv90FU1NTTQ2NlJdXc3s2bMBGDFiBGazmfLycgDmzp2LWq0mMTGRUaNGsXHjxqP6ro6OH4cgyS0kGZlTlMiiMw6pFXM80Wq14XVBEHo9ySuVyj6f7kVR5Omnn2bIkCEAtLa2Yjab+fzzz8PpKvZnx44d/PrXv+bGG2/k7LPPRqFQ9Dhvtx192dDND9NVBIPBXmUlSQpHLN8/nYUoir1E8oQS8A/ctU8gcgtJRkbmuDFp0qRwANalS5dis9mAUCqKd999F4Dm5mYuuugiGhoa+j3P+vXrGTduHFdddRW5ubmsXLnygKku+uKH6SpSUlJIS0vju+++A2DLli20traSl5cHwNdff40kSdTV1bFt2zZGjx59eB/+GCIGfxyCJLeQZGRkjhu//e1veeCBB/jggw8oLCwMd9ndeeedPPbYY1xwwQUEg0Huu+8+0tPTw04HP+S8887jzjvv5MILL0StVlNQUHDAVBd90Ve6iqeffprHHnuMF154AbVazQsvvIBGowHA4/Fw6aWX4vP5ePzxx4mKijqKb+LocDl6Z+I9LZFOQWpqaqT8/HyppqZmoE2RkTmh7Nq1a6BNOCzeeustqbS0VJIkSdqxY4d0ySWXDIgdM2bMOKz64oEHHpDmzZt3TK59NL9Zd123esnSY2LLyc5xbyF1e9C8/PLLvXKXFBcX8/DDD+NwOBgzZgy///3vB7afVua0R5IkvAEvLr8HT8CDN+jHH/QTlLrHEwRUCiUqhRKNSoNepSNCrUer0iIIwkCbf8qRkZHB//3f/6FQKNBqtfzhD38YaJNOSaxW50CbcEI4rrX/1q1befjhh6msrOxz/3333ccf//hHRowYwW9/+1s++OADrr766uNpksxpjCRJWD02Gh2tNDtbaXW10+7qpNXdQYe7k06PDbvXSUAMHPa51QoVJl0k0XoLcYZo4o2xxEfEkhwZT4opEbPOdBw+0anP9OnTmT59+kCbcdjpKk62DLStVvtAm3BCOK6C9MEHH/C73/2O+++/v9e+uro6PB4PI0aMAEIuls8//3wvQbLZbOGB0G4aGxuPm80yJz+iKNLkbKXGWk+drZE6eyN11kbq7U24A54eZY2aCGIMUUTrzWRYUjFpI4nURBCh0aNTadEoNWiUahSCAoUgIAFBUSQg+vEGfbj9Xlx+Fzavg063jXZ3J+Ud1ayt3UxQEsPXMetMZEelkR2VQU50OnkxWSdUpCRJQpQkRDG0HnrtcxYWAEEIeaEpFAKKrqXMycOB6rqmDlmQjponnnii333Nzc09JpnFxcXR1NTUq9xbb73Fiy++eFzskzn56fTYqLHWU2Otp9paT1VnLTXWenz7eR1F6c2kmpKYnjmBpMh4kiLjiYuIIdYQjValOS52iaJIq7uDelsTtbYGqjprKe+oZkvjrrArcVJkPIPi8hgcl8fg+DxiDdFHdC2vP0hjm5Omdhcqb4DWTjeBoEgwKBEQQ0tRlA57pooggEohoFIqUKkUqLteGpUSjVopC9YJ5kB1XWOnp8/tpxsDNmAj9TFXoK8++htuuIFLLrmkx7bGxkauueaa42abzIlFlETa3Z002JuptzVRbw9V8tWddVi9+54MIzURZFhSOStnGmnmZNLNySSbEjCo9SfcZoVC
QXxEDPERMYxIGhze7g34qOiopqStnF0te1lTs4nF5SsBSDTGMTShkCHxeQyJy8eiN4ePkySJDruX6kYb1U126pod1LU4qGt20GrdVxnde2kqnQ4vKoWAUhkSD6VGQKEUUO7X+uluDe1PqBXVNddG7HoFRQJBCY8viMPl79GiUqsV6DQq9BolOq0KjVqJzPHjQHVdu0Ps56jTiwETpISEBFpbW8PvW1paiI+P71XOZDKFXUVlTi0CwQBOvwunz4Xd58TudWD12On02OhwW2l1d9DiaKXJ2dqjxaNVaUmNTGRk8lDSzSlkWFJIMydj1kae9I4FWpWGwrhcCuNyuahwNqIkUt1Zz87mPWxv3sPK6vUsLFsOgEUdQ0QwgYDVQmu9DodVTUgKIEKnIiXeyNDcWFLijCTFRJAQYyDoaCQnxXxcvgdJkvAHRHz+IF5/EK8viNPtx+b0AaBWKYjQqYjQq9FrVSf9b3GqcaC6rsP943gYGDBBSklJQavVsnHjRkaPHs2nn37KtGnTBsocmX5w+z20uTpod3di9dixee04fC6cfhcuvxtPwIs34MXj9+IJeHEHPKGl34P/AM4DkZoIovUWEoxxDE8cTGJXV1tyZALRestpU9lZHT7amtS46tJQ1pvR1+fT6axHEdlGm6mdzsgSiAxAAUQpDKQaU8mPyyQ/PpF0SzKJxnhUin2VUXFx03H7bgRBQKMOddcZu7ZJkoQvIOL2BHB5/FidPjodPpQKgUiDmsgIDTqN7Bl7vHEGdDTZOkkwWQbalOPKCf8n3Xrrrdx9990MGzaMv/71rzz88MM4nU4GDx7M9ddff6LNkemi022lvKOGamsdtbYGGuzNNDpasHsdvcoKCOjVIXdonUqLTqVFq9Ji0kWiU2nRq3To1DoMah0GtZ4ItYFIbQSRWiMmrRGLzoRaqT7un0mSJFx+Nx0eKzaPHbvPidPnxhPw4Au7e4uAhEJQoFKo0Cg16FVaIjSGkK16MzH6qIOORYmiRGO7k/I6K+V1VirqbZTXWWm37etui4/Sk5VsZtqIcWQlm8hKNhNr0VFra6CkrYyS1grKOqr4unwBX5WFOs+UgoJEY3yXV18MI9T5OLzOLtd0FUqF8riKtyAIaNVKtGollkgtoijh8vixu/aJk06jxGzUEmlQh215/vnn+eyzz7j22mu56aabjsqGF154AYC77rrrqD/PgaitrQ1HKT9UZs6cyX/+8x/WrVvHunXrjqN3nsCCbdu4dsrp/dB+QgRp/x94/zwphYWFfPTRRyfCBJn9kCSJBnsTO5r3UNyyl5LWclpc7eH90XoLSZHxjE8ZQbwxllhDNNF6MxadCZM2EoNGj0LoHXVKCvrxNlXha64i0N5AwF6D6HYgBkJdPoJShag1YI0wozLForYkoo5NQR2dhKA4si4JURJpcbZRZ2uk3t5Mo6OZFmcbLc52WlzteAPeAx4vIIBw8GjKJq2RRGOoFZcYkYA2GIXfbqSpOUhFfUiA3N5Qi1CpEEiNN1KUF0tOioWcFDNZKWaM+r5FODMqlcyoVGbnhtyjfQEftbZGam0N1NoaqLc10eRspaS1jLz0VJqc+7q6BUClUIUFSqVQoVaqUCvUqJUhwTqWKBQCRoMGo0FDMCh2CZOXpnYXbVaBqEgdpggNn332Ga+99hpZWVnH9Po/Zlbu2SsLkszpQSAYYEfzHjbUbWNL406anW1AyEOtICaHc/NnkB2VQYYlhQiN4ZDPG3TZcBavxlmyHk/1TqQu8UGhQmW0oNBHIqhDk0pFr4tARyMBpxXJuy8UiqDSoEnMRpeSjy5tELr0wSj1xl7XcvicVHbUUNlZR3VnHTW2emqtDXiDvnAZg1pPQkQsyZEJFCUOIkYfRZTejKmrdWbQGDCodGhUGlQKZVhYQwP9wZCrd8CDw+ui02OnqrWZypYmaq0t1De3Utq0BUm1n8eTT48hMo68kWkMSchnTGYumUnmo3IA0Kg0ZEenkx2d3mvf
zl27SDUlERCDLK9cy/LqdSEXb6Twcn8EBBSCIuTu3eXaLnDwFtWMrElMz5pwwDKSJPLXp/5ASUkJra1tJKem8+Cjf+b1l5+lsbGRX/ziF/ztb3/jpptuYsiQIbS2tvLRRx/x+uuvM3/+fJRKJZMnT+a+++7rEcgU4LXXXuODDz4gKioKk8lEUVERAO+88w6fffYZbrcbQRB49tlnycnJYebMmVx00UWsWLECt9vNU089xdChQykuLubRRx/F4/FgNpv561//SmJiIq+++ipff/01wWCQKVOmcN999wGhcEG/+tWvKC0txWQy8Y9//IOoqKh+r3uiMCq9NDRCVWctGZbUgx9wiiIL0mmMKInsbC5hedU61tdtxelzoVVpGRZfwEWFsylKHERCROxBu3yCotTlZizS3ZAINFfg2vgF7j1rQQygjk4icsSZ6NIHoU3IQmWJP2CrJ+hxEmhvwNdag7epEm9dKdYNX2FdOx9JUOBJzqI5KZ0mk4maoIvKzlpaukQUQvN+0s1JzMqeTJo5mRRTEsmmBCI1EYfdheUPiDS2OalttlPT5KCm2U5NU2jd5w8CWgQhlcToAoqSIkmO1hIR5SKg6aDRXc/e9kpKXSspbVjJgjYDQ+LyGZpQQFFCIUmRCce0S00hCGhVGrSAXq1Dreh9C4thgRJD65JEUNoXiFSBAoVCgVJQ9NnSPVQ2b96MWq3mgw8+QBRFrr/+esp3b+L/7nuIDevW8Ps/PUN2Tg4dHR3cdtttjB8/nqVLl7J48WI+/vhjVCoVd911F++9914Pr9nt27czb948PvnkEwRB4IorrqCoqAiHw8HChQt5++230el0PPfcc/zvf//jkUceAUL5jj766CPefvttXnnlFV544QXuvfde7r33XmbMmMH//vc/3nrrLSZOnMiOHTv46KOPEASB++67j/nz5zN69Gja29u56aabKCoq4u677+arr75izpw5B7zuiSBbZ2W3LZt5O7/h/yb/9IRd90QjC9JpSKurncXlK1lSsYZWVzt6tY4xyUVMTBtNUeIgxKBAU5uL6ioXGzsqaLN56LR76XR4sTt9ONx+XJ4AHl8Ary9IUNz31B2jsHORYSMjNNV4JDVrvbms9+fR5opF16xCu8mFXrsHg66MCJ2aCL2aSIOGSIMao0GDKWLfy2xMRR2dgjU1m+qcAio7qqlqKaPG3oRdtEPnToQOidggpOksTE8eQ172WLJjMg866VSURGweO+1uK50eG802Ky02G+1OJ1anC7vLg8Ptx+H243QHkYIKpKAKAipMWiNJ5mhmZSWSlxRHRpKJ9IRIdNr+b5dWZzu7WkrZ0byHnU17WFe3BYBYQ3RYnIYmFGI5hpNlp2dNOGgrphtREvEGQq2/bgcUCQmloCBCY8CoiUB3mOGRxo4di8Vi4b///S/l5eVUVVUR9HtJjTeiVAgEgiI1TaExyGFdLZw1a9Zw/vnnh1NMXHrppXz66ac9BGndunVMnz6diIgIAM455xxEUcRoNPK3v/2NL7/8ksrKSpYvX86gQYPCx+2fE+m7776jvb2dlpYWZsyYARCedP/UU0+xbds25s6dC4RaRcnJyYwePZr4+Phwayw3N5eOjo6DXvdEkK1ppdhbyKqSPZyXX0Zh3IlrnZ1IZEE6TZAkieKWvXxVupgNdduQJImhCYWclXY2OncKNY0uPtti5x/Ni3vMa4HQuIDFqMVi1GKK0BBj1mPQqdBrVWg1StQqJWqlRGL9chJrFiIpFDQkn0ljwmR0aBjf5Srs8QXx+AK4vQFcngBN7S7sbh8unwsPDgSNG4XOhaB1I+icoZfGQ3cdKIhKNKKZCDLJVsYRrzST57WR5CjDXLMH5e5SxJWLqIwZhDN+GPaofDokL62eFjp8LXT627EHOnGLdrw4QTjA3A1JAK0AWglVVM9uLg9QAVR4YUN9BAm2WFLqE0kzJ5MVlUZ2VDpGbUSPY2IjopkWMZ5pmeMBaHK0sLWxmG1N
xayv28qSitUApJqSGBSXS0FsDvmx2YfUQj0WKAQFerUOvVoH+tDEXlfAHXbJt3kdqJWqrkgWRpSKg7ecFi1axPPPP8/111/P3Llz6ejoQJKkcDSIlDgjWkNo3KzVFiBRFUQUe/8mgUBPb0xBEHqUU6lU+Hw+GhoauO6667j22muZNm0asbGxFBcXh8vtnxMJQK3uOWbn9Xppbm4mGAxyww03hJ0tbDYbSqWSjo6OHrE0u3MrHey6J4IsQqk5dO40Xtv4Ln8+64ET4hh0opEF6RRHlEQ21G3jk+JvKGuvQqfUk6Ecga8xlS2bAqwLdAAd6DRK0hIiGZYbS3KckeTYCBKiDcRFGbAYtQeclR+wt9P8yd/x1BQTUTiRmLNuIivCTKfXRqfbRqfHhtVjw+q1d63bET023B4rQbcVAl72T7mmVeowq6OIUMSjk8yo/WYErwm/S4fLHcTh9tPo8VPhDbDUG4kojUCpGEx6ZCWxhgZUUiVtnVU0uVT49rNb8msQfBGoRTNGIRWjMhKT1kS0wUy8yUJSlJnkKDMpcWZMhn2J5URJxB8M4Pa7cfhd2DwOOj1WWl3tNDlaaXS0sKN5D8uq1oaPSTTGkR+bzaDYXIbE55NgjOshLAnGOGbnxjE7dxqiKFLRWcP2pt0Ut5Syono9C7rmIkVqIsiMSiPdnEKaOYkUUyJJkUfW9Xg4KBQKjJoIjJoIREnE6QuFR2pzddDhtoa9IQ/kFLF69WrOPfdcLr30Upqamli/fj0TJ07c7xoCCTEh4Q4ERGqa7YwYNYY3Xv8XV1xxBSqVinnz5jFhQs9W3sSJE7nnnnu466670Gg0LFiwgOnTp7N9+3YyMjK48cYb8fl8vPzyy0RH9x/9IjIyksTERFauXMnkyZP57LPPWLduHeeddx7PP/88l19+OVqtll/84hdccskljBs3rs/zHO51jwcmyUp6QiR48qm2fsb/tn3GDSN/ckJtOBHIgnSKIkoiq6s38v62L2l0NaEKGPHXDcHdnIxdoSY3NYJzJ0WTl2YhN81CUkzEYYWCCYpBGh0tVFZtYe+aebQJIs4Rw7EpPbQvfrJPd3AIjWtYtCYsehOZljRGJg0lWm8hPiKGuK7IBsaDVLa+gI/KzlrK2qsoa6+ivKOaensTjZJIIxChtpCssTDeFyC+vZmEznbi/EHM8VkYcoZhyB2FOikbp9+D1WvH7nXi9Lvw+DtoCDZTUx+K7B1y9VaiVWkxqPVEaiOw6Ewkxcb3WRF3O1Xsba+itK2CrQ27WFYZEql9XXODKEochEm7zylDoVCQE51BTnQGFw86G1EUqbbWU9JWTnlHNZUdNXxXtgz/fpOD9SodsYaoUBw+QxTReksosGswEm/AF3bIOBaipRAURGqNRGqNeANeOj1dDxleOxatCbPO1GeL6bLLLuPee+/lm2++QaPRMGLEiH5zFKUlGGloc5E7eCzjJxZz6aWXEggEmDp1Ktdee22PsoMGDeKGG27gJz/5CSaTieTkZAAmT57Mu+++y3nnnYdGo6GoqCic9rw/uvMd/eUvfyEqKoq//OUvxMfHs3v3bi6//HKCwSBTp07lkksuoa6urs9zHMl1jwfTiuJ4Z0E5s0fP4MuSRRTEZjMhbdQJt+N4IkgH83c9CamtrWXWrFksWrSoV0qL0x1RFPlqxzo+2f0FdqkN0R1BoD6HTEMho/ITGJEfR0FGNNrD8PIKikGqOmspaatgb3slVR211NmbekTFjlDpiDfGEW0IVYxRejMWXcgV3KIzYdZFYtZGojnM2HEun5vKzhoqOmqo6FrW2RoRuwKXmnUmcqLSyezqKsuOTidGHxXu1mmwN1NZt4Pq6q3UtdfQ4nPQqVJgUykIHmFlLQgC0ToLCcZYkk2JpJuTybCkkGVJQ6fe19aTJIl6exM7mvawozn0cvpcCAhkRqWGxakgNgfNQbpXRFGk2dlKvb2JenvIdb3V1R6KWO4OzaOSkLgz5xpSstNCdnan
ylCqUCtUqJVqNEo1GqWmx2TaI8EX9NPh7sThc6EUlEQbLEfdahNFiaZ2Fw63H4tRQ6xFf9pMgD4QxcXFRzzm1F3XvTk3n/TbXuTnL27hytl5FCu/pKKzht+d8UvyY7OPscUDh9xCOkWwOrzMW72JRXXf4Nc3I3kMpElTmV0wifGXJhIVqTv4SbqQJIk6WyObGnawvWk3e1rL8HTN1+mOWj3EEI9x2wqS9FEMveQBzNHJR2W/w+ekwd5MXdf8mhprA9XWOtpcHeEyFp2JrKh0xqYUdUXNzghHbfAEvFR21LKudgsVHTVUd4Ym8O4fDcISYSIhLokCEYwOB4b2ZiJcdiKCIgatEVNiNsakPAzJ+Wjj0pAUCgLBAJ6u/Eh2376I3i2uNprsLayu2RgO9SMgkGZOJj8mi0FxeQyJzyfFlEiKKZGz86YjiiLlHdVsbdzFtqbdfLFnIZ/t/g61QkVe1zGFcTnkRWdh0PSMv6dQKEiMjCcxMp6+nnkDYpBOj5X68joSjLEExCBBMYhfDBAQAzh8rrCIA6GWn1KDTq1Dr9KhUaoPq/LXKNUkGOMwB7y0uTpocbZh9zqIi4g5qLj2h0IhkBhjoNXqptPuQ5IgLurHIUrHglh9kKE5MSzbVM9ffvkzHl78V55c/k8eOeMesqLSBtq8Y4IsSCc5e2s7mbdsJ+valqGIq0ap1TA+6kxunnQeUcZDDyoqSRKlbRWsqtnI+rqtYRfqFFMi0zLHUxibS2FsDjGGKHxNFdS//QgqcxzJVz2O0nBwzzBPV8XV6mqnxdlGs7ON5q44dU2OVhy+fQnGVAoVKaZEBsXmkmZOJjMqlSxLWjjYaCAYoNpax8b6bextq6KsvZJae2N48qpZG0lmVCrnJJxBmjmZ1C6X7x8GWZUkiUBHA+7KHXhqivFU7yKweyMuwK3SoEnMQpuUQ1RSDolJuaiTMnu5qkuSRIfbSkVnDWVdXXWrajaysHwFEIroPSyhkOGJgxkSn09uTCa5MZlcOuQ83H4PxS2lbG/aw66WEj4u/hppV1ckcGM8GVGpZJhDcfqSIxOIj4jpt4WpUiiJNUTTomzCqInos0xQDOIL+vAG/XgDPrwBL06/O3y8Qa0nQmNAr9IdsgjoVFqSIxOw+5y0uzqotTZ0tY5NRyQkgiAQaw6JUIfNiyDwo2kpHS2i18WM0Wm88MEWGpsDPDz9Lh77/hkeX/Isj0y/m+zojIE28aiRu+xOQiRJYntZKx8sLGFH+1Y0GXsQVH4mp0zi5rGX9PLwOhBNjha+r1jN8sq1tLjaUSlUFCUOYnTSMEYmD+mVEiHg6KDu3/eDoCDlxj+jigzt9/g9NDpaaXK20ORo3a9LKSRCTp+rx3mUgoLYrjGjBGMcica4rmR2ScRHxITHaERRpN7eFB4vKmuvpLKzNtzyMWmN5ERnkhOd3tVll0GU7siDiwZsrXjqSvDW7sFTvxdfY3l4Mq+g1qFJyESbmI02KQdtUjbqmJReIiWKIpWdtexsLmFH8x52tZTiDXhRCgryYrIoShxMUUIhOdEZPcaiXH43e9sqKW2roLyjmqrO2vAE5W4sOhMxhihi9FHhrtHwS2fGUW+lsLAQhUKxX86j0C0s0DvKdyAYwBXw4PK7cfndSJKESqEMhXHSGFEpD/2ZNCAGw7+1TqUlPiLmiD29JEkKt5RizTqiTIfewj+VkCSJ3bt3H5Muu6IbH4b0UVz/2DfMHp/Bz+YW0exo5fffP4Pd5+T/Jt3WI/L8qYgsSCcZu6va+c+XxeyoqcGQuwvR2EJOVCY/H3fNIc/QFkWRDfXb+HbvErY37UEQBIoSBjElfSxjU4f3m65BEoPU/Pd3VLdW4ppxOXWiJ5QEz95Ih9vao6xBrSfOEE1MRDSx+tDge6whmtiIKOIMMUTrLSh+MBDuCXiptYZyB1V21nZFXagNR1rQqrRkR6WTE51BbnQmudEZxEXEHNenZ0kM4m+tw9tYjrexDG9DOb6mCiR/
qAtTUGtDApWShy6lAG1qASpjVI9zBIIB9rSVh7rqGoup6KhBQkKv0jEoLpfB8XkUxuaSFZXWqwL3+D3U2ZuotzXR7GylxdlOg72FVkcHNp8dr9jTRf/y1HMoTM5HZ4wAlCAqQFKAqETqWlcoQKlQoFKGch2pVUq0agUajQJf0Ivd58TldyMQSmBo0ZsPuRtOkiQcPhetrnYkJGIN0Uc8tiRJoTElu8tPcmwEEf2EVjpVkSSJtrY27Hb7EYdQ2l+Qhl71f0QWzeCp/6xn295W3vrd2aiUCtpdnfx5+T+osdbz09FXcmbO1GP8SU4ccpfdSUJLh5s3vtjJ8i21RKY2EzlyJwoFXDP8CmbnTjukGfWegJfF5Sv5qmQxzc42YgxRXD70QmZkTSTGENXnMaFupb3sailhR9laqlVWAsmRsOdrtCotaaYkihIGhWK4GeNJNMYSb4ztt9tIkiTsXgel7RXU25qoszdSa2ukztpAs7Mt/DSvV+nIsKQwM3syWVFp5ERnkBKZ2EvEDgdPwEurqz3sumzzOnD6XLj9HnxBH4GuaAUKQYFGoUan1mLURGDSGomKiyUuoyD01K9Q4m+r7xKoMrz1e7Gu/wrrmvkAqKOT0WUMQZ9VhD5zGCp9JEPi8xkSn8/VRRdj8zrY0bSbHU2h1tOmhh0AKBVKMswpZEalkRaZjE6KwufQ0dEGdS0KalsMNLSKuL2R+z6UIojBGMBkEdFFBNlQY0VHExatDoRQNIb9wwUJKFCgROgSK1Gkx8RmpSIU0VutEgjgoy5QiyRJ6FRaItSGQ/7+g6KI3eegNliNVqXBqIk4oqgP3XmgGmolok3aQ5r/dCqh0+mO2UOz2BVua8aYNFZsrWfT7mbGDUkk2mDh8Zm/5tnVr/Hqhv+xt62Sm0dfecRjfQOJLEgDTFCU+Hx5Ge98sxsJH7mTq6jzlzAoNpdfjLuBeGPsQc/hCXj5tnQp8/cswO51UBCbw7XD5zI2ZXif7sv19ibW125lU8N2SlrLCUoiKkFJitvDFH0cKYXnEqNORC+Y8AdCidzwA53Q5lBgUznwCy24JSsO0UqHt51mZyuNjmaaHK24usYtIDRelBQZT050BtOzJpBmTibDnEK8MfaIw9aEPPNqqbbWUd2VxrzB3kSnx9arbPeEUI1SjUro6iaUJHyiH08/KTLiDNGkmpPJtKSSM3g0uVMvJVkdgbepAk/1LjzVu3DsWol98wIQFOhSCzDkjyNi0ATU5nhMWiOT0scwKX0MAFWtLazcu4Pi5nIa2uqpaF2PpNwXf08SFSiJQJ8QSXKamRiDhURzDKmWGNJj4kg0R2PWmfr0nJMkCavHRrW1nsrOGva2VbGntYwOT6hFm25OYWzyCNK1hbQ0CWwva2VraT1ubxCzUcPUMbFIcWUsq1iJUqHkJ4PP4/yCWYfkpSeKIvP3LOD97e9i1pm4c/wNDE0oPLQfcT8a25zc8/clZCaZ+NMdU1DKmWr7RPSEBGlUQTymCA2LN9YwbkgiEJpu8cCUO/hg5+d8vOsbKjpquGfizSSbEgfS5MNG7rIbQOpbHPz93U3sqepg6BA1ttjVtHnauXzoBVxcePZBn1YDYpCFZcuZt+trrB4bwxMHc+ngcymMy+1VttnZxoqqdaysWk+NLTTr26KKI8KfjNgZxcVt3xMlOviz9SJcUtekUUFE0DlR6B0IekdoqXMi6FwIin0eXZIooAgY0EkmIlUW4gyxpEclUpiYzrD0NIx6bS97DhV/0E9FRw2lXS7p5e3VNDiaw/sj1HpSTUkkmRJINMYRHxFDjCGKKJ0Zky7yoAP43oAPm9dOm6uTVlcbjY5W6m2NVFnrqLc1dqWnCIlUYVxoEuzQ+ALiDFF46/fiKtuEq2QDvuZKAJRJ+XTEj2aXkEtpg4fyuk7abfsijsda9GQmR5IYr8Rg8aDUu/EKNto87bQ622lzd4ZdvPdHQMCsiyTGEEVcREzXmFwCqaYk0szJPdJjdHtR
bm7Yyfq6LexuLQNgWEIB5+bNZFj8YLaWtLJwfTVrdzSgUCiYPiEKZ/RWtjbtIMOcwu3jrjvkQfLy9iqeW/NvGuzNnJ8/i6uGXXTY7v+LN1TzzLubue3iYVw49fRxYz5awl12lw8jf/oFxJx5IwCvfLyNb9dW8Z/HzukVRX5j/XZeXPsm/qCfa4fPPeQelpMBWZAGiO831vDPj7aiUiqYdZaapS1fYFDr+eWkWxgUl3fQ4zfV7+CtLR/SYG9mSHw+Vw67iILYnvGtAsEA6+q28vWepexpD03kU7qjcTfHI3YkIPn0REVqmWkqZ6p7EWszzqc5Lgar1ESrr5FWT3M4KKeAQIw+hhhdLFGaaIxKC1rJjDIQgd+lpdPuo7XTTVO7i5YOF/v1EoXzAOWkmMlJs5CfFoUlsm+Rsnkd7GktC71ayijrqA7Ph4rWW8iNziQ7Op1MSyoZltTjmszPF/RT2SWGe1rLKW4pDadUj4+IYXBsATHKNALWaNoqGzA0bGawVEKi0opHUlGsHERL8mQSM7PJSTWTlWTCaDh4Rd3t4h2KgmGlw22jw9NJu6uT1i4X7GZXG0Gx67cRBFJNSRTE5jA0Pp9hCYVE7jcxt9XZztLKNSwsW0Gbu4NUUxI/GXIeE9JG0drh4YNFJSxYV41Rr+asszSsbl+AzWvn0iHnc8mgsw8phYUn4OWdrR/z3d5lJEcmcMe46w9rfowkSfzu1dXsqe7glQfP7Pf/8WOju677z7XjyB45kbgL7gCgpLqDXz+3jDsvG8HZE3o/OLS7O3l53dtsadxFUcIgbh93Xb/d9icTsiCdYAJBkdc/28EXKysYnB3NsImdzC/9ioKYbH49+baw63N/tLk6+Pem91lft5WkyHiuH/ETRiUN7VEpO3xOPtq6kMUVy/BILkSvnmBLChGeLIamplGQEUVumgWLRWRP207WrnqXMp0KhxD6KxjUenKiM8iKSg+7JaeYEg7Zo8ofEGlqd4YiZzfZqWywUVFvpa7FEY4WHh9toCDDQkqKgMrUSXuwgZLWcursjUCoqy87Kp382GwKYrPJi84i2mA5/C/8GNE9AL+qZA+baoupdlbgVjchqAJIEqh8FuJUaQyKzWOiyUBCw3rce1ZDMEjE4ElETbkMTdyxmysSFIM0OVuptTZ0ddVVsqetHLffg4BAYVwOE9NGMyl9TDhqRFAMsrpmI5/s+oYaWwPZUencOPJyCuNyqGqw8cIHW9hT3cGZE5OQkrezqmYDhbE53D3x5l7emP2xrbGYl9a/Tburk3PyzuDKYReF4ucdAjVNdu786/ecNzGTn80tOuLv5nSiu65755ZppOcUkHDpvUDo/3j7U4uwROp48hdT+jxWkiQWlC3n7S3zUCqUXDnsIs7KmXrMc2QdS2RBOoG4PH6e+s8GNu1p5qKpWUgp21lYvoIpGeO4fey1B6zwJUliUflK3t4yj6AU5CdDzueC/Fk93HbbnFZeWfEpW9s3ICkCBK2xJElDmJ47krGDk8hIjKTJ0cKqmo2sq91CeUc1AKZAkKFJQxiWMZqCuBySIxOOSxO/3WFnZWkxW+tLqbJWYZWaQNU1lhJQEyHFk2XKZHT6IKYVDiFSN3CuwMGgSEW9jV0VbRRXtlNc2U5bV1BavVZJXloUBRkWTHEunKpGSjpKKWkrJygGEQSBTEsqeaYUUtrbiN6zmRi3B3PRGURNvxpV5PF5UhVFkbKOKjY37GBtzWZqbA2oFCompo3i/PxZ4fxKoiiyono97277jDZ3BzOzJ3Pt8EvQK/W8/XUx877fy5hBCUybAW9seQ+VQsWd429kVPLQQ7LD5Xfz7rbP+G7vMqL0Zm4Y+RMmpI46pJbsix9uYdH6Gl5/+CyiT1NX8MOhu6773+3nkBwfTdLVvwvve3/BHt75ZjevP3QW8dH95zBrtDfz6ob/saN5D2nmZG4aeTlDEwpOhPmHjSxIJwib08fv/rWa8jort186lGJxMatrNnLxoLO5
aticA96sVo+Nl9a/w6b67QyNL+BnY68hwRi3b7/TyXPfz2OHbT2SIoDGmcaMtDOYM3YkcVF63H4PK6rW833FKva2VwKQF5PFmITBJH7/EZkJeSRd8dAx/bwev4fKzjoqOqop66iivL2aOltjeGwkOTKB/Jhskgyp4Iiivk6guKKd6iZ7qMWhFMhNtTAoK4aCjCgKM6KIMR/aRGBvwIfVa+/ysHPjFwOIktgVakcV8ijTGDBrI9GrQ2NMHXYPJVUd7KnuYE9VByXVHXh8oS6xuCg9gzKiGZwVzaCsGDKSTH0OvHsCXkpayylu2cvu1r3sbasMu7SrUZDg8REfEElPHUpG4WQSjPHERURj0kYel27Hqs5aFpWvZGnFGtwBDyMSB3PlsDlhYfIEvHy080u+2LMIi87EHeOupyhxEF+vquCf87YxqSiJ6y5O57k1r1PVWcvcwedw+ZALD9kTr6S1nNc2vktlZy1D4vO5YcRlZEYd+H6tb3Xw8ycXccWZBVxzzuE7SJxudNd1795zCQk6iZSbnwrva2xzcuufFnLTBUOYO6P3uPH+SJLEurot/GfzR7S42pmQNorrh19KbMSJDRJ7MGRBOgE43H4eemklNU127r9+NCs7v2Bt7WauHX4JFxXOPuCxxS2lPLvqdRw+J9cMv4Rz8s4It14CgSAvL/mWZY0LQO3B6EvnimEXMnv4EARBoN7exFcli1lWuRZPwEuaOZnpmROYlD6aWEM01nVf0LbgDZJv/DO6lPwj+mzdKcSrrfVUd9ZRZa2jqqOWRkdLWHzMOlMouGhUOnkxWeRGZ/Y7udfh8rGrop2d5aGWSWlNJ4FgyLEgxqwjJ8VCVrKJtEQjmkgXLtppcjVRb2+iydFKq7MtHJ3gUFBIKiSfnoBLj+iOQPBEkmRMYmhKJoMzYxmcFUNc1KFHxNifoBikztYYCqDaWUt1awU1bdVYhWCPcmqFiii9uWsirCW8HmOwEKMPOTFE6c1H5ZX4XdkyPt+9ALvPyRmZE7lm+MXhnFJl7VW8uPZN6myNXFR4FlcOm8MXyyt4ff5Orjt3EBefkcnrm97n+4pVDEso5J4JN2PSRR7kqiFEUWRh+Qre3z4fh8/FjKyJXD7sQqL1ln6Peexfq6lssPH6w7N/9B533XXd+/dfTYyvlbTbX+ix/5fPLEGpEPjbPdMP6Xy+gI/5exbwSfG3CMDFg87mooKzDtsJ5XghC9JxxusP8ugrqyip7uChm8ay1vYdSyvXcP2In3BBwawDHvtN6RLe2vwh8RGx/GrSrT2eLteVlvP8qv/g0zWh8Udz3fCfcPawkUDoyXjezq9ZW7sZpULJlPSxzM6dRk50RvhJXBKD1PzzTlSmGJKv/+MhfRarx0ZVZx011nqqrfXUWOupsTXgDezzIkuIiCXDkkpmVCqZllBA1Cj9kUdW8AeClNVZ2VJRw7b6Emqd1bgUrQgG2z5PP0lAGTCilUwYFJHoBCMaQY9S1CIGlXi94HIHsDq8uHxeUAYRlH4UGh9GcxCd0Y+kduAQO8NOHBqlumsMK4fC2BwK43L6nXt1OEiSRNv2JZQueYs2gviHTsYRHUeH20q7uzO83D8tO4REK9EYR4o5iQxzCjnRGeTFZB1WunmXz83Hxd/wZcki9CodN468jKkZ4xAEAV/Ax5tbPmJh2XIKYnP45cRbeH1eGau21fPMr6aTlWxmcflKXt/4HiZtJL+a9NPDclpw+Jx8vPNrvt67BJWg5IKCM7mo8Kw+x5eWba7l6Xc28uc7JjM05+DTHk5nuuu6Dx+5BXPrbjJ++e8e+z9cVMJ/vio+aLfdD2lxtvH21o9ZU7OJOEM014/8CeNSRgx4CCdZkI4jkiTxt/9uYunmWu6/bgxVwjrm7/6Oy4acz2VDL+j3OFEUeXPLh3xTuoRRycO4e/xN4WCcgaDI0199yib7YgQEzkg+k59NvQClUkmzo5X/bf+MVdUb0Kt1nJN7Bufmz+gzS6mrdCON
H/yJ+Ln3Yhw0sdf+dlcne9srKWuvCkfj3n+eT6TWGHZ4SDMnk9710h3iAPbB6PTYwlG0dzWX0OhoAUJCkWVJJ06bhEGMQXKbcNu02BwBbE4fTo8ff0AkKEooBQGtRoleqyLSoCHarCPOoicpNoKUOCOp8UY0+0VFD4pBGuzNVHTUUNZeSWl7JeUd1aFxIQTSLSkMictjcHw+g+PzjkqgAvYOWj5/HnfFNoxFZxB77s9QdD2lSpKE2++h3d1Jq6udZmcbTY4W6u1N1FobaHK2AoSjio9MGsK4lJFkRaUdUoVSa2vglXXvsKetnAlpo/jZmGvCwraiaj2vbPgvepWWn4+6mb/9q5ysZBN//PlkACo6avj7yldpdbVz7fC5nJc/87AqsUZHC//b9ilrajZh0hq5dPB5nJUztcdYqMvj5+pHvubi6TnceMGQQz736Uh3XTfvj3cSUbaSrAff67G/odXJbX9eyK1zhnLRtMPPIruzuYQ3Nn1AtbWOYQkF3DjyctLMRxdI+WiQBek4Mn9ZGf/6bAfXnltIfHYbL61/m9k507hl9JX93sQBMciLa95gVc1Gzs+fxXXD54b77Fttdh747J/YNZWYxCQenv0zMmMS8AV8fFL8LfN3f4cgCJyfP4sLC888YIXZ+OGTeOtKSb/rFVAoQ0nomvaws7mE3a1ltLtDUbgVKIg3xJNmSiE7Oo3c2HQyo1IOmkL8cPEEvOxu2cu2xmK2Ne2m2hrKTWNQ6xkcl8eguDwGxeWSGZV21KkVDgdfwMfe9ip2tZSyq7mEPW3l+IN+BAQyLCkh2+LzKIjNOez05JIk0rHsAzpXfIg2JZ+EnzyAymg56HEuv5vy9iqKW/ayvWk3JW0ViJJIcmQCM7MnMTN78kHFct+k1vnEGqL5v8m3hSNGV3fW8fTKV2hzdTDGOIvvFwo8fddUCjND4w1On4t/rPsPG+q2MjZlOLePu+6wxXlvWyX/3fYJO5tLSDTGcXXRxYxPHRm+L+5/YTmSJPH03dMO67ynG9113cd/uQ/9jq/JeuA9BFVP56efP7mQpFgjv/vpoaWz/yFBMciCsuW8v+Nz3H4P5+RO57KhFxxW6/tYIQvScaKqwcYvn1nKqIJ4rro4gUcX/41Bcbn8dtqd/bpdBsQgz61+vc/xpd31tfx+0QsE1DbGRE3jvtlXoBAU7G4p46X1/6HB3syU9LFcO3zuQd2jg04r5c/fSvOIKawzWNjesgtHsCtWnV9LwBaF6LAgOixIrkiQ9tmrUgrERRlIiTOSlWyiID2KIdkxhzS/Zn+6UzVsayruSoFRTkAMoFaoKIjNoShxEEPjC8iOSj+qcELHGn/Qz972SnY2l7CzuYSStopwYr0EYxx50ZnkRGeQHZ1Ohjm1V5qJvnDuXkPzZ8+hjIwm6apHUEcd3ux6u9fButotLK1cw+7WMrRKDWflTuPiwtkHHespaS3nmVWvYfc5uGPc9eHoEg6vk2dW/4vtTXugOZvxMTP49dVjwsdJksSXJYv479ZPiNZbuGfiLYedl0eSJLY07uSdLR9TY2tgcFweN468nMyoVP716Xa+WVPFh386/7ASS55udNd1nz77CNpNn5Dxy3+jjOg5NeSf87by/YYa3v3jeaiUR36v2LwO3t8+n4VlKzBqI7h62BxmZE06ofefLEjHgaAocd/zy2jucPH0PZP406q/EpCC/GX2b3tMWNwfURJ5ce1brKhaxw0jfsL5+40vbaou4anl/0RC5OpBV3PxqAmIoshHu75k3q6viTVE87Mx11CUePCIwhUdNXy69B22OytxqBRIogLRGoPel0x6RBaZ0UnEWgyYIjTotSqUSgFRlPD4gjhcPtptHpraXdQ2O6husiOKEoIA+elRTByaxPRRqcRaelfCoiRSY63vipBdwq7mknCIoQxLKkUJhRQlDmJQbO5RD7D6gn46PTZsHjvugAdf0B/2stMo1ejVulBQUZ3pkOfI9EcgGKC8o5rdrXsp
aQ1Fk2h3d4b3xxqiSTMnkRSZQHJXPMD4iBhiDdE9uqk8dSU0vv8EglJD0rWPoYlJOSJ7Kjtq+WLPQpZXr0On0nLZkPM5N2/GAeeedHps/H3lq+xuLePyoRdy6eBzEQSBgBjkjU3vh9Ktdybw+vUPEKnv+duWtlXw3OrXaXV1cMXQC5kzaPZhO18ExSCLy1fx3o75OHxOzs6dTqx7BP/6eDevP3wW8VEn/kn9ZKG7rpv/0p9Rr/4vqT9/AU1Mzy61lVvrefI/6/nr3VMpyDh6r7mKjhre2PQ+u1vLyI5K56ZRl/eadH+8kAXpOPDtmipe/HALv75mNMWB71lSuZrHZ/76gD/qf7d+wme7v+PKYRcxd/C54e2b63bz5LJ/IPnV3D3uZ0wpLMDudfDs6tfZ3rSb6ZkTuHnUFQesWEVRZE3tJt7f8i0N7loUIuQ4g4i6s5hRMJrR+UmH7FK9P15/kJLqDraVtrJhdxN7azoRhFCsrQumZhAZ42ZPWzm7W/ZS3Lo3nKIiISKWoQmFDE0IheE50u6/7lTn3V5sddYGGh0t4WgKh4JBrSc+IoakyATSzElkWEK5mWIMUUc8wNvptlLeUUNVZ23Y8aPB3oRvvxTlAgIWvYlYQzRxhmhiI2KIFgWUaz4nNigw5KrH0cUceV9+ra2Bt7fMY3PDTrKj0rlrwk2kHCCumT/o55X1/2VZ1VpmZk/m1tFXoVQokSSJV5Z/xqL6b0mOSOH3Z97Va/K2y+fm1Q3/ZVXNRobGF3DnhBsP6EXXHw6vk/d2zGfB3uVEqI207cjnz9ddwuCsmMM+1+lCd133xevPolz6Gsk3PYUuuaeLd1O7i58+sYA7Li3i3ElHFlX8h0iSxMrqDbyz9WPa3Z1MyxjPNcMvIeogE/ePFlmQjjE+f5Db/ryQWIue6y6P44mlzzOncDbXDL+k32NWVK3n+TX/5sycqdw6+qpwRbintZzfLXyGgEfLXaN/zvRhuTQ5WvjT0hdpcbXz09FXMTN7Ur/nFSWRlVUbeHfb57S6WxE9BizOLO6xLSN+9AUknHX9MfnMoiTS5GhlQ9UeVpTupNJajajtRFDsS0ZXGJfLoK5YcHERR1bBePweilv3sqNpD8Ute6noqA7HmovURJBqTibJGNflJm3BpDViUOvRqjQoBAWiJOIP+nH5PTh8Tjrc1i6ngdau9A/7opGbtEZyozPJj82mMDaH3OjMo2q5iZJIh9tKk6OVJkcLLa5Q7LruNOWtro4eKeOVUmiuVmZMKHNuYWwOmZa0w+o+kSSJNbWbeG3je/gCPm4dczXTMscfsPz7O+bz8a5vGJcygnsm3oxaqcbl8XPNX99Al7edaIOJ30y7k1RzUq9jv69YzRub3kejVPOL8TcwKnnY4X9RdLW6Vr5Js7uZ4dGjuW/G9SeNW/KJpruu+/LtV1AseIHEqx7FkD28RxlJkrj6ka+ZPDyZOy8bcUyv7/F7+HT3t8zfvRCVQsllQy7g3PwZx20cV472fYxZtL6aNquHu68czr83vUSiMe6AHnV1tkZeWf8OhbE53DzqirAY1dub+OP3LxLwargo5WqmD8ul1tbA498/S0AM8ugZv6Qwrv8WV0lrOa9vei+Um8cVibptDDdMmcnEyHpaP/0ec+G4I/p8oiTS6GihoqOa8vZqyjuqqeioCXe/aZRq8tPSUXvzKNkN1uYIskfkcf3MoYed70aSJKo6a9ncsJMtjbvCkRBUChV5MZlcWHhWV96kzKNyLe/GE/BS3VnX5WUXyg67f+qInKgMBsXlMiguj4LY7MMa9FUIilDiPUMUg+N7xyoUJZFOt41mZys1NTso3TCf5mArOwJulletA0K5i0YkDmZS+mhGJA09aKUgCAIT00ZTEJvD86v/zYtr36TaWs/VRXP67FYTBIErh83BpI3kzc0f8pcVL3Hv5J9j0GlI0eZidKbQplvGw4ue5v4pt/f4HIIgMDN7EgWx2Ty7+nWeXP5PLsifxdXD
LznsyisvJotfjb6Hez/8F1vZyMOLmrh/yu0n3STOE4lCE+oBEX+QCBNC331GkomapkPvGThUdGodVw6bwxlZk3hz0we8vXUeSyvX8NPRVx2w/jlSZEE6hkiSxBcrK8hNNdNEMQ32Zh6ceke/eUmCYpAX176JRqnml5N+Gr5xXT43Ty79J15fkBTHbK6ZNZJGRwuPf/8sAI/P/HWvJ9RuvAEf/9v2KV+Xfo+WCHxlRYyIH8Gvfj4Ks1FLy5ffoNAa0KYcPICrJEm0uToo6Yq0XdZeRWVHDe5AKISOWqEi3ZLCpPQx5EZnkB2VQZo5KTxe4Z4V4P0Fe/hkyV62lbXymxvGkptqOeA1nT4X25t2s6lhB1sadoZdzTMtqZyfP4uihEIKYnN6RLc+VuhUWvJjs3sMztu9DkraKkLdji17+aJkEZ/t/g4BgTRzMoWxORTE5pAXk0mCMe6IRVEhKIg2WIg2WCiMy2VyZBqNHz6JIX8sqgvuZ3drGVsbi9lUv50V1esx60zMzpnKufkzDurhFq238PAZ9/DGpveZv/s7Oj1Wbh97Xb/jSuflz0Sv0vHy+nd4cvk/eHDqL8hMMrOzIsBfrrifPy17kT8ufZ67J9zEhLRRPY5NMSXyxJn38/aWeXxRsojStgp+NfnWw+7CUylUBGoKuWTMeBY1f8ZvFjzJb6bdGY4y8WNDodEhApK370nfCdEGtpW2HLfrJxrjeGDqHayv28obmz/g0cV/ZWbWJK4efkk4VuKxQBakY0hZrZXqRjs/u3QQH+96jSHx+YxM6j/+15cliyhrr+KXE38avmElSeLlDe/Q5GzBWzqWO2+ZhNPv5ImlLxAUg/z+AGJUZ2vkbytfpdbWQLZ2BDtXxnLBxDx+evGw8Ix3d9UOdOlDeqXl7qbZ0cq2pt3saN7DnpYy2rrcv9UKFZmWVKZmjiM7KoPsqHRSzUkHfPrVa1XceMEQJgxN4qm3N/DAiyt44PoxjBu8bywjKAYpa69iW1MxWxt2UdJegSRJRKj1FCUOZmTSEIYnDj7ufdf9Eak1Mjp5GKO7up+8AR+lbRXsbt3L7pYyllWt5buyZUCoBZMTHcp4m2lJI8OSSkJE7BF5KRnyRhM963raF75JVFIuUybPZUrGOAJikK2Nu1hQtpwPd37JFyWL+Mng8zkv/8COCyqFkp+OvooovYUPdnyOKEncOf6Gfh0QZmRPQqVQ8eK6N3lq+T/JTTiLpZvdRKrM/GHmvTy14iWeWfUat465qleGUo1SzS2jr2RQXB4vrX+bB7/7M/dN+Tl5MYc+vuELhCYoF1gKmFl0P39a+iK/X/IMD0+/+7DOc7rQLUiiz9Pn/oRoA202D/6AiFp1fLziBEFgXOoIihIK+WjX13y5ZyEb67dz29hrGJsy/OAnOARkQTqGrNhah1IhEDTXYK2x8+uht/X7xNzpsfHRzq8YlTyMifs9Za6oWs+amk0omwYxKnUQOSlmnlj2PO2uDn4341f9itG2xmL+tvJV1EoVV+Vcz7/fbWbmmDRuu2RY2IaAvYNARyOm0WeHjwt1i9WxqmYD6+u2UmcLRduO0pkZFJ9HYWwOeTFZZJhTeniFHQ6FmdH8/ZfTePz1tfzpzTXceFkqisgOdjWXUNyyF3cgFKE6OyqdSwadw4jEIeTFZJ6UUYm1Kg1DEwrCwSmDYpBaW0NXvqYqytqr+LT4O8SusS21Uk1aV86iVFMSKaZEUk2JxB+CUJnHXYC3vpSOpe+iTx+ELm0QKoUyLJBVnbX8b9tnvL11HquqN3DPxJtJjIzv93yCIPCTIeehEATe2z4fo8bAzaOu6Lf81MxxSEi8uPZNnEYJyKShzUlWspmHp9/N31f9i1c3/A9f0M95+TN7HT8pfTSppkT+suIlHvv+GX458ZZDrrhc7tB4WoReTaoplj/MupfHvn+GPy19gT+eef8BHTRORwR1d5dd3y0ks1GLJIHT7T/uqTt0ah3XDr+EaRnj+Mfat3h6
xctMyxzPzSOvOKRpDgdCFqRjyMbdzQzOjub7qoXkRmf2mSivm492fok/6OeGET8JC4bD6+TNLR+SpE+hvCqd83+axae7v2V70x5+Pva6fud5rKnZxHOrXyfFlMT9k2/n4Rc3kxIXwR0/Gd5DEL11ewDQpRbi8XtYUrmGRWUrqLLWoRAUDInP48zsKQxPGkxKZOIxCSNi8zrY21ZJSVs5pmFlaBPK+V95qLJJioxnSsbYUAUfX9CvS/yR4Al4aXa00ubuwOoJBVr1Bf1ISCgFZTjtdpTeTKwhihhD9BEN1CoVSjK6cjN1txR8QT811vpwmKUaaz3bGotZWrkmfJxaoSLVnESWJY3s6HRyozPJsKT2EGFBEIg77+d4G8po/vRZUm/9Owrdvu65DEsqv5n2C9bUbOKVDf/lwQVP9hrb6YtLBp2Dw+vki5JFpEQmcnZe/3HQpmWOx+V38+9N76POdFHXPI6sZDNalYb7Jv+MZ9e8zpubP0QhKDgn74xex6dbUvjTmQ/w5PJ/8reVr3Ln+BuZkjH2oN9rpyMUjsoUEeqajTFE8cgZ9/DbBU/y9IqXefKsB49ZVJBTAUGpAoUqnMb8h+g0of+NxxcATkwuqXRLCk+ceT/zdn3NJ8XfUNpawa8n30a65cimLIAsSMcMp9tPVaONM2caWGFv4s7xN/Zbtt3dyeLyVczInkzSfk+0H+36CofPSZ50Dk06D7GJAf628EsmpY1mRlbv8D4Amxt28Nzq18mJzuS30+5kU3E7Da1OfnvjOLTqnhWst6EMr1LJ5+17+Grtyzj9brKi0vjp6CuZkDb6qPuCXX43lR01lLWHInyXtVWGw9woBAUZ5hTOyJrImrVeJHs0j//yHMzGo795XD43e9sr9wt1VEuLs+2wzqFUKEmNTCSrSxwKYrNJMycfUUBTjVIdCib7g4yrTp+LOlsjtbZGam0NVHfWsb5uK4srVgGhMayihEGMSx3BmJQiDGo9Cq2B+Dn3UP/WQ7QtfJO4C37R63oT0kaRHZ3Bn5e9yBNLn+eh6XcxOL7/YLmCIHDt8LnU25t4c8uH5ERnkBuT2W/5c/LOoMXRyed8y5KaZUwZcRUAKqWKX078KX9f9S/+vel9jJqIPsXGpIvk0TPu4cnl/+SFtW+gU2kYc5CWUoc91DW1fwqK+IgYfjXpVh7//lne2foJPx1z1QHPcbqh0OqQ+umy02lDVbnbG+hz//FCpVRxxbALKUos5JlVr/HQwr9w+7jrmZQ++sjOd4zt+9FSXm9FksCqKkev0jEhdWS/ZReWLScoBrmo8KzwtnZ3Jwv2LuOMzIls+x6GZMfy9taP0Km0Pbzv9qfGWs/fV71GujmF306/E4Naz7LNdUSbdIwf0rNLQ5IkVjTt5LOMGOy7vmJMchEXDzr7sGfXd+P2e0KpJbrEp6Kjmgb7vtTisYZosqPTmZUzhfyYLLKjM9CpQuJzdpqVXz+3jH98tJXf3DD2sFpioiTSaG+mtCsh3Z7WMmqtDWF37SRjPHnRmczMmkRiZByxhmgsOhMRGgMapQYFAkFJxB3w4PA66fBYaXG202Bvoqqzlo3121lSsRoIjQkNTxzEqKRhDE8afNSCHaEx9HKakCSJFlc7e9sq2NFcwqb67ayr24JWqeHMnKnMKTwLS0o+lolz6Fz1Ccah09Bn9nanjo+I4fGZv+bRxX/jLyte5snZvyFxvxQlP0ShUHDnhBu579sneGHtGzw9+6EDulZfM+Iivtq4nW3ScopbxoSzGqsUSn458RaeWPoCL637D4nGuD7FTafW8eDUO/j9kmd5bvW/eeLM+w/4JN3a6UGjVvbyzBwSn885eWfwTekSZudOO6qn8VMNhcbQb5edXhOqyj3eYJ/7jzeD4vJ4avZv+fvKV3lu9es4fA5m5x5aBPL9kQXpGFHf4gQkKhyljE4p6vfmFkWRReUrGZE0uEeF8VXJ9wSkIBcVzOabD9eSPzjI6qbd3Djysj7Dv/gC
Pv6+8l/oVFoemHoHBnWo73ZXRRtjBiX0CLdi9zp4ad3bbBA6yFTo+M2sew74RPxDJEmi3t7E7pa97GkrZ29bZY/cRjGGKLKj0pmWMZ7s6HSyo9IPONk1O8XMdecW8sYXu1i5rZ4pw/uuVDwBL/W2RmqsDVRZQ7mV9ncx16t05MdmMTFtNPkxWeREZxySK7aK0FiQRWfqcz5Nk7OVPS1lbG/ezdaGXays3oBCUDA8cRBT0scxNqXomHUXCYJAfEQM8RExTEofgyiJlLZVsKBsOV+Xfs/i8pX8dPRVTJ5yGY7i1bR+/Sqpt/0doQ/PzUitkd9Mu5MHvn2CF9e8yeOzfn3AFp5RE8Ed467nD0ue49Pd33L50Av7LasQFOSI0ykLfs7za97g7+c8Gp6MrVGquXfybTz43Z95ZtW/ePqch8P/x/3RqXU8MOV27vvuTzy35t88ddZv+h2XbOl0EWfR9/mwctmQ81lSsZpPir/hnom39Gvz6YZCq0Psx8tO26PLbmCI0pt55Ix7+Pvq10Jz34J+Lig487DOIQvSMaLd6kaIsOLwOxl1AM+6nS0ldLit3DTy8vC2QDDA9+UrGZsyHK1kQhQlKoObiNKbe3kwdfPBzi+oszfy8PS7w7HrPL4AVoePlLh9T/K1tgaeWvZP2twdnN9i54LhM4k5BDFy+JxsadjJpoadbG/ajbXL/TpSE0FeTBaT0keHYrYdRHz6Y860HJZuruPV+ZuITvBi83fS7Gyl0dFKk6OZenszba6OcHm1QkW6eZ+LeW50JqmmpGMeZ0sQBBKNcSQa45ieNQFREilvr2Zd3RZWVK3nhbVvoFfrmJU9hXPyziD+CCf59odCUFDQ5Up+6eDzeHn927yw9g06hs/lzNm30Pj+E1jXf4Vlwpw+j4+PiOGGkZfxz3X/YVX1xoOO1wxLKGRS2mjm717A7NzpBwwQm5MUw65Nw2gvXMOHO77g+pE/Ce+L1Bq5Z+ItPLL4r7y3bT43j+7bWcKiN/OzMdfwlxUv8XXpEi4s7LvCau5wE99PHiqjNoIzsiayoGw5Lr+7T/E7HRE0+v5bSF1ddgMpSAAalYZ7J/+M59f8m/9smUdcRAzjD9Bb9ENkQTpGONx+dNGdABQl9J/pcn3dVtRKdQ938K1Nxdh9TmZmTcLtDSDoHDR4q7hq2Jw+5zDV25v4cs8iZmZN6hG/LhgMtVi63T6rO+v4/ZJnUSDw0PCr0H/4d7QHCEfjD/pZX7eNpZVr2Na4i6AkEqk1UpRQyJD4AgbH5ZIUmXBYXWxBMUirq51GRwtNjpZQpAJnK82OVjrSW/EEPTy29Ktw+UhNBInGOIbE5ZMUGR/ySjMnkWSMHxCvO4WgIDcmk9yYTK4cdhF7Wsv4bu8yvipZzNel33N+/ix+Mvjc4zLAnhQZz6Nn/JLn17zBO1s/Jn/mvZhzRtK5ch6Rw2ei1PcdOHVa5ng+2/0dX5YsOiQHgiuGXcTq2k18XfI9VxX1LXQAeekW/EvMTIgfzdd7l3Bu/oweUTfyY7M5K2cq35Ut47z8Gf16/I1JKaIoYRDzd3/HuXln9NlKam53kTOsb49SgPGpI/i69Ht2NpccM5fjkx2FSoMY8PW5T6ftmvs3QF12+6NSKLlz/I20Odt5cc2bpM5OOmSvSFmQjhG+gIjC2ElSZOIBIyzvbNrD4LjcHhM719duQa/WUZQwiKZ2D8qYhlCuo34cGebt/AqVQsWVP6g89FoVKqVAp91Lq6udPy59HrVCxWMzfkVkfSVNgMqS0Ot8br+Hb/cu5auSxXR6bMQYoji/YBbjU0eSE51xSAP7/qCfOlsj1dZ6am0N1NoaabA10ehsISjuu0nUSjXxhhjijbEUxOawu9TD3jIf910xjRHpGUftNno8UQiKrjQYeVwz/BI+2P4F83d/x9bGXTx6xj3H1EuwG6VCyR3jrmdXSylf7FnIXTOupe61e+lc/SkxM6/r184ZWRN5Z+sntLk6iDFEHfAa
SZHxjEwaypLK1Vwx7MJ+f++C9FCkhDRGsVnaxNelS7h+xKU9ylw6+DwWl6/im71LuXHkZf1e89z8GTy1/J9sayruFWLI4w3ltjpQUNWc6EwEBKo6a380goRSjeTp28suPIY0wC2kbjRKNfdO+Tn/9/XveX3jezxyxj2H9CArC9IxQgBEnZWsqP6765w+F7W2xnCIfwiNWWxtKqYoYRAqpQqjXo0yqok4TUqfk0E73FZWVW/g7LwzenWvKBQC6QkmSmvbeWblF3gDPp44834SI+Ox2jcBoDLte6KVJIklFav537ZPsXrtDE8cxB3511OUMOiAXWGiKFJtraekrZy97ZWUt1dTZ2sIx5VTCgoSI+NJNiUwJqWIpK4o14nGOCx6U48Kz1rg5fanFvHxV82Mv7PgwF/ySUSsIZo7xoe8iZ5e8TJvb/2YO8YdXmxASZJweUKZbO0uH15/EFGUUCoUGHQqTBFaok1atCoNg+PyKO+oRpuQiXHIFGwbvsEy/qJeqQi6KYwNTTmo6Kg5qCABTE4bw6b67VR01PTyDuwmLkpPfLSB8kofI3OHsrJ6PdcNn9ujoonSmxmZNIQ1NZt6TGn4IcMSClEr1exo2tNLkFo6Q91S/XXZQWgMMFIbQbvbetDPdrogqNRI+wXo3Z/wGNJJ0ELqJkpv5qqiOby28T021m9nTErRQY+RBekYoVKLEHQfsGla0VGDhNTDoaDN3UGbq4M5XbmPRIUbhcGBWew7MOXyqrUEJZHZuX0nLivKi+WrvQtQtlfyq0k/DQ/aBx0dIChQGkIiZvM6+MfaN9ncsJOC2Bzun3r7AWfAt7ra2VS/nS2NxT1SR0RqjeREpTM6eRjplmQyzKkkRsYf8pwes1HLbZcU8bf/buSdr4u54fzBh3TcycKIpCHkx2ZT1lbZbxlJkmjpdFNWa6WiPhTNo67FQVO786BdLAqFQFJMBL6MWrRaJTVNduInX4pj5wqsG74ienrfrs/mrla6w+c8pM8xKD4kYHvbKvsVJIDhubGs2t7ALVOHsaF+Gw32JpJ/8J8fnjiY9XVbaXG2EW/sOwW5RqkmOTKBentTr32tXYIUd5C0EwpBgSiePBXw8UZQqqAfQdJ1tZBc3r73DxSzsqcwb9fXLChbLgvSiUTQecADUbr+n0ZrbQ0APVIEV3bUAJAdFYrRVdpeCYC7ve/B5bU1m8mJyiA5snfXG8CooWa+cZSRpstjYtq+uQBBtw2F3oigUNJgb+aPS5+n023l5lFXMDt3Wp/dNE6fi+VV61hetY7StgoA4iJimJA2isFxoSgOcRExRz2B9oxRqewoa+WjxaUkxhg4e0LmUZ3vRLKudgu7Wko5P29fpAKXx09JdQfFlR3sqWqntKYTmzPU9y8IkBgTSqE+LDeWWLMeS6QWU4QGrUaJQhAIBEVcngCdDi8tHS52teyhXNFER3ked6xcTFJMBLeaC2H911gmzUWh7j2XqzvVx6Hme4rRR6FSqGhxtR+w3Ij8OBasq0bpDbXMam2NvQSp+6GsydnaryBBKPWHJ+Dttb3NGhKkGPOBU6o4fC6Mx6Gb9GRFUKqRgn13ySkUAnqt6oTPQzoYSoWS6ZkT+Gz3dzh8zoPGXTyugvT555/z0ksv4ff7ufHGG7nmmmt67N+5cyePPvoofr+fpKQknn76aUymY5sa+0Sh1YWe1BTB/m+iFmcbaqWaKN2+bpb6rrk73Tdxd+rumkp6xaVy+Jzsba/i0iHn9XuNnfb1CAqRll0ZeM4JhCfMiR4nSl0EzY5WHvv+7+G4eH25f3e4rXy2+zsWla/EG/CSYU7hqmFzGJ864rCdGkRRxOl34faHEuV1p1hQCAq0Kg0GtZ4IjYGfXVJEa6ebFz/cSpvVwxVnFYTj752MBMUgnxR/y4c7vyA9Mo143whe/HALuyvbqW6yI0kh8UlLiGTc4ERy0yzkpJrJTDKFn2YPhZLWcr5ftpwUfSL/d8PNbC/tYPW2Bv5Xmc5d
kcW8+/JbjDj/UoZk9/T229NaDoSiORwKgiCgVqh6pMDoi+F5cQgCVNSEhMTudfQqo1WGxkd9/TzNd+PyuYjpI4J3m7X3pNgf0uhoJiAGSD5AqKTTDUGpQgr0/50adCrcnpNLkACGxhfwafG3lLdXHzSJ6HETpKamJp555hk+/vhjNBoNV155JePHjyc3d184nSeeeIK7776b6dOn8+STT/L666/zq1/96niZdFyJMIY83Jy9788wnR4bFp2pR4Xe5upAr9KF58802luIVJto9gjsKm9jeP6+uUpl7VVISAzqJySRP+hncfkqBkcPYeN6Fe8t2MONFwwBQPJ58Ku1PLviJXxBP7+f8X+9JhUGxSCf71nIvJ1fERADTE4fy3n5M8g+QBcOhFKv11obqOysoc7WSIO9mRZnG+3uTmxeR3i+Un8IgoBFayI2M5r0SBUfbqtgZfk2fn7ONIZlnTwxyyRJorXTw5q9u5lf+SlWsRmhI4Xd6/PZLe4kQqeiICOayUXJFGREU5ARddgpN7oJBAPM37OAD3d8QWxEDL+ddidxERbS4iycNymL1s4RNPx7C+mdG3nwHwmMyIvjpguHkJ1iRpREFpavIMOccsDJsfvjC/hwBzwHfYI1G7XkplrYXtYEsfTp+ej0h1pnhgO0zgLBAA2OZob24ZHabvNg1KvRqPvv9t3RXAJwwjKZngyExpD6FxyDToXrJBSkzK6Holpbw8AJ0qpVq5gwYQIWiwWAs88+m2+++YY777wzXEYURZzOUB+32+3GbO49QGuz2bDZbD22NTY2Hi+zjxhTZOjmaW7t2y0TwOl3Y1T37Be3ee09vPI6PJ3EG6OxapQs31rXQ5CqO+sByLKk9Xn+7U27cficXDR+OtEdfj5ZspdxQxIZnBWDFPTzpT5IjbWe3067q5cYtbk6+Puqf1HaVsHYlOFcN3xuv267kiRR0VHD5oYd7GjeQ2lbRfhpWKVQkWCMJT4ilpzoDMw6E5HaCAxqPRqlGqVCiYBAUAriDfhw+d3YvA463Faana0EDE2o0620sIfH1yxBvzKGooQhXDJiKjlxJ25Wvs3po67ZQU2znaoGG5UNNsob2/FG70SZUAUBDVG2iQyLHUb+iGgKM6NIi4/sMSH5SAiIQVZVb+DDnV/S5GhhQtoobht9NUZtT6GItRhQTz2fiO/+zS9mRvGfNZ386pklXDAlm/i8Jmqs9fzyMCaNVnbWApDWT/De/RlVEM9H68rQxNKnw0StNdQ1nWTsv/VS0vWf6SunTofdS9QBWkcAK6s3kGSM77fr+lTkYHXdgbrsAAw6NU73yTWGBIQftp195HL6IcdNkJqbm4mL21eZxsfHs23bth5lHnzwQW666Sb+9Kc/odfr+eCDD3qd56233uLFF188XmYeMzTqrrk/jf03kfxBX695RW6/B4Nq383n8LqIMliYOCyJFVvq+OlFQ8Pdbk3OFiI0hl6VUzebG3aiVWkZllBA4RzYUdbGU/9Zz99/OZ0G0csKlZezc6czIqmn40CdrZE/LHkOl9/NLyfe0sMLcH9aXe0sKlvJ8qq1NDvbEBDItKQyK3sKeTFZZEWlkWiMO+r5Qjavg12NZXyzbRO720tY176UdYuXovHFkBcxgklpY8hIMBMXpcccoT0sEZAkCY8viM3pw+rw0m7z0NbppqXTTXOHm4Y2J42tThz73dhajZL4NAeqwRsJ4mBM/DhuGXspMcZj171s9dj4vmI135Yupc3dQYYlld9Ou5MRSUP6PcY4ZBptC99irK6SKb+5kv98XcyXW9ej8WygIKqgxxjiwVhXtxWFoGBw3MHzZA3Pj2Pezk5g39Pv/uxo3kN8REyvVOf7s6ZmE2qFiqKE3k/MnXYvUQeIWF3ZUUtxSynXFF1yTAIAnywcrK4TlKp+vewg1MVZ23zsk/QdLYIgdD2Eigcte9wEqa/M6Pv/eTweDw899BBvvfUWRUVFvPHGGzzwwAO8+uqrPY654YYb
uOSSnum/Gxsbe41HDTTdn620phNJkvq8UURJ6uVOHRCDPSYGeoJetEoNsydksmRjLd9vquXciZkAdLptROv6v8n3tJaRH5OFWqlGrYSHbhrHfS8s4/evrSElyYUaemWvbXd18sclzxOURP4w694+xxxanG18sOMLVlStQ5QkihILuXTweYxKHnpEURoOhklrZELGcCZkDEcUJdaVVvJN8SpKA9vY6V/E9uJVBBbmEGxNQaFQYIrQEKFTo9cqUauUKBQCCkFAlCQCARFfIIjHF8TtCeBw+wkEe98YKqVAXJSBhGgDU0ekkBxnJDkuguRYAwtrv+Wr0qWkmBK5feztRxz/74cExSBbG4v5vmIVG+q2EpREhsTnc8voKxmVPPSg87+UhkgM2SNw7lpJ9MzrGDsBVgQ3I3oi2f59Gt+aqjin679zIDwBL9+Xr2RU8rBDmktVkB6FKqqFSCG2V3mX3822pt3Myprc7/Fuv4elVWsYnzqyT6eLdpuHQZn9Z4edt+sr9CodZ+ZMOaitpxKHVNf1Ua92E2fRs6Wkud/6Z6Bw+z1ISBgPMazXcSEhIYENGzaE3zc3NxMfv68JX1JSglarpago5Ap4xRVX8Nxzz/U6j8lkOiUcHZRdQmNzeahuspOR2NtmhSDg/0FlKAj0+JOJkoRSUDA4K5rcVDOfLNnL7PEZKBUCTr+r31htoihSa2vkvPwZ4W0ZSSYevH4cf3hrOc0pXiYGND0ChIqSyPNr/o3T7+Lxmb/uJUaiJPL57oV8sPMLBODs3OmcVzDrmIfLORAKhcCEgiwmFGQhSVezoW477237ghrtDmIKWhmingmeSJxuPx5fEH8gSFCUQuIvCBh0KixqLVqNEoNOTYROhdGgwRyhwWzUEmXSEmPWYzH2bml5Az7+uvJltjYWc07eGVw7fG6/2X8Ph0Z7M4srVrG0Yg0dHiuRWiPn5s1gZs5kUk0H7zLbn4jCCVjLNvLWqjf4qnY9OVEZ3DX2Nl7x7uEfH22lucPFdecOOmAF9Vnxd9h9Ti7umnpwMFrcLQjGTlT2Eb32rahahz/oZ2rmuH6P/3bvUtx+D+cXzOq1T5Ik2m2efrvsdrfsZW3tZi4bcv5hpZA/FTikuu4Av2OsRY/bG8Tp9mM0HPuMykdKd1ddhHoABWnSpEm88MILtLe3o9fr+e677/jDH/4Q3p+RkUFjYyPl5eVkZ2ezaNEihg3re+7NqYBW2dXFoAyyeU9zn4KkVmqwe3vOC1EqVAT2m0uhFBSIkoggCFw2K58/v7WepZtqmDkmHX8w0G/q7nZ3JwExQOIP+u1HFcZz4XkRfFMPGc0ebE5fOMfM9+Wr2NVSyu1jryMzque4lMfv4ZnVr7O5YQfjUkZw46jLiDX0/9R6IhAEgbGpRYxJGcbyqnW8vWUeqzwfcMOInzA7d9oxfSoUJZFnV7/Gtsbd/HzstczM7v+J/1DPt6l+B1+Xfs/2pt0IgsDIpKHcnDWR0UnDjij5oSRJlJgjeT0tmtba9ZyZM5UbR16GRqnm0ZvH89LH2/hwUSkA15/X9/yu/2fvvMOjKtM+fJ9pmZmUmfTeeyAJCaH3KoioKApi1/WzrH11i72srrruuq517b03RJDeWyChJJCekN57Mpk+5/tjkkBIIaEISO7r8pLMnHPmzWTmPO/7vM/z+xU1lvBTzhomB40Z9MpvRe56JEhpLPbAZhO7A7nNZmNl7kbCXO0WHn3RYdLzc846knxH9Nnv1NZhxmyx9VnybbFZeS/9K9zVriw8Rin/wkHE3oLfN56djcR1zfpzKiDVdlrQePRRUXk8Z3SF9MADD3DDDTdgNptZvHgxCQkJ3Hbbbdx7773Ex8fzj3/8g/vvvx9RFHF3d+f5558/U8M543QJPHp7yNlzuIbLp/WuhFPLVZSZK3s8ppQ5oLcc9ThRSOUYrfbCiPEjfQkP0PD56px+FbG7aO4UP+1L3cHkUIdSlOCr0/PXN7bx1B8m4KZ14Lus
VUS5h/WSKDJaTDy/9XXyGo7wh9FLmRN+em/2p4ogCEwNGcconzje2PMJ7+/7ipKWCv6QvPS0ia2uyd9CemUmtyQvOaVgZLPZ2FGaxg9Zv1LRVo27ypUlIxcyI3RityjuUBFFkey6fL49vJLDtXl4yBTcYVQzM2VZ9zFSqYQ/LrZL6ny7IR8vV3Wv9F2zoZWXd7yD1sGFm5OvZjBUt9ex5cguopwS2d8ho6HF0H0j3FmWRlV7LQ9OvK3fz8uP2atpN+lY0o+yeF2TfTbtoe2t0vBT9hpKWyp4ePId3VYmFxKiKA4Uj7rfs/pmPaF+/af2f2sqWu3Nz/7OJ66YPaN9SAsXLmThwp4fvHfffbf739OmTWPatKF7ZpyLOHcWGkSEqNm5tZ7mNmMvK2EXhSPtx62QnOTqHtUnKrmKDrM9QEkkAjcvGMFj/9vJ8q2FyCTSfvtEuoKaStZ7ZnmkuYwQuSMeylYamw089N+tLF6koaGjiZuTru5183h/31fk1Bdy/4Q/nLTR1m+Bi9KZv0y5ky8zlrM8Zy06Uwf3jL/5pJxfj8VsNfND1q/Ee0dz0Ul4unSRW1/Ie+lfUdJcTrDGn3vH38L4wOSTHp/VZmVPxQF+yd1AfsMRNEoXbk66mtHl5bSnrsBm1CNxOHojFwSBO69MpK5Jzzs/ZRIT4kaI71Gljr9v/i9txnaenvngoHX4PjnwPTKpnFlBM9nPYbtNhKsKi9XC15krCNb4MzZgVJ/n1uoaWJW3sdOmpO9WgupG+3fB261neqe4qZzvD69kYlDKhaNd1wfCQCsk7dEV0rlERVs1SplDn5Pl4zm92v0XMF2b+wF+CmwibD9Y0ecxeosB4zGKvS5KZ9pNHd1pO2cHxx7NholRnowb4cM36/OQIEdv7tsx0mqz7031dbOraa/HR+GCYOrgxbsmIJdL+WT7JhQSh146YhnV2Ww+sotFsfPO6WDUhUSQcG3iIq5LvIJdZem8uecTbIOo5hmI3PoiWoxtzIuccVIrQ6vNyucHf+SJDf+i3aTj3vG38OJFjzA5eMxJBaNmQys/ZP3K3b88zis736PV0MatyUt5Y8GzzI+agVPwSLBZMVbm9zpXKhF4cFkyjko5r39zAFEUqdU18OTGf1HVXsvDk+84YZ9ZF3srDpJWcZAr4+bj72pXYOhSoFhTsIUaXT3LEi/vtxjj84M/IgjCgIrilXX2z76fx9FKUrPVzOupH+Hk4MStyX3bWlwQdHVb94PWWYlUInRLL50rVLbW4O/sM6jv0nBAOk04KdTIJDJEuYFQPxc2ppX1OsZNpQWg4Rh5FjeVBhGRZoNdJFKrdKHJ0FMw8rbL47GJUFFlpK0fbbKuogrLcdpeFquFDrO+uwTX38nKv++bioO2BX2DC9+sy+9REfnd4ZV4qt1YPIAaxLnIpTFzWBp/KdtL9vB15opTulZFq733o799kIEwWky8sO1NluesZVbYJP497wkmB485KSv0goZi/rv7Q+5c8QhfZf6Mn4s3f558B69e/DQXRU7rNoF08LfblRsq8vq8jsbJgesvjiW3tInv9uzkb+teoFnfwqNT7z5ho2IXbcZ23k37gmCNP5dEz0bRqSBiNttoM7bz3eGVJHjH9rBVOZacugJ2laVzWczcAcVeS6vb8NAoUSuPFo98e3glpS0V3DnmujOiqH7+MHCDuVQi4K5VnZMrJD+XwfWLDWvZnSYkggR3lZb6jkZmjI7ngxWHKatpI9D7aNNrl3dMXUdjt/5X15ezXteEh9oNd5UrOlMHBouxO0/u7aZm6ZwovsjIQilvxSbaet3guo49Xhusq2FVpbKv4Cyt9Tj5u2GVtxOkDePLtblU1LZz39IkmoyN5NQXcm3CIuSnoZrst2ZR7DzqdI38mL2aUNdAxgcmn9R1zDb7ezbUijqrzcrLO/5HRk02t6dcy6yTKEsWRZF9VYf4KXsNufWFqGRK5oRPYV7EtF6acV1IlY7IXH0w1RT3e93J
Sd58lJ7Pt0dWE6Dx5aFJ/9fv9foa09t7P6PNpOORqXfbU8ed3ltSqcDXmSvQW4zceIxh3/Hnf37wR1yVmhMWIxSUNxPmr+3+uaixlJ9z1jE9dEKv1fwFh8iAKySwp+3qms6dgGQwG2joaBq0H9LwCuk04uXkTl17PdOTA5BIBDbsLe3xvHen0GRNe93Rcxztj3VVonQHLV1Dj3MXTY/ATaXFKlqpbm7ieJw7JV9ajcc1xnV+fgWVfWZpbqxCZ+rAJtqYkxzJjQvi2Hqggife2UV6eTbAoFR5z0UEQeDW5CVEuoXw1t5PqT3uPRwsLg72SUSXS+5g+T5rFQers7ht9LKTCkYZ1dn8dd0/eHHbmzR2NHFT0lW8fek/uCV5yQmDh8IrGFNtSZ/PHag6zF/WPY/FvRBbfRBPT3940MEIYFXeRvZWHOTahMu7qzHbOlN1HTSyrmgbcyOm9hANPpb9VYfJbShi8YgFAxYjtOpMlNe2Ex1sn6TZRBvvpn2Bi4NTL9+lC5OBq+wAPDSqcypl16XVOVhFjeGAdBrxcvSkWlePq4uSsXHebNhbhtlydD/DVaVBLpVT1XZsQHJHQKC63f6H69Ieqz4maAHIpBIun2APFB+vT+N4XDvTgY365h6PKzvL0Y0yGUhkmBoqutN6CqmcxTMjefi60eSWNPLN9n3IJLLzWo5FJpVx38Q/gAhv7fmkzwbtExHQebMubu69D9gf1W21/Ji9hinBY4fcsNmob+al7W/z9y3/RWfq4K6xN/Dqgme4OGrmoNW6Fe7+mJtrekjLlDSX84+tr/P81teRCAKXB1yL8UgcdY39y1sdz+HaPD49+ANj/BNZEHW0b6iqQQeIbK5Zg6NczdUjLun3Gj9mr8ZT7caMsIkDvlZGgf0zPzLcPinbXrKXwqYSrk+88oQaexcCNrMRQT5wObenq4qGFj0229A/92eCyjZ7+nvYMfYs4OvsSZuxHZ2pg4vGh7D7UDWph6u6S7YlggQfR48eKyS5VI6noxuVnaWRvp36cZWtNXBcpXdKWBif5kBqQQFZR1KICz3aoOogU6BxcKa2vb7HORKJBBcHJ5qN7cjd/TDXleEks6eiDJ3FFVOTAnBSKXh+wyFkZhk6gwWnkxQFPRfwcnTnusQreDf9C7YWpzItdPyQzg/U+KOQKjhQmUO4UywmsxWzxYbFasMmit0TValEQCaVIJdJ+LFgDQIC1yYsOuH1jyWt4iBvpH6M2WZhWcLlLIiaeVLpUpnWG2xWLG2N1EisfHd4FbtK01HLlVyfeCXzIqeRX9rKl2ynua235UNfVLXV8q8d7+Dr7MUfx93YY1O6oLwZtVcjeY0F3JK8pF85q6LGEnLrC7kp6aoTFnSkZ9fiqJITHeSK1Wblm0MrCHUNZFJw31JWFxqi2YjkBBMUd40Si1WkrcOExunsl8ZXtNYgCMKgRX6HA9JpxLdzZVHVVktSdDAeWhXrUkt79BD5OHt1B58u/F18Ke/cSHdUqNEqXbq9k47Fy9EduUSG3NXAG98d5NUHpyOTHl3k+rn4dG/IH4u3owfV7bU4eIegLz6Et1yNXCrvsZpKjvEipdyX9JoqXvx4L0/dNh6p9PxdQM8Kn8SmIzv5IvMnJgQmIyCjoUVPXZOe+hY9jS0GmtqMNLcZadHZHVvbOszo9Gb0BjOySBc2tu/j128HsYku2FAm7cHa7MUtT25F46TAzUWJl5saf08ngnxciArU4uvh2H1TF0WRn7LX8GXmcsJdg7l3wi3dk5GTQabxoFoh5ce0z9jTUIBCpuCy2LlcGjOne3Vh7Zw1D8bWo8XQyvNbX0cQBP4y5a7uPruusR/Ir0ERmoe7szezw6f0e50NRTtQSOVMD5nQ7zEAVquNPVnVjI7xQiqVkFq+n1pdAw+NWnxSBSG/R2wmA4Ji4IDUZdnR0GI4RwJSNd6OHoOeZA0HpNNIV6qrsq2GCPcQZo0J5Jv1edQ2deDV6X7p6+zNvqpDWG3W
bhHSABcfMmtysNlsSCQSAjV+3YrJxyKVSAlw8cWmspGzqY2ftxZxxYyjDbhBGj+2FO/uVfQQqPFjb8VBFH7TaD+0FWtbI35OXpS39GzSjQvwZ1/DHg4UVfD5mpx+u/vPZdo6TBRX2tW5HZvjKbCt4Na3PqClxLeXDJiDQoqrswMaRwdcnZUEejvjpJSjUsootxk4oNvMLVeE4q52Qy6TIJVKkAgCgmCvwO3SyitoKeCXSgszw8fhEhpAc5uRhhYDJVVtpB6q7g4EWmcHkqI8mZYcQIF5D99nrWJy0BjuGHv9KUkSFTWW8m3RRtKD3HFoOsJlsXO5JGpWDxV5gJoGe4WmxwDW4GD33Xpuy2s06Zt5Yvr9vWa3RRUt1AsFKCQtXJOwpN+Vj81mY3f5flL8E1ErBn7N9NxaWnUmpo6yT962FKfiqtSQ4nd+7meeCUSzAcmJAlKnwkVjq4Ew/7PfHFvcXEaQZvAq/cMB6TTi7ejRYz9o9pggvl6Xx4a9ZVwzNxoAf2dvrDYrtbqG7hmxv4svFpuFWl09Ps5eBGn8WVe4tTtAHUuwawD7KjMZHTuZr9blMjMlsLsBN8w1iDUFW6hsq+mhiRbuFsLGIztpcbcHTENFLuFuwaSW7+/xGl2utaNHy/l+Yz4T4n2JDOy/RPdsI4oiFXXtZBTUc7iogdySJmoajzYZq1Uy5DGuSD3LWRI5HW83NZ5aFe5aJW4uPUuLj6e0WcOBNZtx8+tgaujAN8UjB1ORVkn4w+xpvTbtzRYb5bVt5JU2kVnQwN6sGraW7EYRdohIx3juGnsjMunJNcqWNJfzdeYK0iozUMuUzGrUsXDsUvwS+i7ZP1TUgJNKjo97//sx7UYdz219jfLWav48+Y4+5YR+3VWE3L+QEE0gY/1H9XutI81ltBnbSRlEddyvO4vROjswOtYbk9XMweosZoVOOm3KG78HbCZDn+7Ax3LsCuls027SUd1e10sJZiCGA9JpRCaV4a52pbpzH8fH3ZGECA82pZWxdE4UgiB0b+5VtFZ3B6SuTfTy1qrOgOSHyWqmRlffK40T5hrE5iO7uHJuAPtfq+PrdbncfoX9hhndefPIrSvsEZDivOyWArliB5FyJYaSw8THjWLjkZ3kNRzp9qSJ9ghHJVPi6tuES7Yv7y0/xAt/nHxOyQaJokhuSRNb9peTeri6u8TVzUVJbIgb8yaEEOanIdjXGTcXJSty5Xx28EcWTPdDOwRl8gAXXxykCo40lzGNgfegcuoKCHML7rOCTC6TEOqnIdRPw0XjQ8irO8KTm1YhN/iQsceXx0t38bcbx3brCw6GRn0zX2YsZ2txKiq5kiUjFzI3IIW61+/EsZ/NbKPZyu5D1YyJ8+43Zdesb+G5ra9T0VrNgxNv69P6oqnVwOaiVCQhepYkLBzws5FbXwhAnGfUgL9PWU0badk1XDM3GplUQnZdIWarmQSf3uZ9FzKiyYCgHvgz3GXb0dI+uH3CM0lho73qsy/Nwv4YDkinGW8njx6FBTNGB/Dq1wfIL2smKsi1R0DqKq8O6DRFK2upIsU/sds8r6S5vFdA6vrjdkjqmTM2iNW7S7hyZiQeWhW+zt5olC4crs3rUXbs5+yNl6M76VWHiQ+KRX8kg6RZ1yGXyNhRurc7IMmlciYGpbCtJJVFs8bw0U+FZBTUkxg5uA3JM4nRbGVdagkrthVRWa9DIZOQFO3FVbOiSIz0wNfdsc+bY1cpcnVb3ZACkkQiwcvRvVf5/fGYLCYKmkpYEDXzhNc0WUy8ufdjtEoXXrz0QfbENPHm9wd55M3tvHzf1BNam9tsNn7N38RXh1ZgtVlZGDOby2MvwknhiM2gow4QbX2rVKxPLUGnN3PRuL5vDmUtlbyw9Q1aje38ZcqdJPr0na79Yk0OeBfh6+hDcj9NsF2UtlTi7OB0Qs2+7zbmo5BLuXhiKAAlndWNg1WQuFCwmY1ITpD6lMukyKQCeuPZd449UJWFXCIbkqvv
8Hr4NOPp6N7dUwR2gVSpRGBnhn2/pqtooaLtaPGBWq7CXe1KWWchQ6CLL4IgUNrSu+w4RBuATCIjr76Iq2dFYRNFlm+1z0QFQSDeO4aMmuwe8jmCIDAuIMn+eHAc5sZK5O0tjAtIYmtxKh2mo30LC6NnYbFZaVBmoHFS8Mv2otP7Bg0RURTZsLeU255bx/9+zMTZUcF9S5L49Ol5PHbLOOZPCMHPw6nfmbrebJ8pnswejVLmgMk6cIl0Vl0BVpuVEV4DrwLArjhQ2VbDnWOvx0XpzOyxQTx681hKqtv4bkNv2Z9jqW6v44mN/+LjA98R5xnBK/Of4LrEK7oLFrrK2/t6H3R6M1+tzyMu1I0RYb2tQ3aX7ePR9S9htll4auaD/Qaj/LIm1mWnIVG1c8WIi064cq7T1Z+wuqq4qpVN6WVcPDGkO/Vcq2tAIZXjOoD314WIaDIgnCBlB6BykJ31gGSz2dhTvp8RXlFDEsIdDkinGS9HD5oNrd0KCU5qBfERHuw+dLRIwc/Zu1elXaCLb3eRgUKmwM/Ju88+GLlUTphrEHn1RXi5qZmU4Me6PaUYTPYPYLLvCFqN7RQ0FPc4b2rIOKw2K+lK+02kIz+NS6Jno7cYWJW/8ejYXHy4KGIaG4q2kzhKQlp2DW0dg+9bOZ3o9Gae/SCV/3y1Hy83Nf+4axIv3zuV2WODBtz/6UIURTYf2YmTwpGgfpo2B6LVpDuhh0taxUEUUvkJ01LFTeWsyF3PjNCJPeR6Rsd4M26ED+v2lPbbM7W7bB9/WfM85a1V3DPuZv465Y94H3ejFztL+IU+7Ek+/OUwre1G/nDZyJ4mmWYD7+z9nH/vfJdAjR8vzPlbv+kVo9nKK1/uR+lfjouDM5P6cRU+lmZ964BBRRRF3v0pEyeVnKtnH33/Wg1taBycz6lU8bmAbRBFDXBuBKQD1VnUdTSesPfseIYD0mnGs9MzqKHjqJrCmDhvKup0VHdWOfk6e1PVWfjQRYDGj4q2GmydKZdgrT8lTb318ABiPMMpbCrFZDExf0IIOr2Z3Zn2gJfkOxKpRMru8v09zgnWBhDlHsa6in1IPAPR5ewmzC2Isf6jWJ6zjvpj9PWuib8UX2cvsmzrsMraST3Uu5T8TNNhMPPImzvYl1PLbZeP5KW7pzAy3GPQ59tEG58e+J4D1VlcGTd/yH5DTfoWatrr+nTQ7cJoMbGjLI0Uv4R+farAPlt8J+1znBRqrk+8otfzsSFuNLYaet1EbKKNrzNX8O+d7xLg4sPLFz3GlJCxfbsRG+yipBJlz4KF7QcrWLO7hEXTI3oUqByszuJPa/7OhqIdXBozl6dnPNhvak0URf73QwblTTXYnGqZHT55UO9n+wCGkgCb0svIKKjn+vmxOB/j39NhMaD+nZnvnSqiKCKajINaIUkkdrfks4VNtPH94ZW4qbSMGaDopS+GA9Jpxr3zS32sgOqozj2YjAJ7Ks/byYM2Y3sP5e4AF1/MVnN3ui/ENZC6jkba+xBTjfWMxGKzkN9YzIgwdzw0SrYfPJoSTPSOZVdpei/V60tj5lCjqycrOAxDWTaW1nq7JIso8vaez7qPV8qV/HnKnUgkoIpLY2t2zml6dwbPf77aT0l1K4/dMo5Lp4T3cnMdiIKGYh5f/09+ydvAvIjpzD/GRXewrM7fDMC4fqwUADYUbbc3QUcObFHxS94GChqLuSnp6j4bSHUGMxIBFPKj1XYWq4XXd3/E91mrmBE6kadn/mlAgzNLu30CJHXUdj9WVNHCq1/tJzrYlWvn2Vdlte31/GvHOzy35TVkgpSnZj7AdYmLBgwwK3ccYd2eUkaMbUciCMwZoO/oWI7VYzye+mY97/x0iNgQNy4aH9LjOZPFhIP03DGYOyewmADxhHtIYO83G0yv2Zlie8le8huLWRp/6ZDV7YcD0mmmS9G7SX9UBy3Q2xknlZzcEvtNw7PzxnLshrl/pxpuRZs9lRfcWdhQ2kfaLsYjHEEQ
OFybZ7f4HunL/rw6TGa7JNDUkHE06Js4VJPb47wU/wTCXINYYajEJED7oW14OXlwY9JiMmqy+ebQL93H+jl78/j0+5DJIUe+gu3FveWKzhQ5xY3syqxi2UUxpMQOTsbIYDawtTiVJzf+i0fWv0htRyN3j7uJm5OvHnJj5ZGmMn7JXc/EoJR+Nd9aDW18f3gVI7yiiPHobcbYhb00+2dS/BP7THOJokhadg2Rga7dTc4Gi5EXt7/J9tK9LI2/lDvGXHfCFYml2b7ilmk6pacadDz93m6c1Ar+duMYDNYOPjnwPff/+jT7qw5x9ciF/HPeY8R6Rg543V2Zlbz7UyYpI9ypFrMZGzBqQLXuY3+v/gKS1Sby7y/2YbXauP+apF6TDZPVhPwkHHR/z3SnZAchJWW1ikjOUrqzur2O9/d9RaRbCFNDxg35/OGAdJrp8kVqMR4NSIIgEBGgpaiiGQBPtX1juf6YtN6xKg9Ad6qopI+A5KhQE6YN4lCNfeWSFO2FyWztDnhj/BNxVjiytmBrj/MkgoQbkxbTYGhhY3AQbQc3Ioois8ImMzN0Ij9k/cqqvKP7SSGugdwQeRs2vSP/TX2ff25/u5fG3plgZ2YVMqmEhVMGttSu1TWwvnAbL2x7k1uX/5nXUz+iUd/CdYlX8OrFTzE1ZNyQ9yFKmyt4fuvrODs4cXPSVX0eYxNtvLX3UzosBm5JXtLva3SY9byy8z0cFWpuT1nW53F7s2s4UtnKnHH2HjCdqYPnNv+XjJoc7hhzPVfEzR/U72CqK0OQOyDTeFDb1MFjb+/EZLby15uS2Fy+ibtXPs7KvA1MDhrDqxc/zeIRF5+w0GNfTi0vfZpOVJArCWN16Mx6LjlGz24gjFYToij2qcX3+epsMgvruX1RAn4evZUwjFYTiuEVUg9spk7TTsWJU3YWq62HgstvRYdZz793vIMEgfsm/uGkFDYGNQ1ZtGgRy5Yt45JLLkGlOvGS8UJGLVchFSS0HmOyBxDk68zqXSXYbGJ3rv7YfSZnhSOOclV3U62rUoOTwpHS49QUuoj3iWFFzjo6zHriQu0rrqziBuIj7DIds8InszxnLdXtdT0qnWI9I5kTPoX1hdsI1zfhUXoYVfBI/pCyjHZzBx/t/5YmfQvXxF+GRCJh2ohI3vlmAuGjG8moPsD9q55iQmAyF0VMI7pzpXa6aWjR46FVonI4+vG02KyUt1SR33CE7PoCcuoKuve9PB3dmR02mXEBo4jxjDipL4JNtLH5yC4+3P8tarmSx6bf2z25OBZRFPki4yfSKzO5KemqfhWurTYr/9n5nr06bvr9fV6rpd3IW99nEOjtxMyUIJoNrTy35TXKW6t4YMIfhmSfYawqQOEVQlVDB4+/vZN2g4F5lwi8lP6SvTnVP5GlIxd2txSciLTsGp7/aA9B3s48eEM8j216jnjvmD4bZfui1WBXndc49FSL2La/gm835HPR+GBmjw3q81y92dA9QRvGTvcK6QRFDVabSHuHaUh9bacDg8XIP7a+QVlLJX+ecidejr2rOQfDoALS448/ztdff82rr77K3Llzueaaa4iMHHipf6EiCAKOCjXtx9iSA/h7OmEyW2lqM+DqrEFA6KElJwgC3k6e3cKrgiAQpPHrs/QbYJRPHD9lr+FQTS5jA0bh7+lIfunR682PnMHK3A38mLWaO8de3+Pc60ddSXZdPl/YRHz2LicxeCQyiZQHJvyBD/Z9zfKcteQ1HOGusdfj7eTJ6Bhfsg858O+HL2N1wSbWF25nR2kaHmo3UvwTSPCOIco9rJdUzckgiiJqZyuN1gp+ydlIZVsVR5rLKG2uwNxp365RuhDrEcGlMXMY4RVFQGeZ/Mlgs9k4UH2Ybw+tpLCphJFe0fxx3I19pqVsoo3PD/7Iitz1zI2YyvzIvvembDYbb+z5hAPVWfxfyrXdjcnHYjRb+cfHe2lpN/LITZNpMjTy9y3/pVHfzF8m38Uo38HLNtlMeoyV
hYixc3jy9a1YXEpwHlHMryWtxHvHsDT+UiLdQwd9va37y3nly30E+7rw9G0T+Pzw1+jM+iFZQHTthR6775V9pJFXvtpHbIgbty/qX72h1diOywVtxNcbm9mIACcUV23vMGET+U117FqN7by8/W3yGoq4f8Kt/Zo0DoZBBaTk5GSSk5NpbW1lxYoV3HnnnXh5eXH99dczf/78k37x3ytquYoOc09Pki4tu7omPe4aFS5K517OsD5OnhQ2HfVQCtL6s/nIrj4N+aI8wlHLVaRXZjI2YBRh/lpyS44WUriqNMwJn8KvBZtZEDWzx8xYKXPgr1P+yCOrnuG/xhIeL8skJDAeqUTKH0ZfQ5R7GB/s+5oHf7VbIExMTmRXZhXZ+R1cn3QlV41YwO7y/aSW72dj0Y7uAgB3lSsBGh+8HT1xU2vRODjjqFCjkCq6Nzdtog2T1YzBYkRn6qDN1E6zvpUGfTP1ugZqdQ0YrSakUfDJwVQcFWpCtYFcFDmdMNcgItyC8XbyPKWVmdVmpaiplL0VB9lRspe6jkY81G7cNfYGpoaM63OF1W7S8WbqJ6RVZnBRxDRuTr66zzFYrBZe3/MxO0vTuCb+sj6tKDoMZp7/aA9ZRxp46NrRSBxbeXTDm1hsFp6Yfv+gVyHd1yvcj2iz8O/cZszhuYiKdnycw3gg4dZB9Ud1IXb2tH2w4jBxoe48dss4dpTvZHPxLq6Imz9gxeHxFDeXA/Z2BoDS6lae/WA3nloVj948Frms783uDpOeDrMej0HsU11IiF0B6QQrpC4vpC5NuzNNZWs1/+j077pv/K1MCBx9Stcb9M5ha2sry5cv57vvvsPZ2Zn58+ezfPlyNm3axEsvvXRKg/i9oZIpMZh7akl1SXo0ttofd1W60HycAZyXkwe7y/d3C68Ga/wxWIzU6hp6NRjKJFKSfEeQVpmBzWYj1M+FbQcq0OnNOHZaR1w54mK2lKTywb6veWLG/T1utF5OHjw26Q6e2fwqT+38H/dPtc/KBUFgWuh44r1j+CLjJ37OWYdcuglNrD+fbTMyIf4KlHIl00MnMD10AiarmYKGYgoaiylpLqeitZqCxhJ0x60Q+0NAwNnBETeVFm8nT+K9Y/B28mT99kZKim389dZZRAf3X102EFablWZDK3W6Bqrb6yhvraKosZSCxmIMFiMSQUKCdwzXJi5irP+oPgsHRFEkrTKD99K/pNXQxs1JVzMvcnqfwajdqONfO9/hcG0e1yYs4rLYub2OqarX8fxHeyitaeP+pcnIPWp4cuPHaByceXLG/T0knwaDzSayeevPbPJzo05diI+jF9cnXUeKX8KQgrbFauPtHzJYs7uESQl+PLAsmW2lO/lg/9ck+8UP6HfUF5k1uXg7eaJVaaiq1/H4/3Yhk0p4+v8mDDh7L2u1p6jPZ0+uM4HY2eB9oqKGslr7VkGA55lfYe4sTeN/aZ8jl8h4csYDQ55I9cWgAtKf/vQntm7dyvTp03nqqadISkoC4JprrmHixKE1Pl0IOMgUGI/r8O/qQm/pdNrUKF16OZL6OHlhE23UdzTi7eTZ7c5Z3FTWZ8f7uIAkdpSmcag2lxBf+4yyuKq1uxvf2cGJ6xOv4O29n7EqbyOXRM/ucX6IfxwPu8TyVvNhnt/6GvMiprM04VLUchVuai13j7+Jy+MuYmXuRrbYUml2LubWH/cwPjiBkV7RRLmH4u3kSZxXZK+0lMliotXYjs7cgdFiwipaERAQBAG5RI5S7oCTXI2jQt2ten4s430MPPzaNp58Zxd/vXEMo6K8MFnNtBrbaDPqaDO202psp91k/3ebUUebyf7/FmMbLYZWWoxtPZpNZRIZQRo/poWMJ9YzgnjvGJwHSA0VNpbwRcZPZNbkEKjx4y+T7+xXzqagoZhXdr5Lk6GVu8fd1KvCyK44Uca7yzORSgQeu3UMucZd/LxzHdHuYTw0+fY+95kGorShhhdWvUe9tgWlzYGbR13F3MjJfb6f
A9HcZuTFT/dyqLCBxTMjWTw7lI8PfMX6ou0k+Y7ggQl/GJLIaauhjYyabOZHTKemsYNH396B2WLjH3dNGlDYFSC/4QgwLBt0PDazESknLmoor2lDIoCf55kzNDRYjHy8/zs2FG0nyj2M+ybc0u10faoMKiBFRkby6KOP4ubWc6Yqk8n48ssvT8tAfk8opAr0lp4rJKfOxr/2jq6A5NzL86gr6FS11eHt5Emgxg+pIKGoqbTPDe5k35Go5Sq2FO9mWexSAAormnvIw8wInci+ykN8fvBHQrSBjPSO7nGNiMlLuet/97IxOoY1BVvYUZbGZTFzmR02GbVCRYCLL7ePuZbrE6/g8S9/oqQjn93CfjYf2QXYV4MBLj74Onvj6eiOm0qLRumMk8IRtVyFUqawBx1BigDYELHZbJhtFpoMLVS116I3G+kw69GZOtCZO7oDTOD4ZnIranhu1wZk+y1YMff7njvKVTg5OOGicMRD7Uq4axBalQY3lRYvR3e8nDzwdvQ44c3aarOyv+oQq/I2cag2FyeFIzclXcXciGl99lRYrBZ+zF7N91m/4qbS8szMPxHhHtLjmNLqVv73YyYZBfWMCHPnhstD+CL7M/IaipgbPpUbkxYPyZTPYDbw3u7lbC3fikRmY3qTnmuveg6N++Bl/rvIKWnkhY/30tZh5v5rEhHcKnlwzdM061u5LGYuS+MvHXKAW12wGavNyki3JP725nb0Bgt/v2Miwb4nDrgHqrLwd/bpbp8Yxs7Rsu+Bi8ryy5sJ8HbuNyV6quyrPMT76V9S39HE5bEXcfXIhUPuNRqIQQWktLQ07rjjjh6PXX311XzzzTeEhw9eOO9CQS6V0WrsefNUyCTIpAI6vf1xrVJDi8E+g+9KrXQJqVa11TDKNw6FVE6Q1p/CxuI+X0chUzApKIXNxbu5cdRiXJ0dyC9r7nGMIAjcNfYGHtvwT17a/haPTbu3x9Ja7uqDe8Is5h3cyOxlf+GbI9v57OAPfHd4JRODUpgclEKMZyRqhYqnrrqSP726ldYKI/dcF4ZZ0ciRpjLKW6s4XJdHY2nzSVmG937/5LgonHBxcCI6wJO6OiuV1WaUUjVjo4MYGxWEm9oFJwdHnBWOOCkch3zTPBaL1UJOfSF7Kw6ysyydFkMrrioN1yYsYk7ElB7mdMeSVZvHe+lfUd5axeTgsdyavKSHMkFVvY6v1+eyKa0MlVLO7YviUfpW8sLulxEEgfsm3MKkoDGDHqfVZmVj4S4+2f8jRrEDrc6d2xtyCR17+ZCDkSiK/LytiI9+OYyrG1w818Z3le/QUNhEhFsIf5r4fyeVgqnTNbAiZz0JnvG8+lE+ZovI3++YSHiA9oTnthraOFyb22slPwzYTPaU3UB7SFabSG5xI1OSBr/XN1ia9S18uP9bdpWlE+Diy9MzHyTGs//+u5NlwIB07733cuTIEcrKyli4cGH34xaLZdinZADkEnl3RVgXgiCgcpDT0SkPo1W6YLFZaDfputNGWqULarmqh+trpFso20r29OmNBHBRxDTWFW5jbeE24kI9OVTY0CPIAagVKh6bdi9Pbvo3z25+lXvG38zYYxQIXKdcRXvmZjT7NvPYogcpbCxhdf5mdpSmsbFoB2q5ihFeUcR6RnDj1T6893UJr31SyAPXjObW0ZO6r2OxWe2pMkMb7SYdBosRg8WI2WrGeoxqhEwiRS6RI5fKUMocUMmVqOUqHDtTeH3J8GQdaeCjX7LYsLqR9B21zJ+gZs5YLRrl0NoQRFGkQd9EaXMlR5pKya0vJLu+EKPFiFwiI8lvJNNCxpPkO7LfmV9FazVfZi5nT/kBPNVu/GXKXYzu9Pyx2UQyC+tZueMIuw9VIZdKWDglnGnjtXyV/T2ZaTmM8IrirrE3DDrNIYoi+6oO8WH699R21GBr1zBGOYcl7auQanzRTl48pPegpd3Iy99sI7P+MK6jmtBJa1lTIjLCK4o/pFxDsu/IkyoasdisvJ76
MaIIWTu8kYrwjz9OIthncKnIjUd2YhVtTAkeO+TX/r1zdA+p/3Lu0upWdAYLsSEnt+faFzbRxsaiHXx+8EdMVjNLRi7kspi5Q5biGiwDXvXPf/4zFRUVPP744zz++OPdj0ul0uGy7wGQS2WYrb3TS04qOR16e0A6KjHU3B2QBEEgwMW3RyovxjOctYVbKW4u6zOvHqT1J9kvnpV5G7gs4jZ2ZFRSWtPW6ybgptby7KyHeGnbW7y843/MjZjKsoTLUctVyJzd0Iy/jObt32JImU94YCx/HHcjt45eSkZ1NvuqDnG4Jpe9FQftFwsHqVXFy7t34ZflybioULyd3bpTdY4KFT5OnihlDsilcuQSGVKJtNdNThRFRFHEIlqx2uz/6c162oztWGwWLDZr9/+lzlZuXOJNXqmCnZkVfJ26nW/2bCfQx4m4EFfCAjQ4OUqx2uxVfCarCb3ZQIdZT5tRR5OhhcaOpu4qvi4CXHyZFjKOBO9YErxjUA6waVzeWsVPWWvYVroHhVTBkpELuSR6NgqpnMLyZnZkVLJlXzm1TXqc1XIWz4xkznh/tlVu5amta5FJZPxh9FJmh08ZdK9UVm0enx9cTn5jEaJBjUPDGO6ZMhG/fW9hMenxuuaxE5q2gb1p8XBtHpty9pNeeQjRUYfCEVxdfLk48GKmBI/F5xQs1G2ijXfTviC7Lh9KRuEsOPPsHyfiN8jNdYPFyMrcDcR7xwy6V+pCQrQYEWQKhAEyAXuz7CoviZGD13wciPKWKt5J+5yc+kJGeEVxW8qyM15sMmBACggIICAggDVr1gwr7w4BB6kCYx8ByVEtp11vvxm6q+xFCI36JkJcjy6xg7T+7CpN617ljPSy7/lk1OT0u9G7dOSl/GXt81RJ9yERHNmyr7xP+3Gt0oWnZj7IlxnLWZW3kdSy/SyKm8essMloJy6iLWMT9avfxf/WfyJIpChlDowNGNW9mmrWt1DUVEpxczllLVUcLi+j2lDMz7m5MIiPhyAICJ0Hiognn95Tg0PnVlgNUNMImxr7PrQr/adROuPt7EWCTxy+zl4EuPgSog04obW2TbRxqCaXVfmb2FeZiUIqZ0HkTCZ4T6G8ysTb3x1mf14tDS0GJAKMivLi+vmxjIv3YW/lPp7Z/iIN+iYmBaVww6jFuKoGZ6mQU1fIt4d+IbM2B8xKTOVxzAqfxHVTHWlb+R8sumZ8lj6Gg3dIn+dbbVbyG4rJqMkiozqHgsZibKIN0SpBYfHiougZzI0dc0pB6NjXejf9SzYd2Ym1MgJvInn2ngm4awa/el2evZYWYxtLRi488cEXIKLJeMKm2D1Z1UQGaof0vveFyWrmx6zV/JSzBpVMyV1jb2BayPjfJAYMGJCuueYavvzyS5KTk3sMputmuW/fvjM+wPMRpcyhV9k3gIujorvKritdU3ucAVyYaxDrC7dR3V6Hr7MXWpWGMNcg9lYc5PLYi/p8vRDXAOZGTGVtwVaiR8xlXWopS+dE9xDr7EIhlXNj0mImB4/hkwPf89H+b/nu8CqmBo8leeIlqFd/RMuelWjHX9rrXK1KQ7IqnuRjLKmPVLbw7vIMMksq0Whg/Cg34iI0CFILRqsJk9XUudKx2m+Ix/j2SAQBiSBBKkiRSqTIuv+TIZPIkEokR/8tSJFKJJ3H2/8vdJ5f26gnt7iZ3JImCsta0RsAmwS1QkmAj5ZAb2f8XZzwclbjrlWidXLAxVGBSt7/x7+ypY5NhbvZXpZKg6EBB4maUEkK1IXw6wEj3+p3A/ZVb2KkJymx3qTEeqNxUrC/6hBPbPqEkuZywlyDuG/CLYPKt4uiyKHaXH7I+tWuU2h1wFwRTbA8njuujMarYitNXy1H6qjB97qnUfr37DFqNrSyv/IQ+6oOkVmTQ4dZb3cpdvTHoTmK5kpn5o4cxa2LE05oBjhYWg1t/Hf3h2TUZGOpDCNUksJTd0/ood59IkqbK/gpZw2Tg8acltLh3yM2i2nAptj6
Zj15pU0su+jUXHYP1+bxTtrnVLXVMjV4HDeMuvK0NLwPlgE/la+++ioAv/zyy0CHDXMcaoUaY+eN+Nh9CFdnB0qq7KXeWqULSplDt3ZdFzGd7orZdfndRQ7jA5P5IuMnatrrevngdHFtwuUcrM6iXraDZmMKq3Ye4fJp/d8Ew92CeXrmg+TUFbAqfxNrC7exymbBOcKHiMPLGaUWiA5IIFDjN2AFWKifhufumExGQT3fb8xnzfo6NmwyMDHBl9ljIkiI9EB6hnW1wt1gQuevarXaOFLVSkFZM0UVLZRUt7Izo6pPTyeJAA4KGXKZxC7ZL2vH4liNzaUSwcku62RtdcVal4C+0QejQkGQj5xJie5EBGiJDnYlyMcFqURAFEUya3L4JvUX8hqK8Hb04L4JtzAhcPQJ03M20UZaRQbLs9eQ31iM1KbCVBaDxhjBH6d4EWvJpv3nj2k26HAaORX3OTcjVbsgiiIVbdXsLT9IWsVB8juLX9xUWsYHJjPCM4acTIFfNlfg5uzAk9ckkRx96iuiLtIqMng37QtaDO2YikaS4JbEIzeNRekw+GBnMBt4ddf73dWMw/SNzWwYcIW0fm8pogjTk0+uoEFn6uDTgz+wsWgH3o4ePDbt3h6+Xb8VA35yDhw4MODJ/v7Dud6+cO508WwztvdI0XhoVTS1GjBbbMhlEgJcfHtJA/m7+OCq1HCgOouZYfaCgcnBY/gyczkbi3ZyTcJlfb6mUq7koUm38/iGl9EmHOCLDTImxvvh5Tawr0yMZwQxnhHoTB2kV2ayr3QfGeUH2J+9CrJXIREkeDt54OvkhZejBx6ObriptLiqNGgcnHF2sFe5JUZ6khjpSUlVK6t3F7MpvZyt+yvQOjkwMcGXiQl+jAhzP+Oij1KphIgALRHHVXW1d5iobdLT0KKnuc1IW4eZxo4WaozlNFjKaRDLMdIMgLPgRrBqIrGakQTEeuOhVeGpVaF1duhzH2xfZSbfZ/1KfsMR3FWu3DZ6GTPCJp6wHNZkNbO1eDcrctdT1VaLzOqEvDyMOL2SOYFmfExrsOyooFWQ4Bg9Fu3EK1H4hFLUVEpqwUb2lB+gslMdPtwtmCUjFzLaL55gbQCHihp446sDVNTpmDM2iFsuHYmTauiuuX1R3VbLpwd/YG/FQZwl7nRkjmNSVAwPLhuNXDb4v6/NZuP11I8pb6vm0an3/KYz8fMN0WxEoup7r9BmE1mXWsKoSM8T9nn1xd6Kg7yX9iUtxjYujZnLVSMWDOjvdSYZMCB9+umn/T4nCAJz5/buRB8GtCp7QUFzZ/lwF77ujthEuzVAoLczoa6BbC/d20MaSBAEkv3i2VG6F5PVjEIqt2vG+SWwrnAbi2Iv6nfjPUjrz8OT7+CFrW9CxE6e/ULOv25f0Gfq7ngcFWqmhoxjasg4Wg9sIHfN27SkzKbBy4/y1iqq22rJrivo1V/VxbGVcioXJfGzHejogMZmMxsqD7K2RIJCqiDQU0u4rzuR/h64OTmilDngIHOwV9vJlJ0/K06pjPt4DBYjjaZ66sRaysUqSiwVFOlKutOlCqmcWM8IRvnMZrRf/KD2VSw2K7vL0lmes46S5nI81W7cNnoZ00PH91hRilYzNkMHVoMOm7EDm0FHS3s9G6sPsbG5kDbRjIdBYF6DhfEdJSiFIpCBUKdAHhSLJnku6piJHDE1s6ZsH6lp71PX0YhEkDDCK5L5kTMY45/YLdjb0m7kv18fYP3eUrzd1Dx7+wRGRZ2eVVFNex3Ls9ey6chOZFI5kbLxZOxyYe7YUO5anDgkDx5RFHl/31fsqTjATUlXnZXZ+PmEaDYh0Wj7fO5gfh21TXpuWjBiSNdsMbTywb5v2FWWTrA2gL9M6b/x+7fipAPSMP3joT7qdxTaqbYAdFe+lVS3EujtTLRHOOsKt1HaXNGtygAwITCZDUXbSavIYGKQXRtqUew89la8yC95G1g8YkG/rz3SO5onZtzH85vfpFq2loe/
aOTFa65FqRj87Ng5cSaBBem479nIlJuexyHevtEsiiJ6s4FGfTPNhhZajG20GtppM+nQmTroMNt1yPRmA+0mHXoM2JwMODrYy79FREqB0lrYVDvwGORSOapjgpRS3hmspAoUUjlyqRyZRNodyLuq9UxWMwazAZ25g1ZjO82G1l4yRt6OHoS6BjE3YirRHuGEuQYNujG1w6xnY9FOVuVtpL6jEV+llpu9kkgRVYhZ+6nfswmrrgWrvg2bob27XBegXi5lm1ZFurMKi0QgWmdkTKMZb7MaR3dfPEZOROkZiIN3CHKvIIpaKllXmsauLS/T0NGETCIjwTuGxSMWMMY/sYfZn9VqY01qCZ+uykZvtHDljAiWzo0+5b0iURTJqS/g1/zNpJbvRypImR0+BUNZKKu3VXPJpFBuuzx+SAaKNpuN9/d9xbrCbVwWM5eLo2ae0hgvBESzsV/ZoLWpJTir5YyP79u7qy8OVB3mjdSP0Zn1LI2/lEtj5p7WBteTZcBP63PPPcejjz7aqym2i7fffvuMDOp856jiQs+7brCvMzKphNySJiYn+nerJhyozuoRkEZ6ReOhdmNd4dbugBThHsL4gGR+yl7DlOCx/e4lAUR5hPHvBY/x9Jq3qTKmctu3+dw+4SomhSYOqlJGEAQ8F9xJ+XsPUfPjvwm45SUkSkcEQUCtUNkVHDRD01wTRRGzzUKHyUBeeR0ZRdVkldRRXNOIFQsSmRUvdzleHg64aWU4O0uwCRYMZgMGixGj1UiHqYNGSxMmqxmzzYLFZum2ahYEAZkgRSGVo5QrcZSr8HfxYaRXNO5qVzwd3fBx8sLf2XvA0u7+qGyqYNXB5WytzcIgWgk1WLm4sY2Yjlok5NEmkSJzckXq5IrMxR2FTyhSlTMSpSNFNgPLGwrIMdYgiAKyBk/UjaH4xMQTOyesh7V4aXMF20v3smPfR9TpGpBKpIzyieOa+MsY7RffpyX4gbxa3lt+iJLqNhIiPLh9UTxBg+z96Y9mQyvbS/awqWgnZa1VOMpVLIyezcVRM1m1uYqftuWxcEoYt102tJ4lo8XEG6kfs7t8H5fFzGVZwuWnNM4LBZvF2GdTbEu7kd2Hqrh4Uuig1BksVgufZ/zEyrwNBGr8eHz6fedUmf2AAWnChAkAXHRR39Vdw/SNo0KNm0pLyXH7Q3KZlJgQVzLy7dL8biqtvYKu/ECPCjqJRML8yBl8evB78uqLuiuPbkxazMGaLF5P/ZgnZzww4IzGTaXlP5f9hXe3rGFd6Rr+u/d/fJnpxYLYaUwMSkF7At00qcoZ78sfoPLTx6ld8Treix9GOAmfoS4EQUAhlaNQyRkb6czYSPvvZDBZyC1uIrOwnszCeg7tbsZitTfR+rq7ER3iSlyQK5GhWkL9NINKP54urGYje/b9zNriXWSJHQhAQruJGTJXIr2jUYwMRu4RgNzVB6mTtkePSE2jjuUZqWwv20q7pAbRIkesi2CkJplZk6MYE+fdvXpp0rewvWQvW4p3U9pSgUSQEO8dw1WdK6G+ghBAYXkzn6zKZl9uLV5uav56wxgmJpy8FYfO1GFXQC9NI7MmB5toI9IthDvGXMfEoBSUMgdWbi/i6/V5zB0XPORgVK9r5F873qGoqZQbRl05rMgwBPor+96UXobFKjJ37IlTbS2GVv69812y6wqYFzGd6xIXoThLe0X9MWBAmjnTvpRetGgRTU1NHDhwAJlMRmJiIi4upzYD+70T7hZMQadQ5LGMjvHm45VZ1DZ14OWq7q6gq2qr7a6qA5gTPpmfc9by2cEfeHrmnxAEAXe1K/+XsoxXd33AR/u+4dbRSwe8IQiCwP9Nn8eM8jH8Y/mP1Kjy+Wj/t3y8/zsi3EOI944hxiOCcLegPkVGlYExuM++kYZ1H9K8/Xtcp5z+KiilQkZilCeJUfYVn8lsJb+smZziRnJKGjmYV8fmdLuVgVQiEOjtTJi/hlA/DSG+zgT5uODaR7HBqdDUVMnq
nZ+xpbmQRpkEJ6vIfLUfc6Jm4BuRguS43iWj2UpReSv5pU1klzaQUXuIDk02EnUbWFWEMIGLR0xlbIw/aqU9NWi1WUmrOMj6oh3srzqEKIpEuoVwS/ISJgQmDyi0WlzVyldrc9mRUYmTSs6tl47g4omhJxWsW43tpFVkkFq+n4yabKw2K56O7lwaM4epweN6rIT35dTyzk+ZjBvhw11XDk1NfF/lId7Y8zEWm4WHJ99Oin/ikMd6IWMv++5Z1CCKImtTS4gOdj2hTmBpcwX/2PYGbcZ27h1/C5ODBy9Z9VsyqATz5s2b+ctf/kJkZCQ2m43S0lJeeeUVxow5N3+pc4FYzwj2VhykoaOph9nbpAQ/Pl6ZxZZ95Vw1K4ppIeP5OvNn1hRs6VH2qpQrWRJ/Ke+kfc7mI7uYEWZXVZ8UNIYjTWX8nLMOR4WapfGXnvDGEBngzv9uv4UfNxfw3c50TE6VVNhaKGhYg8ivwFEvIx8nL7ydPPBQ26vptHHjcajMp2nrVyg8A3GMGX8G3q2jKORSRoS5dwvEiqJIfbOBgvIm8suaKaxoYX9uLRvTyrrPcVTJCfB0wtfTET93R7zd1Xi6qvHQqHDTKHEYxI3aJtrIrDzM6vTvONBRg1UQCJepuDpsElOSLkMQZDS2Gsgu76C6oY6qeh3lte2UVLdSWa/DZrMhdavGIbAQ0bcdrdSNeeFXc1nCFOTHyKy0GtpYX7SdtQVbadQ3o1W6cFnMXKaHjMfPZeA9gLzSJr7dkMfuQ9WoHGQsmRPF5dMihlw9V9tez96Kg+ypOEhOfQGiKOKpdmN+5AwmBCYT4RbS6zNV16Tn5c/TCPJx4U/Xjh50Kb/RYuKLjJ/4NX8TQRp/Hpx027C1xMlg7r1CyiluoqymnXuuHjXgqXn1Rfxj6+s4yBx4ZuZDhLn17dR7LjCogPTqq6/y2WefdcsFHT58mMcff5wffvjhjA7ufKZLYSGzJofpoRO6H/f1cGREmDtrU0u4YkYkrioNE4NS2FC0gyvi5vdwypwZNpFtJXv4aP+3xHhGdK+gliVcjs6k58fs1bSZdNySvOSEG5JymYSrZ0dx0fhglm8tZNXOYjqMejz8jASF2VA6d9BibCCvIRV9H0296nBvHPd+gPuRDbhpvNEqXdCqNLgqNbirtbir3XBXu6IYgmr1YBAEAU9XFZ6uKibEH7ULb24zUlLdSml1G2W1bVTWtXO4qIEt+8o5XgBCrZShcXTASS3HUSVH5SBDqZCikEsxCW3UkE+N5RAdgh6V1Ua8XgXWcbSYAviuxMT7KzbQ1mHqcV2JAN7ujgR6OxEea6TIlkq9sRZ/F18Wj1jK+ICkHtqDNe11rMhZz6biXZitZhK8Y7kleQnJfvED/u0sVhu7D1Xx89YisosbcVTJWTInisumhg+6+VQURY40lZFWeZC95Qe7U8mBLr5cETufsQGjCNEG9DuxEUWR177Zj9li4283julhLT8QOXUFvLX3U6raapkXOZ3rEq847Z+PCwex1wpp3Z4SVA5Spozqfw8op66Q57a+hlbpwuPT7ztpa/HfikF9sgRB6KFdN2LEiNOi6vx7Jkjrj5tKy96Kgz0CEsDCyWG88MledmZUMmWUP4vi5rG9dC8/Zq3mxqSjQpkSQcI9427iL2uf5+Xtb/PMrIdwVKiRCBJuS7kGZwdHfspeQ1lLJfeMuwkvpxNrWGmcHLjh4jiunhXFjoxKNqaVcWBbPTbRGXdNMImRHkSFOOHpBTKlkRZjG036Fhrb6qjJ201rXSmF5g5azHbx1ONxU2nxcfLEz8WHABcfgjR+BGkDTrsltdbZAa2zvffpWMwWK3XNemobO6hvNtDUZqCpzUhLu5H2DjM6vZm61jY6HEoxOZVic6wHEcL1JuJaRIraJlAiC0XpIMNJJSHAy5mRYQ5onR1w1yjxdFXj7abGy1VNaWsZnxz4jj11Bfg4eXJv0i1MDBzdIxDV6Rr49vBKthTvRipImRoy
jkuiZp2wKKSmsYN1e0pYl1pKY6sBbzc1t102ktljg7rTfgNhsVrIqssnrTKje6UuCALR7mHcMOpKUvwT+/TY6ovdh6rYn1fH/10ePyhtug6zni8yfmJdwTY8HN14fPp9xHufmoLAMD3N+QxGC9sPVjA50b/fCUJ5SxUvbn8TN5WGp2Y8OGjZqrPJgAGpubkZgJEjR/L++++zdOlSJBIJP/zwA+PHn9nUzfmORJAwPiCJtYXbaDO299ijGR/vS6C3E5+vzmFCvC8BLr7MCJ3I6vxNzAid0KPqxcPRjQcm/oHntr7Oi9ve5JGpd6OUK5EIEpYlXE6Qxp9307/gT2v+zlUjLubiyJmDUuJVOsiYNSaIWWOCaG4zsjermvScWtKyatmUZt+zcVTJCffXEOoXRKjvCKaOGoNi3UtI2xrxu/7vWNRONOlbaOhooqGjibqORmra66hqq2VXWXqPcmtPtRsR7qFEuYcS7RFOiGvgGSkzlcuk+Hk44efR88ZpsBg5UHWYHaWZ7KvMxGyz4OPkSYrBkxEF2QREjMNz2V1IHAZuJAb75vB7+z5n05GdaByc+cPoa5gZNqnH76M3G/gxezW/5G5AAOZHzuDSmDkD+vy0683szKhkc3o5mYX1CAIkR3vxx8WJjI71PmGfT4dZz8HqLPZWZLCvMpMOsx65VE6idyxLRi4k2XfkkJtPRVHkq3V5+Hs6cfHEkBMeu7t8Hx/t+5ZmQyvzIqdzTfylJ1XVOExvjlX63plZhd5oZdaYvtNv7UYd/9j6OnKJjEen3nNeBCMAQRxgqRMTE4MgCH2uhgRBIDs7+4wOrj/Ky8uZNWsWGzZsICDg9Ht/nC5Kmyt4aM3fuS7xCi6NmdPjuT2Hq3n2g1RuWTiCRdMjaDW288CvT+OhduW5WX/uFVR2laXz6q4PCHcN4i9T/9hjxVGna+D9fV+zrzITT0d3roybz5TgsUMyfevCZhMpq20jp7iJgvJmCsqbKa1uw2S2AhAkredul3XopM6kBt6Am5cXvu5qvN3sezddBQaiKNJiaKW0pZLi5nIKGospaCimvsOuguogcyDWI5wRXtGM8Ioi1DXwtDbDgn2/5kB1FnsrDrK/6hAmqxmN0oUJgclMDkjGecsP6PP24Dp1CdrJV51wL04URTYf2cUnB77DYDGyIHoWV8TN7+WXtK/yEO+mfUGDvompweNYmnBpd2/a8TS3GdmTVc2uzCoO5NVisYr4eTgyMyWQGSmBeLkOHCCb9C2kV2ayt+IgmTU5WGwWnBWOjPZLYExAIgnesafUdV9S1crdL2/ijkXxLJjcv85cTXsd76d/xYHqLEK1gdyWsqyXUeEwQ6frXvfRFVHEX/dnnEZMAeCxt3dQ09jBO3+b3ad6yD+3v83+6sM8O/Oh8+rvMOBUOicn55QuvmLFCt566y3MZjM33XQT1157bY/ni4qKePLJJ2lpacHT05N///vfaDTnRyQfDEFaf2I8wllbsIUFUTN73HDHxHkzJs6bz1bnMHaED/6eTtyeci0v7/gfnx38gZuSr+5xrQmBo5EKUl7d/QGPrHuBhybd3t275Onozl+n3MWBqiy+ylzO23s/48uM5UwPncC0kPFD6hmSSASCfVwI9nHhIuylpFabSE2DjtKaNipq29lX6sGYis9JLv6EVw/MRicenQErZBI8XdX4ejji46bG290RH/eRJISOw2e0mg5bOzl1hWTV5XG4No/PM34E7M6zMZ4RxHpGEOkeSphrEKohzqxbDW0UNJaQU19AZnUORU2liIi4KjVMD53A+IBk4jwjEQSoXf4qurw9uM+9Bc2Y/huNu2jsaOatvZ9ysDqLGI9w/m/MtQS49HxfzVYznxz4njUFWwhw8eXvEx/uJRZqtdrIL29mf04t6Tm15JU1IYrg5aZm4ZRwJif6ERmoHTA41ukaSC0/QGr5fvLqixAR8XJ0Z17ENMYEJBLlHnbagvuhIruaxZi4vgsuLFYLv+Rt4NvD
K5EKEm5KuoqLIqad9snFMCDI7HtITW0GMgrqWTonus/PyeYju0irzODGUYvPq2AEg9xDMplMbNmyBZ1OB4DVaqW0tJQHHnig33Nqamp45ZVX+OGHH1AoFCxdupRx48YREWFXwRRFkTvvvJNHH32UqVOn8vLLL/POO+/w8MMPn4Zf69xhYcwc/rn9bbaV7OmxlyQIAn9cnMjd/9zEy5+l8eLdUxgbMIr5kTNYlb+JQI0fs8In97jW2IBRPDXjAf614x0eXf8S1yRcxsVRM7vVCkb5xpHoE0tmTQ6r8zezInc9y3PWEujiy2j/BEb5xBHlHjZkcy2pRMDP0+mY/YNIOo6EIP/mBV6K3AVzH6LWIKemQUd1Ywc1jR3UNHSQfaQBnaGnUaGrswO+Ho74ekQzwWM02gjokFVTZSglr6GQ/VWH7O8PAl5OHvi7+OCldkercsFJoUYukSMCJqsJnamDJn0LNbp6yluqaNDbBVGlgoQItxCuGrmAJN+RhLoG9hA4bdz0GbrD23Gbce2ggtH+qkO8vvsjTFYztyQvYW7E1F6CqW3Gdv65/W1y6gtZEDWLZQmXIZfKMVusFFa0kFXUSGZhPVlHGugwWBAEiAzUsuyiGMbG+RDq5zJgEDJYjOwqTWdL8W6y6vIBCNL4c9XISxjrn0igxu+M2AM0tRoQBPB07W1pUNRYwlt7PqWkpYKxAaO4JWlJt4TRMKcfobOoIS2rBlGECfG9J5rtJh2fZfxItEf4eamAMag70wMPPEBZWRl1dXXExcVx8OBBxo4d2NVx586djB8/Hq1WC9iba1evXs3dd98N2Cv11Go1U6dOBeCOO+6gtbW113VaW1t7PV5dXd3ruHOV0X7xhLsG882hX5gYlNKjyshdo+L+pUn8/cM9vPV9BvcuGcX1o66ksq2Gd9O/xMnBkXEBST2uF+keyotz/8b/9n7OJwe+Z0dpGrcmL+2eCQmCQIJPLAk+sTTrW9hVto89FQf4OWcdP2WvQS6VE+kWQpRHGGGuQYS5BuHp6D7km5k6NBGfq/9G9bcvIFv9IqOWPYEstndKp73DRFWDjur6DqobdVTV66is17E/t5YNe8uOOdIZd814wrxkOLrrENStGKVNVLXUkV2b36+GnqNCjbejB7GeEYS4BhDhFkK4W0i/aar2nF007/wR56Q5aCYsGvB3FEWRH7NX81XmzwRr/Hmgn5LldpOOZza/SkVrNctiluFiDubDFTnklTZRWN7S3ejr7+nIlFH+jIryJCHCExfHE6fSDGYDq/I3sTJ3A20mHb5OXiyNv5SJgaNPi5fRiVArZYiifX+rq6rParPyU/Yavj28Eo2DMw9PvoMxw31Fp8yJ7nVdVXaph6vxdFUR0kfv0c8562g36rh12pLz0sNuUAEpOzubtWvX8tRTT3HzzTcjiiJPP/30gOfU1tbi6Xm0isfLy4uMjIzun0tLS/Hw8OAvf/kLWVlZREVF9XCl7eLjjz/m9ddfH+zvc84hESRcm3g5z2x+lRU567hyxMU9nh830pelc6L5al0uvh6OXD07ij9NvI2/b3mN/+x8j/sm3Mr4wOQe52iULjw8+Q52lO7l4wPf88j6F5kYOJrFIxf0SCNpVRrmR81gftQMdKYOsuryOVybR05dASty1nXbijvKVQRpA+wVcRp/grR+BGsDUMoGdiJVhSbge83jVH/9PBUfP4rvNY+h8Oy5yeqkVhCpVvSQx+lCb7RQWddOZb2Oyrp2Kjr/O3xAis7gDDgDQUgkAl5uDnh6yvB0dcDLVY2PqzMB7q74urvgrJYP6stnbq6l7pc3cfCLxGPurQOeY7PZeCf9CzYW7WBy0BjuGHMdCpkCURRp6zBT0xlcy2vb2Nj4Ha1CNZb8ZN7f3Qg04qCQEhGg5ZLJocSEuBEX4oary9BSkEWNJbyy631q2utI8h3J5bFzifGI+E1vNNHB9r2vtOwaZowOpNXQxqu73yezJpeJQSn8YfRSnBRDV5ge
pjcnutcJcgVGs5X9eXXMGRvU63OgM3WwpmAL4wOTe0iRnU8MKiB5eXkhk8kICQkhLy+P+fPno9frBzynv0KILiwWC3v27OGzzz4jPj6e//znP7zwwgu88MILPc658cYbWbSo50y2urq6137UucxI7xjGBybzQ9avTAhM7tUAec3caKobdXz6azZOajkXTwzlkal3849tb/DKzve4Kekq5kfN6HGOIAhMDh5Lsl88P+esZWXuRnaV7SPFP4EFUTOJ9Yzs8X47KtSM8U/snsmarGZKmysobi6jqKmM0uYKthandq9EBAT8XLwJdw0mwj2EGI9wgrT+vVJVysBYfK9/luovn6Xyk8fwXvxnVMEjB/W+qBxkhAdoCT/OKkIURVp1JirrdFTUtVPVoOsOXEVHWtAZepoaKuRS3DVK3DVKXJ2VaJwUuDg64NLZd6RWyVHJJKi2/BuJzYZl8v9R0WhAEIzYbCI2m4jZasNktmIwWekwmFlV9iMFusOEy1KwFCfybEYaDS0G6ps70But3a8t8ypFHlKJn2ECo5JGE+LrQpi/Bn8v5yGpXx9PZVsNT2/+D2q5iqdmPECcV9SJTzoDxIS4EeDlxLcb8ggPk/LPHW/RaGjhjjHXM7OzWXuY08OJ7nWC3IG80iZMZmufvlbbS/aiNxu47LgCqvOJQQUktVrNihUriImJ4ZtvviEsLKy7JLw/vL29SUtL6/65trYWL6+jb6KnpyfBwcHEx9vdRy+55BLuvffeXtdxcXH5XcgU3ZJ0NZnV2byR+jHPzHqox6avRCJw35IkOvQW3vo+A0EQmD8hhMem3curu97nw/3fUNZSyc3JV/eqnFPLVSyNv4yLo2axKm8Dawu2sbfiYHcp+ZTgMWj7KPlUSOVEuIf02PQURZH6jkZKmsspairjSFMpGTXZbC1JBcDZwYkE7xhG+9ldY7uqyxy8Q/C7+R9Uf/UcVV88i+fFt+OcePL5a0EQ0Dg5oHFyIDa0Z3WaKIq0683UNHRQ29RBXbOe+mY9DS0GGlsNFJY309Ju7LV3Nc0hmysc8/msfRJ7384c8PVlgbnIfY9gLo8kt9aLGucGXJ0dCPByYlSUJ16uqs6qQiUv7H0BP+dInpxx/WlduXxy4HukgpRnZv6p2134bCCVCNx66Uie+WwDj6xdidJBytMzHjzvNsvPB050r5PIHcgpse+TRgf3zjhsK9lDoMbvrFtInAqDCkhPPPEE33zzDQ8//DDfffcd1113HQ8++OCA50ycOJHXXnuNxsZGVCoVa9eu5dlnn+1+PikpicbGRnJycoiJiWHjxo2MGDE0P4/zCa1Kw20py/jPrvf55tAvvYz2ZFIJf70xhec/2sub3x1Eb7BwxYwIHpp0O18d+pmfstdQ1FTK/RNu7XPvwMXBiaXxl7Eodj47S9NYX7iNTw9+z2cZP5DgHcP4gGTGBozqU7OuC0EQ8HR0x9PRvVtrrCtIZdXmk1mTw8HqLHaUpiGXyhnjl8CMsInEe8cg13jhd+Pz1P7wMnW/vIGptgS3WTf0EBw9HQiCgLNagbNaQUSgtt/jLFYbbR0mOgwWdLWVSFd8hclzJLNTljDDBlZRBFG0q4RLJUilAgq5lIK2LL4rPMKUwInccukS1Mr+04E5dYU0G1q5Ofnq0xqMrDYrB6oOc3HkjLMajLqICXfCNeEgHWYr872Hy7nPFoLMgZziRnw9HNE49Uyn17TXkddQdN6rpw8qIIWEhPDnP/+Z1tZW/vOf/wzqwt7e3jzwwAPccMMNmM1mFi9eTEJCArfddhv33nsv8fHxvPHGGzz22GPo9Xp8fHx46aWXTuV3OeeZGJRCRnU2P2avJtI9lBT/hB7Py2VSHrlpLK98uY8PfzlMc7uRmxbEsSzhciLdQ3kz9WMeXvs8N41azMywSX3eBB1kCmaETWRG2EQqWqvZWpzKztI0/pf2Oe+mf8kIryjG+CcyNmDUgE2aXXQFqWmh7kwLHY9NtJHfcITt
JXvZUZrGzrJ0/Jy9uThqJtNDJ+Cz9DEa1n1Ey55fMNYcwevyB5E5nfh1TjcyqQRXZyVaJ5Hq9V9jkEqIuOoeZC79q1k0G1p57dcVRLqHcuf4ZSds3O1yaw1zPb3aYHqzAZto66GBeDb5eP93mIR2ws3z+PqXCnydvJiZcu7qof1eEeQK8suaeqmTAByszgLoVQR1vjGogFRUVMQ999xDa2sr3333HTfddBOvv/464eHhA563cOFCFi5c2OOxd999t/vfiYmJfPfddycx7POXW5KXUNxczmu7P+S5OX/u1csil0n407WjcXFU8OPmAuqb9dy/NIkx/on8c95jvJH6Mf9L+5w9FQe4bfQyPBz7brgEux36NQmXsTT+Uo40lbG7fB+p5fv5YN/XfLjvGyLdQxkXkMSEoOR+GzePRyJIiPYIJ9ojnBtGXUlq+X5W5m7kvfQv+f7wKi6Lncvs2Tfg4BtO/a//o+L9h/C6/P5B7yudbnTZO9EXHcB97i0DBiOAbzJXYLAYuWvsDYNSkZB27qdZbdYTHDk0lHIlAgI688D7tL8F5S1VbCnezWUxc1kcdzHPvr+b/3y1H4lEwvTkc7cp/feIzizQ2GrsNvo8lqza/G7ZrvOZQUn2/v3vf+eRRx7B3d0db29vrrvuOp544okzPbbfJQqZgocm345CpuCFrW/QYuhd6i6VCNy+KJ6bFsSx7UAFj729k5Z2Ix5quy7YLclLyKrN50+rn2VN/hZsndVy/SEIAmFuQSxLuJxXL36af89/gqtGXoLJauLTg9/zxxWP8dTGf7OxaEef+nT9IZfKmRw8lufn/IUnpt+Pn4s3H+3/lvtWPkmqowzvG55DolBR9dlTNG7+AtFqOfFFTyNWfRsNa9/HwTccl9HzBjy2Sd/CpiM7mRU2Cf8TqG53Edwp8ZTXh83IqSCTSHFVaajTNZz44DPM9tI9SAQJC6Nn4yCX8tgt4xgZ5sErX6SzM6PybA/vwkGmoKre3gfal55gdl1BZ9P3+VfqfSyDCkjNzc1MmjSp++drr72W9vb2Mzao3zseajf+MvlOmg2tvLjtLYwWU69jBEHgypmR/PWGMRSWN/Pwf7dRUdeORJAwL3I6/5r3OBHuIby/7yue3vQKla2D780KcPFl8YiLeemiR/nvgme4auQlNBtaeXvvZ9z+81/5YN/X1LbXD/p6giAw0juaJ2c8wBPT78NNreWdtM/5276PKbnoGtTx02ne8T0VH/0NU23poK97qjSs+whrRxseC+464V7WluLdWEUbl0TNGvT1g7UBeDt6sK5w22kXG/ZUu3XLLJ1NipsrCHDx7dbAUypkPH7rOCKDXPnnZ+lkHTn7QfNCQJApqOwMSP6ePcvsWwytNBlaCD+Pixm6GLQFqNFo7I6+dXV12GwDz8qHGZgI9xDuHX8LhY0lvLrr/X7TPpMS/XjurknoDGYe/u9Wso/Yb1JeTh48Nu1e7hxzPaUtlTy89nlW5Kw/4WrpeHycPFk84mJemf8kz8x8iNF+Cawr3Ma9q57kjdSPhxSYwF7i/vdZD/PnyXcgl8r5795PeVnRTNmsqzC11lP+wcM0bf8O0Woe0nWHii5nN+2Zm9FOvAIH75ATHp9emUm4a/CQmk0FQeDSmLnkNxwhtXz/yQ+2D1yUzrQZdaf1mieDVJBgsfVc2aocZDxx63i8XFU8/9EeGlrOfmrx944gtSuhAPi49wxIpS32lWqgxq/XeecbgwpIy5Yt49Zbb6WhoYF//etfLFmyhGuuueZMj+13z9iAUdycfDVplRl8uO+bfmfZMcFuvHzvVJzUCh57ewd7s+yrIUEQmBE2kX/Pe4JEnzg+Pfg9z25+lSZ9y5DHIggCMZ7h3Dv+Zt5Y8HfmR85gZ1k69//6NJ8d/AFDHx5JA10rxT+Rl+Y+wr3jb8FsM/NGyRbeiA4mKyKW+i1fUv7un9AXD1x+fbKYm2uoW/UWCp/wQbnc2mw2ihpLiPWMGPJrzQyb
SLA2gA/3fUO76fQFEJlE1isQnA1iPSOobKshr76ox+Mujgoeu2UcBpOV1789OGxHc4YRZDKa242olbJezsAVndmRoWhWnqsMKiAtXryY++67j4ULF2KxWHj22WdZtmzZmR7bBcG8yOlcGjOXtYVb+TV/U7/H+Xo48s97phDk48xzH+5hx8Gj+XtXlYaHJ93OHWOuo6ChmL+u/QcFDcUnPSY3tZYbkxbz2sXPMDloDD/nrOOBX58hrSLjxCcfg0QiYXLwGF6Z9yT3jLsZBAmfWGt4OTaE9Qoz+V8+Tc33/8TcePr2ImxGPTXfvgiiiPeiBxAGodvXZGjBbLPgexJOplKJlDvHXE+rsY330r86mSH3icFiOCWV7tPFrLDJuKm0/HPH/yhqLOnxXKC3M9fPjyUtu4a07JqzNMILA0EipbXdhMaxt3pKi6ENAQGtw/nfrzmogNTe3s6+fft4+OGHue6669i8eTMdHR0nPnGYQbEs4TLG+CfyyYHvOVST2+9xGicHnrtzElFBrrz0WRq7D1V1PycIAjPDJvHc7D8jl8p4evN/yKw5NbV2N7WWu8bdwLOzHkKtUPHS9rd4ffdHQ14JSCQSpoSM5Z/zHuOvU+4iwC2QXx3hH2FevNuSy/pPHqZm5dtYWupOabw2k57qr5/DVFeG1+UPIHcb3IyxyyHXUdFbQHQwhLkFsXjEAnaWprGzNP2krnE89bpG3FVnv+xbJVfy+PT7kAlSHln/Eu+lf9kjjbtgUihebmp+2FxwFkf5+0eQymnRGXFx6j1JaTG04uzg2MMY8nxlUL/B3/72t25lBhcXuypxX7pzw5wcEkHC3eNuwsfJk9d2f0ibsf+CEbVSzlO3jSciQMNLn6Z17yl1EaT15++zHsbL0Z2Xtr9NcVP5KY8v2iOcF+f8jSvjLmZ76V4eXv1cd9/DUJAIEpL94nl8+v38e94TXBQ5nSKtCx/6uvBI837e+Pphdv/4AvqK/CFf29xUTeXHj2Eoz8Xr8vtRhw++H6Pri2w5hfLty2MvItw1mA/3fX3KqTuDxUhFWw1B2nNjT8DfxYd/znuU2WGT2VC4nXtWPcFL294irSIDQRCZPSaIQ4UNtLQPvkJzmKEhSGW09LdCMrahcRia8eK5yqD6kIqLi3nttdcAcHZ25pFHHuHSSy89owO70FDJldw/4Vb+tu4FPj3wA3eNu6HfY9VKOU/cOp6HX9vG8x/t4ZUHpuGhPTq716o0PDrtHv627gX+s+s9Xrro0R4q4yeDTCpjSfxCUvwTeD31I57b8hpzwqdwXeIVQ/YtAnu++6bkq7k2cRHplZlsK9xOak0OO0wluGz+JyNtDozxTyRx5FwcPQP7LWe16lpoSVtFy+6fEWQKfJY8MqRgBODcKQ460ERgIAxGC01tRub4XcLbh9/glQ3fEOcwGZ3BjN5gwWCyYrJYsVhtiCIIgr1xV6mQ4aSSo3FywNNVRaC3M/6eTuTUFWATbcR4RJ7UeM4ETgpH/pByDVfEzWdNwRY2HtlJWmUGrkoNI7SjQC5wpLKFUVFnXoH8QkSQymltMRLZhzpJq6ENjfL8T9fBIAOSxWKhvb0dJyd7/btOpxvexDwDhLgGsiB6Fity1nNJ9KweVubHo3Fy4PFbxvGnV7fw8ufpPHfnpB5inm4qLXeOuYHnt77GuoKtLIgefDnzQIS7BfPinL/x1aEVrMzdwP6qw9w6eimj/eJP6npyqZzxgcmMD0ymw6xnb/FeduVuJq29ip116cg3pBFphpFKDxJcg/Fy8gRRxKprxlh9BEN5DtisOMZOwH32zchchi6146RwxEHmQO1xfT8ms5XGVoNdJ6/FQEPrUc28xlYDTa0GGluN6I1Hiw/kYX5kWNPZc8AZiahA7SBDqZAil0uRSSVIBLCJdmkjg9FCu96M2XK0MlIuk6CNzUWqkqM0eSJ2yhudK7iptVyTcBlXjbyE/VWH2FC4nR1VW1Emwk9FzQQGXH3OKEz8rpBI
adWZ+rQsaTG2ndf6dccyqIB0+eWXc9VVVzFv3jwEQWDdunVcccUVZ3psFySXx1zEmvwtrMrbyB1jrx/w2EBvZ25flMB/vtrPyu1FXDq1p3LGKN84oj3CWV+4/bQFJLA3994w6krGBYzinb2f8+K2N0nxS+CGUVeekkePWq5iWuRUpkVOxWQxcfBIKnsLdpLRWk6W2MA3jQ14VFuI7DARZRKJVnvjOm4hzgkzUHgMXjXAbLHS2GqkscVAY5s92ChsTuzOK6A4bVd3EGrr6N0fppBLcXNxwM1FSaifhtExSrTO9p+1zg60ieG8mfEGd93qybyoqYOyRe8wWKht6qC0uo2csjo2dqzDUu/Jw//dSYivC/MmhDBrTCBKxdCMFc8kMom0Wz1+Y0YOr2/+kVxpJvetOszS+Et7GEcOc+oYkWOxir007MBe1KC9kFJ2t99+OxEREezatQuZTMZDDz3EtGnTzvTYLkicHBwZF5hEavl+bktZdkIr6JkpgWw7UMFnq7OZPMoft+M8dyYEJvPR/m9p6Gg67TPXaI9wXpz7CCvzNvJ91ioe+PVpZodPYVHcvEHp5A2EQqZgTOQUxkROQRRFqtpq2F91mIyqLPbVF7DLakIqdBBFLaNqD5EkFQnWBmA0Walt6qC2SU9dpxp4V4Bp6lzZtHX07oFSREmRKVtQd5jwdlMTG+qGm4sSdxcl7hqV3d5Cq8JRKRswyIiiF1/nu5LTkM984cTfEUEQcFTJCVVpCPXTYHQpREw388ilV1Fb5sDaPaW8/UMGX63N5erZUcybEIJcdm7d6EtLrVjLRvD8spv5NvtHPjnwPVl1Bdw/4dZTThUPY0cn2gOR5riiBpPFhN5i6G5cPt8Z9JRrypQppKSkdKfqmpubu91ghzm9JHjHsrU4laq22hP2FgiCwP8tiuePL23k89U53HP1qB7PdzXLVbfXnZFUikwq47LYuUwLGce3h1eyvnAbG4t2MCtsMpfGzBlQa2+wCIKAn4sPfi4+LIiehdlqZl9ZLjuLD5LdkEt23XK+zFwOZgcsTZ5Ym7yxtbqBKEUiEXB1dsBdo8TXw5G4MHfcXZS4uijtAafTQ+nDzDqKm8t45cZTm2gJgkCoWxDlLUMvZbfarKzI3UC4WzDJAdEIgQLzJ4ZyuKiBz1fn8M5PmazccYT/WxTfpx/O2aCtw8Sa3SWMifUm1MOHhyffwaq8jXx84Dv+s+t9Hpr0f8MrpdNAh80eiFyOK2po7dz3vKCKGj7++GP+9a9/YTbbZ5Zdee3s7OwzOrgLlS6BxLqOhkE1u/l5ODF/YigrtxexaHo4AV5HP5xyiX2Gaj7DOnJd9hqXxszhh6zVrCvcytrCrUwMHM2C6FmnJGsiiiJV9ToO5NdxuKiB3JImaho7ACdgNBqtiMa3BZtzDS1e5Vi8ylFIFIz0imNKSAqj/UagHKDwQhRFKttqTnlV14WLgxMFJ6Fvt61kDzXtdVw/6fYeq7ARYe48d+dE0rJreHf5IZ58ZxeTE/34w2UjcdecXKn66UAURV7/9gB6o4Vr58UA9oDclR7++MB3bCrayazwyWdtjL8X2q327/HxK6QWY5v98QupqOHTTz/lyy+//F37FZ1LdKU5hhJErp4VxbrUEr5ck8vD16d0P95uss+gnBTq0zvIfvB28uTOsddz1YgFrMzbyMaiHWwv3Uu0RzjzI2cwNmDUoJS0bTaRnJJGdmRUsudwNdUN9r43NxclsSFuXDwxhHB/LSF+Lj3y6marmUO1ueytyGBP+X72VR9ALpWT6BNHil88CT6xPZTNbaKNX/M2UdJczi3JS07Le2Cz2U6Yaj0ek9XMN4d+Idw1uNvV91gEQWBMnA+jojz5YVMBX6/PIz2nhmvmxnDJ5LDfPI1ns4m8+1MmOzOquPmSOEL9eppAXhw1kx2laazIXT8ckE4DXQFJ69RzYtUlzqy5kFJ2np6ew8HoN6Sj03ZgKOXUWmcHFk4J47uN+SyeFdl9
gyjvlBX5rWXpPRzduDFpMVeNXMCmop2szt/Mf3a9h6tKw9zwqcwOn9znrK6uSc/a1BI2ppVS26RHLpOQGOnJ5dMiSIryxNfDccA9HLlUTpLvSJJ8R/KH5KVk1xeQWr6fvRUHSas4CICrUoOPsycyiZTKtloaOppI9IllVtikfq87FOo7GnFT9nbpHYhf8zZR39HInWMHdp6Vy6QsmRPN1KQA3vkpkw9WHGb1rmJuWBDHhJG+SE7BNn2wtLQb+e/XB9iTVc3l08JZNL235JIgCEwOHsNH+7+lSd+Cax+uxcMMHp3FfqvWOh+3QjJ0rpAupJTdpEmT+OKLL5g1axYODkdno8N7SGeGms5O+MF6FHVxxfQIft1ZzAc/H+aZ2ycgCAL5DUfwdvTAycHxxBc4A6jlKhZEz2J+5Az2Vx9mdf4mvj60gu+zfmVSUAoXR80k1DWQrCMN/LSlkNRDVYhAYqQn186LZfxIH9TKk9sYl0gkjPCKYoRXFDcnXU1ZSyWHanM50lRGna4Bg8VEtHsYKQkJTAoac1rKq202G0eayxgfkDzoc5r1LfyQ9SvJfvHEe8cM6hxfD0eeuHUc6Tm1fLDiEC98vJcwPw1XzIhgUqIfMunpXzGZLTbW7y3ls1+z6TBYuH1RPAsmhfb7vjl19nfpLQZcGQ5Ip0KbRYqTSo5c1nPl3dy9QrqAUnbvvPMOJpOJZ555pvux4T2kM8eR5jIcpAq8HQc2lDseJ7WCay6K5t2fDrEjo5KJCb7k1BUw2i/hxCefYSQSCaP94hntF09FazW/5m9iS3EqW4p342DyorU4ALXJj0XTI5g/MRRvt9ObYhQEgSCt/4C9XaeDgsZidKYORngNvqn184yfMNnM3Dhq8ZBeSxAEUmK9SYr2Yuv+cr5el8vLn6fz/s+HmDUmiKlJ/oT4upxyoG1o0bMpvZxVO49Q16QnNsSNP16V2KdR3LEUNBYjk8iGPLEapjdtZila594l3436ZhzlqnNC9/B0MKiAlJExNFHNYU6N7LoCIt1DT0qbasHEUDallfHW9xk4uXXQZtIx0jv6DIzy5PFz9ibeYTqZZe6UmrMQfEtxiNqHt3MlIbFuuGt7f/HOF7aX7kUmkZHkOziH3Oy6fLYU7+by2IvwPckeLqlEYMboQKYlBZCWU8PqXcX8sLmA7zbl4uZrxC/QjMrZhEJpw1Elx1Xlgr+zDzGeEb1eUxRFmtuMFFa0kFPSyMG8OnJKmgAYGe7OXVcmMjrG64RBrrGjmS3Fuxnrnzhc+n0aaDcLuDr3TuE36Jtx+x01Ig8qIJlMJrZs2YJOZ9foslqtlJaW8sADD5zRwV2ItBhaKWkuZ2n8yUkzSaUSHlw2mgf/s4U3Vm8EF86ZgGSziew+VMXX6/MoqmjBx13NXbMWMSXZj7TK/SzPWcebez7h20O/cFnsXGaETkR+Ht3MOkx6thTvZlzAKBwHUURisVl5L/0rPNRuXBE3/5RfXyIRSIn1QuHaiCoqgwNVh9GLJgpFEJukiBY5AiDITSCxq0MorC64mMNQd4TS0SalvlmP3mjX9JMIEB6g5bp5MUwZ5d+nU2lftJt0vLT9LWyiyJKT/BwP05N2o0BoHyukpo7m01Ydei4wqID0wAMPUFZWRl1dHXFxcRw8eJCxY8ee6bFdkHQpdCd4x570NQK9nXlw2Whe3rEXpdUZpXB29o+6sFhtbDtQwXcb8ymtbsPXw5H7lyYxPTkAaedex+TgsUwKGsP+qsP8kPUr76V/xY9Za1gUN4+ZoRORDcJG4myzKn8TerOBhdGzB3X8L7nrKWup5M+T70ApO7VVoc1mY3vpXn7MWk1FWzVOCkemho4lyXcE4a4htLcKFFe1UVbbZm8cbq+lyVaBzqGMeuUBcDiIRhPAqPAERniOIMxfS7i/Zsj7d7n1hbye+jENHU08NOn2k171DdOTNpPQZ8quQd90xtPQvyWD+pZnZ2ezdu1annrqKW6++WZEUeTpp58+
02O7IDlYnY2zwpEw16BTus6EeF+0eSaaa5346xvb+fP1KT36k34LWnUm1qWW8MuOI9Q36wnyceZP145myij/Hrp7XQiCQLLfSJJ8R5BZk8O3h37hvfQv+TlnLVePXMjkoDHnrMR+Y0czP+esZYx/4qB0xarbavn28ErGBowipY8y76GQXZfP++lfU9pSQbA2gHvG3cz4wKQeq0t3Rwj27buwoKa9jg1FO9hYtIMDxpWUNe9kkiYFpS6JcEXwCd9zq83Kodpcfs3fzL7KTDwd3Xl8+r3Eep474rDnOwYLvVRYOkx6mg2tv3kF7ZlkUAHJy8sLmUxGSEgIeXl5zJ8/H71+2Lb4dCOKIpk1OYzwjj7lG68oinRYW5kUM5K0jXru+/cWls6J4rKp4b0cJ08nVpvIocJ6NuwtZcfBSkwWG/HhHtx1ZQKjY7wHVZYsCAIJPrHEe8dwoPowX2X8zOupH/FzzjqWJVxGku/Ic0pwVBRF3k3/Aqto44ZRV57weJto439pnyOTSE+p98lgMfLZwR9YW7AVD7Ub90+4lfGByUNWRvB28mRZwuVcNWIBeysOsqU4lZW5G/g5Zx1quYpwtyACXPzwdHTDUa5GEAT0ZgMN+mbKWirIqS9EbzbgrHBkyciFXBw186QU4IcZGHdNz/e0vNXuh/Z7sC7vYlABSa1Ws2LFCmJiYvjmm28ICwvr9kca5vRR015Ho76ZkV6nvucjImIVbQR4arjxoRm89X0Gn6zKZuWOI1wyOYzZY4L6TAGcDGaLlcNFDaQermZXZhUNLQbUShmzxgRx8aRQQnxPriRVEASSfEeS6BPH7rJ9fJn5My9se5MRXlFcn3jFOaNwvKZgC+mVmdwwajHeg5itri/czuHaPG5Pufak8//lrVX8a8c7VLbWsCBqFkvjLz3lSiu5VM7EoBQmBqXQbtRxoDqLrLp8ihpL2HhkJ0ZLT78jmUSGn7M3k4LGkOgTS7LvyPNqz+984/iAVNopTxX4O7Au72JQAemJJ57g22+/5eGHH+b777/n+uuvHy5oOAPkdcrNxHiEn+DIEyMRJDg7OFHf0Yi7RsVjt4wjo6COr9fl8fHKLD79NZuECA+SoryIC3MjzE8zqJWTKIrUNesprmqloKyZ7OJGsosbMZqsKGQSkqK9uHVhAGNH+uBwmlZiEkHCxKAUxvqPYn3Rdr49vJK/rnuByUFjWJpwGV6OQ7ecOF1k1uTw8f5vSfYdycVRM054fE17HZ8d/IEE71hmnmQj7v6qQ7yy8z0cpAoen34vIwfZuzQUnBwcmRw8hsnBYwD7311n7qDDbABRxEGmwFnhdM6mUH+PHC8TVdJcjoPMAc+z+Pk/3QwYkK6/vmfX+A033IAoikRHR/Prr79yzTXXnPEBXkiUNJcjl8jwd/E5LdeL84xkX2UmJosJhUxBQoQnCRGelFS3sjm9nNTDVXz4y2HAXlHl4arGQ6NE4+SAykGGRBCwiSJGk5W2DhNNbQZqm/QYTfYqLEGAYB8X5owJIinGi4RwD5QOZ674QCaVMS9yOlNDxrE8ey0r8zawu3w/8yKmsShuHs4Og6sCO13k1BXw0va38XP25t7xt5wwVWaz2Xg99WMEQeCOsdedVNpxU9FO3k77jBBNAH+Zchduau1Jjn5oCIKAk8Kxu9l1mN8e9+P2kHLqC4lyD/ldidcOePe47rrrAFi3bh3t7e1ceeWVSKVSli9fjovL76Mz+FyiRlePl6PHkHXQ+mN+5HRSy/fzZebP3DDqyu4bYLCPCzcuiOPGBXE0tOjJK22iqKKVqnodDa16ymvbMZos2GwigkRAqZDipFIQ4OVMcrQ3fp6OBPu4EOrnctIqCqeCWq7imoTLmBsxla8PrWBl/kY2FO1gYcxsLo6aiVp+5gVH95Qf4LXdH+Km1vLY9PtQK078mt9lrSK3vpB7xt18Us2iq/M388G+r0n0ieVPE/9vQMHYYX5fKOVCj8leh0lPaXMFi0dcfBZH
dfoZMCBddNFFALz//vt89dVX3cvz6dOns2TJ6RGiHOYobcZ2tKrTF+jjvKK4KGIaK/M20GHWc/2oK3rNcN01KibEq5gQf/5tjLqrXblr7A0sjJ7NV5k/882hX1iVt4kFUTOZFzl9UL1AQ8VsNfP1oRX8nLOOCLcQ/jzlTrSDkG3JqM7m+8OrmBoyjikhQ2+ZWFewjQ/2fU2KfyIPTLh1eK/mAkPr2PNWndtQiIhIjGdvHcHzmUHlV5qamjAajahU9lmgTqejpaXljA7sQsRitQxqpj0Ubk6+GkeFmh+yfmVXWTpTg8cxyncEYa5BaFUufS73bTYbbaZ2WgxtNBlaaNK30GxopVnfQrOxjTZjO+0mHQazEaPVhMVmVyWXSqQopArUciUuDk64KrV4OLrh6+RFgMYXfxefM9K1H6jx4+HJd1DYWMK3h1fy9aEV/JSzlpmhE5kbMfW0pEBFUWR/1WE+OfAdlW01zA6fwk2jFqMYRCFBra6BV3e9j7+LD38YPfQ0987SdN5L/5Jkv3genPCH86Ina5jTi1bd829+sCoLuURGpHvoWRrRmWFQn+xLLrmEq6++mjlz5iCKIqtXr+bqq68+02O74FDIFBjMxhMfOAQkgoSl8ZcyITCZ5Tnr2FK8m7WFWwF7lZSzgyMOUvtN1Wy1oLcY0JsNiIi9rqWSKdEonXFxcEar1KByVuIgVXTbSVhFGyaLCZ1ZT6uxjbKWKpr0Ld3XkgoSgjT+RHmEEecVyQivaFxO475PuFswf51yF8VNZfySu4G1hVv5NX8T0R7hTAkeQ4p/4pCr2vRmA6nl+/k1fxNHmsrwdfLikan3MMo3blDn60wdvLD1DWyijYdPogE2qzaP11M/ItojbDgYXcBonY7+3UVRJK0yg3jvmFNuqD7XGNSn+7777mPEiBHs3r0bgL/+9a/DFuZnAHeVK5m1OWfk2sHaAO4dfzMmi4nCphJKmiuo72ik3ajDaDUB9rJfpcwBJ4UaFwdnNEpntEoXXJUatEqXk9qzMFnN1LTXUdZSRXFzGYWNxWwu3s2agi0ICIS7BZPin8BY/1GDMiMcDCGugdw9/iauG3UFm4/sYmtxKu+lf8V76V8RqPEjxiOcUNcg/Jy9cVNpUMtVSCVSzDYLrYY2anX1lLZUkl2Xz+HafCw2C37O3twx5jqmBo8bdFAwWky8tP0tqtpreXTq3UNWLShvreKf29/G28mDP0+5c1CrsWF+n2jVRzMLZS2V1OoauDz2orM4ojPDoKdbs2fPZvbswUmiDHNyhLgGsrUklYaOpjNiNw72VVisZ+Rv1kWvkMoJ1PgRqPFjYtBowK7hVtRYQkZNNvsqD/FV5s98lfkzgS6+TA4ey5SQsadFIVqrdOHy2Iu4LGYuFa3VpFVmcKgml+2le1lXuO2E5we4+DIvYhpjA5KI9ggbUlWcwWLkn9vfIqeukHsn3Dzk0uxWYzsvbn0TmUTG36bePVzddoHjfExA2tvp63UuqPifbobX/+cQo3zi+ARILd/PxVEzz/ZwzhgyiZQojzCiPMJYPGIBjfpm9pQfYEdpGl9mLuerzJ9J8IlldvhkRvslDMphdiAEQSBA40uAxpfLYy/CJtqo1zVS1V5Lk74FvdmAVbQil8hxdnDCQ+1KgIvvSe/ntRnbeXHbW+Q3HuGusTcwKWjMkM43W828vP1tGvXNPDnjgbPaZzXMuYGz6uitOq0yg0i3kN+l6eFwQDqHCND4Eu4azJqCLcyLmH7BNB26qbTMi5zOvMjp1LTXsaV4N5uKdvGvHe/gqtIwJ3wKs8OnDKqabTBIBAleTh54OQ3Nb2owlDZX8M/OYPLAhD8wPnDwRn1g3x/4X9rn5NQXcv+EW4nyCDvtYxzm/EOttqdrG/XNFDaWnLQbwLnOhXHHO4+4LHYuVW21g0op/R7xdvLk6pELeeOSv/PnyXcSpPHnm0O/cNeKR3kj9WOKm8rO9hD7RBRF1hdu55H1L2KwmnhyxgND
DkYA3x1eydbiVK4eeQkTg1LOwEiHOR9xUtlTdukVmQCMOUVB3nOV4RXSOca4gCTivWP47OAPxHlF/q6EE4eCRCIhxT+BFP8EKttq+DVvE5uLd7OleDcjvKKYHzmDFL+Ec2IVWdlWwwfpX5NRk028dwz3jLsJ7UmkU9YX2mWRpodM4Mq431fD4zCnhrPKvkJKqzyIt6MHAS6/H/26Yzn73+ZheiAIAn8ceyMquZJ/bH2DWl3D2R7SWcfP2ZtbRy/l7YXPc13iFdS01/Pyjv9xz6on+Cl7DS2G1rMyrmZ9Cx/u+4Y/rX6W/IYj3Jq8lEen3XNSwWh32T7eTf+CJN8R/N+Ya88pNfNhzj5OjgoMZgOZNbmk+Cf+bj8fwyukcxA3tZa/Tb2bZza9wuMb/smfJ99J+DmibH02cVSouTRmDguiZrK34iBrCrbwRcZPfH1oBSl+CUwLGcconxFnvFfnSFMZawq2sK04FatoY0boRJbELzzpPa69FQd5ddf7RLqF8sDE2065iGOY3x9KBzkZNTlYbBZG+8Wf7eGcMYYD0jlKqGsgT8/8Ey9se5PHNvyTJSMXcknUrOHGSOyKEOMDkxkfmEx5axUbC3ewtSSV1PL9OMpVjPZLYLR/PPFeMTg5nHq5tCiKlLZUkF6Zya7SdEpaKpBL5UwLGc+lMXPwOQVX1J2laby2+0PCXIN4ZOrdv7tGx2FOD4JExr7KdFRy5e9OLuhYhu9u5zBBWn9enPs33kn7gi8yfmLTkZ0sjlvAxKDRp02A9XwnwMWXG5IWsyxxERnVWewsS2df5SG2lqQiIBCk8SPCPZRQ1wACXHzxcvJAq9T0uwoxWIzUdzRS3VZHaUsFRY2l5NYX0mJsAyDKPYxbkpcwOXjMKfcG/Zq3iY/2f0uMZzh/mXLXbyIKO8x5iiBhf9VhEr3jftcr6OGAdI7j7ODEnyb9H/sqD/H5wR94LfVDvsj8iRmhE5gQOJoAF98zmk82Wc006ptp7GimxdhKi6ENnakDvcWAyWrGZrMBdmsIlUyJs4MjWqULHmo3/Jy9T8sKZTDIJFKS/eJJ9ovHarNS2FhCRk0OOXUF7C5LZ0PR9h7HO8pVKOVKZBIZomjDbLPQYdJ3q1Z04ePkSYJPLCO8oknyHXFaej8sNisf7/+WNQVbSPFP5P7xtwyrMAwzIBW6WpoMLST7jTzbQzmjnNGAtGLFCt566y3MZjM33XQT1157bZ/Hbd68mWeeeYaNGzeeyeGc1yT7jWSUbxz7KjNZU7CF7w//yneHV+Hp6M4Irygi3UIJ1vrj6+yFk8JxUEHKJtpoN+po1LfQqG+ivqOROl0jdboG6nQN1OoaulcGxyOXyFDIFN3irGarGaPF1EsDT6t0IcwtmGj3MEZ4RRHuFnzGV3fSYxpvwZ5ya9A3UdlaQ62uniZ9C21GHQaLEYvNgiAIyCUy1HIVLkpn3FRafJw8T6k5tj/qOxp5ddcH5NYXckn0bK5LWHROVAoOc26TVV8IwCjfEWd5JGeWMxaQampqeOWVV/jhhx9QKBQsXbqUcePGERHRM/9ZX1/Piy++eKaG8btCIkhI8U8kxT+RJn0LeysOcKA6m/SKDDYf2dV9nIPMAa3SBSe5GgeZAqlEioCAxWbBZDWjNxtoN+loM+mwibYeryGVSPFQu+Hl6MZo/wQ81G54qF1xU2nRKl1wcXDCSeHY516WTbShM3XQpG+hVtdAVVstJS3lFDaUsK/S3j+hkitJ8h3JuIBRJPmO/E32TARB6Pw9Tl2O6GQRRZFtJXv4cN/XWEUb90+4dbjPaJhBk99QTJDG/7Q1h5+rnLGAtHPnTsaPH49WqwXs3kqrV6/m7rvv7nHcY489xt13382//vWvPq/T2tpKa2vPst7q6uozMubzCVeVhrkR05gbMQ1RFKnV1VPWUkl1ez0NHU00G1rQmTowHbNykUmkOCrUeKjd7AKqSic0Di64qjS4qexWEVoHl5OesXfZpjs7OBGk
9e/xXKuxnazaPPZXHSa9MoOdpWk4yBwY5z+K6aHjifOK+l05Xx5LdVstH+7/hv1Vh4lyD+Pu8Tfh4+R5toc1zDnGQPe6Iy1lzAmbcTaG9ZtyxgJSbW0tnp5Hv3ReXl5kZGT0OOaTTz4hLi6OxMT+u44//vhjXn/99TM1zN8FgiDg7eSJ9zl8k3NxcOqujLPZbGTV5bO9dC+7ytLZWpKKl6M7s8ImMyN0wkn18ZyLtBnb+TF7DavzNyOTSLkp6aoLShJqmKEx0L3OaDER4/H7ra7r4owFJFHs7adz7L5GXl4ea9eu5aOPPhpwxXPjjTeyaNGiHo9VV1f3ux81zLmPRCJhpHc0I72juSXpavZUHGB94Xa+zFzON4dWMDYgibkRU4nzjDwvGwBbDK2sytvE6vzNGCxGpoWM55qEy36XYpjDnD5OdK+L8Qg/G8P6TTljAcnb25u0tLTun2tra/HyOtqvsXr1aurq6rjyyisxm83U1taybNkyvvjiix7XcXFxwcXl9503vZBRyBRMDh7L5OCxVLZWs65wO5uLd7GrLB0/Z29mhk1iasi4cz53LooihY0lrCvcxvaSPVhsVsYFJrE47uJe6cthhumLge51KrnqjFnSnEucsYA0ceJEXnvtNRobG1GpVKxdu5Znn322+/l7772Xe++9F4Dy8nJuuOGGXsFomAsLPxcfbkxazDXxl7KrbB/ri7bz2cEf+DLjJ0b5jmBK8FiS/eLPqebRmvY6dpXtY1vJHspaKnGQOTA9dAILombidxqs04cZBuzfjfMxWzBUzugK6YEHHuCGG27AbDazePFiEhISuO2227j33nuJj//9yl8Mc2ooZAqmhY5nWuh4ylur2HxkN9tKUkmvzEQhlZPoE8dovwRG+cYN2ZL8VDFZTOQ2FJFZk8O+ykOUtlQA9obZ20YvY1JQymkvFR9mGP9TUAM5nxDEvjZ7znHKy8uZNWsWGzZsICAg4GwPZ5jfAJvNRk59ATvL0kmryKBR3wyAv4sPsR4RRLiHEuYaRICLz2mTVzJbzVS21VDSXEFRUymFDcUUNpVisVmQChKiPcJJ8U9kbMCoYRO9Yc4IXfe6J95/jmsnLz7bwznjDCs1DHNeIJFIiPOKIs4riluTl1LaUsHB6mwO1+ayqyyd9Z1KDFJBgreTJz5Onnio3dCqNLg4OOKoUOMgdUAhlSMRBETo0ZelM3XQamynydBCQ0cjte0N1HY0dBfnyKVywrSBXBw1g7hOC3iVXHkW35FhLiR+60zA2WI4IA1z3iEIAsHa/2/vTmOjKhs2jl8DbdlKKYUuPKC8AbENYYuC7CU8ULpRdkNZLAqCgCyWSNjEQMSASFKIJGyi4UNRSkGwBAHZZGmDghrW+kLCIn0ZSi1SaEs7Mz3vBx4m1hateRznHvr/JU2Yc8+cueZOOdec6cw9rdQ6uJWGRMWowqqQ/X6+rv76s278+n/Ku29X/oMC/e8vV/WgvLjG+61jq6Mm9RurWYOmatvsf9Sn9Ut6pkkLPdvk0QoYrB8Ib2lav3a8Q5NCgs+rY6ujfwVF6F9BEer9bOUxp8up++XFKnaUqMxZLofLoYr/nPX41amrgLoBauBfT438G6phQIOn9sO58G215SMDFBKean51/dS0QZNa8x8aT6fashI8TwcBwHC14S3fEoUEADAEhQQAMAKFBAAwAoUEADAChQQAMAKFBAAwAoUEADAChQQAMAKFBAAwAoUEADAChQQAMAKFBAAwAoUEADAChQQAMAKFBAAwAoUEADAChQQAMAKFBAAwAoUEADAChQQAMAKFBAAwAoUEADAChQQAMAKFBAAwAoUEADAChQQAMAKFBAAwAoUEADAChQQAMAKFBAAwAoUEADAChQQAMAKFBAAwAoUEADAChQQAMIJHCykrK0sJCQmKiYlRenp6lfGDBw9q6NChGjJkiKZPn6579+55Mg4AwGAeK6Tbt28rLS1NW7du1e7du7VtEpZtFQAADjxJ
REFU2zZduXLFPf7gwQMtWbJEGzdu1JdffqnIyEh99NFHnooDADCcn6d2nJ2drR49eig4OFiSFBsbq3379mnGjBmSJIfDoSVLlig8PFySFBkZqaysrCr7KSoqUlFRUaVtdrvdU7EBwCs41nmwkPLz8xUaGuq+HBYWprNnz7ovN23aVAMHDpQkPXz4UBs3btQrr7xSZT9btmzR2rVrPRUTAIzAsc6DhWRZVpVtNputyrb79+9r+vTpioqK0vDhw6uMT5gwocp2u92ucePG/X1hAcDLONZ5sJDCw8N1+vRp9+X8/HyFhYVVuk5+fr4mTZqkHj16aOHChdXuJygoSEFBQZ6KCQBG4FjnwTc19OrVSzk5OSosLFRpaakOHDig6Oho97jL5dLUqVMVHx+vRYsWVXv2BACoPTx6hpSamqqUlBQ5HA6NGjVKnTp10uTJkzVr1izZ7XZdvHhRLpdL+/fvlyR16NBB77//vqciAQAM5rFCkqSkpCQlJSVV2rZp0yZJUseOHZWbm+vJuwcA+BBWagAAGIFCAgAYgUICABiBQgIAGIFCAgAYgUICABiBQgIAGIFCAgAYgUICABiBQgIAGIFCAgAYgUICABiBQgIAGIFCAgAYgUICABiBQgIAGIFCAgAYgUICABiBQgIAGIFCAgAYgUICABiBQgIAGIFCAgAYgUICABiBQgIAGIFCAgAYgUICABiBQgIAGIFCAgAYgUICABiBQgIAGIFCAgAYgUICABiBQgIAGIFCAgAYgUICABiBQgIAGIFCAgAYgUICABiBQgIAGMGjhZSVlaWEhATFxMQoPT29yvilS5c0cuRIxcbGatGiRXI6nZ6MAwAwmMcK6fbt20pLS9PWrVu1e/dubdu2TVeuXKl0nblz52rx4sXav3+/LMtSRkaGp+IAAAznsULKzs5Wjx49FBwcrIYNGyo2Nlb79u1zj+fl5enhw4fq0qWLJGnEiBGVxh8rKirSzZs3K/3Y7XZPxQYAr+BYJ/l5asf5+fkKDQ11Xw4LC9PZs2efOB4aGqrbt29X2c+WLVu0du1aT8UEACNwrPNgIVmWVWWbzWar8fhjEyZM0PDhwytts9vtGjdu3N+QEgDMwLHOg4UUHh6u06dPuy/n5+crLCys0nhBQYH78p07dyqNPxYUFKSgoCBPxQQAI3Cs8+DfkHr16qWcnBwVFhaqtLRUBw4cUHR0tHu8ZcuWqlevns6cOSNJ2rVrV6VxAEDt4rFCCg8PV2pqqlJSUjRs2DANHjxYnTp10uTJk3Xu3DlJ0qpVq7R8+XLFx8ertLRUKSkpnooDADCcx16yk6SkpCQlJSVV2rZp0yb3v6OiopSZmenJCAAAH8FKDQAAI1BIAAAjUEgAACN49G9InuJyuSSp1n2KGcDTIyIiQn5+PnkI9hifnI07d+5IUq36wBiAp8uhQ4fUqlUrb8cwis2qbskEwz18+FDnz59XaGio6tat+7fu+/Eno9PT0xUREfG37tvTyO4dZPcOX84u1ewMyel0ym6315qzKZ98hPXr11fXrl09eh8RERE+++yF7N5Bdu/w5ex/xs/P76l9bNXhTQ0AACNQSAAAI1BIAAAjUEi/ExQUpBkzZvjkqrtk9w6ye4cvZ0f1fPJddgCApw9nSAAAI1BIAAAjUEj/cebMGY0cOVJDhw7VhAkTlJeXJ0kqKirSlClTFB8fr3HjxrlXiTBNVlaWEhISFBMTo/T0dG/H+VNr165VYmKiEhMTtXLlSklSdna2kpKSNGjQIKWlpXk54Z/74IMPNH/+fEnSpUuXNHLkSMXGxmrRokVyOp1eTle9w4cPa8SIEYqLi9OyZcsk+c6879692/0788EHH0jynXlHDVmwLMuy+vfvb126dMmyLMvavn27NXXqVMuyLGvp0qXWhg0bLMuyrC+++MKaPXu2tyI+kd1ut/r372/dvXvXKi4utpKSkqzLly97O9YTnTx50ho9erRVVlZmlZeXWykpKVZWVpbV
r18/68aNG5bD4bAmTpxoHT161NtRnyg7O9vq3r27NW/ePMuyLCsxMdH64YcfLMuyrAULFljp6eleTFe9GzduWH369LFu3bpllZeXW2PGjLGOHj3qE/NeUlJidevWzfrll18sh8NhjRo1yjp58qRPzDtqjjMkSeXl5Zo9e7aioqIkSZGRkbp165Yk6ejRo+4vGRw8eLCOHTsmh8PhtazVyc7OVo8ePRQcHKyGDRsqNjZW+/bt83asJwoNDdX8+fMVEBAgf39/tW3bVteuXVPr1q31zDPPyM/PT0lJScY+hl9//VVpaWmaOnWqJCkvL08PHz5Uly5dJEkjRowwMvvXX3+thIQERUREyN/fX2lpaWrQoIFPzLvL5VJFRYVKS0vldDrldDrl5+fnE/OOmqOQJAUEBGjo0KGSpIqKCq1du1YDBw6UJOXn5ys0NFTSo2U8AgMDVVhY6LWs1fltRkkKCwvT7du3vZjoj7Vr1859ELl27Zr27t0rm83mM4/h3XffVWpqqvvtxr+f/9DQUCOzX79+XS6XS5MmTdKQIUO0detWn/ndCQwM1OzZsxUfH6/o6Gi1bNlS/v7+PjHvqLlaV0hfffWVoqOjK/28+uqrkh6dKb399ttyOp164403nriPOnXMmjarmnfu22w2LyT5ay5fvqyJEydq3rx5evbZZ6uMm/gYtm/frhYtWqhnz57ubb4y/y6XSzk5Ofrwww+VkZGhc+fO6ebNm1WuZ2L23Nxc7dixQ0eOHNGJEydUp04dnTx5ssr1TMyOmvPJxVX/G/Hx8YqPj6+yvbi4WNOmTVNwcLDWrVsnf39/SY+eMRYUFCgiIkJOp1MPHjxQcHDwP5z6j4WHh+v06dPuy/n5+QoLC/Nioj935swZzZo1SwsXLlRiYqK+/fZbFRQUuMdNfQx79+7VnTt3NHToUN27d08lJSWy2WyVst+5c8fI7M2bN1fPnj0VEhIiSRowYID27dtXacV8U+f9xIkT6tmzp5o1aybp0ctzmzdv9ol5R82Z9VTfi+bOnavWrVtrzZo1CggIcG/v16+fdu3aJenRwahr167usjJFr169lJOTo8LCQpWWlurAgQOKjo72dqwnunXrlt58802tWrVKiYmJkqTOnTvr6tWr7peV9uzZY+Rj+PTTT7Vnzx7t3r1bs2bN0r///W8tX75c9erV05kzZyRJu3btMjJ7//79deLECRUVFcnlcun48eOKi4vziXmPiopSdna2SkpKZFmWDh8+rJdeeskn5h01V+vOkKpz8eJFHTp0SM8995yGDRsm6dGZ0aZNmzR79mzNnz9fiYmJaty4sVatWuXdsNUIDw9XamqqUlJS5HA4NGrUKHXq1MnbsZ5o8+bNKisr04oVK9zbkpOTtWLFCs2cOVNlZWXq16+f4uLivJjyr1m1apXeeecdFRcXq3379kpJSfF2pCo6d+6s119/XWPHjpXD4VDv3r01ZswYtWnTxvh579Onjy5evKgRI0bI399fHTt21JQpUxQTE2P8vKPmWDoIAGAEXrIDABiBQgIAGIFCAgAYgUICABiBQgIAGIFCAp5g/vz52rx581+6zaFDh9yraB89elRr1qzxRDTgqcTnkIC/0YABAzRgwABJ0rlz53Tv3j0vJwJ8B4UEn3Pq1CmtXLlS4eHh+vnnn1W/fn2tWLFCYWFhWrp0qXJzc2Wz2dS3b1/NmTNHfn5+at++vSZMmKBTp06ppKREc+bM0aBBg7Rz507t379fGzZskKQqlx/LzMzUtm3b5HA4dO/ePU2ePFljx47Vzp07lZmZqdLSUgUGBmr48OHav3+/pk+frs8//1wul0uNGzfW2bNnFRcXp9GjR0uS1q1bp7t372rhwoX/+PwBpqKQ4JMuXryoBQsWqGvXrvrss880d+5ctWvXTsHBwcrKypLD4dC0adP0ySefaMqUKXK5XGrSpIl27typ3NxcjR8/Xl27dq3RfRUXF2v79u3auHGjmjZtqh9//FGv
vfaaxo4dK0m6cuWKDh8+rMDAQO3cuVPSo1URkpOTdffuXaWmpurgwYNav369Ro8erYqKCm3fvl0ff/yxx+YH8EX8DQk+KSoqyl0oI0eO1KVLl7Rnzx6NHz9eNptNAQEBSk5O1rFjx9y3GT9+vPu2zz//vL777rsa3VejRo20fv16ffPNN1q9erXWr1+vkpIS93hkZKQCAwP/cB/9+/dXQUGBcnNzdfz4cbVq1Upt2rT5qw8beKpRSPBJv12hWnr0FRC/XwWroqKi0lda//Y2FRUVqlu3rmw2W6XbVffli3a7XcOGDVNeXp5efPFFvfXWW5XGGzZsWKO8ycnJyszM1I4dO5ScnPyntwFqGwoJPik3N1e5ubmSpG3btumFF15QfHy80tPTZVmWysvLlZGRoV69erlv83jV9gsXLujq1avq1q2bQkJCdPnyZZWVlcnpdOrIkSNV7uv8+fMKCQnR9OnT1bdvX/d1XC7XH2asW7dupUJ8+eWXdfDgQV24cEExMTH/7RQATx3+hgSf1Lx5c61evVp5eXkKCQnRypUr1ahRIy1btkxJSUlyOBzq27ev+2vGJen7779XRkaGKioqlJaWpiZNmqh3797q1q2b4uPjFRoaqu7du+unn36qdF+9e/dWZmam4uLi1KBBA3Xq1EkhISG6fv36H2bs2bOnZs6cKX9/fy1evFjNmjVThw4d1LZtW+O+wgQwAat9w+ecOnVK7733nvbs2VPj20RGRionJ8f95XTeUFhYqFGjRik9PV0tWrTwWg7AVLxkB/wDMjIylJCQoJSUFMoIeALOkAAARuAMCQBgBAoJAGAECgkAYAQKCQBgBAoJAGAECgkAYIT/B5VRJ992K6JTAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.set_theme(style=\"ticks\")\n", + "\n", + "# Show the joint distribution using kernel density estimation\n", + "g = sns.jointplot(\n", + " data=df,\n", + " x=\"popularity\", y=\"danceability\", hue=\"artist_top_genre\",\n", + " kind=\"kde\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages/seaborn/axisgrid.py:337: UserWarning: The `size` parameter has been renamed to `height`; please update your code.\n", + " warnings.warn(msg, UserWarning)\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.FacetGrid(df, hue=\"artist_top_genre\", size=5) \\\n", + " .map(plt.scatter, \"popularity\", \"danceability\") \\\n", + " .add_legend()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya tafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kwa usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, inashauriwa kutumia tafsiri ya kitaalamu ya binadamu. Hatutawajibika kwa maelewano mabaya au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" + }, + "kernelspec": { + "display_name": "Python 3.7.0 64-bit ('3.7')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.9" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "c61deff2839902ac8cb4ed411eb10fee", + "translation_date": "2025-09-06T14:09:34+00:00", + "source_file": "5-Clustering/1-Visualize/solution/notebook.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sw/5-Clustering/2-K-Means/notebook.ipynb b/translations/sw/5-Clustering/2-K-Means/notebook.ipynb new file mode 100644 index 000000000..4ca300902 --- /dev/null +++ 
b/translations/sw/5-Clustering/2-K-Means/notebook.ipynb @@ -0,0 +1,229 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "3e5c8ab363e8d88f566d4365efc7e0bd", + "translation_date": "2025-09-06T14:19:46+00:00", + "source_file": "5-Clustering/2-K-Means/notebook.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: seaborn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.11.1)\n", + "Requirement already satisfied: numpy>=1.15 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.19.2)\n", + "Requirement already satisfied: pandas>=0.23 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.1.2)\n", + "Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.4.1)\n", + "Requirement already satisfied: matplotlib>=2.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (3.1.0)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in 
/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2.8.0)\n", + "Requirement already satisfied: pytz>=2017.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2019.1)\n", + "Requirement already satisfied: cycler>=0.10 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (1.1.0)\n", + "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (2.4.0)\n", + "Requirement already satisfied: six>=1.5 in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from python-dateutil>=2.7.3->pandas>=0.23->seaborn) (1.12.0)\n", + "Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from kiwisolver>=1.0.1->matplotlib>=2.2->seaborn) (45.1.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install seaborn" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "0 Sparky Mandy & The Jungle \n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "2 LITT! LITT! 
\n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "0 Cruel Santino alternative r&b 2019 144000 48 \n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "2 AYLØ indie r&b 2018 207758 40 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "0 0.666 0.8510 0.420 0.534000 0.1100 -6.699 \n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "2 0.836 0.2720 0.564 0.000537 0.1100 -7.127 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "\n", + " speechiness tempo time_signature \n", + "0 0.0829 133.015 5 \n", + "1 0.3600 129.993 3 \n", + "2 0.0424 130.005 4 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
0SparkyMandy & The JungleCruel Santinoalternative r&b2019144000480.6660.85100.4200.5340000.1100-6.6990.0829133.0155
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
2LITT!LITT!AYLØindie r&b2018207758400.8360.27200.5640.0005370.1100-7.1270.0424130.0054
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
\n
" + }, + "metadata": {}, + "execution_count": 6 + } + ], + "source": [ + "\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "\n", + "\n", + "df = pd.read_csv(\"../data/nigerian-songs.csv\")\n", + "df.head()" + ] + }, + { + "source": [ + "Tutazingatia tu aina 3. Labda tunaweza kupata makundi 3 yaliyoundwa!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "metadata": {}, + "execution_count": 7 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "df = df[(df['artist_top_genre'] == 'afro dancehall') | (df['artist_top_genre'] == 'afropop') | (df['artist_top_genre'] == 'nigerian pop')]\n", + "df = df[(df['popularity'] > 0)]\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. 
\n", + "5 Kasala Pioneers \n", + "6 Pull Up Everything Pretty \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "5 DRB Lasgidi nigerian pop 2020 184800 26 \n", + "6 prettyboydo nigerian pop 2018 202648 29 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "5 0.803 0.1270 0.525 0.000007 0.1290 -10.034 \n", + "6 0.818 0.4520 0.587 0.004490 0.5900 -9.840 \n", + "\n", + " speechiness tempo time_signature \n", + "1 0.3600 129.993 3 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 \n", + "5 0.1970 100.103 4 \n", + "6 0.1990 95.842 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
5KasalaPioneersDRB Lasgidinigerian pop2020184800260.8030.12700.5250.0000070.1290-10.0340.1970100.1034
6Pull UpEverything Prettyprettyboydonigerian pop2018202648290.8180.45200.5870.0044900.5900-9.8400.199095.8424
\n
" + }, + "metadata": {}, + "execution_count": 8 + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya tafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kwa usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, inashauriwa kutumia tafsiri ya kitaalamu ya binadamu. Hatutawajibika kwa maelewano mabaya au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sw/5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb b/translations/sw/5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb new file mode 100644 index 000000000..24a35f0e0 --- /dev/null +++ b/translations/sw/5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb @@ -0,0 +1,642 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "anaconda-cloud": "", + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "3.4.1" + }, + "colab": { + "name": "lesson_14.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "coopTranslator": { + "original_hash": "ad65fb4aad0a156b42216e4929f490fc", + "translation_date": "2025-09-06T14:28:43+00:00", + "source_file": "5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb", + "language_code": "sw" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "GULATlQXLXyR" + }, + "source": [ + "## Chunguza K-Means clustering kwa kutumia R na kanuni za data safi.\n", + "\n", + "### [**Jaribio la kabla ya 
somo**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/29/)\n", + "\n", + "Katika somo hili, utajifunza jinsi ya kuunda makundi kwa kutumia kifurushi cha Tidymodels na vifurushi vingine katika mfumo wa R (tutaviita marafiki 🧑‍🤝‍🧑), pamoja na seti ya data ya muziki wa Nigeria uliyoingiza awali. Tutashughulikia misingi ya K-Means kwa Clustering. Kumbuka kwamba, kama ulivyojifunza katika somo la awali, kuna njia nyingi za kufanya kazi na makundi, na mbinu unayotumia inategemea data yako. Tutajaribu K-Means kwa kuwa ni mbinu ya kawaida zaidi ya clustering. Twende kazi!\n", + "\n", + "Maneno utakayojifunza:\n", + "\n", + "- Alama ya Silhouette\n", + "\n", + "- Njia ya Elbow\n", + "\n", + "- Inertia\n", + "\n", + "- Variance\n", + "\n", + "### **Utangulizi**\n", + "\n", + "[K-Means Clustering](https://wikipedia.org/wiki/K-means_clustering) ni mbinu inayotokana na uwanja wa usindikaji wa ishara. Inatumika kugawanya na kupanga vikundi vya data katika `k clusters` kulingana na kufanana kwa sifa zao.\n", + "\n", + "Makundi yanaweza kuonyeshwa kama [Voronoi diagrams](https://wikipedia.org/wiki/Voronoi_diagram), ambayo yanajumuisha nukta (au 'mbegu') na eneo lake linalohusiana.\n", + "\n", + "

\n", + " \n", + "

Infographic na Jen Looper
\n", + "\n", + "\n", + "Hatua za K-Means clustering ni kama ifuatavyo:\n", + "\n", + "1. Mwanasayansi wa data huanza kwa kutaja idadi ya makundi yanayotakiwa kuundwa.\n", + "\n", + "2. Kisha, algoriti huchagua kwa nasibu K uchunguzi kutoka seti ya data ili kutumika kama vituo vya awali vya makundi (yaani, centroids).\n", + "\n", + "3. Kisha, kila uchunguzi uliobaki unagawiwa kwa centroid yake ya karibu zaidi.\n", + "\n", + "4. Kisha, wastani mpya wa kila kundi unahesabiwa na centroid inahamishwa hadi wastani huo.\n", + "\n", + "5. Sasa kwamba vituo vimehesabiwa upya, kila uchunguzi unakaguliwa tena ili kuona kama unaweza kuwa karibu na kundi tofauti. Vitu vyote vinagawiwa tena kwa kutumia wastani wa makundi uliosasishwa. Hatua za kugawa makundi na kusasisha centroid hurudiwa mara kwa mara hadi mgawanyo wa makundi usibadilike tena (yaani, wakati mchakato unafikia muafaka). Kwa kawaida, algoriti hukoma wakati kila mzunguko mpya husababisha harakati ndogo za centroids na makundi yanakuwa thabiti.\n", + "\n", + "
\n", + "\n", + "> Kumbuka kwamba kutokana na nasibu ya uchunguzi wa awali wa k uliotumika kama centroids za kuanzia, tunaweza kupata matokeo tofauti kidogo kila tunapotekeleza utaratibu. Kwa sababu hii, algoriti nyingi hutumia *mianzo ya nasibu* kadhaa na kuchagua mzunguko wenye WCSS ya chini zaidi. Kwa hivyo, inashauriwa sana kila mara kuendesha K-Means na thamani kadhaa za *nstart* ili kuepuka *muafaka usiofaa wa ndani.*\n", + "\n", + "
\n", + "\n", + "Uhuishaji huu mfupi ukitumia [mchoro](https://github.com/allisonhorst/stats-illustrations) wa Allison Horst unaelezea mchakato wa clustering:\n", + "\n", + "

\n", + " \n", + "

Mchoro na @allison_horst
\n", + "\n", + "\n", + "\n", + "Swali la msingi linalojitokeza katika clustering ni hili: unajuaje ni makundi mangapi ya kugawanya data yako? Changamoto moja ya kutumia K-Means ni kwamba utahitaji kuanzisha `k`, yaani idadi ya `centroids`. Kwa bahati nzuri, `njia ya elbow` husaidia kukadiria thamani nzuri ya kuanzia kwa `k`. Utajaribu muda si mrefu.\n", + "\n", + "### **Mahitaji ya awali**\n", + "\n", + "\n", + "\n", + "Tutaanza moja kwa moja kutoka pale tulipoishia katika [somo la awali](https://github.com/microsoft/ML-For-Beginners/blob/main/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb), ambapo tulichambua seti ya data, tukafanya visualizations nyingi na kuchuja seti ya data kwa uchunguzi wa kuvutia. Hakikisha umeangalia!\n", + "\n", + "Tutahitaji vifurushi kadhaa ili kukamilisha moduli hii. Unaweza kuviweka kwa: `install.packages(c('tidyverse', 'tidymodels', 'cluster', 'summarytools', 'plotly', 'paletteer', 'factoextra', 'patchwork'))`\n", + "\n", + "Vinginevyo, script hapa chini hukagua kama una vifurushi vinavyohitajika kukamilisha moduli hii na kuvifunga kwako endapo vingine vinakosekana.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "ah_tBi58LXyi" + }, + "source": [ + "suppressWarnings(if(!require(\"pacman\")) install.packages(\"pacman\"))\n", + "\n", + "pacman::p_load('tidyverse', 'tidymodels', 'cluster', 'summarytools', 'plotly', 'paletteer', 'factoextra', 'patchwork')\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7e--UCUTLXym" + }, + "source": [ + "Tuanzie kazi mara moja!\n", + "\n", + "## 1. Mdundo na data: Punguza hadi aina 3 maarufu zaidi za muziki\n", + "\n", + "Huu ni muhtasari wa kile tulichofanya katika somo lililopita. 
Hebu tukate na kuchambua data!\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Ycamx7GGLXyn" + }, + "source": [ + "# Load the core tidyverse and make it available in your current R session\n", + "library(tidyverse)\n", + "\n", + "# Import the data into a tibble\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/5-Clustering/data/nigerian-songs.csv\", show_col_types = FALSE)\n", + "\n", + "# Narrow down to top 3 popular genres\n", + "nigerian_songs <- df %>% \n", + " # Concentrate on top 3 genres\n", + " filter(artist_top_genre %in% c(\"afro dancehall\", \"afropop\",\"nigerian pop\")) %>% \n", + " # Remove unclassified observations\n", + " filter(popularity != 0)\n", + "\n", + "\n", + "\n", + "# Visualize popular genres using bar plots\n", + "theme_set(theme_light())\n", + "nigerian_songs %>%\n", + " count(artist_top_genre) %>%\n", + " ggplot(mapping = aes(x = artist_top_genre, y = n,\n", + " fill = artist_top_genre)) +\n", + " geom_col(alpha = 0.8) +\n", + " paletteer::scale_fill_paletteer_d(\"ggsci::category10_d3\") +\n", + " ggtitle(\"Top genres\") +\n", + " theme(plot.title = element_text(hjust = 0.5))\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b5h5zmkPLXyp" + }, + "source": [ + "🤩 Hilo lilifanikiwa vizuri!\n", + "\n", + "## 2. Uchunguzi zaidi wa data.\n", + "\n", + "Je, data hii ni safi kiasi gani? Hebu tuangalie data zisizo za kawaida kwa kutumia grafu za sanduku (box plots). Tutazingatia safu za nambari zenye data chache zisizo za kawaida (ingawa unaweza kusafisha data zisizo za kawaida). Grafu za sanduku zinaweza kuonyesha wigo wa data na zitasaidia kuchagua ni safu zipi za kutumia. Kumbuka, grafu za sanduku hazionyeshi tofauti (variance), kipengele muhimu cha data nzuri inayoweza kugawanyika katika makundi. 
Tafadhali angalia [mjadala huu](https://stats.stackexchange.com/questions/91536/deduce-variance-from-boxplot) kwa maelezo zaidi.\n", + "\n", + "[Grafu za sanduku](https://en.wikipedia.org/wiki/Box_plot) hutumika kuonyesha kwa picha usambazaji wa data ya `nambari`, kwa hivyo hebu tuanze kwa *kuchagua* safu zote za nambari pamoja na aina maarufu za muziki.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "HhNreJKLLXyq" + }, + "source": [ + "# Select top genre column and all other numeric columns\n", + "df_numeric <- nigerian_songs %>% \n", + " select(artist_top_genre, where(is.numeric)) \n", + "\n", + "# Display the data\n", + "df_numeric %>% \n", + " slice_head(n = 5)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uYXrwJRaLXyq" + }, + "source": [ + "Tazama jinsi msaidizi wa kuchagua `where` unavyorahisisha hili 💁? Chunguza kazi nyingine kama hizi [hapa](https://tidyselect.r-lib.org/).\n", + "\n", + "Kwa kuwa tutakuwa tunatengeneza boxplot kwa kila kipengele cha nambari na tunataka kuepuka kutumia loops, hebu tuweke data yetu katika muundo *mrefu zaidi* ambao utatuwezesha kutumia `facets` - chati ndogo ambazo kila moja inaonyesha sehemu moja ya data.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "gd5bR3f8LXys" + }, + "source": [ + "# Pivot data from wide to long\n", + "df_numeric_long <- df_numeric %>% \n", + " pivot_longer(!artist_top_genre, names_to = \"feature_names\", values_to = \"values\") \n", + "\n", + "# Print out data\n", + "df_numeric_long %>% \n", + " slice_head(n = 15)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-7tE1swnLXyv" + }, + "source": [ + "Sasa ni muda wa `ggplots` zaidi! 
Kwa hivyo tutatumia `geom` gani?\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "r88bIsyuLXyy" + }, + "source": [ + "# Make a box plot\n", + "df_numeric_long %>% \n", + " ggplot(mapping = aes(x = feature_names, y = values, fill = feature_names)) +\n", + " geom_boxplot() +\n", + " facet_wrap(~ feature_names, ncol = 4, scales = \"free\") +\n", + " theme(legend.position = \"none\")\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EYVyKIUELXyz" + }, + "source": [ + "Rahisi-gg!\n", + "\n", + "Sasa tunaweza kuona kuwa data hii ina kelele kidogo: kwa kuchunguza kila safu kama boxplot, unaweza kuona data zilizotengwa (outliers). Unaweza kupitia seti ya data na kuondoa data hizi zilizotengwa, lakini kufanya hivyo kutafanya data kuwa ndogo sana.\n", + "\n", + "Kwa sasa, hebu tuchague safu ambazo tutatumia kwa zoezi letu la kugawanya makundi. Hebu tuchague safu za nambari zenye viwango vinavyofanana. Tunaweza kubadilisha `artist_top_genre` kuwa nambari lakini kwa sasa tutaiacha.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "-wkpINyZLXy0" + }, + "source": [ + "# Select variables with similar ranges\n", + "df_numeric_select <- df_numeric %>% \n", + " select(popularity, danceability, acousticness, loudness, energy) \n", + "\n", + "# Normalize data\n", + "# df_numeric_select <- scale(df_numeric_select)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D7dLzgpqLXy1" + }, + "source": [ + "## 3. Kuhesabu k-means clustering katika R\n", + "\n", + "Tunaweza kuhesabu k-means katika R kwa kutumia kazi ya ndani `kmeans`, angalia `help(\"kmeans()\")`. Kazi ya `kmeans()` inakubali fremu ya data yenye safu zote za nambari kama hoja yake kuu.\n", + "\n", + "Hatua ya kwanza wakati wa kutumia k-means clustering ni kubainisha idadi ya makundi (k) ambayo yatatengenezwa katika suluhisho la mwisho. 
Tunajua kuna aina 3 za muziki ambazo tumechambua kutoka kwenye seti ya data, kwa hivyo hebu tujaribu 3:\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "uC4EQ5w7LXy5" + }, + "source": [ + "set.seed(2056)\n", + "# Kmeans clustering for 3 clusters\n", + "kclust <- kmeans(\n", + " df_numeric_select,\n", + " # Specify the number of clusters\n", + " centers = 3,\n", + " # How many random initial configurations\n", + " nstart = 25\n", + ")\n", + "\n", + "# Display clustering object\n", + "kclust\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hzfhscWrLXy-" + }, + "source": [ + "Kitu cha kmeans kina taarifa kadhaa ambazo zimeelezewa vizuri katika `help(\"kmeans()\")`. Kwa sasa, hebu tuzingatie chache. Tunaona kwamba data imegawanywa katika makundi 3 yenye ukubwa wa 65, 110, 111. Matokeo pia yanajumuisha vituo vya makundi (wastani) kwa makundi 3 katika vigezo 5.\n", + "\n", + "Vector ya clustering ni mgawanyo wa kundi kwa kila uchunguzi. Hebu tutumie kazi ya `augment` kuongeza mgawanyo wa kundi kwenye seti ya data ya awali.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "0XwwpFGQLXy_" + }, + "source": [ + "# Add predicted cluster assignment to data set\n", + "augment(kclust, df_numeric_select) %>% \n", + " relocate(.cluster) %>% \n", + " slice_head(n = 10)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NXIVXXACLXzA" + }, + "source": [ + "Perfect, tumegawanya seti yetu ya data katika vikundi 3. Sasa, je, makundi yetu ni mazuri kiasi gani 🤷? Hebu tuangalie `Silhouette score`\n", + "\n", + "### **Silhouette score**\n", + "\n", + "[Uchambuzi wa Silhouette](https://en.wikipedia.org/wiki/Silhouette_(clustering)) unaweza kutumika kuchunguza umbali wa kutenganisha kati ya makundi yaliyopatikana. 
Alama hii inatofautiana kutoka -1 hadi 1, na ikiwa alama iko karibu na 1, kundi ni lenye msongamano na limetenganishwa vizuri na makundi mengine. Thamani karibu na 0 inaonyesha makundi yanayofungamana na sampuli zikiwa karibu sana na mpaka wa maamuzi wa makundi jirani. [chanzo](https://dzone.com/articles/kmeans-silhouette-score-explained-with-python-exam).\n", + "\n", + "Njia ya wastani ya silhouette inahesabu wastani wa silhouette wa uchunguzi kwa thamani tofauti za *k*. Alama ya wastani ya silhouette ya juu inaonyesha upangaji mzuri wa makundi.\n", + "\n", + "`silhouette` ni kazi katika kifurushi cha cluster inayotumika kuhesabu upana wa wastani wa silhouette.\n", + "\n", + "> Silhouette inaweza kuhesabiwa kwa kutumia kipimo chochote cha [umbali](https://en.wikipedia.org/wiki/Distance \"Distance\"), kama vile [umbali wa Euclidean](https://en.wikipedia.org/wiki/Euclidean_distance \"Euclidean distance\") au [umbali wa Manhattan](https://en.wikipedia.org/wiki/Manhattan_distance \"Manhattan distance\") ambao tulijadili katika [somo lililopita](https://github.com/microsoft/ML-For-Beginners/blob/main/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb).\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Jn0McL28LXzB" + }, + "source": [ + "# Load cluster package\n", + "library(cluster)\n", + "\n", + "# Compute average silhouette score\n", + "ss <- silhouette(kclust$cluster,\n", + " # Compute euclidean distance\n", + " dist = dist(df_numeric_select))\n", + "mean(ss[, 3])\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QyQRn97nLXzC" + }, + "source": [ + "Alama yetu ni **.549**, kwa hivyo iko katikati. Hii inaonyesha kuwa data yetu haifai sana kwa aina hii ya ugawanyaji. Hebu tuone kama tunaweza kuthibitisha hisia hii kwa njia ya kuona. 
[Kifurushi cha factoextra](https://rpkgs.datanovia.com/factoextra/index.html) kinatoa kazi (`fviz_cluster()`) za kuonyesha ugawanyaji.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "7a6Km1_FLXzD" + }, + "source": [ + "library(factoextra)\n", + "\n", + "# Visualize clustering results\n", + "fviz_cluster(kclust, df_numeric_select)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IBwCWt-0LXzD" + }, + "source": [ + "Kuwepo kwa mwingiliano katika makundi kunaonyesha kuwa data yetu haifai sana kwa aina hii ya ugawaji, lakini tuendelee.\n", + "\n", + "## 4. Kuamua idadi bora ya makundi\n", + "\n", + "Swali la msingi ambalo mara nyingi hujitokeza katika ugawaji wa K-Means ni hili - bila lebo za darasa zinazojulikana, unajuaje ni makundi mangapi ya kugawa data yako?\n", + "\n", + "Njia moja ya kujaribu kujua ni kutumia sampuli ya data `kuunda mfululizo wa mifano ya ugawaji` na idadi inayoongezeka ya makundi (kwa mfano kutoka 1-10), na kutathmini vipimo vya ugawaji kama vile **Silhouette score.**\n", + "\n", + "Hebu tuamue idadi bora ya makundi kwa kuhesabu algoriti ya ugawaji kwa thamani tofauti za *k* na kutathmini **Within Cluster Sum of Squares** (WCSS). Jumla ya WCSS inapima ukaribu wa ugawaji, na tunataka iwe ndogo iwezekanavyo, ambapo thamani za chini zinaonyesha kuwa vidokezo vya data viko karibu zaidi.\n", + "\n", + "Hebu tuchunguze athari za chaguo tofauti za `k`, kutoka 1 hadi 10, kwenye ugawaji huu.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "hSeIiylDLXzE" + }, + "source": [ + "# Create a series of clustering models\n", + "kclusts <- tibble(k = 1:10) %>% \n", + " # Perform kmeans clustering for 1,2,3 ... 
,10 clusters\n", + " mutate(model = map(k, ~ kmeans(df_numeric_select, centers = .x, nstart = 25)),\n", + " # Farm out clustering metrics eg WCSS\n", + " glanced = map(model, ~ glance(.x))) %>% \n", + " unnest(cols = glanced)\n", + " \n", + "\n", + "# View clustering results\n", + "kclusts\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "m7rS2U1eLXzE" + }, + "source": [ + "Sasa kwa kuwa tuna jumla ya ndani ya mraba wa makundi (tot.withinss) kwa kila algoriti ya kugawanya na kituo *k*, tunatumia [mbinu ya kiwiko](https://en.wikipedia.org/wiki/Elbow_method_(clustering)) kutafuta idadi bora ya makundi. Mbinu hii inahusisha kuchora WCSS kama kazi ya idadi ya makundi, na kuchagua [kiwiko cha mchepuko](https://en.wikipedia.org/wiki/Elbow_of_the_curve \"Elbow of the curve\") kama idadi ya makundi ya kutumia.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "o_DjHGItLXzF" + }, + "source": [ + "set.seed(2056)\n", + "# Use elbow method to determine optimum number of clusters\n", + "kclusts %>% \n", + " ggplot(mapping = aes(x = k, y = tot.withinss)) +\n", + " geom_line(size = 1.2, alpha = 0.8, color = \"#FF7F0EFF\") +\n", + " geom_point(size = 2, color = \"#FF7F0EFF\")\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pLYyt5XSLXzG" + }, + "source": [ + "Grafu inaonyesha kupungua kubwa kwa WCSS (kwa hivyo *ukaribu zaidi*) kadri idadi ya makundi inavyoongezeka kutoka moja hadi mawili, na kupungua zaidi kunakoonekana kutoka makundi mawili hadi matatu. Baada ya hapo, kupungua hakutambuliki sana, na kusababisha `kiwiko` 💪 kwenye grafu karibu na makundi matatu. 
Hii ni ishara nzuri kwamba kuna makundi mawili hadi matatu ya alama za data ambazo zimetenganishwa vyema.\n", + "\n", + "Sasa tunaweza kuendelea na kutoa modeli ya makundi ambapo `k = 3`:\n", + "\n", + "> `pull()`: hutumika kutoa safu moja\n", + ">\n", + "> `pluck()`: hutumika kufikia kipengele ndani ya miundo ya data kama vile orodha kwa kutumia faharasa\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "JP_JPKBILXzG" + }, + "source": [ + "# Extract k = 3 clustering\n", + "final_kmeans <- kclusts %>% \n", + " filter(k == 3) %>% \n", + " pull(model) %>% \n", + " pluck(1)\n", + "\n", + "\n", + "final_kmeans\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l_PDTu8tLXzI" + }, + "source": [ + "Hebu tuendelee na tuone makundi tuliyopata. Unapenda kuwa na mwingiliano kwa kutumia `plotly`?\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "dNcleFe-LXzJ" + }, + "source": [ + "# Add predicted cluster assignment to data set\n", + "results <- augment(final_kmeans, df_numeric_select) %>% \n", + " bind_cols(df_numeric %>% select(artist_top_genre)) \n", + "\n", + "# Plot cluster assignments\n", + "clust_plt <- results %>% \n", + " ggplot(mapping = aes(x = popularity, y = danceability, color = .cluster, shape = artist_top_genre)) +\n", + " geom_point(size = 2, alpha = 0.8) +\n", + " paletteer::scale_color_paletteer_d(\"ggthemes::Tableau_10\")\n", + "\n", + "ggplotly(clust_plt)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6JUM_51VLXzK" + }, + "source": [ + "Labda tungetarajia kwamba kila kundi (linalowakilishwa na rangi tofauti) lingekuwa na aina tofauti za muziki (zinazowakilishwa na maumbo tofauti).\n", + "\n", + "Hebu tuangalie usahihi wa mfano.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "HdIMUGq7LXzL" + }, + "source": [ + "# Assign genres to predefined integers\n", + "label_count <- results %>% \n", + " group_by(artist_top_genre) %>% \n", 
+ " mutate(id = cur_group_id()) %>% \n", + " ungroup() %>% \n", + " summarise(correct_labels = sum(.cluster == id))\n", + "\n", + "\n", + "# Print results \n", + "cat(\"Result:\", label_count$correct_labels, \"out of\", nrow(results), \"samples were correctly labeled.\")\n", + "\n", + "cat(\"\\nAccuracy score:\", label_count$correct_labels/nrow(results))\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "C50wvaAOLXzM" + }, + "source": [ + "Usahihi wa modeli hii si mbaya, lakini si mzuri sana. Inawezekana kuwa data haifai vizuri kwa K-Means Clustering. Data hii haina uwiano mzuri, haina uhusiano wa kutosha, na kuna tofauti kubwa sana kati ya thamani za safu ili kuunda makundi vizuri. Kwa kweli, makundi yanayoundwa huenda yameathiriwa sana au kupotoshwa na zile kategoria tatu za muziki tulizotaja hapo juu.\n", + "\n", + "Hata hivyo, hilo lilikuwa somo la kujifunza!\n", + "\n", + "Katika nyaraka za Scikit-learn, unaweza kuona kuwa modeli kama hii, yenye makundi yasiyo na mipaka dhahiri, ina tatizo la 'tofauti':\n", + "\n", + "

\n", + " \n", + "

Picha kutoka Scikit-learn
\n", + "\n", + "\n", + "\n", + "## **Tofauti**\n", + "\n", + "Tofauti inafafanuliwa kama \"wastani wa tofauti za mraba kutoka kwa wastani\" [chanzo](https://www.mathsisfun.com/data/standard-deviation.html). Katika muktadha wa tatizo hili la clustering, inahusu data ambapo namba za seti yetu ya data zina mwelekeo wa kutofautiana sana kutoka kwa wastani.\n", + "\n", + "✅ Huu ni wakati mzuri wa kufikiria njia zote unazoweza kutumia kurekebisha tatizo hili. Je, urekebishe data kidogo zaidi? Utumie safu tofauti? Utumie algoriti tofauti? Kidokezo: Jaribu [kupanua data yako](https://www.mygreatlearning.com/blog/learning-data-science-with-k-means-clustering/) ili kuifanya iwe ya kawaida na ujaribu safu nyingine.\n", + "\n", + "> Jaribu '[kikokotoo cha tofauti](https://www.calculatorsoup.com/calculators/statistics/variance-calculator.php)' ili kuelewa dhana hii zaidi.\n", + "\n", + "------------------------------------------------------------------------\n", + "\n", + "## **🚀Changamoto**\n", + "\n", + "Tumia muda na daftari hili, ukibadilisha vigezo. Je, unaweza kuboresha usahihi wa modeli kwa kusafisha data zaidi (kwa mfano, kuondoa data zisizo za kawaida)? Unaweza kutumia uzito ili kutoa uzito zaidi kwa sampuli fulani za data. Je, ni nini kingine unaweza kufanya ili kuunda makundi bora?\n", + "\n", + "Kidokezo: Jaribu kupanua data yako. Kuna msimbo uliowekwa kama maoni (commented out) katika daftari unaoongeza upanuzi wa kawaida ili kufanya safu za data zifanane zaidi kwa karibu katika suala la masafa. Utagundua kuwa ingawa alama ya silhouette inashuka, 'kink' katika grafu ya kiwiko inakuwa laini. Hii ni kwa sababu kuacha data bila kupanuliwa kunaruhusu data yenye tofauti ndogo kuwa na uzito zaidi. 
Soma zaidi kuhusu tatizo hili [hapa](https://stats.stackexchange.com/questions/21222/are-mean-normalization-and-feature-scaling-needed-for-k-means-clustering/21226#21226).\n", + "\n", + "## [**Maswali ya baada ya somo**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/30/)\n", + "\n", + "## **Mapitio na Kujisomea**\n", + "\n", + "- Angalia Simulator ya K-Means [kama hii](https://user.ceng.metu.edu.tr/~akifakkus/courses/ceng574/k-means/). Unaweza kutumia zana hii kuona alama za data za sampuli na kubaini centroids zake. Unaweza kuhariri nasibu ya data, idadi ya makundi na idadi ya centroids. Je, hii inakusaidia kupata wazo la jinsi data inaweza kugawanywa?\n", + "\n", + "- Pia, angalia [karatasi hii kuhusu K-Means](https://stanford.edu/~cpiech/cs221/handouts/kmeans.html) kutoka Stanford.\n", + "\n", + "Unataka kujaribu ujuzi wako mpya wa clustering kwenye seti za data zinazofaa kwa K-Means clustering? Tafadhali angalia:\n", + "\n", + "- [Fanya mafunzo na tathmini modeli za clustering](https://rpubs.com/eR_ic/clustering) ukitumia Tidymodels na marafiki\n", + "\n", + "- [Uchambuzi wa K-Means Cluster](https://uc-r.github.io/kmeans_clustering), Mwongozo wa Programu ya R ya UC Business Analytics\n", + "\n", + "- [K-Means clustering kwa kanuni za data iliyopangwa](https://www.tidymodels.org/learn/statistics/k-means/)\n", + "\n", + "## **Kazi**\n", + "\n", + "[Jaribu mbinu tofauti za clustering](https://github.com/microsoft/ML-For-Beginners/blob/main/5-Clustering/2-K-Means/assignment.md)\n", + "\n", + "## ASANTE KWA:\n", + "\n", + "[Jen Looper](https://www.twitter.com/jenlooper) kwa kuunda toleo la awali la Python la moduli hii ♥️\n", + "\n", + "[`Allison Horst`](https://twitter.com/allison_horst/) kwa kuunda michoro ya kushangaza inayofanya R kuwa ya kuvutia na ya kupendeza zaidi. 
Pata michoro zaidi kwenye [galeria yake](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM).\n", + "\n", + "Jifunze kwa furaha,\n", + "\n", + "[Eric](https://twitter.com/ericntay), Balozi wa Wanafunzi wa Microsoft Learn wa Dhahabu.\n", + "\n", + "

\n", + " \n", + "

Sanaa na @allison_horst
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya tafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kwa usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, inashauriwa kutumia huduma ya tafsiri ya kitaalamu ya binadamu. Hatutawajibika kwa maelewano mabaya au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sw/5-Clustering/2-K-Means/solution/notebook.ipynb b/translations/sw/5-Clustering/2-K-Means/solution/notebook.ipynb new file mode 100644 index 000000000..c50c550d4 --- /dev/null +++ b/translations/sw/5-Clustering/2-K-Means/solution/notebook.ipynb @@ -0,0 +1,544 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "e867e87e3129c8875423a82945f4ad5e", + "translation_date": "2025-09-06T14:21:24+00:00", + "source_file": "5-Clustering/2-K-Means/solution/notebook.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": 
[ + "Requirement already satisfied: seaborn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.11.1)\n", + "Requirement already satisfied: pandas>=0.23 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.1.2)\n", + "Requirement already satisfied: matplotlib>=2.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (3.1.0)\n", + "Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.4.1)\n", + "Requirement already satisfied: numpy>=1.15 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.19.2)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2.8.0)\n", + "Requirement already satisfied: pytz>=2017.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2019.1)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (1.1.0)\n", + "Requirement already satisfied: cycler>=0.10 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n", + "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (2.4.0)\n", + "Requirement already satisfied: six>=1.5 in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from python-dateutil>=2.7.3->pandas>=0.23->seaborn) (1.12.0)\n", + "Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from 
kiwisolver>=1.0.1->matplotlib>=2.2->seaborn) (45.1.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install seaborn" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "0 Sparky Mandy & The Jungle \n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "2 LITT! LITT! \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "0 Cruel Santino alternative r&b 2019 144000 48 \n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "2 AYLØ indie r&b 2018 207758 40 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "0 0.666 0.8510 0.420 0.534000 0.1100 -6.699 \n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "2 0.836 0.2720 0.564 0.000537 0.1100 -7.127 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "\n", + " speechiness tempo time_signature \n", + "0 0.0829 133.015 5 \n", + "1 0.3600 129.993 3 \n", + "2 0.0424 130.005 4 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
0SparkyMandy & The JungleCruel Santinoalternative r&b2019144000480.6660.85100.4200.5340000.1100-6.6990.0829133.0155
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
2LITT!LITT!AYLØindie r&b2018207758400.8360.27200.5640.0005370.1100-7.1270.0424130.0054
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
\n
" + }, + "metadata": {}, + "execution_count": 11 + } + ], + "source": [ + "\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "\n", + "\n", + "df = pd.read_csv(\"../../data/nigerian-songs.csv\")\n", + "df.head()" + ] + }, + { + "source": [ + "Tutazingatia tu aina 3. Labda tunaweza kupata makundi 3 yaliyoundwa!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "metadata": {}, + "execution_count": 12 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "df = df[(df['artist_top_genre'] == 'afro dancehall') | (df['artist_top_genre'] == 'afropop') | (df['artist_top_genre'] == 'nigerian pop')]\n", + "df = df[(df['popularity'] > 0)]\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. 
\n", + "5 Kasala Pioneers \n", + "6 Pull Up Everything Pretty \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "5 DRB Lasgidi nigerian pop 2020 184800 26 \n", + "6 prettyboydo nigerian pop 2018 202648 29 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "5 0.803 0.1270 0.525 0.000007 0.1290 -10.034 \n", + "6 0.818 0.4520 0.587 0.004490 0.5900 -9.840 \n", + "\n", + " speechiness tempo time_signature \n", + "1 0.3600 129.993 3 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 \n", + "5 0.1970 100.103 4 \n", + "6 0.1990 95.842 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
5KasalaPioneersDRB Lasgidinigerian pop2020184800260.8030.12700.5250.0000070.1290-10.0340.1970100.1034
6Pull UpEverything Prettyprettyboydonigerian pop2018202648290.8180.45200.5870.0044900.5900-9.8400.199095.8424
\n
" + }, + "metadata": {}, + "execution_count": 13 + } + ], + "source": [ + "df.head()" + ] + }, + { + "source": [ + "Je, data hii ni safi kiasi gani? Angalia data isiyo ya kawaida kwa kutumia boxplots. Tutazingatia safu zenye data isiyo ya kawaida kidogo (ingawa unaweza kuondoa data isiyo ya kawaida). Boxplots zinaweza kuonyesha wigo wa data na zitasaidia kuchagua safu za kutumia. Kumbuka, Boxplots haziwezi kuonyesha tofauti (variance), kipengele muhimu cha data nzuri inayoweza kugawanyika katika makundi (https://stats.stackexchange.com/questions/91536/deduce-variance-from-boxplot)\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 14 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.figure(figsize=(20,20), dpi=200)\n", + "\n", + "plt.subplot(4,3,1)\n", + "sns.boxplot(x = 'popularity', data = df)\n", + "\n", + "plt.subplot(4,3,2)\n", + "sns.boxplot(x = 'acousticness', data = df)\n", + "\n", + "plt.subplot(4,3,3)\n", + "sns.boxplot(x = 'energy', data = df)\n", + "\n", + "plt.subplot(4,3,4)\n", + "sns.boxplot(x = 'instrumentalness', data = df)\n", + "\n", + "plt.subplot(4,3,5)\n", + "sns.boxplot(x = 'liveness', data = df)\n", + "\n", + "plt.subplot(4,3,6)\n", + "sns.boxplot(x = 'loudness', data = df)\n", + "\n", + "plt.subplot(4,3,7)\n", + "sns.boxplot(x = 'speechiness', data = df)\n", + "\n", + "plt.subplot(4,3,8)\n", + "sns.boxplot(x = 'tempo', data = df)\n", + "\n", + "plt.subplot(4,3,9)\n", + "sns.boxplot(x = 
'time_signature', data = df)\n", + "\n", + "plt.subplot(4,3,10)\n", + "sns.boxplot(x = 'danceability', data = df)\n", + "\n", + "plt.subplot(4,3,11)\n", + "sns.boxplot(x = 'length', data = df)\n", + "\n", + "plt.subplot(4,3,12)\n", + "sns.boxplot(x = 'release_date', data = df)" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import LabelEncoder, StandardScaler\n", + "le = LabelEncoder()\n", + "\n", + "# scaler = StandardScaler()\n", + "\n", + "X = df.loc[:, ('artist_top_genre','popularity','danceability','acousticness','loudness','energy')]\n", + "\n", + "y = df['artist_top_genre']\n", + "\n", + "X['artist_top_genre'] = le.fit_transform(X['artist_top_genre'])\n", + "\n", + "# X = scaler.fit_transform(X)\n", + "\n", + "y = le.transform(y)\n", + "\n" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 0, 2, 1, 1, 0, 1, 0, 0,\n", + " 0, 1, 0, 2, 0, 0, 2, 2, 1, 1, 0, 2, 2, 2, 2, 1, 1, 0, 2, 0, 2, 0,\n", + " 2, 0, 0, 1, 1, 2, 1, 0, 0, 2, 2, 2, 2, 1, 1, 0, 1, 2, 2, 1, 2, 2,\n", + " 1, 2, 1, 2, 2, 1, 1, 1, 1, 1, 2, 1, 2, 2, 0, 2, 1, 1, 1, 2, 2, 2,\n", + " 2, 1, 2, 2, 2, 2, 1, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 2, 1, 2, 0,\n", + " 1, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 0, 1, 1, 1, 1, 0, 1, 2, 1, 2,\n", + " 1, 2, 2, 2, 0, 2, 1, 1, 1, 2, 1, 0, 1, 2, 2, 1, 1, 1, 0, 1, 2, 2,\n", + " 2, 1, 1, 0, 1, 2, 1, 1, 1, 1, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2,\n", + " 0, 1, 0, 0, 1, 0, 0, 2, 0, 0, 1, 1, 2, 0, 2, 2, 0, 2, 2, 1, 1, 0,\n", + " 1, 1, 0, 0, 1, 0, 2, 0, 1, 0, 2, 0, 0, 2, 2, 2, 1, 1, 1, 1, 1, 0,\n", + " 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2,\n", + " 1, 1, 0, 0, 1, 1, 2, 0, 
0, 0, 0, 0, 2, 0, 0, 2, 1, 1, 1, 2, 2, 2,\n", + " 1, 2, 1, 2, 1, 1, 1, 0, 2, 2, 2, 1, 2, 1, 0, 1, 2, 1, 1, 1, 2, 1],\n", + " dtype=int32)" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ], + "source": [ + "\n", + "from sklearn.cluster import KMeans\n", + "\n", + "nclusters = 3 \n", + "seed = 0\n", + "\n", + "km = KMeans(n_clusters=nclusters, random_state=seed)\n", + "km.fit(X)\n", + "\n", + "# Predict the cluster for each data point\n", + "\n", + "y_cluster_kmeans = km.predict(X)\n", + "y_cluster_kmeans" + ] + }, + { + "source": [ + "Nambari hizo hazimaanishi mengi kwetu, kwa hivyo wacha tupate 'alama ya silhouette' ili kuona usahihi. Alama yetu iko katikati.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.5466747351275563" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ], + "source": [ + "from sklearn import metrics\n", + "score = metrics.silhouette_score(X, y_cluster_kmeans)\n", + "score" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.cluster import KMeans\n", + "wcss = []\n", + "\n", + "for i in range(1, 11):\n", + " kmeans = KMeans(n_clusters = i, init = 'k-means++', random_state = 42)\n", + " kmeans.fit(X)\n", + " wcss.append(kmeans.inertia_)" + ] + }, + { + "source": [ + "Tumia mfano huo kuamua, kwa kutumia Mbinu ya Elbow, idadi bora ya vikundi vya kujenga\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variables as keyword args: x, y. 
From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.\n FutureWarning\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAnAAAAFNCAYAAACAH1JNAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nO3de5hdZX33//d3coIQgSQEUkJMQE4iVMABgyIp4SyHhD3wCLWK/VEpLSii9Kn2oG3110ptK1JbWypWeMoD8kNIEJCDnEU5hIMEpEIKBMIpkUA4J4R8f3+sNWYnmWRmktmz9t7zfl3XvvZa91p7z3c5l5kP973WfUdmIkmSpNbRUXUBkiRJ6h8DnCRJUosxwEmSJLUYA5wkSVKLMcBJkiS1GAOcJElSizHASdI6RMSnIuKndfsZETtWWZMkgQFOkoiIJyPizYh4re717arrkqR1McBJUuHozBxT9zq96oIkaV0McJLUPx+NiMcj4tcR8Y2I6ACIiI6I+IuIWBARiyLiwojYojx2QUR8odyeVA7FnlbuvycilnR/jyT1hf9gSFL/HAt0AnsDM4H/p2z/VPk6ENgBGAN0D8PeCvxOuT0deBw4oG7/9sxc2diyJbUTA5wkFWZHxMt1r0+v47yzM3NJZj4FnAOcWLZ/HPinzHw8M18DvgScEBHDKQLc/mUv2wHA3wMfLj83vTwuSX1mgJOkwqzM3LLu9R/rOO/puu0FwLbl9rblfv2x4cA2mfk/wOvAnsBHgKuAZyNiFwxwkjaAAU6S+mdy3fa7gWfL7WeBKWscWwG8UO7fChwHjMzMZ8r9k4CxwAONLFhS+zHASVL//ElEjI2IycAZwA/K9ouBMyNi+4gYA/wt8IPMXFEevxU4Hbit3L+l3P9pZr4zaNVLagvDqy5AkprEjyKiPkjdAMzp4bw5wL3AFsD3gfPL9u9RDKPeBmwCXAd8pu5ztwLvYlWA+ykwum5fkvosMrPqGiRJktQPDqFKkiS1GAOcJElSizHASZIktRgDnCRJUosxwEmSJLWYITeNyFZbbZVTp06tugxJkqRe3Xv
vvb/OzAlrtg+5ADd16lTmzp1bdRmSJEm9iogFPbU7hCpJktRiDHCSJEktxgAnSZLUYgxwkiRJLcYAJ0mS1GIMcJIkSS3GACdJktRihtw8cA21ciUsWgTLlsGoUbD11tBhRpYkSQPLdDFQVq6EefNg2jSYOrV4nzevaJckSRpABriBsmgRzJwJC8oJkxcsKPYXLaq2LkmS1HYMcANl2bJV4a3bggVFuyRJ0gAywA2UUaNgypTV26ZMKdolSZIGkAFuoGy9NcyZsyrETZlS7G+9dbV1SZKktuNTqAOlowP22APuvBMefxzeeqvY9ylUSZI0wEwXA6mjAyZOhGuugUMPhSVLqq5IkiS1IQNcI9Rq8M47cOWVVVciSZLakAGuEfbaq5gL7vLLq65EkiS1IQNcI0QUvXA33ACvvFJ1NZIkqc0Y4BqlqwuWL4err666EkmS1GYMcI0ybVrxQIPDqJIkaYAZ4BqlowOOPbZ4IvWNN6quRpIktREDXCN1dRXh7frrq65EkiS1EQNcIx1wAIwbBz/8YdWVSJKkNmKAa6QRI2DmTPjRj4oHGiRJkgaAAa7RajVYuhRuuqnqSiRJUpswwDXawQfDu97l06iSJGnAGOAabZNN4MgjYfbsYnktSZKkjWSAGwy1GixeDD/9adWVSJKkNmCAGwxHHFH0xDmMKkmSBoABbjCMGQOHHVYEuJUrq65GkiS1OAPcYOnqgoULYe7cqiuRJEktzgA3WI46CoYPd1JfSZK00Qxwg2XsWDjooGIYNbPqaiRJUgszwA2mWg3mz4eHHqq6EkmS1MIMcINp5kyIcBhVkiRtFAPcYNpmG/jIR5xORJIkbRQD3GCr1WDePHjssaorkSRJLcoAN9hqteLdXjhJkrSBGhrgIuLMiHg4Ih6KiIsjYpOI2D4i7oqI+RHxg4gYWZ47qtyfXx6fWvc9XyrbfxURh9W1H162zY+ILzbyWgbM5Mmwzz7eBydJkjZYwwJcREwCPgt0ZubuwDDgBOBs4JuZuSPwEnBy+ZGTgZfK9m+W5xERu5Wfex9wOPCvETEsIoYB/wIcAewGnFie2/y6uuCee+Cpp6quRJIktaBGD6EOBzaNiOHAaOA5YAZwWXn8AmBWuT2z3Kc8flBERNl+SWYuy8wngPnAvuVrfmY+npnLgUvKc5vfsccW77NnV1uHJElqSQ0LcJn5DPAPwFMUwW0pcC/wcmauKE9bCEwqtycBT5efXVGeP76+fY3PrKu9+e28M+y+u8OokiRpgzRyCHUsRY/Y9sC2wGYUQ6CDLiJOiYi5ETF38eLFVZSwtq4uuP12eOGFqiuRJEktppFDqAcDT2Tm4sx8G7gc+DCwZTmkCrAd8Ey5/QwwGaA8vgXwYn37Gp9ZV/taMvO8zOzMzM4JEyYMxLVtvFqtWFJrzpyqK5EkSS2mkQHuKWBaRIwu72U7CPglcDNwXHnOSUB3grmy3Kc8flNmZtl+QvmU6vbATsDdwD3ATuVTrSMpHnS4soHXM7D22AN23NHpRCRJUr818h64uygeRrgPmFf+rPOAPwU+HxHzKe5xO7/8yPnA+LL988AXy+95GLiUIvxdC5yWme+U98mdDlwHPAJcWp7bGiKKXrgbb4SXXqq6GkmS1EKi6OQaOjo7O3Pu3LlVl1G4+2744AfhwgvhE5+ouhpJktRkIuLezOxcs92VGKrU2QnbbecwqiRJ6hcDXJU6Ooo54a69Fl57repqJElSizDAVa2rC956qwhxkiRJfWCAq9r++8OECU7qK0mS+swAV7Vhw2DWLLjqqqInTpIkqRcGuGZQqxX3wP3kJ1VXIkmSWoABrhnMmAFbbOHTqJIkqU8McM1g5Eg4+uhiWa0VK6quRpIkNTkDXLOo1WDJErj11qorkSRJTc4A1ywOOwxGj3YYVZIk9coA1yxGj4YjjoArroCVK6uuRpIkNTEDXDPp6oLnnoM776y6EkmS1MQMcM3kyCOLBxqc1FeSJK2HAa6ZbL4
5HHJIcR9cZtXVSJKkJmWAaza1Gjz5JDzwQNWVSJKkJmWAazbHHFMsr+UwqiRJWgcDXLPZaiuYPt3pRCRJ0joZ4JpRrQaPPFK8JEmS1mCAa0bHHlu82wsnSZJ6YIBrRttuC/vt531wkiSpRwa4ZtXVBfffD088UXUlkiSpyRjgmlX3MOoVV1RbhyRJajoGuGa1ww6w554Oo0qSpLUY4JpZVxf87GfF+qiSJEklA1wzq9WKd4dRJUlSHQNcM9ttN9h1V6cTkSRJqzHANbtaDW65BV58sepKJElSkzDANbuuLnjnHbjyyqorkSRJTcIA1+z22gumTHEYVZIk/YYBrtlFFMOo118Pr7xSdTWSJKkJGOBaQVcXLF8O11xTdSWSJKkJGOBawX77wcSJTuorSZIAA1xr6Ogolta65hp4882qq5EkSRUzwLWKWg3eeAOuu67qSiRJUsUMcK1i+nQYN86nUSVJkgGuZYwYAcccU8wHt3x51dVIkqQKGeBaSa0GS5fCzTdXXYkkSaqQAa6VHHIIjBnjMKokSUOcAa6VbLIJHHkkzJ5dLK8lSZKGJANcq+nqgkWL4I47qq5EkiRVxADXao44ouiJc1JfSZKGLANcqxkzBg47rLgPLrPqaiRJUgUMcK2oVoOFC+Gee6quRJIkVcAA14qOPhqGD/dpVEmShigDXCsaOxZmzCjug3MYVZKkIccA16pqNZg/Hx56qOpKJEnSIDPAtapZsyDCYVRJkoaghga4iNgyIi6LiP+OiEciYr+IGBcRN0TEY+X72PLciIhzI2J+RDwYEXvXfc9J5fmPRcRJde0fiIh55WfOjYho5PU0lW22gf33dzoRSZKGoEb3wH0LuDYzdwXeDzwCfBG4MTN3Am4s9wGOAHYqX6cA3wGIiHHAV4APAvsCX+kOfeU5n6773OENvp7m0tUF8+bBY49VXYkkSRpEDQtwEbEFcABwPkBmLs/Ml4GZwAXlaRcAs8rtmcCFWbgT2DIifgs4DLghM5dk5kvADcDh5bHNM/POzEzgwrrvGhqOPbZ4dxhVkqQhpZE9cNsDi4H/jIj7I+K7EbEZsE1mPlee8zywTbk9CXi67vMLy7b1tS/soX3oePe7YZ99DHCSJA0xjQxww4G9ge9k5l7A66waLgWg7Dlr+DwYEXFKRMyNiLmLFy9u9I8bXLUa3H03PP107+dKkqS20MgAtxBYmJl3lfuXUQS6F8rhT8r3ReXxZ4DJdZ/frmxbX/t2PbSvJTPPy8zOzOycMGHCRl1U06nVivcrrqi2DkmSNGgaFuAy83ng6YjYpWw6CPglcCXQ/STpScCccvtK4JPl06jTgKXlUOt1wKERMbZ8eOFQ4Lry2CsRMa18+vSTdd81dOy8M+y+u8OokiQNIcMb/P2fAS6KiJHA48DvU4TGSyPiZGAB8L/Kc68BPgrMB94ozyUzl0TEV4HuhT//JjOXlNt/DHwf2BT4cfkaemo1+NrXYNEi2HrrqquRJEkNFjnElmLq7OzMuXPnVl3GwHrwQXj/++G88+DTn666GkmSNEAi4t7M7Fyz3ZUY2sEee8B73uOkvpIkDREGuHYQUUzqe+ON8PLLVVcjSZIazADXLmo1WLECrrqq6kokSVKDGeDaxT77wKRJDqNKkjQEGODaRUdH0Qt37bXw+utVVyNJkhrIANdOajV46y348dCcTUWSpKHCANdOPvIRmDDBSX0lSWpzBrh2MmwYzJxZPMiwbFnV1UiSpAYxwLWbri549VX4yU+qrkSSJDWIAa7dzJgBm2/uMKokSW3MANduRo6Eo4+GOXOKeeEkSVLbMcC1o64uePFFuO22qiuRJEkNYIBrR4cdBqNHO6mvJEltygDXjkaPhiOOgCuugJUrq65GkiQNMANcu6rV4Lnn4M47q65EkiQNMANcuzrqqOKBBp9GlSSp7Rjg2tXmm8PBBxcBLrPqaiRJ0gAywLWzWg2eeAIeeKDqSiRJ0gAywLWzmTOho8NhVEm
S2sx6A1xE7BMRE+v2PxkRcyLi3IgY1/jytFG22gqmT3c6EUmS2kxvPXD/DiwHiIgDgK8DFwJLgfMaW5oGRFcXPPJI8ZIkSW2htwA3LDOXlNsfA87LzB9m5l8COza2NA2IWbOKd4dRJUlqG70GuIgYXm4fBNxUd2x4D+er2UyaBPvtZ4CTJKmN9BbgLgZujYg5wJvA7QARsSPFMKpaQa0G990HTz5ZdSWSJGkArDfAZeb/C3wB+D6wf+ZvJhTrAD7T2NI0YGq14t1eOEmS2kJvT6GOBu7NzCsy8/WI2CUizgR2z8z7BqdEbbQddoA99zTASZLUJnobQr0WmAq/GTb9ObADcFpE/F1jS9OAqtXgZz8r1keVJEktrbcANzYzHyu3TwIuzszPAEcARzW0Mg2srq5iSa3Zs6uuRJIkbaTeAlz9IpozgBsAMnM5sLJRRakB3vte2GUXJ/WVJKkN9BbgHoyIfyjve9sRuB4gIrZseGUaWBFFL9wtt8CLL1ZdjSRJ2gi9BbhPA7+muA/u0Mx8o2zfDfiHBtalRqjV4J134Ec/qroSSZK0EXoLcGOAH2XmGZn5i7r2pRQPOKiV7L03TJniMKokSS2utwD3z8D4HtrHAd8a+HLUUBFFL9z118Orr1ZdjSRJ2kC9BbgdM/O2NRsz83bgtxtTkhqqVoPly+Hqq6uuRJIkbaDeAty71nNsxEAWokHyoQ/BxIlO6itJUgvrLcDNj4iPrtkYEUcAjzemJDVURwfMmgXXXANvvll1NZIkaQMM7+X454CrI+J/AfeWbZ3AfjiRb+vq6oJ/+7fiXriZM6uuRpIk9VNvPXBHAr8H3AFMKV+3Ar+dmY82uDY1yvTpMHasw6iSJLWo3nrgtgPOAd4LPEgR5BYBo4G3GluaGmbECDjmGJgzp3igYeTIqiuSJEn9sN4euMw8KzM/BGwDfAlYAvw+8FBE/HIQ6lOjdHXByy8XKzNIkqSW0tsQardNgc2BLcrXs8BdjSpKg+CQQ2DMGCf1lSSpBa03wEXEeRFxB/ADigcXfgYcn5mdmfn7g1GgGmSTTeDII2H27GJ5LUmS1DJ664F7NzAKeB54BlgIvNzoojRIajVYtAjuuKPqSiRJUj/0dg/c4cA+rFq4/gvAPRFxfUT8daOLU4N99KMwapRPo0qS1GJ6vQcuCw8B1wA/pngS9T3AGQ2uTY02ZgwcdlgR4DKrrkaSJPVRb/fAfTYiLomIpyjmfzsK+G+gRrGgvVpdrQZPPw1z51ZdiSRJ6qPe5oGbCvx/wJmZ+Vzjy9GgO/poGD686IXbZ5+qq5EkSX3Q2z1wn8/MH25MeIuIYRFxf0RcVe5vHxF3RcT8iPhBRIws20eV+/PL41PrvuNLZfuvIuKwuvbDy7b5EfHFDa1xSBs3Dg48sJhOxGFUSZJaQl/ngdsYZwCP1O2fDXwzM3cEXgJOLttPBl4q279ZnkdE7AacALwPOBz41zIUDgP+BTgC2A04sTxX/dXVBY89Bg8/XHUlkiSpDxoa4CJiO4r1VL9b7gcwA7isPOUCYFa5PbPcpzx+UHn+TOCSzFyWmU8A84F9y9f8zHw8M5cDl5Tnqr9mzoQIJ/WVJKlFNLoH7hzgfwMry/3xwMuZuaLcXwhMKrcnAU8DlMeXluf/pn2Nz6yrXf01cSLsv7/TiUiS1CIaFuAi4ihgUWbe26if0Y9aTomIuRExd/HixVWX05xqNXjwQZg/v+pKJElSLxrZA/dh4JiIeJJieHMG8C1gy4jofvp1O4oVHijfJwOUx7cAXqxvX+Mz62pfS2aeVy7/1TlhwoSNv7J2dOyxxbu9cJIkNb2GBbjM/FJmbpeZUykeQrgpMz8O3AwcV552EjCn3L6y3Kc8flNmZtl+QvmU6vbATsDdwD3ATuVTrSPLn3Flo66n7U2ZAp2dBjhJklrAYDyFuqY/BT4fEfMp7nE7v2w/Hxhftn8e+CJAZj4MXAr8ErgWOC0z3ynvkzsduI7iKdd
Ly3O1oWo1uOsuWLiw6kokSdJ6RA6xub86OztzrqsO9OzRR2GXXeDcc+Ezn6m6GkmShryIuDczO9dsr6IHTs1q553hfe9zOhFJkpqcAU6r6+qC22+HRYuqrkSSJK2DAU6rq9Vg5Uq40udBJElqVgY4re63fxt22MFhVEmSmpgBTquLKIZRb7wRXn656mokSVIPDHBaW60Gb78NV11VdSWSJKkHBjitbd99YdIkJ/WVJKlJGeC0to6OYmmta6+F11+vuhpJkrQGA5x61tUFb75ZhDhJktRUDHDq2f77w1ZbOYwqSVITMsCpZ8OHw8yZxYMMy5ZVXY0kSapjgNO6dXXBK68UU4pIkqSmYYDTus2YAZtv7qS+kiQ1GQOc1m3UKDj6aJgzB1asqLoaSZJUMsBp/Wo1ePFFuO22qiuRJEklA5zW7/DDYdNNfRpVkqQmYoDT+o0eDUccAVdcAStXVl2NJEnCAKe+qNXg2WfhrruqrkSSJGGAU18cdRSMGOEwqiRJTcIAp95tsQUcfHAxnUhm1dVIkjTkGeDUN11d8MQT8ItfVF2JJElDngFOfXPMMdDR4aS+kiQ1AQOc+mbCBJg+3fvgJElqAsOrLkAt5NRTi4cZ5s+HMWNg662LXjlJkjSo/Ourvlm5EnbaCc48s3ifNg3mzXNuOEmSKmCAU98sWgTHHgsLFhT7CxbAzJlFuyRJGlQGOPXNsmWrwlu3BQuKdkmSNKgMcOqbUaNgypTV26ZMcV44SZIqYIBT32y9NcyZsyrETZkC3/senHyyT6ZKkjTIDHDqm44O2GMPuPNOePLJ4n3XXeHVV4tJfs86C95+u+oqJUkaEgxw6ruODpg4seh9mzgRtt0Wbr8dTjsN/vEfYcaMYtF7SZLUUAY4bZxRo+Db34aLLoL77oO99oKbb666KkmS2poBTgPjd38X7rkHxo0rFr7/u79zjjhJkhrEAKeBs9tucPfdcPzx8Gd/VswT99JLVVclSVLbMcBpYL3rXXDxxfDP/wzXXQcf+EAxtCpJkgaMAU4DLwJOPx1uuw1WrIAPfQjOO8854yRJGiAGODXOtGlF79v06fCHfwif+hS88UbVVUmS1PIMcGqsrbaCa66Br3wF/s//KULdo49WXZUkSS3NAKfGGzYM/uqv4Mc/LuaJ6+yEH/6w6qokSWpZBjgNnsMOK4ZUd9sNjjsOPv95V2+QJGkDGOA0uN797uLhhtNPh29+Ew48EJ55puqqJElqKQY4Db6RI4tpRi6+GB54APbeG266qeqqJElqGQY4VeeEE4rVG8aPh0MOgb/9W1dvkCSpDwxwqtZ731us3vCxj8Gf/zkccwwsWVJ1VZIkNTUDnKo3ZgxcdBF8+9tw/fXF6g1z51ZdlSRJTcsAp+YQAaedBrffDu+8Ax/+MPz7v7t6gyRJPTDAqbl88INw//0wYwaceiqcdBK8/nrVVUmS1FQaFuAiYnJE3BwRv4yIhyPijLJ9XETcEBGPle9jy/aIiHMjYn5EPBgRe9d910nl+Y9FxEl17R+IiHnlZ86NiGjU9WgQjR8PV18Nf/3X8F//Vaze8KtfVV2VJElNo5E9cCuAL2TmbsA04LSI2A34InBjZu4E3FjuAxwB7FS+TgG+A0XgA74CfBDYF/hKd+grz/l03ecOb+D1aDB1dMCXvwzXXgvPPQf77AOXXVZ1VZIkNYWGBbjMfC4z7yu3XwUeASYBM4ELytMuAGaV2zOBC7NwJ7BlRPwWcBhwQ2YuycyXgBuAw8tjm2fmnZmZwIV136V2ceihxZDq+94Hxx8PZ57p6g2SpCFvUO6Bi4ipwF7AXcA2mflceeh5YJtyexLwdN3HFpZt62tf2EN7Tz//lIiYGxFzFy9evFHXogpMngy33gqf/Syccw78zu/AwoW9fkySpHbV8AAXEWOAHwKfy8xX6o+VPWcNf8wwM8/LzM7M7JwwYUKjf5waYeRI+Na34JJL4MEHi9UbfvKTqquSJKkSDQ1wETG
CIrxdlJmXl80vlMOflO+LyvZngMl1H9+ubFtf+3Y9tKudfexjxeoNEyYUw6tf+5qrN0iShpxGPoUawPnAI5n5T3WHrgS6nyQ9CZhT1/7J8mnUacDScqj1OuDQiBhbPrxwKHBdeeyViJhW/qxP1n2X2tmuu8Jdd8GJJ8Jf/iUcfbSrN0iShpRG9sB9GPgEMCMiHihfHwW+DhwSEY8BB5f7ANcAjwPzgf8A/hggM5cAXwXuKV9/U7ZRnvPd8jP/A/y4gdejZjJmTDHFyL/+azGUuvfert4gSRoyIofYTPednZ051z/07eWee+C44+D554uHHE49tVjZQZKkFhcR92Zm55rtrsSg1rfPPnDffXDQQfDHfwyf+ISrN0iS2poBTu1h/Hi46ir46lfh//7fYkkuV2+QJLUpA5zaR0cH/MVfwPXXwwsvQGcnXHpp1VVJkjTgDHBqPwcfXKzesMcexbQjZ5wBy5dXXZUkSQPGAKf2tN12cMstRXg791xXb5AktRUDnNrXyJHFU6mXXgrz5sFee8ENN1RdlSRJG80Ap/Z3/PHFHHHbbAOHHVY86ODqDZKkFmaA09Cwyy7F6g0f/zh8+ctw5JHw4otVVyVJ0gYxwGno2GwzuPBC+M534KabitUb7r676qokSeo3A5yGlohipYY77ii299+/WI5riK1IIklqbQY4DU2dncXqDYccAqedBr/3e/Daa1VXJUlSnxjgNHSNGwc/+hF87WtwySXF6g2PP16sqbpgQfHuww6SpCZkgNPQ1tEBf/7nxeoNkyfDk0/CtGkwdWrxPm+eIU6S1HSGV12A1BQOOgh22gkOOKDofYPifeZMuPHGItANG1ZpiZIkdTPASd0yV4W3bgsWFCs4vP/9sO++sN9+xWvaNNhqq2rqlCQNeQY4qduoUTBlyuohbsoUGD8ePvUp+PnP4eyz4Z13imM77bQq0O23H+y+u710kqRBETnEpk/o7OzMuXPnVl2GmtHKlcU9bzNnFiFuyhSYMwf22KO4Vw7g9deLVR1+/vNVr8WLi2ObbWYvnSRpQEXEvZnZuVa7AU6qs3IlLFoEy5YVPXJbb70qvPUkE554YvVA94tf2EsnSRoQBriSAU4NZy+dJGmArCvAeQ+cNNA22wymTy9e0HMvnffSSZI2gj1wUhXqe+nuvLN4X7SoOGYvnSSpZA+c1Ew2ppdu2rSil264//eVpKHKHjipWdlLJ0lDnj1wUqtpRC9df5+ylSQ1JXvgpFbWn166D30Inn56/fPcSZKaitOIlAxwamvrm5fu8svhzDPXXmnijjtg0qTqapYkrZNDqNJQEAE77FC8Pv7xoq27l27s2J7Xep0/v1jrdfvtV7122GHV9pQpMHLk4F+LJGmdDHBSu+u+l+7553te63XMGDjuOHj8cbj/fpg9G95+e9U5EUUP3ZrBrvu17bYOwUrSIDPASUPF1lsX97z1dA/cv/3bqvPeeQeefbYYin3iiSLYdW//5CfFsfpbL0aOhKlT1w523a9x44oQKEkaMN4DJw0lA/EU6rJlRQCsD3b1ryVLVj9/8817DnY77FAEv9GjB+zyJKndeA+cpCKsTZy4cd8xahTsvHPx6snSpT0Hu1/9Cq67Dt58c/Xzt9lm3fffTZ7shMWS1AP/ZZQ0sLbYAvbcs3itKRNeeGH1YNfdk/fzn8Oll66a1w6KNWEnT+453G2/fRH+6odnnedO0hBhgJM0eCKKHsCJE4u56db09tuwcOHa99498QRcfXUR/uptuumq++8OPBD23x9OOGHVPX6zZxf3+A0bNiiXJ0mDxQAnqXmMGLGqd23GjLWPv/EGPPlkz/ff7bzzqvAGxfusWXDOOXDqqcXDFP15bb65vXeSmpYBTlLrGD0adtuteK1pwYKe57mbOhWOOaZ4uGLJEnjqqWJy4yVL4LXX1v2zOjpgyy37H/zGjvW+PUkN578yktrDqFE9z3M3cSKcd17Pn1m+HF56aVW4W9/r17+GRx8
ttl9+ef21bL75hgW/TTbp/Tq9z08SBlFgrMMAAApnSURBVDhJ7WJd89xtvfW6PzNyZPEgxDbb9O9nvfNOEeLWF/jqg+HTT6/arn9IY02jR68e6NYMeTvvXDzUcfzxq67x8suLkDpsWBHoNtmkeG/VufcMqFKfGOAktYeOjuKBhTvvbPwf/2HDYPz44tUfmfDqq33r8VuyZFWP34svFr2Fl1++KrxB8V6rwTe/WbzX6w5z9a+e2vpzvD/fsSHDyCtXwrx5PU82bYiTVuNEvpLU7DKL+fOeew523HHt4w89BLfdBm+9tfZr2bKe23s7vrGGDet/APyDP4ATT1x7GHz2bLjnnqLHdOTI4ju6t9fcX9exESOaIwTaw6h+ciJfSWpVEcXw6mab9Xyf3/jx8Ed/NHA/L7Po8duYANiX40uXrn78U5/q+UGUpUvhlFM2/rpGjOh/8Ovvues7tuWWxZPUtdqqHsYrrijmNxw2rOi17A6arToEDu0fUpvk+gxwktQqNuQ+vw0RUfxhGjVqYL+3N88/33NA3WGHYn7AZcuKYNn9qt9f37H+nLtsWdHbuXTp+j+3bNnqawL3xeWXw5lnrj4EfuyxPQ+BjxixKtDVb6/53te2RhzrqW2zzYr7P+tD6uzZxe+wo6M4b/jwIrC2YkhtomF+h1AlqZU0yX/9N0QT/XHskxUr+hcSp07teQqcBx8slpl7++3iO99+e/XtNd/7e6y38wcyB6wZUqH4PfYUUrsDXXfv48ZsD8R39GV7zz3hqKPWvr4779z4ZQrXwSFUSWoHA7GebbMazAdRBkL3H/fRo/t2/rp6GCdMgLPOakyNfbFy5cAFxV126XkYfMcd4e//vngKe8WK4jVQ28uXb9hn69/76pZber6+Zcs2+tfQXwY4SVLzaOeAOlhD4P3V0TFwQ+brC6l/8icb//2NkFmE2L4Ev2HDer6+wb7dAAOcJEmDo9V6GDdEs4bU9Ykogln3XIrrs3Jl01yfAU6SpMHSzj2M0P4htYmur+UDXEQcDnwLGAZ8NzO/XnFJkiQNXUMhpDbB9bV0JI6IYcC/AEcAuwEnRkQPj/hIkiS1j5YOcMC+wPzMfDwzlwOXADMrrkmSJKmhWj3ATQKerttfWLZJkiS1rVYPcH0SEadExNyImLt48eKqy5EkSdoorR7gngEm1+1vV7atJjPPy8zOzOycMGHCoBUnSZLUCK0e4O4BdoqI7SNiJHACcGXFNUmSJDVUS08jkpkrIuJ04DqKaUS+l5kPV1yWJElSQ7V0gAPIzGuAa6quQ5IkabBEZlZdw6CKiMXAgl5P1PpsBfy66iK0Ufwdtj5/h63N31/rG6zf4ZTMXOsG/iEX4LTxImJuZnZWXYc2nL/D1ufvsLX5+2t9Vf8OW/0hBkmSpCHHACdJktRiDHDaEOdVXYA2mr/D1ufvsLX5+2t9lf4OvQdOkiSpxdgDJ0mS1GIMcOqziJgcETdHxC8j4uGIOKPqmtR/ETEsIu6PiKuqrkX9FxFbRsRlEfHfEfFIROxXdU3qn4g4s/w39KGIuDgiNqm6Jq1fRHwvIhZFxEN1beMi4oaIeKx8HzuYNRng1B8rgC9k5m7ANOC0iNit4prUf2cAj1RdhDbYt4BrM3NX4P34u2wpETEJ+CzQmZm7U6widEK1VakPvg8cvkbbF4EbM3Mn4MZyf9AY4NRnmflcZt5Xbr9K8YdjUrVVqT8iYjvgSOC7Vdei/ouILYADgPMBMnN5Zr5cbVXaAMOBTSNiODAaeLbietSLzLwNWLJG80zggnL7AmDWYNZkgNMGiYipwF7AXdVWon46B/jfwMqqC9EG2R5YDPxnOQz+3YjYrOqi1HeZ+QzwD8BTwHPA0sy8vtqqtIG2ycznyu3ngW0G84cb4NRvETEG+CHwucx8pep61DcRcRSwKDPvrboWbbDhwN7AdzJzL+B1BnnYRhunvE9qJkUY3xbYLCJ+r9qqtLGymNJjUKf1MMCpXyJiBEV
4uygzL6+6HvXLh4FjIuJJ4BJgRkT8V7UlqZ8WAgszs7vn+zKKQKfWcTDwRGYuzsy3gcuBD1VckzbMCxHxWwDl+6LB/OEGOPVZRATFvTePZOY/VV2P+iczv5SZ22XmVIqbpm/KTP/Lv4Vk5vPA0xGxS9l0EPDLCktS/z0FTIuI0eW/qQfhgyit6krgpHL7JGDOYP5wA5z648PAJyh6bh4oXx+tuihpiPkMcFFEPAjsCfxtxfWoH8re08uA+4B5FH+HXZWhyUXExcDPgV0iYmFEnAx8HTgkIh6j6Fn9+qDW5EoMkiRJrcUeOEmSpBZjgJMkSWoxBjhJkqQWY4CTJElqMQY4SZKkFmOAk9Q0IiIj4h/r9s+KiL8aoO/+fkQcNxDf1cvPOT4iHomImxtZV0RMjYjf7X+FktqBAU5SM1kG1CJiq6oLqVcuOt5XJwOfzswDG1VPaSrQrwDXz+uQ1MQMcJKayQqKSU3PXPPAmj1VEfFa+f47EXFrRMyJiMcj4usR8fGIuDsi5kXEe+q+5uCImBsRj5ZrwxIRwyLiGxFxT0Q8GBF/WPe9t0fElfSw2kFEnFh+/0MRcXbZ9mVgf+D8iPhGD5/50/Izv4iItSb9jIgnu8NrRHRGxC3l9vS6ybPvj4h3UUwa+pGy7cy+XkdEbBYRV5c1PBQRH+vLL0ZSc/G/xiQ1m38BHoyIv+/HZ94PvBdYAjwOfDcz942IMyhWLvhced5UYF/gPcDNEbEj8ElgaWbuExGjgDsi4vry/L2B3TPzifofFhHbAmcDHwBeAq6PiFmZ+TcRMQM4KzPnrvGZIygWMf9gZr4REeP6cX1nAadl5h0RMQZ4i2IR+7MyszuIntKX64iILuDZzDyy/NwW/ahDUpOwB05SU8nMV4ALgc/242P3ZOZzmbkM+B+gO7jMowht3S7NzJWZ+RhF0NsVOBT4ZEQ8ANwFjAd2Ks+/e83wVtoHuKVckHwFcBFwQC81Hgz8Z2a+UV7nkn5c3x3AP0XEZ4Ety5+5pr5exzyK5X/OjoiPZObSftQhqUkY4CQ1o3Mo7iXbrK5tBeW/WRHRAYysO7asbntl3f5KVh9pWHPtwAQC+Exm7lm+ts/M7gD4+kZdRf/95hqBTX5TZObXgT8ANqXoWdu1h8/26Toy81GKHrl5wNfKYV9JLcYAJ6nplL1Tl1KEuG5PUgxZAhwDjNiArz4+IjrK++J2AH4FXAf8UUSMAIiInSNis/V9CXA3MD0itoqIYcCJwK29fOYG4PcjYnT5c3oaQn2SVdfY1d0YEe/JzHmZeTZwD0XP4avAu+o+26frKId/38jM/wK+QRHmJLUY74GT1Kz+ETi9bv8/gDkR8QvgWjasd+wpivC1OXBqZr4VEd+lGGa9LyICWAzMWt+XZOZzEfFF4GaKnq+rM3NOL5+5NiL2BOZGxHLgGuDP1jjtrykegPgqcEtd++ci4kCKHsWHgR+X2++U/3t8H/hWH69jD+AbEbESeBv4o/XVLak5ReaaIwqSJElqZg6hSpIktRgDnCRJUosxwEmSJLUYA5wkSVKLMcBJkiS1GAOcJElSizHASZIktRgDnCRJUov5/wEEXUm8vjXJ1AAAAABJRU5ErkJggg==\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.figure(figsize=(10,5))\n", + "sns.lineplot(range(1, 11), wcss,marker='o',color='red')\n", + "plt.title('Elbow')\n", + "plt.xlabel('Number of clusters')\n", + "plt.ylabel('WCSS')\n", + "plt.show()" + ] + }, + { + "source": [ + "Looks like 3 is a good number after all. 
Fit the model again and create a scatterplot of your clusters. They do group in bunches, but they are pretty close together." + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "from sklearn.cluster import KMeans\n", + "kmeans = KMeans(n_clusters = 3)\n", + "kmeans.fit(X)\n", + "labels = kmeans.predict(X)\n", + "plt.scatter(df['popularity'],df['danceability'],c = labels)\n", + 
"plt.xlabel('popularity')\n", + "plt.ylabel('danceability')\n", + "plt.show()" + ] + }, + { + "source": [ + "Usahihi wa mfano huu si mbaya, lakini si mzuri. Inawezekana kuwa data haiendani vizuri na K-Means Clustering. Unaweza kujaribu mbinu tofauti.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 811, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Result: 109 out of 286 samples were correctly labeled.\nAccuracy score: 0.38\n" + ] + } + ], + "source": [ + "labels = kmeans.labels_\n", + "\n", + "correct_labels = sum(y == labels)\n", + "\n", + "print(\"Result: %d out of %d samples were correctly labeled.\" % (correct_labels, y.size))\n", + "\n", + "print('Accuracy score: {0:0.2f}'. format(correct_labels/float(y.size)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya tafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kwa usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, inashauriwa kutumia huduma ya tafsiri ya kitaalamu ya binadamu. 
Hatutawajibika kwa maelewano mabaya au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sw/5-Clustering/2-K-Means/solution/tester.ipynb b/translations/sw/5-Clustering/2-K-Means/solution/tester.ipynb new file mode 100644 index 000000000..3c5dc9d72 --- /dev/null +++ b/translations/sw/5-Clustering/2-K-Means/solution/tester.ipynb @@ -0,0 +1,341 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "6f92868513e59d321245137c1c4c5311", + "translation_date": "2025-09-06T14:22:38+00:00", + "source_file": "5-Clustering/2-K-Means/solution/tester.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: seaborn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.11.1)\n", + "Requirement already satisfied: pandas>=0.23 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.1.2)\n", + "Requirement already satisfied: matplotlib>=2.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (3.1.0)\n", + 
"Requirement already satisfied: numpy>=1.15 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.19.2)\n", + "Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.4.1)\n", + "Requirement already satisfied: pytz>=2017.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2019.1)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2.8.0)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (1.1.0)\n", + "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (2.4.0)\n", + "Requirement already satisfied: cycler>=0.10 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n", + "Requirement already satisfied: six>=1.5 in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from python-dateutil>=2.7.3->pandas>=0.23->seaborn) (1.12.0)\n", + "Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from kiwisolver>=1.0.1->matplotlib>=2.2->seaborn) (45.1.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install seaborn" + ] + }, + { + "source": [], + "cell_type": 
"markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "0 Sparky Mandy & The Jungle \n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "2 LITT! LITT! \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "0 Cruel Santino alternative r&b 2019 144000 48 \n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "2 AYLØ indie r&b 2018 207758 40 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "0 0.666 0.8510 0.420 0.534000 0.1100 -6.699 \n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "2 0.836 0.2720 0.564 0.000537 0.1100 -7.127 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "\n", + " speechiness tempo time_signature \n", + "0 0.0829 133.015 5 \n", + "1 0.3600 129.993 3 \n", + "2 0.0424 130.005 4 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
0SparkyMandy & The JungleCruel Santinoalternative r&b2019144000480.6660.85100.4200.5340000.1100-6.6990.0829133.0155
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
2LITT!LITT!AYLØindie r&b2018207758400.8360.27200.5640.0005370.1100-7.1270.0424130.0054
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
\n
" + }, + "metadata": {}, + "execution_count": 105 + } + ], + "source": [ + "\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "import numpy as np\n", + "\n", + "df = pd.read_csv(\"../../data/nigerian-songs.csv\")\n", + "df.head()" + ] + }, + { + "source": [ + "Tutazingatia tu aina 3. Labda tunaweza kupata makundi 3 yaliyoundwa!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "metadata": {}, + "execution_count": 106 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "df = df[(df['artist_top_genre'] == 'afro dancehall') | (df['artist_top_genre'] == 'afropop') | (df['artist_top_genre'] == 'nigerian pop')]\n", + "df = df[(df['popularity'] > 0)]\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. 
\n", + "5 Kasala Pioneers \n", + "6 Pull Up Everything Pretty \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "5 DRB Lasgidi nigerian pop 2020 184800 26 \n", + "6 prettyboydo nigerian pop 2018 202648 29 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "5 0.803 0.1270 0.525 0.000007 0.1290 -10.034 \n", + "6 0.818 0.4520 0.587 0.004490 0.5900 -9.840 \n", + "\n", + " speechiness tempo time_signature \n", + "1 0.3600 129.993 3 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 \n", + "5 0.1970 100.103 4 \n", + "6 0.1990 95.842 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
5KasalaPioneersDRB Lasgidinigerian pop2020184800260.8030.12700.5250.0000070.1290-10.0340.1970100.1034
6Pull UpEverything Prettyprettyboydonigerian pop2018202648290.8180.45200.5870.0044900.5900-9.8400.199095.8424
\n
" + }, + "metadata": {}, + "execution_count": 107 + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import StandardScaler\n", + "\n", + "scaler = StandardScaler()\n", + "\n", + "# X = df.loc[:, ('danceability','energy')]\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [ + { + "output_type": "error", + "ename": "ValueError", + "evalue": "Unknown label type: 'continuous'", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0;31m# we create an instance of SVM and fit out data. We do not scale our\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[0;31m# data since we want to plot the support vectors\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 22\u001b[0;31m \u001b[0mls30\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mLabelSpreading\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_30\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_30\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Label Spreading 30% data'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 23\u001b[0m \u001b[0mls50\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mLabelSpreading\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0my_50\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_50\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Label Spreading 50% data'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0mls100\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mLabelSpreading\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Label Spreading 100% data'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/semi_supervised/_label_propagation.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y)\u001b[0m\n\u001b[1;32m 228\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_validate_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 229\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mX_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 230\u001b[0;31m \u001b[0mcheck_classification_targets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 231\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 232\u001b[0m \u001b[0;31m# actual graph construction (implementations should override this)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/utils/multiclass.py\u001b[0m in 
\u001b[0;36mcheck_classification_targets\u001b[0;34m(y)\u001b[0m\n\u001b[1;32m 181\u001b[0m if y_type not in ['binary', 'multiclass', 'multiclass-multioutput',\n\u001b[1;32m 182\u001b[0m 'multilabel-indicator', 'multilabel-sequences']:\n\u001b[0;32m--> 183\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Unknown label type: %r\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0my_type\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 184\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 185\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: Unknown label type: 'continuous'" + ] + } + ], + "source": [ + "from sklearn.svm import SVC\n", + "from sklearn.semi_supervised import LabelSpreading\n", + "from sklearn.semi_supervised import SelfTrainingClassifier\n", + "from sklearn import datasets\n", + "\n", + "X = df[['danceability','acousticness']].values\n", + "y = df['energy'].values\n", + "\n", + "# X = scaler.fit_transform(X)\n", + "\n", + "# step size in the mesh\n", + "h = .02\n", + "\n", + "rng = np.random.RandomState(0)\n", + "y_rand = rng.rand(y.shape[0])\n", + "y_30 = np.copy(y)\n", + "y_30[y_rand < 0.3] = -1 # set random samples to be unlabeled\n", + "y_50 = np.copy(y)\n", + "y_50[y_rand < 0.5] = -1\n", + "# we create an instance of SVM and fit out data. 
We do not scale our\n", + "# data since we want to plot the support vectors\n", + "ls30 = (LabelSpreading().fit(X, y_30), y_30, 'Label Spreading 30% data')\n", + "ls50 = (LabelSpreading().fit(X, y_50), y_50, 'Label Spreading 50% data')\n", + "ls100 = (LabelSpreading().fit(X, y), y, 'Label Spreading 100% data')\n", + "\n", + "# the base classifier for self-training is identical to the SVC\n", + "base_classifier = SVC(kernel='rbf', gamma=.5, probability=True)\n", + "st30 = (SelfTrainingClassifier(base_classifier).fit(X, y_30),\n", + " y_30, 'Self-training 30% data')\n", + "st50 = (SelfTrainingClassifier(base_classifier).fit(X, y_50),\n", + " y_50, 'Self-training 50% data')\n", + "\n", + "rbf_svc = (SVC(kernel='rbf', gamma=.5).fit(X, y), y, 'SVC with rbf kernel')\n", + "\n", + "# create a mesh to plot in\n", + "x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1\n", + "y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1\n", + "xx, yy = np.meshgrid(np.arange(x_min, x_max, h),\n", + " np.arange(y_min, y_max, h))\n", + "\n", + "color_map = {-1: (1, 1, 1), 0: (0, 0, .9), 1: (1, 0, 0), 2: (.8, .6, 0)}\n", + "\n", + "classifiers = (ls30, st30, ls50, st50, ls100, rbf_svc)\n", + "for i, (clf, y_train, title) in enumerate(classifiers):\n", + " # Plot the decision boundary. 
For that, we will assign a color to each\n", + " # point in the mesh [x_min, x_max]x[y_min, y_max].\n", + " plt.subplot(3, 2, i + 1)\n", + " Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])\n", + "\n", + " # Put the result into a color plot\n", + " Z = Z.reshape(xx.shape)\n", + " plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)\n", + " plt.axis('off')\n", + "\n", + " # Plot also the training points\n", + " colors = [color_map[y] for y in y_train]\n", + " plt.scatter(X[:, 0], X[:, 1], c=colors, edgecolors='black')\n", + "\n", + " plt.title(title)\n", + "\n", + "plt.suptitle(\"Unlabeled points are colored white\", y=0.1)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya tafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kwa usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, inashauriwa kutumia tafsiri ya kitaalamu ya binadamu. 
Hatutawajibika kwa maelewano mabaya au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sw/6-NLP/3-Translation-Sentiment/solution/notebook.ipynb b/translations/sw/6-NLP/3-Translation-Sentiment/solution/notebook.ipynb new file mode 100644 index 000000000..2514041e3 --- /dev/null +++ b/translations/sw/6-NLP/3-Translation-Sentiment/solution/notebook.ipynb @@ -0,0 +1,100 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 4, + "coopTranslator": { + "original_hash": "27de2abc0235ebd22080fc8f1107454d", + "translation_date": "2025-09-06T15:22:16+00:00", + "source_file": "6-NLP/3-Translation-Sentiment/solution/notebook.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from textblob import TextBlob\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# You should download the book text, clean it, and import it here\n", + "with open(\"pride.txt\", encoding=\"utf8\") as f:\n", + " file_contents = f.read()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "book_pride = TextBlob(file_contents)\n", + "positive_sentiment_sentences = []\n", + "negative_sentiment_sentences = []" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for sentence in book_pride.sentences:\n", + " if sentence.sentiment.polarity == 1:\n", + " positive_sentiment_sentences.append(sentence)\n", + " if sentence.sentiment.polarity == -1:\n", + " 
negative_sentiment_sentences.append(sentence)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The \" + str(len(positive_sentiment_sentences)) + \" most positive sentences:\")\n", + "for sentence in positive_sentiment_sentences:\n", + " print(\"+ \" + str(sentence.replace(\"\\n\", \"\").replace(\" \", \" \")))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The \" + str(len(negative_sentiment_sentences)) + \" most negative sentences:\")\n", + "for sentence in negative_sentiment_sentences:\n", + " print(\"- \" + str(sentence.replace(\"\\n\", \"\").replace(\" \", \" \")))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya kutafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kuhakikisha usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, tafsiri ya kitaalamu ya binadamu inapendekezwa. 
Hatutawajibika kwa kutoelewana au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sw/6-NLP/4-Hotel-Reviews-1/notebook.ipynb b/translations/sw/6-NLP/4-Hotel-Reviews-1/notebook.ipynb new file mode 100644 index 000000000..e69de29bb diff --git a/translations/sw/6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb b/translations/sw/6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb new file mode 100644 index 000000000..543ae01a3 --- /dev/null +++ b/translations/sw/6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb @@ -0,0 +1,174 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 4, + "coopTranslator": { + "original_hash": "2d05e7db439376aa824f4b387f8324ca", + "translation_date": "2025-09-06T15:21:54+00:00", + "source_file": "6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# EDA\n", + "import pandas as pd\n", + "import time" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_difference_review_avg(row):\n", + " return row[\"Average_Score\"] - row[\"Calc_Average_Score\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the hotel reviews from CSV\n", + "print(\"Loading data file now, this could take a while depending on file size\")\n", + "start = time.time()\n", + "df = pd.read_csv('../../data/Hotel_Reviews.csv')\n", + "end = time.time()\n", + "print(\"Loading took \" + str(round(end - start, 2)) + \" seconds\")\n" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# What shape is the data (rows, columns)?\n", + "print(\"The shape of the data (rows, cols) is \" + str(df.shape))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# value_counts() creates a Series object that has index and values\n", + "# in this case, the country and the frequency they occur in reviewer nationality\n", + "nationality_freq = df[\"Reviewer_Nationality\"].value_counts()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# What reviewer nationality is the most common in the dataset?\n", + "print(\"The highest frequency reviewer nationality is \" + str(nationality_freq.index[0]).strip() + \" with \" + str(nationality_freq[0]) + \" reviews.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# What is the top 10 most common nationalities and their frequencies?\n", + "print(\"The top 10 highest frequency reviewer nationalities are:\")\n", + "print(nationality_freq[0:10].to_string())\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# How many unique nationalities are there?\n", + "print(\"There are \" + str(nationality_freq.index.size) + \" unique nationalities in the dataset\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# What was the most frequently reviewed hotel for the top 10 nationalities - print the hotel and number of reviews\n", + "for nat in nationality_freq[:10].index:\n", + " # First, extract all the rows that match the criteria into a new dataframe\n", + " nat_df = df[df[\"Reviewer_Nationality\"] == nat] \n", + " # Now get the hotel freq\n", + " freq = nat_df[\"Hotel_Name\"].value_counts()\n", + " 
print(\"The most reviewed hotel for \" + str(nat).strip() + \" was \" + str(freq.index[0]) + \" with \" + str(freq[0]) + \" reviews.\") \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# How many reviews are there per hotel (frequency count of hotel) and do the results match the value in `Total_Number_of_Reviews`?\n", + "# First create a new dataframe based on the old one, removing the uneeded columns\n", + "hotel_freq_df = df.drop([\"Hotel_Address\", \"Additional_Number_of_Scoring\", \"Review_Date\", \"Average_Score\", \"Reviewer_Nationality\", \"Negative_Review\", \"Review_Total_Negative_Word_Counts\", \"Positive_Review\", \"Review_Total_Positive_Word_Counts\", \"Total_Number_of_Reviews_Reviewer_Has_Given\", \"Reviewer_Score\", \"Tags\", \"days_since_review\", \"lat\", \"lng\"], axis = 1)\n", + "# Group the rows by Hotel_Name, count them and put the result in a new column Total_Reviews_Found\n", + "hotel_freq_df['Total_Reviews_Found'] = hotel_freq_df.groupby('Hotel_Name').transform('count')\n", + "# Get rid of all the duplicated rows\n", + "hotel_freq_df = hotel_freq_df.drop_duplicates(subset = [\"Hotel_Name\"])\n", + "print()\n", + "print(hotel_freq_df.to_string())\n", + "print(str(hotel_freq_df.shape))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# While there is an `Average_Score` for each hotel according to the dataset, \n", + "# you can also calculate an average score (getting the average of all reviewer scores in the dataset for each hotel)\n", + "# Add a new column to your dataframe with the column header `Calc_Average_Score` that contains that calculated average. 
\n", + "df['Calc_Average_Score'] = round(df.groupby('Hotel_Name').Reviewer_Score.transform('mean'), 1)\n", + "# Add a new column with the difference between the two average scores\n", + "df[\"Average_Score_Difference\"] = df.apply(get_difference_review_avg, axis = 1)\n", + "# Create a df without all the duplicates of Hotel_Name (so only 1 row per hotel)\n", + "review_scores_df = df.drop_duplicates(subset = [\"Hotel_Name\"])\n", + "# Sort the dataframe to find the lowest and highest average score difference\n", + "review_scores_df = review_scores_df.sort_values(by=[\"Average_Score_Difference\"])\n", + "print(review_scores_df[[\"Average_Score_Difference\", \"Average_Score\", \"Calc_Average_Score\", \"Hotel_Name\"]])\n", + "# Do any hotels have the same (rounded to 1 decimal place) `Average_Score` and `Calc_Average_Score`?\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya kutafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kuhakikisha usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, tafsiri ya kitaalamu ya binadamu inapendekezwa. 
Hatutawajibika kwa kutoelewana au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sw/6-NLP/5-Hotel-Reviews-2/notebook.ipynb b/translations/sw/6-NLP/5-Hotel-Reviews-2/notebook.ipynb new file mode 100644 index 000000000..e69de29bb diff --git a/translations/sw/6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb b/translations/sw/6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb new file mode 100644 index 000000000..e17c2e6de --- /dev/null +++ b/translations/sw/6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb @@ -0,0 +1,172 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "033cb89c85500224b3c63fd04f49b4aa", + "translation_date": "2025-09-06T15:22:37+00:00", + "source_file": "6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import time\n", + "import ast" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def replace_address(row):\n", + " if \"Netherlands\" in row[\"Hotel_Address\"]:\n", + " return \"Amsterdam, Netherlands\"\n", + " elif \"Barcelona\" in row[\"Hotel_Address\"]:\n", + " return \"Barcelona, Spain\"\n", + " elif \"United Kingdom\" in row[\"Hotel_Address\"]:\n", + " return \"London, United Kingdom\"\n", + " elif \"Milan\" in 
row[\"Hotel_Address\"]: \n", + " return \"Milan, Italy\"\n", + " elif \"France\" in row[\"Hotel_Address\"]:\n", + " return \"Paris, France\"\n", + " elif \"Vienna\" in row[\"Hotel_Address\"]:\n", + " return \"Vienna, Austria\" \n", + " else:\n", + " return row.Hotel_Address\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the hotel reviews from CSV\n", + "start = time.time()\n", + "df = pd.read_csv('../../data/Hotel_Reviews.csv')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# dropping columns we will not use:\n", + "df.drop([\"lat\", \"lng\"], axis = 1, inplace=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Replace all the addresses with a shortened, more useful form\n", + "df[\"Hotel_Address\"] = df.apply(replace_address, axis = 1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Drop `Additional_Number_of_Scoring`\n", + "df.drop([\"Additional_Number_of_Scoring\"], axis = 1, inplace=True)\n", + "# Replace `Total_Number_of_Reviews` and `Average_Score` with our own calculated values\n", + "df.Total_Number_of_Reviews = df.groupby('Hotel_Name').transform('count')\n", + "df.Average_Score = round(df.groupby('Hotel_Name').Reviewer_Score.transform('mean'), 1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Process the Tags into new columns\n", + "# The file Hotel_Reviews_Tags.py, identifies the most important tags\n", + "# Leisure trip, Couple, Solo traveler, Business trip, Group combined with Travelers with friends, \n", + "# Family with young children, Family with older children, With a pet\n", + "df[\"Leisure_trip\"] = df.Tags.apply(lambda tag: 1 if \"Leisure trip\" in tag else 0)\n", + "df[\"Couple\"] = 
df.Tags.apply(lambda tag: 1 if \"Couple\" in tag else 0)\n", + "df[\"Solo_traveler\"] = df.Tags.apply(lambda tag: 1 if \"Solo traveler\" in tag else 0)\n", + "df[\"Business_trip\"] = df.Tags.apply(lambda tag: 1 if \"Business trip\" in tag else 0)\n", + "df[\"Group\"] = df.Tags.apply(lambda tag: 1 if \"Group\" in tag or \"Travelers with friends\" in tag else 0)\n", + "df[\"Family_with_young_children\"] = df.Tags.apply(lambda tag: 1 if \"Family with young children\" in tag else 0)\n", + "df[\"Family_with_older_children\"] = df.Tags.apply(lambda tag: 1 if \"Family with older children\" in tag else 0)\n", + "df[\"With_a_pet\"] = df.Tags.apply(lambda tag: 1 if \"With a pet\" in tag else 0)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# No longer need any of these columns\n", + "df.drop([\"Review_Date\", \"Review_Total_Negative_Word_Counts\", \"Review_Total_Positive_Word_Counts\", \"days_since_review\", \"Total_Number_of_Reviews_Reviewer_Has_Given\"], axis = 1, inplace=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Saving results to Hotel_Reviews_Filtered.csv\n", + "Filtering took 23.74 seconds\n" + ] + } + ], + "source": [ + "# Saving new data file with calculated columns\n", + "print(\"Saving results to Hotel_Reviews_Filtered.csv\")\n", + "df.to_csv(r'../../data/Hotel_Reviews_Filtered.csv', index = False)\n", + "end = time.time()\n", + "print(\"Filtering took \" + str(round(end - start, 2)) + \" seconds\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya kutafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kuhakikisha usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. 
Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, tafsiri ya kitaalamu ya binadamu inapendekezwa. Hatutawajibika kwa kutoelewana au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sw/6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb b/translations/sw/6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb new file mode 100644 index 000000000..5146db551 --- /dev/null +++ b/translations/sw/6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb @@ -0,0 +1,137 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "341efc86325ec2a214f682f57a189dfd", + "translation_date": "2025-09-06T15:22:57+00:00", + "source_file": "6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the hotel reviews from CSV (you can )\n", + "import pandas as pd \n", + "\n", + "df = pd.read_csv('../../data/Hotel_Reviews_Filtered.csv')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# We want to find the most useful tags to keep\n", + "# Remove opening and closing brackets\n", + "df.Tags = df.Tags.str.strip(\"[']\")\n", + "# remove all quotes too\n", + "df.Tags = df.Tags.str.replace(\" ', '\", \",\", regex = False)\n" + ] + }, + { + 
"cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# removing this to take advantage of the 'already a phrase' fact of the dataset \n", + "# Now split the strings into a list\n", + "tag_list_df = df.Tags.str.split(',', expand = True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove leading and trailing spaces\n", + "df[\"Tag_1\"] = tag_list_df[0].str.strip()\n", + "df[\"Tag_2\"] = tag_list_df[1].str.strip()\n", + "df[\"Tag_3\"] = tag_list_df[2].str.strip()\n", + "df[\"Tag_4\"] = tag_list_df[3].str.strip()\n", + "df[\"Tag_5\"] = tag_list_df[4].str.strip()\n", + "df[\"Tag_6\"] = tag_list_df[5].str.strip()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# Merge the 6 columns into one with melt\n", + "df_tags = df.melt(value_vars=[\"Tag_1\", \"Tag_2\", \"Tag_3\", \"Tag_4\", \"Tag_5\", \"Tag_6\"])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "The shape of the tags with no filtering: (2514684, 2)\n", + " index count\n", + "0 Leisure trip 338423\n", + "1 Couple 205305\n", + "2 Solo traveler 89779\n", + "3 Business trip 68176\n", + "4 Group 51593\n", + "5 Family with young children 49318\n", + "6 Family with older children 21509\n", + "7 Travelers with friends 1610\n", + "8 With a pet 1078\n" + ] + } + ], + "source": [ + "# Get the value counts\n", + "tag_vc = df_tags.value.value_counts()\n", + "# print(tag_vc)\n", + "print(\"The shape of the tags with no filtering:\", str(df_tags.shape))\n", + "# Drop rooms, suites, and length of stay, mobile device and anything with less count than a 1000\n", + "df_tags = df_tags[~df_tags.value.str.contains(\"Standard|room|Stayed|device|Beds|Suite|Studio|King|Superior|Double\", na=False, case=False)]\n", + "tag_vc = 
df_tags.value.value_counts().reset_index(name=\"count\").query(\"count > 1000\")\n", + "# Print the top 10 (there should only be 9 and we'll use these in the filtering section)\n", + "print(tag_vc[:10])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya tafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kuhakikisha usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, tafsiri ya kitaalamu ya binadamu inapendekezwa. Hatutawajibika kwa kutoelewana au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sw/6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb b/translations/sw/6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb new file mode 100644 index 000000000..737f1c0e4 --- /dev/null +++ b/translations/sw/6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb @@ -0,0 +1,260 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "705bf02633759f689abc37b19749a16d", + "translation_date": "2025-09-06T15:23:17+00:00", + "source_file": "6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + 
"outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[nltk_data] Downloading package vader_lexicon to\n[nltk_data] /Users/jenlooper/nltk_data...\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ], + "source": [ + "import time\n", + "import pandas as pd\n", + "import nltk as nltk\n", + "from nltk.corpus import stopwords\n", + "from nltk.sentiment.vader import SentimentIntensityAnalyzer\n", + "nltk.download('vader_lexicon')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "vader_sentiment = SentimentIntensityAnalyzer()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# There are 3 possibilities of input for a review:\n", + "# It could be \"No Negative\", in which case, return 0\n", + "# It could be \"No Positive\", in which case, return 0\n", + "# It could be a review, in which case calculate the sentiment\n", + "def calc_sentiment(review): \n", + " if review == \"No Negative\" or review == \"No Positive\":\n", + " return 0\n", + " return vader_sentiment.polarity_scores(review)[\"compound\"] \n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the hotel reviews from CSV\n", + "df = pd.read_csv(\"../../data/Hotel_Reviews_Filtered.csv\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove stop words - can be slow for a lot of text!\n", + "# Ryan Han (ryanxjhan on Kaggle) has a great post measuring performance of different stop words removal approaches\n", + "# https://www.kaggle.com/ryanxjhan/fast-stop-words-removal # using the approach that Ryan recommends\n", + "start = time.time()\n", + "cache = set(stopwords.words(\"english\"))\n", + "def 
remove_stopwords(review):\n", + " text = \" \".join([word for word in review.split() if word not in cache])\n", + " return text\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove the stop words from both columns\n", + "df.Negative_Review = df.Negative_Review.apply(remove_stopwords) \n", + "df.Positive_Review = df.Positive_Review.apply(remove_stopwords)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Removing stop words took 5.77 seconds\n" + ] + } + ], + "source": [ + "end = time.time()\n", + "print(\"Removing stop words took \" + str(round(end - start, 2)) + \" seconds\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Calculating sentiment columns for both positive and negative reviews\n", + "Calculating sentiment took 201.07 seconds\n" + ] + } + ], + "source": [ + "# Add a negative sentiment and positive sentiment column\n", + "print(\"Calculating sentiment columns for both positive and negative reviews\")\n", + "start = time.time()\n", + "df[\"Negative_Sentiment\"] = df.Negative_Review.apply(calc_sentiment)\n", + "df[\"Positive_Sentiment\"] = df.Positive_Review.apply(calc_sentiment)\n", + "end = time.time()\n", + "print(\"Calculating sentiment took \" + str(round(end - start, 2)) + \" seconds\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Negative_Review Negative_Sentiment\n", + "186584 So bad experience memories I hotel The first n... -0.9920\n", + "129503 First charged twice room booked booking second... -0.9896\n", + "307286 The staff Had bad experience even booking Janu... 
-0.9889\n", + "452092 No WLAN room Incredibly rude restaurant staff ... -0.9884\n", + "201293 We usually traveling Paris 2 3 times year busi... -0.9873\n", + "... ... ...\n", + "26899 I would say however one night expensive even d... 0.9933\n", + "138365 Wifi terribly slow I speed test network upload... 0.9938\n", + "79215 I find anything hotel first I walked past hote... 0.9938\n", + "278506 The property great location There bakery next ... 0.9945\n", + "339189 Guys I like hotel I wish return next year Howe... 0.9948\n", + "\n", + "[515738 rows x 2 columns]\n", + " Positive_Review Positive_Sentiment\n", + "137893 Bathroom Shower We going stay twice hotel 2 ni... -0.9820\n", + "5839 I completely disappointed mad since reception ... -0.9780\n", + "64158 get everything extra internet parking breakfas... -0.9751\n", + "124178 I didnt like anythig Room small Asked upgrade ... -0.9721\n", + "489137 Very rude manager abusive staff reception Dirt... -0.9703\n", + "... ... ...\n", + "331570 Everything This recently renovated hotel class... 0.9984\n", + "322920 From moment stepped doors Guesthouse Hotel sta... 0.9985\n", + "293710 This place surprise expected good actually gre... 0.9985\n", + "417442 We celebrated wedding night Langham I commend ... 0.9985\n", + "132492 We arrived super cute boutique hotel area expl... 
0.9987\n", + "\n", + "[515738 rows x 2 columns]\n" + ] + } + ], + "source": [ + "df = df.sort_values(by=[\"Negative_Sentiment\"], ascending=True)\n", + "print(df[[\"Negative_Review\", \"Negative_Sentiment\"]])\n", + "df = df.sort_values(by=[\"Positive_Sentiment\"], ascending=True)\n", + "print(df[[\"Positive_Review\", \"Positive_Sentiment\"]])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "# Reorder the columns (This is cosmetic, but to make it easier to explore the data later)\n", + "df = df.reindex([\"Hotel_Name\", \"Hotel_Address\", \"Total_Number_of_Reviews\", \"Average_Score\", \"Reviewer_Score\", \"Negative_Sentiment\", \"Positive_Sentiment\", \"Reviewer_Nationality\", \"Leisure_trip\", \"Couple\", \"Solo_traveler\", \"Business_trip\", \"Group\", \"Family_with_young_children\", \"Family_with_older_children\", \"With_a_pet\", \"Negative_Review\", \"Positive_Review\"], axis=1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Saving results to Hotel_Reviews_NLP.csv\n" + ] + } + ], + "source": [ + "print(\"Saving results to Hotel_Reviews_NLP.csv\")\n", + "df.to_csv(r\"../../data/Hotel_Reviews_NLP.csv\", index = False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya kutafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kuhakikisha usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati asilia katika lugha yake ya awali inapaswa kuchukuliwa kama chanzo cha mamlaka. Kwa taarifa muhimu, tafsiri ya kitaalamu ya binadamu inapendekezwa. 
Hatutawajibika kwa kutoelewana au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sw/7-TimeSeries/1-Introduction/solution/notebook.ipynb b/translations/sw/7-TimeSeries/1-Introduction/solution/notebook.ipynb new file mode 100644 index 000000000..51b16c9e0 --- /dev/null +++ b/translations/sw/7-TimeSeries/1-Introduction/solution/notebook.ipynb @@ -0,0 +1,162 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Tao Hong, Pierre Pinson, Shu Fan, Hamidreza Zareipour, Alberto Troccoli na Rob J. Hyndman, \"Probabilistic energy forecasting: Global Energy Forecasting Competition 2014 and beyond\", International Journal of Forecasting, vol.32, no.3, uk. 896-913, Julai-Septemba, 2016.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import matplotlib.pyplot as plt\n", + "from common.utils import load_data\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Pakia data kutoka csv kwenye dataframe ya Pandas\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " load\n", + "2012-01-01 00:00:00 2698.0\n", + "2012-01-01 01:00:00 2558.0\n", + "2012-01-01 02:00:00 2444.0\n", + "2012-01-01 03:00:00 2402.0\n", + "2012-01-01 04:00:00 2403.0" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
load
2012-01-01 00:00:002698.0
2012-01-01 01:00:002558.0
2012-01-01 02:00:002444.0
2012-01-01 03:00:002402.0
2012-01-01 04:00:002403.0
\n
" + }, + "metadata": {}, + "execution_count": 7 + } + ], + "source": [ + "data_dir = './data'\n", + "energy = load_data(data_dir)[['load']]\n", + "energy.head()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Panga data zote za mzigo zinazopatikana (Januari 2012 hadi Desemba 2014)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "energy.plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + 
}, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "energy['2014-07-01':'2014-07-07'].plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya tafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kwa usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, inashauriwa kutumia tafsiri ya kitaalamu ya binadamu. 
Hatutawajibika kwa maelewano mabaya au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ], + "metadata": { + "kernel_info": { + "name": "python3" + }, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "nteract": { + "version": "nteract-front-end@1.0.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "dddca9ad9e34435494e0933c218e1579", + "translation_date": "2025-09-06T14:01:37+00:00", + "source_file": "7-TimeSeries/1-Introduction/solution/notebook.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sw/7-TimeSeries/1-Introduction/working/notebook.ipynb b/translations/sw/7-TimeSeries/1-Introduction/working/notebook.ipynb new file mode 100644 index 000000000..edeac1f30 --- /dev/null +++ b/translations/sw/7-TimeSeries/1-Introduction/working/notebook.ipynb @@ -0,0 +1,63 @@ +{ + "cells": [ + { + "source": [ + "# Kuandaa Data\n", + "\n", + "Katika daftari hili, tunaonyesha jinsi ya:\n", + "\n", + "kuandaa data ya mfululizo wa muda kwa ajili ya moduli hii \n", + "kuonyesha data kwa njia ya picha \n", + "Data katika mfano huu imetolewa kutoka kwenye mashindano ya utabiri ya GEFCom2014. Inajumuisha miaka 3 ya mzigo wa umeme wa kila saa na thamani za joto kati ya mwaka 2012 na 2014.\n", + "\n", + "1Tao Hong, Pierre Pinson, Shu Fan, Hamidreza Zareipour, Alberto Troccoli na Rob J. 
Hyndman, \"Probabilistic energy forecasting: Global Energy Forecasting Competition 2014 and beyond\", International Journal of Forecasting, vol.32, no.3, uk. 896-913, Julai-Septemba, 2016.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya kutafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kuhakikisha usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, tafsiri ya kitaalamu ya binadamu inapendekezwa. Hatutawajibika kwa kutokuelewana au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ], + "metadata": { + "kernel_info": { + "name": "python3" + }, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "nteract": { + "version": "nteract-front-end@1.0.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "5e2bbe594906dce3aaaa736d6dac6683", + "translation_date": "2025-09-06T14:02:32+00:00", + "source_file": "7-TimeSeries/1-Introduction/working/notebook.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sw/7-TimeSeries/2-ARIMA/solution/notebook.ipynb b/translations/sw/7-TimeSeries/2-ARIMA/solution/notebook.ipynb new file mode 100644 index 000000000..4eb9ea259 --- /dev/null +++ 
b/translations/sw/7-TimeSeries/2-ARIMA/solution/notebook.ipynb @@ -0,0 +1,1131 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Utabiri wa mfululizo wa muda kwa kutumia ARIMA\n", + "\n", + "Katika daftari hili, tunaonyesha jinsi ya:\n", + "- kuandaa data ya mfululizo wa muda kwa ajili ya kufundisha mfano wa utabiri wa mfululizo wa muda wa ARIMA\n", + "- kutekeleza mfano rahisi wa ARIMA ili kutabiri hatua za HORIZON zijazo (muda *t+1* hadi *t+HORIZON*) katika mfululizo wa muda\n", + "- kutathmini mfano\n", + "\n", + "Data katika mfano huu imetolewa kutoka mashindano ya utabiri ya GEFCom2014. \n", + "\n", + "Inajumuisha miaka 3 ya mzigo wa umeme wa kila saa na thamani za joto kati ya mwaka 2012 na 2014. Kazi ni kutabiri thamani za baadaye za mzigo wa umeme. Katika mfano huu, tunaonyesha jinsi ya kutabiri hatua moja ya muda mbele, kwa kutumia data ya kihistoria ya mzigo pekee.\n", + "\n", + "Tao Hong, Pierre Pinson, Shu Fan, Hamidreza Zareipour, Alberto Troccoli na Rob J. Hyndman, \"Probabilistic energy forecasting: Global Energy Forecasting Competition 2014 and beyond\", International Journal of Forecasting, vol.32, no.3, uk. 896-913, Julai-Septemba, 2016.\n" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Sakinisha Vitegemezi \n", + "Anza kwa kusakinisha baadhi ya vitegemezi vinavyohitajika. 
Maktaba hizi pamoja na matoleo yao yanajulikana kufanya kazi kwa suluhisho: \n", + "\n", + "* `statsmodels == 0.12.2` \n", + "* `matplotlib == 3.4.2` \n", + "* `scikit-learn == 0.24.2` \n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 16, + "source": [ + "!pip install statsmodels" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "/bin/sh: pip: command not found\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 17, + "source": [ + "import os\n", + "import warnings\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import datetime as dt\n", + "import math\n", + "\n", + "from pandas.plotting import autocorrelation_plot\n", + "from statsmodels.tsa.statespace.sarimax import SARIMAX\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from common.utils import load_data, mape\n", + "from IPython.display import Image\n", + "\n", + "%matplotlib inline\n", + "pd.options.display.float_format = '{:,.2f}'.format\n", + "np.set_printoptions(precision=2)\n", + "warnings.filterwarnings(\"ignore\") # specify to ignore warning messages\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 18, + "source": [ + "energy = load_data('./data')[['load']]\n", + "energy.head(10)" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2012-01-01 00:00:002,698.00
2012-01-01 01:00:002,558.00
2012-01-01 02:00:002,444.00
2012-01-01 03:00:002,402.00
2012-01-01 04:00:002,403.00
2012-01-01 05:00:002,453.00
2012-01-01 06:00:002,560.00
2012-01-01 07:00:002,719.00
2012-01-01 08:00:002,916.00
2012-01-01 09:00:003,105.00
\n", + "
" + ], + "text/plain": [ + " load\n", + "2012-01-01 00:00:00 2,698.00\n", + "2012-01-01 01:00:00 2,558.00\n", + "2012-01-01 02:00:00 2,444.00\n", + "2012-01-01 03:00:00 2,402.00\n", + "2012-01-01 04:00:00 2,403.00\n", + "2012-01-01 05:00:00 2,453.00\n", + "2012-01-01 06:00:00 2,560.00\n", + "2012-01-01 07:00:00 2,719.00\n", + "2012-01-01 08:00:00 2,916.00\n", + "2012-01-01 09:00:00 3,105.00" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Panga data zote za mzigo zinazopatikana (Januari 2012 hadi Desemba 2014)\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 19, + "source": [ + "energy.plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Unda seti za data za mafunzo na majaribio\n", + "\n", + "Kabla ya kuanza kufundisha mfano wako, ni muhimu kugawanya data yako katika seti mbili: moja ya mafunzo na nyingine ya majaribio. Hii inahakikisha kuwa unaweza kupima utendaji wa mfano wako kwa data ambayo haujafunzwa nayo.\n", + "\n", + "### Kwa nini tunahitaji kugawanya data?\n", + "\n", + "Wakati wa kujenga mifano ya kujifunza kwa mashine, tunataka kuhakikisha kuwa mfano haujajifunza tu data ya mafunzo kwa kukariri, bali pia unaweza kufanya utabiri sahihi kwa data mpya. Kugawanya data katika seti za mafunzo na majaribio husaidia kupima uwezo huu wa jumla.\n", + "\n", + "[!NOTE] Ni mazoea mazuri kutumia takriban 70-80% ya data yako kwa mafunzo na 20-30% kwa majaribio.\n", + "\n", + "### Jinsi ya kugawanya data\n", + "\n", + "Unaweza kutumia zana au maktaba kama @@INLINE_CODE_1@@ kugawanya data yako kwa urahisi. 
Hapa kuna mfano wa jinsi ya kufanya hivyo:\n", + "\n", + "```python\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "# Kugawanya data katika seti za mafunzo na majaribio\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", + "```\n", + "\n", + "Katika mfano huu:\n", + "\n", + "- `X` ni data ya vipengele (features).\n", + "- `y` ni lebo (labels) zinazohusiana na data hiyo.\n", + "- `test_size=0.2` inawakilisha asilimia ya data inayotengwa kwa majaribio.\n", + "- `random_state=42` inahakikisha mgawanyo wa data ni wa nasibu lakini unaweza kurudiwa.\n", + "\n", + "[!TIP] Hakikisha data yako imechanganywa vizuri kabla ya kugawanya, hasa ikiwa data yako imepangwa kwa utaratibu fulani.\n", + "\n", + "### Kumbuka\n", + "\n", + "- Usitumie data ya majaribio wakati wa kufundisha mfano wako.\n", + "- Data ya majaribio inapaswa kubaki \"safi\" hadi wakati wa kutathmini utendaji wa mfano.\n", + "- Ikiwa una seti kubwa ya data, unaweza pia kuunda seti ya tatu inayoitwa \"data ya uthibitisho\" kwa ajili ya kurekebisha vigezo vya mfano.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 20, + "source": [ + "train_start_dt = '2014-11-01 00:00:00'\n", + "test_start_dt = '2014-12-30 00:00:00' " + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 21, + "source": [ + "energy[(energy.index < test_start_dt) & (energy.index >= train_start_dt)][['load']].rename(columns={'load':'train'}) \\\n", + " .join(energy[test_start_dt:][['load']].rename(columns={'load':'test'}), how='outer') \\\n", + " .plot(y=['train', 'test'], figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 22, + "source": [ + "train = energy.copy()[(energy.index >= train_start_dt) & (energy.index < test_start_dt)][['load']]\n", + "test = energy.copy()[energy.index >= test_start_dt][['load']]\n", + "\n", + "print('Training data shape: ', train.shape)\n", + "print('Test data shape: ', test.shape)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Training data shape: (1416, 1)\n", + "Test data shape: (48, 1)\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 23, + "source": [ + "scaler = MinMaxScaler()\n", + "train['load'] = scaler.fit_transform(train)\n", + "train.head(10)" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2014-11-01 00:00:000.10
2014-11-01 01:00:000.07
2014-11-01 02:00:000.05
2014-11-01 03:00:000.04
2014-11-01 04:00:000.06
2014-11-01 05:00:000.10
2014-11-01 06:00:000.19
2014-11-01 07:00:000.31
2014-11-01 08:00:000.40
2014-11-01 09:00:000.48
\n", + "
" + ], + "text/plain": [ + " load\n", + "2014-11-01 00:00:00 0.10\n", + "2014-11-01 01:00:00 0.07\n", + "2014-11-01 02:00:00 0.05\n", + "2014-11-01 03:00:00 0.04\n", + "2014-11-01 04:00:00 0.06\n", + "2014-11-01 05:00:00 0.10\n", + "2014-11-01 06:00:00 0.19\n", + "2014-11-01 07:00:00 0.31\n", + "2014-11-01 08:00:00 0.40\n", + "2014-11-01 09:00:00 0.48" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Data asilia dhidi ya data iliyopimwa:\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 24, + "source": [ + "energy[(energy.index >= train_start_dt) & (energy.index < test_start_dt)][['load']].rename(columns={'load':'original load'}).plot.hist(bins=100, fontsize=12)\n", + "train.rename(columns={'load':'scaled load'}).plot.hist(bins=100, fontsize=12)\n", + "plt.show()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Hebu pia tupime data ya majaribio\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 25, + "source": [ + "test['load'] = scaler.transform(test)\n", + "test.head()" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2014-12-30 00:00:000.33
2014-12-30 01:00:000.29
2014-12-30 02:00:000.27
2014-12-30 03:00:000.27
2014-12-30 04:00:000.30
\n", + "
" + ], + "text/plain": [ + " load\n", + "2014-12-30 00:00:00 0.33\n", + "2014-12-30 01:00:00 0.29\n", + "2014-12-30 02:00:00 0.27\n", + "2014-12-30 03:00:00 0.27\n", + "2014-12-30 04:00:00 0.30" + ] + }, + "metadata": {}, + "execution_count": 25 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 26, + "source": [ + "# Specify the number of steps to forecast ahead\n", + "HORIZON = 3\n", + "print('Forecasting horizon:', HORIZON, 'hours')" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Forecasting horizon: 3 hours\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 27, + "source": [ + "order = (4, 1, 0)\n", + "seasonal_order = (1, 1, 0, 24)\n", + "\n", + "model = SARIMAX(endog=train, order=order, seasonal_order=seasonal_order)\n", + "results = model.fit()\n", + "\n", + "print(results.summary())\n" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " SARIMAX Results \n", + "==========================================================================================\n", + "Dep. Variable: load No. 
Observations: 1416\n", + "Model: SARIMAX(4, 1, 0)x(1, 1, 0, 24) Log Likelihood 3477.239\n", + "Date: Thu, 30 Sep 2021 AIC -6942.477\n", + "Time: 14:36:28 BIC -6911.050\n", + "Sample: 11-01-2014 HQIC -6930.725\n", + " - 12-29-2014 \n", + "Covariance Type: opg \n", + "==============================================================================\n", + " coef std err z P>|z| [0.025 0.975]\n", + "------------------------------------------------------------------------------\n", + "ar.L1 0.8403 0.016 52.226 0.000 0.809 0.872\n", + "ar.L2 -0.5220 0.034 -15.388 0.000 -0.588 -0.456\n", + "ar.L3 0.1536 0.044 3.470 0.001 0.067 0.240\n", + "ar.L4 -0.0778 0.036 -2.158 0.031 -0.148 -0.007\n", + "ar.S.L24 -0.2327 0.024 -9.718 0.000 -0.280 -0.186\n", + "sigma2 0.0004 8.32e-06 47.358 0.000 0.000 0.000\n", + "===================================================================================\n", + "Ljung-Box (L1) (Q): 0.05 Jarque-Bera (JB): 1464.60\n", + "Prob(Q): 0.83 Prob(JB): 0.00\n", + "Heteroskedasticity (H): 0.84 Skew: 0.14\n", + "Prob(H) (two-sided): 0.07 Kurtosis: 8.02\n", + "===================================================================================\n", + "\n", + "Warnings:\n", + "[1] Covariance matrix calculated using the outer product of gradients (complex-step).\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Unda data ya majaribio kwa kila hatua ya HORIZON.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 28, + "source": [ + "test_shifted = test.copy()\n", + "\n", + "for t in range(1, HORIZON):\n", + " test_shifted['load+'+str(t)] = test_shifted['load'].shift(-t, freq='H')\n", + " \n", + "test_shifted = test_shifted.dropna(how='any')\n", + "test_shifted.head(5)" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
loadload+1load+2
2014-12-30 00:00:000.330.290.27
2014-12-30 01:00:000.290.270.27
2014-12-30 02:00:000.270.270.30
2014-12-30 03:00:000.270.300.41
2014-12-30 04:00:000.300.410.57
\n", + "
" + ], + "text/plain": [ + " load load+1 load+2\n", + "2014-12-30 00:00:00 0.33 0.29 0.27\n", + "2014-12-30 01:00:00 0.29 0.27 0.27\n", + "2014-12-30 02:00:00 0.27 0.27 0.30\n", + "2014-12-30 03:00:00 0.27 0.30 0.41\n", + "2014-12-30 04:00:00 0.30 0.41 0.57" + ] + }, + "metadata": {}, + "execution_count": 28 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 29, + "source": [ + "%%time\n", + "training_window = 720 # dedicate 30 days (720 hours) for training\n", + "\n", + "train_ts = train['load']\n", + "test_ts = test_shifted\n", + "\n", + "history = [x for x in train_ts]\n", + "history = history[(-training_window):]\n", + "\n", + "predictions = list()\n", + "\n", + "# let's user simpler model for demonstration\n", + "order = (2, 1, 0)\n", + "seasonal_order = (1, 1, 0, 24)\n", + "\n", + "for t in range(test_ts.shape[0]):\n", + " model = SARIMAX(endog=history, order=order, seasonal_order=seasonal_order)\n", + " model_fit = model.fit()\n", + " yhat = model_fit.forecast(steps = HORIZON)\n", + " predictions.append(yhat)\n", + " obs = list(test_ts.iloc[t])\n", + " # move the training window\n", + " history.append(obs[0])\n", + " history.pop(0)\n", + " print(test_ts.index[t])\n", + " print(t+1, ': predicted =', yhat, 'expected =', obs)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "2014-12-30 00:00:00\n", + "1 : predicted = [0.32 0.29 0.28] expected = [0.32945389435989236, 0.2900626678603402, 0.2739480752014323]\n", + "2014-12-30 01:00:00\n", + "2 : predicted = [0.3 0.29 0.3 ] expected = [0.2900626678603402, 0.2739480752014323, 0.26812891674127126]\n", + "2014-12-30 02:00:00\n", + "3 : predicted = [0.27 0.28 0.32] expected = [0.2739480752014323, 0.26812891674127126, 0.3025962399283795]\n", + "2014-12-30 03:00:00\n", + "4 : predicted = [0.28 0.32 0.42] expected = [0.26812891674127126, 0.3025962399283795, 0.40823634735899716]\n", + 
"2014-12-30 04:00:00\n", + "5 : predicted = [0.3 0.39 0.54] expected = [0.3025962399283795, 0.40823634735899716, 0.5689346463742166]\n", + "2014-12-30 05:00:00\n", + "6 : predicted = [0.4 0.55 0.66] expected = [0.40823634735899716, 0.5689346463742166, 0.6799462846911368]\n", + "2014-12-30 06:00:00\n", + "7 : predicted = [0.57 0.68 0.75] expected = [0.5689346463742166, 0.6799462846911368, 0.7309758281110115]\n", + "2014-12-30 07:00:00\n", + "8 : predicted = [0.68 0.75 0.8 ] expected = [0.6799462846911368, 0.7309758281110115, 0.7511190689346463]\n", + "2014-12-30 08:00:00\n", + "9 : predicted = [0.75 0.8 0.82] expected = [0.7309758281110115, 0.7511190689346463, 0.7636526410026856]\n", + "2014-12-30 09:00:00\n", + "10 : predicted = [0.77 0.78 0.78] expected = [0.7511190689346463, 0.7636526410026856, 0.7381378692927483]\n", + "2014-12-30 10:00:00\n", + "11 : predicted = [0.76 0.75 0.74] expected = [0.7636526410026856, 0.7381378692927483, 0.7188898836168307]\n", + "2014-12-30 11:00:00\n", + "12 : predicted = [0.77 0.76 0.75] expected = [0.7381378692927483, 0.7188898836168307, 0.7090420769919425]\n", + "2014-12-30 12:00:00\n", + "13 : predicted = [0.7 0.68 0.69] expected = [0.7188898836168307, 0.7090420769919425, 0.7081468218442255]\n", + "2014-12-30 13:00:00\n", + "14 : predicted = [0.72 0.73 0.76] expected = [0.7090420769919425, 0.7081468218442255, 0.7385854968666068]\n", + "2014-12-30 14:00:00\n", + "15 : predicted = [0.71 0.73 0.86] expected = [0.7081468218442255, 0.7385854968666068, 0.8478066248880931]\n", + "2014-12-30 15:00:00\n", + "16 : predicted = [0.73 0.85 0.97] expected = [0.7385854968666068, 0.8478066248880931, 0.9516562220232765]\n", + "2014-12-30 16:00:00\n", + "17 : predicted = [0.87 0.99 0.97] expected = [0.8478066248880931, 0.9516562220232765, 0.934198746642793]\n", + "2014-12-30 17:00:00\n", + "18 : predicted = [0.94 0.92 0.86] expected = [0.9516562220232765, 0.934198746642793, 0.8876454789615038]\n", + "2014-12-30 18:00:00\n", + "19 : predicted = 
[0.94 0.89 0.82] expected = [0.934198746642793, 0.8876454789615038, 0.8294538943598924]\n", + "2014-12-30 19:00:00\n", + "20 : predicted = [0.88 0.82 0.71] expected = [0.8876454789615038, 0.8294538943598924, 0.7197851387645477]\n", + "2014-12-30 20:00:00\n", + "21 : predicted = [0.83 0.72 0.58] expected = [0.8294538943598924, 0.7197851387645477, 0.5747538048343777]\n", + "2014-12-30 21:00:00\n", + "22 : predicted = [0.72 0.58 0.47] expected = [0.7197851387645477, 0.5747538048343777, 0.4592658907788718]\n", + "2014-12-30 22:00:00\n", + "23 : predicted = [0.58 0.47 0.39] expected = [0.5747538048343777, 0.4592658907788718, 0.3858549686660697]\n", + "2014-12-30 23:00:00\n", + "24 : predicted = [0.46 0.38 0.34] expected = [0.4592658907788718, 0.3858549686660697, 0.34377797672336596]\n", + "2014-12-31 00:00:00\n", + "25 : predicted = [0.38 0.34 0.33] expected = [0.3858549686660697, 0.34377797672336596, 0.32542524619516544]\n", + "2014-12-31 01:00:00\n", + "26 : predicted = [0.36 0.34 0.34] expected = [0.34377797672336596, 0.32542524619516544, 0.33034914950760963]\n", + "2014-12-31 02:00:00\n", + "27 : predicted = [0.32 0.32 0.35] expected = [0.32542524619516544, 0.33034914950760963, 0.3706356311548791]\n", + "2014-12-31 03:00:00\n", + "28 : predicted = [0.32 0.36 0.47] expected = [0.33034914950760963, 0.3706356311548791, 0.470008952551477]\n", + "2014-12-31 04:00:00\n", + "29 : predicted = [0.37 0.48 0.65] expected = [0.3706356311548791, 0.470008952551477, 0.6145926589077886]\n", + "2014-12-31 05:00:00\n", + "30 : predicted = [0.48 0.64 0.75] expected = [0.470008952551477, 0.6145926589077886, 0.7247090420769919]\n", + "2014-12-31 06:00:00\n", + "31 : predicted = [0.63 0.73 0.79] expected = [0.6145926589077886, 0.7247090420769919, 0.786034019695613]\n", + "2014-12-31 07:00:00\n", + "32 : predicted = [0.71 0.76 0.79] expected = [0.7247090420769919, 0.786034019695613, 0.8012533572068039]\n", + "2014-12-31 08:00:00\n", + "33 : predicted = [0.79 0.82 0.83] expected = 
[0.786034019695613, 0.8012533572068039, 0.7994628469113696]\n", + "2014-12-31 09:00:00\n", + "34 : predicted = [0.82 0.83 0.81] expected = [0.8012533572068039, 0.7994628469113696, 0.780214861235452]\n", + "2014-12-31 10:00:00\n", + "35 : predicted = [0.8 0.78 0.76] expected = [0.7994628469113696, 0.780214861235452, 0.7587287376902416]\n", + "2014-12-31 11:00:00\n", + "36 : predicted = [0.77 0.75 0.74] expected = [0.780214861235452, 0.7587287376902416, 0.7367949865711727]\n", + "2014-12-31 12:00:00\n", + "37 : predicted = [0.77 0.76 0.76] expected = [0.7587287376902416, 0.7367949865711727, 0.7188898836168307]\n", + "2014-12-31 13:00:00\n", + "38 : predicted = [0.75 0.75 0.78] expected = [0.7367949865711727, 0.7188898836168307, 0.7273948075201431]\n", + "2014-12-31 14:00:00\n", + "39 : predicted = [0.73 0.75 0.87] expected = [0.7188898836168307, 0.7273948075201431, 0.8299015219337511]\n", + "2014-12-31 15:00:00\n", + "40 : predicted = [0.74 0.85 0.96] expected = [0.7273948075201431, 0.8299015219337511, 0.909579230080573]\n", + "2014-12-31 16:00:00\n", + "41 : predicted = [0.83 0.94 0.93] expected = [0.8299015219337511, 0.909579230080573, 0.855863921217547]\n", + "2014-12-31 17:00:00\n", + "42 : predicted = [0.94 0.93 0.88] expected = [0.909579230080573, 0.855863921217547, 0.7721575649059982]\n", + "2014-12-31 18:00:00\n", + "43 : predicted = [0.87 0.82 0.77] expected = [0.855863921217547, 0.7721575649059982, 0.7023276633840643]\n", + "2014-12-31 19:00:00\n", + "44 : predicted = [0.79 0.73 0.63] expected = [0.7721575649059982, 0.7023276633840643, 0.6195165622202325]\n", + "2014-12-31 20:00:00\n", + "45 : predicted = [0.7 0.59 0.46] expected = [0.7023276633840643, 0.6195165622202325, 0.5425246195165621]\n", + "2014-12-31 21:00:00\n", + "46 : predicted = [0.6 0.47 0.36] expected = [0.6195165622202325, 0.5425246195165621, 0.4735899731423454]\n", + "CPU times: user 12min 15s, sys: 2min 39s, total: 14min 54s\n", + "Wall time: 2min 36s\n" + ] + } + ], + "metadata": { + 
"scrolled": true + } + }, + { + "cell_type": "markdown", + "source": [ + "Linganisha utabiri na mzigo halisi\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 30, + "source": [ + "eval_df = pd.DataFrame(predictions, columns=['t+'+str(t) for t in range(1, HORIZON+1)])\n", + "eval_df['timestamp'] = test.index[0:len(test.index)-HORIZON+1]\n", + "eval_df = pd.melt(eval_df, id_vars='timestamp', value_name='prediction', var_name='h')\n", + "eval_df['actual'] = np.array(np.transpose(test_ts)).ravel()\n", + "eval_df[['prediction', 'actual']] = scaler.inverse_transform(eval_df[['prediction', 'actual']])\n", + "eval_df.head()" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
timestamphpredictionactual
02014-12-30 00:00:00t+13,008.743,023.00
12014-12-30 01:00:00t+12,955.532,935.00
22014-12-30 02:00:00t+12,900.172,899.00
32014-12-30 03:00:00t+12,917.692,886.00
42014-12-30 04:00:00t+12,946.992,963.00
\n", + "
" + ], + "text/plain": [ + " timestamp h prediction actual\n", + "0 2014-12-30 00:00:00 t+1 3,008.74 3,023.00\n", + "1 2014-12-30 01:00:00 t+1 2,955.53 2,935.00\n", + "2 2014-12-30 02:00:00 t+1 2,900.17 2,899.00\n", + "3 2014-12-30 03:00:00 t+1 2,917.69 2,886.00\n", + "4 2014-12-30 04:00:00 t+1 2,946.99 2,963.00" + ] + }, + "metadata": {}, + "execution_count": 30 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Hesabu **kosa la wastani wa asilimia ya thamani kamili (MAPE)** kwa utabiri wote\n", + "\n", + "$$MAPE = \\frac{1}{n} \\sum_{t=1}^{n}|\\frac{actual_t - predicted_t}{actual_t}|$$\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 31, + "source": [ + "if(HORIZON > 1):\n", + " eval_df['APE'] = (eval_df['prediction'] - eval_df['actual']).abs() / eval_df['actual']\n", + " print(eval_df.groupby('h')['APE'].mean())" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "h\n", + "t+1 0.01\n", + "t+2 0.01\n", + "t+3 0.02\n", + "Name: APE, dtype: float64\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 32, + "source": [ + "print('One step forecast MAPE: ', (mape(eval_df[eval_df['h'] == 't+1']['prediction'], eval_df[eval_df['h'] == 't+1']['actual']))*100, '%')" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "One step forecast MAPE: 0.5570581332313952 %\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 33, + "source": [ + "print('Multi-step forecast MAPE: ', mape(eval_df['prediction'], eval_df['actual'])*100, '%')" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Multi-step forecast MAPE: 1.1460048657704118 %\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Panga utabiri dhidi ya halisi kwa wiki ya kwanza ya seti ya majaribio\n" + ], 
+ "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 34, + "source": [ + "if(HORIZON == 1):\n", + " ## Plotting single step forecast\n", + " eval_df.plot(x='timestamp', y=['actual', 'prediction'], style=['r', 'b'], figsize=(15, 8))\n", + "\n", + "else:\n", + " ## Plotting multi step forecast\n", + " plot_df = eval_df[(eval_df.h=='t+1')][['timestamp', 'actual']]\n", + " for t in range(1, HORIZON+1):\n", + " plot_df['t+'+str(t)] = eval_df[(eval_df.h=='t+'+str(t))]['prediction'].values\n", + "\n", + " fig = plt.figure(figsize=(15, 8))\n", + " ax = plt.plot(plot_df['timestamp'], plot_df['actual'], color='red', linewidth=4.0)\n", + " ax = fig.add_subplot(111)\n", + " for t in range(1, HORIZON+1):\n", + " x = plot_df['timestamp'][(t-1):]\n", + " y = plot_df['t+'+str(t)][0:len(x)]\n", + " ax.plot(x, y, color='blue', linewidth=4*math.pow(.9,t), alpha=math.pow(0.8,t))\n", + " \n", + " ax.legend(loc='best')\n", + " \n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "No handles with labels found to put in legend.\n" + ] + }, + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya tafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kwa usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, inashauriwa kutumia huduma ya tafsiri ya kitaalamu ya binadamu. Hatutawajibika kwa maelewano mabaya au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ], + "metadata": { + "kernel_info": { + "name": "python3" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "nteract": { + "version": "nteract-front-end@1.0.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "c193140200b9684da27e3890211391b6", + "translation_date": "2025-09-06T13:58:46+00:00", + "source_file": "7-TimeSeries/2-ARIMA/solution/notebook.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/sw/7-TimeSeries/2-ARIMA/working/notebook.ipynb b/translations/sw/7-TimeSeries/2-ARIMA/working/notebook.ipynb new file mode 100644 index 000000000..07e9f27b8 --- /dev/null +++ 
b/translations/sw/7-TimeSeries/2-ARIMA/working/notebook.ipynb @@ -0,0 +1,61 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "523ec472196307b3c4235337353c9ceb", + "translation_date": "2025-09-06T14:00:36+00:00", + "source_file": "7-TimeSeries/2-ARIMA/working/notebook.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# Utabiri wa mfululizo wa muda kwa kutumia ARIMA\n", + "\n", + "Katika daftari hili, tunadhihirisha jinsi ya:\n", + "- kuandaa data ya mfululizo wa muda kwa ajili ya kufundisha mfano wa utabiri wa ARIMA\n", + "- kutekeleza mfano rahisi wa ARIMA ili kutabiri hatua za HORIZON zijazo (wakati *t+1* hadi *t+HORIZON*) katika mfululizo wa muda\n", + "- kutathmini mfano\n", + "\n", + "Data katika mfano huu imetolewa kutoka mashindano ya utabiri ya GEFCom2014. \n", + "\n", + "Inajumuisha miaka 3 ya data ya saa kwa saa ya mzigo wa umeme na thamani za joto kati ya mwaka 2012 na 2014. Kazi ni kutabiri thamani za baadaye za mzigo wa umeme. Katika mfano huu, tunaonyesha jinsi ya kutabiri hatua moja mbele, kwa kutumia data ya kihistoria ya mzigo pekee.\n", + "\n", + "Tao Hong, Pierre Pinson, Shu Fan, Hamidreza Zareipour, Alberto Troccoli na Rob J. Hyndman, \"Utabiri wa nishati wa kiuhalisia: Mashindano ya Utabiri wa Nishati ya Kimataifa 2014 na zaidi\", Jarida la Kimataifa la Utabiri, vol.32, no.3, uk. 
896-913, Julai-Septemba, 2016.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pip install statsmodels" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya tafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kuhakikisha usahihi, tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuchukuliwa kama chanzo cha mamlaka. Kwa taarifa muhimu, tafsiri ya kitaalamu ya binadamu inapendekezwa. Hatutawajibika kwa kutoelewana au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sw/7-TimeSeries/3-SVR/solution/notebook.ipynb b/translations/sw/7-TimeSeries/3-SVR/solution/notebook.ipynb new file mode 100644 index 000000000..e85955e39 --- /dev/null +++ b/translations/sw/7-TimeSeries/3-SVR/solution/notebook.ipynb @@ -0,0 +1,1017 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "fv9OoQsMFk5A" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Katika daftari hili, tunaonyesha jinsi ya:\n", + "\n", + "- kuandaa data ya mfululizo wa muda wa 2D kwa ajili ya kufundisha mfano wa SVM regressor \n", + "- kutekeleza SVR kwa kutumia kernel ya RBF \n", + "- kutathmini mfano kwa kutumia michoro na MAPE \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Kuingiza moduli\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append('../../')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "M687KNlQFp0-" + }, + "outputs": [], + "source": [ + "import os\n", + "import warnings\n", + "import 
matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import datetime as dt\n", + "import math\n", + "\n", + "from sklearn.svm import SVR\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from common.utils import load_data, mape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Cj-kfVdMGjWP" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8fywSjC6GsRz" + }, + "source": [ + "### Pakia data\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "aBDkEB11Fumg", + "outputId": "99cf7987-0509-4b73-8cc2-75d7da0d2740" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2012-01-01 00:00:002698.0
2012-01-01 01:00:002558.0
2012-01-01 02:00:002444.0
2012-01-01 03:00:002402.0
2012-01-01 04:00:002403.0
\n", + "
" + ], + "text/plain": [ + " load\n", + "2012-01-01 00:00:00 2698.0\n", + "2012-01-01 01:00:00 2558.0\n", + "2012-01-01 02:00:00 2444.0\n", + "2012-01-01 03:00:00 2402.0\n", + "2012-01-01 04:00:00 2403.0" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "energy = load_data('../../data')[['load']]\n", + "energy.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "O0BWP13rGnh4" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 486 + }, + "id": "hGaNPKu_Gidk", + "outputId": "7f89b326-9057-4f49-efbe-cb100ebdf76d" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "energy.plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IPuNor4eGwYY" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "ysvsNyONGt0Q" + }, + "outputs": [], + "source": [ + "train_start_dt = '2014-11-01 00:00:00'\n", + "test_start_dt = '2014-12-30 00:00:00'" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 548 + }, + "id": "SsfdLoPyGy9w", + "outputId": "d6d6c25b-b1f4-47e5-91d1-707e043237d7" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "energy[(energy.index < test_start_dt) & (energy.index >= train_start_dt)][['load']].rename(columns={'load':'train'}) \\\n", + " .join(energy[test_start_dt:][['load']].rename(columns={'load':'test'}), how='outer') \\\n", + " .plot(y=['train', 'test'], figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XbFTqBw6G1Ch" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Sasa, unahitaji kuandaa data kwa mafunzo kwa kufanya uchujaji na kupima data yako.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cYivRdQpHDj3", + "outputId": "a138f746-461c-4fd6-bfa6-0cee094c4aa1" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training data shape: (1416, 1)\n", + "Test data shape: (48, 1)\n" + ] + } + ], + "source": [ + "train = energy.copy()[(energy.index >= train_start_dt) & (energy.index < test_start_dt)][['load']]\n", + "test = energy.copy()[energy.index >= test_start_dt][['load']]\n", + "\n", + "print('Training data shape: ', train.shape)\n", + "print('Test data shape: ', test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Pima data kuwa katika kiwango (0, 1).\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "3DNntGQnZX8G", + "outputId": "210046bc-7a66-4ccd-d70d-aa4a7309949c" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2014-11-01 00:00:000.101611
2014-11-01 01:00:000.065801
2014-11-01 02:00:000.046106
2014-11-01 03:00:000.042525
2014-11-01 04:00:000.059087
\n", + "
" + ], + "text/plain": [ + " load\n", + "2014-11-01 00:00:00 0.101611\n", + "2014-11-01 01:00:00 0.065801\n", + "2014-11-01 02:00:00 0.046106\n", + "2014-11-01 03:00:00 0.042525\n", + "2014-11-01 04:00:00 0.059087" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "scaler = MinMaxScaler()\n", + "train['load'] = scaler.fit_transform(train)\n", + "train.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "26Yht-rzZexe", + "outputId": "20326077-a38a-4e78-cc5b-6fd7af95d301" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2014-12-30 00:00:000.329454
2014-12-30 01:00:000.290063
2014-12-30 02:00:000.273948
2014-12-30 03:00:000.268129
2014-12-30 04:00:000.302596
\n", + "
" + ], + "text/plain": [ + " load\n", + "2014-12-30 00:00:00 0.329454\n", + "2014-12-30 01:00:00 0.290063\n", + "2014-12-30 02:00:00 0.273948\n", + "2014-12-30 03:00:00 0.268129\n", + "2014-12-30 04:00:00 0.302596" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test['load'] = scaler.transform(test)\n", + "test.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x0n6jqxOQ41Z" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fdmxTZtOQ8xs" + }, + "source": [ + "Kwa SVR yetu, tunabadilisha data ya ingizo kuwa katika umbo la `[batch, timesteps]`. Kwa hivyo, tunabadilisha umbo la `train_data` na `test_data` zilizopo ili kuwe na kipimo kipya kinachorejelea timesteps. Kwa mfano wetu, tunachukua `timesteps = 5`. Kwa hivyo, viingizo kwa mfano ni data za timesteps 4 za kwanza, na matokeo yatakuwa data ya timestep ya 5.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "Rpju-Sc2HFm0" + }, + "outputs": [], + "source": [ + "# Converting to numpy arrays\n", + "\n", + "train_data = train.values\n", + "test_data = test.values" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# Selecting the timesteps\n", + "\n", + "timesteps=5" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "O-JrsrsVJhUQ", + "outputId": "c90dbe71-bacc-4ec4-b452-f82fe5aefaef" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(1412, 5)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Converting data to 2D tensor\n", + "\n", + "train_data_timesteps=np.array([[j for j in train_data[i:i+timesteps]] for i in range(0,len(train_data)-timesteps+1)])[:,:,0]\n", + "train_data_timesteps.shape" + ] + }, + { + "cell_type": "code", + 
"execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "exJD8AI7KE4g", + "outputId": "ce90260c-f327-427d-80f2-77307b5a6318" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(44, 5)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Converting test data to 2D tensor\n", + "\n", + "test_data_timesteps=np.array([[j for j in test_data[i:i+timesteps]] for i in range(0,len(test_data)-timesteps+1)])[:,:,0]\n", + "test_data_timesteps.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "2u0R2sIsLuq5" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(1412, 4) (1412, 1)\n", + "(44, 4) (44, 1)\n" + ] + } + ], + "source": [ + "x_train, y_train = train_data_timesteps[:,:timesteps-1],train_data_timesteps[:,[timesteps-1]]\n", + "x_test, y_test = test_data_timesteps[:,:timesteps-1],test_data_timesteps[:,[timesteps-1]]\n", + "\n", + "print(x_train.shape, y_train.shape)\n", + "print(x_test.shape, y_test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8wIPOtAGLZlh" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "id": "EhA403BEPEiD" + }, + "outputs": [], + "source": [ + "# Create model using RBF kernel\n", + "\n", + "model = SVR(kernel='rbf',gamma=0.5, C=10, epsilon = 0.05)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GS0UA3csMbqp", + "outputId": "d86b6f05-5742-4c1d-c2db-c40510bd4f0d" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "SVR(C=10, cache_size=200, coef0=0.0, degree=3, epsilon=0.05, gamma=0.5,\n", + " kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Fit model on training 
data\n", + "\n", + "model.fit(x_train, y_train[:,0])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Rz_x8S3UrlcF" + }, + "source": [ + "### Fanya utabiri wa modeli\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XR0gnt3MnuYS", + "outputId": "157e40ab-9a23-4b66-a885-0d52a24b2364" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(1412, 1) (44, 1)\n" + ] + } + ], + "source": [ + "# Making predictions\n", + "\n", + "y_train_pred = model.predict(x_train).reshape(-1,1)\n", + "y_test_pred = model.predict(x_test).reshape(-1,1)\n", + "\n", + "print(y_train_pred.shape, y_test_pred.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_2epncg-SGzr" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1412 44\n" + ] + } + ], + "source": [ + "# Scaling the predictions\n", + "\n", + "y_train_pred = scaler.inverse_transform(y_train_pred)\n", + "y_test_pred = scaler.inverse_transform(y_test_pred)\n", + "\n", + "print(len(y_train_pred), len(y_test_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xmm_YLXhq7gV", + "outputId": "18392f64-4029-49ac-c71a-a4e2411152a1" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1412 44\n" + ] + } + ], + "source": [ + "# Scaling the original values\n", + "\n", + "y_train = scaler.inverse_transform(y_train)\n", + "y_test = scaler.inverse_transform(y_test)\n", + "\n", + "print(len(y_train), len(y_test))" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "u3LBj93coHEi", + "outputId": "d4fd49e8-8c6e-4bb0-8ef9-ca0b26d725b4" + }, + 
"outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1412 44\n" + ] + } + ], + "source": [ + "# Extract the timesteps for x-axis\n", + "\n", + "train_timestamps = energy[(energy.index < test_start_dt) & (energy.index >= train_start_dt)].index[timesteps-1:]\n", + "test_timestamps = energy[test_start_dt:].index[timesteps-1:]\n", + "\n", + "print(len(train_timestamps), len(test_timestamps))" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(25,6))\n", + "plt.plot(train_timestamps, y_train, color = 'red', linewidth=2.0, alpha = 0.6)\n", + "plt.plot(train_timestamps, y_train_pred, color = 'blue', linewidth=0.8)\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.title(\"Training data prediction\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "LnhzcnYtXHCm", + "outputId": "f5f0d711-f18b-4788-ad21-d4470ea2c02b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MAPE for training data: 1.7195710200875551 %\n" + ] + } + ], + "source": [ + "print('MAPE for training data: ', mape(y_train_pred, y_train)*100, '%')" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 225 + }, + "id": "53Q02FoqQH4V", + "outputId": "53e2d59b-5075-4765-ad9e-aed56c966583" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(10,3))\n", + "plt.plot(test_timestamps, y_test, color = 'red', linewidth=2.0, alpha = 0.6)\n", + "plt.plot(test_timestamps, y_test_pred, color = 'blue', linewidth=0.8)\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "clOAUH-SXCJG", + "outputId": "a3aa85ff-126a-4a4a-cd9e-90b9cc465ef5" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MAPE for testing data: 1.2623790187854018 %\n" + ] + } + ], + "source": [ + "print('MAPE for testing data: ', mape(y_test_pred, y_test)*100, '%')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DHlKvVCId5ue" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cOFJ45vreO0N", + "outputId": "35628e33-ecf9-4966-8036-f7ea86db6f16" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tensor shape: (26300, 5)\n", + "X shape: (26300, 4) \n", + "Y shape: (26300, 1)\n" + ] + } + ], + "source": [ + "# Extracting load values as numpy array\n", + "data = energy.copy().values\n", + "\n", + "# Scaling\n", + "data = scaler.transform(data)\n", + "\n", + "# Transforming to 2D tensor as per model input requirement\n", + "data_timesteps=np.array([[j for j in data[i:i+timesteps]] for i in range(0,len(data)-timesteps+1)])[:,:,0]\n", + "print(\"Tensor shape: \", data_timesteps.shape)\n", + "\n", + "# Selecting inputs and outputs from data\n", + "X, Y = data_timesteps[:,:timesteps-1],data_timesteps[:,[timesteps-1]]\n", + "print(\"X shape: \", X.shape,\"\\nY shape: \", Y.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + 
"metadata": { + "id": "ESSAdQgwexIi" + }, + "outputs": [], + "source": [ + "# Make model predictions\n", + "Y_pred = model.predict(X).reshape(-1,1)\n", + "\n", + "# Inverse scale and reshape\n", + "Y_pred = scaler.inverse_transform(Y_pred)\n", + "Y = scaler.inverse_transform(Y)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 328 + }, + "id": "M_qhihN0RVVX", + "outputId": "a89cb23e-1d35-437f-9d63-8b8907e12f80" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(30,8))\n", + "plt.plot(Y, color = 'red', linewidth=2.0, alpha = 0.6)\n", + "plt.plot(Y_pred, color = 'blue', linewidth=1)\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "AcN7pMYXVGTK", + "outputId": "7e1c2161-47ce-496c-9d86-7ad9ae0df770" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MAPE: 2.0572089029888656 %\n" + ] + } + ], + "source": [ + "print('MAPE: ', mape(Y_pred, Y)*100, '%')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya tafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kwa usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, inashauriwa kutumia huduma ya tafsiri ya kitaalamu ya binadamu. 
Hatutawajibika kwa maelewano mabaya au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "Recurrent_Neural_Networks.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.1" + }, + "coopTranslator": { + "original_hash": "f8f3967282314d3995245835bdaa8418", + "translation_date": "2025-09-06T14:04:27+00:00", + "source_file": "7-TimeSeries/3-SVR/solution/notebook.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/translations/sw/7-TimeSeries/3-SVR/working/notebook.ipynb b/translations/sw/7-TimeSeries/3-SVR/working/notebook.ipynb new file mode 100644 index 000000000..5612d7071 --- /dev/null +++ b/translations/sw/7-TimeSeries/3-SVR/working/notebook.ipynb @@ -0,0 +1,693 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "fv9OoQsMFk5A" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Katika daftari hili, tunaonyesha jinsi ya:\n", + "\n", + "- kuandaa data ya mfululizo wa muda wa 2D kwa ajili ya kufundisha mfano wa SVM regressor \n", + "- kutekeleza SVR kwa kutumia kernel ya RBF \n", + "- kutathmini mfano kwa kutumia michoro na MAPE \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Kuingiza moduli\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append('../../')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "M687KNlQFp0-" + }, + "outputs": [], + 
"source": [ + "import os\n", + "import warnings\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import datetime as dt\n", + "import math\n", + "\n", + "from sklearn.svm import SVR\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from common.utils import load_data, mape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Cj-kfVdMGjWP" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8fywSjC6GsRz" + }, + "source": [ + "### Pakia data\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "aBDkEB11Fumg", + "outputId": "99cf7987-0509-4b73-8cc2-75d7da0d2740" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2012-01-01 00:00:002698.0
2012-01-01 01:00:002558.0
2012-01-01 02:00:002444.0
2012-01-01 03:00:002402.0
2012-01-01 04:00:002403.0
\n", + "
" + ], + "text/plain": [ + " load\n", + "2012-01-01 00:00:00 2698.0\n", + "2012-01-01 01:00:00 2558.0\n", + "2012-01-01 02:00:00 2444.0\n", + "2012-01-01 03:00:00 2402.0\n", + "2012-01-01 04:00:00 2403.0" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "energy = load_data('../../data')[['load']]\n", + "energy.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "O0BWP13rGnh4" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 486 + }, + "id": "hGaNPKu_Gidk", + "outputId": "7f89b326-9057-4f49-efbe-cb100ebdf76d" + }, + "outputs": [], + "source": [ + "energy.plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IPuNor4eGwYY" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ysvsNyONGt0Q" + }, + "outputs": [], + "source": [ + "train_start_dt = '2014-11-01 00:00:00'\n", + "test_start_dt = '2014-12-30 00:00:00'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 548 + }, + "id": "SsfdLoPyGy9w", + "outputId": "d6d6c25b-b1f4-47e5-91d1-707e043237d7" + }, + "outputs": [], + "source": [ + "energy[(energy.index < test_start_dt) & (energy.index >= train_start_dt)][['load']].rename(columns={'load':'train'}) \\\n", + " .join(energy[test_start_dt:][['load']].rename(columns={'load':'test'}), how='outer') \\\n", + " .plot(y=['train', 'test'], figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XbFTqBw6G1Ch" + }, + "source": [] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "Sasa, unahitaji kuandaa data kwa mafunzo kwa kufanya uchujaji na kupima data yako.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cYivRdQpHDj3", + "outputId": "a138f746-461c-4fd6-bfa6-0cee094c4aa1" + }, + "outputs": [], + "source": [ + "train = energy.copy()[(energy.index >= train_start_dt) & (energy.index < test_start_dt)][['load']]\n", + "test = energy.copy()[energy.index >= test_start_dt][['load']]\n", + "\n", + "print('Training data shape: ', train.shape)\n", + "print('Test data shape: ', test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Pima data kuwa katika kiwango (0, 1).\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "3DNntGQnZX8G", + "outputId": "210046bc-7a66-4ccd-d70d-aa4a7309949c" + }, + "outputs": [], + "source": [ + "scaler = MinMaxScaler()\n", + "train['load'] = scaler.fit_transform(train)\n", + "train.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "26Yht-rzZexe", + "outputId": "20326077-a38a-4e78-cc5b-6fd7af95d301" + }, + "outputs": [], + "source": [ + "test['load'] = scaler.transform(test)\n", + "test.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x0n6jqxOQ41Z" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fdmxTZtOQ8xs" + }, + "source": [ + "Kwa SVR yetu, tunabadilisha data ya ingizo kuwa katika umbo la `[batch, timesteps]`. Kwa hivyo, tunabadilisha umbo la `train_data` na `test_data` zilizopo ili kuwe na kipimo kipya kinachorejelea timesteps. Kwa mfano wetu, tunachukua `timesteps = 5`. 
Kwa hivyo, viingizo kwa mfano ni data ya timesteps 4 za kwanza, na matokeo yatakuwa data ya timestep ya 5.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Rpju-Sc2HFm0" + }, + "outputs": [], + "source": [ + "# Converting to numpy arrays\n", + "\n", + "train_data = train.values\n", + "test_data = test.values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Selecting the timesteps\n", + "\n", + "timesteps=None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "O-JrsrsVJhUQ", + "outputId": "c90dbe71-bacc-4ec4-b452-f82fe5aefaef" + }, + "outputs": [], + "source": [ + "# Converting data to 2D tensor\n", + "\n", + "train_data_timesteps=None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "exJD8AI7KE4g", + "outputId": "ce90260c-f327-427d-80f2-77307b5a6318" + }, + "outputs": [], + "source": [ + "# Converting test data to 2D tensor\n", + "\n", + "test_data_timesteps=None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2u0R2sIsLuq5" + }, + "outputs": [], + "source": [ + "x_train, y_train = None\n", + "x_test, y_test = None\n", + "\n", + "print(x_train.shape, y_train.shape)\n", + "print(x_test.shape, y_test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8wIPOtAGLZlh" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EhA403BEPEiD" + }, + "outputs": [], + "source": [ + "# Create model using RBF kernel\n", + "\n", + "model = None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GS0UA3csMbqp", + "outputId": "d86b6f05-5742-4c1d-c2db-c40510bd4f0d" + }, + "outputs": 
[], + "source": [ + "# Fit model on training data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Rz_x8S3UrlcF" + }, + "source": [ + "### Fanya utabiri wa mfano\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XR0gnt3MnuYS", + "outputId": "157e40ab-9a23-4b66-a885-0d52a24b2364" + }, + "outputs": [], + "source": [ + "# Making predictions\n", + "\n", + "y_train_pred = None\n", + "y_test_pred = None" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_2epncg-SGzr" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Scaling the predictions\n", + "\n", + "y_train_pred = scaler.inverse_transform(y_train_pred)\n", + "y_test_pred = scaler.inverse_transform(y_test_pred)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xmm_YLXhq7gV", + "outputId": "18392f64-4029-49ac-c71a-a4e2411152a1" + }, + "outputs": [], + "source": [ + "# Scaling the original values\n", + "\n", + "y_train = scaler.inverse_transform(y_train)\n", + "y_test = scaler.inverse_transform(y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "u3LBj93coHEi", + "outputId": "d4fd49e8-8c6e-4bb0-8ef9-ca0b26d725b4" + }, + "outputs": [], + "source": [ + "# Extract the timesteps for x-axis\n", + "\n", + "train_timestamps = None\n", + "test_timestamps = None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(25,6))\n", + "# plot original output\n", + "# plot predicted output\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.title(\"Training data prediction\")\n", + "plt.show()" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "LnhzcnYtXHCm", + "outputId": "f5f0d711-f18b-4788-ad21-d4470ea2c02b" + }, + "outputs": [], + "source": [ + "print('MAPE for training data: ', mape(y_train_pred, y_train)*100, '%')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 225 + }, + "id": "53Q02FoqQH4V", + "outputId": "53e2d59b-5075-4765-ad9e-aed56c966583" + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(10,3))\n", + "# plot original output\n", + "# plot predicted output\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "clOAUH-SXCJG", + "outputId": "a3aa85ff-126a-4a4a-cd9e-90b9cc465ef5" + }, + "outputs": [], + "source": [ + "print('MAPE for testing data: ', mape(y_test_pred, y_test)*100, '%')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DHlKvVCId5ue" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cOFJ45vreO0N", + "outputId": "35628e33-ecf9-4966-8036-f7ea86db6f16" + }, + "outputs": [], + "source": [ + "# Extracting load values as numpy array\n", + "data = None\n", + "\n", + "# Scaling\n", + "data = None\n", + "\n", + "# Transforming to 2D tensor as per model input requirement\n", + "data_timesteps=None\n", + "\n", + "# Selecting inputs and outputs from data\n", + "X, Y = None, None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ESSAdQgwexIi" + }, + "outputs": [], + "source": [ + "# Make model predictions\n", + "\n", + "# Inverse scale and reshape\n", + "Y_pred = None\n", + "Y = None" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 328 + }, + "id": "M_qhihN0RVVX", + "outputId": "a89cb23e-1d35-437f-9d63-8b8907e12f80" + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(30,8))\n", + "# plot original output\n", + "# plot predicted output\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "AcN7pMYXVGTK", + "outputId": "7e1c2161-47ce-496c-9d86-7ad9ae0df770" + }, + "outputs": [], + "source": [ + "print('MAPE: ', mape(Y_pred, Y)*100, '%')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya tafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kwa usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, inashauriwa kutumia huduma ya tafsiri ya kitaalamu ya binadamu. 
Hatutawajibika kwa maelewano mabaya au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "Recurrent_Neural_Networks.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.1" + }, + "coopTranslator": { + "original_hash": "e86ce102239a14c44585623b9b924a74", + "translation_date": "2025-09-06T14:06:58+00:00", + "source_file": "7-TimeSeries/3-SVR/working/notebook.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/translations/sw/8-Reinforcement/1-QLearning/notebook.ipynb b/translations/sw/8-Reinforcement/1-QLearning/notebook.ipynb new file mode 100644 index 000000000..417cf3e17 --- /dev/null +++ b/translations/sw/8-Reinforcement/1-QLearning/notebook.ipynb @@ -0,0 +1,411 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "17e5a668646eabf5aabd0e9bfcf17876", + "translation_date": "2025-09-06T15:05:14+00:00", + "source_file": "8-Reinforcement/1-QLearning/notebook.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# Peter na Mbwa 
Mwitu: Utangulizi wa Kujifunza kwa Kuimarisha\n", + "\n", + "Katika mafunzo haya, tutajifunza jinsi ya kutumia kujifunza kwa kuimarisha kutatua tatizo la kutafuta njia. Mazingira haya yamechochewa na hadithi ya muziki [Peter na Mbwa Mwitu](https://en.wikipedia.org/wiki/Peter_and_the_Wolf) iliyoandikwa na mtunzi wa Kirusi [Sergei Prokofiev](https://en.wikipedia.org/wiki/Sergei_Prokofiev). Ni hadithi kuhusu mvumbuzi kijana Peter, ambaye kwa ujasiri anatoka nyumbani kwake kwenda kwenye uwazi wa msitu kumfuatilia mbwa mwitu. Tutafundisha algoriti za kujifunza kwa mashine ambazo zitamsaidia Peter kuchunguza eneo linalomzunguka na kujenga ramani bora ya urambazaji.\n", + "\n", + "Kwanza, hebu tuagize maktaba kadhaa muhimu:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import random\n", + "import math" + ] + }, + { + "source": [ + "## Muhtasari wa Kujifunza kwa Kuimarisha\n", + "\n", + "**Kujifunza kwa Kuimarisha** (RL) ni mbinu ya kujifunza inayotuwezesha kujifunza tabia bora ya **wakala** katika **mazingira** fulani kwa kufanya majaribio mengi. Wakala katika mazingira haya anapaswa kuwa na **lengo**, linalofafanuliwa na **kazi ya zawadi**.\n", + "\n", + "## Mazingira\n", + "\n", + "Kwa urahisi, hebu tuchukulie ulimwengu wa Peter kuwa ubao wa mraba wa ukubwa `width` x `height`. Kila seli katika ubao huu inaweza kuwa:\n", + "* **ardhi**, ambapo Peter na viumbe wengine wanaweza kutembea\n", + "* **maji**, ambapo ni wazi huwezi kutembea\n", + "* **mti** au **nyasi** - mahali ambapo unaweza kupumzika\n", + "* **tufaha**, ambayo inawakilisha kitu ambacho Peter angefurahia kukipata ili kujilisha\n", + "* **mbwa mwitu**, ambaye ni hatari na anapaswa kuepukwa\n", + "\n", + "Ili kufanya kazi na mazingira haya, tutafafanua darasa linaloitwa `Board`. 
Ili kuepuka kujaa sana katika daftari hili, tumepitisha msimbo wote wa kufanya kazi na ubao kwenye moduli tofauti inayoitwa `rlboard`, ambayo sasa tutaiingiza. Unaweza kuangalia ndani ya moduli hii ili kupata maelezo zaidi kuhusu mambo ya ndani ya utekelezaji.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "Hebu sasa tuunde ubao wa nasibu na tuone jinsi unavyoonekana:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 1" + ] + }, + { + "source": [ + "## Hatua na Sera\n", + "\n", + "Katika mfano wetu, lengo la Peter litakuwa kupata tofaa, huku akiepuka mbwa mwitu na vizuizi vingine. Fafanua hatua hizo kama kamusi, na uzihusishe na jozi za mabadiliko ya kuratibu yanayolingana.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 2" + ] + }, + { + "source": [ + "Mkakati wa wakala wetu (Peter) unafafanuliwa na kile kinachoitwa **sera**. 
Hebu tuchukulie sera rahisi zaidi inayoitwa **kutembea kwa bahati nasibu**.\n", + "\n", + "## Kutembea kwa bahati nasibu\n", + "\n", + "Hebu kwanza tutatue tatizo letu kwa kutekeleza mkakati wa kutembea kwa bahati nasibu.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "# Let's run a random walk experiment several times and see the average number of steps taken: code block 3" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 4" + ] + }, + { + "source": [ + "## Kazi ya Tuzo\n", + "\n", + "Ili kufanya sera yetu iwe na akili zaidi, tunahitaji kuelewa ni hatua zipi ni \"bora\" kuliko nyingine.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 5" + ] + }, + { + "source": [ + "## Q-Learning\n", + "\n", + "Jenga Q-Table, au safu yenye vipimo vingi. 
Kwa kuwa ubao wetu una vipimo vya `width` x `height`, tunaweza kuwakilisha Q-Table kwa safu ya numpy yenye umbo la `width` x `height` x `len(actions)`:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 6" + ] + }, + { + "source": [ + "Pitisha Jedwali la Q kwa kazi ya `plot` ili kuonyesha jedwali kwenye ubao:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "error", + "ename": "NameError", + "evalue": "name 'm' is not defined", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mQ\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mNameError\u001b[0m: name 'm' is not defined" + ] + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## Kiini cha Q-Learning: Mlinganyo wa Bellman na Algorithimu ya Kujifunza\n", + "\n", + "Andika pseudo-code kwa algorithimu yetu ya kujifunza:\n", + "\n", + "* Anzisha Q-Table Q na namba sawa kwa hali zote na vitendo vyote\n", + "* Weka kiwango cha kujifunza $\\alpha\\leftarrow 1$\n", + "* Rudia simulizi mara nyingi\n", + " 1. Anza katika nafasi ya bahati nasibu\n", + " 1. Rudia\n", + " 1. Chagua kitendo $a$ katika hali $s$\n", + " 2. Tekeleza kitendo kwa kuhamia hali mpya $s'$\n", + " 3. Ikiwa tunakutana na hali ya mwisho wa mchezo, au jumla ya zawadi ni ndogo sana - toka kwenye simulizi \n", + " 4. Hesabu zawadi $r$ katika hali mpya\n", + " 5. 
Sasisha Q-Function kulingana na mlinganyo wa Bellman: $Q(s,a)\\leftarrow (1-\\alpha)Q(s,a)+\\alpha(r+\\gamma\\max_{a'}Q(s',a'))$\n", + " 6. $s\\leftarrow s'$\n", + " 7. Sasisha jumla ya zawadi na punguza $\\alpha$.\n", + "\n", + "## Kutumia vs. Kuchunguza\n", + "\n", + "Njia bora ni kusawazisha kati ya kuchunguza na kutumia. Tunapojifunza zaidi kuhusu mazingira yetu, tutakuwa na uwezekano mkubwa wa kufuata njia bora, hata hivyo, kuchagua njia ambayo haijachunguzwa mara moja moja.\n", + "\n", + "## Utekelezaji wa Python\n", + "\n", + "Sasa tuko tayari kutekeleza algorithimu ya kujifunza. Kabla ya hapo, tunahitaji pia kazi fulani ambayo itabadilisha namba za bahati nasibu katika Q-Table kuwa vector ya uwezekano kwa vitendo vinavyolingana:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 7" + ] + }, + { + "source": [ + "Tunaongeza kiasi kidogo cha `eps` kwenye vector ya awali ili kuepuka kugawanya kwa 0 katika hali ya mwanzo, ambapo vipengele vyote vya vector vinafanana.\n", + "\n", + "Algoriti halisi ya kujifunza tutakayoendesha kwa majaribio 5000, pia inaitwa **epochs**:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "" + ] + } + ], + "source": [ + "\n", + "from IPython.display import clear_output\n", + "\n", + "lpath = []\n", + "\n", + "# code block 8" + ] + }, + { + "source": [ + "Baada ya kutekeleza algoriti hii, Jedwali la Q linapaswa kusasishwa na maadili yanayofafanua mvuto wa vitendo tofauti katika kila hatua. Onyesha jedwali hapa:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAW4AAAFpCAYAAAC8p8I3AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nOzdd3xUxd7H8c9sS7KbShJCL1IEBAURFEUBxXoVVFBARS7YQETgiooFsaGAXgUsIGIDHzuCXiuKICDKpYoiHUJJAmkk2Wzfc+b5YzcRrgSQZHM2ZN6+8kr27ObMlyX8nMyZmSOklCiKoig1h8noAIqiKMrfowq3oihKDaMKt6IoSg2jCreiKEoNowq3oihKDaMKt6IoSg0TscIthLhCCLFVCLFDCDE+Uu0oiqLUNiIS87iFEGZgG3ApsB9YDQySUv5R5Y0piqLUMpHqcXcFdkgpd0kp/cAHQN8ItaUoilKrRKpwNwT2HfZ4f/iYoiiKUkmWCJ1XHOXYEWMyQog7gTsBrFZr5zPPPDNCUSrP5/PhdDpJS0szOkqFioqKsFqtOBwOo6NUKCcnh7p162I2m42OUqG9e/fSpEkTo2NUKBgMkpeXR/369Y2OUqHS0lKCwSDJyclGR6lQXl4eiYmJxMTEGB2lQps2bcLj8RytloKUsso/gG7At4c9fgh4qKLX161bV0az7du3y9mzZxsd45gWLFggV65caXSMY3rqqadkYWGh0TEqpOu6vOeee4yOcUwFBQVy0qRJRsc4phUrVsiFCxcaHeOYZs2aJbdv3250jGMK18Wj1sxIDZWsBloJIZoLIWzAQODzCLWlKIpSq0SkcEspg8A9wLfAZuAjKeWmSLSlKMqpZf369Xi9XqNjRLVIjXEjpfwK+CpS51cU5dSSlZXFq6++SkxMDPPnz6dVq1YMGTLE6FhRSa2cVBTFcFJKcnJy2LZtG8OHD6d79+589NFHZdfIlP+hCreiKIYLBAJMnjyZF198kX/961+cccYZDBgwgLfeesvoaFEpYkMliqIoJ8pmszF+/Hhuu+02OnTowMcff8z333/PV1+p0dajqXE97l27dvHRRx8ZHUNRlCrWtGlT+vfvj9lsZsqUKdx7771GR4paNarH3adPH2JjYzn33HPp0KEDS5YsiepFMYqinLj09HTuuOMO9u3bx6JFi+jVq5fRkaJWjelx79q1i9jYWGbNmkW/fv24/PLL+f33342OpShKFWvcuDHffPMN/fv3NzpKldu0aRO6rlf6PDWmcK9du5auXbtSUlLCW2+9Rb169Vi+fLm66qwop6D4+HjOPPNMVq1aZXSUKrNkyRLee+89NE2r9LlqzFDJDTfcQIcOHThw4AD169dn3Lhx5OTkIMTRl/IrilJzORwOLrjgAn744QfOPfdco+NU2s8//8zPP//MuHHjsFqtlT5fjelxQ+j/WFdffTUul4sDBw4wfPhwioqKjI6lKEoExMTEIKXE7/cbHeWkSSnZsmUL8+bNY/To0aSkpFTJeWtU4U5LS6Nnz55MmDCBjIwM3n77bSZMmMCOHTuMjqYoShW75JJLcDqd/PLLL0ZHOWmrVq1i2rRpvPLKK1W6c2eNKtxlyoZHkpOTGT16NPPmzSMzM9PYUIqiKIdZsmQJ33//PS+88EKVD+nWyMJ9uJYtWzJ06FAee+wx3G53xNurigsLkSSlrJKr1pFUEzLquq4ufEeB4cOH8/rrr1fLv+2qIqVkw4YN/PDDD4wcORK73V7lbdT4wg3QrFkzZs2axa233kpOTk7E2snKyqJfv35s3ryZgoKCiLVTGevXr+eOO+5gy5YtUfnDHgwGee+993jhhRfYsmVLVBZwp9PJxIkT+fLLL9m1a5fRcaJWdnZ2xMefmzZtihCC3bt3R7SdqrRlyxamT5/OhAkTqmxM+3+dEoUbwG6389JLL/HSSy+xffv2iLTx+eef89JLLzFmzBhmz54dkTY
q6/PPP2f06NEMGDCAdevWGR3nL9xuN4WFhTRt2pSLL744Krfv3LFjB927d2fDhg1cd911RseJOkVFRcyZM4eXXnqJ2bNn88UXX0S0vbfffptbbrklom1UlWXLljFnzhzeeOMNbDZbxNqpMdMBT0T9+vUZOnQob7zxBmPHjiUjI6NKzz9ixAjGjBmDpml88sknbNmy5aTPFRcXx8yZM6t87Ou+++5j1KhR6LrOpEmTqFu37kmfq127djz44INVmA4SExPp3r0706ZNQ9M07rzzzkrdyuy6667j2muvrcKE0KlTJ95//322bdtGVlZWpbcWHT9+PG3btq2idMY7cOAA8+bN44033uC///0vzz33HFdffbXRsQy3fPlyVq5cyWOPPYbJFNk+8SlVuAFatWrF2LFjGTFiBPPmzavSK7kej4dRo0bx9ddfk5mZyV133XXS5/J6vXTu3LlKxlHbt29Pv379gND42vjx45kyZQqXX345nTt3Punz/v7773Tq1KnS+QAGDx5cnq9x48Y88MADbNy4kQcffJDY2NiTPu/HH3/ME088USUZp02bBoSGc3r06EGbNm1Yt24djz76aKXO+8QTT7BpU9XcR+STTz6p8LnS0lIuuuiiiI/Ne71ecnNzueGGG1iwYAFms5mXX36Ze+65JyLtCSGYPn06DzzwAFOnTo1IG5UhpWTz5s0sWLCAxx9/nMTExIi3ecoVboCMjAzmzZvHPffcw2OPPUbz5s2r5LzPPvss7du35+OPP+aOO+6gVatWJ30uKSVr1qypklwQGiIBGDlyJD169GDXrl20a9euUhlbtmxJ3759qySfEIJJkyZRVFTE1KlTSU1NJTk5mVatWlWqcD/00EOMHz++yjJ+8skn/PLLLyxfvpxNmzbRuXPnSr2HAHPnzq2SfMAx1y04HI4q/ZmqyJYtW7j//vuZM2cO27ZtY+bMmSxZsiRi7QkhqFevHgcPHoxYG5WxYcMGXnrpJebMmRPxnnaZU7JwQ+iH+LHHHuOdd95h8ODBtGjRotLnfPLJJ3nppZdYunRppYc4hBARWfU5a9Ys5s+fz48//ljpc0UiY0pKCqNHjyY/P59JkyZV+nxVmbGsp9q9e3ecTie33HILjRs3rvR5q+sfc6R+pv5Xeno6HTt2ZPbs2QQCAW6++eaIt1unTh2aNWvGhg0b6NixY0Tb+jt++OEHli5dyqxZs6rt7xlO4cIN0Lx5cwYPHsxzzz3HlClTSEpKqvQ5R40aVQXJIsfhcHDrrbcaHeOYasJ475VXXml0hKiVnp7OpEmT2LhxIy1atKjS4ciKpKWlcdppp7FmzRrOOussw7e6kFKyevVqVq5cydixYyN6IfJoTplZJRVp0aIFU6dOZciQIVE7hU9RaqIzzzyzWop2mQEDBrBmzZqoWCm9detW5syZw3333RexKX/HcsoXbgjNZHjzzTd5+umnIzZVUFGUyIqNjSUYDBq+CO7nn39mxowZvPbaa8TFxRmSoVYUbgiNkY0cOZJ58+axd+9eo+MoinISBg4cyNy5c6t94dbzzz+PpmksWbKEJUuW8Nxzzxk6XFNrCjeEZknccccdjB8/PipXFSqKcmy9e/fmm2++qbbtCL7++mvOPfdcTjvtNLp168a3337LiBEjqnWI6GhqVeGG0N013njjDYYOHcr+/fuNjqMoyt/UsmXLahnn9vl8ZGZmMnbsWFq2bEkwGERKWa2zRypifAIDxMXF8cILL7BixQqjoyiK8je9/fbb3H777RFvx+l0sm/fPjp06MDXX3/Nl19+icPhiIp9U2pl4QZo2LAhAwcONDqGoih/k81mY9CgQbz33nsRbSctLY0zzjiDf/7zn9xwww0MGzaMoqKiqJhHfkrP41YU5dRjsVjo2LEjn332GTfddFNE2+rbty8XXHAB9913Hy+//LIhU/+ORhVuRVFqnKSkJKxWK3l5eaSnp0esnfj4eOLj48v3iDF64U+ZWjtUoihKzXX
GGWeQkJBQbbc1q67tBE6U6nErilIjDRgwwPBpeUapVOEWQmQCTkADglLKc4QQdYAPgWZAJnCjlPJQ5WIqiqIcqVmzZkZHMExVDJX0klJ2lFKeE348HlgspWwFLA4/VhRFUapIJMa4+wLvhL9+B6ja25MoiqLUcpUt3BJYJIRYK4S4M3wsQ0qZAxD+fPL3zlIURVH+orIXJy+QUmYLIeoC3wkhTvgmjOFCfyeEptxE8659+/fvp6ioKKoz5ufno+t6VGd0uVzs3r2b/Px8o6NUyO/3R/V7WFJSgsvliuqMBw4ciPp/L0VFRezbt6/a9jw5GcfaSKtShVtKmR3+nCuEWAB0BQ4KIepLKXOEEPWB3Aq+dzYwGyA1NVUuXbq0MlEiqqioiP379xPNGXfu3Indbo/qPcfz8/NZuXIlMTExRkepUGlpaVT/PXu9Xn7O+5nPln5mdJQK2XPsXOK5pNp38Ps7srKyWLt2bVTs7V2RY75/UsqT+gAcQMJhX68ErgCeA8aHj48Hph7vXHXr1pXRbPv27XL27NlGxzimBQsWyJUrVxod45ieeuopWVhYaHSMCum6Lu+55x6jYxxTQUGB7DypsySK/6u3op5cuHCh0W/VMc2aNUtu377d6BjHFK6LR62ZlRnjzgBWCCF+Bf4LfCml/AaYDFwqhNgOXBp+rPyPyy67LKp7JIqiRK+THiqRUu4CzjrK8QLgksqEqg3y8vKMjqAoSg2llrwriqLUMKpwK4qi1DCqcCuKotQwqnAriqLUMKpwK4qi1DC1snB7vV42btzIU089hdvtVtPyjqK0tJShQ4caHUNRaoTdu3fz8MMPV1t7tbJw9+jRg+eff5527drRvHlz9u7da3SkqKNpmnpfFOUE+f1+Dhw4UG3t1brCvXjxYi677DImT55MQkICL774Ip9++mlU71mgnNreeust9fNXw1X331+tK9wZGRlkZWXx8MMPU79+fbKysmjcuLHRsZRaaOHChVxzzTWYTCb69OlTfl9DRTmeWnfrsiZNmpCZmUkwGOSXX37hlVdeYfny5VF1Pznl1Fe2C+FNN91Er169cDgcbN++Hb/fj81mMzqeEuVqVY87OzubyZMnM3v2bObOnYumaWRmZqoet1LtMjMzKSoqwm63M3HiRLp3747b7Y7q3eqU6FFretzFxcW8+uqr9OvXj5YtWwIwfPhwg1MptVVsbCw//fQTzzzzDNu2bePuu++ma9eutGvXzuhoSg1QKwq3pmkMHz6cJ554gtatWxsdR6nlPB4PDz/8MJMnhzbOfPjhh3nggQdo0qSJwcmUmuKUL9x5eXk8+OCDTJs2jYyMDKPjKLVcdnY2Y8aM4e2338ZutwPw7rvvRvXNJaKBz+cjPz+fhg0bGh3lqKr7GtkpPcadk5PDq6++yr/+9S9VtBUgNG1r/vz51d5uSUkJs2fP5pVXXuHFF18sL9qAKtonYM+ePdW6wOXvUtMBq4jX62Xy5Mn06dOH9u3bGx1HiQIfffQRQ4YM4cCBAwwePLjablEmpWTChAnExcUxZMiQqO01KjXHKTlU4vF4GDx4MNOmTaNRo0ZGx6mRhBDExMTg8/mqrUdYWloa0Z7LqlWr6NmzJ9deey1+v5+tW7dy4YUXYjabI9amz+fj7rvvZty4cbRt2zZi7ZzKpJR4vV7i4uKMjlIhk8mE2WwmEAhgtVoj3t4pV7hzcnKYOnUq06ZNi+qeTceOHfn111/p1KmT0VGOKiEhgfvuu49nnnmGJ554olraHDFiBFlZWRE7/86dO1myZAlLlixh6tSpPPfcc2RlZUXsomBhYSHTp09n5MiRtGnTJiJtnOp27dpFZmYmw4cP57XXXiMnJ4f69esbHesvWrVqRbdu3fi///s//vnPf0a8vVOqcOfn5zN79mxuvfXWqO9pz5gxgz59+rBkyRKjoxyVEAIhRLWO3c2bNy+i53/ooYfIz8/nqaeeYuTIkVx55ZURK9ozZsygqKiInj1
7cvbZZ0ekjdrg9ddfZ//+/ZSUlPDiiy/Sp08fbr/9dqNj/UXZxcnq+vdyyoxxa5rGuHHj6N+/f9T2YhVjDR06lH79+nH//fdz5513csUVV1R5G1JKJk2aRIMGDbjqqqvo1atXlbdRW/z3v//Fbrczbdo0GjRowMyZM9m8eTP79u0zOprhToked1FREXfffTcvvviimj2iVKh169blv9ImJSVV+fn9fj9vvPEGbdq04dprr43o2Hlt0LFjRxYuXMjy5ctZtGgRH374Ia1ataJBgwZGRzNcjS/cWVlZzJo1i8cff1wVbeW4hBARKdqLFi0iMzOTmJgY+vXrV+Xnr41sNhvt27fn/fff5+DBgyxbtoy77rpL/Q+RGj5U4nQ6eemll7juuuvUikjFMB9//DEbN24kMTGRYcOGGR3nlHLTTTfxwQcfkJSUxIcffsjFF19sdKSoUGN73MFgkNtvv51nnnmGFi1aGB1HqaUWL17Mvn37uPvuu49YVKNUHSEEAwcONDpGVKmRhTs3N5dHH32U6dOnU69ePaPjKLVYz5496dmzp/r1XalWNa5wHzhwgNmzZ3PPPfeooq0YThVsxQg1rnBbrVb69OnDmWeeaXSUU9qoUaP47bffyMvLY+vWrbzxxhvEx8cbHUtRoo6u6/zzn/9ky5YtACxZsoS3334bkylylxBrXOFOTU0lNTXV6BinNJfLxdq1axk1ahSrVq1i27ZtFBYWqsKtKEfhdDrZsWMHN954I0II5s+fj9PpjMjspTI1rnArkffyyy8zbtw4mjdvTiAQ4JZbbuHJJ59kzpw5RkdTlKjz5JNPMmnSJPx+P0IIzj77bJ588kn+/e9/R6xNVbiVv3jwwQdp27Yt999/P82bN2fAgAGsWbPG6FiKEpWmTp1K8+bNmT59OlJKxowZw+7duyPa5nEHYYQQbwohcoUQvx92rI4Q4jshxPbw55TwcSGEmCGE2CGE2CiEUJs01FBvvvkmAD/++CNz5sxRwySKUgGTycS8efPIzMxk7969zJs3L6Lj23BiPe63gZeBuYcdGw8sllJOFkKMDz9+ELgSaBX+OBeYGf6s1DDdunWjS5cuBINBYmNjjY6j1DDVfUcYIwkh6NGjB926dQNCKz4j7bj/W5BSLgMK/+dwX+Cd8NfvANcednyuDPkFSBZCRN8ejMoJsVgsqmgrJ6W67wgTDWw2W7UUbTj5Je8ZUsocgPDnuuHjDYHDt+7aHz6mKIqiVJGqHog52u9HR/1frxDiTiHEGiHEGo/HU8UxottDDz3ERRddxNq1a+nUqRMrVqwwOpKiKDXIyRbug2VDIOHPueHj+4HGh72uEZB9tBNIKWdLKc+RUp4Tzbckqmq5ubk4nU7mzZvH2WefzciRI9mxYwfBYNDoaIqi1BAnOx3wc2AIMDn8+bPDjt8jhPiA0EXJ4rIhlWPRNI2FCxeeZJTIy8/PZ+fOnVWS8ZdffiElJYVNmzZhs9lIS0tj9uzZ2Gy2Sm1S9Pvvv7Nnzx4OHjxY6YyRcuDAAb755puovndgSUlJVP8sut1uHDkOTlt4mtFRKpSQmcDvrt+jepx7165dWCwWfv/99+O/2CCaplX43HELtxDifaAnkCaE2A9MJFSwPxJC3AbsBW4Iv/wr4CpgB+AGhp5IQL9fMGJE9O6lbbfrDBlir5L9vvv27Vv+ddm9HCdMmFDp8+7Zs4dZs5IoKore97FlyxiuvTYdh8NhdJQKWSyWqN7XvbS0lC4xXZicMdnoKBXacmgLTpMzqt9Hu93OM3WewZ3hNjpKhfzCX+Fzxy3cUspBFTx1yVFeK4GRJ5ys/PtMHDjQ7e9+W7VJStpB/foF5dN9otHBgwcpKsqI6vexUaPFdO7cGZvNhtPpJKVOMgcPZZPgSKIkkMuiQ3PZ5d6EKWAhRsQjdDM5zmzOS7mCy5oPxO/20Si9CSUlJTgcDg4dOoTdbicQCKBpGg6HAyklcXFx5Uv0y5Yelz32+XwkJSX
h8/mQUhIbG4vJZCq/v+Z7771XJX/Puq6zaNEi9uzZw4ABA3A6nbzwwgs888wzlfqNo7CwkNWrV0f1z6Ku6+Tn50d1xo0bN1LQoYDilsVGR6lQvKnitRM1+kYKSs0jpU5BIJstrl9YVfgVM7Lv5MPsKXx28DU0P7S2nYtdT6e01EdD2+mcl3oFiTEp3L/kZiavu5/Nub/h1/0EAgFMJhO6rgOhRRCapqFpGj6fDyEEmqYhhCh/bdljv9+Ppmnouh6xawtut5vPPvuMXr160a9fPywWC02aNGHp0qURae/vWrZsWVQPZSjHppa8K9VKIlmfu4rp6yeR4cigSVJTioMBft39B5nZ+2jXsjHWgI1tu3aQ37qI5kltEewnRiYSJxJ5f8ObnF6nA5e3vIZYWxxCCMxmM7qulxeiQCCA1WpF0zQsFguaphETE4MQAovFUl6spZQEAoGIzL2Nj4/nuuuu47bbbmPv3r08/PDD+P1+xo4dW+Vt/R0//fQT//nPf0hISOCrr77isssuU3eVqYFUj1upViZh5py0i6kf6MymrYVs3JTH+o05lGTbiHHXw7XPTtY2P5vW57Fq/Xo27VrNsnVL8biCrNz5M7nOAmatfIVCXz5OpxMI/Wru8XiwWCyYTAK7PQ6v14PVasXn8xEbG4vL5SrvbTscjvIiHsm71nTt2pXJkyeTlpbGoEGDmDhxYsTaOhFSStauXUtCQgL33nsvGRkZrFy5UvW8ayBVuJVqpes6DpOdGdfMoHlqcxbv+I5PNsznxx0/sjFrI1+tXkrdpMbcdulweqXdQANPV9xuF96SAvILD7E9ZyfBgJm+M69Bs4V6zjabjZSUFHxeD+u/fpJ5Dzfhs8kdWPvFYyQmJuJ0OklNTUXXdeLi4sjPz0fTNPx+P4WF/7souOokJydzwQUXkJyczLnnnmv4fVG3bNlSfpu1Sy+9lGHDhuH3+1m3bp2huZS/TxVupVqZTCZiYmLwlnp4rd8srmrzDyxmM6eln8Z5Lc/jzGbt2ZO3h01Zv1PgLCSnIAdHQVNcW5PokNgWT3E+6F60YsHtM25HCIHX66WwsADnwU3s3LSCQyVeGrbvQ3KDjjhLSoiPjycvLw8hBC6Xi7S0NCwWCxaLheTkZKPfkmrTtm1bGjduzCOPPEJWVhaPP/44NpuNzp07Gx1N+ZvUGLdSraSU+P1+UlJSCAQCzOz3Ko/GTeDTtZ9SVFqEw+zALuLwCT+5BVsoPlRMgjWRvt36UuosJY46FOTlYkrJxn8wgKYFsVqtLFkwjdzMnziUs49OF4/lwj5jCQZDz3k8HlJSUtA0DbvdTnFxMWazGSklpaWlEd3wPtr07t2bhIQEPvjgA77//nveffddoyMpJ0H1uJVqZzKZMJlMSClJiavDk5c/yYBzBlEacLErbze/Z/3B6t2r2XtoH6c1akGTBk3YlbMLp9dJgkila5OLyF/jI6btAd5aOIeA38vqpfPx+iz0vetNulx6Z/n5y6b5ld0bsuxxmdq0ix1Au3btGDp0KIsXL6Zhw4acddZZRkdSToLqcSvVzmQyUVpaisPhwOVykRiTyOR/PMOTV07kuleu51DJIXbs20XdhDQKSwuItybgdXshIMnLKyDe6uDSzn3Yv38by+UCfhnxFima5Ipet9C0bTesVitut5uYmJjyi5OlpaXYbDb8fj92u718OqDVajX67TBE06ZNjY6gVIIq3Eq1klLi8/lITU2lsLCQ5ORkXC4XNqsNf6mfL0Z+QWZhJv9Z+x9cXhemoAmHzU5JUQlIgcftJcZsY0DvAZxz1jks27iI11c+Ro9/DOCs865G0zRKS0upU6cOJSUlJCUlUVRURFpaGk6nk7i4OAoKCrDb7UgpcblcUb3CL1JiY2M555xzWLlyJeeff77RcZS/SRVupVoJIYiJiaGwsJC4uDiKi4uxWq0Eg0Hi4+ORUtKybktGXToKKSU2i5kDK77nwH8/xR4TS2qvK0nudgnWmBg
OHTpE4EAQT5Hggt79sNlsSClJTk4mPzOT1W+8TOH+vaS0aEvnIXeQXDe9fLxb1/XyWSa1kd1up3v37vzwww+qcNdAqnAr1aqsx52UlERxcTGJiYm43W4sFkv5XGz8Xkw+L1seG4X0e2l03c2c89Cz6MKE1Wxi9+wpFPy6lqCmsyO/iJi8XHy/r2bNT8vI3biOgKbRdsAwOl0/EL/Pi+b18f6dgyktKaXPY0+Q2LwFGY2bYDKZcLlcxMTEGP22KMrfogq3Uu3MZjOBQKB8FWPZhUSz2YzmLCZ79nO49u6g7b+exJqQSKDoEN5d20GAT0LD62+h6a0jCbqcNPxxMeds20zBT8toduHFdLjpdoJBP65Dh/A7i9Ek6Ej6PPI4QU1n+f/NZeOKFdw1521OO7tz+UXL2igjIwNN08jLyyM9Pd3oOMrfoAq3Uq2EEEfsI1K2daWUEoJB9sx8Fu1gNqfdPBx/3gGCeQcQSMomfwgJ/r278UqJDiSe3pbkjp3R/EE8RQWU7NmJJiWaBE1KdCnRdNClJKhLzr66DwFd5//G/YuBz06l1bm195aonTp1Yv78+fzxxx/06NHD6DjK36AKt1KtpJQEg0FSUlKOuDhpsVjYt+BdPDs20/yW4RDwInQQIvxxxDlCBRwkmtuFX8pQsQ4XaE2X6JLy4h3UJJrUCYZf0/6iXvi8fmaNuIuxH35M27PPNujdUJSTowq3Uq1MJhOxsbHk5OSQmppKfn4+DocDn9tF4fefc/rNI9HcxUgTIASmcA/dFK7cUspQ71wSquBlRVqX6LokKHU0XaJpEAwX7oCuE5QQ1HU0XaDpOm3Pv4Dc/fvx5OdH/M+s9gJRqpoq3Eq1Kutxx8XFEQgEyi8MFqz4HpsjHm9+FmaTwGQOrQ0TZjAfVrh1GepVS12ApqNLHSlB6uGetl5WoCUBPTQ8EtQlQUmogOuhYZRAUCe1UVNeHX0vr2/6AxHBse5oXuTTtWtX1qxZQ7du3SJ6h/Lvv/+eHTt20LBhQ6655pqItVNbqMIdhSZOnMiECRNCMyxOQWWFrOyzlBLnupXYm7VE87gQJoE0mULrek0CYRKYw5Vb6hIhJVIHqcnwtD7Cn0PFW9NDRQuL1LoAACAASURBVPrPwq0T0P8s3AEt1Atv0KoFW1avMuptiAp9+vShV69e3HnnnREt3LGxsSQkJLBr1y5uvvnmI54bOnQovXv3jljbp6JTszLUUF9++SXjxo1j2rRpdOnShWuuuYYnn3zS6FhVqmz/bKfTid1ux+VyYbfbMZtNSM2P5nFhMgl0kwlpIlTAzaHiDYS73ICuo5cVbglBLVSUg1qoxx0M97gDuiQQ1AhKiV+XBDRBQNPCRZzyGzEokdW9e3e6d++Ox+OhT58+Rzz32muvMW7cuCOOzZ8/n7S0tPLHJpOJhISEaslaE6jCHWF5eXn89ttvJ/TaVatW0bt3b2w2Gx9++CFz5szh4MGDp9TKPl3X8fl8JCcn43a7SUxMxO/34/f5kQUHiQnvYyLMApNJIMwCYTIR6n5LgoCm66HirMlwgQ59HZDh3rQWKtj+YKg4l5QUY7Y78GtlxTv8fHgRTm3XqVMn1q5dS8+ePSPeVlxcHM2bNz/i2LPPPsuzzz57xLGBAweSf9j1hzp16jBixIgjXtOkSRNatmx5Qu1mZmaSkJBAamrqSSaPLqpwR1hBQcEJ367qjz/+wOVysWzZMm6//Xbsdjt5eXmnVOE2mUzYbDYKCgpIT0/n0KFDJCQkEJuYRM6P32AzmSA5GcLFG1NoSknQ70PExKFTNvwBPpcTd34efk3HF9Tx6xKfpuMLSjSTBUtaBgEExdn7sddriF/XCWjg0zSCOuTlHMDv9Rr9lhju+eefp3Pnzqxfv96Q9o92DeDDDz884nFBQQHTp08/4ljZZmWHGzZsGI0aNSp/7PP5mDJlCrquI4QgKSmJMWPGVGF6Y6jCHWFt2rQ54eGOjz76iIkTJ/LCCy8waNA
gzjzzTNq3bx/hhNVL13X8fj/p6aHl58nJyfj9fupffyt5Py2maOtvaA2b4Eiri24S6CZBUEBw306sjVsgAc/BbAIlxXh9PrylpXiDGn5N4glKfEENr6bjR6Dv24sfM3GNm1Cck4NwOAho4NV0igsL2bXpDzpefS1E8cVDJSQ1NfUv/452797N9u3bjzj2yCOPkJOTU/44GAxSUFDAJ598QklJCYMHD2b06NHVkjmSVOGOItdeey2XXnopd999N5988gnx8RXf5bkm03W9/D6RZdusxjRogm6xEXC5Yfd20DRs8fEEpIYZ8JcUIzb+NzRXW9MIaDp+Tcev/Tk8EpR6eO42BDQNb1EhvqBOQX4+noCGH0Fi42YcOnSI3KwDeP1Brh4xIqpnfVSXtLQ0CgoKatRQQvPmzf8y7HLeeeeVL+oCuOqqq9i5cyf9+/fn1VdfZdasWTzwwAMnPMQSrVThjiI2mw2bzcb7779vdJSIEUJgs9lwOp3ExMTg8XjKi7gWE4dfl8iAhrmkmKAWQMveF54OKBCAhixfZOPXdYKawK8fPnatl495B8MzTIJaAE2DQFDDU1pKYc5BdAkIE3HxDqPfEsOZTCZmzpzJ2LFjmTt3rtFxKiUxMfGIx4sWLeK8885j3rx5FBYWMmLECDZv3sxrr71mUMKqoW6koFSrsjvgJCcn4/F4SEhIQNd1LBYLzW6+HV94nNpVWIi71IlP0/FqOh5Nx63peIM6nmDosV8DX7jXfUTPW9dDKyZ1WT67JBiefVJSeCh0R3iTiS79rkfE1s7dAWuLmJgY7r77bhYsWMCqVasYP3680ZGqhOpxK9WqbFvX/Px84uPjKSoqwmazEQgEaHDBpazXQZc6ugygO90Q1EPXJ0WojyGlHl6EA8HwYht/+GKlXy+bLSLxa6HnA2UFXEpEbCxejy/0Gi1Ix549aXLaaQa/I0ok2Ww2Ro4cycGDB4mLi/tLj7ymUj1upVpJKQkEAqSlpeF2u0lKSiq/E43T5Sahy0WhXnZQo9RZijsQ6mG7A3r4axnqcQd1PEENT3hGiTeo4Qtq+DQNf1Di1zT8mn7YXG4dV6kbv89PQno6lw+/C3NsXETv8l6TxMTEkJiYeMQUvFNJRkbGKVO0QRVupZqVLcBxu91YrVa8Xm/5LoFxCQm0vuk2vEEZLtAa3vBsEW9QwxvUDivaoSEUb1CWD6/4NIkvPFzi1wR+HfyaPGK+d0BKMlq1oqTwEN2u6VNrb6Twvxo3bkzXrl1ZsGCB0VGUE6AKt1LtpJTl27qWLYCRUmKxWEhpeTqNLusTLtThXnUwNLb95/i2xBMIPe8Lv84XnmUSCBfv0HCJFiriusSvh1ZXtruoJ5qwcH6//lgsllp7z0mlZlOFW6lWZUXbbrcTCASIi4srv4mCx+PB5IgntX1H/JhCvW4tNDTiDmq4y4t4MHSxsvxxqDfu1UJzuH26xBsMLbbx6xq+cG9bFyZSGjbE6Syhw0UXoWkaLpfL6LckathsNoLBIMFg0OgoynGowq1Uq7JtXXNzc3E4HBQUFJTfESc5OZm4uDhaD7iVjPMuDA2N+DXcAQ13UA99BHTcfokvKPEGZXi4JNQL9wbBo0l8wdCUQG94+CSgaUiLlfYXX8rqxUt5dsFCYmJjsVqtNWrecqQNGDCAdevWsW3bNqOjKMehCrdSrcouTsbHx+Pz+XA4HOULcrxeL36/H5MQtO3TH80ai0cLj20HNDyBP3vX7sPHvLU/i7gvPGxy+DTBICYan9mJAIIL+/dDs/7ZsywtLTX6LYkaaiFSzXHcwi2EeFMIkSuE+P2wY48LIbKEEBvCH1cd9txDQogdQoitQojLIxVcqbnMZjOapmG1WgkEAuWrJy0WS/k9IJtcfDn2NmfgDUrcQVne4y6/MBk+Xjb+7QuExrt95Rct/xz3rtuyNfaUOmRu+oMOvXrhiI8v3+fiVN0692SV/V2omz9EtxPpcb8NXHGU4y9KKTuGP74
CEEK0AwYCZ4S/51UhRO29G6vyF2X3nPT7/Ufce1JKecSmQbqu84+nnseUknpYwS4bMpG4whclvYE/i7lHA0+4aHs1Dd1iJbFRUyzxCRQXFnL96Hs5vWtXzGZzeQ51cfJIM2bM4K677jpi2bgSfY5buKWUy4ATnezaF/hASumTUu4GdgBdK5FPOcX871CJ3W5H13VMJhMej4dAIACELpQ1aNmKga++SUKTZngCevgjNETiK5vfXT7GrZfPRPEFQ2Pgfinw+gOUFB6iU+9L6T10KLFxcTidTjRNUxcnjyI2NhaPx2N0DOU4KjPGfY8QYmN4KCUlfKwhsO+w1+wPH/sLIcSdQog1Qog1gYD6QaktylZOFhUVERsbS0lJCRDaxc3hcBATE4OUEq/Xi9PppGXX87j6yWfpdP2N+KQon2XiN1tofmHP8imC3qBGbFpd4us1wKtpoeXwvgA2u53rRo3i0mHDEELg9XpJTk7GbDZjsVjU5vxH8eijj/5lu1QlupzsAN9M4ClCt2x9Cvg3MIwjb8Zd5qiDZVLK2cBsgISEDOnznWQSpcax2WzUrVsXs9lMenp6+UWxsmJhsViw2+3lxzpfegXtu3XnmvtD+0xICcIksCcnU3rYykeLLQaEOGKPbVtsLHWbNEEPTzmMi4tDCFG+8EZdkPur6667zugIynGcVOGWUh4s+1oI8TrwRfjhfqDxYS9tBGSfdDrllHT4WPbRenbm/7lxr8lkwpqSQnxKyl9em5JR74TaLDtjWXvVWbDVhT6lqp3U70NCiPqHPbwOKJtx8jkwUAgRI4RoDrQC/lu5iIpSs914443ld7NXlKogjtcbEEK8D/QE0oCDwMTw446EhkEygbuklDnh1z9CaNgkCIyRUn59vBBJSXVk69b/Otk/Q8RZrS7OOCOfpk2bGh2lQgcOHODXX2Pwev/aK40WKSnb6NateVTP5Pjtt9/o0KGD0TEqFAgEyMzMpFWrVkZHqVBhYSF+v5969U7styEjZGZm8kf6HwQcAaOjVGjbC9soLiw+6q+Gxy3c1SEhoa70+7caHaNCiYmZNGjwE1u23Gx0lAo1bfoNr76aTufOnY2OUqFp06YxdOhQkpKSjI5SoUceeYRJkyYZHaNCRUVFzJ07l3vvvdfoKBVas2YNBQUFXH559C7jmDdvHhdddFFUd8ZOP/10cnNzj1q4o2T1gcDvj96eYiBQgKbFRHVGTYvD4XCQcpRx4GhhtVpJSkqK2oxle6ZEaz4IZbRarVGd0W6343a7ozpjTEwM8fHxUZ3xWNdh1JwfRVGUGkYVbkVRlBpGFW5FUaKalJLdu3cbHSOqRMkYt6Ioyl+tXr2aX3/9tXy2z4UXXsjpp59udCzDqR63oihR69NPP2XXrl089thjrFq1iuXLlxsdKSqowq0oSlRasWIFDoeD8ePHM3ToUCZPnsxvv/2mhk1QhVtRlCh1wQUX4HK5ePvtt9m8eTP//ve/6dChA82aNTM62l+43W62bq2+tShqjFtRlKgkhKBXr14sXboUp9OJEIKOHTtG3cZgc+fOZc+ePQSDQWw2G8OHD4/4LfFUj1tRKqEm3C2mJmSsyGWXXcbTTz9N/fr1mTRpEuecc47RkY4gpWTOnDmcd9553HHHHSxdupS8vLyIt6sKt6KcBCkl2dnZfPjhh8yaNYusrKyoK47BYJD9+/fz6KOPsnz5cnJzc42OdMqZPn069957L02bNmXKlCm8+eabjB8/Hl3XI9quKtyKcpK6dOlCVlYWVquVTp064ff7jY50hOzsbLp160b37t1ZuHAhV1111fG/SflbxowZw4wZM/jss8/YtGkTt956K5MnT474jShU4VaUkzBr1iwef/xxLr74Ylq3bs20adOYMWOG0bHKSSmZPXs2s2fPxuVyMWbMGAYNGsT8+fONjnbKGTVqFEVFRbjdbq699loyMjIi3qYq3LXEU089Rc+ePfH5fDXmRrCBQIDvvvuO9957j0AgEFV
DEb1792bRokU0atSIzz//nOnTp9O7d2+jYx3h4osv5scff+Scc87h3nvvZcWKFVG9e2RNdcMNNzB48GDatWvH6NGjq2XjKlW4a4nx48fz2Wef0aNHD5544gl+/fVXgsGg0bEqJKWkZcuWfPfdd2RlZdGgQYOoGopo2bIlubm5TJw4EQCfz8fTTz/N/v37DU4WIoSgbdu2LFq0iIULF6LrOpmZmcyaNYuioiKj4ymVpAp3LVG2peovv/xC9+7dee2113j11Vf55ptvjI52VF9++SW33nord911Fw6HgyFDhvDpp58aHaucEIIff/yR22+/nauvvpoNGzYwbdo0XnvtNXbs2GF0PADq16/PunXrqF+/Pq+88grr16/niiuuYObMmZSWlhodT6kEVbhrocsuu4xXXnmFRo0a8ccffzBo0CB27txpdKwjxMfH43Q6sdlspKSk4HQ6mTp1qtGx/uLss8+mZ8+eADRu3JjBgwczc+ZM8vPzjQ12mAEDBtC4cehWsD179uTCCy/koYceiqqhJ+XvUQtwaikhBNdffz0ul4vrr7+ecePGsWPHDq655hruu+8+EhIS/nLT3urUo0cP7rzzTnRdp27duvznP//hnXfeoWPHjkyYMIHevXuTmJgYdYsxWrduzfjx4xk4cCDNmjXj5ZdfJjY21uhYR+jevTupqal06tSJ2267jREjRmCxqFJQGdX9c6h63LWcw+GgWbNmfPzxx6xfv55zzz2X/v37s3DhQtatW2dYLiEEW7dupX///rRt25asrCx69+7N+vXr2bJlCzfffDOLFy9m165dhmWsSHp6Ot999x3Dhw9n6tSpFBYWGh3pL9q0acP69euJj49n3rx5UX29oyao7t9eVOFWgFChFEJw9dVX8/3337N//34+//xzJk6caNgwihCCiy66iH79+pXnE0LwyCOP8Mknn/DTTz8xd+5cHn/8cbxeryEZKyKE4JxzzuHCCy9kxowZUZlPCMHQoUMRQvDKK68YHalCQghuuOEGFixYYHSUCqWlpdGkSRPWr19fLe2pwq0c1ejRo7n33nu58MILmTx5MjfddFP50uloGBuNjY1l4sSJ3HrrrXTv3p3+/fvzzDPPRE2+Mr169eKaa65h5MiRUZetzJAhQ2jSpAlTpkyJyoxCiPLfAqNVWeHesGFDtbSnCrdSoTp16tC7d2+mTZvG448/TqdOnejUqRNLliwhKyvL6HgAnHbaaVxyySW89957nH766XTq1ImlS5eSnZ1tdLRyZ599Nvfffz+dOnVi9uzZUTWtEUKFsU+fPjRv3pxOnTqxYsWKqCveypFU4VaOy+Fw0Lp1azZs2MCGDRv47rvvePrpp3nvvffYu3fvEa9dvHgxgUCgWvMJIUhMTKRfv35s2LCBb7/9lqeeeuqo+YwghKBNmzZs2LABi8XC3LlzI76Xxd9lNpu58cYb2bBhA19++SXffvut0ZGUY1CFW/nbnn32WZ588kmKi4t59dVXGTduHJs2bWLEiBGsW7eOsWPH8s477xiWb/LkyTzxxBMUFxdX+/9Ejue2227D4XDw/PPPGx2lQk8//TR//PGHWh4fxVThVk5Keno6I0aMYMSIEVx//fX07dsXs9nMsGHDuPrqq1myZImhS+vr1q3LiBEjaNGihWEZKjJgwABOP/10unTpwnfffRd1wxIWi4Xhw4eza9cuunTpQk5OjtGRlP+hCrdSKU2bNuWMM87g/PPPp06dOlxzzTV07dqVzp0788UXXxgdLyqZTCb69OnDypUr+f7771m2bFnUFW+73c59993HypUrGTt2LNu3bzc6knIYVbiVSktKSuKmm25izZo1fPjhh7z88susXr2avn37Gh0tagkhsFqtTJkyhVWrVhkd56hMJhNWq5XXX3+dH3/80eg4ymHUcimlSnTs2JFhw4bx7LPPctFFF3HllVcaHanGeOCBB4yOcEwJCQncfvvtRsdQDqMKt1Il6tWrR//+/enZsyd
16tSJ+EbyihJtYmNjq23puyrcSpVKS0szOoKiGGLQoEHV1tZxu0VCiMZCiCVCiM1CiE1CiNHh43WEEN8JIbaHP6eEjwshxAwhxA4hxEYhxNmR/kMoiqIYrWwbgepwIr/PBoH7pJRtgfOAkUKIdsB4YLGUshWwOPwY4EqgVfjjTmBmladWFEWpxY5buKWUOVLKdeGvncBmoCHQFyhbZfEOcG34677AXBnyC5AshKhf5ckVRak1bDYb9erVMzpG1PhbV5CEEM2ATsAqIENKmQOh4g7UDb+sIbDvsG/bHz72v+e6UwixRgixJhDw/P3kiqLUGs2aNWPKlClGx4gaJ1y4hRDxwHxgjJSy5FgvPcqxv6wukFLOllKeI6U8x2qNO9EYiqIotd4JFW4hhJVQ0f4/KWXZjf8Olg2BhD/nho/vBxof9u2NgOjZqk1RFKWGO5FZJQJ4A9gspXzhsKc+B4aEvx4CfHbY8VvDs0vOA4rLhlQURVGUyjuRedwXAIOB34QQZbuEPwxMBj4SQtwG7AVuCD/3FXAVsANwA0OrNLGiKEotd9zCLaVcwdHHrQEuOcrrJTDy70eJrk12ji76M0bbZkVHE+0Zoz0fqIxVpSZkPBoRDcGTklJkx463GB2jQmazn6SkUmy2OkZHqVAwWEJysgW73W50lArl5uaSmppq6N3jj2f//mwslgZGxzgGjYApG2tdq9FBKqS7deKD8SQmJhodpUKFhYXEx8djs9mMjlKhd999l0OHDh210xwVhTshIUOWlh40OkaFkpJ28NxzS7jjjjuMjlKhhQsXkpGRwbnnnovP58Nqtf55lxWTzgHfHg4FDyJ1iQUbIPAE3NjNibRIPAOhm7HZrGiahhCCYDCIEAKTyUQwGMRms5V/Ljt/MBjEbDYf8dqy1WPBYBCrNVRcylaTPf3004wcOZKUlBSD3qVjk1Jy44338sknLxkdpUIxMYW0f+wy1j681ugoFar3Uz1m5c+K6t0hX3vtNS655BJatmxpdJQKZWRkcPDgwaMWbrVXySlG0zQKCgqITbDx30NfUDe2KUGTl52lv5Lj34PTW4rTW0yDuBZ4/B7qWhuxPXYzuwt2cM+5j+D3BRBCUFpaihCCmJgYSktLSUtLo7S0lDp16lBcXEydOnUoKSnB4XBQVFSE1WrFZrNhs9mwWCyUlpZGbYFWlKqWm5vLkiVLGDBgQLW0pwr3KWZH0a/MP/QiolhwwLcHq4wlGJQ4SCEtpiHJpFDkduHRA9SJaQS6la93fkqcJYGnfniAge1vo4G9MQkJCUgpCQaDpKam4nK5iImJIT8/n/j4eEpKSoiLi8Pn85GcnIyUEk3TcLvdQGilW0FBAcnJyVgs6sdMObUVFhby7bffqsKtnJx0e1M+WLyeOrF1ODP9TE6r24Zd2Zm8s+J9WrZOIt0Rz/aNOZgbBrmg3UWYg7HEWZIpdOYTY0/gzf/O5B9tr+WMlLOwWKxYrVby8vKoW7cuLpeLOqmpFBYUkJSURHFxMQ6Hg5KSEqzW0GsdDgcmkwmXy0VKSora3lWpFaprc6ky6l9VNdF1nU8//fT4L6ykOOzM/seb6Jrgyz++ZvoPL7Poj0VkpNTDn5eM70A9WtU9j9iihmhFOitW/Mzu/E2s3bmeLdk7WL13HfM3zMer+7BYLAghiI+Px+/343PmsO2Xuexe+wF7f/8PVmtoTDw2Nra8V+33+/F6vZhMJrxeb5VdtV+4cKGh97BUlGOp7muFqnBXE13XeeaZZyLejslkonWdljx68SOYLIKdBTs55DlEfKwDt9+NO+Cicd3GtE3rSKKnJc0S2+HcJhF+HTM+9uZm8+1vi5n0xdNA6AdS13WQGll/fMvSD8aw9qtHWbvo3wj5559N1/XyH16TyYSUskp+mFevXs3gwYPZt28fw4YNM/Tu8Yrx5syZQ3a2WoitCnc1io2Nxev1RrQNq9VKwB+gW6NuzL9pPmnxqZjMZoq8xVhtFnyanz/2byL
PmcfWvVtYvuZnmtrb0ydjML8u3kqXNo2xO818/PXHBIIBAJwlReTuWc2yL1+iyB1Dl/5vcOmw/yOghWaV+P3+8hksZRcpdV0nLi6uUr9C6rrOzp07adWqFTfffDODBg1izZo1BAKBKnmvlJojMzOTNm3aoGkad9xxBzfccMPxv+kUpgp3NTGbzcyaNYvRo0dHtJ3i4mLq1q2LkIK29drx070rSHYkk+M8wIGSg2QX57Dv0H5+3vYzy7csJy0lHU1qHMzNp8/ZA3BsbkVSjIW6SXHs3LcNKSU/LnieOZOHEZvciktueYX2Xa8miAO73Y7X66VOnTrY7fby2ShFRUXYbDby8/MrNbxRWFjIsmXLGDp0KP/4xz/o0qULTZo0Yfny5VX4jinRTkrJhg0bGDJkCD169GDu3LnY7XZ27txpdDTDqMJdTcrmN0d6LKzsYqEQAq/XS4a9Hm8OepO7e9yNXw+QWZDJlpwt+HU/rRq2Jq1OGrlFuRwqLSQrLxu3101CYTPiEgVPfDaGT/8zh22bN5Jcrx3X3Dad9l2vwuv1Yrfb8fv9WK1W3G43fr8fgLi4OOx2O5qmkZCQUKmLk2lpaVx88cXcddddXHrppUycOJHdu3dz8cUXV9XbpdQQv/32G2eccQbPP/882dnZtGzZkh07dhgdyzBqVokBpJQRuwrt9XqJi4sjEAiUL8I5Pb01rXuNpWvDLhx0HeSZT54hKz+bXQd3Uic2FRs2CvLz8bkDeEs9jLh2BKPOv4di+37efnEKKbka9z31OinpjXG73cTFxeH1eomJiSlflFM2zl1WwMsKekxMTKX+PBdeeCEJCQm8++67/Pzzz3z++edV8TYpNYgQggEDBtCrVy+EEGzatAmTycSECROMjmYYVbirUZs2bejYsSMfffRRxOZ76rqOxWLB7/cfcZFQSuh2Wjdi42K5ot0VWG1WSp2l2MyCrF3bSE9KxSfBXiedWFssKckplJQcYmvzDfQa9g+ateqIEAJN0zCZTJTm5xGwmAloOqkNGmIymcqLN1D+2sr+hpGRkcHll1/Oeeedx6ZNm3jrrbd47rnnquKtUmqQFi1a8Msvv/D4449z991306pVK6MjGUoV7mpkNpvLi2qkxMbGls+r9vl85e0CxMTE4Pf7SYhNIH/NSmIDHpy5B0nI3kNJ0SGSO3QiseN5lGbuYLfHw74Dufy2/CfOO7s7gay9ZG/fQmxcHCXxKexZvpi9v/9KfHp97Ke1Jj41jYZnnEFGq9PLl8EnJSVV2TzupKQkGjZsiMViYe/evTRp0qRKzqvUDGazmR9++IFu3brRuXNno+MYThXuU4zL5SI1NZXS0lJiY2PRdR2fz4cQAo/HQ6zHye7/m4UjJRV/nJ2k9Hoknt8DKQQC8OzfgywuJEYP4ti9jfN9buTiL8jOykSYLBwK+Imr25DWl1xBi0suR2o6W39axoHff2Xv+rU4PV6ufXgCKWlpFBcXk5qaWmXFu2nTpjRu3JiffvpJFW6lVlOFu5pdf/31TJw4kZ49e9K4cePjf8PflJiYGNqrJDYWt9uNyWTCarUipcRhNbNh1B0kndaKlIsuw2S2gNTwZ+0NbdwrJWazhaSWbdClxNG4BS2vH4im6fjcJVji4tGkTiAQxFNciC5B0yWN2p9FfSkpLijg8+kv8Mbdd3HP2++SnJxc5TsB3nDDDUyYMIEePXrQoEE07+KnKJGjZpVUs7S0NEpKSsqHMapaSUkJaWlpSCmx2+2hed2BAN5DBay6/VrsDRpS/8p+6M5i9OJCpLMY4S1FeErB60K6StAK8wgW5qG7nASLC9CchxB+P/6iQgKHDhF0lhB0uQi6XQTcLvylTnyloeGZvmPuo/RADi//81b27dxZ5asd09PTSU5OZuvWrTV2L2VFqSzV4z7FxMbG4nK5EEIQCASQUmI2m8n5z0fUadyCBpf3IZCfgzk8PdEkwnfJEAIhJbqUIAUCCbqOlKBJSVAHTdfRpUS
XhB9LNF0SkBJN6gR1ga5Lzh94E9+99SablvxA89NPr/I/4+TJk+nYsSPr51Q7CwAAIABJREFU16+v8nMr0amoqIj9+/fTq1cvo6NEBdXjPsXY7XaKiooA8Hg8oVkePg/ObRtJbtOeYP4B8LrB60b4XJh8bsx+N2afG5Pfg/C5ET43eFxIrxvpdSHdbqTHheZxE3S7CLpcBFxOAq5S/O5Sgi4X/lIXfpcTn9uJCWjW4SxWffYZxXl5xr4hyikhNzeXbdu2cf755xsdJSqowm2AadOmMWLEiD9vdFCFiouLycjIQEpJfHw8FouFnKXfgs+PrgXQPC6kJ1SYhdeNyevC7HNj8bkweV0I3/+zd+ZxTlbX/3/f7DOZZFb2fVMRFEUQgUIFVERxq7VudbdqrbYI6qB+RbG1isoiboigIra/Qq1KXepSRS2KqIAiiGwCOjIDwyyZ7HmW+/sjyeOMggyQITNw369XXkme58l9TrZPTs4995yUWMeiyEgEMxzBjIYxokHMSFK8tegPYRItHCIeCREPB0mEQ8RCYaKhOtr27EmwuppQTU3GnyPAwoULufjii5tkbIWiuaNCJVmguLiYnTt3NsnY+fn5bN++HZ/PRzgcxm63k+t2EnTZMRMxTB2kzQY2kDYBNoHNbkMIkCYIU4IpkabENAxMKyRiYpigG8nQSMKU6IZEN000EzTTREvdT5ipsImuQRPFodu3b4/X62XTpk306NGjSc6hUDRXlHAfZESjUXw+H4C1ajEWi2HGY0nP2QZ2mx3TBqZdYNpsmDaBDYEpU4JtmhimxDSkJdq6KZMCbSRv60ZSsBOGmRJriWaAZsqUiJsYTVgMKi8vj/PPP5/nn3+eu+66q8nOo1A0R1SoJEvMmDGDCRMmZHxcu91OJBKxutdIKXHYnQQ3rCVevRMjHEaPhNDT8epICC0cIWFliYTQIxGMSAgtEkILh9DCye1aKEQilAyJJMIh4qEQ279eQ7S2hlgoSCwUJBoKJsMlwVCTV/Hr378/UkpWrFjRpOdRZBfTNLnnnnuYOHFitk3ZLekSFgcq00l53Fmif//+3H333RkfN503nS5oZZom7pJW4HRRt/ZLRI9eSLcbabMh7QIpJIlwEOHOBacTQ9fREjrxWITar9eQ0HViuiRuSmK6QcwwiRvg69UXw+XCmZtLLBxBFwLNkMSNZMhk27dbCVRWIpqwo3tRUREFBQVs3ryZfv36Nevu8Ycama7Fs2bNGnr37p3RMTNJz549GTx4MH//+98PyNyLEu6DjHRZ12AwiNfrRdd1OPp4igePZPt/XsCIhino2gMjNxfDJrALibH9e4TDDS4XiWCA+M4dJIxkHDtumOiGJKFLNMNA1yWaYfL9qk+J6+AoaUNc08GbBy4PCSmo3VnN1g0bOPHK31HUrl2TPt9x48Zx0kknMWrUKAoKCpr0XIrGc6jl2Nvtdqs2/YFAhUqyhNvtZuzYsSxatCij4+bm5hIIBKyyrrqebHYQjSfQTUk8Eia4fRuxUB11326mbss3hGtqCX3/LXWbNxLekRTttOesGZJEatJRNyW6KTFkesLSILDtewIVFVSsX09NeTk7tm6h/JtNmCZ0P+pocvLyMvr8dsW4ceOYOnVqk59HoWguKOHOEi6XiwEDBvDpp59mdNxEIoHX60VKicvlwm63J7vRdOiAbneS0E2iwSDhQC3hqkrCVZVEIlGiuklMN4kZBlHdIG6YxI3kpGMilTGimaBJMzkxmV6EIyUGyUnMeCxGNBjGFAJ3np9YPN4kKY8/5vTTT+eVV15p8vMoskMikcDlcmXbjGaFEu6DkPTf1Pp/V7v/9npsJW2JGAaRSIxwIEBUM4hqJlHNJKKbRDSDiG4S1SVxHeK6SVw3SeikskaS2SKaKTH0H7zwhGFiIgjXhYlGo+i6Sb/TT2X4xRcdsOfcq1evQ7qw/sHM6aefzltvvZVtM5oVSrgPMlwuF9FoFCEEuq5jmmayLrewYSt
ohW4kJxkjoUhStA2TqG4Q1c2U1y2JGaZ1SXrdqeuUBx43zWTKn0wKumGCDhgkQyhHDB2OHRu5npyMVQb8OYQQPPjgg0yaNKnJz6U48BiGoSaef4QS7izSp08fnE5nRmtuxGIx/H4/kKxb4nA4knnZhkHXS39P3BDEdJNoLJEU7pRgRzUjGSrRDWJa6tqQxI2kkCf0lHjrJrouSZikFtyk4uCaTiwWw+5xY3M7OfWaa6mrq8t4kand0bp1awYMGMAbb7xxQM6nUGQTJdxZpKSkBLvdzvbt2zM2ps/ns1ZlhkIh4vE4drsdp9NJt0FDMXPziKXCIsl4tiSa8q6juklEk8lwiSGJ6kkxjxsmMTMp4gkD4iYkTIO4kVx0o5smCUMiHU4Gn3cBNTur6NK3L61atcLhODCJS7m5uXTp0oUNGzYkM2kUioOYPQq3EKKTEGKxEGKtEGKNEOJPqe13CyG+F0J8nrqcVu8xtwkhNgoh1gkhRjflE1A0JBQKkZ+fj5QSj8eD0+nEMAxM0ySiaYx8+BkrHztiJGPbUc0kkopzR1OTkz944CYxzSChGz+ESgyThJ5e3m4QN0E3TI4Y8guWL17MDU/OxuVyEQqFDsjkZJpf/epXfPXVV3z11VcH7JyKpuXrr7/msMMOU6GSH9EYj1sHJkgpewMnAH8QQhyZ2jddSnlM6vI6QGrfBUAf4FTgcSGEetV3w8iRI1myZAmRSCQj47lcLmKxmNXzMR3jFkLgcrlwt25D26EjU4Kc8rJ1g6iuW5kl6fBITP9h0U3ykgqbGKaVMhg3JJpp4PbnE40lGHTaabTt0gXDMHA6nU3WFHlXCCE4++yzeeGFFw5YiEbRtMybN4/f/va35OTkZNuUZsUehVtKWS6lXJG6HQTWAh1+5iFnAf+QUsallJuBjcDxmTD2YGTIkCF89tlnxGKxjIzn8XgIBoMIIUgkEpimaS0OyM3NxVFQRPvjhxDXZSqrJOlZR3WZvE5lmUR1k7hhpMSa1OUHsY6bkriRrF1iCgd9Rp5ENJFgyJln4/P7MQwDr9d7QIUbYPTo0bz55psqXKI4qNmrGLcQoitwLLAstekGIcQqIcTTQojC1LYOwHf1HlbGzwu9AtiyZUtGxqmrq6NVq1aYppkUaocDTdPQNI2amhq8ubn0ueAyOo44haiZ9LDDmkE4YRBJpQdGUqGScErAY5pBTNeJawbx9MSlnvS8DbuTw3/xS6p3VtH/pJPp0LcvtbW1OJ1Odu7cmRXP96233mL0aBWhOxgoKSlR3vYuaLRwCyHygH8B46SUdcATQA/gGKAcSC9d25WL9ZP1r0KIa4QQnwkhPtO06F4bfrDw+eefU1ZWxkknncTcuXP3Oz7r9/uprq7GZrMRiUTQNA2n04nT6aSgoIBIJILd6aTzyaehO3OsvO2okZyUjBip+7r8IePEShOURNMxblOCx0PrHj2RDjuRugAdjjgCf34+BQUFaJpGUVFRVmKTubm5nHzyyQf8vIrMM2HCBAYOHJhtM5odjRJuIYSTpGj/TUr5IoCUcruU0pBSmsBT/BAOKQPqd8HtCGz78ZhSytlSygFSygFO56H7i/rqq68yevRovF4vy5cv55133tmv8SKRCH6/35qcdDgc6LqOrutW53cpJcefcx7DbypNetn6DxcrJdAwf5icNGRKuA3iejLerUuBy18ILjflm7fw27vu5oSxpxONRgmHwzgcDoLB4AGdnEzjdDq54447Dvh5FYoDRWOySgQwF1grpZxWb3v96kHnAKtTt/8NXCCEcAshugG9gE8yZ/LBw/Lly0kkEkyaNIkOHTpw7733snXrVtauXbvPY+bk5FBXV4eU0qpVYrPZsNlseL1eYrEYUkrq6ur45ZXXcsr/3Y1udya96VQ+d1Q3SQh7KiUwleNtmCSkjZhuENclcQSRaIyKLd9yyV2T6TVoULISoduNx+NB1/WsxLgVikOBxiTZDgUuAb4UQnye2nY7cKEQ4hi
SYZAtwLUAUso1QoiFwFckM1L+IKVUU/y7oH///vznP/9h7ty5vP3228yYMYMuXbpwxBFH7POYdrsdh8OBw+Gwlrynb9ff53A4cLndDL74cnoedwJvP/EodTuT/SElMPiii/nf355HSjBNiSMnl05HHcXapUsxJUgERe3acvHtt1PUqRMOp9MaN31Oh8OhhFuhaAL2KNxSyiXsOm79+s885l7g3v2w65BACMGQIUN46623mDlzJuFwmFGjRu2X2NlsNkpKSna7Pz8/HwCv1wskVxy2bt2aPsOH/+TYU664ep/tcDqd+/xYhaIlYZom06dP5+OPPwagurqam266qUnLPah63Flm5MiRjBgxgiVLljBs2LBsm6NQKPYSKSWzZ89mzJgxCCGYPXs248aNa9JzNhPhlrjd1dk2Yre4XHXEYjGqq5vOxj59+uzX+JFIhFAo1KQ27i+aplFbW9vMi+wbzfqz6HbXYtfsuKvd2TZlt7hCLiKRSLP+LMZiMerq6jJi4xVXXMGLL77I+++/jxCCq6++ml/96lc888wz+zXuz31PRHP4EhUVFcmbb74522bslnA4TGVlJV27ds22KbulvLwct9tNUVFRtk3ZLevWraN79+7NOozyxRdf0K9fv2ybsVs0TWPJkm+oqTk826bsFo+nmmOPjdOuibsf7Q+bN2+mdevWVsiwOfLQQw9RXV2967iplDLrl9atW8vmzIYNG+Ts2bOzbcbP8tJLL8mPPvoo22b8LH/+859ldXV1ts3YLaZpyhtuuCFj43399dfyzTfflFJKuXPnTjl//vz9HrOqqkoed9y9EmSzvbRtu0S+/PLL+/1c0zzyyCMyFotJKaX8xz/+ISsqKvZ7zFmzZskNGzbs9zhNSUoXd6mZqjqgQtEE1NXVcd9999G7d2+uv/56AGpra3n55ZezbFnL4oknnqCwsJDy8nImT57MgAED+NOf/tQsa9G88sorB6wTUzOJcSsUBxc+n4/zzz+fqVOnsnz5ct577z2WLVvGddddl23TWhRXX301Y8aMwe12s2LFCtauXcvtt99+QBp07C2VlZUHLP21+T17heIgQAhBly5diMVilJWV8dprrzFixIgDVp/8YMHpdDJmzBj+9a9/8fXXX1NQUEDr1q0P+fUB6lOkUDQRRx55JLNmzWLDhg1Mnz7dyqFX7B0TJkxg/fr13HvvvcyaNSvb5jQLlMetUCgULQwl3AqFQtHCaFHCrWnafie1KxQKRUunxQj39OnTOeussxBCcMopp/DRRx9l2ySFQqHICi1icjIcDrNx40buuece2rVrRyAQYPPmzQwaNEg1EVUoFIccLcLjXr58OZ07d6ZLly7cd999tGnThs8++4xgMJht0xRNSDQa5cMPP8y2GQpFs6NFCPfw4cPZuHEjN998MxdddBEXXnghJ554IgUFBdk2rUl46KGHDvlmt48//ji33XYbH3zwATfccAPffvtttk1SKJoNLUK4ASZNmsQNN9zAo48+ysqVK3nttdcoLy/PtlkZxTAMpkyZwuGHH94sV4YdKHRd5/XXX+e8887jd7/7HcFgkPLy8mZeVVChOHC0GHXo1KkTAwYM4LnnnuOYY47h8ccfZ9y4cWzatCnbpmWMl156idzcXE4//fRDWriff/55zj33XLp3786dd97JH/7wB2bMmIGmadk2TaFoFrQodRBCNGiNNWfOHObMmcPy5cuzbNn+EwwGWb9+Pf369TukRRvg8ssvZ+HChSxcuJBRo0YxaNAgOnXqdMi/LgpFmhb9TfD5fNx444289NJLrFu3Ltvm7DNSSkpLSxkxYgTDd9FC7FDk9ttvp7CwkA8//JCFCxcyYMAAbr/99mybpVA0C1pEOuDP0b59e26++Wauv/56pk+fTps2bbJt0l4RjUa58cYbufXWWznssMOybc4+YZomNTU1QPJHaOzYsUSjUSZNmsS55567T2MOGzaMIUOGcPrpp1NcXIyUkvbt2zNp0iT
uuOMO3O7m2wFGoWhqWrxwAxQUFDB//nyuvvpqJkyYQN++fbNtUqOZOnUqv/71r+nVq1e2TdkrAoEAS5cuBZJtoNLFf4QQvPvuu+Tk5Oz3Oex2O8XFxda4Q4cOpaamhlmzZtG3b19Gjhx5yFeJUxyaHBTCDckv+YMPPshjjz1GIpGgf//+2TZpj2zevBld1+nVq1ezF6Da2loefPBB677NZsPj8QDg8Xj4z3/+0+TPQQjBGWecwUsvvcTSpUvZunUrV155ZZOeU6Fojhw0wg1QUlLCNddcw5QpU2jVqhUdO3ZstoIYDoe55557mDRpEt26dcu2ORbp1kgAK1euJN0LND8/n5tuusk6Li8vj+OOOy4rNp5zzjmcccYZzJ8/n2effZbLLrus2b7PKoVx/0l/Jpvreww/vM8HysaDSrgB2rVrx3333ccll1zCjBkz6NixY7ZN+gnbt2/n97//PfPnz2/SZqVSSsrKyujUqdNujwmFQlRUVFj33377bWbPng1A//79+fe//w0kP5B5eXlNZuve4nA4uOSSS3jsscd4+eWXOfPMM5tl+YN7772XW265Bb/fn21TWixSSq644greeeedbJuyWzZs2MDSpUt5/PHHD8j5DjrhBsjJyeH5559n4sSJXHbZZRx77LHZNsli/fr1zJ07lyeeeKJJRXv58uWsXr2aTz75hEGDBjFkyBB69uwJwOrVq1mxYgUAO3bsYNWqVdbjRowYwcqVK5vMrkzicDj405/+xJQpU5g3bx69evVi2LBh2TarAfF4HLfb3ay9xZZANBrNyLxJU2GaJoZh4HQ6D8j5DkrhhmTctbS0lNmzZ+N0OpvNhOV7771H3759mzz75Z///CcAkydP5uabb2bu3LlW1kq7du0sL7xfv35WOKSlcuutt/Lcc8/x4YcfYhgGJ554YrZNUiialINWuCEpUNdffz2lpaX89a9/pW3btlmzRUrJ2rVrWb9+Pf/3f//XpOdasmSJleN+2mmnUVZWxrZt2ygtLaVr164UFxe3uLTJn0MIwWWXXUYgEODee++lpKSEPn36KC9XcdDSohfgNIZWrVrx1FNPcdNNN7F+/fqs2fHll1/ywAMPMGXKlCYvjjV06FCCwSALFizgtdde46KLLuKJJ55gzJgxHHnkkQeVaNcnPz+f+++/n2nTprF06VK2bNmSbZMUiibhoBduSKYKzpo1i+eee+6ALo8Ph8O89NJLvPfeeyxYsICnnnrqgEygCSEYNmwY69ev5/HHHyeRSHD00UcfEh6ozWZj9uzZLFq0iGeeeSarP9YKRVNxUIdK6pOfn8/111/Po48+Sn5+vjVR11T85S9/YfXq1QwdOpRbbrmF559//oBNXACcfvrpnHbaabz//vuHXMzX4XAwZcoUNm3axMyZM5k0aZK1kEehOBjYo8cthPAIIT4RQnwhhFgjhJic2t5NCLFMCLFBCLFACOFKbXen7m9M7e/atE+h8bRv357bbruNO+64o0EKXKYJh8OsWrWKSZMm0apVK4LBIKFQ6IDn9AohDjnRrk+PHj2YPHnyQVu3XXHo0phQSRwYKaXsBxwDnCqEOAGYAkyXUvYCaoCrUsdfBdRIKXsC01PHNRt8Ph9/+9vfuPPOOxukwWWSN954g+HDh1NUVMSmTZt49913WbhwoSpLmgUKCgqaZX63QrE/7FG4ZZJQ6q4zdZHASOCF1PZ5wNmp22el7pPaP0o0s+Cqw+Hg3nvv5eWXX+bzzz/P+Pjnnnsub775JjNmzODss8/m8ssv58orr8TlcmX8XAqF4tCjUTFuIYQdWA70BB4DNgG1Usp0f60yoEPqdgfgOwAppS6ECADFwM4M2r3ftG7dmmuuucaq751pHnjgAb799ltmzpzJww8/TJ8+fZrkPAqF4tCjUaolpTSAY4QQBcBLQO9dHZa63pV3/ZPgrhDiGuAaSIYvskFT5nX37t2bI444ghNPPFGVIFUoFBllr9IBpZS1wHvACUC
BECIt/B2BbanbZUAngNT+fKB6F2PNllIOkFIOaM5LWfcHIYQSbYVCkXEak1XSKuVpI4TIAU4C1gKLgV+nDrsMWJS6/e/UfVL735WqRJpCoVBkjMaEStoB81JxbhuwUEr5qhDiK+AfQoi/ACuBuanj5wLzhRAbSXraFzSB3QqFQnHIskfhllKuAn5SXk9K+Q1w/C62x4DzMmKdQqFQKH7CIbHkXaFQKJqK5557jieffJIvvviCe++9l6qqqiY/pxJuhUKh2EeklMyZM4cePXpQXFzMe++9R2VlZZOfVwm3QqFo9rRu3TrbJuyShx9+mD/+8Y+MHj2avn378vTTTzNx4kRM02zS8zaLIlOmafLhhx9m24zdUlFRQXl5ebO2ccuWLdTU1DT5B2Z/qK6u5tNPP23Szj/7SyQSyej7HAgE2LBhQ8ZWzYZCITyeatq2bb6fxcLCdWzZEszo6zhp0qSMjldeXs6qVavYvn37fo0zcOBAJk2axB133MG5555LaWkpF198MUuXLt1vG3/uu9wshFtKeUDiQvtKIBAgGo02axvD4TDPPGMjGGy+NnbunGDQoBpisVi2TdktNTU6l1ySydfwJF59FSAzYzocEdqd+ik5t76YkfGaAtdmP+Hwb5r19yUWi/F/tf9HzJGBz+Kf4OrQ1RACLoCP+Tgjb3dcxne7r1kIt91u58wzz9zvcaSUvPzyy7hcLkaPHs2XX37J0qVLue6667DZ9j0qtHHjRgzDyIiNTYVpmuzY0YaKisHZNmW3FBev4pRTTqGwsDDbpuwSKSXz57/N5s3N9312u6vxt32IzWduzrYpu6Xth23ps7PPfn9fTNPk8ccfZ+jQoRx11FG8+eabJBIJzj777P2uLV9eXs624dsI9Azs1zhNSZ599825D6oY99atW3n//fet5rjdu3enqqqKjz/+ONumKRSKvWTp0qXU1tbSvXt3hg4dyqBBg3j//ffZunVrtk3LOgeVcHft2pW+ffsyfvx4ysvLufvuu4nFYgwZMiTbpikUir1k6NChhMNh7r77brZt28b48ePp27cvXbt2zbZpWeegEm6AESNGMGbMGOx2OwMHDuT888/PtkkKhWIfueCCCxg4cCB2u50xY8YwYsSIbJvULDjohLtHjx5ceOGFFBYWcsEFF3D00Udn2ySFQrGP9OvXjwsuuIDCwkIuvPBCevTokW2TmgUHnXArFArFwY4SboVCoWhhKOFWKBSKFoYSboVCoWhhKOFWKBSKFoYSboVCoWhhKOFWKBSKFoYSboVCoWhhKOFWKBSKFoYSboVCoWhhKOFWKBSKFoYSboVCoWhhKOFWHHC++uorPvjgg2yboVC0WJRw7ydSSkzT5IEHHmDLli2N7vmYftzuLuvXr+fEE09scHnuueea+Nk0PTU1NcycOZM333yTESNGNOs2ZgpFc6VZtC7LNBUVFbRr1+6AnGvTpk0MHz6cadOm8cc//pFwOMw777wDwDfffIOu67t8XCwW4/LLL0dKucv9vXr14tVks0KLTDWczSYFBQX85S9/oaKigtdff53169fj8XiybZaFYRjZNkGxC9q1a0dFRQVt27bNtinNgoNSuEtLS3n44Yf3q89kY5BSsnjxYiZPnkznzp0ZO3YsDz30EPPmzQNgyZIlJBKJXT42JyeH5cuX73fvvJaGEILvvvuOGTNmsG3bNh544AHsdnu2zbIIh7NtgeLH2Gw2ZsyYQWlpqfXdOtQ5KIX7QGKaJg6HA9M0LW8t7WXPnDmTnJycbJrX7Kirq2PJkiWMHTuWt956i9mzZ5Obm5tts4DkD/FvfvPHbJuhUOwRJdz7gRCCUaNGMWzYMB5++GHeeOMNOnbsyFVXXZVt05otubm5FBUVsXXrVhYvXtyswiQKRUtBCfd+0qNHD8rKynjwwQeZMWMGXbp0ybZJzRqHw8FFF12ElLLJQ1kKxcGKEu79RAiB3W5n4sSJ2TalxSCEOORi+wpFJtmjyyOE8Ag
hPhFCfCGEWCOEmJza/qwQYrMQ4vPU5ZjUdiGEmCmE2CiEWCWE6N/UT0KhUCgOJRrjcceBkVLKkBDCCSwRQvwnte8WKeULPzp+DNArdRkEPJG6VigUCkUG2KPHLZOEUnedqcuuk4+TnAU8l3rcx0CBEOLAJFUrFArFIUCjZoeEEHYhxOfADuBtKeWy1K57U+GQ6UIId2pbB+C7eg8vS21TKBQKRQZolHBLKQ0p5TFAR+B4IURf4DbgCGAgUASUpg7f1azTTzx0IcQ1QojPhBCfRaPRfTJeoVAoDkX2Kh9LSlkLvAecKqUsT4VD4sAzwPGpw8qATvUe1hHYtouxZkspB0gpB6hFKgqFQtF4GpNV0koIUZC6nQOcBHydjluLZF7X2cDq1EP+DVyayi45AQhIKcubxHqFQqE4BGlMVkk7YJ4Qwk5S6BdKKV8VQrwrhGhFMjTyOXBd6vjXgdOAjUAEuCLzZisUCsWhyx6FW0q5Cjh2F9tH7uZ4Cfxh/01TKBQKxa5Qa44VCoWihaGEW6FQKFoYSrgVCoWihaGEW6FQKFoYSrgVCkWzx+fzMWiQKnmUplmUddV1nSeffDJj40UiEV588UUKCgoyMl4gEKCsrCyjNmaab775hs6dcykpWZVtU3aL37+F+fPn43a793xwltD1avr2bb7vs90eI39zPn2f7JttU3ZLbnkuS2NLqaioyOi4drs9Y9/B1atX0yPQg0T+rlsLNge+1b/d7b5mIdx2u51Ro0ZlbLxMjgVQVlaGzWbL+LiZxOFwcMIJRRx11FEZGU/XdRyO5MfDMIyM9IWcO3cLf/7zMDTNt99jNRUnn7yCl15qvu9zXV0d//rXDq4YtevlERKJxERKiUhVn5CpihM2Ybe2NSWrVq2itraW4cOHZ2S8+p+/TH0WA4EAU4+fSseOHfd7rKZisG3wbvc1C+EWQtCzZ89sm/GzbNiwoVnbuHr1atq0aZMRG7du3crtt9/OnDlzCIdjCDOLAAAgAElEQVTD/PWvf+XSSy9l4MCB+zWu1+slGOxKPF643zY2DRKbzZXR97m8vJy8vDx8vsz8WFVXV+P1eunWrRtVVVXJjTkadeFa8vML+GLHYj6MvEowVoOpC7y2IsLxMJF4mKu6T8bjzKFdXkcKvcUEAgGcTiehUIiSkhJ27tyJ3+8nEolQUlJCOBzGbrejaZolmOFw2NqXn59PZWUlJSUlAFZHo+3bt2O32zPyOm7bto3S0lJmzpxJXV0dCxYsYMCAAZxyyin71YwjPz+fjh070qlTJ0KhEDk5OYTDYZxOJw6Hg2g0is/ns/bF43GEEDidTiKRCH6/n2AwSE5ODpqm4Xa7SS5hAZfLRSgUIi8vj3A4TG5uLrquY5ombrebYDCIz+cjEong8XgwTdNylDwej/W8fq5DVLMQbkXzYvHixfzyl7/k008/5f333+ekk07i1Vdf3W/hPhR5/PHHGTlyJCNGjMjouFE9xJfR9wjpAcrq1lAVq8BT7UOYDlrbutEh5yi+2vkpDruPvr5jsOXZ+aJ6Ka9uXMDoLucxqstY2ng6IKXE4/EQj8ctEUmLk2malhilRSR9rBCCSCSCy+Wyrl0uV0afI8Cnn37K0UcfTXl5OVOmTOGyyy7jrbfe4uSTT85IF6VQKER+fj6hUIjCwkJ0XUfTNIqKiqipqaGwsNASYSkl8XickpISampqKCoqIhKJkJubSzQaRQiBaZrWmFVVVeTn5xMIBHA4HNhsNqqrqykoKKCqqgq/309dXR1CCNxuN9FoFLfb3ajnpYRb8RMuv/xyDj/8cNq1a0dlZSWzZ89m7dq12TZLUQ+bsDHzk8fQjDgd/R3pXtgdt93Ls+/Ox+9zcViXdlRtDVMVX0O/vrUUuVqjGSbtcnqwpmIV6A5audsw+rAzASzRSd+22WyYponNZkPX9QbnTreeS4u5zWZrslZ0Z511Fr/85S95++232bBhA//73/94/fXXM9avNCcnh1AohMP
hoK6uDrvdjs1mIxAIcOONNzJgwACuvfZaIpGI9Zxra2vxeDzU1dXhcDiIxWJWWNFms1k/bvn5+SQSCbxeL6ZpMm/ePN555x2efPJJ8vPz0TTN2ielbLRogxJuxW5YvHgxH3zwAR9//DHjx48nPz8/2yYp6uG25/KXgY9z9oKz2OEy2OioJlfkUiS6kBtzE9mSx87vo3xdsQN37pd4qoqoKdqJ11GEw+YiUBcjlkhwQsfhOKQTr9dLOBxGCJH86++UJGJhnA47CA+mlNjtduLxOF6vF13XcTqdhMNhfD5fk/YQXbhwIV9//TWPPPII06ZNo127zPVlCYfDFBYWUldXR15eHoZhoGkafr+f119/nUWLFmEYBpdeeikFBQXE43H8fr/lcYdCIVwuF7FYDMDyuAsKCqitrSU/P5/vv/+ed955h9LSUuLxOM888wy1tbX4/X5CoWSPmrTY5+TkKI9bse+0b9+e1q1b4/f76dy5c7bNUfyIWCxG91ZdWfibhVz4z/NZvmU5Tt1BsasImQAzYXLfhffz8ZdL6ezvzJtr3qRDp0K2fFuJ25dHeWUVsYTOfW//lbvGTiYcDuP3+4nH4zhljOfvPA5Tj4GQ/OqWleQUtMU0TQoKCgiHwzgcDgKBALm5udTU1JCbm0tubm6TPNc2bdoQCATwer0Z/yw6nU50Xcdut2MYRnJSt14z62g0SmlpKXfeeSdvvfUWxx57rBWP1nUdm82GlNL615EOe0gpcblcrFq1ilNPPZVAIAAkkwjsdrsVVnI6ncAP/3KUx61QHMTk5uZSWVlJB297nvjVLG5ceCM7anbQs7gXdmnHTBj888MFeO1eorEILoeT7Z84OKLLALbt2ERd8Q5KtE78vzcXcErXUzlt0GlUVlbiccHyNx8mENJo3XkAvY45CeHMJR6PY7fbqa6utiYni4qKqKyspLi4uEk97qbE4XCgaRo2mw1N06zn8fTTT1teNEAikeCiiy7ikksu4ZxzzqFr165MmTIFKSWGYVgC7HQ6+d3vfsf27dv5+9//zj/+8Q9LtCGZFTN79mx+97vfYZomDofDmkfYm2wZJdwKRQskEomQl5cHwADPAP7fJX/nrKfO5usd6/A5fOSIHOIiTmV8JxWV5VTvrOb0gWMpcbXHxM7ReQN464v/UOR24LY5CQaDBHZs5JV/z2DH1s9o3aE/w34zlYLWXbEJgd1uxzRNiouLLY+7qqoKn8/X5B53UxKNRikqKqKurg6/34+u6yQSCf7+97+TSDTM8d62bRtTpkzhtddew+v18tlnn2EYRoNjbDYbr732GlJKVq5c+ZPzSSmZPXs2F1xwAQUFBYRCIYQQeDweEomE5fHviUN25WQ8Hmfnzp3ZNkOh2CfS3pmUEpuw0bOoF+9c9w492x5GXayOdRXr+WzrClZ9twpfnp+BfQYS1aJ8u30rwmGj7vsEJ/YYQ16ugzufv4HN2zby7cbVfP3lcoadeRvn3jCf4rbdEST/xqcFJZ0WKITA4XBgmiZ2u/0n3mJL8cDTPzxut5vq6moikQgAmqZZx0ybNq3BGo7Vq1ezbNmyn4g2JGPcK1asaCDabdq0Yd68edZ9h8NBq1at0DSN/Px8vF4vkPwXpUIlP8Prr7/O1q1b+e677zjyyCM5++yzLe9FoWgJ2Gw2YrEYIuUNa5pG2/y2vHHtq7z25Wu8+uXrLF3zERVV24kkwlSZduL2BGbCBB3WrvuKUwaOZnjJr2k9WHDjtAs5vNLOMQNGcdhxY8jNy7dEOp31IIQgkUjgdDoxDAOXy2VNUv5YcNJ//5s76TTAuro6ioqKLI87HfqApIi/9NJLFBYW7lKs98SoUaMa/BDous7OnTspKCggEAhYHrdKB9wDkydPZty4cQwfPpzLL7+cX/ziF0q4FS2KWCxmhSai0Sher5fa2lp8Ph8je47i3IG/5o0Vb1ARrCARS+Dz5BGNRIlHEyAF+gidzm06MfL4kRQVFuGvKOK7j77
g5F/9gZLW7amqqsLr9aJpGg6HwxLpdH6yx+OhtrbWWrjj8/maJI+7qUmnAzqdyXBReoKwvkDn5OSwrw3Nr7zySh544AHeeusta5vdbsfv9zdIB4Tkwh3lce+GqVOnUlpaSq9evXj11VdZsGABEyZM4IUXXmgxf+8UitzcXOrq6oDkFz69Gi8dsw2Hw4w+djSB2lpyXS6itVV8O+9RYhvX4mnXgSNu+jMJpxM7sLOinIqV23B7W9Opc0/qqqsp9PlIaBobX3mR5f+cj3B6OOLM39DjxJEUFhdjGAYlJSWEQiGKi4utPOaWRjweJy8vj0gkQk5OjrWK0ePxWMckEgncbreVebI3nHXWWQANJjqllITDYbxer7Xd5XI18Mr3RMt8tfeDcePGMXjwYDRN4/XXX+fKK6/k0UcfzbZZCsVeEQ6HrdV80WiUvLw8K284fb195TJE2Wa2vLYQZ46XoydPB5sTYbdh7Kxg7Z0TMYQNM2Zirv2S1kf3Z8sLz/LdB4uJBOvI69SNw8++kDPumYqpa3z17ts8f8WFuPILGfnH8eS1bU+XXr0IBALk5OS0yH+t9eP3UkorxPPyyy/Ttm1bgsEgW7duZcWKFT9ZiNQYNm7cyHHHHcfGjRut851zzjnWnED91MO9cRwPOeH+3//+x8iRI5FS8tRTT9G7d28KCwuVt61oUbjd7gYx7kQigcfjQdM0PB4POz94k61T76TTBVfT59a/IgSE160l/TGXQtD3zmlIAbGKcgo/XkIikcAubAy44VZwOIlHIySiESJVOzClpMtxA+l83PEEqqv516Q78HfqzGUPzSDH72+xHrfT6SQej2Oz2ayl/EKIBh7yI488wiOPPLJP40+YMIFt27YxdepUIDk3MW7cONxuN6Zp4nK5rB+LvXkNW+arvY8sXbqUjz76iNLSUmw2G5s3b+aYY47JtlkKxV6TzuaAHyrmpReSVL73Bt89eT89LroWb9cexL/fgkBgQyIEqVqBguimrzGlxARaHz8YE0hEIkQrKzBN0KXElBIDMEyJYYIpk+f95SWX8v78+cy5/vdc+/QzDSbzWhLpglrplYzpycmpU6fuc1z7x6RFG5Lv25133smdd95pTYqml8knEolGZ5YcEsItpWT9+vXMnz+fBx54wPpLp0Rb0VJJZ3UIIayVfJFIBFG1ne0vP0/nsy/GXVSCGajChg0hUisCAQGYSDCTtzEliUgIQ0p0MynSppSYMnlbT1+bEgMTzQCXO4dfXPRbFj08nUevvIKb//7/svuC7CPp5esej4eamhqklDz22GM89NBDDUIjhYWF2O32BmmRNTU1uxwzPz8fp9OJaZpWDZj0sVJK5syZg91u56677rIyVQzD2Kt0wEMij/uTTz5h2rRpPPbYYy0yDqdQ/Jh0TDtdeS4QCFCQn0/Flyvxl7TFW1CMGaqFWAQRD2GLR7DHw9jikeQlFkbEwhANQywE0TBmJIyMhDAiIfRICD0cJBEOoYWCJEJBEuEg8WDyOhaqw9Q1Tr7qamrKygju2JHtl2SfCAaDFBQUkEgk8Pl8PPnkk9xzzz0NFt8ceeSRrFixgrKyMjZt2sSOHTv47LPPdlkts3fv3rz77ruUlZXx5ZdfUlZWxieffEK/fv2sYwzD4PHHH+eBBx5g27ZthMNhIOn9NzaN8qAX7nfffZe3336b6dOnqzi24qAhXZDI7XZjGEYyrS1QS+37b2DL8aAFayAWQUYjEEsKtS0ewREPY49HELEIxCPWMUYkjIxGMKNhzGgEMxJBj0TQIyG0SJhE+jocJhEOkQiHiIdDaLEETm8e7/2jZXrcOTk5RCIRHA4H27dvZ9KkSQ329+nTh1mzZlFUVGTFwuvq6mjVqhVTp06lV69e1rFut5ubb76ZXr16EY/H8fl8aJpGmzZtmDt3Lscff3yDsadOnUo4HLY6Qql0QJJ/ST7//HPef/99xo0b1yKW40oprZzONJMnT2bx4sUAXH311Vx22WX
ZME3RzEiHRiD5hU8kErhtgtg3X1E8aixmNIxhs2G3iaR7ZgO7zY7NBqYEYUowJdKUSNNEGhLTBMM0k/FtU6KZEk2aaEYyhKKbZnKbKdGN1G0Jbbt2QctQPPhAo2kaubm5xGIxrrvuOiu7JE15eTm33norhmFwxBFH8Oijj+LxeIhEIhx77LGccsopbNiwAYBTTjmFESNGkEgkrB+Eu+++m5UrV2KaJlu3bm1wbiEEf/jDH3jxxRdxuVx7lWp40Ar32rVreeSRR5g1a9YBWRhQUVFBYWFho/spmqbJhg0bGnxIli1bxsMPP9zguMmTJ1NaWgrQYieAFJmnfvqaldJmE0jTwIxF0G1gs9kxbQJpE2ATSLuAtDCZIM2ko2AayWvdBN0w0SVouokuk3HthGEmhdww0U2ThCnQDIlmmmiGSSwcyvbLsc+kGxg4HA7mzp3L+++/z0UXXWTtr66u5uOPP6ZHjx7cf//92O12IpEIbrebeDzeIBPE5/PRqlUrK8vH6/UyadIkxowZw4oVK35y7pkzZ3LhhRc2aGDRWA5K4f7ggw9YtGgRc+bMyVjB9d0RCAR44YUX2LRpE+3bt6dLly6cccYZPznulVdeYUe9OKBhGCxbtqzBMQMGDNjlG5wNEokEn3/+Of3798+2KYpdkEgkLIfEMAw8Hg+xQC1GOEJs+zZy/PkYNjs2u0DYQNgFCBsmNkwkupQYZlKQdSPtVUt0aZIwQEt71EZyMjIajRLXNHDnkDBlSrhBMw3ikQhNucBdSsnixYsz1sPyx2OnwxN2u50PPvjgJ8f07t2bBQsWkJeXh8Ph4O2332bHjh0UFBTQr18/LrvsMnRdZ9CgQSxbtowtW7aQk5PD2WefjcfjYdGiRYwdO5Yvvviiwbiffvop5513nuW87U2ZgINOuD/44AM++ugj7rrrriYXbUj213vmmWd4+umnWbFiBVOmTMFut/PPf/6zwXHHHntsg76DbrebOXPmNNu4ezQa5bXXXuOdd97JtiktmnPOOYf58+dzwgknkJOTk7FxPR4PO3bsQAiB1+tN9kH05WFKqPt6DfZeRyByPGCzpTztVCaJpiPcHgxpJoVX1wlv+45YOEzMMEkYkrguiZsGcR2cxW3A5ycWiRJPJBC6QSJ1nGZKErrB1tWr6Tnw+D0bvY9IKZk1a9Yuq+1lgnSnn1AoxKxZszjzzDNZt24d69ats84/depUHnzwQYQQVFVVMX78eIYMGcILL7zAOeecY5Vnvfbaa3nhhReYNm0akKxLcueddzYQ5Q4dOjBq1Cief/55SktLyc3NbXRVwDQHjXBLKVm7di2LFi1i0qRJ+P3+Jj+naZpMnDiRv/3tb8ybN48XX3yRjRs3smzZMiZOnNjg2M6dO2f0i6toGfTv359bbrnFintminSz3vRiEZ/PRzAU5MjSe1kzeRzGl2FKDu+LdLswbAJDgIhHMGtrsLdpj6kbBDeuwdAlsXicuKYRN0ziOkR1g7huEjNMtIptaNiR3nzs+QXISAzd7kAzIGGYbPxyFTZXLkf+YljGntuBJN3Y1+Px4PF4+OSTTygpKeG3v/2tdczXX3/NunXr+OCDDzj//PO56qqrKCoqstL9DMOwmicYhkFeXh5nnHEGTz/9NNOnT2fLli0N5q4KCgqYPn06N954I926dbO6Dh2SC3C++OILHn74YebOnXtAPG1Ixhnvu+8+rr/+eubMmcOIESO47bbbDpi3rzi0MQzD+pwlvUY7wleIppvYwmGqv/qc/J5HYDN07KaB0OJold9DeVkyV9sEzTRJmEkPOqEnvWiDVO62hEQ8QUwziAWCxL/7jphhojvdeNu2Z9uWrQSDEboefxh9myCMcSBIN/aNx+MUFRVRWFjId999RywWs/puQtIx3Lx5M/fffz9r1qzh3//+N8888wxSSnJycqz0wb59+3LzzTczceJEFixY8JPwh81mIxqNUl5eTu/eva1FPk6nk1gs1ug5skYLtxD
CDnwGfC+lHCuE6Ab8AygCVgCXSCkTQgg38BxwHFAFnC+l3NLY8+wLixcv5t133+XJJ588oIIphKC4uJijjjqKp556ilgsxkUXXaREW9HkpJdqp8U7XV41BJgeD4l4DDSdcG0NhOsQoSA2m8CGQCIxpIkpk8Ktm6Ri1j/ErvV0/NtMxsNNU2LI5OpJQ9MI1dQSi0Sxuz1I2XLqb/+YvLw8qxt7bW0tLpeLTZs2MWTIEEaPHk1dXZ01gTlr1iyklLzyyisMHjyY0tJSq9u91+tFSsmECROYP39+A9G+4YYbLI88XRxs48aNtG/fHr/fj2EYe/2PbG887j8Ba4F0DGIKMF1K+Q8hxCzgKuCJ1HWNlLKnEOKC1HHn78V5Go2UkmXLlvHhhx8yfvz4rJSVLCkp4f7772fVqlV0795dLfBRHBDi8bhVwS4SiZCbm5sss9r7KAp/cQrb33wZEx1ZVYVDmNh0E2ETiJRwm7KeEEuZjG0bsoGA6/UmL3WZnLA0pETXJPGaAKYEu8fDGbfeYtVIaWmkQ06JRIL8/HyklAwbNoyRI0cSi8Wspeg2m41evXoxfvx4AGbMmMFNN91kpRMmEglrleS0adMs0b7rrrv4/e9/j8fjsVa5ejweYrGYVdURsLrFN1bDGuUaCiE6AqcDc1L3BTASeCF1yDzg7NTts1L3Se0fJZro53j9+vU8++yzjB8/nsLCwqY4RaM5+uijlWgrDhher5dQKNSglnR+fj5xYcffpSe6CXHNJBqJEo0miBgmUd0koievo7pJTE+KdVSTyYlJ0ySRSv/TpCRuSnRDoktBIuVxa6aJzZuXDCW4ctB0ncEnj24R6yR2RW5uboPXMB3yqKurIycnh7q6Oqu7fe/eva3H6bpu9ZKMxWI4nc4GTYDT9OrVi8LCQpxOJzabDb/fTzQaJT8/36rHnfa098bxbKzHPQO4FUinRRQDtVLK9GL+MqBD6nYH4DsAKaUuhAikjs9on7ClS5cyf/58nnjiiRb7N02h2FcikYiVpZS+HQgE8Pl82Lr2wtaqPbGKMjSZwI7AbiNVGTDpq0nZ0OtOL66xskUMA81IinfCTOdzS3QDYjW1mAKOHjUCT1ExlZWVFBQUNMiaaimk67yk86jTYU6Hw2E1AZZSYrfbG0weCiGsvOt0DZP6lzTpbvDpbZqmWXne6RBXOo7+48V3P8cePW4hxFhgh5Ryef3NuzhUNmJf/XGvEUJ8JoT4bG+rcC1evJjFixdb6TkKxaFGOu4ajUatCa/03/ouQ0/E06EzUcMklsoOSXrYJjFdJ6brRHWDqG78sN8S6dREpSGT+dxpMU/leWtmMoRS0rUb36xew9jrb8Dv97fI7jfwQypgWpzr53SnKzBKKbHZbHTr1q1BY4T//ve/AFaIJB3/rqqqApIty/r27WvtS2ed2Gw2DMNo8DjIfB73UOBMIcRpgIdkjHsGUCCEcKS87o7AttTxZUAnoEwI4QDygeofDyqlnA3MBmjTpk2jLV6zZg3//e9/ufnmm60mmwrFoUb6i5/+8qczINKCM+CWe3jlt2cQjYawC5GcmJRYZV1NwExXAUSi68lMkqQ4m+gGJMykmGummco+SQq42+endc/DadWzJ0Xt2lntvloi6SbBfr+fQCCAy+XC6XRanYSqq6vx+XxEIhEKCgoYNmwYixYtIhwOc8MNN9CpUydL2AHKysqsSoDHHXcc7dq1s+qkp2vK1NTUWJ3l063LEolEZtMBpZS3AbcBCCFOBG6WUl4shPgn8GuSmSWXAYtSD/l36v7S1P53ZQY7h/bu3Zu7775bLf9WtBiaonGuYRjWFz39lz4SieByuYhGoxR070Fu527sWPM5NmHDbpV0NZHYkCLlAaYmJ5P1tpMrJ5P1SITlaWumScxIhkwSpoHPX4DN5aJbv374CgqsmtIt0etOVweMxWIUFBRgmiaGYVBUVGS1ZYtGo/h8PqSUVn0YgMrKSiorK3c7dvpfUCAQwG6
3Y7PZqKmpwev1Ul1dbcXQ02GXdLPgxrA/eWulwHghxEaSMey5qe1zgeLU9vHAxN08fp+w2WxKtBUtiqbwRr1eL8FgkFAohMPhsPKRI5EIxcXFRCIRxjz2DHHNJK4bRDUjFR6RyeuESVRLhk/i6TCKIYkaENMFMd0kYZjEjeR2zTBJ6AaFHTrTa+gwPLleTrngAoLBICUlJS12ctLn81FTU4PL5aKmpsbKq043QN65cyd2u526ujoikQgDBw6kU6dOexy3bdu2jBgxwvpBcLvd2Gw2qx9oSUmJlcmSjhzszWu4V8ItpXxPSjk2dfsbKeXxUsqeUsrzpJTx1PZY6n7P1P5v9uYcCoViz0SjUXJzc8nJybGK8KdXAAYCATweD9Lhot8lVyeF2kgKd0T7IbadzC4xkvFvQ9YT8eSy9rhuErfi3RJ/2w50H3A827Zs4aQrriAQDJGTk0NtbW2DVl8tiUgkYnVc9/v9VkpjQUGBFR4xDAOv14vH42Ho0KHMmzePgoKC3Y7pcrmYM2cOJ554Im63m2AwiKZpSCmtbJWamppk3n0oZP1Y7M1rqFaKKBQtELfbjaZpVpZCNBq1VvDl5eUlGwMUFlEyeDi2Vu2I6pKIbhIxkimBP6QFyh9uGyYxzUh62XoyRTBuGCRMicufT+uevajasZ1IMET3Y47B5/MRj8fxer0t9l+wx+MhHA7jcDgIh8NWOmD6RzAYDGK324nFYlZPyt69e7Ny5UqeffZZ/H4/Pp8Pv9+P3+9n+vTprFu3jsGDB+Pz+ax2ZA6Hw6orky5RoOs6ubm5DepxN5aDZsm7QnEoUX8pdjojon7tjPSkZbfjBzPg0qt5d/qDaJGw9XiZWogjZXKS0iAd7072mtStBTgmnqIS8tq0IxKN4nZ7mPL2W5YN9SdFWyL124ulqd+erP6+dPlcm81G69atGTNmDN9++y26rlsrIwFrviFdX9s0TSt7pP57BMn5ifpZJ41FCbdC0QJJNwhOi0G6WbDNZkPTNOva5XIx7KrrMKTk1b9MRjYQqGSGiSFJ5nSnl7XLH+py61JgMySBmhq6tmvH1Q8+iC1VCS8ej1s5yUKIFtnpvb7oplc3QtITT5fLhYbecHpf/YUz9VP6NE3D6XRamSKaplmPTSQS1r70e1b/h6KxqFCJQtECSedsx2Ixq7h/elteXp61BD0YDGKz2Tj+okv59UMz6XjswGQ8O3XpMOB4PG3aEjPM1EXSa/iJxE2SS+BNiEWi9D/5JK647z5yU81CTNMkLy+PeDxOXl5ei8woASxhTS+GSYtnfdFNL1VPe+DpSn7psEo6N1sIYSVPpJs5m6aJw+Gw9judTnRdb7Av/YO3N/9aWt5PpELRQohGo1RWVhKLxSgrK0PTNEpKSjI2flFREZD8C5+Tk4MQwtpWWFiIEIL27dtb+0deejnDzjsfo54HaHc6MU0D0/jBE3e4XGj1muUCuDweXB6P5R36/X6ryFpLzeGG5A+g2+1u8BrCD+GS9L76pLux72pfmp+LW+9LTPvHKOFWKJqI//3vf0yYMIEdO3YwYcIEiouL+dvf/pax8etXoUwLyJ6u7Y2sp+PZzeK23Y3bUkkvYkrfrr/9x9sas+9AoUIlCkUTEIlEeOedd3j66afp27cvTz31FH369GHJkiXZNk1xECCaYlXX3lJYWCgvueSSbJuxW+LxuLWKqrkSCARwOBwZKwNgGAZbtmyhR48eGRkPkm3etm8vQcrmm4FQUPA9Xbp02POBe8AwDLZu3Ur37t3ZtGkTXbt2pa6uDtM09+tzZBgGVVVVtG7der9tbCrC4TCGYWS0C9X69es57LDDMjZeVVUVeXl5jV6pmA3mz59PTU3NLt36ZiHcQohKIEyGKwhmkBKUbfuCsm3fULbtGwebbV2klK12taNZCDeAEOIzKeWAbNuxK2bvclkAAAUsSURBVJRt+4aybd9Qtu0bh5JtKsatUCgULQwl3AqFQtHCaE7CPTvbBvw
MyrZ9Q9m2byjb9o1DxrZmE+NWKBQKReNoTh63QqFQKBpB1oVbCHGqEGKdEGKjECKjTRf20Z4tQogvhRCfCyE+S20rEkK8LYTYkLo+IC3lhRBPCyF2CCFW19u2S1tEkpmp13GVEKJ/luy7Wwjxfer1+zzV8i6977aUfeuEEKOb0K5OQojFQoi1Qog1Qog/pbZn/bX7Gduy/rqlzuURQnwihPgiZd/k1PZuQohlqddugRDCldruTt3fmNrfNQu2PSuE2FzvtTsmtT0b3wm7EGKlEOLV1P2med1+3J34QF4AO7AJ6A64gC+AI7Ns0xag5EfbHgAmpm5PBKYcIFuGA/2B1XuyBTgN+A/JZs0nAMuyZN/dJNvb/fjYI1Pvrxvolnrf7U1kVzugf+q2D1ifOn/WX7ufsS3rr1vqfALIS912AstSr8lC4ILU9v/f3tmEalGFcfz3LLQiI1FCxLsoQ1AIsUgJFBGTyAqvwl0IQS4EwWohLQQR3Lms3IiC9qF9KWmhuDK6iisx/Eiv+NGFgsSLd6XpxtL+Lc4z3uH1fV8VmznzwvOD4T1zZmD+/N85z8x5zsyc7cBaL78PbPfySmBvBm1fAgNt9s/RJj4CvgUO+XolvuW+454HDCvNpvM3af7K/sya2tEP7PLyLmB5HQeVdIz7J1rupKUf2K3EcdJkzlMz6OtEP7BH0m1JvwPDpP+/Cl0jkk55+SZwAZhGA7zroq0TtfnmmiTplq+O80XAYmCf17d6V3i6D3jdrJqPeHTR1ola24SZ9QFvAzt93ajIt9yBexrwZ2n9Ct1P4joQcNjMTprZGq+bImkEUsMDcr5v3ElLk7z80Lumn5fSSln0eRf0ZdLdWaO8a9EGDfHNu/tngFHgJ9Jd/nVJd9pouKfPt98gzUFbizZJhXeb3btPzax4j71u77YA64HiU4uTqci33IG73RUm92Mu8yW9AiwFPjCzhZn1PCxN8XIb8CIwBxgBPvb62vWZ2QRgP7BO0l/ddm1TV7e2xvgm6a6kOUAf6e5+VhcNtepr1WZmLwEbgJnAXGASaSLzWrWZ2TvAqKST5eoux38sbbkD9xWgPGVyH3A1kxYAJF3131HgR9KJe63oYvnvaD6FHbU0wktJ17xx/QvsYKxbX6s+MxtHCozfSPrBqxvhXTttTfGtjKTrwFFSfniimRWfgS5ruKfPtz/Lw6fP/g9tb3r6SUoTln9BHu/mA8vM7A9Syncx6Q68Et9yB+5fgBk+8jqelKQ/mEuMmT1tZs8UZeANYMg1rfLdVgEH8iiELloOAu/5SPprwI0iLVAnLTnEFST/Cn0rfTT9BWAGcKIiDQZ8BlyQ9ElpU3bvOmlrgm+u4zkzm+jlp4AlpDz8EWDAd2v1rvB0ABiUj7jVpO1i6WJspBxy2bta/ldJGyT1SXqeFMcGJb1LVb5VPcr6oIU08nuZlEfbmFnLdNII/q/A+UIPKff0M/Cb/06qSc93pG7zP6Qr9OpOWkhdr63u4zng1Uz6vvLjn/WTc2pp/42u7xKwtEJdC0jdzrPAGV/eaoJ3XbRl982PNRs47TqGgE2ltnGCNDj6PfCE1z/p68O+fXoGbYPu3RDwNWNPntTeJvy4ixh7qqQS3+LNySAIgh4jd6okCIIgeEQicAdBEPQYEbiDIAh6jAjcQRAEPUYE7iAIgh4jAncQBEGPEYE7CIKgx4jAHQRB0GP8B3ecKdiRIWmrAAAAAElFTkSuQmCC\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## Kukagua Sera\n", + "\n", + "Kwa kuwa Q-Table inaorodhesha \"mvuto\" wa kila kitendo katika kila hali, ni rahisi kuitumia kufafanua 
urambazaji bora katika ulimwengu wetu. Katika hali rahisi zaidi, tunaweza tu kuchagua kitendo kinacholingana na thamani ya juu zaidi ya Q-Table:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "2" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ], + "source": [ + "# code block 9" + ] + }, + { + "source": [ + "Ikiwa unajaribu msimbo hapo juu mara kadhaa, unaweza kugundua kwamba wakati mwingine unakwama tu, na unahitaji kubonyeza kitufe cha STOP kwenye daftari ili kuukatiza. \n", + "\n", + "> **Kazi ya 1:** Badilisha kazi ya `walk` ili kuweka kikomo cha urefu wa njia kwa idadi fulani ya hatua (sema, 100), na angalia msimbo hapo juu ukirudisha thamani hii mara kwa mara.\n", + "\n", + "> **Kazi ya 2:** Badilisha kazi ya `walk` ili isirudi kwenye maeneo ambayo tayari imekuwa hapo awali. Hii itazuia `walk` kuzunguka, hata hivyo, wakala bado anaweza kujikuta \"amekwama\" mahali ambapo hawezi kutoroka.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Average path length = 5.31, eaten by wolf: 0 times\n" + ] + } + ], + "source": [ + "\n", + "# code block 10" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 57 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(lpath)" + ] + }, + { + "source": [ + "## Zoezi\n", + "## Ulimwengu halisi wa Peter na Mbwa Mwitu\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya tafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kwa usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, inashauriwa kutumia huduma ya tafsiri ya kitaalamu ya binadamu. 
Hatutawajibika kwa maelewano mabaya au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sw/8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb b/translations/sw/8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb new file mode 100644 index 000000000..954db2d76 --- /dev/null +++ b/translations/sw/8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb @@ -0,0 +1,425 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "eadbd20d2a075efb602615ad90b1e97a", + "translation_date": "2025-09-06T15:14:53+00:00", + "source_file": "8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# Peter na Mbwa Mwitu: Mazingira Halisi\n", + "\n", + "Katika hali yetu, Peter aliweza kuzunguka karibu bila kuchoka au kuhisi njaa. Katika dunia halisi zaidi, anapaswa kukaa chini na kupumzika mara kwa mara, na pia kujilisha. Hebu tufanye dunia yetu iwe halisi zaidi kwa kutekeleza sheria zifuatazo:\n", + "\n", + "1. Kwa kusafiri kutoka sehemu moja hadi nyingine, Peter hupoteza **nguvu** na kupata **uchovu**.\n", + "2. Peter anaweza kupata nguvu zaidi kwa kula matufaha.\n", + "3. Peter anaweza kuondoa uchovu kwa kupumzika chini ya mti au kwenye nyasi (yaani, kutembea hadi eneo lenye mti au nyasi - uwanja wa kijani).\n", + "4. Peter anahitaji kumtafuta na kumuua mbwa mwitu.\n", + "5. 
Ili kumuua mbwa mwitu, Peter anahitaji kuwa na viwango fulani vya nguvu na uchovu, vinginevyo atashindwa katika mapambano.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import random\n", + "import math\n", + "from rlboard import *" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "width, height = 8,8\n", + "m = Board(width,height)\n", + "m.randomize(seed=13)\n", + "m.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "actions = { \"U\" : (0,-1), \"D\" : (0,1), \"L\" : (-1,0), \"R\" : (1,0) }\n", + "action_idx = { a : i for i,a in enumerate(actions.keys()) }" + ] + }, + { + "source": [ + "## Kufafanua hali\n", + "\n", + "Katika sheria zetu mpya za mchezo, tunahitaji kufuatilia nishati na uchovu katika kila hali ya ubao. 
Kwa hivyo tutaunda kitu `state` ambacho kitabeba taarifa zote zinazohitajika kuhusu hali ya sasa ya tatizo, ikijumuisha hali ya ubao, viwango vya sasa vya nishati na uchovu, na kama tunaweza kumshinda mbwa mwitu tukiwa katika hali ya mwisho:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "class state:\n", + " def __init__(self,board,energy=10,fatigue=0,init=True):\n", + " self.board = board\n", + " self.energy = energy\n", + " self.fatigue = fatigue\n", + " self.dead = False\n", + " if init:\n", + " self.board.random_start()\n", + " self.update()\n", + "\n", + " def at(self):\n", + " return self.board.at()\n", + "\n", + " def update(self):\n", + " if self.at() == Board.Cell.water:\n", + " self.dead = True\n", + " return\n", + " if self.at() == Board.Cell.tree:\n", + " self.fatigue = 0\n", + " if self.at() == Board.Cell.apple:\n", + " self.energy = 10\n", + "\n", + " def move(self,a):\n", + " self.board.move(a)\n", + " self.energy -= 1\n", + " self.fatigue += 1\n", + " self.update()\n", + "\n", + " def is_winning(self):\n", + " return self.energy > self.fatigue" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ], + "source": [ + "def random_policy(state):\n", + " return random.choice(list(actions))\n", + "\n", + "def walk(board,policy):\n", + " n = 0 # number of steps\n", + " s = state(board)\n", + " while True:\n", + " if s.at() == Board.Cell.wolf:\n", + " if s.is_winning():\n", + " return n # success!\n", + " else:\n", + " return -n # failure!\n", + " if s.at() == Board.Cell.water:\n", + " return 0 # died\n", + " a = actions[policy(m)]\n", + " s.move(a)\n", + " n+=1\n", + "\n", + 
"walk(m,random_policy)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Killed by wolf = 5, won: 1 times, drown: 94 times\n" + ] + } + ], + "source": [ + "def print_statistics(policy):\n", + " s,w,n = 0,0,0\n", + " for _ in range(100):\n", + " z = walk(m,policy)\n", + " if z<0:\n", + " w+=1\n", + " elif z==0:\n", + " n+=1\n", + " else:\n", + " s+=1\n", + " print(f\"Killed by wolf = {w}, won: {s} times, drown: {n} times\")\n", + "\n", + "print_statistics(random_policy)" + ] + }, + { + "source": [ + "## Kazi ya Malipo\n", + "\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "def reward(s):\n", + " r = s.energy-s.fatigue\n", + " if s.at()==Board.Cell.wolf:\n", + " return 100 if s.is_winning() else -100\n", + " if s.at()==Board.Cell.water:\n", + " return -100\n", + " return r" + ] + }, + { + "source": [ + "## Algorithimu ya Q-Learning\n", + "\n", + "Algorithimu halisi ya kujifunza haibadiliki sana, tunatumia tu `state` badala ya nafasi ya bodi pekee.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "Q = np.ones((width,height,len(actions)),dtype=np.float)*1.0/len(actions)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "def probs(v,eps=1e-4):\n", + " v = v-v.min()+eps\n", + " v = v/v.sum()\n", + " return v" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "" + ] + } + ], + "source": [ + "\n", + "from IPython.display import clear_output\n", + "\n", + "lpath = []\n", + "\n", + "for epoch in range(10000):\n", + " clear_output(wait=True)\n", + " print(f\"Epoch = 
{epoch}\",end='')\n", + "\n", + " # Pick initial point\n", + " s = state(m)\n", + " \n", + " # Start travelling\n", + " n=0\n", + " cum_reward = 0\n", + " while True:\n", + " x,y = s.board.human\n", + " v = probs(Q[x,y])\n", + " while True:\n", + " a = random.choices(list(actions),weights=v)[0]\n", + " dpos = actions[a]\n", + " if s.board.is_valid(s.board.move_pos(s.board.human,dpos)):\n", + " break \n", + " s.move(dpos)\n", + " r = reward(s)\n", + " if abs(r)==100: # end of game\n", + " print(f\" {n} steps\",end='\\r')\n", + " lpath.append(n)\n", + " break\n", + " alpha = np.exp(-n / 3000)\n", + " gamma = 0.5\n", + " ai = action_idx[a]\n", + " Q[x,y,ai] = (1 - alpha) * Q[x,y,ai] + alpha * (r + gamma * Q[x+dpos[0], y+dpos[1]].max())\n", + " n+=1" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## Matokeo\n", + "\n", + "Tuweke kuona kama tulifanikiwa kumfundisha Peter kupambana na mbwa mwitu!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Killed by wolf = 1, won: 9 times, drown: 90 times\n" + ] + } + ], + "source": [ + "def qpolicy(m):\n", + " x,y = m.human\n", + " v = probs(Q[x,y])\n", + " a = random.choices(list(actions),weights=v)[0]\n", + " return a\n", + "\n", + "print_statistics(qpolicy)" + ] + }, + { + "source": [ + "Sasa tunaona visa vichache sana vya kuzama, lakini Peter bado hawezi kila wakati kumuua mbwa mwitu. 
Jaribu kujaribu na uone kama unaweza kuboresha matokeo haya kwa kucheza na hyperparameters.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 13 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(lpath)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya kutafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kuhakikisha usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, tafsiri ya kitaalamu ya binadamu inapendekezwa. 
Hatutawajibika kwa kutoelewana au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sw/8-Reinforcement/1-QLearning/solution/notebook.ipynb b/translations/sw/8-Reinforcement/1-QLearning/solution/notebook.ipynb new file mode 100644 index 000000000..6e06f6f5e --- /dev/null +++ b/translations/sw/8-Reinforcement/1-QLearning/solution/notebook.ipynb @@ -0,0 +1,577 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "488431336543f71f14d4aaf0399e3381", + "translation_date": "2025-09-06T15:11:00+00:00", + "source_file": "8-Reinforcement/1-QLearning/solution/notebook.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# Peter na Mbwa Mwitu: Utangulizi wa Kujifunza kwa Kuimarisha\n", + "\n", + "Katika mafunzo haya, tutajifunza jinsi ya kutumia kujifunza kwa kuimarisha kutatua tatizo la kutafuta njia. Mazingira haya yamechochewa na hadithi ya muziki [Peter na Mbwa Mwitu](https://en.wikipedia.org/wiki/Peter_and_the_Wolf) iliyoandikwa na mtunzi wa Kirusi [Sergei Prokofiev](https://en.wikipedia.org/wiki/Sergei_Prokofiev). Ni hadithi kuhusu mvumbuzi kijana Peter, ambaye kwa ujasiri anatoka nyumbani kwake kwenda kwenye uwazi wa msitu kumfuatilia mbwa mwitu. 
Tutafundisha algoriti za kujifunza kwa mashine ambazo zitamsaidia Peter kuchunguza eneo linalomzunguka na kujenga ramani bora ya urambazaji.\n", + "\n", + "Kwanza, hebu tuagize maktaba kadhaa muhimu:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import random\n", + "import math" + ] + }, + { + "source": [ + "## Muhtasari wa Kujifunza kwa Kuimarisha\n", + "\n", + "**Kujifunza kwa Kuimarisha** (RL) ni mbinu ya kujifunza inayotuwezesha kujifunza tabia bora ya **wakala** katika **mazingira** fulani kwa kufanya majaribio mengi. Wakala katika mazingira haya anapaswa kuwa na **lengo**, linalofafanuliwa na **kazi ya zawadi**.\n", + "\n", + "## Mazingira\n", + "\n", + "Kwa urahisi, hebu tuchukulie ulimwengu wa Peter kuwa ubao wa mraba wa ukubwa `width` x `height`. Kila seli katika ubao huu inaweza kuwa:\n", + "* **ardhi**, ambapo Peter na viumbe wengine wanaweza kutembea\n", + "* **maji**, ambapo ni wazi huwezi kutembea\n", + "* **mti** au **nyasi** - mahali ambapo unaweza kupumzika kidogo\n", + "* **tufaha**, ambayo inawakilisha kitu ambacho Peter angefurahia kukipata ili kujilisha\n", + "* **mbwa mwitu**, ambaye ni hatari na anapaswa kuepukwa\n", + "\n", + "Ili kufanya kazi na mazingira haya, tutafafanua darasa linaloitwa `Board`. Ili kuepuka kujaa sana katika daftari hili, tumetenganisha msimbo wote wa kufanya kazi na ubao katika moduli tofauti inayoitwa `rlboard`, ambayo sasa tutaiingiza. 
Unaweza kuangalia ndani ya moduli hii ili kupata maelezo zaidi kuhusu mambo ya ndani ya utekelezaji.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from rlboard import *" + ] + }, + { + "source": [ + "Hebu sasa tuunde ubao wa nasibu na tuone jinsi unavyoonekana:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "width, height = 8,8\n", + "m = Board(width,height)\n", + "m.randomize(seed=13)\n", + "m.plot()" + ] + }, + { + "source": [ + "## Hatua na Sera\n", + "\n", + "Katika mfano wetu, lengo la Peter litakuwa kupata tofaa, huku akiepuka mbwa mwitu na vikwazo vingine. Ili kufanya hivyo, anaweza kimsingi kutembea huku na huku hadi apate tofaa. Kwa hivyo, katika nafasi yoyote anaweza kuchagua mojawapo ya hatua zifuatazo: juu, chini, kushoto, na kulia. Tutafafanua hatua hizo kama kamusi, na kuzihusisha na jozi za mabadiliko ya kuratibu yanayolingana. 
Kwa mfano, kusonga kulia (`R`) kungefanana na jozi `(1,0)`.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "actions = { \"U\" : (0,-1), \"D\" : (0,1), \"L\" : (-1,0), \"R\" : (1,0) }\n", + "action_idx = { a : i for i,a in enumerate(actions.keys()) }" + ] + }, + { + "source": [ + "Mkakati wa wakala wetu (Peter) unafafanuliwa na kile kinachoitwa **sera**. Hebu tuzingatie sera rahisi zaidi inayoitwa **kutembea bila mpangilio**.\n", + "\n", + "## Kutembea bila mpangilio\n", + "\n", + "Hebu kwanza tutatue tatizo letu kwa kutekeleza mkakati wa kutembea bila mpangilio.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "18" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ], + "source": [ + "def random_policy(m):\n", + " return random.choice(list(actions))\n", + "\n", + "def walk(m,policy,start_position=None):\n", + " n = 0 # number of steps\n", + " # set initial position\n", + " if start_position:\n", + " m.human = start_position \n", + " else:\n", + " m.random_start()\n", + " while True:\n", + " if m.at() == Board.Cell.apple:\n", + " return n # success!\n", + " if m.at() in [Board.Cell.wolf, Board.Cell.water]:\n", + " return -1 # eaten by wolf or drowned\n", + " while True:\n", + " a = actions[policy(m)]\n", + " new_pos = m.move_pos(m.human,a)\n", + " if m.is_valid(new_pos) and m.at(new_pos)!=Board.Cell.water:\n", + " m.move(a) # do the actual move\n", + " break\n", + " n+=1\n", + "\n", + "walk(m,random_policy)" + ] + }, + { + "source": [ + "Wacha tufanye jaribio la matembezi ya bahati nasibu mara kadhaa na tuone wastani wa idadi ya hatua zilizochukuliwa:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, 
+ "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Average path length = 32.87096774193548, eaten by wolf: 7 times\n" + ] + } + ], + "source": [ + "def print_statistics(policy):\n", + " s,w,n = 0,0,0\n", + " for _ in range(100):\n", + " z = walk(m,policy)\n", + " if z<0:\n", + " w+=1\n", + " else:\n", + " s += z\n", + " n += 1\n", + " print(f\"Average path length = {s/n}, eaten by wolf: {w} times\")\n", + "\n", + "print_statistics(random_policy)" + ] + }, + { + "source": [ + "## Kazi ya Zawadi\n", + "\n", + "Ili kufanya sera yetu iwe na akili zaidi, tunahitaji kuelewa ni hatua zipi ni \"bora\" kuliko nyingine.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "move_reward = -0.1\n", + "goal_reward = 10\n", + "end_reward = -10\n", + "\n", + "def reward(m,pos=None):\n", + " pos = pos or m.human\n", + " if not m.is_valid(pos):\n", + " return end_reward\n", + " x = m.at(pos)\n", + " if x==Board.Cell.water or x == Board.Cell.wolf:\n", + " return end_reward\n", + " if x==Board.Cell.apple:\n", + " return goal_reward\n", + " return move_reward" + ] + }, + { + "source": [ + "## Kujifunza kwa Q\n", + "\n", + "Jenga Jedwali la Q, au safu yenye vipimo vingi. Kwa kuwa ubao wetu una vipimo `width` x `height`, tunaweza kuwakilisha Jedwali la Q kwa safu ya numpy yenye umbo `width` x `height` x `len(actions)`:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "Q = np.ones((width,height,len(actions)),dtype=np.float)*1.0/len(actions)" + ] + }, + { + "source": [ + "Pitisha Jedwali-Q kwenye kazi ya mchoro ili kuonyesha jedwali kwenye ubao:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## Kiini cha Q-Learning: Mlinganyo wa Bellman na Algorithimu ya Kujifunza\n", + "\n", + "Andika pseudo-code kwa algorithimu yetu ya kujifunza:\n", + "\n", + "* Anzisha Q-Table Q na namba sawa kwa hali zote na vitendo vyote\n", + "* Weka kiwango cha kujifunza $\\alpha\\leftarrow 1$\n", + "* Rudia simulizi mara nyingi\n", + " 1. Anza katika nafasi ya bahati nasibu\n", + " 1. Rudia\n", + " 1. Chagua kitendo $a$ katika hali $s$\n", + " 2. Tekeleza kitendo kwa kuhamia hali mpya $s'$\n", + " 3. Ikiwa tunakutana na hali ya mwisho wa mchezo, au jumla ya zawadi ni ndogo sana - toka kwenye simulizi \n", + " 4. Hesabu zawadi $r$ katika hali mpya\n", + " 5. Sasisha Q-Function kulingana na mlinganyo wa Bellman: $Q(s,a)\\leftarrow (1-\\alpha)Q(s,a)+\\alpha(r+\\gamma\\max_{a'}Q(s',a'))$\n", + " 6. $s\\leftarrow s'$\n", + " 7. 
Sasisha jumla ya zawadi na punguza $\\alpha$.\n", + "\n", + "## Kutumia vs. Kuchunguza\n", + "\n", + "Njia bora ni kusawazisha kati ya kuchunguza na kutumia. Tunapojifunza zaidi kuhusu mazingira yetu, tutakuwa na uwezekano mkubwa wa kufuata njia bora, lakini kuchagua njia ambayo haijachunguzwa mara moja kwa wakati.\n", + "\n", + "## Utekelezaji wa Python\n", + "\n", + "Sasa tuko tayari kutekeleza algorithimu ya kujifunza. Kabla ya hilo, tunahitaji pia kazi fulani ambayo itabadilisha namba za bahati nasibu katika Q-Table kuwa vector ya uwezekano kwa vitendo vinavyolingana:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "def probs(v,eps=1e-4):\n", + " v = v-v.min()+eps\n", + " v = v/v.sum()\n", + " return v" + ] + }, + { + "source": [ + "Tunaongeza kiasi kidogo cha `eps` kwenye vector ya asili ili kuepuka kugawanya kwa 0 katika hali ya awali, ambapo vipengele vyote vya vector ni sawa.\n", + "\n", + "Algoriti halisi ya kujifunza tutakayoendesha kwa majaribio 10000, ambayo pia huitwa **epochs**:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "" + ] + } + ], + "source": [ + "\n", + "from IPython.display import clear_output\n", + "\n", + "lpath = []\n", + "\n", + "for epoch in range(10000):\n", + " clear_output(wait=True)\n", + " print(f\"Epoch = {epoch}\",end='')\n", + "\n", + " # Pick initial point\n", + " m.random_start()\n", + " \n", + " # Start travelling\n", + " n=0\n", + " cum_reward = 0\n", + " while True:\n", + " x,y = m.human\n", + " v = probs(Q[x,y])\n", + " a = random.choices(list(actions),weights=v)[0]\n", + " dpos = actions[a]\n", + " m.move(dpos,check_correctness=False) # we allow player to move outside the board, which terminates episode\n", + " r = reward(m)\n", + " cum_reward += r\n", + " if
r==end_reward or cum_reward < -1000:\n", + " print(f\" {n} steps\",end='\\r')\n", + " lpath.append(n)\n", + " break\n", + " alpha = np.exp(-n / 3000)\n", + " gamma = 0.5\n", + " ai = action_idx[a]\n", + " Q[x,y,ai] = (1 - alpha) * Q[x,y,ai] + alpha * (r + gamma * Q[x+dpos[0], y+dpos[1]].max())\n", + " n+=1" + ] + }, + { + "source": [ + "Baada ya kutekeleza algoriti hii, Jedwali la Q linapaswa kusasishwa na maadili yanayofafanua mvuto wa vitendo tofauti katika kila hatua. Onyesha jedwali hapa:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## Kukagua Sera\n", + "\n", + "Kwa kuwa Q-Table inaorodhesha \"mvuto\" wa kila kitendo katika kila hali, ni rahisi kuitumia kufafanua urambazaji bora katika ulimwengu wetu. 
Katika hali rahisi zaidi, tunaweza tu kuchagua kitendo kinacholingana na thamani ya juu zaidi katika Q-Table:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "2" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ], + "source": [ + "def qpolicy_strict(m):\n", + " x,y = m.human\n", + " v = probs(Q[x,y])\n", + " a = list(actions)[np.argmax(v)]\n", + " return a\n", + "\n", + "walk(m,qpolicy_strict)" + ] + }, + { + "source": [ + "Ikiwa unajaribu msimbo hapo juu mara kadhaa, unaweza kugundua kwamba wakati mwingine unakwama tu, na unahitaji kubonyeza kitufe cha STOP kwenye daftari ili kuusimamisha. \n", + "\n", + "> **Kazi ya 1:** Badilisha kazi ya `walk` ili kuweka kikomo cha urefu wa njia kwa idadi fulani ya hatua (sema, 100), na angalia msimbo hapo juu ukirudisha thamani hii mara kwa mara.\n", + "\n", + "> **Kazi ya 2:** Badilisha kazi ya `walk` ili isirudi kwenye maeneo ambayo tayari imekuwa hapo awali. 
Hii itazuia `walk` kurudia, hata hivyo, wakala bado anaweza kujikuta \"amekwama\" katika eneo ambalo hawezi kutoroka.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Average path length = 3.45, eaten by wolf: 0 times\n" + ] + } + ], + "source": [ + "\n", + "def qpolicy(m):\n", + " x,y = m.human\n", + " v = probs(Q[x,y])\n", + " a = random.choices(list(actions),weights=v)[0]\n", + " return a\n", + "\n", + "print_statistics(qpolicy)" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 15 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(lpath)" + ] + }, + { + "source": [ + "Kile tunachokiona hapa ni kwamba mwanzoni urefu wa njia ya wastani uliongezeka. Hii huenda inatokana na ukweli kwamba tunapokuwa hatujui chochote kuhusu mazingira - tuna uwezekano mkubwa wa kujikuta katika hali mbaya, maji au mbwa mwitu. Tunapojifunza zaidi na kuanza kutumia maarifa haya, tunaweza kuchunguza mazingira kwa muda mrefu zaidi, lakini bado hatujui vizuri mahali ambapo tufaha zipo.\n", + "\n", + "Mara tu tunapojifunza vya kutosha, inakuwa rahisi kwa wakala kufanikisha lengo, na urefu wa njia huanza kupungua. Hata hivyo, bado tunakuwa wazi kwa uchunguzi, kwa hivyo mara nyingi tunatoka nje ya njia bora, na kuchunguza chaguo mpya, jambo ambalo hufanya njia kuwa ndefu zaidi kuliko inavyopaswa.\n", + "\n", + "Kile tunachokiona pia kwenye grafu hii, ni kwamba wakati fulani urefu uliongezeka ghafla. Hii inaonyesha asili ya mchakato wa nasibu, na kwamba tunaweza wakati fulani \"kuharibu\" viwango vya Q-Table, kwa kuandika upya thamani mpya. 
Hili linapaswa kupunguzwa kwa kupunguza kiwango cha kujifunza (yaani, kuelekea mwisho wa mafunzo tunarekebisha thamani za Q-Table kwa thamani ndogo).\n", + "\n", + "Kwa ujumla, ni muhimu kukumbuka kwamba mafanikio na ubora wa mchakato wa kujifunza unategemea sana vigezo, kama vile kiwango cha kujifunza, kupungua kwa kiwango cha kujifunza, na kipengele cha punguzo. Hivi mara nyingi huitwa **vigezo vya juu**, ili kuvitofautisha na **vigezo** ambavyo tunaboresha wakati wa mafunzo (mfano, viwango vya Q-Table). Mchakato wa kutafuta thamani bora za vigezo vya juu huitwa **uboreshaji wa vigezo vya juu**, na unastahili mada tofauti kabisa.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "## Zoezi\n", + "#### Ulimwengu Halisi Zaidi wa Peter na Mbwa Mwitu\n", + "\n", + "Katika hali yetu, Peter aliweza kusafiri karibu bila kuchoka au kuhisi njaa. Katika ulimwengu halisi zaidi, anapaswa kukaa chini na kupumzika mara kwa mara, na pia kujilisha. Hebu tufanye ulimwengu wetu uwe halisi zaidi kwa kutekeleza sheria zifuatazo:\n", + "\n", + "1. Kwa kusafiri kutoka sehemu moja hadi nyingine, Peter hupoteza **nguvu** na kupata **uchovu**.\n", + "2. Peter anaweza kupata nguvu zaidi kwa kula matufaha.\n", + "3. Peter anaweza kuondoa uchovu kwa kupumzika chini ya mti au kwenye nyasi (yaani, kutembea hadi eneo la ubao lenye mti au nyasi - uwanja wa kijani).\n", + "4. Peter anahitaji kumtafuta na kumuua mbwa mwitu.\n", + "5. Ili kumuua mbwa mwitu, Peter anahitaji kuwa na viwango fulani vya nguvu na uchovu, vinginevyo atashindwa katika vita.\n", + "\n", + "Badilisha kazi ya malipo hapo juu kulingana na sheria za mchezo, endesha algoriti ya kujifunza kwa kuimarisha ili kujifunza mkakati bora wa kushinda mchezo, na linganisha matokeo ya kutembea bila mpangilio na algoriti yako kwa kuzingatia idadi ya michezo iliyoshinda na kupotezwa.\n", + "\n", + "> **Note**: Unaweza kuhitaji kurekebisha hyperparameters ili ifanye kazi, hasa idadi ya epochs.
Kwa sababu mafanikio ya mchezo (kupigana na mbwa mwitu) ni tukio nadra, unaweza kutarajia muda mrefu zaidi wa mafunzo.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya kutafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kuhakikisha usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, tafsiri ya kitaalamu ya binadamu inapendekezwa. Hatutawajibika kwa kutoelewana au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sw/8-Reinforcement/2-Gym/notebook.ipynb b/translations/sw/8-Reinforcement/2-Gym/notebook.ipynb new file mode 100644 index 000000000..bb990d2fc --- /dev/null +++ b/translations/sw/8-Reinforcement/2-Gym/notebook.ipynb @@ -0,0 +1,392 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.4 64-bit ('base': conda)" + }, + "interpreter": { + "hash": "86193a1ab0ba47eac1c69c1756090baa3b420b3eea7d4aafab8b85f8b312f0c5" + }, + "coopTranslator": { + "original_hash": "f22f8f3daed4b6d34648d1254763105b", + "translation_date": "2025-09-06T15:17:39+00:00", + "source_file": "8-Reinforcement/2-Gym/notebook.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "## Kuteleza kwa CartPole\n", + "\n", + "> **Tatizo**: Ikiwa Peter anataka kutoroka kutoka kwa mbwa mwitu, anahitaji kuwa na uwezo wa 
kusonga haraka kuliko yeye. Tutaona jinsi Peter anaweza kujifunza kuteleza, hasa, kudumisha usawa, kwa kutumia Q-Learning.\n", + "\n", + "Kwanza, wacha tusakinishe gym na kuingiza maktaba zinazohitajika:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 1" + ] + }, + { + "source": [ + "## Unda mazingira ya cartpole\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "#code block 2" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "source": [ + "Ili kuona jinsi mazingira yanavyofanya kazi, wacha tuendeshe simulizi fupi kwa hatua 100.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "#code block 3" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "source": [ + "Wakati wa uigaji, tunahitaji kupata uchunguzi ili kuamua jinsi ya kutenda. 
Kwa kweli, kazi ya `step` inaturudishia uchunguzi wa sasa, kazi ya zawadi, na bendera ya `done` inayoonyesha ikiwa ina maana kuendelea na uigaji au la:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "#code block 4" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "source": [ + "Tunaweza kupata thamani ya chini na ya juu ya nambari hizo:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38]\n[4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38]\n" + ] + } + ], + "source": [ + "#code block 5" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 6" + ] + }, + { + "source": [ + "Hebu pia tuchunguze mbinu nyingine ya kugawanya kwa kutumia mabano:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Sample bins for interval (-5,5) with 10 bins\n [-5. -4. -3. -2. -1. 0. 1. 2. 3. 4.
5.]\n" + ] + } + ], + "source": [ + "#code block 7" + ] + }, + { + "source": [ + "Hebu sasa tuendeshe uigaji mfupi na tuangalie zile thamani za mazingira zisizoendelea.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(0, 0, -2, -2)\n(0, 1, -2, -5)\n(0, 2, -3, -8)\n(0, 3, -5, -11)\n(0, 3, -7, -14)\n(0, 4, -10, -17)\n(0, 3, -14, -15)\n(0, 3, -17, -12)\n(0, 3, -20, -16)\n(0, 4, -23, -19)\n" + ] + } + ], + "source": [ + "#code block 8" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 9" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 10" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0: 22.0, alpha=0.3, epsilon=0.9\n", + "5000: 70.1384, alpha=0.3, epsilon=0.9\n", + "10000: 121.8586, alpha=0.3, epsilon=0.9\n", + "15000: 149.6368, alpha=0.3, epsilon=0.9\n", + "20000: 168.2782, alpha=0.3, epsilon=0.9\n", + "25000: 196.7356, alpha=0.3, epsilon=0.9\n", + "30000: 220.7614, alpha=0.3, epsilon=0.9\n", + "35000: 233.2138, alpha=0.3, epsilon=0.9\n", + "40000: 248.22, alpha=0.3, epsilon=0.9\n", + "45000: 264.636, alpha=0.3, epsilon=0.9\n", + "50000: 276.926, alpha=0.3, epsilon=0.9\n", + "55000: 277.9438, alpha=0.3, epsilon=0.9\n", + "60000: 248.881, alpha=0.3, epsilon=0.9\n", + "65000: 272.529, alpha=0.3, epsilon=0.9\n", + "70000: 281.7972, alpha=0.3, epsilon=0.9\n", + "75000: 284.2844, alpha=0.3, epsilon=0.9\n", + "80000: 269.667, alpha=0.3, epsilon=0.9\n", + "85000: 273.8652, alpha=0.3, epsilon=0.9\n", + "90000: 278.2466, 
alpha=0.3, epsilon=0.9\n", + "95000: 269.1736, alpha=0.3, epsilon=0.9\n" + ] + } + ], + "source": [ + "#code block 11" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 20 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(rewards)" + ] + }, + { + "source": [ + "Kutoka kwa grafu hii, haiwezekani kusema chochote, kwa sababu kutokana na asili ya mchakato wa mafunzo wa stochastic urefu wa vipindi vya mafunzo hutofautiana sana. Ili kufanya grafu hii iwe na maana zaidi, tunaweza kuhesabu **wastani wa kuendelea** juu ya mfululizo wa majaribio, tuseme 100. 
Hii inaweza kufanywa kwa urahisi kwa kutumia `np.convolve`:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 22 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "#code block 12" + ] + }, + { + "source": [ + "## Kubadilisha Vigezo na Kuona Matokeo kwa Vitendo\n", + "\n", + "Sasa itakuwa ya kuvutia kuona jinsi modeli iliyofunzwa inavyofanya kazi. 
Hebu tuendeshe simulizi, na tutafuata mkakati sawa wa kuchagua hatua kama wakati wa mafunzo: kuchagua kwa kuzingatia usambazaji wa uwezekano katika Q-Table:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 13" + ] + }, + { + "source": [ + "## Kuhifadhi matokeo kwenye GIF inayotembea\n", + "\n", + "Ikiwa unataka kuwavutia marafiki zako, unaweza kutaka kuwatumia picha ya GIF inayotembea ya fimbo ya kusawazisha. Ili kufanya hivyo, tunaweza kutumia `env.render` kuzalisha fremu ya picha, kisha kuhifadhi hizo kwenye GIF inayotembea kwa kutumia maktaba ya PIL:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "360\n" + ] + } + ], + "source": [ + "from PIL import Image\n", + "obs = env.reset()\n", + "done = False\n", + "i=0\n", + "ims = []\n", + "while not done:\n", + " s = discretize(obs)\n", + " img=env.render(mode='rgb_array')\n", + " ims.append(Image.fromarray(img))\n", + " v = probs(np.array([Qbest.get((s,a),0) for a in actions]))\n", + " a = random.choices(actions,weights=v)[0]\n", + " obs,_,done,_ = env.step(a)\n", + " i+=1\n", + "env.close()\n", + "ims[0].save('images/cartpole-balance.gif',save_all=True,append_images=ims[1::2],loop=0,duration=5)\n", + "print(i)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya kutafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kuhakikisha usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, tafsiri ya kitaalamu ya binadamu inapendekezwa. 
Hatutawajibika kwa kutokuelewana au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sw/8-Reinforcement/2-Gym/solution/notebook.ipynb b/translations/sw/8-Reinforcement/2-Gym/solution/notebook.ipynb new file mode 100644 index 000000000..a3a62fc7c --- /dev/null +++ b/translations/sw/8-Reinforcement/2-Gym/solution/notebook.ipynb @@ -0,0 +1,524 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "5c0e485e58d63c506f1791c4dbf990ce", + "translation_date": "2025-09-06T15:20:31+00:00", + "source_file": "8-Reinforcement/2-Gym/solution/notebook.ipynb", + "language_code": "sw" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "## Kuteleza kwa CartPole\n", + "\n", + "> **Tatizo**: Ikiwa Peter anataka kutoroka kutoka kwa mbwa mwitu, anahitaji kuwa na uwezo wa kusonga haraka kuliko yeye. 
Tutaona jinsi Peter anaweza kujifunza kuteleza, hasa, kudumisha usawa, kwa kutumia Q-Learning.\n", + "\n", + "Kwanza, wacha tusakinishe gym na kuingiza maktaba zinazohitajika:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: gym in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.18.3)\n", + "Requirement already satisfied: Pillow<=8.2.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (7.0.0)\n", + "Requirement already satisfied: scipy in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (1.4.1)\n", + "Requirement already satisfied: numpy>=1.10.4 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (1.19.2)\n", + "Requirement already satisfied: cloudpickle<1.7.0,>=1.2.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (1.6.0)\n", + "Requirement already satisfied: pyglet<=1.5.15,>=1.4.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (1.5.15)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n" + ] + } + ], + "source": [ + "import sys\n", + "!pip install gym \n", + "\n", + "import gym\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import random" + ] + }, + { + "source": [ + "## Unda mazingira ya cartpole\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "env = gym.make(\"CartPole-v1\")\n", + "print(env.action_space)\n", + "print(env.observation_space)\n", + "print(env.action_space.sample())" 
+ ], + "cell_type": "code", + "metadata": {}, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Discrete(2)\nBox(-3.4028234663852886e+38, 3.4028234663852886e+38, (4,), float32)\n0\n" + ] + } + ] + }, + { + "source": [ + "Ili kuona jinsi mazingira yanavyofanya kazi, hebu tuendeshe simulizi fupi kwa hatua 100.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "env.reset()\n", + "\n", + "for i in range(100):\n", + " env.render()\n", + " env.step(env.action_space.sample())\n", + "env.close()" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/gym/logger.py:30: UserWarning: \u001b[33mWARN: You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.\u001b[0m\n warnings.warn(colorize('%s: %s'%('WARN', msg % args), 'yellow'))\n" + ] + } + ] + }, + { + "source": [ + "Wakati wa uigaji, tunahitaji kupata uchunguzi ili kuamua jinsi ya kutenda. 
Kwa kweli, kazi ya `step` inaturudishia uchunguzi wa sasa, kazi ya zawadi, na bendera ya `done` inayoonyesha ikiwa ina maana kuendelea na uigaji au la:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "env.reset()\n", + "\n", + "done = False\n", + "while not done:\n", + " env.render()\n", + " obs, rew, done, info = env.step(env.action_space.sample())\n", + " print(f\"{obs} -> {rew}\")\n", + "env.close()" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[ 0.03044442 -0.19543914 -0.04496216 0.28125618] -> 1.0\n", + "[ 0.02653564 -0.38989186 -0.03933704 0.55942606] -> 1.0\n", + "[ 0.0187378 -0.19424049 -0.02814852 0.25461393] -> 1.0\n", + "[ 0.01485299 -0.38894946 -0.02305624 0.53828712] -> 1.0\n", + "[ 0.007074 -0.19351108 -0.0122905 0.23842953] -> 1.0\n", + "[ 0.00320378 0.00178427 -0.00752191 -0.05810469] -> 1.0\n", + "[ 0.00323946 0.19701326 -0.008684 -0.35315131] -> 1.0\n", + "[ 0.00717973 0.00201587 -0.01574703 -0.06321931] -> 1.0\n", + "[ 0.00722005 0.19736001 -0.01701141 -0.36082863] -> 1.0\n", + "[ 0.01116725 0.39271958 -0.02422798 -0.65882671] -> 1.0\n", + "[ 0.01902164 0.19794307 -0.03740452 -0.37387001] -> 1.0\n", + "[ 0.0229805 0.39357584 -0.04488192 -0.67810827] -> 1.0\n", + "[ 0.03085202 0.58929164 -0.05844408 -0.98457719] -> 1.0\n", + "[ 0.04263785 0.78514572 -0.07813563 -1.2950295 ] -> 1.0\n", + "[ 0.05834076 0.98116859 -0.10403622 -1.61111521] -> 1.0\n", + "[ 0.07796413 0.78741784 -0.13625852 -1.35259196] -> 1.0\n", + "[ 0.09371249 0.98396202 -0.16331036 -1.68461179] -> 1.0\n", + "[ 0.11339173 0.79106371 -0.1970026 -1.44691436] -> 1.0\n", + "[ 0.12921301 0.59883361 -0.22594088 -1.22169133] -> 1.0\n" + ] + } + ] + }, + { + "source": [ + "Tunaweza kupata thamani ya chini na ya juu ya nambari hizo:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + 
"outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38]\n[4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38]\n" + ] + } + ], + "source": [ + "print(env.observation_space.low)\n", + "print(env.observation_space.high)" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "def discretize(x):\n", + " return tuple((x/np.array([0.25, 0.25, 0.01, 0.1])).astype(np.int))" + ] + }, + { + "source": [ + "Hebu pia tuchunguze mbinu nyingine ya kugawanya kwa kutumia mabano:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Sample bins for interval (-5,5) with 10 bins\n [-5. -4. -3. -2. -1. 0. 1. 2. 3. 4. 5.]\n" + ] + } + ], + "source": [ + "def create_bins(i,num):\n", + " return np.arange(num+1)*(i[1]-i[0])/num+i[0]\n", + "\n", + "print(\"Sample bins for interval (-5,5) with 10 bins\\n\",create_bins((-5,5),10))\n", + "\n", + "ints = [(-5,5),(-2,2),(-0.5,0.5),(-2,2)] # intervals of values for each parameter\n", + "nbins = [20,20,10,10] # number of bins for each parameter\n", + "bins = [create_bins(ints[i],nbins[i]) for i in range(4)]\n", + "\n", + "def discretize_bins(x):\n", + " return tuple(np.digitize(x[i],bins[i]) for i in range(4))" + ] + }, + { + "source": [ + "Sasa tuendeshe uigaji mfupi na tuangalie zile thamani za mazingira zisizoendelea.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(0, 0, -1, -3)\n(0, 0, -2, 0)\n(0, 0, -2, -3)\n(0, 1, -3, -6)\n(0, 2, -4, -9)\n(0, 3, -6, -12)\n(0, 2, -8, -9)\n(0, 3, -10, -13)\n(0, 4, -13, -16)\n(0, 4, -16, 
-19)\n(0, 4, -20, -17)\n(0, 4, -24, -20)\n" + ] + } + ], + "source": [ + "env.reset()\n", + "\n", + "done = False\n", + "while not done:\n", + " #env.render()\n", + " obs, rew, done, info = env.step(env.action_space.sample())\n", + " #print(discretize_bins(obs))\n", + " print(discretize(obs))\n", + "env.close()" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "Q = {}\n", + "actions = (0,1)\n", + "\n", + "def qvalues(state):\n", + " return [Q.get((state,a),0) for a in actions]" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# hyperparameters\n", + "alpha = 0.3\n", + "gamma = 0.9\n", + "epsilon = 0.90" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0: 108.0, alpha=0.3, epsilon=0.9\n" + ] + } + ], + "source": [ + "def probs(v,eps=1e-4):\n", + " v = v-v.min()+eps\n", + " v = v/v.sum()\n", + " return v\n", + "\n", + "Qmax = 0\n", + "cum_rewards = []\n", + "rewards = []\n", + "for epoch in range(100000):\n", + " obs = env.reset()\n", + " done = False\n", + " cum_reward=0\n", + " # == do the simulation ==\n", + " while not done:\n", + " s = discretize(obs)\n", + " if random.random() Qmax:\n", + " Qmax = np.average(cum_rewards)\n", + " Qbest = Q\n", + " cum_rewards=[]" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 20 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(rewards)" + ] + }, + { + "source": [ + "Kutoka kwa grafu hii, haiwezekani kusema chochote, kwa sababu kutokana na asili ya mchakato wa mafunzo wa stochastic urefu wa vipindi vya mafunzo hutofautiana sana. Ili kufanya grafu hii iwe na maana zaidi, tunaweza kuhesabu **wastani unaoendelea** juu ya mfululizo wa majaribio, tuseme 100. 
Hii inaweza kufanywa kwa urahisi kwa kutumia `np.convolve`:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 22 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "def running_average(x,window):\n", + " return np.convolve(x,np.ones(window)/window,mode='valid')\n", + "\n", + "plt.plot(running_average(rewards,100))" + ] + }, + { + "source": [ + "## Kubadilisha Hyperparameta na Kuona Matokeo kwa Vitendo\n", + "\n", + "Sasa itakuwa ya kuvutia kuona jinsi modeli iliyofunzwa inavyofanya kazi. 
Hebu tuendeshe uigaji, na tutafuata mkakati sawa wa kuchagua hatua kama wakati wa mafunzo: kuchagua kulingana na usambazaji wa uwezekano katika Q-Table:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "obs = env.reset()\n", + "done = False\n", + "while not done:\n", + " s = discretize(obs)\n", + " env.render()\n", + " v = probs(np.array(qvalues(s)))\n", + " a = random.choices(actions,weights=v)[0]\n", + " obs,_,done,_ = env.step(a)\n", + "env.close()" + ] + }, + { + "source": [ + "## Kuhifadhi matokeo kwenye GIF ya michoro\n", + "\n", + "Ikiwa unataka kuwavutia marafiki zako, unaweza kutaka kuwatumia picha ya michoro ya fimbo ya kusawazisha. Ili kufanya hivyo, tunaweza kutumia `env.render` kuzalisha fremu ya picha, kisha kuhifadhi hizo kwenye GIF ya michoro kwa kutumia maktaba ya PIL:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "360\n" + ] + } + ], + "source": [ + "from PIL import Image\n", + "obs = env.reset()\n", + "done = False\n", + "i=0\n", + "ims = []\n", + "while not done:\n", + " s = discretize(obs)\n", + " img=env.render(mode='rgb_array')\n", + " ims.append(Image.fromarray(img))\n", + " v = probs(np.array([Qbest.get((s,a),0) for a in actions]))\n", + " a = random.choices(actions,weights=v)[0]\n", + " obs,_,done,_ = env.step(a)\n", + " i+=1\n", + "env.close()\n", + "ims[0].save('images/cartpole-balance.gif',save_all=True,append_images=ims[1::2],loop=0,duration=5)\n", + "print(i)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya tafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). 
Ingawa tunajitahidi kuhakikisha usahihi, tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuchukuliwa kama chanzo cha mamlaka. Kwa taarifa muhimu, tafsiri ya kitaalamu ya binadamu inapendekezwa. Hatutawajibika kwa kutoelewana au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/sw/PyTorch_Fundamentals.ipynb b/translations/sw/PyTorch_Fundamentals.ipynb new file mode 100644 index 000000000..12aec6bb6 --- /dev/null +++ b/translations/sw/PyTorch_Fundamentals.ipynb @@ -0,0 +1,2828 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4", + "authorship_tag": "ABX9TyOgv0AozH1FKQBD+RkgT2bV", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU", + "coopTranslator": { + "original_hash": "0ca21b6ee62904d616f2e36dc1cf0da7", + "translation_date": "2025-09-06T13:08:11+00:00", + "source_file": "PyTorch_Fundamentals.ipynb", + "language_code": "sw" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EHh5JllMh1rG", + "outputId": "f55755ad-c369-414c-85ec-6e9d4f061a02", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'2.2.1+cu121'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 1 + } + ], + "source": [ + "import torch\n", + "torch.__version__" + ] + }, + { + "cell_type": "code", + "source": [ + "print(\"I am excited to run this\")" + ], + "metadata": { + "colab": { + "base_uri": 
"https://localhost:8080/" + }, + "id": "UPlb-duwXAfz", + "outputId": "cfd687e4-1238-49f4-ab6b-ee1305b740d2" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "I am excited to run this\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "print(torch.__version__)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "byWVlJ9wXDSk", + "outputId": "fd74a5c4-4d4a-41b2-ef3c-562ea3e4811f" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "2.2.1+cu121\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "Osm80zoEYklS" + } + }, + { + "cell_type": "code", + "source": [ + "# scalar\n", + "scalar = torch.tensor(7)\n", + "scalar" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-o8wvJ-VXZmI", + "outputId": "558816f5-1205-4de1-fe1f-2f96e9bd79e6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(7)" + ] + }, + "metadata": {}, + "execution_count": 4 + } + ] + }, + { + "cell_type": "code", + "source": [ + "scalar.ndim" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mCZ2tXC4Y_Sg", + "outputId": "2d86dbdc-56e1-45c6-d3dd-14515f2a457a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "source": [ + "scalar.item()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ssN00By0ZQgS", + "outputId": "490f40d1-5135-4969-a6d3-c8c902cdc473" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + 
"data": { + "text/plain": [ + "7" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# vector\n", + "vector = torch.tensor([7, 7])\n", + "vector\n", + "#vector.ndim\n", + "#vector.item()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Bws__5wlZnmF", + "outputId": "944e38f9-5ba1-4ddc-a9c6-cfb6a19bb488" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([7, 7])" + ] + }, + "metadata": {}, + "execution_count": 7 + } + ] + }, + { + "cell_type": "code", + "source": [ + "vector.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "9pjCvnsZZzNG", + "outputId": "e030a4da-8f81-4858-fbce-86da2aaafe52" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([2])" + ] + }, + "metadata": {}, + "execution_count": 8 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Matrix\n", + "MATRIX = torch.tensor([[7, 8],[9, 10]])\n", + "MATRIX" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "a747hI9SaBGW", + "outputId": "af835ddb-81ff-4981-badb-441567194d15" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[ 7, 8],\n", + " [ 9, 10]])" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ] + }, + { + "cell_type": "code", + "source": [ + "MATRIX.ndim" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XdTfFa7vaRUj", + "outputId": "0fbbab9c-8263-4cad-a380-0d2a16ca499e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "2" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "code", + "source": [ + "MATRIX[0]\n", + "MATRIX[1]" + ], + "metadata": { + 
"colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TFeD3jSDafm7", + "outputId": "69b44ab3-5ba7-451a-c6b2-f019a03d0c96" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 9, 10])" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Tensor\n", + "TENSOR = torch.tensor([[[1, 2, 3],[3,6,9], [2,4,5]]])\n", + "TENSOR" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ic3cE47tah42", + "outputId": "f250e295-91de-43ec-9d80-588a6fe0abde" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[1, 2, 3],\n", + " [3, 6, 9],\n", + " [2, 4, 5]]])" + ] + }, + "metadata": {}, + "execution_count": 12 + } + ] + }, + { + "cell_type": "code", + "source": [ + "TENSOR.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Wvjf5fczbAM1", + "outputId": "9c72b5b8-bafe-4ae7-9883-b051e209eada" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([1, 3, 3])" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ] + }, + { + "cell_type": "code", + "source": [ + "TENSOR.ndim" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mwtXZwiMbN3m", + "outputId": "331a5e36-b1b0-4a5f-a9b8-e7049cbaa8f9" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "3" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ] + }, + { + "cell_type": "code", + "source": [ + "TENSOR[0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vzdZu_IfbP3J", + "outputId": "e24e7e71-e365-412d-ff50-fc094b56d2f3" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + 
"text/plain": [ + "tensor([[1, 2, 3],\n", + " [3, 6, 9],\n", + " [2, 4, 5]])" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "A8OL9eWfcRrJ" + } + }, + { + "cell_type": "code", + "source": [ + "random_tensor = torch.rand(3,4)\n", + "random_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hAqSDE1EcVS_", + "outputId": "946171c3-d054-400c-f893-79110356888c" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0.4414, 0.7681, 0.8385, 0.3166],\n", + " [0.0468, 0.5812, 0.0670, 0.9173],\n", + " [0.2959, 0.3276, 0.7411, 0.4643]])" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_tensor.ndim" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "g4fvPE5GcwzP", + "outputId": "8737f36b-6864-4059-eaed-6f9156c22306" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "2" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_tensor.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XsAg99QmdAU6", + "outputId": "35467c11-257c-4f16-99aa-eca930bcbc36" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([3, 4])" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_tensor.size()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cii1pNdVdB68", + "outputId": "fc8d2de6-9215-43de-99f7-7b0d7f7d20fa" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([3, 4])" + ] + 
}, + "metadata": {}, + "execution_count": 19 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_image_tensor = torch.rand(size=(3, 224, 224)) #color channels, height, width\n", + "random_image_tensor.ndim, random_image_tensor.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "aTKq2j0cdDjb", + "outputId": "6be42057-20b9-4faf-d79d-8b65c42cc27e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(3, torch.Size([3, 224, 224]))" + ] + }, + "metadata": {}, + "execution_count": 20 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_tensor_ofownsize = torch.rand(size=(5,10,10))\n", + "random_tensor_ofownsize.ndim, random_tensor_ofownsize.shape\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IyhDdj-Pd6nC", + "outputId": "43e5e334-6d4d-4b67-f87d-7d364c6d8c67" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(3, torch.Size([5, 10, 10]))" + ] + }, + "metadata": {}, + "execution_count": 21 + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "UOJW08uOert_" + } + }, + { + "cell_type": "code", + "source": [ + "zero = torch.zeros(size=(3, 4))\n", + "zero" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "uGvXtaXyefie", + "outputId": "d40d3e28-8667-4d2f-8b62-f0829c6162ad" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0., 0., 0., 0.],\n", + " [0., 0., 0., 0.],\n", + " [0., 0., 0., 0.]])" + ] + }, + "metadata": {}, + "execution_count": 22 + } + ] + }, + { + "cell_type": "code", + "source": [ + "zero*random_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "OyUkUPkDe0uH", + "outputId": "26c2e4be-36ba-4c6c-9a90-2704ec135828" + }, + 
"execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0., 0., 0., 0.],\n", + " [0., 0., 0., 0.],\n", + " [0., 0., 0., 0.]])" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ones = torch.ones(size=(3, 4))\n", + "ones\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "y_Ac62Aqe82G", + "outputId": "291de5d9-b9df-49de-c9d1-d098e3e9f4d8" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1., 1., 1., 1.],\n", + " [1., 1., 1., 1.],\n", + " [1., 1., 1., 1.]])" + ] + }, + "metadata": {}, + "execution_count": 24 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ones.dtype" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TvGOA9odfIEO", + "outputId": "45949ef4-6649-4b6c-d6af-2d4bfb8de832" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.float32" + ] + }, + "metadata": {}, + "execution_count": 25 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ones*zero" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "--pTyge-fI-8", + "outputId": "c4d9bb7e-829b-43db-e2db-b1a2d64e61f0" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0., 0., 0., 0.],\n", + " [0., 0., 0., 0.],\n", + " [0., 0., 0., 0.]])" + ] + }, + "metadata": {}, + "execution_count": 26 + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "qDcc7Z36fSJF" + } + }, + { + "cell_type": "code", + "source": [ + "one_to_ten = torch.arange(start = 1, end = 11, step = 1)\n", + "one_to_ten" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "w3CZB4zUfR1s", + "outputId": 
"197fcba1-da0a-4b4a-ed11-3974bd6c01aa" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])" + ] + }, + "metadata": {}, + "execution_count": 27 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ten_zeros = torch.zeros_like(one_to_ten)\n", + "ten_zeros" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "WZh99BwVfRy8", + "outputId": "51ef8bfb-6fa0-4099-ff66-b97d65b2ddea" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])" + ] + }, + "metadata": {}, + "execution_count": 28 + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "pGGhgsbUgqbW" + } + }, + { + "cell_type": "code", + "source": [ + "float_32_tensor = torch.tensor([3.0, 6.0,9.0], dtype = None, device = None, requires_grad = False)\n", + "float_32_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JORJl4XkfRsx", + "outputId": "71114171-0f49-481f-b6fc-6cb48e2fb895" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([3., 6., 9.])" + ] + }, + "metadata": {}, + "execution_count": 29 + } + ] + }, + { + "cell_type": "code", + "source": [ + "float_32_tensor.dtype" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "6wOPPwGyfRLn", + "outputId": "f23776a1-b682-404a-9f67-d5bcb0402666" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.float32" + ] + }, + "metadata": {}, + "execution_count": 30 + } + ] + }, + { + "cell_type": "code", + "source": [ + "float_16_tensor = float_32_tensor.type(torch.float16)\n", + "float_16_tensor.dtype" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + 
"id": "tFsHCvmZfOYe", + "outputId": "d3aa305a-7591-47f5-97fd-61bff60b44bd" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.float16" + ] + }, + "metadata": {}, + "execution_count": 31 + } + ] + }, + { + "cell_type": "code", + "source": [ + "float_16_tensor*float_32_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TQiCGTPuwq0q", + "outputId": "98750fce-1ca3-4889-e269-8b753efdea96" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 9., 36., 81.])" + ] + }, + "metadata": {}, + "execution_count": 32 + } + ] + }, + { + "cell_type": "code", + "source": [ + "int_32_tensor = torch.tensor([3, 6, 9], dtype = torch.int32)\n", + "int_32_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5hlrLvGUw5D_", + "outputId": "41d890a0-9aee-446c-d906-631ce2ab0995" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([3, 6, 9], dtype=torch.int32)" + ] + }, + "metadata": {}, + "execution_count": 33 + } + ] + }, + { + "cell_type": "code", + "source": [ + "int_32_tensor*float_32_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ihApD9u3xTNW", + "outputId": "d295eed0-6996-4e0f-8502-ff4b55cd1373" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 9., 36., 81.])" + ] + }, + "metadata": {}, + "execution_count": 34 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x = torch.arange(0,100,10)" + ], + "metadata": { + "id": "utKhlb_KxWDQ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "x" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "p78D74E9Rj7Y", + "outputId": 
"781a1614-a900-41f5-9e5d-358f0b2390aa" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])" + ] + }, + "metadata": {}, + "execution_count": 36 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.min()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4BcSs5NeRkcj", + "outputId": "3f24a8dc-58e9-4a5f-9834-e85856a34f9d" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0)" + ] + }, + "metadata": {}, + "execution_count": 37 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.max()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hinqvXVLRm4q", + "outputId": "5c7d8a53-3913-4ac1-bba3-5ba8ff68250a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(90)" + ] + }, + "metadata": {}, + "execution_count": 38 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.mean(x.type(torch.float32))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "k7okc0_vRpnB", + "outputId": "91e5494f-dc57-417c-ea4d-25dbc547c893" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(45.)" + ] + }, + "metadata": {}, + "execution_count": 39 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.type(torch.float32).mean()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "29QcDTjHRq10", + "outputId": "62937c6c-78e0-49f2-dde3-1543ee8f7907" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(45.)" + ] + }, + "metadata": {}, + "execution_count": 40 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.sum()" + ], + 
"metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wlpY_G_sbdKF", + "outputId": "475d8258-af65-4011-a258-b93d4d8142d4" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(450)" + ] + }, + "metadata": {}, + "execution_count": 41 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.argmax()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GT6HJzwhbk4n", + "outputId": "2e455c20-c322-4bcf-d07c-1259d3ccefc6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(9)" + ] + }, + "metadata": {}, + "execution_count": 42 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.argmin()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "egL3oi2Mb19P", + "outputId": "f71fb32f-6338-44a3-b377-75bea0a3ab54" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0)" + ] + }, + "metadata": {}, + "execution_count": 43 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "p2U8DZKib3DP", + "outputId": "b9f613b9-74e9-45f4-ed01-05babb6a6793" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0)" + ] + }, + "metadata": {}, + "execution_count": 44 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[9]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "24qBFlGYcABe", + "outputId": "5813cfcb-7f63-4bd7-ee46-f95ccbfda939" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(90)" + ] + }, + "metadata": {}, + "execution_count": 45 + } + ] + }, + { + "cell_type": "code", + "source": [ 
+ "x = torch.arange(1, 10)\n", + "x.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "0GPOxEzkcBHO", + "outputId": "aefbd903-4f4c-4d2c-c90f-eccd682fe018" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([9])" + ] + }, + "metadata": {}, + "execution_count": 46 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_reshaped = x.reshape(1,9)\n", + "x_reshaped, x_reshaped.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "spmRgQjwddgp", + "outputId": "85a7c55c-2909-4ea2-fc68-386dddc65742" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9]]), torch.Size([1, 9]))" + ] + }, + "metadata": {}, + "execution_count": 47 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_reshaped.view(1,9)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "tH2ahWGydqqP", + "outputId": "65d92263-4fc4-434a-c06d-c5e08436f7fe" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9]])" + ] + }, + "metadata": {}, + "execution_count": 48 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_stacked = torch.stack([x, x, x, x], dim = 1)\n", + "x_stacked" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "jgCeJcaud_-1", + "outputId": "7f293a37-6ef1-43b6-aee5-9d6d91c94f9e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 1, 1, 1],\n", + " [2, 2, 2, 2],\n", + " [3, 3, 3, 3],\n", + " [4, 4, 4, 4],\n", + " [5, 5, 5, 5],\n", + " [6, 6, 6, 6],\n", + " [7, 7, 7, 7],\n", + " [8, 8, 8, 8],\n", + " [9, 9, 9, 9]])" + ] + }, + "metadata": {}, + "execution_count": 49 + } + ] + }, 
+ { + "cell_type": "code", + "source": [ + "x_stacked.squeeze()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XhJHIK6cfPse", + "outputId": "06c47b89-3a9e-453e-bcc3-00cbcb0b8b49" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 1, 1, 1],\n", + " [2, 2, 2, 2],\n", + " [3, 3, 3, 3],\n", + " [4, 4, 4, 4],\n", + " [5, 5, 5, 5],\n", + " [6, 6, 6, 6],\n", + " [7, 7, 7, 7],\n", + " [8, 8, 8, 8],\n", + " [9, 9, 9, 9]])" + ] + }, + "metadata": {}, + "execution_count": 50 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_stacked.unsqueeze(dim=1)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ej2c3Xxzf0tq", + "outputId": "94024061-eb37-446d-c4a8-e4d16cb6de81" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[1, 1, 1, 1]],\n", + "\n", + " [[2, 2, 2, 2]],\n", + "\n", + " [[3, 3, 3, 3]],\n", + "\n", + " [[4, 4, 4, 4]],\n", + "\n", + " [[5, 5, 5, 5]],\n", + "\n", + " [[6, 6, 6, 6]],\n", + "\n", + " [[7, 7, 7, 7]],\n", + "\n", + " [[8, 8, 8, 8]],\n", + "\n", + " [[9, 9, 9, 9]]])" + ] + }, + "metadata": {}, + "execution_count": 52 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_stacked.squeeze()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4DJYo1a0f5M0", + "outputId": "efca2b47-1b14-44de-9a9a-2c83629d153f" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 1, 1, 1],\n", + " [2, 2, 2, 2],\n", + " [3, 3, 3, 3],\n", + " [4, 4, 4, 4],\n", + " [5, 5, 5, 5],\n", + " [6, 6, 6, 6],\n", + " [7, 7, 7, 7],\n", + " [8, 8, 8, 8],\n", + " [9, 9, 9, 9]])" + ] + }, + "metadata": {}, + "execution_count": 53 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_stacked.unsqueeze(dim=-2)" + ], + "metadata": { + 
"colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "J4iEjn2ah2HL", + "outputId": "22395593-7c16-4162-beae-dd2bbe7bda35" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[1, 1, 1, 1]],\n", + "\n", + " [[2, 2, 2, 2]],\n", + "\n", + " [[3, 3, 3, 3]],\n", + "\n", + " [[4, 4, 4, 4]],\n", + "\n", + " [[5, 5, 5, 5]],\n", + "\n", + " [[6, 6, 6, 6]],\n", + "\n", + " [[7, 7, 7, 7]],\n", + "\n", + " [[8, 8, 8, 8]],\n", + "\n", + " [[9, 9, 9, 9]]])" + ] + }, + "metadata": {}, + "execution_count": 55 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "tensor = torch.tensor([1, 2, 3])\n", + "tensor = tensor - 10\n", + "tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cFfiD7Nth7Z_", + "outputId": "1139e1f8-fc1a-46ca-d636-f2bc4fd2eef6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([-9, -8, -7])" + ] + }, + "metadata": {}, + "execution_count": 7 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.mul(tensor, 10)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dyA7BM_GHhqE", + "outputId": "0e3b9671-d9e8-4a32-87bb-59bc05986142" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([-90, -80, -70])" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.sub(tensor, 100)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "owtUsZ1KNegI", + "outputId": "189b7b23-0041-4e09-b991-cd209a48506a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([-109, -108, -107])" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "code", + 
"source": [ + "torch.add(tensor, 100)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "K5STXlQONsyc", + "outputId": "00cbb79a-0a1d-4e21-86ec-5c91c37a2d01" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([91, 92, 93])" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.divide(tensor, 2)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xqMGnzIUNvp0", + "outputId": "c894cf3e-f148-45f8-cfc8-d78740735306" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([-4.5000, -4.0000, -3.5000])" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.matmul(tensor, tensor)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ruGzKpV8NyBc", + "outputId": "fddb63bf-006f-48b6-ae28-287fbcda8bc5" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(194)" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor@tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "8GS3r9yTeGfD", + "outputId": "c80b12ac-30b5-4f3d-c38c-9e41ba511b0e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(194)" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ] + }, + { + "cell_type": "code", + "source": [ + "%%time\n", + "tensor@tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QmuYHqXTemC0", + "outputId": "402fe3ba-70b5-4bb2-c83b-254db84ff810" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": 
"stdout", + "text": [ + "CPU times: user 622 µs, sys: 0 ns, total: 622 µs\n", + "Wall time: 516 µs\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(194)" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ] + }, + { + "cell_type": "code", + "source": [ + "%%time\n", + "torch.matmul(tensor,tensor)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dGr1fzdNepd8", + "outputId": "97bd6c91-bc25-4b38-cdf5-f22dcdef243e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "CPU times: user 424 µs, sys: 998 µs, total: 1.42 ms\n", + "Wall time: 1.43 ms\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(194)" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.rand(3,2)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "pGYDoK2gevfo", + "outputId": "2c8783d5-0453-47c5-c7ed-af10d25d6989" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0.5999, 0.0073],\n", + " [0.9321, 0.3026],\n", + " [0.3463, 0.3872]])" + ] + }, + "metadata": {}, + "execution_count": 20 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.matmul(torch.rand(3,2), torch.rand(2,3))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "KGBGQoB8e2DP", + "outputId": "4c2ef361-a2d0-41ee-c328-3992cbbc138d" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0.3528, 0.1893, 0.0714],\n", + " [1.2791, 0.7110, 0.2563],\n", + " [0.8812, 0.4553, 0.1803]])" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import torch" + ], + "metadata": { + "id": "ib8DMtkBe_LJ" + }, + "execution_count": 
1, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "x = torch.rand(2,9)" + ], + "metadata": { + "id": "nJo8ZBdrQY1b" + }, + "execution_count": 2, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "x" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wi6oRv4MQfgf", + "outputId": "55c99f55-31f6-4cf5-ba4e-19a47c3a0167" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0.5894, 0.4391, 0.2018, 0.5417, 0.3844, 0.3592, 0.9209, 0.9269, 0.0681],\n", + " [0.0746, 0.1740, 0.6821, 0.6890, 0.0999, 0.7444, 0.2391, 0.4625, 0.8302]])" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ] + }, + { + "cell_type": "code", + "source": [ + "y=torch.randn(2,3,5)\n", + "y" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Zpx8myAUQgoc", + "outputId": "07756d70-56bd-437c-c74e-9aecc1a77311" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[ 1.5552, -0.4877, 0.5175, -1.7958, -0.6187],\n", + " [-0.3359, -1.9710, 0.0112, -1.7578, -1.5295],\n", + " [ 0.0932, 1.4079, 0.9108, 0.3328, -0.6978]],\n", + "\n", + " [[-0.9406, -1.0809, -0.2595, 0.1282, 1.6605],\n", + " [ 1.1624, 1.0902, 1.7092, -0.2842, -1.3780],\n", + " [-0.1534, -1.2795, -0.5495, 0.9902, 0.1822]]])" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_original = torch.rand(size=(224,224,3))\n", + "x_original" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "s4U-X9bJQnWe", + "outputId": "657a7a76-962c-4b41-a76b-902d0482266c" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[0.4549, 0.6809, 0.2118],\n", + " [0.4824, 0.9008, 0.8741],\n", + " [0.1715, 0.1757, 0.1845],\n", + " ...,\n", + " [0.8741, 0.6594, 
0.2610],\n", + " [0.0092, 0.1984, 0.1955],\n", + " [0.4236, 0.4182, 0.0251]],\n", + "\n", + " [[0.9174, 0.1661, 0.5852],\n", + " [0.1837, 0.2351, 0.3810],\n", + " [0.3726, 0.4808, 0.8732],\n", + " ...,\n", + " [0.6794, 0.0554, 0.9202],\n", + " [0.0864, 0.8750, 0.3558],\n", + " [0.8445, 0.9759, 0.4934]],\n", + "\n", + " [[0.1600, 0.2635, 0.7194],\n", + " [0.9488, 0.3405, 0.3647],\n", + " [0.6683, 0.5168, 0.9592],\n", + " ...,\n", + " [0.0521, 0.0140, 0.2445],\n", + " [0.3596, 0.3999, 0.2730],\n", + " [0.5926, 0.9877, 0.7784]],\n", + "\n", + " ...,\n", + "\n", + " [[0.4794, 0.5635, 0.3764],\n", + " [0.9124, 0.6094, 0.5059],\n", + " [0.4528, 0.4447, 0.5021],\n", + " ...,\n", + " [0.0089, 0.4816, 0.8727],\n", + " [0.2173, 0.6296, 0.2347],\n", + " [0.2028, 0.9931, 0.7201]],\n", + "\n", + " [[0.3116, 0.6459, 0.4703],\n", + " [0.0148, 0.2345, 0.7149],\n", + " [0.8393, 0.5804, 0.6691],\n", + " ...,\n", + " [0.2105, 0.9460, 0.2696],\n", + " [0.5918, 0.9295, 0.2616],\n", + " [0.2537, 0.7819, 0.4700]],\n", + "\n", + " [[0.6654, 0.1200, 0.5841],\n", + " [0.9147, 0.5522, 0.6529],\n", + " [0.1799, 0.5276, 0.5415],\n", + " ...,\n", + " [0.7536, 0.4346, 0.8793],\n", + " [0.3793, 0.1750, 0.7792],\n", + " [0.9266, 0.8325, 0.9974]]])" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_permuted=x_original.permute(2, 0, 1)\n", + "print(x_original.shape)\n", + "print(x_permuted.shape)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "DD19_zvbQzHo", + "outputId": "1d64ce1b-eb48-47e3-90b6-7f1340e7f2b2" + }, + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "torch.Size([224, 224, 3])\n", + "torch.Size([3, 224, 224])\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_original[0,0,0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NnPmMk4ZRF7w", + "outputId": 
"2cd5da7f-4a23-4a76-8c4a-bb982113f2a4" + }, + "execution_count": 10, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0.4549)" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_permuted[0,0,0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Z0ylNoAARgTo", + "outputId": "ddca0298-cddf-4048-9b71-a791655e5bed" + }, + "execution_count": 11, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0.4549)" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_original[0,0,0]=0.989" + ], + "metadata": { + "id": "RXw0xXsDRi4L" + }, + "execution_count": 13, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "x_original[0,0,0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "1sFdV6wzRo3f", + "outputId": "1cf87d2c-6d88-453a-d136-0f625a2800f1" + }, + "execution_count": 14, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0.9890)" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_permuted[0,0,0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xTX-hx2SR1wp", + "outputId": "0d4908c4-c3bc-44e3-8ec6-1487104cc209" + }, + "execution_count": 15, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0.9890)" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x=torch.arange(1,10).reshape(1,3,3)\n", + "x, x.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mZomOe7gR4Q8", + "outputId": "0b3c922f-ec11-46de-b8a5-9f9533d866ad" + }, + "execution_count": 18, + "outputs": [ + { + "output_type": "execute_result", + "data": { 
+ "text/plain": [ + "(tensor([[[1, 2, 3],\n", + " [4, 5, 6],\n", + " [7, 8, 9]]]),\n", + " torch.Size([1, 3, 3]))" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "3y7v4SQvSBs1", + "outputId": "8c53307d-e628-404d-db66-56c6bdffab7c" + }, + "execution_count": 19, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 2, 3],\n", + " [4, 5, 6],\n", + " [7, 8, 9]])" + ] + }, + "metadata": {}, + "execution_count": 19 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0][0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hf9uG4xLSNya", + "outputId": "3075bc42-9ffa-426b-8a86-95628ffcd824" + }, + "execution_count": 21, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1, 2, 3])" + ] + }, + "metadata": {}, + "execution_count": 21 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0][0][0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "zA4G2Se4SRB3", + "outputId": "324312d2-ed0a-49eb-f81f-e904e53992fe" + }, + "execution_count": 22, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(1)" + ] + }, + "metadata": {}, + "execution_count": 22 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0][2][2]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Mwy3zmKKSdbk", + "outputId": "d35172c3-b099-40a6-ddf1-a453c2adfa44" + }, + "execution_count": 23, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(9)" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[:,1,1]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "fE3nCM1KS7XT", 
+ "outputId": "01f5d755-9737-4235-9f73-dce89ff6ba16" + }, + "execution_count": 24, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([5])" + ] + }, + "metadata": {}, + "execution_count": 24 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0,0,:]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "luNDINKNTTxp", + "outputId": "091195ef-2f71-4602-e95f-529a69193150" + }, + "execution_count": 25, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1, 2, 3])" + ] + }, + "metadata": {}, + "execution_count": 25 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0,:,2]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "KG8A4xbfThCL", + "outputId": "5866bc41-9241-4619-be7b-e9206b3f80ab" + }, + "execution_count": 26, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([3, 6, 9])" + ] + }, + "metadata": {}, + "execution_count": 26 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import numpy as np" + ], + "metadata": { + "id": "CZ3PX0qlTwHJ" + }, + "execution_count": 27, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "array = np.arange(1.0, 8.0)" + ], + "metadata": { + "id": "UOBeTumiT3Lf" + }, + "execution_count": 28, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "array" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "RzcO32E9UCQl", + "outputId": "430def24-c42c-461f-e5e7-398544c695d3" + }, + "execution_count": 29, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([1., 2., 3., 4., 5., 6., 7.])" + ] + }, + "metadata": {}, + "execution_count": 29 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor = torch.from_numpy(array)\n", + "tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": 
"JJIL0q1DUC6O", + "outputId": "8a3b1d7c-4482-4d32-f34f-9212d9d3a177" + }, + "execution_count": 32, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64)" + ] + }, + "metadata": {}, + "execution_count": 32 + } + ] + }, + { + "cell_type": "code", + "source": [ + "array[3]=11.0" + ], + "metadata": { + "id": "j3Ce6q3DUIEK" + }, + "execution_count": 33, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "array" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dc_BCVdjUsCc", + "outputId": "65537325-8b11-4f36-fc73-e56f30d6a036" + }, + "execution_count": 34, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([ 1., 2., 3., 11., 5., 6., 7.])" + ] + }, + "metadata": {}, + "execution_count": 34 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "VG1e_eITUta2", + "outputId": "a26c5198-23b6-4a6d-d73a-ba20cd9782b8" + }, + "execution_count": 35, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 1., 2., 3., 11., 5., 6., 7.], dtype=torch.float64)" + ] + }, + "metadata": {}, + "execution_count": 35 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor = torch.ones(7)\n", + "tensor, tensor.dtype\n", + "numpy_tensor = tensor.numpy()\n", + "numpy_tensor, numpy_tensor.dtype" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Swt8JF8vUuev", + "outputId": "c9e5bf6a-6d2c-41d6-8327-366867ffdd2d" + }, + "execution_count": 37, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(array([1., 1., 1., 1., 1., 1., 1.], dtype=float32), dtype('float32'))" + ] + }, + "metadata": {}, + "execution_count": 37 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + 
"random_tensor_A = torch.rand(3,4)\n", + "random_tensor_B = torch.rand(3,4)\n", + "print(random_tensor_A)\n", + "print(random_tensor_B)\n", + "print(random_tensor_A == random_tensor_B)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "uGcagTteVFTD", + "outputId": "49405790-08e7-4210-b7f1-f00b904c7eb9" + }, + "execution_count": 38, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tensor([[0.9870, 0.6636, 0.6873, 0.8863],\n", + " [0.8386, 0.4169, 0.3587, 0.0265],\n", + " [0.2981, 0.6025, 0.5652, 0.5840]])\n", + "tensor([[0.9821, 0.3481, 0.0913, 0.4940],\n", + " [0.7495, 0.4387, 0.9582, 0.8659],\n", + " [0.5064, 0.6919, 0.0809, 0.9771]])\n", + "tensor([[False, False, False, False],\n", + " [False, False, False, False],\n", + " [False, False, False, False]])\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "RANDOM_SEED = 42\n", + "torch.manual_seed(RANDOM_SEED)\n", + "random_tensor_C = torch.rand(3,4)\n", + "torch.manual_seed(RANDOM_SEED)\n", + "random_tensor_D = torch.rand(3,4)\n", + "print(random_tensor_C)\n", + "print(random_tensor_D)\n", + "print(random_tensor_C == random_tensor_D)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "HznyXyEaWjLM", + "outputId": "25956434-01b6-4059-9054-c9978884ddc1" + }, + "execution_count": 46, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tensor([[0.8823, 0.9150, 0.3829, 0.9593],\n", + " [0.3904, 0.6009, 0.2566, 0.7936],\n", + " [0.9408, 0.1332, 0.9346, 0.5936]])\n", + "tensor([[0.8823, 0.9150, 0.3829, 0.9593],\n", + " [0.3904, 0.6009, 0.2566, 0.7936],\n", + " [0.9408, 0.1332, 0.9346, 0.5936]])\n", + "tensor([[True, True, True, True],\n", + " [True, True, True, True],\n", + " [True, True, True, True]])\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!nvidia-smi" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": 
"vltPTh0YXJSt", + "outputId": "807af6dc-a9ca-4301-ec32-b688dbde8be8" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Thu May 23 02:57:59 2024 \n", + "+---------------------------------------------------------------------------------------+\n", + "| NVIDIA-SMI 535.104.05 Driver Version: 535.104.05 CUDA Version: 12.2 |\n", + "|-----------------------------------------+----------------------+----------------------+\n", + "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n", + "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n", + "| | | MIG M. |\n", + "|=========================================+======================+======================|\n", + "| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n", + "| N/A 60C P8 11W / 70W | 0MiB / 15360MiB | 0% Default |\n", + "| | | N/A |\n", + "+-----------------------------------------+----------------------+----------------------+\n", + " \n", + "+---------------------------------------------------------------------------------------+\n", + "| Processes: |\n", + "| GPU GI CI PID Type Process name GPU Memory |\n", + "| ID ID Usage |\n", + "|=======================================================================================|\n", + "| No running processes found |\n", + "+---------------------------------------------------------------------------------------+\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "torch.cuda.is_available()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "L6mMyPDyYh1j", + "outputId": "279c5dd8-c2a8-4fbd-f321-2f5d7c6e90e6" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ] + }, + { + "cell_type": "code", + "source": [ + "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", + "device" + ], + 
"metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "oOdiYa7ZYytx", + "outputId": "d73b04fc-8963-4826-9722-08d118d5ab91" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'cuda'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.cuda.device_count()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vOdsazLqZFM5", + "outputId": "8189cd6a-9017-4663-a652-3e15c517d9c3" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "1" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor = torch.tensor([1,2,3], device = \"cpu\")\n", + "print(tensor, tensor.device)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cdik9Vw3ZMv0", + "outputId": "044a68fd-83a1-409d-8e3b-655142ca0270" + }, + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tensor([1, 2, 3]) cpu\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor_on_gpu = tensor.to(device)\n", + "tensor_on_gpu" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Zmp835rrZp-z", + "outputId": "37fa3413-18a3-47bf-ae51-5b36ff85a3ef" + }, + "execution_count": 8, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1, 2, 3], device='cuda:0')" + ] + }, + "metadata": {}, + "execution_count": 8 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor_on_gpu.numpy()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 159 + }, + "id": "jhriaa8uZ1yM", + "outputId": "bc5a3226-1a12-4fea-8769-a44f21cdc323" + }, + 
"execution_count": 10, + "outputs": [ + { + "output_type": "error", + "ename": "TypeError", + "evalue": "can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtensor_on_gpu\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnumpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first." + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor_on_cpu = tensor_on_gpu.cpu().numpy()" + ], + "metadata": { + "id": "LHGXK3GgaOzL" + }, + "execution_count": 12, + "outputs": [] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "j-El4LlCajfq" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Kanusho**: \nHati hii imetafsiriwa kwa kutumia huduma ya tafsiri ya AI [Co-op Translator](https://github.com/Azure/co-op-translator). Ingawa tunajitahidi kwa usahihi, tafadhali fahamu kuwa tafsiri za kiotomatiki zinaweza kuwa na makosa au kutokuwa sahihi. Hati ya asili katika lugha yake ya awali inapaswa kuzingatiwa kama chanzo cha mamlaka. Kwa taarifa muhimu, inashauriwa kutumia huduma ya tafsiri ya kitaalamu ya binadamu. 
Hatutawajibika kwa maelewano mabaya au tafsiri zisizo sahihi zinazotokana na matumizi ya tafsiri hii.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/th/2-Regression/1-Tools/notebook.ipynb b/translations/th/2-Regression/1-Tools/notebook.ipynb new file mode 100644 index 000000000..e69de29bb diff --git a/translations/th/2-Regression/1-Tools/solution/R/lesson_1-R.ipynb b/translations/th/2-Regression/1-Tools/solution/R/lesson_1-R.ipynb new file mode 100644 index 000000000..9cf70b748 --- /dev/null +++ b/translations/th/2-Regression/1-Tools/solution/R/lesson_1-R.ipynb @@ -0,0 +1,448 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_1-R.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "c18d3bd0bd8ae3878597e89dcd1fa5c1", + "translation_date": "2025-09-06T13:44:22+00:00", + "source_file": "2-Regression/1-Tools/solution/R/lesson_1-R.ipynb", + "language_code": "th" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "YJUHCXqK57yz" + } + }, + { + "cell_type": "markdown", + "source": [ + "## บทนำสู่การวิเคราะห์การถดถอย - บทเรียนที่ 1\n", + "\n", + "#### ทำความเข้าใจในมุมมอง\n", + "\n", + "✅ มีวิธีการวิเคราะห์การถดถอยหลายประเภท และการเลือกใช้วิธีใดขึ้นอยู่กับคำตอบที่คุณต้องการ หากคุณต้องการทำนายความสูงที่เป็นไปได้ของบุคคลในช่วงอายุหนึ่ง คุณจะใช้ `linear regression` เพราะคุณกำลังมองหาค่า **ตัวเลข** หากคุณสนใจที่จะค้นหาว่าประเภทของอาหารควรถือว่าเป็นมังสวิรัติหรือไม่ คุณกำลังมองหาการ **จัดหมวดหมู่** ดังนั้นคุณจะใช้ `logistic regression` คุณจะได้เรียนรู้เพิ่มเติมเกี่ยวกับ logistic regression ในภายหลัง ลองคิดถึงคำถามบางข้อที่คุณสามารถถามจากข้อมูล และวิธีการใดที่เหมาะสมที่สุด\n", + "\n", + "ในส่วนนี้ คุณจะได้ทำงานกับ 
[ชุดข้อมูลขนาดเล็กเกี่ยวกับโรคเบาหวาน](https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html) ลองจินตนาการว่าคุณต้องการทดสอบการรักษาสำหรับผู้ป่วยโรคเบาหวาน โมเดล Machine Learning อาจช่วยคุณระบุว่าผู้ป่วยคนใดจะตอบสนองต่อการรักษาได้ดีกว่า โดยอิงจากการผสมผสานของตัวแปรต่าง ๆ แม้แต่โมเดลการถดถอยที่พื้นฐานที่สุด เมื่อถูกนำเสนอในรูปแบบภาพ อาจแสดงข้อมูลเกี่ยวกับตัวแปรที่ช่วยให้คุณจัดการทดลองทางคลินิกในเชิงทฤษฎีได้อย่างมีประสิทธิภาพ\n", + "\n", + "เมื่อกล่าวเช่นนั้น มาเริ่มต้นงานนี้กันเลย!\n", + "\n", + "

\n", + " \n", + "

ผลงานศิลปะโดย @allison_horst
\n", + "\n", + "\n" + ], + "metadata": { + "id": "LWNNzfqd6feZ" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 1. การเตรียมเครื่องมือของเรา\n", + "\n", + "สำหรับงานนี้ เราจะต้องใช้แพ็กเกจดังต่อไปนี้:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) คือ [ชุดของแพ็กเกจใน R](https://www.tidyverse.org/packages) ที่ออกแบบมาเพื่อทำให้การวิเคราะห์ข้อมูลรวดเร็วขึ้น ง่ายขึ้น และสนุกมากขึ้น!\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) เป็นกรอบการทำงานที่เป็น [ชุดของแพ็กเกจ](https://www.tidymodels.org/packages/) สำหรับการสร้างโมเดลและการเรียนรู้ของเครื่อง\n", + "\n", + "คุณสามารถติดตั้งแพ็กเกจเหล่านี้ได้ด้วยคำสั่ง:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\"))`\n", + "\n", + "สคริปต์ด้านล่างจะตรวจสอบว่าคุณมีแพ็กเกจที่จำเป็นสำหรับการทำโมดูลนี้หรือไม่ และจะติดตั้งให้ในกรณีที่บางแพ็กเกจขาดหายไป\n" + ], + "metadata": { + "id": "FIo2YhO26wI9" + } + }, + { + "cell_type": "code", + "execution_count": 2, + "source": [ + "suppressWarnings(if(!require(\"pacman\")) install.packages(\"pacman\"))\n", + "pacman::p_load(tidyverse, tidymodels)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Loading required package: pacman\n", + "\n" + ] + } + ], + "metadata": { + "id": "cIA9fz9v7Dss", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "2df7073b-86b2-4b32-cb86-0da605a0dc11" + } + }, + { + "cell_type": "markdown", + "source": [ + "ตอนนี้ มาโหลดแพ็กเกจที่ยอดเยี่ยมเหล่านี้และทำให้พร้อมใช้งานในเซสชัน R ปัจจุบันของเรา (นี่เป็นเพียงตัวอย่าง `pacman::p_load()` ได้ทำสิ่งนี้ให้คุณแล้ว)\n" + ], + "metadata": { + "id": "gpO_P_6f9WUG" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# load the core Tidyverse packages\r\n", + "library(tidyverse)\r\n", + "\r\n", + "# load the core Tidymodels packages\r\n", + "library(tidymodels)\r\n" + ], + "outputs": [], + "metadata": { + "id": "NLMycgG-9ezO" + } + }, + { + "cell_type": "markdown", 
+ "source": [ + "## 2. ชุดข้อมูลโรคเบาหวาน\n", + "\n", + "ในแบบฝึกหัดนี้ เราจะนำทักษะการวิเคราะห์การถดถอยมาใช้โดยการทำนายบนชุดข้อมูลโรคเบาหวาน ชุดข้อมูล [โรคเบาหวาน](https://www4.stat.ncsu.edu/~boos/var.select/diabetes.rwrite1.txt) ประกอบด้วย `442 ตัวอย่าง` ของข้อมูลเกี่ยวกับโรคเบาหวาน โดยมีตัวแปรคุณลักษณะเชิงพยากรณ์ 10 ตัว ได้แก่ `อายุ`, `เพศ`, `ดัชนีมวลกาย`, `ความดันโลหิตเฉลี่ย`, และ `การวัดระดับเซรั่มในเลือดหกตัว` รวมถึงตัวแปรผลลัพธ์ `y`: การวัดเชิงปริมาณของการพัฒนาของโรคในหนึ่งปีหลังจากจุดเริ่มต้น\n", + "\n", + "|จำนวนการสังเกตการณ์|442|\n", + "|----------------------|:---|\n", + "|จำนวนตัวแปรพยากรณ์|10 คอลัมน์แรกเป็นตัวแปรเชิงพยากรณ์เชิงตัวเลข|\n", + "|ผลลัพธ์/เป้าหมาย|คอลัมน์ที่ 11 เป็นการวัดเชิงปริมาณของการพัฒนาของโรคในหนึ่งปีหลังจากจุดเริ่มต้น|\n", + "|ข้อมูลตัวแปรพยากรณ์|- อายุเป็นปี\n", + "||- เพศ\n", + "||- bmi ดัชนีมวลกาย\n", + "||- bp ความดันโลหิตเฉลี่ย\n", + "||- s1 tc, คอเลสเตอรอลรวมในเซรั่ม\n", + "||- s2 ldl, ไลโปโปรตีนความหนาแน่นต่ำ\n", + "||- s3 hdl, ไลโปโปรตีนความหนาแน่นสูง\n", + "||- s4 tch, คอเลสเตอรอลรวม / HDL\n", + "||- s5 ltg, อาจเป็นค่าลอการิทึมของระดับไตรกลีเซอไรด์ในเซรั่ม\n", + "||- s6 glu, ระดับน้ำตาลในเลือด|\n", + "\n", + "> 🎓 จำไว้ว่า นี่คือการเรียนรู้แบบมีผู้สอน และเราต้องมีเป้าหมายที่ชื่อว่า 'y'\n", + "\n", + "ก่อนที่คุณจะสามารถจัดการข้อมูลด้วย R ได้ คุณจำเป็นต้องนำเข้าข้อมูลเข้าสู่หน่วยความจำของ R หรือสร้างการเชื่อมต่อกับข้อมูลที่ R สามารถใช้เพื่อเข้าถึงข้อมูลจากระยะไกล\n", + "\n", + "> แพ็กเกจ [readr](https://readr.tidyverse.org/) ซึ่งเป็นส่วนหนึ่งของ Tidyverse ให้วิธีการที่รวดเร็วและเป็นมิตรในการอ่านข้อมูลแบบสี่เหลี่ยมเข้าสู่ R\n", + "\n", + "ตอนนี้ เรามาโหลดชุดข้อมูลโรคเบาหวานจาก URL แหล่งข้อมูลนี้: \n", + "\n", + "นอกจากนี้ เราจะทำการตรวจสอบความสมเหตุสมผลของข้อมูลโดยใช้ `glimpse()` และแสดง 5 แถวแรกโดยใช้ `slice()`\n", + "\n", + "ก่อนที่จะไปต่อ เรามาแนะนำสิ่งที่คุณจะพบเจอบ่อยในโค้ด R 🥁🥁: ตัวดำเนินการ pipe `%>%`\n", + "\n", + "ตัวดำเนินการ pipe (`%>%`) ทำการดำเนินการตามลำดับตรรกะโดยส่งวัตถุไปข้างหน้าเข้าสู่ฟังก์ชันหรือคำสั่ง 
คุณสามารถคิดว่าตัวดำเนินการ pipe เป็นการพูดว่า \"และจากนั้น\" ในโค้ดของคุณ\n" + ], + "metadata": { + "id": "KM6iXLH996Cl" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Import the data set\r\n", + "diabetes <- read_table2(file = \"https://www4.stat.ncsu.edu/~boos/var.select/diabetes.rwrite1.txt\")\r\n", + "\r\n", + "\r\n", + "# Get a glimpse and dimensions of the data\r\n", + "glimpse(diabetes)\r\n", + "\r\n", + "\r\n", + "# Select the first 5 rows of the data\r\n", + "diabetes %>% \r\n", + " slice(1:5)" + ], + "outputs": [], + "metadata": { + "id": "Z1geAMhM-bSP" + } + }, + { + "cell_type": "markdown", + "source": [ + "`glimpse()` แสดงให้เห็นว่าข้อมูลนี้มี 442 แถว และ 11 คอลัมน์ โดยทุกคอลัมน์เป็นชนิดข้อมูล `double`\n", + "\n", + "
\n", + "\n", + "> `glimpse()` และ `slice()` เป็นฟังก์ชันใน [`dplyr`](https://dplyr.tidyverse.org/) Dplyr ซึ่งเป็นส่วนหนึ่งของ Tidyverse เป็นไวยากรณ์สำหรับการจัดการข้อมูลที่ให้ชุดคำกริยาที่สอดคล้องกันเพื่อช่วยแก้ปัญหาท้าทายที่พบบ่อยที่สุดในการจัดการข้อมูล\n", + "\n", + "
\n", + "\n", + "ตอนนี้เมื่อเรามีข้อมูลแล้ว เรามาโฟกัสไปที่คุณลักษณะหนึ่ง (`bmi`) เพื่อใช้ในแบบฝึกหัดนี้ ซึ่งจะต้องเลือกคอลัมน์ที่ต้องการ แล้วเราจะทำสิ่งนี้ได้อย่างไร?\n", + "\n", + "[`dplyr::select()`](https://dplyr.tidyverse.org/reference/select.html) ช่วยให้เราสามารถ *เลือก* (และเปลี่ยนชื่อได้ถ้าต้องการ) คอลัมน์ใน data frame\n" + ], + "metadata": { + "id": "UwjVT1Hz-c3Z" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Select predictor feature `bmi` and outcome `y`\r\n", + "diabetes_select <- diabetes %>% \r\n", + " select(c(bmi, y))\r\n", + "\r\n", + "# Print the first 10 rows\r\n", + "diabetes_select %>% \r\n", + " slice(1:10)" + ], + "outputs": [], + "metadata": { + "id": "RDY1oAKI-m80" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 3. ข้อมูลการฝึกและการทดสอบ\n", + "\n", + "ในกระบวนการเรียนรู้แบบมีผู้สอน มักจะมีการ *แบ่ง* ข้อมูลออกเป็นสองชุดย่อย; ชุดที่ใหญ่กว่า (โดยทั่วไป) สำหรับการฝึกโมเดล และชุดที่เล็กกว่า \"สำรองไว้\" เพื่อดูว่าโมเดลทำงานได้ดีเพียงใด\n", + "\n", + "ตอนนี้เรามีข้อมูลพร้อมแล้ว เราสามารถดูได้ว่ามีวิธีที่เครื่องสามารถช่วยกำหนดการแบ่งที่เหมาะสมระหว่างตัวเลขในชุดข้อมูลนี้หรือไม่ เราสามารถใช้แพ็กเกจ [rsample](https://tidymodels.github.io/rsample/) ซึ่งเป็นส่วนหนึ่งของเฟรมเวิร์ก Tidymodels เพื่อสร้างออบเจ็กต์ที่มีข้อมูลเกี่ยวกับ *วิธี* การแบ่งข้อมูล และใช้ฟังก์ชัน rsample อีกสองตัวเพื่อดึงชุดข้อมูลการฝึกและการทดสอบที่สร้างขึ้น:\n" + ], + "metadata": { + "id": "SDk668xK-tc3" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "set.seed(2056)\r\n", + "# Split 67% of the data for training and the rest for testing\r\n", + "diabetes_split <- diabetes_select %>% \r\n", + " initial_split(prop = 0.67)\r\n", + "\r\n", + "# Extract the resulting train and test sets\r\n", + "diabetes_train <- training(diabetes_split)\r\n", + "diabetes_test <- testing(diabetes_split)\r\n", + "\r\n", + "# Print the first 10 rows of the training set\r\n", + "diabetes_train %>% \r\n", + " slice(1:10)" + ], + 
"outputs": [], + "metadata": { + "id": "EqtHx129-1h-" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 4. ฝึกโมเดลการถดถอยเชิงเส้นด้วย Tidymodels\n", + "\n", + "ตอนนี้เราพร้อมที่จะฝึกโมเดลของเราแล้ว!\n", + "\n", + "ใน Tidymodels คุณสามารถกำหนดโมเดลโดยใช้ `parsnip()` โดยระบุแนวคิดสามอย่างดังนี้:\n", + "\n", + "- **ประเภทของโมเดล** ใช้เพื่อแยกแยะโมเดล เช่น การถดถอยเชิงเส้น การถดถอยโลจิสติก โมเดลต้นไม้ตัดสินใจ และอื่นๆ\n", + "\n", + "- **โหมดของโมเดล** รวมถึงตัวเลือกทั่วไป เช่น การถดถอยและการจำแนกประเภท; โมเดลบางประเภทสามารถรองรับทั้งสองโหมดนี้ ในขณะที่บางประเภทมีเพียงโหมดเดียว\n", + "\n", + "- **เครื่องมือของโมเดล** คือเครื่องมือคำนวณที่จะใช้ในการปรับโมเดล โดยมักจะเป็นแพ็กเกจใน R เช่น **`\"lm\"`** หรือ **`\"ranger\"`**\n", + "\n", + "ข้อมูลเกี่ยวกับการสร้างโมเดลนี้จะถูกบันทึกไว้ในสเปคโมเดล ดังนั้นเรามาสร้างกันเลย!\n" + ], + "metadata": { + "id": "sBOS-XhB-6v7" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Build a linear model specification\r\n", + "lm_spec <- \r\n", + " # Type\r\n", + " linear_reg() %>% \r\n", + " # Engine\r\n", + " set_engine(\"lm\") %>% \r\n", + " # Mode\r\n", + " set_mode(\"regression\")\r\n", + "\r\n", + "\r\n", + "# Print the model specification\r\n", + "lm_spec" + ], + "outputs": [], + "metadata": { + "id": "20OwEw20--t3" + } + }, + { + "cell_type": "markdown", + "source": [ + "หลังจากที่โมเดลถูก *กำหนด* แล้ว โมเดลสามารถถูก `ประเมิน` หรือ `ฝึกฝน` ได้โดยใช้ฟังก์ชัน [`fit()`](https://parsnip.tidymodels.org/reference/fit.html) ซึ่งมักจะใช้สูตรและข้อมูลบางส่วน\n", + "\n", + "`y ~ .` หมายความว่าเราจะปรับ `y` ให้เป็นค่าที่คาดการณ์/เป้าหมาย โดยอธิบายด้วยตัวแปรทำนาย/คุณลักษณะทั้งหมด เช่น `.` (ในกรณีนี้ เรามีตัวแปรทำนายเพียงตัวเดียว: `bmi`)\n" + ], + "metadata": { + "id": "_oDHs89k_CJj" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Build a linear model specification\r\n", + "lm_spec <- linear_reg() %>% \r\n", + " set_engine(\"lm\") %>%\r\n", + " 
set_mode(\"regression\")\r\n", + "\r\n", + "\r\n", + "# Train a linear regression model\r\n", + "lm_mod <- lm_spec %>% \r\n", + " fit(y ~ ., data = diabetes_train)\r\n", + "\r\n", + "# Print the model\r\n", + "lm_mod" + ], + "outputs": [], + "metadata": { + "id": "YlsHqd-q_GJQ" + } + }, + { + "cell_type": "markdown", + "source": [ + "จากผลลัพธ์ของโมเดล เราสามารถเห็นค่าสัมประสิทธิ์ที่ได้จากการฝึกฝน ซึ่งค่าสัมประสิทธิ์เหล่านี้แสดงถึงค่าของเส้นที่เหมาะสมที่สุดที่ช่วยลดข้อผิดพลาดโดยรวมระหว่างตัวแปรจริงและตัวแปรที่คาดการณ์\n", + "\n", + "
\n", + "\n", + "## 5. ทำนายผลบนชุดข้อมูลทดสอบ\n", + "\n", + "เมื่อเราได้ฝึกฝนโมเดลแล้ว เราสามารถใช้มันเพื่อทำนายการพัฒนาของโรค y สำหรับชุดข้อมูลทดสอบโดยใช้ [parsnip::predict()](https://parsnip.tidymodels.org/reference/predict.model_fit.html) ซึ่งจะถูกใช้เพื่อวาดเส้นแบ่งระหว่างกลุ่มข้อมูล\n" + ], + "metadata": { + "id": "kGZ22RQj_Olu" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make predictions for the test set\r\n", + "predictions <- lm_mod %>% \r\n", + " predict(new_data = diabetes_test)\r\n", + "\r\n", + "# Print out some of the predictions\r\n", + "predictions %>% \r\n", + " slice(1:5)" + ], + "outputs": [], + "metadata": { + "id": "nXHbY7M2_aao" + } + }, + { + "cell_type": "markdown", + "source": [ + "เย้! 💃🕺 เราเพิ่งฝึกโมเดลและใช้มันเพื่อทำการพยากรณ์!\n", + "\n", + "เมื่อทำการพยากรณ์ ตามธรรมเนียมของ tidymodels จะสร้างผลลัพธ์ในรูปแบบ tibble หรือ data frame ที่มีชื่อคอลัมน์มาตรฐานเสมอ วิธีนี้ช่วยให้สามารถรวมข้อมูลต้นฉบับและผลการพยากรณ์ในรูปแบบที่ใช้งานได้ง่ายสำหรับการดำเนินการต่อ เช่น การสร้างกราฟ\n", + "\n", + "`dplyr::bind_cols()` ช่วยรวมหลาย data frame เข้าด้วยกันอย่างมีประสิทธิภาพในรูปแบบคอลัมน์\n" + ], + "metadata": { + "id": "R_JstwUY_bIs" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Combine the predictions and the original test set\r\n", + "results <- diabetes_test %>% \r\n", + " bind_cols(predictions)\r\n", + "\r\n", + "\r\n", + "results %>% \r\n", + " slice(1:5)" + ], + "outputs": [], + "metadata": { + "id": "RybsMJR7_iI8" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 6. 
แสดงผลการสร้างโมเดล\n", + "\n", + "ถึงเวลาที่จะดูผลลัพธ์ในรูปแบบภาพแล้ว 📈 เราจะสร้างกราฟกระจายของค่าทั้งหมดใน `y` และ `bmi` จากชุดข้อมูลทดสอบ จากนั้นใช้ค่าที่คาดการณ์เพื่อวาดเส้นในตำแหน่งที่เหมาะสมที่สุด ระหว่างกลุ่มข้อมูลของโมเดล\n", + "\n", + "R มีระบบหลายแบบสำหรับการสร้างกราฟ แต่ `ggplot2` เป็นหนึ่งในระบบที่ดูเรียบง่ายและมีความหลากหลายมากที่สุด ระบบนี้ช่วยให้คุณสามารถสร้างกราฟโดย **การรวมองค์ประกอบที่เป็นอิสระเข้าด้วยกัน**\n" + ], + "metadata": { + "id": "XJbYbMZW_n_s" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Set a theme for the plot\r\n", + "theme_set(theme_light())\r\n", + "# Create a scatter plot\r\n", + "results %>% \r\n", + " ggplot(aes(x = bmi)) +\r\n", + " # Add a scatter plot\r\n", + " geom_point(aes(y = y), size = 1.6) +\r\n", + " # Add a line plot\r\n", + " geom_line(aes(y = .pred), color = \"blue\", size = 1.5)" + ], + "outputs": [], + "metadata": { + "id": "R9tYp3VW_sTn" + } + }, + { + "cell_type": "markdown", + "source": [ + "✅ ลองคิดดูว่าเกิดอะไรขึ้นที่นี่ เส้นตรงเส้นหนึ่งกำลังผ่านจุดข้อมูลเล็กๆ หลายจุด แต่จริงๆ แล้วมันกำลังทำอะไรอยู่? คุณเห็นไหมว่าคุณควรจะสามารถใช้เส้นนี้เพื่อทำนายว่าจุดข้อมูลใหม่ที่ยังไม่เคยเห็นควรจะอยู่ตรงไหนในความสัมพันธ์กับแกน y ของกราฟ? ลองอธิบายเป็นคำพูดถึงการใช้งานจริงของโมเดลนี้\n", + "\n", + "ขอแสดงความยินดี! 
คุณสร้างโมเดลการถดถอยเชิงเส้นตัวแรกของคุณ สร้างการทำนายด้วยมัน และแสดงผลในกราฟ!\n" + ], + "metadata": { + "id": "zrPtHIxx_tNI" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้อง แต่โปรดทราบว่าการแปลอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่แม่นยำ เอกสารต้นฉบับในภาษาต้นทางควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษาจากผู้เชี่ยวชาญ เราไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความที่ผิดพลาดซึ่งเกิดจากการใช้การแปลนี้\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/th/2-Regression/1-Tools/solution/notebook.ipynb b/translations/th/2-Regression/1-Tools/solution/notebook.ipynb new file mode 100644 index 000000000..04c2ce5ee --- /dev/null +++ b/translations/th/2-Regression/1-Tools/solution/notebook.ipynb @@ -0,0 +1,669 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from sklearn import datasets, linear_model, model_selection\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(442, 10)\n", + "[ 0.03807591 0.05068012 0.06169621 0.02187239 -0.0442235 -0.03482076\n", + " -0.04340085 -0.00259226 0.01990749 -0.01764613]\n" + ] + } + ], + "source": [ + "X, y = datasets.load_diabetes(return_X_y=True)\n", + "print(X.shape)\n", + "print(X[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "เลือกเพียงคุณลักษณะเดียวเพื่อมุ่งเน้นสำหรับแบบฝึกหัดนี้\n" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(442,)\n" + ] + } + ], + "source": [ + "# Selecting the 3rd feature\n", + "X = X[:, 2]\n", + "print(X.shape)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(442, 1)\n", + "[[ 0.06169621]\n", + " [-0.05147406]\n", + " [ 0.04445121]\n", + " [-0.01159501]\n", + " [-0.03638469]\n", + " [-0.04069594]\n", + " [-0.04716281]\n", + " [-0.00189471]\n", + " [ 0.06169621]\n", + " [ 0.03906215]\n", + " [-0.08380842]\n", + " [ 0.01750591]\n", + " [-0.02884001]\n", + " [-0.00189471]\n", + " [-0.02560657]\n", + " [-0.01806189]\n", + " [ 0.04229559]\n", + " [ 0.01211685]\n", + " [-0.0105172 ]\n", + " [-0.01806189]\n", + " [-0.05686312]\n", + " [-0.02237314]\n", + " [-0.00405033]\n", + " [ 0.06061839]\n", + " [ 0.03582872]\n", + " [-0.01267283]\n", + " [-0.07734155]\n", + " [ 0.05954058]\n", + " [-0.02129532]\n", + " [-0.00620595]\n", + " [ 0.04445121]\n", + " [-0.06548562]\n", + " [ 0.12528712]\n", + " [-0.05039625]\n", + " [-0.06332999]\n", + " [-0.03099563]\n", + " [ 0.02289497]\n", + " [ 0.01103904]\n", + " [ 0.07139652]\n", + " [ 0.01427248]\n", + " [-0.00836158]\n", + " [-0.06764124]\n", + " [-0.0105172 ]\n", + " [-0.02345095]\n", + " [ 0.06816308]\n", + " [-0.03530688]\n", + " [-0.01159501]\n", + " [-0.0730303 ]\n", + " [-0.04177375]\n", + " [ 0.01427248]\n", + " [-0.00728377]\n", + " [ 0.0164281 ]\n", + " [-0.00943939]\n", + " [-0.01590626]\n", + " [ 0.0250506 ]\n", + " [-0.04931844]\n", + " [ 0.04121778]\n", + " [-0.06332999]\n", + " [-0.06440781]\n", + " [-0.02560657]\n", + " [-0.00405033]\n", + " [ 0.00457217]\n", + " [-0.00728377]\n", + " [-0.0374625 ]\n", + " [-0.02560657]\n", + " [-0.02452876]\n", + " [-0.01806189]\n", + " [-0.01482845]\n", + " [-0.02991782]\n", + " [-0.046085 ]\n", + " 
[-0.06979687]\n", + " [ 0.03367309]\n", + " [-0.00405033]\n", + " [-0.02021751]\n", + " [ 0.00241654]\n", + " [-0.03099563]\n", + " [ 0.02828403]\n", + " [-0.03638469]\n", + " [-0.05794093]\n", + " [-0.0374625 ]\n", + " [ 0.01211685]\n", + " [-0.02237314]\n", + " [-0.03530688]\n", + " [ 0.00996123]\n", + " [-0.03961813]\n", + " [ 0.07139652]\n", + " [-0.07518593]\n", + " [-0.00620595]\n", + " [-0.04069594]\n", + " [-0.04824063]\n", + " [-0.02560657]\n", + " [ 0.0519959 ]\n", + " [ 0.00457217]\n", + " [-0.06440781]\n", + " [-0.01698407]\n", + " [-0.05794093]\n", + " [ 0.00996123]\n", + " [ 0.08864151]\n", + " [-0.00512814]\n", + " [-0.06440781]\n", + " [ 0.01750591]\n", + " [-0.04500719]\n", + " [ 0.02828403]\n", + " [ 0.04121778]\n", + " [ 0.06492964]\n", + " [-0.03207344]\n", + " [-0.07626374]\n", + " [ 0.04984027]\n", + " [ 0.04552903]\n", + " [-0.00943939]\n", + " [-0.03207344]\n", + " [ 0.00457217]\n", + " [ 0.02073935]\n", + " [ 0.01427248]\n", + " [ 0.11019775]\n", + " [ 0.00133873]\n", + " [ 0.05846277]\n", + " [-0.02129532]\n", + " [-0.0105172 ]\n", + " [-0.04716281]\n", + " [ 0.00457217]\n", + " [ 0.01750591]\n", + " [ 0.08109682]\n", + " [ 0.0347509 ]\n", + " [ 0.02397278]\n", + " [-0.00836158]\n", + " [-0.06117437]\n", + " [-0.00189471]\n", + " [-0.06225218]\n", + " [ 0.0164281 ]\n", + " [ 0.09618619]\n", + " [-0.06979687]\n", + " [-0.02129532]\n", + " [-0.05362969]\n", + " [ 0.0433734 ]\n", + " [ 0.05630715]\n", + " [-0.0816528 ]\n", + " [ 0.04984027]\n", + " [ 0.11127556]\n", + " [ 0.06169621]\n", + " [ 0.01427248]\n", + " [ 0.04768465]\n", + " [ 0.01211685]\n", + " [ 0.00564998]\n", + " [ 0.04660684]\n", + " [ 0.12852056]\n", + " [ 0.05954058]\n", + " [ 0.09295276]\n", + " [ 0.01535029]\n", + " [-0.00512814]\n", + " [ 0.0703187 ]\n", + " [-0.00405033]\n", + " [-0.00081689]\n", + " [-0.04392938]\n", + " [ 0.02073935]\n", + " [ 0.06061839]\n", + " [-0.0105172 ]\n", + " [-0.03315126]\n", + " [-0.06548562]\n", + " [ 0.0433734 ]\n", + " [-0.06225218]\n", + 
" [ 0.06385183]\n", + " [ 0.03043966]\n", + " [ 0.07247433]\n", + " [-0.0191397 ]\n", + " [-0.06656343]\n", + " [-0.06009656]\n", + " [ 0.06924089]\n", + " [ 0.05954058]\n", + " [-0.02668438]\n", + " [-0.02021751]\n", + " [-0.046085 ]\n", + " [ 0.07139652]\n", + " [-0.07949718]\n", + " [ 0.00996123]\n", + " [-0.03854032]\n", + " [ 0.01966154]\n", + " [ 0.02720622]\n", + " [-0.00836158]\n", + " [-0.01590626]\n", + " [ 0.00457217]\n", + " [-0.04285156]\n", + " [ 0.00564998]\n", + " [-0.03530688]\n", + " [ 0.02397278]\n", + " [-0.01806189]\n", + " [ 0.04229559]\n", + " [-0.0547075 ]\n", + " [-0.00297252]\n", + " [-0.06656343]\n", + " [-0.01267283]\n", + " [-0.04177375]\n", + " [-0.03099563]\n", + " [-0.00512814]\n", + " [-0.05901875]\n", + " [ 0.0250506 ]\n", + " [-0.046085 ]\n", + " [ 0.00349435]\n", + " [ 0.05415152]\n", + " [-0.04500719]\n", + " [-0.05794093]\n", + " [-0.05578531]\n", + " [ 0.00133873]\n", + " [ 0.03043966]\n", + " [ 0.00672779]\n", + " [ 0.04660684]\n", + " [ 0.02612841]\n", + " [ 0.04552903]\n", + " [ 0.04013997]\n", + " [-0.01806189]\n", + " [ 0.01427248]\n", + " [ 0.03690653]\n", + " [ 0.00349435]\n", + " [-0.07087468]\n", + " [-0.03315126]\n", + " [ 0.09403057]\n", + " [ 0.03582872]\n", + " [ 0.03151747]\n", + " [-0.06548562]\n", + " [-0.04177375]\n", + " [-0.03961813]\n", + " [-0.03854032]\n", + " [-0.02560657]\n", + " [-0.02345095]\n", + " [-0.06656343]\n", + " [ 0.03259528]\n", + " [-0.046085 ]\n", + " [-0.02991782]\n", + " [-0.01267283]\n", + " [-0.01590626]\n", + " [ 0.07139652]\n", + " [-0.03099563]\n", + " [ 0.00026092]\n", + " [ 0.03690653]\n", + " [ 0.03906215]\n", + " [-0.01482845]\n", + " [ 0.00672779]\n", + " [-0.06871905]\n", + " [-0.00943939]\n", + " [ 0.01966154]\n", + " [ 0.07462995]\n", + " [-0.00836158]\n", + " [-0.02345095]\n", + " [-0.046085 ]\n", + " [ 0.05415152]\n", + " [-0.03530688]\n", + " [-0.03207344]\n", + " [-0.0816528 ]\n", + " [ 0.04768465]\n", + " [ 0.06061839]\n", + " [ 0.05630715]\n", + " [ 0.09834182]\n", + " 
[ 0.05954058]\n", + " [ 0.03367309]\n", + " [ 0.05630715]\n", + " [-0.06548562]\n", + " [ 0.16085492]\n", + " [-0.05578531]\n", + " [-0.02452876]\n", + " [-0.03638469]\n", + " [-0.00836158]\n", + " [-0.04177375]\n", + " [ 0.12744274]\n", + " [-0.07734155]\n", + " [ 0.02828403]\n", + " [-0.02560657]\n", + " [-0.06225218]\n", + " [-0.00081689]\n", + " [ 0.08864151]\n", + " [-0.03207344]\n", + " [ 0.03043966]\n", + " [ 0.00888341]\n", + " [ 0.00672779]\n", + " [-0.02021751]\n", + " [-0.02452876]\n", + " [-0.01159501]\n", + " [ 0.02612841]\n", + " [-0.05901875]\n", + " [-0.03638469]\n", + " [-0.02452876]\n", + " [ 0.01858372]\n", + " [-0.0902753 ]\n", + " [-0.00512814]\n", + " [-0.05255187]\n", + " [-0.02237314]\n", + " [-0.02021751]\n", + " [-0.0547075 ]\n", + " [-0.00620595]\n", + " [-0.01698407]\n", + " [ 0.05522933]\n", + " [ 0.07678558]\n", + " [ 0.01858372]\n", + " [-0.02237314]\n", + " [ 0.09295276]\n", + " [-0.03099563]\n", + " [ 0.03906215]\n", + " [-0.06117437]\n", + " [-0.00836158]\n", + " [-0.0374625 ]\n", + " [-0.01375064]\n", + " [ 0.07355214]\n", + " [-0.02452876]\n", + " [ 0.03367309]\n", + " [ 0.0347509 ]\n", + " [-0.03854032]\n", + " [-0.03961813]\n", + " [-0.00189471]\n", + " [-0.03099563]\n", + " [-0.046085 ]\n", + " [ 0.00133873]\n", + " [ 0.06492964]\n", + " [ 0.04013997]\n", + " [-0.02345095]\n", + " [ 0.05307371]\n", + " [ 0.04013997]\n", + " [-0.02021751]\n", + " [ 0.01427248]\n", + " [-0.03422907]\n", + " [ 0.00672779]\n", + " [ 0.00457217]\n", + " [ 0.03043966]\n", + " [ 0.0519959 ]\n", + " [ 0.06169621]\n", + " [-0.00728377]\n", + " [ 0.00564998]\n", + " [ 0.05415152]\n", + " [-0.00836158]\n", + " [ 0.114509 ]\n", + " [ 0.06708527]\n", + " [-0.05578531]\n", + " [ 0.03043966]\n", + " [-0.02560657]\n", + " [ 0.10480869]\n", + " [-0.00620595]\n", + " [-0.04716281]\n", + " [-0.04824063]\n", + " [ 0.08540807]\n", + " [-0.01267283]\n", + " [-0.03315126]\n", + " [-0.00728377]\n", + " [-0.01375064]\n", + " [ 0.05954058]\n", + " [ 0.02181716]\n", + " 
[ 0.01858372]\n", + " [-0.01159501]\n", + " [-0.00297252]\n", + " [ 0.01750591]\n", + " [-0.02991782]\n", + " [-0.02021751]\n", + " [-0.05794093]\n", + " [ 0.06061839]\n", + " [-0.04069594]\n", + " [-0.07195249]\n", + " [-0.05578531]\n", + " [ 0.04552903]\n", + " [-0.00943939]\n", + " [-0.03315126]\n", + " [ 0.04984027]\n", + " [-0.08488624]\n", + " [ 0.00564998]\n", + " [ 0.02073935]\n", + " [-0.00728377]\n", + " [ 0.10480869]\n", + " [-0.02452876]\n", + " [-0.00620595]\n", + " [-0.03854032]\n", + " [ 0.13714305]\n", + " [ 0.17055523]\n", + " [ 0.00241654]\n", + " [ 0.03798434]\n", + " [-0.05794093]\n", + " [-0.00943939]\n", + " [-0.02345095]\n", + " [-0.0105172 ]\n", + " [-0.03422907]\n", + " [-0.00297252]\n", + " [ 0.06816308]\n", + " [ 0.00996123]\n", + " [ 0.00241654]\n", + " [-0.03854032]\n", + " [ 0.02612841]\n", + " [-0.08919748]\n", + " [ 0.06061839]\n", + " [-0.02884001]\n", + " [-0.02991782]\n", + " [-0.0191397 ]\n", + " [-0.04069594]\n", + " [ 0.01535029]\n", + " [-0.02452876]\n", + " [ 0.00133873]\n", + " [ 0.06924089]\n", + " [-0.06979687]\n", + " [-0.02991782]\n", + " [-0.046085 ]\n", + " [ 0.01858372]\n", + " [ 0.00133873]\n", + " [-0.03099563]\n", + " [-0.00405033]\n", + " [ 0.01535029]\n", + " [ 0.02289497]\n", + " [ 0.04552903]\n", + " [-0.04500719]\n", + " [-0.03315126]\n", + " [ 0.097264 ]\n", + " [ 0.05415152]\n", + " [ 0.12313149]\n", + " [-0.08057499]\n", + " [ 0.09295276]\n", + " [-0.05039625]\n", + " [-0.01159501]\n", + " [-0.0277622 ]\n", + " [ 0.05846277]\n", + " [ 0.08540807]\n", + " [-0.00081689]\n", + " [ 0.00672779]\n", + " [ 0.00888341]\n", + " [ 0.08001901]\n", + " [ 0.07139652]\n", + " [-0.02452876]\n", + " [-0.0547075 ]\n", + " [-0.03638469]\n", + " [ 0.0164281 ]\n", + " [ 0.07786339]\n", + " [-0.03961813]\n", + " [ 0.01103904]\n", + " [-0.04069594]\n", + " [-0.03422907]\n", + " [ 0.00564998]\n", + " [ 0.08864151]\n", + " [-0.03315126]\n", + " [-0.05686312]\n", + " [-0.03099563]\n", + " [ 0.05522933]\n", + " [-0.06009656]\n", + " 
[ 0.00133873]\n", + " [-0.02345095]\n", + " [-0.07410811]\n", + " [ 0.01966154]\n", + " [-0.01590626]\n", + " [-0.01590626]\n", + " [ 0.03906215]\n", + " [-0.0730303 ]]\n" + ] + } + ], + "source": [ + "#Reshaping to get a 2D array\n", + "X = X.reshape(-1, 1)\n", + "print(X.shape)\n", + "print(X)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.33)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "เลือกโมเดลและปรับให้เหมาะสมกับข้อมูลการฝึกอบรม\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "LinearRegression()" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = linear_model.LinearRegression()\n", + "model.fit(X_train, y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ใช้ข้อมูลทดสอบเพื่อทำนายเส้น\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred = model.predict(X_test)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "แสดงผลลัพธ์ในกราฟ\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.scatter(X_test, y_test, color='black')\n", + "plt.plot(X_test, y_pred, color='blue', linewidth=3)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้องมากที่สุด แต่โปรดทราบว่าการแปลอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่ถูกต้อง เอกสารต้นฉบับในภาษาดั้งเดิมควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษามืออาชีพ เราไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความผิดที่เกิดจากการใช้การแปลนี้\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.1" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "16ff1a974f6e4348e869e4a7d366b86a", + "translation_date": "2025-09-06T13:39:29+00:00", + "source_file": "2-Regression/1-Tools/solution/notebook.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/th/2-Regression/2-Data/notebook.ipynb b/translations/th/2-Regression/2-Data/notebook.ipynb new file mode 100644 index 000000000..7705c608e --- /dev/null +++ b/translations/th/2-Regression/2-Data/notebook.ipynb @@ -0,0 +1,46 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + 
"nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3-final" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3", + "language": "python" + }, + "coopTranslator": { + "original_hash": "1b2ab303ac6c604a34c6ca7a49077fc7", + "translation_date": "2025-09-06T13:46:03+00:00", + "source_file": "2-Regression/2-Data/notebook.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้อง แต่โปรดทราบว่าการแปลอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่แม่นยำ เอกสารต้นฉบับในภาษาดั้งเดิมควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ แนะนำให้ใช้บริการแปลภาษาจากผู้เชี่ยวชาญ เราไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความที่ผิดพลาดซึ่งเกิดจากการใช้การแปลนี้\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/th/2-Regression/2-Data/solution/R/lesson_2-R.ipynb b/translations/th/2-Regression/2-Data/solution/R/lesson_2-R.ipynb new file mode 100644 index 000000000..7c67e3eea --- /dev/null +++ b/translations/th/2-Regression/2-Data/solution/R/lesson_2-R.ipynb @@ -0,0 +1,672 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_2-R.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "f3c335f9940cfd76528b3ef918b9b342", + "translation_date": "2025-09-06T13:54:07+00:00", + "source_file": "2-Regression/2-Data/solution/R/lesson_2-R.ipynb", + "language_code": "th" + } + }, + "cells": [ + { + "cell_type": "markdown", 
+ "source": [ + "# สร้างโมเดลการถดถอย: เตรียมและแสดงข้อมูล\n", + "\n", + "## **การถดถอยเชิงเส้นสำหรับฟักทอง - บทเรียนที่ 2**\n", + "#### บทนำ\n", + "\n", + "เมื่อคุณมีเครื่องมือที่จำเป็นสำหรับการเริ่มต้นสร้างโมเดลการเรียนรู้ของเครื่องด้วย Tidymodels และ Tidyverse คุณก็พร้อมที่จะเริ่มตั้งคำถามกับข้อมูลของคุณแล้ว การทำงานกับข้อมูลและการนำโซลูชัน ML มาใช้ สิ่งสำคัญคือการเข้าใจวิธีตั้งคำถามที่ถูกต้องเพื่อปลดล็อกศักยภาพของชุดข้อมูลของคุณอย่างเหมาะสม\n", + "\n", + "ในบทเรียนนี้ คุณจะได้เรียนรู้:\n", + "\n", + "- วิธีเตรียมข้อมูลของคุณสำหรับการสร้างโมเดล\n", + "\n", + "- วิธีใช้ `ggplot2` สำหรับการแสดงข้อมูล\n", + "\n", + "คำถามที่คุณต้องการคำตอบจะกำหนดประเภทของอัลกอริทึม ML ที่คุณจะใช้ และคุณภาพของคำตอบที่คุณได้รับจะขึ้นอยู่กับลักษณะของข้อมูลของคุณอย่างมาก\n", + "\n", + "มาดูตัวอย่างการทำงานจริงกันเถอะ\n", + "\n", + "\n", + "

\n", + " \n", + "

ภาพประกอบโดย @allison_horst
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "Pg5aexcOPqAZ" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 1. การนำเข้าข้อมูลฟักทองและเรียกใช้ Tidyverse\n", + "\n", + "เราจะต้องใช้แพ็กเกจต่อไปนี้เพื่อจัดการบทเรียนนี้:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) คือ [ชุดของแพ็กเกจ R](https://www.tidyverse.org/packages) ที่ออกแบบมาเพื่อทำให้การวิเคราะห์ข้อมูลเร็วขึ้น ง่ายขึ้น และสนุกมากขึ้น!\n", + "\n", + "คุณสามารถติดตั้งแพ็กเกจเหล่านี้ได้ด้วยคำสั่ง:\n", + "\n", + "`install.packages(c(\"tidyverse\"))`\n", + "\n", + "สคริปต์ด้านล่างจะตรวจสอบว่าคุณมีแพ็กเกจที่จำเป็นสำหรับการทำโมดูลนี้หรือไม่ และจะติดตั้งให้คุณในกรณีที่บางแพ็กเกจขาดหายไป\n" + ], + "metadata": { + "id": "dc5WhyVdXAjR" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "suppressWarnings(if(!require(\"pacman\")) install.packages(\"pacman\"))\n", + "pacman::p_load(tidyverse)" + ], + "outputs": [], + "metadata": { + "id": "GqPYUZgfXOBt" + } + }, + { + "cell_type": "markdown", + "source": [ + "ตอนนี้ มาเริ่มต้นใช้งานแพ็กเกจและโหลด [ข้อมูล](https://github.com/microsoft/ML-For-Beginners/blob/main/2-Regression/data/US-pumpkins.csv) ที่เตรียมไว้สำหรับบทเรียนนี้!\n" + ], + "metadata": { + "id": "kvjDTPDSXRr2" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load the core Tidyverse packages\n", + "library(tidyverse)\n", + "\n", + "# Import the pumpkins data\n", + "pumpkins <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/2-Regression/data/US-pumpkins.csv\")\n", + "\n", + "\n", + "# Get a glimpse and dimensions of the data\n", + "glimpse(pumpkins)\n", + "\n", + "\n", + "# Print the first 50 rows of the data set\n", + "pumpkins %>% \n", + " slice_head(n =50)" + ], + "outputs": [], + "metadata": { + "id": "VMri-t2zXqgD" + } + }, + { + "cell_type": "markdown", + "source": [ + "การใช้คำสั่ง `glimpse()` อย่างรวดเร็วจะช่วยให้เห็นได้ทันทีว่ามีช่องว่างในข้อมูล 
และมีการผสมผสานระหว่างข้อมูลประเภทข้อความ (`chr`) และข้อมูลตัวเลข (`dbl`) นอกจากนี้ `Date` ยังเป็นประเภทตัวอักษร และยังมีคอลัมน์แปลก ๆ ที่ชื่อว่า `Package` ซึ่งข้อมูลในคอลัมน์นี้เป็นการผสมกันระหว่าง `sacks`, `bins` และค่าอื่น ๆ อีกด้วย โดยรวมแล้ว ข้อมูลนี้ค่อนข้างยุ่งเหยิง 😤\n", + "\n", + "ในความเป็นจริง การได้รับชุดข้อมูลที่พร้อมใช้งานสำหรับสร้างโมเดล Machine Learning โดยตรงนั้นไม่ใช่เรื่องปกติ แต่ไม่ต้องกังวล เพราะในบทเรียนนี้ คุณจะได้เรียนรู้วิธีการเตรียมชุดข้อมูลดิบโดยใช้ไลบรารีมาตรฐานของ R 🧑‍🔧 นอกจากนี้ คุณยังจะได้เรียนรู้เทคนิคต่าง ๆ ในการสร้างภาพข้อมูลอีกด้วย 📈📊\n", + "
\n", + "\n", + "> ทบทวนสั้น ๆ: ตัวดำเนินการ pipe (`%>%`) ทำหน้าที่ดำเนินการตามลำดับตรรกะโดยส่งวัตถุไปข้างหน้าเข้าสู่ฟังก์ชันหรือคำสั่ง คุณสามารถคิดว่าตัวดำเนินการ pipe เป็นเหมือนการพูดว่า \"และจากนั้น\" ในโค้ดของคุณ\n" + ], + "metadata": { + "id": "REWcIv9yX29v" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 2. ตรวจสอบข้อมูลที่หายไป\n", + "\n", + "หนึ่งในปัญหาที่พบบ่อยที่สุดที่นักวิทยาศาสตร์ข้อมูลต้องจัดการคือข้อมูลที่ไม่สมบูรณ์หรือข้อมูลที่หายไป R แทนค่าที่หายไปหรือค่าที่ไม่ทราบด้วยค่าพิเศษที่เรียกว่า `NA` (Not Available)\n", + "\n", + "แล้วเราจะรู้ได้อย่างไรว่ามีค่าที่หายไปใน data frame?\n", + "
\n", + "- วิธีที่ตรงไปตรงมาคือการใช้ฟังก์ชันพื้นฐานของ R `anyNA` ซึ่งจะคืนค่าตรรกะเป็น `TRUE` หรือ `FALSE`\n" + ], + "metadata": { + "id": "Zxfb3AM5YbUe" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "pumpkins %>% \n", + " anyNA()" + ], + "outputs": [], + "metadata": { + "id": "G--DQutAYltj" + } + }, + { + "cell_type": "markdown", + "source": [ + "เยี่ยมเลย ดูเหมือนว่าจะมีข้อมูลบางส่วนหายไป! นั่นเป็นจุดเริ่มต้นที่ดี\n", + "\n", + "- อีกวิธีหนึ่งคือการใช้ฟังก์ชัน `is.na()` ซึ่งจะแสดงว่ามีองค์ประกอบในคอลัมน์ใดที่หายไป โดยจะระบุด้วยค่าตรรกะ `TRUE`\n" + ], + "metadata": { + "id": "mU-7-SB6YokF" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "pumpkins %>% \n", + " is.na() %>% \n", + " head(n = 7)" + ], + "outputs": [], + "metadata": { + "id": "W-DxDOR4YxSW" + } + }, + { + "cell_type": "markdown", + "source": [ + "โอเค งานเสร็จแล้ว แต่เมื่อเจอกับ Data Frame ขนาดใหญ่แบบนี้ การตรวจสอบแถวและคอลัมน์ทั้งหมดทีละตัวจะไม่มีประสิทธิภาพและแทบจะเป็นไปไม่ได้เลย😴\n", + "\n", + "- วิธีที่เข้าใจง่ายกว่าคือการคำนวณผลรวมของค่าที่หายไปในแต่ละคอลัมน์:\n" + ], + "metadata": { + "id": "xUWxipKYY0o7" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "pumpkins %>% \n", + " is.na() %>% \n", + " colSums()" + ], + "outputs": [], + "metadata": { + "id": "ZRBWV6P9ZArL" + } + }, + { + "cell_type": "markdown", + "source": [ + "ดีขึ้นมาก! มีข้อมูลที่ขาดหายไป แต่บางทีอาจจะไม่สำคัญสำหรับงานที่กำลังทำอยู่ ลองดูว่าการวิเคราะห์เพิ่มเติมจะนำไปสู่อะไร\n", + "\n", + "> นอกจากชุดแพ็กเกจและฟังก์ชันที่ยอดเยี่ยมแล้ว R ยังมีเอกสารประกอบที่ดีมากอีกด้วย ตัวอย่างเช่น ใช้ `help(colSums)` หรือ `?colSums` เพื่อค้นหาข้อมูลเพิ่มเติมเกี่ยวกับฟังก์ชันนี้\n" + ], + "metadata": { + "id": "9gv-crB6ZD1Y" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 3. Dplyr: ไวยากรณ์สำหรับการจัดการข้อมูล\n", + "\n", + "

\n", + " \n", + "

ผลงานโดย @allison_horst
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "o4jLY5-VZO2C" + } + }, + { + "cell_type": "markdown", + "source": [ + "[`dplyr`](https://dplyr.tidyverse.org/) เป็นแพ็กเกจใน Tidyverse ที่เป็นไวยากรณ์สำหรับการจัดการข้อมูล โดยมีชุดคำกริยาที่สอดคล้องกันซึ่งช่วยให้คุณแก้ปัญหาการจัดการข้อมูลที่พบบ่อยที่สุดได้ ในส่วนนี้ เราจะมาสำรวจคำกริยาบางตัวของ dplyr กัน! \n", + "
\n" + ], + "metadata": { + "id": "i5o33MQBZWWw" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### dplyr::select()\n", + "\n", + "`select()` เป็นฟังก์ชันในแพ็กเกจ `dplyr` ที่ช่วยให้คุณเลือกคอลัมน์ที่ต้องการเก็บไว้หรือไม่ต้องการ\n", + "\n", + "เพื่อให้การทำงานกับ data frame ง่ายขึ้น คุณสามารถลบคอลัมน์บางส่วนออกโดยใช้ `select()` และเก็บไว้เฉพาะคอลัมน์ที่คุณต้องการ\n", + "\n", + "ตัวอย่างเช่น ในการวิเคราะห์ครั้งนี้ เราจะใช้คอลัมน์ `Package`, `Low Price`, `High Price` และ `Date` มาทำการเลือกคอลัมน์เหล่านี้กัน\n" + ], + "metadata": { + "id": "x3VGMAGBZiUr" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Select desired columns\n", + "pumpkins <- pumpkins %>% \n", + " select(Package, `Low Price`, `High Price`, Date)\n", + "\n", + "\n", + "# Print data set\n", + "pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "F_FgxQnVZnM0" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### dplyr::mutate()\n", + "\n", + "`mutate()` เป็นฟังก์ชันในแพ็กเกจ `dplyr` ที่ช่วยให้คุณสร้างหรือแก้ไขคอลัมน์ โดยยังคงคอลัมน์เดิมไว้\n", + "\n", + "โครงสร้างทั่วไปของ `mutate` คือ:\n", + "\n", + "`data %>% mutate(new_column_name = what_it_contains)`\n", + "\n", + "ลองใช้ `mutate` กับคอลัมน์ `Date` โดยทำตามขั้นตอนต่อไปนี้:\n", + "\n", + "1. แปลงวันที่ (ซึ่งปัจจุบันเป็นประเภทตัวอักษร) ให้เป็นรูปแบบเดือน (วันที่เหล่านี้เป็นวันที่ในสหรัฐฯ ดังนั้นรูปแบบคือ `MM/DD/YYYY`)\n", + "\n", + "2. 
ดึงข้อมูลเดือนจากวันที่ไปยังคอลัมน์ใหม่\n", + "\n", + "ใน R แพ็กเกจ [lubridate](https://lubridate.tidyverse.org/) ทำให้การทำงานกับข้อมูลวันที่และเวลาเป็นเรื่องง่ายขึ้น ดังนั้นเราจะใช้ `dplyr::mutate()`, `lubridate::mdy()`, `lubridate::month()` เพื่อบรรลุเป้าหมายข้างต้น เราสามารถลบคอลัมน์ Date ได้ เนื่องจากเราไม่จำเป็นต้องใช้มันอีกในขั้นตอนถัดไป\n" + ], + "metadata": { + "id": "2KKo0Ed9Z1VB" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load lubridate\n", + "library(lubridate)\n", + "\n", + "pumpkins <- pumpkins %>% \n", + " # Convert the Date column to a date object\n", + " mutate(Date = mdy(Date)) %>% \n", + " # Extract month from Date\n", + " mutate(Month = month(Date)) %>% \n", + " # Drop Date column\n", + " select(-Date)\n", + "\n", + "# View the first few rows\n", + "pumpkins %>% \n", + " slice_head(n = 7)" + ], + "outputs": [], + "metadata": { + "id": "5joszIVSZ6xe" + } + }, + { + "cell_type": "markdown", + "source": [ + "เยี่ยมไปเลย! 🤩\n", + "\n", + "ต่อไป มาสร้างคอลัมน์ใหม่ชื่อ `Price` ซึ่งแสดงถึงราคากลางของฟักทองกันเถอะ ตอนนี้เราจะคำนวณค่าเฉลี่ยของคอลัมน์ `Low Price` และ `High Price` เพื่อเติมข้อมูลในคอลัมน์ Price ใหม่นี้\n" + ], + "metadata": { + "id": "nIgLjNMCZ-6Y" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Create a new column Price\n", + "pumpkins <- pumpkins %>% \n", + " mutate(Price = (`Low Price` + `High Price`)/2)\n", + "\n", + "# View the first few rows of the data\n", + "pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "Zo0BsqqtaJw2" + } + }, + { + "cell_type": "markdown", + "source": [ + "เยส!💪\n", + "\n", + "\"แต่เดี๋ยวก่อน!\" คุณอาจพูดหลังจากดูข้อมูลทั้งหมดด้วย `View(pumpkins)` \"มีอะไรแปลกๆ อยู่ที่นี่!\"🤔\n", + "\n", + "ถ้าคุณดูที่คอลัมน์ `Package` จะเห็นว่าฟักทองถูกขายในรูปแบบที่หลากหลาย บางส่วนขายในหน่วย `1 1/9 bushel` บางส่วนในหน่วย `1/2 bushel` บางส่วนขายเป็นลูก บางส่วนขายเป็นปอนด์ 
และบางส่วนขายในกล่องใหญ่ที่มีขนาดแตกต่างกัน\n", + "\n", + "ลองตรวจสอบดูสิ:\n" + ], + "metadata": { + "id": "p77WZr-9aQAR" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Verify the distinct observations in Package column\n", + "pumpkins %>% \n", + " distinct(Package)" + ], + "outputs": [], + "metadata": { + "id": "XISGfh0IaUy6" + } + }, + { + "cell_type": "markdown", + "source": [ + "น่าทึ่งมาก!👏\n", + "\n", + "ดูเหมือนว่าฟักทองจะมีน้ำหนักที่วัดได้ไม่คงที่ ดังนั้นเรามากรองพวกมันโดยเลือกเฉพาะฟักทองที่มีคำว่า *bushel* อยู่ในคอลัมน์ `Package` และนำสิ่งนี้ไปใส่ในกรอบข้อมูลใหม่ชื่อ `new_pumpkins`\n" + ], + "metadata": { + "id": "7sMjiVujaZxY" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### dplyr::filter() และ stringr::str_detect()\n", + "\n", + "[`dplyr::filter()`](https://dplyr.tidyverse.org/reference/filter.html): สร้างชุดข้อมูลย่อยที่มีเฉพาะ **แถว** ที่ตรงตามเงื่อนไขของคุณ ในกรณีนี้คือ ฟักทองที่มีคำว่า *bushel* อยู่ในคอลัมน์ `Package`\n", + "\n", + "[stringr::str_detect()](https://stringr.tidyverse.org/reference/str_detect.html): ตรวจสอบว่ามีหรือไม่มีรูปแบบที่กำหนดในสตริง\n", + "\n", + "แพ็กเกจ [`stringr`](https://github.com/tidyverse/stringr) มีฟังก์ชันที่ใช้งานง่ายสำหรับการจัดการสตริงทั่วไป\n" + ], + "metadata": { + "id": "L8Qfcs92ageF" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Retain only pumpkins with \"bushel\"\n", + "new_pumpkins <- pumpkins %>% \n", + " filter(str_detect(Package, \"bushel\"))\n", + "\n", + "# Get the dimensions of the new data\n", + "dim(new_pumpkins)\n", + "\n", + "# View a few rows of the new data\n", + "new_pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "hy_SGYREampd" + } + }, + { + "cell_type": "markdown", + "source": [ + "คุณสามารถเห็นได้ว่าเราจำกัดข้อมูลให้เหลือประมาณ 415 แถวที่เกี่ยวกับฟักทองตามปริมาณเป็นบุชเชล 🤩\n", + "
\n" + ], + "metadata": { + "id": "VrDwF031avlR" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### dplyr::case_when()\n", + "\n", + "**แต่เดี๋ยวก่อน! ยังมีอีกสิ่งที่ต้องทำ**\n", + "\n", + "คุณสังเกตไหมว่าปริมาณในหน่วย bushel แตกต่างกันในแต่ละแถว? คุณจำเป็นต้องปรับราคาที่แสดงให้เป็นราคาต่อ bushel ไม่ใช่ต่อ 1 1/9 หรือ 1/2 bushel ถึงเวลาทำคณิตศาสตร์เพื่อทำให้มันเป็นมาตรฐานเดียวกัน\n", + "\n", + "เราจะใช้ฟังก์ชัน [`case_when()`](https://dplyr.tidyverse.org/reference/case_when.html) เพื่อ *ปรับเปลี่ยน* คอลัมน์ Price ตามเงื่อนไขบางอย่าง `case_when` ช่วยให้คุณสามารถจัดการคำสั่ง `if_else()` หลายตัวได้ในรูปแบบเวกเตอร์\n" + ], + "metadata": { + "id": "mLpw2jH4a0tx" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Convert the price if the Package contains fractional bushel values\n", + "new_pumpkins <- new_pumpkins %>% \n", + " mutate(Price = case_when(\n", + " str_detect(Package, \"1 1/9\") ~ Price/(1 + 1/9),\n", + " str_detect(Package, \"1/2\") ~ Price/(1/2),\n", + " TRUE ~ Price))\n", + "\n", + "# View the first few rows of the data\n", + "new_pumpkins %>% \n", + " slice_head(n = 30)" + ], + "outputs": [], + "metadata": { + "id": "P68kLVQmbM6I" + } + }, + { + "cell_type": "markdown", + "source": [ + "ตอนนี้เราสามารถวิเคราะห์ราคาต่อหน่วยโดยอิงจากการวัดผลตามหน่วย bushel ได้แล้ว การศึกษาหน่วย bushel ของฟักทองนี้แสดงให้เห็นว่า `สำคัญมาก` ที่จะต้อง `เข้าใจลักษณะของข้อมูลของคุณ`!\n", + "\n", + "> ✅ ตามข้อมูลจาก [The Spruce Eats](https://www.thespruceeats.com/how-much-is-a-bushel-1389308) น้ำหนักของ bushel ขึ้นอยู่กับประเภทของผลผลิต เนื่องจากมันเป็นการวัดตามปริมาตร \"ตัวอย่างเช่น bushel ของมะเขือเทศควรมีน้ำหนัก 56 ปอนด์... ใบและผักใบเขียวใช้พื้นที่มากกว่าแต่น้ำหนักน้อยกว่า ดังนั้น bushel ของผักโขมจึงมีน้ำหนักเพียง 20 ปอนด์\" มันค่อนข้างซับซ้อน! 
เราไม่ต้องยุ่งยากกับการแปลง bushel เป็นปอนด์ แต่ให้ตั้งราคาตาม bushel แทน การศึกษาหน่วย bushel ของฟักทองนี้แสดงให้เห็นว่า สำคัญมากที่จะต้องเข้าใจลักษณะของข้อมูลของคุณ!\n", + "\n", + "> ✅ คุณสังเกตไหมว่าฟักทองที่ขายเป็นครึ่ง bushel นั้นมีราคาแพงมาก? คุณสามารถหาสาเหตุได้ไหม? คำใบ้: ฟักทองลูกเล็กมีราคาสูงกว่าฟักทองลูกใหญ่มาก อาจเป็นเพราะมีจำนวนมากกว่าต่อ bushel เนื่องจากพื้นที่ที่ไม่ได้ใช้ซึ่งถูกครอบครองโดยฟักทองพายลูกใหญ่ที่กลวง\n" + ], + "metadata": { + "id": "pS2GNPagbSdb" + } + }, + { + "cell_type": "markdown", + "source": [ + "สุดท้ายนี้ เพื่อความสนุกสนานและการผจญภัย 💁‍♀️ เรามาย้ายคอลัมน์ Month ไปอยู่ในตำแหน่งแรกกันดีกว่า นั่นคือ `ก่อน` คอลัมน์ `Package`\n", + "\n", + "สามารถใช้ `dplyr::relocate()` เพื่อเปลี่ยนตำแหน่งของคอลัมน์ได้\n" + ], + "metadata": { + "id": "qql1SowfbdnP" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Create a new data frame new_pumpkins\n", + "new_pumpkins <- new_pumpkins %>% \n", + " relocate(Month, .before = Package)\n", + "\n", + "new_pumpkins %>% \n", + " slice_head(n = 7)" + ], + "outputs": [], + "metadata": { + "id": "JJ1x6kw8bixF" + } + }, + { + "cell_type": "markdown", + "source": [ + "เยี่ยมมาก!👌 ตอนนี้คุณมีชุดข้อมูลที่สะอาดและเป็นระเบียบเรียบร้อย ซึ่งคุณสามารถใช้สร้างโมเดลการถดถอยใหม่ของคุณได้! \n", + "
\n" + ], + "metadata": { + "id": "y8TJ0Za_bn5Y" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 4. การแสดงข้อมูลด้วย ggplot2\n", + "\n", + "

\n", + " \n", + "

อินโฟกราฟิกโดย Dasani Madipalli
\n", + "\n", + "\n", + "\n", + "\n", + "มีคำกล่าวที่ชาญฉลาดว่า:\n", + "\n", + "> \"กราฟง่าย ๆ ได้นำข้อมูลมาสู่ความคิดของนักวิเคราะห์ข้อมูลมากกว่าวิธีการอื่นใด\" --- John Tukey\n", + "\n", + "หนึ่งในบทบาทของนักวิทยาศาสตร์ข้อมูลคือการแสดงให้เห็นถึงคุณภาพและลักษณะของข้อมูลที่พวกเขากำลังทำงานด้วย เพื่อทำสิ่งนี้ พวกเขามักสร้างการแสดงผลที่น่าสนใจ เช่น แผนภาพ กราฟ และแผนภูมิ ที่แสดงแง่มุมต่าง ๆ ของข้อมูล ด้วยวิธีนี้ พวกเขาสามารถแสดงความสัมพันธ์และช่องว่างที่อาจยากต่อการค้นพบในรูปแบบอื่น\n", + "\n", + "การแสดงผลข้อมูลยังช่วยในการเลือกเทคนิคการเรียนรู้ของเครื่องที่เหมาะสมที่สุดสำหรับข้อมูล ตัวอย่างเช่น แผนภาพกระจายที่ดูเหมือนจะมีแนวโน้มตามเส้นตรง อาจบ่งชี้ว่าข้อมูลนั้นเหมาะสำหรับการวิเคราะห์การถดถอยเชิงเส้น\n", + "\n", + "R มีระบบหลายแบบสำหรับการสร้างกราฟ แต่ [`ggplot2`](https://ggplot2.tidyverse.org/index.html) เป็นหนึ่งในระบบที่มีความสง่างามและหลากหลายที่สุด `ggplot2` ช่วยให้คุณสร้างกราฟโดย **การรวมองค์ประกอบอิสระเข้าด้วยกัน**\n", + "\n", + "เริ่มต้นด้วยแผนภาพกระจายง่าย ๆ สำหรับคอลัมน์ Price และ Month\n", + "\n", + "ในกรณีนี้ เราจะเริ่มต้นด้วย [`ggplot()`](https://ggplot2.tidyverse.org/reference/ggplot.html) โดยใส่ชุดข้อมูลและการแมปเชิงสุนทรียะ (ด้วย [`aes()`](https://ggplot2.tidyverse.org/reference/aes.html)) จากนั้นเพิ่มเลเยอร์ (เช่น [`geom_point()`](https://ggplot2.tidyverse.org/reference/geom_point.html)) สำหรับแผนภาพกระจาย\n" + ], + "metadata": { + "id": "mYSH6-EtbvNa" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Set a theme for the plots\n", + "theme_set(theme_light())\n", + "\n", + "# Create a scatter plot\n", + "p <- ggplot(data = new_pumpkins, aes(x = Price, y = Month))\n", + "p + geom_point()" + ], + "outputs": [], + "metadata": { + "id": "g2YjnGeOcLo4" + } + }, + { + "cell_type": "markdown", + "source": [ + "นี่เป็นพล็อตที่มีประโยชน์หรือเปล่า 🤷? มีอะไรที่ทำให้คุณแปลกใจไหม?\n", + "\n", + "มันไม่ได้มีประโยชน์มากนัก เพราะทั้งหมดที่มันทำคือแสดงข้อมูลของคุณเป็นการกระจายของจุดในเดือนที่กำหนด\n", + "
\n" + ], + "metadata": { + "id": "Ml7SDCLQcPvE" + } + }, + { + "cell_type": "markdown", + "source": [ + "### **เราจะทำให้มันมีประโยชน์ได้อย่างไร?**\n", + "\n", + "เพื่อให้กราฟแสดงข้อมูลที่มีประโยชน์ คุณมักจะต้องจัดกลุ่มข้อมูลในบางรูปแบบ ตัวอย่างเช่น ในกรณีของเรา การหาค่าเฉลี่ยของราคาฟักทองในแต่ละเดือนจะช่วยให้เราเข้าใจรูปแบบที่ซ่อนอยู่ในข้อมูลได้มากขึ้น ซึ่งนำเราไปสู่การใช้งาน **dplyr** อีกหนึ่งฟังก์ชัน:\n", + "\n", + "#### `dplyr::group_by() %>% summarize()`\n", + "\n", + "การคำนวณแบบจัดกลุ่มใน R สามารถทำได้ง่าย ๆ โดยใช้\n", + "\n", + "`dplyr::group_by() %>% summarize()`\n", + "\n", + "- `dplyr::group_by()` เปลี่ยนหน่วยการวิเคราะห์จากทั้งชุดข้อมูลไปเป็นกลุ่มย่อย เช่น กลุ่มตามเดือน\n", + "\n", + "- `dplyr::summarize()` สร้าง Data Frame ใหม่ที่มีคอลัมน์สำหรับตัวแปรที่ใช้จัดกลุ่ม และคอลัมน์สำหรับสถิติสรุปที่คุณระบุ\n", + "\n", + "ตัวอย่างเช่น เราสามารถใช้ `dplyr::group_by() %>% summarize()` เพื่อจัดกลุ่มฟักทองตามคอลัมน์ **Month** และหาค่า **ราคาเฉลี่ย** สำหรับแต่ละเดือน\n" + ], + "metadata": { + "id": "jMakvJZIcVkh" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Find the average price of pumpkins per month\r\n", + "new_pumpkins %>%\r\n", + " group_by(Month) %>% \r\n", + " summarise(mean_price = mean(Price))" + ], + "outputs": [], + "metadata": { + "id": "6kVSUa2Bcilf" + } + }, + { + "cell_type": "markdown", + "source": [ + "กระชับ!✨\n", + "\n", + "ฟีเจอร์ประเภทหมวดหมู่ เช่น เดือน มักจะแสดงผลได้ดีกว่าด้วยกราฟแท่ง 📊 เลเยอร์ที่ใช้สำหรับสร้างกราฟแท่งคือ `geom_bar()` และ `geom_col()` สามารถดูข้อมูลเพิ่มเติมได้ที่ `?geom_bar`\n", + "\n", + "มาลองสร้างกันเลย!\n" + ], + "metadata": { + "id": "Kds48GUBcj3W" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Find the average price of pumpkins per month then plot a bar chart\r\n", + "new_pumpkins %>%\r\n", + " group_by(Month) %>% \r\n", + " summarise(mean_price = mean(Price)) %>% \r\n", + " ggplot(aes(x = Month, y = mean_price)) +\r\n", + " geom_col(fill = 
\"midnightblue\", alpha = 0.7) +\r\n", + " ylab(\"Pumpkin Price\")" + ], + "outputs": [], + "metadata": { + "id": "VNbU1S3BcrxO" + } + }, + { + "cell_type": "markdown", + "source": [ + "🤩🤩นี่คือการแสดงข้อมูลที่มีประโยชน์มากขึ้น! ดูเหมือนว่าราคาสูงสุดของฟักทองจะเกิดขึ้นในเดือนกันยายนและตุลาคม ตรงกับที่คุณคาดไว้หรือไม่? เพราะอะไร?\n", + "\n", + "ขอแสดงความยินดีที่คุณจบบทเรียนที่สอง 👏! คุณได้เตรียมข้อมูลสำหรับการสร้างโมเดล จากนั้นค้นพบข้อมูลเชิงลึกเพิ่มเติมผ่านการแสดงผลข้อมูล!\n" + ], + "metadata": { + "id": "zDm0VOzzcuzR" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้อง แต่โปรดทราบว่าการแปลโดยอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่ถูกต้อง เอกสารต้นฉบับในภาษาดั้งเดิมควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษามืออาชีพ เราไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความผิดที่เกิดจากการใช้การแปลนี้\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/th/2-Regression/2-Data/solution/notebook.ipynb b/translations/th/2-Regression/2-Data/solution/notebook.ipynb new file mode 100644 index 000000000..149f35adf --- /dev/null +++ b/translations/th/2-Regression/2-Data/solution/notebook.ipynb @@ -0,0 +1,437 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NameTypePackageVarietySub VarietyGradeDateLow PriceHigh PriceMostly Low...Unit of SaleQualityConditionAppearanceStorageCropRepackTrans ModeUnnamed: 24Unnamed: 25
70BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN9/24/1615.015.015.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
71BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN9/24/1618.018.018.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
72BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN10/1/1618.018.018.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
73BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN10/1/1617.017.017.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
74BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN10/8/1615.015.015.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " City Name Type Package Variety Sub Variety Grade \\\n", + "70 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "71 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "72 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "73 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "74 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "\n", + " Date Low Price High Price Mostly Low ... Unit of Sale Quality \\\n", + "70 9/24/16 15.0 15.0 15.0 ... NaN NaN \n", + "71 9/24/16 18.0 18.0 18.0 ... NaN NaN \n", + "72 10/1/16 18.0 18.0 18.0 ... NaN NaN \n", + "73 10/1/16 17.0 17.0 17.0 ... NaN NaN \n", + "74 10/8/16 15.0 15.0 15.0 ... NaN NaN \n", + "\n", + " Condition Appearance Storage Crop Repack Trans Mode Unnamed: 24 \\\n", + "70 NaN NaN NaN NaN N NaN NaN \n", + "71 NaN NaN NaN NaN N NaN NaN \n", + "72 NaN NaN NaN NaN N NaN NaN \n", + "73 NaN NaN NaN NaN N NaN NaN \n", + "74 NaN NaN NaN NaN N NaN NaN \n", + "\n", + " Unnamed: 25 \n", + "70 NaN \n", + "71 NaN \n", + "72 NaN \n", + "73 NaN \n", + "74 NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "pumpkins = pd.read_csv('../../data/US-pumpkins.csv')\n", + "\n", + "pumpkins = pumpkins[pumpkins['Package'].str.contains('bushel', case=True, regex=True)]\n", + "\n", + "pumpkins.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "City Name 0\n", + "Type 406\n", + "Package 0\n", + "Variety 0\n", + "Sub Variety 167\n", + "Grade 415\n", + "Date 0\n", + "Low Price 0\n", + "High Price 0\n", + "Mostly Low 24\n", + "Mostly High 24\n", + "Origin 0\n", + "Origin District 396\n", + "Item Size 114\n", + "Color 145\n", + "Environment 415\n", + "Unit of Sale 404\n", + "Quality 415\n", + "Condition 415\n", + "Appearance 
415\n", + "Storage 415\n", + "Crop 415\n", + "Repack 0\n", + "Trans Mode 415\n", + "Unnamed: 24 415\n", + "Unnamed: 25 391\n", + "dtype: int64" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pumpkins.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Month Package Low Price High Price Price\n", + "70 9 1 1/9 bushel cartons 15.00 15.0 13.50\n", + "71 9 1 1/9 bushel cartons 18.00 18.0 16.20\n", + "72 10 1 1/9 bushel cartons 18.00 18.0 16.20\n", + "73 10 1 1/9 bushel cartons 17.00 17.0 15.30\n", + "74 10 1 1/9 bushel cartons 15.00 15.0 13.50\n", + "... ... ... ... ... ...\n", + "1738 9 1/2 bushel cartons 15.00 15.0 30.00\n", + "1739 9 1/2 bushel cartons 13.75 15.0 28.75\n", + "1740 9 1/2 bushel cartons 10.75 15.0 25.75\n", + "1741 9 1/2 bushel cartons 12.00 12.0 24.00\n", + "1742 9 1/2 bushel cartons 12.00 12.0 24.00\n", + "\n", + "[415 rows x 5 columns]\n" + ] + } + ], + "source": [ + "\n", + "# A set of new columns for a new dataframe. 
Filter out nonmatching columns\n", + "columns_to_select = ['Package', 'Low Price', 'High Price', 'Date']\n", + "pumpkins = pumpkins.loc[:, columns_to_select]\n", + "\n", + "# Get an average between low and high price for the base pumpkin price\n", + "price = (pumpkins['Low Price'] + pumpkins['High Price']) / 2\n", + "\n", + "# Convert the date to its month only\n", + "month = pd.DatetimeIndex(pumpkins['Date']).month\n", + "\n", + "# Create a new dataframe with this basic data\n", + "new_pumpkins = pd.DataFrame({'Month': month, 'Package': pumpkins['Package'], 'Low Price': pumpkins['Low Price'],'High Price': pumpkins['High Price'], 'Price': price})\n", + "\n", + "# Convert the price if the Package contains fractional bushel values\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1 1/9'), 'Price'] = price/(1 + 1/9)\n", + "\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1/2'), 'Price'] = price/(1/2)\n", + "\n", + "print(new_pumpkins)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "price = new_pumpkins.Price\n", + "month = new_pumpkins.Month\n", + "plt.scatter(price, month)\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0, 0.5, 'Pumpkin Price')" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEJCAYAAACT/UyFAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAAsTAAALEwEAmpwYAAARAElEQVR4nO3de5AlZX3G8e8joKigiIwbVNYVQ6ErwcVaiRW0CgUNikEQKxFTijHJahlUSsvUqknE/LVE0KoYNVkDigloNCoQLt5AxUuCLrrhIhqUQgMiLBGE0goR+OWP0+sMszOzZ8ft0zO830/VqTndfc7phwae6XlPX1JVSJLa8aChA0iSJsvil6TGWPyS1BiLX5IaY/FLUmMsfklqzK5DBxjHPvvsU6tWrRo6hiQtK1dcccVtVTU1e/6yKP5Vq1axadOmoWNI0rKS5IdzzXeoR5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktSYZXECl3auVesvHDoCN2w4eugIUrMsfjXNX4JqkUM9ktQYi1+SGmPxS1JjLH5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqTG/Fn2S/JF9M8p0k1yR5Yzf/lCQ3JdncPV7YVwZJ0rZ27fGz7wHeXFXfSrIncEWSz3fL3lNVp/W4bknSPHor/qq6Gbi5e35XkmuBx/W1PknSePrc4/+VJKuAQ4DLgcOAk5K8EtjE6K+C2yeRQ9L8Vq2/cOgI3LDh6KEjNKH3L3eT7AF8Eji5qu4EPgA8CVjD6C+C0+d537okm5Js2rJlS98xJakZvRZ/kt0Ylf7ZVfUpgKq6parurar7gA8Ch8713qraWFVrq2rt1NRUnzElqSl9HtUT4Azg2qp694z5+8542XHA1X1lkCRtq88x/sOAVwBXJdnczXsbcEKSNUABNwCv6TGDJGmWPo/q+SqQORZd1Nc6F+IXV5I04pm7ktQYi1+SGmPxS1JjLH5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktQYi1+SGmPxS1JjLH5JakxvxZ9kvyRfTPKdJNckeWM3f+8kn09yXffzUX1lkCRtq889/nuAN1fVauCZwJ8lWQ2sBy6pqgOAS7ppSdKE9Fb8VXVzVX2re34XcC3wOODFwFndy84Cju0rgyRpWxMZ40+yCjgEuBxYUVU3d4t+AqyY5z3rkmxKsmnLli2TiClJTei9+JPsAXwSOLmq7py5rKoKqLneV1Ubq2ptVa2dmprqO6YkNWOs4k/y0CQH7uiHJ9mNUemfXVWf6mbfkmTfbvm+wK07+rmSpM
XbbvEn+T1gM/CZbnpNkvPHeF+AM4Brq+rdMxadD5zYPT8ROG8HM0uSfg3j7PGfAhwK3AFQVZuBJ47xvsOAVwDPTbK5e7wQ2AA8L8l1wJHdtCRpQnYd4zW/rKqfjXbgf2XOcfn7vaDqq0DmWXzEGOuVJPVgnOK/JsnLgV2SHAC8Afh6v7EkSX0ZZ6jn9cBTgbuBc4CfASf3mEmS1KPt7vFX1S+At3cPSdIyN85RPZ9PsteM6Ucl+WyvqSRJvRlnqGefqrpj60RV3Q48prdEkqRejVP89yVZuXUiyRMY46geSdLSNM5RPW8Hvprky4wOz3w2sK7XVJKk3ozz5e5nkjyd0aWVYXTNndv6jSVJ6su8Qz1Jntz9fDqwEvhx91jZzZMkLUML7fG/idGQzulzLCvgub0kkiT1at7ir6p1SR4E/EVVfW2CmSRJPVrwqJ6qug/4uwllkSRNwDiHc16S5PjMukqbJGl5Gqf4XwN8Arg7yZ1J7kpy5/beJElamsY5nHPPSQSRJE3GQodzHpDkvCRXJzknyeMmGUyS1I+FhnrOBC4Ajge+Dbx3IokkSb1aaKhnz6r6YPf8XUm+NYlAkqR+LVT8uyc5hOnbJz505nRV+YtAkpahhYr/ZuDdM6Z/MmPaM3claZla6Mzd50wyiCRpMsY5jl+S9ABi8UtSYyx+SWrMOHfgojt56wkzX19Vl/UVSpLUn+0Wf5JTgT8AvgPc280uwOKXpGVonD3+Y4EDq+runrNIkiZgnOK/HtgN2KHiT3Im8CLg1qo6qJt3CvCnwJbuZW+rqot25HMlqW+r1l84dARu2HB0b589TvH/Atic5BJmlH9VvWE77/swo5u4fGTW/PdU1Wk7ElKStPOMU/znd48dUlWXJVm1w4kkSb0a53r8Z+3kdZ6U5JXAJuDNVXX7XC9Kso7Rzd5ZuXLlTo4gSe1a6Hr8H+9+XpXkytmPRa7vA8CTgDWMrgV0+nwvrKqNVbW2qtZOTU0tcnWSpNkW2uN/Y/fzRTtrZVV1y9bnST7I6Hr/kqQJmnePv6pu7p6urqofznwAL1jMypLsO2PyOODqxXyOJGnxxvly9y+T3F1VlwIk+XPgOcDfL/SmJB8FDgf2SXIj8A7g8CRrGJ0AdgOjG7lLkiZonOI/BrggyVuAo4AnAy/e3puq6oQ5Zp+xY/EkSTvbOEf13JbkGOALwBXAS6uqek8mSerFvMWf5C5GQzJbPRjYH3hpkqqqR/QdTpK08y10B649JxlEkjQZ416W+SXAsxj9BfCVqjq3z1CSpP5s90YsSd4PvBa4itHhl69N8r6+g0mS+jHOHv9zgads/UI3yVnANb2mkiT1ZpxbL34fmHmxnP26eZKkZWicPf49gWuTfKObfgawKcn5AFV1TF/hJEk73zjF/1e9p5AkTcw4J3B9GSDJI7j/zdZ/2mMuSVJPxrnZ+jrgr4H/Be4Dwuiwzv37jSZJ6sM4Qz1vAQ6qqtv6DiNJ6t84R/X8gNF9dyVJDwDj7PG/Ffh6ksvZsZutS5KWoHGK/x+ASxmduXtfv3EkSX0bp/h3q6o39Z5EkjQR44zxX5xkXZJ9k+y99dF7MklSL8bZ4996J623zpjn4ZyStEyNcwLXEycRRJI0GeOcwPXKueZX1Ud2fhxJUt/GGep5xoznuwNHAN8CLH5JWobGGep5/czpJHsBH+srkCSpX+Mc1TPbzwHH/SVpmRpnjP/fGB3FA6NfFKuBj/cZSpLUn3HG+E+b8fwe4IdVdWNPeSRJPZu3+JPszugm67/J6HINZ1TVPZMKJknqx0Jj/GcBaxmV/guA0yeSSJLUq4WGelZX1W8BJDkD+MYCr91GkjOBFwG3VtVB3by9gX8BVgE3AL9fVbfveGxJ0mIttMf/y61PFjnE82HgqFnz1gOXVNUBwCXdtCRpghYq/qclubN73AUcvPV5kju398FVdRkw+768L2Y0hET389jFhJYkLd68Qz1VtUsP61tRVTd3z38CrO
hhHZKkBSzmBK6doqqK6fMDttFdCnpTkk1btmyZYDJJemCbdPHfkmRfgO7nrfO9sKo2VtXaqlo7NTU1sYCS9EA36eI/Hzixe34icN6E1y9Jzeut+JN8FPh34MAkNyb5Y2AD8Lwk1wFHdtOSpAka55INi1JVJ8yz6Ii+1ilJ2r7BvtyVJA3D4pekxlj8ktQYi1+SGmPxS1JjLH5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktSYXYdYaZIbgLuAe4F7qmrtEDkkqUWDFH/nOVV124Drl6QmOdQjSY0ZqvgL+FySK5KsGyiDJDVpqKGeZ1XVTUkeA3w+yXer6rKZL+h+IawDWLly5RAZJekBaZA9/qq6qft5K/Bp4NA5XrOxqtZW1dqpqalJR5SkB6yJF3+ShyfZc+tz4PnA1ZPOIUmtGmKoZwXw6SRb139OVX1mgByS1KSJF39VXQ88bdLrlSSNeDinJDXG4pekxlj8ktQYi1+SGmPxS1JjLH5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktQYi1+SGjNI8Sc5Ksn3knw/yfohMkhSqyZe/El2Ad4HvABYDZyQZPWkc0hSq4bY4z8U+H5VXV9V/wd8DHjxADkkqUmpqsmuMHkpcFRV/Uk3/Qrgt6vqpFmvWwes6yYPBL430aDb2ge4beAMS4XbYprbYprbYtpS2RZPqKqp2TN3HSLJOKpqI7Bx6BxbJdlUVWuHzrEUuC2muS2muS2mLfVtMcRQz03AfjOmH9/NkyRNwBDF/03ggCRPTPJg4GXA+QPkkKQmTXyop6ruSXIS8FlgF+DMqrpm0jkWYckMOy0BbotpbotpbotpS3pbTPzLXUnSsDxzV5IaY/FLUmMsfklqzJI9jn9IM442+nFVfSHJy4HfAa4FNlbVLwcNOGFJ9gdewugw3HuB/wLOqao7Bw0maVH8cncOSc5m9EvxYcAdwB7Ap4AjGG2zE4dLN1lJ3gC8CLgMeCHwbUbb5DjgdVX1pcHCSVoUi38OSa6sqoOT7Mro5LLHVtW9SQL8Z1UdPHDEiUlyFbCm++d/GHBRVR2eZCVwXlUdMnDEiUnySOCtwLHAY4ACbgXOAzZU1R2DhVtCklxcVS8YOsekJHkEo/8uHg9cXFXnzFj2/qp63WDh5uFQz9we1A33PJzRXv8jgZ8CDwF2GzLYQHZlNMTzEEZ//VBVP0rS2rb4OHApcHhV/QQgyW8AJ3bLnj9gtolK8vT5FgFrJhhlKfgQcB3wSeDVSY4HXl5VdwPPHDTZPCz+uZ0BfJfRCWZvBz6R5HpG/xI/NmSwAfwj8M0klwPPBk4FSDLF6JdhS1ZV1akzZ3S/AE5N8uqBMg3lm8CXGRX9bHtNNsrgnlRVx3fPz03yduDSJMcMGWohDvXMI8ljAarqx0n2Ao4EflRV3xg02ACSPBV4CnB1VX136DxDSfI54AvAWVV1SzdvBfAq4HlVdeSA8SYqydXAcVV13RzL/ruq9pvjbQ9ISa4FnlpV982Y9yrgLcAeVfWEobLNx+KXxpTkUcB6RvePeEw3+xZG15raUFW3D5Vt0rrLq19VVdtcLj3JsVV17uRTDSPJ3wCfq6ovzJp/FPDeqjpgmGTzs/ilnSDJH1XVh4bOsRS4LaYt1W1h8Us7QZIfVdXKoXMsBW6LaUt1W/jlrjSmJFfOtwhYMcksQ3NbTFuO28Lil8a3AvhdYPZYfoCvTz7OoNwW05bdtrD4pfFdwOgojc2zFyT50sTTDMttMW3ZbQvH+CWpMV6dU5IaY/FLUmMsfglIUkn+ecb0rkm2JL
lgkZ+3V5LXzZg+fLGfJe1sFr808nPgoCQP7aafx+jKrIu1F7DkrsoogcUvzXQRcHT3/ATgo1sXJNk7yblJrkzyH0kO7uafkuTMJF9Kcn13/wKADcCTkmxO8q5u3h5J/jXJd5Oc3V3mW5o4i1+a9jHgZUl2Bw4GLp+x7J3At7t7MbwN+MiMZU9mdBz3ocA7ustVrwd+UFVrquot3esOAU4GVgP7A4f1+M8izcvilzpVdSWwitHe/kWzFj8L+KfudZcCj+5uwAFwYVXdXVW3Mboxy3xna36jqm7sruK4uVuXNHGewCXd3/nAacDhwKPHfM/dM57fy/z/X437OqlX7vFL93cm8M6qumrW/K8AfwijI3SA27Zzs/m7gD37CCj9utzjkGaoqhuBv51j0SnAmd0FuX7B6HaLC33O/yT5WnfDkouBC3d2VmmxvGSDJDXGoR5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSY/4fZDFW+b6+4WkAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "new_pumpkins.groupby(['Month'])['Price'].mean().plot(kind='bar')\n", + "plt.ylabel(\"Pumpkin Price\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้อง แต่โปรดทราบว่าการแปลอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่แม่นยำ เอกสารต้นฉบับในภาษาต้นทางควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษามนุษย์มืออาชีพ เราจะไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความที่ผิดพลาดซึ่งเกิดจากการใช้การแปลนี้\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" + }, + "kernelspec": { + "display_name": "Python 3.7.0 64-bit ('3.7')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.1" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "95726f0b8283628d5356a4f8eb8b4b76", + "translation_date": "2025-09-06T13:46:29+00:00", + "source_file": "2-Regression/2-Data/solution/notebook.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/th/2-Regression/3-Linear/notebook.ipynb b/translations/th/2-Regression/3-Linear/notebook.ipynb new file mode 100644 index 000000000..9750b6f0a --- /dev/null +++ 
b/translations/th/2-Regression/3-Linear/notebook.ipynb @@ -0,0 +1,128 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## การตั้งราคาฟักทอง\n", + "\n", + "โหลดไลบรารีและชุดข้อมูลที่จำเป็น แปลงข้อมูลให้เป็น dataframe ที่มีข้อมูลบางส่วนดังนี้:\n", + "\n", + "- เลือกเฉพาะฟักทองที่ตั้งราคาเป็นหน่วย bushel\n", + "- แปลงวันที่ให้เป็นเดือน\n", + "- คำนวณราคาโดยเฉลี่ยจากราคาสูงสุดและต่ำสุด\n", + "- แปลงราคาให้สะท้อนถึงการตั้งราคาตามปริมาณ bushel\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from datetime import datetime\n", + "\n", + "pumpkins = pd.read_csv('../data/US-pumpkins.csv')\n", + "\n", + "pumpkins.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pumpkins = pumpkins[pumpkins['Package'].str.contains('bushel', case=True, regex=True)]\n", + "\n", + "columns_to_select = ['Package', 'Variety', 'City Name', 'Low Price', 'High Price', 'Date']\n", + "pumpkins = pumpkins.loc[:, columns_to_select]\n", + "\n", + "price = (pumpkins['Low Price'] + pumpkins['High Price']) / 2\n", + "\n", + "month = pd.DatetimeIndex(pumpkins['Date']).month\n", + "day_of_year = pd.to_datetime(pumpkins['Date']).apply(lambda dt: (dt-datetime(dt.year,1,1)).days)\n", + "\n", + "new_pumpkins = pd.DataFrame(\n", + " {'Month': month, \n", + " 'DayOfYear' : day_of_year, \n", + " 'Variety': pumpkins['Variety'], \n", + " 'City': pumpkins['City Name'], \n", + " 'Package': pumpkins['Package'], \n", + " 'Low Price': pumpkins['Low Price'],\n", + " 'High Price': pumpkins['High Price'], \n", + " 'Price': price})\n", + "\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1 1/9'), 'Price'] = price/1.1\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1/2'), 'Price'] = price*2\n", + "\n", + "new_pumpkins.head()\n" + ] + 
}, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "แผนภาพกระจายพื้นฐานเตือนเราว่าเรามีข้อมูลรายเดือนเฉพาะตั้งแต่เดือนสิงหาคมถึงเดือนธันวาคม เราอาจต้องการข้อมูลเพิ่มเติมเพื่อที่จะสรุปผลในรูปแบบเชิงเส้น\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "plt.scatter('Month','Price',data=new_pumpkins)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "plt.scatter('DayOfYear','Price',data=new_pumpkins)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้อง แต่โปรดทราบว่าการแปลอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่แม่นยำ เอกสารต้นฉบับในภาษาดั้งเดิมควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ แนะนำให้ใช้บริการแปลภาษาจากผู้เชี่ยวชาญ เราไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความที่ผิดพลาดซึ่งเกิดจากการใช้การแปลนี้\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3-final" + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "b032d371c75279373507f003439a577e", + "translation_date": "2025-09-06T13:09:07+00:00", + "source_file": "2-Regression/3-Linear/notebook.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/th/2-Regression/3-Linear/solution/R/lesson_3-R.ipynb b/translations/th/2-Regression/3-Linear/solution/R/lesson_3-R.ipynb new file mode 100644 index 
000000000..8fbb161ab --- /dev/null +++ b/translations/th/2-Regression/3-Linear/solution/R/lesson_3-R.ipynb @@ -0,0 +1,1083 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_3-R.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "5015d65d61ba75a223bfc56c273aa174", + "translation_date": "2025-09-06T13:23:02+00:00", + "source_file": "2-Regression/3-Linear/solution/R/lesson_3-R.ipynb", + "language_code": "th" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "EgQw8osnsUV-" + } + }, + { + "cell_type": "markdown", + "source": [ + "## การวิเคราะห์การถดถอยเชิงเส้นและพหุนามสำหรับการตั้งราคาฟักทอง - บทเรียนที่ 3\n", + "

\n", + " \n", + "

อินโฟกราฟิกโดย Dasani Madipalli
\n", + "\n", + "\n", + "\n", + "\n", + "#### บทนำ\n", + "\n", + "จนถึงตอนนี้ คุณได้สำรวจว่าการถดถอยคืออะไรโดยใช้ข้อมูลตัวอย่างจากชุดข้อมูลการตั้งราคาฟักทองที่เราจะใช้ตลอดบทเรียนนี้ คุณยังได้สร้างภาพด้วย `ggplot2` 💪\n", + "\n", + "ตอนนี้คุณพร้อมที่จะเจาะลึกลงไปในเรื่องการถดถอยสำหรับการเรียนรู้ของเครื่อง (ML) ในบทเรียนนี้ คุณจะได้เรียนรู้เพิ่มเติมเกี่ยวกับการถดถอยสองประเภท: *การถดถอยเชิงเส้นพื้นฐาน* และ *การถดถอยพหุนาม* พร้อมกับคณิตศาสตร์บางส่วนที่อยู่เบื้องหลังเทคนิคเหล่านี้\n", + "\n", + "> ตลอดหลักสูตรนี้ เราสมมติว่าคุณมีความรู้ทางคณิตศาสตร์ในระดับพื้นฐาน และพยายามทำให้เนื้อหาเข้าถึงได้สำหรับนักเรียนที่มาจากสาขาอื่น ดังนั้นโปรดสังเกตหมายเหตุ 🧮 การเรียกออกมา แผนภาพ และเครื่องมือการเรียนรู้อื่น ๆ เพื่อช่วยในการทำความเข้าใจ\n", + "\n", + "#### การเตรียมตัว\n", + "\n", + "เพื่อเป็นการเตือนความจำ คุณกำลังโหลดข้อมูลนี้เพื่อถามคำถามเกี่ยวกับข้อมูลดังกล่าว\n", + "\n", + "- ช่วงเวลาใดที่ดีที่สุดในการซื้อฟักทอง?\n", + "\n", + "- ราคาที่คาดหวังสำหรับฟักทองขนาดเล็กหนึ่งกล่องคือเท่าไหร่?\n", + "\n", + "- ควรซื้อฟักทองในตะกร้าครึ่งบุชเชลหรือในกล่องขนาด 1 1/9 บุชเชล? มาลองเจาะลึกข้อมูลนี้กันต่อไป\n", + "\n", + "ในบทเรียนก่อนหน้านี้ คุณได้สร้าง `tibble` (การปรับปรุงใหม่ของ data frame) และเติมข้อมูลบางส่วนจากชุดข้อมูลต้นฉบับ โดยการปรับมาตรฐานราคาตามบุชเชล อย่างไรก็ตาม ด้วยวิธีนั้น คุณสามารถรวบรวมข้อมูลได้เพียงประมาณ 400 จุดข้อมูล และเฉพาะในช่วงฤดูใบไม้ร่วงเท่านั้น บางทีเราอาจได้รายละเอียดเพิ่มเติมเกี่ยวกับลักษณะของข้อมูลโดยการทำความสะอาดข้อมูลมากขึ้น? เราจะได้รู้กัน... 
🕵️‍♀️\n", + "\n", + "สำหรับงานนี้ เราจะต้องใช้แพ็กเกจดังต่อไปนี้:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) คือ [ชุดของแพ็กเกจ R](https://www.tidyverse.org/packages) ที่ออกแบบมาเพื่อทำให้การวิเคราะห์ข้อมูลเร็วขึ้น ง่ายขึ้น และสนุกขึ้น!\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) เป็นกรอบงาน [ชุดของแพ็กเกจ](https://www.tidymodels.org/packages/) สำหรับการสร้างแบบจำลองและการเรียนรู้ของเครื่อง\n", + "\n", + "- `janitor`: [แพ็กเกจ janitor](https://github.com/sfirke/janitor) มีเครื่องมือเล็ก ๆ ที่เรียบง่ายสำหรับการตรวจสอบและทำความสะอาดข้อมูลที่สกปรก\n", + "\n", + "- `corrplot`: [แพ็กเกจ corrplot](https://cran.r-project.org/web/packages/corrplot/vignettes/corrplot-intro.html) เป็นเครื่องมือสำรวจภาพบนเมทริกซ์ความสัมพันธ์ที่สนับสนุนการจัดเรียงตัวแปรอัตโนมัติเพื่อช่วยตรวจจับรูปแบบที่ซ่อนอยู่ระหว่างตัวแปร\n", + "\n", + "คุณสามารถติดตั้งแพ็กเกจเหล่านี้ได้ด้วยคำสั่ง:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\", \"janitor\", \"corrplot\"))`\n", + "\n", + "สคริปต์ด้านล่างจะตรวจสอบว่าคุณมีแพ็กเกจที่จำเป็นสำหรับการทำโมดูลนี้หรือไม่ และจะติดตั้งให้คุณในกรณีที่ยังไม่มี\n" + ], + "metadata": { + "id": "WqQPS1OAsg3H" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "suppressWarnings(if (!require(\"pacman\")) install.packages(\"pacman\"))\n", + "\n", + "pacman::p_load(tidyverse, tidymodels, janitor, corrplot)" + ], + "outputs": [], + "metadata": { + "id": "tA4C2WN3skCf", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "c06cd805-5534-4edc-f72b-d0d1dab96ac0" + } + }, + { + "cell_type": "markdown", + "source": [ + "เราจะโหลดแพ็กเกจที่ยอดเยี่ยมเหล่านี้และทำให้พร้อมใช้งานในเซสชัน R ปัจจุบันของเรา (นี่เป็นเพียงตัวอย่าง เพราะ `pacman::p_load()` ได้ทำสิ่งนี้ให้คุณแล้ว)\n", + "\n", + "## 1. 
เส้นการถดถอยเชิงเส้น\n", + "\n", + "ตามที่คุณได้เรียนรู้ในบทเรียนที่ 1 เป้าหมายของการถดถอยเชิงเส้นคือการสามารถวาด *เส้น* *ที่* *เหมาะสมที่สุด* เพื่อ:\n", + "\n", + "- **แสดงความสัมพันธ์ระหว่างตัวแปร** แสดงความสัมพันธ์ระหว่างตัวแปรต่าง ๆ\n", + "\n", + "- **ทำการพยากรณ์** ทำการพยากรณ์ที่แม่นยำเกี่ยวกับตำแหน่งที่จุดข้อมูลใหม่จะตกอยู่ในความสัมพันธ์กับเส้นนั้น\n", + "\n", + "ในการวาดเส้นประเภทนี้ เราใช้เทคนิคทางสถิติที่เรียกว่า **Least-Squares Regression** คำว่า `least-squares` หมายถึงการที่จุดข้อมูลทั้งหมดที่อยู่รอบเส้นการถดถอยถูกยกกำลังสองและนำมารวมกัน ผลรวมสุดท้ายควรมีค่าน้อยที่สุดเท่าที่จะเป็นไปได้ เพราะเราต้องการจำนวนข้อผิดพลาดที่ต่ำที่สุด หรือ `least-squares` ดังนั้น เส้นที่เหมาะสมที่สุดคือเส้นที่ให้ค่าผลรวมของข้อผิดพลาดที่ยกกำลังสองต่ำที่สุด - ซึ่งเป็นที่มาของชื่อ *least squares regression*\n", + "\n", + "เราทำเช่นนี้เพราะเราต้องการสร้างแบบจำลองเส้นที่มีระยะทางสะสมจากจุดข้อมูลทั้งหมดน้อยที่สุด นอกจากนี้ เรายังยกกำลังสองก่อนที่จะรวมกัน เพราะเราสนใจขนาดของมันมากกว่าทิศทาง\n", + "\n", + "> **🧮 แสดงคณิตศาสตร์ให้ฉันดู**\n", + ">\n", + "> เส้นนี้ ซึ่งเรียกว่า *เส้นที่เหมาะสมที่สุด* สามารถแสดงได้ด้วย [สมการ](https://en.wikipedia.org/wiki/Simple_linear_regression):\n", + ">\n", + "> Y = a + bX\n", + ">\n", + "> `X` คือ '`ตัวแปรอธิบาย` หรือ `ตัวพยากรณ์`' ส่วน `Y` คือ '`ตัวแปรตาม` หรือ `ผลลัพธ์`' ความชันของเส้นคือ `b` และ `a` คือจุดตัดแกน y ซึ่งหมายถึงค่าของ `Y` เมื่อ `X = 0`\n", + ">\n", + "\n", + "> ![](../../../../../../2-Regression/3-Linear/solution/images/slope.png \"slope = $y/x$\")\n", + " อินโฟกราฟิกโดย Jen Looper\n", + ">\n", + "> ขั้นแรก คำนวณความชัน `b`\n", + ">\n", + "> กล่าวอีกนัยหนึ่ง และอ้างอิงถึงคำถามดั้งเดิมของข้อมูลฟักทองของเรา: \"พยากรณ์ราคาของฟักทองต่อบุชเชลตามเดือน\" `X` จะหมายถึงราคา และ `Y` จะหมายถึงเดือนที่ขาย\n", + ">\n", + "> ![](../../../../../../2-Regression/3-Linear/solution/images/calculation.png)\n", + " อินโฟกราฟิกโดย Jen Looper\n", + "> \n", + "> คำนวณค่าของ Y ถ้าคุณจ่ายประมาณ \\$4 นั่นต้องเป็นเดือนเมษายน!\n", + ">\n", + "> 
คณิตศาสตร์ที่คำนวณเส้นนี้ต้องแสดงความชันของเส้น ซึ่งยังขึ้นอยู่กับจุดตัดแกน หรือที่ที่ `Y` อยู่เมื่อ `X = 0`\n", + ">\n", + "> คุณสามารถดูวิธีการคำนวณค่าสำหรับสิ่งเหล่านี้ได้ที่เว็บไซต์ [Math is Fun](https://www.mathsisfun.com/data/least-squares-regression.html) และเยี่ยมชม [เครื่องคำนวณ Least-squares](https://www.mathsisfun.com/data/least-squares-calculator.html) เพื่อดูว่าค่าต่าง ๆ ส่งผลต่อเส้นอย่างไร\n", + "\n", + "ไม่น่ากลัวเท่าไหร่ใช่ไหม? 🤓\n", + "\n", + "#### ความสัมพันธ์\n", + "\n", + "อีกคำหนึ่งที่ควรเข้าใจคือ **ค่าสัมประสิทธิ์ความสัมพันธ์** ระหว่างตัวแปร X และ Y ที่กำหนด โดยใช้แผนภาพกระจาย (scatterplot) คุณสามารถมองเห็นค่าสัมประสิทธิ์นี้ได้อย่างรวดเร็ว แผนภาพที่มีจุดข้อมูลกระจายเป็นเส้นเรียบร้อยจะมีความสัมพันธ์สูง แต่แผนภาพที่มีจุดข้อมูลกระจายไปทั่วระหว่าง X และ Y จะมีความสัมพันธ์ต่ำ\n", + "\n", + "โมเดลการถดถอยเชิงเส้นที่ดีจะเป็นโมเดลที่มีค่าสัมประสิทธิ์ความสัมพันธ์สูง (ใกล้ 1 มากกว่า 0) โดยใช้วิธี Least-Squares Regression กับเส้นการถดถอย\n" + ], + "metadata": { + "id": "cdX5FRpvsoP5" + } + }, + { + "cell_type": "markdown", + "source": [ + "## **2. การเต้นรำกับข้อมูล: สร้าง Data Frame สำหรับการสร้างโมเดล**\n", + "\n", + "

\n", + " \n", + "

ผลงานโดย @allison_horst
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "WdUKXk7Bs8-V" + } + }, + { + "cell_type": "markdown", + "source": [ + "โหลดไลบรารีและชุดข้อมูลที่จำเป็น จากนั้นแปลงข้อมูลให้เป็น Data Frame ที่มีเฉพาะส่วนย่อยของข้อมูล:\n", + "\n", + "- เลือกเฉพาะฟักทองที่มีการตั้งราคาเป็นหน่วยบุชเชล\n", + "\n", + "- แปลงวันที่ให้เป็นเดือน\n", + "\n", + "- คำนวณราคาให้เป็นค่าเฉลี่ยระหว่างราคาสูงสุดและราคาต่ำสุด\n", + "\n", + "- แปลงราคาให้สะท้อนถึงการตั้งราคาตามปริมาณในหน่วยบุชเชล\n", + "\n", + "> เราได้ครอบคลุมขั้นตอนเหล่านี้ใน [บทเรียนก่อนหน้า](https://github.com/microsoft/ML-For-Beginners/blob/main/2-Regression/2-Data/solution/lesson_2-R.ipynb)\n" + ], + "metadata": { + "id": "fMCtu2G2s-p8" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load the core Tidyverse packages\n", + "library(tidyverse)\n", + "library(lubridate)\n", + "\n", + "# Import the pumpkins data\n", + "pumpkins <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/2-Regression/data/US-pumpkins.csv\")\n", + "\n", + "\n", + "# Get a glimpse and dimensions of the data\n", + "glimpse(pumpkins)\n", + "\n", + "\n", + "# Print the first 50 rows of the data set\n", + "pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "ryMVZEEPtERn" + } + }, + { + "cell_type": "markdown", + "source": [ + "ด้วยจิตวิญญาณแห่งการผจญภัย ลองมาสำรวจ [`janitor package`](https://github.com/sfirke/janitor) ที่มีฟังก์ชันง่ายๆ สำหรับตรวจสอบและทำความสะอาดข้อมูลที่ยุ่งเหยิงกัน ตัวอย่างเช่น ลองมาดูชื่อคอลัมน์ของข้อมูลของเรากัน:\n" + ], + "metadata": { + "id": "xcNxM70EtJjb" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Return column names\n", + "pumpkins %>% \n", + " names()" + ], + "outputs": [], + "metadata": { + "id": "5XtpaIigtPfW" + } + }, + { + "cell_type": "markdown", + "source": [ + "🤔 เราสามารถทำได้ดีกว่านี้ มาทำให้ชื่อคอลัมน์เหล่านี้เป็น `friendR`
โดยการแปลงให้เป็นรูปแบบ [snake_case](https://en.wikipedia.org/wiki/Snake_case) ด้วยการใช้ `janitor::clean_names` หากต้องการทราบข้อมูลเพิ่มเติมเกี่ยวกับฟังก์ชันนี้: `?clean_names`\n" + ], + "metadata": { + "id": "IbIqrMINtSHe" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Clean names to the snake_case convention\n", + "pumpkins <- pumpkins %>% \n", + " clean_names(case = \"snake\")\n", + "\n", + "# Return column names\n", + "pumpkins %>% \n", + " names()" + ], + "outputs": [], + "metadata": { + "id": "a2uYvclYtWvX" + } + }, + { + "cell_type": "markdown", + "source": [ + "ช่างเป็น tidyR 🧹! ตอนนี้ มาลองเต้นรำกับข้อมูลโดยใช้ `dplyr` เหมือนในบทเรียนก่อนหน้านี้! 💃\n" + ], + "metadata": { + "id": "HfhnuzDDtaDd" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Select desired columns\n", + "pumpkins <- pumpkins %>% \n", + " select(variety, city_name, package, low_price, high_price, date)\n", + "\n", + "\n", + "\n", + "# Extract the month from the dates to a new column\n", + "pumpkins <- pumpkins %>%\n", + " mutate(date = mdy(date),\n", + " month = month(date)) %>% \n", + " select(-date)\n", + "\n", + "\n", + "\n", + "# Create a new column for average Price\n", + "pumpkins <- pumpkins %>% \n", + " mutate(price = (low_price + high_price)/2)\n", + "\n", + "\n", + "# Retain only pumpkins with the string \"bushel\"\n", + "new_pumpkins <- pumpkins %>% \n", + " filter(str_detect(string = package, pattern = \"bushel\"))\n", + "\n", + "\n", + "# Normalize the pricing so that you show the pricing per bushel, not per 1 1/9 or 1/2 bushel\n", + "new_pumpkins <- new_pumpkins %>% \n", + " mutate(price = case_when(\n", + " str_detect(package, \"1 1/9\") ~ price/(1.1),\n", + " str_detect(package, \"1/2\") ~ price*2,\n", + " TRUE ~ price))\n", + "\n", + "# Relocate column positions\n", + "new_pumpkins <- new_pumpkins %>% \n", + " relocate(month, .before = variety)\n", + "\n", + "\n", + "# Display the first 5 rows\n", + 
"new_pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "X0wU3gQvtd9f" + } + }, + { + "cell_type": "markdown", + "source": [ + "เยี่ยมมาก!👌 ตอนนี้คุณมีชุดข้อมูลที่สะอาดและเป็นระเบียบเรียบร้อย พร้อมสำหรับการสร้างโมเดลการถดถอยใหม่ของคุณแล้ว!\n", + "\n", + "สนใจกราฟกระจายไหม?\n" + ], + "metadata": { + "id": "UpaIwaxqth82" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Set theme\n", + "theme_set(theme_light())\n", + "\n", + "# Make a scatter plot of month and price\n", + "new_pumpkins %>% \n", + " ggplot(mapping = aes(x = month, y = price)) +\n", + " geom_point(size = 1.6)\n" + ], + "outputs": [], + "metadata": { + "id": "DXgU-j37tl5K" + } + }, + { + "cell_type": "markdown", + "source": [ + "แผนภาพกระจายช่วยเตือนเราว่าเรามีข้อมูลรายเดือนเพียงตั้งแต่เดือนสิงหาคมถึงเดือนธันวาคมเท่านั้น เราอาจต้องการข้อมูลเพิ่มเติมเพื่อที่จะสามารถสรุปผลในลักษณะเชิงเส้นได้\n", + "\n", + "ลองกลับมาดูข้อมูลการสร้างแบบจำลองของเราอีกครั้ง:\n" + ], + "metadata": { + "id": "Ve64wVbwtobI" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Display first 5 rows\n", + "new_pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "HFQX2ng1tuSJ" + } + }, + { + "cell_type": "markdown", + "source": [ + "ถ้าเราต้องการทำนาย `price` ของฟักทองโดยใช้คอลัมน์ `city` หรือ `package` ซึ่งเป็นข้อมูลประเภทตัวอักษรล่ะ? หรือถ้าจะง่ายกว่านั้น เราจะหาความสัมพันธ์ (ซึ่งต้องใช้ข้อมูลทั้งสองเป็นตัวเลข) ระหว่าง `package` กับ `price` ได้อย่างไร? 🤷🤷\n", + "\n", + "โมเดลการเรียนรู้ของเครื่องทำงานได้ดีที่สุดเมื่อใช้คุณลักษณะเป็นตัวเลขแทนที่จะเป็นค่าข้อความ ดังนั้นโดยทั่วไปคุณจำเป็นต้องแปลงคุณลักษณะเชิงหมวดหมู่ให้เป็นตัวแทนในรูปแบบตัวเลข\n", + "\n", + "นั่นหมายความว่าเราต้องหาวิธีปรับรูปแบบตัวทำนายของเราให้ใช้งานได้ง่ายขึ้นสำหรับโมเดล ซึ่งกระบวนการนี้เรียกว่า `feature engineering`\n" + ], + "metadata": { + "id": "7hsHoxsStyjJ" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 3. 
การเตรียมข้อมูลสำหรับการสร้างโมเดลด้วย recipes 👩‍🍳👨‍🍳\n", + "\n", + "กิจกรรมที่ปรับเปลี่ยนค่าของตัวแปรพยากรณ์เพื่อให้โมเดลใช้งานได้อย่างมีประสิทธิภาพมากขึ้น เรียกว่า `feature engineering`\n", + "\n", + "โมเดลแต่ละแบบมีความต้องการการเตรียมข้อมูลที่แตกต่างกัน ตัวอย่างเช่น least squares ต้องการ `การเข้ารหัสตัวแปรเชิงหมวดหมู่` เช่น เดือน, ชนิด และ city_name ซึ่งกระบวนการนี้เกี่ยวข้องกับ `การแปลง` คอลัมน์ที่มี `ค่าหมวดหมู่` ให้กลายเป็นหนึ่งหรือมากกว่า `คอลัมน์ตัวเลข` ที่มาแทนที่คอลัมน์เดิม\n", + "\n", + "ตัวอย่างเช่น สมมติว่าข้อมูลของคุณมีตัวแปรเชิงหมวดหมู่ดังนี้:\n", + "\n", + "| city |\n", + "|:-------:|\n", + "| Denver |\n", + "| Nairobi |\n", + "| Tokyo |\n", + "\n", + "คุณสามารถใช้ *ordinal encoding* เพื่อแทนค่าหมวดหมู่แต่ละค่าเป็นตัวเลขจำนวนเต็มที่ไม่ซ้ำกัน เช่นนี้:\n", + "\n", + "| city |\n", + "|:----:|\n", + "| 0 |\n", + "| 1 |\n", + "| 2 |\n", + "\n", + "และนี่คือสิ่งที่เราจะทำกับข้อมูลของเรา!\n", + "\n", + "ในส่วนนี้ เราจะสำรวจแพ็กเกจ Tidymodels ที่น่าทึ่งอีกตัวหนึ่ง: [recipes](https://tidymodels.github.io/recipes/) - ซึ่งถูกออกแบบมาเพื่อช่วยคุณเตรียมข้อมูลของคุณ **ก่อน** การฝึกโมเดล โดยพื้นฐานแล้ว recipe คือออบเจ็กต์ที่กำหนดว่าควรมีขั้นตอนใดบ้างที่ต้องนำไปใช้กับชุดข้อมูลเพื่อเตรียมให้พร้อมสำหรับการสร้างโมเดล\n", + "\n", + "ตอนนี้ เรามาสร้าง recipe ที่เตรียมข้อมูลของเราสำหรับการสร้างโมเดลโดยการแทนค่าจำนวนเต็มที่ไม่ซ้ำกันให้กับทุกค่าที่พบในคอลัมน์ตัวแปรพยากรณ์:\n" + ], + "metadata": { + "id": "AD5kQbcvt3Xl" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Specify a recipe\n", + "pumpkins_recipe <- recipe(price ~ ., data = new_pumpkins) %>% \n", + " step_integer(all_predictors(), zero_based = TRUE)\n", + "\n", + "\n", + "# Print out the recipe\n", + "pumpkins_recipe" + ], + "outputs": [], + "metadata": { + "id": "BNaFKXfRt9TU" + } + }, + { + "cell_type": "markdown", + "source": [ + "เยี่ยมมาก! 
👏 เราเพิ่งสร้างสูตรแรกที่กำหนดผลลัพธ์ (ราคา) และตัวทำนายที่เกี่ยวข้อง พร้อมทั้งกำหนดให้คอลัมน์ตัวทำนายทั้งหมดถูกแปลงเป็นชุดของตัวเลขจำนวนเต็ม 🙌! มาดูรายละเอียดกันอย่างรวดเร็ว:\n", + "\n", + "- การเรียกใช้ `recipe()` พร้อมสูตรจะบอกสูตรเกี่ยวกับ *บทบาท* ของตัวแปร โดยใช้ข้อมูล `new_pumpkins` เป็นข้อมูลอ้างอิง ตัวอย่างเช่น คอลัมน์ `price` ถูกกำหนดให้มีบทบาทเป็น `outcome` ในขณะที่คอลัมน์อื่น ๆ ถูกกำหนดให้มีบทบาทเป็น `predictor`\n", + "\n", + "- `step_integer(all_predictors(), zero_based = TRUE)` ระบุว่าตัวทำนายทั้งหมดควรถูกแปลงเป็นชุดของตัวเลขจำนวนเต็ม โดยเริ่มต้นการนับที่ 0\n", + "\n", + "เราเชื่อว่าคุณอาจกำลังคิดว่า: \"นี่มันเจ๋งมาก!! แต่ถ้าฉันต้องการยืนยันว่าสูตรกำลังทำงานตามที่ฉันคาดหวังจริง ๆ ล่ะ? 🤔\"\n", + "\n", + "นั่นเป็นความคิดที่ยอดเยี่ยม! คุณเห็นไหมว่า เมื่อสูตรของคุณถูกกำหนดแล้ว คุณสามารถประมาณค่าพารามิเตอร์ที่จำเป็นสำหรับการเตรียมข้อมูล และจากนั้นดึงข้อมูลที่ผ่านการประมวลผลออกมาได้ โดยปกติคุณไม่จำเป็นต้องทำเช่นนี้เมื่อใช้ Tidymodels (เราจะเห็นวิธีการทั่วไปในอีกสักครู่-\\> `workflows`) แต่สิ่งนี้อาจมีประโยชน์เมื่อคุณต้องการตรวจสอบความถูกต้องเพื่อยืนยันว่าสูตรกำลังทำงานตามที่คุณคาดหวัง\n", + "\n", + "สำหรับสิ่งนี้ คุณจะต้องใช้คำกริยาอีกสองคำ: `prep()` และ `bake()` และเช่นเคย เพื่อนตัวน้อยใน R ของเราโดย [`Allison Horst`](https://github.com/allisonhorst/stats-illustrations) จะช่วยให้คุณเข้าใจสิ่งนี้ได้ดียิ่งขึ้น!\n", + "\n", + "

\n", + " \n", + "

ภาพวาดโดย @allison_horst
\n" + ], + "metadata": { + "id": "KEiO0v7kuC9O" + } + }, + { + "cell_type": "markdown", + "source": [ + "[`prep()`](https://recipes.tidymodels.org/reference/prep.html): ประเมินค่าพารามิเตอร์ที่จำเป็นจากชุดข้อมูลการฝึกอบรม ซึ่งสามารถนำไปใช้กับชุดข้อมูลอื่นในภายหลังได้ ตัวอย่างเช่น สำหรับคอลัมน์ตัวทำนายที่กำหนด จะมีการกำหนดค่าการสังเกตเป็นเลขจำนวนเต็ม 0 หรือ 1 หรือ 2 เป็นต้น\n", + "\n", + "[`bake()`](https://recipes.tidymodels.org/reference/bake.html): ใช้สูตรที่เตรียมไว้แล้วและดำเนินการกับชุดข้อมูลใดๆ\n", + "\n", + "เมื่อกล่าวเช่นนี้ เรามาเตรียมและดำเนินการสูตรของเราเพื่อยืนยันจริงๆ ว่าเบื้องหลังนั้น คอลัมน์ตัวทำนายจะถูกเข้ารหัสก่อนที่จะนำไปปรับใช้กับโมเดล\n" + ], + "metadata": { + "id": "Q1xtzebuuTCP" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Prep the recipe\n", + "pumpkins_prep <- prep(pumpkins_recipe)\n", + "\n", + "# Bake the recipe to extract a preprocessed new_pumpkins data\n", + "baked_pumpkins <- bake(pumpkins_prep, new_data = NULL)\n", + "\n", + "# Print out the baked data set\n", + "baked_pumpkins %>% \n", + " slice_head(n = 10)" + ], + "outputs": [], + "metadata": { + "id": "FGBbJbP_uUUn" + } + }, + { + "cell_type": "markdown", + "source": [ + "เยี่ยมไปเลย! 
🥳 ข้อมูลที่ผ่านการประมวลผล `baked_pumpkins` มีตัวแปรทำนายทั้งหมดที่ถูกเข้ารหัสแล้ว ซึ่งยืนยันว่าขั้นตอนการเตรียมข้อมูลที่เรากำหนดไว้ในสูตรนั้นทำงานได้ตามที่คาดหวัง แม้ว่ามันอาจจะทำให้คุณอ่านยากขึ้น แต่ก็ทำให้ Tidymodels เข้าใจข้อมูลได้ง่ายขึ้น ลองใช้เวลาสักครู่เพื่อดูว่าแต่ละข้อมูลถูกแปลงเป็นตัวเลขใดบ้าง\n", + "\n", + "นอกจากนี้ยังควรกล่าวถึงว่า `baked_pumpkins` เป็น data frame ที่เราสามารถทำการคำนวณต่าง ๆ ได้\n", + "\n", + "ตัวอย่างเช่น ลองหาความสัมพันธ์ที่ดีระหว่างสองจุดในข้อมูลของคุณเพื่อสร้างโมเดลทำนายที่มีประสิทธิภาพ เราจะใช้ฟังก์ชัน `cor()` เพื่อทำสิ่งนี้ พิมพ์ `?cor()` เพื่อดูข้อมูลเพิ่มเติมเกี่ยวกับฟังก์ชันนี้\n" + ], + "metadata": { + "id": "1dvP0LBUueAW" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Find the correlation between the city_name and the price\n", + "cor(baked_pumpkins$city_name, baked_pumpkins$price)\n", + "\n", + "# Find the correlation between the package and the price\n", + "cor(baked_pumpkins$package, baked_pumpkins$price)\n" + ], + "outputs": [], + "metadata": { + "id": "3bQzXCjFuiSV" + } + }, + { + "cell_type": "markdown", + "source": [ + "ปรากฏว่าความสัมพันธ์ระหว่างเมืองและราคานั้นค่อนข้างอ่อนแอ อย่างไรก็ตาม มีความสัมพันธ์ที่ดีกว่าระหว่างแพ็คเกจและราคา ซึ่งก็สมเหตุสมผลใช่ไหม? 
โดยปกติแล้ว กล่องสินค้ายิ่งใหญ่ ราคาก็ยิ่งสูงขึ้น\n", + "\n", + "ในขณะเดียวกัน เรามาลองสร้างภาพเมทริกซ์ความสัมพันธ์ของทุกคอลัมน์โดยใช้แพ็คเกจ `corrplot` กันเถอะ\n" + ], + "metadata": { + "id": "BToPWbgjuoZw" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load the corrplot package\n", + "library(corrplot)\n", + "\n", + "# Obtain correlation matrix\n", + "corr_mat <- cor(baked_pumpkins %>% \n", + " # Drop columns that are not really informative\n", + " select(-c(low_price, high_price)))\n", + "\n", + "# Make a correlation plot between the variables\n", + "corrplot(corr_mat, method = \"shade\", shade.col = NA, tl.col = \"black\", tl.srt = 45, addCoef.col = \"black\", cl.pos = \"n\", order = \"original\")" + ], + "outputs": [], + "metadata": { + "id": "ZwAL3ksmutVR" + } + }, + { + "cell_type": "markdown", + "source": [ + "🤩🤩 ดีขึ้นมาก\n", + "\n", + "คำถามที่ดีที่ควรถามเกี่ยวกับข้อมูลนี้คือ: '`ราคาที่คาดหวังสำหรับแพ็คเกจฟักทองคือเท่าไหร่?`' มาเริ่มกันเลย!\n", + "\n", + "> หมายเหตุ: เมื่อคุณ **`bake()`** สูตรที่เตรียมไว้ **`pumpkins_prep`** โดยใช้ **`new_data = NULL`** คุณจะได้ข้อมูลการฝึกที่ผ่านการประมวลผล (เช่น การเข้ารหัส) หากคุณมีชุดข้อมูลอื่น เช่น ชุดทดสอบ และต้องการดูว่าสูตรจะประมวลผลอย่างไร คุณเพียงแค่ bake **`pumpkins_prep`** โดยใช้ **`new_data = test_set`**\n", + "\n", + "## 4. สร้างโมเดลการถดถอยเชิงเส้น\n", + "\n", + "

\n", + " \n", + "

อินโฟกราฟิกโดย Dasani Madipalli
\n" + ], + "metadata": { + "id": "YqXjLuWavNxW" + } + }, + { + "cell_type": "markdown", + "source": [ + "ตอนนี้ที่เราได้สร้างสูตรและยืนยันแล้วว่าข้อมูลจะถูกประมวลผลล่วงหน้าอย่างเหมาะสม มาสร้างโมเดลการถดถอยเพื่อหาคำตอบสำหรับคำถามนี้กัน: `ราคาที่คาดหวังของแพ็คเกจฟักทองที่กำหนดคือเท่าไร?`\n", + "\n", + "#### ฝึกโมเดลการถดถอยเชิงเส้นโดยใช้ชุดข้อมูลการฝึก\n", + "\n", + "คุณอาจสังเกตเห็นแล้วว่า คอลัมน์ *price* คือ `ตัวแปรผลลัพธ์` ในขณะที่คอลัมน์ *package* คือ `ตัวแปรพยากรณ์`\n", + "\n", + "เพื่อทำสิ่งนี้ เราจะเริ่มต้นด้วยการแบ่งข้อมูล โดยให้ 80% เป็นชุดข้อมูลการฝึก และ 20% เป็นชุดข้อมูลทดสอบ จากนั้นกำหนดสูตรที่จะเข้ารหัสคอลัมน์ตัวแปรพยากรณ์ให้เป็นชุดของตัวเลข และสร้างสเปคของโมเดล เราจะไม่เตรียมและอบสูตรของเราอีกครั้ง เพราะเรารู้อยู่แล้วว่ามันจะประมวลผลข้อมูลได้ตามที่คาดไว้\n" + ], + "metadata": { + "id": "Pq0bSzCevW-h" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "set.seed(2056)\n", + "# Split the data into training and test sets\n", + "pumpkins_split <- new_pumpkins %>% \n", + " initial_split(prop = 0.8)\n", + "\n", + "\n", + "# Extract training and test data\n", + "pumpkins_train <- training(pumpkins_split)\n", + "pumpkins_test <- testing(pumpkins_split)\n", + "\n", + "\n", + "\n", + "# Create a recipe for preprocessing the data\n", + "lm_pumpkins_recipe <- recipe(price ~ package, data = pumpkins_train) %>% \n", + " step_integer(all_predictors(), zero_based = TRUE)\n", + "\n", + "\n", + "\n", + "# Create a linear model specification\n", + "lm_spec <- linear_reg() %>% \n", + " set_engine(\"lm\") %>% \n", + " set_mode(\"regression\")" + ], + "outputs": [], + "metadata": { + "id": "CyoEh_wuvcLv" + } + }, + { + "cell_type": "markdown", + "source": [ + "เยี่ยมมาก! 
ตอนนี้เรามีสูตรและสเปคของโมเดลแล้ว เราต้องหาวิธีรวมสิ่งเหล่านี้เข้าด้วยกันเป็นวัตถุที่สามารถทำการเตรียมข้อมูล (prep+bake เบื้องหลัง) ฝึกโมเดลบนข้อมูลที่ผ่านการเตรียมแล้ว และยังรองรับกิจกรรมหลังการประมวลผลได้อีกด้วย แบบนี้ช่วยให้คุณสบายใจขึ้นใช่ไหม!🤩\n", + "\n", + "ใน Tidymodels วัตถุที่สะดวกนี้เรียกว่า [`workflow`](https://workflows.tidymodels.org/) ซึ่งช่วยจัดการองค์ประกอบการสร้างโมเดลของคุณได้อย่างสะดวก! สิ่งนี้คือสิ่งที่เราเรียกว่า *pipelines* ใน *Python* นั่นเอง\n", + "\n", + "ดังนั้น มาเริ่มรวมทุกอย่างเข้าด้วยกันใน workflow กันเถอะ!📦\n" + ], + "metadata": { + "id": "G3zF_3DqviFJ" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Hold modelling components in a workflow\n", + "lm_wf <- workflow() %>% \n", + " add_recipe(lm_pumpkins_recipe) %>% \n", + " add_model(lm_spec)\n", + "\n", + "# Print out the workflow\n", + "lm_wf" + ], + "outputs": [], + "metadata": { + "id": "T3olroU3v-WX" + } + }, + { + "cell_type": "markdown", + "source": [ + "นอกจากนี้ เวิร์กโฟลว์ยังสามารถปรับแต่ง/ฝึกฝนได้ในลักษณะเดียวกับที่โมเดลสามารถทำได้\n" + ], + "metadata": { + "id": "zd1A5tgOwEPX" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Train the model\n", + "lm_wf_fit <- lm_wf %>% \n", + " fit(data = pumpkins_train)\n", + "\n", + "# Print the model coefficients learned \n", + "lm_wf_fit" + ], + "outputs": [], + "metadata": { + "id": "NhJagFumwFHf" + } + }, + { + "cell_type": "markdown", + "source": [ + "จากผลลัพธ์ของโมเดล เราสามารถเห็นค่าสัมประสิทธิ์ที่ได้จากการฝึกฝน ซึ่งค่าสัมประสิทธิ์เหล่านี้แสดงถึงค่าของเส้นที่เหมาะสมที่สุดที่ช่วยลดข้อผิดพลาดโดยรวมระหว่างค่าจริงและค่าที่โมเดลทำนายได้\n", + "\n", + "#### ประเมินประสิทธิภาพของโมเดลด้วยชุดข้อมูลทดสอบ\n", + "\n", + "ถึงเวลาตรวจสอบว่าโมเดลทำงานได้ดีแค่ไหน 📏! 
เราจะทำอย่างไร?\n", + "\n", + "เมื่อเราได้ฝึกฝนโมเดลแล้ว เราสามารถใช้โมเดลนี้เพื่อทำนายค่าจาก `test_set` โดยใช้ `parsnip::predict()` จากนั้นเราสามารถเปรียบเทียบค่าที่ทำนายได้กับค่าป้ายกำกับจริงเพื่อประเมินว่าโมเดลทำงานได้ดีหรือไม่ดีเพียงใด\n", + "\n", + "เริ่มต้นด้วยการทำนายค่าจากชุดข้อมูลทดสอบ แล้วรวมคอลัมน์เข้ากับชุดข้อมูลทดสอบ\n" + ], + "metadata": { + "id": "_4QkGtBTwItF" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make predictions for the test set\n", + "predictions <- lm_wf_fit %>% \n", + " predict(new_data = pumpkins_test)\n", + "\n", + "\n", + "# Bind predictions to the test set\n", + "lm_results <- pumpkins_test %>% \n", + " select(c(package, price)) %>% \n", + " bind_cols(predictions)\n", + "\n", + "\n", + "# Print the first ten rows of the tibble\n", + "lm_results %>% \n", + " slice_head(n = 10)" + ], + "outputs": [], + "metadata": { + "id": "UFZzTG0gwTs9" + } + }, + { + "cell_type": "markdown", + "source": [ + "คุณเพิ่งฝึกโมเดลและใช้มันเพื่อทำการพยากรณ์!🔮 มันดีแค่ไหน? มาประเมินประสิทธิภาพของโมเดลกันเถอะ!\n", + "\n", + "ใน Tidymodels เราทำสิ่งนี้โดยใช้ `yardstick::metrics()`! 
สำหรับการวิเคราะห์การถดถอยเชิงเส้น (linear regression) เรามุ่งเน้นไปที่ตัวชี้วัดต่อไปนี้:\n", + "\n", + "- `Root Mean Square Error (RMSE)`: รากที่สองของ [MSE](https://en.wikipedia.org/wiki/Mean_squared_error) ซึ่งให้ค่าตัวชี้วัดแบบสัมบูรณ์ในหน่วยเดียวกับป้ายกำกับ (ในกรณีนี้คือราคาของฟักทอง) ค่ายิ่งเล็กยิ่งดีสำหรับโมเดล (ในความหมายง่ายๆ มันแสดงถึงราคาที่การพยากรณ์ผิดพลาดโดยเฉลี่ย!)\n", + "\n", + "- `Coefficient of Determination (มักเรียกว่า R-squared หรือ R2)`: ตัวชี้วัดแบบสัมพัทธ์ที่ค่ายิ่งสูงยิ่งดีสำหรับการปรับให้เข้ากับโมเดล โดยพื้นฐานแล้ว ตัวชี้วัดนี้แสดงถึงว่าระดับความแปรปรวนระหว่างค่าที่พยากรณ์และค่าจริงที่โมเดลสามารถอธิบายได้มากน้อยเพียงใด\n" + ], + "metadata": { + "id": "0A5MjzM7wW9M" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Evaluate performance of linear regression\n", + "metrics(data = lm_results,\n", + " truth = price,\n", + " estimate = .pred)" + ], + "outputs": [], + "metadata": { + "id": "reJ0UIhQwcEH" + } + }, + { + "cell_type": "markdown", + "source": [ + "นี่คือประสิทธิภาพของโมเดล ลองมาดูกันว่าเราจะได้ข้อมูลที่ชัดเจนขึ้นหรือไม่โดยการสร้างกราฟกระจายของแพ็กเกจและราคา แล้วใช้การคาดการณ์ที่ได้มาวางเส้นแนวโน้มที่เหมาะสมที่สุดลงไป\n", + "\n", + "นั่นหมายความว่าเราจะต้องเตรียมและประมวลผลชุดทดสอบเพื่อเข้ารหัสคอลัมน์แพ็กเกจ จากนั้นจึงรวมข้อมูลนี้เข้ากับการคาดการณ์ที่โมเดลของเราสร้างขึ้น\n" + ], + "metadata": { + "id": "fdgjzjkBwfWt" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Encode package column\n", + "package_encode <- lm_pumpkins_recipe %>% \n", + " prep() %>% \n", + " bake(new_data = pumpkins_test) %>% \n", + " select(package)\n", + "\n", + "\n", + "# Bind encoded package column to the results\n", + "lm_results <- lm_results %>% \n", + " bind_cols(package_encode %>% \n", + " rename(package_integer = package)) %>% \n", + " relocate(package_integer, .after = package)\n", + "\n", + "\n", + "# Print new results data frame\n", + "lm_results %>% \n", + " slice_head(n = 5)\n", +
"\n", + "\n", + "# Make a scatter plot\n", + "lm_results %>% \n", + " ggplot(mapping = aes(x = package_integer, y = price)) +\n", + " geom_point(size = 1.6) +\n", + " # Overlay a line of best fit\n", + " geom_line(aes(y = .pred), color = \"orange\", size = 1.2) +\n", + " xlab(\"package\")\n", + " \n" + ], + "outputs": [], + "metadata": { + "id": "R0nw719lwkHE" + } + }, + { + "cell_type": "markdown", + "source": [ + "ยอดเยี่ยม! ดังที่คุณเห็น โมเดลการถดถอยเชิงเส้นไม่ได้ทำงานได้ดีนักในการสรุปความสัมพันธ์ระหว่างแพ็กเกจกับราคาที่เกี่ยวข้อง\n", + "\n", + "🎃 ขอแสดงความยินดี คุณเพิ่งสร้างโมเดลที่สามารถช่วยทำนายราคาของฟักทองหลากหลายชนิดได้ แปลงฟักทองสำหรับวันหยุดของคุณจะสวยงาม แต่คุณอาจสร้างโมเดลที่ดีกว่านี้ได้!\n", + "\n", + "## 5. สร้างโมเดลการถดถอยเชิงพหุนาม\n", + "\n", + "

\n", + " \n", + "

อินโฟกราฟิกโดย Dasani Madipalli
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "HOCqJXLTwtWI" + } + }, + { + "cell_type": "markdown", + "source": [ + "บางครั้งข้อมูลของเราอาจไม่มีความสัมพันธ์แบบเชิงเส้นตรง แต่เรายังคงต้องการทำนายผลลัพธ์ การใช้การถดถอยพหุนาม (Polynomial Regression) สามารถช่วยให้เราทำนายความสัมพันธ์ที่ซับซ้อนและไม่เป็นเชิงเส้นได้\n", + "\n", + "ลองพิจารณาความสัมพันธ์ระหว่างขนาดของแพ็กเกจกับราคาของชุดข้อมูลฟักทองของเรา แม้ว่าบางครั้งจะมีความสัมพันธ์แบบเชิงเส้นตรงระหว่างตัวแปร เช่น ฟักทองที่มีปริมาตรมากขึ้นมักจะมีราคาสูงขึ้น แต่บางครั้งความสัมพันธ์เหล่านี้ไม่สามารถแสดงออกมาในรูปแบบระนาบหรือเส้นตรงได้\n", + "\n", + "> ✅ นี่คือตัวอย่างเพิ่มเติม [บางตัวอย่าง](https://online.stat.psu.edu/stat501/lesson/9/9.8) ของข้อมูลที่อาจใช้การถดถอยพหุนาม\n", + ">\n", + "> ลองพิจารณาความสัมพันธ์ระหว่างชนิดของฟักทอง (Variety) กับราคาในกราฟก่อนหน้านี้อีกครั้ง คุณคิดว่ากราฟกระจายนี้ควรวิเคราะห์ด้วยเส้นตรงหรือไม่? อาจจะไม่ ในกรณีนี้ คุณสามารถลองใช้การถดถอยพหุนามได้\n", + ">\n", + "> ✅ พหุนาม (Polynomial) คือการแสดงออกทางคณิตศาสตร์ที่อาจประกอบด้วยตัวแปรและสัมประสิทธิ์หนึ่งตัวหรือมากกว่า\n", + "\n", + "#### ฝึกโมเดลการถดถอยพหุนามโดยใช้ชุดข้อมูลการฝึก\n", + "\n", + "การถดถอยพหุนามจะสร้าง *เส้นโค้ง* เพื่อให้เหมาะสมกับข้อมูลที่ไม่เป็นเชิงเส้นมากขึ้น\n", + "\n", + "มาดูกันว่าโมเดลพหุนามจะทำงานได้ดีกว่าในการทำนายหรือไม่ เราจะทำตามขั้นตอนที่คล้ายกับที่เราเคยทำมาก่อนหน้านี้:\n", + "\n", + "- สร้างสูตร (recipe) ที่ระบุขั้นตอนการเตรียมข้อมูลที่ควรดำเนินการเพื่อให้ข้อมูลพร้อมสำหรับการสร้างโมเดล เช่น การเข้ารหัสตัวทำนาย (predictors) และการคำนวณพหุนามของระดับ *n*\n", + "\n", + "- สร้างสเปคของโมเดล (model specification)\n", + "\n", + "- รวมสูตรและสเปคของโมเดลเข้าด้วยกันในเวิร์กโฟลว์ (workflow)\n", + "\n", + "- สร้างโมเดลโดยการปรับเวิร์กโฟลว์\n", + "\n", + "- ประเมินว่าโมเดลทำงานได้ดีเพียงใดกับข้อมูลทดสอบ\n", + "\n", + "มาเริ่มกันเลย!\n" + ], + "metadata": { + "id": "VcEIpRV9wzYr" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Specify a recipe\r\n", + "poly_pumpkins_recipe 
<-\r\n", + " recipe(price ~ package, data = pumpkins_train) %>%\r\n", + " step_integer(all_predictors(), zero_based = TRUE) %>% \r\n", + " step_poly(all_predictors(), degree = 4)\r\n", + "\r\n", + "\r\n", + "# Create a model specification\r\n", + "poly_spec <- linear_reg() %>% \r\n", + " set_engine(\"lm\") %>% \r\n", + " set_mode(\"regression\")\r\n", + "\r\n", + "\r\n", + "# Bundle recipe and model spec into a workflow\r\n", + "poly_wf <- workflow() %>% \r\n", + " add_recipe(poly_pumpkins_recipe) %>% \r\n", + " add_model(poly_spec)\r\n", + "\r\n", + "\r\n", + "# Create a model\r\n", + "poly_wf_fit <- poly_wf %>% \r\n", + " fit(data = pumpkins_train)\r\n", + "\r\n", + "\r\n", + "# Print learned model coefficients\r\n", + "poly_wf_fit\r\n", + "\r\n", + " " + ], + "outputs": [], + "metadata": { + "id": "63n_YyRXw3CC" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### ประเมินประสิทธิภาพของโมเดล\n", + "\n", + "👏👏คุณได้สร้างโมเดลพหุนามเสร็จเรียบร้อยแล้ว มาลองทำการพยากรณ์บนชุดทดสอบกันเถอะ!\n" + ], + "metadata": { + "id": "-LHZtztSxDP0" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make price predictions on test data\r\n", + "poly_results <- poly_wf_fit %>% predict(new_data = pumpkins_test) %>% \r\n", + " bind_cols(pumpkins_test %>% select(c(package, price))) %>% \r\n", + " relocate(.pred, .after = last_col())\r\n", + "\r\n", + "\r\n", + "# Print the results\r\n", + "poly_results %>% \r\n", + " slice_head(n = 10)" + ], + "outputs": [], + "metadata": { + "id": "YUFpQ_dKxJGx" + } + }, + { + "cell_type": "markdown", + "source": [ + "วู้ฮู มาประเมินกันว่าโมเดลทำงานอย่างไรกับ test_set โดยใช้ `yardstick::metrics()`\n" + ], + "metadata": { + "id": "qxdyj86bxNGZ" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "metrics(data = poly_results, truth = price, estimate = .pred)" + ], + "outputs": [], + "metadata": { + "id": "8AW5ltkBxXDm" + } + }, + { + "cell_type": "markdown", + "source": [ + "🤩🤩 
ประสิทธิภาพดีขึ้นมาก\n", + "\n", + "`rmse` ลดลงจากประมาณ 7 เหลือประมาณ 3 ซึ่งแสดงถึงข้อผิดพลาดที่ลดลงระหว่างราคาจริงและราคาที่คาดการณ์ คุณสามารถ *ตีความแบบคร่าวๆ* ได้ว่าค่าเฉลี่ยของการคาดการณ์ที่ผิดพลาดนั้นผิดไปประมาณ \\$3 `rsq` เพิ่มขึ้นจากประมาณ 0.4 เป็น 0.8\n", + "\n", + "ตัวชี้วัดทั้งหมดนี้แสดงให้เห็นว่าโมเดลพหุนามทำงานได้ดีกว่าโมเดลเชิงเส้นมาก เยี่ยมมาก!\n", + "\n", + "ลองมาดูว่าพอจะสร้างภาพให้เห็นได้ไหม!\n" + ], + "metadata": { + "id": "6gLHNZDwxYaS" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Bind encoded package column to the results\r\n", + "poly_results <- poly_results %>% \r\n", + " bind_cols(package_encode %>% \r\n", + " rename(package_integer = package)) %>% \r\n", + " relocate(package_integer, .after = package)\r\n", + "\r\n", + "\r\n", + "# Print new results data frame\r\n", + "poly_results %>% \r\n", + " slice_head(n = 5)\r\n", + "\r\n", + "\r\n", + "# Make a scatter plot\r\n", + "poly_results %>% \r\n", + " ggplot(mapping = aes(x = package_integer, y = price)) +\r\n", + " geom_point(size = 1.6) +\r\n", + " # Overlay a line of best fit\r\n", + " geom_line(aes(y = .pred), color = \"midnightblue\", size = 1.2) +\r\n", + " xlab(\"package\")\r\n" + ], + "outputs": [], + "metadata": { + "id": "A83U16frxdF1" + } + }, + { + "cell_type": "markdown", + "source": [ + "คุณสามารถเห็นเส้นโค้งที่เข้ากับข้อมูลของคุณได้ดีขึ้น! 
🤩\n", + "\n", + "คุณสามารถทำให้เส้นนี้ดูเรียบเนียนขึ้นได้โดยการส่งสูตรพหุนามไปที่ `geom_smooth` แบบนี้:\n" + ], + "metadata": { + "id": "4U-7aHOVxlGU" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make a scatter plot\r\n", + "poly_results %>% \r\n", + " ggplot(mapping = aes(x = package_integer, y = price)) +\r\n", + " geom_point(size = 1.6) +\r\n", + " # Overlay a line of best fit\r\n", + " geom_smooth(method = lm, formula = y ~ poly(x, degree = 4), color = \"midnightblue\", size = 1.2, se = FALSE) +\r\n", + " xlab(\"package\")" + ], + "outputs": [], + "metadata": { + "id": "5vzNT0Uexm-w" + } + }, + { + "cell_type": "markdown", + "source": [ + "เหมือนกับเส้นโค้งที่เรียบเนียน!🤩\n", + "\n", + "นี่คือวิธีที่คุณจะสร้างการพยากรณ์ใหม่:\n" + ], + "metadata": { + "id": "v9u-wwyLxq4G" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make a hypothetical data frame\r\n", + "hypo_tibble <- tibble(package = \"bushel baskets\")\r\n", + "\r\n", + "# Make predictions using linear model\r\n", + "lm_pred <- lm_wf_fit %>% predict(new_data = hypo_tibble)\r\n", + "\r\n", + "# Make predictions using polynomial model\r\n", + "poly_pred <- poly_wf_fit %>% predict(new_data = hypo_tibble)\r\n", + "\r\n", + "# Return predictions in a list\r\n", + "list(\"linear model prediction\" = lm_pred, \r\n", + " \"polynomial model prediction\" = poly_pred)\r\n" + ], + "outputs": [], + "metadata": { + "id": "jRPSyfQGxuQv" + } + }, + { + "cell_type": "markdown", + "source": [ + "การทำนายด้วย `polynomial model` ดูสมเหตุสมผลเมื่อพิจารณาจากกราฟกระจายของ `price` และ `package`! และถ้านี่เป็นโมเดลที่ดีกว่าโมเดลก่อนหน้า เมื่อดูจากข้อมูลเดียวกัน คุณจำเป็นต้องวางแผนงบประมาณสำหรับฟักทองที่มีราคาสูงขึ้นเหล่านี้!\n", + "\n", + "🏆 ยอดเยี่ยมมาก! 
คุณได้สร้างโมเดลการถดถอยสองแบบในบทเรียนเดียว ในส่วนสุดท้ายเกี่ยวกับการถดถอย คุณจะได้เรียนรู้เกี่ยวกับการถดถอยโลจิสติกเพื่อกำหนดหมวดหมู่\n", + "\n", + "## **🚀ความท้าทาย**\n", + "\n", + "ทดลองใช้ตัวแปรที่แตกต่างกันหลายตัวในโน้ตบุ๊กนี้เพื่อดูว่าความสัมพันธ์ส่งผลต่อความแม่นยำของโมเดลอย่างไร\n", + "\n", + "## [**แบบทดสอบหลังบทเรียน**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/14/)\n", + "\n", + "## **ทบทวนและศึกษาด้วยตนเอง**\n", + "\n", + "ในบทเรียนนี้เราได้เรียนรู้เกี่ยวกับการถดถอยเชิงเส้น ยังมีประเภทการถดถอยที่สำคัญอื่น ๆ อีก อ่านเพิ่มเติมเกี่ยวกับเทคนิค Stepwise, Ridge, Lasso และ Elasticnet หลักสูตรที่ดีสำหรับการศึกษาเพิ่มเติมคือ [Stanford Statistical Learning course](https://online.stanford.edu/courses/sohs-ystatslearning-statistical-learning)\n", + "\n", + "หากคุณต้องการเรียนรู้เพิ่มเติมเกี่ยวกับการใช้เฟรมเวิร์ก Tidymodels ที่น่าทึ่งนี้ โปรดดูแหล่งข้อมูลต่อไปนี้:\n", + "\n", + "- เว็บไซต์ Tidymodels: [เริ่มต้นกับ Tidymodels](https://www.tidymodels.org/start/)\n", + "\n", + "- Max Kuhn และ Julia Silge, [*Tidy Modeling with R*](https://www.tmwr.org/)*.*\n", + "\n", + "###### **ขอขอบคุณ:**\n", + "\n", + "[Allison Horst](https://twitter.com/allison_horst?lang=en) สำหรับการสร้างภาพประกอบที่น่าทึ่งซึ่งทำให้ R ดูน่าสนใจและเข้าถึงได้มากขึ้น ค้นหาภาพประกอบเพิ่มเติมได้ที่ [แกลเลอรีของเธอ](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM)\n" + ], + "metadata": { + "id": "8zOLOWqMxzk5" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้อง แต่โปรดทราบว่าการแปลอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่ถูกต้อง เอกสารต้นฉบับในภาษาดั้งเดิมควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษามืออาชีพ 
เราไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความผิดที่เกิดจากการใช้การแปลนี้\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/th/2-Regression/3-Linear/solution/notebook.ipynb b/translations/th/2-Regression/3-Linear/solution/notebook.ipynb new file mode 100644 index 000000000..1f864da24 --- /dev/null +++ b/translations/th/2-Regression/3-Linear/solution/notebook.ipynb @@ -0,0 +1,1109 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## การวิเคราะห์เชิงเส้นและพหุนามสำหรับการตั้งราคาฟักทอง - บทเรียนที่ 3\n", + "\n", + "โหลดไลบรารีและชุดข้อมูลที่จำเป็น จากนั้นแปลงข้อมูลให้เป็น DataFrame ที่มีเพียงส่วนย่อยของข้อมูล:\n", + "\n", + "- เลือกเฉพาะฟักทองที่ตั้งราคาเป็นหน่วย bushel\n", + "- แปลงวันที่ให้เป็นเดือน\n", + "- คำนวณราคาให้เป็นค่าเฉลี่ยระหว่างราคาสูงสุดและต่ำสุด\n", + "- แปลงราคาให้สะท้อนถึงการตั้งราคาต่อปริมาณในหน่วย bushel\n" + ] + }, + { + "cell_type": "code", + "execution_count": 167, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NameTypePackageVarietySub VarietyGradeDateLow PriceHigh PriceMostly Low...Unit of SaleQualityConditionAppearanceStorageCropRepackTrans ModeUnnamed: 24Unnamed: 25
0BALTIMORENaN24 inch binsNaNNaNNaN4/29/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
1BALTIMORENaN24 inch binsNaNNaNNaN5/6/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
2BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
3BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
4BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN11/5/1690.0100.090.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " City Name Type Package Variety Sub Variety Grade Date \\\n", + "0 BALTIMORE NaN 24 inch bins NaN NaN NaN 4/29/17 \n", + "1 BALTIMORE NaN 24 inch bins NaN NaN NaN 5/6/17 \n", + "2 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "3 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "4 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 11/5/16 \n", + "\n", + " Low Price High Price Mostly Low ... Unit of Sale Quality Condition \\\n", + "0 270.0 280.0 270.0 ... NaN NaN NaN \n", + "1 270.0 280.0 270.0 ... NaN NaN NaN \n", + "2 160.0 160.0 160.0 ... NaN NaN NaN \n", + "3 160.0 160.0 160.0 ... NaN NaN NaN \n", + "4 90.0 100.0 90.0 ... NaN NaN NaN \n", + "\n", + " Appearance Storage Crop Repack Trans Mode Unnamed: 24 Unnamed: 25 \n", + "0 NaN NaN NaN E NaN NaN NaN \n", + "1 NaN NaN NaN E NaN NaN NaN \n", + "2 NaN NaN NaN N NaN NaN NaN \n", + "3 NaN NaN NaN N NaN NaN NaN \n", + "4 NaN NaN NaN N NaN NaN NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 167, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from datetime import datetime\n", + "\n", + "pumpkins = pd.read_csv('../../data/US-pumpkins.csv')\n", + "pumpkins.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 168, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MonthDayOfYearVarietyCityPackageLow PriceHigh PricePrice
709267PIE TYPEBALTIMORE1 1/9 bushel cartons15.015.013.636364
719267PIE TYPEBALTIMORE1 1/9 bushel cartons18.018.016.363636
7210274PIE TYPEBALTIMORE1 1/9 bushel cartons18.018.016.363636
7310274PIE TYPEBALTIMORE1 1/9 bushel cartons17.017.015.454545
7410281PIE TYPEBALTIMORE1 1/9 bushel cartons15.015.013.636364
\n", + "
" + ], + "text/plain": [ + " Month DayOfYear Variety City Package Low Price \\\n", + "70 9 267 PIE TYPE BALTIMORE 1 1/9 bushel cartons 15.0 \n", + "71 9 267 PIE TYPE BALTIMORE 1 1/9 bushel cartons 18.0 \n", + "72 10 274 PIE TYPE BALTIMORE 1 1/9 bushel cartons 18.0 \n", + "73 10 274 PIE TYPE BALTIMORE 1 1/9 bushel cartons 17.0 \n", + "74 10 281 PIE TYPE BALTIMORE 1 1/9 bushel cartons 15.0 \n", + "\n", + " High Price Price \n", + "70 15.0 13.636364 \n", + "71 18.0 16.363636 \n", + "72 18.0 16.363636 \n", + "73 17.0 15.454545 \n", + "74 15.0 13.636364 " + ] + }, + "execution_count": 168, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pumpkins = pumpkins[pumpkins['Package'].str.contains('bushel', case=True, regex=True)]\n", + "\n", + "new_columns = ['Package', 'Variety', 'City Name', 'Month', 'Low Price', 'High Price', 'Date']\n", + "pumpkins = pumpkins.drop([c for c in pumpkins.columns if c not in new_columns], axis=1)\n", + "\n", + "price = (pumpkins['Low Price'] + pumpkins['High Price']) / 2\n", + "\n", + "month = pd.DatetimeIndex(pumpkins['Date']).month\n", + "day_of_year = pd.to_datetime(pumpkins['Date']).apply(lambda dt: (dt-datetime(dt.year,1,1)).days)\n", + "\n", + "new_pumpkins = pd.DataFrame(\n", + " {'Month': month, \n", + " 'DayOfYear' : day_of_year, \n", + " 'Variety': pumpkins['Variety'], \n", + " 'City': pumpkins['City Name'], \n", + " 'Package': pumpkins['Package'], \n", + " 'Low Price': pumpkins['Low Price'],\n", + " 'High Price': pumpkins['High Price'], \n", + " 'Price': price})\n", + "\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1 1/9'), 'Price'] = price/1.1\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1/2'), 'Price'] = price*2\n", + "\n", + "new_pumpkins.head()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "แผนภาพกระจายเตือนเราว่าเรามีข้อมูลรายเดือนเฉพาะตั้งแต่เดือนสิงหาคมถึงเดือนธันวาคม เราอาจต้องการข้อมูลเพิ่มเติมเพื่อที่จะสรุปผลในรูปแบบเชิงเส้น\n" + ] 
+ }, + { + "cell_type": "code", + "execution_count": 169, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 169, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "new_pumpkins.plot.scatter('Month','Price')" + ] + }, + { + "cell_type": "code", + "execution_count": 170, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 170, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "new_pumpkins.plot.scatter('DayOfYear','Price')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 171, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-0.14878293554077535\n", + "-0.16673322492745407\n" + ] + } + ], + "source": [ + "print(new_pumpkins['Month'].corr(new_pumpkins['Price']))\n", + "print(new_pumpkins['DayOfYear'].corr(new_pumpkins['Price']))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ดูเหมือนว่าความสัมพันธ์จะค่อนข้างน้อย แต่มีความสัมพันธ์อื่นที่สำคัญกว่า - เพราะจุดราคาบนกราฟด้านบนดูเหมือนจะมีการจัดกลุ่มที่แตกต่างกันหลายกลุ่ม ลองสร้างกราฟที่แสดงพันธุ์ฟักทองที่แตกต่างกัน:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 172, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "ax=None\n", + "colors = ['red','blue','green','yellow']\n", + "for i,var in enumerate(new_pumpkins['Variety'].unique()):\n", + " ax = new_pumpkins[new_pumpkins['Variety']==var].plot.scatter('DayOfYear','Price',ax=ax,c=colors[i],label=var)" + ] + }, + { + "cell_type": "code", + "execution_count": 173, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 173, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "new_pumpkins.groupby('Variety')['Price'].mean().plot(kind='bar')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 174, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-0.2669192282197318\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 174, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "pie_pumpkins = new_pumpkins[new_pumpkins['Variety']=='PIE TYPE']\n", + "print(pie_pumpkins['DayOfYear'].corr(pie_pumpkins['Price']))\n", + "pie_pumpkins.plot.scatter('DayOfYear','Price')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### การถดถอยเชิงเส้น\n", + "\n", + "เราจะใช้ Scikit Learn เพื่อฝึกโมเดลการถดถอยเชิงเส้น:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 175, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error\n", + "from sklearn.model_selection import train_test_split" + ] + }, + { + "cell_type": "code", + "execution_count": 176, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 2.77 (17.2%)\n" + ] + } + ], + "source": [ + "X = pie_pumpkins['DayOfYear'].to_numpy().reshape(-1,1)\n", + "y = pie_pumpkins['Price']\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n", + "lin_reg = LinearRegression()\n", + "lin_reg.fit(X_train,y_train)\n", + "\n", + "pred = lin_reg.predict(X_test)\n", + "\n", + "mse = np.sqrt(mean_squared_error(y_test,pred))\n", + "print(f'Mean error: {mse:3.3} ({mse/np.mean(pred)*100:3.3}%)')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 177, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 177, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.scatter(X_test,y_test)\n", + "plt.plot(X_test,pred)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 178, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([-0.01751876]), 21.133734359909326)" + ] + }, + "execution_count": 178, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lin_reg.coef_, lin_reg.intercept_" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 179, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([16.64893156])" + ] + }, + "execution_count": 179, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Pumpkin price on programmer's day\n", + "\n", + "lin_reg.predict([[256]])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### การถดถอยเชิงพหุนาม\n", + "\n", + "บางครั้งความสัมพันธ์ระหว่างคุณลักษณะและผลลัพธ์อาจไม่เป็นเชิงเส้นโดยธรรมชาติ ตัวอย่างเช่น ราคาฟักทองอาจสูงในฤดูหนาว (เดือน 1, 2) จากนั้นลดลงในฤดูร้อน (เดือน 5-7) และกลับมาสูงขึ้นอีกครั้ง การถดถอยเชิงเส้นไม่สามารถจับความสัมพันธ์นี้ได้อย่างแม่นยำ\n", + "\n", + "ในกรณีนี้ เราอาจพิจารณาเพิ่มคุณลักษณะเพิ่มเติม วิธีง่ายๆ คือการใช้พหุนามจากคุณลักษณะอินพุต ซึ่งจะนำไปสู่ **การถดถอยเชิงพหุนาม** ใน Scikit Learn เราสามารถคำนวณคุณลักษณะพหุนามล่วงหน้าโดยอัตโนมัติด้วยการใช้ pipelines:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 180, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 2.73 (17.0%)\n", + "Model determination: 0.07639977655280217\n" + ] + }, + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 180, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + 
"text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "from sklearn.preprocessing import PolynomialFeatures\n", + "from sklearn.pipeline import make_pipeline\n", + "\n", + "pipeline = make_pipeline(PolynomialFeatures(2), LinearRegression())\n", + "\n", + "pipeline.fit(X_train,y_train)\n", + "\n", + "pred = pipeline.predict(X_test)\n", + "\n", + "mse = np.sqrt(mean_squared_error(y_test,pred))\n", + "print(f'Mean error: {mse:3.3} ({mse/np.mean(pred)*100:3.3}%)')\n", + "\n", + "score = pipeline.score(X_train,y_train)\n", + "print('Model determination: ', score)\n", + "\n", + "plt.scatter(X_test,y_test)\n", + "plt.plot(sorted(X_test),pipeline.predict(sorted(X_test)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### การเข้ารหัสชนิดต่าง ๆ\n", + "\n", + "ในโลกที่สมบูรณ์แบบ เราต้องการที่จะสามารถทำนายราคาของฟักทองชนิดต่าง ๆ โดยใช้โมเดลเดียวกัน เพื่อที่จะนำชนิดของฟักทองมาพิจารณา เราจำเป็นต้องแปลงมันให้อยู่ในรูปแบบตัวเลข หรือที่เรียกว่า **การเข้ารหัส** มีหลายวิธีที่เราสามารถทำได้:\n", + "\n", + "* การเข้ารหัสตัวเลขแบบง่าย ซึ่งจะสร้างตารางของชนิดฟักทองต่าง ๆ แล้วแทนชื่อชนิดด้วยดัชนีในตารางนั้น วิธีนี้ไม่ใช่ตัวเลือกที่ดีที่สุดสำหรับการวิเคราะห์ถดถอยเชิงเส้น (linear regression) เพราะการวิเคราะห์ถดถอยเชิงเส้นจะนำค่าตัวเลขของดัชนีมาพิจารณา และค่าตัวเลขนั้นอาจไม่มีความสัมพันธ์เชิงตัวเลขกับราคา\n", + "* การเข้ารหัสแบบ One-hot ซึ่งจะเปลี่ยนคอลัมน์ `Variety` ให้เป็น 4 คอลัมน์ที่แตกต่างกัน โดยแต่ละคอลัมน์จะเป็นตัวแทนของชนิดฟักทองแต่ละชนิด และจะมีค่าเป็น 1 หากแถวที่เกี่ยวข้องเป็นชนิดนั้น และมีค่าเป็น 0 หากไม่ใช่\n", + "\n", + "โค้ดด้านล่างแสดงวิธีที่เราสามารถเข้ารหัสชนิดฟักทองแบบ One-hot:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 181, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
FAIRYTALEMINIATUREMIXED HEIRLOOM VARIETIESPIE TYPE
700001
710001
720001
730001
740001
...............
17380100
17390100
17400100
17410100
17420100
\n", + "

415 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " FAIRYTALE MINIATURE MIXED HEIRLOOM VARIETIES PIE TYPE\n", + "70 0 0 0 1\n", + "71 0 0 0 1\n", + "72 0 0 0 1\n", + "73 0 0 0 1\n", + "74 0 0 0 1\n", + "... ... ... ... ...\n", + "1738 0 1 0 0\n", + "1739 0 1 0 0\n", + "1740 0 1 0 0\n", + "1741 0 1 0 0\n", + "1742 0 1 0 0\n", + "\n", + "[415 rows x 4 columns]" + ] + }, + "execution_count": 181, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.get_dummies(new_pumpkins['Variety'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### การถดถอยเชิงเส้นบนชนิดพันธุ์\n", + "\n", + "ตอนนี้เราจะใช้โค้ดเดิมเหมือนข้างต้น แต่แทนที่จะใช้ `DayOfYear` เราจะใช้ชนิดพันธุ์ที่ผ่านการเข้ารหัสแบบ one-hot เป็นข้อมูลนำเข้า:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 182, + "metadata": {}, + "outputs": [], + "source": [ + "X = pd.get_dummies(new_pumpkins['Variety'])\n", + "y = new_pumpkins['Price']" + ] + }, + { + "cell_type": "code", + "execution_count": 183, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 5.24 (19.7%)\n", + "Model determination: 0.774085281105197\n" + ] + } + ], + "source": [ + "def run_linear_regression(X,y):\n", + " X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n", + " lin_reg = LinearRegression()\n", + " lin_reg.fit(X_train,y_train)\n", + "\n", + " pred = lin_reg.predict(X_test)\n", + "\n", + " mse = np.sqrt(mean_squared_error(y_test,pred))\n", + " print(f'Mean error: {mse:3.3} ({mse/np.mean(pred)*100:3.3}%)')\n", + "\n", + " score = lin_reg.score(X_train,y_train)\n", + " print('Model determination: ', score)\n", + "\n", + "run_linear_regression(X,y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "เรายังสามารถลองใช้คุณสมบัติอื่นในลักษณะเดียวกัน และรวมเข้ากับคุณสมบัติทางตัวเลข เช่น `Month` หรือ `DayOfYear`:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 184, + "metadata": 
{}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 2.84 (10.5%)\n", + "Model determination: 0.9401096672643048\n" + ] + } + ], + "source": [ + "X = pd.get_dummies(new_pumpkins['Variety']) \\\n", + " .join(new_pumpkins['Month']) \\\n", + " .join(pd.get_dummies(new_pumpkins['City'])) \\\n", + " .join(pd.get_dummies(new_pumpkins['Package']))\n", + "y = new_pumpkins['Price']\n", + "\n", + "run_linear_regression(X,y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### การถดถอยเชิงพหุนาม\n", + "\n", + "การถดถอยเชิงพหุนามสามารถนำมาใช้กับคุณลักษณะเชิงหมวดหมู่ที่ผ่านการเข้ารหัสแบบ one-hot ได้เช่นกัน โค้ดสำหรับการฝึกการถดถอยเชิงพหุนามจะมีลักษณะเหมือนกับที่เราได้เห็นไปก่อนหน้านี้\n" + ] + }, + { + "cell_type": "code", + "execution_count": 185, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 2.23 (8.25%)\n", + "Model determination: 0.9652870784724543\n" + ] + } + ], + "source": [ + "from sklearn.preprocessing import PolynomialFeatures\n", + "from sklearn.pipeline import make_pipeline\n", + "\n", + "pipeline = make_pipeline(PolynomialFeatures(2), LinearRegression())\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n", + "\n", + "pipeline.fit(X_train,y_train)\n", + "\n", + "pred = pipeline.predict(X_test)\n", + "\n", + "mse = np.sqrt(mean_squared_error(y_test,pred))\n", + "print(f'Mean error: {mse:3.3} ({mse/np.mean(pred)*100:3.3}%)')\n", + "\n", + "score = pipeline.score(X_train,y_train)\n", + "print('Model determination: ', score)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) 
แม้ว่าเราจะพยายามให้การแปลมีความถูกต้อง แต่โปรดทราบว่าการแปลอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่แม่นยำ เอกสารต้นฉบับในภาษาต้นทางควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษาจากผู้เชี่ยวชาญ เราจะไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความที่ผิดพลาดซึ่งเกิดจากการใช้การแปลนี้\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "86193a1ab0ba47eac1c69c1756090baa3b420b3eea7d4aafab8b85f8b312f0c5" + }, + "kernelspec": { + "display_name": "Python 3.7.0 64-bit ('3.7')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.5" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "d77bd89ae7e79780c68c58bab91f13f8", + "translation_date": "2025-09-06T13:11:52+00:00", + "source_file": "2-Regression/3-Linear/solution/notebook.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/th/2-Regression/4-Logistic/notebook.ipynb b/translations/th/2-Regression/4-Logistic/notebook.ipynb new file mode 100644 index 000000000..f67ac940f --- /dev/null +++ b/translations/th/2-Regression/4-Logistic/notebook.ipynb @@ -0,0 +1,269 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## สายพันธุ์ฟักทองและสี\n", + "\n", + "โหลดไลบรารีและชุดข้อมูลที่จำเป็น แปลงข้อมูลให้เป็น dataframe ที่มีเพียงส่วนย่อยของข้อมูล:\n", + "\n", + "มาดูความสัมพันธ์ระหว่างสีและสายพันธุ์\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NameTypePackageVarietySub VarietyGradeDateLow PriceHigh PriceMostly Low...Unit of SaleQualityConditionAppearanceStorageCropRepackTrans ModeUnnamed: 24Unnamed: 25
0BALTIMORENaN24 inch binsNaNNaNNaN4/29/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
1BALTIMORENaN24 inch binsNaNNaNNaN5/6/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
2BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
3BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
4BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN11/5/1690.0100.090.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " City Name Type Package Variety Sub Variety Grade Date \\\n", + "0 BALTIMORE NaN 24 inch bins NaN NaN NaN 4/29/17 \n", + "1 BALTIMORE NaN 24 inch bins NaN NaN NaN 5/6/17 \n", + "2 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "3 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "4 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 11/5/16 \n", + "\n", + " Low Price High Price Mostly Low ... Unit of Sale Quality Condition \\\n", + "0 270.0 280.0 270.0 ... NaN NaN NaN \n", + "1 270.0 280.0 270.0 ... NaN NaN NaN \n", + "2 160.0 160.0 160.0 ... NaN NaN NaN \n", + "3 160.0 160.0 160.0 ... NaN NaN NaN \n", + "4 90.0 100.0 90.0 ... NaN NaN NaN \n", + "\n", + " Appearance Storage Crop Repack Trans Mode Unnamed: 24 Unnamed: 25 \n", + "0 NaN NaN NaN E NaN NaN NaN \n", + "1 NaN NaN NaN E NaN NaN NaN \n", + "2 NaN NaN NaN N NaN NaN NaN \n", + "3 NaN NaN NaN N NaN NaN NaN \n", + "4 NaN NaN NaN N NaN NaN NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "full_pumpkins = pd.read_csv('../data/US-pumpkins.csv')\n", + "\n", + "full_pumpkins.head()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้อง แต่โปรดทราบว่าการแปลอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่แม่นยำ เอกสารต้นฉบับในภาษาต้นทางควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษาจากผู้เชี่ยวชาญ เราไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความที่ผิดพลาดซึ่งเกิดจากการใช้การแปลนี้\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 
+ }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.1" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "dee08c2b49057b0de8b6752c4dbca368", + "translation_date": "2025-09-06T13:26:44+00:00", + "source_file": "2-Regression/4-Logistic/notebook.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/th/2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb b/translations/th/2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb new file mode 100644 index 000000000..1c2468329 --- /dev/null +++ b/translations/th/2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb @@ -0,0 +1,686 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## สร้างโมเดล Logistic Regression - บทเรียนที่ 4\n", + "\n", + "![ภาพประกอบ Logistic vs. Linear Regression](../../../../../../2-Regression/4-Logistic/images/linear-vs-logistic.png)\n", + "\n", + "#### **[แบบทดสอบก่อนเรียน](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/15/)**\n", + "\n", + "#### บทนำ\n", + "\n", + "ในบทเรียนสุดท้ายเกี่ยวกับ Regression ซึ่งเป็นหนึ่งในเทคนิคพื้นฐานของ *คลาสสิก* ML เราจะมาดู Logistic Regression กัน คุณสามารถใช้เทคนิคนี้เพื่อค้นหารูปแบบในการทำนายหมวดหมู่แบบไบนารี ตัวอย่างเช่น ลูกอมนี้เป็นช็อกโกแลตหรือไม่? โรคนี้ติดต่อหรือไม่? 
ลูกค้าคนนี้จะเลือกสินค้านี้หรือไม่?\n", + "\n", + "ในบทเรียนนี้ คุณจะได้เรียนรู้:\n", + "\n", + "- เทคนิคสำหรับ Logistic Regression\n", + "\n", + "✅ เพิ่มความเข้าใจเกี่ยวกับการทำงานกับ Regression ประเภทนี้ใน [โมดูลการเรียนรู้](https://learn.microsoft.com/training/modules/introduction-classification-models/?WT.mc_id=academic-77952-leestott)\n", + "\n", + "## ความต้องการเบื้องต้น\n", + "\n", + "หลังจากที่เราได้ทำงานกับข้อมูลฟักทองมาแล้ว ตอนนี้เราคุ้นเคยกับมันมากพอที่จะสังเกตเห็นว่ามีหมวดหมู่แบบไบนารีที่เราสามารถทำงานด้วยได้: `Color`\n", + "\n", + "เรามาสร้างโมเดล Logistic Regression เพื่อทำนายว่า *ฟักทองที่กำหนดมีแนวโน้มที่จะมีสีอะไร* (สีส้ม 🎃 หรือสีขาว 👻)\n", + "\n", + "> ทำไมเราถึงพูดถึงการจัดหมวดหมู่แบบไบนารีในบทเรียนที่เกี่ยวกับ Regression? ก็เพื่อความสะดวกทางภาษาเท่านั้น เพราะ Logistic Regression นั้น [จริงๆ แล้วเป็นวิธีการจัดหมวดหมู่](https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression) แม้ว่าจะอิงตามเส้นตรงก็ตาม เรียนรู้วิธีอื่นๆ ในการจัดหมวดหมู่ข้อมูลในกลุ่มบทเรียนถัดไป\n", + "\n", + "สำหรับบทเรียนนี้ เราจะต้องใช้แพ็กเกจดังต่อไปนี้:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) คือ [ชุดของแพ็กเกจ R](https://www.tidyverse.org/packages) ที่ออกแบบมาเพื่อทำให้วิทยาศาสตร์ข้อมูลเร็วขึ้น ง่ายขึ้น และสนุกขึ้น!\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) เป็นกรอบงานที่เป็น [ชุดของแพ็กเกจ](https://www.tidymodels.org/packages/) สำหรับการสร้างโมเดลและการเรียนรู้ของเครื่อง\n", + "\n", + "- `janitor`: [แพ็กเกจ janitor](https://github.com/sfirke/janitor) มีเครื่องมือเล็กๆ ที่เรียบง่ายสำหรับการตรวจสอบและทำความสะอาดข้อมูลที่ไม่สมบูรณ์\n", + "\n", + "- `ggbeeswarm`: [แพ็กเกจ ggbeeswarm](https://github.com/eclarke/ggbeeswarm) มีวิธีการสร้างกราฟแบบ beeswarm โดยใช้ ggplot2\n", + "\n", + "คุณสามารถติดตั้งแพ็กเกจเหล่านี้ได้ด้วยคำสั่ง:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\", \"janitor\", \"ggbeeswarm\"))`\n", + "\n", + 
"หรือใช้สคริปต์ด้านล่างเพื่อตรวจสอบว่าคุณมีแพ็กเกจที่จำเป็นสำหรับบทเรียนนี้หรือไม่ และติดตั้งให้ในกรณีที่ยังไม่มี\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "suppressWarnings(if (!require(\"pacman\"))install.packages(\"pacman\"))\n", + "\n", + "pacman::p_load(tidyverse, tidymodels, janitor, ggbeeswarm)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## **กำหนดคำถาม**\n", + "\n", + "สำหรับจุดประสงค์ของเรา เราจะกำหนดคำถามในรูปแบบไบนารี: 'สีขาว' หรือ 'ไม่ใช่สีขาว' ในชุดข้อมูลของเรายังมีหมวดหมู่ 'ลาย' อยู่ด้วย แต่มีตัวอย่างในหมวดหมู่นี้น้อยมาก ดังนั้นเราจะไม่ใช้มัน และมันจะหายไปเมื่อเราลบค่าที่เป็น null ออกจากชุดข้อมูลอยู่แล้ว\n", + "\n", + "> 🎃 ข้อเท็จจริงสนุกๆ บางครั้งเราจะเรียกฟักทองสีขาวว่า 'ฟักทองผี' ฟักทองเหล่านี้แกะสลักได้ยากกว่า จึงไม่ได้รับความนิยมเท่าฟักทองสีส้ม แต่ก็ดูเท่ดี! ดังนั้นเราสามารถปรับคำถามของเราใหม่ได้ว่า: 'ฟักทองผี' หรือ 'ไม่ใช่ฟักทองผี' 👻\n", + "\n", + "## **เกี่ยวกับการถดถอยโลจิสติก**\n", + "\n", + "การถดถอยโลจิสติกแตกต่างจากการถดถอยเชิงเส้นที่คุณเคยเรียนรู้มาก่อนในหลายๆ แง่มุมที่สำคัญ\n", + "\n", + "#### **การจัดประเภทแบบไบนารี**\n", + "\n", + "การถดถอยโลจิสติกไม่ได้มีคุณสมบัติเหมือนกับการถดถอยเชิงเส้น การถดถอยโลจิสติกให้การคาดการณ์เกี่ยวกับ `หมวดหมู่แบบไบนารี` (\"สีส้มหรือไม่ใช่สีส้ม\") ในขณะที่การถดถอยเชิงเส้นสามารถคาดการณ์ `ค่าต่อเนื่อง` ได้ เช่น จากแหล่งที่มาของฟักทองและเวลาที่เก็บเกี่ยว *ราคาของมันจะเพิ่มขึ้นเท่าไร*\n", + "\n", + "![อินโฟกราฟิกโดย Dasani Madipalli](../../../../../../2-Regression/4-Logistic/images/pumpkin-classifier.png)\n", + "\n", + "### การจัดประเภทอื่นๆ\n", + "\n", + "ยังมีการถดถอยโลจิสติกประเภทอื่นๆ เช่น มัลติโนเมียลและออร์ดินัล:\n", + "\n", + "- **มัลติโนเมียล** ซึ่งเกี่ยวข้องกับการมีมากกว่าหนึ่งหมวดหมู่ - \"สีส้ม, สีขาว, และลาย\"\n", + "\n", + "- **ออร์ดินัล** ซึ่งเกี่ยวข้องกับหมวดหมู่ที่มีลำดับ เหมาะสมหากเราต้องการจัดลำดับผลลัพธ์อย่างมีตรรกะ เช่น 
ฟักทองของเราที่จัดลำดับตามขนาดที่มีจำนวนจำกัด (เล็กมาก, เล็ก, กลาง, ใหญ่, ใหญ่มาก, ใหญ่ที่สุด)\n", + "\n", + "![การถดถอยมัลติโนเมียล vs ออร์ดินัล](../../../../../../2-Regression/4-Logistic/images/multinomial-vs-ordinal.png)\n", + "\n", + "#### **ตัวแปรไม่จำเป็นต้องมีความสัมพันธ์กัน**\n", + "\n", + "จำได้ไหมว่าการถดถอยเชิงเส้นทำงานได้ดีขึ้นเมื่อมีตัวแปรที่มีความสัมพันธ์กันมากขึ้น? การถดถอยโลจิสติกตรงกันข้าม - ตัวแปรไม่จำเป็นต้องสอดคล้องกัน ซึ่งเหมาะกับข้อมูลนี้ที่มีความสัมพันธ์ค่อนข้างอ่อน\n", + "\n", + "#### **คุณต้องการข้อมูลที่สะอาดและมากพอ**\n", + "\n", + "การถดถอยโลจิสติกจะให้ผลลัพธ์ที่แม่นยำมากขึ้นหากคุณใช้ข้อมูลจำนวนมาก ชุดข้อมูลขนาดเล็กของเราไม่เหมาะสมที่สุดสำหรับงานนี้ ดังนั้นโปรดคำนึงถึงข้อนี้\n", + "\n", + "✅ ลองคิดถึงประเภทของข้อมูลที่เหมาะสมกับการถดถอยโลจิสติก\n", + "\n", + "## แบบฝึกหัด - จัดระเบียบข้อมูล\n", + "\n", + "ก่อนอื่น ทำความสะอาดข้อมูลเล็กน้อย โดยลบค่าที่เป็น null และเลือกเฉพาะบางคอลัมน์:\n", + "\n", + "1. เพิ่มโค้ดต่อไปนี้:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Load the core tidyverse packages\n", + "library(tidyverse)\n", + "\n", + "# Import the data and clean column names\n", + "pumpkins <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/2-Regression/data/US-pumpkins.csv\") %>% \n", + " clean_names()\n", + "\n", + "# Select desired columns\n", + "pumpkins_select <- pumpkins %>% \n", + " select(c(city_name, package, variety, origin, item_size, color)) \n", + "\n", + "# Drop rows containing missing values and encode color as factor (category)\n", + "pumpkins_select <- pumpkins_select %>% \n", + " drop_na() %>% \n", + " mutate(color = factor(color))\n", + "\n", + "# View the first few rows\n", + "pumpkins_select %>% \n", + " slice_head(n = 5)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "คุณสามารถดูข้อมูลใน dataframe ใหม่ของคุณได้เสมอ โดยใช้ฟังก์ชัน 
[*glimpse()*](https://pillar.r-lib.org/reference/glimpse.html) ดังตัวอย่างด้านล่าง:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "pumpkins_select %>% \n", + " glimpse()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "เรามายืนยันกันว่าเรากำลังทำปัญหาการจำแนกประเภทแบบไบนารีจริงๆ:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Subset distinct observations in outcome column\n", + "pumpkins_select %>% \n", + " distinct(color)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### การแสดงผล - แผนภูมิประเภทหมวดหมู่\n", + "ตอนนี้คุณได้โหลดข้อมูลฟักทองขึ้นมาอีกครั้งและทำการทำความสะอาดเพื่อให้ได้ชุดข้อมูลที่มีตัวแปรบางตัว รวมถึงตัวแปร Color มาลองแสดงผล dataframe ในโน้ตบุ๊กโดยใช้ไลบรารี ggplot กัน\n", + "\n", + "ไลบรารี ggplot มีวิธีที่น่าสนใจในการแสดงผลข้อมูลของคุณ ตัวอย่างเช่น คุณสามารถเปรียบเทียบการกระจายตัวของข้อมูลสำหรับแต่ละ Variety และ Color ในแผนภูมิประเภทหมวดหมู่ได้\n", + "\n", + "1. 
สร้างแผนภูมิประเภทนี้โดยใช้ฟังก์ชัน `geom_bar` โดยใช้ข้อมูลฟักทองของเรา และกำหนดการจับคู่สีสำหรับแต่ละหมวดหมู่ของฟักทอง (สีส้มหรือสีขาว):\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Specify colors for each value of the hue variable\n", + "palette <- c(ORANGE = \"orange\", WHITE = \"wheat\")\n", + "\n", + "# Create the bar plot\n", + "ggplot(pumpkins_select, aes(y = variety, fill = color)) +\n", + " geom_bar(position = \"dodge\") +\n", + " scale_fill_manual(values = palette) +\n", + " labs(y = \"Variety\", fill = \"Color\") +\n", + " theme_minimal()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "จากการสังเกตข้อมูล คุณสามารถเห็นได้ว่า ข้อมูลสีมีความสัมพันธ์กับชนิดพันธุ์อย่างไร\n", + "\n", + "✅ จากกราฟประเภทนี้ มีการสำรวจที่น่าสนใจอะไรบ้างที่คุณสามารถจินตนาการได้?\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### การเตรียมข้อมูล: การเข้ารหัสคุณลักษณะ\n", + "\n", + "ชุดข้อมูลฟักทองของเรามีค่าที่เป็นสตริงในทุกคอลัมน์ การทำงานกับข้อมูลประเภทหมวดหมู่เป็นเรื่องง่ายสำหรับมนุษย์ แต่ไม่ใช่สำหรับเครื่องจักร อัลกอริธึมการเรียนรู้ของเครื่องทำงานได้ดีเมื่อใช้ตัวเลข นั่นเป็นเหตุผลว่าทำไมการเข้ารหัสจึงเป็นขั้นตอนสำคัญในกระบวนการเตรียมข้อมูล เนื่องจากช่วยให้เราสามารถเปลี่ยนข้อมูลประเภทหมวดหมู่ให้เป็นข้อมูลเชิงตัวเลขโดยไม่สูญเสียข้อมูล การเข้ารหัสที่ดีนำไปสู่การสร้างโมเดลที่ดี\n", + "\n", + "สำหรับการเข้ารหัสคุณลักษณะ มีตัวเข้ารหัสหลักสองประเภท:\n", + "\n", + "1. **Ordinal encoder**: เหมาะสำหรับตัวแปรประเภทลำดับ (ordinal variables) ซึ่งเป็นตัวแปรหมวดหมู่ที่ข้อมูลมีการเรียงลำดับอย่างมีเหตุผล เช่น คอลัมน์ `item_size` ในชุดข้อมูลของเรา มันจะสร้างการจับคู่ที่แต่ละหมวดหมู่ถูกแทนด้วยตัวเลข ซึ่งตัวเลขนั้นแสดงถึงลำดับของหมวดหมู่ในคอลัมน์\n", + "\n", + "2. 
**Categorical encoder**: เหมาะสำหรับตัวแปรประเภทนามธรรม (nominal variables) ซึ่งเป็นตัวแปรหมวดหมู่ที่ข้อมูลไม่มีการเรียงลำดับอย่างมีเหตุผล เช่น คุณลักษณะทั้งหมดที่แตกต่างจาก `item_size` ในชุดข้อมูลของเรา มันใช้การเข้ารหัสแบบ one-hot ซึ่งหมายความว่าแต่ละหมวดหมู่จะถูกแทนด้วยคอลัมน์ไบนารี: ตัวแปรที่ถูกเข้ารหัสจะเท่ากับ 1 หากฟักทองนั้นอยู่ใน Variety นั้น และเท่ากับ 0 หากไม่ใช่\n", + "\n", + "Tidymodels มีอีกหนึ่งแพ็กเกจที่น่าสนใจ: [recipes](https://recipes.tidymodels.org/) - แพ็กเกจสำหรับการเตรียมข้อมูล เราจะกำหนด `recipe` ที่ระบุว่าคอลัมน์ตัวทำนายทั้งหมดควรถูกเข้ารหัสเป็นชุดของตัวเลข จากนั้น `prep` เพื่อประมาณค่าปริมาณและสถิติที่จำเป็นสำหรับการดำเนินการใด ๆ และสุดท้าย `bake` เพื่อใช้การคำนวณกับข้อมูลใหม่\n", + "\n", + "> โดยปกติ recipes มักถูกใช้เป็นตัวเตรียมข้อมูลสำหรับการสร้างโมเดล ซึ่งมันจะกำหนดว่าควรมีการดำเนินการใดบ้างกับชุดข้อมูลเพื่อเตรียมให้พร้อมสำหรับการสร้างโมเดล ในกรณีนี้ **แนะนำอย่างยิ่ง** ให้คุณใช้ `workflow()` แทนการประมาณ recipe ด้วย prep และ bake ด้วยตนเอง เราจะเห็นทั้งหมดนี้ในอีกสักครู่\n", + ">\n", + "> อย่างไรก็ตาม สำหรับตอนนี้ เรากำลังใช้ recipes + prep + bake เพื่อระบุว่าควรมีการดำเนินการใดบ้างกับชุดข้อมูลเพื่อเตรียมให้พร้อมสำหรับการวิเคราะห์ข้อมูล และจากนั้นดึงข้อมูลที่ผ่านการเตรียมพร้อมแล้วพร้อมกับขั้นตอนที่ได้ดำเนินการ\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Preprocess and extract data to allow some data analysis\n", + "baked_pumpkins <- recipe(color ~ ., data = pumpkins_select) %>%\n", + " # Define ordering for item_size column\n", + " step_mutate(item_size = ordered(item_size, levels = c('sml', 'med', 'med-lge', 'lge', 'xlge', 'jbo', 'exjbo'))) %>%\n", + " # Convert factors to numbers using the order defined above (Ordinal encoding)\n", + " step_integer(item_size, zero_based = F) %>%\n", + " # Encode all other predictors using one hot encoding\n", + " step_dummy(all_nominal(), -all_outcomes(), one_hot = TRUE) %>%\n", + " 
prep(data = pumpkins_select) %>%\n", + " bake(new_data = NULL)\n", + "\n", + "# Display the first few rows of preprocessed data\n", + "baked_pumpkins %>% \n", + " slice_head(n = 5)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "✅ ข้อดีของการใช้ ordinal encoder กับคอลัมน์ Item Size คืออะไร?\n", + "\n", + "### วิเคราะห์ความสัมพันธ์ระหว่างตัวแปร\n", + "\n", + "เมื่อเราได้ทำการเตรียมข้อมูลเบื้องต้นแล้ว เราสามารถวิเคราะห์ความสัมพันธ์ระหว่างฟีเจอร์และป้ายกำกับ (label) เพื่อทำความเข้าใจว่าโมเดลจะสามารถทำนายป้ายกำกับจากฟีเจอร์ได้ดีแค่ไหน วิธีที่ดีที่สุดในการวิเคราะห์ประเภทนี้คือการสร้างกราฟ \n", + "เราจะใช้ฟังก์ชัน ggplot `geom_boxplot` อีกครั้ง เพื่อแสดงความสัมพันธ์ระหว่าง Item Size, Variety และ Color ในกราฟแบบหมวดหมู่ (categorical plot) เพื่อให้การแสดงผลข้อมูลดียิ่งขึ้น เราจะใช้คอลัมน์ Item Size ที่ถูกเข้ารหัสแล้ว และคอลัมน์ Variety ที่ยังไม่ได้เข้ารหัส\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Define the color palette\n", + "palette <- c(ORANGE = \"orange\", WHITE = \"wheat\")\n", + "\n", + "# We need the encoded Item Size column to use it as the x-axis values in the plot\n", + "pumpkins_select_plot<-pumpkins_select\n", + "pumpkins_select_plot$item_size <- baked_pumpkins$item_size\n", + "\n", + "# Create the grouped box plot\n", + "ggplot(pumpkins_select_plot, aes(x = `item_size`, y = color, fill = color)) +\n", + " geom_boxplot() +\n", + " facet_grid(variety ~ ., scales = \"free_x\") +\n", + " scale_fill_manual(values = palette) +\n", + " labs(x = \"Item Size\", y = \"\") +\n", + " theme_minimal() +\n", + " theme(strip.text = element_text(size = 12)) +\n", + " theme(axis.text.x = element_text(size = 10)) +\n", + " theme(axis.title.x = element_text(size = 12)) +\n", + " theme(axis.title.y = element_blank()) +\n", + " theme(legend.position = \"bottom\") +\n", + " guides(fill = guide_legend(title = \"Color\")) +\n", + " 
theme(panel.spacing = unit(0.5, \"lines\"))+\n", + " theme(strip.text.y = element_text(size = 4, hjust = 0)) \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### ใช้ swarm plot\n", + "\n", + "เนื่องจาก Color เป็นหมวดหมู่แบบไบนารี (สีขาวหรือไม่ใช่สีขาว) จึงต้องใช้ '[วิธีการเฉพาะทาง](https://github.com/rstudio/cheatsheets/blob/main/data-visualization.pdf)' ในการแสดงผลข้อมูล\n", + "\n", + "ลองใช้ `swarm plot` เพื่อแสดงการกระจายของสีในความสัมพันธ์กับ item_size\n", + "\n", + "เราจะใช้ [ggbeeswarm package](https://github.com/eclarke/ggbeeswarm) ซึ่งมีวิธีการสร้างกราฟแบบ beeswarm โดยใช้ ggplot2 กราฟ beeswarm เป็นวิธีการแสดงจุดข้อมูลที่ปกติจะทับซ้อนกันให้อยู่ข้างกันแทน\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Create beeswarm plots of color and item_size\n", + "baked_pumpkins %>% \n", + " mutate(color = factor(color)) %>% \n", + " ggplot(mapping = aes(x = color, y = item_size, color = color)) +\n", + " geom_quasirandom() +\n", + " scale_color_brewer(palette = \"Dark2\", direction = -1) +\n", + " theme(legend.position = \"none\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ตอนนี้เรามีความเข้าใจเกี่ยวกับความสัมพันธ์ระหว่างหมวดหมู่แบบทวิภาคของสีและกลุ่มขนาดที่ใหญ่ขึ้นแล้ว ลองมาสำรวจการใช้โลจิสติกรีเกรสชันเพื่อกำหนดสีที่เป็นไปได้ของฟักทองแต่ละลูกกัน\n", + "\n", + "## สร้างโมเดลของคุณ\n", + "\n", + "เลือกตัวแปรที่คุณต้องการใช้ในโมเดลการจำแนกประเภท และแบ่งข้อมูลออกเป็นชุดฝึกอบรมและชุดทดสอบ [rsample](https://rsample.tidymodels.org/) ซึ่งเป็นแพ็กเกจใน Tidymodels มีโครงสร้างพื้นฐานสำหรับการแบ่งข้อมูลและการสุ่มตัวอย่างซ้ำอย่างมีประสิทธิภาพ:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Split data into 80% for training and 20% for testing\n", + "set.seed(2056)\n", + "pumpkins_split <- 
pumpkins_select %>% \n", + " initial_split(prop = 0.8)\n", + "\n", + "# Extract the data in each split\n", + "pumpkins_train <- training(pumpkins_split)\n", + "pumpkins_test <- testing(pumpkins_split)\n", + "\n", + "# Print out the first 5 rows of the training set\n", + "pumpkins_train %>% \n", + " slice_head(n = 5)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "🙌 เราพร้อมแล้วที่จะฝึกโมเดลโดยการปรับคุณสมบัติการฝึกให้เข้ากับป้ายกำกับการฝึก (สี)\n", + "\n", + "เราจะเริ่มต้นด้วยการสร้างสูตรที่ระบุขั้นตอนการเตรียมข้อมูลที่ควรดำเนินการเพื่อเตรียมข้อมูลให้พร้อมสำหรับการสร้างโมเดล เช่น การเข้ารหัสตัวแปรประเภทให้เป็นชุดของตัวเลข เช่นเดียวกับ `baked_pumpkins` เราสร้าง `pumpkins_recipe` แต่จะไม่ใช้ `prep` และ `bake` เนื่องจากจะถูกรวมไว้ในเวิร์กโฟลว์ ซึ่งคุณจะได้เห็นในอีกไม่กี่ขั้นตอนจากนี้\n", + "\n", + "มีหลายวิธีในการระบุโมเดลการถดถอยโลจิสติกใน Tidymodels ดูที่ `?logistic_reg()` สำหรับตอนนี้ เราจะระบุโมเดลการถดถอยโลจิสติกผ่านเครื่องยนต์เริ่มต้น `stats::glm()`\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Create a recipe that specifies preprocessing steps for modelling\n", + "pumpkins_recipe <- recipe(color ~ ., data = pumpkins_train) %>% \n", + " step_mutate(item_size = ordered(item_size, levels = c('sml', 'med', 'med-lge', 'lge', 'xlge', 'jbo', 'exjbo'))) %>%\n", + " step_integer(item_size, zero_based = F) %>% \n", + " step_dummy(all_nominal(), -all_outcomes(), one_hot = TRUE)\n", + "\n", + "# Create a logistic model specification\n", + "log_reg <- logistic_reg() %>% \n", + " set_engine(\"glm\") %>% \n", + " set_mode(\"classification\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ตอนนี้เรามีสูตรและสเปคของโมเดลแล้ว เราจำเป็นต้องหาวิธีรวมสิ่งเหล่านี้เข้าด้วยกันเป็นวัตถุหนึ่ง ที่จะช่วยจัดการการเตรียมข้อมูล (prep+bake เบื้องหลัง) ฝึกโมเดลด้วยข้อมูลที่ผ่านการเตรียมแล้ว 
และยังรองรับกิจกรรมการประมวลผลหลังการฝึกโมเดลได้อีกด้วย\n", + "\n", + "ใน Tidymodels วัตถุที่สะดวกนี้เรียกว่า [`workflow`](https://workflows.tidymodels.org/) ซึ่งช่วยจัดเก็บองค์ประกอบการสร้างโมเดลของคุณได้อย่างสะดวกสบาย\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Bundle modelling components in a workflow\n", + "log_reg_wf <- workflow() %>% \n", + " add_recipe(pumpkins_recipe) %>% \n", + " add_model(log_reg)\n", + "\n", + "# Print out the workflow\n", + "log_reg_wf\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "หลังจากที่กำหนด *workflow* แล้ว สามารถ `train` โมเดลได้โดยใช้ฟังก์ชัน [`fit()`](https://tidymodels.github.io/parsnip/reference/fit.html) ฟังก์ชันนี้จะช่วยประเมินสูตรและเตรียมข้อมูลก่อนการฝึกโมเดล ดังนั้นเราไม่จำเป็นต้องทำการเตรียมข้อมูลด้วย prep และ bake ด้วยตัวเอง\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Train the model\n", + "wf_fit <- log_reg_wf %>% \n", + " fit(data = pumpkins_train)\n", + "\n", + "# Print the trained workflow\n", + "wf_fit\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "โมเดลจะแสดงค่าสัมประสิทธิ์ที่ได้จากการฝึกสอนระหว่างการเทรน\n", + "\n", + "ตอนนี้เราได้ฝึกสอนโมเดลด้วยข้อมูลการฝึกสอนเรียบร้อยแล้ว เราสามารถใช้โมเดลนี้เพื่อทำนายผลบนข้อมูลทดสอบได้โดยใช้ [parsnip::predict()](https://parsnip.tidymodels.org/reference/predict.model_fit.html) มาเริ่มต้นด้วยการใช้โมเดลเพื่อทำนายป้ายกำกับสำหรับชุดข้อมูลทดสอบ และความน่าจะเป็นของแต่ละป้ายกำกับกัน เมื่อความน่าจะเป็นมากกว่า 0.5 ป้ายกำกับที่ทำนายจะเป็น `WHITE` หากน้อยกว่านั้นจะเป็น `ORANGE`\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Make predictions for color and corresponding 
probabilities\n", + "results <- pumpkins_test %>% select(color) %>% \n", + " bind_cols(wf_fit %>% \n", + " predict(new_data = pumpkins_test)) %>%\n", + " bind_cols(wf_fit %>%\n", + " predict(new_data = pumpkins_test, type = \"prob\"))\n", + "\n", + "# Compare predictions\n", + "results %>% \n", + " slice_head(n = 10)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "เยี่ยมมาก! นี่ช่วยให้เข้าใจการทำงานของโลจิสติกรีเกรสชันได้ลึกซึ้งยิ่งขึ้น\n", + "\n", + "### เข้าใจได้ดีขึ้นผ่านเมทริกซ์ความสับสน\n", + "\n", + "การเปรียบเทียบแต่ละการทำนายกับค่าจริงที่สอดคล้องกัน (\"ground truth\") ไม่ใช่วิธีที่มีประสิทธิภาพนักในการประเมินว่ารุ่นทำนายได้ดีแค่ไหน โชคดีที่ Tidymodels มีเครื่องมือเพิ่มเติมที่ช่วยได้: [`yardstick`](https://yardstick.tidymodels.org/) - แพ็กเกจที่ใช้วัดประสิทธิภาพของโมเดลด้วยเมตริกประสิทธิภาพ\n", + "\n", + "หนึ่งในเมตริกประสิทธิภาพที่เกี่ยวข้องกับปัญหาการจำแนกประเภทคือ [`confusion matrix`](https://wikipedia.org/wiki/Confusion_matrix) เมทริกซ์ความสับสนอธิบายว่ารุ่นการจำแนกประเภททำงานได้ดีเพียงใด โดยเมทริกซ์ความสับสนจะจัดทำตารางว่าตัวอย่างในแต่ละคลาสถูกจำแนกอย่างถูกต้องโดยโมเดลกี่ตัวอย่าง ในกรณีของเรา มันจะแสดงให้คุณเห็นว่าฟักทองสีส้มถูกจำแนกเป็นสีส้มกี่ลูก และฟักทองสีขาวถูกจำแนกเป็นสีขาวกี่ลูก นอกจากนี้ เมทริกซ์ความสับสนยังแสดงให้เห็นว่ามีการจำแนกไปยังหมวดหมู่ที่ **ผิด** กี่ตัวอย่างด้วย\n", + "\n", + "ฟังก์ชัน [**`conf_mat()`**](https://tidymodels.github.io/yardstick/reference/conf_mat.html) จาก yardstick ใช้คำนวณการจัดตารางไขว้ระหว่างคลาสที่สังเกตได้และคลาสที่ทำนาย\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Confusion matrix for prediction results\n", + "conf_mat(data = results, truth = color, estimate = .pred_class)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "มาทำความเข้าใจเกี่ยวกับ confusion matrix กัน โมเดลของเราถูกขอให้จำแนกฟักทองออกเป็นสองประเภท คือประเภท `white` และประเภท 
`not-white`\n", + "\n", + "- หากโมเดลของคุณทำนายว่าฟักทองเป็นสีขาว และในความเป็นจริงมันอยู่ในประเภท 'white' เราเรียกสิ่งนี้ว่า `true positive` ซึ่งแสดงด้วยตัวเลขด้านบนซ้าย\n", + "\n", + "- หากโมเดลของคุณทำนายว่าฟักทองไม่ใช่สีขาว และในความเป็นจริงมันอยู่ในประเภท 'white' เราเรียกสิ่งนี้ว่า `false negative` ซึ่งแสดงด้วยตัวเลขด้านล่างซ้าย\n", + "\n", + "- หากโมเดลของคุณทำนายว่าฟักทองเป็นสีขาว และในความเป็นจริงมันอยู่ในประเภท 'not-white' เราเรียกสิ่งนี้ว่า `false positive` ซึ่งแสดงด้วยตัวเลขด้านบนขวา\n", + "\n", + "- หากโมเดลของคุณทำนายว่าฟักทองไม่ใช่สีขาว และในความเป็นจริงมันอยู่ในประเภท 'not-white' เราเรียกสิ่งนี้ว่า `true negative` ซึ่งแสดงด้วยตัวเลขด้านล่างขวา\n", + "\n", + "| ความจริง |\n", + "|:-----:|\n", + "\n", + "\n", + "| | | |\n", + "|---------------|--------|-------|\n", + "| **ทำนาย** | WHITE | ORANGE |\n", + "| WHITE | TP | FP |\n", + "| ORANGE | FN | TN |\n", + "\n", + "อย่างที่คุณอาจเดาได้ว่า เราต้องการให้มีจำนวน true positive และ true negative มากขึ้น และจำนวน false positive และ false negative น้อยลง ซึ่งหมายความว่าโมเดลทำงานได้ดีขึ้น\n", + "\n", + "confusion matrix มีประโยชน์เพราะมันช่วยให้เราสามารถคำนวณตัวชี้วัดอื่น ๆ ที่ช่วยประเมินประสิทธิภาพของโมเดลการจำแนกประเภทได้ดียิ่งขึ้น มาดูตัวชี้วัดเหล่านี้กัน:\n", + "\n", + "🎓 Precision: `TP/(TP + FP)` หมายถึงสัดส่วนของผลลัพธ์ที่ทำนายว่าเป็นบวกที่เป็นบวกจริง ๆ หรือที่เรียกว่า [positive predictive value](https://en.wikipedia.org/wiki/Positive_predictive_value \"Positive predictive value\")\n", + "\n", + "🎓 Recall: `TP/(TP + FN)` หมายถึงสัดส่วนของผลลัพธ์ที่เป็นบวกจากจำนวนตัวอย่างที่เป็นบวกจริง ๆ หรือที่เรียกว่า `sensitivity`\n", + "\n", + "🎓 Specificity: `TN/(TN + FP)` หมายถึงสัดส่วนของผลลัพธ์ที่เป็นลบจากจำนวนตัวอย่างที่เป็นลบจริง ๆ\n", + "\n", + "🎓 Accuracy: `(TP + TN)/(TP + TN + FP + FN)` เปอร์เซ็นต์ของป้ายกำกับที่ทำนายได้อย่างถูกต้องสำหรับตัวอย่าง\n", + "\n", + "🎓 F Measure: ค่าเฉลี่ยถ่วงน้ำหนักระหว่าง precision และ recall โดยค่าที่ดีที่สุดคือ 1 และค่าที่แย่ที่สุดคือ 0\n", + "\n", + 
"มาคำนวณตัวชี้วัดเหล่านี้กัน!\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Combine metric functions and calculate them all at once\n", + "eval_metrics <- metric_set(ppv, recall, spec, f_meas, accuracy)\n", + "eval_metrics(data = results, truth = color, estimate = .pred_class)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## แสดงกราฟ ROC ของโมเดลนี้\n", + "\n", + "มาทำการแสดงผลอีกครั้งเพื่อดูสิ่งที่เรียกว่า [`กราฟ ROC`](https://en.wikipedia.org/wiki/Receiver_operating_characteristic):\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Make a roc_curve\n", + "results %>% \n", + " roc_curve(color, .pred_ORANGE) %>% \n", + " autoplot()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "กราฟ ROC มักถูกใช้เพื่อดูผลลัพธ์ของตัวจำแนกในแง่ของค่าบวกจริง (True Positives) เทียบกับค่าบวกเท็จ (False Positives) กราฟ ROC โดยทั่วไปจะแสดง `True Positive Rate`/Sensitivity บนแกน Y และ `False Positive Rate`/1-Specificity บนแกน X ดังนั้น ความชันของกราฟและพื้นที่ระหว่างเส้นกลางกับกราฟจึงมีความสำคัญ: คุณต้องการกราฟที่พุ่งขึ้นและข้ามเส้นไปอย่างรวดเร็ว ในกรณีของเรา มีค่าบวกเท็จในช่วงเริ่มต้น และจากนั้นเส้นก็พุ่งขึ้นและข้ามเส้นไปอย่างเหมาะสม\n", + "\n", + "สุดท้ายนี้ เรามาใช้ `yardstick::roc_auc()` เพื่อคำนวณค่าพื้นที่ใต้กราฟ (Area Under the Curve) วิธีหนึ่งในการตีความ AUC คือความน่าจะเป็นที่โมเดลจะจัดอันดับตัวอย่างบวกแบบสุ่มให้สูงกว่าตัวอย่างลบแบบสุ่ม\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Calculate area under curve\n", + "results %>% \n", + " roc_auc(color, .pred_ORANGE)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ผลลัพธ์อยู่ที่ประมาณ `0.975` 
ซึ่งเมื่อพิจารณาว่า AUC มีค่าตั้งแต่ 0 ถึง 1 คุณต้องการคะแนนที่สูง เพราะโมเดลที่ทำนายได้ถูกต้อง 100% จะมีค่า AUC เท่ากับ 1; ในกรณีนี้ โมเดลถือว่า *ค่อนข้างดี*\n", + "\n", + "ในบทเรียนอนาคตเกี่ยวกับการจำแนกประเภท คุณจะได้เรียนรู้วิธีปรับปรุงคะแนนของโมเดล (เช่น การจัดการกับข้อมูลที่ไม่สมดุลในกรณีนี้)\n", + "\n", + "## 🚀ความท้าทาย\n", + "\n", + "ยังมีอีกมากมายเกี่ยวกับการวิเคราะห์ Logistic Regression! แต่วิธีที่ดีที่สุดในการเรียนรู้คือการทดลอง ค้นหาชุดข้อมูลที่เหมาะสมกับการวิเคราะห์ประเภทนี้และสร้างโมเดลด้วยชุดข้อมูลนั้น คุณได้เรียนรู้อะไร? เคล็ดลับ: ลองดู [Kaggle](https://www.kaggle.com/search?q=logistic+regression+datasets) สำหรับชุดข้อมูลที่น่าสนใจ\n", + "\n", + "## ทบทวนและศึกษาด้วยตนเอง\n", + "\n", + "อ่านหน้าแรก ๆ ของ [เอกสารนี้จาก Stanford](https://web.stanford.edu/~jurafsky/slp3/5.pdf) เกี่ยวกับการใช้งาน Logistic Regression ในทางปฏิบัติ ลองคิดถึงงานที่เหมาะสมกับการวิเคราะห์แบบ Regression ประเภทต่าง ๆ ที่เราได้ศึกษาไปจนถึงตอนนี้ งานแบบไหนที่เหมาะสมที่สุด?\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้อง แต่โปรดทราบว่าการแปลอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่แม่นยำ เอกสารต้นฉบับในภาษาดั้งเดิมควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ แนะนำให้ใช้บริการแปลภาษาจากผู้เชี่ยวชาญ เราไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความที่ผิดพลาดซึ่งเกิดจากการใช้การแปลนี้\n" + ] + } + ], + "metadata": { + "anaconda-cloud": "", + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "3.4.1" + }, + "coopTranslator": { + "original_hash": "feaf125f481a89c468fa115bf2aed580", + "translation_date": "2025-09-06T13:35:48+00:00", + "source_file": 
"2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/translations/th/2-Regression/4-Logistic/solution/notebook.ipynb b/translations/th/2-Regression/4-Logistic/solution/notebook.ipynb new file mode 100644 index 000000000..0b09ea0d5 --- /dev/null +++ b/translations/th/2-Regression/4-Logistic/solution/notebook.ipynb @@ -0,0 +1,1255 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## การถดถอยโลจิสติก - บทเรียนที่ 4\n", + "\n", + "โหลดไลบรารีและชุดข้อมูลที่จำเป็น แปลงข้อมูลให้เป็น DataFrame ที่มีเฉพาะส่วนย่อยของข้อมูล:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NameTypePackageVarietySub VarietyGradeDateLow PriceHigh PriceMostly Low...Unit of SaleQualityConditionAppearanceStorageCropRepackTrans ModeUnnamed: 24Unnamed: 25
0BALTIMORENaN24 inch binsNaNNaNNaN4/29/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
1BALTIMORENaN24 inch binsNaNNaNNaN5/6/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
2BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
3BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
4BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN11/5/1690.0100.090.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " City Name Type Package Variety Sub Variety Grade Date \n", + "0 BALTIMORE NaN 24 inch bins NaN NaN NaN 4/29/17 \\\n", + "1 BALTIMORE NaN 24 inch bins NaN NaN NaN 5/6/17 \n", + "2 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "3 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "4 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 11/5/16 \n", + "\n", + " Low Price High Price Mostly Low ... Unit of Sale Quality Condition \n", + "0 270.0 280.0 270.0 ... NaN NaN NaN \\\n", + "1 270.0 280.0 270.0 ... NaN NaN NaN \n", + "2 160.0 160.0 160.0 ... NaN NaN NaN \n", + "3 160.0 160.0 160.0 ... NaN NaN NaN \n", + "4 90.0 100.0 90.0 ... NaN NaN NaN \n", + "\n", + " Appearance Storage Crop Repack Trans Mode Unnamed: 24 Unnamed: 25 \n", + "0 NaN NaN NaN E NaN NaN NaN \n", + "1 NaN NaN NaN E NaN NaN NaN \n", + "2 NaN NaN NaN N NaN NaN NaN \n", + "3 NaN NaN NaN N NaN NaN NaN \n", + "4 NaN NaN NaN N NaN NaN NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "full_pumpkins = pd.read_csv('../../data/US-pumpkins.csv')\n", + "\n", + "full_pumpkins.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NamePackageVarietyOriginItem SizeColor
2BALTIMORE24 inch binsHOWDEN TYPEDELAWAREmedORANGE
3BALTIMORE24 inch binsHOWDEN TYPEVIRGINIAmedORANGE
4BALTIMORE24 inch binsHOWDEN TYPEMARYLANDlgeORANGE
5BALTIMORE24 inch binsHOWDEN TYPEMARYLANDlgeORANGE
6BALTIMORE36 inch binsHOWDEN TYPEMARYLANDmedORANGE
\n", + "
" + ], + "text/plain": [ + " City Name Package Variety Origin Item Size Color\n", + "2 BALTIMORE 24 inch bins HOWDEN TYPE DELAWARE med ORANGE\n", + "3 BALTIMORE 24 inch bins HOWDEN TYPE VIRGINIA med ORANGE\n", + "4 BALTIMORE 24 inch bins HOWDEN TYPE MARYLAND lge ORANGE\n", + "5 BALTIMORE 24 inch bins HOWDEN TYPE MARYLAND lge ORANGE\n", + "6 BALTIMORE 36 inch bins HOWDEN TYPE MARYLAND med ORANGE" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Select the columns we want to use\n", + "columns_to_select = ['City Name','Package','Variety', 'Origin','Item Size', 'Color']\n", + "pumpkins = full_pumpkins.loc[:, columns_to_select]\n", + "\n", + "# Drop rows with missing values\n", + "pumpkins.dropna(inplace=True)\n", + "\n", + "pumpkins.head()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# มาดูข้อมูลของเรากันเถอะ!\n", + "\n", + "ด้วยการแสดงผลด้วย Seaborn\n" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import seaborn as sns\n", + "# Specify colors for each values of the hue variable\n", + "palette = {\n", + " 'ORANGE': 'orange',\n", + " 'WHITE': 'wheat',\n", + "}\n", + "# Plot a bar plot to visualize how many pumpkins of each variety are orange or white\n", + "sns.catplot(\n", + " data=pumpkins, y=\"Variety\", hue=\"Color\", kind=\"count\",\n", + " palette=palette, \n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# การเตรียมข้อมูลล่วงหน้า\n", + "\n", + "มาเข้ารหัสคุณลักษณะและป้ายกำกับเพื่อให้สามารถแสดงข้อมูลและฝึกโมเดลได้ดียิ่งขึ้น\n" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['med', 'lge', 'sml', 'xlge', 'med-lge', 'jbo', 'exjbo'],\n", + " dtype=object)" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Let's look at the different values of the 'Item Size' column\n", + "pumpkins['Item Size'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import OrdinalEncoder\n", + "# Encode the 'Item Size' column using ordinal encoding\n", + "item_size_categories = [['sml', 'med', 'med-lge', 'lge', 'xlge', 'jbo', 'exjbo']]\n", + "ordinal_features = ['Item Size']\n", + "ordinal_encoder = OrdinalEncoder(categories=item_size_categories)" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import OneHotEncoder\n", + "# Encode all the other features using one-hot encoding\n", + "categorical_features = ['City Name', 'Package', 'Variety', 'Origin']\n", + "categorical_encoder = OneHotEncoder(sparse_output=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + 
"outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ord__Item Sizecat__City Name_ATLANTAcat__City Name_BALTIMOREcat__City Name_BOSTONcat__City Name_CHICAGOcat__City Name_COLUMBIAcat__City Name_DALLAScat__City Name_DETROITcat__City Name_LOS ANGELEScat__City Name_MIAMI...cat__Origin_MICHIGANcat__Origin_NEW JERSEYcat__Origin_NEW YORKcat__Origin_NORTH CAROLINAcat__Origin_OHIOcat__Origin_PENNSYLVANIAcat__Origin_TENNESSEEcat__Origin_TEXAScat__Origin_VERMONTcat__Origin_VIRGINIA
21.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
31.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.01.0
43.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
53.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
61.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
\n", + "

5 rows × 48 columns

\n", + "
" + ], + "text/plain": [ + " ord__Item Size cat__City Name_ATLANTA cat__City Name_BALTIMORE \n", + "2 1.0 0.0 1.0 \\\n", + "3 1.0 0.0 1.0 \n", + "4 3.0 0.0 1.0 \n", + "5 3.0 0.0 1.0 \n", + "6 1.0 0.0 1.0 \n", + "\n", + " cat__City Name_BOSTON cat__City Name_CHICAGO cat__City Name_COLUMBIA \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__City Name_DALLAS cat__City Name_DETROIT cat__City Name_LOS ANGELES \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__City Name_MIAMI ... cat__Origin_MICHIGAN cat__Origin_NEW JERSEY \n", + "2 0.0 ... 0.0 0.0 \\\n", + "3 0.0 ... 0.0 0.0 \n", + "4 0.0 ... 0.0 0.0 \n", + "5 0.0 ... 0.0 0.0 \n", + "6 0.0 ... 0.0 0.0 \n", + "\n", + " cat__Origin_NEW YORK cat__Origin_NORTH CAROLINA cat__Origin_OHIO \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__Origin_PENNSYLVANIA cat__Origin_TENNESSEE cat__Origin_TEXAS \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__Origin_VERMONT cat__Origin_VIRGINIA \n", + "2 0.0 0.0 \n", + "3 0.0 1.0 \n", + "4 0.0 0.0 \n", + "5 0.0 0.0 \n", + "6 0.0 0.0 \n", + "\n", + "[5 rows x 48 columns]" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.compose import ColumnTransformer\n", + "ct = ColumnTransformer(transformers=[\n", + " ('ord', ordinal_encoder, ordinal_features),\n", + " ('cat', categorical_encoder, categorical_features)\n", + " ])\n", + "# Get the encoded features as a pandas DataFrame\n", + "ct.set_output(transform='pandas')\n", + "encoded_features = ct.fit_transform(pumpkins)\n", + "encoded_features.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + 
{ + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ord__Item Sizecat__City Name_ATLANTAcat__City Name_BALTIMOREcat__City Name_BOSTONcat__City Name_CHICAGOcat__City Name_COLUMBIAcat__City Name_DALLAScat__City Name_DETROITcat__City Name_LOS ANGELEScat__City Name_MIAMI...cat__Origin_NEW JERSEYcat__Origin_NEW YORKcat__Origin_NORTH CAROLINAcat__Origin_OHIOcat__Origin_PENNSYLVANIAcat__Origin_TENNESSEEcat__Origin_TEXAScat__Origin_VERMONTcat__Origin_VIRGINIAColor
21.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00
31.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00
43.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00
53.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00
61.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00
\n", + "

5 rows × 49 columns

\n", + "
" + ], + "text/plain": [ + " ord__Item Size cat__City Name_ATLANTA cat__City Name_BALTIMORE \n", + "2 1.0 0.0 1.0 \\\n", + "3 1.0 0.0 1.0 \n", + "4 3.0 0.0 1.0 \n", + "5 3.0 0.0 1.0 \n", + "6 1.0 0.0 1.0 \n", + "\n", + " cat__City Name_BOSTON cat__City Name_CHICAGO cat__City Name_COLUMBIA \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__City Name_DALLAS cat__City Name_DETROIT cat__City Name_LOS ANGELES \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__City Name_MIAMI ... cat__Origin_NEW JERSEY cat__Origin_NEW YORK \n", + "2 0.0 ... 0.0 0.0 \\\n", + "3 0.0 ... 0.0 0.0 \n", + "4 0.0 ... 0.0 0.0 \n", + "5 0.0 ... 0.0 0.0 \n", + "6 0.0 ... 0.0 0.0 \n", + "\n", + " cat__Origin_NORTH CAROLINA cat__Origin_OHIO cat__Origin_PENNSYLVANIA \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__Origin_TENNESSEE cat__Origin_TEXAS cat__Origin_VERMONT \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__Origin_VIRGINIA Color \n", + "2 0.0 0 \n", + "3 1.0 0 \n", + "4 0.0 0 \n", + "5 0.0 0 \n", + "6 0.0 0 \n", + "\n", + "[5 rows x 49 columns]" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.preprocessing import LabelEncoder\n", + "# Encode the 'Color' column using label encoding\n", + "label_encoder = LabelEncoder()\n", + "encoded_label = label_encoder.fit_transform(pumpkins['Color'])\n", + "encoded_pumpkins = encoded_features.assign(Color=encoded_label)\n", + "encoded_pumpkins.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['ORANGE', 'WHITE']" + ] + }, + "execution_count": 71, + "metadata": 
{}, + "output_type": "execute_result" + } + ], + "source": [ + "# Let's look at the mapping between the encoded values and the original values\n", + "list(label_encoder.inverse_transform([0, 1]))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "palette = {\n", + " 'ORANGE': 'orange',\n", + " 'WHITE': 'wheat',\n", + "}\n", + "# We need the encoded Item Size column to use it as the x-axis values in the plot\n", + "pumpkins['Item Size'] = encoded_pumpkins['ord__Item Size']\n", + "\n", + "g = sns.catplot(\n", + " data=pumpkins,\n", + " x=\"Item Size\", y=\"Color\", row='Variety',\n", + " kind=\"box\", orient=\"h\",\n", + " sharex=False, margin_titles=True,\n", + " height=1.8, aspect=4, palette=palette,\n", + ")\n", + "# Defining axis labels \n", + "g.set(xlabel=\"Item Size\", ylabel=\"\").set(xlim=(0,6))\n", + "g.set_titles(row_template=\"{row_name}\")\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "warnings.filterwarnings(action='ignore', category=UserWarning, module='seaborn')" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Suppressing warning message claiming that a portion of points cannot be placed into the plot due to the high number of data points\n", + "import warnings\n", + "warnings.filterwarnings(action='ignore', category=UserWarning, module='seaborn')\n", + "\n", + "palette = {\n", + " 0: 'orange',\n", + " 1: 'wheat'\n", + "}\n", + "sns.swarmplot(x=\"Color\", y=\"ord__Item Size\", hue=\"Color\", data=encoded_pumpkins, palette=palette)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**ระวัง**: การละเลยคำเตือนไม่ใช่วิธีปฏิบัติที่ดีและควรหลีกเลี่ยงเมื่อเป็นไปได้ คำเตือนมักมีข้อความที่เป็นประโยชน์ซึ่งช่วยให้เราปรับปรุงโค้ดและแก้ไขปัญหา \n", + "เหตุผลที่เราละเลยคำเตือนนี้โดยเฉพาะคือเพื่อรับประกันความชัดเจนของกราฟ การแสดงจุดข้อมูลทั้งหมดด้วยขนาดเครื่องหมายที่ลดลง ในขณะที่ยังคงความสอดคล้องกับสีของพาเลต จะทำให้การแสดงผลไม่ชัดเจน\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "# X is the encoded features\n", + "X = encoded_pumpkins[encoded_pumpkins.columns.difference(['Color'])]\n", + "# y is the encoded label\n", + "y = encoded_pumpkins['Color']\n", + "\n", + "# Split the data into training and test sets\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.94 0.98 0.96 166\n", + " 1 0.85 0.67 0.75 33\n", + "\n", + " accuracy 0.92 199\n", + " macro avg 0.89 0.82 0.85 199\n", + "weighted avg 0.92 0.92 0.92 199\n", + "\n", + "Predicted labels: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0\n", + " 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 1 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 1 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0\n", + " 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1\n", + " 0 0 0 1 0 0 0 0 0 0 0 0 1 1]\n", + "F1-score: 0.7457627118644068\n" + ] + } + ], + "source": [ + "from sklearn.metrics import f1_score, classification_report \n", + "from sklearn.linear_model import LogisticRegression\n", + "\n", + "# Train a logistic regression model on the pumpkin dataset\n", + "model = LogisticRegression()\n", + "model.fit(X_train, y_train)\n", + "predictions = model.predict(X_test)\n", + "\n", + "# Evaluate the model and print the results\n", + "print(classification_report(y_test, predictions))\n", + "print('Predicted labels: ', predictions)\n", + "print('F1-score: ', f1_score(y_test, predictions))" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[162, 4],\n", + " [ 11, 22]])" + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.metrics import confusion_matrix\n", + "confusion_matrix(y_test, predictions)" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from sklearn.metrics import roc_curve, roc_auc_score\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "\n", + "y_scores = model.predict_proba(X_test)\n", + "# calculate ROC curve\n", + "fpr, tpr, thresholds = roc_curve(y_test, y_scores[:,1])\n", + "\n", + "# plot ROC curve\n", + "fig = plt.figure(figsize=(6, 6))\n", + "# Plot the diagonal 50% line\n", + "plt.plot([0, 1], [0, 1], 'k--')\n", + "# Plot the FPR and TPR achieved by our model\n", + "plt.plot(fpr, tpr)\n", + "plt.xlabel('False Positive Rate')\n", + "plt.ylabel('True Positive Rate')\n", + "plt.title('ROC Curve')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.9749908725812341\n" + ] + } + ], + "source": [ + "# Calculate AUC score\n", + "auc = roc_auc_score(y_test,y_scores[:,1])\n", + "print(auc)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้องมากที่สุด แต่โปรดทราบว่าการแปลโดยอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่ถูกต้อง เอกสารต้นฉบับในภาษาดั้งเดิมควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษามืออาชีพ เราไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความผิดที่เกิดจากการใช้การแปลนี้\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "metadata": { + "interpreter": { + "hash": 
"70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "vscode": { + "interpreter": { + "hash": "949777d72b0d2535278d3dc13498b2535136f6dfe0678499012e853ee9abcab1" + } + }, + "coopTranslator": { + "original_hash": "ef50cc584e0b79412610cc7da15e1f86", + "translation_date": "2025-09-06T13:28:01+00:00", + "source_file": "2-Regression/4-Logistic/solution/notebook.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/th/3-Web-App/1-Web-App/notebook.ipynb b/translations/th/3-Web-App/1-Web-App/notebook.ipynb new file mode 100644 index 000000000..e69de29bb diff --git a/translations/th/3-Web-App/1-Web-App/solution/notebook.ipynb b/translations/th/3-Web-App/1-Web-App/solution/notebook.ipynb new file mode 100644 index 000000000..6fbe430c7 --- /dev/null +++ b/translations/th/3-Web-App/1-Web-App/solution/notebook.ipynb @@ -0,0 +1,267 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "5fa2e8f4584c78250ca9729b46562ceb", + "translation_date": "2025-09-06T14:32:18+00:00", + "source_file": "3-Web-App/1-Web-App/solution/notebook.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + 
"text/plain": [ + " datetime city state country shape \\\n", + "0 10/10/1949 20:30 san marcos tx us cylinder \n", + "1 10/10/1949 21:00 lackland afb tx NaN light \n", + "2 10/10/1955 17:00 chester (uk/england) NaN gb circle \n", + "3 10/10/1956 21:00 edna tx us circle \n", + "4 10/10/1960 20:00 kaneohe hi us light \n", + "\n", + " duration (seconds) duration (hours/min) \\\n", + "0 2700.0 45 minutes \n", + "1 7200.0 1-2 hrs \n", + "2 20.0 20 seconds \n", + "3 20.0 1/2 hour \n", + "4 900.0 15 minutes \n", + "\n", + " comments date posted latitude \\\n", + "0 This event took place in early fall around 194... 4/27/2004 29.883056 \n", + "1 1949 Lackland AFB, TX. Lights racing acros... 12/16/2005 29.384210 \n", + "2 Green/Orange circular disc over Chester, En... 1/21/2008 53.200000 \n", + "3 My older brother and twin sister were leaving ... 1/17/2004 28.978333 \n", + "4 AS a Marine 1st Lt. flying an FJ4B fighter/att... 1/22/2004 21.418056 \n", + "\n", + " longitude \n", + "0 -97.941111 \n", + "1 -98.581082 \n", + "2 -2.916667 \n", + "3 -96.645833 \n", + "4 -157.803611 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
datetimecitystatecountryshapeduration (seconds)duration (hours/min)commentsdate postedlatitudelongitude
010/10/1949 20:30san marcostxuscylinder2700.045 minutesThis event took place in early fall around 194...4/27/200429.883056-97.941111
110/10/1949 21:00lackland afbtxNaNlight7200.01-2 hrs1949 Lackland AFB&#44 TX. Lights racing acros...12/16/200529.384210-98.581082
210/10/1955 17:00chester (uk/england)NaNgbcircle20.020 secondsGreen/Orange circular disc over Chester&#44 En...1/21/200853.200000-2.916667
310/10/1956 21:00ednatxuscircle20.01/2 hourMy older brother and twin sister were leaving ...1/17/200428.978333-96.645833
410/10/1960 20:00kaneohehiuslight900.015 minutesAS a Marine 1st Lt. flying an FJ4B fighter/att...1/22/200421.418056-157.803611
\n
" + }, + "metadata": {}, + "execution_count": 23 + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "ufos = pd.read_csv('../data/ufos.csv')\n", + "ufos.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array(['us', nan, 'gb', 'ca', 'au', 'de'], dtype=object)" + ] + }, + "metadata": {}, + "execution_count": 24 + } + ], + "source": [ + "\n", + "ufos = pd.DataFrame({'Seconds': ufos['duration (seconds)'], 'Country': ufos['country'],'Latitude': ufos['latitude'],'Longitude': ufos['longitude']})\n", + "\n", + "ufos.Country.unique()\n", + "\n", + "# 0 au, 1 ca, 2 de, 3 gb, 4 us" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\nInt64Index: 25863 entries, 2 to 80330\nData columns (total 4 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Seconds 25863 non-null float64\n 1 Country 25863 non-null object \n 2 Latitude 25863 non-null float64\n 3 Longitude 25863 non-null float64\ndtypes: float64(3), object(1)\nmemory usage: 1010.3+ KB\n" + ] + } + ], + "source": [ + "ufos.dropna(inplace=True)\n", + "\n", + "ufos = ufos[(ufos['Seconds'] >= 1) & (ufos['Seconds'] <= 60)]\n", + "\n", + "ufos.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Seconds Country Latitude Longitude\n", + "2 20.0 3 53.200000 -2.916667\n", + "3 20.0 4 28.978333 -96.645833\n", + "14 30.0 4 35.823889 -80.253611\n", + "23 60.0 4 45.582778 -122.352222\n", + "24 3.0 3 51.783333 -0.783333" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
SecondsCountryLatitudeLongitude
220.0353.200000-2.916667
320.0428.978333-96.645833
1430.0435.823889-80.253611
2360.0445.582778-122.352222
243.0351.783333-0.783333
\n
" + }, + "metadata": {}, + "execution_count": 26 + } + ], + "source": [ + "from sklearn.preprocessing import LabelEncoder\n", + "\n", + "ufos['Country'] = LabelEncoder().fit_transform(ufos['Country'])\n", + "\n", + "ufos.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "\n", + "Selected_features = ['Seconds','Latitude','Longitude']\n", + "\n", + "X = ufos[Selected_features]\n", + "y = ufos['Country']\n", + "\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", + " FutureWarning)\n", + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n", + " \"this warning.\", FutureWarning)\n", + " precision recall f1-score support\n", + "\n", + " 0 1.00 1.00 1.00 41\n", + " 1 1.00 0.02 0.05 250\n", + " 2 0.00 0.00 0.00 8\n", + " 3 0.94 1.00 0.97 131\n", + " 4 0.95 1.00 0.97 4743\n", + "\n", + " accuracy 0.95 5173\n", + " macro avg 0.78 0.60 0.60 5173\n", + "weighted avg 0.95 0.95 0.93 5173\n", + "\n", + "Predicted labels: [4 4 4 ... 
3 4 4]\n", + "Accuracy: 0.9512855209742895\n", + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/metrics/classification.py:1437: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.\n", + " 'precision', 'predicted', average, warn_for)\n" + ] + } + ], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import accuracy_score, classification_report \n", + "from sklearn.linear_model import LogisticRegression\n", + "model = LogisticRegression()\n", + "model.fit(X_train, y_train)\n", + "predictions = model.predict(X_test)\n", + "\n", + "print(classification_report(y_test, predictions))\n", + "print('Predicted labels: ', predictions)\n", + "print('Accuracy: ', accuracy_score(y_test, predictions))\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[3]\n" + ] + } + ], + "source": [ + "import pickle\n", + "model_filename = 'ufo-model.pkl'\n", + "pickle.dump(model, open(model_filename,'wb'))\n", + "\n", + "model = pickle.load(open('ufo-model.pkl','rb'))\n", + "print(model.predict([[50,44,-12]]))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้อง แต่โปรดทราบว่าการแปลอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่แม่นยำ เอกสารต้นฉบับในภาษาดั้งเดิมควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษามนุษย์มืออาชีพ เราจะไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความที่ผิดพลาดซึ่งเกิดจากการใช้การแปลนี้\n" + ] + } + ] +} \ No newline at end of file diff --git 
a/translations/th/4-Classification/1-Introduction/notebook.ipynb b/translations/th/4-Classification/1-Introduction/notebook.ipynb new file mode 100644 index 000000000..bcad60b1d --- /dev/null +++ b/translations/th/4-Classification/1-Introduction/notebook.ipynb @@ -0,0 +1,39 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "d544ef384b7ba73757d830a72372a7f2", + "translation_date": "2025-09-06T14:50:56+00:00", + "source_file": "4-Classification/1-Introduction/notebook.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้องมากที่สุด แต่โปรดทราบว่าการแปลโดยอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่ถูกต้อง เอกสารต้นฉบับในภาษาดั้งเดิมควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษามืออาชีพ เราไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความผิดที่เกิดจากการใช้การแปลนี้\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/th/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb b/translations/th/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb new file mode 100644 index 000000000..34b01cff4 --- /dev/null +++ b/translations/th/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb @@ -0,0 +1,716 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_10-R.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": 
"ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "2621e24705e8100893c9bf84e0fc8aef", + "translation_date": "2025-09-06T15:00:03+00:00", + "source_file": "4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb", + "language_code": "th" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "ItETB4tSFprR" + } + }, + { + "cell_type": "markdown", + "source": [ + "## บทนำสู่การจำแนกประเภท: ทำความสะอาด เตรียม และแสดงข้อมูลของคุณ\n", + "\n", + "ในบทเรียนทั้งสี่นี้ คุณจะได้สำรวจหัวข้อพื้นฐานของการเรียนรู้ด้วยเครื่องแบบคลาสสิก - *การจำแนกประเภท* เราจะเดินทางผ่านการใช้อัลกอริธึมการจำแนกประเภทต่าง ๆ กับชุดข้อมูลเกี่ยวกับอาหารที่ยอดเยี่ยมของเอเชียและอินเดีย หวังว่าคุณจะหิวแล้ว!\n", + "\n", + "

\n", + " \n", + "

เฉลิมฉลองอาหารเอเชียในบทเรียนเหล่านี้! ภาพโดย Jen Looper
\n", + "\n", + "การจำแนกประเภทเป็นรูปแบบหนึ่งของ [การเรียนรู้แบบมีผู้สอน](https://wikipedia.org/wiki/Supervised_learning) ซึ่งมีความคล้ายคลึงกับเทคนิคการถดถอย ในการจำแนกประเภท คุณจะฝึกโมเดลเพื่อทำนายว่า `หมวดหมู่` ใดที่รายการนั้นอยู่ หากการเรียนรู้ด้วยเครื่องเกี่ยวกับการทำนายค่าหรือชื่อของสิ่งต่าง ๆ โดยใช้ชุดข้อมูล การจำแนกประเภทมักจะแบ่งออกเป็นสองกลุ่ม: *การจำแนกประเภทแบบทวิภาค* และ *การจำแนกประเภทแบบหลายคลาส*\n", + "\n", + "จำไว้ว่า:\n", + "\n", + "- **การถดถอยเชิงเส้น** ช่วยให้คุณทำนายความสัมพันธ์ระหว่างตัวแปรและทำการทำนายที่แม่นยำเกี่ยวกับตำแหน่งที่จุดข้อมูลใหม่จะตกอยู่ในความสัมพันธ์กับเส้นนั้น ตัวอย่างเช่น คุณสามารถทำนายค่าตัวเลข เช่น *ราคาของฟักทองในเดือนกันยายนเทียบกับเดือนธันวาคม*\n", + "\n", + "- **การถดถอยโลจิสติก** ช่วยให้คุณค้นพบ \"หมวดหมู่ทวิภาค\": ที่จุดราคานี้ *ฟักทองนี้เป็นสีส้มหรือไม่เป็นสีส้ม*?\n", + "\n", + "การจำแนกประเภทใช้หลากหลายอัลกอริธึมเพื่อกำหนดวิธีอื่น ๆ ในการกำหนดฉลากหรือคลาสของจุดข้อมูล ลองทำงานกับข้อมูลอาหารนี้เพื่อดูว่า โดยการสังเกตกลุ่มของส่วนผสม เราสามารถกำหนดแหล่งกำเนิดของอาหารได้หรือไม่\n", + "\n", + "### [**แบบทดสอบก่อนการบรรยาย**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/19/)\n", + "\n", + "### **บทนำ**\n", + "\n", + "การจำแนกประเภทเป็นหนึ่งในกิจกรรมพื้นฐานของนักวิจัยและนักวิทยาศาสตร์ข้อมูลด้านการเรียนรู้ด้วยเครื่อง ตั้งแต่การจำแนกค่าทวิภาคพื้นฐาน (\"อีเมลนี้เป็นสแปมหรือไม่?\") ไปจนถึงการจำแนกภาพและการแบ่งส่วนที่ซับซ้อนโดยใช้การมองเห็นด้วยคอมพิวเตอร์ การสามารถจัดเรียงข้อมูลเป็นคลาสและตั้งคำถามกับมันเป็นสิ่งที่มีประโยชน์เสมอ\n", + "\n", + "หากจะกล่าวถึงกระบวนการในเชิงวิทยาศาสตร์ วิธีการจำแนกประเภทของคุณจะสร้างโมเดลการทำนายที่ช่วยให้คุณสามารถจับคู่ความสัมพันธ์ระหว่างตัวแปรอินพุตกับตัวแปรเอาต์พุตได้\n", + "\n", + "

\n", + " \n", + "

ปัญหาแบบทวิภาคและแบบหลายคลาสสำหรับอัลกอริธึมการจำแนกประเภท ภาพประกอบโดย Jen Looper
\n", + "\n", + "ก่อนเริ่มกระบวนการทำความสะอาดข้อมูลของเรา การแสดงภาพ และการเตรียมข้อมูลสำหรับงาน ML ของเรา ลองเรียนรู้เกี่ยวกับวิธีต่าง ๆ ที่การเรียนรู้ด้วยเครื่องสามารถนำมาใช้เพื่อจำแนกข้อมูลได้\n", + "\n", + "การจำแนกประเภทที่ได้มาจาก [สถิติ](https://wikipedia.org/wiki/Statistical_classification) ใช้คุณลักษณะ เช่น `smoker`, `weight`, และ `age` เพื่อกำหนด *ความน่าจะเป็นในการพัฒนาโรค X* ในฐานะเทคนิคการเรียนรู้แบบมีผู้สอนที่คล้ายกับการฝึกถดถอยที่คุณทำมาก่อนหน้านี้ ข้อมูลของคุณจะถูกติดป้ายกำกับ และอัลกอริธึม ML จะใช้ป้ายกำกับเหล่านั้นเพื่อจำแนกและทำนายคลาส (หรือ 'คุณลักษณะ') ของชุดข้อมูลและกำหนดให้กับกลุ่มหรือผลลัพธ์\n", + "\n", + "✅ ลองใช้เวลาสักครู่เพื่อจินตนาการถึงชุดข้อมูลเกี่ยวกับอาหาร โมเดลแบบหลายคลาสจะสามารถตอบคำถามอะไรได้บ้าง? โมเดลแบบทวิภาคจะสามารถตอบคำถามอะไรได้บ้าง? ถ้าคุณต้องการกำหนดว่าอาหารที่กำหนดมีแนวโน้มที่จะใช้ลูกซัดหรือไม่? หรือถ้าคุณต้องการดูว่า หากคุณได้รับของขวัญเป็นถุงช้อปปิ้งที่เต็มไปด้วยโป๊ยกั๊ก อาร์ติโชก กะหล่ำดอก และฮอร์สแรดิช คุณจะสามารถสร้างอาหารอินเดียทั่วไปได้หรือไม่?\n", + "\n", + "### **สวัสดี 'ตัวจำแนก'**\n", + "\n", + "คำถามที่เราต้องการถามจากชุดข้อมูลอาหารนี้เป็นคำถามแบบ **หลายคลาส** เนื่องจากเรามีอาหารประจำชาติหลายประเภทที่สามารถทำงานได้ เมื่อพิจารณากลุ่มของส่วนผสมแล้ว ข้อมูลจะเข้ากับคลาสใดในหลาย ๆ คลาสนี้?\n", + "\n", + "Tidymodels มีอัลกอริธึมหลายแบบให้เลือกใช้เพื่อจำแนกข้อมูล ขึ้นอยู่กับประเภทของปัญหาที่คุณต้องการแก้ไข ในสองบทเรียนถัดไป คุณจะได้เรียนรู้เกี่ยวกับอัลกอริธึมเหล่านี้\n", + "\n", + "#### **ข้อกำหนดเบื้องต้น**\n", + "\n", + "สำหรับบทเรียนนี้ เราจะต้องใช้แพ็กเกจต่อไปนี้เพื่อทำความสะอาด เตรียม และแสดงข้อมูลของเรา:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) เป็น [ชุดของแพ็กเกจ R](https://www.tidyverse.org/packages) ที่ออกแบบมาเพื่อทำให้วิทยาศาสตร์ข้อมูลเร็วขึ้น ง่ายขึ้น และสนุกขึ้น!\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) เป็นกรอบงาน [ชุดของแพ็กเกจ](https://www.tidymodels.org/packages/) สำหรับการสร้างโมเดลและการเรียนรู้ด้วยเครื่อง\n", + "\n", + "- 
`DataExplorer`: [แพ็กเกจ DataExplorer](https://cran.r-project.org/web/packages/DataExplorer/vignettes/dataexplorer-intro.html) มีไว้เพื่อทำให้กระบวนการ EDA และการสร้างรายงานง่ายขึ้นและอัตโนมัติ\n", + "\n", + "- `themis`: [แพ็กเกจ themis](https://themis.tidymodels.org/) ให้ขั้นตอนเพิ่มเติมสำหรับการจัดการข้อมูลที่ไม่สมดุล\n", + "\n", + "คุณสามารถติดตั้งแพ็กเกจเหล่านี้ได้โดยใช้:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\", \"DataExplorer\", \"themis\", \"here\"))`\n", + "\n", + "หรือใช้สคริปต์ด้านล่างเพื่อตรวจสอบว่าคุณมีแพ็กเกจที่จำเป็นสำหรับการทำโมดูลนี้หรือไม่ และติดตั้งให้คุณในกรณีที่ขาดหายไป\n" + ], + "metadata": { + "id": "ri5bQxZ-Fz_0" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "suppressWarnings(if (!require(\"pacman\"))install.packages(\"pacman\"))\r\n", + "\r\n", + "pacman::p_load(tidyverse, tidymodels, DataExplorer, themis, here)" + ], + "outputs": [], + "metadata": { + "id": "KIPxa4elGAPI" + } + }, + { + "cell_type": "markdown", + "source": [ + "เราจะโหลดแพ็กเกจที่ยอดเยี่ยมเหล่านี้ในภายหลังและทำให้พร้อมใช้งานในเซสชัน R ปัจจุบันของเรา (นี่เป็นเพียงการแสดงตัวอย่าง `pacman::p_load()` ได้ทำสิ่งนี้ให้คุณแล้ว)\n" + ], + "metadata": { + "id": "YkKAxOJvGD4C" + } + }, + { + "cell_type": "markdown", + "source": [ + "## แบบฝึกหัด - ทำความสะอาดและปรับสมดุลข้อมูลของคุณ\n", + "\n", + "งานแรกที่ต้องทำก่อนเริ่มโครงการนี้คือการทำความสะอาดและ **ปรับสมดุล** ข้อมูลของคุณเพื่อให้ได้ผลลัพธ์ที่ดียิ่งขึ้น\n", + "\n", + "มาทำความรู้จักกับข้อมูลกันเถอะ!🕵️\n" + ], + "metadata": { + "id": "PFkQDlk0GN5O" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Import data\r\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/4-Classification/data/cuisines.csv\")\r\n", + "\r\n", + "# View the first 5 rows\r\n", + "df %>% \r\n", + " slice_head(n = 5)\r\n" + ], + "outputs": [], + "metadata": { + "id": "Qccw7okxGT0S" + } + }, + { + "cell_type": "markdown", + "source": [ +
"น่าสนใจ! จากลักษณะของมัน คอลัมน์แรกดูเหมือนจะเป็นคอลัมน์ประเภท `id` ลองมาหาข้อมูลเพิ่มเติมเกี่ยวกับข้อมูลนี้กันเถอะ\n" + ], + "metadata": { + "id": "XrWnlgSrGVmR" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Basic information about the data\r\n", + "df %>%\r\n", + " introduce()\r\n", + "\r\n", + "# Visualize basic information above\r\n", + "df %>% \r\n", + " plot_intro(ggtheme = theme_light())" + ], + "outputs": [], + "metadata": { + "id": "4UcGmxRxGieA" + } + }, + { + "cell_type": "markdown", + "source": [ + "จากผลลัพธ์ เราสามารถเห็นได้ทันทีว่าเรามี `2448` แถว และ `385` คอลัมน์ และไม่มีค่าที่หายไปเลย (`0` missing values) นอกจากนี้ เรายังมีคอลัมน์แบบไม่ต่อเนื่อง 1 คอลัมน์ คือ *cuisine*\n", + "\n", + "## แบบฝึกหัด - เรียนรู้เกี่ยวกับประเภทอาหาร\n", + "\n", + "ตอนนี้งานเริ่มน่าสนใจมากขึ้นแล้ว มาค้นพบการกระจายของข้อมูลในแต่ละประเภทอาหารกันเถอะ\n" + ], + "metadata": { + "id": "AaPubl__GmH5" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Count observations per cuisine\r\n", + "df %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(n)\r\n", + "\r\n", + "# Plot the distribution\r\n", + "theme_set(theme_light())\r\n", + "df %>% \r\n", + " count(cuisine) %>% \r\n", + " ggplot(mapping = aes(x = n, y = reorder(cuisine, -n))) +\r\n", + " geom_col(fill = \"midnightblue\", alpha = 0.7) +\r\n", + " ylab(\"cuisine\")" + ], + "outputs": [], + "metadata": { + "id": "FRsBVy5eGrrv" + } + }, + { + "cell_type": "markdown", + "source": [ + "มีจำนวนอาหารที่จำกัด แต่การกระจายของข้อมูลไม่เท่ากัน คุณสามารถแก้ไขได้! ก่อนที่จะทำเช่นนั้น ลองสำรวจเพิ่มเติมอีกเล็กน้อย\n", + "\n", + "ต่อไป เรามาแบ่งอาหารแต่ละประเภทออกเป็น tibble ของตัวเอง และตรวจสอบว่ามีข้อมูลมากน้อยแค่ไหน (จำนวนแถวและคอลัมน์) ต่ออาหารแต่ละประเภท\n", + "\n", + "> [tibble](https://tibble.tidyverse.org/) คือรูปแบบข้อมูลเฟรมที่ทันสมัย\n", + "\n", + "

\n", + " \n", + "

ภาพประกอบโดย @allison_horst
\n" + ], + "metadata": { + "id": "vVvyDb1kG2in" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Create individual tibble for the cuisines\r\n", + "thai_df <- df %>% \r\n", + " filter(cuisine == \"thai\")\r\n", + "japanese_df <- df %>% \r\n", + " filter(cuisine == \"japanese\")\r\n", + "chinese_df <- df %>% \r\n", + " filter(cuisine == \"chinese\")\r\n", + "indian_df <- df %>% \r\n", + " filter(cuisine == \"indian\")\r\n", + "korean_df <- df %>% \r\n", + " filter(cuisine == \"korean\")\r\n", + "\r\n", + "\r\n", + "# Find out how much data is available per cuisine\r\n", + "cat(\" thai df:\", dim(thai_df), \"\\n\",\r\n", + " \"japanese df:\", dim(japanese_df), \"\\n\",\r\n", + " \"chinese_df:\", dim(chinese_df), \"\\n\",\r\n", + " \"indian_df:\", dim(indian_df), \"\\n\",\r\n", + " \"korean_df:\", dim(korean_df))" + ], + "outputs": [], + "metadata": { + "id": "0TvXUxD3G8Bk" + } + }, + { + "cell_type": "markdown", + "source": [ + "## **แบบฝึกหัด - ค้นหาเครื่องปรุงยอดนิยมตามประเภทอาหารด้วย dplyr**\n", + "\n", + "ตอนนี้คุณสามารถเจาะลึกลงไปในข้อมูลและเรียนรู้ว่าเครื่องปรุงที่เป็นเอกลักษณ์ของแต่ละประเภทอาหารคืออะไร คุณควรทำความสะอาดข้อมูลที่ซ้ำซ้อนซึ่งอาจสร้างความสับสนระหว่างประเภทอาหาร ดังนั้นมาทำความเข้าใจปัญหานี้กันเถอะ\n", + "\n", + "สร้างฟังก์ชัน `create_ingredient()` ใน R ที่จะคืนค่าเป็น dataframe ของเครื่องปรุง ฟังก์ชันนี้จะเริ่มต้นด้วยการลบคอลัมน์ที่ไม่เป็นประโยชน์ออก และจัดเรียงเครื่องปรุงตามจำนวนครั้งที่ปรากฏ\n", + "\n", + "โครงสร้างพื้นฐานของฟังก์ชันใน R คือ:\n", + "\n", + "`myFunction <- function(arglist){`\n", + "\n", + "**`...`**\n", + "\n", + "**`return`**`(value)`\n", + "\n", + "`}`\n", + "\n", + "สามารถดูการแนะนำเบื้องต้นเกี่ยวกับฟังก์ชันใน R ได้ [ที่นี่](https://skirmer.github.io/presentations/functions_with_r.html#1)\n", + "\n", + "มาเริ่มกันเลย! 
เราจะใช้ [คำกริยาใน dplyr](https://dplyr.tidyverse.org/) ที่เราได้เรียนรู้ในบทเรียนก่อนหน้า เพื่อทบทวน:\n", + "\n", + "- `dplyr::select()`: ช่วยให้คุณเลือกว่าจะเก็บหรือไม่เก็บ **คอลัมน์** ใด\n", + "\n", + "- `tidyr::pivot_longer()`: ช่วยให้คุณ \"ยืด\" ข้อมูล เพิ่มจำนวนแถวและลดจำนวนคอลัมน์\n", + "\n", + "- `dplyr::group_by()` และ `dplyr::summarise()`: ช่วยให้คุณหาสถิติสรุปสำหรับกลุ่มต่าง ๆ และจัดให้อยู่ในตารางที่ดูดี\n", + "\n", + "- `dplyr::filter()`: สร้างชุดข้อมูลย่อยที่มีเฉพาะแถวที่ตรงตามเงื่อนไขของคุณ\n", + "\n", + "- `dplyr::mutate()`: ช่วยให้คุณสร้างหรือแก้ไขคอลัมน์\n", + "\n", + "ลองดู [บทเรียน learnr ที่เต็มไปด้วยศิลปะ](https://allisonhorst.shinyapps.io/dplyr-learnr/#section-welcome) โดย Allison Horst ที่แนะนำฟังก์ชันการจัดการข้อมูลที่มีประโยชน์ใน dplyr *(ส่วนหนึ่งของ Tidyverse)*\n" + ], + "metadata": { + "id": "K3RF5bSCHC76" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Creates a functions that returns the top ingredients by class\r\n", + "\r\n", + "create_ingredient <- function(df){\r\n", + " \r\n", + " # Drop the id column which is the first colum\r\n", + " ingredient_df = df %>% select(-1) %>% \r\n", + " # Transpose data to a long format\r\n", + " pivot_longer(!cuisine, names_to = \"ingredients\", values_to = \"count\") %>% \r\n", + " # Find the top most ingredients for a particular cuisine\r\n", + " group_by(ingredients) %>% \r\n", + " summarise(n_instances = sum(count)) %>% \r\n", + " filter(n_instances != 0) %>% \r\n", + " # Arrange by descending order\r\n", + " arrange(desc(n_instances)) %>% \r\n", + " mutate(ingredients = factor(ingredients) %>% fct_inorder())\r\n", + " \r\n", + " \r\n", + " return(ingredient_df)\r\n", + "} # End of function" + ], + "outputs": [], + "metadata": { + "id": "uB_0JR82HTPa" + } + }, + { + "cell_type": "markdown", + "source": [ + "ตอนนี้เราสามารถใช้ฟังก์ชันนี้เพื่อดูแนวโน้มของส่วนผสมยอดนิยมสิบอันดับแรกตามประเภทของอาหารได้แล้ว ลองนำไปใช้กับ `thai_df` กันดู\n" + ], + "metadata": { +
"id": "h9794WF8HWmc" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Call create_ingredient and display popular ingredients\r\n", + "thai_ingredient_df <- create_ingredient(df = thai_df)\r\n", + "\r\n", + "thai_ingredient_df %>% \r\n", + " slice_head(n = 10)" + ], + "outputs": [], + "metadata": { + "id": "agQ-1HrcHaEA" + } + }, + { + "cell_type": "markdown", + "source": [ + "ในส่วนก่อนหน้านี้ เราได้ใช้ `geom_col()` มาดูกันว่าคุณสามารถใช้ `geom_bar` ได้อย่างไรบ้างในการสร้างแผนภูมิแท่ง ใช้ `?geom_bar` เพื่ออ่านเพิ่มเติม\n" + ], + "metadata": { + "id": "kHu9ffGjHdcX" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make a bar chart for popular thai cuisines\r\n", + "thai_ingredient_df %>% \r\n", + " slice_head(n = 10) %>% \r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"steelblue\") +\r\n", + " xlab(\"\") + ylab(\"\")" + ], + "outputs": [], + "metadata": { + "id": "fb3Bx_3DHj6e" + } + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "RHP_xgdkHnvM" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Get popular ingredients for Japanese cuisines and make bar chart\r\n", + "create_ingredient(df = japanese_df) %>% \r\n", + " slice_head(n = 10) %>%\r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"darkorange\", alpha = 0.8) +\r\n", + " xlab(\"\") + ylab(\"\")\r\n" + ], + "outputs": [], + "metadata": { + "id": "019v8F0XHrRU" + } + }, + { + "cell_type": "markdown", + "source": [ + "แล้วอาหารจีนล่ะ?\n" + ], + "metadata": { + "id": "iIGM7vO8Hu3v" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Get popular ingredients for Chinese cuisines and make bar chart\r\n", + "create_ingredient(df = chinese_df) %>% \r\n", + " slice_head(n = 10) %>%\r\n", + " ggplot(aes(x = n_instances, y = 
ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"cyan4\", alpha = 0.8) +\r\n", + " xlab(\"\") + ylab(\"\")" + ], + "outputs": [], + "metadata": { + "id": "lHd9_gd2HyzU" + } + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "ir8qyQbNH1c7" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Get popular ingredients for Indian cuisines and make bar chart\r\n", + "create_ingredient(df = indian_df) %>% \r\n", + " slice_head(n = 10) %>%\r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"#041E42FF\", alpha = 0.8) +\r\n", + " xlab(\"\") + ylab(\"\")" + ], + "outputs": [], + "metadata": { + "id": "ApukQtKjH5FO" + } + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "qv30cwY1H-FM" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Get popular ingredients for Korean cuisines and make bar chart\r\n", + "create_ingredient(df = korean_df) %>% \r\n", + " slice_head(n = 10) %>%\r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"#852419FF\", alpha = 0.8) +\r\n", + " xlab(\"\") + ylab(\"\")" + ], + "outputs": [], + "metadata": { + "id": "lumgk9cHIBie" + } + }, + { + "cell_type": "markdown", + "source": [ + "จากการวิเคราะห์ข้อมูลด้วยภาพ เราสามารถตัดส่วนผสมที่พบบ่อยที่สุดซึ่งสร้างความสับสนระหว่างอาหารที่แตกต่างกันออกได้ โดยใช้ `dplyr::select()`\n", + "\n", + "ใครๆ ก็ชอบข้าว กระเทียม และขิง!\n" + ], + "metadata": { + "id": "iO4veMXuIEta" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Drop id column, rice, garlic and ginger from our original data set\r\n", + "df_select <- df %>% \r\n", + " select(-c(1, rice, garlic, ginger))\r\n", + "\r\n", + "# Display new data set\r\n", + "df_select %>% \r\n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "iHJPiG6rIUcK" + 
} + }, + { + "cell_type": "markdown", + "source": [ + "## การเตรียมข้อมูลด้วย Recipes 👩‍🍳👨‍🍳 - การจัดการข้อมูลที่ไม่สมดุล ⚖️\n", + "\n", + "

\n", + " \n", + "

ภาพประกอบโดย @allison_horst
\n", + "\n", + "เนื่องจากบทเรียนนี้เกี่ยวกับอาหาร เราจึงต้องนำ `recipes` มาใช้ในบริบทที่เหมาะสม\n", + "\n", + "Tidymodels มีอีกหนึ่งแพ็กเกจที่น่าสนใจ: `recipes` - แพ็กเกจสำหรับการเตรียมข้อมูลก่อนการวิเคราะห์\n" + ], + "metadata": { + "id": "kkFd-JxdIaL6" + } + }, + { + "cell_type": "markdown", + "source": [ + "มาดูการกระจายของอาหารของเราอีกครั้ง\n" + ], + "metadata": { + "id": "6l2ubtTPJAhY" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Distribution of cuisines\r\n", + "old_label_count <- df_select %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(desc(n))\r\n", + "\r\n", + "old_label_count" + ], + "outputs": [], + "metadata": { + "id": "1e-E9cb7JDVi" + } + }, + { + "cell_type": "markdown", + "source": [ + "ดังที่คุณเห็น มีการกระจายจำนวนอาหารที่ไม่เท่ากันอย่างชัดเจน อาหารเกาหลีมีจำนวนเกือบ 3 เท่าของอาหารไทย ข้อมูลที่ไม่สมดุลมักส่งผลเสียต่อประสิทธิภาพของโมเดล ลองนึกถึงการจำแนกประเภทแบบสองค่า หากข้อมูลส่วนใหญ่เป็นคลาสเดียว โมเดลการเรียนรู้ของเครื่อง (ML) จะมีแนวโน้มที่จะทำนายคลาสนั้นบ่อยขึ้น เพียงเพราะมีข้อมูลสำหรับคลาสนั้นมากกว่า การปรับสมดุลข้อมูลจะช่วยแก้ไขความไม่สมดุลนี้โดยการปรับข้อมูลที่มีการกระจายไม่เท่ากัน หลายโมเดลทำงานได้ดีที่สุดเมื่อจำนวนการสังเกตเท่ากัน และมักจะประสบปัญหาเมื่อข้อมูลไม่สมดุล\n", + "\n", + "มีวิธีหลักสองวิธีในการจัดการกับชุดข้อมูลที่ไม่สมดุล:\n", + "\n", + "- เพิ่มจำนวนการสังเกตในคลาสที่มีจำนวนน้อย: `Over-sampling` เช่น การใช้ SMOTE algorithm\n", + "\n", + "- ลดจำนวนการสังเกตในคลาสที่มีจำนวนมาก: `Under-sampling`\n", + "\n", + "ตอนนี้เรามาแสดงวิธีจัดการกับชุดข้อมูลที่ไม่สมดุลโดยใช้ `recipe` กัน `Recipe` สามารถมองว่าเป็นแผนงานที่อธิบายขั้นตอนที่ควรนำไปใช้กับชุดข้อมูลเพื่อเตรียมพร้อมสำหรับการวิเคราะห์ข้อมูล\n" + ], + "metadata": { + "id": "soAw6826JKx9" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load themis package for dealing with imbalanced data\r\n", + "library(themis)\r\n", + "\r\n", + "# Create a recipe for preprocessing data\r\n", + "cuisines_recipe <- 
recipe(cuisine ~ ., data = df_select) %>% \r\n", + " step_smote(cuisine)\r\n", + "\r\n", + "cuisines_recipe" + ], + "outputs": [], + "metadata": { + "id": "HS41brUIJVJy" + } + }, + { + "cell_type": "markdown", + "source": [ + "มาดูขั้นตอนการเตรียมข้อมูลของเรากัน\n", + "\n", + "- การเรียกใช้ `recipe()` พร้อมสูตรจะบอกให้ recipe กำหนด *บทบาท* ของตัวแปรโดยใช้ข้อมูล `df_select` เป็นข้อมูลอ้างอิง ตัวอย่างเช่น คอลัมน์ `cuisine` ถูกกำหนดให้มีบทบาทเป็น `outcome` ในขณะที่คอลัมน์อื่นๆ ถูกกำหนดให้มีบทบาทเป็น `predictor`\n", + "\n", + "- [`step_smote(cuisine)`](https://themis.tidymodels.org/reference/step_smote.html) สร้าง *สเปค* ของขั้นตอนใน recipe ที่สร้างตัวอย่างใหม่ของคลาสที่มีจำนวนน้อยโดยใช้เพื่อนบ้านที่ใกล้ที่สุดของกรณีเหล่านี้\n", + "\n", + "ตอนนี้ หากเราต้องการดูข้อมูลที่ผ่านการเตรียมแล้ว เราจะต้อง [**`prep()`**](https://recipes.tidymodels.org/reference/prep.html) และ [**`bake()`**](https://recipes.tidymodels.org/reference/bake.html) recipe ของเรา\n", + "\n", + "`prep()`: ประเมินพารามิเตอร์ที่จำเป็นจากชุดข้อมูลการฝึกที่สามารถนำไปใช้กับชุดข้อมูลอื่นในภายหลัง\n", + "\n", + "`bake()`: ใช้ recipe ที่ผ่านการเตรียมแล้วและดำเนินการกับชุดข้อมูลใดๆ\n" + ], + "metadata": { + "id": "Yb-7t7XcJaC8" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Prep and bake the recipe\r\n", + "preprocessed_df <- cuisines_recipe %>% \r\n", + " prep() %>% \r\n", + " bake(new_data = NULL) %>% \r\n", + " relocate(cuisine)\r\n", + "\r\n", + "# Display data\r\n", + "preprocessed_df %>% \r\n", + " slice_head(n = 5)\r\n", + "\r\n", + "# Quick summary stats\r\n", + "preprocessed_df %>% \r\n", + " introduce()" + ], + "outputs": [], + "metadata": { + "id": "9QhSgdpxJl44" + } + }, + { + "cell_type": "markdown", + "source": [ + "ตอนนี้เรามาตรวจสอบการกระจายของอาหารของเราและเปรียบเทียบกับข้อมูลที่ไม่สมดุลกัน\n" + ], + "metadata": { + "id": "dmidELh_LdV7" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Distribution of cuisines\r\n", + 
"new_label_count <- preprocessed_df %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(desc(n))\r\n", + "\r\n", + "list(new_label_count = new_label_count,\r\n", + " old_label_count = old_label_count)" + ], + "outputs": [], + "metadata": { + "id": "aSh23klBLwDz" + } + }, + { + "cell_type": "markdown", + "source": [ + "อร่อย! ข้อมูลสะอาด สมดุล และน่าทานมาก 😋!\n", + "\n", + "> โดยปกติแล้ว สูตร (recipe) มักถูกใช้เป็นตัวเตรียมข้อมูลก่อนการสร้างโมเดล ซึ่งจะกำหนดขั้นตอนที่ควรนำไปใช้กับชุดข้อมูลเพื่อเตรียมให้พร้อมสำหรับการสร้างโมเดล ในกรณีนี้ `workflow()` มักจะถูกใช้งาน (อย่างที่เราได้เห็นในบทเรียนก่อนหน้านี้) แทนที่จะประเมินสูตรด้วยตนเอง\n", + ">\n", + "> ดังนั้น โดยทั่วไปคุณไม่จำเป็นต้องใช้ **`prep()`** และ **`bake()`** กับสูตรเมื่อคุณใช้ tidymodels แต่ฟังก์ชันเหล่านี้มีประโยชน์ในกรณีที่คุณต้องการยืนยันว่าสูตรทำงานตามที่คุณคาดหวังไว้ เช่นในกรณีของเรา\n", + ">\n", + "> เมื่อคุณใช้ **`bake()`** กับสูตรที่ผ่านการ **`prep()`** แล้ว โดยกำหนด **`new_data = NULL`** คุณจะได้ข้อมูลที่คุณให้ไว้ตอนกำหนดสูตรกลับมา แต่ข้อมูลนั้นจะผ่านขั้นตอนการเตรียมข้อมูลแล้ว\n", + "\n", + "ตอนนี้เรามาบันทึกสำเนาของข้อมูลนี้ไว้เพื่อใช้ในบทเรียนถัดไป:\n" + ], + "metadata": { + "id": "HEu80HZ8L7ae" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Save preprocessed data\r\n", + "write_csv(preprocessed_df, \"../../../data/cleaned_cuisines_R.csv\")" + ], + "outputs": [], + "metadata": { + "id": "cBmCbIgrMOI6" + } + }, + { + "cell_type": "markdown", + "source": [ + "ไฟล์ CSV ใหม่สามารถพบได้ในโฟลเดอร์ข้อมูลหลัก\n", + "\n", + "**🚀ความท้าทาย**\n", + "\n", + "หลักสูตรนี้มีชุดข้อมูลที่น่าสนใจหลายชุด ลองค้นหาในโฟลเดอร์ `data` และดูว่ามีชุดข้อมูลใดที่เหมาะสมสำหรับการจัดประเภทแบบไบนารีหรือหลายคลาสหรือไม่? 
คุณจะตั้งคำถามอะไรกับชุดข้อมูลนี้?\n", + "\n", + "## [**แบบทดสอบหลังการบรรยาย**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/20/)\n", + "\n", + "## **ทบทวนและศึกษาด้วยตนเอง**\n", + "\n", + "- ลองดู [แพ็กเกจ themis](https://github.com/tidymodels/themis) มีเทคนิคอื่นใดที่เราสามารถใช้เพื่อจัดการกับข้อมูลที่ไม่สมดุลได้บ้าง?\n", + "\n", + "- เว็บไซต์อ้างอิงของ Tidy models [เว็บไซต์อ้างอิง](https://www.tidymodels.org/start/)\n", + "\n", + "- H. Wickham และ G. Grolemund, [*R for Data Science: Visualize, Model, Transform, Tidy, and Import Data*](https://r4ds.had.co.nz/)\n", + "\n", + "#### ขอขอบคุณ:\n", + "\n", + "[`Allison Horst`](https://twitter.com/allison_horst/) สำหรับการสร้างภาพประกอบที่ยอดเยี่ยมซึ่งทำให้ R น่าสนใจและเข้าถึงได้มากขึ้น ค้นหาภาพประกอบเพิ่มเติมได้ที่ [แกลเลอรี](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM)\n", + "\n", + "[Cassie Breviu](https://www.twitter.com/cassieview) และ [Jen Looper](https://www.twitter.com/jenlooper) สำหรับการสร้างเวอร์ชัน Python ดั้งเดิมของโมดูลนี้ ♥️\n", + "\n", + "

\n", + " \n", + "

ภาพประกอบโดย @allison_horst
\n" + ], + "metadata": { + "id": "WQs5621pMGwf" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้อง แต่โปรดทราบว่าการแปลอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่แม่นยำ เอกสารต้นฉบับในภาษาต้นทางควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษามนุษย์มืออาชีพ เราจะไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความที่ผิดพลาดซึ่งเกิดจากการใช้การแปลนี้\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/th/4-Classification/1-Introduction/solution/notebook.ipynb b/translations/th/4-Classification/1-Introduction/solution/notebook.ipynb new file mode 100644 index 000000000..b82f87655 --- /dev/null +++ b/translations/th/4-Classification/1-Introduction/solution/notebook.ipynb @@ -0,0 +1,700 @@ +{ + "cells": [ + { + "source": [ + "# อาหารเอเชียและอินเดียแสนอร่อย\n", + "\n", + "## แนะนำ\n", + "อาหารเอเชียและอินเดียมีรสชาติที่หลากหลายและเต็มไปด้วยเครื่องเทศที่เป็นเอกลักษณ์ ไม่ว่าคุณจะชอบรสเผ็ด รสหวาน หรือรสเปรี้ยว คุณจะพบเมนูที่ตอบโจทย์ความชอบของคุณได้อย่างแน่นอน\n", + "\n", + "## อาหารเอเชียยอดนิยม\n", + "### ซูชิ\n", + "ซูชิเป็นอาหารญี่ปุ่นที่ได้รับความนิยมทั่วโลก ประกอบด้วยข้าวปรุงรสและปลาดิบหรือส่วนผสมอื่น ๆ ที่ห่อด้วยสาหร่ายหรือจัดวางอย่างสวยงาม\n", + "\n", + "### ผัดไทย\n", + "ผัดไทยเป็นอาหารไทยที่มีชื่อเสียง ประกอบด้วยเส้นก๋วยเตี๋ยวผัดกับไข่ เต้าหู้ กุ้ง และซอสที่มีรสชาติกลมกล่อม\n", + "\n", + "### ติ่มซำ\n", + "ติ่มซำเป็นอาหารจีนที่มักเสิร์ฟในรูปแบบของอาหารว่าง มีหลากหลายชนิด เช่น ขนมจีบ ฮะเก๋า และซาลาเปา\n", + "\n", + "## อาหารอินเดียยอดนิยม\n", + "### แกงกะหรี่\n", + "แกงกะหรี่เป็นอาหารอินเดียที่มีรสชาติเข้มข้นและหอมเครื่องเทศ มักเสิร์ฟพร้อมข้าวหรือแป้งนาน\n", + "\n", + "### ไก่ทันดูรี\n", + "ไก่ทันดูรีเป็นเมนูที่ปรุงด้วยเครื่องเทศและโยเกิร์ต แล้วนำไปย่างในเตาทันดูร์จนได้รสชาติที่หอมและเข้มข้น\n", + "\n", + "### ซาโมซ่า\n", + 
"ซาโมซ่าเป็นอาหารว่างที่มีไส้หลากหลาย เช่น มันฝรั่งและถั่ว มักทอดจนกรอบและเสิร์ฟพร้อมซอสจิ้ม\n", + "\n", + "## เคล็ดลับการทำอาหาร\n", + "- [!TIP] ใช้เครื่องเทศสดใหม่เพื่อเพิ่มรสชาติให้กับอาหารของคุณ\n", + "- [!NOTE] การปรับรสชาติให้เหมาะกับความชอบของคุณเป็นสิ่งสำคัญ\n", + "- [!WARNING] ระวังอย่าใช้เครื่องเทศมากเกินไป เพราะอาจทำให้อาหารมีรสชาติเข้มจนเกินไป\n", + "\n", + "## สรุป\n", + "อาหารเอเชียและอินเดียมีความหลากหลายและเต็มไปด้วยรสชาติที่น่าตื่นเต้น ลองสำรวจเมนูต่าง ๆ และปรุงอาหารด้วยตัวเองเพื่อสัมผัสประสบการณ์ที่ไม่เหมือนใคร!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: imblearn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.0)\n", + "Requirement already satisfied: imbalanced-learn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imblearn) (0.8.0)\n", + "Requirement already satisfied: numpy>=1.13.3 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (1.19.2)\n", + "Requirement already satisfied: scipy>=0.19.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (1.4.1)\n", + "Requirement already satisfied: scikit-learn>=0.24 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (0.24.2)\n", + "Requirement already satisfied: joblib>=0.11 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (0.16.0)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from scikit-learn>=0.24->imbalanced-learn->imblearn) 
(2.1.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install imblearn" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib as mpl\n", + "import numpy as np\n", + "from imblearn.over_sampling import SMOTE" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('../../data/cuisines.csv')" + ] + }, + { + "source": [ + "ชุดข้อมูลนี้ประกอบด้วย 385 คอลัมน์ที่แสดงถึงส่วนผสมทุกประเภทในอาหารหลากหลายประเภทจากชุดอาหารที่กำหนด\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 65 indian 0 0 0 0 0 \n", + "1 66 indian 1 0 0 0 0 \n", + "2 67 indian 0 0 0 0 0 \n", + "3 68 indian 0 0 0 0 0 \n", + "4 69 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 385 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
065indian00000000...0000000000
166indian10000000...0000000000
267indian00000000...0000000000
368indian00000000...0000000000
469indian00000000...0000000010
\n

5 rows × 385 columns

\n
" + }, + "metadata": {}, + "execution_count": 4 + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\nRangeIndex: 2448 entries, 0 to 2447\nColumns: 385 entries, Unnamed: 0 to zucchini\ndtypes: int64(384), object(1)\nmemory usage: 7.2+ MB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "korean 799\n", + "indian 598\n", + "chinese 442\n", + "japanese 320\n", + "thai 289\n", + "Name: cuisine, dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ], + "source": [ + "df.cuisine.value_counts()" + ] + }, + { + "source": [ + "แสดงอาหารในกราฟแท่ง\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 7 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAZEAAAD4CAYAAAAtrdtxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAASY0lEQVR4nO3df7TldV3v8eerGZkRRoeAiXtE5UgNIkUCjlwQIzAiC7NscdcSbcmsfkxl5SXX0juuyzK9d3UvlXnpplajma0kMtCUhluImNcr8msGBmb4pZaTQCFQOYom0fi+f+zPkd14hpnzOWefvYfzfKy113z35/vde7/22fvMa3++3733SVUhSVKPbxt3AEnSgcsSkSR1s0QkSd0sEUlSN0tEktRt+bgDLKYjjjiipqenxx1Dkg4oW7dufbiq1sy2bkmVyPT0NFu2bBl3DEk6oCT5u72tc3eWJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqduS+sT69vt3Mb3xqnHH0ALZefG5444gLXnORCRJ3SwRSVI3S0SS1M0SkSR1s0QkSd0sEUlSN0tEktRtIkokyaFJXtuWz0yyeY6X/29Jzh5NOknS3kxEiQCHAq/tvXBVvbmqPraAeSRJ+2FSSuRi4DuTbAN+E1iV5Iokdye5NEkAkrw5yc1JdiTZNDT+viTnjTG/JC1Jk1IiG4G/qaoTgTcAJwEXAscDxwCnt+3eUVUvrKrvAZ4KvGxfV5xkQ5ItSbbs/tqu0aSXpCVqUkpkTzdV1X1V9Q1gGzDdxs9KcmOS7cBLgO/e1xVV1aaqWldV65YdvHp0iSVpCZrUL2B8dGh5N7A8yUrgXcC6qro3yVuAleMIJ0kamJSZyFeAp+1jm5nCeDjJKsBjIJI0ZhMxE6mqf0xyXZIdwL8AX5xlmy8leTewA3gAuHmRY0qS9jARJQJQVa/ay/gvDS1fBFw0yzbrR5dMkrQ3k7I7S5J0ALJEJEndLBFJUjdLRJLUzRKRJHWbmHdnLYYTjlrNlovPHXcMSXrScCYiSepmiUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6rZ83AEW0/b7dzG98apxx9CY7Lz43HFHkJ50nIlIkrpZIpKkbpaIJKmbJSJJ6maJSJK6WSKSpG77VSJJPj3qIJKkA89+lUhVvWjUQSRJB579nYk8kmRVkmuT3JJke5Ifa+umk9yd5NIkdyW5IsnBbd2bk9ycZEeSTUnSxj+R5NeT3JTkM0m+r40vS/Kb7TK3J/m5Nj6V5JNJtrXrmtn+nCTXt0yXJ1k1ih+SJGl2czkm8nXgFVV1MnAW8FszpQA8F3hXVT0P+DLw2jb+jqp6YVV9D/BU4GVD17e8qk4BLgR+tY39NLCrql4IvBD42STPAV4FXF1VJwLPB7YlOQK4CDi7ZdoCvH4ud16SND9z+dqTAP8jyRnAN4CjgCPbunur6rq2/H7gdcDbgLOSvBE4GDgMuAP4i7bdh9q/W4HptnwO8L1JzmvnVwNrgZuB9yZ5CvDhqtqW5PuB44HrWpcdBFz/LaGTDcAGgGVPXzOHuytJ2pe5lMirgTXAC6rqsSQ7gZVtXe2xbSVZCbwLWFdV9yZ5y9D2AI+2f3cP5Qjwy1V19Z433srrXOB9Sd4O/DNwTVWd/0Shq2oTsAlgxdTaPXNKkuZhLruzVgMPtgI5Czh6aN2zk5zWll8FfIrHC+PhdqziPPbtauAX2oyDJMcmOSTJ0cAXq+rdwHuAk4EbgNOTfFfb9pAkx87h/kiS5ml/ZyIFXAr8RZLtDI4/3D20/h7gF5O8F7gT+N2q+lqSdwM7gAcY7JLal/cw2LV1Szve8hDw48CZwBuSPAY8Arymqh5Ksh6
4LMmKdvmLgM/s532SJM1Tqp54D0+Sw4FbqurovayfBja3g+cTbcXU2pq64JJxx9CY+FXwUp8kW6tq3WzrnnB3VpJnMDhY/bZRBJMkHdiecHdWVf098ITHGapqJzDxsxBJ0sLzu7MkSd0sEUlSN0tEktRtLh82POCdcNRqtvgOHUlaMM5EJEndLBFJUjdLRJLUzRKRJHWzRCRJ3SwRSVI3S0SS1M0SkSR1s0QkSd0sEUlSN0tEktTNEpEkdbNEJEndLBFJUjdLRJLUzRKRJHWzRCRJ3SwRSVI3S0SS1M0SkSR1s0QkSd2WjzvAYtp+/y6mN1417hhSt50XnzvuCNK/40xEktTNEpEkdbNEJEndLBFJUjdLRJLUzRKRJHWzRCRJ3Ra0RJK8L8l5s4w/I8kVC3lbkqTxW5QPG1bV3wPfUi6SpAPbvGYiSV6T5PYktyX54zZ8RpJPJ/nbmVlJkukkO9ry+iQfSvJXST6b5DeGru+cJNcnuSXJ5UlWtfGLk9zZbuttbWxNkg8mubmdTp/PfZEkzV33TCTJdwMXAS+qqoeTHAa8HZgCXgwcB1wJzLYb60TgJOBR4J4kvwP8S7u+s6vqq0n+C/D6JO8EXgEcV1WV5NB2Hb8N/K+q+lSSZwNXA8+bJecGYAPAsqev6b27kqRZzGd31kuAy6vqYYCq+qckAB+uqm8AdyY5ci+XvbaqdgEkuRM4GjgUOB64rl3PQcD1wC7g68AfJNkMbG7XcTZwfNsW4OlJVlXVI8M3VFWbgE0AK6bW1jzuryRpD6M4JvLo0HL2Y5vdLUeAa6rq/D03TnIK8AMMjqv8EoMC+zbg1Kr6+kKEliTN3XyOiXwc+E9JDgdou7Pm4wbg9CTf1a7vkCTHtuMiq6vq/wC/Ajy/bf9R4JdnLpzkxHneviRpjrpnIlV1R5JfA/5vkt3ArfMJUlUPJVkPXJZkRRu+CPgK8JEkKxnMVl7f1r0OeGeS2xncj08CPz+fDJKkuUnV0jlMsGJqbU1dcMm4Y0jd/HsiGockW6tq3Wzr/MS6JKmbJSJJ6maJSJK6WSKSpG6WiCSp26J8AeOkOOGo1Wzx3S2StGCciUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6maJSJK6LR93gMW0/f5dTG+8atwxJM3RzovPHXcE7YUzEUlSN0tEktTNEpEkdbNEJEndLBFJUjdLRJLUbWQlkuTTc9z+zCSb2/LLk2wcTTJJ0kIZ2edEqupF87jslcCVCxhHkjQCo5yJPNL+PTPJJ5JckeTuJJcmSVv30jZ2C/ATQ5ddn+QdbflHk9yY5NYkH0tyZBt/S5L3tuv+2ySvG9V9kSTNbrGOiZwEXAgcDxwDnJ5kJfBu4EeBFwD/YS+X/RRwalWdBPwp8MahdccBPwScAvxqkqeMJr4kaTaL9bUnN1XVfQBJtgHTwCPA56vqs238/cCGWS77TOADSaaAg4DPD627qqoeBR5N8iBwJHDf8IWTbJi53mVPX7OQ90mSlrzFmok8OrS8m7mV1+8A76iqE4CfA1bO5XqralNVrauqdcsOXj2Hm5Uk7cs43+J7NzCd5Dvb+fP3st1q4P62fMHIU0mS9tvYSqSqvs5gN9NV7cD6g3vZ9C3A5Um2Ag8vUjxJ0n5IVY07w6JZMbW2pi64ZNwxJM2RXwU/Xkm2VtW62db5iXVJUjdLRJLUzRKRJHWzRCRJ3SwRSVK3xfrE+kQ44ajVbPFdHpK0YJyJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqZolIkrotH3eAxbT9/l1Mb7xq3DEkaVHtvPjckV23MxF
JUjdLRJLUzRKRJHWzRCRJ3SwRSVI3S0SS1M0SkSR1W9ASSTKdZMdCXqckaXJNxEwkyZL60KMkPVmMrESSHJPk1iTfl+QPk2xv589q69cnuTLJx4Fr29gbktyc5PYkbx26rg8n2ZrkjiQbhsYfSfJrSW5LckOSI0d1fyRJ32okJZLkucAHgfXAKUBV1QnA+cAfJVnZNj0ZOK+qvj/JOcDatv2JwAuSnNG2+6mqegGwDnhdksPb+CHADVX1fOCTwM/OkmVDki1Jtuz+2q5R3F1JWrJGUSJrgI8Ar66q24AXA+8HqKq7gb8Djm3bXlNV/9SWz2mnW4FbgOMYlAoMiuM24AbgWUPj/wpsbstbgek9w1TVpqpaV1Xrlh28eqHuoySJ0XwB4y7gCwzK4859bPvVoeUA/7Oqfn94gyRnAmcDp1XV15J8ApiZyTxWVdWWd7PEvlBSksZtFDORfwVeAbwmyauA/we8GiDJscCzgXtmudzVwE8lWdW2PSrJdwCrgX9uBXIccOoIMkuSOozklXtVfTXJy4BrgP8OnJBkO/BvwPqqejTJnpf5aJLnAde3dY8APwn8FfDzSe5iUD43jCKzJGnu8vjeoCe/FVNra+qCS8YdQ5IW1Xz/nkiSrVW1brZ1E/E5EUnSgckSkSR1s0QkSd0sEUlSN0tEktRtSX0474SjVrNlnu9SkCQ9zpmIJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqtqT+KFWSrzD7n+adFEcAD487xBMw3/yYb37MNz/zyXd0Va2ZbcWS+u4s4J69/XWuSZBki/n6mW9+zDc/SzWfu7MkSd0sEUlSt6VWIpvGHWAfzDc/5psf883Pksy3pA6sS5IW1lKbiUiSFpAlIknqtmRKJMlLk9yT5HNJNo4pw3uTPJhkx9DYYUmuSfLZ9u+3t/Ek+d8t7+1JTl6EfM9K8tdJ7kxyR5L/PEkZk6xMclOS21q+t7bx5yS5seX4QJKD2viKdv5zbf30KPO121yW5NYkmycw284k25NsS7KljU3EY9tu89AkVyS5O8ldSU6blHxJntt+bjOnLye5cFLytdv8lfZ7sSPJZe33ZfTPv6p60p+AZcDfAMcABwG3AcePIccZwMnAjqGx3wA2tuWNwK+35R8B/hIIcCpw4yLkmwJObstPAz4DHD8pGdvtrGrLTwFubLf7Z8Ar2/jvAb/Qll8L/F5bfiXwgUX4Gb4e+BNgczs/Sdl2AkfsMTYRj227zT8CfqYtHwQcOkn5hnIuAx4Ajp6UfMBRwOeBpw4979YvxvNvUX7o4z4BpwFXD51/E/CmMWWZ5t+XyD3AVFueYvCBSIDfB86fbbtFzPoR4AcnMSNwMHAL8B8ZfAp3+Z6PNXA1cFpbXt62ywgzPRO4FngJsLn9BzIR2drt7ORbS2QiHltgdftPMJOYb49M5wDXTVI+BiVyL3BYez5tBn5oMZ5/S2V31swPeMZ9bWwSHFlV/9CWHwCObMtjzdymtycxeLU/MRnb7qJtwIPANQxmmF+qqn+bJcM387X1u4DDRxjvEuCNwDfa+cMnKBtAAR9NsjXJhjY2KY/tc4CHgD9suwPfk+SQCco37JXAZW15IvJV1f3A24AvAP/A4Pm0lUV4/i2VEjkg1OBlwdjfc51kFfBB4MKq+vLwunFnrKrdVXUig1f9pwDHjSvLsCQvAx6sqq3jzvIEXlxVJwM/DPxikjOGV475sV3OYFfv71bVScBXGewe+qZxP/cA2jGFlwOX77lunPnasZgfY1DGzwAOAV66GLe9VErkfuBZQ+ef2cYmwReTTAG0fx9s42PJnOQpDArk0qr60CRmBKiqLwF/zWCKfmiSme+BG87wzXxt/WrgH0cU6XTg5Ul2An/KYJfWb09INuCbr1apqgeBP2dQwpPy2N4H3FdVN7bzVzAolUnJN+OHgVuq6ovt/KTkOxv4fFU9VFWPAR9i8Jwc+fNvqZT
IzcDa9k6FgxhMR68cc6YZVwIXtOULGByHmBl/TXuXx6nArqFp80gkCfAHwF1V9fZJy5hkTZJD2/JTGRyvuYtBmZy3l3wzuc8DPt5eLS64qnpTVT2zqqYZPL8+XlWvnoRsAEkOSfK0mWUG+/V3MCGPbVU9ANyb5Llt6AeAOycl35DzeXxX1kyOScj3BeDUJAe33+OZn9/on3+LcSBqEk4M3i3xGQb70P/rmDJcxmB/5WMMXnn9NIP9kNcCnwU+BhzWtg3wzpZ3O7BuEfK9mMF0/HZgWzv9yKRkBL4XuLXl2wG8uY0fA9wEfI7BboYVbXxlO/+5tv6YRXqcz+Txd2dNRLaW47Z2umPmd2BSHtt2mycCW9rj+2Hg2ycs3yEMXq2vHhqbpHxvBe5uvxt/DKxYjOefX3siSeq2VHZnSZJGwBKRJHWzRCRJ3SwRSVI3S0SS1M0SkSR1s0QkSd3+PxNFbW14TY8fAAAAAElFTkSuQmCC\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "df.cuisine.value_counts().plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "thai df: (289, 385)\njapanese df: (320, 385)\nchinese df: (442, 385)\nindian df: (598, 385)\nkorean df: (799, 385)\n" + ] + } + ], + "source": [ + "\n", + "thai_df = df[(df.cuisine == \"thai\")]\n", + "japanese_df = df[(df.cuisine == \"japanese\")]\n", + "chinese_df = df[(df.cuisine == \"chinese\")]\n", + "indian_df = df[(df.cuisine == \"indian\")]\n", + "korean_df = df[(df.cuisine == \"korean\")]\n", + "\n", + "print(f'thai df: {thai_df.shape}')\n", + "print(f'japanese df: {japanese_df.shape}')\n", + "print(f'chinese df: {chinese_df.shape}')\n", + "print(f'indian df: {indian_df.shape}')\n", + "print(f'korean df: {korean_df.shape}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "def create_ingredient_df(df):\n", + " # transpose df, drop cuisine and unnamed rows, sum the row to get total for ingredient and add value header to new df\n", + " ingredient_df = df.T.drop(['cuisine','Unnamed: 0']).sum(axis=1).to_frame('value')\n", + " # drop ingredients that have a 0 sum\n", + " ingredient_df = ingredient_df[(ingredient_df.T != 0).any()]\n", + " # sort df\n", + " ingredient_df = 
ingredient_df.sort_values(by='value', ascending=False, inplace=False)\n", + " return ingredient_df\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 10 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "thai_ingredient_df = create_ingredient_df(thai_df)\r\n", + "thai_ingredient_df.head(10).plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 11 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "japanese_ingredient_df = create_ingredient_df(japanese_df)\r\n", + "japanese_ingredient_df.head(10).plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 12 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAaYAAAD4CAYAAACngkIwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAfeElEQVR4nO3deZwV1Z338c8XbCEKYlSMiEurISECsl1NULO5xD2aCYgT4hLzkkcn4pgEnyGPJoMTM+OSbTRGJRmFUUcF1GjCqHGUuBL1NggNKmgiPmGJ4tZiEILwmz/qtN603U033O5bl/6+X69+dd1Tp079Thfxl3Oqbh1FBGZmZnnRrdIBmJmZlXJiMjOzXHFiMjOzXHFiMjOzXHFiMjOzXNmm0gFsDXbZZZeora2tdBhmZlWlrq7u1Yjo27TciakMamtrKRaLlQ7DzKyqSHqpuXJP5ZmZWa44MZmZWa44MZmZWa74HpOZWSdYv349y5YtY+3atZUOpdP17NmTPfbYg5qamjbVd2Iqg/rlDdROmlXpMKyDLb30uEqHYFVs2bJl9O7dm9raWiRVOpxOExG89tprLFu2jH322adNx3gqz8ysE6xdu5add965SyUlAEnsvPPO7RopOjGZmXWSrpaUGrW3305MZmaWK7m4xyTpNGAiEMACYDpwEbAt8BowDlgFLAYOjohVkroBS4BRqZlrgb3S9vkR8Zikyals3/T7pxFxpaRa4B7gUeBgYDlwYkS8I2k/4GqgL7AGOCsinuu43ptZV1Tu+9Llvgfaq1cv3n777bK22VYVHzFJGkSWhA6LiKHAP5IljE9FxHDgVuD/RsRG4CayJAVwBDA/IlYB/w78JCIOBL4M/LLkFAOBo4CDgH+W1PhYyADg6ogYBLyZjgOYAkyIiJFkyfLnLcQ9XlJRUnHDmoYt/juYmVmm4okJOAyYERGvAkTE68AewH2S6oELgEGp7vXAaWn7TOCGtH0E8DNJTwN3AztI6pX2zYqIdan9V4CPpPIXI+LptF0H1KZjDgZmpLauA/o1F3RETImIQkQUum/XZwv/BGZmHWvSpElcffXV732ePHkyl1xyCYcffjgjRoxgyJAh3HXXXR847ne/+x3HH3/8e5/PPfdcpk6dCkBdXR2f/exnGTlyJEcddRQrV64sS6x5SEzNuQr4WUQMAf4P0BMgIv4EvCzpMLIR0D2pfjeyEdaw9NM/IhrHoOtK2t3A+9OXzZV3A94saWdYRHyiIzpoZtaZxo4dy/Tp09/7PH36dE4//XTuvPNO5s6dy+zZs/n2t79NRLSpvfXr1zNhwgRmzpxJXV0dZ555JhdeeGFZYs3DPaYHgTsl/TgiXpO0E9CH7L4PwOlN6v+SbErvxojYkMp+C0wArgCQNKxkNNRmEfGWpBcljYmIGcoeJTkgIuZvRr/MzHJj+PDhvPLKK6xYsYJVq1bx4Q9/mN12241vfvObPPzww3Tr1o3ly5fz8ssvs9tuu22yvcWLF7Nw4UKOPPJIADZs2EC/fs1OMLVbxRNTRCyS9APgIUkbgHnAZLLptDfIElfpt7LuJpvCu6Gk7DzgakkLyPr0MHD2ZoY0DrhG0kVADdk9LicmM6t6Y8aMYebMmfz5z39m7Nix3HzzzaxatYq6ujpqamqora39wPeNttlmGzZu3Pje58b9EcGgQYOYM2dO2eOseGICiIhpwLQmxR+c7MwMJXvo4b0n5dL9o7HNtDu5yefBJR8Hl5T/sGT7ReDotsZuZlYtxo4dy1lnncWrr77KQw89xPTp09l1112pqalh9uzZvPTSB1eh2HvvvXnmmWdYt24d77zzDg888ACHHnooH//4x1m1ahVz5sxh1KhRrF+/niVLljBo0KBmztw+uUhMbSVpEnAO7z+ZlwtD+veh6NfVmFk7VOIVV4MGDWL16tX079+ffv36MW7cOE444QSGDBlCoVBg4MCBHzhmzz335OSTT2bw4MH
ss88+DB8+HIBtt92WmTNnct5559HQ0MC7777L+eefX5bEpLbe6LKWFQqF8EKBZtaaZ599lk98ous+S9Vc/yXVRUShad28PpVnZmZdlBOTmZnlihOTmVkn6aq3TtrbbycmM7NO0LNnT1577bUul5wa12Pq2bNnm4+pqqfyzMyq1R577MGyZctYtWpVpUPpdI0r2LaVE5OZWSeoqalp8wquXZ2n8szMLFecmMzMLFecmMzMLFd8j6kM6pc3lH01SsuvSrxKxqwr8YjJzMxypcslJkn/LWnHSsdhZmbN61JTeWnhv+MjYuMmK5uZWUVs9SMmSbWSFkv6T2AhsEHSLmnfaZIWSJov6cZU1lfS7ZKeSj+HVDJ+M7OupquMmAYAp0fE7yUtBZA0CLgIODgiXk1LugP8O/CTiHhU0l7AfcAH3lUvaTwwHqD7Dn07oQtmZl1DV0lML0XE75uUHQbMSKvfEhGvp/IjgP2zWT8AdpDUKyLeLj04IqYAUwB69BvQtV5+ZWbWgbpKYvpLO+p2Az4VEWs3WdPMzMpuq7/H1IoHgTGSdgYomcr7LTChsZKkYRWIzcysy+qyiSkiFgE/AB6SNB/4cdp1HlBID0U8A5xdqRjNzLoidbW1QTpCoVCIYrFY6TDMzKqKpLqIKDQt77IjJjMzyycnJjMzyxUnJjMzyxUnJjMzyxUnJjMzyxUnJjMzyxUnJjMzyxUnJjMzyxUnJjMzyxUnJjMzy5Wu8nbxDlW/vIHaSbMqHYblxNJLj6t0CGZVzSMmMzPLFScmMzPLlapITJJ2lzSz0nGYmVnHq4rEFBErImJ0Jc4tyffhzMw6Ue4Sk6RLJX2j5PNkSRMlLUyfz5B0h6R7JT0v6fKSul+QNEfSXEkzJPVK5cdKek5SnaQrJf0mlR+U6s+T9Likj5ec425JDwIPdOofwMysi8tdYgJuA04u+Xwy8ESTOsOAscAQYKykPSXtAlwEHBERI4Ai8C1JPYHrgGMiYiTQt6Sd54BPR8Rw4HvAv5bsGwGMjojPNhekpPGSipKKG9Y0bG5fzcysidxNU0XEPEm7StqdLIm8AfypSbUHIqIBIC1/vjewI7A/8JgkgG2BOcBA4I8R8WI69hZgfNruA0yTNAAIoKbkHPdHxOutxDkFmALQo98ALwNsZlYmuUtMyQxgNLAb2QiqqXUl2xvI+iGyZPL3pRUlDWvlPN8HZkfElyTVAr8r2feXdkdtZmZbLI9TeZAlo1PIktOMNh7ze+AQSR8FkLS9pI8Bi4F9U+KBbAqwUR9gedo+Y8tCNjOzcshlYoqIRUBvYHlErGzjMavIksstkhaQpvEi4h3gH4B7JdUBq4HGm0KXA/8maR75HT2amXUpitj6b49I6hURbyu7+XQ18HxE/KRc7RcKhSgWi+VqzsysS5BUFxGFpuW5HDF1gLMkPQ0sIpu+u67C8ZiZWQu6xPRVGh2VbYRkZmYdp6uMmMzMrEo4MZmZWa44MZmZWa44MZmZWa44MZmZWa44MZmZWa44MZmZWa44MZmZWa50iS/YdrT65Q3UTppV6TDM2mTppcdVOgSzVnnEZGZmuVL1iUnS+ZK2K1Nbn2tcdt3MzCqj6hMTcD7QrsQkqXsHxWJmZluoqhJTWvxvlqT5khZK+mdgd2C2pNmpzjWSipIWSbq45Nilki6TNBcYI+mjkv4ntTVX0n6pai9JMyU9J+nmtFSGmZl1kmp7+OFoYEVEHAcgqQ/wNeDzEfFqqnNhRLyeRkUPSDogIhakfa9FxIh07BPApRFxp6SeZEl6T2A4MAhYATwGHAI82jQQSeOB8QDdd+jbMb01M+uCqmrEBNQDR6aRz6cjoqGZOienUdE8sgSzf8m+2wAk9Qb6R8SdABGxNiLWpDpPRsSyiNgIPA3UNhdIREyJiEJEFLpv16csnTMzsyobMUXEEkkjgGOBSyQ9ULpf0j7ARODAiHhD0lSgZ0mVv7ThNOtKtjdQZX8jM7NqV1UjJkm7A2s
i4ibgCmAEsBronarsQJZ8GiR9BDimuXYiYjWwTNJJqd0e5Xqyz8zMtky1jQaGAFdI2gisB84BRgH3SloREZ+XNA94DvgT2T2ilpwKXCfpX1JbYzo2dDMzawtFRKVjqHo9+g2Ifqf/tNJhmLWJ3/xgeSGpLiIKTcurbcSUS0P696Ho/7GbmZVFVd1jMjOzrZ8Tk5mZ5YoTk5mZ5YoTk5mZ5YoTk5mZ5YoTk5mZ5YoTk5mZ5YoTk5mZ5YoTk5mZ5YoTk5mZ5YpfSVQG9csbqJ00q9JhmLWL35lneeURk5mZ5YoTk5mZ5UpFE5OkkyTt34Z6UyWNbqb8c5J+U8Z4CpKuTNtnSPpZudo2M7O2qfSI6SRgk4mps0REMSLOq3QcZmZdWauJSdKlkr5R8nmypImSLpD0lKQFki4u2f9dSYslPSrpFkkTU/l+ku6VVCfpEUkDJR0MfJFsRdqnU52zUrvzJd3eZLnzIyQVJS2RdHwzsW4v6XpJT0qaJ+nEVvrVU9INkupT3c+n8jaPwCSNT/EUN6xpaMshZmbWBpsaMd0GnFzy+WRgFTAAOAgYBoyU9BlJBwJfBoYCxwClqxJOASZExEhgIvDziHgcuBu4ICKGRcQfgDsi4sCIGAo8C3y9pI3adM7jgGsl9WwS64XAgxFxEPB5soS3fQv9+gYQETEE+HtgWjPttSoipkREISIK3bfr055DzcysFa0+Lh4R8yTtKml3oC/wBjAE+AIwL1XrRZaoegN3RcRaYK2kXwNI6gUcDMyQ1Nh0jxZOOVjSJcCOqd37SvZNj4iNwPOS/ggMbHLsF4AvNo7SgJ7AXmQJrqlDgatSH5+T9BLwsdb+FmZm1jna8j2mGcBoYDeyEdTewL9FxHWllSSd38Lx3YA3I2JYG841FTgpIuZLOgP4XMm+aFK36WcBX46IxW04j5mZ5VRbHn64DTiFLDnNIBvFnJlGQkjqL2lX4DHghHT/phdwPEBEvAW8KGlMqi9JQ1Pbq8lGWo16Aysl1QDjmsQxRlI3SfsB+wJNE9B9wASlYZmk4a306ZHG9iV9jGxk5YRmZpYDm0xMEbGILGEsj4iVEfFb4L+AOZLqgZlA74h4iuye0QLgHqAeaHwqYBzwdUnzgUVA44MJtwIXpAcQ9gO+CzxBluSeaxLK/weeTG2fnaYMS30fqAEWSFqUPrfk50C3FP9twBkRsW5TfwszM+t4img6I7YFjUm9IuLt9DTdw8D4iJhbthPkVKFQiGKxWOkwzMyqiqS6iCg0LS/3u/KmpC/M9gSmdYWkZGZm5VXWxBQRXylne1tK0lHAZU2KX4yIL1UiHjMz27St+u3iEXEff/vIuZmZ5VylX0lkZmb2N5yYzMwsV5yYzMwsV5yYzMwsV5yYzMwsV5yYzMwsV5yYzMwsV7bq7zF1lvrlDdROmlXpMMw6xNJLj6t0CNbFeMRkZma54sRkZma54sTUhKSzJZ2WtqdKGl3pmMzMuhLfY2oiIq6tdAxmZl1ZWUdMkraXNEvSfEkLJY2VNFLSQ5LqJN0nqV+qe56kZyQtkHRrKjtI0py0cODjkj6eys+Q9CtJ90taKulcSd9K9X4vaadUbz9J96ZzPSJpYCux1kp6MJ3/AUl7pfLJkia2oa/jJRUlFTesadhUdTMza6NyT+UdDayIiKERMRi4F7gKGB0RI4HrgR+kupOA4RFxAHB2KnsO+HREDAe+B/xrSduDgb8DDkxtrEn15gCnpTpTgAnpXBPJVqptyVVka0YdANwMXNmejkbElIgoRESh+3Z92nOomZm1otxTefXAjyRdBvwGeIMsodwvCaA7sDLVXQDcLOlXwK9SWR9gmqQBQJAtld5odkSsBlZLagB+XXLOAyT1Ag4GZqRzAfRoJdZRZIkO4Ebg8vZ318zMyq3cCwUukTQCOBa4BHgQWBQRo5qpfhzwGeAE4EJJQ4DvkyWgL0mqBX5XUn9dyfbGks8byfrRDXgzIoaVrUNmZtbpyn2PaXe
yKbabgCuATwJ9JY1K+2skDZLUDdgzImYD/0Q2UuqVfi9PzZ3RnnNHxFvAi5LGpHNJ0tBWDnkcOCVtjwMeac/5zMysY5R7Km8IcIWkjcB64BzgXeBKSX3S+X4KLAFuSmUCroyINyVdTjaVdxGwOa9SGAdck46vAW4F5rdQdwJwg6QLgFXA1zbjfAAM6d+Hor8db2ZWFoqISsdQ9QqFQhSLxUqHYWZWVSTVRUShabm/YGtmZrmy1X/BVtKFwJgmxTMi4gfN1Tczs8ra6hNTSkBOQmZmVcJTeWZmlitOTGZmlitOTGZmlitOTGZmlitOTGZmlitOTGZmlitb/ePinaF+eQO1kzbnDUpmtqWW+nVgWx2PmMzMLFecmMzMLFe2usSUlkxfmLY/J+k3afuLkiZVNjozM9uULnOPKSLuBu6udBxmZta63I2YJG0vaZak+ZIWShor6UBJj6eyJyX1TiOjRyTNTT8Hb6LdMyT9LG3XSnpQ0gJJD0jaK5VPlXRlOtcfJY3ujD6bmdn78jhiOhpYERHHAaTFBOcBYyPiKUk7AO8ArwBHRsRaSQOAW4APrOvRgquAaRExTdKZwJXASWlfP+BQYCDZCGtmcw1IGg+MB+i+Q9/299LMzJqVuxETUA8cKekySZ8G9gJWRsRTkC2hHhHvkq1Q+wtJ9cAMYP92nGMU8F9p+0ayRNToVxGxMSKeAT7SUgMRMSUiChFR6L5dn3ac2szMWpO7EVNELJE0AjgWuAR4sIWq3wReBoaSJdi1ZQphXcm2ytSmmZm1Ue5GTJJ2B9ZExE3AFcAngX6SDkz7e0vaBuhDNpLaCJwKdG/HaR4HTknb44BHyhW/mZltmdyNmIAhwBWSNgLrgXPIRi5XSfoQ2f2lI4CfA7dLOg24F/hLO84xAbhB0gXAKuBrZYzfzMy2gCKi0jFUvUKhEMVisdJhmJlVFUl1EfGBh9ZyN5VnZmZdmxOTmZnlihOTmZnlihOTmZnlihOTmZnlihOTmZnlihOTmZnlihOTmZnlihOTmZnlihOTmZnlSh7flVd16pc3UDtpVqXDMLMyWnrpcZUOocvyiMnMzHKlyyamJkutn53eUm5mZhXWJafy0npO74mIaysVi5mZ/a2qTkySvgt8lWxNpT8BdUADMB7YFngBODUi1kiaSrbK7XDgMWBBSTuTgbcj4oeSPgpcC/QFNgBjIuIPndUnM7Ourmqn8tKKtl8mW1r9GKBxTY87IuLAiBgKPAt8veSwPYCDI+JbrTR9M3B1Ov5gYGUL5x8vqSipuGFNwxb2xszMGlXziOkQ4K6IWAuslfTrVD5Y0iXAjkAv4L6SY2ZExIaWGpTUG+gfEXcCpLabFRFTgCkAPfoN8GqLZmZlUrUjplZMBc6NiCHAxUDPkn3tWX7dzMwqoJoT02PACZJ6SuoFHJ/KewMrJdUA49rTYESsBpZJOglAUg9J25UzaDMza13VJqaIeAq4m+whhnuAerIHH74LPEGWuJ7bjKZPBc6TtAB4HNitLAGbmVmbKKJ6b49I6hURb6dRzcPA+IiY29lx9Og3IPqd/tPOPq2ZdSC/+aHjSaqLiELT8mp++AFgiqT9ye4jTatEUgIY0r8PRf8jNjMri6pOTBHxlUrHYGZm5VW195jMzGzr5MRkZma54sRkZma54sRkZma54sRkZma54sRkZma54sRkZma54sRkZma54sRkZma5UtVvfsiL+uUN1E6aVekwzKwD+J15nc8jJjMzy5WqT0yS/kXSEZWOw8zMyqPqp/Ii4nsdfQ5J3Vtbkt3MzMqnqkZMkr4rabGkRyXdImmipKmSRqf9SyVdLGmupHpJA1N5X0n3S1ok6ZeSXpK0S9r3VUlPSnpa0nWSuqfytyX9SNJ8YFTFOm1m1sVUTWKSdCDwZWAocAzwgcWlklcjYgRwDTAxlf0z8GBEDAJmAnulNj8BjAUOiYhhwAbeX459e+CJiBgaEY82E894SUVJxQ1rGsrSRzMzq66
pvEOAuyJiLbBW0q9bqHdH+l0H/F3aPhT4EkBE3CvpjVR+ODASeEoSwIeAV9K+DcDtLQUTEVOAKZCtYLs5HTIzsw+qpsTUVuvS7w1sun8iW/n2O83sW+v7SmZmna9qpvKAx4ATJPWU1As4vp3Hngwg6QvAh1P5A8BoSbumfTtJ2ruMMZuZWTtVzYgpIp6SdDewAHgZqAfaenPnYuAWSacCc4A/A6sj4lVJFwG/ldQNWA98A3ip7B0wM7M2qZrElPwwIiZL2g54GKiLiF807oyI2pLtIvC59LEBOCoi3pU0CjgwItalercBtzU9UUT06rBemJlZi6otMU2RtD/Qk+ze0Nw2HrcXMD2Niv4KnFXOoIb070PRry0xMyuLqkpMEfGVzTzueWB4mcMxM7MOUE0PP5iZWRfgxGRmZrnixGRmZrnixGRmZrnixGRmZrnixGRmZrnixGRmZrnixGRmZrlSVV+wzav65Q3UTppV6TDMzDrV0g56441HTGZmlitOTGZmlitOTGZmlitOTGZmlisdmpgkbS9plqT5khZKGivpcEnzJNVLul5SD0mHSfpVyXFHSrqzhTa7S5qa2quX9M1Ufpakp9K5bk9rNpHqji45/u2S7X9KbcyXdGkq20/SvZLqJD0iaWBH/X3MzOyDOnrEdDSwIiKGRsRg4F5gKjA2IoaQPRV4DjAbGCipbzrua8D1LbQ5DOgfEYNTGzek8jsi4sCIGAo8C3y9tcAkHQOcCHwyHXN52jUFmBARI4GJwM9bOH68pKKk4oY1bV1I18zMNqWjE1M9cKSkyyR9GqgFXoyIJWn/NOAzERHAjcBXJe0IjALuaaHNPwL7SrpK0tHAW6l8cBrh1APjgEGbiO0I4IaIWAMQEa9L6gUcDMyQ9DRwHdCvuYMjYkpEFCKi0H27Ppv6O5iZWRt16PeYImKJpBHAscAlwIOtVL8B+DWwFpgREe+20OYbkoYCRwFnAycDZ5KNxE6KiPmSzuD9ZdXfJSXgtILttq3E0A14MyKGtaV/ZmZWfh19j2l3YE1E3ARcQTYSqpX00VTlVOAhgIhYAawALuL96bnm2twF6BYRt6e6I9Ku3sBKSTVkI6ZGS4GRafuLQE3avh/4Wsm9qJ0i4i3gRUljUplSEjQzs07S0W9+GAJcIWkjsJ7sflIfsqmybYCngGtL6t8M9I2IZ1tpsz9wQxr9AHwn/f4u8ASwKv3uncp/AdwlaT7ZPa6/AETEvZKGAUVJfwX+G/h/ZEntGkkXkSWxW4H5m9l/MzNrJ2W3d/JB0s+AeRHxH5WOpT0KhUIUi8VKh2FmVlUk1UVEoWl5bt6VJ6mObDTz7UrHYmZmlZObxJQez/4bkp4AejQpPjUi6jsnKjMz62y5SUzNiYhPVjoGMzPrXH4lkZmZ5YoTk5mZ5UqunsqrVpJWA4srHUcZ7QK8Wukgysx9yr+trT+w9fWp3P3ZOyL6Ni3M9T2mKrK4uUceq5Wk4tbUH3CfqsHW1h/Y+vrUWf3xVJ6ZmeWKE5OZmeWKE1N5TKl0AGW2tfUH3KdqsLX1B7a+PnVKf/zwg5mZ5YpHTGZmlitOTGZmlitOTFtA0tGSFkt6QdKkSsezuSQtlVQv6WlJxVS2k6T7JT2ffn+40nG2RtL1kl6RtLCkrNk+pHW2rkzXbUFazDJXWujPZEnL03V6WtKxJfu+k/qzWNJRlYm6ZZL2lDRb0jOSFkn6x1RezdeopT5V83XqKelJSfNTny5O5ftIeiLFfpukbVN5j/T5hbS/tiyBRIR/NuMH6A78AdiXbFXc+cD+lY5rM/uyFNilSdnlwKS0PQm4rNJxbqIPnyFbNHLhpvpAtqLyPYCATwFPVDr+NvZnMjCxmbr7p39/PYB90r/L7pXuQ5MY+wEj0nZvYEmKu5qvUUt9qubrJKBX2q4hW9vuU8B04JRUfi1wTtr+B+DatH0KcFs54vCIafMdBLwQEX+MiL+SLSh4YoVjKqcTgWlpexpwUgVj2aS
IeBh4vUlxS304EfjPyPwe2FFSv86JtG1a6E9LTgRujYh1EfEi8ALZv8/ciIiVETE3ba8GniVb9LOar1FLfWpJNVyniIi308ea9BPAYcDMVN70OjVev5nA4ZK0pXE4MW2+/sCfSj4vo/V/lHkWwG8l1Ukan8o+EhEr0/afgY9UJrQt0lIfqvnanZumtq4vmV6tqv6k6Z7hZP9vfKu4Rk36BFV8nSR1l/Q08ApwP9nI7s2IeDdVKY37vT6l/Q3AzlsagxOTARwaESOAY4BvSPpM6c7IxulV/b2CraEPwDXAfsAwYCXwo8qG036SegG3A+dHxFul+6r1GjXTp6q+ThGxISKGAXuQjegGdnYMTkybbzmwZ8nnPVJZ1YmI5en3K8CdZP8YX26cOkm/X6lchJutpT5U5bWLiJfTfzQ2Ar/g/WmgquiPpBqy/4DfHBF3pOKqvkbN9anar1OjiHgTmA2MIptKbXy3amnc7/Up7e8DvLal53Zi2nxPAQPS0yrbkt34u7vCMbWbpO0l9W7cBr4ALCTry+mp2unAXZWJcIu01Ie7gdPSk1+fAhpKppNyq8k9li+RXSfI+nNKekJqH2AA8GRnx9eadN/hP4BnI+LHJbuq9hq11Kcqv059Je2Ytj8EHEl272w2MDpVa3qdGq/faODBNPLdMpV+CqSaf8ieHFpCNgd7YaXj2cw+7Ev2pNB8YFFjP8jmiR8Angf+B9ip0rFuoh+3kE2brCebA/96S30ge/Lo6nTd6oFCpeNvY39uTPEuSP9B6FdS/8LUn8XAMZWOv5n+HEo2TbcAeDr9HFvl16ilPlXzdToAmJdiXwh8L5XvS5ZEXwBmAD1Sec/0+YW0f99yxOFXEpmZWa54Ks/MzHLFicnMzHLFicnMzHLFicnMzHLFicnMzHLFicnMzHLFicnMzHLlfwHH5sUVMquziAAAAABJRU5ErkJggg==\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "chinese_ingredient_df = create_ingredient_df(chinese_df)\r\n", + "chinese_ingredient_df.head(10).plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 13 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "indian_ingredient_df = create_ingredient_df(indian_df)\r\n", + "indian_ingredient_df.head(10).plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 14 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "korean_ingredient_df = create_ingredient_df(korean_df)\r\n", + "korean_ingredient_df.head(10).plot.barh()" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... 
whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 15 + } + ], + "source": [ + "feature_df= df.drop(['cuisine','Unnamed: 0','rice','garlic','ginger'], axis=1)\n", + "labels_df = df.cuisine #.unique()\n", + "feature_df.head()\n" + ] + }, + { + "source": [ + "ปรับสมดุลข้อมูลด้วยการทำโอเวอร์แซมพลิง SMOTE ให้เท่ากับคลาสที่มีจำนวนสูงสุด อ่านเพิ่มเติมได้ที่นี่: https://imbalanced-learn.org/dev/references/generated/imblearn.over_sampling.SMOTE.html\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "oversample = SMOTE()\n", + "transformed_feature_df, transformed_label_df = oversample.fit_resample(feature_df, labels_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "new label count: korean 799\nchinese 799\njapanese 799\nindian 799\nthai 799\nName: cuisine, dtype: int64\nold label count: korean 799\nindian 598\nchinese 442\njapanese 320\nthai 289\nName: cuisine, dtype: int64\n" + ] + } + ], + "source": [ + "print(f'new label count: {transformed_label_df.value_counts()}')\r\n", + "print(f'old label count: {df.cuisine.value_counts()}')" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 
0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 18 + } + ], + "source": [ + "transformed_feature_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " cuisine almond angelica anise anise_seed apple apple_brandy \\\n", + "0 indian 0 0 0 0 0 0 \n", + "1 indian 1 0 0 0 0 0 \n", + "2 indian 0 0 0 0 0 0 \n", + "3 indian 0 0 0 0 0 0 \n", + "4 indian 0 0 0 0 0 0 \n", + "... ... ... ... ... ... ... ... \n", + "3990 thai 0 0 0 0 0 0 \n", + "3991 thai 0 0 0 0 0 0 \n", + "3992 thai 0 0 0 0 0 0 \n", + "3993 thai 0 0 0 0 0 0 \n", + "3994 thai 0 0 0 0 0 0 \n", + "\n", + " apricot armagnac artemisia ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "... ... ... ... ... ... ... ... \n", + "3990 0 0 0 ... 0 0 0 \n", + "3991 0 0 0 ... 0 0 0 \n", + "3992 0 0 0 ... 0 0 0 \n", + "3993 0 0 0 ... 0 0 0 \n", + "3994 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "... ... ... ... ... ... ... ... \n", + "3990 0 0 0 0 0 0 0 \n", + "3991 0 0 0 0 0 0 0 \n", + "3992 0 0 0 0 0 0 0 \n", + "3993 0 0 0 0 0 0 0 \n", + "3994 0 0 0 0 0 0 0 \n", + "\n", + "[3995 rows x 381 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisia...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
0indian000000000...0000000000
1indian100000000...0000000000
2indian000000000...0000000000
3indian000000000...0000000000
4indian000000000...0000000010
..................................................................
3990thai000000000...0000000000
3991thai000000000...0000000000
3992thai000000000...0000000000
3993thai000000000...0000000000
3994thai000000000...0000000000
\n

3995 rows × 381 columns

\n
" + }, + "metadata": {}, + "execution_count": 19 + } + ], + "source": [ + "# export transformed data to new df for classification\n", + "transformed_df = pd.concat([transformed_label_df,transformed_feature_df],axis=1, join='outer')\n", + "transformed_df" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\nRangeIndex: 3995 entries, 0 to 3994\nColumns: 381 entries, cuisine to zucchini\ndtypes: int64(380), object(1)\nmemory usage: 11.6+ MB\n" + ] + } + ], + "source": [ + "transformed_df.info()" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "transformed_df.to_csv(\"../../data/cleaned_cuisines.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้อง แต่โปรดทราบว่าการแปลอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่แม่นยำ เอกสารต้นฉบับในภาษาต้นทางควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษาจากผู้เชี่ยวชาญ เราจะไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความที่ผิดพลาดซึ่งเกิดจากการใช้การแปลนี้\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "metadata": { + "interpreter": 
{ + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "1da12ed6d238756959b8de9cac2a35a2", + "translation_date": "2025-09-06T14:52:42+00:00", + "source_file": "4-Classification/1-Introduction/solution/notebook.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/translations/th/4-Classification/2-Classifiers-1/notebook.ipynb b/translations/th/4-Classification/2-Classifiers-1/notebook.ipynb new file mode 100644 index 000000000..3cf25c6b2 --- /dev/null +++ b/translations/th/4-Classification/2-Classifiers-1/notebook.ipynb @@ -0,0 +1,39 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "68829b06b4dcd512d3327849191f4d7f", + "translation_date": "2025-09-06T14:32:45+00:00", + "source_file": "4-Classification/2-Classifiers-1/notebook.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้อง แต่โปรดทราบว่าการแปลอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่ถูกต้อง เอกสารต้นฉบับในภาษาต้นทางควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษามืออาชีพ เราไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความผิดที่เกิดจากการใช้การแปลนี้\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/th/4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb 
b/translations/th/4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb new file mode 100644 index 000000000..0c830e42f --- /dev/null +++ b/translations/th/4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb @@ -0,0 +1,1294 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_11-R.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "6ea6a5171b1b99b7b5a55f7469c048d2", + "translation_date": "2025-09-06T14:39:22+00:00", + "source_file": "4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb", + "language_code": "th" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "zs2woWv_HoE8" + } + }, + { + "cell_type": "markdown", + "source": [ + "## ตัวจำแนกประเภทอาหาร 1\n", + "\n", + "ในบทเรียนนี้ เราจะสำรวจตัวจำแนกประเภทหลากหลายชนิดเพื่อ *ทำนายประเภทอาหารประจำชาติจากกลุ่มของส่วนผสมที่ให้มา* ในขณะเดียวกัน เราจะเรียนรู้เพิ่มเติมเกี่ยวกับวิธีที่อัลกอริทึมสามารถนำมาใช้ในงานการจำแนกประเภทได้\n", + "\n", + "### [**แบบทดสอบก่อนเรียน**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/21/)\n", + "\n", + "### **การเตรียมตัว**\n", + "\n", + "บทเรียนนี้ต่อยอดจาก [บทเรียนก่อนหน้า](https://github.com/microsoft/ML-For-Beginners/blob/main/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb) ซึ่งเราได้:\n", + "\n", + "- แนะนำเบื้องต้นเกี่ยวกับการจำแนกประเภทโดยใช้ชุดข้อมูลเกี่ยวกับอาหารที่ยอดเยี่ยมของเอเชียและอินเดีย 😋\n", + "\n", + "- สำรวจ [dplyr verbs](https://dplyr.tidyverse.org/) เพื่อเตรียมและทำความสะอาดข้อมูลของเรา\n", + "\n", + "- สร้างภาพที่สวยงามโดยใช้ ggplot2\n", + "\n", + "- แสดงวิธีจัดการกับข้อมูลที่ไม่สมดุลโดยการเตรียมข้อมูลด้วย [recipes](https://recipes.tidymodels.org/articles/Simple_Example.html)\n", + "\n", + "- แสดงวิธี `prep` และ `bake` สูตรของเราเพื่อยืนยันว่ามันทำงานตามที่คาดไว้\n", 
+ "\n", + "#### **ข้อกำหนดเบื้องต้น**\n", + "\n", + "สำหรับบทเรียนนี้ เราจะต้องใช้แพ็กเกจต่อไปนี้เพื่อทำความสะอาด เตรียม และสร้างภาพข้อมูลของเรา:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) คือ [ชุดของแพ็กเกจ R](https://www.tidyverse.org/packages) ที่ออกแบบมาเพื่อทำให้การวิเคราะห์ข้อมูลเร็วขึ้น ง่ายขึ้น และสนุกขึ้น!\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) เป็นกรอบงานที่เป็น [ชุดของแพ็กเกจ](https://www.tidymodels.org/packages/) สำหรับการสร้างแบบจำลองและการเรียนรู้ของเครื่อง\n", + "\n", + "- `themis`: [แพ็กเกจ themis](https://themis.tidymodels.org/) ให้ขั้นตอนเพิ่มเติมในสูตรสำหรับจัดการกับข้อมูลที่ไม่สมดุล\n", + "\n", + "- `nnet`: [แพ็กเกจ nnet](https://cran.r-project.org/web/packages/nnet/nnet.pdf) ให้ฟังก์ชันสำหรับการประมาณเครือข่ายประสาทเทียมแบบ feed-forward ที่มีชั้นซ่อนเพียงชั้นเดียว และสำหรับแบบจำลองการถดถอยโลจิสติกแบบพหุคลาส (multinomial log-linear models)\n", + "\n", + "คุณสามารถติดตั้งแพ็กเกจเหล่านี้ได้ดังนี้:\n" + ], + "metadata": { + "id": "iDFOb3ebHwQC" + } + }, + { + "cell_type": "markdown", + "source": [ + "`install.packages(c(\"tidyverse\", \"tidymodels\", \"themis\", \"nnet\", \"here\"))`\n", + "\n", + "หรือคุณสามารถใช้สคริปต์ด้านล่างเพื่อตรวจสอบว่าคุณมีแพ็กเกจที่จำเป็นสำหรับการทำโมดูลนี้หรือไม่ และติดตั้งให้คุณในกรณีที่ยังไม่มี\n" + ], + "metadata": { + "id": "4V85BGCjII7F" + } + }, + { + "cell_type": "code", + "execution_count": 2, + "source": [ + "suppressWarnings(if (!require(\"pacman\"))install.packages(\"pacman\"))\r\n", + "\r\n", + "pacman::p_load(tidyverse, tidymodels, themis, here)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Loading required package: pacman\n", + "\n" + ] + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "an5NPyyKIKNR", + "outputId": "834d5e74-f4b8-49f9-8ab5-4c52ff2d7bc8" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 1. 
แบ่งข้อมูลออกเป็นชุดฝึกอบรมและชุดทดสอบ\n", + "\n", + "เราจะเริ่มต้นด้วยการเลือกขั้นตอนบางส่วนจากบทเรียนก่อนหน้านี้\n", + "\n", + "### ลบส่วนผสมที่พบได้บ่อยที่สุดซึ่งสร้างความสับสนระหว่างอาหารที่แตกต่างกัน โดยใช้ `dplyr::select()`\n", + "\n", + "ใครๆ ก็ชอบข้าว กระเทียม และขิง!\n" + ], + "metadata": { + "id": "0ax9GQLBINVv" + } + }, + { + "cell_type": "code", + "execution_count": 3, + "source": [ + "# Load the original cuisines data\r\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/4-Classification/data/cuisines.csv\")\r\n", + "\r\n", + "# Drop id column, rice, garlic and ginger from our original data set\r\n", + "df_select <- df %>% \r\n", + " select(-c(1, rice, garlic, ginger)) %>%\r\n", + " # Encode cuisine column as categorical\r\n", + " mutate(cuisine = factor(cuisine))\r\n", + "\r\n", + "# Display new data set\r\n", + "df_select %>% \r\n", + " slice_head(n = 5)\r\n", + "\r\n", + "# Display distribution of cuisines\r\n", + "df_select %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(desc(n))" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "New names:\n", + "* `` -> ...1\n", + "\n", + "\u001b[1m\u001b[1mRows: \u001b[1m\u001b[22m\u001b[34m\u001b[34m2448\u001b[34m\u001b[39m \u001b[1m\u001b[1mColumns: \u001b[1m\u001b[22m\u001b[34m\u001b[34m385\u001b[34m\u001b[39m\n", + "\n", + "\u001b[36m──\u001b[39m \u001b[1m\u001b[1mColumn specification\u001b[1m\u001b[22m \u001b[36m────────────────────────────────────────────────────────\u001b[39m\n", + "\u001b[1mDelimiter:\u001b[22m \",\"\n", + "\u001b[31mchr\u001b[39m (1): cuisine\n", + "\u001b[32mdbl\u001b[39m (384): ...1, almond, angelica, anise, anise_seed, apple, apple_brandy, a...\n", + "\n", + "\n", + "\u001b[36mℹ\u001b[39m Use \u001b[30m\u001b[47m\u001b[30m\u001b[47m`spec()`\u001b[47m\u001b[30m\u001b[49m\u001b[39m to retrieve the full column specification for this data.\n", + "\u001b[36mℹ\u001b[39m Specify the column types or set 
\u001b[30m\u001b[47m\u001b[30m\u001b[47m`show_col_types = FALSE`\u001b[47m\u001b[30m\u001b[49m\u001b[39m to quiet this message.\n", + "\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine almond angelica anise anise_seed apple apple_brandy apricot armagnac\n", + "1 indian 0 0 0 0 0 0 0 0 \n", + "2 indian 1 0 0 0 0 0 0 0 \n", + "3 indian 0 0 0 0 0 0 0 0 \n", + "4 indian 0 0 0 0 0 0 0 0 \n", + "5 indian 0 0 0 0 0 0 0 0 \n", + " artemisia ⋯ whiskey white_bread white_wine whole_grain_wheat_flour wine wood\n", + "1 0 ⋯ 0 0 0 0 0 0 \n", + "2 0 ⋯ 0 0 0 0 0 0 \n", + "3 0 ⋯ 0 0 0 0 0 0 \n", + "4 0 ⋯ 0 0 0 0 0 0 \n", + "5 0 ⋯ 0 0 0 0 0 0 \n", + " yam yeast yogurt zucchini\n", + "1 0 0 0 0 \n", + "2 0 0 0 0 \n", + "3 0 0 0 0 \n", + "4 0 0 0 0 \n", + "5 0 0 1 0 " + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 381\n", + "\n", + "| cuisine <fct> | almond <dbl> | angelica <dbl> | anise <dbl> | anise_seed <dbl> | apple <dbl> | apple_brandy <dbl> | apricot <dbl> | armagnac <dbl> | artemisia <dbl> | ⋯ ⋯ | whiskey <dbl> | white_bread <dbl> | white_wine <dbl> | whole_grain_wheat_flour <dbl> | wine <dbl> | wood <dbl> | yam <dbl> | yeast <dbl> | yogurt <dbl> | zucchini <dbl> |\n", + "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", + "| indian | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| indian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| indian | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| indian | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| indian | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 381\n", + "\\begin{tabular}{lllllllllllllllllllll}\n", + " cuisine & almond & angelica & anise & anise\\_seed & apple & apple\\_brandy & apricot & 
armagnac & artemisia & ⋯ & whiskey & white\\_bread & white\\_wine & whole\\_grain\\_wheat\\_flour & wine & wood & yam & yeast & yogurt & zucchini\\\\\n", + " & & & & & & & & & & ⋯ & & & & & & & & & & \\\\\n", + "\\hline\n", + "\t indian & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t indian & 1 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t indian & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t indian & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t indian & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 1 & 0\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 381
cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiawhiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
<fct><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
indian0000000000000000000
indian1000000000000000000
indian0000000000000000000
indian0000000000000000000
indian0000000000000000010
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine n \n", + "1 korean 799\n", + "2 indian 598\n", + "3 chinese 442\n", + "4 japanese 320\n", + "5 thai 289" + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 2\n", + "\n", + "| cuisine <fct> | n <int> |\n", + "|---|---|\n", + "| korean | 799 |\n", + "| indian | 598 |\n", + "| chinese | 442 |\n", + "| japanese | 320 |\n", + "| thai | 289 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 2\n", + "\\begin{tabular}{ll}\n", + " cuisine & n\\\\\n", + " & \\\\\n", + "\\hline\n", + "\t korean & 799\\\\\n", + "\t indian & 598\\\\\n", + "\t chinese & 442\\\\\n", + "\t japanese & 320\\\\\n", + "\t thai & 289\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 2
cuisinen
<fct><int>
korean 799
indian 598
chinese 442
japanese320
thai 289
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 735 + }, + "id": "jhCrrH22IWVR", + "outputId": "d444a85c-1d8b-485f-bc4f-8be2e8f8217c" + } + }, + { + "cell_type": "markdown", + "source": [ + "ยอดเยี่ยม! ตอนนี้ถึงเวลาที่จะแบ่งข้อมูล โดยให้ 70% ของข้อมูลไปที่ชุดการฝึกอบรม และ 30% ไปที่ชุดการทดสอบ เราจะใช้เทคนิค `stratification` ในการแบ่งข้อมูลเพื่อ `รักษาสัดส่วนของแต่ละประเภทอาหาร` ในชุดข้อมูลการฝึกอบรมและการตรวจสอบ\n", + "\n", + "[rsample](https://rsample.tidymodels.org/), แพ็กเกจใน Tidymodels, ให้โครงสร้างสำหรับการแบ่งข้อมูลและการสุ่มตัวอย่างที่มีประสิทธิภาพ:\n" + ], + "metadata": { + "id": "AYTjVyajIdny" + } + }, + { + "cell_type": "code", + "execution_count": 4, + "source": [ + "# Load the core Tidymodels packages into R session\r\n", + "library(tidymodels)\r\n", + "\r\n", + "# Create split specification\r\n", + "set.seed(2056)\r\n", + "cuisines_split <- initial_split(data = df_select,\r\n", + " strata = cuisine,\r\n", + " prop = 0.7)\r\n", + "\r\n", + "# Extract the data in each split\r\n", + "cuisines_train <- training(cuisines_split)\r\n", + "cuisines_test <- testing(cuisines_split)\r\n", + "\r\n", + "# Print the number of cases in each split\r\n", + "cat(\"Training cases: \", nrow(cuisines_train), \"\\n\",\r\n", + " \"Test cases: \", nrow(cuisines_test), sep = \"\")\r\n", + "\r\n", + "# Display the first few rows of the training set\r\n", + "cuisines_train %>% \r\n", + " slice_head(n = 5)\r\n", + "\r\n", + "\r\n", + "# Display distribution of cuisines in the training set\r\n", + "cuisines_train %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(desc(n))" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Training cases: 1712\n", + "Test cases: 736" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine almond angelica anise anise_seed apple apple_brandy apricot armagnac\n", + "1 chinese 0 0 0 0 0 0 0 0 \n", + "2 chinese 
0 0 0 0 0 0 0 0 \n", + "3 chinese 0 0 0 0 0 0 0 0 \n", + "4 chinese 0 0 0 0 0 0 0 0 \n", + "5 chinese 0 0 0 0 0 0 0 0 \n", + " artemisia ⋯ whiskey white_bread white_wine whole_grain_wheat_flour wine wood\n", + "1 0 ⋯ 0 0 0 0 1 0 \n", + "2 0 ⋯ 0 0 0 0 1 0 \n", + "3 0 ⋯ 0 0 0 0 0 0 \n", + "4 0 ⋯ 0 0 0 0 0 0 \n", + "5 0 ⋯ 0 0 0 0 0 0 \n", + " yam yeast yogurt zucchini\n", + "1 0 0 0 0 \n", + "2 0 0 0 0 \n", + "3 0 0 0 0 \n", + "4 0 0 0 0 \n", + "5 0 0 0 0 " + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 381\n", + "\n", + "| cuisine <fct> | almond <dbl> | angelica <dbl> | anise <dbl> | anise_seed <dbl> | apple <dbl> | apple_brandy <dbl> | apricot <dbl> | armagnac <dbl> | artemisia <dbl> | ⋯ ⋯ | whiskey <dbl> | white_bread <dbl> | white_wine <dbl> | whole_grain_wheat_flour <dbl> | wine <dbl> | wood <dbl> | yam <dbl> | yeast <dbl> | yogurt <dbl> | zucchini <dbl> |\n", + "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 381\n", + "\\begin{tabular}{lllllllllllllllllllll}\n", + " cuisine & almond & angelica & anise & anise\\_seed & apple & apple\\_brandy & apricot & armagnac & artemisia & ⋯ & whiskey & white\\_bread & white\\_wine & whole\\_grain\\_wheat\\_flour & wine & wood & yam & yeast & yogurt & zucchini\\\\\n", + " & & & & & & & & & & ⋯ & & & & & & & & & & \\\\\n", + "\\hline\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 1 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t 
chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 1 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 381
cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiawhiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
<fct><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
chinese0000000000000100000
chinese0000000000000100000
chinese0000000000000000000
chinese0000000000000000000
chinese0000000000000000000
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine n \n", + "1 korean 559\n", + "2 indian 418\n", + "3 chinese 309\n", + "4 japanese 224\n", + "5 thai 202" + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 2\n", + "\n", + "| cuisine <fct> | n <int> |\n", + "|---|---|\n", + "| korean | 559 |\n", + "| indian | 418 |\n", + "| chinese | 309 |\n", + "| japanese | 224 |\n", + "| thai | 202 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 2\n", + "\\begin{tabular}{ll}\n", + " cuisine & n\\\\\n", + " & \\\\\n", + "\\hline\n", + "\t korean & 559\\\\\n", + "\t indian & 418\\\\\n", + "\t chinese & 309\\\\\n", + "\t japanese & 224\\\\\n", + "\t thai & 202\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 2
cuisinen
<fct><int>
korean 559
indian 418
chinese 309
japanese224
thai 202
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 535 + }, + "id": "w5FWIkEiIjdN", + "outputId": "2e195fd9-1a8f-4b91-9573-cce5582242df" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 2. จัดการกับข้อมูลที่ไม่สมดุล\n", + "\n", + "คุณอาจสังเกตเห็นในชุดข้อมูลต้นฉบับรวมถึงชุดข้อมูลการฝึกของเรา ว่ามีการกระจายจำนวนของประเภทอาหารที่ไม่เท่ากันอย่างมาก อาหารเกาหลีมีจำนวน *เกือบ* 3 เท่าของอาหารไทย ข้อมูลที่ไม่สมดุลมักส่งผลเสียต่อประสิทธิภาพของโมเดล หลายโมเดลทำงานได้ดีที่สุดเมื่อจำนวนตัวอย่างมีความเท่ากัน และดังนั้นจึงมักมีปัญหาเมื่อข้อมูลไม่สมดุล\n", + "\n", + "มีวิธีหลัก ๆ สองวิธีในการจัดการกับชุดข้อมูลที่ไม่สมดุล:\n", + "\n", + "- เพิ่มตัวอย่างในกลุ่มที่มีจำนวนน้อย: `Over-sampling` เช่น การใช้ SMOTE algorithm ซึ่งสร้างตัวอย่างใหม่ในกลุ่มที่มีจำนวนน้อยโดยใช้เพื่อนบ้านที่ใกล้เคียงของกรณีเหล่านั้น\n", + "\n", + "- ลบตัวอย่างในกลุ่มที่มีจำนวนมาก: `Under-sampling`\n", + "\n", + "ในบทเรียนก่อนหน้านี้ เราได้แสดงวิธีจัดการกับชุดข้อมูลที่ไม่สมดุลโดยใช้ `recipe` ซึ่งสามารถคิดได้ว่าเป็นแผนงานที่อธิบายขั้นตอนที่ควรนำไปใช้กับชุดข้อมูลเพื่อเตรียมให้พร้อมสำหรับการวิเคราะห์ข้อมูล ในกรณีของเรา เราต้องการให้มีการกระจายจำนวนประเภทอาหารที่เท่ากันใน `training set` ของเรา มาเริ่มกันเลย!\n" + ], + "metadata": { + "id": "daBi9qJNIwqW" + } + }, + { + "cell_type": "code", + "execution_count": 5, + "source": [ + "# Load themis package for dealing with imbalanced data\r\n", + "library(themis)\r\n", + "\r\n", + "# Create a recipe for preprocessing training data\r\n", + "cuisines_recipe <- recipe(cuisine ~ ., data = cuisines_train) %>% \r\n", + " step_smote(cuisine)\r\n", + "\r\n", + "# Print recipe\r\n", + "cuisines_recipe" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Data Recipe\n", + "\n", + "Inputs:\n", + "\n", + " role #variables\n", + " outcome 1\n", + " predictor 380\n", + "\n", + "Operations:\n", + "\n", + "SMOTE based on cuisine" + ] + }, + "metadata": {} + } + ], + 
"metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 200 + }, + "id": "Az6LFBGxI1X0", + "outputId": "29d71d85-64b0-4e62-871e-bcd5398573b6" + } + }, + { + "cell_type": "markdown", + "source": [ + "คุณสามารถยืนยันได้เลย (โดยใช้ prep+bake) ว่าสูตรนี้จะทำงานตามที่คุณคาดหวัง - โดยที่ป้ายกำกับอาหารทั้งหมดมี `559` การสังเกตการณ์\n", + "\n", + "เนื่องจากเราจะใช้สูตรนี้เป็นตัวเตรียมข้อมูลสำหรับการสร้างแบบจำลอง `workflow()` จะทำหน้าที่เตรียมและประมวลผลข้อมูลทั้งหมดให้เรา ดังนั้นเราจะไม่ต้องคำนวณสูตรด้วยตัวเอง\n", + "\n", + "ตอนนี้เราพร้อมที่จะฝึกโมเดลแล้ว 👩‍💻👨‍💻!\n", + "\n", + "## 3. การเลือกตัวจำแนกประเภทของคุณ\n", + "\n", + "

\n", + " \n", + "

ภาพประกอบโดย @allison_horst
\n" + ], + "metadata": { + "id": "NBL3PqIWJBBB" + } + }, + { + "cell_type": "markdown", + "source": [ + "ตอนนี้เราต้องตัดสินใจว่าจะใช้อัลกอริทึมใดสำหรับงานนี้ 🤔\n", + "\n", + "ใน Tidymodels, [`parsnip package`](https://parsnip.tidymodels.org/index.html) มีอินเทอร์เฟซที่สม่ำเสมอสำหรับการทำงานกับโมเดลในหลายๆ engine (แพ็กเกจ) โปรดดูเอกสารของ parsnip เพื่อสำรวจ [ประเภทโมเดลและ engine](https://www.tidymodels.org/find/parsnip/#models) และ [อาร์กิวเมนต์ของโมเดล](https://www.tidymodels.org/find/parsnip/#model-args) ที่เกี่ยวข้อง ความหลากหลายอาจทำให้สับสนในตอนแรก ตัวอย่างเช่น วิธีการต่อไปนี้ล้วนเป็นเทคนิคการจัดประเภท:\n", + "\n", + "- โมเดลการจัดประเภทแบบใช้กฎ C5.0\n", + "\n", + "- โมเดลการจำแนกแบบยืดหยุ่น\n", + "\n", + "- โมเดลการจำแนกเชิงเส้น\n", + "\n", + "- โมเดลการจำแนกแบบมีการปรับค่า\n", + "\n", + "- โมเดลการถดถอยโลจิสติก\n", + "\n", + "- โมเดลการถดถอยพหุคลาส (multinomial regression)\n", + "\n", + "- โมเดล Naive Bayes\n", + "\n", + "- Support Vector Machines\n", + "\n", + "- Nearest Neighbors\n", + "\n", + "- Decision Trees\n", + "\n", + "- Ensemble methods\n", + "\n", + "- Neural Networks\n", + "\n", + "รายการยังคงมีต่อไป!\n", + "\n", + "### **จะเลือกตัวจัดประเภทตัวไหนดี?**\n", + "\n", + "แล้วเราควรเลือกตัวจัดประเภทตัวไหน? 
บ่อยครั้ง การลองใช้หลายๆ ตัวและมองหาผลลัพธ์ที่ดีเป็นวิธีการทดสอบ\n", + "\n", + "> AutoML แก้ปัญหานี้ได้อย่างลงตัวโดยการเปรียบเทียบในระบบคลาวด์ ทำให้คุณสามารถเลือกอัลกอริทึมที่ดีที่สุดสำหรับข้อมูลของคุณ ลองใช้ [ที่นี่](https://docs.microsoft.com/learn/modules/automate-model-selection-with-azure-automl/?WT.mc_id=academic-77952-leestott)\n", + "\n", + "นอกจากนี้ การเลือกตัวจัดประเภทขึ้นอยู่กับปัญหาของเรา ตัวอย่างเช่น เมื่อผลลัพธ์สามารถจัดหมวดหมู่ได้เป็น `มากกว่าสองคลาส` เช่นในกรณีของเรา คุณต้องใช้ `อัลกอริทึมการจัดประเภทแบบหลายคลาส` แทนที่จะเป็น `การจัดประเภทแบบไบนารี`\n", + "\n", + "### **วิธีที่ดีกว่า**\n", + "\n", + "วิธีที่ดีกว่าการเดาแบบสุ่มคือการทำตามแนวคิดใน [ML Cheat sheet](https://docs.microsoft.com/azure/machine-learning/algorithm-cheat-sheet?WT.mc_id=academic-77952-leestott) ที่สามารถดาวน์โหลดได้ ที่นี่เราพบว่า สำหรับปัญหาแบบหลายคลาสของเรา เรามีตัวเลือกบางอย่าง:\n", + "\n", + "

\n", + " \n", + "

ส่วนหนึ่งของ Algorithm Cheat Sheet ของ Microsoft ที่แสดงตัวเลือกการจัดประเภทแบบหลายคลาส
\n" + ], + "metadata": { + "id": "a6DLAZ3vJZ14" + } + }, + { + "cell_type": "markdown", + "source": [ + "### **เหตุผล**\n", + "\n", + "ลองมาดูว่ามีวิธีการใดบ้างที่เราสามารถใช้ได้ตามข้อจำกัดที่กำหนดไว้:\n", + "\n", + "- **โครงข่ายประสาทเทียมเชิงลึกหนักเกินไป** เนื่องจากเรามีชุดข้อมูลที่สะอาดแต่มีขนาดเล็ก และเรากำลังฝึกโมเดลในเครื่องผ่านโน้ตบุ๊ก โครงข่ายประสาทเทียมเชิงลึกจึงไม่เหมาะสมสำหรับงานนี้\n", + "\n", + "- **ไม่ใช้ตัวจำแนกแบบสองคลาส** เราไม่ได้ใช้ตัวจำแนกแบบสองคลาส ดังนั้นจึงตัดวิธี one-vs-all ออกไป\n", + "\n", + "- **ต้นไม้ตัดสินใจหรือการถดถอยโลจิสติกอาจใช้ได้** ต้นไม้ตัดสินใจอาจเหมาะสม หรือการถดถอยพหุคลาส/การถดถอยโลจิสติกแบบพหุคลาสสำหรับข้อมูลหลายคลาส\n", + "\n", + "- **ต้นไม้ตัดสินใจแบบบูสต์สำหรับพหุคลาสแก้ปัญหาคนละแบบ** ต้นไม้ตัดสินใจแบบบูสต์สำหรับพหุคลาสเหมาะสำหรับงานที่ไม่ใช่พารามิเตอร์ เช่น งานที่ออกแบบมาเพื่อสร้างการจัดอันดับ ดังนั้นจึงไม่เหมาะกับเรา\n", + "\n", + "โดยปกติแล้ว ก่อนที่จะเริ่มใช้โมเดลการเรียนรู้ของเครื่องที่ซับซ้อนขึ้น เช่น วิธีการแบบเอนเซมเบิล ควรเริ่มจากโมเดลที่ง่ายที่สุดเพื่อทำความเข้าใจภาพรวมของข้อมูล ดังนั้นในบทเรียนนี้ เราจะเริ่มต้นด้วยโมเดล `การถดถอยพหุคลาส` \n", + "\n", + "> การถดถอยโลจิสติกเป็นเทคนิคที่ใช้เมื่อผลลัพธ์เป็นตัวแปรเชิงหมวดหมู่ (หรือเชิงนาม) สำหรับการถดถอยโลจิสติกแบบไบนารี จำนวนตัวแปรผลลัพธ์จะมีสองค่า ในขณะที่การถดถอยโลจิสติกแบบพหุคลาสจะมีจำนวนตัวแปรผลลัพธ์มากกว่าสองค่า ดูเพิ่มเติมได้ที่ [Advanced Regression Methods](https://bookdown.org/chua/ber642_advanced_regression/multinomial-logistic-regression.html)\n", + "\n", + "## 4. 
ฝึกและประเมินโมเดลการถดถอยโลจิสติกแบบพหุคลาส\n", + "\n", + "ใน Tidymodels, `parsnip::multinom_reg()` ใช้กำหนดโมเดลที่ใช้ตัวทำนายเชิงเส้นเพื่อทำนายข้อมูลหลายคลาสโดยใช้การแจกแจงแบบพหุคลาส ดู `?multinom_reg()` เพื่อดูวิธี/เอนจินต่าง ๆ ที่คุณสามารถใช้ในการปรับโมเดลนี้\n", + "\n", + "สำหรับตัวอย่างนี้ เราจะปรับโมเดลการถดถอยพหุคลาสผ่านเอนจินเริ่มต้น [nnet](https://cran.r-project.org/web/packages/nnet/nnet.pdf)\n", + "\n", + "> ฉันเลือกค่า `penalty` แบบสุ่ม มีวิธีที่ดีกว่าในการเลือกค่านี้ เช่น การใช้ `resampling` และ `tuning` โมเดล ซึ่งเราจะพูดถึงในภายหลัง\n", + ">\n", + "> ดูเพิ่มเติมที่ [Tidymodels: Get Started](https://www.tidymodels.org/start/tuning/) หากคุณต้องการเรียนรู้เพิ่มเติมเกี่ยวกับการปรับแต่งพารามิเตอร์ของโมเดล\n" + ], + "metadata": { + "id": "gWMsVcbBJemu" + } + }, + { + "cell_type": "code", + "execution_count": 6, + "source": [ + "# Create a multinomial regression model specification\r\n", + "mr_spec <- multinom_reg(penalty = 1) %>% \r\n", + " set_engine(\"nnet\", MaxNWts = 2086) %>% \r\n", + " set_mode(\"classification\")\r\n", + "\r\n", + "# Print model specification\r\n", + "mr_spec" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Multinomial Regression Model Specification (classification)\n", + "\n", + "Main Arguments:\n", + " penalty = 1\n", + "\n", + "Engine-Specific Arguments:\n", + " MaxNWts = 2086\n", + "\n", + "Computational engine: nnet \n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 166 + }, + "id": "Wq_fcyQiJvfG", + "outputId": "c30449c7-3864-4be7-f810-72a003743e2d" + } + }, + { + "cell_type": "markdown", + "source": [ + "เยี่ยมมาก 🥳! 
ตอนนี้เรามีสูตรและสเปคของโมเดลแล้ว เราต้องหาวิธีรวมสิ่งเหล่านี้เข้าด้วยกันเป็นวัตถุที่จะช่วยในการเตรียมข้อมูลเบื้องต้น จากนั้นจึงปรับโมเดลกับข้อมูลที่ผ่านการเตรียมแล้ว และยังสามารถรองรับกิจกรรมหลังการประมวลผลได้อีกด้วย ใน Tidymodels วัตถุที่สะดวกนี้เรียกว่า [`workflow`](https://workflows.tidymodels.org/) ซึ่งจะเก็บส่วนประกอบของการสร้างโมเดลของคุณไว้อย่างสะดวก! สิ่งนี้คือสิ่งที่เราเรียกว่า *pipelines* ใน *Python* \n", + "\n", + "ดังนั้น มาเริ่มรวมทุกอย่างเข้าด้วยกันใน workflow กันเถอะ!📦\n" + ], + "metadata": { + "id": "NlSbzDfgJ0zh" + } + }, + { + "cell_type": "code", + "execution_count": 7, + "source": [ + "# Bundle recipe and model specification\r\n", + "mr_wf <- workflow() %>% \r\n", + " add_recipe(cuisines_recipe) %>% \r\n", + " add_model(mr_spec)\r\n", + "\r\n", + "# Print out workflow\r\n", + "mr_wf" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "══ Workflow ════════════════════════════════════════════════════════════════════\n", + "\u001b[3mPreprocessor:\u001b[23m Recipe\n", + "\u001b[3mModel:\u001b[23m multinom_reg()\n", + "\n", + "── Preprocessor ────────────────────────────────────────────────────────────────\n", + "1 Recipe Step\n", + "\n", + "• step_smote()\n", + "\n", + "── Model ───────────────────────────────────────────────────────────────────────\n", + "Multinomial Regression Model Specification (classification)\n", + "\n", + "Main Arguments:\n", + " penalty = 1\n", + "\n", + "Engine-Specific Arguments:\n", + " MaxNWts = 2086\n", + "\n", + "Computational engine: nnet \n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 333 + }, + "id": "Sc1TfPA4Ke3_", + "outputId": "82c70013-e431-4e7e-cef6-9fcf8aad4a6c" + } + }, + { + "cell_type": "markdown", + "source": [ + "เวิร์กโฟลว์ 👌👌! 
**`workflow()`** สามารถปรับให้เหมาะสมได้ในลักษณะเดียวกับที่โมเดลสามารถทำได้ ดังนั้น ถึงเวลาฝึกโมเดลแล้ว!\n" + ], + "metadata": { + "id": "TNQ8i85aKf9L" + } + }, + { + "cell_type": "code", + "execution_count": 8, + "source": [ + "# Train a multinomial regression model\n", + "mr_fit <- fit(object = mr_wf, data = cuisines_train)\n", + "\n", + "mr_fit" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "══ Workflow [trained] ══════════════════════════════════════════════════════════\n", + "\u001b[3mPreprocessor:\u001b[23m Recipe\n", + "\u001b[3mModel:\u001b[23m multinom_reg()\n", + "\n", + "── Preprocessor ────────────────────────────────────────────────────────────────\n", + "1 Recipe Step\n", + "\n", + "• step_smote()\n", + "\n", + "── Model ───────────────────────────────────────────────────────────────────────\n", + "Call:\n", + "nnet::multinom(formula = ..y ~ ., data = data, decay = ~1, MaxNWts = ~2086, \n", + " trace = FALSE)\n", + "\n", + "Coefficients:\n", + " (Intercept) almond angelica anise anise_seed apple\n", + "indian 0.19723325 0.2409661 0 -5.004955e-05 -0.1657635 -0.05769734\n", + "japanese 0.13961959 -0.6262400 0 -1.169155e-04 -0.4893596 -0.08585717\n", + "korean 0.22377347 -0.1833485 0 -5.560395e-05 -0.2489401 -0.15657804\n", + "thai -0.04336577 -0.6106258 0 4.903828e-04 -0.5782866 0.63451105\n", + " apple_brandy apricot armagnac artemisia artichoke asparagus\n", + "indian 0 0.37042636 0 -0.09122797 0 -0.27181970\n", + "japanese 0 0.28895643 0 -0.12651100 0 0.14054037\n", + "korean 0 -0.07981259 0 0.55756709 0 -0.66979948\n", + "thai 0 -0.33160904 0 -0.10725182 0 -0.02602152\n", + " avocado bacon baked_potato balm banana barley\n", + "indian -0.46624197 0.16008055 0 0 -0.2838796 0.2230625\n", + "japanese 0.90341344 0.02932727 0 0 -0.4142787 2.0953906\n", + "korean -0.06925382 -0.35804134 0 0 -0.2686963 -0.7233404\n", + "thai -0.21473955 -0.75594439 0 0 0.6784880 -0.4363320\n", + " bartlett_pear basil bay bean 
beech\n", + "indian 0 -0.7128756 0.1011587 -0.8777275 -0.0004380795\n", + "japanese 0 0.1288697 0.9425626 -0.2380748 0.3373437611\n", + "korean 0 -0.2445193 -0.4744318 -0.8957870 -0.0048784496\n", + "thai 0 1.5365848 0.1333256 0.2196970 -0.0113078024\n", + " beef beef_broth beef_liver beer beet\n", + "indian -0.7985278 0.2430186 -0.035598065 -0.002173738 0.01005813\n", + "japanese 0.2241875 -0.3653020 -0.139551027 0.128905553 0.04923911\n", + "korean 0.5366515 -0.6153237 0.213455197 -0.010828645 0.27325423\n", + "thai 0.1570012 -0.9364154 -0.008032213 -0.035063746 -0.28279823\n", + " bell_pepper bergamot berry bitter_orange black_bean\n", + "indian 0.49074330 0 0.58947607 0.191256164 -0.1945233\n", + "japanese 0.09074167 0 -0.25917977 -0.118915977 -0.3442400\n", + "korean -0.57876763 0 -0.07874180 -0.007729435 -0.5220672\n", + "thai 0.92554006 0 -0.07210196 -0.002983296 -0.4614426\n", + " black_currant black_mustard_seed_oil black_pepper black_raspberry\n", + "indian 0 0.38935801 -0.4453495 0\n", + "japanese 0 -0.05452887 -0.5440869 0\n", + "korean 0 -0.03929970 0.8025454 0\n", + "thai 0 -0.21498372 -0.9854806 0\n", + " black_sesame_seed black_tea blackberry blackberry_brandy\n", + "indian -0.2759246 0.3079977 0.191256164 0\n", + "japanese -0.6101687 -0.1671913 -0.118915977 0\n", + "korean 1.5197674 -0.3036261 -0.007729435 0\n", + "thai -0.1755656 -0.1487033 -0.002983296 0\n", + " blue_cheese blueberry bone_oil bourbon_whiskey brandy\n", + "indian 0 0.216164294 -0.2276744 0 0.22427587\n", + "japanese 0 -0.119186087 0.3913019 0 -0.15595599\n", + "korean 0 -0.007821986 0.2854487 0 -0.02562342\n", + "thai 0 -0.004947048 -0.0253658 0 -0.05715244\n", + "\n", + "...\n", + "and 308 more lines." 
+ ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "GMbdfVmTKkJI", + "outputId": "adf9ebdf-d69d-4a64-e9fd-e06e5322292e" + } + }, + { + "cell_type": "markdown", + "source": [ + "ผลลัพธ์จะแสดงค่าสัมประสิทธิ์ที่โมเดลได้เรียนรู้ระหว่างการฝึก\n", + "\n", + "### ประเมินผลโมเดลที่ผ่านการฝึก\n", + "\n", + "ถึงเวลาที่จะดูว่าโมเดลทำงานได้ดีแค่ไหน 📏 โดยการประเมินผลบนชุดทดสอบ! มาเริ่มต้นด้วยการสร้างการคาดการณ์บนชุดทดสอบกันเถอะ!\n" + ], + "metadata": { + "id": "tt2BfOxrKmcJ" + } + }, + { + "cell_type": "code", + "execution_count": 9, + "source": [ + "# Make predictions on the test set\n", + "results <- cuisines_test %>% select(cuisine) %>% \n", + " bind_cols(mr_fit %>% predict(new_data = cuisines_test))\n", + "\n", + "# Print out results\n", + "results %>% \n", + " slice_head(n = 5)" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine .pred_class\n", + "1 indian thai \n", + "2 indian indian \n", + "3 indian indian \n", + "4 indian indian \n", + "5 indian indian " + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 2\n", + "\n", + "| cuisine <fct> | .pred_class <fct> |\n", + "|---|---|\n", + "| indian | thai |\n", + "| indian | indian |\n", + "| indian | indian |\n", + "| indian | indian |\n", + "| indian | indian |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 2\n", + "\\begin{tabular}{ll}\n", + " cuisine & .pred\\_class\\\\\n", + " & \\\\\n", + "\\hline\n", + "\t indian & thai \\\\\n", + "\t indian & indian\\\\\n", + "\t indian & indian\\\\\n", + "\t indian & indian\\\\\n", + "\t indian & indian\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 2
cuisine.pred_class
<fct><fct>
indianthai
indianindian
indianindian
indianindian
indianindian
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 248 + }, + "id": "CqtckvtsKqax", + "outputId": "e57fe557-6a68-4217-fe82-173328c5436d" + } + }, + { + "cell_type": "markdown", + "source": [ + "งานยอดเยี่ยม! ใน Tidymodels การประเมินประสิทธิภาพของโมเดลสามารถทำได้โดยใช้ [yardstick](https://yardstick.tidymodels.org/) - แพ็กเกจที่ใช้วัดประสิทธิภาพของโมเดลด้วยตัวชี้วัดประสิทธิภาพ เช่นเดียวกับที่เราได้ทำในบทเรียนการถดถอยโลจิสติก มาเริ่มต้นด้วยการคำนวณเมทริกซ์ความสับสนกันเถอะ\n" + ], + "metadata": { + "id": "8w5N6XsBKss7" + } + }, + { + "cell_type": "code", + "execution_count": 10, + "source": [ + "# Confusion matrix for categorical data\n", + "conf_mat(data = results, truth = cuisine, estimate = .pred_class)\n" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " Truth\n", + "Prediction chinese indian japanese korean thai\n", + " chinese 83 1 8 15 10\n", + " indian 4 163 1 2 6\n", + " japanese 21 5 73 25 1\n", + " korean 15 0 11 191 0\n", + " thai 10 11 3 7 70" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 133 + }, + "id": "YvODvsLkK0iG", + "outputId": "bb69da84-1266-47ad-b174-d43b88ca2988" + } + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "c0HfPL16Lr6U" + } + }, + { + "cell_type": "code", + "execution_count": 11, + "source": [ + "update_geom_defaults(geom = \"tile\", new = list(color = \"black\", alpha = 0.7))\n", + "# Visualize confusion matrix\n", + "results %>% \n", + " conf_mat(cuisine, .pred_class) %>% \n", + " autoplot(type = \"heatmap\")" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "plot without title" + ], + "image/png": "" + }, + "metadata": { + "image/png": { + "width": 420, + "height": 420 + } + } + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 436 + }, + "id": 
"HsAtwukyLsvt", + "outputId": "3032a224-a2c8-4270-b4f2-7bb620317400" + } + }, + { + "cell_type": "markdown", + "source": [ + "สี่เหลี่ยมที่มีสีเข้มในกราฟเมทริกซ์ความสับสนแสดงถึงจำนวนกรณีที่สูง และคุณน่าจะเห็นเส้นทแยงมุมของสี่เหลี่ยมสีเข้มที่บ่งบอกถึงกรณีที่ป้ายกำกับที่คาดการณ์และป้ายกำกับจริงตรงกัน\n", + "\n", + "ตอนนี้เรามาคำนวณสถิติสรุปสำหรับเมทริกซ์ความสับสนกัน\n" + ], + "metadata": { + "id": "oOJC87dkLwPr" + } + }, + { + "cell_type": "code", + "execution_count": 12, + "source": [ + "# Summary stats for confusion matrix\n", + "conf_mat(data = results, truth = cuisine, estimate = .pred_class) %>% \n", + "summary()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " .metric .estimator .estimate\n", + "1 accuracy multiclass 0.7880435\n", + "2 kap multiclass 0.7276583\n", + "3 sens macro 0.7780927\n", + "4 spec macro 0.9477598\n", + "5 ppv macro 0.7585583\n", + "6 npv macro 0.9460080\n", + "7 mcc multiclass 0.7292724\n", + "8 j_index macro 0.7258524\n", + "9 bal_accuracy macro 0.8629262\n", + "10 detection_prevalence macro 0.2000000\n", + "11 precision macro 0.7585583\n", + "12 recall macro 0.7780927\n", + "13 f_meas macro 0.7641862" + ], + "text/markdown": [ + "\n", + "A tibble: 13 × 3\n", + "\n", + "| .metric <chr> | .estimator <chr> | .estimate <dbl> |\n", + "|---|---|---|\n", + "| accuracy | multiclass | 0.7880435 |\n", + "| kap | multiclass | 0.7276583 |\n", + "| sens | macro | 0.7780927 |\n", + "| spec | macro | 0.9477598 |\n", + "| ppv | macro | 0.7585583 |\n", + "| npv | macro | 0.9460080 |\n", + "| mcc | multiclass | 0.7292724 |\n", + "| j_index | macro | 0.7258524 |\n", + "| bal_accuracy | macro | 0.8629262 |\n", + "| detection_prevalence | macro | 0.2000000 |\n", + "| precision | macro | 0.7585583 |\n", + "| recall | macro | 0.7780927 |\n", + "| f_meas | macro | 0.7641862 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 13 × 3\n", + "\\begin{tabular}{lll}\n", + " .metric & .estimator & .estimate\\\\\n", + " & & 
\\\\\n", + "\\hline\n", + "\t accuracy & multiclass & 0.7880435\\\\\n", + "\t kap & multiclass & 0.7276583\\\\\n", + "\t sens & macro & 0.7780927\\\\\n", + "\t spec & macro & 0.9477598\\\\\n", + "\t ppv & macro & 0.7585583\\\\\n", + "\t npv & macro & 0.9460080\\\\\n", + "\t mcc & multiclass & 0.7292724\\\\\n", + "\t j\\_index & macro & 0.7258524\\\\\n", + "\t bal\\_accuracy & macro & 0.8629262\\\\\n", + "\t detection\\_prevalence & macro & 0.2000000\\\\\n", + "\t precision & macro & 0.7585583\\\\\n", + "\t recall & macro & 0.7780927\\\\\n", + "\t f\\_meas & macro & 0.7641862\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 13 × 3
.metric.estimator.estimate
<chr><chr><dbl>
accuracy multiclass0.7880435
kap multiclass0.7276583
sens macro 0.7780927
spec macro 0.9477598
ppv macro 0.7585583
npv macro 0.9460080
mcc multiclass0.7292724
j_index macro 0.7258524
bal_accuracy macro 0.8629262
detection_prevalencemacro 0.2000000
precision macro 0.7585583
recall macro 0.7780927
f_meas macro 0.7641862
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 494 + }, + "id": "OYqetUyzL5Wz", + "outputId": "6a84d65e-113d-4281-dfc1-16e8b70f37e6" + } + }, + { + "cell_type": "markdown", + "source": [ + "ถ้าเรามุ่งเน้นไปที่ตัวชี้วัดบางอย่าง เช่น ความแม่นยำ, ความไว, ppv เราก็ไม่ได้เริ่มต้นแย่เลย 🥳!\n", + "\n", + "## 4. เจาะลึกลงไปอีก\n", + "\n", + "ลองถามคำถามที่ละเอียดอ่อนสักข้อ: เกณฑ์อะไรที่ใช้ในการตัดสินใจเลือกประเภทของอาหารเป็นผลลัพธ์ที่คาดการณ์?\n", + "\n", + "จริง ๆ แล้ว อัลกอริทึมการเรียนรู้ของเครื่องเชิงสถิติ เช่น logistic regression จะอิงอยู่บน `ความน่าจะเป็น`; ดังนั้นสิ่งที่ตัวจำแนกประเภทคาดการณ์จริง ๆ ก็คือการแจกแจงความน่าจะเป็นในชุดของผลลัพธ์ที่เป็นไปได้ คลาสที่มีความน่าจะเป็นสูงสุดจะถูกเลือกเป็นผลลัพธ์ที่มีแนวโน้มมากที่สุดสำหรับการสังเกตที่กำหนด\n", + "\n", + "ลองมาดูตัวอย่างนี้ในทางปฏิบัติโดยการทำทั้งการคาดการณ์แบบคลาสที่ชัดเจนและการคาดการณ์แบบความน่าจะเป็น\n" + ], + "metadata": { + "id": "43t7vz8vMJtW" + } + }, + { + "cell_type": "code", + "execution_count": 13, + "source": [ + "# Make hard class prediction and probabilities\n", + "results_prob <- cuisines_test %>%\n", + " select(cuisine) %>% \n", + " bind_cols(mr_fit %>% predict(new_data = cuisines_test)) %>% \n", + " bind_cols(mr_fit %>% predict(new_data = cuisines_test, type = \"prob\"))\n", + "\n", + "# Print out results\n", + "results_prob %>% \n", + " slice_head(n = 5)" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine .pred_class .pred_chinese .pred_indian .pred_japanese .pred_korean\n", + "1 indian thai 1.551259e-03 0.4587877 5.988039e-04 2.428503e-04\n", + "2 indian indian 2.637133e-05 0.9999488 6.648651e-07 2.259993e-05\n", + "3 indian indian 1.049433e-03 0.9909982 1.060937e-03 1.644947e-05\n", + "4 indian indian 6.237482e-02 0.4763035 9.136702e-02 3.660913e-01\n", + "5 indian indian 1.431745e-02 0.9418551 2.945239e-02 8.721782e-03\n", + " .pred_thai \n", + "1 5.388194e-01\n", + "2 
1.577948e-06\n", + "3 6.874989e-03\n", + "4 3.863391e-03\n", + "5 5.653283e-03" + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 7\n", + "\n", + "| cuisine <fct> | .pred_class <fct> | .pred_chinese <dbl> | .pred_indian <dbl> | .pred_japanese <dbl> | .pred_korean <dbl> | .pred_thai <dbl> |\n", + "|---|---|---|---|---|---|---|\n", + "| indian | thai | 1.551259e-03 | 0.4587877 | 5.988039e-04 | 2.428503e-04 | 5.388194e-01 |\n", + "| indian | indian | 2.637133e-05 | 0.9999488 | 6.648651e-07 | 2.259993e-05 | 1.577948e-06 |\n", + "| indian | indian | 1.049433e-03 | 0.9909982 | 1.060937e-03 | 1.644947e-05 | 6.874989e-03 |\n", + "| indian | indian | 6.237482e-02 | 0.4763035 | 9.136702e-02 | 3.660913e-01 | 3.863391e-03 |\n", + "| indian | indian | 1.431745e-02 | 0.9418551 | 2.945239e-02 | 8.721782e-03 | 5.653283e-03 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 7\n", + "\\begin{tabular}{lllllll}\n", + " cuisine & .pred\\_class & .pred\\_chinese & .pred\\_indian & .pred\\_japanese & .pred\\_korean & .pred\\_thai\\\\\n", + " & & & & & & \\\\\n", + "\\hline\n", + "\t indian & thai & 1.551259e-03 & 0.4587877 & 5.988039e-04 & 2.428503e-04 & 5.388194e-01\\\\\n", + "\t indian & indian & 2.637133e-05 & 0.9999488 & 6.648651e-07 & 2.259993e-05 & 1.577948e-06\\\\\n", + "\t indian & indian & 1.049433e-03 & 0.9909982 & 1.060937e-03 & 1.644947e-05 & 6.874989e-03\\\\\n", + "\t indian & indian & 6.237482e-02 & 0.4763035 & 9.136702e-02 & 3.660913e-01 & 3.863391e-03\\\\\n", + "\t indian & indian & 1.431745e-02 & 0.9418551 & 2.945239e-02 & 8.721782e-03 & 5.653283e-03\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 7
cuisine.pred_class.pred_chinese.pred_indian.pred_japanese.pred_korean.pred_thai
<fct><fct><dbl><dbl><dbl><dbl><dbl>
indianthai 1.551259e-030.45878775.988039e-042.428503e-045.388194e-01
indianindian2.637133e-050.99994886.648651e-072.259993e-051.577948e-06
indianindian1.049433e-030.99099821.060937e-031.644947e-056.874989e-03
indianindian6.237482e-020.47630359.136702e-023.660913e-013.863391e-03
indianindian1.431745e-020.94185512.945239e-028.721782e-035.653283e-03
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 248 + }, + "id": "xdKNs-ZPMTJL", + "outputId": "68f6ac5a-725a-4eff-9ea6-481fef00e008" + } + }, + { + "cell_type": "markdown", + "source": [ + "✅ คุณสามารถอธิบายได้ไหมว่าทำไมโมเดลถึงมั่นใจว่าการสังเกตการณ์แรกเป็นอาหารไทย?\n", + "\n", + "## **🚀ความท้าทาย**\n", + "\n", + "ในบทเรียนนี้ คุณได้ใช้ข้อมูลที่ทำความสะอาดแล้วเพื่อสร้างโมเดลการเรียนรู้ของเครื่องที่สามารถทำนายอาหารประจำชาติได้จากชุดของส่วนผสม ลองใช้เวลาศึกษา [ตัวเลือกมากมาย](https://www.tidymodels.org/find/parsnip/#models) ที่ Tidymodels มีให้สำหรับการจัดประเภทข้อมูล และ [วิธีอื่นๆ](https://parsnip.tidymodels.org/articles/articles/Examples.html#multinom_reg-models) ในการปรับโมเดลการถดถอยพหุคลาส\n", + "\n", + "#### ขอบคุณ:\n", + "\n", + "[`Allison Horst`](https://twitter.com/allison_horst/) สำหรับการสร้างภาพประกอบที่น่าทึ่งซึ่งทำให้ R ดูน่าสนใจและเข้าถึงได้มากขึ้น ค้นหาภาพประกอบเพิ่มเติมได้ที่ [แกลเลอรี](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM) ของเธอ\n", + "\n", + "[Cassie Breviu](https://www.twitter.com/cassieview) และ [Jen Looper](https://www.twitter.com/jenlooper) สำหรับการสร้างเวอร์ชัน Python ดั้งเดิมของโมดูลนี้ ♥️\n", + "\n", + "
\n", + "อยากจะใส่มุกตลกลงไป แต่ฉันไม่เข้าใจมุกเกี่ยวกับอาหารเลย 😅\n", + "\n", + "
\n", + "\n", + "เรียนรู้อย่างมีความสุข,\n", + "\n", + "[Eric](https://twitter.com/ericntay), Gold Microsoft Learn Student Ambassador\n" + ], + "metadata": { + "id": "2tWVHMeLMYdM" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้อง แต่โปรดทราบว่าการแปลอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่แม่นยำ เอกสารต้นฉบับในภาษาต้นทางควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษามนุษย์มืออาชีพ เราจะไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความที่ผิดพลาดซึ่งเกิดจากการใช้การแปลนี้\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/th/4-Classification/2-Classifiers-1/solution/notebook.ipynb b/translations/th/4-Classification/2-Classifiers-1/solution/notebook.ipynb new file mode 100644 index 000000000..e26dc114f --- /dev/null +++ b/translations/th/4-Classification/2-Classifiers-1/solution/notebook.ipynb @@ -0,0 +1,279 @@ +{ + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 0 indian 0 0 0 0 0 \n", + "1 1 indian 1 0 0 0 0 \n", + "2 2 indian 0 0 0 0 0 \n", + "3 3 indian 0 0 0 0 0 \n", + "4 4 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 382 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00indian00000000...0000000000
11indian10000000...0000000000
22indian00000000...0000000000
33indian00000000...0000000000
44indian00000000...0000000010
\n

5 rows × 382 columns

\n
" + }, + "metadata": {}, + "execution_count": 1 + } + ], + "source": [ + "import pandas as pd\n", + "cuisines_df = pd.read_csv(\"../../data/cleaned_cuisines.csv\")\n", + "cuisines_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.model_selection import train_test_split, cross_val_score\n", + "from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report, precision_recall_curve\n", + "from sklearn.svm import SVC\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 indian\n", + "1 indian\n", + "2 indian\n", + "3 indian\n", + "4 indian\n", + "Name: cuisine, dtype: object" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ], + "source": [ + "cuisines_label_df = cuisines_df['cuisine']\n", + "cuisines_label_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 4 + } + ], + "source": [ + "cuisines_feature_df = cuisines_df.drop(['Unnamed: 0', 'cuisine'], axis=1)\n", + "cuisines_feature_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(cuisines_feature_df, cuisines_label_df, test_size=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Accuracy is 0.8181818181818182\n" + ] + } + ], + "source": [ + "lr = LogisticRegression(multi_class='ovr',solver='liblinear')\n", + "model = lr.fit(X_train, np.ravel(y_train))\n", + "\n", + "accuracy = model.score(X_test, y_test)\n", + "print (\"Accuracy is {}\".format(accuracy))" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "ingredients: Index(['artemisia', 'black_pepper', 'mushroom', 'shiitake', 'soy_sauce',\n 'vegetable_oil'],\n dtype='object')\ncuisine: korean\n" + ] + } + ], + "source": [ + "# test an item\n", + "print(f'ingredients: {X_test.iloc[50][X_test.iloc[50]!=0].keys()}')\n", + "print(f'cuisine: {y_test.iloc[50]}')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " 0\n", + "korean 0.392231\n", + "chinese 0.372872\n", + "japanese 0.218825\n", + "thai 0.013427\n", + "indian 0.002645" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
0
korean0.392231
chinese0.372872
japanese0.218825
thai0.013427
indian0.002645
\n
" + }, + "metadata": {}, + "execution_count": 8 + } + ], + "source": [ + "#rehsape to 2d array and transpose\n", + "test= X_test.iloc[50].values.reshape(-1, 1).T\n", + "# predict with score\n", + "proba = model.predict_proba(test)\n", + "classes = model.classes_\n", + "# create df with classes and scores\n", + "resultdf = pd.DataFrame(data=proba, columns=classes)\n", + "\n", + "# create df to show results\n", + "topPrediction = resultdf.T.sort_values(by=[0], ascending = [False])\n", + "topPrediction.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " precision recall f1-score support\n\n chinese 0.75 0.73 0.74 223\n indian 0.93 0.88 0.90 255\n japanese 0.78 0.78 0.78 253\n korean 0.87 0.86 0.86 236\n thai 0.76 0.84 0.80 232\n\n accuracy 0.82 1199\n macro avg 0.82 0.82 0.82 1199\nweighted avg 0.82 0.82 0.82 1199\n\n" + ] + } + ], + "source": [ + "y_pred = model.predict(X_test)\r\n", + "print(classification_report(y_test,y_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้องมากที่สุด แต่โปรดทราบว่าการแปลโดยอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่ถูกต้อง เอกสารต้นฉบับในภาษาที่เป็นต้นฉบับควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษามืออาชีพ เราไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความผิดที่เกิดจากการใช้การแปลนี้\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + 
"nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "9408506dd864f2b6e334c62f80c0cfcc", + "translation_date": "2025-09-06T14:33:14+00:00", + "source_file": "4-Classification/2-Classifiers-1/solution/notebook.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/translations/th/4-Classification/3-Classifiers-2/notebook.ipynb b/translations/th/4-Classification/3-Classifiers-2/notebook.ipynb new file mode 100644 index 000000000..32e49fc89 --- /dev/null +++ b/translations/th/4-Classification/3-Classifiers-2/notebook.ipynb @@ -0,0 +1,163 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 0 indian 0 0 0 0 0 \n", + "1 1 indian 1 0 0 0 0 \n", + "2 2 indian 0 0 0 0 0 \n", + "3 3 indian 0 0 0 0 0 \n", + "4 4 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 382 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00indian00000000...0000000000
11indian10000000...0000000000
22indian00000000...0000000000
33indian00000000...0000000000
44indian00000000...0000000010
\n

5 rows × 382 columns

\n
" + }, + "metadata": {}, + "execution_count": 9 + } + ], + "source": [ + "import pandas as pd\n", + "cuisines_df = pd.read_csv(\"../data/cleaned_cuisines.csv\")\n", + "cuisines_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 indian\n", + "1 indian\n", + "2 indian\n", + "3 indian\n", + "4 indian\n", + "Name: cuisine, dtype: object" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ], + "source": [ + "cuisines_label_df = cuisines_df['cuisine']\n", + "cuisines_label_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 11 + } + ], + "source": [ + "cuisines_feature_df = cuisines_df.drop(['Unnamed: 0', 'cuisine'], axis=1)\n", + "cuisines_feature_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้อง แต่โปรดทราบว่าการแปลอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่แม่นยำ เอกสารต้นฉบับในภาษาต้นทางควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษาจากผู้เชี่ยวชาญ เราจะไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความที่ผิดพลาดซึ่งเกิดจากการใช้การแปลนี้\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "15a83277036572e0773229b5f21c1e12", + "translation_date": "2025-09-06T14:42:28+00:00", + "source_file": "4-Classification/3-Classifiers-2/notebook.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/translations/th/4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb b/translations/th/4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb new file mode 100644 index 000000000..05bf31910 --- /dev/null +++ b/translations/th/4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb @@ -0,0 +1,648 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + 
"metadata": { + "colab": { + "name": "lesson_12-R.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "fab50046ca413a38939d579f8432274f", + "translation_date": "2025-09-06T14:48:47+00:00", + "source_file": "4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb", + "language_code": "th" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "jsFutf_ygqSx" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HD54bEefgtNO" + }, + "source": [ + "## ตัวจำแนกประเภทอาหาร 2\n", + "\n", + "ในบทเรียนการจำแนกประเภทครั้งที่สองนี้ เราจะสำรวจ `วิธีเพิ่มเติม` ในการจำแนกข้อมูลเชิงหมวดหมู่ นอกจากนี้เรายังจะเรียนรู้ถึงผลกระทบจากการเลือกตัวจำแนกประเภทหนึ่งแทนอีกตัวหนึ่ง\n", + "\n", + "### [**แบบทดสอบก่อนเรียน**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/23/)\n", + "\n", + "### **ความรู้พื้นฐานที่ต้องมี**\n", + "\n", + "เราสมมติว่าคุณได้เรียนจบบทเรียนก่อนหน้านี้แล้ว เนื่องจากเราจะนำแนวคิดบางอย่างที่เราเรียนรู้มาก่อนมาใช้ต่อในบทเรียนนี้\n", + "\n", + "สำหรับบทเรียนนี้ เราจะต้องใช้แพ็กเกจดังต่อไปนี้:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) คือ [ชุดของแพ็กเกจ R](https://www.tidyverse.org/packages) ที่ออกแบบมาเพื่อทำให้การวิเคราะห์ข้อมูลเร็วขึ้น ง่ายขึ้น และสนุกมากขึ้น!\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) เป็นกรอบงานที่เป็น [ชุดของแพ็กเกจ](https://www.tidymodels.org/packages/) สำหรับการสร้างแบบจำลองและการเรียนรู้ของเครื่อง\n", + "\n", + "- `themis`: [แพ็กเกจ themis](https://themis.tidymodels.org/) ให้ขั้นตอนเพิ่มเติมสำหรับการจัดการข้อมูลที่ไม่สมดุล\n", + "\n", + "คุณสามารถติดตั้งแพ็กเกจเหล่านี้ได้โดยใช้คำสั่ง:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\", \"kernlab\", \"themis\", \"ranger\", \"xgboost\", \"kknn\"))`\n", + "\n", + 
"หรือใช้สคริปต์ด้านล่างเพื่อตรวจสอบว่าคุณมีแพ็กเกจที่จำเป็นสำหรับการเรียนในโมดูลนี้หรือไม่ และติดตั้งให้ในกรณีที่ยังไม่มี\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "vZ57IuUxgyQt" + }, + "source": [ + "suppressWarnings(if (!require(\"pacman\"))install.packages(\"pacman\"))\n", + "\n", + "pacman::p_load(tidyverse, tidymodels, themis, kernlab, ranger, xgboost, kknn)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "z22M-pj4g07x" + }, + "source": [ + "## **1. แผนที่การจัดประเภท**\n", + "\n", + "ใน [บทเรียนก่อนหน้านี้](https://github.com/microsoft/ML-For-Beginners/tree/main/4-Classification/2-Classifiers-1) เราได้พยายามตอบคำถามว่า: เราจะเลือกใช้โมเดลใดในหลายๆ โมเดลที่มีอยู่? คำตอบส่วนใหญ่ขึ้นอยู่กับลักษณะของข้อมูลและประเภทของปัญหาที่เราต้องการแก้ไข (เช่น การจัดประเภทหรือการถดถอย)\n", + "\n", + "ก่อนหน้านี้ เราได้เรียนรู้เกี่ยวกับตัวเลือกต่างๆ ที่คุณมีเมื่อจัดประเภทข้อมูลโดยใช้ชีตสรุป (cheat sheet) ของ Microsoft ส่วน Scikit-learn ซึ่งเป็นเฟรมเวิร์ก Machine Learning ของ Python ก็มีชีตสรุปที่คล้ายกันแต่มีรายละเอียดมากขึ้น ซึ่งสามารถช่วยจำกัดตัวเลือกตัวประมาณค่า (estimators ซึ่งเป็นอีกคำหนึ่งที่ใช้เรียกตัวจัดประเภท) ของคุณให้แคบลง:\n", + "\n", + "

\n", + " \n", + "

\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "u1i3xRIVg7vG" + }, + "source": [ + "> เคล็ดลับ: [เยี่ยมชมแผนที่นี้ออนไลน์](https://scikit-learn.org/stable/tutorial/machine_learning_map/) และคลิกตามเส้นทางเพื่ออ่านเอกสารประกอบ \n", + "> \n", + "> [เว็บไซต์อ้างอิงของ Tidymodels](https://www.tidymodels.org/find/parsnip/#models) ยังมีเอกสารที่ยอดเยี่ยมเกี่ยวกับประเภทของโมเดลต่าง ๆ ให้ศึกษาเพิ่มเติมด้วย\n", + "\n", + "### **แผนการ** 🗺️\n", + "\n", + "แผนที่นี้มีประโยชน์มากเมื่อคุณเข้าใจข้อมูลของคุณอย่างชัดเจน เพราะคุณสามารถ 'เดิน' ไปตามเส้นทางเพื่อหาคำตอบได้:\n", + "\n", + "- เรามีตัวอย่างมากกว่า 50 ตัวอย่าง\n", + "\n", + "- เราต้องการทำนายประเภท (category)\n", + "\n", + "- เรามีข้อมูลที่มีป้ายกำกับ (labeled data)\n", + "\n", + "- เรามีตัวอย่างน้อยกว่า 100,000 ตัวอย่าง\n", + "\n", + "- ✨ เราสามารถเลือกใช้ Linear SVC ได้\n", + "\n", + "- ถ้าไม่ได้ผล เนื่องจากเรามีข้อมูลเชิงตัวเลข\n", + "\n", + " - เราสามารถลองใช้ ✨ KNeighbors Classifier\n", + "\n", + " - ถ้ายังไม่ได้ผลอีก ให้ลองใช้ ✨ SVC และ ✨ Ensemble Classifiers\n", + "\n", + "นี่เป็นเส้นทางที่มีประโยชน์มากในการปฏิบัติตาม ตอนนี้ มาเริ่มต้นกันเลยโดยใช้ [tidymodels](https://www.tidymodels.org/) ซึ่งเป็นกรอบการทำงานสำหรับการสร้างโมเดล: คอลเลกชันของแพ็กเกจ R ที่สอดคล้องและยืดหยุ่น ซึ่งพัฒนาขึ้นเพื่อส่งเสริมการปฏิบัติทางสถิติที่ดี 😊\n", + "\n", + "## 2. 
แบ่งข้อมูลและจัดการกับชุดข้อมูลที่ไม่สมดุล\n", + "\n", + "จากบทเรียนก่อนหน้า เราได้เรียนรู้ว่ามีส่วนผสมบางอย่างที่พบได้ทั่วไปในอาหารของเรา นอกจากนี้ ยังมีการกระจายตัวของจำนวนอาหารที่ไม่เท่ากันอย่างมาก\n", + "\n", + "เราจะจัดการกับสิ่งเหล่านี้โดย:\n", + "\n", + "- ลบส่วนผสมที่พบได้บ่อยที่สุดซึ่งสร้างความสับสนระหว่างอาหารที่แตกต่างกัน โดยใช้ `dplyr::select()`\n", + "\n", + "- ใช้ `recipe` เพื่อเตรียมข้อมูลให้พร้อมสำหรับการสร้างโมเดล โดยการใช้อัลกอริธึม `over-sampling`\n", + "\n", + "เราได้ดูสิ่งเหล่านี้ไปแล้วในบทเรียนก่อนหน้า ดังนั้นสิ่งนี้น่าจะง่ายมาก 🥳!\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "6tj_rN00hClA" + }, + "source": [ + "# Load the core Tidyverse and Tidymodels packages\n", + "library(tidyverse)\n", + "library(tidymodels)\n", + "\n", + "# Load the original cuisines data\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/4-Classification/data/cuisines.csv\")\n", + "\n", + "# Drop id column, rice, garlic and ginger from our original data set\n", + "df_select <- df %>% \n", + " select(-c(1, rice, garlic, ginger)) %>%\n", + " # Encode cuisine column as categorical\n", + " mutate(cuisine = factor(cuisine))\n", + "\n", + "\n", + "# Create data split specification\n", + "set.seed(2056)\n", + "cuisines_split <- initial_split(data = df_select,\n", + " strata = cuisine,\n", + " prop = 0.7)\n", + "\n", + "# Extract the data in each split\n", + "cuisines_train <- training(cuisines_split)\n", + "cuisines_test <- testing(cuisines_split)\n", + "\n", + "# Display distribution of cuisines in the training set\n", + "cuisines_train %>% \n", + " count(cuisine) %>% \n", + " arrange(desc(n))" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zFin5yw3hHb1" + }, + "source": [ + "### จัดการกับข้อมูลที่ไม่สมดุล\n", + "\n", + "ข้อมูลที่ไม่สมดุลมักส่งผลเสียต่อประสิทธิภาพของโมเดล หลายโมเดลทำงานได้ดีที่สุดเมื่อจำนวนข้อมูลมีความเท่ากัน 
และดังนั้นจึงมักมีปัญหาเมื่อเจอกับข้อมูลที่ไม่สมดุล\n", + "\n", + "มีวิธีหลัก ๆ สองวิธีในการจัดการกับชุดข้อมูลที่ไม่สมดุล:\n", + "\n", + "- เพิ่มข้อมูลในกลุ่มที่มีจำนวนน้อย: `Over-sampling` เช่น การใช้ SMOTE algorithm ซึ่งสร้างตัวอย่างใหม่ในกลุ่มที่มีจำนวนน้อยโดยใช้ข้อมูลจากเพื่อนบ้านที่ใกล้เคียงที่สุดของกรณีเหล่านั้น\n", + "\n", + "- ลบข้อมูลออกจากกลุ่มที่มีจำนวนมาก: `Under-sampling`\n", + "\n", + "ในบทเรียนก่อนหน้านี้ เราได้แสดงวิธีจัดการกับชุดข้อมูลที่ไม่สมดุลโดยใช้ `recipe` ซึ่งสามารถมองว่าเป็นแผนงานที่อธิบายขั้นตอนที่ควรนำไปใช้กับชุดข้อมูลเพื่อเตรียมให้พร้อมสำหรับการวิเคราะห์ข้อมูล ในกรณีของเรา เราต้องการให้มีการกระจายจำนวนข้อมูลในกลุ่มอาหารของเราอย่างเท่าเทียมกันสำหรับ `training set` มาเริ่มกันเลย!\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "cRzTnHolhLWd" + }, + "source": [ + "# Load themis package for dealing with imbalanced data\n", + "library(themis)\n", + "\n", + "# Create a recipe for preprocessing training data\n", + "cuisines_recipe <- recipe(cuisine ~ ., data = cuisines_train) %>%\n", + " step_smote(cuisine) \n", + "\n", + "# Print recipe\n", + "cuisines_recipe" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KxOQ2ORhhO81" + }, + "source": [ + "ตอนนี้เราพร้อมที่จะฝึกโมเดลแล้ว 👩‍💻👨‍💻!\n", + "\n", + "## 3. นอกเหนือจากโมเดลการถดถอยแบบมัลติโนเมียล\n", + "\n", + "ในบทเรียนก่อนหน้า เราได้ศึกษาโมเดลการถดถอยแบบมัลติโนเมียล (multinomial regression) ลองมาสำรวจโมเดลที่ยืดหยุ่นมากขึ้นสำหรับการจำแนกประเภทกันเถอะ\n", + "\n", + "### Support Vector Machines\n", + "\n", + "ในบริบทของการจำแนกประเภท `Support Vector Machines` เป็นเทคนิคการเรียนรู้ของเครื่องที่พยายามค้นหา *ไฮเปอร์เพลน* ที่ \"ดีที่สุด\" ในการแยกประเภท ลองดูตัวอย่างง่าย ๆ:\n", + "\n", + "

\n", + " \n", + "

https://commons.wikimedia.org/w/index.php?curid=22877598
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "C4Wsd0vZhXYu" + }, + "source": [ + "H1~ ไม่ได้แยกคลาสออกจากกัน H2~ แยกคลาสออกจากกัน แต่มีระยะห่างเพียงเล็กน้อย H3~ แยกคลาสออกจากกันด้วยระยะห่างสูงสุด\n", + "\n", + "#### ตัวจำแนกเชิงเส้นแบบ Support Vector\n", + "\n", + "Support-Vector classifier (SVC) เป็นส่วนหนึ่งของกลุ่มเทคนิคการเรียนรู้ของเครื่อง (ML) ในตระกูล Support-Vector machines ใน SVC จะมีการเลือก hyperplane เพื่อแยก `ส่วนใหญ่` ของข้อมูลการฝึกอบรมออกจากกันอย่างถูกต้อง แต่ `อาจมีการจัดประเภทผิดพลาด` สำหรับบางข้อมูล โดยการอนุญาตให้บางจุดอยู่ในด้านที่ผิด SVM จะมีความทนทานต่อค่าผิดปกติมากขึ้น และสามารถปรับตัวให้เข้ากับข้อมูลใหม่ได้ดีขึ้น พารามิเตอร์ที่ควบคุมการละเมิดนี้เรียกว่า `cost` ซึ่งมีค่าเริ่มต้นเป็น 1 (ดู `help(\"svm_poly\")`)\n", + "\n", + "มาลองสร้าง SVC เชิงเส้นโดยตั้งค่า `degree = 1` ในโมเดล SVM แบบพหุนาม\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "vJpp6nuChlBz" + }, + "source": [ + "# Make a linear SVC specification\n", + "svc_linear_spec <- svm_poly(degree = 1) %>% \n", + " set_engine(\"kernlab\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle specification and recipe into a workflow\n", + "svc_linear_wf <- workflow() %>% \n", + " add_recipe(cuisines_recipe) %>% \n", + " add_model(svc_linear_spec)\n", + "\n", + "# Print out workflow\n", + "svc_linear_wf" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rDs8cWNkhoqu" + }, + "source": [ + "ตอนนี้ที่เราได้รวบรวมขั้นตอนการเตรียมข้อมูลและการกำหนดโมเดลไว้ใน *workflow* แล้ว เราสามารถเริ่มฝึก Linear SVC และประเมินผลลัพธ์ไปพร้อมกันได้ สำหรับตัวชี้วัดประสิทธิภาพ เรามาสร้างชุดตัวชี้วัดที่จะประเมิน: `accuracy`, `sensitivity`, `Positive Predicted Value` และ `F Measure`\n", + "\n", + "> `augment()` จะเพิ่มคอลัมน์สำหรับการทำนายลงในข้อมูลที่กำหนด\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "81wiqcwuhrnq" + }, + "source": [ + "# Train a linear SVC model\n", + "svc_linear_fit <- 
svc_linear_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "# Create a metric set\n", + "eval_metrics <- metric_set(ppv, sens, accuracy, f_meas)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "svc_linear_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0UFQvHf-huo3" + }, + "source": [ + "#### ซัพพอร์ตเวกเตอร์แมชชีน\n", + "\n", + "ซัพพอร์ตเวกเตอร์แมชชีน (SVM) เป็นการขยายความสามารถของซัพพอร์ตเวกเตอร์คลาสซิไฟเออร์เพื่อรองรับเส้นแบ่งระหว่างคลาสที่ไม่เป็นเส้นตรง โดยหลักการแล้ว SVM ใช้ *เคอร์เนลทริก* เพื่อขยายพื้นที่ฟีเจอร์ให้สามารถปรับตัวเข้ากับความสัมพันธ์ที่ไม่เป็นเส้นตรงระหว่างคลาสได้ หนึ่งในฟังก์ชันเคอร์เนลที่ได้รับความนิยมและมีความยืดหยุ่นสูงที่ SVM ใช้คือ *Radial basis function* มาดูกันว่า SVM จะทำงานกับข้อมูลของเราได้อย่างไร\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "-KX4S8mzhzmp" + }, + "source": [ + "set.seed(2056)\n", + "\n", + "# Make an RBF SVM specification\n", + "svm_rbf_spec <- svm_rbf() %>% \n", + " set_engine(\"kernlab\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle specification and recipe into a workflow\n", + "svm_rbf_wf <- workflow() %>% \n", + " add_recipe(cuisines_recipe) %>% \n", + " add_model(svm_rbf_spec)\n", + "\n", + "\n", + "# Train an RBF model\n", + "svm_rbf_fit <- svm_rbf_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "svm_rbf_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QBFSa7WSh4HQ" + }, + "source": [ + "ดีขึ้นมาก 🤩!\n", + "\n", + "> ✅ โปรดดู:\n", + ">\n", + "> - [*Support Vector 
Machines*](https://bradleyboehmke.github.io/HOML/svm.html), Hands-on Machine Learning with R\n", + ">\n", + "> - [*Support Vector Machines*](https://www.statlearning.com/), An Introduction to Statistical Learning with Applications in R\n", + ">\n", + "> สำหรับการอ่านเพิ่มเติม\n", + "\n", + "### ตัวจำแนกประเภท Nearest Neighbor\n", + "\n", + "*K*-nearest neighbor (KNN) เป็นอัลกอริทึมที่การคาดการณ์แต่ละค่าจะขึ้นอยู่กับ *ความคล้ายคลึง* กับค่าของการสังเกตอื่น ๆ\n", + "\n", + "ลองปรับให้เข้ากับข้อมูลของเรากันเถอะ\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "k4BxxBcdh9Ka" + }, + "source": [ + "# Make a KNN specification\n", + "knn_spec <- nearest_neighbor() %>% \n", + " set_engine(\"kknn\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle recipe and model specification into a workflow\n", + "knn_wf <- workflow() %>% \n", + " add_recipe(cuisines_recipe) %>% \n", + " add_model(knn_spec)\n", + "\n", + "# Train a KNN model\n", + "knn_wf_fit <- knn_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "knn_wf_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HaegQseriAcj" + }, + "source": [ + "ดูเหมือนว่าโมเดลนี้จะยังทำงานได้ไม่ดีนัก อาจจะลองเปลี่ยนพารามิเตอร์ของโมเดล (ดู `help(\"nearest_neighbor\")`) เพื่อปรับปรุงประสิทธิภาพของโมเดล อย่าลืมทดลองดูนะ\n", + "\n", + "> ✅ โปรดดู:\n", + ">\n", + "> - [Hands-on Machine Learning with R](https://bradleyboehmke.github.io/HOML/)\n", + ">\n", + "> - [An Introduction to Statistical Learning with Applications in R](https://www.statlearning.com/)\n", + ">\n", + "> เพื่อเรียนรู้เพิ่มเติมเกี่ยวกับตัวจัดประเภท *K*-Nearest Neighbors\n", + "\n", + "### ตัวจัดประเภทแบบ Ensemble\n", + "\n", + "อัลกอริธึมแบบ Ensemble 
ทำงานโดยการรวมตัวประมาณค่าพื้นฐานหลายตัวเข้าด้วยกันเพื่อสร้างโมเดลที่เหมาะสมที่สุด โดยใช้วิธีการดังนี้:\n", + "\n", + "`bagging`: ใช้ *ฟังก์ชันเฉลี่ย* กับชุดของโมเดลพื้นฐาน\n", + "\n", + "`boosting`: สร้างลำดับของโมเดลที่ต่อยอดจากกันและกันเพื่อปรับปรุงประสิทธิภาพการพยากรณ์\n", + "\n", + "เริ่มต้นด้วยการลองใช้โมเดล Random Forest ซึ่งสร้างชุดของต้นไม้ตัดสินใจจำนวนมาก จากนั้นใช้ฟังก์ชันเฉลี่ยเพื่อสร้างโมเดลโดยรวมที่ดียิ่งขึ้น\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "49DPoVs6iK1M" + }, + "source": [ + "# Make a random forest specification\n", + "rf_spec <- rand_forest() %>% \n", + " set_engine(\"ranger\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle recipe and model specification into a workflow\n", + "rf_wf <- workflow() %>% \n", + " add_recipe(cuisines_recipe) %>% \n", + " add_model(rf_spec)\n", + "\n", + "# Train a random forest model\n", + "rf_wf_fit <- rf_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "rf_wf_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RGVYwC_aiUWc" + }, + "source": [ + "ทำได้ดีมาก 👏!\n", + "\n", + "ลองมาทดลองใช้โมเดล Boosted Tree กันเถอะ\n", + "\n", + "Boosted Tree เป็นวิธีการแบบกลุ่มที่สร้างชุดของต้นไม้ตัดสินใจที่ต่อเนื่องกัน โดยที่แต่ละต้นไม้จะขึ้นอยู่กับผลลัพธ์ของต้นไม้ก่อนหน้า เพื่อพยายามลดข้อผิดพลาดลงทีละน้อย วิธีนี้เน้นไปที่น้ำหนักของรายการที่ถูกจัดประเภทผิด และปรับการคาดการณ์สำหรับตัวจัดประเภทถัดไปเพื่อแก้ไขข้อผิดพลาด\n", + "\n", + "มีหลายวิธีในการปรับโมเดลนี้ (ดู `help(\"boost_tree\")`) ในตัวอย่างนี้ เราจะปรับ Boosted trees ผ่านเครื่องมือ `xgboost`\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Py1YWo-micWs" + }, + "source": [ + "# Make a boosted tree specification\n", + "boost_spec <- boost_tree(trees = 200) %>% \n", + " 
set_engine(\"xgboost\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle recipe and model specification into a workflow\n", + "boost_wf <- workflow() %>% \n", + " add_recipe(cuisines_recipe) %>% \n", + " add_model(boost_spec)\n", + "\n", + "# Train a boosted tree model\n", + "boost_wf_fit <- boost_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "boost_wf_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zNQnbuejigZM" + }, + "source": [ + "> ✅ โปรดดู:\n", + ">\n", + "> - [Machine Learning for Social Scientists](https://cimentadaj.github.io/ml_socsci/tree-based-methods.html#random-forests)\n", + ">\n", + "> - [Hands-on Machine Learning with R](https://bradleyboehmke.github.io/HOML/)\n", + ">\n", + "> - [An Introduction to Statistical Learning with Applications in R](https://www.statlearning.com/)\n", + ">\n", + "> - - สำรวจโมเดล AdaBoost ซึ่งเป็นทางเลือกที่ดีสำหรับ xgboost\n", + ">\n", + "> เพื่อเรียนรู้เพิ่มเติมเกี่ยวกับ Ensemble classifiers\n", + "\n", + "## 4. 
เพิ่มเติม - การเปรียบเทียบโมเดลหลายตัว\n", + "\n", + "เราได้สร้างโมเดลจำนวนมากในห้องทดลองนี้ 🙌 การสร้าง workflows จำนวนมากจากชุด preprocessors และ/หรือ model specifications ที่แตกต่างกัน แล้วคำนวณค่าประสิทธิภาพทีละตัวอาจกลายเป็นงานที่น่าเบื่อหรือยุ่งยาก\n", + "\n", + "ลองมาดูกันว่าเราสามารถแก้ไขปัญหานี้ได้หรือไม่โดยการสร้างฟังก์ชันที่สามารถปรับ workflows หลายตัวในชุดข้อมูลการฝึกอบรม แล้วคืนค่าประสิทธิภาพตามชุดข้อมูลทดสอบ เราจะใช้ `map()` และ `map_dfr()` จากแพ็กเกจ [purrr](https://purrr.tidyverse.org/) เพื่อใช้ฟังก์ชันกับแต่ละองค์ประกอบในลิสต์\n", + "\n", + "> ฟังก์ชัน [`map()`](https://purrr.tidyverse.org/reference/map.html) ช่วยให้คุณแทนที่ for loops จำนวนมากด้วยโค้ดที่กระชับและอ่านง่ายขึ้น จุดเริ่มต้นที่ดีที่สุดในการเรียนรู้เกี่ยวกับฟังก์ชัน [`map()`](https://purrr.tidyverse.org/reference/map.html) คือบท [iteration chapter](http://r4ds.had.co.nz/iteration.html) ใน R for data science\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Qzb7LyZnimd2" + }, + "source": [ + "set.seed(2056)\n", + "\n", + "# Create a metric set\n", + "eval_metrics <- metric_set(ppv, sens, accuracy, f_meas)\n", + "\n", + "# Define a function that returns performance metrics\n", + "compare_models <- function(workflow_list, train_set, test_set){\n", + " \n", + " suppressWarnings(\n", + " # Fit each model to the train_set\n", + " map(workflow_list, fit, data = train_set) %>% \n", + " # Make predictions on the test set\n", + " map_dfr(augment, new_data = test_set, .id = \"model\") %>%\n", + " # Select desired columns\n", + " select(model, cuisine, .pred_class) %>% \n", + " # Evaluate model performance\n", + " group_by(model) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class) %>% \n", + " ungroup()\n", + " )\n", + " \n", + "} # End of function" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Fwa712sNisDA" + }, + "source": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "3i4VJOi2iu-a" + }, + 
"source": [ + "# Make a list of workflows\n", + "workflow_list <- list(\n", + " \"svc\" = svc_linear_wf,\n", + " \"svm\" = svm_rbf_wf,\n", + " \"knn\" = knn_wf,\n", + " \"random_forest\" = rf_wf,\n", + " \"xgboost\" = boost_wf)\n", + "\n", + "# Call the function\n", + "set.seed(2056)\n", + "perf_metrics <- compare_models(workflow_list = workflow_list, train_set = cuisines_train, test_set = cuisines_test)\n", + "\n", + "# Print out performance metrics\n", + "perf_metrics %>% \n", + " group_by(.metric) %>% \n", + " arrange(desc(.estimate)) %>% \n", + " slice_head(n=7)\n", + "\n", + "# Compare accuracy\n", + "perf_metrics %>% \n", + " filter(.metric == \"accuracy\") %>% \n", + " arrange(desc(.estimate))\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KuWK_lEli4nW" + }, + "source": [ + "แพ็กเกจ [**workflowset**](https://workflowsets.tidymodels.org/) ช่วยให้ผู้ใช้สามารถสร้างและปรับใช้โมเดลจำนวนมากได้อย่างง่ายดาย แต่ส่วนใหญ่ถูกออกแบบมาเพื่อใช้งานร่วมกับเทคนิคการสุ่มตัวอย่าง เช่น `cross-validation` ซึ่งเป็นวิธีที่เรายังไม่ได้กล่าวถึง\n", + "\n", + "## **🚀ความท้าทาย**\n", + "\n", + "แต่ละเทคนิคเหล่านี้มีพารามิเตอร์จำนวนมากที่คุณสามารถปรับแต่งได้ เช่น `cost` ใน SVMs, `neighbors` ใน KNN, `mtry` (ตัวทำนายที่ถูกเลือกแบบสุ่ม) ใน Random Forest\n", + "\n", + "ค้นคว้าพารามิเตอร์เริ่มต้นของแต่ละโมเดล และลองคิดดูว่าการปรับแต่งพารามิเตอร์เหล่านี้จะส่งผลต่อคุณภาพของโมเดลอย่างไร\n", + "\n", + "หากต้องการทราบข้อมูลเพิ่มเติมเกี่ยวกับโมเดลและพารามิเตอร์ของมัน ให้ใช้: `help(\"model\")` เช่น `help(\"rand_forest\")`\n", + "\n", + "> ในการใช้งานจริง เรามักจะ *ประมาณค่า* *ค่าที่ดีที่สุด* โดยการฝึกโมเดลหลายตัวบน `ชุดข้อมูลจำลอง` และวัดผลว่าโมเดลเหล่านี้ทำงานได้ดีเพียงใด กระบวนการนี้เรียกว่า **การปรับแต่ง**\n", + "\n", + "### [**แบบทดสอบหลังการบรรยาย**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/24/)\n", + "\n", + "### **ทบทวนและศึกษาด้วยตนเอง**\n", + "\n", + "มีคำศัพท์เฉพาะมากมายในบทเรียนเหล่านี้ 
ใช้เวลาสักครู่เพื่อทบทวน [รายการนี้](https://docs.microsoft.com/dotnet/machine-learning/resources/glossary?WT.mc_id=academic-77952-leestott) ซึ่งรวบรวมคำศัพท์ที่มีประโยชน์ไว้!\n", + "\n", + "#### ขอขอบคุณ:\n", + "\n", + "[`Allison Horst`](https://twitter.com/allison_horst/) สำหรับการสร้างภาพประกอบที่น่าทึ่งซึ่งทำให้ R ดูน่าสนใจและเข้าถึงได้มากขึ้น ค้นหาภาพประกอบเพิ่มเติมได้ที่ [แกลเลอรีของเธอ](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM)\n", + "\n", + "[Cassie Breviu](https://www.twitter.com/cassieview) และ [Jen Looper](https://www.twitter.com/jenlooper) สำหรับการสร้างเวอร์ชัน Python ดั้งเดิมของโมดูลนี้ ♥️\n", + "\n", + "เรียนรู้อย่างมีความสุข,\n", + "\n", + "[Eric](https://twitter.com/ericntay), Gold Microsoft Learn Student Ambassador\n", + "\n", + "

\n", + " \n", + "

ภาพประกอบโดย @allison_horst
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้อง แต่โปรดทราบว่าการแปลอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่แม่นยำ เอกสารต้นฉบับในภาษาดั้งเดิมควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษาจากผู้เชี่ยวชาญ เราไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความที่ผิดพลาดซึ่งเกิดจากการใช้การแปลนี้\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/th/4-Classification/3-Classifiers-2/solution/notebook.ipynb b/translations/th/4-Classification/3-Classifiers-2/solution/notebook.ipynb new file mode 100644 index 000000000..3afd7a4c8 --- /dev/null +++ b/translations/th/4-Classification/3-Classifiers-2/solution/notebook.ipynb @@ -0,0 +1,300 @@ +{ + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 0 indian 0 0 0 0 0 \n", + "1 1 indian 1 0 0 0 0 \n", + "2 2 indian 0 0 0 0 0 \n", + "3 3 indian 0 0 0 0 0 \n", + "4 4 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 382 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00indian00000000...0000000000
11indian10000000...0000000000
22indian00000000...0000000000
33indian00000000...0000000000
44indian00000000...0000000010
\n

5 rows × 382 columns

\n
" + }, + "metadata": {}, + "execution_count": 1 + } + ], + "source": [ + "import pandas as pd\n", + "cuisines_df = pd.read_csv(\"../../data/cleaned_cuisines.csv\")\n", + "cuisines_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 indian\n", + "1 indian\n", + "2 indian\n", + "3 indian\n", + "4 indian\n", + "Name: cuisine, dtype: object" + ] + }, + "metadata": {}, + "execution_count": 2 + } + ], + "source": [ + "cuisines_label_df = cuisines_df['cuisine']\n", + "cuisines_label_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 3 + } + ], + "source": [ + "cuisines_feature_df = cuisines_df.drop(['Unnamed: 0', 'cuisine'], axis=1)\n", + "cuisines_feature_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.neighbors import KNeighborsClassifier\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.svm import SVC\n", + "from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier\n", + "from sklearn.model_selection import train_test_split, cross_val_score\n", + "from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report, precision_recall_curve\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(cuisines_feature_df, cuisines_label_df, test_size=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "C = 10\n", + "# Create different classifiers.\n", + "classifiers = {\n", + " 'Linear SVC': SVC(kernel='linear', C=C, probability=True,random_state=0),\n", + " 'KNN classifier': KNeighborsClassifier(C),\n", + " 'SVC': SVC(),\n", + " 'RFST': RandomForestClassifier(n_estimators=100),\n", + " 'ADA': AdaBoostClassifier(n_estimators=100)\n", + " \n", + "}\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Accuracy (train) for Linear SVC: 76.4% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.64 0.66 0.65 242\n", + " indian 0.91 0.86 0.89 236\n", + " japanese 0.72 0.73 0.73 245\n", + " korean 0.83 0.75 0.79 234\n", + " thai 0.75 0.82 0.78 242\n", + "\n", + " accuracy 0.76 1199\n", + " macro avg 0.77 0.76 
0.77 1199\n", + "weighted avg 0.77 0.76 0.77 1199\n", + "\n", + "Accuracy (train) for KNN classifier: 70.7% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.65 0.63 0.64 242\n", + " indian 0.84 0.81 0.82 236\n", + " japanese 0.60 0.81 0.69 245\n", + " korean 0.89 0.53 0.67 234\n", + " thai 0.69 0.75 0.72 242\n", + "\n", + " accuracy 0.71 1199\n", + " macro avg 0.73 0.71 0.71 1199\n", + "weighted avg 0.73 0.71 0.71 1199\n", + "\n", + "Accuracy (train) for SVC: 80.1% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.71 0.69 0.70 242\n", + " indian 0.92 0.92 0.92 236\n", + " japanese 0.77 0.78 0.77 245\n", + " korean 0.87 0.77 0.82 234\n", + " thai 0.75 0.86 0.80 242\n", + "\n", + " accuracy 0.80 1199\n", + " macro avg 0.80 0.80 0.80 1199\n", + "weighted avg 0.80 0.80 0.80 1199\n", + "\n", + "Accuracy (train) for RFST: 82.8% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.80 0.75 0.77 242\n", + " indian 0.90 0.91 0.90 236\n", + " japanese 0.82 0.78 0.80 245\n", + " korean 0.85 0.82 0.83 234\n", + " thai 0.78 0.89 0.83 242\n", + "\n", + " accuracy 0.83 1199\n", + " macro avg 0.83 0.83 0.83 1199\n", + "weighted avg 0.83 0.83 0.83 1199\n", + "\n", + "Accuracy (train) for ADA: 71.1% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.60 0.57 0.58 242\n", + " indian 0.87 0.84 0.86 236\n", + " japanese 0.71 0.60 0.65 245\n", + " korean 0.68 0.78 0.72 234\n", + " thai 0.70 0.78 0.74 242\n", + "\n", + " accuracy 0.71 1199\n", + " macro avg 0.71 0.71 0.71 1199\n", + "weighted avg 0.71 0.71 0.71 1199\n", + "\n" + ] + } + ], + "source": [ + "n_classifiers = len(classifiers)\n", + "\n", + "for index, (name, classifier) in enumerate(classifiers.items()):\n", + " classifier.fit(X_train, np.ravel(y_train))\n", + "\n", + " y_pred = classifier.predict(X_test)\n", + " accuracy = accuracy_score(y_test, y_pred)\n", + " print(\"Accuracy (train) for %s: %0.1f%% \" % (name, accuracy * 100))\n", + " 
print(classification_report(y_test,y_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามอย่างเต็มที่เพื่อให้การแปลมีความถูกต้อง โปรดทราบว่าการแปลอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่แม่นยำ เอกสารต้นฉบับในภาษาต้นทางควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษามนุษย์ที่เป็นมืออาชีพ เราไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความที่ผิดพลาดซึ่งเกิดจากการใช้การแปลนี้\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "7ea2b714669c823a596d986ba2d5739f", + "translation_date": "2025-09-06T14:43:00+00:00", + "source_file": "4-Classification/3-Classifiers-2/solution/notebook.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/translations/th/4-Classification/4-Applied/notebook.ipynb b/translations/th/4-Classification/4-Applied/notebook.ipynb new file mode 100644 index 000000000..d2b566c12 --- /dev/null +++ b/translations/th/4-Classification/4-Applied/notebook.ipynb @@ -0,0 +1,39 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + 
"pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 4, + "coopTranslator": { + "original_hash": "2f3e0d9e9ac5c301558fb8bf733ac0cb", + "translation_date": "2025-09-06T14:41:38+00:00", + "source_file": "4-Classification/4-Applied/notebook.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้อง แต่โปรดทราบว่าการแปลโดยอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่ถูกต้อง เอกสารต้นฉบับในภาษาดั้งเดิมควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษามืออาชีพ เราไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความผิดที่เกิดจากการใช้การแปลนี้\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/th/4-Classification/4-Applied/solution/notebook.ipynb b/translations/th/4-Classification/4-Applied/solution/notebook.ipynb new file mode 100644 index 000000000..c4a2e0f50 --- /dev/null +++ b/translations/th/4-Classification/4-Applied/solution/notebook.ipynb @@ -0,0 +1,290 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "49325d6dd12a3628fc64fa7ccb1a80ff", + "translation_date": 
"2025-09-06T14:42:03+00:00", + "source_file": "4-Classification/4-Applied/solution/notebook.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: skl2onnx in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (1.8.0)\n", + "Requirement already satisfied: protobuf in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (3.8.0)\n", + "Requirement already satisfied: numpy>=1.15 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (1.19.2)\n", + "Requirement already satisfied: onnx>=1.2.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (1.9.0)\n", + "Requirement already satisfied: six in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from skl2onnx) (1.12.0)\n", + "Requirement already satisfied: onnxconverter-common<1.9,>=1.6.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (1.8.1)\n", + "Requirement already satisfied: scikit-learn>=0.19 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (0.24.2)\n", + "Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (1.4.1)\n", + "Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from protobuf->skl2onnx) (45.1.0)\n", + "Requirement already satisfied: typing-extensions>=3.6.2.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from onnx>=1.2.1->skl2onnx) (3.10.0.0)\n", + "Requirement already 
satisfied: threadpoolctl>=2.0.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from scikit-learn>=0.19->skl2onnx) (2.1.0)\n", + "Requirement already satisfied: joblib>=0.11 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from scikit-learn>=0.19->skl2onnx) (0.16.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "!pip install skl2onnx" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd \n" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 0 indian 0 0 0 0 0 \n", + "1 1 indian 1 0 0 0 0 \n", + "2 2 indian 0 0 0 0 0 \n", + "3 3 indian 0 0 0 0 0 \n", + "4 4 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 382 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00indian00000000...0000000000
11indian10000000...0000000000
22indian00000000...0000000000
33indian00000000...0000000000
44indian00000000...0000000010
\n

5 rows × 382 columns

\n
" + }, + "metadata": {}, + "execution_count": 60 + } + ], + "source": [ + "data = pd.read_csv('../../data/cleaned_cuisines.csv')\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 61 + } + ], + "source": [ + "X = data.iloc[:,2:]\n", + "X.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " cuisine\n", + "0 indian\n", + "1 indian\n", + "2 indian\n", + "3 indian\n", + "4 indian" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
cuisine
0indian
1indian
2indian
3indian
4indian
\n
" + }, + "metadata": {}, + "execution_count": 62 + } + ], + "source": [ + "y = data[['cuisine']]\n", + "y.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "from sklearn.svm import SVC\n", + "from sklearn.model_selection import cross_val_score\n", + "from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "SVC(C=10, kernel='linear', probability=True, random_state=0)" + ] + }, + "metadata": {}, + "execution_count": 65 + } + ], + "source": [ + "model = SVC(kernel='linear', C=10, probability=True,random_state=0)\n", + "model.fit(X_train,y_train.values.ravel())\n" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred = model.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " precision recall f1-score support\n\n chinese 0.72 0.70 0.71 236\n indian 0.91 0.88 0.89 243\n japanese 0.80 0.75 0.77 240\n korean 0.80 0.81 0.81 230\n thai 0.76 0.85 0.80 250\n\n accuracy 0.80 1199\n macro avg 0.80 0.80 0.80 1199\nweighted avg 0.80 0.80 0.80 1199\n\n" + ] + } + ], + "source": [ + "print(classification_report(y_test,y_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [], + "source": [ + "from skl2onnx import convert_sklearn\n", + "from skl2onnx.common.data_types import FloatTensorType\n", + "\n", + "initial_type = 
[('float_input', FloatTensorType([None, 380]))]\n", + "options = {id(model): {'nocl': True, 'zipmap': False}}\n", + "onx = convert_sklearn(model, initial_types=initial_type, options=options)\n", + "with open(\"./model.onnx\", \"wb\") as f:\n", + " f.write(onx.SerializeToString())\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้อง แต่โปรดทราบว่าการแปลอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่แม่นยำ เอกสารต้นฉบับในภาษาดั้งเดิมควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ แนะนำให้ใช้บริการแปลภาษาจากผู้เชี่ยวชาญ เราไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความที่ผิดพลาดซึ่งเกิดจากการใช้การแปลนี้\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/th/5-Clustering/1-Visualize/notebook.ipynb b/translations/th/5-Clustering/1-Visualize/notebook.ipynb new file mode 100644 index 000000000..abd29b9fc --- /dev/null +++ b/translations/th/5-Clustering/1-Visualize/notebook.ipynb @@ -0,0 +1,50 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python383jvsc74a57bd0e134e05457d34029b6460cd73bbf1ed73f339b5b6d98c95be70b69eba114fe95", + "display_name": "Python 3.8.3 64-bit (conda)" + }, + "coopTranslator": { + "original_hash": "40e0707e96b3e1899a912776006264f9", + "translation_date": "2025-09-06T14:08:08+00:00", + "source_file": "5-Clustering/1-Visualize/notebook.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 
null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้อง แต่โปรดทราบว่าการแปลอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่แม่นยำ เอกสารต้นฉบับในภาษาต้นทางควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษามนุษย์ที่เป็นมืออาชีพ เราจะไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความที่ผิดพลาดซึ่งเกิดจากการใช้การแปลนี้\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/th/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb b/translations/th/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb new file mode 100644 index 000000000..fa4bb2087 --- /dev/null +++ b/translations/th/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb @@ -0,0 +1,493 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "## **การวิเคราะห์เพลงไนจีเรียที่ดึงข้อมูลจาก Spotify**\n", + "\n", + "การจัดกลุ่ม (Clustering) เป็นรูปแบบหนึ่งของ [การเรียนรู้แบบไม่มีผู้สอน](https://wikipedia.org/wiki/Unsupervised_learning) ซึ่งสมมติว่าชุดข้อมูลไม่มีการติดป้ายกำกับ หรือข้อมูลนำเข้าไม่ได้จับคู่กับผลลัพธ์ที่กำหนดไว้ล่วงหน้า โดยใช้หลากหลายอัลกอริทึมเพื่อจัดเรียงข้อมูลที่ไม่มีป้ายกำกับและสร้างกลุ่มตามรูปแบบที่พบในข้อมูล\n", + "\n", + "[**แบบทดสอบก่อนการบรรยาย**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/27/)\n", + "\n", + "### **บทนำ**\n", + "\n", + "[การจัดกลุ่ม](https://link.springer.com/referenceworkentry/10.1007%2F978-0-387-30164-8_124) มีประโยชน์มากสำหรับการสำรวจข้อมูล ลองมาดูกันว่ามันสามารถช่วยค้นหาแนวโน้มและรูปแบบในวิธีที่ผู้ฟังชาวไนจีเรียบริโภคเพลงได้หรือไม่\n", + "\n", + "> ✅ ลองใช้เวลาสักครู่คิดถึงการใช้งานของการจัดกลุ่ม ในชีวิตจริง การจัดกลุ่มเกิดขึ้นเมื่อคุณมีกองผ้าซักและต้องแยกเสื้อผ้าของสมาชิกในครอบครัวออกจากกัน 🧦👕👖🩲 ในด้านวิทยาศาสตร์ข้อมูล 
การจัดกลุ่มเกิดขึ้นเมื่อพยายามวิเคราะห์ความชอบของผู้ใช้ หรือกำหนดลักษณะของชุดข้อมูลที่ไม่มีป้ายกำกับ การจัดกลุ่มช่วยให้เข้าใจความยุ่งเหยิง เช่น ลิ้นชักถุงเท้า\n", + "\n", + "ในสภาพแวดล้อมการทำงาน การจัดกลุ่มสามารถใช้เพื่อกำหนดสิ่งต่าง ๆ เช่น การแบ่งส่วนตลาด หรือการระบุว่ากลุ่มอายุใดซื้อสินค้าอะไร ตัวอย่างเช่น อีกการใช้งานหนึ่งคือการตรวจจับความผิดปกติ เช่น การตรวจจับการฉ้อโกงจากชุดข้อมูลธุรกรรมบัตรเครดิต หรือคุณอาจใช้การจัดกลุ่มเพื่อระบุเนื้องอกจากชุดภาพสแกนทางการแพทย์\n", + "\n", + "✅ ลองคิดสักครู่เกี่ยวกับวิธีที่คุณอาจเคยพบการจัดกลุ่มในชีวิตจริง เช่น ในธนาคาร อีคอมเมิร์ซ หรือการตั้งค่าทางธุรกิจ\n", + "\n", + "> 🎓 น่าสนใจที่การวิเคราะห์การจัดกลุ่มมีต้นกำเนิดในสาขามานุษยวิทยาและจิตวิทยาในช่วงปี 1930 คุณจินตนาการได้ไหมว่ามันอาจถูกใช้อย่างไร?\n", + "\n", + "อีกทางหนึ่ง คุณสามารถใช้มันเพื่อจัดกลุ่มผลการค้นหา เช่น ลิงก์การช็อปปิ้ง รูปภาพ หรือรีวิว การจัดกลุ่มมีประโยชน์เมื่อคุณมีชุดข้อมูลขนาดใหญ่ที่ต้องการลดขนาดลงและต้องการวิเคราะห์ในเชิงลึกมากขึ้น ดังนั้นเทคนิคนี้สามารถใช้เพื่อเรียนรู้เกี่ยวกับข้อมูลก่อนที่จะสร้างโมเดลอื่น ๆ\n", + "\n", + "✅ เมื่อข้อมูลของคุณถูกจัดระเบียบในกลุ่ม คุณสามารถกำหนดรหัสกลุ่มให้กับมัน เทคนิคนี้มีประโยชน์เมื่อคุณต้องการรักษาความเป็นส่วนตัวของชุดข้อมูล คุณสามารถอ้างถึงจุดข้อมูลโดยใช้รหัสกลุ่มแทนที่จะใช้ข้อมูลที่สามารถระบุตัวตนได้ คุณคิดเหตุผลอื่น ๆ ได้ไหมว่าทำไมคุณถึงเลือกใช้รหัสกลุ่มแทนองค์ประกอบอื่น ๆ ของกลุ่มเพื่อระบุข้อมูล?\n", + "\n", + "### เริ่มต้นกับการจัดกลุ่ม\n", + "\n", + "> 🎓 วิธีที่เราสร้างกลุ่มมีความเกี่ยวข้องอย่างมากกับวิธีที่เรารวบรวมจุดข้อมูลเข้าด้วยกัน ลองมาทำความเข้าใจคำศัพท์บางคำ:\n", + ">\n", + "> 🎓 ['Transductive' vs. 
'Inductive'](https://wikipedia.org/wiki/Transduction_(machine_learning))\n", + ">\n", + "> การอนุมานแบบ Transductive มาจากกรณีการฝึกอบรมที่สังเกตได้ซึ่งจับคู่กับกรณีทดสอบเฉพาะ การอนุมานแบบ Inductive มาจากกรณีการฝึกอบรมที่จับคู่กับกฎทั่วไปซึ่งจะถูกนำไปใช้กับกรณีทดสอบในภายหลัง\n", + ">\n", + "> ตัวอย่าง: ลองจินตนาการว่าคุณมีชุดข้อมูลที่มีการติดป้ายกำกับบางส่วน บางรายการเป็น 'แผ่นเสียง' บางรายการเป็น 'ซีดี' และบางรายการไม่มีป้ายกำกับ งานของคุณคือการให้ป้ายกำกับกับข้อมูลที่ไม่มีป้ายกำกับ หากคุณเลือกวิธี Inductive คุณจะฝึกโมเดลเพื่อค้นหา 'แผ่นเสียง' และ 'ซีดี' และนำป้ายกำกับเหล่านั้นไปใช้กับข้อมูลที่ไม่มีป้ายกำกับ วิธีนี้จะมีปัญหาในการจัดประเภทสิ่งที่จริง ๆ แล้วเป็น 'เทปคาสเซ็ต' ในทางกลับกัน วิธี Transductive จะจัดการกับข้อมูลที่ไม่รู้จักได้อย่างมีประสิทธิภาพมากกว่า โดยทำงานเพื่อจัดกลุ่มรายการที่คล้ายกันเข้าด้วยกันและนำป้ายกำกับไปใช้กับกลุ่ม ในกรณีนี้ กลุ่มอาจสะท้อนถึง 'สิ่งดนตรีทรงกลม' และ 'สิ่งดนตรีทรงสี่เหลี่ยม'\n", + ">\n", + "> 🎓 ['Non-flat' vs. 'Flat' Geometry](https://datascience.stackexchange.com/questions/52260/terminology-flat-geometry-in-the-context-of-clustering)\n", + ">\n", + "> มาจากคำศัพท์ทางคณิตศาสตร์ 'Non-flat' vs. 'Flat' Geometry หมายถึงการวัดระยะทางระหว่างจุดโดยใช้วิธีการทางเรขาคณิตแบบ 'Flat' ([Euclidean](https://wikipedia.org/wiki/Euclidean_geometry)) หรือ 'Non-flat' (Non-Euclidean)\n", + ">\n", + "> 'Flat' ในบริบทนี้หมายถึงเรขาคณิตแบบยูคลิด (บางส่วนของมันถูกสอนเป็นเรขาคณิต 'Plane') และ 'Non-flat' หมายถึงเรขาคณิตแบบไม่ใช่ยูคลิด เรขาคณิตเกี่ยวข้องอะไรกับการเรียนรู้ของเครื่อง? เนื่องจากทั้งสองสาขามีรากฐานมาจากคณิตศาสตร์ จึงต้องมีวิธีการทั่วไปในการวัดระยะทางระหว่างจุดในกลุ่ม ซึ่งสามารถทำได้ในแบบ 'Flat' หรือ 'Non-flat' ขึ้นอยู่กับลักษณะของข้อมูล [ระยะทางแบบยูคลิด](https://wikipedia.org/wiki/Euclidean_distance) ถูกวัดเป็นความยาวของเส้นตรงระหว่างสองจุด [ระยะทางแบบไม่ใช่ยูคลิด](https://wikipedia.org/wiki/Non-Euclidean_geometry) ถูกวัดตามเส้นโค้ง หากข้อมูลของคุณเมื่อแสดงภาพดูเหมือนจะไม่อยู่บนระนาบ คุณอาจต้องใช้อัลกอริทึมเฉพาะเพื่อจัดการกับมัน\n", + "\n", + "

\n", + " \n", + "

อินโฟกราฟิกโดย Dasani Madipalli
\n", + "\n", + "> 🎓 ['Distances'](https://web.stanford.edu/class/cs345a/slides/12-clustering.pdf)\n", + ">\n", + "> กลุ่มถูกกำหนดโดยเมทริกซ์ระยะทาง เช่น ระยะทางระหว่างจุด ระยะทางนี้สามารถวัดได้หลายวิธี กลุ่มแบบยูคลิดถูกกำหนดโดยค่าเฉลี่ยของค่าจุด และมี 'Centroid' หรือจุดศูนย์กลาง ระยะทางจึงถูกวัดโดยระยะทางไปยัง Centroid นั้น ระยะทางแบบไม่ใช่ยูคลิดหมายถึง 'Clustroids' ซึ่งเป็นจุดที่ใกล้ที่สุดกับจุดอื่น ๆ Clustroids สามารถกำหนดได้หลายวิธี\n", + ">\n", + "> 🎓 ['Constrained'](https://wikipedia.org/wiki/Constrained_clustering)\n", + ">\n", + "> [การจัดกลุ่มแบบมีข้อจำกัด](https://web.cs.ucdavis.edu/~davidson/Publications/ICDMTutorial.pdf) แนะนำการเรียนรู้แบบกึ่งมีผู้สอนเข้าสู่วิธีการแบบไม่มีผู้สอน ความสัมพันธ์ระหว่างจุดถูกกำหนดเป็น 'ไม่สามารถเชื่อมโยง' หรือ 'ต้องเชื่อมโยง' ดังนั้นจึงมีการบังคับกฎบางอย่างในชุดข้อมูล\n", + ">\n", + "> ตัวอย่าง: หากอัลกอริทึมถูกปล่อยให้ทำงานกับชุดข้อมูลที่ไม่มีป้ายกำกับหรือมีป้ายกำกับบางส่วน กลุ่มที่มันสร้างขึ้นอาจมีคุณภาพต่ำ ในตัวอย่างข้างต้น กลุ่มอาจจัดกลุ่ม 'สิ่งดนตรีทรงกลม' 'สิ่งดนตรีทรงสี่เหลี่ยม' 'สิ่งทรงสามเหลี่ยม' และ 'คุกกี้' หากมีการให้ข้อจำกัดหรือกฎบางอย่าง (\"รายการต้องทำจากพลาสติก\", \"รายการต้องสามารถผลิตเสียงดนตรีได้\") สิ่งนี้สามารถช่วย 'จำกัด' อัลกอริทึมให้เลือกได้ดีขึ้น\n", + ">\n", + "> 🎓 'Density'\n", + ">\n", + "> ข้อมูลที่มี 'เสียงรบกวน' ถือว่าเป็น 'หนาแน่น' ระยะทางระหว่างจุดในแต่ละกลุ่มอาจพิสูจน์ได้ว่ามีความหนาแน่นมากหรือน้อยเมื่อวิเคราะห์ และข้อมูลนี้จำเป็นต้องได้รับการวิเคราะห์ด้วยวิธีการจัดกลุ่มที่เหมาะสม [บทความนี้](https://www.kdnuggets.com/2020/02/understanding-density-based-clustering.html) แสดงให้เห็นความแตกต่างระหว่างการใช้ K-Means clustering กับ HDBSCAN เพื่อสำรวจชุดข้อมูลที่มีความหนาแน่นของกลุ่มไม่เท่ากัน\n", + "\n", + "เพิ่มความเข้าใจของคุณเกี่ยวกับเทคนิคการจัดกลุ่มใน [โมดูลการเรียนรู้](https://docs.microsoft.com/learn/modules/train-evaluate-cluster-models?WT.mc_id=academic-77952-leestott)\n", + "\n", + "### **อัลกอริทึมการจัดกลุ่ม**\n", + "\n", + "มีอัลกอริทึมการจัดกลุ่มมากกว่า 100 แบบ 
และการใช้งานขึ้นอยู่กับลักษณะของข้อมูลที่มีอยู่ ลองมาพูดถึงบางส่วนที่สำคัญ:\n", + "\n", + "- **การจัดกลุ่มแบบลำดับชั้น** หากวัตถุถูกจัดประเภทตามความใกล้ชิดกับวัตถุใกล้เคียงมากกว่ากับวัตถุที่อยู่ไกลออกไป กลุ่มจะถูกสร้างขึ้นตามระยะทางของสมาชิกไปยังและจากวัตถุอื่น ๆ การจัดกลุ่มแบบลำดับชั้นมีลักษณะโดยการรวมสองกลุ่มซ้ำ ๆ\n", + "\n", + "

\n", + " \n", + "

อินโฟกราฟิกโดย Dasani Madipalli
\n", + "\n", + "- **การจัดกลุ่มแบบ Centroid** อัลกอริทึมยอดนิยมนี้ต้องการการเลือก 'k' หรือจำนวนกลุ่มที่จะสร้าง หลังจากนั้นอัลกอริทึมจะกำหนดจุดศูนย์กลางของกลุ่มและรวบรวมข้อมูลรอบจุดนั้น [K-means clustering](https://wikipedia.org/wiki/K-means_clustering) เป็นเวอร์ชันยอดนิยมของการจัดกลุ่มแบบ Centroid ซึ่งแยกชุดข้อมูลออกเป็น K กลุ่มที่กำหนดไว้ล่วงหน้า จุดศูนย์กลางถูกกำหนดโดยค่าเฉลี่ยที่ใกล้ที่สุด จึงเป็นที่มาของชื่อ ระยะทางยกกำลังสองจากกลุ่มจะถูกลดลง\n", + "\n", + "

\n", + " \n", + "

อินโฟกราฟิกโดย Dasani Madipalli
\n", + "\n", + "- **การจัดกลุ่มแบบอิงการกระจาย** อิงตามการสร้างแบบจำลองทางสถิติ การจัดกลุ่มแบบอิงการกระจายมุ่งเน้นไปที่การกำหนดความน่าจะเป็นที่จุดข้อมูลจะอยู่ในกลุ่ม และกำหนดให้ตามนั้น วิธี Gaussian mixture เป็นส่วนหนึ่งของประเภทนี้\n", + "\n", + "- **การจัดกลุ่มแบบอิงความหนาแน่น** จุดข้อมูลจะถูกกำหนดให้กับกลุ่มตามความหนาแน่น หรือการรวมกลุ่มรอบ ๆ กัน จุดข้อมูลที่อยู่ไกลจากกลุ่มจะถือว่าเป็นข้อมูลรบกวนหรือเสียงรบกวน DBSCAN, Mean-shift และ OPTICS เป็นส่วนหนึ่งของประเภทนี้\n", + "\n", + "- **การจัดกลุ่มแบบอิงกริด** สำหรับชุดข้อมูลหลายมิติ จะมีการสร้างกริดและข้อมูลจะถูกแบ่งออกเป็นเซลล์ของกริดนั้น ซึ่งจะสร้างกลุ่มขึ้นมา\n", + "\n", + "วิธีที่ดีที่สุดในการเรียนรู้เกี่ยวกับการจัดกลุ่มคือการลองทำด้วยตัวเอง และนั่นคือสิ่งที่คุณจะทำในแบบฝึกหัดนี้\n", + "\n", + "เราจะต้องใช้แพ็กเกจบางอย่างเพื่อเริ่มต้นโมดูลนี้ คุณสามารถติดตั้งได้โดยใช้: `install.packages(c('tidyverse', 'tidymodels', 'DataExplorer', 'summarytools', 'plotly', 'paletteer', 'corrplot', 'patchwork'))`\n", + "\n", + "หรือใช้สคริปต์ด้านล่างเพื่อตรวจสอบว่าคุณมีแพ็กเกจที่จำเป็นสำหรับการทำโมดูลนี้หรือไม่ และติดตั้งให้คุณในกรณีที่บางแพ็กเกจขาดหายไป\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "suppressWarnings(if(!require(\"pacman\")) install.packages(\"pacman\"))\r\n", + "\r\n", + "pacman::p_load('tidyverse', 'tidymodels', 'DataExplorer', 'summarytools', 'plotly', 'paletteer', 'corrplot', 'patchwork')\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## แบบฝึกหัด - จัดกลุ่มข้อมูลของคุณ\n", + "\n", + "การจัดกลุ่มข้อมูล (Clustering) เป็นเทคนิคที่ได้รับประโยชน์อย่างมากจากการแสดงผลข้อมูลที่เหมาะสม ดังนั้นเรามาเริ่มต้นด้วยการแสดงผลข้อมูลเพลงของเรากัน การฝึกหัดนี้จะช่วยให้เราตัดสินใจได้ว่าวิธีการจัดกลุ่มข้อมูลแบบใดที่เหมาะสมที่สุดสำหรับลักษณะของข้อมูลนี้\n", + "\n", + "มาเริ่มต้นกันเลยด้วยการนำเข้าข้อมูล!\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load the core 
tidyverse and make it available in your current R session\r\n", + "library(tidyverse)\r\n", + "\r\n", + "# Import the data into a tibble\r\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/5-Clustering/data/nigerian-songs.csv\")\r\n", + "\r\n", + "# View the first 5 rows of the data set\r\n", + "df %>% \r\n", + " slice_head(n = 5)\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "บางครั้งเราอาจต้องการข้อมูลเพิ่มเติมเล็กน้อยเกี่ยวกับข้อมูลของเรา เราสามารถดู `data` และ `โครงสร้างของมัน` ได้โดยใช้ฟังก์ชัน [*glimpse()*](https://pillar.r-lib.org/reference/glimpse.html):\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Glimpse into the data set\r\n", + "df %>% \r\n", + " glimpse()\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "เยี่ยมมาก!💪\n", + "\n", + "เราสามารถสังเกตได้ว่า `glimpse()` จะแสดงจำนวนแถวทั้งหมด (observations) และจำนวนคอลัมน์ทั้งหมด (variables) จากนั้นจะแสดงข้อมูลบางส่วนของแต่ละตัวแปรในแถวถัดจากชื่อของตัวแปร นอกจากนี้ *ประเภทข้อมูล* ของตัวแปรจะถูกแสดงอยู่ถัดจากชื่อของตัวแปรภายใน `< >`\n", + "\n", + "`DataExplorer::introduce()` สามารถสรุปข้อมูลเหล่านี้ได้อย่างเป็นระเบียบ:\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Describe basic information for our data\r\n", + "df %>% \r\n", + " introduce()\r\n", + "\r\n", + "# A visual display of the same\r\n", + "df %>% \r\n", + " plot_intro()\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "เยี่ยมไปเลย! 
เราเพิ่งทราบว่าข้อมูลของเราไม่มีค่าที่หายไป\n", + "\n", + "ในขณะที่เรากำลังทำสิ่งนี้ เราสามารถสำรวจสถิติที่แสดงถึงแนวโน้มศูนย์กลางทั่วไป (เช่น [ค่าเฉลี่ย](https://en.wikipedia.org/wiki/Arithmetic_mean) และ [ค่ามัธยฐาน](https://en.wikipedia.org/wiki/Median)) และการวัดการกระจายตัว (เช่น [ส่วนเบี่ยงเบนมาตรฐาน](https://en.wikipedia.org/wiki/Standard_deviation)) โดยใช้ `summarytools::descr()`\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Describe common statistics\r\n", + "df %>% \r\n", + " descr(stats = \"common\")\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "มาดูค่าทั่วไปของข้อมูลกันก่อน โปรดทราบว่าความนิยม (popularity) อาจมีค่าเป็น `0` ซึ่งแสดงถึงเพลงที่ไม่มีการจัดอันดับ เราจะลบข้อมูลเหล่านั้นในภายหลัง\n", + "\n", + "> 🤔 ถ้าเรากำลังทำงานกับการจัดกลุ่ม (clustering) ซึ่งเป็นวิธีการแบบไม่มีผู้สอนที่ไม่ต้องการข้อมูลที่มีป้ายกำกับ ทำไมเราถึงแสดงข้อมูลนี้พร้อมป้ายกำกับ? ในขั้นตอนการสำรวจข้อมูล ป้ายกำกับเหล่านี้มีประโยชน์ แต่ไม่ได้จำเป็นสำหรับการทำงานของอัลกอริทึมการจัดกลุ่ม\n", + "\n", + "### 1. สำรวจแนวเพลงยอดนิยม\n", + "\n", + "มาลองค้นหาแนวเพลงที่ได้รับความนิยมมากที่สุด 🎶 โดยการนับจำนวนครั้งที่ปรากฏกันเถอะ\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Popular genres\r\n", + "top_genres <- df %>% \r\n", + " count(artist_top_genre, sort = TRUE) %>% \r\n", + "# Encode to categorical and reorder the according to count\r\n", + " mutate(artist_top_genre = factor(artist_top_genre) %>% fct_inorder())\r\n", + "\r\n", + "# Print the top genres\r\n", + "top_genres\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "นั่นไปได้ดี! 
เขาว่าภาพหนึ่งภาพมีค่ามากกว่าพันแถวของข้อมูล (จริงๆ แล้วไม่มีใครพูดแบบนั้นเลย 😅) แต่คุณเข้าใจความหมายใช่ไหม?\n", + "\n", + "วิธีหนึ่งในการแสดงข้อมูลประเภทหมวดหมู่ (ตัวแปรประเภทตัวอักษรหรือแฟคเตอร์) คือการใช้กราฟแท่ง ลองมาสร้างกราฟแท่งของ 10 อันดับแรกของประเภทแนวเพลงกัน:\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Change the default gray theme\r\n", + "theme_set(theme_light())\r\n", + "\r\n", + "# Visualize popular genres\r\n", + "top_genres %>%\r\n", + " slice(1:10) %>% \r\n", + " ggplot(mapping = aes(x = artist_top_genre, y = n,\r\n", + " fill = artist_top_genre)) +\r\n", + " geom_col(alpha = 0.8) +\r\n", + " paletteer::scale_fill_paletteer_d(\"rcartocolor::Vivid\") +\r\n", + " ggtitle(\"Top genres\") +\r\n", + " theme(plot.title = element_text(hjust = 0.5),\r\n", + " # Rotates the X markers (so we can read them)\r\n", + " axis.text.x = element_text(angle = 90))\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "ตอนนี้มันง่ายขึ้นมากที่จะระบุว่าเรามี `missing` ประเภทเพลง 🧐!\n", + "\n", + "> การแสดงผลข้อมูลที่ดีจะช่วยให้คุณเห็นสิ่งที่คุณไม่คาดคิด หรือทำให้เกิดคำถามใหม่เกี่ยวกับข้อมูล - Hadley Wickham และ Garrett Grolemund, [R For Data Science](https://r4ds.had.co.nz/introduction.html)\n", + "\n", + "โปรดทราบว่า เมื่อประเภทเพลงที่อยู่ด้านบนถูกระบุว่า `Missing` นั่นหมายความว่า Spotify ไม่ได้จัดประเภทให้ ดังนั้นเรามาลบมันออกไปกันเถอะ\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Visualize popular genres\r\n", + "top_genres %>%\r\n", + " filter(artist_top_genre != \"Missing\") %>% \r\n", + " slice(1:10) %>% \r\n", + " ggplot(mapping = aes(x = artist_top_genre, y = n,\r\n", + " fill = artist_top_genre)) +\r\n", + " geom_col(alpha = 0.8) +\r\n", + " paletteer::scale_fill_paletteer_d(\"rcartocolor::Vivid\") +\r\n", + " ggtitle(\"Top genres\") +\r\n", + " theme(plot.title = element_text(hjust = 0.5),\r\n", + " # 
Rotates the X markers (so we can read them)\r\n", + " axis.text.x = element_text(angle = 90))\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "จากการสำรวจข้อมูลเบื้องต้น เราได้เรียนรู้ว่าแนวเพลงสามอันดับแรกมีอิทธิพลอย่างมากในชุดข้อมูลนี้ ดังนั้นเรามาเน้นที่ `afro dancehall`, `afropop` และ `nigerian pop` และกรองชุดข้อมูลเพิ่มเติมเพื่อเอาข้อมูลที่มีค่า popularity เท่ากับ 0 ออก (ซึ่งหมายความว่าไม่ได้ถูกจัดประเภทด้วยค่าความนิยมในชุดข้อมูล และสามารถพิจารณาได้ว่าเป็นสัญญาณรบกวนสำหรับวัตถุประสงค์ของเรา):\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "nigerian_songs <- df %>% \r\n", + " # Concentrate on top 3 genres\r\n", + " filter(artist_top_genre %in% c(\"afro dancehall\", \"afropop\",\"nigerian pop\")) %>% \r\n", + " # Remove unclassified observations\r\n", + " filter(popularity != 0)\r\n", + "\r\n", + "\r\n", + "\r\n", + "# Visualize popular genres\r\n", + "nigerian_songs %>%\r\n", + " count(artist_top_genre) %>%\r\n", + " ggplot(mapping = aes(x = artist_top_genre, y = n,\r\n", + " fill = artist_top_genre)) +\r\n", + " geom_col(alpha = 0.8) +\r\n", + " paletteer::scale_fill_paletteer_d(\"ggsci::category10_d3\") +\r\n", + " ggtitle(\"Top genres\") +\r\n", + " theme(plot.title = element_text(hjust = 0.5))\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "มาดูกันว่ามีความสัมพันธ์เชิงเส้นที่ชัดเจนระหว่างตัวแปรเชิงปริมาณในชุดข้อมูลของเราหรือไม่ ความสัมพันธ์นี้ถูกวัดในเชิงคณิตศาสตร์โดย [สถิติความสัมพันธ์](https://en.wikipedia.org/wiki/Correlation)\n", + "\n", + "สถิติความสัมพันธ์เป็นค่าที่อยู่ระหว่าง -1 ถึง 1 ซึ่งบ่งบอกถึงความแข็งแกร่งของความสัมพันธ์ ค่าเหนือ 0 แสดงถึงความสัมพันธ์แบบ *บวก* (ค่าที่สูงของตัวแปรหนึ่งมักจะสอดคล้องกับค่าที่สูงของอีกตัวแปรหนึ่ง) ในขณะที่ค่าต่ำกว่า 0 แสดงถึงความสัมพันธ์แบบ *ลบ* (ค่าที่สูงของตัวแปรหนึ่งมักจะสอดคล้องกับค่าที่ต่ำของอีกตัวแปรหนึ่ง)\n" + ], + "metadata": {} + }, + { + "cell_type": "code", 
+ "execution_count": null, + "source": [ + "# Narrow down to numeric variables and find correlation\r\n", + "corr_mat <- nigerian_songs %>% \r\n", + " select(where(is.numeric)) %>% \r\n", + " cor()\r\n", + "\r\n", + "# Visualize correlation matrix\r\n", + "corrplot(corr_mat, order = 'AOE', col = c('white', 'black'), bg = 'gold2') \r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "ข้อมูลไม่ได้มีความสัมพันธ์ที่ชัดเจน ยกเว้นระหว่าง `energy` และ `loudness` ซึ่งสมเหตุสมผล เพราะเพลงที่เสียงดังมักจะมีพลังงานสูง `Popularity` มีความสัมพันธ์กับ `release date` ซึ่งก็สมเหตุสมผลเช่นกัน เนื่องจากเพลงที่ออกใหม่มักจะได้รับความนิยมมากกว่า นอกจากนี้ ความยาวและพลังงานดูเหมือนจะมีความสัมพันธ์กันด้วย\n", + "\n", + "น่าสนใจที่จะดูว่าอัลกอริทึมการจัดกลุ่มจะสามารถวิเคราะห์ข้อมูลนี้ได้อย่างไร!\n", + "\n", + "> 🎓 โปรดทราบว่าความสัมพันธ์ไม่ได้หมายถึงเหตุและผล! เรามีหลักฐานของความสัมพันธ์ แต่ไม่มีหลักฐานของเหตุและผล เว็บไซต์ [ที่น่าสนุก](https://tylervigen.com/spurious-correlations) มีภาพประกอบที่เน้นประเด็นนี้\n", + "\n", + "### 2. สำรวจการกระจายตัวของข้อมูล\n", + "\n", + "ลองถามคำถามที่ลึกซึ้งขึ้นอีกหน่อย แนวเพลงต่าง ๆ มีความแตกต่างกันอย่างมีนัยสำคัญในแง่ของการเต้นได้หรือไม่ โดยพิจารณาจากความนิยม? 
ลองตรวจสอบการกระจายตัวของข้อมูลในสามแนวเพลงยอดนิยมของเราสำหรับความนิยมและการเต้นได้ตามแกน x และ y ที่กำหนด โดยใช้ [density plots](https://www.khanacademy.org/math/ap-statistics/density-curves-normal-distribution-ap/density-curves/v/density-curves)\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Perform 2D kernel density estimation\r\n", + "density_estimate_2d <- nigerian_songs %>% \r\n", + " ggplot(mapping = aes(x = popularity, y = danceability, color = artist_top_genre)) +\r\n", + " geom_density_2d(bins = 5, size = 1) +\r\n", + " paletteer::scale_color_paletteer_d(\"RSkittleBrewer::wildberry\") +\r\n", + " xlim(-20, 80) +\r\n", + " ylim(0, 1.2)\r\n", + "\r\n", + "# Density plot based on the popularity\r\n", + "density_estimate_pop <- nigerian_songs %>% \r\n", + " ggplot(mapping = aes(x = popularity, fill = artist_top_genre, color = artist_top_genre)) +\r\n", + " geom_density(size = 1, alpha = 0.5) +\r\n", + " paletteer::scale_fill_paletteer_d(\"RSkittleBrewer::wildberry\") +\r\n", + " paletteer::scale_color_paletteer_d(\"RSkittleBrewer::wildberry\") +\r\n", + " theme(legend.position = \"none\")\r\n", + "\r\n", + "# Density plot based on the danceability\r\n", + "density_estimate_dance <- nigerian_songs %>% \r\n", + " ggplot(mapping = aes(x = danceability, fill = artist_top_genre, color = artist_top_genre)) +\r\n", + " geom_density(size = 1, alpha = 0.5) +\r\n", + " paletteer::scale_fill_paletteer_d(\"RSkittleBrewer::wildberry\") +\r\n", + " paletteer::scale_color_paletteer_d(\"RSkittleBrewer::wildberry\")\r\n", + "\r\n", + "\r\n", + "# Patch everything together\r\n", + "library(patchwork)\r\n", + "density_estimate_2d / (density_estimate_pop + density_estimate_dance)\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "เราสังเกตเห็นว่ามีวงกลมที่เรียงตัวกันเป็นชั้นๆ ไม่ว่าจะเป็นแนวเพลงใดก็ตาม 
อาจเป็นไปได้ว่ารสนิยมของชาวไนจีเรียมาบรรจบกันที่ระดับความสามารถในการเต้นสำหรับแนวเพลงนี้?\n", + "\n", + "โดยทั่วไปแล้ว แนวเพลงทั้งสามมีความสอดคล้องกันในแง่ของความนิยมและความสามารถในการเต้น การกำหนดกลุ่มในข้อมูลที่เรียงตัวกันอย่างหลวมๆ นี้จะเป็นความท้าทาย ลองมาดูกันว่าการใช้ scatter plot จะช่วยสนับสนุนเรื่องนี้ได้หรือไม่\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# A scatter plot of popularity and danceability\r\n", + "scatter_plot <- nigerian_songs %>% \r\n", + " ggplot(mapping = aes(x = popularity, y = danceability, color = artist_top_genre, shape = artist_top_genre)) +\r\n", + " geom_point(size = 2, alpha = 0.8) +\r\n", + " paletteer::scale_color_paletteer_d(\"futurevisions::mars\")\r\n", + "\r\n", + "# Add a touch of interactivity\r\n", + "ggplotly(scatter_plot)\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "แผนภาพกระจายของแกนเดียวกันแสดงรูปแบบการบรรจบกันที่คล้ายกัน\n", + "\n", + "โดยทั่วไป สำหรับการจัดกลุ่มข้อมูล คุณสามารถใช้แผนภาพกระจายเพื่อแสดงกลุ่มของข้อมูล ดังนั้นการเชี่ยวชาญการสร้างภาพประเภทนี้จึงมีประโยชน์มาก ในบทเรียนถัดไป เราจะนำข้อมูลที่กรองแล้วนี้มาใช้กับการจัดกลุ่มแบบ k-means เพื่อค้นหากลุ่มในข้อมูลที่มีการทับซ้อนกันในรูปแบบที่น่าสนใจ\n", + "\n", + "## **🚀 ความท้าทาย**\n", + "\n", + "เพื่อเตรียมตัวสำหรับบทเรียนถัดไป สร้างแผนภูมิที่เกี่ยวกับอัลกอริทึมการจัดกลุ่มต่าง ๆ ที่คุณอาจค้นพบและใช้ในสภาพแวดล้อมการผลิต อัลกอริทึมการจัดกลุ่มเหล่านี้พยายามแก้ไขปัญหาแบบใด?\n", + "\n", + "## [**แบบทดสอบหลังบทเรียน**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/28/)\n", + "\n", + "## **ทบทวนและศึกษาด้วยตัวเอง**\n", + "\n", + "ก่อนที่คุณจะใช้การจัดกลุ่มข้อมูลตามที่เราได้เรียนรู้มา การทำความเข้าใจลักษณะของชุดข้อมูลของคุณเป็นความคิดที่ดี อ่านเพิ่มเติมเกี่ยวกับหัวข้อนี้ [ที่นี่](https://www.kdnuggets.com/2019/10/right-clustering-algorithm.html)\n", + "\n", + "เพิ่มพูนความเข้าใจเกี่ยวกับเทคนิคการจัดกลุ่มข้อมูล:\n", + "\n", + "- 
[ฝึกฝนและประเมินโมเดลการจัดกลุ่มด้วย Tidymodels และเครื่องมืออื่น ๆ](https://rpubs.com/eR_ic/clustering)\n", + "\n", + "- Bradley Boehmke & Brandon Greenwell, [*Hands-On Machine Learning with R*](https://bradleyboehmke.github.io/HOML/)*.*\n", + "\n", + "## **งานที่ได้รับมอบหมาย**\n", + "\n", + "[ค้นคว้าการสร้างภาพอื่น ๆ สำหรับการจัดกลุ่มข้อมูล](https://github.com/microsoft/ML-For-Beginners/blob/main/5-Clustering/1-Visualize/assignment.md)\n", + "\n", + "## ขอขอบคุณ:\n", + "\n", + "[Jen Looper](https://www.twitter.com/jenlooper) สำหรับการสร้างเวอร์ชัน Python ดั้งเดิมของโมดูลนี้ ♥️\n", + "\n", + "[`Dasani Madipalli`](https://twitter.com/dasani_decoded) สำหรับการสร้างภาพประกอบที่ยอดเยี่ยมซึ่งช่วยให้แนวคิดเกี่ยวกับการเรียนรู้ของเครื่องเข้าใจง่ายและเข้าถึงได้มากขึ้น\n", + "\n", + "เรียนรู้อย่างมีความสุข,\n", + "\n", + "[Eric](https://twitter.com/ericntay), Gold Microsoft Learn Student Ambassador\n" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้องมากที่สุด แต่โปรดทราบว่าการแปลโดยอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่ถูกต้อง เอกสารต้นฉบับในภาษาต้นทางควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษาจากผู้เชี่ยวชาญ เราไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความผิดที่เกิดจากการใช้การแปลนี้\n" + ] + } + ], + "metadata": { + "anaconda-cloud": "", + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "3.4.1" + }, + "coopTranslator": { + "original_hash": "99c36449cad3708a435f6798cfa39972", + "translation_date": "2025-09-06T14:16:59+00:00", + "source_file": "5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb", + "language_code": "th" + } + }, 
+ "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/translations/th/5-Clustering/1-Visualize/solution/notebook.ipynb b/translations/th/5-Clustering/1-Visualize/solution/notebook.ipynb new file mode 100644 index 000000000..f648adf97 --- /dev/null +++ b/translations/th/5-Clustering/1-Visualize/solution/notebook.ipynb @@ -0,0 +1,817 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Defaulting to user installation because normal site-packages is not writeable\n", + "Requirement already satisfied: seaborn in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (0.11.2)\n", + "Requirement already satisfied: matplotlib>=2.2 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from seaborn) (3.5.0)\n", + "Requirement already satisfied: numpy>=1.15 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from seaborn) (1.21.4)\n", + "Requirement already satisfied: pandas>=0.23 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from seaborn) (1.3.4)\n", + "Requirement already satisfied: scipy>=1.0 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from seaborn) (1.7.2)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (4.28.1)\n", + "Requirement already satisfied: pyparsing>=2.2.1 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (2.4.7)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (1.3.2)\n", + "Requirement already satisfied: pillow>=6.2.0 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from 
matplotlib>=2.2->seaborn) (8.4.0)\n", + "Requirement already satisfied: cycler>=0.10 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (0.11.0)\n", + "Requirement already satisfied: packaging>=20.0 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (21.2)\n", + "Requirement already satisfied: setuptools-scm>=4 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (6.3.2)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (2.8.2)\n", + "Requirement already satisfied: pytz>=2017.3 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from pandas>=0.23->seaborn) (2021.3)\n", + "Requirement already satisfied: six>=1.5 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from python-dateutil>=2.7->matplotlib>=2.2->seaborn) (1.16.0)\n", + "Requirement already satisfied: tomli>=1.0.0 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from setuptools-scm>=4->matplotlib>=2.2->seaborn) (1.2.2)\n", + "Requirement already satisfied: setuptools in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from setuptools-scm>=4->matplotlib>=2.2->seaborn) (59.1.1)\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "!pip install seaborn" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
0SparkyMandy & The JungleCruel Santinoalternative r&b2019144000480.6660.85100.4200.5340000.1100-6.6990.0829133.0155
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
2LITT!LITT!AYLØindie r&b2018207758400.8360.27200.5640.0005370.1100-7.1270.0424130.0054
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
\n", + "
" + ], + "text/plain": [ + " name album \\\n", + "0 Sparky Mandy & The Jungle \n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "2 LITT! LITT! \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "0 Cruel Santino alternative r&b 2019 144000 48 \n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "2 AYLØ indie r&b 2018 207758 40 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "0 0.666 0.8510 0.420 0.534000 0.1100 -6.699 \n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "2 0.836 0.2720 0.564 0.000537 0.1100 -7.127 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "\n", + " speechiness tempo time_signature \n", + "0 0.0829 133.015 5 \n", + "1 0.3600 129.993 3 \n", + "2 0.0424 130.005 4 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv(\"../../data/nigerian-songs.csv\")\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "รับข้อมูลเกี่ยวกับดาต้าเฟรม\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 530 entries, 0 to 529\n", + "Data columns (total 16 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 name 530 non-null object \n", + " 1 album 530 non-null object \n", + " 2 artist 530 non-null object \n", + " 3 artist_top_genre 530 non-null object \n", + " 4 release_date 530 non-null int64 \n", + " 5 length 530 non-null int64 \n", + " 6 popularity 530 non-null int64 \n", + " 7 danceability 530 
non-null float64\n", + " 8 acousticness 530 non-null float64\n", + " 9 energy 530 non-null float64\n", + " 10 instrumentalness 530 non-null float64\n", + " 11 liveness 530 non-null float64\n", + " 12 loudness 530 non-null float64\n", + " 13 speechiness 530 non-null float64\n", + " 14 tempo 530 non-null float64\n", + " 15 time_signature 530 non-null int64 \n", + "dtypes: float64(8), int64(4), object(4)\n", + "memory usage: 66.4+ KB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "name 0\n", + "album 0\n", + "artist 0\n", + "artist_top_genre 0\n", + "release_date 0\n", + "length 0\n", + "popularity 0\n", + "danceability 0\n", + "acousticness 0\n", + "energy 0\n", + "instrumentalness 0\n", + "liveness 0\n", + "loudness 0\n", + "speechiness 0\n", + "tempo 0\n", + "time_signature 0\n", + "dtype: int64" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.isnull().sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ดูค่าทั่วไปของข้อมูล โปรดทราบว่าความนิยมสามารถเป็น '0' ได้ - และมีหลายแถวที่มีค่านั้น\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
release_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
count530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000
mean2015.390566222298.16981117.5075470.7416190.2654120.7606230.0163050.147308-4.9530110.130748116.4878643.986792
std3.13168839696.82225918.9922120.1175220.2083420.1485330.0903210.1235882.4641860.09293923.5186010.333701
min1998.00000089488.0000000.0000000.2550000.0006650.1110000.0000000.028300-19.3620000.02780061.6950003.000000
25%2014.000000199305.0000000.0000000.6810000.0895250.6690000.0000000.075650-6.2987500.059100102.9612504.000000
50%2016.000000218509.00000013.0000000.7610000.2205000.7845000.0000040.103500-4.5585000.097950112.7145004.000000
75%2017.000000242098.50000031.0000000.8295000.4030000.8757500.0002340.164000-3.3310000.177000125.0392504.000000
max2020.000000511738.00000073.0000000.9660000.9540000.9950000.9100000.8110000.5820000.514000206.0070005.000000
\n", + "
" + ], + "text/plain": [ + " release_date length popularity danceability acousticness \\\n", + "count 530.000000 530.000000 530.000000 530.000000 530.000000 \n", + "mean 2015.390566 222298.169811 17.507547 0.741619 0.265412 \n", + "std 3.131688 39696.822259 18.992212 0.117522 0.208342 \n", + "min 1998.000000 89488.000000 0.000000 0.255000 0.000665 \n", + "25% 2014.000000 199305.000000 0.000000 0.681000 0.089525 \n", + "50% 2016.000000 218509.000000 13.000000 0.761000 0.220500 \n", + "75% 2017.000000 242098.500000 31.000000 0.829500 0.403000 \n", + "max 2020.000000 511738.000000 73.000000 0.966000 0.954000 \n", + "\n", + " energy instrumentalness liveness loudness speechiness \\\n", + "count 530.000000 530.000000 530.000000 530.000000 530.000000 \n", + "mean 0.760623 0.016305 0.147308 -4.953011 0.130748 \n", + "std 0.148533 0.090321 0.123588 2.464186 0.092939 \n", + "min 0.111000 0.000000 0.028300 -19.362000 0.027800 \n", + "25% 0.669000 0.000000 0.075650 -6.298750 0.059100 \n", + "50% 0.784500 0.000004 0.103500 -4.558500 0.097950 \n", + "75% 0.875750 0.000234 0.164000 -3.331000 0.177000 \n", + "max 0.995000 0.910000 0.811000 0.582000 0.514000 \n", + "\n", + " tempo time_signature \n", + "count 530.000000 530.000000 \n", + "mean 116.487864 3.986792 \n", + "std 23.518601 0.333701 \n", + "min 61.695000 3.000000 \n", + "25% 102.961250 4.000000 \n", + "50% 112.714500 4.000000 \n", + "75% 125.039250 4.000000 \n", + "max 206.007000 5.000000 " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "import seaborn as sns\n", + "\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top[:5].index,y=top[:5].values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "โปรดลบประเภทเพลง 'Missing' เนื่องจากไม่ได้ถูกจัดประเภทใน Spotify\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "df = df[df['artist_top_genre'] != 'Missing']\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "df = df[(df['artist_top_genre'] == 'afro dancehall') | (df['artist_top_genre'] == 'afropop') | (df['artist_top_genre'] == 'nigerian pop')]\n", + "df = df[(df['popularity'] > 0)]\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "corrmat = df.corr()\n", + "f, ax = plt.subplots(figsize=(12, 9))\n", + "sns.heatmap(corrmat, vmax=.8, square=True);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.set_theme(style=\"ticks\")\n", + "\n", + "# Show the joint distribution using kernel density estimation\n", + "g = sns.jointplot(\n", + " data=df,\n", + " x=\"popularity\", y=\"danceability\", hue=\"artist_top_genre\",\n", + " kind=\"kde\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages/seaborn/axisgrid.py:337: UserWarning: The `size` parameter has been renamed to `height`; please update your code.\n", + " warnings.warn(msg, UserWarning)\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.FacetGrid(df, hue=\"artist_top_genre\", size=5) \\\n", + " .map(plt.scatter, \"popularity\", \"danceability\") \\\n", + " .add_legend()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้อง แต่โปรดทราบว่าการแปลอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่แม่นยำ เอกสารต้นฉบับในภาษาดั้งเดิมควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ แนะนำให้ใช้บริการแปลภาษาจากผู้เชี่ยวชาญ เราไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความที่ผิดพลาดซึ่งเกิดจากการใช้การแปลนี้\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" + }, + "kernelspec": { + "display_name": "Python 3.7.0 64-bit ('3.7')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.9" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "c61deff2839902ac8cb4ed411eb10fee", + "translation_date": "2025-09-06T14:09:46+00:00", + "source_file": "5-Clustering/1-Visualize/solution/notebook.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/th/5-Clustering/2-K-Means/notebook.ipynb b/translations/th/5-Clustering/2-K-Means/notebook.ipynb new file mode 100644 index 000000000..b5b8958ff --- /dev/null +++ b/translations/th/5-Clustering/2-K-Means/notebook.ipynb @@ -0,0 +1,229 @@ +{ + "metadata": { + 
"language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "3e5c8ab363e8d88f566d4365efc7e0bd", + "translation_date": "2025-09-06T14:19:51+00:00", + "source_file": "5-Clustering/2-K-Means/notebook.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: seaborn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.11.1)\n", + "Requirement already satisfied: numpy>=1.15 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.19.2)\n", + "Requirement already satisfied: pandas>=0.23 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.1.2)\n", + "Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.4.1)\n", + "Requirement already satisfied: matplotlib>=2.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (3.1.0)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2.8.0)\n", + "Requirement already satisfied: pytz>=2017.2 
in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2019.1)\n", + "Requirement already satisfied: cycler>=0.10 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (1.1.0)\n", + "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (2.4.0)\n", + "Requirement already satisfied: six>=1.5 in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from python-dateutil>=2.7.3->pandas>=0.23->seaborn) (1.12.0)\n", + "Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from kiwisolver>=1.0.1->matplotlib>=2.2->seaborn) (45.1.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install seaborn" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "0 Sparky Mandy & The Jungle \n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "2 LITT! LITT! \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. 
\n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "0 Cruel Santino alternative r&b 2019 144000 48 \n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "2 AYLØ indie r&b 2018 207758 40 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "0 0.666 0.8510 0.420 0.534000 0.1100 -6.699 \n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "2 0.836 0.2720 0.564 0.000537 0.1100 -7.127 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "\n", + " speechiness tempo time_signature \n", + "0 0.0829 133.015 5 \n", + "1 0.3600 129.993 3 \n", + "2 0.0424 130.005 4 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
0SparkyMandy & The JungleCruel Santinoalternative r&b2019144000480.6660.85100.4200.5340000.1100-6.6990.0829133.0155
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
2LITT!LITT!AYLØindie r&b2018207758400.8360.27200.5640.0005370.1100-7.1270.0424130.0054
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
\n
" + }, + "metadata": {}, + "execution_count": 6 + } + ], + "source": [ + "\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "\n", + "\n", + "df = pd.read_csv(\"../data/nigerian-songs.csv\")\n", + "df.head()" + ] + }, + { + "source": [ + "เราจะมุ่งเน้นเพียง 3 ประเภทเท่านั้น บางทีเราอาจสร้าง 3 กลุ่มขึ้นมาได้!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "metadata": {}, + "execution_count": 7 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "df = df[(df['artist_top_genre'] == 'afro dancehall') | (df['artist_top_genre'] == 'afropop') | (df['artist_top_genre'] == 'nigerian pop')]\n", + "df = df[(df['popularity'] > 0)]\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. 
\n", + "5 Kasala Pioneers \n", + "6 Pull Up Everything Pretty \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "5 DRB Lasgidi nigerian pop 2020 184800 26 \n", + "6 prettyboydo nigerian pop 2018 202648 29 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "5 0.803 0.1270 0.525 0.000007 0.1290 -10.034 \n", + "6 0.818 0.4520 0.587 0.004490 0.5900 -9.840 \n", + "\n", + " speechiness tempo time_signature \n", + "1 0.3600 129.993 3 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 \n", + "5 0.1970 100.103 4 \n", + "6 0.1990 95.842 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
5KasalaPioneersDRB Lasgidinigerian pop2020184800260.8030.12700.5250.0000070.1290-10.0340.1970100.1034
6Pull UpEverything Prettyprettyboydonigerian pop2018202648290.8180.45200.5870.0044900.5900-9.8400.199095.8424
\n
" + }, + "metadata": {}, + "execution_count": 8 + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้องมากที่สุด แต่โปรดทราบว่าการแปลอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่ถูกต้อง เอกสารต้นฉบับในภาษาดั้งเดิมควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษามืออาชีพ เราจะไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความที่ผิดพลาดซึ่งเกิดจากการใช้การแปลนี้\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/th/5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb b/translations/th/5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb new file mode 100644 index 000000000..c08e56927 --- /dev/null +++ b/translations/th/5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb @@ -0,0 +1,639 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "anaconda-cloud": "", + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "3.4.1" + }, + "colab": { + "name": "lesson_14.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "coopTranslator": { + "original_hash": "ad65fb4aad0a156b42216e4929f490fc", + "translation_date": "2025-09-06T14:29:51+00:00", + "source_file": "5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb", + "language_code": "th" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "GULATlQXLXyR" + }, + "source": [ + "## สำรวจการจัดกลุ่ม K-Means ด้วย R และหลักการข้อมูลแบบ Tidy\n", + "\n", + "### [**แบบทดสอบก่อนเรียน**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/29/)\n", + "\n", + "ในบทเรียนนี้ คุณจะได้เรียนรู้วิธีสร้างกลุ่มโดยใช้แพ็กเกจ 
Tidymodels และแพ็กเกจอื่นๆ ในระบบนิเวศของ R (เราจะเรียกพวกมันว่าเพื่อน 🧑‍🤝‍🧑) รวมถึงชุดข้อมูลเพลงไนจีเรียที่คุณนำเข้าไว้ก่อนหน้านี้ เราจะครอบคลุมพื้นฐานของ K-Means สำหรับการจัดกลุ่ม โปรดจำไว้ว่า ตามที่คุณได้เรียนรู้ในบทเรียนก่อนหน้า มีหลายวิธีในการทำงานกับการจัดกลุ่ม และวิธีที่คุณใช้ขึ้นอยู่กับข้อมูลของคุณ เราจะลองใช้ K-Means เนื่องจากเป็นเทคนิคการจัดกลุ่มที่พบได้บ่อยที่สุด มาเริ่มกันเลย!\n", + "\n", + "คำศัพท์ที่คุณจะได้เรียนรู้:\n", + "\n", + "- การให้คะแนน Silhouette\n", + "\n", + "- วิธี Elbow\n", + "\n", + "- ความเฉื่อย (Inertia)\n", + "\n", + "- ความแปรปรวน (Variance)\n", + "\n", + "### **บทนำ**\n", + "\n", + "[K-Means Clustering](https://wikipedia.org/wiki/K-means_clustering) เป็นวิธีที่มาจากสาขาการประมวลผลสัญญาณ ใช้ในการแบ่งและจัดกลุ่มข้อมูลออกเป็น `k clusters` โดยอิงจากความคล้ายคลึงกันของคุณลักษณะ\n", + "\n", + "กลุ่มเหล่านี้สามารถแสดงผลเป็น [แผนภาพ Voronoi](https://wikipedia.org/wiki/Voronoi_diagram) ซึ่งประกอบด้วยจุด (หรือ 'seed') และพื้นที่ที่เกี่ยวข้อง\n", + "\n", + "

\n", + " \n", + "

อินโฟกราฟิกโดย Jen Looper
\n", + "\n", + "ขั้นตอนของ K-Means clustering มีดังนี้:\n", + "\n", + "1. นักวิทยาศาสตร์ข้อมูลเริ่มต้นโดยกำหนดจำนวนกลุ่มที่ต้องการสร้าง\n", + "\n", + "2. จากนั้น อัลกอริทึมจะสุ่มเลือก K ข้อมูลจากชุดข้อมูลเพื่อใช้เป็นจุดศูนย์กลางเริ่มต้นสำหรับกลุ่ม (หรือที่เรียกว่า centroids)\n", + "\n", + "3. ต่อมา ข้อมูลที่เหลือทั้งหมดจะถูกจัดกลุ่มไปยัง centroid ที่ใกล้ที่สุด\n", + "\n", + "4. จากนั้น คำนวณค่าเฉลี่ยใหม่ของแต่ละกลุ่ม และ centroid จะถูกย้ายไปยังตำแหน่งค่าเฉลี่ยนั้น\n", + "\n", + "5. เมื่อจุดศูนย์กลางถูกคำนวณใหม่แล้ว ข้อมูลทุกชิ้นจะถูกตรวจสอบอีกครั้งเพื่อดูว่ามันอาจใกล้กับกลุ่มอื่นมากกว่า ข้อมูลทั้งหมดจะถูกจัดกลุ่มใหม่โดยใช้ค่าเฉลี่ยของกลุ่มที่อัปเดต ขั้นตอนการจัดกลุ่มและการอัปเดต centroid จะถูกทำซ้ำจนกว่าการจัดกลุ่มจะหยุดเปลี่ยนแปลง (หรือเมื่อเกิดการลู่เข้า) โดยทั่วไป อัลกอริทึมจะหยุดเมื่อการเคลื่อนที่ของ centroid ในแต่ละรอบใหม่มีน้อยมาก และกลุ่มกลายเป็นคงที่\n", + "\n", + "
\n", + "\n", + "> โปรดทราบว่าเนื่องจากการสุ่มเลือกข้อมูลเริ่มต้น k ที่ใช้เป็น centroid การดำเนินการอาจให้ผลลัพธ์ที่แตกต่างกันเล็กน้อยในแต่ละครั้ง ด้วยเหตุนี้ อัลกอริทึมส่วนใหญ่จึงใช้การเริ่มต้นแบบสุ่มหลายครั้ง (*random starts*) และเลือกการวนซ้ำที่มีค่า WCSS ต่ำที่สุด ดังนั้นจึงแนะนำอย่างยิ่งให้รัน K-Means ด้วยค่าของ *nstart* หลายค่าเพื่อหลีกเลี่ยง *local optimum* ที่ไม่พึงประสงค์\n", + "\n", + "
\n", + "\n", + "แอนิเมชันสั้นๆ นี้ใช้ [งานศิลปะ](https://github.com/allisonhorst/stats-illustrations) ของ Allison Horst อธิบายกระบวนการจัดกลุ่ม:\n", + "\n", + "

\n", + " \n", + "

งานศิลปะโดย @allison_horst
\n", + "\n", + "คำถามพื้นฐานที่เกิดขึ้นในการจัดกลุ่มคือ: คุณจะรู้ได้อย่างไรว่าควรแยกข้อมูลออกเป็นกี่กลุ่ม? ข้อเสียของการใช้ K-Means คือคุณจะต้องกำหนด `k` ซึ่งก็คือจำนวน `centroids` โชคดีที่ `elbow method` ช่วยประมาณค่าที่ดีสำหรับการเริ่มต้น `k` คุณจะได้ลองใช้ในอีกสักครู่\n", + "\n", + "### \n", + "\n", + "**ข้อกำหนดเบื้องต้น**\n", + "\n", + "เราจะเริ่มต้นจากจุดที่เราหยุดไว้ใน [บทเรียนก่อนหน้า](https://github.com/microsoft/ML-For-Beginners/blob/main/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb) ซึ่งเราได้วิเคราะห์ชุดข้อมูล สร้างภาพจำนวนมาก และกรองชุดข้อมูลไปยังข้อมูลที่น่าสนใจ อย่าลืมตรวจสอบ!\n", + "\n", + "เราจะต้องใช้แพ็กเกจบางตัวเพื่อดำเนินการในโมดูลนี้ คุณสามารถติดตั้งได้โดยใช้: `install.packages(c('tidyverse', 'tidymodels', 'cluster', 'summarytools', 'plotly', 'paletteer', 'factoextra', 'patchwork'))`\n", + "\n", + "หรือใช้สคริปต์ด้านล่างเพื่อตรวจสอบว่าคุณมีแพ็กเกจที่จำเป็นสำหรับโมดูลนี้หรือไม่ และติดตั้งให้ในกรณีที่ขาดหาย\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "ah_tBi58LXyi" + }, + "source": [ + "suppressWarnings(if(!require(\"pacman\")) install.packages(\"pacman\"))\n", + "\n", + "pacman::p_load('tidyverse', 'tidymodels', 'cluster', 'summarytools', 'plotly', 'paletteer', 'factoextra', 'patchwork')\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7e--UCUTLXym" + }, + "source": [ + "มาลุยกันเลย!\n", + "\n", + "## 1. 
เต้นรำกับข้อมูล: คัดเลือก 3 แนวเพลงที่ได้รับความนิยมมากที่สุด\n", + "\n", + "นี่คือการทบทวนสิ่งที่เราได้ทำในบทเรียนก่อนหน้า มาลองจัดการข้อมูลกันเถอะ!\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Ycamx7GGLXyn" + }, + "source": [ + "# Load the core tidyverse and make it available in your current R session\n", + "library(tidyverse)\n", + "\n", + "# Import the data into a tibble\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/5-Clustering/data/nigerian-songs.csv\", show_col_types = FALSE)\n", + "\n", + "# Narrow down to top 3 popular genres\n", + "nigerian_songs <- df %>% \n", + " # Concentrate on top 3 genres\n", + " filter(artist_top_genre %in% c(\"afro dancehall\", \"afropop\",\"nigerian pop\")) %>% \n", + " # Remove unclassified observations\n", + " filter(popularity != 0)\n", + "\n", + "\n", + "\n", + "# Visualize popular genres using bar plots\n", + "theme_set(theme_light())\n", + "nigerian_songs %>%\n", + " count(artist_top_genre) %>%\n", + " ggplot(mapping = aes(x = artist_top_genre, y = n,\n", + " fill = artist_top_genre)) +\n", + " geom_col(alpha = 0.8) +\n", + " paletteer::scale_fill_paletteer_d(\"ggsci::category10_d3\") +\n", + " ggtitle(\"Top genres\") +\n", + " theme(plot.title = element_text(hjust = 0.5))\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b5h5zmkPLXyp" + }, + "source": [ + "🤩 นั่นไปได้ดีมาก!\n", + "\n", + "## 2. การสำรวจข้อมูลเพิ่มเติม\n", + "\n", + "ข้อมูลนี้สะอาดแค่ไหน? 
มาลองตรวจสอบค่าผิดปกติด้วยการใช้แผนภาพกล่อง (box plot) กัน เราจะเน้นไปที่คอลัมน์ตัวเลขที่มีค่าผิดปกติน้อยกว่า (แม้ว่าคุณจะสามารถลบค่าผิดปกติออกได้) แผนภาพกล่องจะช่วยแสดงช่วงของข้อมูลและช่วยเลือกว่าคอลัมน์ใดควรนำมาใช้ อย่างไรก็ตาม แผนภาพกล่องไม่ได้แสดงค่าความแปรปรวน ซึ่งเป็นองค์ประกอบสำคัญของข้อมูลที่สามารถจัดกลุ่มได้ดี กรุณาดู [การอภิปรายนี้](https://stats.stackexchange.com/questions/91536/deduce-variance-from-boxplot) เพื่ออ่านเพิ่มเติม\n", + "\n", + "[แผนภาพกล่อง](https://en.wikipedia.org/wiki/Box_plot) ถูกใช้เพื่อแสดงการกระจายของข้อมูล `ตัวเลข` ในรูปแบบกราฟิก ดังนั้นเรามาเริ่มต้นด้วยการ *เลือก* คอลัมน์ตัวเลขทั้งหมดควบคู่ไปกับแนวเพลงยอดนิยมกัน\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "HhNreJKLLXyq" + }, + "source": [ + "# Select top genre column and all other numeric columns\n", + "df_numeric <- nigerian_songs %>% \n", + " select(artist_top_genre, where(is.numeric)) \n", + "\n", + "# Display the data\n", + "df_numeric %>% \n", + " slice_head(n = 5)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uYXrwJRaLXyq" + }, + "source": [ + "ดูว่า `where` ใน selection helper ทำให้เรื่องนี้ง่ายขึ้นแค่ไหน 💁? 
ลองสำรวจฟังก์ชันอื่นๆ ได้ [ที่นี่](https://tidyselect.r-lib.org/) \n", + "\n", + "เนื่องจากเราจะสร้าง boxplot สำหรับแต่ละคุณสมบัติที่เป็นตัวเลข และเราต้องการหลีกเลี่ยงการใช้ loops ลองปรับรูปแบบข้อมูลของเราให้เป็น *longer* format ซึ่งจะช่วยให้เราใช้ `facets` ได้ - subplots ที่แสดงข้อมูลแต่ละชุดแยกกัน\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "gd5bR3f8LXys" + }, + "source": [ + "# Pivot data from wide to long\n", + "df_numeric_long <- df_numeric %>% \n", + " pivot_longer(!artist_top_genre, names_to = \"feature_names\", values_to = \"values\") \n", + "\n", + "# Print out data\n", + "df_numeric_long %>% \n", + " slice_head(n = 15)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-7tE1swnLXyv" + }, + "source": [ + "ถึงเวลาสนุกกับ `ggplots` กันแล้ว! แล้วเราจะใช้ `geom` อะไรดี?\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "r88bIsyuLXyy" + }, + "source": [ + "# Make a box plot\n", + "df_numeric_long %>% \n", + " ggplot(mapping = aes(x = feature_names, y = values, fill = feature_names)) +\n", + " geom_boxplot() +\n", + " facet_wrap(~ feature_names, ncol = 4, scales = \"free\") +\n", + " theme(legend.position = \"none\")\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EYVyKIUELXyz" + }, + "source": [ + "ง่ายมาก!\n", + "\n", + "ตอนนี้เราสามารถเห็นได้ว่าข้อมูลนี้ค่อนข้างมีความแปรปรวน: เมื่อดูแต่ละคอลัมน์ในรูปแบบกล่องแสดงค่ากระจาย คุณจะเห็นค่าผิดปกติ คุณอาจจะไล่ดูชุดข้อมูลและลบค่าผิดปกติเหล่านี้ออกไป แต่การทำเช่นนั้นจะทำให้ข้อมูลเหลือน้อยลงมาก\n", + "\n", + "สำหรับตอนนี้ เรามาเลือกคอลัมน์ที่เราจะใช้สำหรับการฝึกการจัดกลุ่มกันดีกว่า เราจะเลือกคอลัมน์ตัวเลขที่มีช่วงค่าคล้ายกัน เราสามารถเข้ารหัส `artist_top_genre` เป็นตัวเลขได้ แต่ตอนนี้เราจะตัดมันออกไปก่อน\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "-wkpINyZLXy0" + }, + "source": [ + "# Select variables with similar ranges\n", + "df_numeric_select 
<- df_numeric %>% \n", + " select(popularity, danceability, acousticness, loudness, energy) \n", + "\n", + "# Normalize data\n", + "# df_numeric_select <- scale(df_numeric_select)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D7dLzgpqLXy1" + }, + "source": [ + "## 3. การคำนวณ k-means clustering ใน R\n", + "\n", + "เราสามารถคำนวณ k-means ใน R ได้โดยใช้ฟังก์ชัน `kmeans` ที่มีอยู่แล้ว ดูเพิ่มเติมได้ที่ `help(\"kmeans()\")` ฟังก์ชัน `kmeans()` รับข้อมูลในรูปแบบ data frame ที่มีคอลัมน์เป็นตัวเลขทั้งหมดเป็นอาร์กิวเมนต์หลัก\n", + "\n", + "ขั้นตอนแรกในการใช้ k-means clustering คือการกำหนดจำนวนคลัสเตอร์ (k) ที่จะสร้างขึ้นในผลลัพธ์สุดท้าย เราทราบว่ามี 3 ประเภทของเพลงที่เราแบ่งออกมาจากชุดข้อมูล ดังนั้นลองใช้ค่า k เป็น 3:\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "uC4EQ5w7LXy5" + }, + "source": [ + "set.seed(2056)\n", + "# Kmeans clustering for 3 clusters\n", + "kclust <- kmeans(\n", + " df_numeric_select,\n", + " # Specify the number of clusters\n", + " centers = 3,\n", + " # How many random initial configurations\n", + " nstart = 25\n", + ")\n", + "\n", + "# Display clustering object\n", + "kclust\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hzfhscWrLXy-" + }, + "source": [ + "วัตถุ kmeans มีข้อมูลหลายส่วนที่อธิบายได้ดีใน `help(\"kmeans()\")` สำหรับตอนนี้ เรามาเน้นที่บางส่วนกัน เราเห็นว่าข้อมูลถูกจัดกลุ่มเป็น 3 กลุ่ม โดยมีขนาด 65, 110, 111 ผลลัพธ์ยังมีจุดศูนย์กลางของกลุ่ม (ค่าเฉลี่ย) สำหรับ 3 กลุ่มใน 5 ตัวแปร\n", + "\n", + "เวกเตอร์การจัดกลุ่มคือการกำหนดกลุ่มสำหรับแต่ละการสังเกตการณ์ ลองใช้ฟังก์ชัน `augment` เพื่อเพิ่มการกำหนดกลุ่มลงในชุดข้อมูลเดิม\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "0XwwpFGQLXy_" + }, + "source": [ + "# Add predicted cluster assignment to data set\n", + "augment(kclust, df_numeric_select) %>% \n", + " relocate(.cluster) %>% \n", + " slice_head(n = 10)\n" + ], + 
"execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NXIVXXACLXzA" + }, + "source": [ + "เยี่ยมเลย! ตอนนี้เราได้แบ่งชุดข้อมูลของเราออกเป็น 3 กลุ่มแล้ว ทีนี้คำถามคือ การจัดกลุ่มของเราดีแค่ไหน 🤷? มาดูที่ `Silhouette score` กันเถอะ\n", + "\n", + "### **Silhouette score**\n", + "\n", + "[การวิเคราะห์ Silhouette](https://en.wikipedia.org/wiki/Silhouette_(clustering)) สามารถใช้เพื่อศึกษาระยะห่างระหว่างกลุ่มที่ได้จากการจัดกลุ่ม คะแนนนี้มีค่าตั้งแต่ -1 ถึง 1 โดยถ้าคะแนนใกล้ 1 หมายความว่ากลุ่มนั้นมีความหนาแน่นและแยกออกจากกลุ่มอื่นได้ดี ค่าใกล้ 0 หมายถึงกลุ่มที่มีการทับซ้อนกัน โดยตัวอย่างอยู่ใกล้กับเส้นแบ่งเขตของกลุ่มข้างเคียง [แหล่งที่มา](https://dzone.com/articles/kmeans-silhouette-score-explained-with-python-exam)\n", + "\n", + "วิธีการเฉลี่ย Silhouette จะคำนวณค่าเฉลี่ยของ Silhouette ของตัวอย่างสำหรับค่าต่าง ๆ ของ *k* คะแนน Silhouette เฉลี่ยที่สูงบ่งบอกถึงการจัดกลุ่มที่ดี\n", + "\n", + "ฟังก์ชัน `silhouette` ในแพ็กเกจ cluster ใช้สำหรับคำนวณค่าเฉลี่ยความกว้างของ Silhouette\n", + "\n", + "> Silhouette สามารถคำนวณได้ด้วย [ระยะทาง](https://en.wikipedia.org/wiki/Distance \"Distance\") เมตริกใด ๆ เช่น [ระยะทางแบบยุคลิด](https://en.wikipedia.org/wiki/Euclidean_distance \"Euclidean distance\") หรือ [ระยะทางแบบแมนฮัตตัน](https://en.wikipedia.org/wiki/Manhattan_distance \"Manhattan distance\") ซึ่งเราได้พูดถึงใน [บทเรียนก่อนหน้า](https://github.com/microsoft/ML-For-Beginners/blob/main/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb)\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Jn0McL28LXzB" + }, + "source": [ + "# Load cluster package\n", + "library(cluster)\n", + "\n", + "# Compute average silhouette score\n", + "ss <- silhouette(kclust$cluster,\n", + " # Compute euclidean distance\n", + " dist = dist(df_numeric_select))\n", + "mean(ss[, 3])\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QyQRn97nLXzC" + }, + "source": [ + "คะแนนของเราคือ 
**.549** ซึ่งอยู่ตรงกลางพอดี นี่บ่งชี้ว่าข้อมูลของเราไม่ได้เหมาะสมกับการจัดกลุ่มประเภทนี้มากนัก ลองมาดูกันว่าเราสามารถยืนยันข้อสันนิษฐานนี้ด้วยการมองเห็นได้หรือไม่ [แพ็กเกจ factoextra](https://rpkgs.datanovia.com/factoextra/index.html) มีฟังก์ชัน (`fviz_cluster()`) สำหรับการแสดงผลการจัดกลุ่มแบบภาพ\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "7a6Km1_FLXzD" + }, + "source": [ + "library(factoextra)\n", + "\n", + "# Visualize clustering results\n", + "fviz_cluster(kclust, df_numeric_select)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IBwCWt-0LXzD" + }, + "source": [ + "การที่กลุ่มข้อมูลมีการทับซ้อนกันแสดงให้เห็นว่าข้อมูลของเราอาจไม่เหมาะสมกับการจัดกลุ่มประเภทนี้นัก แต่เรามาลองทำต่อกันเถอะ\n", + "\n", + "## 4. การกำหนดจำนวนกลุ่มที่เหมาะสม\n", + "\n", + "คำถามพื้นฐานที่มักเกิดขึ้นใน K-Means clustering คือ - หากเราไม่มีป้ายกำกับคลาสที่รู้ล่วงหน้า เราจะทราบได้อย่างไรว่าควรแบ่งข้อมูลออกเป็นกี่กลุ่ม?\n", + "\n", + "วิธีหนึ่งที่เราสามารถลองหาคำตอบได้คือการใช้ตัวอย่างข้อมูลเพื่อ `สร้างโมเดลการจัดกลุ่มหลายชุด` โดยเพิ่มจำนวนกลุ่มทีละขั้น (เช่น จาก 1-10) และประเมินเมตริกการจัดกลุ่ม เช่น **Silhouette score**\n", + "\n", + "เรามาลองกำหนดจำนวนกลุ่มที่เหมาะสมโดยการคำนวณอัลกอริธึมการจัดกลุ่มสำหรับค่าต่าง ๆ ของ *k* และประเมิน **Within Cluster Sum of Squares** (WCSS) กันดีกว่า โดย WCSS หรือผลรวมของกำลังสองภายในกลุ่มทั้งหมดนั้นใช้วัดความกระชับของการจัดกลุ่ม ซึ่งเราต้องการให้ค่าต่ำที่สุดเท่าที่จะเป็นไปได้ เพราะค่าที่ต่ำกว่าหมายความว่าจุดข้อมูลอยู่ใกล้กันมากขึ้น\n", + "\n", + "เรามาสำรวจผลกระทบของการเลือกค่า `k` ที่แตกต่างกัน ตั้งแต่ 1 ถึง 10 ต่อการจัดกลุ่มนี้กันเถอะ\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "hSeIiylDLXzE" + }, + "source": [ + "# Create a series of clustering models\n", + "kclusts <- tibble(k = 1:10) %>% \n", + " # Perform kmeans clustering for 1,2,3 ... 
,10 clusters\n", + " mutate(model = map(k, ~ kmeans(df_numeric_select, centers = .x, nstart = 25)),\n", + " # Farm out clustering metrics eg WCSS\n", + " glanced = map(model, ~ glance(.x))) %>% \n", + " unnest(cols = glanced)\n", + " \n", + "\n", + "# View clustering results\n", + "kclusts\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "m7rS2U1eLXzE" + }, + "source": [ + "ตอนนี้เรามีผลรวมกำลังสองภายในคลัสเตอร์ทั้งหมด (tot.withinss) สำหรับแต่ละอัลกอริทึมการจัดกลุ่มที่มีจุดศูนย์กลาง *k* จุด เราใช้ [วิธีข้อศอก](https://en.wikipedia.org/wiki/Elbow_method_(clustering)) เพื่อหาจำนวนคลัสเตอร์ที่เหมาะสมที่สุด วิธีนี้ประกอบด้วยการวาดกราฟ WCSS เป็นฟังก์ชันของจำนวนคลัสเตอร์ และเลือก [ข้อศอกของกราฟ](https://en.wikipedia.org/wiki/Elbow_of_the_curve \"Elbow of the curve\") เป็นจำนวนคลัสเตอร์ที่ใช้\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "o_DjHGItLXzF" + }, + "source": [ + "set.seed(2056)\n", + "# Use elbow method to determine optimum number of clusters\n", + "kclusts %>% \n", + " ggplot(mapping = aes(x = k, y = tot.withinss)) +\n", + " geom_line(size = 1.2, alpha = 0.8, color = \"#FF7F0EFF\") +\n", + " geom_point(size = 2, color = \"#FF7F0EFF\")\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pLYyt5XSLXzG" + }, + "source": [ + "กราฟแสดงการลดลงอย่างมากของ WCSS (ซึ่งหมายถึง *ความกระชับ* ที่มากขึ้น) เมื่อจำนวนคลัสเตอร์เพิ่มขึ้นจากหนึ่งเป็นสอง และมีการลดลงที่สังเกตได้ชัดเจนอีกครั้งจากสองเป็นสามคลัสเตอร์ หลังจากนั้น การลดลงจะไม่เด่นชัดเท่าเดิม ทำให้เกิด `elbow` 💪ในกราฟประมาณที่สามคลัสเตอร์ นี่เป็นสัญญาณที่ดีที่บ่งบอกว่ามีคลัสเตอร์ของจุดข้อมูลที่แยกกันได้ดีประมาณสองถึงสามคลัสเตอร์\n", + "\n", + "ตอนนี้เราสามารถดำเนินการและดึงโมเดลการจัดกลุ่มที่ `k = 3` ได้แล้ว:\n", + "\n", + "> `pull()`: ใช้สำหรับดึงคอลัมน์เดียว\n", + ">\n", + "> `pluck()`: ใช้สำหรับเข้าถึงโครงสร้างข้อมูล เช่น ลิสต์\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": 
"JP_JPKBILXzG" + }, + "source": [ + "# Extract k = 3 clustering\n", + "final_kmeans <- kclusts %>% \n", + " filter(k == 3) %>% \n", + " pull(model) %>% \n", + " pluck(1)\n", + "\n", + "\n", + "final_kmeans\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l_PDTu8tLXzI" + }, + "source": [ + "เยี่ยมเลย! มาลองดูการจัดกลุ่มที่ได้กันดีกว่า สนใจเพิ่มความโต้ตอบด้วย `plotly` ไหม?\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "dNcleFe-LXzJ" + }, + "source": [ + "# Add predicted cluster assignment to data set\n", + "results <- augment(final_kmeans, df_numeric_select) %>% \n", + " bind_cols(df_numeric %>% select(artist_top_genre)) \n", + "\n", + "# Plot cluster assignments\n", + "clust_plt <- results %>% \n", + " ggplot(mapping = aes(x = popularity, y = danceability, color = .cluster, shape = artist_top_genre)) +\n", + " geom_point(size = 2, alpha = 0.8) +\n", + " paletteer::scale_color_paletteer_d(\"ggthemes::Tableau_10\")\n", + "\n", + "ggplotly(clust_plt)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6JUM_51VLXzK" + }, + "source": [ + "บางทีเราอาจคาดหวังว่ากลุ่มแต่ละกลุ่ม (แสดงด้วยสีต่างๆ) จะมีประเภทที่แตกต่างกัน (แสดงด้วยรูปร่างต่างๆ)\n", + "\n", + "ลองมาดูความแม่นยำของโมเดลกัน\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "HdIMUGq7LXzL" + }, + "source": [ + "# Assign genres to predefined integers\n", + "label_count <- results %>% \n", + " group_by(artist_top_genre) %>% \n", + " mutate(id = cur_group_id()) %>% \n", + " ungroup() %>% \n", + " summarise(correct_labels = sum(.cluster == id))\n", + "\n", + "\n", + "# Print results \n", + "cat(\"Result:\", label_count$correct_labels, \"out of\", nrow(results), \"samples were correctly labeled.\")\n", + "\n", + "cat(\"\\nAccuracy score:\", label_count$correct_labels/nrow(results))\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + 
"metadata": { + "id": "C50wvaAOLXzM" + }, + "source": [ + "ความแม่นยำของโมเดลนี้ไม่แย่ แต่ก็ไม่ได้ดีมากนัก อาจเป็นเพราะข้อมูลไม่เหมาะสมกับการใช้ K-Means Clustering ข้อมูลนี้มีความไม่สมดุลกันมาก มีความสัมพันธ์ระหว่างคอลัมน์น้อย และมีความแปรปรวนระหว่างค่าของคอลัมน์สูงเกินไปที่จะจัดกลุ่มได้ดี ในความเป็นจริง กลุ่มที่เกิดขึ้นอาจได้รับอิทธิพลหรือถูกบิดเบือนอย่างมากจากสามหมวดหมู่ของประเภทที่เรากำหนดไว้ข้างต้น\n", + "\n", + "อย่างไรก็ตาม นี่เป็นกระบวนการเรียนรู้ที่น่าสนใจมาก!\n", + "\n", + "ในเอกสารของ Scikit-learn คุณจะเห็นว่าโมเดลแบบนี้ ที่มีการจัดกลุ่มที่ไม่ชัดเจน มีปัญหาเรื่อง 'ความแปรปรวน':\n", + "\n", + "

\n", + " \n", + "

อินโฟกราฟิกจาก Scikit-learn
\n", + "\n", + "\n", + "\n", + "## **ความแปรปรวน**\n", + "\n", + "ความแปรปรวนถูกนิยามว่าเป็น \"ค่าเฉลี่ยของผลต่างกำลังสองจากค่าเฉลี่ย\" [แหล่งข้อมูล](https://www.mathsisfun.com/data/standard-deviation.html) ในบริบทของปัญหาการจัดกลุ่มนี้ หมายถึงข้อมูลที่ค่าต่างๆ ในชุดข้อมูลมีแนวโน้มที่จะเบี่ยงเบนจากค่าเฉลี่ยมากเกินไป\n", + "\n", + "✅ นี่เป็นช่วงเวลาที่ดีในการคิดถึงวิธีต่างๆ ที่คุณสามารถแก้ไขปัญหานี้ได้ ลองปรับข้อมูลอีกเล็กน้อย? ใช้คอลัมน์ที่แตกต่างกัน? ใช้อัลกอริทึมที่แตกต่างกัน? เคล็ดลับ: ลอง [ปรับขนาดข้อมูลของคุณ](https://www.mygreatlearning.com/blog/learning-data-science-with-k-means-clustering/) เพื่อทำให้ข้อมูลเป็นมาตรฐานและทดสอบคอลัมน์อื่นๆ\n", + "\n", + "> ลองใช้ '[เครื่องคำนวณความแปรปรวน](https://www.calculatorsoup.com/calculators/statistics/variance-calculator.php)' เพื่อทำความเข้าใจแนวคิดนี้เพิ่มเติม\n", + "\n", + "------------------------------------------------------------------------\n", + "\n", + "## **🚀ความท้าทาย**\n", + "\n", + "ใช้เวลาสักครู่กับโน้ตบุ๊กนี้ ปรับพารามิเตอร์ต่างๆ คุณสามารถปรับปรุงความแม่นยำของโมเดลได้โดยการทำความสะอาดข้อมูลเพิ่มเติม (เช่น การลบค่าผิดปกติ)? 
คุณสามารถใช้น้ำหนักเพื่อให้ความสำคัญกับตัวอย่างข้อมูลบางตัวมากขึ้น คุณสามารถทำอะไรอีกเพื่อสร้างกลุ่มที่ดีกว่า?\n", + "\n", + "เคล็ดลับ: ลองปรับขนาดข้อมูลของคุณ มีโค้ดที่ถูกคอมเมนต์ไว้ในโน้ตบุ๊กที่เพิ่มการปรับขนาดมาตรฐานเพื่อทำให้คอลัมน์ข้อมูลมีลักษณะคล้ายกันมากขึ้นในแง่ของช่วง คุณจะพบว่าแม้คะแนน silhouette จะลดลง แต่ 'จุดหัก' ในกราฟข้อศอกจะเรียบขึ้น นี่เป็นเพราะการปล่อยให้ข้อมูลไม่ได้ปรับขนาดทำให้ข้อมูลที่มีความแปรปรวนน้อยมีน้ำหนักมากขึ้น อ่านเพิ่มเติมเกี่ยวกับปัญหานี้ [ที่นี่](https://stats.stackexchange.com/questions/21222/are-mean-normalization-and-feature-scaling-needed-for-k-means-clustering/21226#21226)\n", + "\n", + "## [**แบบทดสอบหลังการบรรยาย**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/30/)\n", + "\n", + "## **การทบทวนและการศึกษาด้วยตนเอง**\n", + "\n", + "- ลองดูตัวจำลอง K-Means [เช่นนี้](https://user.ceng.metu.edu.tr/~akifakkus/courses/ceng574/k-means/) คุณสามารถใช้เครื่องมือนี้เพื่อแสดงภาพจุดข้อมูลตัวอย่างและกำหนดจุดศูนย์กลางของมัน คุณสามารถแก้ไขความสุ่มของข้อมูล จำนวนกลุ่ม และจำนวนจุดศูนย์กลาง สิ่งนี้ช่วยให้คุณเข้าใจวิธีการจัดกลุ่มข้อมูลได้หรือไม่?\n", + "\n", + "- นอกจากนี้ ลองดู [เอกสารประกอบเกี่ยวกับ K-Means](https://stanford.edu/~cpiech/cs221/handouts/kmeans.html) จาก Stanford\n", + "\n", + "ต้องการลองใช้ทักษะการจัดกลุ่มที่คุณเพิ่งเรียนรู้กับชุดข้อมูลที่เหมาะสมกับ K-Means clustering? 
โปรดดู:\n", + "\n", + "- [การฝึกและประเมินโมเดลการจัดกลุ่ม](https://rpubs.com/eR_ic/clustering) โดยใช้ Tidymodels และเพื่อนๆ\n", + "\n", + "- [การวิเคราะห์กลุ่มด้วย K-Means](https://uc-r.github.io/kmeans_clustering), UC Business Analytics R Programming Guide\n", + "\n", + "- [การจัดกลุ่มด้วย K-Means โดยใช้หลักการข้อมูลที่เป็นระเบียบ](https://www.tidymodels.org/learn/statistics/k-means/)\n", + "\n", + "## **งานที่ได้รับมอบหมาย**\n", + "\n", + "[ลองใช้วิธีการจัดกลุ่มที่แตกต่างกัน](https://github.com/microsoft/ML-For-Beginners/blob/main/5-Clustering/2-K-Means/assignment.md)\n", + "\n", + "## ขอขอบคุณ:\n", + "\n", + "[Jen Looper](https://www.twitter.com/jenlooper) สำหรับการสร้างเวอร์ชัน Python ดั้งเดิมของโมดูลนี้ ♥️\n", + "\n", + "[`Allison Horst`](https://twitter.com/allison_horst/) สำหรับการสร้างภาพประกอบที่น่าทึ่งซึ่งทำให้ R น่าสนใจและเข้าถึงได้มากขึ้น ค้นหาภาพประกอบเพิ่มเติมได้ที่ [แกลเลอรีของเธอ](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM)\n", + "\n", + "เรียนรู้อย่างมีความสุข,\n", + "\n", + "[Eric](https://twitter.com/ericntay), Gold Microsoft Learn Student Ambassador.\n", + "\n", + "

\n", + " \n", + "

ภาพประกอบโดย @allison_horst
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้อง แต่โปรดทราบว่าการแปลอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่ถูกต้อง เอกสารต้นฉบับในภาษาดั้งเดิมควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษามืออาชีพ เราไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความผิดที่เกิดจากการใช้การแปลนี้\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/th/5-Clustering/2-K-Means/solution/notebook.ipynb b/translations/th/5-Clustering/2-K-Means/solution/notebook.ipynb new file mode 100644 index 000000000..79dfd2f95 --- /dev/null +++ b/translations/th/5-Clustering/2-K-Means/solution/notebook.ipynb @@ -0,0 +1,544 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "e867e87e3129c8875423a82945f4ad5e", + "translation_date": "2025-09-06T14:21:39+00:00", + "source_file": "5-Clustering/2-K-Means/solution/notebook.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: seaborn in 
/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.11.1)\n", + "Requirement already satisfied: pandas>=0.23 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.1.2)\n", + "Requirement already satisfied: matplotlib>=2.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (3.1.0)\n", + "Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.4.1)\n", + "Requirement already satisfied: numpy>=1.15 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.19.2)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2.8.0)\n", + "Requirement already satisfied: pytz>=2017.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2019.1)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (1.1.0)\n", + "Requirement already satisfied: cycler>=0.10 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n", + "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (2.4.0)\n", + "Requirement already satisfied: six>=1.5 in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from python-dateutil>=2.7.3->pandas>=0.23->seaborn) (1.12.0)\n", + "Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from kiwisolver>=1.0.1->matplotlib>=2.2->seaborn) (45.1.0)\n", + "\u001b[33mWARNING: You 
are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install seaborn" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "0 Sparky Mandy & The Jungle \n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "2 LITT! LITT! \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "0 Cruel Santino alternative r&b 2019 144000 48 \n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "2 AYLØ indie r&b 2018 207758 40 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "0 0.666 0.8510 0.420 0.534000 0.1100 -6.699 \n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "2 0.836 0.2720 0.564 0.000537 0.1100 -7.127 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "\n", + " speechiness tempo time_signature \n", + "0 0.0829 133.015 5 \n", + "1 0.3600 129.993 3 \n", + "2 0.0424 130.005 4 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
0SparkyMandy & The JungleCruel Santinoalternative r&b2019144000480.6660.85100.4200.5340000.1100-6.6990.0829133.0155
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
2LITT!LITT!AYLØindie r&b2018207758400.8360.27200.5640.0005370.1100-7.1270.0424130.0054
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
\n
" + }, + "metadata": {}, + "execution_count": 11 + } + ], + "source": [ + "\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "\n", + "\n", + "df = pd.read_csv(\"../../data/nigerian-songs.csv\")\n", + "df.head()" + ] + }, + { + "source": [ + "เราจะมุ่งเน้นเพียง 3 ประเภทเท่านั้น บางทีเราอาจสร้าง 3 กลุ่มขึ้นมาได้!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "metadata": {}, + "execution_count": 12 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "df = df[(df['artist_top_genre'] == 'afro dancehall') | (df['artist_top_genre'] == 'afropop') | (df['artist_top_genre'] == 'nigerian pop')]\n", + "df = df[(df['popularity'] > 0)]\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. 
\n", + "5 Kasala Pioneers \n", + "6 Pull Up Everything Pretty \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "5 DRB Lasgidi nigerian pop 2020 184800 26 \n", + "6 prettyboydo nigerian pop 2018 202648 29 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "5 0.803 0.1270 0.525 0.000007 0.1290 -10.034 \n", + "6 0.818 0.4520 0.587 0.004490 0.5900 -9.840 \n", + "\n", + " speechiness tempo time_signature \n", + "1 0.3600 129.993 3 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 \n", + "5 0.1970 100.103 4 \n", + "6 0.1990 95.842 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
5KasalaPioneersDRB Lasgidinigerian pop2020184800260.8030.12700.5250.0000070.1290-10.0340.1970100.1034
6Pull UpEverything Prettyprettyboydonigerian pop2018202648290.8180.45200.5870.0044900.5900-9.8400.199095.8424
\n
" + }, + "metadata": {}, + "execution_count": 13 + } + ], + "source": [ + "df.head()" + ] + }, + { + "source": [ + "ข้อมูลนี้สะอาดแค่ไหน? ตรวจสอบค่าผิดปกติโดยใช้กล่องแผนภาพ เราจะมุ่งเน้นไปที่คอลัมน์ที่มีค่าผิดปกติน้อยกว่า (แม้ว่าคุณจะสามารถลบค่าผิดปกติออกได้) กล่องแผนภาพสามารถแสดงช่วงของข้อมูลและช่วยเลือกคอลัมน์ที่จะใช้ โปรดทราบว่ากล่องแผนภาพไม่ได้แสดงความแปรปรวน ซึ่งเป็นองค์ประกอบสำคัญของข้อมูลที่สามารถจัดกลุ่มได้ดี (https://stats.stackexchange.com/questions/91536/deduce-variance-from-boxplot)\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 14 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.figure(figsize=(20,20), dpi=200)\n", + "\n", + "plt.subplot(4,3,1)\n", + "sns.boxplot(x = 'popularity', data = df)\n", + "\n", + "plt.subplot(4,3,2)\n", + "sns.boxplot(x = 'acousticness', data = df)\n", + "\n", + "plt.subplot(4,3,3)\n", + "sns.boxplot(x = 'energy', data = df)\n", + "\n", + "plt.subplot(4,3,4)\n", + "sns.boxplot(x = 'instrumentalness', data = df)\n", + "\n", + "plt.subplot(4,3,5)\n", + "sns.boxplot(x = 'liveness', data = df)\n", + "\n", + "plt.subplot(4,3,6)\n", + "sns.boxplot(x = 'loudness', data = df)\n", + "\n", + "plt.subplot(4,3,7)\n", + "sns.boxplot(x = 'speechiness', data = df)\n", + "\n", + "plt.subplot(4,3,8)\n", + "sns.boxplot(x = 'tempo', data = df)\n", + "\n", + "plt.subplot(4,3,9)\n", + "sns.boxplot(x = 
'time_signature', data = df)\n", + "\n", + "plt.subplot(4,3,10)\n", + "sns.boxplot(x = 'danceability', data = df)\n", + "\n", + "plt.subplot(4,3,11)\n", + "sns.boxplot(x = 'length', data = df)\n", + "\n", + "plt.subplot(4,3,12)\n", + "sns.boxplot(x = 'release_date', data = df)" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import LabelEncoder, StandardScaler\n", + "le = LabelEncoder()\n", + "\n", + "# scaler = StandardScaler()\n", + "\n", + "X = df.loc[:, ('artist_top_genre','popularity','danceability','acousticness','loudness','energy')]\n", + "\n", + "y = df['artist_top_genre']\n", + "\n", + "X['artist_top_genre'] = le.fit_transform(X['artist_top_genre'])\n", + "\n", + "# X = scaler.fit_transform(X)\n", + "\n", + "y = le.transform(y)\n", + "\n" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 0, 2, 1, 1, 0, 1, 0, 0,\n", + " 0, 1, 0, 2, 0, 0, 2, 2, 1, 1, 0, 2, 2, 2, 2, 1, 1, 0, 2, 0, 2, 0,\n", + " 2, 0, 0, 1, 1, 2, 1, 0, 0, 2, 2, 2, 2, 1, 1, 0, 1, 2, 2, 1, 2, 2,\n", + " 1, 2, 1, 2, 2, 1, 1, 1, 1, 1, 2, 1, 2, 2, 0, 2, 1, 1, 1, 2, 2, 2,\n", + " 2, 1, 2, 2, 2, 2, 1, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 2, 1, 2, 0,\n", + " 1, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 0, 1, 1, 1, 1, 0, 1, 2, 1, 2,\n", + " 1, 2, 2, 2, 0, 2, 1, 1, 1, 2, 1, 0, 1, 2, 2, 1, 1, 1, 0, 1, 2, 2,\n", + " 2, 1, 1, 0, 1, 2, 1, 1, 1, 1, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2,\n", + " 0, 1, 0, 0, 1, 0, 0, 2, 0, 0, 1, 1, 2, 0, 2, 2, 0, 2, 2, 1, 1, 0,\n", + " 1, 1, 0, 0, 1, 0, 2, 0, 1, 0, 2, 0, 0, 2, 2, 2, 1, 1, 1, 1, 1, 0,\n", + " 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2,\n", + " 1, 1, 0, 0, 1, 1, 2, 0, 
0, 0, 0, 0, 2, 0, 0, 2, 1, 1, 1, 2, 2, 2,\n", + " 1, 2, 1, 2, 1, 1, 1, 0, 2, 2, 2, 1, 2, 1, 0, 1, 2, 1, 1, 1, 2, 1],\n", + " dtype=int32)" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ], + "source": [ + "\n", + "from sklearn.cluster import KMeans\n", + "\n", + "nclusters = 3 \n", + "seed = 0\n", + "\n", + "km = KMeans(n_clusters=nclusters, random_state=seed)\n", + "km.fit(X)\n", + "\n", + "# Predict the cluster for each data point\n", + "\n", + "y_cluster_kmeans = km.predict(X)\n", + "y_cluster_kmeans" + ] + }, + { + "source": [ + "ตัวเลขเหล่านั้นไม่ได้มีความหมายมากนักสำหรับเรา ดังนั้นมาหา 'คะแนนซิลลูเอต' เพื่อดูความแม่นยำกัน คะแนนของเราอยู่ในระดับกลาง\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.5466747351275563" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ], + "source": [ + "from sklearn import metrics\n", + "score = metrics.silhouette_score(X, y_cluster_kmeans)\n", + "score" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.cluster import KMeans\n", + "wcss = []\n", + "\n", + "for i in range(1, 11):\n", + " kmeans = KMeans(n_clusters = i, init = 'k-means++', random_state = 42)\n", + " kmeans.fit(X)\n", + " wcss.append(kmeans.inertia_)" + ] + }, + { + "source": [ + "ใช้โมเดลนั้นเพื่อตัดสินใจโดยใช้วิธี Elbow Method ว่าควรสร้างจำนวนคลัสเตอร์ที่เหมาะสมที่สุดกี่คลัสเตอร์\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variables as keyword 
args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.\n FutureWarning\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.figure(figsize=(10,5))\n", + "sns.lineplot(range(1, 11), wcss,marker='o',color='red')\n", + "plt.title('Elbow')\n", + "plt.xlabel('Number of clusters')\n", + "plt.ylabel('WCSS')\n", + "plt.show()" + ] + }, + { + "source": [ + "Looks like 3 is a good number after all. Fit the model again and create a scatterplot of your clusters. They do group in bunches, but they are pretty close together." + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "from sklearn.cluster import KMeans\n", + "kmeans = KMeans(n_clusters = 3)\n", + "kmeans.fit(X)\n", + "labels = kmeans.predict(X)\n", + "plt.scatter(df['popularity'],df['danceability'],c = labels)\n", + 
"plt.xlabel('popularity')\n", + "plt.ylabel('danceability')\n", + "plt.show()" + ] + }, + { + "source": [ + "ความแม่นยำของโมเดลนี้ไม่ได้แย่ แต่ก็ไม่ได้ดีนัก อาจเป็นไปได้ว่าข้อมูลอาจไม่เหมาะสมกับการจัดกลุ่มแบบ K-Means คุณอาจลองใช้วิธีอื่นดู\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 811, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Result: 109 out of 286 samples were correctly labeled.\nAccuracy score: 0.38\n" + ] + } + ], + "source": [ + "labels = kmeans.labels_\n", + "\n", + "correct_labels = sum(y == labels)\n", + "\n", + "print(\"Result: %d out of %d samples were correctly labeled.\" % (correct_labels, y.size))\n", + "\n", + "print('Accuracy score: {0:0.2f}'. format(correct_labels/float(y.size)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้อง แต่โปรดทราบว่าการแปลอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่แม่นยำ เอกสารต้นฉบับในภาษาต้นทางควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษาจากผู้เชี่ยวชาญ เราไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความที่ผิดพลาดซึ่งเกิดจากการใช้การแปลนี้\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/th/5-Clustering/2-K-Means/solution/tester.ipynb b/translations/th/5-Clustering/2-K-Means/solution/tester.ipynb new file mode 100644 index 000000000..d27a38ec2 --- /dev/null +++ b/translations/th/5-Clustering/2-K-Means/solution/tester.ipynb @@ -0,0 +1,341 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": 
"python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "6f92868513e59d321245137c1c4c5311", + "translation_date": "2025-09-06T14:22:55+00:00", + "source_file": "5-Clustering/2-K-Means/solution/tester.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: seaborn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.11.1)\n", + "Requirement already satisfied: pandas>=0.23 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.1.2)\n", + "Requirement already satisfied: matplotlib>=2.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (3.1.0)\n", + "Requirement already satisfied: numpy>=1.15 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.19.2)\n", + "Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.4.1)\n", + "Requirement already satisfied: pytz>=2017.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2019.1)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) 
(2.8.0)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (1.1.0)\n", + "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (2.4.0)\n", + "Requirement already satisfied: cycler>=0.10 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n", + "Requirement already satisfied: six>=1.5 in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from python-dateutil>=2.7.3->pandas>=0.23->seaborn) (1.12.0)\n", + "Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from kiwisolver>=1.0.1->matplotlib>=2.2->seaborn) (45.1.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install seaborn" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "0 Sparky Mandy & The Jungle \n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "2 LITT! LITT! \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. 
\n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "0 Cruel Santino alternative r&b 2019 144000 48 \n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "2 AYLØ indie r&b 2018 207758 40 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "0 0.666 0.8510 0.420 0.534000 0.1100 -6.699 \n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "2 0.836 0.2720 0.564 0.000537 0.1100 -7.127 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "\n", + " speechiness tempo time_signature \n", + "0 0.0829 133.015 5 \n", + "1 0.3600 129.993 3 \n", + "2 0.0424 130.005 4 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
0SparkyMandy & The JungleCruel Santinoalternative r&b2019144000480.6660.85100.4200.5340000.1100-6.6990.0829133.0155
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
2LITT!LITT!AYLØindie r&b2018207758400.8360.27200.5640.0005370.1100-7.1270.0424130.0054
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
\n
" + }, + "metadata": {}, + "execution_count": 105 + } + ], + "source": [ + "\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "import numpy as np\n", + "\n", + "df = pd.read_csv(\"../../data/nigerian-songs.csv\")\n", + "df.head()" + ] + }, + { + "source": [ + "เราจะมุ่งเน้นเพียง 3 ประเภทเท่านั้น บางทีเราอาจสร้าง 3 กลุ่มขึ้นมาได้!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "metadata": {}, + "execution_count": 106 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "df = df[(df['artist_top_genre'] == 'afro dancehall') | (df['artist_top_genre'] == 'afropop') | (df['artist_top_genre'] == 'nigerian pop')]\n", + "df = df[(df['popularity'] > 0)]\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. 
\n", + "5 Kasala Pioneers \n", + "6 Pull Up Everything Pretty \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "5 DRB Lasgidi nigerian pop 2020 184800 26 \n", + "6 prettyboydo nigerian pop 2018 202648 29 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "5 0.803 0.1270 0.525 0.000007 0.1290 -10.034 \n", + "6 0.818 0.4520 0.587 0.004490 0.5900 -9.840 \n", + "\n", + " speechiness tempo time_signature \n", + "1 0.3600 129.993 3 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 \n", + "5 0.1970 100.103 4 \n", + "6 0.1990 95.842 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
5KasalaPioneersDRB Lasgidinigerian pop2020184800260.8030.12700.5250.0000070.1290-10.0340.1970100.1034
6Pull UpEverything Prettyprettyboydonigerian pop2018202648290.8180.45200.5870.0044900.5900-9.8400.199095.8424
\n
" + }, + "metadata": {}, + "execution_count": 107 + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import StandardScaler\n", + "\n", + "scaler = StandardScaler()\n", + "\n", + "# X = df.loc[:, ('danceability','energy')]\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [ + { + "output_type": "error", + "ename": "ValueError", + "evalue": "Unknown label type: 'continuous'", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0;31m# we create an instance of SVM and fit out data. We do not scale our\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[0;31m# data since we want to plot the support vectors\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 22\u001b[0;31m \u001b[0mls30\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mLabelSpreading\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_30\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_30\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Label Spreading 30% data'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 23\u001b[0m \u001b[0mls50\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mLabelSpreading\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0my_50\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_50\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Label Spreading 50% data'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0mls100\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mLabelSpreading\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Label Spreading 100% data'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/semi_supervised/_label_propagation.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y)\u001b[0m\n\u001b[1;32m 228\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_validate_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 229\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mX_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 230\u001b[0;31m \u001b[0mcheck_classification_targets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 231\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 232\u001b[0m \u001b[0;31m# actual graph construction (implementations should override this)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/utils/multiclass.py\u001b[0m in 
\u001b[0;36mcheck_classification_targets\u001b[0;34m(y)\u001b[0m\n\u001b[1;32m 181\u001b[0m if y_type not in ['binary', 'multiclass', 'multiclass-multioutput',\n\u001b[1;32m 182\u001b[0m 'multilabel-indicator', 'multilabel-sequences']:\n\u001b[0;32m--> 183\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Unknown label type: %r\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0my_type\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 184\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 185\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: Unknown label type: 'continuous'" + ] + } + ], + "source": [ + "from sklearn.svm import SVC\n", + "from sklearn.semi_supervised import LabelSpreading\n", + "from sklearn.semi_supervised import SelfTrainingClassifier\n", + "from sklearn import datasets\n", + "\n", + "X = df[['danceability','acousticness']].values\n", + "y = df['energy'].values\n", + "\n", + "# X = scaler.fit_transform(X)\n", + "\n", + "# step size in the mesh\n", + "h = .02\n", + "\n", + "rng = np.random.RandomState(0)\n", + "y_rand = rng.rand(y.shape[0])\n", + "y_30 = np.copy(y)\n", + "y_30[y_rand < 0.3] = -1 # set random samples to be unlabeled\n", + "y_50 = np.copy(y)\n", + "y_50[y_rand < 0.5] = -1\n", + "# we create an instance of SVM and fit out data. 
We do not scale our\n", + "# data since we want to plot the support vectors\n", + "ls30 = (LabelSpreading().fit(X, y_30), y_30, 'Label Spreading 30% data')\n", + "ls50 = (LabelSpreading().fit(X, y_50), y_50, 'Label Spreading 50% data')\n", + "ls100 = (LabelSpreading().fit(X, y), y, 'Label Spreading 100% data')\n", + "\n", + "# the base classifier for self-training is identical to the SVC\n", + "base_classifier = SVC(kernel='rbf', gamma=.5, probability=True)\n", + "st30 = (SelfTrainingClassifier(base_classifier).fit(X, y_30),\n", + " y_30, 'Self-training 30% data')\n", + "st50 = (SelfTrainingClassifier(base_classifier).fit(X, y_50),\n", + " y_50, 'Self-training 50% data')\n", + "\n", + "rbf_svc = (SVC(kernel='rbf', gamma=.5).fit(X, y), y, 'SVC with rbf kernel')\n", + "\n", + "# create a mesh to plot in\n", + "x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1\n", + "y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1\n", + "xx, yy = np.meshgrid(np.arange(x_min, x_max, h),\n", + " np.arange(y_min, y_max, h))\n", + "\n", + "color_map = {-1: (1, 1, 1), 0: (0, 0, .9), 1: (1, 0, 0), 2: (.8, .6, 0)}\n", + "\n", + "classifiers = (ls30, st30, ls50, st50, ls100, rbf_svc)\n", + "for i, (clf, y_train, title) in enumerate(classifiers):\n", + " # Plot the decision boundary. 
For that, we will assign a color to each\n", + " # point in the mesh [x_min, x_max]x[y_min, y_max].\n", + " plt.subplot(3, 2, i + 1)\n", + " Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])\n", + "\n", + " # Put the result into a color plot\n", + " Z = Z.reshape(xx.shape)\n", + " plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)\n", + " plt.axis('off')\n", + "\n", + " # Plot also the training points\n", + " colors = [color_map[y] for y in y_train]\n", + " plt.scatter(X[:, 0], X[:, 1], c=colors, edgecolors='black')\n", + "\n", + " plt.title(title)\n", + "\n", + "plt.suptitle(\"Unlabeled points are colored white\", y=0.1)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้อง แต่โปรดทราบว่าการแปลอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่แม่นยำ เอกสารต้นฉบับในภาษาดั้งเดิมควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษามนุษย์ที่เป็นมืออาชีพ เราจะไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความที่ผิดพลาดซึ่งเกิดจากการใช้การแปลนี้\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/th/6-NLP/3-Translation-Sentiment/solution/notebook.ipynb b/translations/th/6-NLP/3-Translation-Sentiment/solution/notebook.ipynb new file mode 100644 index 000000000..6613ba3c6 --- /dev/null +++ b/translations/th/6-NLP/3-Translation-Sentiment/solution/notebook.ipynb @@ -0,0 +1,100 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 4, + "coopTranslator": { + "original_hash": 
"27de2abc0235ebd22080fc8f1107454d", + "translation_date": "2025-09-06T15:22:18+00:00", + "source_file": "6-NLP/3-Translation-Sentiment/solution/notebook.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from textblob import TextBlob\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# You should download the book text, clean it, and import it here\n", + "with open(\"pride.txt\", encoding=\"utf8\") as f:\n", + " file_contents = f.read()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "book_pride = TextBlob(file_contents)\n", + "positive_sentiment_sentences = []\n", + "negative_sentiment_sentences = []" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for sentence in book_pride.sentences:\n", + " if sentence.sentiment.polarity == 1:\n", + " positive_sentiment_sentences.append(sentence)\n", + " if sentence.sentiment.polarity == -1:\n", + " negative_sentiment_sentences.append(sentence)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The \" + str(len(positive_sentiment_sentences)) + \" most positive sentences:\")\n", + "for sentence in positive_sentiment_sentences:\n", + " print(\"+ \" + str(sentence.replace(\"\\n\", \"\").replace(\" \", \" \")))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The \" + str(len(negative_sentiment_sentences)) + \" most negative sentences:\")\n", + "for sentence in negative_sentiment_sentences:\n", + " print(\"- \" + str(sentence.replace(\"\\n\", \"\").replace(\" \", \" \")))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้องมากที่สุด แต่โปรดทราบว่าการแปลอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่ถูกต้อง เอกสารต้นฉบับในภาษาดั้งเดิมควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษาจากผู้เชี่ยวชาญ เราไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความที่ผิดพลาดซึ่งเกิดจากการใช้การแปลนี้\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/th/6-NLP/4-Hotel-Reviews-1/notebook.ipynb b/translations/th/6-NLP/4-Hotel-Reviews-1/notebook.ipynb new file mode 100644 index 000000000..e69de29bb diff --git a/translations/th/6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb b/translations/th/6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb new file mode 100644 index 000000000..b7c5a3aa1 --- /dev/null +++ b/translations/th/6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb @@ -0,0 +1,174 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 4, + "coopTranslator": { + "original_hash": "2d05e7db439376aa824f4b387f8324ca", + "translation_date": "2025-09-06T15:21:57+00:00", + "source_file": "6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# EDA\n", + "import pandas as pd\n", + "import time" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_difference_review_avg(row):\n", + " return row[\"Average_Score\"] - row[\"Calc_Average_Score\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the hotel reviews from CSV\n", + "print(\"Loading data file now, this could take a while depending on file size\")\n", + "start = time.time()\n", + "df = pd.read_csv('../../data/Hotel_Reviews.csv')\n", + "end = time.time()\n", + "print(\"Loading took \" + str(round(end - start, 2)) + \" seconds\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# What shape is the data (rows, columns)?\n", + "print(\"The shape of the data (rows, cols) is \" + str(df.shape))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# value_counts() creates a Series object that has index and values\n", + "# in this case, the country and the frequency they occur in reviewer nationality\n", + "nationality_freq = df[\"Reviewer_Nationality\"].value_counts()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# What reviewer nationality is the most common in the dataset?\n", + "print(\"The highest frequency reviewer nationality is \" + str(nationality_freq.index[0]).strip() + \" with \" + str(nationality_freq[0]) + \" reviews.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# What is the top 10 most common nationalities and their frequencies?\n", + "print(\"The top 10 highest frequency reviewer nationalities are:\")\n", + "print(nationality_freq[0:10].to_string())\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# How many unique nationalities are there?\n", + "print(\"There are \" + str(nationality_freq.index.size) + \" unique nationalities in the dataset\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# What was the 
most frequently reviewed hotel for the top 10 nationalities - print the hotel and number of reviews\n", + "for nat in nationality_freq[:10].index:\n", + " # First, extract all the rows that match the criteria into a new dataframe\n", + " nat_df = df[df[\"Reviewer_Nationality\"] == nat] \n", + " # Now get the hotel freq\n", + " freq = nat_df[\"Hotel_Name\"].value_counts()\n", + " print(\"The most reviewed hotel for \" + str(nat).strip() + \" was \" + str(freq.index[0]) + \" with \" + str(freq[0]) + \" reviews.\") \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# How many reviews are there per hotel (frequency count of hotel) and do the results match the value in `Total_Number_of_Reviews`?\n", + "# First create a new dataframe based on the old one, removing the uneeded columns\n", + "hotel_freq_df = df.drop([\"Hotel_Address\", \"Additional_Number_of_Scoring\", \"Review_Date\", \"Average_Score\", \"Reviewer_Nationality\", \"Negative_Review\", \"Review_Total_Negative_Word_Counts\", \"Positive_Review\", \"Review_Total_Positive_Word_Counts\", \"Total_Number_of_Reviews_Reviewer_Has_Given\", \"Reviewer_Score\", \"Tags\", \"days_since_review\", \"lat\", \"lng\"], axis = 1)\n", + "# Group the rows by Hotel_Name, count them and put the result in a new column Total_Reviews_Found\n", + "hotel_freq_df['Total_Reviews_Found'] = hotel_freq_df.groupby('Hotel_Name').transform('count')\n", + "# Get rid of all the duplicated rows\n", + "hotel_freq_df = hotel_freq_df.drop_duplicates(subset = [\"Hotel_Name\"])\n", + "print()\n", + "print(hotel_freq_df.to_string())\n", + "print(str(hotel_freq_df.shape))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# While there is an `Average_Score` for each hotel according to the dataset, \n", + "# you can also calculate an average score (getting the average of all reviewer scores in the dataset for each hotel)\n", 
+ "# Add a new column to your dataframe with the column header `Calc_Average_Score` that contains that calculated average. \n", + "df['Calc_Average_Score'] = round(df.groupby('Hotel_Name').Reviewer_Score.transform('mean'), 1)\n", + "# Add a new column with the difference between the two average scores\n", + "df[\"Average_Score_Difference\"] = df.apply(get_difference_review_avg, axis = 1)\n", + "# Create a df without all the duplicates of Hotel_Name (so only 1 row per hotel)\n", + "review_scores_df = df.drop_duplicates(subset = [\"Hotel_Name\"])\n", + "# Sort the dataframe to find the lowest and highest average score difference\n", + "review_scores_df = review_scores_df.sort_values(by=[\"Average_Score_Difference\"])\n", + "print(review_scores_df[[\"Average_Score_Difference\", \"Average_Score\", \"Calc_Average_Score\", \"Hotel_Name\"]])\n", + "# Do any hotels have the same (rounded to 1 decimal place) `Average_Score` and `Calc_Average_Score`?\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้อง แต่โปรดทราบว่าการแปลอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่แม่นยำ เอกสารต้นฉบับในภาษาดั้งเดิมควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ แนะนำให้ใช้บริการแปลภาษาจากผู้เชี่ยวชาญ เราไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความที่ผิดพลาดซึ่งเกิดจากการใช้การแปลนี้\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/th/6-NLP/5-Hotel-Reviews-2/notebook.ipynb b/translations/th/6-NLP/5-Hotel-Reviews-2/notebook.ipynb new file mode 100644 index 000000000..e69de29bb diff --git a/translations/th/6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb b/translations/th/6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb new file mode 100644 index 000000000..1c1eaff22 --- /dev/null +++ b/translations/th/6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb @@ -0,0 +1,172 
@@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "033cb89c85500224b3c63fd04f49b4aa", + "translation_date": "2025-09-06T15:22:39+00:00", + "source_file": "6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import time\n", + "import ast" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def replace_address(row):\n", + " if \"Netherlands\" in row[\"Hotel_Address\"]:\n", + " return \"Amsterdam, Netherlands\"\n", + " elif \"Barcelona\" in row[\"Hotel_Address\"]:\n", + " return \"Barcelona, Spain\"\n", + " elif \"United Kingdom\" in row[\"Hotel_Address\"]:\n", + " return \"London, United Kingdom\"\n", + " elif \"Milan\" in row[\"Hotel_Address\"]: \n", + " return \"Milan, Italy\"\n", + " elif \"France\" in row[\"Hotel_Address\"]:\n", + " return \"Paris, France\"\n", + " elif \"Vienna\" in row[\"Hotel_Address\"]:\n", + " return \"Vienna, Austria\" \n", + " else:\n", + " return row.Hotel_Address\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the hotel reviews from CSV\n", + "start = time.time()\n", + "df = pd.read_csv('../../data/Hotel_Reviews.csv')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], 
+ "source": [ + "# dropping columns we will not use:\n", + "df.drop([\"lat\", \"lng\"], axis = 1, inplace=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Replace all the addresses with a shortened, more useful form\n", + "df[\"Hotel_Address\"] = df.apply(replace_address, axis = 1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Drop `Additional_Number_of_Scoring`\n", + "df.drop([\"Additional_Number_of_Scoring\"], axis = 1, inplace=True)\n", + "# Replace `Total_Number_of_Reviews` and `Average_Score` with our own calculated values\n", + "df.Total_Number_of_Reviews = df.groupby('Hotel_Name').transform('count')\n", + "df.Average_Score = round(df.groupby('Hotel_Name').Reviewer_Score.transform('mean'), 1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Process the Tags into new columns\n", + "# The file Hotel_Reviews_Tags.py, identifies the most important tags\n", + "# Leisure trip, Couple, Solo traveler, Business trip, Group combined with Travelers with friends, \n", + "# Family with young children, Family with older children, With a pet\n", + "df[\"Leisure_trip\"] = df.Tags.apply(lambda tag: 1 if \"Leisure trip\" in tag else 0)\n", + "df[\"Couple\"] = df.Tags.apply(lambda tag: 1 if \"Couple\" in tag else 0)\n", + "df[\"Solo_traveler\"] = df.Tags.apply(lambda tag: 1 if \"Solo traveler\" in tag else 0)\n", + "df[\"Business_trip\"] = df.Tags.apply(lambda tag: 1 if \"Business trip\" in tag else 0)\n", + "df[\"Group\"] = df.Tags.apply(lambda tag: 1 if \"Group\" in tag or \"Travelers with friends\" in tag else 0)\n", + "df[\"Family_with_young_children\"] = df.Tags.apply(lambda tag: 1 if \"Family with young children\" in tag else 0)\n", + "df[\"Family_with_older_children\"] = df.Tags.apply(lambda tag: 1 if \"Family with older children\" in tag else 0)\n", + 
"df[\"With_a_pet\"] = df.Tags.apply(lambda tag: 1 if \"With a pet\" in tag else 0)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# No longer need any of these columns\n", + "df.drop([\"Review_Date\", \"Review_Total_Negative_Word_Counts\", \"Review_Total_Positive_Word_Counts\", \"days_since_review\", \"Total_Number_of_Reviews_Reviewer_Has_Given\"], axis = 1, inplace=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Saving results to Hotel_Reviews_Filtered.csv\n", + "Filtering took 23.74 seconds\n" + ] + } + ], + "source": [ + "# Saving new data file with calculated columns\n", + "print(\"Saving results to Hotel_Reviews_Filtered.csv\")\n", + "df.to_csv(r'../../data/Hotel_Reviews_Filtered.csv', index = False)\n", + "end = time.time()\n", + "print(\"Filtering took \" + str(round(end - start, 2)) + \" seconds\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้อง แต่โปรดทราบว่าการแปลอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่ถูกต้อง เอกสารต้นฉบับในภาษาดั้งเดิมควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษามืออาชีพ เราไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความผิดที่เกิดจากการใช้การแปลนี้\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/th/6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb b/translations/th/6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb new file mode 100644 index 000000000..7f2b88014 --- /dev/null +++ b/translations/th/6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb @@ -0,0 +1,137 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", 
+ "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "341efc86325ec2a214f682f57a189dfd", + "translation_date": "2025-09-06T15:23:00+00:00", + "source_file": "6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the hotel reviews from CSV (you can )\n", + "import pandas as pd \n", + "\n", + "df = pd.read_csv('../../data/Hotel_Reviews_Filtered.csv')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# We want to find the most useful tags to keep\n", + "# Remove opening and closing brackets\n", + "df.Tags = df.Tags.str.strip(\"[']\")\n", + "# remove all quotes too\n", + "df.Tags = df.Tags.str.replace(\" ', '\", \",\", regex = False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# removing this to take advantage of the 'already a phrase' fact of the dataset \n", + "# Now split the strings into a list\n", + "tag_list_df = df.Tags.str.split(',', expand = True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove leading and trailing spaces\n", + "df[\"Tag_1\"] = tag_list_df[0].str.strip()\n", + "df[\"Tag_2\"] = tag_list_df[1].str.strip()\n", + "df[\"Tag_3\"] = tag_list_df[2].str.strip()\n", + "df[\"Tag_4\"] = tag_list_df[3].str.strip()\n", + "df[\"Tag_5\"] = tag_list_df[4].str.strip()\n", + "df[\"Tag_6\"] = tag_list_df[5].str.strip()\n" + ] 
+ }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# Merge the 6 columns into one with melt\n", + "df_tags = df.melt(value_vars=[\"Tag_1\", \"Tag_2\", \"Tag_3\", \"Tag_4\", \"Tag_5\", \"Tag_6\"])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "The shape of the tags with no filtering: (2514684, 2)\n", + " index count\n", + "0 Leisure trip 338423\n", + "1 Couple 205305\n", + "2 Solo traveler 89779\n", + "3 Business trip 68176\n", + "4 Group 51593\n", + "5 Family with young children 49318\n", + "6 Family with older children 21509\n", + "7 Travelers with friends 1610\n", + "8 With a pet 1078\n" + ] + } + ], + "source": [ + "# Get the value counts\n", + "tag_vc = df_tags.value.value_counts()\n", + "# print(tag_vc)\n", + "print(\"The shape of the tags with no filtering:\", str(df_tags.shape))\n", + "# Drop rooms, suites, and length of stay, mobile device and anything with less count than a 1000\n", + "df_tags = df_tags[~df_tags.value.str.contains(\"Standard|room|Stayed|device|Beds|Suite|Studio|King|Superior|Double\", na=False, case=False)]\n", + "tag_vc = df_tags.value.value_counts().reset_index(name=\"count\").query(\"count > 1000\")\n", + "# Print the top 10 (there should only be 9 and we'll use these in the filtering section)\n", + "print(tag_vc[:10])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้อง แต่โปรดทราบว่าการแปลอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่ถูกต้อง เอกสารต้นฉบับในภาษาดั้งเดิมควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษามืออาชีพ เราไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความผิดที่เกิดจากการใช้การแปลนี้\n" + ] + } + ] +} \ No newline at end of 
file diff --git a/translations/th/6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb b/translations/th/6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb new file mode 100644 index 000000000..3ac54a358 --- /dev/null +++ b/translations/th/6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb @@ -0,0 +1,260 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "705bf02633759f689abc37b19749a16d", + "translation_date": "2025-09-06T15:23:20+00:00", + "source_file": "6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[nltk_data] Downloading package vader_lexicon to\n[nltk_data] /Users/jenlooper/nltk_data...\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ], + "source": [ + "import time\n", + "import pandas as pd\n", + "import nltk as nltk\n", + "from nltk.corpus import stopwords\n", + "from nltk.sentiment.vader import SentimentIntensityAnalyzer\n", + "nltk.download('vader_lexicon')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "vader_sentiment = SentimentIntensityAnalyzer()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# There are 3 
possibilities of input for a review:\n", + "# It could be \"No Negative\", in which case, return 0\n", + "# It could be \"No Positive\", in which case, return 0\n", + "# It could be a review, in which case calculate the sentiment\n", + "def calc_sentiment(review): \n", + " if review == \"No Negative\" or review == \"No Positive\":\n", + " return 0\n", + " return vader_sentiment.polarity_scores(review)[\"compound\"] \n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the hotel reviews from CSV\n", + "df = pd.read_csv(\"../../data/Hotel_Reviews_Filtered.csv\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove stop words - can be slow for a lot of text!\n", + "# Ryan Han (ryanxjhan on Kaggle) has a great post measuring performance of different stop words removal approaches\n", + "# https://www.kaggle.com/ryanxjhan/fast-stop-words-removal # using the approach that Ryan recommends\n", + "start = time.time()\n", + "cache = set(stopwords.words(\"english\"))\n", + "def remove_stopwords(review):\n", + " text = \" \".join([word for word in review.split() if word not in cache])\n", + " return text\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove the stop words from both columns\n", + "df.Negative_Review = df.Negative_Review.apply(remove_stopwords) \n", + "df.Positive_Review = df.Positive_Review.apply(remove_stopwords)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Removing stop words took 5.77 seconds\n" + ] + } + ], + "source": [ + "end = time.time()\n", + "print(\"Removing stop words took \" + str(round(end - start, 2)) + \" seconds\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + 
"output_type": "stream", + "name": "stdout", + "text": [ + "Calculating sentiment columns for both positive and negative reviews\n", + "Calculating sentiment took 201.07 seconds\n" + ] + } + ], + "source": [ + "# Add a negative sentiment and positive sentiment column\n", + "print(\"Calculating sentiment columns for both positive and negative reviews\")\n", + "start = time.time()\n", + "df[\"Negative_Sentiment\"] = df.Negative_Review.apply(calc_sentiment)\n", + "df[\"Positive_Sentiment\"] = df.Positive_Review.apply(calc_sentiment)\n", + "end = time.time()\n", + "print(\"Calculating sentiment took \" + str(round(end - start, 2)) + \" seconds\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Negative_Review Negative_Sentiment\n", + "186584 So bad experience memories I hotel The first n... -0.9920\n", + "129503 First charged twice room booked booking second... -0.9896\n", + "307286 The staff Had bad experience even booking Janu... -0.9889\n", + "452092 No WLAN room Incredibly rude restaurant staff ... -0.9884\n", + "201293 We usually traveling Paris 2 3 times year busi... -0.9873\n", + "... ... ...\n", + "26899 I would say however one night expensive even d... 0.9933\n", + "138365 Wifi terribly slow I speed test network upload... 0.9938\n", + "79215 I find anything hotel first I walked past hote... 0.9938\n", + "278506 The property great location There bakery next ... 0.9945\n", + "339189 Guys I like hotel I wish return next year Howe... 0.9948\n", + "\n", + "[515738 rows x 2 columns]\n", + " Positive_Review Positive_Sentiment\n", + "137893 Bathroom Shower We going stay twice hotel 2 ni... -0.9820\n", + "5839 I completely disappointed mad since reception ... -0.9780\n", + "64158 get everything extra internet parking breakfas... -0.9751\n", + "124178 I didnt like anythig Room small Asked upgrade ... 
-0.9721\n", + "489137 Very rude manager abusive staff reception Dirt... -0.9703\n", + "... ... ...\n", + "331570 Everything This recently renovated hotel class... 0.9984\n", + "322920 From moment stepped doors Guesthouse Hotel sta... 0.9985\n", + "293710 This place surprise expected good actually gre... 0.9985\n", + "417442 We celebrated wedding night Langham I commend ... 0.9985\n", + "132492 We arrived super cute boutique hotel area expl... 0.9987\n", + "\n", + "[515738 rows x 2 columns]\n" + ] + } + ], + "source": [ + "df = df.sort_values(by=[\"Negative_Sentiment\"], ascending=True)\n", + "print(df[[\"Negative_Review\", \"Negative_Sentiment\"]])\n", + "df = df.sort_values(by=[\"Positive_Sentiment\"], ascending=True)\n", + "print(df[[\"Positive_Review\", \"Positive_Sentiment\"]])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "# Reorder the columns (This is cosmetic, but to make it easier to explore the data later)\n", + "df = df.reindex([\"Hotel_Name\", \"Hotel_Address\", \"Total_Number_of_Reviews\", \"Average_Score\", \"Reviewer_Score\", \"Negative_Sentiment\", \"Positive_Sentiment\", \"Reviewer_Nationality\", \"Leisure_trip\", \"Couple\", \"Solo_traveler\", \"Business_trip\", \"Group\", \"Family_with_young_children\", \"Family_with_older_children\", \"With_a_pet\", \"Negative_Review\", \"Positive_Review\"], axis=1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Saving results to Hotel_Reviews_NLP.csv\n" + ] + } + ], + "source": [ + "print(\"Saving results to Hotel_Reviews_NLP.csv\")\n", + "df.to_csv(r\"../../data/Hotel_Reviews_NLP.csv\", index = False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: 
\nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้องมากที่สุด แต่โปรดทราบว่าการแปลอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่ถูกต้อง เอกสารต้นฉบับในภาษาดั้งเดิมควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษามืออาชีพ เราไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความผิดที่เกิดจากการใช้การแปลนี้\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/th/7-TimeSeries/1-Introduction/solution/notebook.ipynb b/translations/th/7-TimeSeries/1-Introduction/solution/notebook.ipynb new file mode 100644 index 000000000..38736668f --- /dev/null +++ b/translations/th/7-TimeSeries/1-Introduction/solution/notebook.ipynb @@ -0,0 +1,169 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ในสมุดบันทึกนี้ เราจะแสดงวิธีการ:\n", + "- ตั้งค่าข้อมูลอนุกรมเวลาเพื่อใช้งานในโมดูลนี้\n", + "- แสดงภาพข้อมูล\n", + "\n", + "ข้อมูลในตัวอย่างนี้นำมาจากการแข่งขันพยากรณ์ GEFCom2014 \n", + "ประกอบด้วยข้อมูลการใช้ไฟฟ้ารายชั่วโมงและค่าความร้อนรายชั่วโมงเป็นเวลา 3 ปี ระหว่างปี 2012 ถึง 2014 \n", + "\n", + "Tao Hong, Pierre Pinson, Shu Fan, Hamidreza Zareipour, Alberto Troccoli และ Rob J. 
Hyndman, \"Probabilistic energy forecasting: Global Energy Forecasting Competition 2014 and beyond\", International Journal of Forecasting, vol.32, no.3, pp 896-913, กรกฎาคม-กันยายน, 2016.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import matplotlib.pyplot as plt\n", + "from common.utils import load_data\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "โหลดข้อมูลจากไฟล์ CSV ลงใน Pandas DataFrame\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " load\n", + "2012-01-01 00:00:00 2698.0\n", + "2012-01-01 01:00:00 2558.0\n", + "2012-01-01 02:00:00 2444.0\n", + "2012-01-01 03:00:00 2402.0\n", + "2012-01-01 04:00:00 2403.0" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
load
2012-01-01 00:00:002698.0
2012-01-01 01:00:002558.0
2012-01-01 02:00:002444.0
2012-01-01 03:00:002402.0
2012-01-01 04:00:002403.0
\n
" + }, + "metadata": {}, + "execution_count": 7 + } + ], + "source": [ + "data_dir = './data'\n", + "energy = load_data(data_dir)[['load']]\n", + "energy.head()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "แสดงข้อมูลโหลดทั้งหมดที่มีอยู่ (มกราคม 2012 ถึง ธันวาคม 2014)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "energy.plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + 
}, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "energy['2014-07-01':'2014-07-07'].plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้อง แต่โปรดทราบว่าการแปลอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่แม่นยำ เอกสารต้นฉบับในภาษาต้นทางควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษาจากผู้เชี่ยวชาญ เราจะไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความที่ผิดพลาดซึ่งเกิดจากการใช้การแปลนี้\n" + ] + } + ], + "metadata": { + "kernel_info": { + "name": "python3" + }, + "kernelspec": { + "name": 
"python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "nteract": { + "version": "nteract-front-end@1.0.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "dddca9ad9e34435494e0933c218e1579", + "translation_date": "2025-09-06T14:01:46+00:00", + "source_file": "7-TimeSeries/1-Introduction/solution/notebook.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/th/7-TimeSeries/1-Introduction/working/notebook.ipynb b/translations/th/7-TimeSeries/1-Introduction/working/notebook.ipynb new file mode 100644 index 000000000..78a284060 --- /dev/null +++ b/translations/th/7-TimeSeries/1-Introduction/working/notebook.ipynb @@ -0,0 +1,64 @@ +{ + "cells": [ + { + "source": [ + "# การตั้งค่าข้อมูล\n", + "\n", + "ในโน้ตบุ๊กนี้ เราจะแสดงวิธีการ:\n", + "\n", + "ตั้งค่าข้อมูลอนุกรมเวลาเพื่อใช้งานในโมดูลนี้ \n", + "แสดงภาพข้อมูล \n", + "\n", + "ข้อมูลในตัวอย่างนี้นำมาจากการแข่งขันพยากรณ์ GEFCom2014 โดยข้อมูลประกอบด้วยการใช้ไฟฟ้ารายชั่วโมงและค่าความร้อนรายชั่วโมงเป็นเวลา 3 ปี ระหว่างปี 2012 ถึง 2014 \n", + "\n", + "1Tao Hong, Pierre Pinson, Shu Fan, Hamidreza Zareipour, Alberto Troccoli และ Rob J. Hyndman, \"Probabilistic energy forecasting: Global Energy Forecasting Competition 2014 and beyond\", International Journal of Forecasting, vol.32, no.3, pp 896-913, กรกฎาคม-กันยายน, 2016. 
\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้องมากที่สุด แต่โปรดทราบว่าการแปลโดยอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่ถูกต้อง เอกสารต้นฉบับในภาษาดั้งเดิมควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษามืออาชีพ เราไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความผิดที่เกิดจากการใช้การแปลนี้\n" + ] + } + ], + "metadata": { + "kernel_info": { + "name": "python3" + }, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "nteract": { + "version": "nteract-front-end@1.0.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "5e2bbe594906dce3aaaa736d6dac6683", + "translation_date": "2025-09-06T14:02:38+00:00", + "source_file": "7-TimeSeries/1-Introduction/working/notebook.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/th/7-TimeSeries/2-ARIMA/solution/notebook.ipynb b/translations/th/7-TimeSeries/2-ARIMA/solution/notebook.ipynb new file mode 100644 index 000000000..d164cbb88 --- /dev/null +++ b/translations/th/7-TimeSeries/2-ARIMA/solution/notebook.ipynb @@ -0,0 +1,1095 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# การพยากรณ์ข้อมูลอนุกรมเวลาด้วย ARIMA\n", + "\n", + "ในโน้ตบุ๊กนี้ เราจะแสดงวิธีการ:\n", + "- 
เตรียมข้อมูลอนุกรมเวลาเพื่อฝึกโมเดลการพยากรณ์อนุกรมเวลาด้วย ARIMA\n", + "- สร้างโมเดล ARIMA แบบง่ายเพื่อพยากรณ์ค่าล่วงหน้าในช่วง HORIZON (ตั้งแต่เวลา *t+1* ถึง *t+HORIZON*) ในอนุกรมเวลา\n", + "- ประเมินผลโมเดล\n", + "\n", + "ข้อมูลในตัวอย่างนี้นำมาจากการแข่งขัน GEFCom2014 ด้านการพยากรณ์ ซึ่งประกอบด้วยข้อมูลโหลดไฟฟ้าและอุณหภูมิรายชั่วโมงเป็นเวลา 3 ปี ระหว่างปี 2012 ถึง 2014 โดยมีเป้าหมายเพื่อพยากรณ์ค่าโหลดไฟฟ้าในอนาคต ในตัวอย่างนี้ เราจะแสดงวิธีการพยากรณ์ค่าล่วงหน้าเพียงหนึ่งช่วงเวลา โดยใช้ข้อมูลโหลดในอดีตเท่านั้น\n", + "\n", + "Tao Hong, Pierre Pinson, Shu Fan, Hamidreza Zareipour, Alberto Troccoli และ Rob J. Hyndman, \"Probabilistic energy forecasting: Global Energy Forecasting Competition 2014 and beyond\", International Journal of Forecasting, vol.32, no.3, pp 896-913, กรกฎาคม-กันยายน, 2016.\n" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## ติดตั้ง Dependencies\n", + "เริ่มต้นด้วยการติดตั้ง dependencies ที่จำเป็น ไลบรารีเหล่านี้พร้อมกับเวอร์ชันที่ระบุเป็นที่ทราบกันดีว่าสามารถใช้งานได้กับโซลูชันนี้:\n", + "\n", + "* `statsmodels == 0.12.2`\n", + "* `matplotlib == 3.4.2`\n", + "* `scikit-learn == 0.24.2`\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 16, + "source": [ + "!pip install statsmodels" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "/bin/sh: pip: command not found\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 17, + "source": [ + "import os\n", + "import warnings\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import datetime as dt\n", + "import math\n", + "\n", + "from pandas.plotting import autocorrelation_plot\n", + "from statsmodels.tsa.statespace.sarimax import SARIMAX\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from common.utils import load_data, mape\n", + "from IPython.display import Image\n", + "\n", + "%matplotlib inline\n", + 
"pd.options.display.float_format = '{:,.2f}'.format\n", + "np.set_printoptions(precision=2)\n", + "warnings.filterwarnings(\"ignore\") # specify to ignore warning messages\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 18, + "source": [ + "energy = load_data('./data')[['load']]\n", + "energy.head(10)" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2012-01-01 00:00:002,698.00
2012-01-01 01:00:002,558.00
2012-01-01 02:00:002,444.00
2012-01-01 03:00:002,402.00
2012-01-01 04:00:002,403.00
2012-01-01 05:00:002,453.00
2012-01-01 06:00:002,560.00
2012-01-01 07:00:002,719.00
2012-01-01 08:00:002,916.00
2012-01-01 09:00:003,105.00
\n", + "
" + ], + "text/plain": [ + " load\n", + "2012-01-01 00:00:00 2,698.00\n", + "2012-01-01 01:00:00 2,558.00\n", + "2012-01-01 02:00:00 2,444.00\n", + "2012-01-01 03:00:00 2,402.00\n", + "2012-01-01 04:00:00 2,403.00\n", + "2012-01-01 05:00:00 2,453.00\n", + "2012-01-01 06:00:00 2,560.00\n", + "2012-01-01 07:00:00 2,719.00\n", + "2012-01-01 08:00:00 2,916.00\n", + "2012-01-01 09:00:00 3,105.00" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "แสดงข้อมูลโหลดทั้งหมดที่มีอยู่ (มกราคม 2012 ถึง ธันวาคม 2014)\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 19, + "source": [ + "energy.plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## สร้างชุดข้อมูลสำหรับการฝึกและการทดสอบ\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 20, + "source": [ + "train_start_dt = '2014-11-01 00:00:00'\n", + "test_start_dt = '2014-12-30 00:00:00' " + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 21, + "source": [ + "energy[(energy.index < test_start_dt) & (energy.index >= train_start_dt)][['load']].rename(columns={'load':'train'}) \\\n", + " .join(energy[test_start_dt:][['load']].rename(columns={'load':'test'}), how='outer') \\\n", + " .plot(y=['train', 'test'], figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 22, + "source": [ + "train = energy.copy()[(energy.index >= train_start_dt) & (energy.index < test_start_dt)][['load']]\n", + "test = energy.copy()[energy.index >= test_start_dt][['load']]\n", + "\n", + "print('Training data shape: ', train.shape)\n", + "print('Test data shape: ', test.shape)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Training data shape: (1416, 1)\n", + "Test data shape: (48, 1)\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 23, + "source": [ + "scaler = MinMaxScaler()\n", + "train['load'] = scaler.fit_transform(train)\n", + "train.head(10)" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2014-11-01 00:00:000.10
2014-11-01 01:00:000.07
2014-11-01 02:00:000.05
2014-11-01 03:00:000.04
2014-11-01 04:00:000.06
2014-11-01 05:00:000.10
2014-11-01 06:00:000.19
2014-11-01 07:00:000.31
2014-11-01 08:00:000.40
2014-11-01 09:00:000.48
\n", + "
" + ], + "text/plain": [ + " load\n", + "2014-11-01 00:00:00 0.10\n", + "2014-11-01 01:00:00 0.07\n", + "2014-11-01 02:00:00 0.05\n", + "2014-11-01 03:00:00 0.04\n", + "2014-11-01 04:00:00 0.06\n", + "2014-11-01 05:00:00 0.10\n", + "2014-11-01 06:00:00 0.19\n", + "2014-11-01 07:00:00 0.31\n", + "2014-11-01 08:00:00 0.40\n", + "2014-11-01 09:00:00 0.48" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "ข้อมูลดั้งเดิมเทียบกับข้อมูลที่ปรับขนาด:\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 24, + "source": [ + "energy[(energy.index >= train_start_dt) & (energy.index < test_start_dt)][['load']].rename(columns={'load':'original load'}).plot.hist(bins=100, fontsize=12)\n", + "train.rename(columns={'load':'scaled load'}).plot.hist(bins=100, fontsize=12)\n", + "plt.show()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYEAAAD7CAYAAACMlyg3AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAaCklEQVR4nO3de5RV5Znn8e+PSyjlIhEK1GBZagJeQBQL7ehSM9EWO2omhjjRqOOltaIOsTNZzdLJ0oTYtunOmpl0JEokbTAg2iRpcKKxXYKXVmNPQsE0ERQ0LsHQogIJSCF3n/ljn9JjUUXtU5x9Tp3av89aZ8m+nf285+B5ePfz7ncrIjAzs3zqU+0AzMysepwEzMxyzEnAzCzHnATMzHLMScDMLMf6VTuAUgwfPjwaGxurHYaZWU1ZsmTJhoio72hbTSWBxsZGWlpaqh2GmVlNkbSms22+HGRmlmNOAmZmOeYkYGaWYzVVEzCz2rJr1y7Wrl3L9u3bqx1KLtTV1TFq1Cj69++f+hgnATPLzNq1axk8eDCNjY1IqnY4vVpEsHHjRtauXcuRRx6Z+jhfDjKzzGzfvp1hw4Y5AVSAJIYNG1Zyr8tJwMwy5QRQOd35rJ0EzMxyzDUBM6uYxlt+Vdb3W/1355f1/To8x+rVXHDBBSxfvjz1MVdddRUXXHABX/rSl1Ktr0RMnXESMMuB4h/fSvxwWu3w5SAz67W2bt3K+eefz/jx4xk7dizz5s0DYPHixZx22mmMHz+eU045hS1btrB69WrOOOMMJkyYwIQJE3jhhRf2er89e/YwdepUJk6cyAknnMC9994LJCNzpkyZwpgxYzjnnHN45513uoztySef5KSTTmLcuHFcc8017NixA4Dbb7+diRMnMnbsWJqbm2l7+uOSJUsYP34848eP5+677y7XR1SZJCDpU5K2S3qgaN1XJK2RtFXSw5IOrkQsZpYfjz/+OIcddhjLli1j+fLlnHfeeezcuZMvf/nL/OAHP2DZsmUsWrSIAw44gBEjRrBw4UKWLl3KvHnzuOmmm/Z6v/vuu4+DDjqIxYsXs3jxYn784x/z+uuvs2DBAlatWsVLL73E7NmzO0wgxbZv385VV13FvHnzePHFF9m9ezczZswAYMqUKSxevJjly5ezbds2Hn30UQCuvvpqpk+fzrJly8r6GVWqJ3A3sLhtQdLxwL3AFcBI4D3gngrFYmY5MW7cOBYuXMjNN9/Mc889x0EHHcSqVas49NBDmThxIgBDhgyhX79+7Nq1i+uuu45x48Zx8cUX89JLL+31fk888QSzZ8/mxBNP5NRTT2Xjxo28+uqrPPvss1x66aX07duXww47jM9+9rP7jGvVqlUceeSRjB49GoArr7ySZ599FoCnn36aU089lXHjxvHUU0+xYsUKNm3axKZNmzjzzDMBuOKKK8r2GWVeE5B0CbAJeAH4ZGH1ZcAjEfFsYZ/bgJclDY6ILVnHZGb5MHr0aJYuXcpjjz3Grbfeytlnn81FF13U4b7f//73GTlyJMuWLeP999+nrq5ur30igunTpzNp0qSPrH/sscfKEu/27du58cYbaWlp4fDDD2fatGmZ322daU9A0hDgduAb7TYdD3zQp4mI14CdwOgO3qNZUouklvXr12cZrlnuNN7yqw9evdGbb77JgQceyOWXX87UqVNZunQpY8aMYd26dSxenFyc2LJlC7t372bz5s0ceuih9OnThzlz5rBnz5693m/SpEnMmDGDXbt2AfDKK6+wdetWzjzzTObNm8eePXtYt24dTz/99D7jGjNmDKtXr+b3v/89AHPmzOGss8764Ad/+PDhtLa28otf/AKAoUOHM
nToUJ5//nkA5s6dW54PiOx7An8D3BcRa9vdxDAI2Nxu383A4PZvEBEzgZkATU1NkVGcZlYBlR6Z9OKLLzJ16lT69OlD//79mTFjBh/72MeYN28eX/va19i2bRsHHHAAixYt4sYbb2Ty5MnMnj2b8847j4EDB+71ftdeey2rV69mwoQJRAT19fU8/PDDXHTRRTz11FMcd9xxNDQ08OlPf3qfcdXV1TFr1iwuvvhidu/ezcSJE7n++usZMGAA1113HWPHjuWQQw754JIVwKxZs7jmmmuQxLnnnlu2z0htledyk3QiMBc4KSJ2SpoGfDIiLpf0f4BfR8T3ivbfAnwmIpZ09p5NTU3hh8qYla6zIaJZDx19+eWXOfbYY8v+vta5jj5zSUsioqmj/bPsCXwGaATeKPQCBgF9JR0HPA6MLwrwKGAA8EqG8ZiZWTtZJoGZwD8VLf81SVK4ARgB/JukM4ClJHWD+S4Km5lVVmZJICLeIxn6CYCkVmB7RKwH1ku6nuRy0TBgEXB1VrGYWfVEhCeRq5DuXN6v2LQRETGt3fKDwIOVOr+ZVV5dXR0bN270dNIV0PY8gY6Gtu6L5w4ys8yMGjWKtWvX4uHdldH2ZLFSOAmYWWb69+9f0lOurPI8gZyZWY45CZiZ5ZiTgJlZjjkJmJnlmJOAmVmOOQmYmeWYk4CZWY45CZiZ5ZiTgJlZjjkJmJnlmJOAmVmOee4gsx4g6yd8mXXGPQEzsxzLNAlIekDSOknvSnpF0rWF9Y2SQlJr0eu2LGMxM7O9ZX056LvAX0bEDknHAM9I+n/AxsL2oRGxO+MYzMysE5n2BCJiRUTsaFssvI7O8pxmZpZe5jUBSfdIeg9YCawDHivavEbSWkmzJA3v5PhmSS2SWvx0IjOz8so8CUTEjcBg4AxgPrAD2ABMBI4ATi5sn9vJ8TMjoikimurr67MO18wsVyoyOigi9kTE88Ao4IaIaI2IlojYHRFvA1OAcyUNrkQ8ZmaWqPQQ0X50XBOIwn89ZNXMrIIy+9GVNELSJZIGSeoraRJwKfCkpFMljZHUR9Iw4C7gmYjYnFU8Zma2tyyHiAZwA/AjkmSzBvh6RPxS0qXAncAI4F1gIUmCMLMeoPgOZvBdzL1ZZkkgItYDZ3Wy7SHgoazObWZm6fgavJlZjjkJmJnlmJOAmVmOeSppM+uSp7ruvdwTMDPLMScBM7MccxIwM8sxJwEzsxxzYdgsYy6qWk/mnoCZWY45CZiZ5ZiTgJlZjjkJmJnlmJOAmVmOOQmYmeVYpklA0gOS1kl6V9Irkq4t2na2pJWS3pP0tKQjsozFzMz2lnVP4LtAY0QMAT4P3CHpZEnDgfnAbcDBQAswL+NYzMysnUxvFouIFcWLhdfRwMnAioj4OYCkacAGScdExMosYzIzsw9lXhOQdI+k94CVwDrgMeB4YFnbPhGxFXitsL798c2SWiS1rF+/PutwzcxyJfMkEBE3AoOBM0guAe0ABgGb2+26ubBf++NnRkRTRDTV19dnHa6ZWa5UZHRQROyJiOeBUcANQCswpN1uQ4AtlYjHzMwSlR4i2o+kJrACGN+2UtLAovVmZlYhmSUBSSMkXSJpkKS+kiYBlwJPAguAsZImS6oDvgX8zkVhM7PKynJ0UJBc+vkRSbJZA3w9In4JIGky8EPgAeA3wCUZxmLWI2Q9rbSnrbZSZZYEImI9cNY+ti8Cjsnq/GZm1jVPG2FmlmNOAmZmOeYkYGaWY37GsFkP4+KuVZJ7AmZmOeYkYGaWY04CZmY55iRgZpZjLgybWUlcuO5d3BMwM8sxJwEzsxxzEjAzy7FUSUDSuKwDMTOzyktbGL5H0gDgfmBuRLR/NKSZZcBFWMtaqp5ARJwBXAYcDiyR9KCkP880MjMzy1zqmkBEvArcCtxM8pyAuyStlPTFjvaXNEDSfZLWSNoi6d8l/UVhW6OkkNRa9LqtHA0yM7P0Ul0OknQCcDVwPrAQuDAilko6DPg3Y
H4n7/0HkoTxBvA54Gft6gtDI2L3fsRvZmb7IW1NYDrwj8A3I2Jb28qIeFPSrR0dEBFbgWlFqx6V9DpwMrCke+GamVk5pU0C5wPbImIPgKQ+QF1EvBcRc9K8gaSRwGhgRdHqNZKCpHcxNSI2pA/dzMz2V9oksAg4B2gtLB8IPAGcluZgSf2BucBPI2KlpEHARODfgWHA3YXtkzo4thloBmhoaEgZbr54BEm+FX//+7N/qe9jvUPawnBdRLQlAAp/PjDNgYVewxxgJzCl7fiIaImI3RHxdmH9uZIGtz8+ImZGRFNENNXX16cM18zM0kibBLZKmtC2IOlkYNs+9m/bT8B9wEhgckTs6mTXKDEeMzMrg7SXg74O/FzSm4CAQ4AvpzhuBnAscE5xQVnSqcAm4FXg48BdwDO+Cc3MrLJSJYGIWCzpGGBMYdWqffyrHgBJRwBfBXYAbyWdAiisex+4ExgBvEtSGL605OjNzGy/lPI8gYlAY+GYCZKIiNmd7RwRa0h6DZ15qIRzm/U6WRdiXei1NNLeLDYHOJpkNM+ewuoAOk0CZmbW86XtCTQBx0VEdLmnmZnVjLSjcZaTFIPNzKwXSdsTGA68JOm3JIVeACLi85lEZWZmFZE2CUzLMoi88p2+tcPflfVWaYeI/mthyOenImKRpAOBvtmGZmZmWUv7eMnrgF8A9xZWfQJ4OKugzMysMtIWhv8bcDrJjV1tD5gZkVVQZmZWGWmTwI6I2Nm2IKkfH873Y2ZmNSptYfhfJX0TOKDwbOEbgUeyC8vKwcXM6vHdulYr0vYEbgHWAy+SzP3zGMnzhs3MrIalHR30PvDjwsvMzHqJtHMHvU4HNYCIOKrsEZmZWcWUMndQmzrgYuDg8odjZmaVlPZy0MZ2q/5B0hLgW+UPyaz8ylkkd8G9Y/5calPay0ETihb7kPQMSnkWgZmZ9UBpf8j/V9GfdwOrgf+yrwMkDQDuAc4huXT0GvA/IuJfCtvPBu4GGoDfAFcVHkRjZmYVkvZy0H/q5nv/ATgLeAP4HPAzSeOAVmA+cC3J/QZ/A8wD/qwb5zEzs25KeznoG/vaHhH/u4N1W/no7KOPFkYZnQwMA1ZExM8L7z8N2CDpmIhYmS50MzPbX6WMDpoI/LKwfCHwW+DVtCeSNBIYDawAbgCWtW2LiK2SXgOOB1a2O64ZaAZoaGhIezqrkLwXA/Pefqt9aZPAKGBCRGyBD/7l/quIuDzNwZL6A3OBn0bESkmDSO5ALrYZGNz+2IiYCcwEaGpq8nxFZmZllHbaiJHAzqLlnYV1XZLUB5hTOGZKYXUrMKTdrkOALSnjMTOzMkjbE5gN/FbSgsLyF4CfdnWQJAH3kSSMz0XErsKmFcCVRfsNBI4urDczswpJ1ROIiL8Frgb+VHhdHRF3pjh0BnAscGFEbCtavwAYK2mypDqSm85+56KwmVlllXLD14HAuxExS1K9pCMj4vXOdi48jvKrJA+mfyvpFADw1YiYK2ky8EPgAZL7BC7pVgvMrGo8ZXbtSztE9NskI4TGALOA/iQ/3qd3dkzhxi/tY/si4JhSgjUzs/JKWxi+CPg8sBUgIt6kg5E8ZmZWW9ImgZ0RERSmky4Ucs3MrMalTQI/k3QvMFTSdcAi/IAZM7Oal3buoP9ZeLbwuyR1gW9FxMJMI7Oa5jtpy8+fqWWhyyQgqS+wqDCJnH/4zcx6kS4vB0XEHuB9SQdVIB4zM6ugtPcJtAIvSlpIYYQQQETclElUZmZWEWmTwPzCy8zMepF9JgFJDRHxRkR0OU+Q7R8X/awUvlPXyqWrmsDDbX+Q9M8Zx2JmZhXWVRIonvbhqCwDMTOzyusqCUQnfzYzs16gq8LweEnvkvQIDij8mcJyRET7B8OYmVkN2WcSiIi+lQrEKquzwmJxUTrNPj3Z/hZPSz3exVqrRWnnDjIzs14o0yQgaYqkFkk7JN1ftL5RUkhqLXrdlmUsZma2t1KeLNYdbwJ3AJOAAzrYP
jQidmccg5mZdSLTJBAR8wEkNQGjsjyXmZmVLuueQFfWSAqS2UmnRsSG9jtIagaaARoaGiocXnWkKTDWSnHW8qnUO+B9x3z1VKswvAGYCBwBnEzyqMq5He0YETMjoikimurr6ysYoplZ71eVnkBEtAIthcW3JU0B1kkaHBFbqhGTmVke9ZQhom13I/eUeMzMciHTnoCkfoVz9AX6SqoDdpNcAtoEvAp8HLgLeCYiNmcZj5mZfVTWl4NuBb5dtHw58B1gFXAnMILkucULgUszjqVqXPT6kD8La9PZAAgXlSsr6yGi04BpnWx+KMtzm5lZ13wN3swsx5wEzMxyzEnAzCzHqn3HsHVTmqJarXJh0Kxy3BMwM8sxJwEzsxxzEjAzyzEnATOzHHNhuBs6K0T2tqJsLb5nmnPtq5DcG75Ds1K4J2BmlmNOAmZmOeYkYGaWY04CZmY55sLwfnIh8UP781n4c8yHcv0d8Z3h5eOegJlZjmWaBCRNkdQiaYek+9ttO1vSSknvSXpa0hFZxmJmZnvLuifwJnAH8JPilZKGA/OB24CDSR46Py/jWMzMrJ2snyw2H0BSEzCqaNMXgRUR8fPC9mnABknHRMTKLGMyM7MPVaswfDywrG0hIrZKeq2w/iNJQFIz0AzQ0NBQyRith6mVu5mtfPz9ZK9aheFBwOZ26zYDg9vvGBEzI6IpIprq6+srEpyZWV5UKwm0AkParRsCbKlCLGZmuVWtJLACGN+2IGkgcHRhvZmZVUjWQ0T7SaoD+gJ9JdVJ6gcsAMZKmlzY/i3gdy4Km5lVVtaF4VuBbxctXw58JyKmSZoM/BB4APgNcEnGseyXWi9Q1Xr8ZpaNrIeITgOmdbJtEXBMluc3M7N987QRZmY55iRgZpZjTgJmZjnmqaT3wXeoZsufRT70hO+5sxg8JbV7AmZmueYkYGaWY04CZmY55iRgZpZjLgy30xOKWPYhfx/WEf+9KB/3BMzMcsxJwMwsx5wEzMxyzEnAzCzHclsYLi4s+a7B0rgoZz2V/78unXsCZmY5VtUkIOkZSdsltRZeq6oZj5lZ3vSEnsCUiBhUeI2pdjBmZnnSE5KAmZlVSU9IAt+VtEHSryV9ptrBmJnlSbWTwM3AUcAngJnAI5KOLt5BUrOkFkkt69evr0aMZma9VlWTQET8JiK2RMSOiPgp8Gvgc+32mRkRTRHRVF9fX51Azcx6qWr3BNoLQNUOwswsL6qWBCQNlTRJUp2kfpIuA84EHq9WTGZmeVPNO4b7A3cAxwB7gJXAFyLilSrGZGaWK1VLAhGxHphYrfObWe+WZnoTTzPR82oCZmZWQU4CZmY55iRgZpZjTgJmZjmW2+cJmJl1Jk8FY/cEzMxyzEnAzCzHnATMzHLMScDMLMdyVRj2A9LNbH+k/Q2ppWKyewJmZjnmJGBmlmNOAmZmOeYkYGaWY7kqDJuZdaazom93BpSUesdxZ/tX4s5l9wTMzHKsqklA0sGSFkjaKmmNpK9UMx4zs7yp9uWgu4GdwEjgROBXkpZFxIrqhmVmlg/VfND8QGAycFtEtEbE88AvgSuqFZOZWd4oIqpzYukk4NcRcWDRur8GzoqIC4vWNQPNhcUxwKr9OO1wYMN+HF9r8tZecJvzwm0uzRERUd/RhmpeDhoEvNtu3WZgcPGKiJgJzCzHCSW1RERTOd6rFuStveA254XbXD7VLAy3AkParRsCbKlCLGZmuVTNJPAK0E/Sp4rWjQdcFDYzq5CqJYGI2ArMB26XNFDS6cB/BuZkeNqyXFaqIXlrL7jNeeE2l0nVCsOQ3CcA/AT4c2AjcEtEPFi1gMzMcqaqScDMzKrL00aYmeWYk4CZWY71qiSQdi4iJf5e0sbC6+8lqdLxlkMJbZ4qabmkLZJelzS10rGWS6lzTkn6mKSXJa2tVIzlVEp7JU2Q9KykVklvS/qrSsZaLiX8vR4g6UeFtv5R0iOSPlHpeMtB0hRJLZJ2SLq/i33/u6S3J
L0r6SeSBnT3vL0qCfDRuYguA2ZIOr6D/ZqBL5AMST0BuBD4aqWCLLO0bRbwX4GPA+cBUyRdUrEoyyttm9tMBdZXIrCMpGqvpOHA48C9wDDgk8ATFYyznNJ+x38FfJrk/+PDgD8B0ysVZJm9CdxBMlimU5ImAbcAZwNHAEcB3+n2WSOiV7yAgSR/aUYXrZsD/F0H+74ANBct/yXwf6vdhizb3MGxdwHTq92GrNsMHAm8DPwFsLba8WfZXuBOYE61Y65wm2cA3ytaPh9YVe027Gf77wDu38f2B4E7i5bPBt7q7vl6U09gNLA7Il4pWrcM6OhfD8cXtnW1X09XSps/ULj0dQa1eWNeqW2eDnwT2JZ1YBkppb1/BvxR0guS3ilcGmmoSJTlVUqb7wNOl3SYpANJeg3/UoEYq6mj36+RkoZ15816UxJINRdR0b6b2+03qAbrAqW0udg0ku9+VgYxZS11myVdBPSNiAWVCCwjpXzHo4ArSS6RNACvAw9lGl02Smnzq8AfgP8oHHMscHum0VVfR79f0PX/9x3qTUmglLmI2u87BGiNQt+qhpQ8/5KkKSS1gfMjYkeGsWUlVZsLU5V/D7ipQnFlpZTveBuwICIWR8R2kuvEp0k6KOMYy62UNt8NDCCpgQwkmYWgt/cEOvr9gm7Ou9abkkApcxGtKGzrar+erqT5lyRdQ6GgFBE1OVKG9G3+FNAIPCfpLZIfh0MLIyoaKxBnuZTyHf8OKP6HTK39o6ZNKW0+keT6+R8L/6iZDpxSKJL3Vh39fr0dERu79W7VLoKUuaDyTyTd34HA6STdpOM72O96kmLhJ0hGFKwArq92/Bm3+TLgLeDYasdciTaTTJN+SNHriySjLw4huURU9XZk8B1/lmR0zIlAf+D7wHPVjj/jNs8C/hk4qNDmbwL/Ue34u9nmfkAd8F2SQngd0K+D/c4r/L98HDAUeIoUg0E6PW+1G17mD/Fg4GFgK/AG8JXC+jNILve07SeSSwV/LLy+R2EKjVp7ldDm14FdJF3JttePqh1/lm1ud8xnqMHRQaW2F7iB5Pr4n4BHgMOrHX+WbSa5DDQXeAfYBDwPnFLt+LvZ5mkkvbfi1zSS+k4r0FC07zeAt0nqILOAAd09r+cOMjPLsd5UEzAzsxI5CZiZ5ZiTgJlZjjkJmJnlmJOAmVmOOQmYmeWYk4CZWY45CZiZ5dj/BywbGaIaCXKXAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "มาปรับขนาดข้อมูลทดสอบกันเถอะ\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 25, + "source": [ + "test['load'] = scaler.transform(test)\n", + "test.head()" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2014-12-30 00:00:000.33
2014-12-30 01:00:000.29
2014-12-30 02:00:000.27
2014-12-30 03:00:000.27
2014-12-30 04:00:000.30
\n", + "
" + ], + "text/plain": [ + " load\n", + "2014-12-30 00:00:00 0.33\n", + "2014-12-30 01:00:00 0.29\n", + "2014-12-30 02:00:00 0.27\n", + "2014-12-30 03:00:00 0.27\n", + "2014-12-30 04:00:00 0.30" + ] + }, + "metadata": {}, + "execution_count": 25 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 26, + "source": [ + "# Specify the number of steps to forecast ahead\n", + "HORIZON = 3\n", + "print('Forecasting horizon:', HORIZON, 'hours')" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Forecasting horizon: 3 hours\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 27, + "source": [ + "order = (4, 1, 0)\n", + "seasonal_order = (1, 1, 0, 24)\n", + "\n", + "model = SARIMAX(endog=train, order=order, seasonal_order=seasonal_order)\n", + "results = model.fit()\n", + "\n", + "print(results.summary())\n" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " SARIMAX Results \n", + "==========================================================================================\n", + "Dep. Variable: load No. 
Observations: 1416\n", + "Model: SARIMAX(4, 1, 0)x(1, 1, 0, 24) Log Likelihood 3477.239\n", + "Date: Thu, 30 Sep 2021 AIC -6942.477\n", + "Time: 14:36:28 BIC -6911.050\n", + "Sample: 11-01-2014 HQIC -6930.725\n", + " - 12-29-2014 \n", + "Covariance Type: opg \n", + "==============================================================================\n", + " coef std err z P>|z| [0.025 0.975]\n", + "------------------------------------------------------------------------------\n", + "ar.L1 0.8403 0.016 52.226 0.000 0.809 0.872\n", + "ar.L2 -0.5220 0.034 -15.388 0.000 -0.588 -0.456\n", + "ar.L3 0.1536 0.044 3.470 0.001 0.067 0.240\n", + "ar.L4 -0.0778 0.036 -2.158 0.031 -0.148 -0.007\n", + "ar.S.L24 -0.2327 0.024 -9.718 0.000 -0.280 -0.186\n", + "sigma2 0.0004 8.32e-06 47.358 0.000 0.000 0.000\n", + "===================================================================================\n", + "Ljung-Box (L1) (Q): 0.05 Jarque-Bera (JB): 1464.60\n", + "Prob(Q): 0.83 Prob(JB): 0.00\n", + "Heteroskedasticity (H): 0.84 Skew: 0.14\n", + "Prob(H) (two-sided): 0.07 Kurtosis: 8.02\n", + "===================================================================================\n", + "\n", + "Warnings:\n", + "[1] Covariance matrix calculated using the outer product of gradients (complex-step).\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "สร้างจุดข้อมูลทดสอบสำหรับแต่ละขั้นตอนของ HORIZON\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 28, + "source": [ + "test_shifted = test.copy()\n", + "\n", + "for t in range(1, HORIZON):\n", + " test_shifted['load+'+str(t)] = test_shifted['load'].shift(-t, freq='H')\n", + " \n", + "test_shifted = test_shifted.dropna(how='any')\n", + "test_shifted.head(5)" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
loadload+1load+2
2014-12-30 00:00:000.330.290.27
2014-12-30 01:00:000.290.270.27
2014-12-30 02:00:000.270.270.30
2014-12-30 03:00:000.270.300.41
2014-12-30 04:00:000.300.410.57
\n", + "
" + ], + "text/plain": [ + " load load+1 load+2\n", + "2014-12-30 00:00:00 0.33 0.29 0.27\n", + "2014-12-30 01:00:00 0.29 0.27 0.27\n", + "2014-12-30 02:00:00 0.27 0.27 0.30\n", + "2014-12-30 03:00:00 0.27 0.30 0.41\n", + "2014-12-30 04:00:00 0.30 0.41 0.57" + ] + }, + "metadata": {}, + "execution_count": 28 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 29, + "source": [ + "%%time\n", + "training_window = 720 # dedicate 30 days (720 hours) for training\n", + "\n", + "train_ts = train['load']\n", + "test_ts = test_shifted\n", + "\n", + "history = [x for x in train_ts]\n", + "history = history[(-training_window):]\n", + "\n", + "predictions = list()\n", + "\n", + "# let's use a simpler model for demonstration\n", + "order = (2, 1, 0)\n", + "seasonal_order = (1, 1, 0, 24)\n", + "\n", + "for t in range(test_ts.shape[0]):\n", + " model = SARIMAX(endog=history, order=order, seasonal_order=seasonal_order)\n", + " model_fit = model.fit()\n", + " yhat = model_fit.forecast(steps = HORIZON)\n", + " predictions.append(yhat)\n", + " obs = list(test_ts.iloc[t])\n", + " # move the training window\n", + " history.append(obs[0])\n", + " history.pop(0)\n", + " print(test_ts.index[t])\n", + " print(t+1, ': predicted =', yhat, 'expected =', obs)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "2014-12-30 00:00:00\n", + "1 : predicted = [0.32 0.29 0.28] expected = [0.32945389435989236, 0.2900626678603402, 0.2739480752014323]\n", + "2014-12-30 01:00:00\n", + "2 : predicted = [0.3 0.29 0.3 ] expected = [0.2900626678603402, 0.2739480752014323, 0.26812891674127126]\n", + "2014-12-30 02:00:00\n", + "3 : predicted = [0.27 0.28 0.32] expected = [0.2739480752014323, 0.26812891674127126, 0.3025962399283795]\n", + "2014-12-30 03:00:00\n", + "4 : predicted = [0.28 0.32 0.42] expected = [0.26812891674127126, 0.3025962399283795, 0.40823634735899716]\n", + 
"2014-12-30 04:00:00\n", + "5 : predicted = [0.3 0.39 0.54] expected = [0.3025962399283795, 0.40823634735899716, 0.5689346463742166]\n", + "2014-12-30 05:00:00\n", + "6 : predicted = [0.4 0.55 0.66] expected = [0.40823634735899716, 0.5689346463742166, 0.6799462846911368]\n", + "2014-12-30 06:00:00\n", + "7 : predicted = [0.57 0.68 0.75] expected = [0.5689346463742166, 0.6799462846911368, 0.7309758281110115]\n", + "2014-12-30 07:00:00\n", + "8 : predicted = [0.68 0.75 0.8 ] expected = [0.6799462846911368, 0.7309758281110115, 0.7511190689346463]\n", + "2014-12-30 08:00:00\n", + "9 : predicted = [0.75 0.8 0.82] expected = [0.7309758281110115, 0.7511190689346463, 0.7636526410026856]\n", + "2014-12-30 09:00:00\n", + "10 : predicted = [0.77 0.78 0.78] expected = [0.7511190689346463, 0.7636526410026856, 0.7381378692927483]\n", + "2014-12-30 10:00:00\n", + "11 : predicted = [0.76 0.75 0.74] expected = [0.7636526410026856, 0.7381378692927483, 0.7188898836168307]\n", + "2014-12-30 11:00:00\n", + "12 : predicted = [0.77 0.76 0.75] expected = [0.7381378692927483, 0.7188898836168307, 0.7090420769919425]\n", + "2014-12-30 12:00:00\n", + "13 : predicted = [0.7 0.68 0.69] expected = [0.7188898836168307, 0.7090420769919425, 0.7081468218442255]\n", + "2014-12-30 13:00:00\n", + "14 : predicted = [0.72 0.73 0.76] expected = [0.7090420769919425, 0.7081468218442255, 0.7385854968666068]\n", + "2014-12-30 14:00:00\n", + "15 : predicted = [0.71 0.73 0.86] expected = [0.7081468218442255, 0.7385854968666068, 0.8478066248880931]\n", + "2014-12-30 15:00:00\n", + "16 : predicted = [0.73 0.85 0.97] expected = [0.7385854968666068, 0.8478066248880931, 0.9516562220232765]\n", + "2014-12-30 16:00:00\n", + "17 : predicted = [0.87 0.99 0.97] expected = [0.8478066248880931, 0.9516562220232765, 0.934198746642793]\n", + "2014-12-30 17:00:00\n", + "18 : predicted = [0.94 0.92 0.86] expected = [0.9516562220232765, 0.934198746642793, 0.8876454789615038]\n", + "2014-12-30 18:00:00\n", + "19 : predicted = 
[0.94 0.89 0.82] expected = [0.934198746642793, 0.8876454789615038, 0.8294538943598924]\n", + "2014-12-30 19:00:00\n", + "20 : predicted = [0.88 0.82 0.71] expected = [0.8876454789615038, 0.8294538943598924, 0.7197851387645477]\n", + "2014-12-30 20:00:00\n", + "21 : predicted = [0.83 0.72 0.58] expected = [0.8294538943598924, 0.7197851387645477, 0.5747538048343777]\n", + "2014-12-30 21:00:00\n", + "22 : predicted = [0.72 0.58 0.47] expected = [0.7197851387645477, 0.5747538048343777, 0.4592658907788718]\n", + "2014-12-30 22:00:00\n", + "23 : predicted = [0.58 0.47 0.39] expected = [0.5747538048343777, 0.4592658907788718, 0.3858549686660697]\n", + "2014-12-30 23:00:00\n", + "24 : predicted = [0.46 0.38 0.34] expected = [0.4592658907788718, 0.3858549686660697, 0.34377797672336596]\n", + "2014-12-31 00:00:00\n", + "25 : predicted = [0.38 0.34 0.33] expected = [0.3858549686660697, 0.34377797672336596, 0.32542524619516544]\n", + "2014-12-31 01:00:00\n", + "26 : predicted = [0.36 0.34 0.34] expected = [0.34377797672336596, 0.32542524619516544, 0.33034914950760963]\n", + "2014-12-31 02:00:00\n", + "27 : predicted = [0.32 0.32 0.35] expected = [0.32542524619516544, 0.33034914950760963, 0.3706356311548791]\n", + "2014-12-31 03:00:00\n", + "28 : predicted = [0.32 0.36 0.47] expected = [0.33034914950760963, 0.3706356311548791, 0.470008952551477]\n", + "2014-12-31 04:00:00\n", + "29 : predicted = [0.37 0.48 0.65] expected = [0.3706356311548791, 0.470008952551477, 0.6145926589077886]\n", + "2014-12-31 05:00:00\n", + "30 : predicted = [0.48 0.64 0.75] expected = [0.470008952551477, 0.6145926589077886, 0.7247090420769919]\n", + "2014-12-31 06:00:00\n", + "31 : predicted = [0.63 0.73 0.79] expected = [0.6145926589077886, 0.7247090420769919, 0.786034019695613]\n", + "2014-12-31 07:00:00\n", + "32 : predicted = [0.71 0.76 0.79] expected = [0.7247090420769919, 0.786034019695613, 0.8012533572068039]\n", + "2014-12-31 08:00:00\n", + "33 : predicted = [0.79 0.82 0.83] expected = 
[0.786034019695613, 0.8012533572068039, 0.7994628469113696]\n", + "2014-12-31 09:00:00\n", + "34 : predicted = [0.82 0.83 0.81] expected = [0.8012533572068039, 0.7994628469113696, 0.780214861235452]\n", + "2014-12-31 10:00:00\n", + "35 : predicted = [0.8 0.78 0.76] expected = [0.7994628469113696, 0.780214861235452, 0.7587287376902416]\n", + "2014-12-31 11:00:00\n", + "36 : predicted = [0.77 0.75 0.74] expected = [0.780214861235452, 0.7587287376902416, 0.7367949865711727]\n", + "2014-12-31 12:00:00\n", + "37 : predicted = [0.77 0.76 0.76] expected = [0.7587287376902416, 0.7367949865711727, 0.7188898836168307]\n", + "2014-12-31 13:00:00\n", + "38 : predicted = [0.75 0.75 0.78] expected = [0.7367949865711727, 0.7188898836168307, 0.7273948075201431]\n", + "2014-12-31 14:00:00\n", + "39 : predicted = [0.73 0.75 0.87] expected = [0.7188898836168307, 0.7273948075201431, 0.8299015219337511]\n", + "2014-12-31 15:00:00\n", + "40 : predicted = [0.74 0.85 0.96] expected = [0.7273948075201431, 0.8299015219337511, 0.909579230080573]\n", + "2014-12-31 16:00:00\n", + "41 : predicted = [0.83 0.94 0.93] expected = [0.8299015219337511, 0.909579230080573, 0.855863921217547]\n", + "2014-12-31 17:00:00\n", + "42 : predicted = [0.94 0.93 0.88] expected = [0.909579230080573, 0.855863921217547, 0.7721575649059982]\n", + "2014-12-31 18:00:00\n", + "43 : predicted = [0.87 0.82 0.77] expected = [0.855863921217547, 0.7721575649059982, 0.7023276633840643]\n", + "2014-12-31 19:00:00\n", + "44 : predicted = [0.79 0.73 0.63] expected = [0.7721575649059982, 0.7023276633840643, 0.6195165622202325]\n", + "2014-12-31 20:00:00\n", + "45 : predicted = [0.7 0.59 0.46] expected = [0.7023276633840643, 0.6195165622202325, 0.5425246195165621]\n", + "2014-12-31 21:00:00\n", + "46 : predicted = [0.6 0.47 0.36] expected = [0.6195165622202325, 0.5425246195165621, 0.4735899731423454]\n", + "CPU times: user 12min 15s, sys: 2min 39s, total: 14min 54s\n", + "Wall time: 2min 36s\n" + ] + } + ], + "metadata": { + 
"scrolled": true + } + }, + { + "cell_type": "markdown", + "source": [ + "เปรียบเทียบการคาดการณ์กับโหลดจริง\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 30, + "source": [ + "eval_df = pd.DataFrame(predictions, columns=['t+'+str(t) for t in range(1, HORIZON+1)])\n", + "eval_df['timestamp'] = test.index[0:len(test.index)-HORIZON+1]\n", + "eval_df = pd.melt(eval_df, id_vars='timestamp', value_name='prediction', var_name='h')\n", + "eval_df['actual'] = np.array(np.transpose(test_ts)).ravel()\n", + "eval_df[['prediction', 'actual']] = scaler.inverse_transform(eval_df[['prediction', 'actual']])\n", + "eval_df.head()" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
timestamphpredictionactual
02014-12-30 00:00:00t+13,008.743,023.00
12014-12-30 01:00:00t+12,955.532,935.00
22014-12-30 02:00:00t+12,900.172,899.00
32014-12-30 03:00:00t+12,917.692,886.00
42014-12-30 04:00:00t+12,946.992,963.00
\n", + "
" + ], + "text/plain": [ + " timestamp h prediction actual\n", + "0 2014-12-30 00:00:00 t+1 3,008.74 3,023.00\n", + "1 2014-12-30 01:00:00 t+1 2,955.53 2,935.00\n", + "2 2014-12-30 02:00:00 t+1 2,900.17 2,899.00\n", + "3 2014-12-30 03:00:00 t+1 2,917.69 2,886.00\n", + "4 2014-12-30 04:00:00 t+1 2,946.99 2,963.00" + ] + }, + "metadata": {}, + "execution_count": 30 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "คำนวณ **ค่าเฉลี่ยเปอร์เซ็นต์ความคลาดเคลื่อนสัมบูรณ์ (MAPE)** สำหรับการพยากรณ์ทั้งหมด\n", + "\n", + "$$MAPE = \\frac{1}{n} \\sum_{t=1}^{n}|\\frac{actual_t - predicted_t}{actual_t}|$$\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 31, + "source": [ + "if(HORIZON > 1):\n", + " eval_df['APE'] = (eval_df['prediction'] - eval_df['actual']).abs() / eval_df['actual']\n", + " print(eval_df.groupby('h')['APE'].mean())" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "h\n", + "t+1 0.01\n", + "t+2 0.01\n", + "t+3 0.02\n", + "Name: APE, dtype: float64\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 32, + "source": [ + "print('One step forecast MAPE: ', (mape(eval_df[eval_df['h'] == 't+1']['prediction'], eval_df[eval_df['h'] == 't+1']['actual']))*100, '%')" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "One step forecast MAPE: 0.5570581332313952 %\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 33, + "source": [ + "print('Multi-step forecast MAPE: ', mape(eval_df['prediction'], eval_df['actual'])*100, '%')" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Multi-step forecast MAPE: 1.1460048657704118 %\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "แสดงกราฟการคาดการณ์เทียบกับค่าจริงสำหรับสัปดาห์แรกของชุดทดสอบ\n" + ], + 
"metadata": {} + }, + { + "cell_type": "code", + "execution_count": 34, + "source": [ + "if(HORIZON == 1):\n", + " ## Plotting single step forecast\n", + " eval_df.plot(x='timestamp', y=['actual', 'prediction'], style=['r', 'b'], figsize=(15, 8))\n", + "\n", + "else:\n", + " ## Plotting multi step forecast\n", + " plot_df = eval_df[(eval_df.h=='t+1')][['timestamp', 'actual']]\n", + " for t in range(1, HORIZON+1):\n", + " plot_df['t+'+str(t)] = eval_df[(eval_df.h=='t+'+str(t))]['prediction'].values\n", + "\n", + " fig = plt.figure(figsize=(15, 8))\n", + " ax = plt.plot(plot_df['timestamp'], plot_df['actual'], color='red', linewidth=4.0)\n", + " ax = fig.add_subplot(111)\n", + " for t in range(1, HORIZON+1):\n", + " x = plot_df['timestamp'][(t-1):]\n", + " y = plot_df['t+'+str(t)][0:len(x)]\n", + " ax.plot(x, y, color='blue', linewidth=4*math.pow(.9,t), alpha=math.pow(0.8,t))\n", + " \n", + " ax.legend(loc='best')\n", + " \n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "No handles with labels found to put in legend.\n" + ] + }, + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้อง แต่โปรดทราบว่าการแปลอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่แม่นยำ เอกสารต้นฉบับในภาษาดั้งเดิมควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ แนะนำให้ใช้บริการแปลภาษาจากผู้เชี่ยวชาญ เราไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความที่ผิดพลาดซึ่งเกิดจากการใช้การแปลนี้\n" + ] + } + ], + "metadata": { + "kernel_info": { + "name": "python3" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "nteract": { + "version": "nteract-front-end@1.0.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "c193140200b9684da27e3890211391b6", + "translation_date": "2025-09-06T13:59:09+00:00", + "source_file": "7-TimeSeries/2-ARIMA/solution/notebook.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/th/7-TimeSeries/2-ARIMA/working/notebook.ipynb b/translations/th/7-TimeSeries/2-ARIMA/working/notebook.ipynb new file mode 100644 index 000000000..21ea12e6c --- /dev/null +++ b/translations/th/7-TimeSeries/2-ARIMA/working/notebook.ipynb @@ -0,0 +1,61 @@ +{ + 
"metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "523ec472196307b3c4235337353c9ceb", + "translation_date": "2025-09-06T14:00:42+00:00", + "source_file": "7-TimeSeries/2-ARIMA/working/notebook.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# การพยากรณ์ข้อมูลอนุกรมเวลาด้วย ARIMA\n", + "\n", + "ในโน้ตบุ๊กนี้ เราจะแสดงวิธีการ:\n", + "- เตรียมข้อมูลอนุกรมเวลาเพื่อฝึกโมเดลการพยากรณ์อนุกรมเวลาด้วย ARIMA\n", + "- สร้างโมเดล ARIMA แบบง่ายเพื่อพยากรณ์ค่าล่วงหน้าในช่วง HORIZON (ตั้งแต่เวลา *t+1* ถึง *t+HORIZON*) ในอนุกรมเวลา\n", + "- ประเมินผลโมเดล\n", + "\n", + "ข้อมูลในตัวอย่างนี้นำมาจากการแข่งขัน GEFCom2014 ด้านการพยากรณ์ \n", + "\n", + "ประกอบด้วยข้อมูลโหลดไฟฟ้ารายชั่วโมงและค่าอุณหภูมิเป็นเวลา 3 ปี ระหว่างปี 2012 ถึง 2014 โดยมีเป้าหมายเพื่อพยากรณ์ค่าโหลดไฟฟ้าในอนาคต ในตัวอย่างนี้ เราจะแสดงวิธีพยากรณ์ค่าล่วงหน้าเพียงหนึ่งช่วงเวลา โดยใช้ข้อมูลโหลดในอดีตเท่านั้น\n", + "\n", + "Tao Hong, Pierre Pinson, Shu Fan, Hamidreza Zareipour, Alberto Troccoli และ Rob J. 
Hyndman, \"Probabilistic energy forecasting: Global Energy Forecasting Competition 2014 and beyond\", International Journal of Forecasting, vol.32, no.3, pp 896-913, กรกฎาคม-กันยายน, 2016.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pip install statsmodels" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้อง แต่โปรดทราบว่าการแปลอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่แม่นยำ เอกสารต้นฉบับในภาษาดั้งเดิมควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ แนะนำให้ใช้บริการแปลภาษาจากผู้เชี่ยวชาญ เราไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความที่ผิดพลาดซึ่งเกิดจากการใช้การแปลนี้\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/th/7-TimeSeries/3-SVR/solution/notebook.ipynb b/translations/th/7-TimeSeries/3-SVR/solution/notebook.ipynb new file mode 100644 index 000000000..2aa997fe3 --- /dev/null +++ b/translations/th/7-TimeSeries/3-SVR/solution/notebook.ipynb @@ -0,0 +1,1013 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "fv9OoQsMFk5A" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ในสมุดบันทึกนี้ เราจะแสดงวิธีการ:\n", + "\n", + "- เตรียมข้อมูลชุดเวลาแบบ 2 มิติสำหรับการฝึกโมเดล SVM regressor \n", + "- ใช้ SVR ด้วย RBF kernel \n", + "- ประเมินโมเดลโดยใช้กราฟและค่า MAPE \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append('../../')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "M687KNlQFp0-" + }, + "outputs": [], + "source": [ + "import os\n", + "import warnings\n", 
+ "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import datetime as dt\n", + "import math\n", + "\n", + "from sklearn.svm import SVR\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from common.utils import load_data, mape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Cj-kfVdMGjWP" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8fywSjC6GsRz" + }, + "source": [ + "### โหลดข้อมูล\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "aBDkEB11Fumg", + "outputId": "99cf7987-0509-4b73-8cc2-75d7da0d2740" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2012-01-01 00:00:002698.0
2012-01-01 01:00:002558.0
2012-01-01 02:00:002444.0
2012-01-01 03:00:002402.0
2012-01-01 04:00:002403.0
\n", + "
" + ], + "text/plain": [ + " load\n", + "2012-01-01 00:00:00 2698.0\n", + "2012-01-01 01:00:00 2558.0\n", + "2012-01-01 02:00:00 2444.0\n", + "2012-01-01 03:00:00 2402.0\n", + "2012-01-01 04:00:00 2403.0" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "energy = load_data('../../data')[['load']]\n", + "energy.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "O0BWP13rGnh4" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 486 + }, + "id": "hGaNPKu_Gidk", + "outputId": "7f89b326-9057-4f49-efbe-cb100ebdf76d" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "energy.plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IPuNor4eGwYY" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "ysvsNyONGt0Q" + }, + "outputs": [], + "source": [ + "train_start_dt = '2014-11-01 00:00:00'\n", + "test_start_dt = '2014-12-30 00:00:00'" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 548 + }, + "id": "SsfdLoPyGy9w", + "outputId": "d6d6c25b-b1f4-47e5-91d1-707e043237d7" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "energy[(energy.index < test_start_dt) & (energy.index >= train_start_dt)][['load']].rename(columns={'load':'train'}) \\\n", + " .join(energy[test_start_dt:][['load']].rename(columns={'load':'test'}), how='outer') \\\n", + " .plot(y=['train', 'test'], figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XbFTqBw6G1Ch" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ตอนนี้คุณจำเป็นต้องเตรียมข้อมูลสำหรับการฝึกโดยการกรองและปรับขนาดข้อมูลของคุณ\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cYivRdQpHDj3", + "outputId": "a138f746-461c-4fd6-bfa6-0cee094c4aa1" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training data shape: (1416, 1)\n", + "Test data shape: (48, 1)\n" + ] + } + ], + "source": [ + "train = energy.copy()[(energy.index >= train_start_dt) & (energy.index < test_start_dt)][['load']]\n", + "test = energy.copy()[energy.index >= test_start_dt][['load']]\n", + "\n", + "print('Training data shape: ', train.shape)\n", + "print('Test data shape: ', test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ปรับขนาดข้อมูลให้อยู่ในช่วง (0, 1).\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "3DNntGQnZX8G", + "outputId": "210046bc-7a66-4ccd-d70d-aa4a7309949c" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2014-11-01 00:00:000.101611
2014-11-01 01:00:000.065801
2014-11-01 02:00:000.046106
2014-11-01 03:00:000.042525
2014-11-01 04:00:000.059087
\n", + "
" + ], + "text/plain": [ + " load\n", + "2014-11-01 00:00:00 0.101611\n", + "2014-11-01 01:00:00 0.065801\n", + "2014-11-01 02:00:00 0.046106\n", + "2014-11-01 03:00:00 0.042525\n", + "2014-11-01 04:00:00 0.059087" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "scaler = MinMaxScaler()\n", + "train['load'] = scaler.fit_transform(train)\n", + "train.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "26Yht-rzZexe", + "outputId": "20326077-a38a-4e78-cc5b-6fd7af95d301" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2014-12-30 00:00:000.329454
2014-12-30 01:00:000.290063
2014-12-30 02:00:000.273948
2014-12-30 03:00:000.268129
2014-12-30 04:00:000.302596
\n", + "
" + ], + "text/plain": [ + " load\n", + "2014-12-30 00:00:00 0.329454\n", + "2014-12-30 01:00:00 0.290063\n", + "2014-12-30 02:00:00 0.273948\n", + "2014-12-30 03:00:00 0.268129\n", + "2014-12-30 04:00:00 0.302596" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test['load'] = scaler.transform(test)\n", + "test.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x0n6jqxOQ41Z" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fdmxTZtOQ8xs" + }, + "source": [ + "สำหรับ SVR ของเรา เราแปลงข้อมูลอินพุตให้อยู่ในรูปแบบ `[batch, timesteps]` ดังนั้น เราจึงปรับรูปร่าง `train_data` และ `test_data` ที่มีอยู่ให้มีมิติใหม่ซึ่งหมายถึง timesteps สำหรับตัวอย่างของเรา เรากำหนดให้ `timesteps = 5` ดังนั้น อินพุตของโมเดลจะเป็นข้อมูลสำหรับ 4 timesteps แรก และเอาต์พุตจะเป็นข้อมูลสำหรับ timestep ที่ 5\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "Rpju-Sc2HFm0" + }, + "outputs": [], + "source": [ + "# Converting to numpy arrays\n", + "\n", + "train_data = train.values\n", + "test_data = test.values" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# Selecting the timesteps\n", + "\n", + "timesteps=5" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "O-JrsrsVJhUQ", + "outputId": "c90dbe71-bacc-4ec4-b452-f82fe5aefaef" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(1412, 5)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Converting data to 2D tensor\n", + "\n", + "train_data_timesteps=np.array([[j for j in train_data[i:i+timesteps]] for i in range(0,len(train_data)-timesteps+1)])[:,:,0]\n", + "train_data_timesteps.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + 
"colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "exJD8AI7KE4g", + "outputId": "ce90260c-f327-427d-80f2-77307b5a6318" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(44, 5)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Converting test data to 2D tensor\n", + "\n", + "test_data_timesteps=np.array([[j for j in test_data[i:i+timesteps]] for i in range(0,len(test_data)-timesteps+1)])[:,:,0]\n", + "test_data_timesteps.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "2u0R2sIsLuq5" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(1412, 4) (1412, 1)\n", + "(44, 4) (44, 1)\n" + ] + } + ], + "source": [ + "x_train, y_train = train_data_timesteps[:,:timesteps-1],train_data_timesteps[:,[timesteps-1]]\n", + "x_test, y_test = test_data_timesteps[:,:timesteps-1],test_data_timesteps[:,[timesteps-1]]\n", + "\n", + "print(x_train.shape, y_train.shape)\n", + "print(x_test.shape, y_test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8wIPOtAGLZlh" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "id": "EhA403BEPEiD" + }, + "outputs": [], + "source": [ + "# Create model using RBF kernel\n", + "\n", + "model = SVR(kernel='rbf',gamma=0.5, C=10, epsilon = 0.05)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GS0UA3csMbqp", + "outputId": "d86b6f05-5742-4c1d-c2db-c40510bd4f0d" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "SVR(C=10, cache_size=200, coef0=0.0, degree=3, epsilon=0.05, gamma=0.5,\n", + " kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Fit model on training data\n", + "\n", + "model.fit(x_train, 
y_train[:,0])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Rz_x8S3UrlcF" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XR0gnt3MnuYS", + "outputId": "157e40ab-9a23-4b66-a885-0d52a24b2364" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(1412, 1) (44, 1)\n" + ] + } + ], + "source": [ + "# Making predictions\n", + "\n", + "y_train_pred = model.predict(x_train).reshape(-1,1)\n", + "y_test_pred = model.predict(x_test).reshape(-1,1)\n", + "\n", + "print(y_train_pred.shape, y_test_pred.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_2epncg-SGzr" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1412 44\n" + ] + } + ], + "source": [ + "# Scaling the predictions\n", + "\n", + "y_train_pred = scaler.inverse_transform(y_train_pred)\n", + "y_test_pred = scaler.inverse_transform(y_test_pred)\n", + "\n", + "print(len(y_train_pred), len(y_test_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xmm_YLXhq7gV", + "outputId": "18392f64-4029-49ac-c71a-a4e2411152a1" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1412 44\n" + ] + } + ], + "source": [ + "# Scaling the original values\n", + "\n", + "y_train = scaler.inverse_transform(y_train)\n", + "y_test = scaler.inverse_transform(y_test)\n", + "\n", + "print(len(y_train), len(y_test))" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "u3LBj93coHEi", + "outputId": "d4fd49e8-8c6e-4bb0-8ef9-ca0b26d725b4" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"1412 44\n" + ] + } + ], + "source": [ + "# Extract the timesteps for x-axis\n", + "\n", + "train_timestamps = energy[(energy.index < test_start_dt) & (energy.index >= train_start_dt)].index[timesteps-1:]\n", + "test_timestamps = energy[test_start_dt:].index[timesteps-1:]\n", + "\n", + "print(len(train_timestamps), len(test_timestamps))" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(25,6))\n", + "plt.plot(train_timestamps, y_train, color = 'red', linewidth=2.0, alpha = 0.6)\n", + "plt.plot(train_timestamps, y_train_pred, color = 'blue', linewidth=0.8)\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.title(\"Training data prediction\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "LnhzcnYtXHCm", + "outputId": "f5f0d711-f18b-4788-ad21-d4470ea2c02b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MAPE for training data: 1.7195710200875551 %\n" + ] + } + ], + "source": [ + "print('MAPE for training data: ', mape(y_train_pred, y_train)*100, '%')" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 225 + }, + "id": "53Q02FoqQH4V", + "outputId": "53e2d59b-5075-4765-ad9e-aed56c966583" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(10,3))\n", + "plt.plot(test_timestamps, y_test, color = 'red', linewidth=2.0, alpha = 0.6)\n", + "plt.plot(test_timestamps, y_test_pred, color = 'blue', linewidth=0.8)\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "clOAUH-SXCJG", + "outputId": "a3aa85ff-126a-4a4a-cd9e-90b9cc465ef5" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MAPE for testing data: 1.2623790187854018 %\n" + ] + } + ], + "source": [ + "print('MAPE for testing data: ', mape(y_test_pred, y_test)*100, '%')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DHlKvVCId5ue" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cOFJ45vreO0N", + "outputId": "35628e33-ecf9-4966-8036-f7ea86db6f16" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tensor shape: (26300, 5)\n", + "X shape: (26300, 4) \n", + "Y shape: (26300, 1)\n" + ] + } + ], + "source": [ + "# Extracting load values as numpy array\n", + "data = energy.copy().values\n", + "\n", + "# Scaling\n", + "data = scaler.transform(data)\n", + "\n", + "# Transforming to 2D tensor as per model input requirement\n", + "data_timesteps=np.array([[j for j in data[i:i+timesteps]] for i in range(0,len(data)-timesteps+1)])[:,:,0]\n", + "print(\"Tensor shape: \", data_timesteps.shape)\n", + "\n", + "# Selecting inputs and outputs from data\n", + "X, Y = data_timesteps[:,:timesteps-1],data_timesteps[:,[timesteps-1]]\n", + "print(\"X shape: \", X.shape,\"\\nY shape: \", Y.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + 
"metadata": { + "id": "ESSAdQgwexIi" + }, + "outputs": [], + "source": [ + "# Make model predictions\n", + "Y_pred = model.predict(X).reshape(-1,1)\n", + "\n", + "# Inverse scale and reshape\n", + "Y_pred = scaler.inverse_transform(Y_pred)\n", + "Y = scaler.inverse_transform(Y)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 328 + }, + "id": "M_qhihN0RVVX", + "outputId": "a89cb23e-1d35-437f-9d63-8b8907e12f80" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(30,8))\n", + "plt.plot(Y, color = 'red', linewidth=2.0, alpha = 0.6)\n", + "plt.plot(Y_pred, color = 'blue', linewidth=1)\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "AcN7pMYXVGTK", + "outputId": "7e1c2161-47ce-496c-9d86-7ad9ae0df770" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MAPE: 2.0572089029888656 %\n" + ] + } + ], + "source": [ + "print('MAPE: ', mape(Y_pred, Y)*100, '%')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้อง แต่โปรดทราบว่าการแปลอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่แม่นยำ เอกสารต้นฉบับในภาษาต้นทางควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษาจากผู้เชี่ยวชาญ เราไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความที่ผิดพลาดซึ่งเกิดจากการใช้การแปลนี้\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "Recurrent_Neural_Networks.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.1" + }, + "coopTranslator": { + "original_hash": "f8f3967282314d3995245835bdaa8418", + "translation_date": "2025-09-06T14:04:42+00:00", + "source_file": "7-TimeSeries/3-SVR/solution/notebook.ipynb", + 
"language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/translations/th/7-TimeSeries/3-SVR/working/notebook.ipynb b/translations/th/7-TimeSeries/3-SVR/working/notebook.ipynb new file mode 100644 index 000000000..573efb259 --- /dev/null +++ b/translations/th/7-TimeSeries/3-SVR/working/notebook.ipynb @@ -0,0 +1,689 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "fv9OoQsMFk5A" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ในสมุดบันทึกนี้ เราจะแสดงวิธีการ:\n", + "\n", + "- เตรียมข้อมูลชุดเวลาแบบ 2 มิติสำหรับการฝึกโมเดล SVM regressor \n", + "- ใช้ SVR ด้วย RBF kernel \n", + "- ประเมินโมเดลโดยใช้กราฟและค่า MAPE \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append('../../')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "M687KNlQFp0-" + }, + "outputs": [], + "source": [ + "import os\n", + "import warnings\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import datetime as dt\n", + "import math\n", + "\n", + "from sklearn.svm import SVR\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from common.utils import load_data, mape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Cj-kfVdMGjWP" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8fywSjC6GsRz" + }, + "source": [ + "### โหลดข้อมูล\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "aBDkEB11Fumg", + "outputId": "99cf7987-0509-4b73-8cc2-75d7da0d2740" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2012-01-01 00:00:002698.0
2012-01-01 01:00:002558.0
2012-01-01 02:00:002444.0
2012-01-01 03:00:002402.0
2012-01-01 04:00:002403.0
\n", + "
" + ], + "text/plain": [ + " load\n", + "2012-01-01 00:00:00 2698.0\n", + "2012-01-01 01:00:00 2558.0\n", + "2012-01-01 02:00:00 2444.0\n", + "2012-01-01 03:00:00 2402.0\n", + "2012-01-01 04:00:00 2403.0" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "energy = load_data('../../data')[['load']]\n", + "energy.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "O0BWP13rGnh4" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 486 + }, + "id": "hGaNPKu_Gidk", + "outputId": "7f89b326-9057-4f49-efbe-cb100ebdf76d" + }, + "outputs": [], + "source": [ + "energy.plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IPuNor4eGwYY" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ysvsNyONGt0Q" + }, + "outputs": [], + "source": [ + "train_start_dt = '2014-11-01 00:00:00'\n", + "test_start_dt = '2014-12-30 00:00:00'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 548 + }, + "id": "SsfdLoPyGy9w", + "outputId": "d6d6c25b-b1f4-47e5-91d1-707e043237d7" + }, + "outputs": [], + "source": [ + "energy[(energy.index < test_start_dt) & (energy.index >= train_start_dt)][['load']].rename(columns={'load':'train'}) \\\n", + " .join(energy[test_start_dt:][['load']].rename(columns={'load':'test'}), how='outer') \\\n", + " .plot(y=['train', 'test'], figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XbFTqBw6G1Ch" + }, + "source": [] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "ตอนนี้คุณจำเป็นต้องเตรียมข้อมูลสำหรับการฝึกโดยการกรองและปรับขนาดข้อมูลของคุณ\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cYivRdQpHDj3", + "outputId": "a138f746-461c-4fd6-bfa6-0cee094c4aa1" + }, + "outputs": [], + "source": [ + "train = energy.copy()[(energy.index >= train_start_dt) & (energy.index < test_start_dt)][['load']]\n", + "test = energy.copy()[energy.index >= test_start_dt][['load']]\n", + "\n", + "print('Training data shape: ', train.shape)\n", + "print('Test data shape: ', test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ปรับขนาดข้อมูลให้อยู่ในช่วง (0, 1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "3DNntGQnZX8G", + "outputId": "210046bc-7a66-4ccd-d70d-aa4a7309949c" + }, + "outputs": [], + "source": [ + "scaler = MinMaxScaler()\n", + "train['load'] = scaler.fit_transform(train)\n", + "train.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "26Yht-rzZexe", + "outputId": "20326077-a38a-4e78-cc5b-6fd7af95d301" + }, + "outputs": [], + "source": [ + "test['load'] = scaler.transform(test)\n", + "test.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x0n6jqxOQ41Z" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fdmxTZtOQ8xs" + }, + "source": [ + "สำหรับ SVR ของเรา เราแปลงข้อมูลอินพุตให้อยู่ในรูปแบบ `[batch, timesteps]` ดังนั้น เราจึงปรับรูปร่าง `train_data` และ `test_data` ที่มีอยู่ให้มีมิติใหม่ซึ่งอ้างอิงถึง timesteps สำหรับตัวอย่างของเรา เรากำหนดให้ `timesteps = 5` ดังนั้น อินพุตของโมเดลจะเป็นข้อมูลสำหรับ 4 timesteps แรก และเอาต์พุตจะเป็นข้อมูลสำหรับ timestep ที่ 
5\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Rpju-Sc2HFm0" + }, + "outputs": [], + "source": [ + "# Converting to numpy arrays\n", + "\n", + "train_data = train.values\n", + "test_data = test.values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Selecting the timesteps\n", + "\n", + "timesteps=None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "O-JrsrsVJhUQ", + "outputId": "c90dbe71-bacc-4ec4-b452-f82fe5aefaef" + }, + "outputs": [], + "source": [ + "# Converting data to 2D tensor\n", + "\n", + "train_data_timesteps=None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "exJD8AI7KE4g", + "outputId": "ce90260c-f327-427d-80f2-77307b5a6318" + }, + "outputs": [], + "source": [ + "# Converting test data to 2D tensor\n", + "\n", + "test_data_timesteps=None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2u0R2sIsLuq5" + }, + "outputs": [], + "source": [ + "x_train, y_train = None\n", + "x_test, y_test = None\n", + "\n", + "print(x_train.shape, y_train.shape)\n", + "print(x_test.shape, y_test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8wIPOtAGLZlh" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EhA403BEPEiD" + }, + "outputs": [], + "source": [ + "# Create model using RBF kernel\n", + "\n", + "model = None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GS0UA3csMbqp", + "outputId": "d86b6f05-5742-4c1d-c2db-c40510bd4f0d" + }, + "outputs": [], + "source": [ + "# Fit model on training data" + ] + }, + { + "cell_type": "markdown", + "metadata": { 
+ "id": "Rz_x8S3UrlcF" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XR0gnt3MnuYS", + "outputId": "157e40ab-9a23-4b66-a885-0d52a24b2364" + }, + "outputs": [], + "source": [ + "# Making predictions\n", + "\n", + "y_train_pred = None\n", + "y_test_pred = None" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_2epncg-SGzr" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Scaling the predictions\n", + "\n", + "y_train_pred = scaler.inverse_transform(y_train_pred)\n", + "y_test_pred = scaler.inverse_transform(y_test_pred)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xmm_YLXhq7gV", + "outputId": "18392f64-4029-49ac-c71a-a4e2411152a1" + }, + "outputs": [], + "source": [ + "# Scaling the original values\n", + "\n", + "y_train = scaler.inverse_transform(y_train)\n", + "y_test = scaler.inverse_transform(y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "u3LBj93coHEi", + "outputId": "d4fd49e8-8c6e-4bb0-8ef9-ca0b26d725b4" + }, + "outputs": [], + "source": [ + "# Extract the timesteps for x-axis\n", + "\n", + "train_timestamps = None\n", + "test_timestamps = None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(25,6))\n", + "# plot original output\n", + "# plot predicted output\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.title(\"Training data prediction\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "LnhzcnYtXHCm", + 
"outputId": "f5f0d711-f18b-4788-ad21-d4470ea2c02b" + }, + "outputs": [], + "source": [ + "print('MAPE for training data: ', mape(y_train_pred, y_train)*100, '%')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 225 + }, + "id": "53Q02FoqQH4V", + "outputId": "53e2d59b-5075-4765-ad9e-aed56c966583" + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(10,3))\n", + "# plot original output\n", + "# plot predicted output\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "clOAUH-SXCJG", + "outputId": "a3aa85ff-126a-4a4a-cd9e-90b9cc465ef5" + }, + "outputs": [], + "source": [ + "print('MAPE for testing data: ', mape(y_test_pred, y_test)*100, '%')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DHlKvVCId5ue" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cOFJ45vreO0N", + "outputId": "35628e33-ecf9-4966-8036-f7ea86db6f16" + }, + "outputs": [], + "source": [ + "# Extracting load values as numpy array\n", + "data = None\n", + "\n", + "# Scaling\n", + "data = None\n", + "\n", + "# Transforming to 2D tensor as per model input requirement\n", + "data_timesteps=None\n", + "\n", + "# Selecting inputs and outputs from data\n", + "X, Y = None, None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ESSAdQgwexIi" + }, + "outputs": [], + "source": [ + "# Make model predictions\n", + "\n", + "# Inverse scale and reshape\n", + "Y_pred = None\n", + "Y = None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 328 + }, + "id": "M_qhihN0RVVX", + 
"outputId": "a89cb23e-1d35-437f-9d63-8b8907e12f80" + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(30,8))\n", + "# plot original output\n", + "# plot predicted output\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "AcN7pMYXVGTK", + "outputId": "7e1c2161-47ce-496c-9d86-7ad9ae0df770" + }, + "outputs": [], + "source": [ + "print('MAPE: ', mape(Y_pred, Y)*100, '%')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้องมากที่สุด แต่โปรดทราบว่าการแปลโดยอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่ถูกต้อง เอกสารต้นฉบับในภาษาดั้งเดิมควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษามืออาชีพ เราไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความผิดที่เกิดจากการใช้การแปลนี้\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "Recurrent_Neural_Networks.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.1" + }, + "coopTranslator": { + "original_hash": "e86ce102239a14c44585623b9b924a74", + "translation_date": "2025-09-06T14:07:12+00:00", + "source_file": "7-TimeSeries/3-SVR/working/notebook.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/translations/th/8-Reinforcement/1-QLearning/notebook.ipynb 
b/translations/th/8-Reinforcement/1-QLearning/notebook.ipynb new file mode 100644 index 000000000..03f175e77 --- /dev/null +++ b/translations/th/8-Reinforcement/1-QLearning/notebook.ipynb @@ -0,0 +1,411 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "17e5a668646eabf5aabd0e9bfcf17876", + "translation_date": "2025-09-06T15:05:51+00:00", + "source_file": "8-Reinforcement/1-QLearning/notebook.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# ปีเตอร์กับหมาป่า: บทเรียนพื้นฐานเกี่ยวกับการเรียนรู้แบบเสริมกำลัง\n", + "\n", + "ในบทเรียนนี้ เราจะเรียนรู้วิธีการนำการเรียนรู้แบบเสริมกำลังมาใช้กับปัญหาการค้นหาเส้นทาง สถานการณ์นี้ได้รับแรงบันดาลใจจากนิทานดนตรีเรื่อง [ปีเตอร์กับหมาป่า](https://en.wikipedia.org/wiki/Peter_and_the_Wolf) โดยนักประพันธ์ชาวรัสเซีย [เซอร์เกย์ โปรโกฟีเยฟ](https://en.wikipedia.org/wiki/Sergei_Prokofiev) เป็นเรื่องราวเกี่ยวกับปีเตอร์ เด็กชายผู้กล้าหาญที่ออกจากบ้านไปยังลานป่าเพื่อไล่ล่าหมาป่า เราจะฝึกอัลกอริทึมการเรียนรู้ของเครื่องที่จะช่วยปีเตอร์สำรวจพื้นที่โดยรอบและสร้างแผนที่นำทางที่เหมาะสมที่สุด\n", + "\n", + "ก่อนอื่น มาเริ่มต้นด้วยการนำเข้าไลบรารีที่มีประโยชน์หลายตัว:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import random\n", + "import math" + ] + }, + { + "source": [ + "## ภาพรวมของการเรียนรู้แบบเสริมกำลัง\n", + "\n", + 
"**การเรียนรู้แบบเสริมกำลัง** (Reinforcement Learning หรือ RL) เป็นเทคนิคการเรียนรู้ที่ช่วยให้เราสามารถเรียนรู้พฤติกรรมที่เหมาะสมที่สุดของ **ตัวแทน** (agent) ใน **สภาพแวดล้อม** (environment) โดยการทดลองทำซ้ำหลายๆ ครั้ง ตัวแทนในสภาพแวดล้อมนี้ควรมี **เป้าหมาย** ซึ่งกำหนดโดย **ฟังก์ชันรางวัล** (reward function)\n", + "\n", + "## สภาพแวดล้อม\n", + "\n", + "เพื่อความเข้าใจง่าย ลองพิจารณาโลกของปีเตอร์เป็นกระดานสี่เหลี่ยมขนาด `width` x `height` แต่ละช่องในกระดานนี้สามารถเป็นได้ดังนี้:\n", + "* **พื้นดิน** ซึ่งปีเตอร์และสิ่งมีชีวิตอื่นๆ สามารถเดินได้\n", + "* **น้ำ** ซึ่งแน่นอนว่าไม่สามารถเดินได้\n", + "* **ต้นไม้** หรือ **หญ้า** - สถานที่ที่สามารถพักผ่อนได้\n", + "* **แอปเปิล** ซึ่งเป็นสิ่งที่ปีเตอร์จะดีใจที่ได้พบเพื่อใช้เป็นอาหาร\n", + "* **หมาป่า** ซึ่งเป็นอันตรายและควรหลีกเลี่ยง\n", + "\n", + "ในการทำงานกับสภาพแวดล้อม เราจะกำหนดคลาสที่เรียกว่า `Board` เพื่อไม่ให้เนื้อหาในสมุดบันทึกนี้ดูรกเกินไป เราได้ย้ายโค้ดทั้งหมดที่เกี่ยวข้องกับการทำงานของกระดานไปยังโมดูล `rlboard` แยกต่างหาก ซึ่งเราจะนำเข้าในตอนนี้ คุณสามารถดูรายละเอียดเพิ่มเติมเกี่ยวกับการทำงานภายในของการนำไปใช้ในโมดูลนี้ได้\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "ตอนนี้มาสร้างกระดานแบบสุ่มและดูว่ามันเป็นอย่างไร:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 1" + ] + }, + { + "source": [ + "## การกระทำและนโยบาย\n", + "\n", + "ในตัวอย่างของเรา เป้าหมายของปีเตอร์คือการหาแอปเปิ้ล ในขณะที่หลีกเลี่ยงหมาป่าและสิ่งกีดขวางอื่น ๆ กำหนดการกระทำเหล่านั้นเป็นพจนานุกรม และจับคู่กับคู่ของการเปลี่ยนแปลงพิกัดที่สอดคล้องกัน\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 2" + ] + }, + { + "source": [ + "กลยุทธ์ของตัวแทนของเรา (ปีเตอร์) ถูกกำหนดโดยสิ่งที่เรียกว่า **นโยบาย** ลองพิจารณานโยบายที่ง่ายที่สุดที่เรียกว่า **การเดินแบบสุ่ม**\n", + "\n", + 
"## การเดินแบบสุ่ม\n", + "\n", + "มาแก้ปัญหาของเราด้วยการใช้งานกลยุทธ์การเดินแบบสุ่มกันก่อน\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "# Let's run a random walk experiment several times and see the average number of steps taken: code block 3" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 4" + ] + }, + { + "source": [ + "## ฟังก์ชันรางวัล\n", + "\n", + "เพื่อทำให้กลยุทธ์ของเราฉลาดขึ้น เราจำเป็นต้องเข้าใจว่าการเคลื่อนไหวใด \"ดีกว่า\" การเคลื่อนไหวอื่นๆ\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 5" + ] + }, + { + "source": [ + "## Q-Learning\n", + "\n", + "สร้าง Q-Table หรืออาเรย์หลายมิติ เนื่องจากกระดานของเรามีขนาด `width` x `height` เราสามารถแสดง Q-Table ได้ด้วย numpy array ที่มีรูปร่างเป็น `width` x `height` x `len(actions)`\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 6" + ] + }, + { + "source": [ + "ส่ง Q-Table ไปยังฟังก์ชัน `plot` เพื่อแสดงตารางบนกระดาน:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "error", + "ename": "NameError", + "evalue": "name 'm' is not defined", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m 
\u001b[0mm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mQ\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mNameError\u001b[0m: name 'm' is not defined" + ] + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## สาระสำคัญของ Q-Learning: สมการ Bellman และอัลกอริทึมการเรียนรู้\n", + "\n", + "เขียน pseudo-code สำหรับอัลกอริทึมการเรียนรู้ของเรา:\n", + "\n", + "* เริ่มต้น Q-Table Q ด้วยค่าที่เท่ากันสำหรับทุกสถานะและการกระทำ\n", + "* กำหนดอัตราการเรียนรู้ $\\alpha\\leftarrow 1$\n", + "* ทำการจำลองซ้ำหลายครั้ง\n", + " 1. เริ่มต้นที่ตำแหน่งสุ่ม\n", + " 1. ทำซ้ำ\n", + " 1. เลือกการกระทำ $a$ ที่สถานะ $s$\n", + " 2. ดำเนินการโดยย้ายไปยังสถานะใหม่ $s'$\n", + " 3. หากพบเงื่อนไขสิ้นสุดเกม หรือรางวัลรวมมีค่าน้อยเกินไป - ออกจากการจำลอง \n", + " 4. คำนวณรางวัล $r$ ที่สถานะใหม่\n", + " 5. อัปเดต Q-Function ตามสมการ Bellman: $Q(s,a)\\leftarrow (1-\\alpha)Q(s,a)+\\alpha(r+\\gamma\\max_{a'}Q(s',a'))$\n", + " 6. $s\\leftarrow s'$\n", + " 7. อัปเดตรางวัลรวมและลดค่า $\\alpha$\n", + "\n", + "## การใช้ประโยชน์ vs. 
การสำรวจ\n", + "\n", + "วิธีที่ดีที่สุดคือการสร้างสมดุลระหว่างการสำรวจและการใช้ประโยชน์ เมื่อเราเรียนรู้เกี่ยวกับสภาพแวดล้อมมากขึ้น เราจะมีแนวโน้มที่จะเลือกเส้นทางที่เหมาะสมที่สุด แต่ควรเลือกเส้นทางที่ยังไม่ได้สำรวจบ้างเป็นครั้งคราว\n", + "\n", + "## การใช้งาน Python\n", + "\n", + "ตอนนี้เราพร้อมที่จะใช้อัลกอริทึมการเรียนรู้ ก่อนหน้านั้น เราต้องมีฟังก์ชันที่จะแปลงตัวเลขใน Q-Table ให้เป็นเวกเตอร์ของความน่าจะเป็นสำหรับการกระทำที่เกี่ยวข้อง:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 7" + ] + }, + { + "source": [ + "เราเพิ่มค่า `eps` เล็กน้อยลงในเวกเตอร์ต้นฉบับเพื่อหลีกเลี่ยงการหารด้วย 0 ในกรณีเริ่มต้น เมื่อทุกองค์ประกอบของเวกเตอร์มีค่าเหมือนกัน\n", + "\n", + "อัลกอริทึมการเรียนรู้ที่เราจะใช้งานจริงจะถูกดำเนินการใน 5000 การทดลอง ซึ่งเรียกว่า **epochs**:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "" + ] + } + ], + "source": [ + "\n", + "from IPython.display import clear_output\n", + "\n", + "lpath = []\n", + "\n", + "# code block 8" + ] + }, + { + "source": [ + "หลังจากดำเนินการอัลกอริทึมนี้ ตาราง Q-Table ควรได้รับการอัปเดตด้วยค่าที่กำหนดความน่าสนใจของการกระทำต่าง ๆ ในแต่ละขั้นตอน แสดงตารางที่นี่:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## การตรวจสอบนโยบาย\n", + "\n", + "เนื่องจาก Q-Table แสดง \"ความน่าสนใจ\" ของแต่ละการกระทำในแต่ละสถานะ การใช้งานมันเพื่อกำหนดการนำทางที่มีประสิทธิภาพในโลกของเราจึงค่อนข้างง่าย 
ในกรณีที่ง่ายที่สุด เราสามารถเลือกการกระทำที่สอดคล้องกับค่าที่สูงที่สุดใน Q-Table ได้:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "2" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ], + "source": [ + "# code block 9" + ] + }, + { + "source": [ + "หากคุณลองรันโค้ดด้านบนหลายครั้ง คุณอาจสังเกตเห็นว่าบางครั้งมันจะ \"ค้าง\" และคุณจำเป็นต้องกดปุ่ม STOP ในโน้ตบุ๊กเพื่อหยุดการทำงาน\n", + "\n", + "> **งานที่ 1:** แก้ไขฟังก์ชัน `walk` เพื่อจำกัดความยาวสูงสุดของเส้นทางโดยกำหนดจำนวนก้าว (เช่น 100) และสังเกตว่าโค้ดด้านบนจะคืนค่านี้เป็นครั้งคราว\n", + "\n", + "> **งานที่ 2:** แก้ไขฟังก์ชัน `walk` เพื่อไม่ให้กลับไปยังตำแหน่งที่เคยไปมาก่อนหน้านี้ สิ่งนี้จะช่วยป้องกันไม่ให้ `walk` เกิดการวนซ้ำ อย่างไรก็ตาม ตัวแทนอาจยังคงติดอยู่ในตำแหน่งที่ไม่สามารถหลบหนีได้\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Average path length = 5.31, eaten by wolf: 0 times\n" + ] + } + ], + "source": [ + "\n", + "# code block 10" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 57 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nO3de5wU5Z3v8c8vEk1islETkuPtlcFdT3LMvjbRsF5iTnajibdkQ5KjOeRKjKsnWT3rms1mwVw8q/EWL6gJXlAwxBsqQSWCIgJeuDPc5TrDfQBhhoFhYBiYgef80U8PPT19qe7p7qrp+r5fL5jup6qrnuqq/tVTTz31POacQ0RE4uE9YWdAREQqR0FfRCRGFPRFRGJEQV9EJEYU9EVEYqRf2BnI5aMf/airqakJOxsiIn3KggULmpxz/TNNi3TQr6mpoba2NuxsiIj0KWa2Mds0Ve+IiMSIgr6ISIwo6IuIxIiCvohIjCjoi4jEiIK+iEiMKOiLiMSIgr5IlXr1nW007T0QdjYkYhT0RapQy/4OfvLkQn78x/lhZ0UiRkFfpAp1HjoMQMOu/SHnRKJGQV9EJEYU9EVEYkRBX0QkRhT0RURiREFfRCRGFPRFRGJEQV9EJEYU9EVEYkRBX0QkRhT0RURiREFfRCRGFPRFRGJEQV9EJEYU9EVEYkRBX0QkRhT0RURiREFfRCRGFPRFRGJEQV9EJEYU9EVEYkRBX0QkRhT0RURiREFfRCRGFPRFRGJEQV9EJEYCBX0zu8HMlpvZO2b2jJm9z8wGmNlcM6szs2fN7Gg/7zH+fb2fXpOynGE+fbWZXVyeTRIRkWzyBn0zOxn4V2Cgc+5vgaOAwcCdwHDn3OnALuAq/5GrgF3Oub8Bhvv5MLMz/Oc+DVwCPGhmR5V2c0REJJeg1Tv9gPebWT/gA8A24AJgnJ8+BviGfz3Iv8dPv9DMzKePdc4dcM6tB+qBs3u/CSIiElTeoO+c2wLcDWwiEexbgAXAbudcp5+tATjZvz4Z2Ow/2+nn/0hqeobPdDGza8ys1sxqGxsbi9kmERHJIkj1zvEkSukDgJOAY4FLM8zqkh/JMi1bevcE50Y65wY65wb2798/X/ZEpACz1+7k0bfWhZ0NCVG/APN8GVjvnGsEMLPxwOeB48ysny/NnwJs9fM3AKcCDb466MNAc0p6UupnRKQCvvPoHACu/uJpIedEwhKkTn8TcK6ZfcDXzV8IrACmA5f7eYYAL/nXE/x7/PRpzjnn0wf71j0DgNOBeaXZDBERCSJvSd85N9fMxgELgU5gETASmAiMNbPf+rRR/iOjgCfMrJ5ECX+wX85yM3uOxAmjE7jWOXeoxNsjIiI5BKnewTl3E3BTWvI6MrS+cc61A1dkWc6twK0F5lFEREpET+SKiMSIgr6ISIwo6IuIxEigOn0R6du27t7PrLU7w86GRICCvkgMDB45h03NbWFnQyKgqqt3duxp56xbprBme2vYWREJVWPrgbCzIAENHjmb+1+vK9vyqzroT1m5neZ9B3l85oawsyIiEsicdc0Mf31N2ZZf1UFfRES6U9AXEYkRBX0RkRhR0BcRiREFfZEq1GOgChFPQV+kimUauUjiTUFfpIqpxC/pFPRFqpBK+JJNTIL+kfLOV+59k/ELG0LMi4hIeKo66FuG8k7djr387LklIeRGRCR8VR30RUSkOwV9EZEYUdAXEYkRBX2RGHBqvCmegr6ISIwo6IuIxIiCvohIjMQq6Dunek2Jp0zPrEg8xSLoK9aLiCRUddA3FW5ERLqp6qAvIiLdVXXQV7WOiEh3VR30k1TNIyJRc+iw47/+spyGXW0VXW8sgr6ISNQs3ryLx2du4IZnF1d0vQr6IiIhSFY/H65wNbSCvkiVeLuukUfeXBt2NiTiAgV9MzvOzMaZ2SozW2lm55nZCWY2xczq/N/j/bxmZg+YWb2ZLTWzs1KWM8TPX2dmQ8q1USJ
x9INR87j9lVVhZ0MiLmhJ/37gVefcp4DPACuBocBU59zpwFT/HuBS4HT/7xrgIQAzOwG4CTgHOBu4KXmiqBS15hGRuMsb9M3sr4AvAqMAnHMHnXO7gUHAGD/bGOAb/vUg4E8uYQ5wnJmdCFwMTHHONTvndgFTgEtKujVZKNhL3OiQl2yClPRPAxqBx81skZk9ZmbHAh93zm0D8H8/5uc/Gdic8vkGn5YtvRszu8bMas2strGxseAN6r6sXn1cpM9L/gTUn74kBQn6/YCzgIecc2cC+zhSlZNJplDrcqR3T3BupHNuoHNuYP/+/QNkT0REggoS9BuABufcXP9+HImTwHZfbYP/uyNl/lNTPn8KsDVHuoiUicr3ki5v0HfOvQtsNrNP+qQLgRXABCDZAmcI8JJ/PQH4oW/Fcy7Q4qt/JgMXmdnx/gbuRT5NREpMNZuSTb+A8/1f4CkzOxpYB1xJ4oTxnJldBWwCrvDzTgIuA+qBNj8vzrlmM7sFmO/nu9k511ySrRARkUACBX3n3GJgYIZJF2aY1wHXZlnOaGB0IRkUEZHS0RO5IiIxEqugr5taIhJVlRrONRZBXw9nSdxpjNxo++sbJ/Gth2ZVZF1VHfR1mItIVKWWRQ8ddizatLsi663qoC8iIt0p6IuIhCCsmggFfRGRGFHQFxGJkaoO+mq0IyLSXVUH/SR1sSxx5Zzj91Pr2N9xKOysSETEIugnVerhB5Go2NXWwT1T1oSdDYmQWAV9kUrae6CTZQ0trNy2hzteWaVCh0RC0F42+zT91iQMV4+pZfa6nRzT7z0c6DzMtV/6az70vveGnS2Juaou6asqX8K0cNMuADoPJ0odpptLEgFVHfRFRKQ7BX2REL2xegf7D5a2ZY3uHUguCvoiIVn9bis/enw+v3rxnZIu97ZJK0u6PKkuCvoiIWlt7wBgw859JV3uk3M26cFEyUpBX0SkAhpbD1AzdCK1GxJDg4d1Yo5V0FfpRyopebwl2+yorj3e5q1PBPvRM9d3S690m65YBX2RMKilpkSJgr6ISIgqff0Xi6DvVLEjMaOLi+jTICploMtqEZHuqjroi4hIdwr6ImWmRjsSJQr6IiIxEqugf+iwilxSebq3JFESq6B/9+TVYWdBpAc9tCWVFKug/+ryd8POgkgXXQFIGKo66KsAJVFWruNTJ5PqsHPvgbIst6qDfpLpURWpgB2t7cxZt7Pgz5V6RC0VdqIt1/7pPHS463W5bkHGIujriVyphG+OmMXgkXPCzob0YXe+uqrs6wgc9M3sKDNbZGYv+/cDzGyumdWZ2bNmdrRPP8a/r/fTa1KWMcynrzazi0u9MT3zXO41iByxZff+nNNV9BA4EpcyHQ9zfU+cienlOWIKKelfD6QOyXMnMNw5dzqwC7jKp18F7HLO/Q0w3M+HmZ0BDAY+DVwCPGhmR/Uu+yIR5n+zql6UXCLZtbKZnQJ8FXjMvzfgAmCcn2UM8A3/epB/j59+oZ9/EDDWOXfAObceqAfOLsVGBKWSv4RCx50UI+Q6/fuAXwDJuwwfAXY75zr9+wbgZP/6ZGAzgJ/e4ufvSs/wmS5mdo2Z1ZpZbWNjYwGbIiIi+eQN+mb2NWCHc25BanKGWdMHCkqfluszRxKcG+mcG+icG9i/f/982SuIWjVI2DoOHWZWfVPY2ZAIyRqWynSFGKSkfz7wdTPbAIwlUa1zH3CcmfXz85wCbPWvG4BTAfz0DwPNqekZPiNSvVJ+1fdOWcN3H5vbNU5qqbR3HCrp8qT88sb0sKp3nHPDnHOnOOdqSNyIneac+x4wHbjczzYEeMm/nuDf46dPc4nnzCcAg33rngHA6cC8km1JACrpS5gMWLtjLwBNew+WdNnN+0q7PCmf1vbOrNMqcfunN+30/xP4mZnVk6izH+XTRwEf8ek/A4YCOOeWA88BK4BXgWudcyqexMg9r61mWUNL2NkI7K7Jq3hhUUPJlucobeHtF+OWUDN0YgmXKJX
wdl336r0FG3dVdP398s9yhHPuDeAN/3odGVrfOOfagSuyfP5W4NZCM9lbyRK+Wu+E6/fT6vn9tHo23PHVsLMSyIjpawH45pmn9G5BeY67Yjtce642+wkp6BI7Dh3m1y++w79eeDonHff+ovIh5VGuiomqfiJX7aMlalKPyPRCyN4DnSzc1PtSXyGFm7frGhk7fzM3vrCs1+uVvqGqg75I1OQqvV371EK+9eAs9rR3VCw/SSoeRUPq8VGufaKgLxICs54NC5ZtSdzv6Og8nOET0tcVWr2s6h2RKpIa8EvRy6buV1WHqLfeEYmFGXVN7G7rO00inVN1TRRFpcm4gr5IHt8fNZerxtT2ejmpP/rmfQfZvKut18sslYjEo9hbktKkuVwnCQV9kQDWbG8t+rOZSt03vrCMG55dknH+Yn7rqQFif4Cncy9/aJbPm64JwhLWiVZBXySICvxCexN+t+bpyz9dbYUfCJLCles+jYK+VESxDyBFRVGl7wqW5fIN4CJ9j6p3eiH53fXxuBOqhZt20XGo+psSjphezx2v9ByyrvNwdW67hhINx4qte3hyzsZQ1l1QNwx9jqorS2Lltj1868FZXP0/B/DLr54RdnbK6q7JqwEYeumnuqW3dwQP+sPGL2XHngNd74OGVYXf+LjsgbdDW3d1B/20X5HaMhenaW8igK3cVvzNzGp36PCRg+2ZeZszzpPt+OvrVV9SHlEYI7fPUqyXchs2fmmg+XLF9zCO02ufWhTCWiVMsQj65eacU2kt5nL1eJkUtUPkzTWNgZp3SjjK1Zw2VkG/HD+6toOdDBg2iT9Mqy/9wiNGN/2KU4qf7oqte/iHu6bT0pa5M7b0Y3vKiu15l7moBD16SnCF972j6p1I2u1/hE/P2xRyTnqvdkMzO/ce6JGuB3h6pxQ/3Qem1rFxZxuz1gYbX/eXL76Td577Xq/reh21qxApHwX9Eho+ZQ33vLY67GwU7fKHZ3PFI7NLvtz7X69jwLBJJV9utSpFAFZ1o2QTq6Bf7tY790+t4/d9vJpnXeO+ki9zzOwNJV9mNUn2sqnWZZJKD2dJn1Xtpc69B7IPdF1q2b5J3W+Jvqj8DGIR9CPyXcdWNX//k5Zt429vmlySZeVszlnmATh0lRE96nunGClf2uHDrixn2moOaNLTo2+t4+a/rOh6/9aaxuAfDniwhBGAo1IKlSMmLXu3LMut7qCf4rQbJ2XslOqRN9fy0uItgZbxz2Pm89mbX8s4bVtLe6/yJ6WxoWkfNUMnsrRhd1mWf+uklYyeuR6APe0dge6BFBrDcwXgbNPS0xXE+75bXl6Rf6YixCboZ3P7K6u4fuziQPO+vnJHVxPNOEoGknELGlj17p6M83QeOsySzd0DbiUD0LRVOwAYvzDYiTyb1G4Vsvn2w7OZt6G5wCVnX25qCb9ue2ugtvYihYp90O+tOFSFplc3/Pz5JVxyX+YOo+5+bQ2DRsxk+daWjNP7iuZ9+YdHXPVusL6IijnnfWX4W1z9p56jdanuXXpLQT/mWto6WNu4t6jPDhk9r0fVWDLYN+3tO2PKVox1/ddNtbdukmhR0I+5r4+YwYX3vFlU4HlzTWPgqrFstu9pp2V/6arMol8S7l2AL9f5Qaed+FDQ76W+/mPZuDP44NzFBpxcJ5RzbpvKF+6cVtyCC9TecYimvQdoj3gnY+lt7mfUNfUYo7fQ4RFFkqq7P/0y29kHAkhQ5axhyLfo1vbgDzftbjvIuAUNXPWFAV1PsmZcZ4YN+tSvXwXg7075MBOu+0LgdZaMSzQGyC7z9nx/1NweaZ+/o/uJUjVE0fDtR2bzoWP6MepHf99jWlSuQhX0e+Fzv32dE449OuxsVEwUDtrP3jwl8ffU4xhYc0KP6UGyuLShuJvM/3DX9KI+F4GvLa++kMe+YN76QltzVV4sqnfKWQoK0sqjL0j9ivpC1cHBEMbrLaQqLFXwwy//nOpuQXqrqoO+Si+5ZRvoPL3qIKn
ok2c54lQBy2xsPcDztZmHMIyChZt2d6uzT+3KumboxIrkQaeS+FD1Toz95qUjfa7nutlazMkzSs0Q/88TtSzcVJ4ndEvlouFv8dEPJqoKc5Xms41tEJ1vW6Kuqkv6kluuJz63tezPWHUVpWCeS2oud7T2HBgmmvKfXh2OXwcYIEUSxs7bxKvvlKcPm0JF5aeTN+ib2almNt3MVprZcjO73qefYGZTzKzO/z3ep5uZPWBm9Wa21MzOSlnWED9/nZkNKd9mJUTkO+4T0r+r826fxlm3TOkx33MBq0neWH2kI7JK1u7katFTDa57ehFPzNkYdjb6jKHjl/GTJxeEnY1ICVLS7wT+3Tn3P4BzgWvN7AxgKDDVOXc6MNW/B7gUON3/uwZ4CBInCeAm4BzgbOCm5Imi3MKIA+ffMa3qSmQOR932zE/vvrx0K7tSrgz+OGtDhXIVfQc7E/dOdBNWoiBv0HfObXPOLfSvW4GVwMnAIGCMn20M8A3/ehDwJ5cwBzjOzE4ELgamOOeanXO7gCnAJSXdmgppyjCObLotu/cXVCL77csrutWxl0vDrjbWbG/11TRHzoa9ufTc1rKf655exE+fCl6iembeJs7M0mNpKaRuT5UX/gGYsGRrrz7fm2q78++Yxr1T1hT12T3tHRVvLfbKsm1sbk60xNqyez81QyeyMEaDxBdUp29mNcCZwFzg4865bZA4MQAf87OdDKTWATT4tGzp6eu4xsxqzay2sbGAvsorKFlyK6XHZqznT7PLf9n+hTunc9Hwt3hm3mYKrXgxLGMATX4fW3cH71562Phl7Cqwx9Ig33scAnwmBfXrX2Jbdu/ngal1+WfM4OLhb2VtLVYuP31qIZfdn+gwcEZd4nsbO29TSdfR0tbB6BnrI3kPLHDQN7MPAn8G/s05l7lfXT9rhjSXI717gnMjnXMDnXMD+/fvHzR7FRW93RjM9FVHngZNDxLlrHoo1YG/cluuwy65rpKsiu17QhwfIYQDLKx7IWGNQ9Fa5iEuh72wlJtfXkHtxuhdQQQK+mb2XhIB/ynn3HifvN1X2+D/JiNKA3BqysdPAbbmSC+7CJ5sQzFtVXoXAIX90B2uqOBQlhu5Zd6nC8rwY83W3DLdna+uLvm6o6alrYPdbdXxYGMmyaveYh/oK6cgrXcMGAWsdM7dmzJpApBsgTMEeCkl/Ye+Fc+5QIuv/pkMXGRmx/sbuBf5tLJJ/sTmbdjZq+V0HjrMw2+u7XofxUu2pBcWNXDFw7MK/lzOTUqJVZm2PUpfRzVU7/x5YUPF17k6y6A45fKZm1/r6lIjTOU6dhf7gYTumryqKy0qx2aQh7POB34ALDOzZD+6NwJ3AM+Z2VXAJuAKP20ScBlQD7QBVwI455rN7BZgvp/vZudcRTqq2Nxc/I2izkOHOfu2qX2mu4Ubnl2SdVpqFc6ry4O1XT4cYASpsI2esZ7jj30v3zzzlK601G3NVsLuOHSY68cu4voL/zuf/G8fKns+o2z7nr7yLEPpjJm1gZsmLC/rOqJUIErKG/SdczPIXg9wYYb5HXBtlmWNBkYXksGwvbmmMWPAzzZcYLVJ7+MmU/VOvhJM+oH/qxeX9TZb3dzsxxL95pmn8Nry4EMMrti6h0nL3qVh1/6K9Lp54FDuHlmDtAqrViOm1/P9cz7Bhz/w3oqt8+m5pb15m0lUSvep9ERuHpnGSnWOijzl19rewYqtxZ1cnHPM39DM7LW9q9pKDdj5Si2bmttYmFYXvqe9g/1p3U8/Oafnj62zRB2ozahv6vb+3ZZ2NjX3rFd1zvGbHKW8Fxf3bozdTK4e03P4w1LoC1djSTv3HmBHa8+bt3dNXs2vKtBkWRT08ypFq4ZtLfsZMb2+4HsBVz4+n8seyDwWbT7OwRUPz+Y7j84JPH82qdUj+b6NfQe7B/gfPz4/y5zdFTp6Vr7WRsnt+drvZ2Sc/lzt5h4DuKdaVIa+epYU2aVzPoWMR1Col5duZUPTvkD
zBjn5fO63r3P2rVMzTmsrc4uaMESxekdBv0hBW2IA/PTJhdw1eTV1Owobi7Y3zb0OpRxt+w/2bqCXoAE2k8U5AmsQW3bv59aJK4ouzWarMvnPP5e2iilMo2asK9uyr3t6ERfd91bZlp8qzPhYyO+5EBGM+fEN+hOXbqvYupJB93CRp/1iWgulVkt9+5HZrGvcmzM4z12fvxrIrPCD+D0Br5Q6DmVe8vXPLOLRt9ezpKH7yeOFhVtwzmUthT5V5vraJ2ZvKOvyC/HAtPqyLr8cDyNWQiGNL54tU9fbhVSPVkpsg/61Ty+s+DorudNTb8Au29LCBfe8yZYcj7v/KEA1jHMw8q2epcpccf09AY+wc2/vfsm/o7Wd7Xva6chSwh+/aAuTl7/LzLVNGaeX269fKm+rj76ot4f37raD1AydyMtLjzy+s2Z7K/uKrPZpzNC76gV3v1Fs9qpGbIN+ujezPMZeiou+Qm4LzN/QsxVr0JPFz58/0lwzU510as+XhejNpW/Qkn6q1vYOzr51KufclrnuN6llf0dkSk/VYH3Auvtsevv8yjq//lEz1nct76Lhb3HVmGD3hQpZRy6z1jbxwqLKPytRKQr63pDR8zI+hZkpZjlXmqZYre0dfOvBmaxrPFLXf8XDs3uUyA8eOsyNLyzLe7k6bsGRA/W495em6Vtqff7qlNGdgiom6N82aVX+maTkdgZsMvrioi3UDJ3Yo6O0XCF/4859jJheWDVU8hwyZ11xj/PkuxeV7Xj+7qNzuz3v8vLSrTya4Qq3r6rqoF9oy5v/9dCswKWVl3I06UvvtyVZJ/r4zPXd0qet2sHCTbsZ/nr3zqrOT+uAasLirTw9dxO3T1oZKG8Ag0bMDDxvLtv3HGCXf1x+d5YO0nK2/Cni5Higo+eN50zNLqF7oPl/RT5ok8zijLpwqoqiImg5PfnEcCENE74/ai53Tc7dvUSxFwqz1jZ161eq1K57ehG3FvDb6y56l6JVG/Snr97BnHWFt1F/fkH+yzqHY21j9svE9GqJPe2JYPlcbXGXjMkbwNkasMysb6LtYHmau33p7jf4l6eKv/9RTEk/U23S9WMX93hQbOe+g7yR8mNP78O/vsDWUoti1L1uPqve3ZO1yjOb13OMxBakBVl6s90g4XJ90z6+++hcrvxjzyqgYk4iuQpzNUMnsnFn4ne/YGMzNUMnUuevFkZMr+ebD5amoFVuVTtG7pUB24enW76lBQaemn/GgvSMYm0HO3lweqI/n3yllGxx0znHym2tfO+xuXz9Myf1OpfFSg/G5XLFw7O7vf9dno7Jvnzvm+XMTlW75L78z4ekH5Y/fWohG+74Kks272bnvgNc8KmP9yoPQa66v5RyY3b+hmb+vuYEnHNFD5F5/djFOacvbWjhEx85lr8sSbT+e6uuidM//qG8VzEQnadzq7akXyrFPhGbz/Apa7rqFPfmaZ2Q7dj/3eTVXQ9vFfoMQCllC77rGvcW9NDVlBXbmVvE1VmpRO9CvLJyxdgOf2Lfvqedt/NUgw0aMZMf/zHx9PH6pn28sXpHUaXu9I+s3LYnY0OHpCsens3m5jYeeWsd59w2tceN6VJ0lJhcwtKGYM+fpK4yW/VopVVtSb9YyX00d91OTj3hA9yTYUSgH4ya16t17G47yMz63gW3WfVNPPTGkZ4/g/Q1Xy6vr8x8WX/BPYWVtK/+UyJQfOusI2Pr5HpqtpSmrNjeZ9ujV8L3HpvLoM+elHW4zKT2tPsxXyphE8lL/cAntwz6ND84rybjPHvaO7ruzWxOuQ/0wqIGBn2mx5hNef3kicyjwS30reMKOZFsbO5d66hSUdDP4n+PzN59Qbabipn8/PklPZ4KHTxyDqveLbwljMOxo7WdptaDLNtSnkf6o6C3TQcLtaShpeuEE2cz6xNNFedv6HlvY976Zuat717KzlRd8alfv1qSvHz/sbl8+qS/yjjtpgnL+d45n+A97+mZgQUbd3XlMzUc3/D
sEv7p7wqvAg3SG+2yMnWvUS4K+mlKXe02Lu3G8KX3vx0o4Kc2H92YcpL5x7veoO3gIf7j4k+WLpMRU45+byS/sfM3RaaL5Rn1Td06z0ttrnzYwR2vrmJthirN3+R4aK5c1Xf/9IfM/TtBorEBJJrD5rtKqhQF/TRjZm8sawdWQathxqS0REmtxmnzrSCC3DgSKUSh3RuVq7+aTM66pfuAK5meDM9nXY4Wd5DorqRQnQG/tC/f+2bBY0KXS6xv5KaXwpPGLyp9t7oiUVfojc58fUlNydGEsxLS85evSW569VUmvxy/jJqhE7veBy18RSXgQ8yDfmq3BVGT2v9Il7g3L5Gyatpb2Ohw/zEu9++n2PskpareS29VVujPZ1tLz76q0gdUzzTeRtTFOuhHWR88liRmolL/H1ShLTbPu31a/pn6IAX9PkTVTiLFu/GFcMZQuPkvK0JZbzYK+iJStFINc1nNRqf1uRU2BX0RKdq/R/i+mGSmoC8iRXtpcYYGBxJpCvoiIjGioC8iEiMK+iIiMaKgLyISIwr6IiIxoqAvIhIjCvoiIjGioC8iEiMK+iIiMaKgLyISIwr6IiIxUvGgb2aXmNlqM6s3s6GVXr+ISJxVNOib2VHACOBS4AzgO2Z2RqnX07I/OkOTiYhESaVL+mcD9c65dc65g8BYYFCpV7KuMRqjzouIRE2lg/7JwOaU9w0+rYuZXWNmtWZW29jYWNRKPnvqccXnUEQkAp695tyyLLdfWZaanWVI6zZypXNuJDASYODAgUWNFGtmbLjjq8V8VESkqlW6pN8AnJry/hRAozCIiFRIpYP+fOB0MxtgZkcDg4EJFc6DiEhsVbR6xznXaWbXAZOBo4DRzrnllcyDiEicVbpOH+fcJGBSpdcrIiJ6IldEJFYU9EVEYkRBX0QkRhT0RURixJwr6vmnijCzRmBjLxbxUaCpRNnpC+K2vaBtjgttc2E+4Zzrn2lCpIN+b5lZrXNuYNj5qJS4bS9om+NC21w6qt4REYkRBX0RkRip9qA/MuwMVFjcthe0zXGhbS6Rqq7TFxGR7qq9pC8iIikU9EVEYqQqg341Db5uZqea2XQzW2lmy83sep9+gplNMbM6/y+8XRcAAAQhSURBVPd4n25m9oDf9qVmdlbKsob4+evMbEhY2xSEmR1lZovM7GX/foCZzfV5f9Z3zY2ZHePf1/vpNSnLGObTV5vZxeFsSTBmdpyZjTOzVX5fnxeDfXyDP6bfMbNnzOx91bafzWy0me0ws3dS0kq2X83sc2a2zH/mATPLNFBVd865qvpHosvmtcBpwNHAEuCMsPPVi+05ETjLv/4QsIbEoPK/A4b69KHAnf71ZcArJEYpOxeY69NPANb5v8f718eHvX05tvtnwNPAy/79c8Bg//ph4Kf+9b8AD/vXg4Fn/esz/L4/Bhjgj4mjwt6uHNs7Bvhn//po4Lhq3sckhkldD7w/Zf/+qNr2M/BF4CzgnZS0ku1XYB5wnv/MK8ClefMU9pdShi/5PGByyvthwLCw81XC7XsJ+AqwGjjRp50IrPavHwG+kzL/aj/9O8AjKend5ovSPxIjqk0FLgBe9gd0E9AvfR+TGJvhPP+6n5/P0vd76nxR+wf8lQ+AlpZezfs4OV72CX6/vQxcXI37GahJC/ol2a9+2qqU9G7zZftXjdU7eQdf76v8Je2ZwFzg4865bQD+78f8bNm2vy99L/cBvwAO+/cfAXY75zr9+9S8d22Xn97i5+9L23sa0Ag87qu0HjOzY6nifeyc2wLcDWwCtpHYbwuo7v2cVKr9erJ/nZ6eUzUG/byDr/dFZvZB4M/Avznn9uSaNUOay5EeKWb2NWCHc25BanKGWV2eaX1ie71+JKoAHnLOnQnsI3HZn02f32Zfjz2IRJXMScCxwKUZZq2m/ZxPodtY1LZXY9CvusHXzey9JAL+U8658T55u5md6KefCOzw6dm2v698L+cDXzezDcBYElU89wHHmVl
ypLfUvHdtl5/+YaCZvrO9kMhrg3Nurn8/jsRJoFr3McCXgfXOuUbnXAcwHvg81b2fk0q1Xxv86/T0nKox6FfV4Ov+bvwoYKVz7t6USROA5F38ISTq+pPpP/QtAc4FWvwl5GTgIjM73peyLvJpkeKcG+acO8U5V0Ni301zzn0PmA5c7mdL397k93C5n9/59MG+1ccA4HQSN70ixzn3LrDZzD7pky4EVlCl+9jbBJxrZh/wx3hym6t2P6coyX7101rN7Fz/Hf4wZVnZhX2To0w3Ti4j0cplLfDLsPPTy235AolLtqXAYv/vMhL1mVOBOv/3BD+/ASP8ti8DBqYs68dAvf93ZdjbFmDb/5EjrXdOI/FjrgeeB47x6e/z7+v99NNSPv9L/z2sJkCrhpC39bNArd/PL5JopVHV+xj4L2AV8A7wBIkWOFW1n4FnSNyz6CBRMr+qlPsVGOi/v7XAH0hrDJDpn7phEBGJkWqs3hERkSwU9EVEYkRBX0QkRhT0RURiREFfRCRGFPRFRGJEQV9EJEb+P5qkdQkuhnG4AAAAAElFTkSuQmCC\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(lpath)" + ] + }, + { + "source": [ + "## แบบฝึกหัด\n", + "## โลกของปีเตอร์กับหมาป่าที่สมจริงยิ่งขึ้น\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้อง แต่โปรดทราบว่าการแปลอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่แม่นยำ เอกสารต้นฉบับในภาษาดั้งเดิมควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ แนะนำให้ใช้บริการแปลภาษาจากผู้เชี่ยวชาญ เราไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความที่ผิดพลาดซึ่งเกิดจากการใช้การแปลนี้\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/th/8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb b/translations/th/8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb new file mode 100644 index 000000000..e6058844f --- /dev/null +++ b/translations/th/8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb @@ -0,0 +1,447 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + 
"orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "eadbd20d2a075efb602615ad90b1e97a", + "translation_date": "2025-09-06T15:15:11+00:00", + "source_file": "8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# ปีเตอร์กับหมาป่า: สภาพแวดล้อมที่สมจริง\n", + "\n", + "ในสถานการณ์ของเรา ปีเตอร์สามารถเคลื่อนที่ไปมาได้เกือบจะโดยไม่รู้สึกเหนื่อยหรือหิวเลย ในโลกที่สมจริงมากขึ้น เขาจะต้องนั่งพักและพักผ่อนเป็นครั้งคราว รวมถึงต้องหาอาหารกินด้วย มาเพิ่มความสมจริงให้โลกของเราด้วยการนำกฎเหล่านี้มาใช้:\n", + "\n", + "1. เมื่อเคลื่อนที่จากที่หนึ่งไปยังอีกที่หนึ่ง ปีเตอร์จะสูญเสีย **พลังงาน** และเพิ่ม **ความเหนื่อยล้า** ขึ้น\n", + "2. ปีเตอร์สามารถเพิ่มพลังงานได้โดยการกินแอปเปิ้ล\n", + "3. ปีเตอร์สามารถลดความเหนื่อยล้าได้โดยการพักผ่อนใต้ต้นไม้หรือบนพื้นหญ้า (เช่น เดินไปยังตำแหน่งที่มีต้นไม้หรือหญ้า - พื้นที่สีเขียว)\n", + "4. ปีเตอร์ต้องค้นหาและกำจัดหมาป่า\n", + "5. เพื่อที่จะกำจัดหมาป่า ปีเตอร์จำเป็นต้องมีระดับพลังงานและความเหนื่อยล้าในระดับที่เหมาะสม มิฉะนั้นเขาจะพ่ายแพ้ในการต่อสู้\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import random\n", + "import math\n", + "from rlboard import *" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "width, height = 8,8\n", + "m = Board(width,height)\n", + "m.randomize(seed=13)\n", + "m.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "actions = { \"U\" : (0,-1), \"D\" : (0,1), \"L\" : (-1,0), \"R\" : (1,0) }\n", + "action_idx = { a : i for i,a in enumerate(actions.keys()) }" + ] + }, + { + "source": [ + "## การกำหนดสถานะ\n", + "\n", + "ในกฎของเกมใหม่ เราจำเป็นต้องติดตามพลังงานและความเหนื่อยล้าในแต่ละสถานะของกระดาน ดังนั้นเราจะสร้างออบเจกต์ `state` ที่จะเก็บข้อมูลทั้งหมดที่จำเป็นเกี่ยวกับสถานะปัจจุบันของปัญหา รวมถึงสถานะของกระดาน ระดับพลังงานและความเหนื่อยล้าในปัจจุบัน และความเป็นไปได้ที่เราจะชนะหมาป่าเมื่ออยู่ในสถานะสุดท้าย:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "class state:\n", + 
" def __init__(self,board,energy=10,fatigue=0,init=True):\n", + " self.board = board\n", + " self.energy = energy\n", + " self.fatigue = fatigue\n", + " self.dead = False\n", + " if init:\n", + " self.board.random_start()\n", + " self.update()\n", + "\n", + " def at(self):\n", + " return self.board.at()\n", + "\n", + " def update(self):\n", + " if self.at() == Board.Cell.water:\n", + " self.dead = True\n", + " return\n", + " if self.at() == Board.Cell.tree:\n", + " self.fatigue = 0\n", + " if self.at() == Board.Cell.apple:\n", + " self.energy = 10\n", + "\n", + " def move(self,a):\n", + " self.board.move(a)\n", + " self.energy -= 1\n", + " self.fatigue += 1\n", + " self.update()\n", + "\n", + " def is_winning(self):\n", + " return self.energy > self.fatigue" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ], + "source": [ + "def random_policy(state):\n", + " return random.choice(list(actions))\n", + "\n", + "def walk(board,policy):\n", + " n = 0 # number of steps\n", + " s = state(board)\n", + " while True:\n", + " if s.at() == Board.Cell.wolf:\n", + " if s.is_winning():\n", + " return n # success!\n", + " else:\n", + " return -n # failure!\n", + " if s.at() == Board.Cell.water:\n", + " return 0 # died\n", + " a = actions[policy(m)]\n", + " s.move(a)\n", + " n+=1\n", + "\n", + "walk(m,random_policy)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Killed by wolf = 5, won: 1 times, drown: 94 times\n" + ] + } + ], + "source": [ + "def print_statistics(policy):\n", + " s,w,n = 0,0,0\n", + " for _ in range(100):\n", + " z = walk(m,policy)\n", + " if z<0:\n", + " w+=1\n", + " elif z==0:\n", + " n+=1\n", + 
" else:\n", + " s+=1\n", + " print(f\"Killed by wolf = {w}, won: {s} times, drown: {n} times\")\n", + "\n", + "print_statistics(random_policy)" + ] + }, + { + "source": [ + "## ฟังก์ชันรางวัล\n", + "\n", + "### คำอธิบาย\n", + "ฟังก์ชันรางวัลเป็นส่วนสำคัญในการกำหนดพฤติกรรมของตัวแทนในระบบการเรียนรู้แบบเสริมกำลัง (Reinforcement Learning - RL) โดยฟังก์ชันนี้จะให้ค่าตอบแทนแก่ตัวแทนตามการกระทำที่เกิดขึ้นในสภาพแวดล้อม\n", + "\n", + "### ตัวอย่าง\n", + "ตัวอย่างฟังก์ชันรางวัลที่เรียบง่าย:\n", + "\n", + "```python\n", + "def reward_function(state, action):\n", + " if state == \"goal\":\n", + " return 1 # รางวัลสำหรับการบรรลุเป้าหมาย\n", + " else:\n", + " return 0 # ไม่มีรางวัลสำหรับสถานะอื่น\n", + "```\n", + "\n", + "### หลักการออกแบบ\n", + "- **ความชัดเจน**: ฟังก์ชันรางวัลควรสะท้อนเป้าหมายของระบบอย่างชัดเจน\n", + "- **ความสมดุล**: หลีกเลี่ยงการให้รางวัลที่มากเกินไปหรือไม่เพียงพอ\n", + "- **ความสอดคล้อง**: รางวัลควรสอดคล้องกับพฤติกรรมที่ต้องการ\n", + "\n", + "### ข้อควรระวัง\n", + "[!WARNING] การออกแบบฟังก์ชันรางวัลที่ไม่เหมาะสมอาจนำไปสู่พฤติกรรมที่ไม่คาดคิดของตัวแทน\n", + "\n", + "### เคล็ดลับ\n", + "[!TIP] ทดสอบฟังก์ชันรางวัลในสถานการณ์จำลองก่อนใช้งานจริงเพื่อให้แน่ใจว่ามันทำงานตามที่คาดหวัง\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "def reward(s):\n", + " r = s.energy-s.fatigue\n", + " if s.at()==Board.Cell.wolf:\n", + " return 100 if s.is_winning() else -100\n", + " if s.at()==Board.Cell.water:\n", + " return -100\n", + " return r" + ] + }, + { + "source": [ + "## อัลกอริทึม Q-Learning\n", + "\n", + "อัลกอริทึมการเรียนรู้จริงๆ ยังคงเหมือนเดิม เพียงแค่เราใช้ `state` แทนที่จะใช้ตำแหน่งของกระดานเพียงอย่างเดียว\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "Q = np.ones((width,height,len(actions)),dtype=np.float)*1.0/len(actions)" + ] + }, + { + 
"cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "def probs(v,eps=1e-4):\n", + " v = v-v.min()+eps\n", + " v = v/v.sum()\n", + " return v" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "" + ] + } + ], + "source": [ + "\n", + "from IPython.display import clear_output\n", + "\n", + "lpath = []\n", + "\n", + "for epoch in range(10000):\n", + " clear_output(wait=True)\n", + " print(f\"Epoch = {epoch}\",end='')\n", + "\n", + " # Pick initial point\n", + " s = state(m)\n", + " \n", + " # Start travelling\n", + " n=0\n", + " cum_reward = 0\n", + " while True:\n", + " x,y = s.board.human\n", + " v = probs(Q[x,y])\n", + " while True:\n", + " a = random.choices(list(actions),weights=v)[0]\n", + " dpos = actions[a]\n", + " if s.board.is_valid(s.board.move_pos(s.board.human,dpos)):\n", + " break \n", + " s.move(dpos)\n", + " r = reward(s)\n", + " if abs(r)==100: # end of game\n", + " print(f\" {n} steps\",end='\\r')\n", + " lpath.append(n)\n", + " break\n", + " alpha = np.exp(-n / 3000)\n", + " gamma = 0.5\n", + " ai = action_idx[a]\n", + " Q[x,y,ai] = (1 - alpha) * Q[x,y,ai] + alpha * (r + gamma * Q[x+dpos[0], y+dpos[1]].max())\n", + " n+=1" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## ผลลัพธ์\n", + "\n", + "มาดูกันว่าเราสามารถฝึกปีเตอร์ให้ต่อสู้กับหมาป่าได้สำเร็จหรือไม่!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Killed by wolf = 1, won: 9 times, drown: 90 times\n" + ] + } + ], + "source": [ + "def qpolicy(m):\n", + " x,y = m.human\n", + " v = probs(Q[x,y])\n", + " a = random.choices(list(actions),weights=v)[0]\n", + " return a\n", + "\n", + "print_statistics(qpolicy)" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 
13 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(lpath)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้องมากที่สุด แต่โปรดทราบว่าการแปลโดยอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่ถูกต้อง เอกสารต้นฉบับในภาษาดั้งเดิมควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษามืออาชีพ เราไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความผิดที่เกิดจากการใช้การแปลนี้\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/th/8-Reinforcement/1-QLearning/solution/notebook.ipynb b/translations/th/8-Reinforcement/1-QLearning/solution/notebook.ipynb new file mode 100644 index 000000000..392e8e168 --- /dev/null +++ b/translations/th/8-Reinforcement/1-QLearning/solution/notebook.ipynb @@ -0,0 
+1,577 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "488431336543f71f14d4aaf0399e3381", + "translation_date": "2025-09-06T15:11:50+00:00", + "source_file": "8-Reinforcement/1-QLearning/solution/notebook.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# ปีเตอร์กับหมาป่า: บทเรียนพื้นฐานเกี่ยวกับการเรียนรู้แบบเสริมกำลัง\n", + "\n", + "ในบทเรียนนี้ เราจะเรียนรู้วิธีการนำการเรียนรู้แบบเสริมกำลังมาใช้กับปัญหาการค้นหาเส้นทาง สถานการณ์นี้ได้รับแรงบันดาลใจจากนิทานดนตรีเรื่อง [ปีเตอร์กับหมาป่า](https://en.wikipedia.org/wiki/Peter_and_the_Wolf) โดยนักประพันธ์ชาวรัสเซีย [เซอร์เก โปรโกเฟียฟ](https://en.wikipedia.org/wiki/Sergei_Prokofiev) เป็นเรื่องราวเกี่ยวกับปีเตอร์ เด็กชายผู้กล้าหาญที่ออกจากบ้านไปยังลานป่าเพื่อไล่ล่าหมาป่า เราจะฝึกอัลกอริทึมการเรียนรู้ของเครื่องที่จะช่วยปีเตอร์สำรวจพื้นที่โดยรอบและสร้างแผนที่การนำทางที่เหมาะสมที่สุด\n", + "\n", + "ก่อนอื่น มาเริ่มต้นด้วยการนำเข้าไลบรารีที่มีประโยชน์หลายตัว:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import random\n", + "import math" + ] + }, + { + "source": [ + "## ภาพรวมของการเรียนรู้แบบเสริมกำลัง\n", + "\n", + "**การเรียนรู้แบบเสริมกำลัง** (Reinforcement Learning หรือ RL) เป็นเทคนิคการเรียนรู้ที่ช่วยให้เราสามารถเรียนรู้พฤติกรรมที่เหมาะสมที่สุดของ **ตัวแทน** (agent) ใน **สภาพแวดล้อม** (environment) 
โดยการทดลองทำซ้ำหลายๆ ครั้ง ตัวแทนในสภาพแวดล้อมนี้ควรมี **เป้าหมาย** ซึ่งกำหนดโดย **ฟังก์ชันรางวัล** (reward function)\n", + "\n", + "## สภาพแวดล้อม\n", + "\n", + "เพื่อความเข้าใจง่าย ลองพิจารณาโลกของปีเตอร์เป็นกระดานสี่เหลี่ยมขนาด `width` x `height` แต่ละช่องในกระดานนี้สามารถเป็นได้ดังนี้:\n", + "* **พื้นดิน** ซึ่งปีเตอร์และสิ่งมีชีวิตอื่นๆ สามารถเดินได้\n", + "* **น้ำ** ซึ่งแน่นอนว่าไม่สามารถเดินได้\n", + "* **ต้นไม้** หรือ **หญ้า** - สถานที่ที่คุณสามารถพักผ่อนได้\n", + "* **แอปเปิ้ล** ซึ่งเป็นสิ่งที่ปีเตอร์จะดีใจที่ได้พบเพื่อใช้เป็นอาหาร\n", + "* **หมาป่า** ซึ่งเป็นอันตรายและควรหลีกเลี่ยง\n", + "\n", + "ในการทำงานกับสภาพแวดล้อม เราจะกำหนดคลาสที่เรียกว่า `Board` เพื่อไม่ให้เนื้อหาในสมุดบันทึกนี้ดูรกเกินไป เราได้ย้ายโค้ดทั้งหมดที่เกี่ยวข้องกับการทำงานของกระดานไปยังโมดูล `rlboard` แยกต่างหาก ซึ่งเราจะนำเข้าในตอนนี้ คุณสามารถดูรายละเอียดเพิ่มเติมเกี่ยวกับการทำงานภายในของการนำไปใช้ในโมดูลนี้ได้\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from rlboard import *" + ] + }, + { + "source": [ + "ตอนนี้มาสร้างกระดานแบบสุ่มและดูว่ามันเป็นอย่างไร:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "width, height = 8,8\n", + "m = Board(width,height)\n", + "m.randomize(seed=13)\n", + "m.plot()" + ] + }, + { + "source": [ + "## การกระทำและนโยบาย\n", + "\n", + "ในตัวอย่างของเรา เป้าหมายของปีเตอร์คือการหาลูกแอปเปิ้ล ในขณะเดียวกันก็ต้องหลีกเลี่ยงหมาป่าและสิ่งกีดขวางอื่นๆ เพื่อทำสิ่งนี้ เขาสามารถเดินไปรอบๆ ได้จนกว่าจะเจอลูกแอปเปิ้ล ดังนั้น ในแต่ละตำแหน่ง เขาสามารถเลือกทำหนึ่งในสี่การกระทำต่อไปนี้: ขึ้น, ลง, ซ้าย และขวา เราจะกำหนดการกระทำเหล่านี้เป็นพจนานุกรม และจับคู่กับการเปลี่ยนแปลงพิกัดที่สอดคล้องกัน ตัวอย่างเช่น การเคลื่อนที่ไปทางขวา (`R`) จะสอดคล้องกับคู่พิกัด `(1,0)`\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "actions = { \"U\" : (0,-1), \"D\" : (0,1), \"L\" : (-1,0), \"R\" : (1,0) }\n", + "action_idx = { a : i for i,a in enumerate(actions.keys()) }" + ] + 
}, + { + "source": [ + "กลยุทธ์ของตัวแทนของเรา (ปีเตอร์) ถูกกำหนดโดยสิ่งที่เรียกว่า **นโยบาย** ลองพิจารณานโยบายที่ง่ายที่สุดที่เรียกว่า **การเดินแบบสุ่ม**\n", + "\n", + "## การเดินแบบสุ่ม\n", + "\n", + "มาแก้ปัญหาของเราด้วยการใช้กลยุทธ์การเดินแบบสุ่ม\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "18" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ], + "source": [ + "def random_policy(m):\n", + " return random.choice(list(actions))\n", + "\n", + "def walk(m,policy,start_position=None):\n", + " n = 0 # number of steps\n", + " # set initial position\n", + " if start_position:\n", + " m.human = start_position \n", + " else:\n", + " m.random_start()\n", + " while True:\n", + " if m.at() == Board.Cell.apple:\n", + " return n # success!\n", + " if m.at() in [Board.Cell.wolf, Board.Cell.water]:\n", + " return -1 # eaten by wolf or drowned\n", + " while True:\n", + " a = actions[policy(m)]\n", + " new_pos = m.move_pos(m.human,a)\n", + " if m.is_valid(new_pos) and m.at(new_pos)!=Board.Cell.water:\n", + " m.move(a) # do the actual move\n", + " break\n", + " n+=1\n", + "\n", + "walk(m,random_policy)" + ] + }, + { + "source": [ + "มาลองทำการทดลองเดินสุ่มหลายครั้งและดูจำนวนก้าวเฉลี่ยที่ใช้:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Average path length = 32.87096774193548, eaten by wolf: 7 times\n" + ] + } + ], + "source": [ + "def print_statistics(policy):\n", + " s,w,n = 0,0,0\n", + " for _ in range(100):\n", + " z = walk(m,policy)\n", + " if z<0:\n", + " w+=1\n", + " else:\n", + " s += z\n", + " n += 1\n", + " print(f\"Average path length = {s/n}, eaten by wolf: {w} times\")\n", + "\n", + "print_statistics(random_policy)" + ] + 
}, + { + "source": [ + "## ฟังก์ชันรางวัล\n", + "\n", + "เพื่อทำให้กลยุทธ์ของเราฉลาดขึ้น เราจำเป็นต้องเข้าใจว่าการเคลื่อนไหวใด \"ดีกว่า\" การเคลื่อนไหวอื่นๆ\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "move_reward = -0.1\n", + "goal_reward = 10\n", + "end_reward = -10\n", + "\n", + "def reward(m,pos=None):\n", + " pos = pos or m.human\n", + " if not m.is_valid(pos):\n", + " return end_reward\n", + " x = m.at(pos)\n", + " if x==Board.Cell.water or x == Board.Cell.wolf:\n", + " return end_reward\n", + " if x==Board.Cell.apple:\n", + " return goal_reward\n", + " return move_reward" + ] + }, + { + "source": [ + "## Q-Learning\n", + "\n", + "สร้าง Q-Table หรืออาร์เรย์หลายมิติ เนื่องจากกระดานของเรามีขนาด `width` x `height` เราสามารถแสดง Q-Table ด้วย numpy array ที่มีรูปร่าง `width` x `height` x `len(actions)`:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "Q = np.ones((width,height,len(actions)),dtype=np.float)*1.0/len(actions)" + ] + }, + { + "source": [ + "ส่ง Q-Table ไปยังฟังก์ชัน plot เพื่อแสดงตารางบนกระดาน:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## สาระสำคัญของ Q-Learning: สมการ Bellman และอัลกอริทึมการเรียนรู้\n", + "\n", + "เขียน pseudo-code สำหรับอัลกอริทึมการเรียนรู้ของเรา:\n", + "\n", + "* เริ่มต้น Q-Table Q ด้วยค่าที่เท่ากันสำหรับทุกสถานะและการกระทำ\n", + "* กำหนดอัตราการเรียนรู้ $\\alpha\\leftarrow 1$\n", + "* ทำการจำลองซ้ำหลายครั้ง\n", + " 1. เริ่มต้นที่ตำแหน่งสุ่ม\n", + " 1. ทำซ้ำ\n", + " 1. เลือกการกระทำ $a$ ที่สถานะ $s$\n", + " 2. ดำเนินการโดยย้ายไปยังสถานะใหม่ $s'$\n", + " 3. หากพบเงื่อนไขสิ้นสุดเกม หรือรางวัลรวมมีค่าน้อยเกินไป - ออกจากการจำลอง \n", + " 4. คำนวณรางวัล $r$ ที่สถานะใหม่\n", + " 5. อัปเดต Q-Function ตามสมการ Bellman: $Q(s,a)\\leftarrow (1-\\alpha)Q(s,a)+\\alpha(r+\\gamma\\max_{a'}Q(s',a'))$\n", + " 6. $s\\leftarrow s'$\n", + " 7. อัปเดตรางวัลรวมและลดค่า $\\alpha$\n", + "\n", + "## ใช้ประโยชน์ vs. 
สำรวจ\n", + "\n", + "วิธีที่ดีที่สุดคือการสร้างสมดุลระหว่างการสำรวจและการใช้ประโยชน์ เมื่อเราเรียนรู้เกี่ยวกับสภาพแวดล้อมมากขึ้น เราจะมีแนวโน้มที่จะเลือกเส้นทางที่เหมาะสมที่สุด แต่ควรเลือกเส้นทางที่ยังไม่ได้สำรวจเป็นครั้งคราว\n", + "\n", + "## การใช้งาน Python\n", + "\n", + "ตอนนี้เราพร้อมที่จะใช้อัลกอริทึมการเรียนรู้ ก่อนหน้านั้น เราต้องมีฟังก์ชันที่จะแปลงตัวเลขใน Q-Table ให้เป็นเวกเตอร์ของความน่าจะเป็นสำหรับการกระทำที่เกี่ยวข้อง:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "def probs(v,eps=1e-4):\n", + " v = v-v.min()+eps\n", + " v = v/v.sum()\n", + " return v" + ] + }, + { + "source": [ + "เราเพิ่มค่า `eps` เล็กน้อยลงในเวกเตอร์ต้นฉบับเพื่อหลีกเลี่ยงการหารด้วย 0 ในกรณีเริ่มต้น เมื่อทุกองค์ประกอบของเวกเตอร์มีค่าเหมือนกัน\n", + "\n", + "ต่อไปนี้คืออัลกอริทึมการเรียนรู้จริง ซึ่งเราจะรันเป็นจำนวน 10000 รอบการทดลอง หรือที่เรียกว่า **epochs**:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "" + ] + } + ], + "source": [ + "\n", + "from IPython.display import clear_output\n", + "\n", + "lpath = []\n", + "\n", + "for epoch in range(10000):\n", + " clear_output(wait=True)\n", + " print(f\"Epoch = {epoch}\",end='')\n", + "\n", + " # Pick initial point\n", + " m.random_start()\n", + " \n", + " # Start travelling\n", + " n=0\n", + " cum_reward = 0\n", + " while True:\n", + " x,y = m.human\n", + " v = probs(Q[x,y])\n", + " a = random.choices(list(actions),weights=v)[0]\n", + " dpos = actions[a]\n", + " m.move(dpos,check_correctness=False) # we allow player to move outside the board, which terminates episode\n", + " r = reward(m)\n", + " cum_reward += r\n", + " if r==end_reward or cum_reward < -1000:\n", + " print(f\" {n} steps\",end='\\r')\n", + " lpath.append(n)\n", + " break\n", + " alpha = np.exp(-n / 3000)\n", + " 
gamma = 0.5\n", + " ai = action_idx[a]\n", + " Q[x,y,ai] = (1 - alpha) * Q[x,y,ai] + alpha * (r + gamma * Q[x+dpos[0], y+dpos[1]].max())\n", + " n+=1" + ] + }, + { + "source": [ + "หลังจากดำเนินการอัลกอริทึมนี้ ตาราง Q-Table ควรได้รับการอัปเดตด้วยค่าที่กำหนดความน่าสนใจของการกระทำต่าง ๆ ในแต่ละขั้นตอน แสดงตารางที่นี่:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## การตรวจสอบนโยบาย\n", + "\n", + "เนื่องจาก Q-Table แสดง \"ความน่าสนใจ\" ของแต่ละการกระทำในแต่ละสถานะ การใช้งานเพื่อกำหนดการนำทางที่มีประสิทธิภาพในโลกของเราจึงค่อนข้างง่าย ในกรณีที่ง่ายที่สุด เราสามารถเลือกการกระทำที่สอดคล้องกับค่าที่สูงที่สุดใน Q-Table ได้:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "2" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ], + "source": [ + "def qpolicy_strict(m):\n", + " x,y = m.human\n", + " v = probs(Q[x,y])\n", + " a = list(actions)[np.argmax(v)]\n", + " return a\n", + "\n", + "walk(m,qpolicy_strict)" + ] + }, + { + "source": [ + "หากคุณลองรันโค้ดด้านบนหลายครั้ง คุณอาจสังเกตเห็นว่าบางครั้งมันจะ \"ค้าง\" 
และคุณจำเป็นต้องกดปุ่ม STOP ในโน้ตบุ๊กเพื่อหยุดการทำงาน\n", + "\n", + "> **งานที่ 1:** แก้ไขฟังก์ชัน `walk` เพื่อจำกัดความยาวสูงสุดของเส้นทางโดยกำหนดจำนวนก้าว (เช่น 100) และดูว่าโค้ดด้านบนคืนค่านี้เป็นครั้งคราว\n", + "\n", + "> **งานที่ 2:** แก้ไขฟังก์ชัน `walk` เพื่อไม่ให้กลับไปยังตำแหน่งที่เคยไปมาก่อนหน้านี้ สิ่งนี้จะช่วยป้องกันไม่ให้ `walk` วนลูป อย่างไรก็ตาม ตัวแทนอาจยังคงติดอยู่ในตำแหน่งที่ไม่สามารถหลบหนีได้\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Average path length = 3.45, eaten by wolf: 0 times\n" + ] + } + ], + "source": [ + "\n", + "def qpolicy(m):\n", + " x,y = m.human\n", + " v = probs(Q[x,y])\n", + " a = random.choices(list(actions),weights=v)[0]\n", + " return a\n", + "\n", + "print_statistics(qpolicy)" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 15 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(lpath)" + ] + }, + { + "source": [ + "สิ่งที่เราเห็นในที่นี้คือในตอนแรกความยาวเฉลี่ยของเส้นทางเพิ่มขึ้น นี่อาจเป็นเพราะว่าเมื่อเราไม่รู้อะไรเกี่ยวกับสภาพแวดล้อมเลย เรามักจะติดอยู่ในสถานะที่ไม่ดี เช่น น้ำหรือหมาป่า เมื่อเราเรียนรู้มากขึ้นและเริ่มใช้ความรู้นี้ เราสามารถสำรวจสภาพแวดล้อมได้นานขึ้น แต่เรายังไม่รู้แน่ชัดว่าแอปเปิ้ลอยู่ที่ไหน\n", + "\n", + "เมื่อเราเรียนรู้มากพอ มันจะง่ายขึ้นสำหรับตัวแทนในการบรรลุเป้าหมาย และความยาวของเส้นทางก็เริ่มลดลง อย่างไรก็ตาม เรายังคงเปิดรับการสำรวจ ดังนั้นเรามักจะเบี่ยงเบนออกจากเส้นทางที่ดีที่สุด และลองสำรวจตัวเลือกใหม่ ๆ ซึ่งทำให้เส้นทางยาวกว่าที่ควรจะเป็น\n", + "\n", + "สิ่งที่เราสังเกตได้อีกอย่างจากกราฟนี้คือในบางจุดความยาวเพิ่มขึ้นอย่างรวดเร็ว สิ่งนี้บ่งชี้ถึงธรรมชาติแบบสุ่มของกระบวนการ และในบางครั้งเราอาจ \"ทำให้ค่าสัมประสิทธิ์ใน Q-Table เสียหาย\" โดยการเขียนทับด้วยค่าที่ใหม่กว่า ซึ่งควรลดผลกระทบนี้ให้น้อยที่สุดโดยการลดอัตราการเรียนรู้ (เช่น ในช่วงท้ายของการฝึก เราปรับค่าของ Q-Table เพียงเล็กน้อย)\n", + "\n", + "โดยรวมแล้ว สิ่งสำคัญคือต้องจำไว้ว่าความสำเร็จและคุณภาพของกระบวนการเรียนรู้ขึ้นอยู่กับพารามิเตอร์อย่างมาก เช่น อัตราการเรียนรู้ การลดอัตราการเรียนรู้ และตัวคูณลดค่า 
พารามิเตอร์เหล่านี้มักถูกเรียกว่า **ไฮเปอร์พารามิเตอร์** เพื่อแยกความแตกต่างจาก **พารามิเตอร์** ซึ่งเราปรับแต่งระหว่างการฝึก (เช่น ค่าสัมประสิทธิ์ใน Q-Table) กระบวนการค้นหาค่าที่ดีที่สุดของไฮเปอร์พารามิเตอร์เรียกว่า **การปรับแต่งไฮเปอร์พารามิเตอร์** ซึ่งเป็นหัวข้อที่ควรศึกษาแยกต่างหาก\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "## แบบฝึกหัด\n", + "#### โลกของปีเตอร์กับหมาป่าที่สมจริงยิ่งขึ้น\n", + "\n", + "ในสถานการณ์ของเรา ปีเตอร์สามารถเคลื่อนที่ไปมาได้แทบจะไม่รู้สึกเหนื่อยหรือหิวเลย ในโลกที่สมจริงมากขึ้น เขาจะต้องนั่งพักเป็นครั้งคราว และต้องหาอาหารกินด้วย ลองทำให้โลกของเราสมจริงขึ้นโดยการเพิ่มกฎดังต่อไปนี้:\n", + "\n", + "1. เมื่อปีเตอร์เคลื่อนที่จากที่หนึ่งไปยังอีกที่หนึ่ง เขาจะสูญเสีย **พลังงาน** และเพิ่ม **ความเหนื่อยล้า** ขึ้น\n", + "2. ปีเตอร์สามารถเพิ่มพลังงานได้โดยการกินแอปเปิ้ล\n", + "3. ปีเตอร์สามารถลดความเหนื่อยล้าได้โดยการพักผ่อนใต้ต้นไม้หรือบนพื้นหญ้า (เช่น เดินไปยังตำแหน่งบนกระดานที่มีต้นไม้หรือหญ้า - พื้นที่สีเขียว)\n", + "4. ปีเตอร์ต้องค้นหาและฆ่าหมาป่า\n", + "5. 
เพื่อที่จะฆ่าหมาป่า ปีเตอร์จำเป็นต้องมีระดับพลังงานและความเหนื่อยล้าในระดับที่เหมาะสม มิฉะนั้นเขาจะพ่ายแพ้ในการต่อสู้\n", + "\n", + "ปรับฟังก์ชันรางวัลตามกฎของเกมนี้ จากนั้นรันอัลกอริทึมการเรียนรู้แบบเสริมกำลังเพื่อเรียนรู้กลยุทธ์ที่ดีที่สุดในการชนะเกม และเปรียบเทียบผลลัพธ์ของการเดินแบบสุ่มกับอัลกอริทึมของคุณในแง่ของจำนวนเกมที่ชนะและแพ้\n", + "\n", + "> **หมายเหตุ**: คุณอาจต้องปรับไฮเปอร์พารามิเตอร์เพื่อให้ระบบทำงานได้ โดยเฉพาะจำนวนรอบการฝึกฝน เนื่องจากความสำเร็จของเกม (การต่อสู้กับหมาป่า) เป็นเหตุการณ์ที่เกิดขึ้นได้ยาก คุณอาจต้องใช้เวลาฝึกฝนนานขึ้น\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้อง แต่โปรดทราบว่าการแปลโดยอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่ถูกต้อง เอกสารต้นฉบับในภาษาดั้งเดิมควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษามืออาชีพ เราไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความผิดที่เกิดจากการใช้การแปลนี้\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/th/8-Reinforcement/2-Gym/notebook.ipynb b/translations/th/8-Reinforcement/2-Gym/notebook.ipynb new file mode 100644 index 000000000..7694d20f4 --- /dev/null +++ b/translations/th/8-Reinforcement/2-Gym/notebook.ipynb @@ -0,0 +1,392 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.4 64-bit ('base': conda)" + }, + "interpreter": { + "hash": "86193a1ab0ba47eac1c69c1756090baa3b420b3eea7d4aafab8b85f8b312f0c5" + }, + "coopTranslator": { + "original_hash": "f22f8f3daed4b6d34648d1254763105b", + 
"translation_date": "2025-09-06T15:18:01+00:00", + "source_file": "8-Reinforcement/2-Gym/notebook.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "## การเล่นสเก็ต CartPole\n", + "\n", + "> **ปัญหา**: หากปีเตอร์ต้องการหนีจากหมาป่า เขาจำเป็นต้องเคลื่อนที่ได้เร็วกว่า เราจะมาดูกันว่าปีเตอร์สามารถเรียนรู้การเล่นสเก็ตได้อย่างไร โดยเฉพาะการรักษาสมดุล ด้วยการใช้ Q-Learning\n", + "\n", + "ก่อนอื่น มาติดตั้ง gym และนำเข้าห้องสมุดที่จำเป็น:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 1" + ] + }, + { + "source": [ + "## สร้างสภาพแวดล้อมรถเข็นเสา\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "#code block 2" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "source": [ + "เพื่อดูว่าโครงสร้างของสิ่งแวดล้อมทำงานอย่างไร ลองรันการจำลองสั้น ๆ เป็นเวลา 100 ขั้นตอน\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "#code block 3" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "source": [ + "ระหว่างการจำลอง เราจำเป็นต้องได้รับการสังเกตเพื่อที่จะตัดสินใจว่าจะดำเนินการอย่างไร ในความเป็นจริง ฟังก์ชัน `step` จะส่งคืนการสังเกตปัจจุบัน ฟังก์ชันรางวัล และธง `done` ที่ระบุว่าควรดำเนินการจำลองต่อไปหรือไม่:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "#code block 4" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "source": [ + "เราสามารถหาค่าต่ำสุดและค่าสูงสุดของตัวเลขเหล่านั้น:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38]\n[4.8000002e+00 
3.4028235e+38 4.1887903e-01 3.4028235e+38]\n" + ] + } + ], + "source": [ + "#code block 5" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 6" + ] + }, + { + "source": [ + "มาสำรวจวิธีการแยกส่วนอื่น ๆ โดยใช้ถังข้อมูล (bins):\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Sample bins for interval (-5,5) with 10 bins\n [-5. -4. -3. -2. -1. 0. 1. 2. 3. 4. 5.]\n" + ] + } + ], + "source": [ + "#code block 7" + ] + }, + { + "source": [ + "มาทำการจำลองสั้น ๆ และสังเกตค่าของสภาพแวดล้อมแบบแยกส่วนเหล่านั้นกันเถอะ\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(0, 0, -2, -2)\n(0, 1, -2, -5)\n(0, 2, -3, -8)\n(0, 3, -5, -11)\n(0, 3, -7, -14)\n(0, 4, -10, -17)\n(0, 3, -14, -15)\n(0, 3, -17, -12)\n(0, 3, -20, -16)\n(0, 4, -23, -19)\n" + ] + } + ], + "source": [ + "#code block 8" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 9" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 10" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0: 22.0, alpha=0.3, epsilon=0.9\n", + "5000: 70.1384, alpha=0.3, epsilon=0.9\n", + "10000: 121.8586, alpha=0.3, epsilon=0.9\n", + "15000: 149.6368, alpha=0.3, epsilon=0.9\n", + "20000: 168.2782, alpha=0.3,
epsilon=0.9\n", + "25000: 196.7356, alpha=0.3, epsilon=0.9\n", + "30000: 220.7614, alpha=0.3, epsilon=0.9\n", + "35000: 233.2138, alpha=0.3, epsilon=0.9\n", + "40000: 248.22, alpha=0.3, epsilon=0.9\n", + "45000: 264.636, alpha=0.3, epsilon=0.9\n", + "50000: 276.926, alpha=0.3, epsilon=0.9\n", + "55000: 277.9438, alpha=0.3, epsilon=0.9\n", + "60000: 248.881, alpha=0.3, epsilon=0.9\n", + "65000: 272.529, alpha=0.3, epsilon=0.9\n", + "70000: 281.7972, alpha=0.3, epsilon=0.9\n", + "75000: 284.2844, alpha=0.3, epsilon=0.9\n", + "80000: 269.667, alpha=0.3, epsilon=0.9\n", + "85000: 273.8652, alpha=0.3, epsilon=0.9\n", + "90000: 278.2466, alpha=0.3, epsilon=0.9\n", + "95000: 269.1736, alpha=0.3, epsilon=0.9\n" + ] + } + ], + "source": [ + "#code block 11" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 20 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(rewards)" + ] + }, + { + "source": [ + "จากกราฟนี้ ไม่สามารถบอกอะไรได้เลย เนื่องจากลักษณะของกระบวนการฝึกแบบสุ่มทำให้ระยะเวลาของการฝึกแต่ละครั้งแตกต่างกันอย่างมาก เพื่อให้กราฟนี้มีความหมายมากขึ้น เราสามารถคำนวณ **ค่าเฉลี่ยเคลื่อนที่** จากชุดการทดลอง เช่น 100 ซึ่งสามารถทำได้อย่างสะดวกโดยใช้ `np.convolve`:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 22 
+ }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "#code block 12" + ] + }, + { + "source": [ + "## การปรับเปลี่ยนไฮเปอร์พารามิเตอร์และดูผลลัพธ์ที่เกิดขึ้น\n", + "\n", + "ตอนนี้จะน่าสนใจมากขึ้นถ้าเราได้เห็นว่ารูปแบบที่ผ่านการฝึกฝนทำงานอย่างไร ลองรันการจำลองดู และเราจะใช้กลยุทธ์การเลือกการกระทำแบบเดียวกับที่ใช้ในระหว่างการฝึก: การสุ่มตัวอย่างตามการกระจายความน่าจะเป็นใน Q-Table:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 
13" + ] + }, + { + "source": [ + "## การบันทึกผลลัพธ์เป็นภาพ GIF แบบเคลื่อนไหว\n", + "\n", + "หากคุณต้องการสร้างความประทับใจให้เพื่อน ๆ คุณอาจต้องการส่งภาพ GIF แบบเคลื่อนไหวของเสาเพื่อความสมดุลให้พวกเขา สำหรับการทำเช่นนี้ เราสามารถเรียกใช้ `env.render` เพื่อสร้างเฟรมภาพ และบันทึกเฟรมนั้นเป็นภาพ GIF แบบเคลื่อนไหวโดยใช้ไลบรารี PIL:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "360\n" + ] + } + ], + "source": [ + "from PIL import Image\n", + "obs = env.reset()\n", + "done = False\n", + "i=0\n", + "ims = []\n", + "while not done:\n", + " s = discretize(obs)\n", + " img=env.render(mode='rgb_array')\n", + " ims.append(Image.fromarray(img))\n", + " v = probs(np.array([Qbest.get((s,a),0) for a in actions]))\n", + " a = random.choices(actions,weights=v)[0]\n", + " obs,_,done,_ = env.step(a)\n", + " i+=1\n", + "env.close()\n", + "ims[0].save('images/cartpole-balance.gif',save_all=True,append_images=ims[1::2],loop=0,duration=5)\n", + "print(i)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้อง แต่โปรดทราบว่าการแปลโดยอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่ถูกต้อง เอกสารต้นฉบับในภาษาดั้งเดิมควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษามืออาชีพ เราจะไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความผิดที่เกิดจากการใช้การแปลนี้\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/th/8-Reinforcement/2-Gym/solution/notebook.ipynb b/translations/th/8-Reinforcement/2-Gym/solution/notebook.ipynb new file mode 100644 index 000000000..9db8feddb --- /dev/null +++ b/translations/th/8-Reinforcement/2-Gym/solution/notebook.ipynb @@ -0,0 +1,524 @@ +{ + "metadata": { + "language_info": 
{ + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "5c0e485e58d63c506f1791c4dbf990ce", + "translation_date": "2025-09-06T15:20:54+00:00", + "source_file": "8-Reinforcement/2-Gym/solution/notebook.ipynb", + "language_code": "th" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "## การเล่นสเก็ต CartPole\n", + "\n", + "> **ปัญหา**: หากปีเตอร์ต้องการหนีจากหมาป่า เขาจำเป็นต้องเคลื่อนที่ได้เร็วกว่า เราจะมาดูกันว่าปีเตอร์สามารถเรียนรู้การเล่นสเก็ตได้อย่างไร โดยเฉพาะการรักษาสมดุล ด้วยการใช้ Q-Learning\n", + "\n", + "ก่อนอื่น มาติดตั้ง gym และนำเข้าไลบรารีที่จำเป็น:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: gym in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.18.3)\n", + "Requirement already satisfied: Pillow<=8.2.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (7.0.0)\n", + "Requirement already satisfied: scipy in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (1.4.1)\n", + "Requirement already satisfied: numpy>=1.10.4 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (1.19.2)\n", + "Requirement already satisfied: cloudpickle<1.7.0,>=1.2.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (1.6.0)\n", + 
"Requirement already satisfied: pyglet<=1.5.15,>=1.4.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (1.5.15)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n" + ] + } + ], + "source": [ + "import sys\n", + "!pip install gym \n", + "\n", + "import gym\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import random" + ] + }, + { + "source": [ + "## สร้างสภาพแวดล้อมรถเข็นและเสา\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "env = gym.make(\"CartPole-v1\")\n", + "print(env.action_space)\n", + "print(env.observation_space)\n", + "print(env.action_space.sample())" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Discrete(2)\nBox(-3.4028234663852886e+38, 3.4028234663852886e+38, (4,), float32)\n0\n" + ] + } + ] + }, + { + "source": [ + "เพื่อดูว่าโครงสร้างของสิ่งแวดล้อมทำงานอย่างไร ลองรันการจำลองสั้น ๆ เป็นเวลา 100 ขั้นตอน\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "env.reset()\n", + "\n", + "for i in range(100):\n", + " env.render()\n", + " env.step(env.action_space.sample())\n", + "env.close()" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/gym/logger.py:30: UserWarning: \u001b[33mWARN: You are calling 'step()' even though this environment has already returned done = True. 
You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.\u001b[0m\n warnings.warn(colorize('%s: %s'%('WARN', msg % args), 'yellow'))\n" + ] + } + ] + }, + { + "source": [ + "ระหว่างการจำลอง เราจำเป็นต้องได้รับการสังเกตเพื่อที่จะตัดสินใจว่าจะดำเนินการอย่างไร ในความเป็นจริง ฟังก์ชัน `step` จะส่งคืนการสังเกตการณ์ปัจจุบัน ฟังก์ชันรางวัล และธง `done` ที่ระบุว่ามีเหตุผลที่จะดำเนินการจำลองต่อไปหรือไม่:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "env.reset()\n", + "\n", + "done = False\n", + "while not done:\n", + " env.render()\n", + " obs, rew, done, info = env.step(env.action_space.sample())\n", + " print(f\"{obs} -> {rew}\")\n", + "env.close()" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[ 0.03044442 -0.19543914 -0.04496216 0.28125618] -> 1.0\n", + "[ 0.02653564 -0.38989186 -0.03933704 0.55942606] -> 1.0\n", + "[ 0.0187378 -0.19424049 -0.02814852 0.25461393] -> 1.0\n", + "[ 0.01485299 -0.38894946 -0.02305624 0.53828712] -> 1.0\n", + "[ 0.007074 -0.19351108 -0.0122905 0.23842953] -> 1.0\n", + "[ 0.00320378 0.00178427 -0.00752191 -0.05810469] -> 1.0\n", + "[ 0.00323946 0.19701326 -0.008684 -0.35315131] -> 1.0\n", + "[ 0.00717973 0.00201587 -0.01574703 -0.06321931] -> 1.0\n", + "[ 0.00722005 0.19736001 -0.01701141 -0.36082863] -> 1.0\n", + "[ 0.01116725 0.39271958 -0.02422798 -0.65882671] -> 1.0\n", + "[ 0.01902164 0.19794307 -0.03740452 -0.37387001] -> 1.0\n", + "[ 0.0229805 0.39357584 -0.04488192 -0.67810827] -> 1.0\n", + "[ 0.03085202 0.58929164 -0.05844408 -0.98457719] -> 1.0\n", + "[ 0.04263785 0.78514572 -0.07813563 -1.2950295 ] -> 1.0\n", + "[ 0.05834076 0.98116859 -0.10403622 -1.61111521] -> 1.0\n", + "[ 0.07796413 0.78741784 -0.13625852 -1.35259196] -> 1.0\n", + "[ 0.09371249 0.98396202 -0.16331036 -1.68461179] -> 1.0\n", + "[ 0.11339173 0.79106371 -0.1970026 -1.44691436] 
-> 1.0\n", + "[ 0.12921301 0.59883361 -0.22594088 -1.22169133] -> 1.0\n" + ] + } + ] + }, + { + "source": [ + "เราสามารถหาค่าต่ำสุดและค่าสูงสุดของตัวเลขเหล่านั้น:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38]\n[4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38]\n" + ] + } + ], + "source": [ + "print(env.observation_space.low)\n", + "print(env.observation_space.high)" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "def discretize(x):\n", + " return tuple((x/np.array([0.25, 0.25, 0.01, 0.1])).astype(np.int))" + ] + }, + { + "source": [ + "มาสำรวจวิธีการแยกส่วนอื่น ๆ โดยใช้ถังข้อมูล:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Sample bins for interval (-5,5) with 10 bins\n [-5. -4. -3. -2. -1. 0. 1. 2. 3. 4. 
5.]\n" + ] + } + ], + "source": [ + "def create_bins(i,num):\n", + " return np.arange(num+1)*(i[1]-i[0])/num+i[0]\n", + "\n", + "print(\"Sample bins for interval (-5,5) with 10 bins\\n\",create_bins((-5,5),10))\n", + "\n", + "ints = [(-5,5),(-2,2),(-0.5,0.5),(-2,2)] # intervals of values for each parameter\n", + "nbins = [20,20,10,10] # number of bins for each parameter\n", + "bins = [create_bins(ints[i],nbins[i]) for i in range(4)]\n", + "\n", + "def discretize_bins(x):\n", + " return tuple(np.digitize(x[i],bins[i]) for i in range(4))" + ] + }, + { + "source": [ + "มาลองรันการจำลองสั้น ๆ และสังเกตค่าของสภาพแวดล้อมแบบแยกส่วนเหล่านั้นกันเถอะ\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(0, 0, -1, -3)\n(0, 0, -2, 0)\n(0, 0, -2, -3)\n(0, 1, -3, -6)\n(0, 2, -4, -9)\n(0, 3, -6, -12)\n(0, 2, -8, -9)\n(0, 3, -10, -13)\n(0, 4, -13, -16)\n(0, 4, -16, -19)\n(0, 4, -20, -17)\n(0, 4, -24, -20)\n" + ] + } + ], + "source": [ + "env.reset()\n", + "\n", + "done = False\n", + "while not done:\n", + " #env.render()\n", + " obs, rew, done, info = env.step(env.action_space.sample())\n", + " #print(discretize_bins(obs))\n", + " print(discretize(obs))\n", + "env.close()" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "Q = {}\n", + "actions = (0,1)\n", + "\n", + "def qvalues(state):\n", + " return [Q.get((state,a),0) for a in actions]" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# hyperparameters\n", + "alpha = 0.3\n", + "gamma = 0.9\n", + "epsilon = 0.90" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + 
"output_type": "stream", + "name": "stdout", + "text": [ + "0: 108.0, alpha=0.3, epsilon=0.9\n" + ] + } + ], + "source": [ + "def probs(v,eps=1e-4):\n", + " v = v-v.min()+eps\n", + " v = v/v.sum()\n", + " return v\n", + "\n", + "Qmax = 0\n", + "cum_rewards = []\n", + "rewards = []\n", + "for epoch in range(100000):\n", + " obs = env.reset()\n", + " done = False\n", + " cum_reward=0\n", + " # == do the simulation ==\n", + " while not done:\n", + " s = discretize(obs)\n", + " if random.random() Qmax:\n", + " Qmax = np.average(cum_rewards)\n", + " Qbest = Q\n", + " cum_rewards=[]" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 20 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(rewards)" + ] + }, + { + "source": [ + "จากกราฟนี้ ไม่สามารถบอกอะไรได้เลย เพราะเนื่องจากลักษณะของกระบวนการฝึกแบบสุ่ม ความยาวของช่วงการฝึกจะแตกต่างกันอย่างมาก เพื่อให้เข้าใจกราฟนี้มากขึ้น เราสามารถคำนวณ **ค่าเฉลี่ยเคลื่อนที่** จากชุดการทดลอง เช่น 100 ซึ่งสามารถทำได้อย่างสะดวกโดยใช้ `np.convolve`:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 22 + }, + 
{ + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "def running_average(x,window):\n", + " return np.convolve(x,np.ones(window)/window,mode='valid')\n", + "\n", + "plt.plot(running_average(rewards,100))" + ] + }, + { + "source": [ + "## การปรับเปลี่ยนค่าพารามิเตอร์และดูผลลัพธ์ที่เกิดขึ้น\n", + "\n", + "ตอนนี้จะน่าสนใจมากขึ้นถ้าเราได้เห็นว่ารูปแบบการทำงานของโมเดลที่ผ่านการฝึกฝนเป็นอย่างไร ลองรันการจำลองดู และเราจะใช้กลยุทธ์การเลือกการกระทำแบบเดียวกับที่ใช้ในระหว่างการฝึกฝน: การสุ่มตัวอย่างตามการกระจายความน่าจะเป็นใน Q-Table:\n" + ], + "cell_type": 
"markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "obs = env.reset()\n", + "done = False\n", + "while not done:\n", + " s = discretize(obs)\n", + " env.render()\n", + " v = probs(np.array(qvalues(s)))\n", + " a = random.choices(actions,weights=v)[0]\n", + " obs,_,done,_ = env.step(a)\n", + "env.close()" + ] + }, + { + "source": [ + "## การบันทึกผลลัพธ์เป็นภาพ GIF แบบเคลื่อนไหว\n", + "\n", + "หากคุณต้องการสร้างความประทับใจให้เพื่อน ๆ คุณอาจต้องการส่งภาพ GIF แบบเคลื่อนไหวของเสาเพื่อความสมดุลให้พวกเขา สำหรับการทำเช่นนี้ เราสามารถเรียกใช้ `env.render` เพื่อสร้างเฟรมภาพ และบันทึกเฟรมเหล่านั้นเป็นภาพ GIF แบบเคลื่อนไหวโดยใช้ไลบรารี PIL:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "360\n" + ] + } + ], + "source": [ + "from PIL import Image\n", + "obs = env.reset()\n", + "done = False\n", + "i=0\n", + "ims = []\n", + "while not done:\n", + " s = discretize(obs)\n", + " img=env.render(mode='rgb_array')\n", + " ims.append(Image.fromarray(img))\n", + " v = probs(np.array([Qbest.get((s,a),0) for a in actions]))\n", + " a = random.choices(actions,weights=v)[0]\n", + " obs,_,done,_ = env.step(a)\n", + " i+=1\n", + "env.close()\n", + "ims[0].save('images/cartpole-balance.gif',save_all=True,append_images=ims[1::2],loop=0,duration=5)\n", + "print(i)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้องมากที่สุด แต่โปรดทราบว่าการแปลโดยอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่ถูกต้อง เอกสารต้นฉบับในภาษาดั้งเดิมควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ ขอแนะนำให้ใช้บริการแปลภาษามนุษย์ที่เป็นมืออาชีพ 
เราไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความผิดที่เกิดจากการใช้การแปลนี้\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/th/PyTorch_Fundamentals.ipynb b/translations/th/PyTorch_Fundamentals.ipynb new file mode 100644 index 000000000..18fa658fd --- /dev/null +++ b/translations/th/PyTorch_Fundamentals.ipynb @@ -0,0 +1,2828 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4", + "authorship_tag": "ABX9TyOgv0AozH1FKQBD+RkgT2bV", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU", + "coopTranslator": { + "original_hash": "0ca21b6ee62904d616f2e36dc1cf0da7", + "translation_date": "2025-09-06T13:08:15+00:00", + "source_file": "PyTorch_Fundamentals.ipynb", + "language_code": "th" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EHh5JllMh1rG", + "outputId": "f55755ad-c369-414c-85ec-6e9d4f061a02", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'2.2.1+cu121'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 1 + } + ], + "source": [ + "import torch\n", + "torch.__version__" + ] + }, + { + "cell_type": "code", + "source": [ + "print(\"I am excited to run this\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "UPlb-duwXAfz", + "outputId": "cfd687e4-1238-49f4-ab6b-ee1305b740d2" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "I am excited to run this\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": 
[ + "import torch\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "print(torch.__version__)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "byWVlJ9wXDSk", + "outputId": "fd74a5c4-4d4a-41b2-ef3c-562ea3e4811f" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "2.2.1+cu121\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "Osm80zoEYklS" + } + }, + { + "cell_type": "code", + "source": [ + "# scalar\n", + "scalar = torch.tensor(7)\n", + "scalar" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-o8wvJ-VXZmI", + "outputId": "558816f5-1205-4de1-fe1f-2f96e9bd79e6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(7)" + ] + }, + "metadata": {}, + "execution_count": 4 + } + ] + }, + { + "cell_type": "code", + "source": [ + "scalar.ndim" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mCZ2tXC4Y_Sg", + "outputId": "2d86dbdc-56e1-45c6-d3dd-14515f2a457a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "source": [ + "scalar.item()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ssN00By0ZQgS", + "outputId": "490f40d1-5135-4969-a6d3-c8c902cdc473" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "7" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# vector\n", + "vector = torch.tensor([7, 7])\n", + "vector\n", + "#vector.ndim\n", + "#vector.item()" + ], + "metadata": { + "colab": { + "base_uri": 
"https://localhost:8080/" + }, + "id": "Bws__5wlZnmF", + "outputId": "944e38f9-5ba1-4ddc-a9c6-cfb6a19bb488" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([7, 7])" + ] + }, + "metadata": {}, + "execution_count": 7 + } + ] + }, + { + "cell_type": "code", + "source": [ + "vector.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "9pjCvnsZZzNG", + "outputId": "e030a4da-8f81-4858-fbce-86da2aaafe52" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([2])" + ] + }, + "metadata": {}, + "execution_count": 8 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Matrix\n", + "MATRIX = torch.tensor([[7, 8],[9, 10]])\n", + "MATRIX" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "a747hI9SaBGW", + "outputId": "af835ddb-81ff-4981-badb-441567194d15" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[ 7, 8],\n", + " [ 9, 10]])" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ] + }, + { + "cell_type": "code", + "source": [ + "MATRIX.ndim" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XdTfFa7vaRUj", + "outputId": "0fbbab9c-8263-4cad-a380-0d2a16ca499e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "2" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "code", + "source": [ + "MATRIX[0]\n", + "MATRIX[1]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TFeD3jSDafm7", + "outputId": "69b44ab3-5ba7-451a-c6b2-f019a03d0c96" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 9, 10])" + ] + }, + "metadata": {}, 
+ "execution_count": 11 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Tensor\n", + "TENSOR = torch.tensor([[[1, 2, 3],[3,6,9], [2,4,5]]])\n", + "TENSOR" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ic3cE47tah42", + "outputId": "f250e295-91de-43ec-9d80-588a6fe0abde" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[1, 2, 3],\n", + " [3, 6, 9],\n", + " [2, 4, 5]]])" + ] + }, + "metadata": {}, + "execution_count": 12 + } + ] + }, + { + "cell_type": "code", + "source": [ + "TENSOR.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Wvjf5fczbAM1", + "outputId": "9c72b5b8-bafe-4ae7-9883-b051e209eada" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([1, 3, 3])" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ] + }, + { + "cell_type": "code", + "source": [ + "TENSOR.ndim" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mwtXZwiMbN3m", + "outputId": "331a5e36-b1b0-4a5f-a9b8-e7049cbaa8f9" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "3" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ] + }, + { + "cell_type": "code", + "source": [ + "TENSOR[0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vzdZu_IfbP3J", + "outputId": "e24e7e71-e365-412d-ff50-fc094b56d2f3" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 2, 3],\n", + " [3, 6, 9],\n", + " [2, 4, 5]])" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "A8OL9eWfcRrJ" + } + }, + { + "cell_type": "code", + "source": [ + "random_tensor = 
torch.rand(3,4)\n", + "random_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hAqSDE1EcVS_", + "outputId": "946171c3-d054-400c-f893-79110356888c" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0.4414, 0.7681, 0.8385, 0.3166],\n", + " [0.0468, 0.5812, 0.0670, 0.9173],\n", + " [0.2959, 0.3276, 0.7411, 0.4643]])" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_tensor.ndim" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "g4fvPE5GcwzP", + "outputId": "8737f36b-6864-4059-eaed-6f9156c22306" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "2" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_tensor.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XsAg99QmdAU6", + "outputId": "35467c11-257c-4f16-99aa-eca930bcbc36" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([3, 4])" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_tensor.size()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cii1pNdVdB68", + "outputId": "fc8d2de6-9215-43de-99f7-7b0d7f7d20fa" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([3, 4])" + ] + }, + "metadata": {}, + "execution_count": 19 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_image_tensor = torch.rand(size=(3, 224, 224)) #color channels, height, width\n", + "random_image_tensor.ndim, random_image_tensor.shape" + ], + "metadata": { + "colab": { + "base_uri": 
"https://localhost:8080/" + }, + "id": "aTKq2j0cdDjb", + "outputId": "6be42057-20b9-4faf-d79d-8b65c42cc27e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(3, torch.Size([3, 224, 224]))" + ] + }, + "metadata": {}, + "execution_count": 20 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_tensor_ofownsize = torch.rand(size=(5,10,10))\n", + "random_tensor_ofownsize.ndim, random_tensor_ofownsize.shape\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IyhDdj-Pd6nC", + "outputId": "43e5e334-6d4d-4b67-f87d-7d364c6d8c67" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(3, torch.Size([5, 10, 10]))" + ] + }, + "metadata": {}, + "execution_count": 21 + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "UOJW08uOert_" + } + }, + { + "cell_type": "code", + "source": [ + "zero = torch.zeros(size=(3, 4))\n", + "zero" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "uGvXtaXyefie", + "outputId": "d40d3e28-8667-4d2f-8b62-f0829c6162ad" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0., 0., 0., 0.],\n", + " [0., 0., 0., 0.],\n", + " [0., 0., 0., 0.]])" + ] + }, + "metadata": {}, + "execution_count": 22 + } + ] + }, + { + "cell_type": "code", + "source": [ + "zero*random_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "OyUkUPkDe0uH", + "outputId": "26c2e4be-36ba-4c6c-9a90-2704ec135828" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0., 0., 0., 0.],\n", + " [0., 0., 0., 0.],\n", + " [0., 0., 0., 0.]])" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ones = 
torch.ones(size=(3, 4))\n", + "ones\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "y_Ac62Aqe82G", + "outputId": "291de5d9-b9df-49de-c9d1-d098e3e9f4d8" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1., 1., 1., 1.],\n", + " [1., 1., 1., 1.],\n", + " [1., 1., 1., 1.]])" + ] + }, + "metadata": {}, + "execution_count": 24 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ones.dtype" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TvGOA9odfIEO", + "outputId": "45949ef4-6649-4b6c-d6af-2d4bfb8de832" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.float32" + ] + }, + "metadata": {}, + "execution_count": 25 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ones*zero" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "--pTyge-fI-8", + "outputId": "c4d9bb7e-829b-43db-e2db-b1a2d64e61f0" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0., 0., 0., 0.],\n", + " [0., 0., 0., 0.],\n", + " [0., 0., 0., 0.]])" + ] + }, + "metadata": {}, + "execution_count": 26 + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "qDcc7Z36fSJF" + } + }, + { + "cell_type": "code", + "source": [ + "one_to_ten = torch.arange(start = 1, end = 11, step = 1)\n", + "one_to_ten" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "w3CZB4zUfR1s", + "outputId": "197fcba1-da0a-4b4a-ed11-3974bd6c01aa" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])" + ] + }, + "metadata": {}, + "execution_count": 27 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ten_zeros = 
torch.zeros_like(one_to_ten)\n", + "ten_zeros" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "WZh99BwVfRy8", + "outputId": "51ef8bfb-6fa0-4099-ff66-b97d65b2ddea" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])" + ] + }, + "metadata": {}, + "execution_count": 28 + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "pGGhgsbUgqbW" + } + }, + { + "cell_type": "code", + "source": [ + "float_32_tensor = torch.tensor([3.0, 6.0,9.0], dtype = None, device = None, requires_grad = False)\n", + "float_32_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JORJl4XkfRsx", + "outputId": "71114171-0f49-481f-b6fc-6cb48e2fb895" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([3., 6., 9.])" + ] + }, + "metadata": {}, + "execution_count": 29 + } + ] + }, + { + "cell_type": "code", + "source": [ + "float_32_tensor.dtype" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "6wOPPwGyfRLn", + "outputId": "f23776a1-b682-404a-9f67-d5bcb0402666" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.float32" + ] + }, + "metadata": {}, + "execution_count": 30 + } + ] + }, + { + "cell_type": "code", + "source": [ + "float_16_tensor = float_32_tensor.type(torch.float16)\n", + "float_16_tensor.dtype" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "tFsHCvmZfOYe", + "outputId": "d3aa305a-7591-47f5-97fd-61bff60b44bd" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.float16" + ] + }, + "metadata": {}, + "execution_count": 31 + } + ] + }, + { + "cell_type": "code", + "source": [ + 
"float_16_tensor*float_32_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TQiCGTPuwq0q", + "outputId": "98750fce-1ca3-4889-e269-8b753efdea96" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 9., 36., 81.])" + ] + }, + "metadata": {}, + "execution_count": 32 + } + ] + }, + { + "cell_type": "code", + "source": [ + "int_32_tensor = torch.tensor([3, 6, 9], dtype = torch.int32)\n", + "int_32_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5hlrLvGUw5D_", + "outputId": "41d890a0-9aee-446c-d906-631ce2ab0995" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([3, 6, 9], dtype=torch.int32)" + ] + }, + "metadata": {}, + "execution_count": 33 + } + ] + }, + { + "cell_type": "code", + "source": [ + "int_32_tensor*float_32_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ihApD9u3xTNW", + "outputId": "d295eed0-6996-4e0f-8502-ff4b55cd1373" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 9., 36., 81.])" + ] + }, + "metadata": {}, + "execution_count": 34 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x = torch.arange(0,100,10)" + ], + "metadata": { + "id": "utKhlb_KxWDQ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "x" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "p78D74E9Rj7Y", + "outputId": "781a1614-a900-41f5-9e5d-358f0b2390aa" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])" + ] + }, + "metadata": {}, + "execution_count": 36 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.min()" 
+ ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4BcSs5NeRkcj", + "outputId": "3f24a8dc-58e9-4a5f-9834-e85856a34f9d" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0)" + ] + }, + "metadata": {}, + "execution_count": 37 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.max()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hinqvXVLRm4q", + "outputId": "5c7d8a53-3913-4ac1-bba3-5ba8ff68250a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(90)" + ] + }, + "metadata": {}, + "execution_count": 38 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.mean(x.type(torch.float32))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "k7okc0_vRpnB", + "outputId": "91e5494f-dc57-417c-ea4d-25dbc547c893" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(45.)" + ] + }, + "metadata": {}, + "execution_count": 39 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.type(torch.float32).mean()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "29QcDTjHRq10", + "outputId": "62937c6c-78e0-49f2-dde3-1543ee8f7907" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(45.)" + ] + }, + "metadata": {}, + "execution_count": 40 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.sum()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wlpY_G_sbdKF", + "outputId": "475d8258-af65-4011-a258-b93d4d8142d4" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(450)" + ] + }, + "metadata": {}, + "execution_count": 
41 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.argmax()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GT6HJzwhbk4n", + "outputId": "2e455c20-c322-4bcf-d07c-1259d3ccefc6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(9)" + ] + }, + "metadata": {}, + "execution_count": 42 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.argmin()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "egL3oi2Mb19P", + "outputId": "f71fb32f-6338-44a3-b377-75bea0a3ab54" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0)" + ] + }, + "metadata": {}, + "execution_count": 43 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "p2U8DZKib3DP", + "outputId": "b9f613b9-74e9-45f4-ed01-05babb6a6793" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0)" + ] + }, + "metadata": {}, + "execution_count": 44 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[9]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "24qBFlGYcABe", + "outputId": "5813cfcb-7f63-4bd7-ee46-f95ccbfda939" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(90)" + ] + }, + "metadata": {}, + "execution_count": 45 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x = torch.arange(1, 10)\n", + "x.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "0GPOxEzkcBHO", + "outputId": "aefbd903-4f4c-4d2c-c90f-eccd682fe018" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + 
"torch.Size([9])" + ] + }, + "metadata": {}, + "execution_count": 46 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_reshaped = x.reshape(1,9)\n", + "x_reshaped, x_reshaped.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "spmRgQjwddgp", + "outputId": "85a7c55c-2909-4ea2-fc68-386dddc65742" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9]]), torch.Size([1, 9]))" + ] + }, + "metadata": {}, + "execution_count": 47 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_reshaped.view(1,9)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "tH2ahWGydqqP", + "outputId": "65d92263-4fc4-434a-c06d-c5e08436f7fe" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9]])" + ] + }, + "metadata": {}, + "execution_count": 48 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_stacked = torch.stack([x, x, x, x], dim = 1)\n", + "x_stacked" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "jgCeJcaud_-1", + "outputId": "7f293a37-6ef1-43b6-aee5-9d6d91c94f9e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 1, 1, 1],\n", + " [2, 2, 2, 2],\n", + " [3, 3, 3, 3],\n", + " [4, 4, 4, 4],\n", + " [5, 5, 5, 5],\n", + " [6, 6, 6, 6],\n", + " [7, 7, 7, 7],\n", + " [8, 8, 8, 8],\n", + " [9, 9, 9, 9]])" + ] + }, + "metadata": {}, + "execution_count": 49 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_stacked.squeeze()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XhJHIK6cfPse", + "outputId": "06c47b89-3a9e-453e-bcc3-00cbcb0b8b49" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": 
{ + "text/plain": [ + "tensor([[1, 1, 1, 1],\n", + " [2, 2, 2, 2],\n", + " [3, 3, 3, 3],\n", + " [4, 4, 4, 4],\n", + " [5, 5, 5, 5],\n", + " [6, 6, 6, 6],\n", + " [7, 7, 7, 7],\n", + " [8, 8, 8, 8],\n", + " [9, 9, 9, 9]])" + ] + }, + "metadata": {}, + "execution_count": 50 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_stacked.unsqueeze(dim=1)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ej2c3Xxzf0tq", + "outputId": "94024061-eb37-446d-c4a8-e4d16cb6de81" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[1, 1, 1, 1]],\n", + "\n", + " [[2, 2, 2, 2]],\n", + "\n", + " [[3, 3, 3, 3]],\n", + "\n", + " [[4, 4, 4, 4]],\n", + "\n", + " [[5, 5, 5, 5]],\n", + "\n", + " [[6, 6, 6, 6]],\n", + "\n", + " [[7, 7, 7, 7]],\n", + "\n", + " [[8, 8, 8, 8]],\n", + "\n", + " [[9, 9, 9, 9]]])" + ] + }, + "metadata": {}, + "execution_count": 52 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_stacked.squeeze()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4DJYo1a0f5M0", + "outputId": "efca2b47-1b14-44de-9a9a-2c83629d153f" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 1, 1, 1],\n", + " [2, 2, 2, 2],\n", + " [3, 3, 3, 3],\n", + " [4, 4, 4, 4],\n", + " [5, 5, 5, 5],\n", + " [6, 6, 6, 6],\n", + " [7, 7, 7, 7],\n", + " [8, 8, 8, 8],\n", + " [9, 9, 9, 9]])" + ] + }, + "metadata": {}, + "execution_count": 53 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_stacked.unsqueeze(dim=-2)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "J4iEjn2ah2HL", + "outputId": "22395593-7c16-4162-beae-dd2bbe7bda35" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[1, 1, 1, 1]],\n", + "\n", + " [[2, 2, 2, 2]],\n", + "\n", 
+ " [[3, 3, 3, 3]],\n", + "\n", + " [[4, 4, 4, 4]],\n", + "\n", + " [[5, 5, 5, 5]],\n", + "\n", + " [[6, 6, 6, 6]],\n", + "\n", + " [[7, 7, 7, 7]],\n", + "\n", + " [[8, 8, 8, 8]],\n", + "\n", + " [[9, 9, 9, 9]]])" + ] + }, + "metadata": {}, + "execution_count": 55 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "tensor = torch.tensor([1, 2, 3])\n", + "tensor = tensor - 10\n", + "tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cFfiD7Nth7Z_", + "outputId": "1139e1f8-fc1a-46ca-d636-f2bc4fd2eef6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([-9, -8, -7])" + ] + }, + "metadata": {}, + "execution_count": 7 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.mul(tensor, 10)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dyA7BM_GHhqE", + "outputId": "0e3b9671-d9e8-4a32-87bb-59bc05986142" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([-90, -80, -70])" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.sub(tensor, 100)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "owtUsZ1KNegI", + "outputId": "189b7b23-0041-4e09-b991-cd209a48506a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([-109, -108, -107])" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.add(tensor, 100)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "K5STXlQONsyc", + "outputId": "00cbb79a-0a1d-4e21-86ec-5c91c37a2d01" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([91, 
92, 93])" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.divide(tensor, 2)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xqMGnzIUNvp0", + "outputId": "c894cf3e-f148-45f8-cfc8-d78740735306" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([-4.5000, -4.0000, -3.5000])" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.matmul(tensor, tensor)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ruGzKpV8NyBc", + "outputId": "fddb63bf-006f-48b6-ae28-287fbcda8bc5" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(194)" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor@tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "8GS3r9yTeGfD", + "outputId": "c80b12ac-30b5-4f3d-c38c-9e41ba511b0e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(194)" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ] + }, + { + "cell_type": "code", + "source": [ + "%%time\n", + "tensor@tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QmuYHqXTemC0", + "outputId": "402fe3ba-70b5-4bb2-c83b-254db84ff810" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "CPU times: user 622 µs, sys: 0 ns, total: 622 µs\n", + "Wall time: 516 µs\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(194)" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ] + }, + { + "cell_type": "code", + "source": [ + "%%time\n", + 
"torch.matmul(tensor,tensor)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dGr1fzdNepd8", + "outputId": "97bd6c91-bc25-4b38-cdf5-f22dcdef243e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "CPU times: user 424 µs, sys: 998 µs, total: 1.42 ms\n", + "Wall time: 1.43 ms\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(194)" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.rand(3,2)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "pGYDoK2gevfo", + "outputId": "2c8783d5-0453-47c5-c7ed-af10d25d6989" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0.5999, 0.0073],\n", + " [0.9321, 0.3026],\n", + " [0.3463, 0.3872]])" + ] + }, + "metadata": {}, + "execution_count": 20 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.matmul(torch.rand(3,2), torch.rand(2,3))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "KGBGQoB8e2DP", + "outputId": "4c2ef361-a2d0-41ee-c328-3992cbbc138d" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0.3528, 0.1893, 0.0714],\n", + " [1.2791, 0.7110, 0.2563],\n", + " [0.8812, 0.4553, 0.1803]])" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import torch" + ], + "metadata": { + "id": "ib8DMtkBe_LJ" + }, + "execution_count": 1, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "x = torch.rand(2,9)" + ], + "metadata": { + "id": "nJo8ZBdrQY1b" + }, + "execution_count": 2, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "x" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": 
"wi6oRv4MQfgf", + "outputId": "55c99f55-31f6-4cf5-ba4e-19a47c3a0167" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0.5894, 0.4391, 0.2018, 0.5417, 0.3844, 0.3592, 0.9209, 0.9269, 0.0681],\n", + " [0.0746, 0.1740, 0.6821, 0.6890, 0.0999, 0.7444, 0.2391, 0.4625, 0.8302]])" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ] + }, + { + "cell_type": "code", + "source": [ + "y=torch.randn(2,3,5)\n", + "y" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Zpx8myAUQgoc", + "outputId": "07756d70-56bd-437c-c74e-9aecc1a77311" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[ 1.5552, -0.4877, 0.5175, -1.7958, -0.6187],\n", + " [-0.3359, -1.9710, 0.0112, -1.7578, -1.5295],\n", + " [ 0.0932, 1.4079, 0.9108, 0.3328, -0.6978]],\n", + "\n", + " [[-0.9406, -1.0809, -0.2595, 0.1282, 1.6605],\n", + " [ 1.1624, 1.0902, 1.7092, -0.2842, -1.3780],\n", + " [-0.1534, -1.2795, -0.5495, 0.9902, 0.1822]]])" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_original = torch.rand(size=(224,224,3))\n", + "x_original" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "s4U-X9bJQnWe", + "outputId": "657a7a76-962c-4b41-a76b-902d0482266c" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[0.4549, 0.6809, 0.2118],\n", + " [0.4824, 0.9008, 0.8741],\n", + " [0.1715, 0.1757, 0.1845],\n", + " ...,\n", + " [0.8741, 0.6594, 0.2610],\n", + " [0.0092, 0.1984, 0.1955],\n", + " [0.4236, 0.4182, 0.0251]],\n", + "\n", + " [[0.9174, 0.1661, 0.5852],\n", + " [0.1837, 0.2351, 0.3810],\n", + " [0.3726, 0.4808, 0.8732],\n", + " ...,\n", + " [0.6794, 0.0554, 0.9202],\n", + " [0.0864, 0.8750, 0.3558],\n", + " [0.8445, 0.9759, 0.4934]],\n", + "\n", + " 
[[0.1600, 0.2635, 0.7194],\n", + " [0.9488, 0.3405, 0.3647],\n", + " [0.6683, 0.5168, 0.9592],\n", + " ...,\n", + " [0.0521, 0.0140, 0.2445],\n", + " [0.3596, 0.3999, 0.2730],\n", + " [0.5926, 0.9877, 0.7784]],\n", + "\n", + " ...,\n", + "\n", + " [[0.4794, 0.5635, 0.3764],\n", + " [0.9124, 0.6094, 0.5059],\n", + " [0.4528, 0.4447, 0.5021],\n", + " ...,\n", + " [0.0089, 0.4816, 0.8727],\n", + " [0.2173, 0.6296, 0.2347],\n", + " [0.2028, 0.9931, 0.7201]],\n", + "\n", + " [[0.3116, 0.6459, 0.4703],\n", + " [0.0148, 0.2345, 0.7149],\n", + " [0.8393, 0.5804, 0.6691],\n", + " ...,\n", + " [0.2105, 0.9460, 0.2696],\n", + " [0.5918, 0.9295, 0.2616],\n", + " [0.2537, 0.7819, 0.4700]],\n", + "\n", + " [[0.6654, 0.1200, 0.5841],\n", + " [0.9147, 0.5522, 0.6529],\n", + " [0.1799, 0.5276, 0.5415],\n", + " ...,\n", + " [0.7536, 0.4346, 0.8793],\n", + " [0.3793, 0.1750, 0.7792],\n", + " [0.9266, 0.8325, 0.9974]]])" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_permuted=x_original.permute(2, 0, 1)\n", + "print(x_original.shape)\n", + "print(x_permuted.shape)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "DD19_zvbQzHo", + "outputId": "1d64ce1b-eb48-47e3-90b6-7f1340e7f2b2" + }, + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "torch.Size([224, 224, 3])\n", + "torch.Size([3, 224, 224])\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_original[0,0,0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NnPmMk4ZRF7w", + "outputId": "2cd5da7f-4a23-4a76-8c4a-bb982113f2a4" + }, + "execution_count": 10, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0.4549)" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_permuted[0,0,0]" + ], + "metadata": { + "colab": { + "base_uri": 
"https://localhost:8080/" + }, + "id": "Z0ylNoAARgTo", + "outputId": "ddca0298-cddf-4048-9b71-a791655e5bed" + }, + "execution_count": 11, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0.4549)" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_original[0,0,0]=0.989" + ], + "metadata": { + "id": "RXw0xXsDRi4L" + }, + "execution_count": 13, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "x_original[0,0,0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "1sFdV6wzRo3f", + "outputId": "1cf87d2c-6d88-453a-d136-0f625a2800f1" + }, + "execution_count": 14, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0.9890)" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_permuted[0,0,0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xTX-hx2SR1wp", + "outputId": "0d4908c4-c3bc-44e3-8ec6-1487104cc209" + }, + "execution_count": 15, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0.9890)" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x=torch.arange(1,10).reshape(1,3,3)\n", + "x, x.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mZomOe7gR4Q8", + "outputId": "0b3c922f-ec11-46de-b8a5-9f9533d866ad" + }, + "execution_count": 18, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(tensor([[[1, 2, 3],\n", + " [4, 5, 6],\n", + " [7, 8, 9]]]),\n", + " torch.Size([1, 3, 3]))" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "3y7v4SQvSBs1", + "outputId": 
"8c53307d-e628-404d-db66-56c6bdffab7c" + }, + "execution_count": 19, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 2, 3],\n", + " [4, 5, 6],\n", + " [7, 8, 9]])" + ] + }, + "metadata": {}, + "execution_count": 19 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0][0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hf9uG4xLSNya", + "outputId": "3075bc42-9ffa-426b-8a86-95628ffcd824" + }, + "execution_count": 21, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1, 2, 3])" + ] + }, + "metadata": {}, + "execution_count": 21 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0][0][0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "zA4G2Se4SRB3", + "outputId": "324312d2-ed0a-49eb-f81f-e904e53992fe" + }, + "execution_count": 22, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(1)" + ] + }, + "metadata": {}, + "execution_count": 22 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0][2][2]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Mwy3zmKKSdbk", + "outputId": "d35172c3-b099-40a6-ddf1-a453c2adfa44" + }, + "execution_count": 23, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(9)" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[:,1,1]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "fE3nCM1KS7XT", + "outputId": "01f5d755-9737-4235-9f73-dce89ff6ba16" + }, + "execution_count": 24, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([5])" + ] + }, + "metadata": {}, + "execution_count": 24 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0,0,:]" + ], + "metadata": { + "colab": { + 
"base_uri": "https://localhost:8080/" + }, + "id": "luNDINKNTTxp", + "outputId": "091195ef-2f71-4602-e95f-529a69193150" + }, + "execution_count": 25, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1, 2, 3])" + ] + }, + "metadata": {}, + "execution_count": 25 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0,:,2]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "KG8A4xbfThCL", + "outputId": "5866bc41-9241-4619-be7b-e9206b3f80ab" + }, + "execution_count": 26, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([3, 6, 9])" + ] + }, + "metadata": {}, + "execution_count": 26 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import numpy as np" + ], + "metadata": { + "id": "CZ3PX0qlTwHJ" + }, + "execution_count": 27, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "array = np.arange(1.0, 8.0)" + ], + "metadata": { + "id": "UOBeTumiT3Lf" + }, + "execution_count": 28, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "array" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "RzcO32E9UCQl", + "outputId": "430def24-c42c-461f-e5e7-398544c695d3" + }, + "execution_count": 29, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([1., 2., 3., 4., 5., 6., 7.])" + ] + }, + "metadata": {}, + "execution_count": 29 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor = torch.from_numpy(array)\n", + "tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JJIL0q1DUC6O", + "outputId": "8a3b1d7c-4482-4d32-f34f-9212d9d3a177" + }, + "execution_count": 32, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64)" + ] + }, + "metadata": {}, + "execution_count": 32 + } + ] + }, + { + "cell_type": "code", + 
"source": [ + "array[3]=11.0" + ], + "metadata": { + "id": "j3Ce6q3DUIEK" + }, + "execution_count": 33, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "array" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dc_BCVdjUsCc", + "outputId": "65537325-8b11-4f36-fc73-e56f30d6a036" + }, + "execution_count": 34, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([ 1., 2., 3., 11., 5., 6., 7.])" + ] + }, + "metadata": {}, + "execution_count": 34 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "VG1e_eITUta2", + "outputId": "a26c5198-23b6-4a6d-d73a-ba20cd9782b8" + }, + "execution_count": 35, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 1., 2., 3., 11., 5., 6., 7.], dtype=torch.float64)" + ] + }, + "metadata": {}, + "execution_count": 35 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor = torch.ones(7)\n", + "tensor, tensor.dtype\n", + "numpy_tensor = tensor.numpy()\n", + "numpy_tensor, numpy_tensor.dtype" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Swt8JF8vUuev", + "outputId": "c9e5bf6a-6d2c-41d6-8327-366867ffdd2d" + }, + "execution_count": 37, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(array([1., 1., 1., 1., 1., 1., 1.], dtype=float32), dtype('float32'))" + ] + }, + "metadata": {}, + "execution_count": 37 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "random_tensor_A = torch.rand(3,4)\n", + "random_tensor_B = torch.rand(3,4)\n", + "print(random_tensor_A)\n", + "print(random_tensor_B)\n", + "print(random_tensor_A == random_tensor_B)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "uGcagTteVFTD", + "outputId": "49405790-08e7-4210-b7f1-f00b904c7eb9" + }, + 
"execution_count": 38, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tensor([[0.9870, 0.6636, 0.6873, 0.8863],\n", + " [0.8386, 0.4169, 0.3587, 0.0265],\n", + " [0.2981, 0.6025, 0.5652, 0.5840]])\n", + "tensor([[0.9821, 0.3481, 0.0913, 0.4940],\n", + " [0.7495, 0.4387, 0.9582, 0.8659],\n", + " [0.5064, 0.6919, 0.0809, 0.9771]])\n", + "tensor([[False, False, False, False],\n", + " [False, False, False, False],\n", + " [False, False, False, False]])\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "RANDOM_SEED = 42\n", + "torch.manual_seed(RANDOM_SEED)\n", + "random_tensor_C = torch.rand(3,4)\n", + "torch.manual_seed(RANDOM_SEED)\n", + "random_tensor_D = torch.rand(3,4)\n", + "print(random_tensor_C)\n", + "print(random_tensor_D)\n", + "print(random_tensor_C == random_tensor_D)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "HznyXyEaWjLM", + "outputId": "25956434-01b6-4059-9054-c9978884ddc1" + }, + "execution_count": 46, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tensor([[0.8823, 0.9150, 0.3829, 0.9593],\n", + " [0.3904, 0.6009, 0.2566, 0.7936],\n", + " [0.9408, 0.1332, 0.9346, 0.5936]])\n", + "tensor([[0.8823, 0.9150, 0.3829, 0.9593],\n", + " [0.3904, 0.6009, 0.2566, 0.7936],\n", + " [0.9408, 0.1332, 0.9346, 0.5936]])\n", + "tensor([[True, True, True, True],\n", + " [True, True, True, True],\n", + " [True, True, True, True]])\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!nvidia-smi" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vltPTh0YXJSt", + "outputId": "807af6dc-a9ca-4301-ec32-b688dbde8be8" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Thu May 23 02:57:59 2024 \n", + "+---------------------------------------------------------------------------------------+\n", + "| NVIDIA-SMI 535.104.05 Driver Version: 535.104.05 
CUDA Version: 12.2 |\n", + "|-----------------------------------------+----------------------+----------------------+\n", + "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n", + "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n", + "| | | MIG M. |\n", + "|=========================================+======================+======================|\n", + "| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n", + "| N/A 60C P8 11W / 70W | 0MiB / 15360MiB | 0% Default |\n", + "| | | N/A |\n", + "+-----------------------------------------+----------------------+----------------------+\n", + " \n", + "+---------------------------------------------------------------------------------------+\n", + "| Processes: |\n", + "| GPU GI CI PID Type Process name GPU Memory |\n", + "| ID ID Usage |\n", + "|=======================================================================================|\n", + "| No running processes found |\n", + "+---------------------------------------------------------------------------------------+\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "torch.cuda.is_available()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "L6mMyPDyYh1j", + "outputId": "279c5dd8-c2a8-4fbd-f321-2f5d7c6e90e6" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ] + }, + { + "cell_type": "code", + "source": [ + "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", + "device" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "oOdiYa7ZYytx", + "outputId": "d73b04fc-8963-4826-9722-08d118d5ab91" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'cuda'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": 
"string" + } + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.cuda.device_count()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vOdsazLqZFM5", + "outputId": "8189cd6a-9017-4663-a652-3e15c517d9c3" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "1" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor = torch.tensor([1,2,3], device = \"cpu\")\n", + "print(tensor, tensor.device)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cdik9Vw3ZMv0", + "outputId": "044a68fd-83a1-409d-8e3b-655142ca0270" + }, + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tensor([1, 2, 3]) cpu\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor_on_gpu = tensor.to(device)\n", + "tensor_on_gpu" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Zmp835rrZp-z", + "outputId": "37fa3413-18a3-47bf-ae51-5b36ff85a3ef" + }, + "execution_count": 8, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1, 2, 3], device='cuda:0')" + ] + }, + "metadata": {}, + "execution_count": 8 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor_on_gpu.numpy()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 159 + }, + "id": "jhriaa8uZ1yM", + "outputId": "bc5a3226-1a12-4fea-8769-a44f21cdc323" + }, + "execution_count": 10, + "outputs": [ + { + "output_type": "error", + "ename": "TypeError", + "evalue": "can't convert cuda:0 device type tensor to numpy. 
Use Tensor.cpu() to copy the tensor to host memory first.", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtensor_on_gpu\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnumpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first." + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor_on_cpu = tensor_on_gpu.cpu().numpy()" + ], + "metadata": { + "id": "LHGXK3GgaOzL" + }, + "execution_count": 12, + "outputs": [] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "j-El4LlCajfq" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**ข้อจำกัดความรับผิดชอบ**: \nเอกสารนี้ได้รับการแปลโดยใช้บริการแปลภาษา AI [Co-op Translator](https://github.com/Azure/co-op-translator) แม้ว่าเราจะพยายามให้การแปลมีความถูกต้อง แต่โปรดทราบว่าการแปลอัตโนมัติอาจมีข้อผิดพลาดหรือความไม่แม่นยำ เอกสารต้นฉบับในภาษาดั้งเดิมควรถือเป็นแหล่งข้อมูลที่เชื่อถือได้ สำหรับข้อมูลที่สำคัญ แนะนำให้ใช้บริการแปลภาษาจากผู้เชี่ยวชาญ เราไม่รับผิดชอบต่อความเข้าใจผิดหรือการตีความที่ผิดพลาดซึ่งเกิดจากการใช้การแปลนี้\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/tr/2-Regression/1-Tools/notebook.ipynb b/translations/tr/2-Regression/1-Tools/notebook.ipynb new file mode 100644 index 000000000..e69de29bb diff --git a/translations/tr/2-Regression/1-Tools/solution/R/lesson_1-R.ipynb b/translations/tr/2-Regression/1-Tools/solution/R/lesson_1-R.ipynb new file mode 100644 index 000000000..a317a9da4 --- /dev/null +++ 
b/translations/tr/2-Regression/1-Tools/solution/R/lesson_1-R.ipynb @@ -0,0 +1,448 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_1-R.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "c18d3bd0bd8ae3878597e89dcd1fa5c1", + "translation_date": "2025-09-06T13:45:03+00:00", + "source_file": "2-Regression/1-Tools/solution/R/lesson_1-R.ipynb", + "language_code": "tr" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "YJUHCXqK57yz" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Regresyona Giriş - Ders 1\n", + "\n", + "#### Perspektif Kazanma\n", + "\n", + "✅ Regresyon yöntemlerinin birçok türü vardır ve hangisini seçeceğiniz, aradığınız cevaba bağlıdır. Örneğin, belirli bir yaşta bir kişinin muhtemel boyunu tahmin etmek istiyorsanız, **sayısal bir değer** aradığınız için `doğrusal regresyon` kullanırsınız. Eğer bir mutfak türünün vegan olarak kabul edilip edilmemesi gerektiğini keşfetmek istiyorsanız, **kategori ataması** arıyorsunuz demektir ve bu durumda `lojistik regresyon` kullanırsınız. Lojistik regresyon hakkında daha fazla bilgi edineceksiniz. Verilere sorabileceğiniz bazı soruları düşünün ve bu yöntemlerden hangisinin daha uygun olacağını değerlendirin.\n", + "\n", + "Bu bölümde, [diyabet hakkında küçük bir veri seti](https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html) ile çalışacaksınız. Diyabet hastaları için bir tedaviyi test etmek istediğinizi hayal edin. Makine Öğrenimi modelleri, değişkenlerin kombinasyonlarına dayanarak hangi hastaların tedaviye daha iyi yanıt vereceğini belirlemenize yardımcı olabilir. 
Görselleştirildiğinde, çok basit bir regresyon modeli bile teorik klinik deneylerinizi organize etmenize yardımcı olacak değişkenler hakkında bilgi gösterebilir.\n", + "\n", + "Öyleyse, bu göreve başlayalım!\n", + "\n", + "

\n", + " \n", + "

@allison_horst tarafından yapılmış sanat eseri
\n", + "\n", + "\n" + ], + "metadata": { + "id": "LWNNzfqd6feZ" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 1. Araç setimizi yükleme\n", + "\n", + "Bu görev için aşağıdaki paketlere ihtiyacımız olacak:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/), veri bilimini daha hızlı, kolay ve eğlenceli hale getirmek için tasarlanmış bir [R paketleri koleksiyonudur](https://www.tidyverse.org/packages).\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) çerçevesi, modelleme ve makine öğrenimi için bir [paketler koleksiyonudur](https://www.tidymodels.org/packages).\n", + "\n", + "Bu paketleri şu şekilde yükleyebilirsiniz:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\"))`\n", + "\n", + "Aşağıdaki script, bu modülü tamamlamak için gerekli paketlere sahip olup olmadığınızı kontrol eder ve eksik olanları sizin için yükler.\n" + ], + "metadata": { + "id": "FIo2YhO26wI9" + } + }, + { + "cell_type": "code", + "execution_count": 2, + "source": [ + "suppressWarnings(if(!require(\"pacman\")) install.packages(\"pacman\"))\n", + "pacman::p_load(tidyverse, tidymodels)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Loading required package: pacman\n", + "\n" + ] + } + ], + "metadata": { + "id": "cIA9fz9v7Dss", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "2df7073b-86b2-4b32-cb86-0da605a0dc11" + } + }, + { + "cell_type": "markdown", + "source": [ + "Şimdi, bu harika paketleri yükleyelim ve mevcut R oturumumuzda kullanılabilir hale getirelim. 
(Bu sadece bir örnek için, `pacman::p_load()` bunu zaten sizin için yaptı)\n" + ], + "metadata": { + "id": "gpO_P_6f9WUG" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# load the core Tidyverse packages\r\n", + "library(tidyverse)\r\n", + "\r\n", + "# load the core Tidymodels packages\r\n", + "library(tidymodels)\r\n" + ], + "outputs": [], + "metadata": { + "id": "NLMycgG-9ezO" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 2. Diyabet veri seti\n", + "\n", + "Bu alıştırmada, diyabet veri seti üzerinde tahminler yaparak regresyon becerilerimizi sergileyeceğiz. [Diyabet veri seti](https://www4.stat.ncsu.edu/~boos/var.select/diabetes.rwrite1.txt), diyabetle ilgili `442 örnek` veri içerir ve 10 tahmin edici özellik değişkeni, `yaş`, `cinsiyet`, `vücut kitle indeksi`, `ortalama kan basıncı` ve `altı kan serumu ölçümü` ile bir sonuç değişkeni `y`: başlangıçtan bir yıl sonra hastalık ilerlemesinin nicel bir ölçüsünü içerir.\n", + "\n", + "|Gözlem Sayısı|442|\n", + "|----------------------|:---|\n", + "|Tahmin Edici Sayısı|İlk 10 sütun sayısal tahmin edici|\n", + "|Sonuç/Hedef|11. 
sütun başlangıçtan bir yıl sonra hastalık ilerlemesinin nicel bir ölçüsüdür|\n", + "|Tahmin Edici Bilgileri|- yaş (yıl olarak)\n", + "||- cinsiyet\n", + "||- bmi vücut kitle indeksi\n", + "||- bp ortalama kan basıncı\n", + "||- s1 tc, toplam serum kolesterol\n", + "||- s2 ldl, düşük yoğunluklu lipoproteinler\n", + "||- s3 hdl, yüksek yoğunluklu lipoproteinler\n", + "||- s4 tch, toplam kolesterol / HDL\n", + "||- s5 ltg, muhtemelen serum trigliserit seviyesinin logaritması\n", + "||- s6 glu, kan şekeri seviyesi|\n", + "\n", + "\n", + "> 🎓 Unutmayın, bu denetimli öğrenmedir ve adlandırılmış bir 'y' hedefine ihtiyacımız var.\n", + "\n", + "R ile veri üzerinde işlem yapmadan önce, veriyi R'nin belleğine aktarmanız veya R'nin veriye uzaktan erişebilmesi için bir bağlantı oluşturmanız gerekir.\n", + "\n", + "> [readr](https://readr.tidyverse.org/) paketi, Tidyverse'in bir parçası olup, dikdörtgen verileri R'ye hızlı ve kullanıcı dostu bir şekilde aktarmanın bir yolunu sunar.\n", + "\n", + "Şimdi, bu kaynak URL'de sağlanan diyabet veri setini yükleyelim: <https://www4.stat.ncsu.edu/~boos/var.select/diabetes.rwrite1.txt>\n", + "\n", + "Ayrıca, verilerimiz üzerinde bir kontrol yapacağız ve `glimpse()` kullanarak veri yapısını inceleyeceğiz, ardından `slice()` ile ilk 5 satırı görüntüleyeceğiz.\n", + "\n", + "Daha ileri gitmeden önce, R kodunda sıkça karşılaşacağınız bir şeyi tanıtalım 🥁🥁: pipe operatörü `%>%`\n", + "\n", + "Pipe operatörü (`%>%`), bir nesneyi mantıksal bir sırayla bir fonksiyona veya çağrı ifadesine ileterek işlemleri gerçekleştirir. 
Pipe operatörünü kodunuzda \"ve sonra\" demek gibi düşünebilirsiniz.\n" + ], + "metadata": { + "id": "KM6iXLH996Cl" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Import the data set\r\n", + "diabetes <- read_table2(file = \"https://www4.stat.ncsu.edu/~boos/var.select/diabetes.rwrite1.txt\")\r\n", + "\r\n", + "\r\n", + "# Get a glimpse and dimensions of the data\r\n", + "glimpse(diabetes)\r\n", + "\r\n", + "\r\n", + "# Select the first 5 rows of the data\r\n", + "diabetes %>% \r\n", + " slice(1:5)" + ], + "outputs": [], + "metadata": { + "id": "Z1geAMhM-bSP" + } + }, + { + "cell_type": "markdown", + "source": [ + "`glimpse()` bize bu verinin 442 satır ve 11 sütundan oluştuğunu, tüm sütunların veri türünün `double` olduğunu gösteriyor.\n", + "\n", + "
\n", + "\n", + "> glimpse() ve slice(), [`dplyr`](https://dplyr.tidyverse.org/) paketindeki fonksiyonlardır. Dplyr, Tidyverse'in bir parçası olup, veri manipülasyonu için bir dil sunar ve en yaygın veri manipülasyonu zorluklarını çözmenize yardımcı olan tutarlı bir dizi fiil sağlar.\n", + "\n", + "
\n", + "\n", + "Artık elimizde veri olduğuna göre, bu alıştırma için bir özelliği (`bmi`) hedef alarak daraltalım. Bunun için istediğimiz sütunları seçmemiz gerekecek. Peki bunu nasıl yaparız?\n", + "\n", + "[`dplyr::select()`](https://dplyr.tidyverse.org/reference/select.html), bir veri çerçevesindeki sütunları *seçmemize* (ve isteğe bağlı olarak yeniden adlandırmamıza) olanak tanır.\n" + ], + "metadata": { + "id": "UwjVT1Hz-c3Z" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Select predictor feature `bmi` and outcome `y`\r\n", + "diabetes_select <- diabetes %>% \r\n", + " select(c(bmi, y))\r\n", + "\r\n", + "# Print the first 10 rows\r\n", + "diabetes_select %>% \r\n", + " slice(1:10)" + ], + "outputs": [], + "metadata": { + "id": "RDY1oAKI-m80" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 3. Eğitim ve Test Verileri\n", + "\n", + "Denetimli öğrenmede, verileri iki alt kümeye *bölmek* yaygın bir uygulamadır; modelin eğitilmesi için kullanılan (genellikle daha büyük) bir küme ve modelin performansını görmek için kullanılan daha küçük bir \"ayrılmış\" küme.\n", + "\n", + "Artık veriler hazır olduğuna göre, bu veri kümesindeki sayılar arasında mantıklı bir ayrım yapıp yapamayacağını görmek için bir makineye başvurabiliriz. Verileri nasıl böleceğimize dair bilgileri içeren bir nesne oluşturmak için Tidymodels çerçevesinin bir parçası olan [rsample](https://tidymodels.github.io/rsample/) paketini kullanabiliriz. 
Daha sonra oluşturulan eğitim ve test kümelerini çıkarmak için iki rsample fonksiyonunu kullanabiliriz:\n" + ], + "metadata": { + "id": "SDk668xK-tc3" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "set.seed(2056)\r\n", + "# Split 67% of the data for training and the rest for testing\r\n", + "diabetes_split <- diabetes_select %>% \r\n", + " initial_split(prop = 0.67)\r\n", + "\r\n", + "# Extract the resulting train and test sets\r\n", + "diabetes_train <- training(diabetes_split)\r\n", + "diabetes_test <- testing(diabetes_split)\r\n", + "\r\n", + "# Print the first 10 rows of the training set\r\n", + "diabetes_train %>% \r\n", + " slice(1:10)" + ], + "outputs": [], + "metadata": { + "id": "EqtHx129-1h-" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 4. Tidymodels ile bir doğrusal regresyon modeli eğitmek\n", + "\n", + "Artık modelimizi eğitmeye hazırız!\n", + "\n", + "Tidymodels'de modelleri `parsnip()` kullanarak üç kavramı belirterek tanımlarsınız:\n", + "\n", + "- Model **türü**, doğrusal regresyon, lojistik regresyon, karar ağacı modelleri gibi modelleri birbirinden ayırır.\n", + "\n", + "- Model **modu**, regresyon ve sınıflandırma gibi yaygın seçenekleri içerir; bazı model türleri her iki modu desteklerken bazıları yalnızca bir moda sahiptir.\n", + "\n", + "- Model **motoru**, modeli eğitmek için kullanılacak hesaplama aracıdır. 
Genellikle bunlar R paketleridir, örneğin **`\"lm\"`** veya **`\"ranger\"`**\n", + "\n", + "Bu modelleme bilgisi bir model spesifikasyonunda tutulur, o halde bir tane oluşturalım!\n" + ], + "metadata": { + "id": "sBOS-XhB-6v7" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Build a linear model specification\r\n", + "lm_spec <- \r\n", + " # Type\r\n", + " linear_reg() %>% \r\n", + " # Engine\r\n", + " set_engine(\"lm\") %>% \r\n", + " # Mode\r\n", + " set_mode(\"regression\")\r\n", + "\r\n", + "\r\n", + "# Print the model specification\r\n", + "lm_spec" + ], + "outputs": [], + "metadata": { + "id": "20OwEw20--t3" + } + }, + { + "cell_type": "markdown", + "source": [ + "Bir model *belirlendikten* sonra, model [`fit()`](https://parsnip.tidymodels.org/reference/fit.html) fonksiyonu kullanılarak genellikle bir formül ve bazı verilerle `tahmin edilebilir` veya `eğitilebilir`.\n", + "\n", + "`y ~ .` ifadesi, `y`'yi tahmin edilen değer/hedef olarak, tüm tahmin ediciler/özellikler tarafından açıklanacak şekilde (yani `.`) uyarlayacağımız anlamına gelir (bu durumda yalnızca bir tahmin edicimiz var: `bmi`).\n" + ], + "metadata": { + "id": "_oDHs89k_CJj" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Build a linear model specification\r\n", + "lm_spec <- linear_reg() %>% \r\n", + " set_engine(\"lm\") %>%\r\n", + " set_mode(\"regression\")\r\n", + "\r\n", + "\r\n", + "# Train a linear regression model\r\n", + "lm_mod <- lm_spec %>% \r\n", + " fit(y ~ ., data = diabetes_train)\r\n", + "\r\n", + "# Print the model\r\n", + "lm_mod" + ], + "outputs": [], + "metadata": { + "id": "YlsHqd-q_GJQ" + } + }, + { + "cell_type": "markdown", + "source": [ + "Model çıktısından, eğitim sırasında öğrenilen katsayıları görebiliriz. Bu katsayılar, gerçek ve tahmin edilen değişken arasındaki toplam hatayı en aza indiren en iyi uyum çizgisinin katsayılarını temsil eder.\n", + "
\n", + "\n", + "## 5. Test seti üzerinde tahminler yapma\n", + "\n", + "Artık bir model eğittiğimize göre, test veri seti için hastalık ilerlemesi y'yi [parsnip::predict()](https://parsnip.tidymodels.org/reference/predict.model_fit.html) kullanarak tahmin edebiliriz. Bu, veri grupları arasındaki çizgiyi çizmek için kullanılacaktır.\n" + ], + "metadata": { + "id": "kGZ22RQj_Olu" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make predictions for the test set\r\n", + "predictions <- lm_mod %>% \r\n", + " predict(new_data = diabetes_test)\r\n", + "\r\n", + "# Print out some of the predictions\r\n", + "predictions %>% \r\n", + " slice(1:5)" + ], + "outputs": [], + "metadata": { + "id": "nXHbY7M2_aao" + } + }, + { + "cell_type": "markdown", + "source": [ + "Woohoo! 💃🕺 Bir model eğittik ve tahminler yapmak için kullandık!\n", + "\n", + "Tahmin yaparken, tidymodels geleneği her zaman standartlaştırılmış sütun adlarına sahip bir tibble/veri çerçevesi üretmektir. Bu, orijinal verilerle tahminleri birleştirerek, grafik oluşturma gibi sonraki işlemler için kullanılabilir bir format oluşturmayı kolaylaştırır.\n", + "\n", + "`dplyr::bind_cols()` birden fazla veri çerçevesini sütun olarak verimli bir şekilde birleştirir.\n" + ], + "metadata": { + "id": "R_JstwUY_bIs" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Combine the predictions and the original test set\r\n", + "results <- diabetes_test %>% \r\n", + " bind_cols(predictions)\r\n", + "\r\n", + "\r\n", + "results %>% \r\n", + " slice(1:5)" + ], + "outputs": [], + "metadata": { + "id": "RybsMJR7_iI8" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 6. Modelleme Sonuçlarını Görselleştirme\n", + "\n", + "Şimdi bunu görsel olarak inceleme zamanı 📈. 
Test setindeki tüm `y` ve `bmi` değerlerinin bir dağılım grafiğini oluşturacağız, ardından modelin tahminlerini kullanarak, modelin veri gruplamaları arasında en uygun yere bir çizgi çizeceğiz.\n", + "\n", + "R, grafik oluşturmak için birkaç sisteme sahiptir, ancak `ggplot2` bunların en zarif ve en esnek olanlarından biridir. Bu, grafikleri **bağımsız bileşenleri birleştirerek** oluşturmanıza olanak tanır.\n" + ], + "metadata": { + "id": "XJbYbMZW_n_s" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Set a theme for the plot\r\n", + "theme_set(theme_light())\r\n", + "# Create a scatter plot\r\n", + "results %>% \r\n", + " ggplot(aes(x = bmi)) +\r\n", + " # Add a scatter plot\r\n", + " geom_point(aes(y = y), size = 1.6) +\r\n", + " # Add a line plot\r\n", + " geom_line(aes(y = .pred), color = \"blue\", size = 1.5)" + ], + "outputs": [], + "metadata": { + "id": "R9tYp3VW_sTn" + } + }, + { + "cell_type": "markdown", + "source": [ + "✅ Burada neler olduğunu biraz düşünün. Bir doğru, birçok küçük veri noktasının içinden geçiyor, ancak tam olarak ne yapıyor? Bu doğruyu kullanarak yeni, görülmemiş bir veri noktasının grafiğin y ekseniyle olan ilişkisini nasıl tahmin edebileceğinizi görebiliyor musunuz? Bu modelin pratik kullanımını kelimelerle ifade etmeye çalışın.\n", + "\n", + "Tebrikler, ilk doğrusal regresyon modelinizi oluşturdunuz, onunla bir tahmin yaptınız ve bunu bir grafikte gösterdiniz!\n" + ], + "metadata": { + "id": "zrPtHIxx_tNI" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, [Co-op Translator](https://github.com/Azure/co-op-translator) adlı yapay zeka çeviri hizmeti kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlıklar içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. 
Bu çevirinin kullanımından kaynaklanan yanlış anlama veya yanlış yorumlamalardan sorumlu değiliz.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/tr/2-Regression/1-Tools/solution/notebook.ipynb b/translations/tr/2-Regression/1-Tools/solution/notebook.ipynb new file mode 100644 index 000000000..a2f6959ed --- /dev/null +++ b/translations/tr/2-Regression/1-Tools/solution/notebook.ipynb @@ -0,0 +1,677 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Diyabet veri seti için Doğrusal Regresyon - Ders 1\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Gerekli kütüphaneleri içe aktar\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from sklearn import datasets, linear_model, model_selection\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Diyabet veri setini yükleyin, `X` özelliklerine ve `y` hedef değerlerine ayırın\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(442, 10)\n", + "[ 0.03807591 0.05068012 0.06169621 0.02187239 -0.0442235 -0.03482076\n", + " -0.04340085 -0.00259226 0.01990749 -0.01764613]\n" + ] + } + ], + "source": [ + "X, y = datasets.load_diabetes(return_X_y=True)\n", + "print(X.shape)\n", + "print(X[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Bu alıştırma için hedeflemek üzere sadece bir özellik seçin\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(442,)\n" + ] + } + ], + "source": [ + "# Selecting the 3rd feature\n", + "X = X[:, 2]\n", + "print(X.shape)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "(442, 1)\n", + "[[ 0.06169621]\n", + " [-0.05147406]\n", + " [ 0.04445121]\n", + " [-0.01159501]\n", + " [-0.03638469]\n", + " [-0.04069594]\n", + " [-0.04716281]\n", + " [-0.00189471]\n", + " [ 0.06169621]\n", + " [ 0.03906215]\n", + " [-0.08380842]\n", + " [ 0.01750591]\n", + " [-0.02884001]\n", + " [-0.00189471]\n", + " [-0.02560657]\n", + " [-0.01806189]\n", + " [ 0.04229559]\n", + " [ 0.01211685]\n", + " [-0.0105172 ]\n", + " [-0.01806189]\n", + " [-0.05686312]\n", + " [-0.02237314]\n", + " [-0.00405033]\n", + " [ 0.06061839]\n", + " [ 0.03582872]\n", + " [-0.01267283]\n", + " [-0.07734155]\n", + " [ 0.05954058]\n", + " [-0.02129532]\n", + " [-0.00620595]\n", + " [ 0.04445121]\n", + " [-0.06548562]\n", + " [ 0.12528712]\n", + " [-0.05039625]\n", + " [-0.06332999]\n", + " [-0.03099563]\n", + " [ 0.02289497]\n", + " [ 0.01103904]\n", + " [ 0.07139652]\n", + " [ 0.01427248]\n", + " [-0.00836158]\n", + " [-0.06764124]\n", + " [-0.0105172 ]\n", + " [-0.02345095]\n", + " [ 0.06816308]\n", + " [-0.03530688]\n", + " [-0.01159501]\n", + " [-0.0730303 ]\n", + " [-0.04177375]\n", + " [ 0.01427248]\n", + " [-0.00728377]\n", + " [ 0.0164281 ]\n", + " [-0.00943939]\n", + " [-0.01590626]\n", + " [ 0.0250506 ]\n", + " [-0.04931844]\n", + " [ 0.04121778]\n", + " [-0.06332999]\n", + " [-0.06440781]\n", + " [-0.02560657]\n", + " [-0.00405033]\n", + " [ 0.00457217]\n", + " [-0.00728377]\n", + " [-0.0374625 ]\n", + " [-0.02560657]\n", + " [-0.02452876]\n", + " [-0.01806189]\n", + " [-0.01482845]\n", + " [-0.02991782]\n", + " [-0.046085 ]\n", + " [-0.06979687]\n", + " [ 0.03367309]\n", + " [-0.00405033]\n", + " [-0.02021751]\n", + " [ 0.00241654]\n", + " [-0.03099563]\n", + " [ 0.02828403]\n", + " [-0.03638469]\n", + " [-0.05794093]\n", + " [-0.0374625 ]\n", + " [ 0.01211685]\n", + " [-0.02237314]\n", + " [-0.03530688]\n", + " [ 0.00996123]\n", + " [-0.03961813]\n", + " [ 0.07139652]\n", + " [-0.07518593]\n", + " 
[-0.00620595]\n", + " [-0.04069594]\n", + " [-0.04824063]\n", + " [-0.02560657]\n", + " [ 0.0519959 ]\n", + " [ 0.00457217]\n", + " [-0.06440781]\n", + " [-0.01698407]\n", + " [-0.05794093]\n", + " [ 0.00996123]\n", + " [ 0.08864151]\n", + " [-0.00512814]\n", + " [-0.06440781]\n", + " [ 0.01750591]\n", + " [-0.04500719]\n", + " [ 0.02828403]\n", + " [ 0.04121778]\n", + " [ 0.06492964]\n", + " [-0.03207344]\n", + " [-0.07626374]\n", + " [ 0.04984027]\n", + " [ 0.04552903]\n", + " [-0.00943939]\n", + " [-0.03207344]\n", + " [ 0.00457217]\n", + " [ 0.02073935]\n", + " [ 0.01427248]\n", + " [ 0.11019775]\n", + " [ 0.00133873]\n", + " [ 0.05846277]\n", + " [-0.02129532]\n", + " [-0.0105172 ]\n", + " [-0.04716281]\n", + " [ 0.00457217]\n", + " [ 0.01750591]\n", + " [ 0.08109682]\n", + " [ 0.0347509 ]\n", + " [ 0.02397278]\n", + " [-0.00836158]\n", + " [-0.06117437]\n", + " [-0.00189471]\n", + " [-0.06225218]\n", + " [ 0.0164281 ]\n", + " [ 0.09618619]\n", + " [-0.06979687]\n", + " [-0.02129532]\n", + " [-0.05362969]\n", + " [ 0.0433734 ]\n", + " [ 0.05630715]\n", + " [-0.0816528 ]\n", + " [ 0.04984027]\n", + " [ 0.11127556]\n", + " [ 0.06169621]\n", + " [ 0.01427248]\n", + " [ 0.04768465]\n", + " [ 0.01211685]\n", + " [ 0.00564998]\n", + " [ 0.04660684]\n", + " [ 0.12852056]\n", + " [ 0.05954058]\n", + " [ 0.09295276]\n", + " [ 0.01535029]\n", + " [-0.00512814]\n", + " [ 0.0703187 ]\n", + " [-0.00405033]\n", + " [-0.00081689]\n", + " [-0.04392938]\n", + " [ 0.02073935]\n", + " [ 0.06061839]\n", + " [-0.0105172 ]\n", + " [-0.03315126]\n", + " [-0.06548562]\n", + " [ 0.0433734 ]\n", + " [-0.06225218]\n", + " [ 0.06385183]\n", + " [ 0.03043966]\n", + " [ 0.07247433]\n", + " [-0.0191397 ]\n", + " [-0.06656343]\n", + " [-0.06009656]\n", + " [ 0.06924089]\n", + " [ 0.05954058]\n", + " [-0.02668438]\n", + " [-0.02021751]\n", + " [-0.046085 ]\n", + " [ 0.07139652]\n", + " [-0.07949718]\n", + " [ 0.00996123]\n", + " [-0.03854032]\n", + " [ 0.01966154]\n", + " [ 0.02720622]\n", + 
" [-0.00836158]\n", + " [-0.01590626]\n", + " [ 0.00457217]\n", + " [-0.04285156]\n", + " [ 0.00564998]\n", + " [-0.03530688]\n", + " [ 0.02397278]\n", + " [-0.01806189]\n", + " [ 0.04229559]\n", + " [-0.0547075 ]\n", + " [-0.00297252]\n", + " [-0.06656343]\n", + " [-0.01267283]\n", + " [-0.04177375]\n", + " [-0.03099563]\n", + " [-0.00512814]\n", + " [-0.05901875]\n", + " [ 0.0250506 ]\n", + " [-0.046085 ]\n", + " [ 0.00349435]\n", + " [ 0.05415152]\n", + " [-0.04500719]\n", + " [-0.05794093]\n", + " [-0.05578531]\n", + " [ 0.00133873]\n", + " [ 0.03043966]\n", + " [ 0.00672779]\n", + " [ 0.04660684]\n", + " [ 0.02612841]\n", + " [ 0.04552903]\n", + " [ 0.04013997]\n", + " [-0.01806189]\n", + " [ 0.01427248]\n", + " [ 0.03690653]\n", + " [ 0.00349435]\n", + " [-0.07087468]\n", + " [-0.03315126]\n", + " [ 0.09403057]\n", + " [ 0.03582872]\n", + " [ 0.03151747]\n", + " [-0.06548562]\n", + " [-0.04177375]\n", + " [-0.03961813]\n", + " [-0.03854032]\n", + " [-0.02560657]\n", + " [-0.02345095]\n", + " [-0.06656343]\n", + " [ 0.03259528]\n", + " [-0.046085 ]\n", + " [-0.02991782]\n", + " [-0.01267283]\n", + " [-0.01590626]\n", + " [ 0.07139652]\n", + " [-0.03099563]\n", + " [ 0.00026092]\n", + " [ 0.03690653]\n", + " [ 0.03906215]\n", + " [-0.01482845]\n", + " [ 0.00672779]\n", + " [-0.06871905]\n", + " [-0.00943939]\n", + " [ 0.01966154]\n", + " [ 0.07462995]\n", + " [-0.00836158]\n", + " [-0.02345095]\n", + " [-0.046085 ]\n", + " [ 0.05415152]\n", + " [-0.03530688]\n", + " [-0.03207344]\n", + " [-0.0816528 ]\n", + " [ 0.04768465]\n", + " [ 0.06061839]\n", + " [ 0.05630715]\n", + " [ 0.09834182]\n", + " [ 0.05954058]\n", + " [ 0.03367309]\n", + " [ 0.05630715]\n", + " [-0.06548562]\n", + " [ 0.16085492]\n", + " [-0.05578531]\n", + " [-0.02452876]\n", + " [-0.03638469]\n", + " [-0.00836158]\n", + " [-0.04177375]\n", + " [ 0.12744274]\n", + " [-0.07734155]\n", + " [ 0.02828403]\n", + " [-0.02560657]\n", + " [-0.06225218]\n", + " [-0.00081689]\n", + " [ 0.08864151]\n", + 
" [-0.03207344]\n", + " [ 0.03043966]\n", + " [ 0.00888341]\n", + " [ 0.00672779]\n", + " [-0.02021751]\n", + " [-0.02452876]\n", + " [-0.01159501]\n", + " [ 0.02612841]\n", + " [-0.05901875]\n", + " [-0.03638469]\n", + " [-0.02452876]\n", + " [ 0.01858372]\n", + " [-0.0902753 ]\n", + " [-0.00512814]\n", + " [-0.05255187]\n", + " [-0.02237314]\n", + " [-0.02021751]\n", + " [-0.0547075 ]\n", + " [-0.00620595]\n", + " [-0.01698407]\n", + " [ 0.05522933]\n", + " [ 0.07678558]\n", + " [ 0.01858372]\n", + " [-0.02237314]\n", + " [ 0.09295276]\n", + " [-0.03099563]\n", + " [ 0.03906215]\n", + " [-0.06117437]\n", + " [-0.00836158]\n", + " [-0.0374625 ]\n", + " [-0.01375064]\n", + " [ 0.07355214]\n", + " [-0.02452876]\n", + " [ 0.03367309]\n", + " [ 0.0347509 ]\n", + " [-0.03854032]\n", + " [-0.03961813]\n", + " [-0.00189471]\n", + " [-0.03099563]\n", + " [-0.046085 ]\n", + " [ 0.00133873]\n", + " [ 0.06492964]\n", + " [ 0.04013997]\n", + " [-0.02345095]\n", + " [ 0.05307371]\n", + " [ 0.04013997]\n", + " [-0.02021751]\n", + " [ 0.01427248]\n", + " [-0.03422907]\n", + " [ 0.00672779]\n", + " [ 0.00457217]\n", + " [ 0.03043966]\n", + " [ 0.0519959 ]\n", + " [ 0.06169621]\n", + " [-0.00728377]\n", + " [ 0.00564998]\n", + " [ 0.05415152]\n", + " [-0.00836158]\n", + " [ 0.114509 ]\n", + " [ 0.06708527]\n", + " [-0.05578531]\n", + " [ 0.03043966]\n", + " [-0.02560657]\n", + " [ 0.10480869]\n", + " [-0.00620595]\n", + " [-0.04716281]\n", + " [-0.04824063]\n", + " [ 0.08540807]\n", + " [-0.01267283]\n", + " [-0.03315126]\n", + " [-0.00728377]\n", + " [-0.01375064]\n", + " [ 0.05954058]\n", + " [ 0.02181716]\n", + " [ 0.01858372]\n", + " [-0.01159501]\n", + " [-0.00297252]\n", + " [ 0.01750591]\n", + " [-0.02991782]\n", + " [-0.02021751]\n", + " [-0.05794093]\n", + " [ 0.06061839]\n", + " [-0.04069594]\n", + " [-0.07195249]\n", + " [-0.05578531]\n", + " [ 0.04552903]\n", + " [-0.00943939]\n", + " [-0.03315126]\n", + " [ 0.04984027]\n", + " [-0.08488624]\n", + " [ 0.00564998]\n", + 
" [ 0.02073935]\n", + " [-0.00728377]\n", + " [ 0.10480869]\n", + " [-0.02452876]\n", + " [-0.00620595]\n", + " [-0.03854032]\n", + " [ 0.13714305]\n", + " [ 0.17055523]\n", + " [ 0.00241654]\n", + " [ 0.03798434]\n", + " [-0.05794093]\n", + " [-0.00943939]\n", + " [-0.02345095]\n", + " [-0.0105172 ]\n", + " [-0.03422907]\n", + " [-0.00297252]\n", + " [ 0.06816308]\n", + " [ 0.00996123]\n", + " [ 0.00241654]\n", + " [-0.03854032]\n", + " [ 0.02612841]\n", + " [-0.08919748]\n", + " [ 0.06061839]\n", + " [-0.02884001]\n", + " [-0.02991782]\n", + " [-0.0191397 ]\n", + " [-0.04069594]\n", + " [ 0.01535029]\n", + " [-0.02452876]\n", + " [ 0.00133873]\n", + " [ 0.06924089]\n", + " [-0.06979687]\n", + " [-0.02991782]\n", + " [-0.046085 ]\n", + " [ 0.01858372]\n", + " [ 0.00133873]\n", + " [-0.03099563]\n", + " [-0.00405033]\n", + " [ 0.01535029]\n", + " [ 0.02289497]\n", + " [ 0.04552903]\n", + " [-0.04500719]\n", + " [-0.03315126]\n", + " [ 0.097264 ]\n", + " [ 0.05415152]\n", + " [ 0.12313149]\n", + " [-0.08057499]\n", + " [ 0.09295276]\n", + " [-0.05039625]\n", + " [-0.01159501]\n", + " [-0.0277622 ]\n", + " [ 0.05846277]\n", + " [ 0.08540807]\n", + " [-0.00081689]\n", + " [ 0.00672779]\n", + " [ 0.00888341]\n", + " [ 0.08001901]\n", + " [ 0.07139652]\n", + " [-0.02452876]\n", + " [-0.0547075 ]\n", + " [-0.03638469]\n", + " [ 0.0164281 ]\n", + " [ 0.07786339]\n", + " [-0.03961813]\n", + " [ 0.01103904]\n", + " [-0.04069594]\n", + " [-0.03422907]\n", + " [ 0.00564998]\n", + " [ 0.08864151]\n", + " [-0.03315126]\n", + " [-0.05686312]\n", + " [-0.03099563]\n", + " [ 0.05522933]\n", + " [-0.06009656]\n", + " [ 0.00133873]\n", + " [-0.02345095]\n", + " [-0.07410811]\n", + " [ 0.01966154]\n", + " [-0.01590626]\n", + " [-0.01590626]\n", + " [ 0.03906215]\n", + " [-0.0730303 ]]\n" + ] + } + ], + "source": [ + "#Reshaping to get a 2D array\n", + "X = X.reshape(-1, 1)\n", + "print(X.shape)\n", + "print(X)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + 
"Hem `X` hem de `y` için eğitim ve test verilerini ayırın\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.33)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Modeli seçin ve eğitim verileriyle eğitin\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "LinearRegression()" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = linear_model.LinearRegression()\n", + "model.fit(X_train, y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Test verilerini kullanarak bir çizgi tahmin edin\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred = model.predict(X_test)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Sonuçları bir grafikte göster\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.scatter(X_test, y_test, color='black')\n", + "plt.plot(X_test, y_pred, color='blue', linewidth=3)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, AI çeviri hizmeti [Co-op Translator](https://github.com/Azure/co-op-translator) kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlıklar içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. Bu çevirinin kullanımından kaynaklanan yanlış anlamalar veya yanlış yorumlamalardan sorumlu değiliz.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.1" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "16ff1a974f6e4348e869e4a7d366b86a", + "translation_date": "2025-09-06T13:39:43+00:00", + "source_file": "2-Regression/1-Tools/solution/notebook.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/tr/2-Regression/2-Data/notebook.ipynb b/translations/tr/2-Regression/2-Data/notebook.ipynb new file mode 100644 index 000000000..d9eba010d --- /dev/null +++ b/translations/tr/2-Regression/2-Data/notebook.ipynb @@ -0,0 +1,46 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": 
"text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3-final" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3", + "language": "python" + }, + "coopTranslator": { + "original_hash": "1b2ab303ac6c604a34c6ca7a49077fc7", + "translation_date": "2025-09-06T13:46:05+00:00", + "source_file": "2-Regression/2-Data/notebook.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, [Co-op Translator](https://github.com/Azure/co-op-translator) adlı yapay zeka çeviri hizmeti kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlıklar içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. 
Bu çevirinin kullanımından kaynaklanan yanlış anlama veya yanlış yorumlamalardan sorumlu değiliz.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/tr/2-Regression/2-Data/solution/R/lesson_2-R.ipynb b/translations/tr/2-Regression/2-Data/solution/R/lesson_2-R.ipynb new file mode 100644 index 000000000..f6d4d5865 --- /dev/null +++ b/translations/tr/2-Regression/2-Data/solution/R/lesson_2-R.ipynb @@ -0,0 +1,672 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_2-R.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "f3c335f9940cfd76528b3ef918b9b342", + "translation_date": "2025-09-06T13:55:20+00:00", + "source_file": "2-Regression/2-Data/solution/R/lesson_2-R.ipynb", + "language_code": "tr" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Bir regresyon modeli oluşturma: verileri hazırlama ve görselleştirme\n", + "\n", + "## **Balkabağı için Doğrusal Regresyon - Ders 2**\n", + "#### Giriş\n", + "\n", + "Tidymodels ve Tidyverse ile makine öğrenimi modeli oluşturma sürecine başlamak için gerekli araçları kurduğunuza göre, artık verilerinizle ilgili sorular sormaya hazırsınız. Verilerle çalışırken ve ML çözümleri uygularken, veri setinizin potansiyelini doğru bir şekilde açığa çıkarmak için doğru soruyu nasıl soracağınızı anlamak çok önemlidir.\n", + "\n", + "Bu derste öğreneceksiniz:\n", + "\n", + "- Model oluşturma için verilerinizi nasıl hazırlayacağınızı.\n", + "\n", + "- Veri görselleştirme için `ggplot2`'yi nasıl kullanacağınızı.\n", + "\n", + "Cevaplanmasını istediğiniz soru, hangi tür ML algoritmalarını kullanacağınızı belirleyecektir. Ve alacağınız cevabın kalitesi, büyük ölçüde verilerinizin doğasına bağlı olacaktır.\n", + "\n", + "Bunu pratik bir alıştırma ile görelim.\n", + "\n", + "\n", + "

\n", + " \n", + "

@allison_horst tarafından yapılmış sanat eseri
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "Pg5aexcOPqAZ" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 1. Kabak verilerini içe aktarma ve Tidyverse'i çağırma\n", + "\n", + "Bu dersin verilerini işlemek için aşağıdaki paketlere ihtiyacımız olacak:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/), veri bilimini daha hızlı, kolay ve eğlenceli hale getirmek için tasarlanmış bir [R paketleri koleksiyonudur](https://www.tidyverse.org/packages).\n", + "\n", + "Paketleri şu şekilde yükleyebilirsiniz:\n", + "\n", + "`install.packages(c(\"tidyverse\"))`\n", + "\n", + "Aşağıdaki script, bu modülü tamamlamak için gerekli paketlere sahip olup olmadığınızı kontrol eder ve eksik olanları sizin için yükler.\n" + ], + "metadata": { + "id": "dc5WhyVdXAjR" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "suppressWarnings(if(!require(\"pacman\")) install.packages(\"pacman\"))\n", + "pacman::p_load(tidyverse)" + ], + "outputs": [], + "metadata": { + "id": "GqPYUZgfXOBt" + } + }, + { + "cell_type": "markdown", + "source": [ + "Şimdi, bazı paketleri çalıştıralım ve bu ders için sağlanan [veriyi](https://github.com/microsoft/ML-For-Beginners/blob/main/2-Regression/data/US-pumpkins.csv) yükleyelim!\n" + ], + "metadata": { + "id": "kvjDTPDSXRr2" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load the core Tidyverse packages\n", + "library(tidyverse)\n", + "\n", + "# Import the pumpkins data\n", + "pumpkins <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/2-Regression/data/US-pumpkins.csv\")\n", + "\n", + "\n", + "# Get a glimpse and dimensions of the data\n", + "glimpse(pumpkins)\n", + "\n", + "\n", + "# Print the first 50 rows of the data set\n", + "pumpkins %>% \n", + " slice_head(n =50)" + ], + "outputs": [], + "metadata": { + "id": "VMri-t2zXqgD" + } + }, + { + "cell_type": "markdown", + "source": [ + "Hızlı bir `glimpse()` işlemi, 
boşluklar olduğunu ve metin (`chr`) ile sayısal verilerin (`dbl`) karışık olduğunu hemen gösteriyor. `Date` karakter türünde ve ayrıca verilerin `sacks`, `bins` ve diğer değerlerin karışımı olduğu garip bir `Package` adlı sütun da var. Aslında, veri biraz karışık 😤.\n", + "\n", + "Aslında, tamamen kullanıma hazır bir veri setinin size sunulması ve doğrudan bir ML modeli oluşturmak için uygun olması pek yaygın değildir. Ama endişelenmeyin, bu derste, standart R kütüphanelerini kullanarak ham bir veri setini nasıl hazırlayacağınızı öğreneceksiniz 🧑‍🔧. Ayrıca, veriyi görselleştirmek için çeşitli teknikleri de öğreneceksiniz. 📈📊\n", + "
\n", + "\n", + "> Bir hatırlatma: Pipe operatörü (`%>%`), bir nesneyi ileriye doğru bir fonksiyona veya çağrı ifadesine geçirerek işlemleri mantıksal bir sırayla gerçekleştirir. Pipe operatörünü kodunuzda \"ve sonra\" demek gibi düşünebilirsiniz.\n" + ], + "metadata": { + "id": "REWcIv9yX29v" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 2. Eksik verileri kontrol et\n", + "\n", + "Veri bilimcilerin en sık karşılaştığı sorunlardan biri eksik veya kayıp verilerdir. R, eksik veya bilinmeyen değerleri özel bir işaret değeri olan `NA` (Not Available) ile temsil eder.\n", + "\n", + "Peki, veri çerçevesinde eksik değerler olduğunu nasıl anlayabiliriz?\n", + "
\n", + "- En basit yöntem, mantıksal nesneler `TRUE` veya `FALSE` döndüren temel R fonksiyonu `anyNA`yı kullanmaktır.\n" + ], + "metadata": { + "id": "Zxfb3AM5YbUe" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "pumpkins %>% \n", + " anyNA()" + ], + "outputs": [], + "metadata": { + "id": "G--DQutAYltj" + } + }, + { + "cell_type": "markdown", + "source": [ + "Harika, bazı eksik veriler var gibi görünüyor! Bu başlamak için iyi bir yer.\n", + "\n", + "- Bir başka yöntem, hangi bireysel sütun elemanlarının eksik olduğunu `TRUE` mantıksal değeriyle gösteren `is.na()` fonksiyonunu kullanmaktır.\n" + ], + "metadata": { + "id": "mU-7-SB6YokF" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "pumpkins %>% \n", + " is.na() %>% \n", + " head(n = 7)" + ], + "outputs": [], + "metadata": { + "id": "W-DxDOR4YxSW" + } + }, + { + "cell_type": "markdown", + "source": [ + "Tamam, iş tamamlandı ancak bu kadar büyük bir veri çerçevesiyle, tüm satırları ve sütunları tek tek incelemek verimsiz ve pratikte imkansız olurdu😴.\n", + "\n", + "- Daha sezgisel bir yöntem, her sütundaki eksik değerlerin toplamını hesaplamak olacaktır:\n" + ], + "metadata": { + "id": "xUWxipKYY0o7" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "pumpkins %>% \n", + " is.na() %>% \n", + " colSums()" + ], + "outputs": [], + "metadata": { + "id": "ZRBWV6P9ZArL" + } + }, + { + "cell_type": "markdown", + "source": [ + "Çok daha iyi! Eksik veriler var, ancak belki de bu, yapılacak görev için önemli olmayabilir. Bakalım, daha fazla analiz ne ortaya çıkaracak.\n", + "\n", + "> R, harika paketler ve fonksiyonlar setlerinin yanı sıra çok iyi bir dokümantasyona sahiptir. Örneğin, `help(colSums)` veya `?colSums` kullanarak bu fonksiyon hakkında daha fazla bilgi edinebilirsiniz.\n" + ], + "metadata": { + "id": "9gv-crB6ZD1Y" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 3. 
Dplyr: Veri Manipülasyonu için Bir Dilbilgisi\n", + "\n", + "

\n", + " \n", + "

@allison_horst tarafından yapılmış bir illüstrasyon
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "o4jLY5-VZO2C" + } + }, + { + "cell_type": "markdown", + "source": [ + "[`dplyr`](https://dplyr.tidyverse.org/), Tidyverse içinde yer alan bir paket olup, veri manipülasyonu için bir dilbilgisi sunar ve en yaygın veri manipülasyonu sorunlarını çözmenize yardımcı olan tutarlı bir fiil seti sağlar. Bu bölümde, dplyr'ın bazı fiillerini keşfedeceğiz! \n", + "
\n" + ], + "metadata": { + "id": "i5o33MQBZWWw" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### dplyr::select()\n", + "\n", + "`select()` fonksiyonu, `dplyr` paketinde bulunan ve belirli sütunları seçmenize veya hariç tutmanıza yardımcı olan bir işlevdir.\n", + "\n", + "Veri çerçevenizi daha kolay çalışılabilir hale getirmek için, `select()` kullanarak ihtiyacınız olan sütunları tutup diğerlerini kaldırabilirsiniz.\n", + "\n", + "Örneğin, bu alıştırmada analizimiz `Package`, `Low Price`, `High Price` ve `Date` sütunlarını içerecek. Hadi bu sütunları seçelim.\n" + ], + "metadata": { + "id": "x3VGMAGBZiUr" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Select desired columns\n", + "pumpkins <- pumpkins %>% \n", + " select(Package, `Low Price`, `High Price`, Date)\n", + "\n", + "\n", + "# Print data set\n", + "pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "F_FgxQnVZnM0" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### dplyr::mutate()\n", + "\n", + "`mutate()`, `dplyr` paketinde bulunan ve mevcut sütunları korurken yeni sütunlar oluşturmanıza veya mevcut sütunları değiştirmenize yardımcı olan bir fonksiyondur.\n", + "\n", + "`mutate` fonksiyonunun genel yapısı şu şekildedir:\n", + "\n", + "`data %>% mutate(yeni_sutun_adi = icerigi)`\n", + "\n", + "`mutate` fonksiyonunu `Date` sütunu üzerinde şu işlemleri yaparak deneyelim:\n", + "\n", + "1. Tarihleri (şu anda karakter türünde) ay formatına dönüştürün (bunlar ABD tarih formatında, yani `MM/DD/YYYY`).\n", + "\n", + "2. Tarihlerden ay bilgisini çıkararak yeni bir sütuna ekleyin.\n", + "\n", + "R dilinde, [lubridate](https://lubridate.tidyverse.org/) paketi Tarih-Zaman verileriyle çalışmayı kolaylaştırır. Bu nedenle, yukarıdaki hedeflere ulaşmak için `dplyr::mutate()`, `lubridate::mdy()`, `lubridate::month()` fonksiyonlarını kullanalım. 
Date sütununu, sonraki işlemlerde artık ihtiyaç duymayacağımız için kaldırabiliriz.\n" + ], + "metadata": { + "id": "2KKo0Ed9Z1VB" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load lubridate\n", + "library(lubridate)\n", + "\n", + "pumpkins <- pumpkins %>% \n", + " # Convert the Date column to a date object\n", + " mutate(Date = mdy(Date)) %>% \n", + " # Extract month from Date\n", + " mutate(Month = month(Date)) %>% \n", + " # Drop Date column\n", + " select(-Date)\n", + "\n", + "# View the first few rows\n", + "pumpkins %>% \n", + " slice_head(n = 7)" + ], + "outputs": [], + "metadata": { + "id": "5joszIVSZ6xe" + } + }, + { + "cell_type": "markdown", + "source": [ + "Woohoo! 🤩\n", + "\n", + "Şimdi, bir yeni sütun olan `Price` oluşturalım. Bu sütun, bir kabağın ortalama fiyatını temsil ediyor. Şimdi, `Low Price` ve `High Price` sütunlarının ortalamasını alarak yeni Price sütununu dolduralım.\n", + "
\n" + ], + "metadata": { + "id": "nIgLjNMCZ-6Y" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Create a new column Price\n", + "pumpkins <- pumpkins %>% \n", + " mutate(Price = (`Low Price` + `High Price`)/2)\n", + "\n", + "# View the first few rows of the data\n", + "pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "Zo0BsqqtaJw2" + } + }, + { + "cell_type": "markdown", + "source": [ + "Evet!💪\n", + "\n", + "\"Ama dur bir dakika!\", tüm veri setini `View(pumpkins)` ile hızlıca gözden geçirdikten sonra diyeceksiniz ki, \"Burada bir gariplik var!\"🤔\n", + "\n", + "Eğer `Package` sütununa bakarsanız, kabakların birçok farklı şekilde satıldığını göreceksiniz. Bazıları `1 1/9 bushel` ölçüsünde, bazıları `1/2 bushel` ölçüsünde, bazıları kabak başına, bazıları pound başına, ve bazıları da farklı genişliklerde büyük kutular içinde satılıyor.\n", + "\n", + "Hadi bunu doğrulayalım:\n" + ], + "metadata": { + "id": "p77WZr-9aQAR" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Verify the distinct observations in Package column\n", + "pumpkins %>% \n", + " distinct(Package)" + ], + "outputs": [], + "metadata": { + "id": "XISGfh0IaUy6" + } + }, + { + "cell_type": "markdown", + "source": [ + "Harika!👏\n", + "\n", + "Balkabaklarını tutarlı bir şekilde tartmak oldukça zor görünüyor, bu yüzden onları filtreleyelim. 
Bunun için `Package` sütununda *bushel* kelimesini içeren balkabaklarını seçip yeni bir veri çerçevesi olan `new_pumpkins` içine koyalım.\n" + ], + "metadata": { + "id": "7sMjiVujaZxY" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### dplyr::filter() ve stringr::str_detect()\n", + "\n", + "[`dplyr::filter()`](https://dplyr.tidyverse.org/reference/filter.html): Verilerin yalnızca **satırlarının** koşullarınızı sağladığı bir alt kümesini oluşturur, bu durumda `Package` sütununda *bushel* kelimesini içeren kabaklar.\n", + "\n", + "[stringr::str_detect()](https://stringr.tidyverse.org/reference/str_detect.html): Bir dizgede bir desenin varlığını veya yokluğunu algılar.\n", + "\n", + "[`stringr`](https://github.com/tidyverse/stringr) paketi, yaygın dizge işlemleri için basit fonksiyonlar sağlar.\n" + ], + "metadata": { + "id": "L8Qfcs92ageF" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Retain only pumpkins with \"bushel\"\n", + "new_pumpkins <- pumpkins %>% \n", + " filter(str_detect(Package, \"bushel\"))\n", + "\n", + "# Get the dimensions of the new data\n", + "dim(new_pumpkins)\n", + "\n", + "# View a few rows of the new data\n", + "new_pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "hy_SGYREampd" + } + }, + { + "cell_type": "markdown", + "source": [ + "Veri kümesini, bushel ile satılan kabakları içeren yaklaşık 415 satıra indirdiğimizi görebilirsiniz.🤩\n", + "
\n" + ], + "metadata": { + "id": "VrDwF031avlR" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### dplyr::case_when()\n", + "\n", + "**Ama durun! Yapılacak bir şey daha var**\n", + "\n", + "Her satırda buşel miktarının değiştiğini fark ettiniz mi? Fiyatlandırmayı normalize etmeniz gerekiyor, böylece fiyatlandırmayı 1 1/9 veya 1/2 buşel yerine buşel başına gösterebilirsiniz. Şimdi fiyatları standartlaştırmak için biraz matematik yapma zamanı.\n", + "\n", + "Fiyat sütununu bazı koşullara bağlı olarak *değiştirmek* için [`case_when()`](https://dplyr.tidyverse.org/reference/case_when.html) fonksiyonunu kullanacağız. `case_when`, birden fazla `if_else()` ifadesini vektörleştirmenize olanak tanır.\n" + ], + "metadata": { + "id": "mLpw2jH4a0tx" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Convert the price if the Package contains fractional bushel values\n", + "new_pumpkins <- new_pumpkins %>% \n", + " mutate(Price = case_when(\n", + " str_detect(Package, \"1 1/9\") ~ Price/(1 + 1/9),\n", + " str_detect(Package, \"1/2\") ~ Price/(1/2),\n", + " TRUE ~ Price))\n", + "\n", + "# View the first few rows of the data\n", + "new_pumpkins %>% \n", + " slice_head(n = 30)" + ], + "outputs": [], + "metadata": { + "id": "P68kLVQmbM6I" + } + }, + { + "cell_type": "markdown", + "source": [ + "Şimdi, bushel ölçümüne göre birim fiyat analizini yapabiliriz. Ancak, tüm bu kabak bushel'leri üzerine yapılan çalışma, `verinizin doğasını anlamanın` ne kadar `önemli` olduğunu gösteriyor!\n", + "\n", + "> ✅ [The Spruce Eats](https://www.thespruceeats.com/how-much-is-a-bushel-1389308)'e göre, bir bushel'in ağırlığı ürün türüne bağlıdır çünkü bu bir hacim ölçüsüdür. \"Örneğin, bir domates bushel'i 56 pound ağırlığında olmalı... Yapraklar ve yeşillikler daha az ağırlıkla daha fazla alan kaplar, bu yüzden bir ıspanak bushel'i sadece 20 pound'dur.\" Oldukça karmaşık bir konu! 
Bushel'den pound'a dönüşüm yapmaya uğraşmayalım, bunun yerine bushel üzerinden fiyatlandırma yapalım. Ancak, tüm bu kabak bushel'leri üzerine yapılan çalışma, verinizin doğasını anlamanın ne kadar önemli olduğunu gösteriyor!\n", + ">\n", + "> ✅ Yarım bushel ile satılan kabakların çok pahalı olduğunu fark ettiniz mi? Nedenini çözebilir misiniz? İpucu: Küçük kabaklar büyük olanlardan çok daha pahalıdır, muhtemelen bir bushel'e çok daha fazla sayıda sığdıkları için. Büyük, içi boş bir turta kabağının kapladığı kullanılmayan alan nedeniyle bushel başına daha fazla küçük kabak bulunur.\n" + ], + "metadata": { + "id": "pS2GNPagbSdb" + } + }, + { + "cell_type": "markdown", + "source": [ + "Son olarak, sırf macera olsun diye 💁‍♀️, `Month` sütununu da ilk sıraya, yani `Package` sütununun `öncesine` taşıyalım.\n", + "\n", + "Sütunların yerini değiştirmek için `dplyr::relocate()` kullanılır.\n" + ], + "metadata": { + "id": "qql1SowfbdnP" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Create a new data frame new_pumpkins\n", + "new_pumpkins <- new_pumpkins %>% \n", + " relocate(Month, .before = Package)\n", + "\n", + "new_pumpkins %>% \n", + " slice_head(n = 7)" + ], + "outputs": [], + "metadata": { + "id": "JJ1x6kw8bixF" + } + }, + { + "cell_type": "markdown", + "source": [ + "Tebrikler! 👌 Artık yeni regresyon modelinizi oluşturabileceğiniz temiz ve düzenli bir veri kümesine sahipsiniz! \n", + "
\n" + ], + "metadata": { + "id": "y8TJ0Za_bn5Y" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 4. ggplot2 ile Veri Görselleştirme\n", + "\n", + "

\n", + " \n", + "

Dasani Madipalli tarafından hazırlanan infografik
\n", + "\n", + "\n", + "\n", + "\n", + "Şöyle bir *bilgece* söz vardır:\n", + "\n", + "> \"Basit bir grafik, veri analistinin zihnine diğer tüm araçlardan daha fazla bilgi taşımıştır.\" --- John Tukey\n", + "\n", + "Bir veri bilimcinin rolü, üzerinde çalıştığı verinin kalitesini ve doğasını göstermektir. Bunu yapmak için genellikle verinin farklı yönlerini gösteren ilginç görselleştirmeler, grafikler, çizimler ve tablolar oluştururlar. Bu şekilde, görsel olarak ilişkileri ve aksi takdirde ortaya çıkarması zor olan boşlukları gösterebilirler.\n", + "\n", + "Görselleştirmeler, veriye en uygun makine öğrenimi tekniğini belirlemeye de yardımcı olabilir. Örneğin, bir çizgiye uyar gibi görünen bir dağılım grafiği, verinin doğrusal regresyon çalışması için iyi bir aday olduğunu gösterir.\n", + "\n", + "R, grafik oluşturmak için birkaç sistem sunar, ancak [`ggplot2`](https://ggplot2.tidyverse.org/index.html) en zarif ve en esnek olanlardan biridir. `ggplot2`, grafikleri **bağımsız bileşenleri birleştirerek** oluşturmanıza olanak tanır.\n", + "\n", + "Hadi Price ve Month sütunları için basit bir dağılım grafiğiyle başlayalım.\n", + "\n", + "Bu durumda, [`ggplot()`](https://ggplot2.tidyverse.org/reference/ggplot.html) ile başlayacağız, bir veri seti ve estetik eşleme ([`aes()`](https://ggplot2.tidyverse.org/reference/aes.html) ile) sağlayacağız, ardından dağılım grafikleri için katmanlar ekleyeceğiz (örneğin [`geom_point()`](https://ggplot2.tidyverse.org/reference/geom_point.html)).\n" + ], + "metadata": { + "id": "mYSH6-EtbvNa" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Set a theme for the plots\n", + "theme_set(theme_light())\n", + "\n", + "# Create a scatter plot\n", + "p <- ggplot(data = new_pumpkins, aes(x = Price, y = Month))\n", + "p + geom_point()" + ], + "outputs": [], + "metadata": { + "id": "g2YjnGeOcLo4" + } + }, + { + "cell_type": "markdown", + "source": [ + "Bu faydalı bir grafik mi 🤷? 
Sizi şaşırtan bir şey var mı?\n", + "\n", + "Pek faydalı değil, çünkü tek yaptığı verilerinizi belirli bir ayda noktalar halinde göstermek. \n" + ], + "metadata": { + "id": "Ml7SDCLQcPvE" + } + }, + { + "cell_type": "markdown", + "source": [ + "### **Nasıl faydalı hale getirebiliriz?**\n", + "\n", + "Grafiklerin faydalı veriler göstermesini sağlamak için genellikle verileri bir şekilde gruplandırmanız gerekir. Örneğin, bizim durumumuzda, her ay için kabakların ortalama fiyatını bulmak, verilerimizdeki temel desenlere dair daha fazla içgörü sağlayacaktır. Bu bizi bir başka **dplyr** özelliğine yönlendiriyor:\n", + "\n", + "#### `dplyr::group_by() %>% summarize()`\n", + "\n", + "R'de gruplandırılmış toplama işlemleri kolayca şu şekilde yapılabilir:\n", + "\n", + "`dplyr::group_by() %>% summarize()`\n", + "\n", + "- `dplyr::group_by()` analiz birimini tüm veri setinden, ay gibi bireysel gruplara değiştirir.\n", + "\n", + "- `dplyr::summarize()` her bir gruplama değişkeni için bir sütun ve belirttiğiniz özet istatistikler için bir sütun içeren yeni bir veri çerçevesi oluşturur.\n", + "\n", + "Örneğin, **Month** sütunlarına göre kabakları gruplandırmak ve her ay için **ortalama fiyatı** bulmak için `dplyr::group_by() %>% summarize()` kullanabiliriz.\n" + ], + "metadata": { + "id": "jMakvJZIcVkh" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Find the average price of pumpkins per month\r\n", + "new_pumpkins %>%\r\n", + " group_by(Month) %>% \r\n", + " summarise(mean_price = mean(Price))" + ], + "outputs": [], + "metadata": { + "id": "6kVSUa2Bcilf" + } + }, + { + "cell_type": "markdown", + "source": [ + "Kısa ve öz!✨\n", + "\n", + "Aylar gibi kategorik özellikler, bir çubuk grafik kullanılarak daha iyi temsil edilir 📊. Çubuk grafiklerden sorumlu katmanlar `geom_bar()` ve `geom_col()`dur. 
Daha fazla bilgi için `?geom_bar` komutuna göz atabilirsiniz.\n", + "\n", + "Hadi bir tane oluşturalım!\n" + ], + "metadata": { + "id": "Kds48GUBcj3W" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Find the average price of pumpkins per month then plot a bar chart\r\n", + "new_pumpkins %>%\r\n", + " group_by(Month) %>% \r\n", + " summarise(mean_price = mean(Price)) %>% \r\n", + " ggplot(aes(x = Month, y = mean_price)) +\r\n", + " geom_col(fill = \"midnightblue\", alpha = 0.7) +\r\n", + " ylab(\"Pumpkin Price\")" + ], + "outputs": [], + "metadata": { + "id": "VNbU1S3BcrxO" + } + }, + { + "cell_type": "markdown", + "source": [ + "🤩🤩Bu, daha kullanışlı bir veri görselleştirme! Görünüşe göre en yüksek kabak fiyatları Eylül ve Ekim aylarında gerçekleşiyor. Bu beklentinizi karşılıyor mu? Neden veya neden değil?\n", + "\n", + "İkinci dersi tamamladığınız için tebrikler 👏! Verilerinizi model oluşturma için hazırladınız, ardından görselleştirmeler kullanarak daha fazla içgörü ortaya çıkardınız!\n" + ], + "metadata": { + "id": "zDm0VOzzcuzR" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, [Co-op Translator](https://github.com/Azure/co-op-translator) adlı yapay zeka çeviri hizmeti kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlıklar içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. 
Bu çevirinin kullanımından kaynaklanan yanlış anlama veya yanlış yorumlamalardan sorumlu değiliz.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/tr/2-Regression/2-Data/solution/notebook.ipynb b/translations/tr/2-Regression/2-Data/solution/notebook.ipynb new file mode 100644 index 000000000..1e0cf5a57 --- /dev/null +++ b/translations/tr/2-Regression/2-Data/solution/notebook.ipynb @@ -0,0 +1,437 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NameTypePackageVarietySub VarietyGradeDateLow PriceHigh PriceMostly Low...Unit of SaleQualityConditionAppearanceStorageCropRepackTrans ModeUnnamed: 24Unnamed: 25
70BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN9/24/1615.015.015.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
71BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN9/24/1618.018.018.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
72BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN10/1/1618.018.018.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
73BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN10/1/1617.017.017.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
74BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN10/8/1615.015.015.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " City Name Type Package Variety Sub Variety Grade \\\n", + "70 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "71 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "72 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "73 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "74 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "\n", + " Date Low Price High Price Mostly Low ... Unit of Sale Quality \\\n", + "70 9/24/16 15.0 15.0 15.0 ... NaN NaN \n", + "71 9/24/16 18.0 18.0 18.0 ... NaN NaN \n", + "72 10/1/16 18.0 18.0 18.0 ... NaN NaN \n", + "73 10/1/16 17.0 17.0 17.0 ... NaN NaN \n", + "74 10/8/16 15.0 15.0 15.0 ... NaN NaN \n", + "\n", + " Condition Appearance Storage Crop Repack Trans Mode Unnamed: 24 \\\n", + "70 NaN NaN NaN NaN N NaN NaN \n", + "71 NaN NaN NaN NaN N NaN NaN \n", + "72 NaN NaN NaN NaN N NaN NaN \n", + "73 NaN NaN NaN NaN N NaN NaN \n", + "74 NaN NaN NaN NaN N NaN NaN \n", + "\n", + " Unnamed: 25 \n", + "70 NaN \n", + "71 NaN \n", + "72 NaN \n", + "73 NaN \n", + "74 NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "pumpkins = pd.read_csv('../../data/US-pumpkins.csv')\n", + "\n", + "pumpkins = pumpkins[pumpkins['Package'].str.contains('bushel', case=True, regex=True)]\n", + "\n", + "pumpkins.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "City Name 0\n", + "Type 406\n", + "Package 0\n", + "Variety 0\n", + "Sub Variety 167\n", + "Grade 415\n", + "Date 0\n", + "Low Price 0\n", + "High Price 0\n", + "Mostly Low 24\n", + "Mostly High 24\n", + "Origin 0\n", + "Origin District 396\n", + "Item Size 114\n", + "Color 145\n", + "Environment 415\n", + "Unit of Sale 404\n", + "Quality 415\n", + "Condition 415\n", + "Appearance 
415\n", + "Storage 415\n", + "Crop 415\n", + "Repack 0\n", + "Trans Mode 415\n", + "Unnamed: 24 415\n", + "Unnamed: 25 391\n", + "dtype: int64" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pumpkins.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Month Package Low Price High Price Price\n", + "70 9 1 1/9 bushel cartons 15.00 15.0 13.50\n", + "71 9 1 1/9 bushel cartons 18.00 18.0 16.20\n", + "72 10 1 1/9 bushel cartons 18.00 18.0 16.20\n", + "73 10 1 1/9 bushel cartons 17.00 17.0 15.30\n", + "74 10 1 1/9 bushel cartons 15.00 15.0 13.50\n", + "... ... ... ... ... ...\n", + "1738 9 1/2 bushel cartons 15.00 15.0 30.00\n", + "1739 9 1/2 bushel cartons 13.75 15.0 28.75\n", + "1740 9 1/2 bushel cartons 10.75 15.0 25.75\n", + "1741 9 1/2 bushel cartons 12.00 12.0 24.00\n", + "1742 9 1/2 bushel cartons 12.00 12.0 24.00\n", + "\n", + "[415 rows x 5 columns]\n" + ] + } + ], + "source": [ + "\n", + "# A set of new columns for a new dataframe. 
Filter out nonmatching columns\n", + "columns_to_select = ['Package', 'Low Price', 'High Price', 'Date']\n", + "pumpkins = pumpkins.loc[:, columns_to_select]\n", + "\n", + "# Get an average between low and high price for the base pumpkin price\n", + "price = (pumpkins['Low Price'] + pumpkins['High Price']) / 2\n", + "\n", + "# Convert the date to its month only\n", + "month = pd.DatetimeIndex(pumpkins['Date']).month\n", + "\n", + "# Create a new dataframe with this basic data\n", + "new_pumpkins = pd.DataFrame({'Month': month, 'Package': pumpkins['Package'], 'Low Price': pumpkins['Low Price'],'High Price': pumpkins['High Price'], 'Price': price})\n", + "\n", + "# Convert the price if the Package contains fractional bushel values\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1 1/9'), 'Price'] = price/(1 + 1/9)\n", + "\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1/2'), 'Price'] = price/(1/2)\n", + "\n", + "print(new_pumpkins)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "price = new_pumpkins.Price\n", + "month = new_pumpkins.Month\n", + "plt.scatter(price, month)\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0, 0.5, 'Pumpkin Price')" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEJCAYAAACT/UyFAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAAsTAAALEwEAmpwYAAARAElEQVR4nO3de5AlZX3G8e8joKigiIwbVNYVQ6ErwcVaiRW0CgUNikEQKxFTijHJahlUSsvUqknE/LVE0KoYNVkDigloNCoQLt5AxUuCLrrhIhqUQgMiLBGE0goR+OWP0+sMszOzZ8ft0zO830/VqTndfc7phwae6XlPX1JVSJLa8aChA0iSJsvil6TGWPyS1BiLX5IaY/FLUmMsfklqzK5DBxjHPvvsU6tWrRo6hiQtK1dcccVtVTU1e/6yKP5Vq1axadOmoWNI0rKS5IdzzXeoR5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktSYZXECl3auVesvHDoCN2w4eugIUrMsfjXNX4JqkUM9ktQYi1+SGmPxS1JjLH5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqTG/Fn2S/JF9M8p0k1yR5Yzf/lCQ3JdncPV7YVwZJ0rZ27fGz7wHeXFXfSrIncEWSz3fL3lNVp/W4bknSPHor/qq6Gbi5e35XkmuBx/W1PknSePrc4/+VJKuAQ4DLgcOAk5K8EtjE6K+C2yeRQ9L8Vq2/cOgI3LDh6KEjNKH3L3eT7AF8Eji5qu4EPgA8CVjD6C+C0+d537okm5Js2rJlS98xJakZvRZ/kt0Ylf7ZVfUpgKq6parurar7gA8Ch8713qraWFVrq2rt1NRUnzElqSl9HtUT4Azg2qp694z5+8542XHA1X1lkCRtq88x/sOAVwBXJdnczXsbcEKSNUABNwCv6TGDJGmWPo/q+SqQORZd1Nc6F+IXV5I04pm7ktQYi1+SGmPxS1JjLH5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktQYi1+SGmPxS1JjLH5JakxvxZ9kvyRfTPKdJNckeWM3f+8kn09yXffzUX1lkCRtq889/nuAN1fVauCZwJ8lWQ2sBy6pqgOAS7ppSdKE9Fb8VXVzVX2re34XcC3wOODFwFndy84Cju0rgyRpWxMZ40+yCjgEuBxYUVU3d4t+AqyY5z3rkmxKsmnLli2TiClJTei9+JPsAXwSOLmq7py5rKoKqLneV1Ubq2ptVa2dmprqO6YkNWOs4k/y0CQH7uiHJ9mNUemfXVWf6mbfkmTfbvm+wK07+rmSpM
XbbvEn+T1gM/CZbnpNkvPHeF+AM4Brq+rdMxadD5zYPT8ROG8HM0uSfg3j7PGfAhwK3AFQVZuBJ47xvsOAVwDPTbK5e7wQ2AA8L8l1wJHdtCRpQnYd4zW/rKqfjXbgf2XOcfn7vaDqq0DmWXzEGOuVJPVgnOK/JsnLgV2SHAC8Afh6v7EkSX0ZZ6jn9cBTgbuBc4CfASf3mEmS1KPt7vFX1S+At3cPSdIyN85RPZ9PsteM6Ucl+WyvqSRJvRlnqGefqrpj60RV3Q48prdEkqRejVP89yVZuXUiyRMY46geSdLSNM5RPW8Hvprky4wOz3w2sK7XVJKk3ozz5e5nkjyd0aWVYXTNndv6jSVJ6su8Qz1Jntz9fDqwEvhx91jZzZMkLUML7fG/idGQzulzLCvgub0kkiT1at7ir6p1SR4E/EVVfW2CmSRJPVrwqJ6qug/4uwllkSRNwDiHc16S5PjMukqbJGl5Gqf4XwN8Arg7yZ1J7kpy5/beJElamsY5nHPPSQSRJE3GQodzHpDkvCRXJzknyeMmGUyS1I+FhnrOBC4Ajge+Dbx3IokkSb1aaKhnz6r6YPf8XUm+NYlAkqR+LVT8uyc5hOnbJz505nRV+YtAkpahhYr/ZuDdM6Z/MmPaM3claZla6Mzd50wyiCRpMsY5jl+S9ABi8UtSYyx+SWrMOHfgojt56wkzX19Vl/UVSpLUn+0Wf5JTgT8AvgPc280uwOKXpGVonD3+Y4EDq+runrNIkiZgnOK/HtgN2KHiT3Im8CLg1qo6qJt3CvCnwJbuZW+rqot25HMlqW+r1l84dARu2HB0b589TvH/Atic5BJmlH9VvWE77/swo5u4fGTW/PdU1Wk7ElKStPOMU/znd48dUlWXJVm1w4kkSb0a53r8Z+3kdZ6U5JXAJuDNVXX7XC9Kso7Rzd5ZuXLlTo4gSe1a6Hr8H+9+XpXkytmPRa7vA8CTgDWMrgV0+nwvrKqNVbW2qtZOTU0tcnWSpNkW2uN/Y/fzRTtrZVV1y9bnST7I6Hr/kqQJmnePv6pu7p6urqofznwAL1jMypLsO2PyOODqxXyOJGnxxvly9y+T3F1VlwIk+XPgOcDfL/SmJB8FDgf2SXIj8A7g8CRrGJ0AdgOjG7lLkiZonOI/BrggyVuAo4AnAy/e3puq6oQ5Zp+xY/EkSTvbOEf13JbkGOALwBXAS6uqek8mSerFvMWf5C5GQzJbPRjYH3hpkqqqR/QdTpK08y10B649JxlEkjQZ416W+SXAsxj9BfCVqjq3z1CSpP5s90YsSd4PvBa4itHhl69N8r6+g0mS+jHOHv9zgads/UI3yVnANb2mkiT1ZpxbL34fmHmxnP26eZKkZWicPf49gWuTfKObfgawKcn5AFV1TF/hJEk73zjF/1e9p5AkTcw4J3B9GSDJI7j/zdZ/2mMuSVJPxrnZ+jrgr4H/Be4Dwuiwzv37jSZJ6sM4Qz1vAQ6qqtv6DiNJ6t84R/X8gNF9dyVJDwDj7PG/Ffh6ksvZsZutS5KWoHGK/x+ASxmduXtfv3EkSX0bp/h3q6o39Z5EkjQR44zxX5xkXZJ9k+y99dF7MklSL8bZ4996J623zpjn4ZyStEyNcwLXEycRRJI0GeOcwPXKueZX1Ud2fhxJUt/GGep5xoznuwNHAN8CLH5JWobGGep5/czpJHsBH+srkCSpX+Mc1TPbzwHH/SVpmRpnjP/fGB3FA6NfFKuBj/cZSpLUn3HG+E+b8fwe4IdVdWNPeSRJPZu3+JPszugm67/J6HINZ1TVPZMKJknqx0Jj/GcBaxmV/guA0yeSSJLUq4WGelZX1W8BJDkD+MYCr91GkjOBFwG3VtVB3by9gX8BVgE3AL9fVbfveGxJ0mIttMf/y61PFjnE82HgqFnz1gOXVNUBwCXdtCRpghYq/qclubN73AUcvPV5kju398FVdRkw+768L2Y0hET389jFhJYkLd68Qz1VtUsP61tRVTd3z38CrO
hhHZKkBSzmBK6doqqK6fMDttFdCnpTkk1btmyZYDJJemCbdPHfkmRfgO7nrfO9sKo2VtXaqlo7NTU1sYCS9EA36eI/Hzixe34icN6E1y9Jzeut+JN8FPh34MAkNyb5Y2AD8Lwk1wFHdtOSpAka55INi1JVJ8yz6Ii+1ilJ2r7BvtyVJA3D4pekxlj8ktQYi1+SGmPxS1JjLH5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktSYXYdYaZIbgLuAe4F7qmrtEDkkqUWDFH/nOVV124Drl6QmOdQjSY0ZqvgL+FySK5KsGyiDJDVpqKGeZ1XVTUkeA3w+yXer6rKZL+h+IawDWLly5RAZJekBaZA9/qq6qft5K/Bp4NA5XrOxqtZW1dqpqalJR5SkB6yJF3+ShyfZc+tz4PnA1ZPOIUmtGmKoZwXw6SRb139OVX1mgByS1KSJF39VXQ88bdLrlSSNeDinJDXG4pekxlj8ktQYi1+SGmPxS1JjLH5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktQYi1+SGjNI8Sc5Ksn3knw/yfohMkhSqyZe/El2Ad4HvABYDZyQZPWkc0hSq4bY4z8U+H5VXV9V/wd8DHjxADkkqUmpqsmuMHkpcFRV/Uk3/Qrgt6vqpFmvWwes6yYPBL430aDb2ge4beAMS4XbYprbYprbYtpS2RZPqKqp2TN3HSLJOKpqI7Bx6BxbJdlUVWuHzrEUuC2muS2muS2mLfVtMcRQz03AfjOmH9/NkyRNwBDF/03ggCRPTPJg4GXA+QPkkKQmTXyop6ruSXIS8FlgF+DMqrpm0jkWYckMOy0BbotpbotpbotpS3pbTPzLXUnSsDxzV5IaY/FLUmMsfklqzJI9jn9IM442+nFVfSHJy4HfAa4FNlbVLwcNOGFJ9gdewugw3HuB/wLOqao7Bw0maVH8cncOSc5m9EvxYcAdwB7Ap4AjGG2zE4dLN1lJ3gC8CLgMeCHwbUbb5DjgdVX1pcHCSVoUi38OSa6sqoOT7Mro5LLHVtW9SQL8Z1UdPHDEiUlyFbCm++d/GHBRVR2eZCVwXlUdMnDEiUnySOCtwLHAY4ACbgXOAzZU1R2DhVtCklxcVS8YOsekJHkEo/8uHg9cXFXnzFj2/qp63WDh5uFQz9we1A33PJzRXv8jgZ8CDwF2GzLYQHZlNMTzEEZ//VBVP0rS2rb4OHApcHhV/QQgyW8AJ3bLnj9gtolK8vT5FgFrJhhlKfgQcB3wSeDVSY4HXl5VdwPPHDTZPCz+uZ0BfJfRCWZvBz6R5HpG/xI/NmSwAfwj8M0klwPPBk4FSDLF6JdhS1ZV1akzZ3S/AE5N8uqBMg3lm8CXGRX9bHtNNsrgnlRVx3fPz03yduDSJMcMGWohDvXMI8ljAarqx0n2Ao4EflRV3xg02ACSPBV4CnB1VX136DxDSfI54AvAWVV1SzdvBfAq4HlVdeSA8SYqydXAcVV13RzL/ruq9pvjbQ9ISa4FnlpV982Y9yrgLcAeVfWEobLNx+KXxpTkUcB6RvePeEw3+xZG15raUFW3D5Vt0rrLq19VVdtcLj3JsVV17uRTDSPJ3wCfq6ovzJp/FPDeqjpgmGTzs/ilnSDJH1XVh4bOsRS4LaYt1W1h8Us7QZIfVdXKoXMsBW6LaUt1W/jlrjSmJFfOtwhYMcksQ3NbTFuO28Lil8a3AvhdYPZYfoCvTz7OoNwW05bdtrD4pfFdwOgojc2zFyT50sTTDMttMW3ZbQvH+CWpMV6dU5IaY/FLUmMsfglIUkn+ecb0rkm2JL
lgkZ+3V5LXzZg+fLGfJe1sFr808nPgoCQP7aafx+jKrIu1F7DkrsoogcUvzXQRcHT3/ATgo1sXJNk7yblJrkzyH0kO7uafkuTMJF9Kcn13/wKADcCTkmxO8q5u3h5J/jXJd5Oc3V3mW5o4i1+a9jHgZUl2Bw4GLp+x7J3At7t7MbwN+MiMZU9mdBz3ocA7ustVrwd+UFVrquot3esOAU4GVgP7A4f1+M8izcvilzpVdSWwitHe/kWzFj8L+KfudZcCj+5uwAFwYVXdXVW3Mboxy3xna36jqm7sruK4uVuXNHGewCXd3/nAacDhwKPHfM/dM57fy/z/X437OqlX7vFL93cm8M6qumrW/K8AfwijI3SA27Zzs/m7gD37CCj9utzjkGaoqhuBv51j0SnAmd0FuX7B6HaLC33O/yT5WnfDkouBC3d2VmmxvGSDJDXGoR5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSY/4fZDFW+b6+4WkAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "new_pumpkins.groupby(['Month'])['Price'].mean().plot(kind='bar')\n", + "plt.ylabel(\"Pumpkin Price\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, AI çeviri hizmeti [Co-op Translator](https://github.com/Azure/co-op-translator) kullanılarak çevrilmiştir. Doğruluğu sağlamak için çaba göstersek de, otomatik çevirilerin hata veya yanlışlık içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. Bu çevirinin kullanımından kaynaklanan yanlış anlamalar veya yanlış yorumlamalar için sorumluluk kabul etmiyoruz.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" + }, + "kernelspec": { + "display_name": "Python 3.7.0 64-bit ('3.7')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.1" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "95726f0b8283628d5356a4f8eb8b4b76", + "translation_date": "2025-09-06T13:46:31+00:00", + "source_file": "2-Regression/2-Data/solution/notebook.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/tr/2-Regression/3-Linear/notebook.ipynb b/translations/tr/2-Regression/3-Linear/notebook.ipynb new file mode 100644 
index 000000000..9ae5293d1 --- /dev/null +++ b/translations/tr/2-Regression/3-Linear/notebook.ipynb @@ -0,0 +1,128 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Kabak Fiyatlandırması\n", + "\n", + "Gerekli kütüphaneleri ve veri setini yükleyin. Verileri aşağıdaki alt küme verilerini içeren bir dataframe'e dönüştürün:\n", + "\n", + "- Sadece kile başına fiyatlandırılmış kabakları alın\n", + "- Tarihi bir aya dönüştürün\n", + "- Fiyatı, yüksek ve düşük fiyatların ortalaması olarak hesaplayın\n", + "- Fiyatı, kile miktarına göre fiyatlandırmayı yansıtacak şekilde dönüştürün\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from datetime import datetime\n", + "\n", + "pumpkins = pd.read_csv('../data/US-pumpkins.csv')\n", + "\n", + "pumpkins.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pumpkins = pumpkins[pumpkins['Package'].str.contains('bushel', case=True, regex=True)]\n", + "\n", + "columns_to_select = ['Package', 'Variety', 'City Name', 'Low Price', 'High Price', 'Date']\n", + "pumpkins = pumpkins.loc[:, columns_to_select]\n", + "\n", + "price = (pumpkins['Low Price'] + pumpkins['High Price']) / 2\n", + "\n", + "month = pd.DatetimeIndex(pumpkins['Date']).month\n", + "day_of_year = pd.to_datetime(pumpkins['Date']).apply(lambda dt: (dt-datetime(dt.year,1,1)).days)\n", + "\n", + "new_pumpkins = pd.DataFrame(\n", + " {'Month': month, \n", + " 'DayOfYear' : day_of_year, \n", + " 'Variety': pumpkins['Variety'], \n", + " 'City': pumpkins['City Name'], \n", + " 'Package': pumpkins['Package'], \n", + " 'Low Price': pumpkins['Low Price'],\n", + " 'High Price': pumpkins['High Price'], \n", + " 'Price': price})\n", + "\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1 1/9'), 'Price'] = 
price/1.1\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1/2'), 'Price'] = price*2\n", + "\n", + "new_pumpkins.head()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Temel bir saçılım grafiği, elimizde yalnızca Ağustos'tan Aralık'a kadar olan ay verilerinin bulunduğunu hatırlatır. Muhtemelen doğrusal bir şekilde sonuçlar çıkarabilmek için daha fazla veriye ihtiyacımız var.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "plt.scatter('Month','Price',data=new_pumpkins)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "plt.scatter('DayOfYear','Price',data=new_pumpkins)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, [Co-op Translator](https://github.com/Azure/co-op-translator) adlı yapay zeka çeviri hizmeti kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlıklar içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. 
Bu çevirinin kullanımından kaynaklanan yanlış anlama veya yanlış yorumlamalardan sorumlu değiliz.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3-final" + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "b032d371c75279373507f003439a577e", + "translation_date": "2025-09-06T13:09:14+00:00", + "source_file": "2-Regression/3-Linear/notebook.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/tr/2-Regression/3-Linear/solution/R/lesson_3-R.ipynb b/translations/tr/2-Regression/3-Linear/solution/R/lesson_3-R.ipynb new file mode 100644 index 000000000..8082d76ed --- /dev/null +++ b/translations/tr/2-Regression/3-Linear/solution/R/lesson_3-R.ipynb @@ -0,0 +1,1086 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_3-R.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "5015d65d61ba75a223bfc56c273aa174", + "translation_date": "2025-09-06T13:24:39+00:00", + "source_file": "2-Regression/3-Linear/solution/R/lesson_3-R.ipynb", + "language_code": "tr" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "EgQw8osnsUV-" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Kabak Fiyatlandırması için Doğrusal ve Polinom Regresyon - Ders 3\n", + "

\n", + " \n", + "

Dasani Madipalli tarafından hazırlanan bilgi grafiği
\n", + "\n", + "\n", + "\n", + "\n", + "#### Giriş\n", + "\n", + "Şimdiye kadar, bu derste kullanacağımız kabak fiyatlandırma veri setinden toplanan örnek verilerle regresyonun ne olduğunu keşfettiniz. Ayrıca bunu `ggplot2` kullanarak görselleştirdiniz. 💪\n", + "\n", + "Artık ML için regresyona daha derinlemesine dalmaya hazırsınız. Bu derste, iki tür regresyon hakkında daha fazla bilgi edineceksiniz: *temel doğrusal regresyon* ve *polinom regresyon*, bu tekniklerin altında yatan matematikle birlikte.\n", + "\n", + "> Bu müfredat boyunca, matematik bilgisi gereksinimini minimumda tutmayı hedefliyoruz ve diğer alanlardan gelen öğrenciler için erişilebilir hale getirmeye çalışıyoruz. Bu nedenle notlar, 🧮 matematiksel açıklamalar, diyagramlar ve kavramayı kolaylaştıracak diğer öğrenme araçlarına dikkat edin.\n", + "\n", + "#### Hazırlık\n", + "\n", + "Hatırlatma olarak, bu veriyi yükleyerek ona sorular sormayı amaçlıyorsunuz.\n", + "\n", + "- Kabak almak için en iyi zaman ne zaman?\n", + "\n", + "- Mini kabakların bir kasasının fiyatı ne kadar olabilir?\n", + "\n", + "- Kabakları yarım sepetlik sepetlerde mi yoksa 1 1/9 sepetlik kutularda mı almalıyım? Bu veriyi daha fazla incelemeye devam edelim.\n", + "\n", + "Önceki derste, bir `tibble` (veri çerçevesinin modern bir yeniden tasarımı) oluşturdunuz ve orijinal veri setinin bir kısmını fiyatlandırmayı sepet başına standartlaştırarak doldurdunuz. Ancak bunu yaparak, yalnızca yaklaşık 400 veri noktası ve yalnızca sonbahar ayları için veri toplayabildiniz. Belki veriyi daha fazla temizleyerek onun doğası hakkında biraz daha ayrıntı elde edebiliriz? Göreceğiz... 
🕵️‍♀️\n", + "\n", + "Bu görev için aşağıdaki paketlere ihtiyacımız olacak:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/), veri bilimini daha hızlı, kolay ve eğlenceli hale getirmek için tasarlanmış bir [R paketleri koleksiyonudur](https://www.tidyverse.org/packages).\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) çerçevesi, modelleme ve makine öğrenimi için bir [paketler koleksiyonudur](https://www.tidymodels.org/packages).\n", + "\n", + "- `janitor`: [janitor paketi](https://github.com/sfirke/janitor), kirli verileri incelemek ve temizlemek için basit araçlar sağlar.\n", + "\n", + "- `corrplot`: [corrplot paketi](https://cran.r-project.org/web/packages/corrplot/vignettes/corrplot-intro.html), değişkenler arasındaki gizli desenleri tespit etmeye yardımcı olmak için otomatik değişken sıralamasını destekleyen korelasyon matrisinde görsel bir keşif aracı sağlar.\n", + "\n", + "Bu paketleri şu şekilde yükleyebilirsiniz:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\", \"janitor\", \"corrplot\"))`\n", + "\n", + "Aşağıdaki script, bu modülü tamamlamak için gereken paketlere sahip olup olmadığınızı kontrol eder ve eksik olanları sizin için yükler.\n" + ], + "metadata": { + "id": "WqQPS1OAsg3H" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "suppressWarnings(if (!require(\"pacman\")) install.packages(\"pacman\"))\n", + "\n", + "pacman::p_load(tidyverse, tidymodels, janitor, corrplot)" + ], + "outputs": [], + "metadata": { + "id": "tA4C2WN3skCf", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "c06cd805-5534-4edc-f72b-d0d1dab96ac0" + } + }, + { + "cell_type": "markdown", + "source": [ + "Bu harika paketleri daha sonra yükleyip mevcut R oturumumuzda kullanılabilir hale getireceğiz. (Bu sadece bir örnekleme için, `pacman::p_load()` bunu zaten sizin için yapıyor)\n", + "\n", + "## 1. Bir doğrusal regresyon çizgisi\n", + "\n", + "Birinci 
Derste öğrendiğiniz gibi, doğrusal regresyon çalışmasının amacı, aşağıdakilere en uygun *çizgiyi* çizebilmektir:\n", + "\n", + "- **Değişken ilişkilerini göstermek**. Değişkenler arasındaki ilişkiyi göstermek.\n", + "\n", + "- **Tahminlerde bulunmak**. Yeni bir veri noktasının bu çizgiye göre nerede yer alacağını doğru bir şekilde tahmin etmek.\n", + "\n", + "Bu tür bir çizgiyi çizmek için **En Küçük Kareler Regresyonu** adı verilen bir istatistiksel teknik kullanırız. `En küçük kareler` terimi, regresyon çizgisinin çevresindeki tüm veri noktalarının karelerinin alınması ve ardından toplanması anlamına gelir. İdeal olarak, bu toplam mümkün olduğunca küçük olmalıdır, çünkü daha az hata, yani `en küçük kareler` istiyoruz. Bu nedenle, en uygun çizgi, kare hataların toplamı için en düşük değeri veren çizgidir - bu yüzden adı *en küçük kareler regresyonu*.\n", + "\n", + "Bunu yapmamızın nedeni, tüm veri noktalarımızdan en az toplam mesafeye sahip bir çizgi modellemek istememizdir. Ayrıca, büyüklüğüyle ilgilendiğimiz için terimleri toplama işleminden önce karesini alırız, yönüyle değil.\n", + "\n", + "> **🧮 Matematiği Göster**\n", + ">\n", + "> Bu çizgi, *en uygun çizgi* olarak adlandırılır ve [bir denklemle](https://en.wikipedia.org/wiki/Simple_linear_regression) ifade edilebilir:\n", + ">\n", + "> Y = a + bX\n", + ">\n", + "> `X`, '`açıklayıcı değişken` veya `tahmin edici`'dir. `Y`, '`bağımlı değişken` veya `sonuç`'tur. 
Çizginin eğimi `b` ve `a` ise y-keseni, yani `X = 0` olduğunda `Y`'nin değerini ifade eder.\n", + ">\n", + "\n", + "> ![](../../../../../../2-Regression/3-Linear/solution/images/slope.png \"eğim = $y/x$\")\n", + " Jen Looper tarafından hazırlanan infografik\n", + ">\n", + "> İlk olarak, eğim `b` hesaplanır.\n", + ">\n", + "> Başka bir deyişle, ve kabak verilerimizin orijinal sorusuna atıfta bulunarak: \"Bir ay boyunca bir kabak sepetinin fiyatını tahmin edin\", `X` fiyatı ifade ederken, `Y` satış ayını ifade eder.\n", + ">\n", + "> ![](../../../../../../2-Regression/3-Linear/solution/images/calculation.png)\n", + " Jen Looper tarafından hazırlanan infografik\n", + "> \n", + "> `Y` değerini hesaplayın. Eğer yaklaşık 4 dolar ödüyorsanız, bu Nisan olmalı!\n", + ">\n", + "> Çizgiyi hesaplayan matematik, çizginin eğimini göstermelidir, bu da aynı zamanda kesişim noktasına, yani `X = 0` olduğunda `Y`'nin konumuna bağlıdır.\n", + ">\n", + "> Bu değerlerin hesaplama yöntemini [Math is Fun](https://www.mathsisfun.com/data/least-squares-regression.html) web sitesinde gözlemleyebilirsiniz. Ayrıca, sayıların değerlerinin çizgiyi nasıl etkilediğini görmek için [bu En Küçük Kareler hesaplayıcısını](https://www.mathsisfun.com/data/least-squares-calculator.html) ziyaret edin.\n", + "\n", + "Korkutucu değil, değil mi? 🤓\n", + "\n", + "#### Korelasyon\n", + "\n", + "Anlamanız gereken bir diğer terim, verilen X ve Y değişkenleri arasındaki **Korelasyon Katsayısı**dır. Bir dağılım grafiği kullanarak bu katsayıyı hızlıca görselleştirebilirsiniz. 
Verilerin düzgün bir çizgi üzerinde sıralandığı bir grafik yüksek korelasyona sahiptir, ancak X ve Y arasında her yere dağılmış veri noktalarına sahip bir grafik düşük korelasyona sahiptir.\n", + "\n", + "İyi bir doğrusal regresyon modeli, En Küçük Kareler Regresyonu yöntemiyle ve bir regresyon çizgisiyle yüksek (1'e daha yakın, 0'dan uzak) bir Korelasyon Katsayısına sahip olan modeldir.\n" + ], + "metadata": { + "id": "cdX5FRpvsoP5" + } + }, + { + "cell_type": "markdown", + "source": [ + "## **2. Verilerle dans: modelleme için kullanılacak bir veri çerçevesi oluşturma**\n", + "\n", + "

\n", + " \n", + "

@allison_horst tarafından yapılmış bir sanat eseri
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "WdUKXk7Bs8-V" + } + }, + { + "cell_type": "markdown", + "source": [ + "Gerekli kütüphaneleri ve veri setini yükleyin. Verileri, aşağıdaki alt küme verilerini içeren bir veri çerçevesine dönüştürün:\n", + "\n", + "- Sadece kile başına fiyatlandırılan kabakları alın\n", + "\n", + "- Tarihi bir aya dönüştürün\n", + "\n", + "- Fiyatı, yüksek ve düşük fiyatların ortalaması olarak hesaplayın\n", + "\n", + "- Fiyatı, kile miktarına göre fiyatlandırmayı yansıtacak şekilde dönüştürün\n", + "\n", + "> Bu adımları [önceki derste](https://github.com/microsoft/ML-For-Beginners/blob/main/2-Regression/2-Data/solution/R/lesson_2-R.ipynb) ele almıştık.\n" + ], + "metadata": { + "id": "fMCtu2G2s-p8" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load the core Tidyverse packages\n", + "library(tidyverse)\n", + "library(lubridate)\n", + "\n", + "# Import the pumpkins data\n", + "pumpkins <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/2-Regression/data/US-pumpkins.csv\")\n", + "\n", + "\n", + "# Get a glimpse and dimensions of the data\n", + "glimpse(pumpkins)\n", + "\n", + "\n", + "# Print the first 50 rows of the data set\n", + "pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "ryMVZEEPtERn" + } + }, + { + "cell_type": "markdown", + "source": [ + "Saf bir macera ruhuyla, kirli verileri incelemek ve temizlemek için basit işlevler sağlayan [`janitor paketi`](https://github.com/sfirke/janitor)'ni keşfedelim. 
Örneğin, verilerimiz için sütun adlarına bir göz atalım:\n" + ], + "metadata": { + "id": "xcNxM70EtJjb" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Return column names\n", + "pumpkins %>% \n", + " names()" + ], + "outputs": [], + "metadata": { + "id": "5XtpaIigtPfW" + } + }, + { + "cell_type": "markdown", + "source": [ + "🤔 Daha iyisini yapabiliriz. Bu sütun adlarını [snake_case](https://en.wikipedia.org/wiki/Snake_case) kuralına dönüştürerek `janitor::clean_names` kullanarak `friendR` yapalım. Bu fonksiyon hakkında daha fazla bilgi edinmek için: `?clean_names`\n" + ], + "metadata": { + "id": "IbIqrMINtSHe" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Clean names to the snake_case convention\n", + "pumpkins <- pumpkins %>% \n", + " clean_names(case = \"snake\")\n", + "\n", + "# Return column names\n", + "pumpkins %>% \n", + " names()" + ], + "outputs": [], + "metadata": { + "id": "a2uYvclYtWvX" + } + }, + { + "cell_type": "markdown", + "source": [ + "Çok düzenli 🧹! Şimdi, önceki derste olduğu gibi `dplyr` kullanarak verilerle bir dans! 
💃\n" + ], + "metadata": { + "id": "HfhnuzDDtaDd" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Select desired columns\n", + "pumpkins <- pumpkins %>% \n", + " select(variety, city_name, package, low_price, high_price, date)\n", + "\n", + "\n", + "\n", + "# Extract the month from the dates to a new column\n", + "pumpkins <- pumpkins %>%\n", + " mutate(date = mdy(date),\n", + " month = month(date)) %>% \n", + " select(-date)\n", + "\n", + "\n", + "\n", + "# Create a new column for average Price\n", + "pumpkins <- pumpkins %>% \n", + " mutate(price = (low_price + high_price)/2)\n", + "\n", + "\n", + "# Retain only pumpkins with the string \"bushel\"\n", + "new_pumpkins <- pumpkins %>% \n", + " filter(str_detect(string = package, pattern = \"bushel\"))\n", + "\n", + "\n", + "# Normalize the pricing so that you show the pricing per bushel, not per 1 1/9 or 1/2 bushel\n", + "new_pumpkins <- new_pumpkins %>% \n", + " mutate(price = case_when(\n", + " str_detect(package, \"1 1/9\") ~ price/(1.1),\n", + " str_detect(package, \"1/2\") ~ price*2,\n", + " TRUE ~ price))\n", + "\n", + "# Relocate column positions\n", + "new_pumpkins <- new_pumpkins %>% \n", + " relocate(month, .before = variety)\n", + "\n", + "\n", + "# Display the first 5 rows\n", + "new_pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "X0wU3gQvtd9f" + } + }, + { + "cell_type": "markdown", + "source": [ + "Tebrikler! 
👌 Artık yeni regresyon modelinizi oluşturabileceğiniz temiz ve düzenli bir veri setine sahipsiniz!\n", + "\n", + "Bir dağılım grafiği ister misiniz?\n" + ], + "metadata": { + "id": "UpaIwaxqth82" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Set theme\n", + "theme_set(theme_light())\n", + "\n", + "# Make a scatter plot of month and price\n", + "new_pumpkins %>% \n", + " ggplot(mapping = aes(x = month, y = price)) +\n", + " geom_point(size = 1.6)\n" + ], + "outputs": [], + "metadata": { + "id": "DXgU-j37tl5K" + } + }, + { + "cell_type": "markdown", + "source": [ + "Bir dağılım grafiği, elimizde yalnızca Ağustos'tan Aralık'a kadar olan ay verilerinin bulunduğunu hatırlatıyor. Doğrusal bir şekilde sonuçlara varabilmek için muhtemelen daha fazla veriye ihtiyacımız var.\n", + "\n", + "Modelleme verilerimize tekrar bir göz atalım:\n" + ], + "metadata": { + "id": "Ve64wVbwtobI" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Display first 5 rows\n", + "new_pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "HFQX2ng1tuSJ" + } + }, + { + "cell_type": "markdown", + "source": [ + "Bir kabağın fiyatını (`price`), karakter türündeki `city_name` veya `package` sütunlarına dayanarak tahmin etmek isteseydik ne olurdu? Ya da daha basit bir şekilde, örneğin `package` ve `price` arasındaki (her iki girdinin de sayısal olmasını gerektiren) korelasyonu nasıl bulabilirdik? 🤷🤷\n", + "\n", + "Makine öğrenimi modelleri, metin değerlerinden ziyade sayısal özelliklerle daha iyi çalışır, bu nedenle genellikle kategorik özellikleri sayısal temsillere dönüştürmeniz gerekir.\n", + "\n", + "Bu, tahmin edicilerimizi bir modelin etkili bir şekilde kullanmasını kolaylaştıracak şekilde yeniden biçimlendirme yolunu bulmamız gerektiği anlamına gelir; bu sürece `özellik mühendisliği` denir.\n" + ], + "metadata": { + "id": "7hsHoxsStyjJ" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 3. 
Modeller için verileri tariflerle ön işleme 👩‍🍳👨‍🍳\n", + "\n", + "Tahmin edici değerleri yeniden biçimlendirerek bir modelin bunları daha etkili kullanmasını sağlama faaliyetlerine `özellik mühendisliği` denir.\n", + "\n", + "Farklı modellerin farklı ön işleme gereksinimleri vardır. Örneğin, en küçük kareler yöntemi `ay, çeşit ve şehir_adı gibi kategorik değişkenlerin kodlanmasını` gerektirir. Bu, basitçe `kategorik değerler` içeren bir sütunun, orijinal sütunun yerine geçen bir veya daha fazla `sayısal sütuna` dönüştürülmesini içerir.\n", + "\n", + "Örneğin, verilerinizde aşağıdaki kategorik özellik bulunduğunu varsayalım:\n", + "\n", + "| şehir |\n", + "|:--------:|\n", + "| Denver |\n", + "| Nairobi |\n", + "| Tokyo |\n", + "\n", + "*Ordinal kodlama* uygulayarak her kategoriye benzersiz bir tam sayı değeri atayabilirsiniz, şöyle:\n", + "\n", + "| şehir |\n", + "|:-----:|\n", + "| 0 |\n", + "| 1 |\n", + "| 2 |\n", + "\n", + "Ve işte bunu verilerimize uygulayacağız!\n", + "\n", + "Bu bölümde, verilerinizi modelinizi eğitmeden **önce** ön işleme konusunda size yardımcı olmak için tasarlanmış bir başka harika Tidymodels paketi olan [recipes](https://tidymodels.github.io/recipes/) paketini keşfedeceğiz. Temelde bir tarif, bir veri setine modelleme için hazır hale getirmek amacıyla hangi adımların uygulanması gerektiğini tanımlayan bir nesnedir.\n", + "\n", + "Şimdi, tahmin edici sütunlardaki tüm gözlemler için benzersiz bir tam sayı atayarak verilerimizi modelleme için hazırlayan bir tarif oluşturalım:\n" + ], + "metadata": { + "id": "AD5kQbcvt3Xl" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Specify a recipe\n", + "pumpkins_recipe <- recipe(price ~ ., data = new_pumpkins) %>% \n", + " step_integer(all_predictors(), zero_based = TRUE)\n", + "\n", + "\n", + "# Print out the recipe\n", + "pumpkins_recipe" + ], + "outputs": [], + "metadata": { + "id": "BNaFKXfRt9TU" + } + }, + { + "cell_type": "markdown", + "source": [ + "Harika! 
👏 İlk tarifimizi oluşturduk ve bu tarif bir sonucu (fiyat) ve buna karşılık gelen tahmin edicileri belirtiyor, ayrıca tüm tahmin edici sütunların bir dizi tam sayıya kodlanması gerektiğini ifade ediyor 🙌! Hadi bunu hızlıca parçalarına ayıralım:\n", + "\n", + "- `recipe()` çağrısı, bir formül ile birlikte, `new_pumpkins` verilerini referans alarak değişkenlerin *rollerini* tarife bildirir. Örneğin, `price` sütunu bir `outcome` rolüne atanmışken, diğer sütunlar bir `predictor` rolüne atanmıştır.\n", + "\n", + "- `step_integer(all_predictors(), zero_based = TRUE)` tüm tahmin edicilerin 0'dan başlayan bir numaralandırma ile bir dizi tam sayıya dönüştürülmesi gerektiğini belirtir.\n", + "\n", + "Eminiz ki şu tür düşünceleriniz olabilir: \"Bu çok havalı!! Ama ya tariflerin tam olarak beklediğim gibi çalıştığını doğrulamam gerekirse? 🤔\"\n", + "\n", + "Bu harika bir düşünce! Görüyorsunuz, tarifiniz bir kez tanımlandıktan sonra, veriyi gerçekten ön işlemek için gereken parametreleri tahmin edebilir ve ardından işlenmiş veriyi çıkarabilirsiniz. Tidymodels kullanırken genellikle bunu yapmanız gerekmez (birazdan normal yöntemi göreceğiz-\\> `workflows`), ancak tariflerin beklediğiniz gibi çalıştığını doğrulamak için bir tür kontrol yapmak istediğinizde işe yarayabilir.\n", + "\n", + "Bunun için iki ek fiile ihtiyacınız olacak: `prep()` ve `bake()`. Her zamanki gibi, [`Allison Horst`](https://github.com/allisonhorst/stats-illustrations) tarafından hazırlanan küçük R arkadaşlarımız bunu daha iyi anlamanıza yardımcı oluyor!\n", + "\n", + "

\n", + " \n", + "

@allison_horst tarafından yapılmış sanat eseri
\n" + ], + "metadata": { + "id": "KEiO0v7kuC9O" + } + }, + { + "cell_type": "markdown", + "source": [ + "[`prep()`](https://recipes.tidymodels.org/reference/prep.html): bir eğitim setinden gerekli parametreleri tahmin eder ve bu parametreler daha sonra diğer veri setlerine uygulanabilir. Örneğin, belirli bir tahmin edici sütunu için hangi gözlem 0, 1, 2 gibi bir tam sayı ile atanacak.\n", + "\n", + "[`bake()`](https://recipes.tidymodels.org/reference/bake.html): hazırlanmış bir tarifi alır ve işlemleri herhangi bir veri setine uygular.\n", + "\n", + "Öyleyse, tariflerimizi hazırlayıp uygulayalım ve gerçekten doğrulayalım ki perde arkasında tahmin edici sütunlar önce kodlanacak, ardından bir model oluşturulacak.\n" + ], + "metadata": { + "id": "Q1xtzebuuTCP" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Prep the recipe\n", + "pumpkins_prep <- prep(pumpkins_recipe)\n", + "\n", + "# Bake the recipe to extract a preprocessed new_pumpkins data\n", + "baked_pumpkins <- bake(pumpkins_prep, new_data = NULL)\n", + "\n", + "# Print out the baked data set\n", + "baked_pumpkins %>% \n", + " slice_head(n = 10)" + ], + "outputs": [], + "metadata": { + "id": "FGBbJbP_uUUn" + } + }, + { + "cell_type": "markdown", + "source": [ + "Woo-hoo!🥳 İşlenmiş veri `baked_pumpkins` tüm tahmin edicilerinin kodlandığını doğruladı, bu da tarif olarak tanımlanan ön işleme adımlarının beklendiği gibi çalışacağını gösteriyor. Bu, sizin için okumayı zorlaştırabilir ama Tidymodels için çok daha anlaşılır hale getirir! Hangi gözlemin ilgili bir tam sayıya eşlendiğini bulmak için biraz zaman ayırın.\n", + "\n", + "Ayrıca, `baked_pumpkins` üzerinde hesaplamalar yapabileceğimiz bir veri çerçevesi olduğunu belirtmekte fayda var.\n", + "\n", + "Örneğin, verilerinizdeki iki nokta arasında iyi bir korelasyon bulmaya çalışabiliriz, böylece potansiyel olarak iyi bir tahmin modeli oluşturabiliriz. Bunu yapmak için `cor()` fonksiyonunu kullanacağız. 
Fonksiyon hakkında daha fazla bilgi edinmek için `?cor()` yazabilirsiniz.\n" + ], + "metadata": { + "id": "1dvP0LBUueAW" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Find the correlation between the city_name and the price\n", + "cor(baked_pumpkins$city_name, baked_pumpkins$price)\n", + "\n", + "# Find the correlation between the package and the price\n", + "cor(baked_pumpkins$package, baked_pumpkins$price)\n" + ], + "outputs": [], + "metadata": { + "id": "3bQzXCjFuiSV" + } + }, + { + "cell_type": "markdown", + "source": [ + "Görünüşe göre, Şehir ve Fiyat arasında yalnızca zayıf bir ilişki var. Ancak Paket ve Fiyatı arasında biraz daha iyi bir ilişki bulunuyor. Bu mantıklı, değil mi? Genelde, ürün kutusu ne kadar büyükse, fiyat da o kadar yüksek olur.\n", + "\n", + "Bu sırada, tüm sütunların bir korelasyon matrisini `corrplot` paketi kullanarak görselleştirmeyi de deneyelim.\n" + ], + "metadata": { + "id": "BToPWbgjuoZw" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load the corrplot package\n", + "library(corrplot)\n", + "\n", + "# Obtain correlation matrix\n", + "corr_mat <- cor(baked_pumpkins %>% \n", + " # Drop columns that are not really informative\n", + " select(-c(low_price, high_price)))\n", + "\n", + "# Make a correlation plot between the variables\n", + "corrplot(corr_mat, method = \"shade\", shade.col = NA, tl.col = \"black\", tl.srt = 45, addCoef.col = \"black\", cl.pos = \"n\", order = \"original\")" + ], + "outputs": [], + "metadata": { + "id": "ZwAL3ksmutVR" + } + }, + { + "cell_type": "markdown", + "source": [ + "🤩🤩 Çok daha iyi.\n", + "\n", + "Bu verilerle şimdi sorulabilecek iyi bir soru şu olabilir: '`Belirli bir kabak paketi için hangi fiyatı bekleyebilirim?`' Haydi başlayalım!\n", + "\n", + "> Not: **`pumpkins_prep`** tarifini **`new_data = NULL`** ile **`bake()`** ettiğinizde, işlenmiş (yani kodlanmış) eğitim verilerini elde edersiniz. 
Örneğin, başka bir veri setiniz (örneğin bir test seti) varsa ve bir tarifin onu nasıl ön işleyeceğini görmek istiyorsanız, **`pumpkins_prep`** tarifini **`new_data = test_set`** ile **`bake()`** etmeniz yeterlidir.\n", + "\n", + "## 4. Doğrusal regresyon modeli oluşturun\n", + "\n", + "

\n", + " \n", + "

Dasani Madipalli tarafından hazırlanan bilgi grafiği
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "YqXjLuWavNxW" + } + }, + { + "cell_type": "markdown", + "source": [ + "Artık bir tarif oluşturduğumuza ve verilerin uygun şekilde ön işleneceğini doğruladığımıza göre, şimdi şu soruyu yanıtlamak için bir regresyon modeli oluşturalım: `Belirli bir kabak paketi için hangi fiyatı bekleyebilirim?`\n", + "\n", + "#### Eğitim seti kullanarak bir doğrusal regresyon modeli eğitin\n", + "\n", + "Muhtemelen zaten fark etmişsinizdir, *price* sütunu `sonuç` değişkeni iken *package* sütunu `tahmin edici` değişkendir.\n", + "\n", + "Bunu yapmak için, önce verileri %80'i eğitim setine ve %20'si test setine gidecek şekilde böleceğiz, ardından tahmin edici sütunu bir dizi tam sayıya kodlayacak bir tarif tanımlayacağız ve ardından bir model spesifikasyonu oluşturacağız. Tarifimizi hazırlayıp pişirmeyeceğiz çünkü verileri beklendiği gibi ön işleyeceğini zaten biliyoruz.\n" + ], + "metadata": { + "id": "Pq0bSzCevW-h" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "set.seed(2056)\n", + "# Split the data into training and test sets\n", + "pumpkins_split <- new_pumpkins %>% \n", + " initial_split(prop = 0.8)\n", + "\n", + "\n", + "# Extract training and test data\n", + "pumpkins_train <- training(pumpkins_split)\n", + "pumpkins_test <- testing(pumpkins_split)\n", + "\n", + "\n", + "\n", + "# Create a recipe for preprocessing the data\n", + "lm_pumpkins_recipe <- recipe(price ~ package, data = pumpkins_train) %>% \n", + " step_integer(all_predictors(), zero_based = TRUE)\n", + "\n", + "\n", + "\n", + "# Create a linear model specification\n", + "lm_spec <- linear_reg() %>% \n", + " set_engine(\"lm\") %>% \n", + " set_mode(\"regression\")" + ], + "outputs": [], + "metadata": { + "id": "CyoEh_wuvcLv" + } + }, + { + "cell_type": "markdown", + "source": [ + "Harika iş çıkardınız! 
Artık bir tarifimiz ve bir model spesifikasyonumuz olduğuna göre, bunları bir araya getirip, önce veriyi ön işleme (arka planda hazırlık + pişirme), ardından ön işlenmiş veri üzerinde modeli eğitme ve potansiyel olarak son işlem aktivitelerine olanak tanıyan bir nesneye dönüştürmenin bir yolunu bulmamız gerekiyor. İçiniz rahatladı mı!🤩\n", + "\n", + "Tidymodels'de, bu kullanışlı nesne [`workflow`](https://workflows.tidymodels.org/) olarak adlandırılır ve modelleme bileşenlerinizi pratik bir şekilde barındırır! Python'da buna *pipelines* derdik.\n", + "\n", + "O halde her şeyi bir workflow içinde bir araya getirelim!📦\n" + ], + "metadata": { + "id": "G3zF_3DqviFJ" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Hold modelling components in a workflow\n", + "lm_wf <- workflow() %>% \n", + " add_recipe(lm_pumpkins_recipe) %>% \n", + " add_model(lm_spec)\n", + "\n", + "# Print out the workflow\n", + "lm_wf" + ], + "outputs": [], + "metadata": { + "id": "T3olroU3v-WX" + } + }, + { + "cell_type": "markdown", + "source": [ + "Üstelik, bir iş akışı tıpkı bir model gibi uygun hale getirilebilir/eğitilebilir.\n" + ], + "metadata": { + "id": "zd1A5tgOwEPX" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Train the model\n", + "lm_wf_fit <- lm_wf %>% \n", + " fit(data = pumpkins_train)\n", + "\n", + "# Print the model coefficients learned \n", + "lm_wf_fit" + ], + "outputs": [], + "metadata": { + "id": "NhJagFumwFHf" + } + }, + { + "cell_type": "markdown", + "source": [ + "Model çıktısından, eğitim sırasında öğrenilen katsayıları görebiliriz. Bu katsayılar, gerçek ve tahmin edilen değişken arasındaki toplam hatayı en aza indiren en iyi uyum çizgisinin katsayılarını temsil eder.\n", + "\n", + "#### Test seti kullanarak model performansını değerlendirme\n", + "\n", + "Modelin nasıl performans gösterdiğini görme zamanı 📏! 
Bunu nasıl yaparız?\n", + "\n", + "Artık modeli eğittiğimize göre, test_set için tahminler yapmak için `parsnip::predict()` fonksiyonunu kullanabiliriz. Ardından, bu tahminleri gerçek etiket değerleriyle karşılaştırarak modelin ne kadar iyi (ya da kötü!) çalıştığını değerlendirebiliriz.\n", + "\n", + "Hadi test seti için tahminler yaparak başlayalım ve ardından sütunları test setine bağlayalım.\n" + ], + "metadata": { + "id": "_4QkGtBTwItF" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make predictions for the test set\n", + "predictions <- lm_wf_fit %>% \n", + " predict(new_data = pumpkins_test)\n", + "\n", + "\n", + "# Bind predictions to the test set\n", + "lm_results <- pumpkins_test %>% \n", + " select(c(package, price)) %>% \n", + " bind_cols(predictions)\n", + "\n", + "\n", + "# Print the first ten rows of the tibble\n", + "lm_results %>% \n", + " slice_head(n = 10)" + ], + "outputs": [], + "metadata": { + "id": "UFZzTG0gwTs9" + } + }, + { + "cell_type": "markdown", + "source": [ + "Evet, bir model eğittiniz ve tahminler yapmak için kullandınız!🔮 Peki, bu model ne kadar iyi? Hadi modelin performansını değerlendirelim!\n", + "\n", + "Tidymodels'de bunu `yardstick::metrics()` kullanarak yapıyoruz! Lineer regresyon için şu metriklere odaklanalım:\n", + "\n", + "- `Root Mean Square Error (RMSE)`: [MSE](https://en.wikipedia.org/wiki/Mean_squared_error)'nin karekökü. Bu, etiketle (bu durumda bir kabağın fiyatı) aynı birimde mutlak bir metrik sağlar. Değer ne kadar küçükse, model o kadar iyidir (basit bir anlamda, tahminlerin ortalama olarak ne kadar yanlış olduğunu temsil eder!)\n", + "\n", + "- `Coefficient of Determination (genellikle R-squared veya R2 olarak bilinir)`: Daha yüksek bir değerin daha iyi bir uyumu temsil ettiği göreceli bir metrik. 
Temelde, bu metrik modelin tahmin edilen ve gerçek etiket değerleri arasındaki varyansın ne kadarını açıklayabildiğini gösterir.\n" + ], + "metadata": { + "id": "0A5MjzM7wW9M" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Evaluate performance of linear regression\n", + "metrics(data = lm_results,\n", + " truth = price,\n", + " estimate = .pred)" + ], + "outputs": [], + "metadata": { + "id": "reJ0UIhQwcEH" + } + }, + { + "cell_type": "markdown", + "source": [ + "Model performansı düşüyor. Paket ve fiyatın bir dağılım grafiğini görselleştirerek daha iyi bir gösterge elde edip, ardından yapılan tahminleri kullanarak en iyi uyum çizgisini üzerine ekleyebilir miyiz, bir bakalım.\n", + "\n", + "Bu, test setini hazırlayıp işleyerek paket sütununu kodlamamız ve ardından bunu modelimizin yaptığı tahminlerle birleştirmemiz gerektiği anlamına geliyor.\n" + ], + "metadata": { + "id": "fdgjzjkBwfWt" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Encode package column\n", + "package_encode <- lm_pumpkins_recipe %>% \n", + " prep() %>% \n", + " bake(new_data = pumpkins_test) %>% \n", + " select(package)\n", + "\n", + "\n", + "# Bind encoded package column to the results\n", + "lm_results <- lm_results %>% \n", + " bind_cols(package_encode %>% \n", + " rename(package_integer = package)) %>% \n", + " relocate(package_integer, .after = package)\n", + "\n", + "\n", + "# Print new results data frame\n", + "lm_results %>% \n", + " slice_head(n = 5)\n", + "\n", + "\n", + "# Make a scatter plot\n", + "lm_results %>% \n", + " ggplot(mapping = aes(x = package_integer, y = price)) +\n", + " geom_point(size = 1.6) +\n", + " # Overlay a line of best fit\n", + " geom_line(aes(y = .pred), color = \"orange\", size = 1.2) +\n", + " xlab(\"package\")\n", + " \n" + ], + "outputs": [], + "metadata": { + "id": "R0nw719lwkHE" + } + }, + { + "cell_type": "markdown", + "source": [ + "Harika! 
Gördüğünüz gibi, doğrusal regresyon modeli bir paket ile ona karşılık gelen fiyat arasındaki ilişkiyi gerçekten iyi bir şekilde genelleştiremiyor.\n", + "\n", + "🎃 Tebrikler, birkaç çeşit kabak fiyatını tahmin etmeye yardımcı olabilecek bir model oluşturdunuz. Tatil kabak bahçeniz harika görünecek. Ancak muhtemelen daha iyi bir model oluşturabilirsiniz!\n", + "\n", + "## 5. Polinom regresyon modeli oluşturun\n", + "\n", + "

\n", + " \n", + "

Dasani Madipalli tarafından hazırlanan bilgi grafiği
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "HOCqJXLTwtWI" + } + }, + { + "cell_type": "markdown", + "source": [ + "Bazen verilerimiz doğrusal bir ilişkiye sahip olmayabilir, ancak yine de bir sonucu tahmin etmek isteyebiliriz. Polinom regresyon, daha karmaşık doğrusal olmayan ilişkiler için tahmin yapmamıza yardımcı olabilir.\n", + "\n", + "Örneğin, kabak veri setimizdeki paket ve fiyat arasındaki ilişkiyi ele alalım. Bazen değişkenler arasında doğrusal bir ilişki olabilir - kabak hacmi büyüdükçe fiyatın artması gibi - ancak bazen bu ilişkiler bir düzlem veya doğru olarak çizilemez.\n", + "\n", + "> ✅ İşte [polinom regresyon](https://online.stat.psu.edu/stat501/lesson/9/9.8) kullanabilecek verilere dair bazı örnekler\n", + ">\n", + "> Önceki grafikte Çeşit ve Fiyat arasındaki ilişkiye tekrar bir göz atın. Bu dağılım grafiği mutlaka bir doğru ile analiz edilmesi gereken bir ilişki gibi mi görünüyor? Belki de hayır. Bu durumda, polinom regresyonu deneyebilirsiniz.\n", + ">\n", + "> ✅ Polinomlar, bir veya daha fazla değişken ve katsayıdan oluşabilen matematiksel ifadelerdir.\n", + "\n", + "#### Eğitim seti kullanarak bir polinom regresyon modeli eğitin\n", + "\n", + "Polinom regresyon, doğrusal olmayan verilere daha iyi uyum sağlamak için *eğri bir çizgi* oluşturur.\n", + "\n", + "Bir polinom modelinin tahmin yapmada daha iyi performans gösterip göstermeyeceğini görelim. 
Daha önce izlediğimiz prosedüre benzer bir yol izleyerek devam edeceğiz:\n", + "\n", + "- Verilerimizi modellemeye hazırlamak için uygulanması gereken ön işleme adımlarını belirten bir tarif oluşturun, örneğin: tahmin edicileri kodlama ve *n* dereceli polinomlar hesaplama\n", + "\n", + "- Bir model spesifikasyonu oluşturun\n", + "\n", + "- Tarif ve model spesifikasyonunu bir iş akışında birleştirin\n", + "\n", + "- İş akışını uydurarak bir model oluşturun\n", + "\n", + "- Modelin test verilerinde ne kadar iyi performans gösterdiğini değerlendirin\n", + "\n", + "Haydi başlayalım!\n" + ], + "metadata": { + "id": "VcEIpRV9wzYr" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Specify a recipe\r\n", + "poly_pumpkins_recipe <-\r\n", + " recipe(price ~ package, data = pumpkins_train) %>%\r\n", + " step_integer(all_predictors(), zero_based = TRUE) %>% \r\n", + " step_poly(all_predictors(), degree = 4)\r\n", + "\r\n", + "\r\n", + "# Create a model specification\r\n", + "poly_spec <- linear_reg() %>% \r\n", + " set_engine(\"lm\") %>% \r\n", + " set_mode(\"regression\")\r\n", + "\r\n", + "\r\n", + "# Bundle recipe and model spec into a workflow\r\n", + "poly_wf <- workflow() %>% \r\n", + " add_recipe(poly_pumpkins_recipe) %>% \r\n", + " add_model(poly_spec)\r\n", + "\r\n", + "\r\n", + "# Create a model\r\n", + "poly_wf_fit <- poly_wf %>% \r\n", + " fit(data = pumpkins_train)\r\n", + "\r\n", + "\r\n", + "# Print learned model coefficients\r\n", + "poly_wf_fit\r\n", + "\r\n", + " " + ], + "outputs": [], + "metadata": { + "id": "63n_YyRXw3CC" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### Model performansını değerlendirin\n", + "\n", + "👏👏Bir polinom modeli oluşturdunuz, şimdi test seti üzerinde tahminler yapalım!\n" + ], + "metadata": { + "id": "-LHZtztSxDP0" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make price predictions on test data\r\n", + "poly_results <- poly_wf_fit %>% 
predict(new_data = pumpkins_test) %>% \r\n", + " bind_cols(pumpkins_test %>% select(c(package, price))) %>% \r\n", + " relocate(.pred, .after = last_col())\r\n", + "\r\n", + "\r\n", + "# Print the results\r\n", + "poly_results %>% \r\n", + " slice_head(n = 10)" + ], + "outputs": [], + "metadata": { + "id": "YUFpQ_dKxJGx" + } + }, + { + "cell_type": "markdown", + "source": [ + "Woo-hoo, hadi `yardstick::metrics()` kullanarak modelin test_set üzerindeki performansını değerlendirelim.\n" + ], + "metadata": { + "id": "qxdyj86bxNGZ" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "metrics(data = poly_results, truth = price, estimate = .pred)" + ], + "outputs": [], + "metadata": { + "id": "8AW5ltkBxXDm" + } + }, + { + "cell_type": "markdown", + "source": [ + "🤩🤩 Çok daha iyi performans.\n", + "\n", + "`rmse` yaklaşık 7'den yaklaşık 3'e düştü, bu da gerçek fiyat ile tahmin edilen fiyat arasındaki hatanın azaldığını gösteriyor. Bunu *kabaca* şu şekilde yorumlayabilirsiniz: Ortalama olarak, yanlış tahminler yaklaşık \\$3 kadar yanlıştır. `rsq` yaklaşık 0.4'ten 0.8'e yükseldi.\n", + "\n", + "Tüm bu metrikler, polinom modelinin doğrusal modelden çok daha iyi performans gösterdiğini gösteriyor. 
Harika iş!\n", + "\n", + "Hadi bunu görselleştirebilir miyiz bir bakalım!\n" + ], + "metadata": { + "id": "6gLHNZDwxYaS" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Bind encoded package column to the results\r\n", + "poly_results <- poly_results %>% \r\n", + " bind_cols(package_encode %>% \r\n", + " rename(package_integer = package)) %>% \r\n", + " relocate(package_integer, .after = package)\r\n", + "\r\n", + "\r\n", + "# Print new results data frame\r\n", + "poly_results %>% \r\n", + " slice_head(n = 5)\r\n", + "\r\n", + "\r\n", + "# Make a scatter plot\r\n", + "poly_results %>% \r\n", + " ggplot(mapping = aes(x = package_integer, y = price)) +\r\n", + " geom_point(size = 1.6) +\r\n", + " # Overlay a line of best fit\r\n", + " geom_line(aes(y = .pred), color = \"midnightblue\", size = 1.2) +\r\n", + " xlab(\"package\")\r\n" + ], + "outputs": [], + "metadata": { + "id": "A83U16frxdF1" + } + }, + { + "cell_type": "markdown", + "source": [ + "Verilerinize daha iyi uyan bir eğri çizgisi görebilirsiniz! 
🤩\n", + "\n", + "Bunu daha da düzgün hale getirmek için `geom_smooth` fonksiyonuna bir polinom formülü geçirerek şu şekilde yapabilirsiniz:\n" + ], + "metadata": { + "id": "4U-7aHOVxlGU" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make a scatter plot\r\n", + "poly_results %>% \r\n", + " ggplot(mapping = aes(x = package_integer, y = price)) +\r\n", + " geom_point(size = 1.6) +\r\n", + " # Overlay a line of best fit\r\n", + " geom_smooth(method = lm, formula = y ~ poly(x, degree = 4), color = \"midnightblue\", size = 1.2, se = FALSE) +\r\n", + " xlab(\"package\")" + ], + "outputs": [], + "metadata": { + "id": "5vzNT0Uexm-w" + } + }, + { + "cell_type": "markdown", + "source": [ + "Tıpkı pürüzsüz bir eğri gibi!🤩\n", + "\n", + "İşte yeni bir tahmin yapmanın yolu:\n" + ], + "metadata": { + "id": "v9u-wwyLxq4G" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make a hypothetical data frame\r\n", + "hypo_tibble <- tibble(package = \"bushel baskets\")\r\n", + "\r\n", + "# Make predictions using linear model\r\n", + "lm_pred <- lm_wf_fit %>% predict(new_data = hypo_tibble)\r\n", + "\r\n", + "# Make predictions using polynomial model\r\n", + "poly_pred <- poly_wf_fit %>% predict(new_data = hypo_tibble)\r\n", + "\r\n", + "# Return predictions in a list\r\n", + "list(\"linear model prediction\" = lm_pred, \r\n", + " \"polynomial model prediction\" = poly_pred)\r\n" + ], + "outputs": [], + "metadata": { + "id": "jRPSyfQGxuQv" + } + }, + { + "cell_type": "markdown", + "source": [ + "`polynomial model` tahmini, `price` ve `package` dağılım grafikleri göz önüne alındığında mantıklı görünüyor! Ve eğer bu model önceki modelden daha iyiyse, aynı verilere bakarak, bu daha pahalı kabaklar için bütçe ayırmanız gerekecek!\n", + "\n", + "🏆 Tebrikler! Bir derste iki regresyon modeli oluşturdunuz. 
Regresyonun son bölümünde, kategorileri belirlemek için lojistik regresyonu öğreneceksiniz.\n", + "\n", + "## **🚀Meydan Okuma**\n", + "\n", + "Bu not defterinde birkaç farklı değişkeni test edin ve korelasyonun model doğruluğuyla nasıl ilişkili olduğunu görün.\n", + "\n", + "## [**Ders sonrası test**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/14/)\n", + "\n", + "## **Gözden Geçirme ve Kendi Kendine Çalışma**\n", + "\n", + "Bu derste Doğrusal Regresyon hakkında bilgi edindik. Regresyonun diğer önemli türleri de vardır. Stepwise, Ridge, Lasso ve Elasticnet teknikleri hakkında okuyun. Daha fazla bilgi edinmek için çalışabileceğiniz iyi bir kurs: [Stanford Statistical Learning course](https://online.stanford.edu/courses/sohs-ystatslearning-statistical-learning).\n", + "\n", + "Harika Tidymodels çerçevesini nasıl kullanacağınızı öğrenmek istiyorsanız, lütfen aşağıdaki kaynaklara göz atın:\n", + "\n", + "- Tidymodels web sitesi: [Tidymodels ile Başlayın](https://www.tidymodels.org/start/)\n", + "\n", + "- Max Kuhn ve Julia Silge, [*Tidy Modeling with R*](https://www.tmwr.org/)*.*\n", + "\n", + "###### **TEŞEKKÜRLER:**\n", + "\n", + "[R'yi daha sıcak ve ilgi çekici hale getiren harika illüstrasyonları oluşturan Allison Horst](https://twitter.com/allison_horst?lang=en). Daha fazla illüstrasyonu onun [galerisinde](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM) bulabilirsiniz.\n" + ], + "metadata": { + "id": "8zOLOWqMxzk5" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, [Co-op Translator](https://github.com/Azure/co-op-translator) adlı yapay zeka çeviri hizmeti kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlıklar içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. 
Kritik bilgiler için profesyonel insan çevirisi önerilir. Bu çevirinin kullanımından kaynaklanan yanlış anlama veya yanlış yorumlamalardan sorumlu değiliz.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/tr/2-Regression/3-Linear/solution/notebook.ipynb b/translations/tr/2-Regression/3-Linear/solution/notebook.ipynb new file mode 100644 index 000000000..fa297ac73 --- /dev/null +++ b/translations/tr/2-Regression/3-Linear/solution/notebook.ipynb @@ -0,0 +1,1113 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Kabak Fiyatlandırması için Doğrusal ve Polinomial Regresyon - Ders 3\n", + "\n", + "Gerekli kütüphaneleri ve veri setini yükleyin. Verileri aşağıdaki alt küme içeren bir veri çerçevesine dönüştürün:\n", + "\n", + "- Sadece kile ile fiyatlandırılan kabakları alın\n", + "- Tarihi bir aya dönüştürün\n", + "- Fiyatı, yüksek ve düşük fiyatların ortalaması olarak hesaplayın\n", + "- Fiyatı kile miktarına göre fiyatlandırmayı yansıtacak şekilde dönüştürün\n" + ] + }, + { + "cell_type": "code", + "execution_count": 167, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NameTypePackageVarietySub VarietyGradeDateLow PriceHigh PriceMostly Low...Unit of SaleQualityConditionAppearanceStorageCropRepackTrans ModeUnnamed: 24Unnamed: 25
0BALTIMORENaN24 inch binsNaNNaNNaN4/29/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
1BALTIMORENaN24 inch binsNaNNaNNaN5/6/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
2BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
3BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
4BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN11/5/1690.0100.090.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " City Name Type Package Variety Sub Variety Grade Date \\\n", + "0 BALTIMORE NaN 24 inch bins NaN NaN NaN 4/29/17 \n", + "1 BALTIMORE NaN 24 inch bins NaN NaN NaN 5/6/17 \n", + "2 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "3 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "4 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 11/5/16 \n", + "\n", + " Low Price High Price Mostly Low ... Unit of Sale Quality Condition \\\n", + "0 270.0 280.0 270.0 ... NaN NaN NaN \n", + "1 270.0 280.0 270.0 ... NaN NaN NaN \n", + "2 160.0 160.0 160.0 ... NaN NaN NaN \n", + "3 160.0 160.0 160.0 ... NaN NaN NaN \n", + "4 90.0 100.0 90.0 ... NaN NaN NaN \n", + "\n", + " Appearance Storage Crop Repack Trans Mode Unnamed: 24 Unnamed: 25 \n", + "0 NaN NaN NaN E NaN NaN NaN \n", + "1 NaN NaN NaN E NaN NaN NaN \n", + "2 NaN NaN NaN N NaN NaN NaN \n", + "3 NaN NaN NaN N NaN NaN NaN \n", + "4 NaN NaN NaN N NaN NaN NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 167, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from datetime import datetime\n", + "\n", + "pumpkins = pd.read_csv('../../data/US-pumpkins.csv')\n", + "pumpkins.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 168, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MonthDayOfYearVarietyCityPackageLow PriceHigh PricePrice
709267PIE TYPEBALTIMORE1 1/9 bushel cartons15.015.013.636364
719267PIE TYPEBALTIMORE1 1/9 bushel cartons18.018.016.363636
7210274PIE TYPEBALTIMORE1 1/9 bushel cartons18.018.016.363636
7310274PIE TYPEBALTIMORE1 1/9 bushel cartons17.017.015.454545
7410281PIE TYPEBALTIMORE1 1/9 bushel cartons15.015.013.636364
\n", + "
" + ], + "text/plain": [ + " Month DayOfYear Variety City Package Low Price \\\n", + "70 9 267 PIE TYPE BALTIMORE 1 1/9 bushel cartons 15.0 \n", + "71 9 267 PIE TYPE BALTIMORE 1 1/9 bushel cartons 18.0 \n", + "72 10 274 PIE TYPE BALTIMORE 1 1/9 bushel cartons 18.0 \n", + "73 10 274 PIE TYPE BALTIMORE 1 1/9 bushel cartons 17.0 \n", + "74 10 281 PIE TYPE BALTIMORE 1 1/9 bushel cartons 15.0 \n", + "\n", + " High Price Price \n", + "70 15.0 13.636364 \n", + "71 18.0 16.363636 \n", + "72 18.0 16.363636 \n", + "73 17.0 15.454545 \n", + "74 15.0 13.636364 " + ] + }, + "execution_count": 168, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pumpkins = pumpkins[pumpkins['Package'].str.contains('bushel', case=True, regex=True)]\n", + "\n", + "new_columns = ['Package', 'Variety', 'City Name', 'Month', 'Low Price', 'High Price', 'Date']\n", + "pumpkins = pumpkins.drop([c for c in pumpkins.columns if c not in new_columns], axis=1)\n", + "\n", + "price = (pumpkins['Low Price'] + pumpkins['High Price']) / 2\n", + "\n", + "month = pd.DatetimeIndex(pumpkins['Date']).month\n", + "day_of_year = pd.to_datetime(pumpkins['Date']).apply(lambda dt: (dt-datetime(dt.year,1,1)).days)\n", + "\n", + "new_pumpkins = pd.DataFrame(\n", + " {'Month': month, \n", + " 'DayOfYear' : day_of_year, \n", + " 'Variety': pumpkins['Variety'], \n", + " 'City': pumpkins['City Name'], \n", + " 'Package': pumpkins['Package'], \n", + " 'Low Price': pumpkins['Low Price'],\n", + " 'High Price': pumpkins['High Price'], \n", + " 'Price': price})\n", + "\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1 1/9'), 'Price'] = price/1.1\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1/2'), 'Price'] = price*2\n", + "\n", + "new_pumpkins.head()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Bir dağılım grafiği, elimizde yalnızca Ağustos'tan Aralık'a kadar olan ay verilerinin bulunduğunu hatırlatır. 
Muhtemelen doğrusal bir şekilde sonuçlar çıkarabilmek için daha fazla veriye ihtiyacımız var.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 169, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 169, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "new_pumpkins.plot.scatter('Month','Price')" + ] + }, + { + "cell_type": "code", + "execution_count": 170, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 170, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "new_pumpkins.plot.scatter('DayOfYear','Price')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 171, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-0.14878293554077535\n", + "-0.16673322492745407\n" + ] + } + ], + "source": [ + "print(new_pumpkins['Month'].corr(new_pumpkins['Price']))\n", + "print(new_pumpkins['DayOfYear'].corr(new_pumpkins['Price']))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Görünüşe göre korelasyon oldukça küçük, ancak başka daha önemli bir ilişki var - çünkü yukarıdaki grafikteki fiyat noktaları birkaç belirgin küme oluşturuyor gibi görünüyor. Haydi farklı kabak çeşitlerini gösterecek bir grafik yapalım:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 172, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "ax=None\n", + "colors = ['red','blue','green','yellow']\n", + "for i,var in enumerate(new_pumpkins['Variety'].unique()):\n", + " ax = new_pumpkins[new_pumpkins['Variety']==var].plot.scatter('DayOfYear','Price',ax=ax,c=colors[i],label=var)" + ] + }, + { + "cell_type": "code", + "execution_count": 173, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 173, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "new_pumpkins.groupby('Variety')['Price'].mean().plot(kind='bar')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 174, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-0.2669192282197318\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 174, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "pie_pumpkins = new_pumpkins[new_pumpkins['Variety']=='PIE TYPE']\n", + "print(pie_pumpkins['DayOfYear'].corr(pie_pumpkins['Price']))\n", + "pie_pumpkins.plot.scatter('DayOfYear','Price')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Doğrusal Regresyon\n", + "\n", + "Doğrusal regresyon modelini eğitmek için Scikit Learn kullanacağız:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 175, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error\n", + "from sklearn.model_selection import train_test_split" + ] + }, + { + "cell_type": "code", + "execution_count": 176, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 2.77 (17.2%)\n" + ] + } + ], + "source": [ + "X = pie_pumpkins['DayOfYear'].to_numpy().reshape(-1,1)\n", + "y = pie_pumpkins['Price']\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n", + "lin_reg = LinearRegression()\n", + "lin_reg.fit(X_train,y_train)\n", + "\n", + "pred = lin_reg.predict(X_test)\n", + "\n", + "mse = np.sqrt(mean_squared_error(y_test,pred))\n", + "print(f'Mean error: {mse:3.3} ({mse/np.mean(pred)*100:3.3}%)')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 177, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 177, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.scatter(X_test,y_test)\n", + "plt.plot(X_test,pred)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Doğrunun eğimi, doğrusal regresyon katsayılarından belirlenebilir:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 178, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([-0.01751876]), 21.133734359909326)" + ] + }, + "execution_count": 178, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lin_reg.coef_, lin_reg.intercept_" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Eğitilmiş modeli fiyat tahmini yapmak için kullanabiliriz:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 179, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([16.64893156])" + ] + }, + "execution_count": 179, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Pumpkin price on programmer's day\n", + "\n", + "lin_reg.predict([[256]])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Polinom Regresyon\n", + "\n", + "Bazen özellikler ile sonuçlar arasındaki ilişki doğası gereği doğrusal olmayabilir. Örneğin, kabak fiyatları kış aylarında (aylar=1,2) yüksek olabilir, yaz aylarında (aylar=5-7) düşebilir ve ardından tekrar yükselebilir. Doğrusal regresyon bu ilişkiyi doğru bir şekilde bulamaz.\n", + "\n", + "Bu durumda, ek özellikler eklemeyi düşünebiliriz. Basit bir yöntem, giriş özelliklerinden polinomlar kullanmaktır, bu da **polinom regresyon** ile sonuçlanır. 
Scikit Learn'de, polinom özelliklerini otomatik olarak önceden hesaplamak için boru hatlarını kullanabiliriz:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 180, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 2.73 (17.0%)\n", + "Model determination: 0.07639977655280217\n" + ] + }, + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 180, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXUAAAD4CAYAAAATpHZ6AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAbw0lEQVR4nO3de3Cc1Znn8e+jm93ClmRb8kWyjYBgDb6ATQQhFwIhFzu7meBQNVOVyu5Sm9RQSWWnJlMTZ3BIZWq2dpcMnprZzM5WTbEDFVLDsJOZOM4USTAEkkBYMJExjOwYY8AXkGRLsi35otb92T+6JbfurXa3ut+j36eqS2+ffvvto0f2T6/Oe/q0uTsiIhKGonx3QEREskehLiISEIW6iEhAFOoiIgFRqIuIBKRkLl+surra6+vr5/IlRUQib//+/Z3uXpPOvnMa6vX19TQ1Nc3lS4qIRJ6ZnUh3Xw2/iIgERKEuIhIQhbqISEAU6iIiAVGoi4gEZMbZL2a2Bvg+sBIYBh529++a2S7gd4F+4G3gP7t7Vw77KnNgz4EWdu09QmtXnNqqGDu2NrB9S12+uyUiaUrnTH0Q+BN3vwG4Dfiqma0HngE2uvuNwJvAztx1U+bCngMt7NzdTEtXHAdauuLs3N3MngMt+e6aiKRpxlB39zZ3fzW5fQE4DNS5+9PuPpjc7WVgde66KXNh194jxAeGxrTFB4bYtfdInnokIrM1qzF1M6sHtgD7xj30ReBnUzznPjNrMrOmjo6OjDopc6O1Kz6rdhEpPGmHupktAn4IfM3dz6e0P0BiiObxyZ7n7g+7e6O7N9bUpPUuV8mT2qrYrNpFpPCkFepmVkoi0B93990p7fcCnwG+4PoIpcjbsbWBWGnxmLZYaTE7tjbkqUciMlvpzH4x4BHgsLv/VUr7NuBPgTvcvSd3XZS5MjLLRbNfRKLLZjrBNrOPAC8AzSSmNAJ8E/gbYAFwJtn2srt/ebpjNTY2uhb0EhGZHTPb7+6N6ew745m6u/8asEke+ulsOyYiIrmld5SKiAREoS4iEhCFuohIQBTqIiIBUaiLiAREoS4iEhCFuohIQBTqIiIBUaiLiAREoS4iEhCFuohIQBTqIiIBUaiLiAREoS4iEhCFuohIQBTqIiIBUaiLiAREoS4iEhCFuohIQGb8jFKRXNtzoIVde4/Q2hWntirGjq0NbN9SV/DHlolU7/xTqEte7TnQws7dzcQHhgBo6Yqzc3czwBWHQS6PLROp3oVBwy+SV7v2HhkNgRHxgSF27T1S0MeWiVTvwqBQl7xq7YrPqr1Qji0Tqd6FQaEueVVbFZtVe6EcWyZSvQuDQl3yasfWBmKlxWPaYqXF7NjaUNDHlolU78KgC6WSVyMX0HIxYyKXx5aJVO/CYO4+Zy/W2NjoTU1Nc/Z6IiIhMLP97t6Yzr4znqmb2Rrg+8BKYBh42N2/a2ZLgX8C6oHjwO+7+7lM
O50PmlMrIqFJZ0x9EPgTd78BuA34qpmtB+4HnnX364Fnk/cjY2RObUtXHOfynNo9B1ry3TURkYzNGOru3uburya3LwCHgTrgbuCx5G6PAdtz1Mec0JxaEQnRrGa/mFk9sAXYB6xw9zZIBD+wfIrn3GdmTWbW1NHRcYXdzR7NqRWREKUd6ma2CPgh8DV3P5/u89z9YXdvdPfGmpqaTPqYE5pTKyIhSivUzayURKA/7u67k82nzWxV8vFVQHtuupgbmlMrIiGaMdTNzIBHgMPu/lcpD/0rcG9y+17gx9nvXu5s31LHg/dsoq4qhgF1VTEevGeTZr+ISKTNOE/dzD4CvAA0k5jSCPBNEuPqPwDWAieB33P3s9MdS/PURURmL6vz1N3914BN8fDHZ9MxERHJLa39IiISEIW6iEhAFOoiIgFRqIuIBEShLiISEIW6iEhAFOoiIgFRqIuIBEShLiISEH1GqUiG9MlZUogU6iIZGPnkrJEPWhn55CxAwS55peEXkQzok7OkUCnURTKgT86SQqVQF8mAPjlLCpVCXSQD+uQsKVS6UCqSgZGLoZr9IoVGoS6Soe1b6hTiUnA0/CIiEhCFuohIQBTqIiIBUaiLiAREoS4iEhCFuohIQAp+SmNUV8KLar9FJNoKOtSjuhJeVPstItFX0MMvUV0JL6r9FpHomzHUzexRM2s3s4MpbZvN7GUze83Mmszs1lx0Lqor4UW13yISfemcqX8P2Dau7SHgz919M/Dt5P2si+pKeFHtt4hE34yh7u7PA2fHNwMVye1KoDXL/QKiuxJeVPstItGX6YXSrwF7zewvSfxi+FDWepQiqivhRbXfIhJ95u4z72RWDzzp7huT9/8G+JW7/9DMfh+4z90/McVz7wPuA1i7du37T5w4ka2+i4jMC2a2390b09k309kv9wK7k9v/DEx5odTdH3b3RndvrKmpyejFuuMD9A8OZ/RcEZH5JNNQbwXuSG7fBRzNTncm97+ePcqt/+PnfGtPM/tPnCWdvy5EROajGcfUzewJ4E6g2szeA/4M+APgu2ZWAvSSHF7JlbtuWE77hT7+Zf97/MPLJ1m7tDzxAQWba7m2ZlEuX1pEJFLSGlPPlsbGRm9qasr4+Rf7Btl78BR7Xmvhxbc6GXa4aU0Vn9tcy+/eVMuyRQuy2FsRkcIwmzH1SIV6qtPne/nX11r50YEWftt2nuIi4451NWzfUscnb1hBrKx45oOIiETAvAj1VEdOXWDPay38+EALrd29XFVWzLaNq/jcljo+eN0yioss668pIjJX5l2ojxgedvYdO8ueAy38tLmNC32DrKhYwN2b69i+uY4bVi3GTAEvItEyb0M9Ve/AEM+90c6PDrTwyyPtDAw5DSsWs31LHXdvrtVb9kUkMhTq45y71M+TzW3sOdDC/hPnMIPbrlnG57bUsW3TSioWls55n0RE0qVQn8aJM5f4cfIC67HOS5SVFHFXw3I+vWklH/ud5Qp4ESk4CvU0uDuvv9c9Ov7efqGP0mLjw++rZtuGlXxi/QqqNUVSRAqAQn2WhoedA+92sffQKZ46eIqTZ3soMrilfinbNq5k64aVGoMXkbxRqF8Bd+dw2wWeOnSKvQdPceT0BQBuWl3JpzasZNvGlVynd7GKyBxSqGfROx0X2XvoNE8dOsXr73YBcP3yRaNn8BtqKzRNUkRySqGeI23dcZ4+dJqnDp5i37EzDDusXhJjW/IM/ua1SyjSG51EJMsU6nPgzMU+nj3czlOHTvHro530Dw1Ts3gBn1q/gm0bV3LbtcsoLS7oz/UWkYhQqM+xC70D/OJIB3sPnuIXR9rp6R+iYmEJn7hhBZ9Yv4IPX1dNZbmmSopIZhTqedQ7MMQLRzt56uApfn74NN3xAYossZrk7dfX8NHrq9m8pooSncWLSJoU6gVicGiY197t4vmjnbxwtIPX3+1i2GHxghI+9L5lyZCvYe2y8nx3VUQKmEK9QHX3DPDi24mAf/7N
Tlq64gBcvaycj15fw+3XV/PB65axWO9qFZEUCvUIcHeOdV7ihaOdPP9mBy+9c4ae/iGKi4yb11YlQn5dDZvqKrV0sMg8p1CPoP7BYV49eY4XjnbwwtFOmlu6cYfKWCkfeV81t19fze3raqjTO1tF5h2FegDOXOzjxbfP8MKbiZA/db4XgOtqrkqMxa+r5rZrl1FeNuPHzIpIxCnUA+PuvNV+kV8lA37fsTP0DgxTWmw0Xr2U29dV89Hra1i/qkJvfhIJkEI9cL0DQ+w/cY7nkxdcD7edB2DpVWWjQzUfuGYZa5bGtISBSAAU6mnac6CFXXuP0NoVp7Yqxo6tDWzfUpfvbs1a+4VeXnyrkxfe7OT5o510XuwDYPniBdxSv5TG+iXcUr+U31m5WPPjRSJIoZ6GPQda2Lm7mfjA0GhbrLSYB+/ZFMlgHzE87LzZfoGm4+doOn6W3xw/Nzp18qqyYm6+egmNVyeCfvOaKq5aoDF5kUKnUE/Dh7/z3GjYpaqrivHi/XfloUe509oVp+nE5ZB/49R53KG4yNhQW0Hj1Uu5pX4J769fwvLFC/PdXREZZzahPm9P01onCfTp2qOstirGZ6tifPamWgDO9w5w4GRXMuTP8o+vnODRF48BUL+snMb6pdy0poqNtRXcsKqChaXF+ey+iMzCvA312qrYpGfq8+ETjioWlnLHuhruWFcDJObIH2rtpun4OX5z/CzPvdHOv+x/D4Aig/ctX8TG2krW11awsS7xNZuf5RrKtQ2RQjBvh19CHVPPBnentbuXgy3dHGrp5lDreQ62dnP6fN/oPlcvK2djbSUb6irYUFvJxtoKlmXwma76OYjMLKvDL2b2KPAZoN3dN6a0/yHwX4BB4Cfu/o0M+5sXI4GhM8SJzIy6qhh1VTG2blg52t5xoY9DrcmQb+mmuaWbnzS3jT6+qnIhG2qTIV9XyYbaClZVLpx2WuWuvUfGBDpAfGCIXXuP6GchkoF0hl++B/wt8P2RBjP7GHA3cKO795nZ8tx0L7e2b6lTcMxCzeIF3NmwnDsbLv+4u3sGONTWzW+TQX+w9TzPvdHOcPIPwKVXlaUEfeLr1UvLR98kNZ+ubYjMhRlD3d2fN7P6cc1fAb7j7n3Jfdpz0DeZpXyMTVeWl/Kh66r50HXVo209/YMcbruQOKtvSQzdPPLrdxgYSiT9ogUlrK+tYENtBVXlpZzrGZhw3Gxd29B4vcw3mV4oXQfcbmb/HegFvu7uv5lsRzO7D7gPYO3atRm+nMxk/Nh0S1ecnbubAeY8xMrLSnj/1Ut4/9VLRtv6Boc4evoih1q7OZgM+ideOUnvwPCkx7imupyDLd3UV1/Fogzn0hdSTUTmSloXSpNn6k+OjKmb2UHgOeCPgFuAfwKu9RkOVkgXSkMTxXn3Q8POBx98lvYLfdPut3zxAq6pvmrM7dqaq1iztJwFJVNPt4xiTSQcQ8NOW3eck2d7ePdsDx9rWM7yiszeBzIX89TfA3YnQ/wVMxsGqoGODI8nVyiKY9PFRUbHNIH+d//hZt7pvMSxjksc67zEM789zZlL/aOPFxmsXlI+JuhHtmsrY5GsiUTL+d4BTp5JhPbJlNu7Z3to6YqPDjkC/J//1Mgn1+f+zX2Zhvoe4C7gl2a2DigDOrPVKZm9qM67n6rfdVUxtm1cNaG9u2eAY2cucazzIsc6LvFO5yWOn7lE0/GzXOq/PIumrKSI4iJjcHjiH48rKxbi7lrsTGY0MDRMW1fv5bA+dzm0T57toWvc9aCq8lLWLi1nQ10ln960irVLy0dvqyrn5t3a6UxpfAK4E6g2s/eAPwMeBR5NDsP0A/fONPQiubVja8Ok8713bG3IY69mNtt+V5aXsrm8is1rqsa0uzsdF/oSZ/bJ2/97q5NDrecZ/w+z7Xwv67+9l5WVC1lZsZBVlQtZUZn4urJiYaK9ciHVVy3QUsaBcncu9Q/R1dNPV88AXT0DnO3p571zY8+6W7t6GUo5
MSgtNlYvKWfN0nJuXF05GthrkrdsvikvU/P2zUchiupMj1z2e8+BFh566g1au3upXlTGv9+0ijVLyznV3Uvb+V5OdSdup8/3TjirLykyVqSE/MgvgJUp4b988ULKSrTyZT71Dgxxrqefc5cG6IpfDulzPf10xwc4d6mfrvjAaICf6xmgO94/ZmgkVfWiskRILykfE9prl5WzsmJhXj5eUgt6iczS8LDTeamP0919tHXHOZUS+G3J0G/tjk+YrWMGVbFSqsrLqIiVUhUrpTJ5qyq/vH25rWz0Ma2pM1b/4HAieKcJ4smCu29w8hlUAAtLi6iKlVFVnqj5kvKR7TKqYon7lcn2JeWl1FbFCnLlUi3oNU/pTD1zRUXG8sWJM+9Nqysn3cfdOR8fpO18fEzgd17sozs+kDgr7Onn+JlLo/enO2cqKylKBHzKL4GK5HZbdy8vvX2G7vgAS8pL2b6ljtuvr6asuJiykqLErTjxdUHylto+3br5uaq3uzM07Ay5c7F3cFwQpwb05cA+d+ly3Xr6h6Y8dmmxjQnixPBHMpxHwjo27v48/cWpM/VARHUNlaj2Ox3Dw86FvkG6ewZGQ74r3j+6PdLelfJ4d3yAzot90559pqPIGA34BaXFia8lRcQHhjh1vnfMLxszWF0VY/HCUoZHgjkZzkPDzvDoNqOPj7QNpmynEyVFxmjwTnamXJn8Ov7suryseF5f2NaZ+jwU1TVUotrvdBQV2eiwy2xMNb++ZtEC/vcXbqZ/cJj+oSH6BobpHxqmb3A40TaYuJ+63TcwNGafnx8+PSF83aH9Qh8NKxdTZEZxUcrNjKLUr0WMaSsuTn4tsjHPXbSgZNJhjsULSnTxOccU6oGI6pzsqPY7l6b63jsv9nHrNUuv6NjX3P+TSdv7B4f5+3tvuaJjS2HQZftATDUfPQrz1GfTPh/ksiaqd/gU6oHYsbWB2LiLQlGZpx7FfudSLmuieodPwy+BiOr68FHtdy7lsiaqd/g0+0VEpMDNZvaLhl9ERAKiUBcRCYhCXUQkIAp1EZGAKNRFRAKiUBcRCYhCXUQkIAp1EZGA6B2lMkYhrG0uIplTqMuo8Wubt3TF2bm7GUDBLhIRGn6RUdOtbS4i0aBQl1Fa21wk+hTqMkprbYtEn0JdRmmtbZHo04VSGaW1tkWiT6EuY2zfUqcQF4kwhbrknebGT5TLmkT12FE11zVRqEteaW78RLmsSVSPHVX5qMmMF0rN7FEzazezg5M89nUzczOrzknvJHiaGz9RLmsS1WNHVT5qks7sl+8B28Y3mtka4JPAySz3SeYRzY2fKJc1ieqxoyofNZkx1N39eeDsJA/9NfANYO4+uVqCo7nxE+WyJlE9dlTloyYZzVM3s88CLe7+ehr73mdmTWbW1NHRkcnLScA0N36iXNYkqseOqnzUZNYXSs2sHHgA+FQ6+7v7w8DDAI2NjTqrlzE0N36iXNYkqseOqnzUxNxnzlkzqweedPeNZrYJeBboST68GmgFbnX3U9Mdp7Gx0Zuamq6sxyIi84yZ7Xf3xnT2nfWZurs3A8tTXuw40OjunbM9lkiuad60zDfpTGl8AngJaDCz98zsS7nvlsiVG5kj3NIVx7k8R3jPgZZ8d00kZ2Y8U3f3z8/weH3WeiOSRdPNEdbZuoRKqzRKsDRvWuYjLRMgwaqtitEySYDP53nTuRbVaxhR7fdkdKYuwdK86bkV1WsYUe33VBTqEqztW+p48J5N1FXFMKCuKsaD92yK7BlYoYvq2i9R7fdUNPwiQdP68HMnqtcwotrvqehMXUSyIqprv0S131NRqItIVkT1GkZU+z0VDb+ISFZEde2XqPZ7Kmmt/ZItWvtFRGT2ZrP2i4ZfREQColAXEQmIQl1EJCAKdRGRgCjURUQColAXEQmIQl1EJCAKdRGRgCjURUQColAXEQmIQl1EJCAKdRGRgCjURUQColAXEQmIQl1EJCAKdRGRgCjURUQCMmOom9mj
ZtZuZgdT2naZ2Rtm9m9m9iMzq8ppL0VEJC3pnKl/D9g2ru0ZYKO73wi8CezMcr9ERCQDM4a6uz8PnB3X9rS7DybvvgyszkHfRERklrIxpv5F4GdTPWhm95lZk5k1dXR0ZOHlRERkKlcU6mb2ADAIPD7VPu7+sLs3untjTU3NlbyciIjMoCTTJ5rZvcBngI+7u2evSyIikqmMQt3MtgF/Ctzh7j3Z7ZKIiGQqnSmNTwAvAQ1m9p6ZfQn4W2Ax8IyZvWZmf5fjfoqISBpmPFN3989P0vxIDvoiIiJXSO8oFREJiEJdRCQgCnURkYAo1EVEAqJQFxEJiEJdRCQgCnURkYAo1EVEAqJQFxEJiEJdRCQgCnURkYBkvPSuiOTOt/Y088S+dxlyp9iMz39gDf9t+6asHHvPgRZ27T1Ca1ec2qoYO7Y2sH1LXVaOLfmnUBcpMN/a08w/vHxy9P6Q++j9Kw32PQda2Lm7mfjAEAAtXXF27m4GULAHQsMvIgXmiX3vzqp9NnbtPTIa6CPiA0Ps2nvkio8thUGhLlJghqb4ILGp2mejtSs+q3aJHoW6SIEpNptV+2zUVsVm1S7Ro1AXKTCf/8CaWbXPxo6tDcRKi8e0xUqL2bG14YqPLYVBF0pFCszIxdBczH4ZuRiq2S/hMs/COF26Ghsbvampac5eT0QkBGa2390b09lXwy8iIgFRqIuIBEShLiISEIW6iEhAFOoiIgGZ09kvZtYBnACqgc45e+HCpTqoBqAajFAdpq7B1e5ek84B5jTUR1/UrCnd6TkhUx1UA1ANRqgO2amBhl9ERAKiUBcRCUi+Qv3hPL1uoVEdVANQDUaoDlmoQV7G1EVEJDc0/CIiEhCFuohIQLIe6ma2xsx+YWaHzeyQmf3RuMe/bmZuZtUpbTvN7C0zO2JmW7Pdp3yYrg5m9ofJ7/WQmT2U0h5UHaaqgZltNrOXzew1M2sys1tTnhNUDQDMbKGZvWJmryfr8OfJ9qVm9oyZHU1+XZLynKDqME0NdpnZG2b2b2b2IzOrSnlOUDWAqeuQ8viV56O7Z/UGrAJuTm4vBt4E1ifvrwH2knwDUrJtPfA6sAC4BngbKM52v+b6NlUdgI8BPwcWJB9bHmodpqnB08Cnk+3/DvhlqDVIfl8GLEpulwL7gNuAh4D7k+33A38Rah2mqcGngJJk+1+EXIPp6pC8n5V8zPqZuru3ufurye0LwGFgZAX+vwa+AaRenb0b+L/u3ufux4C3gFuJuGnq8BXgO+7el3ysPfmU4OowTQ0cqEjuVgm0JreDqwGAJ1xM3i1N3pzE9/tYsv0xYHtyO7g6TFUDd3/a3QeT7S8Dq5PbwdUApv23AFnKx5yOqZtZPbAF2GdmnwVa3P31cbvVAakfk/4el38JBCG1DsA64HYz22dmvzKzW5K7BV2HcTX4GrDLzN4F/hLYmdwt2BqYWbGZvQa0A8+4+z5ghbu3QeIXILA8uXuQdZiiBqm+CPwsuR1kDWDyOmQzH3MW6ma2CPghif/Ag8ADwLcn23WStmDmWabWwd3Pk/gIwSUk/vTcAfzAzIyA6zBJDb4C/LG7rwH+GHhkZNdJnh5EDdx9yN03kzgTvdXMNk6ze5B1mK4GZvYAiZx4fKRpskPkvJNzYJI63EgW8zEnoW5mpST+Ez/u7ruB60iMB71uZsdJfDOvmtlKEr95Uj9RdzWX/xyPtEnqAInvd3fyz7BXgGESi/gEWYcpanAvMLL9z1z+czLIGqRy9y7gl8A24LSZrQJIfh0Zigu6DuNqgJndC3wG+IInB5IJvAYwpg53k818zNGFgO8D/3OafY5z+ULABsZeCHiHcC6ITKgD8GXgvya315H408pCrMM0NTgM3Jnc/jiwP/B/CzVAVXI7BrxAIsR2MfZC6UOh1mGaGmwDfgvUjNs/uBpMV4dx+1xRPpZMk/eZ+jDwH4Hm5LgRwDfd/aeT7ezuh8zsByR+sIPAV919KAf9mmuT1gF4FHjU
zA4C/cC9nvjphViHqWrwB8B3zawE6AXug6D/LawCHjOzYhJ/Hf/A3Z80s5dIDL99CTgJ/B4EW4epavAWicB6JjEKycvu/uVAawBT1GGqnTOpg5YJEBEJiN5RKiISEIW6iEhAFOoiIgFRqIuIBEShLiISEIW6iEhAFOoiIgH5/+EaqS+WjFbpAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "from sklearn.preprocessing import PolynomialFeatures\n", + "from sklearn.pipeline import make_pipeline\n", + "\n", + "pipeline = make_pipeline(PolynomialFeatures(2), LinearRegression())\n", + "\n", + "pipeline.fit(X_train,y_train)\n", + "\n", + "pred = pipeline.predict(X_test)\n", + "\n", + "mse = np.sqrt(mean_squared_error(y_test,pred))\n", + "print(f'Mean error: {mse:3.3} ({mse/np.mean(pred)*100:3.3}%)')\n", + "\n", + "score = pipeline.score(X_train,y_train)\n", + "print('Model determination: ', score)\n", + "\n", + "plt.scatter(X_test,y_test)\n", + "plt.plot(sorted(X_test),pipeline.predict(sorted(X_test)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Çeşitlerin Kodlanması\n", + "\n", + "İdeal bir dünyada, farklı balkabağı çeşitleri için fiyatları aynı modelle tahmin edebilmek isteriz. Çeşidi hesaba katmak için, önce onu sayısal bir forma dönüştürmemiz, yani **kodlamamız** gerekir. Bunu yapmanın birkaç yolu vardır:\n", + "\n", + "* Basit sayısal kodlama, farklı çeşitlerin bir tablosunu oluşturur ve ardından çeşit adını bu tablodaki bir indeksle değiştirir. Bu, doğrusal regresyon için en iyi fikir değildir, çünkü doğrusal regresyon indeksin sayısal değerini dikkate alır ve bu sayısal değer muhtemelen fiyatla sayısal olarak ilişki kurmaz.\n", + "* Tekil kodlama (one-hot encoding), `Variety` sütununu 4 farklı sütunla değiştirir. Her sütun, ilgili satırın belirli bir çeşide ait olup olmadığını göstermek için 1 veya 0 içerir.\n", + "\n", + "Aşağıdaki kod, bir çeşidi nasıl tekil kodlayabileceğimizi gösteriyor:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 181, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
FAIRYTALEMINIATUREMIXED HEIRLOOM VARIETIESPIE TYPE
700001
710001
720001
730001
740001
...............
17380100
17390100
17400100
17410100
17420100
\n", + "

415 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " FAIRYTALE MINIATURE MIXED HEIRLOOM VARIETIES PIE TYPE\n", + "70 0 0 0 1\n", + "71 0 0 0 1\n", + "72 0 0 0 1\n", + "73 0 0 0 1\n", + "74 0 0 0 1\n", + "... ... ... ... ...\n", + "1738 0 1 0 0\n", + "1739 0 1 0 0\n", + "1740 0 1 0 0\n", + "1741 0 1 0 0\n", + "1742 0 1 0 0\n", + "\n", + "[415 rows x 4 columns]" + ] + }, + "execution_count": 181, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.get_dummies(new_pumpkins['Variety'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Çeşit Üzerinde Doğrusal Regresyon\n", + "\n", + "Şimdi yukarıdakiyle aynı kodu kullanacağız, ancak `DayOfYear` yerine giriş olarak tek-seçenekli kodlanmış çeşidimizi kullanacağız:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 182, + "metadata": {}, + "outputs": [], + "source": [ + "X = pd.get_dummies(new_pumpkins['Variety'])\n", + "y = new_pumpkins['Price']" + ] + }, + { + "cell_type": "code", + "execution_count": 183, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 5.24 (19.7%)\n", + "Model determination: 0.774085281105197\n" + ] + } + ], + "source": [ + "def run_linear_regression(X,y):\n", + " X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n", + " lin_reg = LinearRegression()\n", + " lin_reg.fit(X_train,y_train)\n", + "\n", + " pred = lin_reg.predict(X_test)\n", + "\n", + " mse = np.sqrt(mean_squared_error(y_test,pred))\n", + " print(f'Mean error: {mse:3.3} ({mse/np.mean(pred)*100:3.3}%)')\n", + "\n", + " score = lin_reg.score(X_train,y_train)\n", + " print('Model determination: ', score)\n", + "\n", + "run_linear_regression(X,y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Aynı şekilde diğer özellikleri kullanmayı deneyebilir ve bunları `Month` veya `DayOfYear` gibi sayısal özelliklerle birleştirebiliriz:\n" + ] + }, + { + "cell_type": "code", + 
"execution_count": 184, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 2.84 (10.5%)\n", + "Model determination: 0.9401096672643048\n" + ] + } + ], + "source": [ + "X = pd.get_dummies(new_pumpkins['Variety']) \\\n", + " .join(new_pumpkins['Month']) \\\n", + " .join(pd.get_dummies(new_pumpkins['City'])) \\\n", + " .join(pd.get_dummies(new_pumpkins['Package']))\n", + "y = new_pumpkins['Price']\n", + "\n", + "run_linear_regression(X,y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Polinom Regresyon\n", + "\n", + "Polinom regresyon, tekil kodlanmış (one-hot encoded) kategorik özelliklerle de kullanılabilir. Polinom regresyonu eğitmek için kullanılan kod, yukarıda gördüğümüzle temelde aynı olacaktır.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 185, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 2.23 (8.25%)\n", + "Model determination: 0.9652870784724543\n" + ] + } + ], + "source": [ + "from sklearn.preprocessing import PolynomialFeatures\n", + "from sklearn.pipeline import make_pipeline\n", + "\n", + "pipeline = make_pipeline(PolynomialFeatures(2), LinearRegression())\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n", + "\n", + "pipeline.fit(X_train,y_train)\n", + "\n", + "pred = pipeline.predict(X_test)\n", + "\n", + "mse = np.sqrt(mean_squared_error(y_test,pred))\n", + "print(f'Mean error: {mse:3.3} ({mse/np.mean(pred)*100:3.3}%)')\n", + "\n", + "score = pipeline.score(X_train,y_train)\n", + "print('Model determination: ', score)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, [Co-op Translator](https://github.com/Azure/co-op-translator) adlı yapay zeka çeviri hizmeti 
kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlıklar içerebileceğini lütfen unutmayın. Orijinal belgenin kendi dilindeki hali yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. Bu çevirinin kullanımından kaynaklanan herhangi bir yanlış anlama veya yanlış yorumlama durumunda sorumluluk kabul edilmez.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "86193a1ab0ba47eac1c69c1756090baa3b420b3eea7d4aafab8b85f8b312f0c5" + }, + "kernelspec": { + "display_name": "Python 3.7.0 64-bit ('3.7')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.5" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "d77bd89ae7e79780c68c58bab91f13f8", + "translation_date": "2025-09-06T13:12:16+00:00", + "source_file": "2-Regression/3-Linear/solution/notebook.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/tr/2-Regression/4-Logistic/notebook.ipynb b/translations/tr/2-Regression/4-Logistic/notebook.ipynb new file mode 100644 index 000000000..df7a180bf --- /dev/null +++ b/translations/tr/2-Regression/4-Logistic/notebook.ipynb @@ -0,0 +1,269 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Balkabağı Çeşitleri ve Renk\n", + "\n", + "Gerekli kütüphaneleri ve veri setini yükleyin. 
Verileri, verilerin bir alt kümesini içeren bir veri çerçevesine dönüştürün:\n", + "\n", + "Renk ve çeşit arasındaki ilişkiye bir göz atalım\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NameTypePackageVarietySub VarietyGradeDateLow PriceHigh PriceMostly Low...Unit of SaleQualityConditionAppearanceStorageCropRepackTrans ModeUnnamed: 24Unnamed: 25
0BALTIMORENaN24 inch binsNaNNaNNaN4/29/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
1BALTIMORENaN24 inch binsNaNNaNNaN5/6/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
2BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
3BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
4BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN11/5/1690.0100.090.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " City Name Type Package Variety Sub Variety Grade Date \\\n", + "0 BALTIMORE NaN 24 inch bins NaN NaN NaN 4/29/17 \n", + "1 BALTIMORE NaN 24 inch bins NaN NaN NaN 5/6/17 \n", + "2 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "3 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "4 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 11/5/16 \n", + "\n", + " Low Price High Price Mostly Low ... Unit of Sale Quality Condition \\\n", + "0 270.0 280.0 270.0 ... NaN NaN NaN \n", + "1 270.0 280.0 270.0 ... NaN NaN NaN \n", + "2 160.0 160.0 160.0 ... NaN NaN NaN \n", + "3 160.0 160.0 160.0 ... NaN NaN NaN \n", + "4 90.0 100.0 90.0 ... NaN NaN NaN \n", + "\n", + " Appearance Storage Crop Repack Trans Mode Unnamed: 24 Unnamed: 25 \n", + "0 NaN NaN NaN E NaN NaN NaN \n", + "1 NaN NaN NaN E NaN NaN NaN \n", + "2 NaN NaN NaN N NaN NaN NaN \n", + "3 NaN NaN NaN N NaN NaN NaN \n", + "4 NaN NaN NaN N NaN NaN NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "full_pumpkins = pd.read_csv('../data/US-pumpkins.csv')\n", + "\n", + "full_pumpkins.head()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, [Co-op Translator](https://github.com/Azure/co-op-translator) adlı yapay zeka çeviri hizmeti kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlıklar içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. 
Bu çevirinin kullanımından kaynaklanan yanlış anlama veya yanlış yorumlamalardan sorumlu değiliz.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.1" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "dee08c2b49057b0de8b6752c4dbca368", + "translation_date": "2025-09-06T13:26:48+00:00", + "source_file": "2-Regression/4-Logistic/notebook.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/tr/2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb b/translations/tr/2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb new file mode 100644 index 000000000..186c47227 --- /dev/null +++ b/translations/tr/2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb @@ -0,0 +1,686 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Lojistik Regresyon Modeli Oluşturma - Ders 4\n", + "\n", + "![Lojistik ve doğrusal regresyon infografiği](../../../../../../2-Regression/4-Logistic/images/linear-vs-logistic.png)\n", + "\n", + "#### **[Ders Öncesi Test](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/15/)**\n", + "\n", + "#### Giriş\n", + "\n", + "Regresyon üzerine olan bu son derste, temel *klasik* ML tekniklerinden biri olan Lojistik Regresyonu inceleyeceğiz. Bu tekniği, ikili kategorileri tahmin etmek için desenleri keşfetmek amacıyla kullanabilirsiniz. Bu şeker çikolata mı, değil mi? Bu hastalık bulaşıcı mı, değil mi? 
Bu müşteri bu ürünü seçecek mi, seçmeyecek mi?\n", + "\n", + "Bu derste şunları öğreneceksiniz:\n", + "\n", + "- Lojistik regresyon teknikleri\n", + "\n", + "✅ Bu tür regresyonla çalışmayı daha iyi anlamak için şu [Learn modülüne](https://learn.microsoft.com/training/modules/introduction-classification-models/?WT.mc_id=academic-77952-leestott) göz atabilirsiniz.\n", + "\n", + "## Ön Koşul\n", + "\n", + "Balkabağı verileriyle çalıştıktan sonra, üzerinde çalışabileceğimiz bir ikili kategori olduğunu fark edecek kadar aşina olduk: `Renk`.\n", + "\n", + "Şimdi, bazı değişkenlere dayanarak *belirli bir balkabağının muhtemelen hangi renkte olduğunu* (turuncu 🎃 veya beyaz 👻) tahmin etmek için bir lojistik regresyon modeli oluşturalım.\n", + "\n", + "> Neden regresyonla ilgili bir ders grubunda ikili sınıflandırmadan bahsediyoruz? Sadece dilsel kolaylık açısından, çünkü lojistik regresyon [aslında bir sınıflandırma yöntemi](https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression), ancak doğrusal tabanlı bir yöntemdir. 
Verileri sınıflandırmanın diğer yollarını bir sonraki ders grubunda öğrenin.\n", + "\n", + "Bu ders için aşağıdaki paketlere ihtiyacımız olacak:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/), veri bilimini daha hızlı, kolay ve eğlenceli hale getirmek için tasarlanmış bir [R paketleri koleksiyonudur](https://www.tidyverse.org/packages).\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) çerçevesi, modelleme ve makine öğrenimi için bir [paketler koleksiyonudur](https://www.tidymodels.org/packages/).\n", + "\n", + "- `janitor`: [janitor paketi](https://github.com/sfirke/janitor), kirli verileri incelemek ve temizlemek için basit araçlar sağlar.\n", + "\n", + "- `ggbeeswarm`: [ggbeeswarm paketi](https://github.com/eclarke/ggbeeswarm), ggplot2 kullanarak beeswarm tarzı grafikler oluşturmak için yöntemler sunar.\n", + "\n", + "Bu paketleri şu şekilde yükleyebilirsiniz:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\", \"janitor\", \"ggbeeswarm\"))`\n", + "\n", + "Alternatif olarak, aşağıdaki script, bu modülü tamamlamak için gerekli paketlere sahip olup olmadığınızı kontrol eder ve eksikse sizin için yükler.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "suppressWarnings(if (!require(\"pacman\"))install.packages(\"pacman\"))\n", + "\n", + "pacman::p_load(tidyverse, tidymodels, janitor, ggbeeswarm)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## **Soruyu Tanımlayın**\n", + "\n", + "Bizim amacımız için bunu bir ikili olarak ifade edeceğiz: 'Beyaz' veya 'Beyaz Değil'. Veri setimizde ayrıca 'çizgili' bir kategori var, ancak çok az örneği olduğu için bunu kullanmayacağız. Zaten veri setinden eksik değerleri çıkardığımızda bu kategori kayboluyor.\n", + "\n", + "> 🎃 Eğlenceli bilgi, bazen beyaz balkabaklarına 'hayalet' balkabakları diyoruz. 
Oyması çok kolay değil, bu yüzden turuncu olanlar kadar popüler değiller ama oldukça havalı görünüyorlar! Bu yüzden sorumuzu şu şekilde de yeniden formüle edebiliriz: 'Hayalet' veya 'Hayalet Değil'. 👻\n", + "\n", + "## **Lojistik Regresyon Hakkında**\n", + "\n", + "Lojistik regresyon, daha önce öğrendiğiniz doğrusal regresyondan birkaç önemli şekilde farklıdır.\n", + "\n", + "#### **İkili Sınıflandırma**\n", + "\n", + "Lojistik regresyon, doğrusal regresyonla aynı özellikleri sunmaz. İlki, `ikili bir kategori` (\"turuncu veya turuncu değil\") hakkında bir tahmin sunarken, ikincisi `sürekli değerler` tahmin edebilir, örneğin bir balkabağının kökeni ve hasat zamanı verildiğinde, *fiyatının ne kadar artacağı*.\n", + "\n", + "![Dasani Madipalli tarafından hazırlanan infografik](../../../../../../2-Regression/4-Logistic/images/pumpkin-classifier.png)\n", + "\n", + "### Diğer Sınıflandırmalar\n", + "\n", + "Lojistik regresyonun başka türleri de vardır, bunlar arasında çok kategorili ve sıralı olanlar bulunur:\n", + "\n", + "- **Çok kategorili**, birden fazla kategoriye sahip olmayı içerir - \"Turuncu, Beyaz ve Çizgili\".\n", + "\n", + "- **Sıralı**, mantıksal olarak sıralanmış kategorileri içerir, örneğin sonuçlarımızı sınırlı sayıda boyuta göre sıralamak istersek (mini, küçük, orta, büyük, xl, xxl).\n", + "\n", + "![Çok kategorili vs sıralı regresyon](../../../../../../2-Regression/4-Logistic/images/multinomial-vs-ordinal.png)\n", + "\n", + "#### **Değişkenlerin İLİŞKİLİ Olması GEREKMEZ**\n", + "\n", + "Doğrusal regresyonun daha fazla ilişkili değişkenlerle daha iyi çalıştığını hatırlıyor musunuz? Lojistik regresyon bunun tersidir - değişkenlerin uyumlu olması gerekmez. 
Bu, zayıf korelasyonlara sahip olan bu veri için işe yarar.\n", + "\n", + "#### **Çok Miktarda Temiz Veriye İhtiyacınız Var**\n", + "\n", + "Lojistik regresyon, daha fazla veri kullanırsanız daha doğru sonuçlar verir; küçük veri setimiz bu görev için ideal değil, bunu aklınızda bulundurun.\n", + "\n", + "✅ Lojistik regresyona uygun olabilecek veri türlerini düşünün\n", + "\n", + "## Alıştırma - Veriyi Düzenleyin\n", + "\n", + "Öncelikle, eksik değerleri çıkararak ve yalnızca bazı sütunları seçerek veriyi biraz temizleyin:\n", + "\n", + "1. Aşağıdaki kodu ekleyin:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Load the core tidyverse packages\n", + "library(tidyverse)\n", + "\n", + "# Import the data and clean column names\n", + "pumpkins <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/2-Regression/data/US-pumpkins.csv\") %>% \n", + " clean_names()\n", + "\n", + "# Select desired columns\n", + "pumpkins_select <- pumpkins %>% \n", + " select(c(city_name, package, variety, origin, item_size, color)) \n", + "\n", + "# Drop rows containing missing values and encode color as factor (category)\n", + "pumpkins_select <- pumpkins_select %>% \n", + " drop_na() %>% \n", + " mutate(color = factor(color))\n", + "\n", + "# View the first few rows\n", + "pumpkins_select %>% \n", + " slice_head(n = 5)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Yeni veri çerçevenize her zaman bir göz atabilirsiniz, aşağıdaki gibi [*glimpse()*](https://pillar.r-lib.org/reference/glimpse.html) fonksiyonunu kullanarak:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "pumpkins_select %>% \n", + " glimpse()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "İkili sınıflandırma problemi 
yapacağımızı doğrulayalım:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Subset distinct observations in outcome column\n", + "pumpkins_select %>% \n", + " distinct(color)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Görselleştirme - kategorik grafik\n", + "Şimdiye kadar balkabağı verilerini tekrar yüklediniz ve birkaç değişkeni içeren bir veri setini koruyacak şekilde temizlediniz, bunlar arasında Renk de bulunuyor. Hadi ggplot kütüphanesini kullanarak veri çerçevesini not defterinde görselleştirelim.\n", + "\n", + "ggplot kütüphanesi, verilerinizi görselleştirmek için bazı güzel yöntemler sunar. Örneğin, her Çeşit ve Renk için verilerin dağılımlarını kategorik bir grafikte karşılaştırabilirsiniz.\n", + "\n", + "1. Balkabağı verilerimizi kullanarak, her balkabağı kategorisi (turuncu veya beyaz) için bir renk eşlemesi belirterek `geom_bar` fonksiyonunu kullanarak böyle bir grafik oluşturun:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Specify colors for each value of the hue variable\n", + "palette <- c(ORANGE = \"orange\", WHITE = \"wheat\")\n", + "\n", + "# Create the bar plot\n", + "ggplot(pumpkins_select, aes(y = variety, fill = color)) +\n", + " geom_bar(position = \"dodge\") +\n", + " scale_fill_manual(values = palette) +\n", + " labs(y = \"Variety\", fill = \"Color\") +\n", + " theme_minimal()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Verilere bakarak, Renk verilerinin Çeşit ile nasıl ilişkili olduğunu görebilirsiniz.\n", + "\n", + "✅ Bu kategorik grafiğe bakarak, hangi ilginç keşifleri hayal edebilirsiniz?\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Veri Ön İşleme: Özellik Kodlama\n", + "\n", + "Kabak veri 
setimizdeki tüm sütunlar metin değerleri içeriyor. Kategorik verilerle çalışmak insanlar için sezgisel olsa da makineler için aynı durum geçerli değil. Makine öğrenimi algoritmaları sayılarla daha iyi çalışır. Bu nedenle, kodlama veri ön işleme aşamasında çok önemli bir adımdır; çünkü kategorik verileri sayısal verilere dönüştürmemizi sağlar ve bu süreçte hiçbir bilgi kaybı yaşanmaz. İyi bir kodlama, iyi bir model oluşturmanın temelini oluşturur.\n", + "\n", + "Özellik kodlama için iki ana kodlayıcı türü vardır:\n", + "\n", + "1. **Ordinal kodlayıcı**: Bu kodlayıcı, sıralı değişkenler için uygundur. Sıralı değişkenler, verilerin mantıksal bir sıralamayı takip ettiği kategorik değişkenlerdir. Örneğin, veri setimizdeki `item_size` sütunu gibi. Bu kodlayıcı, her kategorinin bir sayı ile temsil edildiği bir eşleme oluşturur. Bu sayı, sütundaki kategorinin sırasını ifade eder.\n", + "\n", + "2. **Kategorik kodlayıcı**: Bu kodlayıcı, nominal değişkenler için uygundur. Nominal değişkenler, verilerin mantıksal bir sıralamayı takip etmediği kategorik değişkenlerdir. Örneğin, veri setimizdeki `item_size` dışındaki tüm özellikler. Bu kodlama yöntemi \"one-hot encoding\" olarak adlandırılır. Her kategori, ikili bir sütunla temsil edilir: kodlanmış değişken, kabak o çeşide aitse 1, değilse 0 değerini alır.\n", + "\n", + "Tidymodels, veri ön işleme için başka bir kullanışlı paket sunar: [recipes](https://recipes.tidymodels.org/) - veri ön işleme için bir paket. Bir `recipe` tanımlayacağız; bu, tüm tahmin edici sütunların bir dizi tam sayıya kodlanması gerektiğini belirtir. Ardından, gerekli miktarları ve istatistikleri tahmin etmek için `prep` işlemini gerçekleştireceğiz ve son olarak yeni verilere hesaplamaları uygulamak için `bake` işlemini kullanacağız.\n", + "\n", + "> Normalde, recipes genellikle modelleme için bir ön işleyici olarak kullanılır. Bu durumda, bir veri setine modelleme için hazır hale getirilmesi amacıyla hangi adımların uygulanması gerektiğini tanımlar. 
Bu durumda, bir tarifi manuel olarak `prep` ve `bake` ile tahmin etmek yerine bir `workflow()` kullanmanız **şiddetle tavsiye edilir**. Bunu birazdan daha ayrıntılı göreceğiz.\n", + ">\n", + "> Ancak şu an için, recipes + prep + bake kullanarak bir veri setine hangi adımların uygulanması gerektiğini belirliyoruz. Bu adımları uygulayarak ön işlenmiş veriyi çıkarıyoruz.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Preprocess and extract data to allow some data analysis\n", + "baked_pumpkins <- recipe(color ~ ., data = pumpkins_select) %>%\n", + " # Define ordering for item_size column\n", + " step_mutate(item_size = ordered(item_size, levels = c('sml', 'med', 'med-lge', 'lge', 'xlge', 'jbo', 'exjbo'))) %>%\n", + " # Convert factors to numbers using the order defined above (Ordinal encoding)\n", + " step_integer(item_size, zero_based = F) %>%\n", + " # Encode all other predictors using one hot encoding\n", + " step_dummy(all_nominal(), -all_outcomes(), one_hot = TRUE) %>%\n", + " prep(data = pumpkins_select) %>%\n", + " bake(new_data = NULL)\n", + "\n", + "# Display the first few rows of preprocessed data\n", + "baked_pumpkins %>% \n", + " slice_head(n = 5)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "✅ Öğe Boyutu sütunu için bir sıralı kodlayıcı kullanmanın avantajları nelerdir?\n", + "\n", + "### Değişkenler arasındaki ilişkileri analiz etme\n", + "\n", + "Artık verilerimizi ön işleme tabi tuttuğumuza göre, özellikler ve etiket arasındaki ilişkileri analiz ederek modelin, özellikler verildiğinde etiketi ne kadar iyi tahmin edebileceği hakkında bir fikir edinebiliriz. Bu tür bir analizi gerçekleştirmenin en iyi yolu verileri görselleştirmektir. \n", + "Bu analiz için tekrar ggplot `geom_boxplot` fonksiyonunu kullanacağız ve Öğe Boyutu, Çeşit ve Renk arasındaki ilişkileri kategorik bir grafikte görselleştireceğiz. 
Verileri daha iyi görselleştirmek için kodlanmış Öğe Boyutu sütununu ve kodlanmamış Çeşit sütununu kullanacağız.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Define the color palette\n", + "palette <- c(ORANGE = \"orange\", WHITE = \"wheat\")\n", + "\n", + "# We need the encoded Item Size column to use it as the x-axis values in the plot\n", + "pumpkins_select_plot<-pumpkins_select\n", + "pumpkins_select_plot$item_size <- baked_pumpkins$item_size\n", + "\n", + "# Create the grouped box plot\n", + "ggplot(pumpkins_select_plot, aes(x = `item_size`, y = color, fill = color)) +\n", + " geom_boxplot() +\n", + " facet_grid(variety ~ ., scales = \"free_x\") +\n", + " scale_fill_manual(values = palette) +\n", + " labs(x = \"Item Size\", y = \"\") +\n", + " theme_minimal() +\n", + " theme(strip.text = element_text(size = 12)) +\n", + " theme(axis.text.x = element_text(size = 10)) +\n", + " theme(axis.title.x = element_text(size = 12)) +\n", + " theme(axis.title.y = element_blank()) +\n", + " theme(legend.position = \"bottom\") +\n", + " guides(fill = guide_legend(title = \"Color\")) +\n", + " theme(panel.spacing = unit(0.5, \"lines\"))+\n", + " theme(strip.text.y = element_text(size = 4, hjust = 0)) \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Bir swarm plot kullanın\n", + "\n", + "Renk, ikili bir kategori olduğu için (Beyaz veya Değil), görselleştirme için '[özel bir yaklaşım](https://github.com/rstudio/cheatsheets/blob/main/data-visualization.pdf)' gerektirir.\n", + "\n", + "Renk dağılımını item_size ile ilişkili olarak göstermek için bir `swarm plot` deneyin.\n", + "\n", + "[ggbeeswarm paketi](https://github.com/eclarke/ggbeeswarm)'ni kullanacağız. Bu paket, ggplot2 ile arı kovanı tarzı grafikler oluşturmak için yöntemler sağlar. 
Arı kovanı grafikleri, normalde üst üste binecek noktaları yan yana düşecek şekilde düzenlemenin bir yoludur.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Create beeswarm plots of color and item_size\n", + "baked_pumpkins %>% \n", + " mutate(color = factor(color)) %>% \n", + " ggplot(mapping = aes(x = color, y = item_size, color = color)) +\n", + " geom_quasirandom() +\n", + " scale_color_brewer(palette = \"Dark2\", direction = -1) +\n", + " theme(legend.position = \"none\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Artık renklerin ikili kategorileri ile daha büyük boyut grubu arasındaki ilişki hakkında bir fikrimiz olduğuna göre, bir kabağın muhtemel rengini belirlemek için lojistik regresyonu inceleyelim.\n", + "\n", + "## Modelinizi oluşturun\n", + "\n", + "Sınıflandırma modelinizde kullanmak istediğiniz değişkenleri seçin ve veriyi eğitim ve test setlerine ayırın. 
[rsample](https://rsample.tidymodels.org/), Tidymodels içinde yer alan bir paket, verilerin verimli bir şekilde bölünmesi ve yeniden örneklenmesi için altyapı sağlar:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Split data into 80% for training and 20% for testing\n", + "set.seed(2056)\n", + "pumpkins_split <- pumpkins_select %>% \n", + " initial_split(prop = 0.8)\n", + "\n", + "# Extract the data in each split\n", + "pumpkins_train <- training(pumpkins_split)\n", + "pumpkins_test <- testing(pumpkins_split)\n", + "\n", + "# Print out the first 5 rows of the training set\n", + "pumpkins_train %>% \n", + " slice_head(n = 5)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "🙌 Artık modeli, eğitim özelliklerini eğitim etiketi (renk) ile eşleştirerek eğitmeye hazırız.\n", + "\n", + "Verilerimizi modellemeye hazırlamak için yapılması gereken ön işleme adımlarını belirten bir tarif oluşturarak başlayacağız, yani: kategorik değişkenleri bir dizi tam sayıya kodlama. Tıpkı `baked_pumpkins` gibi, bir `pumpkins_recipe` oluşturuyoruz ancak bunu `prep` ve `bake` yapmıyoruz çünkü bu, birkaç adım sonra göreceğiniz bir iş akışına dahil edilecek.\n", + "\n", + "Tidymodels'de lojistik regresyon modelini belirtmenin oldukça fazla yolu vardır. `?logistic_reg()`'e bakabilirsiniz. 
Şimdilik, varsayılan `stats::glm()` motoru aracılığıyla bir lojistik regresyon modeli belirteceğiz.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Create a recipe that specifies preprocessing steps for modelling\n", + "pumpkins_recipe <- recipe(color ~ ., data = pumpkins_train) %>% \n", + " step_mutate(item_size = ordered(item_size, levels = c('sml', 'med', 'med-lge', 'lge', 'xlge', 'jbo', 'exjbo'))) %>%\n", + " step_integer(item_size, zero_based = F) %>% \n", + " step_dummy(all_nominal(), -all_outcomes(), one_hot = TRUE)\n", + "\n", + "# Create a logistic model specification\n", + "log_reg <- logistic_reg() %>% \n", + " set_engine(\"glm\") %>% \n", + " set_mode(\"classification\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Artık bir tarif ve model spesifikasyonuna sahip olduğumuza göre, bunları bir araya getirerek bir nesne oluşturmanın bir yolunu bulmamız gerekiyor. Bu nesne, önce veriyi ön işleme tabi tutacak (arka planda hazırlık + pişirme), modeli ön işlenmiş veri üzerinde eğitecek ve ayrıca olası son işlem aktivitelerine izin verecek.\n", + "\n", + "Tidymodels'da, bu kullanışlı nesne [`workflow`](https://workflows.tidymodels.org/) olarak adlandırılır ve modelleme bileşenlerinizi pratik bir şekilde bir arada tutar.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Bundle modelling components in a workflow\n", + "log_reg_wf <- workflow() %>% \n", + " add_recipe(pumpkins_recipe) %>% \n", + " add_model(log_reg)\n", + "\n", + "# Print out the workflow\n", + "log_reg_wf\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Bir iş akışı *belirlendikten* sonra, bir model [`fit()`](https://tidymodels.github.io/parsnip/reference/fit.html) fonksiyonu kullanılarak `eğitilebilir`. 
İş akışı, bir tarifi tahmin edecek ve verileri eğitmeden önce ön işleyecektir, bu yüzden bunu manuel olarak prep ve bake kullanarak yapmamıza gerek kalmayacak.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Train the model\n", + "wf_fit <- log_reg_wf %>% \n", + " fit(data = pumpkins_train)\n", + "\n", + "# Print the trained workflow\n", + "wf_fit\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Modelin eğitim sırasında öğrendiği katsayılar çıktı olarak gösterilir.\n", + "\n", + "Artık modeli eğitim verileriyle eğittik, test verileri üzerinde tahminler yapabiliriz. Bunun için [parsnip::predict()](https://parsnip.tidymodels.org/reference/predict.model_fit.html) fonksiyonunu kullanabiliriz. Hadi başlayalım ve modelimizi test seti için etiketleri ve her etiket için olasılıkları tahmin etmek için kullanalım. Olasılık 0.5'ten büyük olduğunda tahmin edilen sınıf `WHITE`, aksi takdirde `ORANGE` olur.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Make predictions for color and corresponding probabilities\n", + "results <- pumpkins_test %>% select(color) %>% \n", + " bind_cols(wf_fit %>% \n", + " predict(new_data = pumpkins_test)) %>%\n", + " bind_cols(wf_fit %>%\n", + " predict(new_data = pumpkins_test, type = \"prob\"))\n", + "\n", + "# Compare predictions\n", + "results %>% \n", + " slice_head(n = 10)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Çok güzel! Bu, lojistik regresyonun nasıl çalıştığına dair daha fazla bilgi sağlıyor.\n", + "\n", + "### Bir karışıklık matrisi ile daha iyi anlama\n", + "\n", + "Her tahmini, karşılık gelen \"gerçek değer\" ile karşılaştırmak, modelin ne kadar iyi tahmin yaptığını belirlemek için çok verimli bir yöntem değildir. 
Neyse ki, Tidymodels'in elinde birkaç numara daha var: [`yardstick`](https://yardstick.tidymodels.org/) - performans metriklerini kullanarak modellerin etkinliğini ölçmek için kullanılan bir paket.\n", + "\n", + "Sınıflandırma problemleriyle ilişkili performans metriklerinden biri [`karışıklık matrisi`](https://wikipedia.org/wiki/Confusion_matrix). Bir karışıklık matrisi, bir sınıflandırma modelinin ne kadar iyi performans gösterdiğini açıklar. Karışıklık matrisi, her sınıftaki kaç örneğin model tarafından doğru bir şekilde sınıflandırıldığını tablo halinde gösterir. Bizim durumumuzda, kaç turuncu balkabağının turuncu olarak sınıflandırıldığını ve kaç beyaz balkabağının beyaz olarak sınıflandırıldığını gösterecek; ayrıca karışıklık matrisi, **yanlış** kategorilere sınıflandırılanların sayısını da gösterir.\n", + "\n", + "Yardstick paketindeki [**`conf_mat()`**](https://tidymodels.github.io/yardstick/reference/conf_mat.html) fonksiyonu, gözlemlenen ve tahmin edilen sınıfların bu çapraz tablosunu hesaplar.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Confusion matrix for prediction results\n", + "conf_mat(data = results, truth = color, estimate = .pred_class)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Hadi karışıklık matrisini yorumlayalım. 
Modelimizden balkabaklarını iki ikili kategori arasında sınıflandırması isteniyor: kategori `beyaz` ve kategori `beyaz değil`.\n", + "\n", + "- Eğer modeliniz bir balkabağını beyaz olarak tahmin ederse ve gerçekte 'beyaz' kategorisine aitse, buna `doğru pozitif` denir ve bu, sol üstteki sayı ile gösterilir.\n", + "\n", + "- Eğer modeliniz bir balkabağını beyaz değil olarak tahmin ederse ve gerçekte 'beyaz' kategorisine aitse, buna `yanlış negatif` denir ve bu, sol alttaki sayı ile gösterilir.\n", + "\n", + "- Eğer modeliniz bir balkabağını beyaz olarak tahmin ederse ve gerçekte 'beyaz değil' kategorisine aitse, buna `yanlış pozitif` denir ve bu, sağ üstteki sayı ile gösterilir.\n", + "\n", + "- Eğer modeliniz bir balkabağını beyaz değil olarak tahmin ederse ve gerçekte 'beyaz değil' kategorisine aitse, buna `doğru negatif` denir ve bu, sağ alttaki sayı ile gösterilir.\n", + "\n", + "| Gerçek |\n", + "|:-----:|\n", + "\n", + "\n", + "| | | |\n", + "|---------------|--------|-------|\n", + "| **Tahmin** | BEYAZ | TURUNCU |\n", + "| BEYAZ | DP | YP |\n", + "| TURUNCU | YN | DN |\n", + "\n", + "Tahmin edebileceğiniz gibi, daha fazla doğru pozitif ve doğru negatif sayısına sahip olmak ve daha az yanlış pozitif ve yanlış negatif sayısına sahip olmak tercih edilir, bu da modelin daha iyi performans gösterdiğini ifade eder.\n", + "\n", + "Karışıklık matrisi faydalıdır çünkü bir sınıflandırma modelinin performansını daha iyi değerlendirmemize yardımcı olabilecek diğer metriklerin ortaya çıkmasını sağlar. Şimdi bunlardan bazılarını inceleyelim:\n", + "\n", + "🎓 Kesinlik: `DP/(DP + YP)` tahmin edilen pozitiflerin gerçekten pozitif olma oranı olarak tanımlanır. Ayrıca [pozitif öngörü değeri](https://en.wikipedia.org/wiki/Positive_predictive_value \"Positive predictive value\") olarak da adlandırılır.\n", + "\n", + "🎓 Duyarlılık: `DP/(DP + YN)` gerçekte pozitif olan örnekler arasından pozitif sonuçların oranı olarak tanımlanır. 
Ayrıca `hassasiyet` olarak da bilinir.\n", + "\n", + "🎓 Özgüllük: `DN/(DN + YP)` gerçekte negatif olan örnekler arasından negatif sonuçların oranı olarak tanımlanır.\n", + "\n", + "🎓 Doğruluk: `(DP + DN)/(DP + DN + YP + YN)` Bir örnek için doğru bir şekilde tahmin edilen etiketlerin yüzdesi.\n", + "\n", + "🎓 F Ölçüsü: Kesinlik ve duyarlılığın ağırlıklı ortalaması, en iyi değer 1 ve en kötü değer 0'dır.\n", + "\n", + "Haydi bu metrikleri hesaplayalım!\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Combine metric functions and calculate them all at once\n", + "eval_metrics <- metric_set(ppv, recall, spec, f_meas, accuracy)\n", + "eval_metrics(data = results, truth = color, estimate = .pred_class)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Bu modelin ROC eğrisini görselleştirin\n", + "\n", + "Hadi, sözde [`ROC eğrisi`](https://en.wikipedia.org/wiki/Receiver_operating_characteristic)'ni görmek için bir görselleştirme daha yapalım:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Make a roc_curve\n", + "results %>% \n", + " roc_curve(color, .pred_ORANGE) %>% \n", + " autoplot()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ROC eğrileri, bir sınıflandırıcının çıktısını doğru ve yanlış pozitifler açısından değerlendirmek için sıklıkla kullanılır. ROC eğrileri genellikle Y ekseninde `Doğru Pozitif Oranı`/Duyarlılık ve X ekseninde `Yanlış Pozitif Oranı`/1-Özgüllük gösterir. Bu nedenle, eğrinin dikliği ve orta çizgi ile eğri arasındaki boşluk önemlidir: eğrinin hızla yukarı çıkıp çizginin üzerine geçmesini istersiniz. 
Bizim durumumuzda, başlangıçta yanlış pozitifler vardır ve ardından çizgi düzgün bir şekilde yukarı çıkıp geçer.\n", + "\n", + "Son olarak, gerçek Eğri Altındaki Alanı hesaplamak için `yardstick::roc_auc()` kullanabiliriz. AUC'yi yorumlamanın bir yolu, modelin rastgele bir pozitif örneği rastgele bir negatif örnekten daha yüksek sıralama olasılığı olarak değerlendirilmesidir.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Calculate area under curve\n", + "results %>% \n", + " roc_auc(color, .pred_ORANGE)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Sonuç yaklaşık `0.975`. AUC'nin 0 ile 1 arasında değiştiğini göz önünde bulundurursak, yüksek bir skor elde etmek istersiniz çünkü tahminlerinde %100 doğru olan bir modelin AUC değeri 1 olur; bu durumda model *oldukça iyi*.\n", + "\n", + "Gelecekteki sınıflandırma derslerinde, modelinizin skorlarını nasıl iyileştirebileceğinizi öğreneceksiniz (bu durumda dengesiz veriyle başa çıkmak gibi).\n", + "\n", + "## 🚀Meydan Okuma\n", + "\n", + "Lojistik regresyon hakkında keşfedilecek çok şey var! Ancak öğrenmenin en iyi yolu denemektir. Bu tür bir analize uygun bir veri seti bulun ve onunla bir model oluşturun. Ne öğreniyorsunuz? ipucu: İlginç veri setleri için [Kaggle](https://www.kaggle.com/search?q=logistic+regression+datasets)'ı deneyin.\n", + "\n", + "## Gözden Geçirme ve Kendi Kendine Çalışma\n", + "\n", + "Stanford'dan [bu makalenin](https://web.stanford.edu/~jurafsky/slp3/5.pdf) ilk birkaç sayfasını okuyarak lojistik regresyonun bazı pratik kullanım alanlarını öğrenin. Şimdiye kadar incelediğimiz regresyon türlerinden hangisinin hangi görevler için daha uygun olduğunu düşünün. 
Hangisi daha iyi çalışır?\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, [Co-op Translator](https://github.com/Azure/co-op-translator) adlı yapay zeka çeviri hizmeti kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlıklar içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. Bu çevirinin kullanımından kaynaklanan yanlış anlama veya yanlış yorumlamalardan sorumlu değiliz.\n" + ] + } + ], + "metadata": { + "anaconda-cloud": "", + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "3.4.1" + }, + "coopTranslator": { + "original_hash": "feaf125f481a89c468fa115bf2aed580", + "translation_date": "2025-09-06T13:37:00+00:00", + "source_file": "2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/translations/tr/2-Regression/4-Logistic/solution/notebook.ipynb b/translations/tr/2-Regression/4-Logistic/solution/notebook.ipynb new file mode 100644 index 000000000..2c5be3543 --- /dev/null +++ b/translations/tr/2-Regression/4-Logistic/solution/notebook.ipynb @@ -0,0 +1,1257 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Lojistik Regresyon - Ders 4\n", + "\n", + "Gerekli kütüphaneleri ve veri setini yükleyin. Verileri, verilerin bir alt kümesini içeren bir veri çerçevesine dönüştürün:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NameTypePackageVarietySub VarietyGradeDateLow PriceHigh PriceMostly Low...Unit of SaleQualityConditionAppearanceStorageCropRepackTrans ModeUnnamed: 24Unnamed: 25
0BALTIMORENaN24 inch binsNaNNaNNaN4/29/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
1BALTIMORENaN24 inch binsNaNNaNNaN5/6/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
2BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
3BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
4BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN11/5/1690.0100.090.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " City Name Type Package Variety Sub Variety Grade Date \n", + "0 BALTIMORE NaN 24 inch bins NaN NaN NaN 4/29/17 \\\n", + "1 BALTIMORE NaN 24 inch bins NaN NaN NaN 5/6/17 \n", + "2 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "3 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "4 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 11/5/16 \n", + "\n", + " Low Price High Price Mostly Low ... Unit of Sale Quality Condition \n", + "0 270.0 280.0 270.0 ... NaN NaN NaN \\\n", + "1 270.0 280.0 270.0 ... NaN NaN NaN \n", + "2 160.0 160.0 160.0 ... NaN NaN NaN \n", + "3 160.0 160.0 160.0 ... NaN NaN NaN \n", + "4 90.0 100.0 90.0 ... NaN NaN NaN \n", + "\n", + " Appearance Storage Crop Repack Trans Mode Unnamed: 24 Unnamed: 25 \n", + "0 NaN NaN NaN E NaN NaN NaN \n", + "1 NaN NaN NaN E NaN NaN NaN \n", + "2 NaN NaN NaN N NaN NaN NaN \n", + "3 NaN NaN NaN N NaN NaN NaN \n", + "4 NaN NaN NaN N NaN NaN NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "full_pumpkins = pd.read_csv('../../data/US-pumpkins.csv')\n", + "\n", + "full_pumpkins.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NamePackageVarietyOriginItem SizeColor
2BALTIMORE24 inch binsHOWDEN TYPEDELAWAREmedORANGE
3BALTIMORE24 inch binsHOWDEN TYPEVIRGINIAmedORANGE
4BALTIMORE24 inch binsHOWDEN TYPEMARYLANDlgeORANGE
5BALTIMORE24 inch binsHOWDEN TYPEMARYLANDlgeORANGE
6BALTIMORE36 inch binsHOWDEN TYPEMARYLANDmedORANGE
\n", + "
" + ], + "text/plain": [ + " City Name Package Variety Origin Item Size Color\n", + "2 BALTIMORE 24 inch bins HOWDEN TYPE DELAWARE med ORANGE\n", + "3 BALTIMORE 24 inch bins HOWDEN TYPE VIRGINIA med ORANGE\n", + "4 BALTIMORE 24 inch bins HOWDEN TYPE MARYLAND lge ORANGE\n", + "5 BALTIMORE 24 inch bins HOWDEN TYPE MARYLAND lge ORANGE\n", + "6 BALTIMORE 36 inch bins HOWDEN TYPE MARYLAND med ORANGE" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Select the columns we want to use\n", + "columns_to_select = ['City Name','Package','Variety', 'Origin','Item Size', 'Color']\n", + "pumpkins = full_pumpkins.loc[:, columns_to_select]\n", + "\n", + "# Drop rows with missing values\n", + "pumpkins.dropna(inplace=True)\n", + "\n", + "pumpkins.head()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Haydi verilerimize bir göz atalım!\n", + "\n", + "Seaborn ile görselleştirerek\n" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import seaborn as sns\n", + "# Specify colors for each values of the hue variable\n", + "palette = {\n", + " 'ORANGE': 'orange',\n", + " 'WHITE': 'wheat',\n", + "}\n", + "# Plot a bar plot to visualize how many pumpkins of each variety are orange or white\n", + "sns.catplot(\n", + " data=pumpkins, y=\"Variety\", hue=\"Color\", kind=\"count\",\n", + " palette=palette, \n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Veri Ön İşleme\n", + "\n", + "Verileri daha iyi görselleştirmek ve modeli eğitmek için özellikleri ve etiketleri kodlayalım.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['med', 'lge', 'sml', 'xlge', 'med-lge', 'jbo', 'exjbo'],\n", + " dtype=object)" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Let's look at the different values of the 'Item Size' column\n", + "pumpkins['Item Size'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import OrdinalEncoder\n", + "# Encode the 'Item Size' column using ordinal encoding\n", + "item_size_categories = [['sml', 'med', 'med-lge', 'lge', 'xlge', 'jbo', 'exjbo']]\n", + "ordinal_features = ['Item Size']\n", + "ordinal_encoder = OrdinalEncoder(categories=item_size_categories)" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import OneHotEncoder\n", + "# Encode all the other features using one-hot encoding\n", + "categorical_features = ['City Name', 'Package', 'Variety', 'Origin']\n", + "categorical_encoder = OneHotEncoder(sparse_output=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + 
"outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ord__Item Sizecat__City Name_ATLANTAcat__City Name_BALTIMOREcat__City Name_BOSTONcat__City Name_CHICAGOcat__City Name_COLUMBIAcat__City Name_DALLAScat__City Name_DETROITcat__City Name_LOS ANGELEScat__City Name_MIAMI...cat__Origin_MICHIGANcat__Origin_NEW JERSEYcat__Origin_NEW YORKcat__Origin_NORTH CAROLINAcat__Origin_OHIOcat__Origin_PENNSYLVANIAcat__Origin_TENNESSEEcat__Origin_TEXAScat__Origin_VERMONTcat__Origin_VIRGINIA
21.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
31.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.01.0
43.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
53.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
61.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
\n", + "

5 rows × 48 columns

\n", + "
" + ], + "text/plain": [ + " ord__Item Size cat__City Name_ATLANTA cat__City Name_BALTIMORE \n", + "2 1.0 0.0 1.0 \\\n", + "3 1.0 0.0 1.0 \n", + "4 3.0 0.0 1.0 \n", + "5 3.0 0.0 1.0 \n", + "6 1.0 0.0 1.0 \n", + "\n", + " cat__City Name_BOSTON cat__City Name_CHICAGO cat__City Name_COLUMBIA \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__City Name_DALLAS cat__City Name_DETROIT cat__City Name_LOS ANGELES \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__City Name_MIAMI ... cat__Origin_MICHIGAN cat__Origin_NEW JERSEY \n", + "2 0.0 ... 0.0 0.0 \\\n", + "3 0.0 ... 0.0 0.0 \n", + "4 0.0 ... 0.0 0.0 \n", + "5 0.0 ... 0.0 0.0 \n", + "6 0.0 ... 0.0 0.0 \n", + "\n", + " cat__Origin_NEW YORK cat__Origin_NORTH CAROLINA cat__Origin_OHIO \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__Origin_PENNSYLVANIA cat__Origin_TENNESSEE cat__Origin_TEXAS \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__Origin_VERMONT cat__Origin_VIRGINIA \n", + "2 0.0 0.0 \n", + "3 0.0 1.0 \n", + "4 0.0 0.0 \n", + "5 0.0 0.0 \n", + "6 0.0 0.0 \n", + "\n", + "[5 rows x 48 columns]" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.compose import ColumnTransformer\n", + "ct = ColumnTransformer(transformers=[\n", + " ('ord', ordinal_encoder, ordinal_features),\n", + " ('cat', categorical_encoder, categorical_features)\n", + " ])\n", + "# Get the encoded features as a pandas DataFrame\n", + "ct.set_output(transform='pandas')\n", + "encoded_features = ct.fit_transform(pumpkins)\n", + "encoded_features.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + 
{ + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ord__Item Sizecat__City Name_ATLANTAcat__City Name_BALTIMOREcat__City Name_BOSTONcat__City Name_CHICAGOcat__City Name_COLUMBIAcat__City Name_DALLAScat__City Name_DETROITcat__City Name_LOS ANGELEScat__City Name_MIAMI...cat__Origin_NEW JERSEYcat__Origin_NEW YORKcat__Origin_NORTH CAROLINAcat__Origin_OHIOcat__Origin_PENNSYLVANIAcat__Origin_TENNESSEEcat__Origin_TEXAScat__Origin_VERMONTcat__Origin_VIRGINIAColor
21.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00
31.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00
43.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00
53.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00
61.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00
\n", + "

5 rows × 49 columns

\n", + "
" + ], + "text/plain": [ + " ord__Item Size cat__City Name_ATLANTA cat__City Name_BALTIMORE \n", + "2 1.0 0.0 1.0 \\\n", + "3 1.0 0.0 1.0 \n", + "4 3.0 0.0 1.0 \n", + "5 3.0 0.0 1.0 \n", + "6 1.0 0.0 1.0 \n", + "\n", + " cat__City Name_BOSTON cat__City Name_CHICAGO cat__City Name_COLUMBIA \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__City Name_DALLAS cat__City Name_DETROIT cat__City Name_LOS ANGELES \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__City Name_MIAMI ... cat__Origin_NEW JERSEY cat__Origin_NEW YORK \n", + "2 0.0 ... 0.0 0.0 \\\n", + "3 0.0 ... 0.0 0.0 \n", + "4 0.0 ... 0.0 0.0 \n", + "5 0.0 ... 0.0 0.0 \n", + "6 0.0 ... 0.0 0.0 \n", + "\n", + " cat__Origin_NORTH CAROLINA cat__Origin_OHIO cat__Origin_PENNSYLVANIA \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__Origin_TENNESSEE cat__Origin_TEXAS cat__Origin_VERMONT \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__Origin_VIRGINIA Color \n", + "2 0.0 0 \n", + "3 1.0 0 \n", + "4 0.0 0 \n", + "5 0.0 0 \n", + "6 0.0 0 \n", + "\n", + "[5 rows x 49 columns]" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.preprocessing import LabelEncoder\n", + "# Encode the 'Color' column using label encoding\n", + "label_encoder = LabelEncoder()\n", + "encoded_label = label_encoder.fit_transform(pumpkins['Color'])\n", + "encoded_pumpkins = encoded_features.assign(Color=encoded_label)\n", + "encoded_pumpkins.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['ORANGE', 'WHITE']" + ] + }, + "execution_count": 71, + "metadata": 
{}, + "output_type": "execute_result" + } + ], + "source": [ + "# Let's look at the mapping between the encoded values and the original values\n", + "list(label_encoder.inverse_transform([0, 1]))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "palette = {\n", + " 'ORANGE': 'orange',\n", + " 'WHITE': 'wheat',\n", + "}\n", + "# We need the encoded Item Size column to use it as the x-axis values in the plot\n", + "pumpkins['Item Size'] = encoded_pumpkins['ord__Item Size']\n", + "\n", + "g = sns.catplot(\n", + " data=pumpkins,\n", + " x=\"Item Size\", y=\"Color\", row='Variety',\n", + " kind=\"box\", orient=\"h\",\n", + " sharex=False, margin_titles=True,\n", + " height=1.8, aspect=4, palette=palette,\n", + ")\n", + "# Defining axis labels \n", + "g.set(xlabel=\"Item Size\", ylabel=\"\").set(xlim=(0,6))\n", + "g.set_titles(row_template=\"{row_name}\")\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "warnings.filterwarnings(action='ignore', category=UserWarning, module='seaborn')" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Suppressing warning message claiming that a portion of points cannot be placed into the plot due to the high number of data points\n", + "import warnings\n", + "warnings.filterwarnings(action='ignore', category=UserWarning, module='seaborn')\n", + "\n", + "palette = {\n", + " 0: 'orange',\n", + " 1: 'wheat'\n", + "}\n", + "sns.swarmplot(x=\"Color\", y=\"ord__Item Size\", hue=\"Color\", data=encoded_pumpkins, palette=palette)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Dikkat edin**: Uyarıları görmezden gelmek, genellikle iyi bir uygulama değildir ve mümkün olduğunca kaçınılmalıdır. Uyarılar, kodumuzu geliştirmemize ve bir sorunu çözmemize yardımcı olabilecek faydalı mesajlar içerir. \n", + "Bu özel uyarıyı görmezden gelmemizin nedeni, grafiğin okunabilirliğini garanti altına almaktır. Tüm veri noktalarını daha küçük bir işaretçi boyutuyla çizerken, palet rengindeki tutarlılığı korumak, net olmayan bir görselleştirme oluşturur.\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Modelinizi oluşturun\n" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "# X is the encoded features\n", + "X = encoded_pumpkins[encoded_pumpkins.columns.difference(['Color'])]\n", + "# y is the encoded label\n", + "y = encoded_pumpkins['Color']\n", + "\n", + "# Split the data into training and test sets\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.94 0.98 0.96 166\n", + " 1 0.85 0.67 0.75 33\n", + "\n", + " accuracy 
0.92 199\n", + " macro avg 0.89 0.82 0.85 199\n", + "weighted avg 0.92 0.92 0.92 199\n", + "\n", + "Predicted labels: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0\n", + " 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 1 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 1 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0\n", + " 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1\n", + " 0 0 0 1 0 0 0 0 0 0 0 0 1 1]\n", + "F1-score: 0.7457627118644068\n" + ] + } + ], + "source": [ + "from sklearn.metrics import f1_score, classification_report \n", + "from sklearn.linear_model import LogisticRegression\n", + "\n", + "# Train a logistic regression model on the pumpkin dataset\n", + "model = LogisticRegression()\n", + "model.fit(X_train, y_train)\n", + "predictions = model.predict(X_test)\n", + "\n", + "# Evaluate the model and print the results\n", + "print(classification_report(y_test, predictions))\n", + "print('Predicted labels: ', predictions)\n", + "print('F1-score: ', f1_score(y_test, predictions))" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[162, 4],\n", + " [ 11, 22]])" + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.metrics import confusion_matrix\n", + "confusion_matrix(y_test, predictions)" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from sklearn.metrics import roc_curve, roc_auc_score\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "\n", + "y_scores = model.predict_proba(X_test)\n", + "# calculate ROC curve\n", + "fpr, tpr, thresholds = roc_curve(y_test, y_scores[:,1])\n", + "\n", + "# plot ROC curve\n", + "fig = plt.figure(figsize=(6, 6))\n", + "# Plot the diagonal 50% line\n", + "plt.plot([0, 1], [0, 1], 'k--')\n", + "# Plot the FPR and TPR achieved by our model\n", + "plt.plot(fpr, tpr)\n", + "plt.xlabel('False Positive Rate')\n", + "plt.ylabel('True Positive Rate')\n", + "plt.title('ROC Curve')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.9749908725812341\n" + ] + } + ], + "source": [ + "# Calculate AUC score\n", + "auc = roc_auc_score(y_test,y_scores[:,1])\n", + "print(auc)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, [Co-op Translator](https://github.com/Azure/co-op-translator) adlı yapay zeka çeviri hizmeti kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlıklar içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. 
Bu çevirinin kullanımından kaynaklanan yanlış anlama veya yanlış yorumlamalardan sorumlu değiliz.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "vscode": { + "interpreter": { + "hash": "949777d72b0d2535278d3dc13498b2535136f6dfe0678499012e853ee9abcab1" + } + }, + "coopTranslator": { + "original_hash": "ef50cc584e0b79412610cc7da15e1f86", + "translation_date": "2025-09-06T13:28:21+00:00", + "source_file": "2-Regression/4-Logistic/solution/notebook.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/tr/3-Web-App/1-Web-App/notebook.ipynb b/translations/tr/3-Web-App/1-Web-App/notebook.ipynb new file mode 100644 index 000000000..e69de29bb diff --git a/translations/tr/3-Web-App/1-Web-App/solution/notebook.ipynb b/translations/tr/3-Web-App/1-Web-App/solution/notebook.ipynb new file mode 100644 index 000000000..795c3f821 --- /dev/null +++ b/translations/tr/3-Web-App/1-Web-App/solution/notebook.ipynb @@ -0,0 +1,267 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": 
"70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "5fa2e8f4584c78250ca9729b46562ceb", + "translation_date": "2025-09-06T14:32:21+00:00", + "source_file": "3-Web-App/1-Web-App/solution/notebook.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " datetime city state country shape \\\n", + "0 10/10/1949 20:30 san marcos tx us cylinder \n", + "1 10/10/1949 21:00 lackland afb tx NaN light \n", + "2 10/10/1955 17:00 chester (uk/england) NaN gb circle \n", + "3 10/10/1956 21:00 edna tx us circle \n", + "4 10/10/1960 20:00 kaneohe hi us light \n", + "\n", + " duration (seconds) duration (hours/min) \\\n", + "0 2700.0 45 minutes \n", + "1 7200.0 1-2 hrs \n", + "2 20.0 20 seconds \n", + "3 20.0 1/2 hour \n", + "4 900.0 15 minutes \n", + "\n", + " comments date posted latitude \\\n", + "0 This event took place in early fall around 194... 4/27/2004 29.883056 \n", + "1 1949 Lackland AFB, TX. Lights racing acros... 12/16/2005 29.384210 \n", + "2 Green/Orange circular disc over Chester, En... 1/21/2008 53.200000 \n", + "3 My older brother and twin sister were leaving ... 1/17/2004 28.978333 \n", + "4 AS a Marine 1st Lt. flying an FJ4B fighter/att... 1/22/2004 21.418056 \n", + "\n", + " longitude \n", + "0 -97.941111 \n", + "1 -98.581082 \n", + "2 -2.916667 \n", + "3 -96.645833 \n", + "4 -157.803611 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
datetimecitystatecountryshapeduration (seconds)duration (hours/min)commentsdate postedlatitudelongitude
010/10/1949 20:30san marcostxuscylinder2700.045 minutesThis event took place in early fall around 194...4/27/200429.883056-97.941111
110/10/1949 21:00lackland afbtxNaNlight7200.01-2 hrs1949 Lackland AFB&#44 TX. Lights racing acros...12/16/200529.384210-98.581082
210/10/1955 17:00chester (uk/england)NaNgbcircle20.020 secondsGreen/Orange circular disc over Chester&#44 En...1/21/200853.200000-2.916667
310/10/1956 21:00ednatxuscircle20.01/2 hourMy older brother and twin sister were leaving ...1/17/200428.978333-96.645833
410/10/1960 20:00kaneohehiuslight900.015 minutesAS a Marine 1st Lt. flying an FJ4B fighter/att...1/22/200421.418056-157.803611
\n
" + }, + "metadata": {}, + "execution_count": 23 + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "ufos = pd.read_csv('../data/ufos.csv')\n", + "ufos.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array(['us', nan, 'gb', 'ca', 'au', 'de'], dtype=object)" + ] + }, + "metadata": {}, + "execution_count": 24 + } + ], + "source": [ + "\n", + "ufos = pd.DataFrame({'Seconds': ufos['duration (seconds)'], 'Country': ufos['country'],'Latitude': ufos['latitude'],'Longitude': ufos['longitude']})\n", + "\n", + "ufos.Country.unique()\n", + "\n", + "# 0 au, 1 ca, 2 de, 3 gb, 4 us" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\nInt64Index: 25863 entries, 2 to 80330\nData columns (total 4 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Seconds 25863 non-null float64\n 1 Country 25863 non-null object \n 2 Latitude 25863 non-null float64\n 3 Longitude 25863 non-null float64\ndtypes: float64(3), object(1)\nmemory usage: 1010.3+ KB\n" + ] + } + ], + "source": [ + "ufos.dropna(inplace=True)\n", + "\n", + "ufos = ufos[(ufos['Seconds'] >= 1) & (ufos['Seconds'] <= 60)]\n", + "\n", + "ufos.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Seconds Country Latitude Longitude\n", + "2 20.0 3 53.200000 -2.916667\n", + "3 20.0 4 28.978333 -96.645833\n", + "14 30.0 4 35.823889 -80.253611\n", + "23 60.0 4 45.582778 -122.352222\n", + "24 3.0 3 51.783333 -0.783333" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
SecondsCountryLatitudeLongitude
220.0353.200000-2.916667
320.0428.978333-96.645833
1430.0435.823889-80.253611
2360.0445.582778-122.352222
243.0351.783333-0.783333
\n
" + }, + "metadata": {}, + "execution_count": 26 + } + ], + "source": [ + "from sklearn.preprocessing import LabelEncoder\n", + "\n", + "ufos['Country'] = LabelEncoder().fit_transform(ufos['Country'])\n", + "\n", + "ufos.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "\n", + "Selected_features = ['Seconds','Latitude','Longitude']\n", + "\n", + "X = ufos[Selected_features]\n", + "y = ufos['Country']\n", + "\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", + " FutureWarning)\n", + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n", + " \"this warning.\", FutureWarning)\n", + " precision recall f1-score support\n", + "\n", + " 0 1.00 1.00 1.00 41\n", + " 1 1.00 0.02 0.05 250\n", + " 2 0.00 0.00 0.00 8\n", + " 3 0.94 1.00 0.97 131\n", + " 4 0.95 1.00 0.97 4743\n", + "\n", + " accuracy 0.95 5173\n", + " macro avg 0.78 0.60 0.60 5173\n", + "weighted avg 0.95 0.95 0.93 5173\n", + "\n", + "Predicted labels: [4 4 4 ... 
3 4 4]\n", + "Accuracy: 0.9512855209742895\n", + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/metrics/classification.py:1437: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.\n", + " 'precision', 'predicted', average, warn_for)\n" + ] + } + ], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import accuracy_score, classification_report \n", + "from sklearn.linear_model import LogisticRegression\n", + "model = LogisticRegression()\n", + "model.fit(X_train, y_train)\n", + "predictions = model.predict(X_test)\n", + "\n", + "print(classification_report(y_test, predictions))\n", + "print('Predicted labels: ', predictions)\n", + "print('Accuracy: ', accuracy_score(y_test, predictions))\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[3]\n" + ] + } + ], + "source": [ + "import pickle\n", + "model_filename = 'ufo-model.pkl'\n", + "pickle.dump(model, open(model_filename,'wb'))\n", + "\n", + "model = pickle.load(open('ufo-model.pkl','rb'))\n", + "print(model.predict([[50,44,-12]]))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, [Co-op Translator](https://github.com/Azure/co-op-translator) adlı yapay zeka çeviri hizmeti kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlıklar içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. 
Bu çevirinin kullanımından kaynaklanan yanlış anlamalar veya yanlış yorumlamalar için sorumluluk kabul etmiyoruz.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/tr/4-Classification/1-Introduction/notebook.ipynb b/translations/tr/4-Classification/1-Introduction/notebook.ipynb new file mode 100644 index 000000000..bec34b0ae --- /dev/null +++ b/translations/tr/4-Classification/1-Introduction/notebook.ipynb @@ -0,0 +1,39 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "d544ef384b7ba73757d830a72372a7f2", + "translation_date": "2025-09-06T14:50:59+00:00", + "source_file": "4-Classification/1-Introduction/notebook.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, [Co-op Translator](https://github.com/Azure/co-op-translator) adlı yapay zeka çeviri hizmeti kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlıklar içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. 
Bu çevirinin kullanımından kaynaklanan yanlış anlama veya yanlış yorumlamalardan sorumlu değiliz.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/tr/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb b/translations/tr/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb new file mode 100644 index 000000000..101918c25 --- /dev/null +++ b/translations/tr/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb @@ -0,0 +1,727 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_10-R.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "2621e24705e8100893c9bf84e0fc8aef", + "translation_date": "2025-09-06T15:01:10+00:00", + "source_file": "4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb", + "language_code": "tr" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Lezzetli Asya ve Hint Mutfağı: Bir sınıflandırma modeli oluşturun\n" + ], + "metadata": { + "id": "ItETB4tSFprR" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Sınıflandırmaya Giriş: Verilerinizi Temizleyin, Hazırlayın ve Görselleştirin\n", + "\n", + "Bu dört derste, klasik makine öğreniminin temel odak noktalarından biri olan *sınıflandırmayı* keşfedeceksiniz. Asya ve Hindistan'ın tüm muhteşem mutfakları hakkında bir veri seti kullanarak çeşitli sınıflandırma algoritmalarını inceleyeceğiz. Umarım acıkmışsınızdır!\n", + "\n", + "

\n", + " \n", + "

Bu derslerde pan-Asya mutfaklarını kutlayın! Görsel: Jen Looper
\n", + "\n", + "\n", + "\n", + "\n", + "Sınıflandırma, regresyon teknikleriyle birçok ortak noktası olan bir [denetimli öğrenme](https://wikipedia.org/wiki/Supervised_learning) biçimidir. Sınıflandırmada, bir öğenin hangi `kategoriye` ait olduğunu tahmin etmek için bir model eğitirsiniz. Makine öğrenimi, veri setlerini kullanarak değerleri veya şeylerin isimlerini tahmin etmekle ilgiliyse, sınıflandırma genellikle iki gruba ayrılır: *ikili sınıflandırma* ve *çoklu sınıflandırma*.\n", + "\n", + "Unutmayın:\n", + "\n", + "- **Doğrusal regresyon**, değişkenler arasındaki ilişkileri tahmin etmenize ve yeni bir veri noktasının bu çizgiyle ilişkili olarak nerede yer alacağını doğru bir şekilde tahmin etmenize yardımcı oldu. Örneğin, *Eylül ve Aralık aylarında bir kabağın fiyatının ne olacağını* tahmin edebilirsiniz.\n", + "\n", + "- **Lojistik regresyon**, \"ikili kategorileri\" keşfetmenize yardımcı oldu: bu fiyat noktasında, *bu kabak turuncu mu yoksa turuncu değil mi*?\n", + "\n", + "Sınıflandırma, bir veri noktasının etiketini veya sınıfını belirlemenin diğer yollarını belirlemek için çeşitli algoritmalar kullanır. Bu mutfak verileriyle çalışarak, bir grup malzemeyi gözlemleyerek, bu malzemelerin hangi mutfağa ait olduğunu belirleyip belirleyemeyeceğimizi görelim.\n", + "\n", + "### [**Ders Öncesi Testi**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/19/)\n", + "\n", + "### **Giriş**\n", + "\n", + "Sınıflandırma, makine öğrenimi araştırmacısının ve veri bilimcisinin temel faaliyetlerinden biridir. 
Basit bir ikili değerin sınıflandırılmasından (\"bu e-posta spam mi değil mi?\"), bilgisayarla görme kullanarak karmaşık görüntü sınıflandırma ve segmentasyona kadar, verileri sınıflara ayırmak ve onlara sorular sormak her zaman faydalıdır.\n", + "\n", + "Bu süreci daha bilimsel bir şekilde ifade etmek gerekirse, sınıflandırma yöntemi, giriş değişkenleri ile çıkış değişkenleri arasındaki ilişkiyi haritalamanıza olanak tanıyan bir tahmin modeli oluşturur.\n", + "\n", + "

\n", + " \n", + "

Sınıflandırma algoritmalarının ele alması gereken ikili ve çoklu sınıf problemleri. Bilgilendirme görseli: Jen Looper
\n", + "\n", + "\n", + "\n", + "Verilerimizi temizleme, görselleştirme ve makine öğrenimi görevlerimiz için hazırlama sürecine başlamadan önce, makine öğreniminin verileri sınıflandırmak için kullanılabileceği çeşitli yollar hakkında biraz bilgi edinelim.\n", + "\n", + "[İstatistikten](https://wikipedia.org/wiki/Statistical_classification) türetilen klasik makine öğrenimi ile sınıflandırma, `sigara içen`, `kilo` ve `yaş` gibi özellikleri kullanarak *X hastalığını geliştirme olasılığını* belirler. Daha önce gerçekleştirdiğiniz regresyon egzersizlerine benzer bir denetimli öğrenme tekniği olarak, verileriniz etiketlenir ve makine öğrenimi algoritmaları bu etiketleri kullanarak bir veri setinin sınıflarını (veya 'özelliklerini') sınıflandırır ve bunları bir gruba veya sonuca atar.\n", + "\n", + "✅ Bir mutfaklar hakkında bir veri seti hayal etmek için bir an durun. Çoklu sınıf modeli neyi cevaplayabilir? İkili model neyi cevaplayabilir? Belirli bir mutfağın çemen otu kullanma olasılığını belirlemek isteseydiniz ne olurdu? Ya yıldız anason, enginar, karnabahar ve yaban turpu dolu bir market çantası hediye aldığınızda, tipik bir Hint yemeği yapıp yapamayacağınızı görmek isteseydiniz?\n", + "\n", + "### **Merhaba 'sınıflandırıcı'**\n", + "\n", + "Bu mutfak veri setine sormak istediğimiz soru aslında bir **çoklu sınıf sorusu**, çünkü çalışabileceğimiz birkaç olası ulusal mutfak var. Bir grup malzeme verildiğinde, bu birçok sınıftan hangisine veri uyacak?\n", + "\n", + "Tidymodels, çözmek istediğiniz problemin türüne bağlı olarak verileri sınıflandırmak için kullanabileceğiniz çeşitli algoritmalar sunar. 
Önümüzdeki iki derste, bu algoritmalardan birkaçını öğreneceksiniz.\n", + "\n", + "#### **Ön Koşul**\n", + "\n", + "Bu ders için, verilerimizi temizlemek, hazırlamak ve görselleştirmek için aşağıdaki paketlere ihtiyacımız olacak:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/), veri bilimi işlemlerini daha hızlı, kolay ve eğlenceli hale getirmek için tasarlanmış bir [R paketleri koleksiyonudur](https://www.tidyverse.org/packages).\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) çerçevesi, modelleme ve makine öğrenimi için bir [paketler koleksiyonudur](https://www.tidymodels.org/packages/).\n", + "\n", + "- `DataExplorer`: [DataExplorer paketi](https://cran.r-project.org/web/packages/DataExplorer/vignettes/dataexplorer-intro.html), EDA sürecini ve rapor oluşturmayı basitleştirmek ve otomatikleştirmek için tasarlanmıştır.\n", + "\n", + "- `themis`: [themis paketi](https://themis.tidymodels.org/), Dengesiz Verilerle Başa Çıkmak için Ek Tarif Adımları sağlar.\n", + "\n", + "Bu paketleri şu şekilde yükleyebilirsiniz:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\", \"DataExplorer\", \"themis\", \"here\"))`\n", + "\n", + "Alternatif olarak, aşağıdaki script, bu modülü tamamlamak için gereken paketlere sahip olup olmadığınızı kontrol eder ve eksik olanları sizin için yükler.\n" + ], + "metadata": { + "id": "ri5bQxZ-Fz_0" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "suppressWarnings(if (!require(\"pacman\"))install.packages(\"pacman\"))\r\n", + "\r\n", + "pacman::p_load(tidyverse, tidymodels, DataExplorer, themis, here)" + ], + "outputs": [], + "metadata": { + "id": "KIPxa4elGAPI" + } + }, + { + "cell_type": "markdown", + "source": [ + "Daha sonra bu harika paketleri yükleyip mevcut R oturumumuzda kullanılabilir hale getireceğiz.
(Bu yalnızca gösterim amaçlıdır, `pacman::p_load()` bunu zaten sizin için yaptı)\n" + ], + "metadata": { + "id": "YkKAxOJvGD4C" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Alıştırma - Verilerinizi temizleyin ve dengeleyin\n", + "\n", + "Bu projeye başlamadan önceki ilk görev, daha iyi sonuçlar elde etmek için verilerinizi temizlemek ve **dengelemektir**.\n", + "\n", + "Haydi verilerle tanışalım!🕵️\n" + ], + "metadata": { + "id": "PFkQDlk0GN5O" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Import data\r\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/4-Classification/data/cuisines.csv\")\r\n", + "\r\n", + "# View the first 5 rows\r\n", + "df %>% \r\n", + " slice_head(n = 5)\r\n" + ], + "outputs": [], + "metadata": { + "id": "Qccw7okxGT0S" + } + }, + { + "cell_type": "markdown", + "source": [ + "İlginç! Görünüşe göre, ilk sütun bir tür `id` sütunu. Hadi veri hakkında biraz daha bilgi edinelim.\n" + ], + "metadata": { + "id": "XrWnlgSrGVmR" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Basic information about the data\r\n", + "df %>%\r\n", + " introduce()\r\n", + "\r\n", + "# Visualize basic information above\r\n", + "df %>% \r\n", + " plot_intro(ggtheme = theme_light())" + ], + "outputs": [], + "metadata": { + "id": "4UcGmxRxGieA" + } + }, + { + "cell_type": "markdown", + "source": [ + "Çıktıdan hemen görebiliyoruz ki `2448` satır ve `385` sütun ile `0` eksik değerimiz var. Ayrıca 1 ayrık sütunumuz var, *cuisine*.\n", + "\n", + "## Alıştırma - mutfaklar hakkında bilgi edinme\n", + "\n", + "Şimdi işler daha ilginç hale gelmeye başlıyor.
Haydi, veri dağılımını her bir mutfak için keşfedelim.\n" + ], + "metadata": { + "id": "AaPubl__GmH5" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Count observations per cuisine\r\n", + "df %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(n)\r\n", + "\r\n", + "# Plot the distribution\r\n", + "theme_set(theme_light())\r\n", + "df %>% \r\n", + " count(cuisine) %>% \r\n", + " ggplot(mapping = aes(x = n, y = reorder(cuisine, -n))) +\r\n", + " geom_col(fill = \"midnightblue\", alpha = 0.7) +\r\n", + " ylab(\"cuisine\")" + ], + "outputs": [], + "metadata": { + "id": "FRsBVy5eGrrv" + } + }, + { + "cell_type": "markdown", + "source": [ + "Mutfak türleri sınırlıdır, ancak veri dağılımı dengesizdir. Bunu düzeltebilirsiniz! Ancak önce biraz daha keşfetmeye devam edin.\n", + "\n", + "Sonraki adımda, her bir mutfak türünü kendi tibble'ına atayalım ve her mutfak türü için ne kadar veri olduğunu (satırlar, sütunlar) öğrenelim.\n", + "\n", + "> Bir [tibble](https://tibble.tidyverse.org/) modern bir veri çerçevesidir.\n", + "\n", + "

\n", + " \n", + "

@allison_horst tarafından yapılmış bir eser
\n" + ], + "metadata": { + "id": "vVvyDb1kG2in" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Create individual tibble for the cuisines\r\n", + "thai_df <- df %>% \r\n", + " filter(cuisine == \"thai\")\r\n", + "japanese_df <- df %>% \r\n", + " filter(cuisine == \"japanese\")\r\n", + "chinese_df <- df %>% \r\n", + " filter(cuisine == \"chinese\")\r\n", + "indian_df <- df %>% \r\n", + " filter(cuisine == \"indian\")\r\n", + "korean_df <- df %>% \r\n", + " filter(cuisine == \"korean\")\r\n", + "\r\n", + "\r\n", + "# Find out how much data is available per cuisine\r\n", + "cat(\" thai df:\", dim(thai_df), \"\\n\",\r\n", + " \"japanese df:\", dim(japanese_df), \"\\n\",\r\n", + " \"chinese_df:\", dim(chinese_df), \"\\n\",\r\n", + " \"indian_df:\", dim(indian_df), \"\\n\",\r\n", + " \"korean_df:\", dim(korean_df))" + ], + "outputs": [], + "metadata": { + "id": "0TvXUxD3G8Bk" + } + }, + { + "cell_type": "markdown", + "source": [ + "## **Alıştırma - dplyr ile mutfaklara göre en popüler malzemeleri keşfetmek**\n", + "\n", + "Artık verilere daha derinlemesine bakabilir ve her mutfak için tipik malzemelerin neler olduğunu öğrenebilirsiniz. Mutfaklar arasında kafa karışıklığına neden olan tekrar eden verileri temizlemeniz gerekiyor, bu sorunu birlikte inceleyelim.\n", + "\n", + "R'de bir `create_ingredient()` fonksiyonu oluşturun ve bu fonksiyon bir malzeme veri çerçevesi döndürsün. Bu fonksiyon, işe yaramaz bir sütunu kaldırarak başlayacak ve malzemeleri sayısına göre sıralayacak.\n", + "\n", + "R'deki bir fonksiyonun temel yapısı şu şekildedir:\n", + "\n", + "`myFunction <- function(arglist){`\n", + "\n", + "**`...`**\n", + "\n", + "**`return`**`(value)`\n", + "\n", + "`}`\n", + "\n", + "R fonksiyonlarına dair düzenli bir giriş [burada](https://skirmer.github.io/presentations/functions_with_r.html#1) bulunabilir.\n", + "\n", + "Hadi başlayalım! 
Daha önceki derslerimizde öğrendiğimiz [dplyr fiillerini](https://dplyr.tidyverse.org/) kullanacağız. Hatırlatma olarak:\n", + "\n", + "- `dplyr::select()`: hangi **sütunları** tutacağınızı veya hariç tutacağınızı seçmenize yardımcı olur.\n", + "\n", + "- `tidyr::pivot_longer()`: veriyi \"uzatmanıza\" yardımcı olur, satır sayısını artırır ve sütun sayısını azaltır.\n", + "\n", + "- `dplyr::group_by()` ve `dplyr::summarise()`: farklı gruplar için özet istatistikler bulmanıza ve bunları düzenli bir tabloya koymanıza yardımcı olur.\n", + "\n", + "- `dplyr::filter()`: yalnızca koşullarınızı karşılayan satırları içeren bir veri alt kümesi oluşturur.\n", + "\n", + "- `dplyr::mutate()`: sütunlar oluşturmanıza veya mevcut sütunları değiştirmenize yardımcı olur.\n", + "\n", + "Allison Horst tarafından hazırlanan, dplyr *(Tidyverse'in bir parçası)* içindeki bazı kullanışlı veri düzenleme fonksiyonlarını tanıtan bu [*sanat*-dolu learnr eğitimine](https://allisonhorst.shinyapps.io/dplyr-learnr/#section-welcome) göz atabilirsiniz.\n" + ], + "metadata": { + "id": "K3RF5bSCHC76" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Creates a functions that returns the top ingredients by class\r\n", + "\r\n", + "create_ingredient <- function(df){\r\n", + " \r\n", + " # Drop the id column which is the first colum\r\n", + " ingredient_df = df %>% select(-1) %>% \r\n", + " # Transpose data to a long format\r\n", + " pivot_longer(!cuisine, names_to = \"ingredients\", values_to = \"count\") %>% \r\n", + " # Find the top most ingredients for a particular cuisine\r\n", + " group_by(ingredients) %>% \r\n", + " summarise(n_instances = sum(count)) %>% \r\n", + " filter(n_instances != 0) %>% \r\n", + " # Arrange by descending order\r\n", + " arrange(desc(n_instances)) %>% \r\n", + " mutate(ingredients = factor(ingredients) %>% fct_inorder())\r\n", + " \r\n", + " \r\n", + " return(ingredient_df)\r\n", + "} # End of function" + ], + "outputs": [], + "metadata": { +
"id": "uB_0JR82HTPa" + } + }, + { + "cell_type": "markdown", + "source": [ + "Şimdi, mutfağa göre en popüler on malzeme hakkında bir fikir edinmek için fonksiyonu kullanabiliriz. Hadi bunu `thai_df` ile deneyelim.\n" + ], + "metadata": { + "id": "h9794WF8HWmc" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Call create_ingredient and display popular ingredients\r\n", + "thai_ingredient_df <- create_ingredient(df = thai_df)\r\n", + "\r\n", + "thai_ingredient_df %>% \r\n", + " slice_head(n = 10)" + ], + "outputs": [], + "metadata": { + "id": "agQ-1HrcHaEA" + } + }, + { + "cell_type": "markdown", + "source": [ + "Önceki bölümde `geom_col()` kullandık, şimdi de çubuk grafikler oluşturmak için `geom_bar` nasıl kullanılır görelim. Daha fazla bilgi için `?geom_bar` kullanın.\n" + ], + "metadata": { + "id": "kHu9ffGjHdcX" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make a bar chart for popular thai cuisines\r\n", + "thai_ingredient_df %>% \r\n", + " slice_head(n = 10) %>% \r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"steelblue\") +\r\n", + " xlab(\"\") + ylab(\"\")" + ], + "outputs": [], + "metadata": { + "id": "fb3Bx_3DHj6e" + } + }, + { + "cell_type": "markdown", + "source": [ + "Haydi Japonca veriler için de aynısını yapalım.\n" + ], + "metadata": { + "id": "RHP_xgdkHnvM" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Get popular ingredients for Japanese cuisines and make bar chart\r\n", + "create_ingredient(df = japanese_df) %>% \r\n", + " slice_head(n = 10) %>%\r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"darkorange\", alpha = 0.8) +\r\n", + " xlab(\"\") + ylab(\"\")\r\n" + ], + "outputs": [], + "metadata": { + "id": "019v8F0XHrRU" + } + }, + { + "cell_type": "markdown", + "source": [ + "Peki ya Çin mutfağı?\n" + ], 
+ "metadata": { + "id": "iIGM7vO8Hu3v" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Get popular ingredients for Chinese cuisines and make bar chart\r\n", + "create_ingredient(df = chinese_df) %>% \r\n", + " slice_head(n = 10) %>%\r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"cyan4\", alpha = 0.8) +\r\n", + " xlab(\"\") + ylab(\"\")" + ], + "outputs": [], + "metadata": { + "id": "lHd9_gd2HyzU" + } + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "ir8qyQbNH1c7" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Get popular ingredients for Indian cuisines and make bar chart\r\n", + "create_ingredient(df = indian_df) %>% \r\n", + " slice_head(n = 10) %>%\r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"#041E42FF\", alpha = 0.8) +\r\n", + " xlab(\"\") + ylab(\"\")" + ], + "outputs": [], + "metadata": { + "id": "ApukQtKjH5FO" + } + }, + { + "cell_type": "markdown", + "source": [ + "Son olarak, Kore malzemelerini çiz.\n" + ], + "metadata": { + "id": "qv30cwY1H-FM" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Get popular ingredients for Korean cuisines and make bar chart\r\n", + "create_ingredient(df = korean_df) %>% \r\n", + " slice_head(n = 10) %>%\r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"#852419FF\", alpha = 0.8) +\r\n", + " xlab(\"\") + ylab(\"\")" + ], + "outputs": [], + "metadata": { + "id": "lumgk9cHIBie" + } + }, + { + "cell_type": "markdown", + "source": [ + "Veri görselleştirmelerinden, artık farklı mutfaklar arasında karışıklık yaratan en yaygın malzemeleri `dplyr::select()` kullanarak çıkarabiliriz.\n", + "\n", + "Herkes pirinci, sarımsağı ve zencefili sever!\n" + ], + "metadata": { + "id": "iO4veMXuIEta" + } + 
}, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Drop id column, rice, garlic and ginger from our original data set\r\n", + "df_select <- df %>% \r\n", + " select(-c(1, rice, garlic, ginger))\r\n", + "\r\n", + "# Display new data set\r\n", + "df_select %>% \r\n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "iHJPiG6rIUcK" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Tarifler kullanarak veri ön işleme 👩‍🍳👨‍🍳 - Dengesiz veriyle başa çıkma ⚖️\n", + "\n", + "

\n", + " \n", + "

Çizim: @allison_horst
\n", + "\n", + "Bu dersin mutfaklarla ilgili olduğunu göz önünde bulundurarak, `tarifleri` bağlama oturtmamız gerekiyor.\n", + "\n", + "Tidymodels, veri ön işleme için başka bir harika paket sunar: `recipes` - veri ön işleme için bir paket.\n" + ], + "metadata": { + "id": "kkFd-JxdIaL6" + } + }, + { + "cell_type": "markdown", + "source": [ + "Haydi mutfaklarımızın dağılımına bir kez daha göz atalım.\n" + ], + "metadata": { + "id": "6l2ubtTPJAhY" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Distribution of cuisines\r\n", + "old_label_count <- df_select %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(desc(n))\r\n", + "\r\n", + "old_label_count" + ], + "outputs": [], + "metadata": { + "id": "1e-E9cb7JDVi" + } + }, + { + "cell_type": "markdown", + "source": [ + "Gördüğünüz gibi, mutfak türlerinin sayısında oldukça eşitsiz bir dağılım var. Kore mutfakları, Tayland mutfaklarının neredeyse 3 katı. Dengesiz veriler genellikle model performansı üzerinde olumsuz etkilere sahiptir. Bir ikili sınıflandırmayı düşünün. Eğer verilerinizin çoğu bir sınıfa aitse, bir ML modeli, sadece o sınıfa ait daha fazla veri olduğu için, o sınıfı daha sık tahmin edecektir. Verilerin dengelenmesi, herhangi bir dengesizliği giderir ve bu eşitsizliği ortadan kaldırmaya yardımcı olur. Birçok model, gözlem sayılarının eşit olduğu durumlarda en iyi performansı gösterir ve bu nedenle dengesiz verilerle çalışırken zorlanır.\n", + "\n", + "Dengesiz veri setleriyle başa çıkmanın temel olarak iki yolu vardır:\n", + "\n", + "- azınlık sınıfına gözlem eklemek: `Over-sampling` örneğin, bir SMOTE algoritması kullanarak\n", + "\n", + "- çoğunluk sınıfından gözlem çıkarmak: `Under-sampling`\n", + "\n", + "Şimdi bir `recipe` kullanarak dengesiz veri setleriyle nasıl başa çıkılacağını gösterelim. 
Bir recipe, bir veri setine hangi adımların uygulanması gerektiğini tanımlayan bir plan olarak düşünülebilir ve veri analizi için hazır hale getirilmesini sağlar.\n" + ], + "metadata": { + "id": "soAw6826JKx9" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load themis package for dealing with imbalanced data\r\n", + "library(themis)\r\n", + "\r\n", + "# Create a recipe for preprocessing data\r\n", + "cuisines_recipe <- recipe(cuisine ~ ., data = df_select) %>% \r\n", + " step_smote(cuisine)\r\n", + "\r\n", + "cuisines_recipe" + ], + "outputs": [], + "metadata": { + "id": "HS41brUIJVJy" + } + }, + { + "cell_type": "markdown", + "source": [ + "Ön işleme adımlarımızı inceleyelim.\n", + "\n", + "- Bir formül ile `recipe()` çağrısı, `df_select` verilerini referans alarak değişkenlerin *rollerini* tarif eder. Örneğin, `cuisine` sütunu bir `outcome` rolü alırken, diğer sütunlar bir `predictor` rolü almıştır.\n", + "\n", + "- [`step_smote(cuisine)`](https://themis.tidymodels.org/reference/step_smote.html), bu vakaların en yakın komşularını kullanarak azınlık sınıfının sentetik olarak yeni örneklerini oluşturan bir tarif adımının *spesifikasyonunu* oluşturur.\n", + "\n", + "Şimdi, ön işlenmiş veriyi görmek istersek, tarifimizi [**`prep()`**](https://recipes.tidymodels.org/reference/prep.html) ve [**`bake()`**](https://recipes.tidymodels.org/reference/bake.html) ile işlememiz gerekir.\n", + "\n", + "`prep()`: Eğitim setinden gerekli parametreleri tahmin eder ve bu parametreler daha sonra diğer veri setlerine uygulanabilir.\n", + "\n", + "`bake()`: Hazırlanmış bir tarifi alır ve işlemleri herhangi bir veri setine uygular.\n" + ], + "metadata": { + "id": "Yb-7t7XcJaC8" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Prep and bake the recipe\r\n", + "preprocessed_df <- cuisines_recipe %>% \r\n", + " prep() %>% \r\n", + " bake(new_data = NULL) %>% \r\n", + " relocate(cuisine)\r\n", + "\r\n", + "# Display 
data\r\n", + "preprocessed_df %>% \r\n", + " slice_head(n = 5)\r\n", + "\r\n", + "# Quick summary stats\r\n", + "preprocessed_df %>% \r\n", + " introduce()" + ], + "outputs": [], + "metadata": { + "id": "9QhSgdpxJl44" + } + }, + { + "cell_type": "markdown", + "source": [ + "Haydi şimdi mutfaklarımızın dağılımını kontrol edelim ve bunları dengesiz verilerle karşılaştıralım.\n" + ], + "metadata": { + "id": "dmidELh_LdV7" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Distribution of cuisines\r\n", + "new_label_count <- preprocessed_df %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(desc(n))\r\n", + "\r\n", + "list(new_label_count = new_label_count,\r\n", + " old_label_count = old_label_count)" + ], + "outputs": [], + "metadata": { + "id": "aSh23klBLwDz" + } + }, + { + "cell_type": "markdown", + "source": [ + "Yum! Veriler temiz, dengeli ve çok lezzetli 😋!\n", + "\n", + "> Normalde, bir tarif genellikle modelleme için bir ön işlemci olarak kullanılır ve bir veri setine modellemeye hazır hale getirmek için hangi adımların uygulanması gerektiğini tanımlar. 
Bu durumda, genellikle (önceki derslerimizde gördüğümüz gibi) bir `workflow()` kullanılır, tarifleri manuel olarak tahmin etmek yerine.\n", + ">\n", + "> Bu nedenle, tidymodels kullanırken genellikle tarifleri **`prep()`** ve **`bake()`** yapmanız gerekmez, ancak tariflerin beklentilerinizi karşılayıp karşılamadığını doğrulamak için bizim durumumuzda olduğu gibi bu işlevler faydalı olabilir.\n", + ">\n", + "> Bir hazırlanmış tarifi **`new_data = NULL`** ile **`bake()`** ettiğinizde, tarifi tanımlarken sağladığınız veriyi geri alırsınız, ancak ön işleme adımlarından geçmiş olur.\n", + "\n", + "Şimdi bu verinin bir kopyasını gelecekteki derslerde kullanmak için kaydedelim:\n" + ], + "metadata": { + "id": "HEu80HZ8L7ae" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Save preprocessed data\r\n", + "write_csv(preprocessed_df, \"../../../data/cleaned_cuisines_R.csv\")" + ], + "outputs": [], + "metadata": { + "id": "cBmCbIgrMOI6" + } + }, + { + "cell_type": "markdown", + "source": [ + "Bu yeni CSV artık kök veri klasöründe bulunabilir.\n", + "\n", + "**🚀Meydan Okuma**\n", + "\n", + "Bu müfredat birkaç ilginç veri seti içeriyor. `data` klasörlerini inceleyin ve ikili veya çok sınıflı sınıflandırma için uygun olabilecek veri setleri içerip içermediğini kontrol edin. Bu veri seti hakkında hangi soruları sorardınız?\n", + "\n", + "## [**Ders sonrası test**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/20/)\n", + "\n", + "## **Gözden Geçirme ve Kendi Kendine Çalışma**\n", + "\n", + "- [package themis](https://github.com/tidymodels/themis)'e göz atın. Dengesiz veriyle başa çıkmak için başka hangi teknikleri kullanabiliriz?\n", + "\n", + "- Tidy models [referans web sitesi](https://www.tidymodels.org/start/).\n", + "\n", + "- H. Wickham ve G. 
Grolemund, [*R for Data Science: Görselleştirme, Modelleme, Dönüştürme, Düzenleme ve Veri İçe Aktarma*](https://r4ds.had.co.nz/).\n", + "\n", + "#### TEŞEKKÜRLER:\n", + "\n", + "[`Allison Horst`](https://twitter.com/allison_horst/) R'yi daha sıcak ve ilgi çekici hale getiren harika çizimler oluşturduğu için. Daha fazla çizimi onun [galerisinde](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM) bulabilirsiniz.\n", + "\n", + "[Cassie Breviu](https://www.twitter.com/cassieview) ve [Jen Looper](https://www.twitter.com/jenlooper) bu modülün orijinal Python versiyonunu oluşturdukları için ♥️\n", + "\n", + "

\n", + " \n", + "

@allison_horst tarafından yapılmış sanat eseri
\n" + ], + "metadata": { + "id": "WQs5621pMGwf" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, [Co-op Translator](https://github.com/Azure/co-op-translator) adlı yapay zeka çeviri hizmeti kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlıklar içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. Bu çevirinin kullanımından kaynaklanan yanlış anlama veya yanlış yorumlamalardan sorumlu değiliz.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/tr/4-Classification/1-Introduction/solution/notebook.ipynb b/translations/tr/4-Classification/1-Introduction/solution/notebook.ipynb new file mode 100644 index 000000000..6bb9c545f --- /dev/null +++ b/translations/tr/4-Classification/1-Introduction/solution/notebook.ipynb @@ -0,0 +1,717 @@ +{ + "cells": [ + { + "source": [ + "# Lezzetli Asya ve Hint Mutfağı\n", + "\n", + "## Giriş\n", + "\n", + "Asya ve Hint mutfağı, zengin tatları ve çeşitli malzemeleriyle dünya çapında popülerdir. Bu mutfaklar, baharatların ustaca kullanımı ve benzersiz pişirme teknikleriyle tanınır.\n", + "\n", + "## Neden Asya ve Hint Mutfağı?\n", + "\n", + "Asya ve Hint yemekleri, sadece lezzetli olmakla kalmaz, aynı zamanda genellikle sağlıklı ve besleyicidir. Bu mutfaklar, sebzeler, baklagiller, baharatlar ve otlar gibi doğal malzemelere dayanır. Ayrıca, farklı kültürlerin etkilerini bir araya getirerek her damak tadına hitap eden bir çeşitlilik sunar.\n", + "\n", + "## Popüler Yemekler\n", + "\n", + "### 1. Sushi\n", + "\n", + "Sushi, Japon mutfağının en ünlü yemeklerinden biridir. Genellikle çiğ balık, pirinç ve deniz yosunu ile hazırlanır. Farklı türleri arasında nigiri, maki ve sashimi bulunur.\n", + "\n", + "### 2. Butter Chicken\n", + "\n", + "Butter Chicken, Hint mutfağının ikonik yemeklerinden biridir. 
Tavuk, kremalı bir domates sosunda pişirilir ve genellikle naan ekmeği veya pirinçle servis edilir.\n", + "\n", + "### 3. Pad Thai\n", + "\n", + "Pad Thai, Tayland mutfağından gelen bir erişte yemeğidir. Pirinç eriştesi, karides veya tavuk, yumurta, yer fıstığı ve özel bir sosla hazırlanır.\n", + "\n", + "### 4. Dim Sum\n", + "\n", + "Dim Sum, Çin mutfağından gelen küçük porsiyonlarda servis edilen bir dizi yemektir. Buharda pişirilmiş veya kızartılmış hamur işleri, köfteler ve tatlılar içerir.\n", + "\n", + "## Baharatların Önemi\n", + "\n", + "Baharatlar, Asya ve Hint mutfağının temel taşlarından biridir. Yemeklere derinlik ve karakter kazandırır. Örneğin:\n", + "\n", + "- **Zerdeçal**: Hint yemeklerinde yaygın olarak kullanılır ve parlak sarı rengiyle bilinir.\n", + "- **Zencefil**: Hem tatlı hem de tuzlu yemeklerde kullanılır.\n", + "- **Soya Sosu**: Asya mutfağında tuzlu bir tat için sıklıkla tercih edilir.\n", + "\n", + "## Evde Denemek İçin İpuçları\n", + "\n", + "- Taze malzemeler kullanın: Sebzeler, otlar ve baharatlar ne kadar taze olursa, yemekleriniz o kadar lezzetli olur.\n", + "- Baharatları dikkatli ölçün: Baharatlar güçlüdür, bu yüzden miktarları dengeli kullanmaya özen gösterin.\n", + "- Yeni tarifler denemekten korkmayın: Asya ve Hint mutfağı, keşfedilecek sonsuz bir çeşitlilik sunar.\n", + "\n", + "## Sonuç\n", + "\n", + "Asya ve Hint mutfağı, hem lezzetli hem de kültürel açıdan zengin bir deneyim sunar. Bu mutfakları keşfetmek, sadece yemek yapmayı değil, aynı zamanda farklı kültürleri anlamayı da içerir. Şimdi mutfağa girin ve bu harika tatları kendiniz deneyimleyin!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "Imblearn'i yükleyin, bu SMOTE'u etkinleştirecektir. Bu, sınıflandırma yaparken dengesiz verileri yönetmeye yardımcı olan bir Scikit-learn paketidir. 
(https://imbalanced-learn.org/stable/)\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: imblearn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.0)\n", + "Requirement already satisfied: imbalanced-learn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imblearn) (0.8.0)\n", + "Requirement already satisfied: numpy>=1.13.3 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (1.19.2)\n", + "Requirement already satisfied: scipy>=0.19.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (1.4.1)\n", + "Requirement already satisfied: scikit-learn>=0.24 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (0.24.2)\n", + "Requirement already satisfied: joblib>=0.11 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (0.16.0)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from scikit-learn>=0.24->imbalanced-learn->imblearn) (2.1.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install imblearn" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as 
plt\n", + "import matplotlib as mpl\n", + "import numpy as np\n", + "from imblearn.over_sampling import SMOTE" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('../../data/cuisines.csv')" + ] + }, + { + "source": [ + "Bu veri kümesi, belirli bir mutfak setinden çeşitli mutfaklardaki her türlü malzemeyi gösteren 385 sütun içermektedir.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 65 indian 0 0 0 0 0 \n", + "1 66 indian 1 0 0 0 0 \n", + "2 67 indian 0 0 0 0 0 \n", + "3 68 indian 0 0 0 0 0 \n", + "4 69 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 385 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
065indian00000000...0000000000
166indian10000000...0000000000
267indian00000000...0000000000
368indian00000000...0000000000
469indian00000000...0000000010
\n

5 rows × 385 columns

\n
" + }, + "metadata": {}, + "execution_count": 4 + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\nRangeIndex: 2448 entries, 0 to 2447\nColumns: 385 entries, Unnamed: 0 to zucchini\ndtypes: int64(384), object(1)\nmemory usage: 7.2+ MB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "korean 799\n", + "indian 598\n", + "chinese 442\n", + "japanese 320\n", + "thai 289\n", + "Name: cuisine, dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ], + "source": [ + "df.cuisine.value_counts()" + ] + }, + { + "source": [ + "Mutfakları bir çubuk grafikle göster\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 7 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAZEAAAD4CAYAAAAtrdtxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAASY0lEQVR4nO3df7TldV3v8eerGZkRRoeAiXtE5UgNIkUCjlwQIzAiC7NscdcSbcmsfkxl5SXX0juuyzK9d3UvlXnpplajma0kMtCUhluImNcr8msGBmb4pZaTQCFQOYom0fi+f+zPkd14hpnzOWefvYfzfKy113z35/vde7/22fvMa3++3733SVUhSVKPbxt3AEnSgcsSkSR1s0QkSd0sEUlSN0tEktRt+bgDLKYjjjiipqenxx1Dkg4oW7dufbiq1sy2bkmVyPT0NFu2bBl3DEk6oCT5u72tc3eWJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqduS+sT69vt3Mb3xqnHH0ALZefG5444gLXnORCRJ3SwRSVI3S0SS1M0SkSR1s0QkSd0sEUlSN0tEktRtIkokyaFJXtuWz0yyeY6X/29Jzh5NOknS3kxEiQCHAq/tvXBVvbmqPraAeSRJ+2FSSuRi4DuTbAN+E1iV5Iokdye5NEkAkrw5yc1JdiTZNDT+viTnjTG/JC1Jk1IiG4G/qaoTgTcAJwEXAscDxwCnt+3eUVUvrKrvAZ4KvGxfV5xkQ5ItSbbs/tqu0aSXpCVqUkpkTzdV1X1V9Q1gGzDdxs9KcmOS7cBLgO/e1xVV1aaqWldV65YdvHp0iSVpCZrUL2B8dGh5N7A8yUrgXcC6qro3yVuAleMIJ0kamJSZyFeAp+1jm5nCeDjJKsBjIJI0ZhMxE6mqf0xyXZIdwL8AX5xlmy8leTewA3gAuHmRY0qS9jARJQJQVa/ay/gvDS1fBFw0yzbrR5dMkrQ3k7I7S5J0ALJEJEndLBFJUjdLRJLUzRKRJHWbmHdnLYYTjlrNlovPHXcMSXrScCYiSepmiUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6rZ83AEW0/b7dzG98apxx9CY7Lz43HFHkJ50nIlIkrpZIpKkbpaIJKmbJSJJ6maJSJK6WSKSpG77VSJJPj3qIJKkA89+lUhVvWjUQSRJB579nYk8kmRVkmuT3JJke5Ifa+umk9yd5NIkdyW5IsnBbd2bk9ycZEeSTUnSxj+R5NeT3JTkM0m+r40vS/Kb7TK3J/m5Nj6V5JNJtrXrmtn+nCTXt0yXJ1k1ih+SJGl2czkm8nXgFVV1MnAW8FszpQA8F3hXVT0P+DLw2jb+jqp6YVV9D/BU4GVD17e8qk4BLgR+tY39NLCrql4IvBD42STPAV4FXF1VJwLPB7YlOQK4CDi7ZdoCvH4ud16SND9z+dqTAP8jyRnAN4CjgCPbunur6rq2/H7gdcDbgLOSvBE4GDgMuAP4i7bdh9q/W4HptnwO8L1JzmvnVwNrgZuB9yZ5CvDhqtqW5PuB44HrWpcdBFz/LaGTDcAGgGVPXzOHuytJ2pe5lMirgTXAC6rqsSQ7gZVtXe2xbSVZCbwLWFdV9yZ5y9D2AI+2f3cP5Qjwy1V19Z433srrXOB9Sd4O/DNwTVWd/0Shq2oTsAlgxdTaPXNKkuZhLruzVgMPtgI5Czh6aN2zk5zWll8FfIrHC+PhdqziPPbtauAX2oyDJMcmOSTJ0cAXq+rdwHuAk4EbgNOTfFfb9pAkx87h/kiS5ml/ZyIFXAr8RZLtDI4/3D20/h7gF5O8F7gT+N2q+lqSdwM7gAcY7JLal/cw2LV1Szve8hDw48CZwBuSPAY8Arymqh5Ksh6
4LMmKdvmLgM/s532SJM1Tqp54D0+Sw4FbqurovayfBja3g+cTbcXU2pq64JJxx9CY+FXwUp8kW6tq3WzrnnB3VpJnMDhY/bZRBJMkHdiecHdWVf098ITHGapqJzDxsxBJ0sLzu7MkSd0sEUlSN0tEktRtLh82POCdcNRqtvgOHUlaMM5EJEndLBFJUjdLRJLUzRKRJHWzRCRJ3SwRSVI3S0SS1M0SkSR1s0QkSd0sEUlSN0tEktTNEpEkdbNEJEndLBFJUjdLRJLUzRKRJHWzRCRJ3SwRSVI3S0SS1M0SkSR1s0QkSd2WjzvAYtp+/y6mN1417hhSt50XnzvuCNK/40xEktTNEpEkdbNEJEndLBFJUjdLRJLUzRKRJHWzRCRJ3Ra0RJK8L8l5s4w/I8kVC3lbkqTxW5QPG1bV3wPfUi6SpAPbvGYiSV6T5PYktyX54zZ8RpJPJ/nbmVlJkukkO9ry+iQfSvJXST6b5DeGru+cJNcnuSXJ5UlWtfGLk9zZbuttbWxNkg8mubmdTp/PfZEkzV33TCTJdwMXAS+qqoeTHAa8HZgCXgwcB1wJzLYb60TgJOBR4J4kvwP8S7u+s6vqq0n+C/D6JO8EXgEcV1WV5NB2Hb8N/K+q+lSSZwNXA8+bJecGYAPAsqev6b27kqRZzGd31kuAy6vqYYCq+qckAB+uqm8AdyY5ci+XvbaqdgEkuRM4GjgUOB64rl3PQcD1wC7g68AfJNkMbG7XcTZwfNsW4OlJVlXVI8M3VFWbgE0AK6bW1jzuryRpD6M4JvLo0HL2Y5vdLUeAa6rq/D03TnIK8AMMjqv8EoMC+zbg1Kr6+kKEliTN3XyOiXwc+E9JDgdou7Pm4wbg9CTf1a7vkCTHtuMiq6vq/wC/Ajy/bf9R4JdnLpzkxHneviRpjrpnIlV1R5JfA/5vkt3ArfMJUlUPJVkPXJZkRRu+CPgK8JEkKxnMVl7f1r0OeGeS2xncj08CPz+fDJKkuUnV0jlMsGJqbU1dcMm4Y0jd/HsiGockW6tq3Wzr/MS6JKmbJSJJ6maJSJK6WSKSpG6WiCSp26J8AeOkOOGo1Wzx3S2StGCciUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6maJSJK6LR93gMW0/f5dTG+8atwxJM3RzovPHXcE7YUzEUlSN0tEktTNEpEkdbNEJEndLBFJUjdLRJLUbWQlkuTTc9z+zCSb2/LLk2wcTTJJ0kIZ2edEqupF87jslcCVCxhHkjQCo5yJPNL+PTPJJ5JckeTuJJcmSVv30jZ2C/ATQ5ddn+QdbflHk9yY5NYkH0tyZBt/S5L3tuv+2ySvG9V9kSTNbrGOiZwEXAgcDxwDnJ5kJfBu4EeBFwD/YS+X/RRwalWdBPwp8MahdccBPwScAvxqkqeMJr4kaTaL9bUnN1XVfQBJtgHTwCPA56vqs238/cCGWS77TOADSaaAg4DPD627qqoeBR5N8iBwJHDf8IWTbJi53mVPX7OQ90mSlrzFmok8OrS8m7mV1+8A76iqE4CfA1bO5XqralNVrauqdcsOXj2Hm5Uk7cs43+J7NzCd5Dvb+fP3st1q4P62fMHIU0mS9tvYSqSqvs5gN9NV7cD6g3vZ9C3A5Um2Ag8vUjxJ0n5IVY07w6JZMbW2pi64ZNwxJM2RXwU/Xkm2VtW62db5iXVJUjdLRJLUzRKRJHWzRCRJ3SwRSVK3xfrE+kQ44ajVbPFdHpK0YJyJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqZolIkrotH3eAxbT9/l1Mb7xq3DEkaVHtvPjckV23MxF
JUjdLRJLUzRKRJHWzRCRJ3SwRSVI3S0SS1M0SkSR1W9ASSTKdZMdCXqckaXJNxEwkyZL60KMkPVmMrESSHJPk1iTfl+QPk2xv589q69cnuTLJx4Fr29gbktyc5PYkbx26rg8n2ZrkjiQbhsYfSfJrSW5LckOSI0d1fyRJ32okJZLkucAHgfXAKUBV1QnA+cAfJVnZNj0ZOK+qvj/JOcDatv2JwAuSnNG2+6mqegGwDnhdksPb+CHADVX1fOCTwM/OkmVDki1Jtuz+2q5R3F1JWrJGUSJrgI8Ar66q24AXA+8HqKq7gb8Djm3bXlNV/9SWz2mnW4FbgOMYlAoMiuM24AbgWUPj/wpsbstbgek9w1TVpqpaV1Xrlh28eqHuoySJ0XwB4y7gCwzK4859bPvVoeUA/7Oqfn94gyRnAmcDp1XV15J8ApiZyTxWVdWWd7PEvlBSksZtFDORfwVeAbwmyauA/we8GiDJscCzgXtmudzVwE8lWdW2PSrJdwCrgX9uBXIccOoIMkuSOozklXtVfTXJy4BrgP8OnJBkO/BvwPqqejTJnpf5aJLnAde3dY8APwn8FfDzSe5iUD43jCKzJGnu8vjeoCe/FVNra+qCS8YdQ5IW1Xz/nkiSrVW1brZ1E/E5EUnSgckSkSR1s0QkSd0sEUlSN0tEktRtSX0474SjVrNlnu9SkCQ9zpmIJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqtqT+KFWSrzD7n+adFEcAD487xBMw3/yYb37MNz/zyXd0Va2ZbcWS+u4s4J69/XWuSZBki/n6mW9+zDc/SzWfu7MkSd0sEUlSt6VWIpvGHWAfzDc/5psf883Pksy3pA6sS5IW1lKbiUiSFpAlIknqtmRKJMlLk9yT5HNJNo4pw3uTPJhkx9DYYUmuSfLZ9u+3t/Ek+d8t7+1JTl6EfM9K8tdJ7kxyR5L/PEkZk6xMclOS21q+t7bx5yS5seX4QJKD2viKdv5zbf30KPO121yW5NYkmycw284k25NsS7KljU3EY9tu89AkVyS5O8ldSU6blHxJntt+bjOnLye5cFLytdv8lfZ7sSPJZe33ZfTPv6p60p+AZcDfAMcABwG3AcePIccZwMnAjqGx3wA2tuWNwK+35R8B/hIIcCpw4yLkmwJObstPAz4DHD8pGdvtrGrLTwFubLf7Z8Ar2/jvAb/Qll8L/F5bfiXwgUX4Gb4e+BNgczs/Sdl2AkfsMTYRj227zT8CfqYtHwQcOkn5hnIuAx4Ajp6UfMBRwOeBpw4979YvxvNvUX7o4z4BpwFXD51/E/CmMWWZ5t+XyD3AVFueYvCBSIDfB86fbbtFzPoR4AcnMSNwMHAL8B8ZfAp3+Z6PNXA1cFpbXt62ywgzPRO4FngJsLn9BzIR2drt7ORbS2QiHltgdftPMJOYb49M5wDXTVI+BiVyL3BYez5tBn5oMZ5/S2V31swPeMZ9bWwSHFlV/9CWHwCObMtjzdymtycxeLU/MRnb7qJtwIPANQxmmF+qqn+bJcM387X1u4DDRxjvEuCNwDfa+cMnKBtAAR9NsjXJhjY2KY/tc4CHgD9suwPfk+SQCco37JXAZW15IvJV1f3A24AvAP/A4Pm0lUV4/i2VEjkg1OBlwdjfc51kFfBB4MKq+vLwunFnrKrdVXUig1f9pwDHjSvLsCQvAx6sqq3jzvIEXlxVJwM/DPxikjOGV475sV3OYFfv71bVScBXGewe+qZxP/cA2jGFlwOX77lunPnasZgfY1DGzwAOAV66GLe9VErkfuBZQ+ef2cYmwReTTAG0fx9s42PJnOQpDArk0qr60CRmBKiqLwF/zWCKfmiSme+BG87wzXxt/WrgH0cU6XTg5Ul2An/KYJfWb09INuCbr1apqgeBP2dQwpPy2N4H3FdVN7bzVzAolUnJN+OHgVuq6ovt/KTkOxv4fFU9VFWPAR9i8Jwc+fNvqZT
IzcDa9k6FgxhMR68cc6YZVwIXtOULGByHmBl/TXuXx6nArqFp80gkCfAHwF1V9fZJy5hkTZJD2/JTGRyvuYtBmZy3l3wzuc8DPt5eLS64qnpTVT2zqqYZPL8+XlWvnoRsAEkOSfK0mWUG+/V3MCGPbVU9ANyb5Llt6AeAOycl35DzeXxX1kyOScj3BeDUJAe33+OZn9/on3+LcSBqEk4M3i3xGQb70P/rmDJcxmB/5WMMXnn9NIP9kNcCnwU+BhzWtg3wzpZ3O7BuEfK9mMF0/HZgWzv9yKRkBL4XuLXl2wG8uY0fA9wEfI7BboYVbXxlO/+5tv6YRXqcz+Txd2dNRLaW47Z2umPmd2BSHtt2mycCW9rj+2Hg2ycs3yEMXq2vHhqbpHxvBe5uvxt/DKxYjOefX3siSeq2VHZnSZJGwBKRJHWzRCRJ3SwRSVI3S0SS1M0SkSR1s0QkSd3+PxNFbW14TY8fAAAAAElFTkSuQmCC\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "df.cuisine.value_counts().plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "thai df: (289, 385)\njapanese df: (320, 385)\nchinese df: (442, 385)\nindian df: (598, 385)\nkorean df: (799, 385)\n" + ] + } + ], + "source": [ + "\n", + "thai_df = df[(df.cuisine == \"thai\")]\n", + "japanese_df = df[(df.cuisine == \"japanese\")]\n", + "chinese_df = df[(df.cuisine == \"chinese\")]\n", + "indian_df = df[(df.cuisine == \"indian\")]\n", + "korean_df = df[(df.cuisine == \"korean\")]\n", + "\n", + "print(f'thai df: {thai_df.shape}')\n", + "print(f'japanese df: {japanese_df.shape}')\n", + "print(f'chinese df: {chinese_df.shape}')\n", + "print(f'indian df: {indian_df.shape}')\n", + "print(f'korean df: {korean_df.shape}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "def create_ingredient_df(df):\n", + " # transpose df, drop cuisine and unnamed rows, sum the row to get total for ingredient and add value header to new df\n", + " ingredient_df = df.T.drop(['cuisine','Unnamed: 0']).sum(axis=1).to_frame('value')\n", + " # drop ingredients that have a 0 sum\n", + " ingredient_df = ingredient_df[(ingredient_df.T != 0).any()]\n", + " # sort df\n", + " ingredient_df = 
ingredient_df.sort_values(by='value', ascending=False, inplace=False)\n", + " return ingredient_df\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 10 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "thai_ingredient_df = create_ingredient_df(thai_df)\r\n", + "thai_ingredient_df.head(10).plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 11 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "japanese_ingredient_df = create_ingredient_df(japanese_df)\r\n", + "japanese_ingredient_df.head(10).plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 12 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "chinese_ingredient_df = create_ingredient_df(chinese_df)\r\n", + "chinese_ingredient_df.head(10).plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 13 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "indian_ingredient_df = create_ingredient_df(indian_df)\r\n", + "indian_ingredient_df.head(10).plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 14 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "korean_ingredient_df = create_ingredient_df(korean_df)\r\n", + "korean_ingredient_df.head(10).plot.barh()" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... 
whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 15 + } + ], + "source": [ + "feature_df= df.drop(['cuisine','Unnamed: 0','rice','garlic','ginger'], axis=1)\n", + "labels_df = df.cuisine #.unique()\n", + "feature_df.head()\n" + ] + }, + { + "source": [ + "SMOTE aşırı örnekleme ile verileri en yüksek sınıfa dengeleyin. Daha fazla bilgi için burayı okuyun: https://imbalanced-learn.org/dev/references/generated/imblearn.over_sampling.SMOTE.html\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "oversample = SMOTE()\n", + "transformed_feature_df, transformed_label_df = oversample.fit_resample(feature_df, labels_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "new label count: korean 799\nchinese 799\njapanese 799\nindian 799\nthai 799\nName: cuisine, dtype: int64\nold label count: korean 799\nindian 598\nchinese 442\njapanese 320\nthai 289\nName: cuisine, dtype: int64\n" + ] + } + ], + "source": [ + "print(f'new label count: {transformed_label_df.value_counts()}')\r\n", + "print(f'old label count: {df.cuisine.value_counts()}')" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 
0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 18 + } + ], + "source": [ + "transformed_feature_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " cuisine almond angelica anise anise_seed apple apple_brandy \\\n", + "0 indian 0 0 0 0 0 0 \n", + "1 indian 1 0 0 0 0 0 \n", + "2 indian 0 0 0 0 0 0 \n", + "3 indian 0 0 0 0 0 0 \n", + "4 indian 0 0 0 0 0 0 \n", + "... ... ... ... ... ... ... ... \n", + "3990 thai 0 0 0 0 0 0 \n", + "3991 thai 0 0 0 0 0 0 \n", + "3992 thai 0 0 0 0 0 0 \n", + "3993 thai 0 0 0 0 0 0 \n", + "3994 thai 0 0 0 0 0 0 \n", + "\n", + " apricot armagnac artemisia ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "... ... ... ... ... ... ... ... \n", + "3990 0 0 0 ... 0 0 0 \n", + "3991 0 0 0 ... 0 0 0 \n", + "3992 0 0 0 ... 0 0 0 \n", + "3993 0 0 0 ... 0 0 0 \n", + "3994 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "... ... ... ... ... ... ... ... \n", + "3990 0 0 0 0 0 0 0 \n", + "3991 0 0 0 0 0 0 0 \n", + "3992 0 0 0 0 0 0 0 \n", + "3993 0 0 0 0 0 0 0 \n", + "3994 0 0 0 0 0 0 0 \n", + "\n", + "[3995 rows x 381 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisia...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
0indian000000000...0000000000
1indian100000000...0000000000
2indian000000000...0000000000
3indian000000000...0000000000
4indian000000000...0000000010
..................................................................
3990thai000000000...0000000000
3991thai000000000...0000000000
3992thai000000000...0000000000
3993thai000000000...0000000000
3994thai000000000...0000000000
\n

3995 rows × 381 columns

\n
" + }, + "metadata": {}, + "execution_count": 19 + } + ], + "source": [ + "# export transformed data to new df for classification\n", + "transformed_df = pd.concat([transformed_label_df,transformed_feature_df],axis=1, join='outer')\n", + "transformed_df" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\nRangeIndex: 3995 entries, 0 to 3994\nColumns: 381 entries, cuisine to zucchini\ndtypes: int64(380), object(1)\nmemory usage: 11.6+ MB\n" + ] + } + ], + "source": [ + "transformed_df.info()" + ] + }, + { + "source": [ + "Dosyayı gelecekte kullanmak için kaydedin\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "transformed_df.to_csv(\"../../data/cleaned_cuisines.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, [Co-op Translator](https://github.com/Azure/co-op-translator) adlı yapay zeka çeviri hizmeti kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlıklar içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. 
Bu çevirinin kullanımından kaynaklanan yanlış anlama veya yanlış yorumlamalardan sorumlu değiliz.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "1da12ed6d238756959b8de9cac2a35a2", + "translation_date": "2025-09-06T14:53:01+00:00", + "source_file": "4-Classification/1-Introduction/solution/notebook.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/translations/tr/4-Classification/2-Classifiers-1/notebook.ipynb b/translations/tr/4-Classification/2-Classifiers-1/notebook.ipynb new file mode 100644 index 000000000..bc18d6b21 --- /dev/null +++ b/translations/tr/4-Classification/2-Classifiers-1/notebook.ipynb @@ -0,0 +1,41 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "68829b06b4dcd512d3327849191f4d7f", + "translation_date": "2025-09-06T14:32:49+00:00", + "source_file": "4-Classification/2-Classifiers-1/notebook.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "Sınıflandırma Modelleri Oluştur\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, [Co-op Translator](https://github.com/Azure/co-op-translator) adlı yapay zeka çeviri hizmeti kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlıklar içerebileceğini lütfen unutmayın. Orijinal belgenin kendi dilindeki hali, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. Bu çevirinin kullanımından kaynaklanan yanlış anlama veya yanlış yorumlamalardan sorumlu değiliz.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/tr/4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb b/translations/tr/4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb new file mode 100644 index 000000000..e658c02a5 --- /dev/null +++ b/translations/tr/4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb @@ -0,0 +1,1302 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_11-R.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "6ea6a5171b1b99b7b5a55f7469c048d2", + "translation_date": "2025-09-06T14:40:20+00:00", + "source_file": "4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb", + "language_code": "tr" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Lezzetli Asya ve Hint Mutfağı: Bir Sınıflandırma Modeli Oluşturun\n" + ], + "metadata": { + "id": "zs2woWv_HoE8" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Mutfak sınıflandırıcıları 1\n", + "\n", + "Bu derste, *bir grup malzemeye dayanarak belirli bir ulusal mutfağı tahmin etmek* için çeşitli sınıflandırıcıları keşfedeceğiz. 
Bunu yaparken, algoritmaların sınıflandırma görevlerinde nasıl kullanılabileceği hakkında daha fazla bilgi edineceğiz.\n", + "\n", + "### [**Ders öncesi test**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/21/)\n", + "\n", + "### **Hazırlık**\n", + "\n", + "Bu ders, [önceki dersimize](https://github.com/microsoft/ML-For-Beginners/blob/main/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb) dayanır. Önceki derste:\n", + "\n", + "- Asya ve Hindistan'ın tüm muhteşem mutfakları hakkında bir veri seti kullanarak sınıflandırmalara nazik bir giriş yaptık 😋.\n", + "\n", + "- Verilerimizi hazırlamak ve temizlemek için bazı [dplyr fiilleri](https://dplyr.tidyverse.org/) keşfettik.\n", + "\n", + "- ggplot2 kullanarak güzel görselleştirmeler yaptık.\n", + "\n", + "- [recipes](https://recipes.tidymodels.org/articles/Simple_Example.html) kullanarak dengesiz verileri ön işleme ile nasıl ele alacağımızı gösterdik.\n", + "\n", + "- Tarifimizi `prep` ve `bake` ederek, beklenildiği gibi çalışacağından emin olmayı gösterdik.\n", + "\n", + "#### **Ön Koşul**\n", + "\n", + "Bu ders için, verilerimizi temizlemek, hazırlamak ve görselleştirmek için aşağıdaki paketlere ihtiyacımız olacak:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/), veri bilimini daha hızlı, kolay ve eğlenceli hale getirmek için tasarlanmış bir [R paketleri koleksiyonudur](https://www.tidyverse.org/packages).\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) çerçevesi, modelleme ve makine öğrenimi için bir [paketler koleksiyonudur](https://www.tidymodels.org/packages/).\n", + "\n", + "- `themis`: [themis paketi](https://themis.tidymodels.org/), dengesiz verilerle başa çıkmak için ekstra tarif adımları sağlar.\n", + "\n", + "- `nnet`: [nnet paketi](https://cran.r-project.org/web/packages/nnet/nnet.pdf), tek bir gizli katmanlı ileri beslemeli sinir ağlarını ve çoklu lojistik regresyon modellerini tahmin etmek için işlevler sağlar.\n", + "\n", + "Bu paketleri 
şu şekilde yükleyebilirsiniz:\n" + ], + "metadata": { + "id": "iDFOb3ebHwQC" + } + }, + { + "cell_type": "markdown", + "source": [ + "`install.packages(c(\"tidyverse\", \"tidymodels\", \"themis\", \"nnet\", \"DataExplorer\", \"here\"))`\n", + "\n", + "Alternatif olarak, aşağıdaki betik bu modülü tamamlamak için gerekli paketlere sahip olup olmadığınızı kontrol eder ve eksikse sizin için yükler.\n" + ], + "metadata": { + "id": "4V85BGCjII7F" + } + }, + { + "cell_type": "code", + "execution_count": 2, + "source": [ + "suppressWarnings(if (!require(\"pacman\"))install.packages(\"pacman\"))\r\n", + "\r\n", + "pacman::p_load(tidyverse, tidymodels, themis, here)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Loading required package: pacman\n", + "\n" + ] + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "an5NPyyKIKNR", + "outputId": "834d5e74-f4b8-49f9-8ab5-4c52ff2d7bc8" + } + }, + { + "cell_type": "markdown", + "source": [ + "Haydi başlayalım!\n", + "\n", + "## 1. 
Veriyi eğitim ve test setlerine ayırın.\n", + "\n", + "Önceki dersimizden birkaç adımı seçerek başlayacağız.\n", + "\n", + "### Farklı mutfaklar arasında karışıklığa neden olan en yaygın malzemeleri `dplyr::select()` kullanarak çıkarın.\n", + "\n", + "Herkes pirinci, sarımsağı ve zencefili sever!\n" + ], + "metadata": { + "id": "0ax9GQLBINVv" + } + }, + { + "cell_type": "code", + "execution_count": 3, + "source": [ + "# Load the original cuisines data\r\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/4-Classification/data/cuisines.csv\")\r\n", + "\r\n", + "# Drop id column, rice, garlic and ginger from our original data set\r\n", + "df_select <- df %>% \r\n", + " select(-c(1, rice, garlic, ginger)) %>%\r\n", + " # Encode cuisine column as categorical\r\n", + " mutate(cuisine = factor(cuisine))\r\n", + "\r\n", + "# Display new data set\r\n", + "df_select %>% \r\n", + " slice_head(n = 5)\r\n", + "\r\n", + "# Display distribution of cuisines\r\n", + "df_select %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(desc(n))" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "New names:\n", + "* `` -> ...1\n", + "\n", + "\u001b[1m\u001b[1mRows: \u001b[1m\u001b[22m\u001b[34m\u001b[34m2448\u001b[34m\u001b[39m \u001b[1m\u001b[1mColumns: \u001b[1m\u001b[22m\u001b[34m\u001b[34m385\u001b[34m\u001b[39m\n", + "\n", + "\u001b[36m──\u001b[39m \u001b[1m\u001b[1mColumn specification\u001b[1m\u001b[22m \u001b[36m────────────────────────────────────────────────────────\u001b[39m\n", + "\u001b[1mDelimiter:\u001b[22m \",\"\n", + "\u001b[31mchr\u001b[39m (1): cuisine\n", + "\u001b[32mdbl\u001b[39m (384): ...1, almond, angelica, anise, anise_seed, apple, apple_brandy, a...\n", + "\n", + "\n", + "\u001b[36mℹ\u001b[39m Use \u001b[30m\u001b[47m\u001b[30m\u001b[47m`spec()`\u001b[47m\u001b[30m\u001b[49m\u001b[39m to retrieve the full column specification for this data.\n", + "\u001b[36mℹ\u001b[39m Specify the 
column types or set \u001b[30m\u001b[47m\u001b[30m\u001b[47m`show_col_types = FALSE`\u001b[47m\u001b[30m\u001b[49m\u001b[39m to quiet this message.\n", + "\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine almond angelica anise anise_seed apple apple_brandy apricot armagnac\n", + "1 indian 0 0 0 0 0 0 0 0 \n", + "2 indian 1 0 0 0 0 0 0 0 \n", + "3 indian 0 0 0 0 0 0 0 0 \n", + "4 indian 0 0 0 0 0 0 0 0 \n", + "5 indian 0 0 0 0 0 0 0 0 \n", + " artemisia ⋯ whiskey white_bread white_wine whole_grain_wheat_flour wine wood\n", + "1 0 ⋯ 0 0 0 0 0 0 \n", + "2 0 ⋯ 0 0 0 0 0 0 \n", + "3 0 ⋯ 0 0 0 0 0 0 \n", + "4 0 ⋯ 0 0 0 0 0 0 \n", + "5 0 ⋯ 0 0 0 0 0 0 \n", + " yam yeast yogurt zucchini\n", + "1 0 0 0 0 \n", + "2 0 0 0 0 \n", + "3 0 0 0 0 \n", + "4 0 0 0 0 \n", + "5 0 0 1 0 " + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 381\n", + "\n", + "| cuisine <fct> | almond <dbl> | angelica <dbl> | anise <dbl> | anise_seed <dbl> | apple <dbl> | apple_brandy <dbl> | apricot <dbl> | armagnac <dbl> | artemisia <dbl> | ⋯ ⋯ | whiskey <dbl> | white_bread <dbl> | white_wine <dbl> | whole_grain_wheat_flour <dbl> | wine <dbl> | wood <dbl> | yam <dbl> | yeast <dbl> | yogurt <dbl> | zucchini <dbl> |\n", + "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", + "| indian | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| indian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| indian | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| indian | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| indian | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 381\n", + "\\begin{tabular}{lllllllllllllllllllll}\n", + " cuisine & almond & angelica & anise & anise\\_seed & apple & 
apple\\_brandy & apricot & armagnac & artemisia & ⋯ & whiskey & white\\_bread & white\\_wine & whole\\_grain\\_wheat\\_flour & wine & wood & yam & yeast & yogurt & zucchini\\\\\n", + " & & & & & & & & & & ⋯ & & & & & & & & & & \\\\\n", + "\\hline\n", + "\t indian & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t indian & 1 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t indian & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t indian & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t indian & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 1 & 0\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 381
cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiawhiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
<fct><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
indian0000000000000000000
indian1000000000000000000
indian0000000000000000000
indian0000000000000000000
indian0000000000000000010
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine n \n", + "1 korean 799\n", + "2 indian 598\n", + "3 chinese 442\n", + "4 japanese 320\n", + "5 thai 289" + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 2\n", + "\n", + "| cuisine <fct> | n <int> |\n", + "|---|---|\n", + "| korean | 799 |\n", + "| indian | 598 |\n", + "| chinese | 442 |\n", + "| japanese | 320 |\n", + "| thai | 289 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 2\n", + "\\begin{tabular}{ll}\n", + " cuisine & n\\\\\n", + " & \\\\\n", + "\\hline\n", + "\t korean & 799\\\\\n", + "\t indian & 598\\\\\n", + "\t chinese & 442\\\\\n", + "\t japanese & 320\\\\\n", + "\t thai & 289\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 2
cuisinen
<fct><int>
korean 799
indian 598
chinese 442
japanese320
thai 289
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 735 + }, + "id": "jhCrrH22IWVR", + "outputId": "d444a85c-1d8b-485f-bc4f-8be2e8f8217c" + } + }, + { + "cell_type": "markdown", + "source": [ + "Harika! Şimdi verileri %70'i eğitim ve %30'u test olacak şekilde bölelim. Ayrıca, verileri bölerken `stratifikasyon` tekniğini uygulayarak `her bir mutfağın oranını` eğitim ve doğrulama veri setlerinde koruyacağız.\n", + "\n", + "[rsample](https://rsample.tidymodels.org/), Tidymodels içinde verimli veri bölme ve yeniden örnekleme için altyapı sağlar:\n" + ], + "metadata": { + "id": "AYTjVyajIdny" + } + }, + { + "cell_type": "code", + "execution_count": 4, + "source": [ + "# Load the core Tidymodels packages into R session\r\n", + "library(tidymodels)\r\n", + "\r\n", + "# Create split specification\r\n", + "set.seed(2056)\r\n", + "cuisines_split <- initial_split(data = df_select,\r\n", + " strata = cuisine,\r\n", + " prop = 0.7)\r\n", + "\r\n", + "# Extract the data in each split\r\n", + "cuisines_train <- training(cuisines_split)\r\n", + "cuisines_test <- testing(cuisines_split)\r\n", + "\r\n", + "# Print the number of cases in each split\r\n", + "cat(\"Training cases: \", nrow(cuisines_train), \"\\n\",\r\n", + " \"Test cases: \", nrow(cuisines_test), sep = \"\")\r\n", + "\r\n", + "# Display the first few rows of the training set\r\n", + "cuisines_train %>% \r\n", + " slice_head(n = 5)\r\n", + "\r\n", + "\r\n", + "# Display distribution of cuisines in the training set\r\n", + "cuisines_train %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(desc(n))" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Training cases: 1712\n", + "Test cases: 736" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine almond angelica anise anise_seed apple apple_brandy apricot armagnac\n", + "1 chinese 0 0 0 0 0 0 0 0 \n", + "2 chinese 0 0 0 0 0 0 0 0 \n", 
+ "3 chinese 0 0 0 0 0 0 0 0 \n", + "4 chinese 0 0 0 0 0 0 0 0 \n", + "5 chinese 0 0 0 0 0 0 0 0 \n", + " artemisia ⋯ whiskey white_bread white_wine whole_grain_wheat_flour wine wood\n", + "1 0 ⋯ 0 0 0 0 1 0 \n", + "2 0 ⋯ 0 0 0 0 1 0 \n", + "3 0 ⋯ 0 0 0 0 0 0 \n", + "4 0 ⋯ 0 0 0 0 0 0 \n", + "5 0 ⋯ 0 0 0 0 0 0 \n", + " yam yeast yogurt zucchini\n", + "1 0 0 0 0 \n", + "2 0 0 0 0 \n", + "3 0 0 0 0 \n", + "4 0 0 0 0 \n", + "5 0 0 0 0 " + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 381\n", + "\n", + "| cuisine <fct> | almond <dbl> | angelica <dbl> | anise <dbl> | anise_seed <dbl> | apple <dbl> | apple_brandy <dbl> | apricot <dbl> | armagnac <dbl> | artemisia <dbl> | ⋯ ⋯ | whiskey <dbl> | white_bread <dbl> | white_wine <dbl> | whole_grain_wheat_flour <dbl> | wine <dbl> | wood <dbl> | yam <dbl> | yeast <dbl> | yogurt <dbl> | zucchini <dbl> |\n", + "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 381\n", + "\\begin{tabular}{lllllllllllllllllllll}\n", + " cuisine & almond & angelica & anise & anise\\_seed & apple & apple\\_brandy & apricot & armagnac & artemisia & ⋯ & whiskey & white\\_bread & white\\_wine & whole\\_grain\\_wheat\\_flour & wine & wood & yam & yeast & yogurt & zucchini\\\\\n", + " & & & & & & & & & & ⋯ & & & & & & & & & & \\\\\n", + "\\hline\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 1 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 
& 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 1 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 381
cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiawhiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
<fct><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
chinese0000000000000100000
chinese0000000000000100000
chinese0000000000000000000
chinese0000000000000000000
chinese0000000000000000000
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine n \n", + "1 korean 559\n", + "2 indian 418\n", + "3 chinese 309\n", + "4 japanese 224\n", + "5 thai 202" + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 2\n", + "\n", + "| cuisine <fct> | n <int> |\n", + "|---|---|\n", + "| korean | 559 |\n", + "| indian | 418 |\n", + "| chinese | 309 |\n", + "| japanese | 224 |\n", + "| thai | 202 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 2\n", + "\\begin{tabular}{ll}\n", + " cuisine & n\\\\\n", + " & \\\\\n", + "\\hline\n", + "\t korean & 559\\\\\n", + "\t indian & 418\\\\\n", + "\t chinese & 309\\\\\n", + "\t japanese & 224\\\\\n", + "\t thai & 202\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 2
cuisinen
<fct><int>
korean 559
indian 418
chinese 309
japanese224
thai 202
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 535 + }, + "id": "w5FWIkEiIjdN", + "outputId": "2e195fd9-1a8f-4b91-9573-cce5582242df" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 2. Dengesiz Verilerle Başa Çıkmak\n", + "\n", + "Orijinal veri setinde ve eğitim setimizde fark etmiş olabileceğiniz gibi, mutfakların sayısında oldukça eşitsiz bir dağılım var. Kore mutfakları, *neredeyse* Tay mutfaklarının 3 katı kadar. Dengesiz veriler genellikle model performansı üzerinde olumsuz etkiler yaratır. Birçok model, gözlem sayısı eşit olduğunda en iyi performansı gösterir ve bu nedenle dengesiz verilerle mücadele etmekte zorlanır.\n", + "\n", + "Dengesiz veri setleriyle başa çıkmanın iki ana yolu vardır:\n", + "\n", + "- azınlık sınıfına gözlem eklemek: `Over-sampling` örneğin, SMOTE algoritması kullanarak azınlık sınıfının yeni örneklerini bu vakaların en yakın komşularını kullanarak sentetik olarak oluşturmak.\n", + "\n", + "- çoğunluk sınıfından gözlem çıkarmak: `Under-sampling`\n", + "\n", + "Önceki dersimizde, dengesiz veri setleriyle nasıl başa çıkılacağını bir `recipe` kullanarak göstermiştik. Recipe, bir veri setine veri analizi için hazır hale getirmek amacıyla hangi adımların uygulanması gerektiğini açıklayan bir taslak olarak düşünülebilir. Bizim durumumuzda, `eğitim setimiz` için mutfaklarımızın sayısında eşit bir dağılım elde etmek istiyoruz. 
Hadi başlayalım.\n" + ], + "metadata": { + "id": "daBi9qJNIwqW" + } + }, + { + "cell_type": "code", + "execution_count": 5, + "source": [ + "# Load themis package for dealing with imbalanced data\r\n", + "library(themis)\r\n", + "\r\n", + "# Create a recipe for preprocessing training data\r\n", + "cuisines_recipe <- recipe(cuisine ~ ., data = cuisines_train) %>% \r\n", + " step_smote(cuisine)\r\n", + "\r\n", + "# Print recipe\r\n", + "cuisines_recipe" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Data Recipe\n", + "\n", + "Inputs:\n", + "\n", + " role #variables\n", + " outcome 1\n", + " predictor 380\n", + "\n", + "Operations:\n", + "\n", + "SMOTE based on cuisine" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 200 + }, + "id": "Az6LFBGxI1X0", + "outputId": "29d71d85-64b0-4e62-871e-bcd5398573b6" + } + }, + { + "cell_type": "markdown", + "source": [ + "Tarifi beklendiği gibi çalışacağından emin olmak için (hazırlık + pişirme kullanarak) kontrol edebilirsiniz - tüm mutfak etiketlerinin `559` gözlemi olduğunu göreceksiniz.\n", + "\n", + "Bu tarifi modelleme için bir ön işleyici olarak kullanacağımızdan, bir `workflow()` bizim için tüm hazırlık ve pişirme işlemlerini gerçekleştirecek, böylece tarifi manuel olarak tahmin etmemize gerek kalmayacak.\n", + "\n", + "Şimdi bir model eğitmeye hazırız 👩‍💻👨‍💻!\n", + "\n", + "## 3. Sınıflandırıcınızı Seçmek\n", + "\n", + "

\n", + " \n", + "

@allison_horst tarafından yapılmış bir illüstrasyon
\n" + ], + "metadata": { + "id": "NBL3PqIWJBBB" + } + }, + { + "cell_type": "markdown", + "source": [ + "Şimdi hangi algoritmayı kullanacağımıza karar vermeliyiz 🤔.\n", + "\n", + "Tidymodels'de, [`parsnip paketi`](https://parsnip.tidymodels.org/index.html), farklı motorlar (paketler) arasında modellerle çalışmak için tutarlı bir arayüz sağlar. [Model türleri ve motorları](https://www.tidymodels.org/find/parsnip/#models) ile ilgili belgeleri ve bunlara karşılık gelen [model argümanlarını](https://www.tidymodels.org/find/parsnip/#model-args) incelemek için parsnip belgelerine göz atabilirsiniz. Çeşitlilik ilk bakışta oldukça kafa karıştırıcı olabilir. Örneğin, aşağıdaki yöntemlerin tümü sınıflandırma tekniklerini içerir:\n", + "\n", + "- C5.0 Kural Tabanlı Sınıflandırma Modelleri\n", + "\n", + "- Esnek Ayrımcı Modeller\n", + "\n", + "- Doğrusal Ayrımcı Modeller\n", + "\n", + "- Düzenlenmiş Ayrımcı Modeller\n", + "\n", + "- Lojistik Regresyon Modelleri\n", + "\n", + "- Multinom Regresyon Modelleri\n", + "\n", + "- Naive Bayes Modelleri\n", + "\n", + "- Destek Vektör Makineleri\n", + "\n", + "- En Yakın Komşular\n", + "\n", + "- Karar Ağaçları\n", + "\n", + "- Toplu Yöntemler\n", + "\n", + "- Sinir Ağları\n", + "\n", + "Liste uzayıp gidiyor!\n", + "\n", + "### **Hangi sınıflandırıcıyı seçmeli?**\n", + "\n", + "Peki, hangi sınıflandırıcıyı seçmelisiniz? Çoğu zaman, birkaçını denemek ve iyi bir sonuç aramak test etmenin bir yoludur.\n", + "\n", + "> AutoML, bu karşılaştırmaları bulutta çalıştırarak, verileriniz için en iyi algoritmayı seçmenize olanak tanıyarak bu sorunu kolayca çözer. [Buradan](https://docs.microsoft.com/learn/modules/automate-model-selection-with-azure-automl/?WT.mc_id=academic-77952-leestott) deneyin.\n", + "\n", + "Ayrıca sınıflandırıcı seçimi problemimize bağlıdır. 
Örneğin, sonuç `iki sınıftan daha fazla` kategorize edilebiliyorsa, bizim durumumuzda olduğu gibi, `ikili sınıflandırma` yerine `çok sınıflı sınıflandırma algoritması` kullanmanız gerekir.\n", + "\n", + "### **Daha iyi bir yaklaşım**\n", + "\n", + "Ancak rastgele tahmin etmekten daha iyi bir yol, bu indirilebilir [ML Cheat Sheet](https://docs.microsoft.com/azure/machine-learning/algorithm-cheat-sheet?WT.mc_id=academic-77952-leestott) üzerindeki fikirleri takip etmektir. Burada, çok sınıflı problemimiz için bazı seçeneklerimiz olduğunu keşfediyoruz:\n", + "\n", + "

\n", + " \n", + "

Microsoft'un Algoritma Cheat Sheet'inin bir bölümü, çok sınıflı sınıflandırma seçeneklerini detaylandırıyor
\n" + ], + "metadata": { + "id": "a6DLAZ3vJZ14" + } + }, + { + "cell_type": "markdown", + "source": [ + "### **Mantık**\n", + "\n", + "Elimizdeki kısıtlamalar göz önüne alındığında farklı yaklaşımları değerlendirelim:\n", + "\n", + "- **Derin sinir ağları çok ağır**. Temiz ama minimal bir veri setimiz olduğu ve eğitim işlemini yerel olarak notebooklar üzerinden gerçekleştirdiğimiz için, derin sinir ağları bu görev için fazla ağır kalıyor.\n", + "\n", + "- **İki sınıflı sınıflandırıcı yok**. İki sınıflı bir sınıflandırıcı kullanmıyoruz, bu nedenle one-vs-all (birine karşı tümü) yöntemi devre dışı kalıyor.\n", + "\n", + "- **Karar ağacı veya lojistik regresyon işe yarayabilir**. Karar ağacı veya çok sınıflı lojistik regresyon/multinomial regresyon çok sınıflı veri için işe yarayabilir.\n", + "\n", + "- **Çok sınıflı Boosted Karar Ağaçları farklı bir problemi çözüyor**. Çok sınıflı boosted karar ağacı, sıralama oluşturma gibi parametrik olmayan görevler için en uygun olanıdır, bu nedenle bizim için kullanışlı değil.\n", + "\n", + "Ayrıca, genellikle daha karmaşık makine öğrenimi modellerine (örneğin ensemble yöntemleri) geçmeden önce, en basit modeli oluşturmak ve neler olup bittiğini anlamak iyi bir fikirdir. Bu ders için, `multinomial regression` modeliyle başlayacağız.\n", + "\n", + "> Lojistik regresyon, sonuç değişkeni kategorik (veya nominal) olduğunda kullanılan bir tekniktir. İkili lojistik regresyonda sonuç değişkeni sayısı iki iken, multinomial lojistik regresyonda sonuç değişkeni sayısı ikiden fazladır. Daha fazla bilgi için [İleri Regresyon Yöntemleri](https://bookdown.org/chua/ber642_advanced_regression/multinomial-logistic-regression.html) bağlantısına bakabilirsiniz.\n", + "\n", + "## 4. Multinomial lojistik regresyon modelini eğitmek ve değerlendirmek.\n", + "\n", + "Tidymodels'de, `parsnip::multinom_reg()`, multinomial dağılımı kullanarak çok sınıflı veriyi tahmin etmek için doğrusal tahminciler kullanan bir modeli tanımlar. 
Bu modeli fit etmek için kullanabileceğiniz farklı yollar/motorlar hakkında bilgi almak için `?multinom_reg()`'e bakabilirsiniz.\n", + "\n", + "Bu örnek için, varsayılan [nnet](https://cran.r-project.org/web/packages/nnet/nnet.pdf) motoru üzerinden bir Multinomial regresyon modeli fit edeceğiz.\n", + "\n", + "> `penalty` değerini rastgele seçtim. Bu değeri seçmek için daha iyi yöntemler var, örneğin `resampling` ve modeli `tuning` yaparak, ki bunu daha sonra tartışacağız.\n", + ">\n", + "> Model hiperparametrelerini nasıl ayarlayacağınızı öğrenmek isterseniz [Tidymodels: Başlangıç](https://www.tidymodels.org/start/tuning/) bağlantısına göz atabilirsiniz.\n" + ], + "metadata": { + "id": "gWMsVcbBJemu" + } + }, + { + "cell_type": "code", + "execution_count": 6, + "source": [ + "# Create a multinomial regression model specification\r\n", + "mr_spec <- multinom_reg(penalty = 1) %>% \r\n", + " set_engine(\"nnet\", MaxNWts = 2086) %>% \r\n", + " set_mode(\"classification\")\r\n", + "\r\n", + "# Print model specification\r\n", + "mr_spec" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Multinomial Regression Model Specification (classification)\n", + "\n", + "Main Arguments:\n", + " penalty = 1\n", + "\n", + "Engine-Specific Arguments:\n", + " MaxNWts = 2086\n", + "\n", + "Computational engine: nnet \n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 166 + }, + "id": "Wq_fcyQiJvfG", + "outputId": "c30449c7-3864-4be7-f810-72a003743e2d" + } + }, + { + "cell_type": "markdown", + "source": [ + "Harika iş çıkardınız 🥳! Artık bir tarifimiz ve bir model spesifikasyonumuz olduğuna göre, bunları bir araya getirip önce veriyi ön işleyen, ardından ön işlenmiş veri üzerinde modeli eğiten ve potansiyel olarak son işlem aktivitelerine de olanak tanıyan bir nesneye dönüştürmenin bir yolunu bulmamız gerekiyor. 
Tidymodels'de, bu kullanışlı nesne [`workflow`](https://workflows.tidymodels.org/) olarak adlandırılır ve modelleme bileşenlerinizi pratik bir şekilde bir arada tutar! Python'da buna *pipelines* derdik.\n", + "\n", + "Şimdi her şeyi bir workflow içinde bir araya getirelim!📦\n" + ], + "metadata": { + "id": "NlSbzDfgJ0zh" + } + }, + { + "cell_type": "code", + "execution_count": 7, + "source": [ + "# Bundle recipe and model specification\r\n", + "mr_wf <- workflow() %>% \r\n", + " add_recipe(cuisines_recipe) %>% \r\n", + " add_model(mr_spec)\r\n", + "\r\n", + "# Print out workflow\r\n", + "mr_wf" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "══ Workflow ════════════════════════════════════════════════════════════════════\n", + "\u001b[3mPreprocessor:\u001b[23m Recipe\n", + "\u001b[3mModel:\u001b[23m multinom_reg()\n", + "\n", + "── Preprocessor ────────────────────────────────────────────────────────────────\n", + "1 Recipe Step\n", + "\n", + "• step_smote()\n", + "\n", + "── Model ───────────────────────────────────────────────────────────────────────\n", + "Multinomial Regression Model Specification (classification)\n", + "\n", + "Main Arguments:\n", + " penalty = 1\n", + "\n", + "Engine-Specific Arguments:\n", + " MaxNWts = 2086\n", + "\n", + "Computational engine: nnet \n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 333 + }, + "id": "Sc1TfPA4Ke3_", + "outputId": "82c70013-e431-4e7e-cef6-9fcf8aad4a6c" + } + }, + { + "cell_type": "markdown", + "source": [ + "İş akışları 👌👌! Bir **`workflow()`** tıpkı bir model gibi uyarlanabilir. 
Öyleyse, bir model eğitme zamanı!\n" + ], + "metadata": { + "id": "TNQ8i85aKf9L" + } + }, + { + "cell_type": "code", + "execution_count": 8, + "source": [ + "# Train a multinomial regression model\n", + "mr_fit <- fit(object = mr_wf, data = cuisines_train)\n", + "\n", + "mr_fit" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "══ Workflow [trained] ══════════════════════════════════════════════════════════\n", + "\u001b[3mPreprocessor:\u001b[23m Recipe\n", + "\u001b[3mModel:\u001b[23m multinom_reg()\n", + "\n", + "── Preprocessor ────────────────────────────────────────────────────────────────\n", + "1 Recipe Step\n", + "\n", + "• step_smote()\n", + "\n", + "── Model ───────────────────────────────────────────────────────────────────────\n", + "Call:\n", + "nnet::multinom(formula = ..y ~ ., data = data, decay = ~1, MaxNWts = ~2086, \n", + " trace = FALSE)\n", + "\n", + "Coefficients:\n", + " (Intercept) almond angelica anise anise_seed apple\n", + "indian 0.19723325 0.2409661 0 -5.004955e-05 -0.1657635 -0.05769734\n", + "japanese 0.13961959 -0.6262400 0 -1.169155e-04 -0.4893596 -0.08585717\n", + "korean 0.22377347 -0.1833485 0 -5.560395e-05 -0.2489401 -0.15657804\n", + "thai -0.04336577 -0.6106258 0 4.903828e-04 -0.5782866 0.63451105\n", + " apple_brandy apricot armagnac artemisia artichoke asparagus\n", + "indian 0 0.37042636 0 -0.09122797 0 -0.27181970\n", + "japanese 0 0.28895643 0 -0.12651100 0 0.14054037\n", + "korean 0 -0.07981259 0 0.55756709 0 -0.66979948\n", + "thai 0 -0.33160904 0 -0.10725182 0 -0.02602152\n", + " avocado bacon baked_potato balm banana barley\n", + "indian -0.46624197 0.16008055 0 0 -0.2838796 0.2230625\n", + "japanese 0.90341344 0.02932727 0 0 -0.4142787 2.0953906\n", + "korean -0.06925382 -0.35804134 0 0 -0.2686963 -0.7233404\n", + "thai -0.21473955 -0.75594439 0 0 0.6784880 -0.4363320\n", + " bartlett_pear basil bay bean beech\n", + "indian 0 -0.7128756 0.1011587 -0.8777275 -0.0004380795\n", + 
"japanese 0 0.1288697 0.9425626 -0.2380748 0.3373437611\n", + "korean 0 -0.2445193 -0.4744318 -0.8957870 -0.0048784496\n", + "thai 0 1.5365848 0.1333256 0.2196970 -0.0113078024\n", + " beef beef_broth beef_liver beer beet\n", + "indian -0.7985278 0.2430186 -0.035598065 -0.002173738 0.01005813\n", + "japanese 0.2241875 -0.3653020 -0.139551027 0.128905553 0.04923911\n", + "korean 0.5366515 -0.6153237 0.213455197 -0.010828645 0.27325423\n", + "thai 0.1570012 -0.9364154 -0.008032213 -0.035063746 -0.28279823\n", + " bell_pepper bergamot berry bitter_orange black_bean\n", + "indian 0.49074330 0 0.58947607 0.191256164 -0.1945233\n", + "japanese 0.09074167 0 -0.25917977 -0.118915977 -0.3442400\n", + "korean -0.57876763 0 -0.07874180 -0.007729435 -0.5220672\n", + "thai 0.92554006 0 -0.07210196 -0.002983296 -0.4614426\n", + " black_currant black_mustard_seed_oil black_pepper black_raspberry\n", + "indian 0 0.38935801 -0.4453495 0\n", + "japanese 0 -0.05452887 -0.5440869 0\n", + "korean 0 -0.03929970 0.8025454 0\n", + "thai 0 -0.21498372 -0.9854806 0\n", + " black_sesame_seed black_tea blackberry blackberry_brandy\n", + "indian -0.2759246 0.3079977 0.191256164 0\n", + "japanese -0.6101687 -0.1671913 -0.118915977 0\n", + "korean 1.5197674 -0.3036261 -0.007729435 0\n", + "thai -0.1755656 -0.1487033 -0.002983296 0\n", + " blue_cheese blueberry bone_oil bourbon_whiskey brandy\n", + "indian 0 0.216164294 -0.2276744 0 0.22427587\n", + "japanese 0 -0.119186087 0.3913019 0 -0.15595599\n", + "korean 0 -0.007821986 0.2854487 0 -0.02562342\n", + "thai 0 -0.004947048 -0.0253658 0 -0.05715244\n", + "\n", + "...\n", + "and 308 more lines." 
+ ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "GMbdfVmTKkJI", + "outputId": "adf9ebdf-d69d-4a64-e9fd-e06e5322292e" + } + }, + { + "cell_type": "markdown", + "source": [ + "Modelin eğitim sırasında öğrendiği katsayılar çıktı olarak gösterilir.\n", + "\n", + "### Eğitilmiş Modeli Değerlendirme\n", + "\n", + "Modelin nasıl bir performans sergilediğini görmek zamanı geldi 📏! Bunu bir test seti üzerinde değerlendirerek yapacağız. Hadi, test seti üzerinde tahminler yaparak başlayalım.\n" + ], + "metadata": { + "id": "tt2BfOxrKmcJ" + } + }, + { + "cell_type": "code", + "execution_count": 9, + "source": [ + "# Make predictions on the test set\n", + "results <- cuisines_test %>% select(cuisine) %>% \n", + " bind_cols(mr_fit %>% predict(new_data = cuisines_test))\n", + "\n", + "# Print out results\n", + "results %>% \n", + " slice_head(n = 5)" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine .pred_class\n", + "1 indian thai \n", + "2 indian indian \n", + "3 indian indian \n", + "4 indian indian \n", + "5 indian indian " + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 2\n", + "\n", + "| cuisine <fct> | .pred_class <fct> |\n", + "|---|---|\n", + "| indian | thai |\n", + "| indian | indian |\n", + "| indian | indian |\n", + "| indian | indian |\n", + "| indian | indian |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 2\n", + "\\begin{tabular}{ll}\n", + " cuisine & .pred\\_class\\\\\n", + " & \\\\\n", + "\\hline\n", + "\t indian & thai \\\\\n", + "\t indian & indian\\\\\n", + "\t indian & indian\\\\\n", + "\t indian & indian\\\\\n", + "\t indian & indian\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 2
cuisine.pred_class
<fct><fct>
indianthai
indianindian
indianindian
indianindian
indianindian
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 248 + }, + "id": "CqtckvtsKqax", + "outputId": "e57fe557-6a68-4217-fe82-173328c5436d" + } + }, + { + "cell_type": "markdown", + "source": [ + "Harika iş! Tidymodels'de model performansı, modellerin etkinliğini performans metrikleriyle ölçmek için kullanılan [yardstick](https://yardstick.tidymodels.org/) paketiyle değerlendirilebilir. Lojistik regresyon dersimizde yaptığımız gibi, bir karmaşıklık matrisi hesaplayarak başlayalım.\n" + ], + "metadata": { + "id": "8w5N6XsBKss7" + } + }, + { + "cell_type": "code", + "execution_count": 10, + "source": [ + "# Confusion matrix for categorical data\n", + "conf_mat(data = results, truth = cuisine, estimate = .pred_class)\n" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " Truth\n", + "Prediction chinese indian japanese korean thai\n", + " chinese 83 1 8 15 10\n", + " indian 4 163 1 2 6\n", + " japanese 21 5 73 25 1\n", + " korean 15 0 11 191 0\n", + " thai 10 11 3 7 70" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 133 + }, + "id": "YvODvsLkK0iG", + "outputId": "bb69da84-1266-47ad-b174-d43b88ca2988" + } + }, + { + "cell_type": "markdown", + "source": [ + "Birden fazla sınıfla uğraşırken, bunu bir ısı haritası olarak görselleştirmek genellikle daha sezgiseldir, şöyle:\n" + ], + "metadata": { + "id": "c0HfPL16Lr6U" + } + }, + { + "cell_type": "code", + "execution_count": 11, + "source": [ + "update_geom_defaults(geom = \"tile\", new = list(color = \"black\", alpha = 0.7))\n", + "# Visualize confusion matrix\n", + "results %>% \n", + " conf_mat(cuisine, .pred_class) %>% \n", + " autoplot(type = \"heatmap\")" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "plot without title" + ], + "image/png": "" + }, + "metadata": { +
"image/png": { + "width": 420, + "height": 420 + } + } + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 436 + }, + "id": "HsAtwukyLsvt", + "outputId": "3032a224-a2c8-4270-b4f2-7bb620317400" + } + }, + { + "cell_type": "markdown", + "source": [ + "Karmaşıklık matrisi grafiğindeki daha koyu kareler, yüksek vaka sayılarını gösterir ve umarım tahmin edilen ve gerçek etiketin aynı olduğu durumları gösteren koyu karelerden oluşan bir diyagonal çizgi görebilirsiniz.\n", + "\n", + "Şimdi karmaşıklık matrisi için özet istatistikleri hesaplayalım.\n" + ], + "metadata": { + "id": "oOJC87dkLwPr" + } + }, + { + "cell_type": "code", + "execution_count": 12, + "source": [ + "# Summary stats for confusion matrix\n", + "conf_mat(data = results, truth = cuisine, estimate = .pred_class) %>% \n", + "summary()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " .metric .estimator .estimate\n", + "1 accuracy multiclass 0.7880435\n", + "2 kap multiclass 0.7276583\n", + "3 sens macro 0.7780927\n", + "4 spec macro 0.9477598\n", + "5 ppv macro 0.7585583\n", + "6 npv macro 0.9460080\n", + "7 mcc multiclass 0.7292724\n", + "8 j_index macro 0.7258524\n", + "9 bal_accuracy macro 0.8629262\n", + "10 detection_prevalence macro 0.2000000\n", + "11 precision macro 0.7585583\n", + "12 recall macro 0.7780927\n", + "13 f_meas macro 0.7641862" + ], + "text/markdown": [ + "\n", + "A tibble: 13 × 3\n", + "\n", + "| .metric <chr> | .estimator <chr> | .estimate <dbl> |\n", + "|---|---|---|\n", + "| accuracy | multiclass | 0.7880435 |\n", + "| kap | multiclass | 0.7276583 |\n", + "| sens | macro | 0.7780927 |\n", + "| spec | macro | 0.9477598 |\n", + "| ppv | macro | 0.7585583 |\n", + "| npv | macro | 0.9460080 |\n", + "| mcc | multiclass | 0.7292724 |\n", + "| j_index | macro | 0.7258524 |\n", + "| bal_accuracy | macro | 0.8629262 |\n", + "| detection_prevalence | macro | 0.2000000 |\n", + "| precision | macro | 0.7585583 
|\n", + "| recall | macro | 0.7780927 |\n", + "| f_meas | macro | 0.7641862 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 13 × 3\n", + "\\begin{tabular}{lll}\n", + " .metric & .estimator & .estimate\\\\\n", + " & & \\\\\n", + "\\hline\n", + "\t accuracy & multiclass & 0.7880435\\\\\n", + "\t kap & multiclass & 0.7276583\\\\\n", + "\t sens & macro & 0.7780927\\\\\n", + "\t spec & macro & 0.9477598\\\\\n", + "\t ppv & macro & 0.7585583\\\\\n", + "\t npv & macro & 0.9460080\\\\\n", + "\t mcc & multiclass & 0.7292724\\\\\n", + "\t j\\_index & macro & 0.7258524\\\\\n", + "\t bal\\_accuracy & macro & 0.8629262\\\\\n", + "\t detection\\_prevalence & macro & 0.2000000\\\\\n", + "\t precision & macro & 0.7585583\\\\\n", + "\t recall & macro & 0.7780927\\\\\n", + "\t f\\_meas & macro & 0.7641862\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 13 × 3
.metric.estimator.estimate
<chr><chr><dbl>
accuracy multiclass0.7880435
kap multiclass0.7276583
sens macro 0.7780927
spec macro 0.9477598
ppv macro 0.7585583
npv macro 0.9460080
mcc multiclass0.7292724
j_index macro 0.7258524
bal_accuracy macro 0.8629262
detection_prevalencemacro 0.2000000
precision macro 0.7585583
recall macro 0.7780927
f_meas macro 0.7641862
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 494 + }, + "id": "OYqetUyzL5Wz", + "outputId": "6a84d65e-113d-4281-dfc1-16e8b70f37e6" + } + }, + { + "cell_type": "markdown", + "source": [ + "Eğer doğruluk, duyarlılık, ppv gibi bazı metriklere odaklanırsak, başlangıç için fena değiliz 🥳!\n", + "\n", + "## 5. Daha Derine İnmek\n", + "\n", + "Hadi ince bir soru soralım: Tahmin edilen sonuç olarak belirli bir mutfak türüne karar vermek için hangi kriterler kullanılıyor?\n", + "\n", + "Aslında, lojistik regresyon gibi istatistiksel makine öğrenimi algoritmaları `olasılık` temellidir; yani bir sınıflandırıcı tarafından tahmin edilen şey, olası sonuçlar kümesi üzerinde bir olasılık dağılımıdır. En yüksek olasılığa sahip sınıf, verilen gözlemler için en olası sonuç olarak seçilir.\n", + "\n", + "Hadi bunu hem kesin sınıf tahminleri hem de olasılıklarla uygulayarak görelim.\n" + ], + "metadata": { + "id": "43t7vz8vMJtW" + } + }, + { + "cell_type": "code", + "execution_count": 13, + "source": [ + "# Make hard class prediction and probabilities\n", + "results_prob <- cuisines_test %>%\n", + " select(cuisine) %>% \n", + " bind_cols(mr_fit %>% predict(new_data = cuisines_test)) %>% \n", + " bind_cols(mr_fit %>% predict(new_data = cuisines_test, type = \"prob\"))\n", + "\n", + "# Print out results\n", + "results_prob %>% \n", + " slice_head(n = 5)" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine .pred_class .pred_chinese .pred_indian .pred_japanese .pred_korean\n", + "1 indian thai 1.551259e-03 0.4587877 5.988039e-04 2.428503e-04\n", + "2 indian indian 2.637133e-05 0.9999488 6.648651e-07 2.259993e-05\n", + "3 indian indian 1.049433e-03 0.9909982 1.060937e-03 1.644947e-05\n", + "4 indian indian 6.237482e-02 0.4763035 9.136702e-02 3.660913e-01\n", + "5 indian indian 1.431745e-02 0.9418551 2.945239e-02 8.721782e-03\n", + " .pred_thai \n", + "1 5.388194e-01\n", 
+ "2 1.577948e-06\n", + "3 6.874989e-03\n", + "4 3.863391e-03\n", + "5 5.653283e-03" + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 7\n", + "\n", + "| cuisine <fct> | .pred_class <fct> | .pred_chinese <dbl> | .pred_indian <dbl> | .pred_japanese <dbl> | .pred_korean <dbl> | .pred_thai <dbl> |\n", + "|---|---|---|---|---|---|---|\n", + "| indian | thai | 1.551259e-03 | 0.4587877 | 5.988039e-04 | 2.428503e-04 | 5.388194e-01 |\n", + "| indian | indian | 2.637133e-05 | 0.9999488 | 6.648651e-07 | 2.259993e-05 | 1.577948e-06 |\n", + "| indian | indian | 1.049433e-03 | 0.9909982 | 1.060937e-03 | 1.644947e-05 | 6.874989e-03 |\n", + "| indian | indian | 6.237482e-02 | 0.4763035 | 9.136702e-02 | 3.660913e-01 | 3.863391e-03 |\n", + "| indian | indian | 1.431745e-02 | 0.9418551 | 2.945239e-02 | 8.721782e-03 | 5.653283e-03 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 7\n", + "\\begin{tabular}{lllllll}\n", + " cuisine & .pred\\_class & .pred\\_chinese & .pred\\_indian & .pred\\_japanese & .pred\\_korean & .pred\\_thai\\\\\n", + " & & & & & & \\\\\n", + "\\hline\n", + "\t indian & thai & 1.551259e-03 & 0.4587877 & 5.988039e-04 & 2.428503e-04 & 5.388194e-01\\\\\n", + "\t indian & indian & 2.637133e-05 & 0.9999488 & 6.648651e-07 & 2.259993e-05 & 1.577948e-06\\\\\n", + "\t indian & indian & 1.049433e-03 & 0.9909982 & 1.060937e-03 & 1.644947e-05 & 6.874989e-03\\\\\n", + "\t indian & indian & 6.237482e-02 & 0.4763035 & 9.136702e-02 & 3.660913e-01 & 3.863391e-03\\\\\n", + "\t indian & indian & 1.431745e-02 & 0.9418551 & 2.945239e-02 & 8.721782e-03 & 5.653283e-03\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 7
cuisine.pred_class.pred_chinese.pred_indian.pred_japanese.pred_korean.pred_thai
<fct><fct><dbl><dbl><dbl><dbl><dbl>
indianthai 1.551259e-030.45878775.988039e-042.428503e-045.388194e-01
indianindian2.637133e-050.99994886.648651e-072.259993e-051.577948e-06
indianindian1.049433e-030.99099821.060937e-031.644947e-056.874989e-03
indianindian6.237482e-020.47630359.136702e-023.660913e-013.863391e-03
indianindian1.431745e-020.94185512.945239e-028.721782e-035.653283e-03
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 248 + }, + "id": "xdKNs-ZPMTJL", + "outputId": "68f6ac5a-725a-4eff-9ea6-481fef00e008" + } + }, + { + "cell_type": "markdown", + "source": [ + "Çok daha iyi!\n", + "\n", + "✅ Modelin neden ilk gözlemin Tayland mutfağı olduğundan oldukça emin olduğunu açıklayabilir misiniz?\n", + "\n", + "## **🚀Meydan Okuma**\n", + "\n", + "Bu derste, temizlenmiş verilerinizi kullanarak bir dizi malzemeye dayanarak ulusal bir mutfağı tahmin edebilen bir makine öğrenimi modeli oluşturdunuz. Tidymodels'in verileri sınıflandırmak için sunduğu [birçok seçeneği](https://www.tidymodels.org/find/parsnip/#models) ve multinomial regresyonu uyarlamak için [diğer yolları](https://parsnip.tidymodels.org/articles/articles/Examples.html#multinom_reg-models) incelemek için biraz zaman ayırın.\n", + "\n", + "#### TEŞEKKÜRLER:\n", + "\n", + "[`Allison Horst`](https://twitter.com/allison_horst/) R'yi daha sıcak ve ilgi çekici hale getiren harika çizimler oluşturduğu için. Daha fazla çizimi onun [galerisinde](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM) bulabilirsiniz.\n", + "\n", + "[Cassie Breviu](https://www.twitter.com/cassieview) ve [Jen Looper](https://www.twitter.com/jenlooper) bu modülün orijinal Python versiyonunu oluşturdukları için ♥️\n", + "\n", + "
\n", + "Biraz espri eklemek isterdim ama yemek kelime oyunlarını anlamıyorum 😅.\n", + "\n", + "
\n", + "\n", + "Keyifli öğrenmeler,\n", + "\n", + "[Eric](https://twitter.com/ericntay), Gold Microsoft Learn Öğrenci Elçisi.\n" + ], + "metadata": { + "id": "2tWVHMeLMYdM" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, AI çeviri hizmeti [Co-op Translator](https://github.com/Azure/co-op-translator) kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlıklar içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. Bu çevirinin kullanımından kaynaklanan yanlış anlamalar veya yanlış yorumlamalar için sorumluluk kabul etmiyoruz.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/tr/4-Classification/2-Classifiers-1/solution/notebook.ipynb b/translations/tr/4-Classification/2-Classifiers-1/solution/notebook.ipynb new file mode 100644 index 000000000..4eb07caa7 --- /dev/null +++ b/translations/tr/4-Classification/2-Classifiers-1/solution/notebook.ipynb @@ -0,0 +1,281 @@ +{ + "cells": [ + { + "source": [ + "# Sınıflandırma Modelleri Oluştur\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 0 indian 0 0 0 0 0 \n", + "1 1 indian 1 0 0 0 0 \n", + "2 2 indian 0 0 0 0 0 \n", + "3 3 indian 0 0 0 0 0 \n", + "4 4 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 
0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 382 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00indian00000000...0000000000
11indian10000000...0000000000
22indian00000000...0000000000
33indian00000000...0000000000
44indian00000000...0000000010
\n

5 rows × 382 columns

\n
" + }, + "metadata": {}, + "execution_count": 1 + } + ], + "source": [ + "import pandas as pd\n", + "cuisines_df = pd.read_csv(\"../../data/cleaned_cuisines.csv\")\n", + "cuisines_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.model_selection import train_test_split, cross_val_score\n", + "from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report, precision_recall_curve\n", + "from sklearn.svm import SVC\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 indian\n", + "1 indian\n", + "2 indian\n", + "3 indian\n", + "4 indian\n", + "Name: cuisine, dtype: object" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ], + "source": [ + "cuisines_label_df = cuisines_df['cuisine']\n", + "cuisines_label_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 4 + } + ], + "source": [ + "cuisines_feature_df = cuisines_df.drop(['Unnamed: 0', 'cuisine'], axis=1)\n", + "cuisines_feature_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(cuisines_feature_df, cuisines_label_df, test_size=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Accuracy is 0.8181818181818182\n" + ] + } + ], + "source": [ + "lr = LogisticRegression(multi_class='ovr',solver='liblinear')\n", + "model = lr.fit(X_train, np.ravel(y_train))\n", + "\n", + "accuracy = model.score(X_test, y_test)\n", + "print (\"Accuracy is {}\".format(accuracy))" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "ingredients: Index(['artemisia', 'black_pepper', 'mushroom', 'shiitake', 'soy_sauce',\n 'vegetable_oil'],\n dtype='object')\ncuisine: korean\n" + ] + } + ], + "source": [ + "# test an item\n", + "print(f'ingredients: {X_test.iloc[50][X_test.iloc[50]!=0].keys()}')\n", + "print(f'cuisine: {y_test.iloc[50]}')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " 0\n", + "korean 0.392231\n", + "chinese 0.372872\n", + "japanese 0.218825\n", + "thai 0.013427\n", + "indian 0.002645" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
0
korean0.392231
chinese0.372872
japanese0.218825
thai0.013427
indian0.002645
\n
" + }, + "metadata": {}, + "execution_count": 8 + } + ], + "source": [ + "# reshape to 2d array and transpose\n", + "test= X_test.iloc[50].values.reshape(-1, 1).T\n", + "# predict with score\n", + "proba = model.predict_proba(test)\n", + "classes = model.classes_\n", + "# create df with classes and scores\n", + "resultdf = pd.DataFrame(data=proba, columns=classes)\n", + "\n", + "# create df to show results\n", + "topPrediction = resultdf.T.sort_values(by=[0], ascending = [False])\n", + "topPrediction.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " precision recall f1-score support\n\n chinese 0.75 0.73 0.74 223\n indian 0.93 0.88 0.90 255\n japanese 0.78 0.78 0.78 253\n korean 0.87 0.86 0.86 236\n thai 0.76 0.84 0.80 232\n\n accuracy 0.82 1199\n macro avg 0.82 0.82 0.82 1199\nweighted avg 0.82 0.82 0.82 1199\n\n" + ] + } + ], + "source": [ + "y_pred = model.predict(X_test)\r\n", + "print(classification_report(y_test,y_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, AI çeviri hizmeti [Co-op Translator](https://github.com/Azure/co-op-translator) kullanılarak çevrilmiştir. Doğruluğu sağlamak için çaba göstersek de, otomatik çeviriler hata veya yanlışlıklar içerebilir. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. 
Bu çevirinin kullanımından kaynaklanan herhangi bir yanlış anlama veya yanlış yorumlama durumunda sorumluluk kabul edilmez.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "9408506dd864f2b6e334c62f80c0cfcc", + "translation_date": "2025-09-06T14:33:19+00:00", + "source_file": "4-Classification/2-Classifiers-1/solution/notebook.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/translations/tr/4-Classification/3-Classifiers-2/notebook.ipynb b/translations/tr/4-Classification/3-Classifiers-2/notebook.ipynb new file mode 100644 index 000000000..0e7f65b8b --- /dev/null +++ b/translations/tr/4-Classification/3-Classifiers-2/notebook.ipynb @@ -0,0 +1,163 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 0 indian 0 0 0 0 0 \n", + "1 1 indian 1 0 0 0 0 \n", + "2 2 indian 0 0 0 0 0 \n", + "3 3 indian 0 0 0 0 0 \n", + "4 4 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 
0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 382 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00indian00000000...0000000000
11indian10000000...0000000000
22indian00000000...0000000000
33indian00000000...0000000000
44indian00000000...0000000010
\n

5 rows × 382 columns

\n
" + }, + "metadata": {}, + "execution_count": 9 + } + ], + "source": [ + "import pandas as pd\n", + "cuisines_df = pd.read_csv(\"../data/cleaned_cuisines.csv\")\n", + "cuisines_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 indian\n", + "1 indian\n", + "2 indian\n", + "3 indian\n", + "4 indian\n", + "Name: cuisine, dtype: object" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ], + "source": [ + "cuisines_label_df = cuisines_df['cuisine']\n", + "cuisines_label_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 11 + } + ], + "source": [ + "cuisines_feature_df = cuisines_df.drop(['Unnamed: 0', 'cuisine'], axis=1)\n", + "cuisines_feature_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, [Co-op Translator](https://github.com/Azure/co-op-translator) adlı yapay zeka çeviri hizmeti kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlıklar içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. Bu çevirinin kullanımından kaynaklanan yanlış anlama veya yanlış yorumlamalardan sorumlu değiliz.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "15a83277036572e0773229b5f21c1e12", + "translation_date": "2025-09-06T14:42:31+00:00", + "source_file": "4-Classification/3-Classifiers-2/notebook.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/translations/tr/4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb b/translations/tr/4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb new file mode 100644 index 000000000..5dc70d53e --- /dev/null +++ b/translations/tr/4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb @@ -0,0 +1,650 @@ +{ + 
"nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "lesson_12-R.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "fab50046ca413a38939d579f8432274f", + "translation_date": "2025-09-06T14:49:42+00:00", + "source_file": "4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb", + "language_code": "tr" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "jsFutf_ygqSx" + }, + "source": [ + "# Lezzetli Asya ve Hint Mutfağı: Bir sınıflandırma modeli oluşturun\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HD54bEefgtNO" + }, + "source": [ + "## Mutfak Sınıflandırıcıları 2\n", + "\n", + "Bu ikinci sınıflandırma dersinde, kategorik verileri sınıflandırmanın `daha fazla yolunu` keşfedeceğiz. Ayrıca bir sınıflandırıcıyı diğerine tercih etmenin sonuçlarını öğreneceğiz.\n", + "\n", + "### [**Ders Öncesi Testi**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/23/)\n", + "\n", + "### **Ön Koşul**\n", + "\n", + "Önceki dersleri tamamladığınızı varsayıyoruz, çünkü daha önce öğrendiğimiz bazı kavramları burada devam ettireceğiz.\n", + "\n", + "Bu ders için aşağıdaki paketlere ihtiyacımız olacak:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/), veri bilimini daha hızlı, kolay ve eğlenceli hale getirmek için tasarlanmış bir [R paketleri koleksiyonudur](https://www.tidyverse.org/packages).\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) çerçevesi, modelleme ve makine öğrenimi için bir [paketler koleksiyonudur](https://www.tidymodels.org/packages/).\n", + "\n", + "- `themis`: [themis paketi](https://themis.tidymodels.org/), Dengesiz Verilerle Çalışmak için Ek Tarif Adımları sağlar.\n", + "\n", + "Bu paketleri şu şekilde yükleyebilirsiniz:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\", 
\"kernlab\", \"themis\", \"ranger\", \"xgboost\", \"kknn\"))`\n", + "\n", + "Alternatif olarak, aşağıdaki script, bu modülü tamamlamak için gerekli paketlere sahip olup olmadığınızı kontrol eder ve eksik olanları sizin için yükler.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "vZ57IuUxgyQt" + }, + "source": [ + "suppressWarnings(if (!require(\"pacman\"))install.packages(\"pacman\"))\n", + "\n", + "pacman::p_load(tidyverse, tidymodels, themis, kernlab, ranger, xgboost, kknn)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "z22M-pj4g07x" + }, + "source": [ + "## **1. Bir sınıflandırma haritası**\n", + "\n", + "[Önceki dersimizde](https://github.com/microsoft/ML-For-Beginners/tree/main/4-Classification/2-Classifiers-1), şu soruyu ele almaya çalıştık: birden fazla model arasında nasıl seçim yaparız? Büyük ölçüde, bu seçim veri özelliklerine ve çözmek istediğimiz problemin türüne (örneğin sınıflandırma veya regresyon) bağlıdır.\n", + "\n", + "Daha önce, Microsoft'un cheat sheet'ini kullanarak verileri sınıflandırırken sahip olduğunuz çeşitli seçenekleri öğrenmiştik. Python'un Makine Öğrenimi çerçevesi olan Scikit-learn, benzer ancak daha ayrıntılı bir cheat sheet sunar ve bu, tahmincilerinizi (sınıflandırıcılar için başka bir terim) daha da daraltmanıza yardımcı olabilir:\n", + "\n", + "

\n", + " \n", + "

\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "u1i3xRIVg7vG" + }, + "source": [ + "> İpucu: [bu haritayı çevrimiçi ziyaret edin](https://scikit-learn.org/stable/tutorial/machine_learning_map/) ve belgeleri okumak için yol boyunca tıklayın.\n", + ">\n", + "> [Tidymodels referans sitesi](https://www.tidymodels.org/find/parsnip/#models) ayrıca farklı model türleri hakkında mükemmel bir dokümantasyon sunar.\n", + "\n", + "### **Plan** 🗺️\n", + "\n", + "Bu harita, verilerinizi net bir şekilde anladığınızda çok faydalıdır, çünkü yolları boyunca 'yürüyerek' bir karara varabilirsiniz:\n", + "\n", + "- \\>50 örneğimiz var\n", + "\n", + "- Bir kategori tahmin etmek istiyoruz\n", + "\n", + "- Etiketlenmiş verilerimiz var\n", + "\n", + "- 100K'dan az örneğimiz var\n", + "\n", + "- ✨ Linear SVC seçebiliriz\n", + "\n", + "- Eğer bu işe yaramazsa, çünkü sayısal verilerimiz var\n", + "\n", + " - ✨ KNeighbors Classifier deneyebiliriz\n", + "\n", + " - Eğer bu da işe yaramazsa, ✨ SVC ve ✨ Ensemble Classifiers deneyin\n", + "\n", + "Bu takip edilmesi çok faydalı bir yol. Şimdi, [tidymodels](https://www.tidymodels.org/) modelleme çerçevesini kullanarak işe koyulalım: iyi istatistiksel uygulamaları teşvik etmek için geliştirilmiş, tutarlı ve esnek bir R paketleri koleksiyonu 😊.\n", + "\n", + "## 2. Veriyi böl ve dengesiz veri setiyle başa çık.\n", + "\n", + "Önceki derslerimizden, mutfaklarımız arasında ortak olan bir dizi bileşen olduğunu öğrendik. 
Ayrıca, mutfakların sayısında oldukça eşitsiz bir dağılım vardı.\n", + "\n", + "Bunlarla şu şekilde başa çıkacağız:\n", + "\n", + "- Farklı mutfaklar arasında kafa karışıklığı yaratan en yaygın bileşenleri `dplyr::select()` kullanarak çıkaracağız.\n", + "\n", + "- Veriyi modellemeye hazırlamak için bir `recipe` kullanarak bir `over-sampling` algoritması uygulayacağız.\n", + "\n", + "Yukarıdakilere önceki derste zaten baktık, bu yüzden bu iş kolay olacak 🥳!\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "6tj_rN00hClA" + }, + "source": [ + "# Load the core Tidyverse and Tidymodels packages\n", + "library(tidyverse)\n", + "library(tidymodels)\n", + "\n", + "# Load the original cuisines data\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/4-Classification/data/cuisines.csv\")\n", + "\n", + "# Drop id column, rice, garlic and ginger from our original data set\n", + "df_select <- df %>% \n", + " select(-c(1, rice, garlic, ginger)) %>%\n", + " # Encode cuisine column as categorical\n", + " mutate(cuisine = factor(cuisine))\n", + "\n", + "\n", + "# Create data split specification\n", + "set.seed(2056)\n", + "cuisines_split <- initial_split(data = df_select,\n", + " strata = cuisine,\n", + " prop = 0.7)\n", + "\n", + "# Extract the data in each split\n", + "cuisines_train <- training(cuisines_split)\n", + "cuisines_test <- testing(cuisines_split)\n", + "\n", + "# Display distribution of cuisines in the training set\n", + "cuisines_train %>% \n", + " count(cuisine) %>% \n", + " arrange(desc(n))" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zFin5yw3hHb1" + }, + "source": [ + "### Dengesiz Verilerle Başa Çıkma\n", + "\n", + "Dengesiz veriler genellikle model performansı üzerinde olumsuz etkilere sahiptir. 
Birçok model, gözlem sayısının eşit olduğu durumlarda en iyi performansı gösterir ve bu nedenle dengesiz verilerle başa çıkmakta zorlanır.\n", + "\n", + "Dengesiz veri setleriyle başa çıkmanın temel olarak iki yolu vardır:\n", + "\n", + "- azınlık sınıfına gözlem eklemek: `Over-sampling` (Aşırı örnekleme), örneğin, bir SMOTE algoritması kullanarak. Bu algoritma, azınlık sınıfının yeni örneklerini, bu vakaların en yakın komşularını kullanarak sentetik olarak üretir.\n", + "\n", + "- çoğunluk sınıfından gözlem çıkarmak: `Under-sampling` (Azaltılmış örnekleme)\n", + "\n", + "Önceki dersimizde, dengesiz veri setleriyle bir `recipe` (tarif) kullanarak nasıl başa çıkılacağını göstermiştik. Bir tarif, bir veri setine hangi adımların uygulanması gerektiğini tanımlayan bir plan olarak düşünülebilir. Bizim durumumuzda, `training set` (eğitim seti) için mutfak türlerimizin sayısında eşit bir dağılım elde etmek istiyoruz. Hadi başlayalım.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "cRzTnHolhLWd" + }, + "source": [ + "# Load themis package for dealing with imbalanced data\n", + "library(themis)\n", + "\n", + "# Create a recipe for preprocessing training data\n", + "cuisines_recipe <- recipe(cuisine ~ ., data = cuisines_train) %>%\n", + " step_smote(cuisine) \n", + "\n", + "# Print recipe\n", + "cuisines_recipe" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KxOQ2ORhhO81" + }, + "source": [ + "Artık modelleri eğitmeye hazırız 👩‍💻👨‍💻!\n", + "\n", + "## 3. Multinom regresyon modellerinin ötesinde\n", + "\n", + "Önceki dersimizde multinom regresyon modellerine baktık. Şimdi sınıflandırma için daha esnek modelleri keşfedelim.\n", + "\n", + "### Destek Vektör Makineleri\n", + "\n", + "Sınıflandırma bağlamında, `Destek Vektör Makineleri` sınıfları \"en iyi\" şekilde ayıran bir *hiper düzlem* bulmaya çalışan bir makine öğrenimi tekniğidir. Basit bir örneğe bakalım:\n", + "\n", + "

\n", + " \n", + "

https://commons.wikimedia.org/w/index.php?curid=22877598
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "C4Wsd0vZhXYu" + }, + "source": [ + "H1 sınıfları ayırmaz. H2 ayırır, ancak yalnızca küçük bir boşlukla. H3 ise sınıfları maksimum boşlukla ayırır.\n", + "\n", + "#### Doğrusal Destek Vektör Sınıflandırıcı\n", + "\n", + "Destek Vektör Sınıflandırıcısı (SVC), ML teknikleri ailesinden Destek-Vektör makinelerinin bir alt dalıdır. SVC'de, hiper düzlem, eğitim gözlemlerinin `çoğunu` doğru bir şekilde ayıracak şekilde seçilir, ancak bazı gözlemleri `yanlış sınıflandırabilir`. Bazı noktaların yanlış tarafta olmasına izin vererek, SVM aykırı değerlere karşı daha dayanıklı hale gelir ve dolayısıyla yeni verilere daha iyi genelleme yapar. Bu ihlali düzenleyen parametre `cost` (maliyet) olarak adlandırılır ve varsayılan değeri 1'dir (bkz. `help(\"svm_poly\")`).\n", + "\n", + "Bir polinom SVM modelinde `degree = 1` ayarlayarak doğrusal bir SVC oluşturalım.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "vJpp6nuChlBz" + }, + "source": [ + "# Make a linear SVC specification\n", + "svc_linear_spec <- svm_poly(degree = 1) %>% \n", + " set_engine(\"kernlab\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle specification and recipe into a workflow\n", + "svc_linear_wf <- workflow() %>% \n", + " add_recipe(cuisines_recipe) %>% \n", + " add_model(svc_linear_spec)\n", + "\n", + "# Print out workflow\n", + "svc_linear_wf" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rDs8cWNkhoqu" + }, + "source": [ + "Artık ön işleme adımlarını ve model spesifikasyonunu bir *iş akışına* dahil ettiğimize göre, linear SVC'yi eğitip sonuçları değerlendirerek devam edebiliriz. 
Performans metrikleri için, `accuracy`, `sensitivity`, `Positive Predicted Value` ve `F Measure` değerlendirecek bir metrik seti oluşturalım.\n", + "\n", + "> `augment()` verilen verilere tahminler için sütun(lar) ekleyecektir.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "81wiqcwuhrnq" + }, + "source": [ + "# Train a linear SVC model\n", + "svc_linear_fit <- svc_linear_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "# Create a metric set\n", + "eval_metrics <- metric_set(ppv, sens, accuracy, f_meas)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "svc_linear_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0UFQvHf-huo3" + }, + "source": [ + "#### Destek Vektör Makinesi\n", + "\n", + "Destek vektör makinesi (SVM), sınıflar arasındaki doğrusal olmayan bir sınırı karşılamak için destek vektör sınıflandırıcısının bir uzantısıdır. Temelde, SVM'ler sınıflar arasındaki doğrusal olmayan ilişkileri uyarlamak için özellik uzayını genişletmek amacıyla *çekirdek hilesi*ni kullanır. SVM'ler tarafından kullanılan popüler ve son derece esnek bir çekirdek fonksiyonu *Radyal tabanlı fonksiyon*dur. 
Şimdi, verilerimiz üzerinde nasıl bir performans göstereceğini görelim.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "-KX4S8mzhzmp" + }, + "source": [ + "set.seed(2056)\n", + "\n", + "# Make an RBF SVM specification\n", + "svm_rbf_spec <- svm_rbf() %>% \n", + " set_engine(\"kernlab\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle specification and recipe into a workflow\n", + "svm_rbf_wf <- workflow() %>% \n", + " add_recipe(cuisines_recipe) %>% \n", + " add_model(svm_rbf_spec)\n", + "\n", + "\n", + "# Train an RBF model\n", + "svm_rbf_fit <- svm_rbf_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "svm_rbf_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QBFSa7WSh4HQ" + }, + "source": [ + "Çok daha iyi 🤩!\n", + "\n", + "> ✅ Lütfen bakınız:\n", + ">\n", + "> - [*Destek Vektör Makineleri*](https://bradleyboehmke.github.io/HOML/svm.html), R ile Uygulamalı Makine Öğrenimi\n", + ">\n", + "> - [*Destek Vektör Makineleri*](https://www.statlearning.com/), R ile İstatistiksel Öğrenime Giriş\n", + ">\n", + "> daha fazla bilgi için.\n", + "\n", + "### En Yakın Komşu Sınıflandırıcılar\n", + "\n", + "*K*-en yakın komşu (KNN), her bir gözlemin diğer gözlemlere olan *benzerliğine* dayanarak tahmin edildiği bir algoritmadır.\n", + "\n", + "Haydi, verilerimize bir tane uygulayalım.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "k4BxxBcdh9Ka" + }, + "source": [ + "# Make a KNN specification\n", + "knn_spec <- nearest_neighbor() %>% \n", + " set_engine(\"kknn\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle recipe and model specification into a workflow\n", + "knn_wf <- workflow() %>% \n", + " add_recipe(cuisines_recipe) %>% \n", + " add_model(knn_spec)\n", + 
"\n", + "# Train a KNN model\n", + "knn_wf_fit <- knn_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "knn_wf_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HaegQseriAcj" + }, + "source": [ + "Bu modelin performansı pek iyi görünmüyor. Muhtemelen modelin argümanlarını değiştirmek (bkz. `help(\"nearest_neighbor\")`) model performansını artıracaktır. Mutlaka denemelisiniz.\n", + "\n", + "> ✅ Lütfen bakınız:\n", + ">\n", + "> - [Hands-on Machine Learning with R](https://bradleyboehmke.github.io/HOML/)\n", + ">\n", + "> - [An Introduction to Statistical Learning with Applications in R](https://www.statlearning.com/)\n", + ">\n", + "> *K*-En Yakın Komşu sınıflandırıcıları hakkında daha fazla bilgi edinmek için.\n", + "\n", + "### Topluluk sınıflandırıcıları\n", + "\n", + "Topluluk algoritmaları, birden fazla temel tahmin ediciyi birleştirerek optimal bir model oluşturur. Bu, şu yöntemlerle yapılır:\n", + "\n", + "`bagging`: temel modellerin bir koleksiyonuna *ortalama alma fonksiyonu* uygulamak\n", + "\n", + "`boosting`: birbiri üzerine inşa edilen bir model dizisi oluşturarak tahmin performansını iyileştirmek.\n", + "\n", + "Hadi bir Random Forest modelini deneyerek başlayalım. 
Bu model, büyük bir karar ağacı koleksiyonu oluşturur ve ardından daha iyi bir genel model için bir ortalama alma fonksiyonu uygular.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "49DPoVs6iK1M" + }, + "source": [ + "# Make a random forest specification\n", + "rf_spec <- rand_forest() %>% \n", + " set_engine(\"ranger\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle recipe and model specification into a workflow\n", + "rf_wf <- workflow() %>% \n", + " add_recipe(cuisines_recipe) %>% \n", + " add_model(rf_spec)\n", + "\n", + "# Train a random forest model\n", + "rf_wf_fit <- rf_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "rf_wf_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RGVYwC_aiUWc" + }, + "source": [ + "Tebrikler 👏!\n", + "\n", + "Hadi bir de Boosted Tree modeliyle deney yapalım.\n", + "\n", + "Boosted Tree, bir dizi ardışık karar ağacı oluşturan ve her ağacın önceki ağaçların sonuçlarına bağlı olduğu bir topluluk yöntemini tanımlar. Amaç, hatayı kademeli olarak azaltmaktır. Bu yöntem, yanlış sınıflandırılmış öğelerin ağırlıklarına odaklanır ve bir sonraki sınıflandırıcıyı düzeltmek için uyumu ayarlar.\n", + "\n", + "Bu modeli oluşturmanın farklı yolları vardır (bkz. `help(\"boost_tree\")`). 
Bu örnekte, Boosted Tree'leri `xgboost` motoru aracılığıyla oluşturacağız.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Py1YWo-micWs" + }, + "source": [ + "# Make a boosted tree specification\n", + "boost_spec <- boost_tree(trees = 200) %>% \n", + " set_engine(\"xgboost\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle recipe and model specification into a workflow\n", + "boost_wf <- workflow() %>% \n", + " add_recipe(cuisines_recipe) %>% \n", + " add_model(boost_spec)\n", + "\n", + "# Train a boosted tree model\n", + "boost_wf_fit <- boost_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "boost_wf_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zNQnbuejigZM" + }, + "source": [ + "> ✅ Lütfen bakınız:\n", + ">\n", + "> - [Sosyal Bilimciler için Makine Öğrenimi](https://cimentadaj.github.io/ml_socsci/tree-based-methods.html#random-forests)\n", + ">\n", + "> - [R ile Uygulamalı Makine Öğrenimi](https://bradleyboehmke.github.io/HOML/)\n", + ">\n", + "> - [R Uygulamalarıyla İstatistiksel Öğrenime Giriş](https://www.statlearning.com/)\n", + ">\n", + "> - [https://algotech.netlify.app/blog/xgboost/](https://algotech.netlify.app/blog/xgboost/) - xgboost'a iyi bir alternatif olan AdaBoost modelini inceler.\n", + ">\n", + "> Topluluk sınıflandırıcıları hakkında daha fazla bilgi edinmek için.\n", + "\n", + "## 4. Ekstra - birden fazla modeli karşılaştırma\n", + "\n", + "Bu laboratuvarda oldukça fazla model oluşturduk 🙌. Farklı ön işleme setlerinden ve/veya model tanımlarından birçok iş akışı oluşturmak ve ardından performans metriklerini tek tek hesaplamak yorucu veya zahmetli olabilir.\n", + "\n", + "Bunu, eğitim seti üzerinde bir iş akışı listesini eğiten ve ardından test setine dayalı performans metriklerini döndüren bir fonksiyon oluşturarak çözebilir miyiz, bir bakalım. 
Liste içindeki her bir elemana fonksiyon uygulamak için [purrr](https://purrr.tidyverse.org/) paketinden `map()` ve `map_dfr()` fonksiyonlarını kullanacağız.\n", + "\n", + "> [`map()`](https://purrr.tidyverse.org/reference/map.html) fonksiyonları, birçok for döngüsünü daha özlü ve okunması daha kolay bir kodla değiştirmenize olanak tanır. [`map()`](https://purrr.tidyverse.org/reference/map.html) fonksiyonları hakkında bilgi edinmek için en iyi yer, R for Data Science kitabındaki [iterasyon bölümü](http://r4ds.had.co.nz/iteration.html)dir.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Qzb7LyZnimd2" + }, + "source": [ + "set.seed(2056)\n", + "\n", + "# Create a metric set\n", + "eval_metrics <- metric_set(ppv, sens, accuracy, f_meas)\n", + "\n", + "# Define a function that returns performance metrics\n", + "compare_models <- function(workflow_list, train_set, test_set){\n", + " \n", + " suppressWarnings(\n", + " # Fit each model to the train_set\n", + " map(workflow_list, fit, data = train_set) %>% \n", + " # Make predictions on the test set\n", + " map_dfr(augment, new_data = test_set, .id = \"model\") %>%\n", + " # Select desired columns\n", + " select(model, cuisine, .pred_class) %>% \n", + " # Evaluate model performance\n", + " group_by(model) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class) %>% \n", + " ungroup()\n", + " )\n", + " \n", + "} # End of function" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Fwa712sNisDA" + }, + "source": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "3i4VJOi2iu-a" + }, + "source": [ + "# Make a list of workflows\n", + "workflow_list <- list(\n", + " \"svc\" = svc_linear_wf,\n", + " \"svm\" = svm_rbf_wf,\n", + " \"knn\" = knn_wf,\n", + " \"random_forest\" = rf_wf,\n", + " \"xgboost\" = boost_wf)\n", + "\n", + "# Call the function\n", + "set.seed(2056)\n", + "perf_metrics <- compare_models(workflow_list = workflow_list, 
train_set = cuisines_train, test_set = cuisines_test)\n", + "\n", + "# Print out performance metrics\n", + "perf_metrics %>% \n", + " group_by(.metric) %>% \n", + " arrange(desc(.estimate)) %>% \n", + " slice_head(n=7)\n", + "\n", + "# Compare accuracy\n", + "perf_metrics %>% \n", + " filter(.metric == \"accuracy\") %>% \n", + " arrange(desc(.estimate))\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KuWK_lEli4nW" + }, + "source": [ + "[**workflowset**](https://workflowsets.tidymodels.org/) paketi, kullanıcıların çok sayıda modeli oluşturmasını ve kolayca eğitmesini sağlar, ancak esas olarak `çapraz doğrulama` gibi yeniden örnekleme teknikleriyle çalışmak üzere tasarlanmıştır. Bu, henüz ele almadığımız bir yaklaşımdır.\n", + "\n", + "## **🚀Meydan Okuma**\n", + "\n", + "Bu tekniklerin her birinin, örneğin SVM'lerdeki `cost`, KNN'deki `neighbors`, Random Forest'taki `mtry` (Rastgele Seçilen Tahminciler) gibi ayarlayabileceğiniz çok sayıda parametresi vardır.\n", + "\n", + "Her birinin varsayılan parametrelerini araştırın ve bu parametreleri değiştirmenin modelin kalitesi açısından ne anlama gelebileceğini düşünün.\n", + "\n", + "Belirli bir model ve parametreleri hakkında daha fazla bilgi edinmek için şu komutu kullanabilirsiniz: `help(\"model\")` örneğin `help(\"rand_forest\")`.\n", + "\n", + "> Uygulamada, genellikle bu parametrelerin *en iyi değerlerini* tahmin etmek için bir `simüle edilmiş veri seti` üzerinde birçok model eğitir ve bu modellerin ne kadar iyi performans gösterdiğini ölçeriz. 
Bu sürece **tuning** (ince ayar) denir.\n", + "\n", + "### [**Ders Sonrası Testi**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/24/)\n", + "\n", + "### **Gözden Geçirme ve Kendi Kendine Çalışma**\n", + "\n", + "Bu derslerde çok fazla teknik terim var, bu yüzden [bu listeyi](https://docs.microsoft.com/dotnet/machine-learning/resources/glossary?WT.mc_id=academic-77952-leestott) gözden geçirmek için bir dakikanızı ayırın. Faydalı terminolojiler içeriyor!\n", + "\n", + "#### TEŞEKKÜRLER:\n", + "\n", + "[`Allison Horst`](https://twitter.com/allison_horst/) R'ı daha sıcak ve ilgi çekici hale getiren harika illüstrasyonlar oluşturduğu için. Daha fazla illüstrasyonu [galerisinde](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM) bulabilirsiniz.\n", + "\n", + "[Cassie Breviu](https://www.twitter.com/cassieview) ve [Jen Looper](https://www.twitter.com/jenlooper) bu modülün orijinal Python versiyonunu oluşturdukları için ♥️\n", + "\n", + "Keyifli Öğrenmeler,\n", + "\n", + "[Eric](https://twitter.com/ericntay), Gold Microsoft Learn Öğrenci Elçisi.\n", + "\n", + "

\n", + " \n", + "

@allison_horst tarafından yapılmış bir çalışma
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, [Co-op Translator](https://github.com/Azure/co-op-translator) adlı yapay zeka çeviri hizmeti kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlıklar içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. Bu çevirinin kullanımından kaynaklanan yanlış anlamalar veya yanlış yorumlamalardan sorumlu değiliz.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/tr/4-Classification/3-Classifiers-2/solution/notebook.ipynb b/translations/tr/4-Classification/3-Classifiers-2/solution/notebook.ipynb new file mode 100644 index 000000000..810c44b1b --- /dev/null +++ b/translations/tr/4-Classification/3-Classifiers-2/solution/notebook.ipynb @@ -0,0 +1,302 @@ +{ + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 0 indian 0 0 0 0 0 \n", + "1 1 indian 1 0 0 0 0 \n", + "2 2 indian 0 0 0 0 0 \n", + "3 3 indian 0 0 0 0 0 \n", + "4 4 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 382 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00indian00000000...0000000000
11indian10000000...0000000000
22indian00000000...0000000000
33indian00000000...0000000000
44indian00000000...0000000010
\n

5 rows × 382 columns

\n
" + }, + "metadata": {}, + "execution_count": 1 + } + ], + "source": [ + "import pandas as pd\n", + "cuisines_df = pd.read_csv(\"../../data/cleaned_cuisines.csv\")\n", + "cuisines_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 indian\n", + "1 indian\n", + "2 indian\n", + "3 indian\n", + "4 indian\n", + "Name: cuisine, dtype: object" + ] + }, + "metadata": {}, + "execution_count": 2 + } + ], + "source": [ + "cuisines_label_df = cuisines_df['cuisine']\n", + "cuisines_label_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 3 + } + ], + "source": [ + "cuisines_feature_df = cuisines_df.drop(['Unnamed: 0', 'cuisine'], axis=1)\n", + "cuisines_feature_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Farklı sınıflandırıcıları deneyin\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.neighbors import KNeighborsClassifier\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.svm import SVC\n", + "from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier\n", + "from sklearn.model_selection import train_test_split, cross_val_score\n", + "from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report, precision_recall_curve\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(cuisines_feature_df, cuisines_label_df, test_size=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "C = 10\n", + "# Create different classifiers.\n", + "classifiers = {\n", + " 'Linear SVC': SVC(kernel='linear', C=C, probability=True,random_state=0),\n", + " 'KNN classifier': KNeighborsClassifier(C),\n", + " 'SVC': SVC(),\n", + " 'RFST': RandomForestClassifier(n_estimators=100),\n", + " 'ADA': AdaBoostClassifier(n_estimators=100)\n", + " \n", + "}\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Accuracy (train) for Linear SVC: 76.4% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.64 0.66 0.65 242\n", + " indian 0.91 0.86 0.89 236\n", + " japanese 0.72 0.73 0.73 245\n", + " korean 0.83 0.75 0.79 234\n", + " thai 0.75 0.82 0.78 242\n", + "\n", + " 
accuracy 0.76 1199\n", + " macro avg 0.77 0.76 0.77 1199\n", + "weighted avg 0.77 0.76 0.77 1199\n", + "\n", + "Accuracy (train) for KNN classifier: 70.7% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.65 0.63 0.64 242\n", + " indian 0.84 0.81 0.82 236\n", + " japanese 0.60 0.81 0.69 245\n", + " korean 0.89 0.53 0.67 234\n", + " thai 0.69 0.75 0.72 242\n", + "\n", + " accuracy 0.71 1199\n", + " macro avg 0.73 0.71 0.71 1199\n", + "weighted avg 0.73 0.71 0.71 1199\n", + "\n", + "Accuracy (train) for SVC: 80.1% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.71 0.69 0.70 242\n", + " indian 0.92 0.92 0.92 236\n", + " japanese 0.77 0.78 0.77 245\n", + " korean 0.87 0.77 0.82 234\n", + " thai 0.75 0.86 0.80 242\n", + "\n", + " accuracy 0.80 1199\n", + " macro avg 0.80 0.80 0.80 1199\n", + "weighted avg 0.80 0.80 0.80 1199\n", + "\n", + "Accuracy (train) for RFST: 82.8% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.80 0.75 0.77 242\n", + " indian 0.90 0.91 0.90 236\n", + " japanese 0.82 0.78 0.80 245\n", + " korean 0.85 0.82 0.83 234\n", + " thai 0.78 0.89 0.83 242\n", + "\n", + " accuracy 0.83 1199\n", + " macro avg 0.83 0.83 0.83 1199\n", + "weighted avg 0.83 0.83 0.83 1199\n", + "\n", + "Accuracy (train) for ADA: 71.1% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.60 0.57 0.58 242\n", + " indian 0.87 0.84 0.86 236\n", + " japanese 0.71 0.60 0.65 245\n", + " korean 0.68 0.78 0.72 234\n", + " thai 0.70 0.78 0.74 242\n", + "\n", + " accuracy 0.71 1199\n", + " macro avg 0.71 0.71 0.71 1199\n", + "weighted avg 0.71 0.71 0.71 1199\n", + "\n" + ] + } + ], + "source": [ + "n_classifiers = len(classifiers)\n", + "\n", + "for index, (name, classifier) in enumerate(classifiers.items()):\n", + " classifier.fit(X_train, np.ravel(y_train))\n", + "\n", + " y_pred = classifier.predict(X_test)\n", + " accuracy = accuracy_score(y_test, y_pred)\n", + " print(\"Accuracy (train) for %s: %0.1f%% \" % 
(name, accuracy * 100))\n", + " print(classification_report(y_test,y_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, [Co-op Translator](https://github.com/Azure/co-op-translator) adlı yapay zeka çeviri hizmeti kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlıklar içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. Bu çevirinin kullanımından kaynaklanan yanlış anlama veya yanlış yorumlamalardan sorumlu değiliz.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "7ea2b714669c823a596d986ba2d5739f", + "translation_date": "2025-09-06T14:43:04+00:00", + "source_file": "4-Classification/3-Classifiers-2/solution/notebook.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/translations/tr/4-Classification/4-Applied/notebook.ipynb b/translations/tr/4-Classification/4-Applied/notebook.ipynb new file mode 100644 index 000000000..b5d56e471 --- /dev/null +++ b/translations/tr/4-Classification/4-Applied/notebook.ipynb @@ -0,0 +1,39 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": 
"python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 4, + "coopTranslator": { + "original_hash": "2f3e0d9e9ac5c301558fb8bf733ac0cb", + "translation_date": "2025-09-06T14:41:41+00:00", + "source_file": "4-Classification/4-Applied/notebook.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, [Co-op Translator](https://github.com/Azure/co-op-translator) adlı yapay zeka çeviri hizmeti kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlıklar içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. Bu çevirinin kullanımından kaynaklanan yanlış anlama veya yanlış yorumlamalardan sorumlu değiliz.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/tr/4-Classification/4-Applied/solution/notebook.ipynb b/translations/tr/4-Classification/4-Applied/solution/notebook.ipynb new file mode 100644 index 000000000..8f1c9442d --- /dev/null +++ b/translations/tr/4-Classification/4-Applied/solution/notebook.ipynb @@ -0,0 +1,290 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { 
+ "original_hash": "49325d6dd12a3628fc64fa7ccb1a80ff", + "translation_date": "2025-09-06T14:42:05+00:00", + "source_file": "4-Classification/4-Applied/solution/notebook.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: skl2onnx in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (1.8.0)\n", + "Requirement already satisfied: protobuf in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (3.8.0)\n", + "Requirement already satisfied: numpy>=1.15 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (1.19.2)\n", + "Requirement already satisfied: onnx>=1.2.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (1.9.0)\n", + "Requirement already satisfied: six in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from skl2onnx) (1.12.0)\n", + "Requirement already satisfied: onnxconverter-common<1.9,>=1.6.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (1.8.1)\n", + "Requirement already satisfied: scikit-learn>=0.19 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (0.24.2)\n", + "Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (1.4.1)\n", + "Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from protobuf->skl2onnx) (45.1.0)\n", + "Requirement already satisfied: typing-extensions>=3.6.2.1 in 
/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from onnx>=1.2.1->skl2onnx) (3.10.0.0)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from scikit-learn>=0.19->skl2onnx) (2.1.0)\n", + "Requirement already satisfied: joblib>=0.11 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from scikit-learn>=0.19->skl2onnx) (0.16.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "!pip install skl2onnx" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd \n" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 0 indian 0 0 0 0 0 \n", + "1 1 indian 1 0 0 0 0 \n", + "2 2 indian 0 0 0 0 0 \n", + "3 3 indian 0 0 0 0 0 \n", + "4 4 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 382 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00indian00000000...0000000000
11indian10000000...0000000000
22indian00000000...0000000000
33indian00000000...0000000000
44indian00000000...0000000010
\n

5 rows × 382 columns

\n
" + }, + "metadata": {}, + "execution_count": 60 + } + ], + "source": [ + "data = pd.read_csv('../../data/cleaned_cuisines.csv')\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 61 + } + ], + "source": [ + "X = data.iloc[:,2:]\n", + "X.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " cuisine\n", + "0 indian\n", + "1 indian\n", + "2 indian\n", + "3 indian\n", + "4 indian" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
cuisine
0indian
1indian
2indian
3indian
4indian
\n
" + }, + "metadata": {}, + "execution_count": 62 + } + ], + "source": [ + "y = data[['cuisine']]\n", + "y.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "from sklearn.svm import SVC\n", + "from sklearn.model_selection import cross_val_score\n", + "from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "SVC(C=10, kernel='linear', probability=True, random_state=0)" + ] + }, + "metadata": {}, + "execution_count": 65 + } + ], + "source": [ + "model = SVC(kernel='linear', C=10, probability=True,random_state=0)\n", + "model.fit(X_train,y_train.values.ravel())\n" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred = model.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " precision recall f1-score support\n\n chinese 0.72 0.70 0.71 236\n indian 0.91 0.88 0.89 243\n japanese 0.80 0.75 0.77 240\n korean 0.80 0.81 0.81 230\n thai 0.76 0.85 0.80 250\n\n accuracy 0.80 1199\n macro avg 0.80 0.80 0.80 1199\nweighted avg 0.80 0.80 0.80 1199\n\n" + ] + } + ], + "source": [ + "print(classification_report(y_test,y_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [], + "source": [ + "from skl2onnx import convert_sklearn\n", + "from skl2onnx.common.data_types import FloatTensorType\n", + "\n", + "initial_type = 
[('float_input', FloatTensorType([None, 380]))]\n", + "options = {id(model): {'nocl': True, 'zipmap': False}}\n", + "onx = convert_sklearn(model, initial_types=initial_type, options=options)\n", + "with open(\"./model.onnx\", \"wb\") as f:\n", + " f.write(onx.SerializeToString())\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, AI çeviri hizmeti [Co-op Translator](https://github.com/Azure/co-op-translator) kullanılarak çevrilmiştir. Doğruluğu sağlamak için çaba göstersek de, otomatik çevirilerin hata veya yanlışlık içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. Bu çevirinin kullanımından kaynaklanan yanlış anlamalar veya yanlış yorumlamalar için sorumluluk kabul etmiyoruz.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/tr/5-Clustering/1-Visualize/notebook.ipynb b/translations/tr/5-Clustering/1-Visualize/notebook.ipynb new file mode 100644 index 000000000..5fad876be --- /dev/null +++ b/translations/tr/5-Clustering/1-Visualize/notebook.ipynb @@ -0,0 +1,50 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python383jvsc74a57bd0e134e05457d34029b6460cd73bbf1ed73f339b5b6d98c95be70b69eba114fe95", + "display_name": "Python 3.8.3 64-bit (conda)" + }, + "coopTranslator": { + "original_hash": "40e0707e96b3e1899a912776006264f9", + "translation_date": "2025-09-06T14:08:11+00:00", + "source_file": "5-Clustering/1-Visualize/notebook.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + 
{ + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, AI çeviri hizmeti [Co-op Translator](https://github.com/Azure/co-op-translator) kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlık içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. Bu çevirinin kullanımından kaynaklanan yanlış anlamalar veya yanlış yorumlamalar için sorumluluk kabul etmiyoruz.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/tr/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb b/translations/tr/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb new file mode 100644 index 000000000..26fcd9485 --- /dev/null +++ b/translations/tr/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb @@ -0,0 +1,493 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "## **Spotify'den Toplanan Nijerya Müziği - Bir Analiz**\n", + "\n", + "Kümeleme, bir tür [Denetimsiz Öğrenme](https://wikipedia.org/wiki/Unsupervised_learning) yöntemidir ve bir veri setinin etiketlenmemiş olduğunu veya girdilerinin önceden tanımlanmış çıktılarla eşleşmediğini varsayar. Bu yöntem, çeşitli algoritmalar kullanarak etiketlenmemiş verileri analiz eder ve verideki desenlere göre gruplamalar sağlar.\n", + "\n", + "[**Ders öncesi test**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/27/)\n", + "\n", + "### **Giriş**\n", + "\n", + "[Kümeleme](https://link.springer.com/referenceworkentry/10.1007%2F978-0-387-30164-8_124), veri keşfi için oldukça faydalıdır. Nijeryalı dinleyicilerin müzik tüketim alışkanlıklarında trendleri ve desenleri keşfetmeye yardımcı olup olamayacağını görelim.\n", + "\n", + "> ✅ Kümelemenin kullanım alanlarını düşünmek için bir dakika ayırın. 
Gerçek hayatta, kümeleme çamaşır yığınınızı aile üyelerinizin kıyafetlerine göre ayırmanız gerektiğinde gerçekleşir 🧦👕👖🩲. Veri biliminde ise, kümeleme bir kullanıcının tercihlerini analiz ederken veya etiketlenmemiş bir veri setinin özelliklerini belirlerken gerçekleşir. Kümeleme, bir anlamda, kaosu anlamlandırmaya yardımcı olur, tıpkı bir çorap çekmecesi gibi.\n", + "\n", + "Profesyonel bir ortamda, kümeleme pazar segmentasyonu belirlemek, örneğin hangi yaş gruplarının hangi ürünleri satın aldığını anlamak için kullanılabilir. Bir diğer kullanım alanı ise anomali tespiti olabilir; örneğin, kredi kartı işlemleri veri setinden dolandırıcılığı tespit etmek. Ya da tıbbi taramalardaki tümörleri belirlemek için kümeleme kullanılabilir.\n", + "\n", + "✅ Bankacılık, e-ticaret veya iş dünyasında 'doğada' kümelemeyle karşılaştığınız durumları düşünmek için bir dakika ayırın.\n", + "\n", + "> 🎓 İlginç bir şekilde, kümeleme analizi 1930'larda Antropoloji ve Psikoloji alanlarında ortaya çıkmıştır. Sizce o zamanlar nasıl kullanılmış olabilir?\n", + "\n", + "Alternatif olarak, arama sonuçlarını gruplamak için kullanılabilir - örneğin alışveriş bağlantıları, görseller veya incelemeler. Kümeleme, büyük bir veri setini küçültmek ve daha ayrıntılı analiz yapmak istediğinizde faydalıdır, bu nedenle diğer modeller oluşturulmadan önce veri hakkında bilgi edinmek için kullanılabilir.\n", + "\n", + "✅ Verileriniz kümeler halinde organize edildikten sonra, her birine bir küme kimliği atarsınız. Bu teknik, bir veri setinin gizliliğini korumak için faydalı olabilir; bir veri noktasına daha açıklayıcı ve tanımlayıcı veriler yerine küme kimliğiyle atıfta bulunabilirsiniz. Bir kümeyi tanımlarken kümenin diğer öğeleri yerine küme kimliğine atıfta bulunmak için başka hangi nedenler olabileceğini düşünebilir misiniz?\n", + "\n", + "### Kümelemeye Başlangıç\n", + "\n", + "> 🎓 Kümeleri nasıl oluşturduğumuz, veri noktalarını gruplara nasıl topladığımızla yakından ilgilidir. Bazı terimleri açalım:\n", + ">\n", + "> 🎓 ['Transdüktif' vs. 
'indüktif'](https://wikipedia.org/wiki/Transduction_(machine_learning))\n", + ">\n", + "> Transdüktif çıkarım, belirli test durumlarına eşlenen gözlemlenen eğitim durumlarından türetilir. İndüktif çıkarım ise genel kurallara eşlenen eğitim durumlarından türetilir ve bu kurallar ancak ondan sonra test durumlarına uygulanır.\n", + ">\n", + "> Bir örnek: Elinizde yalnızca kısmen etiketlenmiş bir veri seti olduğunu hayal edin. Bazı şeyler 'plak', bazıları 'cd' ve bazıları boş. Göreviniz, boş olanlara etiket vermektir. İndüktif bir yaklaşım seçerseniz, 'plak' ve 'cd' arayan bir model eğitirsiniz ve bu etiketleri etiketlenmemiş verinize uygularsınız. Bu yaklaşım, aslında 'kaset' olan şeyleri sınıflandırmakta zorlanır. Transdüktif bir yaklaşım ise bu bilinmeyen veriyi daha etkili bir şekilde ele alır, benzer öğeleri gruplandırır ve ardından bir gruba etiket uygular. Bu durumda, kümeler 'yuvarlak müzik şeyleri' ve 'kare müzik şeyleri' gibi görünebilir.\n", + ">\n", + "> 🎓 ['Düz' vs. 'düz olmayan' geometriler](https://datascience.stackexchange.com/questions/52260/terminology-flat-geometry-in-the-context-of-clustering)\n", + ">\n", + "> Matematiksel terminolojiden türetilen düz ve düz olmayan geometriler, noktalar arasındaki mesafelerin 'düz' ([Öklid](https://wikipedia.org/wiki/Euclidean_geometry)) veya 'düz olmayan' (Öklid dışı) geometrik yöntemlerle ölçülmesini ifade eder.\n", + ">\n", + "> Bu bağlamda 'düz', Öklid geometrisini (bazı bölümleri 'düzlem' geometrisi olarak öğretilir) ifade ederken, 'düz olmayan' Öklid dışı geometriyi ifade eder. Geometri, makine öğrenimiyle nasıl ilişkilidir? Matematiğe dayalı iki alan olarak, kümelerdeki noktalar arasındaki mesafeleri ölçmek için ortak bir yol bulunmalıdır ve bu, verinin doğasına bağlı olarak 'düz' veya 'düz olmayan' şekilde yapılabilir. [Öklid mesafeleri](https://wikipedia.org/wiki/Euclidean_distance), iki nokta arasındaki doğru parçasının uzunluğu olarak ölçülür. 
[Öklid dışı mesafeler](https://wikipedia.org/wiki/Non-Euclidean_geometry) ise bir eğri boyunca ölçülür. Verileriniz görselleştirildiğinde bir düzlemde bulunmuyorsa, bunu ele almak için özel bir algoritma kullanmanız gerekebilir.\n", + "\n", + "

\n", + " \n", + "

Dasani Madipalli tarafından hazırlanan infografik
\n", + "\n", + "> 🎓 ['Mesafeler'](https://web.stanford.edu/class/cs345a/slides/12-clustering.pdf)\n", + ">\n", + "> Kümeler, mesafe matrisleriyle tanımlanır, örneğin noktalar arasındaki mesafeler. Bu mesafe birkaç şekilde ölçülebilir. Öklid kümeleri, nokta değerlerinin ortalamasıyla tanımlanır ve bir 'merkez' veya merkez noktası içerir. Mesafeler, bu merkeze olan uzaklıkla ölçülür. Öklid dışı mesafeler ise 'clustroid' olarak adlandırılan, diğer noktalara en yakın noktayı ifade eder. Clustroid'ler çeşitli şekillerde tanımlanabilir.\n", + ">\n", + "> 🎓 ['Kısıtlı'](https://wikipedia.org/wiki/Constrained_clustering)\n", + ">\n", + "> [Kısıtlı Kümeleme](https://web.cs.ucdavis.edu/~davidson/Publications/ICDMTutorial.pdf), bu denetimsiz yönteme 'yarı denetimli' öğrenme ekler. Noktalar arasındaki ilişkiler 'bağlanamaz' veya 'bağlanmalı' olarak işaretlenir, böylece veri setine bazı kurallar uygulanır.\n", + ">\n", + "> Bir örnek: Bir algoritma etiketlenmemiş veya yarı etiketlenmiş bir veri setinde serbest bırakılırsa, ürettiği kümeler düşük kaliteli olabilir. Yukarıdaki örnekte, kümeler 'yuvarlak müzik şeyleri', 'kare müzik şeyleri', 'üçgen şeyler' ve 'kurabiyeler' olarak gruplandırılabilir. Eğer algoritmaya bazı kısıtlamalar veya kurallar verilirse (\"öğe plastikten yapılmış olmalı\", \"öğe müzik üretebilmelidir\"), bu algoritmanın daha iyi seçimler yapmasına yardımcı olabilir.\n", + ">\n", + "> 🎓 'Yoğunluk'\n", + ">\n", + "> 'Gürültülü' olarak kabul edilen veri 'yoğun' olarak değerlendirilir. Kümelerindeki noktalar arasındaki mesafeler, incelendiğinde daha az veya daha çok yoğun, yani 'kalabalık' olabilir ve bu veri uygun kümeleme yöntemiyle analiz edilmelidir. 
[Bu makale](https://www.kdnuggets.com/2020/02/understanding-density-based-clustering.html), düzensiz küme yoğunluğuna sahip gürültülü bir veri setini keşfetmek için K-Means kümeleme ve HDBSCAN algoritmalarını kullanmanın farkını göstermektedir.\n", + "\n", + "Kümeleme tekniklerini daha iyi anlamak için bu [Öğrenme modülünü](https://docs.microsoft.com/learn/modules/train-evaluate-cluster-models?WT.mc_id=academic-77952-leestott) inceleyin.\n", + "\n", + "### **Kümeleme Algoritmaları**\n", + "\n", + "100'den fazla kümeleme algoritması vardır ve bunların kullanımı, eldeki verinin doğasına bağlıdır. Önemli olanlardan bazılarını tartışalım:\n", + "\n", + "- **Hiyerarşik kümeleme**. Bir nesne, yakınındaki bir nesneye olan yakınlığına göre sınıflandırıldığında, kümeler üyelerinin diğer nesnelere olan mesafelerine göre oluşturulur. Hiyerarşik kümeleme, iki kümeyi tekrar tekrar birleştirerek karakterize edilir.\n", + "\n", + "

\n", + " \n", + "

Dasani Madipalli tarafından hazırlanan infografik
\n", + "\n", + "- **Merkez kümeleme**. Bu popüler algoritma, oluşturulacak küme sayısını ('k') seçmeyi gerektirir, ardından algoritma bir kümenin merkez noktasını belirler ve veriyi bu nokta etrafında toplar. [K-means kümeleme](https://wikipedia.org/wiki/K-means_clustering), önceden tanımlanmış K gruplarına bir veri setini ayıran popüler bir merkez kümeleme versiyonudur. Merkez, en yakın ortalama ile belirlenir, bu nedenle adı buradan gelir. Kümeden olan kare mesafesi minimize edilir.\n", + "\n", + "

\n", + " \n", + "

Dasani Madipalli tarafından hazırlanan infografik
\n", + "\n", + "- **Dağılım tabanlı kümeleme**. İstatistiksel modellemeye dayalı olan dağılım tabanlı kümeleme, bir veri noktasının bir kümeye ait olma olasılığını belirler ve buna göre atama yapar. Gaussian karışım yöntemleri bu türe aittir.\n", + "\n", + "- **Yoğunluk tabanlı kümeleme**. Veri noktaları, yoğunluklarına veya birbirleri etrafındaki gruplarına göre kümelere atanır. Gruptan uzak olan veri noktaları, aykırı değerler veya gürültü olarak kabul edilir. DBSCAN, Mean-shift ve OPTICS bu tür kümelemeye aittir.\n", + "\n", + "- **Izgara tabanlı kümeleme**. Çok boyutlu veri setleri için bir ızgara oluşturulur ve veri, ızgaranın hücreleri arasında bölünerek kümeler oluşturulur.\n", + "\n", + "Kümelemeyi öğrenmenin en iyi yolu, onu kendiniz denemektir, bu yüzden bu alıştırmada bunu yapacaksınız.\n", + "\n", + "Bu modülü tamamlamak için bazı paketlere ihtiyacımız olacak. Şu şekilde yükleyebilirsiniz: `install.packages(c('tidyverse', 'tidymodels', 'DataExplorer', 'summarytools', 'plotly', 'paletteer', 'corrplot', 'patchwork'))`\n", + "\n", + "Alternatif olarak, aşağıdaki script eksik olan paketleri kontrol eder ve bu modülü tamamlamak için gerekenleri sizin için yükler.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "suppressWarnings(if(!require(\"pacman\")) install.packages(\"pacman\"))\r\n", + "\r\n", + "pacman::p_load('tidyverse', 'tidymodels', 'DataExplorer', 'summarytools', 'plotly', 'paletteer', 'corrplot', 'patchwork')\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Alıştırma - Verilerinizi kümeleyin\n", + "\n", + "Kümeleme tekniği, doğru görselleştirme ile büyük ölçüde desteklenir, bu yüzden müzik verilerimizi görselleştirerek başlayalım. 
Bu alıştırma, bu verilerin doğasına en uygun kümeleme yöntemini seçmemize yardımcı olacak.\n", + "\n", + "Verileri içe aktararak işe koyulalım.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load the core tidyverse and make it available in your current R session\r\n", + "library(tidyverse)\r\n", + "\r\n", + "# Import the data into a tibble\r\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/5-Clustering/data/nigerian-songs.csv\")\r\n", + "\r\n", + "# View the first 5 rows of the data set\r\n", + "df %>% \r\n", + " slice_head(n = 5)\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Bazen verilerimiz hakkında biraz daha fazla bilgiye ihtiyaç duyabiliriz. [*glimpse()*](https://pillar.r-lib.org/reference/glimpse.html) fonksiyonunu kullanarak `verileri` ve `yapısını` inceleyebiliriz:\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Glimpse into the data set\r\n", + "df %>% \r\n", + " glimpse()\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "İyi iş!💪\n", + "\n", + "`glimpse()` fonksiyonunun, toplam satır (gözlem) ve sütun (değişken) sayısını verdiğini, ardından değişken adından sonra her bir değişkenin ilk birkaç girişini satır halinde gösterdiğini gözlemleyebiliriz. Ayrıca, değişkenin *veri tipi* her değişken adının hemen ardından `< >` içinde belirtilir.\n", + "\n", + "`DataExplorer::introduce()` bu bilgiyi düzenli bir şekilde özetleyebilir:\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Describe basic information for our data\r\n", + "df %>% \r\n", + " introduce()\r\n", + "\r\n", + "# A visual display of the same\r\n", + "df %>% \r\n", + " plot_intro()\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Harika! 
Verilerimizde eksik değer olmadığını yeni öğrendik.\n", + "\n", + "Bu sırada, yaygın merkezi eğilim istatistiklerini (örneğin [ortalama](https://en.wikipedia.org/wiki/Arithmetic_mean) ve [medyan](https://en.wikipedia.org/wiki/Median)) ve dağılım ölçülerini (örneğin [standart sapma](https://en.wikipedia.org/wiki/Standard_deviation)) `summarytools::descr()` kullanarak inceleyebiliriz.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Describe common statistics\r\n", + "df %>% \r\n", + " descr(stats = \"common\")\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Hadi verilerin genel değerlerine bir göz atalım. Popülerliğin `0` olabileceğini unutmayın, bu sıfır sıralamaya sahip şarkıları gösterir. Bunları birazdan kaldıracağız.\n", + "\n", + "> 🤔 Eğer etiketlenmiş verilere ihtiyaç duymayan, denetimsiz bir yöntem olan kümeleme ile çalışıyorsak, neden bu verileri etiketlerle gösteriyoruz? Veri keşfi aşamasında bu etiketler işe yarar, ancak kümeleme algoritmalarının çalışması için gerekli değildir.\n", + "\n", + "### 1. Popüler türleri keşfet\n", + "\n", + "Hadi en popüler türleri 🎶 bulalım ve göründükleri örneklerin sayısını hesaplayalım.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Popular genres\r\n", + "top_genres <- df %>% \r\n", + " count(artist_top_genre, sort = TRUE) %>% \r\n", + "# Encode to categorical and reorder the according to count\r\n", + " mutate(artist_top_genre = factor(artist_top_genre) %>% fct_inorder())\r\n", + "\r\n", + "# Print the top genres\r\n", + "top_genres\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Bu iyi gitti! Bir resmin binlerce veri çerçevesi satırına bedel olduğu söylenir (aslında kimse bunu söylemez 😅). 
Ama ne demek istediğimi anladınız, değil mi?\n", + "\n", + "Kategorik verileri (karakter veya faktör değişkenleri) görselleştirmenin bir yolu çubuk grafikler kullanmaktır. Haydi, en popüler 10 türün çubuk grafiğini oluşturalım:\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Change the default gray theme\r\n", + "theme_set(theme_light())\r\n", + "\r\n", + "# Visualize popular genres\r\n", + "top_genres %>%\r\n", + " slice(1:10) %>% \r\n", + " ggplot(mapping = aes(x = artist_top_genre, y = n,\r\n", + " fill = artist_top_genre)) +\r\n", + " geom_col(alpha = 0.8) +\r\n", + " paletteer::scale_fill_paletteer_d(\"rcartocolor::Vivid\") +\r\n", + " ggtitle(\"Top genres\") +\r\n", + " theme(plot.title = element_text(hjust = 0.5),\r\n", + " # Rotates the X markers (so we can read them)\r\n", + " axis.text.x = element_text(angle = 90))\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Şimdi `Missing` (eksik) olarak işaretlenmiş türlerin olduğunu fark etmek çok daha kolay 🧐!\n", + "\n", + "> İyi bir görselleştirme, beklemediğiniz şeyleri size gösterebilir veya veri hakkında yeni sorular ortaya çıkarabilir - Hadley Wickham ve Garrett Grolemund, [R For Data Science](https://r4ds.had.co.nz/introduction.html)\n", + "\n", + "Not: En üst tür `Missing` olarak tanımlandığında, bu Spotify'ın onu sınıflandırmadığı anlamına gelir, bu yüzden ondan kurtulalım.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Visualize popular genres\r\n", + "top_genres %>%\r\n", + " filter(artist_top_genre != \"Missing\") %>% \r\n", + " slice(1:10) %>% \r\n", + " ggplot(mapping = aes(x = artist_top_genre, y = n,\r\n", + " fill = artist_top_genre)) +\r\n", + " geom_col(alpha = 0.8) +\r\n", + " paletteer::scale_fill_paletteer_d(\"rcartocolor::Vivid\") +\r\n", + " ggtitle(\"Top genres\") +\r\n", + " theme(plot.title = element_text(hjust = 0.5),\r\n", + " # Rotates the X markers (so we 
can read them)\r\n", + " axis.text.x = element_text(angle = 90))\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Veriler üzerinde yapılan küçük bir keşif sonucunda, en üst üç türün bu veri setine hakim olduğunu öğreniyoruz. Hadi `afro dancehall`, `afropop` ve `nigerian pop` türlerine odaklanalım, ayrıca veri setini filtreleyerek popülerlik değeri 0 olan her şeyi kaldıralım (bu değer, şarkının veri setinde popülerlik açısından sınıflandırılmadığı anlamına gelir ve amaçlarımız açısından gürültü sayılabilir):\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "nigerian_songs <- df %>% \r\n", + " # Concentrate on top 3 genres\r\n", + " filter(artist_top_genre %in% c(\"afro dancehall\", \"afropop\",\"nigerian pop\")) %>% \r\n", + " # Remove unclassified observations\r\n", + " filter(popularity != 0)\r\n", + "\r\n", + "\r\n", + "\r\n", + "# Visualize popular genres\r\n", + "nigerian_songs %>%\r\n", + " count(artist_top_genre) %>%\r\n", + " ggplot(mapping = aes(x = artist_top_genre, y = n,\r\n", + " fill = artist_top_genre)) +\r\n", + " geom_col(alpha = 0.8) +\r\n", + " paletteer::scale_fill_paletteer_d(\"ggsci::category10_d3\") +\r\n", + " ggtitle(\"Top genres\") +\r\n", + " theme(plot.title = element_text(hjust = 0.5))\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Veri setimizdeki sayısal değişkenler arasında belirgin bir doğrusal ilişki olup olmadığını görelim. Bu ilişki, matematiksel olarak [korelasyon istatistiği](https://en.wikipedia.org/wiki/Correlation) ile ölçülür.\n", + "\n", + "Korelasyon istatistiği, -1 ile 1 arasında bir değer alır ve ilişkinin gücünü gösterir. 
0'ın üzerindeki değerler *pozitif* bir korelasyonu ifade eder (bir değişkenin yüksek değerleri genellikle diğer değişkenin yüksek değerleriyle örtüşür), 0'ın altındaki değerler ise *negatif* bir korelasyonu ifade eder (bir değişkenin yüksek değerleri genellikle diğer değişkenin düşük değerleriyle örtüşür).\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Narrow down to numeric variables and find correlation\r\n", + "corr_mat <- nigerian_songs %>% \r\n", + " select(where(is.numeric)) %>% \r\n", + " cor()\r\n", + "\r\n", + "# Visualize correlation matrix\r\n", + "corrplot(corr_mat, order = 'AOE', col = c('white', 'black'), bg = 'gold2') \r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Veriler güçlü bir şekilde ilişkili değil, sadece `energy` ve `loudness` arasında bir bağlantı var, ki bu mantıklı, çünkü yüksek sesli müzik genellikle oldukça enerjik olur. `Popularity` ile `release date` arasında bir ilişki var, bu da mantıklı, çünkü daha yeni şarkılar muhtemelen daha popülerdir. Uzunluk ve enerji arasında da bir korelasyon olduğu görülüyor.\n", + "\n", + "Bu verilerle bir kümeleme algoritmasının neler yapabileceğini görmek ilginç olacak!\n", + "\n", + "> 🎓 Korelasyonun nedensellik anlamına gelmediğini unutmayın! Korelasyonun kanıtına sahibiz, ancak nedenselliğin kanıtına sahip değiliz. Bu noktayı vurgulayan bazı görsellerin bulunduğu [eğlenceli bir web sitesi](https://tylervigen.com/spurious-correlations) var.\n", + "\n", + "### 2. Veri dağılımını keşfet\n", + "\n", + "Daha ince sorular soralım. Türler, popülerliklerine göre dans edilebilirlik algısında önemli ölçüde farklı mı? 
İlk üç türümüzün popülerlik ve dans edilebilirlik verilerinin dağılımını belirli bir x ve y ekseni boyunca [yoğunluk grafikleri](https://www.khanacademy.org/math/ap-statistics/density-curves-normal-distribution-ap/density-curves/v/density-curves) kullanarak inceleyelim.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Perform 2D kernel density estimation\r\n", + "density_estimate_2d <- nigerian_songs %>% \r\n", + " ggplot(mapping = aes(x = popularity, y = danceability, color = artist_top_genre)) +\r\n", + " geom_density_2d(bins = 5, size = 1) +\r\n", + " paletteer::scale_color_paletteer_d(\"RSkittleBrewer::wildberry\") +\r\n", + " xlim(-20, 80) +\r\n", + " ylim(0, 1.2)\r\n", + "\r\n", + "# Density plot based on the popularity\r\n", + "density_estimate_pop <- nigerian_songs %>% \r\n", + " ggplot(mapping = aes(x = popularity, fill = artist_top_genre, color = artist_top_genre)) +\r\n", + " geom_density(size = 1, alpha = 0.5) +\r\n", + " paletteer::scale_fill_paletteer_d(\"RSkittleBrewer::wildberry\") +\r\n", + " paletteer::scale_color_paletteer_d(\"RSkittleBrewer::wildberry\") +\r\n", + " theme(legend.position = \"none\")\r\n", + "\r\n", + "# Density plot based on the danceability\r\n", + "density_estimate_dance <- nigerian_songs %>% \r\n", + " ggplot(mapping = aes(x = danceability, fill = artist_top_genre, color = artist_top_genre)) +\r\n", + " geom_density(size = 1, alpha = 0.5) +\r\n", + " paletteer::scale_fill_paletteer_d(\"RSkittleBrewer::wildberry\") +\r\n", + " paletteer::scale_color_paletteer_d(\"RSkittleBrewer::wildberry\")\r\n", + "\r\n", + "\r\n", + "# Patch everything together\r\n", + "library(patchwork)\r\n", + "density_estimate_2d / (density_estimate_pop + density_estimate_dance)\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Nijerya'daki zevklerin bu tür için belirli bir dans edilebilirlik seviyesinde birleştiği söylenebilir mi? 
Türden bağımsız olarak, eşleşen eşmerkezli daireler görüyoruz.\n", + "\n", + "Genel olarak, üç tür popülerlik ve dans edilebilirlik açısından uyum gösteriyor. Bu gevşek bir şekilde hizalanmış verilerde kümeleri belirlemek zor olacak. Bakalım bir dağılım grafiği bunu destekleyebilir mi?\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# A scatter plot of popularity and danceability\r\n", + "scatter_plot <- nigerian_songs %>% \r\n", + " ggplot(mapping = aes(x = popularity, y = danceability, color = artist_top_genre, shape = artist_top_genre)) +\r\n", + " geom_point(size = 2, alpha = 0.8) +\r\n", + " paletteer::scale_color_paletteer_d(\"futurevisions::mars\")\r\n", + "\r\n", + "# Add a touch of interactivity\r\n", + "ggplotly(scatter_plot)\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Aynı eksenlerin bir dağılım grafiği, benzer bir yakınsama modelini gösterir.\n", + "\n", + "Genel olarak, kümeleme için dağılım grafikleri kullanarak veri kümelerini gösterebilirsiniz, bu nedenle bu tür görselleştirmeyi öğrenmek oldukça faydalıdır. Bir sonraki derste, bu filtrelenmiş veriyi alıp k-means kümeleme yöntemini kullanarak ilginç şekillerde örtüşen grupları keşfedeceğiz.\n", + "\n", + "## **🚀 Meydan Okuma**\n", + "\n", + "Bir sonraki derse hazırlık olarak, üretim ortamında keşfedebileceğiniz ve kullanabileceğiniz çeşitli kümeleme algoritmaları hakkında bir grafik oluşturun. Kümeleme hangi tür problemleri çözmeye çalışıyor?\n", + "\n", + "## [**Ders Sonrası Test**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/28/)\n", + "\n", + "## **Gözden Geçirme ve Kendi Kendine Çalışma**\n", + "\n", + "Kümeleme algoritmalarını uygulamadan önce, öğrendiğimiz gibi, veri kümenizin doğasını anlamak iyi bir fikirdir. 
Bu konu hakkında daha fazla bilgi edinin [buradan](https://www.kdnuggets.com/2019/10/right-clustering-algorithm.html).\n", + "\n", + "Kümeleme teknikleri hakkındaki bilginizi derinleştirin:\n", + "\n", + "- [Tidymodels ve arkadaşları ile Kümeleme Modellerini Eğitme ve Değerlendirme](https://rpubs.com/eR_ic/clustering)\n", + "\n", + "- Bradley Boehmke & Brandon Greenwell, [*Hands-On Machine Learning with R*](https://bradleyboehmke.github.io/HOML/)*.*\n", + "\n", + "## **Ödev**\n", + "\n", + "[Kümeleme için diğer görselleştirme yöntemlerini araştırın](https://github.com/microsoft/ML-For-Beginners/blob/main/5-Clustering/1-Visualize/assignment.md)\n", + "\n", + "## TEŞEKKÜRLER:\n", + "\n", + "[Jen Looper](https://www.twitter.com/jenlooper), bu modülün orijinal Python versiyonunu oluşturduğu için ♥️\n", + "\n", + "[`Dasani Madipalli`](https://twitter.com/dasani_decoded), makine öğrenimi kavramlarını daha anlaşılır ve kolay hale getiren harika illüstrasyonları oluşturduğu için.\n", + "\n", + "Mutlu öğrenmeler,\n", + "\n", + "[Eric](https://twitter.com/ericntay), Gold Microsoft Learn Öğrenci Elçisi.\n" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, [Co-op Translator](https://github.com/Azure/co-op-translator) adlı yapay zeka çeviri hizmeti kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlıklar içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel bir insan çevirisi önerilir. 
Bu çevirinin kullanımından kaynaklanan herhangi bir yanlış anlama veya yanlış yorumlama durumunda sorumluluk kabul edilmez.\n" + ] + } + ], + "metadata": { + "anaconda-cloud": "", + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "3.4.1" + }, + "coopTranslator": { + "original_hash": "99c36449cad3708a435f6798cfa39972", + "translation_date": "2025-09-06T14:18:06+00:00", + "source_file": "5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/translations/tr/5-Clustering/1-Visualize/solution/notebook.ipynb b/translations/tr/5-Clustering/1-Visualize/solution/notebook.ipynb new file mode 100644 index 000000000..134e77490 --- /dev/null +++ b/translations/tr/5-Clustering/1-Visualize/solution/notebook.ipynb @@ -0,0 +1,821 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Defaulting to user installation because normal site-packages is not writeable\n", + "Requirement already satisfied: seaborn in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (0.11.2)\n", + "Requirement already satisfied: matplotlib>=2.2 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from seaborn) (3.5.0)\n", + "Requirement already satisfied: numpy>=1.15 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from seaborn) (1.21.4)\n", + "Requirement already satisfied: pandas>=0.23 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from seaborn) (1.3.4)\n", + "Requirement already satisfied: scipy>=1.0 in 
/Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from seaborn) (1.7.2)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (4.28.1)\n", + "Requirement already satisfied: pyparsing>=2.2.1 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (2.4.7)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (1.3.2)\n", + "Requirement already satisfied: pillow>=6.2.0 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (8.4.0)\n", + "Requirement already satisfied: cycler>=0.10 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (0.11.0)\n", + "Requirement already satisfied: packaging>=20.0 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (21.2)\n", + "Requirement already satisfied: setuptools-scm>=4 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (6.3.2)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (2.8.2)\n", + "Requirement already satisfied: pytz>=2017.3 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from pandas>=0.23->seaborn) (2021.3)\n", + "Requirement already satisfied: six>=1.5 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from python-dateutil>=2.7->matplotlib>=2.2->seaborn) (1.16.0)\n", + "Requirement already satisfied: tomli>=1.0.0 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from setuptools-scm>=4->matplotlib>=2.2->seaborn) (1.2.2)\n", + "Requirement already satisfied: setuptools in 
/Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from setuptools-scm>=4->matplotlib>=2.2->seaborn) (59.1.1)\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "!pip install seaborn" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
0SparkyMandy & The JungleCruel Santinoalternative r&b2019144000480.6660.85100.4200.5340000.1100-6.6990.0829133.0155
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
2LITT!LITT!AYLØindie r&b2018207758400.8360.27200.5640.0005370.1100-7.1270.0424130.0054
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
\n", + "
" + ], + "text/plain": [ + " name album \\\n", + "0 Sparky Mandy & The Jungle \n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "2 LITT! LITT! \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "0 Cruel Santino alternative r&b 2019 144000 48 \n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "2 AYLØ indie r&b 2018 207758 40 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "0 0.666 0.8510 0.420 0.534000 0.1100 -6.699 \n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "2 0.836 0.2720 0.564 0.000537 0.1100 -7.127 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "\n", + " speechiness tempo time_signature \n", + "0 0.0829 133.015 5 \n", + "1 0.3600 129.993 3 \n", + "2 0.0424 130.005 4 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv(\"../../data/nigerian-songs.csv\")\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Veri çerçevesi hakkında bilgi alın\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 530 entries, 0 to 529\n", + "Data columns (total 16 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 name 530 non-null object \n", + " 1 album 530 non-null object \n", + " 2 artist 530 non-null object \n", + " 3 artist_top_genre 530 non-null object \n", + " 4 release_date 530 non-null int64 \n", + " 5 length 530 non-null int64 \n", + " 6 popularity 530 non-null int64 \n", + " 7 
danceability 530 non-null float64\n", + " 8 acousticness 530 non-null float64\n", + " 9 energy 530 non-null float64\n", + " 10 instrumentalness 530 non-null float64\n", + " 11 liveness 530 non-null float64\n", + " 12 loudness 530 non-null float64\n", + " 13 speechiness 530 non-null float64\n", + " 14 tempo 530 non-null float64\n", + " 15 time_signature 530 non-null int64 \n", + "dtypes: float64(8), int64(4), object(4)\n", + "memory usage: 66.4+ KB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "name 0\n", + "album 0\n", + "artist 0\n", + "artist_top_genre 0\n", + "release_date 0\n", + "length 0\n", + "popularity 0\n", + "danceability 0\n", + "acousticness 0\n", + "energy 0\n", + "instrumentalness 0\n", + "liveness 0\n", + "loudness 0\n", + "speechiness 0\n", + "tempo 0\n", + "time_signature 0\n", + "dtype: int64" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.isnull().sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Verilerin genel değerlerine bakın. Popülerliğin '0' olabileceğini unutmayın - ve bu değere sahip birçok satır var.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
release_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
count530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000
mean2015.390566222298.16981117.5075470.7416190.2654120.7606230.0163050.147308-4.9530110.130748116.4878643.986792
std3.13168839696.82225918.9922120.1175220.2083420.1485330.0903210.1235882.4641860.09293923.5186010.333701
min1998.00000089488.0000000.0000000.2550000.0006650.1110000.0000000.028300-19.3620000.02780061.6950003.000000
25%2014.000000199305.0000000.0000000.6810000.0895250.6690000.0000000.075650-6.2987500.059100102.9612504.000000
50%2016.000000218509.00000013.0000000.7610000.2205000.7845000.0000040.103500-4.5585000.097950112.7145004.000000
75%2017.000000242098.50000031.0000000.8295000.4030000.8757500.0002340.164000-3.3310000.177000125.0392504.000000
max2020.000000511738.00000073.0000000.9660000.9540000.9950000.9100000.8110000.5820000.514000206.0070005.000000
\n", + "
" + ], + "text/plain": [ + " release_date length popularity danceability acousticness \\\n", + "count 530.000000 530.000000 530.000000 530.000000 530.000000 \n", + "mean 2015.390566 222298.169811 17.507547 0.741619 0.265412 \n", + "std 3.131688 39696.822259 18.992212 0.117522 0.208342 \n", + "min 1998.000000 89488.000000 0.000000 0.255000 0.000665 \n", + "25% 2014.000000 199305.000000 0.000000 0.681000 0.089525 \n", + "50% 2016.000000 218509.000000 13.000000 0.761000 0.220500 \n", + "75% 2017.000000 242098.500000 31.000000 0.829500 0.403000 \n", + "max 2020.000000 511738.000000 73.000000 0.966000 0.954000 \n", + "\n", + " energy instrumentalness liveness loudness speechiness \\\n", + "count 530.000000 530.000000 530.000000 530.000000 530.000000 \n", + "mean 0.760623 0.016305 0.147308 -4.953011 0.130748 \n", + "std 0.148533 0.090321 0.123588 2.464186 0.092939 \n", + "min 0.111000 0.000000 0.028300 -19.362000 0.027800 \n", + "25% 0.669000 0.000000 0.075650 -6.298750 0.059100 \n", + "50% 0.784500 0.000004 0.103500 -4.558500 0.097950 \n", + "75% 0.875750 0.000234 0.164000 -3.331000 0.177000 \n", + "max 0.995000 0.910000 0.811000 0.582000 0.514000 \n", + "\n", + " tempo time_signature \n", + "count 530.000000 530.000000 \n", + "mean 116.487864 3.986792 \n", + "std 23.518601 0.333701 \n", + "min 61.695000 3.000000 \n", + "25% 102.961250 4.000000 \n", + "50% 112.714500 4.000000 \n", + "75% 125.039250 4.000000 \n", + "max 206.007000 5.000000 " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Haydi türleri inceleyelim. 
Oldukça fazla sayıda şarkı 'Missing' olarak listelenmiş, bu da onların veri kümesinde bir türle kategorize edilmediği anlamına geliyor.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "import seaborn as sns\n", + "\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top[:5].index,y=top[:5].values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Eksik türleri kaldırın, çünkü Spotify'da sınıflandırılmamış.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "df = df[df['artist_top_genre'] != 'Missing']\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "df = df[(df['artist_top_genre'] == 'afro dancehall') | (df['artist_top_genre'] == 'afropop') | (df['artist_top_genre'] == 'nigerian pop')]\n", + "df = df[(df['popularity'] > 0)]\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "corrmat = df.corr()\n", + "f, ax = plt.subplots(figsize=(12, 9))\n", + "sns.heatmap(corrmat, vmax=.8, square=True);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.set_theme(style=\"ticks\")\n", + "\n", + "# Show the joint distribution using kernel density estimation\n", + "g = sns.jointplot(\n", + " data=df,\n", + " x=\"popularity\", y=\"danceability\", hue=\"artist_top_genre\",\n", + " kind=\"kde\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Genel olarak, üç tür popülerlik ve dans edilebilirlik açısından uyum gösterir. Aynı eksenlerin bir dağılım grafiği benzer bir yakınsama modeli gösterir. Tür başına veri dağılımını kontrol etmek için bir dağılım grafiği deneyin.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages/seaborn/axisgrid.py:337: UserWarning: The `size` parameter has been renamed to `height`; please update your code.\n", + " warnings.warn(msg, UserWarning)\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.FacetGrid(df, hue=\"artist_top_genre\", size=5) \\\n", + " .map(plt.scatter, \"popularity\", \"danceability\") \\\n", + " .add_legend()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, [Co-op Translator](https://github.com/Azure/co-op-translator) adlı yapay zeka çeviri hizmeti kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlıklar içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. Bu çevirinin kullanımından kaynaklanan yanlış anlama veya yanlış yorumlamalardan sorumlu değiliz.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" + }, + "kernelspec": { + "display_name": "Python 3.7.0 64-bit ('3.7')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.9" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "c61deff2839902ac8cb4ed411eb10fee", + "translation_date": "2025-09-06T14:09:59+00:00", + "source_file": "5-Clustering/1-Visualize/solution/notebook.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/tr/5-Clustering/2-K-Means/notebook.ipynb b/translations/tr/5-Clustering/2-K-Means/notebook.ipynb new file mode 100644 index 000000000..248dea39a --- /dev/null +++ b/translations/tr/5-Clustering/2-K-Means/notebook.ipynb @@ -0,0 +1,231 
@@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "3e5c8ab363e8d88f566d4365efc7e0bd", + "translation_date": "2025-09-06T14:19:57+00:00", + "source_file": "5-Clustering/2-K-Means/notebook.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: seaborn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.11.1)\n", + "Requirement already satisfied: numpy>=1.15 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.19.2)\n", + "Requirement already satisfied: pandas>=0.23 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.1.2)\n", + "Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.4.1)\n", + "Requirement already satisfied: matplotlib>=2.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (3.1.0)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2.8.0)\n", + "Requirement already 
satisfied: pytz>=2017.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2019.1)\n", + "Requirement already satisfied: cycler>=0.10 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (1.1.0)\n", + "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (2.4.0)\n", + "Requirement already satisfied: six>=1.5 in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from python-dateutil>=2.7.3->pandas>=0.23->seaborn) (1.12.0)\n", + "Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from kiwisolver>=1.0.1->matplotlib>=2.2->seaborn) (45.1.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install seaborn" + ] + }, + { + "source": [ + "Son derste bitirdiğimiz yerden başlayın, veriler içe aktarılmış ve filtrelenmiş durumda.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "0 Sparky Mandy & The Jungle \n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "2 LITT! LITT! \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. 
\n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "0 Cruel Santino alternative r&b 2019 144000 48 \n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "2 AYLØ indie r&b 2018 207758 40 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "0 0.666 0.8510 0.420 0.534000 0.1100 -6.699 \n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "2 0.836 0.2720 0.564 0.000537 0.1100 -7.127 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "\n", + " speechiness tempo time_signature \n", + "0 0.0829 133.015 5 \n", + "1 0.3600 129.993 3 \n", + "2 0.0424 130.005 4 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
0SparkyMandy & The JungleCruel Santinoalternative r&b2019144000480.6660.85100.4200.5340000.1100-6.6990.0829133.0155
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
2LITT!LITT!AYLØindie r&b2018207758400.8360.27200.5640.0005370.1100-7.1270.0424130.0054
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
\n
" + }, + "metadata": {}, + "execution_count": 6 + } + ], + "source": [ + "\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "\n", + "\n", + "df = pd.read_csv(\"../data/nigerian-songs.csv\")\n", + "df.head()" + ] + }, + { + "source": [ + "Yalnızca 3 türe odaklanacağız. Belki 3 küme oluşturabiliriz!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "metadata": {}, + "execution_count": 7 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "df = df[(df['artist_top_genre'] == 'afro dancehall') | (df['artist_top_genre'] == 'afropop') | (df['artist_top_genre'] == 'nigerian pop')]\n", + "df = df[(df['popularity'] > 0)]\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. 
\n", + "5 Kasala Pioneers \n", + "6 Pull Up Everything Pretty \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "5 DRB Lasgidi nigerian pop 2020 184800 26 \n", + "6 prettyboydo nigerian pop 2018 202648 29 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "5 0.803 0.1270 0.525 0.000007 0.1290 -10.034 \n", + "6 0.818 0.4520 0.587 0.004490 0.5900 -9.840 \n", + "\n", + " speechiness tempo time_signature \n", + "1 0.3600 129.993 3 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 \n", + "5 0.1970 100.103 4 \n", + "6 0.1990 95.842 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
5KasalaPioneersDRB Lasgidinigerian pop2020184800260.8030.12700.5250.0000070.1290-10.0340.1970100.1034
6Pull UpEverything Prettyprettyboydonigerian pop2018202648290.8180.45200.5870.0044900.5900-9.8400.199095.8424
\n
" + }, + "metadata": {}, + "execution_count": 8 + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, [Co-op Translator](https://github.com/Azure/co-op-translator) adlı yapay zeka çeviri hizmeti kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlıklar içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. Bu çevirinin kullanımından kaynaklanan yanlış anlama veya yanlış yorumlamalardan sorumlu değiliz.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/tr/5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb b/translations/tr/5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb new file mode 100644 index 000000000..e5aaf843b --- /dev/null +++ b/translations/tr/5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb @@ -0,0 +1,639 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "anaconda-cloud": "", + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "3.4.1" + }, + "colab": { + "name": "lesson_14.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "coopTranslator": { + "original_hash": "ad65fb4aad0a156b42216e4929f490fc", + "translation_date": "2025-09-06T14:30:55+00:00", + "source_file": "5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb", + "language_code": "tr" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "GULATlQXLXyR" + }, + "source": [ + "## R ve Tidy veri prensiplerini kullanarak K-Means kümeleme keşfi.\n", + "\n", + "### [**Ders öncesi quiz**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/29/)\n", + "\n", + "Bu derste, Tidymodels 
paketi ve R ekosistemindeki diğer paketleri (onlara arkadaşlar 🧑‍🤝‍🧑 diyeceğiz) kullanarak kümeler oluşturmayı ve daha önce içe aktardığınız Nijerya müzik veri setini nasıl kullanacağınızı öğreneceksiniz. K-Means kümeleme için temel bilgileri ele alacağız. Unutmayın, önceki derste öğrendiğiniz gibi, kümelerle çalışmanın birçok yolu vardır ve kullandığınız yöntem verinize bağlıdır. En yaygın kümeleme tekniği olduğu için K-Means yöntemini deneyeceğiz. Haydi başlayalım!\n", + "\n", + "Öğreneceğiniz terimler:\n", + "\n", + "- Siluet skoru\n", + "\n", + "- Dirsek yöntemi\n", + "\n", + "- Atalet\n", + "\n", + "- Varyans\n", + "\n", + "### **Giriş**\n", + "\n", + "[K-Means Kümeleme](https://wikipedia.org/wiki/K-means_clustering), sinyal işleme alanından türetilmiş bir yöntemdir. Verileri özelliklerindeki benzerliklere göre `k kümeye` ayırmak ve bölmek için kullanılır.\n", + "\n", + "Kümeler, bir nokta (veya 'tohum') ve ona karşılık gelen bölgeyi içeren [Voronoi diyagramları](https://wikipedia.org/wiki/Voronoi_diagram) olarak görselleştirilebilir.\n", + "\n", + "

\n", + " \n", + "

Jen Looper tarafından hazırlanan infografik
\n", + "\n", + "K-Means kümeleme şu adımları içerir:\n", + "\n", + "1. Veri bilimci, oluşturulacak kümelerin istenen sayısını belirleyerek başlar.\n", + "\n", + "2. Daha sonra algoritma, veri setinden rastgele K gözlem seçerek kümeler için başlangıç merkezleri (yani, merkez noktalar) olarak kullanır.\n", + "\n", + "3. Ardından, kalan her bir gözlem en yakın merkez noktaya atanır.\n", + "\n", + "4. Daha sonra, her bir kümenin yeni ortalamaları hesaplanır ve merkez noktası bu ortalamaya taşınır.\n", + "\n", + "5. Merkezler yeniden hesaplandığına göre, her bir gözlem tekrar kontrol edilir ve farklı bir kümeye daha yakın olup olmadığına bakılır. Tüm nesneler, güncellenmiş küme ortalamalarını kullanarak yeniden atanır. Küme atama ve merkez noktası güncelleme adımları, küme atamaları değişmeyi bırakana kadar (yani, yakınsama sağlandığında) tekrarlanır. Genellikle, algoritma her yeni iterasyonda merkez noktalarının hareketi önemsiz hale geldiğinde ve kümeler statik hale geldiğinde sona erer.\n", + "\n", + "
\n", + "\n", + "> Başlangıç merkez noktaları olarak kullanılan ilk k gözlemlerin rastgele seçilmesi nedeniyle, prosedürü her uyguladığımızda biraz farklı sonuçlar alabiliriz. Bu nedenle, çoğu algoritma birkaç *rastgele başlangıç* kullanır ve en düşük WCSS'ye sahip iterasyonu seçer. Bu nedenle, K-Means'i her zaman birkaç *nstart* değeriyle çalıştırmanız ve *istenmeyen yerel optimumdan* kaçınmanız şiddetle tavsiye edilir.\n", + "\n", + "
\n", + "\n", + "Allison Horst'un [çizimleri](https://github.com/allisonhorst/stats-illustrations) kullanılarak hazırlanan bu kısa animasyon, kümeleme sürecini açıklıyor:\n", + "\n", + "

\n", + " \n", + "

@allison_horst tarafından hazırlanan çizim
\n", + "\n", + "Kümeleme ile ilgili temel bir soru şudur: Verilerinizi kaç kümeye ayırmanız gerektiğini nasıl bileceksiniz? K-Means kullanmanın bir dezavantajı, `k` yani `merkez noktalarının` sayısını belirlemeniz gerekmesidir. Neyse ki, `dirsek yöntemi` `k` için iyi bir başlangıç değeri tahmin etmenize yardımcı olur. Birazdan bunu deneyeceksiniz.\n", + "\n", + "### \n", + "\n", + "**Ön Koşul**\n", + "\n", + "[Önceki derste](https://github.com/microsoft/ML-For-Beginners/blob/main/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb) kaldığımız yerden devam edeceğiz. Bu derste veri setini analiz ettik, birçok görselleştirme yaptık ve ilgi çekici gözlemleri filtreledik. Mutlaka göz atın!\n", + "\n", + "Bu modülü tamamlamak için bazı paketlere ihtiyacımız olacak. Şu şekilde yükleyebilirsiniz: `install.packages(c('tidyverse', 'tidymodels', 'cluster', 'summarytools', 'plotly', 'paletteer', 'factoextra', 'patchwork'))`\n", + "\n", + "Alternatif olarak, aşağıdaki script, bu modülü tamamlamak için gerekli paketlere sahip olup olmadığınızı kontrol eder ve eksik olanları sizin için yükler.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "ah_tBi58LXyi" + }, + "source": [ + "suppressWarnings(if(!require(\"pacman\")) install.packages(\"pacman\"))\n", + "\n", + "pacman::p_load('tidyverse', 'tidymodels', 'cluster', 'summarytools', 'plotly', 'paletteer', 'factoextra', 'patchwork')\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7e--UCUTLXym" + }, + "source": [ + "Haydi başlayalım!\n", + "\n", + "## 1. Verilerle dans: En popüler 3 müzik türünü belirleyin\n", + "\n", + "Bu, önceki derste yaptıklarımızın bir özeti. 
Hadi biraz veri inceleyip analiz edelim!\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Ycamx7GGLXyn" + }, + "source": [ + "# Load the core tidyverse and make it available in your current R session\n", + "library(tidyverse)\n", + "\n", + "# Import the data into a tibble\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/5-Clustering/data/nigerian-songs.csv\", show_col_types = FALSE)\n", + "\n", + "# Narrow down to top 3 popular genres\n", + "nigerian_songs <- df %>% \n", + " # Concentrate on top 3 genres\n", + " filter(artist_top_genre %in% c(\"afro dancehall\", \"afropop\",\"nigerian pop\")) %>% \n", + " # Remove unclassified observations\n", + " filter(popularity != 0)\n", + "\n", + "\n", + "\n", + "# Visualize popular genres using bar plots\n", + "theme_set(theme_light())\n", + "nigerian_songs %>%\n", + " count(artist_top_genre) %>%\n", + " ggplot(mapping = aes(x = artist_top_genre, y = n,\n", + " fill = artist_top_genre)) +\n", + " geom_col(alpha = 0.8) +\n", + " paletteer::scale_fill_paletteer_d(\"ggsci::category10_d3\") +\n", + " ggtitle(\"Top genres\") +\n", + " theme(plot.title = element_text(hjust = 0.5))\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b5h5zmkPLXyp" + }, + "source": [ + "🤩 Bu harika oldu!\n", + "\n", + "## 2. Daha fazla veri keşfi.\n", + "\n", + "Bu veriler ne kadar temiz? Aykırı değerleri kutu grafikleri kullanarak kontrol edelim. Daha az aykırı değere sahip sayısal sütunlara odaklanacağız (ancak aykırı değerleri temizleyebilirsiniz). Kutu grafikleri, verilerin aralığını gösterebilir ve hangi sütunların kullanılacağına karar vermenize yardımcı olabilir. Ancak unutmayın, kutu grafikleri varyansı göstermez, bu da iyi kümeleme yapılabilir veriler için önemli bir unsurdur. 
Daha fazla bilgi için lütfen [bu tartışmaya](https://stats.stackexchange.com/questions/91536/deduce-variance-from-boxplot) göz atın.\n", + "\n", + "[Kutu grafikleri](https://en.wikipedia.org/wiki/Box_plot), `sayısal` verilerin dağılımını görsel olarak göstermek için kullanılır, bu yüzden popüler müzik türleriyle birlikte tüm sayısal sütunları *seçerek* başlayalım.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "HhNreJKLLXyq" + }, + "source": [ + "# Select top genre column and all other numeric columns\n", + "df_numeric <- nigerian_songs %>% \n", + " select(artist_top_genre, where(is.numeric)) \n", + "\n", + "# Display the data\n", + "df_numeric %>% \n", + " slice_head(n = 5)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uYXrwJRaLXyq" + }, + "source": [ + "Seçim yardımcısı `where`'in bunu ne kadar kolaylaştırdığını görüyor musunuz 💁? Bu tür diğer fonksiyonları [burada](https://tidyselect.r-lib.org/) keşfedin.\n", + "\n", + "Her bir sayısal özellik için bir kutu grafiği oluşturacağımız ve döngü kullanmaktan kaçınmak istediğimiz için, verilerimizi *daha uzun* bir formata dönüştürelim. Bu, `facets`'ten - her biri verinin bir alt kümesini gösteren alt grafiklerden - faydalanmamıza olanak tanıyacak.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "gd5bR3f8LXys" + }, + "source": [ + "# Pivot data from wide to long\n", + "df_numeric_long <- df_numeric %>% \n", + " pivot_longer(!artist_top_genre, names_to = \"feature_names\", values_to = \"values\") \n", + "\n", + "# Print out data\n", + "df_numeric_long %>% \n", + " slice_head(n = 15)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-7tE1swnLXyv" + }, + "source": [ + "Daha uzun! Şimdi biraz `ggplot` zamanı! 
Peki hangi `geom`'u kullanacağız?\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "r88bIsyuLXyy" + }, + "source": [ + "# Make a box plot\n", + "df_numeric_long %>% \n", + " ggplot(mapping = aes(x = feature_names, y = values, fill = feature_names)) +\n", + " geom_boxplot() +\n", + " facet_wrap(~ feature_names, ncol = 4, scales = \"free\") +\n", + " theme(legend.position = \"none\")\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EYVyKIUELXyz" + }, + "source": [ + "Kolay-gg!\n", + "\n", + "Şimdi bu verilerin biraz gürültülü olduğunu görebiliyoruz: Her sütunu bir kutu grafiği olarak gözlemlediğinizde, aykırı değerleri fark edebilirsiniz. Bu aykırı değerleri veri setinden çıkarabilirsiniz, ancak bu durumda veri oldukça sınırlı hale gelir.\n", + "\n", + "Şimdilik, kümeleme çalışmamızda kullanacağımız sütunları seçelim. Benzer aralıklara sahip sayısal sütunları seçelim. `artist_top_genre` sütununu sayısal olarak kodlayabiliriz, ancak şimdilik bunu çıkaracağız.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "-wkpINyZLXy0" + }, + "source": [ + "# Select variables with similar ranges\n", + "df_numeric_select <- df_numeric %>% \n", + " select(popularity, danceability, acousticness, loudness, energy) \n", + "\n", + "# Normalize data\n", + "# df_numeric_select <- scale(df_numeric_select)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D7dLzgpqLXy1" + }, + "source": [ + "## 3. R'de k-means kümeleme hesaplama\n", + "\n", + "R'de yerleşik `kmeans` fonksiyonunu kullanarak k-means hesaplayabiliriz, bkz. `help(\"kmeans\")`. `kmeans()` fonksiyonu, birincil argüman olarak tüm sütunları sayısal olan bir veri çerçevesini kabul eder.\n", + "\n", + "K-means kümeleme kullanırken ilk adım, nihai çözümde oluşturulacak küme sayısını (k) belirtmektir. 
Veri setinden çıkardığımız 3 şarkı türü olduğunu biliyoruz, o halde 3 deneyelim:\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "uC4EQ5w7LXy5" + }, + "source": [ + "set.seed(2056)\n", + "# Kmeans clustering for 3 clusters\n", + "kclust <- kmeans(\n", + " df_numeric_select,\n", + " # Specify the number of clusters\n", + " centers = 3,\n", + " # How many random initial configurations\n", + " nstart = 25\n", + ")\n", + "\n", + "# Display clustering object\n", + "kclust\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hzfhscWrLXy-" + }, + "source": [ + "kmeans nesnesi, `help(\"kmeans\")` içinde iyi bir şekilde açıklanan birkaç bilgi içerir. Şimdilik, birkaçına odaklanalım. Verilerin 65, 110, 111 boyutlarında 3 kümeye ayrıldığını görüyoruz. Çıktı ayrıca 5 değişken üzerinden 3 grup için küme merkezlerini (ortalama değerlerini) içerir.\n", + "\n", + "Kümeleme vektörü, her bir gözlem için küme atamasını ifade eder. Küme atamasını orijinal veri setine eklemek için `augment` fonksiyonunu kullanalım.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "0XwwpFGQLXy_" + }, + "source": [ + "# Add predicted cluster assignment to data set\n", + "augment(kclust, df_numeric_select) %>% \n", + " relocate(.cluster) %>% \n", + " slice_head(n = 10)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NXIVXXACLXzA" + }, + "source": [ + "Harika, veri setimizi 3 gruba ayırdık. Peki, kümelememiz ne kadar iyi 🤷? Hadi `Silhouette skoru`na bir göz atalım.\n", + "\n", + "### **Silhouette skoru**\n", + "\n", + "[Silhouette analizi](https://en.wikipedia.org/wiki/Silhouette_(clustering)), ortaya çıkan kümeler arasındaki ayrım mesafesini incelemek için kullanılabilir. Bu skor -1 ile 1 arasında değişir ve skor 1'e yakınsa, küme yoğun ve diğer kümelerden iyi bir şekilde ayrılmıştır. 
0'a yakın bir değer, komşu kümelerin karar sınırına çok yakın örneklerle örtüşen kümeleri temsil eder. [kaynak](https://dzone.com/articles/kmeans-silhouette-score-explained-with-python-exam).\n", + "\n", + "Ortalama silhouette yöntemi, farklı *k* değerleri için gözlemlerin ortalama silhouette skorunu hesaplar. Yüksek bir ortalama silhouette skoru, iyi bir kümelemeyi gösterir.\n", + "\n", + "`silhouette` fonksiyonu, cluster paketinde ortalama silhouette genişliğini hesaplamak için kullanılır.\n", + "\n", + "> Silhouette, [mesafe](https://en.wikipedia.org/wiki/Distance \"Distance\") metriği ile hesaplanabilir, örneğin [Öklid mesafesi](https://en.wikipedia.org/wiki/Euclidean_distance \"Euclidean distance\") veya [Manhattan mesafesi](https://en.wikipedia.org/wiki/Manhattan_distance \"Manhattan distance\") gibi. Bu metrikleri [önceki derste](https://github.com/microsoft/ML-For-Beginners/blob/main/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb) tartışmıştık.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Jn0McL28LXzB" + }, + "source": [ + "# Load cluster package\n", + "library(cluster)\n", + "\n", + "# Compute average silhouette score\n", + "ss <- silhouette(kclust$cluster,\n", + " # Compute euclidean distance\n", + " dist = dist(df_numeric_select))\n", + "mean(ss[, 3])\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QyQRn97nLXzC" + }, + "source": [ + "Skorumuz **.549**, yani tam ortada. Bu, verilerimizin bu tür bir kümeleme için özellikle uygun olmadığını gösteriyor. Bu tahminimizi görsel olarak doğrulayıp doğrulayamayacağımıza bakalım. 
[factoextra paketi](https://rpkgs.datanovia.com/factoextra/index.html), kümelemeyi görselleştirmek için (`fviz_cluster()`) işlevlerini sağlar.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "7a6Km1_FLXzD" + }, + "source": [ + "library(factoextra)\n", + "\n", + "# Visualize clustering results\n", + "fviz_cluster(kclust, df_numeric_select)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IBwCWt-0LXzD" + }, + "source": [ + "Kümeler arasındaki örtüşme, verilerimizin bu tür bir kümeleme için pek uygun olmadığını gösteriyor, ancak devam edelim.\n", + "\n", + "## 4. Optimum küme sayısını belirleme\n", + "\n", + "K-Means kümeleme ile ilgili sıkça ortaya çıkan temel bir soru şudur: Bilinen sınıf etiketleri olmadan, verilerinizi kaç kümeye ayırmanız gerektiğini nasıl bilebilirsiniz?\n", + "\n", + "Bunu öğrenmenin bir yolu, bir veri örneği kullanarak `artan küme sayısıyla bir dizi kümeleme modeli oluşturmak` (örneğin 1'den 10'a kadar) ve **Silhouette skoru** gibi kümeleme metriklerini değerlendirmektir.\n", + "\n", + "Optimum küme sayısını belirlemek için farklı *k* değerleri için kümeleme algoritmasını çalıştırıp **Küme İçi Kareler Toplamı** (WCSS) değerini değerlendirelim. Toplam küme içi kareler toplamı (WCSS), kümeleme kompaktlığını ölçer ve mümkün olduğunca küçük olmasını isteriz; daha düşük değerler, veri noktalarının birbirine daha yakın olduğunu gösterir.\n", + "\n", + "Bu kümeleme üzerinde `k` için 1'den 10'a kadar farklı seçimlerin etkisini inceleyelim.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "hSeIiylDLXzE" + }, + "source": [ + "# Create a series of clustering models\n", + "kclusts <- tibble(k = 1:10) %>% \n", + " # Perform kmeans clustering for 1,2,3 ... 
,10 clusters\n", + " mutate(model = map(k, ~ kmeans(df_numeric_select, centers = .x, nstart = 25)),\n", + " # Farm out clustering metrics eg WCSS\n", + " glanced = map(model, ~ glance(.x))) %>% \n", + " unnest(cols = glanced)\n", + " \n", + "\n", + "# View clustering results\n", + "kclusts\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "m7rS2U1eLXzE" + }, + "source": [ + "Her bir kümeleme algoritması için merkez *k* ile toplam küme içi kareler toplamını (tot.withinss) elde ettikten sonra, optimal küme sayısını bulmak için [dirsek yöntemi](https://en.wikipedia.org/wiki/Elbow_method_(clustering)) kullanılır. Bu yöntem, WCSS'yi küme sayısının bir fonksiyonu olarak çizmek ve kullanılacak küme sayısı olarak [eğrinin dirseğini](https://en.wikipedia.org/wiki/Elbow_of_the_curve \"Eğrinin dirseği\") seçmekten oluşur.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "o_DjHGItLXzF" + }, + "source": [ + "set.seed(2056)\n", + "# Use elbow method to determine optimum number of clusters\n", + "kclusts %>% \n", + " ggplot(mapping = aes(x = k, y = tot.withinss)) +\n", + " geom_line(size = 1.2, alpha = 0.8, color = \"#FF7F0EFF\") +\n", + " geom_point(size = 2, color = \"#FF7F0EFF\")\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pLYyt5XSLXzG" + }, + "source": [ + "Grafik, kümelerin sayısı birden ikiye çıkarken WCSS'de (dolayısıyla daha fazla *sıkılık*) büyük bir azalma ve iki kümeden üç kümeye geçerken belirgin bir azalma daha gösteriyor. Bundan sonra azalma daha az belirgin hale geliyor ve grafikte yaklaşık üç kümede bir `dirsek` 💪 oluşuyor. 
Bu, veri noktalarının iki ila üç makul şekilde ayrılmış küme oluşturduğuna dair iyi bir göstergedir.\n", + "\n", + "Şimdi `k = 3` olduğu durumda kümeleme modelini çıkarabiliriz:\n", + "\n", + "> `pull()`: tek bir sütunu çıkarmak için kullanılır\n", + ">\n", + "> `pluck()`: listeler gibi veri yapılarında indeksleme yapmak için kullanılır\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "JP_JPKBILXzG" + }, + "source": [ + "# Extract k = 3 clustering\n", + "final_kmeans <- kclusts %>% \n", + " filter(k == 3) %>% \n", + " pull(model) %>% \n", + " pluck(1)\n", + "\n", + "\n", + "final_kmeans\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l_PDTu8tLXzI" + }, + "source": [ + "Harika! Hadi elde edilen kümeleri görselleştirelim. `plotly` kullanarak biraz etkileşim eklemeye ne dersiniz?\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "dNcleFe-LXzJ" + }, + "source": [ + "# Add predicted cluster assignment to data set\n", + "results <- augment(final_kmeans, df_numeric_select) %>% \n", + " bind_cols(df_numeric %>% select(artist_top_genre)) \n", + "\n", + "# Plot cluster assignments\n", + "clust_plt <- results %>% \n", + " ggplot(mapping = aes(x = popularity, y = danceability, color = .cluster, shape = artist_top_genre)) +\n", + " geom_point(size = 2, alpha = 0.8) +\n", + " paletteer::scale_color_paletteer_d(\"ggthemes::Tableau_10\")\n", + "\n", + "ggplotly(clust_plt)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6JUM_51VLXzK" + }, + "source": [ + "Belki de her kümenin (farklı renklerle temsil edilen) farklı türlere (farklı şekillerle temsil edilen) sahip olmasını beklerdik.\n", + "\n", + "Şimdi modelin doğruluğuna bir göz atalım.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "HdIMUGq7LXzL" + }, + "source": [ + "# Assign genres to predefined integers\n", + "label_count <- results %>% \n", + " 
group_by(artist_top_genre) %>% \n", + " mutate(id = cur_group_id()) %>% \n", + " ungroup() %>% \n", + " summarise(correct_labels = sum(.cluster == id))\n", + "\n", + "\n", + "# Print results \n", + "cat(\"Result:\", label_count$correct_labels, \"out of\", nrow(results), \"samples were correctly labeled.\")\n", + "\n", + "cat(\"\\nAccuracy score:\", label_count$correct_labels/nrow(results))\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "C50wvaAOLXzM" + }, + "source": [ + "Bu modelin doğruluğu fena değil, ama harika da değil. Bunun nedeni, verilerin K-Means Kümeleme için uygun olmaması olabilir. Bu veri seti çok dengesiz, çok az ilişkilendirilmiş ve sütun değerleri arasında çok fazla varyans var, bu da iyi bir kümeleme yapılmasını zorlaştırıyor. Aslında, oluşan kümeler muhtemelen yukarıda tanımladığımız üç tür kategorisinden büyük ölçüde etkileniyor veya çarpıtılıyor.\n", + "\n", + "Yine de, bu oldukça öğretici bir süreçti!\n", + "\n", + "Scikit-learn dokümantasyonunda, bu tür kümelerin çok iyi ayrılmadığı bir modelin 'varyans' problemi yaşadığını görebilirsiniz:\n", + "\n", + "

\n", + " \n", + "

Scikit-learn'den bir bilgi grafiği
\n", + "\n", + "\n", + "\n", + "## **Varyans**\n", + "\n", + "Varyans, \"ortalamadan farkların karelerinin ortalaması\" olarak tanımlanır [kaynak](https://www.mathsisfun.com/data/standard-deviation.html). Bu kümeleme problemi bağlamında, veri setimizdeki sayıların ortalamadan biraz fazla sapma eğiliminde olduğunu ifade eder.\n", + "\n", + "✅ Bu sorunu düzeltmek için düşünebileceğiniz tüm yolları değerlendirmek için harika bir an. Verileri biraz daha düzenlemek mi? Farklı sütunlar kullanmak mı? Farklı bir algoritma mı denemek? İpucu: Verilerinizi normalize etmek için [verilerinizi ölçeklendirmeyi deneyin](https://www.mygreatlearning.com/blog/learning-data-science-with-k-means-clustering/) ve diğer sütunları test edin.\n", + "\n", + "> Bu '[varyans hesaplayıcıyı](https://www.calculatorsoup.com/calculators/statistics/variance-calculator.php)' kullanarak kavramı biraz daha iyi anlayabilirsiniz.\n", + "\n", + "------------------------------------------------------------------------\n", + "\n", + "## **🚀Meydan Okuma**\n", + "\n", + "Bu not defteriyle biraz zaman geçirin ve parametreleri değiştirin. Verileri daha fazla temizleyerek (örneğin, aykırı değerleri kaldırarak) modelin doğruluğunu artırabilir misiniz? Belirli veri örneklerine daha fazla ağırlık vermek için ağırlıklar kullanabilirsiniz. Daha iyi kümeler oluşturmak için başka neler yapabilirsiniz?\n", + "\n", + "İpucu: Verilerinizi ölçeklendirmeyi deneyin. Not defterinde, veri sütunlarının aralık açısından birbirine daha çok benzemesini sağlamak için standart ölçeklendirme ekleyen, yorum satırına alınmış kodlar var. Silüet puanı düşse de dirsek grafiğindeki 'kırılmanın' yumuşadığını göreceksiniz. Bunun nedeni, verilerin ölçeklendirilmemiş halde bırakılmasının, daha az varyansa sahip verilerin daha fazla ağırlık taşımasına izin vermesidir. 
Bu sorun hakkında biraz daha fazla bilgi edinmek için [burayı okuyun](https://stats.stackexchange.com/questions/21222/are-mean-normalization-and-feature-scaling-needed-for-k-means-clustering/21226#21226).\n", + "\n", + "## [**Ders Sonrası Testi**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/30/)\n", + "\n", + "## **Gözden Geçirme ve Kendi Kendine Çalışma**\n", + "\n", + "- Bir K-Means Simülatörüne göz atın [örneğin bu](https://user.ceng.metu.edu.tr/~akifakkus/courses/ceng574/k-means/). Bu aracı kullanarak örnek veri noktalarını görselleştirebilir ve merkezlerini belirleyebilirsiniz. Verinin rastgeleliğini, küme sayılarını ve merkez sayılarını düzenleyebilirsiniz. Bu, verilerin nasıl gruplanabileceği hakkında bir fikir edinmenize yardımcı oluyor mu?\n", + "\n", + "- Ayrıca, Stanford'dan [bu K-Means el kitabına](https://stanford.edu/~cpiech/cs221/handouts/kmeans.html) göz atın.\n", + "\n", + "Yeni edindiğiniz kümeleme becerilerinizi K-Means kümeleme için uygun veri setlerinde denemek ister misiniz? Şunlara göz atabilirsiniz:\n", + "\n", + "- [Kümeleme Modellerini Eğit ve Değerlendir](https://rpubs.com/eR_ic/clustering) Tidymodels ve arkadaşlarıyla\n", + "\n", + "- [K-means Kümeleme Analizi](https://uc-r.github.io/kmeans_clustering), UC İş Analitiği R Programlama Rehberi\n", + "\n", + "- [Tidy veri ilkeleriyle K-means kümeleme](https://www.tidymodels.org/learn/statistics/k-means/)\n", + "\n", + "## **Ödev**\n", + "\n", + "[Farklı kümeleme yöntemlerini deneyin](https://github.com/microsoft/ML-For-Beginners/blob/main/5-Clustering/2-K-Means/assignment.md)\n", + "\n", + "## TEŞEKKÜRLER:\n", + "\n", + "[Jen Looper](https://www.twitter.com/jenlooper) bu modülün orijinal Python versiyonunu oluşturduğu için ♥️\n", + "\n", + "[`Allison Horst`](https://twitter.com/allison_horst/) R'ı daha sıcak ve çekici hale getiren harika illüstrasyonları için. 
Daha fazla illüstrasyonu [galerisinde](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM) bulabilirsiniz.\n", + "\n", + "Keyifli Öğrenmeler,\n", + "\n", + "[Eric](https://twitter.com/ericntay), Gold Microsoft Learn Öğrenci Elçisi.\n", + "\n", + "

\n", + " \n", + "

@allison_horst tarafından yapılmış bir çalışma
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, [Co-op Translator](https://github.com/Azure/co-op-translator) adlı yapay zeka çeviri hizmeti kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlıklar içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. Bu çevirinin kullanımından kaynaklanan yanlış anlamalar veya yanlış yorumlamalar için sorumluluk kabul etmiyoruz.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/tr/5-Clustering/2-K-Means/solution/notebook.ipynb b/translations/tr/5-Clustering/2-K-Means/solution/notebook.ipynb new file mode 100644 index 000000000..46ce61187 --- /dev/null +++ b/translations/tr/5-Clustering/2-K-Means/solution/notebook.ipynb @@ -0,0 +1,548 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "e867e87e3129c8875423a82945f4ad5e", + "translation_date": "2025-09-06T14:21:55+00:00", + "source_file": "5-Clustering/2-K-Means/solution/notebook.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already 
satisfied: seaborn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.11.1)\n", + "Requirement already satisfied: pandas>=0.23 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.1.2)\n", + "Requirement already satisfied: matplotlib>=2.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (3.1.0)\n", + "Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.4.1)\n", + "Requirement already satisfied: numpy>=1.15 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.19.2)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2.8.0)\n", + "Requirement already satisfied: pytz>=2017.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2019.1)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (1.1.0)\n", + "Requirement already satisfied: cycler>=0.10 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n", + "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (2.4.0)\n", + "Requirement already satisfied: six>=1.5 in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from python-dateutil>=2.7.3->pandas>=0.23->seaborn) (1.12.0)\n", + "Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from kiwisolver>=1.0.1->matplotlib>=2.2->seaborn) (45.1.0)\n", + 
"\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install seaborn" + ] + }, + { + "source": [ + "Son derste bitirdiğimiz yerden başlayın, veriler içe aktarılmış ve filtrelenmiş durumda.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "0 Sparky Mandy & The Jungle \n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "2 LITT! LITT! \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "0 Cruel Santino alternative r&b 2019 144000 48 \n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "2 AYLØ indie r&b 2018 207758 40 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "0 0.666 0.8510 0.420 0.534000 0.1100 -6.699 \n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "2 0.836 0.2720 0.564 0.000537 0.1100 -7.127 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "\n", + " speechiness tempo time_signature \n", + "0 0.0829 133.015 5 \n", + "1 0.3600 129.993 3 \n", + "2 0.0424 130.005 4 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
0SparkyMandy & The JungleCruel Santinoalternative r&b2019144000480.6660.85100.4200.5340000.1100-6.6990.0829133.0155
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
2LITT!LITT!AYLØindie r&b2018207758400.8360.27200.5640.0005370.1100-7.1270.0424130.0054
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
\n
" + }, + "metadata": {}, + "execution_count": 11 + } + ], + "source": [ + "\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "\n", + "\n", + "df = pd.read_csv(\"../../data/nigerian-songs.csv\")\n", + "df.head()" + ] + }, + { + "source": [ + "Yalnızca 3 türe odaklanacağız. Belki 3 küme oluşturabiliriz!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "metadata": {}, + "execution_count": 12 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "df = df[(df['artist_top_genre'] == 'afro dancehall') | (df['artist_top_genre'] == 'afropop') | (df['artist_top_genre'] == 'nigerian pop')]\n", + "df = df[(df['popularity'] > 0)]\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. 
\n", + "5 Kasala Pioneers \n", + "6 Pull Up Everything Pretty \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "5 DRB Lasgidi nigerian pop 2020 184800 26 \n", + "6 prettyboydo nigerian pop 2018 202648 29 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "5 0.803 0.1270 0.525 0.000007 0.1290 -10.034 \n", + "6 0.818 0.4520 0.587 0.004490 0.5900 -9.840 \n", + "\n", + " speechiness tempo time_signature \n", + "1 0.3600 129.993 3 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 \n", + "5 0.1970 100.103 4 \n", + "6 0.1990 95.842 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
5KasalaPioneersDRB Lasgidinigerian pop2020184800260.8030.12700.5250.0000070.1290-10.0340.1970100.1034
6Pull UpEverything Prettyprettyboydonigerian pop2018202648290.8180.45200.5870.0044900.5900-9.8400.199095.8424
\n
" + }, + "metadata": {}, + "execution_count": 13 + } + ], + "source": [ + "df.head()" + ] + }, + { + "source": [ + "Bu veri ne kadar temiz? Aykırı değerleri kutu grafikleri kullanarak kontrol edin. Daha az aykırı değere sahip sütunlara odaklanacağız (ancak aykırı değerleri temizleyebilirsiniz). Kutu grafikleri, verinin aralığını gösterebilir ve hangi sütunların kullanılacağını seçmeye yardımcı olur. Not: Kutu grafikleri, iyi kümeleme yapılabilir verinin önemli bir unsuru olan varyansı göstermez (https://stats.stackexchange.com/questions/91536/deduce-variance-from-boxplot)\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 14 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.figure(figsize=(20,20), dpi=200)\n", + "\n", + "plt.subplot(4,3,1)\n", + "sns.boxplot(x = 'popularity', data = df)\n", + "\n", + "plt.subplot(4,3,2)\n", + "sns.boxplot(x = 'acousticness', data = df)\n", + "\n", + "plt.subplot(4,3,3)\n", + "sns.boxplot(x = 'energy', data = df)\n", + "\n", + "plt.subplot(4,3,4)\n", + "sns.boxplot(x = 'instrumentalness', data = df)\n", + "\n", + "plt.subplot(4,3,5)\n", + "sns.boxplot(x = 'liveness', data = df)\n", + "\n", + "plt.subplot(4,3,6)\n", + "sns.boxplot(x = 'loudness', data = df)\n", + "\n", + "plt.subplot(4,3,7)\n", + "sns.boxplot(x = 'speechiness', data = df)\n", + "\n", + "plt.subplot(4,3,8)\n", + "sns.boxplot(x = 'tempo', data = df)\n", + "\n", + "plt.subplot(4,3,9)\n", + "sns.boxplot(x = 
'time_signature', data = df)\n", + "\n", + "plt.subplot(4,3,10)\n", + "sns.boxplot(x = 'danceability', data = df)\n", + "\n", + "plt.subplot(4,3,11)\n", + "sns.boxplot(x = 'length', data = df)\n", + "\n", + "plt.subplot(4,3,12)\n", + "sns.boxplot(x = 'release_date', data = df)" + ] + }, + { + "source": [ + "Benzer aralıklara sahip birkaç sütun seçin. Türlerimizi düzenli tutmak için artist_top_genre sütununu eklediğinizden emin olun.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import LabelEncoder, StandardScaler\n", + "le = LabelEncoder()\n", + "\n", + "# scaler = StandardScaler()\n", + "\n", + "X = df.loc[:, ('artist_top_genre','popularity','danceability','acousticness','loudness','energy')]\n", + "\n", + "y = df['artist_top_genre']\n", + "\n", + "X['artist_top_genre'] = le.fit_transform(X['artist_top_genre'])\n", + "\n", + "# X = scaler.fit_transform(X)\n", + "\n", + "y = le.transform(y)\n", + "\n" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 0, 2, 1, 1, 0, 1, 0, 0,\n", + " 0, 1, 0, 2, 0, 0, 2, 2, 1, 1, 0, 2, 2, 2, 2, 1, 1, 0, 2, 0, 2, 0,\n", + " 2, 0, 0, 1, 1, 2, 1, 0, 0, 2, 2, 2, 2, 1, 1, 0, 1, 2, 2, 1, 2, 2,\n", + " 1, 2, 1, 2, 2, 1, 1, 1, 1, 1, 2, 1, 2, 2, 0, 2, 1, 1, 1, 2, 2, 2,\n", + " 2, 1, 2, 2, 2, 2, 1, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 2, 1, 2, 0,\n", + " 1, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 0, 1, 1, 1, 1, 0, 1, 2, 1, 2,\n", + " 1, 2, 2, 2, 0, 2, 1, 1, 1, 2, 1, 0, 1, 2, 2, 1, 1, 1, 0, 1, 2, 2,\n", + " 2, 1, 1, 0, 1, 2, 1, 1, 1, 1, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2,\n", + " 0, 1, 0, 0, 1, 0, 0, 2, 0, 0, 1, 1, 2, 0, 2, 2, 0, 2, 2, 1, 1, 0,\n", + " 1, 1, 0, 0, 1, 0, 2, 0, 1, 0, 2, 
0, 0, 2, 2, 2, 1, 1, 1, 1, 1, 0,\n", + " 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2,\n", + " 1, 1, 0, 0, 1, 1, 2, 0, 0, 0, 0, 0, 2, 0, 0, 2, 1, 1, 1, 2, 2, 2,\n", + " 1, 2, 1, 2, 1, 1, 1, 0, 2, 2, 2, 1, 2, 1, 0, 1, 2, 1, 1, 1, 2, 1],\n", + " dtype=int32)" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ], + "source": [ + "\n", + "from sklearn.cluster import KMeans\n", + "\n", + "nclusters = 3 \n", + "seed = 0\n", + "\n", + "km = KMeans(n_clusters=nclusters, random_state=seed)\n", + "km.fit(X)\n", + "\n", + "# Predict the cluster for each data point\n", + "\n", + "y_cluster_kmeans = km.predict(X)\n", + "y_cluster_kmeans" + ] + }, + { + "source": [ + "Bu sayılar bizim için pek bir şey ifade etmiyor, bu yüzden doğruluğu görmek için bir 'silüet skoru' alalım. Skorumuz orta düzeyde.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.5466747351275563" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ], + "source": [ + "from sklearn import metrics\n", + "score = metrics.silhouette_score(X, y_cluster_kmeans)\n", + "score" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.cluster import KMeans\n", + "wcss = []\n", + "\n", + "for i in range(1, 11):\n", + " kmeans = KMeans(n_clusters = i, init = 'k-means++', random_state = 42)\n", + " kmeans.fit(X)\n", + " wcss.append(kmeans.inertia_)" + ] + }, + { + "source": [ + "Bu modeli kullanarak, Dirsek Yöntemi ile oluşturulacak en iyi küme sayısını belirleyin\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + 
"/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.\n FutureWarning\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.figure(figsize=(10,5))\n", + "sns.lineplot(range(1, 11), wcss,marker='o',color='red')\n", + "plt.title('Elbow')\n", + "plt.xlabel('Number of clusters')\n", + "plt.ylabel('WCSS')\n", + "plt.show()" + ] + }, + { + "source": [ + "Looks like 3 is a good number after all. Fit the model again and create a scatterplot of your clusters. They do group in bunches, but they are pretty close together." + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "from sklearn.cluster import KMeans\n", + "kmeans = KMeans(n_clusters = 3)\n", + "kmeans.fit(X)\n", + "labels = kmeans.predict(X)\n", + "plt.scatter(df['popularity'],df['danceability'],c = labels)\n", + 
"plt.xlabel('popularity')\n", + "plt.ylabel('danceability')\n", + "plt.show()" + ] + }, + { + "source": [ + "Bu modelin doğruluğu fena değil, ancak harika da değil. Verilerin K-Means Kümeleme için uygun olmayabileceği düşünülebilir. Farklı bir yöntem deneyebilirsiniz.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 811, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Result: 109 out of 286 samples were correctly labeled.\nAccuracy score: 0.38\n" + ] + } + ], + "source": [ + "labels = kmeans.labels_\n", + "\n", + "correct_labels = sum(y == labels)\n", + "\n", + "print(\"Result: %d out of %d samples were correctly labeled.\" % (correct_labels, y.size))\n", + "\n", + "print('Accuracy score: {0:0.2f}'. format(correct_labels/float(y.size)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, [Co-op Translator](https://github.com/Azure/co-op-translator) adlı yapay zeka çeviri hizmeti kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlıklar içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. 
Bu çevirinin kullanımından kaynaklanan yanlış anlamalar veya yanlış yorumlamalar için sorumluluk kabul etmiyoruz.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/tr/5-Clustering/2-K-Means/solution/tester.ipynb b/translations/tr/5-Clustering/2-K-Means/solution/tester.ipynb new file mode 100644 index 000000000..94266a981 --- /dev/null +++ b/translations/tr/5-Clustering/2-K-Means/solution/tester.ipynb @@ -0,0 +1,343 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "6f92868513e59d321245137c1c4c5311", + "translation_date": "2025-09-06T14:23:00+00:00", + "source_file": "5-Clustering/2-K-Means/solution/tester.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: seaborn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.11.1)\n", + "Requirement already satisfied: pandas>=0.23 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.1.2)\n", + "Requirement already satisfied: matplotlib>=2.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (3.1.0)\n", 
+ "Requirement already satisfied: numpy>=1.15 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.19.2)\n", + "Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.4.1)\n", + "Requirement already satisfied: pytz>=2017.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2019.1)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2.8.0)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (1.1.0)\n", + "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (2.4.0)\n", + "Requirement already satisfied: cycler>=0.10 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n", + "Requirement already satisfied: six>=1.5 in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from python-dateutil>=2.7.3->pandas>=0.23->seaborn) (1.12.0)\n", + "Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from kiwisolver>=1.0.1->matplotlib>=2.2->seaborn) (45.1.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install seaborn" + ] + }, + { + "source": [ + "Son derste 
bitirdiğimiz yerden başlayın, veriler içe aktarılmış ve filtrelenmiş durumda.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "0 Sparky Mandy & The Jungle \n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "2 LITT! LITT! \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "0 Cruel Santino alternative r&b 2019 144000 48 \n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "2 AYLØ indie r&b 2018 207758 40 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "0 0.666 0.8510 0.420 0.534000 0.1100 -6.699 \n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "2 0.836 0.2720 0.564 0.000537 0.1100 -7.127 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "\n", + " speechiness tempo time_signature \n", + "0 0.0829 133.015 5 \n", + "1 0.3600 129.993 3 \n", + "2 0.0424 130.005 4 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
0SparkyMandy & The JungleCruel Santinoalternative r&b2019144000480.6660.85100.4200.5340000.1100-6.6990.0829133.0155
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
2LITT!LITT!AYLØindie r&b2018207758400.8360.27200.5640.0005370.1100-7.1270.0424130.0054
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
\n
" + }, + "metadata": {}, + "execution_count": 105 + } + ], + "source": [ + "\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "import numpy as np\n", + "\n", + "df = pd.read_csv(\"../../data/nigerian-songs.csv\")\n", + "df.head()" + ] + }, + { + "source": [ + "Yalnızca 3 türe odaklanacağız. Belki 3 küme oluşturabiliriz!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "metadata": {}, + "execution_count": 106 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "df = df[(df['artist_top_genre'] == 'afro dancehall') | (df['artist_top_genre'] == 'afropop') | (df['artist_top_genre'] == 'nigerian pop')]\n", + "df = df[(df['popularity'] > 0)]\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. 
\n", + "5 Kasala Pioneers \n", + "6 Pull Up Everything Pretty \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "5 DRB Lasgidi nigerian pop 2020 184800 26 \n", + "6 prettyboydo nigerian pop 2018 202648 29 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "5 0.803 0.1270 0.525 0.000007 0.1290 -10.034 \n", + "6 0.818 0.4520 0.587 0.004490 0.5900 -9.840 \n", + "\n", + " speechiness tempo time_signature \n", + "1 0.3600 129.993 3 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 \n", + "5 0.1970 100.103 4 \n", + "6 0.1990 95.842 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
5KasalaPioneersDRB Lasgidinigerian pop2020184800260.8030.12700.5250.0000070.1290-10.0340.1970100.1034
6Pull UpEverything Prettyprettyboydonigerian pop2018202648290.8180.45200.5870.0044900.5900-9.8400.199095.8424
\n
" + }, + "metadata": {}, + "execution_count": 107 + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import StandardScaler\n", + "\n", + "scaler = StandardScaler()\n", + "\n", + "# X = df.loc[:, ('danceability','energy')]\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [ + { + "output_type": "error", + "ename": "ValueError", + "evalue": "Unknown label type: 'continuous'", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0;31m# we create an instance of SVM and fit out data. We do not scale our\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[0;31m# data since we want to plot the support vectors\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 22\u001b[0;31m \u001b[0mls30\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mLabelSpreading\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_30\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_30\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Label Spreading 30% data'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 23\u001b[0m \u001b[0mls50\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mLabelSpreading\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0my_50\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_50\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Label Spreading 50% data'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0mls100\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mLabelSpreading\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Label Spreading 100% data'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/semi_supervised/_label_propagation.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y)\u001b[0m\n\u001b[1;32m 228\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_validate_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 229\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mX_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 230\u001b[0;31m \u001b[0mcheck_classification_targets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 231\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 232\u001b[0m \u001b[0;31m# actual graph construction (implementations should override this)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/utils/multiclass.py\u001b[0m in 
\u001b[0;36mcheck_classification_targets\u001b[0;34m(y)\u001b[0m\n\u001b[1;32m 181\u001b[0m if y_type not in ['binary', 'multiclass', 'multiclass-multioutput',\n\u001b[1;32m 182\u001b[0m 'multilabel-indicator', 'multilabel-sequences']:\n\u001b[0;32m--> 183\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Unknown label type: %r\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0my_type\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 184\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 185\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: Unknown label type: 'continuous'" + ] + } + ], + "source": [ + "from sklearn.svm import SVC\n", + "from sklearn.semi_supervised import LabelSpreading\n", + "from sklearn.semi_supervised import SelfTrainingClassifier\n", + "from sklearn import datasets\n", + "\n", + "X = df[['danceability','acousticness']].values\n", + "y = df['energy'].values\n", + "\n", + "# X = scaler.fit_transform(X)\n", + "\n", + "# step size in the mesh\n", + "h = .02\n", + "\n", + "rng = np.random.RandomState(0)\n", + "y_rand = rng.rand(y.shape[0])\n", + "y_30 = np.copy(y)\n", + "y_30[y_rand < 0.3] = -1 # set random samples to be unlabeled\n", + "y_50 = np.copy(y)\n", + "y_50[y_rand < 0.5] = -1\n", + "# we create an instance of SVM and fit out data. 
We do not scale our\n", + "# data since we want to plot the support vectors\n", + "ls30 = (LabelSpreading().fit(X, y_30), y_30, 'Label Spreading 30% data')\n", + "ls50 = (LabelSpreading().fit(X, y_50), y_50, 'Label Spreading 50% data')\n", + "ls100 = (LabelSpreading().fit(X, y), y, 'Label Spreading 100% data')\n", + "\n", + "# the base classifier for self-training is identical to the SVC\n", + "base_classifier = SVC(kernel='rbf', gamma=.5, probability=True)\n", + "st30 = (SelfTrainingClassifier(base_classifier).fit(X, y_30),\n", + " y_30, 'Self-training 30% data')\n", + "st50 = (SelfTrainingClassifier(base_classifier).fit(X, y_50),\n", + " y_50, 'Self-training 50% data')\n", + "\n", + "rbf_svc = (SVC(kernel='rbf', gamma=.5).fit(X, y), y, 'SVC with rbf kernel')\n", + "\n", + "# create a mesh to plot in\n", + "x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1\n", + "y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1\n", + "xx, yy = np.meshgrid(np.arange(x_min, x_max, h),\n", + " np.arange(y_min, y_max, h))\n", + "\n", + "color_map = {-1: (1, 1, 1), 0: (0, 0, .9), 1: (1, 0, 0), 2: (.8, .6, 0)}\n", + "\n", + "classifiers = (ls30, st30, ls50, st50, ls100, rbf_svc)\n", + "for i, (clf, y_train, title) in enumerate(classifiers):\n", + " # Plot the decision boundary. 
For that, we will assign a color to each\n", + " # point in the mesh [x_min, x_max]x[y_min, y_max].\n", + " plt.subplot(3, 2, i + 1)\n", + " Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])\n", + "\n", + " # Put the result into a color plot\n", + " Z = Z.reshape(xx.shape)\n", + " plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)\n", + " plt.axis('off')\n", + "\n", + " # Plot also the training points\n", + " colors = [color_map[y] for y in y_train]\n", + " plt.scatter(X[:, 0], X[:, 1], c=colors, edgecolors='black')\n", + "\n", + " plt.title(title)\n", + "\n", + "plt.suptitle(\"Unlabeled points are colored white\", y=0.1)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, AI çeviri hizmeti [Co-op Translator](https://github.com/Azure/co-op-translator) kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlıklar içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. 
Bu çevirinin kullanımından kaynaklanan yanlış anlamalar veya yanlış yorumlamalar için sorumluluk kabul etmiyoruz.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/tr/6-NLP/3-Translation-Sentiment/solution/notebook.ipynb b/translations/tr/6-NLP/3-Translation-Sentiment/solution/notebook.ipynb new file mode 100644 index 000000000..368002742 --- /dev/null +++ b/translations/tr/6-NLP/3-Translation-Sentiment/solution/notebook.ipynb @@ -0,0 +1,100 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 4, + "coopTranslator": { + "original_hash": "27de2abc0235ebd22080fc8f1107454d", + "translation_date": "2025-09-06T15:22:21+00:00", + "source_file": "6-NLP/3-Translation-Sentiment/solution/notebook.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from textblob import TextBlob\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# You should download the book text, clean it, and import it here\n", + "with open(\"pride.txt\", encoding=\"utf8\") as f:\n", + " file_contents = f.read()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "book_pride = TextBlob(file_contents)\n", + "positive_sentiment_sentences = []\n", + "negative_sentiment_sentences = []" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for sentence in book_pride.sentences:\n", + " if sentence.sentiment.polarity == 1:\n", + " positive_sentiment_sentences.append(sentence)\n", + " if sentence.sentiment.polarity == -1:\n", 
+ " negative_sentiment_sentences.append(sentence)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The \" + str(len(positive_sentiment_sentences)) + \" most positive sentences:\")\n", + "for sentence in positive_sentiment_sentences:\n", + " print(\"+ \" + str(sentence.replace(\"\\n\", \"\").replace(\" \", \" \")))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The \" + str(len(negative_sentiment_sentences)) + \" most negative sentences:\")\n", + "for sentence in negative_sentiment_sentences:\n", + " print(\"- \" + str(sentence.replace(\"\\n\", \"\").replace(\" \", \" \")))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, AI çeviri hizmeti [Co-op Translator](https://github.com/Azure/co-op-translator) kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlıklar içerebileceğini lütfen unutmayın. Belgenin orijinal dilindeki hali, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. 
Bu çevirinin kullanımından kaynaklanan yanlış anlamalar veya yanlış yorumlamalardan sorumlu değiliz.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/tr/6-NLP/4-Hotel-Reviews-1/notebook.ipynb b/translations/tr/6-NLP/4-Hotel-Reviews-1/notebook.ipynb new file mode 100644 index 000000000..e69de29bb diff --git a/translations/tr/6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb b/translations/tr/6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb new file mode 100644 index 000000000..3ca92b264 --- /dev/null +++ b/translations/tr/6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb @@ -0,0 +1,174 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 4, + "coopTranslator": { + "original_hash": "2d05e7db439376aa824f4b387f8324ca", + "translation_date": "2025-09-06T15:21:59+00:00", + "source_file": "6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# EDA\n", + "import pandas as pd\n", + "import time" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_difference_review_avg(row):\n", + " return row[\"Average_Score\"] - row[\"Calc_Average_Score\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the hotel reviews from CSV\n", + "print(\"Loading data file now, this could take a while depending on file size\")\n", + "start = time.time()\n", + "df = pd.read_csv('../../data/Hotel_Reviews.csv')\n", + "end = time.time()\n", + "print(\"Loading took \" + str(round(end - start, 2)) + \" seconds\")\n" + ] + }, + 
{ + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# What shape is the data (rows, columns)?\n", + "print(\"The shape of the data (rows, cols) is \" + str(df.shape))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# value_counts() creates a Series object that has index and values\n", + "# in this case, the country and the frequency they occur in reviewer nationality\n", + "nationality_freq = df[\"Reviewer_Nationality\"].value_counts()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# What reviewer nationality is the most common in the dataset?\n", + "print(\"The highest frequency reviewer nationality is \" + str(nationality_freq.index[0]).strip() + \" with \" + str(nationality_freq[0]) + \" reviews.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# What is the top 10 most common nationalities and their frequencies?\n", + "print(\"The top 10 highest frequency reviewer nationalities are:\")\n", + "print(nationality_freq[0:10].to_string())\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# How many unique nationalities are there?\n", + "print(\"There are \" + str(nationality_freq.index.size) + \" unique nationalities in the dataset\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# What was the most frequently reviewed hotel for the top 10 nationalities - print the hotel and number of reviews\n", + "for nat in nationality_freq[:10].index:\n", + " # First, extract all the rows that match the criteria into a new dataframe\n", + " nat_df = df[df[\"Reviewer_Nationality\"] == nat] \n", + " # Now get the hotel freq\n", + " freq = nat_df[\"Hotel_Name\"].value_counts()\n", + " 
print(\"The most reviewed hotel for \" + str(nat).strip() + \" was \" + str(freq.index[0]) + \" with \" + str(freq[0]) + \" reviews.\") \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# How many reviews are there per hotel (frequency count of hotel) and do the results match the value in `Total_Number_of_Reviews`?\n", + "# First create a new dataframe based on the old one, removing the uneeded columns\n", + "hotel_freq_df = df.drop([\"Hotel_Address\", \"Additional_Number_of_Scoring\", \"Review_Date\", \"Average_Score\", \"Reviewer_Nationality\", \"Negative_Review\", \"Review_Total_Negative_Word_Counts\", \"Positive_Review\", \"Review_Total_Positive_Word_Counts\", \"Total_Number_of_Reviews_Reviewer_Has_Given\", \"Reviewer_Score\", \"Tags\", \"days_since_review\", \"lat\", \"lng\"], axis = 1)\n", + "# Group the rows by Hotel_Name, count them and put the result in a new column Total_Reviews_Found\n", + "hotel_freq_df['Total_Reviews_Found'] = hotel_freq_df.groupby('Hotel_Name').transform('count')\n", + "# Get rid of all the duplicated rows\n", + "hotel_freq_df = hotel_freq_df.drop_duplicates(subset = [\"Hotel_Name\"])\n", + "print()\n", + "print(hotel_freq_df.to_string())\n", + "print(str(hotel_freq_df.shape))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# While there is an `Average_Score` for each hotel according to the dataset, \n", + "# you can also calculate an average score (getting the average of all reviewer scores in the dataset for each hotel)\n", + "# Add a new column to your dataframe with the column header `Calc_Average_Score` that contains that calculated average. 
\n", + "df['Calc_Average_Score'] = round(df.groupby('Hotel_Name').Reviewer_Score.transform('mean'), 1)\n", + "# Add a new column with the difference between the two average scores\n", + "df[\"Average_Score_Difference\"] = df.apply(get_difference_review_avg, axis = 1)\n", + "# Create a df without all the duplicates of Hotel_Name (so only 1 row per hotel)\n", + "review_scores_df = df.drop_duplicates(subset = [\"Hotel_Name\"])\n", + "# Sort the dataframe to find the lowest and highest average score difference\n", + "review_scores_df = review_scores_df.sort_values(by=[\"Average_Score_Difference\"])\n", + "print(review_scores_df[[\"Average_Score_Difference\", \"Average_Score\", \"Calc_Average_Score\", \"Hotel_Name\"]])\n", + "# Do any hotels have the same (rounded to 1 decimal place) `Average_Score` and `Calc_Average_Score`?\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, AI çeviri hizmeti [Co-op Translator](https://github.com/Azure/co-op-translator) kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlıklar içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. 
Bu çevirinin kullanımından kaynaklanan yanlış anlamalar veya yanlış yorumlamalar için sorumluluk kabul etmiyoruz.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/tr/6-NLP/5-Hotel-Reviews-2/notebook.ipynb b/translations/tr/6-NLP/5-Hotel-Reviews-2/notebook.ipynb new file mode 100644 index 000000000..e69de29bb diff --git a/translations/tr/6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb b/translations/tr/6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb new file mode 100644 index 000000000..56ba72294 --- /dev/null +++ b/translations/tr/6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb @@ -0,0 +1,172 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "033cb89c85500224b3c63fd04f49b4aa", + "translation_date": "2025-09-06T15:22:42+00:00", + "source_file": "6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import time\n", + "import ast" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def replace_address(row):\n", + " if \"Netherlands\" in row[\"Hotel_Address\"]:\n", + " return \"Amsterdam, Netherlands\"\n", + " elif \"Barcelona\" in row[\"Hotel_Address\"]:\n", + " return \"Barcelona, Spain\"\n", + " elif \"United Kingdom\" in row[\"Hotel_Address\"]:\n", + " return \"London, United Kingdom\"\n", + " 
elif \"Milan\" in row[\"Hotel_Address\"]: \n", + " return \"Milan, Italy\"\n", + " elif \"France\" in row[\"Hotel_Address\"]:\n", + " return \"Paris, France\"\n", + " elif \"Vienna\" in row[\"Hotel_Address\"]:\n", + " return \"Vienna, Austria\" \n", + " else:\n", + " return row.Hotel_Address\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the hotel reviews from CSV\n", + "start = time.time()\n", + "df = pd.read_csv('../../data/Hotel_Reviews.csv')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# dropping columns we will not use:\n", + "df.drop([\"lat\", \"lng\"], axis = 1, inplace=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Replace all the addresses with a shortened, more useful form\n", + "df[\"Hotel_Address\"] = df.apply(replace_address, axis = 1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Drop `Additional_Number_of_Scoring`\n", + "df.drop([\"Additional_Number_of_Scoring\"], axis = 1, inplace=True)\n", + "# Replace `Total_Number_of_Reviews` and `Average_Score` with our own calculated values\n", + "df.Total_Number_of_Reviews = df.groupby('Hotel_Name').transform('count')\n", + "df.Average_Score = round(df.groupby('Hotel_Name').Reviewer_Score.transform('mean'), 1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Process the Tags into new columns\n", + "# The file Hotel_Reviews_Tags.py, identifies the most important tags\n", + "# Leisure trip, Couple, Solo traveler, Business trip, Group combined with Travelers with friends, \n", + "# Family with young children, Family with older children, With a pet\n", + "df[\"Leisure_trip\"] = df.Tags.apply(lambda tag: 1 if \"Leisure trip\" in tag else 0)\n", + 
"df[\"Couple\"] = df.Tags.apply(lambda tag: 1 if \"Couple\" in tag else 0)\n", + "df[\"Solo_traveler\"] = df.Tags.apply(lambda tag: 1 if \"Solo traveler\" in tag else 0)\n", + "df[\"Business_trip\"] = df.Tags.apply(lambda tag: 1 if \"Business trip\" in tag else 0)\n", + "df[\"Group\"] = df.Tags.apply(lambda tag: 1 if \"Group\" in tag or \"Travelers with friends\" in tag else 0)\n", + "df[\"Family_with_young_children\"] = df.Tags.apply(lambda tag: 1 if \"Family with young children\" in tag else 0)\n", + "df[\"Family_with_older_children\"] = df.Tags.apply(lambda tag: 1 if \"Family with older children\" in tag else 0)\n", + "df[\"With_a_pet\"] = df.Tags.apply(lambda tag: 1 if \"With a pet\" in tag else 0)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# No longer need any of these columns\n", + "df.drop([\"Review_Date\", \"Review_Total_Negative_Word_Counts\", \"Review_Total_Positive_Word_Counts\", \"days_since_review\", \"Total_Number_of_Reviews_Reviewer_Has_Given\"], axis = 1, inplace=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Saving results to Hotel_Reviews_Filtered.csv\n", + "Filtering took 23.74 seconds\n" + ] + } + ], + "source": [ + "# Saving new data file with calculated columns\n", + "print(\"Saving results to Hotel_Reviews_Filtered.csv\")\n", + "df.to_csv(r'../../data/Hotel_Reviews_Filtered.csv', index = False)\n", + "end = time.time()\n", + "print(\"Filtering took \" + str(round(end - start, 2)) + \" seconds\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, AI çeviri hizmeti [Co-op Translator](https://github.com/Azure/co-op-translator) kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlık içerebileceğini lütfen unutmayın. 
Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. Bu çevirinin kullanımından kaynaklanan yanlış anlamalar veya yanlış yorumlamalar için sorumluluk kabul etmiyoruz.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/tr/6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb b/translations/tr/6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb new file mode 100644 index 000000000..63beaabc7 --- /dev/null +++ b/translations/tr/6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb @@ -0,0 +1,137 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "341efc86325ec2a214f682f57a189dfd", + "translation_date": "2025-09-06T15:23:02+00:00", + "source_file": "6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the hotel reviews from CSV (you can )\n", + "import pandas as pd \n", + "\n", + "df = pd.read_csv('../../data/Hotel_Reviews_Filtered.csv')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# We want to find the most useful tags to keep\n", + "# Remove opening and closing brackets\n", + "df.Tags = df.Tags.str.strip(\"[']\")\n", + "# remove all quotes too\n", + "df.Tags = df.Tags.str.replace(\" ', '\", \",\", regex = False)\n" + ] + }, + { + "cell_type": "code", + 
"execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# removing this to take advantage of the 'already a phrase' fact of the dataset \n", + "# Now split the strings into a list\n", + "tag_list_df = df.Tags.str.split(',', expand = True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove leading and trailing spaces\n", + "df[\"Tag_1\"] = tag_list_df[0].str.strip()\n", + "df[\"Tag_2\"] = tag_list_df[1].str.strip()\n", + "df[\"Tag_3\"] = tag_list_df[2].str.strip()\n", + "df[\"Tag_4\"] = tag_list_df[3].str.strip()\n", + "df[\"Tag_5\"] = tag_list_df[4].str.strip()\n", + "df[\"Tag_6\"] = tag_list_df[5].str.strip()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# Merge the 6 columns into one with melt\n", + "df_tags = df.melt(value_vars=[\"Tag_1\", \"Tag_2\", \"Tag_3\", \"Tag_4\", \"Tag_5\", \"Tag_6\"])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "The shape of the tags with no filtering: (2514684, 2)\n", + " index count\n", + "0 Leisure trip 338423\n", + "1 Couple 205305\n", + "2 Solo traveler 89779\n", + "3 Business trip 68176\n", + "4 Group 51593\n", + "5 Family with young children 49318\n", + "6 Family with older children 21509\n", + "7 Travelers with friends 1610\n", + "8 With a pet 1078\n" + ] + } + ], + "source": [ + "# Get the value counts\n", + "tag_vc = df_tags.value.value_counts()\n", + "# print(tag_vc)\n", + "print(\"The shape of the tags with no filtering:\", str(df_tags.shape))\n", + "# Drop rooms, suites, and length of stay, mobile device and anything with less count than a 1000\n", + "df_tags = df_tags[~df_tags.value.str.contains(\"Standard|room|Stayed|device|Beds|Suite|Studio|King|Superior|Double\", na=False, case=False)]\n", + "tag_vc = 
df_tags.value.value_counts().reset_index(name=\"count\").query(\"count > 1000\")\n", + "# Print the top 10 (there should only be 9 and we'll use these in the filtering section)\n", + "print(tag_vc[:10])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, AI çeviri hizmeti [Co-op Translator](https://github.com/Azure/co-op-translator) kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlıklar içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. Bu çevirinin kullanımından kaynaklanan yanlış anlamalar veya yanlış yorumlamalar için sorumluluk kabul etmiyoruz.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/tr/6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb b/translations/tr/6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb new file mode 100644 index 000000000..8fa44997f --- /dev/null +++ b/translations/tr/6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb @@ -0,0 +1,260 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "705bf02633759f689abc37b19749a16d", + "translation_date": "2025-09-06T15:23:22+00:00", + "source_file": "6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": 
"stream", + "name": "stderr", + "text": [ + "[nltk_data] Downloading package vader_lexicon to\n[nltk_data] /Users/jenlooper/nltk_data...\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ], + "source": [ + "import time\n", + "import pandas as pd\n", + "import nltk as nltk\n", + "from nltk.corpus import stopwords\n", + "from nltk.sentiment.vader import SentimentIntensityAnalyzer\n", + "nltk.download('vader_lexicon')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "vader_sentiment = SentimentIntensityAnalyzer()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# There are 3 possibilities of input for a review:\n", + "# It could be \"No Negative\", in which case, return 0\n", + "# It could be \"No Positive\", in which case, return 0\n", + "# It could be a review, in which case calculate the sentiment\n", + "def calc_sentiment(review): \n", + " if review == \"No Negative\" or review == \"No Positive\":\n", + " return 0\n", + " return vader_sentiment.polarity_scores(review)[\"compound\"] \n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the hotel reviews from CSV\n", + "df = pd.read_csv(\"../../data/Hotel_Reviews_Filtered.csv\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove stop words - can be slow for a lot of text!\n", + "# Ryan Han (ryanxjhan on Kaggle) has a great post measuring performance of different stop words removal approaches\n", + "# https://www.kaggle.com/ryanxjhan/fast-stop-words-removal # using the approach that Ryan recommends\n", + "start = time.time()\n", + "cache = set(stopwords.words(\"english\"))\n", + "def remove_stopwords(review):\n", + " text = \" \".join([word 
for word in review.split() if word not in cache])\n", + " return text\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove the stop words from both columns\n", + "df.Negative_Review = df.Negative_Review.apply(remove_stopwords) \n", + "df.Positive_Review = df.Positive_Review.apply(remove_stopwords)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Removing stop words took 5.77 seconds\n" + ] + } + ], + "source": [ + "end = time.time()\n", + "print(\"Removing stop words took \" + str(round(end - start, 2)) + \" seconds\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Calculating sentiment columns for both positive and negative reviews\n", + "Calculating sentiment took 201.07 seconds\n" + ] + } + ], + "source": [ + "# Add a negative sentiment and positive sentiment column\n", + "print(\"Calculating sentiment columns for both positive and negative reviews\")\n", + "start = time.time()\n", + "df[\"Negative_Sentiment\"] = df.Negative_Review.apply(calc_sentiment)\n", + "df[\"Positive_Sentiment\"] = df.Positive_Review.apply(calc_sentiment)\n", + "end = time.time()\n", + "print(\"Calculating sentiment took \" + str(round(end - start, 2)) + \" seconds\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Negative_Review Negative_Sentiment\n", + "186584 So bad experience memories I hotel The first n... -0.9920\n", + "129503 First charged twice room booked booking second... -0.9896\n", + "307286 The staff Had bad experience even booking Janu... -0.9889\n", + "452092 No WLAN room Incredibly rude restaurant staff ... 
-0.9884\n", + "201293 We usually traveling Paris 2 3 times year busi... -0.9873\n", + "... ... ...\n", + "26899 I would say however one night expensive even d... 0.9933\n", + "138365 Wifi terribly slow I speed test network upload... 0.9938\n", + "79215 I find anything hotel first I walked past hote... 0.9938\n", + "278506 The property great location There bakery next ... 0.9945\n", + "339189 Guys I like hotel I wish return next year Howe... 0.9948\n", + "\n", + "[515738 rows x 2 columns]\n", + " Positive_Review Positive_Sentiment\n", + "137893 Bathroom Shower We going stay twice hotel 2 ni... -0.9820\n", + "5839 I completely disappointed mad since reception ... -0.9780\n", + "64158 get everything extra internet parking breakfas... -0.9751\n", + "124178 I didnt like anythig Room small Asked upgrade ... -0.9721\n", + "489137 Very rude manager abusive staff reception Dirt... -0.9703\n", + "... ... ...\n", + "331570 Everything This recently renovated hotel class... 0.9984\n", + "322920 From moment stepped doors Guesthouse Hotel sta... 0.9985\n", + "293710 This place surprise expected good actually gre... 0.9985\n", + "417442 We celebrated wedding night Langham I commend ... 0.9985\n", + "132492 We arrived super cute boutique hotel area expl... 
0.9987\n", + "\n", + "[515738 rows x 2 columns]\n" + ] + } + ], + "source": [ + "df = df.sort_values(by=[\"Negative_Sentiment\"], ascending=True)\n", + "print(df[[\"Negative_Review\", \"Negative_Sentiment\"]])\n", + "df = df.sort_values(by=[\"Positive_Sentiment\"], ascending=True)\n", + "print(df[[\"Positive_Review\", \"Positive_Sentiment\"]])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "# Reorder the columns (This is cosmetic, but to make it easier to explore the data later)\n", + "df = df.reindex([\"Hotel_Name\", \"Hotel_Address\", \"Total_Number_of_Reviews\", \"Average_Score\", \"Reviewer_Score\", \"Negative_Sentiment\", \"Positive_Sentiment\", \"Reviewer_Nationality\", \"Leisure_trip\", \"Couple\", \"Solo_traveler\", \"Business_trip\", \"Group\", \"Family_with_young_children\", \"Family_with_older_children\", \"With_a_pet\", \"Negative_Review\", \"Positive_Review\"], axis=1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Saving results to Hotel_Reviews_NLP.csv\n" + ] + } + ], + "source": [ + "print(\"Saving results to Hotel_Reviews_NLP.csv\")\n", + "df.to_csv(r\"../../data/Hotel_Reviews_NLP.csv\", index = False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, AI çeviri hizmeti [Co-op Translator](https://github.com/Azure/co-op-translator) kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlıklar içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. 
Bu çevirinin kullanımından kaynaklanan yanlış anlamalar veya yanlış yorumlamalar için sorumluluk kabul etmiyoruz.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/tr/7-TimeSeries/1-Introduction/solution/notebook.ipynb b/translations/tr/7-TimeSeries/1-Introduction/solution/notebook.ipynb new file mode 100644 index 000000000..06a44991b --- /dev/null +++ b/translations/tr/7-TimeSeries/1-Introduction/solution/notebook.ipynb @@ -0,0 +1,168 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Bu defterde, aşağıdaki konuları nasıl yapacağımızı gösteriyoruz:\n", + "- bu modül için zaman serisi verilerini ayarlama\n", + "- verileri görselleştirme\n", + "\n", + "Bu örnekteki veriler, GEFCom2014 tahmin yarışmasından alınmıştır. 2012 ile 2014 yılları arasında 3 yıl boyunca saatlik elektrik yükü ve sıcaklık değerlerini içermektedir.\n", + "\n", + "Tao Hong, Pierre Pinson, Shu Fan, Hamidreza Zareipour, Alberto Troccoli ve Rob J. Hyndman, \"Olasılıksal enerji tahmini: Global Energy Forecasting Competition 2014 ve sonrası\", International Journal of Forecasting, cilt 32, sayı 3, s. 896-913, Temmuz-Eylül, 2016.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import matplotlib.pyplot as plt\n", + "from common.utils import load_data\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "CSV'den verileri bir Pandas veri çerçevesine yükle\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " load\n", + "2012-01-01 00:00:00 2698.0\n", + "2012-01-01 01:00:00 2558.0\n", + "2012-01-01 02:00:00 2444.0\n", + "2012-01-01 03:00:00 2402.0\n", + "2012-01-01 04:00:00 2403.0" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
load
2012-01-01 00:00:002698.0
2012-01-01 01:00:002558.0
2012-01-01 02:00:002444.0
2012-01-01 03:00:002402.0
2012-01-01 04:00:002403.0
\n
" + }, + "metadata": {}, + "execution_count": 7 + } + ], + "source": [ + "data_dir = './data'\n", + "energy = load_data(data_dir)[['load']]\n", + "energy.head()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Tüm mevcut yük verilerini grafiğe dök (Ocak 2012 - Aralık 2014)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "energy.plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + 
}, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "energy['2014-07-01':'2014-07-07'].plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, [Co-op Translator](https://github.com/Azure/co-op-translator) adlı yapay zeka çeviri hizmeti kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlıklar içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. 
Bu çevirinin kullanımından kaynaklanan yanlış anlama veya yanlış yorumlamalardan sorumlu değiliz.\n" + ] + } + ], + "metadata": { + "kernel_info": { + "name": "python3" + }, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "nteract": { + "version": "nteract-front-end@1.0.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "dddca9ad9e34435494e0933c218e1579", + "translation_date": "2025-09-06T14:01:55+00:00", + "source_file": "7-TimeSeries/1-Introduction/solution/notebook.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/tr/7-TimeSeries/1-Introduction/working/notebook.ipynb b/translations/tr/7-TimeSeries/1-Introduction/working/notebook.ipynb new file mode 100644 index 000000000..aeab6371c --- /dev/null +++ b/translations/tr/7-TimeSeries/1-Introduction/working/notebook.ipynb @@ -0,0 +1,63 @@ +{ + "cells": [ + { + "source": [ + "# Veri Kurulumu\n", + "\n", + "Bu not defterinde, aşağıdaki işlemleri nasıl yapacağımızı göstereceğiz:\n", + "\n", + "bu modül için zaman serisi verilerini hazırlamak \n", + "verileri görselleştirmek \n", + "Bu örnekte kullanılan veriler, GEFCom2014 tahmin yarışmasından alınmıştır. Veriler, 2012 ile 2014 yılları arasında 3 yıllık saatlik elektrik yükü ve sıcaklık değerlerinden oluşmaktadır.\n", + "\n", + "<sup>1</sup>Tao Hong, Pierre Pinson, Shu Fan, Hamidreza Zareipour, Alberto Troccoli ve Rob J.
Hyndman, \"Olasılıklı enerji tahmini: Küresel Enerji Tahmin Yarışması 2014 ve sonrası\", International Journal of Forecasting, cilt 32, sayı 3, s. 896-913, Temmuz-Eylül, 2016.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, AI çeviri hizmeti [Co-op Translator](https://github.com/Azure/co-op-translator) kullanılarak çevrilmiştir. Doğruluğu sağlamak için çaba göstersek de, otomatik çevirilerin hata veya yanlışlık içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. Bu çevirinin kullanımından kaynaklanan yanlış anlamalar veya yanlış yorumlamalar için sorumluluk kabul etmiyoruz.\n" + ] + } + ], + "metadata": { + "kernel_info": { + "name": "python3" + }, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "nteract": { + "version": "nteract-front-end@1.0.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "5e2bbe594906dce3aaaa736d6dac6683", + "translation_date": "2025-09-06T14:02:43+00:00", + "source_file": "7-TimeSeries/1-Introduction/working/notebook.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/tr/7-TimeSeries/2-ARIMA/solution/notebook.ipynb b/translations/tr/7-TimeSeries/2-ARIMA/solution/notebook.ipynb new file mode 100644 index 000000000..5445e429a --- /dev/null +++
b/translations/tr/7-TimeSeries/2-ARIMA/solution/notebook.ipynb @@ -0,0 +1,1143 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# ARIMA ile Zaman Serisi Tahmini\n", + "\n", + "Bu not defterinde, aşağıdaki adımları nasıl gerçekleştireceğimizi göstereceğiz:\n", + "- ARIMA zaman serisi tahmin modeli için zaman serisi verilerini eğitime hazırlama\n", + "- Zaman serisinde bir ARIMA modelini kullanarak bir sonraki HORIZON adımlarını (zaman *t+1*'den *t+HORIZON*'a kadar) tahmin etme\n", + "- Modeli değerlendirme\n", + "\n", + "Bu örnekteki veriler, GEFCom2014 tahmin yarışmasından alınmıştır. 2012 ile 2014 yılları arasındaki 3 yıllık saatlik elektrik yükü ve sıcaklık değerlerinden oluşmaktadır. Görev, elektrik yükünün gelecekteki değerlerini tahmin etmektir. Bu örnekte, yalnızca geçmiş yük verilerini kullanarak bir zaman adımı ilerisini tahmin etmeyi göstereceğiz.\n", + "\n", + "Tao Hong, Pierre Pinson, Shu Fan, Hamidreza Zareipour, Alberto Troccoli ve Rob J. Hyndman, \"Probabilistic energy forecasting: Global Energy Forecasting Competition 2014 and beyond\", International Journal of Forecasting, cilt 32, sayı 3, s. 896-913, Temmuz-Eylül, 2016.\n" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Bağımlılıkları Yükleme\n", + "Çözüm için gerekli olan bazı bağımlılıkları yükleyerek başlayın. 
Bu kütüphaneler ve ilgili sürümleri çözümle uyumlu olarak çalışmaktadır:\n", + "\n", + "* `statsmodels == 0.12.2`\n", + "* `matplotlib == 3.4.2`\n", + "* `scikit-learn == 0.24.2`\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 16, + "source": [ + "!pip install statsmodels" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "/bin/sh: pip: command not found\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 17, + "source": [ + "import os\n", + "import warnings\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import datetime as dt\n", + "import math\n", + "\n", + "from pandas.plotting import autocorrelation_plot\n", + "from statsmodels.tsa.statespace.sarimax import SARIMAX\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from common.utils import load_data, mape\n", + "from IPython.display import Image\n", + "\n", + "%matplotlib inline\n", + "pd.options.display.float_format = '{:,.2f}'.format\n", + "np.set_printoptions(precision=2)\n", + "warnings.filterwarnings(\"ignore\") # specify to ignore warning messages\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 18, + "source": [ + "energy = load_data('./data')[['load']]\n", + "energy.head(10)" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2012-01-01 00:00:002,698.00
2012-01-01 01:00:002,558.00
2012-01-01 02:00:002,444.00
2012-01-01 03:00:002,402.00
2012-01-01 04:00:002,403.00
2012-01-01 05:00:002,453.00
2012-01-01 06:00:002,560.00
2012-01-01 07:00:002,719.00
2012-01-01 08:00:002,916.00
2012-01-01 09:00:003,105.00
\n", + "
" + ], + "text/plain": [ + " load\n", + "2012-01-01 00:00:00 2,698.00\n", + "2012-01-01 01:00:00 2,558.00\n", + "2012-01-01 02:00:00 2,444.00\n", + "2012-01-01 03:00:00 2,402.00\n", + "2012-01-01 04:00:00 2,403.00\n", + "2012-01-01 05:00:00 2,453.00\n", + "2012-01-01 06:00:00 2,560.00\n", + "2012-01-01 07:00:00 2,719.00\n", + "2012-01-01 08:00:00 2,916.00\n", + "2012-01-01 09:00:00 3,105.00" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Tüm mevcut yük verilerini çiz (Ocak 2012 - Aralık 2014)\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 19, + "source": [ + "energy.plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Eğitim ve test veri setlerini oluşturun\n", + "\n", + "### Eğitim ve test veri setlerini ayırma\n", + "\n", + "Makine öğrenimi modellerini eğitmek ve değerlendirmek için veri setinizi genellikle iki ana bölüme ayırmanız gerekir: eğitim veri seti ve test veri seti. Eğitim veri seti, modelin öğrenmesi için kullanılırken, test veri seti modelin performansını değerlendirmek için kullanılır.\n", + "\n", + "### Veri setini ayırma yöntemleri\n", + "\n", + "Veri setinizi ayırmanın birkaç yaygın yöntemi vardır:\n", + "\n", + "1. **Rastgele ayırma**: Veri setinizi rastgele bir şekilde eğitim ve test veri setlerine bölersiniz. Örneğin, veri setinizin %80'ini eğitim için, %20'sini test için ayırabilirsiniz.\n", + "2. **Zaman temelli ayırma**: Eğer verileriniz zaman serisi içeriyorsa, daha eski verileri eğitim için, daha yeni verileri test için kullanabilirsiniz.\n", + "3. **Katmanlı ayırma**: Veri setinizdeki sınıf dağılımını koruyarak ayırma işlemi yapılır. 
Bu, özellikle dengesiz veri setlerinde faydalıdır.\n", + "\n", + "### Örnek kod\n", + "\n", + "Aşağıda, veri setinizi rastgele ayırmak için bir örnek kod verilmiştir (bu not defterindeki zaman serisi verileri ise ilerleyen hücrelerde `train_start_dt` ve `test_start_dt` kullanılarak zamana göre ayrılır):\n", + "\n", + "```python\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "# Veri setini yükleyin\n", + "data = ...  # kendi veri setinizi buraya yükleyin\n", + "\n", + "# Özellikler ve hedef değişkeni ayırın\n", + "X = data['features']\n", + "y = data['target']\n", + "\n", + "# Eğitim ve test veri setlerini ayırın\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", + "\n", + "print(\"Eğitim veri seti boyutu:\", len(X_train))\n", + "print(\"Test veri seti boyutu:\", len(X_test))\n", + "```\n", + "\n", + "### Veri setini ayırırken dikkat edilmesi gerekenler\n", + "\n", + "**Önemli:** Veri setinizi ayırırken aşağıdaki noktalara dikkat edin:\n", + "- Eğitim ve test veri setlerinin birbirinden bağımsız olması gerekir. Test veri seti, modelin daha önce görmediği verilerden oluşmalıdır.\n", + "- Veri setinizin boyutuna bağlı olarak, test veri seti için %20-%30 arasında bir oran genellikle yeterlidir.\n", + "- Eğer veri setiniz dengesizse, katmanlı ayırma kullanmayı düşünün.\n", + "\n", + "### Veri setini ayırmanın önemi\n", + "\n", + "Veri setini doğru bir şekilde ayırmak, modelinizin gerçek dünyadaki performansını değerlendirmek için kritik öneme sahiptir. Eğer test veri seti modelin daha önce gördüğü verilerden oluşuyorsa, modelin performansı olduğundan daha iyi görünebilir. 
Bu nedenle, veri setinizi ayırırken özenli olun.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 20, + "source": [ + "train_start_dt = '2014-11-01 00:00:00'\n", + "test_start_dt = '2014-12-30 00:00:00' " + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 21, + "source": [ + "energy[(energy.index < test_start_dt) & (energy.index >= train_start_dt)][['load']].rename(columns={'load':'train'}) \\\n", + " .join(energy[test_start_dt:][['load']].rename(columns={'load':'test'}), how='outer') \\\n", + " .plot(y=['train', 'test'], figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 22, + "source": [ + "train = energy.copy()[(energy.index >= train_start_dt) & (energy.index < test_start_dt)][['load']]\n", + "test = energy.copy()[energy.index >= test_start_dt][['load']]\n", + "\n", + "print('Training data shape: ', train.shape)\n", + "print('Test data shape: ', test.shape)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Training data shape: (1416, 1)\n", + "Test data shape: (48, 1)\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 23, + "source": [ + "scaler = MinMaxScaler()\n", + "train['load'] = scaler.fit_transform(train)\n", + "train.head(10)" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2014-11-01 00:00:000.10
2014-11-01 01:00:000.07
2014-11-01 02:00:000.05
2014-11-01 03:00:000.04
2014-11-01 04:00:000.06
2014-11-01 05:00:000.10
2014-11-01 06:00:000.19
2014-11-01 07:00:000.31
2014-11-01 08:00:000.40
2014-11-01 09:00:000.48
\n", + "
" + ], + "text/plain": [ + " load\n", + "2014-11-01 00:00:00 0.10\n", + "2014-11-01 01:00:00 0.07\n", + "2014-11-01 02:00:00 0.05\n", + "2014-11-01 03:00:00 0.04\n", + "2014-11-01 04:00:00 0.06\n", + "2014-11-01 05:00:00 0.10\n", + "2014-11-01 06:00:00 0.19\n", + "2014-11-01 07:00:00 0.31\n", + "2014-11-01 08:00:00 0.40\n", + "2014-11-01 09:00:00 0.48" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Orijinal ve ölçeklendirilmiş veri:\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 24, + "source": [ + "energy[(energy.index >= train_start_dt) & (energy.index < test_start_dt)][['load']].rename(columns={'load':'original load'}).plot.hist(bins=100, fontsize=12)\n", + "train.rename(columns={'load':'scaled load'}).plot.hist(bins=100, fontsize=12)\n", + "plt.show()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Hadi test verilerini de ölçeklendirelim\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 25, + "source": [ + "test['load'] = scaler.transform(test)\n", + "test.head()" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2014-12-30 00:00:000.33
2014-12-30 01:00:000.29
2014-12-30 02:00:000.27
2014-12-30 03:00:000.27
2014-12-30 04:00:000.30
\n", + "
" + ], + "text/plain": [ + " load\n", + "2014-12-30 00:00:00 0.33\n", + "2014-12-30 01:00:00 0.29\n", + "2014-12-30 02:00:00 0.27\n", + "2014-12-30 03:00:00 0.27\n", + "2014-12-30 04:00:00 0.30" + ] + }, + "metadata": {}, + "execution_count": 25 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 26, + "source": [ + "# Specify the number of steps to forecast ahead\n", + "HORIZON = 3\n", + "print('Forecasting horizon:', HORIZON, 'hours')" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Forecasting horizon: 3 hours\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 27, + "source": [ + "order = (4, 1, 0)\n", + "seasonal_order = (1, 1, 0, 24)\n", + "\n", + "model = SARIMAX(endog=train, order=order, seasonal_order=seasonal_order)\n", + "results = model.fit()\n", + "\n", + "print(results.summary())\n" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " SARIMAX Results \n", + "==========================================================================================\n", + "Dep. Variable: load No. 
Observations: 1416\n", + "Model: SARIMAX(4, 1, 0)x(1, 1, 0, 24) Log Likelihood 3477.239\n", + "Date: Thu, 30 Sep 2021 AIC -6942.477\n", + "Time: 14:36:28 BIC -6911.050\n", + "Sample: 11-01-2014 HQIC -6930.725\n", + " - 12-29-2014 \n", + "Covariance Type: opg \n", + "==============================================================================\n", + " coef std err z P>|z| [0.025 0.975]\n", + "------------------------------------------------------------------------------\n", + "ar.L1 0.8403 0.016 52.226 0.000 0.809 0.872\n", + "ar.L2 -0.5220 0.034 -15.388 0.000 -0.588 -0.456\n", + "ar.L3 0.1536 0.044 3.470 0.001 0.067 0.240\n", + "ar.L4 -0.0778 0.036 -2.158 0.031 -0.148 -0.007\n", + "ar.S.L24 -0.2327 0.024 -9.718 0.000 -0.280 -0.186\n", + "sigma2 0.0004 8.32e-06 47.358 0.000 0.000 0.000\n", + "===================================================================================\n", + "Ljung-Box (L1) (Q): 0.05 Jarque-Bera (JB): 1464.60\n", + "Prob(Q): 0.83 Prob(JB): 0.00\n", + "Heteroskedasticity (H): 0.84 Skew: 0.14\n", + "Prob(H) (two-sided): 0.07 Kurtosis: 8.02\n", + "===================================================================================\n", + "\n", + "Warnings:\n", + "[1] Covariance matrix calculated using the outer product of gradients (complex-step).\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Modeli değerlendirin\n" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Her bir HORIZON adımı için bir test veri noktası oluşturun.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 28, + "source": [ + "test_shifted = test.copy()\n", + "\n", + "for t in range(1, HORIZON):\n", + " test_shifted['load+'+str(t)] = test_shifted['load'].shift(-t, freq='H')\n", + " \n", + "test_shifted = test_shifted.dropna(how='any')\n", + "test_shifted.head(5)" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
loadload+1load+2
2014-12-30 00:00:000.330.290.27
2014-12-30 01:00:000.290.270.27
2014-12-30 02:00:000.270.270.30
2014-12-30 03:00:000.270.300.41
2014-12-30 04:00:000.300.410.57
\n", + "
" + ], + "text/plain": [ + " load load+1 load+2\n", + "2014-12-30 00:00:00 0.33 0.29 0.27\n", + "2014-12-30 01:00:00 0.29 0.27 0.27\n", + "2014-12-30 02:00:00 0.27 0.27 0.30\n", + "2014-12-30 03:00:00 0.27 0.30 0.41\n", + "2014-12-30 04:00:00 0.30 0.41 0.57" + ] + }, + "metadata": {}, + "execution_count": 28 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Test verileri üzerinde tahminler yapın\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 29, + "source": [ + "%%time\n", + "training_window = 720 # dedicate 30 days (720 hours) for training\n", + "\n", + "train_ts = train['load']\n", + "test_ts = test_shifted\n", + "\n", + "history = [x for x in train_ts]\n", + "history = history[(-training_window):]\n", + "\n", + "predictions = list()\n", + "\n", + "# let's user simpler model for demonstration\n", + "order = (2, 1, 0)\n", + "seasonal_order = (1, 1, 0, 24)\n", + "\n", + "for t in range(test_ts.shape[0]):\n", + " model = SARIMAX(endog=history, order=order, seasonal_order=seasonal_order)\n", + " model_fit = model.fit()\n", + " yhat = model_fit.forecast(steps = HORIZON)\n", + " predictions.append(yhat)\n", + " obs = list(test_ts.iloc[t])\n", + " # move the training window\n", + " history.append(obs[0])\n", + " history.pop(0)\n", + " print(test_ts.index[t])\n", + " print(t+1, ': predicted =', yhat, 'expected =', obs)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "2014-12-30 00:00:00\n", + "1 : predicted = [0.32 0.29 0.28] expected = [0.32945389435989236, 0.2900626678603402, 0.2739480752014323]\n", + "2014-12-30 01:00:00\n", + "2 : predicted = [0.3 0.29 0.3 ] expected = [0.2900626678603402, 0.2739480752014323, 0.26812891674127126]\n", + "2014-12-30 02:00:00\n", + "3 : predicted = [0.27 0.28 0.32] expected = [0.2739480752014323, 0.26812891674127126, 0.3025962399283795]\n", + "2014-12-30 03:00:00\n", + "4 : predicted = [0.28 0.32 0.42] expected = [0.26812891674127126, 
0.3025962399283795, 0.40823634735899716]\n", + "2014-12-30 04:00:00\n", + "5 : predicted = [0.3 0.39 0.54] expected = [0.3025962399283795, 0.40823634735899716, 0.5689346463742166]\n", + "2014-12-30 05:00:00\n", + "6 : predicted = [0.4 0.55 0.66] expected = [0.40823634735899716, 0.5689346463742166, 0.6799462846911368]\n", + "2014-12-30 06:00:00\n", + "7 : predicted = [0.57 0.68 0.75] expected = [0.5689346463742166, 0.6799462846911368, 0.7309758281110115]\n", + "2014-12-30 07:00:00\n", + "8 : predicted = [0.68 0.75 0.8 ] expected = [0.6799462846911368, 0.7309758281110115, 0.7511190689346463]\n", + "2014-12-30 08:00:00\n", + "9 : predicted = [0.75 0.8 0.82] expected = [0.7309758281110115, 0.7511190689346463, 0.7636526410026856]\n", + "2014-12-30 09:00:00\n", + "10 : predicted = [0.77 0.78 0.78] expected = [0.7511190689346463, 0.7636526410026856, 0.7381378692927483]\n", + "2014-12-30 10:00:00\n", + "11 : predicted = [0.76 0.75 0.74] expected = [0.7636526410026856, 0.7381378692927483, 0.7188898836168307]\n", + "2014-12-30 11:00:00\n", + "12 : predicted = [0.77 0.76 0.75] expected = [0.7381378692927483, 0.7188898836168307, 0.7090420769919425]\n", + "2014-12-30 12:00:00\n", + "13 : predicted = [0.7 0.68 0.69] expected = [0.7188898836168307, 0.7090420769919425, 0.7081468218442255]\n", + "2014-12-30 13:00:00\n", + "14 : predicted = [0.72 0.73 0.76] expected = [0.7090420769919425, 0.7081468218442255, 0.7385854968666068]\n", + "2014-12-30 14:00:00\n", + "15 : predicted = [0.71 0.73 0.86] expected = [0.7081468218442255, 0.7385854968666068, 0.8478066248880931]\n", + "2014-12-30 15:00:00\n", + "16 : predicted = [0.73 0.85 0.97] expected = [0.7385854968666068, 0.8478066248880931, 0.9516562220232765]\n", + "2014-12-30 16:00:00\n", + "17 : predicted = [0.87 0.99 0.97] expected = [0.8478066248880931, 0.9516562220232765, 0.934198746642793]\n", + "2014-12-30 17:00:00\n", + "18 : predicted = [0.94 0.92 0.86] expected = [0.9516562220232765, 0.934198746642793, 0.8876454789615038]\n", + 
"2014-12-30 18:00:00\n", + "19 : predicted = [0.94 0.89 0.82] expected = [0.934198746642793, 0.8876454789615038, 0.8294538943598924]\n", + "2014-12-30 19:00:00\n", + "20 : predicted = [0.88 0.82 0.71] expected = [0.8876454789615038, 0.8294538943598924, 0.7197851387645477]\n", + "2014-12-30 20:00:00\n", + "21 : predicted = [0.83 0.72 0.58] expected = [0.8294538943598924, 0.7197851387645477, 0.5747538048343777]\n", + "2014-12-30 21:00:00\n", + "22 : predicted = [0.72 0.58 0.47] expected = [0.7197851387645477, 0.5747538048343777, 0.4592658907788718]\n", + "2014-12-30 22:00:00\n", + "23 : predicted = [0.58 0.47 0.39] expected = [0.5747538048343777, 0.4592658907788718, 0.3858549686660697]\n", + "2014-12-30 23:00:00\n", + "24 : predicted = [0.46 0.38 0.34] expected = [0.4592658907788718, 0.3858549686660697, 0.34377797672336596]\n", + "2014-12-31 00:00:00\n", + "25 : predicted = [0.38 0.34 0.33] expected = [0.3858549686660697, 0.34377797672336596, 0.32542524619516544]\n", + "2014-12-31 01:00:00\n", + "26 : predicted = [0.36 0.34 0.34] expected = [0.34377797672336596, 0.32542524619516544, 0.33034914950760963]\n", + "2014-12-31 02:00:00\n", + "27 : predicted = [0.32 0.32 0.35] expected = [0.32542524619516544, 0.33034914950760963, 0.3706356311548791]\n", + "2014-12-31 03:00:00\n", + "28 : predicted = [0.32 0.36 0.47] expected = [0.33034914950760963, 0.3706356311548791, 0.470008952551477]\n", + "2014-12-31 04:00:00\n", + "29 : predicted = [0.37 0.48 0.65] expected = [0.3706356311548791, 0.470008952551477, 0.6145926589077886]\n", + "2014-12-31 05:00:00\n", + "30 : predicted = [0.48 0.64 0.75] expected = [0.470008952551477, 0.6145926589077886, 0.7247090420769919]\n", + "2014-12-31 06:00:00\n", + "31 : predicted = [0.63 0.73 0.79] expected = [0.6145926589077886, 0.7247090420769919, 0.786034019695613]\n", + "2014-12-31 07:00:00\n", + "32 : predicted = [0.71 0.76 0.79] expected = [0.7247090420769919, 0.786034019695613, 0.8012533572068039]\n", + "2014-12-31 08:00:00\n", + "33 : 
predicted = [0.79 0.82 0.83] expected = [0.786034019695613, 0.8012533572068039, 0.7994628469113696]\n", + "2014-12-31 09:00:00\n", + "34 : predicted = [0.82 0.83 0.81] expected = [0.8012533572068039, 0.7994628469113696, 0.780214861235452]\n", + "2014-12-31 10:00:00\n", + "35 : predicted = [0.8 0.78 0.76] expected = [0.7994628469113696, 0.780214861235452, 0.7587287376902416]\n", + "2014-12-31 11:00:00\n", + "36 : predicted = [0.77 0.75 0.74] expected = [0.780214861235452, 0.7587287376902416, 0.7367949865711727]\n", + "2014-12-31 12:00:00\n", + "37 : predicted = [0.77 0.76 0.76] expected = [0.7587287376902416, 0.7367949865711727, 0.7188898836168307]\n", + "2014-12-31 13:00:00\n", + "38 : predicted = [0.75 0.75 0.78] expected = [0.7367949865711727, 0.7188898836168307, 0.7273948075201431]\n", + "2014-12-31 14:00:00\n", + "39 : predicted = [0.73 0.75 0.87] expected = [0.7188898836168307, 0.7273948075201431, 0.8299015219337511]\n", + "2014-12-31 15:00:00\n", + "40 : predicted = [0.74 0.85 0.96] expected = [0.7273948075201431, 0.8299015219337511, 0.909579230080573]\n", + "2014-12-31 16:00:00\n", + "41 : predicted = [0.83 0.94 0.93] expected = [0.8299015219337511, 0.909579230080573, 0.855863921217547]\n", + "2014-12-31 17:00:00\n", + "42 : predicted = [0.94 0.93 0.88] expected = [0.909579230080573, 0.855863921217547, 0.7721575649059982]\n", + "2014-12-31 18:00:00\n", + "43 : predicted = [0.87 0.82 0.77] expected = [0.855863921217547, 0.7721575649059982, 0.7023276633840643]\n", + "2014-12-31 19:00:00\n", + "44 : predicted = [0.79 0.73 0.63] expected = [0.7721575649059982, 0.7023276633840643, 0.6195165622202325]\n", + "2014-12-31 20:00:00\n", + "45 : predicted = [0.7 0.59 0.46] expected = [0.7023276633840643, 0.6195165622202325, 0.5425246195165621]\n", + "2014-12-31 21:00:00\n", + "46 : predicted = [0.6 0.47 0.36] expected = [0.6195165622202325, 0.5425246195165621, 0.4735899731423454]\n", + "CPU times: user 12min 15s, sys: 2min 39s, total: 14min 54s\n", + "Wall time: 2min 
36s\n" + ] + } + ], + "metadata": { + "scrolled": true + } + }, + { + "cell_type": "markdown", + "source": [ + "Tahminleri gerçek yükle karşılaştır\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 30, + "source": [ + "eval_df = pd.DataFrame(predictions, columns=['t+'+str(t) for t in range(1, HORIZON+1)])\n", + "eval_df['timestamp'] = test.index[0:len(test.index)-HORIZON+1]\n", + "eval_df = pd.melt(eval_df, id_vars='timestamp', value_name='prediction', var_name='h')\n", + "eval_df['actual'] = np.array(np.transpose(test_ts)).ravel()\n", + "eval_df[['prediction', 'actual']] = scaler.inverse_transform(eval_df[['prediction', 'actual']])\n", + "eval_df.head()" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
timestamphpredictionactual
02014-12-30 00:00:00t+13,008.743,023.00
12014-12-30 01:00:00t+12,955.532,935.00
22014-12-30 02:00:00t+12,900.172,899.00
32014-12-30 03:00:00t+12,917.692,886.00
42014-12-30 04:00:00t+12,946.992,963.00
\n", + "
" + ], + "text/plain": [ + " timestamp h prediction actual\n", + "0 2014-12-30 00:00:00 t+1 3,008.74 3,023.00\n", + "1 2014-12-30 01:00:00 t+1 2,955.53 2,935.00\n", + "2 2014-12-30 02:00:00 t+1 2,900.17 2,899.00\n", + "3 2014-12-30 03:00:00 t+1 2,917.69 2,886.00\n", + "4 2014-12-30 04:00:00 t+1 2,946.99 2,963.00" + ] + }, + "metadata": {}, + "execution_count": 30 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Tüm tahminler için **ortalama mutlak yüzde hatasını (MAPE)** hesaplayın\n", + "\n", + "$$MAPE = \\frac{1}{n} \\sum_{t=1}^{n}|\\frac{actual_t - predicted_t}{actual_t}|$$\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 31, + "source": [ + "if(HORIZON > 1):\n", + " eval_df['APE'] = (eval_df['prediction'] - eval_df['actual']).abs() / eval_df['actual']\n", + " print(eval_df.groupby('h')['APE'].mean())" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "h\n", + "t+1 0.01\n", + "t+2 0.01\n", + "t+3 0.02\n", + "Name: APE, dtype: float64\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 32, + "source": [ + "print('One step forecast MAPE: ', (mape(eval_df[eval_df['h'] == 't+1']['prediction'], eval_df[eval_df['h'] == 't+1']['actual']))*100, '%')" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "One step forecast MAPE: 0.5570581332313952 %\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 33, + "source": [ + "print('Multi-step forecast MAPE: ', mape(eval_df['prediction'], eval_df['actual'])*100, '%')" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Multi-step forecast MAPE: 1.1460048657704118 %\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Test setinin ilk haftası için tahminleri ve gerçek değerleri karşılaştırın.\n" + ], + 
"metadata": {} + }, + { + "cell_type": "code", + "execution_count": 34, + "source": [ + "if(HORIZON == 1):\n", + " ## Plotting single step forecast\n", + " eval_df.plot(x='timestamp', y=['actual', 'prediction'], style=['r', 'b'], figsize=(15, 8))\n", + "\n", + "else:\n", + " ## Plotting multi step forecast\n", + " plot_df = eval_df[(eval_df.h=='t+1')][['timestamp', 'actual']]\n", + " for t in range(1, HORIZON+1):\n", + " plot_df['t+'+str(t)] = eval_df[(eval_df.h=='t+'+str(t))]['prediction'].values\n", + "\n", + " fig = plt.figure(figsize=(15, 8))\n", + " ax = plt.plot(plot_df['timestamp'], plot_df['actual'], color='red', linewidth=4.0)\n", + " ax = fig.add_subplot(111)\n", + " for t in range(1, HORIZON+1):\n", + " x = plot_df['timestamp'][(t-1):]\n", + " y = plot_df['t+'+str(t)][0:len(x)]\n", + " ax.plot(x, y, color='blue', linewidth=4*math.pow(.9,t), alpha=math.pow(0.8,t))\n", + " \n", + " ax.legend(loc='best')\n", + " \n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "No handles with labels found to put in legend.\n" + ] + }, + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, AI çeviri hizmeti [Co-op Translator](https://github.com/Azure/co-op-translator) kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlık içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. Bu çevirinin kullanımından kaynaklanan yanlış anlamalar veya yanlış yorumlamalardan sorumlu değiliz.\n" + ] + } + ], + "metadata": { + "kernel_info": { + "name": "python3" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "nteract": { + "version": "nteract-front-end@1.0.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "c193140200b9684da27e3890211391b6", + "translation_date": "2025-09-06T13:59:36+00:00", + "source_file": "7-TimeSeries/2-ARIMA/solution/notebook.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/tr/7-TimeSeries/2-ARIMA/working/notebook.ipynb b/translations/tr/7-TimeSeries/2-ARIMA/working/notebook.ipynb new file mode 100644 index 000000000..044eae09b --- /dev/null +++ b/translations/tr/7-TimeSeries/2-ARIMA/working/notebook.ipynb 
@@ -0,0 +1,59 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "523ec472196307b3c4235337353c9ceb", + "translation_date": "2025-09-06T14:00:49+00:00", + "source_file": "7-TimeSeries/2-ARIMA/working/notebook.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# ARIMA ile Zaman Serisi Tahmini\n", + "\n", + "Bu not defterinde, aşağıdaki adımları nasıl gerçekleştireceğimizi göstereceğiz:\n", + "- ARIMA zaman serisi tahmin modeli için zaman serisi verilerini eğitime hazırlama\n", + "- Zaman serisinde bir sonraki HORIZON adımlarını (zaman *t+1*'den *t+HORIZON*'a kadar) tahmin etmek için basit bir ARIMA modeli uygulama\n", + "- Modeli değerlendirme\n", + "\n", + "Bu örnekteki veriler, GEFCom2014 tahmin yarışmasından alınmıştır. 2012 ile 2014 yılları arasında 3 yıllık saatlik elektrik yükü ve sıcaklık değerlerinden oluşmaktadır. Görev, elektrik yükünün gelecekteki değerlerini tahmin etmektir. Bu örnekte, yalnızca geçmiş yük verilerini kullanarak bir zaman adımı ileriye tahmin yapmayı göstereceğiz.\n", + "\n", + "Tao Hong, Pierre Pinson, Shu Fan, Hamidreza Zareipour, Alberto Troccoli ve Rob J. Hyndman, \"Probabilistic energy forecasting: Global Energy Forecasting Competition 2014 and beyond\", International Journal of Forecasting, cilt 32, sayı 3, s. 
896-913, Temmuz-Eylül, 2016.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pip install statsmodels" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, AI çeviri hizmeti [Co-op Translator](https://github.com/Azure/co-op-translator) kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlık içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. Bu çevirinin kullanımından kaynaklanan yanlış anlamalar veya yanlış yorumlamalardan sorumlu değiliz.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/tr/7-TimeSeries/3-SVR/solution/notebook.ipynb b/translations/tr/7-TimeSeries/3-SVR/solution/notebook.ipynb new file mode 100644 index 000000000..84b2a0097 --- /dev/null +++ b/translations/tr/7-TimeSeries/3-SVR/solution/notebook.ipynb @@ -0,0 +1,1023 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "fv9OoQsMFk5A" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Bu not defterinde şunları nasıl yapacağımızı gösteriyoruz:\n", + "\n", + "- 2D zaman serisi verilerini bir SVM regresör modeli için eğitime hazırlamak \n", + "- RBF çekirdeği kullanarak SVR'yi uygulamak \n", + "- Modeli grafikler ve MAPE kullanarak değerlendirmek \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Modüllerin İçe Aktarılması\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append('../../')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "M687KNlQFp0-" + }, + "outputs": [], + "source": [ + "import os\n", + "import warnings\n", + "import 
matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import datetime as dt\n", + "import math\n", + "\n", + "from sklearn.svm import SVR\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from common.utils import load_data, mape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Cj-kfVdMGjWP" + }, + "source": [ + "## Veri hazırlama\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8fywSjC6GsRz" + }, + "source": [ + "### Verileri yükle\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "aBDkEB11Fumg", + "outputId": "99cf7987-0509-4b73-8cc2-75d7da0d2740" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2012-01-01 00:00:002698.0
2012-01-01 01:00:002558.0
2012-01-01 02:00:002444.0
2012-01-01 03:00:002402.0
2012-01-01 04:00:002403.0
\n", + "
" + ], + "text/plain": [ + " load\n", + "2012-01-01 00:00:00 2698.0\n", + "2012-01-01 01:00:00 2558.0\n", + "2012-01-01 02:00:00 2444.0\n", + "2012-01-01 03:00:00 2402.0\n", + "2012-01-01 04:00:00 2403.0" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "energy = load_data('../../data')[['load']]\n", + "energy.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "O0BWP13rGnh4" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 486 + }, + "id": "hGaNPKu_Gidk", + "outputId": "7f89b326-9057-4f49-efbe-cb100ebdf76d" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "energy.plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IPuNor4eGwYY" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "ysvsNyONGt0Q" + }, + "outputs": [], + "source": [ + "train_start_dt = '2014-11-01 00:00:00'\n", + "test_start_dt = '2014-12-30 00:00:00'" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 548 + }, + "id": "SsfdLoPyGy9w", + "outputId": "d6d6c25b-b1f4-47e5-91d1-707e043237d7" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "energy[(energy.index < test_start_dt) & (energy.index >= train_start_dt)][['load']].rename(columns={'load':'train'}) \\\n", + " .join(energy[test_start_dt:][['load']].rename(columns={'load':'test'}), how='outer') \\\n", + " .plot(y=['train', 'test'], figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XbFTqBw6G1Ch" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Şimdi, verilerinizi filtreleme ve ölçeklendirme işlemleri yaparak eğitim için hazırlamanız gerekiyor.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cYivRdQpHDj3", + "outputId": "a138f746-461c-4fd6-bfa6-0cee094c4aa1" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training data shape: (1416, 1)\n", + "Test data shape: (48, 1)\n" + ] + } + ], + "source": [ + "train = energy.copy()[(energy.index >= train_start_dt) & (energy.index < test_start_dt)][['load']]\n", + "test = energy.copy()[energy.index >= test_start_dt][['load']]\n", + "\n", + "print('Training data shape: ', train.shape)\n", + "print('Test data shape: ', test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Verileri (0, 1) aralığında olacak şekilde ölçeklendir.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "3DNntGQnZX8G", + "outputId": "210046bc-7a66-4ccd-d70d-aa4a7309949c" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2014-11-01 00:00:000.101611
2014-11-01 01:00:000.065801
2014-11-01 02:00:000.046106
2014-11-01 03:00:000.042525
2014-11-01 04:00:000.059087
\n", + "
" + ], + "text/plain": [ + " load\n", + "2014-11-01 00:00:00 0.101611\n", + "2014-11-01 01:00:00 0.065801\n", + "2014-11-01 02:00:00 0.046106\n", + "2014-11-01 03:00:00 0.042525\n", + "2014-11-01 04:00:00 0.059087" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "scaler = MinMaxScaler()\n", + "train['load'] = scaler.fit_transform(train)\n", + "train.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "26Yht-rzZexe", + "outputId": "20326077-a38a-4e78-cc5b-6fd7af95d301" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2014-12-30 00:00:000.329454
2014-12-30 01:00:000.290063
2014-12-30 02:00:000.273948
2014-12-30 03:00:000.268129
2014-12-30 04:00:000.302596
\n", + "
" + ], + "text/plain": [ + " load\n", + "2014-12-30 00:00:00 0.329454\n", + "2014-12-30 01:00:00 0.290063\n", + "2014-12-30 02:00:00 0.273948\n", + "2014-12-30 03:00:00 0.268129\n", + "2014-12-30 04:00:00 0.302596" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test['load'] = scaler.transform(test)\n", + "test.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x0n6jqxOQ41Z" + }, + "source": [ + "### Zaman adımlarıyla veri oluşturma\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fdmxTZtOQ8xs" + }, + "source": [ + "SVR için, giriş verilerini `[batch, timesteps]` biçiminde dönüştürüyoruz. Bu nedenle, mevcut `train_data` ve `test_data` verilerini, zaman adımlarını ifade eden yeni bir boyut olacak şekilde yeniden şekillendiriyoruz. Örneğimizde, `timesteps = 5` olarak alıyoruz. Bu durumda, modelin girdileri ilk 4 zaman adımına ait veriler olacak ve çıktı 5. zaman adımına ait veriler olacaktır.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "Rpju-Sc2HFm0" + }, + "outputs": [], + "source": [ + "# Converting to numpy arrays\n", + "\n", + "train_data = train.values\n", + "test_data = test.values" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# Selecting the timesteps\n", + "\n", + "timesteps=5" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "O-JrsrsVJhUQ", + "outputId": "c90dbe71-bacc-4ec4-b452-f82fe5aefaef" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(1412, 5)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Converting data to 2D tensor\n", + "\n", + "train_data_timesteps=np.array([[j for j in train_data[i:i+timesteps]] for i in range(0,len(train_data)-timesteps+1)])[:,:,0]\n", + 
"train_data_timesteps.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "exJD8AI7KE4g", + "outputId": "ce90260c-f327-427d-80f2-77307b5a6318" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(44, 5)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Converting test data to 2D tensor\n", + "\n", + "test_data_timesteps=np.array([[j for j in test_data[i:i+timesteps]] for i in range(0,len(test_data)-timesteps+1)])[:,:,0]\n", + "test_data_timesteps.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "2u0R2sIsLuq5" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(1412, 4) (1412, 1)\n", + "(44, 4) (44, 1)\n" + ] + } + ], + "source": [ + "x_train, y_train = train_data_timesteps[:,:timesteps-1],train_data_timesteps[:,[timesteps-1]]\n", + "x_test, y_test = test_data_timesteps[:,:timesteps-1],test_data_timesteps[:,[timesteps-1]]\n", + "\n", + "print(x_train.shape, y_train.shape)\n", + "print(x_test.shape, y_test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8wIPOtAGLZlh" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "id": "EhA403BEPEiD" + }, + "outputs": [], + "source": [ + "# Create model using RBF kernel\n", + "\n", + "model = SVR(kernel='rbf',gamma=0.5, C=10, epsilon = 0.05)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GS0UA3csMbqp", + "outputId": "d86b6f05-5742-4c1d-c2db-c40510bd4f0d" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "SVR(C=10, cache_size=200, coef0=0.0, degree=3, epsilon=0.05, gamma=0.5,\n", + " kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "# Fit model on training data\n", + "\n", + "model.fit(x_train, y_train[:,0])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Rz_x8S3UrlcF" + }, + "source": [ + "### Model tahmini yap\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XR0gnt3MnuYS", + "outputId": "157e40ab-9a23-4b66-a885-0d52a24b2364" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(1412, 1) (44, 1)\n" + ] + } + ], + "source": [ + "# Making predictions\n", + "\n", + "y_train_pred = model.predict(x_train).reshape(-1,1)\n", + "y_test_pred = model.predict(x_test).reshape(-1,1)\n", + "\n", + "print(y_train_pred.shape, y_test_pred.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_2epncg-SGzr" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1412 44\n" + ] + } + ], + "source": [ + "# Scaling the predictions\n", + "\n", + "y_train_pred = scaler.inverse_transform(y_train_pred)\n", + "y_test_pred = scaler.inverse_transform(y_test_pred)\n", + "\n", + "print(len(y_train_pred), len(y_test_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xmm_YLXhq7gV", + "outputId": "18392f64-4029-49ac-c71a-a4e2411152a1" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1412 44\n" + ] + } + ], + "source": [ + "# Scaling the original values\n", + "\n", + "y_train = scaler.inverse_transform(y_train)\n", + "y_test = scaler.inverse_transform(y_test)\n", + "\n", + "print(len(y_train), len(y_test))" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "u3LBj93coHEi", + 
"outputId": "d4fd49e8-8c6e-4bb0-8ef9-ca0b26d725b4" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1412 44\n" + ] + } + ], + "source": [ + "# Extract the timesteps for x-axis\n", + "\n", + "train_timestamps = energy[(energy.index < test_start_dt) & (energy.index >= train_start_dt)].index[timesteps-1:]\n", + "test_timestamps = energy[test_start_dt:].index[timesteps-1:]\n", + "\n", + "print(len(train_timestamps), len(test_timestamps))" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(25,6))\n", + "plt.plot(train_timestamps, y_train, color = 'red', linewidth=2.0, alpha = 0.6)\n", + "plt.plot(train_timestamps, y_train_pred, color = 'blue', linewidth=0.8)\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.title(\"Training data prediction\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "LnhzcnYtXHCm", + "outputId": "f5f0d711-f18b-4788-ad21-d4470ea2c02b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MAPE for training data: 1.7195710200875551 %\n" + ] + } + ], + "source": [ + "print('MAPE for training data: ', mape(y_train_pred, y_train)*100, '%')" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 225 + }, + "id": "53Q02FoqQH4V", + "outputId": "53e2d59b-5075-4765-ad9e-aed56c966583" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(10,3))\n", + "plt.plot(test_timestamps, y_test, color = 'red', linewidth=2.0, alpha = 0.6)\n", + "plt.plot(test_timestamps, y_test_pred, color = 'blue', linewidth=0.8)\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "clOAUH-SXCJG", + "outputId": "a3aa85ff-126a-4a4a-cd9e-90b9cc465ef5" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MAPE for testing data: 1.2623790187854018 %\n" + ] + } + ], + "source": [ + "print('MAPE for testing data: ', mape(y_test_pred, y_test)*100, '%')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DHlKvVCId5ue" + }, + "source": [ + "## Tam veri seti tahmini\n" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cOFJ45vreO0N", + "outputId": "35628e33-ecf9-4966-8036-f7ea86db6f16" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tensor shape: (26300, 5)\n", + "X shape: (26300, 4) \n", + "Y shape: (26300, 1)\n" + ] + } + ], + "source": [ + "# Extracting load values as numpy array\n", + "data = energy.copy().values\n", + "\n", + "# Scaling\n", + "data = scaler.transform(data)\n", + "\n", + "# Transforming to 2D tensor as per model input requirement\n", + "data_timesteps=np.array([[j for j in data[i:i+timesteps]] for i in range(0,len(data)-timesteps+1)])[:,:,0]\n", + "print(\"Tensor shape: \", data_timesteps.shape)\n", + "\n", + "# Selecting inputs and outputs from data\n", + "X, Y = data_timesteps[:,:timesteps-1],data_timesteps[:,[timesteps-1]]\n", + "print(\"X shape: \", X.shape,\"\\nY shape: \", Y.shape)" + ] + }, + { + "cell_type": "code", + 
"execution_count": 26, + "metadata": { + "id": "ESSAdQgwexIi" + }, + "outputs": [], + "source": [ + "# Make model predictions\n", + "Y_pred = model.predict(X).reshape(-1,1)\n", + "\n", + "# Inverse scale and reshape\n", + "Y_pred = scaler.inverse_transform(Y_pred)\n", + "Y = scaler.inverse_transform(Y)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 328 + }, + "id": "M_qhihN0RVVX", + "outputId": "a89cb23e-1d35-437f-9d63-8b8907e12f80" + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAABrgAAAHgCAYAAAD+LG2qAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAEAAElEQVR4nOydd5jcxN3Hv7pzBVNsmumYEjoYQkvoEFog1FDCGwiEmkACISSU0EIvCYRuG0wHUwwGA8bYuIMx7r33frbvznfnO1/Z1bx/bNNqVUbS7Eq7+/08zz23K41mZlVmRr+qCSFACCGEEEIIIYQQQgghhBBCSLFQEXYHCCGEEEIIIYQQQgghhBBCCPECFVyEEEIIIYQQQgghhBBCCCGkqKCCixBCCCGEEEIIIYQQQgghhBQVVHARQgghhBBCCCGEEEIIIYSQooIKLkIIIYQQQgghhBBCCCGEEFJUUMFFCCGEEEIIIYQQQgghhBBCiop2YXfAie23317stddeYXeDEEIIIYQQQgghhBBCCCGEFJhJkyZtEELsYLUv0gquvfbaCxMnTgy7G4QQQgghhBBCCCGEEEIIIaTAaJq2zG4fQxQSQgghhBBCCCGEEEIIIYSQooIKLkIIIYQQQgghhBBCCCGEEFJUUMFFCCGEEEIIIYQQQgghhBBCiopI5+AihBBCCCGEEEIIIYQQQgiJKm1tbVi5ciWam5vD7kpR06lTJ+y2225o37699DFUcBFCCCGEEEIIIYQQQgghhPhg5cqV2GqrrbDXXntB07Swu1OUCCFQXV2NlStXokePHtLHMUQhIYQQQgghhBBCCCGEEEKID5qbm7HddttRuRUATdOw3XbbefaCo4KLEEIIIYQQQgghhBBCCCHEJ1RuBcfPOaSCixBCCCGEEEIIIYQQQgghpMj5/PPPoWka5s6d61juf//7H5qamny389Zbb+HWW2/1fbwqqOAihBBCCCGEEEIIIYQQQggpcvr164cTTjgB/fr1cywXVMEVFajgIoQQQgghhBBCCCGEEEIIKWI2bdqE77//Hn379sWHH34IAIjH47jzzjtxyCGH4LDDDsOLL76IF154AatXr8app56KU089FQDQpUuXdD39+/fHNddcAwD48ssvceyxx+KII47Ar371K1RVVRX8dznRLuwOEEIIIYQQQgghhBBCCCGEFD033ZSfenv3di3yxRdf4Oyzz8bPfvYzbLfddpg0aRLGjx+PpUuXYurUqWjXrh1qamrQrVs3PPvssxgxYgS23357xzpPOOEEjBs3Dpqm4fXXX8fTTz+N//73v6p+VWCo4CKEEEIIIYQQQgghhBBCCCli+vXrh9tuuw0AcMUVV6Bfv35YsmQJbr75ZrRrl1AFdevWzVOdK1euxOWXX441a9agtbUVPXr0UN
7vIFDBRQghhBBCCCGEEEIIIYQQEhQJT6t8UFNTg+HDh2PGjBnQNA3xeByapuHoo4+WOl7TtPTn5ubm9Oe//OUvuOOOO3D++edj5MiReOihh1R3PRDMwUUIIYQQQgghhBBCCCGEEFKk9O/fH1dddRWWLVuGpUuXYsWKFejRowcOP/xw9O7dG7FYDEBCEQYAW221FRoaGtLH77TTTpgzZw50XceAAQPS2+vq6rDrrrsCAN5+++0C/iI5qOAihBBCCCGEEEIIIYQQQggpUvr164eLLrooa9sll1yCNWvWYI899sBhhx2Gww8/HB988AEA4MYbb8TZZ5+NU089FQDw5JNP4rzzzsMvf/lL7Lzzzuk6HnroIVx66aX4+c9/7pqvKww0IUTYfbDlqKOOEhMnTgy7G4QQQgghhBBCCCGEEEIIITnMmTMHBx54YNjdKAmszqWmaZOEEEdZlacHFyGEEEIIIYSUK1VVQH192L0ghBBCCCGEEM+0C7sDhBBCCCGEEEJCoKEBeOCBxOeQkmETQgghhBBCiF/owUUIIYQQQggh5ci6dWH3gBBCCCGEEEJ8QwUXIYQQQgghhJQjsVjYPSCEEEIIIYQQ31DBRQghhBBCCCHliK6H3QNCCCGEEEII8Q0VXIQQQgghhBBSjlDBRQghhBBCCCliqOAihBBCCCGEkHKithZYuRKIx8PuCSGEEEIIIUQBlZWV6NmzJw455BBceumlaGpq8l3XNddcg/79+wMArr/+esyePdu27MiRIzF27FjPbey1117YsGGD7z6moIKLEEIIIYQQQsqJu+8GHnkE2Lgx7J4QQgghhBBCFNC5c2dMnToVM2fORIcOHdCrV6+s/TGf+Xdff/11HHTQQbb7/Sq4VEEFFyGEEEIIIYSUI599FnYPCCGEEEIIIYo58cQTsXDhQowcORInnngizj//fBx00EGIx+P4xz/+gaOPPhqHHXYYevfuDQAQQuDWW2/F/vvvj1/96ldYt25duq5TTjkFEydOBAAMHjwYRx55JA4//HCcfvrpWLp0KXr16oXnnnsOPXv2xJgxY7B+/XpccsklOProo3H00Ufjhx9+AABUV1fjzDPPxMEHH4zrr78eQgglv7WdkloIIYQQQgghhESX2lpg8mTghBMy2zZvDq8/hBBCCCGEEOXEYjF88803OPvsswEAkydPxsyZM9GjRw/06dMH22yzDSZMmICWlhYcf/zxOPPMMzFlyhTMmzcPs2fPRlVVFQ466CD88Y9/zKp3/fr1uOGGGzB69Gj06NEDNTU16NatG26++WZ06dIFd955JwDgyiuvxN/+9jeccMIJWL58Oc466yzMmTMH//73v3HCCSfggQcewNdff42+ffsq+b1UcBFCCCGEEEJIqfPMM0B1NfDxx2H3hBBCCCGEkJJG09TX6ebwtHnzZvTs2RNAwoPruuuuw9ixY3HMMcegR48eAIAhQ4Zg+vTp6fxadXV1WLBgAUaPHo3f/e53qKysxC677ILTTjstp/5x48bhpJNOStfVrVs3y3589913WTm76uvrsWnTJowePRqfJSNInHvuuejataun328HFVyEEEIIIYQQUupUV4fdA0IIIYQQQsoCRdH3PJHKwWVmyy23TH8WQuDFF1/EWWedlVVm0KBByvqh6zrGjRuHTp06KavTCebgIoQQQgghhBBCCCGEEEIIKWHOOussvPrqq2hrawMAzJ8/H42NjTjppJPw0UcfIR6PY82aNRgxYkTOsccddxxGjx6NJUuWAABqamoAAFtttRUaGhrS5c4880y8+OKL6e8ppdtJJ52EDz74AADwzTffoLa2VslvooKLEEIIIYQQQgghhBBCCCGkhLn++utx0EEH4cgjj8QhhxyCm266CbFYDBdddBH2228/HHTQQbj66qvxi1/8IufYHXbYAX369MHFF1+Mww8/HJdffjkA4De/+Q0GDBiAnj17YsyYMXjhhRcwceJEHHbYYTjooIPQq1cvAM
CDDz6I0aNH4+CDD8Znn32GPfbYQ8lv0kQY/nKSHHXUUWLixIlhd4MQQgghhBBCipubbnLe37t3YfpBCCGEEEJIiTFnzhwceOCBYXejJLA6l5qmTRJCHGVVnh5chBBCCCGEEFLuRNjwkRBCCCGEEEKsoIKLEEIIIYQQQgghhBBCCCGEFBVUcBFCCCGEEEJIuaPrYfeAEEIIIYQQQjxBBRchhBBCCCGElDsMUUgIIYQQQohvBNfTgfFzDqngIoQQQgghhBBCCCGEEEII8UGnTp1QXV1NJVcAhBCorq5Gp06dPB3XLk/9IYQQQgghhBBSLPBlnBBCCCGEEF/stttuWLlyJdavXx92V4qaTp06YbfddvN0DBVchBBCCCGEEEIIIYQQQgghPmjfvj169OgRdjfKEoYoJIQQQgghhJByhx5chBBCCCGEkCKDCi5CCCGEEEIIKXeo4CKEEELCYeVKYNmysHtBCCFFCUMUEkIIIYQQQgghhBBCSBg88kji/8svA+0oqiWEEC9IeXBpmrZU07QZmqZN1TRtYnJbN03ThmqatiD5v2tyu6Zp2guapi3UNG26pmlHGur5Q7L8Ak3T/pCfn0QIIYQQQgghxBP04CKEEELCJRYLuweEEFJ0eAlReKoQoqcQ4qjk97sBDBNC7AdgWPI7AJwDYL/k340AXgUSCjEADwI4FsAxAB5MKcUIIYQQQgghhBBCCCGkbKGxCSGEeCZIDq4LALyd/Pw2gAsN298RCcYB2FbTtJ0BnAVgqBCiRghRC2AogLMDtE8IIYQQQgghRAW6HnYPCCGEkPKGCi5CCPGMrIJLABiiadokTdNuTG7bSQixJvl5LYCdkp93BbDCcOzK5Da77YQQQgghhBBCCCGEEFJeGJVaVHARQohnZDMXniCEWKVp2o4AhmqaNte4UwghNE1TMgonFWg3AsAee+yhokpCCCGEEEIIIU5QqEYIIYSEC72pCSHEM1IeXEKIVcn/6wAMQCKHVlUy9CCS/9cli68CsLvh8N2S2+y2m9vqI4Q4Sghx1A477ODt1xBCCCGEEEIIIYQQQkgxQAMTQggJhKuCS9O0LTVN2yr1GcCZAGYCGAjgD8lifwDwRfLzQABXawmOA1CXDGX4LYAzNU3rqmla12Q93yr9NYQQQgghhBBCvEMBGyGEEFJ4jF5bnIsJIcQzMiEKdwIwQNO0VPkPhBCDNU2bAOBjTdOuA7AMwGXJ8oMA/BrAQgBNAK4FACFEjaZpjwCYkCz3sBCiRtkvIYQQQgghhBDiDwrVCCGEEEIIIUWGq4JLCLEYwOEW26sBnG6xXQC4xaauNwC84b2bhBBCCCGEEEIIIYQQUkIYDUxobEIIIZ6RysFFCCGEEEIIIaSEoVCNEEIIKTxUcBFCSCCo4CKEEEIIIYQQQgghhJBCE5ZSa8oU4OOPs3OAEUJIESKTg4sQQgghhBBCSClDq3FCCCGk8ITlwdWrV+L/nnsCxx5buHYJIUQx9OAihBBCCCGEkHKHCi5CCCGk8IQ9/1ZXh9s+IYQEhAouQgghhWXNGqCpKexeEEIIIcRI2AI2QgghpBxhDi5CCAkEFVyEEEISbN4MvPwyMHVq/tqoqgIeegi45578tUEIIYQQQgghhBQDVHARQkggqOAihBCSYMgQYPp04NVX89fG/PmJ/83N+WuDEEIIKWUWLwaefTbhEa0SCtUIIYSQwsP5lxBCAkEFFyGEkASbN+e/DV3PfxuEEEJIKfPUU8C8eZnk8KqggI0QQggpPGF7cHH+J4QUOVRwEUIISaBpYfeAEEIIIbKsXRt2Dwgh+aa5mQZihJQ6VDARQkggqOAihBCSwKjgamzMTxtcvBNCCCHRhHM0IdGiuRm47TbgwQfD7gkhpFDQg4sQQjxDBRchhJAERgXXHXcA69eH1xdCCCGEOLP//mH3gBCST1asSPxfty7cfhBC8kvYIQoJIaTIoYKLEEKINVOnqq+TC3ZCCCEkMR/GYsHrUAnnaE
KiBcOHE1IecP4lhJBAUMFFCCEkQYVpSggqeCOEEEKINU8/DdxyC9DSEnZPMlDARki0oIKLEFIIOP8TQoocKrgIIYQkML9E86WaEEIIyQ+LFyf+v/VWIs+OH+jBRUhpw7U4IeUH52JCCPEMFVyEEEISFELBxQU7IYQQkmHyZOD998PuBSGEEEIIIYQUJVRwEUJI2EyeDMyZE3YvCmMlSgUXIYQQko3fNQA9uAgpbejBRUj5EcZczPmfEFLktAu7A4QQUtY0NwO9eyc+v/IKUFkZXl8YopAQQggpHqjgIqS04VqckPKgtTXzmXMxIYR4hh5chBASJm1t1p/DgC/RhBBCSOGhMIsQYgXX5oSUPvX1wP33h9sHrkMIIUUOFVyEEBImup75HI+H1w+AHlyEEEJIGERFsBSVfhBCCCHlwtSp2d/t5uIFC4DHHgNWrMh7lwghpNiggosQQsLEqOAyfg6DQii4KDwjhBBCsvE7N3JOJSQabNoEvPwyMHt22D0hhBQbsu/c//kPsHw58Oqr6vvA9QQhpMihgosQQsLEqNR6912gpSW8vhBCCCEk/0RVkBTVfhESdb74Apg+HXj++bB7Qggpdtzm4uZm9W2GbWhLCCEBoYKLEELCxBiWcNo04JtvwuuLGYYoJIQQQtRjFiT5FSwpUkhpfXpjSf12VHAR4pfNm8PuASGkVAhjLub8TwgpcqjgIoSQsGhpAfr2zd5WWxtOX4DCLGy5eCaEEFLuqLKU9jinPj7lHNww+veW+zY0d+EcTYhfKivD7gEhpFjxOvdyriaEkByo4CKEkLAYNQpYujR7W6l7TXFBTgghpNwxem8DBZv7X551Ml6feyK0Pr1z9lVonJ8J8U2pr98JIYXD7X2Z4QQJISQHKrgIISQsmppytxXyBbmqCnjhhVwlW4oKThGEEEKIclQpuBQajVDBRQghhBQBNBglhJAcKL0khJAoUUgFV//+wKxZwBNPFK5NLsgJIYSUO2br6wh4f1RqtAgnhBBCQieIB9fGjUAsprQ7hBBSDFDBRQghUaKQQq716533R0DgRgghhJQcZg+uCKDRg0sOGuoQQghRiXlecZtn7PZXVwN33QU8/LCafhFCSBFBBRchhISF1eK0kIITvwmxN2wAXnsNWLnS23Hz5gGffeavTUIIIaRUMM/1IYQo/NOYK/HdygOU1VcW6Drw2GPAO++E3RMSNWgURggpFHYeXAsWJP5XVRWuL4QQEhGo4CKEkChRDMKlvn2BiROBp5/2dtyzz+anP4QQQkgxE4KCq9eck/Ha3BN9H1+WLFsGrFgB/PBD2D0h5UifPkBLS9i9IISoxqsHl52Ci4p2QkgZQwUXIYREiSgpuCpspoiamsR/vmQTQggh3gljrndpUwgtWmuQKELhIQmTSZOA777L3rZyJdDQEE5/CCHRgnM4IaSMaRd2BwghpGyxWoQ6JY3NN7KLYjvFFyGEEEK8E4IHFwB8vPgojHp3v0RVoPLGE0JQ4UUKj1GZtW4d8Mgjic+9e4fTH0KIevzO7UHkCFSOEUKKHEopCSGEeIMCHUIIIcQ/qnJwKaBq8zahta2EhobCebAYr1PKm50QoHDPcDye+bxmTWHaJIQUFjdlE9/FCSEkByq4CCEkSoRpPWVu+/33gfnzw+kHrcgIIYSUCyF5cBU9QgB33pn4K/S5oICRhE1lZdg9IISoQNX8ZayHqQQIIWUGFVyEEELs+e9/c7flU4gkBPDMM8Czz+avDUIIIaQUUD0fF7PCjAouQoLT3JwIdzh9etg9IaR88TufGY/761+BtjbXQ2J6BVrjVJYTQoofKrgIISQsrBavxSBcMvZx2jS1dcfjwKJFCc+xYjgXhBBCSDHgMqcW5ZRr7HQsVti2qeAipcjQodBuvgmj/jUk7J4QQuywm3/ME/nata5V/d/w67D3h48p6BQhhIQLFVyEEFKuqJBmvfJK8DrsKLSwihBCCCkCnpx6Fir6vOr5OCedjIAGDB
gQoFchoOuZz1wzkDJkSf122NjSWV2FjY0AgOk1u6mrkxDiDRUeXED2HGnDpA17YFVjV3/tEUJIhKCCixBCokQxmFDnO0RhColFOSGksCya04q+z9UDs2Yxvj8hfgk4j07esAcEKtTPx3PnAuvWqa0znxh/f6HXTxV8jSbhs/eHj+P/hl8XdjcIIUFIzl+//uZWPDHlbP/zmfndWaKeCq0IZA+EECIBV+aEEBIWYSuzzKbcYfcHyF6YR6E/hJAsnrxiKq6/Y2sse+y9/HpwElJORGm+KybFdaEVXFG6TiRaaBoW129f2FskuY6vbd1CTX31SeMVQkhhSQ4c36w4FP0WHe0+18iGKJRAA+c1QkhpQAUXIYSERdiCkrDbtyJMa2xCiDutCeH3Xv2ewIKfakLuDCElgsf5Li3a8nycRPmZMz3VGSqF9vo2tkcPLmJE17HPh4/h25UHh90T/9x/f9qDkytwQgqIYf6K6RXAf/+b8Ki2Q6GCS8mxhBASAbgyJ4SQKFEMi8tChSgshnNBSBnTHG8XdhcIKVqmVe+GNU1b+zo2rajyMk+6lE3vnTLFV59CgR5cJAp8/jkwdiwAoKGtY37byuM9WL2xEv8af0He6ieE2GB4ruMiKaLt0ydQPZbfLWCIQkJIqUAFFyGEhMyMml1Q39op8aUYhCdUcBFCCCGB6Pnp/bjsuxt9Hav5FEjZ2HwDAITQUh981R0KRq+tQntwFdN5Ivnlm2/SH+N68YpXhqw8CI9P/XXY3SCk/DDMX2kFlx8vYR8KLoYoJISUCsW7AiOEkGInueg8rP+DuOuni0PuTESg8IgQQkipk5zfWnx6QaYtrj16cEkpxo4/3lefQoEeXCRi6I5qZAUYQ5PZhSlTQGNbR1RV5a16QogRq7nFyWhDVYjCXr3yOYwUjro6oKEh7F4QQkKGCi5CCIkAzfH2YXfBmi0sElfTg4sQQggJjJ6y1Pabg8ujZMrJUlukat1yS091hoYQwIwZ2d8L0SYhDqQ9IRVy8ZCbce/4C50Lvf460NKirM17J1yE7t2VVUcIcUKVB7IXDy4hgClTit+DKx4H/vlP4M47w+4JISRkqOAihJAIUKElF7ZhCk+EwNyNO+W9mf9MOwOt8UrbPlh+JoQQQkqF5PxW6FlOSh9WLHPv1KnAm29mvhciRCEpb1pagCeeAIYNsy2i50HBNWDpEXh/4TG5O5IP9I9V+0C74Xpg+HDlbRNCCouUkrytDfj4Y6uD5RuKxQA4hy4uCpqbM5+LZf1CCMkLVHARQkhYGBZhHyw8Bl3feja09lMc+PHD2NjS2bFM0MXjP376LeZulDAL5SKVEEJICePX2yOvFtfFMvfOm5f9nR5cJN/88AOwdKm1YDlJvkIULt+0Hfb78GHne7CpKVAbvLsJCQk/c4uDol2K1lYA/nN6Roakog5AwpuLEFK2+Av8TgghRCnN8Q5ojncIV3iStARt1V2mBgV9lFpMU5BESOQoektPQiJE3vP1mJBSjBXL3GsWZBXag6tYzhNRh43wVAhgScP2eW9+Yb1LlAXek4QUPYGeYrObttOYUCrKoLa2zGd6chNS1tCDixBCwiIP3lGB2k9+jxstyvPUnwq75TtDFBJSNOQj1wgh5cS06t2x1ZvP+5/vqqqAgQPlygrhqE4ruinXLJwruh9ASoVvVhyCfT58DEB5zIvV1cDq1WH3gpASx2OOTctjnObFpDKo6HNwJT3RAHAdQEiZQwUXIYSQLOK6y9SgYPFYUezhEAiJADU1wLp1YfeCEBKETW2dPB+T5QX99ddK+pFWfxWLgMgYlggojOV2sZwbUjC++w6YvGGPsLsBAHh31B5BoxTmYuHlcdppwK67Km6HEJKNCgWXE8n5zE8zkYIeXISQJFRwEUJIuWLrwZX/qUHThLugyLxI3biRwiVCDJx4bCt22qkEXk4JKTds5t98U1Lhgc0KLubgIvnGYrI94wzg/okXhNAZ5PTn6peOwTffKG
5jxYqcTTSsIUQxYcwtpTKfyUR/EYLKL0LKACq4CCEkLKIWojC1GRpWbOqK3w//o6fjvDUtIZE3tjN7NnDXXcBHHwVum5CSIB7H6uVt7uUUsvPOwNvzjytom4QQEytX5m6rrZU6tOhDERmhsIqEwMK6HRDXNQgBHH987n7nQKBy1NQYHKc8rrkL+Rqxfn3h2iKkZInFgAUL8lO304CQ8uBKrQuKVeFl7LfduuDFF4G//S07nCEhpOSggosQQkgWQgDfrToQ7y881lEJd+e4S7CxpbO/NqwEAG1twDvvWB8weHDi/4gRvtojpOQwv8StXZv3JteuBVr19nlvh5Cyw4tg6ZFHcrfdfTewaJHzcU1Nct6epSDkKkQbxXqeSCD2++hRvDHveOg6MHZsftrYbruEPDYSONznO+5YwH4QUqq8/z4wb56aurzMS2kFV5Ejo+CaNQtobgaWLi1Ilwgh4UAFFyGElCNjxyaS01sgoElZef93+pkYs3Y/X80LgdxF+OTJwIwZme/GRWplZebzZ5/5apOQksZOOUwIKTksBVKff+4s3PrnPx3ndqprCJGjavPWtvv6Lz4SL78U/GlatSpwFWqwGFMYFpkQhSQ15Vqf3gCcvUD7LTw6XS4wpWik4fabSvE3E0LSSCu4NE2r1DRtiqZpXyW/v6Vp2hJN06Ym/3omt2uapr2gadpCTdOma5p2pKGOP2iatiD59wflv4YQQoqdQi283n7bvgseqtFlQg1atmFxnDmov/FctGuX+fztt77aJKSksUgETwgpTSxzac2fD0yd6rvORyafiz9//7viFQAVImRhsZ4bEhwhgMZGAIm1r52SZ9CKQ3HrXzT/90qyjbSGy6keapoIKV0snu+5G7s7HyMEZtbskvXdqWyimRKa19zWAZzDiZnx44Hvvgu7F0QRXjy4bgMwx7TtH0KInsm/qclt5wDYL/l3I4BXAUDTtG4AHgRwLIBjADyoaVrXAH0nhJDiJuwcXDZtC6E5L3YNfYwLf47AAsgVyPNFnRBCSDkQcK63nS1HjfJ3HICvlx+GV2efUjwCoDDWDAxRWL689RYwaFD6q+vl37jRXztDhybqnzULeO457/fZlCn+2rXhvc+7KK2PEOKM0yPfvsL07mxR+ND+D2Jpw3bulZVKiEIjbgquOWZxNil7+vYFPvnE/5xNIoWUZFLTtN0AnAvgdYniFwB4RyQYB2BbTdN2BnAWgKFCiBohRC2AoQDO9tlv4sLLLwOnHtMI3HcfY80SQixZv7mLZZiD8wbfiocnnZf44vJi7duDS2jA3/+eSKxrx3PPZSxZmUyekJLhqquAjz4KuxeERAiPQmzbUINOcyokLbWLVXFTrP0mxcG4cemPTiHE0rg8i7YY17tz5wLLl9uXtVLyjh/vr10brvq7c6Kt004DPv5YaZOEEBsqNbn34ZZ4O/dCpfJu7cXwJJXTu6EB+PHHRP5vQgCgpSXsHhAFyJre/w/APwGYR8HHkmEIn9M0rWNy264AVhjKrExus9tO8kD//sDICVsC69cntF2EkOKggAKaplgHy+3z6rpjccMO9gcq8eDSEguJNWty9n29/BBMWLcnUFsLfPllTpuW3wkhRcN77wFvvBF2LwgJEc5hwZH14Fq6NDHg1NcHb5PXjaQYMcJ5v9+wwcacs4Dne64gd2hba/rjiBHAgAGFaJSQMsNijstRcNm8Hw9deSAGrziYIQrtePHFhFfuF1/ktTukiOD6riRwlUxqmnYegHVCiEmmXfcAOADA0QC6AbhLRYc0TbtR07SJmqZNXL9+vYoqiV8LMkJIftE0NLZZK5kKQYdKibEhT2EUszy/TPWdN/gvuHL4dYkvTU3JA0rEyowQVQiR/e4bwsJ87eat4XepxvcIQgx49eCyE0i5KH1sPb+KEfNvtTqHDQ3AE08AP/1EKTwJxIpNXdNRD4asPBD//VeNmordnv0CTpZCIirDd98BsU3NBegNIeVJ2kPUYj6vkF
RG/WXs73DON391aSgVorCE1gWy4+WyZYn/M2bkry+kKHhx5qnFnX+WZCFjen88gPM1TVsK4EMAp2ma9p4QYk0yDGELgDeRyKsFAKsA7G44frfkNrvtWQgh+gghjhJCHLXDDg4eBMSZdesyn/mwEhJJ2mIaurz5YvbGAj6vlVoq55b3YxfW7eD7WMBgYdrcDNx9N9CvX1ZlMZG0YE1ZwJobosKLkNA5a9DtOPZYNXWtWcM0fITIknpU3l9wjKPBSM5xMs9Ysb43WPX7zjsznzdtUtLGmDX7oq61U/GeJ+KL5Zu6pT+PrdoXd42/xPkAmfvjgw+Ae+5xDo0UgftMCGDgwMT4ccYZwPrmrcPuEiFlSY6CK8j4YM7BFYGxxhfGfnuVD/j1tCUlwwszTy2u/LPEEVcFlxDiHiHEbkKIvQBcAWC4EOL3ybxa0DRNA3AhgJnJQwYCuFpLcByAOiHEGgDfAjhT07SumqZ1BXBmchvJA9pGg1WZz4f1sMPotUtIPpGK4Z/P9pNDQ5temb3BheUNXbHfR48GbDv526dOTST1HDkya39MT05PdgtVLkJIuRORZ8CvB5dZ0F6jyBiekHIg5cH1+xHXYXXjNpkdbgquUrLUTg4iM2p2weL67d3HRBUadCFw0pf/wKOTzw1eFyGjRiXCcU+fntmWvE9jemUij86SJZ6qlPHCssPOM1QIYN48f3XW1tJJghCvNMXaW273rOAqhxCFXnJwEWIiZfBN4+nSwF/ylATva5o2A8AMANsDSEk7BwFYDGAhgNcA/BkAhBA1AB4BMCH593ByG8kju7//hG/DhBkzgCFD1PaHEJLBcg0WwsIsreCSpDWeKe9XSZc+zphrwOjB5aZ04yKEkEjg24uT76CEeMdscQ3TPOyq4JJvo1g4rP+DOO5zi0j55nWCwt9VXGeIBEFF6jZX2toyn5MKrudnno5ubz8LTJhgf1wRuD3/+c8Jo1k7hABOP71w/SEk6izftB22fOMly30V5hxcZrzkrC4FD66mJuCrrzLfKR8gHkk/U8V4/5McPCm4hBAjhRDnJT+fJoQ4VAhxiBDi90KITcntQghxixBin+T+iYbj3xBC7Jv8e1PtTyFWrGzshljc/+J3wgTgoYfU9YcQUiCEcJ2oU0KxtHAsT/m2LNtOVVthPQ3F3SxQuQghpLA0Niqtbvly4MknlVZJSPHgRQhlJCm8MXpiCQAT1++ZyA9kV08pzpkG4X5zvH3ubzQqDQA1gq9SPI/EkW22AX79a+Bhr157Cu6VpljHwHWoIMhPaXZJ1xWLAcOH+6+fkJLFKgeX2bwiYIjCVY3bYnPc2lvMb50F5aOPgLlz/bfPOb3sqSx2D0aSRRAPLlIGTJgA/PvfYfeCkNIkb2sqXU9Ijvv0cW4/VdxjOBNNZIREfn+CpeeX4YTEBUMUEhIWq1cDL5mNR01hRM0MGpSINirLvHmJ1COEEA+kFFyGF3IhNCyo2zFrvx1FH4rISEUFbh97GYDkmsK8LjCHsOC6gfjkm2+AISsPDrsbAIC1Gzvl5VZuaAA2xzqor9gBXWcKHEK8kBOi0IyXwUEI7Pb+U5hWvXuwTqUYOhT45z+BDRvU1CeDOYQrPbiIR9IeXCF4RC9dyqWpaqjgItY0NeVsmjYN+OyzooiGQEhRkLcQhWvXJmbMyZNd2k95cHlr2m8Oj+emn479Pnw4q027N9uYW9jEWMxXH7LYsCERh9Vs5U2IQhQ7PtmjcIX82mvAX/7i7ZhzzwUefhhYvFhZNwgpH2Sf3+ScaQ5RmDZUUeHBVSxv25qG52cmYptJradUe3AVy3kikeXDhUdhyzdeyN7o8qK9819+iw8+UN+X3XcHrh99teW+fN3qt9wC7LSTe7lVq4ApU4BHHslPPwgpFpTm4FIdxrd//0RM1y++CFaPD/b78GH8bwZjnRLvpKfcENZ0PXoAX35Z8GZLGiq4iDXDhmV/13X07AlcckkovSGkJAlbNpLyotKFw1
TgFubQg/fXyDU/w8L6nbKPMyqqDG3pTmETAeDvfwcWLJBu25IXXwQ+/RQYMCBYPYTYMHUq0KVLniov9AAiYd3y3HPAPvsUoC+ElCsWShohXEING47za6ASScwhjt3CPiocM2nrVyb88IP/YyXutx/X7e0ehrBj7v71M9YkPljMy35z49bV2e/L13Jj4sRMjjOnNnbbDTjySOCBB/LTD0KKBc8KLify9WAX8v0k2dbC+p0wbNUBvo8nJCwaGsLuQWlBBRexxuxVsWhROP0gpIQReuFyXlm2L6z/u/XHr4Ds6+WHpj+7hUVMN+t0Pj780Fc/0qxdm/i/cGGwegixYf36AjaWx7FD14HPf5QwsyaE+MNrDi4t2yAkPae6hPUtqSgMbj8mHwouCsPKixA8EcwsWr9V7sbRYwrfkYBoGrBpk/1+PlqEuJMOp2aHF8/lUlBwGWSWUvKJvfbKX18IIaFDBRexxmwVyXi2hBSGQiq4Uh5c0DwFAc5Obm+R98KGuMiEHUxbmNoIqBpjnaD16Z2p26qcijCFhOSRoMLk++7LWDd7RdfVTd3z5gEXPfpzy32NjcF/Z2MjLdhImeFnrt+8GXj9dQCmEIVGgxGXEIVSAqBikTQb3lUENPdwSwp/l1BcH4koQSY3D/fHwhcGJeLwWbS574eP+e+DIsTUaf6PNZwGp5DNfJwIMWEefxYtQoV5DlfpwVWMD6Fh3tc04f4bevTI/l6Mv5mUBqkUGQXLZVAeUMFFrHEL+0EICYzfMCKuGIU8Ds9uxklKA554AqLOpyRdcnz400Ej058vHPInXD3imuzFuxBoirWXr1tVZuqSMmknpcRjjwHff+/v2OOPB847T21/8sUppwC/+EXYvSAk4nz1FTBnDoBcQ5P0d1cFlwTFsuY3KrisvMLNCi+FObjiOl+hS52aGmDqul0K0tZ+Hz2KaXe+6/3AQj2rH32kpBqn7hbLsENIPpC6/59+OjdEYZCKS+GhM6wDnNY3G1s6Y1H99rk7SuEckOJk0KDEfybhUgpX5yVKtmWnjwqo4CIk7+TtsXJLgp7clhIIpRVttbVS1WsiIyQSAtJCo207bE5/3tC8FT5Z/HOgogIPTTwPVU1bAUJgyzdecqyjTa/IhGLiuEQijnkq9YPtbe5y/48bB4wa5b29p57yd5xfNC2RhyMNPcZJOeBn/jIkyTHZhmSEXql6N2wAHnkEmDIla7vmVTgWVVpbgeXL018L7VEVd8pdSkqCv/4VOOKt23wf/9ubunkq39zsELEgZERLq+djUjZoTj8naxwbWcCFByERQ9botTIZovCiITdjbdPWwXJP5suDq5Dv55Lj5Z+/vzLhDUvZATERWm7aVIoMenAphatzQggJCdmcV55x82wytZFSGEnn4PKZ4Na8gEi1++/Jv8EXy3o69lUI4I25v8TWbz6PO8f91lO7lrz1lv9jCZFEhZyq0Pqeu+8GRowwbSxkHs7vvitcW4REBZn5zFAmx4MrNS+nBozhw4GVK4FevdLbn59xGiZv2NN732pro6d4fuklYPHi9FdL4WA+BFkpDy4quEqe+AY5oy87Ph20haKehI/XiBPz5gHt2nls5OOPPR5ASOkg+w6eMmb5fOkR+LFqb/d5rtQ9uCRpaOtkvaOMzgGJFq2610mSyMDVeRmgJAxa1F5sCSkB8ramMj6vVs9ucltqbAjcDcnxwRxWQUADhgxJfBawPiHJbfXNHXDd6D+gOd4B49ftlbXPFz/+6P9YQmRZswZAQtHVr5+/Kvx6cCnlnXcK19bs2YVri5AixWhoImCI3JAaF8we2ULg+ZmnyVVuHFtmzUpovV97zW9X88O8eVlfLdcQybXJ8k1dccIX/1AzZibriDFEYcnTfvHcgrZnGWbTVz1KqgnEunWJ/6efDowZI3fMZ4t7YsaM/PWJkCijS8rrjO/SMStDCy9eWfmS74XkweXkyW3boygMmCQaFPBe+PproOM//XuIE3u4OieEkC
ihUADjxIcLj8LLs04BAOjJBbKsMlzLWkxq8h5cJgWXLkz1WHmeJetujVemN9WnrLC4KCURYuFCoKkpe5vW74P051mzfFQIQIyVVMZaPA9F+YgUZacJKQBZHlzGzYYcXCmBVceOOcf6MngbPTrxf/Jk78cWGhvB3vh1PfBD1b5Km4qJCo5VJU5lWOE8vbh+F+gedB07bCJHDB8OVFcb6nHo7hXDbsCf/+yjc4QUObrQsMQqP5QFRgVXXA84D5kVXMU4pxllEg5GArb7ivE3k7yw6zGFybkJAEuXFqypsoMKrjJAybhtrmTTJl/VHHooUFWloD+ElAB5W1MZXzRtGrntx8vxn+ln5rcfRtracl6PjS/MutCAWCz3uJS3maGPbXpS2eW34/mKOU7Kmv32A+65x36/59ssmXxWnzzVd5+8EIsB++9fkKac8XiiYjFgwoQ89YWQAnHL6Muwfr18+dwQhakvNnl8hPA31VnNyxHE0tgmlXdMZX4Frhciz4Rek/DEFdOica1WrXLcHb1sWw44uVdNm+arSothyh1DaFJCSoFPFv8c+3/8SO4OyxCFGaVUTFTm7M/ByUur2CM0DRyYcReFN6NbUj5cey0wZ457udVV7XItVUnRQQVXiRI4ibSL9djQJyf50nHNnAnMLWzEB0Iii6+cEUOGAE88ATQ325cxLlht6mtfkVGCpcIi+MkJJj3S3HprbohC88GSgrS0FVaqAq8LdCq4SJ6oq8v+niWE9nCbtbQA01bvACDb0/G997JlOyoFt5s3A/PnK6rMaLJtxvAyaonH57FfP+CYYzwdQkj4mO7zV2aenJv7zkxybV7dvGWWYEs3enDZPT9+5zm3nJ4RwdI6m3N7WfL008C9Hx2eyEEXNqkceAVESWoCc50C0ObPsy8Q96cI95WndNkyX20RElU2tnSWLlthWPfHgnpw5Wt+L9Tc+/XX0kUZorB8eestYMAAycLvv5/PrpACQAVXGTBq9X7BhVYmYfqZj52MF1/0VxXnEUIS+HoWPv004df8/ff2ZdwUXEJkKbi8xv3PqdLth2zeDCBXGC8MU5CABrRvb9uY8YVdGPf16wfcdhtQXy/T9QSlEJKBFJxhw9zLqMrl/MILQM+nfgcgOy7/VVcBDz8sX09otLTY73NxU5m/bltpA7rTTwfuustDvwiJMGKe3GJ9+3eexUuzTs0cB0PYIjsPLi+GIMbBqkgUXAAsjVfu/ukivDrnZOVtqMqXRPKI0zwkgRIDksbG4HVY4LSeGLn6Z/jsM8XteVSa+VJcgctxQtzIycHlZrTp5aEq0gewtmULAM5Gt+kxTMsY9U5Yt2fR/mbiDenLPM/BkCNPBHZMIVlQwVUG/PqrP+Pyy7O3tbQAXbt6qMTCs4LzASHBELq18kmKtjb7fRIeXB0qMs90ykNE+gVWMt51mqSAzOzBZUQXGtDZwoLNQliX7qcQwMiRQGsrMG6cez/s4GBGXGhqAn71K/dyZhmyUcgj4vIC5qROOHGc6RlLt1GI+1ZRG62thi8ukq/9n74O996b+Lxxo3O9w4cDa9YE6hopR4QAxo+P3s0zapTzfptn54WZp+HS725KfLF7ZuNx+Tm+COdEy+Tyuo6npp2NYasOVNiQsP5MokcUwm/5uUcktUNCAOfdlJszpM/ck3DJJR7bzJMizozb6Vi1Cnj88YJ0hZDoYzHfaOYcXF7q8LKvSKhr7YRubz8HwFsOrs+X9sQxn99bEueAuBOF5UAWxWQ8VmRQwVWm1Ne7C47yhV+rLkKIAacHyWOIwrTQSzKufY6QzG1xKJEHQwDWk33aWtqiOYnfacfqxm0wdOWB0Pr09nQcIV7IClE49kfp4yocVme2i/R8vKQ55dxw4PTLumXZxXTsCHz7bfKLxCKgoSHxv2tXJuIleWDRIqBvX+Chh8LuSRauj7DNs7NiU7fMl9QAoWnY0Lxl1nZfXkcVFfhu5QGRlwFJhXxW+CPyEQaOBEfTgGGrD0
h8iYAAydKQzQ3JF2VdB74e2cV7/VasXeu4u1DP/7JlwL/+ld82Zs/2aORLSJ6xfeSND56u44Jv/4SLhvw5vSkmKqXfwaVQ9aAXcMHQGm/nq+3qli3dC5GSwe62+P57IC5CUIkMHlz4NssEKrjKhfo61yK/+x1QW5tQfp12d/4SWkT9JZmQQhHoWVCo4Ep7cEn2J0vZ5EHIY9XlIz/9V7JOTdq8JrBgSQicMPAfOHPQ7envhDjhpHAy4mgo2SCfuDLL8ws2Hlwy6AEFfDU1vg4b/kOnHIPw5cv9daFAhuWknKiqCrsHlvid23bqbAjRa/B63uGdZ/HdyqSw36/5qqbhjEF/w8L6Hf0dXyCEQF4VWuY6uWpQz+bNwIYNweupjZDgsmgUoZWV7mXygcwzqjiX2pQp4Rn5EmJFpWY9P2c9HkJg6MqDsvbHrDy4QswzHdMrMHXDbgVrD8j2aHMOUZhN2vuNMoCywM7Y5MQTgRk1hb1nAWSl1nhq6lm4+mreh6qggqtEyfGUMGe+t+DDD4GpUxOGrSOmb2eqsEgW6IQUEZZrKtmFlouC64kpZ2NR/fYOIQotPLgk8fvCbuXBNaV6j0ydkh5ci+p3THheBViU1rcawiFycUtckJ0CzbdS9otXMGWwXRuWpEKYxgIquCIX04EQBUTAs8PqQXZ9tG0GhpjR+tRU74bmpIdHPC6vlLHoW5sekgBcEgGJXCQq2xMa1w6KueYaYIcdwu5FBiV5McK8R7y07WLF4/Vn2K1hBgwwpRBeIWH5smqVt8YJKTLaVVivSXSj17XFejxulYMrRN6ZfxyO+Oz+0Prk5T1LLxbjAxKM5PyhjxsvVdyX17UPjDKx6TW74913uaZUBRVcZUJgC7KIKbgWLOAYQIofqZA6rpWI3Bx5uo57J1yEXrNtEqsLkR2iUCT+amStXrNycDmUmzABeO45YPp0ABI5uKwqc2qgqcmtp7ZkCS44mJQ9jzwCbLONezmVkUCccFRwpV5ynRpTpZjKh4JLYj0R9Dw2Nwc7npQ4xaq4tXl2Plh4bOaLIUQhYBDi6Lq/d4Gk4LsopskCenC9vzB/kS7KFb9evlEmX8+NgOZet5fGXeZlVb/j1luBq64yNCtjbGDuW8RkEoQEpdLm/dis4DKXiukShh1O6x3Fc2ZjrKNzAV0HhgwBqqsDtWOHbRjmn37K2detY1PqoLz0hUSECRMAAGL9eqnioXpdR8H4rgSggqtMcVobWu6L2GLyZz8Dvvgi7F4QEgwlIQr79AFuuSXbSzO5mG3TK20XtlkhCqHhrfm/xK7vP51TrrGtQ26/sz47WJy8/jowdy7wzjuJLjvYj+tCA0aOzN2R8uDSXKYrtxhypiReWX3h4rbsGTcuK1qALUEUXF7uMsvbORm7SZ8+y3vjMuT5OUhVv2Rl+7y209QEdO7sXo6UMVEY8608uPzkyJKoF0DCgyvAzy5Ka+c8Xue4iLZHG0EknnNfwjLJd27XnxdRRb7UZVmxIvNZtQwiJeiM6Pkh5UeFTYjCrFvUwkglFrF5yDWX0WuvAZ9+Ctx7r7I2jesmy6GlrQ14442cfbtvWZOqQFlfSATRUmk4LOYRC0WrrmIdTkKFCi6ShYoxvrY2eB0ybJJPZ0JI6TJ5cvZ/IP0g24YUMnlwPT7lHPxx1B8si65q3Db3cMPU4Ulo7+bBZYXLoKQLDc2xds4vwC0twP33A/37+2qDlCarVgE77ZT4LCs/sZKH6LrB4KK1xf5YD0lsK9auzt04f36inpbWxHeJ+3ZzvIP0b9M++lC2e75pagL2PmOf/FTe0AAgE52REFsiaiVpKQzfuBF46ilg0iS5gco0LqRf6r38ZmMdTsKBlhbg8cfTXtqhU0APLpI/ImZTGQiv4Y7mzgV2uOwURY2ru1cLbtU+c2b+6h44MPE/K1YiIeHRzk7BZZx3LZ5nSw8uM46Wd2rns3iqv3b1GmUVisiRH7h4sDU2J2QjqXnm+u+ukMnkQo
qVpMWo8T5Ztiwpr07NBUa4xit6qOAiclg87FOmAO+9l1u0Wzdg8OAC9ImQIsfyxVfhxNqmV1rXN2hQloLr0yVH2taxyS3cgCTLN3VFtUMIRDfrcLvTcu/4C9H5jZftJSJNTcD//pew2Bw6NF1ZKQlQiD/mzwfWrfN2jNV9uGgRcOGFic+tk2dhzZrMPr+3mfbj2Nw206HC8nTz/vhj7jaF45Fmk2ZPGV99lW6HkGLE8nEbOBBYvDjhra1pifyTMpWkQhSmxgu/3gopBZfVaNa7d0JS8PLL/uoOQHOsHeZv3NG5UAFzcpEIYLawDHi9VUwlXhVDU6YAG+pyIydY1j1nrnMBm2e+uhr4xS88dStSHHD1MaipcS4zYYLEWkAyZBUh+abSRsEV100hCk1DmqUha5B5L+CY6cWQTwUxvQLN8UxUCEtj2eRvSuUjzTqnAPrO/gWmvD87f50k4ZJ6dzbMxXvtBVx6qXVxLhOLHyq4iG/698+Oo23Eac0Yi+WmDCKkHPFjEbm2aWs8P+M0i8psksKbt69bBwwebMrB5a0fxn7LJlrf84Mn8eTUc2z32y6KXYRyM2p2TXywC1HYr19COGiCIQpJpeG90FUQkoxfKBpyXYfbGyLufbb0SOyyi3UV3tJhZAr/bvgNOOUUmU76p60N2BzLb+jAvJN03aKCixQrlmsC44JZ5ua2y9Gn61jd1FWyI1YeXBblOqoxgPHDE1PPwf4fP5K90UvuEb8Irh0Kga9TmzJiKmJk56/X5p6A/7uw0bmQzUmcPTsRltkLbtfDtzGPQ2QHO+at2MJqWZ9m6lSmMSDFhV2EEzcPro2tWxQuObAE8QKHd/v98D9ir35PpL/bpU2ob+2E2bU7A7AWF+iDaJkfJVpb7YPueMZmUq2pgXt6i0LDNaUSInZViSpcp5fRoxP/582Tq9DlgauvT0QqkeGMM4rbcowQVVg+Vi7P2r8mXIDbf7zc+ThjiEJzfc3NALJzcDm1aDWW5GP+dYt5bCX4i+sGNZXdImW2tVVW1eZtDJVzQVGOtGsnX1a88SYAQO/7Zs6+9pJ6odQ9PHJkItF6r172Zc13+6hRsFykG7e0xf0v6f7wB2Cnd5/xfXwkSD7HVHCRUuOJKWcnQvH6ID13+nWfdPLg2n57f3UqoL61U+5GC8v1Si2PbqN9+uSv7jLH17IsgoO/198h+xPmbtwZ/Zf83HN/amsTUU+9kq/fIVWBRWVO/TniCOCxx5JfnJTcXPuTiNCuwl8OrriMkWkBPbjieoWSemSZtGGPrO+WBru6jpO+vBOteuJlLSU5yBKbFGOO0RLm229zPaySaai9k17DWmAhO3K7F+bOBfr2td7X2AisXeuxf0Q5VHCVK198nvj/8cdKqttmG+Dqq+XLT5wYvE2uS0mxYzmJutzYb8w7Qbp+SwVXcrVstBZzUi5Z6uA0Yw4uOQ8uN/zk4Gr3ei9MTi1u7d6mrRLyMGwRQbYHlysLFgAA9Lm5RiHarBm527RcT+bUXXbqqYmIXv/7n4f2U5U60OGR+20X3W7MmJGdW6+YiaCMk0QNTcOqxm3DTSZtMe/YzUT3TrgIU6t393Vzp+sMGKKwKBJvWyq4dPv9AdtoWUZJRqSI4ODv7vlU2PVnz57A+edbdcT53P17/Nn4ZsUhSvvSowcwcf2eruU2Nwe4rqNG+T+WEJXMnQu8+aalRbhdiELduC63GEzSCqUoMGqUu6Jo662VvnKbhy3LqoXAwrodco4x9rUo1jdlTF0dsMMO7uVkyBJ9e/TgisWAhx8Grr8+8f3ZZ4F33snsv+46YOedA3SO8iglRGhUJKGQeuFNBbOuqoLWvNlXVQsXZj4X4vmsrQVWr85/O4TkjSAPisSLvGVs7rSCK7OYjgeImb2pTU2IIjcFl92pSnti2ZwPEdfx+dLDcdXwa/H+gmOsy3hMAk6Kn/p6oHH6IgDAb89vcc97nA7TlX2fjRgBfPnINMtDzFZcL8w8HdtIOg
5aCr3MnhQWFcyYWt7xf//7X+CKKxKf+Z5AnNjt/afw1rxohROwtD5OzYHQpOb9HJsWP/O7VYhCK8GVuT8tLcB//pNwU80zUroMIVDpIwSaW50pThp4JweaPKHEgytoDi4F907BPZ9cGl++3F9Vz089BcNXHyB/QDJahBNLlwJtuo1navJErFsHbHFirqea9HldtEiyICF55rnnErFBLRLV2ym6zTm4zMREhbvRZqG8GD/4IGM8a1fvIYeg4rXemFUTRAuQwXzenHJwpY+xcvKigivSWNkq++XyZBAku1vU6ZFo3z6R+SLF3/8O3Hln5rsxB7cvuJ5UAhVcZYLt85IKWfJmMuzS++8Db73loYLwuO02YNddw+4FIf5R8VjN27gT/jjyasvKWvXK3IVtspzRgysu7F1Z3EIU/n2cTZZOAK/OPgk9P70PG1s625ZJodtZfcmeJBupQLPoiIuG/BnvLTwOfecdb90EQxOUFTfeCOy2G3D6jfsAAD79sqO0sYT5Xrn4YuDm739vXdbi1k2m8rLdn8LqdtaOPsq9fz+Ndy3jCYVzfyGipLzxBvDVV8HqIOVDbesWobZfYbLadvLq1oU/BVf6u98HwikHF4AmY+6+sWMT3q5GCUCesBQIWvx48zlWyfj1PfKT56tc2VgrVeyHH4CqKosdHrVD8+YBDQ2Z7/feC9x3n6cqXInCPNTcnG2EWhA++sh2l5dz0miTZsy2jlq5eyhVySOP2OcTJyQveIgRag5RaM7VZWnAEmTQ8Xts8jg7T7Q0yTG6avPW/toxV2fuhlVUGSFMucNTZU3HkciiwvBDCC3rgVI1N7e0AIMGJT576acKAxpiDRVc5U7q6TZOtj6DnEZhEU9IMeEnB9cNB4zJ+v7pkiPx5vzjE2/8QOLlLjXTWpHy4AoQFkV2ITho+aGYVr07ur79P/c63XJwuXVXYgBKt2Eqq8c5eJUTr72WLdRy49NPgUcm/RpA7m3mtJjNd97nufMtlnCb/Xlg54sNG4BHHw1Wh+x5iuta1vUQcQqeiQ3JGyXIPKikGx7al7UwzjHeDiq4cfLgqqjAlm+8hNFr9kt8j4XsQWphua48ap0X63jiCa22Jv3Zadw/4YREHsvcCrxd7AMOAG6/PfF5jz2AJ54AHn/cUxWu3Dvu/PTy3C1XtXIha/IkPv44sN9+aqs2M379Xrjppsx3rUpN+E7Pz6/HMeid11vw3ntqPQQIsWLFpq7Y84PHLe9Ru2c/K0ShrluvWfL9siFDdTUAw5rGrs3kA61EuC8E5tV1z9ok48GV2Zyr9CLRYtSoxNisbB33xRfZ363yO3qch+vrgXPPTXz2GPEwF96ISqCCq0xJPbpvTTsCAwYAj4w5JczuEFKW+JnHOlRkL4zTdaRcUJ57Ltu01UYYU2mT0Danj1YbFYeBAfzl4JIhy2rLa9uEAHjoIeDxKecAyF34ui26nQTYxlt7/PiEIk3quOT/A4/uguqWLu6Vh8gXXwD335+nyk2/sd0fr8asWYbdAz7PU8OkVDBbQxeaHOtjh+7IhijUTYYcqkIUOs2Ta5qcQwVbsnAhfndpDD/95L17iabkrl3elZhUcDnSqxfwyis+Dhw92nG35bPiQwqW8qpesSK73uXLbcJ8e+T56afixRcTnzt1CsGTCon8JflmUf2O6NNHXX3PfbanJ0MkO4569AIMH26zU4i0gW+HDsHbIsSJmTW7YPmm7Tytz83zrqVHchQUXMmkxq5rqjwbF1kay0rM0YENgUheOOUUU86soJgnAwUKLqvqvv/edxVEAVRwlTnXfnkxLr4YeGDU6flvzGKCveWWhGCPkHJExZqzvi0Z/u/4ZPg9c9wWm3hFsotLK+FYPtbKcVGButZOuTuM+Ud8oGsZAYVd7iLKp0gai5vBuP71ogxdtw72Cihk34bXXQf89rfGNv09ZHaekJqWrUCz2p9v1q3zJ9wbNw7WOdLMSc5MiNFjHPcTUqGJhPdzGJOAED
nPuZ2XFAC0xNt5U3DZfJftWxqnNv0OHLNmAc88gw/7t8MnTy32V4cVFuudLGMeFYsXt3wnJIs//SnxrucVMeibwG1XHHaw72P33BN4e/4vA/cByL5FamrsywGFmYvTRMzj28gdfQ7Ad9/5ONB0Aict2wHffmtd9K73D0Nty5Y+GiHEO2nlj8V6wy2TSKKQ8GeU45RsyLyO9jufJX+T5paDK+3B5a8ZM2fuNivru2WrQlh6axnXW74MgUj+WDA//TEWU+AZheT1bpfJ+ej5Vp8/371MkhNPdPfYtoTrSSXwaSY5SMW194OFJuuVV4C337Y/pK4OWLIkeNOEFA0uz5pZAPb0tLMSHzpZKIes6hMC8zbuhNVN2/rujtDUTx1vz/8Ftn3refsO+FwNG8M72An/qeAqD2bOBB5/zGUumzPHcbcXD64zzwTO/9ZesuckQ3a624XQfIUCmzrV8yGBMf7G++8HjjxS7jjj+bjhBuDkk10qt9pNi0xiR8qKWBPAO+8AP/4YSjfM+Sos7+jkm31TrIPUPW2r4FLoDW2LpqGxrQPiuktZwzirL8yjgssupJPiNoh6VISYFULLuVxLlyLb07fA8iS39pQquNwa++ILNDYmzkkhWb4cOOggubJ250NsbrbcfvUtuUZFDQvXWhrXPD3wAEcjJEJU4qT8sX0/NYlqLRVcpvqEbirz8cfWwvlJkzJJa53o1w+46SbnuU42/UFyPeMlPLMtQqCdaQ2liwpLuYdbRghGcokYQ4amPwoRcF405pGV0JTZrnXNBuQWCSKN/cxSTstCBZcSqOAickg8cK6Dzxhv1tSaBmy/PbD33p4OI6RosJpEXSMNmL63r3ARdFtUeMDHD+PblXKWrX766Ie1m7ex6YCQa9OmQJaCy+ZQV2EcKQleegn4130u19piRZqV18lDDi4veK7nGxfrdovnoU+fRO6SYkAIAM3N2d89QrkzcSMtZFm1qvCNC+GekB0w5MCSew5EPJ41jgVW9CYFAk65LdIhCgF0efNFPDDxfOc6x40zV6EGC8GWZUgnlVAgkR9cBvAfxui44ALv1ZxyCnDIIf675QdPt0hTU976kUN1Nf76V6BHj8I1mcLFlsgV8fEnltvf/TjX2O/Vz7rnPQcZIW6kZ1AvIQqN45eFB5d0Ta++mrvNKo6aVd9Gjkz8f+89+/pTCq5U/2bPBqZMyS1nNC5SgNT5kMnRraQ3JB+IhYvUVZZjTWqxrrV7ITd4fwGwvK+8KLgoecofVHCVKEqSNxpYvKpj8EpMA8a++7ofYmmk7svnk5DoYbXm8mpFdMtBIxMfbM0cg40FUglb8y3gEcJ3EgFLDy5ziEJaboXOgAH5D82jIsTBru8/hYcfznwP0ufUbbj77sD06R4P9uGOtW4d0snuC4JdXCAZGhqAT6wFWISoIr1WNr+4FohKs3DGai4yhBEzW3NboYsK4N//Nnz34cFlZWHuUK45njx/tbUAgIX1OzrXb0iu41cBJ3uU8nmFHlwFwdaDa948AMDadRUYONC0zyqfhulytbU57w8b7dP+hWtM111DJkaWVN7hAjJmDNCtW8GbJSVC2qDGMkShjQeXOQeXhCpGekzzOndNm2a/Lx7HFd9djxdmnpbZ1qtXbrlUiEII5/okMXuCCaG5yiisQhTaedCR8NHH/ACtbmPii4XXlCvJe25R/Q4YujRj6WD3nDwy7lfWObRM64s9922fvb+tjR5cEYEKrjLFzjW477zj8ers3FhA+1z2c0/1jxoFfPihc5lFSYW852f56689HkBINPGj4DIvwnLe581S/DyEJSpo6C9dB955B2JtlXtZq8PNHlzJ87Fth8wiifKp8Jk9O/9tSCm4XCyymuMdspyRgwhP9Vhi9btyZe4+p/Adq5u2wRdz97fcZ5dnzpVNNtncA4wfQXKoNE+dgyUTq50LuZx8p65feGEw/RspcpL3TjrvwoIFPt9Gg2H2LrKcWysNeSQlwrZtaO6SCK
WS/I3xoLklzOfKiPEhGzMGGJoIKxPT5dv0a2AiFU5diCyB4IjFe/pqyxEuIArLihWeihsvz7Bhmcd83pzEjuWz6vHWW9nHXHFFgP5ZYLwtXadUlR5cycbspsqaxo55G/ZUhCBzCk2VDzmgW50//pjW4RPimfS9bBmi0PoYc4SRnDWDhELHFq9zV/v29vt0HR8tPhqLG3ZwriN5EpZt2g4tL/YJ9iBb5DHVrdZQQtjIMgzHUcEVWQQA9O6d+PyWQ14bFz5beiTO/PSm7I0WE8yT40/H7bcnHBY1zT6E7/Iqk/PHoEFZX0N4pSBJqOAqU+wE1B8sPBa95lglu3AgudoThknqjTeA3/3Od/ek2iOkFPG6yEq/RKbCCOmA1qd3pkBAbyvLHFzmbXm2OBE/jMXXyw/1dWy2gkszJMI1lKF8qizwq4wyH6fqdhe19l6JTn2dsL4HLvzgMjWdSOHTQ9INv+fqw0XHYO8Xbs9b2198AXz0UaDqSQmQnm8XL07cFAVGamxJh+nVpOaq9ZuTOWVGjw7WuRTG/AU2feu53cqs8EWfLZVMtof8G8xUVmRO2qp6Bfl26MFVEJTNs4Z6fvWrTBqNAw5KrA0nztsa116bfUw+54aC5uByqXu7e27El18WoOE8kI9xwzVEfMoSy+BVS4gsTh5cduR4cMnk4DIVOX/wn20saj3OXb/4hf0+2bqS48yVw6/H0QPuCaYFELmjgF3UmayfP2YMMHdudhEGjIssQmhpj119gcJwhQ737KRJwFVXJT7L5qjsfOm5GDEi813UbvTdNRIMKrhKkIJrjFMKJ58eFp4pskU4IV5w9eAyfTdbL7XFXYZ1j1IDS2uoQiIElm3aDn8Z66Ixt/ldQjOFKNT1HKtuz/KpZcuATz8FWls9HkjsKIRXvpQHl8XN4DTlaHGXHHg+UZKAOWQaYx3DfUQY6oG4kHWHjB0bVjekEJBTcG3XKTuEi6/HQIhEKIb33zfkALMfCM1tHLfjYuf6t946bYij1HLaxYMrL1baVHA54veVzfZ+81hhFC6P0zOYkxJE5dxfAnOg0lf+tEebz/MyJykUN+QQJESW9H1nGQJYIkShdA6u7K1fLj/cukNWAsNNm3I2/fn73+Hnn93r6sElg9HodEbNbsoHaJm0CkIAeO65rIg49OCKFsZ58JXZJ+PMQbcBUKuIFKvXSE0w6dvHpWxzW7vs1Dp9+/roVPHP2VGACq4SpF07YMjKg7O2mQcEpY/PwoWJ/xT2EuKJQCEKNS37mUuHETIdb148evbgKkyIwp232Gi7L8jCM6evKQWXIcyD5/X1448DQ4YwxplKUmGH8mihIaPg0o48IifNo+OatslHPPAk+VjGph/vCCySd3v/Kdx6q5q6GhuBnXbydozbOBWBU0RCRggNrfFKnP7V38JoPDd/hNU9u/vuqeLQ4+43rXm+9B229IMPEl5gybHZ8ujk5Gnu9xHbL3eu25DzzNdjOG2a9bhs8Ruz5nq7tYQQCTNdc4Imu7JGpkxxP4ZIIaXc8ZhMM+oKLuO+q0Zci8em/Dr/HSoBfI0bVvGgjXW6eXD5aZOQJOnZxzIHlzX+PLgkcmfb9ANz5sCc3HDYqgMweYNLeF9TXV8uOwzNsdzcpuaQi0EHaPP50K1CNpra+O13N2FzrH3WuoUKrmhhvDZTq/fAT+v2TmzXCz8K+31XPK3P5Xj00QI1RrKggosEpq4xnOTcslxwATBypPfjhgzJ1dnNnAkce6ySbhHiKQ63JX/9a444bGNrZ+djPC4mpXJwRWFCtumDcdEqbMr5zlGyfr2/40gO2uRJiQ8TJ+atjYrNcsooYw7bt9+2j9738stAc9zBotEFJwWMXwtjHVrJRs9Zt05tfVEYtkhIGAxCNjR3wfDVB0TihrDsQSr8sKQH17Tq3bCmaWvnOl07kps0yCkJu2fhkC6hdLJDCOCVV+z3mTAKwGx/w6hRwBNPAH36eOsLkCMMJGqwfRzNmk03pc
V3w9R0KAANDYmQR4DzMLOofkdMchMk54n11WpFQr69pAw8+ijwpz9Z73Maj2xxG0CjoA0lJUuFgweXHTk5uKxm9Ndf97d+sbvfTbnuLZVqLnWd/+0t+GjxUTnFchRcgUMUmhVc1rlCje9bQ1YejJWNXbOLhB2thkgRxnXyOy3MqNkN/fur7QuRgwquMqFNr8zyqPe1MLRh24tPkytoYwYfj3vMqesxXsHAgcANN3jPCXbWWcDHH2dvGzUKGD/eWz2E2OHLg8tojW1c3CWfi08Xm3JfBMzBZdUf2Sq8tORkWR3kNVlUVGa3kVyprG/OCABlrOIt4cuwepqb81a19uMPno+55hpgkSnkd+r+v/VWoK51C9/9cQxb5LPOV2efgi22ACq3lu9XlUt04WeeAe6dcKHPHoWHqzV2+PoMEhbJi69DS8+pYViGmnGyvJbNwfXb727GlcOuT3/XRUWwmz3pbWUpWEidx2S/F9TtmO4rvvnGvl3DdkuBlBPJY42eWU4YhXO25+/HHxP/p0+Xbt+9UhIIu3vH/A64Zo1jNfrn4Ssghw4FjkrKemtrgY0bkzuWLPFvYCWDh+d+x8N3zl8/fDJlSo6sXQl2MhAx6Bu54wLGTdR1YMaMQFWQIsQpB5fdPWlelpjnPSE0YNUqYMECwzaLiqzereLx7JzdNvhRcAFAdXNuzsscfZbi+TMu671m2tzQ1klpP0ieUPjiJgClIQqt4PIwHKjgKhPmbtzZMTdkmPTpA2y5ZX7bWLgQ+PBD78eZPbgoECP5JqibvKt3ldcQhYF6I09Mr3Qv5ITdAtZwPgQ0y9WG7ncq5ICgHp5TJehmK0kHundPxsK34dFHgc+XHqGiWwWFCi7ixquzT8YpX94BIBovok7WqfeMv0haCTe2au/sDUOGeOtIWxuOGXA3rh5xTTrvhuXzklK+QYMQwM8+eiT5HcDnn9srjLbbLlOFt56lJWSVMkI3U86SvIQh8hgyj8ghnYMr5pwHM2rD/HnnAT//efLL6tV4cZakkahPFiwANq2ozWsbkcHNQMpl0hdjf3Ter8h74PPPgcMOU1IVKSL85eAyzC8WObjSjB6dVc7M8zNOw5CBpudDctFTKWNMYlFXbUuuoV1O1wKGpTd7itp6cLmkW/jz9x6t4Eko+DJEs1FMyY7nQd4VPd/efDFVAlflpUaID8a8up1w1lnB61m2DDjgAA8H1OZv4S4Tjp8Qv/jx4EqTnLA3xTpmVRbTTcN6UAVXgXJwtdkpuKTdxdxDFCY26DllvQo259R2R49+j3Ehkg/yeE5l79r5812NwpVg9Rxdn3S8UJpovlzh80nsSN4bi+p3xML6RHK3giu4hNxMeuHDCeXy4oYdpKtu1dunLbOFAPDZZ5l8uTLU1GDC+h4YtuqAtBe07uLBZbl2SbuqmNh1V0MVHtcTLS14ceapmFGzi21/jBhDOilRcJnboIJLinwZWdY3FF9oqcWLkx8KMEf97GdA38+6uhcsBT74INjxBVozeIpaQ0oPD4uNrJB+FgquxlhHzN+4I1BdnSlmMVff/uPl+Me17mH1jeGNU1Rtzt2Wg8WzYxWm1DIvdgDMv1SHRQ4uieeaObiihd07sN2qOT2nJhk9Wt1wTg+u4oOr8lJj7FjXIiNGAO8tUJ9IqinW0dlIVHJgmDoVmDdPvt0HH+sgX9gjUQhZQ8oL9xCFGZpi7bGsYbus/XttVW06oAAhCq3qlBTepbBVcEFSAGbnwRWLG4rYeHB5WNgKAQxffQCWNmxPAXpA2tospoUInNNf/AL4dQHyvFvdd3375r/dYmbiREjnGHMbgUSbteX/0qWuTgGk2LEYZ6LwIprTrQ0b8MWPO2X2+1iT6kgqn1atkj/IoLRJ9cnOMjqxT0NMZObwxfU74IWZp0qGKPQoNKiqwl/HXoGBy3o6l1u0CHjllayQTkIA+OknoFevHAs2rU9vLK7f3r19Krh8YQyTL4PsUmCbs47Dhg1O9URYcB
kw1F2UCeWXzZ4d6HBpr2/TdXvuuYTsghAn0sYWHvJvm+dHs4JrwNIjsP/HjwBHeI+y0BbLfUp3ee8ZTFi3Z9a2dT4VXMfvtMiioIkgCy8hLDy4rA1xrE6vMWf5o0eHH8qWZPAiQZo7F9hnn+xtJ58MLFmSMFjVzjrT/mCZEIWN/i0S4i32L5M0ZM0fXJWXGvPnO+8XAjfeCNwx7rLC9CfJTz8Bv3n9AtdyCxdKvIObBqOH/5uH+IZJDZv2/WiXgoSoRXqtJwSuHnEtBq04NPE1KfjaeYu6nHL+Gkge7vc11aOiIuaQhyCIx5he15D5LCwsu+DtlHzwAXDrD8lQBiUsnCgERiVCVm65CNDYaL9PmVWYlaVhksjcWhG5HimOPhp49lnJwnYncfXqxP/Jky139+gBvPSS976RIsJqHojAK1HOXPfGG9n7fTyO94y/GId88oA3RUyyoZZ4+/QmJ0WUDi3Le3zoqoNw29grpBRcnuf3Dg5Gbcb2nnsOAHJDFL7xRiK5z6RJmbLJsWK9Rc4QVyIzWJcWXu6LtNGDxbUIIym9NPm+dyI2f6vE8rq6nc/k+bDyLDHu99QmgDvuAJ56yrlpIHA0NhJlqqtd75/U7SniFjm4bEMUmhRcdkJx470vEbIfAOpbOlqWa4xZb1eB0hCFdoa0c+Zkf9+wAXGRbUT7yORf47Lvbkp/36KdKScJiSRWt7ad0aOu53p2GVnW0A039znSvkCqzU8/A+DT8LGm2r1MVmOlO2cXkvDf5oha3BZ3M2cWph8mPv8c+Gr23q7l9tsPuOWW/PfHle+/T/xfvsJyt6Yl3o8JCYKfEIVpa9RPPkl4EZnqyjk+4GQpgITbZ9Y2iTY8ths40baxvaoq4N57gR9+gNg1k1tIAJYhCuMe8hWlZOMAKNjKB1zcAYiWZVfUbvOWFrlyZm+XxkagoQEJsz6L/UaqPb6TkCIjCh5clnk4TDQ0ZO/3OSzM2bgLsPvu8gckhU7VLV3S873l2iR50oTQrOdwu5MaxINL8kKJ1jbcP+H8rNVKVlvmJLvw6S0etQEyYng5PVJFLSqMgvelLyoD5p51g+spT1gpHrL2O+yTuc/btQPq6tzLkSJi3DjgppsS75wuyd47VCQk5LVN8gqkXA8um3vUwuvaDTulmq/3cb9jTeAQhdnttsbboe6r0dlWihZGMV8tz06Cx6EyOkybBqxq3NZyn5frJATQ0eFRa4x1Qu+h7rJpUVOLpUuB9r88Wr7xJJ7f5XkjKoEKrlLDbYW1cmVh+qGYX/0qei8wASMhEGKJlxCFWZ9TCi435ZOfHFymRbtUFTYhAeywDH8En2FCv/02IaF+5x3reN8+c3Bt3myy4KFgSwktLZn7vs9Xu+Qt96GXhWYh1pjrNm8NrcL6HrK1MFbBpk3yZYWAs1gnupif/VNOAQ4+GHLx+CO23iD5J5LX3OR1Vf/jLP91eRnULE6GkyfM2/N/gR/W7pO7w846O8uDy2PfPFh8PzrlXDTGMsItXWgYtXo/vDjz1OyCQebynXf2f2xZ4G/+CHi7+qqnkJx2Gko6vGW+jXRWNm7rKeqqDNL3SoDxornZ96Ekgog33sTYtXsnPJhHjnQsm1rXt7TlPvd2t55bDi5D5Vnl7Duc2ZeTs1vicCONjS5NWawZzOVjrQEVXKbzsaKxG7Z96/ns9xwLQ4LaluzoT5H29C0zevYE/vT9/1nu83KdhFAjprn0uxt95xDlfRUOpbuyKlck3fOLjWHDDO+0Fr/xwAMZUogUH5aOTx4mQ6MyzDFPRtZB3haTljm4zH3s1cs5plsAdKHJDVvG39U5E1c76xzBJgeX5FS49dYJI700JSycKCRbbgk8OOl8AMBNzx+EGTPy0w71kUnK0D1J0xL5u1YYnLKdhhWGEipxouDBhVzhTI4HkWnQ+vmAf/lvzMsPNCqg7LzDDTsHrTgUvx78V8d6sjbrxvqt52WZvuUwdWrGq8xKuAbgXxMuxF/HXp
G9w4snjbn9gw6SP7bciMcL8t4pJWRdsCDv/fDCiBHgoiQA/zf8ehx+uGljnmUgKvK58ZKXFgvqdsTxA+9C+9dfxfTqXaWO8bK+NM+7trePlisPcGvczoB0ty617h1raUGXLsB7z613L2vA3Lf2e8mdM7vKbM+H8SQXqeyTBEOVgqsp1hFr1wavhxQOSudIXqmpkS87Zozzfqf5ae5cYOhQ+bacGDkS0K6/Tk1lhHhENkSh1qc3plbvkdm+bDnQ2Jh1/JQNFiGJPC70hq46CHWtnWz3/2aPacDy5cCXXwZqxw6/1i9an94YV9UjqxtCwFrBFZfra078Zb6pKsH8smcROargLFyYUGhaUfTvSh4Cie9/w4moqyvO+9ztMjkJq4zDxJo1wLp1avpEIoLFQ+wlVG2+cJvvzFbH3ir36SWVHC88hxIEbBVXxs26jeGJVN9MvNJ/x0TIKFgr5nRdQ/uK5PHG85FUcEmtN4p+AiggmzZJe/IsXJgdGsvWe99jiMJ0LcOHS/WjoOQ5RKHWddu81m/FnrvHsWFDYdqqlZDDe8Ht0U6PDxb3YCyWiI5OyosWvV36c9Vmm5eGJKk1p1UIQLv1qPmetA1RaLgnf/u/45064dhHq75c87OxuYWGDQMALO/3g209a5ucz4cKbKNdZCV5lvjNivpD8kvJL79K/gcWBiq4Sg03jwIhoG1uKkxfAGy3XeK/jBz4pJOCtaVqTJjlEAGG4w5RiZ8cXLZ1zV8A3HVX1vGrm7qits40Jni8iZ+b8StcP+rq7CoMQqDtOyXDAJhyhQAOyXA9IIQPoVPy86zaXbKFW6IiWaEpRKFfq0x6cOUFpxCFa9YUTohgcUsXjPBF7Qnmr+oSdhdySD++eVQwGwWm++wDHHts3poiYWDlwSVp6KCKWQs6YEPzVlnbEhFB89QPL0okQ9nLe5+W2ORnnrT5LcauCKF5NGm3/x23/HBlTlbxZZsyuUp14aLg8nHqy/W9YPFiYK+9XAq1a2e7q1ev7Mu+337AuHWZfBieohk43NpXjfgj3nwT0TRIKsE15PKVlXjjDWD02v0K33jAa/zilBMwerTNTpcUD598AnTv7t6GppXvmFGKvDr7ZOmyqTHNyphGdryrlAhR+PUUa6+oGTW7ZQ2Wdvehea5vV2ExP2/cmN2sRWXXjromZ5vqe1/Kg0sCFd6ZJP94WRf84x/Af5/MU84DEmlKb2VFnBEC2BSi1K4IcFwf19fLlSPEJ+45uKz3d+z7CuItsZzju/28h6kC76vL/kt+bluFgJbwFLOQMKjII6Tr/hVQFZqAqDeNd1YeXH4XthwECs7++wNHHRVuHwohnMhn/gptX4tcOUVE+vy7XAi7F9ZYm8NxqVCrS5ekN23ejIJZpJPwKGiIwrY2HHLuntb78jXAeM3OneSzpUcmNvlRu0t4cAmHcpa4Ca6SCeWt+pul4DLiZS73mcOz1JgyBVi2LHHqvv3WppDDef3Tn4DVq+3rn13THeutol9Z1Dl3rr3+YfCKQ/Daa859CY0SVHABwF13AXeOuzTv7XgeKl0OuGvMefiXXRTYKVPw0KTfeGwwl+OOA26+OXA1JCKs2NRNumzq9rPy4HI7JvXFNgeX7MNgDFFoM6ebt6fX0sY2kmOX07vKEdstl+vTwoVy5cwIYd++h0gVJEJ4yRHtwpdfAl8Obq+sPlI8lObKqpyJ4gJeNXn6jam0JI7vG4MG5aVtUp748eByWsI2xTokwv2Y2bzZuVEXurQ3ZUU21PFj1d448rP7ch8cp4W4B+qaO8oJ1rJNwtP/RENmsWQnSIvLKLhoclk4HASYDQ3A+vXZuZRS3Hqrcy7GfCc9J4VF22Vnx/1240b70060PygVxsqUr6UcllZlhZUHVyEteOfOtdwskEfzfg+amMbNuQthlR5cxiFeFxWZDTNnAq+/DrS02Nfp9jtSObj2zfUgEQIZBZcxLnpSse1LiVemawPjz545U339x3/2d1x+uV
zZCy4ATj/dfr8QiKYyKYp9KjIMdqfuRGAiX7wYaEoF0mkqXEQdkh+Mo7+sgapl/m07vZVpTrI1HE1VMH26Yx+s8muambRhD/z0U+a75e9KKbgc3vOv3T83tKFlmwHyI9q27zEHl990CCTB7NmKKnKJO1vyy62S/4GFgSsrUjJYjgmrVkkfv/32idBYEVj/kjImiJCtRW+XEBaZSSbDi8eBb8d5j4l98V5Tsr4bF4Lz6pIxOSxe1FU8SiNW7Ov9nCQHg9rWLbCycdvMZkOuj0O6ZsYGy3NmZsYMb30g/nExJGhpAfbYI3f7yy8Dzz+fpz4VmHzPQ+XqdUAIgPAVXDaC7RwjDJUvux7qWlW7BXbsnC05tjw/bgOVVw+uF18EJkxwzpckOXiJrbfJPVRoGcMbg8vP4pptAQCXfnejRMXZ55GCsfzhRf5vtOOyJIovd1RwBUIIYJvcx9z5ABWoupe++05NPSQ0jJEC2nS5nHpWc4atN5XJg8vVUO+zz5z3Syh+rhl5LY47zrma1DOQ7nWQNZXHcIJphMPs6zlEob8ukAQHHww0N7uXc2PA4M6O+9+fd5Q6ZVoU4Y2oBK6syo1ye3DmzfNUXNez3zei+D5EShvHBaHL89tr9smOllY/fl6Fs+88NEj3Et2wWlJWVOC11wyPnBD2yXA90L4iLrdItjg3d467FGcNuj27iK4DQmC3LhkrIV1mKnSVnhCv2N7ONt4N5US+vc3KIXqHiuXOxImJ/1wLlBgWN4eXsEGBsVNwCZMHl8o1uwettoZcQZqld7jPNo2bdaHllnPSbLj9jtQ5swlHXN/WKfHlrLMSMfbmzcM+zyRihq1p2ta5bmP9JFLoOjBr+VaW+4QAB/FyII/XeMZS63srEOWwECtxjLNBS9w+56Cx7KTVztEH7FsAtu1g8y4qMb/v1LkOF1yRUSDIGmekyxmfr7SCy34+tFTk6R7c16T6ZreD83Sh8XvKR47MRIe/+MbtHcv+edTluOcef+2Q8oEKrnJDxvojorgNnC0twV1khXBeHxfnmSNRxWqh53ifC+GYCPWbFQc736PvvC3dNycs+1hRgRtvBJ58MrNJxatmlw6t3gWPNicxvdg259AwntM2+YSktY0dvPWL5B2n50dFTjgAGDUq/7IyVX21w6/BZBSQfYlS4Vlx9NHyZdescT6vzz4LXHll4C6RPFFQDy4bjF7GiQ3heHBpeu6NbOXp7FqlTYHsn5j5zY1tHbBu81b2CZUA98ErFaLQouljd1uFDhVJoXJlJfD44xD/fda5PhcoRyswNpPvqlXAIX851fNxocKbpzgQAof99ZTM9yjeSyQUjOvM7ls4x8tMvb//X/+LsHixuR7nY9w74j6WVG3eBgO/yeQkcj3E7JJjPCCl4NIs9iWRXlPJGN/YvJvbRmDJ1zqKKOfUU4FnnpEvn3M5izQkCaeR/CEtNdQ0rVLTtCmapn2V/N5D07SfNE1bqGnaR5qmdUhu75j8vjC5fy9DHfckt8/TNO0s5b+GRBYVD7Hb/DR0aMJF1q3hRYvsIxcKwYgRJFzcFoROgu+YXmm92EtO/hX5VNFqubHFVeTg6tKuWUrBta7WkEjUTsFlOLfGz+lzPmlSIpHT6NE5x7a25tY3fNZOrv0iDhTporQU4PueR4T7vbrLLsCrr9rv79sX6NdPYZ+IfywegJdmnYoRI0Loi4G0l3EKlWOkFw8ui/vdcszwubjP+okGpd7/Db8OO737H6C9Q2Jwt8HLkIPTTIfKODqkcnAlF/uBFeEcTO2RPTeFyEVEaVLpo+AaazELQbpNkrmWFuAPVyiIy0WKFuO7ZKdKZwNJ41xj1tfYKbKy5ifFc43r3Dd/vv0+KQ8ui21WHlwua5NB/xyJe475Dli92lSZsG/d49qpFEMNT58OPPVU2L2wZ8aMzJDtJbxhzmPw2aeJ/9OmKelXqHA9qQQvovzbAMwxfH8KwHNCiH0B1AK4Lrn9OgC1ye3PJctB07
SDAFwB4GAAZwN4RdM0uWC1hPjFNFBoGrDvvsAJJ1jPfUIAWL++MH0jZY/VgsotRKHTEsw2/vfGjQD8K5yMSrWffkok/7ZDjJ+Q/KAmROEN316CqdW7uffRuMy19eCyJn3O330XA5b0hP7eBzllRk72nruMOCM+/Eh5nRs3Ap9/rrzaglLQ9e3y5QVsrLCc/83NeFuN0yq0Frm3L7vlw6RJnpxDSb6xeMj+M/1M/POf4bUPJNcE+QpR6MWDS+iobu6Stc0qRKFfoZDRCcsYovCLZT0TGw85xFe9iU7ZK7gAoENl0oNL0zB34074ernHsM0UQKhn4EDLzap0UpG9ZJHtWJEicT5/+1ug79wT7AtszihbdT05b9tYxa5eDbzzUSevvSQRZ8IEoHdv78fJ5uACPEQikB0i/BjDBBl/kgYiTka3Vko7yzWDS9+feW9nPDn1HFhZINl6uHlcR5XiSPzcc8Ddd4fdC3uM+spAU+HcZG6MMWOweXMi8nRUKNaoacWOlIJL07TdAJwL4PXkdw3AaQD6J4u8DeDC5OcLkt+R3H96svwFAD4UQrQIIZYAWAjgGAW/gZQJKt8Dli5NRCexbGPQIKk6aAxI8kGQHFw6NOsiHyQUNpVBFE5JF6ZRo2z2//BD4r8hprwKD67Ztbvg3QVu2W6BdhXuvy0tPDSdpLSHmBC4eOifMLs2Nz56xYZ1ch0ucu65p4ApsMaOVV5ldTVw0UXKqy04+Z5e0o+A7QMdXZ56Cth7b/dy46r2xvvvB2jIh8DAbl1w1FHAggUB+kLUYuvlW+B+mNvP2RBeiMKYyF4kW4YodBupJEIUDlzWM+dZO+rGI6X66dimxfMrdJHxZNc0XDfqapz/7S3+24KNRTrxRr11aC+VXoOEAMCnnwLDVx9gX8Bwz/35z8BOOyH3nrOIWEFKh3vvBW6+Wa6s8RZ4bMqvncsaCuuN2bm0CupBlLTEcr1/rQRlKczPhGyIQqtGXdbaaVmChWe3qhxc0qEgiTI6dvR3nO2l1XXccw+w115+e6Qeu+f6hReASev3KHBvygdZD67/AfgngNQItB2AjUKIlCRzJYBdk593BbACAJL765Ll09stjiGq4MI/G8kJLlWsuhrYHGdeHVIYrAQj7h5c9vf0MTssxe+GX5+7o6YGcx/+GIsbdvDTzWTHdOM/W95Z8Iv0Z1WjUatL4l4gswDWdaBqo/WqKcugy7A9dc5bGhNTWsfK3MTPFVMmOVdo4txzgQsucOl0BHnySSjzenHDbuFntz3o9FYss6MQ+bf6EmvW5rX+fBKPA0uWyJUNJIAyeBXQCo8UAiGyPbiUKk+8KGwtykoLq2SqN9dlqmfSvGzvMU9tpjy4bEb8tLBMCPv8HRL1lzvG09CvH3DTTdbljFfhgguAMWOsCuV3dhYi/22QCKA4H8LEiUBtbW69Hw/bLki1aZau28JzPST/aI2bpMsaFSOLG7Z3LmscDU2x4+ymFWnFl5d56Ztv5A5JKrgs17/pEIUOXbLy+rZq02VtMq8umQ6gnUkW4JSXnCEKSxbb+zYeR3V1Qbvim9tuA/rMPSl3B9eXSnBd2Wuadh6AdUIIC+meejRNu1HTtImapk1cz1BxxEAhnvk99wRu/eF3Dn3ITID932nEf/6T/z6R8sJWwaXrCe2DA9+tOgBNMQvljq7jwAcvwxXDbgjQMfvE7QCg9emdXVZRiEIAaJFQcKU61vflZnS/9bfWRZIL2BWrKlDf2jm9PXXOU7+to0UcdStvNKchadAg4Kuv3LtNCkcxKSnyLovzE3+ljHhj3vHY4lJna1wrKEMtEmylSQXKCygZonDuum6F6Y8Elvk0fA6pObKn2lp/FTlUbqccTM3lIhZH3GK9VV8np0BLf6VgDJMmAX36uJcbOBD47DOLHXYDZ31doH6liKzMKLIdK1+0OsNY1NCQ3Jh9fz76diJsupfLJ9pyDed+ee/JnvtH8k/FimSMs2R4fyfO2T2Tn83V9s
LwuTUuF84wq0qnBrwodIwxgp2w8eCaORPQLjgfgPN7lVtO8UxB576vauya+NC5c84+q9DJ5jpf/7SrXD9KjGKaXvz09frrgTrTEqHo38GK6aJFGBnTteMBnK9p2lIAHyIRmvB5ANtqmpaSOu4GIBWgeBWA3QEguX8bANXG7RbHpBFC9BFCHCWEOGqHHQJ4GxBrwnhwZs1SVpUQQGOLhLDbR71e+WzwlvjHP5R3hZQ5tgvCRYuweWkVYg4Wx3E/1sgS1LZkrAxFnXUomSyWLIFbvjBP7be6WzmmhFnrvxpnW2ZR/Y6AENijZzeMW5eJcWZeIMf13PNoqeBiSINAKJ2OWlqyvmpaIoFtMVKIaVo0yFuoFjNO51IAaGoybUy+HdW2bInNMe/e3EX/clUu2N0Yy1dYby8Q/5pwYdb3WSu3UVe5h4HFyovZ0ttJ0psqp664abtVaPDp0z3VKYNR6XXwX07FhPU9cspssy0fYmUEncxUmmNzcC57vh2dKyDPwZAsU9TUJD6YFgp+bmvxQb+cbWtqO+Onn7zXRfJLOq9USsHpwK5bbkx/9vLW+9KsU7O+20a0kH3PdPFcziKZSsD1Pq6wliksXZr57DSsqvLgsu2PkweXoaEbHt7duoxbv0gg3ngDeOUV+/1Z944Hp5bUterbF5gyxV/fSGnjKg0VQtwjhNhNCLEXgCsADBdC/B+AEQBSZvJ/APBF8vPA5Hck9w8XQojk9is0TeuoaVoPAPsBGK/sl5DoMmCAkmr22gvo0QPocuOVSuojJGysFn+2Cq54HNu+9RzeNYQAlD42IAOX9cwsnocNdz8guQhVkYMLAJa6hH3Ianpjjef6U4K7zm+8DABYvinXar6dlmvxlq/zTeQYOdKg1zKbcSFaiWajRrm8zLW2AvPmWe/7cNEx2HLL/Pfhlcc3Kq1PCHkDXGKD0wMQy1XuFBRD35R6nXrJwWXRrtV859uDyyVEIQBguMRaw4rUWsWiyufHH5f22pqzcmtf1WsXZsceZg4ub/zvf8A2WycFmlVVif8SyqdFixJ/fhRVDFFIAODsP+7iqXxaeJ4z4drfS6efnmNvlajrB+ucs+NNkrBYDFi40EMniXLS85+E4sX4Hr9th8225aZOTeSUTtHQ1slUjwQSE67UnOzi5exWuWUbQXJwNTZ664cMqWsnm6qEnti+sTvFf/4zcItFitOffrI4ZvJkf4YDpmM4zRNAPgeXFXcBuEPTtIVI5Njqm9zeF8B2ye13ALgbAIQQswB8DGA2gMEAbhFC8BU9BAr+7CuSpK1fnz+BpZhmbSlaTCGtSGlgu8iKxdCq5yZYNZJXhUvyObYNB2CkQ8LzQVWIQhlSw0ylm1LNZhE+eXLm+yNTzs0p061T7gKcgq1g2Obakhx3Tz0VeOcd+/0eQ7BHhkLk4CoXDdeYMcABDvnkZZF9abIqN27A6uAdMHDvvSiIYq4cEUD4Ci4DQsuPV7ZruxbDg6WCy209YDPOxHVrwdfle0/IfG9tda7bDgfh3YC5B2Ftk0KvOOLIF19VWq4Z6xuS9/Wbbyb+SwywBx2U+PNFg0TkAVL0qBZS262SZi7ewnaaGD48IcA1ernE48DRA+6xLG++9V97DdhvP89dJQr5aV2uZ68dxnnx1oNH2JY74gjgpjG/T3+XXYHL3tMff7s19LjI9Y62QssOy2/fuE0o5dVrMlUZfonZEHRja67HpGWVk5yz4HSqtF8L2P6E1AsgXSTzTuqamq+t2eFu0ybg+++B445L/Nc2ZzxjX5h1Gk44wVt7OW1qWvEruMrk3TzfeHpzEkKMFEKcl/y8WAhxjBBiXyHEpUKIluT25uT3fZP7FxuOf0wIsY8QYn8hxDdqfwoBEEnVdSweTp9qaoD9/mDv7ZJFr1757QwhFlgJYGyVVBJCN7uE6deNuspTvyxJW25JPM9CKA1RKEWyf5U+lGpxUYF6g/wjZhGiEFuUtkR52bLoTB9e7hynx4KWXfZwDe
2NIArH9g3evUqdmDTJ2kKcyNPn2z0tt0+v3k2Je5wQwJo1LgUsOLH7gvw9nB7qlVVw+cWyrngcndsZ8l/a5kmTM2KxKyZlpOOBY/5+otL6osCgQe6PgcztdOHvOtuuSwFkctxITM6trYm/pmbvSl9RU+teKAw4EatFwfm0nOst6v36K2Hb3MknA4cdlvn+r38BU6v3kGpfIu0TyTM1LV2kyxrnsutHX4ULL5Q7zhwK3+7dWvaWvvyFE7Dy5S/kDPtS463PEMOWiRQtyv53+pkSnXHPR/b3w76z3uEYgzy5zy6Eg7m4VCnixrBhmdvLrOB65hngxORyKRYD8P776X0t8fYYa+3kmkNVFdAvGfFVCGBzLGP8zfd8AgTz4CLFSAiL6fb/vg+PP17wZrFkCbBwlXvuHoCuySQ6pBbLmzcDzz5r2CEh0bQT3LwxT9IsRgIpIVdynFEVolAK2TYtQiHoqMjK9xHTcxfbYptt7ZosCZYsyXwu1AJR9vSNGwfceKNNHRLvN8VGQXJwiUSqiXlr/IXpIiZqkwJUg9n21KkJxbHqcbBY7+socVOvIyy3x0SlkhM8YgSwi7doWACA7lvUYcKkzKvZVh2aA/cljZffZVHWMp+GmxezXQ4u8zpip50AXc+eE/y64NqZEyfp3tnem+dXu8723NyMZaU3hp57bmLeTbHffsAXX9iXLyRbnnqM52OE0CIp+XrmTffw207eCyQb9R5c9vWJWNxxSG02DN3zpstbpHB+D5+9t0rmA5K4GMa5rDnewXqctHjvbF8hZ0hjvgedetQwZip+XL6rVL2AvAdXOidZyvPLw3N22WXAr3/t3OaCuh0d60hFg/l+5rbS7abXD8UayqOIMC655s9PfP7Zz3Jv+xyDVIvUAjJMmQJcmcxWs24d8KfvM6lrIjjNkxCggouULCoGuYa2TtiwIXg9hFjhZCU9dizw978bdkhYlZstwpTiJYFtckFZyBCfmRCFLovZmTNzNulCQ8vgTGiJmIXFsaW3nUtTXGip4e23E2FbvGJ8vnbcEXhj3i/VdSqf+Fz0e0IIvPQScMBdF7iXJdjYuoWtkhUA8PXXAABt+rT0piOOAM45B6hQPA5SAJZnFJzgmlWbfdX1yeKjcMzpW6W/b9UuHAWXdIhCn6cqp66ePRMKLuP2gAouO+Xbjp0bbA+1Mm4hiZxAOSnRli8PpS9+iOqQ+c/ndnYt06myzbUMSVLARbemO7+TZXVl0ya5cuD8HgX26JL0upfJeSXzTpwygDJwyd6Ts75L1ePSn3snXIhT3r/BvZ6UosqrgUpaDmDYZOi3VW2ffAJ88w3gJExrc5l3Uy2ceOexjuWy8KjgkopOU0RoWuLdOfW5qcm5vGoWLChMO1deCUyv2T3xJaVdK2Y4ASiBCi5Scsyb5318sJvk/z7uUnTvbihHTy+SZ1KCn5z3NBlLsnzenxYLW8eyQoTiweWHuNDw78nnGb5bTI2SFu2lQCwGNNjIAJub1elfVLxQOF1243vN+vXAmqZtA7dXCETfNzIWk/lqA/bXuOxocxcg6qLCWclq8xI9Zw7QZ+5JPjtmDd9/8oyKE/zJJ4n/U6cG60pYc4xdwnhFN5/l46LYg8vu3Dn9hJFr9k98cIwvWfykTu38+cCECdZl6upcwqUNHqy6WznoQsOKFYnPQXQXWtAKQqRU15l+2dJK6e83X58FxrPtNFZowtmDK4sK+WsoFi5KfFD4m4g30u+uEnOQZVQTifjoZsMn24i8rj3IMLV6dw+l5T24zHyz/JD05/Tvd6lM9O6T+G8hb4vt6OzubmssKxPCQ/IhLfVxNp92kx5Pdf6Ix4t1ms8Q+kksDajgIiXHAQcA48erq09BOgZCpLFVcPldaKvCSw6ulAdXARVcqTWBpXJKgo0tmWS4Vp5wVovyUl2HPPMMsLVN1KWrrwa23bag3XFE1xMKuV0P385yX1FSAMt4oRetvE89EycGryN5MmW8VmVzNNhRqu
NOFNi2Q6OSEyw2Jz24JPM/5LByZbIihRc7qAeXVYhCN6GQbIhCAIjHs/I1TZpiM5e7/Y7UwG9TTmqN8Pnn7mWKkBdfTEQGqEwazJ9xBnCMTcS/c89NONYVBJvJaML6HthDLnWRIxwyS5xBgwConxtT45H220ty9rktn7JvafvSOR5cqSgTP/3k3kGSF1Ih8byGKMxsNL18WIxvxrlz40Zgft1OlvV7MQRcvin3PciSVHIknzm4vq/aN1NErkW0rd9oW6Xbujnr9KXWRg79A+C6Dih3mprCt+PJR5SfSL7X8h4sOFRwkZJk82Zvg1ypW26QaGI155329h+wcqU/Dy6/yh0pku27KdE6VrZlPLgKKVaQ7J8dxvOdDk9hxEJb4nZJ4nGgutpXd5Tz6KPAzTfb75ddfy1apKY/gLpxt6UFWL3WIm9aka4pCzIfCRHNF4EwMFqx+L1pNBvDBAuC5rIp1vs6Mji4pWzRrjU0pVIWScmDykvda/RB0mWtum0laHMNcWTB2rXAqIUWeUJ0PWv+PmrAvzzXneiUc4hCqXWShFdnFionRh+0tQE1FssWM3/9aya3669+5W5LsWxZ5nOOIL7A702B5ytOeCWB5WUMQVLrdjtl7XcomxuiMLmB1rWh4cmDy+rimo773+tb5hQxzk433aDjqWlnW9afNc4qXvx59eDSfnMeBg8GZtdmPK50SbmD3pBIyLSpuV3OPlcFl3H/wIGmLto8XEL+GqaLm36vn4hQfpg/Pyt9b14wjzM33eQvT6wVqXP0wAOZ4AWEhAkVXOVGGUlGZN5lzjkHmDbNvRwhhWTCBECrSWpGUpmKJRZpec3BlcRNqCEE0n0taIjCJH7yFZgXyGfsNie3jE8Prr339tydvPDii0Dv3onPCxda5jzOIWzrLhmEsB/ry2i688zYdftg1aqwexENBv3YlfdKGbHpmVdt97WviCsZOH5a18O5gE/L6SD8qV+wUJm+QhRalL/tNuDW/qdYNKDniLlSjnBudVo2bbNWkTKCmTVLqo00IUt17rsP2E7SeD/FsGFy5YIq5FVB/RSxJfnOoVLp+tBDwOqmbWz3a8L5nUzWg8vMg5POT3yooIguLNK5nKUip1hcJ5Ny8m8P5t5HxvfO+okFzh2UysHlwwP7nHNci1iypGF7NDUBB9x9Yc4+N1mBcf8RD18oF6XJaw4u5K5vDjgAOOoo4KqrpKrwzf77A8d6SC/mRkuLxcZ167K+5uP9/rnngBEj7PcXYg4v+nUCX0aVwNmzhBACaGzJtYwwFypk2LCoM3hwIjd8Y6xj2F0hZYjdPBabMQfa228lvqSslfxakqki2b6bcMi4SAwjRGH3LeqdC0qsfuJWluo+f0q9S3fCYL/9gDvvdC+3yy72VuEqFpEq1nGlqOASev5t43/zzS3o1SvPjRQJ5/7zYM/y7Bw8hCgMSuq+1jSm6fBD68p1zgUUxDbdodOmYBV4CQmcB6wMOixDFPq43SvX2UhWdD1HWNinj/f63fJB9J13gvdK3fDq8aUYo6cVkPAcv+MONXUHDakaGYpU8hXWGBBVLOdYhdc29d7y738DNS1dHEoK+fHPgwcXCZ+0QkXiAlsWkVhDGFf5Wtx+/siq36U/O3WWTLQke9PJ/P7U7xDCUWE2o2ZX+2nSpR3jMz+1apcs4wzbNhWFKJw8GXjvvUBVSNHWpkbptHw50KmTxY6UhWse8HOKBw4E+i85Um1Hfv5zoMUiR2PYFKswooihgquEGDgQ6HLtpY5lJizsmt8wZkXIww8D3d56VqosxyhSCMToMRnrwNWrE//DzsGVxE303qa3w73/7ZZY7BbyxVw2RKHVyk/TsrzfLD3hfIQojAo//3mO8VaO4s3ut5hfSArxm1vi7dKOi24UbZ4tF6Zs8JYsmgSjWJ5lILuvlh4uxJE2PTecaRYKboaOlbHEB7/a95BvyHk1O+RsMyqfGhqAFSuAqjorSYoB0+9YtgzQl6+0LWeev/
1E6VKyDtoyN6SUIyFrms23y8iRCWvqUoKKAOJGIYdNDYD4ZrD9ftn71a7TvOFDIx1eX1GIQitk71Uv5m5e5z4Vz4tsmy1xewN8x35YhFNPl1eYg0sky771FjBlitQhSqmtVRMy0DYCt09r2+Zm93WYn/vof/8DXph5uq8+2bLrrtAmTFBbp0Luugs47TQAmwIawBFXqOkoIVZavDOaOeauU7HAJpFlKeFk1W/G0pWXkALgtCbIcdn3m+xWFWmLcveiT/TZPn/9sCH1EuB6DiqtczXdeOCY9HcrIwCr310sypXJkzOfHdLPeKZPH2DUKGDqVH/H2724/fqbv+Dgg+XrKTU5gICGZ6afFXY3yorAL/rpHFz5lbAtWpStdC61e78QFELBFdiTLw8ht7xwVv8bcrYZQxRecw2wxx7APv+42FO9e+0FfLT4aNv95rNmOce6XJ/UGiDQZfR6cMj5cgoh2M/XWNPUBLz1/b55bV9AK9rBsr6tc9hdiBQNETkf2uYm4MexsqXtdxkHOeODXKT3aymgefDgMnsdb9uh0bMHl3M5eaQN2GVDFEr8ju/XJsduiXNld0t3ahezP8ii3qxHxu44jyEKdVEBCIFrrwXuvlvqkFBYvBgYM0bt8PC3vwFvv229r3PnRL4uR71RnYvnYAEtDwoRRcMvgwYlQzguXGhfqJisLSMMFVwlBNdCGaZNyxbqFjuy3gykNNA0kZvkNmwPrpSCS1bgtnJlKMI5t3PwxGc/s9y+ZbuMpjtm9ZJgseiwXYbU1jr24ccfgfXrHYt4ZvZsIObwjpCia1d1bd50E3DKKcARR6irEwBiohKLF8uVdVoLFus6UTDuXMFRpazO94i3776J8SPdHtd9nmnV7S2JNQ1qFFxuik63NpIKkyiNYcauVFXJdWz+mq0kK095cGXPvZN/8j4Wpg23gzyN225rvb2hwXp7AS6UrtvLkcK4TyzblEnuaeLrr4Fr+7qHjQwy1hUiPy0Jl1WrgJHLC5f0Vvt8gPN+4/0qG6LQmFyIk3s4CIv3bwfM75wCWvZxNsYPxuHTMo+XD5QruFLpBhyKDFpxqFSTTm1NXbOT/Rym6znnWGa9Xr+pQr4wkj81SgsuC2bPTuRAOylYOtUsvvwy4U3Vq5e9o0TfvsAxx9jXIV5+xbkRGeGECiI4Zi5r6Iabbk58TttXO/Uz4vdgscAVXwkRwec6NP72N+CPfwy7F+ro3Llw8wOJBhXmJLdSFmF5HNLTCi5JCq1hllTA3fvuQTnb/jf9VLwy++T0dythiFW9tiEYXay5f/lL4PbbHYt45uCD7S2wgpDPtZaKqh37VywudiaYc6N4OPJI4IorgJ73nONemESC1gJ4cFXYjG4//ggsWSLRRiyGTz8FTvlKIlligUhZOAOAWL5C6phVNR68LYTIEWT1699B/vgkSjy4Tj7ZevsXX1hvL4BQ4uWX7fVu5uZDk5FE1BpvY2tnviSXOH/+M3Dmh8Ff/GXvEq3VOfxL9u0m6cG1dq1k6ySfpK+WhGdujvJFmBRcqTzaJozrfCfDzKz3AUnv5SjipD+68rPfYqydM6QQaF+RfR3i1RvT++zel1as65guI9c/Q060iJ7Ggw8G5s9XW+f55yf+jxsH7O4SGV/TgKoqix1uIcTKWGnTqrdHn9dMMqWo3mAlBBVcJQSflzwSgcH5/fcTExApHYSN/F0ILSMgSy2wQwyBs/82a9O5XqQtzTStoI9Nqi0/C/xxVXtjek1mZScbojDI78uHwrqpyf+xERjilCOGfhd2F3wRVliyckaYhltZpkwBPvoImLY8uGvkhg3e13Fc93mnICEKba7LL38JXHaZRBu6jvFjo2XVtLAuk5dLtNplizfh8VyqmIaUCPna2Xj52SW9K8AEKhOG/r77EgIoO0ezIFTPXZflYGJJHs9DuYYoJHIovbwSxlGa5pxn2OjF69Q3baFBYs0QhZEgbWAqMZ6Z1+s5Ci4bzY3xOMcUVB7eBz
x7cLn9PF3H8uXAovrcnJw5uFS2Y6cGiGXLbfc/8YR9HzpUZK+FxPffu7fpMURhXFQUrVGkEencbj6n6urqXPlFlN5Z8x0mPggc0gsHFVzElU6VDJeEmhr7fQVyrbrmGuCWWwrSFAkZAS0TIiECCq55dd2xxV47AvCwKCq0xsQmSb0fckJOCOC1abn++bY/USZuex7W0eW4eBJLl9qf7hX2L1NRphSVjVFH14Hp04F2F/1G+hirmPR3j/eWkyhFfT0wd27m+9y5wEsv+aqKuNDqkOwcgBoPLs1lgHdp45y7D4c+anTgfqjkvYXHpT//WLWP1DFeT6WUAY2bFXvy1TaQ0MWuDZvtIp5/wViFw6lJdeuxx4Du3fMTweLdwTvi2GPV1yt1j+g6gqg/OaeWOJqmdv2bEqA7Nemyv6FBw5VXuhfWjNEuSkDAXvQIw8zhI0RhXFRkv6/X11set7JxW0Md9oN7jneuw80kreCSqCvFsccCI9fs76leK7p2bIJ4403b/XPm2OywSg+QPOfHnbU1vljW0/owXT7MJADEI+z9pgJVU+DZZ8P7OkAInHQS8Mar0fTwLjgyizkSCCq4iCtR0syHhsOAc+ElFZg2LWHVUOCmSZFjd211oaWtUNKCk4jcCLrseKDrBR070h5ceWgzFgNemfKL3DbdOmP3HWr0lZdfDjz0UPB6gLByeAS/VmLCJNsXyGKFc27hEcIm9IYDTjHpvXLSScCJJyY+//OfwG9/C/zlL+7HlaNSOyiOObgg1HhwOeyTyfUweOL2iLdGT9i5ZLHwdM9lCf+c3IpsDFTO3n0mHnnESw8VhSj0eLCTDO3bbxO584JSDDKR4aNcvCP9MnFiIGNCzqkljmoF16xZ7k1KiIz79cuUloIeXJEg9f6tHX0U9JjzPGyes2J6pZRS5Z7xF2PRIus6/OK1Htd5QwhPaRUdcyJDg15rk0QSQLzRRvlhIUtIff9pUnv7BnVvspOYXhnKRDpqZEQmb0lWrMjNQOH6C4TAmDHAmppO+epWmqIYNTm25x0quEoIPi/5x+ol6YuBFejZE7j55jy1WVxzH1GAcZGaVoRE4UbQdXmlhK4rsxjyghKliek5t1NG3fzBybjqqtztw8eYFt1tmXBOqXFaVsF1112JsENWfPxxfvJuGcnXvFJVBczZuLNc4XXrbHcJAeC119R0ipQ1Uvd6i3PeDekyJhYvznx+5hkp+RrxSXNBPLiEbV2bN8u1sX5zl8D9UM2atQEEaAMGuBY2n5XBKw7BAw+Y0phEMA+JU5vDhiEtyPTL6acD771nvz8Ky0MAOP23wUO1WrJxY6DD1zRti2Xrt1DTFxI9hDfFuyuSlUkrTmX7RgVXJDDm0Gz57GvHsjkRP4BsBddOO9keu+++AGIxR8NM4z02e0F7VDdvaVvWa4hCV4SAkJlcZEI5ChdFSOMm6+0WsgSp+S6VL1TSuzpmyL1dWwt89pnUYYE55dTsa3HZZcC0aXLHLlsmH46YuZ3Dh0N64aCCq4Tgg5NHJGbT1jxFcozKiytRj5MHV4oQIxPmMm6c/AtdSKE28iHcim+0X0FaCZ1yhDwWAm+xbr1U208/DTz1lFRRPPYYcMopcmX79QO6dTNsmD5d6ji/49EbbwBDhmS+X3QRcPKXd0odq82d7dynJUustxfpgn7d5q3C7kL5sXSpo4dEmm+/dS2yse+nrmXmzMmOfMx5vnA8MeUc5wJ5DlE4e7ZcG8aQgFFBxmvBSNbP9KH4TXHBBbapTHJIG27nw5bX5ro5rdOCLIV0PWG/MXw4sDwZcddK1yNWrPDfiE9cz6/ED5e+mxQ8k2c+elLgOkhECWEC9SZzsS9845ir0jZwH/2wq98GiCqEyMrjo//wo3Nxtxxc22zj3N6IEY7vKsY7++DzemBq9R62ZaVzZKeUP25juBDK0hIIaC6/Mzc1QCwGfPJpBT5f2tOxrCXJa5AOVehWHJnrNn48cMklUocp55NPgM8/ly
u7117ATTdlbws8FPrwlHZ91y6gPCjKObhST/OgMdbv+Cd0X8CXQUVQwVVC5Gst1BJ3cAEuF4TA1KnAKkPMZDNO51/XgTp7z2y3pkmZYVwIxmP2luAFp7FRvhvxeEEVDPkMURh/uVfACnKlX2L1GunDHcM+GPatXQuMGiVfZ22t4fuw4dL98cN11wF/+lPme1OTmnrfXXAc7vjxUjWVRYT9Pno07C6UHx98ILeGkvAkqJ8wz7XMQQclnokUURjey4XL9pnoXCBfIQqN9RZprhVt5Ajfx26usLc8B+B63ueOtvfkNaIkRKFHnC5nkH6sWwfceGP2tq5dgRkzTG1syFOM9CB8951rESE0T9b4QYjpVBiULELIGahIICsg1eBB+O9CczI62xX/y4RCX1vTAQDQ2AjsuaeadogEQmQ8sAHE23V0LJ7rwVXhaX7/7b8PxcT19hc4n+/RbvdvY5OP0Jp2ReDNAPXpp4H27YHLrtkC36/dL6cuWfS4XOkg53m9nL2qNF7kucb3eCekDX5kLYmS3HcfsN07zzqW0bYsnPd0pGf51asBAOf+yVpJvU2HzXwZVAQVXCUEjX3yyxFHAM/OOMPXsb17A9tu61xG04ANG6TS95ASYXmttRWH8ZKndCORuA8qKjzl4AoDaQs2B8ynOr5idbAKLc5FGGGU/KDyvsuHN+LM2l3Re87J6ismZYcqAZns2KdCyRuJeaHI6NLOxZNIwdxVYSUkXbYs89kQtraY0KZM8lTeeH9ucf2VruW7tLe/Nt1G9M+t1IK0giuIqMOuDTsPLgflSZBntNImpZVZqBVJb+UpU9TVpWKgi1Q4BKIUXS+4DMSTp4APB4edf30Err4auOOOjPcmKQzGy6VXOIc0tnyXM1xQNw+iT3/ojs3xDl66FxzJ8bTLyT9HY6MaJZcQmuOcbJ7DZs50rku2P8MWymmHheEYL4wfD+y4o+fDlOF33LP9qV6SriGhD6MjhBxa3Nk7LpLruCKFCi5CCoBRruHE3nsn4u8aoQCrNHnnHeC3b/zacp8uKtILwUi9k1dWyk/A8XhBc3ClnpN8PC/xoIsOi7fXDc1d8OCDcoe7LWCLxRmgWPpJyhNlCi7JQcj4XHOeLxxOQhYNUOTBZVGHcQAsUgWXZ2TPZbJc50r787LLFnJhEELJwWW4tOvWAUuXZr7nQ8FVKkgrIYXwHB4zp4qWPMWRJ6Gz38NXYfBgNXXF9QosXO8SVg4ePQWqnL1P7caId98F+vRxr37evEARYMuOmhrnSGzGEMPpKCo2WBp9GhVcAZcTazdvnbd3p7yE8bVBF5rj3PyLnRbb7rOqSxZP59/HxTKGGldFPpT1OT+taq1kweIi6DqBlAZUcJUQ9OAqfhoagNGjs7cV+VxTFPzrX8DXznlkPTN0aDKZvA3jx9vvEyJz3edt2A6xWETug4oK6YWl20tBvlARotCsxIvpclKmzZuT+VXMWGgpx6/vgYcf9tO7bJYtKx4hWKEVXIV8eSPFj6c11LBh9vUIuRtdhYLLy3GqBIClTIWmK8rBZVFHe4OVa6QsV+TxOqKmTuVXX0mWd9hnTALvhJIQhR4PNnpwnXkm0KOHXFXTpwPPP2+/307pnhPpQebKRGIRmc3Gls5oHP6Te0FaxxAHFm7YFg32qXI9MXLN/tjvgd+5lvMkSHXJURB0OjjgAOCZZ4LVUU5st51DbmORPZr6MpgwXNCg7yF3jrsUffsGqsKStjZg7Oq91FSWzullP0fHRCVa4vbecMfumJ1L2WGJLXVOv5i8GwBgi0o5za8w506TJB9TUzyeCVvqFelpXiLcutL2iOtd69eLkORCBRchinASjgUZrzjW5Z/HHweefFJtnWeeCbzxhv1+x7xKhmnwuHf+jP/9z+WAQlFRIb1Yn7xsu8K6WyfPj4oQhWbiknU++ihw8MEWOwKugAtx6QtxdxWpTJeUAZ6fsYEDbXd9uqgnUO2eE6fQHlznnFM+jkNOOJ1qTY
PSi5FVlXEALNLB0Kt1bOr3u6ZjMlr12BATcpYca+q2yGpbKTaVGvN8pMIHbtqU3Ocw/T/5JHD77Qq6JVNonntuQOVtunDEZ/fjuj7HSjQWvDUavBCVaJq6eyq+aGngOurrnffrOtC9e+BmSoYNG+z3ZeXgcjHWtHzPNQz6KhQg+fAS6tcP+MtoRfmLhYB2nPM4/quv/4a9P3zcdv/fx12aNcy3OjjcyswG933SE3PnAttvIRcLXEBzDqtbQBnMv/8NdO4sV7atDZg/372c9FgVEW+JLu19aviKGK5R1EEFVwkRkTGJKCYKeo1ywClcQT7qdLquuilWdXU1sL7eOdFtQdDkxVtusYZVk8+FgWyIQlsL0gJbHzt5DnrloYeAt99WU5fxNBR0XFuxooCNkWLkbz9ehn//W6KgxI37tx8vA9bahP9QzMEHA7fc4l4u1e18zHPFhpPhhQZ3RYsXsqy/jQNgkXqkeH3PSM3Lk3+S06w6XZuvlh2aKuRYxwNfHZPVtiqqqoALXj3bcp/V5dwqmWLV0Zgpue+TT4A5c3LDmcumApMyJpJdGDhJfpPMnAksqd9erj4FfPL9ztKe9HbwXYqoRINQZhim93ldUU32xOOJMazsSVr5bFFvv0Yzvum6TdVu84yKkLmVG6qUG944KZDC4mRDOmUtZt9BITSp87F2LSBaPVh1paxSJNC0hGxG5rK4KZ+D8N13wP77q69X14Gn78tjx13YZYuNvo6jLJwAVHCVFHyo84jEDFZfDwwfnr1t2jTpw4M0TRSwerWaeoRIePIAzsJEfZV9g+YF8TPPAN2vP09F9wIj65WlLV9WWGuUVIgEBc+Lud+L6ncIVqHDG1I+PCq22ML7MXbXtXdvKAmlCBR+jkrfC6tWFbZhUnT8ULWvu5eJEbeBxmNCL+FT4bFiBTBypHu5VPX04CpMDq5UDVmXtcgVXFfu+5PvU1O5bo1UOadr03vOSZJ1KMDih44bBwyc0cOicHaIwlzlk3szl10GHHQQcMIJ/rpn18TPfpbotxszZhi+SMTDOvRQ4L6JF1rumz9fSkfmicuePkrag88OWkeTqKI3e9c26Lq/aaTsZQqjRgEA2k+dYL3fdIJ8OVsb6hC6gpDH348GFsvnqHJDF5ra+0BRZWPGZD5rLfYePLrQHCxKsxFxuYfENTycxb7f/z4j72lszC0Sjyfm1m22SXjMqZIz+aGutbMX/R3q6oC7Hts6fx1yoVW3D2dZqsgqbok7VHCVEFRw5RGJAWf4cOD00zPfa2qAnj3z0/Rf/0pLLNUsX574v88+Uu/3trS2Avffn/jstDDuPXAX2326KRBQZKIZCRFKAncZUs9JPoQYp331d7nG7XB4C+3QITfa2fz5wLXXZr5rVvlcCoiquSW0dRsnRxJB0rdlWxtETGKQD+AVRg+uDE7jUIXmU2pow4TF22W+JOs9uOuqolRwCaF5znGZEu5pzcHdik/dZV6qI85tqsjBZVmv/T477+Tnnwcm2MhRreo0K6BFo4fwShYsWGAQGjrMg4cdZviSirHok/33TyjsCCllVK7L/bxXnXIKcNZZHtrQs/+XLY2Nnoq7vX9bzguGjcremRW6XAldRF6OXunwfMm+58c3NkiX9ZNSYfDgjAFJly650U4++igzt155JbDrrp6b8IzddT20/4M49VRjOeffG/b9sbTBn4e41zDakSPsE18iUMFFiATazt4DVxsFSao9uF58MTenQXMzMGuW/3bKFtMJXrwY+PZb/9WpCMNW1bQ1jvv8Hv+dyBe6Lr1Y1DR14TsKjfG6KQn3l7wp7O6HJdm5dfH558BbbwGHHpo4IBbTsOWWCvpRbARc6KXvVY/eNIQUlFhMblwdMsRys4z+Vt+YCDWi1zonuy8HHD24lCnzExUd/9AZGQfSpKRM95nMPGz6LToGfedKuhglSZ3rCs0tzlMiNKTTiP+bPadLtZkSKAYxdnn5812xcKF8eaMHl5HbbwfGj7
c/zjzFme8/8eZb8p1QhYKHwKMMuSAUNCcsKXlU3k1xUZEbn9SFMWPSzkgA3JfLqf2RMZiMKkKgU7uMpYGvEIVGDy5Vd4ri0MlR9OAyUllhf+Jdva2S6K/JWysLAA2N1u+KGuQMn8yPsNU8mDW9qnZ1BoDRo213eVnTyFKq+pgt26nPAeZmFFGipzIUKPUpIWikHh3Gjwe+/lqu7OTJwOP2eTdtJw/z9X76aeCQQ+TaJAYM2oXUOf3mG//VGV8e9GX2uX+6drSXAMyv28l/B/KJrktbo4U1HKleIPzhDwoqcVFwmUmVmzkzcxab5Ay584JxrLELtzF3bqLf9fUK5yIhsO++wLsLjgtUjZ3wkZC8IfEQpIpMm67l3TNWvJ540dffVJRQr4hxGoZVWX8ahVrpNUGRhygEgNfmnujrOBXnNabLvbKquIK3vvAzPPecfHnj8ys7z59yioQxlaRESkp5IzEm6TqwsUUyu32wpggpajQIqefulFOA0Wv2cyyjC82Xh7aX5yw1LhXp1FNQjNfVSSF44YUCD0+2SB+g2INL9XutritWTORBy3HgtvZhjXVRIdVmXFRId21zrAO2vuLXlvuEpLjc/Dy6PmuvvSZVryfG2Cu4/IwXTvz0k9qc3ypQ5Vnr5EHol6L3LisiqOAiJA/87nfAH/+Y+e40Ufz3v8C//mW/XyywfsE1OyV4ia1bbmyzjUMeAouL49fCbcSIhHdduuqJk2zLtjkkzI5qGEBPi9jOnQtqMZsvK6KlS533X7HPeOkQhVG2dJK2MlxjHUT8wAOBoUOBdetkG5Q4GbqORYuATW2dJCvNZe1aoN1xR/k+nhAjQ2d2x7RpwLtzj3Yu6OFNsudxnRCXyS8TwBPxd71PAeBsdOHEl18mBHWlTj5mrPRQl5oHoCHeVh5SRpH88bttuVGmsOOa4ebvf58u51xN4UMUxn0ouEaNyk0+n+PBJdkXpya9nIeXXgK6PveA/AGElCmyU/yoUUB1SxfHMjr85V7xIrBOCds/+8xzMyVJi12OH5MnsZOS4osv3C+AihxcqimGfIS/2nWu7T4hIPW86EKT/q1OMplMo854VnDlw4NLgpNPBsav3ytwPccd5+yhHgaq7uzGWAdFNcnDHFzqoIKrhKDFXHTwkmTatS6DxiQWA6Yno7TwestTX++QB6FD7iQWj/kTPp12GnDvvZnvdoqqeBy4ej/7zN9e81wUDF2Xtj/R9HDiYKhQqnlZ/HfrKOFalQpN1WZ9TnKEWgrWN17rcCqf1T+HFbvRkuvMMy0KxA1xW1evsijgoVOS1DEiG1HImf85Cz17AlcPvcq5oAcPrqDY1fPgg8CwYYnPA5YeAcC/8cQ332SHQypmXOcIBeOOsQqzgksXGtpdfkngNooBkZwuju+eh9g4NqgIUei9zczrdJDbJ3ctoC5/yCV/38u1zAp/+m9Cyg4NQtkYE5f0Ts3pgw+PjP/7v0RKg3Lnyann2O7L8uDyEwHC8J6kxINLsbFoMYQodPJ2EZAL89ylfYt011RcJ8/v8l6M1iR/iMy9Mno00BTraL2ziIWLKm9DKaNDL5itmSygaksdVHCVEmvt3XlJ4dC03Jw6dqxenZtU2oxxspo+HTj88Ew72eU8dLIMGTMGeP11ix0WCq6OaFHSpt2CqV07YHXTNp6PCx0hsgQ5TuhxeWWYCna/7WI88EA+hFpyv+L554Fp0+yqSNQhPukv12IJPMtDhxq+pBZ2hjiLUr8xcA6uol6rkxJnzhzghhvyV//DDydCFxuJS47fZkopB6DrHKF4ALZScJUbsr9YxZlPne9CzqMy11Qm2qDsfOXFgyvFZ8Ps15yEkPDwa9QYjwMDBiQ+jx4N3H+/fVnjmHHddb6aK3piMaDv0D1cyxnXCHo82ESiJkShYgWX4hCFWmf/UTZykOiYAKRC7XSsdBGwZdWpXsGlNByorYAhm6C/4+d/+QXmzQtURXgIEd0wgGPHRtV0vSShgquU+G
Zw2D0gFnz6KTJJxk3suivwySfOx9stjvxGKxo7VnqeLCk++cRGmGixmLrhgO+VtOm0sF3VuK3tPr9CyLzjIXbjy9NPLHhS70ceUVNPVgJbmbcAIXD77Q65XZPnTYyyj41tqs6SF15wD5noVocftE3ulkdWfP01cPPNSJuLtsYrMWKEh3Y7Bg8RQAUXCYWqKtciixbZGF3Y4eNmNh/ywszTPNcBAO3b+zoskjh7q6rPwZUmOQ/EVFuGRpgLXvwV3nvPXch32T17K2szdQWrm4LlkvJiRNZnzomu5fbbzz2cT26IQuvzdsstwE03uZcjGVQ924Sopq7Vfaxavz53W1sbcPHFic/jxwOPPmp/vFHYvmCBxw6WCHPnAte/fIRzISGy5qtY3GZsdZgQup5wUKaYgrFZebhdaJEMnQggE/HE4bzpQpOSRzwx5Rwl5//q/X70dRGUXrepUwNXIfMKMXnhNhg7tjgNbSPdZwltp/AZqpbkElEpKvFDZLXWZc5vfwt89JH/4+0mZ7uX786dgZUr7es7/nib8GHlisVk0rmdvNWPE49MOde+WbfFWxSJx6XHmeUNXfPcGWuUvwjIvARI5uCyu66ygrTbbgNefdW9O16ZMQOYvXFn2/1a0vOqRcKx0dj3Pn2A3r2R/oEbW7fEaf7k64QUFdpJJ7oXUtWWw3Rh3vfq7JPy25kiwGnuzYdRRmpM3P+SQwAAMZ/hqIqVXr2AdhXOL/efDNsOI39ojw8XHeNa3/g5Lvlsktfw2OevlO9kQJ6dcUb6s9NywC0hu+xaYN68xPyaKafmvi1l2UqhDa5I6aPqeTnu83tcy+y4Y7A2xA9j05/dIseUKrLXy1jszbm/sC7koGDZ2JDJ76UrELWqFnzrOiDG/qisPqUkf6dT2E4hqeBq0dv5uuZmtmovF9NT1oNr9epkQJOITrjFahgaWaUtIO/OF9F7otgor7esEifsAamL5ARQzjz/lPdzZDfU2V3v5uaEZXi509bmPwyaKgWTLiowYwZw8MFW+5wUXBEdmj14cFU3d0F1S+HjWt0x7jK1FbrcQ1JLkeTCRnbZIp0Py2cdZg47DLhz3KUObSYq69QJmFa9u3S9lZrK+AyEEBmamzPjhGaSA/i1Zi2ldy7Xn6IiB5fhcyoc0PzliTA+rsnMS4wffgC2bOduHXHjnVtJ1XfsLUc77heq84t4RGr+Xr3aen9jg682J67f03efPLNhg2uRsN9HCck3iWBYxXOji48z4WLK9fk0joMX7zXZskz1BoGphvec03eZY11ZLGa93dymFr33eSEAsWx52N2wJrlg7b6FfeQQHXIKrpheIf2Mzq/bybmAxCSao+CqWmdZbtddk9FNXNh7b+nbDACwaRNQ3xo8XKTU+BDRl4LIjm0yHlw0wlFG9EZd4ht6cEWf2+/uJOUFYcRuwHMbxNeujfBAXwA6dADuu0+ioMUk/YRD8lmvHHYYMHt27tzmpOCK6pM8Ye5WWNko55k1pXoPDFzWM78dyhNCKHYTTym4ZBcvra22u9KhSRsbA3Yqwc72jlueMY83FevWWu8oEEJoZT0GkvLAfI8bE8h/800i52MK38Yb1dWJ/x6MHEqVn37yVl6sWJk1ncTKTMEFyClWFyyzSXruEV2Rgsvv3CHV9tix1ttNc7+scK6+LVg4Rk9s3Fi4thTCEIVENUrvqDwJjP/xD+C114AhKzNh88p1XWw8xZ8tPRK3355b5qa/dsDkDRmDAWNEl3ffNdQhqXlQk4NLLbqIsMQwucbsUGF/fmU9uLwYC8/faK/g8qPIvv9+YOh79qHK161zf+SXLDF4fUuMDyeeCJz+9R0eemmNVqSR8qJucMA1SOGggosoozVefi/tfvAsHLELUVi30fE4pzCF5cLjjwNY7mKlpGoWFwK/2Mnedc6ss5habZ/oNqpWHMf88xSMXLN/2N0oKG63h9S1cglRmFPnkKG2+9Ivpi6xjoQAli0D7v2Ls0X42rXu/f
H7KlSxeZPtvkLd4+X6Ik9KhBUr0h+HTO9uWcT8fDqNWb49uMZPSHywE8x75J57gEmTlFTlGaexx/gC+vzzwK23Zu9fvRo47jj3vhvbEL16Z6Vkay1HBVcB3+sFFCd3LyA5z7LEMS3N7qWUnn/ZRKARY/mm7cLuAikhhIjuu5qR//wHuPFG4IphmSTUKtbF69cDDf4cTkNj4cLs788/n1umpSX75Jw56HZcey1w6KHA1VcbDIgk4zxG0YMr4VUe0Xs3HfHEvn9rN28tpeDae+v10nOfU84vWYzP1aOPAt+sONSxrMx63MuzOm+efNnAbUZRAyZEpJ093E5rVv53EojojbqkaGnVSygLeR6p8PjU2Y11Wv9PbMtVVdlGQCk/vvoq62uszXRCVU0mLostB6ecHFQstIh/PAuB3e6h5L0hnU8vZn8veQlR+OGHwBMvyYV9UoXxVFTSWomQYNTVpT8OnWnjbrm5Sbq6wNbE9fZhY7zw5JOJ3ExBeeopYMwYb8fI5uC6/Xbg5Zez96cUJ2ecAUeMbejQMHduZl9DIb1tIsLvht/gXkgRQuTH0VA6l0eAac+P4LlTZ+eDlBt5SGgPaVhCiDfGzZAP6T5zZrC2VDyfO+4InHde8HoKSd++7mWExQD+1lsW51zypV6JB5diZVSkZejp92V7vl5+mNQkf8wOS30ZdbU3eY/Jni6vz5Wn61Dgi+baXAStiCJ9X3NRVFCo4Coh6PpYHFR6NN61FYq3mUKZGC7/5ZcDF1zgtWelycgFu2Z9z/F8k5gRGxqAqVNdCrlYjngJTakqBxgJQPq+cL4/pEbdZF3SObgcFuTpNVIBV3Ke1mUGV4V0F8MKUQiGKCRFjsxLpGkscPTgcsr9qEt4mCt8oFRUdffdSU9tD7gOnQ4FUn328m7/zfJD8NBD8uVJMHShKZG9+A9R6H7/2N1jud6Yap43lcuFleslQ0ly8iUlTHO8PRpjasKqAkBTs7tILqVTef+9aMh7ytGQNq1XWbNGqryKuUh16LVEiMKIjs+yJ0xCwdWmV/rKfW057xoLrLPOreVlytM0uZ8axjSqbawtfKOKiKwsXGIRFlmvyiKECi5CCkyFx8HXrjTfHR0wTCSn/i9b06fpcduydjzwAHDEES6FXFYqaQWXMUGKXVWc5CKD24JDCA3fDXO5XimLNJu6zBbnUknqXe7bRPgU527lhZdeMvYihA4QUkIY5hU7AwovITmcvINHjkyE37MiH8KQoGsYu/GtXTvgiSccjnP7LRIKCre+G6u4Y9xlGD3auTyR45rfu4eFqtBEqKnixGb3NZ4d5ttKxXO3fDkwblzgatLs/vuT5QpG2pyakGCcO/hWnPzlncrq69DOXdqdzist8R7phNYS7Ph0PUX2qipSuUSdysgF5AAGDHAumAzlqmIMX7Gpq9LxdG1dZ9S1RtSTXDZntaSCSxZhEImb18k5fXn1Vcs6vCq4VK6rBw92PyVCAJgxw7UubcBn7g1GcH4XemTVtgkjeBf5L0MUqoMKrhIisg81yaJyU517IQN2k3xMr0BTk7FckF6VGA45AnLOk8WJ+799s83Y7d4lNmwwyB9158CCrVVJi5gvv3QolSDuITEqUY8QMNwXzg/W8sZuOOOcdlL12iku//pXoLshvY6UB1cBkRWg2/YtLA8uUXwv4IRkoTgMiNM6wcnLOB/ri6DP5scfW2+Px4Hx4+2PC/JTjH1WFK2ReODt991Dod9y8qy8RM9xegaeObZ/plw8WqF7XnoJOOEEubJ8jyBEjuZ4B6X1dWznLrAfMiSh5Hry+YDKibVy3kdueE25EDoSXlczZzv/qLQSYc89nSuaPRuAGoPVXnMkjQokOeKhC/DQpN8orVMZLiH900iEiIyJCl9eMa7zoI2i1MuadsIE4Iz+N7mWk63znHPcT0k8DmDiRIk2uRBQjqzVFRdhSii2qYmQoqdi6WLH/Tn5eGwm+Uu/uxG77+6vD5oGrF3r79iiwC
n5q3mSsZhMzAJ9uwXGDjsYcnTouuOioOXtDxMfNmyw71u6S5TKFwsxXW4abWkB1m7exna/xG0BwFsOrjDIGq9sQhT+6lfA3I3dUQio4CJFjeEGtlU0e0grKRyW/U7HLajfMac/YWOOElNfn+me07tkkPk1Vf/GjcA22wAPPmjTBk3OQmOLDjHoLe6eXp4xe/8b6Nwu2Z7LxOv2+JjXkMUq69Dq6yI1VhASdTqM/961zLRpwGOPBW9L1ZNZbI+4zNy/fIWkguvQQ90aM/4jsshapzS5555NhCj0oeAyrZOb4+3xt39kDFmFHvyirl8PjF61j3tfhPmDf+JxSD20mkxzmzYF7o9qhPAW0aKgyIQo5HuDMqjgIqTAVA7+2nG/eQy0GxPjohI1Ne7lrKiqAna2yVefL267DTj//MK2acVxv+6avcHixJknGaf1QDoGuYsHV8uqpAZDwuTNuSYSJaQWJELgjjuAgz95SLJOezRR+NhLqu/GYcOAmPCYjJCQMsduHsrN26O+7c+XusXo9U5Q4Vgqn+ngwQmj1IaGzL5AIeqcFmG12bkJHn44kZ9z8+YA7RHlxEe5C4vdyBGUfOW8dk8h9fzJ5uAq1rXghvVh94CQoqKQeW9UeWgUnQeXBVdd5a289NoiOcYrSzlQLpqydEh/l3KxmGtVbXqlktO2oG5H/O8lg4LLpk5t5YrgjeURiVOWwbigtmLYsEB9KTvyEVaA2FICUxNJEVmtNcmiQvM2yOXzBXf48IQRRiHyFbzwglR0Pk80NgJ/+pPFjkp74fnEaaawEhYrlfcXHpv13UkQlz7cZfJq1du59i0Fc3CFi4CWvrCTpzmHH5S6VkJg1SoP7TvUWfHj2HSdLk0qex/y/TJcLi9khBQCm+dpyqKts5yW/T52UscpNNkOWlU7w9A8fLh7+ZUrgcMPBx6adJ5r2X7vG+Zzw9wu+n2YU/aII3KFZBz5wkMIQB/4VfCKZs3K/u4Q3spoZR0kxLB5P9eChJBCMGWK92O0NvcwcVEibjGevvce8PbbCY8aqTpWJK1a3QTWKQ8uVTKccnmfkghR2LVjo3OkniRecnA5IZHZAgCg/aQw2aW5LQXXPxaDpAeXAHr1ciyjt4WY6NSGSD8iuu4qpxdCi/iPKB6o4Cohis1VvFypcBngckMU5o/TTwe22grYbrs8NpJHZs2ymYMllEhpPEwmVh7ZsgquRfU7JD4kTd7aV9ib0jAHV/EgK4BSpmyqrXEvpJgZNbtJzS85ZVIPTEgLNgGN8yIpC6qqPBS2eR7tHtPLLjN8CcFk+/rrgR9+yN1uVHDlOF1ZhJDp0QOYPh1Y37y1a5v/figxn1doeraCa/ESy/LLl5vap2IiVJSsodZlP1RSs5gQyq79xx8D/5l+ppK6Ck0Yd3+HijyEpSSkQKjOe7NiBfAbmzRL5udz2TLgyCMTnxsbgZkznetePimhDdICempedpnHtUtAYjYKj2uuAV57Ta6O+FvvJj5IvtcoiGbnqb2iR2GuIr8hCnOaMtVh997/6ZIjcO21gZvLbV8Aq2q3UFKPDJoGiKp1jmXeGb1X4P4oR4jovvO7RHkiaqEUlZACU+GyiM2NjpP/IbGuLnfb3LnR9qitqlLUPyGw3zbOK+zUhDljhuXhCVw68++U1XhSQOikGKFwLFy8vEbILp69vJs4WoBL9k6lB5cXrJpsnu2cdzCfRHaxS4gEk2Z3Tj/HWmuL1DGqPbg++cTwReED9e67UnnC0bcv8M47uduNdiw5fV+4MKe8THiWVr0SG2oq0gOZEMgSuNjN2xMm2OfjIoVF6CI0z6fp04Sll4Asxvn9rruAl2adqqJbBaeutTO+X7JrQdvssVV1QdsjJMqMGgV8JenIatQpPPSQe3qphjUJ4zWniDS1tYl8YR99lMiPacUnnwDjDE4v990HvPhi8HcXXbeuY8v29muojz+Wqzu+KRmP2K2TqRCFuqK5KMoCGZWkPbjs0SD3gquLCi
XvwWaZjN07+tiqffHWW1D68h2PA59+Cuz2zysD16XrQEubu+hfg3CVNGxsbB+4P6qJtA5Y4vmNcveLDSq4SgiGKCwOKt1CFJpc/vMhQJfhwAOBzz93LlNVJZXn0xMPPADsv79zmdWrge7dHeYLT9oE97JOMr2RI+XqueeIbxIfkgquOHMQlQSyIQq93JJOdabuxVMu6mpbJmp0PvGosLtASFFy1O8PwPz5yS8O1tLG8cVprLn5wFGZ8KuTgfvv99ghhR5cmzYBX3zhGPkNf/xj4n9rKzBiRPa+yok/pT+3tZki1rQ0++rT/Lru2OHkg7BiTWJ+FqjIkv45rccefhhS5Uj+UaHgmrR+T+myqet9+M/boTHWyb5gyprM4SF1856wo3tnC0u1kJhZuytO7PV/BW2Tb8CkmDmsv1oLCad1gNlbzOiB7JZ6B8gsA6bX7G7rgfX11wmF1RVXJIxZ7PuS+fzYY8Bf/wqMHu3eBye6dQPuvDN3+95bbbA9Zto0ubrTywEXgXXKi5whCr0hYqkcXC7nTVLhJ3v+nTyAZUMUpkkv2oOz7bY2qTh80ukG96RzMt6kUbwdBbTil4VH8cQWIVRwlRCq3dtJfnA1gG7OFsyEOdZZheQz0r17RgClimHD3NcGqYTugwYl/j/yiCldguKTpunZpt9z5mSu44QJyY0ui632FclVsUT4xCAWwEQR0uEnCnutUou3UT92cCxnFaKr0FDIS0hwZCK27LFH4n9bW84SIotuHRvTY9vLLwOPPprxoip0Di4gEaLokENytz/6KLDzzsCbbya+v/cecNpp2WXazZ6e/vzAA4kQhKpoasr8zqNPzCgswh9ViQwqQhQubtg+67sKz3rtw36O++fXdU97T0g9aoaHtrIiuIU/ZSuElAZeHH5OTTqKrl0LtEg4ihvtXB54wL19J7sYq3FOIr2SI3V1wMSJudvP2G1OsIphWI+5DJZzlm8JQOE7YmOjmnoizpwFidjTrvmzJG7w6TW74sQP5LRD3bewcTOEhQeX2zwZ9AY2scFeL5s33Nc7EVwsCBFdWfhOO7kWYQ4udVDBRUgIrFolLycKy4NLts6VK73VqcLLPnXuHnss8f+BB0y5uNw6vi47trCbxYeWjKGgVSdWGRbRjwBdd6xFTwlcJCzgmYMrXLwsMvITotAe2cWb+HqQfIMligBDFJLiJ3UPy9zKV18N7LWX/X4duWNbx47AvfdmQuwNHizRmQAsWpT9vcYireDo0QlhWwqr8IJOAn2VyvWJUzKCFlklB99Rw2Pxhq0xv85dmOCGa7QFA9KXO+nBtWD1lq5FpR61n35yL1Mm0KCGECSSb8HFg8tmxNp5ZyRCrCGRk8ouQssB52SsSazGqblz092wLeOEivnTqk3XeiUajqdCDrqUFUlhhzJjQxnNYwkgYnHMmAHc8oN9SD4NkBImrWrcVrpdJ7mL+Qq6Ki2LfAGowX0+FRGMmCkE8MHCY8LuhjVdu7rKb4r7rokW7dyLEEJUIoRzSJ6c8h7qVY2MMsqrwirVz7q6hADLLRyhFa6LZbdODRyYyFwvSUVrwiS+esZq4Lztrdt3aTO9IJJQcIWVP4IkeHbGGai8N4an/+teNh8hCp2EqNJ3xpAhED3OlW9UAbqencQ5Cos1KrhIsePlHnYLs2McW4z1PvFE5vM55zisJwI+UIsWAfvu615OZrxs50EBQcqHu774JYBfBq7HnC9XlQJl1izgkDvOcy0n9aitWAHguET5SMy44cF1MyFA85padNp9dxcFlzs33gjssgtwruE1YtYsYNIkU10WlR14oHsZmX1e+e4751dxobm8f7e1AXCOjpGu3zUHV7K8qnGpTHJwaULHqFESBSUWiV68rh3zohuemLo6YMb63aXrLUY+WHgMBq84OOxueEcILN+0Xdi9sKbIlZ7FBt0ESggu7UsD8xgoPUFnxehTg8x66scfvdX54ouJMIS33AIccIC/frkuiN0mEmPMJ2
EjtjDUkRIcnHfvYZg3z0ZHpVDBRQ+u8Hnm2XZy1nwFvlayQizXF7k8cPHFQM9P70t/r27uUvA+EFJqZDy4gsfF14OGwJDMwTVvnvX2VHhhFXjxsFGFtMcuV+RFj/l5cxSASa7TNU0+b61XwW+533EqQkgSUuz8u3d3AGr0IZ1M6QTvvRf4wx+yt8mMU3fcYR9C2JenlQ1nnAGcdZb9fqFAyJx+53M7wUJxDq5yUXDpcTz5pES5iy50LaN7OPdO88fE9XulP999N3Di5393q0y63SgyYOkR6DvvBMcyKp4l1USwS54QFhE2iD8oRS0hyt16r1hwW+ysb94aY8bIl0+Xq1rnXsgjytZThor+9rfEAsEuv5fMYjmwB1cq6YgDd91lbC/zbNXWZrd/yj7J7LxC5Fj8ZnUptXiSysHFoblYkBKqePXgcnjmpYVeIbguxeNAXGTu7+GrfWqwFUGBFykF/vCHROJ1GVwVXB6FLRs3mjZoGl55Bfilg4PMpk32xit2/TMPVzLjpYqcQ4TYsWzT9jjGEO1GlSW+UvmF4cEJnHuiuto98W6E4RswIUBtfWIN7jTO/FC1r1SaILM9i1Wdr77qXk9TE7B0qf3+trZE3k2VWCvOXMZwXUf7Cot4yAbSObJdF1uJ9Qk9uLyh6RJJZyXxcu5lc5875bhNUw5KijL4iaqhRKJwUIpKSAi4zX0nnSRf1hMSip28YFqYBZW9Wx2ftU0IdKx0WL0bY1nbJKV8+hmD4MCwva0te9HfqTWTmLR7Z/skpWnBoowHl86hORJIPHyyC2jZd5PmZufkutIeXFx8AmCIQlL8/PQT8Oab7uWWLpXx4PI2t4wYnl2hdtKJuP9+Z89tq3xZXpFScIWQTFp2XOXwWxpMmJD4v3Spcz4PWUO0Sk2Xvodk5q7Rs7eXq0yCd/86AXMGL1VWHyGk8KTeSdzGmY8+cq/LPAZ9+aXPTjkQjydCF99/v9p6R45M9N+os3cdp4VA147OLrZCUsGV2q0qBVe5KLgqoLvOfdK5qD2FKKTcxQt3fnhU2F3IIdJyD8UhNYkzzMFFSIHxOv5+ufwwZW2vmFgFQD52sDLhsGlgF8LZmitFPG7t8CQTorBjRQwt8faWux8afBweutO9/XR7hqt28slAO8PIqUsudtOLJwkF17y67vKdI/lD1+FmB+LVI8KNLl2AePxXtvudvASN7Pv+Q6gOSZ8dKVasgJcxj5AosmkT8PjUXzuWsQsBZMQYolBmfm+3aSOArlnbamrcj7NDWkEkUc5bXkP5so71SAjIoGl8US0xEs/WJYHr6TX7JDT8J3A1aU5+8JT056BRPK7+4OyAvQkXhgUlBIjrcgqudLhgh/ecfBqIpTIFXHyxVWoG62Pq6hI5zL2kN9h118RxTvWm0d0NEGTf+VMKKRVrgRO7L8hOrVDCVAj339nY1lGqLi8zouy7fKSVKKT44Q2mBKqrSwhaqpcejzwC3DnuUtdyP/0EVG3e2rXcstXWCh9fbNggX9ZkeTRxIjBtmvMhsVi2IsmI6/jvUqDP+MOzvrsJBozPlhDICu1g9OBxqsVLDi4SEYTAVu2dk8ZIrUU8hCh0e4fZ2NoZM2e617Nu89bl8j7kiPbSi2F3gZDIIABPL1DtNO+DiFP1Kt/dnATaZqFSwYyfZeI+kaKgQ4X8tZS9r9+cfzz691dYIUmjLBQYIUVMymPIbQi58cbkB4fILvmUK/Xqlfgfj+fOz2LWbMtj/vxn4MADvel66g2BVVyV4LruWmZd81ZS7WY8uIKfRAGUjQeXttk9SeXmeAepurwYPcheJ07N4WMXRjTS18YmWlRWkWQ5EhxXSaumaZ00TRuvado0TdNmaZr27+T2tzRNW6Jp2tTkX8/kdk3TtBc0TVuoadp0TdOONNT1B03TFiT//mDTJCEljRdrnqFD5coddxzQf8nPrXcaFkWVFXID54ABif+Oi9uqKrnOAb4G7NQC1nxodTWwetp61/acWl
zTILdATVHhkMw+vYASwvHaesnBRSKCEK4LZJmwBgdecwyGDJFozxg604ZHJp+HQw+VqIvQopsQE17DsMiuGYyk5uw//hE5wnzZpcD337uX8ZRfQZGyX3Tt5lwgqeDi2FP8pK7h+PEhND7bWsDrhKzQr1Sh1yQpVs7bY7qyuvSkB5e0PsRhUvaj4Pr2W7lyjiKE777L2fTAA8CkSYnP553nvV+AhEePy3s8AFw7Uk58qdLAUAitbBRcFUMGK1OselkjKjWQoJKCkFCRedNtAXCaEOJwAD0BnK1p2nHJff8QQvRM/k1NbjsHwH7JvxsBvAoAmqZ1A/AggGMBHAPgQU3TsuOekEAEDU9BCkdB575hw9If7TyizIz7MdnBRkUJp30szNLWT6ZDzzgD+MX5OzgeO2t+ezS0dZZuyDXes8M+6RxMqXJ0tSweJDyvZB7lucu3lGtPQsFF5Bmx+mdo1RmJmZAUxkTaUiEKAzw+b74JvP66fPnRozOfZRyhvCiRAgubUtK47i7hg8PKc0ryxrHHhtDorFkQHpPZrZOI4lDK8A2YFCsqhesrqxKRWqTlDA7v5xXw/u4uk34AcA51bNX3Rx4B5s1LfB482N+rtKsSXNddx5H0O4V0iEK5vjkhgLIJUdi+Iq5MTOItB5dCDy4quPKK3XUVyhLe5QEhXOX0NNJRh6uCSyRISbnbJ/+crtAFAN5JHjcOwLaapu0M4CwAQ4UQNUKIWgBDARR3wG9CfFBwy965c9MfZZ2Htli3BACgDRxoX8hLqD0fk/30pEGbeU23aJH7sYdcsr90O7MWdMC6zc4eXU6TknSIQoOnFykSVq509XhQavVF5adSPlx0DD5ZbOPZSki5kpyDZKaidpX+PbiA3DWHU5v/z95Zh8lRpH/8WzOz7prsJht3d3dPiId4QgTiOZwASZCgh7veoXf84LAD7rA73B0O9wQICZIQ192Z/v3RPT3dPS3VMrrv53nyZLu7uqpmprvkVbtW2XZCFbs2fn75ZbFKqw20pOCiaT71sbNWj8m6fudO7+tMY7YesPCuJIgkxctcvi+8nYc//vBGwcUE+xMn7zbmppuMr8VKTsKTQ9OqDK8xeW6mPQMFMwTUHw8uxrxbP9l5r7iisbQHPvvMTY+IWJLU626OzlGIQu/gklAzxvyMsY8A/AZRSfW2dOliKQzhNYyxcMa/RgB+Uty+VTpndJ4g6hfdu8d3/FJoiHitsXP3SyEAzcyoDVaxR44AS5bwdk6n2t27AESsZrUKLq8NpDtNaYkdRjG1pR/KbMHOG/LJbmgoIgnYtctyK0O/a3LjJIcQQaQrSgtBs3XI3o++BwD4X3nRVv333AOcfnrk2I6Cy64Dq2kOLkAlEArWuhQOSYsAq7Xb79u9E2oRiaUuxB9OOiQwLuMrbhijMJcEUU/w2nI/GPRIweXALzKZ00xbfiehkGUZv0UeHbmtoPi9vr+1AVd5S+qJBxcA/PSTdRkegiH+hzHI8Q5++SXwwQcclZGSIjEkcRSczvO74Js9lYnuRr2B680XBCEoCEI3AI0B9GGMdQJwNoB2AHoDKAVwphcdYowtZ4y9xxh77/ffLfLsECqsktcRyYGdeS+8QHIDO+Vk3Hef+DevB1d2QBTSHP/KIhw0yvdpsIrduhW4+271uWAd/4d+7TN15NKtW9XX4+rk8r0o5DPPwQXxR+XNwUWkDn6/pZDJ01GXPLg8x0fzIkFEcfQo8OOPxtevvl60hqnbbE/ScNll6vl/61bg8sv5+2QHszlVm7Mi+MST9ip3yI5fRSEUKSfqF3/7pi9atfKwQloLEES9wev9oS0vGDMF1zdfO2rbS+6+G8jjifLOoQAym5fbF2/j8uBqXfQbR2fEr/Wbb4A1/xzFVd6M+pSDS+jjXUxgOx5cR0MZnrVLxBajoU34zH7u0njx6fe52HbQPDMT7Ru8w5adhSAIuwG8CGCcIAjbpTCERwDcBTGvFgD8DKBGcVtj6ZzReW
0btwuC0EsQhF4VFeZ5dgg19FqkITZj8Bvxdtjn8kk+IY9yHbXIKJ+qchWrCIM4dGh00U2XmCwcdu827UubNupjIyWd3UX1rl0chQ4csCwSEhjXwpoUXClIZqbl7+bl73r2+fU7SXwsoAUjQUQIbwyzsoD//tesoFhy2L9PNylkeJvMhx8CZ55pfN02Ci2YpQeXYl4OvfCSq2Y/3FJi2SYAoK4O334rKjyI+sOuI5x5NnlhjPIxEEQ9wcsQhYBof3riibyNmyi4/m2SpsDoHs6P0rZ8h+E15TLh7rthbGirZPt2yyJmy4/irENAKGS5p5vd8j2pMot8OiHBKxFOlEd6OvPOTxZ5Tm2QKLlLOA8eEWe8euESCXn/eYKlgosxVsEYK5b+zgEwGsCXUl4tMMYYgKkAPpVueQLAcUykH4A9giBsB/AsgDGMsRLGWAmAMdI5wiOcuJIT8WfiNSN0lUB6CB4taMILztDWKJ2yLsFQZFHw8MMGzlqKkzffJODDD8W/f9Zp4utvTYaaPXvkP0dxGDp5Ff6glCdcf4a4SDELQxcSfLIgzUz4JS+0aPJKHXw+SyGTlyEK/3wVKbgIgogdt30xFG+8ab3p50nWvHat+ri6WmXrol+vy+mPZWXi1Vet69LmrAi6HKf/2M85NtfWYtMm4KOdTVy1R6QWsVjV0UqRIOoH7/7WzNP6bBl8mhho/uuHrvLfFraoMlx7awBtC3/hKielv7SGQzhgtp8TBIYnnrE2agyHslt0eUfTcp7ro+qJgmvGlQM8q8vtus8pNRM6J6Td+oKRrE2oS+0wnoLgYQK6eg7Pm18F4EXG2McA3oWYg+vfAO5jjH0C4BMA5QAukso/BeB7AN8C+AuA1QAgCMIfAC6U6ngXwAXSOYKoV2zZUZCwMLG8wvgqQa2l0h1vFSvoNTe2x6ZNZu2aNKao/PnnrftmFi7QczJFoZZZbGZuDy7yJEk9QiHrHFz0uyY1tFYkCDVXXMMRq5jjxXnsMfUxhwG1J+/j11LUJFMPLm2IQpfLhrDQyzIBfZ1J3lIibfHaUnz3wUzyPiaIesLe2hxP67NlCGqiOLn4wwny3716mSXt3GujQZEPfq02vObIe5VDq2c2pn67twJTFhXjQF22aR3hvE73Pmfcf0A0EvIqXKPAKWcg1Px2qDDRXSAIIgEErAoIgvAxgO4650cYlBcArDG4dieAO232keCEQranI9Y/Ks+aJ7zY5RUuCbv3WBfSrKDNFtSmhkc2rZLY4UMAPA4HY4T0hZklKg2ByRl9zb5eLz19iDgRDMY1RCHhPSQkJAg1PFMujweXkzXnypX27zHCbOx9YVs7ILRXUdbd/Btuq90F88wLBoMkh6qHeL2+Kzlukqf1EQRB6MK5B//uO5MJ/4kngAULAJivC66/PhI60ZP8uHv3AoWSEoPLg8v42o7DBVxN8noFfbG9GF24SloT0nikEwQRTarv98ke1ztI4koQKU7AUk2tCFHIOfhzbdY1q1htbixlfo+QmcDHroIrnlOAtBquzNlnWCSksBQ3szoz8wIjkpRQCILFNEkKLoIgUgkeQxeBo9DWrfbb/uAD+/c4RqFpcjv/BoOceTsFAc8956opIgUhwQRBEClHKGS5Bz/lFOtq2MIFuPde6e/33zMs94Qipdf8Vu8YluMWVPOEfQGAw4ft1WsC757vwqd6uG5L3TApuAgCMJFRepXDJEFQiELvSO0ngSDSHK/HOVPFlaIxvQXcunXAAw/olweAQ4eAb7+NHI8Zo2zXpFMefcjrrwf27/ekKplwerCcgHHYITkUksXnkL97mrxSB44NRapbDPFQnHkg0V1wDL1tBKGGS07C+eLsM7b9iDmWY68qRKG77Q537mpB4FIOEukFGboQBJEscE9Bb75pGQbm2mv5qvrsM+mPt982LKNMz5AXMM7VMPlZ3UBQ0fz2W+Rvs4WNtJn3YmZ+5ZfWcphkM7xcBggCw1fbKdweQZgh+DjCry
cxtHPwDlJwpRFx9WwhUoqws5XpgkuxUNTbrF9xhfjPiCefBFq31r9mKlCzswoUBFOh1ldf8VfFw8dfZISbNeTDnU0AQUCncY3x+A/dDMuRACQF4ZAE14ff1UzBm+w4iuVPEGnMk89Yu33zhCgEYmOvEdrrjaXKVTdm4eBBqU6XhghHa/mMQ1koSDYs9ZD6YOhCEESKwDsJ/fabZ5N4WM5wJGS8vlDq0rxIqyEok2uafY5QCIcOAYtfWuK6zf9s7YjZszn65uE64EBdpnV4ZIKo51TcckGiu+AKWkd6Bym40gjmRTxjIql44/0sT+rhClFYGxFiGwntnS5I3ebguvlm6Q+LFWOvXvx94oI3JIAg4LNvMs2rEni0jERSwZFMpT4ouAiCqF/c91I1mje3Lse7Jjh6lL/tw3c/YF0I1lPp6efn4403xL/denBt25nNN3XT/F4voXUAQRDJgvDFl1zlXvuiTOVV5QUrXl1geE3pCe3FmBkKKuZbs/26IODnn103ZwtBYJ4o8QD+vF8EQaQuggDaQ3gEjZgEUQ9g+8Vk63uP5pgUiqzEjBRhysVaOHyfFaEQ8OSzGeYFLHjwQfH/3X+EsOtIHl/DXiBIubWsrCo4JiQSgKQeZ97Q2LJMfbC4SWUvqPrw+xCE12z5NRdbtliX4xXgZGUBH33EV7Z280+etKmk7YMX2r9JwUl/64Vnn+UoSHky6iU0zxAEkSwc2X2Iq9zgDUNw54P5luVOPtm6rvC8bLZHDyu4gkFg874y60rDLtgGLPnbiMhBMIge5T/oFxSEuMuNvWyPIjQRRPojgHJweQUpuAiiHsAOi4vdSWZxrZUKLgNrIaVgqdmQGq62X3nFogCHQOjll8X/L/0zV5P81JqHXmO8wiqeUHYkAEk5Lr+vkWWZ+qC4TOXlVn34fQgiUYTXBMr8m0Yo02WYwatQ51Eq3HYbX5s87NzJUSgBgjQi8dA8QxBEstBielfusjzbXMt9vIK8wGHDa+GgGHfcAdz51SDryt57z/Tyc19FjBBvvDsfH+xoql/Q40l51y5PqyMIgkhpY+JkgxRcaQS9FoQR4WfjSNDEk0oBT4jC3Xv5kjkOH25RwMbCM2QdMc4eVqtU2YPLAi4PLhpu05H6INhKZet0snwkiNizeLF1GV7PKy9lURbyMVsowysZwQTy4KqPkGCCIIhk4dARvv05wDcv8+SfDHNOj6cMr9XVivPjH39wVmbRue17I95nV99RaFzQY8OTHwwcxVRNetdcSu/BCILgg6QV3kES1zSCBHmEEX/s8eOtt8zL3PdoJHyh1zm4TJFWnVx1ey082rbN9DITBGzfDvzjO4vkXnZCFJJ5d1pRLxRc9MgSBGECjwDsjjv46nr86/bmBfbtA8C3GfRyzcKRkhEIhWi8rIeQhz5BEKmIz8exf+XYejMG/O9/wFnvTDcsU2ceNCUaG5o10z5yTd7e4uU6oD7sM4nY0yiPXA+TmZBAIQq9ghRcBFEP+Otj5ejf37zMaRdErJ+MFlNvv23D8oqTYC2/0spz42iL7Lo7d/tx6aXAG7+2Mq+HY0I6GvJTeo40pD5Y1qXyZ0zlvhNEqsAjh/rHP/jqWvLMbPMCQQ5XKom4K7hoc1ovIQEkQRCpCM/I9b//8dVlFcb3629tih1tTOBWCq5UzsFFHsKEF9DyNLkheYV3kIIrjYiJdw2RUFoX/Rq3tn793S+npDKzRv37371t98AB/knX88nZbx7GYcoZbXDDDRz1cHTs+k9H4owzOPtFpAz1QbC143BBortAEEQSEp76fCyO1htCuG3rsbeuDti925tmX3ie4zOSFUtakZ9hnE9GCQkgCYJIRXzMOw8uHt55B9j6E+dm3oZgy3QbnhAPLu+8Mb7fV+FJPUT9Zl9tdqK7QJhA60jvIAUXQSQx8ba2CC9izfJF8eShsENdXQKtSrKyvKmHU6j14YfeNEckD14puPpVfu9JPQRBEPHGt3NH/BqzsWD48U
egpMSbZp9+hmPL5HGuDyKxNMjZy1WOQhQSBJGKeGkczVNX377ATTdzFKyrs9U5Mx3WZ1/6sWcPd1WeIAAQ/vPf+DZKECbsq82xLkQkDAEgNzuPIAUXQSQp558P/Li/NK5thsdVM6G9HQXX1q3WZR5+rthTD67fvt7NVxngnVUX5wcQBP6yRGpgpgy2A+OwoiQIgkg2vvwSeP6TSk/qsmMpnpThPMiDK63gtaitD57cBEGkHzbSXJnCmMeRhL78kqvC/fvF/0Mh47LfbA7guuu86hgnAhB67Y04N0oQRKoigHJweUUg0R0gvINxpdwmUoVNmwAgI65tRjy4jBeKv/3GX19NjXWZzT9ncsuEeMb9Bm2L+SoD4q7gItKPQ8FMT+oJhsjehCCI1EIQgNde866+a6/lKBQK4fBh4L9b23vXsEe0XTUi0V0gPIR3ZUehZQiCSEW8lB15uRXetiMTS86yFiIUFIjtmjVde1SIexqP7/4owc7DefFtlCCIlIVEid5BErU0gjwACLfweHBddZW3bWYEQjY8uDx+xj1ScB0+wFePIACf/5jvSZtEekEhjmIDzYoEEUM8Dsl3zz08bYbw4IPAvd/0965hgtDh5wPFXOXqBPN8rgRBEMmIVx5cgLeprlpPaIX/vF3EXT504JDhtWBdYnYCH+7ksPIlCIIA0KbIhgcBYQopuAiCkAl7UsUz9E+XFgc8DVFoC48SiuV0aM5VThCAjn8iC28iGvLgIggi1RBq6zyNyvfxx9Zl9uxh2MuXGokgXHE0FN8oCnZokr8z0V0gCCLF8dKDy8sc3QcP2dsTmYaJDYXi7sEFeBfCnqjflGfvS3QXiDjw5Iw7gYqKRHcjLaAQhQRByCQifURl8dHEpa3w0tyMg1deiWtzRApBHlyxgb5VgogdQ3vtxw97SuLa5imX0AaQIDJ9HkqTPSbLX4sjweRVDhIEIbLozIae1MMO7kdtbWIilIjKK+O2hWBihAyUm5HwAgqBXE847jigRYtE9yItIAVXGkHDH+GWYBD4z3+As96ZHr9GOUMcffYZsGu3x095nBVcBGHEnqM5ie5CWkIhCgkidny0Jb7KLbtkBoI4Wkfh44j0I56RFuxCuSQIon7BDuxHgwbJGYI/FIx/Di4AOBIkMSvhHppOCcIeNPKmEWOeOjnRXSDSgK++inODO3dyeXB16gQAmZ42feOjVZ7WRxBO2bKvPNFdIAiCSCv8QhAAKbiI9IO8AwiCSCZy4myn17HkZ3y2q5FlOSEYQtu2ceiQhvXvTot/o0TaQXM9QdiDgsMSBJFQhPfeT5i1559uSMCKlyAIgiCImMMY2b4SRLxJ5txlBEF4D0P8PTe5vVhDIbTL3hLTvhBErEhmb22CSEZIwUUQhEyiFE2JaDfTVxv/RgmCIAiCiAsH67IS3QWCiAkk9CIIIlk4eNiHH36Ib5uf76rmKicEQxCeeTbGvSGI2EA5uAjCHqTgIghCBU+c6mee8a49AYwrRCFBEIRdgiFa5hAEQRDpBYU1JggiWbjy3kpMnpzoXugjhATKY0SkLPTsEoQ9SPJDEISMEOKbRseP97hdmr0JgogBG9+bmuguEARBEARBOCbAgtj8/h+J7gZBpBw1+bsS3QWCcAx5axOEPUjBRRBEhARomp7YPwLr1sW9WVowEGlPjv9oortAEARBEARBuIAxAc2akjUgQdil9JOXKcwbQRBEPSGQ6A4QBJE8CMEQ4q33vual7sBLcW0SAMU0JtIfEoUQBEEQBEGkD9W5u7DtYEnM2ynLOYCdh/Ji3g5BxJKgwFAb8ie6GwThiP212YnuAkGkFOTBRRBEhFCIKwdXOhAkBReR5pCXIkEQBEEQRHpQVwdMa/ZR1Pnzp/3P03buvkvA1yuu9rROgkgEPR/diAUvHp/obhAEQRBxgBRcBEHIiB5c9QOBhj8izaHcdgRBEARBEOmBX8cRxcdCOG/6JwCATF+tJ+0EMhgy/PVnT0gQBEEQRO
pDEl6CICKEaDNDEARBmNO/9e+J7gJBEB7hZ0HdvwmCSCKkEBtm3vk3DnwAX8w613VTRUVA5tIFrushCIIgCMKC+hJCKw6QgosgiAi//ELjK0GkCSGBpngiNrxx3n8S3QWCIDyiUd5u+W8/I0Mngkg2GARDAZggQHbZ9zH3wak3n34TjjkGyOrQEj9+ssdlbQRBEARBEPGBpF8EQcgIt9ya6C4QBOERdQIlVSZiR+O8P1CevS/R3SAIwgXC1degOpeE2ASR0kjKLx9zH5u6WckeWZdWU+O6OoIgCIIgiLhACi6CICKEQintwVWYcSjRXSAIgqgXfP3hQTw6mowiCCJdWNvxxUR3gSAIC7QqLOW+jUEAKy+La38IgiAIgiCSAVJwEQQh4z6wRWLZW5uT6C4QBEHUC3JaN0amvy7R3SCSnExfbaK7QHByw+AHE90FgiCM4LBA9MKDy26bBEEQBEEQyUAg0R0gCCJ56PDQ+ah9LNG9IAiCIAgiHTgaykh0Fwhexo0Dbk90JwiCsIOg0GmJObg8VnIRBEEQBBE7yJjEM0jBRRCEzK4jecCRRPeCIAiCSAVoOU4QBEEQsUUp+xIEnZnXJwbl8bFQ7BomCIIgCIJIYihEYbpQSyFgCIIgCIIgCIJwAAmzCSJ5MXg/GYtcy/bX0WtMEARBEES9hBRc6UJtLY5t/n6ie0EQBEEQRD2BQiERBEEQRPzQzrqCACAkem5l+2tpXiYIgiAIol5CCi6CIAiCIIgUZduCMxLdBYIgUhhZHE6uHwSRvJi9n8OGAQDyB3aNX5sEQRAEQRBJBCm40gVBIHstgiAIgqhnVOXujX+jUlZ7kn0RROoTFGg7SBBeUJBxKDENFxfj9dcEDN4whOZlgiAIgiDqJbSjIQiCIAiCIAiCqG8IAmpDfvFvH20LCcIph49fg/4Nvo9J3UeCGfLfgqDWYIUVWgMGMu+VW6QtIwiCIAgiRaCdTLogCFELXrc0yNmD1R1e8rROgiAIgiDSA8r1QRApDmOoC9F2kCDcEvP50EDZJAjqa677IdC8ThAEQRBE6kE7mnRBECDAWwVXr4ofcNOg+z2tkyAIgiAID+nePf5tklU3QaQN+2uzEt0Fgkg5dv71n+oTVVWeG5uGyfLXyn97vd8nCIIgCIJIB0jBRRhCy2eCIAiCSHJat45bU2Mbf4bgspVAhhguiTGy9CaIVOOa/g9iY/cn5ePSrIPICxwGQMJzguCltKA26lwsZsRs/1HcOug+7vKe2p/EyZjlzSl/xpSmH8WlLYIgCIJIKshw1DNIwZUuCILni2ofC3lcI0EQBEEQnhLHRbGPheBjQmK8xgiC8IRRjb7Ayg6voFnBDgDA8xOvwZZ56xPcK4JIbfw+AUJNE8/rvXHgA1jc9s2kFIDdN+KvnqQzaF30m7i2IAiCIIj6BoUG9gxScKURXodFSL5lNEEQ8WL7gjNwepf/JLobBEFYEUehFwOA886TvcZonUAQqQeDgEadS7F57gage3eUZB1EefYBIBCgvHoEwYtm7vUxAUJuvvfNaN5J3TfUoxxc387ZoP5cFuuLea3eRbviXxy3F8bPQjTyEARBEAThikCiO0B4RAw8uBhZaRNEvaUg4zCy/HWJ7gZBEEkEYwJQWZnobhAE4ZaTTgJ++AFo0waoqwO+/RZo25ZCFBIELz61nXCsbE1kzyazBpQKLhf9aFm4A0Ce6lzzgt+xeV+F80o5CPhCMctfRhAEQRBJTRJ6aKcq5MGVLui4NY5q9LmrKn0Q6GUjiHoK5dYhCEILAzyzFCcIwh39Kr/HrBbv2b6PMQDZ2UDbtuLBqFHAypW05icIOwSi7YRjEWVIXo/7/VIbFu+p205IOTalxvH+9Etwi0kOMC9CC/pZCKE4Krh+WXA6RlR/Gbf2CIIgCIKIPaTgSiO0C97/HnOdq/pIwE0Q9RsKB0wQRBQkBCeIpGB2y/dwbo9/276PFNME4QGdO6uPBSEm6+aavF3iHz
oKNT1cv98LF6oOS7IOoix7v2FxL3J2+1kIoTh6jzbI3UdyDoIgCIJIM0jBlS4IgudhRUiERRD1F3r/CSK1yPTVxrwNreCMdF0EkTg8F6bTC00Q/DRrpj5mzPN38tDSNRjR6CtgwQJDBZcXHlQy55wDNGwYdVpvZHhv2sVAx45ixBeX+FkIQSF2YqkMn3HIdWH5ipi1SxB26d/gu0R3gSAIImUhBVca4fU+19MFM0EQulTl7k7KzRUrLEh0FwiC4EESSucE4qDg0qwLPt9VFfM2CYIwxolOijwXCMIDdF4+rVOXU4ozD+CcCe8jOyApZgYPlq8dCaoVXVGhg30eKqpNBpieExoAJ5zgibzAxwRkmSih2hT9gq9nn+O4/qMnrIGfBfUvcnrGEUQ8eGPK5YnuAkEQRMpCCi7CEAphQhCxJ2nfsxh4hRIEETuaF+yIeRvaEWH7wSIAQHXurpi3TRCEGs/naPLgIgjnMIZrrwV+3XiD66r6Vm7BBZPe1b3292/7qY6jQgS6cSPTjgFS3i9dhgwBcnM9UXAxBlw74EGc3e1p3euFmYfRuug3V21ox0s5tcO0aa7qJQiCIAgiOSAFV7ogCNFJZydMcFWlnoXnwaVrXdVJEERqwCAkq+qNIAgNvx93Kk7p/HzM22FMUAnAxtV8BgDYMm895rZ8J+btEwShxomRTG3IRGhNEIQp1bm7cOyxiFYGMQa/H8gKGHsi2WLIEPH/3r1Ni0V1w83q3Y6CS8KTiC9VVWhWsBOX9HlMvw0IQLt27tshCIIgiCTh+Lav4ZvZGxPdjbSCFFzpguC9MFpvwRqPEEgEUe/o2zfRPYiCCe6TRhMEEQd8PpRnH4Dfg0TvVmj9O8qyxMTzfiagzkb+jPbF25IyNCtBpBIMApCVZfu+w3UZMegNQdQPJjT5FA89ZHydeZWIq1Ur4MorgeOPV52e3uwDdXuxNEdjDJg40bSNKA8yJ6xaZXrZ7wsBRUW2qy3MOIR+ld9HX+jUSXX43DHX4MROsTcSIghLGjdOdA8IgogThZmH0arodyBEcjevIAUXYQgFKiGI2EMRgQiCcEVxcdya0gq5wiF/fEwgrxCCiDMBn7MNcXXeHo97QhD1B18c4hvI3tIFBVEbhRaF6nDEPo1ntef7ikmTVIcntHsVj46+JdK+y++juKAOaNDAtIzTNs7u/jTenHoZMHSo+oLmSxrZ6Es0yt3tqA2C8JQePRLdA4Ig4oS8r/bKMIawVnAxxrIZY+8wxv7HGPuMMbZJOt+cMfY2Y+xbxtg/GGOZ0vks6fhb6XozRV1nS+e/YoyNjdmnqo/EIF+OJxZZBEGkJOTBRRApQk4OsHGjblhhr9GGKKwLRZaRpOAiiPjiZyHb3hvC8hVolLdb/yJj0eHOCYJQYRiST5obY51bV1s/g0bB5WWIQh16lP+Iac0/ko/thCgslby+lex6+RPxj/HjDe9zGgYxP3BE/EPnc8k1kmCRSCYSYPn68Khb494mQRAKaB7yDB4PriMARgiC0BVANwDjGGP9AFwG4BpBEFoB2AUg7D9/PIBd0vlrpHJgjHUAMAdARwDjANzMGCNpiIdEbUpdTpC0xSWI+gtjOmMKQRDJB2NATU18mtIcl2fvR/8G3wGwp+DSLk+mNP3IXccIop5Rk/cHBkjvHkEQ8WFOy3cwr1Uk3+RnM8/HX4fcG9c+6KT+SgxSSCVe5dO/xt6If429ybjA1KnAuHG6l5yEYP7k2E1Y0eEVICMD6NYtSn4YCu9xcnMBwHNDYYJIFWa0+DDRXSCIeolsnJpBocO9wlLBJYiEzW0ypH8CgBEAHpbO3wNgqvT3FOkY0vWRjDEmnX9AEIQjgiBsBvAtgD5efAgC+jm43Cq48vOiT44Z46pOgiBSgwAL0maPIAgVWi+x7EAd3phyOQCbCi7FiuXi3o/h4dG3edNBgqgn/Dj/bHQr3+p5vfHwBCWIVOX+kXdgcNW38nGHku1oVrBTPAjvu+Nsia
0N3+dq+89xc5avTvxD+py8yqfcwFFk+oPmhQIB3dNOPLg6lW5Dhi8EXHcdkBct0wiF84aWlwOLFtmunyDShjiGOicIQkOrVkCHDonuRdrAlYOLMeZnjH0E4DcA/wXwHYDdgiBIKxxsBdBI+rsRgJ8AQLq+B0CZ8rzOPYRbYrCY9nXpZF2IIAj3JGMirjht0DN9tehVsSUubREEETvchCgM+EL4bs4GD3tDEPWAtWvl5cOqDi9hfqu3XVdJntsEYcGaNZG/zzsvslyWXkYvls/asINKMsMKJol4pBRQdiU7UCv+kZMjtc//gZ1+N64+o19cm6i+TkFAUFAoJAcMiENmNYJIUvqQzwFBJIwzzpDnKcI9XAouQRCCgiB0A9AYotdVu1h1iDG2nDH2HmPsvd9//z1WzaQlXm5Kx9V8imUruB4PgiBcEOtY/U54Y8plcVO69WuwGS9OvDoubREE4Y7N+8oNr5kpuMY2/gwbuj8lHzMAGDYMgOgtCgAtCnd40UWCqB906AB07iwfrmj/KtoU/equzmQ0tiGIZMLnA7p0iRxXV0dFOzgajK2g6sxuz+L1yZfJx4wB8czBBUCcv5s2BeA8P5YMh9bLdRuM6YQoVMs5nMpRijIPojp3l9OeEYSaRMzDNPcTBJEm2NJgCIKwG8CLAPoDKGaMhf3IGwP4Wfr7ZwA1ACBdLwKwU3le5x5lG7cLgtBLEIReFRUVdrpXv9ELUeiC6/r/A/37e1ghQRDGJNnCsn+D73U3g7GAQUhKJR9BpAxxDIf03u/NDK91Ld2K3HBCdw3dy3/Enzq+IB8zFrFOD/hib31OEGlH+L2nxNQEITOh5hPsXnxS7BrQWa/LuZykayXZh3Bpn0e5q5zW7EMw8M+D+ceOw4CG30e6lIg19Ny58ufNM5j3tXCFPzUYz5zk4AIArFpleCmoUWjxhGXvXbEZWf5a1bmN3Z/C+9MvcdY/gkgGfGTUThCJwE9hwT3HcjRjjFUwxoqlv3MAjAbwBURF17FSsUUAHpf+fkI6hnT9BUEQBOn8HMZYFmOsOYDWACJZWgnvcSE0N7w1yQTxBJEW1GMBFY0oBJEGZGTglsH/h98Wnm5YxGj5kOGzyMlBEIQhYeG2LSF39+4GldGMTKQ2Gb4gijIPx7VNrWLE7xNwVrdnue9/dMytqMlXewAxwPh9nDABaNFCPmxVqI54E+scXNoSYxp/jo+P3cRVfbviXzCrxXu2u+XIg6thQ6BbN8PLWg8uHt6Z9meUZe1XnfOzEBnqEakNKbgIIiFkawwmCPfwjGZVAF5kjH0M4F0A/xUE4d8AzgRwKmPsW4g5tu6Qyt8BoEw6fyqAswBAEITPADwI4HMAzwBYIwgCSTW8QhC4LI94oYUaQcSXWS3eQ1HmwUR3I0KcBF2U0J4gXCJtTGOROyc/w0JQOGqU+P/IkfAxAZn+OvPyEso1RsAXBM47D2jf3mk3CYLgpXt34PjjDS/TnEykMrF8fv8z4VouD65w+F07OJ2/n51wLZ6feI13IQodwBjQuXSbdTkABZlH8I9Rf7HdhmMPLrlxBkEj9ory4HKcH4zGTCLFIeMWgkgIpODyHksFlyAIHwuC0F0QhC6CIHQSBOEC6fz3giD0EQShlSAIMwVBOCKdPywdt5Kuf6+o62JBEFoKgtBWEISnY/ex6iGC4Klwiza4BBFf/jHqLxha9XWiuxE3BjT4Vv7baLwp1VhJEgQRe5rk78S3tz2PPYtPwqCGkfc0238UT467QV14xgxgwwZgyhRg7FhTMxul0E25l87wBYHqamDePI8+AUHUP7jX7c2bAxkZhpdjoSgniHgRy6d3dOMvdM/Lb154YhswAPjTn1y1pQzja0ZR5iHkZ2hCBMY4KkSBleGLAVyKN4O+O1IiWXwPsmLSzvd1zDFRp/wsRPoBwjs8epiE5Svi3iZBxIqXJl6J+0b81ZO6CjMOyX9PbvoRdp+buFzwWZyGoQQ/5I+aRni5nLWa5r
qW/WRt2U0QROqyerWpV2iGz92E/PqUKwAYjzWDG36DAQ2+N7hKEISMxxvTbH8tWlYdRKEmzNOWuesxocmn6sI+H9Ckifj/9OnwtWllu73ybHeKbFubeIJIM0guRRARfCykCt/nOYFA1KkopTBj4rxoA603kW2UHlwxDFG4ee56HNPkExcN6GChYFrf7Slc0vsx7uoCLIjgspWWYde0IQq5IuEwphOSkvKIEikOhSgkkhBlONhGebtRlHnIpDQfT467Ac8dc4187GNCQpVMAQrT7zk0mqULHltrGVpZSQvf7mU/Yd+Sk1ReGPGmOneXdSGCSHKU26SksZzeuBFo1cpUaX5ipxewfcEZtqu+uPdjYpgXCcb0t5SvTL4STfN32q6fIAh3GFlK81hQM5/+GMag9jAJrzG+nn0OJjX52H4nCYJQ4cnqgbRlRIpjmrvKC1q3jjqlu4q12QfdfFAcdXi+bzBoMzxnNyvY6e7r9ftt37Kg9dtoX/ILfxO+kLheUbal7bQgRJSKkgzFqSTFzwQgP9/h3QShIRHzMGPo3+C7+LfrMb8sMM4DTKQW70y9BDNbvC8f+wzkRXbJ9NWpcj8netXrp6hpnkMKrjRC14LMIVa3+qRY2GEvjHhzSufn8L9jL0xI2wThOR06JLoHcaNn+Q+qMC9mIUu8zCtIEAQfRu8kl4KL85UNF2td9Bv8PlrcEwRBEO4xC9VZknUAgxt+47zyXr2AJUuiTru2MW3dOsqDy/bqV+GBEfc82gbhGFe2fxkjGynW+wzA2WcD5eW2qrebNsEX/vwDByoriSoX5cHFoyxkLKqcj4XAMqI9+wgiZfD58OqkK/DYmJsT3RNXNMjdl+guEB7RtOAP1UzmZyFP5jbG1NOBgMTadrnOL0lEQQqudEEQPA5RaO7BlQwJVUn0TaQNbdqIXlPJAsdM7+n717697unEjzIEkQJ4vDJnTL9Ot/M+q662LHN2t6dx/4i/4LOZ57tqiyDqC+H1etyF2gSRhPhM3oM/dXwRr0y+0nnlxxwD5OVFnR5c9S1O6vQ8EHQYaqikBEE9D65kZf589XGnTsDYsVHFNvZ4Cu2KNJ5XNTXAwoW2mpN/U05Not8XAtasAYYPNy3nOixkuD3OfGkEEVemTuUvyxj8PoGE7URSoTQm8Er27NN5xkMJjKCUDDL1dCOFVlOEKYIQ7e3gyoPL/GUz20DEC9rME2lFTU2iexBBGjusrBmdvIPascVAlg4gsQsOgkgZvFZwQdCNx+9jIeCUU5xXrBBOMabThiDgkj6PYU6r99ChZLvzdgiiHvHLwULvKiMhLZHiMBNlg5M16/UDHrDMOV2SdRDXDnhQrYCx+S7pKrgcvo+xzMGFqipgyBC+qpzs03WUWIwBWCHm2nxv2sXoVbHFtAo/CwHNm1t+FtmDSw5RyPfFaXvoZyHbbnwfH7sJNXl/2LqHqF/4mcvcPHaeSWk9btdbMhXJDRyR/17c5g1c1ueRBPaGMIJBQEgxJvtZyJMlKkP03JTB6tAkf6f7d84meYHD6G0xnxH2IQVXOuNGwWVxPdHaZlJuEelIMoTkK8vaH1Mhl1ZpZraYTpqcZASRAng1fvgUAkLlXOvzMaBdO8f1Rs3b2lwcTuM8DR3q7D6CSAPalfwKgHRTBAF4H93jT51eRJbPIgF9OApBr17OGmEsyqCLd58rgAHnn++s3RgTbdBm8Jks5n4fCwE9egCMoWfFj5bfjU9PyakzQDry4NKpx0norM6l20iSQejDGOpOWIn13Z82LLKs3at4dfLlnrZZX2hWEMnvPaz6K8xv/U4Ce0MYwZgQ5cHlSYhCnToCfgE/zFvvum677Fx0GrqX/xT3dtMdUnClC4IQLQzWscDmxWoASYZFWT2ai4l0J0ke5rO6PY2vZp/L3R9WWmK7jbqwxeSiRWIdRgUrK5NC4UcQSY/J+/rpsefbrw4CkJ0ddd5N6BI/CyHTr0nqG9DkrHCq4EqS8ZMgEkHAJ7
6Xnhh+0bvEzd+G35noLhA6mHpwxerxXrVK9G4ePdpxY65CFFZVqQ5djQWG351get1o/lYWL8064KxLNsvzrlVO7/JfnNzpOfmYawmi8/nDYzBBeILPB7/PfAdclHkI7Yt/MSkBIGTjufT5LEN6pgP/m3EBVrZ/RT7W8+YhkgMG9bzoVfhMMQeXEH2SSBtIwZVGRA3PjGHz3PV4b9rFmNL0I1t1Gb7n0gV5wOnd21a9XkGCbyIdSfQSqzDjMMqy+TagTheEdSE/MGUKUFkJAPh+XzmtKwiCE4YQfllwOnf5jqUOQ/317Al07ao6xeW5bfAy+1kI2f5aRTGBy4OrOJNvPFrT4UW0LvqVqyxBJBMvTbwSp3X5j+P7Ex1Rob6yoPXbie4CoYNZCH0GAWjc2Hadco1Gi9WsLNG72YVhqZchuRMisNULLajoy4Gla9FxQXf5WlXubu6q5c/D+f0GWIhLYLm200u4ZsBD8rET2cLK9i9jbOPPuPYxNwy4HwtbvykeDB1KsgzCMVzCfrtGY3PmpP0TWZR5KGrNVB/CMqYqQa0Hlwe/FYMAZjCXxHtMJuVqbCAFV7qgN4n5fGhWsBM9K37EY2NvsVWd2Qs3oeYTTGv2kdxGomA50RbmBJHKJDrnlKy4lt7rWEz0tSE/MGGCPGZ9s6eBYVma9glCDWNAg9x90Sc9xMcEICMDWL06+rxD/L4QAn5FDi4IQH6+6T2hZStQlbuHq/4bBz2ALqVbHfePIBLF0OpvcHHvx/HFrHMd3R9ehnslpKF5l0hlzDy4AAAFBbbrjIfQy58VsC4UDzxcTzAI8rohd9xQ0bhNYtuCM9GnYjNXPT4r7zEFl/V5BK9P0QndZnavnIOLD+XzsKH7UyjOOsR139pOL0XCozHm2GmdSE/O6PIsHh9zE5diims9bseDq55AyqzUgZWWyHKpO4bcg9KsA2mngKXnMTaQgitd0AtR6CYHl8kL9+T4GzGhyafiQVmZ4zYIglATchOixANkBVduLgDrNbaTIaY2pPbaMNvgJlrhRxApgccKLqaoU1mzKwVXVC4OAAMHAn37ysnjtesJ7o9FLqBEipPlr0PT/D8c3Rt2hKS3AGiQw6cQJ9IX5fwVdc3hHBYPRcRH//Ph8Yl/kY/dCL5i2l8bIQoZUxjM6hi06HZTpx7ZW0Vq22yv1Ln0Z7Qo3GF4XRdpEK3T7E900Xx+xgCccw53U8FQpO/kwUUoGdTwW0xu9jHXCxwUfGBFheaFBAF5gcO2+pDyHiVDhti+hd7C5IT5ffJ4ubTdG55t9ZgmlxcDoiOKxAkGABs3JqTtdIYUXGlE1JSUkeE4ITzXGDJxIjBggKP63cIgkEyLSAuUm9hELyuDAgPWrOGycHX6/rULxwwPb1TBDBfUUUp7gqjnaN8Vn0cxyVVtGAjW3Ci4AiwYfdLvB5YuFZPHA2KYp8suA664AmjRQuqL4yYJIqVwKlgKvyMNc/fwCUzNhGeMpaywR1i+AoMbfpvobhAJxtKDK34d4S87ZgxatQLalvwWuZ3zVr3X2ZXixGMPLrO+8LbkD+e4ysgAYG78Jndf+zm0x8ovThJuXvnxaFii8bxieblA48bc43edEBGk0h6H0KWBcWSTMFzPmyBg1+JTcFKn5z3oVIpgEVmKIXrMTHmlXprCWHRuylj8VgIAtG/veb08sHZtgZqahLSdzpCCK10QDBaRp5ziyMvKMLavcoE4aVJCQxQSRNogvVf/2drR02qXtn3NlgA8GPIBXbpwl7e70BCWr0D38p9U58w2qmTdSBDmXNL7MXn80JVbz5hhu07DFJwuNhZhAdVNA/9PrMtICFlcDBQWyjn6uEgGYSZBuMTpY5yZIWBxmzdQlGnPUtsIEvUQqYzZa+ToFcvOjqxFPQ8HHEKjRpG8YE7mWD3Dk4R4cOkVLcjHgbpMk6qkjmo6LCxfgXE1n8rHsgf4mDFAWZnptyR/hwb9DLAgDh+/RjwYMUI0pmnVCgBQG7
IfJtLWI+HzoS5EchPCgiZNgJUrDS+/NPFKrO/+tPWzl5eHDF+IFDgW0BYieanTKrg8+K2UuSFlxowBjjkm7mFj6d2MDTTL1gdOPx2oqrJ1S4ZPx9oa0DF7SNysQIMCQZhzx9C/oSiTLzY8EG0pE1MFE2PoXvYjRjf6wjjiSexaJ4iUZ1m7V3Fmt2fleTmk9746mKOVyielNxdvVf834q/RdUr/r+74surYE4IG6xWCSAXKy13d7vMBdw27h/+GJJHmVGTvxRV9H050N4g0g8EbD66K7L14ZdIVQJs2MfO0mVDzKbZujdTtpNu6Cq4YrN0rsvebFzAIUTi/1TtY0f5l3VvMeqkM5Scb3ZaVARdeyBe+PKBRVinWNFn+OvHc7NnAmWfaNtZ1/P36/ZF9FiMpBqFG9f537274fAxs+B0KeQxamjYFJk605dmdJMsD53B8AGUReguTFwZBNQ9g1CjP6lUfQ/Ti7d/fcZ0ndnoeJ7R71X5fUv19S1JIwZUu6OXgClNaCowfb6s6QwVXmzbi/1L4IIIgPCBJZrhXfmkd1/Y+mHEx/tz3n4bXKXwHQajRfSPCCi69vBROFFxuNnyMYW6rd6NOv/1bc+d1WtG1KwAaL4gUpVDMo+H4vbMbSSHeJqoGNM7bjdO7/jfR3SDSDLMQhXbesaLMQxhcJYa8jNUbE5VDx8G76bkHl8F3N7Dhd/hj0Smma4rL+jyC9sXbVFUNq/4atw7+PwMFmF58RfGc0uBOFVWGMdMcXIwJYm5PKZyhSUHz65z3hQ+5cqb5/eTBRbgmPI4ZyurkggyYNIlUOAoYhGhb/ZLihPSFMIcxRBS5kyYBxx7LPYczmEcvUg7jgt5Jm/y5zz/RpfRn2/clifgv7aBZNl0QBNsT2KhGn6NTif7LGPAZDAwFBcC11wJnnCEeJ9KDiwYFgrDEzob+3d+aqY7Ncu7EIw8eLcoJggNpt+aV3NoXgxwm81q9Ey2I57G05BkFysvFdQlBpColJc7v9fJdZQxBITHJtr2A1gyE2fxl51WRyzoNxW/R2PDqL7HqsmbqW5SJ7zlzXuqFIY+FggsASrIOGt8nCFjX7T/IDdRad0Q67wt/XuV3LLUfVBisaNMm6Hqrh28HxNyehteNQxhe1Osxw/ss6+PB71eF3KIw7IQunGNObkateQF5b8DxnCVIqJbps/gMcYC8uJITBgG3DLoPm+euB/LzxRyxLnJBy/Uynd/c5fMf8AVxNJS6a+d0gxRc9ZiHR9+ma2kNmFiFMAbk5CRF7i2akIh0INHPcfOC33Fx78cAAOf1/LfqWkz7ptj4GrVDHhkEocbM4tlM6GOrDbEhZzdrwwIBeHnSlZjU9GPVusFyk1Jrc9Obk2OvPEEkC4wBF1wAluvwGfZyPU6WY/xkZSW6B4QODPDknZCVL0Y5ri07Yn7PTQPvtxtcRRefzvrZJyQgbG9YmK44pfoKQqGovxe1eRPTm32g/r2kv3MUijK/xujWLESh4b5FDlGo1zmR3MBRw3oN27PzaFRVqTzTksSZlkgSGAQxTFpRkXk56ZkzM0J11Yc44vf6M1i8kLqX6UVMWgoyj6BZwU71/MGB9nfO8NUZlvUpwhoLDlUjfibgSNDCa5iIG4nXUhDeYBai0IAMX9BwkegzSPzqllM7/xePjr7F0zoJgnBOadZBrO/+NAD7C1tHsrDzzhP/5xhbzIQKtw3+Oz6feZ6DDkT4/bhTMbuFvpKfIJIRBgGYODFyomVLMSE1vFMIq/Ju2b25Q4eoU70qtohrCqWCy2qsmTDBbssyKw3yfRBE0pKZKeaXcYLHHlzxIhYipbgaxUybFr+2CG4YBKCwECvav6zyDtjY/UnRk5gT2TNq6NC4yT+9ysFVnnsQ70272IMeKQh3rrKS/xajt1wSVi5r/xoeGXObuI7R8Lfhd+KNKZcB0AjBGePLwWXZOechCvXGGbOwiTInnIA6svInDGBMABYvti4HThmdrHROXg8uPQ
/UeMIY2fUkK6rfRdo/8v5U2rlnUpOPcWbXZ+Rrut7SLh4EHxNwQrvXcOdQG/lwiZhBCq40wu76O8MXRFHmIXs3aV9+m4NB59Kf0aP8R3ttcnaFIAj7KIXZ5Zok0mZeFo7cxEeNAqqrxb85rHG0m9jwQvjXhafjhHavqSw8nVCefSDKMpSovwjLV6BPxWbTMp1Lt8apNyYoJ7916+SFf3gTe2jpGjTI2RNdlrd6hTWbapPAU9ewYWLidlV9Enas6hs3Bq65xtHG/JbB/4dNPZ/AvFZv87dHEAmG18CkffE2/G/GBZETXnpwxTE6g+fW5y1bUsivNEGevxzAmACMG4dbxz2OTH/Ek+mCXk+IluCc+JgAnH8+0LFjTJ4rvTW0cgzQ88zSQ/c9Ki9HzwqHe22jef6cc4AhQ4C5c/Wv6+bYUhwo1/xKD5Urr5TzEAIA/KICqDz7ANoXbxdPaUMUmnlwGV3Srml0CnItlzSfM1xfMCPb+t6SElXfabwi3PLz/HW65xvl7ZLn8/d3NIlnl2zh+TrA4iXWjiVA4iPpEPqofhfJsMLpb1WSdVDO+a6de716Bitz9mFJ2zc8qYtwBym40gUHIRT8TMCqDi+LsU1N6k1G8jKOJLoLBOEd0oJserMP4t502BJx24IzsKz9a6prvBtsbpQLT4W1utF6VBvPuEPxdgjLV6AyZ19MQjMQhNlT9ftxp+L2wX+PW1/0MNu7hQUn2YE6V4ITwxwmPNIfnw/o1cuyGFfv/H7HS5Bzez6JxW3edHYzQSQxjAFdyn5Wn5BQvi/ti7fhpE7P26s8jgouBojhmFIVsrKLGZ/PPB8PjbrN0b0+JojCsE2bVMoExqBWpPDUU1wMIDaegVY18q5xdcstWWLbI7Qsaz9mtnjPuECjRsD8+WIubhOU35WhgUyTJsCiRcCZZ0bXN2IE0KyZ6pRWKG22vuEWgLrwwFSFYQwruOYuML3nlyffj7qXwrATSri9UxiASy8FAFTn6RsDzGz+vvz3m79Ge0ga4ZXStTJnL1c5z98Ai3lZLwULTeXJCWMANmwQ55327SPneO5V/L2h+1M4s9uz0XVLKEMUOkYnPD+ROEjBlS44kAL5mICAL2Ru0ebxqC/AmwSBuYGjZHFBpB2eW/ONGsVdY1Xu3qiNstXr7+odLC8HTj8duDg6jMqQqq8BAD3KfwKDsYcVjQFEPCnIOIJADDz+svy1OKvb0/w3GLyYX+5uKP/tNITPk+NuwP0j/qrIV6F4xxxqm7TvqZ8F0an0Z4PSasjCmagv2F5uh5VRCqWUMg/flf0ewdK2rzurMw4wJgB5ea7rqcrdjS9mnSuGDjMos6bDi67bIeJDaNkKlGYfRLbfvof+9GYfYFm7iKFWlALhoou469JVHNmZA516T5u1r4NuiK+yMmDVKltt3zXsHjw46i+e7vsZA5AteTa1bau+OGAA0KJF9E25ucDZZwOIzP/aLpl7cHF8b5dcAlRVRd/Ls6fQdIb5xONgZXR9ShqUiTlgSKlFGKF9/jJ1lDEAgDZtgNJSw3f13B7/xjUDHpKvdyrhW297xTPjr8M/OdOReCETBIDjWr+JncedYjl+BYy+UyI5adJE9By2MS+VZB1Ah5Jt8vEJ7V5D66Lf5GMGqOZyMphOP0jBlUaYLpo0i/KyrP0GBTUToXazG0czh0Z5u3BVv4d0r2X7jZMFEkQqoXylPJ9iXb6vnsS5V6IVDrRuLSq6FP1kCOHlSVcBjOHsbk+j9oTV8ga+jifGve0u0WaT4CPA+DZGOf6jaCiFWPpx3ll4cNRtWN3hJcPyQxp+g0v7PMZVd6avzvC9fmRzd/lv+d21KbDuW7kZjfN361/s3l3/PC+SRfm+JSfhxn825rqF6/2UkwOry3o+fhFELJDmRdsGG+FxQDEeBEM+TRGbdfp8OKPLsyjJOmDvvgTCIKBd8a+GuXEA4MZBD6gEHt40TONLLHDztV7e7xF0L/9JPlYqfJGfD2Rlcd
flZyHF3CLhYVQTq/fdcjyQ+mIYacHii2xd9CuKMg/Kx/J86fQH6Nw5ugsQgAsvFI3Z2rSxXWVQ731mDOf0eApzWvLnUwvfJ/YJhuui49q8hbuG3m1ej0HEnLIyoF1ra9mE8l4SqxJmnNL5Obw6+XLjApZWqFJ0mOYfetgrc35beBrG1nyObM4UAl4pF3IDR1GafdByz6PrwUVvYlKi97vw/FZ/LDrV1HmDQVC9Ol7k4HK8NqB1ZEwgBVe6IAi2hucdi04zvPbJTHVs/7enXmrsauzIQo2PSU0+xqldntO9lu2vpTGBIDxALx51GKsxxat3UHfB4vOBMcDvExBcJlqiHg2SCziROHxM4FpcHzz+T+hQIuaOqMnfhZktPkBHj4Srmb6g4YtXqwjp6VS5I1s36rWxcCFfJXr3hnNzDRyInPPPRKAtR8gUxtQCSr0iCBl+H/H2/upW9iPO7/mvuLZJuOeZ8ddhcZvExM3/cta5XPkodQkLcpQGIm4fecZweb9HcW6PJ11WxNGUF2FhCMIEN/oopeBVnkucvqs6WEUBthT8hhVcDgXEX88+F1ObfSQfuzYI6dQJOE0tW2AQxLCQrVs7qjJkYNS2pO0buLDXE7rXrD6FmdK/JOsgFrfVD208t+U72L/kTzrtifXl5ABfvKkfLk6J/D0LAv44km9Znqg/aMeEvIyj6FbmIu+vXi5di7JudfgVOcYG9HroKZxcYeXBpZeDizx4khL9OZLvtzIzjtTWy4wa4+QfI28Hgg6f4yRNBZTqkIIrjYiJJ4Lfjz6VWxxbiNnhwl6P474Rf7Usd1GvxzC75Xs0KBBph9b62jUc72fAFzSMqX9ej3/jvWnRIQQBMYdfLJC7rNP3IyG1giscGsQNNIrUb8Y0/gxtin6xLCcsXwHGbCyuo47tP6vNCnaIiaIV+JixQufsbs/IfxsJhqww3WxmcyRR14E1qQHmzBHHmeOOA2pqOG9kltO8ygpP863He4lwUqcXsKL9K/FtlHBNx5JtKE2Qx1Lb4l9lobnt5XQ45n/Ys7JFi+hcNXb3BXEMUQjAWwWXSYhCALhr6D04rwcpoNOB4dVf4qd5Z0adVz1NjKnnXZvPmnLfK88ldiYVB++S8gnmWWuc0+PfaGqWZsAC5fjg2oMLAFq18nRNbeY1b/T9WAnz8x3m8M701yEv4ygAYNeRSGhVu2Gr5e/ZQ2Upkb648i6S3uVEhGDjGUXennopOnlk/Me7N9O+r+S9lby4+W2Uz7yVJ5gc5tfh3Der5fvWhYi4QgqudMGpJGfgQPPr0gJdrt3lZtTHQoYDVuui39AsP7JQN/pEazu9iJKsgzQpEemD9F71rtwSg6rN35OALwSMHat7rSDziOyJEnUfZ7g2FQbjlO67nJurOpzR/H3MaqFYRJxxhv32CULDP8fcgr6Vmz2pa0LNJ/hj0SkA7AmWGROABg2izm+euwG9K7aozvmYsddDA4WntZXnkx4lWQeQE857ogznA+DK1d/brk+Fw7WDlWJQJYTUlI23BxdZgaYm/hjk1bMFp6BzfM0n6hOjRkkXxov5dv70J5WnKIN+KC1Twmt+xaN859B78PwxV9urhwPP306TEIUA0KdyS1xDNRGxI8MXROP83fjg3MfQVmGgolpLap8Hm/tkpphrT+r0gv08boEARjf6HCOqv9SvX2/dq8wLwhGi8IJe/0KG0fjFMecqW5DXDAHvIiW41V+XZh/Ez/PXAYMHu2+TMcxo/j5O7/Jf40Lr1hnWq+f5AZhHwdBDHpPJSJfQYFuuZbWnlvYVybo27eOhzEN+oy3eKz1lH/mRJwcndnoehRmHTMtw/VZt25oqdbXvmWe/vxNPZYpiEBNIwZUu2AxRKLNwobmSq6TEaY908WninirRxkQ1Ilwky1+H2wf/zbO+EUTCkB78c3o8hQ+m8yfB9oIAC5lamhotuHnDtang3dCVlwOLFqlOPTz6dlzd/yGgqAi44AKgVSt7bevRtK
n7OoiUxs8EQ8GFHkZT1MOjbsX9I/+Kkiwxp4X2Sbd8Vxo14m/fIOwIcy7LAyDGLTeag0+bYyNJtTYJuwtjFCtFITNS+FVWJsQEhuXmJKBVwg2WguRYY5GDa3LTj/DutEvw1PgbIyf//GegStXMAAAApaxJREFUpRTmMxAAunUDcnMxq+X7CC5bqa2aH521AIOA8mx7YYd48EroZub1Has2idhjppwNzwvdm/6h+k21j4ATQ48wSgHZ5f0exY2DHpDzSHLBGP7zTjHOUnhWqy/rCFoV3Y2l4v2xMTcD0Hpw+YD27YGMDFd1K383V4aol14KnHIKqtctAGbOjLps+5dlDA+Pvh1ndP2PcZmWLYGRI3UvyYqszEz1eZ9aqWrWPuCdwTCRfuiPCQ7foUsuAfJET0OuNU4insfGjT1bfcljjYPNDxnMJwfXDXgQOYGj8rFuiEKe3yovL7rc1Knyn4eCGdh+sEjRjke//8knA4358kvLkKFDTCAFVxrhyFqZMTFwtJaw98TYseLGWVneBVqNeoeSbeYJNHUID1qMAcvav+aqP06ItwKCSG+iLUk8nOx4QxTGa2HLMZEzCMCZZwKVlfoFLrtMYZXmRZdok1mf8bMQrhnwID6beT4wfLh54fXrDS/V5O9CYeZhR30wewJ1n08DhfTUZh/hibE3AVCEKHTykvTrZ/+eGGE1YmT5dZK6l5QAp5ziOEyjU3yanEKdS7di+wLyNE12EhG+R4WFB1fT/D/Qq+IH9UmNh7OS8OfRfTeskKM2iM/x8navYFSjL+zXw4HbvAdh/Cwkhj+dM8fWPqhZwQ5MavI/5w2TgDo28AqICgqAyZOj18wLFsh/usrBpaz3oouAjRtN3ztdqqttrehtrf/txO/VMKWZ+Nwr35eQwEQBnZvnWhNW2NUrUloKtGsHdOkCZGVFN2XwXdnJN6SL4gNU50ZCRAd8IaBDB2DgQNw++G9yuFO7IQrXdnwJZ3R5Vvx8BKFA76k0fYUmTDC+plDG21njeBX5gPc99GoPzhiAqiqgb19n9xJJhxt5WJ1i/8cyAmKkg06dAABHghmq6CjyXM/xIHQq+RnC8hXRFwYMEI3NGjZ03GfCO0jBlS44WcV37Kh/b1YWcNJJkb81nhQqGEOP8h/Qo/wH4zIKtF4fZVkHMKjhd1JVOh4hOl4aiZ6Iupf/lNgOEOmHl0nibWLlwWWEn4U866utenQKfz9nPW4d9HdvOkOkPWd3exovTbwSgPgcF2UeFkNxanN2KJk6FWja1Fqowhiwdq39TaLBS1Ab8gMAXpx4FQB1Di6t1VmWvw6Tmn4MQG25PrTqK/Rv8B1/X5YsUXTLwdpC68HldJxgzDLxfX4gkk9D7unkyUBpadwN48QcbZHjDF8QDXP3Gt9AJAUJV3DF6EHNDRzVH4fM2tO8rLcNuQ+N83d72zG5Kfefe1qzD3Fej3+L1uo6YV7N6FSyDU+Muxl+J+GWAbK8jRUGHjRKGBPE37ykJNpjSBHOTjAQc8xq8Z5lG6pwwBUV/LkjNRgZWujObIpnKh5bgU6lEc9sq7mWF+XnTYRnBNd4zqHg+s+Ea/HomFvl0wFfUJSNZGZiWfvXMK35RwA0IQo5FjtDrpiEy9ftiISYJQgJ7vclPE4MHQqceGJ0PUx7nLxzlaceXOefr5sreG3HF/HtnA0etUTEC10PLs5p6tweT2JF+5d1r4UEhuxAnZzH0877oVt26lRg/nzuOjQVOruPMIUUXGmErb1Wu3ZizH49Tj4ZaNaMu6r3p1+CGwc+wFXWz0Kuwx/IyQBd1kMQyYinG0Kfz7S+pvk7MLXZR6bvkt7cu7zdKziuzVvc3eha9hP2Lj6Ra5AKCn7uCT/82QoyDzuzVifqJW2KfkWnUjFXjepRKy+3vNcwxG74/M03A50727dK1FT89PjrAQBHQmI+jGHVX0cVM/f8ivz90qSr0b5YP5desmOmKOxVsQWX931EbzdveW+iOHqCwbqLSB
h+Fkrss6KMksALxxzZKG+3faG1Qb2xEJB5sda5ot/DWNruDflYW+Owqq9U1rbKT3egTgw1Vrdstet+EB7i8wHXXce3p9UYQaieU5N35B+j/mLdDYUxiRuMPode7iZbzblVsA4fjjO7PotaaU4Kh1d2BVO/1bGU3RnVbZgTi9eQT/pecwJHVeGr/ZoxMPz12/6MbduKwlCSYRAaHIVk4/AEjPvqxiKPX17gMB4fI0ab8MxjjBm/kH4WQqbP2JCFQhQmlm5lP+I7TgUk72/Vs+JHrOn4ku618JohbLxl5wnUNaBo3Vr1zD886laUZ+/jq5AMpWICKbjSBcFmMumePSOLKzsvlwchCpWDkyp2Om8Xwn+0aeOqL47p0SMx7RLpCzPYoHtQr9m48NbUy8Qwn336iBO0Tpx7PdZ3fxo1+busC4a7AQEFmUesC2rhsOR1SzIKwYnYkumvUwt+160DxowRrSGNsJgnGQSga9doIcqECfZjcgMYV/MZAOBIUL1RZBCsBTXdukVZjcc7XJ8KFwt4M0Xhhu5PYWGbt+VjuaQ0nnplkc6LONqar2kyYphXhXCGjwlxf1ZkxowBJk3iK6sMvWOxFheWrzCeoznW8amy5VaFltEJd2S2ngomckwkzNGxwo9Ceo6DKo8h7/DKs9MoD5heaDvlOGTZuo0QhUvavK6+Nm0aMGMGGBP78cuC0zG+5lOrFrnwLAeXGVdcYfjxDX835bqJM0Sh8rMENJ6etscPstQnAFze9xH8U8qBZwlXYnrrMj3Kf+Sux/Ubm5cHbNpk2q3irEOY3EyMNhGPNAEMAmryd+GrWedEX3OST5zwlCx/HVoU7og679Xvoq1HO3bbmet58tnNaPEhd31EbKDVfbogCOaThNlCWBBQk/eH8XXGjDcNOoK2woxDmNfqbZ3CBoOIZH2S7a/lEjLIA9X8+XI81VgwruZT/GPk7TGrnyBklAqueDYLAZg3T1R2n356dMiMRo3075PeY88WhUYr4VmzRGtHq9thLsgCxLjJBAEAVbl71HNNy5bAjBmWVoeASd4HJgCrI54AcqmmTcXcHU5o1AgV2fs17cMwRKES+UrHjkDHjqrPW5hxiLsL8rrChXDG8TjBOO802JjHW3nNmKD6mnwsFAkFTSQtDAlUcHXrZjnuyO+5wXxshtN3IB4KcQa4tl7lHpaqqqQ2FcJrt785CaxjCu+zq/Lg8lBQ6ZWCy+g50/M0Ohr081esk5dKj0Z5u3DnsHvVJ7t3V3kQNcjd59njHBeD9MJC1An635VXHlyaP+HXKCR1FVw2w703yd+Jhjl7bN1DpDbl2fvRrvgX3WtR49dll4FlZbpuc3TjL/TzBsWC4cMto2EoQ7p7tfbimc/bFP/m6D4ithiN2fohCm2EE9Q516rwV1VoXrGcuTsuQ6R/lusCadLgNoCgdWRMIAUXAQD4cf7ZaGyi5DJ8ncMJqRUFVnd8CX8d8jf94iyksXAWxHCJEDX4UQOCzkpZHgtyc0WhZAx4fMxNeGT0rZjV8v2Y1E8QRnhqScSYaX2MwdxrZf16sMqK6PvsdsNmeRV5eZG/+/TR1Mv3Xc1u8S4+mnGh7jWy26pf7Fp0MoZXf21oVW31TNn2sNQmZjKq06DMvcPvwvYFZwAAzuz6DM7u/nREwWXSprxp9PuBE0+U59ZjmnyMnMBR486sXas6dLT59HDBHhZyvjxJzJk2r9XbCC5bydV+3HNwaRrVhjRSbpKI5GBJm9eRE+AzrkooTZpwGXtocfQOrFiBoNYTykE18UArmNB+XLnfHToAZ5+tutaldGtM+0bEB8N3123EERjPy3YwUtTpCfW0HtumLFokRjI5/XTTYqZrGg6jHruovJ5i6LFs9LsbCiD9CoWYhQfXxb0fQ8/yH9Gj/Ec5lFpA83uF9Ix/fD5k+03WVxq+mnUuHhsb7c1D+YLqJ1H7i6IisDatrW6KXYdcYCp7kP/wTuJRF5YfOvg+yIMrsSjnCSsPYC
dPu/KR+GbOuWiiiW5gFt7ytsF/x+/HReZYQwOKMCHxOveegkIUxgRScKULgiAPAw1y9uDeYXfKLxkA8wFfulYnJbM3ymehey2s4FINSMbounZKdWb7a1EbsrZei8dENLnZx8gN1OpfpMGI8BClRwYuuMDjyl0ufAMBoKQkulrpHeSt3vYbo6x49mzRC+KUU4Djj9fUK5azWnBoPStUdSS7YJPwlOIs0XvJ6Hd36vGgvUtbv+ViVxDwztRLok7nZxxBw9y9AIA/9/0nVnV4RaHgMnizMjIgQL3ZC7f/73E3Gfdh2TKgc2d1v12ItvMzDiu7YB+Fom5I1Tfy6SgBlkEDcffg0oQoDGhi/iepHKJec+ewexHwhVw9567gfSg2bJCF0YJgcp/GW1T3HbBaw/boER8PLg88ZGSFgM8Hq5DMytzCP847C9cOeNBd47QXSCyS8YgqRKHZ62Txe63q8BJWd3hJPvbOg0v/vN66VRnKuyrXwrOnQQPgtNPE8OJ6cOz7cdFFaiMyD1CufWI55xlZyLsOUcgY1nd/GjmBWvh9ghxKLUcjE9Bt3+/HrkWn4Iq+D5v2PUx2oC5KcQYALXXCdRHpASlT1AoFr9YawRBnPV26qA59JvIBIj4o50Ivl1VR75rBD200Z7wz9RIsafs68gKRedly3SpFJaMQ2ImFvv00Iryx61z6s5iXQjlKmI0YYQFYeGOoMwCYWYqLbWvL67ennUiU9Wb56yIWGCYwJkRZYlpxXo9/2SpPEAmhQQOwfHebzRcnXoVTO/+XqyzPQltvPeDVxp+L4mLgxBNlT08l4QWEn5mLJ2ntSmjxSpEVOW/+TpgtdsP39q78Qe3lY7TrkkMUas4ffzxQUwNMnRp1i9M3VtdK2YpAAB8fuwkPehDiV6so1FVMGoQFeuu35q7btwNjUH1PfhZSHTMIlqFbiMTQsWR7orvgDTU1qrlS+d5PafoRnj/maq5qHviul6tubJO8Ts0IvxmmIdItEEz2LSqCQVWxvMCRmHqXEO4xm7OUv7ZhiEKL+VNL74ofcEGvJ+Rjn5WlNidGq1O9dXST/D8QWrYC383ZgPN7/ttdwzxzdkkJ0Lu3u3Y0xGt3oPUyDeM6RKFOuanNPsSUph9p2tepLxBAdqAOfSq3cHtsJ733MOE5xvsIvXP23yhtvrhEYKYIkMc+n8+z5z/I48HVo0eUoayPcnAlHHnMrqxUKTx1QxS6+a0MZOFGT0zvyh+i8ib7mGAeen7SJGD4cP7nmrSrMYEUXOmCXlJUzYvcseRn/dwb4US9JtYPgqasTEYGkKmOD8xgbA2hKxiXCucGjqr6YGRhz0pLI5aYHANDt7IfcX4v/o3C+9Mv4i5LEJ6gFYK6YECD73BFv0ewf8mfYjZxhheuyve5JOsAhlR9HVX2lM7P4eLej8ekH1V5omeLNjY+QVhRXXIIz4y/zrP6tK+ads588Pue5hVIHteCclmmDKkzfbp1J/r0ET04SkpwQa/HcVGvx+RLKit365oi3XJiheb3o3PpNlTm7LN/r4YXJl4te7at7vASlrV/NbqQViAlhyiMvweXclOjXe/4mACMGRPXPhF8rO7wEn6Yd1ZiO+GV6apiMFK+A/kZRzCi0VdcVWw7UBw5OPtsy3WJNu9uleR1at5Nsc4f55+tu3bgQTvOat95WcimCcXmydKIPLhiCk9EDwAagZjJbxKeJwxyV2k9cH0moYPtYDQPGRmKMQa0KNyBLH+d67YBg/le+bk8fo5VxkMxfEeMZBZcHlxmoRl1FFz/HHMrOpaqjSB0BZiZmcDKlRhS9Q1Cy1fh6Amr0KX0J+O2YKAoo3VC2mLXc9mJTCBDEz0Aa9ZwNOTRutkqhDoUn8nDEIVmRoTymDB+PJCd7VGLhFfIv0+TJoaGC04IT3PyUzZunPj/qFGqucnKaDtUFcl/64Ngng83JweYM4ffg4vWkTGBFFzpgiBEvyOaRdqnMy9Ax9Jt0fdKCiozDy7TGO
dXX809IWo3EIwJgCDg9cmXoWPJtuiksYKAq/uJYUTC1nR29xt2rEMYQuhRbr4YJQjP8VDB5WMCfExAXsZRbdU6zXK0pZcHT/pfaSlZnbsbL0+6Kqrs6V3+gwlNPg03aNlcTd4f3C952ALbb2GBxUyu09Ii8ZRn70PnOOdDYQX5GFvzubgYtXMf5xOj9RD7aX+pSZ0Rnhp3PZ4ad714UKHIf1dSInpmTZ1qHaKQMZzT4yls6PG0vA5QzoN2nnlH74fUZnh8cTymMYb2Jb+gd+UPAICbBt2P4dU6gnA5F6h6DTOx6cem1bco+J27KzxlGRNUgoWAxoOrb+VmstZLUhgDSjIPJrobuvgUwiAZnvBjUAt9ZI+UkLVByLdzNuKvQ+4Fhg1ThfbTcna3p/HFrHNx34g7LeuM6qYHsy/X3qN1a1mwIYdXVrZtZo1r3riz+wguDtZlWhdizL5ATCcagFiV+vdsnLeL3+vHBCNvcV2hWgyeKeXnKs/eh6rc3eoCHOOBHeL1WhgJEA09uJRjpt9Eear8zRcsMCxWnr1f/0L37vKfGb6QpaCTJ3INkdy8OPEqvDAxev9rhNHc55W6J0qW1qUL0LWrJ3XzwpWDKzfXsxCFdSZG+owJwAkniLlMNfx3a3vy4Eow8v5UEGIbHrt1a+C664CZM1WntblcteRm1uHZCdcC4I9gxB0yk4gJ9O2nEeFFtPyiFhYqLpq8kH36ADBXYpkOOBkZ2uKmoR30JpIBDb8HY/ohB07p8jxennQl/j78TsP7zfA8DwdjGNjgW7QrTpOQNkRSYVf++c3sjcjyR2LDqzZ3jNlLWq3XHyH6XWYQgNWrVRO9UbflMpWVovWUCc8dcw0+mbmJu2/hui1zcHHXSCSCFgU78PGxF8a30VWrxATtp52mOm1lEGEY9chE2QSoc0iZMb7JZxjf5DOgRQtgzpzIhaZNxfdH8Q5xKag5rCnNcBQ+xCsljkU98lUDa+yWheZKqe/mbjS8VpJ1QPW3WVllf4qzD0NYvgKA5FkqfYY3p/wZL0y8GgiF8PbUSzGt2YeW9RH1AI53RZ5DHSi4lF4w8lkOgXZx1iEc3+51y+Y6l/6MdsW/WtZniEtpuHZ9r1vb6acDBQWqU6qx0+V41SBnj6v7CX0sBV1yfkmb3slDhgCTJ5tViW/nbMC1/R/0ZC4zmkO9CoFoiMYQ5s99HsX3czZg89wN6s8V9DacWbxyX7Yo3IGuZdEGqYYCSMZEy/3hw809uJQ5yQYPBlav1i3WqXQbtnOEYjVVcDVooC8IJUOYlKI48yAKpZyzbtANyWb1KNgxSrfbuAvMEgcwJoh7mnnzPAlRuL7bU1jd8WXD6z4IhqFYj4b8unIOIn7Ic7ggeJoPV3dfHvbgq6yMlFPMGVW5u3VDfI5p/AUA/nmb+3PQWB8TSMGVRoQtGQUwcQGvsCLSllFRVQVccQVemXQl3pp6qe6GM6ixjLbCX1Gme54xtTBaW1udQUiKIVXfoGn+TukemwouG5Mnb0z+16ZcgYdGuc8vQhAAXHlwtSpSC3G1r+ieo7mG9/ocWi2x4iKga1f4fRpvTL02wucvvBAoKjKtNy9wBEWZh/knfEGAsHwF5dJIccLPyOI2b6BZAV9y7XuG3YWFrd901mBmppir5rTTxP8VON1saZ9/7bxTrbWa1tKjh/p48WJRKHvllcA554gJ5SOVW3UmquxNg+7H21MvBYqLbW0f3AirwnfGdP0+d25UmOQwTfJ3ycomO5ze5T/4Y9Gp8jHvOKkdu/0sJAvSyrP3i7HcQyH0qdyCnMBR2/1S8tGMOCuE6wEJ2WfaUXDxEvZIqqxEqUJRK49RHgu0naL85E7DIlVk7wPOMg4tqX0nddcpGRnoV/m9rrDclFAIvy08DRu6P23vPoILXt2nUpDElYPL5wP69Ys6nacYkxvm7EV2wJsQgUYKDt15xak3oRWFhTiz27MoyDwihj5UGqaahVtyAHdoJpeUZB3ERz
OiUwqYCiBnzlQbDukxcqTo8bJ8uXist7447jgAQMPcvZZjuJlnCdav1xeEktAzpWCMf61sVoo3LKsVbvJk630Os3X0l7PO1T1v9lkYBODUU4HSUk8UGhf3eRz9G3wvVR5dn/b7EFRzBpFInhp3Pe4adrd4kJdn6fnEvSc9/3z5T93h9MQTxRzvALJ8kbn+uzkb8aCJfJcBXIsTbk80igQQE0jBlS5oX5BZs3TfaMPXqLAQXcp+Rt/KLQYKLv4BRxuGUItPm3hV0c+jSmtTk/jk+gf68EyejaUk11aeIHKb55xDLs1ETHCyr7HyHjHCae6q8GKRZxFtx0o1ZFORrhyrTA3adfICDq3iy0VCxJ7wWH/XsHtEgSUHx7V5CzV5u5w1eKGxcsDSg8swtIjRDQrDE8MiAtCrF7BhQ+Rk+NkuKAAaN7bXJgB06iQq7yRP7sqcfehTuQXo2TPqE0xv9oFheBXH1pWLFzu7zwYCIIZQk2B6ni4GzGrxHnc7TsbJIVVfY1aL94GMDPyx6JSIIYL0e7gVZHQti29ITyJx2BZWDR8OrFwJrFuHLoMKcWipmH9DFqh7GJLMzTpYucZ3UouwfAUqcvYDzZvbbzv8R5MmwOTJeHPqZbisz6O266nI2R+d74TwBMu9m07+aO71s6bg5zPPw/TZGfLz7EZArMUohKJuG4sXi8oVr2nZMvL3jBlqY7Nhw0SZwQUXeNLUln3lntTjFNehobKyxJxFPaW8qXrj5cCB3NVFhYtTkp0NIaCjQNN5kE/p/Bz+MZIMa5MRu/Og0Th1WCfiipM5VveeGAnS2xp4cBsZrAPqsS8e8v1oBVeEloW/kzwvgQyp+gZN8neJThnHHONa4dmqUHoeq6rMC1ZUAPPm4dwe/8bycG5nnw85gVpTo2kv1wZE7CAFVxoRfuV0B2o7ySO1sw1jlkKuqFtMJotc/xFF1YLqZr9q0mNA//7mfeXYzSg180b8NP9sAJqBq7paP/cAY6LA0WbuFoIwxGUOLqfLAR7PBL3+KF+7a/o/aHE/P7Ii3YGCy7QPOguSlyZdLVVB9luJJlMxRtux/tWW7VL6E16ZdIX5TS1ayFZbejiN/619T7RPHNeTqowPz5H7w3SsWLtWVJhp3yUd743WRb/p57aCCwVX//6RfDdevGJLlkT+lnKoaAXL5/Z4EjcOvJ+rOjthlJVeMOZ1RtYzL0+6Cse1eQvIyEBJlpTb6dJLZU88x8pZot6h+6yavVQ+nygsKCgAVq5E9ogB0i0OPLis8v25pU4c+72ah+3kAwYgjpHSfMCV80lJOPefvbsITnjnYkMPLi0mIT7b9yuC74Slcng6L593o8+hKyjLyxND4g0eDCxa5K5ho887Zoy6XCAgei0pvcRd8MWsc3FBr8c9qcsUg5xC8nzrFRaRJ3T3IUuXisYs552HXuU/mObw1LU30Bnfu5f9iGnN3Yc2jnlozHqIjwncihozAbne/OVk/ezI6MJjr0HevVw8Qppqv/OwEvz96RfhnB5PUojCBCL/NitXArm5lgYKZs9VRfZefDMn4lHIE8Z/U69/RQwQ9ULXbtggj/Ezmr+PRW0cRo4xgrx1YwIpuNIFQZAFUbqThfRycs2/OslXrQTPvBNUSdYBZPqDandnxarg2Obv45NjN0lNCcCgQeJC0SGvT74MT46/gbu87MHVty+wfj3AGGpPWKVblsYkwguUeVoA/udqUMNv8PiYm8wLxciDS7n5P7nz8wCA/bVZumV5rV0YQmhWsFM6iEHs4m7ddE/HK18AYUymYjNmGs5F4qlx1wOIXugWZBzB4KpvzW+2eGYME9YLYWUN3/MsaLwR9TauU5p+ZNwvt8ntGdP/rOXlUU+8/A7o7NA9SfjrxkRz+XIx0XvfvsD06cApp8iX8jRh/rqX/4Q1HV/SrWZD96dw5PjVuKrfQ+ha9hMm1HwKANi24Ax0LPnZtAtDq/SVf1q0BjsAItb45eVAaanoVTdrFv7c91
HsWXwSV71EjJk3L3Ft84QotEiAbUp2tiwIfuNXyYvDQ5Np7fzerexHbiUzA4ChQ8UuedQfbT3ab0wVNnX4cPFA2vMcCmbAFpq8XoS3mBlXKOdhw3J25tCwIleqyscEy5yxvNjy4Ap3YsECYMAAdw0rP5Pb9YQN2hX/ivE1n8W+oSVLgH798Mz46zCpyf8AKLw6vaRxY2DZMmCjdR5Omb59gSuuAKqr8cDIv+DL2fph3AB+h1q/z30wt0+O3YRFTkN7E4YwCNx7SbNIQV7kzirL2o+BDb+LLsc577teHkj9yTUJw62N+uQpuiEK1d95+E3qUf4TGNOJLEV4xosG0UHCaOdBq1HuaNB+9AvDZ0z7sEtrQfl0u3ai4al04uHRt4uGi0TSQwqudEHQTK7aAZ5nxpo8WYxLrrQil+qyM+kyJuhOML8tPE0MgagsC40Hl09Ap9JtAKRBz+eTBUROBNEdSrajKnevqCjTYWjVV/j9uEiuDT8LiUK0RYvkGOVRrqqk2SI8RLvY5V3s5QWOYnKzj80L6XkgKvDCgyvMEZ3QCoC0sJSEWIYIAkLLV6Emf5dxA7qdi5QzG+LojU1uMv11QEkJAD6rv/FNPosqe/vgv5nGzebFSqFj9CxpH9kuZVuR449s8PTm0MfG3mLckI6hiYysbDMuEsXGjcDUqcDAgcZvvY5nh5sE0J4oj3v2FK3ZGQPGjhU3HNLnDxhZqep8MX4WQqY/iFO7PIePZlyEhW3eBgBxfWBy+yfHbsJNnAJ7ANHSqi5dgNNPV4efHDkSGb4QCjOdJSWvUuRzK83yWJhX3zjnHHl+SniYGkHA34ffEXU6ajnv8L36cneVaKE6YoStPpmhXR+Pq/lMVDJzCOcFQAwjc/XVUdfmtXobj442GR9dwrKzIrl4JOF/JWd4XJnSUmDhQo97lp4cXLrW9j2jG3+Oac1MPFbCIQoVc7bKAMVsDjVCacgydar9+3UwskiP63iTa5yLNxZwhft3S04OMHgwxtZ8jsbhvUOs6NUrKl8rL36fIObfNEAI6TwHOgpJM4UBIHovWNGi8HduIy2CHztrcR8zTuOhJ9y3K3Lasei0SD4qDooyD6Ig45BpHwCgUd4uZPv5c8d2LduKn+ev070WEiIGeJ6FfJs71/CS9hNp93mMAaFl9vP1EtYMM4gOEkY1VzBmuQe3o+a3LGmg4IrCJOoLkZyQgiuNCAuizBbNZpbaOOYY0SJKZza19uDSnhCiklLKVlXatg0sy2ThuwvLM3ngNNikZ/nrUJ6tTMINcWPuZGNEEA7wM2ceXJablI0bgepqDGxg7NHCtbDUeXfkMUa6dmzz9zG75ftiWE9tGxCsLeS1QuFYeHDF4n7CEzJ9QaB3bwDmcdu1KL29upZtRXXeHtd9cWojq513bxn0f/hj0SlyyCOlUPqMLs/i1cmXK+5VMHiwqBThWFB3K/uJv4M1NaJFutncpqPgcrP1dKMc4yFgJESz+V6bGRnkBI7C7xOPh1nk7fvlYJFuiGe0bu2ZgPHqfg9i6/yzAADbF5yBP3V80ZN66y0ZNr12Ysz81u9EndN6tWrXDLa4/nrRm5AXaUwwWm8EmHrMkMP8VVbytxEIRCntWhTswLTmH/HXIWGl/NPdH0lj4qjGX+Lw8WvsNWhgPEeoyQnU2r6nZeEOPDrmVstyNw/8P5zV7WkAmtBcjOHuYXdhdYeXom/Svj+aNSiP8Rcv4TXFG1MuQyNFaNq4Lj/HjgU6dxZzS8WBbmUc4aI9JNZrDVNc7leMPLiaK8IafjD9Ikxu+rGhfOe6AQ/g05nWOdR8ebmu838S0YgeXHyYff+OIia4DOk/q8X72LvkZNHbGwYK+VGjsHX+Wbiy3yO2uma0JxMEJsv2eN/ci3s/hpGNvtC/WFkZycer68Gl8RLSjhdCdJ5uIj7EI6cVtzGJUc7shQvtrWkhelISiYMUXOmCIkShLpmZUjFnI3jQLEmqBjstMABo2lRxInK3WV
JIvfJ62B04lVYlhsQ6JwFRr3DqwWW6AV+6VBRoM4bXplyB+0b8NapIcNlKZPqdJUeX3yvpXXho9O24uv9DkaTMemXNUC68mzXTj4OsR6dO4v/t25sKtvwspL+490V/i+XZ+3DDAGuPjePbvhafPANpTpfSnzCn5bvyZscwRKAOSsttq6cs238UW+aebR2i0CJEotH7qRUABxbNR/bw/vL8lheI5J7sWrYVg/RCiABiaKI1ayz7KSxf4SxUAmPRIQrDXdeRtITs5sVTYCefmi0EAWd0eRZdyrZy32I2rmq9UJS/pTxCjBqFFydFe5oo+W5vRbQHvMcwFhlTG+bupU25V5x3XmLa5fgBtWsEV94RvMZbYQV769amxeTuz5mDhjl70Kv8B+6uhBRhXJX7l8v6PIKVHV7hq+TEE1WH2j2OkWJO9bUrDrL81jl7ieRiabs3sLrDywDU4Y7BGBa1eQuTm/5PPjZEmvs8zRspEV5T9G/wvTondOPG3jWih/Qh/CwE5OeLeTnDIXNjDGMQw0WPGhXbhqTFiyehlJ3i0iBWd+/StCm+mHW+nOO4+9x2ouePwXPpZwJXbi0GAeNrPjM1fCTsw5jALV978sdOuvNSw5w96FJqvaY9qdPz3mvHw4Z1iOG6XUEIEVnbP0b9BQ+M/IvlPas7vGTfy1pCu2biCYVPxAfV3owj+pjZe6a9PVx3VCSuMHr5FYcMid4tlpUBF1wghvvmnNO+nH0uXlMYshLxhd7wNCK8SBIUE4dM9+5Au3aRl9bm5PiXIffinmF3GbetGHBMk1sOGhQ9YA0eDMyfD1x4IbAu4s4sC8YzM4G2bQ0HtaVtX8PYxvrxvrWCeC3y2fHjsbL9y+ImiVPBRRBe0LroN0ceXE/91Fn8o0cP9WRcXi7GfwciSdB13h3+3FgmHlxadBYjtq1zzjqL/0tYvFgMM7RsmamCP9NIaKXT34rsfVjb6SXLpntXbMH8VtHW9oQ9/jPhOsxq+b5uuCErlMow+RmfMkW3rI8JaFrwh+Wz5T7LgcSgQaKySmpvQ4+n8Omx58t9SRjMRNXjcYhCwxCCHnB5v0eRa+QVoPMbm/3sdwy5Fxf3fkw+fvj7HvLfcp7CwkLLPh3b4n1gxQqgY0fLsvIY7RSpPwm1XE8Hwg9GdTVYzx7mZWONgYW11vvF9vjhZM16zjmi8qhPH/Oqw6NJTg62L1wnh/7k6pZB/1Z3fBmN8nZbVzB/vuW7ZvTJjRRcRGqgncXCx6b7T7PfOfzuhUMUemjAqHxfHxt7Mx6efK94cNJJnrVhhqGnc6yZM0f0HIsDdgyjkg3tHD5t1F6gc2dk9esOf9tW4sljjzWtg/d5ZQyY2+pdvDYlft519QGfjRxci9vo50DbvnAdWhX9rntNybUDHrTVNysEQNyrSIalumtKD/N2Dq36Cuf3/Jc8Hrcu+s0yOgIgKhEv6v24o9DF2vdjb22OuoCHn4+wh960nOGrE6OA6RhdVuUaR2oxGgcNjcKaNwcmTQJOPjlybv58fXkzY8Dq1cDMmQBEI1OzEPHl2QdQknXQ8DoRW0jBlS5oPbi0I0YgAJxySlTiey4YwwntXhetxQ1DFGrV5jrlmjcXJ9EGDdTnfT5gyBDR/bNFC1FABI1ySpFYXtvOHUP/JgqWdJAFAxyT1y2D/w+X9HmM+7thvtRdUBPJwSOjb8Xdw+5RnbO9sdY+28pjDxNLM0USVvkV4XhXuD6PchFjZ2zKzRUTxUth4IwIW/UKy1eoQyoJ0VZ3ZgKSHuU/4E8dXwAA/HrIWuBNWKMyZACngmv9egDqcIYNcqT8Ay6taa3yHBg9nlZPbW6gFh1LtwNQfGYL7whDXG7GtPny5Pk7Jyeq7MhGX6J98TZH7eRZfJeO4R0jFGsN3XFICmnSrXwrJjaJ5DNsV/yrmOsL9kJVlWfvBwoKgBkzrAsvWSIaHjll1Spg1ixScKU6ymdZ573+YPpFOK3Lf1Xn/HYVXE7Gi/x8UX
kkRyzQhzEBuOIKR0oiv8Kq1ml0CS1WnzTcTS8VGF71neBHa8gZNkzRE2QZCZ5Xtn8Z/SqlXDXSGjRc1kud56KlftmSu3PpNgz9q5S3jcNowgv8PhchTd3QqFHc2k2IB9f8+er/rTBYm4ZyIvuX1R1ewqP/LRS/txNOQKib2ujCLIIAzzdNkWdiA2NA26JfMIDDM85qj8GFk729IOCCXo+jUJFvSzytfnLM9mC6nk8VFQCA3hWb8fKkKy3f+U09/4VFJvJEIxiAFoUGoYttRnMa1PBb9b6GFFxJhZlzQvuSX6JS4BgRvt3Qg4sxYOJEoH17J910FhI7qhJ69mIBKbjSBSFiPWKegyu2PD3+eqzt+KL+5JuVJQ4ma9bIyVr9JolXtYkHdQeScFgrM3fjcBgzM2wq/AjCC5oX7EB+xhHVOd7H65we/xb/MArgDniaS04pVJPHGEkpEbnA8MO8szCq0efyKS5rc96QhCaYLTSUHlyq3tj0OGtZ+DuuH/gPAECn0m20WfQAOVTHiBFAx454YuzN1uEq8vKAm27CrJbvYX6rtyEsX4HmhTvFawbPvPxbWbxgx7V+C/kZh+18BHX9RjRrJv/pYyHg5puBadMAOAg5ZvbOc3A4qM47JACioljKg6bkkj6P4fNZmxy107xgB1bp5T9xi9HmvmtXcSyRlFM4/XRuJaJyUz+8OmJNyustc2DpWrQs3CEeSLkMTGFMXBPZRB6y6uqAkSO5rYYJA5J8Pde9/KcoDy5TLxUTYhGWqjDjsGNBvXLcU75lXs6r2lBQ4fcnKhThtdd61ibhEZrwk4YMHiwbu3C/zj4fbhn8f3h50pV4a+qlEQVXDJZ0mYvmYuBxLeXjuI3Z0pfhZT4xJ+3Hg4R4cA0ZAtx4I5/HNmCs4Dpmkvy3duzjXeoxQNdAKapcck93KQuDgNLsg3idwzPOx6mMlLEalBjDjObvY1zNp5ZVndPjKWT51euJqPB9ZgouvWuDBwMAKrL3Y0jVN5Z9sGMgq7rPZh4lJdp1/KSmH0f2NfPmkZIh0axdK/7PGHpXbMaQht/Ix3YwKm2o4DKA9w01fWxW8CnhaFCODaTgSiPClrxcL6adF4rDUiT8ko+r+QzFWYfkhVyGMt54uM0GDYDjjgMAZPsNQgxZXJPhEeCvXWv9eZX1cFrGxFu43aLA2nWdSC1shf9TsH3BGTinx5PigXaGVT7r4RCFLjbU4f4o47vLTRx/fCTxppS4vkn+LtVigktAPHcuUF0NrFzpuJ9mC40MX1AuoPouGjaMKmvWX+VGoHvZj/Y7SUQhC1+ys4ETT0TXsq0YWvW1YflMnzQvBAIY0/gL/H3EnZoKLcZvi7ngzG7P4sWJxrmWjJ4zyyn11FPFZxzSIlwx59hWcOmEEuSGsSgPLgBiOCEPPT4BINMfxM2DrPPZ2cboy161Crj++oigp7BQDgUoC7qHD9etR2mZqszZZxXmOIwqXGJZmai8XLaM48M4pFZsL5VDMyUFybS5tOqLIKBFwe8YzyHI0qs32yikJ08VOuuSH+edJebZkfqmoqAAT467AWd3e1q3vn+NvRF3D7tbPvZK6K+1RtcK95sX7sBV/R6K/qo5hMOECwxCB+sSfpY6dgSmTtUtonoeFyxAXXa+ZXXqCsQHINMfRN/KLVEeXJ7CGFBaKh96YM9ls3khMeNcPNqUc7cmSJyVkWFdJpzP0CDnWqi0XP5b+5V17qw4GDnSJIKAwLV3ZEKCwlWmOUa5HnXLeiE70nh+Pzz6dhzf9nWuW7VjXLQRiM5DJkVJUUbN0PZFDkke3kf066fbvixLsJmawUhJISxfYduDS2bgQGDoUOvGidgRCKgGujenXIanx18vHjidQzSTrKu8tXpoI5Hp0aOH5bjw9exzPOoQoYUUXOmCVYjCcLHwxBZrjyUp9M/2BeuwvttTOh0RX3pHCi6dEGxmnmBgzNo6w0iwp3dfAjYKP847C8vavxr3don4w7P4bZi7Fx
k6z/zBpWvVz7KVAnj0aO5+KReI8t+NGom5866/HjjvPPm6UljMpeCqrBTvdxGyS5k7qUn+TsxtGcmPpUw8Lr/SbdoAs2dH5QkqzTpg2EbYi21eq7dRnbcnqWSjqYqPCbY8AHIDR43H4PXrrcdnp2E19RSkVvcoycoSw/Qi+p2wnVNH4Q3mhPtH3iHGwLdDMj3shlIeFj3myTlVJAYNEsetzp1lpTygFpApfw95HWLXG3bcOKBXL3v32EHqe0JCM6UpCXnEmfW6Xcl3czfi3J5P2m7m0dG34PoB/7B9nxk1+bv0L4wfD3TpgglNPsWI6i91i/Rr8D2qcqWwsoyphGp2hIVarO7M8IVwapfnHNcfpip3t6LNJBobE8CZXZ/BY2NuTmgfdENnAWL4zDDK90vrZSsZnwgsRuOpou2SEmDr1tg0o9cmQ4IUXPHwipCiSHQs2R4553J95DkbNwIXXKBScipRiTQ0gthRoxTXZ80CmjbFkjbRigzen5cx8EW1IWxh5+0yXe/ref/r/bjKh0ZWzlugHA9M+hNWBrw77RJ8PvM8PDL6VnHQWrFCf5yVZA5yrr9wJIjjjhPDv2m7AajPn3aaVc8BAJk+g1zaAIeCy0LBQR5cSYPfJ8Dvk34PKcoJL6qIMIg863Y9uCwdLPr2BUaPjqz9HEYxaF30m6NIHoQ1tDNOI3g2WfIY7rGCK6rtoUOBk09GWfaBSLxhnXqa5v9hWKfW2lR3/pGETgtavY0nx92gutRc4/EkLF+BAk3sYZk2bSJ/K/tZpzOhykH847hhyM5GrZ7lDJF2WOV2m99Kk8SdMTncTk6gVq3gMntG58zhyxUjobSAiRLmZ2WJG02p7WOafIpGebv0y8YIZZjSmwf9Hzb2iCjWlQtjITztzZoF5OWpEnA/O+Fa/MMkPF74O7hvxJ3RIY4IRzAmAJdeqj5n8sx0LjXJB9W0qUk70h8uw/t58TRrN1u2rctatRLzUl52mf3GGcPslu9hdst37d+bLNjxNJMVXAIwcqSo3DrnHDFU8rBhojAekdAr81q9jQlNPgEEAbsXnyR6pE+dKluw/rHoFLw08UpvPofTTfXpp8sWhLQvd4lyPEj0l8nhweWUac0/QgelENhrlH2bOlV+R3lzFqbSY/yXIfdiy9z1ie5G0tC66DeMbKSvyIwXhvsjI8FTRgawYYPo9Tt2LDB7NoAYDgHhHB9SvppGjWLUjg4Js01RGJDEjMaNge7dcXa3p3H0hFWicuvkk2Pfrh0KCkwt/qVHAj4WQp+KLeZ1MYY7h90bddrHQly/s88HMapNjx6WZes7V/Z7CENMokko0e5Z5EgTOoghCg0GmnPPjT6nNyh17Rp1ytLYKayI0gj7tV7O1Xl7AAC9Kn5A+5JfML35h2IfevRQjbO3Dvo7XpkUyb8Z8IWAK68UIxgAonyuSZPojwMAkyJhOdGmDVBUZN53AP7iAuOLFoZChmFaw3uJRK/90hkrhbpO1CEAolGAzXGKQRANCiRjUrlam0ZToxp9gYdG3Wb8XGRmAscei9xiKU1HOJJRVH8syMiQ1x6Et5CCK10IhWQPrpgKlA2Eg/0qv8eZXZ+JnGBMtogLGXiNbV9wBi7o9UR0ZdKAkqONE+yLdmsOK7iyA3WY0EQdtuX7uRvV/TFi3TqgZUv9ax06qA7HNP5M/lvPkqVhzh7jdlzAhJC+aziR0ug9llbvb1RINkHAB9Mvxlezzomu1Oy5r67m2/lK76MqB5fFbSd3fh5vTrmMq6xXKD24GICdhyOJm5sX7pDDujGoxxGl92fzgp0ozT5o2IbW24tycLnHB4E7Zs9V/R6KhC4wwmqj0ratZTtOPAi47gmHvdM8N6YeyEa0axcJfeMS3ZAk0YWcN+BSqRiFnUElvJ4I1IpKbcYi/wIBOQRWaZb43t834k45l1ZRppSLTfJIQYMGKMk6iN6VW/TbuvhiJ5/GPoq8YqE4eI+MafwZdh53SszbqffMnGl+PY
FCGMtXTjsWWdygnTtVHlyAvZB2yno074PhN6bXPxMDCSV+JiDT7yJMbBrRNH8H+lV+H7sGOJ/5diW/4txwXlptFUZjZJMmQLduwPTpsgFDKFZikQYNgEsuUUU6iCvx1HJdeilw9tmi10esYQxYuRKMid6Z6NQp5cKNDhkC7P3kBwSXrcL81u+YFzZ4H5jJtejCLOW+o0SwtuNLkVxAFmgF6GYGkDVGht0DB0aUQ2bMmiVHSQIgv9shq3V8vhjGVZvDUzs0jGz0Jf5YpL/eqxMicqg2xb+KIYrDkZRYiCv/bFBHdsY1PNlY/7Qv3qbKOWpZvySvJNzRsvC36JOqOKsWKBVcTscoxX1OowFk+oM4tsUHlg/OW+8G8K1JalvLd/Kssww9ewl3kIIrjRBkBResQxQ6zbUhhQPQUpB5BH/u+0/1SXnS9amOwzTM3Wu4SZzb8p0ohZXsCaYkKyti/uSUli2NB7HJk+WNdrOCHXh2wvXyYKSn4Nq+cJ0675iHkAdX/SD8KK7t+CKWt3vF+oZQCC0Kd6BN8W/qCrwirODymXhwhVGE8Yq3KE5pvcYg4Pt9ovXo3sUnYm7Ld+X497KiThtWAdYKq94VP3jZZQIGSl6DR7g48yDyMkxCFJq1A0FUVIwbZ/teJUbKIDchSjyPD24GU6wTJLhCbLnJz+W1YN7m7//M+OuwssPLpmU6lGwXQ7yG0fY5N1e0KDSiVy9XFusV2XuxrJ39MMTxCFGY7a81VfynNIrfOeEhCtu3B665xrhsMlsZt2snCt7WrROPLcYL1RjI1DOvLzMATJjgyMuAS1lvxEknqZTHhDVb5m1Ax9Ltjo19nOYW1v7KWYEgNvWyCLvL8YKXNM6LjpLgFWVlfDmbvMIgJFnMKS1NvjCBSU5BPudvZGAsZBq+TUkyhZpOMNqoP1rsrMsZE1Q51owUXMLyFajK3av/M/DO7+3b60ZqUQnTL7ww+r7p04G2baMUXHrh+0qyNOs9qQ11rlpBdS3gC3I9XzExytIY9n4+axM2dI9EcTEMURiO0nTCCRQqziWfzzwPtwz6v+gLTsccG3tOYfkK3fOxdgxo0sTYPwIguW0iIQVXuiAI8qRhprEe2/hz9Ch3IKQ96SQxlEO+cSJfI8bXfIqhVV+pT5pN5Izh/0begWYFO1WnZat3Qb0x1gqd/CyICTWf6FatFeTpLmCU3gQZGXKS+gALin+PHQsAqA3qvz6uNtgGMB8zjjFPpCx6G8/wuSVtX496B6IIBMQYxccco6gg+vkLevBMKr1ODNcrgwYBNTUAYvMemKFc3Ad8EY/HgswjYn+POQaYOjXKE9Sn8kwzHpd+XXg6VrRXKxzd5AohRPQ2HkYCGfm505s/li61bqxvX9cZ3h3n4FLgOgeXByj7y7WvdqPg8tqDiyfBbxhBwNiaz5EbsIipDsnLK0ytcXndJ8Ch8uHZCdcCEMPC3D7k75blozxU4vDopKtIzCyMUNzQTqYZGeJa00vi8pAwMQRoeLcvjRdGTTMmqEKJKZ9rLz+/ofJebxGTlwf07YsDS9diXM2n0dfDt5Lntid8fOwm3Dfijpi3Y+fXCsyYgr9PfABYuTJm/Ykn7Yu3ieEjSbGR1lTm7KPf2CZaI2otPiZw51dkAHDqqbLBNbfC0QskjxV5D9yhg37ItIIC4KSTVEadgEn4PiXSRK4U2MvyM+m50yrOlBzX+k38uc+jAAw8uHj6EAoBK/QVGXrPvvK3i/qM/fqJ/w8eLP5fWQksXGjdB8IQHxOsPZZ4GDRIzMeel2ddVg/Fs9Cq8Df8fbiDNUbY66x/f2d9kLBUcNGYHTMsJRaMsRrG2IuMsc8ZY58xxk6Szp/PGPuZMfaR9G+C4p6zGWPfMsa+YoyNVZwfJ537ljF2Vmw+Uj1F4BtYruj3CN6ffon9l6pDBzGUgxGTJ0efk9roU7kFL026mr9Ngx2xoRBQIXg7od2r2DJ3PZ4cf6NpEzcMuB+ARsF1+eViaA
VtMnllfOE5c2QvtqOhiKB0Qs0n+Gb2RsQMQSBLgHRFM4mHF3pci84bbxStxpTvn8575sWzo/TgMnwX8/LE+MfgcM32mKDCkyHgC+rnCRszJmpRXpgZyctn1uOoGPdDhrjsMQHoPOdSTiRTtM/4vHmyIYIXmD0HXsiKk8GDS9U+T4jEZPLgmjgRGD1aHmtct63dNOfmRt7vcMLsGDGm8RcAIu/BVf0eQvtikzxzGoJx8OBKhAI2HmT6g94rXzm5tv8/8O60S3Sv/Xbc6Vjf7Snda47w4DOGXyPuZzNsVW5oEIBIXiLGVK+p1fM2utHneO4YfU837ifVZD+SG6i1Frg1aCAb8xDOjH18EKzv4507vJpj2rUDrr5aFLClOpInw5X9HnE3f6cSCRrPXePiOf95/jqMafw5356LBKpc9KrYYmvd42OCuP/t1QtAdJ6rTF9txGPMruBe+5trf8O8PGDlSj5lHGM6Hlz8n7NOsd6U75PCKvqZYPh8tSv+BWd2exaAizWrlAdMVk4p0aQT0RL1GRcvFsd5M/cbwhZZ/jr9MUjzTNw48H45nYuwfEX03LRwoWhgYjRWrTfOf6q9xe8TrMO+6rFypZir2aVcgRRciYNnlKkDcJogCB0A9AOwhjEWHkmuEQShm/TvKQCQrs0B0BHAOAA3M8b8jDE/gJsAjAfQAcBcRT2EWwTBnseE1y/VMceI+SfKyoxj57tsk2cD9Zchf0fj/N2W5dZ2egmARrBXVGQYC/WZ8dfhgZF/VZ3rVLoNf+r4AgAgL+MIWhWJoTZi4X7NIKhiHxNpxOWXqw7Dr4mZNVRUYUDOJYNp06KKuQpjJQiY0/IdlVU1j7VVvOdtlQcXC2FczWfRhRiLUnBd2uefeHHiVQAUi2CdsKdy7aWlwKZNwLx5aevZEE+inpOpU8EK9ZMJy89deEF89tliKKuBA63b0W3MPoYeXHamX837kxAPLkV/TZXpBdJv4WYj6LWCKysLOPZYPgEzT9vaUGiXXgoMGACcf76uZ6Du+Ofy2Qo/A6d2eU4Mw8lJPJSjhuFdUpgr+j6MD6dflLCwfx1LtqGXQcjbkqyD+iGOtMZXvHgg9D0iGXR9PmsT3w1SX/WstQGdHFzKHJoWr1Kzgp3cXimG+yKj3106byks3LRJDjeXzJEj44WfYw7rXvajKoS79udrV7wdV/V7yFkHTH4E+RngHaPTRejEm483HQgLIrt0SWw/nMI7RivSRIxu9DkeHHUbqvP2gDGbhow0aJny7rRLbZXP8UtrtrBBtGbNVJO/S/QYKywENm2y5wXM81t1785tVBqWLYTDunUp26ouoCfUlz7XuT2elI1L/Cwk5hAqKgIQnaNaiXKPo9tPi8/48bGbVO9IOPIBAFHhpTTyDXuPG7QPQPw8Tj2ECF2y/bX6ykvN3LOm40sozT5geN2Spk2BKVPw3DHXYExjHTmPFwQCovG4y3mzLg4GiIQ+lt+8IAjbBUH4QPp7H4AvADQyuWUKgAcEQTgiCMJmAN8C6CP9+1YQhO8FQTgK4AGpLOEFyhCFPOVjsdgtLxeT6E6QnPnMNuNBE+G9Qd9kIZyLvmvnUC4vGcYwtuZzdC3bqmq7MOsIrh/4D7GIoniscmLUUojCtIMxISpkWnjha6rgatsWWLBAfW78eOCGG8Qky0quu0713F/Y63F8OetcW+/R/SPVLt48t8oL/jihDVFoJPjVKrhyA7VoUSgqp+VNh5l31qWXAg0bAoxRqCIP0BWem212BgyQN1Ro1kw0qOANO8j5zJv9rsrnbEjV17JVppsQhdkmCak9Rycvh6kH1yWXAJdd5i5hfCKFKXbb9vvFRNmMAVVVsbV8V/RNfg9atLDl/ZqfccTrXkURfl7P6fFvZPmTIKyfB7Qr/kU2SgoTz/Hcaig6vt3ruH7AA+JBONyQU4GMBwquRrm7MajhNwCAJW1ej1ww8qL0+4GTTzb24NKctmOgZ/Y7aesJz+06BU3PW/ZH8Q
F4w1ilLYsXI8tfh63zzzQt9sGMi7Gu63/kYx9TPx1Tmv4Pp3Z5Tv3b+Hy4f8Rf0Ltis3kfSGAfjVOFeCqycKG4TmnePNE9cQbvfuy44+RcnwUZhzGzxQfypQzGsY5Md0VngsgOh7eW1ouGa+r27SNGYxCNp2e3eNeTPvCuG0c1+gKtCn8FICq5FrV5S11g8WLgoouA226LnJOem7LsA6JxCYAGOXtV71uAhYzld0oFl858afVYdi7dphrjw5EPAABDh6pzG0r7QeUcnq5RCJKJTB+fBxegma4dzt0jG30ZpUhmMPAiTND6QM/ooGPJz5EDGo9jhq2dO2OsGYDuAMIZWNcyxj5mjN3JGAtLPxoB+Elx21bpnNF5wgsEge/97dtXFA527BjzLiEnxzjMg1kydoMPYpp/xSgZ9bBh4v8DBgBAlBdUZc4+436YdkLdDx7ByHdzNthrS4NyoLx+wAN4ZPStruojkhNLD65LLhFjfYdjRytRWPfJZGerFpTVubvRtvhX2xOrXSFOg9x9eGuqPSs4N6hCFLIgKnP24Z5hd0WVm9DkUwyr+kpXcE05teKP3lNl9GgyJgCLFsV2UVhcbHpZ+Zz1rdiMMY0/j/SNE+W8M6L6S8xo/oFJ6dhjauiRmWn5nRhSVSX+n2ZhQDx7/BSKB9kDomFDW8L++a3fxvFtX+Mq62dBjGr0ua0uApHn44Je/4q74UKskN/XBG18fRpDCxmpP43yduNPnV4UQ+Gdd554rahIFD796U/2GmvSRPxfKQSySWn2Qbw6+UoAENcPgJhzy8yLsn17w3WMyrCBaVbQFi+Y2VirvPLa5Mtx3YB/GBS08uDip16vGpo3F/NUZGWhUd5u43I6yhYG9d5JVwjp82FOq/eQG7AYd0jBFU1enrgHnjDBsmjKk5HhfJ2SDNTUiP238kCrqhKj5SB6HCzLOYhdi06OTf8IU7LCnqnS3FWZzSdbGlvzOe4cdg8+PfZ8/sYM5seAz2BNoeHyfo/imznnGhfw+aKjmGhyegnLV6BpwR/67eugHOeNvLo9IxAwlgkSMSM3cFQla+pdsRl/GXKvZX40N+O21mPMMFKLmVNFDNFbt3w68wIw8L2rhHO4RxnGWD6ARwCcLAjCXgC3AGgJoBuA7QCu8qJDjLHljLH3GGPv/f67geUdEY0goCTrIAALQduSJcCf/yzmmIg1jKkT9SoHmKIiMfzPlVdG3xe2CNFYq7Yo+B2rOryk39by5dFxeSdNAmbNEv9u2RK48kocCUY2+F/MOhdX9nuY73NYFrHeXLUo3GHdlkkXVnd4GRu7PwkA6Fz6M1oU0PuRjig9uLyyDG5esDNSP9P+YdUhsZyTnFp9K7fYvscpWg8uxoDjwpZpkyaJ/zOG+0bciRcm6ucEZACwQV8RrafE5vkKx9d8giFVX1sXrKd4blnn9+OliVdirF7oAp4fTBIeGFGn2Jwp+/77If2wilp+WXC66A0MAIKA5ydeg2HV8X8+lHNWzKwbTz4ZmDEDmDs3NvXzYNd7xYbHVuO8P6wLmaHom4+FRM/RmTMRlMayt3UMBLSeqe2Kf8Vfh/6Nq7kGOXvx32Ous91N5fORdt4qCsF4PPNGGiqVtUqooiK1h2r//tFe2laUlwPnnit6OHjASZ2exztT9fOHack0UHBpv2k7zxVvyaLMQ8gwErpZKETsKJlthYdPN3gVSzpzb8AXVIfK1ZuHJMXYy9vbOukdAMWzVR+FSXPnGqcNIJKHjAwxOsTq1c7rYAzFWYcMLweXKfLaeKQQLudU5KQknAYhL028En5fJHx63Qkr0bzAQN4jrfmUY11uoBYdS7cbN2CVg0tiSNU3GNjgW/PO2h0DzzxTjBZjZqQW9u7K2h9dv87ndZyDS2ctz6DjNcZYVF5dsn+ILcLyFcgO1GFE9Vf465B7AYgeVie0e93Ag0s6V1wMrFplv0GpTjkEYLt2+uXChjUJegD6Vm7R9WwXwuqX+rgmiRNcow
xjLAOicus+QRAeBQBBEH4VBCEoCEIIwF8ghiAEgJ8BKE36GkvnjM6rEAThdkEQegmC0KtCJw8KYYAg4Kf5Z1uXYyz+yWaPO050yT72WPX5qiqVq7ZMaSlwwQWii7SC7EAdbh50v34bjImxjZU0a6a2GpTayvSJruTtin+1letCbkeHeLg/96z4ERf2fgKAKOSKRa4vIvEoPbiCnO7eVoyr+Qy1J4iLCFlRwzvhS4ngbQk2Bw2y0z1PUAonVZbEJSXAxImqsozBQMElRKzd9Tj99OjyFjw1/kbU5O2yLFdf0fsOXUXUCgQwtPob56HUAgHTV8ynUQyF+1+evd+67rIyNMjdF3n2EpUQ/corVVaUMcvjVFwMjBkjenMnihjE2Q//5l/NPhfn9/yXeNLJBkoZ3sUXAubNA3JzZW/tPjoGAma/1QntXsX0ZsbegDkBZ++ESsGVbsJ8Rd7VnMz4WXkajjGMiaGGvaZRI8/ehexAHXpX/sC1Fsn06YfNisrBZeP1MffgsghLtGaN+D0YCZI5PLi07aed0tcJc+aYX8/OVh2+PfVStCzcoQ6VGx7bNCEKuVi9Wvxd16yJulSdu9teXQSRCHw+5wLPmhrLQVQ1HoZCcg4mp/Rv8B1+P+5064JJiDKntCGcv8XQ6m9U9/h9gvGcIP1GjfN24fXJ3hichGlWsBOvTbnC0zrRooV+tBhAlNNJfDtnA85QhJ+VkT5veJxvU/QLupX9FF2OByFadmG0lxevmYdFJLynMPMwjm8nhrCWv3HG8N8J1+CEdq/K5eTfY82aSKQPB8j7WGVoWuXzUF0tdUwjH44jpp7tRMywXO0xxhiAOwB8IQjC1YrzyidyGoBPpb+fADCHMZbFGGsOoDWAdwC8C6A1Y6w5YywTwBypLOEFNsPlxZWBA4ErrogMNDw0aGDsZWa0SdEu7nSUZ+NrPlHFrObCaJGj+s5ji/Y39bNQzHJ9EYlF6cHlpSt/OHyAvOjj3exPnAjMmGHPun3hQpu9c0/YKkxYvoIv9Kjiva6TBMpm+xm/LwS0bq06xysw1n53fp7NVT1BN1y2mxFVJ/66U3Yedwqaazxle5T/KHsvMAiylWfD3L3WFY4YAfTpEwkxligFV0GBKlxvWsen79dPXINYWQmGDaoaNDAv17u3/KcPAs7q9gy2zD3b2W85YgQwdy5OaPcqVrV/WX4ZlJ7mWsx+q+XtXsX67k8bXs9xqPT1sZCceDytnpR4RTPQQV7T6T03ylDDiRojeOAQAHYr/wnX9o8OE5jpV8yBNoW6ZqWV477uXqhLF+Cqq8Q8prqVW/dFO7fE0/MvaRkwIBJKUw9pfA1vm/SU9/LYpnLr4lyntmsn/q46Id56ru6LoyesAmbP5quLIFKJ6mrgrLP4ytrw4Bpe/SWKMw+46FhysqnnE9i9+GSusrbtlqTxyvC2/v0BiD/DgIbfu2zMAV55jEyapFortyzcoZ7TNYTH9q9mn4fmhTujrh8NcuQL1FFwATD8TEeDEc/3xvVYyeBWka3HnUPvsSyjnM9HNf4SNw/6P3w9+xyxT24fdWmNrPUGDGgjBqxcKe7ZTjnFZYPe4zPJWUe4hycz+0AACwF8whj7SDq3HsBcxlg3iOP4FgArAEAQhM8YYw8C+BxAHYA1giAEAYAxthbAswD8AO4UBEEnhhDhiGT3v/XyJTaqq3HjyN+zZwNNm0YVeWr8je7aM7IUgQDceCPw1VfA7fabsIvfF6JNdRpg9guKHlyRyTvbfxRl2Qe8eZd69hQts3gIBIAhQxASOBIYJyPKsdHgXd5xOF88ZSK+9esIlvMCRxx1qVfFD3hr6mVgt99mXbge4mpskxROWos9xgySz+oQfg6KMg9h8z61JzljEL0XIHlwVYj5JLksQnNzgeOPjxwncN4O1pcEzH6/6EVuxYknAk89BRxzjHm5448H+0hcuvqYgEx/UMxFEGxsfp8egQAwbBj+cs87gK+7fPpw0HhpbvZbGS
Y2l8h26MGlfGvSylulpCRhTWeHlY1WCqwUV3DlBmpxUucXcPKbauWC9jlWPVcWQlizeVqZ28vwXTF10Q0LKPmfc1qLS2RlGV8z+M5DqnlI51k3yBet68Vn9LuOGoWMoUNd5aAjiKSjRPI+PvZYcS1hZ29o4VUvLF+BYIjhH9/3wvwXTtAtk3TG1Jxk+IJ80XsYsx9OT/oNdA3sLrwwKpeVCqO5nlOhExfOOw/44ANg7Fj96xwhCvVomLsX/xp7IyY9u9a4UDjUKuea6LBkKHZw6VrH0QsIfZa0fQNLX15kWsan8cjO8IXQuug36ZrLsWPQIOCzzxB8XP28ReWBKy8HTtAfvxKNae5rwjWWI7cgCK8JgsAEQegiCEI36d9TgiAsFAShs3R+siAI2xX3XCwIQktBENoKgvC04vxTgiC0ka6ZJ7og7BFHb6KEY2TR16sXMHw4sG6daBlthlU4DZ62FYkRGZMs+Vu1clavTfxMoKExTVGGKFRu/u8dfhc2z13vun5BYGLOOpuL5HDYLG7ivAgvyDisf8FIiaDoXzg0jqnwWEf4kukPyiFPzaB31R5GAkOuJ6pLF6BFC/0Npl1PAYvi2nCFtjHb7MaYoEEusXpLZSWweHF0cm0tjMmCUtWY4CaJcZ8+4vpFom3xr6jJ+0PMX6bB6Ld6cNRt6Fq61bQZpx5cjAmR0G3p/KjE8cPJAhcrYY3f5rwbazSh5pzw+3GnRp2zoySS3wGdvCAZCgteRwJYSfBr1h8KUajA5pyq/a7ChiglWQcwoIHk0VBeHilQUQFccQXaFZvkqOGBlFtEmtFuXDMxr2JYCcyT1iMst5g82TKXo98npF9IYiWLF5tfD4VU+977RvwV67o+a36P5BmsO/MoxzU9ahOkhLETXam6WozqohxPzdZN0rXSLHNPQMaAiU0/MS5w4oliuHNAtWZiMDZcnNbsQzw6+hbXyq22Rb+4uj9d2dTzCYxq9LnhdVmBo/N85AZspofRkpUFnHhiRAEtPQMBFkwur6hp0wwvkQdXbKEYZ2mIWXz6tCYQEBVXZskww9iJxa4cgJTChlWr5M1+rAWE2jGQQhQmN7NavIef5kUnluRBGaJQKQwIsJCYMN3lhOh0fJjZ4n0MaviNdcFIQ47acUqU90KzZuL/HKGIwu+vmUAsyvVd4sgJJhZnhCNcWcQHAsCZZ3oSc131PBjkbPMxAW9MuczZ415WBpx6KnD++Y776BRliMKY5eBKc7Q5LbziqXE34MvZ5wI+Hy7u/RjGNI4EOzD6rWa2+CCS6FyHGwbcj/tG3OGoP4LAVAo4ALim/4OO6koWdF9XQZBzVcaanPAG32gtGo5A0LVrXPrDjVJYYUMh2K8yEo6pPJsz7JWUJ+atqZdGh8Q57zxdwZzSgpcxACNHcvcRgOjd3qePdTkFaa301RAOMSTDEeECgPwlqTwi8vJkAfofi07FyEZfAvPnA23aqO8tLCQvOYJQcPAgsOkCpjK0xfLl5vlsiooi3gz5+ZFQ2SaYKe9T3jDKauBu316l4BrV6EsMbPCtftlwRJSmTYFzz3VmYGdiJPX7caciwyCfpe12wkydKoZp3riRr7yT9rKz8eO8szC31bvu2igtjbSjCBNu1n52oA7Tmn/kqtkPpl+Evwz5m6s6Ug3eFArn9nxS9sgyRec982o2jw5RmGRKo3HjgKVLdS/5bESVIexDEvJ0IZnDmHhFeCAoK3Nflx0Fl1KppRyMGjSQExozvetaPExyKCq4aGBMVooyD6Fx/m7LcnoKlfCGQkxUqyjr0WbC6VNzx9C/4aWJV+HsbsY5XlSEFc1Nmjhs0R4ZWgXU2rWiwnvePP0bwu/qkiXyuyQrRXQWZHohCnlJayvIGBD1W0rYscjXjo927q2TFs2q4VznmQgLU/s3+D7qGjdt27pKsuuUehOiMAaEx2jV8+HGg0tDdqAOuYFaICcH67s/jTkt35OvRf1WGgtBI8OXAQ2/Q03+Lued6t
IFOO88+bOf3Pl553UlAbrPvCBEhziJEbIFq1JAqeTkk8X1pV0FTayYO1f9P2BLs/Pm1MvQNH+H4fXwczWk6uuo+vtWbsGStm/IuSRCAjO0Oj+72zM4vu1rACQL2VmzuPso3uQDjj/e1pxdnzy4TAVaZvsfKdfdLZ8PEY+zs4GNG6O/uyFDdG+vT98xQViRk6Pj3FteDqxebXzTmjV8Xl6AHF3AbIgPpLNhVMeOwOLFKgVXgAX1x6GuXdXKwkaNoopwrbHrDBRYgoDy7AP6OQrd0LOnGKbZrZe4WX86dULN2A7wLV1s7jFox0qke3d1vkevvg+pD4MbfiMbcBVkHK5Xc88vC06PeFJLTGzyMT6cfqF83LroV/lvs3WS/L011oRv79fPEwNUAKjT5KlPyjGpoEB9XFMDgPbdsYYUXETqsGGDmOjcyq2cBzsKLmXCby2CteeHjFXYRBMYBJWVjY8JpOBKYtx4RCgn7GObf4A5Ld8BoJi4nZgLn3GG/GeLwt/t388iSrdL+jzGd8+yZcD48eYbLg9Z0uYNvDDxqsiJggIxZKlRrPnworhfPwQLigGow7ZpcbMY0S6Q6c01p0HuPnw/x104TjchCr/a3VD+e0bz98MViv9LYWjntXobM1u8L1rCAqLnWAqhDlGYhJuCJCaQ6UObcNiS1q3F/3v08K6BGTNEhZIkEFAm4I6aW8aNA/r1kw9jti5golIhXZT1WQ7DNdphUMNvMLnpR7rXijMPAevXR29+w+Tmis+AnbVqLBk2DLj+eqB//8g5m2PewTqTtbTEy5OusixTmGkQjhjAyEZfYlOvfwFwN8/aEcDQWpyDpk2BGTOwr1Zajw0fDpSWcn93T467Ec9OuDZ2/SOIdMArYb8krzAT7vt9IXFeSDH8ir30RzMuxLCqr6ILjRgB5OWpFFx+X0hf0tO1q6zAD6P93vzacGR6kYaMFFwSDXL2oiJ7r/VvzKvE9CoHaWWl6GHVvn30NcbEHLj9+pmvZTp3NrwU5VHEjA1cvKBv5Wac3Pl57F18IloVOZCXuOSyPo84jgBkRFHmQeRy5AxvkLsvak4uzDyEbuVi+PP3p1+Er2efK1/rUGIcOlgAxJQx2t9KEDC35buY2ORj7v4bERWi0JdkIQr16NIFZ3R5Fuf2eDL5+5rCJMnOiXBNfcjBVVMjunoaWbzaQbMYMSUQEENIXXihYREu7xq3A1lNjRxruzp3t/3kp0TcKM46yFVO75EozDyMEdVfAgB6VvyI+0eKYaUqcvaJBZwouFq1AkaMgLB8BQY1/M7+/U4oKhJDIHi1iLYgO1CH4dVfWxcMo0iI3qL4DwDR7u5eof3FSKFggSCgeeHOqNN2hlDbOeMUDKmSQnEWF2NI1TdomLMn4iW9ejXQrRvuG3En2hb/CixZIoZUWptaoSr7VG7B/434KwB33on1Ed+qFfhqzQ1iXoA//Qk47TRvBT1jxojW1pJlrfJZtlK08wqMV7R/Wd5Ef3LsJqzq8JJ8rTjTOIScV5aXiSZTz0tUMbeWZe3HK5OucNXGpX3+iSv6PhLdzPIVYl6IcBjCVCE8Z4aNtQy8bYxY0PodzG/1tu61gF5YHIPIFNkWysmwwVm8LGTrk4V3FDqTcstCAy+vMWMiHnrSfbzjVeui39C3cnPkhNYqnCCIKITlK0wF22VZ++W/N89dj87hPJ6SR7rZCJqqoa3leaFhQ3Qt24rZLd8zzPGnXHvl2cgbFL3n05xZuRKYMEGdl8tig/PO1Evx6cwLjAusWwcMHCjuu8249lrgyiu9y0uYkQFcfLG4HnbK7Nm6OWdXd3gJfxt+l+4t7Yu3oXPpz3xGQJwyv+PbviavUQoyxfcm3iGIG+XtRkXOfuuCNvj5yf/h/3RClC9s/WbUuahnVzpzcqfn0L5YnY9sbccXcdQgrHdI8ImKXO1zHQrhzG7P4l/jbuL/AAbcM+xu/GvsjXIbRtFfkgrGcHm/R3Fmt2dJwRVDSEKeLq
gUXCSsMiTsit2li+gNNns2331VVbLLvgrZg4sDFwOZcnEkLF+B0uyDUa65APD8MVc7boPwjmw/Z6xsHTL8ITw/8RrVuZ/nr0Pfyi3iQSISPmif3WRPOjFpkvG1uXPFsEWKxX15jijQlRVcMf58pFCIPad2eQ4buj8lH9sZfhvl7ZZDMpzY6UVsX7gu8kzk5ak8ZlBWJio49KwXk5iALyTHxc/xu0z4W99o1kzc0HfsKAr927SJzUZFqlOt4NIRKilCvxgJjOWzUk6nAAuhKPMQAKBN0a8ozxY31LUnrELPih/l+woyDqGfImRJuqwvs/TmaMW4n5dxBIOrDPJt1HdmzwZuvtl2uPCr+z+Ev4+4UzyQ8teGyfTrCCZ0vNtenXw5TuvyX6723DyrdjwVyYMrwr4lJ+Kqfg8bXm+lUX45Vg6WlDq7jyDSGZ11iDw+6exrvp+7AfcOE8fkZgU7I7KGggKgSxfTcdDPQsm/F9TBz0LAOefI89fKDq/gi1nn65YNr71enXw5Ar4Q97ygLRelDCwsBKZMEdeRK1eKMiaLcPoNcvehMmzoqkfLlqK3lGZujSInx9hz3Ck+n7WiyWyNnJUle4ev7/YUXpSisRzX5k1xn6Jz73vTL8HrUy7n69/FF1uXYQx/Hfo32VspTPVwgzzeHvH65Mtw86D7AIhKZqf5yh4dfQseHHVb9IVJk5A3qr9uHvF7h98NAGiYs0f2ZNSGOQ+PCdcMeEg0zALkZ5UxiLnhAdEob9kysd5hd2JtxxcdfQ47dC//CRObfiIfJ10OLitSqa8pRmrF1CGMScFFRkK49lrxu/L7DRP/2cLnw1+H3IuhVV8DGGQ+WHmh4FLUoedtMqKRjqs9EXd6lv/g/Gad56Q6b0/kwOm77ibUkfZeD/PNeE7btsDEicbXDTwtupf9iAY5e8UDve/Ywfs7vdkHUnXqe5PFgysvcBgH6iw2Q4nA4Lu2I7Cc1PRjTGr6MS7+cIJlvXp0K98KCEWRE2ma5/KD6RehU+nPABYkuiuEFmkcUim49N6Bnj1FT/Nbb7X2sFKEmPNLG1Nl3qmAL5Lfc3j1l3hBNraYL7afJsp53RCFinDUmbwJ3U34cX8pGpgJpFIZN3k7srNVYZMBjeVtOG/WjBnAoUPARx/Jl+x4oLvJW2p2p/YdTI83wgMYQ37GEV1BWhh5LSTNxUEbykFlSUlPTxCEEp29i6xE1ln/FmYejgitoQhdzRiwZg2Eh+4xbCpVDfV8TBA9QHdZ5yTNzxC9eMLzDq9CXvvNmO5duncX/xmh3X+nqlCcQwF25PjVCPhC8jrT7JPmhp9bq++jcWM+Dy6DelqfMhFC9xfBRgy3rsMBpdkHZO/AZgVi5BKevW7fyu/x/d5y/H64UKwn6wD2HDVIyQDj/LwA0KdyMx4fe4tUTiuv0OmL9rc8+eSIkefvv2PhY49Frmm/1+bNgXclJZ7HsuukzMGl/fyp+v6mGOTBlS6QgouPzExVaDLXMIbj270e8zi9svWPYmDsXv4TxjT+LKbt1jeiYj07QFi+QmVRYobuIiZWk5+b0J6ppOByIngTBHww42LkZRyVj5UsbvOG7SrbF2/DI2NuAwoKrMNVJIj9S0/SDw0VJ3YtOln/QgzyzjAI9t8t5XNg9HeK0738p4gFHpFcSF6mx7d7Xbay9uv9Vj6fKCBZt87QwphBEDf5incg/JePCSphVbiOFzSexEB0UudURTdE4YwZsqd+WLDlhunNPiTvHj1WrYoKMScrFNesAQYNEv8uKBDL2iSs2NJVBnNi6rmgeQfTJS+dFR1KtrmuQ6uAP1ynCJNVau6VFV4vH1q6xixiPEHUX3TWpiHBWMEFqJUxdZqcNmZL3ZT24AK4ZEFX938I383ZIB/zftxoDy4X31Oy5OF0y5gx4v8jRxoWyfQHVftjrr2y1b6Od99n9D0z5r3HmwI/ExytVN6aehlOV3izZ/qDUfPr17PP4QoH3LboV/lvbR26Br
nK72rdOnUEE+337feLRscDB4qeX8NjoChMpRxcSlKprylGmoyaBDp1ki1A3FgtEjbh0Mz3rtiMZ8Zf50opoKfgqszZh2cnXK8uqAi5OKP5+3hv2sWeJHKsDwjLV6Bb2VbrgrHGSkHj1JNk2DBgwAAxZ4xdtM+1RTLchOJkwWCxa7lr2D2265UX5hUV1uEqEkgiZ4virEP6F6R34PXJl+H4tq/Jp91Z5Lt8LtLUg4tIYrKygLVrUZZ9AAvbiHkBTIX2LVuaP+dXXin/6WOCymtG+bfZW3Y0lOGJIUii0Q1RWFICnCnmJVNatTtBWL4C2YE6HHWRCzCtUApEdOZbWeGYY2yBbBc3sgM7Hly7jtjI6ZuinNL5OXw2c1O0cFLnSzb72rVroeHVX+GuoXeLB5s2Gd+oUIhm+evSRuZLEF7z2czzcVa3p+VjKwVXO0VeHa0BS3g9ISxfga9nn4P3p1+Ef465GUBy7WPsIO/NcnPF/fC6dWI+LB3yM46gReEO+Zh3H3E4qM5v5Uouly6DXYsWwPXXRzy0tWiez009n0Cnkp+t67Wa6HmNXvXqCStuHChyBzb4Fr8tPM2yXIAFMa3ZR7hFClMI8D9np3X5L/YsPgmA+D4q59dWhb+iddFvCm9p9XMUzs23a9HJ+HPff0batmuw07Kl+ljve5w0SQyfOXSo5VrQDUnpwaWFlFpxIU1GTQKNGonCayK+cCw8upf9hLE1n7tSCnB7fEgDZ/OC3/Hw6NvRs+LHhOXM6Fr2U0LadUNx1kHvKnOaQN5KEWoVX9uIjAxg0SJVvhZutBNyMntweaHg4l10mbRl9s7S8kaBXu6qBWK4vAENv0eFyxBfV/Z7KHLgxoNLKXht0UL8X5kgmiBiQefOqkOn3p+/HipUbfQzfEExp8TyFQCAEzu9gOeOET22osIfa5J/J7Ngq03RL/hi1rmW5eRwtFqkMcKrz6j0UJnY5GMcWrrGk3pThlatxP+VihEdYwHD3wMALrpIzJvLydGgGH3fVQ4uk1la68H1168GOW6Hl9zAETTN32FdMEb4WEhcO06eDADIzzjMJbzTov1FsgN1WNz2TdF7SxEiNIqzzpIty0k+RBDGdCjZjlLFXtZUWL5gATqXbpPXAXWCtEYIe3ApirYu+g09yn/C1Gb/AyCNg6nswQWIY5pWQG8C76c9pFFwuYrakS4hCgFzrznNuuDcnk8iOyDJzczkZ1YKLLN5xaR9DBzoyIM8TIYviIqc/ZblAr4QCjMPY2WHV+RzvK+V3yegMPOwVE8QHUu2oTp3FzJ8dWha8IeqrNKD69TO/8Xvx4m5o4uzDkWezzZtop5x3dCGZt+5HYWsV4ajUpsp4cGl7F+y9zWFIQVXOiFYx6wlPKa6WlQcVFWJxzqDlV7+LLvIC7JevcT/O3aUQ+mI14P65RHtbhwPOpb8jI9mXBT3dt3yyOhbsX3BGdYFeeCwGtK16grH4u7RIyqMD5YuFZPTJgJlDOtkVnA1aGD/Hh4Fl97727GjYZXyex8KRW0wQ2DOFZUek9D5omlTYPny6POdOiEcg0hpTeakr6d1eU6618EG0+cTE0B37QqMGhU5X1wMXHWVucU5QcQAXc8jBeGh6/s56zGj+fvy+fYl21XlAhohfX7GEYxs9CUA4ECtRggRDi0D4ImxN+GJsTcDCxbg2zkbcErn5+x+hJiS5a9Du+JfTcsIy1cYe2hJG2W7Y83T46/XPX80FEl17GehiMCmvrBmDXDCCcCUKZE1q47xz1+G/A3fz1mvX0dFBbB6NTB3LleT4Zx1Wit6O5hZMWs9uD6ecQHO6/Evx23xMLz6K2yZt8G6YIzwQRC9HSRhIYMgCu9s7mvO7PosrhvwQPQay6qejAyE/JyCSoKor0jvlTK3XUgTdlDF4MGqQ60HV1AvJHFAnNPMjEBO6/If7FtyIk+P445umGdJ0K6bmzPMeeep5oWTOj1vaLByz7C7xcg9Em7C5aaVgssMIyVWu3
aiEb+WQYPEvVlZmf59K1eKMroFnPmFa2oif48aJd4XVsjFUJEb8AWBU08V971z5wIXX4zsBkV4aeKV0WVZECOqv0TdCSujrlXl7kGb4t/w84Kz8OvCM/C45Gmp58GV6a8Tc5hpDUwLC6NCGermo+3UCWjTBhg7NvqanefT7fc6YIC4H5cMATNM8n8mDaTgigsB6yJEqkEhCuNIdjZw+eURCxEdpQZjgphUccgQx834fdJv2qePOGE3bAjs2QPcfjuE5Ssw87/L8fDmnpHyimcgyho7DiTLkN2j/Ad8sEMtTCnP3ocdh/XjKRdlHkaRZA2j5cyuz2BUoy8w+qlTPO+nihkzxIVDx47i5HfXXcAHH4jX+vaNbdtmXHaZs/CG8WLjRuCtt0RXeLvwhDDQW4gsXaoK+6UqHt7Q6Czg6kK+iLI6wSR0tujd2zj5r/SbOM5hM3Ys8OyzDjsmkZdnnAA6P99d3Ymma1fgf6IlruEGkUgqXp98Gfo12GxaJqxMb164E03zIxac2hwQhqFfxo/HnvuNw8RNahoOedwDLQt3xGVD2bP8B/Rv8D1u/Mw6dr/rECWSMEk374AJ42r086EqQxSq1uanxHgdkSzk5orjPACcfz5w5IjumF+QeQQFmSZ5zxgDioqs21u9GsFLHgEAFGYahMDlwDREoeZdal64E+f3+jc2fcC39nhx4lUY/m973k+J9pqUFeKyZ4c0LyvXThzCqo6l29GxdDsgjLfdh0TsZQgiFZEVU61bY0T1l9JYWGF+U00NHh19K3YdzQUgeqXqGgmcfDJwszTXGrzzfiYgL+A+j2Us0FU2SZ/jjiH3SnP5UvX1CROA6mp53Lu492NY2PotQ4OVmvxdqs/vyoOrb1/gww8jx+kqFM/LUx83bQqcfbbx51240Lw+o72bEZWVotFiYWH0GsWGImbr/DPR+L7LRMWVwhBdyQW9HsfC1m+j+f2XiGvztm3Ff2EyMzG0+puo++4dfhdmt3wPvnWniwbQmzcD114re2CGKdGJRhTSMxbt0AF49FHx7+7dgdmzUXheJGTpixOvQvfyH0UP+o+ltf+cOeL+9zSDNYwdA+iASzXEcceJ/+8Qvdv7Vm4BWAd3dcYa5ZrJSc54ggtaLaYhlNQ6zuTmRgZpPQ8uCMBZZwG5ubhp4P9hebtXospYofICq6kRvcbKy+WcW0GNhZbSQknXAivGJIuS9f3pl6iOt8w9G1Oa/s9RXW2KfkUHjRW8KRYLounNPlAJIGUyMkTvrawsUXG6dCkwfjywIXHWuwDEvrRpI/7dpEli+6JHTQ0wc6Yzz6hweNfwYph3MZuXp/bsUaDc0Ghrq7OZl6Uo08PQmUpikezVDhwbNUe5swBg+vSIZ62N9gCIC/fmzfW9y9KF448H1q4FzjtP/EckPQMafm8pKFFevbD34/hpnphTSikg37v4RCxo/bY6xObGjcC4ccAxx6Bd8S9oUfC7eWc4ktB7xeyW7+GGgQ9wlW1d9JvhtcENv8H+JRZGGrKCi++DFWUeRH6GvlEMIIaoXtbuVfXJqVNFq+T6ht9vbNAQxuyB4nnYunZFXYkoyC3PPmCjc9qmnIUf5mFY9de279EqqONBfsZhWXCmVbDJ+wqlUDLGwpqCzCNomLMnpm0QREoje3BJ72d+Pp475ho8OvpW6/VvRQV6V/6AMY2/kE8tbfs6Hgt7ggDAJZfIxl1m46CPhcAYogTvyYCuB5f0vWX7a1GmN29oQqmt7/40avJ3cbU3sMG3WNr2ddv9lOneHThXEXY5w7lnclKTmQn8+c+R44KC+CvzGja0XqMYcG3/f2D34pPQKG83AOC5nztER+GRKM06iGYFO/HkuBtQka0Thv+EE3SVYxm+oPjelZWJso727cVQimZI76vSQEQAxHxoBQpj75UrgcJCPDr6Vnw3R5Q3dS79WTT6XrVKNHC+5hpruUE4sk11tXGZ8ePF59qtLIkx+V/tCatwepf/uKsvhrQr3o
4XJl6lPkkKrphBCq4042/D78TFvR9PdDcIBfL87PdjdceXMUqxeOTFakN9df+H8OyEawEAzx1zDf4x8i/ihVatcMvg+3D/iL/YbtMNRuHATuvyH/Qo/yGufVHStOAPVx4r3ML29u0BQYCwfAUaGGzIHxlzG18i+4wMUSCWDEqlZctES7bVqxPdE2+ZOFG0SlwqWe3pCdJyjL0a9JDf2T59ooRltSE/t2TYz4LYvdietf9nM8/HjQPv1732yqQr8M3sjfhm9sao0Bd3DLkHS9u+hprC3Zja7EPd++OG9P24EusxjcUa72apTRvRKMFsgZ7qZGWJYR2qq83j4hOJ55xzHN2WG6hF4/zdANRriILMI+KroFRw1dQA06YBGRl4bOzN+HzW+eJ5I69hrSeHQ3gEYO2L+QxLXp18Oe4edrfh9YAviLyMo+aVSBvOHLNQRRI3D7oPP8w7G78uPN2wTHHWIdw+5O8AksezPWXhtAxuUrgbfSu/d9WU2bzz1m/NHder9Axc1eElvDDxKvSpMPfK1N4XL5TGktqQprJ1+qxZkZPSu8MVDjgspA2HlA7najMhKxDE9oXrrOsmiPqKtKZXhsYNy4ANWb8e6NdPHQJW2u8UZB7BlGYKo1CFt3+d4EvKHFw3DrwfXUqNc4DrenBpcwFpP5e0VnLycR8ZfSuWtX/N/o1KlHsR3pxSqUhJSeTvZPJU4/jhM/11hhGAlJzY83VMafoRAGBCk0/1P2bjxsDZZ+Pg0rW4sFdEputjghj2Wfk9ma2LRo4U320AhRkaj/asLN17y7IPoEXhDnQp/QkFYeMtn08MS8ij/GvcWFSErzcIOQ2IMq2VK737jX0+BHyiUj2pnhuJwoxDmNbsIwwPGzfNmiXmM02SVBXpCCm40owFrd9Gp9Jtie4GoUDemDIGjBvnLhazFmlR1qxgp2x1NbLRl5FnoEsXtCzcgfFNPvWuTQ6M5pc/9/knupUZLzzjgW7CzDAdOojxfHVgjHNxW1kJnHSSfPjV7HNx97C77HUyWSksFHNpKBdX6YDfLyolwxsHvR96/nzu6kY3+hwnd3peXIiOGKF64+8f8RfcNEhf+aSHE2vx5gU7kG0goB1c9S1aFf2OVkW/R72oS9u9gTuG/g0/nnQ1/jnmVu72hOUr0DhPxxtRh6fHX4/pzT7gqFRScKnCKtj8LhS/o67VJkGkAo0bc3tbluqEJgGgPx7MmSN6Ep15pup0hi8k5voaPDgSAsSAWIu3Di5di4lNP+Eq2zBnr8poZGP3J/Ha5MvlY65tL2P45NhNuGf4XcDcuXhn6iWGwrJVHV5BUeZhMZcBT9XhbyuZc1gmGjPhRNga2iKsTUHmEbw19TJX3TBT3B6uc25BH1Dky21d+BuGV3+Nt6f9GRf3fgz/HHMzHh6lP+86Cb0pLF+BwQ2jwxzxUpolejL4WAh9KyUlHGPoU7FZzNd32mnq9XJuLtCli3ne35kzRWFteDw78URR0DNvnnWHyNqZIMwpLwdatsS4mk8xvkYzbxqNrU2bAkuWiPu7FSvEqBQdzMN8PTP+OlzW59FIVA8N8rp95kx8euz5OLfHv+1+Eses6fgSbht8H24edJ/udTMPrqjjjRtFwx/JS6ZF4Q7ufoS/bk/CODMmjp2zZqWvB1eYsBK1U6fE9sMmBRn6ITn/N+MC5AUiiq/rRv1LNjwzJTMTOYHaiJIJ0hqySxd1OW0kBqXhquJ5mdT0Y/ww7yypHonSUlH2ocnFBwD/O/YiZPodPrtlZfF9TrV56pKM3YtPxsW9H4ucGDkSOOaYhPWnPkA5uAgixqiWlNOmYcpTq/DetIvR658ehJzTCkqUVkjLlomh7h59NO6Ww0aCaD8LmYZ+iQcNcvYaXGgghuwysTrhygHg96u0YUWZh9GphJTOKc2qVUC3btzFbxl8H1oW7gDazI9aeM1p9Z74h6CTNFcH20od2NtQ6QryOBaLC1u/iU/+aISPdoqehbyhccfVfI
ZHN0uhIM2EmR55iIQRBYskICNSFE7T4a5lW7Fn8Umqc/uX/Enfc6mqyjwXVKtWlsoEu+FW7aJUWGX46lAbMu6PNoxaWfYBtCn6VT7m8oJhLGIgFAqhd+UPrkPSRaqW6iGrTWPM5p6qKtGb0crAxgOvgqNB4+fajbGE0hNKOf2t7/604T2PjbkZvSu2WNbdqeRnfLqrERrm7JE9nZyGrH909C2yUiu4bJXq2ltTw2GkNEpExoA1a1Dx3KXGFY8apQ7rXF7OL+jJzQUOOc+rRhBpj88HrFuHQStW4KnxNwJQ5CDi8Wzo0UP8Z8HYms/FP/r0wbZnPsYvtz+Bj3bWYOnLiwBI487GjUDjxuj40EP4U+4L2HUkFzd8NsKy7i6lP+HjP2qs+2pCvwab0aviB6x+Ldow0cyDq2GuJB8Iy1ZqasR/En0qt9gOu+hZnlKDcPhpx1lnAd98Yy9/VqzRevhp+G7OBjQr2Kl7rUvZz+oQw7wGToEAMGcOVgUfwqhGX6DTw+frr0XHjQMOHAB69hSf1ZNPBq6+OqoYY0ATbVhNxsTyqY5y3ZiEHlxJ2KW0J7lVnoQ9ktBVnIj2wAj4QuhZ8aM3lWt/86wsoGVL0fqqV6+EWTVoP3NYsMSYdwJrXtZ3ewrruz0lH1/Q6wlsX3BGdMH8fFMLUQbBnqBr6lT5z54VPyK4bCX/vURiEQSc0eVZVOdKi0Gz90hn3A0ovTZhnM/j1cmXo9JI4eoC7oT0Rp/LwnoTAK7t/yBGVH8lHx81ETxrORzkKCv1bUT1l2gtCaltLxIV8c/9TKBVJpG6VFgkiA8zdCgKMw/L+TkBWIflM8LsfZGEPrUxVnApOXrCGtPrWsVDpq9OlQ/Udh4jaWznVRK8OPEqtaV6ixbRhXr1AoYMsdeP+oTVGN24cXQy+hhwOBixPn5/+kW4YUDE63pc489s1cUQeS6VczNv2MFjmnyC6rxIqOs5Ld/RDUH8ycwL8M3sjXhnWkTBZOpNJfHtnGhjuxaFO1RtKtfGViHPeldskUOme8ry5eK4c4bO+p0gCHOceFT07m1+3edD1dgu6F7+E4Yr9gPNCnaK76o0UJRnH8D1A/9h2dxfh9yLlyZFC+e9xMhA4dDSNRjY8DvxwEgJoTfwGQyGYeNEbXhXwoLCQlFZk+ReOUpaFO7Qlw/16QNAM9fX1Ymfj4fhw5HpD6Jj6XZk+WvRUc9YOiNDjMbQujV3f20ZzqbCvpm8uwkNqTN6ENaQgispyfTVcZcd3ehznNjpef7Km+vkAjjjDDHshwInXiBuUE6Hfx9+B16ddEXC+nJxn8dxZrdn5eNMfzBipaWHwXvEmICa/F3YMvdsADDMrSXToQNwVSShpFdW4ER8uLzfo8gNcAiGdZ4X2WJPWqAbKXUHNfwO/5txId6eamzx7MT6z/V6dPJklYLWso1hw3CgNpLH6d/jbsCSNuqkysWZB2TLRy6vD2kzPrHpJ/h69rkWhQ2YPRsYMECszisrSoJIBMOGifkPzeLaA+JG97zzuEMamqI3F7ZuLYYfqa4Grr8eR03e5U4lP2NVh5fc9aF9e+6iWsvsTL967cWYICbAbtwYOP5444pKSsRwtVIeEl6jnGHVX6MmXwrVWlEhhnBT9o8Jomd9OufQcIsXYW082Asdqov8RhXZ++W1gLB8BUY1/pK7nhM7PY+6ZWLO0juH3oPHxtwiX+MNO6hUih3X+k3cNPB+TG76P92yrYp+R43CSttIOXvb4L/LireWOmG35DZnzAAuuEDXItwIxiCHTPeUZs1ErxCOfF0EQWgoKrJ/z9KlEQ+Prl3F/w3G6PCwe2DpWqzq8LL9tmBPXmJFWNYwpOprOczwXUPvxjFN9EMeZwcUbRt57GzYEJ2PuW1b/falPT/tPdIAQcBNA/8PFdlq2dGEmk8iHn0nnigq5yR6V2wW35nzz0dFzn5VXVi2DDjhBPGY01Pt8PFr0WqZB+t6ACUGocwBANOnq4+vucaTNm
OKUiCR7Ao5Wv/HBVJwEUQMWdbuVSxs/RZX2U4lP2Nlh1dw3YAH+RvIzgZuvDFyHDatNBngCzIOYVGbN/jbcIDSaros+4AqzNAV/R7B65Pd5Ufgpl07AEBh5mF8P2c9/r+9O4+Tojr3P/59elYYBhj2QYYdQRBkU0GFACK4gAsKroDgLipuUVzBuPyMGpNoEhOTeNUbE5NobhKNiZrExOQmmrhdjRrXaJS4xIgLLiDM+f1xqnuqu6uXmemZnh4+79eL13RXVVdVD9PVp85zzvOotjavl5014b60ZfHf6JDad9V43An611Hn6uOVp+hX+3xVPSs/atow3DDu1s3X5EJpCe7UXvygf4tenhixl0izFyEIfg3o+oEGdE0Pln5wtA9St/TmKCqQfENqTvqGhuiAc3W171DPsf/tat7zTwYO1OBu76ouqNkxvd/Lqg2K7T666DJJLXgftbXSwoVJi57eUN/8fQSN9fLY1o7f8AUyKS/39Q+HDMm+XSzmg09m0rp16etb2/l/1lm+w7u8XKqq0uYsszGXjHhEX57+48h1M+uf1wWT7olcl+T00/Ouf5g+g2uruoRqj8XkfCfERRclRtZGuuIKf1MfXC9um/Nd/Wqfr6ZvN3p0WorHxIy2AQPS0jsWtP5qZzNjhg9cbJdf6t6sChHgCs3g6l75SV4zoaKUW6Ni5vTtmbfq6O3/pDnbNc1yyHemdfhr65bZN6tX9cdZA8thqQGuLmU+UHf8Dn/QESP/qoWDowNlCePH+/TdpNUESo+Z/y67+uqWDR6Ixfwgk699zadql3xa0fnzpcMPT9o0PhCka/lnsl6hNLLNCI5Xlm1NqlNYCHfu9U1dMvUuSdLRo/8cXTMzdbZQpu+QhgYfnIi78sqmmlEpupX7mkxl1phW5xSl5+Rxv08aPCKF+rp22MEPngp8ZfoP9aVpd/gn9fX6w8Kr9c8j1uiZxWt9bW4zPzvyssv8zOR8jBzpaze10iuHn6fV47MMpJ8/P7muV2pAtyMKz+DqaDP/wg24+npp2rTincs2pIP9FQCl74tflL4w9WeSpBtnfk/jer2R1+seXXS5Fg17vPkHzKPRGr6+Tui1XjfPuqX5x2mGcKd5mTUm3WT3rv5Iuw14WXfM/aZ6VGYZRVIIobROw7r/J6+aP5J0zbQ701YnRmYHm8bMqUv5Z5rf8ExidO/6I8/x08/DLrkk8zHnzcvxBlAULWggnb7jr/XnA3xtisQNWra/t9AxoraqrYzfHPnP0q79XlZDzbsRW3rnT7xHjxx0eWI0WVSH3IljH/QPhg71o8emTk3clD4WBKKaTsrUUPOuzppwnzYsPz1p1X6Dn1Rtxadava5O//7hb6UZM/TQgVfq6cXr/FsLBbgn93lNvao2amxd03UwsTZXwGnBguzrm4FRlNjm1Nf7ovGSNCeofdHazn+zpMBNthSFMbmMM7bPGP9rXbbzz7Ie6td3BoH/mTPTC2tHSKQgDNK/VMS2qqZis54/9CJ/PvnOoo7Fkt7j+F7/StQceeqQS/TPI9boi7vc6YONY8b47/igrZH2+wil02MWdxZHHeWzDxSic6IAAa5+XT5Uj8qPtXHFqepR+am2NmY5rywp8+JBrGPH/G/a111ranllCyxLSgQKU+vGTuv/cqKN8L05N+nne38j8uX8pQKdQEWFr10XmlnS4v2EL2CLFqUNgku6Zlx6adPjPFPKrptyl+YPelq1lZuaXecqm6rYFjXmqp+9667Jz7P9vsLfL1nqQZbFnNzxJ/hfW1S6YpSODG2KxKCleEAo2G71+N9qRv2Lic9M/64fqqHbBu1ww2nJqQT79s2/zZNvuybT3+7220vyg7QrOlvazPDvsCOnK1y3jsFC7YQAV2fC6PQO4ZxzpHN2ui+5FkMeWtX5MWuWn60wcWLk6kydTEO6vaNlo/6sO+Z+M23drbNuih61nEE4x7BJ0uzZmlX/nHbq/XraTbYkHTw8ezCvR+XH6l6Ru6
B0mW2NrGVw1MiImXMZGghHjHy4aV3ENh+uOE2zBz6f8Rzi729gzfvpubsjGi8LB/+ftGJF+lRwdAz1fqbQQwf+P31n5q3Zt3VOnx6zStdO/7Gm9fdF2VNzrkfW4MqzURv/237owC9qfK/1Gbfboe7NpNp+GTuey8ul887zo8fMNLH3a9qxbr0m9fEpPHTUUf5nZaX+ueqLumbanepZlfw5vHvvr6ss5lQ2f676LJkjxWLqWfWJ+lT7NAxVKanBXj78At2zz/Xp55LPd9b++0uShtX+W6eM+13u7TMco4IZXNgWTZ7sR28vWeKfFziVddYAV5Y2TaJjYMkSPbvkYp067rdp2+w5M2KkdRaJdsCIEZKa0h2N6vF28vp8RXQUlMe2qqHbBp1zWfemGaYDBiTSOKWlXz3zzMTDvGsjouj+sPBqvXjYhYn6dVlncI0cmTElaLYgVrk1+s9nXH2OGcqhWYeje76ln+x1Q+ZtzzpLGj487/pxqRJtlmzfmXmk2rlo8t16tg2yFQLIQyFmxOZj6tTk+5wWzBZbO+XutHuNbMb3el1njr8/fcXAgdK55yYuXZVlW3NfB4cMka65Rjr5ZGmffRLZX1rs5JNb93p0LEG7OfVe3sz57/6DDvILcvXptCa4kW/bvb5eWr5cOvvs5OUnneRnZHdGpZSiEO2CAFdnMmhQsc8AgaqyLYkp8fnK1BlUm0eQR4cf7juxunZNX7d4cdLT+A335Tv/VC8cdpFumX1zZLCpZ9Un+dUgkvSNPW7ThuVnqDpIf+IkqbZWDyy8Vv26fKgt4dGvQeeTlKHjP/DYosv1pQzpjcK2HHeyXjj0Iv1+4TWJZYNq3tV/z/kv/yScTqCx0Td+U9w256amJxGjP7pVbMp+DuFOrUzFaQOXTv2ZHzVbW8sXcUe1887SEUdo136v6Jgx/5tz86qyLUn/leWxrdL06Ym/pVc39kp+wdln5x3gMpOvvaP00dhhvas2Jj0fGEp7OKDL+zpuzB/8k5Qg+B/3v9oXpW9o8NNPZ8zwK2Ixn4qsGeI1RVJnS/Wo/NSnBenvUz5m+9yn2W8/afp0vXz4herX5cNmnY+k5AAXsC3q3r3puyZTbYkoeVyjsgW48grolJVpTM+3VJPyHVvWgjRFZeakq65KdAR0SUlF1OxBRGPHps0iTcwS22uv5A7+4He1JXWmz6BB0okntuz4aJnUWfQt0Kv6Y/Wpbko9nbNuZIbOp8g6W0Ha6pg5n35z7lz/Xbtunf++yyT0eYyZ00HDnsi8bXW1dO65aR27O0YVqY8671ijrw3SPyJN87Jl/ryj7jdS7Nb/5Vb3FQNopnPP9RlCCpDSLC/HHitX3b6pzGYPfE5fmn5H9MrhwxMDBipjW/IL9NfW+ppJBx6Y/d48nzbUTjvlHrCAkjas9t/a+/jBvu5tfJbibrslp6OM/x2tWOH76fKczRipOW333XZLnikm+e/rqVPze32BB8K1ufJy3yZZvrzYZ5IuyyxPtB0CXJ1Jnz7+wnr55cU+E7Qgx2qmWVYfrDg9vy+bTA2yuXNlQwYnnlbGtkhLl+r8Sb9smqacIYd0rkZhPCVZddkWda/8VJ8cc6qkHJ0BZ56ZCMZmGxVbHtuqZaMe0h/3vyrrOUjS8O7vaFSPtxLPkzrWwl/osZh0/vm+Mz9V/PcXBBOiTyo6LczW8O9p+vSs55r4f84RCEMRmUmf+1yLX15ujdLRRyeeP70hZRTlqFFJnVXZPmfh68KWDAGu1488V3s3PN20YMgQ7T/k//TRylMkSTfMuE03zvyef0/LloV2bupS/pnvCD7xRKlnz+QdV1UlHj635CLVd30v43kGu9PfDlmXNoMt4Ygjsr4+o1amrRpXt97/fggoY1vXnELzedSsPHDoE76YdoSYuaR6nGGJj2IwonXT1uQR32mf1Ayf3XN2uld/PegKScH3fvD+/rD/VZo/6OmkbZtdA8ssrQ5gmTX6zqsM57fFRbd9DhvxFx
016uHmHR8ts+OO/mcBIytJgcsc7fuu5Zu0cYVvC0d+FwaDTJzk67QuXtwUSDLTj+d+S0Nr35GkpvbvpZfm/v5assR/vy9blhhcEx4U8+GK03RtfNBYuF28fLmOGPmw9h/yRGJRdVnQJog65u67+7ShOWxYfnpyuwRA+xg+XDr44Iz3rAVnlr2bIkN2mUi9euXeRrm/z2Pm9N7Rq2UmfRpuX6xcmf+5RBkc9Kek3i+h84rP4AotevnwC3XS+RF/q926pS+bNi1nXeucWhMca654n2Aeg1g6jJkzfWCvo+nf39dZO++8Yp/JNoUAV2czfHhycUAUx5FH+pQ1xx4bXeg9cPyYB/X4Ip+rOuu9a3NGbkSI19k5eezvdNqOD/gvgRUrmjaIT68OKbPGnIW143V1UoNzvauTZ5NM6L1ehw7/q39SXp4YwZqtQVxujaos26rdB7wUuT5mjTp8xF/8KOu0dSk7Xr3afy5OPtmnTgiOH2nGjPSp3XEZGsaJTq0DD/T/UoXT0CRe1PpRxmgnedZuk6Sder+W1qk1uc+r6a8L5WQvL8v8QbDQMVJnB4zv9bo2LD9d29W85ze5+mo/42DVKpkpUUw5cSP4uc8lBa2SZPowBh1Z2/d8O2MQPixrzcHBgxOzGZqllQGuvy3+gr7QzBm1QKc0apR0yCE+hVkmxx7rZ5XkESA4evSf9cCC6CLyMXMZWxD9u3zgjzFypKTMA3wSFi+O7FBqqHlXU4Lra2IPjY3aY8BLKosl77PFM6hCQa4ya4xOPxSkZGqMSu3WpYt+sOd3tdcgcrW1i8MO80GeEwpXxyXRxtt556TBK1HKrFE1FZt18tjf6dARf/V1NiLa2JlmMh8y/DF1CbIhJNq//frl11m9++7+XyA8eKZbxaamtomZbytMmyYNHKjb5tykGQNelCQ9s3ithnd/J/excmhOujEApS3rd/gxx/hZzxEm9v5ncs2tTPcoKarLc6cw7lH5qSSpZ7ze98CB6fW2mqtnT+nKK5PrjKFzC+5Bp/T5Z44N1er71TSrV/v6WUuXFna/2Rx9tB+sfc457XfMzmzKFF/7HO2GABfQFiorpR128DfDWaapD+/+jkZ0/3fu/bXyCzM+Wejre/xAC4c86fcXHoUakUu/zBpzF2aVdMfcb2rJiEcSz9cfeY7u2OtbSdt0q9ik2+d+p2lB0JmeabSzlGHka8jeg57W9/f8buJ5uKsgLTXS2LF+ZmNEoVdTyrZmUt++emzRZbp615T0B1OmSF//uu9AGDYssThRgHzMmOgClytXptdoIMDVOaR8Np84+LK0jtRf7nN9er2MyZN9+swrrtB23d7XC4demLw+GIVu5hJpPVNThppccidS96AuTMosjcT5ZOsgyxTgOuIIn+pEBZgAVV0tTZqUCLjnvcPWXP+YtQU0MfMdTUHB6Ug77+wDSi347PTv8r7eXX6GJH/dKa+M6Z1lZ+q7M2/RPg1PSZI2rjhVu/R7JSl98ropdyUG+/jXpnwv9+0rXXaZduv/onqF0rGa+X9f2/0Hqo2nOcxwLcs0myynBQsS6V4ypl3s1k06+ujoLr7Ro32nWrwOGtpWdbVvoxVw9HFiln5FRc7PRUO3DZJ8e3tMz7ekyy6T5s9v2qCxUSfu8HvNG/RM805i4ULfQRuehS1p8fBHdO++X4l8ScY2/OzZfn+hgW7x7+Ud6t5s3nkB2OaVZ0srHPSJjOz+VtqM76T7pfjs2xweOehyXTjpnrzPbeGQJ/XJylV5b59TXV1eNQjRSQQDsW6c+T39fcnF7XvssWP9gLT2THXXu7cPcpFmEyWKABfQ3k47LfEwZk5JY5yrq32B01SHHtqqQ9ZWbdaP534rfcWee0oTJkTWpYqZS/QT7bXdM/r1fl/W84delLbdwcMfT8wUUc+eGljzvh81lUdaxePG/DEpLUpYamM5PqJVks6feI8unnK3fxJ0NoSPlnOkdujcKsuC40yalLTJpD6vaVDNho
gTK/cdDGvWJNLYbWrMMbK2okIaOVKXTf2plm7/kF9Gw6F0ZPtbjkpHkKJflw81uNu7yQvNfJqgIBXAyB4pge66Ot251zf1/Tnf9aO3167V92bflPQZTOpni/gMN20XnH+2FAeZZomatSgVR9ro9CuuSASqPr/Tfbpi5//Jf2eFGhEXFXwG0CrhwFHMnOqqPg4eN0qDBql39UdaOeZPmt7/ZUlSTUUoUB9cW2srN2lin9eT9hPlj/tfrR/PvbHp2ME3/6pxv4t+TSjY3+wUhRHKsg28mT5dO/d9NXrQzMqV7VcPBa2Xkuo93xpcn59wr/64/9Xp68Nf1lu26IYZ39fAmvfTt4sSf22PHtLatUkztCRpdI+3NC/D7MBbZ/+X7pj7TY3v1fTZ0jXXJNXCbdN6GxEz1wB0PlVluQdtvnDYxWmzYBJXxoYG6ZRT8krfP6XvP5PbEfK1tz89JjqIZSZVlzOoFC00YIC0dq1i5lTf9X3VlH+aeduOXL+qS/vWyQOKhQAX0N7GjUsULo9ZY3KXyzXXpNe9WL487Ya2ucx82pM0S5ZIq1ZFjkiNmUukKLxvv69qz+3+rlE93s5+oHB6vqgv+fhxgjy51+9+u67Y+aeRuwrP4Nq44lQtGPJk4vnp43+jXfu94p9MmCBde626VzQ1OPJJRfTFXe6UFNQkO/jgyM6n/l0/yL6T4P00upjKso1eC1ww+Zca3G2DdMYZ0XXA0DFlC7CMHetna+aQV5HjFIuGPa5ZA5/3f2cDB6pX9cdJn8FEh21DQ8b8zj+a+y3NGficTw2YLRgX1MKJFPVZjgrEZ7J8eVKdv6l9X9V5k36V/41AoQJchU4dASBJ+Ls3Jpf0Gd/aGPH5i7gGrJtyV3TaQzOZJV9LI5Mghvd54YWJ0bdZg1O5BPvMFSTbu+FpNR5/UsuPg44hntJakhYuTEsPnCb4++hd/VEiwJtRHgMtMif3TBefMRZll36v6ODhj+vJQ4LZkfPnp99jxAeJFbpfbtw4ae+9C7xTAB3RwG4f6MlDLsm5XWot4cQAmW7d/LUozxSFOvdcqbxc39jjNt025zu6edYtPsgWv75mm6XeHlpZWgIdTDCItHvlp9q4cnXm7erq/N9xhtr2RTV+fHqJEqATaqfqkwCixFJvYysq0u8y27OwZEjMGrUly8wkU6N08cXS/ff7BmnPnsk1OyZOlO6+WxoyRPrPf6SNG5tmgkyYIF1yibR2bcY0heHRYDUVm5NmhCSCX6tX+2PGYqqpjckdf4Lsxm/lDnCZ6ZyJ92lTY7nqKj+WdpqTHOQLHs8e+LzeWpqhHleK6rLc+cATClj8HO0gW2CkosLX28tR7yOffPEDu27Qvz6OSEOQITBl5nxw69RT09NlVFdLn36qxfHAdsoMxYTDDpM++CD7LK3Ua9KyZfkXc12wIPO2+d4Ajh0r/eY3LSvqHP6/I8AFtKlwACh1QExSTc+gtl+8dlXYvEHPaFr/f6Qtj9sa6iDb6swHI94J1QwKX6+6d/epEM9RM0IGmbW4jhdKz047SdddJ1VVac8br9bPX90pZ3rC/l1SBkXNmdP0+KSTpPfekzYnzzzIKcv33jvLzswdUAtbtCh9WfCetuaRkjwvkydLjz3m69kC2DY0Nmp8r3/l3ixlsF9a7a5jjpG+/OWkRY8uukyL7jtRr24M1ZgfPlwaO1YnbXkw+fXxdv7ChU3pai8O0srlU8ewUEaMkN56y7dP0Pl07x69vLxcuv76jpkxJBbzA06BTo4AF1BEZn40yOOLLm3qoG7r6c2hgulh29VsULfyTXru/QGSpLLU9ImSD1oFqsu2+M6pcMHtDaGRpA0NvhBrba30+uvSnXdKhx/etH6AP07UyNjG407w99zr1kl//7t0++1JTeDK2BZpjz18x3dc6PeWqJMRUXNLUuKG/qLJ8Rzec6K3k08vJ0k6OyLQFYw0m97/JZ/O0GjIdkr5BEYuvVS6KD2FZ9z4Xv/SS4ddEL
0y+Ntdf9QaHf/gUfr232f4ZVdcIW3alHF2lUnS5z8fPeJx1SrpS1/Kfd6pteGynF/iRnTgwPSOvj339EGotJMsQLfyuHH+fbYkrWc48EeAC2hT8dpZW4870QeD3ODEukQ9oHHjpCOP9I/r6nyb5K67JEn/PGJNdGpgKTRjuumaEpNLvz727Zv13FokOHaXPAYqoBMJvltn1L+oxw6+XFL2bAp9qoP6cFdckT6CO95+vidL7ZigU2zmgBea/s5Dac1T9a7+qOnJ6NFZzy2j4Htxc3hA2zHHtGxfkq8tumFDxxzBDqBt5FlXOjWQnvg2j7fV6+ulc87Rlb//iR7412jd+/o4Te7zmvp3+VCvbuwjd3xoMGFUf0m/fv5nbW16QL89A1yHHCL16tXqDDzoQEaPlp57Tho0SDr22MzbRQzcAtB+6O0BiihmjdK6dZr4uR6+A1dKb7AVooM4PFMiSI+Y6vUj12jpqIeSzi0trZpzunXWTerf5X2N6/Wv9HNLLepdV+cblEOH+iKZEXWCUtMVSKHd1tdHNmArYlulpUuTF4beY8ycH6UyfXraa5MPkOF5quHDE0Xmk8yfL02YoD8dcJV+NPfbHTv3MpqvVy//c/Dg7NtJ/qYqniYwQ8284d3fiVwe/rtJPIqnOAh/ZlauTHqZmcv8t7v99oUbQZ3PNWjJkvzzey9e7APg+d74mfk0Yy2ZzRqL+c/pvHmFuZYCSBL+VMVnOMXM+TpG4RSF8fZEaidTqE3S0G1D5o9psCK+fvseb2pG/Yu+o6G+viml3JQp/hpzQfKAgr7x4ENLOKfNx56kbhWb8tue9ESdU6Y2nnN6d/kZ2m/wU/55tg6u+GzqqNTGs2ZJgwbpWzNv03OHrvUzw6Pqa555ZvLzCy/0nW4tEcwQO3jYYzpm9B99bdBddmnZviT/nUtwC9i2zJ3rf2aqNRlcO9NmcJnz167Fi5OWnzvxXk3p82ri+c/mf0MvH3Z+8j7rUjJeTJ2aPJA2VXvOqqmp8YN34veRKH2nny5ddZUfzEoddaDDYgYXUERl5vyXZJYRmgXplM0z8BJOIVRmTqN7vtm08vzzpV/8Qku3f1j7D/0/f+52cfIOqqqkNWuaNVOiIpajdlVw7uEUhZGvCb1Hk/OdXJl+d7l+p/mef02NnymTIz1ds/aJjuPSS306odTAbSZDh0o33ND8/2uz/D6ju+4q/eUviVScY3q8Kdl2zTtWSwSfl9p4nbvWXpPmzm26GW4PUWmZABTcPz4MZk8NHepT8yxY4K+Jkk8H3BrBdTU+Q/u5Q9f65fX1frZ3nFny9eWzz/TqEWvUr/pDSXu1+PAVzanhledodpSYLIHLRKpAs+z1Luvr/ezqqHZF166+8yxXm3L0aH/fcN11/nmmmq4HH+yzJ8RfE6W2Vjr7bO14zTX6zuf+W9LU7McGgFQHHeTvuzMNCAyunakzuGLmfIA+4r4i3CcxIKom9oEHSh9+KD3+uH++fHl6uvawjpg2DqUjFpN69Cj2WQDIgQAXUESRKXOc028XfEmfbKnU25/USlaAm81co4kTI6uaGp5l1qgR3d/x6QB69vS1tILtelRm6egeNqxZpzax9+u6a/7XtPDeU7JuZ6HaF5H960mzYHJ0wOf6fbR0enm2/RLgKj3l5c1PaZHr/3mffdKXnX56Ws75XEGkD44+zdepy3Z92JojeJyv4IbxNwu+rE9GTpCGHBW9HbMWgG3LqlXSV29MPB3f63X/YOed/c+JE6Vrr5XOPFNnTPi1Fg55UlL/3PutqkofnR2Y2vdVLR315/zPccAADe62oXXpiZo7O5sAV+eUz3fc9dfnbgdkC4AV0rx5fkbF00/7Wd2ZhDMUMNMZQHPFYn5gSybB/Uh4BlfPyo80vd/LklLKCcTrAkaUMEhSUyOdeKL0xhs+nXum4NaKFT7QnzJLDADQ+dDjChRRLLW4qiQ5p9kDn9e+g/+mo0f/uTA3m/F0IxMmRK+PSB0QM9
d0kx4fadoGKfhi5rRgyFPpK+I33MGIq3jwbfn2f4reUejcGp1l/72lpm5LHdVVXZ1cqyzf/4NsAYV3MqSnw7Zj3jzpgAPSl48Z42dISprc55+qjOWu81JbuUmVZTkCWP3z6EjOx/Tp0tixqj9uoYavXdrymZEAOpcJE2QjR0iS3jzqbD15yKXS8cdLc0J1LWtqpLVrVRFr1A51b0bvJ16TK+7aa6NHW0+dqrqqj3Xr7JvzP8faWl8PNJ+ahJnk2/aJ1yRtTQ0jdFyZ0vCG/z4KWX+jEN+pZWW+7Z+hlicAtLmIwa9vLztbXzrvnYzXudTZXgmheuCS/KzYbMG1adN8armWpnEFAJQMAlxAEeWaiSSpMDN/5s/3NbCOOy56fUTqgDJrlM4913e+x1/XFueWom/1B75GwamnBgt8yqMu5ZslSTfPuiX6hcG57NL3H9q34W/ZDzJmjHTJJU3PozrSMtQqy+qzLIGJsWObvz90LvX1OTusThr7oDYdm302Y9723NMH1Naubd1+Kiqk1aulmTOzb3fKKf7zetZZrTsegJITr7+lsWPT2wbhOkJR7YaZM307JS7TbKtMbZhc6urap4N/+nTpW9+i7kZnc+KJvmZW1ACVYqGQPYBS0bOndNVVSZliYnLRNbOC+6SFQ57Unts9K02e3LRu/nw/I6u5GIAHANsEUhQCxRB08ESmKExViEZZLJY9PUmmGVxDh0pnnNH642cydqz0zDOJpxdPvlvnTfylNGqRT1EU3+aww3T9p7fr4sm/yLyv4Pf08EFXBs8zFLqN69ev6XGuQF2+/wfZUiANHOjzjNPxte3Klrs79W8w37+5bDnlKyqkfffNbz+FMGqUdNllwZPX2++4AIoukUY407Xr4IOlu+4qbJCgvWr5kX512zZpkv+XSRtkN8hp1Chfl7OZacFzKsZ7AdD59eihJcMf0YefVemXr41vGhSTKmhDzBr4vGYNfF5SKMC1++7MRgUAZESACyiiHpWf5N6oPUYdde8uKTnAVZahPlhBnXaaHxkbGNXjbVWXp9SuMJNmz1av229Xr+osReqnT5cefDD/Y4cDCrlqc+QKgC1YIL30UnIdgyiZCoGjczv1VP/3kW0WX2oHaj6f++uu6/ijEjv6+QEoiMQnPdNnft48aa+9Mq9vbhCpV6/2q6kxcqT0yivJA2OAuLb6nss2gMVMWrmyfY8JAK1w8PDHdfDwx7NvlO0alG2gIABgm0eACyiGXXfVi4ddoGGzI0ZepgaR2qODuKZGWrNGjQc+n1gUObKq0AGu0Hv7aOUp6lL2WdryvC1Z4msO/fjH+b+mrk765JPMdRXq6qQNG3xqmmzC9bqAVDvu6P9l09Dg/732Wv77jc9yBIAiiX9dJ9oM2QaEZPtu37Il87oobZAiOaP99/fpV1NrfwBtqRjBJgJcANrDfvtFL09Nvzptmv8O3ryZ2VsAgKyowQUUQ58+GnHzxYodF1GIvFjpQYYNU2PTGGz1rtqYvk0bBt+6ln/WtLtcI7mXLk1fVlEhjR/fvHO7/HJfeD7TtmvWSMuW+ZHnQFuKxaQLLsi9HemDAHRApuDalGtGdCY77+x/jhlTmBMqpKoqadYsX0cESFXo7+UxY3y7dMSIwu43m3i6z/ZK+wlg27Xnnj5oFSUc4Lr0UmmnnXwN4yFD2ufcAAAlixlcQLFkmn2ReqPcjrUf4sVf3fEnRG/QrVv7nMjIkZnXLVok7bFH9LrwiO58RqHm2qZnT5/vG2gP4UBrXV30Nlu3ts+5FAopCoFtgpmTTjih5Z/5ESOkK6/MPwUR1xZ0FPPmSf/7v9I++xRmf6ef7r/rWxosbonFi6WDDmrfYwLYNmXLQBG+N+/bt+3PBQDQadCKBTqarl2Tnw8Y0G6H3upydBgtWSJt2iQ9+aR/XogUQd26SRuD2WLnn+/3ma1WVbab73AwkM4vlKLPf1569FE/ujHKkCHS3/8u1da273k1kxOfP2BbYI0+6B
6TkyZPzrF1DpkC+2E77CA9+2zrjwUUyoAB0g03FC5tpllxAk0EtwAUm5l00kl+wC/38gCAZqAlC3Q0M2ZIL73kO3FGjWrXlDjbdX0v+wbdu0urVvlR2lJhbobPOEO6+24/MyufAu7ZGruZamkBpWLkyOwzGBcs8NeEDl4LhkyKwDYimFXabv1QJ57oA1zhlMRAsbVnTTgA6Mw6+D0OAKBjIsAFdDQVFdJxxxXl0GdNuF8njf29fzJ0aO4XFKLY66BBvsMqX9k6Ebp39/vq3r315wV0RJWV0pw5xT6LnLY6OvuAbcIRR0gXS3b08vY5XnW1NGlS+xwLAAAUFjOzAABtgAAXgISymFNt5SZfA+OMMzJveO21Ph1gRxyxSscXUHQEuIBtQ8XwBs2YIVXtNqXYpwIAADq6jth/AAAoeQS4AKTbYYfss7NqatrvXFIx6gvo8HLW8wPQKZhJDz5Y7LMAAAAlYcSIYp8BAKATIsAFoLSUlRX7DADksKWRzykAAAAASVdcIa1f7wfSAgBQYMwPBpCuI6YOiDeGd9yxuOcBIKfN8QAXMy4BAACAbVvv3tKECcU+CwBAJ8UMLgClYfVqafNmqaqq2GcCIIfNW2leAAAAAAAAoG11wGkaAIquI866MCO4BZSIzY0EuAAAAAAAANC2CHABaDJokP85cWJRTwNAafuMGlwAAAAAAABoYwyxBtDknHOkt96SGhqKfSYASthmAlwAAAAAAABoYwS4ADSpqpIGDy72WQAocdTgAgAAAAAAQFujBwoAABTUkhGPqEflJ8U+DQAAAAAAAHRi1OACAAAFddqOD+gX+3yt2KcBAAAAAACATixngMvMGszsATN7xsyeNrPVwfJeZna/mb0Q/KwLlpuZXWdmL5rZk2Y2ObSv5cH2L5jZ8rZ7WwAAAAAAAAAAAOis8pnBtUXSWc65sZKmSVplZmMlrZH0G+fcKEm/CZ5L0j6SRgX/jpd0g+QDYpLWStpV0i6S1saDYgAAoBNqbCz2GQAAAAAAAKCTyhngcs694Zx7LHj8oaRnJW0n6QBJtwSb3SLpwODxAZJudd5DknqaWb2k+ZLud86965zbIOl+SXsX8s0AAIAOZOvWYp8BAAAAAAAAOqlm1eAys6GSJkl6WFJ/59wbwao3JfUPHm8n6bXQy14PlmVannqM483sETN75N///ndzTg8AAHQkzOACAAAAAABAG8k7wGVm3STdKel059wH4XXOOSfJFeKEnHM3OuemOuem9u3btxC7BAAAxcAMLgAAAAAAALSRvAJcZlYhH9y6zTn3k2DxW0HqQQU/3w6Wr5fUEHr5oGBZpuUAAKAzIsAFAAAAAACANpIzwGVmJum7kp51zl0bWvVzScuDx8sl/Sy0fJl50yS9H6QyvFfSPDOrM7M6SfOCZQAAoDPZf3+ppkbaa69inwkAAAAAAAA6qfI8ttld0lJJT5nZE8Gy8yVdKelHZnaMpFclLQnW3SNpX0kvSvpY0gpJcs69a2aXSvprsN0XnHPvFuJNAACADmS//aR995XMin0mAAAAAAAA6KTMl8/qmKZOneoeeeSRYp8GAAAAAAAAAAAA2pmZPeqcmxq1Lq8aXAAAAAAAAAAAAEBHQYALAAAAAAAAAAAAJYUAFwAAAAAAAAAAAEoKAS4AAAAAAAAAAACUFAJcAAAAAAAAAAAAKCkEuAAAAAAAAAAAAFBSCHABAAAAAAAAAACgpBDgAgAAAAAAAAAAQEkhwAUAAAAAAAAAAICSQoALAAAAAAAAAAAAJYUAFwAAAAAAAAAAAEoKAS4AAAAAAAAAAACUFAJcAAAAAAAAAAAAKCkEuAAAAAAAAAAAAFBSCHABAAAAAAAAAACgpBDgAgAAAAAAAAAAQEkhwAUAAAAAAAAAAICSYs65Yp9DRmb2b0mvFvs8SkwfSe8U+yQAbLO4BgEoJq5BAIqN6xCAYuIaBKCYuAahrQxxzvWNWtGhA1xoPjN7xDk3tdjnAWDbxD
UIQDFxDQJQbFyHABQT1yAAxcQ1CMVAikIAAAAAAAAAAACUFAJcAAAAAAAAAAAAKCkEuDqfG4t9AgC2aVyDABQT1yAAxcZ1CEAxcQ0CUExcg9DuqMEFAAAAAAAAAACAksIMLgAAAAAAAAAAAJQUAlydhJntbWbPmdmLZram2OcDoPMws1fM7Ckze8LMHgmW9TKz+83sheBnXbDczOy64Fr0pJlNDu1nebD9C2a2vFjvB0DHZ2Y3mdnbZva30LKCXXfMbEpwXXsxeK217zsE0JFluAatM7P1QXvoCTPbN7TuvOB68pyZzQ8tj7xHM7NhZvZwsPyHZlbZfu8OQEdnZg1m9oCZPWNmT5vZ6mA5bSEAbS7LNYi2EDokAlydgJmVSfq6pH0kjZV0uJmNLe5ZAehkZjvnJjrnpgbP10j6jXNulKTfBM8lfx0aFfw7XtINkr8Zk7RW0q6SdpG0Nn5DBgARbpa0d8qyQl53bpB0XOh1qccCsG27WdHXhS8H7aGJzrl7JCm47zpM0rjgNd8ws7Ic92hfDPY1UtIGSce06bsBUGq2SDrLOTdW0jRJq4LrB20hAO0h0zVIoi2EDogAV+ewi6QXnXMvO+c2S7pd0gFFPicAndsBkm4JHt8i6cDQ8lud95CknmZWL2m+pPudc+865zZIul/cRAHIwDn3oKR3UxYX5LoTrOvunHvI+WK0t4b2BQCZrkGZHCDpdufcJufcPyS9KH9/FnmPFsySmCPpjuD14esZAMg594Zz7rHg8YeSnpW0nWgLAWgHWa5BmdAWQlER4OoctpP0Wuj568p+4QGA5nCS7jOzR83s+GBZf+fcG8HjNyX1Dx5nuh5xnQLQWoW67mwXPE5dDgC5nBKk/7opNAuiudeg3pLec85tSVkOAGnMbKikSZIeFm0hAO0s5Rok0RZCB0SACwCQyx7Oucny08pXmdnM8Mpg1J8rypkB2CZx3QFQBDdIGiFpoqQ3JH2pqGcDoNMzs26S7pR0unPug/A62kIA2lrENYi2EDokAlydw3pJDaHng4JlANBqzrn1wc+3Jf2P/DTzt4LUFgp+vh1snul6xHUKQGsV6rqzPnicuhwAMnLOveWc2+qca5T0bfn2kNT8a9B/5NOHlacsB4AEM6uQ71i+zTn3k2AxbSEA7SLqGkRbCB0VAa7O4a+SRpnZMDOrlC/s9/MinxOATsDMasysNv5Y0jxJf5O/xiwPNlsu6WfB459LWmbeNEnvB2k07pU0z8zqgmns84JlAJCvglx3gnUfmNm0IP/7stC+ACBSvFM5cJB8e0jy16DDzKzKzIZJGiXpL8pwjxbMunhA0iHB68PXMwBQ0D75rqRnnXPXhlbRFgLQ5jJdg2gLoaMqz70JOjrn3BYzO0W+8VIm6Sbn3NNFPi0AnUN/Sf/j2zcql/R959yvzOyvkn5kZsdIelXSkmD7eyTtK19U9GNJKyTJOfeumV0q38CRpC845/It3g5gG2NmP5A0S1IfM3td0lpJV6pw152TJd0sqYukXwb/AEBSxmvQLDObKJ8S7BVJJ0iSc+5pM/uRpGckbZG0yjm3NdhPpnu0cyXdbmaXSXpcvhMJAOJ2l7RU0lNm9kSw7HzRFgLQPjJdgw6nLYSOyHzQFAAAAAAAAAAAACgNpCgEAAAAAAAAAABASSHABQAAAAAAAAAAgJJCgAsAAAAAAAAAAAAlhQAXAAAAAAAAAAAASgoBLgAAAAAAAAAAAJQUAlwAAAAA0Axm1tvMngj+vWlm64PHG83sG2143Flmtltb7R8AAAAASkl5sU8AAAAAAEqJc+4/kiZKkpmtk7TROXdNOxx6lqSNkv7UDscCAAAAgA6NGVwAAAAAUADBDKu7g8frzOwWM/uDmb1qZovM7Coze8rMfmVmFcF2U8zs92b2qJnda2b1wfLTzOwZM3vSzG43s6GSTpR0RjBbbIaZLTSzh83scTP7tZn1b+axXwkt/4uZjSzKLw4AAAAAWoAAFwAAAA
C0jRGS5kjaX9L3JD3gnBsv6RNJ+wWBpuslHeKcmyLpJkmXB69dI2mSc26CpBOdc69I+qakLzvnJjrn/iDpj5KmOecmSbpd0jn5Hju03fvB8q9J+kqB3z8AAAAAtBlSFAIAAABA2/ilc+4zM3tKUpmkXwXLn5I0VNJoSTtKut/MFGzzRrDNk5JuM7OfSvpphv0PkvTDYNZXpaR/NOPYcT8I/fxys98hAAAAABQJM7gAAAAAoG1skiTnXKOkz5xzLljeKD/Y0CQ9HczImuicG++cmxdss5+kr0uaLOmvZhY1OPF6SV8LZmCdIKm6GceOcxkeAwAAAECHRoALAAAAAIrjOUl9zWy6JJlZhZmNM7OYpAbn3AOSzpXUQ1I3SR9Kqg29voek9cHj5S08h0NDP//cwn0AAAAAQLsjRSEAAAAAFIFzbrOZHSLpOjPrIX9/9hVJz0v6XrDMJF3nnHvPzO6SdIeZHSDpVEnrJP3YzDZI+q2kYS04jToze1J+xtfhrX1PAAAAANBerClTBQAAAABgW2Fmr0ia6px7p9jnAgAAAADNRYpCAAAAAAAAAAAAlBRmcAEAAAAAAAAAAKCkMIMLAAAAAAAAAAAAJYUAFwAAAAAAAAAAAEoKAS4AAAAAAAAAAACUFAJcAAAAAAAAAAAAKCkEuAAAAAAAAAAAAFBSCHABAAAAAAAAAACgpPx/Taa0BNrWmiUAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(30,8))\n", + "plt.plot(Y, color = 'red', linewidth=2.0, alpha = 0.6)\n", + "plt.plot(Y_pred, color = 'blue', linewidth=1)\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "AcN7pMYXVGTK", + "outputId": "7e1c2161-47ce-496c-9d86-7ad9ae0df770" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MAPE: 2.0572089029888656 %\n" + ] + } + ], + "source": [ + "print('MAPE: ', mape(Y_pred, Y)*100, '%')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, [Co-op Translator](https://github.com/Azure/co-op-translator) adlı yapay zeka çeviri hizmeti kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlıklar içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. 
Bu çevirinin kullanımından kaynaklanan yanlış anlamalar veya yanlış yorumlamalar için sorumluluk kabul etmiyoruz.\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "Recurrent_Neural_Networks.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.1" + }, + "coopTranslator": { + "original_hash": "f8f3967282314d3995245835bdaa8418", + "translation_date": "2025-09-06T14:05:02+00:00", + "source_file": "7-TimeSeries/3-SVR/solution/notebook.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/translations/tr/7-TimeSeries/3-SVR/working/notebook.ipynb b/translations/tr/7-TimeSeries/3-SVR/working/notebook.ipynb new file mode 100644 index 000000000..e8a425cb7 --- /dev/null +++ b/translations/tr/7-TimeSeries/3-SVR/working/notebook.ipynb @@ -0,0 +1,699 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "fv9OoQsMFk5A" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Bu not defterinde, aşağıdaki konuları nasıl yapacağımızı gösteriyoruz:\n", + "\n", + "- 2D zaman serisi verilerini bir SVM regresör modeli için eğitime hazırlamak \n", + "- RBF çekirdeği kullanarak SVR'yi uygulamak \n", + "- Modeli grafikler ve MAPE ile değerlendirmek \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Modülleri içe aktarma\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append('../../')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": 
"M687KNlQFp0-" + }, + "outputs": [], + "source": [ + "import os\n", + "import warnings\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import datetime as dt\n", + "import math\n", + "\n", + "from sklearn.svm import SVR\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from common.utils import load_data, mape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Cj-kfVdMGjWP" + }, + "source": [ + "## Verileri Hazırlama\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8fywSjC6GsRz" + }, + "source": [ + "### Verileri yükle\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "aBDkEB11Fumg", + "outputId": "99cf7987-0509-4b73-8cc2-75d7da0d2740" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2012-01-01 00:00:002698.0
2012-01-01 01:00:002558.0
2012-01-01 02:00:002444.0
2012-01-01 03:00:002402.0
2012-01-01 04:00:002403.0
\n", + "
" + ], + "text/plain": [ + " load\n", + "2012-01-01 00:00:00 2698.0\n", + "2012-01-01 01:00:00 2558.0\n", + "2012-01-01 02:00:00 2444.0\n", + "2012-01-01 03:00:00 2402.0\n", + "2012-01-01 04:00:00 2403.0" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "energy = load_data('../../data')[['load']]\n", + "energy.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "O0BWP13rGnh4" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 486 + }, + "id": "hGaNPKu_Gidk", + "outputId": "7f89b326-9057-4f49-efbe-cb100ebdf76d" + }, + "outputs": [], + "source": [ + "energy.plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IPuNor4eGwYY" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ysvsNyONGt0Q" + }, + "outputs": [], + "source": [ + "train_start_dt = '2014-11-01 00:00:00'\n", + "test_start_dt = '2014-12-30 00:00:00'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 548 + }, + "id": "SsfdLoPyGy9w", + "outputId": "d6d6c25b-b1f4-47e5-91d1-707e043237d7" + }, + "outputs": [], + "source": [ + "energy[(energy.index < test_start_dt) & (energy.index >= train_start_dt)][['load']].rename(columns={'load':'train'}) \\\n", + " .join(energy[test_start_dt:][['load']].rename(columns={'load':'test'}), how='outer') \\\n", + " .plot(y=['train', 'test'], figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XbFTqBw6G1Ch" + }, + "source": [] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "Artık, verilerinizi filtreleme ve ölçeklendirme işlemleri yaparak eğitim için hazırlamanız gerekiyor.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cYivRdQpHDj3", + "outputId": "a138f746-461c-4fd6-bfa6-0cee094c4aa1" + }, + "outputs": [], + "source": [ + "train = energy.copy()[(energy.index >= train_start_dt) & (energy.index < test_start_dt)][['load']]\n", + "test = energy.copy()[energy.index >= test_start_dt][['load']]\n", + "\n", + "print('Training data shape: ', train.shape)\n", + "print('Test data shape: ', test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Verileri (0, 1) aralığında ölçeklendirin.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "3DNntGQnZX8G", + "outputId": "210046bc-7a66-4ccd-d70d-aa4a7309949c" + }, + "outputs": [], + "source": [ + "scaler = MinMaxScaler()\n", + "train['load'] = scaler.fit_transform(train)\n", + "train.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "26Yht-rzZexe", + "outputId": "20326077-a38a-4e78-cc5b-6fd7af95d301" + }, + "outputs": [], + "source": [ + "test['load'] = scaler.transform(test)\n", + "test.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x0n6jqxOQ41Z" + }, + "source": [ + "### Zaman adımlarıyla veri oluşturma\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fdmxTZtOQ8xs" + }, + "source": [ + "SVR için, girdi verilerini `[batch, timesteps]` biçiminde dönüştürüyoruz. Bu nedenle, mevcut `train_data` ve `test_data` verilerini, zaman adımlarını ifade eden yeni bir boyut olacak şekilde yeniden şekillendiriyoruz. 
Örneğimizde, `timesteps = 5` olarak alıyoruz. Yani, modelin girdileri ilk 4 zaman adımına ait veriler olacak ve çıktı 5. zaman adımına ait veri olacaktır.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Rpju-Sc2HFm0" + }, + "outputs": [], + "source": [ + "# Converting to numpy arrays\n", + "\n", + "train_data = train.values\n", + "test_data = test.values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Selecting the timesteps\n", + "\n", + "timesteps=None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "O-JrsrsVJhUQ", + "outputId": "c90dbe71-bacc-4ec4-b452-f82fe5aefaef" + }, + "outputs": [], + "source": [ + "# Converting data to 2D tensor\n", + "\n", + "train_data_timesteps=None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "exJD8AI7KE4g", + "outputId": "ce90260c-f327-427d-80f2-77307b5a6318" + }, + "outputs": [], + "source": [ + "# Converting test data to 2D tensor\n", + "\n", + "test_data_timesteps=None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2u0R2sIsLuq5" + }, + "outputs": [], + "source": [ + "x_train, y_train = None\n", + "x_test, y_test = None\n", + "\n", + "print(x_train.shape, y_train.shape)\n", + "print(x_test.shape, y_test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8wIPOtAGLZlh" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EhA403BEPEiD" + }, + "outputs": [], + "source": [ + "# Create model using RBF kernel\n", + "\n", + "model = None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GS0UA3csMbqp", + "outputId": 
"d86b6f05-5742-4c1d-c2db-c40510bd4f0d" + }, + "outputs": [], + "source": [ + "# Fit model on training data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Rz_x8S3UrlcF" + }, + "source": [ + "### Model tahmini yap\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XR0gnt3MnuYS", + "outputId": "157e40ab-9a23-4b66-a885-0d52a24b2364" + }, + "outputs": [], + "source": [ + "# Making predictions\n", + "\n", + "y_train_pred = None\n", + "y_test_pred = None" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_2epncg-SGzr" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Scaling the predictions\n", + "\n", + "y_train_pred = scaler.inverse_transform(y_train_pred)\n", + "y_test_pred = scaler.inverse_transform(y_test_pred)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xmm_YLXhq7gV", + "outputId": "18392f64-4029-49ac-c71a-a4e2411152a1" + }, + "outputs": [], + "source": [ + "# Scaling the original values\n", + "\n", + "y_train = scaler.inverse_transform(y_train)\n", + "y_test = scaler.inverse_transform(y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "u3LBj93coHEi", + "outputId": "d4fd49e8-8c6e-4bb0-8ef9-ca0b26d725b4" + }, + "outputs": [], + "source": [ + "# Extract the timesteps for x-axis\n", + "\n", + "train_timestamps = None\n", + "test_timestamps = None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(25,6))\n", + "# plot original output\n", + "# plot predicted output\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.title(\"Training data 
prediction\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "LnhzcnYtXHCm", + "outputId": "f5f0d711-f18b-4788-ad21-d4470ea2c02b" + }, + "outputs": [], + "source": [ + "print('MAPE for training data: ', mape(y_train_pred, y_train)*100, '%')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 225 + }, + "id": "53Q02FoqQH4V", + "outputId": "53e2d59b-5075-4765-ad9e-aed56c966583" + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(10,3))\n", + "# plot original output\n", + "# plot predicted output\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "clOAUH-SXCJG", + "outputId": "a3aa85ff-126a-4a4a-cd9e-90b9cc465ef5" + }, + "outputs": [], + "source": [ + "print('MAPE for testing data: ', mape(y_test_pred, y_test)*100, '%')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DHlKvVCId5ue" + }, + "source": [ + "## Tam veri seti tahmini\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cOFJ45vreO0N", + "outputId": "35628e33-ecf9-4966-8036-f7ea86db6f16" + }, + "outputs": [], + "source": [ + "# Extracting load values as numpy array\n", + "data = None\n", + "\n", + "# Scaling\n", + "data = None\n", + "\n", + "# Transforming to 2D tensor as per model input requirement\n", + "data_timesteps=None\n", + "\n", + "# Selecting inputs and outputs from data\n", + "X, Y = None, None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ESSAdQgwexIi" + }, + "outputs": [], + "source": [ + "# Make model predictions\n", + "\n", + "# Inverse scale 
and reshape\n", + "Y_pred = None\n", + "Y = None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 328 + }, + "id": "M_qhihN0RVVX", + "outputId": "a89cb23e-1d35-437f-9d63-8b8907e12f80" + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(30,8))\n", + "# plot original output\n", + "# plot predicted output\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "AcN7pMYXVGTK", + "outputId": "7e1c2161-47ce-496c-9d86-7ad9ae0df770" + }, + "outputs": [], + "source": [ + "print('MAPE: ', mape(Y_pred, Y)*100, '%')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, AI çeviri hizmeti [Co-op Translator](https://github.com/Azure/co-op-translator) kullanılarak çevrilmiştir. Doğruluğu sağlamak için çaba göstersek de, otomatik çeviriler hata veya yanlışlıklar içerebilir. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. 
Bu çevirinin kullanımından kaynaklanan herhangi bir yanlış anlama veya yanlış yorumlama durumunda sorumluluk kabul edilmez.\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "Recurrent_Neural_Networks.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.1" + }, + "coopTranslator": { + "original_hash": "e86ce102239a14c44585623b9b924a74", + "translation_date": "2025-09-06T14:07:32+00:00", + "source_file": "7-TimeSeries/3-SVR/working/notebook.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/translations/tr/8-Reinforcement/1-QLearning/notebook.ipynb b/translations/tr/8-Reinforcement/1-QLearning/notebook.ipynb new file mode 100644 index 000000000..135b01619 --- /dev/null +++ b/translations/tr/8-Reinforcement/1-QLearning/notebook.ipynb @@ -0,0 +1,411 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "17e5a668646eabf5aabd0e9bfcf17876", + "translation_date": "2025-09-06T15:06:30+00:00", + "source_file": "8-Reinforcement/1-QLearning/notebook.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + 
"# Peter ve Kurt: Pekiştirmeli Öğrenmeye Giriş\n", + "\n", + "Bu eğitimde, bir yol bulma problemine Pekiştirmeli Öğrenme uygulamayı öğreneceğiz. Senaryo, Rus besteci [Sergei Prokofiev](https://en.wikipedia.org/wiki/Sergei_Prokofiev) tarafından yazılan [Peter ve Kurt](https://en.wikipedia.org/wiki/Peter_and_the_Wolf) adlı müzikal masaldan esinlenmiştir. Bu, genç öncü Peter'ın cesurca evinden çıkıp kurtu kovalamak için orman açıklığına gittiği bir hikayedir. Peter'ın çevresini keşfetmesine ve en uygun navigasyon haritasını oluşturmasına yardımcı olacak makine öğrenmesi algoritmalarını eğiteceğiz.\n", + "\n", + "Öncelikle, bir dizi faydalı kütüphaneyi içe aktaralım:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import random\n", + "import math" + ] + }, + { + "source": [ + "## Pekiştirmeli Öğrenmeye Genel Bakış\n", + "\n", + "**Pekiştirmeli Öğrenme** (RL), bir **ajanın** belirli bir **ortamda** en iyi davranışını öğrenmesini sağlayan ve birçok deney yaparak gerçekleştirilen bir öğrenme tekniğidir. Bu ortamda bir ajanın, bir **ödül fonksiyonu** ile tanımlanan bir **hedefi** olmalıdır.\n", + "\n", + "## Ortam\n", + "\n", + "Basitlik açısından, Peter'ın dünyasını `genişlik` x `yükseklik` boyutlarında bir kare tahta olarak düşünelim. Bu tahtadaki her bir hücre şu şekilde olabilir:\n", + "* Peter ve diğer canlıların yürüyebileceği **zemin**\n", + "* Üzerinde yürüyemeyeceğiniz **su**\n", + "* Dinlenebileceğiniz bir yer olan **ağaç** veya **çimen**\n", + "* Peter'ın kendini beslemek için bulmaktan memnun olacağı bir şey olan **elma**\n", + "* Tehlikeli olan ve kaçınılması gereken bir **kurt**\n", + "\n", + "Ortamla çalışmak için `Board` adında bir sınıf tanımlayacağız. 
Bu defteri çok fazla karmaşık hale getirmemek adına, tahtayla çalışmak için gereken tüm kodu ayrı bir `rlboard` modülüne taşıdık ve şimdi bu modülü içe aktaracağız. Uygulamanın iç detaylarını öğrenmek için bu modülün içine bakabilirsiniz.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "Şimdi rastgele bir tahta oluşturalım ve nasıl göründüğüne bakalım:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 1" + ] + }, + { + "source": [ + "## Eylemler ve Politika\n", + "\n", + "Örneğimizde, Peter'ın amacı bir elma bulmak, aynı zamanda kurt ve diğer engellerden kaçınmaktır. Bu eylemleri bir sözlük olarak tanımlayın ve bunları ilgili koordinat değişiklikleriyle eşleştirin.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 2" + ] + }, + { + "source": [ + "Ajanımızın (Peter) stratejisi, **politika** olarak adlandırılan bir şeyle tanımlanır. 
Şimdi, **rastgele yürüyüş** adı verilen en basit politikayı ele alalım.\n", + "\n", + "## Rastgele yürüyüş\n", + "\n", + "Öncelikle, rastgele yürüyüş stratejisini uygulayarak problemimizi çözelim.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "# Let's run a random walk experiment several times and see the average number of steps taken: code block 3" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 4" + ] + }, + { + "source": [ + "## Ödül Fonksiyonu\n", + "\n", + "Politikamızı daha akıllı hale getirmek için, hangi hamlelerin diğerlerinden \"daha iyi\" olduğunu anlamamız gerekiyor.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 5" + ] + }, + { + "source": [ + "## Q-Öğrenme\n", + "\n", + "Bir Q-Tablosu veya çok boyutlu bir dizi oluşturun. 
Tahtamızın boyutları `genişlik` x `yükseklik` olduğundan, Q-Tablosunu `genişlik` x `yükseklik` x `len(actions)` şeklinde bir numpy dizisi ile temsil edebiliriz:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 6" + ] + }, + { + "source": [ + "Q-Tablosunu tahtada görselleştirmek için `plot` fonksiyonuna aktarın:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "error", + "ename": "NameError", + "evalue": "name 'm' is not defined", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mQ\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mNameError\u001b[0m: name 'm' is not defined" + ] + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## Q-Öğrenmenin Özeti: Bellman Denklemi ve Öğrenme Algoritması\n", + "\n", + "Öğrenme algoritmamız için bir sözde kod yazın:\n", + "\n", + "* Tüm durumlar ve eylemler için Q-Tablosu Q'yu eşit sayılarla başlatın\n", + "* Öğrenme oranını $\\alpha\\leftarrow 1$ olarak ayarlayın\n", + "* Simülasyonu birçok kez tekrarlayın\n", + " 1. Rastgele bir pozisyonda başlayın\n", + " 1. Tekrarla\n", + " 1. Durum $s$'de bir eylem $a$ seçin\n", + " 2. Yeni bir duruma $s'$ geçerek eylemi gerçekleştirin\n", + " 3. Eğer oyun sonu koşuluyla karşılaşırsak veya toplam ödül çok küçükse - simülasyondan çıkın \n", + " 4. Yeni durumda ödül $r$'yi hesaplayın\n", + " 5. 
Bellman denklemine göre Q-Fonksiyonunu güncelleyin: $Q(s,a)\\leftarrow (1-\\alpha)Q(s,a)+\\alpha(r+\\gamma\\max_{a'}Q(s',a'))$\n", + " 6. $s\\leftarrow s'$ yapın\n", + " 7. Toplam ödülü güncelleyin ve $\\alpha$'yı azaltın.\n", + "\n", + "## Keşfet vs. Sömür\n", + "\n", + "En iyi yaklaşım, keşfetme ve sömürme arasında bir denge kurmaktır. Çevremiz hakkında daha fazla bilgi edindikçe, optimal yolu izleme olasılığımız artar, ancak ara sıra keşfedilmemiş bir yolu seçmek faydalı olabilir.\n", + "\n", + "## Python Uygulaması\n", + "\n", + "Artık öğrenme algoritmasını uygulamaya hazırız. Bundan önce, Q-Tablosundaki keyfi sayıları ilgili eylemler için bir olasılık vektörüne dönüştürecek bir fonksiyona da ihtiyacımız var:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 7" + ] + }, + { + "source": [ + "Orijinal vektöre, tüm bileşenlerin aynı olduğu başlangıç durumunda sıfıra bölünmeyi önlemek için küçük bir miktar `eps` ekliyoruz.\n", + "\n", + "Gerçek öğrenme algoritmasını, **epoklar** olarak da adlandırılan 5000 deney için çalıştıracağız:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "" + ] + } + ], + "source": [ + "\n", + "from IPython.display import clear_output\n", + "\n", + "lpath = []\n", + "\n", + "# code block 8" + ] + }, + { + "source": [ + "Bu algoritmayı çalıştırdıktan sonra, Q-Tablosu her adımda farklı eylemlerin çekiciliğini tanımlayan değerlerle güncellenmelidir. Tabloyu burada görselleştirin:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAW4AAAFpCAYAAAC8p8I3AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nOzdd3xUxd7H8c9sS7KbShJCL1IEBAURFEUBxXoVVFBARS7YQETgiooFsaGAXgUsIGIDHzuCXiuKICDKpYoiHUJJAmkk2Wzfc+b5YzcRrgSQZHM2ZN6+8kr27ObMlyX8nMyZmSOklCiKoig1h8noAIqiKMrfowq3oihKDaMKt6IoSg2jCreiKEoNowq3oihKDaMKt6IoSg0TscIthLhCCLFVCLFDCDE+Uu0oiqLUNiIS87iFEGZgG3ApsB9YDQySUv5R5Y0piqLUMpHqcXcFdkgpd0kp/cAHQN8ItaUoilKrRKpwNwT2HfZ4f/iYoiiKUkmWCJ1XHOXYEWMyQog7gTsBrFZr5zPPPDNCUSrP5/PhdDpJS0szOkqFioqKsFqtOBwOo6NUKCcnh7p162I2m42OUqG9e/fSpEkTo2NUKBgMkpeXR/369Y2OUqHS0lKCwSDJyclGR6lQXl4eiYmJxMTEGB2lQps2bcLj8RytloKUsso/gG7At4c9fgh4qKLX161bV0az7du3y9mzZxsd45gWLFggV65caXSMY3rqqadkYWGh0TEqpOu6vOeee4yOcUwFBQVy0qRJRsc4phUrVsiFCxcaHeOYZs2aJbdv3250jGMK18Wj1sxIDZWsBloJIZoLIWzAQODzCLWlKIpSq0SkcEspg8A9wLfAZuAjKeWmSLSlKMqpZf369Xi9XqNjRLVIjXEjpfwK+CpS51cU5dSSlZXFq6++SkxMDPPnz6dVq1YMGTLE6FhRSa2cVBTFcFJKcnJy2LZtG8OHD6d79+589NFHZdfIlP+hCreiKIYLBAJMnjyZF198kX/961+cccYZDBgwgLfeesvoaFEpYkMliqIoJ8pmszF+/Hhuu+02OnTowMcff8z333/PV1+p0dajqXE97l27dvHRRx8ZHUNRlCrWtGlT+vfvj9lsZsqUKdx7771GR4paNarH3adPH2JjYzn33HPp0KEDS5YsiepFMYqinLj09HTuuOMO9u3bx6JFi+jVq5fRkaJWjelx79q1i9jYWGbNmkW/fv24/PLL+f33342OpShKFWvcuDHffPMN/fv3NzpKldu0aRO6rlf6PDWmcK9du5auXbtSUlLCW2+9Rb169Vi+fLm66qwop6D4+HjOPPNMVq1aZXSUKrNkyRLee+89NE2r9LlqzFDJDTfcQIcOHThw4AD169dn3Lhx5OTkIMTRl/IrilJzORwOLrjgAn744QfOPfdco+NU2s8//8zPP//MuHHjsFqtlT5fjelxQ+j/WFdffTUul4sDBw4wfPhwioqKjI6lKEoExMTEIKXE7/cbHeWkSSnZsmUL8+bNY/To0aSkpFTJeWtU4U5LS6Nnz55MmDCBjIwM3n77bSZMmMCOHTuMjqYoShW75JJLcDqd/PLLL0ZHOWmrVq1i2rRpvPLKK1W6c2eNKtxlyoZHkpOTGT16NPPmzSMzM9PYUIqiKIdZsmQJ33//PS+88EKVD+nWyMJ9uJYtWzJ06FAee+wx3G53xNurigsLkSSlrJKr1pFUEzLquq4ufEeB4cOH8/rrr1fLv+2qIqVkw4YN/PDDD4wcORK73V7lbdT4wg3QrFkzZs2axa233kpOTk7E2snKyqJfv35s3ryZgoKCiLVTGevXr+eOO+5gy5YtUfnDHgwGee+993jhhRfYsmVLVBZwp9PJxIkT+fLLL9m1a5fRcaJWdnZ2xMefmzZtihCC3bt3R7SdqrRlyxamT5/OhAkTqmxM+3+dEoUbwG6389JLL/HSSy+xffv2iLTx+eef89JLLzFmzBhmz54dkTY
q6/PPP2f06NEMGDCAdevWGR3nL9xuN4WFhTRt2pSLL744Krfv3LFjB927d2fDhg1cd911RseJOkVFRcyZM4eXXnqJ2bNn88UXX0S0vbfffptbbrklom1UlWXLljFnzhzeeOMNbDZbxNqpMdMBT0T9+vUZOnQob7zxBmPHjiUjI6NKzz9ixAjGjBmDpml88sknbNmy5aTPFRcXx8yZM6t87Ou+++5j1KhR6LrOpEmTqFu37kmfq127djz44INVmA4SExPp3r0706ZNQ9M07rzzzkrdyuy6667j2muvrcKE0KlTJ95//322bdtGVlZWpbcWHT9+PG3btq2idMY7cOAA8+bN44033uC///0vzz33HFdffbXRsQy3fPlyVq5cyWOPPYbJFNk+8SlVuAFatWrF2LFjGTFiBPPmzavSK7kej4dRo0bx9ddfk5mZyV133XXS5/J6vXTu3LlKxlHbt29Pv379gND42vjx45kyZQqXX345nTt3Punz/v7773Tq1KnS+QAGDx5cnq9x48Y88MADbNy4kQcffJDY2NiTPu/HH3/ME088USUZp02bBoSGc3r06EGbNm1Yt24djz76aKXO+8QTT7BpU9XcR+STTz6p8LnS0lIuuuiiiI/Ne71ecnNzueGGG1iwYAFms5mXX36Ze+65JyLtCSGYPn06DzzwAFOnTo1IG5UhpWTz5s0sWLCAxx9/nMTExIi3ecoVboCMjAzmzZvHPffcw2OPPUbz5s2r5LzPPvss7du35+OPP+aOO+6gVatWJ30uKSVr1qypklwQGiIBGDlyJD169GDXrl20a9euUhlbtmxJ3759qySfEIJJkyZRVFTE1KlTSU1NJTk5mVatWlWqcD/00EOMHz++yjJ+8skn/PLLLyxfvpxNmzbRuXPnSr2HAHPnzq2SfMAx1y04HI4q/ZmqyJYtW7j//vuZM2cO27ZtY+bMmSxZsiRi7QkhqFevHgcPHoxYG5WxYcMGXnrpJebMmRPxnnaZU7JwQ+iH+LHHHuOdd95h8ODBtGjRotLnfPLJJ3nppZdYunRppYc4hBARWfU5a9Ys5s+fz48//ljpc0UiY0pKCqNHjyY/P59JkyZV+nxVmbGsp9q9e3ecTie33HILjRs3rvR5q+sfc6R+pv5Xeno6HTt2ZPbs2QQCAW6++eaIt1unTh2aNWvGhg0b6NixY0Tb+jt++OEHli5dyqxZs6rt7xlO4cIN0Lx5cwYPHsxzzz3HlClTSEpKqvQ5R40aVQXJIsfhcHDrrbcaHeOYasJ475VXXml0hKiVnp7OpEmT2LhxIy1atKjS4ciKpKWlcdppp7FmzRrOOussw7e6kFKyevVqVq5cydixYyN6IfJoTplZJRVp0aIFU6dOZciQIVE7hU9RaqIzzzyzWop2mQEDBrBmzZqoWCm9detW5syZw3333RexKX/HcsoXbgjNZHjzzTd5+umnIzZVUFGUyIqNjSUYDBq+CO7nn39mxowZvPbaa8TFxRmSoVYUbgiNkY0cOZJ58+axd+9eo+MoinISBg4cyNy5c6t94dbzzz+PpmksWbKEJUuW8Nxzzxk6XFNrCjeEZknccccdjB8/PipXFSqKcmy9e/fmm2++qbbtCL7++mvOPfdcTjvtNLp168a3337LiBEjqnWI6GhqVeGG0N013njjDYYOHcr+/fuNjqMoyt/UsmXLahnn9vl8ZGZmMnbsWFq2bEkwGERKWa2zRypifAIDxMXF8cILL7BixQqjoyiK8je9/fbb3H777RFvx+l0sm/fPjp06MDXX3/Nl19+icPhiIp9U2pl4QZo2LAhAwcONDqGoih/k81mY9CgQbz33nsRbSctLY0zzjiDf/7zn9xwww0MGzaMoqKiqJhHfkrP41YU5dRjsVjo2LEjn332GTfddFNE2+rbty8XXHAB9913Hy+//LIhU/+ORhVuRVFqnKSkJKxWK3l5eaSnp0esnfj4eOLj48v3iDF64U+ZWjtUoihKzXX
GGWeQkJBQbbc1q67tBE6U6nErilIjDRgwwPBpeUapVOEWQmQCTkADglLKc4QQdYAPgWZAJnCjlPJQ5WIqiqIcqVmzZkZHMExVDJX0klJ2lFKeE348HlgspWwFLA4/VhRFUapIJMa4+wLvhL9+B6ja25MoiqLUcpUt3BJYJIRYK4S4M3wsQ0qZAxD+fPL3zlIURVH+orIXJy+QUmYLIeoC3wkhTvgmjOFCfyeEptxE8659+/fvp6ioKKoz5ufno+t6VGd0uVzs3r2b/Px8o6NUyO/3R/V7WFJSgsvliuqMBw4ciPp/L0VFRezbt6/a9jw5GcfaSKtShVtKmR3+nCuEWAB0BQ4KIepLKXOEEPWB3Aq+dzYwGyA1NVUuXbq0MlEiqqioiP379xPNGXfu3Indbo/qPcfz8/NZuXIlMTExRkepUGlpaVT/PXu9Xn7O+5nPln5mdJQK2XPsXOK5pNp38Ps7srKyWLt2bVTs7V2RY75/UsqT+gAcQMJhX68ErgCeA8aHj48Hph7vXHXr1pXRbPv27XL27NlGxzimBQsWyJUrVxod45ieeuopWVhYaHSMCum6Lu+55x6jYxxTQUGB7DypsySK/6u3op5cuHCh0W/VMc2aNUtu377d6BjHFK6LR62ZlRnjzgBWCCF+Bf4LfCml/AaYDFwqhNgOXBp+rPyPyy67LKp7JIqiRK+THiqRUu4CzjrK8QLgksqEqg3y8vKMjqAoSg2llrwriqLUMKpwK4qi1DCqcCuKotQwqnAriqLUMKpwK4qi1DC1snB7vV42btzIU089hdvtVtPyjqK0tJShQ4caHUNRaoTdu3fz8MMPV1t7tbJw9+jRg+eff5527drRvHlz9u7da3SkqKNpmnpfFOUE+f1+Dhw4UG3t1brCvXjxYi677DImT55MQkICL774Ip9++mlU71mgnNreeust9fNXw1X331+tK9wZGRlkZWXx8MMPU79+fbKysmjcuLHRsZRaaOHChVxzzTWYTCb69OlTfl9DRTmeWnfrsiZNmpCZmUkwGOSXX37hlVdeYfny5VF1Pznl1Fe2C+FNN91Er169cDgcbN++Hb/fj81mMzqeEuVqVY87OzubyZMnM3v2bObOnYumaWRmZqoet1LtMjMzKSoqwm63M3HiRLp3747b7Y7q3eqU6FFretzFxcW8+uqr9OvXj5YtWwIwfPhwg1MptVVsbCw//fQTzzzzDNu2bePuu++ma9eutGvXzuhoSg1QKwq3pmkMHz6cJ554gtatWxsdR6nlPB4PDz/8MJMnhzbOfPjhh3nggQdo0qSJwcmUmuKUL9x5eXk8+OCDTJs2jYyMDKPjKLVcdnY2Y8aM4e2338ZutwPw7rvvRvXNJaKBz+cjPz+fhg0bGh3lqKr7GtkpPcadk5PDq6++yr/+9S9VtBUgNG1r/vz51d5uSUkJs2fP5pVXXuHFF18sL9qAKtonYM+ePdW6wOXvUtMBq4jX62Xy5Mn06dOH9u3bGx1HiQIfffQRQ4YM4cCBAwwePLjablEmpWTChAnExcUxZMiQqO01KjXHKTlU4vF4GDx4MNOmTaNRo0ZGx6mRhBDExMTg8/mqrUdYWloa0Z7LqlWr6NmzJ9deey1+v5+tW7dy4YUXYjabI9amz+fj7rvvZty4cbRt2zZi7ZzKpJR4vV7i4uKMjlIhk8mE2WwmEAhgtVoj3t4pV7hzcnKYOnUq06ZNi+qeTceOHfn111/p1KmT0VGOKiEhgfvuu49nnnmGJ554olraHDFiBFlZWRE7/86dO1myZAlLlixh6tSpPPfcc2RlZUXsomBhYSHTp09n5MiRtGnTJiJtnOp27dpFZmYmw4cP57XXXiMnJ4f69esbHesvWrVqRbdu3fi///s//vnPf0a8vVOqcOfn5zN79mxuvfXWqO9pz5gxgz59+rBkyRKjoxyVEAIhRLWO3c2bNy+i53/ooYfIz8/nqaeeYuTIkVx55ZURK9ozZsygqKiInj1
7cvbZZ0ekjdrg9ddfZ//+/ZSUlPDiiy/Sp08fbr/9dqNj/UXZxcnq+vdyyoxxa5rGuHHj6N+/f9T2YhVjDR06lH79+nH//fdz5513csUVV1R5G1JKJk2aRIMGDbjqqqvo1atXlbdRW/z3v//Fbrczbdo0GjRowMyZM9m8eTP79u0zOprhToked1FREXfffTcvvviimj2iVKh169blv9ImJSVV+fn9fj9vvPEGbdq04dprr43o2Hlt0LFjRxYuXMjy5ctZtGgRH374Ia1ataJBgwZGRzNcjS/cWVlZzJo1i8cff1wVbeW4hBARKdqLFi0iMzOTmJgY+vXrV+Xnr41sNhvt27fn/fff5+DBgyxbtoy77rpL/Q+RGj5U4nQ6eemll7juuuvUikjFMB9//DEbN24kMTGRYcOGGR3nlHLTTTfxwQcfkJSUxIcffsjFF19sdKSoUGN73MFgkNtvv51nnnmGFi1aGB1HqaUWL17Mvn37uPvuu49YVKNUHSEEAwcONDpGVKmRhTs3N5dHH32U6dOnU69ePaPjKLVYz5496dmzp/r1XalWNa5wHzhwgNmzZ3PPPfeooq0YThVsxQg1rnBbrVb69OnDmWeeaXSUU9qoUaP47bffyMvLY+vWrbzxxhvEx8cbHUtRoo6u6/zzn/9ky5YtACxZsoS3334bkylylxBrXOFOTU0lNTXV6BinNJfLxdq1axk1ahSrVq1i27ZtFBYWqsKtKEfhdDrZsWMHN954I0II5s+fj9PpjMjspTI1rnArkffyyy8zbtw4mjdvTiAQ4JZbbuHJJ59kzpw5RkdTlKjz5JNPMmnSJPx+P0IIzj77bJ588kn+/e9/R6xNVbiVv3jwwQdp27Yt999/P82bN2fAgAGsWbPG6FiKEpWmTp1K8+bNmT59OlJKxowZw+7duyPa5nEHYYQQbwohcoUQvx92rI4Q4jshxPbw55TwcSGEmCGE2CGE2CiEUJs01FBvvvkmAD/++CNz5sxRwySKUgGTycS8efPIzMxk7969zJs3L6Lj23BiPe63gZeBuYcdGw8sllJOFkKMDz9+ELgSaBX+OBeYGf6s1DDdunWjS5cuBINBYmNjjY6j1DDVfUcYIwkh6NGjB926dQNCKz4j7bj/W5BSLgMK/+dwX+Cd8NfvANcednyuDPkFSBZCRN8ejMoJsVgsqmgrJ6W67wgTDWw2W7UUbTj5Je8ZUsocgPDnuuHjDYHDt+7aHz6mKIqiVJGqHog52u9HR/1frxDiTiHEGiHEGo/HU8UxottDDz3ERRddxNq1a+nUqRMrVqwwOpKiKDXIyRbug2VDIOHPueHj+4HGh72uEZB9tBNIKWdLKc+RUp4Tzbckqmq5ubk4nU7mzZvH2WefzciRI9mxYwfBYNDoaIqi1BAnOx3wc2AIMDn8+bPDjt8jhPiA0EXJ4rIhlWPRNI2FCxeeZJTIy8/PZ+fOnVWS8ZdffiElJYVNmzZhs9lIS0tj9uzZ2Gy2Sm1S9Pvvv7Nnzx4OHjxY6YyRcuDAAb755puovndgSUlJVP8sut1uHDkOTlt4mtFRKpSQmcDvrt+jepx7165dWCwWfv/99+O/2CCaplX43HELtxDifaAnkCaE2A9MJFSwPxJC3AbsBW4Iv/wr4CpgB+AGhp5IQL9fMGJE9O6lbbfrDBlir5L9vvv27Vv+ddm9HCdMmFDp8+7Zs4dZs5IoKore97FlyxiuvTYdh8NhdJQKWSyWqN7XvbS0lC4xXZicMdnoKBXacmgLTpMzqt9Hu93OM3WewZ3hNjpKhfzCX+Fzxy3cUspBFTx1yVFeK4GRJ5ys/PtMHDjQ7e9+W7VJStpB/foF5dN9otHBgwcpKsqI6vexUaPFdO7cGZvNhtPpJKVOMgcPZZPgSKIkkMuiQ3PZ5d6EKWAhRsQjdDM5zmzOS7mCy5oPxO/20Si9CSUlJTgcDg4dOoTdbicQCKBpGg6HAyklcXFx5Uv0y5Yelz32+XwkJSX
h8/mQUhIbG4vJZCq/v+Z7771XJX/Puq6zaNEi9uzZw4ABA3A6nbzwwgs888wzlfqNo7CwkNWrV0f1z6Ku6+Tn50d1xo0bN1LQoYDilsVGR6lQvKnitRM1+kYKSs0jpU5BIJstrl9YVfgVM7Lv5MPsKXx28DU0P7S2nYtdT6e01EdD2+mcl3oFiTEp3L/kZiavu5/Nub/h1/0EAgFMJhO6rgOhRRCapqFpGj6fDyEEmqYhhCh/bdljv9+Ppmnouh6xawtut5vPPvuMXr160a9fPywWC02aNGHp0qURae/vWrZsWVQPZSjHppa8K9VKIlmfu4rp6yeR4cigSVJTioMBft39B5nZ+2jXsjHWgI1tu3aQ37qI5kltEewnRiYSJxJ5f8ObnF6nA5e3vIZYWxxCCMxmM7qulxeiQCCA1WpF0zQsFguaphETE4MQAovFUl6spZQEAoGIzL2Nj4/nuuuu47bbbmPv3r08/PDD+P1+xo4dW+Vt/R0//fQT//nPf0hISOCrr77isssuU3eVqYFUj1upViZh5py0i6kf6MymrYVs3JTH+o05lGTbiHHXw7XPTtY2P5vW57Fq/Xo27VrNsnVL8biCrNz5M7nOAmatfIVCXz5OpxMI/Wru8XiwWCyYTAK7PQ6v14PVasXn8xEbG4vL5SrvbTscjvIiHsm71nTt2pXJkyeTlpbGoEGDmDhxYsTaOhFSStauXUtCQgL33nsvGRkZrFy5UvW8ayBVuJVqpes6DpOdGdfMoHlqcxbv+I5PNsznxx0/sjFrI1+tXkrdpMbcdulweqXdQANPV9xuF96SAvILD7E9ZyfBgJm+M69Bs4V6zjabjZSUFHxeD+u/fpJ5Dzfhs8kdWPvFYyQmJuJ0OklNTUXXdeLi4sjPz0fTNPx+P4WF/7souOokJydzwQUXkJyczLnnnmv4fVG3bNlSfpu1Sy+9lGHDhuH3+1m3bp2huZS/TxVupVqZTCZiYmLwlnp4rd8srmrzDyxmM6eln8Z5Lc/jzGbt2ZO3h01Zv1PgLCSnIAdHQVNcW5PokNgWT3E+6F60YsHtM25HCIHX66WwsADnwU3s3LSCQyVeGrbvQ3KDjjhLSoiPjycvLw8hBC6Xi7S0NCwWCxaLheTkZKPfkmrTtm1bGjduzCOPPEJWVhaPP/44NpuNzp07Gx1N+ZvUGLdSraSU+P1+UlJSCAQCzOz3Ko/GTeDTtZ9SVFqEw+zALuLwCT+5BVsoPlRMgjWRvt36UuosJY46FOTlYkrJxn8wgKYFsVqtLFkwjdzMnziUs49OF4/lwj5jCQZDz3k8HlJSUtA0DbvdTnFxMWazGSklpaWlEd3wPtr07t2bhIQEPvjgA77//nveffddoyMpJ0H1uJVqZzKZMJlMSClJiavDk5c/yYBzBlEacLErbze/Z/3B6t2r2XtoH6c1akGTBk3YlbMLp9dJgkila5OLyF/jI6btAd5aOIeA38vqpfPx+iz0vetNulx6Z/n5y6b5ld0bsuxxmdq0ix1Au3btGDp0KIsXL6Zhw4acddZZRkdSToLqcSvVzmQyUVpaisPhwOVykRiTyOR/PMOTV07kuleu51DJIXbs20XdhDQKSwuItybgdXshIMnLKyDe6uDSzn3Yv38by+UCfhnxFima5Ipet9C0bTesVitut5uYmJjyi5OlpaXYbDb8fj92u718OqDVajX67TBE06ZNjY6gVIIq3Eq1klLi8/lITU2lsLCQ5ORkXC4XNqsNf6mfL0Z+QWZhJv9Z+x9cXhemoAmHzU5JUQlIgcftJcZsY0DvAZxz1jks27iI11c+Ro9/DOCs865G0zRKS0upU6cOJSUlJCUlUVRURFpaGk6nk7i4OAoKCrDb7UgpcblcUb3CL1JiY2M555xzWLlyJeeff77RcZS/SRVupVoJIYiJiaGwsJC4uDiKi4uxWq0Eg0Hi4+ORUtKybktGXToKKSU2i5kDK77nwH8/xR4TS2qvK0nudgnWmBg
OHTpE4EAQT5Hggt79sNlsSClJTk4mPzOT1W+8TOH+vaS0aEvnIXeQXDe9fLxb1/XyWSa1kd1up3v37vzwww+qcNdAqnAr1aqsx52UlERxcTGJiYm43W4sFkv5XGz8Xkw+L1seG4X0e2l03c2c89Cz6MKE1Wxi9+wpFPy6lqCmsyO/iJi8XHy/r2bNT8vI3biOgKbRdsAwOl0/EL/Pi+b18f6dgyktKaXPY0+Q2LwFGY2bYDKZcLlcxMTEGP22KMrfogq3Uu3MZjOBQKB8FWPZhUSz2YzmLCZ79nO49u6g7b+exJqQSKDoEN5d20GAT0LD62+h6a0jCbqcNPxxMeds20zBT8toduHFdLjpdoJBP65Dh/A7i9Ek6Ej6PPI4QU1n+f/NZeOKFdw1521OO7tz+UXL2igjIwNN08jLyyM9Pd3oOMrfoAq3Uq2EEEfsI1K2daWUEoJB9sx8Fu1gNqfdPBx/3gGCeQcQSMomfwgJ/r278UqJDiSe3pbkjp3R/EE8RQWU7NmJJiWaBE1KdCnRdNClJKhLzr66DwFd5//G/YuBz06l1bm195aonTp1Yv78+fzxxx/06NHD6DjK36AKt1KtpJQEg0FSUlKOuDhpsVjYt+BdPDs20/yW4RDwInQQIvxxxDlCBRwkmtuFX8pQsQ4XaE2X6JLy4h3UJJrUCYZf0/6iXvi8fmaNuIuxH35M27PPNujdUJSTowq3Uq1MJhOxsbHk5OSQmppKfn4+DocDn9tF4fefc/rNI9HcxUgTIASmcA/dFK7cUspQ71wSquBlRVqX6LokKHU0XaJpEAwX7oCuE5QQ1HU0XaDpOm3Pv4Dc/fvx5OdH/M+s9gJRqpoq3Eq1Kutxx8XFEQgEyi8MFqz4HpsjHm9+FmaTwGQOrQ0TZjAfVrh1GepVS12ApqNLHSlB6uGetl5WoCUBPTQ8EtQlQUmogOuhYZRAUCe1UVNeHX0vr2/6AxHBse5oXuTTtWtX1qxZQ7du3SJ6h/Lvv/+eHTt20LBhQ6655pqItVNbqMIdhSZOnMiECRNCMyxOQWWFrOyzlBLnupXYm7VE87gQJoE0mULrek0CYRKYw5Vb6hIhJVIHqcnwtD7Cn0PFW9NDRQuL1LoAACAASURBVPrPwq0T0P8s3AEt1Atv0KoFW1avMuptiAp9+vShV69e3HnnnREt3LGxsSQkJLBr1y5uvvnmI54bOnQovXv3jljbp6JTszLUUF9++SXjxo1j2rRpdOnShWuuuYYnn3zS6FhVqmz/bKfTid1ux+VyYbfbMZtNSM2P5nFhMgl0kwlpIlTAzaHiDYS73ICuo5cVbglBLVSUg1qoxx0M97gDuiQQ1AhKiV+XBDRBQNPCRZzyGzEokdW9e3e6d++Ox+OhT58+Rzz32muvMW7cuCOOzZ8/n7S0tPLHJpOJhISEaslaE6jCHWF5eXn89ttvJ/TaVatW0bt3b2w2Gx9++CFz5szh4MGDp9TKPl3X8fl8JCcn43a7SUxMxO/34/f5kQUHiQnvYyLMApNJIMwCYTIR6n5LgoCm66HirMlwgQ59HZDh3rQWKtj+YKg4l5QUY7Y78GtlxTv8fHgRTm3XqVMn1q5dS8+ePSPeVlxcHM2bNz/i2LPPPsuzzz57xLGBAweSf9j1hzp16jBixIgjXtOkSRNatmx5Qu1mZmaSkJBAamrqSSaPLqpwR1hBQcEJ367qjz/+wOVysWzZMm6//Xbsdjt5eXmnVOE2mUzYbDYKCgpIT0/n0KFDJCQkEJuYRM6P32AzmSA5GcLFG1NoSknQ70PExKFTNvwBPpcTd34efk3HF9Tx6xKfpuMLSjSTBUtaBgEExdn7sddriF/XCWjg0zSCOuTlHMDv9Rr9lhju+eefp3Pnzqxfv96Q9o92DeDDDz884nFBQQHTp08/4ljZZmWHGzZsGI0aNSp/7PP5mDJlCrquI4QgKSmJMWPGVGF6Y6jCHWFt2rQ54eGOjz76iIkTJ/LCCy8waNA
gzjzzTNq3bx/hhNVL13X8fj/p6aHl58nJyfj9fupffyt5Py2maOtvaA2b4Eiri24S6CZBUEBw306sjVsgAc/BbAIlxXh9PrylpXiDGn5N4glKfEENr6bjR6Dv24sfM3GNm1Cck4NwOAho4NV0igsL2bXpDzpefS1E8cVDJSQ1NfUv/452797N9u3bjzj2yCOPkJOTU/44GAxSUFDAJ598QklJCYMHD2b06NHVkjmSVOGOItdeey2XXnopd999N5988gnx8RXf5bkm03W9/D6RZdusxjRogm6xEXC5Yfd20DRs8fEEpIYZ8JcUIzb+NzRXW9MIaDp+Tcev/Tk8EpR6eO42BDQNb1EhvqBOQX4+noCGH0Fi42YcOnSI3KwDeP1Brh4xIqpnfVSXtLQ0CgoKatRQQvPmzf8y7HLeeeeVL+oCuOqqq9i5cyf9+/fn1VdfZdasWTzwwAMnPMQSrVThjiI2mw2bzcb7779vdJSIEUJgs9lwOp3ExMTg8XjKi7gWE4dfl8iAhrmkmKAWQMveF54OKBCAhixfZOPXdYKawK8fPnatl495B8MzTIJaAE2DQFDDU1pKYc5BdAkIE3HxDqPfEsOZTCZmzpzJ2LFjmTt3rtFxKiUxMfGIx4sWLeK8885j3rx5FBYWMmLECDZv3sxrr71mUMKqoW6koFSrsjvgJCcn4/F4SEhIQNd1LBYLzW6+HV94nNpVWIi71IlP0/FqOh5Nx63peIM6nmDosV8DX7jXfUTPW9dDKyZ1WT67JBiefVJSeCh0R3iTiS79rkfE1s7dAWuLmJgY7r77bhYsWMCqVasYP3680ZGqhOpxK9WqbFvX/Px84uPjKSoqwmazEQgEaHDBpazXQZc6ugygO90Q1EPXJ0WojyGlHl6EA8HwYht/+GKlXy+bLSLxa6HnA2UFXEpEbCxejy/0Gi1Ix549aXLaaQa/I0ok2Ww2Ro4cycGDB4mLi/tLj7ymUj1upVpJKQkEAqSlpeF2u0lKSiq/E43T5Sahy0WhXnZQo9RZijsQ6mG7A3r4axnqcQd1PEENT3hGiTeo4Qtq+DQNf1Di1zT8mn7YXG4dV6kbv89PQno6lw+/C3NsXETv8l6TxMTEkJiYeMQUvFNJRkbGKVO0QRVupZqVLcBxu91YrVa8Xm/5LoFxCQm0vuk2vEEZLtAa3vBsEW9QwxvUDivaoSEUb1CWD6/4NIkvPFzi1wR+HfyaPGK+d0BKMlq1oqTwEN2u6VNrb6Twvxo3bkzXrl1ZsGCB0VGUE6AKt1LtpJTl27qWLYCRUmKxWEhpeTqNLusTLtThXnUwNLb95/i2xBMIPe8Lv84XnmUSCBfv0HCJFiriusSvh1ZXtruoJ5qwcH6//lgsllp7z0mlZlOFW6lWZUXbbrcTCASIi4srv4mCx+PB5IgntX1H/JhCvW4tNDTiDmq4y4t4MHSxsvxxqDfu1UJzuH26xBsMLbbx6xq+cG9bFyZSGjbE6Syhw0UXoWkaLpfL6LckathsNoLBIMFg0OgoynGowq1Uq7JtXXNzc3E4HBQUFJTfESc5OZm4uDhaD7iVjPMuDA2N+DXcAQ13UA99BHTcfokvKPEGZXi4JNQL9wbBo0l8wdCUQG94+CSgaUiLlfYXX8rqxUt5dsFCYmJjsVqtNWrecqQNGDCAdevWsW3bNqOjKMehCrdSrcouTsbHx+Pz+XA4HOULcrxeL36/H5MQtO3TH80ai0cLj20HNDyBP3vX7sPHvLU/i7gvPGxy+DTBICYan9mJAIIL+/dDs/7ZsywtLTX6LYkaaiFSzXHcwi2EeFMIkSuE+P2wY48LIbKEEBvCH1cd9txDQogdQoitQojLIxVcqbnMZjOapmG1WgkEAuWrJy0WS/k9IJtcfDn2NmfgDUrcQVne4y6/MBk+Xjb+7QuExrt95Rct/xz3rtuyNfaUOmRu+oMOvXrhiI8v3+fiVN0692SV/V2omz9EtxPpcb8NXHGU4y9KKTuGP74
CEEK0AwYCZ4S/51UhRO29G6vyF2X3nPT7/Ufce1JKecSmQbqu84+nnseUknpYwS4bMpG4whclvYE/i7lHA0+4aHs1Dd1iJbFRUyzxCRQXFnL96Hs5vWtXzGZzeQ51cfJIM2bM4K677jpi2bgSfY5buKWUy4ATnezaF/hASumTUu4GdgBdK5FPOcX871CJ3W5H13VMJhMej4dAIACELpQ1aNmKga++SUKTZngCevgjNETiK5vfXT7GrZfPRPEFQ2Pgfinw+gOUFB6iU+9L6T10KLFxcTidTjRNUxcnjyI2NhaPx2N0DOU4KjPGfY8QYmN4KCUlfKwhsO+w1+wPH/sLIcSdQog1Qog1gYD6QaktylZOFhUVERsbS0lJCRDaxc3hcBATE4OUEq/Xi9PppGXX87j6yWfpdP2N+KQon2XiN1tofmHP8imC3qBGbFpd4us1wKtpoeXwvgA2u53rRo3i0mHDEELg9XpJTk7GbDZjsVjU5vxH8eijj/5lu1QlupzsAN9M4ClCt2x9Cvg3MIwjb8Zd5qiDZVLK2cBsgISEDOnznWQSpcax2WzUrVsXs9lMenp6+UWxsmJhsViw2+3lxzpfegXtu3XnmvtD+0xICcIksCcnU3rYykeLLQaEOGKPbVtsLHWbNEEPTzmMi4tDCFG+8EZdkPur6667zugIynGcVOGWUh4s+1oI8TrwRfjhfqDxYS9tBGSfdDrllHT4WPbRenbm/7lxr8lkwpqSQnxKyl9em5JR74TaLDtjWXvVWbDVhT6lqp3U70NCiPqHPbwOKJtx8jkwUAgRI4RoDrQC/lu5iIpSs914443ld7NXlKogjtcbEEK8D/QE0oCDwMTw446EhkEygbuklDnh1z9CaNgkCIyRUn59vBBJSXVk69b/Otk/Q8RZrS7OOCOfpk2bGh2lQgcOHODXX2Pwev/aK40WKSnb6NateVTP5Pjtt9/o0KGD0TEqFAgEyMzMpFWrVkZHqVBhYSF+v5969U7styEjZGZm8kf6HwQcAaOjVGjbC9soLiw+6q+Gxy3c1SEhoa70+7caHaNCiYmZNGjwE1u23Gx0lAo1bfoNr76aTufOnY2OUqFp06YxdOhQkpKSjI5SoUceeYRJkyYZHaNCRUVFzJ07l3vvvdfoKBVas2YNBQUFXH559C7jmDdvHhdddFFUd8ZOP/10cnNzj1q4o2T1gcDvj96eYiBQgKbFRHVGTYvD4XCQcpRx4GhhtVpJSkqK2oxle6ZEaz4IZbRarVGd0W6343a7ozpjTEwM8fHxUZ3xWNdh1JwfRVGUGkYVbkVRlBpGFW5FUaKalJLdu3cbHSOqRMkYt6Ioyl+tXr2aX3/9tXy2z4UXXsjpp59udCzDqR63oihR69NPP2XXrl089thjrFq1iuXLlxsdKSqowq0oSlRasWIFDoeD8ePHM3ToUCZPnsxvv/2mhk1QhVtRlCh1wQUX4HK5ePvtt9m8eTP//ve/6dChA82aNTM62l+43W62bq2+tShqjFtRlKgkhKBXr14sXboUp9OJEIKOHTtG3cZgc+fOZc+ePQSDQWw2G8OHD4/4LfFUj1tRKqEm3C2mJmSsyGWXXcbTTz9N/fr1mTRpEuecc47RkY4gpWTOnDmcd9553HHHHSxdupS8vLyIt6sKt6KcBCkl2dnZfPjhh8yaNYusrKyoK47BYJD9+/fz6KOPsnz5cnJzc42OdMqZPn069957L02bNmXKlCm8+eabjB8/Hl3XI9quKtyKcpK6dOlCVlYWVquVTp064ff7jY50hOzsbLp160b37t1ZuHAhV1111fG/SflbxowZw4wZM/jss8/YtGkTt956K5MnT474jShU4VaUkzBr1iwef/xxLr74Ylq3bs20adOYMWOG0bHKSSmZPXs2s2fPxuVyMWbMGAYNGsT8+fONjnbKGTVqFEVFRbjdbq699loyMjIi3qYq3LXEU089Rc+ePfH5fDXmRrCBQIDvvvuO9957j0AgEFV
DEb1792bRokU0atSIzz//nOnTp9O7d2+jYx3h4osv5scff+Scc87h3nvvZcWKFVG9e2RNdcMNNzB48GDatWvH6NGjq2XjKlW4a4nx48fz2Wef0aNHD5544gl+/fVXgsGg0bEqJKWkZcuWfPfdd2RlZdGgQYOoGopo2bIlubm5TJw4EQCfz8fTTz/N/v37DU4WIoSgbdu2LFq0iIULF6LrOpmZmcyaNYuioiKj4ymVpAp3LVG2peovv/xC9+7dee2113j11Vf55ptvjI52VF9++SW33nord911Fw6HgyFDhvDpp58aHaucEIIff/yR22+/nauvvpoNGzYwbdo0XnvtNXbs2GF0PADq16/PunXrqF+/Pq+88grr16/niiuuYObMmZSWlhodT6kEVbhrocsuu4xXXnmFRo0a8ccffzBo0CB27txpdKwjxMfH43Q6sdlspKSk4HQ6mTp1qtGx/uLss8+mZ8+eADRu3JjBgwczc+ZM8vPzjQ12mAEDBtC4cehWsD179uTCCy/koYceiqqhJ+XvUQtwaikhBNdffz0ul4vrr7+ecePGsWPHDq655hruu+8+EhIS/nLT3urUo0cP7rzzTnRdp27duvznP//hnXfeoWPHjkyYMIHevXuTmJgYdYsxWrduzfjx4xk4cCDNmjXj5ZdfJjY21uhYR+jevTupqal06tSJ2267jREjRmCxqFJQGdX9c6h63LWcw+GgWbNmfPzxx6xfv55zzz2X/v37s3DhQtatW2dYLiEEW7dupX///rRt25asrCx69+7N+vXr2bJlCzfffDOLFy9m165dhmWsSHp6Ot999x3Dhw9n6tSpFBYWGh3pL9q0acP69euJj49n3rx5UX29oyao7t9eVOFWgFChFEJw9dVX8/3337N//34+//xzJk6caNgwihCCiy66iH79+pXnE0LwyCOP8Mknn/DTTz8xd+5cHn/8cbxeryEZKyKE4JxzzuHCCy9kxowZUZlPCMHQoUMRQvDKK68YHalCQghuuOEGFixYYHSUCqWlpdGkSRPWr19fLe2pwq0c1ejRo7n33nu58MILmTx5MjfddFP50uloGBuNjY1l4sSJ3HrrrXTv3p3+/fvzzDPPRE2+Mr169eKaa65h5MiRUZetzJAhQ2jSpAlTpkyJyoxCiPLfAqNVWeHesGFDtbSnCrdSoTp16tC7d2+mTZvG448/TqdOnejUqRNLliwhKyvL6HgAnHbaaVxyySW89957nH766XTq1ImlS5eSnZ1tdLRyZ599Nvfffz+dOnVi9uzZUTWtEUKFsU+fPjRv3pxOnTqxYsWKqCveypFU4VaOy+Fw0Lp1azZs2MCGDRv47rvvePrpp3nvvffYu3fvEa9dvHgxgUCgWvMJIUhMTKRfv35s2LCBb7/9lqeeeuqo+YwghKBNmzZs2LABi8XC3LlzI76Xxd9lNpu58cYb2bBhA19++SXffvut0ZGUY1CFW/nbnn32WZ588kmKi4t59dVXGTduHJs2bWLEiBGsW7eOsWPH8s477xiWb/LkyTzxxBMUFxdX+/9Ejue2227D4XDw/PPPGx2lQk8//TR//PGHWh4fxVThVk5Keno6I0aMYMSIEVx//fX07dsXs9nMsGHDuPrqq1myZImhS+vr1q3LiBEjaNGihWEZKjJgwABOP/10unTpwnfffRd1wxIWi4Xhw4eza9cuunTpQk5OjtGRlP+hCrdSKU2bNuWMM87g/PPPp06dOlxzzTV07dqVzp0788UXXxgdLyqZTCb69OnDypUr+f7771m2bFnUFW+73c59993HypUrGTt2LNu3bzc6knIYVbiVSktKSuKmm25izZo1fPjhh7z88susXr2avn37Gh0tagkhsFqtTJkyhVWrVhkd56hMJhNWq5XXX3+dH3/80eg4ymHUcimlSnTs2JFhw4bx7LPPctFFF3HllVcaHanGeOCBB4yOcEwJCQncfvvtRsdQDqMKt1Il6tWrR//+/enZsyd
16tSJ+EbyihJtYmNjq23puyrcSpVKS0szOoKiGGLQoEHV1tZxu0VCiMZCiCVCiM1CiE1CiNHh43WEEN8JIbaHP6eEjwshxAwhxA4hxEYhxNmR/kMoiqIYrWwbgepwIr/PBoH7pJRtgfOAkUKIdsB4YLGUshWwOPwY4EqgVfjjTmBmladWFEWpxY5buKWUOVLKdeGvncBmoCHQFyhbZfEOcG34677AXBnyC5AshKhf5ckVRak1bDYb9erVMzpG1PhbV5CEEM2ATsAqIENKmQOh4g7UDb+sIbDvsG/bHz72v+e6UwixRgixJhDw/P3kiqLUGs2aNWPKlClGx4gaJ1y4hRDxwHxgjJSy5FgvPcqxv6wukFLOllKeI6U8x2qNO9EYiqIotd4JFW4hhJVQ0f4/KWXZjf8Olg2BhD/nho/vBxof9u2NgOjZqk1RFKWGO5FZJQJ4A9gspXzhsKc+B4aEvx4CfHbY8VvDs0vOA4rLhlQURVGUyjuRedwXAIOB34QQZbuEPwxMBj4SQtwG7AVuCD/3FXAVsANwA0OrNLGiKEotd9zCLaVcwdHHrQEuOcrrJTDy70eJrk12ji76M0bbZkVHE+0Zoz0fqIxVpSZkPBoRDcGTklJkx463GB2jQmazn6SkUmy2OkZHqVAwWEJysgW73W50lArl5uaSmppq6N3jj2f//mwslgZGxzgGjYApG2tdq9FBKqS7deKD8SQmJhodpUKFhYXEx8djs9mMjlKhd999l0OHDh210xwVhTshIUOWlh40OkaFkpJ28NxzS7jjjjuMjlKhhQsXkpGRwbnnnovP58Nqtf55lxWTzgHfHg4FDyJ1iQUbIPAE3NjNibRIPAOhm7HZrGiahhCCYDCIEAKTyUQwGMRms5V/Ljt/MBjEbDYf8dqy1WPBYBCrNVRcylaTPf3004wcOZKUlBSD3qVjk1Jy44338sknLxkdpUIxMYW0f+wy1j681ugoFar3Uz1m5c+K6t0hX3vtNS655BJatmxpdJQKZWRkcPDgwaMWbrVXySlG0zQKCgqITbDx30NfUDe2KUGTl52lv5Lj34PTW4rTW0yDuBZ4/B7qWhuxPXYzuwt2cM+5j+D3BRBCUFpaihCCmJgYSktLSUtLo7S0lDp16lBcXEydOnUoKSnB4XBQVFSE1WrFZrNhs9mwWCyUlpZGbYFWlKqWm5vLkiVLGDBgQLW0pwr3KWZH0a/MP/QiolhwwLcHq4wlGJQ4SCEtpiHJpFDkduHRA9SJaQS6la93fkqcJYGnfniAge1vo4G9MQkJCUgpCQaDpKam4nK5iImJIT8/n/j4eEpKSoiLi8Pn85GcnIyUEk3TcLvdQGilW0FBAcnJyVgs6sdMObUVFhby7bffqsKtnJx0e1M+WLyeOrF1ODP9TE6r24Zd2Zm8s+J9WrZOIt0Rz/aNOZgbBrmg3UWYg7HEWZIpdOYTY0/gzf/O5B9tr+WMlLOwWKxYrVby8vKoW7cuLpeLOqmpFBYUkJSURHFxMQ6Hg5KSEqzW0GsdDgcmkwmXy0VKSora3lWpFaprc6ky6l9VNdF1nU8//fT4L6ykOOzM/seb6Jrgyz++ZvoPL7Poj0VkpNTDn5eM70A9WtU9j9iihmhFOitW/Mzu/E2s3bmeLdk7WL13HfM3zMer+7BYLAghiI+Px+/343PmsO2Xuexe+wF7f/8PVmtoTDw2Nra8V+33+/F6vZhMJrxeb5VdtV+4cKGh97BUlGOp7muFqnBXE13XeeaZZyLejslkonWdljx68SOYLIKdBTs55DlEfKwDt9+NO+Cicd3GtE3rSKKnJc0S2+HcJhF+HTM+9uZm8+1vi5n0xdNA6AdS13WQGll/fMvSD8aw9qtHWbvo3wj5559N1/XyH16TyYSUskp+mFevXs3gwYPZt28fw4YNM/Tu8Yrx5syZQ3a2WoitCnc1io2Nxev1RrQNq9VKwB+gW6NuzL9pPmnxqZjMZoq8xVhtFnyanz/2byL
PmcfWvVtYvuZnmtrb0ydjML8u3kqXNo2xO818/PXHBIIBAJwlReTuWc2yL1+iyB1Dl/5vcOmw/yOghWaV+P3+8hksZRcpdV0nLi6uUr9C6rrOzp07adWqFTfffDODBg1izZo1BAKBKnmvlJojMzOTNm3aoGkad9xxBzfccMPxv+kUpgp3NTGbzcyaNYvRo0dHtJ3i4mLq1q2LkIK29drx070rSHYkk+M8wIGSg2QX57Dv0H5+3vYzy7csJy0lHU1qHMzNp8/ZA3BsbkVSjIW6SXHs3LcNKSU/LnieOZOHEZvciktueYX2Xa8miAO73Y7X66VOnTrY7fby2ShFRUXYbDby8/MrNbxRWFjIsmXLGDp0KP/4xz/o0qULTZo0Yfny5VX4jinRTkrJhg0bGDJkCD169GDu3LnY7XZ27txpdDTDqMJdTcrmN0d6LKzsYqEQAq/XS4a9Hm8OepO7e9yNXw+QWZDJlpwt+HU/rRq2Jq1OGrlFuRwqLSQrLxu3101CYTPiEgVPfDaGT/8zh22bN5Jcrx3X3Dad9l2vwuv1Yrfb8fv9WK1W3G43fr8fgLi4OOx2O5qmkZCQUKmLk2lpaVx88cXcddddXHrppUycOJHdu3dz8cUXV9XbpdQQv/32G2eccQbPP/882dnZtGzZkh07dhgdyzBqVokBpJQRuwrt9XqJi4sjEAiUL8I5Pb01rXuNpWvDLhx0HeSZT54hKz+bXQd3Uic2FRs2CvLz8bkDeEs9jLh2BKPOv4di+37efnEKKbka9z31OinpjXG73cTFxeH1eomJiSlflFM2zl1WwMsKekxMTKX+PBdeeCEJCQm8++67/Pzzz3z++edV8TYpNYgQggEDBtCrVy+EEGzatAmTycSECROMjmYYVbirUZs2bejYsSMfffRRxOZ76rqOxWLB7/cfcZFQSuh2Wjdi42K5ot0VWG1WSp2l2MyCrF3bSE9KxSfBXiedWFssKckplJQcYmvzDfQa9g+ateqIEAJN0zCZTJTm5xGwmAloOqkNGmIymcqLN1D+2sr+hpGRkcHll1/Oeeedx6ZNm3jrrbd47rnnquKtUmqQFi1a8Msvv/D4449z991306pVK6MjGUoV7mpkNpvLi2qkxMbGls+r9vl85e0CxMTE4Pf7SYhNIH/NSmIDHpy5B0nI3kNJ0SGSO3QiseN5lGbuYLfHw74Dufy2/CfOO7s7gay9ZG/fQmxcHCXxKexZvpi9v/9KfHp97Ke1Jj41jYZnnEFGq9PLl8EnJSVV2TzupKQkGjZsiMViYe/evTRp0qRKzqvUDGazmR9++IFu3brRuXNno+MYThXuU4zL5SI1NZXS0lJiY2PRdR2fz4cQAo/HQ6zHye7/m4UjJRV/nJ2k9Hoknt8DKQQC8OzfgywuJEYP4ti9jfN9buTiL8jOykSYLBwK+Imr25DWl1xBi0suR2o6W39axoHff2Xv+rU4PV6ufXgCKWlpFBcXk5qaWmXFu2nTpjRu3JiffvpJFW6lVlOFu5pdf/31TJw4kZ49e9K4cePjf8PflJiYGNqrJDYWt9uNyWTCarUipcRhNbNh1B0kndaKlIsuw2S2gNTwZ+0NbdwrJWazhaSWbdClxNG4BS2vH4im6fjcJVji4tGkTiAQxFNciC5B0yWN2p9FfSkpLijg8+kv8Mbdd3HP2++SnJxc5TsB3nDDDUyYMIEePXrQoEE07+KnKJGjZpVUs7S0NEpKSsqHMapaSUkJaWlpSCmx2+2hed2BAN5DBay6/VrsDRpS/8p+6M5i9OJCpLMY4S1FeErB60K6StAK8wgW5qG7nASLC9CchxB+P/6iQgKHDhF0lhB0uQi6XQTcLvylTnyloeGZvmPuo/RADi//81b27dxZ5asd09PTSU5OZuvWrTV2L2VFqSzV4z7FxMbG4nK5EEIQCASQUmI2m8n5z0fUadyCBpf3IZCfgzk8PdEkwnfJEAIhJbqUIAUCCbqOlKBJSVAHTdfRpUS
XhB9LNF0SkBJN6gR1ga5Lzh94E9+99SablvxA89NPr/I/4+TJk+nYsSPr51Q7CwAAIABJREFU16+v8nMr0amoqIj9+/fTq1cvo6NEBdXjPsXY7XaKiooA8Hg8oVkePg/ObRtJbtOeYP4B8LrB60b4XJh8bsx+N2afG5Pfg/C5ET43eFxIrxvpdSHdbqTHheZxE3S7CLpcBFxOAq5S/O5Sgi4X/lIXfpcTn9uJCWjW4SxWffYZxXl5xr4hyikhNzeXbdu2cf755xsdJSqowm2AadOmMWLEiD9vdFCFiouLycjIQEpJfHw8FouFnKXfgs+PrgXQPC6kJ1SYhdeNyevC7HNj8bkweV0I3/+zd+ZxTlbX/3/f7DOZZFb2fVMRFEUQgUIFVERxq7VudbdqrbYI6qB+RbG1isoiboigIra/Qq1KXepSRS2KqIAiiGwCOjIDwyyZ7HmW+/sjyeOMggyQITNw369XXkme58l9TrZPTs4995yUWMeiyEgEMxzBjIYxokHMSFK8tegPYRItHCIeCREPB0mEQ8RCYaKhOtr27EmwuppQTU3GnyPAwoULufjii5tkbIWiuaNCJVmguLiYnTt3NsnY+fn5bN++HZ/PRzgcxm63k+t2EnTZMRMxTB2kzQY2kDYBNoHNbkMIkCYIU4IpkabENAxMKyRiYpigG8nQSMKU6IZEN000EzTTREvdT5ipsImuQRPFodu3b4/X62XTpk306NGjSc6hUDRXlHAfZESjUXw+H4C1ajEWi2HGY0nP2QZ2mx3TBqZdYNpsmDaBDYEpU4JtmhimxDSkJdq6KZMCbSRv60ZSsBOGmRJriWaAZsqUiJsYTVgMKi8vj/PPP5/nn3+eu+66q8nOo1A0R1SoJEvMmDGDCRMmZHxcu91OJBKxutdIKXHYnQQ3rCVevRMjHEaPhNDT8epICC0cIWFliYTQIxGMSAgtEkILh9DCye1aKEQilAyJJMIh4qEQ279eQ7S2hlgoSCwUJBoKJsMlwVCTV/Hr378/UkpWrFjRpOdRZBfTNLnnnnuYOHFitk3ZLekSFgcq00l53Fmif//+3H333RkfN503nS5oZZom7pJW4HRRt/ZLRI9eSLcbabMh7QIpJIlwEOHOBacTQ9fREjrxWITar9eQ0HViuiRuSmK6QcwwiRvg69UXw+XCmZtLLBxBFwLNkMSNZMhk27dbCVRWIpqwo3tRUREFBQVs3ryZfv36Nevu8Ycama7Fs2bNGnr37p3RMTNJz549GTx4MH//+98PyNyLEu6DjHRZ12AwiNfrRdd1OPp4igePZPt/XsCIhino2gMjNxfDJrALibH9e4TDDS4XiWCA+M4dJIxkHDtumOiGJKFLNMNA1yWaYfL9qk+J6+AoaUNc08GbBy4PCSmo3VnN1g0bOPHK31HUrl2TPt9x48Zx0kknMWrUKAoKCpr0XIrGc6jl2Nvtdqs2/YFAhUqyhNvtZuzYsSxatCij4+bm5hIIBKyyrrqebHYQjSfQTUk8Eia4fRuxUB11326mbss3hGtqCX3/LXWbNxLekRTttOesGZJEatJRNyW6KTFkesLSILDtewIVFVSsX09NeTk7tm6h/JtNmCZ0P+pocvLyMvr8dsW4ceOYOnVqk59HoWguKOHOEi6XiwEDBvDpp59mdNxEIoHX60VKicvlwm63J7vRdOiAbneS0E2iwSDhQC3hqkrCVZVEIlGiuklMN4kZBlHdIG6YxI3kpGMilTGimaBJMzkxmV6EIyUGyUnMeCxGNBjGFAJ3np9YPN4kKY8/5vTTT+eVV15p8vMoskMikcDlcmXbjGaFEu6DkPTf1Pp/V7v/9npsJW2JGAaRSIxwIEBUM4hqJlHNJKKbRDSDiG4S1SVxHeK6SVw3SeikskaS2SKaKTH0H7zwhGFiIgjXhYlGo+i6Sb/TT2X4xRcdsOfcq1evQ7qw/sHM6aefzltvvZVtM5oVSrgPMlwuF9FoFCEEuq5jmmayLrewYSt
ohW4kJxkjoUhStA2TqG4Q1c2U1y2JGaZ1SXrdqeuUBx43zWTKn0wKumGCDhgkQyhHDB2OHRu5npyMVQb8OYQQPPjgg0yaNKnJz6U48BiGoSaef4QS7izSp08fnE5nRmtuxGIx/H4/kKxb4nA4knnZhkHXS39P3BDEdJNoLJEU7pRgRzUjGSrRDWJa6tqQxI2kkCf0lHjrJrouSZikFtyk4uCaTiwWw+5xY3M7OfWaa6mrq8t4kand0bp1awYMGMAbb7xxQM6nUGQTJdxZpKSkBLvdzvbt2zM2ps/ns1ZlhkIh4vE4drsdp9NJt0FDMXPziKXCIsl4tiSa8q6juklEk8lwiSGJ6kkxjxsmMTMp4gkD4iYkTIO4kVx0o5smCUMiHU4Gn3cBNTur6NK3L61atcLhODCJS7m5uXTp0oUNGzYkM2kUioOYPQq3EKKTEGKxEGKtEGKNEOJPqe13CyG+F0J8nrqcVu8xtwkhNgoh1gkhRjflE1A0JBQKkZ+fj5QSj8eD0+nEMAxM0ySiaYx8+BkrHztiJGPbUc0kkopzR1OTkz944CYxzSChGz+ESgyThJ5e3m4QN0E3TI4Y8guWL17MDU/OxuVyEQqFDsjkZJpf/epXfPXVV3z11VcH7JyKpuXrr7/msMMOU6GSH9EYj1sHJkgpewMnAH8QQhyZ2jddSnlM6vI6QGrfBUAf4FTgcSGEetV3w8iRI1myZAmRSCQj47lcLmKxmNXzMR3jFkLgcrlwt25D26EjU4Kc8rJ1g6iuW5kl6fBITP9h0U3ykgqbGKaVMhg3JJpp4PbnE40lGHTaabTt0gXDMHA6nU3WFHlXCCE4++yzeeGFFw5YiEbRtMybN4/f/va35OTkZNuUZsUehVtKWS6lXJG6HQTWAh1+5iFnAf+QUsallJuBjcDxmTD2YGTIkCF89tlnxGKxjIzn8XgIBoMIIUgkEpimaS0OyM3NxVFQRPvjhxDXZSqrJOlZR3WZvE5lmUR1k7hhpMSa1OUHsY6bkriRrF1iCgd9Rp5ENJFgyJln4/P7MQwDr9d7QIUbYPTo0bz55psqXKI4qNmrGLcQoitwLLAstekGIcQqIcTTQojC1LYOwHf1HlbGzwu9AtiyZUtGxqmrq6NVq1aYppkUaocDTdPQNI2amhq8ubn0ueAyOo44haiZ9LDDmkE4YRBJpQdGUqGScErAY5pBTNeJawbx9MSlnvS8DbuTw3/xS6p3VtH/pJPp0LcvtbW1OJ1Odu7cmRXP96233mL0aBWhOxgoKSlR3vYuaLRwCyHygH8B46SUdcATQA/gGKAcSC9d25WL9ZP1r0KIa4QQnwkhPtO06F4bfrDw+eefU1ZWxkknncTcuXP3Oz7r9/uprq7GZrMRiUTQNA2n04nT6aSgoIBIJILd6aTzyaehO3OsvO2okZyUjBip+7r8IePEShOURNMxblOCx0PrHj2RDjuRugAdjjgCf34+BQUFaJpGUVFRVmKTubm5nHzyyQf8vIrMM2HCBAYOHJhtM5odjRJuIYSTpGj/TUr5IoCUcruU0pBSmsBT/BAOKQPqd8HtCGz78ZhSytlSygFSygFO56H7i/rqq68yevRovF4vy5cv55133tmv8SKRCH6/35qcdDgc6LqOrutW53cpJcefcx7DbypNetn6DxcrJdAwf5icNGRKuA3iejLerUuBy18ILjflm7fw27vu5oSxpxONRgmHwzgcDoLB4AGdnEzjdDq54447Dvh5FYoDRWOySgQwF1grpZxWb3v96kHnAKtTt/8NXCCEcAshugG9gE8yZ/LBw/Lly0kkEkyaNIkOHTpw7733snXrVtauXbvPY+bk5FBXV4eU0qpVYrPZsNlseL1eYrEYUkrq6ur45ZXXcsr/3Y1udya96VQ+d1Q3SQh7KiUwleNtmCSkjZhuENclcQSRaIyKLd9yyV2T6TVoULISoduNx+NB1/WsxLgVikOBxiTZDgUuAb4UQnye2nY7cKEQ4hi
SYZAtwLUAUso1QoiFwFckM1L+IKVUU/y7oH///vznP/9h7ty5vP3228yYMYMuXbpwxBFH7POYdrsdh8OBw+Gwlrynb9ff53A4cLndDL74cnoedwJvP/EodTuT/SElMPiii/nf355HSjBNiSMnl05HHcXapUsxJUgERe3acvHtt1PUqRMOp9MaN31Oh8OhhFuhaAL2KNxSyiXsOm79+s885l7g3v2w65BACMGQIUN46623mDlzJuFwmFGjRu2X2NlsNkpKSna7Pz8/HwCv1wskVxy2bt2aPsOH/+TYU664ep/tcDqd+/xYhaIlYZom06dP5+OPPwagurqam266qUnLPah63Flm5MiRjBgxgiVLljBs2LBsm6NQKPYSKSWzZ89mzJgxCCGYPXs248aNa9JzNhPhlrjd1dk2Yre4XHXEYjGqq5vOxj59+uzX+JFIhFAo1KQ27i+aplFbW9vMi+wbzfqz6HbXYtfsuKvd2TZlt7hCLiKRSLP+LMZiMerq6jJi4xVXXMGLL77I+++/jxCCq6++ml/96lc888wz+zXuz31PRHP4EhUVFcmbb74522bslnA4TGVlJV27ds22KbulvLwct9tNUVFRtk3ZLevWraN79+7NOozyxRdf0K9fv2ybsVs0TWPJkm+oqTk826bsFo+nmmOPjdOuibsf7Q+bN2+mdevWVsiwOfLQQw9RXV2967iplDLrl9atW8vmzIYNG+Ts2bOzbcbP8tJLL8mPPvoo22b8LH/+859ldXV1ts3YLaZpyhtuuCFj43399dfyzTfflFJKuXPnTjl//vz9HrOqqkoed9y9EmSzvbRtu0S+/PLL+/1c0zzyyCMyFotJKaX8xz/+ISsqKvZ7zFmzZskNGzbs9zhNSUoXd6mZqjqgQtEE1NXVcd9999G7d2+uv/56AGpra3n55ZezbFnL4oknnqCwsJDy8nImT57MgAED+NOf/tQsa9G88sorB6wTUzOJcSsUBxc+n4/zzz+fqVOnsnz5ct577z2WLVvGddddl23TWhRXX301Y8aMwe12s2LFCtauXcvtt99+QBp07C2VlZUHLP21+T17heIgQAhBly5diMVilJWV8dprrzFixIgDVp/8YMHpdDJmzBj+9a9/8fXXX1NQUEDr1q0P+fUB6lOkUDQRRx55JLNmzWLDhg1Mnz7dyqFX7B0TJkxg/fr13HvvvcyaNSvb5jQLlMetUCgULQwl3AqFQtHCaFHCrWnafie1KxQKRUunxQj39OnTOeussxBCcMopp/DRRx9l2ySFQqHICi1icjIcDrNx40buuece2rVrRyAQYPPmzQwaNEg1EVUoFIccLcLjXr58OZ07d6ZLly7cd999tGnThs8++4xgMJht0xRNSDQa5cMPP8y2GQpFs6NFCPfw4cPZuHEjN998MxdddBEXXnghJ554IgUFBdk2rUl46KGHDvlmt48//ji33XYbH3zwATfccAPffvtttk1SKJoNLUK4ASZNmsQNN9zAo48+ysqVK3nttdcoLy/PtlkZxTAMpkyZwuGHH94sV4YdKHRd5/XXX+e8887jd7/7HcFgkPLy8mZeVVChOHC0GHXo1KkTAwYM4LnnnuOYY47h8ccfZ9y4cWzatCnbpmWMl156idzcXE4//fRDWriff/55zj33XLp3786dd97JH/7wB2bMmIGmadk2TaFoFrQodRBCNGiNNWfOHObMmcPy5cuzbNn+EwwGWb9+Pf369TukRRvg8ssvZ+HChSxcuJBRo0YxaNAgOnXqdMi/LgpFmhb9TfD5fNx444289NJLrFu3Ltvm7DNSSkpLSxkxYgTDd9FC7FDk9ttvp7CwkA8//JCFCxcyYMAAbr/99mybpVA0C1pEOuDP0b59e26++Wauv/56pk+fTps2bbJt0l4RjUa58cYbufXWWznssMOybc4+YZomNTU1QPJHaOzYsUSjUSZNmsS55567T2MOGzaMIUOGcPrpp1NcXIyUkvbt2zNp0iT
uuOMO3O7m2wFGoWhqWrxwAxQUFDB//nyuvvpqJkyYQN++fbNtUqOZOnUqv/71r+nVq1e2TdkrAoEAS5cuBZJtoNLFf4QQvPvuu+Tk5Oz3Oex2O8XFxda4Q4cOpaamhlmzZtG3b19Gjhx5yFeJUxyaHBTCDckv+YMPPshjjz1GIpGgf//+2TZpj2zevBld1+nVq1ezF6Da2loefPBB677NZsPj8QDg8Xj4z3/+0+TPQQjBGWecwUsvvcTSpUvZunUrV155ZZOeU6Fojhw0wg1QUlLCNddcw5QpU2jVqhUdO3ZstoIYDoe55557mDRpEt26dcu2ORbp1kgAK1euJN0LND8/n5tuusk6Li8vj+OOOy4rNp5zzjmcccYZzJ8/n2effZbLLrus2b7PKoVx/0l/Jpvreww/vM8HysaDSrgB2rVrx3333ccll1zCjBkz6NixY7ZN+gnbt2/n97//PfPnz2/SZqVSSsrKyujUqdNujwmFQlRUVFj33377bWbPng1A//79+fe//w0kP5B5eXlNZuve4nA4uOSSS3jsscd4+eWXOfPMM5tl+YN7772XW265Bb/fn21TWixSSq644greeeedbJuyWzZs2MDSpUt5/PHHD8j5DjrhBsjJyeH5559n4sSJXHbZZRx77LHZNsli/fr1zJ07lyeeeKJJRXv58uWsXr2aTz75hEGDBjFkyBB69uwJwOrVq1mxYgUAO3bsYNWqVdbjRowYwcqVK5vMrkzicDj405/+xJQpU5g3bx69evVi2LBh2TarAfF4HLfb3ay9xZZANBrNyLxJU2GaJoZh4HQ6D8j5DkrhhmTctbS0lNmzZ+N0OpvNhOV7771H3759mzz75Z///CcAkydP5uabb2bu3LlW1kq7du0sL7xfv35WOKSlcuutt/Lcc8/x4YcfYhgGJ554YrZNUiialINWuCEpUNdffz2lpaX89a9/pW3btlmzRUrJ2rVrWb9+Pf/3f//XpOdasmSJleN+2mmnUVZWxrZt2ygtLaVr164UFxe3uLTJn0MIwWWXXUYgEODee++lpKSEPn36KC9XcdDSohfgNIZWrVrx1FNPcdNNN7F+/fqs2fHll1/ywAMPMGXKlCYvjjV06FCCwSALFizgtdde46KLLuKJJ55gzJgxHHnkkQeVaNcnPz+f+++/n2nTprF06VK2bNmSbZMUiibhoBduSKYKzpo1i+eee+6ALo8Ph8O89NJLvPfeeyxYsICnnnrqgEygCSEYNmwY69ev5/HHHyeRSHD00UcfEh6ozWZj9uzZLFq0iGeeeSarP9YKRVNxUIdK6pOfn8/111/Po48+Sn5+vjVR11T85S9/YfXq1QwdOpRbbrmF559//oBNXACcfvrpnHbaabz//vuHXMzX4XAwZcoUNm3axMyZM5k0aZK1kEehOBjYo8cthPAIIT4RQnwhhFgjhJic2t5NCLFMCLFBCLFACOFKbXen7m9M7e/atE+h8bRv357bbruNO+64o0EKXKYJh8OsWrWKSZMm0apVK4LBIKFQ6IDn9AohDjnRrk+PHj2YPHnyQVu3XXHo0phQSRwYKaXsBxwDnCqEOAGYAkyXUvYCaoCrUsdfBdRIKXsC01PHNRt8Ph9/+9vfuPPOOxukwWWSN954g+HDh1NUVMSmTZt49913WbhwoSpLmgUKCgqaZX63QrE/7FG4ZZJQ6q4zdZHASOCF1PZ5wNmp22el7pPaP0o0s+Cqw+Hg3nvv5eWXX+bzzz/P+Pjnnnsub775JjNmzODss8/m8ssv58orr8TlcmX8XAqF4tCjUTFuIYQdWA70BB4DNgG1Usp0f60yoEPqdgfgOwAppS6ECADFwM4M2r3ftG7dmmuuucaq751pHnjgAb799ltmzpzJww8/TJ8+fZrkPAqF4tCjUaolpTSAY4QQBcBLQO9dHZa63pV3/ZPgrhDiGuAaSIYvskFT5nX37t2bI444ghNPPFGVIFUoFBllr9IBpZS1wHvACUC
BECIt/B2BbanbZUAngNT+fKB6F2PNllIOkFIOaM5LWfcHIYQSbYVCkXEak1XSKuVpI4TIAU4C1gKLgV+nDrsMWJS6/e/UfVL735WqRJpCoVBkjMaEStoB81JxbhuwUEr5qhDiK+AfQoi/ACuBuanj5wLzhRAbSXraFzSB3QqFQnHIskfhllKuAn5SXk9K+Q1w/C62x4DzMmKdQqFQKH7CIbHkXaFQKJqK5557jieffJIvvviCe++9l6qqqiY/pxJuhUKh2EeklMyZM4cePXpQXFzMe++9R2VlZZOfVwm3QqFo9rRu3TrbJuyShx9+mD/+8Y+MHj2avn378vTTTzNx4kRM02zS8zaLIlOmafLhhx9m24zdUlFRQXl5ebO2ccuWLdTU1DT5B2Z/qK6u5tNPP23Szj/7SyQSyej7HAgE2LBhQ8ZWzYZCITyeatq2bb6fxcLCdWzZEszo6zhp0qSMjldeXs6qVavYvn37fo0zcOBAJk2axB133MG5555LaWkpF198MUuXLt1vG3/uu9wshFtKeUDiQvtKIBAgGo02axvD4TDPPGMjGGy+NnbunGDQoBpisVi2TdktNTU6l1ySydfwJF59FSAzYzocEdqd+ik5t76YkfGaAtdmP+Hwb5r19yUWi/F/tf9HzJGBz+Kf4OrQ1RACLoCP+Tgjb3dcxne7r1kIt91u58wzz9zvcaSUvPzyy7hcLkaPHs2XX37J0qVLue6667DZ9j0qtHHjRgzDyIiNTYVpmuzY0YaKisHZNmW3FBev4pRTTqGwsDDbpuwSKSXz57/N5s3N9312u6vxt32IzWduzrYpu6Xth23ps7PPfn9fTNPk8ccfZ+jQoRx11FG8+eabJBIJzj777P2uLV9eXs624dsI9Azs1zhNSZ599825D6oY99atW3n//fet5rjdu3enqqqKjz/+ONumKRSKvWTp0qXU1tbSvXt3hg4dyqBBg3j//ffZunVrtk3LOgeVcHft2pW+ffsyfvx4ysvLufvuu4nFYgwZMiTbpikUir1k6NChhMNh7r77brZt28b48ePp27cvXbt2zbZpWeegEm6AESNGMGbMGOx2OwMHDuT888/PtkkKhWIfueCCCxg4cCB2u50xY8YwYsSIbJvULDjohLtHjx5ceOGFFBYWcsEFF3D00Udn2ySFQrGP9OvXjwsuuIDCwkIuvPBCevTokW2TmgUHnXArFArFwY4SboVCoWhhKOFWKBSKFoYSboVCoWhhKOFWKBSKFoYSboVCoWhhKOFWKBSKFoYSboVCoWhhKOFWKBSKFoYSboVCoWhhKOFWKBSKFoYSboVCoWhhKOFWHHC++uorPvjgg2yboVC0WJRw7ydSSkzT5IEHHmDLli2N7vmYftzuLuvXr+fEE09scHnuueea+Nk0PTU1NcycOZM333yTESNGNOs2ZgpFc6VZtC7LNBUVFbRr1+6AnGvTpk0MHz6cadOm8cc//pFwOMw777wDwDfffIOu67t8XCwW4/LLL0dKucv9vXr14tVks0KLTDWczSYFBQX85S9/oaKigtdff53169fj8XiybZaFYRjZNkGxC9q1a0dFRQVt27bNtinNgoNSuEtLS3n44Yf3q89kY5BSsnjxYiZPnkznzp0ZO3YsDz30EPPmzQNgyZIlJBKJXT42JyeH5cuX73fvvJaGEILvvvuOGTNmsG3bNh544AHsdnu2zbIIh7NtgeLH2Gw2ZsyYQWlpqfXdOtQ5KIX7QGKaJg6HA9M0LW8t7WXPnDmTnJycbJrX7Kirq2PJkiWMHTuWt956i9mzZ5Obm5tts4DkD/FvfvPHbJuhUOwRJdz7gRCCUaNGMWzYMB5++GHeeOMNOnbsyFVXXZVt05otubm5FBUVsXXrVhYvXtyswiQKRUtBCfd+0qNHD8rKynjwwQeZMWMGXbp0ybZJzRqHw8FFF12ElLLJQ1kKxcGKEu79RAiB3W5n4sSJ2TalxSCEOORi+wpFJtmjyyOE8Ag
hPhFCfCGEWCOEmJza/qwQYrMQ4vPU5ZjUdiGEmCmE2CiEWCWE6N/UT0KhUCgOJRrjcceBkVLKkBDCCSwRQvwnte8WKeULPzp+DNArdRkEPJG6VigUCkUG2KPHLZOEUnedqcuuk4+TnAU8l3rcx0CBEOLAJFUrFArFIUCjZoeEEHYhxOfADuBtKeWy1K57U+GQ6UIId2pbB+C7eg8vS21TKBQKRQZolHBLKQ0p5TFAR+B4IURf4DbgCGAgUASUpg7f1azTTzx0IcQ1QojPhBCfRaPRfTJeoVAoDkX2Kh9LSlkLvAecKqUsT4VD4sAzwPGpw8qATvUe1hHYtouxZkspB0gpB6hFKgqFQtF4GpNV0koIUZC6nQOcBHydjluLZF7X2cDq1EP+DVyayi45AQhIKcubxHqFQqE4BGlMVkk7YJ4Qwk5S6BdKKV8VQrwrhGhFMjTyOXBd6vjXgdOAjUAEuCLzZisUCsWhyx6FW0q5Cjh2F9tH7uZ4Cfxh/01TKBQKxa5Qa44VCoWihaGEW6FQKFoYSrgVCoWihaGEW6FQKFoYSrgVCkWzx+fzMWiQKnmUplmUddV1nSeffDJj40UiEV588UUKCgoyMl4gEKCsrCyjNmaab775hs6dcykpWZVtU3aL37+F+fPn43a793xwltD1avr2bb7vs90eI39zPn2f7JttU3ZLbnkuS2NLqaioyOi4drs9Y9/B1atX0yPQg0T+rlsLNge+1b/d7b5mIdx2u51Ro0ZlbLxMjgVQVlaGzWbL+LiZxOFwcMIJRRx11FEZGU/XdRyO5MfDMIyM9IWcO3cLf/7zMDTNt99jNRUnn7yCl15qvu9zXV0d//rXDq4YtevlERKJxERKiUhVn5CpihM2Ybe2NSWrVq2itraW4cOHZ2S8+p+/TH0WA4EAU4+fSseOHfd7rKZisG3wbvc1C+EWQtCzZ89sm/GzbNiwoVnbuHr1atq0aZMRG7du3crtt9/OnDlzCIdjCDOLAAAgAElEQVTD/PWvf+XSSy9l4MCB+zWu1+slGOxKPF643zY2DRKbzZXR97m8vJy8vDx8vsz8WFVXV+P1eunWrRtVVVXJjTkadeFa8vML+GLHYj6MvEowVoOpC7y2IsLxMJF4mKu6T8bjzKFdXkcKvcUEAgGcTiehUIiSkhJ27tyJ3+8nEolQUlJCOBzGbrejaZolmOFw2NqXn59PZWUlJSUlAFZHo+3bt2O32zPyOm7bto3S0lJmzpxJXV0dCxYsYMCAAZxyyin71YwjPz+fjh070qlTJ0KhEDk5OYTDYZxOJw6Hg2g0is/ns/bF43GEEDidTiKRCH6/n2AwSE5ODpqm4Xa7SS5hAZfLRSgUIi8vj3A4TG5uLrquY5ombrebYDCIz+cjEong8XgwTdNylDwej/W8fq5DVLMQbkXzYvHixfzyl7/k008/5f333+ekk07i1Vdf3W/hPhR5/PHHGTlyJCNGjMjouFE9xJfR9wjpAcrq1lAVq8BT7UOYDlrbutEh5yi+2vkpDruPvr5jsOXZ+aJ6Ka9uXMDoLucxqstY2ng6IKXE4/EQj8ctEUmLk2malhilRSR9rBCCSCSCy+Wyrl0uV0afI8Cnn37K0UcfTXl5OVOmTOGyyy7jrbfe4uSTT85IF6VQKER+fj6hUIjCwkJ0XUfTNIqKiqipqaGwsNASYSkl8XickpISampqKCoqIhKJkJubSzQaRQiBaZrWmFVVVeTn5xMIBHA4HNhsNqqrqykoKKCqqgq/309dXR1CCNxuN9FoFLfb3ajnpYRb8RMuv/xyDj/8cNq1a0dlZSWzZ89m7dq12TZLUQ+bsDHzk8fQjDgd/R3pXtgdt93Ls+/Ox+9zcViXdlRtDVMVX0O/vrUUuVqjGSbtcnqwpmIV6A5audsw+rAzASzRSd+22WyYponNZkPX9QbnTreeS4u5zWZrslZ0Z511Fr/85S95++232bBhA//73/94/fXXM9avNCcnh1AohMP
hoK6uDrvdjs1mIxAIcOONNzJgwACuvfZaIpGI9Zxra2vxeDzU1dXhcDiIxWJWWNFms1k/bvn5+SQSCbxeL6ZpMm/ePN555x2efPJJ8vPz0TTN2ielbLRogxJuxW5YvHgxH3zwAR9//DHjx48nPz8/2yYp6uG25/KXgY9z9oKz2OEy2OioJlfkUiS6kBtzE9mSx87vo3xdsQN37pd4qoqoKdqJ11GEw+YiUBcjlkhwQsfhOKQTr9dLOBxGCJH86++UJGJhnA47CA+mlNjtduLxOF6vF13XcTqdhMNhfD5fk/YQXbhwIV9//TWPPPII06ZNo127zPVlCYfDFBYWUldXR15eHoZhoGkafr+f119/nUWLFmEYBpdeeikFBQXE43H8fr/lcYdCIVwuF7FYDMDyuAsKCqitrSU/P5/vv/+ed955h9LSUuLxOM888wy1tbX4/X5CoWSPmrTY5+TkKI9bse+0b9+e1q1b4/f76dy5c7bNUfyIWCxG91ZdWfibhVz4z/NZvmU5Tt1BsasImQAzYXLfhffz8ZdL6ezvzJtr3qRDp0K2fFuJ25dHeWUVsYTOfW//lbvGTiYcDuP3+4nH4zhljOfvPA5Tj4GQ/OqWleQUtMU0TQoKCgiHwzgcDgKBALm5udTU1JCbm0tubm6TPNc2bdoQCATwer0Z/yw6nU50Xcdut2MYRnJSt14z62g0SmlpKXfeeSdvvfUWxx57rBWP1nUdm82GlNL615EOe0gpcblcrFq1ilNPPZVAIAAkkwjsdrsVVnI6ncAP/3KUx61QHMTk5uZSWVlJB297nvjVLG5ceCM7anbQs7gXdmnHTBj888MFeO1eorEILoeT7Z84OKLLALbt2ERd8Q5KtE78vzcXcErXUzlt0GlUVlbiccHyNx8mENJo3XkAvY45CeHMJR6PY7fbqa6utiYni4qKqKyspLi4uEk97qbE4XCgaRo2mw1N06zn8fTTT1teNEAikeCiiy7ikksu4ZxzzqFr165MmTIFKSWGYVgC7HQ6+d3vfsf27dv5+9//zj/+8Q9LtCGZFTN79mx+97vfYZomDofDmkfYm2wZJdwKRQskEomQl5cHwADPAP7fJX/nrKfO5usd6/A5fOSIHOIiTmV8JxWV5VTvrOb0gWMpcbXHxM7ReQN464v/UOR24LY5CQaDBHZs5JV/z2DH1s9o3aE/w34zlYLWXbEJgd1uxzRNiouLLY+7qqoKn8/X5B53UxKNRikqKqKurg6/34+u6yQSCf7+97+TSDTM8d62bRtTpkzhtddew+v18tlnn2EYRoNjbDYbr732GlJKVq5c+ZPzSSmZPXs2F1xwAQUFBYRCIYQQeDweEomE5fHviUN25WQ8Hmfnzp3ZNkOh2CfS3pmUEpuw0bOoF+9c9w492x5GXayOdRXr+WzrClZ9twpfnp+BfQYS1aJ8u30rwmGj7vsEJ/YYQ16ugzufv4HN2zby7cbVfP3lcoadeRvn3jCf4rbdEST/xqcFJZ0WKITA4XBgmiZ2u/0n3mJL8cDTPzxut5vq6moikQgAmqZZx0ybNq3BGo7Vq1ezbNmyn4g2JGPcK1asaCDabdq0Yd68edZ9h8NBq1at0DSN/Px8vF4vkPwXpUIlP8Prr7/O1q1b+e677zjyyCM5++yzLe9FoWgJ2Gw2YrEYIuUNa5pG2/y2vHHtq7z25Wu8+uXrLF3zERVV24kkwlSZduL2BGbCBB3WrvuKUwaOZnjJr2k9WHDjtAs5vNLOMQNGcdhxY8jNy7dEOp31IIQgkUjgdDoxDAOXy2VNUv5YcNJ//5s76TTAuro6ioqKLI87HfqApIi/9NJLFBYW7lKs98SoUaMa/BDous7OnTspKCggEAhYHrdKB9wDkydPZty4cQwfPpzLL7+cX/ziF0q4FS2KWCxmhSai0Sher5fa2lp8Ph8je47i3IG/5o0Vb1ARrCARS+Dz5BGNRIlHEyAF+gidzm06MfL4kRQVFuGvKOK7j77
g5F/9gZLW7amqqsLr9aJpGg6HwxLpdH6yx+OhtrbWWrjj8/maJI+7qUmnAzqdyXBReoKwvkDn5OSwrw3Nr7zySh544AHeeusta5vdbsfv9zdIB4Tkwh3lce+GqVOnUlpaSq9evXj11VdZsGABEyZM4IUXXmgxf+8UitzcXOrq6oDkFz69Gi8dsw2Hw4w+djSB2lpyXS6itVV8O+9RYhvX4mnXgSNu+jMJpxM7sLOinIqV23B7W9Opc0/qqqsp9PlIaBobX3mR5f+cj3B6OOLM39DjxJEUFhdjGAYlJSWEQiGKi4utPOaWRjweJy8vj0gkQk5OjrWK0ePxWMckEgncbreVebI3nHXWWQANJjqllITDYbxer7Xd5XI18Mr3RMt8tfeDcePGMXjwYDRN4/XXX+fKK6/k0UcfzbZZCsVeEQ6HrdV80WiUvLw8K284fb195TJE2Wa2vLYQZ46XoydPB5sTYbdh7Kxg7Z0TMYQNM2Zirv2S1kf3Z8sLz/LdB4uJBOvI69SNw8++kDPumYqpa3z17ts8f8WFuPILGfnH8eS1bU+XXr0IBALk5OS0yH+t9eP3UkorxPPyyy/Ttm1bgsEgW7duZcWKFT9ZiNQYNm7cyHHHHcfGjRut851zzjnWnED91MO9cRwPOeH+3//+x8iRI5FS8tRTT9G7d28KCwuVt61oUbjd7gYx7kQigcfjQdM0PB4POz94k61T76TTBVfT59a/IgSE160l/TGXQtD3zmlIAbGKcgo/XkIikcAubAy44VZwOIlHIySiESJVOzClpMtxA+l83PEEqqv516Q78HfqzGUPzSDH72+xHrfT6SQej2Oz2ayl/EKIBh7yI488wiOPPLJP40+YMIFt27YxdepUIDk3MW7cONxuN6Zp4nK5rB+LvXkNW+arvY8sXbqUjz76iNLSUmw2G5s3b+aYY47JtlkKxV6TzuaAHyrmpReSVL73Bt89eT89LroWb9cexL/fgkBgQyIEqVqBguimrzGlxARaHz8YE0hEIkQrKzBN0KXElBIDMEyJYYIpk+f95SWX8v78+cy5/vdc+/QzDSbzWhLpglrplYzpycmpU6fuc1z7x6RFG5Lv25133smdd95pTYqml8knEolGZ5YcEsItpWT9+vXMnz+fBx54wPpLp0Rb0VJJZ3UIIayVfJFIBFG1ne0vP0/nsy/GXVSCGajChg0hUisCAQGYSDCTtzEliUgIQ0p0MynSppSYMnlbT1+bEgMTzQCXO4dfXPRbFj08nUevvIKb//7/svuC7CPp5esej4eamhqklDz22GM89NBDDUIjhYWF2O32BmmRNTU1uxwzPz8fp9OJaZpWDZj0sVJK5syZg91u56677rIyVQzD2Kt0wEMij/uTTz5h2rRpPPbYYy0yDqdQ/Jh0TDtdeS4QCFCQn0/Flyvxl7TFW1CMGaqFWAQRD2GLR7DHw9jikeQlFkbEwhANQywE0TBmJIyMhDAiIfRICD0cJBEOoYWCJEJBEuEg8WDyOhaqw9Q1Tr7qamrKygju2JHtl2SfCAaDFBQUkEgk8Pl8PPnkk9xzzz0NFt8ceeSRrFixgrKyMjZt2sSOHTv47LPPdlkts3fv3rz77ruUlZXx5ZdfUlZWxieffEK/fv2sYwzD4PHHH+eBBx5g27ZthMNhIOn9NzaN8qAX7nfffZe3336b6dOnqzi24qAhXZDI7XZjGEYyrS1QS+37b2DL8aAFayAWQUYjEEsKtS0ewREPY49HELEIxCPWMUYkjIxGMKNhzGgEMxJBj0TQIyG0SJhE+jocJhEOkQiHiIdDaLEETm8e7/2jZXrcOTk5RCIRHA4H27dvZ9KkSQ329+nTh1mzZlFUVGTFwuvq6mjVqhVTp06lV69e1rFut5ubb76ZXr16EY/H8fl8aJpGmzZtmDt3Lscff3yDsadOnUo4HLY6Qql0QJJ/ST7//HPef/99xo0b1yKW40oprZzONJMnT2bx4sUAXH311Vx22WX
ZME3RzEiHRiD5hU8kErhtgtg3X1E8aixmNIxhs2G3iaR7ZgO7zY7NBqYEYUowJdKUSNNEGhLTBMM0k/FtU6KZEk2aaEYyhKKbZnKbKdGN1G0Jbbt2QctQPPhAo2kaubm5xGIxrrvuOiu7JE15eTm33norhmFwxBFH8Oijj+LxeIhEIhx77LGccsopbNiwAYBTTjmFESNGkEgkrB+Eu+++m5UrV2KaJlu3bm1wbiEEf/jDH3jxxRdxuVx7lWp40Ar32rVreeSRR5g1a9YBWRhQUVFBYWFho/spmqbJhg0bGnxIli1bxsMPP9zguMmTJ1NaWgrQYieAFJmnfvqaldJmE0jTwIxF0G1gs9kxbQJpE2ATSLuAtDCZIM2ko2AayWvdBN0w0SVouokuk3HthGEmhdww0U2ThCnQDIlmmmiGSSwcyvbLsc+kGxg4HA7mzp3L+++/z0UXXWTtr66u5uOPP6ZHjx7cf//92O12IpEIbrebeDzeIBPE5/PRqlUrK8vH6/UyadIkxowZw4oVK35y7pkzZ3LhhRc2aGDRWA5K4f7ggw9YtGgRc+bMyVjB9d0RCAR44YUX2LRpE+3bt6dLly6cccYZPznulVdeYUe9OKBhGCxbtqzBMQMGDNjlG5wNEokEn3/+Of3798+2KYpdkEgkLIfEMAw8Hg+xQC1GOEJs+zZy/PkYNjs2u0DYQNgFCBsmNkwkupQYZlKQdSPtVUt0aZIwQEt71EZyMjIajRLXNHDnkDBlSrhBMw3ikQhNucBdSsnixYsz1sPyx2OnwxN2u50PPvjgJ8f07t2bBQsWkJeXh8Ph4O2332bHjh0UFBTQr18/LrvsMnRdZ9CgQSxbtowtW7aQk5PD2WefjcfjYdGiRYwdO5Yvvviiwbiffvop5513nuW87U2ZgINOuD/44AM++ugj7rrrriYXbUj213vmmWd4+umnWbFiBVOmTMFut/PPf/6zwXHHHntsg76DbrebOXPmNNu4ezQa5bXXXuOdd97JtiktmnPOOYf58+dzwgknkJOTk7FxPR4PO3bsQAiB1+tN9kH05WFKqPt6DfZeRyByPGCzpTztVCaJpiPcHgxpJoVX1wlv+45YOEzMMEkYkrguiZsGcR2cxW3A5ycWiRJPJBC6QSJ1nGZKErrB1tWr6Tnw+D0bvY9IKZk1a9Yuq+1lgnSnn1AoxKxZszjzzDNZt24d69ats84/depUHnzwQYQQVFVVMX78eIYMGcILL7zAOeecY5Vnvfbaa3nhhReYNm0akKxLcueddzYQ5Q4dOjBq1Cief/55SktLyc3NbXRVwDQHjXBLKVm7di2LFi1i0qRJ+P3+Jj+naZpMnDiRv/3tb8ybN48XX3yRjRs3smzZMiZOnNjg2M6dO2f0i6toGfTv359bbrnFintminSz3vRiEZ/PRzAU5MjSe1kzeRzGl2FKDu+LdLswbAJDgIhHMGtrsLdpj6kbBDeuwdAlsXicuKYRN0ziOkR1g7huEjNMtIptaNiR3nzs+QXISAzd7kAzIGGYbPxyFTZXLkf+YljGntuBJN3Y1+Px4PF4+OSTTygpKeG3v/2tdczXX3/NunXr+OCDDzj//PO56qqrKCoqstL9DMOwmicYhkFeXh5nnHEGTz/9NNOnT2fLli0N5q4KCgqYPn06N954I926dbO6Dh2SC3C++OILHn74YebOnXtAPG1Ixhnvu+8+rr/+eubMmcOIESO47bbbDpi3rzi0MQzD+pwlvUY7wleIppvYwmGqv/qc/J5HYDN07KaB0OJold9DeVkyV9sEzTRJmEkPOqEnvWiDVO62hEQ8QUwziAWCxL/7jphhojvdeNu2Z9uWrQSDEboefxh9myCMcSBIN/aNx+MUFRVRWFjId999RywWs/puQtIx3Lx5M/fffz9r1qzh3//+N8888wxSSnJycqz0wb59+3LzzTczceJEFixY8JPwh81mIxqNUl5eTu/eva1FPk6nk1gs1ug5skYLtxD
CDnwGfC+lHCuE6Ab8AygCVgCXSCkTQgg38BxwHFAFnC+l3NLY8+wLixcv5t133+XJJ588oIIphKC4uJijjjqKp556ilgsxkUXXaREW9HkpJdqp8U7XV41BJgeD4l4DDSdcG0NhOsQoSA2m8CGQCIxpIkpk8Ktm6Ri1j/ErvV0/NtMxsNNU2LI5OpJQ9MI1dQSi0Sxuz1I2XLqb/+YvLw8qxt7bW0tLpeLTZs2MWTIEEaPHk1dXZ01gTlr1iyklLzyyisMHjyY0tJSq9u91+tFSsmECROYP39+A9G+4YYbLI88XRxs48aNtG/fHr/fj2EYe/2PbG887j8Ba4F0DGIKMF1K+Q8hxCzgKuCJ1HWNlLKnEOKC1HHn78V5Go2UkmXLlvHhhx8yfvz4rJSVLCkp4f7772fVqlV0795dLfBRHBDi8bhVwS4SiZCbm5sss9r7KAp/cQrb33wZEx1ZVYVDmNh0E2ETiJRwm7KeEEuZjG0bsoGA6/UmL3WZnLA0pETXJPGaAKYEu8fDGbfeYtVIaWmkQ06JRIL8/HyklAwbNoyRI0cSi8Wspeg2m41evXoxfvx4AGbMmMFNN91kpRMmEglrleS0adMs0b7rrrv4/e9/j8fjsVa5ejweYrGYVdURsLrFN1bDGuUaCiE6AqcDc1L3BTASeCF1yDzg7NTts1L3Se0fJZro53j9+vU8++yzjB8/nsLCwqY4RaM5+uijlWgrDhher5dQKNSglnR+fj5xYcffpSe6CXHNJBqJEo0miBgmUd0koievo7pJTE+KdVSTyYlJ0ySRSv/TpCRuSnRDoktBIuVxa6aJzZuXDCW4ctB0ncEnj24R6yR2RW5uboPXMB3yqKurIycnh7q6Oqu7fe/eva3H6bpu9ZKMxWI4nc4GTYDT9OrVi8LCQpxOJzabDb/fTzQaJT8/36rHnfa098bxbKzHPQO4FUinRRQDtVLK9GL+MqBD6nYH4DsAKaUuhAikjs9on7ClS5cyf/58nnjiiRb7N02h2FcikYiVpZS+HQgE8Pl82Lr2wtaqPbGKMjSZwI7AbiNVGTDpq0nZ0OtOL66xskUMA81IinfCTOdzS3QDYjW1mAKOHjUCT1ExlZWVFBQUNMiaaimk67yk86jTYU6Hw2E1AZZSYrfbG0weCiGsvOt0DZP6lzTpbvDpbZqmWXne6RBXOo7+48V3P8cePW4hxFhgh5Ryef3NuzhUNmJf/XGvEUJ8JoT4bG+rcC1evJjFixdb6TkKxaFGOu4ajUatCa/03/ouQ0/E06EzUcMklsoOSXrYJjFdJ6brRHWDqG78sN8S6dREpSGT+dxpMU/leWtmMoRS0rUb36xew9jrb8Dv97fI7jfwQypgWpzr53SnKzBKKbHZbHTr1q1BY4T//ve/AFaIJB3/rqqqApIty/r27WvtS2ed2Gw2DMNo8DjIfB73UOBMIcRpgIdkjHsGUCCEcKS87o7AttTxZUAnoEwI4QDygeofDyqlnA3MBmjTpk2jLV6zZg3//e9/ufnmm60mmwrFoUb6i5/+8qczINKCM+CWe3jlt2cQjYawC5GcmJRYZV1NwExXAUSi68lMkqQ4m+gGJMykmGummco+SQq42+endc/DadWzJ0Xt2lntvloi6SbBfr+fQCCAy+XC6XRanYSqq6vx+XxEIhEKCgoYNmwYixYtIhwOc8MNN9CpUydL2AHKysqsSoDHHXcc7dq1s+qkp2vK1NTUWJ3l063LEolEZtMBpZS3AbcBCCFOBG6WUl4shPgn8GuSmSWXAYtSD/l36v7S1P53ZQY7h/bu3Zu7775bLf9WtBiaonGuYRjWFz39lz4SieByuYhGoxR070Fu527sWPM5NmHDbpV0NZHYkCLlAaYmJ5P1tpMrJ5P1SITlaWumScxIhkwSpoHPX4DN5aJbv374CgqsmtIt0etOVweMxWIUFBRgmiaGYVBUVGS1ZYtGo/h8PqSUVn0YgMrKSiorK3c7dvpfUCAQwG6
3Y7PZqKmpwev1Ul1dbcXQ02GXdLPgxrA/eWulwHghxEaSMey5qe1zgeLU9vHAxN08fp+w2WxKtBUtiqbwRr1eL8FgkFAohMPhsPKRI5EIxcXFRCIRxjz2DHHNJK4bRDUjFR6RyeuESVRLhk/i6TCKIYkaENMFMd0kYZjEjeR2zTBJ6AaFHTrTa+gwPLleTrngAoLBICUlJS12ctLn81FTU4PL5aKmpsbKq043QN65cyd2u526ujoikQgDBw6kU6dOexy3bdu2jBgxwvpBcLvd2Gw2qx9oSUmJlcmSjhzszWu4V8ItpXxPSjk2dfsbKeXxUsqeUsrzpJTx1PZY6n7P1P5v9uYcCoViz0SjUXJzc8nJybGK8KdXAAYCATweD9Lhot8lVyeF2kgKd0T7IbadzC4xkvFvQ9YT8eSy9rhuErfi3RJ/2w50H3A827Zs4aQrriAQDJGTk0NtbW2DVl8tiUgkYnVc9/v9VkpjQUGBFR4xDAOv14vH42Ho0KHMmzePgoKC3Y7pcrmYM2cOJ554Im63m2AwiKZpSCmtbJWamppk3n0oZP1Y7M1rqFaKKBQtELfbjaZpVpZCNBq1VvDl5eUlGwMUFlEyeDi2Vu2I6pKIbhIxkimBP6QFyh9uGyYxzUh62XoyRTBuGCRMicufT+uevajasZ1IMET3Y47B5/MRj8fxer0t9l+wx+MhHA7jcDgIh8NWOmD6RzAYDGK324nFYlZPyt69e7Ny5UqeffZZ/H4/Pp8Pv9+P3+9n+vTprFu3jsGDB+Pz+ax2ZA6Hw6orky5RoOs6ubm5DepxN5aDZsm7QnEoUX8pdjojon7tjPSkZbfjBzPg0qt5d/qDaJGw9XiZWogjZXKS0iAd7072mtStBTgmnqIS8tq0IxKN4nZ7mPL2W5YN9SdFWyL124ulqd+erP6+dPlcm81G69atGTNmDN9++y26rlsrIwFrviFdX9s0TSt7pP57BMn5ifpZJ41FCbdC0QJJNwhOi0G6WbDNZkPTNOva5XIx7KrrMKTk1b9MRjYQqGSGiSFJ5nSnl7XLH+py61JgMySBmhq6tmvH1Q8+iC1VCS8ej1s5yUKIFtnpvb7oplc3QtITT5fLhYbecHpf/YUz9VP6NE3D6XRamSKaplmPTSQS1r70e1b/h6KxqFCJQtECSedsx2Ixq7h/elteXp61BD0YDGKz2Tj+okv59UMz6XjswGQ8O3XpMOB4PG3aEjPM1EXSa/iJxE2SS+BNiEWi9D/5JK647z5yU81CTNMkLy+PeDxOXl5ei8woASxhTS+GSYtnfdFNL1VPe+DpSn7psEo6N1sIYSVPpJs5m6aJw+Gw9judTnRdb7Av/YO3N/9aWt5PpELRQohGo1RWVhKLxSgrK0PTNEpKSjI2flFREZD8C5+Tk4MQwtpWWFiIEIL27dtb+0deejnDzjsfo54HaHc6MU0D0/jBE3e4XGj1muUCuDweXB6P5R36/X6ryFpLzeGG5A+g2+1u8BrCD+GS9L76pLux72pfmp+LW+9LTPvHKOFWKJqI//3vf0yYMIEdO3YwYcIEiouL+dvf/pax8etXoUwLyJ6u7Y2sp+PZzeK23Y3bUkkvYkrfrr/9x9sas+9AoUIlCkUTEIlEeOedd3j66afp27cvTz31FH369GHJkiXZNk1xECCaYlXX3lJYWCgvueSSbJuxW+LxuLWKqrkSCARwOBwZKwNgGAZbtmyhR48eGRkPkm3etm8vQcrmm4FQUPA9Xbp02POBe8AwDLZu3Ur37t3ZtGkTXbt2pa6uDtM09+tzZBgGVVVVtG7der9tbCrC4TCGYWS0C9X69es57LDDMjZeVVUVeXl5jV6pmA3mz59PTU3NLt36ZiHcQohKIEyGKwhmkBKUbfuCsm3fULbtGwebbV2klK12taNZCDeAEOIzKeWAbNuxK2bvclkAAAUsSURBVJRt+4aybd9Qtu0bh5JtKsatUCgULQwl3AqFQtHCaE7CPTvbBvw
MyrZ9Q9m2byjb9o1DxrZmE+NWKBQKReNoTh63QqFQKBpB1oVbCHGqEGKdEGKjECKjTRf20Z4tQogvhRCfCyE+S20rEkK8LYTYkLo+IC3lhRBPCyF2CCFW19u2S1tEkpmp13GVEKJ/luy7Wwjxfer1+zzV8i6977aUfeuEEKOb0K5OQojFQoi1Qog1Qog/pbZn/bX7Gduy/rqlzuURQnwihPgiZd/k1PZuQohlqddugRDCldruTt3fmNrfNQu2PSuE2FzvtTsmtT0b3wm7EGKlEOLV1P2med1+3J34QF4AO7AJ6A64gC+AI7Ns0xag5EfbHgAmpm5PBKYcIFuGA/2B1XuyBTgN+A/JZs0nAMuyZN/dJNvb/fjYI1Pvrxvolnrf7U1kVzugf+q2D1ifOn/WX7ufsS3rr1vqfALIS912AstSr8lC4ILU9v/f3tmEalGFcfz3LLQiI1FCxLsoQ1AIsUgJFBGTyAqvwl0IQS4EwWohLQQR3Lms3IiC9qF9KWmhuDK6iisx/Eiv+NGFgsSLd6XpxtL+Lc4z3uH1fV8VmznzwvOD4T1zZmD+/N85z8x5zsyc7cBaL78PbPfySmBvBm1fAgNt9s/RJj4CvgUO+XolvuW+454HDCvNpvM3af7K/sya2tEP7PLyLmB5HQeVdIz7J1rupKUf2K3EcdJkzlMz6OtEP7BH0m1JvwPDpP+/Cl0jkk55+SZwAZhGA7zroq0TtfnmmiTplq+O80XAYmCf17d6V3i6D3jdrJqPeHTR1ola24SZ9QFvAzt93ajIt9yBexrwZ2n9Ct1P4joQcNjMTprZGq+bImkEUsMDcr5v3ElLk7z80Lumn5fSSln0eRf0ZdLdWaO8a9EGDfHNu/tngFHgJ9Jd/nVJd9pouKfPt98gzUFbizZJhXeb3btPzax4j71u77YA64HiU4uTqci33IG73RUm92Mu8yW9AiwFPjCzhZn1PCxN8XIb8CIwBxgBPvb62vWZ2QRgP7BO0l/ddm1TV7e2xvgm6a6kOUAf6e5+VhcNtepr1WZmLwEbgJnAXGASaSLzWrWZ2TvAqKST5eoux38sbbkD9xWgPGVyH3A1kxYAJF3131HgR9KJe63oYvnvaD6FHbU0wktJ17xx/QvsYKxbX6s+MxtHCozfSPrBqxvhXTttTfGtjKTrwFFSfniimRWfgS5ruKfPtz/Lw6fP/g9tb3r6SUoTln9BHu/mA8vM7A9Syncx6Q68Et9yB+5fgBk+8jqelKQ/mEuMmT1tZs8UZeANYMg1rfLdVgEH8iiELloOAu/5SPprwI0iLVAnLTnEFST/Cn0rfTT9BWAGcKIiDQZ8BlyQ9ElpU3bvOmlrgm+u4zkzm+jlp4AlpDz8EWDAd2v1rvB0ABiUj7jVpO1i6WJspBxy2bta/ldJGyT1SXqeFMcGJb1LVb5VPcr6oIU08nuZlEfbmFnLdNII/q/A+UIPKff0M/Cb/06qSc93pG7zP6Qr9OpOWkhdr63u4zng1Uz6vvLjn/WTc2pp/42u7xKwtEJdC0jdzrPAGV/eaoJ3XbRl982PNRs47TqGgE2ltnGCNDj6PfCE1z/p68O+fXoGbYPu3RDwNWNPntTeJvy4ixh7qqQS3+LNySAIgh4jd6okCIIgeEQicAdBEPQYEbiDIAh6jAjcQRAEPUYE7iAIgh4jAncQBEGPEYE7CIKgx4jAHQRB0GP8B3ecKdiRIWmrAAAAAElFTkSuQmCC\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## Politikayı Kontrol Etme\n", + "\n", + "Q-Tablosu, her durumdaki her bir eylemin \"çekiciliğini\" listelediği için, dünyamızda verimli 
bir gezinmeyi tanımlamak için onu kullanmak oldukça kolaydır. En basit durumda, sadece en yüksek Q-Tablosu değerine karşılık gelen eylemi seçebiliriz:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "2" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ], + "source": [ + "# code block 9" + ] + }, + { + "source": [ + "Eğer yukarıdaki kodu birkaç kez denerseniz, bazen sadece \"takıldığını\" fark edebilirsiniz ve bunu durdurmak için not defterinde DURDUR düğmesine basmanız gerekir.\n", + "\n", + "> **Görev 1:** `walk` fonksiyonunu, yolun maksimum uzunluğunu belirli bir adım sayısıyla (örneğin, 100) sınırlayacak şekilde değiştirin ve yukarıdaki kodun bu değeri zaman zaman döndürdüğünü gözlemleyin.\n", + "\n", + "> **Görev 2:** `walk` fonksiyonunu, daha önce bulunduğu yerlere geri dönmemesini sağlayacak şekilde değiştirin. Bu, `walk` fonksiyonunun döngüye girmesini engelleyecektir, ancak ajan yine de kaçamayacağı bir konumda \"sıkışabilir\".\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Average path length = 5.31, eaten by wolf: 0 times\n" + ] + } + ], + "source": [ + "\n", + "# code block 10" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 57 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(lpath)" + ] + }, + { + "source": [ + "## Alıştırma\n", + "## Daha gerçekçi bir Peter ve Kurt dünyası\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, [Co-op Translator](https://github.com/Azure/co-op-translator) adlı yapay zeka çeviri hizmeti kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlıklar içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. 
Bu çevirinin kullanımından kaynaklanan herhangi bir yanlış anlama veya yanlış yorumlama durumunda sorumluluk kabul edilmez.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/tr/8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb b/translations/tr/8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb new file mode 100644 index 000000000..fa287a7b8 --- /dev/null +++ b/translations/tr/8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb @@ -0,0 +1,462 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "eadbd20d2a075efb602615ad90b1e97a", + "translation_date": "2025-09-06T15:15:31+00:00", + "source_file": "8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# Peter ve Kurt: Gerçekçi Bir Çevre\n", + "\n", + "Bizim senaryomuzda, Peter neredeyse hiç yorulmadan ya da acıkmadan dolaşabiliyordu. Daha gerçekçi bir dünyada, zaman zaman oturup dinlenmesi ve kendini beslemesi gerekir. Dünyamızı daha gerçekçi hale getirelim ve şu kuralları uygulayalım:\n", + "\n", + "1. Bir yerden başka bir yere hareket ettiğinde, Peter **enerji** kaybeder ve biraz **yorgunluk** kazanır.\n", + "2. Peter, elma yiyerek daha fazla enerji kazanabilir.\n", + "3. Peter, bir ağacın altında ya da çimenlerin üzerinde dinlenerek yorgunluğunu atabilir (yani, bir ağaç ya da çimen bulunan bir tahtanın konumuna yürüyerek - yeşil alan).\n", + "4. 
Peter, kurdu bulmalı ve öldürmelidir.\n", + "5. Kurdu öldürmek için, Peter'ın belirli bir enerji ve yorgunluk seviyesine sahip olması gerekir, aksi takdirde savaşı kaybeder.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import random\n", + "import math\n", + "from rlboard import *" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "width, height = 8,8\n", + "m = Board(width,height)\n", + "m.randomize(seed=13)\n", + "m.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "actions = { \"U\" : (0,-1), \"D\" : (0,1), \"L\" : (-1,0), \"R\" : (1,0) }\n", + "action_idx = { a : i for i,a in enumerate(actions.keys()) }" + ] + }, + { + "source": [ + "## Durum Tanımlama\n", + "\n", + "Yeni oyun kurallarımızda, her bir tahta durumunda enerji ve yorgunluğu takip etmemiz gerekiyor. Bu nedenle, mevcut problem durumuyla ilgili tüm gerekli bilgileri taşıyacak bir `state` nesnesi oluşturacağız. 
Bu bilgiler arasında tahtanın durumu, mevcut enerji ve yorgunluk seviyeleri ve terminal durumdayken kurdu yenip yenemeyeceğimiz yer alacak:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "class state:\n", + " def __init__(self,board,energy=10,fatigue=0,init=True):\n", + " self.board = board\n", + " self.energy = energy\n", + " self.fatigue = fatigue\n", + " self.dead = False\n", + " if init:\n", + " self.board.random_start()\n", + " self.update()\n", + "\n", + " def at(self):\n", + " return self.board.at()\n", + "\n", + " def update(self):\n", + " if self.at() == Board.Cell.water:\n", + " self.dead = True\n", + " return\n", + " if self.at() == Board.Cell.tree:\n", + " self.fatigue = 0\n", + " if self.at() == Board.Cell.apple:\n", + " self.energy = 10\n", + "\n", + " def move(self,a):\n", + " self.board.move(a)\n", + " self.energy -= 1\n", + " self.fatigue += 1\n", + " self.update()\n", + "\n", + " def is_winning(self):\n", + " return self.energy > self.fatigue" + ] + }, + { + "source": [ + "Hadi rastgele yürüyüş kullanarak problemi çözmeyi deneyelim ve başarılı olup olmadığımızı görelim:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ], + "source": [ + "def random_policy(state):\n", + " return random.choice(list(actions))\n", + "\n", + "def walk(board,policy):\n", + " n = 0 # number of steps\n", + " s = state(board)\n", + " while True:\n", + " if s.at() == Board.Cell.wolf:\n", + " if s.is_winning():\n", + " return n # success!\n", + " else:\n", + " return -n # failure!\n", + " if s.at() == Board.Cell.water:\n", + " return 0 # died\n", + " a = actions[policy(m)]\n", + " s.move(a)\n", + " n+=1\n", + "\n", + 
"walk(m,random_policy)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Killed by wolf = 5, won: 1 times, drown: 94 times\n" + ] + } + ], + "source": [ + "def print_statistics(policy):\n", + " s,w,n = 0,0,0\n", + " for _ in range(100):\n", + " z = walk(m,policy)\n", + " if z<0:\n", + " w+=1\n", + " elif z==0:\n", + " n+=1\n", + " else:\n", + " s+=1\n", + " print(f\"Killed by wolf = {w}, won: {s} times, drown: {n} times\")\n", + "\n", + "print_statistics(random_policy)" + ] + }, + { + "source": [ + "## Ödül Fonksiyonu\n", + "\n", + "### Amaç\n", + "Bu bölümde, bir ödül fonksiyonunun nasıl oluşturulacağını ve bu fonksiyonun bir modelin performansını optimize etmek için nasıl kullanılacağını öğreneceksiniz.\n", + "\n", + "### Ödül Fonksiyonu Nedir?\n", + "Ödül fonksiyonu, bir modelin belirli bir görevi ne kadar iyi yerine getirdiğini ölçmek için kullanılan bir metrik sağlar. Modelin çıktısını değerlendirir ve bir \"ödül\" değeri döndürür. Bu değer, modelin öğrenme sürecinde yönlendirilmesine yardımcı olur.\n", + "\n", + "### Ödül Fonksiyonu Nasıl Çalışır?\n", + "1. **Girdi:** Modelin tahminleri ve gerçek değerler.\n", + "2. **İşlem:** Tahminler ile gerçek değerler arasındaki farkı hesaplar.\n", + "3. 
**Çıktı:** Modelin performansını temsil eden bir ödül değeri.\n", + "\n", + "### Örnek\n", + "Aşağıda basit bir ödül fonksiyonu örneği verilmiştir:\n", + "\n", + "```python\n", + "def reward_function(predictions, actuals):\n", + " # Tahminler ile gerçek değerler arasındaki farkı hesapla\n", + " error = abs(predictions - actuals)\n", + " # Hata ne kadar küçükse ödül o kadar büyük\n", + " reward = 1 / (1 + error)\n", + " return reward\n", + "```\n", + "\n", + "### İyi Bir Ödül Fonksiyonu Tasarlama\n", + "İyi bir ödül fonksiyonu tasarlarken aşağıdaki noktaları göz önünde bulundurun:\n", + "- **Hedefe Uygunluk:** Ödül fonksiyonu, modelin optimize etmeye çalıştığı hedefle uyumlu olmalıdır.\n", + "- **Hassasiyet:** Küçük performans değişikliklerini ayırt edebilmelidir.\n", + "- **Denge:** Modeli aşırı uyumdan kaçınmaya teşvik etmelidir.\n", + "\n", + "### Yaygın Hatalar\n", + "- **Aşırı Karmaşıklık:** Ödül fonksiyonunu gereksiz yere karmaşık hale getirmek, modelin öğrenme sürecini zorlaştırabilir.\n", + "- **Yanlılık:** Ödül fonksiyonunun belirli bir tür tahmini veya davranışı ödüllendirmesi, modelin dengesiz sonuçlar üretmesine neden olabilir.\n", + "\n", + "### Sonuç\n", + "Ödül fonksiyonu, modelin performansını optimize etmek için kritik bir bileşendir. 
İyi tasarlanmış bir ödül fonksiyonu, modelin doğru ve etkili bir şekilde öğrenmesine yardımcı olur.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "def reward(s):\n", + " r = s.energy-s.fatigue\n", + " if s.at()==Board.Cell.wolf:\n", + " return 100 if s.is_winning() else -100\n", + " if s.at()==Board.Cell.water:\n", + " return -100\n", + " return r" + ] + }, + { + "source": [ + "## Q-Öğrenme algoritması\n", + "\n", + "Gerçek öğrenme algoritması neredeyse hiç değişmez, sadece tahta pozisyonu yerine `state` kullanırız.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "Q = np.ones((width,height,len(actions)),dtype=np.float)*1.0/len(actions)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "def probs(v,eps=1e-4):\n", + " v = v-v.min()+eps\n", + " v = v/v.sum()\n", + " return v" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "" + ] + } + ], + "source": [ + "\n", + "from IPython.display import clear_output\n", + "\n", + "lpath = []\n", + "\n", + "for epoch in range(10000):\n", + " clear_output(wait=True)\n", + " print(f\"Epoch = {epoch}\",end='')\n", + "\n", + " # Pick initial point\n", + " s = state(m)\n", + " \n", + " # Start travelling\n", + " n=0\n", + " cum_reward = 0\n", + " while True:\n", + " x,y = s.board.human\n", + " v = probs(Q[x,y])\n", + " while True:\n", + " a = random.choices(list(actions),weights=v)[0]\n", + " dpos = actions[a]\n", + " if s.board.is_valid(s.board.move_pos(s.board.human,dpos)):\n", + " break \n", + " s.move(dpos)\n", + " r = reward(s)\n", + " if abs(r)==100: # end of game\n", + " print(f\" {n} steps\",end='\\r')\n", + " lpath.append(n)\n", + " 
break\n", + " alpha = np.exp(-n / 3000)\n", + " gamma = 0.5\n", + " ai = action_idx[a]\n", + " Q[x,y,ai] = (1 - alpha) * Q[x,y,ai] + alpha * (r + gamma * Q[x+dpos[0], y+dpos[1]].max())\n", + " n+=1" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAW4AAAFpCAYAAAC8p8I3AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nOzdd3xUVd7H8c+Zmt5J6CC9qVQpKoqIoggIuouCoojCIkXQXUV3F0XEh10bYAFBUQFBwRXXroggIjYiKkWE0EsI6Zle7j3PH5mMsBBqkjsJ580rr2Tu3Mz9zoT8cubcc88RUkoURVGU6sNkdABFURTlzKjCrSiKUs2owq0oilLNqMKtKIpSzajCrSiKUs2owq0oilLNVFrhFkL0FUL8LoTIEkJMrqzjKIqinG9EZYzjFkKYge1AH+AA8CNwq5Rya4UfTFEU5TxTWS3uS4AsKeUuKaUfeAsYWEnHUhRFOa9UVuGuB+w/6vaB0DZFURTlHFmMOrAQYhQwCsBqtXa66KKLjIpySj6fD4fDQVpamtFRylVUVITVaiU2NtboKOXKzs4mPT0ds9lsdJRy7du3j4YNGxodo1zBYJDc3Fzq1KljdJRyOZ1OgsEgSUlJRkcpV25uLgkJCdjtdqOjlGvLli14PB5xwjullBX+AXQHPjvq9sPAw+Xtn56eLiPZjh075Lx584yOcVIrVqyQ69evNzrGSU2bNk0WFBQYHaNcuq7LcePGGR3jpPLz8+X06dONjnFS69atk++9957RMU5q7ty5cseOHUbHOKlQXTxhzaysrpIfgeZCiAuEEDbgFuD9SjqWoijKeaVSukqklEEhxDjgM8AMLJBSbqmMYymKopxvKq2PW0r5MfBxZT2+oijK+UpdOakoilLNqMKtKIpSzajCrSiKUs2owq0oilLNqMKtKIpSzajCrSiKUs3U2MK9cOHCsqs2FaXKeb1eli9fbnSM0/L222/j9/uNjlGut956K6LzGaHGFe6VK1cycOBAgsEggwYNYvHixYbkkFJy3333GXJsxVhPPfUUw4cPJzc3l4EDB7Jp0yajI53QTz/9xMCBAyksLGTo0KHMnDnT6EjHyMzMZODAgRQVFTF06FBmzZpldKSIUaMKdzAYJCsrixtuuIF+/foxatQotm3bhs/nq9IcL7zwAq1ateKWW26hZcuWzJkzp0qPX9mklBQUFBgdIyK53W42bdrEI488wk033cTFF1/Mnj170HXd6GjH0HWdPXv20LlzZwYNGsTkyZPZvHkzbrfb6GhAab7du3fTtWtXBg0axIMPPsjmzZvxeDxGR4sINapwHz58mF27dtG3b1/69u1Lly5dsNlsbN68ucoyFBQUUFRUxKJFi/D5fCxcuJCCggIKCwurLENlW716NdOnTzc6RkRavXo1HTt2JD09ndtvv52hQ4fy0Ucf4fV6jY52DJfLxRdffMGQIUO45ZZbqF+/Pq1bt2bdunVGRwNKZxhcvXo1f/rTnxgyZAiNGjWiVatWfP3110ZHiwiGTetaGerXr0/Lli0ZPXo0hw4dYvz48TRt2pROnTpVWQaPx4OmacTFxfH999+zb98+4uLi8Hg8JCcnV1mOyrJixQq2b9/Ov//9b6OjRKR+/foxZMgQfv/9d6644gruuusupk+fTkxMjNHRjhEfH8+AAQMYOXIke/fu5aGHHkJKyQMPPGB0NAASEhLo168fI0eOZM+ePTz44IMAEZPPaDWqcAP079+fNm3acOeddzJ+/HgaNGhQpcevV68eMTEx/OlPf2LZsmVcdNFFNG7cmMGDB5ORkRHRc1GXR0qJrut8+eWX7Ny5k3HjxlXJ8whPYWmqXm8Mp02bxv79+5k4cSLdunWjY8eORkc6oa5du/Kvf/2LcePGcfvtt9OoUSOjIx2je/fuzJgxg3HjxvHll1/y5ZdfGh0pYlSv34jTkJGRQY8ePYiNjaV79+6GTIo/ceJEfvjhB6Z
OnYrD4WDTpk38+9//ZtCgQWRnZ1d5nnO1f/9+rrzySj7//HPuu+++KlmsYf/+/Xz33Xfce++9bNu2rVr1bbZo0YKrrrqKWbNm0aBBAyyWyGwfJScn06NHDxITE+natSvNmzc3OtIxyvKtW7eOtLS0iMhXUFCAw+EwOkbNa3FHAqvVitVqZdmyZeFtixYt4uDBgzz//PM0adKEP//5zyQkJBiY8vT89NNPvPnmm3z66adVurrOqFGjaNq0Kffccw8333wzr7/+Op07d66y458rIQRXXXUVq1ev5ocffqBXr15GR6q2oqKijI5AMBhk4cKFHDx4ELvdTt26dRk2bBhCnHiBmsqmCncVqlevHnfeeSfffvstjz76KM8++6xhP/jTsXXrVt5//30eeOCBKi3aH3zwAX369GHo0KE89NBD6LrOY489Rmpq6nH7Dho0iBtvvLHKsinnp2AwyIwZM3jzzTdxOp2MGjWKYcOGHbPP119/zSuvvHLC7+/cuTPjx4+vsDw1snA/+OCDzJ49OyKLYosWLWjRogVdu3Zl5MiRzJo1i+jo6Ih7O33kyBFmzJjBc889d8KCWZkuvfRSlixZwk033cTkyZN54oknuPnmm2nXrt1x+y5fvpypU6ee9PE+++wz0tPTKyvuSU2cOJHRo0fTuXNn4uPjDclwKgsXLuTWW2/lww8/NDrKCQkhmDVrFg8++KBhJ8VHjBjBf//7XzZs2MCzzz7LgQMHjjt30atXL/7xj3+c8Pvj4uIqNE9kVYsKkpOTQ+3atSOycJdp2bIl48ePp3///gwfPpzbb78dq9VqdCwAtm3bxrRp01i0aJEhJwZTUlKoX78+48aN4/rrr+fAgQO0adPmhH2cDz/8MJMnTz7p4xn5/yA1NZXCwsKIG8d9tHr16nHw4EGjY5RLCEHt2rXJyckxLMOrr75K9+7deffdd+nQoQM333wzmZmZx+1XVb8vNbJwVwdCCDp06MCaNWt4/fXX+de//kWbNm0YPHiwYZmOHDnC/Pnz8fv9PP/884aO5njqqacoKSnhww8/5Kuvvip3PyFERP+BBhg6dCiLFi1i3LhxRkdRzpLVamXAgAHhqTSGDx9u6P89VbgjwB133MGqVavCfcr9+/evkv8QUkrWr1/PkSNH6N+/P5MnT+ZPf/oTF198MSkpKZV+/FNJSEhg6NChRsc4ZyNGjKBjx46qcJ8jIQRSSkOKpdVqZdq0aezevZuYmBgyMjKqPMPRatxwwOpICMHVV1/N6NGjyczMpFOnTpX+1lVKSZs2bVi6dCnr1q2jXr16/N///R/XXXcddevWrdRjn2+EEKSmpqppAs5B06ZNueyyy3jzzTcNzXHBBRcYXrRBFe6IYrfbmTp1Kj/88APPP/98pR5r/fr19O7dm4cffpioqCh69uzJxo0bK/WY5yshBPPnz6/QUQXnG7PZjNVqjbipA4xSI7tKhg0bRq1atYyOcdYsFgszZsyo1GPk5eWRlpaGx+Ph5ptvZvXq1eTm5lbqMc9nqamp3H777UbHOCEhBH/729+MjnFKXbp0UVM1h9TIwn3NNdcYHSHiDRgwgAkTJuDxeMjIyODJJ5/kwIEDRseqsRITE+nbt6/RMU5ICFEtziW0adPG6AgRo0YWbuXUhBDs2LGDNWvWkJOTw8GDB7HZbEbHUhTlNKjCfR6z2Wzq3YmiVEPq5KSiKEo1owq3oihKNaMKt6IoSjWjCreiKEo1owq3oihKNXNOo0qEEHsAB6ABQSllZyFECvA20BjYA/xZSllzVspVFEUxWEW0uHtJKdtLKcuWJ5kMrJJSNgdWhW4riqIoFaQyukoGAm+Evn4DUMuTKIqiVKBzLdwS+FwIkSmEGBXaliGlLFsR9zBg/FRaiqIoNci5Xjl5mZTyoBAiHVgphNh29J1SSimEOOGsMKFCPwpKl/XZsWPHOUapPAcOHKCoqCiiM+bl5aHrekRndLlc7N69m7y8PKOjlMvv90f0a1h
SUoLL5YrojIcPH47435eioiL2798f0ZNWnWzVpHMq3FLKg6HPR4QQK4BLgBwhRB0pZbYQog5wpJzvnQfMA0hNTZVr1qw5lyiVqqioiAMHDhDJGXfu3ElMTAz5+flGRylXXl4e69evx263Gx2lXE6nM6J/zl6vl29zv+W/a/5rdJRyxWTH0NvTO6KXazt48CCZmZlkZWUZHaVcJ339pJRn9QHEAvFHfb0e6As8BUwObZ8M/PtUj5Weni4j2Y4dO+S8efOMjnFSK1askOvXrzc6xklNmzZNFhQUGB2jXLquy3Hjxhkd46Ty8/Nlp+mdJBH8r/a62vK9994z+qU6qblz58odO3YYHeOkQnXxhDXzXFrcGcCK0DJCFmCJlPJTIcSPwDIhxEhgL/DncziGoiiK8j/OunBLKXcBF59gez7Q+1xCKYqiKOVTV04qiqJUM6pwK4qiVDOqcCuKolQzqnAriqJUM6pwK4qiVDOqcCuKolQzqnAriqJUM6pwK4qiVDM1rnBLKZk5cyaHDh2K6AlkFEVRzlaNKtw7duzgggsuIC0tjbvuuosbb1RTgSuKUvPUmMItpeSHH37gr3/9K23atGHx4sXUqVOH3377zehoNcaBAwfYtWuX0TEU5bx3rvNxRwwpJYcPH6ZBgwbMmjWLBx98kJSUlIie5rS6kFIyZcoUpJSYTCZ8Ph9PPvkkZrPZ6Gh89tlnNGnShObNmxsdRVGqTI0p3CaTiX79+tGrVy80TeOXX34hOTmZJ5980uhoQGnx03U9IordmdJ1nXfffZcVK1ZgsVgYNGgQU6dONfS5OBwOrrnmGq677jqWLl1KcXExK1asMCyPolSlGtNVAtCiRQt27drFNddcw+LFi/n000+NjgSUrgiyadMmhgwZwpYtWygqKjI60hmZMGECc+fO5euvv2bDhg0sWrSI0aNHG5pp586dtG7dmrvvvptnnnmGYDDIgQMHDM2kKFWlRhVuk8lEdHQ0ZrMZu90eMSutPPXUUzz22GM888wzjBkzhg8++MDoSGfk+uuvp2/fvqxcuZJ69eoxaNAg5s+fb2imd955h5tuuomXX36ZjRs30r9/f0P/UGuaxiuvvMI333xjWIaaYNOmTRF/HuXzzz/H5XIZmqHGdJVEqszMTJKSkpgzZw6PPPIIl156KZs3byYnJ4eMjMhfR3nFihXs3buXuXPncvjwYb766isef/xxLBZj/+uUnYSuVasW69evZ+/evWzfvt2wPEIIoqOj+eqrr5g7dy6xsbHMmTOH0EIjyikEAgHuuece6tevj9frxe128+KLL0bU67dz504ee+wx2rdvz7Jly2jdujUPPPCAIVlqZOGOjo7G6/UipTT8B9+qVSuWLVtGVlYWkydP5r333uOpp55i9erVrFy5MvwuwehC+L+klHz11Vds3bqVSZMmERMTg8PhQNd1EhMTjY5HfHw8q1ev5sUXX6Rbt25ceumlhuYxmUwMGzaM4uJicnNzcblcdOzYEYCZM2fSsWNHLBYL0dHRuFwuNm/ezJdffsm4ceOIjY3FZKpRb37PmNPpZMeOHTz55JO4XC4GDBiAw+EI//7u37+fYcOGnfB7V61aRUpKSqXmk1Jy8OBB4uPjueOOO9i6dSvTp09n1KhR4X0+/vhjZsyYcdz3pqWlsXLlygrNE1nVooLMmTOHTp06sWHDBsMLd2xsLI0aNWLKlCmMHDmSjz/+mEWLFnHZZZcxYMAApJQMHTqUtm3b0r17d0OzHm3t2rV88MEHPP300+HXMD4+3uBUfzCbzRw4cIDU1FRuvfVWw3/OZRITE0lMTERKSWZmJlD67mDq1Km0a9eOwYMHc/fdd9OnTx969epF06ZN+fnnn6lbt67ByY01efJknn32Wb799ltefPFF9u/ff8x1GPXq1Qu/nv+rKn72gUCAV199lX/84x/Mnj2bL7/8km3btjFw4MDwPtdee225GStajSzcQoiIumry3nvv5S9/+Qsvv/w
yq1evDm9ftWoVAG+88Qbr1q2LiML9008/8fHHHwMcU7QjUdnPOBIzCiHCuZ599lkAfvnlF0aMGEHv3r2pV68eycnJzJw5k6VLl3L//fdH5POoKrNnz6Zly5bMmzePe++9lz179rBq1aqIeU1sNhv33nsvd955J6+++ipRUVEkJiby0UcfGZKnRhbuSGQymRgzZswJ77vjjjuqOM3xpJTs2bOHpUuXcvXVV3P55ZdHzC9NeSI93/+6+OKLWbx4MTNmzKBTp040aNCA9evX06lTJ6OjGc5qtTJ//nw2bNhAfHw8r776qtGRjtOqVSumTJnC888/T+/evenbt69hWVThVoDSUREPPfQQr732GrGxsUbHOS09e/akR48eRsc4I23atCE7O5sFCxbQs2dPFi5cyF/+8pdq90eooplMJvr06cPll1+OyWTCZrMZHek4iYmJXHvttfTo0cPw8xKqcCsAWCwWli1bZnSMM2K1WrFarUbHOGMrV64kKyuL9evXs3PnTqPjRJSoqCijI5xSJJzrUYVbUQzQrFkzmjVrZnQMpZqqsWOQlixZct6//VQUpWaqsS3u1q1bGx1BURSlUtTYFreiKEpNpQq3oihKNaMKt6IoSjWjCreiKEo1owq3oihKNXPKwi2EWCCEOCKE2HzUthQhxEohxI7Q5+TQdiGEmC2EyBJC/CqE6FiZ4RVFUc5Hp9Pifh3434vyJwOrpJTNgVWh2wDXAc1DH6OAORUTU1GU6kRdQ1G5Tlm4pZRrgYL/2TwQeCP09RvAjUdtXyhLfQckCSHqVFRYRVGqh0ianbMmOts+7gwpZXbo68NA2VIu9YD9R+13ILRNURRFqSDnfHJSlv5pPeM/r0KIUUKIDUKIDR6P51xjKIqinDfOtnDnlHWBhD4fCW0/CDQ4ar/6oW3HkVLOk1J2llJ2jo6OPssYiqIo55+znavkfeAOYEbo83+P2j5OCPEW0BUoPqpLpVyapvHee++dZZTKl5eXx86dOyM64+bNm9m7dy85OTlGRynX4cOH+fTTT4nkP9QlJSUR/XN2u93EZsfS5L0mRkcpV/yeeDa7Nkd0P/euXbuwWCxs3rz51DsbRNO0cu87ZeEWQiwFrgTShBAHgEcpLdjLhBAjgb3An0O7fwxcD2QBbmDE6QT0+wVjxkTuiucxMTp33BET0auy7927l8TExIjOaLfbqVWrVkQv1GCxWCL6NXQ6nXSxd2FGxvGL0kaKbYXbcJgcEf06xsTE8GTKk7gz3EZHKZdf+Mu975SFW0p5azl39T7BvhIYe9rJwt9n4vBh49dbLE9iYhZ16uRHxJqQ5cnJySEjI+OsM0op+f777xk8ePAx20ePHs3DDz9cISuSrFq1ik6dOmGz2XA4HCSnJJFTeIj42ERKAkf4vHAhu9xbMAUs2EUcQjeT7ThEt+S+XHPBLfjdPurXakhJSQmxsbEUFhYSExNDIBBA0zRiY2ORUhIdHU1BQQFxcXE4HA4SExPDt30+H4mJifh8PqSUREVFYTKZwuuULlmyJKJ/zgUFBfz4448RnVHXdfLy8iI646+//kr+hfkUNys2Okq54kxx5d5XY6d1VU5fMBhk/fr1XHPNNfh8vmPue+yxx7BarUycOJGYmJhzPpaUOvmBQ+xybcGEzvvZL9EstiN+3Y+NaFrYunLIt49iTxGtkjrQKPUiEqzJ/G31MOKtqYzt8A9q2epgC9gwmUzoug6ULn2laRpSSnw+H0IINE1DCEEgEAjfL4TA7/eH34YGg8GIXCZLUU5GFe7znKZprFixgokTJx5XtMs8+uijFBcX88QTT5zzUmESycYj3zNr43QyYjNomNiI4mCAX3ZvZc+h/bRp1gBrwMb2XVnktSjigsTWCA5glwlEiwSW/ryAlikXcm2z/kTZohFCYDab0XU93KcaCASwWq1omobFYkHTNOx2O0IILBYLwWCwNIuUBAIBVbiVakfNVXKeE0Lw3XffkZ1d/jnkYDDIO++
8UyGLo5qEmc5pV1En0Iktvxfw65ZcNv6aTckhG3Z3bVz7Yzi43c+Wjbl8v3EjW3b9yNqf1uBxBVm/81uOOPKZu/5FCnx5OBwOoPStucfjwWKxYDIJYmKi8Xo9WK1WfD4fUVFRuFyucGs7NjY2XMQr4l2EolQ11eI+z2VnZ/P777+fcgRASUkJGzZsoGvXrud0PF3XiTXHMLv/bO5aMYJPNn+M7oNoGYVN2vgpS+NPl9zEyD5dKHYVYfPYOOD+BG9JPnkFhezQdhIMmBk4pz8rx68GwGazERUVhdfjZvOqGWT9uJhgUKN19zvodMPjOBwOUlNT8Xq9REdHk5eXh91uJxgM4na7SU1NPafnpChVTbW4z3Nms/m0ugpOd79TMZlM2O12vE4PL980l+tb9cNiNtOkVhO6NevGRY3bsTd3L1sObibfUUB2fjax+Y1w/Z7IhQmt8RTnge5FKxbcPftuhBB4vV4KCvJx5Gxh55Z1FJZ4qdduAEl12+MoKSEuLo7c3FyEELhcLtLS0rBYLFgsFpKSks75OSlKVVMt7vNcrVq1aNCgwSn3s9vttG3b9pyPJ6XE7/eTnJxMIBBgzk0v8Y/of/Ju5rsUOYuINccSI6LxCT9H8rdRXFhMvDWBgd0H4nQ4iSaF/NwjmJIP4c8JoGlBrFYrq1fM5MiebyjM3k+HqyZx+YBJBIOl93k8HpKTk9E0jZiYGIqLizGbzUgpcTqdJCYmnvPzUpSqpFrc5zmTycSIESNo1qzZSfd76qmnsFgq5u+8yWTCZDIhpSQ5OoXHr32cIZ1vxRlwsSt3N5sPbuXH3T+yr3A/Teo3pWHdhuzK3oXD6yBepHJJw57kbfBhb32Y1957hYDfy49r/oPXZ2Hg6AV06TMq/Phlw/zMZjNA+HYZNYudUh2pFvd5TghBu3btuOyyyzCZTGzfvv2Y+zMyMmjUqBG9e/eukJOTUFq4nU4nsbGxuFwuEuwJzOj3JI9f9yiDXhxMYUkhWft3kR6fRoEznzhrPF63FwKS3Nx84qyx9Ok0gAMHtvO1XMF3Y14jWZP07XUbjVp3x2q14na7sdvt4ZOTTqcTm82G3+8nJiYGTdPQdf2cR8mcqaysLOrUqRPRFyEpkU8VbgWz2cycOXO4++67ycrKCo+NBmjYsCHz588nJSWlQo5VNs46NTWVgoICkpKScLlc2Kw2/E4/H479kD0Fe/gg8wNcXhemoIlYWwwlRSUgBR63F7vZxpCrh9D54s6s/fVz5q+fwhX9hnBxtxvQNA2n00lKSgolJSUkJiZSVFREWloaDoeD6Oho8vPziYmJQUqJy+Wqkiv8ioqKmDt3bvgPSoMGDRg+fHilH1epmVThPs9JKZFSMnnyZJYuXXpM0Qb48ccfGTVqFCtXriQuLu6cuxaEENjtdgoKCoiOjqa4uBir1UowGCQuLg4pJc3SmzG+z3iklNgsZg6v+4LDP7xLjD2K1F7XkdS9N1a7ncLCQgKHg3iKBJdefRM2mw0pJUlJSeTt2cOPr75AwYF9JDdtTac77iEpvVa4v1vXdXRdr7J5UwoKCvjss894/fXX2bFjB//85z+5/fbbVVeNclZU4T5PlRXs/fv388gjj7B8+fLjinaZ77//ni5duvD666/TuXNnzGbzWRecshZ3YmIixcXFJCQk4Ha7sVgs4bHY+L2YfF62TRmP9HupP2gYnR/+P3Rhwmo2sXvev8j/JZOgppOVV4Q99wi+zT+y4Zu1HPn1JwKaRushd9Fh8C34fV40r4+lo27HWeJkwJSpJFzQlIwGDTGZTLhcLux2+7m8lKf1nCdNmsQbb7zBrFmzGDlyJA899BDPPPMMf/3rXyv12KcjPz+f5OTkCusKUyqfKtznISkluq7z7rvvsnz5ct59992TzkQG8Pvvv/OXv/yFUaNGMWTIEFJSUs66eJvNZgKBQPgqxrITiWazGc1RzKF5T+Hal0Xr+x/HGp9AoKgQ764dIMAnod7g22g0fCx
Bl4N6X62i8/bfyP9mLY0vv4oLh95NMOjHVViI31GMJkFHMuDvjxHUdL5+cyG/rlvH6Fdep0nHTuGTlpVJCMFzzz3HbbfdRvfu3Vm7di1vvPEG3377baUf+2RycnL45ptvWLlyJVdeeSVNmzalc+fOhmZSTo8q3OeZspb2vHnzuP/++8OTLZ2OX375hbFjx7J+/XoWLFiA1Wo94+IthDhmHpGyPxhSSggG2Tvn/9ByDtFk2F/w5x4mmHsYgaTsMEKCf99uvFKiAwktW5PUvhOaP4inKJ+SvTvRpESToEmJLiWaDrqUBHVJxxsGENB13vzr/dzyf/+m+TleUHS6UlNTadiwIZmZmaSmpjJhwoQqOe7JbNy4kVdeeYWXX36ZBQsW8NFHH7Fw4UKjYymnQRXu84ymabz66qs8/PDDeL3es3qMJUuWoGkar732GlFRUWf0vVJKgsEgycnJx5yctFgs7F+xGE/Wb1xw218g4EXoIETo45jHKC3gINHcLvxSlhbrUIHWdIkuCRfvoCbRpE4wtE+7nr3wef3MHTOaSW8vp3XHjmf1OpyJsvHiEyZMoF27doZfal9SUsLbb7/NvHnzmD59Ok888QRLlizh/fffZ8CAAYZmU05NFe7ziK7rvPXWW4wdO/aUXSMnI6XkP//5DykpKTz55JNndAGLyWQiKiqK7OxsUlNTycvLIzY2Fp/bRcEX79Ny2Fg0dzHSBAiBKdRCN4k/ji2lLF0sT0ooK9K6RNclQamj6RJNg2CocAd0naCEoK6j6QJN12nd41KOHDiAJy/vrF+H0yWlZMeOHcTFxXHJJZdU+vFOR3x8PDfffDPPPPMMmZmZrFmzhszMTMaMGWN0NOU0qMJ9HlmyZAnDhw8/pmuk7GKYshnzymMymcJ901A6A99LL72Epmk8/fTTxMWVP3fw0cpa3NHR0QQCgfCJwfx1X2CLjcObdxCzSWAyl54oE2YwH1W4dVnaqpa6AE1HlzpSgtRDLW29rEBLAnpp90hQlwQlpQVcL+1GCQR1Uus34qX7JjB/y1ZEJfZ1SymZOHEiP//8c6Ud40wJIWjcuDEej4eDBw/y8ccfc+WVV1bYRVZK5VKnkSPQo48+espCehHdQoQAACAASURBVKYWLFjAhAkTjuvP7tKlC/369TtlX3VGRgZjxx6/Rsb8+fO57777zmiZqrJjlX2WUuL4aT0xjZuheVzoHhfS7QKvCzxuhNeN2efB7PMgvKW3pdeF9LrRPW50txvd7UJ3u9DcTjS3m4DbddSHE7/rjw+vw4HX5aBu86ZovrPrLqoJ2rZty9y5c2nRogUzZ87kzjvvNDqScppU4Y4gH330Ea1bt6ZHjx506dKFKVOmnPNjlnWP3H///RQWFoa3R0VF0aRJE959911atGhxyseJi4tj2rRprF+/njZt2hzz+G+88QYjRow4rT82ZfNne71eLBYLfr8/tM2E1Pzhwq17XEiPC+lxQ6hYC2/p13g8cNR+utdF0BP6cLsJup0EQ0Xb73bhczrxuxz4XE68TjdepxOv04mnuLjcIZAV6bbbbuPtt9+u9ONUZw6Hgx9++IEnnniC4hP8XDRNo7i4+JiPOXPm0L59e3r3Pm4xrhpPvS+qZLm5uWzatOm09v3++++5+uqrsdlsvP3227zyyivhJcnOhpSSnJwcXnrpJYqL/1iiqW7duvzrX//ixhtvPKNLr+Pi4ujWrRvLly/n1ltvZdOmTUgp0TSNL774gk8//fSUrXdd1/H5fCQlJeF2u0lISMDv9+P3+ZH5OdhDXTfCLDCZBMIsECYTpW0MSRDQdJ2grhPUSrtBAqGvA1IS0EIfusQf1AnqUFJSjDkmFr8m8etH3R+6CKcy7dq1i+joaOrXr1+pxzkXHTp0IDMzkyuvvNKwDD179qRr16706dOHFi1a8Pzzz5OWlha+v6CggDlz5hzzPUOGDGHjxo1VHTUiqMJdyfLz81mzZs1p7bt161ZcLhdr167l7rv
vJiYmhtzc3HO6JFvXdQKBQHhypfT0dKZMmcKgQYPOar4MIQStW7fmhRde4K9//Ss//PBD+D6/v/zFTcuYTCZsNhv5+fnUqlWLwsJC4uPjiUpIJPurT7GZTJCUBKHijal0SEnQ70PYo9Ep67cGn8uBOy8Xv6bjC+r4dYlP0/EFJZrJgiUtgwCC4kMHiKldD7+uE9DAp2kEdcjNPoz/LEfWnK7XXnuN2267LaLnJnn66afp1KmTYUXwo48+YtCgQdx1110sWLCAtLQ07rzzzmMuTkpNTWXVqlWG5ItEqnBXslatWvH444+f1r7Lli3j0Ucf5dlnn+XWW2/loosuol27dmd9bCEE6enpTJs2jb/97W9kZWXxn//8hw4dOpxTIRFC0L17d1577TXGjRvH999/z6OPPkrv3r1P2Veu6zp+v59atUovP09KSsLv91Nn8HByv1lF0e+b0Oo1JDYtHd0k0E2CoIDg/p1YGzRFAp6cQwRKivH6fKXdHkENvybxBCW+oIZX0/Ej0Pfvw4+Z6AYNKc7ORsTGEtDAq+kUFxSwa8tW2t9wI1TSZeeZmZlYLBYuvvjiSnn8mqJJkyYsX76cmJgYunXrxsqVK3n88ccZPHiwmhKgHKpwR5Abb7yRPn36cO+99/LOO++c9kiNk7FarfTq1YvVq1cTDAZJTU095peh7CrKUymb26PssmiLxUKbNm147733wl0fpzvTnq7r4XUiy94J2Os2RLfYCLjcsHsHaBq2uDgCUsMM+EuKEb/+UDpWW9MIaDp+Tcev/dE9EpR6aOw2BDQNb1EBvqBOfl4enoCGH0FCg8YUFhZy5OBhvP4gN4wZU2nFIT8/H5PJVGETdNVUrVu3Jjs7m3vuuYdevXqRnZ3NpZdeqor2SajCHUFsNhs2m42lS5dW6OOazeZyV3rRNI1GjRoRHx9PSUlJuY/RsWPHY4bvlUlISDijLEIIbDYbDocDu92Ox+MJF3HNHo1fl8iAhrmkmKAWQDu0PzQcUCAADRm+yMav6wQ1gV8/uu9aD/d5B/XSC26CWgBNg0BQw+N0UpCdgy4BYSI6rnK6MPx+P7///nuFLD5xPvjss8/YtWsXX3/9NVlZWUbHiXhqVMl5zmKxMHjwYBo2bFjuPkIIHnjggQqZjKlsBZykpCQ8Hg/x8fHouo7FYqHxsLvxhfqpXQUFuJ0OfJqOV9PxaDpuTccb1PEES2/7NfCFWt3HtLx1vfSKSb3s5GXpNl1CSUFh6YrwJhNdbhqMiKqc2QFdLhcffPABgwcPrpTHr4maNGnCHXfcYXSMakG1uJXTmu2voiZjKpvWNS8vj7i4OIqKirDZbAQCAepe2oeNOuhSR5cBdIcbgnrp+UlR2saQUg9dhAPB0MU2/tDJSr9eNlpE4tdK7w+UFXApEVFReD2+0n20IO2vvJKGTZpUyPP6XyNHjjxuFESkEkLw1ltvGR1DOQOqxa1UKSklgUCAtLQ03G43iYmJ4ZVoHC438V16lraygxpOhxN3oLSF7Q7ooa9laYs7qOMJanhCI0q8QQ1fUMOnafiDEr+m4dd0AqFiHgjquJxu/D4/8bVqce1fRmOOiqagoKDCn+OuXbuA0hZkdSCEoGXLlkbHUM6AKtxKlSq7AMftdmO1WvF6veFZAqPj42kxdCTeoAwVaA1vaLSIN6jhDWpHFe3SLhRvUIa7V3yaxBfqLvFrAr8Ofk0eM947ICUZzZtTUlBI9/4DKmUhhYcffpiZM2eqk2tKpVGFW6lyZRftCCHCI1qklFgsFpKbtaT+NQNChTrUqg6W9m3/0b8t8QRK7/eF9vOFRpkEQsW7tLtEKy3iusSvQ1DTadPzSjRhocdNN2OxWCplzclJkyYdc/GIolQ0VbiVKlVWtGNiYggEAkRHR4cXUfB4PJhi40ht1x4/ptJWt1baNeIOarjDRTxYerIyfLu0Ne7VSsdw+3SJN1h6sY1f1/CFWtu6MJFcrx4ORwkX9uyJpmm4XK4Kf47
dunUzfNpWpWZTJyeVKlU2reuRI0dITU0lPz+fuLg4AoEASUlJaJpGiyHD2bluDXvXrkIgwnNyA0gpwhNaBeUfQwMDUhLUQicjQ5e0+8r6uDUdabHRrmcvfly1hhe//QZbVBRSyjMezqgokUC1uJUqVXZyMi4uDp/PR2xsbPiCHK/Xi9/vxyQErQfcjGaNwqOF+rYDGp7AH61r99F93prEG5Slre1Qt8nRwwSDmGhwUQcCCC6/+SY0q41gMEgwGMTpdBr9kijKGTtl4RZCLBBCHBFCbD5q22NCiINCiJ9DH9cfdd/DQogsIcTvQohrKyu4Un2ZzWY0TcNqtR4zj4rFYgkPO2x41bXEtGqLNyhxByXuoI776BOToe1l/d++QGl/ty980vKPfu/0Zi2ISU5hz5atXNirF7FxceF5yNX800p1dDot7teBvifY/pyUsn3o42MAIUQb4Bagbeh7XhJCVP5qrMo5OZO5tM9V2ZqTZdO5lp2klFKGiymUXhbfb9rTmJJTjyrYWqiAS1yhk5LewB/F3KOBJ1S0vZqGbrGSUL8Rlrh4igsKGHzfBFpeckl43LoQolJOTipKZTtl4ZZSrgVOd7DrQOAtKaVPSrkbyAIiY60mpVx2uz1cMKG0RXx0QZNSVtiwuf/tKomJiQnPgeLxeMIr7NhsNuo2a84tLy0gvmFjPAE99FHaReIrG99ddjWlpodHoviCEl9Q4pcCrz9ASUEhHa7uw9UjRhAVHY3D4UDTtEo7Oakole1c+rjHCSF+DXWlJIe21QP2H7XPgdC24wghRgkhNgghNgQCnnOIoZyrpKQkkpNLf4Rms5nRo0fz/PPPhy9xj42NpXbt2hVyrLIrJ4uKioiKigrPjxIMBomNjcVutyOlxOv14nA4aHZJN254/P/oMPjP+KQIjzLxmy1ccPmV4SGC3qBGVFo6cbXr4tW00svhfQFsMTEMGj+ePnfdhRACr9dLUlISZrMZi8VCfHx8hTwvRalKZ9vBNweYRumSrdOAZ4C7zuQBpJTzgHkA8fEZ0uc7yyTKORNC8Prrr+NyuRBCULduXeLi4rjiiivCJw7PZEHgU7HZbKSnp2M2m6lVq1b4QpWjZx4sG05nMpno1Kcv7bpfRv+/TQZCq7ybBDFJSTiPuvLRYrODEMfMsW2LiiK9YUP00JDD6OhohBDhdxDqIhmlOjqrwi2lzCn7WggxH/gwdPMg0OCoXeuHtikRTAhBo0aNjtveqlWrSjne0X3ZR3fRlPnfeVFMJhPW5GTikpOP2zc54/TeCZQ9YtnxVMFWqrOz6ioRQtQ56uYgoGzEyfvALUIIuxDiAqA58MP/fr+iKIpy9sSpRhQIIZYCVwJpQA7waOh2e0q7SvYAo6WU2aH9/05pt0kQmCil/ORUIRITU2SLFvef7XOodFari7Zt807YKo0Uhw8fxm63h/uqI9H27du54IILInokx6ZNm7jwwguNjlGuQCDAnj17aN68udFRylVQUIDf76+w8yKVYc+ePWyttZVAbMDoKOXa/ux2iguKT/jW8JSFuyrEx6dLv/93o2OUKyFhD3XrfsO2bcOMjlKuRo0+5aWXatGpUyejo5Rr5syZjBgxokL7yyva3//+d6ZPn250jHIVFRWxcOFCJkyYYHSUcm3YsIH8/HyuvTZyL+NYtGgRPXv2jOjGWMuWLTly5MgJC3eEXH0g8Psjt6UYCOSjafaIzqhp0cTGxkZ0i9tqtZKYmBixGcvmTInUfFCa0Wq1RnTGmJgY3G53RGe02+3ExcVFdMaTnYdRl7wriqJUM6pwK4qiVDOqcCuKolQzqnAriqJUM6pwK4qiVDOqcCuKolQzqnCfpzZv3hyeiU9RlOolQsZxK1Vl//79LFy4EJ/Ph81mo1WrVtx8881Gx1IU5QyoFvd5RErJ3r17+eWXXxg3bhwtW7Zk6dKlVbqQgqIo504V7vOI1+tl9uzZzJo1i8cff5zWrVtz/fXXs3jxYqOjnRWv1xu
ez1tRzieqq+Q8Eh0dzYQJE7j33nt56aWXuOiii7j88st59913jY52xj755BN27dpFbm4uF154If3798dmsxkdS1GqhGpxn2eaNGnClVdeyezZs3n44Yfp3Lkza9asMTrWGbv//vupU6cOffv25ZFHHsHtdhsdqVwvvPACHk9krvL00UcfkZWVZXQM5Qypwn2eqVu3Lvfddx933XUX48ePZ8yYMXz++ef8+uuv1aav+5///CczZ86kfv36/Prrr6xYsYJ7773X6FjlWrFiBX6/3+gYJ/Ttt99y8GDkr3Wyb98+HnvsMaNjlGvPnj1MnTq1yo6nCvd5qnnz5uFZ5h5//HGee+45tmzZYnSs0zJlyhQmT57Mxo0b2bhxI6NHj2bWrFlGx6qWateuzeHDh9E0zegoJ+X1etm9e7fRMcrl9XrZu3dvlR1PFW4Fi8XCK6+8wsKFC6tFt4nVauWGG27g448/JjMzk4suuojY2FijY1VL48aNY+7cuRHd1aQcTxVuBShd5/GRRx7hu+++Y926dUbHOaVp06Yxfvx4+vXrx4svvhheXFhRzgeqcCthSUlJjB07lmXLlrFt27Zq0+etKEar6sWnVeFWjhEfH8+sWbN46qmn+Omnn4yOoyjVQlU3clThVo4jhODFF1/kww8/ZPXq1UbHKVeTJk2QUrJr1y6jo5Tryy+/pGfPntjtdqOjlOuOO+5gwYIFRscol5SS5cuXM2jQIKOjlCstLY2GDRuycePGKjmeKtzKCUVFRTF+/HjWrl3Lhg0bIrLbpDoU7tWrV9OzZ0+ioqKMjlKu4cOH8/rrrxsdo1xSSt555x1uvPFGo6OUq6xw//zzz1VyPFW4lXKlpKTw0EMPMXfuXLZt22Z0HEVRQlThVk4qKiqK+fPnM2fOHL755huj4yiKgircymkQQjB9+nTWrl1bLcZ5K2cuNzeXN954w+gYx3nvvfcYO3YsBw4cYMyYMRHZeNB1nYkTJ7Jo0SIWLVrExIkT0XW9Uo+pCrdyWuLj4xkzZgyffPIJmzdvjsg+70hSUlJChw4dePXVVxkzZgzXX3+90ZHKdffdd5OXl8cDDzxAhw4d2Llzp9GRgNKC+P3333PJJZeQmppKgwYN2Lp1a6UXxTPl9/tZs2YNvXr1olevXqxZs6bSpzhQhVs5bUlJSTz55JM888wzbN682eg4ANSrV4+EhASjYxxny5Yt9OjRgxEjRjB79myio6Mj8iTqwYMH8fl8rF+/nuuuu47Bgwfz22+/RcQf5u+++47Y2FgGDRpEp06duPvuu9myZQv79u0zOtoxJk2axLx582jfvj3t27dn3rx5TJo0qVKPqQq3ckbMZjPz589nyZIlEdFtMmrUKC655BKjYxzniy++oHfv3lx66aU0btyYK664IiLf5v/yyy9cdNFF1K5dm759+9K9e3fWr18fEYW7R48euFwupkyZwnPPPcekSZNo164djRs3NjraMV544QWGDBlCdHQ0drudIUOG8MILL1TqMdV83MoZs1gsPPjgg8yZMwe73U737t2NjhRxxo0bR+vWrXn++ed58803Wbp0Kdu3bzc61nGuv/56/v3vf7Nr1y5uvfVWRowYwUcffYTJFBltumHDhrFt2zb+/ve/h1vekcZkMvHcc8+FhwI+99xzlf76qcKtnJXk5GQmTJjAQw89xAUXXEDt2rWNjhRREhMTyczMZNGiRXTr1i2ip51955132LNnD0uWLGHdunWkp6cbHSmsXbt2tG3blp49e0ZUrqMJIbjxxhvDE3VVxbw5qnArZy0uLq7S3xJWVyaTiXr16vHQQw8BVT+XxZlIS0sjNTWVTp06RWROIUTEFu2jVeVEZ6dszwshGgghVgshtgohtggh7gttTxFCrBRC7Ah9Tg5tF0KI2UKILCHEr0KIjpX9JBTjCCEi8pc9UlSX16e65FRKnU5HTBB4QErZBugGjBVCtAEmA6uklM2BVaHbANcBzUMfo4A5FZ5aURT
lPHbKwi2lzJZS/hT62gH8BtQDBgJlI/bfAMomEhgILJSlvgOShBB1Kjy5oijKeeqMTn0KIRoDHYDvgQwpZXborsNARujresD+o77tQGjb/z7WKCHEBiHEhkAgMhdSVRRFiUSnXbiFEHHAf4CJUsqSo++TpYM+z2jgp5RynpSys5Sys9UafSbfqiiKcl47rcIthLBSWrTflFK+G9qcU9YFEvp8JLT9INDgqG+vH9qmKIqiVIDTGVUigFeB36SUzx511/vAHaGv7wD+e9T24aHRJd2A4qO6VBRFUZRzdDrjuC8Fbgc2CSHKZgl/BJgBLBNCjAT2An8O3fcxcD2QBbiBERWaWFEU5Tx3ysItpVwHlDfAs/cJ9pfA2DOPYvzcCKcW+RkjYY6JU4n0jJGeD1TGilIdMp6IiITgiYnJsn3724yOUS6z2U9iohObLcXoKOUKBktISrJU6dVbZ+rIkSOkpqZiNpuNjlKuAwcOYbHUNTrGSWgETIewpluNDlIu3a0TF4yLyFkbyxQUFBAXF4fNZjM6SrkWL15MYWHhCRvNEVG44+MzpNOZY3SMciUmZvHUU6u55557jI5Srvfee4+MjAy6du2Kz+fDarX+MW+xSeewby+FwRykLrFgAwSegJsYcwJNE9oidDM2mxVN0xBCEAwGEUJgMpkIBoPYbLbw57LHDwaDmM3mY/YtuwIvGAxitZYWl7Ir8p544gnGjh1LcnKyQa/SyUkp+fOfJ/DOO88bHaVcdnsB7aZcQ+YjmUZHKVftb2ozN28uAwcONDpKuV5++WV69+5Ns2bNjI5SroyMDHJyck5YuNVcJTWMpmnk5+cTFW/jh8IPSY9qRNDkZafzF7L9e3F4nTi8xdSNborH7yHdWp8dUb+xOz+LcV3/jt8XQAiB0+lECIHdbsfpdJKWlobT6SQlJYXi4mJSUlIoKSkhNjaWoqIirFYrNpsNm82GxWLB6XRGbIFWlOpOFe4aJqvoF/5T+ByiWHDYtxerjCIYlMSSTJq9HkkkU+R24dEDpNjrg27lk53vEm2JZ9qXD3JLu5HUjWlAfHw8UkqCwSCpqam4XC7sdjt5eXnExcVRUlJCdHQ0Pp+PpKQkpJRomhaeIc1ms5Gfn09SUhIWi/pvpigVSf1G1TC1Yhrx1qqNpESlcFGti2iS3opdh/bwxrqlNGuRSK3YOHb8mo25XpBL2/TEHIwi2pJEgSMPe0w8C36YQ7/WN9I2+WIsFitWq5Xc3FzS09NxuVykpKZSkJ9PYmIixcXFxMbGUlJSgtVaum9sbCwmkwmXy0VycnLEzOusKDWJKtw1TDQxzOu3gAc//xsfbf2EzzZ/gV23kZFcG3+uHZ8jjebpjThUtButSOfbn7+lfrsUsg4folmqnyJ3MV6fRtMrWpFkiUYIQVxcHH6/H58jm+3b3sdR4iAlvS5pTXqjaRpRUVHhfuyytfZMJhNer5fo6Gg165yiVDDVHKphTCYTLVKa8Y+r/o7JItiZv5NCTyFxUbG4/W7cARcN0hvQOq09CZ5mNE5og2O7RPh1zPjYd+QQn21axfQPnwBKT9jpug5S4+DWz1jz1kQyP/4HmZ8/gwid19Z1HV3Xw0OrTCYTUspqO9RKUSKdKtw1jNVqJeAP0L1+d/4z9D+kxaViMpsp8hZjtVnwaX62HthCriOX3/dt4+sN39Ioph0DMm7nl1W/06VVA2IcZpZ/spxAMACAo6SII3t/ZO1Hz1PkttPl5lfpc9ebBLTSUSV+vz88gqXsJKWu66q1rSiVRHWV1DDFxcXh/ujWtdvwzYR1DH7lZrLzs7FLGzZpJwo7ufm5SL9ORnJtNKmRcySPAR2HUPRbEYn2InyJ0ezcv51WF7TlqxVPsy3zQxpc0JrLrh5Fu0tuoKSkhLiYGLxeLykpKWiaRiAQwOl0IqUkJiaGvLw8UlNT1clJRalg6jeqhik7WWixWPB6vWTE1GbBrQv4YNMHzPlyDocKssEvibfE06ZeG2z
CxpGiI8RYonGUOBAaxBc3xpFQxNT/TuRPTYeQ9duvJNVuQ/+RM0nNaITX6yUmJga/34/VasXtdofHb0dHl870qGka8fHx6uSkolQCVbhrmLITgoFAIHwRTstaLWjRaxKX1OtCjiuHJ995koN5h9iVs5OUqFRs2MjPy8PnDuB1ehhz4xjG9xhHccwBXn/uXyQf0Xhg2nySazXA7XYTHR2N1+vFbreHL8op6+cuOzlZVtDtdrvBr4ii1DyqcNcwuq5jsVjw+/3HnCSUEro36U5UdBR92/TFarPidDixmQUHd22nVmIqPgkxKbWIskWRnJRMSUkhv1/wM73u6kfj5u0RQqBpGiaTCWdeLgGLmYCmk1q3HiaTKVy8gfC+6gSlcq6OHDlCWlqaevd2FFW4a5ioqKjwuGqfzwcQnhvEbrfj9/uJj4onb8N6ogIeHEdyiD+0l5KiQpIu7EBC+24492Sx2+Nh/+EjbPr6G7p1vIzAwX0c2rGNqOhoSuKS2fv1KvZt/oW4WnWIadKCuNQ06rVtS0bzluHL4BMTE9Uvm3LWsrOzWbt2LWvXrqVHjx40a9aMrl27Gh0rIqjCXcO4XC5SU1NxOp1ERUWh6zo+nw8hBB6PhyiPg91vziU2ORV/dAyJtWqT0OMKpBAIwHNgL7K4ALseJHb3dnr43MhVH3Lo4B6EyUJhwE90ej1a9O5L097XIjWd379Zy+HNv7BvYyYOj5cbH/knyWlpFBcXk5qaqoq3clY2btzIm2++yZw5c1iwYAGff/65KtwhqnDXMAkJCaVzlURF4Xa7MZlMWK1WpJTEWs38PP4eEps0J7nnNZjMFpAa/oP7SifulRKz2UJis1boUhLboCnNBt+Cpun43CVYouPQpE4gEMRTXIAuQdMl9dtdTB0pKc7P5/1Zz/LqvaMZ9/pikpKSKm0mwEAggMViUcMNa6iioiKWL1/OnDlzmDp1KjNmzGDx4sW8//77DBgwwOh4hlNNoRqmpKSEtLS08JA8q9VKIBDAW5jP93ffSEzdetS57iZ0RzF6cQHSUYzwOhEeJ3hdSFcJWkEuwYJcdJeDYHE+mqMQ4ffjLyogUFhI0FFC0OUi6HYRcLvwOx34nKXdMwMnPoDzcDYv3Dmc/Tt3omlahT6/vLw8Nm7cyC233MLPP//M4cOHK/TxlciQmJjI4MGDefrpp/npp59YuXIlGzdupF+/fkZHiwiqcNcwUVFRuFwuhBAEAgE0TcNsNpP7wTJSGjSl3rWDCORlg9eN8Loxed0Irwfh82LyehAeF8JTeh8eJ9LtRHM7CHrcBN1Ogh4nuidUtJ1Ogk4nPpcTv8uJz+Ui4PHS45ah5OzeyZbVX1Z4i3jZsmU89NBDzJo1i6lTp/Lyyy9X6OMrkUEIQZMmTQgEAhw6dIjRo0dz1VVXRfRc7lVJFe4aJiYmhqKiIgA8Hk/pKA+fB8f2X0lq1Y5g3mHwuksLt8+FyefG7Hdj9rkx+T0Inxvhc4PHhfS6kV4X0u1GelxoHjdBt4ugy0XA5SDgcuJ3Owm6XPidLvwuBz63AxPQ+MKL+f6//6U4N7fCntvevXvZv38/r7zyCrNnz2bu3LlIKdm0aVOFHUOJHG3btuWFF15g0aJFNGjQgNtvv93oSBFDFe4IIKWkqKiIFStWsHTp0nN6rOLiYjIyMpBSEhcXh8ViIXvNZ+Dzo2sBNI8L6SktzKUtbhdmnxuLz4XJ60L4QsXa60G63eguN7rHheZxoLtLi3fA80c3ScDlxOd24nM58LuceJ0uPM4SajdrhqOgAGdhYQW9SlCnTh1q167NunXrGDFiRPhEVfPmzSvsGErk6dWrV7W8+tbr9VJYWMjgwYMpLCzE6/VW2GNXv1ejhtm5cydZWVm89NJLXHLJJTzyyCPn9HiJiYnk5OQQHx+Py+XCbDYTY7fisJnR/V70IEiTCUwgTQJMApPZhBAgdRC6BF0idYmuaeh66QlITdfRdAhqkoCU+HVJUJMEdZ2
ADgFdJxC67dd1grpADwagAsdx22w2mjRpwgsvvICu66SnpyOEICoqqsKOUNx5nAAAIABJREFUoSgVZebMmSxfvpzly5dz1VVXMXz4cCZNmlQhj60Kt0F8Ph/Tp0/HZDJh/n/2zjxMiur63++t3qene1b2fTMoRECWQNxQIqIRlyRuuH0JKjHiL0YFJLgnGjdcokYkiiARxYhbNCFxjcEFRVAEkQAyyLDNMHvvtdzfH91dzigDA0zTPXjf5+mnq6uqqz59u/vUrXPPPcfh4Nlnn7Wnix8I0WiUQCAAYM9ajMViWPFYsuesgUNzYGlgOQSWpmFpAg2BJVMG27IwLYllSttoG5ZMGmgzuWyYSYOdMK2UsZboJuiWTBlxC1PXD/jzfJvx48czfvx45syZwx//+Efee++9Vj+HQrE/rFixghdffNF+/f7773PYYYfx/PPPs3DhQhYtWkR5eTldu3Y94HMpw51B0rMWH3vsMY4//nj69+8PwF133cW7777L1KlT6dmzJ7179261czocDrs6TXpg0ulw0bB+Lb5AAcLnw3BoCEey1y00AcKBACySRtewwLRMdFMmH5ZElxa6AQnTxJBJg50woWLzJvLad0TXHOgmyZ64BQkjmXQqU1x++eVUV1ezdOlSVqxYwVFHHZWxcymyixCCm266iTvvvJPrr78+23K46aabdtthGDRoEOPHj7df79y5k549e3L00UdTWVmJx+NptQLKynBnkIqKCgYPHsytt97Kr3/9a1avXk3Xrl2ZOXMmV155JYFAoNWjLtKj7kIIO5e2p7QduNzUr/0c0acf0uNBahrSIZBCkgg3IDx54HJhGgZ6wiAei1D75RoShkHMkMQtScwwiZkWcRMC/QZiut248vKIhSMYQqCbkriZdJls+3ozdZWViAxGARQXF1NYWMimTZsYNGiQijjIIVrzdy2EYMCAAbz00kutdswD4frrr99tp8Ttdje5a163bh0PP/wwo0aNYsqUKVx77bXKcLcF3njjDaZNm8bQoUNZtWoV/fv35//+7/8YOXJkxs6ZTuva0NCA3+/HMAw4cgQlo05k5z+fx4yGKezZBzMvD1MTOITE3LkV4fSA202ioY74rgoSZtKPHTctDFOSMCS6aWIYEt202LrqY+IGOEs7ENcN8OeD20tCCmp3VbN5/XpG//Iyijt1ythnBbj66qv5yU9+wpgxYygsLMzouRQt51DOUZOXl9ei/X71q18xefJkZs6cyZo1a1pVg4oqyRBSSjuDXiKRYObMmQQCATt7XqbIy8ujrq4OIQSxWAzDSBY7iMYTGJYkHgnTsHMbsVA99V9vor7sK8I1tYS2fk39pg2EK5JGO91z1k1JIjXoaFgSw5KYMj1gaVK3bSt1O3aw43//o2b7dio2l7H9q41YFvT+4ZH48vMz+nkhabxnzZqV8fMoFPuKEII77rij1Y+retwZQgjBaaedxg9/+EPuvvtuHn30UdasWcM999yT0fMmEgny8/OJRqO43W5M08Q0TXxdumA4XGDoiIYGpNuNrKrEIS2E0JIz3gFTJgcm9bSv2pIkUhEjugW6tFKRJSR94VJikhzEjMdiRENRLCHw5AeJxeNYlpXxXCU//elP7fEDxaGJpmlomoZhGG0yNLC1UT3uDNK+fXu2bdtGXV0d1113HcuWLTso503fpja+Xe194a/RSjsSMU0ikRjhujqiuklUt4jqFhHDIqKbRAyLqCGJGxA3LOKGRcIgFTWSjBbRLYlpfNMLT5gWFoJwfZhoNIphWAz66TiOu2DCQfm8Qgj69u17UM6lyA59+vTh+OOP56mnnsq2lJxAXboyiBACp9PJb37zm4N2TrfbTTQatXsn8E3xXq2wHcbXm5DSxAxF0EwLh5AIJKQHMwFLymTMtmXZPe94ymgnrORApW5Z6DJp0E0LDMAk6ULpf/RxONDI8/pUZkBFq5CusJTO9/59RxnuQ4x0Dch0WlfDMNB1Hcuy6HnxFXz
824/RLAvDSqAhcGiSZELXJBYyOelGSgxJKn5bohvJiTUJ08IwIWGRmnCT8oNbJnHDwuH1oHlcjLt8MvX19Xi9XmW8Fa3C6NGjD+lBz31BGe5DjEAgwK5du/B6vYRCIYQQuFwuHA4HvX50NMvy8kk01KEJcGoCzRIIIdNZXTFlssdtkexxmxYYqZmSycHKpNFOWCZxE3QzuV/ClEinix+ffR7rVn5Kj4ED8fv9yh+paDV69OiRbQk5w167QkKIbkKIt4UQXwgh1gghfpNaf4sQYqsQ4tPU49RG75khhNgghFgnhDg5kx9A0ZRQKERBQQFSSrxeLy6XC9M0sSyLiK5z4oNP2vHYETPp247qFpGUnztqmkQNk6huEjOs5EM3SRhmctJNKkQwYaSnt5vELTBMi/4/PoZP3n6bKY/Nwe12EwqF1K2tQpEBWtIdMoBrpZQrhBAB4BMhxOupbfdLKe9tvLMQ4gjgPGAA0Bl4QwhxmJSydRMzK3aL2+0mFos1qfmYdlW43W487TvQ8egT+fq/b6Kl/IaCpJ9boiGRqZ530ndtWhaGlN9Mebe+CRFMWBZxM+nv9gQLiMYS/OjUU+nYowemaeJyuVShA4UiA+y1xy2l3C6lXJFabgDWAl328JYzgGellHEp5SZgAzCiNcQq9o7X66WhoQEhBIlEAsuycDgcyWRTeXk4C4vpPOLHxA2ZiipJ9qyjhkw+p6JMooZF3DSJmZKYSeqR7G3HzeQAZdJVYmEJJwNO/AnRRIIfn34mgWAQ0zTx+/3KcCsUGWCfRo2EED2BIUA6rm2KEGKVEGKuEKIota4LsKXR28rZs6FXtCL19fW0a9cOy7KShtrpRNd1dF2npqYGf14eA867hK4njCVqJV0hYd0knDCJpMIDIylXSThlwGO6ScwwiOsmcd1KulqM5ECl6XDxg2OOp3pXFUf95CS6DBxIbW0tLpeLXbt2tXoFHIVCsQ+GWwiRDywGrpZS1gOPAn2AwcB2YJ+mrgkhLhdCLBdCLNf16L68VbEHgsEg1dXVaJpGJBJB13VcLhcul4vCwkIikQgOl4vuJ52K4fLZcdtRUyZjuc3Ua0MSNSz7ETMkMVMSTfu4LQleL+379EU6HUTq6+jSvz/BggIKCwvRdZ3i4mKVP0ShyAAtGvIXQrhIGu2npZQvAEgpdzba/hfg1dTLrUC3Rm/vmlrXBCnlHGAOQCDQQcbj+yNf8W0ikQjBlKsiXeU9Hc+dSCTwer2YpsmIs84mWl3Fq7fcQFNvxjfx3KYlkwWBU1PcDZnMHKhbFlI4yA8WgdvD9k1lXH7PPQw49lii0agdv97Q0EAwGFTGW6FoZVoSVSKAJ4C1Usr7Gq1vnD3oLGB1avkV4DwhhEcI0QvoB3zUepIVe8Ln81FfX2/nSjEMw54u7Pf7icViSCmpr6/n+F9OZuwNt2A4XMnetGEl/d6GRUI4iDZaFzMtElIjZpjEDUkcQSQaY0fZ11x08630+9GPkpkIPR47flz5uBWKzNCSHvfRwEXA50KIT1PrfgecL4QYTDLFRRkwGUBKuUYI8RzwBcmIlCtVRMnBw+Fw4HQ6cTqd9mSF9HLjbU6nE7fHw6gL/o++Q0fy+qMPU78rWR9SAqMmXMB/n/4rUoJlSZy+PLr98Ies/eADLAkSQXGnjlzwu99R3K0bTpfLPm76nE6nUxluhSID7NVwSymXArv79/1jD++5Hbj9AHQp9hNN0ygtLW12e0FBAQB+vx9I5lNp3749A4477jv7jp146X7rcLlc+/1ehUKxZ9RcZIVCoWhj5Mh8ZInHU51tEc3idtcTi8Wors5djZFIhFAolNMadV2ntrY2x/NNmDn9W/R4anHoDjzVnmxLaRZ3yE0kEsnp32IsFqO+vj6nNe7pfyJy4U9UXFwsr7vuumzLaJZwOExlZSU9e/bMtpRm2b59Ox6Ph+Li4mxLaZZ169bRu3fvnHajfPbZZwwaNCjbMppF13WWLv2KmpofZFt
Ks3i91QwZEqdThqsfHQibNm2iffv2tsswF7n33nuprq7e/SBRuqBtNh/t27eXucz69evlnDlzsi1jj7z44ovy/fffz7aMPfL73/9eVldXZ1tGs1iWJadMmZJtGXukqqpKDh16u0ymBMvNR8eOS+VLL72U7abaI7Nnz5br16/Ptow9krKLu7WZysetUCgUbQxluBUKhaKNoQy3QqFQtDGU4VYoFIo2hjLcCoVC0cZQhluhUCjaGMpwKxQKRRtDGW6FQqFoYyjDrWhCKBQiHA5nW4ZCodgDOZKrRJFtLMti8eLFbNy4EafTSa9evfjZz36m0rIqFDmI6nErADBNk+nTpzNy5EgGDRrE1KlTsy1JoVA0gzLcCgAmT57M4sWL2bVrF7qu88wzzzBlypRsy1IoFLtBuUoUADzyyCOMGjWKSy65BI/Hw/Tp01mxYkW2Ze2Vbdu2kZ+fTzAYzLaU3bJt2zYCgQCBQCDbUhSHEKrHrQDA7XYzZswYqqurmTt3LkcffbRdhiyXeeyxx/joo9wtafroo4+yfPnybMtQHGIow60AkrUqZ82axcSJE/H5fEybNk0NTCraBLW1tdx1113ZlnFQUYb7IGIYBqFQKNsy9kjv3r156aWXuOyyy3K8Uo1CARMnTuT000+nT58+9OvX76C597JdyUkZ7oPEhx9+yKJFi7j11ltZsmQJkUgk25KapaSkhD59+vDxxx9nW4pC0SxfffUVPp+Pq6++mt69ezN16lQ+//xzTNPM2DnLyspYsmQJU6ZM4V//+hdfffVVxs61J9q04Q6FQixYsGCv+0kpuf3225k5cybvv//+QVD2Xa655hpqa2uZMGECM2bMYNu2bVnR0VLuueceZsyYkW0ZCsV3SCQS3HDDDVx66aXs2rWLL774go0bN9KtWze2bNmCZVkZO/cLL7zAggULmDVrFgsXLuS5557L2Ln2RO6PPjXDzJkz+eyzzzj99NMZPXo0Dz30EAMGDLC3X3zxxWzdutV+PW3aNPLy8ujevftB1/roo49y5ZVXMnLkSC699FK2b9/OZZddxhtvvGH7kYUQOedTFkIgpcw5XWnSt6q5qk9xYKTLdAGUl5dzySWXAOByuZgxYwYnnXQSv/vd74hGo4wePZoJEybwt7/9LWM1TdeuXUtlZSUPPPAAV1xxBf/73/947733+Ne//gXAuHHjdjv/IRP/7TZpuGtra/n666958MEH0XWd9957j5EjR9K3b180LXkTsXDhQrp27Wq/x+/329sONhMnTuTUU09l2LBh/PWvf2Xy5MnMnDmT4cOH2z/Md955h4KCgqzo2x3BYJBrr72WO+64g5kzZ2Zbzm75z3/+g6ZpHH/88dmW0izFxcVUV1djWVbWfn9tidraWnbt2gXAypUrueOOOwDo2rUrr7zyir1ffn4+Qghef/11Kisruffee1mzZg15eXkZ03bYYYdRUlLCSy+9xJw5c3j66aeprq7mmmuuAeAf//gHQ4cO/c773nrrLYqKilpVS5s03B988AGDBw8mPz+fqVOnsmLFCkaPHs0LL7yAx+PJtrzv4PV6Of7443nwwQfp1q0bxcXF9O7dO6fjpIUQeDwe4vF4tqU0i2EYADkdtvjb3/6WMWPG8JOf/CSnLsy5hmmaLFy4kM2bN/O///0PgEGDBrFy5co9vi8vL48ePXrw0EMPZVyjw+Fg4MCBLFy4EF3X+fjjjznnnHPsGP1zzz2Xc889N+M6oI0a7lNOOYXZs2ezYcMGrrnmGi655BJmzpyZk0Y7zc0330xNTQ2rV69uM77j/v378+677/LFF19wxBFHZFuO4hAnkUgwZswYbrjhhmxLaZZx48Yxbtw4Fi9ezPz587PmpmuThhvg4Ycfpry8nEceeYRnnnmGnj17ZlvSXikqKuLYY4/NtowW07lzZ9xuN2VlZRx++OE55UuOxWIkEgl0XScajeL1enNKn2LfcDgcTJo0KdsyWszPf/7zrJ6/zRrubt2
60bVrV0aMGIHD4ci2nEOW66+/nlNPPZVRo0a1up/uQDjyyCNxu93U19fz6KOPsnHjRgoLC7Mtqwm1tbV8+eWX1NXVsXz5cnr06EHfvn2zLUtxCNBmDTck/bDKaGcWTdMyGl61P7z66qtMmDCBY445hvfee4+OHTuyaNEiJk+enG1pTfjoo4+47bbbqKio4KmnnsIwDJ5++ulsy1IcAqhhbsVeueWWW3IqsqRjx45s27aNI488knPPPZetW7c2iSDKBUKhEC+99BJPPPEE/fr1484772To0KF26JhCcSDs1XALIbxCiI+EEJ8JIdYIIW5Nre8lhFgmhNgghFgkhHCn1ntSrzektvfM7EdQZJpRo0bx5ZdfZluGzbBhw1i5ciVTp07l73//O/PmzeOYY47Jtqwm5OXlMXbsWBYtWsTChQvZtm0bq1evblNjHIrcpSWukjhwopQyJIRwAUuFEP8ErgHul1I+K4SYDUwCHk0910gp+wohzgPuAg5OjIwiIwghePPNN7MtowkfffQRq1at4osvvmDz5s05NzCpaRo9evRg7ty5tG/fnjfeeINRo0ZlNM5Y8f1hr4ZbJmeIpDMjuVIPCZwITEitnw/cQtJwn5FaBngeeFgIIaTKWNSmyTXDKIRg0KBBDBo0KNtSmmXIkCG88sorLFq0iKeffjqnw1UVbYsW+biFEA4hxKdABfA6sBGolVIaqV3KgS6p5S7AFoDU9jqgpDVFKxRtiXPPPVcZbUWr0iLDLaU0pZSDga7ACKD/gZ5YCHG5EGK5EGJ5NBo90MMpFArF94Z9iiqRUtYCbwOjgEIhRNrV0hVIZ3TaCnQDSG0vAKp2c6w5UsphUsphPp9vP+UrFArF94+WRJW0E0IUppZ9wEnAWpIG/Bep3S4BXk4tv5J6TWr7W8q/rVAoFK1HS6JKOgHzhRAOkob+OSnlq0KIL4BnhRB/AFYCT6T2fwJYIITYAFQD52VAt0KhUHxvaUlUySpgyG7Wf0XS3/3t9THg7FZRp1AoFIrvoGZOKhQKRRtDGW6FQqFoYyjDrVAoFG2MnMgOaFkW7733XrZlNMuOHTvYvn17TmssKyujpqYm5zL5Naa6upqPP/4Yv9+fbSnNEolEcvp7DoVCeL3VdOyYuxqLitZRVtaQ0+24fft2Vq1axc6dO7MtpVn29F/OCcMtpaSq6juh3jlDXV0d0Wg0pzWGw2GefFKjoSF3NXbvnuBHP6ohFotlW0qz1NQYXHRR7rah0xmh07iP8U17IdtSmsW9KUg4fE5O/19isRg31N5AzJm7v8W4bL5sYE4YbofDwemnn55tGc2yYcMGTNPMaY2WZVFR0YEdO0ZlW0qzlJSsYuzYsTlVkKExUkoWLHidTZty93v2eKoJdryXTadvyraUZun4XkcG7BqQ0/+X7du3s+24bdT1rcu2lGbJd+Q3u035uBUKhaKNoQy3QqFQtDGU4VYoFIo2hjLcCoVC0cZQhluhUCjaGMpwKxQKRRtDGW6FQqFoYyjDrVAoFG0MZbgVCoWijXHIGO5Zs2aRSCSyLUOhUCgyTps33O+88w5HHXUUPXv2ZPTo0dxyyy3ZlqRQKBQZpU0bbl3X2bhxI//v//0/jjjiCObNm0dNTQ27du3KtjSFQqHIGG3acMdiMTZu3MjAgQP597//zWuvvUa7du346quvsi1tryQSCZ5//vlsy1AoFG2QNm24A4EAI0eOZOLEiZx00knMnDmTsrIyRoz4TinMnCMej/PII49kW4ZCkXPce++9VFdXZ1tGTtOmDTfA2LFjWbJkCX/4wx946aWXsi1HoVDsJ6tWraJPnz50796dn/3sZ1x00UXZlpSztHnD7fV66dKlC08//TSHH344hYWFlJeXZ1uWQqHYByzL4tNPP2XatGn07duX5557jvz8fDZu3JhtaTlJmzfcaYQQdOvWjf79+/Pmm29mW44iy5SXl/Pqq69mW4aihViWxdatW+nSpQtlZWU88MADlJa
WUlFRkW1pOckhY7jT/PSnP2XFihWq1/09ZuLEiUybNo01a9Zw/PHHqyijNoDT6WTs2LFcccUVFBcX89RTT/Hoo48yY8YMPv3005yupZoNcqJ0WWvSqVMnvF4vmzZtokuXLgghsi1pt2zZsoUuXbpkW0ab4euvv25xrcrly5czb948OnXqRFlZGZs2baKkpCRnfwuKJIMHD2bt2rXceOONLF26lNLSUgCmTJlCRUUFDzzwAB06dKCgoCDLSrPPIWe4Ae666y6GDBnCJ598krN/1gsuuIBPPvkk2zLaDHPnzmXTppbVWdy+fTsPPvggJ598Mueccw7PPvssw4YNy7BCxYHicDjIz8/n/vvvb7J+3rx57Nixg+nTp9O/f3+6devGhAkT0LRDzmHQYg5Jww0wffp07rnnHqZPn55tKYpWYF9mxA4ZMoTevXvTvn17fvnLX7J06dKcvYArWkbHjh2ZP38+S5cuZe3atVx22WWMGzeOs88+O9vSssIhe8k67bTTeOONN1T+ku8hL7zwAiNGjOCdd97hn//8J+3bt8+2JEUrccwxxzBp0iSmTp1KWVkZ7777brYlZYVDtsft9/u54YYbuO222/jDH/6QbTk2O3bsYOPGjYTDYd5//326detGjx49si3rkKJXr1707NmTcePGfa9vpw9VNE2jf//+HHbYYd/bO6lD9ledDg90Op05NQX+lVde4Y9//CN1dXU88sgj/OUvf8m2pEMSIYQy2oc4mqYpw90cQgivEOIjIcRnQog1QohbU+vnCSE2CSE+TT0Gp9YLIcSfhBAbhBCrhBBHZfpDNEfv3r1xuVysW7cuWxKasHnzZjZs2MDs2bPp3Lkzf/rTn3C73axYsSLb0hQKRRuiJV2SOHCilHIQMBgYJ4QYmdo2VUo5OPX4NLXuFKBf6nE58Ghri94Xrr76ahYvXkxNTU02ZQDQpUsXevTowZIlS1iyZAnLli1D13UGDBiQbWkKhaINsVcft5RSAqHUS1fqIffwljOAp1Lv+1AIUSiE6CSl3H7AavcDv9/P448/no1Tfwen00nfvn3585//jKZpvPLKK5x99tl4PJ5sS1MoFG2IFjkBhRAOIcSnQAXwupRyWWrT7Sl3yP1CiLT16QJsafT28tQ6BXDyySfz8ssv43Q6efHFF7nggguyLUmhULQxWmS4pZSmlHIw0BUYIYQYCMwA+gPDgWJgnwKmhRCXCyGWCyGWR6PRfZTd9rn44ou/twMrCoXiwNinYXcpZS3wNjBOSrldJokDTwLpJNhbgW6N3tY1te7bx5ojpRwmpRzm8/n2T71CoVB8D2lJVEk7IURhatkHnAR8KYTolFongDOB1am3vAJcnIouGQnUZcu/rVAoFIciLZmA0wmYL4RwkDT0z0kpXxVCvCWEaAcI4FPgV6n9/wGcCmwAIsDE1petUCgU319aElWyChiym/UnNrO/BK48cGkKhUKh2B1qaplCoVC0MZThVigUijaGMtwKhULRxlCGW6FQKNoYynArFApFGyMn8nEbhsFjjz2WbRnNUldXR3l5eU5r/Oqrr+jePY/S0lXZltIswWAZCxYsyOncLIZRzcCBufs9OxwxCjYVMPCxgdmW0ix52/P4IPYBO3bsyLaUZlm9ejV96vqQKMjdQitfG183uy0nDLfD4WDMmDHZltEs5eXlaJqW0xqdTicjRxbzwx/+MNtSmuWJJ8r4/e+PRdcD2ZbSLCedtIIXX8zd77m+vp7FiyuYOGb30yMkEomFlBKBsNcBaMJhr8skq1atora2luOOOy7j59pf6urqmDViFl27ds22lGYZpY1qdltOGG4hBH379s22jD2yfv36nNa4evVqOnTokNMa/X4/DQ09iceLsi2lGSSa5m7VNty+fTv5+fkEAq1zsaqursbv99OrVy+qqqqSK3069eFaCgoK+azibd6LvEpDrAbLEPi1YsLxMJF4mEm9b8Xr8tEpvytF/hLq6upwuVyEQiFKS0v
ZtWsXwWCQSCRCaWkp4XAYh8OBruuYponD4SAcDtvbCgoKqKystKuxpwtX7Ny5E4fDkdO/xYKCArp27Uq3bt0IhUL4fD7C4TAulwun00k0GiUQCNjb4vE4QghcLheRSIRgMEhDQwM+nw9d1/F4PCSnsIDb7SYUCpGfn084HCYvLw/DMLAsC4/HQ0NDA4FAgEgkgtfrxbIsDMPA6XTi9XrtHEZ7KgSSE4ZboThU+fOf/8yJJ57ICSec0KrHjRohPo++Q8ioo7x+DVWxHXirAwjLSXutF118P+SLXR/jdAQYGBiMlu/gs+oPeHXDIk7ucTZjepxGB28XpJR4vV7i8bhtRNLGybIs2xiljUh6XyEEkUgEt9ttP7vd7lb9jAeDUChEQUEBoVCIoqIiDMNA13WKi4upqamhqKjINsJSSuLxOKWlpdTU1FBcXEwkEiEvL49oNIoQAsuy7GNWVVVRUFBAXV0dTqcTTdOorq6msLCQqqoqgsEg9fX1CCHweDxEo1E8Hk+Lks8pw61QtEE0ofGnjx5BN+N0DXald1FvPA4/895aQDDg5rAenajaHKYqvoZBA2spdrdHNy06+fqwZscqMJy083Tg5MNOB7CNTnpZ0zQsy0LTNAzDaHJuIUST0nBtuYSYz+cjFArhdDqpr6/H4XCgaRp1dXVcddVVDBs2jMmTJxOJROzPXFtbi9frpb6+HqfTSSwWw+lMmlJN0+yLW0FBAYlEAr/fj2VZzJ8/nzfffJPHHnuMgoICdF23t0kpW2y0QRluhaJN4nHk8Yfhf+bMRWdQ4TbZ4KwmT+RRLHqQF/MQKctn19YoX+6owJP3Od6qYmqKd+F3FuPU3NTVx4glEozsehxO6cLv9xMOhxFCJG/9XZJELIzL6QDhxZISh8NBPB7H7/djGAYul4twOEwgEGizhjscDlNUVER9fT35+fmYpomu6wSDQf7xj3/w8ssvY5omF198MYWFhcTjcYLBoN3jDoVCuN2O+SXYAAAgAElEQVRuYrEYgN3jLiwspLa2loKCArZu3cqbb77J9OnTicfjPPnkk9TW1hIMBgmFkjVq0sbe5/O1qC1VOKBC0QaJxWL0bteT5855jnq9lrc3vMO/1/6bL3as4eOvVvD6Z+9wyUmXcsbgczg2eD7VO6Czv4ianZXUh+r4onwdX5Sv54+v34Hm1QiHwwSDQUzTxCVj/PXGH7D4D0fw7K2HoYercLvdCCEoLCwkHA7bvdK8vDxqampsw5Vp1qxZYxu71sDlcmEYBg6HA9M0k4O6qTsKgGg0yvTp0+nRowfLli1DCGH7ow3DQNM0pJRomobD4cDhcNj+brfbzapVqxg+fDhXXHEF4XAYSAZjpN1KLpcLl8tl9+ZVj1uhOITJy8ujsrKSLv7OPPqz2Vz13FVU1FTQt6QfDunASpj87b1F+B1+orEIbqeLnR856d9jGNsqNlJfUkGp3o1n/rWIsT3HceqPTqWyshKvGz7514PUhXTadx9Gv8E/QbjyiMfjOBwOqqur7cHJ4uJiKisrKSkpyXiPu6qqigceeACn04lpmnTr1o3LLrvsgI/rdDrRdR1N09B13f4cc+fObXIxSiQSTJgwgYsuuoizzjqLnj17ctdddyGlTF7sXC4gaYgvu+wydu7cycKFC3n22Wepq6uzj2OaJnPmzOGyyy7DsiycTqc9juBwOFqu+4A/uUKhOOhEIhHy8/MBGOYdxjMXLeSMv5zJlxXrCDgD+ISPuIhTGd/FjsrtVO+q5qfDT6PU3RkLB0fmD+Pfn/2TYo8Tj+aioaGBuooN/P2VB6jYvJz2XY7i2HNmUdi+J5oQOBwOLMuipKSEcDiM0+mkqqqKQCBATU0NeXl55OXlZeSzSimpqqri448/Zu7cuaxfv54bbriBSy+99IAvGNFolOLiYurr6wkGgxiGQSKRYOHChSQSTWO8t23bxl133cVrr72G3+9n+fLlmKbZZB9N03jttdeQUrJy5crdfpY5c+Z
w3nnnUVhYSCgUQgiB1+slkUjYPf69oVwlCkUbJN07k1KiCY2+xf1481dv0rfjYdTH6lm3438s37yCVVtWEcgPMnzAcKJ6lK93bkY4Neq3Jhjd5xTy85zc+NcpbNq2ga83rObLzz/h2NNn8PMpCyjp2BtBcjAybVDSYYFCCJxOJ5Zl2S6CxrRmD1xKyfTp05kzZw533HEHHTp04De/+Q0PPvjgAR87feHxeDxUV1cTiUQA0HXd3ue+++5rModj9erVLFu27DtGG5I+7hUrVjQx2h06dGD+/Pn2a6fTSbt27dB1nYKCAvx+P5C8i1KuEoXiEEbTNGKxGCLVG9Z1nY4FHVky+VVe+/w1Xv38H3yw5n12VO0kkghTZTmIOxJYCQsMWLvuC8YOP5njSn9B+1GCq+47nx9UOhg8bAyHDT2FvPwC20inox6EECQSCVwuF6Zp4na77UHKbxuc9O1/a33Wu+66iwsuuACHw8Hzzz/PkiVLWLp06QEfOx0GWF9fT3Fxsd3jTrs+IGnEX3zxRYqKinZrrPfGmDFjmlwIDMNg165dFBYWUldXZ/e4VTigQnGIE4vFbNdENBrF7/dTW1tLIBDgxL5j+PnwX7BkxRJ2NOwgEUsQ8OYTjUSJRxMgBcYJBt07dOPEESdSXFRMcEcxW97/jJN+diWl7TtTVVWF3+9H13WcTqdtpNPxyV6vl9raWnviTiAQyGgcd4cOHbjwwgtZsGABpmly3XXXtcpx0+GALlfSXZQeIGxsoH0+H/tb0PyXv/wld999N//+97/tdQ6Hg2Aw2CQcELAHgFvCIWe4DcOweyEKxaFKXl4e9fX1QPIPn56Nl/bZhsNhTh5yMnW1teS53URrq/h6/sPENqzF26kL/X/7exIuFw5g147t7Fi5DY+/Pd2696W+upqiQICErrPh7y/wyd8WIFxe+p9+Dn1Gn0hRSQmmaVJaWkooFKKkpMSOY84UBQUFdOzYkXPOOYfJkye3Wr6beDxOfn4+kUgEn89nz2L0er32PolEAo/HY0ee7AtnnHEGQJOBTikl4XAYv99vr3e73U165XvjkDHcUkqWL1/OBx98gKZpjBw5kqFDh7bZ+FKFYk+Ew2F7Nl80GiU/P9+OG04/71y5DFG+ibLXnsPl83PkrfeD5kI4NMxdO1h74/WYQsOKWVhrP6f9kUdR9vw8trz7NpGGevK79eIHZ57P+NtmYRk6X7z1On+deD7ugiJO/H/XkN+xMz369aOurg6fz2cPlmaK2tpa8vLyWjVJWWP/vZTSdvG89NJLdOzYkYaGBjZv3syKFSu+MxGpJWzYsIGhQ4eyYcMG+3xnnXWW3bFsHHq4L7bqkDHclmVx1llnMWvWLHt58+bNynArDkk8Hk8TH3cikcDr9aLrOl6vl13v/ovNs26k23mXMmDaHQgB4XVrSf8dpBAMvPE+pIDYju0UfbiURCKBQ2gMmzINnC7i0QiJaIRIVQWWlPQYOpzuQ0dQV13N4ptmEuzWnUvufQBfMJjxHnemcLlcxONxNE2zp/ILIZr0kB966CEeeuih/Tr+tddey7Zt25g1axaQ9NdfffXVeDweLMvC7XbbF4t9acNDJqrkxhtv5PHHH6ekpISOHTvy2GOPcdNNN2VbVpslEolw8803Z1uGohnS0RyNJ4BYloUQgsp3lrD+gVvoOWEywd6HEd9aRrx8MyIWRsTCEAtDNEx045dE1q/FaKil/YhRdD7meAq69yJauYPw1i3EqnZhhMMY0Qh6JEK8IUSsvg6Hw8HxF11M/ZYtPP7rK+wwtrZIOqwy7W9OG9JZs2btt1/726SNNiS/txtvvJG6umQ7hkIhotGonQelpe3YNi+Tu+Hqq6/m/PPPZ/z48TidThYvXsxzzz2XbVltFl3XW2XUXpEZ0lEdjWfyRSIRRNVOdr70V7qfeQGe4lKsuio0NIRIzQgEBGAhwUouY0kSkRCmlBgWmJbEkhJLJpeN9LMlMbHQTXB7fBwz4UJ
efvB+Hv7lRK5b+EzGP28ikcDn87XqcdPT171eLzU1NUgpeeSRR7j33nubuEaKiopwOBxNwiJramp2e8yCggJcLpd9IbUsy95XSsnjjz+Ow+Hg5ptvtiNVTNPcp3DAQ6bHXVpaSmFhIU8++SRXXXUVPp+PkpKSbMtSKDJC2qedzjxXV1dHYUEBOz5fSbC0I/7CEqxQLcQiiHgILR7BEQ+jxSPJR7r3HQ1DLATRMFYkjIyEMCMhjEgII9xAIhxCDzWQCDWQCDcQb0g+x0L1WIbOSZMupaa8nIaKiox+3o0bN7J06VIuuOCCVj1uQ0MDhYWFJBIJAoEAjz32GLfddluTyTdHHHEEK1asoLy8nI0bN1JRUcHy5csZPnz4d453+OGH89Zbb1FeXs7nn39OeXk5H330EYMGDbL3MU2TP//5z9x9991s27bNngofiURa3OM+ZAy3pmksXryYp556iqOOOooHH3xwj/lsc5WXX36ZrVu3ZluGIsdJJyTyeDyYppkMa6urpfY/S9B8XvSGGohFkNEIxJKGWotHcMbDOOIRRCwC8Yi9jxkJI6MRrGgYKxrBikQwIhGMSAg9EiaRfg6HSYRDJMIh4uEQeiyBy5/PO89mtsedprXHrHw+H5FIBKfTyc6dO7/jXh0wYACzZ8+muLjY9oXX19fTrl07Zs2aRb9+/ex9PR4P1113Hf369SMejxMIBNB1nQ4dOvDEE08wYsSIJseeNWsW4XDYHmz9XocDDho0iIEDc7esU3Ps3LmTn//855x55pk888wzeL1e5s2bl21Zihwl7RqB5B8+kUjg0QSxr76gZMxpWNEwpqbh0ESye6aBQ3OgaWBJEJYESyItibQspCmxLDAtC8sCw5LolkSXFrqZdKEYlpVcZ0kMM7UsoWPPHuit5A8+2Oi6Tl5eHrFYjF/96ld2dEma7du3M23aNEzTpH///jz88MN4vV4ikQhDhgxh7NixrF+/HoCxY8dywgkn2C6dSCTCLbfcwsqVK7Esi82bNzc5txCCK6+8khdeeAG3271PoYaHnOFuK1iWxfr16+0fyY4dO/D5fIwbN45LLrmESZMmsXPnTjp06JBlpYpcpHH4mh3SpgmkZWLFIhgaaJoDSxNITYAmkA4BacNkgbQklmVhmclnwwLDtDAk6IaFIZN+7YRpJQ25aWFYFglLoJsS3bLQTYtYuPWy9R1s0gUMnE4nTzzxBP/5z3+YMGGCvb26upoPP/yQPn36cOedd+JwOIhEIng8HuLxeJNIkEAgQLt27ewoH7/fz0033cQpp5zCihUrvnPuP/3pT5x//vlNCli0lEPScB933HG8/fbb9O3bN+vhgJs3b+aNN974znrTNFm2bJn9OhwOs2HDBu6//35uuOEGTj75ZN54441W9+kpDg0SiYQ9U9E0TbxeL7G6WsxwhNjObfiCBZiaA80hEBoIhwChYaFhITGkxLSSBtkw071qiSEtEibo6R61mRyMjEajxHUdPD4SlkwZbtAtk3gkQiZjSqSUvP322xmpYdk4qZPD4eDdd9/9zj6HH344ixYtIj8/H6fTyeuvv05FRQWFhYUMGjSISy65BMMw+NGPfsSyZcsoKyvD5/Nx5pln4vV6efnllznttNP47LPPmhz3448/5uyzz7Y7b/sSmXNIGu5JkyYxZMiQVskedqA0rhTSGI/Hw+OPP27r27JlC8cddxznn38+Tz75JK+99hqffPLJwZZr4/P5GD9+PC+++CJnnXVW1nS0dc466ywWLFjAyJEjWzUiwuv1UlFRgRACv9+frIMYyMeSUP/lGhz9+iN8XtC0VE87FUmiGwiPF1NaScNrGIS3bSEWDhMzLRKmJG5I4pZJ3ABXSQcIBIlFosQTCYRhkkjtp1uShGGyefVq+g4fsXfR+4mUktmzZ+82215rkK70EwqFmD17Nqeffjrr1q1j3bp19vlnzZrFPffcgxCCqqoqrrnmGn784x/z/PPPc9ZZZ9npWSdPnsz
zzz/PfffdByRnct94441NjHKXLl0YM2YMf/3rX5k+fTp5eXktzgqY5pA03LlE9+7dmThx9xW5G9OxY0eWLFnCvHnzGD16NJMmTToI6prH7XZz5JFH8s477yjDfQAcddRRTJ06tdVD2dLFetOTRQKBAA2hBo6Yfjtrbr0a8/MwpT8YiPS4MTWBKUDEI1i1NTg6dMYyTBo2rME0JLF4nLiuEzct4gZEDZO4YREzLfQd29BxIP0FOAoKkZEYhsOJbkLCtNjw+So0dx5HHHNsq322g0m6sK/X68Xr9fLRRx9RWlrKhRdeaO/z5Zdfsm7dOt59913OPfdcJk2aRHFxsR3uZ5qmXTzBNE3y8/MZP348c+fO5f7776esrMzORwJQWFjI/fffz1VXXUWvXr3sqkP7MgFHGe4cweVy8YMf/IDbb7+9yTRYhaI5TNO07+aSvUYHIlCEblho4TDVX3xKQd/+aKaBwzIRehy9citsL0/GalugWxYJK9mDThjJXrRJKnZbQiKeIKabxOoaiG/ZQsy0MFwe/B07s61sMw0NEXqOOIyBGXBjHAzShX3j8TjFxcUUFRWxZcsWYrGYPakJkr3uTZs2ceedd7JmzRpeeeUVnnzySaSU+Hw+O3xw4MCBXHfddVx//fUsWrToO+4PTdOIRqNs376dww8/3J7k43K5iMViLZ7O32LDLYRwAMuBrVLK04QQvYBngRLgE+AiKWVCCOEBngKGAlXAuVLKspaep7W44IILeOaZZ9qcj7gthjAqDj7pqdpp451OrxoCLK+XRDwGukG4tgbC9YhQA5om0BBIJKa0sGTScBsWKZ/1N75rI+3/tpL+cMuSmFJiWmDqOqGaWmKRKA6PFylbP0zvYJGfn29XY6+trcXtdrNx40Z+/OMfc/LJJ1NfX28PYM6ePRspJX//+98ZNWoU06dPt6vd+/1+pJRce+21LFiwoInRnjJlit0jTycH27BhA507d7bLxe3rHdm+9Lh/A6wFgqnXdwH3SymfFULMBiYBj6aea6SUfYUQ56X2O3cfztMqTJ48mfHjx7c5w50rTJo0iTVr1lBVVcUnn3xiD84ocoN4PG5nsItEIuTl5SXTrB7+Q4qOGcvOf72EhYGsqsIpLDTDQmgCkTLclmxkiKVM+rZN2cSAG40GLw2ZHLA0pcTQJfGaOiwJDq+X8dOm2jlSMsGMGTO4++67M3LstMspkUhQUFCAlJJjjz2WE088kVgsZlem0TSNfv36cc011wDwwAMP8Nvf/tYOJ0wkEvYsyfvuu8822jfffDNXXHEFXq/XnuXq9XqJxWJ2VkfArhbf0tS4LereCSG6Aj8FHk+9FsCJwPOpXeYDZ6aWz0i9JrV9jMjC5VgIoWZO7ic1NTVs3LiRadOmccYZZ+D1etmxY0e2ZSka4ff7CYVCTXJJFxQUEBcOgj36YlgQ1y2ikSjRaIKIaRE1LCJG8jlqWMSMpLGO6jI5MGlZJFLhf7qUxC2JYUoMKUikety6ZaH585OuBLcP3TAYddLJGStbBrBs2TJGjRqVkWPn5eU1acO0y6O+vh6fz0d9fb1d3f7www+332cYhl1LMhaL4XK5mhQBTtOvXz+KiopwuVxomkYwGCQajVJQUGDnR0n3tPcln3lLe9wPANOAQOp1CVArpUxP5i8HuqSWuwBbAKSUhhCiLrX/rharagXy8/NZvHjxwTzlIcP8+fO5/PLL6du3L4lEgjPPPJMHH3xwvzOkKVqfSCRCIBBoslxXV0cgEEDr2Q+tXWdiO8rRZQIHAodGKjNgsq8mZdNed3pyjR0tYproZtJ4J6x0PLfEMCFWU4sl4MgxJ+AtLqGyspLCwkJbT1sineclHUeddlWmixK7XC6klDgcjiaDh0IIO+46ncOk8SNNuhp8ep2u63acd9rFlfajNx7A3Bt77XELIU4DKqSUrRqbJoS4XAixXAixvLWycClah6uvvprbbruN//73vxQVFXHhhRdy2223ZVuWohF
pv2s0GrUHvNK39T2OHo23S3eipkUsFR2S7GFbxAyDmGEQNUyihvnNdttIpwYqTZmM504b81Sct24lXSilPXvx1eo1nPbrKQSDwYxWv8kk6VDAtHFuHNOdzsCYzr7Yq1evJoUR0vMz0i6StP+7qqoKSJYsGzhwoL0tHXWiaRqmaTZ5H7R+HPfRwOlCiFMBL0kf94NAoRDCmep1dwXSCTa2At2AciGEEyggOUjZBCnlHGAOQIcOHdpmTshDmEWLFrF69Wo+/PBDnnvuuTbZmzqUSf/x03/+dARE2uAMm3obf79wPNFoCIcQyYFJmex1S8ACrHQWQCSGkYwkSRpnC8OEhJU05rplpaJPkgbcEwjSvu8PaNe3L8WdOtnlvjL1OTM5YJ8uEhwMBqmrq8PtduNyuexKQtXV1QQCASKRCIWFhRx77LG8/PLLhMNhpkyZQrdu3WzDDlBeXm5nAhw6dCidOnWy86Snc8rU1NTYleXTpcsSiUTrhgNKKWcAMwCEEKOB66SUFwgh/gb8gmRkySXAy6m3vJJ6/UFq+1uyrSbr/R6Tzvmyr1NxFd8lEz9/0zTtP3r6lj4SieB2u4lGoxT27kNe915UrPkUTWg47JSuFhINKVI9wNTgpGnJVArXdD4SYfe0dcsiZiZdJgnLJBAsRHO76TVoEIHCQurr69E0LSO97ltuuYUbbrjBroTe2qSzA8ZiMQoLC7EsC9M0KS4utsuyRaNRAoEAUko7PwxAZWUllZWVzR47fReUzr2taRo1NTX4/X6qq6ttH3ra7ZIuFtwSDuRSNh24RgixgaQP+4nU+ieAktT6a4DrD+AciizicDiU0W4FMtEb9fv9NDQ0EAqFcDqddjxyJBKhpKSESCTCKY88SVy3iBsmUd1MuUdk8jlhEdWT7pN42o1iSqImxAxBzLBImBZxM7leNy0ShklRl+70O/pYvHl+xp53Hg0NDZSWlmZscDLtg85Ujz4QCFBTU4Pb7aampsaOq04XQN61axcOh4P6+noikQjDhw+nW7duez1ux44dOeGEE+wLgsfjQdM0ux5oaWmpHcmSvijtSxvuk+GWUr4jpTwttfyVlHKElLKvlPJsKWU8tT6Wet03tf2rfTmHQqHYO9FolLy8PHw+n52EPz0DsK6uDq/Xi3S6GXTRpUlDbSYNd0T/xredjC4xk/5vUzYy4slp7XHDIm77uyXBjl3oPWwE28rK+MnEidQ1hPD5fNTW1jYp9dWWiEQidsX1YDBohzQWFhba7hHTNPH7/Xi9Xo4++mjmz59PYWFhs8d0u908/vjjjB49Go/HQ0NDA7quI6W0o1VqamqScfepCjjAPrWhmu2hULRBPB4Puq7bUQrRaNSewZefn58sDFBUTOmo49DadSJqSCKGRcRMhgR+ExYov1k2LWK6mexlG8kQwbhpkrAk7mAB7fv2o6piJ5GGEL0HDyYQCBCPx/H7/Rm7M5s6dep38li3Jl6vl3A4jNPpJBwO2+GA6YtgQ0MDDoeDWCxm16Q8/PDDWblyJfPmzSMYDBIIBAgGgwSDQe6//37WrVvHqFGjCAQCJBIJ8vLy7LuGdGX3QCCAYRhNih9nIhxQoVDkEI2nYqcjIhrnzkgPWvYaMYphF1/KW/ffgx4J2++XqYk4UiYHKU3S/m6S6VztCTgW3uJS8jt0IhKN4vF4uev1f9saGg+KZoLi4uKMHDdN4/JiaRqXJ2u8LZ0+V9M02rdvzymnnMLXX3+NYRj2zEjAHm9I59e2LMuOHmn8HUFyfKJx1ElLUYZboWiDmKZph6qlDadhGGiahq7r9rPb7ebYSb/ClJJX/3ArsomBSkaYmJJkTHd6Wrv8Ji+3IQWaKamrqaFnp05ces89aKlMePF43I5JFkK0yUrvjY1uenYjJHvi6XS50LQ3nN7WeOJM45A+XddxuVx2pIiu6/Z7E4mEvS39nTW+ULQU5SpRKNog6ZjtWCxmJ/dPr0tXLU/f6muaxogJF/OLe/9E1yH
Dk/7s1KPLsBF4O3QkZlqph6TfcaOJWySnwFsQi0Q56qSfMPGPfySvqAiPx4NlWeTn5xOPx8nPz2+zcdxpw5qeDJM2no2NbnqqeroHns7kl3arpEMW0ymcXS6XXczZsiycTqe93eVyYRhGk23pC96+3LW0vUukQtFGiEajVFZWEovFKC8vR9d1SktLW+34aTeCEAKfz4cQwl5XVFSEEILOnTvb20+8+P849uxzMRv1AB0uF5ZlYpnf9MSdbjd6o2K5AG6vF7fXa/cOg8GgnVairSaYguQF0OPxNGlD+MZdkt7WmHQ19t1tS7Mnv/X++LS/jTLcCkWG+O9//8u1115LRUUF1157LSUlJTz99NOtdvzGE1PSBmRvz44WJgrzNhM33dxx2yqNUyg3/ix7+ny58NmVq0ShyACRSIQ333yTuXPnMnDgQP7yl78wYMAAli5dmm1pikMAkQuTGouKiuRFF12UbRnNEo/H7VlUuUpdXR1OpzNjM8xag507d7JzZylSZiYCoTUoLNxKjx5d9r7jXjBNk82bN9O7d282btxIz549qa+vx7KsA/odmaZJVVUV7du3P2CNmSIcDmOaJsFgcO87Z4mqqiry8/NbPFMxGyxYsICamprddutzwnALISqBMAc5g+A+UIrStj8obfuH0rZ/HGraekgp2+1uQ04YbgAhxHIp5bBs69gdStv+obTtH0rb/vF90qZ83AqFQtHGUIZboVAo2hi5ZLjnZFvAHlDa9g+lbf9Q2vaP7422nPFxKxQKhaJl5FKPW6FQKBQtIOuGWwgxTgixTgixQQiR9aILQogyIcTnQohPhRDLU+uKhRCvCyHWp56LDpKWuUKICiHE6kbrdqtFJPlTqh1XCSGOypK+W4QQW1Pt92mq5F1624yUvnVCiJMzqKubEOJtIcQXQog1QojfpNZnve32oC3r7ZY6l1cI8ZEQ4rOUvltT63sJIZaldCwSQrhT6z2p1xtS23tmQds8IcSmRm03OLU+G/8JhxBipRDi1dTrzLTbt6sTH8wH4AA2Ar0BN/AZcESWNZUBpd9adzdwfWr5euCug6TlOOAoYPXetACnAv8EBDASWJYlfbeQLG/37X2PSH2/HqBX6nt3ZEhXJ+Co1HIA+F/q/Flvuz1oy3q7pc4ngPzUsgtYlmqT54DzUutnA1ekln8NzE4tnwcsyoK2ecAvdrN/Nv4T1wALgVdTrzPSbtnucY8ANshkNZ0EyfqVZ2RZ0+44A5ifWp4PnHkwTiqlfBeobqGWM4CnZJIPSRZz7pQFfc1xBvCslDIupdwEbCD5/WdC13Yp5YrUcgOwFuhCDrTdHrQ1x0Frt5QmKaUMpV66Ug8JnAg8n1r/7bZLt+nzwBghMpPEYw/amuOg/ieEEF2BnwKPp14LMtRu2TbcXYAtjV6Xs+cf8cFAAv8WQnwihLg8ta6DlHJ7ankH0CE70vaoJZfackrq1nRuI7dSVvSlbkGHkOyd5VTbfUsb5Ei7pW73PwUqgNdJ9vJrpZTGbjTY+lLb60jWoD0o2qSU6ba7PdV29wsh0vPYD3bbPQBMA9KpFkvIULtl23DnIsdIKY8CTgGuFEIc13ijTN7b5EQoTi5pacSjQB9gMLAdmJUtIUKIfGAxcLWUsr7xtmy33W605Uy7SSlNKeVgoCvJ3n3/bGn5Nt/WJoQYCMwgqXE4UEyykPlBRQhxGlAhpfzkYJwv24Z7K9C4ZHLX1LqsIaXcmnquAF4k+cPdmb7FSj1XZE9hs1pyoi2llDtTfy4L+Avf3NYfVH1CCBdJw/i0lPKF1OqcaLvdacuVdmuMlLIWeBsYRdLNkE4D3ViDrS+1vQCoOojaxqXcT1ImC5Y/SXba7mjgdCFEGUmX74nAg2So3bJtuD8G+qVGXt0knfSvZEuMEMIvhAikl4GxwOqUpktSu10CvJwdhR0Bo5UAAAF0SURBVLAHLa8AF6dG0kcCdY3cAgeNb/kQzyLZfml956VG03sB/YCPMqR
BAE8Aa6WU9zXalPW2a05bLrRbSkc7IURhatkHnETSD/828IvUbt9uu3Sb/gJ4K3U3c7C0fdnoYixI+pAbt91B+V6llDOklF2llD1J2rG3pJQXkKl2+//t2z1uwkAQhuG3g5qOlgNQpUxBC9fIMZByi5wgkVJwBeAANBAgRX5ukibFDIIGJBf2stL7SC7ASPtphEfyjt3GZLXJQUx+v4l9tHnhLCNigv8BfJ7yEHtPK+AHWAKDjvK8E7fNf8T+2NO1LMTk/CXreAAeCuV7zfX3+eccXvx+nvm+gGmLuR6JbZA9sMtjdg+1u5GteN1yrTGwzRxH4Pni2tgQw9EF0Mvv+/n5N8+PCmRbZ+2OwBvnJ086vyZy3Qnnp0paqZtvTkpSZUpvlUiSGrJxS1JlbNySVBkbtyRVxsYtSZWxcUtSZWzcklQZG7ckVeYf2tkbinO+r1AAAAAASUVORK5CYII=\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## Sonuçlar\n", + "\n", + "Peter'i kurda karşı savaşması için eğitmede başarılı olup olmadığımızı görelim!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Killed by wolf = 1, won: 9 times, drown: 90 times\n" + ] + } + ], + "source": [ + "def qpolicy(m):\n", + " x,y = m.human\n", + " v = probs(Q[x,y])\n", + " a = random.choices(list(actions),weights=v)[0]\n", + " return a\n", + "\n", + "print_statistics(qpolicy)" + ] + }, + { + "source": [ + "Artık çok daha az boğulma vakası görüyoruz, ancak Peter hala her zaman kurdu öldüremiyor. Hiperparametrelerle oynayarak bu sonucu iyileştirip iyileştiremeyeceğinizi denemeye çalışın.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 13 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(lpath)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, AI çeviri hizmeti [Co-op Translator](https://github.com/Azure/co-op-translator) kullanılarak çevrilmiştir. Doğruluğu sağlamak için çaba göstersek de, otomatik çevirilerin hata veya yanlışlık içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. 
Bu çevirinin kullanımından kaynaklanan yanlış anlamalar veya yanlış yorumlamalar için sorumluluk kabul etmiyoruz.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/tr/8-Reinforcement/1-QLearning/solution/notebook.ipynb b/translations/tr/8-Reinforcement/1-QLearning/solution/notebook.ipynb new file mode 100644 index 000000000..9100c9729 --- /dev/null +++ b/translations/tr/8-Reinforcement/1-QLearning/solution/notebook.ipynb @@ -0,0 +1,577 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "488431336543f71f14d4aaf0399e3381", + "translation_date": "2025-09-06T15:12:34+00:00", + "source_file": "8-Reinforcement/1-QLearning/solution/notebook.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# Peter ve Kurt: Pekiştirmeli Öğrenmeye Giriş\n", + "\n", + "Bu eğitimde, bir yol bulma problemine Pekiştirmeli Öğrenme uygulamayı öğreneceğiz. Senaryo, Rus besteci [Sergei Prokofiev](https://en.wikipedia.org/wiki/Sergei_Prokofiev) tarafından yazılan [Peter ve Kurt](https://en.wikipedia.org/wiki/Peter_and_the_Wolf) adlı müzikal masaldan esinlenmiştir. Bu, genç öncü Peter'in cesurca evinden çıkıp kurdu kovalamak için orman açıklığına gittiği bir hikayedir. 
Peter'in çevresini keşfetmesine ve en uygun navigasyon haritasını oluşturmasına yardımcı olacak makine öğrenimi algoritmalarını eğiteceğiz.\n", + "\n", + "Öncelikle, bir dizi kullanışlı kütüphaneyi içe aktaralım:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import random\n", + "import math" + ] + }, + { + "source": [ + "## Pekiştirmeli Öğrenmeye Genel Bakış\n", + "\n", + "**Pekiştirmeli Öğrenme** (RL), bir **ajanın** belirli bir **ortamda** birçok deney yaparak en uygun davranışı öğrenmesini sağlayan bir öğrenme tekniğidir. Bu ortamda bir ajanın, bir **ödül fonksiyonu** ile tanımlanan bir **hedefi** olmalıdır.\n", + "\n", + "## Ortam\n", + "\n", + "Basitlik açısından, Peter'ın dünyasını `width` x `height` boyutlarında bir kare tahta olarak düşünelim. Bu tahtadaki her bir hücre şu şekilde olabilir:\n", + "* Peter ve diğer canlıların yürüyebileceği bir **zemin**\n", + "* Üzerinde yürüyemeyeceğiniz açık bir şekilde belli olan **su**\n", + "* Dinlenebileceğiniz bir yer olan **ağaç** veya **çimen**\n", + "* Peter'ın kendini beslemek için bulmaktan memnun olacağı bir **elma**\n", + "* Tehlikeli olan ve kaçınılması gereken bir **kurt**\n", + "\n", + "Ortamla çalışmak için `Board` adında bir sınıf tanımlayacağız. Bu defteri çok fazla karmaşıklaştırmamak adına, tahtayla çalışmak için gereken tüm kodu ayrı bir `rlboard` modülüne taşıdık ve şimdi bu modülü içe aktaracağız. 
Uygulamanın iç detaylarını görmek için bu modülün içine bakabilirsiniz.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from rlboard import *" + ] + }, + { + "source": [ + "Şimdi rastgele bir tahta oluşturalım ve nasıl göründüğüne bakalım:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "width, height = 8,8\n", + "m = Board(width,height)\n", + "m.randomize(seed=13)\n", + "m.plot()" + ] + }, + { + "source": [ + "## Eylemler ve Politika\n", + "\n", + "Örneğimizde, Peter'ın amacı bir elma bulmak, aynı zamanda kurttan ve diğer engellerden kaçınmaktır. Bunu yapmak için, elmayı bulana kadar etrafta dolaşabilir. Bu nedenle, herhangi bir konumda yukarı, aşağı, sola ve sağa olmak üzere dört eylemden birini seçebilir. Bu eylemleri bir sözlük olarak tanımlayacağız ve bunları ilgili koordinat değişiklik çiftlerine eşleyeceğiz. 
Örneğin, sağa hareket etmek (`R`) bir çift `(1,0)` ile eşleşir.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "actions = { \"U\" : (0,-1), \"D\" : (0,1), \"L\" : (-1,0), \"R\" : (1,0) }\n", + "action_idx = { a : i for i,a in enumerate(actions.keys()) }" + ] + }, + { + "source": [ + "Ajanımızın (Peter) stratejisi, **politika** olarak adlandırılan bir kavramla tanımlanır. En basit politika olan **rastgele yürüyüş**ü ele alalım.\n", + "\n", + "## Rastgele yürüyüş\n", + "\n", + "Öncelikle, rastgele yürüyüş stratejisini uygulayarak problemimizi çözelim.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "18" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ], + "source": [ + "def random_policy(m):\n", + " return random.choice(list(actions))\n", + "\n", + "def walk(m,policy,start_position=None):\n", + " n = 0 # number of steps\n", + " # set initial position\n", + " if start_position:\n", + " m.human = start_position \n", + " else:\n", + " m.random_start()\n", + " while True:\n", + " if m.at() == Board.Cell.apple:\n", + " return n # success!\n", + " if m.at() in [Board.Cell.wolf, Board.Cell.water]:\n", + " return -1 # eaten by wolf or drowned\n", + " while True:\n", + " a = actions[policy(m)]\n", + " new_pos = m.move_pos(m.human,a)\n", + " if m.is_valid(new_pos) and m.at(new_pos)!=Board.Cell.water:\n", + " m.move(a) # do the actual move\n", + " break\n", + " n+=1\n", + "\n", + "walk(m,random_policy)" + ] + }, + { + "source": [ + "Rastgele yürüyüş deneyini birkaç kez gerçekleştirelim ve alınan ortalama adım sayısını görelim:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + 
"output_type": "stream", + "name": "stdout", + "text": [ + "Average path length = 32.87096774193548, eaten by wolf: 7 times\n" + ] + } + ], + "source": [ + "def print_statistics(policy):\n", + " s,w,n = 0,0,0\n", + " for _ in range(100):\n", + " z = walk(m,policy)\n", + " if z<0:\n", + " w+=1\n", + " else:\n", + " s += z\n", + " n += 1\n", + " print(f\"Average path length = {s/n}, eaten by wolf: {w} times\")\n", + "\n", + "print_statistics(random_policy)" + ] + }, + { + "source": [ + "## Ödül Fonksiyonu\n", + "\n", + "Politikamızı daha akıllı hale getirmek için, hangi hamlelerin diğerlerinden \"daha iyi\" olduğunu anlamamız gerekiyor.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "move_reward = -0.1\n", + "goal_reward = 10\n", + "end_reward = -10\n", + "\n", + "def reward(m,pos=None):\n", + " pos = pos or m.human\n", + " if not m.is_valid(pos):\n", + " return end_reward\n", + " x = m.at(pos)\n", + " if x==Board.Cell.water or x == Board.Cell.wolf:\n", + " return end_reward\n", + " if x==Board.Cell.apple:\n", + " return goal_reward\n", + " return move_reward" + ] + }, + { + "source": [ + "## Q-Öğrenme\n", + "\n", + "Bir Q-Tablosu veya çok boyutlu bir dizi oluşturun. 
Tahtamızın boyutları `genişlik` x `yükseklik` olduğundan, Q-Tablosunu `genişlik` x `yükseklik` x `len(actions)` şeklinde bir numpy dizisi ile temsil edebiliriz:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "Q = np.ones((width,height,len(actions)),dtype=np.float)*1.0/len(actions)" + ] + }, + { + "source": [ + "Q-Tablosunu tahtada görselleştirmek için tabloyu plot fonksiyonuna geçirin:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## Q-Öğrenmenin Özeti: Bellman Denklemi ve Öğrenme Algoritması\n", + "\n", + "Öğrenme algoritmamız için bir sözde kod yazın:\n", + "\n", + "* Tüm durumlar ve eylemler için Q-Tablosu Q'yu eşit sayılarla başlatın\n", + "* Öğrenme oranını $\\alpha\\leftarrow 1$ olarak ayarlayın\n", + "* Simülasyonu birçok kez tekrarlayın\n", + " 1. Rastgele bir pozisyonda başlayın\n", + " 1. Tekrarla\n", + " 1. Durum $s$'de bir eylem $a$ seçin\n", + " 2. Yeni bir duruma $s'$ geçerek eylemi gerçekleştirin\n", + " 3. Eğer oyun sonu koşuluyla karşılaşırsak veya toplam ödül çok küçükse - simülasyondan çıkın \n", + " 4. Yeni durumda ödül $r$'yi hesaplayın\n", + " 5. Bellman denklemine göre Q-Fonksiyonunu güncelleyin: $Q(s,a)\\leftarrow (1-\\alpha)Q(s,a)+\\alpha(r+\\gamma\\max_{a'}Q(s',a'))$\n", + " 6. $s\\leftarrow s'$\n", + " 7. 
Toplam ödülü güncelleyin ve $\\alpha$'yı azaltın.\n", + "\n", + "## Keşfet vs. Sömür\n", + "\n", + "En iyi yaklaşım, keşfetme ve sömürme arasında bir denge kurmaktır. Çevremiz hakkında daha fazla bilgi edindikçe, optimal yolu izleme olasılığımız artar, ancak ara sıra keşfedilmemiş bir yolu seçmek faydalı olabilir.\n", + "\n", + "## Python Uygulaması\n", + "\n", + "Artık öğrenme algoritmasını uygulamaya hazırız. Bundan önce, Q-Tablosundaki keyfi sayıları ilgili eylemler için olasılık vektörüne dönüştürecek bir fonksiyona da ihtiyacımız var:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "def probs(v,eps=1e-4):\n", + " v = v-v.min()+eps\n", + " v = v/v.sum()\n", + " return v" + ] + }, + { + "source": [ + "Orijinal vektöre, tüm vektör bileşenlerinin aynı olduğu başlangıç durumunda sıfıra bölünmeyi önlemek için küçük bir miktar `eps` ekliyoruz.\n", + "\n", + "Gerçek öğrenme algoritmasını, **epoklar** olarak da adlandırılan 10000 deney için çalıştıracağız:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "" + ] + } + ], + "source": [ + "\n", + "from IPython.display import clear_output\n", + "\n", + "lpath = []\n", + "\n", + "for epoch in range(10000):\n", + " clear_output(wait=True)\n", + " print(f\"Epoch = {epoch}\",end='')\n", + "\n", + " # Pick initial point\n", + " m.random_start()\n", + " \n", + " # Start travelling\n", + " n=0\n", + " cum_reward = 0\n", + " while True:\n", + " x,y = m.human\n", + " v = probs(Q[x,y])\n", + " a = random.choices(list(actions),weights=v)[0]\n", + " dpos = actions[a]\n", + " m.move(dpos,check_correctness=False) # we allow player to move outside the board, which terminates episode\n", + " r = reward(m)\n", + " cum_reward += r\n", + " if r==end_reward or cum_reward < 
-1000:\n", + " print(f\" {n} steps\",end='\\r')\n", + " lpath.append(n)\n", + " break\n", + " alpha = np.exp(-n / 3000)\n", + " gamma = 0.5\n", + " ai = action_idx[a]\n", + " Q[x,y,ai] = (1 - alpha) * Q[x,y,ai] + alpha * (r + gamma * Q[x+dpos[0], y+dpos[1]].max())\n", + " n+=1" + ] + }, + { + "source": [ + "Bu algoritmayı çalıştırdıktan sonra, Q-Tablosu her adımda farklı eylemlerin çekiciliğini tanımlayan değerlerle güncellenmelidir. Tabloyu burada görselleştirin:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## Politikanın Kontrolü\n", + "\n", + "Q-Tablosu, her durumdaki her bir eylemin \"çekiciliğini\" listelediği için, dünyamızda verimli bir gezinmeyi tanımlamak için onu kullanmak oldukça kolaydır. 
En basit durumda, sadece en yüksek Q-Tablosu değerine karşılık gelen eylemi seçebiliriz:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "2" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ], + "source": [ + "def qpolicy_strict(m):\n", + " x,y = m.human\n", + " v = probs(Q[x,y])\n", + " a = list(actions)[np.argmax(v)]\n", + " return a\n", + "\n", + "walk(m,qpolicy_strict)" + ] + }, + { + "source": [ + "Eğer yukarıdaki kodu birkaç kez denerseniz, bazen sadece \"takıldığını\" fark edebilirsiniz ve bunu durdurmak için not defterinde DURDUR düğmesine basmanız gerekir.\n", + "\n", + "> **Görev 1:** `walk` fonksiyonunu, yolun maksimum uzunluğunu belirli bir adım sayısıyla (örneğin, 100) sınırlayacak şekilde değiştirin ve yukarıdaki kodun bu değeri zaman zaman döndürdüğünü gözlemleyin.\n", + "\n", + "> **Görev 2:** `walk` fonksiyonunu, daha önce bulunduğu yerlere geri dönmemesini sağlayacak şekilde değiştirin. 
Bu, `walk` fonksiyonunun döngüye girmesini önleyecektir, ancak ajan yine de kaçamayacağı bir konumda \"sıkışabilir\".\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Average path length = 3.45, eaten by wolf: 0 times\n" + ] + } + ], + "source": [ + "\n", + "def qpolicy(m):\n", + " x,y = m.human\n", + " v = probs(Q[x,y])\n", + " a = random.choices(list(actions),weights=v)[0]\n", + " return a\n", + "\n", + "print_statistics(qpolicy)" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 15 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(lpath)" + ] + }, + { + "source": [ + "Burada gördüğümüz şey, başlangıçta ortalama yol uzunluğunun arttığıdır. Bu muhtemelen çevre hakkında hiçbir şey bilmediğimizde - kötü durumlara, suya veya kurda takılma olasılığımızın yüksek olmasından kaynaklanıyor. Daha fazla bilgi edindikçe ve bu bilgiyi kullanmaya başladıkça, çevreyi daha uzun süre keşfedebiliriz, ancak elmaların nerede olduğunu hâlâ iyi bilmiyoruz.\n", + "\n", + "Yeterince öğrendiğimizde, ajan için hedefe ulaşmak daha kolay hale gelir ve yol uzunluğu azalmaya başlar. Ancak, hâlâ keşfe açık olduğumuz için, genellikle en iyi yoldan sapar ve yeni seçenekleri keşfederiz, bu da yolu optimalden daha uzun hale getirir.\n", + "\n", + "Bu grafikte ayrıca gözlemlediğimiz bir diğer şey, bir noktada uzunluğun aniden arttığıdır. Bu, sürecin stokastik doğasını gösterir ve Q-Tablo katsayılarını yeni değerlerle üzerine yazarak \"bozabileceğimiz\" anlamına gelir. 
Bu durum, öğrenme oranını azaltarak (örneğin, eğitimin sonuna doğru Q-Tablo değerlerini yalnızca küçük bir miktarla ayarlayarak) ideal olarak en aza indirgenmelidir.\n", + "\n", + "Genel olarak, öğrenme sürecinin başarısı ve kalitesinin, öğrenme oranı, öğrenme oranı azalması ve indirim faktörü gibi parametrelere önemli ölçüde bağlı olduğunu unutmamak önemlidir. Bunlar genellikle **hiperparametreler** olarak adlandırılır, çünkü eğitim sırasında optimize ettiğimiz **parametrelerden** (örneğin, Q-Tablo katsayıları) ayrılırlar. En iyi hiperparametre değerlerini bulma sürecine **hiperparametre optimizasyonu** denir ve bu ayrı bir konu başlığını hak eder.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "## Alıştırma\n", + "#### Daha Gerçekçi Bir Peter ve Kurt Dünyası\n", + "\n", + "Bizim senaryomuzda, Peter neredeyse hiç yorulmadan veya acıkmadan etrafta dolaşabiliyordu. Daha gerçekçi bir dünyada, zaman zaman oturup dinlenmesi ve kendini beslemesi gerekiyor. Dünyamızı daha gerçekçi hale getirmek için aşağıdaki kuralları uygulayalım:\n", + "\n", + "1. Bir yerden başka bir yere hareket ettiğinde, Peter **enerji** kaybeder ve biraz **yorgunluk** kazanır.\n", + "2. Peter, elma yiyerek daha fazla enerji kazanabilir.\n", + "3. Peter, ağacın altında veya çimenlerin üzerinde dinlenerek yorgunluğunu giderebilir (yani, tahtada bir ağaç veya çimen bulunan bir konuma yürümek - yeşil alan).\n", + "4. Peter, kurdu bulmalı ve öldürmelidir.\n", + "5. 
Kurdu öldürmek için Peter'ın belirli seviyelerde enerjiye ve yorgunluğa sahip olması gerekir, aksi takdirde savaşı kaybeder.\n", + "\n", + "Yukarıdaki ödül fonksiyonunu oyunun kurallarına göre değiştirin, oyunu kazanmak için en iyi stratejiyi öğrenmek üzere pekiştirmeli öğrenme algoritmasını çalıştırın ve rastgele yürüyüş sonuçlarını algoritmanızla karşılaştırın; kazanılan ve kaybedilen oyun sayısı açısından değerlendirin.\n", + "\n", + "> **Not**: Çalışması için hiperparametreleri ayarlamanız gerekebilir, özellikle epoch sayısını. Çünkü oyunun başarısı (kurdu yenmek) nadir bir olaydır, bu nedenle çok daha uzun bir eğitim süresi bekleyebilirsiniz.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, AI çeviri hizmeti [Co-op Translator](https://github.com/Azure/co-op-translator) kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlıklar içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. 
Bu çevirinin kullanımından kaynaklanan yanlış anlamalar veya yanlış yorumlamalar için sorumluluk kabul etmiyoruz.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/tr/8-Reinforcement/2-Gym/notebook.ipynb b/translations/tr/8-Reinforcement/2-Gym/notebook.ipynb new file mode 100644 index 000000000..c661ae1ea --- /dev/null +++ b/translations/tr/8-Reinforcement/2-Gym/notebook.ipynb @@ -0,0 +1,394 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.4 64-bit ('base': conda)" + }, + "interpreter": { + "hash": "86193a1ab0ba47eac1c69c1756090baa3b420b3eea7d4aafab8b85f8b312f0c5" + }, + "coopTranslator": { + "original_hash": "f22f8f3daed4b6d34648d1254763105b", + "translation_date": "2025-09-06T15:18:22+00:00", + "source_file": "8-Reinforcement/2-Gym/notebook.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "## CartPole Kayma\n", + "\n", + "> **Problem**: Peter, kurttan kaçmak istiyorsa ondan daha hızlı hareket edebilmelidir. 
Peter'ın özellikle dengeyi koruyarak kaymayı nasıl öğrenebileceğini Q-Öğrenme kullanarak göreceğiz.\n", + "\n", + "Öncelikle, gym'i yükleyelim ve gerekli kütüphaneleri içe aktaralım:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 1" + ] + }, + { + "source": [ + "## Bir cartpole ortamı oluştur\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "#code block 2" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "source": [ + "Çevrenin nasıl çalıştığını görmek için, 100 adımlık kısa bir simülasyon çalıştıralım.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "#code block 3" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "source": [ + "Simülasyon sırasında, nasıl hareket edeceğimize karar vermek için gözlemler almamız gerekir. 
Aslında, `step` fonksiyonu bize mevcut gözlemleri, ödül fonksiyonunu ve simülasyona devam etmenin mantıklı olup olmadığını gösteren `done` bayrağını geri döndürür:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "#code block 4" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "source": [ + "Bu sayıların minimum ve maksimum değerlerini alabiliriz:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38]\n[4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38]\n" + ] + } + ], + "source": [ + "#code block 5" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 6" + ] + }, + { + "source": [ + "Haydi, diğer ayrıklaştırma yöntemini de kutular kullanarak keşfedelim:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Sample bins for interval (-5,5) with 10 bins\n [-5. -4. -3. -2. -1. 0. 1. 2. 3. 4. 
5.]\n" + ] + } + ], + "source": [ + "#code block 7" + ] + }, + { + "source": [ + "Haydi şimdi kısa bir simülasyon çalıştıralım ve bu ayrık ortam değerlerini gözlemleyelim.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(0, 0, -2, -2)\n(0, 1, -2, -5)\n(0, 2, -3, -8)\n(0, 3, -5, -11)\n(0, 3, -7, -14)\n(0, 4, -10, -17)\n(0, 3, -14, -15)\n(0, 3, -17, -12)\n(0, 3, -20, -16)\n(0, 4, -23, -19)\n" + ] + } + ], + "source": [ + "#code block 8" + ] + }, + { + "source": [ + "## Q-Tablo Yapısı\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 9" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 10" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0: 22.0, alpha=0.3, epsilon=0.9\n", + "5000: 70.1384, alpha=0.3, epsilon=0.9\n", + "10000: 121.8586, alpha=0.3, epsilon=0.9\n", + "15000: 149.6368, alpha=0.3, epsilon=0.9\n", + "20000: 168.2782, alpha=0.3, epsilon=0.9\n", + "25000: 196.7356, alpha=0.3, epsilon=0.9\n", + "30000: 220.7614, alpha=0.3, epsilon=0.9\n", + "35000: 233.2138, alpha=0.3, epsilon=0.9\n", + "40000: 248.22, alpha=0.3, epsilon=0.9\n", + "45000: 264.636, alpha=0.3, epsilon=0.9\n", + "50000: 276.926, alpha=0.3, epsilon=0.9\n", + "55000: 277.9438, alpha=0.3, epsilon=0.9\n", + "60000: 248.881, alpha=0.3, epsilon=0.9\n", + "65000: 272.529, alpha=0.3, epsilon=0.9\n", + "70000: 281.7972, alpha=0.3, epsilon=0.9\n", + "75000: 284.2844, alpha=0.3, epsilon=0.9\n", + "80000: 269.667, alpha=0.3, epsilon=0.9\n", + "85000: 273.8652, alpha=0.3, 
epsilon=0.9\n", + "90000: 278.2466, alpha=0.3, epsilon=0.9\n", + "95000: 269.1736, alpha=0.3, epsilon=0.9\n" + ] + } + ], + "source": [ + "#code block 11" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 20 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(rewards)" + ] + }, + { + "source": [ + "Bu grafikten herhangi bir şey söylemek mümkün değildir, çünkü stokastik eğitim sürecinin doğası gereği eğitim oturumlarının uzunluğu büyük ölçüde değişir. Bu grafiği daha anlamlı hale getirmek için, diyelim ki 100 deney serisi üzerinde **hareketli ortalama** hesaplayabiliriz. 
Bu, `np.convolve` kullanılarak kolayca yapılabilir:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 22 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "#code block 12" + ] + }, + { + "source": [ + "## Hiperparametreleri Değiştirmek ve Sonucu Görmek\n", + "\n", + "Artık eğitilmiş modelin nasıl davrandığını gerçekten görmek ilginç olurdu. 
Simülasyonu çalıştıralım ve eğitim sırasında kullandığımız aynı eylem seçme stratejisini takip edeceğiz: Q-Tablosundaki olasılık dağılımına göre örnekleme yaparak:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 13" + ] + }, + { + "source": [ + "## Sonucu animasyonlu bir GIF olarak kaydetme\n", + "\n", + "Arkadaşlarınızı etkilemek istiyorsanız, denge çubuğunun animasyonlu GIF resmini onlara göndermek isteyebilirsiniz. Bunu yapmak için, bir görüntü karesi oluşturmak üzere `env.render` çağrısını yapabilir ve ardından bunları PIL kütüphanesini kullanarak animasyonlu bir GIF olarak kaydedebilirsiniz:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "360\n" + ] + } + ], + "source": [ + "from PIL import Image\n", + "obs = env.reset()\n", + "done = False\n", + "i=0\n", + "ims = []\n", + "while not done:\n", + " s = discretize(obs)\n", + " img=env.render(mode='rgb_array')\n", + " ims.append(Image.fromarray(img))\n", + " v = probs(np.array([Qbest.get((s,a),0) for a in actions]))\n", + " a = random.choices(actions,weights=v)[0]\n", + " obs,_,done,_ = env.step(a)\n", + " i+=1\n", + "env.close()\n", + "ims[0].save('images/cartpole-balance.gif',save_all=True,append_images=ims[1::2],loop=0,duration=5)\n", + "print(i)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, AI çeviri hizmeti [Co-op Translator](https://github.com/Azure/co-op-translator) kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlık içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. 
Bu çevirinin kullanımından kaynaklanan yanlış anlamalar veya yanlış yorumlamalardan sorumlu değiliz.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/tr/8-Reinforcement/2-Gym/solution/notebook.ipynb b/translations/tr/8-Reinforcement/2-Gym/solution/notebook.ipynb new file mode 100644 index 000000000..5b23ba030 --- /dev/null +++ b/translations/tr/8-Reinforcement/2-Gym/solution/notebook.ipynb @@ -0,0 +1,526 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "5c0e485e58d63c506f1791c4dbf990ce", + "translation_date": "2025-09-06T15:21:21+00:00", + "source_file": "8-Reinforcement/2-Gym/solution/notebook.ipynb", + "language_code": "tr" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "## CartPole Kayma\n", + "\n", + "> **Problem**: Peter, kurttan kaçmak istiyorsa ondan daha hızlı hareket edebilmelidir. 
Peter'ın özellikle dengeyi koruyarak kaymayı nasıl öğrenebileceğini Q-Learning kullanarak göreceğiz.\n", + "\n", + "Öncelikle, gym'i yükleyelim ve gerekli kütüphaneleri içe aktaralım:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: gym in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.18.3)\n", + "Requirement already satisfied: Pillow<=8.2.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (7.0.0)\n", + "Requirement already satisfied: scipy in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (1.4.1)\n", + "Requirement already satisfied: numpy>=1.10.4 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (1.19.2)\n", + "Requirement already satisfied: cloudpickle<1.7.0,>=1.2.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (1.6.0)\n", + "Requirement already satisfied: pyglet<=1.5.15,>=1.4.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (1.5.15)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n" + ] + } + ], + "source": [ + "import sys\n", + "!pip install gym \n", + "\n", + "import gym\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import random" + ] + }, + { + "source": [ + "## Bir cartpole ortamı oluştur\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "env = gym.make(\"CartPole-v1\")\n", + "print(env.action_space)\n", + "print(env.observation_space)\n", + 
"print(env.action_space.sample())" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Discrete(2)\nBox(-3.4028234663852886e+38, 3.4028234663852886e+38, (4,), float32)\n0\n" + ] + } + ] + }, + { + "source": [ + "Çevrenin nasıl çalıştığını görmek için, 100 adımlık kısa bir simülasyon çalıştıralım.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "env.reset()\n", + "\n", + "for i in range(100):\n", + " env.render()\n", + " env.step(env.action_space.sample())\n", + "env.close()" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/gym/logger.py:30: UserWarning: \u001b[33mWARN: You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.\u001b[0m\n warnings.warn(colorize('%s: %s'%('WARN', msg % args), 'yellow'))\n" + ] + } + ] + }, + { + "source": [ + "Simülasyon sırasında, nasıl hareket edeceğimize karar vermek için gözlemler almamız gerekir. 
Aslında, `step` fonksiyonu bize mevcut gözlemleri, ödül fonksiyonunu ve simülasyona devam etmenin mantıklı olup olmadığını gösteren `done` bayrağını geri döndürür:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "env.reset()\n", + "\n", + "done = False\n", + "while not done:\n", + " env.render()\n", + " obs, rew, done, info = env.step(env.action_space.sample())\n", + " print(f\"{obs} -> {rew}\")\n", + "env.close()" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[ 0.03044442 -0.19543914 -0.04496216 0.28125618] -> 1.0\n", + "[ 0.02653564 -0.38989186 -0.03933704 0.55942606] -> 1.0\n", + "[ 0.0187378 -0.19424049 -0.02814852 0.25461393] -> 1.0\n", + "[ 0.01485299 -0.38894946 -0.02305624 0.53828712] -> 1.0\n", + "[ 0.007074 -0.19351108 -0.0122905 0.23842953] -> 1.0\n", + "[ 0.00320378 0.00178427 -0.00752191 -0.05810469] -> 1.0\n", + "[ 0.00323946 0.19701326 -0.008684 -0.35315131] -> 1.0\n", + "[ 0.00717973 0.00201587 -0.01574703 -0.06321931] -> 1.0\n", + "[ 0.00722005 0.19736001 -0.01701141 -0.36082863] -> 1.0\n", + "[ 0.01116725 0.39271958 -0.02422798 -0.65882671] -> 1.0\n", + "[ 0.01902164 0.19794307 -0.03740452 -0.37387001] -> 1.0\n", + "[ 0.0229805 0.39357584 -0.04488192 -0.67810827] -> 1.0\n", + "[ 0.03085202 0.58929164 -0.05844408 -0.98457719] -> 1.0\n", + "[ 0.04263785 0.78514572 -0.07813563 -1.2950295 ] -> 1.0\n", + "[ 0.05834076 0.98116859 -0.10403622 -1.61111521] -> 1.0\n", + "[ 0.07796413 0.78741784 -0.13625852 -1.35259196] -> 1.0\n", + "[ 0.09371249 0.98396202 -0.16331036 -1.68461179] -> 1.0\n", + "[ 0.11339173 0.79106371 -0.1970026 -1.44691436] -> 1.0\n", + "[ 0.12921301 0.59883361 -0.22594088 -1.22169133] -> 1.0\n" + ] + } + ] + }, + { + "source": [ + "Bu sayıların minimum ve maksimum değerlerini alabiliriz:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + 
"metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38]\n[4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38]\n" + ] + } + ], + "source": [ + "print(env.observation_space.low)\n", + "print(env.observation_space.high)" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "def discretize(x):\n", + " return tuple((x/np.array([0.25, 0.25, 0.01, 0.1])).astype(np.int))" + ] + }, + { + "source": [ + "Haydi, diğer ayrıklaştırma yöntemlerini de kutular kullanarak keşfedelim:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Sample bins for interval (-5,5) with 10 bins\n [-5. -4. -3. -2. -1. 0. 1. 2. 3. 4. 5.]\n" + ] + } + ], + "source": [ + "def create_bins(i,num):\n", + " return np.arange(num+1)*(i[1]-i[0])/num+i[0]\n", + "\n", + "print(\"Sample bins for interval (-5,5) with 10 bins\\n\",create_bins((-5,5),10))\n", + "\n", + "ints = [(-5,5),(-2,2),(-0.5,0.5),(-2,2)] # intervals of values for each parameter\n", + "nbins = [20,20,10,10] # number of bins for each parameter\n", + "bins = [create_bins(ints[i],nbins[i]) for i in range(4)]\n", + "\n", + "def discretize_bins(x):\n", + " return tuple(np.digitize(x[i],bins[i]) for i in range(4))" + ] + }, + { + "source": [ + "Haydi şimdi kısa bir simülasyon çalıştıralım ve bu ayrık ortam değerlerini gözlemleyelim.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(0, 0, -1, -3)\n(0, 0, -2, 0)\n(0, 0, -2, -3)\n(0, 1, -3, -6)\n(0, 2, -4, -9)\n(0, 3, -6, -12)\n(0, 2, -8, -9)\n(0, 3, -10, -13)\n(0, 
4, -13, -16)\n(0, 4, -16, -19)\n(0, 4, -20, -17)\n(0, 4, -24, -20)\n" + ] + } + ], + "source": [ + "env.reset()\n", + "\n", + "done = False\n", + "while not done:\n", + " #env.render()\n", + " obs, rew, done, info = env.step(env.action_space.sample())\n", + " #print(discretize_bins(obs))\n", + " print(discretize(obs))\n", + "env.close()" + ] + }, + { + "source": [ + "## Q-Tablo Yapısı\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "Q = {}\n", + "actions = (0,1)\n", + "\n", + "def qvalues(state):\n", + " return [Q.get((state,a),0) for a in actions]" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# hyperparameters\n", + "alpha = 0.3\n", + "gamma = 0.9\n", + "epsilon = 0.90" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0: 108.0, alpha=0.3, epsilon=0.9\n" + ] + } + ], + "source": [ + "def probs(v,eps=1e-4):\n", + " v = v-v.min()+eps\n", + " v = v/v.sum()\n", + " return v\n", + "\n", + "Qmax = 0\n", + "cum_rewards = []\n", + "rewards = []\n", + "for epoch in range(100000):\n", + " obs = env.reset()\n", + " done = False\n", + " cum_reward=0\n", + " # == do the simulation ==\n", + " while not done:\n", + " s = discretize(obs)\n", + " if random.random() Qmax:\n", + " Qmax = np.average(cum_rewards)\n", + " Qbest = Q\n", + " cum_rewards=[]" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 20 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(rewards)" + ] + }, + { + "source": [ + "Bu grafikten herhangi bir şey söylemek mümkün değildir, çünkü stokastik eğitim sürecinin doğası gereği eğitim oturumlarının uzunluğu büyük ölçüde değişir. Bu grafiği daha anlamlı hale getirmek için, diyelim ki 100 deney serisi üzerinde **hareketli ortalama** hesaplayabiliriz. 
Bu, `np.convolve` kullanılarak kolayca yapılabilir:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 22 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "def running_average(x,window):\n", + " return np.convolve(x,np.ones(window)/window,mode='valid')\n", + "\n", + "plt.plot(running_average(rewards,100))" + ] + }, + { + "source": [ + "## Hiperparametreleri Değiştirmek ve Sonucu Görmek\n", + "\n", + "Şimdi, eğitilmiş modelin nasıl davrandığını gerçekten görmek ilginç olurdu. 
Simülasyonu çalıştıralım ve eğitim sırasında kullandığımız aynı eylem seçme stratejisini takip edeceğiz: Q-Tablosundaki olasılık dağılımına göre örnekleme yaparak:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "obs = env.reset()\n", + "done = False\n", + "while not done:\n", + " s = discretize(obs)\n", + " env.render()\n", + " v = probs(np.array(qvalues(s)))\n", + " a = random.choices(actions,weights=v)[0]\n", + " obs,_,done,_ = env.step(a)\n", + "env.close()" + ] + }, + { + "source": [ + "## Sonucu animasyonlu bir GIF olarak kaydetme\n", + "\n", + "Arkadaşlarınızı etkilemek istiyorsanız, denge çubuğunun animasyonlu bir GIF resmini onlara göndermek isteyebilirsiniz. Bunu yapmak için, `env.render` çağrısını kullanarak bir görüntü karesi üretebilir ve ardından bu kareleri PIL kütüphanesi kullanarak animasyonlu bir GIF olarak kaydedebilirsiniz:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "360\n" + ] + } + ], + "source": [ + "from PIL import Image\n", + "obs = env.reset()\n", + "done = False\n", + "i=0\n", + "ims = []\n", + "while not done:\n", + " s = discretize(obs)\n", + " img=env.render(mode='rgb_array')\n", + " ims.append(Image.fromarray(img))\n", + " v = probs(np.array([Qbest.get((s,a),0) for a in actions]))\n", + " a = random.choices(actions,weights=v)[0]\n", + " obs,_,done,_ = env.step(a)\n", + " i+=1\n", + "env.close()\n", + "ims[0].save('images/cartpole-balance.gif',save_all=True,append_images=ims[1::2],loop=0,duration=5)\n", + "print(i)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, AI çeviri hizmeti [Co-op Translator](https://github.com/Azure/co-op-translator) kullanılarak çevrilmiştir. 
Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlıklar içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. Bu çevirinin kullanımından kaynaklanan yanlış anlamalar veya yanlış yorumlamalar için sorumluluk kabul etmiyoruz.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/tr/PyTorch_Fundamentals.ipynb b/translations/tr/PyTorch_Fundamentals.ipynb new file mode 100644 index 000000000..45a04806d --- /dev/null +++ b/translations/tr/PyTorch_Fundamentals.ipynb @@ -0,0 +1,2830 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4", + "authorship_tag": "ABX9TyOgv0AozH1FKQBD+RkgT2bV", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU", + "coopTranslator": { + "original_hash": "0ca21b6ee62904d616f2e36dc1cf0da7", + "translation_date": "2025-09-06T13:08:20+00:00", + "source_file": "PyTorch_Fundamentals.ipynb", + "language_code": "tr" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EHh5JllMh1rG", + "outputId": "f55755ad-c369-414c-85ec-6e9d4f061a02", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'2.2.1+cu121'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 1 + } + ], + "source": [ + "import torch\n", + "torch.__version__" + ] + }, + { + "cell_type": "code", + "source": [ + "print(\"I am excited to run this\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, 
+ "id": "UPlb-duwXAfz", + "outputId": "cfd687e4-1238-49f4-ab6b-ee1305b740d2" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "I am excited to run this\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "print(torch.__version__)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "byWVlJ9wXDSk", + "outputId": "fd74a5c4-4d4a-41b2-ef3c-562ea3e4811f" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "2.2.1+cu121\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "Osm80zoEYklS" + } + }, + { + "cell_type": "code", + "source": [ + "# scalar\n", + "scalar = torch.tensor(7)\n", + "scalar" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-o8wvJ-VXZmI", + "outputId": "558816f5-1205-4de1-fe1f-2f96e9bd79e6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(7)" + ] + }, + "metadata": {}, + "execution_count": 4 + } + ] + }, + { + "cell_type": "code", + "source": [ + "scalar.ndim" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mCZ2tXC4Y_Sg", + "outputId": "2d86dbdc-56e1-45c6-d3dd-14515f2a457a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "source": [ + "scalar.item()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ssN00By0ZQgS", + "outputId": "490f40d1-5135-4969-a6d3-c8c902cdc473" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "7" + 
] + }, + "metadata": {}, + "execution_count": 6 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# vector\n", + "vector = torch.tensor([7, 7])\n", + "vector\n", + "#vector.ndim\n", + "#vector.item()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Bws__5wlZnmF", + "outputId": "944e38f9-5ba1-4ddc-a9c6-cfb6a19bb488" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([7, 7])" + ] + }, + "metadata": {}, + "execution_count": 7 + } + ] + }, + { + "cell_type": "code", + "source": [ + "vector.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "9pjCvnsZZzNG", + "outputId": "e030a4da-8f81-4858-fbce-86da2aaafe52" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([2])" + ] + }, + "metadata": {}, + "execution_count": 8 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Matrix\n", + "MATRIX = torch.tensor([[7, 8],[9, 10]])\n", + "MATRIX" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "a747hI9SaBGW", + "outputId": "af835ddb-81ff-4981-badb-441567194d15" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[ 7, 8],\n", + " [ 9, 10]])" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ] + }, + { + "cell_type": "code", + "source": [ + "MATRIX.ndim" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XdTfFa7vaRUj", + "outputId": "0fbbab9c-8263-4cad-a380-0d2a16ca499e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "2" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "code", + "source": [ + "MATRIX[0]\n", + "MATRIX[1]" + ], + "metadata": { + "colab": { + "base_uri": 
"https://localhost:8080/" + }, + "id": "TFeD3jSDafm7", + "outputId": "69b44ab3-5ba7-451a-c6b2-f019a03d0c96" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 9, 10])" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Tensor\n", + "TENSOR = torch.tensor([[[1, 2, 3],[3,6,9], [2,4,5]]])\n", + "TENSOR" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ic3cE47tah42", + "outputId": "f250e295-91de-43ec-9d80-588a6fe0abde" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[1, 2, 3],\n", + " [3, 6, 9],\n", + " [2, 4, 5]]])" + ] + }, + "metadata": {}, + "execution_count": 12 + } + ] + }, + { + "cell_type": "code", + "source": [ + "TENSOR.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Wvjf5fczbAM1", + "outputId": "9c72b5b8-bafe-4ae7-9883-b051e209eada" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([1, 3, 3])" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ] + }, + { + "cell_type": "code", + "source": [ + "TENSOR.ndim" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mwtXZwiMbN3m", + "outputId": "331a5e36-b1b0-4a5f-a9b8-e7049cbaa8f9" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "3" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ] + }, + { + "cell_type": "code", + "source": [ + "TENSOR[0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vzdZu_IfbP3J", + "outputId": "e24e7e71-e365-412d-ff50-fc094b56d2f3" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 
2, 3],\n", + " [3, 6, 9],\n", + " [2, 4, 5]])" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "A8OL9eWfcRrJ" + } + }, + { + "cell_type": "code", + "source": [ + "random_tensor = torch.rand(3,4)\n", + "random_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hAqSDE1EcVS_", + "outputId": "946171c3-d054-400c-f893-79110356888c" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0.4414, 0.7681, 0.8385, 0.3166],\n", + " [0.0468, 0.5812, 0.0670, 0.9173],\n", + " [0.2959, 0.3276, 0.7411, 0.4643]])" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_tensor.ndim" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "g4fvPE5GcwzP", + "outputId": "8737f36b-6864-4059-eaed-6f9156c22306" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "2" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_tensor.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XsAg99QmdAU6", + "outputId": "35467c11-257c-4f16-99aa-eca930bcbc36" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([3, 4])" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_tensor.size()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cii1pNdVdB68", + "outputId": "fc8d2de6-9215-43de-99f7-7b0d7f7d20fa" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([3, 4])" + ] + }, + "metadata": {}, + 
"execution_count": 19 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_image_tensor = torch.rand(size=(3, 224, 224)) #color channels, height, width\n", + "random_image_tensor.ndim, random_image_tensor.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "aTKq2j0cdDjb", + "outputId": "6be42057-20b9-4faf-d79d-8b65c42cc27e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(3, torch.Size([3, 224, 224]))" + ] + }, + "metadata": {}, + "execution_count": 20 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_tensor_ofownsize = torch.rand(size=(5,10,10))\n", + "random_tensor_ofownsize.ndim, random_tensor_ofownsize.shape\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IyhDdj-Pd6nC", + "outputId": "43e5e334-6d4d-4b67-f87d-7d364c6d8c67" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(3, torch.Size([5, 10, 10]))" + ] + }, + "metadata": {}, + "execution_count": 21 + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "UOJW08uOert_" + } + }, + { + "cell_type": "code", + "source": [ + "zero = torch.zeros(size=(3, 4))\n", + "zero" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "uGvXtaXyefie", + "outputId": "d40d3e28-8667-4d2f-8b62-f0829c6162ad" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0., 0., 0., 0.],\n", + " [0., 0., 0., 0.],\n", + " [0., 0., 0., 0.]])" + ] + }, + "metadata": {}, + "execution_count": 22 + } + ] + }, + { + "cell_type": "code", + "source": [ + "zero*random_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "OyUkUPkDe0uH", + "outputId": "26c2e4be-36ba-4c6c-9a90-2704ec135828" + }, + "execution_count": null, + "outputs": [ 
+ { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0., 0., 0., 0.],\n", + " [0., 0., 0., 0.],\n", + " [0., 0., 0., 0.]])" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ones = torch.ones(size=(3, 4))\n", + "ones\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "y_Ac62Aqe82G", + "outputId": "291de5d9-b9df-49de-c9d1-d098e3e9f4d8" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1., 1., 1., 1.],\n", + " [1., 1., 1., 1.],\n", + " [1., 1., 1., 1.]])" + ] + }, + "metadata": {}, + "execution_count": 24 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ones.dtype" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TvGOA9odfIEO", + "outputId": "45949ef4-6649-4b6c-d6af-2d4bfb8de832" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.float32" + ] + }, + "metadata": {}, + "execution_count": 25 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ones*zero" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "--pTyge-fI-8", + "outputId": "c4d9bb7e-829b-43db-e2db-b1a2d64e61f0" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0., 0., 0., 0.],\n", + " [0., 0., 0., 0.],\n", + " [0., 0., 0., 0.]])" + ] + }, + "metadata": {}, + "execution_count": 26 + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "qDcc7Z36fSJF" + } + }, + { + "cell_type": "code", + "source": [ + "one_to_ten = torch.arange(start = 1, end = 11, step = 1)\n", + "one_to_ten" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "w3CZB4zUfR1s", + "outputId": "197fcba1-da0a-4b4a-ed11-3974bd6c01aa" + }, + 
"execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])" + ] + }, + "metadata": {}, + "execution_count": 27 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ten_zeros = torch.zeros_like(one_to_ten)\n", + "ten_zeros" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "WZh99BwVfRy8", + "outputId": "51ef8bfb-6fa0-4099-ff66-b97d65b2ddea" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])" + ] + }, + "metadata": {}, + "execution_count": 28 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Tensor Veri Türleri\n" + ], + "metadata": { + "id": "pGGhgsbUgqbW" + } + }, + { + "cell_type": "code", + "source": [ + "float_32_tensor = torch.tensor([3.0, 6.0,9.0], dtype = None, device = None, requires_grad = False)\n", + "float_32_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JORJl4XkfRsx", + "outputId": "71114171-0f49-481f-b6fc-6cb48e2fb895" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([3., 6., 9.])" + ] + }, + "metadata": {}, + "execution_count": 29 + } + ] + }, + { + "cell_type": "code", + "source": [ + "float_32_tensor.dtype" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "6wOPPwGyfRLn", + "outputId": "f23776a1-b682-404a-9f67-d5bcb0402666" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.float32" + ] + }, + "metadata": {}, + "execution_count": 30 + } + ] + }, + { + "cell_type": "code", + "source": [ + "float_16_tensor = float_32_tensor.type(torch.float16)\n", + "float_16_tensor.dtype" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": 
"tFsHCvmZfOYe", + "outputId": "d3aa305a-7591-47f5-97fd-61bff60b44bd" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.float16" + ] + }, + "metadata": {}, + "execution_count": 31 + } + ] + }, + { + "cell_type": "code", + "source": [ + "float_16_tensor*float_32_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TQiCGTPuwq0q", + "outputId": "98750fce-1ca3-4889-e269-8b753efdea96" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 9., 36., 81.])" + ] + }, + "metadata": {}, + "execution_count": 32 + } + ] + }, + { + "cell_type": "code", + "source": [ + "int_32_tensor = torch.tensor([3, 6, 9], dtype = torch.int32)\n", + "int_32_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5hlrLvGUw5D_", + "outputId": "41d890a0-9aee-446c-d906-631ce2ab0995" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([3, 6, 9], dtype=torch.int32)" + ] + }, + "metadata": {}, + "execution_count": 33 + } + ] + }, + { + "cell_type": "code", + "source": [ + "int_32_tensor*float_32_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ihApD9u3xTNW", + "outputId": "d295eed0-6996-4e0f-8502-ff4b55cd1373" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 9., 36., 81.])" + ] + }, + "metadata": {}, + "execution_count": 34 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x = torch.arange(0,100,10)" + ], + "metadata": { + "id": "utKhlb_KxWDQ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "x" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "p78D74E9Rj7Y", + "outputId": 
"781a1614-a900-41f5-9e5d-358f0b2390aa" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])" + ] + }, + "metadata": {}, + "execution_count": 36 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.min()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4BcSs5NeRkcj", + "outputId": "3f24a8dc-58e9-4a5f-9834-e85856a34f9d" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0)" + ] + }, + "metadata": {}, + "execution_count": 37 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.max()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hinqvXVLRm4q", + "outputId": "5c7d8a53-3913-4ac1-bba3-5ba8ff68250a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(90)" + ] + }, + "metadata": {}, + "execution_count": 38 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.mean(x.type(torch.float32))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "k7okc0_vRpnB", + "outputId": "91e5494f-dc57-417c-ea4d-25dbc547c893" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(45.)" + ] + }, + "metadata": {}, + "execution_count": 39 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.type(torch.float32).mean()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "29QcDTjHRq10", + "outputId": "62937c6c-78e0-49f2-dde3-1543ee8f7907" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(45.)" + ] + }, + "metadata": {}, + "execution_count": 40 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.sum()" + ], + 
"metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wlpY_G_sbdKF", + "outputId": "475d8258-af65-4011-a258-b93d4d8142d4" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(450)" + ] + }, + "metadata": {}, + "execution_count": 41 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.argmax()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GT6HJzwhbk4n", + "outputId": "2e455c20-c322-4bcf-d07c-1259d3ccefc6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(9)" + ] + }, + "metadata": {}, + "execution_count": 42 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.argmin()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "egL3oi2Mb19P", + "outputId": "f71fb32f-6338-44a3-b377-75bea0a3ab54" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0)" + ] + }, + "metadata": {}, + "execution_count": 43 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "p2U8DZKib3DP", + "outputId": "b9f613b9-74e9-45f4-ed01-05babb6a6793" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0)" + ] + }, + "metadata": {}, + "execution_count": 44 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[9]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "24qBFlGYcABe", + "outputId": "5813cfcb-7f63-4bd7-ee46-f95ccbfda939" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(90)" + ] + }, + "metadata": {}, + "execution_count": 45 + } + ] + }, + { + "cell_type": "code", + "source": [ 
+ "x = torch.arange(1, 10)\n", + "x.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "0GPOxEzkcBHO", + "outputId": "aefbd903-4f4c-4d2c-c90f-eccd682fe018" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([9])" + ] + }, + "metadata": {}, + "execution_count": 46 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_reshaped = x.reshape(1,9)\n", + "x_reshaped, x_reshaped.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "spmRgQjwddgp", + "outputId": "85a7c55c-2909-4ea2-fc68-386dddc65742" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9]]), torch.Size([1, 9]))" + ] + }, + "metadata": {}, + "execution_count": 47 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_reshaped.view(1,9)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "tH2ahWGydqqP", + "outputId": "65d92263-4fc4-434a-c06d-c5e08436f7fe" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9]])" + ] + }, + "metadata": {}, + "execution_count": 48 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_stacked = torch.stack([x, x, x, x], dim = 1)\n", + "x_stacked" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "jgCeJcaud_-1", + "outputId": "7f293a37-6ef1-43b6-aee5-9d6d91c94f9e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 1, 1, 1],\n", + " [2, 2, 2, 2],\n", + " [3, 3, 3, 3],\n", + " [4, 4, 4, 4],\n", + " [5, 5, 5, 5],\n", + " [6, 6, 6, 6],\n", + " [7, 7, 7, 7],\n", + " [8, 8, 8, 8],\n", + " [9, 9, 9, 9]])" + ] + }, + "metadata": {}, + "execution_count": 49 + } + ] + }, 
+ { + "cell_type": "code", + "source": [ + "x_stacked.squeeze()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XhJHIK6cfPse", + "outputId": "06c47b89-3a9e-453e-bcc3-00cbcb0b8b49" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 1, 1, 1],\n", + " [2, 2, 2, 2],\n", + " [3, 3, 3, 3],\n", + " [4, 4, 4, 4],\n", + " [5, 5, 5, 5],\n", + " [6, 6, 6, 6],\n", + " [7, 7, 7, 7],\n", + " [8, 8, 8, 8],\n", + " [9, 9, 9, 9]])" + ] + }, + "metadata": {}, + "execution_count": 50 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_stacked.unsqueeze(dim=1)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ej2c3Xxzf0tq", + "outputId": "94024061-eb37-446d-c4a8-e4d16cb6de81" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[1, 1, 1, 1]],\n", + "\n", + " [[2, 2, 2, 2]],\n", + "\n", + " [[3, 3, 3, 3]],\n", + "\n", + " [[4, 4, 4, 4]],\n", + "\n", + " [[5, 5, 5, 5]],\n", + "\n", + " [[6, 6, 6, 6]],\n", + "\n", + " [[7, 7, 7, 7]],\n", + "\n", + " [[8, 8, 8, 8]],\n", + "\n", + " [[9, 9, 9, 9]]])" + ] + }, + "metadata": {}, + "execution_count": 52 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_stacked.squeeze()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4DJYo1a0f5M0", + "outputId": "efca2b47-1b14-44de-9a9a-2c83629d153f" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 1, 1, 1],\n", + " [2, 2, 2, 2],\n", + " [3, 3, 3, 3],\n", + " [4, 4, 4, 4],\n", + " [5, 5, 5, 5],\n", + " [6, 6, 6, 6],\n", + " [7, 7, 7, 7],\n", + " [8, 8, 8, 8],\n", + " [9, 9, 9, 9]])" + ] + }, + "metadata": {}, + "execution_count": 53 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_stacked.unsqueeze(dim=-2)" + ], + "metadata": { + 
"colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "J4iEjn2ah2HL", + "outputId": "22395593-7c16-4162-beae-dd2bbe7bda35" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[1, 1, 1, 1]],\n", + "\n", + " [[2, 2, 2, 2]],\n", + "\n", + " [[3, 3, 3, 3]],\n", + "\n", + " [[4, 4, 4, 4]],\n", + "\n", + " [[5, 5, 5, 5]],\n", + "\n", + " [[6, 6, 6, 6]],\n", + "\n", + " [[7, 7, 7, 7]],\n", + "\n", + " [[8, 8, 8, 8]],\n", + "\n", + " [[9, 9, 9, 9]]])" + ] + }, + "metadata": {}, + "execution_count": 55 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "tensor = torch.tensor([1, 2, 3])\n", + "tensor = tensor - 10\n", + "tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cFfiD7Nth7Z_", + "outputId": "1139e1f8-fc1a-46ca-d636-f2bc4fd2eef6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([-9, -8, -7])" + ] + }, + "metadata": {}, + "execution_count": 7 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.mul(tensor, 10)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dyA7BM_GHhqE", + "outputId": "0e3b9671-d9e8-4a32-87bb-59bc05986142" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([-90, -80, -70])" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.sub(tensor, 100)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "owtUsZ1KNegI", + "outputId": "189b7b23-0041-4e09-b991-cd209a48506a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([-109, -108, -107])" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "code", + 
"source": [ + "torch.add(tensor, 100)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "K5STXlQONsyc", + "outputId": "00cbb79a-0a1d-4e21-86ec-5c91c37a2d01" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([91, 92, 93])" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.divide(tensor, 2)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xqMGnzIUNvp0", + "outputId": "c894cf3e-f148-45f8-cfc8-d78740735306" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([-4.5000, -4.0000, -3.5000])" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.matmul(tensor, tensor)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ruGzKpV8NyBc", + "outputId": "fddb63bf-006f-48b6-ae28-287fbcda8bc5" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(194)" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor@tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "8GS3r9yTeGfD", + "outputId": "c80b12ac-30b5-4f3d-c38c-9e41ba511b0e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(194)" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ] + }, + { + "cell_type": "code", + "source": [ + "%%time\n", + "tensor@tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QmuYHqXTemC0", + "outputId": "402fe3ba-70b5-4bb2-c83b-254db84ff810" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": 
"stdout", + "text": [ + "CPU times: user 622 µs, sys: 0 ns, total: 622 µs\n", + "Wall time: 516 µs\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(194)" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ] + }, + { + "cell_type": "code", + "source": [ + "%%time\n", + "torch.matmul(tensor,tensor)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dGr1fzdNepd8", + "outputId": "97bd6c91-bc25-4b38-cdf5-f22dcdef243e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "CPU times: user 424 µs, sys: 998 µs, total: 1.42 ms\n", + "Wall time: 1.43 ms\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(194)" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.rand(3,2)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "pGYDoK2gevfo", + "outputId": "2c8783d5-0453-47c5-c7ed-af10d25d6989" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0.5999, 0.0073],\n", + " [0.9321, 0.3026],\n", + " [0.3463, 0.3872]])" + ] + }, + "metadata": {}, + "execution_count": 20 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.matmul(torch.rand(3,2), torch.rand(2,3))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "KGBGQoB8e2DP", + "outputId": "4c2ef361-a2d0-41ee-c328-3992cbbc138d" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0.3528, 0.1893, 0.0714],\n", + " [1.2791, 0.7110, 0.2563],\n", + " [0.8812, 0.4553, 0.1803]])" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import torch" + ], + "metadata": { + "id": "ib8DMtkBe_LJ" + }, + "execution_count": 
1, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "x = torch.rand(2,9)" + ], + "metadata": { + "id": "nJo8ZBdrQY1b" + }, + "execution_count": 2, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "x" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wi6oRv4MQfgf", + "outputId": "55c99f55-31f6-4cf5-ba4e-19a47c3a0167" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0.5894, 0.4391, 0.2018, 0.5417, 0.3844, 0.3592, 0.9209, 0.9269, 0.0681],\n", + " [0.0746, 0.1740, 0.6821, 0.6890, 0.0999, 0.7444, 0.2391, 0.4625, 0.8302]])" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ] + }, + { + "cell_type": "code", + "source": [ + "y=torch.randn(2,3,5)\n", + "y" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Zpx8myAUQgoc", + "outputId": "07756d70-56bd-437c-c74e-9aecc1a77311" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[ 1.5552, -0.4877, 0.5175, -1.7958, -0.6187],\n", + " [-0.3359, -1.9710, 0.0112, -1.7578, -1.5295],\n", + " [ 0.0932, 1.4079, 0.9108, 0.3328, -0.6978]],\n", + "\n", + " [[-0.9406, -1.0809, -0.2595, 0.1282, 1.6605],\n", + " [ 1.1624, 1.0902, 1.7092, -0.2842, -1.3780],\n", + " [-0.1534, -1.2795, -0.5495, 0.9902, 0.1822]]])" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_original = torch.rand(size=(224,224,3))\n", + "x_original" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "s4U-X9bJQnWe", + "outputId": "657a7a76-962c-4b41-a76b-902d0482266c" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[0.4549, 0.6809, 0.2118],\n", + " [0.4824, 0.9008, 0.8741],\n", + " [0.1715, 0.1757, 0.1845],\n", + " ...,\n", + " [0.8741, 0.6594, 
0.2610],\n", + " [0.0092, 0.1984, 0.1955],\n", + " [0.4236, 0.4182, 0.0251]],\n", + "\n", + " [[0.9174, 0.1661, 0.5852],\n", + " [0.1837, 0.2351, 0.3810],\n", + " [0.3726, 0.4808, 0.8732],\n", + " ...,\n", + " [0.6794, 0.0554, 0.9202],\n", + " [0.0864, 0.8750, 0.3558],\n", + " [0.8445, 0.9759, 0.4934]],\n", + "\n", + " [[0.1600, 0.2635, 0.7194],\n", + " [0.9488, 0.3405, 0.3647],\n", + " [0.6683, 0.5168, 0.9592],\n", + " ...,\n", + " [0.0521, 0.0140, 0.2445],\n", + " [0.3596, 0.3999, 0.2730],\n", + " [0.5926, 0.9877, 0.7784]],\n", + "\n", + " ...,\n", + "\n", + " [[0.4794, 0.5635, 0.3764],\n", + " [0.9124, 0.6094, 0.5059],\n", + " [0.4528, 0.4447, 0.5021],\n", + " ...,\n", + " [0.0089, 0.4816, 0.8727],\n", + " [0.2173, 0.6296, 0.2347],\n", + " [0.2028, 0.9931, 0.7201]],\n", + "\n", + " [[0.3116, 0.6459, 0.4703],\n", + " [0.0148, 0.2345, 0.7149],\n", + " [0.8393, 0.5804, 0.6691],\n", + " ...,\n", + " [0.2105, 0.9460, 0.2696],\n", + " [0.5918, 0.9295, 0.2616],\n", + " [0.2537, 0.7819, 0.4700]],\n", + "\n", + " [[0.6654, 0.1200, 0.5841],\n", + " [0.9147, 0.5522, 0.6529],\n", + " [0.1799, 0.5276, 0.5415],\n", + " ...,\n", + " [0.7536, 0.4346, 0.8793],\n", + " [0.3793, 0.1750, 0.7792],\n", + " [0.9266, 0.8325, 0.9974]]])" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_permuted=x_original.permute(2, 0, 1)\n", + "print(x_original.shape)\n", + "print(x_permuted.shape)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "DD19_zvbQzHo", + "outputId": "1d64ce1b-eb48-47e3-90b6-7f1340e7f2b2" + }, + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "torch.Size([224, 224, 3])\n", + "torch.Size([3, 224, 224])\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_original[0,0,0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NnPmMk4ZRF7w", + "outputId": 
"2cd5da7f-4a23-4a76-8c4a-bb982113f2a4" + }, + "execution_count": 10, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0.4549)" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_permuted[0,0,0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Z0ylNoAARgTo", + "outputId": "ddca0298-cddf-4048-9b71-a791655e5bed" + }, + "execution_count": 11, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0.4549)" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_original[0,0,0]=0.989" + ], + "metadata": { + "id": "RXw0xXsDRi4L" + }, + "execution_count": 13, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "x_original[0,0,0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "1sFdV6wzRo3f", + "outputId": "1cf87d2c-6d88-453a-d136-0f625a2800f1" + }, + "execution_count": 14, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0.9890)" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_permuted[0,0,0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xTX-hx2SR1wp", + "outputId": "0d4908c4-c3bc-44e3-8ec6-1487104cc209" + }, + "execution_count": 15, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0.9890)" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x=torch.arange(1,10).reshape(1,3,3)\n", + "x, x.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mZomOe7gR4Q8", + "outputId": "0b3c922f-ec11-46de-b8a5-9f9533d866ad" + }, + "execution_count": 18, + "outputs": [ + { + "output_type": "execute_result", + "data": { 
+ "text/plain": [ + "(tensor([[[1, 2, 3],\n", + " [4, 5, 6],\n", + " [7, 8, 9]]]),\n", + " torch.Size([1, 3, 3]))" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "3y7v4SQvSBs1", + "outputId": "8c53307d-e628-404d-db66-56c6bdffab7c" + }, + "execution_count": 19, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 2, 3],\n", + " [4, 5, 6],\n", + " [7, 8, 9]])" + ] + }, + "metadata": {}, + "execution_count": 19 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0][0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hf9uG4xLSNya", + "outputId": "3075bc42-9ffa-426b-8a86-95628ffcd824" + }, + "execution_count": 21, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1, 2, 3])" + ] + }, + "metadata": {}, + "execution_count": 21 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0][0][0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "zA4G2Se4SRB3", + "outputId": "324312d2-ed0a-49eb-f81f-e904e53992fe" + }, + "execution_count": 22, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(1)" + ] + }, + "metadata": {}, + "execution_count": 22 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0][2][2]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Mwy3zmKKSdbk", + "outputId": "d35172c3-b099-40a6-ddf1-a453c2adfa44" + }, + "execution_count": 23, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(9)" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[:,1,1]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "fE3nCM1KS7XT", 
+ "outputId": "01f5d755-9737-4235-9f73-dce89ff6ba16" + }, + "execution_count": 24, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([5])" + ] + }, + "metadata": {}, + "execution_count": 24 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0,0,:]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "luNDINKNTTxp", + "outputId": "091195ef-2f71-4602-e95f-529a69193150" + }, + "execution_count": 25, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1, 2, 3])" + ] + }, + "metadata": {}, + "execution_count": 25 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0,:,2]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "KG8A4xbfThCL", + "outputId": "5866bc41-9241-4619-be7b-e9206b3f80ab" + }, + "execution_count": 26, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([3, 6, 9])" + ] + }, + "metadata": {}, + "execution_count": 26 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import numpy as np" + ], + "metadata": { + "id": "CZ3PX0qlTwHJ" + }, + "execution_count": 27, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "array = np.arange(1.0, 8.0)" + ], + "metadata": { + "id": "UOBeTumiT3Lf" + }, + "execution_count": 28, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "array" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "RzcO32E9UCQl", + "outputId": "430def24-c42c-461f-e5e7-398544c695d3" + }, + "execution_count": 29, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([1., 2., 3., 4., 5., 6., 7.])" + ] + }, + "metadata": {}, + "execution_count": 29 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor = torch.from_numpy(array)\n", + "tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": 
"JJIL0q1DUC6O", + "outputId": "8a3b1d7c-4482-4d32-f34f-9212d9d3a177" + }, + "execution_count": 32, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64)" + ] + }, + "metadata": {}, + "execution_count": 32 + } + ] + }, + { + "cell_type": "code", + "source": [ + "array[3]=11.0" + ], + "metadata": { + "id": "j3Ce6q3DUIEK" + }, + "execution_count": 33, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "array" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dc_BCVdjUsCc", + "outputId": "65537325-8b11-4f36-fc73-e56f30d6a036" + }, + "execution_count": 34, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([ 1., 2., 3., 11., 5., 6., 7.])" + ] + }, + "metadata": {}, + "execution_count": 34 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "VG1e_eITUta2", + "outputId": "a26c5198-23b6-4a6d-d73a-ba20cd9782b8" + }, + "execution_count": 35, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 1., 2., 3., 11., 5., 6., 7.], dtype=torch.float64)" + ] + }, + "metadata": {}, + "execution_count": 35 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor = torch.ones(7)\n", + "tensor, tensor.dtype\n", + "numpy_tensor = tensor.numpy()\n", + "numpy_tensor, numpy_tensor.dtype" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Swt8JF8vUuev", + "outputId": "c9e5bf6a-6d2c-41d6-8327-366867ffdd2d" + }, + "execution_count": 37, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(array([1., 1., 1., 1., 1., 1., 1.], dtype=float32), dtype('float32'))" + ] + }, + "metadata": {}, + "execution_count": 37 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + 
"random_tensor_A = torch.rand(3,4)\n", + "random_tensor_B = torch.rand(3,4)\n", + "print(random_tensor_A)\n", + "print(random_tensor_B)\n", + "print(random_tensor_A == random_tensor_B)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "uGcagTteVFTD", + "outputId": "49405790-08e7-4210-b7f1-f00b904c7eb9" + }, + "execution_count": 38, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tensor([[0.9870, 0.6636, 0.6873, 0.8863],\n", + " [0.8386, 0.4169, 0.3587, 0.0265],\n", + " [0.2981, 0.6025, 0.5652, 0.5840]])\n", + "tensor([[0.9821, 0.3481, 0.0913, 0.4940],\n", + " [0.7495, 0.4387, 0.9582, 0.8659],\n", + " [0.5064, 0.6919, 0.0809, 0.9771]])\n", + "tensor([[False, False, False, False],\n", + " [False, False, False, False],\n", + " [False, False, False, False]])\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "RANDOM_SEED = 42\n", + "torch.manual_seed(RANDOM_SEED)\n", + "random_tensor_C = torch.rand(3,4)\n", + "torch.manual_seed(RANDOM_SEED)\n", + "random_tensor_D = torch.rand(3,4)\n", + "print(random_tensor_C)\n", + "print(random_tensor_D)\n", + "print(random_tensor_C == random_tensor_D)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "HznyXyEaWjLM", + "outputId": "25956434-01b6-4059-9054-c9978884ddc1" + }, + "execution_count": 46, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tensor([[0.8823, 0.9150, 0.3829, 0.9593],\n", + " [0.3904, 0.6009, 0.2566, 0.7936],\n", + " [0.9408, 0.1332, 0.9346, 0.5936]])\n", + "tensor([[0.8823, 0.9150, 0.3829, 0.9593],\n", + " [0.3904, 0.6009, 0.2566, 0.7936],\n", + " [0.9408, 0.1332, 0.9346, 0.5936]])\n", + "tensor([[True, True, True, True],\n", + " [True, True, True, True],\n", + " [True, True, True, True]])\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!nvidia-smi" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": 
"vltPTh0YXJSt", + "outputId": "807af6dc-a9ca-4301-ec32-b688dbde8be8" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Thu May 23 02:57:59 2024 \n", + "+---------------------------------------------------------------------------------------+\n", + "| NVIDIA-SMI 535.104.05 Driver Version: 535.104.05 CUDA Version: 12.2 |\n", + "|-----------------------------------------+----------------------+----------------------+\n", + "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n", + "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n", + "| | | MIG M. |\n", + "|=========================================+======================+======================|\n", + "| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n", + "| N/A 60C P8 11W / 70W | 0MiB / 15360MiB | 0% Default |\n", + "| | | N/A |\n", + "+-----------------------------------------+----------------------+----------------------+\n", + " \n", + "+---------------------------------------------------------------------------------------+\n", + "| Processes: |\n", + "| GPU GI CI PID Type Process name GPU Memory |\n", + "| ID ID Usage |\n", + "|=======================================================================================|\n", + "| No running processes found |\n", + "+---------------------------------------------------------------------------------------+\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "torch.cuda.is_available()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "L6mMyPDyYh1j", + "outputId": "279c5dd8-c2a8-4fbd-f321-2f5d7c6e90e6" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ] + }, + { + "cell_type": "code", + "source": [ + "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", + "device" + ], + 
"metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "oOdiYa7ZYytx", + "outputId": "d73b04fc-8963-4826-9722-08d118d5ab91" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'cuda'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.cuda.device_count()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vOdsazLqZFM5", + "outputId": "8189cd6a-9017-4663-a652-3e15c517d9c3" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "1" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor = torch.tensor([1,2,3], device = \"cpu\")\n", + "print(tensor, tensor.device)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cdik9Vw3ZMv0", + "outputId": "044a68fd-83a1-409d-8e3b-655142ca0270" + }, + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tensor([1, 2, 3]) cpu\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor_on_gpu = tensor.to(device)\n", + "tensor_on_gpu" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Zmp835rrZp-z", + "outputId": "37fa3413-18a3-47bf-ae51-5b36ff85a3ef" + }, + "execution_count": 8, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1, 2, 3], device='cuda:0')" + ] + }, + "metadata": {}, + "execution_count": 8 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor_on_gpu.numpy()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 159 + }, + "id": "jhriaa8uZ1yM", + "outputId": "bc5a3226-1a12-4fea-8769-a44f21cdc323" + }, + 
"execution_count": 10, + "outputs": [ + { + "output_type": "error", + "ename": "TypeError", + "evalue": "can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtensor_on_gpu\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnumpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first." + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor_on_cpu = tensor_on_gpu.cpu().numpy()" + ], + "metadata": { + "id": "LHGXK3GgaOzL" + }, + "execution_count": 12, + "outputs": [] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "j-El4LlCajfq" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Feragatname**: \nBu belge, [Co-op Translator](https://github.com/Azure/co-op-translator) adlı yapay zeka çeviri hizmeti kullanılarak çevrilmiştir. Doğruluk için çaba göstersek de, otomatik çevirilerin hata veya yanlışlıklar içerebileceğini lütfen unutmayın. Belgenin orijinal dili, yetkili kaynak olarak kabul edilmelidir. Kritik bilgiler için profesyonel insan çevirisi önerilir. 
Bu çevirinin kullanımından kaynaklanan yanlış anlamalar veya yanlış yorumlamalar için sorumluluk kabul etmiyoruz.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/vi/2-Regression/1-Tools/notebook.ipynb b/translations/vi/2-Regression/1-Tools/notebook.ipynb new file mode 100644 index 000000000..e69de29bb diff --git a/translations/vi/2-Regression/1-Tools/solution/R/lesson_1-R.ipynb b/translations/vi/2-Regression/1-Tools/solution/R/lesson_1-R.ipynb new file mode 100644 index 000000000..b12e3c64e --- /dev/null +++ b/translations/vi/2-Regression/1-Tools/solution/R/lesson_1-R.ipynb @@ -0,0 +1,448 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_1-R.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "c18d3bd0bd8ae3878597e89dcd1fa5c1", + "translation_date": "2025-09-06T13:45:46+00:00", + "source_file": "2-Regression/1-Tools/solution/R/lesson_1-R.ipynb", + "language_code": "vi" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "YJUHCXqK57yz" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Giới thiệu về Hồi quy - Bài học 1\n", + "\n", + "#### Đặt vấn đề vào bối cảnh\n", + "\n", + "✅ Có nhiều phương pháp hồi quy khác nhau, và việc bạn chọn phương pháp nào phụ thuộc vào câu trả lời mà bạn đang tìm kiếm. Nếu bạn muốn dự đoán chiều cao có thể xảy ra của một người ở một độ tuổi nhất định, bạn sẽ sử dụng `hồi quy tuyến tính`, vì bạn đang tìm kiếm một **giá trị số**. Nếu bạn muốn khám phá liệu một loại ẩm thực có nên được coi là thuần chay hay không, bạn đang tìm kiếm một **phân loại danh mục**, vì vậy bạn sẽ sử dụng `hồi quy logistic`. Bạn sẽ học thêm về hồi quy logistic sau này. 
Hãy suy nghĩ một chút về một số câu hỏi bạn có thể đặt ra với dữ liệu, và phương pháp nào trong số này sẽ phù hợp hơn.\n", + "\n", + "Trong phần này, bạn sẽ làm việc với [một tập dữ liệu nhỏ về bệnh tiểu đường](https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html). Hãy tưởng tượng rằng bạn muốn thử nghiệm một phương pháp điều trị cho bệnh nhân tiểu đường. Các mô hình Machine Learning có thể giúp bạn xác định bệnh nhân nào sẽ phản ứng tốt hơn với phương pháp điều trị, dựa trên sự kết hợp của các biến số. Ngay cả một mô hình hồi quy rất cơ bản, khi được trực quan hóa, cũng có thể cho thấy thông tin về các biến số giúp bạn tổ chức các thử nghiệm lâm sàng lý thuyết của mình.\n", + "\n", + "Vậy thì, hãy bắt đầu nhiệm vụ này nhé!\n", + "\n", + "

\n", + " \n", + "

Tác phẩm nghệ thuật của @allison_horst
\n", + "\n", + "\n" + ], + "metadata": { + "id": "LWNNzfqd6feZ" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 1. Tải bộ công cụ của chúng ta\n", + "\n", + "Để thực hiện nhiệm vụ này, chúng ta sẽ cần các gói sau:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) là một [bộ sưu tập các gói R](https://www.tidyverse.org/packages) được thiết kế để làm cho khoa học dữ liệu trở nên nhanh hơn, dễ dàng hơn và thú vị hơn!\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) là một [bộ sưu tập các gói](https://www.tidymodels.org/packages/) dành cho mô hình hóa và học máy.\n", + "\n", + "Bạn có thể cài đặt chúng bằng lệnh sau:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\"))`\n", + "\n", + "Đoạn mã dưới đây sẽ kiểm tra xem bạn đã có các gói cần thiết để hoàn thành mô-đun này chưa và sẽ cài đặt chúng cho bạn nếu thiếu.\n" + ], + "metadata": { + "id": "FIo2YhO26wI9" + } + }, + { + "cell_type": "code", + "execution_count": 2, + "source": [ + "suppressWarnings(if(!require(\"pacman\")) install.packages(\"pacman\"))\n", + "pacman::p_load(tidyverse, tidymodels)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Loading required package: pacman\n", + "\n" + ] + } + ], + "metadata": { + "id": "cIA9fz9v7Dss", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "2df7073b-86b2-4b32-cb86-0da605a0dc11" + } + }, + { + "cell_type": "markdown", + "source": [ + "Bây giờ, hãy tải các gói tuyệt vời này và làm cho chúng khả dụng trong phiên làm việc R hiện tại của chúng ta. 
(Đây chỉ là minh họa, `pacman::p_load()` đã làm điều đó cho bạn)\n" + ], + "metadata": { + "id": "gpO_P_6f9WUG" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# load the core Tidyverse packages\r\n", + "library(tidyverse)\r\n", + "\r\n", + "# load the core Tidymodels packages\r\n", + "library(tidymodels)\r\n" + ], + "outputs": [], + "metadata": { + "id": "NLMycgG-9ezO" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 2. Bộ dữ liệu tiểu đường\n", + "\n", + "Trong bài tập này, chúng ta sẽ áp dụng kỹ năng hồi quy bằng cách dự đoán trên bộ dữ liệu tiểu đường. [Bộ dữ liệu tiểu đường](https://www4.stat.ncsu.edu/~boos/var.select/diabetes.rwrite1.txt) bao gồm `442 mẫu` dữ liệu liên quan đến bệnh tiểu đường, với 10 biến đặc trưng dự đoán: `tuổi`, `giới tính`, `chỉ số khối cơ thể`, `huyết áp trung bình`, và `sáu phép đo huyết thanh máu`, cùng với một biến kết quả `y`: một thước đo định lượng về mức độ tiến triển của bệnh sau một năm kể từ thời điểm ban đầu.\n", + "\n", + "|Số lượng quan sát|442|\n", + "|------------------|:---|\n", + "|Số lượng biến dự đoán|10 cột đầu tiên là các biến dự đoán dạng số|\n", + "|Kết quả/Mục tiêu|Cột thứ 11 là thước đo định lượng về mức độ tiến triển của bệnh sau một năm kể từ thời điểm ban đầu|\n", + "|Thông tin về biến dự đoán|- tuổi tính theo năm\n", + "||- giới tính\n", + "||- bmi chỉ số khối cơ thể\n", + "||- bp huyết áp trung bình\n", + "||- s1 tc, tổng cholesterol trong huyết thanh\n", + "||- s2 ldl, lipoprotein mật độ thấp\n", + "||- s3 hdl, lipoprotein mật độ cao\n", + "||- s4 tch, tổng cholesterol / HDL\n", + "||- s5 ltg, có thể là logarit của mức triglycerides trong huyết thanh\n", + "||- s6 glu, mức đường trong máu|\n", + "\n", + "> 🎓 Hãy nhớ rằng đây là học có giám sát, và chúng ta cần một mục tiêu 'y' được đặt tên.\n", + "\n", + "Trước khi bạn có thể thao tác dữ liệu với R, bạn cần nhập dữ liệu vào bộ nhớ của R hoặc tạo một kết nối để R có thể truy cập dữ liệu từ xa.\n", + "\n", + "> Gói 
[readr](https://readr.tidyverse.org/), một phần của Tidyverse, cung cấp cách nhanh chóng và thân thiện để đọc dữ liệu dạng hình chữ nhật vào R.\n", + "\n", + "Bây giờ, hãy tải bộ dữ liệu tiểu đường từ URL nguồn này: [https://www4.stat.ncsu.edu/~boos/var.select/diabetes.rwrite1.txt](https://www4.stat.ncsu.edu/~boos/var.select/diabetes.rwrite1.txt)\n", + "\n", + "Ngoài ra, chúng ta sẽ kiểm tra dữ liệu bằng cách sử dụng `glimpse()` và hiển thị 5 hàng đầu tiên bằng `slice()`.\n", + "\n", + "Trước khi tiếp tục, hãy giới thiệu một điều mà bạn sẽ thường xuyên gặp trong mã R 🥁🥁: toán tử pipe `%>%`\n", + "\n", + "Toán tử pipe (`%>%`) thực hiện các thao tác theo trình tự logic bằng cách chuyển một đối tượng vào một hàm hoặc biểu thức gọi. Bạn có thể nghĩ toán tử pipe như đang nói \"và sau đó\" trong mã của bạn.\n" + ], + "metadata": { + "id": "KM6iXLH996Cl" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Import the data set\r\n", + "diabetes <- read_table2(file = \"https://www4.stat.ncsu.edu/~boos/var.select/diabetes.rwrite1.txt\")\r\n", + "\r\n", + "\r\n", + "# Get a glimpse and dimensions of the data\r\n", + "glimpse(diabetes)\r\n", + "\r\n", + "\r\n", + "# Select the first 5 rows of the data\r\n", + "diabetes %>% \r\n", + " slice(1:5)" + ], + "outputs": [], + "metadata": { + "id": "Z1geAMhM-bSP" + } + }, + { + "cell_type": "markdown", + "source": [ + "`glimpse()` cho chúng ta thấy rằng dữ liệu này có 442 hàng và 11 cột, với tất cả các cột đều thuộc kiểu dữ liệu `double`.\n", + "\n", + "
\n", + "\n", + "> glimpse() và slice() là các hàm trong [`dplyr`](https://dplyr.tidyverse.org/). Dplyr, một phần của Tidyverse, là một ngữ pháp thao tác dữ liệu cung cấp một tập hợp các động từ nhất quán giúp bạn giải quyết các thách thức phổ biến trong việc thao tác dữ liệu.\n", + "\n", + "
\n", + "\n", + "Bây giờ chúng ta đã có dữ liệu, hãy thu hẹp lại một đặc điểm (`bmi`) để làm mục tiêu cho bài tập này. Điều này sẽ yêu cầu chúng ta chọn các cột mong muốn. Vậy làm thế nào để thực hiện điều này?\n", + "\n", + "[`dplyr::select()`](https://dplyr.tidyverse.org/reference/select.html) cho phép chúng ta *chọn* (và tùy chọn đổi tên) các cột trong một khung dữ liệu.\n" + ], + "metadata": { + "id": "UwjVT1Hz-c3Z" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Select predictor feature `bmi` and outcome `y`\r\n", + "diabetes_select <- diabetes %>% \r\n", + " select(c(bmi, y))\r\n", + "\r\n", + "# Print the first 10 rows\r\n", + "diabetes_select %>% \r\n", + " slice(1:10)" + ], + "outputs": [], + "metadata": { + "id": "RDY1oAKI-m80" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 3. Dữ liệu huấn luyện và kiểm tra\n", + "\n", + "Trong học máy có giám sát, việc *chia* dữ liệu thành hai tập hợp là một thực hành phổ biến; một tập (thường lớn hơn) để huấn luyện mô hình, và một tập nhỏ hơn \"giữ lại\" để kiểm tra xem mô hình hoạt động như thế nào.\n", + "\n", + "Bây giờ chúng ta đã có dữ liệu sẵn sàng, chúng ta có thể xem liệu máy có thể giúp xác định một cách chia hợp lý giữa các số trong tập dữ liệu này hay không. 
Chúng ta có thể sử dụng gói [rsample](https://tidymodels.github.io/rsample/), một phần của khung làm việc Tidymodels, để tạo một đối tượng chứa thông tin về *cách* chia dữ liệu, và sau đó sử dụng hai hàm rsample khác để trích xuất các tập huấn luyện và kiểm tra đã được tạo:\n" + ], + "metadata": { + "id": "SDk668xK-tc3" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "set.seed(2056)\r\n", + "# Split 67% of the data for training and the rest for testing\r\n", + "diabetes_split <- diabetes_select %>% \r\n", + " initial_split(prop = 0.67)\r\n", + "\r\n", + "# Extract the resulting train and test sets\r\n", + "diabetes_train <- training(diabetes_split)\r\n", + "diabetes_test <- testing(diabetes_split)\r\n", + "\r\n", + "# Print the first 10 rows of the training set\r\n", + "diabetes_train %>% \r\n", + " slice(1:10)" + ], + "outputs": [], + "metadata": { + "id": "EqtHx129-1h-" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 4. Huấn luyện mô hình hồi quy tuyến tính với Tidymodels\n", + "\n", + "Bây giờ chúng ta đã sẵn sàng để huấn luyện mô hình!\n", + "\n", + "Trong Tidymodels, bạn định nghĩa mô hình bằng cách sử dụng `parsnip()` và chỉ định ba khái niệm:\n", + "\n", + "- **Loại mô hình** phân biệt các mô hình như hồi quy tuyến tính, hồi quy logistic, mô hình cây quyết định, và nhiều loại khác.\n", + "\n", + "- **Chế độ mô hình** bao gồm các tùy chọn phổ biến như hồi quy và phân loại; một số loại mô hình hỗ trợ cả hai chế độ này, trong khi một số chỉ có một chế độ duy nhất.\n", + "\n", + "- **Công cụ mô hình** là công cụ tính toán sẽ được sử dụng để khớp mô hình. 
Thường thì đây là các gói R, chẳng hạn như **`\"lm\"`** hoặc **`\"ranger\"`**\n", + "\n", + "Thông tin về mô hình này được lưu trong một đặc tả mô hình, vì vậy hãy cùng xây dựng một đặc tả!\n" + ], + "metadata": { + "id": "sBOS-XhB-6v7" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Build a linear model specification\r\n", + "lm_spec <- \r\n", + " # Type\r\n", + " linear_reg() %>% \r\n", + " # Engine\r\n", + " set_engine(\"lm\") %>% \r\n", + " # Mode\r\n", + " set_mode(\"regression\")\r\n", + "\r\n", + "\r\n", + "# Print the model specification\r\n", + "lm_spec" + ], + "outputs": [], + "metadata": { + "id": "20OwEw20--t3" + } + }, + { + "cell_type": "markdown", + "source": [ + "Sau khi một mô hình đã được *xác định*, mô hình có thể được `ước lượng` hoặc `huấn luyện` bằng cách sử dụng hàm [`fit()`](https://parsnip.tidymodels.org/reference/fit.html), thường sử dụng một công thức và một số dữ liệu.\n", + "\n", + "`y ~ .` có nghĩa là chúng ta sẽ khớp `y` làm giá trị dự đoán/mục tiêu, được giải thích bởi tất cả các biến dự đoán/đặc trưng, tức là `.` (trong trường hợp này, chúng ta chỉ có một biến dự đoán: `bmi`).\n" + ], + "metadata": { + "id": "_oDHs89k_CJj" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Build a linear model specification\r\n", + "lm_spec <- linear_reg() %>% \r\n", + " set_engine(\"lm\") %>%\r\n", + " set_mode(\"regression\")\r\n", + "\r\n", + "\r\n", + "# Train a linear regression model\r\n", + "lm_mod <- lm_spec %>% \r\n", + " fit(y ~ ., data = diabetes_train)\r\n", + "\r\n", + "# Print the model\r\n", + "lm_mod" + ], + "outputs": [], + "metadata": { + "id": "YlsHqd-q_GJQ" + } + }, + { + "cell_type": "markdown", + "source": [ + "Từ kết quả đầu ra của mô hình, chúng ta có thể thấy các hệ số được học trong quá trình huấn luyện. Chúng đại diện cho các hệ số của đường hồi quy tốt nhất, giúp giảm thiểu tổng lỗi giữa biến thực tế và biến dự đoán.\n", + "\n", + "
\n", + "\n", + "## 5. Dự đoán trên tập kiểm tra\n", + "\n", + "Bây giờ chúng ta đã huấn luyện xong một mô hình, chúng ta có thể sử dụng nó để dự đoán sự tiến triển của bệnh y cho tập dữ liệu kiểm tra bằng [parsnip::predict()](https://parsnip.tidymodels.org/reference/predict.model_fit.html). Điều này sẽ được sử dụng để vẽ đường phân cách giữa các nhóm dữ liệu.\n" + ], + "metadata": { + "id": "kGZ22RQj_Olu" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make predictions for the test set\r\n", + "predictions <- lm_mod %>% \r\n", + " predict(new_data = diabetes_test)\r\n", + "\r\n", + "# Print out some of the predictions\r\n", + "predictions %>% \r\n", + " slice(1:5)" + ], + "outputs": [], + "metadata": { + "id": "nXHbY7M2_aao" + } + }, + { + "cell_type": "markdown", + "source": [ + "Woohoo! 💃🕺 Chúng ta vừa huấn luyện một mô hình và sử dụng nó để tạo dự đoán!\n", + "\n", + "Khi tạo dự đoán, quy ước của tidymodels luôn là tạo ra một tibble/data frame kết quả với các tên cột được chuẩn hóa. Điều này giúp dễ dàng kết hợp dữ liệu gốc và các dự đoán trong một định dạng có thể sử dụng cho các thao tác tiếp theo như vẽ biểu đồ.\n", + "\n", + "`dplyr::bind_cols()` kết hợp các data frame theo cột một cách hiệu quả.\n" + ], + "metadata": { + "id": "R_JstwUY_bIs" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Combine the predictions and the original test set\r\n", + "results <- diabetes_test %>% \r\n", + " bind_cols(predictions)\r\n", + "\r\n", + "\r\n", + "results %>% \r\n", + " slice(1:5)" + ], + "outputs": [], + "metadata": { + "id": "RybsMJR7_iI8" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 6. Hiển thị kết quả mô hình\n", + "\n", + "Bây giờ, đã đến lúc xem kết quả một cách trực quan 📈. 
Chúng ta sẽ tạo một biểu đồ phân tán cho tất cả các giá trị `y` và `bmi` của tập kiểm tra, sau đó sử dụng các dự đoán để vẽ một đường ở vị trí phù hợp nhất, giữa các nhóm dữ liệu của mô hình.\n", + "\n", + "R có nhiều hệ thống để tạo biểu đồ, nhưng `ggplot2` là một trong những hệ thống thanh lịch và linh hoạt nhất. Điều này cho phép bạn tạo biểu đồ bằng cách **kết hợp các thành phần độc lập**.\n" + ], + "metadata": { + "id": "XJbYbMZW_n_s" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Set a theme for the plot\r\n", + "theme_set(theme_light())\r\n", + "# Create a scatter plot\r\n", + "results %>% \r\n", + " ggplot(aes(x = bmi)) +\r\n", + " # Add a scatter plot\r\n", + " geom_point(aes(y = y), size = 1.6) +\r\n", + " # Add a line plot\r\n", + " geom_line(aes(y = .pred), color = \"blue\", size = 1.5)" + ], + "outputs": [], + "metadata": { + "id": "R9tYp3VW_sTn" + } + }, + { + "cell_type": "markdown", + "source": [ + "✅ Hãy suy nghĩ một chút về điều đang diễn ra ở đây. Một đường thẳng đang chạy qua nhiều điểm dữ liệu nhỏ, nhưng nó thực sự đang làm gì? Bạn có thấy cách mà bạn có thể sử dụng đường này để dự đoán vị trí của một điểm dữ liệu mới, chưa được thấy trước đó, trong mối quan hệ với trục y của biểu đồ không? Hãy thử diễn đạt bằng lời về ứng dụng thực tế của mô hình này.\n", + "\n", + "Chúc mừng bạn, bạn đã xây dựng mô hình hồi quy tuyến tính đầu tiên, tạo ra một dự đoán từ nó, và hiển thị nó trên biểu đồ!\n" + ], + "metadata": { + "id": "zrPtHIxx_tNI" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn tham khảo chính thức. 
Đối với các thông tin quan trọng, chúng tôi khuyến nghị sử dụng dịch vụ dịch thuật chuyên nghiệp từ con người. Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/vi/2-Regression/1-Tools/solution/notebook.ipynb b/translations/vi/2-Regression/1-Tools/solution/notebook.ipynb new file mode 100644 index 000000000..cc1e127ab --- /dev/null +++ b/translations/vi/2-Regression/1-Tools/solution/notebook.ipynb @@ -0,0 +1,675 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Nhập các thư viện cần thiết\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from sklearn import datasets, linear_model, model_selection\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Tải tập dữ liệu bệnh tiểu đường, chia thành dữ liệu đặc trưng `X` và giá trị mục tiêu `y`\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(442, 10)\n", + "[ 0.03807591 0.05068012 0.06169621 0.02187239 -0.0442235 -0.03482076\n", + " -0.04340085 -0.00259226 0.01990749 -0.01764613]\n" + ] + } + ], + "source": [ + "X, y = datasets.load_diabetes(return_X_y=True)\n", + "print(X.shape)\n", + "print(X[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Chỉ chọn một đặc trưng để tập trung vào trong bài tập này\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(442,)\n" + ] + } + ], + "source": [ + "# Selecting the 3rd feature\n", + "X = X[:, 2]\n", + "print(X.shape)\n" + ] + }, + { + "cell_type": 
"code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(442, 1)\n", + "[[ 0.06169621]\n", + " [-0.05147406]\n", + " [ 0.04445121]\n", + " [-0.01159501]\n", + " [-0.03638469]\n", + " [-0.04069594]\n", + " [-0.04716281]\n", + " [-0.00189471]\n", + " [ 0.06169621]\n", + " [ 0.03906215]\n", + " [-0.08380842]\n", + " [ 0.01750591]\n", + " [-0.02884001]\n", + " [-0.00189471]\n", + " [-0.02560657]\n", + " [-0.01806189]\n", + " [ 0.04229559]\n", + " [ 0.01211685]\n", + " [-0.0105172 ]\n", + " [-0.01806189]\n", + " [-0.05686312]\n", + " [-0.02237314]\n", + " [-0.00405033]\n", + " [ 0.06061839]\n", + " [ 0.03582872]\n", + " [-0.01267283]\n", + " [-0.07734155]\n", + " [ 0.05954058]\n", + " [-0.02129532]\n", + " [-0.00620595]\n", + " [ 0.04445121]\n", + " [-0.06548562]\n", + " [ 0.12528712]\n", + " [-0.05039625]\n", + " [-0.06332999]\n", + " [-0.03099563]\n", + " [ 0.02289497]\n", + " [ 0.01103904]\n", + " [ 0.07139652]\n", + " [ 0.01427248]\n", + " [-0.00836158]\n", + " [-0.06764124]\n", + " [-0.0105172 ]\n", + " [-0.02345095]\n", + " [ 0.06816308]\n", + " [-0.03530688]\n", + " [-0.01159501]\n", + " [-0.0730303 ]\n", + " [-0.04177375]\n", + " [ 0.01427248]\n", + " [-0.00728377]\n", + " [ 0.0164281 ]\n", + " [-0.00943939]\n", + " [-0.01590626]\n", + " [ 0.0250506 ]\n", + " [-0.04931844]\n", + " [ 0.04121778]\n", + " [-0.06332999]\n", + " [-0.06440781]\n", + " [-0.02560657]\n", + " [-0.00405033]\n", + " [ 0.00457217]\n", + " [-0.00728377]\n", + " [-0.0374625 ]\n", + " [-0.02560657]\n", + " [-0.02452876]\n", + " [-0.01806189]\n", + " [-0.01482845]\n", + " [-0.02991782]\n", + " [-0.046085 ]\n", + " [-0.06979687]\n", + " [ 0.03367309]\n", + " [-0.00405033]\n", + " [-0.02021751]\n", + " [ 0.00241654]\n", + " [-0.03099563]\n", + " [ 0.02828403]\n", + " [-0.03638469]\n", + " [-0.05794093]\n", + " [-0.0374625 ]\n", + " [ 0.01211685]\n", + " [-0.02237314]\n", + " [-0.03530688]\n", + " [ 0.00996123]\n", + " 
[-0.03961813]\n", + " [ 0.07139652]\n", + " [-0.07518593]\n", + " [-0.00620595]\n", + " [-0.04069594]\n", + " [-0.04824063]\n", + " [-0.02560657]\n", + " [ 0.0519959 ]\n", + " [ 0.00457217]\n", + " [-0.06440781]\n", + " [-0.01698407]\n", + " [-0.05794093]\n", + " [ 0.00996123]\n", + " [ 0.08864151]\n", + " [-0.00512814]\n", + " [-0.06440781]\n", + " [ 0.01750591]\n", + " [-0.04500719]\n", + " [ 0.02828403]\n", + " [ 0.04121778]\n", + " [ 0.06492964]\n", + " [-0.03207344]\n", + " [-0.07626374]\n", + " [ 0.04984027]\n", + " [ 0.04552903]\n", + " [-0.00943939]\n", + " [-0.03207344]\n", + " [ 0.00457217]\n", + " [ 0.02073935]\n", + " [ 0.01427248]\n", + " [ 0.11019775]\n", + " [ 0.00133873]\n", + " [ 0.05846277]\n", + " [-0.02129532]\n", + " [-0.0105172 ]\n", + " [-0.04716281]\n", + " [ 0.00457217]\n", + " [ 0.01750591]\n", + " [ 0.08109682]\n", + " [ 0.0347509 ]\n", + " [ 0.02397278]\n", + " [-0.00836158]\n", + " [-0.06117437]\n", + " [-0.00189471]\n", + " [-0.06225218]\n", + " [ 0.0164281 ]\n", + " [ 0.09618619]\n", + " [-0.06979687]\n", + " [-0.02129532]\n", + " [-0.05362969]\n", + " [ 0.0433734 ]\n", + " [ 0.05630715]\n", + " [-0.0816528 ]\n", + " [ 0.04984027]\n", + " [ 0.11127556]\n", + " [ 0.06169621]\n", + " [ 0.01427248]\n", + " [ 0.04768465]\n", + " [ 0.01211685]\n", + " [ 0.00564998]\n", + " [ 0.04660684]\n", + " [ 0.12852056]\n", + " [ 0.05954058]\n", + " [ 0.09295276]\n", + " [ 0.01535029]\n", + " [-0.00512814]\n", + " [ 0.0703187 ]\n", + " [-0.00405033]\n", + " [-0.00081689]\n", + " [-0.04392938]\n", + " [ 0.02073935]\n", + " [ 0.06061839]\n", + " [-0.0105172 ]\n", + " [-0.03315126]\n", + " [-0.06548562]\n", + " [ 0.0433734 ]\n", + " [-0.06225218]\n", + " [ 0.06385183]\n", + " [ 0.03043966]\n", + " [ 0.07247433]\n", + " [-0.0191397 ]\n", + " [-0.06656343]\n", + " [-0.06009656]\n", + " [ 0.06924089]\n", + " [ 0.05954058]\n", + " [-0.02668438]\n", + " [-0.02021751]\n", + " [-0.046085 ]\n", + " [ 0.07139652]\n", + " [-0.07949718]\n", + " [ 0.00996123]\n", + 
" [-0.03854032]\n", + " [ 0.01966154]\n", + " [ 0.02720622]\n", + " [-0.00836158]\n", + " [-0.01590626]\n", + " [ 0.00457217]\n", + " [-0.04285156]\n", + " [ 0.00564998]\n", + " [-0.03530688]\n", + " [ 0.02397278]\n", + " [-0.01806189]\n", + " [ 0.04229559]\n", + " [-0.0547075 ]\n", + " [-0.00297252]\n", + " [-0.06656343]\n", + " [-0.01267283]\n", + " [-0.04177375]\n", + " [-0.03099563]\n", + " [-0.00512814]\n", + " [-0.05901875]\n", + " [ 0.0250506 ]\n", + " [-0.046085 ]\n", + " [ 0.00349435]\n", + " [ 0.05415152]\n", + " [-0.04500719]\n", + " [-0.05794093]\n", + " [-0.05578531]\n", + " [ 0.00133873]\n", + " [ 0.03043966]\n", + " [ 0.00672779]\n", + " [ 0.04660684]\n", + " [ 0.02612841]\n", + " [ 0.04552903]\n", + " [ 0.04013997]\n", + " [-0.01806189]\n", + " [ 0.01427248]\n", + " [ 0.03690653]\n", + " [ 0.00349435]\n", + " [-0.07087468]\n", + " [-0.03315126]\n", + " [ 0.09403057]\n", + " [ 0.03582872]\n", + " [ 0.03151747]\n", + " [-0.06548562]\n", + " [-0.04177375]\n", + " [-0.03961813]\n", + " [-0.03854032]\n", + " [-0.02560657]\n", + " [-0.02345095]\n", + " [-0.06656343]\n", + " [ 0.03259528]\n", + " [-0.046085 ]\n", + " [-0.02991782]\n", + " [-0.01267283]\n", + " [-0.01590626]\n", + " [ 0.07139652]\n", + " [-0.03099563]\n", + " [ 0.00026092]\n", + " [ 0.03690653]\n", + " [ 0.03906215]\n", + " [-0.01482845]\n", + " [ 0.00672779]\n", + " [-0.06871905]\n", + " [-0.00943939]\n", + " [ 0.01966154]\n", + " [ 0.07462995]\n", + " [-0.00836158]\n", + " [-0.02345095]\n", + " [-0.046085 ]\n", + " [ 0.05415152]\n", + " [-0.03530688]\n", + " [-0.03207344]\n", + " [-0.0816528 ]\n", + " [ 0.04768465]\n", + " [ 0.06061839]\n", + " [ 0.05630715]\n", + " [ 0.09834182]\n", + " [ 0.05954058]\n", + " [ 0.03367309]\n", + " [ 0.05630715]\n", + " [-0.06548562]\n", + " [ 0.16085492]\n", + " [-0.05578531]\n", + " [-0.02452876]\n", + " [-0.03638469]\n", + " [-0.00836158]\n", + " [-0.04177375]\n", + " [ 0.12744274]\n", + " [-0.07734155]\n", + " [ 0.02828403]\n", + " [-0.02560657]\n", + 
" [-0.06225218]\n", + " [-0.00081689]\n", + " [ 0.08864151]\n", + " [-0.03207344]\n", + " [ 0.03043966]\n", + " [ 0.00888341]\n", + " [ 0.00672779]\n", + " [-0.02021751]\n", + " [-0.02452876]\n", + " [-0.01159501]\n", + " [ 0.02612841]\n", + " [-0.05901875]\n", + " [-0.03638469]\n", + " [-0.02452876]\n", + " [ 0.01858372]\n", + " [-0.0902753 ]\n", + " [-0.00512814]\n", + " [-0.05255187]\n", + " [-0.02237314]\n", + " [-0.02021751]\n", + " [-0.0547075 ]\n", + " [-0.00620595]\n", + " [-0.01698407]\n", + " [ 0.05522933]\n", + " [ 0.07678558]\n", + " [ 0.01858372]\n", + " [-0.02237314]\n", + " [ 0.09295276]\n", + " [-0.03099563]\n", + " [ 0.03906215]\n", + " [-0.06117437]\n", + " [-0.00836158]\n", + " [-0.0374625 ]\n", + " [-0.01375064]\n", + " [ 0.07355214]\n", + " [-0.02452876]\n", + " [ 0.03367309]\n", + " [ 0.0347509 ]\n", + " [-0.03854032]\n", + " [-0.03961813]\n", + " [-0.00189471]\n", + " [-0.03099563]\n", + " [-0.046085 ]\n", + " [ 0.00133873]\n", + " [ 0.06492964]\n", + " [ 0.04013997]\n", + " [-0.02345095]\n", + " [ 0.05307371]\n", + " [ 0.04013997]\n", + " [-0.02021751]\n", + " [ 0.01427248]\n", + " [-0.03422907]\n", + " [ 0.00672779]\n", + " [ 0.00457217]\n", + " [ 0.03043966]\n", + " [ 0.0519959 ]\n", + " [ 0.06169621]\n", + " [-0.00728377]\n", + " [ 0.00564998]\n", + " [ 0.05415152]\n", + " [-0.00836158]\n", + " [ 0.114509 ]\n", + " [ 0.06708527]\n", + " [-0.05578531]\n", + " [ 0.03043966]\n", + " [-0.02560657]\n", + " [ 0.10480869]\n", + " [-0.00620595]\n", + " [-0.04716281]\n", + " [-0.04824063]\n", + " [ 0.08540807]\n", + " [-0.01267283]\n", + " [-0.03315126]\n", + " [-0.00728377]\n", + " [-0.01375064]\n", + " [ 0.05954058]\n", + " [ 0.02181716]\n", + " [ 0.01858372]\n", + " [-0.01159501]\n", + " [-0.00297252]\n", + " [ 0.01750591]\n", + " [-0.02991782]\n", + " [-0.02021751]\n", + " [-0.05794093]\n", + " [ 0.06061839]\n", + " [-0.04069594]\n", + " [-0.07195249]\n", + " [-0.05578531]\n", + " [ 0.04552903]\n", + " [-0.00943939]\n", + " [-0.03315126]\n", + 
" [ 0.04984027]\n", + " [-0.08488624]\n", + " [ 0.00564998]\n", + " [ 0.02073935]\n", + " [-0.00728377]\n", + " [ 0.10480869]\n", + " [-0.02452876]\n", + " [-0.00620595]\n", + " [-0.03854032]\n", + " [ 0.13714305]\n", + " [ 0.17055523]\n", + " [ 0.00241654]\n", + " [ 0.03798434]\n", + " [-0.05794093]\n", + " [-0.00943939]\n", + " [-0.02345095]\n", + " [-0.0105172 ]\n", + " [-0.03422907]\n", + " [-0.00297252]\n", + " [ 0.06816308]\n", + " [ 0.00996123]\n", + " [ 0.00241654]\n", + " [-0.03854032]\n", + " [ 0.02612841]\n", + " [-0.08919748]\n", + " [ 0.06061839]\n", + " [-0.02884001]\n", + " [-0.02991782]\n", + " [-0.0191397 ]\n", + " [-0.04069594]\n", + " [ 0.01535029]\n", + " [-0.02452876]\n", + " [ 0.00133873]\n", + " [ 0.06924089]\n", + " [-0.06979687]\n", + " [-0.02991782]\n", + " [-0.046085 ]\n", + " [ 0.01858372]\n", + " [ 0.00133873]\n", + " [-0.03099563]\n", + " [-0.00405033]\n", + " [ 0.01535029]\n", + " [ 0.02289497]\n", + " [ 0.04552903]\n", + " [-0.04500719]\n", + " [-0.03315126]\n", + " [ 0.097264 ]\n", + " [ 0.05415152]\n", + " [ 0.12313149]\n", + " [-0.08057499]\n", + " [ 0.09295276]\n", + " [-0.05039625]\n", + " [-0.01159501]\n", + " [-0.0277622 ]\n", + " [ 0.05846277]\n", + " [ 0.08540807]\n", + " [-0.00081689]\n", + " [ 0.00672779]\n", + " [ 0.00888341]\n", + " [ 0.08001901]\n", + " [ 0.07139652]\n", + " [-0.02452876]\n", + " [-0.0547075 ]\n", + " [-0.03638469]\n", + " [ 0.0164281 ]\n", + " [ 0.07786339]\n", + " [-0.03961813]\n", + " [ 0.01103904]\n", + " [-0.04069594]\n", + " [-0.03422907]\n", + " [ 0.00564998]\n", + " [ 0.08864151]\n", + " [-0.03315126]\n", + " [-0.05686312]\n", + " [-0.03099563]\n", + " [ 0.05522933]\n", + " [-0.06009656]\n", + " [ 0.00133873]\n", + " [-0.02345095]\n", + " [-0.07410811]\n", + " [ 0.01966154]\n", + " [-0.01590626]\n", + " [-0.01590626]\n", + " [ 0.03906215]\n", + " [-0.0730303 ]]\n" + ] + } + ], + "source": [ + "#Reshaping to get a 2D array\n", + "X = X.reshape(-1, 1)\n", + "print(X.shape)\n", + "print(X)" + ] + 
}, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Chia dữ liệu huấn luyện và kiểm tra cho cả `X` và `y`\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.33)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Chọn mô hình và huấn luyện nó bằng dữ liệu huấn luyện\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "LinearRegression()" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = linear_model.LinearRegression()\n", + "model.fit(X_train, y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Sử dụng dữ liệu kiểm tra để dự đoán một đường thẳng\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred = model.predict(X_test)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Hiển thị kết quả trong một biểu đồ\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.scatter(X_test, y_test, color='black')\n", + "plt.plot(X_test, y_pred, color='blue', linewidth=3)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc sự không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn tham khảo chính thức. Đối với các thông tin quan trọng, nên sử dụng dịch vụ dịch thuật chuyên nghiệp từ con người. Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.1" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "16ff1a974f6e4348e869e4a7d366b86a", + "translation_date": "2025-09-06T13:39:56+00:00", + "source_file": "2-Regression/1-Tools/solution/notebook.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/vi/2-Regression/2-Data/notebook.ipynb b/translations/vi/2-Regression/2-Data/notebook.ipynb new file mode 100644 index 000000000..e1c99098e --- /dev/null +++ b/translations/vi/2-Regression/2-Data/notebook.ipynb @@ -0,0 +1,46 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { 
+ "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3-final" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3", + "language": "python" + }, + "coopTranslator": { + "original_hash": "1b2ab303ac6c604a34c6ca7a49077fc7", + "translation_date": "2025-09-06T13:46:08+00:00", + "source_file": "2-Regression/2-Data/notebook.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc sự không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn tham khảo chính thức. Đối với các thông tin quan trọng, nên sử dụng dịch vụ dịch thuật chuyên nghiệp từ con người. 
Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/vi/2-Regression/2-Data/solution/R/lesson_2-R.ipynb b/translations/vi/2-Regression/2-Data/solution/R/lesson_2-R.ipynb new file mode 100644 index 000000000..560e03fb6 --- /dev/null +++ b/translations/vi/2-Regression/2-Data/solution/R/lesson_2-R.ipynb @@ -0,0 +1,673 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_2-R.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "f3c335f9940cfd76528b3ef918b9b342", + "translation_date": "2025-09-06T13:56:29+00:00", + "source_file": "2-Regression/2-Data/solution/R/lesson_2-R.ipynb", + "language_code": "vi" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Xây dựng mô hình hồi quy: chuẩn bị và trực quan hóa dữ liệu\n", + "\n", + "## **Hồi quy tuyến tính cho bí ngô - Bài học 2**\n", + "#### Giới thiệu\n", + "\n", + "Bây giờ bạn đã có các công cụ cần thiết để bắt đầu xây dựng mô hình học máy với Tidymodels và Tidyverse, bạn đã sẵn sàng để bắt đầu đặt câu hỏi về dữ liệu của mình. Khi làm việc với dữ liệu và áp dụng các giải pháp ML, điều rất quan trọng là phải hiểu cách đặt câu hỏi đúng để khai thác tối đa tiềm năng của tập dữ liệu.\n", + "\n", + "Trong bài học này, bạn sẽ học:\n", + "\n", + "- Cách chuẩn bị dữ liệu của bạn để xây dựng mô hình.\n", + "\n", + "- Cách sử dụng `ggplot2` để trực quan hóa dữ liệu.\n", + "\n", + "Câu hỏi bạn cần trả lời sẽ quyết định loại thuật toán ML mà bạn sẽ sử dụng. Và chất lượng của câu trả lời bạn nhận được sẽ phụ thuộc rất nhiều vào bản chất của dữ liệu.\n", + "\n", + "Hãy cùng xem điều này qua một bài tập thực hành.\n", + "\n", + "\n", + "

\n", + " \n", + "

Tác phẩm nghệ thuật của @allison_horst
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "Pg5aexcOPqAZ" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 1. Nhập dữ liệu về bí ngô và triệu hồi Tidyverse\n", + "\n", + "Chúng ta sẽ cần các gói sau để phân tích và xử lý bài học này:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) là một [bộ sưu tập các gói R](https://www.tidyverse.org/packages) được thiết kế để làm cho khoa học dữ liệu trở nên nhanh hơn, dễ dàng hơn và thú vị hơn!\n", + "\n", + "Bạn có thể cài đặt chúng bằng cách:\n", + "\n", + "`install.packages(c(\"tidyverse\"))`\n", + "\n", + "Đoạn mã dưới đây kiểm tra xem bạn đã có các gói cần thiết để hoàn thành module này chưa và sẽ cài đặt chúng cho bạn nếu thiếu.\n" + ], + "metadata": { + "id": "dc5WhyVdXAjR" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "suppressWarnings(if(!require(\"pacman\")) install.packages(\"pacman\"))\n", + "pacman::p_load(tidyverse)" + ], + "outputs": [], + "metadata": { + "id": "GqPYUZgfXOBt" + } + }, + { + "cell_type": "markdown", + "source": [ + "Bây giờ, hãy khởi động một số gói và tải [dữ liệu](https://github.com/microsoft/ML-For-Beginners/blob/main/2-Regression/data/US-pumpkins.csv) được cung cấp cho bài học này!\n" + ], + "metadata": { + "id": "kvjDTPDSXRr2" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load the core Tidyverse packages\n", + "library(tidyverse)\n", + "\n", + "# Import the pumpkins data\n", + "pumpkins <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/2-Regression/data/US-pumpkins.csv\")\n", + "\n", + "\n", + "# Get a glimpse and dimensions of the data\n", + "glimpse(pumpkins)\n", + "\n", + "\n", + "# Print the first 50 rows of the data set\n", + "pumpkins %>% \n", + " slice_head(n =50)" + ], + "outputs": [], + "metadata": { + "id": "VMri-t2zXqgD" + } + }, + { + "cell_type": "markdown", + "source": [ + "Một `glimpse()` nhanh chóng cho thấy rằng có các giá 
trị trống và sự kết hợp giữa chuỗi ký tự (`chr`) và dữ liệu số (`dbl`). Cột `Date` thuộc kiểu ký tự và còn có một cột kỳ lạ tên là `Package`, nơi dữ liệu là sự pha trộn giữa `sacks`, `bins` và các giá trị khác. Thực tế, dữ liệu này khá lộn xộn 😤.\n", + "\n", + "Thực tế, không thường xuyên bạn nhận được một tập dữ liệu hoàn toàn sẵn sàng để sử dụng nhằm tạo ra một mô hình ML ngay lập tức. Nhưng đừng lo, trong bài học này, bạn sẽ học cách chuẩn bị một tập dữ liệu thô bằng cách sử dụng các thư viện R tiêu chuẩn 🧑‍🔧. Bạn cũng sẽ học các kỹ thuật khác nhau để trực quan hóa dữ liệu. 📈📊\n", + "
\n", + "\n", + "> Một lời nhắc lại: Toán tử pipe (`%>%`) thực hiện các thao tác theo trình tự logic bằng cách chuyển một đối tượng vào một hàm hoặc biểu thức gọi. Bạn có thể nghĩ toán tử pipe như đang nói \"và sau đó\" trong mã của bạn.\n" + ], + "metadata": { + "id": "REWcIv9yX29v" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 2. Kiểm tra dữ liệu bị thiếu\n", + "\n", + "Một trong những vấn đề phổ biến nhất mà các nhà khoa học dữ liệu phải đối mặt là dữ liệu không đầy đủ hoặc bị thiếu. R biểu thị các giá trị bị thiếu hoặc không xác định bằng một giá trị đặc biệt: `NA` (Not Available).\n", + "\n", + "Vậy làm thế nào để chúng ta biết rằng khung dữ liệu chứa các giá trị bị thiếu?\n", + "
\n", + "- Một cách đơn giản là sử dụng hàm cơ bản của R `anyNA`, hàm này trả về các đối tượng logic `TRUE` hoặc `FALSE`.\n" + ], + "metadata": { + "id": "Zxfb3AM5YbUe" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "pumpkins %>% \n", + " anyNA()" + ], + "outputs": [], + "metadata": { + "id": "G--DQutAYltj" + } + }, + { + "cell_type": "markdown", + "source": [ + "Tuyệt vời, có vẻ như đang thiếu một số dữ liệu! Đây là một điểm tốt để bắt đầu.\n", + "\n", + "- Một cách khác là sử dụng hàm `is.na()` để xác định các phần tử bị thiếu trong từng cột với giá trị logic `TRUE`.\n" + ], + "metadata": { + "id": "mU-7-SB6YokF" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "pumpkins %>% \n", + " is.na() %>% \n", + " head(n = 7)" + ], + "outputs": [], + "metadata": { + "id": "W-DxDOR4YxSW" + } + }, + { + "cell_type": "markdown", + "source": [ + "Được rồi, đã hoàn thành công việc nhưng với một khung dữ liệu lớn như thế này, việc xem xét từng hàng và cột riêng lẻ sẽ không hiệu quả và gần như không thể😴.\n", + "\n", + "- Một cách trực quan hơn là tính tổng số giá trị bị thiếu cho mỗi cột:\n" + ], + "metadata": { + "id": "xUWxipKYY0o7" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "pumpkins %>% \n", + " is.na() %>% \n", + " colSums()" + ], + "outputs": [], + "metadata": { + "id": "ZRBWV6P9ZArL" + } + }, + { + "cell_type": "markdown", + "source": [ + "Tốt hơn nhiều! Có dữ liệu bị thiếu, nhưng có lẽ điều đó sẽ không ảnh hưởng đến nhiệm vụ hiện tại. Hãy xem phân tích tiếp theo sẽ mang lại điều gì.\n", + "\n", + "> Bên cạnh các bộ gói và hàm tuyệt vời, R còn có tài liệu hướng dẫn rất tốt. Ví dụ, sử dụng `help(colSums)` hoặc `?colSums` để tìm hiểu thêm về hàm này.\n" + ], + "metadata": { + "id": "9gv-crB6ZD1Y" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 3. Dplyr: Ngữ pháp của việc xử lý dữ liệu\n", + "\n", + "

\n", + " \n", + "

Tác phẩm nghệ thuật của @allison_horst
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "o4jLY5-VZO2C" + } + }, + { + "cell_type": "markdown", + "source": [ + "[`dplyr`](https://dplyr.tidyverse.org/), một gói trong Tidyverse, là một ngữ pháp xử lý dữ liệu cung cấp một tập hợp các động từ nhất quán giúp bạn giải quyết các thách thức xử lý dữ liệu phổ biến nhất. Trong phần này, chúng ta sẽ khám phá một số động từ của dplyr! \n", + "
\n" + ], + "metadata": { + "id": "i5o33MQBZWWw" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### dplyr::select()\n", + "\n", + "`select()` là một hàm trong gói `dplyr` giúp bạn chọn các cột để giữ lại hoặc loại bỏ.\n", + "\n", + "Để làm cho khung dữ liệu của bạn dễ làm việc hơn, hãy loại bỏ một số cột bằng cách sử dụng `select()`, chỉ giữ lại các cột bạn cần.\n", + "\n", + "Ví dụ, trong bài tập này, phân tích của chúng ta sẽ liên quan đến các cột `Package`, `Low Price`, `High Price` và `Date`. Hãy chọn các cột này.\n" + ], + "metadata": { + "id": "x3VGMAGBZiUr" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Select desired columns\n", + "pumpkins <- pumpkins %>% \n", + " select(Package, `Low Price`, `High Price`, Date)\n", + "\n", + "\n", + "# Print data set\n", + "pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "F_FgxQnVZnM0" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### dplyr::mutate()\n", + "\n", + "`mutate()` là một hàm trong gói `dplyr`, giúp bạn tạo hoặc chỉnh sửa các cột, đồng thời giữ nguyên các cột hiện có.\n", + "\n", + "Cấu trúc chung của `mutate` là:\n", + "\n", + "`data %>% mutate(new_column_name = what_it_contains)`\n", + "\n", + "Hãy thử sử dụng `mutate` với cột `Date` bằng cách thực hiện các thao tác sau:\n", + "\n", + "1. Chuyển đổi các ngày (hiện tại thuộc kiểu ký tự) sang định dạng tháng (đây là ngày tháng kiểu Mỹ, nên định dạng là `MM/DD/YYYY`).\n", + "\n", + "2. Trích xuất tháng từ các ngày và lưu vào một cột mới.\n", + "\n", + "Trong R, gói [lubridate](https://lubridate.tidyverse.org/) giúp làm việc với dữ liệu ngày giờ dễ dàng hơn. Vì vậy, hãy sử dụng `dplyr::mutate()`, `lubridate::mdy()`, `lubridate::month()` để đạt được các mục tiêu trên. 
Chúng ta có thể loại bỏ cột `Date` vì sẽ không cần sử dụng nó nữa trong các thao tác tiếp theo.\n" + ], + "metadata": { + "id": "2KKo0Ed9Z1VB" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load lubridate\n", + "library(lubridate)\n", + "\n", + "pumpkins <- pumpkins %>% \n", + " # Convert the Date column to a date object\n", + " mutate(Date = mdy(Date)) %>% \n", + " # Extract month from Date\n", + " mutate(Month = month(Date)) %>% \n", + " # Drop Date column\n", + " select(-Date)\n", + "\n", + "# View the first few rows\n", + "pumpkins %>% \n", + " slice_head(n = 7)" + ], + "outputs": [], + "metadata": { + "id": "5joszIVSZ6xe" + } + }, + { + "cell_type": "markdown", + "source": [ + "Woohoo! 🤩\n", + "\n", + "Tiếp theo, hãy tạo một cột mới `Price`, đại diện cho giá trung bình của một quả bí ngô. Bây giờ, hãy lấy trung bình của các cột `Low Price` và `High Price` để điền vào cột Price mới. \n", + "
\n" + ], + "metadata": { + "id": "nIgLjNMCZ-6Y" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Create a new column Price\n", + "pumpkins <- pumpkins %>% \n", + " mutate(Price = (`Low Price` + `High Price`)/2)\n", + "\n", + "# View the first few rows of the data\n", + "pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "Zo0BsqqtaJw2" + } + }, + { + "cell_type": "markdown", + "source": [ + "Yeees!💪\n", + "\n", + "\"Nhưng khoan đã!\", bạn sẽ nói sau khi lướt qua toàn bộ tập dữ liệu với `View(pumpkins)`, \"Có điều gì đó kỳ lạ ở đây!\"🤔\n", + "\n", + "Nếu bạn nhìn vào cột `Package`, bí ngô được bán theo nhiều cách khác nhau. Một số được bán theo đơn vị `1 1/9 bushel`, một số theo đơn vị `1/2 bushel`, một số theo từng quả bí ngô, một số theo cân nặng, và một số được đóng trong các hộp lớn với kích thước khác nhau.\n", + "\n", + "Hãy kiểm tra điều này:\n" + ], + "metadata": { + "id": "p77WZr-9aQAR" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Verify the distinct observations in Package column\n", + "pumpkins %>% \n", + " distinct(Package)" + ], + "outputs": [], + "metadata": { + "id": "XISGfh0IaUy6" + } + }, + { + "cell_type": "markdown", + "source": [ + "Tuyệt vời!👏\n", + "\n", + "Bí ngô dường như rất khó để cân một cách nhất quán, vì vậy hãy lọc chúng bằng cách chỉ chọn những quả bí ngô có chuỗi *bushel* trong cột `Package` và đặt chúng vào một khung dữ liệu mới `new_pumpkins`.\n" + ], + "metadata": { + "id": "7sMjiVujaZxY" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### dplyr::filter() và stringr::str_detect()\n", + "\n", + "[`dplyr::filter()`](https://dplyr.tidyverse.org/reference/filter.html): tạo một tập con của dữ liệu chỉ chứa **các hàng** thỏa mãn điều kiện của bạn, trong trường hợp này là các hàng có từ *bushel* trong cột `Package`.\n", + "\n", + "[stringr::str_detect()](https://stringr.tidyverse.org/reference/str_detect.html): phát hiện sự 
có mặt hoặc vắng mặt của một mẫu trong chuỗi.\n", + "\n", + "Gói [`stringr`](https://github.com/tidyverse/stringr) cung cấp các hàm đơn giản cho các thao tác chuỗi thông dụng.\n" + ], + "metadata": { + "id": "L8Qfcs92ageF" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Retain only pumpkins with \"bushel\"\n", + "new_pumpkins <- pumpkins %>% \n", + " filter(str_detect(Package, \"bushel\"))\n", + "\n", + "# Get the dimensions of the new data\n", + "dim(new_pumpkins)\n", + "\n", + "# View a few rows of the new data\n", + "new_pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "hy_SGYREampd" + } + }, + { + "cell_type": "markdown", + "source": [ + "Bạn có thể thấy rằng chúng tôi đã thu hẹp xuống còn khoảng 415 dòng dữ liệu chứa bí ngô theo giạ.🤩\n", + "
\n" + ], + "metadata": { + "id": "VrDwF031avlR" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### dplyr::case_when()\n", + "\n", + "**Nhưng khoan đã! Còn một việc nữa cần làm**\n", + "\n", + "Bạn có nhận thấy rằng số lượng giạ thay đổi theo từng hàng không? Bạn cần chuẩn hóa giá để hiển thị giá theo mỗi giạ, không phải theo 1 1/9 hay 1/2 giạ. Đã đến lúc thực hiện một số phép toán để chuẩn hóa.\n", + "\n", + "Chúng ta sẽ sử dụng hàm [`case_when()`](https://dplyr.tidyverse.org/reference/case_when.html) để *biến đổi* cột Price dựa trên một số điều kiện. `case_when` cho phép bạn vector hóa nhiều câu lệnh `if_else()`.\n" + ], + "metadata": { + "id": "mLpw2jH4a0tx" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Convert the price if the Package contains fractional bushel values\n", + "new_pumpkins <- new_pumpkins %>% \n", + " mutate(Price = case_when(\n", + " str_detect(Package, \"1 1/9\") ~ Price/(1 + 1/9),\n", + " str_detect(Package, \"1/2\") ~ Price/(1/2),\n", + " TRUE ~ Price))\n", + "\n", + "# View the first few rows of the data\n", + "new_pumpkins %>% \n", + " slice_head(n = 30)" + ], + "outputs": [], + "metadata": { + "id": "P68kLVQmbM6I" + } + }, + { + "cell_type": "markdown", + "source": [ + "Bây giờ, chúng ta có thể phân tích giá theo đơn vị dựa trên đo lường bằng giạ. Tuy nhiên, tất cả việc nghiên cứu về giạ bí ngô này cho thấy rằng việc `hiểu rõ bản chất của dữ liệu` của bạn là điều `rất quan trọng`!\n", + "\n", + "> ✅ Theo [The Spruce Eats](https://www.thespruceeats.com/how-much-is-a-bushel-1389308), trọng lượng của một giạ phụ thuộc vào loại nông sản, vì đây là một đơn vị đo thể tích. \"Ví dụ, một giạ cà chua được cho là nặng 56 pound... Lá và rau xanh chiếm nhiều không gian hơn nhưng lại nhẹ hơn, vì vậy một giạ rau chân vịt chỉ nặng 20 pound.\" Thật là phức tạp! Chúng ta không cần phải bận tâm đến việc chuyển đổi từ giạ sang pound, thay vào đó hãy định giá theo giạ. 
Tuy nhiên, tất cả việc nghiên cứu về giạ bí ngô này cho thấy rằng việc hiểu rõ bản chất của dữ liệu của bạn là điều rất quan trọng!\n", + ">\n", + "> ✅ Bạn có để ý rằng bí ngô được bán theo nửa giạ có giá rất đắt không? Bạn có thể tìm ra lý do tại sao không? Gợi ý: những quả bí ngô nhỏ đắt hơn rất nhiều so với những quả lớn, có lẽ vì có nhiều quả nhỏ hơn trong một giạ, do không gian bị chiếm bởi một quả bí ngô lớn rỗng ruột.\n" + ], + "metadata": { + "id": "pS2GNPagbSdb" + } + }, + { + "cell_type": "markdown", + "source": [ + "Bây giờ cuối cùng, chỉ để thêm phần thú vị 💁‍♀️, hãy di chuyển cột Month lên vị trí đầu tiên, tức là `trước` cột `Package`.\n", + "\n", + "`dplyr::relocate()` được sử dụng để thay đổi vị trí các cột.\n" + ], + "metadata": { + "id": "qql1SowfbdnP" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Create a new data frame new_pumpkins\n", + "new_pumpkins <- new_pumpkins %>% \n", + " relocate(Month, .before = Package)\n", + "\n", + "new_pumpkins %>% \n", + " slice_head(n = 7)" + ], + "outputs": [], + "metadata": { + "id": "JJ1x6kw8bixF" + } + }, + { + "cell_type": "markdown", + "source": [ + "Làm tốt lắm!👌 Giờ bạn đã có một tập dữ liệu sạch sẽ, gọn gàng để xây dựng mô hình hồi quy mới của mình! \n", + "
\n" + ], + "metadata": { + "id": "y8TJ0Za_bn5Y" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 4. Trực quan hóa dữ liệu với ggplot2\n", + "\n", + "

\n", + " \n", + "

Đồ họa thông tin bởi Dasani Madipalli
\n", + "\n", + "\n", + "\n", + "\n", + "Có một câu nói *thông thái* như sau:\n", + "\n", + "> \"Biểu đồ đơn giản đã mang lại nhiều thông tin hơn cho nhà phân tích dữ liệu so với bất kỳ công cụ nào khác.\" --- John Tukey\n", + "\n", + "Một phần vai trò của nhà khoa học dữ liệu là thể hiện chất lượng và bản chất của dữ liệu mà họ đang làm việc. Để làm điều này, họ thường tạo ra các hình ảnh trực quan thú vị, hoặc các biểu đồ, đồ thị, và sơ đồ, thể hiện các khía cạnh khác nhau của dữ liệu. Bằng cách này, họ có thể trực quan hóa các mối quan hệ và khoảng trống mà nếu không sẽ khó phát hiện.\n", + "\n", + "Các hình ảnh trực quan cũng có thể giúp xác định kỹ thuật học máy phù hợp nhất với dữ liệu. Ví dụ, một biểu đồ phân tán có xu hướng theo một đường thẳng cho thấy dữ liệu là ứng viên tốt cho bài toán hồi quy tuyến tính.\n", + "\n", + "R cung cấp một số hệ thống để tạo biểu đồ, nhưng [`ggplot2`](https://ggplot2.tidyverse.org/index.html) là một trong những hệ thống thanh lịch và linh hoạt nhất. 
`ggplot2` cho phép bạn tạo biểu đồ bằng cách **kết hợp các thành phần độc lập**.\n", + "\n", + "Hãy bắt đầu với một biểu đồ phân tán đơn giản cho các cột Price và Month.\n", + "\n", + "Trong trường hợp này, chúng ta sẽ bắt đầu với [`ggplot()`](https://ggplot2.tidyverse.org/reference/ggplot.html), cung cấp một tập dữ liệu và ánh xạ thẩm mỹ (với [`aes()`](https://ggplot2.tidyverse.org/reference/aes.html)) sau đó thêm các lớp (như [`geom_point()`](https://ggplot2.tidyverse.org/reference/geom_point.html)) cho biểu đồ phân tán.\n" + ], + "metadata": { + "id": "mYSH6-EtbvNa" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Set a theme for the plots\n", + "theme_set(theme_light())\n", + "\n", + "# Create a scatter plot\n", + "p <- ggplot(data = new_pumpkins, aes(x = Price, y = Month))\n", + "p + geom_point()" + ], + "outputs": [], + "metadata": { + "id": "g2YjnGeOcLo4" + } + }, + { + "cell_type": "markdown", + "source": [ + "Đây có phải là một biểu đồ hữu ích không 🤷? Có điều gì khiến bạn ngạc nhiên về nó không?\n", + "\n", + "Nó không thực sự hữu ích vì tất cả những gì nó làm chỉ là hiển thị dữ liệu của bạn dưới dạng một loạt các điểm trong một tháng nhất định. \n", + "
\n" + ], + "metadata": { + "id": "Ml7SDCLQcPvE" + } + }, + { + "cell_type": "markdown", + "source": [ + "### **Làm thế nào để chúng ta làm cho nó hữu ích?**\n", + "\n", + "Để biểu đồ hiển thị dữ liệu hữu ích, bạn thường cần nhóm dữ liệu theo một cách nào đó. Ví dụ, trong trường hợp của chúng ta, tìm giá trung bình của bí ngô theo từng tháng sẽ cung cấp thêm thông tin chi tiết về các mẫu ẩn trong dữ liệu. Điều này dẫn chúng ta đến một công cụ khác của **dplyr**:\n", + "\n", + "#### `dplyr::group_by() %>% summarize()`\n", + "\n", + "Việc tính toán tổng hợp theo nhóm trong R có thể được thực hiện dễ dàng bằng cách sử dụng\n", + "\n", + "`dplyr::group_by() %>% summarize()`\n", + "\n", + "- `dplyr::group_by()` thay đổi đơn vị phân tích từ toàn bộ tập dữ liệu sang các nhóm riêng lẻ, chẳng hạn như theo từng tháng.\n", + "\n", + "- `dplyr::summarize()` tạo một khung dữ liệu mới với một cột cho mỗi biến nhóm và một cột cho mỗi thống kê tóm tắt mà bạn đã chỉ định.\n", + "\n", + "Ví dụ, chúng ta có thể sử dụng `dplyr::group_by() %>% summarize()` để nhóm các bí ngô thành các nhóm dựa trên cột **Month** và sau đó tìm **giá trung bình** cho từng tháng.\n" + ], + "metadata": { + "id": "jMakvJZIcVkh" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Find the average price of pumpkins per month\r\n", + "new_pumpkins %>%\r\n", + " group_by(Month) %>% \r\n", + " summarise(mean_price = mean(Price))" + ], + "outputs": [], + "metadata": { + "id": "6kVSUa2Bcilf" + } + }, + { + "cell_type": "markdown", + "source": [ + "Ngắn gọn!✨\n", + "\n", + "Các đặc điểm phân loại như tháng được biểu diễn tốt hơn bằng biểu đồ cột 📊. Các lớp chịu trách nhiệm cho biểu đồ cột là `geom_bar()` và `geom_col()`. 
Tham khảo `?geom_bar` để tìm hiểu thêm.\n", + "\n", + "Hãy cùng tạo một cái nhé!\n" + ], + "metadata": { + "id": "Kds48GUBcj3W" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Find the average price of pumpkins per month then plot a bar chart\r\n", + "new_pumpkins %>%\r\n", + " group_by(Month) %>% \r\n", + " summarise(mean_price = mean(Price)) %>% \r\n", + " ggplot(aes(x = Month, y = mean_price)) +\r\n", + " geom_col(fill = \"midnightblue\", alpha = 0.7) +\r\n", + " ylab(\"Pumpkin Price\")" + ], + "outputs": [], + "metadata": { + "id": "VNbU1S3BcrxO" + } + }, + { + "cell_type": "markdown", + "source": [ + "🤩🤩 Đây là một biểu đồ trực quan hóa dữ liệu hữu ích hơn! Dường như nó chỉ ra rằng giá cao nhất của bí ngô xảy ra vào tháng 9 và tháng 10. Điều này có đúng với mong đợi của bạn không? Tại sao hoặc tại sao không?\n", + "\n", + "Chúc mừng bạn đã hoàn thành bài học thứ hai 👏! Bạn đã chuẩn bị dữ liệu của mình để xây dựng mô hình, sau đó khám phá thêm nhiều thông tin chi tiết bằng cách sử dụng biểu đồ trực quan hóa!\n" + ], + "metadata": { + "id": "zDm0VOzzcuzR" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn thông tin chính thức. Đối với các thông tin quan trọng, khuyến nghị sử dụng dịch vụ dịch thuật chuyên nghiệp bởi con người. 
Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/vi/2-Regression/2-Data/solution/notebook.ipynb b/translations/vi/2-Regression/2-Data/solution/notebook.ipynb new file mode 100644 index 000000000..0cde53f4d --- /dev/null +++ b/translations/vi/2-Regression/2-Data/solution/notebook.ipynb @@ -0,0 +1,437 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NameTypePackageVarietySub VarietyGradeDateLow PriceHigh PriceMostly Low...Unit of SaleQualityConditionAppearanceStorageCropRepackTrans ModeUnnamed: 24Unnamed: 25
70BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN9/24/1615.015.015.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
71BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN9/24/1618.018.018.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
72BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN10/1/1618.018.018.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
73BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN10/1/1617.017.017.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
74BALTIMORENaN1 1/9 bushel cartonsPIE TYPENaNNaN10/8/1615.015.015.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " City Name Type Package Variety Sub Variety Grade \\\n", + "70 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "71 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "72 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "73 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "74 BALTIMORE NaN 1 1/9 bushel cartons PIE TYPE NaN NaN \n", + "\n", + " Date Low Price High Price Mostly Low ... Unit of Sale Quality \\\n", + "70 9/24/16 15.0 15.0 15.0 ... NaN NaN \n", + "71 9/24/16 18.0 18.0 18.0 ... NaN NaN \n", + "72 10/1/16 18.0 18.0 18.0 ... NaN NaN \n", + "73 10/1/16 17.0 17.0 17.0 ... NaN NaN \n", + "74 10/8/16 15.0 15.0 15.0 ... NaN NaN \n", + "\n", + " Condition Appearance Storage Crop Repack Trans Mode Unnamed: 24 \\\n", + "70 NaN NaN NaN NaN N NaN NaN \n", + "71 NaN NaN NaN NaN N NaN NaN \n", + "72 NaN NaN NaN NaN N NaN NaN \n", + "73 NaN NaN NaN NaN N NaN NaN \n", + "74 NaN NaN NaN NaN N NaN NaN \n", + "\n", + " Unnamed: 25 \n", + "70 NaN \n", + "71 NaN \n", + "72 NaN \n", + "73 NaN \n", + "74 NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "pumpkins = pd.read_csv('../../data/US-pumpkins.csv')\n", + "\n", + "pumpkins = pumpkins[pumpkins['Package'].str.contains('bushel', case=True, regex=True)]\n", + "\n", + "pumpkins.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "City Name 0\n", + "Type 406\n", + "Package 0\n", + "Variety 0\n", + "Sub Variety 167\n", + "Grade 415\n", + "Date 0\n", + "Low Price 0\n", + "High Price 0\n", + "Mostly Low 24\n", + "Mostly High 24\n", + "Origin 0\n", + "Origin District 396\n", + "Item Size 114\n", + "Color 145\n", + "Environment 415\n", + "Unit of Sale 404\n", + "Quality 415\n", + "Condition 415\n", + "Appearance 
415\n", + "Storage 415\n", + "Crop 415\n", + "Repack 0\n", + "Trans Mode 415\n", + "Unnamed: 24 415\n", + "Unnamed: 25 391\n", + "dtype: int64" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pumpkins.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Month Package Low Price High Price Price\n", + "70 9 1 1/9 bushel cartons 15.00 15.0 13.50\n", + "71 9 1 1/9 bushel cartons 18.00 18.0 16.20\n", + "72 10 1 1/9 bushel cartons 18.00 18.0 16.20\n", + "73 10 1 1/9 bushel cartons 17.00 17.0 15.30\n", + "74 10 1 1/9 bushel cartons 15.00 15.0 13.50\n", + "... ... ... ... ... ...\n", + "1738 9 1/2 bushel cartons 15.00 15.0 30.00\n", + "1739 9 1/2 bushel cartons 13.75 15.0 28.75\n", + "1740 9 1/2 bushel cartons 10.75 15.0 25.75\n", + "1741 9 1/2 bushel cartons 12.00 12.0 24.00\n", + "1742 9 1/2 bushel cartons 12.00 12.0 24.00\n", + "\n", + "[415 rows x 5 columns]\n" + ] + } + ], + "source": [ + "\n", + "# A set of new columns for a new dataframe. 
Filter out nonmatching columns\n", + "columns_to_select = ['Package', 'Low Price', 'High Price', 'Date']\n", + "pumpkins = pumpkins.loc[:, columns_to_select]\n", + "\n", + "# Get an average between low and high price for the base pumpkin price\n", + "price = (pumpkins['Low Price'] + pumpkins['High Price']) / 2\n", + "\n", + "# Convert the date to its month only\n", + "month = pd.DatetimeIndex(pumpkins['Date']).month\n", + "\n", + "# Create a new dataframe with this basic data\n", + "new_pumpkins = pd.DataFrame({'Month': month, 'Package': pumpkins['Package'], 'Low Price': pumpkins['Low Price'],'High Price': pumpkins['High Price'], 'Price': price})\n", + "\n", + "# Convert the price if the Package contains fractional bushel values\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1 1/9'), 'Price'] = price/(1 + 1/9)\n", + "\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1/2'), 'Price'] = price/(1/2)\n", + "\n", + "print(new_pumpkins)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "price = new_pumpkins.Price\n", + "month = new_pumpkins.Month\n", + "plt.scatter(price, month)\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0, 0.5, 'Pumpkin Price')" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEJCAYAAACT/UyFAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAAsTAAALEwEAmpwYAAARAElEQVR4nO3de5AlZX3G8e8joKigiIwbVNYVQ6ErwcVaiRW0CgUNikEQKxFTijHJahlUSsvUqknE/LVE0KoYNVkDigloNCoQLt5AxUuCLrrhIhqUQgMiLBGE0goR+OWP0+sMszOzZ8ft0zO830/VqTndfc7phwae6XlPX1JVSJLa8aChA0iSJsvil6TGWPyS1BiLX5IaY/FLUmMsfklqzK5DBxjHPvvsU6tWrRo6hiQtK1dcccVtVTU1e/6yKP5Vq1axadOmoWNI0rKS5IdzzXeoR5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktSYZXECl3auVesvHDoCN2w4eugIUrMsfjXNX4JqkUM9ktQYi1+SGmPxS1JjLH5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqTG/Fn2S/JF9M8p0k1yR5Yzf/lCQ3JdncPV7YVwZJ0rZ27fGz7wHeXFXfSrIncEWSz3fL3lNVp/W4bknSPHor/qq6Gbi5e35XkmuBx/W1PknSePrc4/+VJKuAQ4DLgcOAk5K8EtjE6K+C2yeRQ9L8Vq2/cOgI3LDh6KEjNKH3L3eT7AF8Eji5qu4EPgA8CVjD6C+C0+d537okm5Js2rJlS98xJakZvRZ/kt0Ylf7ZVfUpgKq6parurar7gA8Ch8713qraWFVrq2rt1NRUnzElqSl9HtUT4Azg2qp694z5+8542XHA1X1lkCRtq88x/sOAVwBXJdnczXsbcEKSNUABNwCv6TGDJGmWPo/q+SqQORZd1Nc6F+IXV5I04pm7ktQYi1+SGmPxS1JjLH5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktQYi1+SGmPxS1JjLH5JakxvxZ9kvyRfTPKdJNckeWM3f+8kn09yXffzUX1lkCRtq889/nuAN1fVauCZwJ8lWQ2sBy6pqgOAS7ppSdKE9Fb8VXVzVX2re34XcC3wOODFwFndy84Cju0rgyRpWxMZ40+yCjgEuBxYUVU3d4t+AqyY5z3rkmxKsmnLli2TiClJTei9+JPsAXwSOLmq7py5rKoKqLneV1Ubq2ptVa2dmprqO6YkNWOs4k/y0CQH7uiHJ9mNUemfXVWf6mbfkmTfbvm+wK07+rmSpM
XbbvEn+T1gM/CZbnpNkvPHeF+AM4Brq+rdMxadD5zYPT8ROG8HM0uSfg3j7PGfAhwK3AFQVZuBJ47xvsOAVwDPTbK5e7wQ2AA8L8l1wJHdtCRpQnYd4zW/rKqfjXbgf2XOcfn7vaDqq0DmWXzEGOuVJPVgnOK/JsnLgV2SHAC8Afh6v7EkSX0ZZ6jn9cBTgbuBc4CfASf3mEmS1KPt7vFX1S+At3cPSdIyN85RPZ9PsteM6Ucl+WyvqSRJvRlnqGefqrpj60RV3Q48prdEkqRejVP89yVZuXUiyRMY46geSdLSNM5RPW8Hvprky4wOz3w2sK7XVJKk3ozz5e5nkjyd0aWVYXTNndv6jSVJ6su8Qz1Jntz9fDqwEvhx91jZzZMkLUML7fG/idGQzulzLCvgub0kkiT1at7ir6p1SR4E/EVVfW2CmSRJPVrwqJ6qug/4uwllkSRNwDiHc16S5PjMukqbJGl5Gqf4XwN8Arg7yZ1J7kpy5/beJElamsY5nHPPSQSRJE3GQodzHpDkvCRXJzknyeMmGUyS1I+FhnrOBC4Ajge+Dbx3IokkSb1aaKhnz6r6YPf8XUm+NYlAkqR+LVT8uyc5hOnbJz505nRV+YtAkpahhYr/ZuDdM6Z/MmPaM3claZla6Mzd50wyiCRpMsY5jl+S9ABi8UtSYyx+SWrMOHfgojt56wkzX19Vl/UVSpLUn+0Wf5JTgT8AvgPc280uwOKXpGVonD3+Y4EDq+runrNIkiZgnOK/HtgN2KHiT3Im8CLg1qo6qJt3CvCnwJbuZW+rqot25HMlqW+r1l84dARu2HB0b589TvH/Atic5BJmlH9VvWE77/swo5u4fGTW/PdU1Wk7ElKStPOMU/znd48dUlWXJVm1w4kkSb0a53r8Z+3kdZ6U5JXAJuDNVXX7XC9Kso7Rzd5ZuXLlTo4gSe1a6Hr8H+9+XpXkytmPRa7vA8CTgDWMrgV0+nwvrKqNVbW2qtZOTU0tcnWSpNkW2uN/Y/fzRTtrZVV1y9bnST7I6Hr/kqQJmnePv6pu7p6urqofznwAL1jMypLsO2PyOODqxXyOJGnxxvly9y+T3F1VlwIk+XPgOcDfL/SmJB8FDgf2SXIj8A7g8CRrGJ0AdgOjG7lLkiZonOI/BrggyVuAo4AnAy/e3puq6oQ5Zp+xY/EkSTvbOEf13JbkGOALwBXAS6uqek8mSerFvMWf5C5GQzJbPRjYH3hpkqqqR/QdTpK08y10B649JxlEkjQZ416W+SXAsxj9BfCVqjq3z1CSpP5s90YsSd4PvBa4itHhl69N8r6+g0mS+jHOHv9zgads/UI3yVnANb2mkiT1ZpxbL34fmHmxnP26eZKkZWicPf49gWuTfKObfgawKcn5AFV1TF/hJEk73zjF/1e9p5AkTcw4J3B9GSDJI7j/zdZ/2mMuSVJPxrnZ+jrgr4H/Be4Dwuiwzv37jSZJ6sM4Qz1vAQ6qqtv6DiNJ6t84R/X8gNF9dyVJDwDj7PG/Ffh6ksvZsZutS5KWoHGK/x+ASxmduXtfv3EkSX0bp/h3q6o39Z5EkjQR44zxX5xkXZJ9k+y99dF7MklSL8bZ4996J623zpjn4ZyStEyNcwLXEycRRJI0GeOcwPXKueZX1Ud2fhxJUt/GGep5xoznuwNHAN8CLH5JWobGGep5/czpJHsBH+srkCSpX+Mc1TPbzwHH/SVpmRpnjP/fGB3FA6NfFKuBj/cZSpLUn3HG+E+b8fwe4IdVdWNPeSRJPZu3+JPszugm67/J6HINZ1TVPZMKJknqx0Jj/GcBaxmV/guA0yeSSJLUq4WGelZX1W8BJDkD+MYCr91GkjOBFwG3VtVB3by9gX8BVgE3AL9fVbfveGxJ0mIttMf/y61PFjnE82HgqFnz1gOXVNUBwCXdtCRpghYq/qclubN73AUcvPV5kju398FVdRkw+768L2Y0hET389jFhJYkLd68Qz1VtUsP61tRVTd3z38CrO
hhHZKkBSzmBK6doqqK6fMDttFdCnpTkk1btmyZYDJJemCbdPHfkmRfgO7nrfO9sKo2VtXaqlo7NTU1sYCS9EA36eI/Hzixe34icN6E1y9Jzeut+JN8FPh34MAkNyb5Y2AD8Lwk1wFHdtOSpAka55INi1JVJ8yz6Ii+1ilJ2r7BvtyVJA3D4pekxlj8ktQYi1+SGmPxS1JjLH5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktSYXYdYaZIbgLuAe4F7qmrtEDkkqUWDFH/nOVV124Drl6QmOdQjSY0ZqvgL+FySK5KsGyiDJDVpqKGeZ1XVTUkeA3w+yXer6rKZL+h+IawDWLly5RAZJekBaZA9/qq6qft5K/Bp4NA5XrOxqtZW1dqpqalJR5SkB6yJF3+ShyfZc+tz4PnA1ZPOIUmtGmKoZwXw6SRb139OVX1mgByS1KSJF39VXQ88bdLrlSSNeDinJDXG4pekxlj8ktQYi1+SGmPxS1JjLH5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktQYi1+SGjNI8Sc5Ksn3knw/yfohMkhSqyZe/El2Ad4HvABYDZyQZPWkc0hSq4bY4z8U+H5VXV9V/wd8DHjxADkkqUmpqsmuMHkpcFRV/Uk3/Qrgt6vqpFmvWwes6yYPBL430aDb2ge4beAMS4XbYprbYprbYtpS2RZPqKqp2TN3HSLJOKpqI7Bx6BxbJdlUVWuHzrEUuC2muS2muS2mLfVtMcRQz03AfjOmH9/NkyRNwBDF/03ggCRPTPJg4GXA+QPkkKQmTXyop6ruSXIS8FlgF+DMqrpm0jkWYckMOy0BbotpbotpbotpS3pbTPzLXUnSsDxzV5IaY/FLUmMsfklqzJI9jn9IM442+nFVfSHJy4HfAa4FNlbVLwcNOGFJ9gdewugw3HuB/wLOqao7Bw0maVH8cncOSc5m9EvxYcAdwB7Ap4AjGG2zE4dLN1lJ3gC8CLgMeCHwbUbb5DjgdVX1pcHCSVoUi38OSa6sqoOT7Mro5LLHVtW9SQL8Z1UdPHDEiUlyFbCm++d/GHBRVR2eZCVwXlUdMnDEiUnySOCtwLHAY4ACbgXOAzZU1R2DhVtCklxcVS8YOsekJHkEo/8uHg9cXFXnzFj2/qp63WDh5uFQz9we1A33PJzRXv8jgZ8CDwF2GzLYQHZlNMTzEEZ//VBVP0rS2rb4OHApcHhV/QQgyW8AJ3bLnj9gtolK8vT5FgFrJhhlKfgQcB3wSeDVSY4HXl5VdwPPHDTZPCz+uZ0BfJfRCWZvBz6R5HpG/xI/NmSwAfwj8M0klwPPBk4FSDLF6JdhS1ZV1akzZ3S/AE5N8uqBMg3lm8CXGRX9bHtNNsrgnlRVx3fPz03yduDSJMcMGWohDvXMI8ljAarqx0n2Ao4EflRV3xg02ACSPBV4CnB1VX136DxDSfI54AvAWVV1SzdvBfAq4HlVdeSA8SYqydXAcVV13RzL/ruq9pvjbQ9ISa4FnlpV982Y9yrgLcAeVfWEobLNx+KXxpTkUcB6RvePeEw3+xZG15raUFW3D5Vt0rrLq19VVdtcLj3JsVV17uRTDSPJ3wCfq6ovzJp/FPDeqjpgmGTzs/ilnSDJH1XVh4bOsRS4LaYt1W1h8Us7QZIfVdXKoXMsBW6LaUt1W/jlrjSmJFfOtwhYMcksQ3NbTFuO28Lil8a3AvhdYPZYfoCvTz7OoNwW05bdtrD4pfFdwOgojc2zFyT50sTTDMttMW3ZbQvH+CWpMV6dU5IaY/FLUmMsfglIUkn+ecb0rkm2JL
lgkZ+3V5LXzZg+fLGfJe1sFr808nPgoCQP7aafx+jKrIu1F7DkrsoogcUvzXQRcHT3/ATgo1sXJNk7yblJrkzyH0kO7uafkuTMJF9Kcn13/wKADcCTkmxO8q5u3h5J/jXJd5Oc3V3mW5o4i1+a9jHgZUl2Bw4GLp+x7J3At7t7MbwN+MiMZU9mdBz3ocA7ustVrwd+UFVrquot3esOAU4GVgP7A4f1+M8izcvilzpVdSWwitHe/kWzFj8L+KfudZcCj+5uwAFwYVXdXVW3Mboxy3xna36jqm7sruK4uVuXNHGewCXd3/nAacDhwKPHfM/dM57fy/z/X437OqlX7vFL93cm8M6qumrW/K8AfwijI3SA27Zzs/m7gD37CCj9utzjkGaoqhuBv51j0SnAmd0FuX7B6HaLC33O/yT5WnfDkouBC3d2VmmxvGSDJDXGoR5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSY/4fZDFW+b6+4WkAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "new_pumpkins.groupby(['Month'])['Price'].mean().plot(kind='bar')\n", + "plt.ylabel(\"Pumpkin Price\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn tham khảo chính thức. Đối với các thông tin quan trọng, nên sử dụng dịch vụ dịch thuật chuyên nghiệp từ con người. Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" + }, + "kernelspec": { + "display_name": "Python 3.7.0 64-bit ('3.7')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.1" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "95726f0b8283628d5356a4f8eb8b4b76", + "translation_date": "2025-09-06T13:46:35+00:00", + "source_file": "2-Regression/2-Data/solution/notebook.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/vi/2-Regression/3-Linear/notebook.ipynb 
b/translations/vi/2-Regression/3-Linear/notebook.ipynb new file mode 100644 index 000000000..2a2e5f4f0 --- /dev/null +++ b/translations/vi/2-Regression/3-Linear/notebook.ipynb @@ -0,0 +1,128 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Giá Bí Ngô\n", + "\n", + "Tải các thư viện và tập dữ liệu cần thiết. Chuyển đổi dữ liệu thành một dataframe chứa một phần dữ liệu:\n", + "\n", + "- Chỉ lấy những quả bí ngô được định giá theo giạ\n", + "- Chuyển đổi ngày thành tháng\n", + "- Tính giá trung bình giữa giá cao và giá thấp\n", + "- Chuyển đổi giá để phản ánh mức giá theo số lượng giạ\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from datetime import datetime\n", + "\n", + "pumpkins = pd.read_csv('../data/US-pumpkins.csv')\n", + "\n", + "pumpkins.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pumpkins = pumpkins[pumpkins['Package'].str.contains('bushel', case=True, regex=True)]\n", + "\n", + "columns_to_select = ['Package', 'Variety', 'City Name', 'Low Price', 'High Price', 'Date']\n", + "pumpkins = pumpkins.loc[:, columns_to_select]\n", + "\n", + "price = (pumpkins['Low Price'] + pumpkins['High Price']) / 2\n", + "\n", + "month = pd.DatetimeIndex(pumpkins['Date']).month\n", + "day_of_year = pd.to_datetime(pumpkins['Date']).apply(lambda dt: (dt-datetime(dt.year,1,1)).days)\n", + "\n", + "new_pumpkins = pd.DataFrame(\n", + " {'Month': month, \n", + " 'DayOfYear' : day_of_year, \n", + " 'Variety': pumpkins['Variety'], \n", + " 'City': pumpkins['City Name'], \n", + " 'Package': pumpkins['Package'], \n", + " 'Low Price': pumpkins['Low Price'],\n", + " 'High Price': pumpkins['High Price'], \n", + " 'Price': price})\n", + "\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1 1/9'), 
'Price'] = price/1.1\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1/2'), 'Price'] = price*2\n", + "\n", + "new_pumpkins.head()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Một biểu đồ phân tán cơ bản nhắc nhở chúng ta rằng chúng ta chỉ có dữ liệu tháng từ tháng Tám đến tháng Mười Hai. Chúng ta có lẽ cần thêm dữ liệu để có thể đưa ra kết luận theo cách tuyến tính.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "plt.scatter('Month','Price',data=new_pumpkins)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "plt.scatter('DayOfYear','Price',data=new_pumpkins)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc sự không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn tham khảo chính thức. Đối với các thông tin quan trọng, nên sử dụng dịch vụ dịch thuật chuyên nghiệp từ con người. 
Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3-final" + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "b032d371c75279373507f003439a577e", + "translation_date": "2025-09-06T13:09:20+00:00", + "source_file": "2-Regression/3-Linear/notebook.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/vi/2-Regression/3-Linear/solution/R/lesson_3-R.ipynb b/translations/vi/2-Regression/3-Linear/solution/R/lesson_3-R.ipynb new file mode 100644 index 000000000..8d3640228 --- /dev/null +++ b/translations/vi/2-Regression/3-Linear/solution/R/lesson_3-R.ipynb @@ -0,0 +1,1086 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_3-R.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "5015d65d61ba75a223bfc56c273aa174", + "translation_date": "2025-09-06T13:26:17+00:00", + "source_file": "2-Regression/3-Linear/solution/R/lesson_3-R.ipynb", + "language_code": "vi" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "EgQw8osnsUV-" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Hồi quy tuyến tính và hồi quy đa thức để định giá bí ngô - Bài học 3\n", + "

\n", + " \n", + "

Đồ họa thông tin bởi Dasani Madipalli
\n", + "\n", + "\n", + "\n", + "\n", + "#### Giới thiệu\n", + "\n", + "Cho đến nay, bạn đã tìm hiểu về hồi quy với dữ liệu mẫu được thu thập từ tập dữ liệu định giá bí ngô mà chúng ta sẽ sử dụng xuyên suốt bài học này. Bạn cũng đã trực quan hóa nó bằng cách sử dụng `ggplot2`.💪\n", + "\n", + "Bây giờ bạn đã sẵn sàng đi sâu hơn vào hồi quy trong học máy. Trong bài học này, bạn sẽ tìm hiểu thêm về hai loại hồi quy: *hồi quy tuyến tính cơ bản* và *hồi quy đa thức*, cùng với một số toán học cơ bản liên quan đến các kỹ thuật này.\n", + "\n", + "> Xuyên suốt chương trình học này, chúng tôi giả định rằng bạn có kiến thức toán học tối thiểu và cố gắng làm cho nó dễ tiếp cận hơn đối với học viên đến từ các lĩnh vực khác, vì vậy hãy chú ý đến các ghi chú, 🧮 các điểm nhấn, sơ đồ và các công cụ học tập khác để hỗ trợ việc hiểu bài.\n", + "\n", + "#### Chuẩn bị\n", + "\n", + "Như đã nhắc lại, bạn đang tải dữ liệu này để đặt câu hỏi về nó.\n", + "\n", + "- Khi nào là thời điểm tốt nhất để mua bí ngô?\n", + "\n", + "- Giá của một thùng bí ngô nhỏ sẽ là bao nhiêu?\n", + "\n", + "- Tôi nên mua chúng trong giỏ nửa bushel hay trong hộp 1 1/9 bushel? Hãy tiếp tục khám phá dữ liệu này.\n", + "\n", + "Trong bài học trước, bạn đã tạo một `tibble` (một cách tái hiện hiện đại của khung dữ liệu) và điền vào đó một phần của tập dữ liệu gốc, chuẩn hóa giá theo bushel. Tuy nhiên, bằng cách làm như vậy, bạn chỉ có thể thu thập khoảng 400 điểm dữ liệu và chỉ trong các tháng mùa thu. Có lẽ chúng ta có thể tìm hiểu thêm chi tiết về bản chất của dữ liệu bằng cách làm sạch nó hơn nữa? Chúng ta sẽ xem... 
🕵️‍♀️\n", + "\n", + "Để thực hiện nhiệm vụ này, chúng ta sẽ cần các gói sau:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) là một [bộ sưu tập các gói R](https://www.tidyverse.org/packages) được thiết kế để làm cho khoa học dữ liệu nhanh hơn, dễ dàng hơn và thú vị hơn!\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) là một [khung làm việc](https://www.tidymodels.org/packages/) bao gồm các gói dành cho mô hình hóa và học máy.\n", + "\n", + "- `janitor`: [gói janitor](https://github.com/sfirke/janitor) cung cấp các công cụ đơn giản để kiểm tra và làm sạch dữ liệu bẩn.\n", + "\n", + "- `corrplot`: [gói corrplot](https://cran.r-project.org/web/packages/corrplot/vignettes/corrplot-intro.html) cung cấp một công cụ trực quan để khám phá ma trận tương quan, hỗ trợ sắp xếp lại biến tự động nhằm giúp phát hiện các mẫu ẩn giữa các biến.\n", + "\n", + "Bạn có thể cài đặt chúng bằng lệnh:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\", \"janitor\", \"corrplot\"))`\n", + "\n", + "Đoạn mã dưới đây kiểm tra xem bạn đã có các gói cần thiết để hoàn thành module này chưa và cài đặt chúng cho bạn nếu chúng bị thiếu.\n" + ], + "metadata": { + "id": "WqQPS1OAsg3H" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "suppressWarnings(if (!require(\"pacman\")) install.packages(\"pacman\"))\n", + "\n", + "pacman::p_load(tidyverse, tidymodels, janitor, corrplot)" + ], + "outputs": [], + "metadata": { + "id": "tA4C2WN3skCf", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "c06cd805-5534-4edc-f72b-d0d1dab96ac0" + } + }, + { + "cell_type": "markdown", + "source": [ + "Chúng ta sẽ tải các gói tuyệt vời này và làm cho chúng khả dụng trong phiên làm việc R hiện tại của chúng ta. (Đây chỉ là minh họa, `pacman::p_load()` đã làm điều này cho bạn)\n", + "\n", + "## 1. 
Đường hồi quy tuyến tính\n", + "\n", + "Như bạn đã học trong Bài học 1, mục tiêu của bài tập hồi quy tuyến tính là vẽ được một *đường* *phù hợp nhất* để:\n", + "\n", + "- **Hiển thị mối quan hệ giữa các biến**. Cho thấy các biến liên quan với nhau như thế nào.\n", + "\n", + "- **Dự đoán**. Dự đoán chính xác vị trí mà một điểm dữ liệu mới sẽ nằm trong mối quan hệ với đường đó.\n", + "\n", + "Để vẽ loại đường này, chúng ta sử dụng một kỹ thuật thống kê gọi là **Hồi quy Bình phương Tối thiểu**. Thuật ngữ `bình phương tối thiểu` có nghĩa là tất cả các điểm dữ liệu xung quanh đường hồi quy được bình phương và sau đó cộng lại. Lý tưởng nhất, tổng cuối cùng này càng nhỏ càng tốt, vì chúng ta muốn số lỗi thấp, hay `bình phương tối thiểu`. Do đó, đường phù hợp nhất là đường cho chúng ta giá trị thấp nhất của tổng các lỗi bình phương - vì vậy mới có tên gọi *hồi quy bình phương tối thiểu*.\n", + "\n", + "Chúng ta làm điều này vì muốn mô hình hóa một đường có khoảng cách tích lũy nhỏ nhất từ tất cả các điểm dữ liệu của chúng ta. Chúng ta cũng bình phương các giá trị trước khi cộng chúng lại vì chúng ta quan tâm đến độ lớn của chúng hơn là hướng của chúng.\n", + "\n", + "> **🧮 Hiển thị toán học**\n", + ">\n", + "> Đường này, gọi là *đường phù hợp nhất*, có thể được biểu diễn bằng [một phương trình](https://en.wikipedia.org/wiki/Simple_linear_regression):\n", + ">\n", + "> Y = a + bX\n", + ">\n", + "> `X` là '`biến giải thích` hoặc `biến dự đoán`'. `Y` là '`biến phụ thuộc` hoặc `kết quả`'. 
Độ dốc của đường là `b` và `a` là giao điểm trục y, tức là giá trị của `Y` khi `X = 0`.\n", + ">\n", + "\n", + "> ![](../../../../../../2-Regression/3-Linear/solution/images/slope.png \"slope = $y/x$\")\n", + " Đồ họa thông tin bởi Jen Looper\n", + ">\n", + "> Đầu tiên, tính độ dốc `b`.\n", + ">\n", + "> Nói cách khác, và liên hệ với câu hỏi ban đầu về dữ liệu bí ngô của chúng ta: \"dự đoán giá của một giạ bí ngô theo tháng\", `X` sẽ là giá và `Y` sẽ là tháng bán.\n", + ">\n", + "> ![](../../../../../../2-Regression/3-Linear/solution/images/calculation.png)\n", + " Đồ họa thông tin bởi Jen Looper\n", + "> \n", + "> Tính giá trị của Y. Nếu bạn đang trả khoảng \\$4, chắc hẳn là tháng Tư!\n", + ">\n", + "> Phép toán tính toán đường này phải thể hiện độ dốc của đường, điều này cũng phụ thuộc vào giao điểm, hoặc vị trí của `Y` khi `X = 0`.\n", + ">\n", + "> Bạn có thể quan sát phương pháp tính toán các giá trị này trên trang web [Math is Fun](https://www.mathsisfun.com/data/least-squares-regression.html). Cũng hãy ghé thăm [máy tính hồi quy bình phương tối thiểu này](https://www.mathsisfun.com/data/least-squares-calculator.html) để xem cách các giá trị số ảnh hưởng đến đường.\n", + "\n", + "Không quá đáng sợ, đúng không? 🤓\n", + "\n", + "#### Tương quan\n", + "\n", + "Một thuật ngữ nữa cần hiểu là **Hệ số Tương quan** giữa các biến X và Y cho trước. Sử dụng biểu đồ phân tán, bạn có thể nhanh chóng hình dung hệ số này. Một biểu đồ với các điểm dữ liệu phân tán theo một đường gọn gàng có tương quan cao, nhưng một biểu đồ với các điểm dữ liệu phân tán khắp nơi giữa X và Y có tương quan thấp.\n", + "\n", + "Một mô hình hồi quy tuyến tính tốt sẽ là mô hình có Hệ số Tương quan cao (gần 1 hơn 0) sử dụng phương pháp Hồi quy Bình phương Tối thiểu với một đường hồi quy.\n" + ], + "metadata": { + "id": "cdX5FRpvsoP5" + } + }, + { + "cell_type": "markdown", + "source": [ + "## **2. Một điệu nhảy với dữ liệu: tạo một khung dữ liệu sẽ được sử dụng để mô hình hóa**\n", + "\n", + "

\n", + " \n", + "

Tác phẩm nghệ thuật của @allison_horst
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "WdUKXk7Bs8-V" + } + }, + { + "cell_type": "markdown", + "source": [ + "Tải các thư viện cần thiết và tập dữ liệu. Chuyển dữ liệu thành một khung dữ liệu chứa một phần của tập dữ liệu:\n", + "\n", + "- Chỉ lấy bí ngô được định giá theo đơn vị bushel\n", + "\n", + "- Chuyển đổi ngày thành tháng\n", + "\n", + "- Tính giá trung bình dựa trên giá cao và giá thấp\n", + "\n", + "- Chuyển đổi giá để phản ánh mức giá theo số lượng bushel\n", + "\n", + "> Chúng ta đã đề cập đến các bước này trong [bài học trước](https://github.com/microsoft/ML-For-Beginners/blob/main/2-Regression/2-Data/solution/lesson_2-R.ipynb).\n" + ], + "metadata": { + "id": "fMCtu2G2s-p8" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load the core Tidyverse packages\n", + "library(tidyverse)\n", + "library(lubridate)\n", + "\n", + "# Import the pumpkins data\n", + "pumpkins <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/2-Regression/data/US-pumpkins.csv\")\n", + "\n", + "\n", + "# Get a glimpse and dimensions of the data\n", + "glimpse(pumpkins)\n", + "\n", + "\n", + "# Print the first 50 rows of the data set\n", + "pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "ryMVZEEPtERn" + } + }, + { + "cell_type": "markdown", + "source": [ + "Với tinh thần phiêu lưu thuần túy, hãy cùng khám phá [`gói janitor`](https://github.com/sfirke/janitor) cung cấp các hàm đơn giản để kiểm tra và làm sạch dữ liệu bẩn. Ví dụ, hãy xem qua tên các cột trong dữ liệu của chúng ta:\n" + ], + "metadata": { + "id": "xcNxM70EtJjb" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Return column names\n", + "pumpkins %>% \n", + " names()" + ], + "outputs": [], + "metadata": { + "id": "5XtpaIigtPfW" + } + }, + { + "cell_type": "markdown", + "source": [ + "🤔 Chúng ta có thể làm tốt hơn. 
Hãy làm cho các tên cột này `friendR` bằng cách chuyển chúng sang quy ước [snake_case](https://en.wikipedia.org/wiki/Snake_case) bằng cách sử dụng `janitor::clean_names`. Để tìm hiểu thêm về hàm này: `?clean_names`\n" + ], + "metadata": { + "id": "IbIqrMINtSHe" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Clean names to the snake_case convention\n", + "pumpkins <- pumpkins %>% \n", + " clean_names(case = \"snake\")\n", + "\n", + "# Return column names\n", + "pumpkins %>% \n", + " names()" + ], + "outputs": [], + "metadata": { + "id": "a2uYvclYtWvX" + } + }, + { + "cell_type": "markdown", + "source": [ + "Rất gọn gàng 🧹! Bây giờ, hãy nhảy múa với dữ liệu bằng cách sử dụng `dplyr` như trong bài học trước! 💃\n" + ], + "metadata": { + "id": "HfhnuzDDtaDd" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Select desired columns\n", + "pumpkins <- pumpkins %>% \n", + " select(variety, city_name, package, low_price, high_price, date)\n", + "\n", + "\n", + "\n", + "# Extract the month from the dates to a new column\n", + "pumpkins <- pumpkins %>%\n", + " mutate(date = mdy(date),\n", + " month = month(date)) %>% \n", + " select(-date)\n", + "\n", + "\n", + "\n", + "# Create a new column for average Price\n", + "pumpkins <- pumpkins %>% \n", + " mutate(price = (low_price + high_price)/2)\n", + "\n", + "\n", + "# Retain only pumpkins with the string \"bushel\"\n", + "new_pumpkins <- pumpkins %>% \n", + " filter(str_detect(string = package, pattern = \"bushel\"))\n", + "\n", + "\n", + "# Normalize the pricing so that you show the pricing per bushel, not per 1 1/9 or 1/2 bushel\n", + "new_pumpkins <- new_pumpkins %>% \n", + " mutate(price = case_when(\n", + " str_detect(package, \"1 1/9\") ~ price/(1.1),\n", + " str_detect(package, \"1/2\") ~ price*2,\n", + " TRUE ~ price))\n", + "\n", + "# Relocate column positions\n", + "new_pumpkins <- new_pumpkins %>% \n", + " relocate(month, .before = variety)\n", + "\n", 
+ "\n", + "# Display the first 5 rows\n", + "new_pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "X0wU3gQvtd9f" + } + }, + { + "cell_type": "markdown", + "source": [ + "Làm tốt lắm!👌 Giờ bạn đã có một tập dữ liệu sạch sẽ, gọn gàng để xây dựng mô hình hồi quy mới của mình!\n", + "\n", + "Bạn có muốn xem biểu đồ phân tán không?\n" + ], + "metadata": { + "id": "UpaIwaxqth82" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Set theme\n", + "theme_set(theme_light())\n", + "\n", + "# Make a scatter plot of month and price\n", + "new_pumpkins %>% \n", + " ggplot(mapping = aes(x = month, y = price)) +\n", + " geom_point(size = 1.6)\n" + ], + "outputs": [], + "metadata": { + "id": "DXgU-j37tl5K" + } + }, + { + "cell_type": "markdown", + "source": [ + "Một biểu đồ phân tán nhắc nhở chúng ta rằng chúng ta chỉ có dữ liệu tháng từ tháng Tám đến tháng Mười Hai. Chúng ta có lẽ cần thêm dữ liệu để có thể đưa ra kết luận theo cách tuyến tính.\n", + "\n", + "Hãy cùng xem lại dữ liệu mô hình của chúng ta:\n" + ], + "metadata": { + "id": "Ve64wVbwtobI" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Display first 5 rows\n", + "new_pumpkins %>% \n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "HFQX2ng1tuSJ" + } + }, + { + "cell_type": "markdown", + "source": [ + "Nếu chúng ta muốn dự đoán `price` của một quả bí ngô dựa trên các cột `city` hoặc `package`, vốn thuộc loại ký tự, thì sao? Hoặc đơn giản hơn, làm thế nào để tìm mối tương quan (yêu cầu cả hai đầu vào phải là số) giữa, ví dụ, `package` và `price`? 
🤷🤷\n", + "\n", + "Các mô hình học máy hoạt động tốt nhất với các đặc trưng dạng số thay vì giá trị văn bản, vì vậy bạn thường cần chuyển đổi các đặc trưng phân loại thành các biểu diễn dạng số.\n", + "\n", + "Điều này có nghĩa là chúng ta phải tìm cách định dạng lại các biến dự đoán của mình để làm cho chúng dễ sử dụng hơn đối với mô hình, một quá trình được gọi là `feature engineering`.\n" + ], + "metadata": { + "id": "7hsHoxsStyjJ" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 3. Tiền xử lý dữ liệu để mô hình hóa với recipes 👩‍🍳👨‍🍳\n", + "\n", + "Các hoạt động định dạng lại giá trị dự đoán để giúp mô hình sử dụng hiệu quả hơn được gọi là `kỹ thuật đặc trưng`.\n", + "\n", + "Các mô hình khác nhau có yêu cầu tiền xử lý khác nhau. Ví dụ, phương pháp bình phương tối thiểu yêu cầu `mã hóa các biến phân loại` như tháng, loại và tên thành phố. Điều này đơn giản chỉ là `chuyển đổi` một cột với các `giá trị phân loại` thành một hoặc nhiều `cột số` thay thế cho cột ban đầu.\n", + "\n", + "Ví dụ, giả sử dữ liệu của bạn bao gồm đặc trưng phân loại sau:\n", + "\n", + "| thành phố |\n", + "|:------------:|\n", + "| Denver |\n", + "| Nairobi |\n", + "| Tokyo |\n", + "\n", + "Bạn có thể áp dụng *mã hóa thứ tự* để thay thế một giá trị số nguyên duy nhất cho mỗi danh mục, như sau:\n", + "\n", + "| thành phố |\n", + "|:---------:|\n", + "| 0 |\n", + "| 1 |\n", + "| 2 |\n", + "\n", + "Và đó chính là điều chúng ta sẽ làm với dữ liệu của mình!\n", + "\n", + "Trong phần này, chúng ta sẽ khám phá một gói tuyệt vời khác của Tidymodels: [recipes](https://tidymodels.github.io/recipes/) - được thiết kế để giúp bạn tiền xử lý dữ liệu **trước khi** huấn luyện mô hình. 
Về cốt lõi, một recipe là một đối tượng định nghĩa các bước cần áp dụng cho một tập dữ liệu để chuẩn bị cho việc mô hình hóa.\n", + "\n", + "Bây giờ, hãy tạo một recipe để chuẩn bị dữ liệu của chúng ta cho việc mô hình hóa bằng cách thay thế một số nguyên duy nhất cho tất cả các quan sát trong các cột dự đoán:\n" + ], + "metadata": { + "id": "AD5kQbcvt3Xl" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Specify a recipe\n", + "pumpkins_recipe <- recipe(price ~ ., data = new_pumpkins) %>% \n", + " step_integer(all_predictors(), zero_based = TRUE)\n", + "\n", + "\n", + "# Print out the recipe\n", + "pumpkins_recipe" + ], + "outputs": [], + "metadata": { + "id": "BNaFKXfRt9TU" + } + }, + { + "cell_type": "markdown", + "source": [ + "Tuyệt vời! 👏 Chúng ta vừa tạo ra công thức đầu tiên để xác định một kết quả (giá cả) và các yếu tố dự đoán tương ứng, đồng thời tất cả các cột dự đoán đều được mã hóa thành một tập hợp các số nguyên 🙌! Hãy cùng phân tích nhanh:\n", + "\n", + "- Lệnh gọi `recipe()` kèm theo một biểu thức mô hình (formula) cho recipe biết *vai trò* của các biến, sử dụng dữ liệu `new_pumpkins` làm tham chiếu. Ví dụ, cột `price` được gán vai trò là `outcome`, trong khi các cột còn lại được gán vai trò là `predictor`.\n", + "\n", + "- `step_integer(all_predictors(), zero_based = TRUE)` chỉ định rằng tất cả các yếu tố dự đoán sẽ được chuyển đổi thành một tập hợp các số nguyên, bắt đầu đánh số từ 0.\n", + "\n", + "Chúng tôi chắc chắn rằng bạn có thể đang nghĩ: \"Thật tuyệt vời!! Nhưng nếu tôi cần xác nhận rằng các công thức đang thực hiện đúng như mong đợi thì sao? 🤔\"\n", + "\n", + "Đó là một ý tưởng tuyệt vời! Bạn thấy đấy, một khi công thức của bạn được định nghĩa, bạn có thể ước tính các tham số cần thiết để thực sự tiền xử lý dữ liệu, sau đó trích xuất dữ liệu đã được xử lý. 
Bạn thường không cần làm điều này khi sử dụng Tidymodels (chúng ta sẽ thấy cách thông thường ngay sau đây-\\> `workflows`), nhưng nó có thể rất hữu ích khi bạn muốn kiểm tra nhanh để xác nhận rằng các công thức đang hoạt động đúng như mong đợi.\n", + "\n", + "Để làm điều đó, bạn sẽ cần hai động từ nữa: `prep()` và `bake()`. Và như mọi khi, những người bạn nhỏ R của chúng ta từ [`Allison Horst`](https://github.com/allisonhorst/stats-illustrations) sẽ giúp bạn hiểu rõ hơn về điều này!\n", + "\n", + "

\n", + " \n", + "

Tác phẩm nghệ thuật của @allison_horst
\n" + ], + "metadata": { + "id": "KEiO0v7kuC9O" + } + }, + { + "cell_type": "markdown", + "source": [ + "[`prep()`](https://recipes.tidymodels.org/reference/prep.html): ước tính các tham số cần thiết từ tập dữ liệu huấn luyện, sau đó có thể áp dụng cho các tập dữ liệu khác. Ví dụ, đối với một cột dự đoán cụ thể, quan sát nào sẽ được gán giá trị nguyên 0, 1, 2, v.v.\n", + "\n", + "[`bake()`](https://recipes.tidymodels.org/reference/bake.html): sử dụng công thức đã được chuẩn bị và áp dụng các thao tác cho bất kỳ tập dữ liệu nào.\n", + "\n", + "Vậy nên, hãy chuẩn bị và áp dụng công thức của chúng ta để thực sự xác nhận rằng, trong quá trình xử lý, các cột dự đoán sẽ được mã hóa trước khi mô hình được huấn luyện.\n" + ], + "metadata": { + "id": "Q1xtzebuuTCP" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Prep the recipe\n", + "pumpkins_prep <- prep(pumpkins_recipe)\n", + "\n", + "# Bake the recipe to extract a preprocessed new_pumpkins data\n", + "baked_pumpkins <- bake(pumpkins_prep, new_data = NULL)\n", + "\n", + "# Print out the baked data set\n", + "baked_pumpkins %>% \n", + " slice_head(n = 10)" + ], + "outputs": [], + "metadata": { + "id": "FGBbJbP_uUUn" + } + }, + { + "cell_type": "markdown", + "source": [ + "Woo-hoo!🥳 Dữ liệu đã xử lý `baked_pumpkins` đã được mã hóa tất cả các biến dự đoán, xác nhận rằng các bước tiền xử lý được định nghĩa trong công thức của chúng ta hoạt động đúng như mong đợi. Điều này khiến việc đọc dữ liệu trở nên khó khăn hơn đối với bạn nhưng lại dễ hiểu hơn nhiều đối với Tidymodels! Hãy dành chút thời gian để tìm hiểu xem quan sát nào đã được ánh xạ thành số nguyên tương ứng.\n", + "\n", + "Cũng cần lưu ý rằng `baked_pumpkins` là một khung dữ liệu mà chúng ta có thể thực hiện các phép tính trên đó.\n", + "\n", + "Ví dụ, hãy thử tìm mối tương quan tốt giữa hai điểm dữ liệu của bạn để có thể xây dựng một mô hình dự đoán tốt. Chúng ta sẽ sử dụng hàm `cor()` để làm điều này. 
Gõ `?cor()` để tìm hiểu thêm về hàm này.\n" + ], + "metadata": { + "id": "1dvP0LBUueAW" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Find the correlation between the city_name and the price\n", + "cor(baked_pumpkins$city_name, baked_pumpkins$price)\n", + "\n", + "# Find the correlation between the package and the price\n", + "cor(baked_pumpkins$package, baked_pumpkins$price)\n" + ], + "outputs": [], + "metadata": { + "id": "3bQzXCjFuiSV" + } + }, + { + "cell_type": "markdown", + "source": [ + "Hóa ra, mối tương quan giữa Thành phố và Giá chỉ ở mức yếu. Tuy nhiên, có một mối tương quan tốt hơn giữa Gói hàng và Giá của nó. Điều này hợp lý, đúng không? Thông thường, hộp sản phẩm càng lớn thì giá càng cao.\n", + "\n", + "Nhân tiện, hãy thử hình dung ma trận tương quan của tất cả các cột bằng cách sử dụng gói `corrplot`.\n" + ], + "metadata": { + "id": "BToPWbgjuoZw" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load the corrplot package\n", + "library(corrplot)\n", + "\n", + "# Obtain correlation matrix\n", + "corr_mat <- cor(baked_pumpkins %>% \n", + " # Drop columns that are not really informative\n", + " select(-c(low_price, high_price)))\n", + "\n", + "# Make a correlation plot between the variables\n", + "corrplot(corr_mat, method = \"shade\", shade.col = NA, tl.col = \"black\", tl.srt = 45, addCoef.col = \"black\", cl.pos = \"n\", order = \"original\")" + ], + "outputs": [], + "metadata": { + "id": "ZwAL3ksmutVR" + } + }, + { + "cell_type": "markdown", + "source": [ + "🤩🤩 Tuyệt vời hơn nhiều.\n", + "\n", + "Một câu hỏi hay để đặt ra từ dữ liệu này sẽ là: '`Giá mà tôi có thể mong đợi cho một gói bí ngô nhất định là bao nhiêu?`' Hãy bắt đầu ngay thôi!\n", + "\n", + "> Lưu ý: Khi bạn **`bake()`** công thức đã chuẩn bị **`pumpkins_prep`** với **`new_data = NULL`**, bạn sẽ trích xuất dữ liệu huấn luyện đã được xử lý (tức là đã được mã hóa). 
Nếu bạn có một tập dữ liệu khác, ví dụ như tập kiểm tra, và muốn xem cách công thức xử lý trước nó, bạn chỉ cần bake **`pumpkins_prep`** với **`new_data = test_set`**\n", + "\n", + "## 4. Xây dựng mô hình hồi quy tuyến tính\n", + "\n", + "

\n", + " \n", + "

Đồ họa thông tin bởi Dasani Madipalli
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "YqXjLuWavNxW" + } + }, + { + "cell_type": "markdown", + "source": [ + "Bây giờ chúng ta đã xây dựng một công thức và thực sự xác nhận rằng dữ liệu sẽ được tiền xử lý một cách phù hợp, hãy cùng xây dựng một mô hình hồi quy để trả lời câu hỏi: `Giá của một gói bí ngô cụ thể sẽ là bao nhiêu?`\n", + "\n", + "#### Huấn luyện mô hình hồi quy tuyến tính bằng tập huấn luyện\n", + "\n", + "Như bạn có thể đã nhận ra, cột *price* là biến `kết quả` trong khi cột *package* là biến `dự đoán`.\n", + "\n", + "Để thực hiện điều này, trước tiên chúng ta sẽ chia dữ liệu sao cho 80% vào tập huấn luyện và 20% vào tập kiểm tra, sau đó định nghĩa một công thức để mã hóa cột dự đoán thành một tập hợp các số nguyên, rồi xây dựng một đặc tả mô hình. Chúng ta sẽ không chuẩn bị và nướng công thức của mình vì đã biết rằng nó sẽ tiền xử lý dữ liệu như mong đợi.\n" + ], + "metadata": { + "id": "Pq0bSzCevW-h" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "set.seed(2056)\n", + "# Split the data into training and test sets\n", + "pumpkins_split <- new_pumpkins %>% \n", + " initial_split(prop = 0.8)\n", + "\n", + "\n", + "# Extract training and test data\n", + "pumpkins_train <- training(pumpkins_split)\n", + "pumpkins_test <- testing(pumpkins_split)\n", + "\n", + "\n", + "\n", + "# Create a recipe for preprocessing the data\n", + "lm_pumpkins_recipe <- recipe(price ~ package, data = pumpkins_train) %>% \n", + " step_integer(all_predictors(), zero_based = TRUE)\n", + "\n", + "\n", + "\n", + "# Create a linear model specification\n", + "lm_spec <- linear_reg() %>% \n", + " set_engine(\"lm\") %>% \n", + " set_mode(\"regression\")" + ], + "outputs": [], + "metadata": { + "id": "CyoEh_wuvcLv" + } + }, + { + "cell_type": "markdown", + "source": [ + "Làm tốt lắm! 
Bây giờ chúng ta đã có một công thức và một mô tả mô hình, chúng ta cần tìm cách kết hợp chúng lại thành một đối tượng để trước tiên xử lý dữ liệu (prep+bake phía sau hậu trường), huấn luyện mô hình trên dữ liệu đã được xử lý và cũng cho phép các hoạt động xử lý hậu kỳ tiềm năng. Thật tuyệt vời để bạn yên tâm, đúng không!🤩\n", + "\n", + "Trong Tidymodels, đối tượng tiện lợi này được gọi là [`workflow`](https://workflows.tidymodels.org/) và nó tiện lợi lưu giữ các thành phần mô hình của bạn! Đây là thứ mà chúng ta gọi là *pipelines* trong *Python*.\n", + "\n", + "Vậy hãy gói gọn mọi thứ vào một workflow nhé!📦\n" + ], + "metadata": { + "id": "G3zF_3DqviFJ" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Hold modelling components in a workflow\n", + "lm_wf <- workflow() %>% \n", + " add_recipe(lm_pumpkins_recipe) %>% \n", + " add_model(lm_spec)\n", + "\n", + "# Print out the workflow\n", + "lm_wf" + ], + "outputs": [], + "metadata": { + "id": "T3olroU3v-WX" + } + }, + { + "cell_type": "markdown", + "source": [ + "👌 Thêm vào đó, một quy trình làm việc có thể được điều chỉnh/huấn luyện theo cách tương tự như một mô hình.\n" + ], + "metadata": { + "id": "zd1A5tgOwEPX" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Train the model\n", + "lm_wf_fit <- lm_wf %>% \n", + " fit(data = pumpkins_train)\n", + "\n", + "# Print the model coefficients learned \n", + "lm_wf_fit" + ], + "outputs": [], + "metadata": { + "id": "NhJagFumwFHf" + } + }, + { + "cell_type": "markdown", + "source": [ + "Từ đầu ra của mô hình, chúng ta có thể thấy các hệ số được học trong quá trình huấn luyện. Chúng đại diện cho các hệ số của đường hồi quy tốt nhất, giúp giảm thiểu tổng lỗi giữa biến thực tế và biến dự đoán.\n", + "\n", + "#### Đánh giá hiệu suất mô hình bằng tập kiểm tra\n", + "\n", + "Đã đến lúc xem mô hình hoạt động như thế nào 📏! 
Chúng ta làm điều này như thế nào?\n", + "\n", + "Bây giờ chúng ta đã huấn luyện xong mô hình, có thể sử dụng nó để đưa ra dự đoán cho `test_set` bằng cách dùng `parsnip::predict()`. Sau đó, chúng ta có thể so sánh các dự đoán này với các giá trị nhãn thực tế để đánh giá mức độ hiệu quả (hoặc không hiệu quả!) của mô hình.\n", + "\n", + "Hãy bắt đầu bằng cách tạo dự đoán cho tập kiểm tra, sau đó gắn các cột vào tập kiểm tra.\n" + ], + "metadata": { + "id": "_4QkGtBTwItF" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make predictions for the test set\n", + "predictions <- lm_wf_fit %>% \n", + " predict(new_data = pumpkins_test)\n", + "\n", + "\n", + "# Bind predictions to the test set\n", + "lm_results <- pumpkins_test %>% \n", + " select(c(package, price)) %>% \n", + " bind_cols(predictions)\n", + "\n", + "\n", + "# Print the first ten rows of the tibble\n", + "lm_results %>% \n", + " slice_head(n = 10)" + ], + "outputs": [], + "metadata": { + "id": "UFZzTG0gwTs9" + } + }, + { + "cell_type": "markdown", + "source": [ + "Vâng, bạn vừa huấn luyện một mô hình và sử dụng nó để đưa ra dự đoán! 🔮 Nó có tốt không? Hãy đánh giá hiệu suất của mô hình nhé!\n", + "\n", + "Trong Tidymodels, chúng ta thực hiện việc này bằng cách sử dụng `yardstick::metrics()`! Đối với hồi quy tuyến tính, hãy tập trung vào các chỉ số sau:\n", + "\n", + "- `Root Mean Square Error (RMSE)`: Căn bậc hai của [MSE](https://en.wikipedia.org/wiki/Mean_squared_error). Đây là một chỉ số tuyệt đối có cùng đơn vị với nhãn (trong trường hợp này là giá của một quả bí ngô). Giá trị càng nhỏ, mô hình càng tốt (hiểu một cách đơn giản, nó đại diện cho mức giá trung bình mà dự đoán bị sai lệch!).\n", + "\n", + "- `Coefficient of Determination (thường được gọi là R-squared hoặc R2)`: Một chỉ số tương đối, trong đó giá trị càng cao, mô hình càng phù hợp. 
Về cơ bản, chỉ số này thể hiện mức độ mà mô hình có thể giải thích được sự biến thiên giữa giá trị dự đoán và giá trị thực tế của nhãn.\n" + ], + "metadata": { + "id": "0A5MjzM7wW9M" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Evaluate performance of linear regression\n", + "metrics(data = lm_results,\n", + " truth = price,\n", + " estimate = .pred)" + ], + "outputs": [], + "metadata": { + "id": "reJ0UIhQwcEH" + } + }, + { + "cell_type": "markdown", + "source": [ + "Đó là hiệu suất của mô hình. Hãy xem liệu chúng ta có thể có được một chỉ báo tốt hơn bằng cách trực quan hóa biểu đồ phân tán của gói hàng và giá, sau đó sử dụng các dự đoán đã thực hiện để vẽ một đường phù hợp nhất.\n", + "\n", + "Điều này có nghĩa là chúng ta sẽ phải chuẩn bị và xử lý tập kiểm tra để mã hóa cột gói hàng, sau đó kết hợp nó với các dự đoán được tạo ra bởi mô hình của chúng ta.\n" + ], + "metadata": { + "id": "fdgjzjkBwfWt" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Encode package column\n", + "package_encode <- lm_pumpkins_recipe %>% \n", + " prep() %>% \n", + " bake(new_data = pumpkins_test) %>% \n", + " select(package)\n", + "\n", + "\n", + "# Bind encoded package column to the results\n", + "lm_results <- lm_results %>% \n", + " bind_cols(package_encode %>% \n", + " rename(package_integer = package)) %>% \n", + " relocate(package_integer, .after = package)\n", + "\n", + "\n", + "# Print new results data frame\n", + "lm_results %>% \n", + " slice_head(n = 5)\n", + "\n", + "\n", + "# Make a scatter plot\n", + "lm_results %>% \n", + " ggplot(mapping = aes(x = package_integer, y = price)) +\n", + " geom_point(size = 1.6) +\n", + " # Overlay a line of best fit\n", + " geom_line(aes(y = .pred), color = \"orange\", size = 1.2) +\n", + " xlab(\"package\")\n", + " \n" + ], + "outputs": [], + "metadata": { + "id": "R0nw719lwkHE" + } + }, + { + "cell_type": "markdown", + "source": [ + "Thật tuyệt! 
Như bạn có thể thấy, mô hình hồi quy tuyến tính không thực sự khái quát tốt mối quan hệ giữa một gói hàng và giá tương ứng của nó.\n", + "\n", + "🎃 Chúc mừng, bạn vừa tạo ra một mô hình có thể giúp dự đoán giá của một vài loại bí ngô. Vườn bí ngô cho kỳ nghỉ của bạn sẽ thật đẹp. Nhưng có lẽ bạn có thể tạo ra một mô hình tốt hơn!\n", + "\n", + "## 5. Xây dựng mô hình hồi quy đa thức\n", + "\n", + "

\n", + " \n", + "

Đồ họa thông tin bởi Dasani Madipalli
\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "HOCqJXLTwtWI" + } + }, + { + "cell_type": "markdown", + "source": [ + "Đôi khi dữ liệu của chúng ta không có mối quan hệ tuyến tính, nhưng chúng ta vẫn muốn dự đoán kết quả. Hồi quy đa thức có thể giúp chúng ta đưa ra dự đoán cho các mối quan hệ phi tuyến phức tạp hơn.\n", + "\n", + "Hãy xem xét mối quan hệ giữa kích thước gói và giá cả trong bộ dữ liệu bí ngô của chúng ta. Mặc dù đôi khi có mối quan hệ tuyến tính giữa các biến - bí ngô có thể tích lớn hơn thì giá cao hơn - nhưng đôi khi những mối quan hệ này không thể được biểu diễn dưới dạng mặt phẳng hoặc đường thẳng.\n", + "\n", + "> ✅ Đây là [một số ví dụ khác](https://online.stat.psu.edu/stat501/lesson/9/9.8) về dữ liệu có thể sử dụng hồi quy đa thức \n", + "> \n", + "> Hãy xem lại mối quan hệ giữa Loại bí ngô và Giá cả trong biểu đồ trước đó. Biểu đồ phân tán này có nhất thiết phải được phân tích bằng một đường thẳng không? Có lẽ là không. Trong trường hợp này, bạn có thể thử hồi quy đa thức. \n", + "> \n", + "> ✅ Đa thức là các biểu thức toán học có thể bao gồm một hoặc nhiều biến và hệ số \n", + "\n", + "#### Huấn luyện mô hình hồi quy đa thức bằng tập dữ liệu huấn luyện\n", + "\n", + "Hồi quy đa thức tạo ra một *đường cong* để phù hợp hơn với dữ liệu phi tuyến.\n", + "\n", + "Hãy xem liệu mô hình đa thức có hoạt động tốt hơn trong việc đưa ra dự đoán hay không. 
Chúng ta sẽ thực hiện quy trình tương tự như trước:\n", + "\n", + "- Tạo một công thức chỉ định các bước tiền xử lý cần thực hiện trên dữ liệu để chuẩn bị cho việc mô hình hóa, ví dụ: mã hóa các biến dự đoán và tính toán đa thức với bậc *n*\n", + "\n", + "- Xây dựng một đặc tả mô hình\n", + "\n", + "- Kết hợp công thức và đặc tả mô hình vào một quy trình làm việc\n", + "\n", + "- Tạo mô hình bằng cách khớp quy trình làm việc\n", + "\n", + "- Đánh giá mức độ hiệu quả của mô hình trên dữ liệu kiểm tra\n", + "\n", + "Hãy bắt đầu ngay thôi!\n" + ], + "metadata": { + "id": "VcEIpRV9wzYr" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Specify a recipe\r\n", + "poly_pumpkins_recipe <-\r\n", + " recipe(price ~ package, data = pumpkins_train) %>%\r\n", + " step_integer(all_predictors(), zero_based = TRUE) %>% \r\n", + " step_poly(all_predictors(), degree = 4)\r\n", + "\r\n", + "\r\n", + "# Create a model specification\r\n", + "poly_spec <- linear_reg() %>% \r\n", + " set_engine(\"lm\") %>% \r\n", + " set_mode(\"regression\")\r\n", + "\r\n", + "\r\n", + "# Bundle recipe and model spec into a workflow\r\n", + "poly_wf <- workflow() %>% \r\n", + " add_recipe(poly_pumpkins_recipe) %>% \r\n", + " add_model(poly_spec)\r\n", + "\r\n", + "\r\n", + "# Create a model\r\n", + "poly_wf_fit <- poly_wf %>% \r\n", + " fit(data = pumpkins_train)\r\n", + "\r\n", + "\r\n", + "# Print learned model coefficients\r\n", + "poly_wf_fit\r\n", + "\r\n", + " " + ], + "outputs": [], + "metadata": { + "id": "63n_YyRXw3CC" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### Đánh giá hiệu suất mô hình\n", + "\n", + "👏👏Bạn đã xây dựng một mô hình đa thức, hãy thực hiện dự đoán trên tập kiểm tra!\n" + ], + "metadata": { + "id": "-LHZtztSxDP0" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make price predictions on test data\r\n", + "poly_results <- poly_wf_fit %>% predict(new_data = pumpkins_test) %>% \r\n", + " 
bind_cols(pumpkins_test %>% select(c(package, price))) %>% \r\n", + " relocate(.pred, .after = last_col())\r\n", + "\r\n", + "\r\n", + "# Print the results\r\n", + "poly_results %>% \r\n", + " slice_head(n = 10)" + ], + "outputs": [], + "metadata": { + "id": "YUFpQ_dKxJGx" + } + }, + { + "cell_type": "markdown", + "source": [ + "Woo-hoo, hãy đánh giá cách mô hình hoạt động trên test_set bằng cách sử dụng `yardstick::metrics()`.\n" + ], + "metadata": { + "id": "qxdyj86bxNGZ" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "metrics(data = poly_results, truth = price, estimate = .pred)" + ], + "outputs": [], + "metadata": { + "id": "8AW5ltkBxXDm" + } + }, + { + "cell_type": "markdown", + "source": [ + "🤩🤩 Hiệu suất tốt hơn nhiều.\n", + "\n", + "`rmse` đã giảm từ khoảng 7 xuống khoảng 3, cho thấy lỗi giữa giá thực tế và giá dự đoán đã giảm. Bạn có thể *hiểu một cách đơn giản* rằng trung bình, các dự đoán sai lệch khoảng \\$3. `rsq` đã tăng từ khoảng 0.4 lên 0.8.\n", + "\n", + "Tất cả các chỉ số này đều cho thấy rằng mô hình đa thức hoạt động tốt hơn nhiều so với mô hình tuyến tính. 
Làm tốt lắm!\n", + "\n", + "Hãy xem liệu chúng ta có thể trực quan hóa điều này không!\n" + ], + "metadata": { + "id": "6gLHNZDwxYaS" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Bind encoded package column to the results\r\n", + "poly_results <- poly_results %>% \r\n", + " bind_cols(package_encode %>% \r\n", + " rename(package_integer = package)) %>% \r\n", + " relocate(package_integer, .after = package)\r\n", + "\r\n", + "\r\n", + "# Print new results data frame\r\n", + "poly_results %>% \r\n", + " slice_head(n = 5)\r\n", + "\r\n", + "\r\n", + "# Make a scatter plot\r\n", + "poly_results %>% \r\n", + " ggplot(mapping = aes(x = package_integer, y = price)) +\r\n", + " geom_point(size = 1.6) +\r\n", + " # Overlay a line of best fit\r\n", + " geom_line(aes(y = .pred), color = \"midnightblue\", size = 1.2) +\r\n", + " xlab(\"package\")\r\n" + ], + "outputs": [], + "metadata": { + "id": "A83U16frxdF1" + } + }, + { + "cell_type": "markdown", + "source": [ + "Bạn có thể thấy một đường cong phù hợp với dữ liệu của mình tốt hơn! 
🤩\n", + "\n", + "Bạn có thể làm cho đường này mượt mà hơn bằng cách truyền một công thức đa thức vào `geom_smooth` như sau:\n" + ], + "metadata": { + "id": "4U-7aHOVxlGU" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make a scatter plot\r\n", + "poly_results %>% \r\n", + " ggplot(mapping = aes(x = package_integer, y = price)) +\r\n", + " geom_point(size = 1.6) +\r\n", + " # Overlay a line of best fit\r\n", + " geom_smooth(method = lm, formula = y ~ poly(x, degree = 4), color = \"midnightblue\", size = 1.2, se = FALSE) +\r\n", + " xlab(\"package\")" + ], + "outputs": [], + "metadata": { + "id": "5vzNT0Uexm-w" + } + }, + { + "cell_type": "markdown", + "source": [ + "Giống như một đường cong mượt mà!🤩\n", + "\n", + "Đây là cách bạn tạo một dự đoán mới:\n" + ], + "metadata": { + "id": "v9u-wwyLxq4G" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make a hypothetical data frame\r\n", + "hypo_tibble <- tibble(package = \"bushel baskets\")\r\n", + "\r\n", + "# Make predictions using linear model\r\n", + "lm_pred <- lm_wf_fit %>% predict(new_data = hypo_tibble)\r\n", + "\r\n", + "# Make predictions using polynomial model\r\n", + "poly_pred <- poly_wf_fit %>% predict(new_data = hypo_tibble)\r\n", + "\r\n", + "# Return predictions in a list\r\n", + "list(\"linear model prediction\" = lm_pred, \r\n", + " \"polynomial model prediction\" = poly_pred)\r\n" + ], + "outputs": [], + "metadata": { + "id": "jRPSyfQGxuQv" + } + }, + { + "cell_type": "markdown", + "source": [ + "Dự đoán của `polynomial model` thực sự hợp lý, dựa trên biểu đồ phân tán của `price` và `package`! Và nếu đây là một mô hình tốt hơn so với mô hình trước đó, khi nhìn vào cùng một dữ liệu, bạn cần dự trù ngân sách cho những quả bí ngô đắt tiền hơn này!\n", + "\n", + "🏆 Làm tốt lắm! Bạn đã tạo hai mô hình hồi quy trong một bài học. 
Trong phần cuối cùng về hồi quy, bạn sẽ học về hồi quy logistic để xác định các danh mục.\n", + "\n", + "## **🚀Thử thách**\n", + "\n", + "Kiểm tra một số biến khác nhau trong notebook này để xem cách mối tương quan ảnh hưởng đến độ chính xác của mô hình.\n", + "\n", + "## [**Câu hỏi sau bài giảng**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/14/)\n", + "\n", + "## **Ôn tập & Tự học**\n", + "\n", + "Trong bài học này, chúng ta đã học về Hồi quy tuyến tính. Có những loại hồi quy quan trọng khác. Hãy tìm hiểu về các kỹ thuật Stepwise, Ridge, Lasso và Elasticnet. Một khóa học hay để học thêm là [Khóa học Stanford Statistical Learning](https://online.stanford.edu/courses/sohs-ystatslearning-statistical-learning).\n", + "\n", + "Nếu bạn muốn tìm hiểu thêm về cách sử dụng framework Tidymodels tuyệt vời, hãy tham khảo các tài nguyên sau:\n", + "\n", + "- Trang web Tidymodels: [Bắt đầu với Tidymodels](https://www.tidymodels.org/start/)\n", + "\n", + "- Max Kuhn và Julia Silge, [*Tidy Modeling with R*](https://www.tmwr.org/)*.*\n", + "\n", + "###### **XIN CẢM ƠN:**\n", + "\n", + "[Allison Horst](https://twitter.com/allison_horst?lang=en) vì đã tạo ra những hình minh họa tuyệt vời giúp R trở nên thân thiện và hấp dẫn hơn. Tìm thêm các hình minh họa tại [bộ sưu tập của cô ấy](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM).\n" + ], + "metadata": { + "id": "8zOLOWqMxzk5" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn tham khảo chính thức. 
Đối với các thông tin quan trọng, chúng tôi khuyến nghị sử dụng dịch vụ dịch thuật chuyên nghiệp từ con người. Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/vi/2-Regression/3-Linear/solution/notebook.ipynb b/translations/vi/2-Regression/3-Linear/solution/notebook.ipynb new file mode 100644 index 000000000..39b8d056d --- /dev/null +++ b/translations/vi/2-Regression/3-Linear/solution/notebook.ipynb @@ -0,0 +1,1111 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Hồi quy tuyến tính và hồi quy đa thức để định giá bí ngô - Bài học 3\n", + "\n", + "Tải các thư viện cần thiết và tập dữ liệu. Chuyển đổi dữ liệu thành một dataframe chứa một phần dữ liệu:\n", + "\n", + "- Chỉ lấy bí ngô được định giá theo đơn vị bushel\n", + "- Chuyển đổi ngày thành tháng\n", + "- Tính giá trung bình dựa trên giá cao và giá thấp\n", + "- Chuyển đổi giá để phản ánh mức giá theo số lượng bushel\n" + ] + }, + { + "cell_type": "code", + "execution_count": 167, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NameTypePackageVarietySub VarietyGradeDateLow PriceHigh PriceMostly Low...Unit of SaleQualityConditionAppearanceStorageCropRepackTrans ModeUnnamed: 24Unnamed: 25
0BALTIMORENaN24 inch binsNaNNaNNaN4/29/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
1BALTIMORENaN24 inch binsNaNNaNNaN5/6/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
2BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
3BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
4BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN11/5/1690.0100.090.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " City Name Type Package Variety Sub Variety Grade Date \\\n", + "0 BALTIMORE NaN 24 inch bins NaN NaN NaN 4/29/17 \n", + "1 BALTIMORE NaN 24 inch bins NaN NaN NaN 5/6/17 \n", + "2 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "3 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "4 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 11/5/16 \n", + "\n", + " Low Price High Price Mostly Low ... Unit of Sale Quality Condition \\\n", + "0 270.0 280.0 270.0 ... NaN NaN NaN \n", + "1 270.0 280.0 270.0 ... NaN NaN NaN \n", + "2 160.0 160.0 160.0 ... NaN NaN NaN \n", + "3 160.0 160.0 160.0 ... NaN NaN NaN \n", + "4 90.0 100.0 90.0 ... NaN NaN NaN \n", + "\n", + " Appearance Storage Crop Repack Trans Mode Unnamed: 24 Unnamed: 25 \n", + "0 NaN NaN NaN E NaN NaN NaN \n", + "1 NaN NaN NaN E NaN NaN NaN \n", + "2 NaN NaN NaN N NaN NaN NaN \n", + "3 NaN NaN NaN N NaN NaN NaN \n", + "4 NaN NaN NaN N NaN NaN NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 167, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from datetime import datetime\n", + "\n", + "pumpkins = pd.read_csv('../../data/US-pumpkins.csv')\n", + "pumpkins.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 168, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MonthDayOfYearVarietyCityPackageLow PriceHigh PricePrice
709267PIE TYPEBALTIMORE1 1/9 bushel cartons15.015.013.636364
719267PIE TYPEBALTIMORE1 1/9 bushel cartons18.018.016.363636
7210274PIE TYPEBALTIMORE1 1/9 bushel cartons18.018.016.363636
7310274PIE TYPEBALTIMORE1 1/9 bushel cartons17.017.015.454545
7410281PIE TYPEBALTIMORE1 1/9 bushel cartons15.015.013.636364
\n", + "
" + ], + "text/plain": [ + " Month DayOfYear Variety City Package Low Price \\\n", + "70 9 267 PIE TYPE BALTIMORE 1 1/9 bushel cartons 15.0 \n", + "71 9 267 PIE TYPE BALTIMORE 1 1/9 bushel cartons 18.0 \n", + "72 10 274 PIE TYPE BALTIMORE 1 1/9 bushel cartons 18.0 \n", + "73 10 274 PIE TYPE BALTIMORE 1 1/9 bushel cartons 17.0 \n", + "74 10 281 PIE TYPE BALTIMORE 1 1/9 bushel cartons 15.0 \n", + "\n", + " High Price Price \n", + "70 15.0 13.636364 \n", + "71 18.0 16.363636 \n", + "72 18.0 16.363636 \n", + "73 17.0 15.454545 \n", + "74 15.0 13.636364 " + ] + }, + "execution_count": 168, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pumpkins = pumpkins[pumpkins['Package'].str.contains('bushel', case=True, regex=True)]\n", + "\n", + "new_columns = ['Package', 'Variety', 'City Name', 'Month', 'Low Price', 'High Price', 'Date']\n", + "pumpkins = pumpkins.drop([c for c in pumpkins.columns if c not in new_columns], axis=1)\n", + "\n", + "price = (pumpkins['Low Price'] + pumpkins['High Price']) / 2\n", + "\n", + "month = pd.DatetimeIndex(pumpkins['Date']).month\n", + "day_of_year = pd.to_datetime(pumpkins['Date']).apply(lambda dt: (dt-datetime(dt.year,1,1)).days)\n", + "\n", + "new_pumpkins = pd.DataFrame(\n", + " {'Month': month, \n", + " 'DayOfYear' : day_of_year, \n", + " 'Variety': pumpkins['Variety'], \n", + " 'City': pumpkins['City Name'], \n", + " 'Package': pumpkins['Package'], \n", + " 'Low Price': pumpkins['Low Price'],\n", + " 'High Price': pumpkins['High Price'], \n", + " 'Price': price})\n", + "\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1 1/9'), 'Price'] = price/1.1\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1/2'), 'Price'] = price*2\n", + "\n", + "new_pumpkins.head()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Biểu đồ phân tán nhắc nhở chúng ta rằng chúng ta chỉ có dữ liệu tháng từ tháng Tám đến tháng Mười Hai. 
Chúng ta có lẽ cần thêm dữ liệu để có thể đưa ra kết luận theo cách tuyến tính.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 169, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 169, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "new_pumpkins.plot.scatter('Month','Price')" + ] + }, + { + "cell_type": "code", + "execution_count": 170, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 170, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "new_pumpkins.plot.scatter('DayOfYear','Price')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 171, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-0.14878293554077535\n", + "-0.16673322492745407\n" + ] + } + ], + "source": [ + "print(new_pumpkins['Month'].corr(new_pumpkins['Price']))\n", + "print(new_pumpkins['DayOfYear'].corr(new_pumpkins['Price']))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Có vẻ như mối tương quan khá nhỏ, nhưng có một mối quan hệ quan trọng hơn - vì các điểm giá trong biểu đồ trên dường như có một số cụm riêng biệt. Hãy tạo một biểu đồ để hiển thị các loại bí ngô khác nhau:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 172, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "ax=None\n", + "colors = ['red','blue','green','yellow']\n", + "for i,var in enumerate(new_pumpkins['Variety'].unique()):\n", + " ax = new_pumpkins[new_pumpkins['Variety']==var].plot.scatter('DayOfYear','Price',ax=ax,c=colors[i],label=var)" + ] + }, + { + "cell_type": "code", + "execution_count": 173, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 173, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "new_pumpkins.groupby('Variety')['Price'].mean().plot(kind='bar')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 174, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-0.2669192282197318\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 174, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "pie_pumpkins = new_pumpkins[new_pumpkins['Variety']=='PIE TYPE']\n", + "print(pie_pumpkins['DayOfYear'].corr(pie_pumpkins['Price']))\n", + "pie_pumpkins.plot.scatter('DayOfYear','Price')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Hồi quy tuyến tính\n", + "\n", + "Chúng ta sẽ sử dụng Scikit Learn để huấn luyện mô hình hồi quy tuyến tính:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 175, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error\n", + "from sklearn.model_selection import train_test_split" + ] + }, + { + "cell_type": "code", + "execution_count": 176, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 2.77 (17.2%)\n" + ] + } + ], + "source": [ + "X = pie_pumpkins['DayOfYear'].to_numpy().reshape(-1,1)\n", + "y = pie_pumpkins['Price']\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n", + "lin_reg = LinearRegression()\n", + "lin_reg.fit(X_train,y_train)\n", + "\n", + "pred = lin_reg.predict(X_test)\n", + "\n", + "mse = np.sqrt(mean_squared_error(y_test,pred))\n", + "print(f'Mean error: {mse:3.3} ({mse/np.mean(pred)*100:3.3}%)')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 177, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 177, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.scatter(X_test,y_test)\n", + "plt.plot(X_test,pred)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Độ dốc của đường thẳng có thể được xác định từ các hệ số hồi quy tuyến tính:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 178, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([-0.01751876]), 21.133734359909326)" + ] + }, + "execution_count": 178, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lin_reg.coef_, lin_reg.intercept_" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 179, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([16.64893156])" + ] + }, + "execution_count": 179, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Pumpkin price on programmer's day\n", + "\n", + "lin_reg.predict([[256]])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Hồi quy đa thức\n", + "\n", + "Đôi khi mối quan hệ giữa các đặc điểm và kết quả vốn dĩ không tuyến tính. Ví dụ, giá bí ngô có thể cao vào mùa đông (tháng 1, 2), sau đó giảm vào mùa hè (tháng 5-7), rồi lại tăng lên. Hồi quy tuyến tính không thể tìm ra mối quan hệ này một cách chính xác.\n", + "\n", + "Trong trường hợp này, chúng ta có thể cân nhắc thêm các đặc điểm bổ sung. Một cách đơn giản là sử dụng các đa thức từ các đặc điểm đầu vào, điều này sẽ dẫn đến **hồi quy đa thức**. 
Trong Scikit Learn, chúng ta có thể tự động tính trước các đặc điểm đa thức bằng cách sử dụng pipelines:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 180, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 2.73 (17.0%)\n", + "Model determination: 0.07639977655280217\n" + ] + }, + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 180, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "from sklearn.preprocessing import PolynomialFeatures\n", + "from sklearn.pipeline import make_pipeline\n", + "\n", + "pipeline = make_pipeline(PolynomialFeatures(2), LinearRegression())\n", + "\n", + "pipeline.fit(X_train,y_train)\n", + "\n", + "pred = pipeline.predict(X_test)\n", + "\n", + "mse = np.sqrt(mean_squared_error(y_test,pred))\n", + "print(f'Mean error: {mse:3.3} ({mse/np.mean(pred)*100:3.3}%)')\n", + "\n", + "score = pipeline.score(X_train,y_train)\n", + "print('Model determination: ', score)\n", + "\n", + "plt.scatter(X_test,y_test)\n", + "plt.plot(sorted(X_test),pipeline.predict(sorted(X_test)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Các phương pháp mã hóa\n", + "\n", + "Trong thế giới lý tưởng, chúng ta muốn có thể dự đoán giá cho các loại bí ngô khác nhau bằng cùng một mô hình. Để tính đến loại bí ngô, trước tiên chúng ta cần chuyển đổi nó sang dạng số, hay còn gọi là **mã hóa**. Có một số cách để thực hiện điều này:\n", + "\n", + "* Mã hóa số đơn giản sẽ tạo một bảng các loại khác nhau, sau đó thay thế tên loại bằng một chỉ số trong bảng đó. Đây không phải là ý tưởng tốt nhất cho hồi quy tuyến tính, vì hồi quy tuyến tính sẽ tính đến giá trị số của chỉ số, và giá trị số này có khả năng không tương quan một cách tuyến tính với giá cả.\n", + "* Mã hóa one-hot, sẽ thay thế cột `Variety` bằng 4 cột khác nhau, mỗi cột đại diện cho một loại, chứa giá trị 1 nếu hàng tương ứng thuộc loại đó, và 0 nếu không.\n", + "\n", + "Đoạn mã dưới đây cho thấy cách chúng ta có thể mã hóa one-hot cho một loại:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 181, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
FAIRYTALEMINIATUREMIXED HEIRLOOM VARIETIESPIE TYPE
700001
710001
720001
730001
740001
...............
17380100
17390100
17400100
17410100
17420100
\n", + "

415 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " FAIRYTALE MINIATURE MIXED HEIRLOOM VARIETIES PIE TYPE\n", + "70 0 0 0 1\n", + "71 0 0 0 1\n", + "72 0 0 0 1\n", + "73 0 0 0 1\n", + "74 0 0 0 1\n", + "... ... ... ... ...\n", + "1738 0 1 0 0\n", + "1739 0 1 0 0\n", + "1740 0 1 0 0\n", + "1741 0 1 0 0\n", + "1742 0 1 0 0\n", + "\n", + "[415 rows x 4 columns]" + ] + }, + "execution_count": 181, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.get_dummies(new_pumpkins['Variety'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Hồi quy tuyến tính trên các loại\n", + "\n", + "Bây giờ chúng ta sẽ sử dụng cùng một đoạn mã như trên, nhưng thay vì `DayOfYear`, chúng ta sẽ sử dụng loại đã được mã hóa one-hot làm đầu vào:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 182, + "metadata": {}, + "outputs": [], + "source": [ + "X = pd.get_dummies(new_pumpkins['Variety'])\n", + "y = new_pumpkins['Price']" + ] + }, + { + "cell_type": "code", + "execution_count": 183, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 5.24 (19.7%)\n", + "Model determination: 0.774085281105197\n" + ] + } + ], + "source": [ + "def run_linear_regression(X,y):\n", + " X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n", + " lin_reg = LinearRegression()\n", + " lin_reg.fit(X_train,y_train)\n", + "\n", + " pred = lin_reg.predict(X_test)\n", + "\n", + " mse = np.sqrt(mean_squared_error(y_test,pred))\n", + " print(f'Mean error: {mse:3.3} ({mse/np.mean(pred)*100:3.3}%)')\n", + "\n", + " score = lin_reg.score(X_train,y_train)\n", + " print('Model determination: ', score)\n", + "\n", + "run_linear_regression(X,y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Chúng ta cũng có thể thử sử dụng các đặc điểm khác theo cách tương tự, và kết hợp chúng với các đặc điểm số, chẳng hạn như `Month` hoặc `DayOfYear`:\n" + ] + }, + { + 
"cell_type": "code", + "execution_count": 184, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 2.84 (10.5%)\n", + "Model determination: 0.9401096672643048\n" + ] + } + ], + "source": [ + "X = pd.get_dummies(new_pumpkins['Variety']) \\\n", + " .join(new_pumpkins['Month']) \\\n", + " .join(pd.get_dummies(new_pumpkins['City'])) \\\n", + " .join(pd.get_dummies(new_pumpkins['Package']))\n", + "y = new_pumpkins['Price']\n", + "\n", + "run_linear_regression(X,y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Hồi quy đa thức\n", + "\n", + "Hồi quy đa thức cũng có thể được sử dụng với các đặc trưng phân loại đã được mã hóa one-hot. Mã nguồn để huấn luyện hồi quy đa thức về cơ bản sẽ giống như chúng ta đã thấy ở trên.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 185, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 2.23 (8.25%)\n", + "Model determination: 0.9652870784724543\n" + ] + } + ], + "source": [ + "from sklearn.preprocessing import PolynomialFeatures\n", + "from sklearn.pipeline import make_pipeline\n", + "\n", + "pipeline = make_pipeline(PolynomialFeatures(2), LinearRegression())\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n", + "\n", + "pipeline.fit(X_train,y_train)\n", + "\n", + "pred = pipeline.predict(X_test)\n", + "\n", + "mse = np.sqrt(mean_squared_error(y_test,pred))\n", + "print(f'Mean error: {mse:3.3} ({mse/np.mean(pred)*100:3.3}%)')\n", + "\n", + "score = pipeline.score(X_train,y_train)\n", + "print('Model determination: ', score)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op 
Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn tham khảo chính thức. Đối với các thông tin quan trọng, chúng tôi khuyến nghị sử dụng dịch vụ dịch thuật chuyên nghiệp từ con người. Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "86193a1ab0ba47eac1c69c1756090baa3b420b3eea7d4aafab8b85f8b312f0c5" + }, + "kernelspec": { + "display_name": "Python 3.7.0 64-bit ('3.7')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.5" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "d77bd89ae7e79780c68c58bab91f13f8", + "translation_date": "2025-09-06T13:12:40+00:00", + "source_file": "2-Regression/3-Linear/solution/notebook.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/vi/2-Regression/4-Logistic/notebook.ipynb b/translations/vi/2-Regression/4-Logistic/notebook.ipynb new file mode 100644 index 000000000..4bf58d7c5 --- /dev/null +++ b/translations/vi/2-Regression/4-Logistic/notebook.ipynb @@ -0,0 +1,269 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Các Loại Bí Ngô và Màu Sắc\n", + "\n", + "Tải các thư viện cần thiết và tập dữ liệu. 
Chuyển dữ liệu thành một dataframe chứa một phần dữ liệu:\n", + "\n", + "Hãy xem xét mối quan hệ giữa màu sắc và loại bí ngô.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NameTypePackageVarietySub VarietyGradeDateLow PriceHigh PriceMostly Low...Unit of SaleQualityConditionAppearanceStorageCropRepackTrans ModeUnnamed: 24Unnamed: 25
0BALTIMORENaN24 inch binsNaNNaNNaN4/29/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
1BALTIMORENaN24 inch binsNaNNaNNaN5/6/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
2BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
3BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
4BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN11/5/1690.0100.090.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " City Name Type Package Variety Sub Variety Grade Date \\\n", + "0 BALTIMORE NaN 24 inch bins NaN NaN NaN 4/29/17 \n", + "1 BALTIMORE NaN 24 inch bins NaN NaN NaN 5/6/17 \n", + "2 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "3 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "4 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 11/5/16 \n", + "\n", + " Low Price High Price Mostly Low ... Unit of Sale Quality Condition \\\n", + "0 270.0 280.0 270.0 ... NaN NaN NaN \n", + "1 270.0 280.0 270.0 ... NaN NaN NaN \n", + "2 160.0 160.0 160.0 ... NaN NaN NaN \n", + "3 160.0 160.0 160.0 ... NaN NaN NaN \n", + "4 90.0 100.0 90.0 ... NaN NaN NaN \n", + "\n", + " Appearance Storage Crop Repack Trans Mode Unnamed: 24 Unnamed: 25 \n", + "0 NaN NaN NaN E NaN NaN NaN \n", + "1 NaN NaN NaN E NaN NaN NaN \n", + "2 NaN NaN NaN N NaN NaN NaN \n", + "3 NaN NaN NaN N NaN NaN NaN \n", + "4 NaN NaN NaN N NaN NaN NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "full_pumpkins = pd.read_csv('../data/US-pumpkins.csv')\n", + "\n", + "full_pumpkins.head()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc sự không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn tham khảo chính thức. Đối với các thông tin quan trọng, nên sử dụng dịch vụ dịch thuật chuyên nghiệp từ con người. 
Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.1" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "dee08c2b49057b0de8b6752c4dbca368", + "translation_date": "2025-09-06T13:26:53+00:00", + "source_file": "2-Regression/4-Logistic/notebook.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/vi/2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb b/translations/vi/2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb new file mode 100644 index 000000000..7cef80b66 --- /dev/null +++ b/translations/vi/2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb @@ -0,0 +1,686 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Xây dựng mô hình hồi quy logistic - Bài học 4\n", + "\n", + "![Đồ họa thông tin hồi quy logistic vs. hồi quy tuyến tính](../../../../../../2-Regression/4-Logistic/images/linear-vs-logistic.png)\n", + "\n", + "#### **[Câu hỏi trước bài học](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/15/)**\n", + "\n", + "#### Giới thiệu\n", + "\n", + "Trong bài học cuối cùng về Hồi quy, một trong những kỹ thuật *cổ điển* cơ bản của ML, chúng ta sẽ tìm hiểu về Hồi quy Logistic. Bạn sẽ sử dụng kỹ thuật này để khám phá các mẫu nhằm dự đoán các danh mục nhị phân. Đây có phải là kẹo sô-cô-la hay không? Bệnh này có lây hay không? 
Khách hàng này có chọn sản phẩm này hay không?\n", + "\n", + "Trong bài học này, bạn sẽ học:\n", + "\n", + "- Các kỹ thuật cho hồi quy logistic\n", + "\n", + "✅ Nâng cao hiểu biết của bạn về cách làm việc với loại hồi quy này trong [module học này](https://learn.microsoft.com/training/modules/introduction-classification-models/?WT.mc_id=academic-77952-leestott)\n", + "\n", + "## Điều kiện tiên quyết\n", + "\n", + "Sau khi làm việc với dữ liệu bí ngô, chúng ta đã đủ quen thuộc để nhận ra rằng có một danh mục nhị phân mà chúng ta có thể làm việc: `Color`.\n", + "\n", + "Hãy xây dựng một mô hình hồi quy logistic để dự đoán, dựa trên một số biến, *màu sắc của một quả bí ngô có khả năng là gì* (cam 🎃 hoặc trắng 👻).\n", + "\n", + "> Tại sao chúng ta lại nói về phân loại nhị phân trong một nhóm bài học về hồi quy? Chỉ vì sự tiện lợi về ngôn ngữ, vì hồi quy logistic thực chất là [một phương pháp phân loại](https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression), mặc dù dựa trên tuyến tính. 
Tìm hiểu về các cách khác để phân loại dữ liệu trong nhóm bài học tiếp theo.\n", + "\n", + "Để thực hiện bài học này, chúng ta sẽ cần các gói sau:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) là một [bộ sưu tập các gói R](https://www.tidyverse.org/packages) được thiết kế để làm cho khoa học dữ liệu nhanh hơn, dễ dàng hơn và thú vị hơn!\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) là một [bộ khung các gói](https://www.tidymodels.org/packages/) dành cho mô hình hóa và học máy.\n", + "\n", + "- `janitor`: [Gói janitor](https://github.com/sfirke/janitor) cung cấp các công cụ đơn giản để kiểm tra và làm sạch dữ liệu bẩn.\n", + "\n", + "- `ggbeeswarm`: [Gói ggbeeswarm](https://github.com/eclarke/ggbeeswarm) cung cấp các phương pháp để tạo biểu đồ kiểu beeswarm sử dụng ggplot2.\n", + "\n", + "Bạn có thể cài đặt chúng bằng lệnh:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\", \"janitor\", \"ggbeeswarm\"))`\n", + "\n", + "Ngoài ra, đoạn mã dưới đây sẽ kiểm tra xem bạn đã có các gói cần thiết để hoàn thành module này chưa và cài đặt chúng nếu chúng bị thiếu.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "suppressWarnings(if (!require(\"pacman\"))install.packages(\"pacman\"))\n", + "\n", + "pacman::p_load(tidyverse, tidymodels, janitor, ggbeeswarm)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## **Xác định câu hỏi**\n", + "\n", + "Đối với mục đích của chúng ta, chúng ta sẽ biểu diễn điều này dưới dạng nhị phân: 'Trắng' hoặc 'Không Trắng'. Trong tập dữ liệu của chúng ta cũng có một danh mục 'sọc', nhưng số lượng trường hợp rất ít, vì vậy chúng ta sẽ không sử dụng nó. Dù sao thì danh mục này cũng biến mất khi chúng ta loại bỏ các giá trị null khỏi tập dữ liệu.\n", + "\n", + "> 🎃 Một sự thật thú vị, đôi khi chúng ta gọi bí ngô trắng là bí ngô 'ma'. 
Chúng không dễ khắc lắm, vì vậy chúng không phổ biến như bí ngô màu cam, nhưng trông chúng rất ngầu! Vì vậy, chúng ta cũng có thể diễn đạt lại câu hỏi của mình thành: 'Ma' hoặc 'Không Ma'. 👻\n", + "\n", + "## **Về hồi quy logistic**\n", + "\n", + "Hồi quy logistic khác với hồi quy tuyến tính, mà bạn đã học trước đây, ở một vài điểm quan trọng.\n", + "\n", + "#### **Phân loại nhị phân**\n", + "\n", + "Hồi quy logistic không cung cấp các tính năng giống như hồi quy tuyến tính. Hồi quy logistic đưa ra dự đoán về một `danh mục nhị phân` (\"màu cam hoặc không màu cam\"), trong khi hồi quy tuyến tính có khả năng dự đoán `giá trị liên tục`, ví dụ như dựa trên nguồn gốc của một quả bí ngô và thời gian thu hoạch, *giá của nó sẽ tăng bao nhiêu*.\n", + "\n", + "![Đồ họa thông tin của Dasani Madipalli](../../../../../../2-Regression/4-Logistic/images/pumpkin-classifier.png)\n", + "\n", + "### Các loại phân loại khác\n", + "\n", + "Có các loại hồi quy logistic khác, bao gồm hồi quy đa thức và hồi quy thứ bậc:\n", + "\n", + "- **Đa thức**, liên quan đến việc có nhiều hơn một danh mục - \"Màu cam, Trắng và Sọc\".\n", + "\n", + "- **Thứ bậc**, liên quan đến các danh mục có thứ tự, hữu ích nếu chúng ta muốn sắp xếp các kết quả theo logic, như các quả bí ngô được sắp xếp theo một số kích thước hữu hạn (mini, nhỏ, vừa, lớn, rất lớn, cực lớn).\n", + "\n", + "![Hồi quy đa thức vs hồi quy thứ bậc](../../../../../../2-Regression/4-Logistic/images/multinomial-vs-ordinal.png)\n", + "\n", + "#### **Các biến KHÔNG CẦN phải tương quan**\n", + "\n", + "Bạn còn nhớ hồi quy tuyến tính hoạt động tốt hơn với các biến có tương quan cao không? Hồi quy logistic thì ngược lại - các biến không cần phải tương quan. 
Điều này phù hợp với tập dữ liệu của chúng ta, vốn có các mối tương quan khá yếu.\n", + "\n", + "#### **Bạn cần nhiều dữ liệu sạch**\n", + "\n", + "Hồi quy logistic sẽ cho kết quả chính xác hơn nếu bạn sử dụng nhiều dữ liệu hơn; tập dữ liệu nhỏ của chúng ta không phải là tối ưu cho nhiệm vụ này, vì vậy hãy ghi nhớ điều đó.\n", + "\n", + "✅ Hãy suy nghĩ về các loại dữ liệu phù hợp với hồi quy logistic\n", + "\n", + "## Bài tập - làm sạch dữ liệu\n", + "\n", + "Đầu tiên, làm sạch dữ liệu một chút, loại bỏ các giá trị null và chỉ chọn một số cột:\n", + "\n", + "1. Thêm đoạn mã sau:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Load the core tidyverse packages\n", + "library(tidyverse)\n", + "\n", + "# Import the data and clean column names\n", + "pumpkins <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/2-Regression/data/US-pumpkins.csv\") %>% \n", + " clean_names()\n", + "\n", + "# Select desired columns\n", + "pumpkins_select <- pumpkins %>% \n", + " select(c(city_name, package, variety, origin, item_size, color)) \n", + "\n", + "# Drop rows containing missing values and encode color as factor (category)\n", + "pumpkins_select <- pumpkins_select %>% \n", + " drop_na() %>% \n", + " mutate(color = factor(color))\n", + "\n", + "# View the first few rows\n", + "pumpkins_select %>% \n", + " slice_head(n = 5)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Bạn luôn có thể xem nhanh dataframe mới của mình bằng cách sử dụng hàm [*glimpse()*](https://pillar.r-lib.org/reference/glimpse.html) như dưới đây:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "pumpkins_select %>% \n", + " glimpse()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Hãy xác nhận rằng 
chúng ta thực sự sẽ thực hiện một bài toán phân loại nhị phân:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Subset distinct observations in outcome column\n", + "pumpkins_select %>% \n", + " distinct(color)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Trực quan hóa - biểu đồ phân loại\n", + "Đến thời điểm này, bạn đã tải lại dữ liệu về bí ngô và làm sạch nó để giữ lại một tập dữ liệu chứa một vài biến, bao gồm Màu sắc. Hãy trực quan hóa dataframe trong notebook bằng thư viện ggplot.\n", + "\n", + "Thư viện ggplot cung cấp một số cách thú vị để trực quan hóa dữ liệu của bạn. Ví dụ, bạn có thể so sánh phân bố dữ liệu cho từng Loại và Màu sắc trong một biểu đồ phân loại.\n", + "\n", + "1. Tạo biểu đồ như vậy bằng cách sử dụng hàm `geom_bar`, sử dụng dữ liệu bí ngô của chúng ta, và chỉ định ánh xạ màu cho từng loại bí ngô (màu cam hoặc màu trắng):\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Specify colors for each value of the hue variable\n", + "palette <- c(ORANGE = \"orange\", WHITE = \"wheat\")\n", + "\n", + "# Create the bar plot\n", + "ggplot(pumpkins_select, aes(y = variety, fill = color)) +\n", + " geom_bar(position = \"dodge\") +\n", + " scale_fill_manual(values = palette) +\n", + " labs(y = \"Variety\", fill = \"Color\") +\n", + " theme_minimal()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Bằng cách quan sát dữ liệu, bạn có thể thấy cách dữ liệu Màu sắc liên quan đến Loại.\n", + "\n", + "✅ Dựa trên biểu đồ phân loại này, bạn có thể hình dung ra những khám phá thú vị nào?\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Xử lý dữ liệu: mã hóa đặc trưng\n", + "\n", + "Bộ dữ liệu pumpkins của chúng ta chứa các giá trị dạng chuỗi cho 
tất cả các cột. Làm việc với dữ liệu phân loại rất trực quan đối với con người nhưng không phải đối với máy móc. Các thuật toán học máy hoạt động tốt với dữ liệu dạng số. Đó là lý do tại sao mã hóa là một bước rất quan trọng trong giai đoạn xử lý dữ liệu, vì nó cho phép chúng ta chuyển đổi dữ liệu phân loại thành dữ liệu dạng số mà không làm mất thông tin. Mã hóa tốt sẽ giúp xây dựng một mô hình tốt.\n", + "\n", + "Đối với mã hóa đặc trưng, có hai loại mã hóa chính:\n", + "\n", + "1. Bộ mã hóa thứ tự (Ordinal encoder): phù hợp với các biến thứ tự, là các biến phân loại mà dữ liệu của chúng tuân theo một thứ tự logic, như cột `item_size` trong bộ dữ liệu của chúng ta. Nó tạo ra một ánh xạ sao cho mỗi danh mục được biểu diễn bằng một con số, con số này là thứ tự của danh mục trong cột.\n", + "\n", + "2. Bộ mã hóa phân loại (Categorical encoder): phù hợp với các biến danh nghĩa, là các biến phân loại mà dữ liệu của chúng không tuân theo một thứ tự logic, như tất cả các đặc trưng khác ngoài `item_size` trong bộ dữ liệu của chúng ta. Đây là một dạng mã hóa one-hot, nghĩa là mỗi danh mục được biểu diễn bằng một cột nhị phân: biến được mã hóa sẽ bằng 1 nếu quả bí thuộc về loại đó và bằng 0 nếu không.\n", + "\n", + "Tidymodels cung cấp một gói rất tiện lợi khác: [recipes](https://recipes.tidymodels.org/) - một gói dùng để xử lý dữ liệu. Chúng ta sẽ định nghĩa một `recipe` để chỉ định rằng tất cả các cột dự đoán nên được mã hóa thành một tập hợp các số nguyên, `prep` để ước tính các số lượng và thống kê cần thiết cho bất kỳ thao tác nào, và cuối cùng `bake` để áp dụng các tính toán cho dữ liệu mới.\n", + "\n", + "> Thông thường, recipes thường được sử dụng như một bộ tiền xử lý cho việc mô hình hóa, nơi nó định nghĩa các bước cần được áp dụng cho một tập dữ liệu để chuẩn bị cho việc mô hình hóa. Trong trường hợp đó, **rất khuyến khích** bạn sử dụng `workflow()` thay vì tự ước tính một recipe bằng prep và bake. 
Chúng ta sẽ thấy tất cả điều này ngay sau đây.\n", + ">\n", + "> Tuy nhiên, hiện tại chúng ta đang sử dụng recipes + prep + bake để chỉ định các bước cần được áp dụng cho một tập dữ liệu nhằm chuẩn bị cho việc phân tích dữ liệu và sau đó trích xuất dữ liệu đã được xử lý với các bước đã áp dụng.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Preprocess and extract data to allow some data analysis\n", + "baked_pumpkins <- recipe(color ~ ., data = pumpkins_select) %>%\n", + " # Define ordering for item_size column\n", + " step_mutate(item_size = ordered(item_size, levels = c('sml', 'med', 'med-lge', 'lge', 'xlge', 'jbo', 'exjbo'))) %>%\n", + " # Convert factors to numbers using the order defined above (Ordinal encoding)\n", + " step_integer(item_size, zero_based = F) %>%\n", + " # Encode all other predictors using one hot encoding\n", + " step_dummy(all_nominal(), -all_outcomes(), one_hot = TRUE) %>%\n", + " prep(data = pumpkin_select) %>%\n", + " bake(new_data = NULL)\n", + "\n", + "# Display the first few rows of preprocessed data\n", + "baked_pumpkins %>% \n", + " slice_head(n = 5)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "✅ Những lợi ích của việc sử dụng bộ mã hóa thứ tự (ordinal encoder) cho cột Item Size là gì?\n", + "\n", + "### Phân tích mối quan hệ giữa các biến\n", + "\n", + "Bây giờ, sau khi đã tiền xử lý dữ liệu, chúng ta có thể phân tích mối quan hệ giữa các đặc trưng và nhãn để hiểu rõ hơn về khả năng dự đoán nhãn của mô hình dựa trên các đặc trưng. Cách tốt nhất để thực hiện loại phân tích này là vẽ biểu đồ dữ liệu. \n", + "Chúng ta sẽ tiếp tục sử dụng hàm `geom_boxplot` của ggplot để trực quan hóa mối quan hệ giữa Item Size, Variety và Color trong một biểu đồ phân loại. 
Để biểu diễn dữ liệu tốt hơn, chúng ta sẽ sử dụng cột Item Size đã được mã hóa và cột Variety chưa được mã hóa.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Define the color palette\n", + "palette <- c(ORANGE = \"orange\", WHITE = \"wheat\")\n", + "\n", + "# We need the encoded Item Size column to use it as the x-axis values in the plot\n", + "pumpkins_select_plot<-pumpkins_select\n", + "pumpkins_select_plot$item_size <- baked_pumpkins$item_size\n", + "\n", + "# Create the grouped box plot\n", + "ggplot(pumpkins_select_plot, aes(x = `item_size`, y = color, fill = color)) +\n", + " geom_boxplot() +\n", + " facet_grid(variety ~ ., scales = \"free_x\") +\n", + " scale_fill_manual(values = palette) +\n", + " labs(x = \"Item Size\", y = \"\") +\n", + " theme_minimal() +\n", + " theme(strip.text = element_text(size = 12)) +\n", + " theme(axis.text.x = element_text(size = 10)) +\n", + " theme(axis.title.x = element_text(size = 12)) +\n", + " theme(axis.title.y = element_blank()) +\n", + " theme(legend.position = \"bottom\") +\n", + " guides(fill = guide_legend(title = \"Color\")) +\n", + " theme(panel.spacing = unit(0.5, \"lines\"))+\n", + " theme(strip.text.y = element_text(size = 4, hjust = 0)) \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Sử dụng biểu đồ swarm\n", + "\n", + "Vì Color là một danh mục nhị phân (Trắng hoặc Không), nó cần 'một [phương pháp chuyên biệt](https://github.com/rstudio/cheatsheets/blob/main/data-visualization.pdf) để trực quan hóa'.\n", + "\n", + "Hãy thử sử dụng `biểu đồ swarm` để hiển thị sự phân bố của màu sắc liên quan đến kích thước vật phẩm.\n", + "\n", + "Chúng ta sẽ sử dụng [gói ggbeeswarm](https://github.com/eclarke/ggbeeswarm), cung cấp các phương pháp để tạo biểu đồ kiểu beeswarm bằng ggplot2. 
Biểu đồ beeswarm là một cách để vẽ các điểm mà thông thường sẽ chồng lên nhau, sao cho chúng nằm cạnh nhau thay vì chồng lấp.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Create beeswarm plots of color and item_size\n", + "baked_pumpkins %>% \n", + " mutate(color = factor(color)) %>% \n", + " ggplot(mapping = aes(x = color, y = item_size, color = color)) +\n", + " geom_quasirandom() +\n", + " scale_color_brewer(palette = \"Dark2\", direction = -1) +\n", + " theme(legend.position = \"none\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Bây giờ chúng ta đã hiểu mối quan hệ giữa các danh mục nhị phân của màu sắc và nhóm lớn hơn của kích thước, hãy cùng khám phá hồi quy logistic để xác định màu sắc có khả năng của một quả bí ngô.\n", + "\n", + "## Xây dựng mô hình của bạn\n", + "\n", + "Chọn các biến bạn muốn sử dụng trong mô hình phân loại và chia dữ liệu thành tập huấn luyện và tập kiểm tra. 
[rsample](https://rsample.tidymodels.org/), một gói trong Tidymodels, cung cấp cơ sở hạ tầng để chia dữ liệu và lấy mẫu lại một cách hiệu quả:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Split data into 80% for training and 20% for testing\n", + "set.seed(2056)\n", + "pumpkins_split <- pumpkins_select %>% \n", + " initial_split(prop = 0.8)\n", + "\n", + "# Extract the data in each split\n", + "pumpkins_train <- training(pumpkins_split)\n", + "pumpkins_test <- testing(pumpkins_split)\n", + "\n", + "# Print out the first 5 rows of the training set\n", + "pumpkins_train %>% \n", + " slice_head(n = 5)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "🙌 Chúng ta đã sẵn sàng huấn luyện một mô hình bằng cách khớp các đặc trưng huấn luyện với nhãn huấn luyện (màu sắc).\n", + "\n", + "Chúng ta sẽ bắt đầu bằng việc tạo một công thức (recipe) để chỉ định các bước tiền xử lý cần thực hiện trên dữ liệu nhằm chuẩn bị cho việc mô hình hóa, ví dụ: mã hóa các biến phân loại thành một tập hợp các số nguyên. Tương tự như `baked_pumpkins`, chúng ta tạo một `pumpkins_recipe` nhưng không `prep` và `bake` vì nó sẽ được tích hợp vào một quy trình làm việc (workflow), điều này sẽ được giải thích trong vài bước tiếp theo.\n", + "\n", + "Có khá nhiều cách để chỉ định một mô hình hồi quy logistic trong Tidymodels. Xem `?logistic_reg()`. 
Hiện tại, chúng ta sẽ chỉ định một mô hình hồi quy logistic thông qua engine mặc định `stats::glm()`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Create a recipe that specifies preprocessing steps for modelling\n", + "pumpkins_recipe <- recipe(color ~ ., data = pumpkins_train) %>% \n", + " step_mutate(item_size = ordered(item_size, levels = c('sml', 'med', 'med-lge', 'lge', 'xlge', 'jbo', 'exjbo'))) %>%\n", + " step_integer(item_size, zero_based = F) %>% \n", + " step_dummy(all_nominal(), -all_outcomes(), one_hot = TRUE)\n", + "\n", + "# Create a logistic model specification\n", + "log_reg <- logistic_reg() %>% \n", + " set_engine(\"glm\") %>% \n", + " set_mode(\"classification\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Bây giờ chúng ta đã có một công thức và một mô tả mô hình, chúng ta cần tìm cách kết hợp chúng lại thành một đối tượng có thể thực hiện các nhiệm vụ sau: tiền xử lý dữ liệu (chuẩn bị + xử lý ngầm), huấn luyện mô hình trên dữ liệu đã được tiền xử lý, và cũng hỗ trợ các hoạt động hậu xử lý tiềm năng.\n", + "\n", + "Trong Tidymodels, đối tượng tiện lợi này được gọi là một [`workflow`](https://workflows.tidymodels.org/) và nó giúp bạn lưu trữ các thành phần mô hình một cách thuận tiện.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Bundle modelling components in a workflow\n", + "log_reg_wf <- workflow() %>% \n", + " add_recipe(pumpkins_recipe) %>% \n", + " add_model(log_reg)\n", + "\n", + "# Print out the workflow\n", + "log_reg_wf\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Sau khi một quy trình làm việc được *xác định*, một mô hình có thể được `huấn luyện` bằng cách sử dụng hàm [`fit()`](https://tidymodels.github.io/parsnip/reference/fit.html). 
Quy trình làm việc sẽ ước tính một công thức và tiền xử lý dữ liệu trước khi huấn luyện, vì vậy chúng ta sẽ không cần phải thực hiện thủ công bằng cách sử dụng prep và bake.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Train the model\n", + "wf_fit <- log_reg_wf %>% \n", + " fit(data = pumpkins_train)\n", + "\n", + "# Print the trained workflow\n", + "wf_fit\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Mô hình hiển thị các hệ số đã học được trong quá trình huấn luyện.\n", + "\n", + "Bây giờ chúng ta đã huấn luyện mô hình bằng dữ liệu huấn luyện, chúng ta có thể thực hiện dự đoán trên dữ liệu kiểm tra bằng [parsnip::predict()](https://parsnip.tidymodels.org/reference/predict.model_fit.html). Hãy bắt đầu bằng cách sử dụng mô hình để dự đoán nhãn cho tập kiểm tra và xác suất cho từng nhãn. Khi xác suất lớn hơn 0.5, lớp dự đoán là `WHITE`, ngược lại là `ORANGE`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Make predictions for color and corresponding probabilities\n", + "results <- pumpkins_test %>% select(color) %>% \n", + " bind_cols(wf_fit %>% \n", + " predict(new_data = pumpkins_test)) %>%\n", + " bind_cols(wf_fit %>%\n", + " predict(new_data = pumpkins_test, type = \"prob\"))\n", + "\n", + "# Compare predictions\n", + "results %>% \n", + " slice_head(n = 10)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Rất tuyệt! Điều này cung cấp thêm một số hiểu biết về cách hồi quy logistic hoạt động.\n", + "\n", + "### Hiểu rõ hơn thông qua ma trận nhầm lẫn\n", + "\n", + "So sánh từng dự đoán với giá trị thực tế \"ground truth\" tương ứng không phải là cách hiệu quả nhất để xác định mức độ chính xác của mô hình. 
May mắn thay, Tidymodels có một vài thủ thuật khác: [`yardstick`](https://yardstick.tidymodels.org/) - một gói dùng để đo lường hiệu quả của mô hình thông qua các chỉ số hiệu suất.\n", + "\n", + "Một chỉ số hiệu suất liên quan đến các vấn đề phân loại là [`ma trận nhầm lẫn`](https://wikipedia.org/wiki/Confusion_matrix). Ma trận nhầm lẫn mô tả mức độ hiệu quả của một mô hình phân loại. Ma trận nhầm lẫn liệt kê số lượng ví dụ trong mỗi lớp được mô hình phân loại chính xác. Trong trường hợp của chúng ta, nó sẽ cho bạn biết có bao nhiêu quả bí ngô màu cam được phân loại là màu cam và bao nhiêu quả bí ngô màu trắng được phân loại là màu trắng; ma trận nhầm lẫn cũng cho thấy có bao nhiêu quả bị phân loại vào các danh mục **sai**.\n", + "\n", + "Hàm [**`conf_mat()`**](https://tidymodels.github.io/yardstick/reference/conf_mat.html) từ yardstick tính toán sự đối chiếu giữa các lớp quan sát và dự đoán.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Confusion matrix for prediction results\n", + "conf_mat(data = results, truth = color, estimate = .pred_class)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Hãy cùng phân tích ma trận nhầm lẫn. 
Mô hình của chúng ta được yêu cầu phân loại bí ngô thành hai danh mục nhị phân, danh mục `trắng` và danh mục `không trắng`.\n", + "\n", + "- Nếu mô hình của bạn dự đoán một quả bí ngô là trắng và thực tế nó thuộc danh mục 'trắng', chúng ta gọi đó là `đúng dương`, được biểu thị bằng số ở góc trên bên trái.\n", + "\n", + "- Nếu mô hình của bạn dự đoán một quả bí ngô là không trắng và thực tế nó thuộc danh mục 'trắng', chúng ta gọi đó là `sai âm`, được biểu thị bằng số ở góc dưới bên trái.\n", + "\n", + "- Nếu mô hình của bạn dự đoán một quả bí ngô là trắng và thực tế nó thuộc danh mục 'không trắng', chúng ta gọi đó là `sai dương`, được biểu thị bằng số ở góc trên bên phải.\n", + "\n", + "- Nếu mô hình của bạn dự đoán một quả bí ngô là không trắng và thực tế nó thuộc danh mục 'không trắng', chúng ta gọi đó là `đúng âm`, được biểu thị bằng số ở góc dưới bên phải.\n", + "\n", + "| Thực tế |\n", + "|:-----:|\n", + "\n", + "\n", + "| | | |\n", + "|---------------|--------|-------|\n", + "| **Dự đoán** | TRẮNG | CAM |\n", + "| TRẮNG | TP | FP |\n", + "| CAM | FN | TN |\n", + "\n", + "Như bạn có thể đoán, sẽ tốt hơn nếu có số lượng lớn các giá trị đúng dương và đúng âm, đồng thời giảm số lượng sai dương và sai âm, điều này cho thấy mô hình hoạt động tốt hơn.\n", + "\n", + "Ma trận nhầm lẫn rất hữu ích vì nó dẫn đến các chỉ số khác giúp chúng ta đánh giá hiệu quả của một mô hình phân loại tốt hơn. Hãy cùng tìm hiểu một số chỉ số này:\n", + "\n", + "🎓 Độ chính xác: `TP/(TP + FP)` được định nghĩa là tỷ lệ các dự đoán dương thực sự là dương. Còn được gọi là [giá trị dự đoán dương](https://en.wikipedia.org/wiki/Positive_predictive_value \"Positive predictive value\").\n", + "\n", + "🎓 Độ nhạy: `TP/(TP + FN)` được định nghĩa là tỷ lệ kết quả dương trên tổng số mẫu thực sự là dương. 
Còn được gọi là recall hoặc `sensitivity`.\n", + "\n", + "🎓 Độ đặc hiệu: `TN/(TN + FP)` được định nghĩa là tỷ lệ kết quả âm trên tổng số mẫu thực sự là âm.\n", + "\n", + "🎓 Độ chính xác tổng thể: `(TP + TN)/(TP + TN + FP + FN)` là tỷ lệ nhãn được dự đoán chính xác trên tổng số mẫu.\n", + "\n", + "🎓 F Measure: Trung bình trọng số của độ chính xác và độ nhạy, với giá trị tốt nhất là 1 và kém nhất là 0.\n", + "\n", + "Hãy cùng tính các chỉ số này!\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Combine metric functions and calculate them all at once\n", + "eval_metrics <- metric_set(ppv, recall, spec, f_meas, accuracy)\n", + "eval_metrics(data = results, truth = color, estimate = .pred_class)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Hiển thị đường cong ROC của mô hình này\n", + "\n", + "Hãy thực hiện thêm một bước trực quan hóa nữa để xem cái gọi là [`đường cong ROC`](https://en.wikipedia.org/wiki/Receiver_operating_characteristic):\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Make a roc_curve\n", + "results %>% \n", + " roc_curve(color, .pred_ORANGE) %>% \n", + " autoplot()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Đường cong ROC thường được sử dụng để đánh giá đầu ra của một bộ phân loại dựa trên tỷ lệ dương tính thật so với dương tính giả. Đường cong ROC thường hiển thị `True Positive Rate`/Độ nhạy trên trục Y, và `False Positive Rate`/1-Đặc hiệu trên trục X. Do đó, độ dốc của đường cong và khoảng cách giữa đường trung điểm và đường cong rất quan trọng: bạn muốn một đường cong nhanh chóng đi lên và vượt qua đường trung điểm. 
Trong trường hợp của chúng ta, ban đầu có một số dương tính giả, sau đó đường cong đi lên và vượt qua đúng cách.\n", + "\n", + "Cuối cùng, hãy sử dụng `yardstick::roc_auc()` để tính toán Diện Tích Dưới Đường Cong thực tế. Một cách để diễn giải AUC là xác suất mà mô hình xếp hạng một ví dụ dương tính ngẫu nhiên cao hơn một ví dụ âm tính ngẫu nhiên.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "# Calculate area under curve\n", + "results %>% \n", + " roc_auc(color, .pred_ORANGE)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Kết quả là khoảng `0.975`. Vì AUC dao động từ 0 đến 1, bạn muốn có một điểm số cao, bởi vì một mô hình dự đoán chính xác 100% sẽ có AUC bằng 1; trong trường hợp này, mô hình *khá tốt*.\n", + "\n", + "Trong các bài học tương lai về phân loại, bạn sẽ học cách cải thiện điểm số của mô hình (chẳng hạn như xử lý dữ liệu không cân bằng trong trường hợp này).\n", + "\n", + "## 🚀Thử thách\n", + "\n", + "Có rất nhiều điều để khám phá về hồi quy logistic! Nhưng cách tốt nhất để học là thử nghiệm. Tìm một tập dữ liệu phù hợp với loại phân tích này và xây dựng một mô hình với nó. Bạn học được gì? mẹo: thử [Kaggle](https://www.kaggle.com/search?q=logistic+regression+datasets) để tìm các tập dữ liệu thú vị.\n", + "\n", + "## Ôn tập & Tự học\n", + "\n", + "Đọc vài trang đầu của [bài báo này từ Stanford](https://web.stanford.edu/~jurafsky/slp3/5.pdf) về một số ứng dụng thực tiễn của hồi quy logistic. Hãy suy nghĩ về các nhiệm vụ phù hợp hơn với từng loại hồi quy mà chúng ta đã học cho đến thời điểm này. Loại nào sẽ hoạt động tốt nhất?\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). 
Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc sự không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn tham khảo chính thức. Đối với các thông tin quan trọng, nên sử dụng dịch vụ dịch thuật chuyên nghiệp từ con người. Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ], + "metadata": { + "anaconda-cloud": "", + "kernelspec": { + "display_name": "R", + "langauge": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "3.4.1" + }, + "coopTranslator": { + "original_hash": "feaf125f481a89c468fa115bf2aed580", + "translation_date": "2025-09-06T13:38:14+00:00", + "source_file": "2-Regression/4-Logistic/solution/R/lesson_4-R.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/translations/vi/2-Regression/4-Logistic/solution/notebook.ipynb b/translations/vi/2-Regression/4-Logistic/solution/notebook.ipynb new file mode 100644 index 000000000..35e8203df --- /dev/null +++ b/translations/vi/2-Regression/4-Logistic/solution/notebook.ipynb @@ -0,0 +1,1256 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Hồi quy Logistic - Bài học 4\n", + "\n", + "Tải các thư viện cần thiết và tập dữ liệu. Chuyển đổi dữ liệu thành một dataframe chứa một tập con của dữ liệu:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NameTypePackageVarietySub VarietyGradeDateLow PriceHigh PriceMostly Low...Unit of SaleQualityConditionAppearanceStorageCropRepackTrans ModeUnnamed: 24Unnamed: 25
0BALTIMORENaN24 inch binsNaNNaNNaN4/29/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
1BALTIMORENaN24 inch binsNaNNaNNaN5/6/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
2BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
3BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
4BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN11/5/1690.0100.090.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " City Name Type Package Variety Sub Variety Grade Date \n", + "0 BALTIMORE NaN 24 inch bins NaN NaN NaN 4/29/17 \\\n", + "1 BALTIMORE NaN 24 inch bins NaN NaN NaN 5/6/17 \n", + "2 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "3 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", + "4 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 11/5/16 \n", + "\n", + " Low Price High Price Mostly Low ... Unit of Sale Quality Condition \n", + "0 270.0 280.0 270.0 ... NaN NaN NaN \\\n", + "1 270.0 280.0 270.0 ... NaN NaN NaN \n", + "2 160.0 160.0 160.0 ... NaN NaN NaN \n", + "3 160.0 160.0 160.0 ... NaN NaN NaN \n", + "4 90.0 100.0 90.0 ... NaN NaN NaN \n", + "\n", + " Appearance Storage Crop Repack Trans Mode Unnamed: 24 Unnamed: 25 \n", + "0 NaN NaN NaN E NaN NaN NaN \n", + "1 NaN NaN NaN E NaN NaN NaN \n", + "2 NaN NaN NaN N NaN NaN NaN \n", + "3 NaN NaN NaN N NaN NaN NaN \n", + "4 NaN NaN NaN N NaN NaN NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "full_pumpkins = pd.read_csv('../../data/US-pumpkins.csv')\n", + "\n", + "full_pumpkins.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City NamePackageVarietyOriginItem SizeColor
2BALTIMORE24 inch binsHOWDEN TYPEDELAWAREmedORANGE
3BALTIMORE24 inch binsHOWDEN TYPEVIRGINIAmedORANGE
4BALTIMORE24 inch binsHOWDEN TYPEMARYLANDlgeORANGE
5BALTIMORE24 inch binsHOWDEN TYPEMARYLANDlgeORANGE
6BALTIMORE36 inch binsHOWDEN TYPEMARYLANDmedORANGE
\n", + "
" + ], + "text/plain": [ + " City Name Package Variety Origin Item Size Color\n", + "2 BALTIMORE 24 inch bins HOWDEN TYPE DELAWARE med ORANGE\n", + "3 BALTIMORE 24 inch bins HOWDEN TYPE VIRGINIA med ORANGE\n", + "4 BALTIMORE 24 inch bins HOWDEN TYPE MARYLAND lge ORANGE\n", + "5 BALTIMORE 24 inch bins HOWDEN TYPE MARYLAND lge ORANGE\n", + "6 BALTIMORE 36 inch bins HOWDEN TYPE MARYLAND med ORANGE" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Select the columns we want to use\n", + "columns_to_select = ['City Name','Package','Variety', 'Origin','Item Size', 'Color']\n", + "pumpkins = full_pumpkins.loc[:, columns_to_select]\n", + "\n", + "# Drop rows with missing values\n", + "pumpkins.dropna(inplace=True)\n", + "\n", + "pumpkins.head()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Hãy cùng xem dữ liệu của chúng ta!\n", + "\n", + "Bằng cách trực quan hóa nó với Seaborn\n" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import seaborn as sns\n", + "# Specify colors for each values of the hue variable\n", + "palette = {\n", + " 'ORANGE': 'orange',\n", + " 'WHITE': 'wheat',\n", + "}\n", + "# Plot a bar plot to visualize how many pumpkins of each variety are orange or white\n", + "sns.catplot(\n", + " data=pumpkins, y=\"Variety\", hue=\"Color\", kind=\"count\",\n", + " palette=palette, \n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Tiền xử lý dữ liệu\n", + "\n", + "Hãy mã hóa các đặc trưng và nhãn để dễ dàng hơn trong việc vẽ biểu đồ dữ liệu và huấn luyện mô hình.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['med', 'lge', 'sml', 'xlge', 'med-lge', 'jbo', 'exjbo'],\n", + " dtype=object)" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Let's look at the different values of the 'Item Size' column\n", + "pumpkins['Item Size'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import OrdinalEncoder\n", + "# Encode the 'Item Size' column using ordinal encoding\n", + "item_size_categories = [['sml', 'med', 'med-lge', 'lge', 'xlge', 'jbo', 'exjbo']]\n", + "ordinal_features = ['Item Size']\n", + "ordinal_encoder = OrdinalEncoder(categories=item_size_categories)" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import OneHotEncoder\n", + "# Encode all the other features using one-hot encoding\n", + "categorical_features = ['City Name', 'Package', 'Variety', 'Origin']\n", + "categorical_encoder = OneHotEncoder(sparse_output=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + 
"metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ord__Item Sizecat__City Name_ATLANTAcat__City Name_BALTIMOREcat__City Name_BOSTONcat__City Name_CHICAGOcat__City Name_COLUMBIAcat__City Name_DALLAScat__City Name_DETROITcat__City Name_LOS ANGELEScat__City Name_MIAMI...cat__Origin_MICHIGANcat__Origin_NEW JERSEYcat__Origin_NEW YORKcat__Origin_NORTH CAROLINAcat__Origin_OHIOcat__Origin_PENNSYLVANIAcat__Origin_TENNESSEEcat__Origin_TEXAScat__Origin_VERMONTcat__Origin_VIRGINIA
21.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
31.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.01.0
43.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
53.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
61.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
\n", + "

5 rows × 48 columns

\n", + "
" + ], + "text/plain": [ + " ord__Item Size cat__City Name_ATLANTA cat__City Name_BALTIMORE \n", + "2 1.0 0.0 1.0 \\\n", + "3 1.0 0.0 1.0 \n", + "4 3.0 0.0 1.0 \n", + "5 3.0 0.0 1.0 \n", + "6 1.0 0.0 1.0 \n", + "\n", + " cat__City Name_BOSTON cat__City Name_CHICAGO cat__City Name_COLUMBIA \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__City Name_DALLAS cat__City Name_DETROIT cat__City Name_LOS ANGELES \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__City Name_MIAMI ... cat__Origin_MICHIGAN cat__Origin_NEW JERSEY \n", + "2 0.0 ... 0.0 0.0 \\\n", + "3 0.0 ... 0.0 0.0 \n", + "4 0.0 ... 0.0 0.0 \n", + "5 0.0 ... 0.0 0.0 \n", + "6 0.0 ... 0.0 0.0 \n", + "\n", + " cat__Origin_NEW YORK cat__Origin_NORTH CAROLINA cat__Origin_OHIO \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__Origin_PENNSYLVANIA cat__Origin_TENNESSEE cat__Origin_TEXAS \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__Origin_VERMONT cat__Origin_VIRGINIA \n", + "2 0.0 0.0 \n", + "3 0.0 1.0 \n", + "4 0.0 0.0 \n", + "5 0.0 0.0 \n", + "6 0.0 0.0 \n", + "\n", + "[5 rows x 48 columns]" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.compose import ColumnTransformer\n", + "ct = ColumnTransformer(transformers=[\n", + " ('ord', ordinal_encoder, ordinal_features),\n", + " ('cat', categorical_encoder, categorical_features)\n", + " ])\n", + "# Get the encoded features as a pandas DataFrame\n", + "ct.set_output(transform='pandas')\n", + "encoded_features = ct.fit_transform(pumpkins)\n", + "encoded_features.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + 
{ + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ord__Item Sizecat__City Name_ATLANTAcat__City Name_BALTIMOREcat__City Name_BOSTONcat__City Name_CHICAGOcat__City Name_COLUMBIAcat__City Name_DALLAScat__City Name_DETROITcat__City Name_LOS ANGELEScat__City Name_MIAMI...cat__Origin_NEW JERSEYcat__Origin_NEW YORKcat__Origin_NORTH CAROLINAcat__Origin_OHIOcat__Origin_PENNSYLVANIAcat__Origin_TENNESSEEcat__Origin_TEXAScat__Origin_VERMONTcat__Origin_VIRGINIAColor
21.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00
31.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00
43.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00
53.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00
61.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00
\n", + "

5 rows × 49 columns

\n", + "
" + ], + "text/plain": [ + " ord__Item Size cat__City Name_ATLANTA cat__City Name_BALTIMORE \n", + "2 1.0 0.0 1.0 \\\n", + "3 1.0 0.0 1.0 \n", + "4 3.0 0.0 1.0 \n", + "5 3.0 0.0 1.0 \n", + "6 1.0 0.0 1.0 \n", + "\n", + " cat__City Name_BOSTON cat__City Name_CHICAGO cat__City Name_COLUMBIA \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__City Name_DALLAS cat__City Name_DETROIT cat__City Name_LOS ANGELES \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__City Name_MIAMI ... cat__Origin_NEW JERSEY cat__Origin_NEW YORK \n", + "2 0.0 ... 0.0 0.0 \\\n", + "3 0.0 ... 0.0 0.0 \n", + "4 0.0 ... 0.0 0.0 \n", + "5 0.0 ... 0.0 0.0 \n", + "6 0.0 ... 0.0 0.0 \n", + "\n", + " cat__Origin_NORTH CAROLINA cat__Origin_OHIO cat__Origin_PENNSYLVANIA \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__Origin_TENNESSEE cat__Origin_TEXAS cat__Origin_VERMONT \n", + "2 0.0 0.0 0.0 \\\n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 \n", + "\n", + " cat__Origin_VIRGINIA Color \n", + "2 0.0 0 \n", + "3 1.0 0 \n", + "4 0.0 0 \n", + "5 0.0 0 \n", + "6 0.0 0 \n", + "\n", + "[5 rows x 49 columns]" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.preprocessing import LabelEncoder\n", + "# Encode the 'Color' column using label encoding\n", + "label_encoder = LabelEncoder()\n", + "encoded_label = label_encoder.fit_transform(pumpkins['Color'])\n", + "encoded_pumpkins = encoded_features.assign(Color=encoded_label)\n", + "encoded_pumpkins.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['ORANGE', 'WHITE']" + ] + }, + "execution_count": 71, + "metadata": 
{}, + "output_type": "execute_result" + } + ], + "source": [ + "# Let's look at the mapping between the encoded values and the original values\n", + "list(label_encoder.inverse_transform([0, 1]))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "palette = {\n", + " 'ORANGE': 'orange',\n", + " 'WHITE': 'wheat',\n", + "}\n", + "# We need the encoded Item Size column to use it as the x-axis values in the plot\n", + "pumpkins['Item Size'] = encoded_pumpkins['ord__Item Size']\n", + "\n", + "g = sns.catplot(\n", + " data=pumpkins,\n", + " x=\"Item Size\", y=\"Color\", row='Variety',\n", + " kind=\"box\", orient=\"h\",\n", + " sharex=False, margin_titles=True,\n", + " height=1.8, aspect=4, palette=palette,\n", + ")\n", + "# Defining axis labels \n", + "g.set(xlabel=\"Item Size\", ylabel=\"\").set(xlim=(0,6))\n", + "g.set_titles(row_template=\"{row_name}\")\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "warnings.filterwarnings(action='ignore', category=UserWarning, module='seaborn')" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAioAAAGwCAYAAACHJU4LAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB9+0lEQVR4nO3deXQc1Z33/3dV9aatZcnaF1tesTEYL3gLdmzAxDAOkEDCPgES8jwTiJMZMkxgzu8MhFmAMUlIgkOWYSDJQIAwLE54MAEvbLHBbGFzAjZeZFuLV+1qqZffH1dSd6m7Zcu2UBs+r3N0wP3tunVv1e263666V7JisVgMERERkQxkD3cFRERERNJRoiIiIiIZS4mKiIiIZCwlKiIiIpKxlKiIiIhIxlKiIiIiIhlLiYqIiIhkLM9wV+BoRKNRdu/eTV5eHpZlDXd1RERE5DDEYjFaWlqoqKjAtge+Z3JcJyq7d++murp6uKshIiIiR6C2tpaqqqoB33NcJyp5eXmAaWgwGBzm2oiIiMjhaG5uprq6um8cH8hxnaj0Pu4JBoNKVERERI4zhzNtQ5NpRUREJGMpUREREZGMpURFREREMpYSFREREclYSlREREQkYylRERERkYylREVEREQylhIVERERyVhKVERERCRjKVERERGRjDXsv0J/165dfPe73+Xpp5+mvb2d8ePHc99993HqqacOd9U+2SIhqP1fOPg25E2E0ZeAJzseb3wR6laBNwg1l0N2wh+Nat0G238L4Tao/DwUzY3HultNrHULFMyA6i+C7TWxWBR2r4I9L0KgDMZcAf6R8W0Pvge1j5r/r/4SjJgSj4X2wbYHoKMOihdAxdlg9eTZ0W6ofRwOvAG542D0peDNjW+7dwPs+gN4ckwstyYea99pyu1uhvKzoWRBPBZuh+0PQ8tfYcRUqL4QHH9PW2JQ/xw0rAF/sWlLoGSwZ0FERA7BisViseHa+YEDB5g+fTqnn3463/jGNyguLubDDz9k3LhxjBs37pDbNzc3k5+fT1NTk/7Wz2B07oXVi6DpvfhrOTWweB1kj4JXroGP/jses31w2sNQ/QXY9hCs/1uIhePxCdfBrLuhZTM8twg6dsVjBdPhzNXg5MAL55vkp5c3HxY9DcXzYNOd8OYN7npO+0848QaTaKw9G7qb4rHyJfDZlRBph9VnmiSlV1YlnLkWghPgtWXwwd3xmOWBeb+Gmkth55Pw0kUQ7YrHx14Nc+6F9h2mLW3b4rH8KaZcXwG89GXY+UQ85smBhX+A0kUpD7mIiMQNZvwe1kTlxhtv5OWXX+bFF188ou2VqByhjd+ED1ckvz7qyzDmKnh+aXLMPxKWboKV4yDckhw/cx1sWg67n0qOTb7BJEKvXZccy58CC38Pvx9v7rgksmw4dzM8f647qep16gqTSGxanhyrWGr2u3pRcsyTB+dtgacmmzs1/S18CrbeDzt+lxybcB2MnAUbrkqO5Y6Fcz+M3+kREZGUBjN+D+sVdeXKlZx66ql8+ctfpqSkhOnTp/PLX/4y7ftDoRDNzc2uHzkCiXcCXK8/CbueTB0L7YMP70mdpIB5jFT3dPr97UxTbtN7sOW/k5MUMK9tvjd1knKocuuehh3/mzoWbjFtSZWk9JW7Mk3s8fT7bP0IDr6TOiYiIkdkWBOVjz76iHvuuYcJEybwzDPP8I1vfINvfetb/OpXv0r5/ttuu438/Py+n+rq6o+5xp8QvfMs+rP95iftdtkDxAJgedPH0u0TwMlKH/McYp/pyrW84AkMsO0hyrV9g4/BwMdPREQGbVgTlWg0yowZM/iP//gPpk+fzv/5P/+Hr3/96/zsZz9L+f6bbrqJpqamvp/a2tqPucafEDWXp3n9MvOTSk4NTPymmQSbxDJljroo9bajL0u/z+IFMP7rJgHoz/abWPGC5Nihyh11EdRcYerWX6DMtCWnJvW2NQOUW3N5+ljBDMiflDomIiJHZFgTlfLyck4
88UTXa5MnT2bHjh0p3+/3+wkGg64fOQJT/hkqPu9+rXgBTLvDrOCZdkd8pQ6YgX3+I+YOxYJHwV8Uj9k+mHkXFJwCM38II+e4y62+ECZdD6MvhonLcCUOeRNg3v0QKIbPPGAmpPby5JjXAsXmPXkTEwq1TKJRc4kpu/pL7n2OnGPqUjAVZv7IfQfEXwTzf2faMv937sTL8sC023uOwe3JCVLFUnPsqs6Fyf/knouSU2PqKyIix9SwTqa97LLLqK2tdU2m/Yd/+AdeeeUV/vSnPx1ye02mPUr73+hZnnyCWXmTqKPOLL/1BqH8HHASBvtIJ+x+2ixPLv9c8rLcxpegdbO5w1Aw1R1r2QJ7XoKscihb7B7su5tNuQAV55h994pFTX066qB4PuT1WxV24O348uSSfglGZyPU/dEkPxXnuO/eRLrMSqTuJlOfrHL3tnvWx5cnF85wx1q3QePzpv1lZ4E97Kv9RUSOC8fNqp+NGzfymc98hu9973tcdNFFvPrqq3z961/nF7/4BZdfnub2egIlKiIiIsef42bVz6xZs3j88cf57W9/y0knncS//uu/ctdddx1WkiIiIiKffMN6R+Vo6Y6KiIjI8ee4uaMiIiIiMhAlKiIiIpKxlKiIiIhIxlKiIiIiIhlLiYqIiIhkLCUqIiIikrGUqIiIiEjGUqIiIiIiGUuJioiIiGQsJSoiIiKSsZSoiIiISMZSoiIiIiIZS4mKiIiIZCwlKiIiIpKxlKiIiIhIxlKiIiIiIhlLiYqIiIhkLCUqIiIikrGUqIiIiEjGUqIiIiIiGUuJioiIiGQsJSoiIiKSsZSoiIiISMZSoiIiIiIZS4mKiIiIZCwlKiIiIpKxlKiIiIhIxlKiIiIiIhlLiYqIiIhkLCUqIiIikrGUqIiIiEjGUqIiIiIiGUuJioiIiGQsJSoiIiKSsZSoiIiISMZSoiIiIiIZS4mKiIiIZCwlKiIiIpKxlKiIiIhIxlKiIiIiIhlLiYqIiIhkLCUqIiIikrGUqIiIiEjGUqIiIiIiGUuJioiIiGQsz3Du/JZbbuF73/ue67UTTjiBv/zlL8NUo34e9ANd8X9f3AHbH4Gm9yB/Coy+CJxAPN6wFuqeBX8h1FwOWeXxWMsW2P4QRDqh+gtQODMe626GbQ9C2zYYORsqzwO759TEorDrKdj7MmRVwZjLwVcQ3/bAn6H2MbAcGH0xBE+IxzobYev/QGgPlJ4OZWeBZZlYJAS1/wsH34a8iTD6EvBkx7dtfBHqVoE3aNqSXRWPtW6D7b+FcBtUfh6K5ia0pdXEWrdAwQyo/iLY3nhbdq+CPS9CoAzGXAH+kfFtD74HtY+a/6/+EoyYEo+F9sG2B6CjDooXQMXZYPXk2dFuqH0cDrwBueNg9KXgzY1vu3cD7PoDeHJMLLcmHmvfacrtbobys6FkQTwWboftD0PLX2HEVKi+EBx/T1tiUP8cNKwBf7FpS6Akvm3zX822sQhUXwAFp8RjXQdg6wPQsROKPmOOYV9bwrBrJex7FXJqoOYycw567X8dap8w9Rh9KeSNi8c66kxbQvuh/CxzzntFOo9d3930X7D9Z/H4uO/AnDv5pIt07CXSsRfL8eLJqcTyxI9ftLuNSNtuYrEonuxSbP+Ivlgs2k24dTexcDu2fwROdilWz/mOxWJEOhqJdu7H8gRMuY4vXm6omXB7HRYWTk4Fti/er2OREOHWXcQiIZxAEXZWEVbP5zsWixBpqyfa1YztzcHJqcSynXhbOvcRad+DZXtwciuxPVkJbWk3bYmGcbJLcQLx600sGibSuotouB3bl4+TU+ZqS7SjkUjnfizHjye3yt2WrhYibXUAODnl2L68hLZ09bSlEydQiJ1VktCWaE9bmrA92Ti5lVh2fOiKdB4g0t5g2pJTge2NX8ei4Q4irbt62lKME4hfb2LRCJG23US7W7F9wZ62OPG2dPaebz+e3Eqs3s8+EO1qNceIGJ7scmx//DMai3Q
RbttFLNyJ7S/oOd8JbWlvIBo6iOXJxpNbgdV7fQSioYOE2xuwsHFyK7C9OfFyw52m3Eg3TlYRTlZRv7bUEe1uwfbm4eSUu8738c6KxWKx4dr5LbfcwqOPPspzzz3X95rH46GoqGiAreKam5vJz8+nqamJYDB46A0G40Hr0O/JmwBnroOsMvjTFWaA7uVkwYLHzIC65T549etm0Oo1+Z9g+h1mcF5zJnQ2xGMj58IZfwTLA+v+BhrXxWP+kXD6H6FwBrxzK7xzc0KFLDj1JzDxOmhYB8+fC+HWeLjqfJj/KHQdhNWLzKDVK6cGFq+D7FHwyjXw0X/HY7YPTnvYDFLbHoL1fwuxcDw+4TqYdTe0bIbnFkHHrnisYDqcuRqcHHjhfJP89PLmw6KnoXgebLoT3rzBfXyn/SeceINJNNaeDd1N8Vj5EvjsSoi0w+ozTZLSK6sSzlwLwQnw2jL44O6EQ+SBeb+Gmkth55Pw0kUQTUhGx14Nc+6F9h2mLW3b4rH8KaZcXwG89GXY+UQ85smBhX+A0kXwwU/htW8CCR+tk2+Bk2+G/W/C2rNM4tWrZKE5DrEIrDkL9m2IxwIlcMYak7S9dSO8f0dCW2yY/UsY91XY/Qy8+EWIdMTjoy+Bzzxg+tZzC6Hlw3jsaPpuKp6RcNHegd9znIrFYnTteZNI2+74i5aNv+RUnOwSwi21dO19m8Tz7ckfh69wMtGuFjrrN5gvBj1s/wj8ZXMBi1DDq0Q7E/qC7SVQNhfbn0/3gQ/oPviBqy7ekSfhDdYQ6dhLqGGj67w42aX4SmZCNExn3Xpi3S3x6nqy8JfNw/Jk0bX3bSKttQml2vhKZuDJKSPcuouuPW+525I3Gl/RyUS72wjVrScW6YyX6wsSKJsLtodQw0aiHXsSivXgL52DEyigu2kL3fs3udtSMBnviHFEOg8QanjFJOm9m2YV4y+dBdEInfUbiHXFP/uWE8BfPhfbm0vXvncJN29LKNXCVzwNT24l4bZ6uhrfAKLxY5Rbja9oKrFwB6H69cTC8c+L5c0jUD4XbC9dja8TaU+4JlsO/tJZOFlFdDdvo3vfu+62jJiIt2Ai0VCTOd/R7nhbAoX4S+cAMUL1G4iGDiYcIx+B8nnYvjy69m8i3LTFVa6vaCqevFFE2hsJNb5mvuz1tiWnAl/xdIiEzPkOt8Wr68nBXz4POyGZzjSDGb+HPVF54okneOutt45o+yFLVH4/HVoOs05jr4KKpWbg6i+rAs5+E1aONt9G+1uy0QzOiYlIr5NvMYNf/8EboHAWzLsfnpqSHLO9cN5WeO6z0PpRcnzu/bBvI3y4Ijk26ssw5ip4fmlyzD8Slm6CleMg3JIcP3MdbFoOu59Kjk2+wSRCr12XHMufAgt/D78f7/oQAmYgPnezSbgSk6pep64wicSm5cmxiqVmv6sXJcc8eXDeFnhqsjth6LXwKdh6P+z4XXJswnUwchZsuCo5ljsWFr8AK8e4LlR9lr4H66+C/RuTY9OXmztU79ySHCtZZOLPzEqOOQE4bzusmg4du5Pj839nzslH9yfHjqbvpnLZsF1KhlS4bXfPgOdmOX78FQvo3Lkmue8C/or5dO/f5E5EenhHTATbSRq8AWxfPr7iaXTuej5FbSwCVWf0DLLtSVFf0SnmW3nL9qSYk1OOJ7fKJDhJO/USqFpEZ+1a95eQ3raUzaO7aQvRjsakmCd/LJYnO2nwBjP4+0tnmWOUQqDqDEING11JVS/vyJOIhdsJNyVfx+ysErz54wjVr08u1PIQqDqdzl3rUn4O/aWzCLfu7Lu742pL3mhs/wi69v45RbHZ+Mvm9bQlua8HKhfStectoglJVV9bCidDNJKUeALYgZF4CycT2v1SirbY5hjtfpFYQrLby1cyg0h7I5HWnUkxJ7cKf/G05DIzxGDG72F99APw4YcfUlFRQSAQYN68edx2222MGjUq5XtDoRChUPxkNTc3D02lDjdJAXMbPtWgBGb
g+PCe9Bf67b9NnaSA+bbuyUkd278Rtv4mdSzaDZt/kTpJ6S13X4oLFZg7DImPlRKF9pm2pEpSwDxGqns6/T5zxqSONb0HW+5NeaEnFoXN96ZOUnrLbUu+IAOmLjljU8fCLaYtqZKU3nJ3rkwTezx1QgDmmG/+Rfr+sPXXqZMUMI+uEu+GJGpcZx69pBLphM33pK9T7ePpz8vR9N1PkUhbQ8rXY5GQSQhS9V0g0ro7ZZICEG6v73vM0F+0q4lwioGnZ6+EW7anTFIAIu31REPJAyWYdiQ+ZnDvtNvcmUiRpACE2+pSJil95SY8bnHVtruFcMuO1PsEwi07UiYpptx6Ymk+E9GORsKe1PskFibcsi1t3w63N7jvliTus70+ZUJgim0n3LqDVEkKQLh1Z8okBXrakuauZLRzH+HWXSljxKLmGKWpU6StnkjiXazEWJo2Ho+GdTLtnDlzuP/++1m1ahX33HMPW7duZcGCBbS0pO64t912G/n5+X0/1dXVH3ONU3D8YPvTx9MlGwBOtplbkoo9ULmW2TbtPgeI2f74PItB7ZOB9+kEwEpzEXQC6fd5qHIHastA5VpeGOi256HaYvsGH4ND1DcHSPNIcaByLcc8jkm7z4H62EBtOcT59g5Q7qeJlf4yaVkDfNezbNKdb8uyByw37XXhkPt00pd7iH0esi0DlGsdYVsYYB6FmWNxpG0Z6PjZA5Q7wPEDLAZoy0DtHPAYWQPWd+ByD3G+PyGGtSXnnHMOX/7yl5k6dSpLlizh//2//8fBgwd55JFHUr7/pptuoqmpqe+ntrY25fuO2ui/O/z31lxuflIJTjLzRXyFyTHLhjFfMfNGBltu+edg3NVmvkV/nlwY/3dmIutgy625zPykklMDE79pJsEmsUyZoy5Kve3oy9Lvs3gBjP+6e2JnL9tvYsULkmOHKnfURVBzBSkHikCZaUtOTeptawYod6DjVzDDHHtPbnLM8phzVr5k8OVWnQ9jr0x94fEVwIRrITh58OUequ9OSNN3P2U8uZUpX7e8uXiCNfHJ4v23y6vCyS5NGXNyKtOWa2cV48mrJmXftRw8+aOxfPmpy82txElTrie3AicnTVs8WTjBmrSJvze3CienPGXMk1uZtlzbX4g3OCp137VsvHmjsP2p+5iTU5H2GDk55XjzqlLGcPw4wTFYntTJ/UDHfqDjZ/ny8eSPTpM4WHjyqrGzilNuO9AxcrJLe853CrYXb7AGy5vimtJTridNuelePx5lVMo1YsQIJk6cyObNm1PG/X4/wWDQ9TMkTrvn8N5Xthim3gplZ5g5JYkdOLsKTnsIPFkw/xH3IxXbD7N+biZ7nrrCTDhNVHO5GXzGXg3jrsF1wcqfYiZQZleZ+SaJ37S9QbNPX76ZRJn4uMWyzZyNqnNhyj9Dxefd+yxeANPuMCt4pt3hvvgGykwbPAFY8Cj4EyY72z6YeZdZ1TLzhzByjrvc6gth0vVmRdLEZe625E0wc20Cxaa+iXcGPDnmtUCxeU/exIRCLZNo1Fxiyq7+knufI+eYuhRMhZk/ct9R8BeZeRuegPlvYuJleWDa7T3H4PbkBKliqTl2VeeaydCJF9+cGlNfX76ZeJy4UsfJgrn3mXM2+xfmHCa2Zdw15lxPuLYnuUpQMM30kbzxps8k3gHxFfS0Jduc98SVWZYDJ91s+ubUW01fTXSkfTfdHaHyNEnqJ4CTVWTmlCS03XIC+ItnYNkO/pKZ7s+LZeMrmortzcU38iQsn/s65eRU4gnW4ORW4+S6ByjLm2e29WThKz7F3ccsD/6SGVi2F3/xdKx+d+88+WPxZJfizR+PnVXiitn+QryFk3ECBXgLJrnaguPHVzITu68tiXfgbLyFU7D9QXyFU1yrmQCc7DKz39wKk7QltsWTg6/4FCzHbyZ9JvYxy8FXPL0ndgpWv7t3nmCNGYTzx+JkuxMk2z/C1MUXxFs4BdcwZvvw97TFVzKzX+Jl4S2YZI5B4aS
kBMnMexmPJ7sUT/44V8zyZJljbnvxl8xwf0m0bHzFp5hzVjQVy5vn2rb3PHuCNUnJiuUL4ht5ErY3B1/RVPf5tr34S2b29LEZWK4vcxaeERNM3yyYiB1wL0CxA+b1T4phnUzbX2trK6NGjeKWW27hW9/61iHfP6SrfgBeuBJ2/tr8f9YY+OJHsPdVaH4fgidC0Wz3+9t3Qv0as8SzfIn74hVuh91Pm2f+FWe7l+XGYtD4vJkYWjjLvSwXzFLXvRvMAFJ6RnyJMZilrrtXmeXM5ee4l+VGw1D/rFmmXLLQvSwXYP8bPcuTTzArbxJ11Jnlt96gKTdhmSGRTtOWcJu5uxNwXxRpfAlaN5s7DAVT3bGWLbDnJbP8tWyx+4PZ3WzKBag4xz3Yx6KmPh11UDzfvSwX4MDb8eXJJf0SjM5GqPujSX4qznHfvYl0mZVI3U2mPln9vjXuWR9fnlzY7y5V6zZz3gIlZul3wpJJulvN3JBo2JzvxME+FjPLmttroWiee0k5QNP78eXJJQvd5zu0z5xvJ2DakjhYRbuh7hmzPLnsDHfiAse27yauivuETqLtLxruINqxD8vxYmcVu27lx6IRIh2NEIvgZJW4luWapa77iIU7sP0jXMtywSx1jYYOYHmysAMj+5ayglnqGunYA5Zlyk3oY7FYlGjHXmKREHZgpGtZLkA01ES0qxnLm+taYgxmqWukYy/YHpzskgHaUuxalgsQ6dxPrLsN25+P3S8Ji3a3maXWTsC1XNqU202k3cyncLKLXfNlzNLmvcQindiBQteyXHOMmomGmrC82a4lxuYYhXqOkdNzjJyEcqNE2hshGsbJKnItKTdtOUCsZ3my7XffpYp2txPt3Ifl+HvakniMwj3HKNZzjNKd7wLXknLTlpae5cnpzndj2rZEO/YQi3RjZ410LSkHiIQOEOtqxfLl4vjTzDXMIMfNqp9//Md/5Nxzz2X06NHs3r2bm2++mbfeeov333+f4uLUt9ASDXmiIiIiIsfccbPqZ+fOnVx66aXs27eP4uJi5s+fz4YNGw4rSREREZFPvmFNVB56KM2SSxEREREybDKtiIiISCIlKiIiIpKxlKiIiIhIxlKiIiIiIhlLiYqIiIhkLCUqIiIikrGUqIiIiEjGUqIiIiIiGUuJioiIiGQsJSoiIiKSsZSoiIiISMZSoiIiIiIZS4mKiIiIZCwlKiIiIpKxlKiIiIhIxlKiIiIiIhlLiYqIiIhkLCUqIiIikrGUqIiIiEjGUqIiIiIiGUuJioiIiGQsJSoiIiKSsZSoiIiISMZSoiIiIiIZS4mKiIiIZCwlKiIiIpKxlKiIiIhIxlKiIiIiIhlLiYqIiIhkLCUqIiIikrGUqIiIiEjGUqIiIiIiGUuJioiIiGQsJSoiIiKSsZSoiIiISMZSoiIiIiIZS4mKiIiIZCwlKiIiIpKxlKiIiIhIxlKiIiIiIhlLiYqIiIhkLCUqIiIikrGUqIiIiEjGUqIiIiIiGUuJioiIiGQsz3BXINHtt9/OTTfdxLe//W3uuuuu4a4OPGi5/31xB2x/BJreg/wpMPoicALxeMNaqHsW/IVQczlklcdjLVtg+0MQ6YTqL0DhzHisuxm2PQht22DkbKg8D+yeUxOLwq6nYO/LkFUFYy4HX0F82wN/htrHwHJg9MUQPCEe62yErf8DoT1QejqUnQVWT5siIaj9Xzj4NuRNhNGXgCc7vm3ji1C3CrxB05bsqnisdRts/y2E26Dy81A0N6EtrSbWugUKZkD1F8H2xtuyexXseRECZTDmCvCPjG978D2ofdT8f/WXYMSUeCy0D7Y9AB11ULwAKs4GqyfPjnZD7eNw4A3IHQejLwVvbnzbvRtg1x/Ak2NiuTXxWPtOU253M5SfDSUL4rFwO2x/GFr+CiOmQvWF4Ph72hKD+uegYQ34i01bAiXxbZv/araNRaD6Aig4JR7rOgBbH4COnVD0GXMM+9oShl0rYd+rkFMDNZe
Zc9Br/+tQ+4Spx+hLIW9cPNZRZ9oS2g/lZ5lz3ivSeez67qolwL54nHK4bDcyeLFYjEhHI9HO/VieAJ6cSizH1xePhpoJt9dhYeHkVGD74v06FgkRbt1FLBLCCRRhZxVh9Xy+Y7EIkbZ6ol3N2N4cnJxKLNvp2zbSuY9I+x4s24OTW4ntyYrvs7udSNtuYtEwTnYpTiB+vYlFw0RadxENt2P78nFyyrB6+m4sFiPa0Uikcz+W48eTW+VuS1cLkbY6AJyccmxfXkJbunra0okTKMTOKkloS7SnLU3Ynmyc3EosOz50RToPEGlvMG3JqcD2xq9j0XAHkdZdPW0pxgnErzexaIRI226i3a3YvmBPW5x4Wzr3EunY29OWSqzezz4Q7Wo1x4gYnuxybH/8MxqLdBFu20Us3IntL8DJLnW3pb2BaOgglicbT24FVu/1EYiGDhJub8DCxsmtwPbmpOw3nzZWLBaLDXclADZu3MhFF11EMBjk9NNPP6xEpbm5mfz8fJqamggGg4d8/6D0T1JSyZsAZ66DrDL40xVmgO7lZMGCx8yAuuU+ePXrZtDqNfmfYPodZnBecyZ0NsRjI+fCGX8EywPr/gYa18Vj/pFw+h+hcAa8cyu8c3NChSw49Scw8TpoWAfPnwvh1ni46nyY/yh0HYTVi8yg1SunBhavg+xR8Mo18NF/x2O2D0572AxS2x6C9X8LsXA8PuE6mHU3tGyG5xZBx654rGA6nLkanBx44XyT/PTy5sOip6F4Hmy6E968wX18p/0nnHiDSTTWng3dTfFY+RL47EqItMPqM02S0iurEs5cC8EJ8Noy+ODuhEPkgXm/hppLYeeT8NJFEO2Kx8deDXPuhfYdpi1t2+Kx/CmmXF8BvPRl2PlEPObJgYV/gNJF8MFP4bVvAgkfrZNvgZNvhv1vwtqzTOLVq2ShOQ6xCKw5C/ZtiMcCJXDGGpO0vXUjvH9HQltsmP1LGPdV2P0MvPhFiHTE46Mvgc88YPrWcwuh5cN47Gj6bjqXZcSl5LgRi0YINbxKtDOhL9heAmVzsf35dB/4gO6DH7i28Y48CW+whkjHXkING13nxckuxVcyE6JhOuvWE+tu6YtZniz8ZfOwPFl07X2bSGttQqk2vpIZeHLKCLfuomvPWyT2XU/eaHxFJxPtbiNUt55YpDNeri9IoGwu2B5CDRuJduxJKNaDv3QOTqCA7qYtdO/f5G5LwWS8I8YR6TxAqOEVk6T3bppVjL90FkQjdNZvINYV/+xbTgB/+Vxsby5d+94l3LwtoVQLX/E0PLmVhNvq6Wp8A4jGj1FuNb6iqcTCHYTq1xMLxz8vljePQPlcsL10Nb5OpD3hmmw5+Etn4WQV0d28je5977rbMmIi3oKJRENNdNZvMF+eetsSKMRfOgeIEarfQDR0MOEY+QiUz8P25dG1fxPhpi2ucn1FU/HkjeKTaDDjd0YkKq2trcyYMYOf/vSn/Nu//RvTpk0b3kTlcJKUXmOvgoqlZuDqL6sCzn4TVo4230b7W7LRDM6JiUivk28xg1//wRugcBbMux+empIcs71w3lZ47rPQ+lFyfO79sG8jfLgiOTbqyzDmKnh+aXLMPxKWboKV4yDckhw/cx1sWg67n0qOTb7BJEKvXZccy58CC38Pvx9v7rgksmw4d7NJuBKTql6nrjCJxKblybGKpWa/qxclxzx5cN4WeGqyO2HotfAp2Ho/7PhdcmzCdTByFmy4KjmWOxYWvwArx7guVH2Wvgfrr4L9G5Nj05ebO1Tv3JIcK1lk4s/MSo45AThvO6yaDh0p7mrM/505Jx/dnxw7mr6bihKVQUk1eAPYvnx8xdPo3PV8iq0sAlVn9Ayy7UlRX9Ep5lt5y/akmJNTjie3yiQ4STv1EqhaRGftWveXkB7+snl0N20h2tGYFPPkj8XyZCcN3mAGf3/pLDp3rknRFkxbGja6kqpe3pEnEQu3E25Kvo7ZWSV488cRql+fXKjlIVB
1Op271qX8HPpLZxFu3dl3d8fVlrzR2P4RdO39c4pis/GXzetpS3JfD1QupGvPW0QTkqq+thROhmgkKfEEsAMj8RZOJrT7pRRtscmqXuy6M/VJMZjxOyMe/Vx33XUsXbqUxYsX82//9m9p3xcKhQiFQn3/bm5u/jiqN7DaJ1IPSmAGjg/vSX+h3/7b1EkKmG/rnjS3/fZvhK2/SR2LdsPmX6ROUnrL3ZfiQgXmDkPiY6VEoX2mLamSFDCPkeqeTr/PnDGpY03vwZZ7k5MUMK9tvjd1ktJbblvyBRkwdckZmzoWbjFtSZWk9Ja7c2Wa2OOpEwIwx3zzL9L3h62/SZ2kgHl0lXg3JFHjOvPoJZVIp2lLujrVPp7+vBxN35WjFmmrT/l6tKuJcOvONFvFCLfuSJmkAETa64mGkgdKs78G12MG9067zZ2JFEkKQLitLmWS0lduwuMWV227Wwi37Ei9TyDcsiNlkmLKrSeW5jMR7Wgk7Em9T2Jhwi3b0vbtcHuD+25J4j7b64lFQiljsXB7T1tSJ+Th1p0pkxToaUuau5LRzn1EWtN8fmNRIh178ORWpo5/Sgx7ovLQQw/xxhtvsHFjmgt4gttuu43vfe97H0OtBsHxg+1PH0+XbAA42WZuSaoObA9UrmW2HajcdGx/fJ7FoPZ5iHKdAFheiKX4kDuB9Ps8VLnpLkaHKtfygieQOnaofToB87grmqYt9gDfbg55XixSXuicQPrHK5YD9gBtGbCPDVDfo+m7cvSsAdYyWE760AAxLCd9uZY94D4ta4DhoHfbVF8oLLtvnkraOqVjD9BO2yEWOdK2DHT8bMw6klRtGeD49dQpfXCgmI1FLE2KYx2iL2jNy7AegdraWr797W/zwAMPEAgMcCHucdNNN9HU1NT3U1tbe8htjszIQ7+lV83l5ieV4CQzX8RXmByzbBjzFTNvZLDlln8Oxl1t5lv058mFCX9nJrIOttyay8xPKjk1MPGbZhJsEsuUOeqi1NuOviz9PosXwPivuyd29rL9Jla8IDl2qHJHXQQ1V5i69RcoM23JqUm9bc0A5Q50/ApmmGPvyU2OWR5zzsqXDL7cqvNh3FWpL1i+Aph4LQQnD77cI+27ckyk+5ZsZxXjyasmZd+1HDzB0Vi+/JTbOrmVOGnK9eRW4OSkjlmeLJxgTdrE35tbhZNTnjLmya1MW67tL8QbHJW671o23rxR2P7UfczJqUh7jJyccrx5VSljOH6c4BishAnC7m0r05c7wPGzfPl4gqPTJCQWnrxq7KzilNsOdIyc7NKe852C7cXJKkkd+xQZ1kTl9ddfp7GxkRkzZuDxePB4PDz//PP8+Mc/xuPxEIm4v2H6/X6CwaDrZ0hctvfw3le2GKbeCmVnmDkliR04uwpOewg8WTD/EfcjFdsPs35uJnueusJMOE1UczlMuNZM7Bx3Da4LVv4UM4Eyu8rMN3ESPozeoNmnL99Mokx83GLZZs5G1bkw5Z+h4vPufRYvgGl3mBU80+6Ir9QBM7DPf8TcoVjwKPiLEtrig5l3mVUtM38II+e4y62+ECZdb1YkTVzmbkveBDPXJlBs6pv4Dd6TY14LFJv35E1MKNQyiUbNJabs6i+59zlyjqlLwVSY+SP3HQV/kZm34QmY/yYmXpYHpt3ecwxuT06QKpaaY1d1rpkMnXjxzakx9fXlm4nHiSt1nCyYe585Z7N/Yc5hYlvGXWPO9YRre5KrBAXTTB/JG2/6TOIdEF9BT1uyzXlPXJllOXDSzaZvTr3V9NVER9p300o9KEh6Tm41Tq57gLK8efiKpmJ7svAVn+LuY5YHf8kMLNuLv3g6Vr+7jZ78sXiyS/Hmj8fuN7jZ/kK8hZNxAgV4Cybh+hw6fnwlM7FtB3/JzH534Gy8hVOw/UF8hVOw/SPcbcguM/vNrcATrHG3xZODr/gULMePr3i6u49ZDr7i6T2xU7D6rW7xBGvw5FbiyR+Lk+1OkGz/CFMXXxBv4RR
cw5jtw9/TFl/JzH6Jl4W3YJI5BoWTkhIkM+9lPJ7sUjz541wxy5NljrntxV8yw/0l0bLxFZ9izlnRVCxvnmvb3vPsCdYkJSuWL4hv5EnY3hx8RVPd59v24i+ZOfBdnE+JYZ1M29LSwvbt7jkGV199NZMmTeK73/0uJ5100oDbD+mqH4AHg0DC89PLYrD3VWh+H4InQtFs9/vbd0L9GrPEs3yJe7APt8Pup80z/4qz3ctyYzFofN5MDC2c5V6WC2ap694NZgApPSO+xBjMUtfdq8xy5vJz3Mtyo2Gof9YsUy5Z6F6WC7D/jZ7lySeYlTeJOurM8ltv0JSbOJkr0mnaEm4zd3cC/TL+xpegdbO5w1Aw1R1r2QJ7XjLLX8sWuz+Y3c2mXICKc9yDfSxq6tNRB8Xz3ctyAQ68HV+eXNIvwehshLo/muSn4hz33ZtIl1mJ1N1k6pPV71vjnvXx5cmF/e5StW4z5y1QYpZ+JyyZpLvVzA2Jhs35ThzsYzGzrLm9FormuZeUAzS9H1+eXLLQfb5D+8z5dgKmLYmDVbQb6p4xy5PLznAnLnBs+27ihHNNoj0q0a5WoqEDWJ4s7MDIvqWsYJa6Rjr2gGXhZJW4luXGYlGiHXuJRULYgZGuZbkA0VAT0a5mLG+ua4kxQCzcSaRjL9genOwS16ObWDRCpKMRYhGcrGLXslyASOd+Yt1t2P58bJ/7uhvtbjNLrZ2Aa7m0KbebSLtZFeRkF7vmy5ilzXuJRTqxA4VJy3KjXc1EQ01Y3mzXEmNzjEI9x8jpOUZOQrlRIu2NEA3jZBVh9XscHOk8QKxnebLtd9+lina3E+3ch+X4e9qSeIzCPcco1nOM4tdHs7R5H7FwB7a/wLWk3LSlpWd5crrz3ZiyLZ80x92qn0SLFi0a/lU/IiIiMmQGM35rlo6IiIhkrGFf9dPfunXrhrsKIiIikiGO6I7KwYMH+a//+i9uuukm9u/fD8Abb7zBrl27DrGliIiIyOEb9B2Vt99+m8WLF5Ofn8+2bdv4+te/TmFhIY899hg7duzg17/+9VDUU0RERD6FBn1H5frrr+eqq67iww8/dP3uk7/5m7/hhRdeOKaVExERkU+3QScqGzdu5P/+3/+b9HplZSX19al/JbSIiIjIkRh0ouL3+1P+jZ0PPviA4uLUv5VPRERE5EgMOlE577zzuPXWW+nuNn/wybIsduzYwXe/+10uvPDCY15BERER+fQadKLy/e9/n9bWVkpKSujo6GDhwoWMHz+evLw8/v3f/30o6igiIiKfUoNe9ZOfn8+zzz7LSy+9xNtvv01rayszZsxg8eLFh95YREREZBAGnajs2LGD0tJS5s+fz/z58/tej8Vi1NbWMmrUqGNaQREREfn0GvSjn5qaGmbMmMGWLVtcrzc2NjJmzJg0W4mIiIgM3hH9ZtrJkycze/ZsVq9e7Xo9w/6+oYiIiBznBp2oWJbFT3/6U/6//+//Y+nSpfz4xz92xURERESOlUHPUem9a/IP//APTJo0iUsvvZR33nmHf/mXfznmlRMREZFPt6P668nnnHMOf/rTnzjvvPN49dVXj1WdRERERIAjSFQWLlyIz+fr+/eJJ57IK6+8wgUXXKA5KiIiIocpEon0/fLUTyKfz4dtH9FUWBcrdhxnF83NzeTn59PU1EQwGBzu6oiIiBxSLBajvr6egwcPDndVhpRt24wZM8Z1c6PXYMbvw7qj0tzc3FdQqr/zk0gJg4iISHq9SUpJSQnZ2dmfyIUo0WiU3bt3U1dXx6hRo46qjYeVqBQUFFBXV0dJSQkjRoxIucNYLIZlWUQikSOujIiIyCdZJBLpS1JGjhw53NUZUsXFxezevZtwOIzX6z3icg4rUVmzZg2FhYUArF279oh3JiIi8mnWOyclOzt7mGsy9Hof+UQikaFPVBYuXJjy/0VERGTwPomPe/o7Vm087Om4e/fuZfv27a7X3nvvPa6++mouuugiHnzwwWNSIRE
REZFeh52oLFu2zPVbaBsbG1mwYAEbN24kFApx1VVX8Zvf/GZIKikiIiKfToedqGzYsIHzzjuv79+//vWvKSws5K233uLJJ5/kP/7jP1ixYsWQVFJERESOzi233MK0adOGuxqDdtiJSn19PTU1NX3/XrNmDRdccAEej5nmct555/Hhhx8e8wqKiIiIGYeXLVvG2LFj8fv9VFdXc+655yb9geBPmsNOVILBoOuX07z66qvMmTOn79+WZREKhY5p5URERAS2bdvGzJkzWbNmDcuXL+edd95h1apVnH766Vx33XUfWz2G4zfpHnaiMnfuXH784x8TjUZ59NFHaWlp4YwzzuiLf/DBB1RXVw9JJUVERD7Nrr32WizL4tVXX+XCCy9k4sSJTJkyheuvv54NGzYAsGPHDs4//3xyc3MJBoNcdNFFNDQ0pC0zGo1y6623UlVVhd/vZ9q0aaxataovvm3bNizL4uGHH2bhwoUEAgEeeOCBIW9rf4edqPzrv/4rK1euJCsri4svvph/+qd/oqCgoC/+0EMPaemyiIjIMbZ//35WrVrFddddR05OTlJ8xIgRRKNRzj//fPbv38/zzz/Ps88+y0cffcTFF1+cttwf/ehHfP/73+fOO+/k7bffZsmSJSmncdx44418+9vfZtOmTSxZsuSYt+9QDvuPEk6dOpVNmzbx8ssvU1ZW5nrsA3DJJZdw4oknHvMKioiIfJpt3ryZWCzGpEmT0r5n9erVvPPOO2zdurXv6cavf/1rpkyZwsaNG5k1a1bSNnfeeSff/e53ueSSSwC44447WLt2LXfddZdrcczf//3fc8EFFxzjVh2+Qf315KKiIs4///yUsaVLlx6TComIiEjc4fzt4E2bNlFdXe2agnHiiScyYsQINm3alJSoNDc3s3v3bk477TTX66eddhp//vOfXa+deuqpR1H7o3f0f39ZREREhsyECROwLIu//OUvw7L/VI+bPk5KVERERDJYYWEhS5YsYcWKFbS1tSXFDx48yOTJk6mtraW2trbv9ffff5+DBw+mnJYRDAapqKjg5Zdfdr3+8ssvZ9w0jkE9+hEREZGP34oVKzjttNOYPXs2t956K1OnTiUcDvPss89yzz338P7773PyySdz+eWXc9dddxEOh7n22mtZuHBh2kc3N9xwAzfffDPjxo1j2rRp3Hfffbz11lvDsrJnIEpUREREMtzYsWN54403+Pd//3e+853vUFdXR3FxMTNnzuSee+7BsiyefPJJli1bxmc/+1ls2+bss8/mJz/5Sdoyv/Wtb9HU1MR3vvMdGhsbOfHEE1m5ciUTJkz4GFt2aFbscGbppNDY2EhjYyPRaNT1+tSpU49JxQ5Hc3Mz+fn5NDU1EQwGP7b9ioiIHInOzk62bt3KmDFjCAQCw12dITVQWwczfg/6jsrrr7/OlVdeyaZNm/pmIluWRSwWw7IsIpHIYIsUERERSWnQicpXv/pVJk6cyL333ktpaSmWZQ1FvUREREQGn6h89NFH/O///i/jx48fivqIiIiI9Bn08uQzzzwz6ZfBiIiIiAyFQd9R+a//+i+uvPJK3n33XU466SS8Xq8rft555x2zyomIiMin26ATlfXr1/Pyyy/z9NNPJ8U0mVZERESOpUE/+lm2bBlXXHEFdXV1RKNR14+SFBERETmWBp2o7Nu3j3/4h3+gtLR0KOojIiIi0mfQicoFF1zA2rVrh6IuIiIiIi6DnqMyceJEbrrpJl566SVOPvnkpMm03/rWt45Z5UREROTT7YhW/eTm5vL888/z/PPPu2KWZSlRERER+ThEI7DnReiog6xyKF4AtjPku12xYgXLly+nvr6eU045hZ/85CfMnj17yPY36ERl69atx2zn99xzD/fccw/btm0DYMqUKfzLv/wL55xzzjHbx1F5sN9v3b24A7Y/Ak3vQf4UGH0ROAl/v6BhLdQ9C/5CqLncdJxeLVtg+0MQ6YTqL0DhzHisuxm2PQht22D
kbKg8D+yeUxOLwq6nYO/LkFUFYy4HX0F82wN/htrHwHJg9MUQPCEe62yErf8DoT1QejqUnQW9v0k4EoLa/4WDb0PeRBh9CXiy49s2vgh1q8AbNG3JrorHWrfB9t9CuA0qPw9FcxPa0mpirVugYAZUfxFsb7wtu1eZD1agDMZcAf6R8W0Pvge1j5r/r/4SjJgSj4X2wbYHzAeyeAFUnA1Wz5PLaDfUPg4H3oDccTD6UvDmxrfduwF2/QE8OSaWWxOPte805XY3Q/nZULIgHgu3w/aHoeWvMGIqVF8Ijr+nLTGofw4a1oC/2LQlUBLftvmvZttYBKovgIJT4rGuA7D1AejYCUWfMcewry1h2LUS9r0KOTVQc5k5B732vw61T5h6jL4U8sbFYx11pi2h/VB+ljnnvSKdx67vrur/l1i9cFkXcvyIdO4j0r4Hy/bg5FZie7L6YtHudiJtu4lFwzjZpTiB+PUmFg0Tad1FNNyO7cvHySnD6um7sViMaEcjkc79WI4fT24VluOLl9vVQqStDgAnpxzblxcvN9JFuHUXsUgnTqAQO6uk77eex2JRIm31RLuasD3ZOLmVWHZ86Ip0HiDS3mDaklOB7Y1fx6LhDiKtu3raUowTiF9vYtEIkbbdRLtbsX3BnrY48bZ07iXSsbenLZVYvZ/9TFH7GLz+bXMN65VdBTN/ZK45Q+Thhx/m+uuv52c/+xlz5szhrrvuYsmSJfz1r3+lpKTk0AUcgSP+o4RdXV1s3bqVcePG4fEc2R9h/v3vf4/jOEyYMIFYLMavfvUrli9fzptvvsmUKVMOuf2Q/lHC/klKKnkT4Mx1kFUGf7rCDNC9nCxY8JgZULfcB69+3QxavSb/E0y/wwzOa86EzoZ4bORcOOOPYHlg3d9A47p4zD8STv8jFM6Ad26Fd25OqJAFp/4EJl4HDevg+XMh3BoPV50P8x+FroOwepEZtHrl1MDidZA9Cl65Bj7673jM9sFpD5tBattDsP5vIRaOxydcB7PuhpbN8Nwi6NgVjxVMhzNXg5MDL5xvkp9e3nxY9DQUz4NNd8KbN7iP77T/hBNvMInG2rOhuykeK18Cn10JkXZYfaZJUnplVcKZayE4AV5bBh/cnXCIPDDv11BzKex8El66CKIJg+zYq2HOvdC+w7SlbVs8lj/FlOsrgJe+DDufiMc8ObDwD1C6CD74Kbz2TSDho3XyLXDyzbD/TVh7lkm8epUsNMchFoE1Z8G+DfFYoATOWGOStrduhPfvSGiLDbN/CeO+CrufgRe/CJGOeHz0JfCZB0zfem4htHwYjx1N303nsiO6lMjHKBaL0bX3bSKttQmv2vhKZuDJKSPcuouuPW+R2Hc9eaPxFZ1MtLuNUN16YpHOvpjlCxIomwu2h1DDRqIdexKK9eAvnYMTKKC7aQvd+ze56uItmIx3xDginQcINbxikvTeTbOK8ZfOgmiEzvoNxLrin33LCeAvn4vtzaVr37uEm7cllGrhK56GJ7eScFs9XY1vAPE/nOvkVuMrmkos3EGofj2xcPzzYnnzCJTPBdtLV+PrRNoTrsmWg790Fk5W0WEe6fSOyR8lrH0MXvwSrmsMAD3j1oJHhyxZmTNnDrNmzeLuu811NRqNUl1dzbJly7jxxhtd7z1Wf5Rw0IlKe3s7y5Yt41e/+hUAH3zwAWPHjmXZsmVUVlYmVXSwCgsLWb58OV/72tcO+d4hS1QOJ0npNfYqqFhqBq7+sirg7Ddh5WjzbbS/JRvN4JyYiPQ6+RYz+PUfvAEKZ8G8++GpFMmc7YXztsJzn4XWj5Ljc++HfRvhwxXJsVFfhjFXwfNLk2P+kbB0E6wcB+GW5PiZ62DTctj9VHJs8g0mEXrtuuRY/hRY+Hv4/XhzxyWRZcO5m03ClZhU9Tp1hUkkNi1PjlUsNftdvSg55smD87bAU5PdCUOvhU/B1vthx++SYxOug5GzYMNVybHcsbD4BVg5xtzl6W/pe7D+Kti/MTk
2fbm5Q/XOLcmxkkUm/sys5JgTgPO2w6rp0LE7OT7/d+acfHR/cuxo+m4qSlQyXqS9gVBDiv5newlULaKzdq37S0gPf9k8upu2EO1oTIp58sdiebLp3vduUszy5uEvnUXnzjUp6xOoOoNQw0Zi3cnXFO/Ik4iF2wk3JV/H7KwSvPnjCNWvTy7U8hCoOp3OXetSfg79pbMIt+7su7vjakveaGz/CLr2Jv/2dcuTTaDq9KP++3ZHnahEI7Cyxn0nxcUyd1bO23rMHwN1dXWRnZ3No48+yhe+8IW+16+88koOHjzIk08+6Xr/sP315Jtuuok///nPrFu3jrPPPrvv9cWLF3PLLbcccaISiUT43e9+R1tbG/PmzUv5nlAoRCgU6vt3c3PzEe3rmKp9IvWgBGbg+PCe9Bf67Q+lTlLAfFv35KSO7d8IW3+TOhbths2/SJ2k9Ja7L8WFCswdhsTHSolC+0xbUiUpYB4j1SX/EsC+feaMSR1reg+23JucpIB5bfO9qZOU3nLbtqeO1T1tHgOlEm4xbUmVpPSWu3NlmtjjqRMCMMd88y/S94etv06dpIB5dJV4NyRR4zrTV1KJdJq2pKtT7ePpz8vR9F05LrnuEiSKdps7EymSFIBwe13KJAUg0taAlfC4JVGsu4Vwy4609Qm37EiZpJhy64ml+UxEOxoJe1Lvk1iYcMu2tH073N6Q9jhE2uuJRUIpY7FwO7GuFiz/Mb57P1h7XhwgSQGIQXuteV/pomO667179xKJRJJ+PUlpaSl/+ctfjum+Eg06UXniiSd4+OGHmTt3riuznDJlClu2bBl0Bd555x3mzZtHZ2cnubm5PP7445x44okp33vbbbfxve99b9D7GFKOH+wBnl160yQbYG6xW07q2+r2QOVa4KT5kMLAMdsfn2cxqH0eolwnAJYXYik+5E4g/T4PVW66i9GhyrW8R75PJ2Aed0XTtMX2Jb/ea8D65mBuzaa48+AE0j9esRzTV9Luc6A+NkB9D9V3BypXjk9W+t9IYVnphwML22yb6guFZffNU0m98QDf6geIWbZDLJKmXMs+RFsGKNeyMb+ZI1VbnAHLHTD2celIvhN0VO87Dgz6qO/ZsyflhJm2trYjuiV2wgkn8NZbb/HKK6/wjW98gyuvvJL3338/5Xtvuukmmpqa+n5qa2tTvu9jVXO5+UklOMk8KvAVJscsG8Z8xcwbGWy55Z+DcVeb+Rb9eXJhwt+ZiayDLbfmMvOTSk4NTPymmQSbxDJljroo9bajL0u/z+IFMP7r7omdvWy/iRUvSI4dqtxRF0HNFfQ9s00UKDNtyalJvW3NAOUOdPwKZsD4vzPnoD/LY85Z+ZLBl1t1Poy9MvVF0lcAE6+F4OTBl3uovjsxTd+V45aTU5nydcuThROsSZvce3KrcHLK08Qq05Zr+wvxBkel7ruWjTc4Ctufuo85ORV4clOX6+SU482rShnD8eMEx2B5Uif3Tk5l+nJzK3HSxCxfPrYvxWf745aV+jwc8fsGoaioCMdxaGhw35FqaGigrCzV2HBsDDpROfXUU3nqqfg8hN7k5L/+67/SPrIZiM/nY/z48cycOZPbbruNU045hR/96Ecp3+v3+wkGg66fIXG4z9rLFsPUW6HsDDOnJDGLz66C0x4CTxbMf8T9SMX2w6yfm8mep64wE04T1VwOE641EzvHXYNrsM2fYiZQZleZ+SaJ37S9QbNPX76ZRJn4uMWyzZyNqnNhyj9Dxefd+yxeANPuMCt4pt0RX6kDZmCf/wh4AmaSlj9hQpntg5l3mVUtM38II+e4y62+ECZdb1YkTVzmbkveBDPXJlBs6pv4Dd6TY14LFJv35E1MKNQyiUbNJabs6i+59zlyjqlLwVQzAz7xjoK/yMzb8ATMfxMTL8sD027vOQa3JydIFUvNsas610yGTrz45tSY+vryzcTjxJU6ThbMvc+cs9m/MOcwsS3jrjHnesK1PclVgoJppo/kjTd9JvE
OiK+gpy3Z5rwnrsyyHDjpZtM3p95q+mqiI+27clxzAgV4Cybh+hw6fnwlM7FtB3/JzH534Gy8hVOw/UF8hVOw/SPc5WWX4ckfiye3Ak+wxhWzPDn4ik/Bcvz4iqe7+5jl4Cue3hM7BavfnWdPsAZPbiWe/LE42e4B1/aPMHXxBfEWTsE1jNk+/D1t8ZXM7Jd4WXgLJpljUDgpKUEy817G48kuxZPvfmxsebLwF/e7Tg+X4gU9n/V0NwYsyK5O/wXvKPh8PmbOnMnq1av7XotGo6xevfqIxv/DNejJtC+99BLnnHMOV1xxBffffz//9//+X95//33+9Kc/8fzzzzNz5sxDFzKAM844g1GjRnH//fcf8r1DuuoHkifVXhaDva9C8/sQPBGK+q0bb98J9WvMEs/yJe7BPtwOu582z/wrznYvy43FoPF5MzG0cJZ7WS6Ypa57N5jOWXpGfIkxmKWuu1eZ5czl57iX5UbDUP+sWaZcstC9LBdg/xs9y5NPMCtvEnXUmeW33qApN2GZIZFO05Zwm7m7E+h3h63xJWjdbO4wFEx1x1q2wJ6XTLZfttg92Hc3m3IBKs5xD/axqKlPRx0Uz3cvywU48HZ8eXJJvw9oZyPU/dEkPxXnuO/eRLrMSqTuJlOf/t9C9qyPL08u7HeXqnWbOW+BErP0O2HJJN2tZm5INGzOd+JgH4uZZc3ttVA0z72kHKDp/fjy5JKF7vMd2mfOtxMwbUl81BTthrpnzPLksjPciQsc276b+NnQJNrjTizcSaRjL9genOwS16ObWDRCpKMRYhGcrOKkZbmRzv3Eutuw/fnYPvd1N9rdRrRzP5YTwM4qct1lj0W7ibSbVUFOdjFWQh8zS5v3Eot0YgcKsfslLtGuZqKhJixvtmuJMUAsEiLSsQcsByerBCthAmksFiXS3gjRME5WEZbHfec20nmAWM/yZNuf368t7UQ792E5/p62HJvHPsd21Q+4HyUP/aqfhx9+mCuvvJKf//znzJ49m7vuuotHHnmEv/zlL0lzV4Zt1Q/Ali1buP322/nzn/9Ma2srM2bM4Lvf/S4nn3zyoMq56aabOOeccxg1ahQtLS08+OCD3HHHHTzzzDOcddZZh9x+yBMVERGRY+iYJCqQ5veoVJs73EP4e1QA7r777r5f+DZt2jR+/OMfM2fOnKT3DWuicqx87WtfY/Xq1dTV1ZGfn8/UqVP57ne/e1hJCihRERGR48sxS1Rg2H4z7eEatuXJjuNQV1eXNKF23759lJSUEIkcxi+G6nHvvfcOdvciIiICJik5xkuQM9GgH7qluwETCoXw+QZYtikiIiIySId9R+XHP/4xYFb59P5hwl6RSIQXXniBSZMmHfsaioiIyKfWYScqP/zhDwFzR+VnP/sZjhN/Dubz+aipqeFnP/vZsa+hiIiIfGoddqLS+1eTTz/9dB577DEKCvS7FURERGRoDXoy7dq1a4eiHiIiIiJJDjtRuf766w/rfT/4wQ+OuDIiIiIiiQ47UXnzzTcP+Z6j/fPXIiIiIokOO1HRIx8RERH5uA3Z36wOBoN89NFHQ1W8iIiIfAoMWaIyjL+ZX0RE5BMvFosR6dhLuHUXkY69Qz7uvvDCC5x77rlUVFRgWRZPPPHEkO6v16BX/YiIiMjwCrfV0b3vPWKRzr7XLCeAd+QUPDnlA2x55Nra2jjllFP46le/ygUXDO0fPkykREVEROQ4Em6ro6vx9aTXY5FO83rJzCFJVs455xzOOeecY17uoQzZox8RERE5tmKxGN373hvwPd373vtETb9QoiIiInKciHbucz3uSSUW6STaue9jqtHQU6IiIiJynIhFQsf0fceDIUtUOjo66OjoGKriRUREPnUsx39M33c8GLJEJSsri6ysrKEqXkRE5FPHDozEcgIDvsdyAtiBkR9TjYaeVv2IiIgcJyzLwjtySspVP728I6cMyZ+0aW1tZfPmzX3/3rp1K2+99RaFhYWMGjXqmO+
vlxIVERGR44gnpxxKZn7sv0fltdde4/TTT+/7d+8fK77yyiu5//77h2SfoERFRETkuOPJKcfJLutZBRTCcvzmsdAQ/nHgRYsWDcuy5yFLVPSXlEVERIaOZVk4WUXDXY0hp7/1IyIiIhlryBKVp59+msrKyqEqXkRERD4FDuvRT++EmcPxgx/8AID58+cfWY1EREREehxWovLmm2+6/v3GG28QDoc54YQTAPjggw9wHIeZM2ce+xqKiIh8wkSj0eGuwpA7VlNADitRWbt2bd///+AHPyAvL49f/epXFBQUAHDgwAGuvvpqFixYcEwqJSIi8knk8/mwbZvdu3dTXFyMz+f7RC4+icVi7Nmzx/zeF6/3qMqyYoNMeSorK/njH//IlClTXK+/++67fO5zn2P37t1HVaHBaG5uJj8/n6amJoLB4Me2XxERkSPV1dVFXV0d7e3tw12VIWVZFlVVVeTm5ibFBjN+D3p5cnNzM3v27El6fc+ePbS0tAy2OBERkU8Vn8/HqFGjCIfDRCKR4a7OkPF6vTiOc9TlDDpR+eIXv8jVV1/N97//fWbPng3AK6+8wg033MAFF1xw1BUSERH5pOt9JHK0j0U+DQadqPzsZz/jH//xH7nsssvo7u42hXg8fO1rX2P58uXHvIIiIiLy6TWoOSqRSISXX36Zk08+GZ/Px5YtWwAYN24cOTk5Q1bJdDRHRURE5PgzZHNUHMfhc5/7HJs2bWLMmDFMnTr1qCoqIiIiMpBB/2bak046iY8++mgo6iIiIiLiMuhE5d/+7d/4x3/8R/7whz9QV1dHc3Oz60dERETkWBn071Gx7Xhuk/hLamKxGJZlfaxLrTRHRURE5PgzpL9HJfG31IqIiIgMpUEnKgsXLhyKeoiIiIgkGXSiAnDw4EHuvfdeNm3aBMCUKVP46le/Sn5+/jGtnIiIiHy6DXoy7Wuvvca4ceP44Q9/yP79+9m/fz8/+MEPGDduHG+88cZQ1FFEREQ+pQY9mXbBggWMHz+eX/7yl3g85oZMOBzmmmuu4aOPPuKFF14Ykoqmosm0IiIix5/BjN+DTlSysrJ48803mTRpkuv1999/n1NPPfVj/WuQSlRERESOP4MZvwf96CcYDLJjx46k12tra8nLyxtscSIiIiJpDTpRufjii/na177Gww8/TG1tLbW1tTz00ENcc801XHrppUNRRxEREfmUGvSqnzvvvBPLsvjKV75COBwGwOv18o1vfIPbb7/9mFdQREREPr0GPUelV3t7u+uvJ2dnZx/Tih0OzVERERE5/gzpb6btlZ2dzcknn3ykmwNw22238dhjj/GXv/yFrKwsPvOZz3DHHXdwwgknHFW5x8yDlvvfF3fA9keg6T3InwKjLwInEI83rIW6Z8FfCDWXQ1Z5PNayBbY/BJFOqP4CFM6Mx7qbYduD0LYNRs6GyvPA7jk1sSjsegr2vgxZVTDmcvAVxLc98GeofQwsB0ZfDMGEY9fZCFv/B0J7oPR0KDsLev/sQSQEtf8LB9+GvIkw+hLwJCSbjS9C3SrwBk1bsqvisdZtsP23EG6Dys9D0dyEtrSaWOsWKJgB1V8E2xtvy+5VsOdFCJTBmCvAPzK+7cH3oPZR8//VX4IRU+Kx0D7Y9gB01EHxAqg4G6yeJ5fRbqh9HA68AbnjYPSl4M2Nb7t3A+z6A3hyTCy3Jh5r32nK7W6G8rOhZEE8Fm6H7Q9Dy19hxFSovhAcf09bYlD/HDSsAX+xaUugJL5t81/NtrEIVF8ABafEY10HYOsD0LETij5jjmFfW8KwayXsexVyaqDmMnMOeu1/HWqfMPUYfSnkjYvHOupMW0L7ofwsc857RTqPXd9941ZoXBmPV1wKix5EMkcsFiHSVk+0qxnbm4OTU4llO33xSOc+Iu17sGwPTm4ltierLxbtbifStptYNIyTXYoTiF9vYtEwkdZdRMPt2L58nJwyrJ6+G4vFiHY0Euncj+X48eRWYTm+eLldLUT
a6gBwcsqxffE5jbFIF+HWXcQinTiBQuyskr4/0RKLRXva0oTtycbJrcSy40NXpPMAkfYG05acCmxv/DoWDXcQad3V05ZinED8ehOLRoi07Sba3YrtC/a0xYm3pXMvkY69PW2pxOr97MvH7ojvqBwLZ599NpdccgmzZs0iHA7zz//8z7z77ru8//775OTkHHL7Ib2j0j9JSSVvApy5DrLK4E9XmAG6l5MFCx4zA+qW++DVr5tBq9fkf4Lpd5jBec2Z0NkQj42cC2f8ESwPrPsbaFwXj/lHwul/hMIZ8M6t8M7NCRWy4NSfwMTroGEdPH8uhFvj4arzYf6j0HUQVi8yg1avnBpYvA6yR8Er18BH/x2P2T447WEzSG17CNb/LcTC8fiE62DW3dCyGZ5bBB274rGC6XDmanBy4IXzTfLTy5sPi56G4nmw6U548wb38Z32n3DiDSbRWHs2dDfFY+VL4LMrIdIOq880SUqvrEo4cy0EJ8Bry+CDuxMOkQfm/RpqLoWdT8JLF0G0Kx4fezXMuRfad5i2tG2Lx/KnmHJ9BfDSl2HnE/GYJwcW/gFKF8EHP4XXvgkkfLROvgVOvhn2vwlrzzKJV6+SheY4xCKw5izYtyEeC5TAGWtM0vbWjfD+HQltsWH2L2HcV2H3M/DiFyHSEY+PvgQ+84DpW88thJYP47Gj6bsp+eGyzkO8Rz4OsUgXnXXriXW39L1mebLwl83D8mTRtfdtIq21CVvY+Epm4MkpI9y6i649b5HYdz15o/EVnUy0u41Q3Xpikfh5tnxBAmVzwfYQathItGNPQrEe/KVzcAIFdDdtoXv/Jlc9vQWT8Y4YR6TzAKGGV0yS3rtpVjH+0lkQjdBZv4FYV/yzbzkB/OVzsb25dO17l3DztoRSLXzF0/DkVhJuq6er8Q0g2hd1cqvxFU0lFu4gVL+eWDj+ebG8eQTK54LtpavxdSLtCddky8FfOgsnq+hQh18O05AuTx5Ke/bsoaSkhOeff57Pfvazh3z/kCUqv6uA7rrDe+/Yq6BiqRm4+suqgLPfhJWjzbfR/pZsNINzYiLS6+RbzODXf/AGKJwF8+6Hp6Ykx2wvnLcVnvsstH6UHJ97P+zbCB+uSI6N+jKMuQqeX5oc84+EpZtg5TgItyTHz1wHm5bD7qeSY5NvMInQa9clx/KnwMLfw+/HmzsuiSwbzt1sEq7EpKrXqStMIrFpeXKsYqnZ7+pFyTFPHpy3BZ6a7E4Yei18CrbeDzt+lxybcB2MnAUbrkqO5Y6FxS/AyjHmLk9/S9+D9VfB/o3JsenLzR2qd25JjpUsMvFnZiXHnACctx1WTYeO3cnx+b8z5+Sj+5NjR9N3U7ksYy4ln2pde98h3LI96XUnpxxPbhWhhhT9z/YSqFxE58617i8hPfxl8+hu2kK0ozEp5skfi+XJpnvfu0kxy5uHv3QWnTvXpKxroOoMQg0bXUlVL+/Ik4iF2wk3JV/H7KwSvPnjCNWvTy7U8hCoPp3OnetSfg79pbMIt+7su7vjakveaGz/CLr2/jlFsdkEqk53/TFeOXIfy6OfodDUZLLmwsLClPFQKEQoFOr7d3Nz89BU5HCTFDC34VMNSmAGjg/vSX+h3/5Q6iQFzLd1T5q7Svs3wtbfpI5Fu2HzL1InKb3l7ktxoQJzhyHxsVKi0D7TllRJCpjHSHVPp99nzpjUsab3YMt/JycpYF7bcm/qJKW33LbkCzJg6pIzNnUs3GLakipJ6S1358o0scdTJwRgjvnmX6TvD1t/kzpJAfPoKvFuSKLGdaavpBLpNG1JV6fax9Ofl6Ppu5KxXHcCEl9va8DqfQzbX7TbJDcpkhSAcHtdyiSlr1xv6jmKse4Wwi3Jv86ir9yWHSmTFFNuPbE0n4loRyPhNPskFibcvD1t3w63N6Q/Ru31xCKhlLFYuJ1YVwuWX/MhP26DXp48VKLRKH//93/PaaedxkknnZTyPbfddhv5+fl9P9XV1R9zLVN
w/GAP8OwyXbIB5ha75aSO2QOVa4EzwORlzwAx2x+fZzGofTLwPp0AWGkugk4g/T7BHIcj3We6ci0veAKpY4dTru0bfOyQ5WYDab6NDVSu5YA9QFsG7GMDteUo+q5kLivNZd2y08egb65JyhgDbGvZA26b9hp3iJiZUzPAPgcYvqyByrXsAcp1BjxGA8ZkyGTMUb/uuut49913eeihNN8cgZtuuommpqa+n9ra2rTvPSrFKR59pFNzuflJJTjJzBfxpbhDZNkw5itm3shgyy3/HIy72sy36M+TC+P/zkxkHWy5NZeZn1RyamDiN80k2CSWKXPURam3HX1Z+n0WL4DxX3dP7Oxl+02seEFy7FDljroIaq4gZWIQKDNtyalJvW3NAOUOdPwKZsCEvzPnoD/LY85Z+ZLBl1t1Poy7KvVF0lcAE6+F4OTBl3ukfVcympNbmfJ1T24FTk7qmOXJwgmOSZv4e3KrcHLK08Qq05Zr+wvxBkel7ruWjTc4Ctufuo85ORV40rTFySnHk1eVMobjxwnWYHlSfwFycirTl5tbmfb4Wb58bF+Kz7YMuYxIVL75zW/yhz/8gbVr11JVlabzAX6/n2Aw6PoZEmf94fDeV7YYpt4KZWeYOSWJWXx2FZz2EHiyYP4j7kcqth9m/dxM9jx1hZlwmqjmcphwrZnYOe4aXINt/hQzgTK7ysw3Sbwb4Q2affryzSTKxMctlm3mbFSdC1P+GSo+795n8QKYdodZwTPtjvhKHTAD+/xHzB2KBY+CP2FCme2DmXeZVS0zfwgj57jLrb4QJl1vViRNXOZuS94EM9cmUGzqm/gN3pNjXgsUm/fkTUwo1DKJRs0lpuzqL7n3OXKOqUvBVJj5I/cdBX+RmbfhCZj/JiZelgem3d5zDG5PTpAqlppjV3WumQydePHNqTH19eWbiceJK3WcLJh7nzlns39hzmFiW8ZdY871hGt7kqsEBdNMH8kbb/pM4h0QX0FPW7LNeU9cmWU5cNLNpm9OvdX01URH2nfTKZh/6PfIx8KbPx47q8T1mu0vxFs4GSdQgLdgEq7PoePHVzIT23bwl8zsdwfOxls4BdsfxFc4Bds/wlWuk12GJ38sntwKPMEaV8zy5OArPgXL8eMrnu7uY5aDr3h6T+wULK/77p0nWIMntxJP/licbHeCZPtHmLr4gngLp+Aaxmwf/p62+Epm9ku8LLwFk8wxKJyUlCCZeS/j8WSX4skf54pZniz8xf2u0/KxGdbJtLFYjGXLlvH444+zbt06JkyYMKjth/z3qKw6A/avNf9vBeHSJtj7KjS/D8EToWi2+/3tO6F+jVniWb7EPdiH22H30+aZf8XZ7mW5sRg0Pm8mhhbOci/LBbPUde8GM4CUnhFfYgxmqevuVWY5c/k57mW50TDUP2uWKZcsdC/LBdj/Rs/y5BPMyptEHXVm+a03aMpNWGZIpNO0Jdxm7u4E3BdFGl+C1s3mDkPBVHesZQvsecksfy1b7B7su5tNuQAV57gH+1jU1KejDornu5flAhx4O748uaRfgtHZCHV/NMlPxTnuuzeRLrMSqbvJ1Cer37fGPevjy5ML+92lat1mzlugxCz9TlgySXermRsSDZvznTjYx2JmWXN7LRTNcy8pB2h6P748uWSh+3yH9pnz7QRMWxIf80W7oe4Zszy57Ax34gLHtu8mrorTJNqMFA01Ee1qxvLmupYYA8TCnUQ69oLtwckucT26iUUjRDoaIRbBySpOWpYb6dxPrLsN25+P7XNfd6PdbUQ792M5AeysItfE01i0m0i7WRXkZBe75suYpc17iUU6sQOF2P0Sl2hXM9FQE5Y327XEGCAWCRHp2AOWg5NV4lqGHYtFibQ3QjSMk1WE1e9xcKTzALGe5cm2P79fW9qJdu7Dcvw9bcmI7/WfGMfNqp9rr72WBx98kCeffNL1u1Py8/PJyhpg3kIP/cI3ERGR489xk6ikW+Z13333cdVVVx1yeyUqIiI
ix5/jZnlyBv0KFxEREclAeugmIiIiGUuJioiIiGQsJSoiIiKSsZSoiIiISMZSoiIiIiIZS4mKiIiIZCwlKiIiIpKxlKiIiIhIxlKiIiIiIhlLiYqIiIhkLCUqIiIikrGUqIiIiEjGUqIiIiIiGUuJioiIiGQsJSoiIiKSsZSoiIiISMZSoiIiIiIZS4mKiIiIZCwlKiIiIpKxlKiIiIhIxlKiIiIiIhlLiYqIiIhkLCUqIiIikrGUqIiIiEjGUqIiIiIiGUuJioiIiGQsJSoiIiKSsZSoiIiISMZSoiIiIiIZS4mKiIiIZCwlKiIiIpKxlKiIiIhIxlKiIiIiIhlLiYqIiIhkLCUqIiIikrGUqIiIiEjGUqIiIiIiGUuJioiIiGQsJSoiIiKSsZSoiIiISMZSoiIiIiIZS4mKiIiIZCwlKiIiIpKxlKiIiIhIxlKiIiIiIhnLM5w7f+GFF1i+fDmvv/46dXV1PP7443zhC18Yziq5PWi5/31xB2x/BJreg/wpMPoicALxeMNaqHsW/IVQczlklcdjLVtg+0MQ6YTqL0DhzHisuxm2PQht22DkbKg8D+yeUxOLwq6nYO/LkFUFYy4HX0F82wN/htrHwHJg9MUQPCEe62yErf8DoT1QejqUnQVWT5siIaj9Xzj4NuRNhNGXgCc7vm3ji1C3CrxB05bsqnisdRts/y2E26Dy81A0N6EtrSbWugUKZkD1F8H2xtuyexXseRECZTDmCvCPjG978D2ofdT8f/WXYMSUeCy0D7Y9AB11ULwAKs4GqyfPjnZD7eNw4A3IHQejLwVvbnzbvRtg1x/Ak2NiuTXxWPtOU253M5SfDSUL4rFwO2x/GFr+CiOmQvWF4Ph72hKD+uegYQ34i01bAiXxbZv/araNRaD6Aig4JR7rOgBbH4COnVD0GXMM+9oShl0rYd+rkFMDNZeZc9Br/+tQ+4Spx+hLIW9cPNZRZ9oS2g/lZ5lz3ivSeez67qqlQEM8TjVctgP55ItFw0RadxENt2P78nFyyrB6+m4sFiPa0Uikcz+W48eTW4Xl+Pq2jXa1EGmrA8DJKcf25cXLjXQRbt1FLNKJEyjEzirB6rlWxWJRIm31RLuasD3ZOLmVWHZ86Ip0HiDS3oBle3ByKrC98etYNNxBpHUXsWgYJ7sYJ5BwvZHjhhWLxWLDtfOnn36al19+mZkzZ3LBBRcMOlFpbm4mPz+fpqYmgsHgoTcYjP5JSip5E+DMdZBVBn+6wgzQvZwsWPCYGVC33Aevft0MWr0m/xNMv8MMzmvOhM6EC//IuXDGH8HywLq/gcZ18Zh/JJz+RyicAe/cCu/cnFAhC079CUy8DhrWwfPnQrg1Hq46H+Y/Cl0HYfUiM2j1yqmBxesgexS8cg189N/xmO2D0x42g9S2h2D930IsHI9PuA5m3Q0tm+G5RdCxKx4rmA5nrgYnB1443yQ/vbz5sOhpKJ4Hm+6EN29wH99p/wkn3mASjbVnQ3dTPFa+BD67EiLtsPpMk6T0yqqEM9dCcAK8tgw+uDvhEHlg3q+h5lLY+SS8dBFEu+LxsVfDnHuhfYdpS9u2eCx/iinXVwAvfRl2PhGPeXJg4R+gdBF88FN47ZtAwkfr5Fvg5Jth/5uw9iyTePUqWWiOQywCa86CfRvisUAJnLHGJG1v3Qjv35HQFhtm/xLGfRV2PwMvfhEiHfH46EvgMw+YvvXcQmj5MB47mr6bzmXDdimRj0G0u41Q3Xpikc6+1yxfkEDZXLA9hBo2Eu3YE9/A9uAvnYMTKKC7aQvd+ze5yvMWTMY7YhyRzgOEGl4xSXrvplnF+EtnQTRCZ/0GYl3xz77lBPCXz8X25tK1713CzdsSSrXwFU/Dk1tJuK2ersY3gGhf1Mmtxlc0tS8JkuEzmPF7WBOVRJZlZU6icjhJSq+xV0HFUjNw9ZdVAWe/CStHm2+j/S3ZaAbnxESk18m3mMGv/+ANUDgL5t0PT01JjtleOG8rPPd
ZaP0oOT73fti3ET5ckRwb9WUYcxU8vzQ55h8JSzfBynEQbkmOn7kONi2H3U8lxybfYBKh165LjuVPgYW/h9+PN3dcElk2nLvZJFyJSVWvU1eYRGLT8uRYxVKz39WLkmOePDhvCzw12Z0w9Fr4FGy9H3b8Ljk24ToYOQs2XJUcyx0Li1+AlWPMXZ7+lr4H66+C/RuTY9OXmztU79ySHCtZZOLPzEqOOQE4bzusmg4du5Pj839nzslH9yfHjqbvpqJE5ROts/5Voh2NSa978sdiebLp3vduUszy5uEvnUXnzjUpywxUnUGoYSOx7uRrinfkScTC7YSbkq9jdlYJ3vxxhOrXJxdqeQhUnU7nrnUpP4f+0lk42aUp6yMfn8GM38P66GewQqEQoVCo79/Nzc3DWJsetU+kHpTADBwf3pP+Qr/9odRJCphv656c1LH9G2Hrb1LHot2w+Repk5TecvelGCjB3GFIfKyUKLTPtCVVkgLmMVLd0+n3mTMmdazpPdhyb3KSAua1zfemTlJ6y23bnjpW97R5DJRKuMW0JVWS0lvuzpVpYo+nTgjAHPPNv0jfH7b+JnWSAubRVeLdkESN60xfSSXSCZvvSV+n2sfTn5ej6bvyqRKLRVMmKQCRtgashMctru26Wwi3pH8sGG7ZkTJJMeXWE0vzmYh2NBJOs09iYcIt29L27XB7gxKV48xxNZn2tttuIz8/v++nurp6uKtk5grY/vRxb5pkA8wtdstJHbMHKtcCJ82HFAaO2f74PItB7fMQ5ToBsLzpY+n2eahyPYfYZ7pyLe+R79MJmMddg40dstxsIM3duoHKtRywA6ljkD6hPVS5h+q7A5Urnz5WmuHCsvvmqaSOp7nGAdjpY5btkHaIsmysAYYva4B9DlhXyUjH1Rm76aabaGpq6vupra0doj0NItuuudz8pBKcZB4V+AqTY5YNY75i5o0Mttzyz8G4q818i/48uTDh78xE1sGWW3OZ+UklpwYmftNMgk1imTJHXZR629GXpd9n8QIY/3X3xM5ett/Eihckxw5V7qiLoOYKUiYGgTLTlpya1NvWDFDuQMevYIY59p7c5JjlMeesfMngy606H8ZdlXqg8BXAhGshOHnw5R6q705M03flU8eybJyc8pQxT24lTk5lypjtL8QbHJW671o23rxR2P7UfczJqcCTm7pcJ6ccT15VyhiOHyc4BsuTlWbb1GVK5jquEhW/308wGHT9DInL6g/vfWWLYeqtUHaGmVOSmMVnV8FpD4EnC+Y/4n6kYvth1s/NZM9TV5gJp4lqLjeDz9irYdw1uAbb/ClmAmV2lZlv4iR8GL1Bs09fvplEmfi4xbLNnI2qc2HKP0PF5937LF4A0+4wK3im3RFfqQNmYJ//CHgCsOBR8BcltMUHM+8yq1pm/hBGznGXW30hTLrerEiauMzdlrwJZq5NoNjUN/EbvCfHvBYoNu/Jm5hQqGUSjZpLTNnVX3Lvc+QcU5eCqTDzR+47Cv4iM2/DEzD/TUy8LA9Mu73nGNyenCBVLDXHrupcMxk68eKbU2Pq68s3E48TV+o4WTD3PnPOZv/CnMPEtoy7xpzrCdf2JFcJCqaZPpI33vSZxDsgvoKetmSb8564Msty4KSbTd+ceqvpq4mOtO+mlXfot8hxzVc4Bds/wvWak12GJ38sntwKPMEaV8zy5OArPgXL8eMrnu7uY5aDr3h6T+wUrH53nj3BGjy5lXjyx+JkuxMk2z/C1MUXxFs4BdcwZvvwl8zEth18JTP73VW18BZMwgkcTn+WTKLJtAN5sBA4EP/3ZTHY+yo0vw/BE6Fotvv97Tuhfo1Z4lm+xD3Yh9th99PmmX/F2e5lubEYND5vJoYWznIvywWz1HXvBjOAlJ4RX2IMZqnr7lVmOXP5Oe5ludEw1D9rlimXLHQvywXY/0bP8uQTzMqbRB11ZvmtN2jKTVhmSKTTtCXcZu7uJC7LBWh8CVo3mzsMBVPdsZYtsOc
ls/y1bLF7sO9uNuUCVJzjHuxjUVOfjjoonu9elgtw4O348uSSfglGZyPU/dEkPxXnuO/eRLrMSqTuJlOfrH7fGvesjy9PLux3l6p1mzlvgRKz9DthySTdrWZuSDRsznfiYB+LmWXN7bVQNM+9pByg6f348uSShe7zHdpnzrcTMG1JfDQW7Ya6Z8zy5LIz3IkLHNu+mzjhXJNoP1UinfuJdbdh+/Oxfe7rbrS7jWjnfiwngJ1V5FpdE4t2E2k3q4Kc7GKshD5mljbvJRbpxA4UYvdLXKJdzURDTVje7KQlxrFIiEjHHrAcnKySnkdGveVGibQ3QjSMk1WE5RngEap8rI6bVT+tra1s3rwZgOnTp/ODH/yA008/ncLCQkaNGnXI7Yc8UREREZFj7rhZ9fPaa69x+unxX0p1/fXXA3DllVdy//33D1OtREREJFMMa6KyaNEiMuTJk4iIiGSg42oyrYiIiHy6KFERERGRjKVERURERDKWEhURERHJWEpUREREJGMpUREREZGMpURFREREMpYSFREREclYSlREREQkYylRERERkYylREVEREQylhIVERERyVhKVERERCRjKVERERGRjKVERURERDKWEhURERHJWEpUREREJGMpUREREZGMpURFREREMpYSFREREclYSlREREQkYylRERERkYylREVEREQylhIVERERyVhKVERERCRjKVERERGRjKVERURERDKWEhURERHJWEpUREREJGMpUREREZGMpURFREREMpYSFREREclYSlREREQkYylRERERkYylREVEREQylhIVERERyVhKVERERCRjKVERERGRjKVERURERDKWEhURERHJWEpUREREJGMpUREREZGMpURFREREMpYSFREREclYSlREREQkY3mGuwIAK1asYPny5dTX13PKKafwk5/8hNmzZw93teBBy/3viztg+yPQ9B7kT4HRF4ETiMcb1kLds+AvhJrLIas8HmvZAtsfgkgnVH8BCmfGY93NsO1BaNsGI2dD5Xlg95yaWBR2PQV7X4asKhhzOfgK4tse+DPUPgaWA6MvhuAJ8VhnI2z9HwjtgdLToewssHraFAlB7f/CwbchbyKMvgQ82fFtG1+EulXgDZq2ZFfFY63bYPtvIdwGlZ+HorkJbWk1sdYtUDADqr8Itjfelt2rYM+LECiDMVeAf2R824PvQe2j5v+rvwQjpsRjoX2w7QHoqIPiBVBxNlg9eXa0G2ofhwNvQO44GH0peHPj2+7dALv+AJ4cE8uticfad5pyu5uh/GwoWRCPhdth+8PQ8lcYMRWqLwTH39OWGNQ/Bw1rwF9s2hIoiW/b/FezbSwC1RdAwSnxWNcB2PoAdOyEos+YY9jXljDsWgn7XoWcGqi5zJyDXvtfh9onTD1GXwp54+KxjjrTltB+KD/LnPNekc5j13dXnYqbA5eFOd60b/2D699Zo88h0lZHtLsF25uHk1OOZTt98UjHXiIde7EcL56cSixP/PhFu9uItO0mFoviyS7F9o/oi8Wi3YRbdxMLt2P7R+Bkl2L1nO9YLEako5Fo534sT8CU6/ji5YaaCbfXYWHh5FRg++L9OhYJEW7dRSwSwgkUYWcVYfV8vmOxCJG2eqJdzdjeHJycSndbOvcRad+DZXtwciuxPVkJbWk3bYmGcbJLcQLx600sGibSuotouB3bl4+TU+ZqS7SjkUjnfizHjye3yt2WrhYibXUAODnl2L68hLZ09bSlEydQiJ1VktCWaE9bmrA92Ti5lVh2fOiKdB4g0t5g2pJTge2NX8ei4Q4irbt62lKME4hfb2LRCJG23US7W7F9wZ62OPG2dPaebz+e3Eqs3s8+EO1qNceIGJ7scmx//DMai3QRbttFLNyJ7S/oOd8JbWlvIBo6iOXJxpNbgdV7fQSioYOE2xuwsHFyK7C9OfFyw52m3Eg3TlYRTlZRv7Ycm77bWf8XiO7ti+MpJbt6FsPJisViseGswMMPP8x
XvvIVfvaznzFnzhzuuusufve73/HXv/6VkpKSAbdtbm4mPz+fpqYmgsHggO8dtP5JSip5E+DMdZBVBn+6wgzQvZwsWPCYGVC33Aevft0MWr0m/xNMv8MMzmvOhM6GeGzkXDjjj2B5YN3fQOO6eMw/Ek7/IxTOgHduhXduTqiQBaf+BCZeBw3r4PlzIdwaD1edD/Mfha6DsHqRGbR65dTA4nWQPQpeuQY++u94zPbBaQ+bQWrbQ7D+byGWMDBNuA5m3Q0tm+G5RdCxKx4rmA5nrgYnB1443yQ/vbz5sOhpKJ4Hm+6EN29wH99p/wkn3mASjbVnQ3dTPFa+BD67EiLtsPpMk6T0yqqEM9dCcAK8tgw+uDvhEHlg3q+h5lLY+SS8dBFEu+LxsVfDnHuhfYdpS9u2eCx/iinXVwAvfRl2PhGPeXJg4R+gdBF88FN47ZtAwkfr5Fvg5Jth/5uw9iyTePUqWWiOQywCa86CfRvisUAJnLHGJG1v3Qjv35HQFhtm/xLGfRV2PwMvfhEiHfH46EvgMw+YvvXcQmj5MB47mr6bzmXDeikZlP5JSiqWJwd/+Twsx0/XnjeJtO1OCNr4S07FyS4h3FJL1963STzfnvxx+AonE+1qobN+g/li0MP2j8BfNhewCDW8SrQzoS/YXgJlc7H9+XQf+IDugx+46uQdeRLeYA2Rjr2EGja6zouTXYqvZCZEw3TWrSfW3ZLQliz8ZfOwPFl07X2bSGttQqk2vpIZeHLKCLfuomvPW+625I3GV3Qy0e42QnXriUU64+X6ggTK5oLtIdSwkWjHnoRiPfhL5+AECuhu2kL3/k3uthRMxjtiHJHOA4QaXjFJeu+mWcX4S2dBNEJn/QZiXfHPvuUE8JfPxfbm0rXvXcLN2xLPGr7iaXhyKwm31dPV+AYQjR+j3Gp8RVOJhTsI1a8nFo5/XixvHoHyuWB76Wp8nUh7wjXZcvCXzsLJKqK7eRvd+951t2XERLwFE4mGmsz5jnbH2xIoxF86B4gRqt9ANHQw4Rj5CJTPw/bl0bV/E+GmLa5yfUVT8eSNItLeSKjxNfNlr7ctORX4iqdDJGTOd7gtXt2j6LvpZI/5/CHfMxiDGb+HPVGZM2cOs2bN4u67zWASjUaprq5m2bJl3HjjjQNuO2SJyuEkKb3GXgUVS83A1V9WBZz9Jqwcbb6N9rdkoxmcExORXiffYga//oM3QOEsmHc/PDUlOWZ74byt8NxnofWj5Pjc+2HfRvhwRXJs1JdhzFXw/NLkmH8kLN0EK8dBuCU5fuY62LQcdj+VHJt8g0mEXrsuOZY/BRb+Hn4/3vUhBMxAfO5mk3AlJlW9Tl1hEolNy5NjFUvNflcvSo558uC8LfDUZHfC0GvhU7D1ftjxu+TYhOtg5CzYcFVyLHcsLH4BVo5xXaj6LH0P1l8F+zcmx6YvN3eo3rklOVayyMSfSfGtxgnAedth1XTo2J0cn/87c04+uj85djR9N5XjJFFp3/pHoOuQ7wNwcqtwskt6Bjw3y/Hjr1hA5841yX0X8FfMp3v/Jnci0sM7YiLYTtLgDWD78vEVT6Nz1/MpamQRqDqjZ5BtT4r6ik4x38pbtie3JaccT26VSXCSduolULmIzp1r3V9CettSNo/upi1EOxqTYp78sVie7KTBG8zg7y+dZY5RCoGqMwg1bHQlVb28I08iFm4n3JR8HbOzSvDmjyNUvz65UMtDoOp0OnetS/k59JfOIty6s+/ujqsteaOx/SPo2vvnFMVm4y+b19OW5L4eqFxI1563iCYkVX1tKZwM0UhS4glgB0biLZxMaPdLKdpim2O0+0ViCcluL1/JDCLtjURadybFjqbvpjKcicqwPvrp6uri9ddf56abbup7zbZtFi9ezPr1yR0wFAoRCsVPVnNz88dSzwHVPpF6UAIzcHx4T/oL/fbfpk5SwHxb9+Skju3fCFt/kzoW7YbNv0idpPSWuy/FhQrMHYbEx0qJQvtMW1I
lKWAeI9U9nX6fOWNSx5regy3/nfrDEovClntTJym95bYlX5ABU5ecsalj4RbTllRJSm+5O1emiT2eOiEAc8w3/yJ9f9j6m9RJCphHV4l3QxI1rjOPXlKJdJq2pKtT7ePpz8vR9N3j2uElKYD5Vp3mu1wsEjIJQZoLfbh1V8okBSDcXt/3mKG/aFcT4RQDT89eCbfuSJmkmPrWEw0lD5QAkbYG12MG9067e9qS+hFeuL0uZZLSV27C4xZXbbtbCLfsSL1PINyyI2WSYsqtJ5bmMxHtaCTsSb1PYmHCLdvS9u1we4P7bkniPtvrUyYEpth2wq07SHf3Idy6M2WSAj1tSXNXMtq5j3DrrpQxYlFzjNLUKdJWTyTxLlZi7Cj6bqYZ1sm0e/fuJRKJUFpa6nq9tLSU+vr6pPffdttt5Ofn9/1UV1d/XFVNz/GD7U8fT5dsADjZZm5JKvZA5Vpm27T7HCBm++PzLAa1TwbepxMAK81F0Amk3yeYRw1Hus905VpeSHgOO+hybd/gYzDwsXeygTR36wYq13LAHqAtA/axgdpyiPPtHaDcTwvLjs8fShlP/13PJCKpz7d1yHLTXBf6yh1gu3TlHmKf1kAxBtjWsgfcFnuA+g4QM3MsjrQtAx0/e4ByBzh+gMUhjn3a2EDHyDpEfYfqfGfEFNXDclyt+rnppptoamrq+6mtrT30RkOt5nLzk0pwkpkv4itMjlk2jPmKmTcy2HLLPwfjrk59kfTkwvi/MxNZB1tuzWXmJ5WcGpj4TTMJNollyhx1UeptR1+Wfp/FC2D8190TO3vZfhMrXpAcO1S5oy6CmitIOVAEykxbcmpSb1szQLkDHb+CGebYe3KTY5bHnLPyJYMvt+p8GHdV6ouOrwAmXgvByYMv91B9d0Kavnu8s4sO/Z4enpxKPLmVKWOWNxdvsCY+Wbz/tnlVONmlKWPOAOXaWcV48qpJ2XctB09wNJYvP3W5uZU4acr15Fbg5KRpiycLJzgmbeLvya3CySlPE6tMW67tL8SbNyp137VsvHmjsP2p+5iTU5H2GDk55XjzqlLGcPw4wTFYntRfgAY69gMdP8uXjyd/dJqExMKTV42dVZxy24GOkZNd2nO+U7C9eII1WN4U15Secj1pyj1U3/UM0HczzbAmKkVFRTiOQ0OD+zZcQ0MDZWXJA6Lf7ycYDLp+hsThPmsvWwxTb4WyM8ycksQOnF0Fpz0EniyY/4j7kYrth1k/N5M9T11hJpwmqrkcJlxrJnaOuwbXBSt/iplAmV1l5psk3o3wBs0+fflmEmXi4xbLNnM2qs6FKf8MFf2eNxYvgGl3mBU80+5wd+BAmWmDJwALHgV/woXe9sHMu8yqlpk/hJFz3OVWXwiTrjcrkiYuc7clb4KZaxMoNvVNvDPgyTGvBYrNe/ImJhRqmUSj5hJTdvWX3PscOcfUpWAqzPyR+46Cv8jM2/AEzH8TEy/LA9Nu7zkGtycnSBVLzbGrOtdMhk68+ObUmPr68s3E48SVOk4WzL3PnLPZvzDnMLEt464x53rCtT3JVYKCaaaP5I03fSbxDoivoKct2ea8J67Mshw46WbTN6feavpqoiPtu58A2aPnHvpNgB0owlswESeryMwpSei7lhPAXzwDy3bwl8x0f14sG1/RVGxvLr6RJ2H53NcpJ6cST7AGJ7caJ9c9QFnePLOtJwtf8SnuPmZ58JfMwLK9+IunY/W7e+fJH4snuxRv/njsLPdCBNtfiLdwMk6gAG/BJFdbcPz4SmZi97Ul8Q6cjbdwCrY/iK9wims1E4CTXWb2m1thBr7Etnhy8BWfguX4zaTPxD5mOfiKp/fETsHqd/fOE6wxg3D+WJxsd4Jk+0eYuviCeAun4BrGbB/+nrb4Smb2S7wsvAWTzDEonJSUIJl5L+PxZJfiyR/nilmeLHPMbS/+khnuL4mWja/4FHPOiqZiefNc2/aeZ0+wJilZsXxBfCNPwvbm4Cua6j7fthd/ycy
ePjYDy/VlzsIzYoLpmwUTsQPu5PuI+25aqRPjj0tGTKadPXs2P/nJTwAzmXbUqFF885vfHL7JtL36T6q9LAZ7X4Xm9yF4IhT1W0LdvhPq15glnuVL3B0g3A67nzbP/CvOdi/LjcWg8XkzMbRwlntZLpilrns3mAGk9Iz4EmMwS113rzLLmcvPcS/LjYah/lmzTLlkoXtZLsD+N3qWJ59gVt4k6qgzy2+9QVNuwjJDIp2mLeE2c3cn4L4o0vgStG42dxgKprpjLVtgz0tm+WvZYvcHs7vZlAtQcY57sI9FTX066qB4vntZLsCBt+PLk0v6JRidjVD3R5P8VJzjvnsT6TIrkbqbTH2y+n1r3LM+vjy5sN9dqtZt5rwFSszS74Qlk3S3mrkh0bA534mDfSxmljW310LRPPeScoCm9+PLk0sWus93aJ85307AtCVxsIp2Q90zZnly2RnuxAWObd9N/GwcJ5No+2vf9ieI7e/5l032mL8hEjpArKsVy5eL43cnaNFwB9GOfViOFzur2HUrPxaNEOlohFgEJ6vEtSzXLHXdRyzcge0f4VqWC2apazR0AMuThR0Y2beUFcxS10jHHrAsU25CH4vFokQ79hKLhLADI13LcgGioSaiXc1Y3lzXEmMwS10jHXvB9uBklwzQlmLXslyASOd+Yt1t2P587H5JWLS7zSy1dgKu5dKm3G4i7WY+hZNd7JovY5Y27yUW6cQOFLqW5Zpj1Ew01ITlzXYtMTbHKNRzjJyeY+QklBsl0t4I0TBOVpFrWa5pywFiPcuTbb97MI52txPt3Ifl+HvakniMwj3HKNZzjNKd7wLXknLTlpae5cnpzndj2rZEO/YQi3RjZ410LSkHjmnfTVwVd6wn0fY6rlb9PPzww1x55ZX8/Oc/Z/bs2dx111088sgj/OUvf0mau9LfkCcqIiIicswdN6t+AC6++GL27NnDv/zLv1BfX8+0adNYtWrVIZMUERER+eQb9jsqR0N3VERERI4/gxm/j6tVPyIiIvLpokRFREREMpYSFREREclYSlREREQkYylRERERkYylREVEREQylhIVERERyVhKVERERCRjKVERERGRjDXsv0L/aPT+Ut3m5uZhromIiIgcrt5x+3B+Of5xnai0tLQAUF1dfYh3ioiISKZpaWkhPz9/wPcc13/rJxqNsnv3bvLy8lx/Kls+mZqbm6murqa2tlZ/20nkE0af70+XWCxGS0sLFRUV2PbAs1CO6zsqtm1TVVU13NWQj1kwGNSFTOQTSp/vT49D3Unppcm0IiIikrGUqIiIiEjGUqIixw2/38/NN9+M3+8f7qqIyDGmz7ekc1xPphUREZFPNt1RERERkYylREVEREQylhIVERERyVhKVERERCRjKVGR48aKFSuoqakhEAgwZ84cXn311eGukogcpRdeeIFzzz2XiooKLMviiSeeGO4qSYZRoiLHhYcffpjrr7+em2++mTfeeINTTjmFJUuW0NjYONxVE5Gj0NbWximnnMKKFSuGuyqSobQ8WY4Lc+bMYdasWdx9992A+TtP1dXVLFu2jBtvvHGYaycix4JlWTz++ON84QtfGO6qSAbRHRXJeF1dXbz++ussXry47zXbtlm8eDHr168fxpqJiMhQU6IiGW/v3r1EIhFKS0tdr5eWllJfXz9MtRIRkY+DEhURERHJWEpUJOMVFRXhOA4NDQ2u1xsaGigrKxumWomIyMdBiYpkPJ/Px8yZM1m9enXfa9FolNWrVzNv3rxhrJmIiAw1z3BXQORwXH/99Vx55ZWceuqpzJ49m7vuuou2tjauvvrq4a6aiByF1tZWNm/e3PfvrVu38tZbb1FYWMioUaOGsWaSKbQ8WY4bd999N8uXL6e+vp5p06bx4x//mDlz5gx3tUTkKKxbt47TTz896fUrr7yS+++//+OvkGQcJSoiIiKSsTRHRURERDKWEhURERHJWEpUREREJGMpUREREZGMpURFREREMpY
SFREREclYSlREREQkYylRERERkYylREVEMtott9zCtGnThrsaIjJMlKiIyJCqr69n2bJljB07Fr/fT3V1Neeee67rj0yKiKSjP0ooIkNm27ZtnHbaaYwYMYLly5dz8skn093dzTPPPMN1113HX/7yl4+lHt3d3Xi93o9lXyJybOmOiogMmWuvvRbLsnj11Ve58MILmThxIlOmTOH6669nw4YNAOzYsYPzzz+f3NxcgsEgF110EQ0NDWnLjEaj3HrrrVRVVeH3+5k2bRqrVq3qi2/btg3Lsnj44YdZuHAhgUCABx54YMjbKiJDQ4mKiAyJ/fv3s2rVKq677jpycnKS4iNGjCAajXL++eezf/9+nn/+eZ599lk++ugjLr744rTl/uhHP+L73/8+d955J2+//TZLlizhvPPO48MPP3S978Ybb+Tb3/42mzZtYsmSJce8fSLy8dCjHxEZEps3byYWizFp0qS071m9ejXvvPMOW7dupbq6GoBf//rXTJkyhY0bNzJr1qykbe68806++93vcskllwBwxx13sHbtWu666y5WrFjR976///u/54ILLjjGrRKRj5vuqIjIkIjFYod8z6ZNm6iuru5LUgBOPPFERowYwaZNm5Le39zczO7duznttNNcr5922mlJ7z/11FOPsOYikkmUqIjIkJgwYQKWZX1sE2b7S/W4SUSOP0pURGRIFBYWsmTJElasWEFbW1tS/ODBg0yePJna2lpqa2v7Xn///fc5ePAgJ554YtI2wWCQiooKXn75ZdfrL7/8csr3i8jxT3NURGTIrFixgtNOO43Zs2dz6623MnXqVMLhMM8++yz33HMP77//PieffDKXX345d911F+FwmGuvvZaFCxemfXRzww03cPPNNzNu3DimTZvGfffdx1tvvaWVPSKfUEpURGTIjB07ljfeeIN///d/5zvf+Q51dXUUFxczc+ZM7rnnHizL4sknn2TZsmV89rOfxbZtzj77bH7yk5+kLfNb3/oWTU1NfOc736GxsZETTzyRlStXMmHChI+xZSLycbFihzPjTURERGQYaI6KiIiIZCwlKiIiIpKxlKiIiIhIxlKiIiIiIhlLiYqIiIhkLCUqIiIikrGUqIiIiEjGUqIiIiIiGUuJioiIiGQsJSoiIiKSsZSoiIiISMb6/wHCnL2DXQSZ/gAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Suppressing warning message claiming that a portion of points cannot be placed into the plot due to the high number of data points\n", + "import warnings\n", + "warnings.filterwarnings(action='ignore', category=UserWarning, module='seaborn')\n", + "\n", + "palette = {\n", + " 0: 'orange',\n", + " 1: 'wheat'\n", + "}\n", + "sns.swarmplot(x=\"Color\", y=\"ord__Item Size\", hue=\"Color\", data=encoded_pumpkins, palette=palette)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Lưu ý**: Bỏ qua cảnh báo KHÔNG phải là một phương pháp tốt và nên tránh bất cứ khi nào có thể. Các cảnh báo thường chứa những thông điệp hữu ích giúp chúng ta cải thiện mã và giải quyết vấn đề. \n", + "\n", + "Lý do chúng ta bỏ qua cảnh báo cụ thể này là để đảm bảo tính dễ đọc của biểu đồ. Việc vẽ tất cả các điểm dữ liệu với kích thước dấu nhỏ hơn, đồng thời giữ sự nhất quán với màu sắc của bảng màu, sẽ tạo ra một hình ảnh không rõ ràng.\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "# X is the encoded features\n", + "X = encoded_pumpkins[encoded_pumpkins.columns.difference(['Color'])]\n", + "# y is the encoded label\n", + "y = encoded_pumpkins['Color']\n", + "\n", + "# Split the data into training and test sets\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.94 0.98 0.96 166\n", + " 1 0.85 0.67 0.75 33\n", + "\n", + " accuracy 0.92 199\n", + " macro avg 0.89 0.82 0.85 199\n", + 
"weighted avg 0.92 0.92 0.92 199\n", + "\n", + "Predicted labels: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0\n", + " 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 1 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 1 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0\n", + " 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1\n", + " 0 0 0 1 0 0 0 0 0 0 0 0 1 1]\n", + "F1-score: 0.7457627118644068\n" + ] + } + ], + "source": [ + "from sklearn.metrics import f1_score, classification_report \n", + "from sklearn.linear_model import LogisticRegression\n", + "\n", + "# Train a logistic regression model on the pumpkin dataset\n", + "model = LogisticRegression()\n", + "model.fit(X_train, y_train)\n", + "predictions = model.predict(X_test)\n", + "\n", + "# Evaluate the model and print the results\n", + "print(classification_report(y_test, predictions))\n", + "print('Predicted labels: ', predictions)\n", + "print('F1-score: ', f1_score(y_test, predictions))" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[162, 4],\n", + " [ 11, 22]])" + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.metrics import confusion_matrix\n", + "confusion_matrix(y_test, predictions)" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from sklearn.metrics import roc_curve, roc_auc_score\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "\n", + "y_scores = model.predict_proba(X_test)\n", + "# calculate ROC curve\n", + "fpr, tpr, thresholds = roc_curve(y_test, y_scores[:,1])\n", + "\n", + "# plot ROC curve\n", + "fig = plt.figure(figsize=(6, 6))\n", + "# Plot the diagonal 50% line\n", + "plt.plot([0, 1], [0, 1], 'k--')\n", + "# Plot the FPR and TPR achieved by our model\n", + "plt.plot(fpr, tpr)\n", + "plt.xlabel('False Positive Rate')\n", + "plt.ylabel('True Positive Rate')\n", + "plt.title('ROC Curve')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.9749908725812341\n" + ] + } + ], + "source": [ + "# Calculate AUC score\n", + "auc = roc_auc_score(y_test,y_scores[:,1])\n", + "print(auc)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn thông tin chính thức. Đối với các thông tin quan trọng, nên sử dụng dịch vụ dịch thuật chuyên nghiệp bởi con người. 
Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "vscode": { + "interpreter": { + "hash": "949777d72b0d2535278d3dc13498b2535136f6dfe0678499012e853ee9abcab1" + } + }, + "coopTranslator": { + "original_hash": "ef50cc584e0b79412610cc7da15e1f86", + "translation_date": "2025-09-06T13:28:32+00:00", + "source_file": "2-Regression/4-Logistic/solution/notebook.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/vi/3-Web-App/1-Web-App/notebook.ipynb b/translations/vi/3-Web-App/1-Web-App/notebook.ipynb new file mode 100644 index 000000000..e69de29bb diff --git a/translations/vi/3-Web-App/1-Web-App/solution/notebook.ipynb b/translations/vi/3-Web-App/1-Web-App/solution/notebook.ipynb new file mode 100644 index 000000000..11f6df572 --- /dev/null +++ b/translations/vi/3-Web-App/1-Web-App/solution/notebook.ipynb @@ -0,0 +1,267 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": 
"70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "5fa2e8f4584c78250ca9729b46562ceb", + "translation_date": "2025-09-06T14:32:24+00:00", + "source_file": "3-Web-App/1-Web-App/solution/notebook.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " datetime city state country shape \\\n", + "0 10/10/1949 20:30 san marcos tx us cylinder \n", + "1 10/10/1949 21:00 lackland afb tx NaN light \n", + "2 10/10/1955 17:00 chester (uk/england) NaN gb circle \n", + "3 10/10/1956 21:00 edna tx us circle \n", + "4 10/10/1960 20:00 kaneohe hi us light \n", + "\n", + " duration (seconds) duration (hours/min) \\\n", + "0 2700.0 45 minutes \n", + "1 7200.0 1-2 hrs \n", + "2 20.0 20 seconds \n", + "3 20.0 1/2 hour \n", + "4 900.0 15 minutes \n", + "\n", + " comments date posted latitude \\\n", + "0 This event took place in early fall around 194... 4/27/2004 29.883056 \n", + "1 1949 Lackland AFB, TX. Lights racing acros... 12/16/2005 29.384210 \n", + "2 Green/Orange circular disc over Chester, En... 1/21/2008 53.200000 \n", + "3 My older brother and twin sister were leaving ... 1/17/2004 28.978333 \n", + "4 AS a Marine 1st Lt. flying an FJ4B fighter/att... 1/22/2004 21.418056 \n", + "\n", + " longitude \n", + "0 -97.941111 \n", + "1 -98.581082 \n", + "2 -2.916667 \n", + "3 -96.645833 \n", + "4 -157.803611 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
datetimecitystatecountryshapeduration (seconds)duration (hours/min)commentsdate postedlatitudelongitude
010/10/1949 20:30san marcostxuscylinder2700.045 minutesThis event took place in early fall around 194...4/27/200429.883056-97.941111
110/10/1949 21:00lackland afbtxNaNlight7200.01-2 hrs1949 Lackland AFB&#44 TX. Lights racing acros...12/16/200529.384210-98.581082
210/10/1955 17:00chester (uk/england)NaNgbcircle20.020 secondsGreen/Orange circular disc over Chester&#44 En...1/21/200853.200000-2.916667
310/10/1956 21:00ednatxuscircle20.01/2 hourMy older brother and twin sister were leaving ...1/17/200428.978333-96.645833
410/10/1960 20:00kaneohehiuslight900.015 minutesAS a Marine 1st Lt. flying an FJ4B fighter/att...1/22/200421.418056-157.803611
\n
" + }, + "metadata": {}, + "execution_count": 23 + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "ufos = pd.read_csv('../data/ufos.csv')\n", + "ufos.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array(['us', nan, 'gb', 'ca', 'au', 'de'], dtype=object)" + ] + }, + "metadata": {}, + "execution_count": 24 + } + ], + "source": [ + "\n", + "ufos = pd.DataFrame({'Seconds': ufos['duration (seconds)'], 'Country': ufos['country'],'Latitude': ufos['latitude'],'Longitude': ufos['longitude']})\n", + "\n", + "ufos.Country.unique()\n", + "\n", + "# 0 au, 1 ca, 2 de, 3 gb, 4 us" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\nInt64Index: 25863 entries, 2 to 80330\nData columns (total 4 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Seconds 25863 non-null float64\n 1 Country 25863 non-null object \n 2 Latitude 25863 non-null float64\n 3 Longitude 25863 non-null float64\ndtypes: float64(3), object(1)\nmemory usage: 1010.3+ KB\n" + ] + } + ], + "source": [ + "ufos.dropna(inplace=True)\n", + "\n", + "ufos = ufos[(ufos['Seconds'] >= 1) & (ufos['Seconds'] <= 60)]\n", + "\n", + "ufos.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Seconds Country Latitude Longitude\n", + "2 20.0 3 53.200000 -2.916667\n", + "3 20.0 4 28.978333 -96.645833\n", + "14 30.0 4 35.823889 -80.253611\n", + "23 60.0 4 45.582778 -122.352222\n", + "24 3.0 3 51.783333 -0.783333" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
SecondsCountryLatitudeLongitude
220.0353.200000-2.916667
320.0428.978333-96.645833
1430.0435.823889-80.253611
2360.0445.582778-122.352222
243.0351.783333-0.783333
\n
" + }, + "metadata": {}, + "execution_count": 26 + } + ], + "source": [ + "from sklearn.preprocessing import LabelEncoder\n", + "\n", + "ufos['Country'] = LabelEncoder().fit_transform(ufos['Country'])\n", + "\n", + "ufos.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "\n", + "Selected_features = ['Seconds','Latitude','Longitude']\n", + "\n", + "X = ufos[Selected_features]\n", + "y = ufos['Country']\n", + "\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", + " FutureWarning)\n", + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n", + " \"this warning.\", FutureWarning)\n", + " precision recall f1-score support\n", + "\n", + " 0 1.00 1.00 1.00 41\n", + " 1 1.00 0.02 0.05 250\n", + " 2 0.00 0.00 0.00 8\n", + " 3 0.94 1.00 0.97 131\n", + " 4 0.95 1.00 0.97 4743\n", + "\n", + " accuracy 0.95 5173\n", + " macro avg 0.78 0.60 0.60 5173\n", + "weighted avg 0.95 0.95 0.93 5173\n", + "\n", + "Predicted labels: [4 4 4 ... 
3 4 4]\n", + "Accuracy: 0.9512855209742895\n", + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/metrics/classification.py:1437: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.\n", + " 'precision', 'predicted', average, warn_for)\n" + ] + } + ], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import accuracy_score, classification_report \n", + "from sklearn.linear_model import LogisticRegression\n", + "model = LogisticRegression()\n", + "model.fit(X_train, y_train)\n", + "predictions = model.predict(X_test)\n", + "\n", + "print(classification_report(y_test, predictions))\n", + "print('Predicted labels: ', predictions)\n", + "print('Accuracy: ', accuracy_score(y_test, predictions))\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[3]\n" + ] + } + ], + "source": [ + "import pickle\n", + "model_filename = 'ufo-model.pkl'\n", + "pickle.dump(model, open(model_filename,'wb'))\n", + "\n", + "model = pickle.load(open('ufo-model.pkl','rb'))\n", + "print(model.predict([[50,44,-12]]))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc sự không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn tham khảo chính thức. Đối với các thông tin quan trọng, nên sử dụng dịch vụ dịch thuật chuyên nghiệp từ con người. 
Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/vi/4-Classification/1-Introduction/notebook.ipynb b/translations/vi/4-Classification/1-Introduction/notebook.ipynb new file mode 100644 index 000000000..0e10f1ada --- /dev/null +++ b/translations/vi/4-Classification/1-Introduction/notebook.ipynb @@ -0,0 +1,39 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "d544ef384b7ba73757d830a72372a7f2", + "translation_date": "2025-09-06T14:51:02+00:00", + "source_file": "4-Classification/1-Introduction/notebook.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn thông tin chính thức. Đối với các thông tin quan trọng, khuyến nghị sử dụng dịch vụ dịch thuật chuyên nghiệp bởi con người. 
Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/vi/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb b/translations/vi/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb new file mode 100644 index 000000000..46b61ef2c --- /dev/null +++ b/translations/vi/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb @@ -0,0 +1,721 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_10-R.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "2621e24705e8100893c9bf84e0fc8aef", + "translation_date": "2025-09-06T15:02:12+00:00", + "source_file": "4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb", + "language_code": "vi" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "ItETB4tSFprR" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Giới thiệu về phân loại: Làm sạch, chuẩn bị và trực quan hóa dữ liệu của bạn\n", + "\n", + "Trong bốn bài học này, bạn sẽ khám phá một trọng tâm cơ bản của học máy cổ điển - *phân loại*. Chúng ta sẽ cùng nhau sử dụng các thuật toán phân loại khác nhau với một tập dữ liệu về các món ăn tuyệt vời của châu Á và Ấn Độ. Hy vọng bạn đang đói!\n", + "\n", + "

\n", + " \n", + "

Hãy cùng khám phá các món ăn châu Á trong những bài học này! Hình ảnh bởi Jen Looper
\n", + "\n", + "\n", + "\n", + "\n", + "Phân loại là một dạng [học có giám sát](https://wikipedia.org/wiki/Supervised_learning) có nhiều điểm tương đồng với các kỹ thuật hồi quy. Trong phân loại, bạn huấn luyện một mô hình để dự đoán một `danh mục` mà một mục thuộc về. Nếu học máy là về việc dự đoán giá trị hoặc tên của các đối tượng bằng cách sử dụng tập dữ liệu, thì phân loại thường chia thành hai nhóm: *phân loại nhị phân* và *phân loại đa lớp*.\n", + "\n", + "Hãy nhớ:\n", + "\n", + "- **Hồi quy tuyến tính** giúp bạn dự đoán mối quan hệ giữa các biến và đưa ra dự đoán chính xác về vị trí mà một điểm dữ liệu mới sẽ nằm trong mối quan hệ với đường đó. Ví dụ, bạn có thể dự đoán giá trị số như *giá của một quả bí ngô vào tháng 9 so với tháng 12*.\n", + "\n", + "- **Hồi quy logistic** giúp bạn khám phá \"danh mục nhị phân\": ở mức giá này, *quả bí ngô này có màu cam hay không màu cam*?\n", + "\n", + "Phân loại sử dụng các thuật toán khác nhau để xác định các cách khác nhau nhằm xác định nhãn hoặc lớp của một điểm dữ liệu. Hãy làm việc với dữ liệu về món ăn này để xem liệu, bằng cách quan sát một nhóm nguyên liệu, chúng ta có thể xác định nguồn gốc của món ăn đó hay không.\n", + "\n", + "### [**Câu hỏi trước bài giảng**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/19/)\n", + "\n", + "### **Giới thiệu**\n", + "\n", + "Phân loại là một trong những hoạt động cơ bản của nhà nghiên cứu học máy và nhà khoa học dữ liệu. Từ việc phân loại cơ bản một giá trị nhị phân (\"email này có phải là spam hay không?\"), đến phân loại hình ảnh phức tạp và phân đoạn bằng cách sử dụng thị giác máy tính, việc có thể phân loại dữ liệu thành các lớp và đặt câu hỏi về nó luôn hữu ích.\n", + "\n", + "Nói theo cách khoa học hơn, phương pháp phân loại của bạn tạo ra một mô hình dự đoán cho phép bạn ánh xạ mối quan hệ giữa các biến đầu vào và biến đầu ra.\n", + "\n", + "

\n", + " \n", + "

Vấn đề nhị phân và đa lớp mà các thuật toán phân loại cần xử lý. Đồ họa thông tin bởi Jen Looper
\n", + "\n", + "\n", + "\n", + "Trước khi bắt đầu quá trình làm sạch dữ liệu, trực quan hóa nó và chuẩn bị cho các nhiệm vụ học máy, hãy tìm hiểu một chút về các cách khác nhau mà học máy có thể được sử dụng để phân loại dữ liệu.\n", + "\n", + "Xuất phát từ [thống kê](https://wikipedia.org/wiki/Statistical_classification), phân loại sử dụng học máy cổ điển dựa vào các đặc điểm như `người hút thuốc`, `cân nặng`, và `tuổi` để xác định *khả năng phát triển bệnh X*. Là một kỹ thuật học có giám sát tương tự như các bài tập hồi quy bạn đã thực hiện trước đó, dữ liệu của bạn được gắn nhãn và các thuật toán học máy sử dụng các nhãn đó để phân loại và dự đoán các lớp (hoặc 'đặc điểm') của một tập dữ liệu và gán chúng vào một nhóm hoặc kết quả.\n", + "\n", + "✅ Hãy dành một chút thời gian để tưởng tượng một tập dữ liệu về các món ăn. Một mô hình phân loại đa lớp có thể trả lời điều gì? Một mô hình phân loại nhị phân có thể trả lời điều gì? Nếu bạn muốn xác định liệu một món ăn cụ thể có khả năng sử dụng cỏ cà ri hay không thì sao? Nếu bạn muốn xem liệu, với một túi quà gồm hồi, atisô, súp lơ, và cải ngựa, bạn có thể tạo ra một món ăn Ấn Độ điển hình hay không?\n", + "\n", + "### **Xin chào 'bộ phân loại'**\n", + "\n", + "Câu hỏi mà chúng ta muốn đặt ra với tập dữ liệu về món ăn này thực sự là một câu hỏi **đa lớp**, vì chúng ta có nhiều món ăn quốc gia tiềm năng để làm việc. Với một nhóm nguyên liệu, dữ liệu sẽ phù hợp với lớp nào trong số nhiều lớp này?\n", + "\n", + "Tidymodels cung cấp một số thuật toán khác nhau để sử dụng nhằm phân loại dữ liệu, tùy thuộc vào loại vấn đề bạn muốn giải quyết. 
Trong hai bài học tiếp theo, bạn sẽ tìm hiểu về một số thuật toán này.\n", + "\n", + "#### **Yêu cầu trước**\n", + "\n", + "Đối với bài học này, chúng ta sẽ cần các gói sau để làm sạch, chuẩn bị và trực quan hóa dữ liệu:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) là một [bộ sưu tập các gói R](https://www.tidyverse.org/packages) được thiết kế để làm cho khoa học dữ liệu nhanh hơn, dễ dàng hơn và thú vị hơn!\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) là một [khung làm việc](https://www.tidymodels.org/packages/) gồm các gói dành cho mô hình hóa và học máy.\n", + "\n", + "- `DataExplorer`: [Gói DataExplorer](https://cran.r-project.org/web/packages/DataExplorer/vignettes/dataexplorer-intro.html) được thiết kế để đơn giản hóa và tự động hóa quá trình EDA và tạo báo cáo.\n", + "\n", + "- `themis`: [Gói themis](https://themis.tidymodels.org/) cung cấp các bước recipe bổ sung để xử lý dữ liệu không cân bằng.\n", + "\n", + "Bạn có thể cài đặt chúng bằng lệnh:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\", \"DataExplorer\", \"themis\", \"here\"))`\n", + "\n", + "Ngoài ra, đoạn mã dưới đây sẽ kiểm tra xem bạn đã có các gói cần thiết để hoàn thành module này chưa và cài đặt chúng nếu chúng bị thiếu.\n" + ], + "metadata": { + "id": "ri5bQxZ-Fz_0" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "suppressWarnings(if (!require(\"pacman\"))install.packages(\"pacman\"))\r\n", + "\r\n", + "pacman::p_load(tidyverse, tidymodels, DataExplorer, themis, here)" + ], + "outputs": [], + "metadata": { + "id": "KIPxa4elGAPI" + } + }, + { + "cell_type": "markdown", + "source": [ + "Chúng ta sẽ tải các gói tuyệt vời này sau và làm cho chúng khả dụng trong phiên làm việc R hiện tại của chúng ta. 
(Đây chỉ là để minh họa, `pacman::p_load()` đã làm điều đó cho bạn)\n" + ], + "metadata": { + "id": "YkKAxOJvGD4C" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Bài tập - làm sạch và cân bằng dữ liệu của bạn\n", + "\n", + "Nhiệm vụ đầu tiên trước khi bắt đầu dự án này là làm sạch và **cân bằng** dữ liệu của bạn để đạt được kết quả tốt hơn.\n", + "\n", + "Hãy cùng khám phá dữ liệu nào! 🕵️\n" + ], + "metadata": { + "id": "PFkQDlk0GN5O" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Import data\r\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/4-Classification/data/cuisines.csv\")\r\n", + "\r\n", + "# View the first 5 rows\r\n", + "df %>% \r\n", + " slice_head(n = 5)\r\n" + ], + "outputs": [], + "metadata": { + "id": "Qccw7okxGT0S" + } + }, + { + "cell_type": "markdown", + "source": [ + "Thú vị! Nhìn qua thì cột đầu tiên là một loại cột `id`. Hãy tìm hiểu thêm một chút thông tin về dữ liệu.\n" + ], + "metadata": { + "id": "XrWnlgSrGVmR" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Basic information about the data\r\n", + "df %>%\r\n", + " introduce()\r\n", + "\r\n", + "# Visualize basic information above\r\n", + "df %>% \r\n", + " plot_intro(ggtheme = theme_light())" + ], + "outputs": [], + "metadata": { + "id": "4UcGmxRxGieA" + } + }, + { + "cell_type": "markdown", + "source": [ + "Từ kết quả, chúng ta có thể thấy ngay rằng chúng ta có `2448` hàng và `385` cột và `0` giá trị bị thiếu. Chúng ta cũng có 1 cột rời rạc, *cuisine*.\n", + "\n", + "## Bài tập - tìm hiểu về các loại ẩm thực\n", + "\n", + "Bây giờ công việc bắt đầu trở nên thú vị hơn. 
Hãy khám phá sự phân bố dữ liệu theo từng loại ẩm thực.\n" + ], + "metadata": { + "id": "AaPubl__GmH5" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Count observations per cuisine\r\n", + "df %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(n)\r\n", + "\r\n", + "# Plot the distribution\r\n", + "theme_set(theme_light())\r\n", + "df %>% \r\n", + " count(cuisine) %>% \r\n", + " ggplot(mapping = aes(x = n, y = reorder(cuisine, -n))) +\r\n", + " geom_col(fill = \"midnightblue\", alpha = 0.7) +\r\n", + " ylab(\"cuisine\")" + ], + "outputs": [], + "metadata": { + "id": "FRsBVy5eGrrv" + } + }, + { + "cell_type": "markdown", + "source": [ + "Có một số lượng hữu hạn các nền ẩm thực, nhưng phân bố dữ liệu lại không đồng đều. Bạn có thể khắc phục điều đó! Trước khi làm vậy, hãy khám phá thêm một chút.\n", + "\n", + "Tiếp theo, hãy gán mỗi nền ẩm thực vào một tibble riêng và tìm hiểu xem có bao nhiêu dữ liệu (số hàng, số cột) cho từng nền ẩm thực.\n", + "\n", + "> Một [tibble](https://tibble.tidyverse.org/) là một dạng khung dữ liệu hiện đại.\n", + "\n", + "

\n", + " \n", + "

Tác phẩm nghệ thuật của @allison_horst
\n" + ], + "metadata": { + "id": "vVvyDb1kG2in" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Create individual tibble for the cuisines\r\n", + "thai_df <- df %>% \r\n", + " filter(cuisine == \"thai\")\r\n", + "japanese_df <- df %>% \r\n", + " filter(cuisine == \"japanese\")\r\n", + "chinese_df <- df %>% \r\n", + " filter(cuisine == \"chinese\")\r\n", + "indian_df <- df %>% \r\n", + " filter(cuisine == \"indian\")\r\n", + "korean_df <- df %>% \r\n", + " filter(cuisine == \"korean\")\r\n", + "\r\n", + "\r\n", + "# Find out how much data is available per cuisine\r\n", + "cat(\" thai df:\", dim(thai_df), \"\\n\",\r\n", + " \"japanese df:\", dim(japanese_df), \"\\n\",\r\n", + " \"chinese_df:\", dim(chinese_df), \"\\n\",\r\n", + " \"indian_df:\", dim(indian_df), \"\\n\",\r\n", + " \"korean_df:\", dim(korean_df))" + ], + "outputs": [], + "metadata": { + "id": "0TvXUxD3G8Bk" + } + }, + { + "cell_type": "markdown", + "source": [ + "## **Bài tập - Khám phá các nguyên liệu hàng đầu theo từng loại ẩm thực bằng dplyr**\n", + "\n", + "Bây giờ bạn có thể đi sâu vào dữ liệu và tìm hiểu những nguyên liệu đặc trưng cho từng loại ẩm thực. Bạn nên loại bỏ dữ liệu lặp lại gây nhầm lẫn giữa các loại ẩm thực, vì vậy hãy cùng tìm hiểu vấn đề này.\n", + "\n", + "Hãy tạo một hàm `create_ingredient()` trong R để trả về một dataframe nguyên liệu. Hàm này sẽ bắt đầu bằng cách loại bỏ một cột không hữu ích và sắp xếp các nguyên liệu theo số lượng của chúng.\n", + "\n", + "Cấu trúc cơ bản của một hàm trong R là:\n", + "\n", + "`myFunction <- function(arglist){`\n", + "\n", + "**`...`**\n", + "\n", + "**`return`**`(value)`\n", + "\n", + "`}`\n", + "\n", + "Một giới thiệu ngắn gọn về hàm trong R có thể được tìm thấy [tại đây](https://skirmer.github.io/presentations/functions_with_r.html#1).\n", + "\n", + "Hãy bắt đầu ngay thôi! Chúng ta sẽ sử dụng các [động từ dplyr](https://dplyr.tidyverse.org/) mà chúng ta đã học trong các bài học trước. 
Tóm tắt lại:\n", + "\n", + "- `dplyr::select()`: giúp bạn chọn những **cột** cần giữ lại hoặc loại bỏ.\n", + "\n", + "- `tidyr::pivot_longer()`: giúp bạn \"kéo dài\" dữ liệu, tăng số lượng hàng và giảm số lượng cột.\n", + "\n", + "- `dplyr::group_by()` và `dplyr::summarise()`: giúp bạn tìm các thống kê tóm tắt cho các nhóm khác nhau và đưa chúng vào một bảng gọn gàng.\n", + "\n", + "- `dplyr::filter()`: tạo một tập hợp con của dữ liệu chỉ chứa các hàng thỏa mãn điều kiện của bạn.\n", + "\n", + "- `dplyr::mutate()`: giúp bạn tạo hoặc chỉnh sửa các cột.\n", + "\n", + "Hãy xem hướng dẫn [*nghệ thuật*](https://allisonhorst.shinyapps.io/dplyr-learnr/#section-welcome) của Allison Horst, giới thiệu một số hàm xử lý dữ liệu hữu ích trong dplyr *(một phần của Tidyverse)*.\n" + ], + "metadata": { + "id": "K3RF5bSCHC76" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Creates a functions that returns the top ingredients by class\r\n", + "\r\n", + "create_ingredient <- function(df){\r\n", + " \r\n", + " # Drop the id column which is the first colum\r\n", + " ingredient_df = df %>% select(-1) %>% \r\n", + " # Transpose data to a long format\r\n", + " pivot_longer(!cuisine, names_to = \"ingredients\", values_to = \"count\") %>% \r\n", + " # Find the top most ingredients for a particular cuisine\r\n", + " group_by(ingredients) %>% \r\n", + " summarise(n_instances = sum(count)) %>% \r\n", + " filter(n_instances != 0) %>% \r\n", + " # Arrange by descending order\r\n", + " arrange(desc(n_instances)) %>% \r\n", + " mutate(ingredients = factor(ingredients) %>% fct_inorder())\r\n", + " \r\n", + " \r\n", + " return(ingredient_df)\r\n", + "} # End of function" + ], + "outputs": [], + "metadata": { + "id": "uB_0JR82HTPa" + } + }, + { + "cell_type": "markdown", + "source": [ + "Bây giờ chúng ta có thể sử dụng hàm để có cái nhìn tổng quan về mười nguyên liệu phổ biến nhất theo từng loại ẩm thực. 
Hãy thử nghiệm với `thai_df`.\n" + ], + "metadata": { + "id": "h9794WF8HWmc" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Call create_ingredient and display popular ingredients\r\n", + "thai_ingredient_df <- create_ingredient(df = thai_df)\r\n", + "\r\n", + "thai_ingredient_df %>% \r\n", + " slice_head(n = 10)" + ], + "outputs": [], + "metadata": { + "id": "agQ-1HrcHaEA" + } + }, + { + "cell_type": "markdown", + "source": [ + "Trong phần trước, chúng ta đã sử dụng `geom_col()`, hãy xem cách bạn có thể sử dụng `geom_bar` để tạo biểu đồ cột. Sử dụng `?geom_bar` để đọc thêm.\n" + ], + "metadata": { + "id": "kHu9ffGjHdcX" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Make a bar chart for popular thai cuisines\r\n", + "thai_ingredient_df %>% \r\n", + " slice_head(n = 10) %>% \r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"steelblue\") +\r\n", + " xlab(\"\") + ylab(\"\")" + ], + "outputs": [], + "metadata": { + "id": "fb3Bx_3DHj6e" + } + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "RHP_xgdkHnvM" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Get popular ingredients for Japanese cuisines and make bar chart\r\n", + "create_ingredient(df = japanese_df) %>% \r\n", + " slice_head(n = 10) %>%\r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"darkorange\", alpha = 0.8) +\r\n", + " xlab(\"\") + ylab(\"\")\r\n" + ], + "outputs": [], + "metadata": { + "id": "019v8F0XHrRU" + } + }, + { + "cell_type": "markdown", + "source": [ + "Còn về các món ăn Trung Quốc thì sao?\n" + ], + "metadata": { + "id": "iIGM7vO8Hu3v" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Get popular ingredients for Chinese cuisines and make bar chart\r\n", + "create_ingredient(df = chinese_df) %>% 
\r\n", + " slice_head(n = 10) %>%\r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"cyan4\", alpha = 0.8) +\r\n", + " xlab(\"\") + ylab(\"\")" + ], + "outputs": [], + "metadata": { + "id": "lHd9_gd2HyzU" + } + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "ir8qyQbNH1c7" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Get popular ingredients for Indian cuisines and make bar chart\r\n", + "create_ingredient(df = indian_df) %>% \r\n", + " slice_head(n = 10) %>%\r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"#041E42FF\", alpha = 0.8) +\r\n", + " xlab(\"\") + ylab(\"\")" + ], + "outputs": [], + "metadata": { + "id": "ApukQtKjH5FO" + } + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "qv30cwY1H-FM" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Get popular ingredients for Korean cuisines and make bar chart\r\n", + "create_ingredient(df = korean_df) %>% \r\n", + " slice_head(n = 10) %>%\r\n", + " ggplot(aes(x = n_instances, y = ingredients)) +\r\n", + " geom_bar(stat = \"identity\", width = 0.5, fill = \"#852419FF\", alpha = 0.8) +\r\n", + " xlab(\"\") + ylab(\"\")" + ], + "outputs": [], + "metadata": { + "id": "lumgk9cHIBie" + } + }, + { + "cell_type": "markdown", + "source": [ + "Từ các biểu đồ trực quan, giờ đây chúng ta có thể loại bỏ những nguyên liệu phổ biến nhất gây nhầm lẫn giữa các nền ẩm thực khác nhau, sử dụng `dplyr::select()`.\n", + "\n", + "Ai cũng yêu thích gạo, tỏi và gừng!\n" + ], + "metadata": { + "id": "iO4veMXuIEta" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Drop id column, rice, garlic and ginger from our original data set\r\n", + "df_select <- df %>% \r\n", + " select(-c(1, rice, garlic, ginger))\r\n", + "\r\n", + "# Display new data set\r\n", + 
"df_select %>% \r\n", + " slice_head(n = 5)" + ], + "outputs": [], + "metadata": { + "id": "iHJPiG6rIUcK" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Xử lý dữ liệu bằng recipes 👩‍🍳👨‍🍳 - Đối phó với dữ liệu không cân bằng ⚖️\n", + "\n", + "

\n", + " \n", + "

Tác phẩm nghệ thuật của @allison_horst
\n", + "\n", + "Vì bài học này liên quan đến ẩm thực, chúng ta cần đặt `recipes` vào ngữ cảnh.\n", + "\n", + "Tidymodels cung cấp thêm một gói tiện ích khác: `recipes` - một gói dùng để tiền xử lý dữ liệu.\n" + ], + "metadata": { + "id": "kkFd-JxdIaL6" + } + }, + { + "cell_type": "markdown", + "source": [ + "Hãy cùng xem lại sự phân bố của các món ăn của chúng ta.\n" + ], + "metadata": { + "id": "6l2ubtTPJAhY" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Distribution of cuisines\r\n", + "old_label_count <- df_select %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(desc(n))\r\n", + "\r\n", + "old_label_count" + ], + "outputs": [], + "metadata": { + "id": "1e-E9cb7JDVi" + } + }, + { + "cell_type": "markdown", + "source": [ + "Như bạn có thể thấy, số lượng các món ăn không được phân bố đồng đều. Các món ăn Hàn Quốc gần gấp 3 lần các món ăn Thái. Dữ liệu không cân bằng thường có ảnh hưởng tiêu cực đến hiệu suất của mô hình. Hãy nghĩ về một bài toán phân loại nhị phân. Nếu phần lớn dữ liệu của bạn thuộc về một lớp, mô hình học máy sẽ dự đoán lớp đó thường xuyên hơn, chỉ vì có nhiều dữ liệu hơn cho lớp đó. Việc cân bằng dữ liệu sẽ điều chỉnh bất kỳ sự lệch lạc nào và giúp loại bỏ sự mất cân bằng này. Nhiều mô hình hoạt động tốt nhất khi số lượng quan sát là bằng nhau và do đó thường gặp khó khăn với dữ liệu không cân bằng.\n", + "\n", + "Có hai cách chính để xử lý các tập dữ liệu không cân bằng:\n", + "\n", + "- thêm các quan sát vào lớp thiểu số: `Over-sampling`, ví dụ sử dụng thuật toán SMOTE\n", + "\n", + "- loại bỏ các quan sát từ lớp đa số: `Under-sampling`\n", + "\n", + "Bây giờ chúng ta sẽ minh họa cách xử lý các tập dữ liệu không cân bằng bằng cách sử dụng một `recipe`. 
Recipe có thể được xem như một bản thiết kế mô tả các bước cần thực hiện trên một tập dữ liệu để chuẩn bị cho việc phân tích dữ liệu.\n" + ], + "metadata": { + "id": "soAw6826JKx9" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load themis package for dealing with imbalanced data\r\n", + "library(themis)\r\n", + "\r\n", + "# Create a recipe for preprocessing data\r\n", + "cuisines_recipe <- recipe(cuisine ~ ., data = df_select) %>% \r\n", + " step_smote(cuisine)\r\n", + "\r\n", + "cuisines_recipe" + ], + "outputs": [], + "metadata": { + "id": "HS41brUIJVJy" + } + }, + { + "cell_type": "markdown", + "source": [ + "Hãy cùng phân tích các bước tiền xử lý.\n", + "\n", + "- Lệnh gọi `recipe()` với một công thức cho phép recipe xác định *vai trò* của các biến bằng cách sử dụng dữ liệu `df_select` làm tham chiếu. Ví dụ, cột `cuisine` được gán vai trò `outcome`, trong khi các cột còn lại được gán vai trò `predictor`.\n", + "\n", + "- [`step_smote(cuisine)`](https://themis.tidymodels.org/reference/step_smote.html) tạo ra một *đặc tả* cho một bước trong recipe, bước này sẽ tạo ra các ví dụ mới cho lớp thiểu số một cách tổng hợp bằng cách sử dụng các điểm lân cận gần nhất của các trường hợp này.\n", + "\n", + "Bây giờ, nếu chúng ta muốn xem dữ liệu đã được tiền xử lý, chúng ta cần [**`prep()`**](https://recipes.tidymodels.org/reference/prep.html) và [**`bake()`**](https://recipes.tidymodels.org/reference/bake.html) recipe của mình.\n", + "\n", + "`prep()`: ước tính các tham số cần thiết từ tập huấn luyện để sau đó có thể áp dụng cho các tập dữ liệu khác.\n", + "\n", + "`bake()`: sử dụng một recipe đã được chuẩn bị và áp dụng các thao tác lên bất kỳ tập dữ liệu nào.\n" + ], + "metadata": { + "id": "Yb-7t7XcJaC8" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Prep and bake the recipe\r\n", + "preprocessed_df <- cuisines_recipe %>% \r\n", + " prep() %>% \r\n", + " bake(new_data = NULL) %>% \r\n", + " 
relocate(cuisine)\r\n", + "\r\n", + "# Display data\r\n", + "preprocessed_df %>% \r\n", + " slice_head(n = 5)\r\n", + "\r\n", + "# Quick summary stats\r\n", + "preprocessed_df %>% \r\n", + " introduce()" + ], + "outputs": [], + "metadata": { + "id": "9QhSgdpxJl44" + } + }, + { + "cell_type": "markdown", + "source": [ + "Bây giờ hãy kiểm tra phân bố của các món ăn và so sánh chúng với dữ liệu mất cân bằng.\n" + ], + "metadata": { + "id": "dmidELh_LdV7" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Distribution of cuisines\r\n", + "new_label_count <- preprocessed_df %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(desc(n))\r\n", + "\r\n", + "list(new_label_count = new_label_count,\r\n", + " old_label_count = old_label_count)" + ], + "outputs": [], + "metadata": { + "id": "aSh23klBLwDz" + } + }, + { + "cell_type": "markdown", + "source": [ + "Yum! Dữ liệu thật sạch sẽ, cân bằng và rất ngon miệng 😋!\n", + "\n", + "> Thông thường, một recipe được sử dụng như một bộ tiền xử lý cho việc mô hình hóa, nơi nó xác định các bước cần áp dụng cho một tập dữ liệu để chuẩn bị cho việc mô hình hóa. 
Trong trường hợp đó, một `workflow()` thường được sử dụng (như chúng ta đã thấy trong các bài học trước) thay vì tự tay ước tính một recipe.\n", + ">\n", + "> Vì vậy, bạn thường không cần phải **`prep()`** và **`bake()`** recipe khi sử dụng tidymodels, nhưng chúng là những hàm hữu ích để kiểm tra rằng recipe đang hoạt động đúng như mong đợi, giống như trong trường hợp của chúng ta.\n", + ">\n", + "> Khi bạn **`bake()`** một recipe đã được chuẩn bị với **`new_data = NULL`**, bạn sẽ nhận lại dữ liệu mà bạn đã cung cấp khi định nghĩa recipe, nhưng đã trải qua các bước tiền xử lý.\n", + "\n", + "Bây giờ hãy lưu một bản sao của dữ liệu này để sử dụng trong các bài học sau:\n" + ], + "metadata": { + "id": "HEu80HZ8L7ae" + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Save preprocessed data\r\n", + "write_csv(preprocessed_df, \"../../../data/cleaned_cuisines_R.csv\")" + ], + "outputs": [], + "metadata": { + "id": "cBmCbIgrMOI6" + } + }, + { + "cell_type": "markdown", + "source": [ + "Tệp CSV mới này hiện có thể được tìm thấy trong thư mục dữ liệu gốc.\n", + "\n", + "**🚀Thử thách**\n", + "\n", + "Chương trình học này chứa một số bộ dữ liệu thú vị. Hãy khám phá các thư mục `data` và xem liệu có bộ dữ liệu nào phù hợp cho phân loại nhị phân hoặc đa lớp không? Bạn sẽ đặt câu hỏi gì với bộ dữ liệu này?\n", + "\n", + "## [**Câu hỏi sau bài giảng**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/20/)\n", + "\n", + "## **Ôn tập & Tự học**\n", + "\n", + "- Xem qua [gói themis](https://github.com/tidymodels/themis). Có những kỹ thuật nào khác mà chúng ta có thể sử dụng để xử lý dữ liệu mất cân bằng?\n", + "\n", + "- Trang web tham khảo [Tidy models](https://www.tidymodels.org/start/).\n", + "\n", + "- H. Wickham và G. 
Grolemund, [*R for Data Science: Visualize, Model, Transform, Tidy, and Import Data*](https://r4ds.had.co.nz/).\n", + "\n", + "#### CẢM ƠN ĐẾN:\n", + "\n", + "[`Allison Horst`](https://twitter.com/allison_horst/) vì đã tạo ra những hình minh họa tuyệt vời giúp R trở nên thân thiện và hấp dẫn hơn. Tìm thêm hình minh họa tại [bộ sưu tập của cô ấy](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM).\n", + "\n", + "[Cassie Breviu](https://www.twitter.com/cassieview) và [Jen Looper](https://www.twitter.com/jenlooper) vì đã tạo ra phiên bản Python gốc của module này ♥️\n", + "\n", + "

\n", + " \n", + "

Tác phẩm nghệ thuật của @allison_horst
\n" + ], + "metadata": { + "id": "WQs5621pMGwf" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc sự không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn tham khảo chính thức. Đối với các thông tin quan trọng, chúng tôi khuyến nghị sử dụng dịch vụ dịch thuật chuyên nghiệp từ con người. Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/vi/4-Classification/1-Introduction/solution/notebook.ipynb b/translations/vi/4-Classification/1-Introduction/solution/notebook.ipynb new file mode 100644 index 000000000..0b0f6b621 --- /dev/null +++ b/translations/vi/4-Classification/1-Introduction/solution/notebook.ipynb @@ -0,0 +1,701 @@ +{ + "cells": [ + { + "source": [ + "# Các Món Ăn Ngon Châu Á và Ấn Độ\n", + "\n", + "## Giới thiệu \n", + "Ẩm thực Châu Á và Ấn Độ nổi tiếng với hương vị phong phú, đa dạng và cách chế biến độc đáo. Từ các món ăn cay nồng của Ấn Độ đến các món hấp dẫn từ Nhật Bản, Trung Quốc và Thái Lan, mỗi món ăn đều mang một câu chuyện riêng.\n", + "\n", + "## Các món ăn phổ biến \n", + "\n", + "### 1. Sushi \n", + "Sushi là một món ăn truyền thống của Nhật Bản, thường được làm từ cơm trộn giấm kết hợp với cá sống, rau củ hoặc các nguyên liệu khác. \n", + "- **Nguyên liệu chính**: Cá sống, cơm, rong biển. \n", + "- **Mẹo nhỏ**: Để có hương vị ngon nhất, hãy sử dụng cá tươi và cơm vừa nấu. \n", + "\n", + "### 2. Cà ri Ấn Độ \n", + "Cà ri Ấn Độ là một món ăn đậm đà, thường được nấu với các loại gia vị như nghệ, thì là, và bột cà ri. \n", + "- **Nguyên liệu chính**: Thịt gà, thịt cừu hoặc rau củ, nước cốt dừa, gia vị. 
\n", + "- **Mẹo nhỏ**: Hãy để cà ri nấu chậm để gia vị thấm đều vào nguyên liệu. \n", + "\n", + "### 3. Phở Việt Nam \n", + "Phở là một món ăn truyền thống của Việt Nam, nổi tiếng với nước dùng thơm ngon và sợi phở mềm mại. \n", + "- **Nguyên liệu chính**: Bánh phở, thịt bò hoặc gà, hành lá, rau thơm. \n", + "- **Mẹo nhỏ**: Nước dùng ngon là yếu tố quan trọng nhất, hãy ninh xương trong nhiều giờ để đạt được hương vị đậm đà. \n", + "\n", + "### 4. Pad Thai \n", + "Pad Thai là một món ăn nổi tiếng của Thái Lan, được làm từ mì xào với tôm, đậu phụ, trứng và nước sốt đặc biệt. \n", + "- **Nguyên liệu chính**: Mì gạo, tôm, đậu phụ, trứng, nước sốt me. \n", + "- **Mẹo nhỏ**: Thêm một chút nước cốt chanh và đậu phộng rang để tăng hương vị. \n", + "\n", + "## Kết luận \n", + "Ẩm thực Châu Á và Ấn Độ không chỉ là những món ăn, mà còn là một phần của văn hóa và truyền thống. Hãy thử khám phá và thưởng thức những món ăn này để cảm nhận sự đa dạng và phong phú của ẩm thực thế giới! \n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "Cài đặt Imblearn để kích hoạt SMOTE. Đây là một gói Scikit-learn giúp xử lý dữ liệu mất cân bằng khi thực hiện phân loại. 
(https://imbalanced-learn.org/stable/)\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: imblearn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.0)\n", + "Requirement already satisfied: imbalanced-learn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imblearn) (0.8.0)\n", + "Requirement already satisfied: numpy>=1.13.3 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (1.19.2)\n", + "Requirement already satisfied: scipy>=0.19.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (1.4.1)\n", + "Requirement already satisfied: scikit-learn>=0.24 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (0.24.2)\n", + "Requirement already satisfied: joblib>=0.11 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (0.16.0)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from scikit-learn>=0.24->imbalanced-learn->imblearn) (2.1.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install imblearn" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as 
plt\n", + "import matplotlib as mpl\n", + "import numpy as np\n", + "from imblearn.over_sampling import SMOTE" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('../../data/cuisines.csv')" + ] + }, + { + "source": [ + "Tập dữ liệu này bao gồm 385 cột biểu thị tất cả các loại nguyên liệu trong các nền ẩm thực khác nhau từ một tập hợp các nền ẩm thực được cho.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 65 indian 0 0 0 0 0 \n", + "1 66 indian 1 0 0 0 0 \n", + "2 67 indian 0 0 0 0 0 \n", + "3 68 indian 0 0 0 0 0 \n", + "4 69 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 385 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
065indian00000000...0000000000
166indian10000000...0000000000
267indian00000000...0000000000
368indian00000000...0000000000
469indian00000000...0000000010
\n

5 rows × 385 columns

\n
" + }, + "metadata": {}, + "execution_count": 4 + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\nRangeIndex: 2448 entries, 0 to 2447\nColumns: 385 entries, Unnamed: 0 to zucchini\ndtypes: int64(384), object(1)\nmemory usage: 7.2+ MB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "korean 799\n", + "indian 598\n", + "chinese 442\n", + "japanese 320\n", + "thai 289\n", + "Name: cuisine, dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ], + "source": [ + "df.cuisine.value_counts()" + ] + }, + { + "source": [ + "Hiển thị các món ăn trong biểu đồ thanh\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 7 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAZEAAAD4CAYAAAAtrdtxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAASY0lEQVR4nO3df7TldV3v8eerGZkRRoeAiXtE5UgNIkUCjlwQIzAiC7NscdcSbcmsfkxl5SXX0juuyzK9d3UvlXnpplajma0kMtCUhluImNcr8msGBmb4pZaTQCFQOYom0fi+f+zPkd14hpnzOWefvYfzfKy113z35/vde7/22fvMa3++3733SVUhSVKPbxt3AEnSgcsSkSR1s0QkSd0sEUlSN0tEktRt+bgDLKYjjjiipqenxx1Dkg4oW7dufbiq1sy2bkmVyPT0NFu2bBl3DEk6oCT5u72tc3eWJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqduS+sT69vt3Mb3xqnHH0ALZefG5444gLXnORCRJ3SwRSVI3S0SS1M0SkSR1s0QkSd0sEUlSN0tEktRtIkokyaFJXtuWz0yyeY6X/29Jzh5NOknS3kxEiQCHAq/tvXBVvbmqPraAeSRJ+2FSSuRi4DuTbAN+E1iV5Iokdye5NEkAkrw5yc1JdiTZNDT+viTnjTG/JC1Jk1IiG4G/qaoTgTcAJwEXAscDxwCnt+3eUVUvrKrvAZ4KvGxfV5xkQ5ItSbbs/tqu0aSXpCVqUkpkTzdV1X1V9Q1gGzDdxs9KcmOS7cBLgO/e1xVV1aaqWldV65YdvHp0iSVpCZrUL2B8dGh5N7A8yUrgXcC6qro3yVuAleMIJ0kamJSZyFeAp+1jm5nCeDjJKsBjIJI0ZhMxE6mqf0xyXZIdwL8AX5xlmy8leTewA3gAuHmRY0qS9jARJQJQVa/ay/gvDS1fBFw0yzbrR5dMkrQ3k7I7S5J0ALJEJEndLBFJUjdLRJLUzRKRJHWbmHdnLYYTjlrNlovPHXcMSXrScCYiSepmiUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6rZ83AEW0/b7dzG98apxx9CY7Lz43HFHkJ50nIlIkrpZIpKkbpaIJKmbJSJJ6maJSJK6WSKSpG77VSJJPj3qIJKkA89+lUhVvWjUQSRJB579nYk8kmRVkmuT3JJke5Ifa+umk9yd5NIkdyW5IsnBbd2bk9ycZEeSTUnSxj+R5NeT3JTkM0m+r40vS/Kb7TK3J/m5Nj6V5JNJtrXrmtn+nCTXt0yXJ1k1ih+SJGl2czkm8nXgFVV1MnAW8FszpQA8F3hXVT0P+DLw2jb+jqp6YVV9D/BU4GVD17e8qk4BLgR+tY39NLCrql4IvBD42STPAV4FXF1VJwLPB7YlOQK4CDi7ZdoCvH4ud16SND9z+dqTAP8jyRnAN4CjgCPbunur6rq2/H7gdcDbgLOSvBE4GDgMuAP4i7bdh9q/W4HptnwO8L1JzmvnVwNrgZuB9yZ5CvDhqtqW5PuB44HrWpcdBFz/LaGTDcAGgGVPXzOHuytJ2pe5lMirgTXAC6rqsSQ7gZVtXe2xbSVZCbwLWFdV9yZ5y9D2AI+2f3cP5Qjwy1V19Z433srrXOB9Sd4O/DNwTVWd/0Shq2oTsAlgxdTaPXNKkuZhLruzVgMPtgI5Czh6aN2zk5zWll8FfIrHC+PhdqziPPbtauAX2oyDJMcmOSTJ0cAXq+rdwHuAk4EbgNOTfFfb9pAkx87h/kiS5ml/ZyIFXAr8RZLtDI4/3D20/h7gF5O8F7gT+N2q+lqSdwM7gAcY7JLal/cw2LV1Szve8hDw48CZwBuSPAY8Arymqh5Ksh6
4LMmKdvmLgM/s532SJM1Tqp54D0+Sw4FbqurovayfBja3g+cTbcXU2pq64JJxx9CY+FXwUp8kW6tq3WzrnnB3VpJnMDhY/bZRBJMkHdiecHdWVf098ITHGapqJzDxsxBJ0sLzu7MkSd0sEUlSN0tEktRtLh82POCdcNRqtvgOHUlaMM5EJEndLBFJUjdLRJLUzRKRJHWzRCRJ3SwRSVI3S0SS1M0SkSR1s0QkSd0sEUlSN0tEktTNEpEkdbNEJEndLBFJUjdLRJLUzRKRJHWzRCRJ3SwRSVI3S0SS1M0SkSR1s0QkSd2WjzvAYtp+/y6mN1417hhSt50XnzvuCNK/40xEktTNEpEkdbNEJEndLBFJUjdLRJLUzRKRJHWzRCRJ3Ra0RJK8L8l5s4w/I8kVC3lbkqTxW5QPG1bV3wPfUi6SpAPbvGYiSV6T5PYktyX54zZ8RpJPJ/nbmVlJkukkO9ry+iQfSvJXST6b5DeGru+cJNcnuSXJ5UlWtfGLk9zZbuttbWxNkg8mubmdTp/PfZEkzV33TCTJdwMXAS+qqoeTHAa8HZgCXgwcB1wJzLYb60TgJOBR4J4kvwP8S7u+s6vqq0n+C/D6JO8EXgEcV1WV5NB2Hb8N/K+q+lSSZwNXA8+bJecGYAPAsqev6b27kqRZzGd31kuAy6vqYYCq+qckAB+uqm8AdyY5ci+XvbaqdgEkuRM4GjgUOB64rl3PQcD1wC7g68AfJNkMbG7XcTZwfNsW4OlJVlXVI8M3VFWbgE0AK6bW1jzuryRpD6M4JvLo0HL2Y5vdLUeAa6rq/D03TnIK8AMMjqv8EoMC+zbg1Kr6+kKEliTN3XyOiXwc+E9JDgdou7Pm4wbg9CTf1a7vkCTHtuMiq6vq/wC/Ajy/bf9R4JdnLpzkxHneviRpjrpnIlV1R5JfA/5vkt3ArfMJUlUPJVkPXJZkRRu+CPgK8JEkKxnMVl7f1r0OeGeS2xncj08CPz+fDJKkuUnV0jlMsGJqbU1dcMm4Y0jd/HsiGockW6tq3Wzr/MS6JKmbJSJJ6maJSJK6WSKSpG6WiCSp26J8AeOkOOGo1Wzx3S2StGCciUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6maJSJK6LR93gMW0/f5dTG+8atwxJM3RzovPHXcE7YUzEUlSN0tEktTNEpEkdbNEJEndLBFJUjdLRJLUbWQlkuTTc9z+zCSb2/LLk2wcTTJJ0kIZ2edEqupF87jslcCVCxhHkjQCo5yJPNL+PTPJJ5JckeTuJJcmSVv30jZ2C/ATQ5ddn+QdbflHk9yY5NYkH0tyZBt/S5L3tuv+2ySvG9V9kSTNbrGOiZwEXAgcDxwDnJ5kJfBu4EeBFwD/YS+X/RRwalWdBPwp8MahdccBPwScAvxqkqeMJr4kaTaL9bUnN1XVfQBJtgHTwCPA56vqs238/cCGWS77TOADSaaAg4DPD627qqoeBR5N8iBwJHDf8IWTbJi53mVPX7OQ90mSlrzFmok8OrS8m7mV1+8A76iqE4CfA1bO5XqralNVrauqdcsOXj2Hm5Uk7cs43+J7NzCd5Dvb+fP3st1q4P62fMHIU0mS9tvYSqSqvs5gN9NV7cD6g3vZ9C3A5Um2Ag8vUjxJ0n5IVY07w6JZMbW2pi64ZNwxJM2RXwU/Xkm2VtW62db5iXVJUjdLRJLUzRKRJHWzRCRJ3SwRSVK3xfrE+kQ44ajVbPFdHpK0YJyJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqZolIkrotH3eAxbT9/l1Mb7xq3DEkaVHtvPjckV23MxF
JUjdLRJLUzRKRJHWzRCRJ3SwRSVI3S0SS1M0SkSR1W9ASSTKdZMdCXqckaXJNxEwkyZL60KMkPVmMrESSHJPk1iTfl+QPk2xv589q69cnuTLJx4Fr29gbktyc5PYkbx26rg8n2ZrkjiQbhsYfSfJrSW5LckOSI0d1fyRJ32okJZLkucAHgfXAKUBV1QnA+cAfJVnZNj0ZOK+qvj/JOcDatv2JwAuSnNG2+6mqegGwDnhdksPb+CHADVX1fOCTwM/OkmVDki1Jtuz+2q5R3F1JWrJGUSJrgI8Ar66q24AXA+8HqKq7gb8Djm3bXlNV/9SWz2mnW4FbgOMYlAoMiuM24AbgWUPj/wpsbstbgek9w1TVpqpaV1Xrlh28eqHuoySJ0XwB4y7gCwzK4859bPvVoeUA/7Oqfn94gyRnAmcDp1XV15J8ApiZyTxWVdWWd7PEvlBSksZtFDORfwVeAbwmyauA/we8GiDJscCzgXtmudzVwE8lWdW2PSrJdwCrgX9uBXIccOoIMkuSOozklXtVfTXJy4BrgP8OnJBkO/BvwPqqejTJnpf5aJLnAde3dY8APwn8FfDzSe5iUD43jCKzJGnu8vjeoCe/FVNra+qCS8YdQ5IW1Xz/nkiSrVW1brZ1E/E5EUnSgckSkSR1s0QkSd0sEUlSN0tEktRtSX0474SjVrNlnu9SkCQ9zpmIJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqtqT+KFWSrzD7n+adFEcAD487xBMw3/yYb37MNz/zyXd0Va2ZbcWS+u4s4J69/XWuSZBki/n6mW9+zDc/SzWfu7MkSd0sEUlSt6VWIpvGHWAfzDc/5psf883Pksy3pA6sS5IW1lKbiUiSFpAlIknqtmRKJMlLk9yT5HNJNo4pw3uTPJhkx9DYYUmuSfLZ9u+3t/Ek+d8t7+1JTl6EfM9K8tdJ7kxyR5L/PEkZk6xMclOS21q+t7bx5yS5seX4QJKD2viKdv5zbf30KPO121yW5NYkmycw284k25NsS7KljU3EY9tu89AkVyS5O8ldSU6blHxJntt+bjOnLye5cFLytdv8lfZ7sSPJZe33ZfTPv6p60p+AZcDfAMcABwG3AcePIccZwMnAjqGx3wA2tuWNwK+35R8B/hIIcCpw4yLkmwJObstPAz4DHD8pGdvtrGrLTwFubLf7Z8Ar2/jvAb/Qll8L/F5bfiXwgUX4Gb4e+BNgczs/Sdl2AkfsMTYRj227zT8CfqYtHwQcOkn5hnIuAx4Ajp6UfMBRwOeBpw4979YvxvNvUX7o4z4BpwFXD51/E/CmMWWZ5t+XyD3AVFueYvCBSIDfB86fbbtFzPoR4AcnMSNwMHAL8B8ZfAp3+Z6PNXA1cFpbXt62ywgzPRO4FngJsLn9BzIR2drt7ORbS2QiHltgdftPMJOYb49M5wDXTVI+BiVyL3BYez5tBn5oMZ5/S2V31swPeMZ9bWwSHFlV/9CWHwCObMtjzdymtycxeLU/MRnb7qJtwIPANQxmmF+qqn+bJcM387X1u4DDRxjvEuCNwDfa+cMnKBtAAR9NsjXJhjY2KY/tc4CHgD9suwPfk+SQCco37JXAZW15IvJV1f3A24AvAP/A4Pm0lUV4/i2VEjkg1OBlwdjfc51kFfBB4MKq+vLwunFnrKrdVXUig1f9pwDHjSvLsCQvAx6sqq3jzvIEXlxVJwM/DPxikjOGV475sV3OYFfv71bVScBXGewe+qZxP/cA2jGFlwOX77lunPnasZgfY1DGzwAOAV66GLe9VErkfuBZQ+ef2cYmwReTTAG0fx9s42PJnOQpDArk0qr60CRmBKiqLwF/zWCKfmiSme+BG87wzXxt/WrgH0cU6XTg5Ul2An/KYJfWb09INuCbr1apqgeBP2dQwpPy2N4H3FdVN7bzVzAolUnJN+OHgVuq6ovt/KTkOxv4fFU9VFWPAR9i8Jwc+fNvqZT
IzcDa9k6FgxhMR68cc6YZVwIXtOULGByHmBl/TXuXx6nArqFp80gkCfAHwF1V9fZJy5hkTZJD2/JTGRyvuYtBmZy3l3wzuc8DPt5eLS64qnpTVT2zqqYZPL8+XlWvnoRsAEkOSfK0mWUG+/V3MCGPbVU9ANyb5Llt6AeAOycl35DzeXxX1kyOScj3BeDUJAe33+OZn9/on3+LcSBqEk4M3i3xGQb70P/rmDJcxmB/5WMMXnn9NIP9kNcCnwU+BhzWtg3wzpZ3O7BuEfK9mMF0/HZgWzv9yKRkBL4XuLXl2wG8uY0fA9wEfI7BboYVbXxlO/+5tv6YRXqcz+Txd2dNRLaW47Z2umPmd2BSHtt2mycCW9rj+2Hg2ycs3yEMXq2vHhqbpHxvBe5uvxt/DKxYjOefX3siSeq2VHZnSZJGwBKRJHWzRCRJ3SwRSVI3S0SS1M0SkSR1s0QkSd3+PxNFbW14TY8fAAAAAElFTkSuQmCC\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "df.cuisine.value_counts().plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "thai df: (289, 385)\njapanese df: (320, 385)\nchinese df: (442, 385)\nindian df: (598, 385)\nkorean df: (799, 385)\n" + ] + } + ], + "source": [ + "\n", + "thai_df = df[(df.cuisine == \"thai\")]\n", + "japanese_df = df[(df.cuisine == \"japanese\")]\n", + "chinese_df = df[(df.cuisine == \"chinese\")]\n", + "indian_df = df[(df.cuisine == \"indian\")]\n", + "korean_df = df[(df.cuisine == \"korean\")]\n", + "\n", + "print(f'thai df: {thai_df.shape}')\n", + "print(f'japanese df: {japanese_df.shape}')\n", + "print(f'chinese df: {chinese_df.shape}')\n", + "print(f'indian df: {indian_df.shape}')\n", + "print(f'korean df: {korean_df.shape}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "def create_ingredient_df(df):\n", + " # transpose df, drop cuisine and unnamed rows, sum the row to get total for ingredient and add value header to new df\n", + " ingredient_df = df.T.drop(['cuisine','Unnamed: 0']).sum(axis=1).to_frame('value')\n", + " # drop ingredients that have a 0 sum\n", + " ingredient_df = ingredient_df[(ingredient_df.T != 0).any()]\n", + " # sort df\n", + " ingredient_df = 
ingredient_df.sort_values(by='value', ascending=False, inplace=False)\n", + " return ingredient_df\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 10 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAaYAAAD4CAYAAACngkIwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAeAklEQVR4nO3de7RVdb338fcHRFAhTPBCeNnogzckQJcdb11ES01QSxGLUtNHjl1EM2twhmbUo+PY5dhN07DHII+nuKSPFKe0FPEasTcCG1TUEk8QCmjuJIRg+33+WL+ti82+AWvvORfr8xpjjT3Xb/7mnN85x2J/+M0595qKCMzMzPKiW9YFmJmZlXIwmZlZrjiYzMwsVxxMZmaWKw4mMzPLlV2yLmBn0L9//6ipqcm6DDOzitG/f3/uv//++yPi9ObzHExlUFNTQ21tbdZlmJlVFEn9W2r3qTwzM8sVB5OZmeWKg8nMzHLF15jMzLrApk2bWLFiBRs2bMi6lC7Xq1cv9t9/f3r06NGh/g6mMqhf2UDNxNlZl2G2Q5bfdGbWJezUVqxYQZ8+faipqUFS1uV0mYjg1VdfZcWKFQwaNKhDy/hUnplZF9iwYQP9+vWrqlACkES/fv22aaRYEcEkaYqk81pof4+kme0su7y1WxLNzLpStYVSk23d74o+lRcRfwW2CiwzM6tcuQwmSRcC1wABLAYagQ9IuhrYD/hKRMyUVAP8OiKOktQd+CZwOvAWcEdE/LBknbsB9wD3RMQdkj4FTAB2BeYBn4uIRknrgO8Do4A3gbMj4pWu2G8zqx7lvi5d7muEvXv3Zt26dWVdZ0fl7lSepCHAdcDIiBgGXJlmDQBOohgYN7Ww6HigBhgeEe8F7i6Z1xv4FfDzFEpHAGOBEyNiOMXgG5f67gH8IW37EeCyVuocL6lWUm3j+obt3l8zM9tS7oIJGAnMiIi1ABHxWmr/fxHxVkQ8DezbwnKnAj+OiM3NlgO4D/hpRPwsvT8FOAaYL2lhen9wmvdP4Ndpuo5i2G0lIiZHRCEiCt1377sdu2lm1nUmTpzIrbfe+vb7SZMmccMNN3DKKadw9NFHM3ToUO67776tlnv44YcZNWrU2++/8IUvMGXKFADq6ur44Ac/yDHHHMNpp53GqlWrylJrHoOpNRtLprf1CuLjwOl65wqcgKkRMTy9DouISWnepnjnefON5PR0p5nZthg7dizTp09/+/306dO56KKLuPfee1mwYAFz5szhS1/6Eu/8+mvbpk2buOKKK5g5cyZ1dXVccsklXHvttWWpNY+/dB8C7pV0c0S8KmmvDi73O+BfJc2JiM2S9ioZNV2fXrcCnwMeBO6T9N2IWJ220SciXir3zpiZ5cGIESNYvXo1f/3rX1mzZg3vfve72W+//fjiF7/II488Qrdu3Vi5ciWvvPIK++23X7vrW7ZsGUuWLOHDH/4wAI2NjQwYMKAsteYumCJiqaQbgbmSGoGnOrjoT4BDgcWSNgF3ALeUzL8SuFPStyLiK5KuAx6Q1A3YBHwecDCZ2U5rzJgxzJw5k5dffpmxY8dy9913s2bNGurq6ujRowc1NTVb/b3RLrvswltvvfX2+6b5EcGQIUN48skny15n7oIJICKmAlPbmN87/VwOHJWmNwNXp1dp35qSt58paZ8GTGtt3Wl6JtDm30mZmVWKsWPHctlll7F27Vrmzp3L9OnT2WeffejRowdz5szhpZe2/r/5QQcdxNNPP83GjRt58803efDBBznppJM47LDDWLNmDU8++STHH388mzZt4rnnnmPIkCE7XGcug6nSDB3Yl1p/nYuZbYMsvgJqyJAhvPHGGwwcOJABAwYwbtw4Ro8ezdChQykUChx++OFbLXPAAQdw/vnnc9RRRzFo0CBGjBgBwK677srMmTOZMGECDQ0NbN68mauuuqoswaSOXuiy1hUKhfCDAs2sLc888wxHHHFE1mVkpqX9l1Q
XEYXmfSvprjwzM6sCDiYzM8sVB5OZWRep1ksn27rfDiYzsy7Qq1cvXn311aoLp6bnMfXq1avDy/iuPDOzLrD//vuzYsUK1qxZk3UpXa7pCbYd5WAyM+sCPXr06PATXKudT+WZmVmuOJjMzCxXHExmZpYrvsZUBvUrG8r+NEqzvMriq3SsunjEZGZmuVJ1wSTpvyXtmXUdZmbWsqo6lZeeYDsqIt5qt7OZmWVipx8xSaqRtEzSz4AlQKOk/mnehZIWS1ok6a7UtrekX0qan14nZlm/mVm1qZYR02Dgooj4g6TlAJKGANcBJ0TE2pJHuH8f+G5EPCbpQOB+YKvvqpc0HhgP0P1de3fBLpiZVYdqCaaXIuIPzdpGAjMiYi1ARLyW2k8Fjiye9QPgXZJ6R8S60oUjYjIwGaDngMHV9eVXZmadqFqC6R/b0LcbcFxEbGi3p5mZld1Of42pDQ8BYyT1Ayg5lfcAcEVTJ0nDM6jNzKxqVW0wRcRS4EZgrqRFwM1p1gSgkG6KeBq4PKsazcyqkart2SCdoVAoRG1tbdZlmJlVFEl1EVFo3l61IyYzM8snB5OZmeWKg8nMzHLFwWRmZrniYDIzs1xxMJmZWa44mMzMLFccTGZmlisOJjMzyxUHk5mZ5Uq1fLt4p6pf2UDNxNlZl2GWmeU3nZl1CbYT8YjJzMxyxcFkZma5UjHBJOksSRPLtK5Jkq4px7rMzKy8KuIak6RdImIWMCvD7W/OYttmZtWmy0dMki5MD+FbJOkuSTWSHkptD0o6MPWbIul2SfOAb0m6WNItad5oSfMkPSXp95L2Te2TJN0p6WFJf5Y0oWS710p6TtJjwGEl7YdI+q2kOkmPSjq8pe134SEyM6tqXTpikjQEuA44ISLWpseZTwWmRsRUSZcAPwDOSYvsn/o2Srq4ZFWPAcdFREj638BXgC+leYcDJwN9gGWSbgPeC1wADKe4zwuAutR/MnB5RDwv6V+AHwEjm2+/hX0ZD4wH6P6uvXfksJiZWYmuPpU3EpgREWsBIuI1SccDH0/z72LL0cmMlkKBYmBMkzQA2BV4sWTe7IjYCGyUtBrYF3g/cG9ErAeQNCv97A2cAMyQ1LR8zw5sn4iYTDHU6DlgsB8DbGZWJnm/xvSPVtp/CNwcEbMkfQiYVDJvY8l0I23vYzfg9YgYvo3bNzOzTtLV15geAsZI6geQTuU9QfE0G8A44NEOrKcvsDJNX9SB/o8A50jaTVIfYDRARPwdeFHSmFSPJA3r6M6YmVn5demIKSKWSroRmCupEXgKuAL4qaQvA2uAz3RgVZMonn77G8WwG9TOdhdImgYsAlYD80tmjwNuk3Qd0AP4RepnZmYZUIQvj+yoQqEQtbW1WZdhZlZRJNVFRKF5e8X8ga2ZmVUHB5OZmeWKg8nMzHLFwWRmZrniYDIzs1xxMJmZWa44mMzMLFccTGZmlisOJjMzyxUHk5mZ5Urev128ItSvbKBm4uysyzDL3PKbzsy6BNsJeMRkZma54mAyM7NcyTSYJJ0j6cgO9Jsi6bwW2j8k6ddlrKcg6Qdp+mJJt5Rr3WZm1jFZj5jOAdoNpq4SEbURMSHrOszMqlmbwSTpJkmfL3k/SdI1kr4sab6kxZK+XjL/q5KWSXpM0s8lXZPaD5H0W0l1kh6VdLikE4CzgG9LWpj6XJbWu0jSLyXtXlLOqZJqJT0naVQLte4h6U5Jf5T0lKSz29ivXpJ+Kqk+9T05tXd4BCZpfKqntnF9Q0cWMTOzDmhvxDQNOL/k/fkUnzI7GHgfMBw4RtIHJB0LnAsMA84ASh/+NBm4IiKOAa4BfhQRTwCzgC9HxPCI+BNwT0QcGxHDgGeAS0vWUZO2eSZwu6RezWq9FngoIt4HnEwx8PZoZb8+D0REDAU+AUxtYX1tiojJEVGIiEL33ftuy6JmZtaGNm8Xj4inJO0j6T3A3sDfgKHARyg+Fh2gN8Wg6gPcFxEbgA2SfgUgqTdwAsVHoTe
tumcrmzxK0g3Anmm995fMmx4RbwHPS/ozcHizZT8CnNU0SgN6AQdSDLjmTgJ+mPbxWUkvAYe2dSzMzKxrdOTvmGYA5wH7URxBHQT8e0T8uLSTpKtaWb4b8HpEDO/AtqYA50TEIkkXAx8qmdf8GfDN3ws4NyKWdWA7ZmaWUx25+WEacAHFcJpBcRRzSRoJIWmgpH2Ax4HR6fpNb2AUQET8HXhR0pjUX5KGpXW/QXGk1aQPsEpSD2BcszrGSOom6RDgYKB5AN0PXKE0LJM0oo19erRp/ZIOpTiycqCZmeVAu8EUEUspBsbKiFgVEQ8A/wU8KakemAn0iYj5FK8ZLQZ+A9QDTXcFjAMulbQIWAo03ZjwC+DL6QaEQ4CvAvMohtyzzUr5H+CPad2Xp1OGpf4P0ANYLGlpet+aHwHdUv3TgIsjYmN7x8LMzDqfIpqfEduBlUm9I2JdupvuEWB8RCwo2wZyqlAoRG1tbdZlmJlVFEl1EVFo3l7u78qbnP5gthcwtRpCyczMyquswRQRnyzn+naUpNOAbzZrfjEiPpZFPWZm1r6d+tvFI+J+trzl3MzMci7rryQyMzPbgoPJzMxyxcFkZma54mAyM7NccTCZmVmuOJjMzCxXHExmZpYrO/XfMXWV+pUN1EycnXUZZhVh+U1nZl2C5ZxHTGZmlisOplZIGi7po1nXYWZWbRxMrRsOOJjMzLpY7oJJ0oWSFktaJOkuSTWSHkptD0o6MPXbV9K9qd8iSSek9qslLUmvq1JbjaRnJN0haamkByTtluY9LKmQpvtLWi5pV+AbwFhJCyWNzeZomJlVn1wFk6QhwHXAyIgYBlwJ/JDiIzTeC9wN/CB1/wEwN/U7Glgq6RjgM8C/AMcBl5U8yXYwcGtEDAFeB85trY6I+CdwPTAtIoZHxLQWah0vqVZSbeP6hq1XYmZm2yVXwQSMBGZExFqAiHgNOJ7iE3MB7gJOKul7W+rXGBENad69EfGPiFgH3AO8P/V/MSIWpuk6oGZHCo2IyRFRiIhC99377siqzMysRN6CqTOVPjq9kXduld/MO8ehV5dWZGZmW8lbMD0EjJHUD0DSXsATwAVp/jjg0TT9IPDZ1K+7pL5p3jmSdpe0B/Cxkv6tWQ4ck6bPK2l/A+izQ3tjZmbbLFfBFBFLgRuBuZIWATcDVwCfkbQY+DTF606knydLqqd4au7I9Cj3KcAfgXnATyLiqXY2+x3gs5KeAvqXtM8BjvTND2ZmXUsRkXUNFa/ngMEx4KLvZV2GWUXwNz9YE0l1EVFo3u6vJCqDoQP7Uut/bGZmZZGrU3lmZmYOJjMzyxUHk5mZ5YqDyczMcsXBZGZmueJgMjOzXHEwmZlZrjiYzMwsVxxMZmaWKw4mMzPLFX8lURnUr2ygZuLsrMswq2j+Dj1r4hGTmZnlSsUHk6RvSDo16zrMzKw8Kv5UXkRc39nbkNQ9Iho7eztmZlZhIyZJX5W0TNJjkn4u6RpJUySdl+Yvl/R1SQsk1Us6PLXvLel3kpZK+omklyT1T/M+JemP6YGAP5bUPbWvk/Qf6YGFx2e202ZmVaZigknSscC5wDDgDGCrh0slayPiaOA24JrU9jXgoYgYAswEDkzrPAIYC5wYEcOBRoqPbwfYA5gXEcMi4rEW6hkvqVZSbeP6hrLso5mZVdapvBOB+yJiA7BB0q9a6XdP+lkHfDxNnwR8DCAifivpb6n9FOAYYL4kgN2A1WleI/DL1oqJiMnAZCg+wXZ7dsjMzLZWScHUURvTz0ba3z8BUyPi31qYt8HXlczMul7FnMoDHgdGS+olqTcwahuXPR9A0keAd6f2B4HzJO2T5u0l6aAy1mxmZtuoYkZMETFf0ixgMfAKUA909OLO14GfS/o08CTwMvBGRKyVdB3wgKRuwCbg88BLZd8BMzPrEEVUzuURSb0jYp2k3YFHgPERsaADy/UEGiNis6TjgdvSzQ5lUSgUora2tlyrMzOrCpLqImKrG9k
qZsSUTJZ0JNCL4rWhdkMpORCYnkZF/wQu66wCzcxsx1RUMEXEJ7dzueeBEWUux8zMOkEl3fxgZmZVwMFkZma54mAyM7NccTCZmVmuOJjMzCxXHExmZpYrDiYzM8sVB5OZmeVKRf2BbV7Vr2ygZuLsrMswqzrLbzoz6xKsE3jEZGZmueJgMjOzXKmIYJJ0uaQL0/QUSedt53qGS/poeaszM7NyqohrTBFxe5lWNRwoAP/dfIakXSJic5m2Y2Zm2ymXwZRGR9cAQfHBgH8C1kXEd5r1ux4YDewGPAH8a0SEpIeBecDJwJ7Apen9N4DdJJ0E/DtwBHAIcDDwP5I+A9xGMbw2A1dHxJzO3VszMyuVu1N5koYA1wEjI2IYcGUb3W+JiGMj4iiK4VT6uPVdIuJ9wFXA1yLin8D1wLSIGB4R01K/I4FTI+ITFJ9eGxExFPgEMFVSr1bqHC+pVlJt4/qOPkjXzMzak7tgAkYCMyJiLUBEvNZG35MlzZNUn5YbUjLvnvSzDqhpYx2zIuLNNH0S8J9pu89SfMT6oS0tFBGTI6IQEYXuu/dtZ5fMzKyjcnkqryPSSOZHQCEi/iJpEsUn2zbZmH420vZ+/qNzKjQzs+2RxxHTQ8AYSf0AJO3VSr+mEForqTfQkTv13gD6tDH/UWBc2u6hFB/JvqwjRZuZWXnkLpgiYilwIzBX0iLg5lb6vQ7cASwB7gfmd2D1c4AjJS2UNLaF+T8CuqVTg9OAiyNiYwv9zMyskygisq6h4hUKhaitrc26DDOziiKpLiIKzdtzN2IyM7Pq5mAyM7NccTCZmVmuOJjMzCxXHExmZpYrDiYzM8sVB5OZmeWKg8nMzHLFwWRmZrniYDIzs1yp2G8Xz5P6lQ3UTJyddRlm1omW33Rm1iVUDY+YzMwsVxxMZmaWKw4mMzPLlVwEk6QLJS2WtEjSXZJGp0emPyXp95L2ldRN0vOS9k7LdJP0gqS90+uXkuan14mpzyRJd0p6WNKfJU1I7TWSnpF0h6Slkh6QtFuad4ik30qqk/SopMOzOzJmZtUn82CSNAS4DhgZEcOAK4HHgOMiYgTwC+ArEfEW8J+kJ8wCpwKLImIN8H3guxFxLHAu8JOSTRwOnAa8D/iapB6pfTBwa0QMAV5PywFMBq6IiGOAayg+PLClusdLqpVU27i+YYePg5mZFeXhrryRwIyIWAsQEa9JGgpMkzQA2BV4MfW9E7gP+B5wCfDT1H4qxSfTNq3zXelx6wCz01NoN0paDeyb2l+MiIVpug6oScucAMwoWVfPloqOiMkUQ4yeAwb7aYtmZmWSh2BqyQ+BmyNilqQPAZMAIuIvkl6RNJLiCKhp9NSN4ghrQ+lKUriUPhq9kXf2uXn7bmk9r0fE8LLujZmZdVjmp/KAh4AxkvoBSNoL6AusTPMvatb/JxRP6c2IiMbU9gBwRVMHSdsVLBHxd+BFSWPSeiRp2Pasy8zMtk/mwRQRS4EbgbmSFgE3UxwhzZBUB6xttsgsoDfvnMYDmAAU0g0UTwOX70BJ44BLUy1LgbN3YF1mZraNFFFZl0ckFSje6PD+rGtp0nPA4Bhw0feyLsPMOpG/+aH8JNVFRKF5e16vMbVI0kTgs7xzbSkXhg7sS60/tGZmZZH5qbxtERE3RcRBEfFY1rWYmVnnqKhgMjOznZ+DyczMcsXBZGZmueJgMjOzXHEwmZlZrjiYzMwsVxxMZmaWKw4mMzPLFQeTmZnlSkV9JVFe1a9soGbi7KzLMLOdWDV9V59HTGZmlitVEUySJkh6RtLf0hfBttbvYkm3dGVtZma2pWo5lfc54NSIWJF1IWZm1radfsQk6XbgYOA3kr7YNCKSNEbSEkmLJD1Sssh7JP1W0vOSvpVJ0WZmVWynD6aIuBz4K3Ay8LeSWdcDp0XEMOCskvbhwFhgKDBW0gEtrVfSeEm1kmob1zd0TvFmZlVopw+mNjwOTJF0GdC9pP3BiGiIiA3A08BBLS0cEZM
johARhe679+2Ccs3MqkPVBlMaSV0HHADUSeqXZm0s6dZI9VyHMzPLhar9pSvpkIiYB8yTdAbFgDIzs4xV7YgJ+LakeklLgCeARVkXZGZmVTJiioiaNDklvYiIj7fQ9e35qc+oTi3MzMy2UhXB1NmGDuxLbRV9XYiZWWeq5lN5ZmaWQw4mMzPLFQeTmZnlioPJzMxyxcFkZma54mAyM7NccTCZmVmuOJjMzCxXHExmZpYr/uaHMqhf2UDNxNlZl2Fm1qWWd9I33njEZGZmueJgMjOzXKnaYJJ0saRb0vTlki7MuiYzM6vSa0ySttjviLg9q1rMzGxLFR1Mkr4KfApYA/wFqAMagPHArsALwKcjYr2kKcAGYATwOLC4ZD2TgHUR8R1J/wu4Hdib4qPVx0TEn7pqn8zMql3FnsqTdCxwLjAMOAMopFn3RMSxETEMeAa4tGSx/YETIuLqNlZ9N3BrWv4EYFUr2x8vqVZSbeP6hh3cGzMza1LJI6YTgfsiYgOwQdKvUvtRkm4A9gR6A/eXLDMjIhpbW6GkPsDAiLgXIK27RRExGZgM0HPA4NihPTEzs7dV7IipDVOAL0TEUODrQK+Sef/IpCIzM+uwSg6mx4HRknpJ6g2MSu19gFWSegDjtmWFEfEGsELSOQCSekravZxFm5lZ2yo2mCJiPjCL4k0MvwHqKd748FVgHsXgenY7Vv1pYIKkxcATwH5lKdjMzDpEEZV7eURS74hYl0Y1jwDjI2JBV9dRKBSitra2qzdrZlbRJNVFRKF5eyXf/AAwWdKRFK8jTc0ilMzMrLwqOpgi4pNZ12BmZuVVsdeYzMxs5+RgMjOzXHEwmZlZrlT0XXl5IekNYFnWdWyH/sDarIvYDq67a7nurlUtda8FiIjTm8+o6JsfcmRZS7c85p2kWtfddVx313LdXaucdftUnpmZ5YqDyczMcsXBVB6Tsy5gO7nuruW6u5br7lplq9s3P5iZWa54xGRmZrniYDIzs1xxMO0ASadLWibpBUkTs66nNZIOkDRH0tOSlkq6MrVPkrRS0sL0+mjWtTYnabmk+lRfbWrbS9LvJD2ffr476zpLSTqs5JgulPR3SVfl9XhLulPSaklLStpaPMYq+kH6zC+WdHTO6v62pGdTbfdK2jO110h6s+TY356zulv9bEj6t3S8l0k6LZuqW617WknNyyUtTO07drwjwq/teAHdgT8BBwO7AouAI7Ouq5VaBwBHp+k+wHPAkcAk4Jqs62un9uVA/2Zt3wImpumJwDezrrOdz8nLwEF5Pd7AB4CjgSXtHWPgoxSffybgOGBezur+CLBLmv5mSd01pf1yeLxb/Gykf6eLgJ7AoPQ7p3te6m42/z+A68txvD1i2n7vA16IiD9HxD+BXwBnZ1xTiyJiVaRHgkTxKb3PAAOzrWqHnA1MTdNTgXMyrKU9pwB/ioiXsi6kNRHxCPBas+bWjvHZwM+i6A/AnpIGdE2lW2qp7oh4ICI2p7d/APbv8sLa0crxbs3ZwC8iYmNEvAi8QPF3T5drq25JAs4Hfl6ObTmYtt9A4C8l71dQAb/sJdUAIyg+5RfgC+m0x515OyWWBPCApDpJ41PbvhGxKk2/DOybTWkdcgFb/mPN+/Fu0toxrqTP/SUUR3dNBkl6StJcSe/Pqqg2tPTZqJTj/X7glYh4vqRtu4+3g6mKSOoN/BK4KiL+DtwGHAIMB1ZRHIrnzUkRcTRwBvB5SR8onRnF8wa5/JsHSbsCZwEzUlMlHO+t5PkYt0bStcBm4O7UtAo4MCJGAFcD/yXpXVnV14KK/GyU+ARb/gdsh463g2n7rQQOKHm/f2rLJUk9KIbS3RFxD0BEvBIRjRHxFnAHGZ0iaEtErEw/VwP3UqzxlabTR+nn6uwqbNMZwIKIeAUq43iXaO0Y5/5zL+liYBQwLoUq6VTYq2m6juK1mkMzK7KZNj4blXC8dwE+DkxratvR4+1g2n7zgcGSBqX/GV8AzMq4pha
l87//F3gmIm4uaS+9NvAxYEnzZbMkaQ9JfZqmKV7YXkLxOF+Uul0E3JdNhe3a4n+ReT/ezbR2jGcBF6a7844DGkpO+WVO0unAV4CzImJ9Sfvekrqn6YOBwcCfs6lya218NmYBF0jqKWkQxbr/2NX1teNU4NmIWNHUsMPHO4u7O3aWF8U7lJ6j+L+Ba7Oup406T6J4KmYxsDC9PgrcBdSn9lnAgKxrbVb3wRTvSFoELG06xkA/4EHgeeD3wF5Z19pC7XsArwJ9S9pyebwphucqYBPFaxiXtnaMKd6Nd2v6zNcDhZzV/QLFazJNn/PbU99z02doIbAAGJ2zulv9bADXpuO9DDgjT3Wn9inA5c367tDx9lcSmZlZrvhUnpmZ5YqDyczMcsXBZGZmueJgMjOzXHEwmZlZrjiYzMwsVxxMZmaWK/8fnSxrKwF+wYgAAAAASUVORK5CYII=\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "thai_ingredient_df = create_ingredient_df(thai_df)\r\n", + "thai_ingredient_df.head(10).plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 11 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "japanese_ingredient_df = create_ingredient_df(japanese_df)\r\n", + "japanese_ingredient_df.head(10).plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 12 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAaYAAAD4CAYAAACngkIwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAfeElEQVR4nO3deZwV1Z338c8XbCEKYlSMiEurISECsl1NULO5xD2aCYgT4hLzkkcn4pgEnyGPJoMTM+OSbTRGJRmFUUcF1GjCqHGUuBL1NggNKmgiPmGJ4tZiEILwmz/qtN603U033O5bl/6+X69+dd1Tp079Thfxl3Oqbh1FBGZmZnnRrdIBmJmZlXJiMjOzXHFiMjOzXHFiMjOzXHFiMjOzXNmm0gFsDXbZZZeora2tdBhmZlWlrq7u1Yjo27TciakMamtrKRaLlQ7DzKyqSHqpuXJP5ZmZWa44MZmZWa44MZmZWa74HpOZWSdYv349y5YtY+3atZUOpdP17NmTPfbYg5qamjbVd2Iqg/rlDdROmlXpMKyDLb30uEqHYFVs2bJl9O7dm9raWiRVOpxOExG89tprLFu2jH322adNx3gqz8ysE6xdu5add965SyUlAEnsvPPO7RopOjGZmXWSrpaUGrW3305MZmaWK7m4xyTpNGAiEMACYDpwEbAt8BowDlgFLAYOjohVkroBS4BRqZlrgb3S9vkR8Zikyals3/T7pxFxpaRa4B7gUeBgYDlwYkS8I2k/4GqgL7AGOCsinuu43ptZV1Tu+9Llvgfaq1cv3n777bK22VYVHzFJGkSWhA6LiKHAP5IljE9FxHDgVuD/RsRG4CayJAVwBDA/IlYB/w78JCIOBL4M/LLkFAOBo4CDgH+W1PhYyADg6ogYBLyZjgOYAkyIiJFkyfLnLcQ9XlJRUnHDmoYt/juYmVmm4okJOAyYERGvAkTE68AewH2S6oELgEGp7vXAaWn7TOCGtH0E8DNJTwN3AztI6pX2zYqIdan9V4CPpPIXI+LptF0H1KZjDgZmpLauA/o1F3RETImIQkQUum/XZwv/BGZmHWvSpElcffXV732ePHkyl1xyCYcffjgjRoxgyJAh3HXXXR847ne/+x3HH3/8e5/PPfdcpk6dCkBdXR2f/exnGTlyJEcddRQrV64sS6x5SEzNuQr4WUQMAf4P0BMgIv4EvCzpMLIR0D2pfjeyEdaw9NM/IhrHoOtK2t3A+9OXzZV3A94saWdYRHyiIzpoZtaZxo4dy/Tp09/7PH36dE4//XTuvPNO5s6dy+zZs/n2t79NRLSpvfXr1zNhwgRmzpxJXV0dZ555JhdeeGFZYs3DPaYHgTsl/TgiXpO0E9CH7L4PwOlN6v+SbErvxojYkMp+C0wArgCQNKxkNNRmEfGWpBcljYmIGcoeJTkgIuZvRr/MzHJj+PDhvPLKK6xYsYJVq1bx4Q9/mN12241vfvObPPzww3Tr1o3ly5fz8ssvs9tuu22yvcWLF7Nw4UKOPPJIADZs2EC/fs1OMLVbxRNTRCyS9APgIUkbgHnAZLLptDfIElfpt7LuJpvCu6Gk7DzgakkLyPr0MHD2ZoY0DrhG0kVADdk9LicmM6t6Y8aMYebMmfz5z39m7Nix3HzzzaxatYq6ujpqamqora39wPeNttlmGzZu3Pje58b9EcGgQYOYM2dO2eOseGICiIhpwLQmxR+c7MwMJXvo4b0n5dL9o7HNtDu5yefBJR8Hl5T/sGT7ReDotsZuZlYtxo4dy1lnncWrr77KQw89xPTp09l1112pqalh9uzZvPTSB1eh2HvvvXnmmWdYt24d77zzDg888ACHHnooH//4x1m1ahVz5sxh1KhRrF+/niVLljBo0KBmztw+uUhMbSVpEnAO7z+ZlwtD+veh6NfVmFk7VOIVV4MGDWL16tX079+ffv36MW7cOE444QSGDBlCoVBg4MCBHzhmzz335OSTT2bw4MH
ss88+DB8+HIBtt92WmTNnct5559HQ0MC7777L+eefX5bEpLbe6LKWFQqF8EKBZtaaZ599lk98ous+S9Vc/yXVRUShad28PpVnZmZdlBOTmZnlihOTmVkn6aq3TtrbbycmM7NO0LNnT1577bUul5wa12Pq2bNnm4+pqqfyzMyq1R577MGyZctYtWpVpUPpdI0r2LaVE5OZWSeoqalp8wquXZ2n8szMLFecmMzMLFecmMzMLFd8j6kM6pc3lH01SsuvSrxKxqwr8YjJzMxypcslJkn/LWnHSsdhZmbN61JTeWnhv+MjYuMmK5uZWUVs9SMmSbWSFkv6T2AhsEHSLmnfaZIWSJov6cZU1lfS7ZKeSj+HVDJ+M7OupquMmAYAp0fE7yUtBZA0CLgIODgiXk1LugP8O/CTiHhU0l7AfcAH3lUvaTwwHqD7Dn07oQtmZl1DV0lML0XE75uUHQbMSKvfEhGvp/IjgP2zWT8AdpDUKyLeLj04IqYAUwB69BvQtV5+ZWbWgbpKYvpLO+p2Az4VEWs3WdPMzMpuq7/H1IoHgTGSdgYomcr7LTChsZKkYRWIzcysy+qyiSkiFgE/AB6SNB/4cdp1HlBID0U8A5xdqRjNzLoidbW1QTpCoVCIYrFY6TDMzKqKpLqIKDQt77IjJjMzyycnJjMzyxUnJjMzyxUnJjMzyxUnJjMzyxUnJjMzyxUnJjMzyxUnJjMzyxUnJjMzyxUnJjMzy5Wu8nbxDlW/vIHaSbMqHYblxNJLj6t0CGZVzSMmMzPLFScmMzPLlapITJJ2lzSz0nGYmVnHq4rEFBErImJ0Jc4tyffhzMw6Ue4Sk6RLJX2j5PNkSRMlLUyfz5B0h6R7JT0v6fKSul+QNEfSXEkzJPVK5cdKek5SnaQrJf0mlR+U6s+T9Likj5ec425JDwIPdOofwMysi8tdYgJuA04u+Xwy8ESTOsOAscAQYKykPSXtAlwEHBERI4Ai8C1JPYHrgGMiYiTQt6Sd54BPR8Rw4HvAv5bsGwGMjojPNhekpPGSipKKG9Y0bG5fzcysidxNU0XEPEm7StqdLIm8AfypSbUHIqIBIC1/vjewI7A/8JgkgG2BOcBA4I8R8WI69hZgfNruA0yTNAAIoKbkHPdHxOutxDkFmALQo98ALwNsZlYmuUtMyQxgNLAb2QiqqXUl2xvI+iGyZPL3pRUlDWvlPN8HZkfElyTVAr8r2feXdkdtZmZbLI9TeZAlo1PIktOMNh7ze+AQSR8FkLS9pI8Bi4F9U+KBbAqwUR9gedo+Y8tCNjOzcshlYoqIRUBvYHlErGzjMavIksstkhaQpvEi4h3gH4B7JdUBq4HGm0KXA/8maR75HT2amXUpitj6b49I6hURbyu7+XQ18HxE/KRc7RcKhSgWi+VqzsysS5BUFxGFpuW5HDF1gLMkPQ0sIpu+u67C8ZiZWQu6xPRVGh2VbYRkZmYdp6uMmMzMrEo4MZmZWa44MZmZWa44MZmZWa44MZmZWa44MZmZWa44MZmZWa44MZmZWa50iS/YdrT65Q3UTppV6TDM2mTppcdVOgSzVnnEZGZmuVL1iUnS+ZK2K1Nbn2tcdt3MzCqj6hMTcD7QrsQkqXsHxWJmZluoqhJTWvxvlqT5khZK+mdgd2C2pNmpzjWSipIWSbq45Nilki6TNBcYI+mjkv4ntTVX0n6pai9JMyU9J+nmtFSGmZl1kmp7+OFoYEVEHAcgqQ/wNeDzEfFqqnNhRLyeRkUPSDogIhakfa9FxIh07BPApRFxp6SeZEl6T2A4MAhYATwGHAI82jQQSeOB8QDdd+jbMb01M+uCqmrEBNQDR6aRz6cjoqGZOienUdE8sgSzf8m+2wAk9Qb6R8SdABGxNiLWpDpPRsSyiNgIPA3UNhdIREyJiEJEFLpv16csnTMzsyobMUXEEkkjgGOBSyQ9ULpf0j7ARODAiHhD0lSgZ0mVv7ThNOtKtjdQZX8jM7NqV1UjJkm7A2s
i4ibgCmAEsBronarsQJZ8GiR9BDimuXYiYjWwTNJJqd0e5Xqyz8zMtky1jQaGAFdI2gisB84BRgH3SloREZ+XNA94DvgT2T2ilpwKXCfpX1JbYzo2dDMzawtFRKVjqHo9+g2Ifqf/tNJhmLWJ3/xgeSGpLiIKTcurbcSUS0P696Ho/7GbmZVFVd1jMjOzrZ8Tk5mZ5YoTk5mZ5YoTk5mZ5YoTk5mZ5YoTk5mZ5YoTk5mZ5YoTk5mZ5YoTk5mZ5YoTk5mZ5YpfSVQG9csbqJ00q9JhmLWL35lneeURk5mZ5YoTk5mZ5UpFE5OkkyTt34Z6UyWNbqb8c5J+U8Z4CpKuTNtnSPpZudo2M7O2qfSI6SRgk4mps0REMSLOq3QcZmZdWauJSdKlkr5R8nmypImSLpD0lKQFki4u2f9dSYslPSrpFkkTU/l+ku6VVCfpEUkDJR0MfJFsRdqnU52zUrvzJd3eZLnzIyQVJS2RdHwzsW4v6XpJT0qaJ+nEVvrVU9INkupT3c+n8jaPwCSNT/EUN6xpaMshZmbWBpsaMd0GnFzy+WRgFTAAOAgYBoyU9BlJBwJfBoYCxwClqxJOASZExEhgIvDziHgcuBu4ICKGRcQfgDsi4sCIGAo8C3y9pI3adM7jgGsl9WwS64XAgxFxEPB5soS3fQv9+gYQETEE+HtgWjPttSoipkREISIK3bfr055DzcysFa0+Lh4R8yTtKml3oC/wBjAE+AIwL1XrRZaoegN3RcRaYK2kXwNI6gUcDMyQ1Nh0jxZOOVjSJcCOqd37SvZNj4iNwPOS/ggMbHLsF4AvNo7SgJ7AXmQJrqlDgatSH5+T9BLwsdb+FmZm1jna8j2mGcBoYDeyEdTewL9FxHWllSSd38Lx3YA3I2JYG841FTgpIuZLOgP4XMm+aFK36WcBX46IxW04j5mZ5VRbHn64DTiFLDnNIBvFnJlGQkjqL2lX4DHghHT/phdwPEBEvAW8KGlMqi9JQ1Pbq8lGWo16Aysl1QDjmsQxRlI3SfsB+wJNE9B9wASlYZmk4a306ZHG9iV9jGxk5YRmZpYDm0xMEbGILGEsj4iVEfFb4L+AOZLqgZlA74h4iuye0QLgHqAeaHwqYBzwdUnzgUVA44MJtwIXpAcQ9gO+CzxBluSeaxLK/weeTG2fnaYMS30fqAEWSFqUPrfk50C3FP9twBkRsW5TfwszM+t4img6I7YFjUm9IuLt9DTdw8D4iJhbthPkVKFQiGKxWOkwzMyqiqS6iCg0LS/3u/KmpC/M9gSmdYWkZGZm5VXWxBQRXylne1tK0lHAZU2KX4yIL1UiHjMz27St+u3iEXEff/vIuZmZ5VylX0lkZmb2N5yYzMwsV5yYzMwsV5yYzMwsV5yYzMwsV5yYzMwsV5yYzMwsV7bq7zF1lvrlDdROmlXpMMw6xNJLj6t0CNbFeMRkZma54sRkZma54sTUhKSzJZ2WtqdKGl3pmMzMuhLfY2oiIq6tdAxmZl1ZWUdMkraXNEvSfEkLJY2VNFLSQ5LqJN0nqV+qe56kZyQtkHRrKjtI0py0cODjkj6eys+Q9CtJ90taKulcSd9K9X4vaadUbz9J96ZzPSJpYCux1kp6MJ3/AUl7pfLJkia2oa/jJRUlFTesadhUdTMza6NyT+UdDayIiKERMRi4F7gKGB0RI4HrgR+kupOA4RFxAHB2KnsO+HREDAe+B/xrSduDgb8DDkxtrEn15gCnpTpTgAnpXBPJVqptyVVka0YdANwMXNmejkbElIgoRESh+3Z92nOomZm1otxTefXAjyRdBvwGeIMsodwvCaA7sDLVXQDcLOlXwK9SWR9gmqQBQJAtld5odkSsBlZLagB+XXLOAyT1Ag4GZqRzAfRoJdZRZIkO4Ebg8vZ318zMyq3cCwUukTQCOBa4BHgQWBQRo5qpfhzwGeAE4EJJQ4DvkyWgL0mqBX5XUn9dyfbGks8byfrRDXgzIoaVrUNmZtbpyn2PaXe
yKbabgCuATwJ9JY1K+2skDZLUDdgzImYD/0Q2UuqVfi9PzZ3RnnNHxFvAi5LGpHNJ0tBWDnkcOCVtjwMeac/5zMysY5R7Km8IcIWkjcB64BzgXeBKSX3S+X4KLAFuSmUCroyINyVdTjaVdxGwOa9SGAdck46vAW4F5rdQdwJwg6QLgFXA1zbjfAAM6d+Hor8db2ZWFoqISsdQ9QqFQhSLxUqHYWZWVSTVRUShabm/YGtmZrmy1X/BVtKFwJgmxTMi4gfN1Tczs8ra6hNTSkBOQmZmVcJTeWZmlitOTGZmlitOTGZmlitOTGZmlitOTGZmlitOTGZmlitb/ePinaF+eQO1kzbnDUpmtqWW+nVgWx2PmMzMLFecmMzMLFe2usSUlkxfmLY/J+k3afuLkiZVNjozM9uULnOPKSLuBu6udBxmZta63I2YJG0vaZak+ZIWShor6UBJj6eyJyX1TiOjRyTNTT8Hb6LdMyT9LG3XSnpQ0gJJD0jaK5VPlXRlOtcfJY3ujD6bmdn78jhiOhpYERHHAaTFBOcBYyPiKUk7AO8ArwBHRsRaSQOAW4APrOvRgquAaRExTdKZwJXASWlfP+BQYCDZCGtmcw1IGg+MB+i+Q9/299LMzJqVuxETUA8cKekySZ8G9gJWRsRTkC2hHhHvkq1Q+wtJ9cAMYP92nGMU8F9p+0ayRNToVxGxMSKeAT7SUgMRMSUiChFR6L5dn3ac2szMWpO7EVNELJE0AjgWuAR4sIWq3wReBoaSJdi1ZQphXcm2ytSmmZm1Ue5GTJJ2B9ZExE3AFcAngX6SDkz7e0vaBuhDNpLaCJwKdG/HaR4HTknb44BHyhW/mZltmdyNmIAhwBWSNgLrgXPIRi5XSfoQ2f2lI4CfA7dLOg24F/hLO84xAbhB0gXAKuBrZYzfzMy2gCKi0jFUvUKhEMVisdJhmJlVFUl1EfGBh9ZyN5VnZmZdmxOTmZnlihOTmZnlihOTmZnlihOTmZnlihOTmZnlihOTmZnlihOTmZnlihOTmZnlihOTmZnlSh7flVd16pc3UDtpVqXDMLMyWnrpcZUOocvyiMnMzHKlyyamJkutn53eUm5mZhXWJafy0npO74mIaysVi5mZ/a2qTkySvgt8lWxNpT8BdUADMB7YFngBODUi1kiaSrbK7XDgMWBBSTuTgbcj4oeSPgpcC/QFNgBjIuIPndUnM7Ourmqn8tKKtl8mW1r9GKBxTY87IuLAiBgKPAt8veSwPYCDI+JbrTR9M3B1Ov5gYGUL5x8vqSipuGFNwxb2xszMGlXziOkQ4K6IWAuslfTrVD5Y0iXAjkAv4L6SY2ZExIaWGpTUG+gfEXcCpLabFRFTgCkAPfoN8GqLZmZlUrUjplZMBc6NiCHAxUDPkn3tWX7dzMwqoJoT02PACZJ6SuoFHJ/KewMrJdUA49rTYESsBpZJOglAUg9J25UzaDMza13VJqaIeAq4m+whhnuAerIHH74LPEGWuJ7bjKZPBc6TtAB4HNitLAGbmVmbKKJ6b49I6hURb6dRzcPA+IiY29lx9Og3IPqd/tPOPq2ZdSC/+aHjSaqLiELT8mp++AFgiqT9ye4jTatEUgIY0r8PRf8jNjMri6pOTBHxlUrHYGZm5VW195jMzGzr5MRkZma54sRkZma54sRkZma54sRkZma54sRkZma54sRkZma54sRkZma54sRkZma5UtVvfsiL+uUN1E6aVekwzKwD+J15nc8jJjMzy5WqT0yS/kXSEZWOw8zMyqPqp/Ii4nsdfQ5J3Vtbkt3MzMqnqkZMkr4rabGkRyXdImmipKmSRqf9SyVdLGmupHpJA1N5X0n3S1ok6ZeSXpK0S9r3VUlPSnpa0nWSuqfytyX9SNJ8YFTFOm1m1sVUTWKSdCDwZWAocAzwgcWlklcjYgRwDTAxlf0z8GBEDAJmAnulNj8BjAUOiYhhwAbeX459e+CJiBgaEY82E894SUVJxQ1rGsrSRzMzq66
pvEOAuyJiLbBW0q9bqHdH+l0H/F3aPhT4EkBE3CvpjVR+ODASeEoSwIeAV9K+DcDtLQUTEVOAKZCtYLs5HTIzsw+qpsTUVuvS7w1sun8iW/n2O83sW+v7SmZmna9qpvKAx4ATJPWU1As4vp3Hngwg6QvAh1P5A8BoSbumfTtJ2ruMMZuZWTtVzYgpIp6SdDewAHgZqAfaenPnYuAWSacCc4A/A6sj4lVJFwG/ldQNWA98A3ip7B0wM7M2qZrElPwwIiZL2g54GKiLiF807oyI2pLtIvC59LEBOCoi3pU0CjgwItalercBtzU9UUT06rBemJlZi6otMU2RtD/Qk+ze0Nw2HrcXMD2Niv4KnFXOoIb070PRry0xMyuLqkpMEfGVzTzueWB4mcMxM7MOUE0PP5iZWRfgxGRmZrnixGRmZrnixGRmZrnixGRmZrnixGRmZrnixGRmZrnixGRmZrlSVV+wzav65Q3UTppV6TDMzDrV0g56441HTGZmlitOTGZmlitOTGZmlitOTGZmlisdmpgkbS9plqT5khZKGivpcEnzJNVLul5SD0mHSfpVyXFHSrqzhTa7S5qa2quX9M1Ufpakp9K5bk9rNpHqji45/u2S7X9KbcyXdGkq20/SvZLqJD0iaWBH/X3MzOyDOnrEdDSwIiKGRsRg4F5gKjA2IoaQPRV4DjAbGCipbzrua8D1LbQ5DOgfEYNTGzek8jsi4sCIGAo8C3y9tcAkHQOcCHwyHXN52jUFmBARI4GJwM9bOH68pKKk4oY1bV1I18zMNqWjE1M9cKSkyyR9GqgFXoyIJWn/NOAzERHAjcBXJe0IjALuaaHNPwL7SrpK0tHAW6l8cBrh1APjgEGbiO0I4IaIWAMQEa9L6gUcDMyQ9DRwHdCvuYMjYkpEFCKi0H27Ppv6O5iZWRt16PeYImKJpBHAscAlwIOtVL8B+DWwFpgREe+20OYbkoYCRwFnAycDZ5KNxE6KiPmSzuD9ZdXfJSXgtILttq3E0A14MyKGtaV/ZmZWfh19j2l3YE1E3ARcQTYSqpX00VTlVOAhgIhYAawALuL96bnm2twF6BYRt6e6I9Ku3sBKSTVkI6ZGS4GRafuLQE3avh/4Wsm9qJ0i4i3gRUljUplSEjQzs07S0W9+GAJcIWkjsJ7sflIfsqmybYCngGtL6t8M9I2IZ1tpsz9wQxr9AHwn/f4u8ASwKv3uncp/AdwlaT7ZPa6/AETEvZKGAUVJfwX+G/h/ZEntGkkXkSWxW4H5m9l/MzNrJ2W3d/JB0s+AeRHxH5WOpT0KhUIUi8VKh2FmVlUk1UVEoWl5bt6VJ6mObDTz7UrHYmZmlZObxJQez/4bkp4AejQpPjUi6jsnKjMz62y5SUzNiYhPVjoGMzPrXH4lkZmZ5YoTk5mZ5UqunsqrVpJWA4srHUcZ7QK8Wukgysx9yr+trT+w9fWp3P3ZOyL6Ni3M9T2mKrK4uUceq5Wk4tbUH3CfqsHW1h/Y+vrUWf3xVJ6ZmeWKE5OZmeWKE1N5TKl0AGW2tfUH3KdqsLX1B7a+PnVKf/zwg5mZ5YpHTGZmlitOTGZmlitOTFtA0tGSFkt6QdKkSsezuSQtlVQv6WlJxVS2k6T7JT2ffn+40nG2RtL1kl6RtLCkrNk+pHW2rkzXbUFazDJXWujPZEnL03V6WtKxJfu+k/qzWNJRlYm6ZZL2lDRb0jOSFkn6x1RezdeopT5V83XqKelJSfNTny5O5ftIeiLFfpukbVN5j/T5hbS/tiyBRIR/NuMH6A78AdiXbFXc+cD+lY5rM/uyFNilSdnlwKS0PQm4rNJxbqIPnyFbNHLhpvpAtqLyPYCATwFPVDr+NvZnMjCxmbr7p39/PYB90r/L7pXuQ5MY+wEj0nZvYEmKu5qvUUt9qubrJKBX2q4hW9vuU8B04JRUfi1wTtr+B+DatH0KcFs54vCIafMdBLwQEX+MiL+SLSh4YoVjKqcTgWlpexpwUgVj2aS
IeBh4vUlxS304EfjPyPwe2FFSv86JtG1a6E9LTgRujYh1EfEi8ALZv8/ciIiVETE3ba8GniVb9LOar1FLfWpJNVyniIi308ea9BPAYcDMVN70OjVev5nA4ZK0pXE4MW2+/sCfSj4vo/V/lHkWwG8l1Ukan8o+EhEr0/afgY9UJrQt0lIfqvnanZumtq4vmV6tqv6k6Z7hZP9vfKu4Rk36BFV8nSR1l/Q08ApwP9nI7s2IeDdVKY37vT6l/Q3AzlsagxOTARwaESOAY4BvSPpM6c7IxulV/b2CraEPwDXAfsAwYCXwo8qG036SegG3A+dHxFul+6r1GjXTp6q+ThGxISKGAXuQjegGdnYMTkybbzmwZ8nnPVJZ1YmI5en3K8CdZP8YX26cOkm/X6lchJutpT5U5bWLiJfTfzQ2Ar/g/WmgquiPpBqy/4DfHBF3pOKqvkbN9anar1OjiHgTmA2MIptKbXy3amnc7/Up7e8DvLal53Zi2nxPAQPS0yrbkt34u7vCMbWbpO0l9W7cBr4ALCTry+mp2unAXZWJcIu01Ie7gdPSk1+fAhpKppNyq8k9li+RXSfI+nNKekJqH2AA8GRnx9eadN/hP4BnI+LHJbuq9hq11Kcqv059Je2Ytj8EHEl272w2MDpVa3qdGq/faODBNPLdMpV+CqSaf8ieHFpCNgd7YaXj2cw+7Ev2pNB8YFFjP8jmiR8Angf+B9ip0rFuoh+3kE2brCebA/96S30ge/Lo6nTd6oFCpeNvY39uTPEuSP9B6FdS/8LUn8XAMZWOv5n+HEo2TbcAeDr9HFvl16ilPlXzdToAmJdiXwh8L5XvS5ZEXwBmAD1Sec/0+YW0f99yxOFXEpmZWa54Ks/MzHLFicnMzHLFicnMzHLFicnMzHLFicnMzHLFicnMzHLFicnMzHLlfwHH5sUVMquziAAAAABJRU5ErkJggg==\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "chinese_ingredient_df = create_ingredient_df(chinese_df)\r\n", + "chinese_ingredient_df.head(10).plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 13 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAaYAAAD4CAYAAACngkIwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAfTElEQVR4nO3de5RXdb3/8ecLHEEdQkU0fngZNBRFBGH0eMtMTSs17QjiL0tMj/ws01ylHUrr4MnWsU4Xu1iGHsPMo1zSBcVKLe+aF2ZAbhJqggWSgOUkGoTj+/fH/ox8HecK35m9v8zrsdZ3zd6f/dmf/d6f9WXefPbesz+KCMzMzIqiV94BmJmZlXJiMjOzQnFiMjOzQnFiMjOzQnFiMjOzQtku7wC2BbvttlvU1NTkHYaZWUWpr69fFxEDm5c7MZVBTU0NdXV1eYdhZlZRJL3YUrkv5ZmZWaE4MZmZWaE4MZmZWaH4HpOZWTfYtGkTK1euZMOGDXmH0u369u3LnnvuSVVVVYfqOzGVwaJVDdRMmpN3GLaNWnHtKXmHYGWwcuVK+vXrR01NDZLyDqfbRASvvPIKK1euZMiQIR3ax5fyzMy6wYYNGxgwYECPSkoAkhgwYECnRopdnpgkXSppqaTbuvpY5SBphaTd8o7DzLY9PS0pNenseXfHpbzPAidGxMpuONbbJG0XEW925zHNzGzrdWliknQDsC/wG0l3APsBBwNVwOSImCXpPOBjwI5p+10R8aW0//qIqE7LY4FTI+I8SfsBtwE7AbOAyyKiWtJxwNeBvwHDgP0lfRK4FNgeeBL4bEQ0SjoJuBroA/wR+HRErC+JfQfgTuDOiLixyzrJzHqkct+XLve9yOrqatavX99+xS7QpZfyIuIi4CXgg2RJ5P6IODyt/7eknVLVUcB4YAQwXtJe7TT9feD7ETECaD4SGw18PiL2l3RgavfoiBgFNALnpEt1V5GN5EYDdcAXStqoBn4F3N5aUpI0UVKdpLrGNxra7wwzM+uQ7nz44SRgkqSngQeBvsDeadt9EdEQERuAZ4B92mnrSGBGWv7fZtueiojlafkEYAwwNx33BLIR3BHAQcBjqXxCs2POAn4WET9vLYCImBIRtRFR23vH/u2Ea2aWr0mTJnH99de/vT558mSuueYaTjjhBEaPHs2IESOYNWvWu/Z78MEHOfXUU99e/9znPsfUqVMBqK+v5wMf+ABjxozh5JNPZvXq1WWJtTsTk4AzI2JU+uwdEUvTto0l9RrZfImxdN73vh08zuvNjnlLyTEPiIjJqfy3JeUHRcQFJfs9BnxYPfVOpZltc8aPH8/06dPfXp8+fToTJkzgrrvuYt68eTzwwAN88YtfJCLaaGWzTZs2cckllzBz5kzq6+s5//zzufLKK8sSa3cmpnuAS5p+2Us6tAP7vCzpQEm9gI+XlD8BnJmWz25j//uAsZJ2T8fcVdI+af+jJb0vle8kaf+S/b5Gdp/q+uYNmplVokMPPZQ1a9bw0ksvsWDBAnbZZRfe+9738pWvfIVDDjmEE088kVWrVvHyyy93qL1ly5axePFiPvShDzFq1CiuueYaVq4szzNu3fkHtl8HrgMWpkSzHDi17V2YBPwaWEt2H6g6lV8G/ELSlcDdQIs3eSLiGUlXAfemY24CLo6IJ9JDF7dL6pOqXwU8W7L754GbJX2r6WEMM7NKNm7cOGbOnMlf/vIXxo8fz2233cbatWupr6+nqqqKmpqad/290Xbbbcdbb7319nrT9ohg+PDhPP7442WPs8sTU0TUlKz+vxa2TwWmlqyfWrI8E5jZQrOrgCMiIiSdDRyQ6j9Idv+qtP1pwLQWjns/cFg78X66hWObmVWk8ePHc+GFF7Ju3Toeeughpk+fzu67705VVRUPPPAAL7747lko9tlnH5555hk2btzIP/7xD+677z6OOeYYDjjgANauXcvjjz/OkUceyaZNm3j22WcZPnz
4VsdZqa8kGgP8KF0WfBU4P89gRgzuT51fG2NmnZDHq6aGDx/Oa6+9xuDBgxk0aBDnnHMOp512GiNGjKC2tpZhw4a9a5+99tqLs846i4MPPpghQ4Zw6KHZXZjtt9+emTNncumll9LQ0MCbb77JZZddVpbEpI7e6LLW1dbWhicKNLO2LF26lAMPPDDvMHLT0vlLqo+I2uZ1/a48MzMrFCcmMzMrFCcmM7Nu0lNvnXT2vJ2YzMy6Qd++fXnllVd6XHJqmo+pb9+OviOhcp/KMzOrKHvuuScrV65k7dq1eYfS7ZpmsO0oJyYzs25QVVXV4RlcezpfyjMzs0JxYjIzs0JxYjIzs0LxPaYyWLSqoeyzUZptiTxec2NWbh4xmZlZoTgxmZlZoTgxtUMZ95OZWTcp9C9cSTWS/iDpNklLJc2UtKOkMZIeklQv6R5Jg1L9ByV9X9LTkhZLOjyVT5Z0q6THJT0n6cKSY1whaa6khZKuLjnuMkk/BxYDe+Vx/mZmPVElPPxwAHBBRDwm6WbgYrJp1k+PiLWSxgPfYPOcTDtGxChJxwI3Awen8kOAI4CdgPmS5qRtQ4HDAQGz035/SuUTIuKJloKSNBGYCND7PQPLfc5mZj1WJSSmP0fEY2n5F8BXyBLKb7N5AukNrC6pfztARDws6T2Sdk7lsyLiH8A/JD1AloyOAU4C5qc61WQJ6U/Ai60lpdT+FGAKQJ9BQ3vWy6/MzLpQJSSm5r/0XwOWRMSRHawfbZQL+K+I+GnpBkk1wOudjtTMzLZaoe8xJXtLakpCnwCeAAY2lUmqklQ6l+/4VH4M0BARDan8dEl9JQ0AjgPmAvcA50uqTvsMlrR7l5+RmZm1qhJGTMuAi9P9pWeAH5IllB9I6k92DtcBS1L9DZLmA1Vsvu8EsBB4ANgN+HpEvAS8JOlA4PF0WXA98EmgscvPyszMWqQizw2SLqn9OiIObqdqU/0Hgcsjoq5Z+WRgfUR8u8whAlBbWxt1dXXtVzQzs7dJqo+I2ubllXApz8zMepBCX8qLiBVsfty7I/WPa6V8cnkiMjOzruYRk5mZFYoTk5mZFYoTk5mZFYoTk5mZFYoTk5mZFYoTk5mZFYoTk5mZFYoTk5mZFUqh/8C2Uixa1UDNpDl5h2HWI6y49pS8Q7Au5hGTmZkVihOTmZkVSq6JSdIZkg7qQL2pksa2UH6cpF+XMZ5aST9Iy+dJ+lG52jYzs47Je8R0BtBuYuouEVEXEZfmHYeZWU/WZmKSdK2ki0vWJ0u6XNIVkuZKWijp6pLtX5W0TNKjkm6XdHkq30/S3ZLqJT0iaZiko4CPAf8t6elU58LU7gJJv5S0Y0k4J0qqk/SspFNbiHUnSTdLekrSfEmnt3FefSX9TNKiVPeDqbysIzAzM+u89kZM04CzStbPAtYCQ4HDgVHAGEnHSjoMOBMYCXwEKJ38aQpwSUSMAS4HfhwRvwdmA1dExKiI+CNwZ0QcFhEjgaXABSVt1KRjngLcIKlvs1ivBO6PiMOBD5IlvJ1aOa+LgYiIEcD/BW5pob02SZqYEmVd4xsN7e9gZmYd0ubj4hExX9Lukv4PMBD4GzACOAmYn6pVkyWqfsCsiNhANr35rwAkVQNHATPS9OUAfVo55MGSrgF2Tu3eU7JtekS8BTwn6QVgWLN9TwI+1jRKA/oCe5MluOaOIZuinYj4g6QXgf3b6ovmImIKWcKlz6ChxZ0G2MyswnTk75hmAGOB95KNoPYB/isiflpaSdJlrezfC3g1IkZ14FhTgTMiYoGk84DjSrY1/+XffF3AmRGxrAPHMTOzgurIww/TgLPJktMMslHM+WkkhKTBknYHHgNOS/dvqoFTASLi78BySeNSfUkamdp+jWyk1aQfsFpSFXBOszjGSeolaT9gX6B5AroHuERpWCbp0DbO6ZGm9iXtTzayckIzMyuAdhNTRCwhSxirImJ1RNwL/C/wuKRFwEygX0TMJbtntBD4DbAIaLr5cg5
wgaQFwBKg6cGEO4Ar0gMI+wFfBZ4kS3J/aBbKn4CnUtsXpUuGpb4OVAELJS1J6635MdArxT8NOC8iNrbXF2Zm1vUUUb7bI5KqI2J9epruYWBiRMwr2wEKqra2Nurq6vIOw8ysokiqj4ja5uXlflfelPQHs32BW3pCUjIzs/Iqa2KKiE+Us72tJelk4JvNipdHxMfziMfMzNq3Tb9dPCLu4Z2PnJuZWcHl/UoiMzOzd3BiMjOzQnFiMjOzQnFiMjOzQnFiMjOzQnFiMjOzQnFiMjOzQtmm/46puyxa1UDNpDl5h2HWY6y49pS8Q7Au5BGTmZkVSsUnJkn/KenEvOMwM7PyqPhLeRHxta4+hqTeEdHY1ccxM7MKGzFJ+qqkZZIelXS7pMslTZU0Nm1fIelqSfMkLZI0LJUPlPRbSUsk3STpRUm7pW2flPSUpKcl/VRS71S+XtJ30hxSR+Z20mZmPUzFJCZJhwFnAiOBjwDvmsMjWRcRo4GfAJensv8A7o+I4WQTG+6d2jwQGA8cnaZ+b2TzzLk7AU9GxMiIeLSFeCZKqpNU1/hGQ/PNZma2hSrpUt7RwKw0c+0GSb9qpd6d6Wc98K9p+Rjg4wARcbekv6XyE4AxwNw0I/sOwJq0rRH4ZWvBRMQUYApAn0FDyzfboplZD1dJiamjmqZIb6T98xPZhIZfbmHbBt9XMjPrfhVzKQ94DDhNUl9J1cCpndz3LABJJwG7pPL7gLGSdk/bdpW0TxljNjOzTqqYEVNEzJU0G1gIvAwsAjp6c+dq4HZJnwIeB/4CvBYR6yRdBdwrqRewCbgYeLHsJ2BmZh1SMYkp+XZETJa0I/AwUB8RNzZtjIiakuU64Li02gCcHBFvSjoSOCwiNqZ604BpzQ8UEdVddhZmZtaqSktMUyQdBPQluzc0r4P77Q1MT6OifwIXljOoEYP7U+dXpJiZlUVFJaaI+MQW7vcccGiZwzEzsy5QSQ8/mJlZD+DEZGZmheLEZGZmheLEZGZmheLEZGZmheLEZGZmheLEZGZmheLEZGZmheLEZGZmhVJRb34oqkWrGqiZNCfvMMxsK63wq8UKwSMmMzMrlB6bmCSdJ+lHafkiSefmHZOZmfXQS3mS3nHeEXFDXrGYmdk7VXRikvRV4JPAWuDPQD3Z3EsTge2B54FPRcQbkqYCG8jeMv4Y2YSDTe1MBtZHxLclvQ+4ARhINj37uIj4Y3edk5lZT1exl/IkHQacCYwEPgLUpk13RsRhETESWApcULLbnsBREfGFNpq+Dbg+7X8UsLqV40+UVCeprvGNjk6ka2Zm7ankEdPRwKyI2ABskPSrVH6wpGuAnYFq4J6SfWZERGNrDUrqBwyOiLsAUtstiogpwBSAPoOGxladiZmZva1iR0xtmAp8LiJGAFeTzXbb5PVcIjIzsw6r5MT0GHCapL6SqoFTU3k/YLWkKuCczjQYEa8BKyWdASCpj6Qdyxm0mZm1rWITU0TMBWaTPcTwG2AR2YMPXwWeJEtcf9iCpj8FXCppIfB74L1lCdjMzDpEEZV7e0RSdUSsT6Oah4GJETGvu+PoM2hoDJpwXXcf1szKzG9+6F6S6iOitnl5JT/8ADBF0kFk95FuySMpAYwY3J86f6HNzMqiohNTRHwi7xjMzKy8KvYek5mZbZucmMzMrFCcmMzMrFCcmMzMrFCcmMzMrFCcmMzMrFCcmMzMrFCcmMzMrFCcmMzMrFAq+s0PRbFoVQM1k+bkHYaZdTG/S697eMRkZmaF4sRkZmaF4sRkZmaFUojEJOlcSQslLZB0q6TTJD0pab6k30naQ1IvSc9JGpj26SXpeUkD0+eXkuamz9GpzmRJN0t6UNILki5N5TWSlkq6UdISSfdK2iFt20/S3ZLqJT0iaVh+PWNm1vPknpgkDQeuAo6PiJHA54FHgSMi4lDgDuBLEfEW8As2T5d+IrAgItYC3we+FxGHAWcCN5UcYhhwMnA48B9pynWAocD1ETE
ceDXtBzAFuCQixgCXAz9uJe6Jkuok1TW+0bDV/WBmZpkiPJV3PDAjItYBRMRfJY0ApkkaBGwPLE91bwZmAdcB5wM/S+UnAgdJamrzPZKq0/KciNgIbJS0BtgjlS+PiKfTcj1Qk/Y5CphR0lafloKOiClkSYw+g4ZW7jTAZmYFU4TE1JIfAt+NiNmSjgMmA0TEnyW9LOl4shFQ0+ipF9kIa0NpIym5bCwpamTzOTcv3yG182pEjCrr2ZiZWYflfikPuB8YJ2kAgKRdgf7AqrR9QrP6N5Fd0psREY2p7F7gkqYKkrYosUTE34HlksaldiRp5Ja0ZWZmWyb3xBQRS4BvAA9JWgB8l2yENENSPbCu2S6zgWo2X8YDuBSoTQ9QPANctBUhnQNckGJZApy+FW2ZmVknKaKybo9IqiV70OH9ecfSpLa2Nurq6vIOw8ysokiqj4ja5uVFvcfUIkmTgM+w+d6SmZltY3K/lNcZEXFtROwTEY/mHYuZmXWNikpMZma27XNiMjOzQnFiMjOzQnFiMjOzQnFiMjOzQnFiMjOzQnFiMjOzQnFiMjOzQqmoNz8U1aJVDdRMmpN3GGbWDVZce0reIWzzPGIyM7NCcWIyM7NCqZjEJOlj6SWu5WhrsqTLy9GWmZmVV0XcY5K0XUTMJpuLKa/jv5nHsc3MeppuHzFJOjdN6LdA0q2SaiTdn8ruk7R3qjdV0g2SngS+Jek8ST9K206T9KSk+ZJ+J2mPVD5Z0s2SHpT0gqRLS457paRnJT0KHFBSvp+kuyXVS3pE0rCWjt+NXWRm1qN164hJ0nDgKuCoiFiXplG/BbglIm6RdD7wA+CMtMueqW6jpPNKmnoUOCIiQtK/AV8Cvpi2DQM+CPQDlkn6CXAIcDYwiuyc5wH1qf4U4KKIeE7SvwA/Bo5vfvwWzmUiMBGg93sGbk23mJlZie6+lHc8MCMi1gFExF8lHQn8a9p+K+8cncxoKSmQJYxpkgYB2wPLS7bNiYiNwEZJa4A9gPcDd0XEGwCSZqef1cBRZNO4N+3fpwPHJyKmkCU1+gwaWlnTAJuZFVjR7zG93kr5D4HvRsRsSccBk0u2bSxZbqTtc+wFvBoRozp5fDMz6yLdfY/pfmCcpAEA6VLe78kus0E2ZfojHWinP7AqLU/oQP2HgTMk7SCpH3AaQET8HVguaVyKR5JGdvRkzMys/Lp1xBQRSyR9A3hIUiMwH7gE+JmkK4C1wKc70NRksstvfyNLdkPaOe48SdOABcAaYG7J5nOAn0i6CqgC7kj1zMwsB4rw7ZGtVVtbG3V1dXmHYWZWUSTVR0Rt8/KK+QNbMzPrGZyYzMysUJyYzMysUJyYzMysUJyYzMysUJyYzMysUJyYzMysUJyYzMysUJyYzMysUJyYzMysUIr+dvGKsGhVAzWT5uQdhpkVzIprT8k7hIrkEZOZmRVKj0pMkv5T0ol5x2FmZq3rUZfyIuJrecdgZmZtq/gRk6QvSFqcPpdJqpG0VNKNkpZIulfSDqnuVElj0/IJkuZLWiTpZkl9UvkKSVdLmpe2Dcvz/MzMepqKTkySxpBNLPgvwBHAhcAuwFDg+ogYDrwKnNlsv77AVGB8RIwgGzl+pqTKuogYDfwEuLyVY0+UVCeprvGNhrKel5lZT1bRiQk4BrgrIl6PiPXAncD7geUR8XSqUw/UNNvvgFTn2bR+C3BsyfY729gXgIiYEhG1EVHbe8f+W30iZmaWqfTE1JqNJcuNdP5eWtP+W7KvmZlthUpPTI8AZ0jaUdJOwMdTWXuWATWS3pfWPwU81EUxmplZJ1T0aCAi5kmaCjyVim4C/taB/TZI+jQwQ9J2wFzghi4L1MzMOkwRkXcMFa/PoKExaMJ1eYdhZgXjNz+0TVJ9RNQ2L6/oEVNRjBjcnzp/Ac3MyqLS7zGZmdk2xonJzMwKxYnJzMwKxYnJzMwKxYnJzMwKxYnJzMwKxYnJzMwKxYnJzMwKxYnJzMwKxYnJzMw
Kxa8kKoNFqxqomTQn7zDMrAL4/Xnt84jJzMwKJdfEJGlnSZ/NM4YUx02SDso7DjMzy3/EtDPQqcQkqXc5A5DUOyL+LSKeKWe7Zma2ZfJOTNcC+0l6WtJcSb9u2iDpR5LOS8srJH1T0jxgnKQHJX1PUp2kpZIOk3SnpOckXVPSxiclPZXa/2lTUpO0XtJ3JC0Ajkzt1aZtH5Y0T9ICSfd1Z2eYmVn+iWkS8MeIGAVc0U7dVyJidETckdb/mSaYugGYBVwMHAycJ2mApAOB8cDRqf1G4Jy0707AkxExMiIebTqApIHAjcCZETESGNdaMJImpsRY1/hGQ2fP28zMWlFJT+VNa7Y+O/1cBCyJiNUAkl4A9gKOAcYAcyUB7ACsSfs0Ar9s4RhHAA9HxHKAiPhra8FExBRgCmQz2G7B+ZiZWQuKlJje5J0juL7Ntr/ebH1j+vlWyXLT+naAgFsi4sstHGtDRDRuRaxmZtZF8r6U9xrQLy2/CBwkqY+knYETtrLt+4CxknYHkLSrpH3a2ecJ4FhJQ5r22coYzMysk3IdMUXEK5Iek7QY+A0wHVgMLAfmb2Xbz0i6CrhXUi9gE9l9qBfb2GetpInAnWmfNcCHtiYOMzPrHEX49sjW6jNoaAyacF3eYZhZBfCbHzaTVJ8eYnuHIt1jqlgjBvenzl82M7OyyPsek5mZ2Ts4MZmZWaE4MZmZWaE4MZmZWaE4MZmZWaE4MZmZWaE4MZmZWaE4MZmZWaE4MZmZWaH4zQ9lsGhVAzWT5uQdhplZt+qq1yt5xGRmZoXixGRmZoXS4xOTpIsknZt3HGZmlunx95gi4oa8YzAzs80qbsQk6VxJCyUtkHSrpKmSxpZsX59+HifpIUmzJL0g6VpJ50h6StIiSfulepMlXZ6WH5T0zVTnWUnvz+cszcx6ropKTJKGA1cBx0fESODz7ewyErgIOBD4FLB/RBwO3ARc0so+26U6lwH/0UYsEyXVSaprfKOhk2diZmatqajEBBwPzIiIdQAR8dd26s+NiNURsRH4I3BvKl8E1LSyz53pZ30bdYiIKRFRGxG1vXfs38HwzcysPZWWmFryJuk8JPUCti/ZtrFk+a2S9bdo/f5aU53GNuqYmVkXqbTEdD8wTtIAAEm7AiuAMWn7x4CqfEIzM7NyqKgRQUQskfQN4CFJjcB84N+BWZIWAHcDr+cZo5mZbR1FRN4xVLza2tqoq6vLOwwzs4oiqT4iapuXV9qlPDMz28Y5MZmZWaE4MZmZWaE4MZmZWaE4MZmZWaH4qbwykPQasCzvODphN2Bd3kF0guPtWpUWL1RezI63ZftExMDmhRX1d0wFtqylRx6LSlKd4+06jrfrVVrMjrdzfCnPzMwKxYnJzMwKxYmpPKbkHUAnOd6u5Xi7XqXF7Hg7wQ8/mJlZoXjEZGZmheLEZGZmheLEtBUkfVjSMknPS5qUdzwtkbRC0iJJT0uqS2W7SvqtpOfSz11yjvFmSWskLS4pazFGZX6Q+nyhpNEFiXeypFWpn5+W9NGSbV9O8S6TdHIO8e4l6QFJz0haIunzqbyQfdxGvIXsY0l9JT0laUGK9+pUPkTSkymuaZK2T+V90vrzaXtNQeKdKml5Sf+OSuXd/32ICH+24AP0JpuufV+yWXMXAAflHVcLca4AdmtW9i1gUlqeBHwz5xiPBUYDi9uLEfgo8BtAwBHAkwWJdzJweQt1D0rfjT7AkPSd6d3N8Q4CRqflfsCzKa5C9nEb8Rayj1M/VaflKuDJ1G/TgbNT+Q3AZ9LyZ4Eb0vLZwLRu7t/W4p0KjG2hfrd/Hzxi2nKHA89HxAsR8U/gDuD0nGPqqNOBW9LyLcAZOcZCRDwM/LVZcWsxng78PDJPADtLGtQ9kWZaibc1pwN3RMTGiFgOPE/23ek2EbE6Iual5deApcBgCtrHbcTbmlz7OPXT+rRalT4BHA/MTOXN+7ep32cCJ0hSN4XbVryt6fbvgxPTlhsM/LlkfSV
t/+PJSwD3SqqXNDGV7RERq9PyX4A98gmtTa3FWOR+/1y61HFzyeXRQsWbLhsdSva/5ML3cbN4oaB9LKm3pKeBNcBvyUZtr0bEmy3E9Ha8aXsDMCDPeCOiqX+/kfr3e5L6NI836fL+dWLa9h0TEaOBjwAXSzq2dGNkY/VC/81AJcQI/ATYDxgFrAa+k2847yapGvglcFlE/L10WxH7uIV4C9vHEdEYEaOAPclGa8NyDqlNzeOVdDDwZbK4DwN2Bf49r/icmLbcKmCvkvU9U1mhRMSq9HMNcBfZP5qXm4bi6eea/CJsVWsxFrLfI+Ll9I/9LeBGNl9KKkS8kqrIfsnfFhF3puLC9nFL8Ra9jwEi4lXgAeBIskteTe8jLY3p7XjT9v7AK90cKvCOeD+cLqFGRGwEfkaO/evEtOXmAkPTkzfbk93EnJ1zTO8gaSdJ/ZqWgZOAxWRxTkjVJgCz8omwTa3FOBs4Nz0pdATQUHI5KjfNrrl/nKyfIYv37PQk1hBgKPBUN8cm4H+ApRHx3ZJNhezj1uItah9LGihp57S8A/AhsvtiDwBjU7Xm/dvU72OB+9OINc94/1DynxSR3Q8r7d/u/T509dMV2/KH7GmVZ8muJ1+ZdzwtxLcv2dNKC4AlTTGSXc++D3gO+B2wa85x3k52aWYT2fXrC1qLkezJoOtTny8CagsS760pnoVk/5AHldS/MsW7DPhIDvEeQ3aZbiHwdPp8tKh93Ea8hexj4BBgfoprMfC1VL4vWYJ8HpgB9EnlfdP682n7vgWJ9/7Uv4uBX7D5yb1u/z74lURmZlYovpRnZmaF4sRkZmaF4sRkZmaF4sRkZmaF4sRkZmaF4sRkZmaF4sRkZmaF8v8BEBScEicaSW4AAAAASUVORK5CYII=\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "indian_ingredient_df = create_ingredient_df(indian_df)\r\n", + "indian_ingredient_df.head(10).plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 14 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "korean_ingredient_df = create_ingredient_df(korean_df)\r\n", + "korean_ingredient_df.head(10).plot.barh()" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... 
whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 15 + } + ], + "source": [ + "feature_df= df.drop(['cuisine','Unnamed: 0','rice','garlic','ginger'], axis=1)\n", + "labels_df = df.cuisine #.unique()\n", + "feature_df.head()\n" + ] + }, + { + "source": [ + "Cân bằng dữ liệu với SMOTE oversampling đến lớp cao nhất. Đọc thêm tại đây: https://imbalanced-learn.org/dev/references/generated/imblearn.over_sampling.SMOTE.html\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "oversample = SMOTE()\n", + "transformed_feature_df, transformed_label_df = oversample.fit_resample(feature_df, labels_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "new label count: korean 799\nchinese 799\njapanese 799\nindian 799\nthai 799\nName: cuisine, dtype: int64\nold label count: korean 799\nindian 598\nchinese 442\njapanese 320\nthai 289\nName: cuisine, dtype: int64\n" + ] + } + ], + "source": [ + "print(f'new label count: {transformed_label_df.value_counts()}')\r\n", + "print(f'old label count: {df.cuisine.value_counts()}')" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 
0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 18 + } + ], + "source": [ + "transformed_feature_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " cuisine almond angelica anise anise_seed apple apple_brandy \\\n", + "0 indian 0 0 0 0 0 0 \n", + "1 indian 1 0 0 0 0 0 \n", + "2 indian 0 0 0 0 0 0 \n", + "3 indian 0 0 0 0 0 0 \n", + "4 indian 0 0 0 0 0 0 \n", + "... ... ... ... ... ... ... ... \n", + "3990 thai 0 0 0 0 0 0 \n", + "3991 thai 0 0 0 0 0 0 \n", + "3992 thai 0 0 0 0 0 0 \n", + "3993 thai 0 0 0 0 0 0 \n", + "3994 thai 0 0 0 0 0 0 \n", + "\n", + " apricot armagnac artemisia ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "... ... ... ... ... ... ... ... \n", + "3990 0 0 0 ... 0 0 0 \n", + "3991 0 0 0 ... 0 0 0 \n", + "3992 0 0 0 ... 0 0 0 \n", + "3993 0 0 0 ... 0 0 0 \n", + "3994 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "... ... ... ... ... ... ... ... \n", + "3990 0 0 0 0 0 0 0 \n", + "3991 0 0 0 0 0 0 0 \n", + "3992 0 0 0 0 0 0 0 \n", + "3993 0 0 0 0 0 0 0 \n", + "3994 0 0 0 0 0 0 0 \n", + "\n", + "[3995 rows x 381 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisia...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
0indian000000000...0000000000
1indian100000000...0000000000
2indian000000000...0000000000
3indian000000000...0000000000
4indian000000000...0000000010
..................................................................
3990thai000000000...0000000000
3991thai000000000...0000000000
3992thai000000000...0000000000
3993thai000000000...0000000000
3994thai000000000...0000000000
\n

3995 rows × 381 columns

\n
" + }, + "metadata": {}, + "execution_count": 19 + } + ], + "source": [ + "# export transformed data to new df for classification\n", + "transformed_df = pd.concat([transformed_label_df,transformed_feature_df],axis=1, join='outer')\n", + "transformed_df" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\nRangeIndex: 3995 entries, 0 to 3994\nColumns: 381 entries, cuisine to zucchini\ndtypes: int64(380), object(1)\nmemory usage: 11.6+ MB\n" + ] + } + ], + "source": [ + "transformed_df.info()" + ] + }, + { + "source": [ + "Lưu tệp để sử dụng sau này\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "transformed_df.to_csv(\"../../data/cleaned_cuisines.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc sự không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn tham khảo chính thức. Đối với các thông tin quan trọng, nên sử dụng dịch vụ dịch thuật chuyên nghiệp từ con người. 
Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "1da12ed6d238756959b8de9cac2a35a2", + "translation_date": "2025-09-06T14:53:18+00:00", + "source_file": "4-Classification/1-Introduction/solution/notebook.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/translations/vi/4-Classification/2-Classifiers-1/notebook.ipynb b/translations/vi/4-Classification/2-Classifiers-1/notebook.ipynb new file mode 100644 index 000000000..facf06172 --- /dev/null +++ b/translations/vi/4-Classification/2-Classifiers-1/notebook.ipynb @@ -0,0 +1,41 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "68829b06b4dcd512d3327849191f4d7f", + "translation_date": "2025-09-06T14:32:53+00:00", + "source_file": "4-Classification/2-Classifiers-1/notebook.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# Xây dựng các mô hình phân loại\n" + ], + "cell_type": "markdown", + "metadata": {} + 
}, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc sự không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn tham khảo chính thức. Đối với các thông tin quan trọng, nên sử dụng dịch vụ dịch thuật chuyên nghiệp từ con người. Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/vi/4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb b/translations/vi/4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb new file mode 100644 index 000000000..40b4c2304 --- /dev/null +++ b/translations/vi/4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb @@ -0,0 +1,1298 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "colab": { + "name": "lesson_11-R.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "6ea6a5171b1b99b7b5a55f7469c048d2", + "translation_date": "2025-09-06T14:41:18+00:00", + "source_file": "4-Classification/2-Classifiers-1/solution/R/lesson_11-R.ipynb", + "language_code": "vi" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "zs2woWv_HoE8" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Bộ phân loại ẩm thực 1\n", + "\n", + "Trong bài học này, chúng ta sẽ khám phá nhiều bộ phân loại để *dự đoán một nền ẩm thực quốc gia dựa trên nhóm nguyên liệu.* Trong quá trình này, chúng ta sẽ tìm hiểu thêm về cách các thuật toán có thể được sử dụng cho các nhiệm vụ phân loại.\n", + 
"\n", + "### [**Câu hỏi trước bài học**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/21/)\n", + "\n", + "### **Chuẩn bị**\n", + "\n", + "Bài học này dựa trên [bài học trước](https://github.com/microsoft/ML-For-Beginners/blob/main/4-Classification/1-Introduction/solution/lesson_10-R.ipynb) nơi chúng ta:\n", + "\n", + "- Đã giới thiệu nhẹ nhàng về phân loại bằng cách sử dụng một tập dữ liệu về tất cả các nền ẩm thực tuyệt vời của châu Á và Ấn Độ 😋.\n", + "\n", + "- Đã khám phá một số [động từ dplyr](https://dplyr.tidyverse.org/) để chuẩn bị và làm sạch dữ liệu.\n", + "\n", + "- Đã tạo các hình ảnh trực quan đẹp mắt bằng ggplot2.\n", + "\n", + "- Đã trình bày cách xử lý dữ liệu không cân bằng bằng cách tiền xử lý nó bằng [recipes](https://recipes.tidymodels.org/articles/Simple_Example.html).\n", + "\n", + "- Đã trình bày cách `prep` và `bake` công thức của chúng ta để xác nhận rằng nó hoạt động như mong đợi.\n", + "\n", + "#### **Điều kiện tiên quyết**\n", + "\n", + "Đối với bài học này, chúng ta sẽ cần các gói sau để làm sạch, chuẩn bị và trực quan hóa dữ liệu:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) là một [bộ sưu tập các gói R](https://www.tidyverse.org/packages) được thiết kế để làm cho khoa học dữ liệu nhanh hơn, dễ dàng hơn và thú vị hơn!\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) là một [khung làm việc gồm các gói](https://www.tidymodels.org/packages/) dành cho mô hình hóa và học máy.\n", + "\n", + "- `themis`: [gói themis](https://themis.tidymodels.org/) cung cấp các bước bổ sung trong công thức để xử lý dữ liệu không cân bằng.\n", + "\n", + "- `nnet`: [gói nnet](https://cran.r-project.org/web/packages/nnet/nnet.pdf) cung cấp các hàm để ước tính mạng nơ-ron truyền thẳng với một lớp ẩn, và các mô hình hồi quy logistic đa thức.\n", + "\n", + "Bạn có thể cài đặt chúng như sau:\n" + ], + "metadata": { + "id": "iDFOb3ebHwQC" + } + }, + { + "cell_type": "markdown", + "source": [ + 
"`install.packages(c(\"tidyverse\", \"tidymodels\", \"DataExplorer\", \"here\"))`\n", + "\n", + "Ngoài ra, đoạn mã dưới đây sẽ kiểm tra xem bạn đã có các gói cần thiết để hoàn thành mô-đun này chưa và sẽ cài đặt chúng nếu chúng bị thiếu.\n" + ], + "metadata": { + "id": "4V85BGCjII7F" + } + }, + { + "cell_type": "code", + "execution_count": 2, + "source": [ + "suppressWarnings(if (!require(\"pacman\"))install.packages(\"pacman\"))\r\n", + "\r\n", + "pacman::p_load(tidyverse, tidymodels, themis, here)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Loading required package: pacman\n", + "\n" + ] + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "an5NPyyKIKNR", + "outputId": "834d5e74-f4b8-49f9-8ab5-4c52ff2d7bc8" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 1. Chia dữ liệu thành tập huấn luyện và tập kiểm tra.\n", + "\n", + "Chúng ta sẽ bắt đầu bằng cách chọn một vài bước từ bài học trước.\n", + "\n", + "### Loại bỏ các nguyên liệu phổ biến nhất gây nhầm lẫn giữa các nền ẩm thực khác nhau, sử dụng `dplyr::select()`.\n", + "\n", + "Ai cũng yêu thích cơm, tỏi và gừng!\n" + ], + "metadata": { + "id": "0ax9GQLBINVv" + } + }, + { + "cell_type": "code", + "execution_count": 3, + "source": [ + "# Load the original cuisines data\r\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/4-Classification/data/cuisines.csv\")\r\n", + "\r\n", + "# Drop id column, rice, garlic and ginger from our original data set\r\n", + "df_select <- df %>% \r\n", + " select(-c(1, rice, garlic, ginger)) %>%\r\n", + " # Encode cuisine column as categorical\r\n", + " mutate(cuisine = factor(cuisine))\r\n", + "\r\n", + "# Display new data set\r\n", + "df_select %>% \r\n", + " slice_head(n = 5)\r\n", + "\r\n", + "# Display distribution of cuisines\r\n", + "df_select %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(desc(n))" + ], + "outputs": [ + { + 
"output_type": "stream", + "name": "stderr", + "text": [ + "New names:\n", + "* `` -> ...1\n", + "\n", + "\u001b[1m\u001b[1mRows: \u001b[1m\u001b[22m\u001b[34m\u001b[34m2448\u001b[34m\u001b[39m \u001b[1m\u001b[1mColumns: \u001b[1m\u001b[22m\u001b[34m\u001b[34m385\u001b[34m\u001b[39m\n", + "\n", + "\u001b[36m──\u001b[39m \u001b[1m\u001b[1mColumn specification\u001b[1m\u001b[22m \u001b[36m────────────────────────────────────────────────────────\u001b[39m\n", + "\u001b[1mDelimiter:\u001b[22m \",\"\n", + "\u001b[31mchr\u001b[39m (1): cuisine\n", + "\u001b[32mdbl\u001b[39m (384): ...1, almond, angelica, anise, anise_seed, apple, apple_brandy, a...\n", + "\n", + "\n", + "\u001b[36mℹ\u001b[39m Use \u001b[30m\u001b[47m\u001b[30m\u001b[47m`spec()`\u001b[47m\u001b[30m\u001b[49m\u001b[39m to retrieve the full column specification for this data.\n", + "\u001b[36mℹ\u001b[39m Specify the column types or set \u001b[30m\u001b[47m\u001b[30m\u001b[47m`show_col_types = FALSE`\u001b[47m\u001b[30m\u001b[49m\u001b[39m to quiet this message.\n", + "\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine almond angelica anise anise_seed apple apple_brandy apricot armagnac\n", + "1 indian 0 0 0 0 0 0 0 0 \n", + "2 indian 1 0 0 0 0 0 0 0 \n", + "3 indian 0 0 0 0 0 0 0 0 \n", + "4 indian 0 0 0 0 0 0 0 0 \n", + "5 indian 0 0 0 0 0 0 0 0 \n", + " artemisia ⋯ whiskey white_bread white_wine whole_grain_wheat_flour wine wood\n", + "1 0 ⋯ 0 0 0 0 0 0 \n", + "2 0 ⋯ 0 0 0 0 0 0 \n", + "3 0 ⋯ 0 0 0 0 0 0 \n", + "4 0 ⋯ 0 0 0 0 0 0 \n", + "5 0 ⋯ 0 0 0 0 0 0 \n", + " yam yeast yogurt zucchini\n", + "1 0 0 0 0 \n", + "2 0 0 0 0 \n", + "3 0 0 0 0 \n", + "4 0 0 0 0 \n", + "5 0 0 1 0 " + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 381\n", + "\n", + "| cuisine <fct> | almond <dbl> | angelica <dbl> | anise <dbl> | anise_seed <dbl> | apple <dbl> | apple_brandy <dbl> | apricot <dbl> | armagnac <dbl> | artemisia <dbl> | ⋯ ⋯ | whiskey <dbl> | white_bread <dbl> | white_wine 
<dbl> | whole_grain_wheat_flour <dbl> | wine <dbl> | wood <dbl> | yam <dbl> | yeast <dbl> | yogurt <dbl> | zucchini <dbl> |\n", + "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", + "| indian | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| indian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| indian | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| indian | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| indian | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 381\n", + "\\begin{tabular}{lllllllllllllllllllll}\n", + " cuisine & almond & angelica & anise & anise\\_seed & apple & apple\\_brandy & apricot & armagnac & artemisia & ⋯ & whiskey & white\\_bread & white\\_wine & whole\\_grain\\_wheat\\_flour & wine & wood & yam & yeast & yogurt & zucchini\\\\\n", + " & & & & & & & & & & ⋯ & & & & & & & & & & \\\\\n", + "\\hline\n", + "\t indian & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t indian & 1 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t indian & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t indian & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t indian & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 1 & 0\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 381
cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiawhiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
<fct><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
indian0000000000000000000
indian1000000000000000000
indian0000000000000000000
indian0000000000000000000
indian0000000000000000010
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine n \n", + "1 korean 799\n", + "2 indian 598\n", + "3 chinese 442\n", + "4 japanese 320\n", + "5 thai 289" + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 2\n", + "\n", + "| cuisine <fct> | n <int> |\n", + "|---|---|\n", + "| korean | 799 |\n", + "| indian | 598 |\n", + "| chinese | 442 |\n", + "| japanese | 320 |\n", + "| thai | 289 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 2\n", + "\\begin{tabular}{ll}\n", + " cuisine & n\\\\\n", + " & \\\\\n", + "\\hline\n", + "\t korean & 799\\\\\n", + "\t indian & 598\\\\\n", + "\t chinese & 442\\\\\n", + "\t japanese & 320\\\\\n", + "\t thai & 289\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 2
cuisinen
<fct><int>
korean 799
indian 598
chinese 442
japanese320
thai 289
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 735 + }, + "id": "jhCrrH22IWVR", + "outputId": "d444a85c-1d8b-485f-bc4f-8be2e8f8217c" + } + }, + { + "cell_type": "markdown", + "source": [ + "Tuyệt vời! Bây giờ, chúng ta sẽ chia dữ liệu sao cho 70% dữ liệu dành cho huấn luyện và 30% dành cho kiểm tra. Chúng ta cũng sẽ áp dụng kỹ thuật `phân tầng` khi chia dữ liệu để `duy trì tỷ lệ của mỗi loại ẩm thực` trong các tập dữ liệu huấn luyện và kiểm tra.\n", + "\n", + "[rsample](https://rsample.tidymodels.org/), một gói trong Tidymodels, cung cấp cơ sở hạ tầng cho việc chia dữ liệu và lấy mẫu lại một cách hiệu quả:\n" + ], + "metadata": { + "id": "AYTjVyajIdny" + } + }, + { + "cell_type": "code", + "execution_count": 4, + "source": [ + "# Load the core Tidymodels packages into R session\r\n", + "library(tidymodels)\r\n", + "\r\n", + "# Create split specification\r\n", + "set.seed(2056)\r\n", + "cuisines_split <- initial_split(data = df_select,\r\n", + " strata = cuisine,\r\n", + " prop = 0.7)\r\n", + "\r\n", + "# Extract the data in each split\r\n", + "cuisines_train <- training(cuisines_split)\r\n", + "cuisines_test <- testing(cuisines_split)\r\n", + "\r\n", + "# Print the number of cases in each split\r\n", + "cat(\"Training cases: \", nrow(cuisines_train), \"\\n\",\r\n", + " \"Test cases: \", nrow(cuisines_test), sep = \"\")\r\n", + "\r\n", + "# Display the first few rows of the training set\r\n", + "cuisines_train %>% \r\n", + " slice_head(n = 5)\r\n", + "\r\n", + "\r\n", + "# Display distribution of cuisines in the training set\r\n", + "cuisines_train %>% \r\n", + " count(cuisine) %>% \r\n", + " arrange(desc(n))" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Training cases: 1712\n", + "Test cases: 736" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine almond angelica anise anise_seed apple apple_brandy apricot 
armagnac\n", + "1 chinese 0 0 0 0 0 0 0 0 \n", + "2 chinese 0 0 0 0 0 0 0 0 \n", + "3 chinese 0 0 0 0 0 0 0 0 \n", + "4 chinese 0 0 0 0 0 0 0 0 \n", + "5 chinese 0 0 0 0 0 0 0 0 \n", + " artemisia ⋯ whiskey white_bread white_wine whole_grain_wheat_flour wine wood\n", + "1 0 ⋯ 0 0 0 0 1 0 \n", + "2 0 ⋯ 0 0 0 0 1 0 \n", + "3 0 ⋯ 0 0 0 0 0 0 \n", + "4 0 ⋯ 0 0 0 0 0 0 \n", + "5 0 ⋯ 0 0 0 0 0 0 \n", + " yam yeast yogurt zucchini\n", + "1 0 0 0 0 \n", + "2 0 0 0 0 \n", + "3 0 0 0 0 \n", + "4 0 0 0 0 \n", + "5 0 0 0 0 " + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 381\n", + "\n", + "| cuisine <fct> | almond <dbl> | angelica <dbl> | anise <dbl> | anise_seed <dbl> | apple <dbl> | apple_brandy <dbl> | apricot <dbl> | armagnac <dbl> | artemisia <dbl> | ⋯ ⋯ | whiskey <dbl> | white_bread <dbl> | white_wine <dbl> | whole_grain_wheat_flour <dbl> | wine <dbl> | wood <dbl> | yam <dbl> | yeast <dbl> | yogurt <dbl> | zucchini <dbl> |\n", + "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "| chinese | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 381\n", + "\\begin{tabular}{lllllllllllllllllllll}\n", + " cuisine & almond & angelica & anise & anise\\_seed & apple & apple\\_brandy & apricot & armagnac & artemisia & ⋯ & whiskey & white\\_bread & white\\_wine & whole\\_grain\\_wheat\\_flour & wine & wood & yam & yeast & yogurt & zucchini\\\\\n", + " & & & & & & & & & & ⋯ & & & & & & & & & & \\\\\n", + "\\hline\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ 
& 0 & 0 & 0 & 0 & 1 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 1 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\t chinese & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 381
cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiawhiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
<fct><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
chinese0000000000000100000
chinese0000000000000100000
chinese0000000000000000000
chinese0000000000000000000
chinese0000000000000000000
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine n \n", + "1 korean 559\n", + "2 indian 418\n", + "3 chinese 309\n", + "4 japanese 224\n", + "5 thai 202" + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 2\n", + "\n", + "| cuisine <fct> | n <int> |\n", + "|---|---|\n", + "| korean | 559 |\n", + "| indian | 418 |\n", + "| chinese | 309 |\n", + "| japanese | 224 |\n", + "| thai | 202 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 2\n", + "\\begin{tabular}{ll}\n", + " cuisine & n\\\\\n", + " & \\\\\n", + "\\hline\n", + "\t korean & 559\\\\\n", + "\t indian & 418\\\\\n", + "\t chinese & 309\\\\\n", + "\t japanese & 224\\\\\n", + "\t thai & 202\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 2
cuisinen
<fct><int>
korean 559
indian 418
chinese 309
japanese224
thai 202
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 535 + }, + "id": "w5FWIkEiIjdN", + "outputId": "2e195fd9-1a8f-4b91-9573-cce5582242df" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 2. Xử lý dữ liệu không cân bằng\n", + "\n", + "Như bạn có thể đã nhận thấy trong tập dữ liệu gốc cũng như trong tập huấn luyện của chúng ta, có sự phân bố không đồng đều về số lượng các loại ẩm thực. Các món ăn Hàn Quốc *gần như* gấp 3 lần các món ăn Thái. Dữ liệu không cân bằng thường ảnh hưởng tiêu cực đến hiệu suất của mô hình. Nhiều mô hình hoạt động tốt nhất khi số lượng quan sát là bằng nhau và do đó thường gặp khó khăn với dữ liệu không cân bằng.\n", + "\n", + "Có hai cách chính để xử lý các tập dữ liệu không cân bằng:\n", + "\n", + "- thêm các quan sát vào lớp thiểu số: `Over-sampling`, ví dụ sử dụng thuật toán SMOTE, thuật toán này tạo ra các ví dụ mới cho lớp thiểu số một cách tổng hợp bằng cách sử dụng các hàng xóm gần nhất của các trường hợp này.\n", + "\n", + "- loại bỏ các quan sát từ lớp đa số: `Under-sampling`\n", + "\n", + "Trong bài học trước, chúng ta đã minh họa cách xử lý các tập dữ liệu không cân bằng bằng cách sử dụng một `recipe`. Recipe có thể được coi như một bản hướng dẫn mô tả các bước cần áp dụng cho một tập dữ liệu để chuẩn bị cho việc phân tích dữ liệu. Trong trường hợp của chúng ta, chúng ta muốn có sự phân bố đồng đều về số lượng các loại ẩm thực trong `training set`. 
Hãy bắt đầu ngay thôi.\n" + ], + "metadata": { + "id": "daBi9qJNIwqW" + } + }, + { + "cell_type": "code", + "execution_count": 5, + "source": [ + "# Load themis package for dealing with imbalanced data\r\n", + "library(themis)\r\n", + "\r\n", + "# Create a recipe for preprocessing training data\r\n", + "cuisines_recipe <- recipe(cuisine ~ ., data = cuisines_train) %>% \r\n", + " step_smote(cuisine)\r\n", + "\r\n", + "# Print recipe\r\n", + "cuisines_recipe" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Data Recipe\n", + "\n", + "Inputs:\n", + "\n", + " role #variables\n", + " outcome 1\n", + " predictor 380\n", + "\n", + "Operations:\n", + "\n", + "SMOTE based on cuisine" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 200 + }, + "id": "Az6LFBGxI1X0", + "outputId": "29d71d85-64b0-4e62-871e-bcd5398573b6" + } + }, + { + "cell_type": "markdown", + "source": [ + "Bạn có thể xác nhận (bằng cách dùng `prep` và `bake`) rằng công thức này sẽ hoạt động như mong đợi - tất cả các nhãn ẩm thực đều có `559` quan sát.\n", + "\n", + "Vì chúng ta sẽ sử dụng công thức này như một bộ tiền xử lý cho việc mô hình hóa, một `workflow()` sẽ thực hiện toàn bộ việc `prep` và `bake` cho chúng ta, vì vậy chúng ta sẽ không phải ước tính công thức một cách thủ công.\n", + "\n", + "Bây giờ chúng ta đã sẵn sàng để huấn luyện một mô hình 👩‍💻👨‍💻!\n", + "\n", + "## 3. Lựa chọn bộ phân loại của bạn\n", + "\n", + "

\n", + " \n", + "

Tác phẩm nghệ thuật của @allison_horst
\n" + ], + "metadata": { + "id": "NBL3PqIWJBBB" + } + }, + { + "cell_type": "markdown", + "source": [ + "Bây giờ chúng ta cần quyết định thuật toán nào sẽ được sử dụng cho công việc này 🤔.\n", + "\n", + "Trong Tidymodels, [`gói parsnip`](https://parsnip.tidymodels.org/index.html) cung cấp giao diện nhất quán để làm việc với các mô hình trên các engine (gói) khác nhau. Vui lòng xem tài liệu của parsnip để khám phá [các loại mô hình & engine](https://www.tidymodels.org/find/parsnip/#models) và [các tham số mô hình tương ứng](https://www.tidymodels.org/find/parsnip/#model-args). Ban đầu, sự đa dạng này có thể khiến bạn choáng ngợp. Ví dụ, các phương pháp sau đây đều bao gồm các kỹ thuật phân loại:\n", + "\n", + "- Mô hình phân loại dựa trên quy tắc C5.0\n", + "\n", + "- Mô hình phân biệt linh hoạt\n", + "\n", + "- Mô hình phân biệt tuyến tính\n", + "\n", + "- Mô hình phân biệt có điều chỉnh\n", + "\n", + "- Mô hình hồi quy logistic\n", + "\n", + "- Mô hình hồi quy đa thức\n", + "\n", + "- Mô hình Naive Bayes\n", + "\n", + "- Máy vector hỗ trợ\n", + "\n", + "- Láng giềng gần nhất\n", + "\n", + "- Cây quyết định\n", + "\n", + "- Phương pháp tập hợp\n", + "\n", + "- Mạng nơ-ron\n", + "\n", + "Danh sách này còn tiếp tục!\n", + "\n", + "### **Nên chọn bộ phân loại nào?**\n", + "\n", + "Vậy, bạn nên chọn bộ phân loại nào? Thông thường, thử nghiệm qua nhiều bộ phân loại và tìm kiếm kết quả tốt là một cách để kiểm tra.\n", + "\n", + "> AutoML giải quyết vấn đề này một cách gọn gàng bằng cách chạy các so sánh này trên đám mây, cho phép bạn chọn thuật toán tốt nhất cho dữ liệu của mình. Hãy thử [tại đây](https://docs.microsoft.com/learn/modules/automate-model-selection-with-azure-automl/?WT.mc_id=academic-77952-leestott)\n", + "\n", + "Ngoài ra, việc lựa chọn bộ phân loại còn phụ thuộc vào vấn đề của chúng ta. 
Ví dụ, khi kết quả có thể được phân loại thành `nhiều hơn hai lớp`, như trong trường hợp của chúng ta, bạn phải sử dụng một `thuật toán phân loại đa lớp` thay vì `phân loại nhị phân.`\n", + "\n", + "### **Một cách tiếp cận tốt hơn**\n", + "\n", + "Tuy nhiên, một cách tốt hơn thay vì đoán mò là làm theo các ý tưởng trong [bảng tham khảo nhanh ML](https://docs.microsoft.com/azure/machine-learning/algorithm-cheat-sheet?WT.mc_id=academic-77952-leestott) có thể tải xuống này. Tại đây, chúng ta phát hiện rằng, đối với vấn đề phân loại đa lớp của mình, chúng ta có một số lựa chọn:\n", + "\n", + "

\n", + " \n", + "

Một phần của Bảng tham khảo thuật toán của Microsoft, chi tiết các tùy chọn phân loại đa lớp
\n" + ], + "metadata": { + "id": "a6DLAZ3vJZ14" + } + }, + { + "cell_type": "markdown", + "source": [ + "### **Lý do**\n", + "\n", + "Hãy xem xét các cách tiếp cận khác nhau dựa trên các giới hạn mà chúng ta có:\n", + "\n", + "- **Mạng nơ-ron sâu quá nặng**. Với tập dữ liệu sạch nhưng tối thiểu của chúng ta, và việc huấn luyện được thực hiện cục bộ qua notebook, mạng nơ-ron sâu là quá nặng nề cho nhiệm vụ này.\n", + "\n", + "- **Không sử dụng bộ phân loại hai lớp**. Chúng ta không sử dụng bộ phân loại hai lớp, vì vậy loại bỏ phương pháp one-vs-all.\n", + "\n", + "- **Cây quyết định hoặc hồi quy logistic có thể phù hợp**. Cây quyết định có thể hoạt động, hoặc hồi quy đa thức/hồi quy logistic đa lớp cho dữ liệu đa lớp.\n", + "\n", + "- **Cây quyết định tăng cường đa lớp giải quyết vấn đề khác**. Cây quyết định tăng cường đa lớp phù hợp nhất cho các nhiệm vụ phi tham số, ví dụ như các nhiệm vụ thiết kế xếp hạng, vì vậy nó không hữu ích cho chúng ta.\n", + "\n", + "Ngoài ra, thông thường trước khi bắt đầu với các mô hình học máy phức tạp hơn như phương pháp ensemble, nên xây dựng mô hình đơn giản nhất có thể để hiểu rõ vấn đề. Vì vậy, trong bài học này, chúng ta sẽ bắt đầu với mô hình `hồi quy đa thức`.\n", + "\n", + "> Hồi quy logistic là một kỹ thuật được sử dụng khi biến kết quả là biến phân loại (hoặc danh nghĩa). Đối với hồi quy logistic nhị phân, số lượng biến kết quả là hai, trong khi số lượng biến kết quả đối với hồi quy logistic đa thức là nhiều hơn hai. Xem [Phương pháp hồi quy nâng cao](https://bookdown.org/chua/ber642_advanced_regression/multinomial-logistic-regression.html) để tìm hiểu thêm.\n", + "\n", + "## 4. Huấn luyện và đánh giá mô hình hồi quy logistic đa thức.\n", + "\n", + "Trong Tidymodels, `parsnip::multinom_reg()` định nghĩa một mô hình sử dụng các dự đoán tuyến tính để dự đoán dữ liệu đa lớp bằng cách sử dụng phân phối đa thức. 
Xem `?multinom_reg()` để biết các cách/engine khác nhau mà bạn có thể sử dụng để huấn luyện mô hình này.\n", + "\n", + "Trong ví dụ này, chúng ta sẽ huấn luyện mô hình hồi quy đa thức thông qua engine mặc định [nnet](https://cran.r-project.org/web/packages/nnet/nnet.pdf).\n", + "\n", + "> Tôi đã chọn giá trị cho `penalty` một cách ngẫu nhiên. Có những cách tốt hơn để chọn giá trị này, đó là sử dụng `resampling` và `tuning` mô hình, điều mà chúng ta sẽ thảo luận sau.\n", + ">\n", + "> Xem [Tidymodels: Bắt đầu](https://www.tidymodels.org/start/tuning/) nếu bạn muốn tìm hiểu thêm về cách tinh chỉnh siêu tham số của mô hình.\n" + ], + "metadata": { + "id": "gWMsVcbBJemu" + } + }, + { + "cell_type": "code", + "execution_count": 6, + "source": [ + "# Create a multinomial regression model specification\r\n", + "mr_spec <- multinom_reg(penalty = 1) %>% \r\n", + " set_engine(\"nnet\", MaxNWts = 2086) %>% \r\n", + " set_mode(\"classification\")\r\n", + "\r\n", + "# Print model specification\r\n", + "mr_spec" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Multinomial Regression Model Specification (classification)\n", + "\n", + "Main Arguments:\n", + " penalty = 1\n", + "\n", + "Engine-Specific Arguments:\n", + " MaxNWts = 2086\n", + "\n", + "Computational engine: nnet \n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 166 + }, + "id": "Wq_fcyQiJvfG", + "outputId": "c30449c7-3864-4be7-f810-72a003743e2d" + } + }, + { + "cell_type": "markdown", + "source": [ + "Làm tốt lắm 🥳! Bây giờ chúng ta đã có một công thức và một mô tả mô hình, chúng ta cần tìm cách kết hợp chúng lại thành một đối tượng để trước tiên xử lý dữ liệu, sau đó khớp mô hình trên dữ liệu đã được xử lý, và cũng cho phép các hoạt động xử lý sau tiềm năng. 
Trong Tidymodels, đối tượng tiện lợi này được gọi là [`workflow`](https://workflows.tidymodels.org/) và nó tiện lợi lưu giữ các thành phần mô hình của bạn! Đây là điều chúng ta gọi là *pipelines* trong *Python*.\n", + "\n", + "Vậy hãy kết hợp mọi thứ lại thành một workflow!📦\n" + ], + "metadata": { + "id": "NlSbzDfgJ0zh" + } + }, + { + "cell_type": "code", + "execution_count": 7, + "source": [ + "# Bundle recipe and model specification\r\n", + "mr_wf <- workflow() %>% \r\n", + " add_recipe(cuisines_recipe) %>% \r\n", + " add_model(mr_spec)\r\n", + "\r\n", + "# Print out workflow\r\n", + "mr_wf" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "══ Workflow ════════════════════════════════════════════════════════════════════\n", + "\u001b[3mPreprocessor:\u001b[23m Recipe\n", + "\u001b[3mModel:\u001b[23m multinom_reg()\n", + "\n", + "── Preprocessor ────────────────────────────────────────────────────────────────\n", + "1 Recipe Step\n", + "\n", + "• step_smote()\n", + "\n", + "── Model ───────────────────────────────────────────────────────────────────────\n", + "Multinomial Regression Model Specification (classification)\n", + "\n", + "Main Arguments:\n", + " penalty = 1\n", + "\n", + "Engine-Specific Arguments:\n", + " MaxNWts = 2086\n", + "\n", + "Computational engine: nnet \n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 333 + }, + "id": "Sc1TfPA4Ke3_", + "outputId": "82c70013-e431-4e7e-cef6-9fcf8aad4a6c" + } + }, + { + "cell_type": "markdown", + "source": [ + "Quy trình làm việc 👌👌! Một **`workflow()`** có thể được điều chỉnh tương tự như cách một mô hình có thể. 
Vậy, đến lúc huấn luyện một mô hình rồi!\n" + ], + "metadata": { + "id": "TNQ8i85aKf9L" + } + }, + { + "cell_type": "code", + "execution_count": 8, + "source": [ + "# Train a multinomial regression model\n", + "mr_fit <- fit(object = mr_wf, data = cuisines_train)\n", + "\n", + "mr_fit" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "══ Workflow [trained] ══════════════════════════════════════════════════════════\n", + "\u001b[3mPreprocessor:\u001b[23m Recipe\n", + "\u001b[3mModel:\u001b[23m multinom_reg()\n", + "\n", + "── Preprocessor ────────────────────────────────────────────────────────────────\n", + "1 Recipe Step\n", + "\n", + "• step_smote()\n", + "\n", + "── Model ───────────────────────────────────────────────────────────────────────\n", + "Call:\n", + "nnet::multinom(formula = ..y ~ ., data = data, decay = ~1, MaxNWts = ~2086, \n", + " trace = FALSE)\n", + "\n", + "Coefficients:\n", + " (Intercept) almond angelica anise anise_seed apple\n", + "indian 0.19723325 0.2409661 0 -5.004955e-05 -0.1657635 -0.05769734\n", + "japanese 0.13961959 -0.6262400 0 -1.169155e-04 -0.4893596 -0.08585717\n", + "korean 0.22377347 -0.1833485 0 -5.560395e-05 -0.2489401 -0.15657804\n", + "thai -0.04336577 -0.6106258 0 4.903828e-04 -0.5782866 0.63451105\n", + " apple_brandy apricot armagnac artemisia artichoke asparagus\n", + "indian 0 0.37042636 0 -0.09122797 0 -0.27181970\n", + "japanese 0 0.28895643 0 -0.12651100 0 0.14054037\n", + "korean 0 -0.07981259 0 0.55756709 0 -0.66979948\n", + "thai 0 -0.33160904 0 -0.10725182 0 -0.02602152\n", + " avocado bacon baked_potato balm banana barley\n", + "indian -0.46624197 0.16008055 0 0 -0.2838796 0.2230625\n", + "japanese 0.90341344 0.02932727 0 0 -0.4142787 2.0953906\n", + "korean -0.06925382 -0.35804134 0 0 -0.2686963 -0.7233404\n", + "thai -0.21473955 -0.75594439 0 0 0.6784880 -0.4363320\n", + " bartlett_pear basil bay bean beech\n", + "indian 0 -0.7128756 0.1011587 -0.8777275 
-0.0004380795\n", + "japanese 0 0.1288697 0.9425626 -0.2380748 0.3373437611\n", + "korean 0 -0.2445193 -0.4744318 -0.8957870 -0.0048784496\n", + "thai 0 1.5365848 0.1333256 0.2196970 -0.0113078024\n", + " beef beef_broth beef_liver beer beet\n", + "indian -0.7985278 0.2430186 -0.035598065 -0.002173738 0.01005813\n", + "japanese 0.2241875 -0.3653020 -0.139551027 0.128905553 0.04923911\n", + "korean 0.5366515 -0.6153237 0.213455197 -0.010828645 0.27325423\n", + "thai 0.1570012 -0.9364154 -0.008032213 -0.035063746 -0.28279823\n", + " bell_pepper bergamot berry bitter_orange black_bean\n", + "indian 0.49074330 0 0.58947607 0.191256164 -0.1945233\n", + "japanese 0.09074167 0 -0.25917977 -0.118915977 -0.3442400\n", + "korean -0.57876763 0 -0.07874180 -0.007729435 -0.5220672\n", + "thai 0.92554006 0 -0.07210196 -0.002983296 -0.4614426\n", + " black_currant black_mustard_seed_oil black_pepper black_raspberry\n", + "indian 0 0.38935801 -0.4453495 0\n", + "japanese 0 -0.05452887 -0.5440869 0\n", + "korean 0 -0.03929970 0.8025454 0\n", + "thai 0 -0.21498372 -0.9854806 0\n", + " black_sesame_seed black_tea blackberry blackberry_brandy\n", + "indian -0.2759246 0.3079977 0.191256164 0\n", + "japanese -0.6101687 -0.1671913 -0.118915977 0\n", + "korean 1.5197674 -0.3036261 -0.007729435 0\n", + "thai -0.1755656 -0.1487033 -0.002983296 0\n", + " blue_cheese blueberry bone_oil bourbon_whiskey brandy\n", + "indian 0 0.216164294 -0.2276744 0 0.22427587\n", + "japanese 0 -0.119186087 0.3913019 0 -0.15595599\n", + "korean 0 -0.007821986 0.2854487 0 -0.02562342\n", + "thai 0 -0.004947048 -0.0253658 0 -0.05715244\n", + "\n", + "...\n", + "and 308 more lines." 
+ ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "GMbdfVmTKkJI", + "outputId": "adf9ebdf-d69d-4a64-e9fd-e06e5322292e" + } + }, + { + "cell_type": "markdown", + "source": [ + "Các hệ số mà mô hình đã học được trong quá trình huấn luyện được hiển thị ở đầu ra.\n", + "\n", + "### Đánh giá Mô hình Đã Huấn Luyện\n", + "\n", + "Đã đến lúc xem mô hình hoạt động như thế nào 📏 bằng cách đánh giá nó trên tập kiểm tra! Hãy bắt đầu bằng việc tạo dự đoán trên tập kiểm tra.\n" + ], + "metadata": { + "id": "tt2BfOxrKmcJ" + } + }, + { + "cell_type": "code", + "execution_count": 9, + "source": [ + "# Make predictions on the test set\n", + "results <- cuisines_test %>% select(cuisine) %>% \n", + " bind_cols(mr_fit %>% predict(new_data = cuisines_test))\n", + "\n", + "# Print out results\n", + "results %>% \n", + " slice_head(n = 5)" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine .pred_class\n", + "1 indian thai \n", + "2 indian indian \n", + "3 indian indian \n", + "4 indian indian \n", + "5 indian indian " + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 2\n", + "\n", + "| cuisine <fct> | .pred_class <fct> |\n", + "|---|---|\n", + "| indian | thai |\n", + "| indian | indian |\n", + "| indian | indian |\n", + "| indian | indian |\n", + "| indian | indian |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 2\n", + "\\begin{tabular}{ll}\n", + " cuisine & .pred\\_class\\\\\n", + " & \\\\\n", + "\\hline\n", + "\t indian & thai \\\\\n", + "\t indian & indian\\\\\n", + "\t indian & indian\\\\\n", + "\t indian & indian\\\\\n", + "\t indian & indian\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 2
cuisine.pred_class
<fct><fct>
indianthai
indianindian
indianindian
indianindian
indianindian
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 248 + }, + "id": "CqtckvtsKqax", + "outputId": "e57fe557-6a68-4217-fe82-173328c5436d" + } + }, + { + "cell_type": "markdown", + "source": [ + "Làm tốt lắm! Trong Tidymodels, việc đánh giá hiệu suất mô hình có thể được thực hiện bằng cách sử dụng [yardstick](https://yardstick.tidymodels.org/) - một gói dùng để đo lường hiệu quả của các mô hình bằng các chỉ số hiệu suất. Như chúng ta đã làm trong bài học hồi quy logistic, hãy bắt đầu bằng cách tính ma trận nhầm lẫn.\n" + ], + "metadata": { + "id": "8w5N6XsBKss7" + } + }, + { + "cell_type": "code", + "execution_count": 10, + "source": [ + "# Confusion matrix for categorical data\n", + "conf_mat(data = results, truth = cuisine, estimate = .pred_class)\n" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " Truth\n", + "Prediction chinese indian japanese korean thai\n", + " chinese 83 1 8 15 10\n", + " indian 4 163 1 2 6\n", + " japanese 21 5 73 25 1\n", + " korean 15 0 11 191 0\n", + " thai 10 11 3 7 70" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 133 + }, + "id": "YvODvsLkK0iG", + "outputId": "bb69da84-1266-47ad-b174-d43b88ca2988" + } + }, + { + "cell_type": "markdown", + "source": [ + "Khi xử lý nhiều lớp, thường trực quan hơn khi hình dung điều này dưới dạng bản đồ nhiệt, như thế này:\n" + ], + "metadata": { + "id": "c0HfPL16Lr6U" + } + }, + { + "cell_type": "code", + "execution_count": 11, + "source": [ + "update_geom_defaults(geom = \"tile\", new = list(color = \"black\", alpha = 0.7))\n", + "# Visualize confusion matrix\n", + "results %>% \n", + " conf_mat(cuisine, .pred_class) %>% \n", + " autoplot(type = \"heatmap\")" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "plot without title" + ], + "image/png": "" + }, + "metadata": { + "image/png": 
{ + "width": 420, + "height": 420 + } + } + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 436 + }, + "id": "HsAtwukyLsvt", + "outputId": "3032a224-a2c8-4270-b4f2-7bb620317400" + } + }, + { + "cell_type": "markdown", + "source": [ + "Các ô vuông tối màu hơn trong biểu đồ ma trận nhầm lẫn biểu thị số lượng trường hợp cao, và hy vọng bạn có thể thấy một đường chéo các ô vuông tối màu cho biết các trường hợp mà nhãn dự đoán và nhãn thực tế trùng khớp.\n", + "\n", + "Bây giờ, chúng ta hãy tính toán các thống kê tóm tắt cho ma trận nhầm lẫn.\n" + ], + "metadata": { + "id": "oOJC87dkLwPr" + } + }, + { + "cell_type": "code", + "execution_count": 12, + "source": [ + "# Summary stats for confusion matrix\n", + "conf_mat(data = results, truth = cuisine, estimate = .pred_class) %>% \n", + "summary()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " .metric .estimator .estimate\n", + "1 accuracy multiclass 0.7880435\n", + "2 kap multiclass 0.7276583\n", + "3 sens macro 0.7780927\n", + "4 spec macro 0.9477598\n", + "5 ppv macro 0.7585583\n", + "6 npv macro 0.9460080\n", + "7 mcc multiclass 0.7292724\n", + "8 j_index macro 0.7258524\n", + "9 bal_accuracy macro 0.8629262\n", + "10 detection_prevalence macro 0.2000000\n", + "11 precision macro 0.7585583\n", + "12 recall macro 0.7780927\n", + "13 f_meas macro 0.7641862" + ], + "text/markdown": [ + "\n", + "A tibble: 13 × 3\n", + "\n", + "| .metric <chr> | .estimator <chr> | .estimate <dbl> |\n", + "|---|---|---|\n", + "| accuracy | multiclass | 0.7880435 |\n", + "| kap | multiclass | 0.7276583 |\n", + "| sens | macro | 0.7780927 |\n", + "| spec | macro | 0.9477598 |\n", + "| ppv | macro | 0.7585583 |\n", + "| npv | macro | 0.9460080 |\n", + "| mcc | multiclass | 0.7292724 |\n", + "| j_index | macro | 0.7258524 |\n", + "| bal_accuracy | macro | 0.8629262 |\n", + "| detection_prevalence | macro | 0.2000000 |\n", + "| precision | macro | 0.7585583 
|\n", + "| recall | macro | 0.7780927 |\n", + "| f_meas | macro | 0.7641862 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 13 × 3\n", + "\\begin{tabular}{lll}\n", + " .metric & .estimator & .estimate\\\\\n", + " & & \\\\\n", + "\\hline\n", + "\t accuracy & multiclass & 0.7880435\\\\\n", + "\t kap & multiclass & 0.7276583\\\\\n", + "\t sens & macro & 0.7780927\\\\\n", + "\t spec & macro & 0.9477598\\\\\n", + "\t ppv & macro & 0.7585583\\\\\n", + "\t npv & macro & 0.9460080\\\\\n", + "\t mcc & multiclass & 0.7292724\\\\\n", + "\t j\\_index & macro & 0.7258524\\\\\n", + "\t bal\\_accuracy & macro & 0.8629262\\\\\n", + "\t detection\\_prevalence & macro & 0.2000000\\\\\n", + "\t precision & macro & 0.7585583\\\\\n", + "\t recall & macro & 0.7780927\\\\\n", + "\t f\\_meas & macro & 0.7641862\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 13 × 3
.metric.estimator.estimate
<chr><chr><dbl>
accuracy multiclass0.7880435
kap multiclass0.7276583
sens macro 0.7780927
spec macro 0.9477598
ppv macro 0.7585583
npv macro 0.9460080
mcc multiclass0.7292724
j_index macro 0.7258524
bal_accuracy macro 0.8629262
detection_prevalencemacro 0.2000000
precision macro 0.7585583
recall macro 0.7780927
f_meas macro 0.7641862
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 494 + }, + "id": "OYqetUyzL5Wz", + "outputId": "6a84d65e-113d-4281-dfc1-16e8b70f37e6" + } + }, + { + "cell_type": "markdown", + "source": [ + "Nếu chúng ta thu hẹp lại một số chỉ số như độ chính xác, độ nhạy, ppv, thì khởi đầu của chúng ta không tệ lắm 🥳!\n", + "\n", + "## 4. Đi sâu hơn\n", + "\n", + "Hãy đặt một câu hỏi tinh tế: Tiêu chí nào được sử dụng để quyết định một loại ẩm thực cụ thể là kết quả dự đoán?\n", + "\n", + "Thực ra, các thuật toán học máy thống kê, như hồi quy logistic, dựa trên `xác suất`; vì vậy, điều mà một bộ phân loại thực sự dự đoán là một phân phối xác suất trên tập hợp các kết quả có thể xảy ra. Lớp có xác suất cao nhất sau đó được chọn làm kết quả có khả năng xảy ra nhất cho các quan sát đã cho.\n", + "\n", + "Hãy xem điều này hoạt động như thế nào bằng cách thực hiện cả dự đoán lớp cứng và xác suất.\n" + ], + "metadata": { + "id": "43t7vz8vMJtW" + } + }, + { + "cell_type": "code", + "execution_count": 13, + "source": [ + "# Make hard class prediction and probabilities\n", + "results_prob <- cuisines_test %>%\n", + " select(cuisine) %>% \n", + " bind_cols(mr_fit %>% predict(new_data = cuisines_test)) %>% \n", + " bind_cols(mr_fit %>% predict(new_data = cuisines_test, type = \"prob\"))\n", + "\n", + "# Print out results\n", + "results_prob %>% \n", + " slice_head(n = 5)" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " cuisine .pred_class .pred_chinese .pred_indian .pred_japanese .pred_korean\n", + "1 indian thai 1.551259e-03 0.4587877 5.988039e-04 2.428503e-04\n", + "2 indian indian 2.637133e-05 0.9999488 6.648651e-07 2.259993e-05\n", + "3 indian indian 1.049433e-03 0.9909982 1.060937e-03 1.644947e-05\n", + "4 indian indian 6.237482e-02 0.4763035 9.136702e-02 3.660913e-01\n", + "5 indian indian 1.431745e-02 0.9418551 2.945239e-02 8.721782e-03\n", + " .pred_thai \n", + "1 
5.388194e-01\n", + "2 1.577948e-06\n", + "3 6.874989e-03\n", + "4 3.863391e-03\n", + "5 5.653283e-03" + ], + "text/markdown": [ + "\n", + "A tibble: 5 × 7\n", + "\n", + "| cuisine <fct> | .pred_class <fct> | .pred_chinese <dbl> | .pred_indian <dbl> | .pred_japanese <dbl> | .pred_korean <dbl> | .pred_thai <dbl> |\n", + "|---|---|---|---|---|---|---|\n", + "| indian | thai | 1.551259e-03 | 0.4587877 | 5.988039e-04 | 2.428503e-04 | 5.388194e-01 |\n", + "| indian | indian | 2.637133e-05 | 0.9999488 | 6.648651e-07 | 2.259993e-05 | 1.577948e-06 |\n", + "| indian | indian | 1.049433e-03 | 0.9909982 | 1.060937e-03 | 1.644947e-05 | 6.874989e-03 |\n", + "| indian | indian | 6.237482e-02 | 0.4763035 | 9.136702e-02 | 3.660913e-01 | 3.863391e-03 |\n", + "| indian | indian | 1.431745e-02 | 0.9418551 | 2.945239e-02 | 8.721782e-03 | 5.653283e-03 |\n", + "\n" + ], + "text/latex": [ + "A tibble: 5 × 7\n", + "\\begin{tabular}{lllllll}\n", + " cuisine & .pred\\_class & .pred\\_chinese & .pred\\_indian & .pred\\_japanese & .pred\\_korean & .pred\\_thai\\\\\n", + " & & & & & & \\\\\n", + "\\hline\n", + "\t indian & thai & 1.551259e-03 & 0.4587877 & 5.988039e-04 & 2.428503e-04 & 5.388194e-01\\\\\n", + "\t indian & indian & 2.637133e-05 & 0.9999488 & 6.648651e-07 & 2.259993e-05 & 1.577948e-06\\\\\n", + "\t indian & indian & 1.049433e-03 & 0.9909982 & 1.060937e-03 & 1.644947e-05 & 6.874989e-03\\\\\n", + "\t indian & indian & 6.237482e-02 & 0.4763035 & 9.136702e-02 & 3.660913e-01 & 3.863391e-03\\\\\n", + "\t indian & indian & 1.431745e-02 & 0.9418551 & 2.945239e-02 & 8.721782e-03 & 5.653283e-03\\\\\n", + "\\end{tabular}\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A tibble: 5 × 7
cuisine.pred_class.pred_chinese.pred_indian.pred_japanese.pred_korean.pred_thai
<fct><fct><dbl><dbl><dbl><dbl><dbl>
indianthai 1.551259e-030.45878775.988039e-042.428503e-045.388194e-01
indianindian2.637133e-050.99994886.648651e-072.259993e-051.577948e-06
indianindian1.049433e-030.99099821.060937e-031.644947e-056.874989e-03
indianindian6.237482e-020.47630359.136702e-023.660913e-013.863391e-03
indianindian1.431745e-020.94185512.945239e-028.721782e-035.653283e-03
\n" + ] + }, + "metadata": {} + } + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 248 + }, + "id": "xdKNs-ZPMTJL", + "outputId": "68f6ac5a-725a-4eff-9ea6-481fef00e008" + } + }, + { + "cell_type": "markdown", + "source": [ + "Tốt hơn nhiều!\n", + "\n", + "✅ Bạn có thể giải thích tại sao mô hình lại khá chắc chắn rằng quan sát đầu tiên là món ăn Thái không?\n", + "\n", + "## **🚀Thử thách**\n", + "\n", + "Trong bài học này, bạn đã sử dụng dữ liệu đã được làm sạch để xây dựng một mô hình học máy có thể dự đoán ẩm thực quốc gia dựa trên một loạt các nguyên liệu. Hãy dành thời gian để đọc qua [nhiều tùy chọn](https://www.tidymodels.org/find/parsnip/#models) mà Tidymodels cung cấp để phân loại dữ liệu và [các cách khác](https://parsnip.tidymodels.org/articles/articles/Examples.html#multinom_reg-models) để áp dụng hồi quy đa thức.\n", + "\n", + "#### XIN CẢM ƠN:\n", + "\n", + "[`Allison Horst`](https://twitter.com/allison_horst/) vì đã tạo ra những hình minh họa tuyệt vời giúp R trở nên thân thiện và hấp dẫn hơn. Tìm thêm các hình minh họa tại [bộ sưu tập](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM) của cô ấy.\n", + "\n", + "[Cassie Breviu](https://www.twitter.com/cassieview) và [Jen Looper](https://www.twitter.com/jenlooper) vì đã tạo ra phiên bản Python gốc của module này ♥️\n", + "\n", + "
\n", + "Đã định thêm vài câu đùa nhưng tôi không hiểu các trò chơi chữ về đồ ăn 😅.\n", + "\n", + "
\n", + "\n", + "Học vui nhé,\n", + "\n", + "[Eric](https://twitter.com/ericntay), Đại sứ Sinh viên Vàng của Microsoft Learn.\n" + ], + "metadata": { + "id": "2tWVHMeLMYdM" + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc sự không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn tham khảo chính thức. Đối với các thông tin quan trọng, nên sử dụng dịch vụ dịch thuật chuyên nghiệp từ con người. Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/vi/4-Classification/2-Classifiers-1/solution/notebook.ipynb b/translations/vi/4-Classification/2-Classifiers-1/solution/notebook.ipynb new file mode 100644 index 000000000..1b286f7e0 --- /dev/null +++ b/translations/vi/4-Classification/2-Classifiers-1/solution/notebook.ipynb @@ -0,0 +1,281 @@ +{ + "cells": [ + { + "source": [ + "# Xây dựng Mô hình Phân loại\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 0 indian 0 0 0 0 0 \n", + "1 1 indian 1 0 0 0 0 \n", + "2 2 indian 0 0 0 0 0 \n", + "3 3 indian 0 0 0 0 0 \n", + "4 4 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 
0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 382 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00indian00000000...0000000000
11indian10000000...0000000000
22indian00000000...0000000000
33indian00000000...0000000000
44indian00000000...0000000010
\n

5 rows × 382 columns

\n
" + }, + "metadata": {}, + "execution_count": 1 + } + ], + "source": [ + "import pandas as pd\n", + "cuisines_df = pd.read_csv(\"../../data/cleaned_cuisines.csv\")\n", + "cuisines_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.model_selection import train_test_split, cross_val_score\n", + "from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report, precision_recall_curve\n", + "from sklearn.svm import SVC\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 indian\n", + "1 indian\n", + "2 indian\n", + "3 indian\n", + "4 indian\n", + "Name: cuisine, dtype: object" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ], + "source": [ + "cuisines_label_df = cuisines_df['cuisine']\n", + "cuisines_label_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 4 + } + ], + "source": [ + "cuisines_feature_df = cuisines_df.drop(['Unnamed: 0', 'cuisine'], axis=1)\n", + "cuisines_feature_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(cuisines_feature_df, cuisines_label_df, test_size=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Accuracy is 0.8181818181818182\n" + ] + } + ], + "source": [ + "lr = LogisticRegression(multi_class='ovr',solver='liblinear')\n", + "model = lr.fit(X_train, np.ravel(y_train))\n", + "\n", + "accuracy = model.score(X_test, y_test)\n", + "print (\"Accuracy is {}\".format(accuracy))" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "ingredients: Index(['artemisia', 'black_pepper', 'mushroom', 'shiitake', 'soy_sauce',\n 'vegetable_oil'],\n dtype='object')\ncuisine: korean\n" + ] + } + ], + "source": [ + "# test an item\n", + "print(f'ingredients: {X_test.iloc[50][X_test.iloc[50]!=0].keys()}')\n", + "print(f'cuisine: {y_test.iloc[50]}')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " 0\n", + "korean 0.392231\n", + "chinese 0.372872\n", + "japanese 0.218825\n", + "thai 0.013427\n", + "indian 0.002645" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
0
korean0.392231
chinese0.372872
japanese0.218825
thai0.013427
indian0.002645
\n
" + }, + "metadata": {}, + "execution_count": 8 + } + ], + "source": [ + "#reshape to 2d array and transpose\n", + "test= X_test.iloc[50].values.reshape(-1, 1).T\n", + "# predict with score\n", + "proba = model.predict_proba(test)\n", + "classes = model.classes_\n", + "# create df with classes and scores\n", + "resultdf = pd.DataFrame(data=proba, columns=classes)\n", + "\n", + "# create df to show results\n", + "topPrediction = resultdf.T.sort_values(by=[0], ascending = [False])\n", + "topPrediction.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " precision recall f1-score support\n\n chinese 0.75 0.73 0.74 223\n indian 0.93 0.88 0.90 255\n japanese 0.78 0.78 0.78 253\n korean 0.87 0.86 0.86 236\n thai 0.76 0.84 0.80 232\n\n accuracy 0.82 1199\n macro avg 0.82 0.82 0.82 1199\nweighted avg 0.82 0.82 0.82 1199\n\n" + ] + } + ], + "source": [ + "y_pred = model.predict(X_test)\r\n", + "print(classification_report(y_test,y_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn thông tin chính thức. Đối với các thông tin quan trọng, khuyến nghị sử dụng dịch vụ dịch thuật chuyên nghiệp từ con người. 
Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "9408506dd864f2b6e334c62f80c0cfcc", + "translation_date": "2025-09-06T14:33:23+00:00", + "source_file": "4-Classification/2-Classifiers-1/solution/notebook.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/translations/vi/4-Classification/3-Classifiers-2/notebook.ipynb b/translations/vi/4-Classification/3-Classifiers-2/notebook.ipynb new file mode 100644 index 000000000..ca5928421 --- /dev/null +++ b/translations/vi/4-Classification/3-Classifiers-2/notebook.ipynb @@ -0,0 +1,163 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 0 indian 0 0 0 0 0 \n", + "1 1 indian 1 0 0 0 0 \n", + "2 2 indian 0 0 0 0 0 \n", + "3 3 indian 0 0 0 0 0 \n", + "4 4 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 
0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 382 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00indian00000000...0000000000
11indian10000000...0000000000
22indian00000000...0000000000
33indian00000000...0000000000
44indian00000000...0000000010
\n

5 rows × 382 columns

\n
" + }, + "metadata": {}, + "execution_count": 9 + } + ], + "source": [ + "import pandas as pd\n", + "cuisines_df = pd.read_csv(\"../data/cleaned_cuisines.csv\")\n", + "cuisines_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 indian\n", + "1 indian\n", + "2 indian\n", + "3 indian\n", + "4 indian\n", + "Name: cuisine, dtype: object" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ], + "source": [ + "cuisines_label_df = cuisines_df['cuisine']\n", + "cuisines_label_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 11 + } + ], + "source": [ + "cuisines_feature_df = cuisines_df.drop(['Unnamed: 0', 'cuisine'], axis=1)\n", + "cuisines_feature_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc sự không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn tham khảo chính thức. Đối với các thông tin quan trọng, nên sử dụng dịch vụ dịch thuật chuyên nghiệp từ con người. Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "15a83277036572e0773229b5f21c1e12", + "translation_date": "2025-09-06T14:42:34+00:00", + "source_file": "4-Classification/3-Classifiers-2/notebook.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/translations/vi/4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb b/translations/vi/4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb new file mode 100644 index 000000000..fadf02894 --- /dev/null +++ 
b/translations/vi/4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb @@ -0,0 +1,648 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "lesson_12-R.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "ir", + "display_name": "R" + }, + "language_info": { + "name": "R" + }, + "coopTranslator": { + "original_hash": "fab50046ca413a38939d579f8432274f", + "translation_date": "2025-09-06T14:50:37+00:00", + "source_file": "4-Classification/3-Classifiers-2/solution/R/lesson_12-R.ipynb", + "language_code": "vi" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "jsFutf_ygqSx" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HD54bEefgtNO" + }, + "source": [ + "## Bộ phân loại ẩm thực 2\n", + "\n", + "Trong bài học phân loại thứ hai này, chúng ta sẽ khám phá `nhiều cách hơn` để phân loại dữ liệu dạng danh mục. Chúng ta cũng sẽ tìm hiểu về hậu quả của việc chọn một bộ phân loại thay vì bộ phân loại khác.\n", + "\n", + "### [**Câu hỏi trước bài giảng**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/23/)\n", + "\n", + "### **Điều kiện tiên quyết**\n", + "\n", + "Chúng tôi giả định rằng bạn đã hoàn thành các bài học trước vì chúng ta sẽ tiếp tục sử dụng một số khái niệm đã học trước đó.\n", + "\n", + "Đối với bài học này, chúng ta sẽ cần các gói sau:\n", + "\n", + "- `tidyverse`: [tidyverse](https://www.tidyverse.org/) là một [bộ sưu tập các gói R](https://www.tidyverse.org/packages) được thiết kế để làm cho khoa học dữ liệu nhanh hơn, dễ dàng hơn và thú vị hơn!\n", + "\n", + "- `tidymodels`: [tidymodels](https://www.tidymodels.org/) là một [khung làm việc](https://www.tidymodels.org/packages/) bao gồm các gói dành cho mô hình hóa và học máy.\n", + "\n", + "- `themis`: [gói themis](https://themis.tidymodels.org/) cung cấp các bước bổ sung trong công thức để xử lý dữ liệu không cân bằng.\n", + "\n", + "Bạn có thể cài đặt chúng bằng 
lệnh:\n", + "\n", + "`install.packages(c(\"tidyverse\", \"tidymodels\", \"kernlab\", \"themis\", \"ranger\", \"xgboost\", \"kknn\"))`\n", + "\n", + "Ngoài ra, đoạn mã dưới đây sẽ kiểm tra xem bạn đã có các gói cần thiết để hoàn thành module này chưa và cài đặt chúng cho bạn nếu chúng bị thiếu.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "vZ57IuUxgyQt" + }, + "source": [ + "suppressWarnings(if (!require(\"pacman\"))install.packages(\"pacman\"))\n", + "\n", + "pacman::p_load(tidyverse, tidymodels, themis, kernlab, ranger, xgboost, kknn)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "z22M-pj4g07x" + }, + "source": [ + "## **1. Một bản đồ phân loại**\n", + "\n", + "Trong [bài học trước](https://github.com/microsoft/ML-For-Beginners/tree/main/4-Classification/2-Classifiers-1), chúng ta đã cố gắng giải quyết câu hỏi: làm thế nào để chọn giữa nhiều mô hình? Phần lớn điều này phụ thuộc vào đặc điểm của dữ liệu và loại vấn đề mà chúng ta muốn giải quyết (ví dụ như phân loại hay hồi quy?)\n", + "\n", + "Trước đây, chúng ta đã tìm hiểu về các tùy chọn khác nhau khi phân loại dữ liệu bằng bảng tra cứu nhanh (cheat sheet) của Microsoft. Framework Machine Learning của Python, Scikit-learn, cung cấp một bảng tra cứu nhanh tương tự nhưng chi tiết hơn, giúp bạn thu hẹp các bộ ước lượng (một thuật ngữ khác cho các bộ phân loại):\n", + "\n", + "

\n", + " \n", + "

\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "u1i3xRIVg7vG" + }, + "source": [ + "> Mẹo: [truy cập bản đồ trực tuyến này](https://scikit-learn.org/stable/tutorial/machine_learning_map/) và nhấp theo đường dẫn để đọc tài liệu.\n", + ">\n", + "> Trang [tham khảo Tidymodels](https://www.tidymodels.org/find/parsnip/#models) cũng cung cấp tài liệu tuyệt vời về các loại mô hình khác nhau.\n", + "\n", + "### **Kế hoạch** 🗺️\n", + "\n", + "Bản đồ này rất hữu ích khi bạn đã hiểu rõ dữ liệu của mình, vì bạn có thể 'đi bộ' theo các đường dẫn để đưa ra quyết định:\n", + "\n", + "- Chúng ta có \\>50 mẫu\n", + "\n", + "- Chúng ta muốn dự đoán một danh mục\n", + "\n", + "- Chúng ta có dữ liệu được gắn nhãn\n", + "\n", + "- Chúng ta có ít hơn 100K mẫu\n", + "\n", + "- ✨ Chúng ta có thể chọn Linear SVC\n", + "\n", + "- Nếu cách đó không hiệu quả, vì chúng ta có dữ liệu dạng số\n", + "\n", + " - Chúng ta có thể thử ✨ KNeighbors Classifier\n", + "\n", + " - Nếu cách đó không hiệu quả, thử ✨ SVC và ✨ Ensemble Classifiers\n", + "\n", + "Đây là một lộ trình rất hữu ích để làm theo. Bây giờ, hãy bắt đầu ngay với [tidymodels](https://www.tidymodels.org/): một bộ sưu tập các gói R nhất quán và linh hoạt được phát triển để khuyến khích thực hành thống kê tốt 😊.\n", + "\n", + "## 2. Chia dữ liệu và xử lý tập dữ liệu không cân bằng.\n", + "\n", + "Từ các bài học trước, chúng ta đã học rằng có một tập hợp các thành phần chung giữa các món ăn của chúng ta. 
Ngoài ra, có sự phân bố không đồng đều về số lượng món ăn.\n", + "\n", + "Chúng ta sẽ xử lý những điều này bằng cách:\n", + "\n", + "- Loại bỏ các thành phần phổ biến nhất gây nhầm lẫn giữa các món ăn khác nhau, sử dụng `dplyr::select()`.\n", + "\n", + "- Sử dụng một `recipe` để tiền xử lý dữ liệu, chuẩn bị cho việc mô hình hóa bằng cách áp dụng thuật toán `over-sampling`.\n", + "\n", + "Chúng ta đã xem qua những điều trên trong bài học trước nên việc này sẽ rất dễ dàng 🥳!\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "6tj_rN00hClA" + }, + "source": [ + "# Load the core Tidyverse and Tidymodels packages\n", + "library(tidyverse)\n", + "library(tidymodels)\n", + "\n", + "# Load the original cuisines data\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/4-Classification/data/cuisines.csv\")\n", + "\n", + "# Drop id column, rice, garlic and ginger from our original data set\n", + "df_select <- df %>% \n", + " select(-c(1, rice, garlic, ginger)) %>%\n", + " # Encode cuisine column as categorical\n", + " mutate(cuisine = factor(cuisine))\n", + "\n", + "\n", + "# Create data split specification\n", + "set.seed(2056)\n", + "cuisines_split <- initial_split(data = df_select,\n", + " strata = cuisine,\n", + " prop = 0.7)\n", + "\n", + "# Extract the data in each split\n", + "cuisines_train <- training(cuisines_split)\n", + "cuisines_test <- testing(cuisines_split)\n", + "\n", + "# Display distribution of cuisines in the training set\n", + "cuisines_train %>% \n", + " count(cuisine) %>% \n", + " arrange(desc(n))" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zFin5yw3hHb1" + }, + "source": [ + "### Xử lý dữ liệu không cân bằng\n", + "\n", + "Dữ liệu không cân bằng thường ảnh hưởng tiêu cực đến hiệu suất của mô hình. 
Nhiều mô hình hoạt động tốt nhất khi số lượng quan sát là bằng nhau và do đó thường gặp khó khăn với dữ liệu không cân bằng.\n", + "\n", + "Có hai cách chính để xử lý các tập dữ liệu không cân bằng:\n", + "\n", + "- thêm các quan sát vào lớp thiểu số: `Over-sampling`, ví dụ sử dụng thuật toán SMOTE, thuật toán này tạo ra các ví dụ mới cho lớp thiểu số một cách tổng hợp bằng cách sử dụng các điểm lân cận gần nhất của các trường hợp này.\n", + "\n", + "- loại bỏ các quan sát từ lớp đa số: `Under-sampling`\n", + "\n", + "Trong bài học trước, chúng ta đã minh họa cách xử lý các tập dữ liệu không cân bằng bằng cách sử dụng một `recipe`. Recipe có thể được xem như một bản thiết kế mô tả các bước cần áp dụng cho một tập dữ liệu để chuẩn bị cho việc phân tích dữ liệu. Trong trường hợp của chúng ta, mục tiêu là có một phân phối đồng đều về số lượng các loại món ăn trong `training set`. Hãy bắt đầu ngay thôi.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "cRzTnHolhLWd" + }, + "source": [ + "# Load themis package for dealing with imbalanced data\n", + "library(themis)\n", + "\n", + "# Create a recipe for preprocessing training data\n", + "cuisines_recipe <- recipe(cuisine ~ ., data = cuisines_train) %>%\n", + " step_smote(cuisine) \n", + "\n", + "# Print recipe\n", + "cuisines_recipe" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KxOQ2ORhhO81" + }, + "source": [ + "Bây giờ chúng ta sẵn sàng để huấn luyện các mô hình 👩‍💻👨‍💻!\n", + "\n", + "## 3. Vượt ra ngoài các mô hình hồi quy đa thức\n", + "\n", + "Trong bài học trước, chúng ta đã tìm hiểu về các mô hình hồi quy đa thức. Hãy khám phá một số mô hình linh hoạt hơn cho bài toán phân loại.\n", + "\n", + "### Máy Vector Hỗ Trợ (Support Vector Machines)\n", + "\n", + "Trong bối cảnh phân loại, `Máy Vector Hỗ Trợ` là một kỹ thuật học máy cố gắng tìm một *siêu phẳng* (*hyperplane*) để \"tách biệt tốt nhất\" các lớp. 
Hãy xem một ví dụ đơn giản:\n", + "\n", + "

\n", + " \n", + "

https://commons.wikimedia.org/w/index.php?curid=22877598
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "C4Wsd0vZhXYu" + }, + "source": [ + "H1~ không tách biệt các lớp. H2~ có tách biệt, nhưng chỉ với một khoảng cách nhỏ. H3~ tách biệt chúng với khoảng cách tối đa.\n", + "\n", + "#### Bộ phân loại tuyến tính SVM\n", + "\n", + "Bộ phân loại vector hỗ trợ (SVC) là một nhánh của họ các kỹ thuật học máy (ML) thuộc máy vector hỗ trợ (SVM). Trong SVC, siêu phẳng được chọn để phân tách `phần lớn` các quan sát huấn luyện một cách chính xác, nhưng `có thể phân loại sai` một vài quan sát. Bằng cách cho phép một số điểm nằm ở phía sai, SVM trở nên mạnh mẽ hơn trước các giá trị ngoại lai, do đó cải thiện khả năng tổng quát hóa với dữ liệu mới. Tham số điều chỉnh sự vi phạm này được gọi là `cost`, với giá trị mặc định là 1 (xem `help(\"svm_poly\")`).\n", + "\n", + "Hãy tạo một SVC tuyến tính bằng cách đặt `degree = 1` trong một mô hình SVM đa thức.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "vJpp6nuChlBz" + }, + "source": [ + "# Make a linear SVC specification\n", + "svc_linear_spec <- svm_poly(degree = 1) %>% \n", + " set_engine(\"kernlab\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle specification and recipe into a workflow\n", + "svc_linear_wf <- workflow() %>% \n", + " add_recipe(cuisines_recipe) %>% \n", + " add_model(svc_linear_spec)\n", + "\n", + "# Print out workflow\n", + "svc_linear_wf" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rDs8cWNkhoqu" + }, + "source": [ + "Bây giờ khi chúng ta đã lưu lại các bước tiền xử lý và đặc tả mô hình vào một *workflow*, chúng ta có thể tiến hành huấn luyện SVC tuyến tính và đánh giá kết quả trong quá trình đó.
Để đo lường hiệu suất, hãy tạo một tập hợp các chỉ số để đánh giá: `accuracy`, `sensitivity`, `Positive Predicted Value` và `F Measure`.\n", + "\n", + "> `augment()` sẽ thêm cột (hoặc các cột) cho dự đoán vào dữ liệu được cung cấp.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "81wiqcwuhrnq" + }, + "source": [ + "# Train a linear SVC model\n", + "svc_linear_fit <- svc_linear_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "# Create a metric set\n", + "eval_metrics <- metric_set(ppv, sens, accuracy, f_meas)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "svc_linear_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0UFQvHf-huo3" + }, + "source": [ + "#### Máy Vector Hỗ Trợ\n", + "\n", + "Máy vector hỗ trợ (SVM) là một sự mở rộng của bộ phân loại vector hỗ trợ nhằm đáp ứng ranh giới phi tuyến giữa các lớp. Về cơ bản, SVM sử dụng *kernel trick* để mở rộng không gian đặc trưng, thích nghi với các mối quan hệ phi tuyến giữa các lớp. 
Một hàm kernel phổ biến và cực kỳ linh hoạt được SVM sử dụng là *Hàm cơ sở xuyên tâm (Radial Basis Function - RBF).* Hãy cùng xem cách nó hoạt động trên dữ liệu của chúng ta.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "-KX4S8mzhzmp" + }, + "source": [ + "set.seed(2056)\n", + "\n", + "# Make an RBF SVM specification\n", + "svm_rbf_spec <- svm_rbf() %>% \n", + " set_engine(\"kernlab\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle specification and recipe into a workflow\n", + "svm_rbf_wf <- workflow() %>% \n", + " add_recipe(cuisines_recipe) %>% \n", + " add_model(svm_rbf_spec)\n", + "\n", + "\n", + "# Train an RBF model\n", + "svm_rbf_fit <- svm_rbf_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "svm_rbf_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QBFSa7WSh4HQ" + }, + "source": [ + "Tuyệt vời hơn 🤩!\n", + "\n", + "> ✅ Vui lòng xem:\n", + ">\n", + "> - [*Support Vector Machines*](https://bradleyboehmke.github.io/HOML/svm.html), Hands-on Machine Learning with R\n", + ">\n", + "> - [*Support Vector Machines*](https://www.statlearning.com/), An Introduction to Statistical Learning with Applications in R\n", + ">\n", + "> để đọc thêm.\n", + "\n", + "### Bộ phân loại Láng giềng Gần nhất\n", + "\n", + "*K*-nearest neighbor (KNN) là một thuật toán trong đó mỗi quan sát được dự đoán dựa trên *sự tương đồng* của nó với các quan sát khác.\n", + "\n", + "Hãy thử áp dụng nó vào dữ liệu của chúng ta.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "k4BxxBcdh9Ka" + }, + "source": [ + "# Make a KNN specification\n", + "knn_spec <- nearest_neighbor() %>% \n", + " set_engine(\"kknn\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle recipe and model specification into a workflow\n", +
"knn_wf <- workflow() %>% \n", + " add_recipe(cuisines_recipe) %>% \n", + " add_model(knn_spec)\n", + "\n", + "# Train a boosted tree model\n", + "knn_wf_fit <- knn_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "knn_wf_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HaegQseriAcj" + }, + "source": [ + "Có vẻ như mô hình này không hoạt động tốt lắm. Có lẽ việc thay đổi các tham số của mô hình (xem `help(\"nearest_neighbor\")`) sẽ cải thiện hiệu suất của mô hình. Hãy chắc chắn thử nghiệm điều này.\n", + "\n", + "> ✅ Vui lòng tham khảo:\n", + ">\n", + "> - [Hands-on Machine Learning with R](https://bradleyboehmke.github.io/HOML/)\n", + ">\n", + "> - [An Introduction to Statistical Learning with Applications in R](https://www.statlearning.com/)\n", + ">\n", + "> để tìm hiểu thêm về bộ phân loại *K*-Nearest Neighbors.\n", + "\n", + "### Bộ phân loại tập hợp (Ensemble classifiers)\n", + "\n", + "Các thuật toán tập hợp hoạt động bằng cách kết hợp nhiều bộ ước lượng cơ sở để tạo ra một mô hình tối ưu thông qua:\n", + "\n", + "`bagging`: áp dụng một *hàm trung bình* cho một tập hợp các mô hình cơ sở\n", + "\n", + "`boosting`: xây dựng một chuỗi các mô hình, trong đó mỗi mô hình kế tiếp cải thiện hiệu suất dự đoán dựa trên mô hình trước.\n", + "\n", + "Hãy bắt đầu bằng cách thử nghiệm mô hình Random Forest, mô hình này xây dựng một tập hợp lớn các cây quyết định, sau đó áp dụng một hàm trung bình để tạo ra một mô hình tổng thể tốt hơn.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "49DPoVs6iK1M" + }, + "source": [ + "# Make a random forest specification\n", + "rf_spec <- rand_forest() %>% \n", + " set_engine(\"ranger\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle recipe and model 
specification into a workflow\n", + "rf_wf <- workflow() %>% \n", + " add_recipe(cuisines_recipe) %>% \n", + " add_model(rf_spec)\n", + "\n", + "# Train a random forest model\n", + "rf_wf_fit <- rf_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "rf_wf_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RGVYwC_aiUWc" + }, + "source": [ + "Làm tốt lắm 👏!\n", + "\n", + "Hãy cùng thử nghiệm với mô hình Boosted Tree.\n", + "\n", + "Boosted Tree định nghĩa một phương pháp tập hợp, tạo ra một chuỗi các cây quyết định tuần tự, trong đó mỗi cây phụ thuộc vào kết quả của các cây trước đó nhằm giảm dần lỗi. Phương pháp này tập trung vào trọng số của các mục bị phân loại sai và điều chỉnh việc khớp cho bộ phân loại tiếp theo để sửa lỗi.\n", + "\n", + "Có nhiều cách khác nhau để khớp mô hình này (xem `help(\"boost_tree\")`). 
Trong ví dụ này, chúng ta sẽ khớp Boosted trees thông qua công cụ `xgboost`.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Py1YWo-micWs" + }, + "source": [ + "# Make a boosted tree specification\n", + "boost_spec <- boost_tree(trees = 200) %>% \n", + " set_engine(\"xgboost\") %>% \n", + " set_mode(\"classification\")\n", + "\n", + "# Bundle recipe and model specification into a workflow\n", + "boost_wf <- workflow() %>% \n", + " add_recipe(cuisines_recipe) %>% \n", + " add_model(boost_spec)\n", + "\n", + "# Train a boosted tree model\n", + "boost_wf_fit <- boost_wf %>% \n", + " fit(data = cuisines_train)\n", + "\n", + "\n", + "# Make predictions and Evaluate model performance\n", + "boost_wf_fit %>% \n", + " augment(new_data = cuisines_test) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zNQnbuejigZM" + }, + "source": [ + "> ✅ Vui lòng xem:\n", + ">\n", + "> - [Machine Learning for Social Scientists](https://cimentadaj.github.io/ml_socsci/tree-based-methods.html#random-forests)\n", + ">\n", + "> - [Hands-on Machine Learning with R](https://bradleyboehmke.github.io/HOML/)\n", + ">\n", + "> - [An Introduction to Statistical Learning with Applications in R](https://www.statlearning.com/)\n", + ">\n", + "> - - Khám phá mô hình AdaBoost, một lựa chọn thay thế tốt cho xgboost.\n", + ">\n", + "> để tìm hiểu thêm về các bộ phân loại Ensemble.\n", + "\n", + "## 4. Bổ sung - so sánh nhiều mô hình\n", + "\n", + "Chúng ta đã xây dựng khá nhiều mô hình trong bài thực hành này 🙌. 
Việc tạo ra nhiều quy trình làm việc từ các bộ tiền xử lý và/hoặc các đặc tả mô hình khác nhau, sau đó tính toán từng chỉ số hiệu suất một cách riêng lẻ có thể trở nên tẻ nhạt hoặc khó khăn.\n", + "\n", + "Hãy xem liệu chúng ta có thể giải quyết vấn đề này bằng cách tạo một hàm để huấn luyện danh sách các quy trình làm việc trên tập huấn luyện, sau đó trả về các chỉ số hiệu suất dựa trên tập kiểm tra. Chúng ta sẽ sử dụng `map()` và `map_dfr()` từ gói [purrr](https://purrr.tidyverse.org/) để áp dụng các hàm cho từng phần tử trong danh sách.\n", + "\n", + "> Các hàm [`map()`](https://purrr.tidyverse.org/reference/map.html) cho phép bạn thay thế nhiều vòng lặp for bằng mã ngắn gọn hơn và dễ đọc hơn. Nơi tốt nhất để tìm hiểu về các hàm [`map()`](https://purrr.tidyverse.org/reference/map.html) là chương [iteration](http://r4ds.had.co.nz/iteration.html) trong sách R for Data Science.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Qzb7LyZnimd2" + }, + "source": [ + "set.seed(2056)\n", + "\n", + "# Create a metric set\n", + "eval_metrics <- metric_set(ppv, sens, accuracy, f_meas)\n", + "\n", + "# Define a function that returns performance metrics\n", + "compare_models <- function(workflow_list, train_set, test_set){\n", + " \n", + " suppressWarnings(\n", + " # Fit each model to the train_set\n", + " map(workflow_list, fit, data = train_set) %>% \n", + " # Make predictions on the test set\n", + " map_dfr(augment, new_data = test_set, .id = \"model\") %>%\n", + " # Select desired columns\n", + " select(model, cuisine, .pred_class) %>% \n", + " # Evaluate model performance\n", + " group_by(model) %>% \n", + " eval_metrics(truth = cuisine, estimate = .pred_class) %>% \n", + " ungroup()\n", + " )\n", + " \n", + "} # End of function" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Fwa712sNisDA" + }, + "source": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "3i4VJOi2iu-a" + }, + "source": [ 
+ "# Make a list of workflows\n", + "workflow_list <- list(\n", + " \"svc\" = svc_linear_wf,\n", + " \"svm\" = svm_rbf_wf,\n", + " \"knn\" = knn_wf,\n", + " \"random_forest\" = rf_wf,\n", + " \"xgboost\" = boost_wf)\n", + "\n", + "# Call the function\n", + "set.seed(2056)\n", + "perf_metrics <- compare_models(workflow_list = workflow_list, train_set = cuisines_train, test_set = cuisines_test)\n", + "\n", + "# Print out performance metrics\n", + "perf_metrics %>% \n", + " group_by(.metric) %>% \n", + " arrange(desc(.estimate)) %>% \n", + " slice_head(n=7)\n", + "\n", + "# Compare accuracy\n", + "perf_metrics %>% \n", + " filter(.metric == \"accuracy\") %>% \n", + " arrange(desc(.estimate))\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KuWK_lEli4nW" + }, + "source": [ + "[**workflowset**](https://workflowsets.tidymodels.org/) cho phép người dùng tạo và dễ dàng huấn luyện một số lượng lớn mô hình, nhưng chủ yếu được thiết kế để hoạt động với các kỹ thuật lấy mẫu lại như `cross-validation`, một phương pháp mà chúng ta chưa đề cập đến.\n", + "\n", + "## **🚀Thử thách**\n", + "\n", + "Mỗi kỹ thuật này có một số lượng lớn các tham số mà bạn có thể điều chỉnh, ví dụ như `cost` trong SVMs, `neighbors` trong KNN, `mtry` (Các Dự đoán Được Chọn Ngẫu Nhiên) trong Random Forest.\n", + "\n", + "Hãy nghiên cứu các tham số mặc định của từng kỹ thuật và suy nghĩ về việc điều chỉnh các tham số này sẽ ảnh hưởng như thế nào đến chất lượng của mô hình.\n", + "\n", + "Để tìm hiểu thêm về một mô hình cụ thể và các tham số của nó, sử dụng: `help(\"model\")` ví dụ `help(\"rand_forest\")`\n", + "\n", + "> Trong thực tế, chúng ta thường *ước lượng* các *giá trị tốt nhất* cho các tham số này bằng cách huấn luyện nhiều mô hình trên một `bộ dữ liệu mô phỏng` và đo lường hiệu suất của tất cả các mô hình này. 
Quá trình này được gọi là **tuning**.\n", + "\n", + "### [**Câu hỏi sau bài giảng**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/24/)\n", + "\n", + "### **Ôn tập & Tự học**\n", + "\n", + "Có rất nhiều thuật ngữ chuyên ngành trong các bài học này, vì vậy hãy dành một chút thời gian để xem lại [danh sách này](https://docs.microsoft.com/dotnet/machine-learning/resources/glossary?WT.mc_id=academic-77952-leestott) các thuật ngữ hữu ích!\n", + "\n", + "#### CẢM ƠN ĐẾN:\n", + "\n", + "[`Allison Horst`](https://twitter.com/allison_horst/) vì đã tạo ra những hình minh họa tuyệt vời giúp R trở nên thân thiện và hấp dẫn hơn. Tìm thêm các hình minh họa tại [bộ sưu tập của cô ấy](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM).\n", + "\n", + "[Cassie Breviu](https://www.twitter.com/cassieview) và [Jen Looper](https://www.twitter.com/jenlooper) vì đã tạo ra phiên bản Python gốc của module này ♥️\n", + "\n", + "Chúc học tập vui vẻ,\n", + "\n", + "[Eric](https://twitter.com/ericntay), Đại sứ Sinh viên Microsoft Learn Vàng.\n", + "\n", + "

\n", + " \n", + "

Tác phẩm nghệ thuật của @allison_horst
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn thông tin chính thức. Đối với các thông tin quan trọng, khuyến nghị sử dụng dịch vụ dịch thuật chuyên nghiệp bởi con người. Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/vi/4-Classification/3-Classifiers-2/solution/notebook.ipynb b/translations/vi/4-Classification/3-Classifiers-2/solution/notebook.ipynb new file mode 100644 index 000000000..9ea5d7931 --- /dev/null +++ b/translations/vi/4-Classification/3-Classifiers-2/solution/notebook.ipynb @@ -0,0 +1,302 @@ +{ + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 0 indian 0 0 0 0 0 \n", + "1 1 indian 1 0 0 0 0 \n", + "2 2 indian 0 0 0 0 0 \n", + "3 3 indian 0 0 0 0 0 \n", + "4 4 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 382 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00indian00000000...0000000000
11indian10000000...0000000000
22indian00000000...0000000000
33indian00000000...0000000000
44indian00000000...0000000010
\n

5 rows × 382 columns

\n
" + }, + "metadata": {}, + "execution_count": 1 + } + ], + "source": [ + "import pandas as pd\n", + "cuisines_df = pd.read_csv(\"../../data/cleaned_cuisines.csv\")\n", + "cuisines_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 indian\n", + "1 indian\n", + "2 indian\n", + "3 indian\n", + "4 indian\n", + "Name: cuisine, dtype: object" + ] + }, + "metadata": {}, + "execution_count": 2 + } + ], + "source": [ + "cuisines_label_df = cuisines_df['cuisine']\n", + "cuisines_label_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 3 + } + ], + "source": [ + "cuisines_feature_df = cuisines_df.drop(['Unnamed: 0', 'cuisine'], axis=1)\n", + "cuisines_feature_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Thử các bộ phân loại khác\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.neighbors import KNeighborsClassifier\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.svm import SVC\n", + "from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier\n", + "from sklearn.model_selection import train_test_split, cross_val_score\n", + "from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report, precision_recall_curve\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(cuisines_feature_df, cuisines_label_df, test_size=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "C = 10\n", + "# Create different classifiers.\n", + "classifiers = {\n", + " 'Linear SVC': SVC(kernel='linear', C=C, probability=True,random_state=0),\n", + " 'KNN classifier': KNeighborsClassifier(C),\n", + " 'SVC': SVC(),\n", + " 'RFST': RandomForestClassifier(n_estimators=100),\n", + " 'ADA': AdaBoostClassifier(n_estimators=100)\n", + " \n", + "}\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Accuracy (train) for Linear SVC: 76.4% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.64 0.66 0.65 242\n", + " indian 0.91 0.86 0.89 236\n", + " japanese 0.72 0.73 0.73 245\n", + " korean 0.83 0.75 0.79 234\n", + " thai 0.75 0.82 0.78 242\n", + "\n", + " accuracy 
0.76 1199\n", + " macro avg 0.77 0.76 0.77 1199\n", + "weighted avg 0.77 0.76 0.77 1199\n", + "\n", + "Accuracy (train) for KNN classifier: 70.7% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.65 0.63 0.64 242\n", + " indian 0.84 0.81 0.82 236\n", + " japanese 0.60 0.81 0.69 245\n", + " korean 0.89 0.53 0.67 234\n", + " thai 0.69 0.75 0.72 242\n", + "\n", + " accuracy 0.71 1199\n", + " macro avg 0.73 0.71 0.71 1199\n", + "weighted avg 0.73 0.71 0.71 1199\n", + "\n", + "Accuracy (train) for SVC: 80.1% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.71 0.69 0.70 242\n", + " indian 0.92 0.92 0.92 236\n", + " japanese 0.77 0.78 0.77 245\n", + " korean 0.87 0.77 0.82 234\n", + " thai 0.75 0.86 0.80 242\n", + "\n", + " accuracy 0.80 1199\n", + " macro avg 0.80 0.80 0.80 1199\n", + "weighted avg 0.80 0.80 0.80 1199\n", + "\n", + "Accuracy (train) for RFST: 82.8% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.80 0.75 0.77 242\n", + " indian 0.90 0.91 0.90 236\n", + " japanese 0.82 0.78 0.80 245\n", + " korean 0.85 0.82 0.83 234\n", + " thai 0.78 0.89 0.83 242\n", + "\n", + " accuracy 0.83 1199\n", + " macro avg 0.83 0.83 0.83 1199\n", + "weighted avg 0.83 0.83 0.83 1199\n", + "\n", + "Accuracy (train) for ADA: 71.1% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.60 0.57 0.58 242\n", + " indian 0.87 0.84 0.86 236\n", + " japanese 0.71 0.60 0.65 245\n", + " korean 0.68 0.78 0.72 234\n", + " thai 0.70 0.78 0.74 242\n", + "\n", + " accuracy 0.71 1199\n", + " macro avg 0.71 0.71 0.71 1199\n", + "weighted avg 0.71 0.71 0.71 1199\n", + "\n" + ] + } + ], + "source": [ + "n_classifiers = len(classifiers)\n", + "\n", + "for index, (name, classifier) in enumerate(classifiers.items()):\n", + " classifier.fit(X_train, np.ravel(y_train))\n", + "\n", + " y_pred = classifier.predict(X_test)\n", + " accuracy = accuracy_score(y_test, y_pred)\n", + " print(\"Accuracy (train) for %s: %0.1f%% \" % (name, 
accuracy * 100))\n", + " print(classification_report(y_test,y_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn tham khảo chính thức. Đối với các thông tin quan trọng, chúng tôi khuyến nghị sử dụng dịch vụ dịch thuật chuyên nghiệp từ con người. Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "7ea2b714669c823a596d986ba2d5739f", + "translation_date": "2025-09-06T14:43:09+00:00", + "source_file": "4-Classification/3-Classifiers-2/solution/notebook.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/translations/vi/4-Classification/4-Applied/notebook.ipynb b/translations/vi/4-Classification/4-Applied/notebook.ipynb new file mode 100644 index 000000000..6aa02291f --- /dev/null +++ b/translations/vi/4-Classification/4-Applied/notebook.ipynb @@ -0,0 +1,39 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": 
"ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 4, + "coopTranslator": { + "original_hash": "2f3e0d9e9ac5c301558fb8bf733ac0cb", + "translation_date": "2025-09-06T14:41:44+00:00", + "source_file": "4-Classification/4-Applied/notebook.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn tham khảo chính thức. Đối với các thông tin quan trọng, nên sử dụng dịch vụ dịch thuật chuyên nghiệp từ con người. 
Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/vi/4-Classification/4-Applied/solution/notebook.ipynb b/translations/vi/4-Classification/4-Applied/solution/notebook.ipynb new file mode 100644 index 000000000..59d001fc8 --- /dev/null +++ b/translations/vi/4-Classification/4-Applied/solution/notebook.ipynb @@ -0,0 +1,290 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "49325d6dd12a3628fc64fa7ccb1a80ff", + "translation_date": "2025-09-06T14:42:08+00:00", + "source_file": "4-Classification/4-Applied/solution/notebook.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: skl2onnx in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (1.8.0)\n", + "Requirement already satisfied: protobuf in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (3.8.0)\n", + "Requirement already satisfied: numpy>=1.15 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from 
skl2onnx) (1.19.2)\n", + "Requirement already satisfied: onnx>=1.2.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (1.9.0)\n", + "Requirement already satisfied: six in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from skl2onnx) (1.12.0)\n", + "Requirement already satisfied: onnxconverter-common<1.9,>=1.6.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (1.8.1)\n", + "Requirement already satisfied: scikit-learn>=0.19 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (0.24.2)\n", + "Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (1.4.1)\n", + "Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from protobuf->skl2onnx) (45.1.0)\n", + "Requirement already satisfied: typing-extensions>=3.6.2.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from onnx>=1.2.1->skl2onnx) (3.10.0.0)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from scikit-learn>=0.19->skl2onnx) (2.1.0)\n", + "Requirement already satisfied: joblib>=0.11 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from scikit-learn>=0.19->skl2onnx) (0.16.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "!pip install skl2onnx" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [], + 
"source": [ + "import pandas as pd \n" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 0 indian 0 0 0 0 0 \n", + "1 1 indian 1 0 0 0 0 \n", + "2 2 indian 0 0 0 0 0 \n", + "3 3 indian 0 0 0 0 0 \n", + "4 4 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 382 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00indian00000000...0000000000
11indian10000000...0000000000
22indian00000000...0000000000
33indian00000000...0000000000
44indian00000000...0000000010
\n

5 rows × 382 columns

\n
" + }, + "metadata": {}, + "execution_count": 60 + } + ], + "source": [ + "data = pd.read_csv('../../data/cleaned_cuisines.csv')\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 61 + } + ], + "source": [ + "X = data.iloc[:,2:]\n", + "X.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " cuisine\n", + "0 indian\n", + "1 indian\n", + "2 indian\n", + "3 indian\n", + "4 indian" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
cuisine
0indian
1indian
2indian
3indian
4indian
\n
" + }, + "metadata": {}, + "execution_count": 62 + } + ], + "source": [ + "y = data[['cuisine']]\n", + "y.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "from sklearn.svm import SVC\n", + "from sklearn.model_selection import cross_val_score\n", + "from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "SVC(C=10, kernel='linear', probability=True, random_state=0)" + ] + }, + "metadata": {}, + "execution_count": 65 + } + ], + "source": [ + "model = SVC(kernel='linear', C=10, probability=True,random_state=0)\n", + "model.fit(X_train,y_train.values.ravel())\n" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred = model.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " precision recall f1-score support\n\n chinese 0.72 0.70 0.71 236\n indian 0.91 0.88 0.89 243\n japanese 0.80 0.75 0.77 240\n korean 0.80 0.81 0.81 230\n thai 0.76 0.85 0.80 250\n\n accuracy 0.80 1199\n macro avg 0.80 0.80 0.80 1199\nweighted avg 0.80 0.80 0.80 1199\n\n" + ] + } + ], + "source": [ + "print(classification_report(y_test,y_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [], + "source": [ + "from skl2onnx import convert_sklearn\n", + "from skl2onnx.common.data_types import FloatTensorType\n", + "\n", + "initial_type = 
[('float_input', FloatTensorType([None, 380]))]\n", + "options = {id(model): {'nocl': True, 'zipmap': False}}\n", + "onx = convert_sklearn(model, initial_types=initial_type, options=options)\n", + "with open(\"./model.onnx\", \"wb\") as f:\n", + " f.write(onx.SerializeToString())\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc sự không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn tham khảo chính thức. Đối với các thông tin quan trọng, nên sử dụng dịch vụ dịch thuật chuyên nghiệp từ con người. Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/vi/5-Clustering/1-Visualize/notebook.ipynb b/translations/vi/5-Clustering/1-Visualize/notebook.ipynb new file mode 100644 index 000000000..3012140c8 --- /dev/null +++ b/translations/vi/5-Clustering/1-Visualize/notebook.ipynb @@ -0,0 +1,50 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python383jvsc74a57bd0e134e05457d34029b6460cd73bbf1ed73f339b5b6d98c95be70b69eba114fe95", + "display_name": "Python 3.8.3 64-bit (conda)" + }, + "coopTranslator": { + "original_hash": "40e0707e96b3e1899a912776006264f9", + "translation_date": "2025-09-06T14:08:14+00:00", + "source_file": "5-Clustering/1-Visualize/notebook.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + 
"cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc sự không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn tham khảo chính thức. Đối với các thông tin quan trọng, nên sử dụng dịch vụ dịch thuật chuyên nghiệp từ con người. Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/vi/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb b/translations/vi/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb new file mode 100644 index 000000000..f76379f5d --- /dev/null +++ b/translations/vi/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb @@ -0,0 +1,493 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "## **Phân tích âm nhạc Nigeria được thu thập từ Spotify**\n", + "\n", + "Clustering (phân cụm) là một loại [Học không giám sát](https://wikipedia.org/wiki/Unsupervised_learning) giả định rằng một tập dữ liệu không được gắn nhãn hoặc các đầu vào của nó không được ghép với các đầu ra được xác định trước. Nó sử dụng các thuật toán khác nhau để sắp xếp dữ liệu không gắn nhãn và cung cấp các nhóm dựa trên các mẫu mà nó nhận ra trong dữ liệu.\n", + "\n", + "[**Câu hỏi kiểm tra trước bài giảng**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/27/)\n", + "\n", + "### **Giới thiệu**\n", + "\n", + "[Phân cụm](https://link.springer.com/referenceworkentry/10.1007%2F978-0-387-30164-8_124) rất hữu ích trong việc khám phá dữ liệu. 
Hãy xem liệu nó có thể giúp khám phá các xu hướng và mẫu trong cách khán giả Nigeria tiêu thụ âm nhạc hay không.\n", + "\n", + "> ✅ Dành một phút để suy nghĩ về các ứng dụng của phân cụm. Trong đời sống thực, phân cụm xảy ra bất cứ khi nào bạn có một đống quần áo cần phân loại theo từng thành viên trong gia đình 🧦👕👖🩲. Trong khoa học dữ liệu, phân cụm xảy ra khi cố gắng phân tích sở thích của người dùng hoặc xác định các đặc điểm của bất kỳ tập dữ liệu không gắn nhãn nào. Phân cụm, theo một cách nào đó, giúp làm sáng tỏ sự hỗn loạn, giống như ngăn kéo đựng tất.\n", + "\n", + "Trong môi trường chuyên nghiệp, phân cụm có thể được sử dụng để xác định các phân khúc thị trường, ví dụ như xác định nhóm tuổi nào mua những mặt hàng nào. Một ứng dụng khác có thể là phát hiện bất thường, chẳng hạn để phát hiện gian lận từ một tập dữ liệu giao dịch thẻ tín dụng. Hoặc bạn có thể sử dụng phân cụm để xác định khối u trong một loạt các bản quét y tế.\n", + "\n", + "✅ Dành một phút để suy nghĩ về cách bạn có thể đã gặp phân cụm 'trong thực tế', trong ngân hàng, thương mại điện tử hoặc môi trường kinh doanh.\n", + "\n", + "> 🎓 Thật thú vị, phân tích cụm bắt nguồn từ các lĩnh vực Nhân học và Tâm lý học vào những năm 1930. Bạn có thể tưởng tượng nó đã được sử dụng như thế nào không?\n", + "\n", + "Ngoài ra, bạn có thể sử dụng nó để nhóm các kết quả tìm kiếm - ví dụ như theo liên kết mua sắm, hình ảnh hoặc đánh giá. Phân cụm rất hữu ích khi bạn có một tập dữ liệu lớn mà bạn muốn giảm bớt và thực hiện phân tích chi tiết hơn, vì vậy kỹ thuật này có thể được sử dụng để tìm hiểu về dữ liệu trước khi xây dựng các mô hình khác.\n", + "\n", + "✅ Khi dữ liệu của bạn được tổ chức thành các cụm, bạn gán cho nó một Id cụm, và kỹ thuật này có thể hữu ích khi bảo vệ quyền riêng tư của tập dữ liệu; bạn có thể thay thế việc tham chiếu đến một điểm dữ liệu bằng Id cụm của nó, thay vì bằng dữ liệu nhận dạng tiết lộ nhiều hơn. 
Bạn có thể nghĩ đến những lý do khác tại sao bạn lại tham chiếu đến Id cụm thay vì các yếu tố khác của cụm để xác định nó không?\n", + "\n", + "### Bắt đầu với phân cụm\n", + "\n", + "> 🎓 Cách chúng ta tạo cụm phụ thuộc rất nhiều vào cách chúng ta nhóm các điểm dữ liệu thành các nhóm. Hãy cùng tìm hiểu một số thuật ngữ:\n", + ">\n", + "> 🎓 ['Transductive' vs. 'inductive'](https://wikipedia.org/wiki/Transduction_(machine_learning))\n", + ">\n", + "> Suy diễn truyền dẫn (transductive inference) được rút ra từ các trường hợp huấn luyện quan sát được ánh xạ đến các trường hợp kiểm tra cụ thể. Suy diễn quy nạp (inductive inference) được rút ra từ các trường hợp huấn luyện ánh xạ đến các quy tắc chung, sau đó mới được áp dụng cho các trường hợp kiểm tra.\n", + ">\n", + "> Một ví dụ: Hãy tưởng tượng bạn có một tập dữ liệu chỉ được gắn nhãn một phần. Một số là 'đĩa nhạc', một số là 'cd', và một số để trống. Nhiệm vụ của bạn là cung cấp nhãn cho các mục trống. Nếu bạn chọn cách tiếp cận quy nạp, bạn sẽ huấn luyện một mô hình tìm kiếm 'đĩa nhạc' và 'cd', và áp dụng các nhãn đó cho dữ liệu chưa được gắn nhãn. Cách tiếp cận này sẽ gặp khó khăn trong việc phân loại những thứ thực sự là 'băng cassette'. Một cách tiếp cận truyền dẫn, mặt khác, xử lý dữ liệu chưa biết này hiệu quả hơn vì nó hoạt động để nhóm các mục tương tự lại với nhau và sau đó áp dụng một nhãn cho một nhóm. Trong trường hợp này, các cụm có thể phản ánh 'những thứ âm nhạc hình tròn' và 'những thứ âm nhạc hình vuông'.\n", + ">\n", + "> 🎓 ['Non-flat' vs. 
'flat' geometry](https://datascience.stackexchange.com/questions/52260/terminology-flat-geometry-in-the-context-of-clustering)\n", + ">\n", + "> Được lấy từ thuật ngữ toán học, hình học không phẳng (non-flat) và phẳng (flat) đề cập đến cách đo khoảng cách giữa các điểm bằng các phương pháp hình học 'phẳng' ([Euclidean](https://wikipedia.org/wiki/Euclidean_geometry)) hoặc 'không phẳng' (phi Euclid).\n", + ">\n", + "> 'Phẳng' trong ngữ cảnh này đề cập đến hình học Euclid (một phần được dạy như hình học 'mặt phẳng'), và không phẳng đề cập đến hình học phi Euclid. Hình học có liên quan gì đến học máy? Vâng, vì cả hai lĩnh vực đều bắt nguồn từ toán học, nên phải có một cách chung để đo khoảng cách giữa các điểm trong các cụm, và điều đó có thể được thực hiện theo cách 'phẳng' hoặc 'không phẳng', tùy thuộc vào bản chất của dữ liệu. [Khoảng cách Euclid](https://wikipedia.org/wiki/Euclidean_distance) được đo bằng độ dài của một đoạn thẳng giữa hai điểm. [Khoảng cách phi Euclid](https://wikipedia.org/wiki/Non-Euclidean_geometry) được đo dọc theo một đường cong. Nếu dữ liệu của bạn, khi được hình dung, dường như không tồn tại trên một mặt phẳng, bạn có thể cần sử dụng một thuật toán chuyên biệt để xử lý nó.\n", + "\n", + "

\n", + " \n", + "

Infographic bởi Dasani Madipalli
\n", + "\n", + "> 🎓 ['Distances'](https://web.stanford.edu/class/cs345a/slides/12-clustering.pdf)\n", + ">\n", + "> Các cụm được xác định bởi ma trận khoảng cách của chúng, ví dụ: khoảng cách giữa các điểm. Khoảng cách này có thể được đo bằng một vài cách. Các cụm Euclid được xác định bởi giá trị trung bình của các điểm và chứa một 'centroid' hoặc điểm trung tâm. Khoảng cách được đo bằng khoảng cách đến centroid đó. Khoảng cách phi Euclid đề cập đến 'clustroids', điểm gần nhất với các điểm khác. Clustroids lần lượt có thể được định nghĩa theo nhiều cách khác nhau.\n", + ">\n", + "> 🎓 ['Constrained'](https://wikipedia.org/wiki/Constrained_clustering)\n", + ">\n", + "> [Phân cụm có ràng buộc](https://web.cs.ucdavis.edu/~davidson/Publications/ICDMTutorial.pdf) giới thiệu học 'bán giám sát' vào phương pháp không giám sát này. Các mối quan hệ giữa các điểm được đánh dấu là 'không thể liên kết' hoặc 'phải liên kết' để một số quy tắc được áp dụng cho tập dữ liệu.\n", + ">\n", + "> Một ví dụ: Nếu một thuật toán được tự do áp dụng trên một loạt dữ liệu không gắn nhãn hoặc gắn nhãn một phần, các cụm mà nó tạo ra có thể có chất lượng kém. Trong ví dụ trên, các cụm có thể nhóm 'những thứ âm nhạc hình tròn', 'những thứ âm nhạc hình vuông', 'những thứ hình tam giác' và 'bánh quy'. Nếu được cung cấp một số ràng buộc hoặc quy tắc để tuân theo (\"mục phải được làm bằng nhựa\", \"mục cần có khả năng tạo ra âm nhạc\"), điều này có thể giúp 'ràng buộc' thuật toán để đưa ra các lựa chọn tốt hơn.\n", + ">\n", + "> 🎓 'Density'\n", + ">\n", + "> Dữ liệu 'nhiễu' được coi là 'dày đặc'. Khoảng cách giữa các điểm trong mỗi cụm của nó có thể, khi được kiểm tra, dày đặc hơn hoặc ít dày đặc hơn, hoặc 'đông đúc', và do đó dữ liệu này cần được phân tích bằng phương pháp phân cụm phù hợp. 
[Bài viết này](https://www.kdnuggets.com/2020/02/understanding-density-based-clustering.html) minh họa sự khác biệt giữa việc sử dụng phân cụm K-Means và các thuật toán HDBSCAN để khám phá một tập dữ liệu nhiễu với mật độ cụm không đồng đều.\n", + "\n", + "Nâng cao hiểu biết của bạn về các kỹ thuật phân cụm trong [module học này](https://docs.microsoft.com/learn/modules/train-evaluate-cluster-models?WT.mc_id=academic-77952-leestott)\n", + "\n", + "### **Thuật toán phân cụm**\n", + "\n", + "Có hơn 100 thuật toán phân cụm, và việc sử dụng chúng phụ thuộc vào bản chất của dữ liệu. Hãy thảo luận một số thuật toán chính:\n", + "\n", + "- **Phân cụm phân cấp**. Nếu một đối tượng được phân loại dựa trên sự gần gũi của nó với một đối tượng gần đó, thay vì với một đối tượng xa hơn, các cụm được hình thành dựa trên khoảng cách của các thành viên với các đối tượng khác. Phân cụm phân cấp được đặc trưng bởi việc liên tục kết hợp hai cụm.\n", + "\n", + "

\n", + " \n", + "

Infographic bởi Dasani Madipalli
\n", + "\n", + "- **Phân cụm centroid**. Thuật toán phổ biến này yêu cầu chọn 'k', hoặc số lượng cụm cần tạo, sau đó thuật toán xác định điểm trung tâm của một cụm và thu thập dữ liệu xung quanh điểm đó. [Phân cụm K-means](https://wikipedia.org/wiki/K-means_clustering) là một phiên bản phổ biến của phân cụm centroid, chia một tập dữ liệu thành K nhóm được xác định trước. Trung tâm được xác định bởi giá trị trung bình gần nhất, do đó có tên gọi này. Khoảng cách bình phương từ cụm được giảm thiểu.\n", + "\n", + "

\n", + " \n", + "

Infographic bởi Dasani Madipalli
\n", + "\n", + "- **Phân cụm dựa trên phân phối**. Dựa trên mô hình thống kê, phân cụm dựa trên phân phối tập trung vào việc xác định xác suất rằng một điểm dữ liệu thuộc về một cụm và gán nó tương ứng. Các phương pháp hỗn hợp Gaussian thuộc loại này.\n", + "\n", + "- **Phân cụm dựa trên mật độ**. Các điểm dữ liệu được gán vào các cụm dựa trên mật độ của chúng, hoặc sự nhóm lại xung quanh nhau. Các điểm dữ liệu xa nhóm được coi là nhiễu hoặc ngoại lệ. DBSCAN, Mean-shift và OPTICS thuộc loại phân cụm này.\n", + "\n", + "- **Phân cụm dựa trên lưới**. Đối với các tập dữ liệu đa chiều, một lưới được tạo ra và dữ liệu được chia giữa các ô của lưới, từ đó tạo ra các cụm.\n", + "\n", + "Cách tốt nhất để học về phân cụm là tự mình thử nghiệm, và đó là điều bạn sẽ làm trong bài tập này.\n", + "\n", + "Chúng ta sẽ cần một số gói để hoàn thành module này. Bạn có thể cài đặt chúng bằng lệnh: `install.packages(c('tidyverse', 'tidymodels', 'DataExplorer', 'summarytools', 'plotly', 'paletteer', 'corrplot', 'patchwork'))`\n", + "\n", + "Ngoài ra, đoạn mã dưới đây sẽ kiểm tra xem bạn đã có các gói cần thiết để hoàn thành module này chưa và cài đặt chúng nếu thiếu.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "suppressWarnings(if(!require(\"pacman\")) install.packages(\"pacman\"))\r\n", + "\r\n", + "pacman::p_load('tidyverse', 'tidymodels', 'DataExplorer', 'summarytools', 'plotly', 'paletteer', 'corrplot', 'patchwork')\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Bài tập - phân cụm dữ liệu của bạn\n", + "\n", + "Phân cụm là một kỹ thuật được hỗ trợ rất nhiều bởi việc trực quan hóa đúng cách, vì vậy hãy bắt đầu bằng cách trực quan hóa dữ liệu âm nhạc của chúng ta. 
Bài tập này sẽ giúp chúng ta quyết định phương pháp phân cụm nào sẽ được sử dụng hiệu quả nhất cho bản chất của dữ liệu này.\n", + "\n", + "Hãy bắt đầu ngay bằng cách nhập dữ liệu.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Load the core tidyverse and make it available in your current R session\r\n", + "library(tidyverse)\r\n", + "\r\n", + "# Import the data into a tibble\r\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/5-Clustering/data/nigerian-songs.csv\")\r\n", + "\r\n", + "# View the first 5 rows of the data set\r\n", + "df %>% \r\n", + " slice_head(n = 5)\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Đôi khi, chúng ta có thể muốn biết thêm một chút thông tin về dữ liệu của mình. Chúng ta có thể xem `dữ liệu` và `cấu trúc của nó` bằng cách sử dụng hàm [*glimpse()*](https://pillar.r-lib.org/reference/glimpse.html):\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Glimpse into the data set\r\n", + "df %>% \r\n", + " glimpse()\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Làm tốt lắm!💪\n", + "\n", + "Chúng ta có thể thấy rằng `glimpse()` sẽ cung cấp cho bạn tổng số hàng (quan sát) và cột (biến), sau đó là một vài giá trị đầu tiên của mỗi biến được hiển thị theo hàng sau tên biến. 
Ngoài ra, *kiểu dữ liệu* của biến được hiển thị ngay sau tên biến trong dấu `< >`.\n", + "\n", + "`DataExplorer::introduce()` có thể tóm tắt thông tin này một cách gọn gàng:\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Describe basic information for our data\r\n", + "df %>% \r\n", + " introduce()\r\n", + "\r\n", + "# A visual display of the same\r\n", + "df %>% \r\n", + " plot_intro()\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Tuyệt vời! Chúng ta vừa biết rằng dữ liệu của mình không có giá trị bị thiếu.\n", + "\n", + "Nhân tiện, chúng ta có thể khám phá các thống kê xu hướng trung tâm phổ biến (ví dụ như [mean](https://en.wikipedia.org/wiki/Arithmetic_mean) và [median](https://en.wikipedia.org/wiki/Median)) và các thước đo độ phân tán (ví dụ như [standard deviation](https://en.wikipedia.org/wiki/Standard_deviation)) bằng cách sử dụng `summarytools::descr()`\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Describe common statistics\r\n", + "df %>% \r\n", + " descr(stats = \"common\")\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Hãy xem các giá trị tổng quát của dữ liệu. Lưu ý rằng độ phổ biến có thể là `0`, điều này cho thấy các bài hát không có xếp hạng. Chúng ta sẽ loại bỏ chúng ngay sau đây.\n", + "\n", + "> 🤔 Nếu chúng ta đang làm việc với phân cụm, một phương pháp không giám sát không yêu cầu dữ liệu được gắn nhãn, tại sao lại hiển thị dữ liệu này với nhãn? Trong giai đoạn khám phá dữ liệu, chúng rất hữu ích, nhưng chúng không cần thiết để các thuật toán phân cụm hoạt động.\n", + "\n", + "### 1. 
Khám phá các thể loại phổ biến\n", + "\n", + "Hãy cùng tìm hiểu các thể loại phổ biến nhất 🎶 bằng cách đếm số lần chúng xuất hiện.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Popular genres\r\n", + "top_genres <- df %>% \r\n", + " count(artist_top_genre, sort = TRUE) %>% \r\n", + "# Encode to categorical and reorder according to count\r\n", + " mutate(artist_top_genre = factor(artist_top_genre) %>% fct_inorder())\r\n", + "\r\n", + "# Print the top genres\r\n", + "top_genres\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Điều đó thật tốt! Người ta thường nói một bức tranh đáng giá ngàn dòng của một khung dữ liệu (thực ra chẳng ai nói vậy cả 😅). Nhưng bạn hiểu ý rồi đúng không?\n", + "\n", + "Một cách để trực quan hóa dữ liệu phân loại (biến ký tự hoặc biến nhân tố) là sử dụng biểu đồ cột. Hãy tạo một biểu đồ cột cho 10 thể loại hàng đầu:\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Change the default gray theme\r\n", + "theme_set(theme_light())\r\n", + "\r\n", + "# Visualize popular genres\r\n", + "top_genres %>%\r\n", + " slice(1:10) %>% \r\n", + " ggplot(mapping = aes(x = artist_top_genre, y = n,\r\n", + " fill = artist_top_genre)) +\r\n", + " geom_col(alpha = 0.8) +\r\n", + " paletteer::scale_fill_paletteer_d(\"rcartocolor::Vivid\") +\r\n", + " ggtitle(\"Top genres\") +\r\n", + " theme(plot.title = element_text(hjust = 0.5),\r\n", + " # Rotates the X markers (so we can read them)\r\n", + " axis.text.x = element_text(angle = 90))\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Bây giờ dễ dàng hơn nhiều để nhận ra rằng chúng ta có các thể loại `Missing` (bị thiếu) 🧐!\n", + "\n", + "> Một hình ảnh trực quan tốt sẽ cho bạn thấy những điều mà bạn không ngờ tới, hoặc gợi lên những câu hỏi mới về dữ liệu - Hadley Wickham và Garrett Grolemund, [R For Data 
Science](https://r4ds.had.co.nz/introduction.html)\n", + "\n", + "Lưu ý, khi thể loại hàng đầu được mô tả là `Missing`, điều đó có nghĩa là Spotify đã không phân loại nó, vì vậy hãy loại bỏ nó đi.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Visualize popular genres\r\n", + "top_genres %>%\r\n", + " filter(artist_top_genre != \"Missing\") %>% \r\n", + " slice(1:10) %>% \r\n", + " ggplot(mapping = aes(x = artist_top_genre, y = n,\r\n", + " fill = artist_top_genre)) +\r\n", + " geom_col(alpha = 0.8) +\r\n", + " paletteer::scale_fill_paletteer_d(\"rcartocolor::Vivid\") +\r\n", + " ggtitle(\"Top genres\") +\r\n", + " theme(plot.title = element_text(hjust = 0.5),\r\n", + " # Rotates the X markers (so we can read them)\r\n", + " axis.text.x = element_text(angle = 90))\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Từ việc khám phá dữ liệu ban đầu, chúng ta nhận thấy rằng ba thể loại hàng đầu chiếm ưu thế trong tập dữ liệu này. 
Hãy tập trung vào `afro dancehall`, `afropop`, và `nigerian pop`, đồng thời lọc tập dữ liệu để loại bỏ bất kỳ mục nào có giá trị phổ biến bằng 0 (nghĩa là không được phân loại với mức độ phổ biến trong tập dữ liệu và có thể được coi là nhiễu đối với mục đích của chúng ta):\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "nigerian_songs <- df %>% \r\n", + " # Concentrate on top 3 genres\r\n", + " filter(artist_top_genre %in% c(\"afro dancehall\", \"afropop\",\"nigerian pop\")) %>% \r\n", + " # Remove unclassified observations\r\n", + " filter(popularity != 0)\r\n", + "\r\n", + "\r\n", + "\r\n", + "# Visualize popular genres\r\n", + "nigerian_songs %>%\r\n", + " count(artist_top_genre) %>%\r\n", + " ggplot(mapping = aes(x = artist_top_genre, y = n,\r\n", + " fill = artist_top_genre)) +\r\n", + " geom_col(alpha = 0.8) +\r\n", + " paletteer::scale_fill_paletteer_d(\"ggsci::category10_d3\") +\r\n", + " ggtitle(\"Top genres\") +\r\n", + " theme(plot.title = element_text(hjust = 0.5))\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Hãy xem liệu có bất kỳ mối quan hệ tuyến tính rõ ràng nào giữa các biến số trong tập dữ liệu của chúng ta hay không. Mối quan hệ này được định lượng một cách toán học bằng [thống kê tương quan](https://en.wikipedia.org/wiki/Correlation).\n", + "\n", + "Thống kê tương quan là một giá trị nằm trong khoảng từ -1 đến 1, biểu thị mức độ mạnh của mối quan hệ. 
Các giá trị lớn hơn 0 cho thấy mối tương quan *dương* (giá trị cao của một biến thường đi kèm với giá trị cao của biến kia), trong khi các giá trị nhỏ hơn 0 cho thấy mối tương quan *âm* (giá trị cao của một biến thường đi kèm với giá trị thấp của biến kia).\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Narrow down to numeric variables and find correlation\r\n", + "corr_mat <- nigerian_songs %>% \r\n", + " select(where(is.numeric)) %>% \r\n", + " cor()\r\n", + "\r\n", + "# Visualize correlation matrix\r\n", + "corrplot(corr_mat, order = 'AOE', col = c('white', 'black'), bg = 'gold2') \r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Dữ liệu không có sự tương quan mạnh ngoại trừ giữa `energy` và `loudness`, điều này khá hợp lý vì nhạc lớn thường rất sôi động. `Popularity` có sự liên hệ với `release date`, điều này cũng hợp lý vì các bài hát mới hơn có thể phổ biến hơn. Độ dài và năng lượng dường như cũng có sự tương quan.\n", + "\n", + "Sẽ rất thú vị khi xem một thuật toán phân cụm có thể làm gì với dữ liệu này!\n", + "\n", + "> 🎓 Lưu ý rằng sự tương quan không đồng nghĩa với quan hệ nhân quả! Chúng ta có bằng chứng về sự tương quan nhưng không có bằng chứng về quan hệ nhân quả. Một [trang web thú vị](https://tylervigen.com/spurious-correlations) có một số hình ảnh minh họa nhấn mạnh điểm này.\n", + "\n", + "### 2. Khám phá phân bố dữ liệu\n", + "\n", + "Hãy đặt ra một số câu hỏi tinh tế hơn. Liệu các thể loại có khác biệt đáng kể trong cách chúng được cảm nhận về khả năng nhảy múa, dựa trên mức độ phổ biến của chúng? 
Hãy kiểm tra phân bố dữ liệu của ba thể loại hàng đầu về mức độ phổ biến và khả năng nhảy múa dọc theo trục x và y bằng cách sử dụng [biểu đồ mật độ](https://www.khanacademy.org/math/ap-statistics/density-curves-normal-distribution-ap/density-curves/v/density-curves).\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Perform 2D kernel density estimation\r\n", + "density_estimate_2d <- nigerian_songs %>% \r\n", + " ggplot(mapping = aes(x = popularity, y = danceability, color = artist_top_genre)) +\r\n", + " geom_density_2d(bins = 5, size = 1) +\r\n", + " paletteer::scale_color_paletteer_d(\"RSkittleBrewer::wildberry\") +\r\n", + " xlim(-20, 80) +\r\n", + " ylim(0, 1.2)\r\n", + "\r\n", + "# Density plot based on the popularity\r\n", + "density_estimate_pop <- nigerian_songs %>% \r\n", + " ggplot(mapping = aes(x = popularity, fill = artist_top_genre, color = artist_top_genre)) +\r\n", + " geom_density(size = 1, alpha = 0.5) +\r\n", + " paletteer::scale_fill_paletteer_d(\"RSkittleBrewer::wildberry\") +\r\n", + " paletteer::scale_color_paletteer_d(\"RSkittleBrewer::wildberry\") +\r\n", + " theme(legend.position = \"none\")\r\n", + "\r\n", + "# Density plot based on the danceability\r\n", + "density_estimate_dance <- nigerian_songs %>% \r\n", + " ggplot(mapping = aes(x = danceability, fill = artist_top_genre, color = artist_top_genre)) +\r\n", + " geom_density(size = 1, alpha = 0.5) +\r\n", + " paletteer::scale_fill_paletteer_d(\"RSkittleBrewer::wildberry\") +\r\n", + " paletteer::scale_color_paletteer_d(\"RSkittleBrewer::wildberry\")\r\n", + "\r\n", + "\r\n", + "# Patch everything together\r\n", + "library(patchwork)\r\n", + "density_estimate_2d / (density_estimate_pop + density_estimate_dance)\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Chúng ta thấy rằng có các vòng tròn đồng tâm xếp thẳng hàng, bất kể thể loại. 
Liệu có thể rằng sở thích của người Nigeria hội tụ ở một mức độ nhảy múa nhất định cho thể loại này?\n", + "\n", + "Nhìn chung, ba thể loại này tương đồng về mức độ phổ biến và khả năng nhảy múa. Việc xác định các cụm trong dữ liệu không hoàn toàn đồng nhất này sẽ là một thách thức. Hãy xem liệu biểu đồ phân tán có thể hỗ trợ điều này không.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# A scatter plot of popularity and danceability\r\n", + "scatter_plot <- nigerian_songs %>% \r\n", + " ggplot(mapping = aes(x = popularity, y = danceability, color = artist_top_genre, shape = artist_top_genre)) +\r\n", + " geom_point(size = 2, alpha = 0.8) +\r\n", + " paletteer::scale_color_paletteer_d(\"futurevisions::mars\")\r\n", + "\r\n", + "# Add a touch of interactivity\r\n", + "ggplotly(scatter_plot)\r\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Một biểu đồ phân tán của cùng các trục cho thấy một mô hình hội tụ tương tự.\n", + "\n", + "Nhìn chung, đối với việc phân cụm, bạn có thể sử dụng biểu đồ phân tán để hiển thị các cụm dữ liệu, vì vậy việc thành thạo loại hình trực quan hóa này rất hữu ích. Trong bài học tiếp theo, chúng ta sẽ sử dụng dữ liệu đã được lọc này và áp dụng phương pháp phân cụm k-means để khám phá các nhóm trong dữ liệu này, những nhóm có xu hướng chồng lấn theo những cách thú vị.\n", + "\n", + "## **🚀 Thử thách**\n", + "\n", + "Để chuẩn bị cho bài học tiếp theo, hãy tạo một biểu đồ về các thuật toán phân cụm khác nhau mà bạn có thể khám phá và sử dụng trong môi trường sản xuất. Những loại vấn đề nào mà phân cụm đang cố gắng giải quyết?\n", + "\n", + "## [**Câu hỏi kiểm tra sau bài học**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/28/)\n", + "\n", + "## **Ôn tập & Tự học**\n", + "\n", + "Trước khi bạn áp dụng các thuật toán phân cụm, như chúng ta đã học, việc hiểu rõ bản chất của tập dữ liệu là một ý tưởng tốt. 
Đọc thêm về chủ đề này [tại đây](https://www.kdnuggets.com/2019/10/right-clustering-algorithm.html)\n", + "\n", + "Nâng cao hiểu biết của bạn về các kỹ thuật phân cụm:\n", + "\n", + "- [Huấn luyện và Đánh giá Mô hình Phân cụm bằng Tidymodels và các công cụ liên quan](https://rpubs.com/eR_ic/clustering)\n", + "\n", + "- Bradley Boehmke & Brandon Greenwell, [*Hands-On Machine Learning with R*](https://bradleyboehmke.github.io/HOML/)*.*\n", + "\n", + "## **Bài tập**\n", + "\n", + "[Nghiên cứu các cách trực quan hóa khác cho phân cụm](https://github.com/microsoft/ML-For-Beginners/blob/main/5-Clustering/1-Visualize/assignment.md)\n", + "\n", + "## XIN CẢM ƠN:\n", + "\n", + "[Jen Looper](https://www.twitter.com/jenlooper) vì đã tạo phiên bản Python gốc của mô-đun này ♥️\n", + "\n", + "[`Dasani Madipalli`](https://twitter.com/dasani_decoded) vì đã tạo ra những hình minh họa tuyệt vời giúp các khái niệm học máy trở nên dễ hiểu và dễ tiếp cận hơn.\n", + "\n", + "Chúc bạn học tập vui vẻ,\n", + "\n", + "[Eric](https://twitter.com/ericntay), Đại sứ Sinh viên Microsoft Learn hạng Vàng.\n" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn tham khảo chính thức. Đối với các thông tin quan trọng, chúng tôi khuyến nghị sử dụng dịch vụ dịch thuật chuyên nghiệp từ con người.
Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ], + "metadata": { + "anaconda-cloud": "", + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "3.4.1" + }, + "coopTranslator": { + "original_hash": "99c36449cad3708a435f6798cfa39972", + "translation_date": "2025-09-06T14:19:17+00:00", + "source_file": "5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/translations/vi/5-Clustering/1-Visualize/solution/notebook.ipynb b/translations/vi/5-Clustering/1-Visualize/solution/notebook.ipynb new file mode 100644 index 000000000..eeb1ef963 --- /dev/null +++ b/translations/vi/5-Clustering/1-Visualize/solution/notebook.ipynb @@ -0,0 +1,817 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Defaulting to user installation because normal site-packages is not writeable\n", + "Requirement already satisfied: seaborn in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (0.11.2)\n", + "Requirement already satisfied: matplotlib>=2.2 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from seaborn) (3.5.0)\n", + "Requirement already satisfied: numpy>=1.15 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from seaborn) (1.21.4)\n", + "Requirement already satisfied: pandas>=0.23 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from seaborn) (1.3.4)\n", + "Requirement already satisfied: scipy>=1.0 in 
/Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from seaborn) (1.7.2)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (4.28.1)\n", + "Requirement already satisfied: pyparsing>=2.2.1 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (2.4.7)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (1.3.2)\n", + "Requirement already satisfied: pillow>=6.2.0 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (8.4.0)\n", + "Requirement already satisfied: cycler>=0.10 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (0.11.0)\n", + "Requirement already satisfied: packaging>=20.0 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (21.2)\n", + "Requirement already satisfied: setuptools-scm>=4 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (6.3.2)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from matplotlib>=2.2->seaborn) (2.8.2)\n", + "Requirement already satisfied: pytz>=2017.3 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from pandas>=0.23->seaborn) (2021.3)\n", + "Requirement already satisfied: six>=1.5 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from python-dateutil>=2.7->matplotlib>=2.2->seaborn) (1.16.0)\n", + "Requirement already satisfied: tomli>=1.0.0 in /Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from setuptools-scm>=4->matplotlib>=2.2->seaborn) (1.2.2)\n", + "Requirement already satisfied: setuptools in 
/Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages (from setuptools-scm>=4->matplotlib>=2.2->seaborn) (59.1.1)\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "!pip install seaborn" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
0SparkyMandy & The JungleCruel Santinoalternative r&b2019144000480.6660.85100.4200.5340000.1100-6.6990.0829133.0155
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
2LITT!LITT!AYLØindie r&b2018207758400.8360.27200.5640.0005370.1100-7.1270.0424130.0054
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
\n", + "
" + ], + "text/plain": [ + " name album \\\n", + "0 Sparky Mandy & The Jungle \n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "2 LITT! LITT! \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "0 Cruel Santino alternative r&b 2019 144000 48 \n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "2 AYLØ indie r&b 2018 207758 40 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "0 0.666 0.8510 0.420 0.534000 0.1100 -6.699 \n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "2 0.836 0.2720 0.564 0.000537 0.1100 -7.127 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "\n", + " speechiness tempo time_signature \n", + "0 0.0829 133.015 5 \n", + "1 0.3600 129.993 3 \n", + "2 0.0424 130.005 4 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv(\"../../data/nigerian-songs.csv\")\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Lấy thông tin về dataframe\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 530 entries, 0 to 529\n", + "Data columns (total 16 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 name 530 non-null object \n", + " 1 album 530 non-null object \n", + " 2 artist 530 non-null object \n", + " 3 artist_top_genre 530 non-null object \n", + " 4 release_date 530 non-null int64 \n", + " 5 length 530 non-null int64 \n", + " 6 popularity 530 non-null int64 \n", + " 7 danceability 530 
non-null float64\n", + " 8 acousticness 530 non-null float64\n", + " 9 energy 530 non-null float64\n", + " 10 instrumentalness 530 non-null float64\n", + " 11 liveness 530 non-null float64\n", + " 12 loudness 530 non-null float64\n", + " 13 speechiness 530 non-null float64\n", + " 14 tempo 530 non-null float64\n", + " 15 time_signature 530 non-null int64 \n", + "dtypes: float64(8), int64(4), object(4)\n", + "memory usage: 66.4+ KB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "name 0\n", + "album 0\n", + "artist 0\n", + "artist_top_genre 0\n", + "release_date 0\n", + "length 0\n", + "popularity 0\n", + "danceability 0\n", + "acousticness 0\n", + "energy 0\n", + "instrumentalness 0\n", + "liveness 0\n", + "loudness 0\n", + "speechiness 0\n", + "tempo 0\n", + "time_signature 0\n", + "dtype: int64" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.isnull().sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Hãy xem các giá trị tổng quát của dữ liệu. Lưu ý rằng mức độ phổ biến có thể là '0' - và có nhiều hàng có giá trị đó.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
release_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
count530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000530.000000
mean2015.390566222298.16981117.5075470.7416190.2654120.7606230.0163050.147308-4.9530110.130748116.4878643.986792
std3.13168839696.82225918.9922120.1175220.2083420.1485330.0903210.1235882.4641860.09293923.5186010.333701
min1998.00000089488.0000000.0000000.2550000.0006650.1110000.0000000.028300-19.3620000.02780061.6950003.000000
25%2014.000000199305.0000000.0000000.6810000.0895250.6690000.0000000.075650-6.2987500.059100102.9612504.000000
50%2016.000000218509.00000013.0000000.7610000.2205000.7845000.0000040.103500-4.5585000.097950112.7145004.000000
75%2017.000000242098.50000031.0000000.8295000.4030000.8757500.0002340.164000-3.3310000.177000125.0392504.000000
max2020.000000511738.00000073.0000000.9660000.9540000.9950000.9100000.8110000.5820000.514000206.0070005.000000
\n", + "
" + ], + "text/plain": [ + " release_date length popularity danceability acousticness \\\n", + "count 530.000000 530.000000 530.000000 530.000000 530.000000 \n", + "mean 2015.390566 222298.169811 17.507547 0.741619 0.265412 \n", + "std 3.131688 39696.822259 18.992212 0.117522 0.208342 \n", + "min 1998.000000 89488.000000 0.000000 0.255000 0.000665 \n", + "25% 2014.000000 199305.000000 0.000000 0.681000 0.089525 \n", + "50% 2016.000000 218509.000000 13.000000 0.761000 0.220500 \n", + "75% 2017.000000 242098.500000 31.000000 0.829500 0.403000 \n", + "max 2020.000000 511738.000000 73.000000 0.966000 0.954000 \n", + "\n", + " energy instrumentalness liveness loudness speechiness \\\n", + "count 530.000000 530.000000 530.000000 530.000000 530.000000 \n", + "mean 0.760623 0.016305 0.147308 -4.953011 0.130748 \n", + "std 0.148533 0.090321 0.123588 2.464186 0.092939 \n", + "min 0.111000 0.000000 0.028300 -19.362000 0.027800 \n", + "25% 0.669000 0.000000 0.075650 -6.298750 0.059100 \n", + "50% 0.784500 0.000004 0.103500 -4.558500 0.097950 \n", + "75% 0.875750 0.000234 0.164000 -3.331000 0.177000 \n", + "max 0.995000 0.910000 0.811000 0.582000 0.514000 \n", + "\n", + " tempo time_signature \n", + "count 530.000000 530.000000 \n", + "mean 116.487864 3.986792 \n", + "std 23.518601 0.333701 \n", + "min 61.695000 3.000000 \n", + "25% 102.961250 4.000000 \n", + "50% 112.714500 4.000000 \n", + "75% 125.039250 4.000000 \n", + "max 206.007000 5.000000 " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "import seaborn as sns\n", + "\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top[:5].index,y=top[:5].values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Xóa các thể loại 'Missing', vì chúng không được phân loại trên Spotify\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "df = df[df['artist_top_genre'] != 'Missing']\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "df = df[(df['artist_top_genre'] == 'afro dancehall') | (df['artist_top_genre'] == 'afropop') | (df['artist_top_genre'] == 'nigerian pop')]\n", + "df = df[(df['popularity'] > 0)]\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "corrmat = df.corr()\n", + "f, ax = plt.subplots(figsize=(12, 9))\n", + "sns.heatmap(corrmat, vmax=.8, square=True);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAaQAAAGkCAYAAAB+TFE1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAAsTAAALEwEAmpwYAAEAAElEQVR4nOydd3gc1dWH39m+q9Xuqvde3eTeG7bBdAwm9A6BBEJJvtASSggJCYQk1IQSCBBIqKaYjgvuvTfZktV71/Y+8/2x0tpCkrst28z7PPvM7MydmbNl7m/uveeeI0iSJCEjIyMjIzPAKAbaABkZGRkZGZAFSUZGRkbmJEEWJBkZGRmZkwJZkGRkZGRkTgpkQZKRkZGROSmQBUlGRkZG5qRAFiQZGRkZmZMC1UAbICNzuIiSiD8YwBf04Q368AX9+AKhZfd7b8CHL9j98qNRqjFpIzHrIkmOTMCkjUQQhIH+KDIyMvshC5LMgODyuWlyttLhttLpsdLhtuL0uXAHvLgDHjx+D+6AF4/fgyfg3U9sQgJztBg1EeREZzAsoZCihEFkWFJkgZKRGWAEOVKDzPGm1dXO7pa9lLZVUmtroNbWQIfb2qucVqVFr9KiV+nQqbXoVDr0Ki06lRaNSoNWqUGjVKNVadB0rytD6z22qfbbp9KgUajxBX3YvA7a3Vbq7Y3UWBvY3bqXOlsjAKmmJM7ImsD0zAmYdaYT/RXJyMggC5LMccDld7O1cReb6newq6WUFmcbEBKc1MhEUsyJpJqSSDTGEa23YNGbsehMaJTqE25ru6uTjfXbWVq5hpK2ctRKNWflTGVO4Wyi9OYTbo+MzI8ZWZBkjgl2r4PVNZtYX7eVHc17CIpBIjURDI7PZ1BcLoWxuWRaUlEoTl4/mlpbA/OLF7Csai1KhZI5hbO5uHA2GpVmoE2TkflRIAuSzBHjC/rZVL+dZZVr2dywg6AkkmSMZ0xKEWNThpMfk31SC1B/NDpaeG/7fFZVbyAhIpZbRl/FiKTBA22WjMxpjyxIMoeFJEkUt+xledU6VtdsxOV3E6UzMyVjLFMzxp9WzgE7mnbz+sb3qbM3MjtnGteNuBSt3FqSkTluyIIkc0jYvA6WVqxhYflyGuzNaFVaxqeOYFrGeIbGF5ySLaFDwR/08972+Xy+ZyHJkQn8cuJPyYxKHWizZGROS2RBkukXSZLY3bqXBWUrWFOziYAYoCA2h7NypjIudQQ6lXagTTxh7GjazQtr38Tpc/GzMdcyNXPcQJskI3PaIQuSTC9EUWRt3WY+Lf6Wio4aDGo90zLGc2bOFNItKQNt3oDR6bHxzKrXKG4p5by8GVw74lJUCuVAmyUjc9ogC5JMmIAYZFnlGj7b/R0N9maSjPFcWHgWUzLG/qhaQwciIAZ5Z+vHfFWymEFxufxq0q1Y5HlLMjLHBFmQZJAkic0NO3h7y8fU2RvJikrjkkHnMC5lxGk7NnS0rKhax8vr3yFSY+S+KT8jOzpjoE2SkTnlkQXpR06rs51XN/yXLY27SIqM57rhcxmdXHTaeModTyo6anh6xctYvXZuH3st
UzLkcSUZmaNBFqQfKZIk8X3FKt7a/BEiElcOvZCzc6ejUsrhDQ8Hq8fG31f9i+KWvVxUeBZXD7tYblXKyBwhsiD9CPEEvLy07m1W12xkSHw+t4+9jnhj7ECbdcoSCAZ4c/OHfFe2jBGJg7l74s0YNREDbZaMzCmHLEg/MhrtzTy98hVqbQ1cNWwOFxWehUKQn+iPBQvLlvP6pveJM0Rz/5TbSTUnDbRJMjKnFLIg/YjY21bJn5a9CMAvJ95CUeKgAbbo9GN3y17+tvJVfEE/d064kbEpwwfaJBmZUwZZkH4k7GjazV9WvIxJa+ThM+4h0Rg30CadtrS62vnrilco76jmnLwzuHb43AGJZC4jc6ohC9KPgE31O/jbyldIjIznoel3Ea23DLRJpz2+oJ//bfuUr0oWk25O4c7xN5AZlTbQZsnInNTIgnSas6u5hCeWvUiqKZFHpt+DUSsPtp9INjfs4J/r3sbudXBR4Vn8ZPB5cjoLGZl+kAXpNKasvYrHv3+WaIOF38/8NSatcaBN+lHi8Dp5e+vHfF+xiviIGK4ZfgkTUkfJc71kZH6ALEinKY2OFh5a+Bd0Ki1/mHkv0QbLQJv0o2dH0x7e3Pwh1dY68mOyuXLYhQyJL5CFSUamC1mQTkOcPhcPL3waq9fOE2feT1Jk/ECbJNOFKIosqVzD+9vn0+GxkhudycWDzmZ08jCUcqBWmR85siCdZgTEIE8u+wc7W0p4ZPrdDI7PH2iTZPrAF/SzpGI183d/R7OzjWi9hZnZk5iZNZnYiOiBNk9GZkCQBek049+b3ueb0iXcPvY6ZmRPGmhzZA5CUAyysX47i8pXsKVhFwgwInEw0zLHMzZ5uOwAIfOjQhak04hllWt5ce2bnJ8/ixtG/mSgzZE5TFqcbSwqX8nSyjW0uTrQq3VMSB3F9MzxFMblyhE1ZE57ZEE6TajsqOXhRX8hNzqTR864Rx6POIURJZHilr0srVzDmppNeAJe4gzRTM0cx7SM8SSbEgfaRBmZ44IsSKcBDp+T33z3JD7Rz1OzfysnjDuN8AZ8rK/bwrLKtWxtKkaSJPKiM5maOZ7J6WOIlF35ZU4jZEE6xRElkaeWv8S2pmJ+P+P/yI/NHmiTZI4THW4rK6rWs6xyDVXWOtQKFVMzxnF+wSzSzMkDbZ6MzFEjC9Ipzkc7v+SDHV9w86grOCfvjIE2R+YEUdlRy4KyZSytXIMv6Gd44mAuLDiTYQmF8rwmmVMWWZBOYbY07OTPy/7BlIyx3Dn+Rrki+hFi9zpYULacb0qX0OmxURibw2VDL2CoPOFW5hREFqRTlGZnGw989ydi9VH88cz70cruwT9q/EE/i8tX8UnxN7S7OxkUl8tlQy5gaELBQJsmI3PIyIJ0CuIL+nl00V9pdLTw5FkPkihHYpDpwhf0s7h8JZ8Uf0OH28qQ+HyuGHoRhXE5A22ajMxBkQXpFOTl9e+wuHwl90/5OWPkBHAyfeAL+llYtpxPir/F6rExMmkIVwy9iOzo9IE2TUamX2RBOsVYXL6Kl9e/zSWDzuGqojkDbY7MSY4n4OWb0iV8tvs7nD4X41NHcsXQC+X06jInJbIgnUKUt1fxyKK/UhiXy0PT7kKhkGfuyxwaLp+bL0oW8sWeRXiDPqZmjOOyIeeTIGcOljmJkAXpFKHd1clvFj6JUlDy5OzfyLmNZI4Im9fBZ8Xf8s3epYhikJnZk7l08HlyehKZkwJZkE4BPAEvv1v8Nxrszfxx1n2kW1IG2iSZU5x2dycf7/qaReUrUQgKzs6ZxsWDzsakixxo02R+xMiCdJIjiiJ/X/0v1tdt5YEptzMqedhAmyRzGtHsaOWjnV+xtGoNWqWGM7OncF7BTGINcgoMmROPLEgnMZIk8eqG/7GofAU3jryM8/JnDrRJMqcpdbZG5u38ilU1GxGAieljuLDgTLKi0gbaNJkfEbIgnaRIksQ7Wz/m8z0LmTv4HK4cdnw86oJBkU6Hlzar
h3abhw6bB5vLh9sTwOUN4PYG8PqCiKLUZReIXX8ZhSCgUSvQqJVdLwUaVWhd27Vdq1ai1SjD65qu99of7NOolSgVB48sIIoSgaCIPyASCIrh9e6XLxDE7xfx+oP4A0F8fhGfP4gvIPZ+7w92lQsd173PH+h5vD8QRBQP7ftUKEClVIReqtBSHV4Xwvu6P7tOq0Kn6VrXdK+r0He912qV6DUqIvRqjHo1Wo3yuEZgaHW281XJYhaWr8AT8DIsoYBz8mYwKmmoHEFe5rgjC9JJiCRJfLDjC+bt+oqzc6dz86grjroS8ngDVDbYqGiwUd/ioLbZQX2Lg8Z2V1hs9ketUqDXqtBrVWg1ShSCgCCA0L0ERJEeFXf3eiB4ZH8phUIgpEn7rkHX9QACAZFgH7YeLoIAalVINNUq5T5RVSm6titRh8U1tO1QxBJCYr2/WAYCUg/h3CegQTy+rpc3cMifS6UUwuJk1GuIMITWTREaok06oiK1RJl0REXqiDJpMUVoD9n2/XH6XCwsW8HXpd/T7u4kRh/FrJzJzMyeTLTectjnk5E5FGRBOskQJZE3N33IN3uXMDNrEreNveawE7OJokRVo40dZW2UVHdQVtdJXbOD7jpPo1aSEhdBcpyR5NgI4qIMxJh0oQqtqxJTq47cpTwoSqFWiD+I1xdqhXi71n1d6/vv6173B8Ww/RBqjUmEBBpCItnd2lB3tz66liqlIiwePxQUjSr0XqtWolYpUSmFky7Omz8g4vUF8PiC4VapZ7/3Trcfh9uPw+XD4fbve+/243T5sTq9uDyBXudVKASiIrXERxlIiDGQEG0gMTqChBgDybERRJt0B/wuujPaLihbxtbGYhSCgjEpRczMmszwxEFyq0nmmCIL0kmEL+jn5XVvs6J6PRcWnMm1w+ceUsUZFCUq6q3sKGtjR1kruyrasLv8AMSYdeSkWMhJNZOTYiYr2UysRY/iCJ6aZU5uPL4AnXYvHTYvHfZQ92uH3Uur1U1Tu4umdhdtnW72b4xF6NWkJ0SS1vVKT4wkPSGSGHNvoWq0N7OwfAXfl6/C7nNi1pmYkj6WaZnjybSknnQiL3PqIQvSSUKzs42/r3yV8o5qri66mDmFs/u9wYNBkbI6KzvKWtle1kZxRRvOrqfjpJgIhubEhF7ZscRHG07kx5A5yfEHRFo6XTS1uahvcVDVZKemyU51ox2b0xcuF6FTkZNqITvFTE6qhdxUM8mxRhQKgUAwwObGnSytXMPG+u0ExSBp5mSmZ45ncvpYYgxRA/gJZU5lZEE6CVhft5WX1r2NKIncOf6GXvHp/AGRvTWd7ChvZUdZG8WVbbi9QQBS4oxdAhTLsJwYYsz6gfgIB0QK+Am6bIg+N5Lfi+j3IPm6lx5EvxcpGABJBFFE+sESQFAoQaEILZXK0FJQIigUoFCiUGsR1FoUGl1o2fU+tK5DUGtCx8j0i9XhpbpLnCobbJTVdlLZYMMfCP0Geq2SrOSQQOWkmMlNtWA2w7r6zSyrXEdJWzkAeTFZjE8dyfjUEXIkCJnDQhakAaTd1cm/N7/PutotZFhS+fWkW0mMjMfl8bO7qoNdFW3sKm9nT3UHPn9IgNITIxmaHRKgodkxRJl0A2K7JEmIHicBa0voZWshYG0l6LKFxMdlI+gOrUs+z1FcqbuVeAycGdRaFFoDCl0ECm0ECp0h/F7Zva17v26/dW2onKDW/ui6pQJBkZomO2W1VspqOymrs1Jeb8XrC/0fNSoFWclmslPNxMYo6FRUU+raRpW1GoBMSyrjUkcwPHEwOVEZcrgrmQMiC9IAYPPY+aJkEd+ULiEgipybcS7p6qGUVHWys6KNijorogQKAbJTzAzOimFIduhlNmpPiI2SJCG6bPg7m7vEpuvV9d5vbUXyunocI6g0KCPMKPQmlIZIlAYTCoMJpcGEUh+JQqtHUOtCrReNLtRy0YRaMChVCIIi1ArqWiIowgIQai0FkcTuZTDUihKDSMEA
UsC3r7Xl94aXkt+D6PeFlj43oseF6HEieruXToIeJ6LHBWJvp4AeKJQotPougdonVGFh2/8VFrKe5QXNgZ0ITgWCokRds53yOitldVb21nZSXmcNO1WolAIp8Qb0Ji92ZR2tVCAYbBh1OoYlFDI8YRDDEgcRZ4g+5b8LmWOLLEgnCEmS2NNcwVc717G2pByf3UBkMAWvw4DLHbqRNWolhRlRDM6KYXBWNAUZURh06uNjjxgkaG8nYGsjYG3Bb23eJzrWZgLWVqSAr8cxgtaA2hyHyhyHyhzftYwLb1MYTKdsBSNJElLAFxIs736C1f3yunq++trmdYe6HQ+EoAh1K2r0IbHqWu57b0Ch0XWJlx6FVhfa1lVW2P8Y9ckjbqIo0dju7NGSKqvtDDvXAOgjgohaG0FNJ4LegckE+alxDE7KJD8mi+zoDHSqE/PAJXNyIgvSMUSSJJxuP21WD21WD9XNneysqaOisZO2Dh8Bt5buLiiVUiAzOeT51t0nn51iRqU8ui4NSZKQfO5Q15nTSsDRTtDWRsDW2vUKrQcdnb0qT4XBhMoUh9oSFxab/YVHqYs4KttOd0LfvacPofqhgLlD42k+d3hd9LoQfZ7wtoMKGwBCl0CFBEyhCa0L3evdIvYD8eu9zYCg0YZapsf4+2jpdFNWa6Wy3kpNs4OaJju1zfaec9VUXgStG4XWQ0SERKxFR3KMiYz4WLLj48mMjSc2wiJ39/0IkAWJru4pKfSUFwyK4TkzP1x6fAEcbj82p5dOhwery4PN6cXqCLna2hwBAj/s9VEEUOjcWCwK8pJjGZOVQ0F6LGkJkaiUitAcGzGIFPQjBQKhZdDf1Q3lD73CFZerRwUmda13j9sEnVZElw0p6O/1GQWVBpUpBqUpFpUpBlVk19IUGxYchWZgxqNkehJurXldXb+9p8dvL4X/A/uWkq/7/+HpKhMSONHrOkRxIyRS+7XKBK0ehUoTcghR7XspfvBeUGtQqLTh9X371AgKJYJSCQpV17qKIAKtNj+1rW5qW11UNlmpbu6gpcONwyEhin20+pR+VJoAWp2EwaAkQq/AoFMTodNg1GswGbSY9AZMBi1GvQa9Roteo+5aatCp1aE5a11z2eRpDycnp6QgBQIBGhsbD1jG6vDywgdbcHr8BEWJYFBCEiWCkoQohl5BUULsen+4CIjoFD50gh+d4MOocBOpcGNUeDEpvZiVXqJUoaU6/OQpgtQ10VOSIBgAMcgRDdgLyq6nYy1KnRFBF4nSYAyN1eiMoTEcfSQKgxlVZBSCNuKk6d6ROXFIkgRBf2hczedG9HlDLTG/p2tczbOvZeb3dnk/uhG9XiS/e99DUdCHFPAjBvwQ8IMUPIZWCiAoQBnynHQKejoDeqyiDoeowhnU4BA1OEUNrqAGp6TBJWrwSWpEjrDVJIihCCDCfvde13r39q6gISgEAZPOiFJQ7BetZN+91L3avWV4fhxzz8g9qAmJiYmoVKojs/805ZQUpNraWmbNmjXQZsjIyMgcMYsWLSI1NXWgzTipOCUF6VBaSEdKY2Mj11xzDf/9739JTEw8Ltc4Xsi2Dwyy7QPDqWw7yC2kvjglvw2VSnXcnywSExNP2acX2faBQbZ9YDiVbZfpiey2IiMjIyNzUiALkoyMjIzMSYEsSDIyMjIyJwWyIP0Ak8nEnXfeiclkGmhTDhvZ9oFBtn1gOJVtl+mbU9LLTkZGRkbm9ENuIcnIyMjInBTIgiQjIyMjc1JwSgpSIBCgtraWQK/AcTIyMjKnDz+2uu6UFKTGxkZmzZp13KI1yMjIyJwM/NjquuMuSA6HgwsuuIDa2tpe+xYuXMicOXO46KKLuOOOO7BarcfbHBkZGRmZk5TjKkhbt27lqquuorKystc+h8PBY489xquvvsr8+fMpKCjghRdeOJ7myMjIyMicxBzXWHYffPABv/vd77j//vt77fP7/Tz22GMkJCQAUFBQwOeff96rnM1mw2az9dj2
Y2m+ysjI/HiQ67rjLEhPPPFEv/uioqI488wzAfB4PLz66qtcd911vcq99dZbvPjii8fNRhkZGZmTAbmuOwmifdvtdu644w4KCwu55JJLeu2/4YYbem3vDjsvI/Njw+/3U1tbi8fjGWhTZA4BnU5HamoqarX6oGXlum6ABam5uZlbbrmFCRMm8Nvf/rbPMiaTSQ4NIiPTRW1tLZGRkWRmZsoZgE9yJEmira2N2tpasrKyDlperusG0O07GAzy85//nHPPPZeHHnpIvrlkZA4Bj8dDTEyMfL+cAgiCQExMjNyaPQxOeAvp1ltv5e6776axsZFdu3YRDAb59ttvARg6dOgBx51kZGSQxegUQv6tDo8TIkiLFy8Or//rX/8CYNiwYezevftEXF5GRkZG5hTglIzUICMjc+zYtm0bjz76KADbt2/n7rvvPuTyx6KcjEw3siDJyPzI2bt3L01NTUCo5+L5558/5PLHopyMTDcD7vYtIyNzfBBFkT/96U9s3boVp9OJJEn88Y9/5MMPP6Szs5OamhqGDx/OqlWrsNvt/OY3v+Hiiy/mD3/4A1988QUbNmzgySefRBRFAH72s59RVFTE888/Hy7/5z//uc9rNzQ09Cr3/vvv8/bbb6NQKIiNjeWRRx4hKyuLBx98EEEQKCsro729ncmTJ/Pwww8f0FU6GAzyl7/8hcWLFxMZGUlRURFlZWW8/fbb2O12nnjiCUpKSvD7/UycOJH7778flUrFsGHDuO2221i5ciXNzc1cf/313HjjjXz88cd89NFHuN1ujEYjb7/9Nh9++CHvvvsuoihisVh45JFHyMnJOS6/lUwX0ilITU2NlJ+fL9XU1Ay0KTIyJ5Rdu3YdctlNmzZJd911lxQMBiVJkqRXXnlF+tnPfiY98MAD0g033BAuN2/ePOm2226TJEmS1qxZI51//vmSJEnS9ddfL33xxReSJElScXGx9Nhjj/UqfyD2L7dq1SrpzDPPlNra2sL7zj33XEkURemBBx6QLr74YsnhcEher1e65pprpLfffvuA53733Xela665RvJ4PJLX65Vuvvlm6dprr5UkSZIefPBB6T//+Y8kSZIUCASke++9V3r11VclSZKk/Pz88Lm3b98uDR06VPJ4PNK8efOksWPHSna7XZIkSVq7dq109dVXSy6XS5IkSVq+fLl07rnnHvQz98Xh/GY/5MdW18ktJBmZ05SRI0diNpt57733qKmpYe3atURERGCxWBg9evRBjz/33HN5/PHHWbx4MZMmTeL//u//jtiW5cuXc9555xEdHQ3A3LlzeeKJJ8JBly+55BIiIiIAmDNnDosWLeLaa6/t93xLly5lzpw5aLVaAK644grefvttAJYsWcL27dv56KOPAHq5Xc+aNQuAIUOG4PP5cLlcQCh8mdFoDJ+jqqqKK6+8Mnyc1Wqls7MTi8VyxN+DzIGRBUlG5jRlyZIlPPHEE9x0003MmjWL7Oxs5s+fD4DBYDjo8VdeeSUzZsxg5cqVLF++nBdffDF8/OEiSVKf27rz/CiVyh7bFYoDD2+rVD2rrv3Li6LIc889F+5es9lsPdyvu0Wse1u3bft/J6IoMmfOHO67777w++bmZsxm80E+qczRIDs1yMicpqxcuZIZM2Zw9dVXM2zYMBYuXEgwGOxVTqlU9pkA7sorr6S4uJi5c+fyhz/8AZvNhtVq7bf8gc47ZcoUvvrqK9rb2wGYN28eFouFjIwMAL7++mt8Ph9er5dPPvmEGTNmHPDc06dPZ/78+fh8PgKBAJ988kl435QpU3jzzTeRJAmfz8ftt9/OO++8c1B792fy5Ml8+eWXNDc3A/Duu+9yww03HNY5ZA4fWZBkZE5TrrzyStavX8+FF17IFVdcQVpaGrW1tWEnhW5GjhxJeXk5v/jFL3psv/fee3n++ee5+OKLuf7667nzzjtJTU3tt/wP2b/c5MmTufHGG7nhhhs4//zz+fTTT3nllVfCLRudTsfV
V1/NhRdeyJgxY7j00ksPeO65c+dSVFTExRdfzJVXXolarUav1wPw0EMP4XK5uPDCC7nwwgvJz8/npz/96WF9d1OnTuXWW2/l5ptv5sILL+SLL77gxRdflCe6HmcEqa+29ElObW0ts2bNYtGiRaSmpg60OTIyJ4zi4mIGDRo00GYcUx588EHy8vK45ZZbDvmYFStW0NbWxpw5cwD44x//iFarDXexnUwczW/2Y6vr5DEkGRmZI6K8vJxf/epXfe7Lysri2WefParzX3311Tidzj73/fOf/+T111/n9ddfJxgMUlhYyGOPPXZU15MZeGRBkpGROSKys7P57LPPjvo8Tz75ZJ/b//e//x3wuDfeeOOory1zciGPIcnIyMjInBTIgiQjIyMjc1IgC5KMjIyMzEmBLEgyMjIyMicFsiDJyMjIyJwUyIIkIyNzTHn++eeZNWuW7AUnc9jIbt8yMjLHlM8++4zXXnuNrKysgTZF5hRDFiQZmVOUxRuqWbCu+ric+6xx6cwck37AMoFAgMcee4zS0lJaW1vJysoiOTmZpqYmfvGLX/C3v/2Nm266iSFDhtDa2spHH33E66+/zvz581EqlUyePJn77ruPhoYGbr/9dtLS0qiqqiI5OZmnn34ai8XC999/z7PPPosoiqSlpfH4448TGxvLzJkzmTlzJhs2bADgT3/6E4MHDz4u34XMiUPuspORkTkiNm/ejFqt5v3332fBggV4vV4mT55MfHw8r776KoMGDaKjo4PbbruNzz77jFWrVrF48WI+/vhjPvnkE6qqqnjvvfcAKCkp4YYbbuDLL78kJyeHF198kba2Nh599FH+8Y9/8PnnnzNq1Cgef/zx8PUtFguffvopd999Nw888MBAfQ0yxxC5hSQjc4oyc8zBWzHHk7Fjx2KxWPjvf/9LeXk5lZWV4dxC+zN8+HAA1qxZw/nnn49OpwPg0ksv5dNPP2X69OlkZmYyfvx4AC6++GLuvfdeJk+eTFFRUTiG2xVXXMGrr74aPu/ll18OwMyZM3nwwQdpb28P51uSOTWRW0gyMjJHxKJFi7j33nvR6XTMnTuXsWPH9pn3qFuAfhhlHAinp9g/v5EkSSiVyl7l98+f9MNjRFHskVNJ5tREFiQZGZkjYvXq1Zx77rlceumlxMbGsn79+j7zLXUzYcIEvvzySzweD4FAgHnz5jFhwgQAKioqKC4uBkK5kqZNm8bw4cPZunVrOKvs+++/H25FAXz55ZcALFiwgJycHDl53mmA3GUnIyNzRFx22WXce++9fPPNN2g0GkaMGBEWj76YMWMGxcXFXHrppQQCAaZOncq1115LY2MjZrOZ559/nurqagoKCvjjH/+IwWDg8ccf584778Tv95OcnMwTTzwRPt+mTZv46KOP0Ov1/QZolTm1kAVJRkbmiCgoKODzzz/vtX3/lBR79uzpse+OO+7gjjvu6HWMXq/npZde6rW925uuL37961//KHIE/ZiQu+xkZGRkZE4K5BaSjIzMgJKamsrixYsP65jDLS9zaiC3kGRkZGRkTgpkQZKRkZGROSmQBUlGRkZG5qRAFiQZGRkZmZMCWZBkZGSOKU1NTdx6663H5FzPPfccixYtOibnkjn5kb3sZGRkjikJCQn861//Oibnuueee47JeWRODWRBkpGROSLWrl3LK6+8gk6no6ysjIKCAv7617/S3NzM9ddfz+LFi2lsbOTee+/FarWSn5/P+vXrWbZsGU6nk8cff5zS0lKCwSC33norF1xwQTgSeGdnJzNmzKC5uZlx48Yxd+5cnnnmGVavXo3VaiUqKooXXniBuLg4pkyZwtlnn83GjRtRKpU8++yzpKWl9bC1v3QVFRUVPProo3R2dmIwGHjooYcoKiriwQcfRBAESkpKcDgc3H777Vx88cUD8C3/uJAFSUbmFMW+bQn2rcdnPk7k8JlEFp1x0HKbN2/m66+/Jj4+nssvv5wVK1aQn58f3v/EE09w7rnn
cs0117BgwQK++OILAF566SWGDBnCU089hcPh4MorrwxHBW9qauKrr75CpVLx4IMPAlBVVUV5eTnvvfceCoWC+++/n88//5ybb76ZlpYWJk6cyCOPPMKTTz7Jf//73/Bx+9OdrmLx4sU88MADfP7559x3333cdtttzJ49my1btnDPPffw7bffhu147733aGtrY+7cuUyePJm4uLij/WqPiL6C1p6OyGNIMjIyR0xeXh6JiYkoFApycnKwWq099q9cuZI5c+YAcNZZZ2EymQBYtWoV7733HnPmzOGaa67B5XJRWloKwODBg3tE8gbIyMjggQce4MMPP+TJJ59ky5YtPVJdTJ06NWzPD23oZv90FU1NTTQ2NlJdXc3s2bMBGDFiBGazmfLycgDmzp2LWq0mMTGRUaNGsXHjxqP6ro6OH4cgyS0kGZlTlMiiMw6pFXM80Wq14XVBEHo9ySuVyj6f7kVR5Omnn2bIkCEAtLa2Yjab+fzzz8PpKvZnx44d/PrXv+bGG2/k7LPPRqFQ9Dhvtx192dDND9NVBIPBXmUlSQpHLN8/nYUoir1E8oQS8A/ctU8gcgtJRkbmuDFp0qRwANalS5dis9mAUCqKd999F4Dm5mYuuugiGhoa+j3P+vXrGTduHFdddRW5ubmsXLnygKku+uKH6SpSUlJIS0vju+++A2DLli20traSl5cHwNdff40kSdTV1bFt2zZGjx59eB/+GCIGfxyCJLeQZGRkjhu//e1veeCBB/jggw8oLCwMd9ndeeedPPbYY1xwwQUEg0Huu+8+0tPTw04HP+S8887jzjvv5MILL0StVlNQUHDAVBd90Ve6iqeffprHHnuMF154AbVazQsvvIBGowHA4/Fw6aWX4vP5ePzxx4mKijqKb+LocDl6Z+I9LZFOQWpqaqT8/HyppqZmoE2RkTmh7Nq1a6BNOCzeeustqbS0VJIkSdqxY4d0ySWXDIgdM2bMOKz64oEHHpDmzZt3TK59NL9Zd123esnSY2LLyc5xbyF1e9C8/PLLvXKXFBcX8/DDD+NwOBgzZgy///3vB7afVua0R5IkvAEvLr8HT8CDN+jHH/QTlLrHEwRUCiUqhRKNSoNepSNCrUer0iIIwkCbf8qRkZHB//3f/6FQKNBqtfzhD38YaJNOSaxW50CbcEI4rrX/1q1befjhh6msrOxz/3333ccf//hHRowYwW9/+1s++OADrr766uNpksxpjCRJWD02Gh2tNDtbaXW10+7qpNXdQYe7k06PDbvXSUAMHPa51QoVJl0k0XoLcYZo4o2xxEfEkhwZT4opEbPOdBw+0anP9OnTmT59+kCbcdjpKk62DLStVvtAm3BCOK6C9MEHH/C73/2O+++/v9e+uro6PB4PI0aMAEIuls8//3wvQbLZbOGB0G4aGxuPm80yJz+iKNLkbKXGWk+drZE6eyN11kbq7U24A54eZY2aCGIMUUTrzWRYUjFpI4nURBCh0aNTadEoNWiUahSCAoUgIAFBUSQg+vEGfbj9Xlx+Fzavg063jXZ3J+Ud1ayt3UxQEsPXMetMZEelkR2VQU50OnkxWSdUpCRJQpQkRDG0HnrtcxYWAEEIeaEpFAKKrqXMycOB6rqmDlmQjponnnii333Nzc09JpnFxcXR1NTUq9xbb73Fiy++eFzskzn56fTYqLHWU2Otp9paT1VnLTXWenz7eR1F6c2kmpKYnjmBpMh4kiLjiYuIIdYQjValOS52iaJIq7uDelsTtbYGqjprKe+oZkvjrrArcVJkPIPi8hgcl8fg+DxiDdFHdC2vP0hjm5Omdhcqb4DWTjeBoEgwKBEQQ0tRlA57pooggEohoFIqUKkUqLteGpUSjVopC9YJ5kB1XWOnp8/tpxsDNmAj9TFXoK8++htuuIFLLrmkx7bGxkauueaa42abzIlFlETa3Z002JuptzVRbw9V8tWddVi9+54MIzURZFhSOStnGmnmZNLNySSbEjCo9SfcZoVC
QXxEDPERMYxIGhze7g34qOiopqStnF0te1lTs4nF5SsBSDTGMTShkCHxeQyJy8eiN4ePkySJDruX6kYb1U126pod1LU4qGt20GrdVxnde2kqnQ4vKoWAUhkSD6VGQKEUUO7X+uluDe1PqBXVNddG7HoFRQJBCY8viMPl79GiUqsV6DQq9BolOq0KjVqJzPHjQHVdu0Ps56jTiwETpISEBFpbW8PvW1paiI+P71XOZDKFXUVlTi0CwQBOvwunz4Xd58TudWD12On02OhwW2l1d9DiaKXJ2dqjxaNVaUmNTGRk8lDSzSlkWFJIMydj1kae9I4FWpWGwrhcCuNyuahwNqIkUt1Zz87mPWxv3sPK6vUsLFsOgEUdQ0QwgYDVQmu9DodVTUgKIEKnIiXeyNDcWFLijCTFRJAQYyDoaCQnxXxcvgdJkvAHRHz+IF5/EK8viNPtx+b0AaBWKYjQqYjQq9FrVSf9b3GqcaC6rsP943gYGDBBSklJQavVsnHjRkaPHs2nn37KtGnTBsocmX5w+z20uTpod3di9dixee04fC6cfhcuvxtPwIs34MXj9+IJeHEHPKGl34P/AM4DkZoIovUWEoxxDE8cTGJXV1tyZALRestpU9lZHT7amtS46tJQ1pvR1+fT6axHEdlGm6mdzsgSiAxAAUQpDKQaU8mPyyQ/PpF0SzKJxnhUin2VUXFx03H7bgRBQKMOddcZu7ZJkoQvIOL2BHB5/FidPjodPpQKgUiDmsgIDTqN7Bl7vHEGdDTZOkkwWQbalOPKCf8n3Xrrrdx9990MGzaMv/71rzz88MM4nU4GDx7M9ddff6LNkemi022lvKOGamsdtbYGGuzNNDpasHsdvcoKCOjVIXdonUqLTqVFq9Ji0kWiU2nRq3To1DoMah0GtZ4ItYFIbQSRWiMmrRGLzoRaqT7un0mSJFx+Nx0eKzaPHbvPidPnxhPw4Au7e4uAhEJQoFKo0Cg16FVaIjSGkK16MzH6qIOORYmiRGO7k/I6K+V1VirqbZTXWWm37etui4/Sk5VsZtqIcWQlm8hKNhNr0VFra6CkrYyS1grKOqr4unwBX5WFOs+UgoJEY3yXV18MI9T5OLzOLtd0FUqF8riKtyAIaNVKtGollkgtoijh8vixu/aJk06jxGzUEmlQh215/vnn+eyzz7j22mu56aabjsqGF154AYC77rrrqD/PgaitrQ1HKT9UZs6cyX/+8x/WrVvHunXrjqN3nsCCbdu4dsrp/dB+QgRp/x94/zwphYWFfPTRRyfCBJn9kCSJBnsTO5r3UNyyl5LWclpc7eH90XoLSZHxjE8ZQbwxllhDNNF6MxadCZM2EoNGj0LoHXVKCvrxNlXha64i0N5AwF6D6HYgBkJdPoJShag1YI0wozLForYkoo5NQR2dhKA4si4JURJpcbZRZ2uk3t5Mo6OZFmcbLc52WlzteAPeAx4vIIBw8GjKJq2RRGOoFZcYkYA2GIXfbqSpOUhFfUiA3N5Qi1CpEEiNN1KUF0tOioWcFDNZKWaM+r5FODMqlcyoVGbnhtyjfQEftbZGam0N1NoaqLc10eRspaS1jLz0VJqc+7q6BUClUIUFSqVQoVaqUCvUqJUhwTqWKBQCRoMGo0FDMCh2CZOXpnYXbVaBqEgdpggNn332Ga+99hpZWVnH9Po/Zlbu2SsLkszpQSAYYEfzHjbUbWNL406anW1AyEOtICaHc/NnkB2VQYYlhQiN4ZDPG3TZcBavxlmyHk/1TqQu8UGhQmW0oNBHIqhDk0pFr4tARyMBpxXJuy8UiqDSoEnMRpeSjy5tELr0wSj1xl7XcvicVHbUUNlZR3VnHTW2emqtDXiDvnAZg1pPQkQsyZEJFCUOIkYfRZTejKmrdWbQGDCodGhUGlQKZVhYQwP9wZCrd8CDw+ui02OnqrWZypYmaq0t1De3Utq0BUm1n8eTT48hMo68kWkMSchnTGYumUnmo3IA0Kg0ZEenkx2d3mvf
zl27SDUlERCDLK9cy/LqdSEXb6Twcn8EBBSCIuTu3eXaLnDwFtWMrElMz5pwwDKSJPLXp/5ASUkJra1tJKem8+Cjf+b1l5+lsbGRX/ziF/ztb3/jpptuYsiQIbS2tvLRRx/x+uuvM3/+fJRKJZMnT+a+++7rEcgU4LXXXuODDz4gKioKk8lEUVERAO+88w6fffYZbrcbQRB49tlnycnJYebMmVx00UWsWLECt9vNU089xdChQykuLubRRx/F4/FgNpv561//SmJiIq+++ipff/01wWCQKVOmcN999wGhcEG/+tWvKC0txWQy8Y9//IOoqKh+r3uiMCq9NDRCVWctGZbUgx9wiiIL0mmMKInsbC5hedU61tdtxelzoVVpGRZfwEWFsylKHERCROxBu3yCotTlZizS3ZAINFfg2vgF7j1rQQygjk4icsSZ6NIHoU3IQmWJP2CrJ+hxEmhvwNdag7epEm9dKdYNX2FdOx9JUOBJzqI5KZ0mk4maoIvKzlpaukQUQvN+0s1JzMqeTJo5mRRTEsmmBCI1EYfdheUPiDS2OalttlPT5KCm2U5NU2jd5w8CWgQhlcToAoqSIkmO1hIR5SKg6aDRXc/e9kpKXSspbVjJgjYDQ+LyGZpQQFFCIUmRCce0S00hCGhVGrSAXq1Dreh9C4thgRJD65JEUNoXiFSBAoVCgVJQ9NnSPVQ2b96MWq3mgw8+QBRFrr/+esp3b+L/7nuIDevW8Ps/PUN2Tg4dHR3cdtttjB8/nqVLl7J48WI+/vhjVCoVd911F++9914Pr9nt27czb948PvnkEwRB4IorrqCoqAiHw8HChQt5++230el0PPfcc/zvf//jkUceAUL5jj766CPefvttXnnlFV544QXuvfde7r33XmbMmMH//vc/3nrrLSZOnMiOHTv46KOPEASB++67j/nz5zN69Gja29u56aabKCoq4u677+arr75izpw5B7zuiSBbZ2W3LZt5O7/h/yb/9IRd90QjC9JpSKurncXlK1lSsYZWVzt6tY4xyUVMTBtNUeIgxKBAU5uL6ioXGzsqaLN56LR76XR4sTt9ONx+XJ4AHl8Ary9IUNz31B2jsHORYSMjNNV4JDVrvbms9+fR5opF16xCu8mFXrsHg66MCJ2aCL2aSIOGSIMao0GDKWLfy2xMRR2dgjU1m+qcAio7qqlqKaPG3oRdtEPnToQOidggpOksTE8eQ172WLJjMg866VSURGweO+1uK50eG802Ky02G+1OJ1anC7vLg8Ptx+H243QHkYIKpKAKAipMWiNJ5mhmZSWSlxRHRpKJ9IRIdNr+b5dWZzu7WkrZ0byHnU17WFe3BYBYQ3RYnIYmFGI5hpNlp2dNOGgrphtREvEGQq2/bgcUCQmloCBCY8CoiUB3mOGRxo4di8Vi4b///S/l5eVUVVUR9HtJjTeiVAgEgiI1TaExyGFdLZw1a9Zw/vnnh1NMXHrppXz66ac9BGndunVMnz6diIgIAM455xxEUcRoNPK3v/2NL7/8ksrKSpYvX86gQYPCx+2fE+m7776jvb2dlpYWZsyYARCedP/UU0+xbds25s6dC4RaRcnJyYwePZr4+Phwayw3N5eOjo6DXvdEkK1ppdhbyKqSPZyXX0Zh3IlrnZ1IZEE6TZAkieKWvXxVupgNdduQJImhCYWclXY2OncKNY0uPtti5x/Ni3vMa4HQuIDFqMVi1GKK0BBj1mPQqdBrVWg1StQqJWqlRGL9chJrFiIpFDQkn0ljwmR0aBjf5Srs8QXx+AK4vQFcngBN7S7sbh8unwsPDgSNG4XOhaB1I+icoZfGQ3cdKIhKNKKZCDLJVsYRrzST57WR5CjDXLMH5e5SxJWLqIwZhDN+GPaofDokL62eFjp8LXT627EHOnGLdrw4QTjA3A1JAK0AWglVVM9uLg9QAVR4YUN9BAm2WFLqE0kzJ5MVlUZ2VDpGbUSPY2IjopkWMZ5pmeMBaHK0sLWxmG1N
xayv28qSitUApJqSGBSXS0FsDvmx2YfUQj0WKAQFerUOvVoH+tDEXlfAHXbJt3kdqJWqrkgWRpSKg7ecFi1axPPPP8/111/P3Llz6ejoQJKkcDSIlDgjWkNo3KzVFiBRFUQUe/8mgUBPb0xBEHqUU6lU+Hw+GhoauO6667j22muZNm0asbGxFBcXh8vtnxMJQK3uOWbn9Xppbm4mGAxyww03hJ0tbDYbSqWSjo6OHrE0u3MrHey6J4IsQqk5dO40Xtv4Ln8+64ET4hh0opEF6RRHlEQ21G3jk+JvKGuvQqfUk6Ecga8xlS2bAqwLdAAd6DRK0hIiGZYbS3KckeTYCBKiDcRFGbAYtQeclR+wt9P8yd/x1BQTUTiRmLNuIivCTKfXRqfbRqfHhtVjw+q1d63bET023B4rQbcVAl72T7mmVeowq6OIUMSjk8yo/WYErwm/S4fLHcTh9tPo8VPhDbDUG4kojUCpGEx6ZCWxhgZUUiVtnVU0uVT49rNb8msQfBGoRTNGIRWjMhKT1kS0wUy8yUJSlJnkKDMpcWZMhn2J5URJxB8M4Pa7cfhd2DwOOj1WWl3tNDlaaXS0sKN5D8uq1oaPSTTGkR+bzaDYXIbE55NgjOshLAnGOGbnxjE7dxqiKFLRWcP2pt0Ut5Syono9C7rmIkVqIsiMSiPdnEKaOYkUUyJJkUfW9Xg4KBQKjJoIjJoIREnE6QuFR2pzddDhtoa9IQ/kFLF69WrOPfdcLr30Upqamli/fj0TJ07c7xoCCTEh4Q4ERGqa7YwYNYY3Xv8XV1xxBSqVinnz5jFhQs9W3sSJE7nnnnu466670Gg0LFiwgOnTp7N9+3YyMjK48cYb8fl8vPzyy0RH9x/9IjIyksTERFauXMnkyZP57LPPWLduHeeddx7PP/88l19+OVqtll/84hdccskljBs3rs/zHO51jwcmyUp6QiR48qm2fsb/tn3GDSN/ckJtOBHIgnSKIkoiq6s38v62L2l0NaEKGPHXDcHdnIxdoSY3NYJzJ0WTl2YhN81CUkzEYYWCCYpBGh0tVFZtYe+aebQJIs4Rw7EpPbQvfrJPd3AIjWtYtCYsehOZljRGJg0lWm8hPiKGuK7IBsaDVLa+gI/KzlrK2qsoa6+ivKOaensTjZJIIxChtpCssTDeFyC+vZmEznbi/EHM8VkYcoZhyB2FOikbp9+D1WvH7nXi9Lvw+DtoCDZTUx+K7B1y9VaiVWkxqPVEaiOw6Ewkxcb3WRF3O1Xsba+itK2CrQ27WFYZEql9XXODKEochEm7zylDoVCQE51BTnQGFw86G1EUqbbWU9JWTnlHNZUdNXxXtgz/fpOD9SodsYaoUBw+QxTReksosGswEm/AF3bIOBaipRAURGqNRGqNeANeOj1dDxleOxatCbPO1GeL6bLLLuPee+/lm2++QaPRMGLEiH5zFKUlGGloc5E7eCzjJxZz6aWXEggEmDp1Ktdee22PsoMGDeKGG27gJz/5CSaTieTkZAAmT57Mu+++y3nnnYdGo6GoqCic9rw/uvMd/eUvfyEqKoq//OUvxMfHs3v3bi6//HKCwSBTp07lkksuoa6urs9zHMl1jwfTiuJ4Z0E5s0fP4MuSRRTEZjMhbdQJt+N4IkgH83c9CamtrWXWrFksWrSoV0qL0x1RFPlqxzo+2f0FdqkN0R1BoD6HTEMho/ITGJEfR0FGNNrD8PIKikGqOmspaatgb3slVR211NmbekTFjlDpiDfGEW0IVYxRejMWXcgV3KIzYdZFYtZGojnM2HEun5vKzhoqOmqo6FrW2RoRuwKXmnUmcqLSyezqKsuOTidGHxXu1mmwN1NZt4Pq6q3UtdfQ4nPQqVJgUykIHmFlLQgC0ToLCcZYkk2JpJuTybCkkGVJQ6fe19aTJIl6exM7mvawozn0cvpcCAhkRqWGxakgNgfNQbpXRFGk2dlKvb2JenvIdb3V1R6KWO4OzaOSkLgz5xpSstNCdnan
ylCqUCtUqJVqNEo1GqWmx2TaI8EX9NPh7sThc6EUlEQbLEfdahNFiaZ2Fw63H4tRQ6xFf9pMgD4QxcXFRzzm1F3XvTk3n/TbXuTnL27hytl5FCu/pKKzht+d8UvyY7OPscUDh9xCOkWwOrzMW72JRXXf4Nc3I3kMpElTmV0wifGXJhIVqTv4SbqQJIk6WyObGnawvWk3e1rL8HTN1+mOWj3EEI9x2wqS9FEMveQBzNHJR2W/w+ekwd5MXdf8mhprA9XWOtpcHeEyFp2JrKh0xqYUdUXNzghHbfAEvFR21LKudgsVHTVUd4Ym8O4fDcISYSIhLokCEYwOB4b2ZiJcdiKCIgatEVNiNsakPAzJ+Wjj0pAUCgLBAJ6u/Eh2376I3i2uNprsLayu2RgO9SMgkGZOJj8mi0FxeQyJzyfFlEiKKZGz86YjiiLlHdVsbdzFtqbdfLFnIZ/t/g61QkVe1zGFcTnkRWdh0PSMv6dQKEiMjCcxMp6+nnkDYpBOj5X68joSjLEExCBBMYhfDBAQAzh8rrCIA6GWn1KDTq1Dr9KhUaoPq/LXKNUkGOMwB7y0uTpocbZh9zqIi4g5qLj2h0IhkBhjoNXqptPuQ5IgLurHIUrHglh9kKE5MSzbVM9ffvkzHl78V55c/k8eOeMesqLSBtq8Y4IsSCc5e2s7mbdsJ+valqGIq0ap1TA+6kxunnQeUcZDDyoqSRKlbRWsqtnI+rqtYRfqFFMi0zLHUxibS2FsDjGGKHxNFdS//QgqcxzJVz2O0nBwzzBPV8XV6mqnxdlGs7ON5q44dU2OVhy+fQnGVAoVKaZEBsXmkmZOJjMqlSxLWjjYaCAYoNpax8b6bextq6KsvZJae2N48qpZG0lmVCrnJJxBmjmZ1C6X7x8GWZUkiUBHA+7KHXhqivFU7yKweyMuwK3SoEnMQpuUQ1RSDolJuaiTMnu5qkuSRIfbSkVnDWVdXXWrajaysHwFEIroPSyhkOGJgxkSn09uTCa5MZlcOuQ83H4PxS2lbG/aw66WEj4u/hppV1ckcGM8GVGpZJhDcfqSIxOIj4jpt4WpUiiJNUTTomzCqInos0xQDOIL+vAG/XgDPrwBL06/O3y8Qa0nQmNAr9IdsgjoVFqSIxOw+5y0uzqotTZ0tY5NRyQkgiAQaw6JUIfNiyDwo2kpHS2i18WM0Wm88MEWGpsDPDz9Lh77/hkeX/Isj0y/m+zojIE28aiRu+xOQiRJYntZKx8sLGFH+1Y0GXsQVH4mp0zi5rGX9PLwOhBNjha+r1jN8sq1tLjaUSlUFCUOYnTSMEYmD+mVEiHg6KDu3/eDoCDlxj+jigzt9/g9NDpaaXK20ORo3a9LKSRCTp+rx3mUgoLYrjGjBGMcica4rmR2ScRHxITHaERRpN7eFB4vKmuvpLKzNtzyMWmN5ERnkhOd3tVll0GU7siDiwZsrXjqSvDW7sFTvxdfY3l4Mq+g1qFJyESbmI02KQdtUjbqmJReIiWKIpWdtexsLmFH8x52tZTiDXhRCgryYrIoShxMUUIhOdEZPcaiXH43e9sqKW2roLyjmqrO2vAE5W4sOhMxhihi9FHhrtHwS2fGUW+lsLAQhUKxX86j0C0s0DvKdyAYwBXw4PK7cfndSJKESqEMhXHSGFEpD/2ZNCAGw7+1TqUlPiLmiD29JEkKt5RizTqiTIfewj+VkCSJ3bt3H5Muu6IbH4b0UVz/2DfMHp/Bz+YW0exo5fffP4Pd5+T/Jt3WI/L8qYgsSCcZu6va+c+XxeyoqcGQuwvR2EJOVCY/H3fNIc/QFkWRDfXb+HbvErY37UEQBIoSBjElfSxjU4f3m65BEoPU/Pd3VLdW4ppxOXWiJ5QEz95Ih9vao6xBrSfOEE1MRDSx+tDge6whmtiIKOIMMUTrLSh+MBDuCXiptYZyB1V21nZFXagNR1rQqrRkR6WTE51BbnQmudEZxEXEHNenZ0kM4m+tw9tYjrexDG9DOb6mCiR/
qAtTUGtDApWShy6lAG1qASpjVI9zBIIB9rSVh7rqGoup6KhBQkKv0jEoLpfB8XkUxuaSFZXWqwL3+D3U2ZuotzXR7GylxdlOg72FVkcHNp8dr9jTRf/y1HMoTM5HZ4wAlCAqQFKAqETqWlcoQKlQoFKGch2pVUq0agUajQJf0Ivd58TldyMQSmBo0ZsPuRtOkiQcPhetrnYkJGIN0Uc8tiRJoTElu8tPcmwEEf2EVjpVkSSJtrY27Hb7EYdQ2l+Qhl71f0QWzeCp/6xn295W3vrd2aiUCtpdnfx5+T+osdbz09FXcmbO1GP8SU4ccpfdSUJLh5s3vtjJ8i21RKY2EzlyJwoFXDP8CmbnTjukGfWegJfF5Sv5qmQxzc42YgxRXD70QmZkTSTGENXnMaFupb3sailhR9laqlVWAsmRsOdrtCotaaYkihIGhWK4GeNJNMYSb4ztt9tIkiTsXgel7RXU25qoszdSa2ukztpAs7Mt/DSvV+nIsKQwM3syWVFp5ERnkBKZ2EvEDgdPwEurqz3sumzzOnD6XLj9HnxBH4GuaAUKQYFGoUan1mLURGDSGomKiyUuoyD01K9Q4m+r7xKoMrz1e7Gu/wrrmvkAqKOT0WUMQZ9VhD5zGCp9JEPi8xkSn8/VRRdj8zrY0bSbHU2h1tOmhh0AKBVKMswpZEalkRaZjE6KwufQ0dEGdS0KalsMNLSKuL2R+z6UIojBGMBkEdFFBNlQY0VHExatDoRQNIb9wwUJKFCgROgSK1Gkx8RmpSIU0VutEgjgoy5QiyRJ6FRaItSGQ/7+g6KI3eegNliNVqXBqIk4oqgP3XmgGmolok3aQ5r/dCqh0+mO2UOz2BVua8aYNFZsrWfT7mbGDUkk2mDh8Zm/5tnVr/Hqhv+xt62Sm0dfecRjfQOJLEgDTFCU+Hx5Ge98sxsJH7mTq6jzlzAoNpdfjLuBeGPsQc/hCXj5tnQp8/cswO51UBCbw7XD5zI2ZXif7sv19ibW125lU8N2SlrLCUoiKkFJitvDFH0cKYXnEqNORC+Y8AdCidzwA53Q5lBgUznwCy24JSsO0UqHt51mZyuNjmaaHK24usYtIDRelBQZT050BtOzJpBmTibDnEK8MfaIw9aEPPNqqbbWUd2VxrzB3kSnx9arbPeEUI1SjUro6iaUJHyiH08/KTLiDNGkmpPJtKSSM3g0uVMvJVkdgbepAk/1LjzVu3DsWol98wIQFOhSCzDkjyNi0ATU5nhMWiOT0scwKX0MAFWtLazcu4Pi5nIa2uqpaF2PpNwXf08SFSiJQJ8QSXKamRiDhURzDKmWGNJj4kg0R2PWmfr0nJMkCavHRrW1nsrOGva2VbGntYwOT6hFm25OYWzyCNK1hbQ0CWwva2VraT1ubxCzUcPUMbFIcWUsq1iJUqHkJ4PP4/yCWYfkpSeKIvP3LOD97e9i1pm4c/wNDE0oPLQfcT8a25zc8/clZCaZ+NMdU1DKmWr7RPSEBGlUQTymCA2LN9YwbkgiEJpu8cCUO/hg5+d8vOsbKjpquGfizSSbEgfS5MNG7rIbQOpbHPz93U3sqepg6BA1ttjVtHnauXzoBVxcePZBn1YDYpCFZcuZt+trrB4bwxMHc+ngcymMy+1VttnZxoqqdaysWk+NLTTr26KKI8KfjNgZxcVt3xMlOviz9SJcUtekUUFE0DlR6B0IekdoqXMi6FwIin0eXZIooAgY0EkmIlUW4gyxpEclUpiYzrD0NIx6bS97DhV/0E9FRw2lXS7p5e3VNDiaw/sj1HpSTUkkmRJINMYRHxFDjCGKKJ0Zky7yoAP43oAPm9dOm6uTVlcbjY5W6m2NVFnrqLc1dqWnCIlUYVxoEuzQ+ALiDFF46/fiKtuEq2QDvuZKAJRJ+XTEj2aXkEtpg4fyuk7abfsijsda9GQmR5IYr8Rg8aDUu/EKNto87bQ622lzd4ZdvPdHQMCsiyTGEEVcREzXmFwCqaYk0szJPdJjdHtR
bm7Yyfq6LexuLQNgWEIB5+bNZFj8YLaWtLJwfTVrdzSgUCiYPiEKZ/RWtjbtIMOcwu3jrjvkQfLy9iqeW/NvGuzNnJ8/i6uGXXTY7v+LN1TzzLubue3iYVw49fRxYz5awl12lw8jf/oFxJx5IwCvfLyNb9dW8Z/HzukVRX5j/XZeXPsm/qCfa4fPPeQelpMBWZAGiO831vDPj7aiUiqYdZaapS1fYFDr+eWkWxgUl3fQ4zfV7+CtLR/SYG9mSHw+Vw67iILYnvGtAsEA6+q28vWepexpD03kU7qjcTfHI3YkIPn0REVqmWkqZ6p7EWszzqc5Lgar1ESrr5FWT3M4KKeAQIw+hhhdLFGaaIxKC1rJjDIQgd+lpdPuo7XTTVO7i5YOF/v1EoXzAOWkmMlJs5CfFoUlsm+Rsnkd7GktC71ayijrqA7Ph4rWW8iNziQ7Op1MSyoZltTjmszPF/RT2SWGe1rLKW4pDadUj4+IYXBsATHKNALWaNoqGzA0bGawVEKi0opHUlGsHERL8mQSM7PJSTWTlWTCaDh4Rd3t4h2KgmGlw22jw9NJu6uT1i4X7GZXG0Gx67cRBFJNSRTE5jA0Pp9hCYVE7jcxt9XZztLKNSwsW0Gbu4NUUxI/GXIeE9JG0drh4YNFJSxYV41Rr+asszSsbl+AzWvn0iHnc8mgsw8phYUn4OWdrR/z3d5lJEcmcMe46w9rfowkSfzu1dXsqe7glQfP7Pf/8WOju677z7XjyB45kbgL7gCgpLqDXz+3jDsvG8HZE3o/OLS7O3l53dtsadxFUcIgbh93Xb/d9icTsiCdYAJBkdc/28EXKysYnB3NsImdzC/9ioKYbH49+baw63N/tLk6+Pem91lft5WkyHiuH/ETRiUN7VEpO3xOPtq6kMUVy/BILkSvnmBLChGeLIamplGQEUVumgWLRWRP207WrnqXMp0KhxD6KxjUenKiM8iKSg+7JaeYEg7Zo8ofEGlqd4YiZzfZqWywUVFvpa7FEY4WHh9toCDDQkqKgMrUSXuwgZLWcursjUCoqy87Kp382GwKYrPJi84i2mA5/C/8GNE9AL+qZA+baoupdlbgVjchqAJIEqh8FuJUaQyKzWOiyUBCw3rce1ZDMEjE4ElETbkMTdyxmysSFIM0OVuptTZ0ddVVsqetHLffg4BAYVwOE9NGMyl9TDhqRFAMsrpmI5/s+oYaWwPZUencOPJyCuNyqGqw8cIHW9hT3cGZE5OQkrezqmYDhbE53D3x5l7emP2xrbGYl9a/Tburk3PyzuDKYReF4ucdAjVNdu786/ecNzGTn80tOuLv5nSiu65755ZppOcUkHDpvUDo/3j7U4uwROp48hdT+jxWkiQWlC3n7S3zUCqUXDnsIs7KmXrMc2QdS2RBOoG4PH6e+s8GNu1p5qKpWUgp21lYvoIpGeO4fey1B6zwJUliUflK3t4yj6AU5CdDzueC/Fk93HbbnFZeWfEpW9s3ICkCBK2xJElDmJ47krGDk8hIjKTJ0cKqmo2sq91CeUc1AKZAkKFJQxiWMZqCuBySIxOOSxO/3WFnZWkxW+tLqbJWYZWaQNU1lhJQEyHFk2XKZHT6IKYVDiFSN3CuwMGgSEW9jV0VbRRXtlNc2U5bV1BavVZJXloUBRkWTHEunKpGSjpKKWkrJygGEQSBTEsqeaYUUtrbiN6zmRi3B3PRGURNvxpV5PF5UhVFkbKOKjY37GBtzWZqbA2oFCompo3i/PxZ4fxKoiiyono97277jDZ3BzOzJ3Pt8EvQK/W8/XUx877fy5hBCUybAW9seQ+VQsWd429kVPLQQ7LD5Xfz7rbP+G7vMqL0Zm4Y+RMmpI46pJbsix9uYdH6Gl5/+CyiT1NX8MOhu6773+3nkBwfTdLVvwvve3/BHt75ZjevP3QW8dH95zBrtDfz6ob/saN5D2nmZG4aeTlDEwpOhPmHjSxIJwib08fv/rWa8jort186lGJxMatrNnLxoLO5
aticA96sVo+Nl9a/w6b67QyNL+BnY68hwRi3b7/TyXPfz2OHbT2SIoDGmcaMtDOYM3YkcVF63H4PK6rW833FKva2VwKQF5PFmITBJH7/EZkJeSRd8dAx/bwev4fKzjoqOqop66iivL2aOltjeGwkOTKB/Jhskgyp4Iiivk6guKKd6iZ7qMWhFMhNtTAoK4aCjCgKM6KIMR/aRGBvwIfVa+/ysHPjFwOIktgVakcV8ijTGDBrI9GrQ2NMHXYPJVUd7KnuYE9VByXVHXh8oS6xuCg9gzKiGZwVzaCsGDKSTH0OvHsCXkpayylu2cvu1r3sbasMu7SrUZDg8REfEElPHUpG4WQSjPHERURj0kYel27Hqs5aFpWvZGnFGtwBDyMSB3PlsDlhYfIEvHy080u+2LMIi87EHeOupyhxEF+vquCf87YxqSiJ6y5O57k1r1PVWcvcwedw+ZALD9kTr6S1nNc2vktlZy1D4vO5YcRlZEYd+H6tb3Xw8ycXccWZBVxzzuE7SJxudNd1795zCQk6iZSbnwrva2xzcuufFnLTBUOYO6P3uPH+SJLEurot/GfzR7S42pmQNorrh19KbMSJDRJ7MGRBOgE43H4eemklNU127r9+NCs7v2Bt7WauHX4JFxXOPuCxxS2lPLvqdRw+J9cMv4Rz8s4It14CgSAvL/mWZY0LQO3B6EvnimEXMnv4EARBoN7exFcli1lWuRZPwEuaOZnpmROYlD6aWEM01nVf0LbgDZJv/DO6lPwj+mzdKcSrrfVUd9ZRZa2jqqOWRkdLWHzMOlMouGhUOnkxWeRGZ/Y7udfh8rGrop2d5aGWSWlNJ4FgyLEgxqwjJ8VCVrKJtEQjmkgXLtppcjVRb2+iydFKq7MtHJ3gUFBIKiSfnoBLj+iOQPBEkmRMYmhKJoMzYxmcFUNc1KFHxNifoBikztYYCqDaWUt1awU1bdVYhWCPcmqFiii9uWsirCW8HmOwEKMPOTFE6c1H5ZX4XdkyPt+9ALvPyRmZE7lm+MXhnFJl7VW8uPZN6myNXFR4FlcOm8MXyyt4ff5Orjt3EBefkcnrm97n+4pVDEso5J4JN2PSRR7kqiFEUWRh+Qre3z4fh8/FjKyJXD7sQqL1ln6Peexfq6lssPH6w7N/9B533XXd+/dfTYyvlbTbX+ix/5fPLEGpEPjbPdMP6Xy+gI/5exbwSfG3CMDFg87mooKzDtsJ5XghC9JxxusP8ugrqyip7uChm8ay1vYdSyvXcP2In3BBwawDHvtN6RLe2vwh8RGx/GrSrT2eLteVlvP8qv/g0zWh8Udz3fCfcPawkUDoyXjezq9ZW7sZpULJlPSxzM6dRk50RvhJXBKD1PzzTlSmGJKv/+MhfRarx0ZVZx011nqqrfXUWOupsTXgDezzIkuIiCXDkkpmVCqZllBA1Cj9kUdW8AeClNVZ2VJRw7b6Emqd1bgUrQgG2z5PP0lAGTCilUwYFJHoBCMaQY9S1CIGlXi94HIHsDq8uHxeUAYRlH4UGh9GcxCd0Y+kduAQO8NOHBqlumsMK4fC2BwK43L6nXt1OEiSRNv2JZQueYs2gviHTsYRHUeH20q7uzO83D8tO4REK9EYR4o5iQxzCjnRGeTFZB1WunmXz83Hxd/wZcki9CodN468jKkZ4xAEAV/Ax5tbPmJh2XIKYnP45cRbeH1eGau21fPMr6aTlWxmcflKXt/4HiZtJL+a9NPDclpw+Jx8vPNrvt67BJWg5IKCM7mo8Kw+x5eWba7l6Xc28uc7JjM05+DTHk5nuuu6Dx+5BXPrbjJ++e8e+z9cVMJ/vio+aLfdD2lxtvH21o9ZU7OJOEM014/8CeNSRgx4CCdZkI4jkiTxt/9uYunmWu6/bgxVwjrm7/6Oy4acz2VDL+j3OFEUeXPLh3xTuoRRycO4e/xN4WCcgaDI0199yib7YgQEzkg+k59NvQClUkmzo5X/bf+MVdUb0Kt1nJN7Bufmz+gzS6mrdCON
H/yJ+Ln3Yhw0sdf+dlcne9srKWuvCkfj3n+eT6TWGHZ4SDMnk9710h3iAPbB6PTYwlG0dzWX0OhoAUJCkWVJJ06bhEGMQXKbcNu02BwBbE4fTo8ff0AkKEooBQGtRoleqyLSoCHarCPOoicpNoKUOCOp8UY0+0VFD4pBGuzNVHTUUNZeSWl7JeUd1aFxIQTSLSkMictjcHw+g+PzjkqgAvYOWj5/HnfFNoxFZxB77s9QdD2lSpKE2++h3d1Jq6udZmcbTY4W6u1N1FobaHK2AoSjio9MGsK4lJFkRaUdUoVSa2vglXXvsKetnAlpo/jZmGvCwraiaj2vbPgvepWWn4+6mb/9q5ysZBN//PlkACo6avj7yldpdbVz7fC5nJc/87AqsUZHC//b9ilrajZh0hq5dPB5nJUztcdYqMvj5+pHvubi6TnceMGQQz736Uh3XTfvj3cSUbaSrAff67G/odXJbX9eyK1zhnLRtMPPIruzuYQ3Nn1AtbWOYQkF3DjyctLMRxdI+WiQBek4Mn9ZGf/6bAfXnltIfHYbL61/m9k507hl9JX93sQBMciLa95gVc1Gzs+fxXXD54b77Fttdh747J/YNZWYxCQenv0zMmMS8AV8fFL8LfN3f4cgCJyfP4sLC888YIXZ+OGTeOtKSb/rFVAoQ0nomvaws7mE3a1ltLtDUbgVKIg3xJNmSiE7Oo3c2HQyo1IOmkL8cPEEvOxu2cu2xmK2Ne2m2hrKTWNQ6xkcl8eguDwGxeWSGZV21KkVDgdfwMfe9ip2tZSyq7mEPW3l+IN+BAQyLCkh2+LzKIjNOez05JIk0rHsAzpXfIg2JZ+EnzyAymg56HEuv5vy9iqKW/ayvWk3JW0ViJJIcmQCM7MnMTN78kHFct+k1vnEGqL5v8m3hSNGV3fW8fTKV2hzdTDGOIvvFwo8fddUCjND4w1On4t/rPsPG+q2MjZlOLePu+6wxXlvWyX/3fYJO5tLSDTGcXXRxYxPHRm+L+5/YTmSJPH03dMO67ynG9113cd/uQ/9jq/JeuA9BFVP56efP7mQpFgjv/vpoaWz/yFBMciCsuW8v+Nz3H4P5+RO57KhFxxW6/tYIQvScaKqwcYvn1nKqIJ4rro4gUcX/41Bcbn8dtqd/bpdBsQgz61+vc/xpd31tfx+0QsE1DbGRE3jvtlXoBAU7G4p46X1/6HB3syU9LFcO3zuQd2jg04r5c/fSvOIKawzWNjesgtHsCtWnV9LwBaF6LAgOixIrkiQ9tmrUgrERRlIiTOSlWyiID2KIdkxhzS/Zn+6UzVsayruSoFRTkAMoFaoKIjNoShxEEPjC8iOSj+qcELHGn/Qz972SnY2l7CzuYSStopwYr0EYxx50ZnkRGeQHZ1Ohjm1V5qJvnDuXkPzZ8+hjIwm6apHUEcd3ux6u9fButotLK1cw+7WMrRKDWflTuPiwtkHHespaS3nmVWvYfc5uGPc9eHoEg6vk2dW/4vtTXugOZvxMTP49dVjwsdJksSXJYv479ZPiNZbuGfiLYedl0eSJLY07uSdLR9TY2tgcFweN468nMyoVP716Xa+WVPFh386/7ASS55udNd1nz77CNpNn5Dxy3+jjOg5NeSf87by/YYa3v3jeaiUR36v2LwO3t8+n4VlKzBqI7h62BxmZE06ofefLEjHgaAocd/zy2jucPH0PZP406q/EpCC/GX2b3tMWNwfURJ5ce1brKhaxw0jfsL5+40vbaou4anl/0RC5OpBV3PxqAmIoshHu75k3q6viTVE87Mx11CUePCIwhUdNXy69B22OytxqBRIogLRGoPel0x6RBaZ0UnEWgyYIjTotSqUSgFRlPD4gjhcPtptHpraXdQ2O6husiOKEoIA+elRTByaxPRRqcRaelfCoiRSY63vipBdwq7mknCIoQxLKkUJhRQlDmJQbO5RD7D6gn46PTZsHjvugAdf0B/2stMo1ejVulBQUZ3pkOfI9EcgGKC8o5rdrXsp
aQ1Fk2h3d4b3xxqiSTMnkRSZQHJXPMD4iBhiDdE9uqk8dSU0vv8EglJD0rWPoYlJOSJ7Kjtq+WLPQpZXr0On0nLZkPM5N2/GAeeedHps/H3lq+xuLePyoRdy6eBzEQSBgBjkjU3vh9Ktdybw+vUPEKnv+duWtlXw3OrXaXV1cMXQC5kzaPZhO18ExSCLy1fx3o75OHxOzs6dTqx7BP/6eDevP3wW8VEn/kn9ZKG7rpv/0p9Rr/4vqT9/AU1Mzy61lVvrefI/6/nr3VMpyDh6r7mKjhre2PQ+u1vLyI5K56ZRl/eadH+8kAXpOPDtmipe/HALv75mNMWB71lSuZrHZ/76gD/qf7d+wme7v+PKYRcxd/C54e2b63bz5LJ/IPnV3D3uZ0wpLMDudfDs6tfZ3rSb6ZkTuHnUFQesWEVRZE3tJt7f8i0N7loUIuQ4g4i6s5hRMJrR+UmH7FK9P15/kJLqDraVtrJhdxN7azoRhFCsrQumZhAZ42ZPWzm7W/ZS3Lo3nKIiISKWoQmFDE0IheE50u6/7lTn3V5sddYGGh0t4WgKh4JBrSc+IoakyATSzElkWEK5mWIMUUc8wNvptlLeUUNVZ23Y8aPB3oRvvxTlAgIWvYlYQzRxhmhiI2KIFgWUaz4nNigw5KrH0cUceV9+ra2Bt7fMY3PDTrKj0rlrwk2kHCCumT/o55X1/2VZ1VpmZk/m1tFXoVQokSSJV5Z/xqL6b0mOSOH3Z97Va/K2y+fm1Q3/ZVXNRobGF3DnhBsP6EXXHw6vk/d2zGfB3uVEqI207cjnz9ddwuCsmMM+1+lCd133xevPolz6Gsk3PYUuuaeLd1O7i58+sYA7Li3i3ElHFlX8h0iSxMrqDbyz9WPa3Z1MyxjPNcMvIeogE/ePFlmQjjE+f5Db/ryQWIue6y6P44mlzzOncDbXDL+k32NWVK3n+TX/5sycqdw6+qpwRbintZzfLXyGgEfLXaN/zvRhuTQ5WvjT0hdpcbXz09FXMTN7Ur/nFSWRlVUbeHfb57S6WxE9BizOLO6xLSN+9AUknHX9MfnMoiTS5GhlQ9UeVpTupNJajajtRFDsS0ZXGJfLoK5YcHERR1bBePweilv3sqNpD8Ute6noqA7HmovURJBqTibJGNflJm3BpDViUOvRqjQoBAWiJOIP+nH5PTh8Tjrc1i6ngdau9A/7opGbtEZyozPJj82mMDaH3OjMo2q5iZJIh9tKk6OVJkcLLa5Q7LruNOWtro4eKeOVUmiuVmZMKHNuYWwOmZa0w+o+kSSJNbWbeG3je/gCPm4dczXTMscfsPz7O+bz8a5vGJcygnsm3oxaqcbl8XPNX99Al7edaIOJ30y7k1RzUq9jv69YzRub3kejVPOL8TcwKnnY4X9RdLW6Vr5Js7uZ4dGjuW/G9SeNW/KJpruu+/LtV1AseIHEqx7FkD28RxlJkrj6ka+ZPDyZOy8bcUyv7/F7+HT3t8zfvRCVQsllQy7g3PwZx20cV472fYxZtL6aNquHu68czr83vUSiMe6AHnV1tkZeWf8OhbE53DzqirAY1dub+OP3LxLwargo5WqmD8ul1tbA498/S0AM8ugZv6Qwrv8WV0lrOa9vei+Um8cVibptDDdMmcnEyHpaP/0ec+G4I/p8oiTS6GihoqOa8vZqyjuqqeioCXe/aZRq8tPSUXvzKNkN1uYIskfkcf3MoYed70aSJKo6a9ncsJMtjbvCkRBUChV5MZlcWHhWV96kzKNyLe/GE/BS3VnX5WUXyg67f+qInKgMBsXlMiguj4LY7MMa9FUIilDiPUMUg+N7xyoUJZFOt41mZys1NTso3TCf5mArOwJulletA0K5i0YkDmZS+mhGJA09aKUgCAIT00ZTEJvD86v/zYtr36TaWs/VRXP67FYTBIErh83BpI3kzc0f8pcVL3Hv5J9j0GlI0eZidKbQplvGw4ue5v4pt/f4HIIgMDN7EgWx2Ty7+nWeXP5PLsifxdXD
LznsyisvJotfjb6Hez/8F1vZyMOLmrh/yu0n3STOE4lCE+oBEX+QCBNC331GkomapkPvGThUdGodVw6bwxlZk3hz0we8vXUeSyvX8NPRVx2w/jlSZEE6hkiSxBcrK8hNNdNEMQ32Zh6ceke/eUmCYpAX176JRqnml5N+Gr5xXT43Ty79J15fkBTHbK6ZNZJGRwuPf/8sAI/P/HWvJ9RuvAEf/9v2KV+Xfo+WCHxlRYyIH8Gvfj4Ks1FLy5ffoNAa0KYcPICrJEm0uToo6Yq0XdZeRWVHDe5AKISOWqEi3ZLCpPQx5EZnkB2VQZo5KTxe4Z4V4P0Fe/hkyV62lbXymxvGkptqOeA1nT4X25t2s6lhB1sadoZdzTMtqZyfP4uihEIKYnN6RLc+VuhUWvJjs3sMztu9DkraKkLdji17+aJkEZ/t/g4BgTRzMoWxORTE5pAXk0mCMe6IRVEhKIg2WIg2WCiMy2VyZBqNHz6JIX8sqgvuZ3drGVsbi9lUv50V1esx60zMzpnKufkzDurhFq238PAZ9/DGpveZv/s7Oj1Wbh97Xb/jSuflz0Sv0vHy+nd4cvk/eHDqL8hMMrOzIsBfrrifPy17kT8ufZ67J9zEhLRRPY5NMSXyxJn38/aWeXxRsojStgp+NfnWw+7CUylUBGoKuWTMeBY1f8ZvFjzJb6bdGY4y8WNDodEhApK370nfCdEGtpW2HLfrJxrjeGDqHayv28obmz/g0cV/ZWbWJK4efkk4VuKxQBakY0hZrZXqRjs/u3QQH+96jSHx+YxM6j/+15cliyhrr+KXE38avmElSeLlDe/Q5GzBWzqWO2+ZhNPv5ImlLxAUg/z+AGJUZ2vkbytfpdbWQLZ2BDtXxnLBxDx+evGw8Ix3d9UOdOlDeqXl7qbZ0cq2pt3saN7DnpYy2rrcv9UKFZmWVKZmjiM7KoPsqHRSzUkHfPrVa1XceMEQJgxN4qm3N/DAiyt44PoxjBu8bywjKAYpa69iW1MxWxt2UdJegSRJRKj1FCUOZmTSEIYnDj7ufdf9Eak1Mjp5GKO7up+8AR+lbRXsbt3L7pYyllWt5buyZUCoBZMTHcp4m2lJI8OSSkJE7BF5KRnyRhM963raF75JVFIuUybPZUrGOAJikK2Nu1hQtpwPd37JFyWL+Mng8zkv/8COCyqFkp+OvooovYUPdnyOKEncOf6Gfh0QZmRPQqVQ8eK6N3lq+T/JTTiLpZvdRKrM/GHmvTy14iWeWfUat465qleGUo1SzS2jr2RQXB4vrX+bB7/7M/dN+Tl5MYc+vuELhCYoF1gKmFl0P39a+iK/X/IMD0+/+7DOc7rQLUiiz9Pn/oRoA202D/6AiFp1fLziBEFgXOoIihIK+WjX13y5ZyEb67dz29hrGJsy/OAnOARkQTqGrNhah1IhEDTXYK2x8+uht/X7xNzpsfHRzq8YlTyMifs9Za6oWs+amk0omwYxKnUQOSlmnlj2PO2uDn4341f9itG2xmL+tvJV1EoVV+Vcz7/fbWbmmDRuu2RY2IaAvYNARyOm0WeHjwt1i9WxqmYD6+u2UmcLRduO0pkZFJ9HYWwOeTFZZJhTeniFHQ6FmdH8/ZfTePz1tfzpzTXceFkqisgOdjWXUNyyF3cgFKE6OyqdSwadw4jEIeTFZJ6UUYm1Kg1DEwrCwSmDYpBaW0NXvqYqytqr+LT4O8SusS21Uk1aV86iVFMSKaZEUk2JxB+CUJnHXYC3vpSOpe+iTx+ELm0QKoUyLJBVnbX8b9tnvL11HquqN3DPxJtJjIzv93yCIPCTIeehEATe2z4fo8bAzaOu6Lf81MxxSEi8uPZNnEYJyKShzUlWspmHp9/N31f9i1c3/A9f0M95+TN7HT8pfTSppkT+suIlHvv+GX458ZZDrrhc7tB4WoReTaoplj/MupfHvn+GPy19gT+eef8BHTRORwR1d5dd3y0ks1GLJIHT7T/uqTt0ah3XDr+EaRnj+Mfat3h6
xctMyxzPzSOvOKRpDgdCFqRjyMbdzQzOjub7qoXkRmf2mSivm492fok/6OeGET8JC4bD6+TNLR+SpE+hvCqd83+axae7v2V70x5+Pva6fud5rKnZxHOrXyfFlMT9k2/n4Rc3kxIXwR0/Gd5DEL11ewDQpRbi8XtYUrmGRWUrqLLWoRAUDInP48zsKQxPGkxKZOIxCSNi8zrY21ZJSVs5pmFlaBPK+V95qLJJioxnSsbYUAUfX9CvS/yR4Al4aXa00ubuwOoJBVr1Bf1ISCgFZTjtdpTeTKwhihhD9BEN1CoVSjK6cjN1txR8QT811vpwmKUaaz3bGotZWrkmfJxaoSLVnESWJY3s6HRyozPJsKT2EGFBEIg77+d4G8po/vRZUm/9Owrdvu65DEsqv5n2C9bUbOKVDf/lwQVP9hrb6YtLBp2Dw+vki5JFpEQmcnZe/3HQpmWOx+V38+9N76POdFHXPI6sZDNalYb7Jv+MZ9e8zpubP0QhKDgn74xex6dbUvjTmQ/w5PJ/8reVr3Ln+BuZkjH2oN9rpyMUjsoUEeqajTFE8cgZ9/DbBU/y9IqXefKsB49ZVJBTAUGpAoUqnMb8h+g0of+NxxcATkwuqXRLCk+ceT/zdn3NJ8XfUNpawa8n30a65cimLIAsSMcMp9tPVaONM2caWGFv4s7xN/Zbtt3dyeLyVczInkzSfk+0H+36CofPSZ50Dk06D7GJAf628EsmpY1mRlbv8D4Amxt28Nzq18mJzuS30+5kU3E7Da1OfnvjOLTqnhWst6EMr1LJ5+17+Grtyzj9brKi0vjp6CuZkDb6qPuCXX43lR01lLWHInyXtVWGw9woBAUZ5hTOyJrImrVeJHs0j//yHMzGo795XD43e9sr9wt1VEuLs+2wzqFUKEmNTCSrSxwKYrNJMycfUUBTjVIdCib7g4yrTp+LOlsjtbZGam0NVHfWsb5uK4srVgGhMayihEGMSx3BmJQiDGo9Cq2B+Dn3UP/WQ7QtfJO4C37R63oT0kaRHZ3Bn5e9yBNLn+eh6XcxOL7/YLmCIHDt8LnU25t4c8uH5ERnkBuT2W/5c/LOoMXRyed8y5KaZUwZcRUAKqWKX078KX9f9S/+vel9jJqIPsXGpIvk0TPu4cnl/+SFtW+gU2kYc5CWUoc91DW1fwqK+IgYfjXpVh7//lne2foJPx1z1QHPcbqh0OqQ+umy02lDVbnbG+hz//FCpVRxxbALKUos5JlVr/HQwr9w+7jrmZQ++sjOd4zt+9FSXm9FksCqKkev0jEhdWS/ZReWLScoBrmo8KzwtnZ3Jwv2LuOMzIls+x6GZMfy9taP0Km0Pbzv9qfGWs/fV71GujmF306/E4Naz7LNdUSbdIwf0rNLQ5IkVjTt5LOMGOy7vmJMchEXDzr7sGfXd+P2e0KpJbrEp6Kjmgb7vtTisYZosqPTmZUzhfyYLLKjM9CpQuJzdpqVXz+3jH98tJXf3DD2sFpioiTSaG+mtCsh3Z7WMmqtDWF37SRjPHnRmczMmkRiZByxhmgsOhMRGgMapQYFAkFJxB3w4PA66fBYaXG202Bvoqqzlo3121lSsRoIjQkNTxzEqKRhDE8afNSCHaEx9HKakCSJFlc7e9sq2NFcwqb67ayr24JWqeHMnKnMKTwLS0o+lolz6Fz1Ccah09Bn9nanjo+I4fGZv+bRxX/jLyte5snZvyFxvxQlP0ShUHDnhBu579sneGHtGzw9+6EDulZfM+Iivtq4nW3ScopbxoSzGqsUSn458RaeWPoCL637D4nGuD7FTafW8eDUO/j9kmd5bvW/eeLM+w/4JN3a6UGjVvbyzBwSn885eWfwTekSZudOO6qn8VMNhcbQb5edXhOqyj3eYJ/7jzeD4vJ4avZv+fvKV3lu9es4fA5m5x5aBPL9kQXpGFHf4gQkKhyljE4p6vfmFkWRReUrGZE0uEeF8VXJ9wSkIBcVzOabD9eSPzjI6qbd3Djysj7Dv/gC
Pv6+8l/oVFoemHoHBnWo73ZXRRtjBiX0CLdi9zp4ad3bbBA6yFTo+M2sew74RPxDJEmi3t7E7pa97GkrZ29bZY/cRjGGKLKj0pmWMZ7s6HSyo9IPONk1O8XMdecW8sYXu1i5rZ4pw/uuVDwBL/W2RmqsDVRZQ7mV9ncx16t05MdmMTFtNPkxWeREZxySK7aK0FiQRWfqcz5Nk7OVPS1lbG/ezdaGXays3oBCUDA8cRBT0scxNqXomHUXCYJAfEQM8RExTEofgyiJlLZVsKBsOV+Xfs/i8pX8dPRVTJ5yGY7i1bR+/Sqpt/0doQ/PzUitkd9Mu5MHvn2CF9e8yeOzfn3AFp5RE8Ed467nD0ue49Pd33L50Av7LasQFOSI0ykLfs7za97g7+c8Gp6MrVGquXfybTz43Z95ZtW/ePqch8P/x/3RqXU8MOV27vvuTzy35t88ddZv+h2XbOl0EWfR9/mwctmQ81lSsZpPir/hnom39Gvz6YZCq0Psx8tO26PLbmCI0pt55Ix7+Pvq10Jz34J+Lig487DOIQvSMaLd6kaIsOLwOxl1AM+6nS0ldLit3DTy8vC2QDDA9+UrGZsyHK1kQhQlKoObiNKbe3kwdfPBzi+oszfy8PS7w7HrPL4AVoePlLh9T/K1tgaeWvZP2twdnN9i54LhM4k5BDFy+JxsadjJpoadbG/ajbXL/TpSE0FeTBaT0keHYrYdRHz6Y860HJZuruPV+ZuITvBi83fS7Gyl0dFKk6OZenszba6OcHm1QkW6eZ+LeW50JqmmpGMeZ0sQBBKNcSQa45ieNQFREilvr2Zd3RZWVK3nhbVvoFfrmJU9hXPyziD+CCf59odCUFDQ5Up+6eDzeHn927yw9g06hs/lzNm30Pj+E1jXf4Vlwpw+j4+PiOGGkZfxz3X/YVX1xoOO1wxLKGRS2mjm717A7NzpBwwQm5MUw65Nw2gvXMOHO77g+pE/Ce+L1Bq5Z+ItPLL4r7y3bT43j+7bWcKiN/OzMdfwlxUv8XXpEi4s7LvCau5wE99PHiqjNoIzsiayoGw5Lr+7T/E7HRE0+v5bSF1ddgMpSAAalYZ7J/+M59f8m/9smUdcRAzjD9Bb9ENkQTpGONx+dNGdABQl9J/pcn3dVtRKdQ938K1Nxdh9TmZmTcLtDSDoHDR4q7hq2Jw+5zDV25v4cs8iZmZN6hG/LhgMtVi63T6rO+v4/ZJnUSDw0PCr0H/4d7QHCEfjD/pZX7eNpZVr2Na4i6AkEqk1UpRQyJD4AgbH5ZIUmXBYXWxBMUirq51GRwtNjpZQpAJnK82OVjrSW/EEPTy29Ktw+UhNBInGOIbE5ZMUGR/ySjMnkWSMHxCvO4WgIDcmk9yYTK4cdhF7Wsv4bu8yvipZzNel33N+/ix+Mvjc4zLAnhQZz6Nn/JLn17zBO1s/Jn/mvZhzRtK5ch6Rw2ei1PcdOHVa5ng+2/0dX5YsOiQHgiuGXcTq2k18XfI9VxX1LXQAeekW/EvMTIgfzdd7l3Bu/oweUTfyY7M5K2cq35Ut47z8Gf16/I1JKaIoYRDzd3/HuXln9NlKam53kTOsb49SgPGpI/i69Ht2NpccM5fjkx2FSoMY8PW5T6ftmvs3QF12+6NSKLlz/I20Odt5cc2bpM5OOmSvSFmQjhG+gIjC2ElSZOIBIyzvbNrD4LjcHhM719duQa/WUZQwiKZ2D8qYhlCuo34cGebt/AqVQsWVP6g89FoVKqVAp91Lq6udPy59HrVCxWMzfkVkfSVNgMqS0Ot8br+Hb/cu5auSxXR6bMQYoji/YBbjU0eSE51xSAP7/qCfOlsj1dZ6am0N1NoaabA10ehsISjuu0nUSjXxhhjijbEUxOawu9TD3jIf910xjRHpGUftNno8UQiKrjQYeVwz/BI+2P4F83d/x9bGXTx6xj3H1EuwG6VCyR3jrmdXSylf7FnIXTOupe61e+lc/SkxM6/r184ZWRN5Z+sntLk6iDFEHfAa
SZHxjEwaypLK1Vwx7MJ+f++C9FCkhDRGsVnaxNelS7h+xKU9ylw6+DwWl6/im71LuXHkZf1e89z8GTy1/J9sayruFWLI4w3ltjpQUNWc6EwEBKo6a380goRSjeTp28suPIY0wC2kbjRKNfdO+Tn/9/XveX3jezxyxj2H9CArC9IxQgBEnZWsqP6765w+F7W2xnCIfwiNWWxtKqYoYRAqpQqjXo0yqok4TUqfk0E73FZWVW/g7LwzenWvKBQC6QkmSmvbeWblF3gDPp44834SI+Ox2jcBoDLte6KVJIklFav537ZPsXrtDE8cxB3511OUMOiAXWGiKFJtraekrZy97ZWUt1dTZ2sIx5VTCgoSI+NJNiUwJqWIpK4o14nGOCx6U48Kz1rg5fanFvHxV82Mv7PgwF/ySUSsIZo7xoe8iZ5e8TJvb/2YO8YdXmxASZJweUKZbO0uH15/EFGUUCoUGHQqTBFaok1atCoNg+PyKO+oRpuQiXHIFGwbvsEy/qJeqQi6KYwNTTmo6Kg5qCABTE4bw6b67VR01PTyDuwmLkpPfLSB8kofI3OHsrJ6PdcNn9ujoonSmxmZNIQ1NZt6TGn4IcMSClEr1exo2tNLkFo6Q91S/XXZQWgMMFIbQbvbetDPdrogqNRI+wXo3Z/wGNJJ0ELqJkpv5qqiOby28T021m9nTErRQY+RBekYoVKLEHQfsGla0VGDhNTDoaDN3UGbq4M5XbmPRIUbhcGBWew7MOXyqrUEJZHZuX0nLivKi+WrvQtQtlfyq0k/DQ/aBx0dIChQGkIiZvM6+MfaN9ncsJOC2Bzun3r7AWfAt7ra2VS/nS2NxT1SR0RqjeREpTM6eRjplmQyzKkkRsYf8pwes1HLbZcU8bf/buSdr4u54fzBh3TcycKIpCHkx2ZT1lbZbxlJkmjpdFNWa6WiPhTNo67FQVO786BdLAqFQFJMBL6MWrRaJTVNduInX4pj5wqsG74ienrfrs/mrla6w+c8pM8xKD4kYHvbKvsVJIDhubGs2t7ALVOHsaF+Gw32JpJ/8J8fnjiY9XVbaXG2EW/sOwW5RqkmOTKBentTr32tXYIUd5C0EwpBgSiePBXw8UZQqqAfQdJ1tZBc3r73DxSzsqcwb9fXLChbLgvSiUTQecADUbr+n0ZrbQ0APVIEV3bUAJAdFYrRVdpeCYC7ve/B5bU1m8mJyiA5snfXG8CooWa+cZSRpstjYtq+uQBBtw2F3oigUNJgb+aPS5+n023l5lFXMDt3Wp/dNE6fi+VV61hetY7StgoA4iJimJA2isFxoSgOcRExRz2B9oxRqewoa+WjxaUkxhg4e0LmUZ3vRLKudgu7Wko5P29fpAKXx09JdQfFlR3sqWqntKYTmzPU9y8IkBgTSqE+LDeWWLMeS6QWU4QGrUaJQhAIBEVcngCdDi8tHS52teyhXNFER3ked6xcTFJMBLeaC2H911gmzUWh7j2XqzvVx6Hme4rRR6FSqGhxtR+w3Ij8OBasq0bpDbXMam2NvQSp+6GsydnaryBBKPWHJ+Dttb3NGhKkGPOBU6o4fC6Mx6Gb9GRFUKqRgn13ySkUAnqt6oTPQzoYSoWS6ZkT+Gz3dzh8zoPGXTyugvT555/z0ksv4ff7ufHGG7nmmmt67N+5cyePPvoofr+fpKQknn76aUymY5sa+0Sh1YWe1BTB/m+iFmcbaqWaKN2+bpb6rrk73Tdxd+rumkp6xaVy+Jzsba/i0iHn9XuNnfb1CAqRll0ZeM4JhCfMiR4nSl0EzY5WHvv+7+G4eH25f3e4rXy2+zsWla/EG/CSYU7hqmFzGJ864rCdGkRRxOl34faHEuV1p1hQCAq0Kg0GtZ4IjYGfXVJEa6ebFz/cSpvVwxVnFYTj752MBMUgnxR/y4c7vyA9Mo143whe/HALuyvbqW6yI0kh8UlLiGTc4ERy0yzkpJrJTDKFn2YPhZLWcr5ftpwUfSL/d8PNbC/tYPW2Bv5Xmc5d
kcW8+/JbjDj/UoZk9/T229NaDoSiORwKgiCgVqh6pMDoi+F5cQgCVNSEhMTudfQqo1WGxkd9/TzNd+PyuYjpI4J3m7X3pNgf0uhoJiAGSD5AqKTTDUGpQgr0/50adCrcnpNLkACGxhfwafG3lLdXHzSJ6HETpKamJp555hk+/vhjNBoNV155JePHjyc3d184nSeeeIK7776b6dOn8+STT/L666/zq1/96niZdFyJMIY83Jy9788wnR4bFp2pR4Xe5upAr9KF58802luIVJto9gjsKm9jeP6+uUpl7VVISAzqJySRP+hncfkqBkcPYeN6Fe8t2MONFwwBQPJ58Ku1PLviJXxBP7+f8X+9JhUGxSCf71nIvJ1fERADTE4fy3n5M8g+QBcOhFKv11obqOysoc7WSIO9mRZnG+3uTmxeR3i+Un8IgoBFayI2M5r0SBUfbqtgZfk2fn7ONIZlnTwxyyRJorXTw5q9u5lf+SlWsRmhI4Xd6/PZLe4kQqeiICOayUXJFGREU5ARddgpN7oJBAPM37OAD3d8QWxEDL+ddidxERbS4iycNymL1s4RNPx7C+mdG3nwHwmMyIvjpguHkJ1iRpREFpavIMOccsDJsfvjC/hwBzwHfYI1G7XkplrYXtYEsfTp+ej0h1pnhgO0zgLBAA2OZob24ZHabvNg1KvRqPvv9t3RXAJwwjKZngyExpD6FxyDToXrJBSkzK6Holpbw8AJ0qpVq5gwYQIWiwWAs88+m2+++YY777wzXEYURZzOUB+32+3GbO49QGuz2bDZbD22NTY2Hi+zjxhTZOjmaW7t2y0TwOl3Y1T37Be3ee09vPI6PJ3EG6OxapQs31rXQ5CqO+sByLKk9Xn+7U27cficXDR+OtEdfj5ZspdxQxIZnBWDFPTzpT5IjbWe3067q5cYtbk6+Puqf1HaVsHYlOFcN3xuv267kiRR0VHD5oYd7GjeQ2lbRfhpWKVQkWCMJT4ilpzoDMw6E5HaCAxqPRqlGqVCiYBAUAriDfhw+d3YvA463Faana0EDE2o0620sIfH1yxBvzKGooQhXDJiKjlxJ25Wvs3po67ZQU2znaoGG5UNNsob2/FG70SZUAUBDVG2iQyLHUb+iGgKM6NIi4/sMSH5SAiIQVZVb+DDnV/S5GhhQtoobht9NUZtT6GItRhQTz2fiO/+zS9mRvGfNZ386pklXDAlm/i8Jmqs9fzyMCaNVnbWApDWT/De/RlVEM9H68rQxNKnw0StNdQ1nWTsv/VS0vWf6SunTofdS9QBWkcAK6s3kGSM77fr+lTkYHXdgbrsAAw6NU73yTWGBIQftp195HL6IcdNkJqbm4mL21eZxsfHs23bth5lHnzwQW666Sb+9Kc/odfr+eCDD3qd56233uLFF188XmYeMzTqrrk/jf03kfxBX695RW6/B4Nq383n8LqIMliYOCyJFVvq+OlFQ8Pdbk3OFiI0hl6VUzebG3aiVWkZllBA4RzYUdbGU/9Zz99/OZ0G0csKlZezc6czIqmn40CdrZE/LHkOl9/NLyfe0sMLcH9aXe0sKlvJ8qq1NDvbEBDItKQyK3sKeTFZZEWlkWiMO+r5Qjavg12NZXyzbRO720tY176UdYuXovHFkBcxgklpY8hIMBMXpcccoT0sEZAkCY8viM3pw+rw0m7z0NbppqXTTXOHm4Y2J42tThz73dhajZL4NAeqwRsJ4mBM/DhuGXspMcZj171s9dj4vmI135Yupc3dQYYlld9Ou5MRSUP6PcY4ZBptC99irK6SKb+5kv98XcyXW9ej8WygIKqgxxjiwVhXtxWFoGBw3MHzZA3Pj2Pezk5g39Pv/uxo3kN8REyvVOf7s6ZmE2qFiqKE3k/MnXYvUQeIWF3ZUUtxSynXFF1yTAIAnywcrK4TlKp+vewg1MVZ23zsk/QdLYIgdD2Eigcte9wEqa/M6Pv/eTweDw899BBvvfUWRUVFvPHGGzzwwAO8+uqrPY654YYb
uOSSnum/Gxsbe41HDTTdn620phNJkvq8UURJ6uVOHRCDPSYGeoJetEoNsydksmRjLd9vquXciZkAdLptROv6v8n3tJaRH5OFWqlGrYSHbhrHfS8s4/evrSElyYUaemWvbXd18sclzxOURP4w694+xxxanG18sOMLVlStQ5QkihILuXTweYxKHnpEURoOhklrZELGcCZkDEcUJdaVVvJN8SpKA9vY6V/E9uJVBBbmEGxNQaFQYIrQEKFTo9cqUauUKBQCCkFAlCQCARFfIIjHF8TtCeBw+wkEe98YKqVAXJSBhGgDU0ekkBxnJDkuguRYAwtrv+Wr0qWkmBK5feztRxz/74cExSBbG4v5vmIVG+q2EpREhsTnc8voKxmVPPSg87+UhkgM2SNw7lpJ9MzrGDsBVgQ3I3oi2f59Gt+aqjin679zIDwBL9+Xr2RU8rBDmktVkB6FKqqFSCG2V3mX3822pt3Myprc7/Fuv4elVWsYnzqyT6eLdpuHQZn9Z4edt+sr9CodZ+ZMOaitpxKHVNf1Ua92E2fRs6Wkud/6Z6Bw+z1ISBgPMazXcSEhIYENGzaE3zc3NxMfv68JX1JSglarpago5Ap4xRVX8Nxzz/U6j8lkOiUcHZRdQmNzeahuspOR2NtmhSDg/0FlKAj0+JOJkoRSUDA4K5rcVDOfLNnL7PEZKBUCTr+r31htoihSa2vkvPwZ4W0ZSSYevH4cf3hrOc0pXiYGND0ChIqSyPNr/o3T7+Lxmb/uJUaiJPL57oV8sPMLBODs3OmcVzDrmIfLORAKhcCEgiwmFGQhSVezoW477237ghrtDmIKWhmingmeSJxuPx5fEH8gSFCUQuIvCBh0KixqLVqNEoNOTYROhdGgwRyhwWzUEmXSEmPWYzH2bml5Az7+uvJltjYWc07eGVw7fG6/2X8Ph0Z7M4srVrG0Yg0dHiuRWiPn5s1gZs5kUk0H7zLbn4jCCVjLNvLWqjf4qnY9OVEZ3DX2Nl7x7uEfH22lucPFdecOOmAF9Vnxd9h9Ti7umnpwMFrcLQjGTlT2Eb32rahahz/oZ2rmuH6P/3bvUtx+D+cXzOq1T5Ik2m2efrvsdrfsZW3tZi4bcv5hpZA/FTikuu4Av2OsRY/bG8Tp9mM0HPuMykdKd1ddhHoABWnSpEm88MILtLe3o9fr+e677/jDH/4Q3p+RkUFjYyPl5eVkZ2ezaNEihg3re+7NqYBW2dXFoAyyeU9zn4KkVmqwe3vOC1EqVAT2m0uhFBSIkoggCFw2K58/v7WepZtqmDkmHX8w0G/q7nZ3JwExQOIP+u1HFcZz4XkRfFMPGc0ebE5fOMfM9+Wr2NVSyu1jryMzque4lMfv4ZnVr7O5YQfjUkZw46jLiDX0/9R6IhAEgbGpRYxJGcbyqnW8vWUeqzwfcMOInzA7d9oxfSoUJZFnV7/Gtsbd/HzstczM7v+J/1DPt6l+B1+Xfs/2pt0IgsDIpKHcnDWR0UnDjij5oSRJlJgjeT0tmtba9ZyZM5UbR16GRqnm0ZvH89LH2/hwUSkA15/X9/yu/2fvvMOjKtM+fJ9pmZmUmfTeeyAJCaH3KoioKApi1/WzrH11i72srrruuq517b03RJDeWyChJJCekN57Mpk+5/tjkkBIIaEISO7r8pLMnHPmzWTmPO/7vM/z+xU1lvBTzhomB40Z9MpvRe56JEhpLPbAZhO7A7nNZmNl7kbCXO0WHn3RYdLzc846knxH9Nnv1NZhxmyx9VnybbFZeS/9K9zVriw8Rin/wkHE3oLfN56djcR1zfpzKiDVdlrQePRRUXk8Z3SF9MADD3DDDTdgNptZvHgxCQkJ3Hbbbdx7773Ex8fzj3/8g/vvvx9RFHF3d+f5558/U8M543QJPHp7yNlzuIbLp/WuhFPLVZSZK3s8ppQ5oLcc9ThRSOUYrfbCiPEjfQkP0PD56px+FbG7aO4UP+1L3cHkUIdSlOCr0/PXN7bx1B8m4KZ14Lus
VUS5h/WSKDJaTDy/9XXyGo7wh9FLmRN+em/2p4ogCEwNGcconzje2PMJ7+/7ipKWCv6QvPS0ia2uyd9CemUmtyQvOaVgZLPZ2FGaxg9Zv1LRVo27ypUlIxcyI3RityjuUBFFkey6fL49vJLDtXl4yBTcYVQzM2VZ9zFSqYQ/LrZL6ny7IR8vV3Wv9F2zoZWXd7yD1sGFm5OvZjBUt9ex5cguopwS2d8ho6HF0H0j3FmWRlV7LQ9OvK3fz8uP2atpN+lY0o+yeF2TfTbtoe2t0vBT9hpKWyp4ePId3VYmFxKiKA4Uj7rfs/pmPaF+/af2f2sqWu3Nz/7OJ66YPaN9SAsXLmThwp4fvHfffbf739OmTWPatKF7ZpyLOHcWGkSEqNm5tZ7mNmMvK2EXhSPtx62QnOTqHtUnKrmKDrM9QEkkAjcvGMFj/9vJ8q2FyCTSfvtEuoKaStZ7ZnmkuYwQuSMeylYamw089N+tLF6koaGjiZuTru5183h/31fk1Bdy/4Q/nLTR1m+Bi9KZv0y5ky8zlrM8Zy06Uwf3jL/5pJxfj8VsNfND1q/Ee0dz0Ul4unSRW1/Ie+lfUdJcTrDGn3vH38L4wOSTHp/VZmVPxQF+yd1AfsMRNEoXbk66mtHl5bSnrsBm1CNxOHojFwSBO69MpK5Jzzs/ZRIT4kaI71Gljr9v/i9txnaenvngoHX4PjnwPTKpnFlBM9nPYbtNhKsKi9XC15krCNb4MzZgVJ/n1uoaWJW3sdOmpO9WgupG+3fB261neqe4qZzvD69kYlDKhaNd1wfCQCsk7dEV0rlERVs1SplDn5Pl4zm92v0XMF2b+wF+CmwibD9Y0ecxeosB4zGKvS5KZ9pNHd1pO2cHxx7NholRnowb4cM36/OQIEdv7tsx0mqz7031dbOraa/HR+GCYOrgxbsmIJdL+WT7JhQSh146YhnV2Ww+sotFsfPO6WDUhUSQcG3iIq5LvIJdZem8uecTbIOo5hmI3PoiWoxtzIuccVIrQ6vNyucHf+SJDf+i3aTj3vG38OJFjzA5eMxJBaNmQys/ZP3K3b88zis736PV0MatyUt5Y8GzzI+agVPwSLBZMVbm9zpXKhF4cFkyjko5r39zAFEUqdU18OTGf1HVXsvDk+84YZ9ZF3srDpJWcZAr4+bj72pXYOhSoFhTsIUaXT3LEi/vtxjj84M/IgjCgIrilXX2z76fx9FKUrPVzOupH+Hk4MStyX3bWlwQdHVb94PWWYlUInRLL50rVLbW4O/sM6jv0nBAOk04KdTIJDJEuYFQPxc2ppX1OsZNpQWg4Rh5FjeVBhGRZoNdJFKrdKHJ0FMw8rbL47GJUFFlpK0fbbKuogrLcdpeFquFDrO+uwTX38nKv++bioO2BX2DC9+sy+9REfnd4ZV4qt1YPIAaxLnIpTFzWBp/KdtL9vB15opTulZFq733o799kIEwWky8sO1NluesZVbYJP497wkmB485KSv0goZi/rv7Q+5c8QhfZf6Mn4s3f558B69e/DQXRU7rNoF08LfblRsq8vq8jsbJgesvjiW3tInv9uzkb+teoFnfwqNT7z5ho2IXbcZ23k37gmCNP5dEz0bRqSBiNttoM7bz3eGVJHjH9rBVOZacugJ2laVzWczcAcVeS6vb8NAoUSuPFo98e3glpS0V3DnmujOiqH7+MHCDuVQi4K5VnZMrJD+XwfWLDWvZnSYkggR3lZb6jkZmjI7ngxWHKatpI9D7aNNrl3dMXUdjt/5X15ezXteEh9oNd5UrOlMHBouxO0/u7aZm6ZwovsjIQilvxSbaet3guo49Xhusq2FVpbKv4Cyt9Tj5u2GVtxOkDePLtblU1LZz39IkmoyN5NQXcm3CIuSnoZrst2ZR7DzqdI38mL2aUNdAxgcmn9R1zDb7ezbUijqrzcrLO/5HRk02t6dcy6yTKEsWRZF9VYf4KXsNufWFqGRK5oRPYV7EtF6acV1IlY7IXH0w1RT3e93J
Sd58lJ7Pt0dWE6Dx5aFJ/9fv9foa09t7P6PNpOORqXfbU8ed3ltSqcDXmSvQW4zceIxh3/Hnf37wR1yVmhMWIxSUNxPmr+3+uaixlJ9z1jE9dEKv1fwFh8iAKySwp+3qms6dgGQwG2joaBq0H9LwCuk04uXkTl17PdOTA5BIBDbsLe3xvHen0GRNe93Rcxztj3VVonQHLV1Dj3MXTY/ATaXFKlqpbm7ieJw7JV9ajcc1xnV+fgWVfWZpbqxCZ+rAJtqYkxzJjQvi2Hqggife2UV6eTbAoFR5z0UEQeDW5CVEuoXw1t5PqT3uPRwsLg72SUSXS+5g+T5rFQers7ht9LKTCkYZ1dn8dd0/eHHbmzR2NHFT0lW8fek/uCV5yQmDh8IrGFNtSZ/PHag6zF/WPY/FvRBbfRBPT3940MEIYFXeRvZWHOTahMu7qzHbOlN1HTSyrmgbcyOm9hANPpb9VYfJbShi8YgFAxYjtOpMlNe2Ex1sn6TZRBvvpn2Bi4NTL9+lC5OBq+wAPDSqcypl16XVOVhFjeGAdBrxcvSkWlePq4uSsXHebNhbhtlydD/DVaVBLpVT1XZsQHJHQKC63f6H69Ieqz4maAHIpBIun2APFB+vT+N4XDvTgY365h6PKzvL0Y0yGUhkmBoqutN6CqmcxTMjefi60eSWNPLN9n3IJLLzWo5FJpVx38Q/gAhv7fmkzwbtExHQebMubu69D9gf1W21/Ji9hinBY4fcsNmob+al7W/z9y3/RWfq4K6xN/Dqgme4OGrmoNW6Fe7+mJtrekjLlDSX84+tr/P81teRCAKXB1yL8UgcdY39y1sdz+HaPD49+ANj/BNZEHW0b6iqQQeIbK5Zg6NczdUjLun3Gj9mr8ZT7caMsIkDvlZGgf0zPzLcPinbXrKXwqYSrk+88oQaexcCNrMRQT5wObenq4qGFj0229A/92eCyjZ7+nvYMfYs4OvsSZuxHZ2pg4vGh7D7UDWph6u6S7YlggQfR48eKyS5VI6noxuVnaWRvp36cZWtNXBcpXdKWBif5kBqQQFZR1KICz3aoOogU6BxcKa2vb7HORKJBBcHJ5qN7cjd/TDXleEks6eiDJ3FFVOTAnBSKXh+wyFkZhk6gwWnkxQFPRfwcnTnusQreDf9C7YWpzItdPyQzg/U+KOQKjhQmUO4UywmsxWzxYbFasMmit0TValEQCaVIJdJ+LFgDQIC1yYsOuH1jyWt4iBvpH6M2WZhWcLlLIiaeVLpUpnWG2xWLG2N1EisfHd4FbtK01HLlVyfeCXzIqeRX9rKl2ynua235UNfVLXV8q8d7+Dr7MUfx93YY1O6oLwZtVcjeY0F3JK8pF85q6LGEnLrC7kp6aoTFnSkZ9fiqJITHeSK1Wblm0MrCHUNZFJw31JWFxqi2YjkBBMUd40Si1WkrcOExunsl8ZXtNYgCMKgRX6HA9JpxLdzZVHVVktSdDAeWhXrUkt79BD5OHt1B58u/F18Ke/cSHdUqNEqXbq9k47Fy9EduUSG3NXAG98d5NUHpyOTHl3k+rn4dG/IH4u3owfV7bU4eIegLz6Et1yNXCrvsZpKjvEipdyX9JoqXvx4L0/dNh6p9PxdQM8Kn8SmIzv5IvMnJgQmIyCjoUVPXZOe+hY9jS0GmtqMNLcZadHZHVvbOszo9Gb0BjOySBc2tu/j128HsYku2FAm7cHa7MUtT25F46TAzUWJl5saf08ngnxciArU4uvh2H1TF0WRn7LX8GXmcsJdg7l3wi3dk5GTQabxoFoh5ce0z9jTUIBCpuCy2LlcGjOne3Vh7Zw1D8bWo8XQyvNbX0cQBP4y5a7uPruusR/Ir0ERmoe7szezw6f0e50NRTtQSOVMD5nQ7zEAVquNPVnVjI7xQiqVkFq+n1pdAw+NWnxSBSG/R2wmA4Ji4IDUZdnR0GI4RwJSNd6OHoOeZA0HpNNIV6qrsq2GCPcQZo0J5Jv1edQ2deDV6X7p6+zNvqpDWG3W
bhHSABcfMmtysNlsSCQSAjV+3YrJxyKVSAlw8cWmspGzqY2ftxZxxYyjDbhBGj+2FO/uVfQQqPFjb8VBFH7TaD+0FWtbI35OXpS39GzSjQvwZ1/DHg4UVfD5mpx+u/vPZdo6TBRX2tW5HZvjKbCt4Na3PqClxLeXDJiDQoqrswMaRwdcnZUEejvjpJSjUsootxk4oNvMLVeE4q52Qy6TIJVKkAgCgmCvwO3SyitoKeCXSgszw8fhEhpAc5uRhhYDJVVtpB6q7g4EWmcHkqI8mZYcQIF5D99nrWJy0BjuGHv9KUkSFTWW8m3RRtKD3HFoOsJlsXO5JGpWDxV5gJoGe4WmxwDW4GD33Xpuy2s06Zt5Yvr9vWa3RRUt1AsFKCQtXJOwpN+Vj81mY3f5flL8E1ErBn7N9NxaWnUmpo6yT962FKfiqtSQ4nd+7meeCUSzAcmJAlKnwkVjq4Ew/7PfHFvcXEaQZvAq/cMB6TTi7ejRYz9o9pggvl6Xx4a9ZVwzNxoAf2dvrDYrtbqG7hmxv4svFpuFWl09Ps5eBGn8WVe4tTtAHUuwawD7KjMZHTuZr9blMjMlsLsBN8w1iDUFW6hsq+mhiRbuFsLGIztpcbcHTENFLuFuwaSW7+/xGl2utaNHy/l+Yz4T4n2JDOy/RPdsI4oiFXXtZBTUc7iogdySJmoajzYZq1Uy5DGuSD3LWRI5HW83NZ5aFe5aJW4uPUuLj6e0WcOBNZtx8+tgaujAN8UjB1ORVkn4w+xpvTbtzRYb5bVt5JU2kVnQwN6sGraW7EYRdohIx3juGnsjMunJNcqWNJfzdeYK0iozUMuUzGrUsXDsUvwS+i7ZP1TUgJNKjo97//sx7UYdz219jfLWav48+Y4+5YR+3VWE3L+QEE0gY/1H9XutI81ltBnbSRlEddyvO4vROjswOtYbk9XMweosZoVOOm3KG78HbCZDn+7Ax3LsCuls027SUd1e10sJZiCGA9JpRCaV4a52pbpzH8fH3ZGECA82pZWxdE4UgiB0b+5VtFZ3B6SuTfTy1qrOgOSHyWqmRlffK40T5hrE5iO7uHJuAPtfq+PrdbncfoX9hhndefPIrSvsEZDivOyWArliB5FyJYaSw8THjWLjkZ3kNRzp9qSJ9ghHJVPi6tuES7Yv7y0/xAt/nHxOyQaJokhuSRNb9peTeri6u8TVzUVJbIgb8yaEEOanIdjXGTcXJSty5Xx28EcWTPdDOwRl8gAXXxykCo40lzGNgfegcuoKCHML7rOCTC6TEOqnIdRPw0XjQ8irO8KTm1YhN/iQsceXx0t38bcbx3brCw6GRn0zX2YsZ2txKiq5kiUjFzI3IIW61+/EsZ/NbKPZyu5D1YyJ8+43Zdesb+G5ra9T0VrNgxNv69P6oqnVwOaiVCQhepYkLBzws5FbXwhAnGfUgL9PWU0badk1XDM3GplUQnZdIWarmQSf3uZ9FzKiyYCgHvgz3GXb0dI+uH3CM0lho73qsy/Nwv4YDkinGW8njx6FBTNGB/Dq1wfIL2smKsi1R0DqKq8O6DRFK2upIsU/sds8r6S5vFdA6vrjdkjqmTM2iNW7S7hyZiQeWhW+zt5olC4crs3rUXbs5+yNl6M76VWHiQ+KRX8kg6RZ1yGXyNhRurc7IMmlciYGpbCtJJVFs8bw0U+FZBTUkxg5uA3JM4nRbGVdagkrthVRWa9DIZOQFO3FVbOiSIz0wNfdsc+bY1cpcnVb3ZACkkQiwcvRvVf5/fGYLCYKmkpYEDXzhNc0WUy8ufdjtEoXXrz0QfbENPHm9wd55M3tvHzf1BNam9tsNn7N38RXh1ZgtVlZGDOby2MvwknhiM2gow4QbX2rVKxPLUGnN3PRuL5vDmUtlbyw9Q1aje38ZcqdJPr0na79Yk0OeBfh6+hDcj9NsF2UtlTi7OB0Qs2+7zbmo5BLuXhiKAAlndWNg1WQuFCwmY1ITpD6lMukyKQCeuPZd449UJWFXCIbkqvv
8Hr4NOPp6N7dUwR2gVSpRGBnhn2/pqtooaLtaPGBWq7CXe1KWWchQ6CLL4IgUNrSu+w4RBuATCIjr76Iq2dFYRNFlm+1z0QFQSDeO4aMmuwe8jmCIDAuIMn+eHAc5sZK5O0tjAtIYmtxKh2mo30LC6NnYbFZaVBmoHFS8Mv2otP7Bg0RURTZsLeU255bx/9+zMTZUcF9S5L49Ol5PHbLOOZPCMHPw6nfmbrebJ8pnswejVLmgMk6cIl0Vl0BVpuVEV4DrwLArjhQ2VbDnWOvx0XpzOyxQTx681hKqtv4bkNv2Z9jqW6v44mN/+LjA98R5xnBK/Of4LrEK7oLFrrK2/t6H3R6M1+tzyMu1I0RYb2tQ3aX7ePR9S9htll4auaD/Qaj/LIm1mWnIVG1c8WIi064cq7T1Z+wuqq4qpVN6WVcPDGkO/Vcq2tAIZXjOoD314WIaDIgnCBlB6BykJ31gGSz2dhTvp8RXlFDEsIdDkinGS9HD5oNrd0KCU5qBfERHuw+dLRIwc/Zu1elXaCLb3eRgUKmwM/Ju88+GLlUTphrEHn1RXi5qZmU4Me6PaUYTPYPYLLvCFqN7RQ0FPc4b2rIOKw2K+lK+02kIz+NS6Jno7cYWJW/8ejYXHy4KGIaG4q2kzhKQlp2DW0dg+9bOZ3o9Gae/SCV/3y1Hy83Nf+4axIv3zuV2WODBtz/6UIURTYf2YmTwpGgfpo2B6LVpDuhh0taxUEUUvkJ01LFTeWsyF3PjNCJPeR6Rsd4M26ED+v2lPbbM7W7bB9/WfM85a1V3DPuZv465Y94H3ejFztL+IU+7Ek+/OUwre1G/nDZyJ4mmWYD7+z9nH/vfJdAjR8vzPlbv+kVo9nKK1/uR+lfjouDM5P6cRU+lmZ964BBRRRF3v0pEyeVnKtnH33/Wg1taBycz6lU8bmAbRBFDXBuBKQD1VnUdTSesPfseIYD0mnGs9MzqKHjqJrCmDhvKup0VHdWOfk6e1PVWfjQRYDGj4q2GmydKZdgrT8lTb318ABiPMMpbCrFZDExf0IIOr2Z3Zn2gJfkOxKpRMru8v09zgnWBhDlHsa6in1IPAPR5ewmzC2Isf6jWJ6zjvpj9PWuib8UX2cvsmzrsMraST3Uu5T8TNNhMPPImzvYl1PLbZeP5KW7pzAy3GPQ59tEG58e+J4D1VlcGTd/yH5DTfoWatrr+nTQ7cJoMbGjLI0Uv4R+farAPlt8J+1znBRqrk+8otfzsSFuNLYaet1EbKKNrzNX8O+d7xLg4sPLFz3GlJCxfbsRG+yipBJlz4KF7QcrWLO7hEXTI3oUqByszuJPa/7OhqIdXBozl6dnPNhvak0URf73QwblTTXYnGqZHT55UO9n+wCGkgCb0svIKKjn+vmxOB/j39NhMaD+nZnvnSqiKCKajINaIUkkdrfks4VNtPH94ZW4qbSMGaDopS+GA9Jpxr3zS32sgOqozj2YjAJ7Ks/byYM2Y3sP5e4AF1/MVnN3ui/ENZC6jkba+xBTjfWMxGKzkN9YzIgwdzw0SrYfPJoSTPSOZVdpei/V60tj5lCjqycrOAxDWTaW1nq7JIso8vaez7qPV8qV/HnKnUgkoIpLY2t2zml6dwbPf77aT0l1K4/dMo5Lp4T3cnMdiIKGYh5f/09+ydvAvIjpzD/GRXewrM7fDMC4fqwUADYUbbc3QUcObFHxS94GChqLuSnp6j4bSHUGMxIBFPKj1XYWq4XXd3/E91mrmBE6kadn/mlAgzNLu30CJHXUdj9WVNHCq1/tJzrYlWvn2Vdlte31/GvHOzy35TVkgpSnZj7AdYmLBgwwK3ccYd2eUkaMbUciCMwZoO/oWI7VYzye+mY97/x0iNgQNy4aH9LjOZPFhIP03DGYOyewmADxhHtIYO83G0yv2Zlie8le8huLWRp/6ZDV7YcD0mmmS9G7SX9UBy3Q2xknlZzcEvtNw7PzxnLshrl/pxpuRZs9lRfcWdhQ2kfaLsYjHEEQ
OFybZ7f4HunL/rw6TGa7JNDUkHE06Js4VJPb47wU/wTCXINYYajEJED7oW14OXlwY9JiMmqy+ebQL93H+jl78/j0+5DJIUe+gu3FveWKzhQ5xY3syqxi2UUxpMQOTsbIYDawtTiVJzf+i0fWv0htRyN3j7uJm5OvHnJj5ZGmMn7JXc/EoJR+Nd9aDW18f3gVI7yiiPHobcbYhb00+2dS/BP7THOJokhadg2Rga7dTc4Gi5EXt7/J9tK9LI2/lDvGXHfCFYml2b7ilmk6pacadDz93m6c1Ar+duMYDNYOPjnwPff/+jT7qw5x9ciF/HPeY8R6Rg543V2Zlbz7UyYpI9ypFrMZGzBqQLXuY3+v/gKS1Sby7y/2YbXauP+apF6TDZPVhPwkHHR/z3SnZAchJWW1ikjOUrqzur2O9/d9RaRbCFNDxg35/OGAdJrp8kVqMR4NSIIgEBGgpaiiGQBPtX1juf6YtN6xKg9Ad6qopI+A5KhQE6YN4lCNfeWSFO2FyWztDnhj/BNxVjiytmBrj/MkgoQbkxbTYGhhY3AQbQc3Ioois8ImMzN0Ij9k/cqqvKP7SSGugdwQeRs2vSP/TX2ff25/u5fG3plgZ2YVMqmEhVMGttSu1TWwvnAbL2x7k1uX/5nXUz+iUd/CdYlX8OrFTzE1ZNyQ9yFKmyt4fuvrODs4cXPSVX0eYxNtvLX3UzosBm5JXtLva3SY9byy8z0cFWpuT1nW53F7s2s4UtnKnHH2HjCdqYPnNv+XjJoc7hhzPVfEzR/U72CqK0OQOyDTeFDb1MFjb+/EZLby15uS2Fy+ibtXPs7KvA1MDhrDqxc/zeIRF5+w0GNfTi0vfZpOVJArCWN16Mx6LjlGz24gjFYToij2qcX3+epsMgvruX1RAn4evZUwjFYTiuEVUg9spk7TTsWJU3YWq62HgstvRYdZz793vIMEgfsm/uGkFDYGNQ1ZtGgRy5Yt45JLLkGlOvGS8UJGLVchFSS0HmOyBxDk68zqXSXYbGJ3rv7YfSZnhSOOclV3U62rUoOTwpHS49QUuoj3iWFFzjo6zHriQu0rrqziBuIj7DIds8InszxnLdXtdT0qnWI9I5kTPoX1hdsI1zfhUXoYVfBI/pCyjHZzBx/t/5YmfQvXxF+GRCJh2ohI3vlmAuGjG8moPsD9q55iQmAyF0VMI7pzpXa6aWjR46FVonI4+vG02KyUt1SR33CE7PoCcuoKuve9PB3dmR02mXEBo4jxjDipL4JNtLH5yC4+3P8tarmSx6bf2z25OBZRFPki4yfSKzO5KemqfhWurTYr/9n5nr06bvr9fV6rpd3IW99nEOjtxMyUIJoNrTy35TXKW6t4YMIfhmSfYawqQOEVQlVDB4+/vZN2g4F5lwi8lP6SvTnVP5GlIxd2txSciLTsGp7/aA9B3s48eEM8j216jnjvmD4bZfui1WBXndc49FSL2La/gm835HPR+GBmjw3q81y92dA9QRvGTvcK6QRFDVabSHuHaUh9bacDg8XIP7a+QVlLJX+ecidejr2rOQfDoALS448/ztdff82rr77K3Llzueaaa4iMHHipf6EiCAKOCjXtx9iSA/h7OmEyW2lqM+DqrEFA6KElJwgC3k6e3cKrgiAQpPHrs/QbYJRPHD9lr+FQTS5jA0bh7+lIfunR682PnMHK3A38mLWaO8de3+Pc60ddSXZdPl/YRHz2LicxeCQyiZQHJvyBD/Z9zfKcteQ1HOGusdfj7eTJ6Bhfsg858O+HL2N1wSbWF25nR2kaHmo3UvwTSPCOIco9rJdUzckgiiJqZyuN1gp+ydlIZVsVR5rLKG2uwNxp365RuhDrEcGlMXMY4RVFQGeZ/Mlgs9k4UH2Ybw+tpLCphJFe0fxx3I19pqVsoo3PD/7Iitz1zI2YyvzIvvembDYbb+z5hAPVWfxfyrXdjcnHYjRb+cfHe2lpN/LITZNpMjTy9y3/pVHfzF8m38Uo38HLNtlMeoyV
hYixc3jy9a1YXEpwHlHMryWtxHvHsDT+UiLdQwd9va37y3nly30E+7rw9G0T+Pzw1+jM+iFZQHTthR6775V9pJFXvtpHbIgbty/qX72h1diOywVtxNcbm9mIACcUV23vMGET+U117FqN7by8/W3yGoq4f8Kt/Zo0DoZBBaTk5GSSk5NpbW1lxYoV3HnnnXh5eXH99dczf/78k37x3ytquYoOc09Pki4tu7omPe4aFS5K517OsD5OnhQ2HfVQCtL6s/nIrj4N+aI8wlHLVaRXZjI2YBRh/lpyS44WUriqNMwJn8KvBZtZEDWzx8xYKXPgr1P+yCOrnuG/xhIeL8skJDAeqUTKH0ZfQ5R7GB/s+5oHf7VbIExMTmRXZhXZ+R1cn3QlV41YwO7y/aSW72dj0Y7uAgB3lSsBGh+8HT1xU2vRODjjqFCjkCq6Nzdtog2T1YzBYkRn6qDN1E6zvpUGfTP1ugZqdQ0YrSakUfDJwVQcFWpCtYFcFDmdMNcgItyC8XbyPKWVmdVmpaiplL0VB9lRspe6jkY81G7cNfYGpoaM63OF1W7S8WbqJ6RVZnBRxDRuTr66zzFYrBZe3/MxO0vTuCb+sj6tKDoMZp7/aA9ZRxp46NrRSBxbeXTDm1hsFp6Yfv+gVyHd1yvcj2iz8O/cZszhuYiKdnycw3gg4dZB9Ud1IXb2tH2w4jBxoe48dss4dpTvZHPxLq6Imz9gxeHxFDeXA/Z2BoDS6lae/WA3nloVj948Frms783uDpOeDrMej0HsU11IiF0B6QQrpC4vpC5NuzNNZWs1/+j077pv/K1MCBx9Stcb9M5ha2sry5cv57vvvsPZ2Zn58+ezfPlyNm3axEsvvXRKg/i9oZIpMZh7akl1SXo0ttofd1W60HycAZyXkwe7y/d3C68Ga/wxWIzU6hp6NRjKJFKSfEeQVpmBzWYj1M+FbQcq0OnNOHZaR1w54mK2lKTywb6veWLG/T1utF5OHjw26Q6e2fwqT+38H/dPtc/KBUFgWuh44r1j+CLjJ37OWYdcuglNrD+fbTMyIf4KlHIl00MnMD10AiarmYKGYgoaiylpLqeitZqCxhJ0x60Q+0NAwNnBETeVFm8nT+K9Y/B28mT99kZKim389dZZRAf3X102EFablWZDK3W6Bqrb6yhvraKosZSCxmIMFiMSQUKCdwzXJi5irP+oPgsHRFEkrTKD99K/pNXQxs1JVzMvcnqfwajdqONfO9/hcG0e1yYs4rLYub2OqarX8fxHeyitaeP+pcnIPWp4cuPHaByceXLG/T0knwaDzSayeevPbPJzo05diI+jF9cnXUeKX8KQgrbFauPtHzJYs7uESQl+PLAsmW2lO/lg/9ck+8UP6HfUF5k1uXg7eaJVaaiq1/H4/3Yhk0p4+v8mDDh7L2u1p6jPZ0+uM4HY2eB9oqKGslr7VkGA55lfYe4sTeN/aZ8jl8h4csYDQ55I9cWgAtKf/vQntm7dyvTp03nqqadISkoC4JprrmHixKE1Pl0IOMgUGI/r8O/qQm/pdNrUKF16OZL6OHlhE23UdzTi7eTZ7c5Z3FTWZ8f7uIAkdpSmcag2lxBf+4yyuKq1uxvf2cGJ6xOv4O29n7EqbyOXRM/ucX6IfxwPu8TyVvNhnt/6GvMiprM04VLUchVuai13j7+Jy+MuYmXuRrbYUml2LubWH/cwPjiBkV7RRLmH4u3kSZxXZK+0lMliotXYjs7cgdFiwipaERAQBAG5RI5S7oCTXI2jQt2ten4s430MPPzaNp58Zxd/vXEMo6K8MFnNtBrbaDPqaDO202psp91k/3ebUUebyf7/FmMbLYZWWoxtPZpNZRIZQRo/poWMJ9YzgnjvGJwHSA0VNpbwRcZPZNbkEKjx4y+T7+xXzqagoZhXdr5Lk6GVu8fd1KvCyK44Uca7yzORSgQeu3UMucZd/LxzHdHuYTw0+fY+95kGorShhhdWvUe9tgWlzYGbR13F3MjJfb6f
A9HcZuTFT/dyqLCBxTMjWTw7lI8PfMX6ou0k+Y7ggQl/GJLIaauhjYyabOZHTKemsYNH396B2WLjH3dNGlDYFSC/4QgwLBt0PDazESknLmoor2lDIoCf55kzNDRYjHy8/zs2FG0nyj2M+ybc0u10faoMKiBFRkby6KOP4ubWc6Yqk8n48ssvT8tAfk8opAr0lp4rJKfOxr/2jq6A5NzL86gr6FS11eHt5Emgxg+pIKGoqbTPDe5k35Go5Sq2FO9mWexSAAormnvIw8wInci+ykN8fvBHQrSBjPSO7nGNiMlLuet/97IxOoY1BVvYUZbGZTFzmR02GbVCRYCLL7ePuZbrE6/g8S9/oqQjn93CfjYf2QXYV4MBLj74Onvj6eiOm0qLRumMk8IRtVyFUqawBx1BigDYELHZbJhtFpoMLVS116I3G+kw69GZOtCZO7oDTOD4ZnIranhu1wZk+y1YMff7njvKVTg5OOGicMRD7Uq4axBalQY3lRYvR3e8nDzwdvQ44c3aarOyv+oQq/I2cag2FyeFIzclXcXciGl99lRYrBZ+zF7N91m/4qbS8szMPxHhHtLjmNLqVv73YyYZBfWMCHPnhstD+CL7M/IaipgbPpUbkxYPyZTPYDbw3u7lbC3fikRmY3qTnmuveg6N++Bl/rvIKWnkhY/30tZh5v5rEhHcKnlwzdM061u5LGYuS+MvHXKAW12wGavNyki3JP725nb0Bgt/v2Miwb4nDrgHqrLwd/bpbp8Yxs7Rsu+Bi8ryy5sJ8HbuNyV6quyrPMT76V9S39HE5bEXcfXIhUPuNRqIQQWktLQ07rjjjh6PXX311XzzzTeEhw9eOO9CQS6V0WrsefNUyCTIpAI6vf1xrVJDi8E+g+9KrXQJqVa11TDKNw6FVE6Q1p/CxuI+X0chUzApKIXNxbu5cdRiXJ0dyC9r7nGMIAjcNfYGHtvwT17a/haPTbu3x9Ja7uqDe8Is5h3cyOxlf+GbI9v57OAPfHd4JRODUpgclEKMZyRqhYqnrrqSP726ldYKI/dcF4ZZ0ciRpjLKW6s4XJdHY2nzSVmG937/5LgonHBxcCI6wJO6OiuV1WaUUjVjo4MYGxWEm9oFJwdHnBWOOCkch3zTPBaL1UJOfSF7Kw6ysyydFkMrrioN1yYsYk7ElB7mdMeSVZvHe+lfUd5axeTgsdyavKSHMkFVvY6v1+eyKa0MlVLO7YviUfpW8sLulxEEgfsm3MKkoDGDHqfVZmVj4S4+2f8jRrEDrc6d2xtyCR17+ZCDkSiK/LytiI9+OYyrG1w818Z3le/QUNhEhFsIf5r4fyeVgqnTNbAiZz0JnvG8+lE+ZovI3++YSHiA9oTnthraOFyb22slPwzYTPaU3UB7SFabSG5xI1OSBr/XN1ia9S18uP9bdpWlE+Diy9MzHyTGs//+u5NlwIB07733cuTIEcrKyli4cGH34xaLZdinZADkEnl3RVgXgiCgcpDT0SkPo1W6YLFZaDfputNGWqULarmqh+trpFso20r29OmNBHBRxDTWFW5jbeE24kI9OVTY0CPIAagVKh6bdi9Pbvo3z25+lXvG38zYYxQIXKdcRXvmZjT7NvPYogcpbCxhdf5mdpSmsbFoB2q5ihFeUcR6RnDj1T6893UJr31SyAPXjObW0ZO6r2OxWe2pMkMb7SYdBosRg8WI2WrGeoxqhEwiRS6RI5fKUMocUMmVqOUqHDtTeH3J8GQdaeCjX7LYsLqR9B21zJ+gZs5YLRrl0NoQRFGkQd9EaXMlR5pKya0vJLu+EKPFiFwiI8lvJNNCxpPkO7LfmV9FazVfZi5nT/kBPNVu/GXKXYzu9Pyx2UQyC+tZueMIuw9VIZdKWDglnGnjtXyV/T2ZaTmM8IrirrE3DDrNIYoi+6oO8WH699R21GBr1zBGOYcl7auQanzRTl48pPegpd3Iy99sI7P+MK6jmtBJa1lTIjLCK4o/pFxDsu/IkyoasdisvJ76
MaIIWTu8kYrwjz9OIthncKnIjUd2YhVtTAkeO+TX/r1zdA+p/3Lu0upWdAYLsSEnt+faFzbRxsaiHXx+8EdMVjNLRi7kspi5Q5biGiwDXvXPf/4zFRUVPP744zz++OPdj0ul0uGy7wGQS2WYrb3TS04qOR16e0A6KjHU3B2QBEEgwMW3RyovxjOctYVbKW4u6zOvHqT1J9kvnpV5G7gs4jZ2ZFRSWtPW6ybgptby7KyHeGnbW7y843/MjZjKsoTLUctVyJzd0Iy/jObt32JImU94YCx/HHcjt45eSkZ1NvuqDnG4Jpe9FQftFwsHqVXFy7t34ZflybioULyd3bpTdY4KFT5OnihlDsilcuQSGVKJtNdNThRFRFHEIlqx2uz/6c162oztWGwWLDZr9/+lzlZuXOJNXqmCnZkVfJ26nW/2bCfQx4m4EFfCAjQ4OUqx2uxVfCarCb3ZQIdZT5tRR5OhhcaOpu4qvi4CXHyZFjKOBO9YErxjUA6waVzeWsVPWWvYVroHhVTBkpELuSR6NgqpnMLyZnZkVLJlXzm1TXqc1XIWz4xkznh/tlVu5amta5FJZPxh9FJmh08ZdK9UVm0enx9cTn5jEaJBjUPDGO6ZMhG/fW9hMenxuuaxE5q2gb1p8XBtHpty9pNeeQjRUYfCEVxdfLk48GKmBI/F5xQs1G2ijXfTviC7Lh9KRuEsOPPsHyfiN8jNdYPFyMrcDcR7xwy6V+pCQrQYEWQKhAEyAXuz7CoviZGD13wciPKWKt5J+5yc+kJGeEVxW8qyM15sMmBACggIICAggDVr1gwr7w4BB6kCYx8ByVEtp11vvxm6q+xFCI36JkJcjy6xg7T+7CpN617ljPSy7/lk1OT0u9G7dOSl/GXt81RJ9yERHNmyr7xP+3Gt0oWnZj7IlxnLWZW3kdSy/SyKm8essMloJy6iLWMT9avfxf/WfyJIpChlDowNGNW9mmrWt1DUVEpxczllLVUcLi+j2lDMz7m5MIiPhyAICJ0Hiognn95Tg0PnVlgNUNMImxr7PrQr/adROuPt7EWCTxy+zl4EuPgSog04obW2TbRxqCaXVfmb2FeZiUIqZ0HkTCZ4T6G8ysTb3x1mf14tDS0GJAKMivLi+vmxjIv3YW/lPp7Z/iIN+iYmBaVww6jFuKoGZ6mQU1fIt4d+IbM2B8xKTOVxzAqfxHVTHWlb+R8sumZ8lj6Gg3dIn+dbbVbyG4rJqMkiozqHgsZibKIN0SpBYfHiougZzI0dc0pB6NjXejf9SzYd2Ym1MgJvInn2ngm4awa/el2evZYWYxtLRi488cEXIKLJeMKm2D1Z1UQGaof0vveFyWrmx6zV/JSzBpVMyV1jb2BayPjfJAYMGJCuueYavvzyS5KTk3sMputmuW/fvjM+wPMRpcyhV9k3gIujorvKritdU3ucAVyYaxDrC7dR3V6Hr7MXWpWGMNcg9lYc5PLYi/p8vRDXAOZGTGVtwVaiR8xlXWopS+dE9xDr7EIhlXNj0mImB4/hkwPf89H+b/nu8CqmBo8leeIlqFd/RMuelWjHX9rrXK1KQ7IqnuRjLKmPVLbw7vIMMksq0Whg/Cg34iI0CFILRqsJk9XUudKx2m+Ix/j2SAQBiSBBKkiRSqTIuv+TIZPIkEokR/8tSJFKJJ3H2/8vdJ5f26gnt7iZ3JImCsta0RsAmwS1QkmAj5ZAb2f8XZzwclbjrlWidXLAxVGBSt7/x7+ypY5NhbvZXpZKg6EBB4maUEkK1IXw6wEj3+p3A/ZVb2KkJymx3qTEeqNxUrC/6hBPbPqEkuZywlyDuG/CLYPKt4uiyKHaXH7I+tWuU2h1wFwRTbA8njuujMarYitNXy1H6qjB97qnUfr37DFqNrSyv/IQ+6oOkVmTQ4dZb3cpdvTHoTmK5kpn5o4cxa2LE05oBjhYWg1t/Hf3h2TUZGOpDCNUksJTd0/ood59IkqbK/gpZw2Tg8acltLh3yM2i2nAptj6
Zj15pU0su+jUXHYP1+bxTtrnVLXVMjV4HDeMuvK0NLwPlgE/la+++ioAv/zyy0CHDXMcaoUaY+eN+Nh9CFdnB0qq7KXeWqULSplDt3ZdFzGd7orZdfndRQ7jA5P5IuMnatrrevngdHFtwuUcrM6iXraDZmMKq3Ye4fJp/d8Ew92CeXrmg+TUFbAqfxNrC7exymbBOcKHiMPLGaUWiA5IIFDjN2AFWKifhufumExGQT3fb8xnzfo6NmwyMDHBl9ljIkiI9EB6hnW1wt1gQuevarXaOFLVSkFZM0UVLZRUt7Izo6pPTyeJAA4KGXKZxC7ZL2vH4liNzaUSwcku62RtdcVal4C+0QejQkGQj5xJie5EBGiJDnYlyMcFqURAFEUya3L4JvUX8hqK8Hb04L4JtzAhcPQJ03M20UZaRQbLs9eQ31iM1KbCVBaDxhjBH6d4EWvJpv3nj2k26HAaORX3OTcjVbsgiiIVbdXsLT9IWsVB8juLX9xUWsYHJjPCM4acTIFfNlfg5uzAk9ckkRx96iuiLtIqMng37QtaDO2YikaS4JbEIzeNRekw+GBnMBt4ddf73dWMw/SNzWwYcIW0fm8pogjTk0+uoEFn6uDTgz+wsWgH3o4ePDbt3h6+Xb8VA35yDhw4MODJ/v7Dud6+cO508WwztvdI0XhoVTS1GjBbbMhlEgJcfHtJA/m7+OCq1HCgOouZYfaCgcnBY/gyczkbi3ZyTcJlfb6mUq7koUm38/iGl9EmHOCLDTImxvvh5Tawr0yMZwQxnhHoTB2kV2ayr3QfGeUH2J+9CrJXIREkeDt54OvkhZejBx6ObriptLiqNGgcnHF2sFe5JUZ6khjpSUlVK6t3F7MpvZyt+yvQOjkwMcGXiQl+jAhzP+Oij1KphIgALRHHVXW1d5iobdLT0KKnuc1IW4eZxo4WaozlNFjKaRDLMdIMgLPgRrBqIrGakQTEeuOhVeGpVaF1duhzH2xfZSbfZ/1KfsMR3FWu3DZ6GTPCJp6wHNZkNbO1eDcrctdT1VaLzOqEvDyMOL2SOYFmfExrsOyooFWQ4Bg9Fu3EK1H4hFLUVEpqwUb2lB+gslMdPtwtmCUjFzLaL55gbQCHihp446sDVNTpmDM2iFsuHYmTauiuuX1R3VbLpwd/YG/FQZwl7nRkjmNSVAwPLhuNXDb4v6/NZuP11I8pb6vm0an3/KYz8fMN0WxEoup7r9BmE1mXWsKoSM8T9nn1xd6Kg7yX9iUtxjYujZnLVSMWDOjvdSYZMCB9+umn/T4nCAJz5/buRB8GtCp7QUFzZ/lwF77ujthEuzVAoLczoa6BbC/d20MaSBAEkv3i2VG6F5PVjEIqt2vG+SWwrnAbi2Iv6nfjPUjrz8OT7+CFrW9CxE6e/ULOv25f0Gfq7ngcFWqmhoxjasg4Wg9sIHfN27SkzKbBy4/y1iqq22rJrivo1V/VxbGVcioXJfGzHejogMZmMxsqD7K2RIJCqiDQU0u4rzuR/h64OTmilDngIHOwV9vJlJ0/K06pjPt4DBYjjaZ66sRaysUqSiwVFOlKutOlCqmcWM8IRvnMZrRf/KD2VSw2K7vL0lmes46S5nI81W7cNnoZ00PH91hRilYzNkMHVoMOm7EDm0FHS3s9G6sPsbG5kDbRjIdBYF6DhfEdJSiFIpCBUKdAHhSLJnku6piJHDE1s6ZsH6lp71PX0YhEkDDCK5L5kTMY45/YLdjb0m7kv18fYP3eUrzd1Dx7+wRGRZ2eVVFNex3Ls9ey6chOZFI5kbLxZOxyYe7YUO5anDgkDx5RFHl/31fsqTjATUlXnZXZ+PmEaDYh0Wj7fO5gfh21TXpuWjBiSNdsMbTywb5v2FWWTrA2gL9M6b/x+7fipAPSMP3joT7qdxTaqbYAdFe+lVS3EujtTLRHOOsKt1HaXNGtygAwITCZDUXbSavIYGKQXRtqUew89la8yC95G1g8YkG/rz3SO5onZtzH85vfpFq2loe/
aOTFa65FqRj87Ng5cSaBBem479nIlJuexyHevtEsiiJ6s4FGfTPNhhZajG20GtppM+nQmTroMNt1yPRmA+0mHXoM2JwMODrYy79FREqB0lrYVDvwGORSOapjgpRS3hmspAoUUjlyqRyZRNodyLuq9UxWMwazAZ25g1ZjO82G1l4yRt6OHoS6BjE3YirRHuGEuQYNujG1w6xnY9FOVuVtpL6jEV+llpu9kkgRVYhZ+6nfswmrrgWrvg2bob27XBegXi5lm1ZFurMKi0QgWmdkTKMZb7MaR3dfPEZOROkZiIN3CHKvIIpaKllXmsauLS/T0NGETCIjwTuGxSMWMMY/sYfZn9VqY01qCZ+uykZvtHDljAiWzo0+5b0iURTJqS/g1/zNpJbvRypImR0+BUNZKKu3VXPJpFBuuzx+SAaKNpuN9/d9xbrCbVwWM5eLo2ae0hgvBESzsV/ZoLWpJTir5YyP79u7qy8OVB3mjdSP0Zn1LI2/lEtj5p7WBteTZcBP63PPPcejjz7aqym2i7fffvuMDOp856jiQs+7brCvMzKphNySJiYn+nerJhyozuoRkEZ6ReOhdmNd4dbugBThHsL4gGR+yl7DlOCx/e4lAUR5hPHvBY/x9Jq3qTKmctu3+dw+4SomhSYOqlJGEAQ8F9xJ+XsPUfPjvwm45SUkSkcEQUCtUNkVHDRD01wTRRGzzUKHyUBeeR0ZRdVkldRRXNOIFQsSmRUvdzleHg64aWU4O0uwCRYMZgMGixGj1UiHqYNGSxMmqxmzzYLFZum2ahYEAZkgRSGVo5QrcZSr8HfxYaRXNO5qVzwd3fBx8sLf2XvA0u7+qGyqYNXB5WytzcIgWgk1WLm4sY2Yjlok5NEmkSJzckXq5IrMxR2FTyhSlTMSpSNFNgPLGwrIMdYgiAKyBk/UjaH4xMQTOyesh7V4aXMF20v3smPfR9TpGpBKpIzyieOa+MsY7RffpyX4gbxa3lt+iJLqNhIiPLh9UTxBg+z96Y9mQyvbS/awqWgnZa1VOMpVLIyezcVRM1m1uYqftuWxcEoYt102tJ4lo8XEG6kfs7t8H5fFzGVZwuWnNM4LBZvF2GdTbEu7kd2Hqrh4Uuig1BksVgufZ/zEyrwNBGr8eHz6fedUmf2AAWnChAkAXHRR39Vdw/SNo0KNm0pLyXH7Q3KZlJgQVzLy7dL8biqtvYKu/ECPCjqJRML8yBl8evB78uqLuiuPbkxazMGaLF5P/ZgnZzww4IzGTaXlP5f9hXe3rGFd6Rr+u/d/fJnpxYLYaUwMSkF7At00qcoZ78sfoPLTx6ld8Treix9GOAmfoS4EQUAhlaNQyRkb6czYSPvvZDBZyC1uIrOwnszCeg7tbsZitTfR+rq7ER3iSlyQK5GhWkL9NINKP54urGYje/b9zNriXWSJHQhAQruJGTJXIr2jUYwMRu4RgNzVB6mTtkePSE2jjuUZqWwv20q7pAbRIkesi2CkJplZk6MYE+fdvXpp0rewvWQvW4p3U9pSgUSQEO8dw1WdK6G+ghBAYXkzn6zKZl9uLV5uav56wxgmJpy8FYfO1GFXQC9NI7MmB5toI9IthDvGXMfEoBSUMgdWbi/i6/V5zB0XPORgVK9r5F873qGoqZQbRl05rMgwBPor+96UXobFKjJ37IlTbS2GVv69812y6wqYFzGd6xIXoThLe0X9MWBAmjnTvpRetGgRTU1NHDhwAJlMRmJiIi4upzYD+70T7hZMQadQ5LGMjvHm45VZ1DZ14OWq7q6gq2qr7a6qA5gTPpmfc9by2cEfeHrmnxAEAXe1K/+XsoxXd33AR/u+4dbRSwe8IQiCwP9Nn8eM8jH8Y/mP1Kjy+Wj/t3y8/zsi3EOI944hxiOCcLegPkVGlYExuM++kYZ1H9K8/Xtcp5z+KiilQkZilCeJUfYVn8lsJb+smZziRnJKGjmYV8fmdLuVgVQiEOjtTJi/hlA/DSG+zgT5uODaR7HBqdDUVMnq
nZ+xpbmQRpkEJ6vIfLUfc6Jm4BuRguS43iWj2UpReSv5pU1klzaQUXuIDk02EnUbWFWEMIGLR0xlbIw/aqU9NWi1WUmrOMj6oh3srzqEKIpEuoVwS/ISJgQmDyi0WlzVyldrc9mRUYmTSs6tl47g4omhJxWsW43tpFVkkFq+n4yabKw2K56O7lwaM4epweN6rIT35dTyzk+ZjBvhw11XDk1NfF/lId7Y8zEWm4WHJ99Oin/ikMd6IWMv++5Z1CCKImtTS4gOdj2hTmBpcwX/2PYGbcZ27h1/C5ODBy9Z9VsyqATz5s2b+ctf/kJkZCQ2m43S0lJeeeUVxow5N3+pc4FYzwj2VhykoaOph9nbpAQ/Pl6ZxZZ95Vw1K4ppIeP5OvNn1hRs6VH2qpQrWRJ/Ke+kfc7mI7uYEWZXVZ8UNIYjTWX8nLMOR4WapfGXnvDGEBngzv9uv4UfNxfw3c50TE6VVNhaKGhYg8ivwFEvIx8nL7ydPPBQ26vptHHjcajMp2nrVyg8A3GMGX8G3q2jKORSRoS5dwvEiqJIfbOBgvIm8suaKaxoYX9uLRvTyrrPcVTJCfB0wtfTET93R7zd1Xi6qvHQqHDTKHEYxI3aJtrIrDzM6vTvONBRg1UQCJepuDpsElOSLkMQZDS2Gsgu76C6oY6qeh3lte2UVLdSWa/DZrMhdavGIbAQ0bcdrdSNeeFXc1nCFOTHyKy0GtpYX7SdtQVbadQ3o1W6cFnMXKaHjMfPZeA9gLzSJr7dkMfuQ9WoHGQsmRPF5dMihlw9V9tez96Kg+ypOEhOfQGiKOKpdmN+5AwmBCYT4RbS6zNV16Tn5c/TCPJx4U/Xjh50Kb/RYuKLjJ/4NX8TQRp/Hpx027C1xMlg7r1CyiluoqymnXuuHjXgqXn1Rfxj6+s4yBx4ZuZDhLn17dR7LjCogPTqq6/y2WefdcsFHT58mMcff5wffvjhjA7ufKZLYSGzJofpoRO6H/f1cGREmDtrU0u4YkYkrioNE4NS2FC0gyvi5vdwypwZNpFtJXv4aP+3xHhGdK+gliVcjs6k58fs1bSZdNySvOSEG5JymYSrZ0dx0fhglm8tZNXOYjqMejz8jASF2VA6d9BibCCvIRV9H0296nBvHPd+gPuRDbhpvNEqXdCqNLgqNbirtbir3XBXu6IYgmr1YBAEAU9XFZ6uKibEH7ULb24zUlLdSml1G2W1bVTWtXO4qIEt+8o5XgBCrZShcXTASS3HUSVH5SBDqZCikEsxCW3UkE+N5RAdgh6V1Ua8XgXWcbSYAviuxMT7KzbQ1mHqcV2JAN7ujgR6OxEea6TIlkq9sRZ/F18Wj1jK+ICkHtqDNe11rMhZz6biXZitZhK8Y7kleQnJfvED/u0sVhu7D1Xx89YisosbcVTJWTInisumhg+6+VQURY40lZFWeZC95Qe7U8mBLr5cETufsQGjCNEG9DuxEUWR177Zj9li4283julhLT8QOXUFvLX3U6raapkXOZ3rEq847Z+PCwex1wpp3Z4SVA5Spozqfw8op66Q57a+hlbpwuPT7ztpa/HfikF9sgRB6KFdN2LEiNOi6vx7Jkjrj5tKy96Kgz0CEsDCyWG88MledmZUMmWUP4vi5rG9dC8/Zq3mxqSjQpkSQcI9427iL2uf5+Xtb/PMrIdwVKiRCBJuS7kGZwdHfspeQ1lLJfeMuwkvpxNrWGmcHLjh4jiunhXFjoxKNqaVcWBbPTbRGXdNMImRHkSFOOHpBTKlkRZjG036Fhrb6qjJ201rXSmF5g5azHbx1ONxU2nxcfLEz8WHABcfgjR+BGkDTrsltdbZAa2zvffpWMwWK3XNemobO6hvNtDUZqCpzUhLu5H2DjM6vZm61jY6HEoxOZVic6wHEcL1JuJaRIraJlAiC0XpIMNJJSHAy5mRYQ5onR1w1yjxdFXj7abGy1VNaWsZnxz4jj11Bfg4eXJv0i1MDBzdIxDV6Rr49vBKthTvRipImRoy
jkuiZp2wKKSmsYN1e0pYl1pKY6sBbzc1t102ktljg7rTfgNhsVrIqssnrTKje6UuCALR7mHcMOpKUvwT+/TY6ovdh6rYn1fH/10ePyhtug6zni8yfmJdwTY8HN14fPp9xHufmoLAMD3N+QxGC9sPVjA50b/fCUJ5SxUvbn8TN5WGp2Y8OGjZqrPJgAGpubkZgJEjR/L++++zdOlSJBIJP/zwA+PHn9nUzfmORJAwPiCJtYXbaDO299ijGR/vS6C3E5+vzmFCvC8BLr7MCJ3I6vxNzAid0KPqxcPRjQcm/oHntr7Oi9ve5JGpd6OUK5EIEpYlXE6Qxp9307/gT2v+zlUjLubiyJmDUuJVOsiYNSaIWWOCaG4zsjermvScWtKyatmUZt+zcVTJCffXEOoXRKjvCKaOGoNi3UtI2xrxu/7vWNRONOlbaOhooqGjibqORmra66hqq2VXWXqPcmtPtRsR7qFEuYcS7RFOiGvgGSkzlcuk+Hk44efR88ZpsBg5UHWYHaWZ7KvMxGyz4OPkSYrBkxEF2QREjMNz2V1IHAZuJAb75vB7+z5n05GdaByc+cPoa5gZNqnH76M3G/gxezW/5G5AAOZHzuDSmDkD+vy0683szKhkc3o5mYX1CAIkR3vxx8WJjI71PmGfT4dZz8HqLPZWZLCvMpMOsx65VE6idyxLRi4k2XfkkJtPRVHkq3V5+Hs6cfHEkBMeu7t8Hx/t+5ZmQyvzIqdzTfylJ1XVOExvjlX63plZhd5oZdaYvtNv7UYd/9j6OnKJjEen3nNeBCMAQRxgqRMTE4MgCH2uhgRBIDs7+4wOrj/Ky8uZNWsWGzZsICDg9Ht/nC5Kmyt4aM3fuS7xCi6NmdPjuT2Hq3n2g1RuWTiCRdMjaDW288CvT+OhduW5WX/uFVR2laXz6q4PCHcN4i9T/9hjxVGna+D9fV+zrzITT0d3roybz5TgsUMyfevCZhMpq20jp7iJgvJmCsqbKa1uw2S2AhAkredul3XopM6kBt6Am5cXvu5qvN3sezddBQaiKNJiaKW0pZLi5nIKGospaCimvsOuguogcyDWI5wRXtGM8Ioi1DXwtDbDgn2/5kB1FnsrDrK/6hAmqxmN0oUJgclMDkjGecsP6PP24Dp1CdrJV51wL04URTYf2cUnB77DYDGyIHoWV8TN7+WXtK/yEO+mfUGDvompweNYmnBpd2/a8TS3GdmTVc2uzCoO5NVisYr4eTgyMyWQGSmBeLkOHCCb9C2kV2ayt+IgmTU5WGwWnBWOjPZLYExAIgnesafUdV9S1crdL2/ijkXxLJjcv85cTXsd76d/xYHqLEK1gdyWsqyXUeEwQ6frXvfRFVHEX/dnnEZMAeCxt3dQ09jBO3+b3ad6yD+3v83+6sM8O/Oh8+rvMOBUOicn55QuvmLFCt566y3MZjM33XQT1157bY/ni4qKePLJJ2lpacHT05N///vfaDTnRyQfDEFaf2I8wllbsIUFUTN73HDHxHkzJs6bz1bnMHaED/6eTtyeci0v7/gfnx38gZuSr+5xrQmBo5EKUl7d/QGPrHuBhybd3t275Onozl+n3MWBqiy+ylzO23s/48uM5UwPncC0kPFD6hmSSASCfVwI9nHhIuylpFabSE2DjtKaNipq29lX6sGYis9JLv6EVw/MRicenQErZBI8XdX4ejji46bG290RH/eRJISOw2e0mg5bOzl1hWTV5XG4No/PM34E7M6zMZ4RxHpGEOkeSphrEKohzqxbDW0UNJaQU19AZnUORU2liIi4KjVMD53A+IBk4jwjEQSoXf4qurw9uM+9Bc2Y/huNu2jsaOatvZ9ysDqLGI9w/m/MtQS49HxfzVYznxz4njUFWwhw8eXvEx/uJRZqtdrIL29mf04t6Tm15JU1IYrg5aZm4ZRwJif6ERmoHTA41ukaSC0/QGr5fvLqixAR8XJ0Z17ENMYEJBLlHnbagvuhIruaxZi4vgsuLFYLv+Rt4NvD
K5EKEm5KuoqLIqad9snFMCDI7HtITW0GMgrqWTonus/PyeYju0irzODGUYvPq2AEg9xDMplMbNmyBZ1OB4DVaqW0tJQHHnig33Nqamp45ZVX+OGHH1AoFCxdupRx48YREWFXwRRFkTvvvJNHH32UqVOn8vLLL/POO+/w8MMPn4Zf69xhYcwc/rn9bbaV7OmxlyQIAn9cnMjd/9zEy5+l8eLdUxgbMIr5kTNYlb+JQI0fs8In97jW2IBRPDXjAf614x0eXf8S1yRcxsVRM7vVCkb5xpHoE0tmTQ6r8zezInc9y3PWEujiy2j/BEb5xBHlHjZkcy2pRMDP0+mY/YNIOo6EIP/mBV6K3AVzH6LWIKemQUd1Ywc1jR3UNHSQfaQBnaGnUaGrswO+Ho74ekQzwWM02gjokFVTZSglr6GQ/VWH7O8PAl5OHvi7+OCldkercsFJoUYukSMCJqsJnamDJn0LNbp6yluqaNDbBVGlgoQItxCuGrmAJN+RhLoG9hA4bdz0GbrD23Gbce2ggtH+qkO8vvsjTFYztyQvYW7E1F6CqW3Gdv65/W1y6gtZEDWLZQmXIZfKMVusFFa0kFXUSGZhPVlHGugwWBAEiAzUsuyiGMbG+RDq5zJgEDJYjOwqTWdL8W6y6vIBCNL4c9XISxjrn0igxu+M2AM0tRoQBPB07W1pUNRYwlt7PqWkpYKxAaO4JWlJt4TRMKcfobOoIS2rBlGECfG9J5rtJh2fZfxItEf4eamAMag70wMPPEBZWRl1dXXExcVx8OBBxo4d2NVx586djB8/Hq1WC9iba1evXs3dd98N2Cv11Go1U6dOBeCOO+6gtbW113VaW1t7PV5dXd3ruHOV0X7xhLsG882hX5gYlNKjyshdo+L+pUn8/cM9vPV9BvcuGcX1o66ksq2Gd9O/xMnBkXEBST2uF+keyotz/8b/9n7OJwe+Z0dpGrcmL+2eCQmCQIJPLAk+sTTrW9hVto89FQf4OWcdP2WvQS6VE+kWQpRHGGGuQYS5BuHp6D7km5k6NBGfq/9G9bcvIFv9IqOWPYEstndKp73DRFWDjur6DqobdVTV66is17E/t5YNe8uOOdIZd814wrxkOLrrENStGKVNVLXUkV2b36+GnqNCjbejB7GeEYS4BhDhFkK4W0i/aar2nF007/wR56Q5aCYsGvB3FEWRH7NX81XmzwRr/Hmgn5LldpOOZza/SkVrNctiluFiDubDFTnklTZRWN7S3ejr7+nIlFH+jIryJCHCExfHE6fSDGYDq/I3sTJ3A20mHb5OXiyNv5SJgaNPi5fRiVArZYiifX+rq6rParPyU/Yavj28Eo2DMw9PvoMxw31Fp8yJ7nVdVXaph6vxdFUR0kfv0c8562g36rh12pLz0sNuUAEpOzubtWvX8tRTT3HzzTcjiiJPP/30gOfU1tbi6Xm0isfLy4uMjIzun0tLS/Hw8OAvf/kLWVlZREVF9XCl7eLjjz/m9ddfH+zvc84hESRcm3g5z2x+lRU567hyxMU9nh830pelc6L5al0uvh6OXD07ij9NvI2/b3mN/+x8j/sm3Mr4wOQe52iULjw8+Q52lO7l4wPf88j6F5kYOJrFIxf0SCNpVRrmR81gftQMdKYOsuryOVybR05dASty1nXbijvKVQRpA+wVcRp/grR+BGsDUMoGdiJVhSbge83jVH/9PBUfP4rvNY+h8Oy5yeqkVhCpVvSQx+lCb7RQWddOZb2Oyrp2Kjr/O3xAis7gDDgDQUgkAl5uDnh6yvB0dcDLVY2PqzMB7q74urvgrJYP6stnbq6l7pc3cfCLxGPurQOeY7PZeCf9CzYW7WBy0BjuGHMdCpkCURRp6zBT0xlcy2vb2Nj4Ha1CNZb8ZN7f3Qg04qCQEhGg5ZLJocSEuBEX4oary9BSkEWNJbyy631q2utI8h3J5bFzifGI+E1vNNHB9r2vtOwaZowOpNXQxqu73yezJpeJQSn8YfRSnBRDV5ge
pjcnutcJcgVGs5X9eXXMGRvU63OgM3WwpmAL4wOTe0iRnU8MKiB5eXkhk8kICQkhLy+P+fPno9frBzynv0KILiwWC3v27OGzzz4jPj6e//znP7zwwgu88MILPc658cYbWbSo50y2urq6137UucxI7xjGBybzQ9avTAhM7tUAec3caKobdXz6azZOajkXTwzlkal3849tb/DKzve4Kekq5kfN6HGOIAhMDh5Lsl88P+esZWXuRnaV7SPFP4EFUTOJ9Yzs8X47KtSM8U/snsmarGZKmysobi6jqKmM0uYKthandq9EBAT8XLwJdw0mwj2EGI9wgrT+vVJVysBYfK9/luovn6Xyk8fwXvxnVMEjB/W+qBxkhAdoCT/OKkIURVp1JirrdFTUtVPVoOsOXEVHWtAZepoaKuRS3DVK3DVKXJ2VaJwUuDg64NLZd6RWyVHJJKi2/BuJzYZl8v9R0WhAEIzYbCI2m4jZasNktmIwWekwmFlV9iMFusOEy1KwFCfybEYaDS0G6ps70But3a8t8ypFHlKJn2ECo5JGE+LrQpi/Bn8v5yGpXx9PZVsNT2/+D2q5iqdmPECcV9SJTzoDxIS4EeDlxLcb8ggPk/LPHW/RaGjhjjHXM7OzWXuY08OJ7nWC3IG80iZMZmufvlbbS/aiNxu47LgCqvOJQQUktVrNihUriImJ4ZtvviEsLKy7JLw/vL29SUtL6/65trYWL6+jb6KnpyfBwcHEx9vdRy+55BLuvffeXtdxcXH5XcgU3ZJ0NZnV2byR+jHPzHqox6avRCJw35IkOvQW3vo+A0EQmD8hhMem3curu97nw/3fUNZSyc3JV/eqnFPLVSyNv4yLo2axKm8Dawu2sbfiYHcp+ZTgMWj7KPlUSOVEuIf02PQURZH6jkZKmsspairjSFMpGTXZbC1JBcDZwYkE7xhG+9ldY7uqyxy8Q/C7+R9Uf/UcVV88i+fFt+OcePL5a0EQ0Dg5oHFyIDa0Z3WaKIq0683UNHRQ29RBXbOe+mY9DS0GGlsNFJY309Ju7LV3Nc0hmysc8/msfRJ7384c8PVlgbnIfY9gLo8kt9aLGucGXJ0dCPByYlSUJ16uqs6qQiUv7H0BP+dInpxx/WlduXxy4HukgpRnZv6p2134bCCVCNx66Uie+WwDj6xdidJBytMzHjzvNsvPB050r5PIHcgpse+TRgf3zjhsK9lDoMbvrFtInAqDCkhPPPEE33zzDQ8//DDfffcd1113HQ8++OCA50ycOJHXXnuNxsZGVCoVa9eu5dlnn+1+PikpicbGRnJycoiJiWHjxo2MGDE0P4/zCa1Kw20py/jPrvf55tAvvYz2ZFIJf70xhec/2sub3x1Eb7BwxYwIHpp0O18d+pmfstdQ1FTK/RNu7XPvwMXBiaXxl7Eodj47S9NYX7iNTw9+z2cZP5DgHcP4gGTGBozqU7OuC0EQ8HR0x9PRvVtrrCtIZdXmk1mTw8HqLHaUpiGXyhnjl8CMsInEe8cg13jhd+Pz1P7wMnW/vIGptgS3WTf0EBw9HQiCgLNagbNaQUSgtt/jLFYbbR0mOgwWdLWVSFd8hclzJLNTljDDBlZRBFG0q4RLJUilAgq5lIK2LL4rPMKUwInccukS1Mr+04E5dYU0G1q5Ofnq0xqMrDYrB6oOc3HkjLMajLqICXfCNeEgHWYr872Hy7nPFoLMgZziRnw9HNE49Uyn17TXkddQdN6rpw8qIIWEhPDnP/+Z1tZW/vOf/wzqwt7e3jzwwAPccMMNmM1mFi9eTEJCArfddhv33nsv8fHxvPHGGzz22GPo9Xp8fHx46aWXTuV3OeeZGJRCRnU2P2avJtI9lBT/hB7Py2VSHrlpLK98uY8PfzlMc7uRmxbEsSzhciLdQ3kz9WMeXvs8N41azMywSX3eBB1kCmaETWRG2EQqWqvZWpzKztI0/pf2Oe+mf8kIryjG+CcyNmDUgE2aXXQFqWmh7kwLHY9NtJHfcITt
JXvZUZrGzrJ0/Jy9uThqJtNDJ+Cz9DEa1n1Ey55fMNYcwevyB5E5nfh1TjcyqQRXZyVaJ5Hq9V9jkEqIuOoeZC79q1k0G1p57dcVRLqHcuf4ZSds3O1yaw1zPb3aYHqzAZto66GBeDb5eP93mIR2ws3z+PqXCnydvJiZcu7qof1eEeQK8suaeqmTAByszgLoVQR1vjGogFRUVMQ999xDa2sr3333HTfddBOvv/464eHhA563cOFCFi5c2OOxd999t/vfiYmJfPfddycx7POXW5KXUNxczmu7P+S5OX/u1csil0n407WjcXFU8OPmAuqb9dy/NIkx/on8c95jvJH6Mf9L+5w9FQe4bfQyPBz7brgEux36NQmXsTT+Uo40lbG7fB+p5fv5YN/XfLjvGyLdQxkXkMSEoOR+GzePRyJIiPYIJ9ojnBtGXUlq+X5W5m7kvfQv+f7wKi6Lncvs2Tfg4BtO/a//o+L9h/C6/P5B7yudbnTZO9EXHcB97i0DBiOAbzJXYLAYuWvsDYNSkZB27qdZbdYTHDk0lHIlAgI688D7tL8F5S1VbCnezWUxc1kcdzHPvr+b/3y1H4lEwvTkc7cp/feIzizQ2GrsNvo8lqza/G7ZrvOZQUn2/v3vf+eRRx7B3d0db29vrrvuOp544okzPbbfJQqZgocm345CpuCFrW/QYuhd6i6VCNy+KJ6bFsSx7UAFj729k5Z2Ix5quy7YLclLyKrN50+rn2VN/hZsndVy/SEIAmFuQSxLuJxXL36af89/gqtGXoLJauLTg9/zxxWP8dTGf7OxaEef+nT9IZfKmRw8lufn/IUnpt+Pn4s3H+3/lvtWPkmqowzvG55DolBR9dlTNG7+AtFqOfFFTyNWfRsNa9/HwTccl9HzBjy2Sd/CpiM7mRU2Cf8TqG53Edwp8ZTXh83IqSCTSHFVaajTNZz44DPM9tI9SAQJC6Nn4yCX8tgt4xgZ5sErX6SzM6PybA/vwkGmoKre3gfal55gdl1BZ9P3+VfqfSyDCkjNzc1MmjSp++drr72W9vb2Mzao3zseajf+MvlOmg2tvLjtLYwWU69jBEHgypmR/PWGMRSWN/Pwf7dRUdeORJAwL3I6/5r3OBHuIby/7yue3vQKla2D780KcPFl8YiLeemiR/nvgme4auQlNBtaeXvvZ9z+81/5YN/X1LbXD/p6giAw0juaJ2c8wBPT78NNreWdtM/5276PKbnoGtTx02ne8T0VH/0NU23poK97qjSs+whrRxseC+464V7WluLdWEUbl0TNGvT1g7UBeDt6sK5w22kXG/ZUu3XLLJ1NipsrCHDx7dbAUypkPH7rOCKDXPnnZ+lkHTn7QfNCQJApqOwMSP6ePcvsWwytNBlaCD+Pixm6GLQFqNFo7I6+dXV12GwDz8qHGZgI9xDuHX8LhY0lvLrr/X7TPpMS/XjurknoDGYe/u9Wso/Yb1JeTh48Nu1e7hxzPaUtlTy89nlW5Kw/4WrpeHycPFk84mJemf8kz8x8iNF+Cawr3Ma9q57kjdSPhxSYwF7i/vdZD/PnyXcgl8r5795PeVnRTNmsqzC11lP+wcM0bf8O0Woe0nWHii5nN+2Zm9FOvAIH75ATHp9emUm4a/CQmk0FQeDSmLnkNxwhtXz/yQ+2D1yUzrQZdaf1mieDVJBgsfVc2aocZDxx63i8XFU8/9EeGlrOfmrx944gtSuhAPi49wxIpS32lWqgxq/XeecbgwpIy5Yt49Zbb6WhoYF//etfLFmyhGuuueZMj+13z9iAUdycfDVplRl8uO+bfmfZMcFuvHzvVJzUCh57ewd7s+yrIUEQmBE2kX/Pe4JEnzg+Pfg9z25+lSZ9y5DHIggCMZ7h3Dv+Zt5Y8HfmR85gZ1k69//6NJ8d/AFDHx5JA10rxT+Rl+Y+wr3jb8FsM/NGyRbeiA4mKyKW+i1fUv7un9AXD1x+fbKYm2uoW/UWCp/wQbnc2mw2ihpLiPWMGPJrzQyb
SLA2gA/3fUO76fQFEJlE1isQnA1iPSOobKshr76ox+Mujgoeu2UcBpOV1789OGxHc4YRZDKa242olbJezsAVndmRoWhWnqsMKiAtXryY++67j4ULF2KxWHj22WdZtmzZmR7bBcG8yOlcGjOXtYVb+TV/U7/H+Xo48s97phDk48xzH+5hx8Gj+XtXlYaHJ93OHWOuo6ChmL+u/QcFDcUnPSY3tZYbkxbz2sXPMDloDD/nrOOBX58hrSLjxCcfg0QiYXLwGF6Z9yT3jLsZBAmfWGt4OTaE9Qoz+V8+Tc33/8TcePr2ImxGPTXfvgiiiPeiBxAGodvXZGjBbLPgexJOplKJlDvHXE+rsY330r86mSH3icFiOCWV7tPFrLDJuKm0/HPH/yhqLOnxXKC3M9fPjyUtu4a07JqzNMILA0EipbXdhMaxt3pKi6ENAQGtw/nfrzmogNTe3s6+fft4+OGHue6669i8eTMdHR0nPnGYQbEs4TLG+CfyyYHvOVST2+9xGicHnrtzElFBrrz0WRq7D1V1PycIAjPDJvHc7D8jl8p4evN/yKw5NbV2N7WWu8bdwLOzHkKtUPHS9rd4ffdHQ14JSCQSpoSM5Z/zHuOvU+4iwC2QXx3hH2FevNuSy/pPHqZm5dtYWupOabw2k57qr5/DVFeG1+UPIHcb3IyxyyHXUdFbQHQwhLkFsXjEAnaWprGzNP2krnE89bpG3FVnv+xbJVfy+PT7kAlSHln/Eu+lf9kjjbtgUihebmp+2FxwFkf5+0eQymnRGXFx6j1JaTG04uzg2MMY8nxlUL/B3/72t25lBhcXuypxX7pzw5wcEkHC3eNuwsfJk9d2f0ibsf+CEbVSzlO3jSciQMNLn6Z17yl1EaT15++zHsbL0Z2Xtr9NcVP5KY8v2iOcF+f8jSvjLmZ76V4eXv1cd9/DUJAIEpL94nl8+v38e94TXBQ5nSKtCx/6uvBI837e+Pphdv/4AvqK/CFf29xUTeXHj2Eoz8Xr8vtRhw++H6Pri2w5hfLty2MvItw1mA/3fX3KqTuDxUhFWw1B2nNjT8DfxYd/znuU2WGT2VC4nXtWPcFL294irSIDQRCZPSaIQ4UNtLQPvkJzmKEhSGW09LdCMrahcRia8eK5yqD6kIqLi3nttdcAcHZ25pFHHuHSSy89owO70FDJldw/4Vb+tu4FPj3wA3eNu6HfY9VKOU/cOp6HX9vG8x/t4ZUHpuGhPTq716o0PDrtHv627gX+s+s9Xrro0R4q4yeDTCpjSfxCUvwTeD31I57b8hpzwqdwXeIVQ/YtAnu++6bkq7k2cRHplZlsK9xOak0OO0wluGz+JyNtDozxTyRx5FwcPQP7LWe16lpoSVtFy+6fEWQKfJY8MqRgBODcKQ460ERgIAxGC01tRub4XcLbh9/glQ3fEOcwGZ3BjN5gwWCyYrJYsVhtiCIIgr1xV6mQ4aSSo3FywNNVRaC3M/6eTuTUFWATbcR4RJ7UeM4ETgpH/pByDVfEzWdNwRY2HtlJWmUGrkoNI7SjQC5wpLKFUVFnXoH8QkSQymltMRLZhzpJq6ENjfL8T9fBIAOSxWKhvb0dJyd7/btOpxvexDwDhLgGsiB6Fity1nNJ9KweVubHo3Fy4PFbxvGnV7fw8ufpPHfnpB5inm4qLXeOuYHnt77GuoKtLIgefDnzQIS7BfPinL/x1aEVrMzdwP6qw9w6eimj/eJP6npyqZzxgcmMD0ymw6xnb/FeduVuJq29ip116cg3pBFphpFKDxJcg/Fy8gRRxKprxlh9BEN5DtisOMZOwH32zchchi6146RwxEHmQO1xfT8ms5XGVoNdJ6/FQEPrUc28xlYDTa0GGluN6I1Hiw/kYX5kWNPZc8AZiahA7SBDqZAil0uRSSVIBLCJdmkjg9FCu96M2XK0MlIuk6CNzUWqkqM0eSJ2yhudK7iptVyTcBlXjbyE/VWH2FC4nR1VW1Emwk9FzQQGXH3OKEz8rpBI
adWZ+rQsaTG2ndf6dccyqIB0+eWXc9VVVzFv3jwEQWDdunVcccUVZ3psFySXx1zEmvwtrMrbyB1jrx/w2EBvZ25flMB/vtrPyu1FXDq1p3LGKN84oj3CWV+4/bQFJLA3994w6krGBYzinb2f8+K2N0nxS+CGUVeekkePWq5iWuRUpkVOxWQxcfBIKnsLdpLRWk6W2MA3jQ14VFuI7DARZRKJVnvjOm4hzgkzUHgMXjXAbLHS2GqkscVAY5s92ChsTuzOK6A4bVd3EGrr6N0fppBLcXNxwM1FSaifhtExSrTO9p+1zg60ieG8mfEGd93qybyoqYOyRe8wWKht6qC0uo2csjo2dqzDUu/Jw//dSYivC/MmhDBrTCBKxdCMFc8kMom0Wz1+Y0YOr2/+kVxpJvetOszS+Et7GEcOc+oYkWOxir007MBe1KC9kFJ2t99+OxEREezatQuZTMZDDz3EtGnTzvTYLkicHBwZF5hEavl+bktZdkIr6JkpgWw7UMFnq7OZPMoft+M8dyYEJvPR/m9p6Gg67TPXaI9wXpz7CCvzNvJ91ioe+PVpZodPYVHcvEHp5A2EQqZgTOQUxkROQRRFqtpq2F91mIyqLPbVF7DLakIqdBBFLaNqD5EkFQnWBmA0Walt6qC2SU9dpxp4V4Bp6lzZtHX07oFSREmRKVtQd5jwdlMTG+qGm4sSdxcl7hqV3d5Cq8JRKRswyIiiF1/nu5LTkM984cTfEUEQcFTJCVVpCPXTYHQpREw388ilV1Fb5sDaPaW8/UMGX63N5erZUcybEIJcdm7d6EtLrVjLRvD8spv5NvtHPjnwPVl1Bdw/4dZTThUPY0cn2gOR5riiBpPFhN5i6G5cPt8Z9JRrypQppKSkdKfqmpubu91ghzm9JHjHsrU4laq22hP2FgiCwP8tiuePL23k89U53HP1qB7PdzXLVbfXnZFUikwq47LYuUwLGce3h1eyvnAbG4t2MCtsMpfGzBlQa2+wCIKAn4sPfi4+LIiehdlqZl9ZLjuLD5LdkEt23XK+zFwOZgcsTZ5Ym7yxtbqBKEUiEXB1dsBdo8TXw5G4MHfcXZS4uijtAafTQ+nDzDqKm8t45cZTm2gJgkCoWxDlLUMvZbfarKzI3UC4WzDJAdEIgQLzJ4ZyuKiBz1fn8M5PmazccYT/WxTfpx/O2aCtw8Sa3SWMifUm1MOHhyffwaq8jXx84Dv+s+t9Hpr0f8MrpdNAh80eiFyOK2po7dz3vKCKGj7++GP+9a9/YTbbZ5Zdee3s7OwzOrgLlS6BxLqOhkE1u/l5ODF/YigrtxexaHo4AV5HP5xyiX2Gaj7DOnJd9hqXxszhh6zVrCvcytrCrUwMHM2C6FmnJGsiiiJV9ToO5NdxuKiB3JImaho7ACdgNBqtiMa3BZtzDS1e5Vi8ylFIFIz0imNKSAqj/UagHKDwQhRFKttqTnlV14WLgxMFJ6Fvt61kDzXtdVw/6fYeq7ARYe48d+dE0rJreHf5IZ58ZxeTE/34w2UjcdecXKn66UAURV7/9gB6o4Vr58UA9oDclR7++MB3bCrayazwyWdtjL8X2q327/HxK6QWY5v98QupqOHTTz/lyy+//F37FZ1LdKU5hhJErp4VxbrUEr5ck8vD16d0P95uss+gnBTq0zvIfvB28uTOsddz1YgFrMzbyMaiHWwv3Uu0RzjzI2cwNmDUoJS0bTaRnJJGdmRUsudwNdUN9r43NxclsSFuXDwxhHB/LSF+Lj3y6marmUO1ueytyGBP+X72VR9ALpWT6BNHil88CT6xPZTNbaKNX/M2UdJczi3JS07Le2Cz2U6Yaj0ek9XMN4d+Idw1uNvV91gEQWBMnA+jojz5YVMBX6/PIz2nhmvmxnDJ5LDfPI1ns4m8+1MmOzOquPmSOEL9eppAXhw1kx2laazIXT8ckE4DXQFJ69RzYtUlzqy5kFJ2np6ew8HoN6Sj03ZgKOXUWmcHFk4J47uN+SyeFdl9
gyjvlBX5rWXpPRzduDFpMVeNXMCmop2szt/Mf3a9h6tKw9zwqcwOn9znrK6uSc/a1BI2ppVS26RHLpOQGOnJ5dMiSIryxNfDccA9HLlUTpLvSJJ8R/KH5KVk1xeQWr6fvRUHSas4CICrUoOPsycyiZTKtloaOppI9IllVtikfq87FOo7GnFT9nbpHYhf8zZR39HInWMHdp6Vy6QsmRPN1KQA3vkpkw9WHGb1rmJuWBDHhJG+SE7BNn2wtLQb+e/XB9iTVc3l08JZNL235JIgCEwOHsNH+7+lSd+Cax+uxcMMHp3FfqvWOh+3QjJ0rpAupJTdpEmT+OKLL5g1axYODkdno8N7SGeGms5O+MF6FHVxxfQIft1ZzAc/H+aZ2ycgCAL5DUfwdvTAycHxxBc4A6jlKhZEz2J+5Az2Vx9mdf4mvj60gu+zfmVSUAoXR80k1DWQrCMN/LSlkNRDVYhAYqQn186LZfxIH9TKk9sYl0gkjPCKYoRXFDcnXU1ZSyWHanM50lRGna4Bg8VEtHsYKQkJTAoac1rKq202G0eayxgfkDzoc5r1LfyQ9SvJfvHEe8cM6hxfD0eeuHUc6Tm1fLDiEC98vJcwPw1XzIhgUqIfMunpXzGZLTbW7y3ls1+z6TBYuH1RPAsmhfb7vjl19nfpLQZcGQ5Ip0KbRYqTSo5c1nPl3dy9QrqAUnbvvPMOJpOJZ555pvux4T2kM8eR5jIcpAq8HQc2lDseJ7WCay6K5t2fDrEjo5KJCb7k1BUw2i/hxCefYSQSCaP94hntF09FazW/5m9iS3EqW4p342DyorU4ALXJj0XTI5g/MRRvt9ObYhQEgSCt/4C9XaeDgsZidKYORngNvqn184yfMNnM3Dhq8ZBeSxAEUmK9SYr2Yuv+cr5el8vLn6fz/s+HmDUmiKlJ/oT4upxyoG1o0bMpvZxVO49Q16QnNsSNP16V2KdR3LEUNBYjk8iGPLEapjdtZila594l3436ZhzlqnNC9/B0MKiAlJExNFHNYU6N7LoCIt1DT0qbasHEUDallfHW9xk4uXXQZtIx0jv6DIzy5PFz9ibeYTqZZe6UmrMQfEtxiNqHt3MlIbFuuGt7f/HOF7aX7kUmkZHkOziH3Oy6fLYU7+by2IvwPckeLqlEYMboQKYlBZCWU8PqXcX8sLmA7zbl4uZrxC/QjMrZhEJpw1Elx1Xlgr+zDzGeEb1eUxRFmtuMFFa0kFPSyMG8OnJKmgAYGe7OXVcmMjrG64RBrrGjmS3Fuxnrnzhc+n0aaDcLuDr3TuE36Jtx+x01Ig8qIJlMJrZs2YJOZ9foslqtlJaW8sADD5zRwV2ItBhaKWkuZ2n8yUkzSaUSHlw2mgf/s4U3Vm8EF86ZgGSziew+VMXX6/MoqmjBx13NXbMWMSXZj7TK/SzPWcebez7h20O/cFnsXGaETkR+Ht3MOkx6thTvZlzAKBwHUURisVl5L/0rPNRuXBE3/5RfXyIRSIn1QuHaiCoqgwNVh9GLJgpFEJukiBY5AiDITSCxq0MorC64mMNQd4TS0SalvlmP3mjX9JMIEB6g5bp5MUwZ5d+nU2lftJt0vLT9LWyiyJKT/BwP05N2o0BoHyukpo7m01Ydei4wqID0wAMPUFZWRl1dHXFxcRw8eJCxY8ee6bFdkHQpdCd4x570NQK9nXlw2Whe3rEXpdUZpXB29o+6sFhtbDtQwXcb8ymtbsPXw5H7lyYxPTkAaedex+TgsUwKGsP+qsP8kPUr76V/xY9Za1gUN4+ZoRORDcJG4myzKn8TerOBhdGzB3X8L7nrKWup5M+T70ApO7VVoc1mY3vpXn7MWk1FWzVOCkemho4lyXcE4a4htLcKFFe1UVbbZm8cbq+lyVaBzqGMeuUBcDiIRhPAqPAERniOIMxfS7i/Zsj7d7n1hbye+jENHU08NOn2k171DdOTNpPQZ8quQd90xtPQvyWD+pZnZ2ezdu1annrqKW6++WZEUeTpp58+
02O7IDlYnY2zwpEw16BTus6EeF+0eSaaa5346xvb+fP1KT36k34LWnUm1qWW8MuOI9Q36wnyceZP145myij/Hrp7XQiCQLLfSJJ8R5BZk8O3h37hvfQv+TlnLVePXMjkoDHnrMR+Y0czP+esZYx/4qB0xarbavn28ErGBowipY8y76GQXZfP++lfU9pSQbA2gHvG3cz4wKQeq0t3Rwj27buwoKa9jg1FO9hYtIMDxpWUNe9kkiYFpS6JcEXwCd9zq83Kodpcfs3fzL7KTDwd3Xl8+r3Eep474rDnOwYLvVRYOkx6mg2tv3kF7ZlkUAHJy8sLmUxGSEgIeXl5zJ8/H71+2Lb4dCOKIpk1OYzwjj7lG68oinRYW5kUM5K0jXru+/cWls6J4rKp4b0cJ08nVpvIocJ6NuwtZcfBSkwWG/HhHtx1ZQKjY7wHVZYsCAIJPrHEe8dwoPowX2X8zOupH/FzzjqWJVxGku/Ic0pwVBRF3k3/Aqto44ZRV57weJto439pnyOTSE+p98lgMfLZwR9YW7AVD7Ub90+4lfGByUNWRvB28mRZwuVcNWIBeysOsqU4lZW5G/g5Zx1quYpwtyACXPzwdHTDUa5GEAT0ZgMN+mbKWirIqS9EbzbgrHBkyciFXBw186QU4IcZGHdNz/e0vNXuh/Z7sC7vYlABSa1Ws2LFCmJiYvjmm28ICwvr9kca5vRR015Ho76ZkV6nvucjImIVbQR4arjxoRm89X0Gn6zKZuWOI1wyOYzZY4L6TAGcDGaLlcNFDaQermZXZhUNLQbUShmzxgRx8aRQQnxPriRVEASSfEeS6BPH7rJ9fJn5My9se5MRXlFcn3jFOaNwvKZgC+mVmdwwajHeg5itri/czuHaPG5Pufak8//lrVX8a8c7VLbWsCBqFkvjLz3lSiu5VM7EoBQmBqXQbtRxoDqLrLp8ihpL2HhkJ0ZLT78jmUSGn7M3k4LGkOgTS7LvyPNqz+984/iAVNopTxX4O7Au72JQAemJJ57g22+/5eGHH+b777/n+uuvHy5oOAPkdcrNxHiEn+DIEyMRJDg7OFHf0Yi7RsVjt4wjo6COr9fl8fHKLD79NZuECA+SoryIC3MjzE8zqJWTKIrUNesprmqloKyZ7OJGsosbMZqsKGQSkqK9uHVhAGNH+uBwmlZiEkHCxKAUxvqPYn3Rdr49vJK/rnuByUFjWJpwGV6OQ7ecOF1k1uTw8f5vSfYdycVRM054fE17HZ8d/IEE71hmnmQj7v6qQ7yy8z0cpAoen34vIwfZuzQUnBwcmRw8hsnBYwD7311n7qDDbABRxEGmwFnhdM6mUH+PHC8TVdJcjoPMAc+z+Pk/3QwYkK6/vmfX+A033IAoikRHR/Prr79yzTXXnPEBXkiUNJcjl8jwd/E5LdeL84xkX2UmJosJhUxBQoQnCRGelFS3sjm9nNTDVXz4y2HAXlHl4arGQ6NE4+SAykGGRBCwiSJGk5W2DhNNbQZqm/QYTfYqLEGAYB8X5owJIinGi4RwD5QOZ674QCaVMS9yOlNDxrE8ey0r8zawu3w/8yKmsShuHs4Og6sCO13k1BXw0va38XP25t7xt5wwVWaz2Xg99WMEQeCOsdedVNpxU9FO3k77jBBNAH+Zchduau1Jjn5oCIKAk8Kxu9l1mN8e9+P2kHLqC4lyD/ldidcOePe47rrrAFi3bh3t7e1ceeWVSKVSli9fjovL76Mz+FyiRlePl6PHkHXQ+mN+5HRSy/fzZebP3DDqyu4bYLCPCzcuiOPGBXE0tOjJK22iqKKVqnodDa16ymvbMZos2GwigkRAqZDipFIQ4OVMcrQ3fp6OBPu4EOrnctIqCqeCWq7imoTLmBsxla8PrWBl/kY2FO1gYcxsLo6aiVp+5gVH95Qf4LXdH+Km1vLY9PtQK078mt9lrSK3vpB7xt18Us2iq/M388G+r0n0ieVPE/9vQMHYYX5fKOVCj8leh0lPaXMFi0dcfBZH
dfoZMCBddNFFALz//vt89dVX3cvz6dOns2TJ6RGiHOYobcZ2tKrTF+jjvKK4KGIaK/M20GHWc/2oK3rNcN01KibEq5gQf/5tjLqrXblr7A0sjJ7NV5k/882hX1iVt4kFUTOZFzl9UL1AQ8VsNfP1oRX8nLOOCLcQ/jzlTrSDkG3JqM7m+8OrmBoyjikhQ2+ZWFewjQ/2fU2KfyIPTLh1eK/mAkPr2PNWndtQiIhIjGdvHcHzmUHlV5qamjAajahU9lmgTqejpaXljA7sQsRitQxqpj0Ubk6+GkeFmh+yfmVXWTpTg8cxyncEYa5BaFUufS73bTYbbaZ2WgxtNBlaaNK30GxopVnfQrOxjTZjO+0mHQazEaPVhMVmVyWXSqQopArUciUuDk64KrV4OLrh6+RFgMYXfxefM9K1H6jx4+HJd1DYWMK3h1fy9aEV/JSzlpmhE5kbMfW0pEBFUWR/1WE+OfAdlW01zA6fwk2jFqMYRCFBra6BV3e9j7+LD38YPfQ0987SdN5L/5Jkv3genPCH86Ina5jTi1bd829+sCoLuURGpHvoWRrRmWFQn+xLLrmEq6++mjlz5iCKIqtXr+bqq68+02O74FDIFBjMxhMfOAQkgoSl8ZcyITCZ5Tnr2FK8m7WFWwF7lZSzgyMOUvtN1Wy1oLcY0JsNiIi9rqWSKdEonXFxcEar1KByVuIgVXTbSVhFGyaLCZ1ZT6uxjbKWKpr0Ld3XkgoSgjT+RHmEEecVyQivaFxO475PuFswf51yF8VNZfySu4G1hVv5NX8T0R7hTAkeQ4p/4pCr2vRmA6nl+/k1fxNHmsrwdfLikan3MMo3blDn60wdvLD1DWyijYdPogE2qzaP11M/ItojbDgYXcBonY7+3UVRJK0yg3jvmFNuqD7XGNSn+7777mPEiBHs3r0bgL/+9a/DFuZnAHeVK5m1OWfk2sHaAO4dfzMmi4nCphJKmiuo72ik3ajDaDUB9rJfpcwBJ4UaFwdnNEpntEoXXJUatEqXk9qzMFnN1LTXUdZSRXFzGYWNxWwu3s2agi0ICIS7BZPin8BY/1GDMiMcDCGugdw9/iauG3UFm4/sYmtxKu+lf8V76V8RqPEjxiOcUNcg/Jy9cVNpUMtVSCVSzDYLrYY2anX1lLZUkl2Xz+HafCw2C37O3twx5jqmBo8bdFAwWky8tP0tqtpreXTq3UNWLShvreKf29/G28mDP0+5c1CrsWF+n2jVRzMLZS2V1OoauDz2orM4ojPDoKdbs2fPZvbswUmiDHNyhLgGsrUklYaOpjNiNw72VVisZ+Rv1kWvkMoJ1PgRqPFjYtBowK7hVtRYQkZNNvsqD/FV5s98lfkzgS6+TA4ey5SQsadFIVqrdOHy2Iu4LGYuFa3VpFVmcKgml+2le1lXuO2E5we4+DIvYhpjA5KI9ggbUlWcwWLkn9vfIqeukHsn3Dzk0uxWYzsvbn0TmUTG36bePVzddoHjfExA2tvp63UuqPifbobX/+cQo3zi+ARILd/PxVEzz/ZwzhgyiZQojzCiPMJYPGIBjfpm9pQfYEdpGl9mLuerzJ9J8IlldvhkRvslDMphdiAEQSBA40uAxpfLYy/CJtqo1zVS1V5Lk74FvdmAVbQil8hxdnDCQ+1KgIvvSe/ntRnbeXHbW+Q3HuGusTcwKWjMkM43W828vP1tGvXNPDnjgbPaZzXMuYGz6uitOq0yg0i3kN+l6eFwQDqHCND4Eu4azJqCLcyLmH7BNB26qbTMi5zOvMjp1LTXsaV4N5uKdvGvHe/gqtIwJ3wKs8OnDKqabTBIBAleTh54OQ3Nb2owlDZX8M/OYPLAhD8wPnDwRn1g3x/4X9rn5NQXcv+EW4nyCDvtYxzm/EOttqdrG/XNFDaWnLQbwLnOhXHHO4+4LHYuVW21g0op/R7xdvLk6pELeeOSv/PnyXcSpPHnm0O/cNeKR3kj9WOKm8rO9hD7RBRF1hdu55H1L2KwmnhyxgND
DkYA3x1eydbiVK4eeQkTg1LOwEiHOR9xUtlTdukVmQCMOUVB3nOV4RXSOca4gCTivWP47OAPxHlF/q6EE4eCRCIhxT+BFP8EKttq+DVvE5uLd7OleDcjvKKYHzmDFL+Ec2IVWdlWwwfpX5NRk028dwz3jLsJ7UmkU9YX2mWRpodM4Mq431fD4zCnhrPKvkJKqzyIt6MHAS6/H/26Yzn73+ZheiAIAn8ceyMquZJ/bH2DWl3D2R7SWcfP2ZtbRy/l7YXPc13iFdS01/Pyjv9xz6on+Cl7DS2G1rMyrmZ9Cx/u+4Y/rX6W/IYj3Jq8lEen3XNSwWh32T7eTf+CJN8R/N+Ya88pNfNhzj5OjgoMZgOZNbmk+Cf+bj8fwyukcxA3tZa/Tb2bZza9wuMb/smfJ99J+DmibH02cVSouTRmDguiZrK34iBrCrbwRcZPfH1oBSl+CUwLGcconxFnvFfnSFMZawq2sK04FatoY0boRJbELzzpPa69FQd5ddf7RLqF8sDE2065iGOY3x9KBzkZNTlYbBZG+8Wf7eGcMYYD0jlKqGsgT8/8Ey9se5PHNvyTJSMXcknUrOHGSOyKEOMDkxkfmEx5axUbC3ewtSSV1PL9OMpVjPZLYLR/PPFeMTg5nHq5tCiKlLZUkF6Zya7SdEpaKpBL5UwLGc+lMXPwOQVX1J2laby2+0PCXIN4ZOrdv7tGx2FOD4JExr7KdFRy5e9OLuhYhu9u5zBBWn9enPs33kn7gi8yfmLTkZ0sjlvAxKDRp02A9XwnwMWXG5IWsyxxERnVWewsS2df5SG2lqQiIBCk8SPCPZRQ1wACXHzxcvJAq9T0uwoxWIzUdzRS3VZHaUsFRY2l5NYX0mJsAyDKPYxbkpcwOXjMKfcG/Zq3iY/2f0uMZzh/mXLXbyIKO8x5iiBhf9VhEr3jftcr6OGAdI7j7ODEnyb9H/sqD/H5wR94LfVDvsj8iRmhE5gQOJoAF98zmk82Wc006ptp7GimxdhKi6ENnakDvcWAyWrGZrMBdmsIlUyJs4MjWqULHmo3/Jy9T8sKZTDIJFKS/eJJ9ovHarNS2FhCRk0OOXUF7C5LZ0PR9h7HO8pVKOVKZBIZomjDbLPQYdJ3q1Z04ePkSYJPLCO8oknyHXFaej8sNisf7/+WNQVbSPFP5P7xtwyrMAwzIBW6WpoMLST7jTzbQzmjnNGAtGLFCt566y3MZjM33XQT1157bZ/Hbd68mWeeeYaNGzeeyeGc1yT7jWSUbxz7KjNZU7CF7w//yneHV+Hp6M4Irygi3UIJ1vrj6+yFk8JxUEHKJtpoN+po1LfQqG+ivqOROl0jdboG6nQN1OoaulcGxyOXyFDIFN3irGarGaPF1EsDT6t0IcwtmGj3MEZ4RRHuFnzGV3fSYxpvwZ5ya9A3UdlaQ62uniZ9C21GHQaLEYvNgiAIyCUy1HIVLkpn3FRafJw8T6k5tj/qOxp5ddcH5NYXckn0bK5LWHROVAoOc26TVV8IwCjfEWd5JGeWMxaQampqeOWVV/jhhx9QKBQsXbqUcePGERHRM/9ZX1/Piy++eKaG8btCIkhI8U8kxT+RJn0LeysOcKA6m/SKDDYf2dV9nIPMAa3SBSe5GgeZAqlEioCAxWbBZDWjNxtoN+loM+mwibYeryGVSPFQu+Hl6MZo/wQ81G54qF1xU2nRKl1wcXDCSeHY516WTbShM3XQpG+hVtdAVVstJS3lFDaUsK/S3j+hkitJ8h3JuIBRJPmO/E32TARB6Pw9Tl2O6GQRRZFtJXv4cN/XWEUb90+4dbjPaJhBk99QTJDG/7Q1h5+rnLGAtHPnTsaPH49WqwXs3kqrV6/m7rvv7nHcY489xt13382//vWvPq/T2tpKa2vPst7q6uozMubzCVeVhrkR05gbMQ1RFKnV1VPWUkl1ez0NHU00G1rQmTowHbNykUmkOCrUeKjd7AKqSic0Di64qjS4qexWEVoHl5OesXfZpjs7OBGk
9e/xXKuxnazaPPZXHSa9MoOdpWk4yBwY5z+K6aHjifOK+l05Xx5LdVstH+7/hv1Vh4lyD+Pu8Tfh4+R5toc1zDnGQPe6Iy1lzAmbcTaG9ZtyxgJSbW0tnp5Hv3ReXl5kZGT0OOaTTz4hLi6OxMT+u44//vhjXn/99TM1zN8FgiDg7eSJ9zl8k3NxcOqujLPZbGTV5bO9dC+7ytLZWpKKl6M7s8ImMyN0wkn18ZyLtBnb+TF7DavzNyOTSLkp6aoLShJqmKEx0L3OaDER4/H7ra7r4owFJFHs7adz7L5GXl4ea9eu5aOPPhpwxXPjjTeyaNGiHo9VV1f3ux81zLmPRCJhpHc0I72juSXpavZUHGB94Xa+zFzON4dWMDYgibkRU4nzjDwvGwBbDK2sytvE6vzNGCxGpoWM55qEy36XYpjDnD5OdK+L8Qg/G8P6TTljAcnb25u0tLTun2tra/HyOtqvsXr1aurq6rjyyisxm83U1taybNkyvvjiix7XcXFxwcXl9503vZBRyBRMDh7L5OCxVLZWs65wO5uLd7GrLB0/Z29mhk1iasi4cz53LooihY0lrCvcxvaSPVhsVsYFJrE47uJe6cthhumLge51KrnqjFnSnEucsYA0ceJEXnvtNRobG1GpVKxdu5Znn322+/l7772Xe++9F4Dy8nJuuOGGXsFomAsLPxcfbkxazDXxl7KrbB/ri7bz2cEf+DLjJ0b5jmBK8FiS/eLPqebRmvY6dpXtY1vJHspaKnGQOTA9dAILombidxqs04cZBuzfjfMxWzBUzugK6YEHHuCGG27AbDazePFiEhISuO2227j33nuJj//9yl8Mc2ooZAqmhY5nWuh4ylur2HxkN9tKUkmvzEQhlZPoE8dovwRG+cYN2ZL8VDFZTOQ2FJFZk8O+ykOUtlQA9obZ20YvY1JQymkvFR9mGP9TUAM5nxDEvjZ7znHKy8uZNWsWGzZsICAg4GwPZ5jfAJvNRk59ATvL0kmryKBR3wyAv4sPsR4RRLiHEuYaRICLz2mTVzJbzVS21VDSXEFRUymFDcUUNpVisVmQChKiPcJJ8U9kbMCoYRO9Yc4IXfe6J95/jmsnLz7bwznjDCs1DHNeIJFIiPOKIs4riluTl1LaUsHB6mwO1+ayqyyd9Z1KDFJBgreTJz5Onnio3dCqNLg4OOKoUOMgdUAhlSMRBETo0ZelM3XQamynydBCQ0cjte0N1HY0dBfnyKVywrSBXBw1g7hOC3iVXHkW35FhLiR+60zA2WI4IA1z3iEIAsHa/2/vTmOjKhs2jl8DbdlKKYUuPKC8AbENYYuC7CU8ULpRdkNZLAqCgCyWSNjEQMSASFKIJGyi4UNRSkGwBAHZZGmDghrW+kLCIn0ZSi1SaEs7Mz3vBx4m1hateRznHvr/JU2Yc8+cueZOOdec6cw9rdQ6uJWGRMWowqqQ/X6+rv76s278+n/Ku29X/oMC/e8vV/WgvLjG+61jq6Mm9RurWYOmatvsf9Sn9Ut6pkkLPdvk0QoYrB8Ib2lav3a8Q5NCgs+rY6ujfwVF6F9BEer9bOUxp8up++XFKnaUqMxZLofLoYr/nPX41amrgLoBauBfT438G6phQIOn9sO58G215SMDFBKean51/dS0QZNa8x8aT6fashI8TwcBwHC14S3fEoUEADAEhQQAMAKFBAAwAoUEADAChQQAMAKFBAAwAoUEADAChQQAMAKFBAAwAoUEADAChQQAMAKFBAAwAoUEADAChQQAMAKFBAAwAoUEADAChQQAMAKFBAAwAoUEADAChQQAMAKFBAAwAoUEADAChQQAMAKFBAAwAoUEADAChQQAMAKFBAAwAoUEADAChQQAMAKFBAAwAoUEADAChQQAMAKFBAAwAoUEADAChQQAMIJHCykrK0sJCQmKiYlRenp6lfGDBw9q6NChGjJkiKZPn6579+55Mg4AwGAeK6Tbt28rLS1NW7du1e7du7VtEpZtFQAADjxJ
REFU2zZduXLFPf7gwQMtWbJEGzdu1JdffqnIyEh99NFHnooDADCcn6d2nJ2drR49eig4OFiSFBsbq3379mnGjBmSJIfDoSVLlig8PFySFBkZqaysrCr7KSoqUlFRUaVtdrvdU7EBwCs41nmwkPLz8xUaGuq+HBYWprNnz7ovN23aVAMHDpQkPXz4UBs3btQrr7xSZT9btmzR2rVrPRUTAIzAsc6DhWRZVpVtNputyrb79+9r+vTpioqK0vDhw6uMT5gwocp2u92ucePG/X1hAcDLONZ5sJDCw8N1+vRp9+X8/HyFhYVVuk5+fr4mTZqkHj16aOHChdXuJygoSEFBQZ6KCQBG4FjnwTc19OrVSzk5OSosLFRpaakOHDig6Oho97jL5dLUqVMVHx+vRYsWVXv2BACoPTx6hpSamqqUlBQ5HA6NGjVKnTp10uTJkzVr1izZ7XZdvHhRLpdL+/fvlyR16NBB77//vqciAQAM5rFCkqSkpCQlJSVV2rZp0yZJUseOHZWbm+vJuwcA+BBWagAAGIFCAgAYgUICABiBQgIAGIFCAgAYgUICABiBQgIAGIFCAgAYgUICABiBQgIAGIFCAgAYgUICABiBQgIAGIFCAgAYgUICABiBQgIAGIFCAgAYgUICABiBQgIAGIFCAgAYgUICABiBQgIAGIFCAgAYgUICABiBQgIAGIFCAgAYgUICABiBQgIAGIFCAgAYgUICABiBQgIAGIFCAgAYgUICABiBQgIAGIFCAgAYgUICABiBQgIAGIFCAgAYgUICABiBQgIAGMGjhZSVlaWEhATFxMQoPT29yvilS5c0cuRIxcbGatGiRXI6nZ6MAwAwmMcK6fbt20pLS9PWrVu1e/dubdu2TVeuXKl0nblz52rx4sXav3+/LMtSRkaGp+IAAAznsULKzs5Wjx49FBwcrIYNGyo2Nlb79u1zj+fl5enhw4fq0qWLJGnEiBGVxh8rKirSzZs3K/3Y7XZPxQYAr+BYJ/l5asf5+fkKDQ11Xw4LC9PZs2efOB4aGqrbt29X2c+WLVu0du1aT8UEACNwrPNgIVmWVWWbzWar8fhjEyZM0PDhwytts9vtGjdu3N+QEgDMwLHOg4UUHh6u06dPuy/n5+crLCys0nhBQYH78p07dyqNPxYUFKSgoCBPxQQAI3Cs8+DfkHr16qWcnBwVFhaqtLRUBw4cUHR0tHu8ZcuWqlevns6cOSNJ2rVrV6VxAEDt4rFCCg8PV2pqqlJSUjRs2DANHjxYnTp10uTJk3Xu3DlJ0qpVq7R8+XLFx8ertLRUKSkpnooDADCcx16yk6SkpCQlJSVV2rZp0yb3v6OiopSZmenJCAAAH8FKDQAAI1BIAAAjUEgAACN49G9InuJyuSSp1n2KGcDTIyIiQn5+PnkI9hifnI07d+5IUq36wBiAp8uhQ4fUqlUrb8cwis2qbskEwz18+FDnz59XaGio6tat+7fu+/Eno9PT0xUREfG37tvTyO4dZPcOX84u1ewMyel0ym6315qzKZ98hPXr11fXrl09eh8RERE+++yF7N5Bdu/w5ex/xs/P76l9bNXhTQ0AACNQSAAAI1BIAAAjUEi/ExQUpBkzZvjkqrtk9w6ye4cvZ0f1fPJddgCApw9nSAAAI1BIAAAjUEj/cebMGY0cOVJDhw7VhAkTlJeXJ0kqKirSlClTFB8fr3HjxrlXiTBNVlaWEhISFBMTo/T0dG/H+VNr165VYmKiEhMTtXLlSklSdna2kpKSNGjQIKWlpXk54Z/74IMPNH/+fEnSpUuXNHLkSMXGxmrRokVyOp1eTle9w4cPa8SIEYqLi9OyZcsk+c6879692/0788EHH0jynXlHDVmwLMuy+vfvb126dMmyLMvavn27NXXqVMuyLGvp0qXWhg0bLMuyrC+++MKaPXu2tyI+kd1ut/r372/dvXvXKi4utpKSkqzLly97O9YTnTx50ho9erRVVlZmlZeXWykpKVZWVpbV
r18/68aNG5bD4bAmTpxoHT161NtRnyg7O9vq3r27NW/ePMuyLCsxMdH64YcfLMuyrAULFljp6eleTFe9GzduWH369LFu3bpllZeXW2PGjLGOHj3qE/NeUlJidevWzfrll18sh8NhjRo1yjp58qRPzDtqjjMkSeXl5Zo9e7aioqIkSZGRkbp165Yk6ejRo+4vGRw8eLCOHTsmh8PhtazVyc7OVo8ePRQcHKyGDRsqNjZW+/bt83asJwoNDdX8+fMVEBAgf39/tW3bVteuXVPr1q31zDPPyM/PT0lJScY+hl9//VVpaWmaOnWqJCkvL08PHz5Uly5dJEkjRowwMvvXX3+thIQERUREyN/fX2lpaWrQoIFPzLvL5VJFRYVKS0vldDrldDrl5+fnE/OOmqOQJAUEBGjo0KGSpIqKCq1du1YDBw6UJOXn5ys0NFTSo2U8AgMDVVhY6LWs1fltRkkKCwvT7du3vZjoj7Vr1859ELl27Zr27t0rm83mM4/h3XffVWpqqvvtxr+f/9DQUCOzX79+XS6XS5MmTdKQIUO0detWn/ndCQwM1OzZsxUfH6/o6Gi1bNlS/v7+PjHvqLlaV0hfffWVoqOjK/28+uqrkh6dKb399ttyOp164403nriPOnXMmjarmnfu22w2LyT5ay5fvqyJEydq3rx5evbZZ6uMm/gYtm/frhYtWqhnz57ubb4y/y6XSzk5Ofrwww+VkZGhc+fO6ebNm1WuZ2L23Nxc7dixQ0eOHNGJEydUp04dnTx5ssr1TMyOmvPJxVX/G/Hx8YqPj6+yvbi4WNOmTVNwcLDWrVsnf39/SY+eMRYUFCgiIkJOp1MPHjxQcHDwP5z6j4WHh+v06dPuy/n5+QoLC/Nioj935swZzZo1SwsXLlRiYqK+/fZbFRQUuMdNfQx79+7VnTt3NHToUN27d08lJSWy2WyVst+5c8fI7M2bN1fPnj0VEhIiSRowYID27dtXacV8U+f9xIkT6tmzp5o1aybp0ctzmzdv9ol5R82Z9VTfi+bOnavWrVtrzZo1CggIcG/v16+fdu3aJenRwahr167usjJFr169lJOTo8LCQpWWlurAgQOKjo72dqwnunXrlt58802tWrVKiYmJkqTOnTvr6tWr7peV9uzZY+Rj+PTTT7Vnzx7t3r1bs2bN0r///W8tX75c9erV05kzZyRJu3btMjJ7//79deLECRUVFcnlcun48eOKi4vziXmPiopSdna2SkpKZFmWDh8+rJdeeskn5h01V+vOkKpz8eJFHTp0SM8995yGDRsm6dGZ0aZNmzR79mzNnz9fiYmJaty4sVatWuXdsNUIDw9XamqqUlJS5HA4NGrUKHXq1MnbsZ5o8+bNKisr04oVK9zbkpOTtWLFCs2cOVNlZWXq16+f4uLivJjyr1m1apXeeecdFRcXq3379kpJSfF2pCo6d+6s119/XWPHjpXD4VDv3r01ZswYtWnTxvh579Onjy5evKgRI0bI399fHTt21JQpUxQTE2P8vKPmWDoIAGAEXrIDABiBQgIAGIFCAgAYgUICABiBQgIAGIFCAp5g/vz52rx581+6zaFDh9yraB89elRr1qzxRDTgqcTnkIC/0YABAzRgwABJ0rlz53Tv3j0vJwJ8B4UEn3Pq1CmtXLlS4eHh+vnnn1W/fn2tWLFCYWFhWrp0qXJzc2Wz2dS3b1/NmTNHfn5+at++vSZMmKBTp06ppKREc+bM0aBBg7Rz507t379fGzZskKQqlx/LzMzUtm3b5HA4dO/ePU2ePFljx47Vzp07lZmZqdLSUgUGBmr48OHav3+/pk+frs8//1wul0uNGzfW2bNnFRcXp9GjR0uS1q1bp7t372rhwoX/+PwBpqKQ4JMuXryoBQsWqGvXrvrss880d+5ctWvXTsHBwcrKypLD4dC0adP0ySefaMqUKXK5XGrSpIl27typ3NxcjR8/Xl27dq3RfRUXF2v79u3auHGjmjZtqh9//FGv
vfaaxo4dK0m6cuWKDh8+rMDAQO3cuVPSo1URkpOTdffuXaWmpurgwYNav369Ro8erYqKCm3fvl0ff/yxx+YH8EX8DQk+KSoqyl0oI0eO1KVLl7Rnzx6NHz9eNptNAQEBSk5O1rFjx9y3GT9+vPu2zz//vL777rsa3VejRo20fv16ffPNN1q9erXWr1+vkpIS93hkZKQCAwP/cB/9+/dXQUGBcnNzdfz4cbVq1Upt2rT5qw8beKpRSPBJv12hWnr0FRC/XwWroqKi0lda//Y2FRUVqlu3rmw2W6XbVffli3a7XcOGDVNeXp5efPFFvfXWW5XGGzZsWKO8ycnJyszM1I4dO5ScnPyntwFqGwoJPik3N1e5ubmSpG3btumFF15QfHy80tPTZVmWysvLlZGRoV69erlv83jV9gsXLujq1avq1q2bQkJCdPnyZZWVlcnpdOrIkSNV7uv8+fMKCQnR9OnT1bdvX/d1XC7XH2asW7dupUJ8+eWXdfDgQV24cEExMTH/7RQATx3+hgSf1Lx5c61evVp5eXkKCQnRypUr1ahRIy1btkxJSUlyOBzq27ev+2vGJen7779XRkaGKioqlJaWpiZNmqh3797q1q2b4uPjFRoaqu7du+unn36qdF+9e/dWZmam4uLi1KBBA3Xq1EkhISG6fv36H2bs2bOnZs6cKX9/fy1evFjNmjVThw4d1LZtW+O+wgQwAat9w+ecOnVK7733nvbs2VPj20RGRionJ8f95XTeUFhYqFGjRik9PV0tWrTwWg7AVLxkB/wDMjIylJCQoJSUFMoIeALOkAAARuAMCQBgBAoJAGAECgkAYAQKCQBgBAoJAGAECgkAYIT/B5VRJ992K6JTAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.set_theme(style=\"ticks\")\n", + "\n", + "# Show the joint distribution using kernel density estimation\n", + "g = sns.jointplot(\n", + " data=df,\n", + " x=\"popularity\", y=\"danceability\", hue=\"artist_top_genre\",\n", + " kind=\"kde\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/jenniferlooper/Library/Python/3.8/lib/python/site-packages/seaborn/axisgrid.py:337: UserWarning: The `size` parameter has been renamed to `height`; please update your code.\n", + " warnings.warn(msg, UserWarning)\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.FacetGrid(df, hue=\"artist_top_genre\", size=5) \\\n", + " .map(plt.scatter, \"popularity\", \"danceability\") \\\n", + " .add_legend()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn tham khảo chính thức. Đối với các thông tin quan trọng, chúng tôi khuyến nghị sử dụng dịch vụ dịch thuật chuyên nghiệp từ con người. Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" + }, + "kernelspec": { + "display_name": "Python 3.7.0 64-bit ('3.7')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.9" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "c61deff2839902ac8cb4ed411eb10fee", + "translation_date": "2025-09-06T14:10:11+00:00", + "source_file": "5-Clustering/1-Visualize/solution/notebook.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/vi/5-Clustering/2-K-Means/notebook.ipynb b/translations/vi/5-Clustering/2-K-Means/notebook.ipynb new file mode 100644 index 
000000000..a6d4f7bec --- /dev/null +++ b/translations/vi/5-Clustering/2-K-Means/notebook.ipynb @@ -0,0 +1,231 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "3e5c8ab363e8d88f566d4365efc7e0bd", + "translation_date": "2025-09-06T14:20:03+00:00", + "source_file": "5-Clustering/2-K-Means/notebook.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: seaborn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.11.1)\n", + "Requirement already satisfied: numpy>=1.15 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.19.2)\n", + "Requirement already satisfied: pandas>=0.23 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.1.2)\n", + "Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.4.1)\n", + "Requirement already satisfied: matplotlib>=2.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (3.1.0)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in 
/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2.8.0)\n", + "Requirement already satisfied: pytz>=2017.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2019.1)\n", + "Requirement already satisfied: cycler>=0.10 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (1.1.0)\n", + "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (2.4.0)\n", + "Requirement already satisfied: six>=1.5 in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from python-dateutil>=2.7.3->pandas>=0.23->seaborn) (1.12.0)\n", + "Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from kiwisolver>=1.0.1->matplotlib>=2.2->seaborn) (45.1.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install seaborn" + ] + }, + { + "source": [ + "Bắt đầu từ nơi chúng ta đã kết thúc trong bài học trước, với dữ liệu đã được nhập và lọc.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "0 Sparky Mandy & The Jungle \n", + "1 shuga rush EVERYTHING YOU 
HEARD IS TRUE \n", + "2 LITT! LITT! \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "0 Cruel Santino alternative r&b 2019 144000 48 \n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "2 AYLØ indie r&b 2018 207758 40 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "0 0.666 0.8510 0.420 0.534000 0.1100 -6.699 \n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "2 0.836 0.2720 0.564 0.000537 0.1100 -7.127 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "\n", + " speechiness tempo time_signature \n", + "0 0.0829 133.015 5 \n", + "1 0.3600 129.993 3 \n", + "2 0.0424 130.005 4 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
0SparkyMandy & The JungleCruel Santinoalternative r&b2019144000480.6660.85100.4200.5340000.1100-6.6990.0829133.0155
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
2LITT!LITT!AYLØindie r&b2018207758400.8360.27200.5640.0005370.1100-7.1270.0424130.0054
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
\n
" + }, + "metadata": {}, + "execution_count": 6 + } + ], + "source": [ + "\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "\n", + "\n", + "df = pd.read_csv(\"../data/nigerian-songs.csv\")\n", + "df.head()" + ] + }, + { + "source": [ + "Chúng ta sẽ chỉ tập trung vào 3 thể loại. Có lẽ chúng ta có thể tạo được 3 cụm!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "metadata": {}, + "execution_count": 7 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "df = df[(df['artist_top_genre'] == 'afro dancehall') | (df['artist_top_genre'] == 'afropop') | (df['artist_top_genre'] == 'nigerian pop')]\n", + "df = df[(df['popularity'] > 0)]\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. 
\n", + "5 Kasala Pioneers \n", + "6 Pull Up Everything Pretty \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "5 DRB Lasgidi nigerian pop 2020 184800 26 \n", + "6 prettyboydo nigerian pop 2018 202648 29 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "5 0.803 0.1270 0.525 0.000007 0.1290 -10.034 \n", + "6 0.818 0.4520 0.587 0.004490 0.5900 -9.840 \n", + "\n", + " speechiness tempo time_signature \n", + "1 0.3600 129.993 3 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 \n", + "5 0.1970 100.103 4 \n", + "6 0.1990 95.842 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
5KasalaPioneersDRB Lasgidinigerian pop2020184800260.8030.12700.5250.0000070.1290-10.0340.1970100.1034
6Pull UpEverything Prettyprettyboydonigerian pop2018202648290.8180.45200.5870.0044900.5900-9.8400.199095.8424
\n
" + }, + "metadata": {}, + "execution_count": 8 + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn tham khảo chính thức. Đối với các thông tin quan trọng, chúng tôi khuyến nghị sử dụng dịch vụ dịch thuật chuyên nghiệp từ con người. Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/vi/5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb b/translations/vi/5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb new file mode 100644 index 000000000..56a79dba6 --- /dev/null +++ b/translations/vi/5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb @@ -0,0 +1,639 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "anaconda-cloud": "", + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "3.4.1" + }, + "colab": { + "name": "lesson_14.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "coopTranslator": { + "original_hash": "ad65fb4aad0a156b42216e4929f490fc", + "translation_date": "2025-09-06T14:31:58+00:00", + "source_file": "5-Clustering/2-K-Means/solution/R/lesson_15-R.ipynb", + "language_code": "vi" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "GULATlQXLXyR" + }, + "source": [ + "## Khám phá phân cụm K-Means bằng R và nguyên tắc dữ liệu Tidy.\n", + "\n", + "### [**Câu hỏi trước bài 
giảng**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/29/)\n", + "\n", + "Trong bài học này, bạn sẽ học cách tạo các cụm bằng gói Tidymodels và các gói khác trong hệ sinh thái R (chúng ta sẽ gọi chúng là bạn bè 🧑‍🤝‍🧑), cùng với bộ dữ liệu âm nhạc Nigeria mà bạn đã nhập trước đó. Chúng ta sẽ tìm hiểu những điều cơ bản về K-Means để phân cụm. Hãy nhớ rằng, như bạn đã học trong bài trước, có nhiều cách để làm việc với các cụm và phương pháp bạn sử dụng phụ thuộc vào dữ liệu của bạn. Chúng ta sẽ thử K-Means vì đây là kỹ thuật phân cụm phổ biến nhất. Bắt đầu nào!\n", + "\n", + "Các thuật ngữ bạn sẽ học:\n", + "\n", + "- Điểm số Silhouette\n", + "\n", + "- Phương pháp Elbow\n", + "\n", + "- Quán tính (Inertia)\n", + "\n", + "- Phương sai (Variance)\n", + "\n", + "### **Giới thiệu**\n", + "\n", + "[Phân cụm K-Means](https://wikipedia.org/wiki/K-means_clustering) là một phương pháp xuất phát từ lĩnh vực xử lý tín hiệu. Nó được sử dụng để chia và phân nhóm dữ liệu thành `k cụm` dựa trên sự tương đồng trong các đặc điểm của chúng.\n", + "\n", + "Các cụm có thể được hình dung dưới dạng [biểu đồ Voronoi](https://wikipedia.org/wiki/Voronoi_diagram), bao gồm một điểm (hoặc 'hạt giống') và vùng tương ứng của nó.\n", + "\n", + "

\n", + " \n", + "

Đồ họa thông tin bởi Jen Looper
\n", + "\n", + "Phân cụm K-Means có các bước sau:\n", + "\n", + "1. Nhà khoa học dữ liệu bắt đầu bằng cách xác định số lượng cụm mong muốn sẽ được tạo.\n", + "\n", + "2. Tiếp theo, thuật toán chọn ngẫu nhiên K quan sát từ bộ dữ liệu để làm trung tâm ban đầu cho các cụm (tức là các centroid).\n", + "\n", + "3. Sau đó, mỗi quan sát còn lại được gán cho centroid gần nhất của nó.\n", + "\n", + "4. Tiếp theo, trung bình mới của mỗi cụm được tính toán và centroid được di chuyển đến vị trí trung bình.\n", + "\n", + "5. Bây giờ các trung tâm đã được tính toán lại, mỗi quan sát được kiểm tra lại để xem liệu nó có thể gần hơn với một cụm khác hay không. Tất cả các đối tượng được gán lại bằng cách sử dụng các trung bình cụm đã cập nhật. Các bước gán cụm và cập nhật centroid được lặp lại cho đến khi việc gán cụm không còn thay đổi (tức là khi đạt được sự hội tụ). Thông thường, thuật toán kết thúc khi mỗi lần lặp mới dẫn đến sự di chuyển không đáng kể của các centroid và các cụm trở nên ổn định.\n", + "\n", + "
\n", + "\n", + "> Lưu ý rằng do sự ngẫu nhiên của các quan sát k ban đầu được sử dụng làm centroid khởi đầu, chúng ta có thể nhận được kết quả hơi khác nhau mỗi lần áp dụng quy trình. Vì lý do này, hầu hết các thuật toán sử dụng nhiều *khởi đầu ngẫu nhiên* và chọn lần lặp có WCSS thấp nhất. Do đó, rất khuyến khích luôn chạy K-Means với nhiều giá trị *nstart* để tránh một *điểm cực tiểu cục bộ không mong muốn.*\n", + "\n", + "
\n", + "\n", + "Hình ảnh động ngắn này sử dụng [tác phẩm nghệ thuật](https://github.com/allisonhorst/stats-illustrations) của Allison Horst giải thích quá trình phân cụm:\n", + "\n", + "

\n", + " \n", + "

Tác phẩm nghệ thuật bởi @allison_horst
\n", + "\n", + "Một câu hỏi cơ bản nảy sinh trong phân cụm là: làm thế nào để bạn biết nên chia dữ liệu của mình thành bao nhiêu cụm? Một nhược điểm của việc sử dụng K-Means là bạn sẽ cần xác định `k`, tức là số lượng `centroid`. May mắn thay, `phương pháp elbow` giúp ước tính một giá trị khởi đầu tốt cho `k`. Bạn sẽ thử nó ngay bây giờ.\n", + "\n", + "### \n", + "\n", + "**Điều kiện tiên quyết**\n", + "\n", + "Chúng ta sẽ tiếp tục từ nơi đã dừng lại trong [bài học trước](https://github.com/microsoft/ML-For-Beginners/blob/main/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb), nơi chúng ta đã phân tích bộ dữ liệu, tạo nhiều hình ảnh trực quan và lọc bộ dữ liệu để lấy các quan sát quan trọng. Hãy chắc chắn kiểm tra nó!\n", + "\n", + "Chúng ta sẽ cần một số gói để hoàn thành module này. Bạn có thể cài đặt chúng bằng: `install.packages(c('tidyverse', 'tidymodels', 'cluster', 'summarytools', 'plotly', 'paletteer', 'factoextra', 'patchwork'))`\n", + "\n", + "Ngoài ra, đoạn mã dưới đây sẽ kiểm tra xem bạn đã có các gói cần thiết để hoàn thành module này chưa và cài đặt chúng nếu thiếu.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "ah_tBi58LXyi" + }, + "source": [ + "suppressWarnings(if(!require(\"pacman\")) install.packages(\"pacman\"))\n", + "\n", + "pacman::p_load('tidyverse', 'tidymodels', 'cluster', 'summarytools', 'plotly', 'paletteer', 'factoextra', 'patchwork')\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7e--UCUTLXym" + }, + "source": [ + "Hãy bắt đầu ngay thôi nào!\n", + "\n", + "## 1. Một điệu nhảy với dữ liệu: Thu hẹp xuống 3 thể loại nhạc phổ biến nhất\n", + "\n", + "Đây là phần ôn lại những gì chúng ta đã làm trong bài học trước. 
Hãy cùng phân tích và xử lý dữ liệu nào!\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Ycamx7GGLXyn" + }, + "source": [ + "# Load the core tidyverse and make it available in your current R session\n", + "library(tidyverse)\n", + "\n", + "# Import the data into a tibble\n", + "df <- read_csv(file = \"https://raw.githubusercontent.com/microsoft/ML-For-Beginners/main/5-Clustering/data/nigerian-songs.csv\", show_col_types = FALSE)\n", + "\n", + "# Narrow down to top 3 popular genres\n", + "nigerian_songs <- df %>% \n", + " # Concentrate on top 3 genres\n", + " filter(artist_top_genre %in% c(\"afro dancehall\", \"afropop\",\"nigerian pop\")) %>% \n", + " # Remove unclassified observations\n", + " filter(popularity != 0)\n", + "\n", + "\n", + "\n", + "# Visualize popular genres using bar plots\n", + "theme_set(theme_light())\n", + "nigerian_songs %>%\n", + " count(artist_top_genre) %>%\n", + " ggplot(mapping = aes(x = artist_top_genre, y = n,\n", + " fill = artist_top_genre)) +\n", + " geom_col(alpha = 0.8) +\n", + " paletteer::scale_fill_paletteer_d(\"ggsci::category10_d3\") +\n", + " ggtitle(\"Top genres\") +\n", + " theme(plot.title = element_text(hjust = 0.5))\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b5h5zmkPLXyp" + }, + "source": [ + "🤩 Điều đó thật tuyệt!\n", + "\n", + "## 2. Khám phá dữ liệu thêm.\n", + "\n", + "Dữ liệu này sạch đến mức nào? Hãy kiểm tra các giá trị ngoại lai bằng cách sử dụng biểu đồ hộp. Chúng ta sẽ tập trung vào các cột số với ít giá trị ngoại lai hơn (mặc dù bạn có thể loại bỏ các giá trị ngoại lai). Biểu đồ hộp có thể hiển thị phạm vi của dữ liệu và sẽ giúp chọn những cột nào để sử dụng. Lưu ý rằng, biểu đồ hộp không hiển thị độ biến thiên, một yếu tố quan trọng của dữ liệu có thể phân cụm tốt. 
Vui lòng xem [thảo luận này](https://stats.stackexchange.com/questions/91536/deduce-variance-from-boxplot) để tìm hiểu thêm.\n", + "\n", + "[Biểu đồ hộp](https://en.wikipedia.org/wiki/Box_plot) được sử dụng để mô tả phân phối dữ liệu `số` một cách trực quan, vì vậy hãy bắt đầu bằng cách *chọn* tất cả các cột số cùng với các thể loại nhạc phổ biến.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "HhNreJKLLXyq" + }, + "source": [ + "# Select top genre column and all other numeric columns\n", + "df_numeric <- nigerian_songs %>% \n", + " select(artist_top_genre, where(is.numeric)) \n", + "\n", + "# Display the data\n", + "df_numeric %>% \n", + " slice_head(n = 5)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uYXrwJRaLXyq" + }, + "source": [ + "Hãy xem cách công cụ chọn `where` giúp việc này trở nên dễ dàng 💁? Khám phá các hàm khác tương tự [tại đây](https://tidyselect.r-lib.org/).\n", + "\n", + "Vì chúng ta sẽ tạo biểu đồ hộp cho từng đặc điểm số và muốn tránh sử dụng vòng lặp, hãy định dạng lại dữ liệu của chúng ta thành dạng *dài hơn* để có thể tận dụng `facets` - các biểu đồ con, mỗi biểu đồ hiển thị một tập hợp con của dữ liệu.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "gd5bR3f8LXys" + }, + "source": [ + "# Pivot data from wide to long\n", + "df_numeric_long <- df_numeric %>% \n", + " pivot_longer(!artist_top_genre, names_to = \"feature_names\", values_to = \"values\") \n", + "\n", + "# Print out data\n", + "df_numeric_long %>% \n", + " slice_head(n = 15)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-7tE1swnLXyv" + }, + "source": [ + "Dài hơn nhiều! Bây giờ là lúc cho một số `ggplots`!
Vậy chúng ta sẽ sử dụng `geom` nào?\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "r88bIsyuLXyy" + }, + "source": [ + "# Make a box plot\n", + "df_numeric_long %>% \n", + " ggplot(mapping = aes(x = feature_names, y = values, fill = feature_names)) +\n", + " geom_boxplot() +\n", + " facet_wrap(~ feature_names, ncol = 4, scales = \"free\") +\n", + " theme(legend.position = \"none\")\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EYVyKIUELXyz" + }, + "source": [ + "Dễ dàng!\n", + "\n", + "Bây giờ chúng ta có thể thấy dữ liệu này hơi nhiễu: bằng cách quan sát từng cột dưới dạng biểu đồ hộp, bạn có thể thấy các giá trị ngoại lai. Bạn có thể duyệt qua tập dữ liệu và loại bỏ các giá trị ngoại lai này, nhưng điều đó sẽ làm cho dữ liệu trở nên khá tối giản.\n", + "\n", + "Hiện tại, hãy chọn những cột mà chúng ta sẽ sử dụng cho bài tập phân cụm. Hãy chọn các cột số với phạm vi tương tự. Chúng ta có thể mã hóa `artist_top_genre` dưới dạng số nhưng tạm thời sẽ bỏ qua nó.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "-wkpINyZLXy0" + }, + "source": [ + "# Select variables with similar ranges\n", + "df_numeric_select <- df_numeric %>% \n", + " select(popularity, danceability, acousticness, loudness, energy) \n", + "\n", + "# Normalize data\n", + "# df_numeric_select <- scale(df_numeric_select)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D7dLzgpqLXy1" + }, + "source": [ + "## 3. Tính toán phân cụm k-means trong R\n", + "\n", + "Chúng ta có thể tính toán k-means trong R bằng hàm tích hợp sẵn `kmeans`, xem `help(\"kmeans()\")`. Hàm `kmeans()` chấp nhận một khung dữ liệu (data frame) với tất cả các cột là số làm đối số chính.\n", + "\n", + "Bước đầu tiên khi sử dụng phân cụm k-means là xác định số cụm (k) sẽ được tạo ra trong giải pháp cuối cùng. 
Chúng ta biết có 3 thể loại bài hát được tách ra từ tập dữ liệu, vì vậy hãy thử với 3:\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "uC4EQ5w7LXy5" + }, + "source": [ + "set.seed(2056)\n", + "# Kmeans clustering for 3 clusters\n", + "kclust <- kmeans(\n", + " df_numeric_select,\n", + " # Specify the number of clusters\n", + " centers = 3,\n", + " # How many random initial configurations\n", + " nstart = 25\n", + ")\n", + "\n", + "# Display clustering object\n", + "kclust\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hzfhscWrLXy-" + }, + "source": [ + "Đối tượng kmeans chứa nhiều thông tin được giải thích rõ trong `help(\"kmeans()\")`. Hiện tại, chúng ta hãy tập trung vào một vài điểm. Chúng ta thấy rằng dữ liệu đã được phân thành 3 cụm với kích thước lần lượt là 65, 110, 111. Kết quả cũng bao gồm các trung tâm cụm (giá trị trung bình) cho 3 nhóm trên 5 biến số.\n", + "\n", + "Vector phân cụm là sự phân bổ cụm cho từng quan sát. Hãy sử dụng hàm `augment` để thêm sự phân bổ cụm vào tập dữ liệu gốc.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "0XwwpFGQLXy_" + }, + "source": [ + "# Add predicted cluster assignment to data set\n", + "augment(kclust, df_numeric_select) %>% \n", + " relocate(.cluster) %>% \n", + " slice_head(n = 10)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NXIVXXACLXzA" + }, + "source": [ + "Hoàn hảo, chúng ta vừa phân chia tập dữ liệu thành 3 nhóm. Vậy, việc phân cụm của chúng ta tốt đến mức nào 🤷? Hãy cùng xem xét `Silhouette score`.\n", + "\n", + "### **Silhouette score**\n", + "\n", + "[Phân tích Silhouette](https://en.wikipedia.org/wiki/Silhouette_(clustering)) có thể được sử dụng để nghiên cứu khoảng cách phân tách giữa các cụm kết quả. Điểm số này dao động từ -1 đến 1, và nếu điểm số gần 1, cụm đó dày đặc và được phân tách tốt khỏi các cụm khác. 
Giá trị gần 0 biểu thị các cụm chồng lấn với các mẫu rất gần ranh giới quyết định của các cụm lân cận. [nguồn](https://dzone.com/articles/kmeans-silhouette-score-explained-with-python-exam).\n", + "\n", + "Phương pháp silhouette trung bình tính toán silhouette trung bình của các quan sát cho các giá trị khác nhau của *k*. Điểm silhouette trung bình cao cho thấy việc phân cụm tốt.\n", + "\n", + "Hàm `silhouette` trong gói cluster được sử dụng để tính toán độ rộng silhouette trung bình.\n", + "\n", + "> Silhouette có thể được tính toán với bất kỳ [khoảng cách](https://en.wikipedia.org/wiki/Distance \"Distance\") nào, chẳng hạn như [khoảng cách Euclid](https://en.wikipedia.org/wiki/Euclidean_distance \"Euclidean distance\") hoặc [khoảng cách Manhattan](https://en.wikipedia.org/wiki/Manhattan_distance \"Manhattan distance\") mà chúng ta đã thảo luận trong [bài học trước](https://github.com/microsoft/ML-For-Beginners/blob/main/5-Clustering/1-Visualize/solution/R/lesson_14-R.ipynb).\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Jn0McL28LXzB" + }, + "source": [ + "# Load cluster package\n", + "library(cluster)\n", + "\n", + "# Compute average silhouette score\n", + "ss <- silhouette(kclust$cluster,\n", + " # Compute euclidean distance\n", + " dist = dist(df_numeric_select))\n", + "mean(ss[, 3])\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QyQRn97nLXzC" + }, + "source": [ + "Điểm số của chúng ta là **0.549**, nằm ở mức trung bình. Điều này cho thấy dữ liệu của chúng ta không thực sự phù hợp với loại phân cụm này. Hãy xem liệu chúng ta có thể xác nhận nhận định này một cách trực quan hay không. 
[Gói factoextra](https://rpkgs.datanovia.com/factoextra/index.html) cung cấp các hàm (`fviz_cluster()`) để trực quan hóa phân cụm.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "7a6Km1_FLXzD" + }, + "source": [ + "library(factoextra)\n", + "\n", + "# Visualize clustering results\n", + "fviz_cluster(kclust, df_numeric_select)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IBwCWt-0LXzD" + }, + "source": [ + "Sự chồng chéo giữa các cụm cho thấy rằng dữ liệu của chúng ta không thực sự phù hợp với loại phân cụm này, nhưng hãy tiếp tục.\n", + "\n", + "## 4. Xác định số cụm tối ưu\n", + "\n", + "Một câu hỏi cơ bản thường xuất hiện trong phân cụm K-Means là - khi không có nhãn lớp đã biết, làm thế nào để bạn biết nên chia dữ liệu thành bao nhiêu cụm?\n", + "\n", + "Một cách chúng ta có thể thử tìm ra là sử dụng một mẫu dữ liệu để `tạo một loạt các mô hình phân cụm` với số cụm tăng dần (ví dụ từ 1-10), và đánh giá các chỉ số phân cụm như **điểm Silhouette.**\n", + "\n", + "Hãy xác định số cụm tối ưu bằng cách tính toán thuật toán phân cụm với các giá trị khác nhau của *k* và đánh giá **Tổng bình phương khoảng cách trong cụm** (WCSS). Tổng bình phương khoảng cách trong cụm (WCSS) đo lường mức độ chặt chẽ của phân cụm và chúng ta muốn giá trị này càng nhỏ càng tốt, với các giá trị thấp hơn có nghĩa là các điểm dữ liệu gần nhau hơn.\n", + "\n", + "Hãy khám phá ảnh hưởng của các lựa chọn khác nhau về `k`, từ 1 đến 10, đối với phân cụm này.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "hSeIiylDLXzE" + }, + "source": [ + "# Create a series of clustering models\n", + "kclusts <- tibble(k = 1:10) %>% \n", + " # Perform kmeans clustering for 1,2,3 ... 
,10 clusters\n", + " mutate(model = map(k, ~ kmeans(df_numeric_select, centers = .x, nstart = 25)),\n", + " # Farm out clustering metrics eg WCSS\n", + " glanced = map(model, ~ glance(.x))) %>% \n", + " unnest(cols = glanced)\n", + " \n", + "\n", + "# View clustering results\n", + "kclusts\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "m7rS2U1eLXzE" + }, + "source": [ + "Bây giờ chúng ta đã có tổng bình phương trong cụm (tot.withinss) cho mỗi thuật toán phân cụm với tâm *k*, chúng ta sử dụng [phương pháp khuỷu tay](https://en.wikipedia.org/wiki/Elbow_method_(clustering)) để tìm số lượng cụm tối ưu. Phương pháp này bao gồm việc vẽ biểu đồ WCSS như một hàm của số lượng cụm, và chọn [điểm khuỷu tay của đường cong](https://en.wikipedia.org/wiki/Elbow_of_the_curve \"Elbow of the curve\") làm số lượng cụm cần sử dụng.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "o_DjHGItLXzF" + }, + "source": [ + "set.seed(2056)\n", + "# Use elbow method to determine optimum number of clusters\n", + "kclusts %>% \n", + " ggplot(mapping = aes(x = k, y = tot.withinss)) +\n", + " geom_line(size = 1.2, alpha = 0.8, color = \"#FF7F0EFF\") +\n", + " geom_point(size = 2, color = \"#FF7F0EFF\")\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pLYyt5XSLXzG" + }, + "source": [ + "Biểu đồ cho thấy sự giảm đáng kể trong WCSS (tức là *độ chặt chẽ* lớn hơn) khi số lượng cụm tăng từ một lên hai, và một sự giảm đáng chú ý khác từ hai lên ba cụm. Sau đó, mức giảm ít rõ rệt hơn, dẫn đến một điểm `khuỷu tay` 💪 trên biểu đồ ở khoảng ba cụm.
Đây là một dấu hiệu tốt cho thấy có hai đến ba cụm dữ liệu được phân tách khá rõ ràng.\n", + "\n", + "Bây giờ chúng ta có thể tiếp tục và trích xuất mô hình phân cụm với `k = 3`:\n", + "\n", + "> `pull()`: được sử dụng để trích xuất một cột duy nhất \n", + ">\n", + "> `pluck()`: được sử dụng để truy cập các cấu trúc dữ liệu như danh sách \n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "JP_JPKBILXzG" + }, + "source": [ + "# Extract k = 3 clustering\n", + "final_kmeans <- kclusts %>% \n", + " filter(k == 3) %>% \n", + " pull(model) %>% \n", + " pluck(1)\n", + "\n", + "\n", + "final_kmeans\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l_PDTu8tLXzI" + }, + "source": [ + "Tuyệt vời! Hãy cùng xem qua các cụm đã thu được. Bạn có muốn thêm tính tương tác bằng cách sử dụng `plotly` không?\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "dNcleFe-LXzJ" + }, + "source": [ + "# Add predicted cluster assignment to data set\n", + "results <- augment(final_kmeans, df_numeric_select) %>% \n", + " bind_cols(df_numeric %>% select(artist_top_genre)) \n", + "\n", + "# Plot cluster assignments\n", + "clust_plt <- results %>% \n", + " ggplot(mapping = aes(x = popularity, y = danceability, color = .cluster, shape = artist_top_genre)) +\n", + " geom_point(size = 2, alpha = 0.8) +\n", + " paletteer::scale_color_paletteer_d(\"ggthemes::Tableau_10\")\n", + "\n", + "ggplotly(clust_plt)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6JUM_51VLXzK" + }, + "source": [ + "Có lẽ chúng ta đã kỳ vọng rằng mỗi cụm (được biểu thị bằng các màu sắc khác nhau) sẽ có các thể loại riêng biệt (được biểu thị bằng các hình dạng khác nhau).\n", + "\n", + "Hãy cùng xem xét độ chính xác của mô hình.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "HdIMUGq7LXzL" + }, + "source": [ + "# Assign genres to predefined integers\n", + "label_count 
<- results %>% \n", + " group_by(artist_top_genre) %>% \n", + " mutate(id = cur_group_id()) %>% \n", + " ungroup() %>% \n", + " summarise(correct_labels = sum(.cluster == id))\n", + "\n", + "\n", + "# Print results \n", + "cat(\"Result:\", label_count$correct_labels, \"out of\", nrow(results), \"samples were correctly labeled.\")\n", + "\n", + "cat(\"\\nAccuracy score:\", label_count$correct_labels/nrow(results))\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "C50wvaAOLXzM" + }, + "source": [ + "Độ chính xác của mô hình này không tệ, nhưng cũng không quá tốt. Có thể là do dữ liệu không phù hợp để áp dụng K-Means Clustering. Dữ liệu này quá mất cân đối, ít tương quan và có quá nhiều sự biến đổi giữa các giá trị cột, khiến việc phân cụm trở nên khó khăn. Thực tế, các cụm được hình thành có lẽ bị ảnh hưởng hoặc lệch nhiều bởi ba danh mục thể loại mà chúng ta đã định nghĩa ở trên.\n", + "\n", + "Dù vậy, đây vẫn là một quá trình học hỏi rất thú vị!\n", + "\n", + "Trong tài liệu của Scikit-learn, bạn có thể thấy rằng một mô hình như thế này, với các cụm không được phân định rõ ràng, gặp phải vấn đề về 'phương sai':\n", + "\n", + "

\n", + " \n", + "

Infographic từ Scikit-learn
\n", + "\n", + "\n", + "\n", + "## **Phương sai**\n", + "\n", + "Phương sai được định nghĩa là \"trung bình của bình phương các độ lệch so với giá trị trung bình\" [nguồn](https://www.mathsisfun.com/data/standard-deviation.html). Trong bối cảnh của bài toán phân cụm này, nó ám chỉ việc các giá trị trong tập dữ liệu của chúng ta có xu hướng lệch quá nhiều so với giá trị trung bình.\n", + "\n", + "✅ Đây là thời điểm tuyệt vời để suy nghĩ về tất cả các cách bạn có thể khắc phục vấn đề này. Điều chỉnh dữ liệu thêm một chút? Sử dụng các cột khác? Dùng một thuật toán khác? Gợi ý: Hãy thử [chuẩn hóa dữ liệu của bạn](https://www.mygreatlearning.com/blog/learning-data-science-with-k-means-clustering/) để đưa nó về cùng một thang đo và kiểm tra các cột khác.\n", + "\n", + "> Hãy thử '[máy tính phương sai](https://www.calculatorsoup.com/calculators/statistics/variance-calculator.php)' để hiểu rõ hơn về khái niệm này.\n", + "\n", + "------------------------------------------------------------------------\n", + "\n", + "## **🚀Thử thách**\n", + "\n", + "Dành thời gian với notebook này, điều chỉnh các tham số. Bạn có thể cải thiện độ chính xác của mô hình bằng cách làm sạch dữ liệu hơn (ví dụ như loại bỏ các giá trị ngoại lai)? Bạn có thể sử dụng trọng số để tăng trọng số cho các mẫu dữ liệu nhất định. Còn cách nào khác để tạo ra các cụm tốt hơn?\n", + "\n", + "Gợi ý: Hãy thử chuẩn hóa dữ liệu của bạn. Có đoạn mã đã được chú thích trong notebook để thêm chuẩn hóa tiêu chuẩn, giúp các cột dữ liệu giống nhau hơn về mặt phạm vi. Bạn sẽ thấy rằng mặc dù điểm silhouette giảm xuống, nhưng 'gấp khúc' trong đồ thị khuỷu tay trở nên mượt mà hơn. Điều này là do việc để dữ liệu không được chuẩn hóa cho phép dữ liệu có ít phương sai hơn mang nhiều trọng số hơn. 
Đọc thêm về vấn đề này [tại đây](https://stats.stackexchange.com/questions/21222/are-mean-normalization-and-feature-scaling-needed-for-k-means-clustering/21226#21226).\n", + "\n", + "## [**Câu hỏi sau bài giảng**](https://gray-sand-07a10f403.1.azurestaticapps.net/quiz/30/)\n", + "\n", + "## **Ôn tập & Tự học**\n", + "\n", + "- Xem qua một trình mô phỏng K-Means [như thế này](https://user.ceng.metu.edu.tr/~akifakkus/courses/ceng574/k-means/). Bạn có thể sử dụng công cụ này để trực quan hóa các điểm dữ liệu mẫu và xác định các tâm cụm. Bạn có thể chỉnh sửa độ ngẫu nhiên của dữ liệu, số lượng cụm và số lượng tâm cụm. Điều này có giúp bạn hình dung cách dữ liệu có thể được nhóm lại không?\n", + "\n", + "- Ngoài ra, hãy xem [tài liệu về K-Means](https://stanford.edu/~cpiech/cs221/handouts/kmeans.html) từ Stanford.\n", + "\n", + "Muốn thử áp dụng kỹ năng phân cụm mới học vào các tập dữ liệu phù hợp với K-Means clustering? Hãy xem:\n", + "\n", + "- [Huấn luyện và Đánh giá Mô hình Phân cụm](https://rpubs.com/eR_ic/clustering) sử dụng Tidymodels và các công cụ liên quan\n", + "\n", + "- [Phân tích Cụm K-means](https://uc-r.github.io/kmeans_clustering), Hướng dẫn Lập trình R của UC Business Analytics\n", + "\n", + "- [Phân cụm K-means với nguyên tắc dữ liệu gọn gàng](https://www.tidymodels.org/learn/statistics/k-means/)\n", + "\n", + "## **Bài tập**\n", + "\n", + "[Thử các phương pháp phân cụm khác nhau](https://github.com/microsoft/ML-For-Beginners/blob/main/5-Clustering/2-K-Means/assignment.md)\n", + "\n", + "## CẢM ƠN ĐẾN:\n", + "\n", + "[Jen Looper](https://www.twitter.com/jenlooper) vì đã tạo phiên bản Python gốc của module này ♥️\n", + "\n", + "[`Allison Horst`](https://twitter.com/allison_horst/) vì đã tạo ra những hình minh họa tuyệt vời giúp R trở nên thân thiện và hấp dẫn hơn. 
Tìm thêm các hình minh họa tại [bộ sưu tập của cô ấy](https://www.google.com/url?q=https://github.com/allisonhorst/stats-illustrations&sa=D&source=editors&ust=1626380772530000&usg=AOvVaw3zcfyCizFQZpkSLzxiiQEM).\n", + "\n", + "Chúc bạn học vui,\n", + "\n", + "[Eric](https://twitter.com/ericntay), Đại sứ Sinh viên Microsoft Learn Vàng.\n", + "\n", + "

\n", + " \n", + "

Tác phẩm nghệ thuật của @allison_horst
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn thông tin chính thức. Đối với các thông tin quan trọng, khuyến nghị sử dụng dịch vụ dịch thuật chuyên nghiệp bởi con người. Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/vi/5-Clustering/2-K-Means/solution/notebook.ipynb b/translations/vi/5-Clustering/2-K-Means/solution/notebook.ipynb new file mode 100644 index 000000000..19cd21cfd --- /dev/null +++ b/translations/vi/5-Clustering/2-K-Means/solution/notebook.ipynb @@ -0,0 +1,546 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "e867e87e3129c8875423a82945f4ad5e", + "translation_date": "2025-09-06T14:22:10+00:00", + "source_file": "5-Clustering/2-K-Means/solution/notebook.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + 
"output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: seaborn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.11.1)\n", + "Requirement already satisfied: pandas>=0.23 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.1.2)\n", + "Requirement already satisfied: matplotlib>=2.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (3.1.0)\n", + "Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.4.1)\n", + "Requirement already satisfied: numpy>=1.15 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.19.2)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2.8.0)\n", + "Requirement already satisfied: pytz>=2017.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2019.1)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (1.1.0)\n", + "Requirement already satisfied: cycler>=0.10 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n", + "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (2.4.0)\n", + "Requirement already satisfied: six>=1.5 in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from python-dateutil>=2.7.3->pandas>=0.23->seaborn) (1.12.0)\n", + "Requirement already satisfied: setuptools in 
/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from kiwisolver>=1.0.1->matplotlib>=2.2->seaborn) (45.1.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install seaborn" + ] + }, + { + "source": [ + "Bắt đầu từ nơi chúng ta đã kết thúc trong bài học trước, với dữ liệu đã được nhập và lọc.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "0 Sparky Mandy & The Jungle \n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "2 LITT! LITT! \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "0 Cruel Santino alternative r&b 2019 144000 48 \n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "2 AYLØ indie r&b 2018 207758 40 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "0 0.666 0.8510 0.420 0.534000 0.1100 -6.699 \n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "2 0.836 0.2720 0.564 0.000537 0.1100 -7.127 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "\n", + " speechiness tempo time_signature \n", + "0 0.0829 133.015 5 \n", + "1 0.3600 129.993 3 \n", + "2 0.0424 130.005 4 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
0SparkyMandy & The JungleCruel Santinoalternative r&b2019144000480.6660.85100.4200.5340000.1100-6.6990.0829133.0155
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
2LITT!LITT!AYLØindie r&b2018207758400.8360.27200.5640.0005370.1100-7.1270.0424130.0054
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
\n
" + }, + "metadata": {}, + "execution_count": 11 + } + ], + "source": [ + "\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "\n", + "\n", + "df = pd.read_csv(\"../../data/nigerian-songs.csv\")\n", + "df.head()" + ] + }, + { + "source": [ + "Chúng ta sẽ chỉ tập trung vào 3 thể loại. Có lẽ chúng ta có thể xây dựng được 3 cụm!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "metadata": {}, + "execution_count": 12 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "df = df[(df['artist_top_genre'] == 'afro dancehall') | (df['artist_top_genre'] == 'afropop') | (df['artist_top_genre'] == 'nigerian pop')]\n", + "df = df[(df['popularity'] > 0)]\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. 
\n", + "5 Kasala Pioneers \n", + "6 Pull Up Everything Pretty \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "5 DRB Lasgidi nigerian pop 2020 184800 26 \n", + "6 prettyboydo nigerian pop 2018 202648 29 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "5 0.803 0.1270 0.525 0.000007 0.1290 -10.034 \n", + "6 0.818 0.4520 0.587 0.004490 0.5900 -9.840 \n", + "\n", + " speechiness tempo time_signature \n", + "1 0.3600 129.993 3 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 \n", + "5 0.1970 100.103 4 \n", + "6 0.1990 95.842 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
5KasalaPioneersDRB Lasgidinigerian pop2020184800260.8030.12700.5250.0000070.1290-10.0340.1970100.1034
6Pull UpEverything Prettyprettyboydonigerian pop2018202648290.8180.45200.5870.0044900.5900-9.8400.199095.8424
\n
" + }, + "metadata": {}, + "execution_count": 13 + } + ], + "source": [ + "df.head()" + ] + }, + { + "source": [ + "Dữ liệu này sạch đến mức nào? Kiểm tra các giá trị ngoại lai bằng cách sử dụng biểu đồ hộp. Chúng ta sẽ tập trung vào các cột có ít giá trị ngoại lai hơn (mặc dù bạn có thể loại bỏ các giá trị ngoại lai). Biểu đồ hộp có thể hiển thị phạm vi của dữ liệu và sẽ giúp chọn cột nào để sử dụng. Lưu ý, biểu đồ hộp không hiển thị phương sai, một yếu tố quan trọng của dữ liệu có thể phân cụm tốt (https://stats.stackexchange.com/questions/91536/deduce-variance-from-boxplot)\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 14 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.figure(figsize=(20,20), dpi=200)\n", + "\n", + "plt.subplot(4,3,1)\n", + "sns.boxplot(x = 'popularity', data = df)\n", + "\n", + "plt.subplot(4,3,2)\n", + "sns.boxplot(x = 'acousticness', data = df)\n", + "\n", + "plt.subplot(4,3,3)\n", + "sns.boxplot(x = 'energy', data = df)\n", + "\n", + "plt.subplot(4,3,4)\n", + "sns.boxplot(x = 'instrumentalness', data = df)\n", + "\n", + "plt.subplot(4,3,5)\n", + "sns.boxplot(x = 'liveness', data = df)\n", + "\n", + "plt.subplot(4,3,6)\n", + "sns.boxplot(x = 'loudness', data = df)\n", + "\n", + "plt.subplot(4,3,7)\n", + "sns.boxplot(x = 'speechiness', data = df)\n", + "\n", + "plt.subplot(4,3,8)\n", + "sns.boxplot(x = 'tempo', data = df)\n", + "\n", + "plt.subplot(4,3,9)\n", + "sns.boxplot(x = 
'time_signature', data = df)\n", + "\n", + "plt.subplot(4,3,10)\n", + "sns.boxplot(x = 'danceability', data = df)\n", + "\n", + "plt.subplot(4,3,11)\n", + "sns.boxplot(x = 'length', data = df)\n", + "\n", + "plt.subplot(4,3,12)\n", + "sns.boxplot(x = 'release_date', data = df)" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import LabelEncoder, StandardScaler\n", + "le = LabelEncoder()\n", + "\n", + "# scaler = StandardScaler()\n", + "\n", + "X = df.loc[:, ('artist_top_genre','popularity','danceability','acousticness','loudness','energy')]\n", + "\n", + "y = df['artist_top_genre']\n", + "\n", + "X['artist_top_genre'] = le.fit_transform(X['artist_top_genre'])\n", + "\n", + "# X = scaler.fit_transform(X)\n", + "\n", + "y = le.transform(y)\n", + "\n" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 0, 2, 1, 1, 0, 1, 0, 0,\n", + " 0, 1, 0, 2, 0, 0, 2, 2, 1, 1, 0, 2, 2, 2, 2, 1, 1, 0, 2, 0, 2, 0,\n", + " 2, 0, 0, 1, 1, 2, 1, 0, 0, 2, 2, 2, 2, 1, 1, 0, 1, 2, 2, 1, 2, 2,\n", + " 1, 2, 1, 2, 2, 1, 1, 1, 1, 1, 2, 1, 2, 2, 0, 2, 1, 1, 1, 2, 2, 2,\n", + " 2, 1, 2, 2, 2, 2, 1, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 2, 1, 2, 0,\n", + " 1, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 0, 1, 1, 1, 1, 0, 1, 2, 1, 2,\n", + " 1, 2, 2, 2, 0, 2, 1, 1, 1, 2, 1, 0, 1, 2, 2, 1, 1, 1, 0, 1, 2, 2,\n", + " 2, 1, 1, 0, 1, 2, 1, 1, 1, 1, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2,\n", + " 0, 1, 0, 0, 1, 0, 0, 2, 0, 0, 1, 1, 2, 0, 2, 2, 0, 2, 2, 1, 1, 0,\n", + " 1, 1, 0, 0, 1, 0, 2, 0, 1, 0, 2, 0, 0, 2, 2, 2, 1, 1, 1, 1, 1, 0,\n", + " 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2,\n", + " 1, 1, 0, 0, 1, 1, 2, 0, 
0, 0, 0, 0, 2, 0, 0, 2, 1, 1, 1, 2, 2, 2,\n", + " 1, 2, 1, 2, 1, 1, 1, 0, 2, 2, 2, 1, 2, 1, 0, 1, 2, 1, 1, 1, 2, 1],\n", + " dtype=int32)" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ], + "source": [ + "\n", + "from sklearn.cluster import KMeans\n", + "\n", + "nclusters = 3 \n", + "seed = 0\n", + "\n", + "km = KMeans(n_clusters=nclusters, random_state=seed)\n", + "km.fit(X)\n", + "\n", + "# Predict the cluster for each data point\n", + "\n", + "y_cluster_kmeans = km.predict(X)\n", + "y_cluster_kmeans" + ] + }, + { + "source": [ + "Những con số đó không có ý nghĩa nhiều đối với chúng ta, vì vậy hãy lấy 'điểm silhouette' để xem độ chính xác. Điểm của chúng ta nằm ở mức trung bình.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.5466747351275563" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ], + "source": [ + "from sklearn import metrics\n", + "score = metrics.silhouette_score(X, y_cluster_kmeans)\n", + "score" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.cluster import KMeans\n", + "wcss = []\n", + "\n", + "for i in range(1, 11):\n", + " kmeans = KMeans(n_clusters = i, init = 'k-means++', random_state = 42)\n", + " kmeans.fit(X)\n", + " wcss.append(kmeans.inertia_)" + ] + }, + { + "source": [ + "Sử dụng mô hình đó để quyết định, bằng phương pháp Elbow, số lượng cụm tốt nhất để xây dựng\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following 
variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.\n FutureWarning\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.figure(figsize=(10,5))\n", + "sns.lineplot(range(1, 11), wcss,marker='o',color='red')\n", + "plt.title('Elbow')\n", + "plt.xlabel('Number of clusters')\n", + "plt.ylabel('WCSS')\n", + "plt.show()" + ] + }, + { + "source": [ + "Looks like 3 is a good number after all. Fit the model again and create a scatterplot of your clusters. They do group in bunches, but they are pretty close together." + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "from sklearn.cluster import KMeans\n", + "kmeans = KMeans(n_clusters = 3)\n", + "kmeans.fit(X)\n", + "labels = kmeans.predict(X)\n", + "plt.scatter(df['popularity'],df['danceability'],c = labels)\n", + 
"plt.xlabel('popularity')\n", + "plt.ylabel('danceability')\n", + "plt.show()" + ] + }, + { + "source": [ + "Độ chính xác của mô hình này không tệ, nhưng cũng không tốt. Có thể dữ liệu không phù hợp với Phân cụm K-Means. Bạn có thể thử một phương pháp khác.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 811, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Result: 109 out of 286 samples were correctly labeled.\nAccuracy score: 0.38\n" + ] + } + ], + "source": [ + "labels = kmeans.labels_\n", + "\n", + "correct_labels = sum(y == labels)\n", + "\n", + "print(\"Result: %d out of %d samples were correctly labeled.\" % (correct_labels, y.size))\n", + "\n", + "print('Accuracy score: {0:0.2f}'. format(correct_labels/float(y.size)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn tham khảo chính thức. Đối với các thông tin quan trọng, chúng tôi khuyến nghị sử dụng dịch vụ dịch thuật chuyên nghiệp từ con người. 
Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/vi/5-Clustering/2-K-Means/solution/tester.ipynb b/translations/vi/5-Clustering/2-K-Means/solution/tester.ipynb new file mode 100644 index 000000000..fb76cf147 --- /dev/null +++ b/translations/vi/5-Clustering/2-K-Means/solution/tester.ipynb @@ -0,0 +1,343 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "6f92868513e59d321245137c1c4c5311", + "translation_date": "2025-09-06T14:23:06+00:00", + "source_file": "5-Clustering/2-K-Means/solution/tester.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: seaborn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.11.1)\n", + "Requirement already satisfied: pandas>=0.23 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.1.2)\n", + "Requirement already satisfied: matplotlib>=2.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) 
(3.1.0)\n", + "Requirement already satisfied: numpy>=1.15 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.19.2)\n", + "Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.4.1)\n", + "Requirement already satisfied: pytz>=2017.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2019.1)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2.8.0)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (1.1.0)\n", + "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (2.4.0)\n", + "Requirement already satisfied: cycler>=0.10 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n", + "Requirement already satisfied: six>=1.5 in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from python-dateutil>=2.7.3->pandas>=0.23->seaborn) (1.12.0)\n", + "Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from kiwisolver>=1.0.1->matplotlib>=2.2->seaborn) (45.1.0)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install seaborn" + ] + }, + { + "source": [ + "Bắt đầu 
từ nơi chúng ta đã kết thúc trong bài học trước, với dữ liệu đã được nhập và lọc.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "0 Sparky Mandy & The Jungle \n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "2 LITT! LITT! \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "0 Cruel Santino alternative r&b 2019 144000 48 \n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "2 AYLØ indie r&b 2018 207758 40 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "0 0.666 0.8510 0.420 0.534000 0.1100 -6.699 \n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "2 0.836 0.2720 0.564 0.000537 0.1100 -7.127 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "\n", + " speechiness tempo time_signature \n", + "0 0.0829 133.015 5 \n", + "1 0.3600 129.993 3 \n", + "2 0.0424 130.005 4 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
0SparkyMandy & The JungleCruel Santinoalternative r&b2019144000480.6660.85100.4200.5340000.1100-6.6990.0829133.0155
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
2LITT!LITT!AYLØindie r&b2018207758400.8360.27200.5640.0005370.1100-7.1270.0424130.0054
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
\n
" + }, + "metadata": {}, + "execution_count": 105 + } + ], + "source": [ + "\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "import numpy as np\n", + "\n", + "df = pd.read_csv(\"../../data/nigerian-songs.csv\")\n", + "df.head()" + ] + }, + { + "source": [ + "Chúng ta sẽ chỉ tập trung vào 3 thể loại. Có lẽ chúng ta có thể xây dựng được 3 cụm!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top genres')" + ] + }, + "metadata": {}, + "execution_count": 106 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "df = df[(df['artist_top_genre'] == 'afro dancehall') | (df['artist_top_genre'] == 'afropop') | (df['artist_top_genre'] == 'nigerian pop')]\n", + "df = df[(df['popularity'] > 0)]\n", + "top = df['artist_top_genre'].value_counts()\n", + "plt.figure(figsize=(10,7))\n", + "sns.barplot(x=top.index,y=top.values)\n", + "plt.xticks(rotation=45)\n", + "plt.title('Top genres',color = 'blue')" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name album \\\n", + "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", + "3 Confident / Feeling Cool Enjoy Your Life \n", + "4 wanted you rare. 
\n", + "5 Kasala Pioneers \n", + "6 Pull Up Everything Pretty \n", + "\n", + " artist artist_top_genre release_date length popularity \\\n", + "1 Odunsi (The Engine) afropop 2020 89488 30 \n", + "3 Lady Donli nigerian pop 2019 175135 14 \n", + "4 Odunsi (The Engine) afropop 2018 152049 25 \n", + "5 DRB Lasgidi nigerian pop 2020 184800 26 \n", + "6 prettyboydo nigerian pop 2018 202648 29 \n", + "\n", + " danceability acousticness energy instrumentalness liveness loudness \\\n", + "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", + "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", + "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", + "5 0.803 0.1270 0.525 0.000007 0.1290 -10.034 \n", + "6 0.818 0.4520 0.587 0.004490 0.5900 -9.840 \n", + "\n", + " speechiness tempo time_signature \n", + "1 0.3600 129.993 3 \n", + "3 0.1130 111.087 4 \n", + "4 0.0447 105.115 4 \n", + "5 0.1970 100.103 4 \n", + "6 0.1990 95.842 4 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
5KasalaPioneersDRB Lasgidinigerian pop2020184800260.8030.12700.5250.0000070.1290-10.0340.1970100.1034
6Pull UpEverything Prettyprettyboydonigerian pop2018202648290.8180.45200.5870.0044900.5900-9.8400.199095.8424
\n
" + }, + "metadata": {}, + "execution_count": 107 + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import StandardScaler\n", + "\n", + "scaler = StandardScaler()\n", + "\n", + "# X = df.loc[:, ('danceability','energy')]\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [ + { + "output_type": "error", + "ename": "ValueError", + "evalue": "Unknown label type: 'continuous'", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0;31m# we create an instance of SVM and fit out data. We do not scale our\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[0;31m# data since we want to plot the support vectors\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 22\u001b[0;31m \u001b[0mls30\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mLabelSpreading\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_30\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_30\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Label Spreading 30% data'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 23\u001b[0m \u001b[0mls50\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mLabelSpreading\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0my_50\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_50\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Label Spreading 50% data'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0mls100\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mLabelSpreading\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Label Spreading 100% data'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/semi_supervised/_label_propagation.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y)\u001b[0m\n\u001b[1;32m 228\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_validate_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 229\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mX_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 230\u001b[0;31m \u001b[0mcheck_classification_targets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 231\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 232\u001b[0m \u001b[0;31m# actual graph construction (implementations should override this)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/utils/multiclass.py\u001b[0m in 
\u001b[0;36mcheck_classification_targets\u001b[0;34m(y)\u001b[0m\n\u001b[1;32m 181\u001b[0m if y_type not in ['binary', 'multiclass', 'multiclass-multioutput',\n\u001b[1;32m 182\u001b[0m 'multilabel-indicator', 'multilabel-sequences']:\n\u001b[0;32m--> 183\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Unknown label type: %r\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0my_type\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 184\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 185\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: Unknown label type: 'continuous'" + ] + } + ], + "source": [ + "from sklearn.svm import SVC\n", + "from sklearn.semi_supervised import LabelSpreading\n", + "from sklearn.semi_supervised import SelfTrainingClassifier\n", + "from sklearn import datasets\n", + "\n", + "X = df[['danceability','acousticness']].values\n", + "y = df['energy'].values\n", + "\n", + "# X = scaler.fit_transform(X)\n", + "\n", + "# step size in the mesh\n", + "h = .02\n", + "\n", + "rng = np.random.RandomState(0)\n", + "y_rand = rng.rand(y.shape[0])\n", + "y_30 = np.copy(y)\n", + "y_30[y_rand < 0.3] = -1 # set random samples to be unlabeled\n", + "y_50 = np.copy(y)\n", + "y_50[y_rand < 0.5] = -1\n", + "# we create an instance of SVM and fit out data. 
We do not scale our\n", + "# data since we want to plot the support vectors\n", + "ls30 = (LabelSpreading().fit(X, y_30), y_30, 'Label Spreading 30% data')\n", + "ls50 = (LabelSpreading().fit(X, y_50), y_50, 'Label Spreading 50% data')\n", + "ls100 = (LabelSpreading().fit(X, y), y, 'Label Spreading 100% data')\n", + "\n", + "# the base classifier for self-training is identical to the SVC\n", + "base_classifier = SVC(kernel='rbf', gamma=.5, probability=True)\n", + "st30 = (SelfTrainingClassifier(base_classifier).fit(X, y_30),\n", + " y_30, 'Self-training 30% data')\n", + "st50 = (SelfTrainingClassifier(base_classifier).fit(X, y_50),\n", + " y_50, 'Self-training 50% data')\n", + "\n", + "rbf_svc = (SVC(kernel='rbf', gamma=.5).fit(X, y), y, 'SVC with rbf kernel')\n", + "\n", + "# create a mesh to plot in\n", + "x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1\n", + "y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1\n", + "xx, yy = np.meshgrid(np.arange(x_min, x_max, h),\n", + " np.arange(y_min, y_max, h))\n", + "\n", + "color_map = {-1: (1, 1, 1), 0: (0, 0, .9), 1: (1, 0, 0), 2: (.8, .6, 0)}\n", + "\n", + "classifiers = (ls30, st30, ls50, st50, ls100, rbf_svc)\n", + "for i, (clf, y_train, title) in enumerate(classifiers):\n", + " # Plot the decision boundary. 
For that, we will assign a color to each\n", + " # point in the mesh [x_min, x_max]x[y_min, y_max].\n", + " plt.subplot(3, 2, i + 1)\n", + " Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])\n", + "\n", + " # Put the result into a color plot\n", + " Z = Z.reshape(xx.shape)\n", + " plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)\n", + " plt.axis('off')\n", + "\n", + " # Plot also the training points\n", + " colors = [color_map[y] for y in y_train]\n", + " plt.scatter(X[:, 0], X[:, 1], c=colors, edgecolors='black')\n", + "\n", + " plt.title(title)\n", + "\n", + "plt.suptitle(\"Unlabeled points are colored white\", y=0.1)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn tham khảo chính thức. Đối với các thông tin quan trọng, nên sử dụng dịch vụ dịch thuật chuyên nghiệp từ con người. 
Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/vi/6-NLP/3-Translation-Sentiment/solution/notebook.ipynb b/translations/vi/6-NLP/3-Translation-Sentiment/solution/notebook.ipynb new file mode 100644 index 000000000..1c0510d01 --- /dev/null +++ b/translations/vi/6-NLP/3-Translation-Sentiment/solution/notebook.ipynb @@ -0,0 +1,100 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 4, + "coopTranslator": { + "original_hash": "27de2abc0235ebd22080fc8f1107454d", + "translation_date": "2025-09-06T15:22:23+00:00", + "source_file": "6-NLP/3-Translation-Sentiment/solution/notebook.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from textblob import TextBlob\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# You should download the book text, clean it, and import it here\n", + "with open(\"pride.txt\", encoding=\"utf8\") as f:\n", + " file_contents = f.read()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "book_pride = TextBlob(file_contents)\n", + "positive_sentiment_sentences = []\n", + "negative_sentiment_sentences = []" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for sentence in book_pride.sentences:\n", + " if sentence.sentiment.polarity == 1:\n", + " positive_sentiment_sentences.append(sentence)\n", + " if sentence.sentiment.polarity == 
-1:\n", + " negative_sentiment_sentences.append(sentence)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The \" + str(len(positive_sentiment_sentences)) + \" most positive sentences:\")\n", + "for sentence in positive_sentiment_sentences:\n", + " print(\"+ \" + str(sentence.replace(\"\\n\", \"\").replace(\" \", \" \")))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The \" + str(len(negative_sentiment_sentences)) + \" most negative sentences:\")\n", + "for sentence in negative_sentiment_sentences:\n", + " print(\"- \" + str(sentence.replace(\"\\n\", \"\").replace(\" \", \" \")))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn thông tin chính thức. Đối với các thông tin quan trọng, khuyến nghị sử dụng dịch vụ dịch thuật chuyên nghiệp bởi con người. 
Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/vi/6-NLP/4-Hotel-Reviews-1/notebook.ipynb b/translations/vi/6-NLP/4-Hotel-Reviews-1/notebook.ipynb new file mode 100644 index 000000000..e69de29bb diff --git a/translations/vi/6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb b/translations/vi/6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb new file mode 100644 index 000000000..ecb4369e5 --- /dev/null +++ b/translations/vi/6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb @@ -0,0 +1,174 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 4, + "coopTranslator": { + "original_hash": "2d05e7db439376aa824f4b387f8324ca", + "translation_date": "2025-09-06T15:22:02+00:00", + "source_file": "6-NLP/4-Hotel-Reviews-1/solution/notebook.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# EDA\n", + "import pandas as pd\n", + "import time" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_difference_review_avg(row):\n", + " return row[\"Average_Score\"] - row[\"Calc_Average_Score\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the hotel reviews from CSV\n", + "print(\"Loading data file now, this could take a while depending on file size\")\n", + "start = time.time()\n", + "df = pd.read_csv('../../data/Hotel_Reviews.csv')\n", + "end = time.time()\n", + "print(\"Loading took \" + str(round(end - start, 2)) + \" 
seconds\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# What shape is the data (rows, columns)?\n", + "print(\"The shape of the data (rows, cols) is \" + str(df.shape))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# value_counts() creates a Series object that has index and values\n", + "# in this case, the country and the frequency they occur in reviewer nationality\n", + "nationality_freq = df[\"Reviewer_Nationality\"].value_counts()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# What reviewer nationality is the most common in the dataset?\n", + "print(\"The highest frequency reviewer nationality is \" + str(nationality_freq.index[0]).strip() + \" with \" + str(nationality_freq[0]) + \" reviews.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# What is the top 10 most common nationalities and their frequencies?\n", + "print(\"The top 10 highest frequency reviewer nationalities are:\")\n", + "print(nationality_freq[0:10].to_string())\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# How many unique nationalities are there?\n", + "print(\"There are \" + str(nationality_freq.index.size) + \" unique nationalities in the dataset\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# What was the most frequently reviewed hotel for the top 10 nationalities - print the hotel and number of reviews\n", + "for nat in nationality_freq[:10].index:\n", + " # First, extract all the rows that match the criteria into a new dataframe\n", + " nat_df = df[df[\"Reviewer_Nationality\"] == nat] \n", + " # Now get the hotel freq\n", + " freq = 
nat_df[\"Hotel_Name\"].value_counts()\n", + " print(\"The most reviewed hotel for \" + str(nat).strip() + \" was \" + str(freq.index[0]) + \" with \" + str(freq[0]) + \" reviews.\") \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# How many reviews are there per hotel (frequency count of hotel) and do the results match the value in `Total_Number_of_Reviews`?\n", + "# First create a new dataframe based on the old one, removing the uneeded columns\n", + "hotel_freq_df = df.drop([\"Hotel_Address\", \"Additional_Number_of_Scoring\", \"Review_Date\", \"Average_Score\", \"Reviewer_Nationality\", \"Negative_Review\", \"Review_Total_Negative_Word_Counts\", \"Positive_Review\", \"Review_Total_Positive_Word_Counts\", \"Total_Number_of_Reviews_Reviewer_Has_Given\", \"Reviewer_Score\", \"Tags\", \"days_since_review\", \"lat\", \"lng\"], axis = 1)\n", + "# Group the rows by Hotel_Name, count them and put the result in a new column Total_Reviews_Found\n", + "hotel_freq_df['Total_Reviews_Found'] = hotel_freq_df.groupby('Hotel_Name').transform('count')\n", + "# Get rid of all the duplicated rows\n", + "hotel_freq_df = hotel_freq_df.drop_duplicates(subset = [\"Hotel_Name\"])\n", + "print()\n", + "print(hotel_freq_df.to_string())\n", + "print(str(hotel_freq_df.shape))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# While there is an `Average_Score` for each hotel according to the dataset, \n", + "# you can also calculate an average score (getting the average of all reviewer scores in the dataset for each hotel)\n", + "# Add a new column to your dataframe with the column header `Calc_Average_Score` that contains that calculated average. 
\n", + "df['Calc_Average_Score'] = round(df.groupby('Hotel_Name').Reviewer_Score.transform('mean'), 1)\n", + "# Add a new column with the difference between the two average scores\n", + "df[\"Average_Score_Difference\"] = df.apply(get_difference_review_avg, axis = 1)\n", + "# Create a df without all the duplicates of Hotel_Name (so only 1 row per hotel)\n", + "review_scores_df = df.drop_duplicates(subset = [\"Hotel_Name\"])\n", + "# Sort the dataframe to find the lowest and highest average score difference\n", + "review_scores_df = review_scores_df.sort_values(by=[\"Average_Score_Difference\"])\n", + "print(review_scores_df[[\"Average_Score_Difference\", \"Average_Score\", \"Calc_Average_Score\", \"Hotel_Name\"]])\n", + "# Do any hotels have the same (rounded to 1 decimal place) `Average_Score` and `Calc_Average_Score`?\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn thông tin chính thức. Đối với các thông tin quan trọng, khuyến nghị sử dụng dịch vụ dịch thuật chuyên nghiệp bởi con người. 
Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/vi/6-NLP/5-Hotel-Reviews-2/notebook.ipynb b/translations/vi/6-NLP/5-Hotel-Reviews-2/notebook.ipynb new file mode 100644 index 000000000..e69de29bb diff --git a/translations/vi/6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb b/translations/vi/6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb new file mode 100644 index 000000000..a1b469d46 --- /dev/null +++ b/translations/vi/6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb @@ -0,0 +1,172 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "033cb89c85500224b3c63fd04f49b4aa", + "translation_date": "2025-09-06T15:22:44+00:00", + "source_file": "6-NLP/5-Hotel-Reviews-2/solution/1-notebook.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import time\n", + "import ast" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def replace_address(row):\n", + " if \"Netherlands\" in row[\"Hotel_Address\"]:\n", + " return \"Amsterdam, Netherlands\"\n", + " elif \"Barcelona\" in row[\"Hotel_Address\"]:\n", + " return \"Barcelona, Spain\"\n", + " elif \"United Kingdom\" in row[\"Hotel_Address\"]:\n", + " return \"London, United Kingdom\"\n", 
+ " elif \"Milan\" in row[\"Hotel_Address\"]: \n", + " return \"Milan, Italy\"\n", + " elif \"France\" in row[\"Hotel_Address\"]:\n", + " return \"Paris, France\"\n", + " elif \"Vienna\" in row[\"Hotel_Address\"]:\n", + " return \"Vienna, Austria\" \n", + " else:\n", + " return row.Hotel_Address\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the hotel reviews from CSV\n", + "start = time.time()\n", + "df = pd.read_csv('../../data/Hotel_Reviews.csv')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# dropping columns we will not use:\n", + "df.drop([\"lat\", \"lng\"], axis = 1, inplace=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Replace all the addresses with a shortened, more useful form\n", + "df[\"Hotel_Address\"] = df.apply(replace_address, axis = 1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Drop `Additional_Number_of_Scoring`\n", + "df.drop([\"Additional_Number_of_Scoring\"], axis = 1, inplace=True)\n", + "# Replace `Total_Number_of_Reviews` and `Average_Score` with our own calculated values\n", + "df.Total_Number_of_Reviews = df.groupby('Hotel_Name').transform('count')\n", + "df.Average_Score = round(df.groupby('Hotel_Name').Reviewer_Score.transform('mean'), 1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Process the Tags into new columns\n", + "# The file Hotel_Reviews_Tags.py, identifies the most important tags\n", + "# Leisure trip, Couple, Solo traveler, Business trip, Group combined with Travelers with friends, \n", + "# Family with young children, Family with older children, With a pet\n", + "df[\"Leisure_trip\"] = df.Tags.apply(lambda tag: 1 if \"Leisure trip\" in tag else 0)\n", + 
"df[\"Couple\"] = df.Tags.apply(lambda tag: 1 if \"Couple\" in tag else 0)\n", + "df[\"Solo_traveler\"] = df.Tags.apply(lambda tag: 1 if \"Solo traveler\" in tag else 0)\n", + "df[\"Business_trip\"] = df.Tags.apply(lambda tag: 1 if \"Business trip\" in tag else 0)\n", + "df[\"Group\"] = df.Tags.apply(lambda tag: 1 if \"Group\" in tag or \"Travelers with friends\" in tag else 0)\n", + "df[\"Family_with_young_children\"] = df.Tags.apply(lambda tag: 1 if \"Family with young children\" in tag else 0)\n", + "df[\"Family_with_older_children\"] = df.Tags.apply(lambda tag: 1 if \"Family with older children\" in tag else 0)\n", + "df[\"With_a_pet\"] = df.Tags.apply(lambda tag: 1 if \"With a pet\" in tag else 0)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# No longer need any of these columns\n", + "df.drop([\"Review_Date\", \"Review_Total_Negative_Word_Counts\", \"Review_Total_Positive_Word_Counts\", \"days_since_review\", \"Total_Number_of_Reviews_Reviewer_Has_Given\"], axis = 1, inplace=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Saving results to Hotel_Reviews_Filtered.csv\n", + "Filtering took 23.74 seconds\n" + ] + } + ], + "source": [ + "# Saving new data file with calculated columns\n", + "print(\"Saving results to Hotel_Reviews_Filtered.csv\")\n", + "df.to_csv(r'../../data/Hotel_Reviews_Filtered.csv', index = False)\n", + "end = time.time()\n", + "print(\"Filtering took \" + str(round(end - start, 2)) + \" seconds\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc không chính xác. 
Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn thông tin chính thức. Đối với các thông tin quan trọng, khuyến nghị sử dụng dịch vụ dịch thuật chuyên nghiệp bởi con người. Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/vi/6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb b/translations/vi/6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb new file mode 100644 index 000000000..ef047753d --- /dev/null +++ b/translations/vi/6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb @@ -0,0 +1,137 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "341efc86325ec2a214f682f57a189dfd", + "translation_date": "2025-09-06T15:23:04+00:00", + "source_file": "6-NLP/5-Hotel-Reviews-2/solution/2-notebook.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the hotel reviews from CSV (you can )\n", + "import pandas as pd \n", + "\n", + "df = pd.read_csv('../../data/Hotel_Reviews_Filtered.csv')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# We want to find the most useful tags to keep\n", + "# Remove opening and closing brackets\n", + "df.Tags = df.Tags.str.strip(\"[']\")\n", + "# remove all quotes too\n", + "df.Tags = df.Tags.str.replace(\" ', '\", 
\",\", regex = False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# removing this to take advantage of the 'already a phrase' fact of the dataset \n", + "# Now split the strings into a list\n", + "tag_list_df = df.Tags.str.split(',', expand = True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove leading and trailing spaces\n", + "df[\"Tag_1\"] = tag_list_df[0].str.strip()\n", + "df[\"Tag_2\"] = tag_list_df[1].str.strip()\n", + "df[\"Tag_3\"] = tag_list_df[2].str.strip()\n", + "df[\"Tag_4\"] = tag_list_df[3].str.strip()\n", + "df[\"Tag_5\"] = tag_list_df[4].str.strip()\n", + "df[\"Tag_6\"] = tag_list_df[5].str.strip()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# Merge the 6 columns into one with melt\n", + "df_tags = df.melt(value_vars=[\"Tag_1\", \"Tag_2\", \"Tag_3\", \"Tag_4\", \"Tag_5\", \"Tag_6\"])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "The shape of the tags with no filtering: (2514684, 2)\n", + " index count\n", + "0 Leisure trip 338423\n", + "1 Couple 205305\n", + "2 Solo traveler 89779\n", + "3 Business trip 68176\n", + "4 Group 51593\n", + "5 Family with young children 49318\n", + "6 Family with older children 21509\n", + "7 Travelers with friends 1610\n", + "8 With a pet 1078\n" + ] + } + ], + "source": [ + "# Get the value counts\n", + "tag_vc = df_tags.value.value_counts()\n", + "# print(tag_vc)\n", + "print(\"The shape of the tags with no filtering:\", str(df_tags.shape))\n", + "# Drop rooms, suites, and length of stay, mobile device and anything with less count than a 1000\n", + "df_tags = df_tags[~df_tags.value.str.contains(\"Standard|room|Stayed|device|Beds|Suite|Studio|King|Superior|Double\", na=False, 
case=False)]\n", + "tag_vc = df_tags.value.value_counts().reset_index(name=\"count\").query(\"count > 1000\")\n", + "# Print the top 10 (there should only be 9 and we'll use these in the filtering section)\n", + "print(tag_vc[:10])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn thông tin chính thức. Đối với các thông tin quan trọng, khuyến nghị sử dụng dịch vụ dịch thuật chuyên nghiệp bởi con người. Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/vi/6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb b/translations/vi/6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb new file mode 100644 index 000000000..5cca6770d --- /dev/null +++ b/translations/vi/6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb @@ -0,0 +1,260 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "705bf02633759f689abc37b19749a16d", + "translation_date": "2025-09-06T15:23:25+00:00", + "source_file": "6-NLP/5-Hotel-Reviews-2/solution/3-notebook.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + 
"cells": [ + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[nltk_data] Downloading package vader_lexicon to\n[nltk_data] /Users/jenlooper/nltk_data...\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ], + "source": [ + "import time\n", + "import pandas as pd\n", + "import nltk as nltk\n", + "from nltk.corpus import stopwords\n", + "from nltk.sentiment.vader import SentimentIntensityAnalyzer\n", + "nltk.download('vader_lexicon')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "vader_sentiment = SentimentIntensityAnalyzer()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# There are 3 possibilities of input for a review:\n", + "# It could be \"No Negative\", in which case, return 0\n", + "# It could be \"No Positive\", in which case, return 0\n", + "# It could be a review, in which case calculate the sentiment\n", + "def calc_sentiment(review): \n", + " if review == \"No Negative\" or review == \"No Positive\":\n", + " return 0\n", + " return vader_sentiment.polarity_scores(review)[\"compound\"] \n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the hotel reviews from CSV\n", + "df = pd.read_csv(\"../../data/Hotel_Reviews_Filtered.csv\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove stop words - can be slow for a lot of text!\n", + "# Ryan Han (ryanxjhan on Kaggle) has a great post measuring performance of different stop words removal approaches\n", + "# https://www.kaggle.com/ryanxjhan/fast-stop-words-removal # using the approach that Ryan recommends\n", + "start = time.time()\n", 
+ "cache = set(stopwords.words(\"english\"))\n", + "def remove_stopwords(review):\n", + " text = \" \".join([word for word in review.split() if word not in cache])\n", + " return text\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove the stop words from both columns\n", + "df.Negative_Review = df.Negative_Review.apply(remove_stopwords) \n", + "df.Positive_Review = df.Positive_Review.apply(remove_stopwords)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Removing stop words took 5.77 seconds\n" + ] + } + ], + "source": [ + "end = time.time()\n", + "print(\"Removing stop words took \" + str(round(end - start, 2)) + \" seconds\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Calculating sentiment columns for both positive and negative reviews\n", + "Calculating sentiment took 201.07 seconds\n" + ] + } + ], + "source": [ + "# Add a negative sentiment and positive sentiment column\n", + "print(\"Calculating sentiment columns for both positive and negative reviews\")\n", + "start = time.time()\n", + "df[\"Negative_Sentiment\"] = df.Negative_Review.apply(calc_sentiment)\n", + "df[\"Positive_Sentiment\"] = df.Positive_Review.apply(calc_sentiment)\n", + "end = time.time()\n", + "print(\"Calculating sentiment took \" + str(round(end - start, 2)) + \" seconds\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Negative_Review Negative_Sentiment\n", + "186584 So bad experience memories I hotel The first n... -0.9920\n", + "129503 First charged twice room booked booking second... -0.9896\n", + "307286 The staff Had bad experience even booking Janu... 
-0.9889\n", + "452092 No WLAN room Incredibly rude restaurant staff ... -0.9884\n", + "201293 We usually traveling Paris 2 3 times year busi... -0.9873\n", + "... ... ...\n", + "26899 I would say however one night expensive even d... 0.9933\n", + "138365 Wifi terribly slow I speed test network upload... 0.9938\n", + "79215 I find anything hotel first I walked past hote... 0.9938\n", + "278506 The property great location There bakery next ... 0.9945\n", + "339189 Guys I like hotel I wish return next year Howe... 0.9948\n", + "\n", + "[515738 rows x 2 columns]\n", + " Positive_Review Positive_Sentiment\n", + "137893 Bathroom Shower We going stay twice hotel 2 ni... -0.9820\n", + "5839 I completely disappointed mad since reception ... -0.9780\n", + "64158 get everything extra internet parking breakfas... -0.9751\n", + "124178 I didnt like anythig Room small Asked upgrade ... -0.9721\n", + "489137 Very rude manager abusive staff reception Dirt... -0.9703\n", + "... ... ...\n", + "331570 Everything This recently renovated hotel class... 0.9984\n", + "322920 From moment stepped doors Guesthouse Hotel sta... 0.9985\n", + "293710 This place surprise expected good actually gre... 0.9985\n", + "417442 We celebrated wedding night Langham I commend ... 0.9985\n", + "132492 We arrived super cute boutique hotel area expl... 
0.9987\n", + "\n", + "[515738 rows x 2 columns]\n" + ] + } + ], + "source": [ + "df = df.sort_values(by=[\"Negative_Sentiment\"], ascending=True)\n", + "print(df[[\"Negative_Review\", \"Negative_Sentiment\"]])\n", + "df = df.sort_values(by=[\"Positive_Sentiment\"], ascending=True)\n", + "print(df[[\"Positive_Review\", \"Positive_Sentiment\"]])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "# Reorder the columns (This is cosmetic, but to make it easier to explore the data later)\n", + "df = df.reindex([\"Hotel_Name\", \"Hotel_Address\", \"Total_Number_of_Reviews\", \"Average_Score\", \"Reviewer_Score\", \"Negative_Sentiment\", \"Positive_Sentiment\", \"Reviewer_Nationality\", \"Leisure_trip\", \"Couple\", \"Solo_traveler\", \"Business_trip\", \"Group\", \"Family_with_young_children\", \"Family_with_older_children\", \"With_a_pet\", \"Negative_Review\", \"Positive_Review\"], axis=1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Saving results to Hotel_Reviews_NLP.csv\n" + ] + } + ], + "source": [ + "print(\"Saving results to Hotel_Reviews_NLP.csv\")\n", + "df.to_csv(r\"../../data/Hotel_Reviews_NLP.csv\", index = False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn thông tin chính thức. Đối với các thông tin quan trọng, khuyến nghị sử dụng dịch vụ dịch thuật chuyên nghiệp bởi con người. 
Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/vi/7-TimeSeries/1-Introduction/solution/notebook.ipynb b/translations/vi/7-TimeSeries/1-Introduction/solution/notebook.ipynb new file mode 100644 index 000000000..b46440636 --- /dev/null +++ b/translations/vi/7-TimeSeries/1-Introduction/solution/notebook.ipynb @@ -0,0 +1,164 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Dữ liệu này bao gồm 3 năm giá trị tải điện và nhiệt độ theo giờ từ năm 2012 đến năm 2014.\n", + "\n", + "Tao Hong, Pierre Pinson, Shu Fan, Hamidreza Zareipour, Alberto Troccoli và Rob J. Hyndman, \"Dự báo năng lượng xác suất: Cuộc thi Dự báo Năng lượng Toàn cầu 2014 và hơn thế nữa\", Tạp chí Quốc tế về Dự báo, tập 32, số 3, trang 896-913, tháng 7-tháng 9, năm 2016.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import matplotlib.pyplot as plt\n", + "from common.utils import load_data\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Tải dữ liệu từ csv vào một Pandas dataframe\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " load\n", + "2012-01-01 00:00:00 2698.0\n", + "2012-01-01 01:00:00 2558.0\n", + "2012-01-01 02:00:00 2444.0\n", + "2012-01-01 03:00:00 2402.0\n", + "2012-01-01 04:00:00 2403.0" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
load
2012-01-01 00:00:002698.0
2012-01-01 01:00:002558.0
2012-01-01 02:00:002444.0
2012-01-01 03:00:002402.0
2012-01-01 04:00:002403.0
\n
" + }, + "metadata": {}, + "execution_count": 7 + } + ], + "source": [ + "data_dir = './data'\n", + "energy = load_data(data_dir)[['load']]\n", + "energy.head()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Vẽ tất cả dữ liệu tải có sẵn (tháng 1 năm 2012 đến tháng 12 năm 2014)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "energy.plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + 
}, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "energy['2014-07-01':'2014-07-07'].plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc sự không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn tham khảo chính thức. Đối với các thông tin quan trọng, nên sử dụng dịch vụ dịch thuật chuyên nghiệp từ con người. 
Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ], + "metadata": { + "kernel_info": { + "name": "python3" + }, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "nteract": { + "version": "nteract-front-end@1.0.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "dddca9ad9e34435494e0933c218e1579", + "translation_date": "2025-09-06T14:02:03+00:00", + "source_file": "7-TimeSeries/1-Introduction/solution/notebook.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/vi/7-TimeSeries/1-Introduction/working/notebook.ipynb b/translations/vi/7-TimeSeries/1-Introduction/working/notebook.ipynb new file mode 100644 index 000000000..b82509f95 --- /dev/null +++ b/translations/vi/7-TimeSeries/1-Introduction/working/notebook.ipynb @@ -0,0 +1,63 @@ +{ + "cells": [ + { + "source": [ + "# Thiết lập dữ liệu\n", + "\n", + "Trong notebook này, chúng ta sẽ minh họa cách:\n", + "\n", + "thiết lập dữ liệu chuỗi thời gian cho mô-đun này \n", + "trực quan hóa dữ liệu \n", + "Dữ liệu trong ví dụ này được lấy từ cuộc thi dự báo GEFCom2014<sup>1</sup>. Nó bao gồm 3 năm dữ liệu tải điện và giá trị nhiệt độ theo giờ từ năm 2012 đến năm 2014.\n", + "\n", + "<sup>1</sup>Tao Hong, Pierre Pinson, Shu Fan, Hamidreza Zareipour, Alberto Troccoli và Rob J.
Hyndman, \"Dự báo năng lượng xác suất: Cuộc thi Dự báo Năng lượng Toàn cầu 2014 và hơn thế nữa\", Tạp chí Quốc tế về Dự báo, tập 32, số 3, trang 896-913, tháng 7-tháng 9, 2016.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn tham khảo chính thức. Đối với các thông tin quan trọng, chúng tôi khuyến nghị sử dụng dịch vụ dịch thuật chuyên nghiệp từ con người. Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ], + "metadata": { + "kernel_info": { + "name": "python3" + }, + "kernelspec": { + "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "nteract": { + "version": "nteract-front-end@1.0.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "coopTranslator": { + "original_hash": "5e2bbe594906dce3aaaa736d6dac6683", + "translation_date": "2025-09-06T14:02:49+00:00", + "source_file": "7-TimeSeries/1-Introduction/working/notebook.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/vi/7-TimeSeries/2-ARIMA/solution/notebook.ipynb b/translations/vi/7-TimeSeries/2-ARIMA/solution/notebook.ipynb new file mode 100644 index 
000000000..9488f6d55 --- /dev/null +++ b/translations/vi/7-TimeSeries/2-ARIMA/solution/notebook.ipynb @@ -0,0 +1,1137 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Dự báo chuỗi thời gian với ARIMA\n", + "\n", + "Trong notebook này, chúng ta sẽ thực hiện:\n", + "- chuẩn bị dữ liệu chuỗi thời gian để huấn luyện mô hình dự báo chuỗi thời gian ARIMA\n", + "- triển khai một mô hình ARIMA đơn giản để dự báo các bước tiếp theo trong HORIZON (từ thời điểm *t+1* đến *t+HORIZON*) trong chuỗi thời gian\n", + "- đánh giá mô hình\n", + "\n", + "Dữ liệu trong ví dụ này được lấy từ cuộc thi dự báo GEFCom2014. Nó bao gồm 3 năm dữ liệu tải điện và nhiệt độ theo giờ từ năm 2012 đến 2014. Nhiệm vụ là dự báo các giá trị tải điện trong tương lai. Trong ví dụ này, chúng ta sẽ minh họa cách dự báo một bước thời gian tiếp theo, chỉ sử dụng dữ liệu tải điện lịch sử.\n", + "\n", + "Tao Hong, Pierre Pinson, Shu Fan, Hamidreza Zareipour, Alberto Troccoli và Rob J. Hyndman, \"Probabilistic energy forecasting: Global Energy Forecasting Competition 2014 and beyond\", International Journal of Forecasting, vol.32, no.3, pp 896-913, tháng 7-tháng 9, 2016.\n" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Cài đặt các thư viện cần thiết\n", + "Bắt đầu bằng cách cài đặt một số thư viện cần thiết. 
Các thư viện này cùng với phiên bản tương ứng đã được kiểm chứng hoạt động tốt cho giải pháp:\n", + "\n", + "* `statsmodels == 0.12.2`\n", + "* `matplotlib == 3.4.2`\n", + "* `scikit-learn == 0.24.2`\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 16, + "source": [ + "!pip install statsmodels" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "/bin/sh: pip: command not found\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 17, + "source": [ + "import os\n", + "import warnings\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import datetime as dt\n", + "import math\n", + "\n", + "from pandas.plotting import autocorrelation_plot\n", + "from statsmodels.tsa.statespace.sarimax import SARIMAX\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from common.utils import load_data, mape\n", + "from IPython.display import Image\n", + "\n", + "%matplotlib inline\n", + "pd.options.display.float_format = '{:,.2f}'.format\n", + "np.set_printoptions(precision=2)\n", + "warnings.filterwarnings(\"ignore\") # specify to ignore warning messages\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 18, + "source": [ + "energy = load_data('./data')[['load']]\n", + "energy.head(10)" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2012-01-01 00:00:002,698.00
2012-01-01 01:00:002,558.00
2012-01-01 02:00:002,444.00
2012-01-01 03:00:002,402.00
2012-01-01 04:00:002,403.00
2012-01-01 05:00:002,453.00
2012-01-01 06:00:002,560.00
2012-01-01 07:00:002,719.00
2012-01-01 08:00:002,916.00
2012-01-01 09:00:003,105.00
\n", + "
" + ], + "text/plain": [ + " load\n", + "2012-01-01 00:00:00 2,698.00\n", + "2012-01-01 01:00:00 2,558.00\n", + "2012-01-01 02:00:00 2,444.00\n", + "2012-01-01 03:00:00 2,402.00\n", + "2012-01-01 04:00:00 2,403.00\n", + "2012-01-01 05:00:00 2,453.00\n", + "2012-01-01 06:00:00 2,560.00\n", + "2012-01-01 07:00:00 2,719.00\n", + "2012-01-01 08:00:00 2,916.00\n", + "2012-01-01 09:00:00 3,105.00" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Vẽ tất cả dữ liệu tải có sẵn (tháng 1 năm 2012 đến tháng 12 năm 2014)\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 19, + "source": [ + "energy.plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Tạo tập dữ liệu huấn luyện và kiểm tra\n", + "\n", + "### Giới thiệu\n", + "\n", + "Khi xây dựng một mô hình học máy, việc chia dữ liệu thành các tập huấn luyện và kiểm tra là một bước quan trọng. Tập huấn luyện được sử dụng để dạy mô hình, trong khi tập kiểm tra được sử dụng để đánh giá hiệu suất của mô hình trên dữ liệu chưa từng thấy.\n", + "\n", + "### Tại sao cần chia dữ liệu?\n", + "\n", + "Việc chia dữ liệu giúp đảm bảo rằng mô hình không chỉ hoạt động tốt trên dữ liệu mà nó đã được huấn luyện, mà còn có khả năng tổng quát hóa để dự đoán chính xác trên dữ liệu mới. Điều này giúp tránh hiện tượng **overfitting** (quá khớp), khi mô hình học quá chi tiết từ dữ liệu huấn luyện và không thể áp dụng tốt cho dữ liệu khác.\n", + "\n", + "### Cách chia dữ liệu\n", + "\n", + "Dưới đây là các bước cơ bản để chia dữ liệu:\n", + "\n", + "1. **Thu thập dữ liệu**: Đảm bảo rằng bạn có một tập dữ liệu đủ lớn và đại diện cho vấn đề bạn đang giải quyết.\n", + "2. **Xáo trộn dữ liệu**: Trộn ngẫu nhiên dữ liệu để đảm bảo rằng không có sự thiên vị nào trong cách dữ liệu được sắp xếp.\n", + "3. 
**Chia thành hai tập**:\n", + " - **Tập huấn luyện**: Thường chiếm khoảng 70-80% tổng dữ liệu.\n", + " - **Tập kiểm tra**: Thường chiếm khoảng 20-30% tổng dữ liệu.\n", + "\n", + "### Ví dụ\n", + "\n", + "Dưới đây là một ví dụ minh họa cách chia dữ liệu bằng Python:\n", + "\n", + "```python\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "# Giả sử X là dữ liệu đầu vào và y là nhãn\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", + "\n", + "print(\"Kích thước tập huấn luyện:\", len(X_train))\n", + "print(\"Kích thước tập kiểm tra:\", len(X_test))\n", + "```\n", + "\n", + "### Lưu ý\n", + "\n", + "**Ghi chú:** Đảm bảo rằng bạn sử dụng cùng một giá trị `random_state` để kết quả có thể tái lập.\n", + "\n", + "**Cảnh báo:** Không sử dụng dữ liệu kiểm tra trong quá trình huấn luyện mô hình. Điều này có thể dẫn đến kết quả không chính xác khi đánh giá mô hình.\n", + "\n", + "**Mẹo:** Nếu bạn có một tập dữ liệu rất lớn, bạn có thể cân nhắc sử dụng một phần nhỏ của dữ liệu để kiểm tra, thay vì 20-30%.\n", + "\n", + "**Quan trọng:** Đối với các bài toán cụ thể như phân loại hoặc dự đoán chuỗi thời gian, cách chia dữ liệu có thể cần được điều chỉnh để phù hợp với đặc điểm của dữ liệu. Với dữ liệu chuỗi thời gian như trong notebook này, không xáo trộn dữ liệu; hãy giữ nguyên thứ tự thời gian và chia theo mốc thời gian (xem `train_start_dt` và `test_start_dt` ở ô mã bên dưới).\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 20, + "source": [ + "train_start_dt = '2014-11-01 00:00:00'\n", + "test_start_dt = '2014-12-30 00:00:00' " + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 21, + "source": [ + "energy[(energy.index < test_start_dt) & (energy.index >= train_start_dt)][['load']].rename(columns={'load':'train'}) \\\n", + " .join(energy[test_start_dt:][['load']].rename(columns={'load':'test'}), how='outer') \\\n", + " .plot(y=['train', 'test'], figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ], + "outputs": [ + { + "output_type": "display_data", +
"data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 22, + "source": [ + "train = energy.copy()[(energy.index >= train_start_dt) & (energy.index < test_start_dt)][['load']]\n", + "test = energy.copy()[energy.index >= test_start_dt][['load']]\n", + "\n", + "print('Training data shape: ', train.shape)\n", + "print('Test data shape: ', test.shape)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Training data shape: (1416, 1)\n", + "Test data shape: (48, 1)\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 23, + "source": [ + "scaler = MinMaxScaler()\n", + "train['load'] = scaler.fit_transform(train)\n", + "train.head(10)" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2014-11-01 00:00:000.10
2014-11-01 01:00:000.07
2014-11-01 02:00:000.05
2014-11-01 03:00:000.04
2014-11-01 04:00:000.06
2014-11-01 05:00:000.10
2014-11-01 06:00:000.19
2014-11-01 07:00:000.31
2014-11-01 08:00:000.40
2014-11-01 09:00:000.48
\n", + "
" + ], + "text/plain": [ + " load\n", + "2014-11-01 00:00:00 0.10\n", + "2014-11-01 01:00:00 0.07\n", + "2014-11-01 02:00:00 0.05\n", + "2014-11-01 03:00:00 0.04\n", + "2014-11-01 04:00:00 0.06\n", + "2014-11-01 05:00:00 0.10\n", + "2014-11-01 06:00:00 0.19\n", + "2014-11-01 07:00:00 0.31\n", + "2014-11-01 08:00:00 0.40\n", + "2014-11-01 09:00:00 0.48" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Dữ liệu gốc so với dữ liệu đã được chuẩn hóa:\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 24, + "source": [ + "energy[(energy.index >= train_start_dt) & (energy.index < test_start_dt)][['load']].rename(columns={'load':'original load'}).plot.hist(bins=100, fontsize=12)\n", + "train.rename(columns={'load':'scaled load'}).plot.hist(bins=100, fontsize=12)\n", + "plt.show()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Hãy cùng chuẩn hóa dữ liệu kiểm tra\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 25, + "source": [ + "test['load'] = scaler.transform(test)\n", + "test.head()" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2014-12-30 00:00:000.33
2014-12-30 01:00:000.29
2014-12-30 02:00:000.27
2014-12-30 03:00:000.27
2014-12-30 04:00:000.30
\n", + "
" + ], + "text/plain": [ + " load\n", + "2014-12-30 00:00:00 0.33\n", + "2014-12-30 01:00:00 0.29\n", + "2014-12-30 02:00:00 0.27\n", + "2014-12-30 03:00:00 0.27\n", + "2014-12-30 04:00:00 0.30" + ] + }, + "metadata": {}, + "execution_count": 25 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 26, + "source": [ + "# Specify the number of steps to forecast ahead\n", + "HORIZON = 3\n", + "print('Forecasting horizon:', HORIZON, 'hours')" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Forecasting horizon: 3 hours\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 27, + "source": [ + "order = (4, 1, 0)\n", + "seasonal_order = (1, 1, 0, 24)\n", + "\n", + "model = SARIMAX(endog=train, order=order, seasonal_order=seasonal_order)\n", + "results = model.fit()\n", + "\n", + "print(results.summary())\n" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " SARIMAX Results \n", + "==========================================================================================\n", + "Dep. Variable: load No. 
Observations: 1416\n", + "Model: SARIMAX(4, 1, 0)x(1, 1, 0, 24) Log Likelihood 3477.239\n", + "Date: Thu, 30 Sep 2021 AIC -6942.477\n", + "Time: 14:36:28 BIC -6911.050\n", + "Sample: 11-01-2014 HQIC -6930.725\n", + " - 12-29-2014 \n", + "Covariance Type: opg \n", + "==============================================================================\n", + " coef std err z P>|z| [0.025 0.975]\n", + "------------------------------------------------------------------------------\n", + "ar.L1 0.8403 0.016 52.226 0.000 0.809 0.872\n", + "ar.L2 -0.5220 0.034 -15.388 0.000 -0.588 -0.456\n", + "ar.L3 0.1536 0.044 3.470 0.001 0.067 0.240\n", + "ar.L4 -0.0778 0.036 -2.158 0.031 -0.148 -0.007\n", + "ar.S.L24 -0.2327 0.024 -9.718 0.000 -0.280 -0.186\n", + "sigma2 0.0004 8.32e-06 47.358 0.000 0.000 0.000\n", + "===================================================================================\n", + "Ljung-Box (L1) (Q): 0.05 Jarque-Bera (JB): 1464.60\n", + "Prob(Q): 0.83 Prob(JB): 0.00\n", + "Heteroskedasticity (H): 0.84 Skew: 0.14\n", + "Prob(H) (two-sided): 0.07 Kurtosis: 8.02\n", + "===================================================================================\n", + "\n", + "Warnings:\n", + "[1] Covariance matrix calculated using the outer product of gradients (complex-step).\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Tạo một điểm dữ liệu kiểm tra cho mỗi bước HORIZON.\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 28, + "source": [ + "test_shifted = test.copy()\n", + "\n", + "for t in range(1, HORIZON):\n", + " test_shifted['load+'+str(t)] = test_shifted['load'].shift(-t, freq='H')\n", + " \n", + "test_shifted = test_shifted.dropna(how='any')\n", + "test_shifted.head(5)" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
loadload+1load+2
2014-12-30 00:00:000.330.290.27
2014-12-30 01:00:000.290.270.27
2014-12-30 02:00:000.270.270.30
2014-12-30 03:00:000.270.300.41
2014-12-30 04:00:000.300.410.57
\n", + "
" + ], + "text/plain": [ + " load load+1 load+2\n", + "2014-12-30 00:00:00 0.33 0.29 0.27\n", + "2014-12-30 01:00:00 0.29 0.27 0.27\n", + "2014-12-30 02:00:00 0.27 0.27 0.30\n", + "2014-12-30 03:00:00 0.27 0.30 0.41\n", + "2014-12-30 04:00:00 0.30 0.41 0.57" + ] + }, + "metadata": {}, + "execution_count": 28 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 29, + "source": [ + "%%time\n", + "training_window = 720 # dedicate 30 days (720 hours) for training\n", + "\n", + "train_ts = train['load']\n", + "test_ts = test_shifted\n", + "\n", + "history = [x for x in train_ts]\n", + "history = history[(-training_window):]\n", + "\n", + "predictions = list()\n", + "\n", + "# let's use a simpler model for demonstration\n", + "order = (2, 1, 0)\n", + "seasonal_order = (1, 1, 0, 24)\n", + "\n", + "for t in range(test_ts.shape[0]):\n", + " model = SARIMAX(endog=history, order=order, seasonal_order=seasonal_order)\n", + " model_fit = model.fit()\n", + " yhat = model_fit.forecast(steps = HORIZON)\n", + " predictions.append(yhat)\n", + " obs = list(test_ts.iloc[t])\n", + " # move the training window\n", + " history.append(obs[0])\n", + " history.pop(0)\n", + " print(test_ts.index[t])\n", + " print(t+1, ': predicted =', yhat, 'expected =', obs)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "2014-12-30 00:00:00\n", + "1 : predicted = [0.32 0.29 0.28] expected = [0.32945389435989236, 0.2900626678603402, 0.2739480752014323]\n", + "2014-12-30 01:00:00\n", + "2 : predicted = [0.3 0.29 0.3 ] expected = [0.2900626678603402, 0.2739480752014323, 0.26812891674127126]\n", + "2014-12-30 02:00:00\n", + "3 : predicted = [0.27 0.28 0.32] expected = [0.2739480752014323, 0.26812891674127126, 0.3025962399283795]\n", + "2014-12-30 03:00:00\n", + "4 : predicted = [0.28 0.32 0.42] expected = [0.26812891674127126, 0.3025962399283795, 0.40823634735899716]\n", +
"2014-12-30 04:00:00\n", + "5 : predicted = [0.3 0.39 0.54] expected = [0.3025962399283795, 0.40823634735899716, 0.5689346463742166]\n", + "2014-12-30 05:00:00\n", + "6 : predicted = [0.4 0.55 0.66] expected = [0.40823634735899716, 0.5689346463742166, 0.6799462846911368]\n", + "2014-12-30 06:00:00\n", + "7 : predicted = [0.57 0.68 0.75] expected = [0.5689346463742166, 0.6799462846911368, 0.7309758281110115]\n", + "2014-12-30 07:00:00\n", + "8 : predicted = [0.68 0.75 0.8 ] expected = [0.6799462846911368, 0.7309758281110115, 0.7511190689346463]\n", + "2014-12-30 08:00:00\n", + "9 : predicted = [0.75 0.8 0.82] expected = [0.7309758281110115, 0.7511190689346463, 0.7636526410026856]\n", + "2014-12-30 09:00:00\n", + "10 : predicted = [0.77 0.78 0.78] expected = [0.7511190689346463, 0.7636526410026856, 0.7381378692927483]\n", + "2014-12-30 10:00:00\n", + "11 : predicted = [0.76 0.75 0.74] expected = [0.7636526410026856, 0.7381378692927483, 0.7188898836168307]\n", + "2014-12-30 11:00:00\n", + "12 : predicted = [0.77 0.76 0.75] expected = [0.7381378692927483, 0.7188898836168307, 0.7090420769919425]\n", + "2014-12-30 12:00:00\n", + "13 : predicted = [0.7 0.68 0.69] expected = [0.7188898836168307, 0.7090420769919425, 0.7081468218442255]\n", + "2014-12-30 13:00:00\n", + "14 : predicted = [0.72 0.73 0.76] expected = [0.7090420769919425, 0.7081468218442255, 0.7385854968666068]\n", + "2014-12-30 14:00:00\n", + "15 : predicted = [0.71 0.73 0.86] expected = [0.7081468218442255, 0.7385854968666068, 0.8478066248880931]\n", + "2014-12-30 15:00:00\n", + "16 : predicted = [0.73 0.85 0.97] expected = [0.7385854968666068, 0.8478066248880931, 0.9516562220232765]\n", + "2014-12-30 16:00:00\n", + "17 : predicted = [0.87 0.99 0.97] expected = [0.8478066248880931, 0.9516562220232765, 0.934198746642793]\n", + "2014-12-30 17:00:00\n", + "18 : predicted = [0.94 0.92 0.86] expected = [0.9516562220232765, 0.934198746642793, 0.8876454789615038]\n", + "2014-12-30 18:00:00\n", + "19 : predicted = 
[0.94 0.89 0.82] expected = [0.934198746642793, 0.8876454789615038, 0.8294538943598924]\n", + "2014-12-30 19:00:00\n", + "20 : predicted = [0.88 0.82 0.71] expected = [0.8876454789615038, 0.8294538943598924, 0.7197851387645477]\n", + "2014-12-30 20:00:00\n", + "21 : predicted = [0.83 0.72 0.58] expected = [0.8294538943598924, 0.7197851387645477, 0.5747538048343777]\n", + "2014-12-30 21:00:00\n", + "22 : predicted = [0.72 0.58 0.47] expected = [0.7197851387645477, 0.5747538048343777, 0.4592658907788718]\n", + "2014-12-30 22:00:00\n", + "23 : predicted = [0.58 0.47 0.39] expected = [0.5747538048343777, 0.4592658907788718, 0.3858549686660697]\n", + "2014-12-30 23:00:00\n", + "24 : predicted = [0.46 0.38 0.34] expected = [0.4592658907788718, 0.3858549686660697, 0.34377797672336596]\n", + "2014-12-31 00:00:00\n", + "25 : predicted = [0.38 0.34 0.33] expected = [0.3858549686660697, 0.34377797672336596, 0.32542524619516544]\n", + "2014-12-31 01:00:00\n", + "26 : predicted = [0.36 0.34 0.34] expected = [0.34377797672336596, 0.32542524619516544, 0.33034914950760963]\n", + "2014-12-31 02:00:00\n", + "27 : predicted = [0.32 0.32 0.35] expected = [0.32542524619516544, 0.33034914950760963, 0.3706356311548791]\n", + "2014-12-31 03:00:00\n", + "28 : predicted = [0.32 0.36 0.47] expected = [0.33034914950760963, 0.3706356311548791, 0.470008952551477]\n", + "2014-12-31 04:00:00\n", + "29 : predicted = [0.37 0.48 0.65] expected = [0.3706356311548791, 0.470008952551477, 0.6145926589077886]\n", + "2014-12-31 05:00:00\n", + "30 : predicted = [0.48 0.64 0.75] expected = [0.470008952551477, 0.6145926589077886, 0.7247090420769919]\n", + "2014-12-31 06:00:00\n", + "31 : predicted = [0.63 0.73 0.79] expected = [0.6145926589077886, 0.7247090420769919, 0.786034019695613]\n", + "2014-12-31 07:00:00\n", + "32 : predicted = [0.71 0.76 0.79] expected = [0.7247090420769919, 0.786034019695613, 0.8012533572068039]\n", + "2014-12-31 08:00:00\n", + "33 : predicted = [0.79 0.82 0.83] expected = 
[0.786034019695613, 0.8012533572068039, 0.7994628469113696]\n", + "2014-12-31 09:00:00\n", + "34 : predicted = [0.82 0.83 0.81] expected = [0.8012533572068039, 0.7994628469113696, 0.780214861235452]\n", + "2014-12-31 10:00:00\n", + "35 : predicted = [0.8 0.78 0.76] expected = [0.7994628469113696, 0.780214861235452, 0.7587287376902416]\n", + "2014-12-31 11:00:00\n", + "36 : predicted = [0.77 0.75 0.74] expected = [0.780214861235452, 0.7587287376902416, 0.7367949865711727]\n", + "2014-12-31 12:00:00\n", + "37 : predicted = [0.77 0.76 0.76] expected = [0.7587287376902416, 0.7367949865711727, 0.7188898836168307]\n", + "2014-12-31 13:00:00\n", + "38 : predicted = [0.75 0.75 0.78] expected = [0.7367949865711727, 0.7188898836168307, 0.7273948075201431]\n", + "2014-12-31 14:00:00\n", + "39 : predicted = [0.73 0.75 0.87] expected = [0.7188898836168307, 0.7273948075201431, 0.8299015219337511]\n", + "2014-12-31 15:00:00\n", + "40 : predicted = [0.74 0.85 0.96] expected = [0.7273948075201431, 0.8299015219337511, 0.909579230080573]\n", + "2014-12-31 16:00:00\n", + "41 : predicted = [0.83 0.94 0.93] expected = [0.8299015219337511, 0.909579230080573, 0.855863921217547]\n", + "2014-12-31 17:00:00\n", + "42 : predicted = [0.94 0.93 0.88] expected = [0.909579230080573, 0.855863921217547, 0.7721575649059982]\n", + "2014-12-31 18:00:00\n", + "43 : predicted = [0.87 0.82 0.77] expected = [0.855863921217547, 0.7721575649059982, 0.7023276633840643]\n", + "2014-12-31 19:00:00\n", + "44 : predicted = [0.79 0.73 0.63] expected = [0.7721575649059982, 0.7023276633840643, 0.6195165622202325]\n", + "2014-12-31 20:00:00\n", + "45 : predicted = [0.7 0.59 0.46] expected = [0.7023276633840643, 0.6195165622202325, 0.5425246195165621]\n", + "2014-12-31 21:00:00\n", + "46 : predicted = [0.6 0.47 0.36] expected = [0.6195165622202325, 0.5425246195165621, 0.4735899731423454]\n", + "CPU times: user 12min 15s, sys: 2min 39s, total: 14min 54s\n", + "Wall time: 2min 36s\n" + ] + } + ], + "metadata": { + 
"scrolled": true + } + }, + { + "cell_type": "markdown", + "source": [ + "So sánh dự đoán với tải thực tế\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 30, + "source": [ + "eval_df = pd.DataFrame(predictions, columns=['t+'+str(t) for t in range(1, HORIZON+1)])\n", + "eval_df['timestamp'] = test.index[0:len(test.index)-HORIZON+1]\n", + "eval_df = pd.melt(eval_df, id_vars='timestamp', value_name='prediction', var_name='h')\n", + "eval_df['actual'] = np.array(np.transpose(test_ts)).ravel()\n", + "eval_df[['prediction', 'actual']] = scaler.inverse_transform(eval_df[['prediction', 'actual']])\n", + "eval_df.head()" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
timestamphpredictionactual
02014-12-30 00:00:00t+13,008.743,023.00
12014-12-30 01:00:00t+12,955.532,935.00
22014-12-30 02:00:00t+12,900.172,899.00
32014-12-30 03:00:00t+12,917.692,886.00
42014-12-30 04:00:00t+12,946.992,963.00
\n", + "
" + ], + "text/plain": [ + " timestamp h prediction actual\n", + "0 2014-12-30 00:00:00 t+1 3,008.74 3,023.00\n", + "1 2014-12-30 01:00:00 t+1 2,955.53 2,935.00\n", + "2 2014-12-30 02:00:00 t+1 2,900.17 2,899.00\n", + "3 2014-12-30 03:00:00 t+1 2,917.69 2,886.00\n", + "4 2014-12-30 04:00:00 t+1 2,946.99 2,963.00" + ] + }, + "metadata": {}, + "execution_count": 30 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Tính **lỗi phần trăm tuyệt đối trung bình (MAPE)** trên tất cả các dự đoán\n", + "\n", + "$$MAPE = \\frac{1}{n} \\sum_{t=1}^{n}|\\frac{actual_t - predicted_t}{actual_t}|$$\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 31, + "source": [ + "if(HORIZON > 1):\n", + " eval_df['APE'] = (eval_df['prediction'] - eval_df['actual']).abs() / eval_df['actual']\n", + " print(eval_df.groupby('h')['APE'].mean())" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "h\n", + "t+1 0.01\n", + "t+2 0.01\n", + "t+3 0.02\n", + "Name: APE, dtype: float64\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 32, + "source": [ + "print('One step forecast MAPE: ', (mape(eval_df[eval_df['h'] == 't+1']['prediction'], eval_df[eval_df['h'] == 't+1']['actual']))*100, '%')" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "One step forecast MAPE: 0.5570581332313952 %\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 33, + "source": [ + "print('Multi-step forecast MAPE: ', mape(eval_df['prediction'], eval_df['actual'])*100, '%')" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Multi-step forecast MAPE: 1.1460048657704118 %\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Vẽ biểu đồ dự đoán so với thực tế cho tuần đầu tiên của tập kiểm tra\n" + ], + 
"metadata": {} + }, + { + "cell_type": "code", + "execution_count": 34, + "source": [ + "if(HORIZON == 1):\n", + " ## Plotting single step forecast\n", + " eval_df.plot(x='timestamp', y=['actual', 'prediction'], style=['r', 'b'], figsize=(15, 8))\n", + "\n", + "else:\n", + " ## Plotting multi step forecast\n", + " plot_df = eval_df[(eval_df.h=='t+1')][['timestamp', 'actual']]\n", + " for t in range(1, HORIZON+1):\n", + " plot_df['t+'+str(t)] = eval_df[(eval_df.h=='t+'+str(t))]['prediction'].values\n", + "\n", + " fig = plt.figure(figsize=(15, 8))\n", + " ax = plt.plot(plot_df['timestamp'], plot_df['actual'], color='red', linewidth=4.0)\n", + " ax = fig.add_subplot(111)\n", + " for t in range(1, HORIZON+1):\n", + " x = plot_df['timestamp'][(t-1):]\n", + " y = plot_df['t+'+str(t)][0:len(x)]\n", + " ax.plot(x, y, color='blue', linewidth=4*math.pow(.9,t), alpha=math.pow(0.8,t))\n", + " \n", + " ax.legend(loc='best')\n", + " \n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "No handles with labels found to put in legend.\n" + ] + }, + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn tham khảo chính thức. Đối với các thông tin quan trọng, chúng tôi khuyến nghị sử dụng dịch vụ dịch thuật chuyên nghiệp từ con người. Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ], + "metadata": { + "kernel_info": { + "name": "python3" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "nteract": { + "version": "nteract-front-end@1.0.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "c193140200b9684da27e3890211391b6", + "translation_date": "2025-09-06T14:00:04+00:00", + "source_file": "7-TimeSeries/2-ARIMA/solution/notebook.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/translations/vi/7-TimeSeries/2-ARIMA/working/notebook.ipynb b/translations/vi/7-TimeSeries/2-ARIMA/working/notebook.ipynb new file mode 
100644 index 000000000..17d58e045 --- /dev/null +++ b/translations/vi/7-TimeSeries/2-ARIMA/working/notebook.ipynb @@ -0,0 +1,59 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 2, + "coopTranslator": { + "original_hash": "523ec472196307b3c4235337353c9ceb", + "translation_date": "2025-09-06T14:00:55+00:00", + "source_file": "7-TimeSeries/2-ARIMA/working/notebook.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# Dự báo chuỗi thời gian với ARIMA\n", + "\n", + "Trong notebook này, chúng ta sẽ thực hiện:\n", + "- chuẩn bị dữ liệu chuỗi thời gian để huấn luyện mô hình dự báo chuỗi thời gian ARIMA\n", + "- triển khai một mô hình ARIMA đơn giản để dự báo các bước tiếp theo trong HORIZON (từ thời điểm *t+1* đến *t+HORIZON*) trong chuỗi thời gian\n", + "- đánh giá mô hình\n", + "\n", + "Dữ liệu trong ví dụ này được lấy từ cuộc thi dự báo GEFCom2014. Nó bao gồm 3 năm dữ liệu tải điện và nhiệt độ theo giờ từ năm 2012 đến 2014. Nhiệm vụ là dự báo các giá trị tải điện trong tương lai. Trong ví dụ này, chúng ta sẽ minh họa cách dự báo một bước thời gian tiếp theo, chỉ sử dụng dữ liệu tải điện lịch sử.\n", + "\n", + "Tao Hong, Pierre Pinson, Shu Fan, Hamidreza Zareipour, Alberto Troccoli và Rob J. 
Hyndman, \"Dự báo năng lượng xác suất: Cuộc thi Dự báo Năng lượng Toàn cầu 2014 và xa hơn\", Tạp chí Quốc tế về Dự báo, tập 32, số 3, trang 896-913, tháng 7-tháng 9, 2016.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pip install statsmodels" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn tham khảo chính thức. Đối với các thông tin quan trọng, nên sử dụng dịch vụ dịch thuật chuyên nghiệp từ con người. Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/vi/7-TimeSeries/3-SVR/solution/notebook.ipynb b/translations/vi/7-TimeSeries/3-SVR/solution/notebook.ipynb new file mode 100644 index 000000000..a3206fd3d --- /dev/null +++ b/translations/vi/7-TimeSeries/3-SVR/solution/notebook.ipynb @@ -0,0 +1,1019 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "fv9OoQsMFk5A" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Trong sổ tay này, chúng ta sẽ trình bày cách:\n", + "\n", + "- chuẩn bị dữ liệu chuỗi thời gian 2D để huấn luyện mô hình hồi quy SVM \n", + "- triển khai SVR sử dụng kernel RBF \n", + "- đánh giá mô hình bằng biểu đồ và MAPE \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Nhập các mô-đun\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append('../../')" + ] + 
}, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "M687KNlQFp0-" + }, + "outputs": [], + "source": [ + "import os\n", + "import warnings\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import datetime as dt\n", + "import math\n", + "\n", + "from sklearn.svm import SVR\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from common.utils import load_data, mape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Cj-kfVdMGjWP" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8fywSjC6GsRz" + }, + "source": [ + "### Tải dữ liệu\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "aBDkEB11Fumg", + "outputId": "99cf7987-0509-4b73-8cc2-75d7da0d2740" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2012-01-01 00:00:002698.0
2012-01-01 01:00:002558.0
2012-01-01 02:00:002444.0
2012-01-01 03:00:002402.0
2012-01-01 04:00:002403.0
\n", + "
" + ], + "text/plain": [ + " load\n", + "2012-01-01 00:00:00 2698.0\n", + "2012-01-01 01:00:00 2558.0\n", + "2012-01-01 02:00:00 2444.0\n", + "2012-01-01 03:00:00 2402.0\n", + "2012-01-01 04:00:00 2403.0" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "energy = load_data('../../data')[['load']]\n", + "energy.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "O0BWP13rGnh4" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 486 + }, + "id": "hGaNPKu_Gidk", + "outputId": "7f89b326-9057-4f49-efbe-cb100ebdf76d" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "energy.plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IPuNor4eGwYY" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "ysvsNyONGt0Q" + }, + "outputs": [], + "source": [ + "train_start_dt = '2014-11-01 00:00:00'\n", + "test_start_dt = '2014-12-30 00:00:00'" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 548 + }, + "id": "SsfdLoPyGy9w", + "outputId": "d6d6c25b-b1f4-47e5-91d1-707e043237d7" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "energy[(energy.index < test_start_dt) & (energy.index >= train_start_dt)][['load']].rename(columns={'load':'train'}) \\\n", + " .join(energy[test_start_dt:][['load']].rename(columns={'load':'test'}), how='outer') \\\n", + " .plot(y=['train', 'test'], figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XbFTqBw6G1Ch" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Bây giờ, bạn cần chuẩn bị dữ liệu để huấn luyện bằng cách thực hiện lọc và chuẩn hóa dữ liệu của mình.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cYivRdQpHDj3", + "outputId": "a138f746-461c-4fd6-bfa6-0cee094c4aa1" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training data shape: (1416, 1)\n", + "Test data shape: (48, 1)\n" + ] + } + ], + "source": [ + "train = energy.copy()[(energy.index >= train_start_dt) & (energy.index < test_start_dt)][['load']]\n", + "test = energy.copy()[energy.index >= test_start_dt][['load']]\n", + "\n", + "print('Training data shape: ', train.shape)\n", + "print('Test data shape: ', test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Chuyển đổi dữ liệu để nằm trong khoảng (0, 1).\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "3DNntGQnZX8G", + "outputId": "210046bc-7a66-4ccd-d70d-aa4a7309949c" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2014-11-01 00:00:000.101611
2014-11-01 01:00:000.065801
2014-11-01 02:00:000.046106
2014-11-01 03:00:000.042525
2014-11-01 04:00:000.059087
\n", + "
" + ], + "text/plain": [ + " load\n", + "2014-11-01 00:00:00 0.101611\n", + "2014-11-01 01:00:00 0.065801\n", + "2014-11-01 02:00:00 0.046106\n", + "2014-11-01 03:00:00 0.042525\n", + "2014-11-01 04:00:00 0.059087" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "scaler = MinMaxScaler()\n", + "train['load'] = scaler.fit_transform(train)\n", + "train.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "26Yht-rzZexe", + "outputId": "20326077-a38a-4e78-cc5b-6fd7af95d301" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2014-12-30 00:00:000.329454
2014-12-30 01:00:000.290063
2014-12-30 02:00:000.273948
2014-12-30 03:00:000.268129
2014-12-30 04:00:000.302596
\n", + "
" + ], + "text/plain": [ + " load\n", + "2014-12-30 00:00:00 0.329454\n", + "2014-12-30 01:00:00 0.290063\n", + "2014-12-30 02:00:00 0.273948\n", + "2014-12-30 03:00:00 0.268129\n", + "2014-12-30 04:00:00 0.302596" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test['load'] = scaler.transform(test)\n", + "test.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x0n6jqxOQ41Z" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fdmxTZtOQ8xs" + }, + "source": [ + "Đối với SVR của chúng tôi, chúng tôi chuyển đổi dữ liệu đầu vào thành dạng `[batch, timesteps]`. Vì vậy, chúng tôi định hình lại `train_data` và `test_data` hiện có sao cho có một chiều mới đại diện cho các bước thời gian. Trong ví dụ của chúng tôi, chúng tôi chọn `timesteps = 5`. Vì vậy, đầu vào cho mô hình là dữ liệu của 4 bước thời gian đầu tiên, và đầu ra sẽ là dữ liệu của bước thời gian thứ 5.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "Rpju-Sc2HFm0" + }, + "outputs": [], + "source": [ + "# Converting to numpy arrays\n", + "\n", + "train_data = train.values\n", + "test_data = test.values" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# Selecting the timesteps\n", + "\n", + "timesteps=5" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "O-JrsrsVJhUQ", + "outputId": "c90dbe71-bacc-4ec4-b452-f82fe5aefaef" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(1412, 5)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Converting data to 2D tensor\n", + "\n", + "train_data_timesteps=np.array([[j for j in train_data[i:i+timesteps]] for i in range(0,len(train_data)-timesteps+1)])[:,:,0]\n", + "train_data_timesteps.shape" + ] + 
}, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "exJD8AI7KE4g", + "outputId": "ce90260c-f327-427d-80f2-77307b5a6318" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(44, 5)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Converting test data to 2D tensor\n", + "\n", + "test_data_timesteps=np.array([[j for j in test_data[i:i+timesteps]] for i in range(0,len(test_data)-timesteps+1)])[:,:,0]\n", + "test_data_timesteps.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "2u0R2sIsLuq5" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(1412, 4) (1412, 1)\n", + "(44, 4) (44, 1)\n" + ] + } + ], + "source": [ + "x_train, y_train = train_data_timesteps[:,:timesteps-1],train_data_timesteps[:,[timesteps-1]]\n", + "x_test, y_test = test_data_timesteps[:,:timesteps-1],test_data_timesteps[:,[timesteps-1]]\n", + "\n", + "print(x_train.shape, y_train.shape)\n", + "print(x_test.shape, y_test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8wIPOtAGLZlh" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "id": "EhA403BEPEiD" + }, + "outputs": [], + "source": [ + "# Create model using RBF kernel\n", + "\n", + "model = SVR(kernel='rbf',gamma=0.5, C=10, epsilon = 0.05)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GS0UA3csMbqp", + "outputId": "d86b6f05-5742-4c1d-c2db-c40510bd4f0d" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "SVR(C=10, cache_size=200, coef0=0.0, degree=3, epsilon=0.05, gamma=0.5,\n", + " kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + 
"source": [ + "# Fit model on training data\n", + "\n", + "model.fit(x_train, y_train[:,0])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Rz_x8S3UrlcF" + }, + "source": [ + "### Thực hiện dự đoán mô hình\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XR0gnt3MnuYS", + "outputId": "157e40ab-9a23-4b66-a885-0d52a24b2364" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(1412, 1) (44, 1)\n" + ] + } + ], + "source": [ + "# Making predictions\n", + "\n", + "y_train_pred = model.predict(x_train).reshape(-1,1)\n", + "y_test_pred = model.predict(x_test).reshape(-1,1)\n", + "\n", + "print(y_train_pred.shape, y_test_pred.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_2epncg-SGzr" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1412 44\n" + ] + } + ], + "source": [ + "# Scaling the predictions\n", + "\n", + "y_train_pred = scaler.inverse_transform(y_train_pred)\n", + "y_test_pred = scaler.inverse_transform(y_test_pred)\n", + "\n", + "print(len(y_train_pred), len(y_test_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xmm_YLXhq7gV", + "outputId": "18392f64-4029-49ac-c71a-a4e2411152a1" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1412 44\n" + ] + } + ], + "source": [ + "# Scaling the original values\n", + "\n", + "y_train = scaler.inverse_transform(y_train)\n", + "y_test = scaler.inverse_transform(y_test)\n", + "\n", + "print(len(y_train), len(y_test))" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "u3LBj93coHEi", + "outputId": 
"d4fd49e8-8c6e-4bb0-8ef9-ca0b26d725b4" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1412 44\n" + ] + } + ], + "source": [ + "# Extract the timesteps for x-axis\n", + "\n", + "train_timestamps = energy[(energy.index < test_start_dt) & (energy.index >= train_start_dt)].index[timesteps-1:]\n", + "test_timestamps = energy[test_start_dt:].index[timesteps-1:]\n", + "\n", + "print(len(train_timestamps), len(test_timestamps))" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAABasAAAGDCAYAAAAlN0y0AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAEAAElEQVR4nOy9eZxtV13m/aw9n7GmOyY3E4QhITIHERkUacjbIrZt+wrd7dCKgratre3Urd2grSD42ohoMwiKAxAwKCAaDGMSEkJIIGRO7s2dx5rrjHtYe6/3j7X2ObvqVtU5dc+vbt3K/X0/n3xy65xTq/Y5Z++113rWs56fUEqBYRiGYRiGYRiGYRiGYRiGYbYSa6sPgGEYhmEYhmEYhmEYhmEYhmFYrGYYhmEYhmEYhmEYhmEYhmG2HBarGYZhGIZhGIZhGIZhGIZhmC2HxWqGYRiGYRiGYRiGYRiGYRhmy2GxmmEYhmEYhmEYhmEYhmEYhtlyWKxmGIZhGIZhGIZhGIZhGIZhthwWqxmGYRiGYZgLGiHEzUKIn6B+7agIIZQQ4urz8bc2m+J7EUK8TwjxP8+xnZYQ4im0R8cwDMMwDMNcLAil1FYfA8MwDMMwDPMkQwjRKvxYBhABSM3Pb1JKfeT8HxUtQggF4GlKqQMDXnclgEMAXKWUPB/HtlGGfS8rfucrAP5WKfXBTTswhmEYhmEY5qLC2eoDYBiGYRiGYZ58KKWq+b+FEIcBvFEp9YWVrxNCOBeqgLtd4c+UYRiGYRiG2a5wDAjDMAzDMAxz3hBCfI8Q4rgQ4jeEEKcB/KUQYkII8VkhxIwQYsH8e1/hd74ihHij+fdPCiG+KoT4/8xrDwkh/p9zfO1VQojbhBBNIcQXhBB/JoT423WO/deEEKeEECeFED+14rnvF0J8SwjREEIcE0K8tfD0beb/iyYm47uEEE8VQnxJCDEnhJgVQnxECDG+zt9WQohfFEIcNK//QyGEVXifdwgh3iWEmAPwViGEb973USHEGRPtURryvXxYCPF7hZ9/UAhxn3lvTwghbhBC/D6AlwH4U/Oe/rRwnHmcyJgQ4q/N93pECPHbK455ze+GYRiGYRiGuThhsZphGIZhGIY53+wBMAngCgA/Cz0m/Uvz8+UAugD+dJ3f/04AjwHYAeCdAD4khBDn8NqPArgbwBSAtwL4sbX+oBDiBgC/CuBfAXgagFeteEkbwI8DGAfw/QB+Tgjxb8xzLzf/H1dKVZVSXwMgALwdwCUArgFwmTmG9fghAC8E8HwAPwigKDJ/J4CDAHYD+H0AfwDg6QCeC+BqAJcC+F9Dvpfi+34RgL8G8Gvmvb0cwGGl1G8BuB3AL5j39Aur/Pp7AIwBeAqAV0B/Pv9pxTEP+z0yDMMwDMMwFwEsVjMMwzAMwz
DnmwzAW5RSkVKqq5SaU0p9UinVUUo1ocXWV6zz+0eUUn+ulEoB/BWAvdAi7dCvFUJcDuB6AP9LKRUrpb4K4DPr/M3/F8BfKqUeVEq1sUJYVkp9RSn1gFIqU0rdD+Bj670HpdQBpdTnzWcwA+D/DHjPAPAOpdS8UuoogD8G8IbCcyeVUu8x8R8h9CLAL5vXNwG8DcDrh3kvK/hpAH9hjjVTSp1QSj064DghhLDN3/vvSqmmUuowgD/C8gWBjXyPDMMwDMMwzEUAZ1YzDMMwDMMw55sZpVSY/yCEKAN4F4AbAEyYh2tCCNsImSs5nf9DKdUxZtzqKq9b77U7AMwrpTqF1x6DdjivxiUA7i38fKT4pBDiO6HdzNcB8AD4AP5ujbYghNgN4N3QURo1aBPJwlqvLxxf8e9fssZzO6GLWt5bMCoLAPYw72UFlwH45wHHtRo7ALgr2j4C7fDO2cj3yDAMwzAMw1wEsLOaYRiGYRiGOd+oFT//NwDPAPCdSqk6+rEZmxkJcQrApBHKc9YSqvPXF5+/fMXzH4V2Zl+mlBoD8D70j3/l+wW001kB+A7znv8jBr/flX//ZOHn4t+YhY5SeZZSatz8N1YoejnovRQ5BuCpazy32vsqHkMCHe1S/Dsn1vkdhmEYhmEY5iKHxWqGYRiGYRhmq6lBi6uLQohJAG/Z7D+olDoC4B7oYoSeEOK7APzAOr/yCQA/KYS41gjcK4+xBu3UDk3O878vPDcDHX3ylBWvbwFYEkJcCp0JPYhfM8UoLwPwSwA+vsZ7ywD8OYB3CSF2AYAQ4lIhxGuGfC9FPgTgPwkhvk8IYZl2nmmeO7PiPRWPITV/5/eFEDUhxBUAfgXAmgUsGYZhGIZhGIbFaoZhGIZhGGar+WMAJWg37l0APnee/u5/APBdAOYA/B60+But9kKl1M3Qx/klAAfM/4v8PIDfFUI0oQsZfqLwux3oHO47hBCLQogXA/gd6EKJSwD+CcDfD3G8n4aO77jP/M6H1nntb5jjvEsI0QDwBWj3+jDvpYdS6m7ooojvMsd6K/pu6XcD+HdCiAUhxJ+s8uv/Bbrw5EEAX4V2n//F4LfJMAzDMAzDXKwIpdbbvccwDMMwDMMwFwdCiI8DeFQptenO7o0ihFAAnqaUOrDVx8IwDMMwDMMwmwU7qxmGYRiGYZiLEiHE9UKIp5p4ixsA/CCAT23xYTEMwzAMwzDMRYuz1QfAMAzDMAzDMFvEHuj4jSkAxwH8nFLqW1t7SAzDMAzDMAxz8cIxIAzDMAzDMAzDMAzDMAzDMMyWwzEgDMMwDMMwDMMwDMMwDMMwzJbDYjXDMAzDMAzDMAzDMAzDMAyz5VzQmdU7duxQV1555VYfBsMwDMMwDMMwDMMwDMMwDLMO995776xSaucobVzQYvWVV16Je+65Z6sPg2EYhmEYhmEYhmEYhmEYhlkHIcSRUdvgGBCGYRiGYRiGYRiGYRiGYRhmy2GxmmEYhmEYhmEYhmEYhmEYhtlyWKxmGIZhGIZhGIZhGIZhGIZhtpwLOrN6NZIkwfHjxxGG4VYfyrYmCALs27cPrutu9aEwDMMwDMMwDMMwDMMwDMNsP7H6+PHjqNVquPLKKyGE2OrD2ZYopTA3N4fjx4/jqquu2urDYRiGYRiGYRiGYRiGYRiG2X4xIGEYYmpqioXqERBCYGpqit3pDMMwDMMwDMMwDMMwDMNcMGw7sRoAC9UE8GfIMAzDMAzDMAzDMAzDMMyFxLYUqy8UPvWpT0EIgUcffXTd1/3xH/8xOp3OOf+dD3/4w/iFX/iFc/59hmEYhmEYhmEYhmEYhmGYCx0Wq0fgYx/7GF760pfiYx/72LqvG1WsZhiGYRiGYRiGYRiGYRiGebLDYvU50mq18NWvfhUf+tCHcOONNwIA0jTFr/7qr+K6667Ds5/9bLznPe/Bn/zJn+DkyZP43u/9Xnzv934vAKBarfbauemmm/CTP/mTAIB//Md/xHd+53fiec
97Hl71qlfhzJkz5/19MQzDMAzDMAzDMAzDMAzDbAXOVh/ASLzpTZvT7vvfP/Aln/70p3HDDTfg6U9/OqampnDvvffi7rvvxuHDh3HffffBcRzMz89jcnIS/+f//B98+ctfxo4dO9Zt86UvfSnuuusuCCHwwQ9+EO985zvxR3/0R1TvimEYhmEYhmEYhmEYhmEY5oJle4vVW8jHPvYx/NIv/RIA4PWvfz0+9rGP4dChQ3jzm98Mx9Ef6+Tk5IbaPH78OH70R38Up06dQhzHuOqqq8iPm2EYhmEYhmEY5pyQErj9duA7vgMYYMRhGIZhGIY5F4YWq4UQNoB7AJxQSr1WCPFhAK8AsGRe8pNKqfuEEALAuwH8awAd8/g3TRs/AeC3zet/Tyn1VyMd/RAO6M1gfn4eX/rSl/DAAw9ACIE0TSGEwPXXXz/U7+uPSBOGYe/f/+W//Bf8yq/8Cl73utfhK1/5Ct761rdSHzrDMAzDMAzDMMy5cfvtwI03Ap/8JPCnf7rVR8MwDMMwzJOQjWRW/xKAR1Y89mtKqeea/+4zj/0/AJ5m/vtZAO8FACHEJIC3APhOAC8C8BYhxMQIx75l3HTTTfixH/sxHDlyBIcPH8axY8dw1VVX4TnPeQ7e//73Q0oJQIvaAFCr1dBsNnu/v3v3bjzyyCPIsgz/8A//0Ht8aWkJl156KQDgr/5qNB2fYRiGYRiGYRiGlNOn9f+TZGuPg2EYhmGYJy1DidVCiH0Avh/AB4d4+Q8C+GuluQvAuBBiL4DXAPi8UmpeKbUA4PMAbjjH495SPvaxj+GHfuiHlj32wz/8wzh16hQuv/xyPPvZz8ZznvMcfPSjHwUA/OzP/ixuuOGGXoHFP/iDP8BrX/tavOQlL8HevXt7bbz1rW/Fj/zIj+AFL3jBwHxrhmEYhmEYhmGY88rYWP/fWbZ1x8EwDMMwzJMWoZQa/CIhbgLwdgA1AL9aiAH5LgARgC8C+E2lVCSE+CyAP1BKfdX87hcB/AaA7wEQKKV+zzz+PwF0lVL/34q/9bPQjmxcfvnlLzhy5MiyY3nkkUdwzTXXnPMbZvrwZ8kwDMMwDMMwzNDcfDMe//CdeNrYNMTbfp9zqxmGYRiGWYYQ4l6l1AtHaWOgs1oI8VoA00qpe1c89d8BPBPA9QAmoQXpkVFKfUAp9UKl1At37txJ0STDMAzDMAzDMAwzKlGEN93+H/FEYydw5sxWHw3DMAzDME9ChokB+W4ArxNCHAZwI4BXCiH+Vil1ykR9RAD+EjqHGgBOALis8Pv7zGNrPc4wDMMwDMMwDMNc6EQR4szGA/OXsljNMAzDMMymMFCsVkr9d6XUPqXUlQBeD+BLSqn/aHKoIYQQAP4NgAfNr3wGwI8LzYsBLCmlTgH4FwCvFkJMmMKKrzaPMQzDMAzDMAzDMBc6UYQkF6tnZ7f6aBiGYRiGeRLijPC7HxFC7AQgANwH4M3m8X8G8K8BHADQAfCfAEApNS+E+N8AvmFe97tKqfkR/j7DMAzDMAzDMAxzvohjJJmN++cvBToPbPXRMAzDMAzzJGRDYrVS6isAvmL+/co1XqMA/Oc1nvsLAH+xoSNkGIZhGIZhGIZhtp4oAgAcbOwEut0tPhiGYRiGYZ6MDJNZzTAMwzAMwzAMw1zsRBEEACEUi9UMwzAMw2wKLFafA7Zt47nPfS6uu+46/MiP/Ag6nc45t/WTP/mTuOmmmwAAb3zjG/Hwww+v+dqvfOUruPPOOzf8N6688krMcqYcwzAMwzAMwzCjEEUQQsERGWQ72uqjYRiGYRjmSQiL1edAqVTCfffdhwcffBCe5+F973vfsuellOfU7gc/+EFce+21az5/rmI1wzAMwzAMwzDMyJgYkLITo91It/hgGIZhGIZ5MsJi9Yi87GUvw4EDB/CVr3wFL3vZy/C6170O1157LdI0xa/92q/h+uuvx7Of/Wy8//3vBwAopfALv/ALeMYznoFXvepVmJ6e7rX1Pd
/zPbjnnnsAAJ/73Ofw/Oc/H895znPwfd/3fTh8+DDe97734V3vehee+9zn4vbbb8fMzAx++Id/GNdffz2uv/563HHHHQCAubk5vPrVr8aznvUsvPGNb4SOEWcYhmEYhmEYhhmBOAYAVNwInSaL1QzDMAzD0LOhAovMcqSUuPnmm3HDDTcAAL75zW/iwQcfxFVXXYUPfOADGBsbwze+8Q1EUYTv/u7vxqtf/Wp861vfwmOPPYaHH34YZ86cwbXXXouf+qmfWtbuzMwMfuZnfga33XYbrrrqKszPz2NychJvfvObUa1W8au/+qsAgH//7/89fvmXfxkvfelLcfToUbzmNa/BI488gt/5nd/BS1/6Uvyv//W/8E//9E/40Ic+dN4/G4ZhGIZhGIZhnmSEIQDjrG6xIYZhGIZhGHq2vVh9ww0AZRzzjh3A5z63/mu63S6e+9znAtDO6p/+6Z/GnXfeiRe96EW46qqrAAC33HIL7r///l4e9dLSEvbv34/bbrsNb3jDG2DbNi655BK88pWvPKv9u+66Cy9/+ct7bU1OTq56HF/4wheWZVw3Gg20Wi3cdttt+Pu//3sAwPd///djYmJiQ58BwzAMwzAMwzDMWeTOaidGJ7SAJAFcd4sPimEYhmGYJxPbXqweJCxvBnlm9UoqlUrv30opvOc978FrXvOaZa/553/+Z7LjyLIMd911F4IgIGuTYRiGYRiGYRjmLLJMi9MAyiWFtvSBbpfFaoZhGIZhSOHM6k3iNa95Dd773vciMQO6xx9/HO12Gy9/+cvx8Y9/HGma4tSpU/jyl7981u+++MUvxm233YZDhw4BAObn5wEAtVoNzWaz97pXv/rVeM973tP7ORfQX/7yl+OjH/0oAODmm2/GwsLCprxHhmEYhmEYhmEuEoyrGsJCpZyhIz0tVjMMwzAMwxDCYvUm8cY3vhHXXnstnv/85+O6667Dm970Jkgp8UM/9EN42tOehmuvvRY//uM/ju/6ru8663d37tyJD3zgA/i3//bf4jnPeQ5+9Ed/FADwAz/wA/iHf/iHXoHFP/mTP8E999yDZz/72bj22mvxvve9DwDwlre8Bbfddhue9axn4e///u9x+eWXn9f3zjAMwzAMwzDMk4woglKAEgLlEtBOWKxmGIZhmFtv3eojePIhlLpwC2O88IUvVPfcc8+yxx555BFcc801W3RETy74s2QYhmEYhmEYZiimpyF/6y14yT/9Fn7kBQdxWecxvP5PXwrwfIJhGIa5iLn6auDAga0+igsHIcS9SqkXjtIGO6sZhmEYhmEYhmGY9YkiJJkN11EoV9DPrCbgiSeAN72JpCmGYRiGOW8oBZw4of/P0MFiNcMwDMMwDMMwAzl5Evi939vqo2C2jDjWYrWrUKlapJnVi/c+gf13nNFFHBmGYRhmm9DpAGHYqz/MEMFiNcMwDMMwDMMwA5mZAW6/fauPgtkykqTvrK5aaBOK1fFHb8LpEylw990k7TEMwzDM+WBuTv8/DLf2OJ5sbEux+kLO2d4u8GfIMAzDMAzDbAQpgTNntvoomC1DSi1W2wqVmnFWxzFJ03Fq41RnDFhaImmPYRiGYc4HuVjN9YZp2XZidRAEmJubY7F1BJRSmJubQxAEW30oDMMwDMMwzDaBxeqLnDQtZFYLtBMfiCKSpqPMwWJcQZjYJO0xDMMwzPlg7owEwM5qapytPoCNsm/fPhw/fhwzMzNbfSjbmiAIsG/fvq0+DIZhGIZhGGabIKWOAskywNp2lpeLDyl1hmapRNRgHgPiFpzVFGJ1liFO9bT09LyHK0dvkWEYhmE2n6NHMfdHtwB4I4vVxGw7sdp1XVx11VVbfRgMwzAMwzAMc1GRJECa6i2vO3du9dEwg7j5ZuBrXwPe9jaiBnvOavQzqyliQLpdxJkRq88IFqsZhmGY7cHtt2OuW4ZAhjDkVXxK+NNkGIZhGIZhGGYg8r4HAQBnjtJEPzCbSxQBjz9O2KCUkJ
kFx1GojNl0mdXtNuLURtUNcer06M0xDMMwzHlh927MRRXsLjURtuRWH82TCharGYZhGIZhGIYZiLzlSwCAM5/75hYfCTMMaQrs30/YYF5g0QHKdRdt6ZOJ1VHm4IrqHE5Pc2Y1wzAMs01wHMyHFVxaWUR4dHqrj+ZJBYvVDMMwDMMwDMMMRGYWSnaM6YPtrT4UZgjSMzM4sD8DWV36XKx2gUrdpiuw2G4jTh1cUZ3H6dltl1LJMAzDXKxIibmogn2VBYRHuAI1JSxWMwzDMAzDMAwzEKksXFpZwJmT6VYfCjME6T/8IzpdC6cfmqNpsCBWl+oubQxI5mAqaKHVFqO3xzAMwzDnAykxF1a1s3q2tdVH86SCxWqGYRiGYRiGYQaSZDYurSzizPTFKyjeeSfw0ENbfRTDkSoLjkix/6tEbq+8wKIL2CUPmRI0YnWngyh1UHdDhF0qGzjDMAzDbDJSopkE2Bk00W1nW300TypYrGYYhmEYhmEYZiASLnYGLSx2faDb3erD2RJuvx34xje2+iiGI1UWrqrP4uDDIU2DBWc1fF8/RhED0mohzmzUvRAh1+5kGIZhtgtSQioLNS9E2GGxmhIWqxmGYRhmizh6FLj55q0+CoZhmCHIMkgJTAUtLMUlYHFxq49oS5AS6HS2+iiGQCnIzMLOoInFU0QLC7lY7QHwPAih6GJAUgc1NyTRvhmGYRjmvJCmkJmFqhOxWE0Mi9UMwzAMs0Xcc8/FLVa320BIZPhjGGaTiWNIZWPKb6ORBBetszpJtslbzzKkysKk30FjmtpZLQDP049RqMudDuLM0c7qxNYrAgxDxPw8cOONW30UzLC0WsD09FYfBcMMSZIgVRbKTswxVsSwWM0wDMMwW8TMzDYRPTaJP/9znkAyzLYhjpFkNnbkzuoLvPNSCvj85+nb3TbOaimRKgsTfhuNxQzICBxfRWd1HgMSx/rDHoUwNJnVXUSpu00+YGa7cPTo9hlrSAn84R9u9VFsLbfcAvzZn231UTDMkKS64HTgJCxWE8NiNcMwDMNsERe7WB2GwNzcVh8FwzBDEYaQmc5l7EjvghcUowh49auB97+ftt0kueDfusaI1eNeF42YyAkvJaSy4bgCsG0IAWSp6k3WR2m3l1mdOhf3jfEiJ4qA7/5u2jaTZPs4dcMQeOc7R1//2c5ICSwtbfVRMMyQSAkBoGSzWE0Ni9UMwzAMs0Vc7GJ1kgALC1t9FAzDDEUcQyoLrmWEyQu880oS4OlPV/jMZ2jb3V7OaoEJv4Nm4uvcpVFJU+OsFgCAwEkRpc7oudVJgjh1UPeMs5oiB5vZlnz0o8Cdd46+/lFESj3e2g4kCTA7C5w5Q9vu4uI26bfAYjWzzTCxVYGdIIzEFh/MkwsWqxmGYRhmi5iZoc9s3k5Rn1LqLEmGYbYBUQSZ2XCEiZO40MXqL38VU41DaJyhPc5tk1ktJdLMwoTfQSMu0YjVvRgQPYUM3BRdSSAuS2liQIyzmsXqi5YbbwSe9jSdW0yFlNvHWZ2P4e6/n7bdD3wA+Nu/pW1zs0hTFquZbUQuVjsJwtiiidxiALBYzTAMwzBbxmY4q3/914GPfIS2zc2CndUMs42IIsjMgmNlEACy9oWt2MqbPoUxt4POEVqVans5q01mdRIQi9X6x8DLEKbu6EUWkwRx5qBcsyEzm8Xqi5hOW+HyyxWpWJ0kQKNBUwt0s8nF6gceoG03joGHH6Ztc7NgsZrZVvSc1VIv3nLleDJYrGYYhmGYrWB2FnPHOuRidbMJ/M7vbA+HdZKws5phtg1RhCSz4YgUVTdEayHZ6iNal2TPZXCFyRIgnDxut8zqmhuhTRUDkovVvg0AKPmpFqspYkAyG17VqOAsVl+cnDoFeeAQxjunyJ3VwPaIAkkW27hib4QHHqDNvpWSxer/8B+4TgqzCeQFFu1E3w+3xdar7QGL1QzDMAyzFbzzncDsLOJFAgGhQD4pO3yYtNlNgZ3VDLONiG
NIZcO1Uox5XSzNE4bKbgJJUOvnaz/xBFm7281ZbeexLRQH3XNWm8zq3FlNlFnt1Xz9M4vVFycPPIA0ExifP4jWHJ0NOjHrattBrJY3/QMuV0dx5gH6HSHbRazerMzqI0eAb32Lvl3mIqeYWc1iNSksVjMMwzDMFqAWlyCEonG7FUgaXeyYSNFskja7KXBmNcNsI/IYkGqgxeqFCzuXMQlTOJY+RtWg6xC3k7NaZlqsJrvX5AUWfSNW+0pvex41X0FKRJkWq4VQ2yOvgaGnVEKmLNS9EM27HyFrVsa6H9gOudXywccwFbSwdILQWg493jp1anvEa2yWs1pK4JvfpG+XuchZJlY7HANCCIvVDMMwDLMFNJIAdTekdZAtLEDe9yAmpx/ZFmI1O6sZZhsRRZDKglMvY8wLsbRIu02dGhmlcK0UJTtBtECnLku5TYxTxlndE+ybBOJXkiwrsFjyFamz2h8L9M/srL44kRIKQM0N0Tq+SNZs8k+3YMpvYWY/XZsAoDahC0x2XYqqEyFKnb4lnAApgac+FXjsMbImN43NFKvZWc2QY/qtUt3V90MWq8lgsZphGIZhtoDZZBw7ghaUzOhmPDMzSDIbk24TzTMXuPVPKSQP70dzKePC2QyzHcgzq+tl7awmFBP+/M/po4uSKINrpah7XTSm6SaP2zEGJLATRA0Ct3LurA70FDIIQFhg0e7HgBA6q9/yFuDb3yZrblORshd/enFixNmqG6G1RPdByEPHcEllEdP3HCVrEwB+8RfpxU/plvrxRVSdolKQ334Qe5yZbbGbLU31ehW15pdl20OsZ7YZubN6zOcYEGJYrGYYhmGYLaBbmkTZMe6xxUWaRpWCVBYm/A4aj52iaXOz6HQgz8xiwmuhsXRhOzQZhoGJAbHh1ksY87poNAVZ03fdBdx/P1lzAIAkSuFYKWpuhMYsnVN3O8WApErAtpT+DOYIXJomWsTxdIHFwM90DMgoCqtSOgYkdeCNlfRDEd33deQI8NWvkjUHQL/dP/gD2jYB4E//FPjEJ+jb3TYY0afqhmg16cYFUtnYW17CzCxdnwUArRZw4ABpk8vii8i2yLVaSE7OYqJzAs35C7swLtCvvULtrhZC/8cwpEgJAcAfCxBKFqspYbGaYRiGYbYAmSg4VqrzOakmJHGMJLMx4XXQfOICD2fsdpFkNnaXGpg/vh2UH4a5yIljLVSOV7WzumVTNo0TJ8iaAwDIWPWd1RRCbd5uO0QSbQP7q5RIMwt2taQ/g3lJ0qbOrDbO6jwGZBSxOk3NQqsNu+zDs1LEHYJjNcQxcM89ZM0B0FrE7/1eX1Sjot0GTp6kbXNbkSQQAKpORBpllmQ2pvw2+cK4lMDx46RNQsYZHCuFIzIkTSJrcZJAZsbIcITWWv3KV5I2B0B3CZZFLFa328DcLBwZkl+3zEWOuf85YxWkyuIYEEJYrGYYhmGYLSBJANdKYYsMskm0Ch/HkJmNyaCN5gzdYEkp4Kd+CvjlXyZrsidW7yo1sXCSxWqGueAJQ51ZPVbRzseuBaoMnyShF6t7MSBuSCPUAjoK5dGDcFvbIGw/jwGpllB3QzSXCL6rFWJ1L7N6FPXH/K4QAsL34NsJojadmpQkwL33kjXXa7PdBh54gL7dM2do29xW5JnVXohWm84Cmwu11LU8pASOHSNu02Ttj/sdLE0TxeEkid5153XQPEZrV370UfromjRJMTmpaMXqL3wBaLdRmT1MXdecuchRiblfVSpQABcIJoTFaoZhGIbZAmSi4FoZSk6C7iLdhCTJbEz6HRphwhDHehs16cS800GS2dgZNLF4ml0IDHPBY3ZuOCUHpQDoSo9sUkYuViuFJMrgiAx1L0Rjkag/XFrSgr1IoeQF7q7OxepaWX8GDZo2dWZ1HgOiEKbOaGpVXkROCMDzENgSYYfo+1IK8fEzWJhJSKNbcm3+zjvp2gT0RzF9gW+K2lTyzGonohOrs8yMi9potmmlj80Qq/
N+a8zrYnGOqI8pGhlOERRaLRBFdEl2Oemtd2AymSaNb8r7qKoToUX4ERw7Bvzu79K1x2w/0kTvhkClAgFwgWBCWKxmGIZhmC0gSQBHpAjsBGGDaGATx5DKwqTfRqMJssKNcQzs2AHSAT46HcjMxpjXRWeOxWqGueBJU8jMhuPZKJWgs4qJxGryGBCTrayd1V0aoRYAmk0kmY2yEyNZoBV9yMkzq8u+/gzaBLEtvQKLfbG6K73RxOpc+RUAfF8Xg6RyVj/6KOITM7hCHcHMDE2TgL5/79sHfP3rdG3m7V7UYnUvszpCs2PTjGGiqDcuanZtUtfjpjqrvS6W5oiuAzM2nPA6pLvuAP0ZzM2RNgk538Ck38bSfsKLoV4HYIp3nqbru6engc9+lqw5ZhsiY73AhEpFP8AxIGSwWM0wDMMw5xulejEgJTtBd4lQrM4dRJFHVgUsmV1COV5AEhPmPZoYkLoborPILgSGueCRElJZcH0LQQAd/0DkICJ3VhsXuOsJ1P0QjRZRZEmjYRbZQnRmLvC95Lmzuuyj5kVodJzRPwOzeycXq0sljB4DssJZ7VM6q1stxKmDcb9D6qxOEuDyyxSpAA7oj/GijgHJndVuhFZCtHMjinQtD7+DZhyQ2oCThDhjXCmTWZ1h3OtgcYFozGXGhhN+B03CeBVAn7Ozs4QNKoXULC4snSYsVFc8t56gu8jiGHjoIbJELGYbktcg6sWAsLOaDBarGYZhGOZ8k6Y911/JidFtEBX/ygss5pMyosC/5G9uhDt9AlgkzGk1MSB1j8VqhtkW9JzVFkplgW5K56xOWhG6VAIl0BNVHVegXlVoxD5IgkobDSSZjZobojt3gWftSwmZ2bA9G/WyRCMJRp9Er+Kspo8BSRCFRCKdbSM2O3jI4rYAJLfeicmTD6A5QyimgZ3VkBICQM0N0UoCmms2DCEzGyVH9wno0n1nUupCgAlV/VYTs+OIVBexXaQUq01ud4euMC6g3zupszpNkSqBqaCFpTOEDlXT91XdCK3DdOp6kmhfyOHDZE0y2wmlIBMFWyigXIZrpUg6dAWdL3ZYrGYYhmGY800+IbEyBLakE6vNjKnudbUwQSVWP3YQniVBWp3IOKvHvC46DS7NzjAXPCZaw/FtLVZLj8ZBND2N5ImjGO+cpCt8ZcQZ1xOojwk04hJNjlGjAQUteHTmaYVKcpIEqRJwPAv1Sqo/g1G2J2cZkGWIMweer92ZQZ5dTuCsVrDondVhqJ3VXhedM3T3r+Sz/4K620X3xDxZm0BfrCZK8Np+mAKL2lnt0+wOC0O9wGKZBRVKsTrJcOmlim6BIYoglW0KLHaxuETkgs5jQPyO3nU3yvW6AnJndRwjzSxM+p1NEqtDtGbp2s1vgQ89RNYks53IMshMwLEyoFRCxYnRblzg9Sy2ESxWMwzDMMz5Ji9SZZzVYYsul1ABqLkRmpRi9diO/kSPim4XUlna8dZksXo78MEPAkeObPVRbC0/8RNbfQTDc//9oxlezyJfZMud1VSZ1SdOIM5sXBrMY/oMnZMwyWy4ro4qbcQBjUhlwq9LTozOPJ1Td1NIUx0D4tmoV1J9Txji+/rCF9bQsszJ1E4DVKp9sTpMXXJnNVnkp1kUrbm0sS1S6Z1RlKIfoD+Kchl0GevbjUJUQzMJyMRqmemiqEIoOrG61YJ87CB2tI9SDbX6RWzzGJAGkVTTiwFp032u0OtXShE7q5MEUuk4u6VZQodqLlY7EVpLdDfGJAGuvhp48EGyJpnthJRIMwuOnQG+j4oTod26WFcb6WGxmmEYhmHON3n2a55ZTSXWmsF4pazQkR5ZNmNSn4JrpXBEhuQMkZOsFwPSRafJLoTtwF130ReT2m7cfvtWH8Hw/Nf/CnziE4QNpqnutwIbQcXWIiWFWG3bSDIbO4IWlo4sjt4esDyzug6904RQrC47MTrzF3gRJTOJtl0L9ZrSgv0QKvDbfzfGmQ/+I3D69FntAUAn9V
Au64dKZTF6DIiUSDMB21aA78O3JaIukbPafOdlJyaNbUnqhQVcKhu0UkgefAz7qosXb261Oce8soModWhE1YJbGQBUh0isfuIJSAlMdk+gsUh0vpodIY4rdAxIiyiywzirx72udqwTCfb5Wg25s1oJLVYvKbrrKx8fuzFaTToxMf7Gt7EvO4LZ02y6uChJU0hlwzH3r7ITo9NmsZoKFqsZhmEY5nzTiwFJUXISdFtEYq0ZjItdO6GUoBOrQ12dvupGaO8nqiZkJqF6YMeVabYDUcRFzjsdspjmTUdK4I/+iLZBmdn9GBCqAovdLpQSGPc6WDpIZNGLYz2B9CzUxu3RIzByGg0I6H6rvUCbtf+qV5E2Z2JAtLO6VlVasB908jabkAeOoPnlbwB/93fLnzPKVFv6qFT0Q0EAhHJ0Z3WS2fActSnOasDcZ2YJxWq/CkdkEABdwb5GA8ncEi7BCUyfvvDvifPzwMGDtG2qWDtpxVhdu6AJY0AcK6O9bj0P0hQCbBwhqucRRXpBsOphzOtiseXQtGuc1YGt+wSqvCUpdZFVUmd1HgMStLEUDbcbZNh2AR0D0iYUE5Pb78J4No/OwdODX8w8+cjj0YxYXXHZWU0Ji9UMwzAMM4D3vIdsbK8xk3PXSvXEnMpFlm+nnpzU/yfa6tkXqwmz/ooiQoe2Oj2zOcQxi9WSOLp9M5FSz/MzKt3LOKsd30ap5uisYgohwfQFY14XjaOLo7cHFJzV0JnVyXCu4oEYZ3XNjdBapHXSHThA2lw/BsS1+rndgz6Dxx6DzMxrH3xQK5I5RqyOUheepx8KypZetBglDkNKRJkDz83oCyx2uxBC6UVhygKL5p7oWCniI6doGjXn7KTfRmuWNg99M5zaX/kK8Cd/QttmlugdXBgb0w9QDLyiqFfQuuaGaM4TRUtEEaT5vhoHiEKr8xiQkouKl6AT2zRRM4V4EQBkY0PZTbB7IsbsNG1x3FSZzOq4RBdZEsawhNIxIC26MWdewLVDZTphthdmp6xj68XWihOjzXMaMlisZhiGYZgBfPKTwDe/Sdhg7lC0FEpOjC6VyyOOtdOrVtM/EymLcaR6zmoysbrorA7FRVxRavsQRdvHVbxZbDexenyc8HjTVAsevo2g6pA6qwGg7oVYOkFQBBEoiNUWahOOzmml6A/Nh1lzQzQJc08BrctRfJw9pESqhM6szgX7QRewcWA2kwDHW+PAHXf0nzOLocLS8dKAdlVSZFaH0kXgqX6BxZjonlBcFF2iy79Noqwvfs7QLeAmma2LQRLmawPADTeQNgdA9y/79xO3GaWwrQyoVvXuMIoLouCsrrshmgu0NULG/S6ds9pcf65voVy1dJwbRWSHiQFxRKrHiFQxIJ/+J+yRxzH3KFWFSRSKQbb1ohmVsB5KOCLT41hCMbF3zdJt3GC2E/l8zgYQBHr3BovVZLBYzTAMwzADiCLgW98ibNCIPm7J0ZnVHSKhNndW1+vwbImoRTM51y6yTDtS5onUlKKIkHhkExJm8+AYkO0lVqcpMDVFWKytEANilzxkSpA7q5fm6SKRZGbB8SzYZV8fK4VAUywAt0S7wCYl/Q6eXgzImIXmMJnVxiXWKO/Biz/9m1C3f7Vvzc/7aKs/fQxKgkSs7kgP5SAFhEDgZYhSt38/GwXznZfsmO4+i76zuuZGaM4R3RONqDrmddGmWhQ2zMzQrwen6SaI1XGmndXVKhwrRdIm+GzzaI2xMmoe4SKT6fvqbheNWaJzoNdv2SjXHS1WU4yN8naNs1q1iGJAvn6vPl8pi82aGJDA1n0RqVidmy46dBJYnDraWd1lgfKiJI8BMTFWFTfS5wIbcEhgsZphGIZhBhCGxGJ1HgNSdvX25C5tARnU69gRtDA7RzN4zl1kVTdCc4HInZaL1RWzjZxUpWE2gzhmZ7WUQIvI/Hs+GBsDlpaIGssLw5YcwPO0Q486BqRNNDXJndW+pe2/wOgrLUr1hNuaG6JJfB
6Qn1tSarHad+BWdL7ucM5qC3MTV+NEewKHj9nAww/r5/KDs/pF34KyhS5BZrUWq/Vn63uZLtpIEX9gvvOyE5OKSTJK+85qKrE6d1b7HXQWaDvaMKRfaJQSOHSI5mvqtZlkcKwUKJdRcyO0GwTCcu6sHqvoxQWqRSYz3qp7IRpNonMrj+vwLFRqxllN8cXFse4L6hWdqUskLsugCtcybm0q8mMVxJElRWd1OGJ0Ua9RiThzMO51WKy+WCkWWLQsVPwU7cSjWWxlWKxmGIZhmHVRCipO8OADhJl8+Up82TOTaItmFd5sS0W9jp1BCzMLBMV5lEISF2JAFmmLQZZ3lPWEjMXqCx52VmtNjtpZff/9wKc+RdsmlILqdFEvJXRidZoiUwKW5wC+rx+jdlZTFRTLFwQDS1cBBEY/eQviRs0N0aIS1g1JQixWp2nPXY4g0LEKgz6DJIFUNo7M6yip++Yu09nVQO/gVNFZXba0s3oU4acnVut7YOApXbSR0lntJOiGRGKSUkgSwLEy2gzkMITMndXExTuThHCHhSFN9dd++DBhm1Hac1ZX3RCtBsG4y2RLO+MV1NwQjUWisVxPrO7SidVRBKlMDEjurCaKAQEAMTGuz1miKBQZaAc8qYd0s8TqKDXO6hAt6dN8rlHU2w3BtVcuUpIEMrNgmzXccpChLX12dhDBYjXDMAzDrMdDD8GePgl5mrDcuZQ911+1nKFN6J4BANRq2Bk0MbPkjd6mlEhSqy9WU2yhVaonbrBYvX242J3V+WlLLVYfOADcfTdtm3j0UWBmBmN3fx6NaaIVhlyQtG3A97VAQZRZLYRC3etiqU0kVvccijZQKsEWGWRrxM8hF0+F0IJP1yHd6kvurC7EgAy9uGAm3odnqxirpVqsfuwx/VyrhSSz4Hp9Ucb1LSSZPZqzWkp0Uw/lUu6sVogyGme16nShlOjXRqAgd+1bqY6VoCq0aZzVY14XnUVasVpKerFaSr1zgzIKRCZKZ1ZXKnonV4Pg+gpDXWBxooa616Xrv3sxICEaHXvAi4ck77fyugDEYjXGtVjdmKNZYJFeWS8uAHR9YRzrrH2hd/TFS0T52qHU49iyQishEqu7XcSpjYobQyaKox8uRvLC02boUillek5zMQ+WCWGxmmEYhmHW42tfAwC4cWuk+fgypNTuGReoVoFWEpDmqaJex85SC7NL7uhtGueIZ0ntSGkSDMZ7H6RAMB6wWL1NuNid1Xl0L7VYnSRk9a76LOiCX3W3i6U7Hxq9PaX6161tA55ZCCNyVisltLO6Q9BnAb0iXa6vXcV1L0RzaURHpRFPlRCoVhSaiU92QeQf72bFgCAIYAmFrDOEszqzcWS2jFd8j8C35q8ATp7USmerhY70UCn37wHCsXt/65zJndUmrSXwjbOaQKxOuzEcK9WFjCMiQdHcE3uZ1VSxEgWxmiT+osBmLLKlKXDVVcDx43RtFjOrq05Ecz3kAnAeA9KiW7QATAxI16OJlcizpX0bokwUX2TaBWDE6ogst1sKF46VDte3DIspMulYmb4nzBG5wI1rv7KjpMfcFI5tEzHjWRJQ2SbcyJkLHimRZhbyW2GlrHQMCIvVJLBYzWwJr33tVh8BwzBPRrJs82LCAlvSjUONs9pxhRGraVweWZTo7MBqFTuCFmaawehOj3xi7ig9eaQQq/NJnRCw6lW9PZ1QpZmZIWuKKRDHF7dYnZ+21KKPlJswx01TCKF0DvQMwaRJ5a4xoQvs+T4EABUSZ1ZHXn9VYBQKu1cQBNr9OOr2fxODIoRArS50wUKiLy5fB6COAUmzvrO6NkysQpJAKguHp8t43vMtHEt268cPHgRaLbQTH+VKQezL7WQjOqu1WG1iQHzjrB71Zp5liLsSnpVqZ3Vs05xbRWe1S7AIkhOGkMrSzuoWvVi9GTEg1Sph36UUpFT9zGovpLke8qz98Qqqbog2VVxDLla7XSwlJZoF9xWLbAA2QawO0SSKQkkivb
hQshN0Ty2StNmLAXH1tbA0SzCoV0ofq5X1d/NROaszB65lrtftUn2ZocOYj/JbYbkMjgEhhMVqZkt4/HGa8SLDMEyRW24Bfv/3iRsVemJTcmK6SVmep+oJVOuWFqsJJiRpmOhBs+9jZ7WLmW519G36+cS86ukYkDZBlevednoAlYr+N6Gz+tWvJmuKKRBFF/f4e1uJ1eYaG/O6NJnVvQUm87PnwbcTRJ3RRbWsG8EywvpSXKIRZ4pidamkc2VHFetMRIbrZKjVhc49JcpTzbvETYsBCQL9GQwSqYyz88Ssj6kpwA0cxKmt3dW5s7pamD7mQZ2jiNVhqMVqI4L7Pmic1cap7LkK5bKg25odRfo8yMVqqv7AnPcVJybdaJRl+j/yfutr30B15hDCLtGETkrtqLUVEAR0zurcHDBWQdmJ0e4SjGGA5c7qOKAZw5ixoePb/cKwmxADQrXAIkMJx8r0rrsTRMURkkTHgIzX9D1hgeBY0xQy1TnzVr2qI6wondW27P3MXGTkNYjyGJCq0NGOF/NgmRAWq5ktIYq4SCrDbBc+/GHg535ueywwdTrA9DRxo2ZSU7IThB3KSZnO/qzUbRpntVJI4ky7kjwPO8djzITV0QfPUYQ4s+H6NqoVk/U36iBfSmPSFJsiVs/OcnTgZsDOav3/7SJWKyW0kEJR/KuwGwIA4Pu6aF179D4xaUVwrRSlAHSCYppq4csVQBCg5kZojPq9GYemYynUxiw0qeKb0P94SdOQ0lSLPj1ndYSlxoBzwQjcWSYwOQk85coMB5s7emJ1W/oo1wrTx3yGPoqwbETw8phuKwhA46yOY8SpA8/NUKpYOv+XSKxOMhtOVbvVyWIlzLlEWnQZ/XUEcmf1gUOoxvPonqYUKS0tVnt6cbxJkQVtRHC37KLspejENBEziCIoJRDYiS4ySqGsm37L9cTmOas9unM2L1pYcWK0zxDdGHNnNaVYnUeLONDWV4BMrI7NLgulBFSHY0AuOvLMapNgVqkozqwmhMVqZktgsZph6FFqE4p0QYu/n/gEcNtt9G1TkyTA/Dxtm7LRgS0y7ayeo3HRIU1N8S8L1TGbpjJ5PiGzM0AI7ByXmOnWRp/o5M5qXx9rM/FHn5TlrkdbF1ISQpGqNFHE0YHkfP7ziNrJRT3+TvXudFr3K3S/Rb4IUHRWNwmG+0bxUiiI1XaCbmdEQU0pxB1d+EpMTujHiPL7lzmr3S4ag4RaAP/4j+vc6/I+1slQrjtoUxXpQl87Iz23smxZgcWKE6E7KF4iSVB2tLA1MQE841kOHl/cvcxZXa4VimDato5xGsVZ3ROrdQ46mbM6jhFnDjxHoVwxzmqKCy2PxqqVtPDXJhKWe87qCO3EJTu38vkW+SKbslF1I4RNogKTZhHftgH4vt7J1SHou3JndeDoPFmqLfpGABaeUakoxjC5S9Prx4BkbWpnNV1utwylzoF2I7TniYqC5sLyeBX1YaKLhm7TuF8pHev57g0rhWdLJM2LeDX/YiXfEdKLARF6bEBRfJoZXqwWQthCiG8JIT5rfr5KCPF1IcQBIcTHhRCeedw3Px8wz19ZaOO/m8cfE0K8hvzdMNsGFqsZhh4pgZe/XLtKqdvds4deoNkMkqRXV4yMaClEYEstzFCJ1YUYkMqYQ+OszosIGSPSzgmJmZBQrPYExsaF3qY/qiPF5KK6juo5q1WLTqzejGJSFzVLS8BNNyFuS7rdBdsQKbWAty2c1b0t6l0stZ0BLx6CnnDYF6sDO0E4agxIFCFJLXhOBpTLunUKQdEsCLq+zteueyEabXugqPjo/TEevm+NSWYuetmAVS3rreQXslhtBGTh2IDrInAShPGAqV+SoObqz39yEnj68yp4fGk3cOYMsLSEduKhMrZcrLaEQpqM0C/kYvWED0BrdGHqjj5RSBLEqa2d1VUb3dQlc6kmmQ237KBWStGMicTPorNaemQnQ35ukTurlUDNDdGlEqvNuMAxYnXNDW
nEauN8dH0L5TLQkS6Zwx6AvikAZGJ1bmRAqYSSEyNsEkyY41gvKo2N6d0ARAssMtY50FUnQmuR6DyIY70jZKKm3fUU99sk6ecK+7qfIam3EIaIU51ZXXZidBYu4tX8i5VeDIgeG1Uq0DEgFLs3mA05q38JwCOFn98B4F1KqasBLAD4afP4TwNYMI+/y7wOQohrAbwewLMA3ADg/wohiMoyM9sNFqsZhh4p9bX1F39B3261uj12NElJ76yOFrvw7QSBk6A7T6Qo5QV/fAtuLYDM7NFFjzxP1daiQS8Le9QvLneR+RbGxkCTKdsTfRRQrcK3JaIlOkeKlNtjcWXbYBYnMgiEs9vjg73vPuDAAdo2pQQmxlI0KYqMrmiXXKyWsl9gsUsgVufCZz5z8Dy926Q94mdhMj9dR/Ucb6pLk1ktM0tnvzoO6qUYjchff/CZZZD/8kU0bvyn1fu4PL6pcKyUmdUenT6pyTItUFkWYNsIbIkwGuCojGM4VobAz7RY/SwXj0VX6JO00zEOaLf/ettGyYkRxSM4NY1YXRo3YnVJIEqd0Sf7SdJzVlvlAJkSpDEgrmfpQpuJTyNUmk6g4kvt/iVaFduU+KIsQ5pZqLgRwhals9qG46i+szocvMA0kDwHOnBQqRJml+fOyclJAEQL7vnYMNA582UnRqdJUGwzP1bfR62S6XOW4KYjoxSOSFFxY7QblGK1BXu8buqk0LQpMwuui96uIJJFgG4XcWbDs6T+rhbZTXvRYQos2svEap/FaiKGEquFEPsAfD+AD5qfBYBXArjJvOSvAPwb8+8fND/DPP995vU/COBGpVSklDoE4ACAFxG8B2aboZSeA/DuCIahRUrg6quBW2+lbTdJtFi9HbJqyWNAlELYiPvO6gWiD2GFewYASQxIkk/0AIgSUd5hQawen7S0WE10rLmzmnqQz2I1Md0ulAJ8SyKa2x4f7C23AF/5Cm2b8p77MDF/EM3Dc7Ttbqaz2g2xFBJMnFY6q11X94nRiM7HYl8QBLpoY4NASCoWWARQL0ndd60nUnW7SDoJGqEP3Hzzmm06jurnnhI6qycmNsdZDVsL9oGdIBwkKpvvuV7VYvU11wCPZM/oPd2p7UZlRWZ1YCcbv80sLOh7k1J9Z/WUvhf6PqGzOrN7AhUAshgQqWy4gY36mKDLLjfH5u2e0GI9obO6ViN2VhunatWJ0KUSq82Cu2MDsCxUA4lmHIzed+X59b6NchlkW/SzKNERZhMTqDgxOvN0hWEdVwClkh4bjSoCK9XvCxwH9ZrSn+uoC21KQUYpXCvV0TUNAlEd6IvV9You3EiRWx7H2rXvAAgCVNyIJl4kX2wNbP1dLbET70Lna18DPvpRwgbzhXFX31tLZUETY8UAGN5Z/ccAfh1AflVPAVhUSuXfwnEAl5p/XwrgGACY55fM63uPr/I7zEWElPq+yc5qhqFFSmB8nF702G7OatIYkDBEFAG+nehiYsRidZ6nCmCoL+7b315nwpmmepujbVxIVMV5CpnV5TGXJqc1SUy+thGrbdpBfpJwDAgp3S6kslB1Q4SL2yMMXEpgcZG4zX++BRN+G80TtPvpk2TzCizW3FCLaQRFUQH0tGo4ju4T4xGFhNz96mZAEOg2F+nF6mopHex2MoJGM/GBu+5atU2p+i5wAaI8WfTv39RitRBquVg9aHEhX+SoKYyP63t/x64jfdPPA69/Pdrf9wM9nR5Az7G9oUWLY8eA3/5t4L3v1Sd+lqGjSijX9LkUlC1dYJEiszp14HmKvFhdYsRP7awOaNo1bYhdO/XPhM5q8vgiKZEqgaoboduiKzydKqvnUKyWM5rdYblbueSgXLXQSWmc1Wk3hmulwMQE6l4XjXkCcaogrPfE6lGd1UoBSunauJZFVxw2TbVIZ2XaAd0g2nGU50uPV3W8SEiwMyjPwTYLV1WHTqyOUwfeZJVmYYHZdI4eBe65h7DBvMCiOU39QOjFVharSRg4shBCvB
bAtFLq3vNwPBBC/KwQ4h4hxD0zMzPn408y55l8fMBiNcPQIiVQr5PtSu6x3ZzVi4tARhWr22wiTF3jrI4RNojcv/l2V1NERwg11MThAx8AvvnNNZ7sxYBsjljtlWyIMrULHHpC5sZ6F/UoRboMSm0vZ/V737sNri0zIRvzutti0QrQfcHSEm2bsj6Jkp0gTgkmz8V25SacA2aiZFtKxx+MGlOQX5vCqNW2bZy6I2af5vn9xvEWUG3PNgKV42sBtOyn6Ep3/T7GHEsjLukbycovpZBZjVJJu/6IclqTToJxr00joOTkN8Jlzur1p35ZLCEA/M2fNnqT76uvBp6oPgf43u9FR/p5mYFe2yUnHhwvUuDRP/4cfvOOH8Cxe84Ac3qXQgflngjuB8aZRuKsduC56N8PKWNAAlsLfzGtWI0dO/T/idTlJNFJFeTO6swUWOwSnbN5ZrXZHVarKjKxOlMClufoLfoUbSKPwMiAeh11N0RjkeBzyOt5+FY/BmTUqKUk0V20uUR7YvWo94Qo0t9X7qxuEYnVSaIXLaol1LwIzWhAvz0MvTGnADwPVTdCu0nwfZkCi+5EVbvrKSJbmE1FSuLaTkmCtOCstjxHj7lYrCZhmGXw7wbwOiHEYQA3Qsd/vBvAuBAiH63vA3DC/PsEgMsAwDw/BmCu+Pgqv9NDKfUBpdQLlVIv3Llz54bfEHPhw2L15nDHHeuIWMxFgZTapEt9bcn9h1BdOrEtRKok0boJmUjVaiFKnb6zukH04eYCTaDdM66VIm4NFsLXdWCagmL5RA++D0soZB2iGJCAPrLEdRQgBMYrCU3hRvT1me0iVn/848CJs0ZDFxjdLqLUQd0LEY7qpD1P5AtXlKS1cTiWOcEIFlZyNiMGJAtj2FZhMk7grM6UgJXPHIRAyZVaAB5ldXBFDEjJTtClEKuLBRYBlPxMZ9Wud4M07rtGbITN06eXP58vCLraWV1zIzQXaCak8ubPY3zhMFr7T5G0B6B/jprMat+WA8XqNJJwrBQvfnH/sec+V2fAA1rfWuasHlIE77G4iI98+RKc7tbxP+95HfDQQwCALoK+WF22aZzV5j7reYo2BiTfbVRy4FR8pMoavd18lRXoF+yjigE5ehITzSNoThN2MsYFXXUjdKkMErmr2KgL1SrQkjRiNQDAceBVPR2xQiFWh/paQa2mndUUiwH5OM7XmdUVJ0ZnVOd6miJTArZlFgHGLJrM6tytLDKdWU1VIzuPAQlcVCt0Cxa6CB6AINBOcIrLK18QGy/T5Yszm0qaEovV+a5WI1bDcbQBiYUuEgaOLJRS/10ptU8pdSV0gcQvKaX+A4AvA/h35mU/AeDT5t+fMT/DPP8lpZQyj79eCOELIa4C8DQAd5O9E2bbwGL15nDXXcCXvrTVR/HkJE3pq6hvBlKiN8gnI4qQHDiC6tGHEXZpi4ptBlJq4x9ZbnUU9Z3VToxuh8hBVBRSSiXt8lgaPDE3Na7WfFIqSzsUgZ5LMWqN7k7Tx6onT5ZQo299z9t09Tk1WZOYjyokk/P83rJdxOokAaant/ooBtDtIs4c1NwQsRyhkNp5RMpNcFanQosTANQZui9tM8TqnusvV5dHFavzLd92/z5QciW66YhV702usGcE4JITo9skEICLC4IAyiWlxeoBMSBJZqORBDjeGkd2coVYXdwRUi6jRuislvfch3Gvg9YM4YlQFKuHFJV7543bL6L4nOf0xepOB6vHgAy7iHX0KGbCKn7+2lvx+NJunLlDV0HtZKVeu7ZnI80sGmd1asPzAAQBHJEhaRPsjsrPLU/QObazDCoz9td6XT9GFQPysb/DZDaLxv7Tg188LGbhpuqG2lU/ahFEYLmgCC1WN5OAVKwWvqeFpFEzq7MMMlF68bJS0bUBGgT3RrPrLh8blp1Yd92jfL4mvsjJxepJlyYaKo/rcIXeZdImGhsUnPBarA5G/77yz8BBv3gnkVidZDa8CVN7hSoSh9k0pA
RIwxtWZFb3OjBCQ8PFzChVUX4DwK8IIQ5AZ1J/yDz+IQBT5vFfAfCbAKCUegjAJwA8DOBzAP6zUoq/xYuQfMzBBRZpSZJt4M47D2yGqHzrrcDv/i59u9QksYKTxQAIRWUzGK26EV0EhuEd7yBtDoC+DqamCHOrw9A4q6WekFPpCPngxhRYHDY/b11n9YoCi3BdBE6CcFSBXUrEqdObPNXcEI2F0bdkyqwvrE+NScyFldG3paI/L90umdVxvA3E6sJ1kGUgzNnZPDbDWS1DCUfofM72E3TCz2ZkVssohS0yYGxMZyu3RndW6y3fBbHaGyJaYxDFhSvfR2BLhBRide7SDHQnUw4ydNPBMSAy01vkf/HO1+Obd54dA9LLpjSLjE2inFaZWah5oT5GKooxILat7wfJ+qKyjDO9ION5vceuvRZ45BFtgv7MZ/TPPSxLi+DSHk5QO3oUM90adu5QeN7UMRx8QPf5nczvi+C5UE7grI4zB54nAN9H2Ylpdkfl96/ApnNs59EHlgJqNbhWimSJxrIsWyEqToxYjlgMdVmjxlntRLoPoHAg9epu6B/LVUu3PYpYXXSsOw7g+1BKjC6A50KtDSAIUPdCNJpUYrWJL3IclD2JTmyPdi3kbZqFxtK4rxfuSGJAbDhlDxUnRrtDJFYXCsNuirPa97W4TuastuGOs1i9XSB3VufjAq8vViuOASFjQ3ctpdRXlFKvNf8+qJR6kVLqaqXUjyilIvN4aH6+2jx/sPD7v6+UeqpS6hlKqVXKbDMXA9vNWf2e99C7szYDKVmsBoBXvIK+zdWiKy9E5L98Ee7D99GGVhunWdWNEDVoP4Q/+zP6c1ZKYNcuWmd1lDkIAoWSHdMJSkmCRPVjQKpDViYf6KzOt9MDfSfdqF9bz1mtxeoxr4ulhREH5KbN3IAwOaG0s5pQrKZ2VifJ5ri1t5Oz2rfMh7sNOsRNEaujFI6VYtJvY/4I3WqIlHrRgnINID9WjI2h4kbozI/4na1cDAMQeNnohYRWZFbrHSw0Ls1lzupguBiQxMSAnOqM4dj+1TOrXSNW19yQTKxOMlsXa6NCqWWiT+6AjpL1M8b1ebPcWX355boo1V/+JfC2twFPeUrhF0wcTDjsosXRo5gJa9j5wy/Hnj3Aqc4YAKAj/V7KFNk26jjWrn3jrC47Mbotgs84vxZMzQkAUF2a68u1M6Bc1uLfELuthmraCeBYKUj3xBix2rV0xATJPSFNkSrRK7DoVxxE6YhitTknFYTedpcvwlCI1Xm+tu/rGJA2QUSWlEiU3c/aD1Ldb41yvLmYZsRqstojuWBfclHxJdox3aKFAADH6bvrSZzVxv2aF1jsECzerHRWE9cMYujZHGe1yUMH+s7q7SJ0XeAQLrEyzHBsN7H65pvphYQkAb7+dfo2WawGDh+m33nTbG6P81V+5atwRAY1T2UrRm9AXnVDcme1lMDdxGFQSaLFajJndRwjlC78sqMzq7t02xxlpqvT6/y8EK3mYNFjOGd1vwCab8nR41uKok8QYNzvYGmRqE2jh0xOgkys3qwYkFtuAf7wD2nbBC5wZ3WS4Ja/b+F3b3waotSBZ28fsXpTYkCMs3rSb2NumjazWhDpPTlpbETHsTEtqs6PeBNLU0SpC9/tK+okzuq8L/AA+L7OrCYUFHOxulSCFn0GOauVhUZaweluHceOrFg9yCelruiL1URrFjKz6MXqHKGFusBN9eLCWp9Bluk4sWJ8DLTWDQD33gu88IVn/9rAdoscPYq29FB57tOw9/WvwOnrfwB4+cshy7W+Pk7prE4deIEWKUtUzsf83DJF8EpOjM7SiNdXbyEk67nAqVya0tERKKRBbrlTNc/Fp1jJ74k++kfhe1oIH0WoXFkYliq2xRSYdGxosdoNacRqE7fUW2Tz0sHxRYPIP9c8wimvkDqqsmrqmTiehWpNaAc0xXlQdFbn7ZI4q82YM48BoRCr852HkzWUHcLcbmbTSFM91S
DfKWsWmOC6EEJBJeyspoDFaua8k0/GtoP4B+h7OfWx3n038Na30rYpJXD8OG2b24041k466oiZbSNWw4FjZXBVQne8ubPaiRBRFL0qkCTAN75B2iTk/kPY2T6MxQWiaVkYamd1xUbJienEpFxY9u1eZfJWd/BEZ11ndZ4pW3RWO5vkrB5CrP7iF4GvfW3tNqUqiNU7LMxHZRKFWUod+UkdAxJF9E5dYPOc1V/+MvC+943WRvvPPoxv/+EteOShDHGmC7QB2BZi9WbFgNhWhqmgjfkZWrG6WqWNAullD9dq2kW2NHp0T1e6KPv9dkp+pjOrR4wBiVMHnhERAjuhqY+QZ/iXTAzIhjKrSzjVGcPR0ysE2HyR0YXOrPZCNFs0C5gys7SgqIgWRI1NXxX8tD0n/FrflxHrHVP4tsiVVwKHDunF4JUEbqrjSwYJaq2WXkkWFsTuXdh7icAp61I88Oz/ADiF+BMqZ1oxBsTztADcJnTtG7G65oZoLo5+ffWc1b6v2yQ6txLL74vKVOSxJcK0S3FPWJFZ3fvHiEItUDidPU9HrHRGX1zoRWsEAca8LhodGmf1svgiPx28yDaIXr0B811RFcrO3eWeQKVmoS09mp2daaoXVhwHbsmBVBZdDIi5z1SGHHMPJL8OJuqoOBFdFAqzaeTdCVkUiIkvsgtGIc9KkUQcCUMBi9XMeWe7Oas7HXrx88476fMpkwQ4eXJbRIluGvmNh6DI9zK2jVg9sROOldJuRTPVvqtuhLBNu0pcLtOL1cnBo6g1jiOcJ/oAokg7q6uudvxFRLfN4mTXdVFxYrTCAdUxswzJ4ePo7l9jVaq4nR4w274TXfxoFIqFyoxYvdgY/Dncdx/wrW+t8WQu1puCJJM7bcyHFZKJjpTAxAS9s1rKzcnE3yyx+m1vAx588Nx//+8/FuFVb/8+PLawC0eWxrWr1pJa+tomYjW1szqN+s5qsqgh6GOt14nF6jx7uFbTi2GjitWpFk3K/irOaooYEJMrrHewjHaoAHqij+3rDrFUArpDxIDIzIbMLOwst3GsObF8v3Cv3wLguqh5pt8myKZc5lKlYKWjFEPEtpj376yi4TzjGcB1163+a4GXDhcDcuwYZGbB8h1ACOzZo8etL3kJMDdXeB2ps9rcZz1P38MpImaKC7hBgBpFdnnv/q1jJXaWmphe9Ab/3hBI4cKxUl0cOSYaxxWc1UIoMketVHZ/3SKPgyEQq3vXgREqO02CuhvKuJV9X2dWdwjy5osRM9DxRa0kIOlje9e16+pio+GIn0HeX3g2qnVLO6ApJh/5e7ULmfAUYrWy4bh6zF11Td896qQ5jqEAiGoFVS9GK7Qv7on4NiBN9QYLMrG6WIMI6EUwUus8FyssVjPnHRarN0esllKP70lzmLYZueBz0TqrgyockWmxeppIqevFgESI2rT5Kjt2EGZLG5IY+lhbRF9YnlldNTEghGK1zGzt+hNCi0mxt/4g99AhJLMNdO59ePXM0VUmeoEtEYWEMSClEsa9Lpaagz+HJOmLu+997wp9c0UMyNRuB3NRhcxZvRlidZpuTtFGy6Lvt48f18XQRpk3/s5bUizGJTwwfynmwwriVGe/KmBbiNVS6nOQPgfaiNULdA4qKYFajV6sto2zuuaGQ2Xir9+gREd6KBXE6sBXo2dW92JAcrGaLrMaAIRZvbM8R0cKDIgBSTIbk9UY1181h+PtceB0oZCmETxcV8dqVCtK56kSfHHkmdXmfRbPUv19OWt/BisKsRW57jrg+utX/7XSsDEgR49iLqxgalL/uHevHg9XKn0TLQAtUgJQMUVmtQPPLzirKdax83O25PRd0EujX1+9gn2+jz2lBs4sBQQHq/sCNzcynKHKrZHLndVUYnVmLYsyyx8/Z3p9k2nTnAftJtH35SjAtlH3IzSiEeM68naVBbesB0fVQKKdjNhuPjYsFN+ueQQRRnlmtWehOu7ovpDIWS2A5WL1qBO7NO0vNA
qBajlDS/okWdgA9A7JcoY2RWQJs6lIqe89ZONu0xf2xOq8uD3FDjGGxWrm/BNFelBKLShuFu027bEqBTzxBH2R2CTR2zQv5iiQXKy+aJ3ViYJjGbH60BmaRgsFFsMOrVtgnRpP54bJ26w6EcIW0QWWO6vrent6Nya6bZqtg5ZnxOog0a6U9SZlSkEqS28JXc0y2tui3i/yMUyBRaWAD3xg/WNd6axeag3ePhlHGRpL+kt+17v0FvKz2jTVsyd2ezqzmkL0SYDx8c1xVm+GWD0+Tpixblhc1EXQznXeGIZAJW3gZXsO4Fh7AmNeF80kgF9xtDuvc+GL1UkCTE3RuuFlnMERKSaDNuYWCbYQ5+1KhWpFka4B6GPNgHodVTca/dzNndVBwVkdqNEzq3tRDehnVocECwFFdx4wXKSAWZydqiV4yuVSO7FXiNXFHSG1KpFYnWWQqYAjjPs1Ihhw5Ks0hY8y8JV2QK/nrFb2cuHY8IY3AL/1W6v/WuBnw8WAmOKKuy7R38muXcBjjwFvfjPwe79XeJ3rwiNcaPUC46x2EnQJI2byAosk2eU9Z3UGeB52lxo43SiTrLblWfsVJ0LrNNGN0YxhHLPAMnKBSaAvgBcKlalBC0xDtAmgfx2YQpsj54HnfYENQAjUqxkacYlE/OxFxAGollItqo6cWW31i2+7Lk3MjBHBXRcoj3t6bEqcWQ3f1+cASca41eu7qxWlheVRb7r55NB1USkrmnxtZlNJU2DPHlqxeqWzukRR3J4BwGI1swVEkXYQUYt/d9wB/P7v07YJ0DurpezHhVEiJXDJJYTbWrYh+Y3nonVWRykcYdwzc0QWvV4MSIioe4FvbQtDJJmNihsj6hA51HJn9ZguzkTmrM5db8b1V/WlHuQO2KKeZLYWUA4fPvv5XoFF8/OQMSBpCrz73eu8IJ/wl3S+9pgfYqk7IENRKSRf+ioan70V8vffgUMHMxw7trzN4sTBr7qIU4fkQpOzi5hoHkVznrYjSNPNiQHZDPJYiXMt9tO4dz/q8Ry+c+9RPO2pGS6rzuOJxk54VQ++JRE1L/zVZin17g2yKJA07S0ITvltzDcIxeqHHkdt+gl0m0SLbEr1CyxWq3rnxhAFXNc/SIlu6i4Xq4cVKddjRQzIMAtsA1GqL/IVxOqBkQKmj52sJ9hzmY5OkCcKC7+56GMW2Wp1QeMm7HR6kQqBnSBaJBR8ijEgg5zwuaDjnH2uCIFVRWwACNxs6BiQmbCKnZfpQbDjADt3As99LvD61xdely+0jrpAHseIC2K1dlbTLIT0FnB9nya7vFdgEYAQ2F3v4ky3TjKgzXeEVN0I7WmiCnDmWrCFgm9LxE0Cga7nrDY/UzqrRd9ZXXFitFsj9odpavLd9Y/1aoZGEowuVK4cG5YzPTYcVaxW1rIYkJobotkecTzbE+kERKWsHxu1L8wyQCm9g8vS161nS8St0RcBpLJ0DAi0WN1KRnRWK7VMrK5WoRcWWKy+oJGS2NDS29W6MaMQMxwsVjPnnc0Sq2+9FThxgrZNgF6sjqL+riZKkoR+G/F2Y3paj20uWmd1nPWd1QtEH0KhwOLI2ccFlKnfJOiaBMLQRJaEdGJ1GJrMag8llyCfNSdvw8x0qoHUg9z1JmVRBJnZ2r1y5MiqbUrVF1J6BRYHfG9JMmDQliSI84m52freGuRIiSIkSx00Ih9HHmxAphaO3nt29mvxWHsHMyLy7/4B9eZxdI7QZmtslrM6h3KnQV5k8lznjY2vPYQxr4t/9R934/U/7uOK6jz2L+2CXzPbGxsXvlidJFqsJiuyGEWQZtv7pN/GfJMmTxYAksUW6lYL3aNE52yaQmZCF9Qql3UMSHtwZ3vmDPDOd67d5urO6tELLJ6VWT2qs7rophQr+phBzurMxmQ9xd4rfewtL+HkocK9dKWzui7QTPzRB16djhZSrFRfXxQ1F1Zx5A4lVhcEuGEp+QOysAH93MwMZsI6dl5R7j28Zw
/wnOeseK3ratF+1GFMXryz56yOyVz7PbE6z6weVfgoZlYD2D0eabGaYECbhNrIUHFitGeI6nkUCiwGdoLuEsE9Ic8VzscFtq0XjKLRxGqlsCyzmqTQZpLovOZcrK4p7aweRaFShcU0I9T3oiU2MO787GdXdMl50caCs7pOIVb3BHvRd2GNKlavXGQzMTsjx1ilqf4Mcmc1hbCcLyyY+0y1Chq3NrOppClxQet8kXdlZjXFvYZhsZo5/0SR7iSoxb977qG/PyhFL1bHMfR2V2LyzMuL+R45Pa1zqLaLs/oDH6AVv5Io6xdYXCI64GJmdUJ3401TwBEpFKVCZ5zVNTdCeI5i9dwc8Pa3Fx6I435mdVnoCTnFCZafUAWxeuCEJIqMs9oFjh49+/kVQgpsG74lEcbrf29SDhare/mc0AV/2kMUKksyG80kwP6l3bh24iSO3lrIAVkpVrsuBNH29+TUrM5+Jc5a2pTMaqWAOILnZuQ7eMbGzn3euHSmi7rXxeUv3IWf+29lXHlNCY+lV8OreNox0rzwV++SxTZ2lNp0zmqzWOSUXEwGHcy1fZpAbCMk1NwQ3RbRIlteXNAGEAQ6BmQIsXp6WmcIr9VmR3oolfp9NmVmdZ4rXHJiHbc0ymdrjkcVpzkmUkAl6xyr6eue94wOrnlegMsqCzh2VC17vpeBDPSLihGI1XlmdWAnCJdoXKqZEssWhAdmVueCzgY3DfTOg/UWLWZngSzDjLUbO/f0/8CHPqQj7JbhOPBtOXrmZ5IgSp3erqCyE6MTEkx9i9FYeQxIa3SXarGOw56JCKc7BGJ1lvWMDBUnQmuBqO/OnapWqqN7KHaFrJJZPXLdjXwRP89h9zxUnGj0aOXesep2yzVbj4sIxE+g7+ToZStvoI/99V9f4Wfo5Wv3x1s1L0SzM+LuoLy/8CxdMR0YvS8sCsAA4Ps0BYKNoJiPOStVsytmlElzPgY2B1upWeys3gbIKEWtJOnE6nzRxu/v4tJGIaL2L3JYrGbOO2G4fcTqONbzpW3hrI4y1GvqondW79tHP05oNDZHrH7nO5fHYY6KTFS/iE6DSKgzomPFjRBJuirXyR13wzl+CE5riU5TzGNAnAjROU5yH3wQ+NSnCg/kmdUVB6WKpYViihNMSr3N0cxMq6V0cAyIcXZ2Uk+r6isxEwe3WGBxSGf1unERxWJSACpBOlhYN+dNQ9Vx4JU/g++79FEcO5Iuq7DbkV5vfgPX1YUbh8jCHoTMLNpCZXm7m+CsVvd9G+LMaVTnj51zZMdq5HFT59pvNWZi1N1QV6oUAs/46Zfi/oXL4JcsncV3oYvVSQJ58Ah2PPF1LC4QLYhFkc5o9W2MjyksxSUaa45x1da8EN15yvgms+3biNWtznBFUddcuOo5q/ufZylQJDEgcWrD9S1ACAQ+dKTEKAOvlTm1QE8AjbvrF1iUysJ/ff1pPP8VNVxencex025/24OUaMQB6jX9c23cpsmsbre16GNcquEiwWA2TZEqAdvqf1++j/UXF7JszQKL6xF42cDs8uzkaZxoj2uxemf/8Re+cJUdVoTO6rb0UakJwHVRdmJ0KXaIFe+JQYC61x1d+OuJ1fqz3zWRYDqsjT7eMIYDx0pRcQkKC+YkCdLMgh2YBUyK3TZGWLbdvkPRt0csVNbLljZtmMzq9qhidV5s1TirRUCQrbxKdE9vJ9swfaxSyB5+FAcPquU1QlYWWHQcvRuA4JzVcW4AymXYIoNsjtgXrlIQs+pEaDYoYlvsnvu1XCUYz68Qq6s1cdFmVn/4w8A73rHVRzEc6W1fRfWbt9LFruU7F87KrGZnNQUsVjPnlyxDtP8oqkFCKgBPTwPBiAukq5GvvJM6q7/xbfj7H4RIYrpt31mG5FsPonbf7Re1WD0zo8Vqamd1u00vVs/O6kKblGMamSg4wsSANInEOiN6eJZ2aVFdZPJTn4VjZSg1z9Cds8b5WHWjc87XfuIJLM9WDkNEmQO/4s
AvWXqiTyRWA+hv9cyL6AyIAUkzM8BerSKfcXstz02TCAfkbCeJfktrzoVWOKsrpUxHkQzhrG4kAfYfK+GVz53H0caErqYFaNEnCVCvm9e7ro5WaGxw//kqyMzSheWIyTOrKTcDJPd8G66VohIvkIvVrnvux9qYlxjzujrYD8B11+n7rFeyaRfDNotuF0lmY0fQwtJJomDCKOrlc1YnXC1SUnxpnY7ut5yIxlEL9IufOeg5P1tDCBNJArSb6ervS+qCg+V+ggNKJZAUWEwyI1bnbabeaDfyVUQfOI4WKzvrXBSm33LLLuB5uGyqg2ONsf4qlZRYiku9fqs24dCI1csyqyXNeZBlWkwsdP9DidXK3rhYHQx2Vj92bwu/cMfrMZNNLROrVyV3Vo863IhjtBIftTHtrC7ZCTqhPXonnosTgRarK06MdmiN1m4xsxqAV3GRZPbo440w1IvYVoqqG+rrmwJTDNGpBjq6h2JXSJoiXZFZHYx6HuTfVcFZrQttjh41tCxf2/d1Jv4o39cqi2zlkkJ7GGe1UsDf/i1Ove0vEUUCh/YXXt/Lojc/e57eDdAlEKuV3YsBqbkRmvMjTpZMH6IKMSAlJ0HYHvH8WlEEL6g6o4/nV4jVXtXTtVcuQrH6+HFdJPfBB7f6SAYj2xFqboTuLG1+/zJntZ0M3NXKDAeL1cz55ZvfRHTHPagee5RU/JudBa64YnuI1dFHboKXdhEsnqY73m5Xb2tJ5um2EW9DOh1tBKQeJ9g2vVh99936/2THqtTyzGpSsdqGnYt/RMqyLNfhWqmZNJA02XNWV93onK+tJ57Qua29a944q4O6BxH4vcdGRko9H8ljQIYpohNFcK1UD7DD8OzvIt+W6ha30CYD41vkKZ2T255b40Nb6awuK73ddRhndRzg8GHg+d87jvmo3BerkwRLcQljY+b1uVjddNdqcWiksnVhOWKk1PMoyvtM4pS0WO1G5GK1I86xD0gSLDUt1H2T2QUdrzQxAfgVLfh1Wtuj2OqOoIXFo0R2eLOzwfEsWJWSdtFRfGntNpLMRt0L0V0kuiHkW/QdaGeaG6EZugN3xiRf+wbaB04B/+N/nL0gZmJAyoUYEC1WD+gLhjjWYiRQqSy0AD7gQssy4Jd/ee02AawqVq+7/T93pJd0P3TZpRmOtSaA+fleu8V+qzLh6V0mo2YKmPO1FwNC4VJNU50pXHBWO76NNLMGxIBs3FldGiIOJjo1r53V8dhgsZrQWd1MAlTHbMC2UfYSdJN1YlCGpXhPtCxdaHTU3QD5ok1+C/S1U1eNupc8DHsLuBUnRnvUQqs5uQu6XtHnbJumlkfR/dobw4wYA1LMAofj0BTL7hXs64uqAIjE6n6/Zbm2NooM6GNn7z2CxpfuwRONnbi6fgYHv1iwVhezpYF+gcXuiOOtvL8wMSB1r4vm4ujX1jLyorsbKLaaJMA//MPZD+b3bwAQnqvv4aNM7pLkrDz0kRcsCtx9N2ERwE1GSuDSS/X86UInzSwdu0bprF6xIyRwEnQpIqcYFquZ88z0NKLUQc3uIInp7Gl5ccHtIFbHmXaMlNChE+mMMFRzQ5rto9sUKXVsGrWz2qZLv+hx993AJZcQnrP5oFGktGKS+TB7Y2eikzYpj/W3PFN9BkZMqrohoujc+pcnngCe8Qzg5Ml+mx3poVR3+5ORUU8wU0RnWQxIINFKgoECsBD6fd03uw9q/mwxSarlRQsDOxnsrP7YTQCA1nv/Zo0XmEJKgT7WSikb7PSJY8SZjUbk49QpYO+1E7CFQnxmoXesjYJDEa6LyYBGrE5S69yF2nXI9Q3KKJDYCuBZKapOhNYinVtZfvFWOF/5AoQ8h3N1YUFHHYxbvQtfCO2u9sraWd1uXOCLosZNuCNoYfEkUUGxPLPas4FKRT9GJFZLZWHC76C5QBff1Cv+lRdFlYMLPyWPPIFW4qPVVJj77NeWP9np6L6w1t/94HgWpFpH/ByGPAc6MI638n
A7WJIE+NKX1nhSSiSZBdcu3AeNSLVuYbU4XtbXXXa5wNHWZF+4NztCxib0sVqVkhaSRr0v5iKd0GI1lUtVKksX2cyxbX0PWavvVsqIWucYA7LOQEnON7RY3SkP76weNbIjSdBKfC1WAygF0LuCRr2Hm/u35Zk6DiU1ersrMqvh+6i40ej1R0yti15mNZX4lY8L6hVtOqAwSKxccM/HMCOK1R3poeynvTZLdoJuQhGBYS9zVo8cA7LaIlt+QgwQq//yT5r45KHn46C6Eq+69FEcOlD4PlYeq+MYsXrEnWx5H+OKXiHf5tLohRABQBQE4I2K1fffD/zWb61yrCuiGop/75zIF5jyxb38HCCa1Pzf/ws8/DBJU5tOXrTwgq+bZeJgyMXqfKcNUNjVys5qClisZs4vExOIUgdVN0TSouvR4njzxGohiJ3VIoBnS5REROoolcqsFFI5s7YhUmoNgfL7ynd1Um79B4BTp4CnPY3QWZ1v88ud1aNWOs9Z+WFSOauDar8wz3rbsjdC7qx2onP+XA8dzPDyF7T7USBRhNmoih17XRrnDKBPpvyEsvRtuFrFYGd1oYN7w5feiIXDKyrH5dmMK1xJ4YBJWTKthZjWI8dWf4E5JuEWnNVDZla3Y09HUuyawPN2HMW9D5rPMHdWj/edPpN+B/Pt0QP9pdIORQHa61ZK/XU1GnRtJu3YOKtjtM/QWWiSbz0IJ4tXzzYfxOIiGkkJ9Ynl58111wF+HgMy6hbqzcYsXO0IWliaJupkTQyIXRSrR67ShV4ExI6gRZevbe4HtrMiS3OQWB3phaibDj4fN/zadyDuFK7xRkOLPhOFa9Rx9A6RESf8UerAL5sYkNxZPeBGLuU6b8cIVBWvcPy2PnfXvYXlLjtTBfuSKz2c7Iyd5ayuj5nzv1TS4u+o50G+jdjKdJ2BFsEkuhcDUjinBgk0vQKLGzsPXRcDFy2SKMPp7hjmG25/kXKdBkeOfwC0WC191Cb6ovLIETOmXQC9z7McZHqHwSjXQb5g4a4Q6UZ12YdhbyGkSrmDJz9n6+UNi4lrkoufhTGMjoMZ4X6Ti9VBX6zWW/SJ8poLYrUlFNLO6GK1KuaAmKi4QedWcvwMFqMSnpj6Trzy0kdx+Ezp7GPNF0LyAouRRxJdkzura25EJlb3PgLP21ix1UcewV2//VkcO5otf2urjI/XXbgbhnyh1TXvmWqOgH4z1OarzULKbSJWJwlSJVB1I3Spaq+YXUzLM6vjgXMvZjhYrGbOL1IiyhxU3QjJzNLg1w9JkuhOkjr+odMBxsaIxeryBHxLouQkdIKicU7U3BDdpe1xZzt+fEU28KhkGeRiExWrS3oehKHOQ6cmivS5RSpWmyI620KsNkXwSk6M7gyRSFeMATmXLZ5RhPj4NK7efzOO3X1Ku8SSBHNhFVN73J6AMfKXZia6ouCeCQIMLlRmvgsFgcPNKZw5uGLWaSYkRWe1b0uE8fqfhbzkcgDQ0R4r9/BlGZCm2i1iJk2eLxClzlBitetk+mObmMDL9+zH7Y/s6B1rz70LAK6LKb+FuRaBWG1EH99Ozjm7fDXSVEc4Uzqrk5aOdqk4EdrTdDkguTjhJF0knQ0OyBcX0YgDjO30lj38i78IvOw7Y92/XOjbG82MadzvYHGWaEISRf0t6pUKbCuDbBCI1aa43pTfwhLVsKiXJap/rNWFzlYeMJOMwwztxMNCXEEjdHHjHxZu0s0mutJFeaogguRCygYn/MtuI6YIXrVqYkCqtu4LBxyrlOt0xVKinfgoF8VqU2BvUAxI/lpAL7LJzD47BmS8Lygu+71zpRDbEtgSYYco/1ctz6zunRADCyxu7E85rtD5ygPE6kxZWGxaZxdUXEke/zBq5meSoBkHqIzr77NUAjqjxnUA2lmtRO88KfmmjsOIwtcyZ3UQILAlotaI/VceAxI4tHFTJrPaHqvqAq4UwzjjUMwX2fq7w0YTq9vSQyUXq4VAyZUDdwIMda
zKhuMtX1yImqNl7WuBdXl8EQCoeJ3zIAwhGx0syCoOxZfimp1zOi837+xyoTb/XC0LNT9BMx5QJ2WI4+25lUsl7awedXyU6vo4oiBWb2gH5nveg7sO7EBJdZbfT1dkVg/rWC/y7W8DP/dzhbnryogZ34dtZaMtWBQIw+0Tf71txGqz66zmhuieww7kMAR+/udXPLhaZrXDmdVUXOCzDeZJR5oiSl3U3AjJPN2MP471nIHa/dpua3GCNAbECuDZRqSbI9qebAajdS+kceScBz7xCeAznyFs8O67Ic/Mo3zrzaTfV7OpXfvURBFQrxMORPJt375D63zMoydKRqCgigGJMhMDIhHOEPUF5jqoutE5TXI7n/48KlkLl1UXcOz2Q70LX8K4nahcE6ts9RSOrSe/AzKrlRLoqDLC1MPpIyuOY6VzJJ/oJQNiQKQ+jlYS6ByUtY41j4RwBwgeQE+snqzE2LsXwNgYXn7pE7jt8GX696TEUlLqbafvxYB0hl8ZOnkS+LM/W/Fglulzy0q1UL9AV3FWSnqxOm7F8CyJihuhdYZQrK5N9HdZHJ3d2C8vLmr36M7lCwfPfCawa4+1bZzVADDmdekE4DRFqkRvy3PdDdGcJejAOx1IZWEqaGOxSTQsl1Ifq7lUh3ZWxwptGWDh0uvwA1fcj298frH33KGjto4BmSpUWBwkfq5CGALPec7yY20lPip1E9VQsbRLdUA/O5Sz2i8cl9n+v+65mw8e8oXJyUk4Vgo5u9hrtxGXMDZpJqWuCwEgi0ccd+WiR9nVjqw2wSJblmkx0V7FWb1uDMjGndWOJ7Sov55YbaL/JieGaDt31I7qTDP3Ia+8Iq5jlJzaLNOTDYHezqiyn+p2R3RWF7Pb4ft6wXXULGiz89Kt+qg4MVoduj5GZjaceln3sy0CF2FetLFYd8MZMbs81d9NJTDXVFGsHtUJny3PgS458fAO4NUwu0wCt3BcZhwXh+v0CWahY1HWMD1rYc8VPlwrRXLsdP9Yi659ALVyikYSjHYtFDOri2L1iG7tZbn55hwYZLoAoLe9pSkeXLgE/+qSh3D8YGEyuEoeulJiQ+fA3/0dcPvtwH33mQeSBF3pouSZ7yYItPGAKCaNndWbQNFZfQ67QWZngfe+F3jggcKDUiJMXXil/rggsCW6o+7eYACwWM2cb6TsxYDEbboeODl6Cu7RAwBo1epOR4sTlMX1olDBtxO9HfXEPFGjkSnQ1KXLYNpkTp8mLlpoQoZ9FZKuRDcfOopq8xSoz61crCa7secxIGUXZTdBJxoxRzQnHymNjelJeZvIWR1KHQPixOhO0xVASzIbJSdGLDcupi3un8G438FVtVk89qgCjh8/q3iKJRTSLoUrC8vMM0O5PMyJXSoDAhlOH1txAUmptyT6yyd6gyb8MtR/s514hbDufpsAIIqjhWGO1YgEO+oR9u0DYFm4Yl+KE+1xYHERaLe1szoXRC1Lx4CE5aHdTrffDnz2syse7Hb1xDwvVEYoVqepLjK4GTEgVSdCe5ZQWE8UHCtFxYnRmd9gJ9NqoZEEqE+tkh/uujpHdbs4q70uFjseTVxH7iJzBVCpoOaGaMwR3MSM1XHc62Cp7dCsuq/IKPWrrs6BHrDYmET62jtduhLfc8l+3PdETZ/4SuGVH/1ptKSP8s5K/xfOIffzyBFg/369u0r/0UQ7H2sms7rq6GMdMQakLf1+Ti3QiwFZV6xeEQOCiQnsDFqYPW7+UKeDtvRQHjfPuy6qbohWe3QHsMxsOGVP99lEkQpnOasHOeFz8WmDUbauM0QMSKwgkGHnjiEatG14luwtpG6EhQXgb/4G+j4ipV5szx3QZaEXQkYshAgsjxUuBWrwzqgh2j1brJaji9W5kaHiayGtS9R3G9HHrpUx7nWw2Box/xjon39+McpsxDiYJNG7LIL+NRV4me5jRo5tWZmvPeKxpila0kc1KNxXHGewc90cy0JcwcICMHHVOHaVmph+dL
7XrsysvmsfWqxujipWm7/renqXQS1I0IxHvL5W9lvmc42GafKBB6AUYAmFK6tzOHZnYWdQvvMw6LtfLaGQRsNfXwsn2njetRFOmzUAxDFaiY9ayXyGvq+/qwbNjr7t5qyuVLaBWF10Vp/DkHtxUdeb+vM/LzwoJeajCiZ3mT7QshA4EuGouzcYACxWM+cbKRGmjnZWR3QXcPyxm+AdfIy8bG4uVpM6q8MUXp7Vu0DUq5tiUjU3QpfCkXMeIBerJycBAL4tEXfpin9F7/0LBNNHgRad6xEAolNzGFOLpM7qxBT/qtWAZhzQCDQFsdq3E8RNmgOWsXZWl+yEeIeBDUdkfffTBliaiTHmdfGCZ4W4Z+YKyBtv0gPRcj+PrmQnoy8I5Q6qFYWv8ufWJIoghEK57uK6yZM4fWjF55ZnM1YKYrUtEQ3KrDZ9cUv6Z4vV5iJVq2xLHUasnqonuPRS89jkpG5lehpYXEQ39RDsndDPCYHJaoz5qDx0x3DvvehPGnJMpELPtU9YcFZKLVaTO6ttqTOrF+huNDJRcIRxVi9s8JrtdLR7dEUMCADA0Ts32uEF7hgxM6Yxr4uluATMzIzeZjH3s1xG3euiOU9wEzP9dO9YKQYc5ljz7fSiZHYsDCpaaNyvJ2Z8XL43QSvxkE7PQXW6ONEex7HOFMpjBcXD9FsqGb5PPPRQB4Gf4WtfTXvH2k58VOt6SuKWXR0pMcgFnqwfA7IspxboxYB018u/XemsnpjQos9JvRtE2/QFxKTptxw9nm22RhSr83Or7JkYECqxWpzlrLaEQhqvMUbKsl4cyUZwPGtwDEgC7C0vDS6uCGix2pZ60XmD9/H3vAd4xzuAY/cv4O4zV+hzNHdAVwRJIUQAy9TqckAQAyIl4tTuLzb7PnxLImyPOJ7NBeCKFtLaVAuNUiJTFqxaBeN+F4tdf3RhppdZbe4ved2NATvlokiPB9Zqsy19VMr986jkZ0TOanu5C9xORnNW532hv0KsHlQY0yx2LcZlZBlgX7Ibe0oNnNnf6B9rUVgHUCvJ0cXqFUULa1Wl5x6j7MDMxeq833LdoQqFAwDm5xGmLkp2gn3VBRx7uOAsyD+DgrN6Q074dhsLtz+Ea458DmdOZ702m0mAasmcR76vjQdEzurtJlZvH2e13oHbPYcdgouLwA/+IPDVr5oHzFxzKS7sFAVQ8lKEqUMsdFycsFjNrEu7Taz/Soko1U6UJKJzqiaZLqhF3atvhlgdhVpQLTlEFd+BXjGpqhvSTHJWQB2vAmyCWG0mDp4lEc3RnbR5trJICM+tgwcRHZtG/d4vEWdW23A9LagtxOVNEKvl6MV+DDJK++5XquiaMIQQSidWmJ83wtKcxJjXhf3D/wav2LsfX/pagOmwhl27zQs8TzvBCcRqqax+BXFAFyoTav3t5LmzetzDd+09gtPT9nLl1Gx37YnVeQyIHCxWC2Q6BmQNZ/UyG5kpTLNuhmIcI84cTI1J7awGgIkJfQ499ET/PRdsf1qsrmxIrD5LOO529aKNlen3vkR33aaxxHg1oc2szgssOhFaLbqOViYKrsmv37AI3m6vGgMCoJ/7G13gYrW5Vhwr0/EEVGK1MrmflQpqboTGIsH91jir616oxWqK2Z6peF8s/gVg/baV6l16x08KjF1SwdPHprH/niUsnWghyRzsX9qFciEFBI6jt5sPaz44dQqH3vNZvO7Sb+LOvz+lHzNF8Cpj+mBF4Ot9TEM6q1cdn+Rb/1c4q0tOsv6ugLy/yz+4ceNQnBG9c0gVxE94HupeF43WiNOp/P5d2WA+63r0CiwWHsuzoNfK8s+ycyuwOGQMyNVjM9h7yRACgWXBs1PE6cZ2GnS7wE1/p2AhxU1/ehofOfAi9KvKmYiZIVz767JKEbxSCSTiZzMJUKv2828DJ0E0an55Hg9W1rtiWiHR7o3cYe46GK8kWIzLNNntRQE4r7sxQKjcvx9429v0v9/7XuD971/eZkd6KJ
cKYrWXju6E7y1erhhvjXLt5pFI/vIYkGGc1UlmYzE2cX0TE9hdauD0if6C4LLPFUCtkmlhedTPQPWzems1aAF8lLlHvshmFWJAhigUDgBotbBoRMPLKgs4fqgwlsw/g0Ku8IYKvB84gIW4jGvGTuL0Y0YETxK0Eh/VgqGlQpXfjm0WA7LUQtVqk4vr7343bXv5LmTPknptbYMLbAt3PIyd930eOzCDU6fQvx8I0RsWAGb3xqj3BAYAi9XMAP7mb4C//mvCBvMYECdCEhM6q1MHni3JOwVysVopxJGCZ0mU7BjdUR0TOWZ0VLITdEdZ1V+F+Xng1a8mbRIAcOoUsVidJFAwzuo5OjVJC18p7VaeVgtR6qDuhvQxIJ6NiQlo0Y8iX7ogVo9cPKaAzhXOyBdtlNKDcQVs7P1nGZbmU4x5XeDZz8Z/fPlRfPixl2CmW8POfUboyZ3Vo4rrSYIodeE6hXPKdXVkw3oFP8wosFyx8F3XNXG6UwcOHuw/LyU60kW5WtxCmyBM1p+cyki/72/P7cM7vvyi5Z9bkiDNlg/C4Lp6e/J6k2jjrH7V8xfwgheYx6amMBW0sHDr/frnFfa9yZouZjlMx6CUzo4bG1txaXY6+joQeiEkatCNnOWtd6D28N10RWyVQtJJdAwIZeErAFKqfmb1Rp3V7baeME+tkh/eE6sv8OHjyo6VQqw2DkXb1QUW614XzUWCvstM7B1Li4sks708s7ogpABYf4xkrlkAOHlSYPzyOp42No1DD3Uwc0Qf41y30itfAKDnThs44ZcSuPFGHHvLB3Fofgz/7in34lv3mXPIbNPPxephawPkGsuq3UWSoJ14y7b+w3F0tES8zrHmnYnV70N3TSSY7laBgwd1VHGx+qDrou6GaLRHXLzJBcWKbwQvgnHcSoci0BPU1vy+egUWN5hZnRdYXGecJBOFG/Y91BMVB+E5CnHmbGhc/+m3P4zXuTfjGZ378MnP+jjSnIJX6vdVQdkaPQZklQVc27ORKmvkGJBmEqBeM5+958G3BtxnhyFfuPIdVAJdQJVk8C2lHme5LsZrKZbiYPS+q9jHAkMX2kyWOph/5DTwvvfh7i828b9/N8NsXqrB9AWV0ioxIBv4vtIUuOWWFceq7LPytUcadpv4omppedZ+1Y3Wjxoy4/+Tzbrun8fGsKfc6NfLlhJxVigGCcAtOTq6Z8hzodEAbr11xYPFaCwYZ3UyorNaSqTFIq+9QuFDLHI1m9rhusvXtWeOF8YpqxTBKznx8Ie6fz8WojKeOX4apw8YNdostNbKBWe1S2c82DbO6iiCvPd+VO+4hdxZ/fa3E5sm41jn4luZ7r82eMCL9z6BCSzgFfYduPUTZwp9yPLzM/AVuumIu20YACxWMwOYnSXuKNMUUaZvvAnhamHPWd2rpEwDuVhtimfkzupOk0gAzR2XToIusYhw443AoUOkTQLYBGe1acyzJaJ5OtWnf24RitW2jTB1UffC0fMIc+K4t82vFwNCceLGsRaAN8FZ3cusphKrVw46NuLuaDaxFAUYq2aAbeNFP/YMHGzuwEMLl2DXU0yFTd/X2wZHjdpZrfiXbaPqhmivM8jNwhiWUPhvvxjjVa8CTnfryy/O3EGUi9VCIHDNVrR1RIQkVpjwO/jSmWvxvkdeDnWqkK1h3F71oPC9O47OvlxvQG6Er3//6llcc4157MUvxqTfxtwZqau9rxCrg5JANOS2ufkv3YedncOYrESYL0b/t9vaoWilOvuV6HwFgDSSqHkhumeIKvZFEeLUgmec1e1Rc29zsgxSCjhCO6s7SxvsaM11I6qVs58zMSCdmCCfdDPJBy6XXAIAUDMbLDK5Gkb8czwLKJd1/EODYMCR99O5+EUx21uRWb2R6B7fTjA/D9Qun8DOoInpg03M/M3n4Nv6PAqKaxi2Pdwi+YMPAl/+Mr7/k/8Jnzz0fDxv6hiWlpRedUoSxJkNr+IOf6yFp/OP633vAz79afNkvs
skWL4g6FjZ+t3LSrEawK5dwHS3Dhw4YOIECq93HNS9EM3O6GJ1HtWgxWqCvmC1zGrHgbeeEz7LjAC3sT/lemL9zOosQyIFPDvtF6EagOdkiNP13dor+euP+/iJp38Nzxw/ja9PX4X9jV2o1vqfpRWYeJcRxeo4teEVF5vzGK8RndWNOEC9bn7OF4VHnYPlWb2eQLUm0JaDC60ORX4hOQ7G6xkWozJJ8em04NTtLbhH618P8jP/jIXpBPjWt3DgjtP4lSv/Hh/4H4f1k/kui8LtrBQovWixge/r3e8G3vzmwgMmY3x5QesRM6tzZ3Ww3FldcQaI1Wax6/DiGHbsAFCrYU95Cadn+vFy7cTHslt63s8OeS3ccYd2ra/8u0UBuFbJaKJFlH12DMgwxepaLSxGZYzvLeGKySYOz9V6O5dWusB739ewBq/9+5FkNi6rLuD0KXPtJwmacYBqHjETBDoGhGgKum2c1VHUK24fEhmacqQEHn6YsMEkgVQWbGG+w41csGmKhVMhxr0OXrz7EO7+WtozYXrO8r4k8BXHgBDBYjWzLvPzxB2lEWurbkSbA53pAbinIrp+IcvQueNbGF88THesUaQHuZYczo00LKazHfqGvgE+/vGzDJAjkyR6IYT03JISAoBvScQtwuxXZZkMZELXfhwjUwJlJ0a0RLQMnRdo8m2IYIgt38OSf0n1uj6/mjQXmEwyEwNCkMmYkx+r6+o17o24OxYXtSNj3GS8Xv9C/MyLH8TbH/wB7LrCWAl9X1d4HlWs7m1RX8U9s46DIOlKOCLDK77PxSXfMYXZsLo8tHmlWK0PeaCDKIkyjHtdPDy3G7NhFU/cV9iZYNxetRUFfypOvP6A3JzjdqmQe3zJJZjcV8HjS7vxO/e+FrX6it9xXe10GKITb33qC6jLBew59S2cOVX4PsygWceASLLzFQBkZqPqROiu537fCO12bzGs4sZod4jE6tylaWWoOBE6jQ0uiOVf7LK8B0PPWX2Bi9V537dvn15gOrUwepvFbd+VCupuF40mwXeWX5tGTVEhjbN62RZ1W+e4y/V2tJnonomgi1oNsHbvxM5SEzP3n8bMmQzfMXkCZU8uSwTqb30f8DksLgLQO34OtXbgsr0SdiYhT04vixPIjzV/DwPeIgD9VaepdmD1JrXGoVh0U8LW19q6zebild0fR+3aa2O6WwMOHND3iLHC611XF9qkEKuVBbcWaHcmUQyILpZYGGfaeqfYmudBz1m9sT9luTYyJdYW//I6DY5aHim1Dp67QWd1luH0goenjU3jmonT+I7JE5ju1lAbL/RVeRb5KBOFfOu/t/yeCGDkSIVGUkLNrI3D87RBYNTdkoWs3nLNRjshEqvNmBuui7G60jEgQ4jV/+7f9bqDdY8VQCGzen25QjY6WIj0/aqZBHjTM27Fxz/l6VMnL7ZajAHxMy0kbUCs/su/1LfEniFqlRiQkh0PFNbXxeRrV1f0W1U3QquzzmdgBLhMWVqsrtexu9TEmfl+Mey29FGtFs6lYQplF3j44VW+t17GuD62sVqGxWjEugtmV1CvC87d9XIIySp3Vu9wMP6USR2rdeJEr90odeEFyzOrh5omZBlw9CgAoOpGej0/y3rO6mqlH91TcWO02jTy2rZxVpvzr+JECKlqcRmk1GvdZMSxdu5b5yBWnzyJxa6Pcb+DHUELC3PaJDkXVjBVWn4i9RbE2Fk9MixWM+syN0e8KJSmiFIHNTckbTeBq4UvK6YTJ44eRefwNMYf+zrikM4BHWXaWV12YrpiiIV8zjRbZ8JwDrTbWOZIoGB6Wm/f3xxndYqI6vtC31ltZSndx2q+r8BO6ArAxbGJARH9bdQEqwG9wln1us5oXaJZYEkiZQosEi7ayP6xKmBjzuqlJSzGZYxN9l0XP/bx1yKtj2PnLjP5yDOrR71u8y3qxVxC112/iI5SkKHULn/Pg3XJHiSZjcaRggi3ilitV/fXyU1TCjLOMO53kG
YWfvxpd+GWLxSGBkmi3V6lFWK1O9hZDaAvDhimXvQU3HrqafiD+16D+viKIUi+yDCMWI0qqm6I3eUGTn+zkLOdu53yPHSqhRClkCqhndVU/XangyTTBbWqbohWl2ih0UwcnDyzeoPFfpJGV0cfrdbx587qAdEyW04+AdmxQxcunCc4D4rbvstl1LxQF9Yb9XMoiNUlJ6bJWTf52nmBxd73tt5Ci3FWj5cijI8D2L0bu4Imprs1zIRVPG/qGMrlFe/VcfSOrkF9+OIi0kxgT6mB//v2Bvxrn4pLKws4+aVH++8/F09MP5Al639nycFjAIDoyGn882/ehslsBjPTqvf+V+bUwnHgiBQyWeNYlep/lwVBddcVJUx3a1BnpnGsNbGsiFIvs7qzQSvySvIFpmqgF9lGEbxyejEghcccRy9arPUZnGOBxd4fWWsHj+mX3Q18TJ6TIR6Qg72MkychVAbs2IEXvesN+H+feg8m/A6qE4U/mi+IjiimtaR/1gJu/tywHDiwouvIndXjfaeub0tEo0bCFIrgWSV/4xFp67QLALBt1OsYOm9/ZgZYWGvtcEXBvn4ExPpyRRIrzEcVtH7pt1B9xj5UqgLPqh/Ho/e2gUZDmwMm+mORIMCGhKQ0TuFnXezcoXSN1fxYVwrrDlFm9YrCsFqsXt9ZLTMbFS/uO6tLSzi9VNLXZJrqdiuFNjYw3gKARx5ZQ6wu5EBPjme6Xs4o11eaLs/adxz9uSb24Httq6XF6ikX2L0bjpUiOWkiwKREM/FRm3B67epdQUMc0+IiVKp3XWJsTOcMLiyY72t5znzZiUnKBQF6qrgtxGpz/lXdCCGVAavfNB56iLDBUZzVR45gMS5jwuvoceWiAqTEfFTBVGX5ieS6Wj9gsXp0WKxm1oXcWd0rsBghkYLmIlYKsdRh+YEtaRxUho70UPdCxHONwS8ehijq5WsPfZMcst2cc8lgWo/N0CNOnQIuu4xYrDbnkmdJxITFO6URq13EdMdrvi/flmQZ0HmBJsezAd/Xg7T2iG0rhTTJtHBVq2HC72Bxicb9KeNCZjWhWC2EAup1WEIha5+Ds3qqP0v3Agsf+YjAy19uHsgzq6liQIoTEttGxY21+LUaSYIkteDYmd6ivns3fut5N+PnP/l9/UmjlOiudFZ7SkdrrNXXGpFkPAhhWQo/cMX9eOKQWPZ8IymhviJDsTyEsxrAWWL15GVV3GtdDyUsjF1aW/47rqu/v2HE6khnfO8uNXHmUGFmkDsUqcXqfDDuROeUWZ6mqxSKCUPEqQO34mmn+nqF3zZCvstCZIMFylUOdL7hYDLorMh7MAiBip9qd96FXDgmvydOTuKS8hL2nyRYcTV5qo5n6azikkQjGjH/1rQLAKhWMeZ1sThLMC5axfVXGZSLXhCrx8YA7N2LnVMZZsIaZro1PPcN16BUW6E25u60Qc7qhQU9oXtKHW/+jTHghS/ElbU5HP7M/XqQUSxaaNoc5CiV934bABD+6QfxV5+s4Nef/mlMHzJvsNvVfWy1cFyOo2NA5BrHqpTOpAaWi9X/+oWYzqbwwPyleNlnfg31qcJn4OjaE43uiGJ1fs1WAxN7YI0++Mqys2NAbL3jQkZrx3Usi48Zlvy7W6tPyMVqb/jxg+cqXWBxyH4mfPQwAjsBrroKVz+7jN/85Rh7d0hUdxVC1vP70SgKkBGoqsHyeyKADfWJP/zDK0SYJEEjCVAfM5+RiT8YlNc8zPEmea5wEOhYNwoFrCBW2yVPO+uHaFfKdTJo88xqb0UMyCBndawLRD96tIyrnyaAffuwr7KAUw/M4vN31dCWHsq7qr3Xl/xMF1gc8vuav+lLmGocxCXp0X4NanPN9s7pPFZilIUmI1b3CvYBgK134647LDLX185aqMVqx8HuHSnOdEwMhpRoS29ZJE5v5WiYic3Jk3jkznl02iv6pBXRGhPjakNFslclj9ty+guHgTtEsTqlTIHFMsZ3ecCuXbi8Oo
+jD7d6x9pMguVi9bDRmXNzZqeOAnbv1uPfQ9NAHKOZ+Kjmp5bZebbROM5mE/ipnzr78TDcJjEgRgDWYjXtAY+PEzurk8RkVqd6UXwj88RTp7AQlTH+1CktVi8JoNXCXFjBZHX5Oe96gsVqIlisZtZlfp5eUFTQgmKibJrVfSmRpH1xIlogWtI0A7yqGyKepROro9SBb+nMajKxeqU4Tdbw5jAzA1x66eY4q32bVqxOhAfH0pEVSUhXCBAAfDtB1CL6EPICi74F+D4m/A4WZkevIi8zC66dAaUSJvw2FhoE7k+lIBPVFxSp1lak1BOxWk0XJtrICv/SEpbikh7kFnjJS4Ddu80Pvk9z3ebbUldUfPftZO0s0Xyyn+f4eR5+9EWH8PD8HvQqCUmJMHXhV/pKQ29r/VqD/DiGVFqguvLSBJdV5jE9XZjQJAmaiY96uTDgyotBrtfVriVWTwL3PVHHD/+IjfGpFeeSmTz13Pzr0G5mqLoRdpcaOHN8lYrvgUsrVpuiLFU3QngOGevz88Cv/Mry1JZcHHTLDipegna8sYJPa5L3BabAYnsjYnWng7moiqlKuOZ2/XKQoSOJinRtEirPUZicxBuf+VW8567rR280n5jnxaTqQtcGGDWgsuCsHid0gadquVhdduKzxYYi5nycKMfaWS0Edl27wzira7jmJRN46lNX/M6QOa3TxyKc7Ixj925zPNddhysmmjgyW9H59W5BHc1jhgbE4shQf26nF3wcaU3hVZc+gpnj5kBaLS1QjS0XlteNAcldxfby62XnVVVM15+Gx596A37yX53AT/1yIQfE1bUnmqE7mricLy6UHJQCE1FAsAhylrPaddePAckXZM7VWT0gBmQjWdgbjQFpHJhG3QuBK67QD7z+9djzgktRqxemuhuMPlgV49BcJlabmJ1BY8S/+zvgC1/Qp8qBA8Btt61styBWmxiQaNTxbH5ueRZ61VGJnNUK0N/9kEVRoRRks4PmyTWKoEupd5+W+s7qwEkQJQOc1eZWdM+DAa6+GsC+fbiksoSTjzXxho++Fs04QGV3X6x2fUsLSUOeWzOfugM7gxb2Nh7DqRPm2smvlQ3maxeZnQUef7zwwGrxRfki23qFYc29qSdWA6jv8LTbvdHoO7arZ4vVKh6ioPUf/R+0p9twuyvqdaxwwgcVW5sjRnRWyxX9lu+pwQUxu11dKD2r6R2Su3bhqtocDj2e6AsuTdGVHkrVYmb1kPOP2VksRGVMjGfA7t3YW17CqUeXgDjWC1f5qZXfZ6ONzZNuuQW4+eazH9+WzmrC6D1A14zoFUulwCwy2ULp738jdW1aLSzGJUw8fSfqbhdLLRuYm9Nj5h3Lr/teH8Ni9ciwWM2sy2Y4qwFo4S8jEqtNzqJnG+FrgUioNYPrihMjphIUo6h3rCV7A1WIh2h3GdTleImJY6Ba3TxnNeXNXWZ6IcS1UiSEDnsAepJ/Di7NVckLLBpn9YTXwcL86JMcHamgtFjtdbDQJBCr80G+SGkXbQoxIL69QbF6fh5LcYCxvavk9Ob4Pk1sSb5FPVgxIRHrTEjyLad24fk9e2AJhfTE6V67AJYLP4NEBCNQ7am18eLrM+wqNTG9UPh9KdGIS/1q5+ZYK4OKyKwVAzKltb3f/E3gf//vFb/j6jinpDv4mmi10Beri5nVeQxI2dNbRztEkR0mZqfqRuieQ5tzc9p8d+ONhQeTRN8PPIFq3UIrCWjuifkuC5Gi7oVoraELrEq7rfP3amvf+Hti9QU8CJfdREfmjI/jxXsO4+DiJNqjFjQ2EzLbCMD1OtBICMTq/NqsVOgiS1ZzVg8SgHNndSXp5TLveNFTMBtWMd2tYddeG1/+8orfMTnQg+7lv/OPz8NH9r8Iuy4x/ZHr4sprSrh39gocbk6hVCocl23rCX93gFht3MH3ZM/Hd+06iEm/jfkZ89k1m7qPnfD7v+A4Wqhd67TNhRdr+XnieUAoHTwx+SK88icvxw
tfVJg6WRZqXoRGHKxbxHYg+Y4Q30ZQtrWLcNSBTJqa6I0VmdXrZZcrpfsOarG6FwMyvJDX20Y9pKC4dHgBY263V1QVAPbuRV9IMsc5TMTMuuTO6hW7jYapQ3PoEPDEE3rRcudO4PbbC0+ayK3aWDECI0E0aiRMfm55AiiV9O4l4hiQXKwemLf/6KNITs6i+e6/0LkSq7TZkR4qtWJmtUQ4QKzOz+e77/N6YvXe8hIevuU45roVHGtPorKnv5NL5KHsw9zDlpZ0/1dqapHy3pO9310WtWTE6o24aj/6Ue2w7x1Gu62d1ePLF+8cK4Ucwlm9oxb3xGoxVtfftRGr24l/lrPatdLeot96zM1m2Bk0z47Wy+cIfr9fB0ATA1KYavTi7Nb7vlotPLywFwtqTC+27tyJq2qzugZ5fq4KQFj976vkxOgOc33NzWlH7bjoO7Yf6wKLi2glPmq7zC60cyxA/Y//qIcRxa7ORGJvG2d1qnScXdimGxeq4UscDE8cI1X9AuTdjTjBWy19Hjx9F2xLIZMpMDuL+aiMyV3Lv3N2VtPBYjWzLpvhrAa0WB2nDlkBuDxXOHASmrxHoDfAqzgRhlh4Ho7cWe0YkY6i4jvQ/xzNQPxCd1ZLqQuVUBfvVDDOaqp2lUKSAI4wYnWD6HMtOqupbuzFyuS+j8mgPbpYbUQv18mAINBu7bY32qQc6F2zjie0+Eux5Rnoj/RqNVMMcgMnwtycjgHZV1v7NZ439HXbaAAf+cgaT5otmZUVearugMJX2lldGM3u3q0LoD02r3/O37+zfKKT/81VMd/F3rEOPvJxHf8w1/T7rzcFFuuVFWK1G6O9npi0jrMaAK66Sm/vW4b5DJLOgA43y9DqCFScCHvKDZyZKQxlcpGu4sO3JMIukVhttg7W3BDd9dypazA3B7z61Vgu9hW2xns1n24B1zir3aqPca+DxdYGFpg6HcxFFUzV1+6XgpLQW6gvYGd10kl0YdwgAEol7Co10JoZ8bNNUz3JMVvUx8cUFqPhioqtS8FZPeZ1sbhA0BeulVk9YDdEktmYqCa9a9P9nu9GMjaFmb3Pxs6dq/zOoH7LEHUyfOnkM7FrX188vub5Jdx06Pn4n/e8brmIMsxiGPpi9fQlz8XYDS/Wk8cw7m0Hbyc+KpMrxGqRrX3aKmXGkme/l127gDvuwNnOcgD1ss4aHrVoXy76BCWhhZlRz6ssOztuqid8rbMouhnOarNNfEOZ1a5CnG5ArD7W0M7q9cRq46Ycya1sXKq10vIYr2Hq0EgJtJoZ9t94L37gukN46H7ZH07lu4JK5kPyPH0PGxCBMZCi+7Vc1tcAxVh2hVhd80K0FgeMZxsNSGWhlfhofvyf8aY3rRj6GbG6XOu7lX3b5BWvQ37pfeNbjharn/lM7C0v4WvTTwEAPNHahXJ9+SK+UkNGUT74IGbCKnYGRqx+zJhWpERXuihVCi5wO0G4gdiWb39bRyL+1V+ZB5aWdFzHZGHcNGhHiDkWmdn42VcewItfbB6r6wrWaqnRy1nvLQIAPbE67gz+DObDCnYELQSqu3zanpsoAvPZmvHeMG7tnCgCvud7gHvu6b8XvSOk/zkGXrZ+nB0AtFr4z199A74xfYVebN21S4vVJwq7wMTy+0zJTtAdJn7NiNUTO2xg925cXp3HkccjYG5O77LYa6qFWxbKboJO4m5onnTffcB3f7eu55STd//bwllt3MpVJyL1ymUZNn4vGkTurLYyvVjR2MB9u9XSUTJX7dALM5kCjh/HXFjF1CX+spf2xOoLOS5vm8BiNbMmSm2Cs9pctJ5N7ay2TWY1YcE6s2JcdmLECZGobJzVfk0XautQidX53Wx8HAqA6tCK1ZY1uj5ZREq9G3FTnNW2HD3jL8cU1HKtFK5IETeJRg25WG0RFM/JMYNV17cAz9Mu6MURP4d8Am0DsCxM1CQWosroi0x5MUjXQlC29JZnihFOfkLVatpZvZHtXblYffnY2q
/xfd3HDMhSBYCTJ4FPfnLt41zVWb3eVs88S9Ra7qzeW1rCqf39TL68rWK7y55bSeG7gOPA3r1DZ0+e7LuHGvEqYrUTob1edfp1xOrJyd48ajmuC2+YuJ1uV7uEjLP69FxBAckXWCrGWU3VFZqdC1U3Oqfb1tx0imfsXsRCUYjMF1o9XbBvw0VB1yIvsFgrYdzvYrG1gYiCdhvzYQVT42t/B8LbQNblFpF0TTHSIADKZS3Uzo0uVhe3PE/W5ej5nCjE3lQqOld6VAc40F+08ftCir5mh3BWV2XPWQ3LQlap49RiqefYW0ZetHA9vSPUBbW/OXsZdu/rX6vXvGwH7njdO/HQwiWojC3vs4ZxVuf9xMySh/rush5UZBlUqw20WrqPnSzkrueiz1qndi7U2md//i94AfAv/7KGWF1JtcN+lPMgFyp9G0HFphGrc+EvWMVZvVkFFikzq70NxIA0m2gsKYyV42WroK95DfCKVyw/zpITIxxld9RqucJG+Fo3Zgf6FGk9cgwHPvUgnr7wdbwsuAf/8umw/6RpC4DOrHaS0cWqYq5wqYSqG6E9T5tZDV8vjA7cFWLGG80kwG1f9/DhDyt88YvLj7Wd+H2xOo8ZStY/IWWiYIsUDz8q9DW6axcu+e2fwjemr4QtUhxpTCyvF7yRjPEHHsB0t45dlwe4pLyEk8f6NULaRQE4LwQ4jLO63Qb++q/x4D8fwXuvfQ8+9D4zXlpaQisJUNlRyFkftMgG9O75L7lmod9Pj4+j5oZoHpkHUl1noroiEsez5VDxhotqDONeF+NuBwszhc5+ZRSK56HuhmguDX8Pe8cvn8Z1e2bxX39J9dpcllkN9BfwBojVSWbjzqP79P2rXMZTdrdxcGFCuwUAmIoEGpNZPVRsy+wsFuIyJnZ7wNVX44r6Ao4elMCZM7rA4r7+vKHsy43FpEUR7LCFS/ZkOHWq/3AYArXaNhGrTR9TdSO6+bdudnmMFQV5ZrUj9GLFBsVqABC1as95ox56GDNhFVP7SsteymI1HSxWM2vSbAKV0edhy8md1SWHVKzuOattSZeXlMeAuLRidZQ68OrBORVhWK9dAMD4uBZ7WnR3NyHMdkzC82AzxOosSmCLDL4l9fdF4dTtxWCYzOpRCxYa0m4MSygzsCU6t4q5hHlm9ahidZ5Z7eiB50Q9xUJUHv26LbisSlVbV2UnEOmUTHsFFjeUB64UsLCAZhKgetnE2q/LM6uH+M6kXEd/NxOHSnkVZ/U6IoJ2VhcmAXv24JLKEk4djvR7WDnZBYaOAemJCE/RTiR14An9c6eDRhKgNrZ8kO8N2sGwhli9Y8fqgk/+WtdKEXcHi9Uts6V1R9DCbNPvX+8FZ3Vgy6EWFoYiSZBmxll9DouMczd9GZc/8jm0ZwvXThwjTm14gaDNEi1EoYyXYyyGGxDS2m2dvzexzmRz0ALIBYCMdZ8N1wUqFS0kLYzugNZRS/q+PT6msBCXR7uRKQWZ6t1mKJdpCriaY80nZACGE4DN+fh9z57Bb/xG/+FqVQt+1mrDlbxg31r9FgAsLiLJbChY2LW78PevuAKXVhZxYGnn8m3vpthsO1x/lpq7uWcWHb34NTWFmhuidXS+L1ZPrRB9rAxyrQKLeR+7irP6BS/Q6x75zpAi9bLU2eWjitXKgltyUKpYWuwYdQG3J1avdFavEwNyrgUWhxWr/Y0UWATiYSf7p05hKS6hPukuc08+97nA85+//DgpiuA1kwDV0vLPtTxEDIiMM7QePIwDjV24uj6Nn3vKv+C9f7Con8zHQOVyr03fkgPzmgeSL7K5elG04kSDHdBDtisA/d0HwXC7QsyYupkE+OKJa/B7L/4n/Pmfxcuel8qCGyx3Vq/7GSiFRAI7ghYmJoAJM4Tb+6xJdFMPz548gUxZvY81b1cINfjckhJ4+GEdA/LSp2tn9RnRe65VjNawbZSGKAYJALjxRiS3fw1SAlc0H8Qlrcdx/7d1ZEc78VDduV
q/tX5m9bI4DgC4+mpM+h3M33cUSJK1ndWDxlsAFsMA434H434Hi48X7L/5faYgVk/6bczPD/4IAADHj+PWTy/id/y3wV88o38vL7JZdFb7Spta1htzhCGkstCJ3d561RXXVnCkNQU8+qhu01q+cKdjW4boC+bnMR9WMLnXByoVXPHsMRxpTOjM6jRAdXd/JaQcqA3FpKlbPg/MzWHv8W8sE6ujSJs6tksMiMxsVNxI74KgmH9Df4TkzmpjPLHrFT2fa26gL8y3e1UqwNVX64XPRoxvzFyJ571khVi9wRgrZm1YrH4S8fa307Y3Pw/s2bM5mdVWraJdZFTO6lSLJ4FNGwPSd1YTXSphiDiz4Y8FelJKJFarMNJb2sbG9LYW4mq85GL1Q4+hdORRJOtNcDfaZpzBsTJ4dopIEhQnAvpibZ5ZTSRWR20J39L/ka1CF90zuVjdGPH8KjqrYba9xyUSsTqvTl+qOehKd/Q2s6wv+lSrG1u4WlrSxYIsG1bgrf26PLM6HBxbkiTraA15ZvWKGBBHpOtnVmcW3ILbBHv26MnTMQk0m8gyQNhiuRVhGGe1srQjHwCuvhrjfgdLDxzVP8/PoxkHqO8826G4bsGfNcTqeh34/OfX+J283UGZ1e02WlLnOjr1MtJM6O8Q6J9bFd9s916/qaEpVDsfanJTZH4ec08sYspvA8VdLz23YaHwFZWz2jiAx+sZFuMNRFXklc13rNN3uK6+31zIzupYwbFSff4bZ3V3cXSnatFZ7fg20swavZhUvmMiCLTTiyK6Jne8ef3s14o7XGZ1rQbs29d/eN8+4Ed/dI3fGRQrAQBRhCSzETgJdu0qPF6pwPFt7AhaqEwt71+GyqyOU3hWgul5B7UagKkp7AxamHmi0SuwWNlZUKjyPnYdsVoqe1Vn9QtfCFx99er5mbVKtmFn9WOP6ZzS/pvpO6srNQtt6ZHFgCxzVg/6vrLs3GJA8pWMtbbgmfG06w0/JvE86LjAYSb7J0+ikQQY2x2s/7q8CN4o6wBJcrazesic9WR2Ca0mcCi+FFf91PfiusmTOH5K55qmSy1YltJ2SqBfYHHU+UdRyCyX9e6NRYK+e4WzuuzEg3eFmILOzUufia8uXIufueoLOPJo4Z63MsrMZOLLbJ3PNcsgUwu7Sk1cfXX/dZUKUPMjvHj3QVgi69WAzNsfKgbk4EEgijBj78bO5+3DvsoCjs3qHRx5nFvPrdyLARni+3riCTy+uBvPeEEVCAK8oLYfj307RLLQ0qLyroINvGdkWKe9PPorKIz9nvEMTAVtzO+fA+bndQTleEFQc114lhwsVicJFjo+xr0uJrwOFk+u8315HiaDNuYXhzxnjx/HfFTGVNDGU50jOLBfrVoYdqjManOeA+jtDAqe8wwdH/Ltb+v4hqBwr85z5ocphtjpYDasYsclejx7+fc+FUdaU/jogevRzKrLYqzKvl4kHFakDO9/HIGdYG/3IE4f7ndMYajHy9vCWW3Gx4Gt58xUB70pYnWSIFMC1lhNf//DFkxP0/6u9XIZeOYzMeZ1cbpTR1e6mHra8pVszqymg8XqJwlSAn/2Z7Rt5mI15Xy0t901H4xRiNVmIqSd1QldwTozeQjsBJkCTYcTxz1ntXZo0uxvSeNUT8zrdS2CE4nVSgHIMj1pINS/5R1fR+nY40gadOFWMkrhiAy+nWg3DkWsRM9ZnertcpRitS3NRITSWd3PrB73OlhojHh+5S4XI5BOTIDGWR3HPadPqeagkxI4q/PvylZAubwxZ3VuA7EHjIosCyU/0+L6gP5gXWd1kujto6s5q9dq1rj+lgkp9Tr2jnVwcrEEHD+OMHVQ8leIEEM6q3ui1tVXY1fQxPSD07oDWFxEIwlQ37Nc9BlYVG21SBJDL2JgJXmBxSFiQFpJoF1CU1MAgGx2vvd3pbLg1gJ9P6Aa6McxUmWh5MRIUmtj94OvfQ3zUQVTQRtQhe/PfPaeL8w5KxEtEtwTex
EQNsbqZoFpI2J1VMHU7nWuhfw7vVDF6ixDklraIWtZ/RiQxRH77+LuFaC/EDPK2CBv0876i2HnkIl+FnmRTa8vpJSdGJ318jnzXRbB8vvGX/818NKXrvE7JlZi0Bb1JLPxhuc8giuvXPHc296GK59dR2WisKhl2zqypDugqFqUoeLGmJmztbN6xw6d4X9gCQhDdKS/3FltWUb4WmOxcbXdK4YrrwRuuWX146iWMzQ3KFa/+93AO95ReCBfZAsceFVPT3KpYkBKy92Eg2NA7H5hzmHZlBiQjTurxy6prP+6XFDc4I6bU6cKebK5o7ay/P5dchJ0BjmrFxpoJT4W7Km+uJFIqMUltKSP/5+9946XZDurQ1fl6nj65Mkzd8LNSVlXCIEEKBAkFDEPMGCEn8A8kgFjwGCDwAEeGQuEJWEZkBA85CeCJYEkJF3JQlfh5jgzd/LMOXNip8pV74+9d1V1n67au6pr0J37zvf7zW9mOuzeXb1rh/Wtb612zU+Af0WBofiEqTiteWdIE9K1GjFa7VbA9hsDq2uKxwd+KKjaDRoIGjPoGFZiYB9FyfdMXQNFiuAHOeOGjq2l+oDoVadi32ENL371LOoNaTTRpJB9PdO9zwwqh7YaLWHpIAGRh54GrK3F42DEDFL1uGaQke3gvseauGTP4eCdc8DyMubNAdbP9vG69387vrR2aISpy02yAZOZ1bUa5pZVrFl1nN9sIoIMqZPafInutwYDbDk1zBpDIit2JbWxZWufkmZWD4XB6v6ZNTRUsi4fMy/i1Je7iWa1mnxf1VBI4ooDVgPA/tlBIvly++1oaA76j5wl2tLmmCmqCGksIoakV+0WFveTdWruZbfjkc19+I6PvxUXBp0RTkZsQC24HgzkFhqqS4gn96/EjztXNtFuBNcRs1qGKtGxVAXpAglYrfGPXOJBL6jUbonNWSwGA9iBhpoekqz1TTdhRrfw4Qu34Z7jaxjNhgGyrhJS5i6zeurYBaufJdHtApub1ba5sQEsL1cMUnoRYT62WiSrXaHBom7IZKNQsQyIKtOJpqK+ujS7XVNcWG4FYHUUwXPC+LoSY7lqrkHwufugXjwDbXutWmZ1KKOmevAEXKiF23RDAirLAcmkVwhWx8xqHuNTMJxhAEPxCDjlVmQumC5RpyyXqS8BO0BTsLozJxOWZhUyICFxp2/PKqSEeto2498qBGo1cm2HgpsEBlar/PuxZkYErOYACblgNWNWp8tSRTSrIwWaknpekrD3kIbLwxngscd2tknbjTs0KRhAxZjVe/ZgacbG1dWInM43N9F1a2jtTblU8SRLAAR+BEUKM7QDMoIdnhzOwXw4JKWybRmYm8O82cfG09vx9/RDBWqTgtVV+QIwtrJE+1YESDp3Dut2A/NGH2rgJnOp58EN1RhEaGk2ehsVTLSULa/qMrS6Br9I8q7fJ2Yxe3Kc0DQNshQhdJ+hjJEYAKbjk4HV21NeW2ompWh0TGsaVBFDUE6b3hizuhJ/ZFZKrSVgNRcAzgCr9ZxiE5G5gM3Nv/LNn2W5pSTabRw+ru8wwaurLgYOR6fWDdFUHVxdl2IZkOVaDytPdXGuP4s1t416I3X/SxJUlew/Jh4gGatYnvxddvSdhmxo4mZtILfivfcSXC6WUmXrt6kmh95pF/CYWb3TGyEvKXqtDBZHkqICQQwWxZnV224N7f05Bsm0n2Xk197zHpK0AQB0u+j7Blqzo9I1RAYkvx1vgzBnt4IWZo4RceEZuYfuWbLOtpup7ypJMI2IbyzHi7QJXr2Opuagvz0lgBKGQBQRMEaWE4k0noQRnQvOrTcwv5esMbo/JN4t7HeWpKSEQVGgSgGpnsraJ1Mz2RfuPY/v/M7Rp37iJ2W84K13odEY+70VRWyuvXoVALBqt4jB7N69JCnx5AUKVptozozra+fvI52zV/DWT30Xtmp70ZmVgKUlLJh9rD/dxdPdOZzqLo0awzKppbwhwPYnxuhnzx1o4M
Pnb8cPf/bbAE0dLQ1RFOiyABg6HGLLraOjD4lh8yp9QxSNJiwAQNOIDMi22Nn2yYdd3DhDANrj7VWc+sJmSr4oGUuSJiA9Rp/7x7f/ffI19+3DDXNdPLm9hE2ngdZMql9Ms5rHhLcsIIqw5s1gYYm8VmrUMT/j42v3PkGA6VTUaxEh4Ajes4N1G03NwZ7aNi5fCuPPtH/td9G+8sT1waymCQZZovdoxWC1WcERMY6Ut1FddTHoC56/+31qrEt/15kZzByawV+v34OXve3Wna/nrYm7IRy7YPWzJLa3ydxQJbA8GBAmZZUgpW1FMBQfcdqzilmYGixqs7T0f1Ads5oBlQAqA6udMMWs5piGCAUr95Wj5FDereYa+H/+QahSCH31QrVJi4jKq0zjyD7eJmVWEzaOWpn2qx/JUKVqNasJWE2Z1YFazU3GQDpTBXRqLjctUMfaZBWZDZOY71XArCYHVwlq04QfyZWA1T5jVtdqdC4Q2yS8+09N9FwjAWxzolYDLAHzK89LXvLgg8CpU6N9HXj6DsMfEc3q8RL1fbd2cGnQAT76UWJMNE4sEzJYTIEIkoSlQzX86ckX4sInTxEZEM8cBQE0jcumdD2JzJ1lwGoRZrVvkEMiA6hOUy05Boo0zYTlVIU7rOsiiCQoUkh0OousXZcuEbayOUBH7mJ7Kxrpq2bIgGmirdnoblXQ15QMSAx8FWVW781BKEXlWr5SwdZuqrVfGVgd63MmYPWcMcTmxnRmbSQJFVKw2q3mUMb6WoZZXSuwLxEEUvKAyiNHsGMubGhOfl+jCJ4boanZ6HalmFm9v7GJi6dsfOfHvw9fd/zsDnMmVcsxPcphVueGphH9W8F1/OGHgRfc5eA1XzNIJJEY+7WmFr9nsyKTWR3A93I0qyMZak6uamIoCiQQ34iJkZ7rBEM3JHGDxUuX0HVrmDmS4zlB+2kqfuHt/LlzwOOP0/90CTu6uTAqXSNimu5vE8DDlQzoLQPodLBodnH1gUvouiZazdHXGwam3yOmq+5qNZK0mtbENQgQRSnskxIkuHMX3W88eamJPQc1oNPBwdoaLnz5KuD7FI8eTTApCkjClZMIOTa/hVe/evSpt76VyBg1JuyLhPwBKJ2+75kkoXbLLTjaWsPTHzudkA7aatymqXhcEpJ/4Qo2nAY2jT1EX3t5GfNGH2tnB/BCBYoUjJIO6L4od45NJ7tSMXe4if+9chTn+3OQxjNQbB3nkQMGA2y5tUSzeo12JN5XpZILTLO6K7aGPPGkhJs7V4AXvhDH2ldx8okA8H2S3O+kvjDTGBdhVi+l7hVJwg2HAnzLh/8V3vHoy9CaS01sTLOaRxqjg3rNbZOEBY0PvKuHn/vae9Fsjc5pMbNaEKzur9loqA5hVq/SvmxtwQlUzGgWnKowjWsZdH6K54OKkGV/4EAdbMHUw0ogGADJXNpuo63b6PUE3zcYUPmnZB6aecFN+IezR/GiV0yo6NkFqyuLXbD6WRJMrpP9XUXYPQ9tw6kUpHRcCYbsJwYiFYHVXqhAn20QJl0V5kTATrC6Qn1to21AVSKiwzYtkJI+mJsmWro9PWuChqfVoTJWcYVJCy9UUFNceBWC1UyjVGk3iI5o5czqcDoGXSqcYQBD9iFLlJlSUV/9UCblu6YpbCL00ENEP3NisAMm23dWZQIXl6gryVwwbSaeMhQ1NSRgvSLOnnr/38/jVHdxRHcuK8yaBNvXuJnBNLP6T/+UMLPi8DxyyEmzfaiWaK6eaiiPMqsBHHjzPTjVW8Sfn34ufuJzb0K9ObasixospkCE5311HQ9v7sP/+B8Aej0iA3KgPdJX3pzg+RSsLmLlTTUUuWD1cEjKbzsaMDeH5VoXK+foWsLuA1OFaYr9VkLBAGC5ILPacYCrV4lp4VyEjjbA1lm6UNNEq15TAF1HW7fQ3a6+ygJAIWZ13zPQ2pNTTi+aVPhKRQwA02vZIEY6wyJGOpOC6UsbdEKkLLL1jenM2m
IWOCulr6IaYFyyhALAuaaFZcFqKUAWRsn6MomxzeKrvxq4447UAyLMaqYhr5N7mzGrDzS2cOGqgb5n4Lfe8pkdb9M0kOToJCCBAbUlwOookoTnmY2/+yIWH/4Env/Ie/HgfU7MUIyZkaZZTeUhA6vHgC9iiJnxHmawqBU8HspyvhY2nT8LgdW6oEFVrwf0+9gOmmjva+a/lsmAFGRWj4DV29vEYHFpVBqrrroY5mnCex68vou+bwIaTQZSVu3aw1fQ9Wpoz4y+vxKwmjGrY83qAmzCnDaDKGVYZxgk0Tbk3DuehwjAU5fq2LtXAo4excHmJs59aQ0IArihCl0Z/b1VXUYQZVRDAEmiR5v8nZpN4G1vG3uQaZdz5GB+/38dxunuAqCpBIh77nNxtH0VT39hHbBton2rq6NtckhI3sVVrNtNbCoLBKymY+DKWRtzxgDvestHR7dNVLIkVwaEgYXa6GfPH5vFl9YO4lx/FpK+E6zWFZ9PGhoOseXU0dEtdHQLWxv0N/Z9BOGYvApbE7sC2a4owslzOk7MrAK33YZj7as4dckEggAbTgNzM6mxpGnxZ2aG75M0xxgof/NthEX/xbXDaC3uTDBZHCY8O5tcdVpYWEgevu0bD+Oe//Z9OHbz6Het1SAOVvs+Bts+GpqDeXOATcZId13YgYa2bsGtUC7zmsX4d62IWR387UegnHsatavnqmNWs3W61SLVjH3BtaBPqmLS8k8zM+TP/v0TXn8dGJFfL7ELVj9LgoHUW1vVtWn9yV+i9dkPV3ogdRwQZnW9TlgIvQradhy4gQptrkk2CrzNkmgw7dNrwayuq0lt7bSAPQP+KKO0pTnodity4tVqUOWwWrA6imIZkFx364LhuyEp0a9SD50eIGMZkIruBccKyX3ADqVV9DWtWcfAWgHzxr//e+DjH89ukwDg9P+1GiQA4aA6GRAGgEeDKTc3rE0lSjbMA0NIYsV3AlwadhKjnJyoNWRhZjWbMs6eBT6Txk0GAwIizKZKPXnMaiYXMAZWK3sWcefRAX7uvtfhMyvHUG+OUwkFZUBSQNJrvm8fPvD1f4i/+9Ic/t+n78S54cIoCM7T146iBKye5EiWFbRd1+bLgPQ9E82OmoDVF+kERUEszVRgNhRiylPR3B1EMhQpJAkm0Xn7yhUgitCNWmgfmcOsMcDm6c24r/G113XCrO5O39UYGKqpNHFVwMeg3wcASOMUv3Q805nV4/cKM1icdr+RNrEFAE0jBlabU4DLDEhSUzIgJcDqzU3gN35jvK8TNKvzfDKmYlbz9VSz9Iq//uuBV75ytM0Gr6+OAz9U0DTIfd9qAdizB/v3A0/3Foh828GDO95GZEAyAFCW8FQK7ktEgJRUrH/pLOb0AY7XL+Hkg4P4fU6okWtkGJCkCKFVkcFibdQI8JoYLDItbJdjsFgQrBbSrL58GQDQVTqY6XDuHQYoipjgpWJlBcnczJjVY7rCuhzAzdsnOw78UEbPr0GSaT+Xl7Fo9rHyyBr+ywOvxNe+YDDyFsOUyBo2zQY8XdlQr6Op2WyaLx8UrI4lc3SdJNo4BpPwPDRUF92Bij17ABw8iEPNDZx7tB8zlRv66HdVVZpg4knMZCRYJAn4yZ8ce5ABlXka42GIL5yexQPrBxLG8sICjh708R/+8VX4rS+/LOlg3KYH2+cwqzd7sAIdl4cz6HQALC1h3hzg8a09WDT7+O7XrI6+QVXJvcVJCAJI5iIaczfMwA01BJE8arRK29V5RtlAwqxeNjBnDLC2Qa9zEMAONNS11O+l6yQZwvEbAECIEJaGmXYEHDiAlu6gPyS/87rTwPxs6guLAH80ETI+eX3bd6j461f/Lh7e2IfWcirBpChkzLqcyY6ez9xIG5clRr2+8/yk6AqpQBVZD7pdYgSsumhqDnpDOf5MJ1DR1mw4vetAtNrzyFmWRVXM6ocfhyqFMHur14RZ3dJs9AaCa0G/j55rotlMvufMDPDCF2Ycc3
aZ1ZXFLlj9LAkGVlepW233fbQ1G26/OsEkAlZ7QKNBSrfXKtBsZszqdg0Nw0fProhJx8A/plFakayEE6gwmlp1pZ4M8FAiWkpuodutRqfVV00iraH41THs6eFck4NKJ/FYD71NGaBVrGxsE8wA+6qY1QysZk5zFYHVac1Lwh7iT/G+j2yAjCZC0szqtm6huzElSMXkVTQp1lAcTGt+lgaoZBl7mn2sDNtCG0bPDnF5OINmh88GqdUAy9cLaVafO0cOu/F5s9cjBovzoywPVQrh5QHAoRKbXabjNa8Kocohtt0a6jNj30GAWT3CwASATgfLr34O+p6BX3vwG3DvD/zpqJoH09fOAqhYX+WwFFjNPTxZFga+juasBszPk/LJy/RzWMmzoRAWfFVgNRuzlFkdWYJtXrxI/tY0SIsLhJl0kQISbO2qyZRZXaAkMS/SppmGQUp31wXn2l6PHDqaOWC1qLb4VyriihA6jmo1AtROa1zIJLfYvcL0OQXNpCZG2hiWGSyW8LI4exaJpARt1w9lKHqip9pQXQzyAGDfJ8xGs8D3iVl/Oa8pKgERM6vzgXU/lNHQyQe32+R9B37q/8Dnt05g/2EVeO1rdzbNZECymNUTEoLcoDIgUS5SmcTGBjBvDnC4uY4zTyfyISFkMl0zhn13eo31oa+hnvKYjIGvPM1qtjYXCTYOsubudGJOMHRDEtKs/sTfDLFuN7AdzcTbv8wQZFZHEfD5zyf/DkNgaYn46mF7myRL96TmSF4SAIjXkKtWMy5Sw223YdHs4W/O3YGG6uLNrx3dVximJMystm3g7W+fkJ9npAtTSQwWhzka0CLBNPEZWE2BWq5ZneehpZO1c+9exGD1+dNeUgkwBlYrmpytMw8kxKIi0jUUqMzdGmxtwQ8kPDg4hoXF5Hsdu1HBo5t78cT2ctwW+1uEWc0SOqev1Amz+sABzB+fxRPbe7F88yzwpjeNviEmB+QnBKNI2gHUzs1LUOUAL1k+PaqDnWpXiFnt1tE50sGeehcrm/RCT/q9dB26LHhetCxYvoZai5AOAEAKfESejw27gbnOaJKN6wvAxsfYQJBvOIwbWutwAnW0YkzTiBSjzxmzA7pfE5W0U1VxD4OtLQw8YrYuSxEiP6ncswMNLd2GW9G585pG6rtKAMJ+RZrVrVmockATYZU0mcylzSYBq4eCa9JwSJjVqWn/4EHg5S/PeP0uWF1Z7ILVz5LY3iYC9FUyq+1AQ0uz4VUtA0KZ1cu1LlbWK9Bsdl1iUlVTsW/WImZjVciL0AkmxloqNFjU6wRQlOhjUwVjlKoRBRNtdKsAPMCY1RXLgDAwSQ6ShbmKZp2AAEmMWV2htEalBotRlFQYzMwQHbaqwGoGCBgGdZHnT/GelyMfxICUFLN6RrfQ25zyOrB2qYZiRx9ia2161mMaoFpuW7hizQjdX74b4NJwBs25PCcxErWmIsSsToPVlgW8+MXAAw/QJ/t9sslfHHN8zyvPZgzMCUDK679/Ae/+mv+OG2dWUJ/ZWeopAdlmeEyndRxI+tZvxS980xfwJ694F/Y/d3lHm7lzQlntV1GwmjKwak2iWX1Dew2nL9fI4ZslmAwFRkMlv1VVzOqQMKsL6dezDLJKgPWOMcTmJStu0w1UaLQaoqXZ6PYr2JaxiggqKTBSupsXYZgczHaIfKbiOmBWx2siAOg6Aat5rL9UTEz8p6UaAMqs7mN9a4qkOwOSlIiwE1UPVkHWJ0AA0BG2JJsvGLBODeBy2crsvs1gQE8MWabgZz6QMklPNTOoGeQwj/HmuvAjBU3Th64nuf/O8QVctmZx6CUHsYMGByoDkmOwOKJ1LhpMU1bEMDoMsd5VMWcMoCsBvIGHiCH9bLNpmkSypQJd4R0yIIwJn8usVqCoxcHq3GqbEprViibnM2ppvO+vmvjc6g3YDhpx/j+vnyIGi54H/Mt/Sf69vU38e26+GXj4oShhVu9N+TjQJEDAAau9UMHqsEUYtQ
Bw111YXJbw9xdvxlftOYkRnQGQc50TioHVa2vAv/t3wC//MnDmTOqJ9P0ny2jUQgw8fsI9Nxizmq3xqiqWaPM8GLIPVY1iZvWJmRU89rQJeB7x8tBHB5GqSUQGJGtwMYmuIgkWVYWpevnM6l4Pfqjggc1DWFpKHn7pV0X4+Df/Oq5aLQJIyckcayoebF/NTQSwJO+piwYBqzUN2i/8DLS6hqXnThDYpvesF/BlQHYwq+eAG24AbvjqA6OVALRdXfbFmNUOAauXa12sbJvk+/k+rEBDTUv9LrpOAGBP4LdwXfL+GggLxDDQUGwMN2ysOw3MdVL9ot8rysuKZsiAoNOB8R1vwoFlD61OanyyOSvvugKAZcENFPFkSBH5h60tQlxZpJN0GJBrS5nVpuJfH2AnG3+1GkzFq0y6xG/NEma1ypfsEW+U/i7NJlq6je5QcF9i24l2PY3Xvhb4kR/JeP0uWF1Z7ILVz5LY3gYOHaoQrA5D2IGKpubAy9t8FYy0ZvVyLZWhnSYYO62uYt+cjYuDTjVg9XhZS5VM3bpeObOayYC0dQvdXjW3NmNWCwFIosHYaVJIFuaKwveIZjWaTcJyGlYoraHL1cmAuC5h12shUK9XKgMSl6hrGkyVupJzNNF9L0L3S0/hj3/u8ZgEGgdLhLBDQK2GtmZje3P6Q7TPQJF6XRxMywumU0sBqj0dGytWiw9WWxb8QMJle47ISnCi1pAJs5rTLjNYdF1A0yLceCLE6dP0SQpW1xZShwe6cc7TrM5iVjcOL+BFR1Zx08wK6gs7DySKFCJwM8ZuEIxctzjqdXzTu9+EQ7/7b4DXv370OZ7hD+3ruPYkN0TBatdFFEmQdA2o13Fsfhunt2bJHM1YVjUVjZZMtAMrWg+YDEgh1mO3Sw46hgTMz2NWH2Jr1Y3bjGUXKLO6ErCascANhTCr9aHY/WVZ8AKJSFLkaY0/05nVbN5OgdUi5mcs1teJNMWkdv0oxVamBouiZlJZfSWyC1FSmu7mgx2TYnMzyTOwdoMxFnjcdlaUAavjA3/OazL0VPPaJMzqnP2h48ALFTRr/gibVpKIfuThw5PfpupyvmZ1ODkhmBv0fnCHYmXfG3YD820PkGUsqetYPe/AC1PAn2GgWYUJHpMBSYPVqko0xrMk2JgUSgmDRVXOMd0tAVaLHvb9vo0zvQVYoQHTzH1pYrDISQj5PuIql3MPbOLQ4DF8R+tD+I1f8/EPF47Dk7RRuRyWbOaZ4IUELI/BalnGwre8BE/3FnHXW24Gjh4deYtRk4WZ1ZYFvOpVBLT+9V9PPcGqzmiSrdGU0PeN6XRlx5nViiLErA4dD7IUoVUPCLN6Zga3HujhkbUl4MknqZfH6HsUVco3WOTIDE0MNm/lnW9puw+sLI8Y60nLS7i5cwUXhx0Y9dT3lSRiBMepBoiZ1QysprGwgBFQPA5ZJvdsKGevC75PiC9jQO3yMvDz/17BgTvnduaf2bzFy7sPh2Q/tziD2ZkQG3adDDaWDBsDqwmzWuC3cBxYvo5aQyYT99wc0W8/N8SG08D83KgxrKH4+XsOJgMyae/y8pfj6M1GzGVibapyAC/Iua70+6/ZTSzMCO75ioDVvR5JfM3pJDMVRUQS0bZhBxpMxZuuAuKfKth3bbcJsNythuXoyzpUmey57dUqNPKQ3Jv0PNuzNbFrbNvktxLwNgKQjINdsHrq2AWrnyWxtUUc1SuTAXEc2L6GmuoBQUUH0igaZVbXu1jZ5DMZuUFNqrS6hr1zDi4NxdiU3GCTL939VgJ+0n5Jhh5rVkfOlH2NS+CYDIiN7qACeRUQBpIqB0SLr18RxZ5pbMshpKom8SgiYLUUAo0GmppTjckkA0AbenVgteMQsNqIEsPCyljgCtSaRjbNTAKBB6pudNE9vYa/+FMH73vv2GYs1gKk/2eAWm9KmRkKgqsagFoNs8YQm1tTtjkGUC13HFwRmQvW1+GFCi55i2g0+X2ImdUCBotBAD
z9Px/AwY0HceDB/4UL5+mGqN9HBECeSe2cmVFZXnl2Xon629+Om954O+qdsTmVlSdnaYmGIQFKJrGSJIlQyia4yKtSPrDulmFWaxphG/LA6jSLSJKwZzkiFTW93ogMSKOtoO8ZlTGrI5BLUlNd2D3Bg8v2NrbdGjozAObm0DEsbF714+/hpQ0WNas6sDpUYkmgjmFha1NgM97roeeaaNU4h6zrwGBxxBg2ZlaLXdtuF7j/fuyUZEmNLdburDHAZneK9ZZpSysAJAk1E0Lzy3jsYFaPs5lFWGQlwWoRzWoAO1h/maGqaGguhnnl9FQGpFkLdkg/HDiQA1arOaZ9cUVIcbBalwO4lsD90O1i3W5gfq8OzM3hePsqTn5xG5tOA3M1Ok8xZnV3yvtrErM6BlU5Ek5FxgBAxpeUb7BYVAZEFKz2+jYe39qDWktgfAlqVqfB6rN/8GEc9E7jno2/wd7+SfzOwy/HX33Hn+1oNzcJAMTEmo7pjICUi887BAC443uet0M2q4hmtWUBe71z+DHpN3HhTGoO9/0RI8BmExh404PVxGCR/j9OhnH0mh1SpdhsRASsliQot9+COWOAJ//481i1W6i3RtsgBotSrmb1DikzXigKFCniy7aEMk5vzI6CyIuLaGs2Lg1m0GyP9lU3JDiBJgRW2448UgkwP0/A5R0hSbQiJAewz2BWaxrwnd9J5Ap2KHsphFDAXWpY24YBudMmBJvt7Xh+qempPsXMaoHfgjGr63TMz81h3hhg/ZKDdbvJlEFIMPmeQc5cwFlnjh7FKFgtE4+b3CQAEIPVi7OC8zHdL+eywFk4DtGsbkrAzAxamo3+pW7MrDYU//oAqz2PJEvabfI7bVcjH0sqpgPiu3J5q5I243Gi62jVA/Q8Q2y/RcHq1ozgPMPWr12DxaljF6x+lsT2NgGrK2NWU72kmuJWx34NQ9i+ClMlxnLLtR5Wtng0CIFIMavNukw07iqSAZGkCGg2yeJbhckBmxA1LdEVnjYDyQBFFQSs1gtoMHHCd4KEWd2ryN0gZlZXqFkdH/gjWvY+xOZGBQs8A0AbRnXgjOfBCVUYOgDTrJQFni77NmsSbF9ArmKzj65bw7pVxwffN/Ybx4kQupnUNAKoTZsMYUwfTabM6uH0c5fvEwYrxTc67RBbbo2/CdnchB/KuGx1cmV6WRgmva48thfdnzz+7s/icG0VB/qP48IpJ6FcS9JomTpjVmeBSTnMagBAo4FvfnMN99wz9jgFJwIvD6wux0rK7GtsVHbtZEAAxIcSqd2CLEUItvvJfFjTINcMcrCqqCoGQKIn2xPcgHa72HLrmJmVgYUFzBoDbLHKBFplwQx320VKEjl9TTOr25qVLfWTjn4fPc9Eq86Z59jv9AxmVsemhUAMVnP1VGn0eqQg5XOfG3siCODSvQYAQFVhyP502w02F9Ima3VJqHJjPDY3J4DVoQKFmY7FJl0CYHVB9qsqcUBwdu+IuvYxZrWbAz66LvxIRrMWjgIQAI4dA06cmPw2zZCzQZ8wTPYRRUJVoSu+2P5ge5uUuC+pwMICbmit4cxjRHN5rkHnKcMgusKD/Ka4MYlZzZKXPGZ1CbA6l1nM5mVRKRjaJutTZvR68JwIH798M47dJLAvYaAXB6z2POrlsbWFx0/ruLlzBQDw+zf/Jv6fV/4B9h8ce78Is5rOy7N1O2FWA1hcJGN2fBwDgGYqJLkiAFYPrw5Qu3Iae3tP4uKp1J6SdUpJMas9oyIZkBSzWvG4YLXnhFDlAN/6ymHyfb/6q/Hipafxoj96G/7g0ZehMea7Iav0ns2RAfFKyICoeckVIF5HAIwwq7G0BEkiSa9GZ2dfI/rerGD7m2YjHMFVM5nV4CTZ0p+XMce+4AXAK16xs1EhiUfGWNY0oN0mbOS1bSID4uuop2VbqA60m5e8ZOE4BOxu0HtpdpYwq+0GAXBbqXtM06ArPuxhzp6Dcw1+8ieBl7xk9DFVk/LHFgAMhz
jdW8CeRXGw2lB8seSl4xDNagpWd3QLWxf6MQZz3TCr2SCamYGpeBh2qwFofTdMZEDWpnWFpcHuIUVBqxGi55licyFjVrcLgtW7zOqpYxesfpbE9jZhklQNVptVMqt9n4B0WgAYBpEB6db47+MF0/1kOtBShNCuAFhmk2+rhRndwvZ6BZMvO4DqelKavD2lGDTTKlZJu23DQdfWK5kgfZcYt+myX5kGVQwkyGGsMTt1xIA9MamaNSoAP4GEodekYLVTlQyIBkOPYuOvwWY1lQB+mDDpDINqHXKyut5mH13PxNDX0Q428Mgj6SfHDgGahhm9AvYnSy4wzWrDwtb29MzqgW+gYZLvKxmUYcwDfWg5+aVeUwisllSxbDk7kJzensf+xhYONDZx4ckh0OvhdHcBgaSOsqh4DEVaoq5p2RvXF7wAO8FqhZjE5jGr/RRYJhSMWZtlTFNWs1qwLDWuRqHVKWi1cKCxiYunUjIgphJXxVQlAwIAaDZJubMoWM2Y1Qsq0OlgqdbH5Q2TzM+eh223hpk5yqzWrcQNfsq+xgxgwyBA7UDgtxgOKVjNee11IgMSAwGaRvWaxa5tvw/s2wfce+/YE8zHgs0Buj69n0MQwA5UmJSdZtak0szqcRmQga+jwZh/DEzLA5WDgFR8KMWZ1bl6qkWZ1YpCNKs9Pfug7jjwQwWNerSDWf2Hfwjceefkt6kacg0WcxOCWaFp0CRxZvWG08DcXgOYnyeGZeccUvbeGGNWTwtWBwHsQINRS4372GCRkxQtWvQoy/n3wrViVm9uwo9kPLqxF8ePC4zbAsxqxwG8k2fx6OZe3PocA/j2b0/W7Jtu2tEuMVjMb9QLFXTq7ghYvbwMvOMdk98i6fSeEWFWf/4h1BSPVCel/QTY9aOLfKcVYMutT7f3ZjIgDKxmBos8c0FKgPntt/eS7c+JE3jjXSfxxhu+jEc29+2oDovn21yDxXIVIUEeE54mbhQ5HAWR6UZx3hygOT92owiU/vtOgJriYnZmdP08cSK/IiRXv53JVmbMscePA294w85GRcwQA8eHLEWk7ZkZLNV6uHrOSrxDjFSfVBW6EsARkCCE6yYyIACwuIh5c4B1m1xfaf++5LUCsi2Rl6FZTeOWW7BjrVB1OXs9YO0OLfz6g9+A73uDYOk6NQge9gX2R7ZNzi0tGZibw4xuYevcGLM6vI7A6lYL88YAGxvVNBu4QSK9J7rn5jaazIetFsTBaschBoszgmuYohASTZYE424Ixy5Y/SyJawNWU3H/qpjVQUBAOi1KwOpeBWA1Y5GZGqDrWDT7uHqlgj4HAVn8m03C/BQpo+ZFGqxmIELBCTgMgZ//+dQZLs2sliS0myG6nlkJm9B3w8RcsCpmdcpgEUClRoiqAgJW6xXISqTbbZoERLOrGQNOoMZg9ZwxwKaAuWAUAT/90/l9DSIZskE2q4quIMwrnWRv6w6x7ZL78Odu/HP8u58ZLR/1QwWqnoDVbd3G9mBK9iczQNMlKgMywGZvyjYpONOs0e+r61ClkG+ER4G9K926EFgNRSEVF7zr+o9fBACs2i10jhHA+sK5EGG3j1f+7Y/gtbecHH0Dc3znsJVVueAY5MmAlGHSxcAXJoNJ15hZ7dkBdMVPDiWtFo6113DqySApCa5pCXO9IhkQ9lk11YXVF1hjogj/8eMvwvn+LDqLGqCqOHHIwVPbi4QK67rYcuvozDMGtI3usAIfh3hNVAHTJCCCiDmN4xCwuiEIVj9TZUCYJv64DIgjNsf0+8A3fzPwoQ+NYca+T/YEbNxR2Rp3GrCastNqBrnmZkMhlRslmNWDQep29H0Cii6kmdUcBnQUke+Xp1c+Hmx+CaRsYLkos1qSUDcCDPycpDuVAZlpBXxTvVSoeaZ9TLO6KFit69BFmXTb23ADFcZ8E1hYwHKtiyuXIvJbNehvXhWzOgggIQX2AXwGNAOri7BU43ZzwCRq+FmUtS/R92YGBYBlKcTx42Jtmq
oYWA0Avfsex+Nbe3DzC9vA134t8Ku/Cvyn/wR84zeOvkFViaxE3nbedeGHCmYbo2C1LAPf8A0Z79EKgNXn11BXyRjSAyuZQsYYp4uzPtbs5nRyiROZ1S4sT0wGZARUlSQ876tM/NRdH8HJ7iLqs2PGqGzPxWNWF5EBUdX8fRFt1w8V7O8MRsFqSQK+53uwsN9Ec7E++h6B0n/PA5brXczOjN4rv/VbwI03Tn4PVwaEwyqeGILMantIwEIGVi/XSIINQQDL11BPy4BIEgxTghuqQkQRK9BQa9E+HztGmdVNQFYwcpOwvubsOTyHnFeLXANN4yQBADx5Rse82ceJmwTHVxGw2nHQ9wySVN6zBx1jiO3z26Oa1eCfN77ikdKsXqr1sLJeQYUgEsKcqXiw+xWB1WwfpyjFqkwsixgsCngbAUgqUJ+ppI7rKHbB6mdJbG9HOLwwwGYVgCowWoJSJbM6oMxqXScLXn/c8aFEMPMegwDA+xubuHSpGgkIAIkBXLcC8JPpOlGwuqZ4sApOwO9+N/BLv5TS04xN8Mh/2zMSum6tEtM+slBQsLpfJVhNDBYjoLISfVL2jVijdXO7gumNMfTqGjQlhOdjev0pz6Oa1aBg9VAoC+37BDzJfQFSJlaCJh9e1yJgtarhq5aewuqZIdbWkr4OfB11llPSNGLgOa1UQfpwUauR+6uvTFfu5vuknM6k8xWdY1avcOYvOnb8QBYDqwVNM7wV8qNeXbgF7RfchJpKNluP3u/iBYtn8Ivf+qXRNzCzlxwtUT8qaXyVx6wuo1EqSeR+CDN+M9amWhKs5hyehpZEDuXsYjBm9fkwke6hQK0qB/AHFTCr2cGr2aTztliJ58fPH8cDW4cJexpA49A8Br6B6Nz5+P5qzJJKm5Zmo2tXAFazecskIHhNdWGJyJPaNnqewQerReVavlKRTuACpIpJ8TDklKiz6PeJxufrXw/80R/RB0NaCSQhYVcy0N6bYm9AD/w1nVxz4YqQsdjYIJqn8ZkrCNB1a2jPJeuBCLMaAEHPREOSoMmkOiPP/AuAOLMaRPvVDXIqgxwHfqTgyB4H73mPeHc1PafsO06ylZABEb0fmB7PzAywsIA99W1cWZWJDEgzDVY7GEwhKQxgpNw5jrRsy6Tfi/oN6EYJzeq8uZtd26oNFimgeKjTEwOrVULCsXmgKh0e3U8/QAC1m4iuNFotjAhOp9rN9ZxgfY1kdOrexCYmRgGwetgLiM8QgH3qVVy6EMbzlgTE9/VMi0qkTcus3qFZzWdWMxmQHXPBkSM41NxAEClozI/JQ/L2XPRMUXRscZMLdB2948Am9u8fe+6ee7Bw88JOnxOBfbfvhlgye+h0xOcZVZO4MiASYz8LN0rki1zO+mUNI9TYfqvdxp5aF1cu+IlmtTG6X9BNagrKW8NcF2EkQTFpn48cwbzRx7n+HBr1aLTykJl651S3svNqEbBa1WWiWZ3ze1k9H3PGIPEY4gWt5BI6glPN6uaMTMBq3cLWhQFgWWRfQJNPlfhwXctIMauXaxX5kSH5TWsqR6+8UKNJYkeqFai+pPvj5qy498Yzep98HcUuWP0sie0zmzjy3l/E5vmKNH1cF7ZPweqqmNUxWE3A2qbmYOjyXVjDEDh5MucFYzrQ++rbuHixQmC5XicGcN3pM4W+7UORQtJXXafl5MWu73vfC7zylcDly6xRf8QEr9VCpcxqVQoJe6wKzW4gxRClG5wKzQU1jcmADLDVraicnupA6wbdLE67aXBdUpprImZWbwjoa3seJ/+QyhYDEAOrPQ++7ePSsIPWHsIQefmex/HpTyfPr9lNLM7T30rTKPuzAmY109dWVXQaHrac+nRyDUwGpJaA1Uu1HlZXOe+jwBYwwYBmUtDryzNP8alG9Eq0jPbxJUDTIHsOPvkxHy/dc2rnh/EM0Ma0bYVDkPFWSO8RZArLLJ+cllnNOUNbw4gwfZgMCK1+2V4j+n5WoMOsSYmG/1Y1Wv
MAKLPaE5PV6HbhhQoe6x9AZ5Ze36NHsWT2cPX+i6OJVqZZbVewyU9LApkmAdcFD04910SryZmPnukyIGz8pYxh66qLoSvIrN7y0Vo/g2/cdz/u+3wUtxmx8/MOsHqKvvo+rEBHzUz0tQEIrTOf/3xSab2xQQD2mI07ZqoWzwN5ZlJhSPY8RcBq0BL1ML9EPa88O6vR3AoW6lWimQoWFgo0mwf6RFGS9C4SikJAH4e/jgdbPVJS32oRGRBaYbjhNDDfoX0yTTQ0F4NpJYEsixAC0iCLJEHVcn6vMh4GQJIU5TG2qwar6f73x1/+Zdx+u1ibpuJxwWoGMDyyuReH2ls7ZT8mtKvKYX4Om/b1+N4+DhwQ6CtQjFk9CInPEICD9TWcf5joCgehBDlVkSUbGtkrTglWB2kZEEmiYHX+mY6dKSaB1abqY299C/WFDLZyzvxSilktBfkyIFRS7AM/9GliBjkWi4sT9owiMiBehEPNDRw9UgCsFpABGfl8kWAJJg6YZlkYZVbXu1i5HMZrV90Y7ZNhAK6Izjrb77MKOE3DgtnHl9cPYnlmbMNC55e8Jsswq+P1gKMxrslBsjbzgnouDAcCv69tE5LNjAYsLxMZkCs2YNs4P5jFgcYWkZKwriOwut7FyraR/3rBiDWrFcE9Ny+iCBEjYMoykY4FxGVAPAPNOXE5s12wuprYBaufJeGu99AxLFgr3WoaHGFWVw1Wh4AsQ9I1spHmLGinTxNjhMxg76c60HPGAJuC0lK5wb53rUZkQHpT3i5BgIGroqm5ZPPFGG8iDL1U9PvArbcCV67QBzxvhHHZaCukrKUCZrXnRlBlarBoVVeC40fEYFECEFkVMavHZUCmlZUAEoaiIScu39Pq38bMagKmzRkDbGzyx5bncczbx9lrIk7EdJNv+TrmDjYBWcbX6P8bn/qYF7933W5ifjYBq2d0C11rSvYnSy7Qg+vsTIhNpz7dmPV9DDx9BKyuqQIMWM+DBGp4IwhWG4oPx8rfOPlOiLrqYHVDxcyCBtx0E/bUt/Hu99fw0j0nd4rnyaTawMsCk5hmddESdQHN6hFgTzByy1Jpm3rRvlINRS5YbYEwq9mhpN0muufrVFNYkgieaBiErbxVwSY3LQOiuLCGYixKP1Tw2OaepKL16FHc1FnBE1/oAq6b6EzSBG7fqQCs9jwiCaSrdJ3xMLQEwCfbJjIgE4y+RoKBtM/UTTjba7DzkqqirlEghaejCaD36fvR/NKncOCTf0J05gFqrkj0OOOItdunSI4z3c9URQgAIbD6Z34m2QdsbxOd7X4fZP4YM1WDJEFVpXzAI0wd4goEAVIEStSLTDK89YvKgKhGMbNfWVeJPFZekq3EvKXJgRBY3e0CM7pFAGSq0bpmNwmz+iCtNNR1wqy2cxILE+Kznx0z2WQbhvoo+KcqFPjK0+0uZbCYz6wOIykx/BRsEwAinw9W//N7nkJDpFCTGSzymNWXSJb7/sEJHH/d7XxWJZOV4DGrQxk//8ZH8cIXCvQVKAxW11UXWFjAgcYWLjw1TJJhaqpjdI6JvR/KRBBQM1L6f0lCzQiJ3j6HVbxDBgQADhHm+pHW+g7Nai5YzXwqihrDyiHfYJF5P0yIhQXsHHMiMiAuAavf8wfi11/LSzABCL2AJMEKJgR1OYCb5T1Cw7Kwk1m9KqfWrtFrGJ+VeFWo7CyVAoHn3/J1+PjFm/Ca7xzLQIrIgDBQucA1UDQZQdZcyNr1AE0OxeWxVJX4UYmA1ZRZ3ZhRgaUldAzqj9Xv4+neAo601oiueP+ZDVaHLtULb7WwXOthtWvy3iIUvhclMiAiUna8YOuQHI2a3BeRAZkTBOJFDUx3gxu7YPWzISjF1lA8OFIFGtDAqMFiWJFeEjNY1IsxiFx31DjoHe8AHnhg7AUAWaB0XQhIEu1vFElEBsSwsDUt+Om6ZKIzklLPWsls4d69KWZ1XPJMDhcS02yoQgbEi6BKAQGQqtInjTWrKQ
jer6BEnxn2aVIiA9Kvppzepwe3hZYzvc4fkGhWm0Svec4cYGObvwnKZVaH4U5WHDsM8EpoIxkNzcH8sgrccgvuWTyJez9mx8+v2U0szEdxm23dxrY1JaBGNasZE2ZuNsK63ZgerPYNUj4IEB1RWUBH1PPQ1sn3FZUBISXa+Yd5z43Q1BysrisEl77jDvz8c/8GP3z7x3HH/o2dtuSSJAQAl2H9KXmHMmqwWBSciN3pM0AfN1BLGyyKyIDUVHcHs3prg34e+yqmibZmo7ddzXrAPqupORj0BTbO/T78SMYTa/OJru4NN+DmzhW89xOHcK7XIZtmWabszABukAP6FeyrpFGwWnFhOQK/L9OsbnNeyzbhz1Rmte/DDVUYRsL600xFjO0FoH/VQlNzSGUG87+gB/O6lnq/ppE5pgIZkHqKWS1JkdDhyfcpOPnUUwhXr2Km7pL/hyTppSnBSCk1t+R5CrA6t0S9DOuPVxnEkt5F2JS03UzGNjObvYYyIIN+hIbqkLmr1YK8MIcokgiz+jjVhpAkNGoh+qLGTyCX6Z/9MwJYswj6FgGwxoBWVZNy15kgkouBygBlkeUzq6NIKja2KAAeeBxd4ahA0oIyq7lGgE89DQB4wj+OffsF7u/YuDLnNa5LkohagQRLERkQi7CbsX8/9jc2cfFpL5EZ0lIdo3JuU62LQYAgkhJmNUDA6jydeTAZkHAnS9UwgF/5FRx56cGd0hpszFTJrOZVnAFJ1WpGu8vL2KmXL8isVuUJ7PKc4MmAeG5EGOuFwWoxcsAOzep1FRgMyNrVHL0+uiGRtZYHVrvJWZjFwqufD1mR8brvnRt9raZxNcY9Jyx8DWLZxLwx65JzsHC7RZL5jkPOLR3iadKZV7Dl1ICLF9F1TcyYLgzFhzt4ZiOegRsQCZZ6nWhW95uV4EYMg6ipnhhBhNsg8XZSWKWJYUCRQr5UYBQBto2+Z6C1IAhWKwo06Rm8T76OYhesfjbEo49CAggrbwry6x//cYp0lDJYlKQI0VQOQjSYwaKeTBLss/LC80bB6s99jrCtR14AxDrQVYHVoeunZEAG04OfnkdKSAz6IzHGm4gJA43hkJw7xsHq2KwOSA4mVcmAUFBZyDxIqFHKgoi1sCsAqxmzmmpWzxoDbA2q035VDQXLsy5WrVYlYLUV6ITsxDSrpwWr2aYwrfFWgFk9Xxtibg7A856HhubiBuMyPv95JGA1IzmoKtqaNb1UwdjhYn4uwrrT4FDHR+PDH8ZoBUVADLnSYLWh+HCGfLC6pniQ5agAWO1y9UR9L0JTdbC6JhGw+gUvwAtfouK7v82B/NM/hUl1wLmgD2O8lWD9qVKO6z0DwYvKgOj55fTTMBR565hlS8nhCQBaLXSMIba6ErwwZfpkmoRZvV0NIwMAUK+jqTnoDQS2TzTZ5fpKwqxuNPC655yD5Wt41+MvRcek87QkQTINwvGfdo5Jmy3RdcayBforClZfJ8xqPe3hVUALut+L0NRsyFKE0PHjNgeejoY+ClZXway2Aq2UDIjnUe+KX/s1YGihcfEpslfyfWw6Dcyao3sAWVMQXitmdQ7rL5ZMKqinCiC3r37J5F2UxawuO8cqCklaCNy2wyGtCtF1sl7ffDMUOcSl4Qzmbk5c3Br1CANPFwarP/hB8pXOnEkes3o++axxZrUaZSct6BgYMWUUCaqFnXkvsLFV0LyTy1hPVb+JtmmqHmyfA1ZTZvWT20sTJSB2hIgmPEsi6gXuA02DBCC0OYMrCMi6qHrA3r1YrvWwcilI5hd9FKxeMPtYW59u3gpYNSMNXUc+SBkQmTNNCSbPMfPzeO2bdJw4Mfa4CLOaeTSIBjPEzAOrPQ8RkEgpjcX3fR/wPd+T0dc8drlXEPwEJ8FEP06Vg2L3F6sI4WlWs3FFweo99S6ubOrAYEASuDOj3yMGgEXB6lTiYnYW+J3fIf4LI8HTxAe5rhNZ+3khojHug7RbgFldBKzuexSsBjBzaA
b3rhzHvVeOkc+cmYEuB3D6z2ywOpb30XUsz3lYsdrVYBBeBKViZrUfyVAZkcYwyH5+kzNWfR/veezFODeYQ71d0GDxmbpPvo5iF6x+NgSt+5MkCJW4TooPfxj4ru9KmfY5Dixfh6l45GYbVFCC4nkEANeTSSKKJO6hbFz+4MqVxKMmfgEQl1Ibig/Hnh6sjsvVmAHctOCn6xKw2kzA9boqWE5OY+W8iz3+eezF5Qma1XTDYdLymyoWCj9KDBYrZ1ZXCFYzlpWGRAZkWJ32q2YqWOq4WKkCrPY8wnSpS4lmdY8/tjyPbJom7qnogxFSm05VJRljm6NZHSqYr9tkc/ic5wCKgh878kH81q+RD1yzm1hYTDRa27qNrjOlHhndMLADZq0hww40YQ3Fv/1b4M1vBj7xidHvMvAMNOtJOb1Q4opq07frvrAMSENz0O/nbPAjcghqajaGQ4mwbxoNomf0trch6/SrMRAhi/VXQltaRLO6sJYoOC7qTF+7BENRkwO4bv53HNryqAxIq0VNOjUC0tXo3Mc0q/N+K9Fg37PRIHIdA4E2KesPGDW2P3BzE//ips/gwY396NRS83QBoDI3xsDquurCckTBagOtGc5racn7M3YTzmRAzFGDJpH9BgD0B0BTJeuSGVkkSRgEhAE1CazOA6h4EQSw/FHNakUK4Q/5/WTMai+UocoBmvYa+n3gIx+O8Ndn78BcbSyjxg7bWfNsSbA6ngsy9p+BQ706CuqpAshlgZMkfaGu5gNftM3CAHgBGRBiVpbS2z90CAtmH6e6izj2goRN2GgAA98QBqvvv5+siTFY7XlkntS8HeANkYPJABXZbygVHNMyISBkYj5lzDsVhcgU5F3XoqxaWYapBgSszjkvMTbcE+frYmA1M67kyIAAENe+BQBVJVWzvLFlWQQ4bEjA/Dxhv64gmV+01HjXdSyafVxdnwICoDIgafwuBuGzLgJjwedgfv/sn6E4WF2WWS0FXI3xWKZrQhjGCCl4tK9Z1yCK4PlSMfATqTk2o90YqC1y36pE1oorA2LTSjZVBZpNLDf6WOnVgc1NkghpjV0fjTMOaISWQ04sqYsoy8AP/MDkvmpymMtSjWVACibEAOQzqz0Ukxdh/jM5LPA4bJsk1lvkGtzwvDnIiPDWT/5zHGxsAu02uf+vA7BaodUCnZmIGLhWgEEEbgBVCinhopp9fBCOMqvbusX3tbFtvOeJl2Bvsyc+vAQSLLshFrtg9bMgvL5DDgIApKgcSPvbvw3ccQfR0wNAndblaqUaXJcwq9kBkm3YCjKrV1ZS/aTtxu3pOpnYq5BBZgtfvU5KzKcFP2Owmk6KrDy7CFj94S9juX8Kez70Tly5Qt/neURLk633tRoB6barYFZH1GBRzDxIKJgEhFTh2EpLoTAZkKFZSO9xYvg+vIhsghfnAly1q2FWx4cKplnd548ttuBNZFczxo40ClabPLCWMasbFgGr63Xg1lvx4qXTeOTLTlwN0GjJcZtt3ULXNaa7tuNagGwTKLiqf/azwFveAjzySOrBICB9ZYAzkwHhJa7oZ7bqgZjmpaKgqToYWDnLJzX/ahkuJGmCrmFGqLqczVYuq6fKNKvzZEBKgOC5faVtlmdW5ycCLEdOmD5Aolnt1rDp1DFbT4x7WpqNbq9CsLpeR1Oz0RcxP6Mmd8AoWI29e3GouYEH1w9gtpGaT3SdHN6qAqs1LdZP5JW9A0gMFjuc3fgznVkdBFSzOtU/0URAFKE/kNGi0kAH9BVcPB8CQYChr6ORZijqemlmddoI0fJHmdVCWvugYHUvwmObe3HTzAqaUQ/9XoQ/+3MZf3zyRZirje0BBNjKhaUaAKhKlMv6i/dSVTLeYlmkgteeIwNSmlmt8BmKwBizGgBe+ELsmXXxfd+8AklO3t9sRBj4uvCB37aBW25JgdXDIc705rGv3d8BYBGWZnZSlH2nQsE0gHNkQAq3K2JcSQFF2RAfW6YekuR4nlSDG6
KlEfk/UWa1wmNWpyULRUNVadKdMyaHQwIcNmRgbi6RamDzyyRm9caUYHUoQ03fKzygNt6nF/wsIWa1XIxZLaJZzT6vjHxRrr42vQYFgGVFV/Ln2ICcqcowq3nrV1zJRqtBZpZNYoh+/jw5y8yOIfaC+3l7SBizQskbTSPJII4MSFHNapG+el5BLWxNI5XugsxqCYBUIySzoy87gL95ze+ioTm4YXYTaDaJDMiwIs+oaxQxs1pVIdVMso+oQorUR6JZLSJlJ9Ag0dpPwOqWZqPX5ZwTbRteqODj3/ke8c/aBasri12w+lkQg20fDY0ezksCSFeuAHfeOQpWs9DloBq9pLRWLyBs8jEOVucyqw0DhuzDsfnX4eLFfJw8zayeNYbYHE7HJv2Pv1nDpWEHzRrdbLBDaYH5/MqpAfbUuthb38blp2nnfR/bbg0zraT0va3Z/EyhQDBttUqZ1YxdIYcETKxibDHwU5MARcFs3SG6X7wyNKF2ySZYrWkIQrkiGRDKrDZNzJlDbAxNblUED6wm8gepNigjxx7wNavnGg6RAQGAO++EJAFzShfr22RzFjNmJAmaoXDds7nBmLcMrKYgXeiIjQXLAp7//DGwOtasRtymCLOaSRztmfe5HkoAiAyI5uZrVjsO/FBGU/fQboufSXLZylOUvauygMFiQc3qWAYkQ7PaLykDois+YapmrWVM41f3ElBNVdFeMrFNwepOjd6jVAZESLKDF2mwWnXQtwQOhQGpHpltOKPalnv34kBjE0/35tHZmyrRr4pZndYIVlXijSACVjODRR5YTQ+5uQf9r2QwfwwjNaaZFjTv2g6H6LkGmi0Z6HSwv7aJi4916fyio26k7k06Xl1fEd57Pfgg8P3fDywu0q4wAzQ29+g6+b0EweremoMvXD2M5y+eRUOxMVjp44v3y/jcytEkaZPqb/zGCREF5YDKXPATJUEEEWZ1yUqTzHaZaVwJZrUu+0LghGWPgdX1On7h/30u3vYHzx15XasZoeeawglc2wZuugk4e5Y+MBzik5dvxMuOXpzU3VyDRQCFExbxwTwL+AqCUU8NwTa58irpKhLBMI1ICKyeNUhlwr59Yn1VpRB5XpA8pu7EoIaQ3DONZcHyddRbBKyeNwdY76rJ/JJmVjOwerNgQiIdrNLEHJ0Po0jKZUCX2heIMKujbCPEiSFoiClJUWEAGEA+uzyUCxtaS6pCJMIyZNf8UC7OrJakxCcj5/wxTg6QFqhGx9mzhLXfyQCrOWcEqx8QxvYOevqEEJQBUYsmRYWY1RLRGL9GMiAAkmtA5QG/8/g/4raXLwGGQWRABtcBWM3W+FqN3Fv9KQlzUUTAaomME9vNrt4S76hPtPZTzOqG6mLY5TOrvVCBVi82dz+jSR3XUeyC1c+CGHSDuGQ1CorfFOyM1emkQOAUiqvJAbxhBYCi5xGwukaHnWGQAy9H2N73CRvFdYlMydWrKVA9COgBi4CUMAzoSsAvmQPwn//zqBnNju4yw4p6neih2tOB1R/+BwOPbu5NwGrDIIy3AjpMK+ccLNe6hI17iaKWnocNp465WTqJmyZhv25MubixklgpqNZMiwJZcbuCEjO///s5ksZjut21hkxcyStgKMagahUO6rSvlq8TBowkYa7tY8NpcBlUbJM28Rr4PracOjq11L2kKDAVP1/nizJd3vriR/DSl9LHjh8HANzVOo0HLsyTzXp688f+Pc21ZQkLVrapaZjRLWxvio0xyyLJtZMnUw9SMClmMVNmNe+QFzg+VCnEJ/7gSbFztKIQg0UOs9qPZDRNf6cBT04IaVYXVSPiyYCEISmLK2ioRXRq8zWrC8uASBK0PONGIK5MqOmjY0VdmkMQyYRZzdjKhkFka6oGqzUHfUcAHKE37UtvXh9l1+/dC1P1sVTroXM0ZSSk69VrVssyahoxB+Ru9B0HG04DnQXOdxM0wvynih398P3RKi5AfP7udtH3DTRnNWBxEfsbW7j41J
CAM56BhpEal5IETctJ2kyIX/gF4DWvAV72Mqq5zwzQ0mC16gqB1Z4H9K/08cW1w3je4lk0NQcrj22gphKQZa4z1gYHRChVSg6+wWIMVj9DNKsz251Ca19XBDWrLXnUHBbAkSM7PBAx0wqx7dUKgdWzShdDZthtWfjU5RP4mptXdnaX6d9mAPYASsmA5IJ/ZUBwKgOSe9hPV5EIBmFWq7n3rOeEmDMGqJkh8ZwQ6KsqhwjymNWlZUAECDjDIVkXmyowNwdZihD5QcKsNkbB6sVaD1c3pzCNDwKi4W+m1pTriVnNNKs5yQUA1YKf7ExR4hpIWe2yKpOihtbU1DvXHDeKYLkKakpKdm1+nsx3fVolOmeOvkcUrB6ECWObFwJgdWG5DtougPzE1bXSrPZ92I4EQ/WTPlMdlB/9xTl8z6/eHleLP9OZ1YGXMrc0TbQ0B/2N6fexfkgS0mZDheVr07O1Gf6Q8rWpqy7fMJ2ezxkDXih2Nasri12w+lkQg26QYlYXBxRXV4HlhQDtVpSAwKldty5XVILCmNUMrNZ1YnTSy9+MM2b1Bz8I/PAPE+OFGFSnoJfGFmkmAyJQLsJA8LzPHdGstsVkJVZXJz++3ZVwureQaOoaBmFQieow+T6uXAywp96FJBFGaBSRjm46DczO0L7VamhrNrpbU4LLzLRQDgm7vgLZcgCJvrQuF0qE/OVfElZ9dpvJBlAyqR76tJpZdHOtmQSs7hhDbK1PeS8wGZAW2fi0ZyR0PVMYrM5iVm86dcymwWrKyOGB1X6o4PlHN7C4SB/buxeo13Fn4xQ+9dAsOro1ulkvINnhuiTJsCMY87amxm3Om32sr3ObBEDuW3aAjLvh+wRMaiXziyEgX+PZxMW60RbfiDY0J59ZTbPwTdMXO+jS0DRkgwhMo7UMWC2F8HMMFtnrioSs5TB9aIlwYdAHSA5PWQcdZhhljM1v1JFn002B1YxZPZyCQcaCfc9ajYDVtsZfD+h7PvSz/ziK/dC68kPNDXROLCSPC0pjcWOMbVgzQpK84xweI8vGmd48Dh4RY1Y/Uzbhr3kNsLGRemCSZrWuE6NRi7OQ9XpErmtOB+bncaCxiQun3UQGxBi9hppJK00EF8izZ4Fv/VZgYYH22SeJhB3M6gF//fZ9oL86xP3rB3D3/AU0VAf3fgZ47uEN3Dp7GbP7Ro31eCCCF8jFNT9Bwc8c48ZYBqRMeXbVzOq8dpm0QQm2tohRGaIIQ0sizGrORN5uhth2a8Ljyl7rw/z934TZXYVlAdFgiHP9WRzZv3Odjpnwk64Bm9NKyIDE+u2T5sVpZEDyLgFjvxYYW4omI8gzGgVJ3MwZQ+xdDMRwe6ZZHeZUBZXoK1QVhuzzt7KWRUDpFgGLUK9DiiKEG1vUYDE1n+g69XiYDqwe+jrqabCajukoyyE5bYReJNiYyUq2jsvKCbapSgGCvCWxBGtfiFkdlbsGmfstRiySi5/5dC0ipphZ+/lYRiaVxJyfx5LZw6rVImvX3FimTRSsHtfvzwuezBCmXGfymNVMY1y0XdHKM8fBlltPPFZY3H03pG/4eiILxZjVPKP4r2SERP5IlUMCttdqhNCzOT2xi90rjbZCPBymBat9H0EkQ2HoJ/V0Gfb5RA4AiSeYSMTVRqV6uhup2AWrnwUx6EdoqGRSKKNZfebzqzhy6bNon30oAattO7ZqKwIo/uzPjkl0pMN1SWluilltKh63vMXzyJ8rV4APfQi48UaMgOpuoEJTEwDYkH2hs34QcMDqlM6iqsvZIFIqogh45SuT9v/n/ySeaqdPA9s9Gae7iyMGcEQGRPBgdPEiVoZNLNfIl59Tu+Sw63kEqJylr6MAzYiud5lgoDIzQhQxixBtN1SgtmqFxhYzkwLIdY7NQIFkE6wnYwtAJaCPHypQTZW4HNe6WFmZUjfL8xJtQQBSvSak78X2kl/8Iin3vffe0X5uuvXRsm9WPiqgWT1iridJwLFjuGv+PP7qzO
2Yrw2RINmI3elFmITr68Dv/u6EJ1IJCwCArmPeGGB9Q+zaWhZQWz2LN36jhTe/mZ4NJzCrRRhJvhMUY/0pChqqi4GdczByXfihgmYtKARWq4ylmceeKXHIUaQQftZtVpZJl7fJL9tXcORFAGIcNi7HAMRjdNNpYHaRfjBlVvcsberywdAPybhXVTRrRB+dCyRlHXYNA/jX/xqHnreIzkJq3FE5nKmrN8Y+1zAAh8MkBICTV5o4MbPKZ48wZvUzZBP+1FNjSWLfhxsqo9XFug5d8fnltP0++p6JxrwJzM8TZvX5MJYZGh93qi4ToFYgeWfbxERV/uy9RP5pA4RZzWShaD9rqisGVts++p/+MixfR/O2w2hqDv72S3vxDdaHcNvsJcwdbo2+QYBZrcpBcYNFlaNZPY2WKIdZXVS+SGTeKpwQVFXCAOaBE0EAy1dR13wuaGvWZdg+P8EEAIgi2E+egwkbN2un8chDIbZXHSyYg52UbXCSC2WMEAFAkqDKdBxMmmuDoLgeOjNY5MiARJFUmE3J28P4XohZY4C9y4IgkSQl1UZZa01aslA0CjCrrUBDfYa2vbhICABPrpNKPmMUVObKq4zF6urY12IyILVRZrWep6/refCjctVh7DMnRhkpMwHwsxRYrSiQpQihm51sL1shByDHI0RJSFsFIt5v5Uis7CAHzM9jT72LK9YMLgw62HdsbI4RNFgkYHV1MiCEYFbSyDeHWR4T1wowq1VZQLPacUZl6yaFqFH8VzKYfFaKrdzUHPS3pid2EXlPYLYTYcOpV8Sslkc0q+uqi+FAjFktNFZZxMzqcl3djSR2wepnQQz6UcKsDqPCstVnPvhlHGlcxcz5hxKg2U2Yabmbj7G47z5gbS3jSdel8gd0QyHC/ESyv7t0iTCRbtrfH2FW9z0DTSMprzMEyzF5zOq4JFbThLVEgwAxO/SDHwTe8x4C3n/uc8B2X8HTvXk0G8kkWVNcDG3B2/DqVWzYDczdeQCQZRw1L+P0kz7g+0QGhFWTqyotfZ+STUhZt6pOdAO9qgwWGVDJwGpLbGx5HgGogwD49m8nf0b6mmZZGYaYPqnAh/qRDLWmUbC6h5XVKcHqMWZ1nKkVZFafOgU897kkCRLf675PQLrGGFitCjCrI5mA8em49VbcPX8Bd81dwJ7lMSYQY8+4fHCm2yXa8DuCGqDpNSVuc94cYGNT7NoOrw5Qf8/v4d9K/wmWFRHXe9+HHWgw6wkArgskrnwnIIwA0dODqvJlQByHGCzWg0IyIFzN6qgkszrPSGgKQ60okvK1X4vKgIBcAzcPWGXriDnW9sICGqqLi4MOZm+ljliShFY9IJULU84FgRcSI2NFQbMRoe9PAVYDwI034ugdTSykiNUMUJ1Kxz8Md/ymkibGdLr36X146Z6T/A15zKwu382qIgjIHLO+TuZEAAmzupa6R9mhjwdWd7uIACgzTQpWb+LiZZlWbuijGq1I6fkLgIpPPgncpJwE/sf/wNylh8lewffJeE6D1YonZLzsDV2sWi1y4P+Wb0HT8NDQHHzToYfwb+/+MF72jc3RN/DAalbuXNRgUZRZXaacnmuwWKir+e0ytnaJNnXZ5zOrmYSRyQce4nElIgPS7cIeBDAVDy9ZPoXPfLiHq5d9LJo9Ypo8FrkyIGU1qwGoKpFRmQjWlmVWXwPNahE2pe8BR1rr+PbXixMeYi3wHOAPQDnNal43mAxIm7a9sEDIFU91qQzIKLNaV8SrJft94EUvGpNNDAJCDkiD1cwfIWvuiiUwymlWR1maHey+1QuMWSZbkyfbUnJsqVKORwg7V1UphcI8YOTi+y1di/L3W0y2MD1+5uawp76NC4MO/FCBPjvmIK6qQh40lgUiA1IArM6TGYrZ5QUTYgC4iRBVicQJHWx/xJu6bZswqxt8sNq1nsHMaibXkQKrG5qDwdaUOnGMBa3KUJsmqYapAKwOIjlZhgyDXykLlJu7dw0WK4tdsPpZEP
0+Yma1JnmFS3PPnFdwpLVOpCO2yXtDyyFgX6tVGFBMmyGOhOuSzU2TDjsRTV0kc8TFTzyJQ8113Hjmo+h2o7jNnmeiNQZWO+6UMiBRBM+nZS2SJAxW+z6wtUX+/cADwFvfCtxzDzlIdwcKVqyZBKzWddRVF5YreBuur6PnmWgfbAOLizjavopP/k0fv/ShuwhQOUe/s6YRzer+lLd3SgdOU0K4fgXmBrRdL1SgtuuFEiGeR8b6+94HLC2BAJQjfVVGmNVVyYBEkUTAHsPAUq2H1bVi17XbBX7gB1IPMDmcJl30ajVIUoRoKMasvnRqiLvuItLSMbuayYA0UquiqpL7K+8SpGVO0nHXXZClCO/6mvfiv7zt9OhzmkY2+ZYYWL21lTDi05/rjoPVRh/rm2LX1rpEdFlx5QqOtDaIqRTdxcbAnKgMiEO11oowqzUOs5oZLDai6pjV7EBSouxdlQMEOTIghY2vAD7oE8rQtBKHJ53KgHDKUuvjgE+ng44+xNO9eczecSB+uN0M0ROQ2cmNKELgU9YpA6unYVbTePvbga//+tQDuk4qg6Yx02GfKUnJ4UpAlxEA7r+8B89bOMsvdYyZ1RW4s08Zly+Tr3XhAvDOd9IHg2CnZrWmiV3bXo+sHa0WMD+PffVtXLyq0/3LGJMQKCSL9Mhnt3Fb+BAAYH77dCwDYgUa6o0UWK2KgdW+G+Lp3gKWblsCbrwRx9/yXPz8c/8GuhLgps4KOreOOcNxxsFIKW+BiMHPayADkikpMK3BYh6zuoRmtSYHcHn7TpaoHk+0ZbQJQAystm2SqFU8fNWeU/jsp3ysrQRYMPuTwWpWDZBxDQCUAqs1Jco02oyBxoJAksZjVqfNZAu0CyC/9N+NMKsP8a++Xzwjl+vjAEzHrBYAqy1fR22GnlUWFwm54sltAmI3UtedMqtFQZTf/32CJ545k3qQafjXR5nVpuplSxYwQLEoj4ZWhwVexvmD7Y2KgNWKQjWrBTTGC44tVQ6yweqUDm+h4Gjt+6E8HbM6F6zWRskBCwt47sI5/Nmp5+OG9trO8cw0/O38/lgWCsqA5AB/7CyjFrwGvKQoJVcVWhNEwWrKrJ5t5ryQkW6eycxqRkBjYHWtRkzIeaaFvGAV05qUVAhVIANCpIjGmNU5xEX2PgCF5+5nyj75eo9dsPpZEIMB0KAGi7ocwO0VkD6wLJy8YOKG1hpmdAvdVXKgdwY+TMUD2m3SpmBWjwtWpzVlRZnVFHy/eCHCj97+MbzywKOJzrXnEbC6RicSwyAbOx7DBRywmpVqqRSs1jQC6HBWHyZVEQTAww8Dt99OpFTPn0e8kWgu0CyyaZJDqcO/DVdWgLWne+h6JtoHZoC9e3FDax2/954a/uqhw9h2a5iZTa5rW7PRHU6hRwckZiiaRHU55enBXyApw2nXocmhsAwIA6t/+7eBn//5sT0b6+uYDEhkTykDwjQGKbu+oTr8cqGxeOc7gXe9K7XGMgBcT8DquupiuJnf1xis/uRJdDZO43WvAz71KfokA6ubqYOVopD7a5gvAxKEMmR9bKxQ/V9JAswTB0ef0zQyJwgkGZhUyw52Nfvx2MJPmdXrWwKnGMvCcMslup8ADrtPxWA1k2lgbYokrgrLgFBmdd/Oub8chxgsNqJiBotMbihTl7CkwaKUU+4ahsXLs2m7khRxDH8K9hWAZshCMiA7qtuPHUNnUcXTynHMLieHn1YL6Lq1qcFqP5ShSIRdY9QV2IHGlxnigNW6PnbZKTDBWxOFPhOpcS+iIRkEsDwVTd3lH86fQQaL586RW/eRR0hybDhEzKzWzQnMap72I9vANBrA3BxqqkeGTrdLNKs7YzegYMkzAJx8yMKJGaJXMmcOsXE1SAwWGbOaGS/zwGrXhe9FON1bwNINBJC84bteiv/r3m8DvvZrgW/+5p1JB87B3PdBqgdKgNWZjFqUZFarKhQpROhxgK+i2xwGgk9iaZadY1WVMFUFmNUTq0
ImhaYROQEOOxEA4DiwA7KfvrlzBY8/pWBtDdlgdY4pKDEsRzlmtRLBjyavX4VNygA+m5I1DJQz78yVASk+ZhVVygTrAUwFVgtpVgcazHYCVt/QWsPJMyqsQEe9kwIENY2AiYLz98oK8PKXE739OGLN6tRYpqziwM1nQBdOBjHzyiywms47klIMrFalID9/yyRmCv5eqhTCd7LHAJEsEW8SgBizugRYresgmtV5YHUwNmfNz+NrDp/FB888B7d0JhgJUZ31XIA1imDZEjFuFAGrY+Av4/n4zM5vaiR4iSt6bQutCQU0qzedOjqtnEHISDf2M5hZPcG0sKk56Hcr8M2KErDaVDxYm1NiEOOa1apKzt88HMb3iWZ8wcTVM2WffL3HLlh9vUcUYWBJaGoO0GoRc8FuAYDu0iU8vLEPt81eQlu3sb1CJgI7DVYrfmXM6hH5AwamceYeb41oflwazuC1Rx7AcxbOJ5qeE8FqD44AWzkIcvrKFj4lYUEDfC1Rtt53u4SFcHi/j/kZH6dPA8cWiYh0c5mW5qoqAas9lctY/rM/A/78Y3MEpNk3C+zbh6OtqzhzycDl7TrCSIJsJMAf0WmdUgaEmRZqEvSaQkrFptWApu2GkQyl3SATueAi7PsE/PR9Yk4lSanLNs5WMAzC8OhPuUqkDxe6LpRcGY/3vQ9485uBhx6iDzCaENugMcM2jnOyd3kNuuzh8nAGnS9+DCfmN/DUU/TJGKxO3acsGcRhVgOpkuN0/NzPAW98I6n/TAc96IjMCUw3/S//Evid30k9wW4UtlHUNKKvuC0wZvt9AkTJpI3DwWlyiHJdsplg11Ulr3G8/Lkg1mktwKxuag6XWe2FCu440sNrXyvWLMAxLWQAcAlGTq4MSFmNUo72aynJEggYLLruzsMTfePNb7gNmJ3DkSPJw+0ZCT3PmA6sDgIEkRQfBiXTiPuSG0XLiFnJN0+flPOZUQREUkGwmo5ZzVT45a6yTBKNQUXVNlPEuXPAbbeR5DCAWBJokgyIiFl0zOTVdWBuDpAkKKGH4OoGBr6O+uxYybKgmRQADDZdtDQyDuf0PtbPDyfKF5mKzycQnTmDMJJwureIpb2p8SVJRCPrW75l53s48gdBAFJlUrUMiFeCWc3mrSzQhwLLhdiUrN0sQC2eY4s1yeQquAwqtgeuCdzfuk7MYbsCr3UcUkmg+JClCHNaDyfPKLlgdZYMSGwmVoZZzWRAJoyDWPe1ILM6d+2iDRc2LRRgVvteWEweDOS65hk3xtJpZZjVPNY+3cvJJt3/LCzgxcun8b9Xj5JkWCc1b9FkIze5QsOygJtv3glWD3wDjfoYWJ3HKk5LKhQJ5ruR0y6AcmMr7/jBfseCCRY1D6iMz1UlK9ly9lulZEAMSUAGZIxZLUmY/fHvxa2dy7j1zgn3nQizmiVpdV/sd2OJq6wjXcyY5Tc13ldZirKTojEIXoxZrUoCnh7MYLGV88KYWf3MMLOeGONs5VqNSGtUAVazNb5ex4LZx/rKlDrYQTAq9cXA6rzzHO0Le71wiDLsd4Mbu2D19R6um+go1mpkU9MTL1sL1rdgByoamou2ZqG7Rt5rD0KYih/LgIgyq103H6y2AupWDcQlY1xm9aWrUKQAFwcddHQr+SAAv/qOJnqegVY9YVcYsg/H5w/tXGZ17Fodxe3qcgC3LwZWX/mP74HZvwr5p38Kc+//rzh9OsKJDhHzbu6nVEtJQq0GWL4uJi+yTn4DaXEBeNGLcLS9hhvaazjU2iSZcbYBVlXyWw5LIEXpSJXgaDWVHEKmLcEBkotUrxcCqz0PWH16gJriAE8+idnZCJubqb5GKaNAKrEy7E2ZjU6zdhgAXgBIYprSX/3VwJe+RB9kvzX7vWo1cijdzF+E3YeewIxu4fKwjY5h4dh978fJk1Hcz013rJysAFg98fB08CBxCx0HrthBR5BZfegQ8F//K/CZz6SeYBvuFAu6pgiajdIvJO3fB9RqOK
xewNmnHETbFBlvUVMxRSGMBA6A4NllDBYdDBwOszpUcGS/h2/6JrFmWdsAqjX/ogyiTCwtishhv4RmNYB8pk8Zg0WDU5bq+xj62iQMBj/zM8QfIDabBdBoSuhPq1lNmSMxG0NQFmpHUoYXdMwWKfl88kkigRGH5yFIl2QCfEkFAHAcuKGSSPPkhSQlSYWv8E78/HngOc9JkoFXroCA1WGGZjWHWe3ZAQzFJ/OBqgKLi1iqdbHyhfOkMmx+jK1cAKy2ex4hAQCYMwfYuGzH908sX6RpqCkuLF5uZXsbTc2B7WtYWuJ+9Ghfc2VASmpWZzFKwxCeL0NTCrbLYyiyhFgJg8VMQC2WLyrWZAzOCDGrJ89dk9ps6xa2twT2GzRRKy2TgXCktoL7Hq5jsdYn6/h407qc+XuVApWTLme3y0DwMsAfh1ldmP0qBFaD3AsFFjHCLM8GqwPHLyY5BlCwWkCzetxocnERd85dxAPrB8i5azY1b+k6NQQV9AgZErD63Ln0lwnI2XMcrOaAyn4ZzWpZJgmmHCkz9jrhYAB4zv4wcsslQlSZp1ldznsEQH7VXVEJDFByQEY1BIDEYHG8ku3mm/ETvzyLl/3cyyY2ymVW+z4hX2mCZzSaZMxMCJYBlVPtZv5eZfayMUjJTzBxmdWsWnwaAsO1jnGPGl0nMiA8HWhejMmALJh9rK1MeaYPAgRhSrNaVdFQXQwczrpUpoKHMat3ZUCmjl2w+noPy6K6YREBq2W/ELP65CNOXJY6o1vobvjEWZyatTAZEC/rwDAWucxqxuI0EuajIfPL27zLRKJk4JuYee3XAABkz0UYAr/yznlsOg206kkG3FB8OD5/ZeGB1SMlRcz4igPQsfns8/cBN8lPAYMB5gfn8PTpCHv1NdRVB80Dnfj1tboES6Cc3PdSwOz8PLBnD+afcwhffP0v44h5hSQWUsBfW7fRtSoAq6MxsLoKGZBxsFpQY90bODj//3wes6tPAP/3/41lbTPRrWaZXcayUhQCfg6mz+wCiJnVIuM1Hf0+0GwSQ8QYrGZtMsDLMMjCznFO9gYu2rqNFXsGnVaA5qkHMNgm7xn0QsKsbqfuU2awWBaszgrmJC+QwOp2gVtvJTIg588njztWSACh1JgVzkCzL1SrAQcP4nBzA+eecuBsWaRNBlYzRhKPWe1SzeoC7NeG5vLB6kjeqQXOizzTwrIardSZPPPAPy2zOlOzusTBAYCmc2RAWCl9Xew6yIZG2OpTgtUEAKbzCbt3OfN2DA4XkJgxBKo3oogYDgPAH/8x8Ed/lHqSeQ2kS4LZpjkvMWjbk/XrM4KrdflPFOfOkfn11CmCy125Ajh2RJjV9dR3YWA1JxEwHEREYoiN70OHcKCxhYuPEu3XxsLYqb0IWN2nYLVpYt4YYGPVj2Wh4nZoxZXNK0v1vJilLQxWC8iAlGFWa3qOZjXbSxUxqKJ9FWJplij7VuUQnjMZrCbM6uKsR00OiK9HXjCDxXHgZ1JoGtmTdwVey+ahvXsBRcER4zK+cH4JC/UhcODAjpfHTPjrgVkt5TOr4zm2SoPFKEr6W5BZnadZ7bthsSou2ldTYB+zA0yZm4NqqujoFs7351CbSw06tocrwKw+fBhYXR39vIFvjCZe2D2bWw1RVgYkfy4AUGxsyTJlVmdfg3hvWBCgUvP0tZlnT4mKEAD5VXfTMKuzNt/MYHHCnPV//J8tHDg+wd+CJe/yzgiMsW2Ig9W5wB8Dq0sZkOecQ6kfUqF2RUFKz8OWWxs9t41HLAPyDNesTmvRszPScEqIkZLQVF2mYPUAa1enBO1ZZUOKiEhkQDjrRxmz1V2wurLgjiRJkkxJkj4vSdIDkiQ9IknSf6CP/5EkSU9LknQ//XM3fVySJOm3JUk6KUnSg5IkPTfV1ndLkvQU/fPd1+xbPcvjne8E/uRP6H8cB33PQKMBwDDIAsFh/6bj/gdl3DV3Adi/n8iAbEekLNVTYGp+DCi6gkwvzy
MAcBgCf/EXY0+Oyx+IgGkAvK6Fjm6hWQ+hfsPLAQAtqYfLlyJs9VSc7c2hVU82K4biwwmUhNaaEUFA+hpFwJveBGxvj/aVLHxJplDEnMnbIkj9Ixv7cOhWAprNGQNsbcuYibawaPZHwWojxNDXIF1fVwABAABJREFUuQddr2dj0zbJZow5J995J2aNIQ43NzBnDkjJMpAYLE4LVjPTQk2CXlfhhtXJgAAA6nVafiNzfysA8C0P5/qzmNVJhmGPtJKA1QxYZxtAtgAV1JfeEWzMqiopzxYYr+lYXSVAwp13AvfdR75mLCXD7gNNQ0u30R9wGMCWj7ZmIwhldG4jxlkteYhuF3jjvzmGj1y4bXTToygEXM/7yUpqKOqCjPhuF7jlFgImpRNDfUdDU7MngNViJc8AiB7rwYPYV9/CpQsRBlse0e5PMatFZEA8xqAS0c0DqOO9C8vjy4Co9YLChAJs5bKb8cxy17KGWlymT4mSTBDgy81j7FKmj1ApPZCM7WkYwFRnX2GHQcOALEUIrPy1NvQCogEseiFYspXDonnqKeBHfoT8+/LlsaoFthkfY1bXFA9WP//wWAisfoYwq69eJay/KALuvhv4yEeA7/3jr4M7zqxmGvacazsYgIDVbD44fBjH26v48c+9GZ++chz1xcboGwoY4dn9AKbqATffjDljgPW1KHGfbdB2FYVoVguA1bocQFVCLC5yPzpuW87RgY4NFgtWWciqTOQPJknCxCBCScZb1ppAE2KKXjAhmKcnSkuEyxgs6iKHUiYD0hA4vGoaZnQb212B17J9imnGWsVPbS9j4Uhz4tyjGXKmDIjry9CLMqBpqCoymcXlmdU5iVYAgRsUZkBzTdXY+iUXMx6OkwBZ2u12UFhahKwJXv4+DiCeHWkWsCQB+/bhpXtO4qMXbkFtITVvseo4gQpUgIDV9Trdv7Jbg2pWN9LToQCzupSXBTNDzJkLAJRIhAQI8si/blhKvkjJm7eYGV3FxrClkgBIJZ3zmNW+BrNWoL+MKJK31jL5K12wzzz9+imY1VpegqUMCC4wbwGImdU8sFqXn+HMajam2W2ikfNdvwqwOpShaHLCrF6fEvilkn5pZnVddTF08yelyBvzRBKJXbC6shAZSQ6AV0RRdBeAuwG8WpKkF9PnfjKKorvpn/vpY68BcIL++ZcA3gEAkiTNAfgFAC8C8EIAvyBJUqpYdzdE4uRJIiUbszQti+iGNSXANMkCUUCn96mnVdzUWQFuuIEAnH1iojfwDdT0MNF5FMzqMWb1vfcS2cRPfCL15DhYrSgwFQ6YBmLOM6Nb6MyEBIgyTbSVAR79EkENz/bn0WrQ/kkSDD2CE2i5JX5Awqy+7z5y2P/P/3n0i4wsUIyVxQGr/ZNnAACP+cex71V3AC9/OVQ5RFuz0NZs/J+3fhq1ZjIpEhkQgb4OXawM29DTJa/HjwMAjrTWSdk7ozhoGlqag65T1MFjLFgJji5Dq2vVM6tVFQda2zg/mBNipXlOiPP9OczeRGhky+FlUvId91UZZVaLmFTR+PjHgfe+F3jiCYwymdJsFZawKIDXr6wAy8vkDHn0KPDoo4Brh0mZOQCoKpqqg96AA6o6IdpUBqdzG2FLHW+t4ORJ4NJVDRcHs5idSd2nDADOw9PKgNW6Tg46AnNCrwe87GVEr1pJnY17loqW5oxoVhdmVhsGcPAgFDlCYDm43G1guTkcua5EBiR/ExIfSArIgKhyiCDPRZ7KgKi1gsgy1/G9HCOHHPQynp/CYBFAtfraAHSTw6xm5aMNwf6KSnbkBTVlSZc5NlQHw22O4a4TFAP/NI2w6ByONJaXzFOXLxO95rR+vxOo0NMlwapK5sO8ShPfhxuo0A2x3+yZwqy2rETp4O67iT7+xkAn3yUtaUI17Hnz1nBIwWo2Hxw6hB+/8+/x3q99D37p+R/CDbeNaTgUMFi0hyEM2QdOnMCMbmGrqyRgdTPlZaF4sDiHp9AhJepN0y
8kA5KpAx1F8ANqsFiEAQ1ibJaptc+SbCV0anlsyggEKC/cbhagFkXl5i02tgTAaivQxKpCNI3IufUFvh/blBgGsLyMI611AMDCTfOTuyvCrC44BmiXrwGzOv+wX9ggmbabOV5pZ0c8UAQjVw4HJfYaQCKpwPHhibwxGRAA2L8frzzwKC4NZ0fBal0nhqAFwOoaLCwuRAm7OggwmARW50lghCGR6yvJrM5kK5fWlg7hB9lkmZgJX5C1n8suZ2dLo/i8JSHDGJZJVcglDBZNWcBgcYIMSF7Q5J2bt4+h+5SaIdhnVhGTw6wmSYAC/WTtisiAFJkLYjNIERmQBjozOdeJMaun2L5e82AyeePSGvb0YHUQjcmAbEzZpu/DCTTENk2U2DZw8gdO4BYkngCjYLUAIW83soP7q0ck6G4aGv2Td9VfB+C99H2fA9CRJGkvgFcB+LsoijaiKNoE8HcAXj1d9///F7//+8CP/Rg5oAKgwLKOZktKZEAKaFYPuj4pJT1yBBozMrFtbDk1zDbdBKQtCFa/733AH/wB8B/+Q+rJca3e2GAxf0L33RAdY0gmdEkClpawaPZx36cIcHe2P4dWMxmSsUkZ5/DIwOq/+Auiqfu3fzv6RUipiBT32VA8rt6lf5qIuj26sQf79oGUZQKYN/uY0S3825d8cuQMoOoywoxDw0i7QxfnB7NoN1Kvo+WdN7TWML+QupWZZrU9ZgRVNNIGiw2tOs3qlF7x4bkezvbmhLRfPQ8415/D7K17AUnCsnseK5eCpK+hDNVQ4rbrqpst8zIWDz4IfOxjwC/9Ekm0xDFusKh6sB3+puX3fg947DHKrJ4PgCjC618PvOMdwFpXH2XuUcM+Xhbac0K0adl3567DAIBj8tM4dQpQIx/fdvQ+tPakTg4i2f2yMiCKL8ysXtY2cI/yeezbF5F5KwzRd3ViCssOrozpwzMRAhAxMVfTBPbsAQAsK2v4zJVjOLGUKo9g7I68A1kQUDZhAQZVzFTOB6u9UIFWrxCsZqBPUY1Wjut9GESQpWIMMgCJDnLG4cmPSrDAQVh/uYxdJgMiClZrGjmMWBUyq3VdyO28cBkxZdE5vGojjySCADLH3HNPysDV97HhNDDXSGXV2HzYzweri4yvGKz+CjOrLQuYlzZQ1z085+Aarl4FejaZW2MdaECYWR2D1exQcvgwVBW4ob2Otxz7IvTFmdE3iDKrowi2FRJm9YkTJMnmBckPmZIvqql8sDpwfGhygFatAFjNAWqDkJTGFwYqReatqrVEw4SgUCgomDSx7JseugvPsZIETQlJYjTvUMqY1U2BuYvKgGz3xMBqCSB7ikYDR1rEH2Xha2+f+HLVUDLlKqaRASGa1dntFtZDF9jDeG5ExmyVzGoKKBatDMr7/nFfSwAeInJmDptb0m3Pz+PFy6fR1OxRzWoqAyIKogzXh6j/8s/iTvkhPPAAfTAIiATlDrA6P8FUquKKx1Yuw6yWCLsylwlfcmzlVoSkKlYLRV7yLoroebVYkwA5g+YmnZkMiKDsGmmU6qwLMavFweprKgMySRaqbLsFZEB6noF2O+c1VIK0iqJmFmEI/N3fVdfeDnNLTSP7Y6vEgJzUrpFiVm9N3+a2W8NMIyHNNVQHQy+/3djbqMhNJhM5Pi8nIbYbYiE0s0uSpEiSdD+AVRDA+R/pU79MpT5+Q5IkhoztB5BSJ8UF+ljW4+Of9S8lSfqCJElfuHr1arFv8ywPzwM++lHgrW9Fwii1bbJhaEpUBiSAOxA/PA57IRqaAxw5AoACD7aNLbeOTsMHdJ0cnARBPwZWf/azwHd/N/n/xgZ5zrc8kplKy4AIGId4boSObmGGnRGXlvCchfP40Ic1zLY8nO3Nj4DVse4rh63MZEA+9Sng1a8mc1D8Fpb9ZocWlt3kaVYPyJd5eqU+BlYPSP+/67tG3yBg9AIAnuWNMsgBsjE7ehSv2Pc43v7LqUWRykqIMqv7fSKDsvNDE3ZJY0
ZFzzOqlQFRVRye7+NMf54PVl+6BD+Use40Mbe/Buzdi2VzGysnu3FfvVDZAVaLYuuXLhHW8wMPpORgwpCytyjrVNcJ61EAUH3kEVIFsfqlC1j6xPuBH/9xvOHu0/C9CD/9qW9EbQysbmm2ILPahmFEMI/tB3QdB5RLOHfShRx4eP/X/zfIy6l68Lxy57jR8jIgIgaL3c0ArT/6HeBd78LB5hbRrfZ99DwTLSP1mzMAXICk6fTcWPcVy8sAgBtbV/C352/H8X2piYrJgOQxq8scSlWVMJXzwGrmOG0UYPrQtgHkalaX24xnH/g9Nypens3azTJAC0N4ZWVADP7haejrqImYlAGArqOl2+h1p9gsxprViQxIU3XQ3+YkL72IADSi11bQx8H3E2a175P5+8/+jD7peVi3d4LVNcXLrzTxPGKweB0yq2sf+O9Y1Ldx55f/O9ptwPaSdSAOUbDakkaZ1bUacOxY8oJxuSBVJZIwDuc6DIewPRVmXQHZHABa6GA4CIkWuknBJEUhMkMcsNqzA6hSiMUZF/OTCbQ7I09TlrIeVaU4O0/IGLZouzyjsjAkAG2JeStzXSyrgy1J0LWISKXl7eVook1cBsTCtgizmu2fDAO49Vbsr29hrm7BuO34xJerusxnVpeUAcliVvtBORmQ3NJ/kH1RYRBBVcnYyfqtKFGjaNJC0eRcg0XfR/FrwM5JnH3nYCgRGbT0BmF2Fpoc4p+f+BzmF1Lvl4lkhJsn/8DC82CfW0UtGuJ5/ufxxS/Q+zEIYAVj8hACCabSOvN5OtAlpcxUhVSrZYHVMWu/jMFiLlgtl6qQy7y2NJleeI4FIKn8xI0VaOLkACBea3OPdQysNgT3Zew8k7XvjpOi4t0caTfr92LX9hqB1X3PJGTDrChR0cuLwQD4qZ+qrr1JOtBNzeabFvKCkQY1GajXsWD2cXWrOAPmscdSt3gQYNutodNKzM/rqouhp2XOA0BqnSl4AMvzcdgN8RCafaIoCqIouhvAAQAvlCTpdgD/FsDNAF4AYA7Av6miQ1EUvTOKoudHUfT8RWEhvv9/xBNPAHfcASwsAFtb9MHhkJRitRUqA8Jn/8YRRRgwM6HlZaBWg4wIweo6ttwacajVdXJwEgT9XJeAn6pK9mOvehUB2GlX0RgD6UyFz1T1PGL+2OnQ1y0t4QWLZ/CPj7Rw1w1dwqxupd4gCAD7PjDY9uE6IWo1cgliDeRYs5r+n7pnc5nVboiGaiOKJHIePXAAkCTM1WzMvO3biXhvOgTNmXyLlAu1W2NP/Kt/BeWnfxJz99yUPKYohCUfimXz1teB+++f9KGJtlqtrcEOtMplQPbPWbg46PDB6rU1Mk4BInmyfz/21rdx+ZyX6usoWF1TPGFm9eXLBFx+7LHUvcWAXEkif3SdaAcKgNW+T67rygNXsGx2geEQ9U/8DX7ih2w8sr4HNT21AVZVMWa1G6GtWyMVBgcaW3jgCy4WdIpcLSwkb2Asl6rBasaC5kgVAEDv9GosXXJQOh+D1X3PQNNIJdViGRD+tR1ue2QsmCaRvmm3cePMCv7+4i04fnAUoItlQLLuA+bMXoT1F2sdcsDqEmXE10Rag8MEL82kY0DCpMMTM1ichlmdc3iKACimoMyRppFk0PYUBjU0+aCwS2QYQszqwkCKoGa17xNCbkCr9V/3OuCv/5peMsqsnm+m5lRFIUnnPBkQdtgTLE/mMuD/icIeBKidewJ/8op345B3Er/wb2wgikh1VRoUYtVRnPl7aE0Afb7ma7LfwEp+eZUmtg07UGG2ddJ2u42jrTU8srGPSK4xhjBjVnOYPr7tQ5UDfOK/fEEc+2KashlAbazTWzRoiXrVzGoemzICSs1budcgkoszq0G2tdzkDTNYbAr8YMwou89/rT90CRHEMIDnPx/Kj/5feOe7sydfwqzO0JZmch1lZEB0kHbHr0EUwQtkaEVZ+2ztyrmkhQ2SabvXQgYkBv6ymNVl2OUU+L
PzPDLAwGp39Do8//nAiRP4vV+zRraGAJV/yDPWY7G1hTCSIEsRnrdwFl/6LN1j0e8opQWoeTIgZTXheQBwWbBaBYIcYpPnodTYymWXTyEDkmlIH2uBl5i7eTJWBb0sAMSEltx9jOfBDtTYgokbPEmgKRMhecmFwsxqdvbKI7QAgOcRybZ6TqcZUa7CbZbnAWtr1bVHZPKkEWZ1Q3XRt6f0zWLnKGqw2NGH2B4UZ8C84Q0EP2Ntbrs1zDSTyqy6yfcOK8WsBqBp0TOC1HG9R6HZMoqiLQCfAPDqKIouU6kPB8B7QHSoAeAigIOptx2gj2U9vhuCsb1NPPRG9nqDAfqeicacQcBqARPAOIZDDF0N9QZhjaLTIWZlj25hy6mj0yabX1P1hMFqzyMs1VmqRv6KVxDmMu3qDvkDEVmFGKyeo8N1eRl3zl2AKgW4a+Ys7EAfBat5Tt80/KGL4anLwMWLwP/+3zh0CARMox9KmNX0/xSo5IPVAebNAWQ5IqW59Trw9rdj/oXH0J6Z8D0FwWrPIs+3xyqQ0WyOsr4AMkAK6GhubRGwdgeeR0tiVUNJWF8VgdURQA75Ji1H5YHVnkfkakDH1sICDjQ2ceGiFD8/SQbE4kjMsLh0CbjrLvJzpcHqKEJywxkG0VjnaAfSt2JjPcLqORtLNVrm/cgjODB4Ak91l4i8eAqcaGk2+haHSeeEVLudPrC4iAONTXzpfhlL6nr8WBwMmLgWMiCyoAzIuhdLl+y3T+PSJQBBQJjV5iizmjAR+B9v9XzUGLMaAJaXcWNnBUPfwPEbUn2SJBhaCCdQszPmzJizKLNaDuHngdVU+7TMgQRAvolOKRmQ7HHg+yh+gGbtZpVPUtCnjGa1oiv50khFx6ymoa3Zo1r0RSMIiERCSrO6ztOABmVWFznsMukaDqDKZEDW1sgtbxjAi15E/Bfg+1i3G5hvpe4vplmdt46zQ5koWP1MYFZHEcK1dchShK/acwqaHOLHg18FAtqndI061X7lJdmGtjzKrAYI6PPd3w389E/vfAMDq7PKiFm4LmGS1en1XVjAjZ0V3L9+EDUz9V7mt8ABpxjrr90pcN/mlenTe1YpCVZngn+MWV3CtJAkW/8JZUCm0drXonztVyAhQYjIQzGj7CEfJLIH1LhT18n1uO02vPHbsuccVc82WPRCeQpmtQQ/msAii3XLS7DrOXuYuDKoYHUYgFxfhMpN8JBiVheUQollQHLIJwNLJlWy6eug68BP/ATwmtfseL1qKIQFzgOrbUK+AYDjM1fx1BMJs5r1L2lUhSrlECSY78Y1SFwBKAVW58m2xJVRVcuARCWZ1Zy5u4zBIpfcRceHpBe7v7jM6iCA7RdgVstkXsqUVGASTqXWmXDyegDE+6JCvxdLLAR8k2RJikblysaDyYAIkKREw3WnA6vf8Y4xKIBhBWpypm1qDgZOBTIgKc3qtm6jOyzW5uoq8PjjxJSctUnA6mS8G6ZEyHh5YHWZCh4AWk610W6IB/fukyRpUZKkDv13DcA3AHic6lBDkiQJwLcCeJi+5UMA/rlE4sUAtqMougzgIwBeKUnSLDVWfCV9bDcEo9sF2nUf2N6GaVL54H4fXc9Ea8FIDBZFwWrLIvp5LbpYdTo43NrAuSeGRAZkJqLMak8Y9AvDCGcf7WPBuwScPInFxQT8GwxANlMjMiAc8C8M4fnAUq2HpWXah8OHYao+7l44j7vlBwEArZlUG4pCXLE5h+igb2HbNdFUbOC//3cc1Fdw7hx9MmZWJJq6hsI3Z/KdEAtmH8vzfjKnLSzg+39Qwx13THiDquYzPFi7FBgc+Z55IaqjCZIEGQ4T6czkQ1M6VHRs2d0KXB5SzGroOhqag/4WZ8y6LkzFhyyFMVg9ZwywsSknfY3kBGih7DRRZvXmJvDSl5I/sQwIM3dgh3dNI+WYHO1A9hU3zvaw0jWxvEcCvuqrgCiC+b73oK66qDVSG3yqWd3jgdVuhLZmo8NsaZ
eWsL+xhceeNrGsbxHwlpl00WtADEnyGi2rWS1osNiX4yRDZ3AR26tOilmd6lgMVvOv7bAXEDCJUTL27MGNM6vo6APM7R2laeg64IQqnzVS1OlbosY8WUFlIwqbf+Ul2sKQtKmVkOuQw+qZ1XkgHTOlKbNXFWD6jLyOF7pOmNXTyIDQyo34XE7lsYYiYHWRDa6gNBZjVl++HMu244UvBL74RfLkutPEXCs197PEVRbbC4jnO0UXG1/PCGb1+fNUB6QGvO1tBKi7cCE5zKaz2KoKXQngcJawiWC1JAEveQlwww0738CSdwJgtRuoyRo1P4+bZlbwJydfiNsOjGrtE4NFvoaiKhfQQwfyS8nDcDQhUySuqWY1B6AqCqrmlX2zipASzGpNA2GqcsBqSYp2Sslk9NOQfSEfB7vvE2ksQZqioiuTZUCiCF6gTG+wON7utFIwOQzFUiACBT9DL98ErwygCCDTINkLZZK8LHJtZRmGGpCkOw+sHmdW50QMPgqA1XFXpAgHW5t4/HFkg9UcZrWXBrVEgxks+tlSDaSDxX4vRZXyZVvcchIzuRWNVLO6DJEhk10+DbNaIHEz8jrBNnWedIXvwwnVmHPCDUmCpmFyMgwonxTNk4UCynnF8IB1FiJ7WSoD4gpUnYqG55HbWvR8PB7veheReo1jgmZ1Q3XQnxasZiC4TsDqlmajV5Ct/elPA8ePA08+mbS55VDlABpxsiAPrGZJ0cJg9a4MSBUhMlvuBfAJSZIeBHAfiGb1XwP4E0mSHgLwEIAFAG+nr/9bAKcBnATwhwB+EACiKNoA8Eu0jfsA/CJ9bDcEY3sbmHnks8C/+3fYu+ASs7J+H5avoTZfB0wTuuLDFWA8AgAcB0NfJ3rXANDp4FBzA2dP+eRm7kSJZrUjtrDWNQ9nTodYWHsC+NVfRevx+2JGGymvLSgDQpkoX33wDH711+jr9u4F3vY2fPybfh03d4h4d+toilEqKgNiueh7BpZaFhBFODh4PGFWMwaMnmQKDVmgPNuLsGD2sW9pdNJ7+csTtvlI0PLZ0OXIgNg+WpqF9qzgwUzTSFmuAJDAkgmXLo1/aIppZxhYMntYvVrBgpne5Oo6Djc3cPZM/nWNHHLIa5o+uY7z82S/H/hkL+B5sH0NRiOR1qirLiwBM0QW//pfEx2vNLOabH7CuE1SjslfrHwf2DizjRWrjaW79wHf8i1kQ2LbONjYHC0DZjIgeWYUASl/m6sNk6TN0hKamoO27hD29sLC6CEo3oRlNxu5JLNflJVEzBA5rwuJxqAiR8CePWhrFrZX7Vizummmxryui2m8AbD6AWpqill9++042NjAD9z6KUhjOjm6IREAIbfctyCoKhMTsiDKMSeieqqSUvxAIklRJugDAJJc8B6k/fXDyUZCpbREaV/zylLLyoDECbysucvzCNOrALO6pdvo9qaYu4IAQSQll8gwiDxWngY0CPu1ELOasuh41Ua+T/6c+ccV7L30ReD978fznxfhC18AlQGpY34mdX/xQATWKCB8XblyLf8UwTYXR48Cz3kO0R0DYS960EfBCyYDwrm2A1sZNVjkBZsPLc6+i06YkkH3PwsLuKmzgk9evglveFFq8ZVlsufytdyDbjy2ioLVWeOAmqKq8jXSrFbLAZWZABXTrK5SBmSKJJtuSPxD6bjJeF6wpLAAP8AeBIXA6szDeVlQmYaqSZPlRaYBqyWODAhLCBa8DxQpypVqKLV+cSujqCZ8wUSAqdMKsZz9/MBWiHxRgXkriiT+/O04IwmWf3HL5/Cud2EyWM2TwGDVYaWY1VEmoBgEBEgvuodRVSDIkMMhiRuJzLEFddZVKX/eKitXoUn5+61S5AAeWO15ZJ4tCFYbCqeKiZ7ZhJnVANQ84I/NMWUNFnOY1YW9YiQJmorcRAiA5H7OS16qKnQ1gONla6sXDbamlGVXd7vA3/996gG2d2DXSNMICU3QNyszKAiuaArRwTZ99FyjEEHi3n
uB7/3eFFjNmNXt1O8tQO5znSnB6l0ZkKmCu8uLoujBKIqeE0XRnVEU3R5F0S/Sx18RRdEd9LHvjKKoTx+Poij6V1EUHaPPfyHV1rujKDpO/7zn2n2tZ2d0rzpob58HHAd7lau4fBnYWPEwawwJo5LJgAwFJzTHwSDtTN7p4HBzHWfPyYRZ3ZFizWre4RkAwiBCS+rjqt3G4iyVrfjYB9HdCok+9lAalQGhLLJcZjUDjQ15dL9w991otSXMGQMAQPOWlMIMz+mbRuS4qKsulu4k1LSD9lMTZECkuE0RVpbvhjjY2MSbXj3IfyELWp5tDzgGi06ARbOP1pw4QAMQMJIX29vk5Zcvjz2RLler1bBc62J1rbhD/I5Ib36oiVB3i8NQtH1ocoBmLYiZ1QAwq/awuUm+5xWrTVjMtO2a4mJo5fe31wN+71eHqIUDzD76Gez3zyZgte/D8jWYWrIpNxWPGPZxtMA9D1i/YKPrmmjdfYxkKu6+GwBwqLmBeju14DGDRTtnEXQceKGCA50BPvCBBKwGgAP1DSzVujtZf6wkM++gVwbwoKyJzM0di+1tABHQbgPz8+R33ghiZnWrNsr81OQArgizuh+OMqvvvhvKD/xL/MoPXtihCS+rMgE+c1gjhQ8OkkSMlLIOOcBUeqoSMJnxxcZcifJsRUGuRmlhtheQz9adopw+TrRVyKxuazZ6/enAattPOddTGZAhZ5r3PBDwr6gMiACzGgCe/OPPY+/gKeATn8Dt3c/i4YdBDRabmJ9J3V8M9BEBqwXHl8bAua8ks5rpmtRq5G/qNNjSbMjj7D2WcOZc24nM6rxgVSECzGoAyf7nxAncOLOCA40N3HNXiuIkSagZIaxAyz3oxuZfBefuTNZfFFF2VAl2HpvnJh2myxpfMeCLV/pfdN6iIPjEYRtFoxVaBUJIBqRIQogmcEUYdQSs9oXB6kyAKgaVy7HAtCzAfhoZEB6zugzjLU8OB0gqK6tkVjOgtkQyyDDALVMnMiDFkmwAxJnVN94ISBK+ufEJ/N1HI/h9m+wb2NxL28yVwGAmriV9N7IAYN+LSlWHqVoOs5oxoNWCeyOWGM4z7CspMZPHrCZJgBJgpiizuoRUYK50BTNYrIlfByKpkPF7MX3jsprVnGqAotU2qsaXSfNtn3gN5HVakmAYEklWiWQuBYLd8uvr5d4vScA//EPqgXFmtapS36wpyW2+T6qbdRmQJMh1k5yrRHVpQXCNr/96IgPiOEjA6lbq99Y0buKulNwUUnJ5u8zqqaIC5Gk3/qli++kNzFCzsn24hAsXgCfPGrhpZoVoMzIZEB7Dh4Vtw/J1mE06w8zN4VBzA+c2W8RgcU5OmNUiOr2Pn0JH6QMAFv7Z1wGHD6Nhr2OwYaO3FWDVaqGhe8mGgjGr89qmIN0OcweqyzxnDtBQbcgzo+W+shQhcHOuw/Y2pMBHXfOxeMsCYBg4FDyNc6fpLD5ugqGqhJXFkWz2vQgdY4if/qF+/gtZKAop+eWVk1N5kfa8YKZSVaFIIQJHTLP6xInJzGqflUKaJpZqPayuT+nuCyDyg7iPzDyCpwXuWQSsbjGwenYWkCQcMK7ix380xJ88eAc2nAbmluhYZg6/dv64vXgR+KGfqmNf93Hgve/FzAf+EFtbdDPoebg07GD/DEWkJAmGHpFDAyfD7XsRrl6NoEgRpFtuJg8eOQIAONjcQG0m9TsyZrUAWK0ZclI2t2cPuQbNTSzfsUxcJNLBNs15eo9lSmgZsMwDZ9hOaH4emJlBW7exvRnGmtVNM/WbUwNTj2dIAsAahKgxg0UWz30u8P3fv7N8gadfHzt9FwNoFFVCkKerXFJDMde0MAjIhqpom6DajBmHslgGpHJmdTmjMt7hybP8Ykw6ZrAoCFb/zd8ADzww9mAQwAp01Jl0DVsXLX6lTaH7i60zAprVAPDk1VnsrRMJCe1/fQi6HmHYD4lm9Uzqt9a0/AM0APh+IQaVZihfebCaAS
lsLpibAwC0NGcnbkelvLgGi45SDKwuoFkNIAGrb7oJbd3GY2/595C9UQS9ZoSEWc3RUCzDKM08mI+biBaJPIPFaTSr80AElhCsUgaEMatLJNk0XYIb5lTxAMWqQmhSWAistiLCrBaRFwGypZZisLqcZJLKZBUy2i0sUxCbA2e/pLAvAGtXCuE7ORVXJUFVAPkGySVY67H3Rs7YGjhqMWa1pgnJJfp9m7CW221g3z5okg8dDrauekTWsd1OXswDasuyimWZaFbn+W5IYbWa1SxhUfTIw0uyUd3uUt4jOZrVZGwV7CvAB6snMegF2uRqVlOwWthgEWTayvy9pmBWa3kmrozQUnAuiCWR8u7ZAdAc15mfEIYpkURoRWD1tMzqRoNs++KtH/0tdTam2feJIi6xi0UUYaevDEvssKQhS4wVAKu7XeDQIeDpp4GbbiJtxjK3LAQSd54TQlf8EgaLAsbLu8GNXbD6OoruuS20KVj9Mv1z+OhHIjx5oY4bZ1YSZrXCl6qIg1KMJJOuFvPzONxax9n+HDFYnFdIubPqCsmAuGtdNDUHshRiYVEGDh2CJBFw8t3/LcRvPPR1aKQBKmawKMis3hGdDjq6hbY+hiArCmFQWTmbwl4PUSShUQuwtEcGDh/G3vo2rpylszgzXtPlkTa5PoBOQWdyEeMrEBbVUq2H1qKgwBdje1n8CXJ7G7j1Vg6z2jCwVOthZXNKd18Q1micTRY0rmRg9du/5yRZr1QV6HRwsLGJP3mfhEdWiQxMXN6qaVS+hg/6HGhu4fY5gtTPhJvYXvfjJy8MZnFgNkk8mEYEm6dLCcDvWXhkfQ/2ztpAp0MepGD1oeYG6rOpQyVlVguB1emkTbsN/MAP4GVvWsKJH/lG4g6ZDmYklSOt4bsU8BA95NJ2dYWvpRmtrRMAY34eaLUwo1uEbM2Y1fVRME1I4w0ErK6Pg9VZwauyYEyfgpt8Sc05lNJ2AVRuWliqTeSXu/qBVNpgMROsZpvMa1CWag2jwszXtm6j2xf7fp/9LPCRcTeNIMDQ11FjzGq2LlocuY5SmtV8QJVdmqe6S9jzspvIHLO1hRv3D/HkWR3rTgNzM6NrbS6TkDYaAdeVDEjQt8gcw+YCmqxqaTYMYyezWlc44F8YYuhphKEoej+oKkm0CYDVI5JLqgq02+SwyoTHaRCwWucyq8vJgOSYdJXVPc0zWJxGs1oO4eeU0wMoPcdmMRSjSCruNQCyhPI0qwM3gCJaaaFp0BVfyHTYHoaFZEAyD+dlGdA0MllkLClclPkpSYk/xCSCQBjCCyTCBC9oWqjKAYIs804GUBnFE7gAspnVUblraxjgy4A46k6DxbwQlAGxul6y36J72P2tLp68UCOPp30BWAVPHrM6LCipELebLa3h+eX2MIomZzOrWeKqRIJFkaLsy1o2IcaSopn7rZKya5w9clydW1SzmreP8X3YgVqIWR2zlfPWmYLEk9jbh2OwWLTaRgSs7veBpupwz1+6KcMJtMrB6jLMap/itTfcAJw5Qx8MAnKma6S04/OqrSbEffcB3/EdOz8sJswBQK1G9nsFwOpejxyVf+M3yDbZsSPCrJ5JvUjTyJ7AvobM6l2weqrYBauvo9hesWJm9UtmHsF9n/PxyMUObuwkYLUu+3ALgNWSFCUb3IUFLNd6uDycgRVoMNs6AVO1AJbHB+i8oQdd8VE3AiwuAvFsEAS4uhLgi2uHUTdSEzcty7XzDOsYSFebsFB+7/dC3b+M7/vnY/1ipdR5ACidret6QNQUDh0i4Gbfj58fWaCYIze3PDsqZnrEzJTymNVRBM+L8FN3fQSveI34YUSTA3hDMc3qW27JYVZrEmCaRAZkq0AaPCNigJSC1brMN+vzLB+qFOJNX7eZPHjwIA40NrFv3sFja4swFD+57jGzmm9a+OoDD+MXnvfXwNGjhA0wTIPVHRyYS0q0VY0yajklPV7fxqrVxuGDqe91kEjVvP7I/fimV43eB0
3NQc/O2bBkVRjcdRf+7a8v4dixCe/hMQZATLrKlJLreRpvNNyVTZKJjpnVFtEN9n30PAPNWuoayETmJ8jQVU7HcBChpnhiYDWPNVJWT1WgXQDVMqvDkMzXpZnVEzZMUQQvkMsZauUZ00wjAyIAVtdUr7jB4lDsug2HwIMPjj0YBMQbwkjA6rrqYshLMhZl/SkKDF61EZJL88TWMva+8CCYe+9NjQt44oyJLbeO2ZnUGIrLswVkQESZ1WZ5ZvVv/uaYQU/JsPv+aJVFGqw2x8aeppEkdh5Y7XnEdNosUPodV5pw9l3jzGoA+PmfB97yFuAVrxjtqsABJ15Hi8qASBkHc2rg+swyWMzxXCgrA8KuwaR2p0gIxprVOb9ZnGgTBavlAK6AobM9DGGqJZjV4xchiqbTrNblyczHsoxtSUrWrgz2a9xukXEgpFldojJIiFldrEmAgtV5rP0ooszqAjIgPCNjGlbXS/ZbrDpQX8W9Zw/hUGtrpwzItWBWx4mryU/HvhslmNWZ+3kGAJfR2hcA7EvLVeQaLJa4b5lkZEb5wsh5TTTYGSFvaPk+nKIyIHkAcLyXF+8mAL4uPjX5KwNWZ7LAafQGMklWc66tUZMrZVZ79HYuw6xm4O/x48DJk/RB6kPUqo9Ka4j6ZgHEF/vTnx67XEzahYHVmgZFCoVwDRbDIZmiXv960ucLVw24oQKjlvo9mcdV7xoYLO4yqyuJXbD6OopuV0Jbs4E9eyBLEb71RVfwt6duxHPmz4/KgIiC1bZNMuvsoDc/D1mK0NIc9DyTMK4lCbW6RFg+nInSG7jQ5AANwyeSwpRRKgUB1ld89D0TjdaoARxhVufs3hhoPA7SAcD+/cC///f4pXcsjD5OD/y5zGqPGMvVDQpW0/LheKYclwFRFH6mGIDvRlClYkw6Uk6e8xrXhR/KODzbRaMtbrCoyQE8AbPN7W3gzjuBU6fGnmA6VKaSyIBsVwBWexTQpwaLIjIgE/U5b7sNL9/3BH73jf+Af1w5jL217eS6s8WHUxHg9hxosk/ugf37yYNeCqzuz+LAfOrH4YGUrL9DF/NGH0eOpj7fMIA77sDRQz5uetly8jiVw7G8nN/WceCGCvRagZOOplFWUg6zOv1biIagweLw6oAczGdmgHabaJh5YWpjM/qbS7qWb6xHw7IhzqzmyYCwg0OJUk9euwBKm39lgUml2gQrd+UYX5UBfbIYKVMaLALIHAeWBXJ4LsCsbmoO+kOxH9myJoPVBMRMJIxqigfLyW+zDLOaC6iCzId11cGKNYO9LzgA3H47AOCm6HE8cb6GIJShmKnrw0CEvARTURkQBlaX2IR/8YvAlSuF37YjrG2XjAUGmNC/W7oNQx/7rrEMSM794/sJWC0agjIgkTMBrG61gK/7up3XXGCd8d2wpGZ1Bls5ngvLa1ZPBDzixFXBNvM0Wmm7En1doWC/VwZYXcprAKTSwOWAE8NhgbmL7uFEfBxsG9VoVseJhSkNFrPkRUrooatqlO0PwQgVRQGqPO121m4kV8usZuzyEoCiaUSEWZ0j1dD3DCKzWCDJJiIDQgyt6X5r714AwMHgDD568Vbcsmdz9PN41zUMy1VcKQqRAclo1/NLGCEitS/KYu2HClS5bEVIxvNhCC+aQgYkR7NaHfdpEGxXk4NMSRzHjggRqIxUoACzegQ05ISmRpNlhoCp9NAzSRe0n9dKs7o/kAhYzWNW15RKNatdl9zKZZjV3S7Ztpw4QXSgAcTSjq1mMWmNdFy+TPbeDz2UenC8wl3X0dYt9LYK6D9HIaRP/gOwtoaDB4HzV02Ce6XnCro3tHP83jwP5cDqXWZ1JbELVl9HsT1QCbP6zjsBAL/4VR/BQ2/6RexbcMmNx8BqDvs3DvZCtsGl0govXHoaEqKYpVRryMTsh9OwZ/nQpAANMyRgNWVW65KLyxdDaLKPxsyosZzG06l1HAJ6TGJWZwVjQeeB9lQ7sFXzSQUuBavVyMOlS8
CXn2pi260lcmwMROCWZxdkVjMG8CAfRPDSpTAiwQ5kAmD11hbwnOeQkp6RdSU2AiGa1YtmD6u9WkYr4uG7YVK+zGRA8hILyGAA3347buqs4FvU/wUJEdFuZQsQlVcZcsAkr2uRdptNYJkCyIzK5ftEBmQhNe55ICVrd+hhud7F4VvGpDl+8AeB//gfRw+VqgpJArnnssK2s5M2WRGbE+X005egSQX1imMtzfyXWUPKgNb1RNcwCIDhEFetFqm+SEcW22u8XQviJc9CMiAlDk/XQuePtpvHVi7VJnI0q9khp+iBjPYjT7O68JzFgjF9Mmg5wyGKyYCwhBhn7mZhWWQuHDkXBAGsQEONOdfTeTs3GRaG8AJ6bUXBL7Z2cZjV3noXc8YALd1GY84Ajh4FANwUPIoHT9Oy7PSgVtV8thdQmFmtGgoZUyWY1efOFarkzAyrN8aspsBJS3NgaGNjnYHVeeCf52HgG6jXCtwPggaL7tAnh34R9isvyYaU5FgpGZAMwGMKZrUqhZNlFaZhvOVUBoVBBLmETm1uOf0UCcFYGirnUDq0pILMaj9XxotFrFktClZnrbXstyphAgjky4CUZWyrSrbfQmlwPW+8ApRZXb0MSGnNap7Boudh4OujMou8EASTrJ5P9nGGEcsVHVQv49OXj+OWg2PePKyCJweoLZXEpnvZrOkw9oYomHAnyZVspm6p5J0IYB+WM1jMS7L5kVwqGcRLtjp2BEMuCFbHc1fOa5h5XoF7jC8DUlJiRg7h5RhiltnLCsmADGUxzepatTIgnkfA6jLM6m6XHOdGwGoq7dhspK6hYOUGi8uXgde8Bvj4x1MPUla7oitxm23NRndLcA4NQ2B1FXjf+4Df+i0c3Bfg/BrFMMb2xzy8pDSzehesriR2werrJYIAXUtD23CA224jjz38MPm72SR/UxkQx4GQqH0wsIl+XnqDa1l40dLT6OgWqZkABat9LTEzyghv6EGTA3Sao8zqljLE0+cUvGDxDOpjxnKqFCLIA6vzmNVZwYDlPGkJuuL/t+/6JBHdp8D8vN7HBz4A/OHHj2Fl2Mae5Shpk6d3CcqsLsikq6kurGE+sF64DKkgs7rTAV7ykrHS7PSG3TSJ5rCtCxsmZIXvg1wjRUlKXTkl1J4d7NRWXlggf2wbR1rr2NvqJ5tVynwc5rH2AXg9G7ockMoEClbroUPuIaZZvZgCq0V0/qIIvu1jyezh8F2d0efSWl4s2AEn77rmyeFkBQOo8jSrfRQvn1RV6ErGpjkVw0FKWzgGq0Og2yWaYUtjB2u2ucnbkEURbFsiJc9VyICUPUSLMqtLSGuoObqEAMoxq9mhLNNQqwQ4kcdIoYmusgaLeYcnywYBKAswq0WSK3H7FlHVYMsrgBSzOoz7WFM9DN2c+9H3i5cRM0CVU/rvr21h1hhiT4euye020G7jmHEB/+/n9+ANN3x59Pqww36W/i+QgBaCa5ek0bmwBFh9/jx3OyEUVj/YKQl0/Dhamg19YWb0xYpCx0GOLn5aBkQ0qD4nTxbJHgTiJni8JBtYeXZxsDovGeaHCpQpEldZxo1lja8yy95Bhp0ql6gIYVrQFcuAxHuDHHDCsgrMXXSddQV8HGxHKmawmAVUTsGABug6k2OwWI5ZnW+qRtjKxQFFJc+8k1VWViwD4kflEsOGKeUzq30fA89AwygAiKgqJAChm/+eYS9IKtmaTaDRwIHmJtxQwy037PQLEpIBKTEX5DKrAzk5UxRpNk+zuiwAzOYtHmBfhlnNkwEp6RGSJ8VYllmtKxwJo4LJcUBQBqQUWJ0jNzWtZnUes9pS0NRsvgxIXancYLEsWJ2WAUmD1T3PGJGvj0knghvvy5eBH/5h4L3vJfgya3ekwoX6z/S6gqaNT59JSJarqzjUfRinVxo7DeVZJYCVA1YXNUun8Uzwdnk2xC5Yfb1Ev4+uZ6I9qxDdMEVJqEmNBvlbUWDoEdncCtwYVj/YWU5fr+Oepd
O4YbEfb9gJWK3zmdUUrP7of7mf7JkpWN2W+7iwquPr9z+OhT1jbC85X6agFEjH2Gl5bF1qdhQzpylYvaht4gtfAC5tmrhitbG8lDDpRNh5sSt1Uc1qO19Hc6QURiQKGiy228ArXwl87GOpJxiz2iAA60zDR9c1p1swoyhhVqtqwnrkSNfEMiDjhzGqoXekScFqFgxA4JTQej07YVZT1khH7uJ97wMC28NVu4mFTuoaisiAdLvwfeBfv+BTuP3FzdzPH2kz7xKUBKt5lQvEmKYgO02kxA8pfc4Us1qJPPjbAwCA1Bq7NiKZ+DCEE6gwVUGAXVHyD2TMVKxqZnXZEnUOmASgWhkQpstYBqymDKJMpk+oQClTlqqqMBQv8/A0tOTCBosibGUWlgW8+c3AX/5l6sEggOXrqJlJ8rKmuLDykmFMRqnIEKCSQLk+DgD89W3MGkPsXUyNv4MHYao+Xnf4fvzQbZ/ASOkCj+2FlIRDgUQrAG6VyXgEAdEnrAysVr1R3dQf/VG0vu0bYcw1Rl8sSTD0iGi/5jAUh75ejFktKItUCKxmJfp5BotlZUCygBSmWV0GqOQk2cqDCDk6tWyvVXSOlWUKAis7y/+nAasVhSurUIhZLUnQNL5pI6IItivDVAsASllr7ZRgdXwwz2JWlwarqzfBU5ks2aSgmtWVGyyWvAbEYDGHWe37hFldK5Fk45wRrEFI5ljTJImhPXtwsLGJlmZh34Gx+4RXwVNWrkImYHQms7oM6QLI96CZcmwFAYepW4JZnTkfTjG2citNQI79huKVkwHh+EOw1wo3m8eEpyzcayED4kdycbDayEmE0OhbipDBolrTyLxaIbN6bg4YDIq/lzGrl5dToDKVARlnVov6ZgEErL71VuD7vx/4wAeSdoMxsLpVgFntbA7JnovGwf5jeOTSLNq6NTruBMh9notdg8WvYOyC1ddL9HpEh3KGGCkyKRAAMWAHkAycE/BZ0AAw6FKwOs2s/r7vw4Hn78H7/yFxp9fqGpl0OWC1O/ShKz7ml+kk0GwCsoy21IeCAP/h+X+Fl3916oZl2XIBZrVqlgGr80ueo0hKJp5WC5BlLCqb+MJ9ES5t1rFmN7G4PGawmAfQRRFlDRcz1OJqVjOWRxF2uWBpMkD0lbUPfgDHDjg4dy79BHPiJZ/baMnoe+Z0KEO8AaSMKCYDwjNYdDIO5ocPAwBeuuckblu+mjzO5BQ4OArRrKZg9eIiMZLU1vGzPxPiH77QRBRJkI1RhiIPRECvhyCS8c3Pu4x6Q2DjFI8VAWZ1vYQ+aZ5mdRljGioDwmVWD1MssnodUBS01SE2zvUJCybOFCXtSkD+os507sb1aHP6mgd8TmN2AyCXWV1K+/QaaVYrqjR540wNtcqW/pOy1Ml9jQDIajmGoi4HmQksy5aKaVazhBinKiZu3yKed3/1VylCI2NW1xKwuq66AszqguAfreBx8vTrkciA7N2Xur7UwPUvvuGd6BgWcODASLt5LFUgNccWWLsAFAarV1bIYSm9jEQR8Hd/V6gZAGN6qiw0Da397YmKCDFDMWvyomB1ozH56YnBOeyzIGC1oAwIvbZZUjikq1Fxg0UGeGQYuJbS70+3mwGklAIqOcCXH0jk+5cxQ1QjuJOSFmXlmwChRLZlS4USbbpJdbDzFlu6Jpp6ARmELGZ1QEB8XSvJrDaUfIPFkmA1l01ZojJKlQMEWfNhGYIIbRfgaLeXWGtlXUWYV9HneYRZXUQGhFUccfbe1iBETUnNsXv2YF9jC9994+cgzezcw/H0mv2opFRDVpKNmkSrUvEqi1hjPS+5UGZs5RnDRlHpscU1tC7NrPYzq1tLM6uZhFFWVQj7MQsMBjFmdZl1JsyWW2IgeMHEVa5kCQ3CrObLgEiGTvCKCpnVnQ45oxWN7maA1pWnIG2kBK+pDMg4s1pX/Fy2cjquXiVH8L17CYGOtTtusNjWbXS7Yn3trTloaQ5w7BgA4ODgcfyvRw
/huQvndxjD8vASz5emkwEpUYG4G0nsgtXXS/R65G82G9xzT/Lcq18d/5NoG6lCgpDDXoC6MnbQO3EC+LEfG2Vlsed5zGqmK8xOibIMtNto6zbmDcp6nUmV5rINYyjArC4C0rFy3zxpCTbps4OjLAOzs1gw+3jiSQlrfYMsUDVtpM3c8mw6sWpqAWdyxtDjgNUjE7Zgu6IyIFi7CnzsY1j61F9gZSX1+FgJjlwzCAA3JVjtRynTEl0XlgHJY1b/4G2fxItuTq1gDJzJS4QA8PopsFqSgEOH8O6veS/+8GfP4od+6zi+4cBjoxsJulDlgQjxolRAt5w0WrEMiERY05kGMmyTX1TrjzJyeJI4I9rCkgTMzmJGs3DygQGWaj2M7mwgpqHoebB9DaYueIBgwGeWgecUQAqAa2KwmFVOHwVTMKuzGERTaIkm+toTnptCXzseXxlzgmVLhOlVWAZEnFk9N7ezzNHytYRZzZKMeaAyS/YVZFbrss81VfMtD4ebG7j9ttQ1SiWtUaslpsFAAiLkgdVsji3KrC7IGDl3jhSxpNc8xwF+4icKNQOAACmTzFZbrcnyvSIMxbLMai5YzXSFBZnVOqcs1Wea1aLSD0AKSMmW7plGszqr3bKSCjxmdRmdWgCUsTxZFglAqTZFZECGtkwSbYL3mG4IsLJ8n6yJWjG5IfbekWDEiLLM6iwWWQymFW9X0wA/ypEBicpVRuXJSpT2XFAUyFKE0KvWYDGutMhlVhto1EtUhHDApBFmNQDccgs0OcTvfNX7d0qxsQqerOE6BatYyQGr4+qwgvetrOQwq8tqVvMMFqdgAOdXspXQwabtankyIA6Ka1bLMnQ1JIm2SWcPIBnLBfaImZUbQAqsLsmszvq9fB8RUp4Eos3qSn6CCUDfVtESkAGJ13hhQ7L88DwCxZRiVj90Bu0n7gN+5mcAROTYygwW00c6ti8SqO4GyDBRFLJ3i8FotodmiQJdR1uzhMHq7rpHWNT79wOtFvbgCv7LC/8Cv/KCD8aV1CN9zcJLogiuJ+2UDxEIzZDhRbvM6mljF6y+XqJPwV42G9xxB/CqVwH/4l/EEhYAcY11Q0VoUhv2QzQ0h2/Kwp7naVZb/k5Acf9+tDQb8wpNlaXBalWFIkXw88BqplldkFEqYrAoSdHoItHpYLHWg2mEmDVt8jzb1ItoVlON0kIbZ0WBqXr5l7aM2QtzkhfJatJJdOHxe7G2Fo087qU/l21MpwGrY0CfbmBiGZD8t3lZB/NDh5IN6pvfnDwemwtywOqBC132EymdI0cgSxFesedRXFgz8dab792h/arzkgClweqc15TRTZMkvoFMJBMzpYJgtSYHcDnGTzEDlv1mc3No6zaeOiVhudZNtPZZaBq5BJyDuROqMA3BAwTPbLXsYZejKRsGEZQy5l+x2cvOp8qWugI55ZNlD2Ssr1KGMc0U+trQNBg5ycahXVAGJGZWi/XFtsmSd+utwBNP0AepwWIMYmoa8RrwcvrA5s8iY0tEGgvEUOrFS6fxcz+akj66/fbk3+HYPR3r12e36TkhdMUvNG9xq0wmxLlzJCeeXkZcF9jcLNQMgJSJ6xho8oIXAN/7vTtfb5hSPlPVdeGWqGLSRcDqYUjAakG94prqwhpkHPRpFVdhpk8e8zGKiHRPaWZ1MFlWoWxCkGOw6AdSORkQALoWZYKq7LMLh4DOeDx3Cf5mqqHwWVm+DzsokMAFsvVEyyYWaKh6Ruk7k5sqwfwUkQEpx37N0ayexgiQYzRalv0KgG+wWEQGhPnFcJjVMemAnQOf//yksveGG0ZfzNNrpnNMaYPFSZ4L6WrNghEDkHmGmCUSIWpeVWcYIoyk4lVnedc2vr/KyYBkaviDyYAUBKvBEoL5Ouvs84W7qmVUCALl5y5eNQD7rKIDgVd5FkXoO6oQsxq6TvZaFTKrm81y2HfvyhBtnWzeGmGPkA58H26gQq+l1s0CUqS+n1yudjvhZiZSpGOa1b
2JzeyI7maAtmaTyt7DhyFLEX7g1k9BadVHz5+8SnR2/lZROCGWm2DZDeHYBauvk3DWeuQgycBqWQbe8AbgRS8aed3MDLDl1IUAxWF/MitpR5gmYYzwmNWWT4CvdHvf/u1oN0PMmxnMaukaMKvp4sMzk4oiaRT8XF7GgtnH4YUhlhs9zBmDZBFhoFce4MEY0EUyuzE7kWOwGMlQjGJuzCIyIIHjQ5HIZ6tyiGCY+o19H056AaK/a2RPkd1lbG22pmkakQHhrMGZ+pymCfzIjwA/+qMgTpk0GDgjAFbHzGoglhUxL57CI7/1MZyYWd3BrDYUD/aQb95ZRLccAKI8ZjU78BfciGlqxGUilHE71+UAHo9ZbY2VPM/PY0a38OTWMpZrvYkyIACEWGRFZEB0OacUrWzpoKLkGm3G5l9lmdUT5gPPLw/OZB74KzBYnDgOpjQq03MSDJYjk7LkoprVPt+ojIUkkekkDVYP05rVikJkQDw1t8zVL+pOH8sXCXgjyGPJO01LAOs77hh9A8/4CoDrRMWYI6qA2eyEOHOGXNv0FsXzgK2tQs0AoGD1uGY1SP7+Oc/Z+XpdR75RGd0XSHqx/YaQZrUVEWNYQRkQM2+dSSdCCiYaMwFgpg9ZBkyLK0IqZlbnmar5U8iAaFQGZILBIIDS8xaAfINFp1iiTdIE18QSYLUxqXqFHcqLgok0rokMSB5ANVUiJJ9ZXVaqIdMbgCbGCxnusuDtjZjBYkFmta743DNCbArKznWSBPzgDwJvfztwyy072hSRASlzXTMlI2OTzXJ7GADZGshl/DwUhZCwssgc01bdTRpbUQSvLLOarV9ZMiCuVAqs1vV8g8HQ9YnsXxEZkDz9Xza2yjDWpSBbBqQEA3zk9VnrQRCg7xpoGh5/LLA9Q4Wa1bperoCIAMCkLK4jdQnJYFLiQVWhSWJg9fo6sLBA/t1u72RWK/qYZnVPrOPdDZ8A6/U6cPRo8sTy8ugL2Zk2ay6kFUd6keolGjFYvSsDMlXsgtXXSXRXbczo1s7S+bFYnnWxYrWFboxBP0Jd9cSZ1UVlQABgcRHtl9yOeYPWm4yB1QqPRea6RPvULFbqmuv0DUwGFO+5B4tmH4e1i9hX38KeWjdZJBSFgKo8sLroYS82PMoHq4HUwUWwXRGwenBuHQ11FKBO67S6gQqjQT9X12EqHpz+FJNuXBo/LgOS/7ZMzWqAbJizNs1B/hTnDogpaMysptpWOHkSh4yVuI/pdk3Fhz24RszqLOCrpNM1j4lQSpOvCLM6LdcwP4+2buGp7hKRARlnVouA1Z5XmFmtyhlMJ6C84Q8HmCht/pVjWui50XTM6knjoKzjPZACqyc8Nw1DkTG+Ju3LwxBDV0Vd88TbVhRSkhqohdgN42C15esJJirLqKk+MR7OA6uLsvYZmJiXwAUZCxP1ir//+0kS+9u+bXK7HGa1JhWTASnDrD51imDpaRkQ1yVsmkK4dxRRZrVAwp0G0azWsvdHvj9aUSUSbK3lLIuEWS2oWU33BYGbcW1ZYrxERUgmADxNlYUsZ+t+lly7eNVR8RxbVgYkj1ld0mAx7lhGDB0iH1RYaidvcAUBAatF10TarjnJ2HtKsFpSlcnVUWVNNsESrXmmaiVMQekZIZPQwqQayt5fGZrwXlFZKBYCYLUVaOM5O26bIsxqy8LO6hVJIlKR4/deXMHDMRcsc12zCEjsusrl9jCsXzuC+XmUYVbnVTFNAVarcsYcy/byZe5bBtRlVAbZTjmwOp5jM+YuooVdzLhR0WQEvCqLEusMkQHhaGsXHQi85CXVeW7UBPZPFYPVrltMPSwd3c0ALcqs7qh9QjKYBOgX8M3qdoF2tAW8971oXXgsAauDgOA/uhq32dJs9AZi905vOyQyK7XaKHshLXML8PGSNLO6YGimsmuwWEHsgtXXSXTXXFLOwAGrzbqcbyCUih3lXVlBn48sjgwIAxTHZsHWHUcwf+sSOUSnJ3yagQ7CHM
Yb+x5FdRnlIBugAil/3HEoPXECt9wc4a3H/wH7lBUs17sjMiA6D6BjbKeCzGrC8Mh5TQkjCtGFov/0VVKGRGPWsJJybHbt2QKk62jrNrbXp5h0Y0ZYAlZzjSvBwJkC+pwM9OOA1bF0DQOrZ2dJ1tVxgM98hjxG2dYAYsabY1XIrJYkQJbJeMzSd2MbsYKO1DwDmfLMar4G8HDcTGpuDnfOXcTfXbiF3FsTwGoJQOhwSp59DYYpeI9pGgHpshJXZXVaOUCd55XUU81hzxA2bUmwWpfJgT8DRCgFUMUsqskHSAClGYoTWX8A4HmwAh21ImZiEDDWSwVrdhKzul5PXsd1+WbmMEWAFCatEuSzwGOgblKlyateNbFqIbc8G/QAU1AGRJFC+E4xsPrkSUIAH5cBASCsRcjeNPR11AzxCgYRg0UA1wastiGuWZ1nhEj76UdTAMAZRoiFk+0j7WZrYZcGq3NAHz+UyZ6gjAyIPrlEPfRDwvYrOW/l6goDsFylkAxIprZ0OjyPGiwWSDqrKmoKlZ9LzzPsUF4SyMgE7KdlVnP2MFqJ9ZvHrC6rWZ3LrE5L4BXsLwCupEIM6gi2madVDIAkBB1KOuCdE2mbZH7JWJuZDEiJKotcZnVUnlkdRdLkfTdLhFStWT0NWJ01H05rsKj4mTgokQEpBioDRG8/TwbEsSOSvC3Sbl5yoWT1afJ7ZRssAigvA5I1EFyXJhkF2tLLGSx+8YvEKHw8PMohEiw0HInudkSwKACzSpeA1ZOuUQEZkG4XaK2eBj7zGbT/9PfR2/LjjkaRlFzLrIqgvL7qFgGr9+1Lnhj/LVlfs/ZbUyRxY2b1Llg9VeyC1ddJbG8EhFmdPi1PCl0nBM1xLboJMbQkwqrlsZIMgwCKHEata4dE/3esvRe9WMJrf/wEOUSnQ5IgKxJhYuSAdACKHUg45aMAENjezsO+JGHueTfgzUe/hAONLeyrb0+QAcnpB9OWLgJWxxsQPrO61AGax6y+sIkm03QCsFzrJiaL7NqzzzUMtDUb3c0ptJeCgGjr6QkATxjrfLB6osFiVjBwJsxxo6bt6spYuzffPNLOiHEZBdJsq/rfC0D2glZyIxZrFWeVzZUpn1RV6EoAjyNVEJc8pzSr33jDl3D3/AUcWrB2btapezRPD9wONFEiZTIXZAFqU2xwAWT+Xn5AS9SLAimaRsDKCSyX2I26YmZ1aUOtPBmQaTSraTJkYgJr3OhQMIyaDGdS2f+kiCLgz/8c7VNfxtYW8IlPINasTjPXuCX6ZcA/SUqYhFlrIgDfj7IrTSYFrzwbqWSzaIcVBQ3VxcAq9htvbpJzw7gMCHtOOByHjIUCbEKuBjB7vKAOtC5na36yKGSwyECvrHmLlqWqRQ9Peaw/qtNbSrOarbc5jNLKDRa98pUmWSXq0/gCxHJeWWK1UUSY1UoBsJpJ7fCY1X5BZrUsw1TJXDqyJsSH8nKa1ZlswmnAahW5MiClEix5RqOpdksngzLug9LJIAGWZhRJ5eatvDNCEGDo6ajrvtg9weatrC0cIweUNBec+PXLki5ou6xfk9otVXUWX4NsGRBJiorPMUwirOpEo5pvEOy4UnGDRfDNYePkbUGwOpMkQtevsgmmTBmQsntZjlQgq8QWWhfpJuf/Y+/Pg23Z7rtO8Jtz5h7OcMc3T9KTLdmSLEtGtrEtY4XBZUDF4HZB0w4bUy6qu6MKKNu0HQW0C6q6mKEg6KKhKLo6gDDNFBDuCgzYBhuwMMaWZcuy9aSn4Q333fkMe++cM/uPtVbu3JlrZe71W3n87kPnF/Hi3XvuuXny5s5cudZ3fX+fb73SS0T8kR8BfviH+18nO6vrGucrNMzqI5zg4UPhki921zsazurzkxLL4gEAYOGlOBcaQ1eD4GvEfTX7szMwYV1MEr/ne5gZ7Vu+Zfcb93VWE8TqfYI2L2u8LsXqt0idnVtsgBgbYTiAfnUy/mBsNthvxzwIWNjPaviYzWK3c7
y3vQ34+q9X/KU9RDoAei8KdyTpG0CRlnBsiTONB4Z85zt+Gv/nd/2r7fXmrOK0GDgP4aSb2llNET+bncLhidbqzgZzNwPe8Q4AwE3vQSNWV3kJ26p3nNWHfozTE8KkUFRRYJWHWITbtiHfLoeDK6tqu4jcdyVtWbAcu/n7qspziejT5r0+/ngvYHGQJcoOyv5PEatVi5GqQlnbcDRxFZ43vNCjOqsZBmTMWe3ssoWvXoVlAf/8N/9FfO1XbJTHHdyJ5xz1vcVqgT8YWpQS21IBtTBBFj3EeCBZ7BYlXax2PO6sljBaSSgYYD8MiImzWjYh5W7aWah3vrbv7u9MOT0F/sW/AP7qX8Uf+oM1/vP/HFJnNVx3OBCU6PrzfL6xMDC5bZjVGi7o0YDFrIavI1a7LuZeivVm/zEpTdkrNQzlzmotbnWSaLe+NxsMUzqrRZjxyIarlrN6SPzl50nCF405qylIhfZxp3RWi7FbJvrUNXdWE7pXwJ3VVd/113SvEAMWBzdGxRgSaJzzvjkOOhu4vCKf/b2dY5cl0srdPxOiWyrxzwQDMhQSLY6re8+6I7k24lkgiKrKtUfTxaR3SADjOBhZYPxYCeFnyKWY52xDMNjzfdt08KiFWjIGRDiru+YTgWwhzmGGxE/Sudo2XLtmWUyytUdVsbkIcW6omm+RMSAja7A0t/tC5B7l+ZZ0jBXVbN5qbrAoBWBTDIhqc4FimGt//8AG097dG4eHWHgp1vfi8e9t1cc+xv77gR8AfuZntl8XzmpvgIgmrbMznKUBDg7ZxtiRdYqTewXO1zYzvcmc1UPmI17nr55i6bIJoW3V23WVwLK1NIh98kGa467AMCBi4v6BDwB//I8DN27sfmMjrKtRMGQ81j7dUZc1Wpdi9VukTs9t5qweE5Y9DzejM7xxd/yjjVOLiUlji6cwROTkiM8HBp26VmJABmsPkQ6A3ot9jzCpIimYs7p7rhzCHzgFouuLXWa1XSAtRpzVNQUDMiwiXKSzerVmO5miReaGdRd37vA/29hsoBefke/jwI9xdqq/kPnX/5rPMTmnaxltOWD7ONabF4VmmBSAwZdElll9sfrLvxz48IfZrzsBpoL1OCkGBNgrjAPAVoDfs1xvwN1ADZCxbfgO45kPbQRsMnfXWX31KnDzJtzHr8P6P/2e/l8Qos9QWyp3Vu+NARFYBdVz0Ag0+x2uqRGmbBOGSHTPSJnVOZGDDeYolQqgzYKMIE7wz0slfAGgiT5Dbal5zgRKTWf1XqIPgHyTw12dNL//vR+5j6MjtnGXlS68sPXv2ePdRdkIadreVeda18gLG64OX9rzRp3VWQaas1pDrP7c54Dnn0gQru/3mNWAplidprsc8X3K8wZRDc28QPNdGzjFuFidsnnF3mL1EL6ocWXtf5oAtq4/xTNbtsOPdWoIt2QasCg7V8GTtYnMat9CVvY378j4JmDUoci4wlys3vtE+f06pChQAhYBhF7FxOr2sYtC/5lql2qcNcAU7BWwSBConKGsGB42arsUVMMQE57YxSRc+/kwBkR/jVCNi9Wlv//7dqyDR3TzaeLsYFlwbLZB1ROr+caVNgoGGHdWU7sBnBpFrd5g2fnZ+9aQa108XwYBiyoEQhOwqPmQ7eWs1uH3A/thQHTvrWYjBPIuXKqzegQViCzb/3wPD3Hkb3ByeyTcqVOf/SyLCfubfxPbjmlsndWz2W52yGg9eICzLMLy8QVwdITjYIOHr8dcK0h3P0sxjx9h4gPA+asnTGsQVbJ7phGtxXGH8mxkx13ZjK899kIbe3e3NQjd2nPtcVnDRWkauaw3oc7WDktg3cNZ/Vh0hjduH+DFoe+rKqSFg0M/GR+EubN6MFSuZFgAz9Xc3d9TpNPGgNglykI9SJZZKQ+ounJla/v6nb9zu2hxHCbQDYnVwlmt04Y0tCATRWxN3sdZvV6BoWCuXwdcFzftu7j9Wg7Aw+nGY7wn8XODAIcaYvXpKfv/Yg
F85CPAK68A86LAeR5isdi+hAInH3bp8pe69kSs/ZJQbPLkOeAFnfvAsoBv/3YmWF+50jtm4BQ7rkDpQQGaWD3WYaA5wW0ctVM6q9Fpo5aNSWWJOHcRea32UccBfuiH2PWViQHinh3aiS8KJOUMYbS/M82zSxQqJnpVIa88eJ4m2kaEw6al9CXaYEB0J7iexxywMmd1QRdS/FDBTeP3wILYPupaJTYyHcUEA8InpFIMSJ4zd2J0MWL15tYpIru1IHjpJRwcXMNqw/4dVlvJcxzGt1UJSZx5GWg+X7Zro1SJM/znNe+Zfe+FsfZstFBLGs7qmZthnew/Jr386QovfPbHEf3ZH0WS/EWAXcHmEmqJ1VmGB+kch4vT/f+OGJMVn1m8rtgCWtNZ7TvFKMoqSSzG/twXA2JVKFS5G032g/47UTnfaFrJCZivkS4LIwyIilNb2bRQNQw4q/Naf7Oidb6D7y8+du3tUgU0nNXu/u9EXqFfMgxIR6xOdDqXujXgrC4qAlYD487qgjKHaQwtij/nopWlu3MzlJdjsjEsNsfzSr5op6wRPA++vR4WqwV2a9971mabqI2o3H0/8Q0x7U0AMAG4zPh7sT2vMMndEMcZYlZTNlhcbDnr3ZveAAPCxm7JPWnwfAkcjCqDpnFW62JA/JGAxRT6eJEhJzwP49M19DAnfIW8dJT3rPjZWrWHtrE3tuTwEEfBKzi5V+CpPX/8yQmLLnn3u4GPfpRdb1F5DnjIMJv52Gz6ESfKWq+xKQ4QHYdAbuPI3+DV2ynOa4eJzc7x9ns9D55d7eesfn3VEavZ38nSDl7E8/ZCromKY2B2mI2L1WOd6E2WwwXkDVzWXnXprH6L1OnKYc7qfcTq2Rlu3xsZWDnnLvAlg3O3goA5q1cDg06aXgj7lY4BqQfRGkVawpEtSiwL+K//a+C7v5u1jLTOk2FAxpzVRAzIELNaXBtNxzpbPI5gQNYWc1b7PnDlCo6DDU5usZfG2cZlvKeWWH3gJTjdMwTrL/0lRhf5n/4nxiJdrdA4qxfRVqz27XIYr5JlNH7gHi+JPAfjrMsmTFev9p8Nx2Fc5SHHOtEJb4E5OKVFbB20PYehCpTOEVrgD2vxGwiNEKJiF9dgDwhsrgvPGher09Ld31ntjgcp0ZmXame1CQZE6awu6YFiXmDLw26oIULAdoI35Kw2wYDkkpbfPKc5//ZsxYsfxIjclm3jU5/C0RFwcs6vefva78GoLWp9N+EoC1uHdSiq2RRV/50s5wGLOhgQN8Um2f8zvvtqipuzM4ROgWS9HRsyjvHVYlZnGf7dnefw/ucf7P93PG+QI5lsKtaarDN22zaCPTpN0szSw4DYFQrVe0YsdCnj1qAATA/pGnKUkuaGgq+tDFUjtv5D7fprNmyIHSFj3MtN4elttO3DrBbOah1mNYDIL/sYEFNntUqgoTqgcUHOan5vqXQkE07tsLPahkd5vvhGSBarkQIAtOecvlMMG1p0N4ctazQQEyAIigAcV527UdQOLbhyD6cuWaweuGdJGJChjJC6Rq7b1ds6WSVaoa65WJ3rY0BCZzBg8SKc1QBIc07Pw+g9q/1OGEKWAHrYksUCR0HCDGB7ip6f/CTwZV8GfMd3AL/n9+yK1dmnPgv/r/0VzOo11joYbH4QKwwaZ/XJ3Qznsct0hI6zem9m9Z2YYW45jtStCxQFsElsRG1UjOcx0+CeYnWSWizEcyzrbczcZ4CxuhSrp6lLsfotUmcbhz3M+2JAxsTqokBauQj3cRSGIWZuhs16YMKSJOQFCYDpndUjoYVFWqqZn297G8M/tEU112UYkHLMWa15DfZgiVI5mmwiOvxt69jeitXLJWNS32cnc5YwRvUuBiTB2fl+/77zc+CP/lHGywoC9ntwZ3WDAdkzuJKUUL+PWC1ERd3gxoFNgCbcVNNZ7VgVynTCTRtg+PlqnNV6hwTUAVVNZZk2T3avQI48Z63qwZ4nLRylqnZ6EyFlgI
XdYEAIE1yVuG4S/qVMpDYJWBSu9aIvKldFxTyzFLHasjhmRuKmEwxNXTFlzwljfJIydM1iwb7w8Y/j8KDGyVmrO6B1zNDJ1R1HwvFFQMwMnqsI19Pp4BH31UAoak7BgHh6AYsPXk9w7G9YgGZLeMky1tyj46y++0aJmZthfqDxjI2wXxuxWtdF5tVIy4EAz7pGllv7bwbsMW4x4YvA6lXNN/jGFTlgcYxZrTvGWtb2nu1uWlHDgXn5gXyz1YhZvUcrcazrrPY82FaNKtuDWa3trK4RdzEgkiBZrVLNN0zF6gEhifQciM6ogU1sADSxWnVcA/Fz6/yT3ztlWrB8Gc3uS98u98CA6GG3GqFWEeoNgBYS7dQoZfeBwTx2PGCRiAEZ6gYwvbeU3G4CYx3gmxaKrpiyZOYQVx+3NIoBEULiVNk+FZ9zUjB5LpTYliovaXPZkVwbFMX+JizLwuGywkkWbVuWR+rsDDg+Br76q5mcseOsvn8O384xL06wkcQHKStNmbM9CIDjYxz5Gzy8V+HBysdxsJEHLI50dwPA+VnNnNXPPgsAWLobnJ8Dceaw+XhLrGb5IHt8FnmOJLcRenvcYwMmIQBAWSKr3Eux+k2sS7H6LVKnG39vZ/XN6Ay37488VUXBnNUj2jcAwGcD0cPzgYlQmiIrXfghbUAfZVZrsopZeIr6W8pUwaweOE9HhGbIuFbAbnv2vtWwRAf+fSYYkJEdyNXGZhgQzwMWCyZGPyyBusZZzMI6ewGL5/tNBjYb4P3vB/7H/5GF765WAMqSOatn/HP1PIYBKS8YA6I6dG73mdVDNZT2zqvKCrlrf+RcXbtUi9VlSWsdHOpc4MnslMm472PQNUHCNQinz5BYresgGttcoDoUXR7iKrsP6ppjQAg8VTHBkzmrCyJaBC2xWhawSFyQCReVbEFiJPoA8P0aqSycJ8+xKf1Ro0SvNMTqyMmBF19kqeGrFY7qhzhZ8b/fEatZ8PAQs5omKAIYZB2y9lHCpugIBkQ7YNFNsU73f3ge3M5xJVyzx6L178tzlnmjI1b/y48G+KYnflWv42iEAZxsKvb5a65KggDDnSa6i/6xcYuKFLAsuE6FQhb+VddsI9AACSQ1CJiI1SqHouiIIWJAVJ0m2iicdgnn40ArMfmdOIbGKggBi0GFpIsB4ZkQJhgQi5/TTpkELPoDKDPqhuCQAxowFBQVm9giI4QYYMruA/ncKEsqdYegqoYctaKE015HrBZCrUKsJrmKMSAommxcXaSzWpU7QcWANOOh6hrQAxaVAcHC1EbAHzi+IieFFxOrJ3RWG6DnXFdtvmlMIkTGuBQJBGzHrj3XtUdHNU7SGU5eXe31/avV1nMRBB1nNc8Xm9UbLbG6TlL2/AqxOohxclLjM3cP8LaDu7vzJh1n9YZjRG7eBAAs7TXOTqpt7pGYM/k+R9bscc3imL0X5+74nGusE/3SWf2m16VY/RYpJhzuIVb7PkuNjUceTh1nteviWrjCvbOBny3CAi4KA0JgVg8GLAoMyL4vSotNsKz2OfUOWrDWVF1m9UUGLI7saq7azurFgonRJ4wJdpaHOAjS7UAfBFh6Cc5W+/371mvWffO938s2TIWzekes5knUaeGqNwGoSbxjok9db53VGgKNa5VqliiAPCn1BHB+XMeqB510AKblphkwq73A3gsDouXQcoeDXgAARcGwJhruT+Z0GnAo6qJ7WseVYkB4+BeJL+2qA1dNnNV24LHrpgq+okzCwNPEJSJ4kdfkcwWAwKuZmNQVFQ2c1dJ/f6fi0wyRm7OBi2Ogjjav4+HK200lBwDH4TiLAbGaiJjB0LnyTVFv3+4CfsyxTdEsl4TNjhxz7qVazOoHd0tcCfjqqDUmZRlbp+iI1V94zcWLh3e0xWpAHVSWxLU+sxrsFNIuUqFdfL4V+JpBZUPO6prArLYsuA7k4h/fvKS685QM4KpCDZA5tdJzNXh3AS3XX3fcKkAOsd3LWV1qYk
A4o1OJfwAY0k8ndJhX6Fd9DEjJONbRjHAPAOqx61HDgDQuVcWfU8Vq2x5kwpOd1UJYVrigs7jUD8HjrOIxDEhc6t0PgwHBBs5qp82B7hyTfF1HAxZpm3eOo3bq1qUBrsKtBzvkSGO34ADL5gZFgbT09n9vtWoMZ5Zk9rRitUGot+fWyu6NPAfNJLIHFkonEPLo2Mar62P8J99xba/vX62A+Zz9uitW5ykzJsyqcy2xOjnPGSaPi9XXwhVu33fx0u0lXjy8zdIcRe2pQQBcrPYT4LHHAAAHOMf5Sck2d/18u44Sm7dD5j5RWcY2Xmd7XN9GrB7KNnK0pprtYwO4FKsN61KsfitUXeM02dNZ3UxARo6p46wWYvVqwG7RTBqnDb4iteA4DhP+hjons4q9gHRGnz1FBO32bGu4PZuKAdnHWb2OmeAA12XOai/GGReVz7IIB2HrRvJ9FrQ5FC7Yqs1m+7JcLDoYkPl2whZ4FWuhVonVdc1Tuff7uU2Jz2sgAK1Z6OwrKg4tynkVSaFGzAycK9uFH8HhUJ3VA+FEFKeP5w+3+ImWZy0HrFiQxXs4q3U3F1QTJpPF7hCbsiZyoB0HnlhEt5+HukZeEtEigHqcFc8AUfRpxOquQ1FgUIhidYNVkDmrCx/RXH8TwEIL0aMoJlZnbODiLYlH1QO88cDvO249b9g5Q2UA74EBKWrCe8YumaNWVnXNQnd0HESOg7lmwOKD+zWOAw5JLHbF6uvX9ZjVaczdhJQsh3iAo0lxVvs1w9YMfGZai37hhB/sCKF1xbgDog8prI4fVDnfoKDcxGFVwldzrtNiQKZxVg8xq2kbuJnCUSuOW9UWbF/vnKNQggERTlqqWK0yCFDd9cC2e2cA1UDtjFJ2XxqYA5wRZrURBkQhfDFnteZ9K/ivI87qjeY8bmxzgeQqBuA6kAcPiw7BqZ3V4rgm6BqFU5c6N3JVbGU+56Q8X7AseE7Fulu7RiyREUMQq8eQW02GA0WsVnSKAqBfV4ULvJkXERnjyg1n8Q7f8zM7uubgFx88idVqv89i2FnN3nOz4lyLWX12UrFu6zAEjo+x8FLE6wqffPAYXrx5LndW7yFWn8Uull4KHB0x3Ki7wdntmL0v24ZK32fvw33Eap0uobGuqLJETpxvjT0Hl7VfXYrVb4UqCpxlIQ6iYnzAFA/z2HOhE8oixOr1wCxbTEYJ7Djx92WVF5Z+Cw5f6JUDzOoyK/VT38eQJZxBpe+sVjgxeFVpznh0FGf1GAYkdrBwO87qMzYZOs0iHEStA/hMsEnS/V6uwlkNAMtlJ2Bxtn2B267NXI9jorLuvSWu10A7vfZu6RhLFJyHruPa58d1rAqFqt1XCJdTMqu5A5iSTu/4DqqBFj9kmfYiZy/GWZ6zzSsdsXosYJGImHGsCkWqcuQ48Jw9ula6xVs9ewJw+xnQdWvz8wXQ/7zqmp4iD9GibcsdigbOat+rkSkwIGVtwws1V/zckTEo+gCIz3ImSs9mwJUrAIDD6iFu3fOYiL1cbr9ZbFjI7gGALk7ssSmaV87+3HZ+zMFN0apCVjF25d6fGQED8vAEUmd1ngMvvAB89rN7HwppUjE3oaazOnDUTtUkrvU5muDOatn9Kkp30b/HuGUW/iV35xWVDcejhaIqu9lMOLUjrf9UDIgfWn0MSBvfRAyGZYLiCLNaA6nAXI8jrdRFwQRAzZshDOo+BoS7/bTGlnY5Diyr7gdFXxQGxMhZPYBFMglYHGBWU8XPMTGl2bgjrBEGxeo81wu0xsVhQBxXwS5v5rHahwQchzHhZSYRwzFWKqyDuKYVx3UUoio/V0e30wZgGSFuJd8QEu8tzfBWAKOh1klqsfe35jrJtmp5R6PJe2ZgQ6zpEiRhQCrkqvmhLgbkqotffPAEkmS/7x8SqxnyrcA8fzicRdap89MKB17CJjxHRwCAdx98Ab9w/ynceKLzOeo4q2OPYUDCEDg4YB3ct9bcWd26f/j7MCv2ZF
aX7n5ZDuJcVSK4wWbraIf3Ze1Vl2L1W6GyDKdZhMPFHjf7PrvlQOsltMfP52L1/c0ezmqqi0zxIJNa38VCb8RZ7ei6X8cGHYqI4A4k3ovDJgRhfU+xep3yFF/OrA6cgjmn8xxnWYjDqPWG832ETo443e+zaDurl8uOs7ql+YxeV2rL75joUxTISnfvNiwAWyf8wEZIznlguo4/x6oH+WYASBMmKUOSH5PqztvH/ZlVDrxQ71nw7Gp0YV4DmozxAYeiEQZE4aoV9yuVp+rxNv2OiEB2PQKjzmrSQg8th70KA0JkVgd+LcfM5Dlb7Oqu+F0ekDvk2gcQnxcMAxJFLJ0GwFFxDy+9sWQi68HBzjFdu1SL1eLa6n5mLk+RH3jPFLoBi7bN295teQcLBbXkMp6gFrP61Nk6qzsYkKtX2amdn+93rCytWWDhXpOY7Tn7A/cB2VkdQI6tEVWWyHQ2RvcJWKxt0kbjkLOazKxub4ZIwhDF90x5roz/S3NWe77d3wxrukwIWQP8ZAfnXPzdZQeaqIaBzRVxXPG9OsWc1f7uNeDCt0VKq4OatU5FIgHbc1F2hxEE4OY8FX9uyKyWDt2GGJChMMQsrfUxIAIxM8R/5feG5e9/3DFnNQDyxpXSWU0NiR7rkLuAUNAir9lGPmGMUXY08k0ACmoJYLeN9P3VbLISDjqyRqBiQLyBTdGaf49uea4CjYXW5gLRWa3cvCxLlLUNx9/vfI+u2vjkyeNIMw2x+tZLwA/8AIL//R9txeqybJBvMyvB5sGerdJgzuqll7AJz8EBYNv46psv49nlfVjHR7vfzOdao8ZJAOeptz3ucomr4Qr3XokZNqsnVu+JAeGbw1pi9UDAokkQvQUMhyRf1mhditVvhdJxKYqHeWyA4Jy7fZ3VV8M17q0HTqARq6cVFEkOPeFSHcOATO2sFmFtOsKXENaH1iJpqd+aOrZTyGuVugwDwsVqywKsuuTM6ggHs9aJBQFCN0ey58uy7axeLDoBi/PWfTImfF4QYkZsLmiJ1U3A4jAP3dVlVgtxYsClCYA0YaqBEWa13iEBjF9bvlums8jZx1ndYBy0MCDjDkXKYtex6uF2X6KwLA0tbCZLNHFG2YpGHbd5qRZPeW7AfgVr05cygClIJGAr+myGJ4yb8xKRwzEg8zngeTiyTvDTrz6Fdx3f2lpV+DFdVcgmwJyaNcGNsScGRGsjaCisjv+svHLgexr3l+Ng7mXYZPu/l/IM8Gz2M6yqaHTNLGMf6dd+LfDTP73fsdIECHQxINxZnSby+50qVrP7dThwVttZPTSHaYQUAlZB3AcyZzVVrLbZeFeUkuBGw+ArZcCiwRjrh3Z/3BKdccSxcJ+Axbq2tFENjFW8BxpLc6wNQyDpClRlSdsMFKW6BuI9Q3Gm7ZO7QdhsdqxKbXYzEasHmNXkELwRl2IaV2zjTnONEDjFsPhF2BxunNWyRSg1KByKeRFgxq8Xa0XZO1yMMZQxdkCwJwf2QWzeya8B9boCrJNN6azWyVroniw/Rq/qGklus3wIzXvWtSr5c2CEAVFvLpDnsq4LzxpGSwDYe2Pw8NhBXrlI9wkXBLA6LbD4lz8CPHyI4PXPbuc8mw2b6zklZm6Gzd39OSBn59YWA2LbwOEhPvT4S/jamy83TuummjFr5KBVhTj3MPP4vOvgAE/MTnHrFY4gCnbFaoYB2eOacXrAvobMQb3E5P01xi6/rL3qUqx+K1TKElitcI+nTjzMYwMaXzztxfMRGJB4rv6esmRtxLqC4hgGhPKiEOFnKvdrzRys2oLimIjAXR5ak5umHVH9dxq+9kVgQBKPYUC4WA0AKKsWeqZ1gCBgGJBsv2GjaBk9Gmc1D2zZ2e3cQ6xmDuC9fuz+xxVitaabcJAlipYTXnMS5liKwD4AdUGciI0w3sjp9O5IYJ248TTd5WOBP3la6TmIRCveIAZEE93THFdxH4jrSuSpel7dR2sUBe0Z2B60Oc5OmQhUaC
3KVM5q4oF9H4wBLAlYBKAvpnjefs7qVbkNWLQs4MoVHPoxPnt+HV92o5927toDGBDxPiCMW5ZVD+KLCkKr/mB7dlkiKzXHAoEByTQ+i9a/KUDauH3ynH3mH/oQ8D3fA/wv/8v4obKsZtgSHWf1yH0QxyAFLAahJcfWiBJBVfu2049hQKiufbTuAxWzmooEUuFFjJnVFxCwGPKA4I5Qa8LvH51z5Tl7rrVD8Epkis0VAGRndRiCYUA6gj0A8iajMjDcpI16hCtM2my1bbgOe/dLA9MNmNVsviH5M822/50aEauzpGIbd5r31j5itfjevQ8r0GAK9BwAuqCo2mSrbH0zCz8P11YgIw26zhpRWeHUpaKGlKKqQfcKMIBdE85qzfBWdrK8Q0yBg0kKD6Gr3zXt2SVyGS6xqlgXKfE9o8KAkLEt/FyV73DNsfboKvu+JN/v+1evnGBhM+Ra4BRIhVFjs0FWsXObuRnW9+K9jgcAZ2fAgRdv51zHx3jX8S385V//wwNi9chzyUOwrDBgc+7lEo/PTnHrVs3F6tZn4jhwHKCsJZvi3crz/TuZxCbrmLOa8v7aB295WaN1KVa/FUowPfYRfjyPpaWOidUiQXyfdZ7j4NCPcZKG6hA8A4eiOB9ZkTEglmTCLIqLCI4Nbcf20LkyJ52mWO2OByzmKXeBazrI2E7h8LetMw9zL2uY1QBgVSWqrMCDdI6j+S6zOnRyxJnGS/vOHeCHfgiLT3+MidViN7mtuu2xCUCaNLou45ulw2K1H06MmBFCnebCwbUqJQakKms4lFCWvZjVeofcOe6YWE1hKKra5gAk65ItyrSc1aW63Ve0kBLbiFXsPBMhxXUlDiLBEjXFgKic1ZQ2TwBeYEsxICxBXXPDplVBACWzmv1gAgZkwFErKl5XmLnZtiXk+BhHPpvMv+uZ1e4374uYmToYloKbwkh7diP66J3n3ONitWpe0KqqAux6+3mGVoaYr5My/gr61m8F/vJfBj760fEfnyZA4OTTOqszizGrNT+0IABzVg9gQLSc1WMBi4auP5VDsahsOA4tXM9za+VxAVwABsTAWT1z+8zqsmQCiqeY343VRTirxQbuPs5qXQzIzGIYkM6mKIV/3ZRKoLhAZzUJ4wVs+b+K8RCAPqpBPLdKZzWRWS1a6gcwIDRndY40H5hTivtB11mtyjOZYuNK6azWPuQwY5yv58icdcU1KEp6+LQq0Jp8v4rj+pY8ILgs9YKB26WacwKNYzv0NQ0dQgCWOOHJhh5w9J7KCS/Wc0R+/Zizet9n4fAqu57H0X7YjtXrZwzxCS5Wr/nPW6/ZutcumNngfjKq+4o6X4E5q8Wc6+rV7R92xWqhQYx1+QvHgnBOHhzgidkJXr/tsIDF9j1itbo8xg6ss9EmznUg04WaubDFwVw6q03qUqx+K1SWsQnDPouyfdNSxctiH2e1xVhYgxxNaoAKZ+qqeD4N30vTWe3YAwJwnnPH27QucDMMyICz2gADkg1NRKsKq9xnYrXjNGL10t1gdVrilfUxnrqy2X6/7yNy8713dgEAf+7PAbduYflvf7QJWBTn1z5XAGrh0wADMpjGLLjK2hiQYWc1FQPiDLg0GQ6H0IommNVDvEcDDtekzuo9dqDTpNZzPop7YKCd3qSNWMouN2xR93yLORQ7rr+iJjwDogaY1QUla6B1rnJmdaU/brfK9yHFKpQJIWwWAByHBeuNBSxuaoaBEGL1lSs48jdwrRIvPtvpZxzDgAin6sQ5DgID4mqGTA65vZrOKB3MjGVh7hdYF/64ywXA2WmNQ3f7PonsBEnM7meBAXFd4Ou+br+gxSwDfEI2wFDQZpJYZAyIlLEuir9r/EDTWT2EAalpY7fvKwQPA+EPGBCWDdqzHVexwdKE2NLGQ8d3+mJSUSAufUQ+Uazeg1kNQJ8r7KhFSoDNNxzdTi60MCCta1DnBEG9XSoGsEHA4j4YEMpmq+Oox8OqrGkuTcHUVYjVJu
aAQQxIUrOxUNdZbReDYrU2dg0YDcQEYMZrVvHrifeWEhNn4qxWdYSAaMBqH1cmgtc1e24NnNXSd4LAV2k0LzXleUy3kD0MRYGkIIjgLssJkQl/JqHeQ11nxszqicRqbxHgWniOq1G8jzcAq5O8JVbnSDf85202bN3LndV/5u8/j7/wF/Y6BZytHBz48VZY/oZv2P5hO88FGHcri0pT1qUrbrKDAzw+O8Xr94K+sxpg9xWA0WA2nfetONcLCli8dFab16VY/RaoOuE7T/ssykTb4JBICWxfFhptqQCbzEqrqpBThC/BjlPgD/KC70LrTG6E61GFARFBQrrv9X1wFbqLPdvmAYuK4CsYYkCGNi14UJcfWOz6RhFg2zhw1ji9z7EDbSHX91kAY+7u5aRDngEnJwCApZ82AYvi/Joa2wQwCO90rQpFMiGzeowlCpa0THZWK+7ZogDNjcFxHbUMik4ValvHVV2IKsnYhIKwwTI0uUkEp3bf4wrnzIjo43j619W1K5QKZrUJX1rqfBTOakraOz9fcRzZuVK1CS+wpZP8IqdzGQGBVegfN9lUiNyMhAHx7XFndRLXWwwIAFy9igM/wVfd+By848XuNwtBUfHuIl9b3hGiDGThm61azGoMu72oYtI8LLHOA/U7sVUPXk9YuGIYNl06yRlbcAhnNcByLR8+HP/ZaWYxJJCus3rgPkhSi3R/BaGCsS5KFwOyj2uf+NwqhXUe+GRgqpUigcxZomoXOHVDsOns6orVhYfIH7+X5Sc7sigVGBDtd2I1uMmWJjWpGyCaWYgLf2fRX2SVEb5JGYB2Uc7qZtyidBjwwD6Z+9XQTanMsiA6dcfmRlnG3JNan5voMhlYK+ZxoT2XbeYvkpMtS7DNZkoInmJj3CgkWpgOBgIWycxqFQO5sOiiqmojwGCMBbizWoUBqVwEEeG43NAiXdvnOcvMIjqrlWI1MSdlDANCMl7sgZZgP3zPGzcM8Ve/7m8jsPNRnRYAVrGLhZcAV68idAqk8VaszisH/tzD+669gv/q634er7++3ymcrR0svXQrLL/jHcCv//VsvvzCC7vfvCeKtIpTtlYUx1wusfRTrDY24sLHLOx81p43jKAUlefsuPuMXWN6CTdzXGJA3ry6FKvfApVvcjZh2NNZ7duFup1BlAiT2ZfV6rpY+glWJ0NcYcKLne+UqsRqkqNUTBhV10CwRHWZsnuIqtpIgbHgKzAMCEX8bAZflbDMmU7NfWVZwMEBDv0Yn//kBsfBZvdFatuwPQdVbY224NQ10PR4A1i4CVbnbFOiN1l1R9JyuXOCem8NOavzytETfYYWI7yo4Z3KoBewxRMJA8KPK722ZcndeQRRdSQYNVmXTPTRdD2yyc0ABiSu9RLExT0wIPoAgOVMyKw2bFH3PO5Q7LSoZ6VmAF67VJuNJu1tYIsnJQaE4PYTxZjV/cXTZl0zTMdFYEDqGnFisYDFKGJfe8c74NoV/s1H/jQD73eO6VoD44voXNCd4Aq318i4pY0BUTlf+blS2K/zoMC6CNTvxFY9fCNl75TZDAhDhE6B+IRtxOf57mvIVmjq7cpywKcGLKbyP04ymwl/mveXFzpyZ5oowf7c16HmjgTDGvBvVc8W6poszgADbkITDIhnMUFR4aakaqrSzbuyRFx6u23HOjXiziI5oN3xHIdkU+m9E3mFc6cXsBjHYO9tKrNaOB+708OqYrk7FFbDGAaEKig66vGQ7NIU/37ZPWBwrqwjRJ1FlKU1Gwt1xi3bRuAyPJGqM6Z53+o4qwNneqQCxMb4QNgqpUOQz+Ok+D2DzQXHVYufDQaEItirMCAXFbAo8FUUZzXvYpLypXn43V6d3Z1jqjjQjUloYgwI2Vk91tmri8QJQ/zOF34egZ0p5y/tWqUe5m4GXLnCnNUip4OL1e6zT+JquMb/8YWP4v79/U7hbOPsMqsB4Du+g3VQHx7ufnODIh0e79KzlL2/Ws5qAEBZYl34mEWd6z
eEl2mXDjKw0UsUn/EUzuoxh/llDdalWP0WqHSVs13zfd4YTVrquLMa6LCDR4575G9wcn9iDIjrwrFqtUhHaZkSwt9A0ndRO9DVp+A4wzt6xDbawfZsEMVP24bnKCYgomQ7j8fHOPASfOxnMjy/vLd1GIoKAjYpGtnaTVMgKLYpw0svwfmDAus1WKBjBwPiOwWyeOjeIrj2xYJfFYBGZVYP8dAhnPD6YrVrqzsMyqKmLZ5GnD5kt65oxVJciHhdIdJtp2+caepvoWBABp3wYpFG2gRQjFuGLepSAVi0qAdEIcXz4MscKUJQJYrVUpGuro0S7wEWgJZWfQZwvKnZfaUjUAKNWJ0NceOqCnHuIvJaDjXuFrEs9O/1xlk9wqymbrKpxq0sQ1nb+wXHtA87FLDYdANoitVRxTAgezirNw/TLQ88ihC5GTYnW2d1+x586ing1VeHj5dm9v7zIlEjmxZMrNbHgFge7zRRrUiEQ003YFF1WQ14qsxFJ3coToIB6aJ7iprsphwKFDNl+NfdTfeyZG3HoSkGRIFqiCuS+3UMA5IkIInV0dzuOauZWK1/rKZsmwk0ZcckYeL83MtZrX/YxiQyJVd4QEwzcgCPYUBEl4nmwQO/HmTtN+9bXWa14j1D7hCEouMMaCGRtA/JO1sVgZgGnYdeYKsFezE3IqBQmu4o1YagkbN6AANCDFhU8vaLAklJY1a7ViU9JjkIEcPoGvZ50RCM0o279oH59+1VXNkPrHxcrK5rlBU3+h0f73aTrdm63Hr2GQDAtex13Lu733v0PHYZs7o95xLugm7tKdIm5znbIBU7F9wUcujHeGNziFl3zTOy9myKJFYPM6t1lx07x750VhvVpVj9Fqh0XbDW9z0DFvcSq3VbUER7n2oBTd2FFjvbKmd1SWjBEW7tAVh+WVskZ7Vt1ajyIWc1QaxWTUB45VlN4hU3bXM6IXjHxzj0Y3zsF208v7zPerLb5ftskTfyttycFZgXp+w3N29i4aU4Py1x78TFlWDdE6sDe0CsprY8C/erame7KFhrj46zekxEAB0DMuisLi0ys1q5u0/pBGidr21VyvDKzYoH1Wm6Hlk4rPpbtDEge7BfAdAcVJYiRd4UreFLuIRNizrdWe3ZZb+dXCyg9+2wkZ1r1Wn9b4QkTXxTq4KZw9yfnU2xzUbf6QWgWTilQ9k0ec6clWHrM23/nK6zemx8MWBWqxZkALYbzb6mWD30niFuWnged+nu4axOVzlzLUcREIZYuCnWJ+y+yTpDxfPPj3Ors9wiOauHgsoasZpwfwEYdVbvvTHaBLWpndVUl6Yf2sMBi0TUkMr1Z+J4U4aCGmJAmhu9N8b65s5qxfsrWZf6GyFi3B7YwE0SsA1cXWf1wkVSejtjLFX4bsqy4MpMEibBjXsELFIcb45roVShGnLQWv/F2C0bBsQG0wW485qARV18USDPhhAVr/WxW0POahMEhnI9Y8gCV5o5RJ4H4fNSOqBhgJVAy9ikCrGlOqsDxbUtS1S1DccnmA5Uc06gwWKFgeb4PYBUaPKtTDZCpnRWT8ysFmJuaKVIxjIWswwWeLbLfL67QS/+8sEBcHCAq94Z7t/Zb4P2LPYYs3ofg4AwoY04q5NzPi8Ux+QO7cdnp3jp9AaiWefvj821eBVJwe6HfQYG24bnMJSttMvk0ln9ptelWP0WqHRT7s9m9Dw4dj2OFM5zvSCVMZcqdfE0FHCBVju5prN6zE1ZUNKjx9p6iG1+g+3ZaO3ETy1WZxlzg7Unoleu4KnFQ/zoJ59hzuorVzoHlSzyJLW+vcLMSVlC8LPPMldjXOLzdyI8t7wvdVY3bUrdom6EeN6w6JNlqGoLTqAxy22C9RR/XtdkxrhrK8RP8IkY0emjRAo0zhG9QwIAHEfu1OW1Wdf67Nd9Fuappbcwb0QfxZ9T0+mHRHCBGbKJzGpfIiaVJWub1J3ct85XOnEuCuS1Jre9fdjAQd4NgxQtnh7RoQjAjx
zm9OncDPEGNGb1PhiQPGdiVdfJ8Sf+BPDhDwPf+q29Yw6iGgyc1YPvmYzz4DVtHo6rDn2iuv6azqw9xOrkLNuGV4YhFl7aYMXyrIafnDVuzGefBb7wheHjpbmtz6wW94Fsr7Wuyc7qfcTqurZg+7qbbMNOH4pA4weWHAMisjyozGqPB8N2xWoDfv24m5I+HlpWvXvfliXbEDToXmGLUvlnlia1vvvV5Rk0+zirNa9vtOAYkLazOrGYk5bMVwFcp+477CnhkqL2CFgkYUBcsPt1yhC8xlkt+TMDdI/YbFViQHKLBSwSnNXJAGs/jqF9P9iuzTYBFM5qEwyIepONGD495IQX72/Co6AMg6zZs0F1Vg9dA5OARc9XvBNkGUN7H1TRzcePm1Ac20MYEKqoDIVBpHVc16JtXO3FrN73uFzMDaxsNHulmeCEITCb8bkv/zMx3vs+R4QU2/DFoSoKnKUhlv6emKjm3z8iVot5oRCruZnh6x77NP7dnecxuz7vHXewi00cV+Cx9nznDG0woSzZ/M0lrJN4t1FeKoTwy9qrLsXqt0AxDEi+326W47CXYF0PPxi6zmrPg2tXcv4U0OKG7Xe4plwXjj3sKKVjQIaZ1Y49MVaCOHEeSo4GiO5yjAy+wHawby/2j4/xX3zpT+E73/FR/Lobn+s7q8e43bzW9xPGy5rNtoJ3UeKzt2d9sZoLn8oXppGzehgDAkB78Tjo1OXIEtfW5McNYSXQYihSW9FkE0bhpqS400R7siL4Kd4Q2MKiK2RgctOESelgQOxhJBAAMgtcusipa3o6PQAv4ONBFwNSdFy/WgflrvWuWC2EdaqzWmBAOu30SekipPK1AfgzlwXWdVTFZvGs24/HWcVDoo+4xj2O4o0bwLd/O3p/0Liy1JtsJEFROLYHxq3eJuMeZfsDoahEZvUYu75dyTnHmQlntZdifcr+XvaZL8D7638F+MEfBF57DYsFsNkMHy8rbOYmpDCrMzlTNi74JovueDC2iSucpfuO33sFLNIwICxgUS5WFwQWuigVBqQRqwkiQuP4kzqrJ2BWd8bYjSzQad8aYVancaXXFQQ0nQBj3UYUznq4cPsYkIQHjBqI1Q1+rnNtAdDEtH0wIATH21AIHhlXweeceReDAhh3sg0iZlIesKh7D4TDzurNBtrzOGl4KbZfomJAGrSG0llNC+90Ve5XEwyIyigkNix01568GK5Ccs+K3xM72ZTOah2UQreEs1Y25zJgVqvWM0aboqrrWtfGzmrl1IhvYO891loWEASMP30+krAo3NNBsBWrxVS6LVYfHbFf72E2QJriLA8ZUnqf+0yMWSP5acmqYKKyuBksC7h2Db/3S/4troRrzB476B0XwOick4nV+4+JY2I1APpmkCt5J16WVl2K1W+B0nJWW9Z+D7PujumeGBDSwtxSs3pNAhaV469wEOmKdGMiAmc+Op7eY6VsdeXFdnb1J3hjzuoqzRlLsoMBcewa//1X/WO8eHinL1ZzR1IvqK1Tm5OMTXKjCLh6FQBglQVeurVgYnX7Z3KRciyobHKHorguOmLHmFO3KNiChMDXdqxKHvQCoCgNFk8Daedkp48QlhXols0GJGZ1MOAeAoA41WzTF8/sQJeFBZAXpVInvFiQkJnVEg50WZoxq10XniVxrRcFW+wTndVSsbookBQeIn9cwFRVMHfZIrqHAQENA+I4DAMyRC+SYUCGSvDrB9o8SW3EQ90Q/DzF92nVRbBfhUN1H7FaLEo4s3rhpVidsXPJ7q+YM/DhQ+Cv/BWETt7O55VWLriyU4nVeU7vXtjDWS1+/r7HG+0O0wnIbpUX2HI+aaEZut2pZh6jwoBQAsUCe0BYJ4aqAYDn9ZnVprkAI9zLxlmt+U70nT0wIARntTvz2efVxoCkFlvgUwMWAbgirKx90iYOzdGARdo967g8vFOJaiDMt2wW3F7IXHQm862ReyvLeNisrrM6AMuGGHJW62SE8HMFMIzAoK
AaZB1ngFnuxpizurJJc6PGAS1zVtcGc0PFGFuV9FwAYJhZzb6BJlZ7qq4Qo4BFOWPcZFNU2cEjjCcUpJ1AAin0ySov9TawASAIEDoFkrNhsTo7T9l8KgyBKIJt1ajFeNQ2qXGx2qmL8SlcmuI8C7Gc7/l+HONA80pWBeuWbZsxr15F5Ob4Nx/503j2yxa947oDCMrmuHGttVZUdkMARiHRAOB9yfPIf9NvMXqvfrHXpVj9FqjGkbHvC0NwhQd2cRqxcd+Hh+9oDguKBPbpkEMRQF7arAVH5wUkzrVSt9AWVLF6SETgrVi2o/dSczxbHXyFCTAgivugYaF3xOqdkjirA6dAthnBgJxkmHspE6u5s/rdN27jRz7+DJ5b3ttNDt53I4QaVDYm+mjjOgZEhKKgLaKHghABFNRJ/tC1NUg7b1r8FJ0WjbNas0U/dHOkA2L1amNj4aX7L8oa0Uc9FtQAmVk9tMihi9W2tI06LjxEEemQagyIEFQNMCC9cxXIEgNndbRwEHd4qoBw/tEDFqUipSi+gNr7Go88s4JRSkJj2QP4IjFxnlispo6xymN2ijGr2xiQBKsz9m/JNzkTWwDg/n1ED18bFqvrGqhrtn7UFqtzufiX56wjQHcBDciduu3SdVE1HSHq7jAyBiSUBLjyY5aVDdsjOqt9+TwmLyx4unM4Xo3o0z3XhotPOtXtPdO+EQQGhNq90jir0XfVQn/xDKDVbaT+FiYwE9AdkmsQp7a5s9rjXN2pxGr+d2pZe1RZoqxtkgYwhN8zwlW4tRSHI+YapFZyx2Eu1dyW3lsNEonKrFYFLMbQvx8G3gmkTlleF8KsviAzh+M7qGQGpKabkTY3UmFATHjNAHsnZJJAa2Nntcq1zg0NNLFaPu9uQlEJg4HjO2zjShHkS/q8RjIMipywiRuGbB67Gnborh5kWHhJ46wGsJ03Zhmbi3heI1ZfiWI8fDjys9OUrRdn+2+4s/ehfMwSxbIcit3OxWvXAABvP7wL6/q13b8wYpRqjjsxBgQAXax+5gnk73yP0Xv1i70uxeq3QDUp4vu+MMT3DdgxirTUC+QQ/N8BDAhpwiBYvTJndV1vg+V0djUFUmEgYLGoHP1xZ49ALfF9OmW5AyIC9RpggG/Gq9nR7DCrd0q0CYlyXeZ62wwv6DanBRMrWxiQD177DH7pzg08u3iwK1YPtHYBMA4qm9ShKO6BgXuLTW70xepBZzW1fVII9hflrFZgQCjto8JZnWSOcnKzTh3M3VQbAzKU8gyAhgFRbVoY8lSlAk1R6Ll+uzXErDYJWFRgQOLS0096bx935vUFDwCbxNa/r4Cto3bMWV34+19jzxtGNVA7QkSnycBxSd0AQ+5fMW4RApKVx+xUs3iIIuasdlOszpjonMUFW7j9ht8AAAjvvTYcIiTuC8/Tey/yBbR006LZrKBhkQYT6nUxCGMtxMR8DIDz4If4pNQFmSpgUbRRU5zVoaTLBOBjrAEGxPfh2BXKuCNW62xWdcuyeEiTI0XwkZ3VdoFsYJONGrCoFKtNmdWuxCShu1nTriEkDr/OFmHXomGsK0RVegieGgFB5grbDNeWl3Lhpwmb1XVWh9ZwwKJgmOvcswPvBGqXKDA2FhBNF2JjWMEYLyqHFjir2hhuNtkMnNUysdpgcwVovRM6F6JKc3owqhi7ZM7qPEdaEZjVngfXkgvAJs5qy3XYsynZXCB/XmNidVax89W5tmG4FwZk9TBnGE7urAawI1YD2BGrr4Ur3L8/8rOFyL2vOcC2mQteFVrIK17zeWH7uLwbm51cR6wWeJkxsTqu+/rGQA12ohsGmHreJQHEtC7F6rdApSm0hWXLqgefjiTmE2fdxdOQs5oyYXBdJiyr2K9iF1pnQWozN3ZZSbhxQCtIiMisHhOrCcIXAKVYzcRP/UAOpSuJV7op+87qw0Pg5s3+uYniYnWyHmFWnxbsZdlyVn/1/OO4EqxwEKQsibh1zNHgShNntUr0ob
gGBLZmADHDFtGEe8suURZDzGqCa4Cf79BknNo+6dsDzGpK+6htI/Aq1paquMCr2NVzVgtsy8DmgvjZWiVa0WQYEBOnD7hDsdvqWZYMrWHIrJZhQIraJnNqncDtL56Ea8anByxKnY8A4oTg9AKa+3XcWe3qOavHAhapocMj4yGpG+AiMCCOYpEnqWRdssVDO2BxBeD8HFluw1/4wLvfDQCI7n5h2Fnd5i7qlNi0kHVvCAyI7gIaaHAoVTbCrNbCgJSDrH2qQNO4yCTOau225FapGMAmIoLj2ahk95do0adiQIKAsaDb5gtTZjWG3VkNV1iXWT3SEcJCQfWFyqbtuocBMROrPZ87ljuudbLoNbT2MBARWEejOgiQLKp6Cse2qeCh4p5WFef364vrzFmtVlIaxIzmBosKDdXMY4kYEGm2zwR4FdVGiGXVsByaEx6AfNyqGSqGUm7gSJGRJpsAABBEttRh33SDUF40wtCicFZrBQ6LEgKw5JgmAYvKz0vMi4jO6qEu3Dyr9Z3gYcgwIKthc0DTxSYRq/OkZNephQG56p3i3r2Rny3LuBqpMRQp0LrH2sdt/7prv+ddvXuJ1TrO6iFzn6mz+lKsNq5LsfotUFmiiQFxXbaAHBgg0rjSm5SKttQhVAMRA+LapdxRKlqTdV8UljUcWigcbyRn9a+xWF2WTPwkBHKMvSjSTdl/SVgW8H3fB7zjHcBHPiI918jJkcQjzuqzYsus5onE7z56Ff+Xd/0rYLncvUaC7TXmrKaIPkOBmFSxep/2bN2d+GbTRv7HRQE4ZGf1xTCrfadQfmaNA1ZTTAr8enDxtE4cJlbr7O47TNiQ7u6bBizKbi0RpHQRzuoZLUCncTl0WZp8zGlCkTTL8vmz02VWlx6d/QqoxerUoQUscvbr4IQxz1HWNuxAM7xTtmEB0J8vMR4OsLAB0N8zisk4la+tPGankg2fx+yI1TVw/z7yyoF/7QB429sA20Z471UkQwn14r7YJ3S6XY7DnEm5pHsjz9kmC5FZPbR5R8kIYffWcAcPVaDZOSdRuqHb3cP6NnOqdh4yk+ArJV6FmpEiKgj6rcRlqddZIalGrJbM5Vg4sL7wFzoF0lz9rJtgQCwAddpyVmcO22Q2YVYLZ3Fr7C6Sgs1fqM5q1aaggVvZdSFv/YchW9mFUlS2LM3g7Vb5PuT89qJgLlW/1ja02IGnDt0F22DRdu0PvBNMMCCNUCsbCwxMFyqnrhGnVnUNxLkSndVKDEgBIwxIENnMJNK5EPG60nfWixJzLpkJuCjYfUcwHajWM829Rfy8LKuedlNUdDMq5pykLAfhrB7BgGTrnBkc+dobAFCxf8N6VTNEZ0usvmY/HHVWV0nGuOhTi9WyDbF3vIP9v+2wFiWuq0ovEMeNa61N3MFzLUt2v16K1W9aXQJU3gKlnfTsSQSETiVxrZdMLtpvhgRFIp/TUbmVDbiEjtsKouj+G/m5OlMHLFInoyMBMtQ2pO3ERt4ylKxLBE7Vv68ODoDv/V75QTlXOF4PvyjiVckckDPuoL5yBe7mVfyJr/onwNEzvWMyd4PiXqTyVD0Prp2pHwMqs3rEqcu6ASg8dMWmDVptqSRmda58vkyY1Z4dqzEgicU+f12GomhLzXPILK6r1MP8SAMDgpbjr5CE1Aqkgu4kpJk0S/6s4cnSZiduKHErcyGFJKTx2nFmCZHPsPVf2sUjmNW+wexMfE4dbscmtfEYBQPCxcTBCSNFULRK5QaTyTvRVT2z/LjUexbA8HuGEJC8d8BiXG2d1WnKxOr7NnD/PrIqgnf9iC2uDg8R3UkRn+UAFP9GqrPashD42La+t+8jHrAZzggLfsFET2rIMJxlVrCFnpY5YAADQs0I4ccGoBarp3ZWG2BAms9HIfqYYECazIWai3ymG4JojbGSDy5JLZKzmm2uDIjVmU0KWITvY+49wGZdY86/1DCrSZMCfsq+haLcFWvZcXPA1XxeAf4sqOcwAMihak
POajJbWQRMTu2sVm2EFAWy0oXvTR8Mm2S2nlGqfcyhTQDCWNBsqKtQDURmtRK3ZPJ5jWJAaBv5ju9ITVimGBAn9FgHS1espjDLRfF1nZRGSmXYc7e2DOuX53TXftMhpuKha+6Ji2M2zGZJNZxxzYDFwC6QrofnW9k6Z/dDEABzPrpX7J5ZbWws3BTwDhqx+ggPcfKwBjCQF3RSYOklWnMu34ccOdaqBmPVHmNu3gT+2B/rY0iBcWOAOK7YxNVlVitMgwAuxeo3sS6d1W+B0har+S5hE6KoOKbWjvmYq5jK5xxqpxduL8KLfdBZXVUoKc5qgQEZaPsGML2zurLhuYRrIMMJtCrdlPoMxT2d1cmG78gLwbHNwu6+gPYIFGOhhfuf5va4EwcsjjmrG8cXwVk90DJWlsS21BHeI5lXzNmvUh4dgJjIFh4M/KlrrFJPDwPCTlX5HFQFfYPJtUs5/YDji6hCitRZLXiqBkKK71V9IUU408gnKxGTeBikibAuxPS26w9oiR4UsVqGQWmX7njgecMBi1Se6lBrMj/uhWFAqM7qfQIWk3obpCMCFjdCrHbgX+c5BlGEyM0QrwbeeWITQ9dZDcD3amWYVFa68EKaM8sfYPinuuYAMdcYeM+Qw9ou1FndH2dNMCBDDkUT1BJsG75bIitb52sasIgRZzUFA2LbCJwSaSHnYAP6i/KmfB8HXoKz1fZzWWcee2+bOKu9vrO6cc8RRS/l2sPArex4tlxUhllYW/Pvl9yz7AcbMuFl41blUoZC9WYQryS19J3VAx08jaiq6QAHoB4LeJCx9ruLH1P5Dqd2MAGDWAmSmYWX7TlSJ3ye07E1AJQGt3hT053Vnsd5+5I/E+eve1zbhueU/Q5BAHlp0zdFLwgDMpQ7QXVWh26OZDN8PiywutxiQCwLdVUDVYX1Gtvu1DAEfB9hnYwK4GcnFROrNT6z/Z3VEh3iySe3Qnu7xFx+TKzWRBg1eLRLZvUjWZdi9VugmmAWjYWOY1Uo0+EBQnfxNMYVLmtbf/EkgspUzs/KJrmKWYufgqVJDecZEv74ccX36R4XwMC52iQMSNOir5qIUnhk7n4BizthWsBuO087XJEfc5RZTWynHxJ9mnR5nc9rLGCxwYDonWrDwh50VtOZ1arJOLl9cg8MCGWSG4ZAUnrycNiiwLrwMQ/1FjtDG1cmwZWOVcsfLY7u8ShOJ7TaXbscaJPwLwCeUyOT8KUB0MUJWZt+46w2w4BEbrbbwVHX2KQuE1MI7aOeXSKTsYpF6YrVYiwYcL8C0F+Y75uNQHVWqzAgxMwJ8ffHKkm5AzQImFjtpljFNnB+zjAgVxbsG2czxmUcWpBRndUY2BDjXGkSEkc4YBP5M58lFWvL3fe+tdh4P4QvqmuLtnhSCVSGY4HrD7WoE4UUFdKOuoHdKt+tmetL3Et8jA0i+rJonFmt/04M/Fraoi8qyWwmKOp+bp6HpZfgbM2ROHWNW6slHp+dGjGrGVt4l1ndBPURW/SVcxjxHBBFZSkGxCDUXBxXFoJHfh/w8r0aWSl3VqelSxkKR53VaaYQkojHLCqbvAkwtMlGRiI17lfJnwlMFNWpy8+te0yTgEXVNTDBqwDYfr5d7NqmppkD+DGVBoGiYB5ewjjjOpB2ROQm7nLRIaZwwlMxIK4ld4EDzFmtHbDoshB61TxDVLYpthgQywJmM9bBfrrGamMzDIgIp44ilkN1Pqymnp1UOPD1nNV7idW6GCtuDNjJm5BUnFhaAYvNvE8xPyZ1M/K6FKvN61KsfguU9iSXA+jzeARqrzPB9Ty4imADAM2kUTuMQoh0Q0FlFLFahHEo+Zy2/rgjJs0qDAh1MroPBoSydlDxHnmlccU2QXRmuRwDkoy8LJuWb6GuPfvs9g+7ipt4qQ/cW2Vt6zuAR5zwZV7pT5yF+DuIASFMbvgGkzJotORMOorwpUo7N2RWe0PO6ozIrB5Kp89zrPIQi0gvuO9C2n0FtqVEn3
1rgC8CWqn3HQ50RQmkaR/Xk7Soi+A3U2e1TKyW8RD2Ld/H3M2wXrWurXA++oTPS0xwVc8toN+aKsYX1WvWQFRWPrMAqpxPnC/AWU3JnAAw2MUlKsms7ftmNmMYkI0DxDFzNC+4NXA2Q+RkiAfE6jLOGAN3SrGa0mkjaqTTRNtwYFmsI0QW1AaYOX1U8wLTVtdAPucywoAMsV+pCCtennDYCzWFL0i1OyF2jjkgVmcWKQixCcEbQDVoC4oA4Dg4CFKcpQH77MsSr68P8eTijCyoAhwDUkvEaipeRCBxZPNuA/erMrBPCIpEsdrzFfMNMU+gPl8q4SfPWWdKYOBWVm2EiA3GKZjVdY28tMnXdZQDTWVW26U8G8DEWa3axBUZRNRgWMVx87ymmVlEKTYwGwwIUawO7KJLctv9OYR5p3SMbTaY6GK1NChabOJTPq9mI0R+rzdueJ3PzPcb3NhQZZuCOatFu8V8zv7eww3WMceAiLlTGDKxeiS08ey0xoEfX4hYrXWPibXnXhgQPUMmAOm5lnkF26Bz4VKsNq9LsfotUGlmkVzQQ2mpzTG10ukvxu2lFOnEi50gVjfM6iG3MolZPdD2TZ2MjjneqG1InjcYtNmI1ReCAeFtz0KY/uqvBp56iv26LVwD423vfMJA2lwY+LyyjBD8NOZ4a8I7afdWWSrE6oqHbBI2QobSzk2Y1f7A5lXjgNV1kYmgF5kdI8uwLnwsZnrPQoMBmTJIybLgOrX8PhDccqKQ0jCrOwIw1YkiynP7k/xG+KQeV7bYLQokpWscsDhzM6w3rfs9z7EpfNpxBecuUz8/dZaza6HzThwJWARA5Myrx8OypG+wNAeQHJSKARmcF7SqYet6HrBcInJzxKkNJAkb3+d8dyOK2IbowDsmXfE2V5JYzZ2qEtGnri2yoDa0iEwTTQwI+CbbwBzGsurJHYoAjJzVcsfbBMFXUzOr0XKqCjWFGvrVqmZxLvnMksxmphNdsdqv1WgsAElu0zAgAA6iDGd5xN63aYrXN0d44kqifZx2NQ77qZzVnqcOnDV4DpSb2GJeROQKKzEgJuInBjZCOLM6CCcUgHklua2/RhDdEKqwOkfPbDB6riamC5s5vfPSkpoOAJhhJRQdIVQMiOq4Rc7NLNSxy/fZZ9aZd29i/txSjut523d859qWKd0koRKr88qBZ7oRomSMEz4vm2FJ8rL/7weIGBDPY8LymFgdl+zYwjGyWDRiNXNWt9ZmQqweCrQGcH5W40CTWa0KBG1XIyprzLv3woDo4rGGWPsmIdG4FKunqMuAxbdApZmFA82ARd8pBsXqBmq/78MnFtADzGo6+1XtrKYm8TYtfgr+UElxVvO2b6UDWPBfJw5YLCqblEujXOjx2mJAFlrHDJ0ccTw8IUgSIFy0nNWWBfzgDwIvvwy8/e29Y7p2PMisrmoLtkdwKFpq0Yc0WbCsXRGh+1lT28aaTRvJnzUCOI2b5lrqABnSdQW24p/CSbjJHNY+qOusjmz1wrwosMoDzCNNsVrVlgsDDAjYbVPk/D5o30MNt1z7kAAAbxGw+6ttSRFCigFLlIkzu9eWcYSJIgKw/XzbixzeTm/urE6xXre+xgPwZhFh4SDcGIqwG4AFzupObscCFgGQXOBDuKlcLExtfW43gOFNUV2xWswLkgJjZ5MKUc33gSiCbdWoywpYr5mjdcbfFVGEyMkRx+pjZZtCvyuIlx/YvecAwPb3RK7u0HhI4RUPjVvkuQY/VwC/dszqwqBFfcRZbSJWz8MS6yLYjrOGYj0w7qxe6obVQaCx1CFVSeaQedAHUY7zPGDjd1HgQTqTZlrpVOOwbz1fDaqE+Gx5qnm3gQCsZFaL8FKiWN04yycOWPQDi3UCSDbZjDEgSsQM4d7iSIU6L3bj2gR+kYA03DlXyXWtaovG77cseG69ddi3/52GAYuW4lzzyoE/NQYkJ6xlZMeVBiwSMSCWhdAvt2NX6xhpXJHHLNcFisTePd
dm3UU0SKiCosUmvkdQGsW9VUvuLYiARU1ndbMpPvxteVLAt6uts3o2Yy73+yusc577I35uEDBzwHpYrD47g3bA4t4YkLk+OSBLh+eqaQrMdXQzcT0kYyFJK2iV5zFN5LLodemsfgtUmukHLI4xfdIUzJm078uCh0nlU/Oahxbm3FFLwoAokunFueaUtrERrMRFBSySd3bHeHSiNVnXWT3iegNaoQlt5IfrAu94R//6jDmrjUQf9edFfQGx9mx1eCedhz6waUOd5A91A5g4fYT4J2MAlyXi3MPMy7WP3WBAZM7qPMe6CDCb6Z1qs7kwJQYEgOsBpcKZZRL+ZUcBS2Zvq3WGQhIgF1Li1EBEAORidVkiLnyzgEXfx9xLsY5bn0ue4zwPtZ31ALgbQ92SCTA+Iyl0eAAJBIA2bg28Z4q8pnE/BbKjUL8TSQHJdqneaBRV10hyZ7sZ4DjAfM42YO7fZ/8XuxuzGRen1IdLVzljMhJSxVQYkDLO4Ng0tEiziJS1PAPI0hq+rrNaYECmDvwZYFZT+b+AWly/KAyIUcAigEVUYSWEWmCaMXbAWZ0KDA7JWa3AgFQVksJF4NLeYctZibOMO6v5+6bZNCLWoLOauhGkYt8aPAfKLAsRkGyg+0mDEKcI6VJiQIjOas9jJgkZYqWutxuMms5qX/ZOMHGpAmoxyaTLBK3PSxWIacBArjLVJtu0zuo843MCg3mc2GBoF3tuCRkhvKKgRlz4vfl8Y5SaylnddCAbXtcpmdUY6LIA0a3LMSBjwmcWV/CdFgZEOKvvr7DKw93cnzDkGSHD/8azc4sxqzXGgia0cMBWvNNxt08NbV62ir1v9VjYAC6d1Y9oXTqr3wKVZhaCSM9ZPcb0yTJNp49wkQ24igGQhFrHqpWs3rxyEBFeFI6n4NHxcyW9gBwHnp2jUIV0XYRYzYXKiLJj7CrCiXg1rkpdZrUTj74s0xS7AYtDJRzrYw5F4kaI6rh5DiZ4OHoW0MH2bLG5oDuyCme1wgFNDRodvAaGgofvKCYMAtcQ6p8vc1YrFuZZhqq24AR6F1fJpkTLpUppI3awdU20y4ShCGyfmY6zGoCZ68+3kK93RYQkAW/P1hf9APDJcNY716QMEEZ07mnDrE54C6VlAXmOu/ES1w8UiuBQCTTWgLM6Xld6oaAN71Lx5waBu0PMahZYR+WsVyizsj/xM8CADHZcicpzFl4XWttF0sEB+/+dO+z/4RYDYlmQY5Z4pZuSYTUoGJDQkmJAms+fIlYPhUnBwFk9hgEhupVtfh+078yqqGCbiD6hK12YNwu9ifnapNDlVi1n5Y5Y3ThBL8hZnWQ2whkBAyI2cGUDQp4jKV22MUhofz+YlThbhUCW4fxugqWH/eZsA+UFNtJuwKLotJrYWV0VFfkzU7aoNwYRorM6kM836tJQrFa5FEXAIpFZHfAw215/ZVWxe8vTfHbFuzatsTOSNggMovt1gAMNgMxZ99waRSp32AMgi9Xint2ZWYm5oamzunOujemC3M7nMTFzU6K9GopTm+6sBhCFNe6e+r2xizmr9Tfu+Kn2n4OpNkIUXRbUsHTPrZWbl6SARY4BUW2Ki2oQl+Jzm89ZAPS9c6xzH4tl63yCAKFzNhxoDSZWH2o6qy3PlQeNtirONLn4rgvf3iBLh5/LLLcQeHomTwDDGws27fn67b99cDp7WXvUpbP6LVCpYN1pOauLQWd1lkHP6dNwNEeY1dSgMpmj1GC31PFsedI3sOVb6b5/R5jVzWKPKlYrnDNkl8dYi1+iv4BuMCDJwMSw7aLbx/nWOKvViBkApHBBb0BMIjGrsYezurL1JzeOA2cgaJTaYQDHYS6XixCr7QKZ7LhZxnANmg5oALBDn7mKZaqP+CA1H9whd4NJirrjqpmXRq4/Idq1nNVVRk9PF+UHkDreqA4XAEAQ9FmHPGCRslnRlGVhHhRYt52PfBNkPiccz7bZ/Vo6ylljvK5Y0IvWBu5AwCJ1sd
uMW4qOECpWQYjrMmG5LFGDEC7Hu1fGktmFWL2zgbFcAuB7EcBWJOMDRz0wu8/WORkDohL/NuclibMPgIk+qjApXAAGhAdaU93Kss67JOFhWgYBcD1xXTjTHKJY7XmwrRpld24ggq8C+hJmMa9wnofNRlueVmat9Bhue04p/F/wTgAZ/gFgG4OFR+5iOZiXDbP61qsFnpidGIvVbtBxVtf11j1HvF89xRyGtdLT7i0/ULjguUGEKiiq5humqAY/tJENBSyGtOcrdHKksWSsFRuMuvvY3Anf21wwmcfy40rNNyZsafD5fD3grJ7ynjWdGyoENWa6MAtYDOyidx/EiUV/JwKIggpJ6fWd1QnoGBAP/ffiBBgQW+GEZ58XfSNEJVaTjDIjHVyistxi+o449myGx6IzvPr5EqsiYM5qUYJZPdItHcfA3Eu1zW0WMIwByR3W0ajFrC6RjTmrc5u5y6cUq4nP13IJHB6S/upl8boUq98CleaaAYvC5TPkrM41F08CA6JqzaUunlwXjj3iKCW82G3PYTt6Ct4jWay21KKqieMNULRnU13gAOB50tYuUWwBrekkcxy1o1YUD1jb2+3DRZ+he0v8bK3iTsIh9itJrB5xVhe1rY/sEMgS2UfFN21I70nhcpF1A0wwGZcG1glnNWXN63LOusJFBkBfrPYVbErwgEUis9p1Id8QE+MW1fUnxOpW+0LTqWDUom4z5mWbJSqSuA3aR22rRpV2AxY9M2c1GFN2024hFedNcb5a1lacUEyc4xgEZ3WlRosYbuCqWNgNZ53o2Ja+v6i8XnfPgMUsQ1q6u/fEwQEiJ8df+cQ3snu7hQEBMOqs9qnOakX3RryujMRqtoBSnO/UzmoTN6GYH3b42pvEZv9+KrM6cPrBsEXB5y/EcxUOxS5SQOQ4+PQlzHJe7zirNxsY/fuBlltXgpZIC4eGARE5DrJxS2wCEfMBDhYVzjLmrH7tlRpPzE9hFjYABJGzG5Jc14gLDxEBCwag2XDPin6YsQlixvMtufhr0skGqNntOchzDWAkYJGKAXEHgmHznLYR4jhyfr+p+1UlJhl2namua2M+Mhm3ZII9MYcJgNxZXdd8TmCw0eZ5jFvc6Zhlzmq6WB2GQCwTq2MCgrI5Vck8TqyTDZzV0hBXk/U3+AauAkdKGrsEBmQgKBwA8rxmXcPi+i4W+PrHP42f/PgR1nmARdR1Vo+L1QxlpiH+AqOGOUBw8XUxtwWyoZDJumbOap01k+MoO9FNmdWXZV6XYvWjXnWNNHe0mdWiDUtVWWbpOaublufpMSCuVaEs5AtotqtJc5TunFe7TJjVtiIEDwb8W47B6LmH+LmSg4TE7n6iEKsz6L0k+DGVgX2ixAJq30nuCFuaLPrwDRbVuZLFasESHbi3tIVK4ayWGRRNFk9i0qwIbgRggAEp5MfNc8RUsVqIT7IDiwmv5oPreWr2K9tgIorVKi6+cP1RFyQSsZoF3dBdjwDgCbG2jQERSdwGwTzNokw8p2VJb0tuVROAxs83W2VGra6NOKHqNNlQnNXl9MxqsTGsGLfI96wQlmV86bJkDhiqC3wPsTqvHLhB6z5bLvHCwV18bnUV//Rb/9IOBgTAoFidxSXZWR1FPLCus4DerGv2+ZOZ1TlSxSKy2RjWEasFvkj2AqtrOqeVO6t7YnXqMLGW6lCc82DYTtiqyWJ/O4fpjLFFwTAgBmL1YoEdZ3Uzxk6cCwCgGRMDr9IWv1SMdQDGYbYHy7q5Br/8KRcvHt42dlbPFvYup5YH40Y+0fXIg8oG3cqE97cXOkpntYmo2mBAuudqkI+xc74SZ3VSeDRntegIkTmriwJp5erfW42RofP1plOWjgGRul9Ng2EV8/nGyEAZD1VidVnSMm1axwWwew/UNdLSQegV5HurcVZ3BMA4tfU28DsVRWBjQWfsMnFWN/P5TsAi1dgGoNXdK99gITOrVTx0ENEtohNiRKxm+k7r2PM5vv6xl/CTn7
rJQupnrX9PGEo3KvrHrPUNAqp8DFHtLmwN46RyTStKrEEcjXmyOFfJIjzLDXI3LmuSuhSrH/WqKqSloxegwtNSBzEgucVaJHQxICpHLTXkgot0UgFYBMtRxod9xGoCs1oZBgmGFCA73lTuNCoDGdguShU7kElikZzVzpCwDOgvoMSC9KIcigNitU91Vg9iQAj3Ft8MUuFwSmr75EALrWmADGMAW31RKc+ZWBsRbloxYZCccBaX+jv7GBZ9jDAgglktCfwhbViIaocW8mtrFFDFy+u2Z6PlnKEe17Lguezf2/Qm8qA2yzOLxJjPKqzzrehx73aJ69E5WUxpnNWKWW5zjTXSw4c2w4zGLXvEWW2wKToYtkrNBRgLWOSfoRW03jUHB/j//Ib/N/7sV/8DFpopfnYjVqvHuzSu6AGLkc1ak7sYkJWBs9rzWEiRIqG+2RTSEatdKINhUZbMBUTkQMvMDOvEMXNWRx4TETr8+rjwEPkjmBhVCdxU13jBRXAjZ/WCO6v5+W5iy0isB1pjbHeMaXebaRbD1ihyHASzmihWLxd146z+F//+AB9+4lfMxeqlzTpixDXggbvkewBQitUmbmUlKtAg2B3ANmBSimqgi9V+aCOTbVoUBZtvhbSNq0FnNWUjRGCGumtFEVxpunHVPa64zgZitXQjxGRzwXXhWXJn9SQYkE73SlJ4CF368yVE0K671og1DyFWS961YqylYEB8SfdK46ymY0Ckc6MG60g7bLN5qcKA6OoF4nkdE6uFuNoSq69HK5xtXDxI51jMd8XqwB4PbUxTMM1IUy8AoBarOTIw9DXGcI4ZGjJjNt0mOtNDrm2UqRwD4huM3ZdlXpdX/lGvPEeqyw3bx1mdazqrPe/C+JyuPcDqpb4oBvhDRs7qAawEmZ83xBKlip/8uIGKRwce3KlzD4hzHbgGALbtg/tOcpu294ld+0NoDRgwq0eCr0gICBE0KnNp1jV98TS0C20iVlsWfA9s8dS9wEQH9M7fkfTTr08LLDQDPgDenj6EASFOQlzPQllb6oBF3bA6UVZLhOIzyDgG47oZCCl+YDFm85TOaoAFTbZFqrKkuz5bNY/qHWf1nVslboTn5DZ19u9XtNMDiDc1u8YaG7iuXaKoLAZe7lQTqEUQgD27hIwKBRgE1g1tihqE2O7rrK5ra/fZ5cxqALsCGceAuCjUY3dSkQMWG8G8M8bEm5otoCnOat9H5GRKsfo09nDoxwRntXzcqkuzgEXf7odBbjJDZ/XMZwvz9qq36bKiuynVDkUzDMhiaTFXsdgMO/VwNVhPI1ZL3K/a83heduCxHAeFWJ2aOKsPLZzlIYpVgs+8FuHFwzsTOKudXXyTMC9QndUAfK9mCKuuAGyAAWk2U6UYEIPW/0COAWnETwNmdVpJxGoiHg3AVvxSOKuTwtPHiziOPGy2Ca7UP01xrqqxQPxcSnkepPPDPKPz0JUdjaYYEP4Z19muWB2XvtHz1QQsdljIcWYjcgwwIJElZVafbxwsvYTmrJblAhjiOhoUZffzMjGLQb3JBhDHA99H4ORIsvFwwR19h8+13nP8Kv7dned7YnXo5khGQxsJ5rYhHQagbbYKE94+zmqd7KgGL9J/jkyZ1ZdlXpdi9aNeec7aB3UmDGLnaYRZ7euIFILPqRogqMKX6zJntWxhbuJwGHJWmzCrB8RPcnu2OK5SrKZjQJSuCQBJZjFxRvPl49oKXIUoEcwS7d8JMNjWI4QgqutPcVzqC4gF6ymc1dRJE0cKSK8rb5+kitUsCFEyfhhO8n0fcheZyeLJ85ioJfnQVmcV5gQ3xlBQWcP/JTqzpMc1QfeIEq3iLbHa3FntyJ3Vjmnre717XEOnk6j5rMa68BsR/O6dGtejFVlM8QJ7EAOi7ay2LLgDY0GZV3As/bb/LWtf/sdFZdM6eIY2RQ2Z1aPOahlvvJ04016t8M83stN2xuhOpZuSuXwo6p8vF6s3G01mebs48zPObOnGxc
nGx5G/oWFAFPcWmX/L54fdz2yTmInVzjxkTlWZs5oYADjkpiy6WBnNWiytHWb13VMfN6Jzo3HLjxyl+zUtXQQ+zXSgzHHgjm0SrxjAwc0IZ1mEj/9Cjfc9c48NVcbO6r5YbeSuR0v06VwDk4BF5RrBlFmtCFhsnJQUBjKAaGYhKSQO+8IgfNnzmKFFJlTlOVIKC1uFARHBlVRBUWwOd99fvJOL7KxWzA+NnNUDYrWRs9r3GTaz7f4UXRsGz1fjrG67a+saceaaOatnFmNWdy7Eam1h4aVksbrX0VgU7N6iGkSazvF+NkJuYDyRhg7zYkYZzXkc7+BKR8TqPMdu5zyfa33F1Vfwq6eP7YaUhyHvChsTwLm7eEoMCKV7Q+gFqowrftyscvTet1wvkYrV1FDzy5qsLq/8o15ikquzJhMYkIEQvCy39VrqxWCucL+SXWRcqFU5q8nt9EPtJ1Rn9QhWosGAUB1vivZs8uSGc8hUDvs0s5k7TedCjLmgASDPUQOwfR2H4gW49m0WdFhUElQF6K6cJrxT5aymCJUCrzJx0ChsG55TIS/74URGzmqw20bmdjJyVvOJUC3ZNl+twCa4EwYsmridGmb11BgQoOesNg5CBOCFfQxIk8RtclwhJLSc1aSw3U7NlzbWeQCs1wCAO3eAG9EZ3VkdKlr0ecUJ2MJM4/5qeJdDiBmSqKwObiQLNCPvGfE9+uc68j4AgCxjglt7ofPEE9tftwUy/uvQSpWtqVla67t8RCm4+E3AHmXcsixEfiXlcwLA6YY7qzXOt+ngUdxbLtXpw+eHfWe1yzYDic+tFfJJalesLn1EF+CsrgFYZJsmsDxydpjVd058XA/PjcatYGbvhguKEqYTwt7KoDstzxkijCjaLx5f4jwPceuVAk8fnLEvGgYszg5ctskonoOiYMzqkChSorUhOqGzekisNmJWh470ncCeWSIDGYAbutObA4ShRSZWFwWq2oITaM4NVBgQge6hTjUakap/XJNOLhVnnox1BIad1QaIGfg+M+C0EZ9lyTpZTZzVvo/Q7WxaiOc2MNtg2eHX8zqPXbKzWroZZOiAHsKAGDGrxblKxWrC2kNsLuUjwnJh953Vto33XXsFANuobSoI4Nt7oEVEzhklYHFKsVo4oGUGLFFcN9PKzVFkeQCG75nLmqQuxepHvcTu9tQYEDGY7fvwed6gUEt2+ghmtWxhLgI5iAgMcYxemQYsDjqrDViiKmd1TThXoHFQqZzVaWbpJzILYXloE1/XWTnEVQbooioP5lG50xiHioBA2IOHri1Ucoei0lldE4NGLWs7GZeIqgDoDEUfDCuhWjxRhCQR3Cjh7a/PK8zdVPu4tuco26jz0qYHLPo2c3ioMCBEzh2A7flwa2nj+jXBgIRyZ7UpBsQTbN2pndVHHhM9NhsAwN17Fq6HK7KYwpzVAxiQRN9l3jhnZIJiXtPuLdvmGBD5JptRNoItSbwHfk0wIAB23zVXr/b/nB8TjoPIyRCfK8KBk1rf5SNK4ayOY9AxIADCoJaysAHgNA70MSCBM/juoo5bUudjVWGTe5h5Gd09FCjE6sK7ELGa7CbltTiwd53V5yFzVhuMhY4K2SFQGBFB9BnIcTASKgHYV45QA7h/u8C14Jx90dRZfeD2MSCGYhpDjg0ELFLEtCFnNXXODXVHhEkYJNDClqjmW1Rnta0Qq/Oc5lhWiUllaRaCpxCAy7wyGgtUQdlGjHHVmsZ0bsgF+533LR9bTDoX4LKgzSRtIc1EUDp17EbLWd15164S18hZnXc3g/KctqYXxTfyZRiQoraNnNVSDEhVoSht/fFAZGMUw++oHrPasoDDQ7z3KhOru85qywJQD3/OTc6ZZie20+0EaBfvitjb2MaP6TvlsFhdlsh0O5kuMSCPdF2K1Y96UTAgvPVf6ayua6S5rRewOOL2ativRGeWVKQzcZTuE7Co+wIaCVhs0qMpbOWBgEVyKIngkKkwILnNUng1d0
qHwr8A6C+gxDGnDirDsPORnPA7tFssNlh0J022zXE4Ev5t05aqeUxeKudIVfBJPhUDogisK+IctlWT2+kDp0C67l/bxllNWDyxE5s2YDHwa6QyBjIfXxzP4PXawYCcbHwc+DQniigvdJhYKxYOPIk7csxCxVxxf/EVb11MxKw+8rDKQ/bBA7hz32HOR6KYYvuuEjEDAElKcFZfBGLGsti4JdsIgUFLIp+MS+cFJrkAlkIAb1Wd8nuuvdBpC0z37u3+hSBgbcln8l7PJAHbaCWK1RaAKu04qxNL+/NvVxTWLEyq66qta5wIsVrjfB13YCPEMPyrx5QtS2wKHzNP9RLeoxRidVJ6dP1zLFTNYIxZHrtYFUEzxt458VmAq4mzWLERQjKdiBpxVgMg37M4PgYA3Ltb46rPndWGYvX8yOtjQC7QWU12v6quq5hzUzEgArfVec+Y5GMAUG5a5EnJxoKpndXUe0uEb8vE6tqA16zYGE2Tmq1jiHMjpbN6CgzI1AGLvs/MQkkXA+IhpCCGRFkWQr9kXSHiOhQFNoVv9NyGc4eha7oYkI3NnNWE9zfrXOh8Xty1b4wBUTirycxqVcAi1QneYB0H7sm6ZmbEbqDz4SEO/QRf99hL8KLW18VLSYIva1eac4Ojplg9iKSljDGKzI3ucdNK01nNjVKDGJBLsfpNq0ux+lEvEcyizawecFZXFbLKge9qtPeIiYIM1wED14AIllPgD8jt9BcRsCjaqIcwICbOapkI3rQkap4r0OyWSwf1ukaaOyRntfLzEqXr9GgCxRSfc0UXVVkrtVxMIr+ARjZCipoQsMgFKmk6vWHroIotbcR7hBoDEq8rukNROH0kGyzrNQ0DohwLarYAdm1am2MQWkys7ll9WIu67dAxIJZtM4ceF1I+c/cALxzcNXNAd53VNRPXQs+MxeZ1xOo0rowWj6KuPebiXrJoMCBfuBPgmcUDupg05FAEd1ZrYiD2QjWYbLIp0DUkV62KJQoYYUDYIm/ElbPO5U5osVDqfia+j8jNlc5qxnCnjzEzN8Nm1WE2x7aZszqENEwKRYHTLMLRLNMaZ5rNlYlZ+02bfo7tAlWI1b6BWN0WasVxubM6NGVWdx/ZCbo35lcC5qwWnRtnPgtw5QGfpFJlLgh3McVZLf6NCma1+LmkOjrC3M3whTshrjqn7GuGYnV06DOxWiigRcEcmgail69wKBqxRAed1USDCKAMdDYSPwHA86Ts8s2aB8NSnoVGrJb8WwVegzDf8p0SeXc9IzAgVDyacL92DB1pahY+reqQI2Md+bkOBiwaOKubjIjW2M1CbA3EagCBV+8Ky3mOszzE4YL+TghntrTb6DzmzmrCu9ZVBCzGhcGGmKprmmpsE4f1FJkmvFvWczVd654HywLqIWGZC+G+W+/ON46OAAA/9ZE/K88LkXTxtSvLLX2DgJhzDojVNaA3xggckMI4CQAoCuas1tHNfJ9hQC6Z1Y9kjV55y7JCy7J+xrKsX7As6xOWZf13/OvPW5b17yzL+rRlWX/Xsiyffz3gv/80//PnWsf6Qf71X7Us6zdd2L/qP6YiBiwqF6VA8yDrJqUOuV+LgtjyzBnIZQm5o5S6Ez8WsEgJY9gnYNGIWa3CgBDbkPikUcWjS0uXtWbqnC8PAtwLA6LjrLYq5IrrWuUlbOKkcchZfSFitQE7Temk462DJrv70hZaw8WTylkdnxf0xZPvK9PpVyuQMCDKz0s4qGxam2MjVktEcBPHOtC6b4VYff8Ibzu4a+asjtxeEGJSeohM3JRoidX8uHFiGaNFAODa4x7utsTql2/P8cLBPbqY4nlsYqwSqwUSRQfVoFqMoBWoRRy3lCnyBpuiPUctr7pgGyxUvnYx4qxON6W8i+e3/3b2/2/6pt2vBwEiN0N8Kp/ExInFxhhiwOLcS/tidWLTmdVgt2UscZEhz3GaRTicaz5nAyizpkWd8ozZrKsuK1v3bVliXQSYBQat5LYNvp
KezlWrEqvFeRuMh+7Rgm0Oi86Ns8gowBWAekNMtKkHhHctF8Br2QTJBAEBAIsFrkVrfOr+FVyt7rKvGYrV3iJg4l+WsfMrywmc1XxjvLvhbpA5ocy1MTGIoIXrUAUsmkzkJMc1Yu03YrVkfUG9txxn6/5sr+moXYeiLAueW/WcqklqGW2Oq9YIDWppSmY1d5eTmdWOA8/hxh5xDZqARQNmNYDQr5izusWaP8tCHMzo7wQ79JnpoocBcchitedLskfynG22hsRrwA1jMtyUCV7EC+wRZ7Xmul6cyNDtUxTMjNi9tFysBgC87W3bX+/jrBZubQKzWsWBBkB3VjsjzOqSdQlo3V7iuPFAN+Ols/pNq31mTimAb6rr+r0AvgLAt1iW9dUA/hSAv1DX9dsBPATw+/j3/z4AD/nX/wL/PliW9S4AvwvAlwH4FgD/T8uyLj/5sSoK9nLTCVAZw4DwwUyX58MERUXoE7Udz7LgOjWbLHR39vjk5sIwIBRmtaXe0SMHqAgRXNGeTW5Dct3BpO+k9PTX+lycKPdxVuuGdyrvLTqfc8j5SN4tHXNWEyc3jmuhlLkpuahKfU823LRu+6iBmAawwLpMctzNumZ8Zaqz2smlzmpygviAs5pthhHF6gBsci/5vAAY7cKHQc1Yf0KsfmAuVvszh4nrHbE6NGASAsCNoxy3NweNky6JzdpyRdkHC+ZS5GJ1nDoM02DqrFYxq1POrNbGgEwcsMhPVTpu1TXyyqZxWpvg5f7n3WROEDFeyrwBXsm6ZO637pjwjd8I/Lf/LfBt37b79SBgbMaV6rMyQHb4PmZOhs16d4xpBHDiyjSMLLmzOstwQhGrB0KiTTcaPbfe7YrhLd9GzmoAsPj5dF21VP3T8+DJOOsTYECwWLD/c7H6/ibE1WBl7KyWuV+NHNB8EZ3Hks/G1FltWbh2mONXTx7D1foeu5/EdaGWZaG2+eeyWm1d5QZ0FRWqgaE16B0GAKbPnFDMN4q82uXI6pZiI8TcWa1eI9S1pX9vcVE564p0RWHGrAbgOXVvLpskYPMNasCiwrVvgohrxOrummaC8G3X4XMDcQ1EwKKps9rvO6ur2tYP2GyXIsz4PPHMMCDtfz/QbIqSh+5mDdr5uhgLqMxqxbjVYEt0jXj8WbTqSi0uczNi77FtD8DPP9/7ujXkrC5LJoB7mni/AQ40ANqGmBDAh8TqPKc7qyU6zKVY/ebX6Chcs1rx33r8vxrANwH4+/zr/xuA38Z//Z/y34P/+Ycty7L413+4ruu0ruvPAvg0gF83xT/iP+riD7Pl6+08MWaY4s+Fs1pz12lQUDRACjiOYmHetONpH3LQlURu7dkjYJEkIoxgQMgTZ44BUU1E09JFEBGRJSpkBxiz2LEqTWb10EaIgUNRlhwNMNGn5Nz2ycVqGy5hctMgSySLp5L6HKC1uz+lKwlskp9JMBgNBoTIrA6dAkkswYBsLMwpbgyV6CM2gmzaJH8IA7Lzcwn12NUMb2wOm4DF2+dzPBadGfFUD48dnOXhVkwTrZOGjpznHk/xudXV5rirDXeomk7sZjPmdDjZ4PQUOPDZtTARqy1A7azOuBiu46z27ZGARSIGRMXCFl0Wdq3/nhHzAsn7IM9qMr9fynrsVLwqEch4h5YFPPNM/+cGwWDAItu4ICI7fJ9hQDpi9SZ16GgRcGe1AgNSVA68kJiNoELBmGw0dsNxBQbExFkNADa/J8U14JviZLHaZuzNrNw1M+RZTXeWi2qL1XmOqgLLGTBJxlU5qwVSgRqCp8geKRODfAhe166U+NzqKq6Ga+C552jdCt0SYvXZ2STM6gZl1hWrDQVFAOqOK6r7VSFW5znRzNI6rmtVKNLd8zVyVnPsWpL1r1+VFezeItyzvithjJclssqlc4XB2OVFJyQ6STmewCRgUYJteSQDFgG4bkew58+XqVgdhth1Vpvy8Pnftax6951Y11ilHh0DErr99Qx/f5HHmKa7We6sJq
+9xEaIYozRFqttG7BtZqxWictFgUzmKm6fQ3sOLb6xrlFXivPJMqYZhYSclCFmtRgfCGiRfECDaJBDBJNnJunqzQr7Uqx+k2uvO8+yLMeyrI8BuAPgnwP4DICTuq7Fm+hVAE/yXz8J4BUA4H9+CuBq++uSv9P+Wf+FZVk/a1nWz969e1f7H/QfXZm0SQxhQAgtEq5dKfEPzeKJyBWWOkpNdqEHXEkmAYvS3VdeRUlszW1Y2HJnNdmN4Hms3Vd2H/BFpLbuIybMAwYs1vKt0S40wqxmzmrai0Ip+oiJs0PgFQ/dWwIxQ7hnHQcoZYxSk00bqJ0jRpNxqDEgm1VFF5KEs1qGAdnYWLgGzGqFg4q6KA0iG2npSe+tnZ9LqCdvFHhtfQRsNijyGnZdstvUwPV35ZqN+8mi76yeQqw+v9a4KV+/H+DJ+YmxsxqLBW5GZ3jjtoXPfAZ42wEP4aMqX0PsVwBxRmBW+7Y6YNGAealkVjdhq4R7Vjg0p3SO7BmwGG9qvY2mIEDo5kjWkglHXSPOHda9QUQNzdxMoIqb2mSOGbM6spgzTeKstqx6uk02mIe1NeJfywE9iVjdCYZthBQKq5mXJxG+GLPcsHvD95kLOC9QPzxhLtLZjJRfsD1ZPnZIAhYB0N2vihyHKfIBrj2/BAAWAPolX0I+zk45/L7kzuq48BHNDO4B32JO3R7KjBjsDgCuC9uqUWade76uUdQGIXhKsRpwLTNndejmiDe798F6Y5kzq7P+Z6NEN+1zqm7NjAzt99cEbGXXtXZRZjDHgDRmjl9LZ7WJu9zFrmAv5nEGnQuA3FnNfuAEm3ft8bAocJ6FWEYF6dr2cHb8XDeFb+Ssdu1Kim0xwYAozQwmIvhI9ooQanvTjW/+ZuDpp4Hv/u7dr3MB3LPL3kZY+5hpqRlYCGzzMRTzwyLO2RyZErBY2Gp3OR93LB2Wk9BLusiSukZeWiyw0mRucFlGtddIUdd1Wdf1VwB4CswN/aUXdUJ1Xf+1uq4/UNf1B65fv35RP+atU5Q2P97um0kmIOKYUqbRUI0gMEyEL8bqVe0+El8UfPJay9R16guocRVL/qxmO/6uRRM/mbNa8meGIZPKFr8sY24X3QWEQJYMrGmTjeYCymbc4LzjnhJV5DVZ9Bna2Sa1YQH7OasJn1ezaSOZ2Jgwq/1ghPdIbSUPHekCMt7UdKePYFZL7tnVxp4WAyLCbhwiBiSymbO6ex9M0KL+5FNgYvX5OV55KcHTIljQAC1ydM3FaRb1ndUGjjcAeO6pAp87v9oIX688mLHzNXUhzOd4YnaKW3ddJlYv77CvGzCrAQyI1a42vqZZjMi4wgYt6sqOEB7MQxKreTCNbPOS7AJvOq6Gv61pVd/32vo+IidHvJK/v+PCR+QTupiAbcBiR6yOU8eMWT23mbNawismtdMPhERPkTeQVe5WrC5LbAoP89BMrK67GBAxxpgIlRKxOkkto1A1UZFfISk9vP5LD3BzdmbMaxbdG1XWb/sGYOaslmzgNnMtA0Hp6q97O64Ea+ainUistoRYfX7ewk0ZMKsvKGDRG+DUGovVnXdCmlnanTs75XmInLzXdbaJDfBFnofQyZHm/evXoJsozmqv7odvlyVjK0+Bg2m9xNLcNsrIUOVOGHWvcPEvLzuCGjWAvVW+V+129E2EAekGBKfrggl0hhuCvcDZLMMqD7CY087XC52eu15siM3m9PBOJQaktuH6xDWSilktnL+US+v7gx2CyHOm73SF5cND4I/8EeCDH5QeM3RyJOfqY5KugzBOKpjVpDHGsnhnmCQvqHW+4ufvXcJZ3Z0fC3OjQ+hmvKzJSuvOq+v6BMBPAPgaAEeWZYk74SkAr/FfvwbgaQDgf34I4H7765K/c1mqIjJ9AqeQTkAAMFep7i7ZHu5X6qTRdWu1o5Qo/MFx4FhV3zXBj1vVFmv51DwmcxVLzqdpHSQIX6oWJH6uRU1MjxaOHBnbiS+gbV9f+HOsah
ADQllAKRmtYF9yiKKP7/UZdwAuTqw2CVj0FMKXeA7Izmo5BsSYWR05bMLQebtvNtz1ZtCWKhOr14lthgFRdW5QndVBp21SlIkwwevJpx28tjkCzs7wiz+X491XXjMWUnphNxM5q599qmRiNT/uK/fneHr+cBJn9eOzU7x6L8Tf+Ts1PnD8GfZ16mp3yFld10hyW5uD3Cx0JcdsOkIo45ai7b2ZOFPGLeFykfzzTTAgrq14d7WqEav3vbZhyBZPMmd1UTABlMpalzmr6xqb3GVho8T7VsWsTlc5W/RP1RECs042oOVU3RGrDQKqRHXF6jxnCCviYh9gTsILcVYDePJ4jc+eX8OP/4sK3/j4p8x41QDfbO13BlVpzgQGimtfmA4kzmo21zITlK49GeDqYx7wW38r8KXT+JCYuGwje8Cc1TWgP9dslaqLyzRgUY1qIAa78+MC6J1rktlGuAohLHed1Y1YbeSs7v9REtcM3USYxzXz7vb7S4S6m2xaSMTqi3JW56VNx7ZY1naTreMuN2EgA8A8YGN1Lyjb0HQQiEwTjp47vV+wbgvD3QXbqlGlu2K1SbeNG3l9ZzV/f5FPlZvxZGI1oOnQbZXj2WzOrTBLaeWGtc4VAGoV51Uwq3XCfHnXhiojRFwYLRwtsOVLD4nVhLWi76O/GdYu/s7RNnnKhPVGKzCcG12WUY3ezZZlXbcs64j/OgLwzQA+CSZai1Sc7wTwj/mv/wn/Pfif/3hd1zX/+u+yLCuwLOt5AC8C+JmJ/h3/8RalFUe1Q9Q6JslZbatFyqblmfBidxz5zrZAKlBdxa5docwlA4xw7xJcZEpXsWgdJLZnKxf8wlVLvAYMA6IQqwH9xRMP1CoHxOo0rrQn5UonIcza6ZmDTO2s9ikvoH0CFonM6lLJbjfgpimY1UauJDCxOq8dqVhtxFBUOatj1wwD0l2V8utKXZQ6occmoioMiImz+oUAr66O8Tf/1Qv4uf9Q431XXwHmc/LxAPTDbiZgiQLA4sjdWTy9ci/C04uHZoscAPA8PL5c4Qc/+hG868UCv+mJX2TX1DSgSoHviQsPoaf3PAw5q002cH1fHd5ZVHwRrVsDzuo8J4o+ouNqJGCR5KyWtLyLkzW6b4WzOm6N0VmGB+kcx7OU7J6JFg7iwu+NM6cPKxz5sf64NYoBMQzHLbvOanOx2hLM6hZeBICR81fprJ5ArP7Ql9zGv7r1In7s30b48JO/Momz2rfLHqOTuigHsBUUlWK1mbP62jXg6hMh8Ft+y2TOsdkM2BQ+fvcffyd++dN8k9Tgs2ryMdrPQlUhF2OhibNaJlYbYNekm0xVhbRwmIuQ2h3leYjcrCHsiNoktnknm4RZ3SBmKBgQjzPxe85qM1xFE2gsE6uNnNX9eXdRmofY9sRq4ag1cFbPwgrrItjpkEsMUUsAEM4dttnKd3FP7uY49GKzMVE4djet8ZCftxXQcFvezOuvZ/h6lrwh1u7ubTvhTcPSVd1RZcmwGh6tQy5wcqQrxaRLrG11Li/fCFOK1VSMldCiVGI18f2lDK4URVmDNUSCztdNDCKXNVnt8wQ+DuAnLMv6OIB/D+Cf13X9IwD+bwD+G8uyPg3GpP4b/Pv/BoCr/Ov/DYAfAIC6rj8B4P8L4JcB/FMA/9e6rg0BeV8EdVHO6krfWe3aJXIFJ4i1EdNbnssBRykVA+JaFYpsQKwmsaUV7vJGRKA5dYcCFsmTG/FSk90HVB6ZuK5Dzuq41ndW+4oNC5g7FKXtQjzd+EIwIMQ2P8dVXANTZ7WKyVeAPbNUR0rkSp3VcQw6+1UsniQL83XiYO4RHESKRHIjHjygnoiKXX0TZ/XbI/yz196F3/ePfyt+5J8H+MprX5jI9VdsFw6moTSigoCJoGt2fV95MMfT8wfmwg+AJ66m8O0CP/T7b7EvhCFdUPE81m2TyKzFOUNLaJ4yY1YrOkIMNnCV41YjpFCd1QVyyfy+yGvawnxfDIhw/2kwqy
M3Q7yWvL8FA9fUWZ20/q15jvvJAlcPRlT3gfIil22MdsbD0wclc6gRNoYBTL4RArScqi2xel0E5s5q22LTw65YbSJUShamccr58oYYkA+9+wF+5PPvwc9+aon3XZ1gjFUgO+JVqc3D3zmmglmdGAiKoh5/HHjiCfJfl9ZsbmGdB/jYywf4+KdCY/e3FAPSON6IBxXs066QYjLnBuScXiEoegbPl+sidArEye55GTmrHYfdr4XTQ/AlcW2OAekyqwvPKL/T8yXO6twxC1gMnP6Gc10zHro1gVjduWeL2tZzvXZqHlXMHNAyHSSGjnUACJberrNavLdMPjDfZ+/x9qaz+OyI2RBe5LLPq4MBsQD6OGhZ8JyqnxNC1QpEDYS4ZpVLc1aLNdJaLSxLMSBDNSZWUzedHUedmwWhFeh3bzQmtF8LDIh4z1ADdy9rkhr9JOu6/jiA90m+/jIYv7r79QTA/0FxrP8BwP+gf5pfvFVlhEGYT5izQvFCpMDyLQuug+1LvTMIFCU96MRxeJiUzPlo4Cp2VGI1dbe04TVbTLBviyYmSIEGLyL5MzG5McCAnKdR/3zznPZyH3HYA3y3lNBOr3RWC2Y1RfRRvdRMgjNEG1ZRoncVBAaE0PbMNm3kTt1JAhbzeOfreVoZJRyHc4cxWrvO6tjSxik0JUSqpP+MrRIXCy/Rd+yqWMXc5bKYWKxu7gsDIeXxt8/x0ukSH7zxMl569Rk887UPgOjp8b84VL6Pq+EKD849PAEARYE78RLXD2RQe42KIjwxP8Hrtyw8B+DW2QyPTcF/BfAbv+IO3h/9RXh3fgf7gmFLqsddj70zE25dzVO2fRfVAL6IuoEbBJCPW8070YRZ3R+7GyQQEQOSqzbGeW1iSw/bEAQInaLHZxUnS/msmmrE6tb4lOcMDRbQBTXLZ7zinlh9UuNgYmd1M98ycFaftZnVImDRUKx2XTDsx5RitVsjT2XOajqnVtS73lHg5+49g7/wn/wzOHY9kVjdR3bE60qbh9+U67IW7aT/R3EMJtQZiNXPPAP88A+T/7q0Zgsbd5MlPntvgZ/4uUM8t3zd6B7wQxt5ZQNF612V50YByUpntWFGCPuMy57zk4nVBv4s7qyOd6dx2KQO5pSOMwCwLIYzEwzkljB5IRiQypRZbSHPO2J1Zk+PAanr7XqOuDnejFs9DIgNN6CPW7Owwvq8K1ZP4KxeeEhKAJs1AOD0pGJitck8zvMQOonEWe2SxWoVBqQGjN4HrltvUZTdjWKqs3pQrHbgU/YBhBlR5awuCmSVDz/UxIA4hRy7BvB7zSflBQ11+VOd1Y2ZYyBkEgABA1L2u3pNkKGXNVnRt/cu69ek8rTSDzngA4TSWc2ZRrrJrqPuVyKqQemsFiIdkVnt2qUcAyKc4brnallwHRak2AsCFI43qlgtSyLmxyW7PGwbvluxdrzO+dZpps90EueqcpfzouyWDjKrDRbmfiBpHwWMmdWuNYyYaQKGdA476Kw2CFiM5EGIjWOdeOBg7rJFjspZTVk82TYOwgxnWdCbiKwSjgHRdXqI0KtU4qyuDdiUCrGaNGZ3KpzZuBat8P/6+r+N7/qmL7D1kikGxHFwNdzg/iZk95jgJxq4fAAAyyVuRme4fZv9tiy5SDuBWO09cZ0J35/4BPvC0ZHBwTiTLpZMyLmYoH3KQ6iG0iY7s3wfSEtvWma1bTOXi+R90CCBCGK1lrN63zEhCBA5GWKZWC2c1dSOAN/H3Nt1Vtepmdtr5+92xq3z0woHXjJpwGJe0F37AAuHTSQYENMuizCometvSrHa7/OKG2e1IQbEWi7wyW//v+N33fhx9oUJMCAyjFW8qekbuCJ7ROKsPl/ZOPAT4+tgmoXbrdnSwX+4+wxevPoAP/GxIzy/vG90bb3A7s9hGsOBoVjdDVg06egEtqFyPWe1a5YPwZ3VSgwIdR7XFqvbx92AfM82THwZBsRE+/QtFHWrI6SqkJYOQrcgi8rSgM
WiMOOWYwQDYsDwn8/qXWY1z6AyEcABIFj6OxgQtslKMIfsHDToOauLTQbHqsjvWif0WBB9ZzMIgNE46Lry7g0A5hgQScAiw4AQjilc0BvFWCKY1TooSnFMlVhNdVaLObdCrI7XFek9Ln0ftIuCAfF9Jqx3M75M3zOXNUlditWPeKVxxXa3dR463tqVKZAdKApSsuuQ+9WEoej4jtJZTU7MFS5oBQcaADEMEvK2byEokgMW1RgQk0COwK/ZRLQrqMUFfLsktSY7doWimhYD4vpyVAVgthHih7Y8iEFwvSgvIIFtGULMUO4t35Yzq7kbw3GJDg8Vs1qEqhEneFbIReOuszq1ETlEDAiAg1mJsyxCd8W/Tl22KCM4q0NZQJVwwVPbfRViUpyYMRRF/dPv+Dt479VX8ed+y79kXzB1/VkWrswSPEjmrFVwUxg565s6OMDN6By379o4uVdg6Sbs/jcR/kTduMH+/7GPsf+b9KuLgMFEMiHPMlSUwFkxxnUXI3XNN9lMxi05a58F+Wofkh1XETjbYEB07wXbZmNhZfU3cFulzVUNAuYmjSV/ludMWKWKHo7DnNW515zz6qTAwiO6E0XJ2v8BnK+ApZeQ3rU1MLCBS2dWLxbAeRbuCB6loeMPAK4f5biXLCAUtSYEysRVKzace5xac2c1Dg9xFMTbe3cKZ7VEWI7XlXmOg6Qj4nTlMPejyX17ATU/dPEzd5/DR972Cbz0+gLPL++xm45Y0jkMF/5MMCAXwqyWdXIVBZLCMMzY89gGXrr7zG9Sh35vAQhCSxoUvd5YzLFNwYAI52NHqE0KD2FIdwA364Q2r1lkTVAd0KGjFpUN9oBUAYtlbcPx6NLLLKoZs7rlrAYAi/wgsFoeu1jlQUushnnAYhQhcnLEq+01WJ/ydy11fij+Xut90HQzGuKmemHZU2FAZBjK0qURVnyfvWM2amFZm999UcxqEbB4ARgQ2TxWVJnksK1a73yFua/oo2CM3jOXNUlditWPeKVJrd/mZ1ncmaVISyW+3JowKckAwficNOHD9fhuubLl2QADohCAARicq5qvTWdWD2NAqIJa4FXS+yBelbQJrutyZrV66EgSaLflNa79iRfmQxiQrHTpPHR7Yh46WHJ0j5kG8PBO+j2gYlZnuWUkVjeTxo6ovIkNAn8AHC5KnHbF6rpGVYG1ahOc1aErcSIUhVm774BYrYU8UNT7v5S1YzaWZVMhBcDVeYL76QJIU9y6BTw+OzU+TywWzFn9wMO/+YkMX3PzZeaimyKs6+ZN9n/xzjERqwUGQxb2kufMDTcVqqGumTPLpi2ig2CIWU1vffd8OV4kF8gS3XHLsraOJBnDn1ecao4Jvs8WuRL0gXAlBVTRw7Iwi3bdafdul7genpuJfpJFNACcn4EmhLsubKvud4TAnFm9PLCwKoJGVJ4iFBYAblwtcSdeNsfNUsL8tVNel68NIM4ctiFqOnY93UErTcWs7jirN+uazqwWbd8SWtPZysaB9+iJ1bOjAD9560V8y42fR+AWeG5536gziGFA+q7H1MStLJzVRWccMTHJAIDvw7Gr3eeWIzBIjFpRYh6TdpjVQqymOqtDS9oht16DZYRMhQERaxmDDbFmLBDnKhAYBpsAzbpWxpY2cVaLkMmOWF3XlpGwPJ8D67azWpy34dh99TEP99P5lll9Zk0iVs+9FOvV9jqenfBOIwOx2rLqHR2iQRoaOaslOFID8xGAQQxIWrm0SyDeBypmNaWbib+3htza4vu0ynGGMSAEYxsg1vUKfQtAllQkhFHD2pduil46q9/MuhSrH/FK4woBQfhgzEsF04fYmskERYmYBrPFE2vzlLuVawC2R3gJj6Aa6toycFYrUA01HSshdXjw41a1RbsGAIJAMvgC2Kwq2uKJo1DKWu2ka8RqirN6KGCRyqwewoCYhHdO7doXGyGSIMDShFkdudJd6DTjoip1kqsQZ1iLNj306WBe4iwLd8XqogDAd8
oJAYuhI2F+ilZP6jpfIVYnqTVJizoOD9n/33iD/X8Ksfogx/2ELUhee7XGU/OHZosRAPA8PHYY4431Aj/14zm+4fFPTYIAAbB1Vot6/HH6sUSASiIZt8SzQQ3BUwUhEkVlP+BuN8l7hpyNAMAXYXXdLovCJrvsPbdWbjSK0nb/hSEiN0ccSwRpfu4WqYeWVVesvnunxvXo/GIwIOfcWU1poZUFwMEMMQMAy0N7x1ldZQUTAAwFj+vXatxNlo3gEW9qI54sAHiB0xOrk9whh7/t1JNP7l5DE8wQwFqJHf6Ztboa4xhmzGqnL1ICwNnGZYKS6XWYuGZLB6f5HB+6+St47vDE3FkdOv15nKlQKTptpBgQejcjfB+uVe128EzmrM77zurMNXNWBxw51V0jJDaZhc0wIG5PAAZgNMZ4XWc1vwcCg+BKN3D67y9TIwPU3G4ARtdgPu9gQAo+dhuOAQfXA2YSEc7qc9ucWR0EOA5iPDx3m/Xi3Ts1roUr+rvW83qYnc2qMtqwARSd46Z4kTFnNTFgcQwDAkDvnMV7S4bIg0GHFOdA99AavJKEJlZ7gS1f1/PK0pqhGDWfM9YR0u/gSQrPbKPxsozrUqx+xKtxVmsOEvs4q3WP6fr2gPuV6MyC2NnvT5aMeFEXhWoQgqLUBU5nVksdHoAxM8v3Lel90LzcKWK12IFWOOmSBNoLScaikzOry9IAA6LgNRuL1XaJIpX8+w2c1V4omTQD24kzkXPXtDl2ni/TYBrG+Cp7O/ymbamHy6rvrBZKM6VvjjM/e5M7nshtIlbbVo0y270PGAZkghb142P2f/Fvn0CsvnJU4UE6B9ZrvPZqjSfnJ5Mc9+a1Erc3B/g3P23j19/8zCTHZAe+uft7U7HaKZCncrHasurpuMImWCi0Ntkk70STDRbPlzu285y+2TyEBxOl7f4TCzIJ+qC5JgZuUiZWb+ccd98ocT1cGQd42laFMumI1SsLS5/GrA6cAmncuYe4a9+1K3L3wvLQxnm+3RC8/8DC1WBtLlZft5izmgse8aY22xAF4AlXbdtZndrGx2UH93Y/l3e+0+x4to3AKZEWu1z4OKbzf1mwXl+kBIDTtcu4so+Ys3q5BH7fB34Brl3hh977j5izegoMSEf4iwsPYWDmrJYxSo0wIILjn1Xbe0Dwmk32hR2Hj4m7eMdGrJ7aWR3bmHt0DIiS/2u0ccW7D9sYEENnteVJcEsCEUcN7wR7JGXXwHRTcDa3sc5bGJAJrisA2IsZE4H52P35uzMc+Ruzd6Jl4eo8YXNOPo+9c9fCDZONYYlJJk4sY7G64aF3hMq6tujHVTGrRcCiTghic6KefF4giqLvCFyHzMgBIItLhg0lzGGkHGheScy1As3j2r4akQYIk6e+biY1eRYF4tLDfHYpVr+ZdSlWP+KVpmDtDLoPnYJDBoAsVjueAgPS8DnpLc9SYd1kJ9514Vj1xbhflc5qGy7FWS3YeTKx2rANKQhqaXiKCUOxcSIoeFENT3JKZjWVh+7ZyvBOMrNaoFAmvrcaTq2CWe0SnT5u6PavbVUhKXgwDbXFzfex9BKcn+9+Oc64MEVlVh9aOMs7zmrxa8rE2fel7bNioUMOFVMEX00V/oW3vW339xMIwNevVribLIDVCq++bjNn9QQu6Js3anxudRXppmQM2Kmc1WEIPP88+/WVK1u3OaUEBkQ2IRcLnwnF6rxyaFgoDDCrTR3bsgU0zLpXGOtRvtEoapNpbmCJYCaJQDeFk2wW1WzB33VWm9y3vs8C0DqbYquNhSWF0SkWpV3WvgiAc+hi9eLI3RGrb9+1cTM6M8eAPGaz8YULHkkCOv6CV89ZXdfMWT2FWA1sx5TlchLOfuBVvbm3Ueiw6yJyMoWz2sHBI+is/i//S+CP/rZfBAD8rrf/LJu/mWBAIge5REhiQiXx/W1Z8JySdXW2uwSFOYCKq+DdhzsGgQlwFbAsREGFuPS3Y21VYZN7mHn0ZyGYOe
x+7Yzf69jGnHjPss3WPgcagNEz64qxoOUqTksXYWAgJMk6BCc4bhMMK3NWGzyv8zmYs7rDrDYeC8W7b7PB//q/ss//a26+bNx5d+WgYGI177a5e89iyC1DsbpOs2bThoWBGjqrJRiQMivh2JW5WC0xIKWlZ4YBGWBW7/zsfcr3Wc7ZgFhN6fCH6zLHttJZDaYVEI4LYNhZTcGACGxo5z2zKXzMQoOx+7KM61KsfsTLxFmtbJMgisBN262MqSsW5hSxWuzsq5zVRLHat4t+ix+AqqjIu9uuCzmuoiy314BwUGkqOWAc8BD4kLKdNuua3Jbq+I6SXQ6wNaVua0+DmFExq6ktz67LdsYl91Zm6qwe4qETxWpph0ETiEq7ByzPZeEj7eNOEEwD38eBH/fE6k3mGAkUjbO6tXjIzhLm/KQ4q1UBi0WBhBpyAmwnjZ3jJimmcVa/+OLu7w8OzI4HRtW4Ey+Zs/q2O5mz+rHHLfzYa1+KDz79OvvCVGI1AHzf9wHf+73sPxMOtmj7ljirq6xgzwiVWa3IMKBy7hqmpcqxTUUCifbsHgbEImNAlJkTrdqkmu6/IGDCr0ysnsBZvZxXbEOsLVaHK2OxOnIzxOvd++t8ZdMwIIrNMFMUDAAsjxyc58FWrL7n4ObszFj0vP6Eh7ttZ7XAX5g43gTGqiVWx4WHyMvpG63t+r2/l22I/YE/YH4siFBrrydWRw7xncg3W6XO6o3HWvXJL7GLqfkc8K51NhZNnNUzrxeyabzZbFnwHHkInlHwMjgaqbY7uArXmLgVBRWS9lopz3GWh1hGBfndGIQWksKTO6uJAYsXhgEJJAGLppsA4t8ncVOGvjqHYfRcfUkI3AQu6NnC3mFWV7l5uCA78Ixxi89z/IN/APyZb/z/wXfK6cXqBw6uRwYYENvG0k+xyvzmem42MMeA+P0Q16YzyBADUhdyZzU1YDF0ciSxYtyjoEuGEHlgDGi6s3oAA5JaJAyI9JltVZqC6WYkDEhnfswDvWfRpbP6zaxLsfoRr+ahI7QzKDEgVA7TAH/JZPEURLYclm/yYucCsIz32PC1CZM75qxWhOBVxNZB31czq00xIIECA7KhO32aTYtBZ7XeC8j2HOaAHgpYJG5aAFA6q0lcQsGsnjhgsUGWSFv/DRZPsnT6KSb5vo+ll+J8tXte68xjjhwTZ3UWoQ2ZXp/k7JiUibPnMeGry6wWAU1U94zrInDyXvBVnNjThH9F0fYaXrkCPPOM2fEA3Hzcxu34AFiv8enXIrxwcG8SYXlxPYJt1fiGm7/KvjClWO26wDveAVy7ZnYcjgGRhb0kq4KNWVMxq8uSByHSW9TFcXaqSSYnOqtl7dloYUBMNnBVYnVd63NVgwCRkyHO+udT5+butMODegc1dPc+d3sZO6v7i8jzjUNmVkuD9Qzd9QAQHvhISm9XrI7OzZ3VT3q4Ey/x+TfYKrzBX5iI1WHHWT2FS7VdL7wA/MAP9MMWiRX4VQ+rsEksdh0o70QRNioRq89izyys7CJLYKwAdu8bnGNvwwLY3gcm7len6ovV3BzghQYOYLeWOIDNuaehXyEuWs7qosDDdIbjhXwuvk81zurOvHOdOCxgkfC5+YHEVTyBWB3MbDZudTcBpnBWy8wcBp+Xylld15YZs/rA2WFWNyF1Ezirr4Yr3Lnv4LXXajwb3m6+blJXjiqWkyIwIPcd3IjOjMaDK1HMBHD+mTUBzgbvGd/vIyDY+8ugM8h14VgVyrQ/P0xLF35AWNONYUB4iKfWOfO5cW9jnFcWl4wBTXFWCwxI3T82WawW3z8QsOgTAjeb4MZOl8Wm8CcjG14WrS7F6ke8yGJ1ZMuFL4AuAqt2sxpeM53PKXVWm7Q3CT6pxK1ssjBXspUbcYIwufE8uAMBiyaTG9WmhQkGRCp+tipJLXbPahxb6SQ0RMwMiUlmzOpq8s2FZhNAxqw2SVF3Od+rs1uclNxZTa0gYBiQdevfWpZY5z7mPt
315i0C9oy1FqZ3bxVMTDJwVqswIGTjiHBWd8XqzGHCxBQt6t/1XcDb3w78oT9k5irmdfUxD/eTBbBe4/N3IzyzeDCNsLxc4rHoDF+/+Hn2+0dxZicwIJINzHhV0tpIVeOL2Lyk3gJDIjh1UxQimKazgK5rhgGxaBuCQ10xAFireuEj8jSQQ0GAyM2R5P3vz+OCvb8NnNWLYw+rPATWawDAZ14N8bzpxo0QFTddsdrAWS3rCOGblx51IwSAFQbMjSfE6vsuHotOzZ3VTwX493efw2/4W98NQPD7J3ZWc1ZxZPLuusAKJHkxcWLTRQ/H4c9Cv6PvLPaZs/pRF6sNXNUA4M89tp7piAimQqXnSZzVPHiZmhECAK7DQ2d3HMAuwsjsHR6FNRNrW85qUxa2Gzgo2xxoXuvEYc5qiljto4e0m2KT8fiKjZN2190U4Wee1+885BshEZWHjpYLvHXcupiAWb2wsS62zOokrqcJm3UcXIkS/ItXvxTv/4qycUIbO6uP611n9YnHuphMxOoZ52Dz+2ATW8YdPGGI3a4FTNAZxIXl3pyzKFDWxDFG0dHZPjYAvXvM8xBchLPasuC7FROAJRlXCaELG8AoBoQRCfSP2+CLOhtMl87qN78uxepHvJqHTpcv7audquRJg2o3iwu1HtVZrWCmGfG9RPuJxEXXtDxTmNUu1Mxqk4BFp0ZRWVIHMAA6s1qwy7vO6tgyCvwBoBarKcF9Kge0aZjUiFhNmi/tgwExcYGrnNXUxZPsuRVCraEjZeklON+0/q15jlUeYDEzc2wD2Fk83Xq9xmOzM9rE2WFs7iR3emzKpDC4BqJNvyNWGwdXtuv97we+//sZv2OCspdzVLWF9CSGbxWwrXoaYfngAP/8N/9FPGZN48a5kPI8hgGRDFvxumLolkmd1Qbu16HgxpqOAZG5h5qNO4eWCzAasEhpo/R9uHaFouyP+fG6Ml6Y2gdcPOMMo8/dDvHc4r7ZfRsEjI2/3n2HnccuFkRmtW+XUme1KQYEvs82MLsYEEPR8+qTIT51egNvrA9QlxVi4Sg2cbzN3F230xTvrgus5hlrB4AJ5x9lvmVZCEMwVnFn8DpNAhaw+CiK1VeubH9twKsGAHfm74q/AN+08I20NN+t+7hELiTZvsEGi+Df7jiAze/ZMGAInB6v2CRg0/eZgNp1VqcuC1gk3Fte6LDr2prP51nNBC8Dofb4ioWH6WzasUDReRgbHtcP+yiULAPJ8dmu+aHLnNV87Gbc32nmm1eXGf7J59+Lr//KDTtv2zY+7pUrwIN0thWrTz268UQcc57uOKs3sXnAYhhxHI7MWW3ArPbtsu+CNukaFx1c3U5RXnWW62d6eB58R66XACyw0Cdkp/HT7QvAvJLUJgUsjmFAmudMFwMSqp3V0czcLHRZ9LoUqx/xogYsDgmKeVIysZaKAZlSqMUws5qcxCtaviUuujyrjfic0oW5WEDSN2D7fDN+XMuqjTAgPQYTgI0I/KEscvYVq3VeQGPizNQOxYrt9lIxIJ5KrBbPBkVYV11XIXxRxWpXwqwWriQDJh98H0u/I1ZnGdsEiAxcxWIS25qN3bpV4/HZKW2Ca1kI/bLnmhALSGNm9UU6q6eu+Ry2VeNXP+PixSv32NemEJaffpohRUQ9omI1c1b3N1KSTUVzPorv787yDdFYQ/gik3ettD27KIzQIspuI1F5jrj0tMVqAKgr7G4wgYvVhqF9WC6ZWLtaIc8Bu67g2LXZfTubIXJybM77YjXdWa3AgBjcAwC2m35dDIjhxpXj2bgWrfHM4gHO7iS4fRqylm+TgMXQYSF44n3AhSQT1+NFlizUOk648494HaKQi5Ttcaausc495n41eRYuqtoBwa+9ZnQoK+CbKz1mtRkH2nMlGBC+7mi6/QjVoJG6YrUps7rjrK4zItKxXYp55zp1yTg3GW5qClzFlevOjqO2CUI0ua6yd7ipkQFAENq9cSBNzK9BdBwysboVYj
tV2OyV4xo/9vqX4oNPvMJ/WGTc0Xd81WafGR+/754GZsxqCLF6u2nBQs3NBPsowm7XAlrjtpGzOu/rECZInCZ4Wf7+K9KSdcnpvBN8f5RZrdstvT10Lce8gn1uFxGwSM56CyREgjxHVdtwiJlRlzVNXYrVj3iluU166IYe5iytzI4pw4AYuMiaNk+Js5rcMjUA9m+c1RRmtQhhkAn2JtdAFsbBjwuAjgGJbKWz+sIwIJmt35Y2wEM3QcyMYkAMmNW5RKyuiooJw1M6q3lbKvllqXJWGzL5GmZ17Gx5ZHmuz0vrVhjCsSoUm5az+g2bidXEVUno1z3XROPKobbmKpiycTYRs/oiaj7HlWCNf/PJY7zr+A32tSmE5eee273nH1EMiG8XyLriHzgGxICd14gGogSqgfoYqMZDwaymjFvgGBCJWF1UNh3L6FnM9agSq7mgoMVntKztxlRnI8A49AgAlksAQH2+wqc/Dbz96gP2dZNnIYpwHGzw8MzZ4TOuE5exX6nM6qxz3fgGLnVzAQBzrlsV8ph9ZrcfuLgZnU3y3P7Y7/4b+MprX8Ctz6V45eECT88fmgWKHXo7jNZGSJqKWT1xNSJV68XQbGBSxWohpLSfBf4us3xvEkTU5OV5wFd9Ffv1299udixJt1WDaqAGLIIFIfbWHmKOYCD+NQJNywGdFAZzDV5hiB1nNfm9tXuy7P/tuVFdc7GathHih33RJ0ktY1zF8TXnQpzVPUyeaXgn5CasBpFocG85B6w7TiCsXn8Qso3GCeabVx7zYaHGl9S/wr5gursCIDgImGEqjvH93w+sEoddAxOxepnjQdLCgCTmzOowstgY295gSQ07g4SzWoLyAmAgVudIFc5qEhbGdRmzWjI3BjgGhKIZAfC9PhKoOVeKsY2fLwA1szoDSVz3Q7tv7jPp8L+syepSrH7Ei8qsHgLQZ0lNa+kYwoAYuIobB0NHpGsSdE0CFmUYEENmtVKsrhy4xDmj53ecGKIMMSDN5LbLrDYUq+vaUovVucN2SzUDHgAonNU2XHvidnpDZrVnl8gl91ZRgPG1KZ/XyPPleIYYEAmz2oj15zhYBhnO0nB7n4qfYbh4OvATnJ1uz+3WHcdQrK56rokmYJG6gHScvphU1+z+f1Sd1YsFbkTn+NFPPI0vP2y5Z0zL83bH6S//cvNjTl2+v91c6IS9rFZgLc+6963vM+dM3HkfCO4pdX472Gliw6UGLAaSTdGiwLoIMAsUYvPYqYoN3AFnNdBi8u99shKBClysNhD+AACLBeNL39/gk58E3nnlDvu6ybPgOLg2j1mYVGvVV9c1w+0QmdUyV5axszoIsPQTrM7ZMd54GODGBM5qAHj3c+d4bHaGN17JmVi9eGg0Fh4c2SxwV1zTR91ZHXGxWjBfAWxSh21gEgWaMLL6zmrx60cRASLqO78T+B2/A/iO7zA7ju8zE4BErDYRgD3hgO44qy3K89qq5azEeR7uOoArF0FoJlbP5tbOxs3DeyWOg800zurOtV0XAeYhbS7rzbxeO33TJWwwFhxdc/Gw46w2dqzLxPqiYBgQg+M2RiGZs9rk8xJIHS5W/8rrB3jn0a1pMCBPhvjA9c/DfvnT7AtTzA35MdYPM/z5Pw/EKf/8DdBAV5b5LgZkIrE67mJAEmIAoCgVs9oEA6IKjOdF4mz7PmNWKzAgWVrTmNUQGBC5szrJiFkOwiSSy+ecaWYxjUsbAyJxVk+BWros47oUqx/xSjP9sDoAg0yfZuCZCgPShD4RF08KkS5PKzrjjLvoZHqqCbO6Cc1QOICp3G7PuxhntRe5fdYfgE3K3Z8X4azOHf0X/ICTsDBpp3ccWFaNKlMwqylpzI1Y3T+nIqcjZoac1ZZV09tShXOk10Jr3pa6jAqc58H22DnnpZksoIMAh16Ms5OtGHHrnkfHgAAI/LovVpsuIEUAWlusrirEhY/ILR5Nt9tshpvRGX785WfxTdd/sfnaJPWN38j+/1
VfBRwcTHPMKctxsAhyrDK/9w47PweWRK5w6BRI4n6AjglbWowfvcm4GA9dA2e1hNH6MJ3heC4fz0ePORawSJ3sS3BAAOt+NuJIAsByiUM/xt/9qSfw/d8PfPjpT7GvGy7Orx7muJcsdoRKq65ox+ahR2m3O0wgZkzWTmHIumJWQLIu4Vv5dPz62QyPz05x65Ucr5wu8fT8gdFC7/Cai9Ms2t4HE/F/L6qCuctEqtY9EOcO3RwAIJpZiDvO6jrh4r0B+/XCy/OA3/SbgJs3zY7jSzAgE7iVpc7qojAKNQeAo0XJ7tmWA7iqLdie2Qb2/MBh4Xr83nr4oMZxsDYTUmTz+SzDuvDJeqI/64SigjurDYVaJ/KZq3hHrHbNHgFZwKLoPDSYH4eRxFmdmTuru2L1J28d4p3Hb0wiVr/4ngj/6bO/ALz8MvuC6XMLAFEEy6rxqc96+LZvA/7I1/4Y+7pB6OqVg2IHB3OeeFh4yTTO6i4GxNhZLXEsU0IQRQlntUJYZpv5mvOjEWY104wMmNUqZzXF2AY0ZsQiUYjVRJOnHzl9Z3Wes3fPpbP6Ta1LsfoRLyZWE1x6wv0qdVbzlg7dYw6IabnBAlp1XKPgCMdhg29u9Vx0jbOaGCYl5XPWtZE44QaOVKxu3OVEZ3Uzi+u8LTexbcSslgWyAACqCmnBAu20zlnFaOV8TvLcToSqdTm1ZYmsJDKrxYtSggEpCjBe2MTOagD0CW6HTyp+BmNWm7nTlrMS59nWQVQlGZv4G2JADvwEp6fbL92675k5qwOJWM0XkBYViC7EpLZYXRSIi0fX9Sec1d/85C9jYa3ZPTWVY+A3/2bgu78b+K7vmuZ4F1CLqMQqD3rj4fnawoEXk5zVoZMj3ijcrx4dMePLNsSEUEkNWAzt/nsmZy6l47lipTJS+zqrKWK1ZdWoO6uyeFMbCX8AgMUCh36Mn/jkY/hTfwr49denEauvHRa4n853hMq6JIrVglmd2btzmKJAUdvwqPcWANg2Fnzs/pWf2+BLr9xlXzcMwgMARBETq1+vcetsYTRuA8Dyit9zqT5I5zgibq5cdPkzF2npbe+BskSce4h8erhcNLN6GJD4NKPP4d5q1Xa/imeBb7ibuJX9oM9WJgfQt+pwUeIkjfpBiIaCBxOr/a1Yfb+azlndEatNske8uc9En9b9miQw7zgT90GryyI1vAeUzOrSM8vbjeweViJN+Xre5PPyfQBcsM9z/MobR/jSo2nE6vd/0yH+wLt/fPuFxx4zPibCEId+jH//K0u8973Ad73wU+zrHMVFqSuH5Q6z+tbZnL1nTJjVc4lYnRoi/fg7vCcCG2JAQqdAmsrveZJuIhB5qoDFFPCdC3BW50QMiMifUTC21wltc7gJWGzdA8374NJZ/abWpVj9iFeaW7SAxSFmtUhKpYZJyVzFJgxFxXGNGGeWBd+tpGB/E2e15fNdeBVbmWp+VTCry6Kmi5+AupU6M2ibGnJWc7xEGELPWaraCOH3lmsTxT/HYUGbkslCDdCcLp4H15Yzq/PcDANiAajS/jUAQJ8wBQFsq0YVtwS6qZzVi3pHSIjPcvMFtO8z9uvp9ho+OPNwJViTXWRhiH7Aoul1bWNAOgvoR5WnCt/Hb3zbZ/CH3/uj7PcTBOg0FQTABz/4SDsQlrMSqyLojYfnKxtLnxCC53mI3KzvrG7cr3TEjO8UfdYh37wjd6UqmNUP0xmuLGhi9T4BiwD074sgYB0sq92NhdOVjQNDFxWWSxz4MX7p1lU88wyasCpjZ/VxxZzV/HhlycIbAei7li0LgVcxd14nb8AEOSZqOWdj9yd+LsWXHb1GO0dZzWZ4LDrFG7dqVFXN1uMGx7WjgDmbWgLVeRbiYPFojrHBwkNWOtt7Ks9xmkU4mNEwOwAQzmzEbW43gNMHJXsOvhjEasuCJd5TLQGYPIfj5Xl9cSKJCc7ETh0tS5xks50NFgDmYvWhyzZbeZfBw3sljv2N2XPr+7
CtGmWyK1aLP6OUHXYc0GDGK+MgwDBk57pJ2ZxL3AO+wXUV3Zdlve1k5agho4DFmSNlVoemzmrLguVY7PpuNnj9ZIYnZifTjN3Xru2e2xRidRThy49fx9/+t8/hHW8r2b1r20bvWiZWb53Vt84XxmJ1OHN6awTj4EbBgu7mTkzArE4UYjUJA+J58vPklaU1eZPFDyx5wGJdI8ldmiGRs8B7JjRem9RhvH1dDEjksvlxa9zKk5JGIrisSetSrH7EK81sGgaEu197wU9gAw8JVaBCNQg+J3VhrhDWTVOOfa/u84fQEquJLwp2EEXIJFWsDmwprsOIgQyondUpD/whOqsByMVqKkNO5aw23AQQbVi9HViTyQJ3VuddDhmAojS4t/jEpuemNG1LDUN2vvFuiFBausat1AeLakesXj3MGfvXpC8zCHAjOsPtB60xr66Ypkp1VodgbrcOBoQchgkAto3AZdzrZpHDWYeRXw7/3TerLAvv+3UePnjjc+z3j2IQ4gXWotMJIOp8bTEMCOE9GzpF31nduF+JJ+q6jCHYHWMM3zOMyafAgCzpzOqhgMUyyWndFkGA0MmRnO++Z85WNg59ggu+XcslDv0Ev3z/Bp5+omT3QzvUkVjXrta438KArB7mWLgxe78RzlcqVue5WScbr+USTKz+pRpftvwC++IUzuo5c7l95mXuSosi+vwFAIKAvf/4HCaLS/aOfUQXj8HC28WAZBnuJQtcWyrSq/aoaG4zIaU1bj28V7IN3C8GsRpALcTqCQXgBuvXuq6bxMbcVKw+rHYxIBOJ1d4yRFnbW7FaOKtNNtlEh9imNWcx5aH7Puu+bDurJwgXhOviKIxxmvDuqLI0RrbAsuA7Fdtgagdilh6imYFrX/KuTXOLlkHVqVlQIS48rO5sENkpmxtPMZdzHOCd79z+/vHHzY955Qr+s7f9LH7y00/iHU8ydAkWCyOTxJUrYGI1fye8vlriCWOxuu+EZ8G4hs5qyTyuzCs4VkUXq7sdna0ioUv4MSVyEYAJmNUyxKno7g1q/XuBr5V782Ne68Rhoda6GJCZ29tg2mxgzEO/LPO6FKsf8Upzm/ZyE6iCtC+axDGR+ei6bFc7lTmrDQRFhfjZMM6oYrWi/cTEWT0oVhu0Z3si+Kp7rlnNXMXUyZhYfPeY1a5RwCI7OZWzmiCCDgQhGt1bfAd26jYszy7l//zMwAnPW7HSeFdYz7PabHEuHIpx0XcAG6bTHy1Lls7OJ43r04LtaBvB/li6+Z0TtlA6PwdbPALkCXnjrO60egIwWuj0xCTOOjRJkb/wev757a+Pjt6003gzajGrmbO6hwFxsPRozurQyfthNwIzRNVTRUeIZJOtMHBse6EEN1UUeJjNcLwgMqtlbu1WxauS9q7xfURujvhs97xOzx0mVhu2vh9GGaraws3ghH1tgi6Dq1ex46w+eSPBkR+Tjx34dS+kS4zdpqih5YGFVR7goz/n4SsPP8PeWVPwj4+O8MziAX7hM3M8u3hgxCYF0Ntw//wXLDy7vP9oBtiCi9XtgEW+ueCFBu+ZmdPbbH1wr8KV8ItHrMavkVi93vDwcYP76/AAOOkwq402xkWJedUOs3oCsdqZWKzubDABLVexoehzZZ4xoXKzQZEUjLVv6lj3c8YCb4nVaenSMm142UE/iD7NbHN3OYDHj2K8tj7CP/vRGt/0+K+wL05lPPiqr9r++sYN8+PduIGvufkyvuzK63j7jTP2NcN3wuKKz0wHnNt9lgZs/mYiVs+dXvdKnDnMpWwQsOg7ZU9YThNibhg/Zujm/SwLXiQ2fMOsVjirM5CZ1Z5vMR2ma0Rrd2HrljChdYPNea2Fs5ogVneDYTer6lKsfgTqUqx+xCvNbVKqqRDpdiYgvOLEoj18IrSw23ohmNWGzupumJTp5MbzIEVrmDCrh8VqE2e105s0A2w+Tj5XAPB9OHa12+IHYJ26bDAnitXdSVhT/AWkzZBTBYqVJUpDZ/
VgGrOBs1rFrDZx7QcSgWoTW8ytTJ3gWhY8p94NvJkopOrKcc0WDlytW58WWHiGYjUPAbz9kC2UPv1p4O1H95o/o1QYAkkn6VugYEwmIYFf74rVIkX+UcWAALti9dd93Zt3Hm9CLRe13Fm94WK17uKcC6q9lsyiwKbwMY+I94F4f3cXD1Vl9K5VBSyeZRGWc9q5zmbApvDVYvWaONkPQ6mz+nTtmjurARwe1Hhq/hD2nTfYFwwRIABweMVhjkouJt1+JcNjs1PyuMXGF6/3eSWla4xwevG5HP/gs18JpBkem51NhwQ6OsLST/HJ7/xT+J+//m+bu7X54jyJWZv+Z19x8fzy/iO7eAyWPvvMWhgQAEaishX0AwYf3K9wJdh80YjVlt0Rq00xXlA4q2Pi+qhVR0fYdVZPFdIVRWzuLcTqh2BitSEGJHDynf3bOuXZIwbOagC7YvVEQu3xImcGifUar9128dT8ofEx50HOWODiWeWObXKoOSBdK07iLgfw4uMrvHR2A//kn/r4yNM/z4431Tjwvvcxkfqd75xm8zIMYR8s8Au/449jdo938BjwqgHAmvP7PY6ZAUeYcw2eLyZUtvSCuubOajMMSODkPbMUw2oQ7wPugk4yuSZAdVb7dsEyviSVZgbM6sCSmxmKAnHh03jzfA2udFanHlsza56vE/koK3v3fXDprH4k6lKsfpSrqpCWDm1Q41wjGYB+E1v6TCNgu4CO+26vxCRUTIh/2e7fn8RZLWNWl/bFiNWGGBCVsG7ErA4CRE6O+Hz3GpylPg4ojFZg1FmdVw7cgHK/SnZKi4Ixqw02QqTBEYbOateSO6sZtoX4eQkXeOcFPIXTx/d4m6OwgIq0c0NndXAQsIkIX5ivTkvmgjZRUpZL3IjOcfskAOoaL70EvHjABSWi8CFL+p5isRsEYGJ1axMgLvxHN2ARAN72Nvb5HB0BH/jAm302v6a1WEDurI5dI2d1K0+PFRerZxFxM0gschQYEKpO64QeE006WAkT7ud8DuZKU4jVm3MDZ7WTI17tvhPONi793dWqwxs+nlk8AD7xCf6FQ6PjAYC9mLHry2+IN17J8Vh0RhbCm/Gl66wuzPMGvu03nuHjD57Et73rl9kXpkCAAE23hnf+AId+Yn5cy8JBkOEsZ+LfZz9v4/nlvUd28bi84uEsC5t7IDnPWfaKyf0qyR55cB9fVBgQy7ZQVtbOuxaA0byo6Whst31PIFYfHlk4aQXAlXk5iQO4eej5PO7l1wM8Fp1O7qxOznO29poQA5Jm0wi1xwesEwibDT73mofnJti4WgQFVp0QV8uqzc5Vsk5indKGAYsA3vF0jF85eQw/9wkfH7j+ebZZMWX2yH/33wF/8A9OczwAuHEDjl0DL7/Mfm/abSM2ZzYbnJ+ULG/Eto2ugeV3srPKkoWlewX9uI7DNJPC2aICwbEaJmK1UyDNB8Rq3VBIwaxWHDMRx6SgzEKrj2AEgDxHVjk0sVoVQM6LmfFoc04AO+uDOIZ5oPdlGdelWP0ol0g69olMH5mwDBYaQJqMiUCxbuiTaVibGHw759rsxBNf7L5XM4GuhwExc7+yg0yLAXHHnNXUSZPvY+Zm2Kxa17aqcJ6FWPpEty53a/eCAIHmumg7EsRu8UbW9m6IAbkgZ7XsRZkXllHAYuDkvedrkxiEYW4PveusFg54U+OEECK4WL0+K80xIL6Pm4cJ7myYY/ull4AXZ6+zP6M6qyVidZmVdG4cryAAa9NvhX9N4Vi/0JrPgT/2x4A/8kce2Vb6iyrB6e2Os2cblxywGMrCbsqSidUzuljNnNWdr4v3jGk+RAcDAoA8GXcCl4U9qcTqdU2b7AcBFl6K89Pdd8LpxjPHgAA4eGKBp+cPgZ/9WfaFJ54wOh6AZnyq12w8fOO1krmWjZzVfWb1FGOMc3yA//1b/jK++/mf2Dl34+qihSYQwQ+jlDlVkwQvf46L1Y8ob//JZ128tjlqxOq7b5S4Hp
5PL1Y/tL6oxOpFUOyM3U0XnombMnTYGqHLrPb0W8jbdXRsMQwIH2fTuDZayzQVRaxTcp3g1VeBT78+Z2LlBMzqNspqfVaya2CAAQGYQ1uU6XpO1NXDEnfjJROrX3WZWG24czcPCqzzvrPa6FwlaEN2DSZwVj+X4e+//H6894m7bBNk6rHQJGNAVjdvsv9/5jPs/4bOasxmiNwM8UmK1z+X4fHo1NwF3l3bFwXi0tB4wkOSuzpEmoLWMc/PM3RyJJn87yapxTZHde5d32fM6kKBFsls/WPyimYW4m53GEDXCoDtul5ixkRdY525tDWz5D176ax+NOpSrH6Ui4egBT5hUSLwBypnNcXpoRDTjJEC3P3aMbsZDZBAK4W2F7B4Mc7q3IRZLViinQHdOGDR99lLfd26D7IMWeXAjxzajrHrwrUqKQ+dLHyIVl+ZWH0RzGqT0ELBrJa82E0xIDJndRP4Y+IgcrlY3XJW12BcPaOazWABqM4ZO259XjEMiOHE8cYN4E58AJyd4VO/WuHF6FV2rxIXZUys3h0L0qQ2XjiEEceLtBw5celNQRS42Lp61XzB8BasxZJxensYkMRlAYu6963D2kSTwt1xzjTOaiq7XDCru5lsYjykitVexz0ElnbuWpVRq2v3mO1qmH8Esfo42ODhg91r2IjVhk6Xd38gxO984eeA01P2hSnE6ihim8Mn7P564w3gZmQgVquc1RPkDWC5xFOLE4QnvGvlosRqUxcdgINZydzKmw1eu+3g6cXDSbAtF1HB8WyHWX33To3r4erixOopWvXfAvXU8RqvrY+aa5CnFZ37ykvKrE4ccwzIFZttrvBFTRMWP4GzeuZm2JwV+Ft/C/j9X/NxNoU3eRZ8v2cWun+vxrFvgJjxfczdTCCFAQBJPg0G5EufjfHJk8eB9Zo7q+8ZjwWLqGQdVy2hEoCxs7oGLsRZ/eILFT565wV8+OmX2Bem6oq5qLp+nf3/dW46MX0nRBGuBBs8uFfh1udSPDE/Nb8G3Y38PGfOasMuSd/juROt+ZExBsQpkBYKZ3Vq63O2XZcxqxXHTDKOZKVgQGYuQ891F+F5ztbgxO5uT7auBxqjyDws9fUNmVgtOm0undVval2K1Y9y8ZAH0lxUhLVJxGqys1oIf91jtlNdKcWF9a4I3jCrjTAgMmf1BQQsGooIjVjdGX0bDIhBwOLMzbBZt66tOHeysu7BtUsUiUScEMcmtN+Eio0QUyeh7/RZXI0rh/ICEtiaAtvAwu3p0rEt4jnoCFRTLJ48Hz1mNQBzZ+18jkM/xuk99rmvzmvmyDF0ugRX5khLF//V9wf4yZ8E3n5416jVMYjsnrOa8QPNFg6zmYW49Hec1VNMcC/rYipaOIyv3BOrPYYB0X3ZWhbCoEbc3rAAWs5q4omKFHmJs9pkU7T5i60DnzyscRRsLk6sFs5q3eMHAa4Eazw82X3mzxIPB565s/r5D1zFb3vuF7ZfmEisfmF5Dz/1i0cAgNt3YOasFp0bMma1affGwcHu7ycQlQGwd2r73zuFs6WmeKMAAGnCSURBVHpRMvFvvcbd+w5zKj+iYjWiiC2kV+wZu3u7wvXI8HzFPKUtVp/aX1QBi09djfHq+ri5BlMIwLJ5d9PJZrKJfRSysDa+YZHG1SSOWkQR5m6K1XmNz30OeOfxBLz9KGJOzfXWfPLq6zbbEDJwVi+8BOer7didZjYT6AzH7fd+SYxfuP8UsNngs7dCPLe4b7zRNg9LrPNdsdoyxbZ0xc+6RlK47D4wDfJ9eobjYI3fMPt37AuPaJdJU089tft703fNfI4rwRoPHgCvfi7Hk/OHk2Qj7GQxCeOJ4Vw+8OteTkiaWUYBiwwDIh9LSJxt32e4TKVY7ZDHWsv3enkLAOhaAdBoUVIMSJ5jnQdMrNYt34drl8jj7Wc1BRbqsszrUqx+lEuI1QbO6jTp/9EmsWn8IbED3w1tLEvEhW+GAbH7qIYkpw+QAEuhlbmVL4
xZXTn00ODARlHbveMWeW2GAeFidbyRiNUGrglnwFltAbR7yy5YiFK7OAaEPMfnzLDuvZVuSkTUF5BlwRVOZclGiGvkrO4LVMxZbRCwiBa/veWsBmD+Ap7N2KTxLjveeg1zZjUALJfIKgcf/Q8e/rPfGjO3tsFkNJw7fbE6NndWRzOLiZ+tTYCUEjB6Wb8mZYVcjO4yqxOfFrAIIArKPg9dOKsNxGrfKXsbV2I8JJs8JM6RhycWc9FdtFhNdFY/6IjVpxufOatNRbrHH9/9/UQYkD/5wX+IP/z3PoA8B9644xgxZZmzus/an8RZffPm7mfytreZHa9dbXf1FGL1QY2znDmrH5y5zFH8qAo0UYSn5id47R7LXLh7u2TOakP3q23VO0HZD06dL6qAxSevJTvO6iSuWeelybwocnrdl5vUMXfSiXuTW4vjxHxjHAAQhlh4KdZnJV59FXgquLv784jneujHODvbfum1Nxw8OT8xWiMsvRSrjd0YOh7EIQuDNBTsn37GxhdWx8Bmg8/cnuP5A3Mk0DyqWO4Cvw+qfIL5sefBQssYU5ZsPe9VxmK19cTj+Eff/D/jWc8Mj/drVi++uPtvfuEFs+NFEVt3PLTwmU/XeGF5bxKxGsCuWF34iKjdcbx8r+51R2UZ2HhAxFCGDu/mq/vnlqScL61zbMGsLhz5McVG04QZV6aGMWkWFf856yKghZv7Pq6FK9w72T73U2A4L8u8LsXqR7mKAmll4KxWYEDIzmqRGq1gVpMHdYWj1DQ92g8stqMpCS00ZVbXWUesLgrGVvZpj5QXudKAxSKvmfhJHSh5SJUIpgfYuZMEZVGuC9dWiNVU1zbHgPQY6/wZILvIhADc2TFerS1aAMP2sPLPqwA8g4BFqbM6NXdWL2YV4z22mNXGTD5g63C4V6GqgPWqZsKyqVh9cIDjYIPv+PAt/Mk/dJt9zWBCHs4dhutoXdw0hXFbajSzmKu2JVbXACz3i4sF/ZYpiVgLAHlpsbRzwss29CXOau7KIQuKjiNPZ69rlLUN26NvXta1tStWPwRzaBqI1bZVo8zkThYy88/3mbP6dPffukq9acaYMATe9z7262vX+k5jSi2XeHx2hndffwOf+hTwxn0PN02d1SpmteE/H74PvPOd29+/612GB2zV8fH21xOI1QcHaJzVZVGzsK5H1VntOHj64BSvnB8BWYa7d4Br4cpMUOKdZ233ayPaf5GI1U9dz3ac1XFs7qyeL22sO502m9Qxxq5hPmfOXC5Wv3YvwBOz00mY1XM3w3pV4403gMfsO83XTY55FMQ4Od8KVa++4eKpuYGz2raxDFKcZ34zdt1aLfH47NRYrLbmMxwHG7z6hQppZrEQV1MMyKzcwYNNMTdscIEJf2aF+cyboOvu8cfxoSde2v7+URero2hXBH3mGbPjzWa4Eq7x4NTBZz5r420HdycRq22r3oqoRTFJl2Tg12xDrI0BycGc1ZT7y7KURjyAOau1A31tG64DFJWzNTK1qkH4UMYv32d6Q2fOnawKZhgz6G5Wi9U+Way+Hq5w92R7Tpdi9aNRl2L1o1zi5UZx6fE24p4zC8Amc2nMat8fxoBQF+airaV9rnXNBl0TsVqGAalrxg0zCDdwrApl2nlJmGJAFGJ1ntVmPFHhrI63X9qc5mbOEc+Da1VScaIR8Sd0Vhu59h2Hsbg6L7XVCkzwoLr2vQFntUV37fsO3y1uTew2qWvclvrE1Yy5ki7AWX01XOP+feBP/2ngT//4+80DFgFgucTveO7n8W3v/2wT3mjkrF64zP3auhGS1HyxO5tbu1iJqa7rZV1MycTqooCFmj2zhGcsDOq+s5oHNJHCY4Bt5kRXrOb3l+XQMwwA7GJATi0zBrTr9lFTrYpj0BYlgll93rmGdcVMWlOwen//7wf+6B8Fvu/7jN1uABrB+z2HX8DHP84clTM3p4vVoSVlVqelyzInTOvFF7e/FlzRKeo979n+egK8yOGRhb
MsYsGVNZ9/PsICzdPHK7zC3Z9371kMW2IoVkcdsfrhuYcjE67wW6yeupH1MCCRqVi9sHbxDwDWKTGcq12LBVyrQn7GJt4vvbHEi4d3JnFWz90U6zVQVTWclM+NTMRa28bRLMNpujUzvHrXNxOrASzDAucZD8Ssa9xaHbAuE1MUymyGD1z/PP7Y3/0yfMPTn2u+ZlLzqGabFnw+n8S1PkqhWyLjSaxrRUeMP4FYHUW73SuP8FjY1Fd/Nfv/V36l+bt2NmPM6jMXL7/i4YWDaZzVO2a8ifJnfL+fO5GmFnwDJI4V+HK0BjgGhDAuWgF/1rtd43WN1KTL3Zcfd31Wsk1ByjEFYzuVzDmLAnnlwAsJ44zv43p0jrun23HvEgPyaNSlWP0ol2gpp6zJxGJX8jDHmU0TK4WorHBWk/Up12Uviaz1AhPH9AiQfF6Ns7rDe4xLH5FPPG53t7x1vkXlwA1oEzE3kDOrGwyIibPa3XVWnz0scWAiTnCuk8xZnW0K1t5EDVjsYmvKkm2EGLj2G4diSwBexzYWXmLkrC4qCbalNOCh2zZ8hydHt8LaNqmDuWcmVj95I8frm8MLYVaLdryf+AnAQcWY1aZC0nKJP/SeH8MT/r3GnWQyIfcXPtu4at1gSWrOUIwWDku6bjGrAVxObB7VCgLmAo5b46wYc4OA9E6IIkgxIADozxd3Vu+8E4HtuGCAhQLQSzsnLxoAwHWZgLKRX7vGWU0Qq68Eazw4a51XVbFx3LKmEeksi/E0205gk+LC7HuWn8XP/1y9/byoGBAhVnfmMMBE3Rsf+hDwwQ8C3/M904j13eMul31eKaGuXbfw915+P37hlxwsXD6GP8ICzdM3Evzgz/x2/D/+pIV//UuHeM/V14wxIN2g7KriDvMvFrH6sQKvtjEgqWXurD5wpM5qY3EiinAlXOP+CZvLvXT7gInVpvMtx8E8KHB3M2fdhmLeYWgOODoocZJFjTHg1bsBw4AYoFCWYc66+bhY/UZ8gMfnZ/SweFHzOf7we/8Z/vHHnsE3P/nL7Gumzup5jZU4V7Rc+yafl8v41I0BZ0pnNbCLsXrUAxYB4Hf/buDbvx34zu80P5bj4MoixYNkhrUwHpm+DzwPoVNskZl5zjAghmJ1EKDvrM5AN8wBfR56q+LcpW20SAJBAexmZBA1E9m5rs/5WpGIFgmdHEl3ftz+OZTjCmf12Xb9eh6z+e1lwOKbW5di9aNcfGdvFhJeboJB1HVW1zV3VlMxIGqxOqA6qzksf+dcJ9iF9kO7x6Mzbu3h7Sd52neXX4izOgdcewJndbplx509LMl8VgAMA6JgVifrknGACfdWKLtfBTeMigGxLPhuyYTKVnuTKQbE9W2ps7oo+OdFnIQEXtXbhZ8CA/LkzQKvrY8bsbbIKjO8jCjOrL536uL2beDHftdfx9c99ulJnNUAgLOzrVhtMCGXsYqbgCaDBclsYbMgJb7IqXPObDddlF7WxZTvY+GmWJ9vx+864fcEcYNFFbAIwCjE1ndKpbOavOAXY37rOTB2jrgua01XidXU4wtn9Wo7Ru98VlOKq1OV6wKzGd5z/Ar+2l8HPvT859nXie7iILJZwKJsI2SKDbEgAL77u4EPfMD8WO2yLHbcP/NntmO5QX3rN6zwbS/8B/z5f/gcrkcr9sVHFQMC4EPvfog//zV/D3/nH4a4Md8w4c8YA1LsZI/UVd382RdD3bhe43Z8sBuwaMisXhy5OyIlAGxyz1ystm08c3iGL5wfA3HMxerbk3xWi1mFXz25iaeup2xOH4bGAvDhssZJOmvE6jcehnhsdmrmrJ6VW/RcniMpJkAXAcBshhvROX769/9v+PDNX2q+ZlLzWY117jfj7BThnRfqrAaAp5/e/voR3rhrKgyBD3/YfG3A68pBiS+srmDu8LbhCZzVodMyTBWFGd6UV+DXvQ3nNLcZdo46dgm0RhcDUtdICgehW+iPCao8Ln4dyPetyLiKd4+7Oq
voJgnPYwaJWHL9xDUhitU3onPcOd/OLR6sA1w1weRd1iR1KVY/ylUUyEoX///23jxOsusu737OrbvW1vsy+6qRZrTLkmxJ3mQJy6uIwME2tvEbDMbBBpPYway2QgIkL2sSIASIX3jBsYHYscEYG294xbKFF0kjaSTNPtMzvXdX176d/HHuraW7ZyTdc1pd3f18P5/+VNXtrtu3q8+995znPOf5uX7M/NvVAugbDZTqDpJOjItZVDF2uVu7XkdTCiTi5mgmEkoEr3c4SnUvkFDO6hWCom6FX9e9tFjdTMTOrE4ELpqdlYhDtPOSEgkETg3Fqt0SOpbm68g65fidBsdRzurqys+wXJKxndVe4lLOar1OrmuH1Zg7xeqiZSYGpPP/JaVegUWoYhwrCv5U9WNAdmxrqhiQsOfcKi5oILN6yC/gSyf34MgR4MrUObXsXbdDGi1xnJszEgPSEiI7xeqqpT3YDTJ2V4HFQl4qtwA7Nr2J6yLtVJDPta9dpcWqWjYZc2Du+2vjrPYStZVidXR/1BCrbavRVagtdqZ0hG0j5VzGWR2J1c/2842KKOXb7yvNl9W+TESArBWZDLYnF7B9tI6fu+ULrW1xuJyzekNcYwxNKFiZFF69+2F84ZERjHhhJbgeFqvHd7t43f5v47+880m8/54H1EYDzurIpVkqAZ4VnsO9fC4YxEoFKm8/zLRTzmq9PoybcpSRIRooNZso1hy1kk1TAN49VMCZ/CBQKOD0XBp70nNGRMVUCji2OIadWXPnQX8/sFBNtj7beiOsvWJIrJbFksrwNnHOhtfSQ4njsKtFdY3R7G+mM0C+o8BiK2JGM7O6y9wVOavjGm+W833fB9x5p6q7cMMNZva5gRjsb+KrFw9gfzosMqorVnteuy6AlO04CU/vOuB6YkUcabUm4hdYBNou6OUCT8vcFaOIZ1gQdIVYrTvR5DhqVVC+29xWWGqq1c1x7l+OmlAsllf538StmwWEzuolTOfbGetzRbXCj87q9YVidS8TLfeMk3sZOrNWc6oW41a4XS1buuM4Y194hYDnhPEH0b6i+AcNsdrxLHWTWOGsduPv93IxIDJ+gcVVZzWlRKESLkHRydX1ZZeglpurI6NTlMR1kRDy8s7qGJnVqzqrw8kVzXHeCgG4UBJKrI4bA7JagQspUW8KOImYBRax+ix8oepot4Ht29GVWV0pNfWdIwDgujg8OInFioc3v6HejtnQFavHx9XjxYvA0pJ6rjPQ83010O0ssFjVH+wujwHJ5aAmguis7k08Dxmn3GpSAJCbralYpJiiT5AUK8TqZrWuBucak4yu1UClZnUXJtIVKsO6E521AYw5q0urX/OKJcQTq5NJ9b8qt49rcaamN9H6XJDNQgjgkU+cwFjzQmtbHLzAQmWVPHQAG0OsNkUqhX2ZWcwXPYw4C2pbL7sJ+/oAAHcdPI3rBs6qbTrHG66QK+TVeXvsGHDlYCjSGMgE3xCkUrCtBmqLavJ6ctFXKwQ1zgPhLut3Nxoo1FwkvdWLxT4b9oyWcDo/CJkvtFfcmRCrR5J4aG4n9ljnwg36ERD9AwKLVR8oFjE3B6SdUATTEGjSyWaraOHCZAX9bsmMWD04qPpXuVCs933tSbFUqju7vFQWysigc41dXjcqMt6YmlvKZoE3vAF4xzt6+1q4RmwfraPatPHeI59SG3TPg0QCgddAuW6rcUxULD1u3ZEQz5Mr7uHVmohfYBG4dAxIrabMeG6M69fTOavjTrKEUaSdq4KAMAYkTr8wPNaUU0WhYnf3j6HqZonwZ541QmAkXcJ0Od3qa88VfSVWb6X+Vg9CsbqXCS+WcQPo/UQN5cqym3iYw5QMYlx4ohiQVQTF6HfGxXOa3ctd63U1m6cbA7LclVSr6S3tuVQMSKOBmkZmdevC2jlTGrk83JqWyyPwJUqNdlTB0kJDDfjjOnJs+7LOaj+Oszp015dr3XnNrRuljrPahZoI6Wi4+ZKtZnXjOq
tdgbq0VkyE1JsJ6ESJrnBWh20g6dS0OuR9QzYWq0HbWW0gAgMAIAQO7Sjgc6/+XbzqjkX1v0sk9G/s6bTqfJbLwGOPqW2jo/H353kQQqJZ6nBWVzTdDQC8tKMmFzrFarfEjk2vEjqrO8Xq2akGBr34hcr8QKgYkI5rgXbxL8tS8UWNlddDAFqxUFm3hKVC+37SEqs1CiyqJZmXEqut2GK1EB1xBwAmztTV8vReFqtD55+VW1ARRkLEjwG5nLN6K02IJZOwhMTVI9MYsRfUtl5uA9HKoMXFlltVS1BKpTDkFTA7r86xRx8FjvSdV9/bQmL1iJ/HzFQTUgJ/8I0b8ZYrvqF3r11ecDY08xgRq7dVcXppCB/7RAK3jx5XG02I1eMZPDi9F3eLz6sN0cS+Bv0DQsWAlEr43OeAu/afDH9ZfAEwk5atzOoLZw1ety2ruxisic80Y7WEdaDDta/prPbtcAwuZaswrucaclZvcXbs93D0X/57XJcJ26qBduB74Sq5YlHpL1JoO2pdV3Qb8QBUagKupWGUiVzQy2NAIt0kzml2qczqUDPx48a8hvEqK8TqvEZ9I8tCyq2pegON7mt1OV+Pp0GEjGQrmC5lWteC2WISg4wBWXcoVvcyOgPTaOlFZdm/OOqMxczB9hK1VaNFYh9niOdBzT52dBrNZFav5qzWiAGJnNXVlVEojaaFhKspVi871kLdQ9Ktr/6eZ0gyCJ3V4WebW5TIajqrbdFEo7byMywVY4rVQsB3myuX04duBC1ndZRd3iFWF0qW+czqcNmYbcfvjK5wVjcaKNZdpLy6llgdZTbLknI+X5xzMeznzdyAoyXuk5Pq0cSARIj2ICza75498ffneeh3S1hYbH+GJgYkrSzs6Nyis7q3CXOQF3Lt++LsdBNDfj5+ZnXkrF5WtFA3usezmysK82iL1YkE+twyFotOSwQvlQ04q53LOKvLMcVqz1PCRFO2/u6jjwpcPXCht6MPIhf1xIR6TKfjr7RJ2WoSv+PeJWsbKAbEFKFodk3/WQz7S0ZyeteU0FmNxcV2jJVOJyaZxEiwhJkF5SR77DHgcCp0bG+E4momCLOKp6eVWD/oF3Gwb1pbUASwUqyOY+ZZxu7tdZxcGsav/eEgfvmGv1Xt1URm9Y4+HMhO4YgXCuCdhfZi0j+UUDEghQI+8xngnt2hQUBbrPZaYvW25KK56J6xsfZzA/tM99so1L3WxFKpLBAk9HPLvYQSqNFsqhiQJsVqY3RmdgNGroO+D2U8KBZRL9WQ0KkXFeL5Qt3DO8XqqmZmteMo801lmbDcaChndcyV8wBWjRZpFViMg+siSNS6VvMBQH4J8Z3VAJJ+s0vXiGgVboz5fxvpq2K63BarcxVPjem2Un+rB+nh3h6RVY3sHdtGkKitFKujZSJx7u/hEuJKVay+NFlnYO5B3dSjGIHWkqn4N3Y3SFw6szqus/pSmdWNhlr2rVEwAUD3hbdeRyESKjUIkqHzLxzwLuWkWj6pm1m9SgxIPg8VMRKjzfpeKNQu+wzKDVvPWR1lhnUEYudLCb0YkNUyq6OMM02jT9cES6OBQk1/wgLpNPq9Iuan1X4eP5/B4YGLZm7A0cDh1Cn1aMrx1ukY6utrCwBx8DyVRTbf/nvLtYR+FMqyLOzcQlPv3CJri6smaWZy7Q7y7HQTQ14hfgxIIFQUTGfOvG60BtThlBuOWVetEMgGVbXKIrzOatdFiJzV5dWPKbZYLQSQTMJN1FFdUILf0ccTuHpgorfPr2jy7vz57tcxsAJPubs67om1SlMtId5Kg6dQiHjHFV/Ai8ef7Om8agDte9XCQlus1nH+OQ6GU2VMF1NAtYpHH2niSOaMagO9PHFjklQKI/4SpmYTOHYMuHE0nAzScT6G1yRZaYvVSzUfaV+zvwVgxw7g8+evwh1XTGE8mVP/fwMZ7vtu6MOPX/XV9q62b9feZ3LAQ6HuopEv4ZvfBG7JHlPf0BGrM8BSVWVWP/
6EhYPZaXPX7U6x2oSzelD9/VEx76WSrcYHmqaDroLpjYZyvZrKrN7qrIVYHYiWs3pmVmDEX9K+z7qeUCu8l9UhStka7ctxVMRMYdl1Klw5H1esFujQnTr2WW448WqnhfsN7GprgVFEoQD1GcQUq1NBsyu6p7XfKF4k5n0hnYa6FoR9LimlutZupf5WD0KxuoepFOrw4uT/AuEFYnWxulh34/X1w2zpFctSI2e1xo3d9a2u5fRGRMrIUbtKZnXssU4UA7LcWa3rLl+lAFzk8kj5eksSW87q5VEFcT/c0FldrzZW5EXlC0DajicCe55c1VndlBYsV6OITpBAuUOsB4B8xdGLAfFWcVZHEyxO/P+X50F1bCJxInRWx1oJ0Uk6jRuHzuLBJ9Ug+vGJLK7qNyRWR86exx9v/S4jdIrVOq5qQInVQR7Tix3F2qoGxOqooxVNBC029VYtkLUlCJRYvdD+n8/OQlX7jhsDkrRWOKtLJQPO6miVxbL7l+7S1L6g1hUJZESsdi4tVpeqMcVqQDlK/TwmT6uJxqNPOjiyUZzVZ892v47DaoVhowinrTR4CgLAsnDr6CnsTC/0/vU1igGZnzdWx2FkoK4cX4UCTp1sYm96Vgk0hopY9jyhs3pq3sGJE8D+9JTarnn/TnYKKY0G5iopDGWWLx999th9KYwnF/GuFzyoNhjKFb7+pQN43+1faW8w4KwWKXVsX/xWGi+8QyJRyqtvaPTlMgO2igHJ5/HZrydx947H1sbIYOBakBpwVQxIOLF0YSFQTnBdV+0ysTpf95AKNPvyRLFzZ/v56KjWpHBEELRjQCanLYwF+mK1qjuxrA5RRfWZtMTqRB2V4rKxpk5spuvCTdRRK62MFpFSxB+Dh87qFTEgBagJIR2xuu6uFKvzUqs/Kzy3bRBoNiEQHjdXy64rFKt7GDVDFPNkdl0EidVjQJpSIOHFO5E9D93VswHUq03YQm+5jOcLJSwvd1brFFgM7JWZ1VEMSFxn9WViQADEv6B53XEC0T4LNU87P69VBC7skeeWgIxTid9xtCzYNlBvWCtuFEt5S8NZjRWij4mImXSqW6wHgEI5oTX76ngW6qYjZqCE9eWTNrmaj4xfu/wbn450Gi/a9iS+clwNbB6/2IfD/RfM3IAjZ8+x0I0zNKS/TwC4/vq20HP4sN6+wirPM0tea4Ll4lJKOZ50PoOO8/bECWBuXuhNBJG1JRKrO53Vc8CQFz8GJEhZajLMsLPa91deD5u1BiydFTwA+pK1bmd1JYHARIHFSzmrK2qVV6xrbTKJA9lpnHhC3V/PTTrYmZrv7fMrcv0tLKhHTbFaAl2rgkolqOJfW2nwZFnAlVe2X+/evX7H8kyI/uedReA0Y0uGhyRmyilU5wuwZBMJS26dCBAASKWUWL3g4sRxiX3eRGt7bFwXKbuKfDH839RqaEoByzFwbqXT+NZ9v45DeEK9NlUETwjgla9sv9ap5RERCr7//xd34a1vrKjYCs/Tun9lRnzkax5qc0s4NeHiQHba3CTTvn3t58sdtjGw+1JoSKstVudSRsTq1j28VALqdcxXkuhP67v2CbonUl72MiOTdi3jQbGIyZkExoKcGWf1MtNcvuIoU5dGgUUvUVvprA7rnMW6ftk23Eu4tQHEN0hEBRaXO6uLIn5mNVbGm7b2m5dKN9M4XgBAtYryUq1tGN0qk8I9yhayZmw8tJYz2DYCp45SLRRru6rHOrEvwK4n2tnSYSexXJLagycvsLCwwlmtl1UsHFsN9JYVWCzpFOyLnNU1KNEruoBFjjddZ3XHoDTKrNZ2Vve76qKeV26J4xdSeMuBWa2OY8IOCwxWuidT8kVLw1mNVZ2E6hfGb1uptEC+7nU7q6uOVgzIpTKrSw0H2zRiW9wggWpTdk3azFVSGExrOn1SKdwxdhz/9ZGXAc0mTkxnsPfmWTOiz3JnjymxemwM+PVfBy5cUGtqdbBtjKSKahl1XXU+zub6cN+OOQ
NidROyXMH73gfYp0fw8uQTve/826oEAYa8PGbyfuv6PTtv4Sa/ALj9sXbpp8LJwM7Mal23MtSpuTwWqVSUesIyQrG60OGsriSMFFg8W7mEWF1NxHdWp1I4kJ3GU09ehZsLQMqtqVtuLzurly/L1xGro+tzp7M6KmK8lZzVAHDDDe1iu7fdtq6H8rTYNjA8DMzMqNcGhMqRoSamSxk89VgNV+wKR/5bpbgiAFgWRrNlPH5mHCefrGP/tkl1TdGJAXEcFWFUVNGGtXIDtqmInZERtWLnrIqWMCZWA8Ddd6taHkNDZiatwgKmj53L4PlHQle15kRIZjyFpZqP7zxs4+YD8+q6bapftH078IEPKFFdt28IhP+bapdYvX14QT8GJGmhUreBQgGyVoeAGpcSQ7zznSp+8CUvMbK7dFYgN+sDhQImZzJKrNYssOglE93O6mYThZqLlKOx8s5x4CfqK3KglQ5hx7t+hTGv1dJKt7YQUqsYpKqf1i32FopCK7M6lZSrxoAUC1J9tgbE6vkLZQx6BY7negA6q3uYQl6qmac4J50QK7KKAWjPkq0mKLYGTxo39lZWVCTSRVVtdfK9LlG0UEsEFwKOLVfkFcu6ZhRKZ2Z1FK1Rr6NYd7SXjY1uS+Bisa8lVh+7mMWV/ZNaQqXtCDSaVnfbArBUCJ3VMdpXqw0sc1ZLQKttpbNhpe+obUlpLgZktYgZDWd1a8lYx3nQaFqwXc1LtWVhZKiJXDXA4sUS0GzCtppmBlBjY92zzqbEakD9f3btMlJMayRTVsuowzZ7dqkPu9Lz2suIvUQdlVIT09PAt08PIeuU2LnpVSwLw9kqZkqpVjuYnU9oFVi0U566FnRMNLac1dr3xGVidcXSE5YB9KUbK5zVRgosVlZ/f7Fqa8WAHMxO4fgJpVMe3tHhVO1VMplugdpEDEhH29qSMSAAcNNN6vPYtq3bZd2rdLo/Bwe1dzc8amG6nMFjjwFHdunHNGxERocamC6nMXmhoYQkXWe5ZSHtVVGouUCjgcmLUi39NyEAL3c8mxSrLQt4y1uAV73KzP76+tDnlvCvrv82RMGMWJ0eS2GxGuDYCQdXb5tXG01et7dvV1EQJhyP0f8mFKsncmkzMSBJW/XnCwVMTwMjBmIlSAfXXQfce6+xYrujI1BjhGIRk1MCY8mcdl9exZF2GJtqNeRrHtJBI37bdRzlrF4lBgRAvDYWRousEKt1ndWOo2JASt1/a75k6WVWp6CKoq4SA6LrrLaERKNUxdyFihKrTV67SSwoVvcwxXxTa+YpSIaFnzrdutGJHfOG2RKrOwbQ5YrQHjylM0IJipH4GcWA6PRtXBcCWKVgn55j23FWitXVilTLRTQG/BCiVTU6OtZCzdPOKx7c5mGukgSWVCX1Ib+olpJritV1mVixBCdfSsR2VvuBWOmsNhEDkrXUDGxH2yrUPFW4MmZn4VKZ1doxIElbLRnrOFYAZjq46TRetv1x/OzPCbx0x1Nqm4mbsON058eZFKsNMtJXxXQprT5bKTFXTqqOiKazOmlXUSo0MTMDPDndrzKre1lM2+IMDzQwU0m3YpFmF22tAosi8NV9plOsNuCs9nyxIsO/WIKK1NCJAVkuVlcNiNV2BYXK6tf8aj0Bx2poxYA8dcrB0aNoix697KwGgJGR9vPOa+OzxfeREE3Ui8v7Wxp9jY1KNgvcfz/w3vdujCW5+/e3n197rfbu/H4flYaNR49ZOLxtQW3cSjEgUO7yi8U+iHq4wsLA359y62rsUa3iwgUYESkBqH5Qp4jWy4LHyAjec+1n8SO7vtgqMqg7EWIN9CFhNfHY2TQODIbX7V6dxE8m4VgNVJdU33CqkMSov2TGVdtUYvXZ003sTs/17mdAMDomMFXKdMSALGlfY5QBqaMfV62iUHeRSuoZ8bxEHZXysrGmjgvaceBaq4jVOgI4ALgu/MTK+mmFUkI5oGP25VIpqBXjy8XqKF5EQ6xOO2UUFmqYm6
xh0Cv29rV7i0CxuodpVUuNedIFKQulZVm9rQtPzH22ihZ2itXlcACt0cHL9gG5sHI0AKDRQLWZgONpNNFLFC1sSgHLi98JWc1ZXS5DLc/WEb6EUKbq6LONYkA0ndUik1YdsYUivvUt4NbxM+obumJ10+qeCAGwVErEdlZ7HlY4CWVNPwYknbW6Y0DqdeTrHtIa8SqOn1B//3JndSNmJeaQ1pKx8HOtFBtqEsSE0yedxn37vos/+vMkXr/3AbXNVMe5M1O6V8Xq/lrLWR21K+HYesJHWDykWJCYmwOkFMg4FKt7mf4BgflKsuWims3ZWgUW4ftq9UfHfaYlVus4q5PWyhgQA1nYfVm5TKy2tcXqpF1FsbrK+xsNABIiYcX7LJJJ7M3M4vSFUKwen1Xbe/386utrPz9yJP5+PA8ZpxItigIAlCvQXsm2YRkc3Dhu4k5n9fXX6+8vFE0eO+7hyGgYL7JRPgtDjIwKfO78VXjhwUm1wYCIkPIbyqFXqeDipMC4KbE6keietOplwSOdxiuveArp+gIwNdXapkU2i32ZGXz2iT042B+21169bjsORpIFTOWTQLWKZkOqTHjN/5mfCou7Fwo4c1ZgV2qeYnUPMzputcXqWdvI6g0n5XavvKtWka/5SKf0xGo/UUO5uFoMSMwC3I4Dd7WijSac1XYN5arVXjUOVTcq7ZRj97udwF5RPw2IdDON/mwQIONUsDRbxexkHQMUq3sCitU9TKEANfOk46yudzur66WaVjHE1ZzVpbLQzqzO9lvI1fyu+IOmFBC2geJny3KgAWh1Rh0bK1y1pbK+u9xOSJUDHR1vrYai7gwsAGQy2JOew+kzAn/918BdO8NCeDpitWupY13urC6Gzuo4BRZXiQGpVZpwrYZeZnU20R0DEi3DSsafBLBdCzW5MrO63LC1xGo35XQVWJydkRj288bE6uePnsTb75vGLQNPqX3GFeiW07ksu6fF6jRQLmN+oqQ6IboDB99H4NRRLEgMDTZhiaZyVnNA0rNYqUB16EOxulBKqEnhuG7dVe4z2tEa6Lgedjqry5Z+ZnW22TUxXKtbcCyNAsm2rZaPNroHIwDa13LXjTcplEzCTTRQqQg88ABwZCgUqXpV9Ii4917lpv2lX9KbDPM8ZJwylpbam8oG+hrkOWDXLuWqP3wYGB/X318qBcdq4KHjKhon2raV8Ps8fOKeP8DvvvbzaoMBsT6dVBmyKJVw4aIw56wGuqNAelnwEKItrJ86pR5121Ymg6v6J/HtqR3YH1xQ23q4XzSWKWGylEV9eh62aKh7lm4MSNppO6snEnRW9zijOxxMlTP4L5+6Al88ts2IWC0Cv7tIcuSs1tlt2OeqlLrHsLIWOqvjjBfDuI7lOdj1cl1LM4JlIXAbKNXtrvHyUtlWWkHcFY3uKjGvUM5qrai8dBppp4z8bAXfeySBqwcmeM72ABSrexhdZ7WftlUMSIfjq1WgKW5mdZSp2+kqjmJAdMTqPgtLtfYAulIKRUqdzsIqjjfUakqo0DhWxxVKrF4m2Osuz245tjscwIW6q9/HTadxIDuNj39rB86cAe4afVht1ymw6Fiodx5ryFLJVs7qGCLoam2rVIK2OJPqd5DvaFuo1VS2tE4UTGBfMrNaR0fJDiTUeRB2bGZmoPJ0DcWAWELif/z4g0o/SSbNLac+eFA9Dg/3rJA0OAjMVVRW8dkTNeVy0T1Wy0IyLXCxmMVofxW7M/PMrO51wv+NLIZFymRTr2if76uVK4X2tSBXCgu46sSABNaKybtSWSBp6wmVfVl0OatbAnPczn1Yx6HaSKgYq050XTnhiO59r3wI3/sesCO5QWJAtm0D3vUuJVjqEInV+fZ1ulwJzQEUq3sb2wZ++ZeBn/kZY7m6ezOz+JV/8R04uXCFQX+//n43EqkUXr7zMYiLF1qvDexSOauLRVyYssyK1Z3F/3pZrAZWitW6EwGWhSvHF7EjuQB/4aLa1qN9QwAY6y
tjspTF5LEFFf9g4P/lpZ22s/qCQ7G6xxna5mK2nMKHHzyI//HaTyqjkG47iP7fXc5qT+/SFRZYXC5Wl/P1+BPZrqtWyOWX7bPYVP0NjUicwGuqVf4d4+XZQqC3onG1mmSICjdq9L2z2Zaz+sv/nMKLxp/ccpPCvQjF6h5GN3snSCfUBaLD8bW02ERGY5+tYgFdmdXQzlDMDtrIVYPWsZ46m8C+zIx2niwAozEowCViQCr67vKWYzv6bBsNbVEVQEusfv9nX4j/9OsSohp+HhoDftu1VAzGMrE6rzFb6qcSK5yEJiZCEkkPTSm6XPsSelW51yqzemDIUvnia+SsBtBe5mly8OT7wG/8BvALv2Bun4ZJBG6rKOiJJxvYk541MnAIMg7O5AcxnCzizQe/oTpgPTwo2/IEAdJOBYW5CnI5wBHhORz3fPA8DPt5zMy1u1PTSz5GfL1CXa0lxB332mJFPwu7rw9YrHWI1ZHArNEhd12g2uh2zgAAqlWV5x13QBJes9549UN46CFAFMM81V4Xfkzheci4oVgdTiqUq9bWzKze6qRS+PBdf4IfOPI4MD2tti0v4rfZ2b5dPV4IxWoTMSApqJV3xSIuRs5qU4Liy18OXHON6g/v3Wtmn2vF8LB6vBgKywYEmqt2F3Cwb0q5LoCeFmrHBquYLGXx2c8C1wyeN9K2skMOlmo+GktF/OMjQ7h+6FxPfwZbHbs/jXozgUpV4FU7HzaTix+NBTrE6oa04AQa9++wwOJyF7Qyd8UUlm0bgV2LFhy2KBca8Cy9/kbgN9Uq/07dqJ5Qx2pYrC6WLZVIoOWsrmBuqo75nIWx5BLP2R6Avd0eRs0QacSApEPxr0NQnJsXGPAK8WNAlhcLAFCuWNqCYrrfxlJHEbzjZxwcyF7QG5B5HgSK5mNAXIFa+RLOap39RoUbO+IqJADL0RQq02ncMHQWb77yQdxy3Y1qm+dpuX0Gs3XMzKRXOqvLjnJWxxGrowKLXRmt+nno8H21NKrDWQ1Ab8LicpnVGve1/mEbC9Vk21k9J1TxN5Ni9WS4lN70DTibNbs/03ieqvJcrOALX3Fw37YnjXwGyX4HZ6cGMDy4gP9w89+otm+oOjlZA5JJjAU5XJxo4pPfA9545CG1Pa6TzPcx4p/HzHwCoZSC6UISo2NLejEgSUstIe6KAUkgSOiJ1Zk+S8WAVKvqfiibqr1qTLC4bkc0Vse1v1Gqqmtv3AFJ5BxdWMDu3eqxa/tmx7aRdmtYqrgq/9u2jUzgkg3I4KAqjD052Rb/OjORtwKdRSsBI4JqOmth9pwHlEqYnLYwumsJSB3Q3i8AdXw/9VNqQrDX+wTLJz50CsOGXH9VFf9l5i/bG3p4En9ssI6Hz47gbz8yji/f+d+BpP7fPzhmY66Swke/vg0vOXBeGRkofPUu/f2oNWew3V1Aq1CEKbE6LOitxreuXgSj48Cz6suH3ygVmvHHy66LpJ1Hsdh9jpZLUvU3dJzVPlBqdDirpWyv6Iv7OYTvk5XQEBFSKFlIuRoFFjMZpJ1JfPXRIdy4e05t2yrmiB6mx++eW5tCydKKAXHTrnJBd4i18wtCZbXqxoB0OasNuF9TPpodec1PnfVwsG9aW6QEAFluX9Gb1brq8OuIypGrtuNOUa5aapbQRBZ2R4FF9Q29itRIJnHL2Bn88R1/2r4Ba3aYDu0q4cnc6MrM6ooT21ntpewVy95NONbheSr6Jfx/yaq+WG17iZXO6jAGROejbbnAI2f1rFDOat02ALQFnnPn1ONWW9rk+xgJljBzoYavPujhjvHjZpzV/b5yVsvp1u8hPUwyicMDF3D08QT+/M+BH73ya2q7hlg97OcxvdA+R6cKSYwEes5qN2mvnLwz4Kx2kk47wqkQOpVTKa3JS9eFKry8zFldytXU8cYdkAwMqEdVvXTridUAMkG9KyJNOasZA7Ll2LFDCZ4TE+qakEptvUH0jh3dfSETmdVZSxllikVMzyUwGi
yZ7xv1ulANADfc0P06inbTwD60H9cOTrQ39HCfc2ykiY+evBEvv+IU+tyykXNrcNzDbDmFrx0bxr1XhrWCKFb3Lp6HoVQZh7KT6n6rOYkPoP3+SC+I+nM6YzrHgW/XUCl3O6vLxWb8voHjIGlXW5p61z41tY3Al8pZHYrVzWodAqEOE/fa6DiqVkqpuyBkoWypRALNGJDvnhnEoeFQrO7h69ZWYQPcQbcuhZKlVWBR+CtjMOYXBAY1xGo3ubICqwmhdnlkx/FzLg5kNcXqsLptrdJsLXVuzRKaEKs7JgFKBtzltoPuHGgDLnAASoRY7qrV7Igd2lvFEwtj3c5qKVGuJ+Al6rHabCsGpHMSoKKfBd6atCip/1dxqaFXLRhhpMTyApP1OkoNB0FSI59y2ZKxmammcmOYGJRGS2ijz3erdZr7+rAtWMRXv+li13BZtVMDwnIwGCixunI+3LDFPteNRhDgmoEJ/MO3BjA8LJGthfmvcTukoVg9k2tf86YLSYxoZs0L11GTbF0xIPqFGzudPvXFAhKiqS36uJ5Q/YJlYnUxV9cTqzMZde0vFID5eeUuTibNTN5tEDLJRlcdA4rVWxTH6S7UuNUiQADV5nfvbr820C/aNtbERKEfKJXC6MXq1hQnstm2c/2668wI7IcPt58fOtTTfaOxbRYeX9iGFw8dVRsMtK2hHT7mKilM51yMuWG9hR7+DAgw2l/Fob6OcbJuvYEggEBHjZTOotNxsW14Vr1r0TjQUZNMQ6wulrr/3kpZ31ntB0LVTwv/9sXpKvrckra7PGlXUcgtE6tLCTW+1yyweHRyCHsyoVjNc3bdoVjdwxTKCS1n9YqsJABziwkVAxLXWZ1MrBIDYqA6/bJjfeqsp8RqnQ6DEPDsZpcAWio0tQv2OX5ihbO6VLHUfnUyqx2BqVLGvFgNtCMajh9Xj4ODWrvbsa2Jc4WBbrE6vBEJ14l1g/eynso97ZwEKBtoW56nZmCL6vNcmGui340/YQN0TAR19hYaDdSaCTiuRucmjKpoltTnOjMDDHkGinwAwNhY9/9lq7my+vownszhb782iNuuNNcJGdiZxPdmd2LECkVPOqt7myDAtYPn8aGv7MJttzTUddbVWJbpuhgJ8pjOB+1J0bqtP4EbHU+ns7oa5vzpTN6l0xBCorGYR2mmoMRkTXHGccWqmdXFXB3JhIZYLUTbRR0V/tpCrmqgQ6wO77Un5gewMz1PsXor0inUbkWxGgDuvLP93ICovHePxKn8EFAsGsnv39C84x3A3XcDb36zmf11ttebbzazzzVibH8KAk280H9QbTDQBjJjSeRqPqZyAUYTYf+QwldPs2O0hiMD5gq4wvO6Chc2SlW1uls3BiSxSgxIUcaPAXEcBKuI1crgp5dZnU6HdQHC8fLsxZqKt9QplO26SNmV1uLAiEJk6NAQqzNOBScXB7EnWIP6TiQWFKt7mEJZ86RbRayezyUw4BbjZ1YnE2ppcqegWE3oZyh6HhKiiXpJieATMx62Jxe0b+x9QRWLHYUbWxdzzbziFTEgtYT2Bd1xgRf/7XsxcV4t7WlU9CNLWmzbph6Phq4BTbHa8l1YQqJe7HAWR59HzBuQCHxIoKttlStCX5zxPKSdcisBZXG+iX4v/jkAYNVzKxJqdAo3wveRdcrI5dTL7zyRxNWDE2Zulq4LDA21X2+1G3B/P7YlF/HZR8Zx7Q5zYvXbfiqFF28/rmKLDO2TrCFBgIN9UyhVE7jturCnq+MsFgLDmQpmyun29SDK49O5brmuynvuyPkr1pSbRGu/mQxG/DxmJhsozpaMiNUqBmQVZ3W+qdeHAdpRICdOqMetJlanml0DvS+d3Y8Xjz9JsXorsmdP+/mOHet3HOvJLbcA//JfKvfvvn3auxvbYWOymEUtV4IdFdvdqmJ1X5/6bPv6zOzPsoA3vEEJ1bffbmafa8TAvn785JEvqZhMwEj/WKSSAATmSx76q6Hwxf5hT/Off+
xJvGz74+qFieuA7yPjtIskF3INFZWp6az2E7UVYnW5JONHdlzCWV024KweGLJULaaweuPsVANDfl5bsE91TAJEFMq2nsnTspDOCEhY2NM8qbZttbFyD8Lebg9TqNgqeyfuSeetEgOylMAhP76z2u0L1KC0I9houhDgKn9SO6oh685jaQnwS4CfqCkTqOaNfSBVwXwliV2Rs7oE7fyllli9LAYkSGg6q12BYt3D3KzEdgDFglQXXZNideRO0xSr4fvYl5nB6QkHrVI0kQMw7mzp8kIUAEomikn5PtJOBYW8xBBU7GmfW9YTUIIAQNN48U74Pga8AuYWExCLQK0KDJuKAQFUFEhUnGmr3YD7+jCeXMTEYhrXbZsCJmFk4OD5Ah/64b8DLoRuDA5GeptUCo7VxEv3n8Xzr7KAz0M7BmMkW8HMCVVwtmIFcK3wWqApVgNoi9X1Oop1F4Hb0Fuamk5je/IcJi4I9M+VkbQT2n9/wrNVLNLyyuxLDb0YEKAtVp8MBw5bTazOoOWsnp0FXKuGjFthgcWtyB13qA7M0FDPi39ryt13qy8DKEERmJ4Ghj1DRdVImzvv7HbD9yhiZBi/98Lfa28w0Y+zLMC2IWt1WJWSuf2SNcMdH0SrYp+JMZJlIe3VsFT1MF6pIJ9rKl3HgLO6XO32m5ZKiG/Gc10lVpe791kshoYxJ34fsW/IxkIlCRRUFM7sVF05q03EgKxwVtsqykljfJ/ps+BaNYzXz6m2sNXGyj0IndU9zHzRUy7ouCf0sqxeAJjLOVrOajvtq0FpOEMGAJNLKYwnc9pRDRmnglwOeOIJtIPtNS8SA6ka5ivJdgxIdDE3HANiIrf74M4KXjB6olVDqlCAdrZyi0isjtAVq10XezKzOH2xLUzX8hXYohm/vQaBymjtclYbyEP3faTsqnJWS4mFBaliQExkrJsWqz0PA14R80sJfP7zwF1XGi6G2FnhfavdgPv7MR7k0OeVsCtpOIusc0l2lA1OepNwdcGnf+CP0G+FSxgyGa1dDvfXMV3OAOUyZiYbKq/asvRyP51lmdVhJn7Sa1z+fU9HOo1tyUVcmEpg+kIdGaeifX0R3srIEoBitQnSaagCcJUKvvJliZeMP6G+QbF66+F5wA/8APCSl2yp3PY1JQgwlszh6MkkRrzwfkCxeusxPNz92lAb8Dtr2CQSXBHT63SuPjW0wiAT1FqrowpLTX1ntePAT9RQrnSbFlTMaS3evsO4klK5e5+5vIWsU9Jqt16fr0yOoW40NyNVLSadGBDHQSo0oXVSqoaJBBpxjOl+G7vS82plO8D7QQ/Aq2YPM1NMYtjPa8WAOFYDtUIV0aVrPu9gYEgjrzcSuDrcrxfzaYwFOQPO6jJySwKPHW3i8JCZJVP96Trmq8l2DEgJYWZ1fKHOCSJndVuwL9VsZG09AfT9/+osGo8WMT+v8qWnZhPq/2+ic7NcQNMVqz0PQ14BcwvtG0J+voa0U4l/AwoCSKjJleh2aSRixrKQ9uvIV12gWMTCglAxIDqDPd8HUFqRWQ1A71htGwN+GfNFHw/8UxMv23UaaMCcsHz33eo45+aAm24ys8+NQjqN7alFXNN/HqIQOqjWQqw+eNDMPsnaMDQECAExPwcsLqptms7i4cGmigGpVDB1roIRf0n/nF2eWR06q7XF6kxGidWzLj7xd+N4/YFPAukrzR5rSKlgIAYkEhGi6+sWE6szGWCpqpzVRx9u4rqhc6pPoFv4iRACJJPYmz6Fbx4fwmhUBG+rTeQTdY/q70fLLWSoDQwOClQqHf1NXrd7myNH1AoWxwFe8Qoju+wskpxfksac1ZVV86VjitWRs7rSPX5dKlrI6K5Ejs6lKAZkRmJQ11ntuioGZNlnIJtSicwaQnhm3xD2pEND0+goV0P0AHRW9zDlegK+XdeKAQkSNZTy7cHtfN7Ru0gsu+gAwMVCRt9ZbVnIpurIVX089r0qDvebWVI/kG1gvpIy66wOnG
5ntZQo1cKCWjodEc9Dv1vEwqLax/HzvioyaUKsHhnpdvl1zh7HwfMw6BUwl2vf2E6fktidntOKARnyC5ifb28qVw0UWASQjnI/CwUsLAr0u3ozxatlVsuamYKYg6kK5qtJnDzexP5UR1VqE6RSypn1Yz+29WaLLQv7d9Xwkbv+GJgynB/YKaAZyNEka4htK7eulMCZM2qbpljd3ycxX0li4kwd//4/JjAamBGrHavRquOAeh2luovAl5d/3zPY7/ZsHo/PjeLBJzK4Z+dR/WtBdM1f7qwuSH1n9fJJtauuir+vDUhmwFYD3UIBjx9t4Kr+ixTTCDFFEGBvZhYPnBrFiL2g+skUJ7YmneMiQ/3jwdGwPwCwXW0EbBv4kR8B3vjG9qouTTLJplodVS6jkJfK1GXAWV2pLnNWR2a8OGNw10WQqK2IAckVbGSdst64doVY3VTOap1+TBQDUuz4DKQEZJhhreGsPvDC7fjlm/5OvaD5qCegWN2rNBqAhBI/4zo1fR+BXe0Sq+fyjiogEfemGUU1dDir8zVXXXw1l6VmswK5qo8Hv9XEdX2nW79PByVWt2NAphddjAQabnUATmB3i9WNBsoNB77b1BarB7wiFnLqtDx+ITAnVtu2mi2O0HWneR6G/AJmc+3P8YknBQ71TWo5q7cnFzAx076JzxQC1V4121Y6FVYjzuexmBPoc0vanQUAKvojdPzVKg24VkP7/zWQqWO+klLiv3tRbaQ4YQTR34ed6QXgYvi5anRouhgfbz83tU+ydoyMqMcoWkJTrBaB+p9/858TSLp1vO2qr+kPdqNcwlIoTjcaZpzVALaN1PHh47fglXseU7cs3WONrqXLKv4UCzL+stSITKYtWN94IzA2Fn9fG5DMsKfE6lwOTz4FXJGd4v2AEFMkk3jtnofwmRNXqLFBMkn361blllvUvfDmm7uLmWowtM1ri9VbzSBCAKgiyUtV5axeWBRmCizatZWZ1UWpzHg6zurqMmd1KYGMo+msTqVUbMmCMnedO29hPFjUOx9cFym7gkKns7rRULXNNeN2vCMH8NLtYdyaoesA0YNidY9SK9bgWA110YnbcfJ95awutKul1moCbqIRXwROJiGERCMfitXNZltU18nnBJDtEzhf7MfkJLDPm1AbdcXqfuV4ixywp2dSyv2r46xOOt0FFut1lOqOKnylQxCg3ytifjESq1PmxGpAVeV+0YuA175W+38VOatnc+2b4hMnbCVWa2Ssb08uYmKuLfadnB/AvsyMtlidygg8Mr8DzVweC/NNlVmtI1B1tvcoYqYYRcxoitXDCcxVkqiWGvCqYSeX4oQZovgbGQqAppwuV18NvPWtwAc+YGZ/ZG2JxOrT4aRoNqu3P9/HgFfEd486uOfWeVzRZ0BQdF3VwS+G9+9aTd1ndJ3VALaPNXC+MIC7+x9UGzTF+suJ1cm4Tp9O3vQmtSLkrW/V288GJDOWRK7mQy7mUClJtdqO9wNCzBAEODJ4Eb9w46dxZGCCguJW5s47gd/+beDHf9zYuGtwdxojuwNg927g1a82sk+ysUinpJpwLpXwDw+P4yXbn9CPAbHqKNe6x/HlCrRjQEpVuz0+ApArOsi6+s7qfreEhVklJj/4eAo3Dp/V68fYqpBisdNZHWkyuv3NVKodP3fkiN6+iBEoVvcosxdrGNLJqwYA34dvd8eAaC+RCALsSs3j3JS6GJbmSvDtmtqfphvhRdcv4v0P3otX3jzdvugYEKsXqm1n9em5NPakZzVjQFY6q0sNA2J1Oq0u6Dn1OT510bBYbdvAm98MvOY1+vtKpTDkFzC31OGsPuXiUL+Gs9pxsD2dw0Qu3SpWeGqxX4nVmp/BG140ga9cPIj/83dumFld0h+URO29Mw9dN18bwP79wIPTe1SuWRS3wwGUGQ4d6n5tSqwWArj9dhZX3ChEYnWErnsilcKu9By+9GAS+4YNuaiiIjqdzuqGi6TfvPz7ngHj2wSSdg
W3jR5XG3SXu14qBqRoSKxOp4F77tmSy6iDkTTKdQdPHrewYzjsc1CsJsQMiQSwcyc+8LxP4vmjp7ZcJj5ZW66+1sK177gD+MVfBK67br0Ph6wDmaxa2VsvVPCPx7bhZduP6ceA2DWUa91jzVJZxF/JJgSSgUSx4Xb145ZKthFndb9XxMK8xEMPAVdvX4BtNfX6yL4fOqvbMmajUFZ51SZWt77vfcp81FmPiKwbFKt7lJnJBkZ0xeogaDurpUS1CiRkKKjGPZldF/v7ZnB8bgCo1zF5poLxIGdkEHnb82r4/Ts+jB+99RE1s+d52g7ggQF0O6vnMtiT0RSrlzurGw2U62EMiA7pNAa8IuZz6thmljy9AptrSTKpnNUFrzUL++QZDwez0/FvwkJge38R5wv9rYmAqWJaLaHTFIB37QJ+/oZP4wsPpLCQs/Sd1cBKZ3XJjLP6ppuAf5rcj339C0q0t+3ebAMbkcOH289HRjgw3ars2tV+7jj6kwyDg9idnsM3Hs1ib5+hIl2uC9+uoVIOxeqowGKg76z2+gP842t+C14izNk3EAslpVjprC5CP7N6q5PN4vt2PoZ3fvgOvOQ6FoAjxDhXdhSYpaBIDHLffWb8QWTjkslaWKr5+NLXHbxw7zkl1GpnVtdRWeasLpUtNQaNue9kSqBY7xarc2XXmLN6fk7iYx8DXntNGL+nI1YHwYrM6sJ8FSm7YkaszmZpPuohKFb3KNMXG0qo1Mw1Gk/n8aePvwCNcg1PHJO4IhNmtcYVl4XAgeEcTiwNA6USJs/XMZ5cNON46uvDffu+i32N43rH2EH/oNUlVk8sprE9uah14bWTbrezul5XzmpPX6zud4tYKDgoFSXcREOZd005q01i2xjM1jFXVq51KTtmdTVcdDsGS5go9gOlEppNAFKqz0BTrEY6jVtHTuIbR9M4M5NUOdiaYrVlCTSaYqWzWvP/ZY0M4e4dj2OvFxYZpTBhjs5s6bvuYjblVmV5kT7d68vwMHan51BvCGwPQkHRQA60Z3VkVkdxUybMxX19uGW0IwJF9x4T9VNWOKuFGWf1ViabxQ/s+zaemu7Du15zSm3jPYEQc3TeD268cf2OgxCy6ciM+Fiq+fjwZwbwxmseURt1tJ1EAl6ihnLdVlGsIaWKgJ+ox953kFwpVi+VQ2e1Th8uqcbcU/MOPv5x4L6rHmttj00QIOVUVhGr2d/cjFCs7lFmppsYCZa0HZX3v+SLeCo3gmMPVXD0oQauHphQA1ONwen+0TyO50aAYhHf+67EvsysmZmsyN11UVNQ72Bg3MN8NQkUCgCApoSa1dT4XAdHbUwU+7rE6nLDge9pOt5cF/2pGhbKPr78hRpu3xvmdusKKWtEss9BoeYBhQJmZ4GRrH5e1Pbhqvpsy2VcvCCxLVhU3zAgVvt2HRm3ihtHz6tKxJpikuPILod9qSxUJI6u8DM0hHdd/UW8dujr6jUjQMwhBPDud6vc9pe8ZL2PhqwXiURboLj+ev39hc7qXX052BV1rzHlrI4W8KBeR0NaSLgG7gedjpEox10Hz4NtNdAoLROrSxSrtclk8PzRU3j4h/4jgjprGBBinEOHgG3blFA9NLTeR0MI2URkxpKYqyTxzWN9uGPbCbVRR6wWAr4HlBtOKzITAMoVC0EivrPa8l00l62Qy5VcZN2Stljd7xbxZw/fhFe+QiKo5dR2nbGt5yHl1FCsJFqC/fSFutLNWOR+0/G0YrUQYpcQ4otCiEeFEEeFEO8Ot98vhDgvhPhu+PWqjvf8vBDiKSHEMSHEPR3bXxFue0oI8XNr8ydtDqanoO+sBmCnPBzITGN+stoWqzVF4APbSziRG0FjqYj/8eEM/tWVXzPmrAYAzM6qRwMDMn84jUrDBnI55HJAxg4LQ2pcePcf9jBXSWFyLhS8Gw0s1Xykk5rOaiGQGXSQq/n4zN818Ir9T6rtPZrTKdLhjSafxxNPAIfGQmFZ47MdH65jotAPlE
o4eayq8qo9T98BG7qoP/jDn8PvvPCjapumCJxN1rvy0EtlYSSzGkNDuGX0NG7tO6ZeMzPLLEeOqHWZukVGycbmJ34CuPdeVXhWl6Eh7E7PYV9qujUxasJZ7dt1FQNSr7cHJSZW2nSK1bp51QDgunCtBqqFWtfmUoVitTaeB3ge0qJgtG9ECAnxPOD++4F3vGO9j4QQsslIj6XwxYkr8YIdZ2HVwwl9TW3HD4TSNjrE6lI1ET+zOjwmAbSd1VJiqeIibVf0+nCWhf5UDZ84fT3e+vqymVpMQiCZBAp1Vy1rBnDuHLAztUCxehPyTEbrdQDvkVIeAfACAO8UQkTlMX9HSnlD+PUpAAi/9wYAVwN4BYA/EEIkhBAJAL8P4JUAjgB4Y8d+yDJmpqXKrNYd5Pk+Bv0C5qdqeOQRiWsGz2uLn3u2VXFscQx/9+kEbr86h2G/YEZQXe5oMLHPTEY9Li3h0aMdMSg6F0nHwVuueAAffvxGNaPXaGCmnMZwuvz0730arEwKDWnhi19O4MWjj6uNutnKa0X4GcpCUYnVI+Hyd42bsJdxUW0mIEtlHP1uDVf2T5oZmIfHuj+4AK8ciuqan+vukRLO5gdazup80ULaqeiLSQMD3ULq/v16+yOErCSZBF796vY9Qod0GlcOz+GDL/r/gJmZ9v51EAK+L5R7pliErK2RWJ3N6u/P8+Am6qiWuosMF8sWM6tNELXRaNUZxWpCCCGk58ns7MPJpRHcMXysLdRqCqpeYKm+YeSClhKlagJ+Qk+sBtAWq+t1NKVQq/k0zT39fRLXDJzH4R25tqFDsx+TylhqdXcoVp+fAHak5mmO2IQ8beuTUl6QUn47fL4E4DEAOy7zlu8H8BEpZUVKeRLAUwBuDb+eklKekFJWAXwk/FmyCrdcsaCEZd1BSRBgwC1ifqaBU6eFiuzQFIG9rIebhs7gp39tDD/5mjOt36NNNgscONB+3ZkvG5dMBiN+Hk+dcfE3/6eBV+98SEWA6MSrCIFbdkzg0fltSqis19FoWrBdA07NdBq5qo87b84hKIfirwkxYS1IpZBxy8hPl5RYPTSntuvcKHwf25OLmDjbwAPfFHj+6EkzbSsSpmdm1ASD52mLPnvGKjiTH2zdKCfmA2xPLug7qxOJ7kKA+/bp7Y8QsrYIATEyjD2ZOWXvAIzE93iRWF0oYGFeot8tmYmF6rz/LcuZjoXnwbEaqBbrXZuL5YQq+MPBgx7RqjOK1YQQQsiGIbNDjeFvzzyihFrb1jZJWIGnIjuinLhGA+W6jcBtxO8jui4k0O4TRo8G+m83HSnj/ud9EpieNuOshhKri53O6gsJOqs3Kc9KXRNC7AVwI4AHwk3vEkI8JIT4oBAiWku6A8DZjredC7ddajtZhVdeP4ED2RkzYrVXxNxME7IhYQmpfyInk3jv9Z/FrYcWcXgsFChNXRxuuaX9/O679feXyeAnDn8Zv/XV5+Mz/wDcs+uokYHezqESzhX6gUoFzVoDQkgzjrdMBu++5gv4lTc/qS7oltW7A9NUCtuTC3jqSanE6r5JtV2nLQQBjgxcwKOPCXzvkQSuHzpnJrM5GuzPhxMABtzqu7fVlFgdzmyfm09hZ3peO2ceAHDbbe3ne/bo748QsrYMD6vHyOliIsYqaDurp6YFRoOcmesLAIyNqccrr9TfVxgDUistE6urCcaAmCD6X0WDvF7tExBCCCGkRbovgdHkEg5mp9SGwUH9aEvfV5EdoVCLahWlhmYB7tBZLSuhSB0J4QZWxu2/ysV9+74LnD+vokt0TYMAkpkECvW2s/rcRZvO6k3KMxarhRBpAB8F8DNSyhyA/w7gAIAbAFwA8FsmDkgI8XYhxINCiAenp6dN7HJjYmjmKRKrz01YSPn11jYtBgZwZOAC/urdX2tfKE3lKt92myp49aY3mcnS9Dy89orH0WgAP/zKBSTtmhHxc3S4iclSFigWMTcrMeQVzDje0mm8+9ovIJ0LiyumUv
o3tbUilcJPHP4KfuWDO/Doo8AeJzxmHSd4Xx+ODFzAA99x4SYa8BJ1M20rlerej4E2sHt7HafzQ+0lSAsp7DDhrAaAG25QItKLXsRZYkI2Anv3dr82MCHmpxIoN2wgn8fUbAKjwZK5grvvfS/w9rcDz3++/r5cF65VR7XcXbehWLUpVpvg2mu7X1OsJoQQQnqeRAL46js+1B7Kmyhq7fvKBR0JytUqSnUHQVJDL/A8ZTooKLG6UaoiYTXN9N+i2ksnT6pHA32YVJ/dlVl9ftqls3qT8ozEaiGEAyVUf0hK+TEAkFJOSikbUsomgD+GivkAgPMAdnW8fWe47VLbu5BS/pGU8mYp5c0jIyPP9u/ZPJhy0AQBBr0Cjp10MdYXOr50T+QoW3pmpn2hNCVW+z7wkz8JvPjFZvYHwO5L4Y9e/Bd4z2vCgnUGLpJWJgUpBVAoYHJKYCzImXFWRwLHhQvq0USe6lqRSuGl25/Ajmwe738/YBfCLGidY+7rw5GBCfz2J/bj3tsNZb8CSvDvLFRowlm9RyhndZi/dW4hrZzVJtqB4wD/9t8Cb36z/r4IIWvPwYPt52NjRq7dgwMSs+U0UCgoZ7W/ZOb6AqhJxec9z0yh0dUyq6Vsi9XMrNbj8OHu/xPFakIIIWRDcMXhjn7b8vpccYg0lw6xutxw4Cc1+nOui5RTQSGn+nFLczVknLIZsTrS8556Sj0aMIy5GQ+1ZqIlVk8vOBg2UeuN9BxP26qFEALA/wTwmJTytzu2b+v4sfsAPBI+/xsAbxBCeEKIfQCuAPBNAN8CcIUQYp8QwoUqwvg3Zv6MTYhBsXrAK+Kx0wHG+wy5oKML7eyseWf1WhCJBpNhTIWJgV4yiaRdRWGmhIsTTYwlDS3PjuIqTp9Wjz0uVgPA773hq3jD6yWwtKS2a4rV1w+ew2/e+xW87wfDm5qpttU5+WXAtb9zn6OiYPJ5AMBkPqnEJFPOR0LIxqEzW/6aa4ysiNm1vaGKuBaLmJpLqPuMKbHaJK6rMqs7xepaDdWGDcfTL86z5QkCNbEAqLiZqJ9ACCGEkN7miivaz02I1ZHpsEOsrjZsOL7G+NN1kXXKWFpYA7E6ijKLdILOIt9xibSBUIeSTUNRt6TneCajnjsAvAXAw0KI74bbfgHAG4UQNwCQAE4B+AkAkFIeFUL8FYBHAdQBvFNK2QAAIcS7AHwGQALAB6WUR439JZuNqFqqoRiQExeTGMucb23TIsrmnJ1tC5O9fHGIYikisdpEBnIqhZ2peZw/JTB5UWI8yJkRwXeEMe65nHrcAGI1CoVWoUl4np6Lrr8fvl3Hjx75BlC+QW0z5SLrdFZ3FjCMiTOYQaVhIz9TRhqqbmPCkuYyZQkhGwfPU9eVxx5T8T0G2LVT4mxhAChcwNS0hRv9JSDoN7Jvo4TLR7tiQKLsbrqqzfC2twE/9EPqvssJUUIIIWRjcNVV7eeGYkAAdGVWAzaEp9Hfcl1k3RJyixIAMD/TUEW9TfThIt0owkQtpg6xem4O6PNC4b6X9SgSi6cVq6WUXwWwmkXoU5d5z68C+NVVtn/qcu8jHRh1VhfQlBbGU8oBqn0i9/WpwVIu1xZVe9lZHUU+XLyoHk2In2kV+XDu7AAmJ6FiQJL60RLYsUM58qS6WfS0WN1ZtNCEq7pznwsL5otJdbrRjhzR3186jX93/afwjr++C3/87wBfhEUpuESbkK3J29+uJu8MRZjt3JPAucIAkH8SUzMCo8klM5OtpgljQGqVtlhdnK8gYF61OYTQqwdBCCGEkOeeTrOUiT5cEEAAaJYqsBAVRbT1hGXPQ9YpI7eoVsJdvAhsSy6a6cO5LrB/P3DihHq9e7f+PjvE6ocfBq4bCwtYUqzedHBtZq9isMBikKjBs+sYc2bVNt0Bj2W1ZwbPnWv9np4lOlaTYnUyiZ2pBZw9JzA5LdTybBP7dd32chnASLbymtGZXR6J1bptK5tVg/J8vhWvYaxtRQXQkk
kznYVMBm++4gE8NdOHRx9pYmdyVh17L58LhJC1I5k0JlQDQHrYR77m4fx54PhEoAos9uI9wbaVs7oKtcQEwMXzDbXiiGI1IYQQQrYqQgD/+l8DL3uZionTxffhJ2qo5NQKtuJiDSmnotffcl1k3TIWc8qfevEiMJ402Ie7/fb2cxNidShKy2IJDz0EXDsYpgf0Yh+ZaEGxuleJYkB0BdBkEkIAA0EZ440Jta1zhi8ukVBZr6vHXp7J2rmz+7WhGJAjAxN45HiAY6cDHMhOmxMpd3XUIe1lJ1U6rW5ipVJ7IkDXWW1Zah9Smp1cAJRY/Z73AB/4gJn9BQEgBF40+gTe+54mXr7z0dY2QgjRJpWCJSTe8IcvhSeqyuXSi85qIeC6EtWmHS5HVWK1MVcOIYQQQshG5YYbgNe/3kwND9+Hl6ijkq8BAGampSouqO2sLrUWzF+csjAeGOzD3XorMD6uxHoT4/ogUJ/BUlU5q7On1HaK1ZsOitW9iqkIhPFxAMCAk8d4JSzaZ0Ks7iwmlUj0dsGfTvEXMHORTKVw68gpfOOJQRy/mMT+zIw5UfWGG9Tj/v3A9deb2edaIEQ7h+rkSfVoQlyP2lIkVpt0Kh86BPT3m9mXZQGpFO7a8Ti+/o0EfnDfdxgBQggxRyqFsSAHS9bxiTf+JQK71rMdcccRqDYSrYI/FyakcuUws5oQQgghxAyhs7q8ZFCsDp3VuaXQWT2dMNuH8zzg/vuBn/opM/tLpZCyK/jmYxl85zvA1UEYMdKjfWQSnx4sK0/QaKjiREKYyZdOpXDn+OMYE1NqfyZO5KuvBv7+79Xzq67qbffU0JD6u6OquYac1f1eCXN5B4fGFpSZ1pRQefPNwHXXbYxB/vAwcP58O4fKRMb24CBw9my7QFcvC8CZDF687Qn8zk+fxMBSEUiZiwAghGxx0mnsSp3Gke0n27FIveisBuAGCeWszueB/n5cvCDprCaEEEIIMUkQKLG60ACg0jhHdMXqdBpZt4QLOVUz6+KsjW3bFs2unDe58ri/H++57mP406++CC+/p4HUfAGwNXO7SU9CZ3Uv0umq1j2xhQB27MDvv/DD8O26ytM0cbHYv7/93ESxurWk0wEMqCKGuoSCwa3bz+GOXWfUNpMO4I1ysY0+1/NhVpQJZ/UVV3S/7mWxOp1G0q7hJ+/4nnrdy8dKCNlYDAzgZ2/8LH50z+eBxUW1rVfF6mQCtWaiVb/gwqSFcYrVhBBCCCHm8H34dg3lvIpinZmBclbrCMvptCqwGGVWz7mqD9erekR/P16x6yg++OI/xa++L8wuSacZxbkJoVjdi5gqrhjRmdlsIgIEUNEf992nhMU77jCzz7Xk5S9XIvW/+TfAwID+/sL/zc/e9Hn8yJEH1batKFR2TgIAwLXX6u/z6qvbz5NJc7Eda0G0SmFyUj32qJBECNmAWBYO7G0g45RVfYgeLuDqplxUmwlEgYcXpyzlrO7VgQ4hhBBCyEbD9+FZdVQKhsXqjhiQxYKNrFPuXcNBKqWc1OWy+gAARoBsUhgD0oukUsAP/ZC5Qd6ePe3ny4sN6vCKV6ivjcDzn6++TBGKkof9k4AVXhy3olh97bXAX/6ler5790rxOg7btrWf33OPuhn1KsvF6q3YBggha8fQEDA7q56nUj3rGnFTDqpTdkusPnHOxc7rFoDklet7YIQQQgghm4XIWV1sAgBm5hO4ws/rmRnSafS5JeQKCUBKyKZU3c1eFauFUGa2mRng3Dm1jWL1pqSHVaAtTDoN3HWXuf3dfDNQKinx+5ZbzO13K+O66qtaBebn1batKFSOjKhiCR/9KPCDP2hmn0IAP/MzwPHjwPd9n5l9rhWDg+pxYkI9bsU2QAhZO4aG2s97eOWGk3JRbdp44MEEGimgWpEYDZZ4TSSEEEIIMUUQwE/U22L1QgLD/ZpiteMgm2pgseJh/kJZFfQGelesBtRKeYrVmx6K1VsB2wbuvHO9j2JzIQQwNq
YKAUave/mCvpZcc436Msnhw+qr1+l0gQM9LSYRQjYgnWJ1D3fE3YyHUjOBX/urg/jH/wTc/9pT6hu8JhJCCCGEmMH34SVqqBQbgJSYXnAwMr6kXQwxO2gjVw3w8z8PvPO276iNvRzlFsWEUqze1DCzmpC4dAqVQdCzy7PJGrJcrKaLkBBikg3irHYzHgo1F6enA/yv/wX8yPOOqm/wmkgIIYQQYgbHge9KlKsWUK3i/FwS48mcdk2TzKCDM/lBfOd7Am88+K1wY8bAAa8RFKu3BBSrCYlLp1DJAfnWZGQEsDouo2wHhBCTjI+3n3cWn+0x3KyPL104hNu2n8GrXw0MiTn1jR4W2AkhhBBCNhp+0kKp4aK5kEOlJlRsh6az2u1P4nR+EHffsgiRW1Qb+/oMHO0aMTCgHuuq0CT7m5sTxoAQEpdOESHKLiZbi0RCzeSGRcU4q0sIMcq+fcBb36omR/ftW++juSRONsDnJ67Cl1/6hwCeBxSL6hucwCOEEEIIMYaXsvGjn/kRnPytPPZlwyLcms5qZDLIumW87Mgk8HhBmbF6eVy7f3/361271uc4yJpCZzUhcel0Vr/iFet3HGR9ufJK9Tg+Dhw8uL7HQgjZXAgB3H57TwvVAPC8Fwb4+Mv/O25NPQpICRQK6ht0uhBCCCGEGOP7bpjGb77gf+O3/qQP1/SfVxs1ndVIp3EwO4XbBx9Xr7PZ3o443bu3+zXH4JsSOqsJicv4OHDzzWqJTA8vzyZrzA/+oCoGeeut3ZEghBCyRdi938buq88CuSawsNB2VlOsJoQQQggxxtWHajgy9TX82rHX4ZrsGbXRgLP6q/f+BsTkAfW6lyNAACWk33038LnPqcdeFtZJbChWExIXIYAf//H1Pgqy3gwMAHfcsd5HQQgh68vIiIpEmpwESiW1TXfwRAghhBBC2mSzEAL45dc9ihcuPQHYtvrSYWRE6b3Hj6vXvS5WA8B99wEHDgDXXbfeR0LWCNoACSGEEEKIHiMj6vHMGRUFEgRcbUIIIYQQYpJMBgDw9hsfxLZkzowxoDPeFAD6+/X3udbYNnDTTfpCPelZOIoghBBCCCF6RGL16dPqkcUVCSGEEELMEorVmJxUj7p51QAwOgokEu3XG8FZTTY9FKsJIYQQQogekVh96pR6ZF41IYQQQohZIrF6ako9mnBWJxLA2Fj7NcVq0gNQrCaEEEIIIXqMjqrHmRn1SGc1IYQQQohZsln1WKmoRxPOaqDbZHDokJl9EqIBxWpCCCGEEKLH2Fh3NfZIvCaEEEIIIWYYHOx+baqY9e23q37cm97U7bImZJ1gGjkhhBBCCNEjmQQOHgSefFK9vvba9T0eQgghhJDNRhAA27cDExPq9d69ZvZ7++3AzTcDrmtmf4RoQmc1IYQQQgjR5+qr28+vumr9joMQQgghZLNy8GD7+eHD5vZLoZr0EBSrCSGEEEKIPnfcAQwNAXfeyQEPIYQQQshasH17+/mePet3HISsIYwBIYQQQggh+mSzwK/92nofBSGEEELI5uXWW4GvfQ248UbAov+UbE4oVhNCCCGEEEIIIYQQ0uukUsAv/dJ6HwUhawqnYQghhBBCCCGEEEIIIYSsOxSrCSGEEEIIIYQQQgghhKw7FKsJIYQQQgghhBBCCCGErDsUqwkhhBBCCCGEEEIIIYSsOxSrCSGEEEIIIYQQQgghhKw7FKsJIYQQQgghhBBCCCGErDsUqwkhhBBCCCGEEEIIIYSsOxSrCSGEEEIIIYQQQgghhKw7FKsJIYQQQgghhBBCCCGErDsUqwkhhBBCCCGEEEIIIYSsOxSrCSGEEEIIIYQQQgghhKw7FKsJIYQQQgghhBBCCCGErDsUqwkhhBBCCCGEEEIIIYSsO0JKud7HcEmEENMATq/3cTyHDAOYWe+DIJsSti2yVrBtkbWCbYsQngdk7WDbImsF2xZZK9i2CFk7TJ5fe6SUIzo76GmxeqshhHhQSnnzeh8H2X
ywbZG1gm2LrBVsW4TwPCBrB9sWWSvYtshawbZFyNrRa+cXY0AIIYQQQgghhBBCCCGErDsUqwkhhBBCCCGEEEIIIYSsOxSre4s/Wu8DIJsWti2yVrBtkbWCbYsQngdk7WDbImsF2xZZK9i2CFk7eur8YmY1IYQQQgghhBBCCCGEkHWHzmpCCCGEEEIIIYQQQggh6w7F6ssghNglhPiiEOJRIcRRIcS7w+2DQojPCiGeDB8Hwu1XCSH+SQhREUK8d5X9JYQQ3xFCfPIyv/PTQoiF5T8jhHiXEOIpIYQUQgxf5v37hBAPhD/7l0IIN9z+YiHEt4UQdSHE6+J+JsQMm6xt7Q7/lu8IIR4SQrwq7udC9NmgbWvVnxNCvFQIsSiE+G749f44nwkxwwZtWx8SQhwTQjwihPigEMIJt78pvF49LIT4uhDi+rifC9la9Nh5sGr7XuX9l7p/s2/YQ2yytsW+YQ+xQdsW+4YbgA3attg3JBsCk+eXEOJU2La/K4R48DK/8xXh+fGUEOLnOrY/53okxerLUwfwHinlEQAvAPBOIcQRAD8H4PNSyisAfD58DQBzAH4awG9eYn/vBvDY0/zO3wDwllW2fw3A3QBOP837/zOA35FSHgQwD+Bt4fYzAP4fAP/rad5Pnhs2U9v6JQB/JaW8EcAbAPzB0+yHrC0bsW1d7ue+IqW8Ifz6lafZD1lbNmLb+hCAqwBcCyAA8GPh9pMAXiKlvBbAf0CPZbSRnqaXzoNLte/lsG+4MdhMbYt9w95iI7Yt9g03BhuxbbFvSDYKps+vO8Pr5s2rfVMIkQDw+wBeCeAIgDeGvw9YBz2SYvVlkFJekFJ+O3y+BHXh3AHg+wH8WfhjfwbgX4Q/MyWl/BaA2vJ9CSF2Ang1gD95mt/5eQBLq2z/jpTy1OXeK4QQAF4G4H+vcmynpJQPAWhebh/kuWEztS0AEkA2fN4HYOJy+yJry0ZrW8/m58j6skHb1qdkCIBvAtgZbv+6lHI+/LFvRNsJeTp67DxYtX0v+x3sG24QNlPbAvuGPcVGa1vhz7FvuAHYoG2LfUOyITB5fj1DbgXwlJTyhJSyCuAj4e9aFz2SYvUzRAixF8CNAB4AMCalvBB+6yKAsWewi98F8LNY2wHBEIAFKWU9fH0OqjGTHmYTtK37AbxZCHEOwKcA/NQaHgd5FmyQtvV03CaE+J4Q4u+FEFev43GQDjZa2wqXeL4FwKdX+fbbAPz9c3EcZHPRK+fB07Rv9g03IJugbd0P9g17kg3Stp4O9g17kI3Wttg3JBsJA+eXBPAPQoh/FkK8/RI/swPA2Y7Xz7bPaLTPSbH6GSCESAP4KICfkVLmOr8XzsjJp3n/awBMSSn/ee2OkmxENknbeiOAP5VS7gTwKgB/LoTgtWWd2SRt69sA9kgprwfw3wB8fB2PhYRs0Lb1BwC+LKX8yrJjuRNqQPK+5/BYyCagx86DVds32ZhskrbFvmEPsknaFvuGPcgGbVvsG5INge75FfJCKeVNUBEf7xRCvNj8kZqFnYanIZxx+yiAD0kpPxZunhRCbAu/vw3A1NPs5g4A9wohTkFZ6V8mhPgLIcTzRbs4xL0xj+8z4fv/BMAsgH4hhB1+eyeA83H2S9aeTdS23gbgrwBASvlPAHwAlwzdJ2vPBmtbl0RKmZNS5sPnnwLgXK6gA1l7NmLbEkJ8AMAIgH+77Gevg1pq+v1Sytk4v49sTXrpPFitfbNvuHHZRG2LfcMeY4O1rUvCvmHvsRHbFvuGZKNg6PyClPJ8+DgF4P8AuFWoAo7R+fUOqHv4ro63PW2fcS37nPbT/8jWRQghAPxPAI9JKX+741t/A+CtAP5T+PiJy+1HSvnzAH4+3OdLAbxXSvnm8Ns36ByjlPKeZcf8RQCvg7rIP+2xkfVhk7WtMwDuAvCnQojDUAOSaZ3fTeKzEdvWpRBCjAOYlFJKIcStUBOs7DiuExuxbQkhfgzAPQDuklI2O7bvBvAxAG
+RUj6h8zvJ1qKXzoNLtW/2DTcmm6xtsW/YQ2zEtnWZ97Nv2ENsxLbFviHZKJg6v4QQKQCWlHIpfP5yAL8ipTyLjvMrFJmvEELsgxKZ3wDghy+37zXtc0op+XWJLwAvhLLUPwTgu+HXq6CyWD4P4EkAnwMwGP78OFQuSw7AQvg8u2yfLwXwycv8zq9AdeZK4fvvCbf/dPi6DlWk5E8u8f79UIUCngLw1wC8cPst4fsLUDf0o+v9+W7lr03Wto5AVYf9Xvh3vHy9P9+t/LVB29aqPwfgXQCOhm3rGwBuX+/Pdyt/bdC2VQdwvON43x9u/xOoCtXR9gfX+/Pl18b46rHzYNX2vcr72TfcAF+brG2xb9hDXxu0bbFvuAG+NmjbYt+QXxviy9T5BXWv/l74dRTAL17md74KwBPhOfKLHdufcz1ShG8khBBCCCGEEEIIIYQQQtYNZlYTQgghhBBCCCGEEEIIWXcoVhNCCCGEEEIIIYQQQghZdyhWE0IIIYQQQgghhBBCCFl3KFYTQgghhBBCCCGEEEIIWXcoVhNCCCGEEEIIIYQQQghZdyhWE0IIIYSQLY8QYkgI8d3w66IQ4nz4PC+E+IM1/L0vFULcvlb7J4QQQgghZCNhr/cBEEIIIYQQst5IKWcB3AAAQoj7AeSllL/5HPzqlwLIA/j6c/C7CCGEEEII6WnorCaEEEIIIeQShM7nT4bP7xdC/JkQ4itCiNNCiB8QQvy/QoiHhRCfFkI44c89TwjxJSHEPwshPiOE2BZu/2khxKNCiIeEEB8RQuwF8A4A/yZ0cb9ICPFaIcQDQojvCCE+J4QYe5a/+1TH9m8KIQ6uywdHCCGEEEJIDChWE0IIIYQQ8sw5AOBlAO4F8BcAviilvBZACcCrQ9H4vwF4nZTyeQA+COBXw/f+HIAbpZTXAXiHlPIUgD8E8DtSyhuklF8B8FUAL5BS3gjgIwB+9pn+7o6fWwy3/x6A3zX89xNCCCGEELJmMAaEEEIIIYSQZ87fSylrQoiHASQAfDrc/jCAvQCuBHANgM8KIRD+zIXwZx4C8CEhxMcBfPwS+98J4C9DN7YL4OSz+N0RH+54/J1n/RcSQgghhBCyTtBZTQghhBBCyDOnAgBSyiaAmpRShtubUEYQAeBo6JS+QUp5rZTy5eHPvBrA7wO4CcC3hBCrGUf+G4DfC53RPwHAfxa/O0Je4jkhhBBCCCE9DcVqQgghhBBCzHEMwIgQ4jYAEEI4QoirhRAWgF1Syi8CeB+APgBpAEsAMh3v7wNwPnz+1pjH8PqOx3+KuQ9CCCGEEEKecxgDQgghhBBCiCGklFUhxOsA/FchRB9Uf/t3ATwB4C/CbQLAf5VSLggh/hbA/xZCfD+AnwJwP4C/FkLMA/gCgH0xDmNACPEQlBP7jbp/EyGEEEIIIc8Vor16kBBCCCGEELKREUKcAnCzlHJmvY+FEEIIIYSQZwtjQAghhBBCCCGEEEIIIYSsO3RWE0IIIYQQQgghhBBCCFl36KwmhBBCCCGEEEIIIYQQsu5QrCaEEEIIIYQQQgghhBCy7lCsJoQQQgghhBBCCCGEELLuUKwmhBBCCCGEEEIIIYQQsu5QrCaEEEIIIYQQQgghhBCy7lCsJoQQQgghhBBCCCGEELLu/F9cCxj9aYoWTAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(25,6))\n", + "plt.plot(train_timestamps, y_train, color = 'red', linewidth=2.0, alpha = 0.6)\n", + "plt.plot(train_timestamps, y_train_pred, color = 'blue', linewidth=0.8)\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.title(\"Training data prediction\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "LnhzcnYtXHCm", + "outputId": "f5f0d711-f18b-4788-ad21-d4470ea2c02b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MAPE for training data: 1.7195710200875551 %\n" + ] + } + ], + "source": [ + "print('MAPE for training data: ', mape(y_train_pred, y_train)*100, '%')" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 225 + }, + "id": "53Q02FoqQH4V", + "outputId": "53e2d59b-5075-4765-ad9e-aed56c966583" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(10,3))\n", + "plt.plot(test_timestamps, y_test, color = 'red', linewidth=2.0, alpha = 0.6)\n", + "plt.plot(test_timestamps, y_test_pred, color = 'blue', linewidth=0.8)\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "clOAUH-SXCJG", + "outputId": "a3aa85ff-126a-4a4a-cd9e-90b9cc465ef5" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MAPE for testing data: 1.2623790187854018 %\n" + ] + } + ], + "source": [ + "print('MAPE for testing data: ', mape(y_test_pred, y_test)*100, '%')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DHlKvVCId5ue" + }, + "source": [ + "## Dự đoán toàn bộ tập dữ liệu\n" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cOFJ45vreO0N", + "outputId": "35628e33-ecf9-4966-8036-f7ea86db6f16" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tensor shape: (26300, 5)\n", + "X shape: (26300, 4) \n", + "Y shape: (26300, 1)\n" + ] + } + ], + "source": [ + "# Extracting load values as numpy array\n", + "data = energy.copy().values\n", + "\n", + "# Scaling\n", + "data = scaler.transform(data)\n", + "\n", + "# Transforming to 2D tensor as per model input requirement\n", + "data_timesteps=np.array([[j for j in data[i:i+timesteps]] for i in range(0,len(data)-timesteps+1)])[:,:,0]\n", + "print(\"Tensor shape: \", data_timesteps.shape)\n", + "\n", + "# Selecting inputs and outputs from data\n", + "X, Y = data_timesteps[:,:timesteps-1],data_timesteps[:,[timesteps-1]]\n", + "print(\"X shape: \", X.shape,\"\\nY shape: \", Y.shape)" + ] + }, + { + "cell_type": 
"code", + "execution_count": 26, + "metadata": { + "id": "ESSAdQgwexIi" + }, + "outputs": [], + "source": [ + "# Make model predictions\n", + "Y_pred = model.predict(X).reshape(-1,1)\n", + "\n", + "# Inverse scale and reshape\n", + "Y_pred = scaler.inverse_transform(Y_pred)\n", + "Y = scaler.inverse_transform(Y)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 328 + }, + "id": "M_qhihN0RVVX", + "outputId": "a89cb23e-1d35-437f-9d63-8b8907e12f80" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(30,8))\n", + "plt.plot(Y, color = 'red', linewidth=2.0, alpha = 0.6)\n", + "plt.plot(Y_pred, color = 'blue', linewidth=1)\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "AcN7pMYXVGTK", + "outputId": "7e1c2161-47ce-496c-9d86-7ad9ae0df770" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MAPE: 2.0572089029888656 %\n" + ] + } + ], + "source": [ + "print('MAPE: ', mape(Y_pred, Y)*100, '%')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc sự không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn tham khảo chính thức. Đối với các thông tin quan trọng, nên sử dụng dịch vụ dịch thuật chuyên nghiệp từ con người. 
Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "Recurrent_Neural_Networks.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.1" + }, + "coopTranslator": { + "original_hash": "f8f3967282314d3995245835bdaa8418", + "translation_date": "2025-09-06T14:05:19+00:00", + "source_file": "7-TimeSeries/3-SVR/solution/notebook.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/translations/vi/7-TimeSeries/3-SVR/working/notebook.ipynb b/translations/vi/7-TimeSeries/3-SVR/working/notebook.ipynb new file mode 100644 index 000000000..5394971a2 --- /dev/null +++ b/translations/vi/7-TimeSeries/3-SVR/working/notebook.ipynb @@ -0,0 +1,695 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "fv9OoQsMFk5A" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Trong sổ tay này, chúng tôi sẽ hướng dẫn cách:\n", + "\n", + "- chuẩn bị dữ liệu chuỗi thời gian 2D để huấn luyện mô hình hồi quy SVM \n", + "- triển khai SVR sử dụng kernel RBF \n", + "- đánh giá mô hình bằng biểu đồ và MAPE \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Nhập các mô-đun\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append('../../')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "M687KNlQFp0-" + }, + "outputs": [], + "source": 
[ + "import os\n", + "import warnings\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import datetime as dt\n", + "import math\n", + "\n", + "from sklearn.svm import SVR\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from common.utils import load_data, mape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Cj-kfVdMGjWP" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8fywSjC6GsRz" + }, + "source": [ + "### Tải dữ liệu\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "aBDkEB11Fumg", + "outputId": "99cf7987-0509-4b73-8cc2-75d7da0d2740" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
load
2012-01-01 00:00:002698.0
2012-01-01 01:00:002558.0
2012-01-01 02:00:002444.0
2012-01-01 03:00:002402.0
2012-01-01 04:00:002403.0
\n", + "
" + ], + "text/plain": [ + " load\n", + "2012-01-01 00:00:00 2698.0\n", + "2012-01-01 01:00:00 2558.0\n", + "2012-01-01 02:00:00 2444.0\n", + "2012-01-01 03:00:00 2402.0\n", + "2012-01-01 04:00:00 2403.0" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "energy = load_data('../../data')[['load']]\n", + "energy.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "O0BWP13rGnh4" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 486 + }, + "id": "hGaNPKu_Gidk", + "outputId": "7f89b326-9057-4f49-efbe-cb100ebdf76d" + }, + "outputs": [], + "source": [ + "energy.plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IPuNor4eGwYY" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ysvsNyONGt0Q" + }, + "outputs": [], + "source": [ + "train_start_dt = '2014-11-01 00:00:00'\n", + "test_start_dt = '2014-12-30 00:00:00'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 548 + }, + "id": "SsfdLoPyGy9w", + "outputId": "d6d6c25b-b1f4-47e5-91d1-707e043237d7" + }, + "outputs": [], + "source": [ + "energy[(energy.index < test_start_dt) & (energy.index >= train_start_dt)][['load']].rename(columns={'load':'train'}) \\\n", + " .join(energy[test_start_dt:][['load']].rename(columns={'load':'test'}), how='outer') \\\n", + " .plot(y=['train', 'test'], figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XbFTqBw6G1Ch" + }, + "source": [] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "Bây giờ, bạn cần chuẩn bị dữ liệu để huấn luyện bằng cách thực hiện lọc và chuẩn hóa dữ liệu của mình.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cYivRdQpHDj3", + "outputId": "a138f746-461c-4fd6-bfa6-0cee094c4aa1" + }, + "outputs": [], + "source": [ + "train = energy.copy()[(energy.index >= train_start_dt) & (energy.index < test_start_dt)][['load']]\n", + "test = energy.copy()[energy.index >= test_start_dt][['load']]\n", + "\n", + "print('Training data shape: ', train.shape)\n", + "print('Test data shape: ', test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Chuyển đổi dữ liệu để nằm trong khoảng (0, 1).\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "3DNntGQnZX8G", + "outputId": "210046bc-7a66-4ccd-d70d-aa4a7309949c" + }, + "outputs": [], + "source": [ + "scaler = MinMaxScaler()\n", + "train['load'] = scaler.fit_transform(train)\n", + "train.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "26Yht-rzZexe", + "outputId": "20326077-a38a-4e78-cc5b-6fd7af95d301" + }, + "outputs": [], + "source": [ + "test['load'] = scaler.transform(test)\n", + "test.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x0n6jqxOQ41Z" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fdmxTZtOQ8xs" + }, + "source": [ + "Đối với SVR của chúng tôi, chúng tôi chuyển đổi dữ liệu đầu vào thành dạng `[batch, timesteps]`. Vì vậy, chúng tôi định hình lại `train_data` và `test_data` hiện có sao cho có một chiều mới đại diện cho các bước thời gian. Trong ví dụ của chúng tôi, chúng tôi chọn `timesteps = 5`. 
Vì vậy, đầu vào cho mô hình là dữ liệu của 4 bước thời gian đầu tiên, và đầu ra sẽ là dữ liệu của bước thời gian thứ 5.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Rpju-Sc2HFm0" + }, + "outputs": [], + "source": [ + "# Converting to numpy arrays\n", + "\n", + "train_data = train.values\n", + "test_data = test.values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Selecting the timesteps\n", + "\n", + "timesteps=None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "O-JrsrsVJhUQ", + "outputId": "c90dbe71-bacc-4ec4-b452-f82fe5aefaef" + }, + "outputs": [], + "source": [ + "# Converting data to 2D tensor\n", + "\n", + "train_data_timesteps=None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "exJD8AI7KE4g", + "outputId": "ce90260c-f327-427d-80f2-77307b5a6318" + }, + "outputs": [], + "source": [ + "# Converting test data to 2D tensor\n", + "\n", + "test_data_timesteps=None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2u0R2sIsLuq5" + }, + "outputs": [], + "source": [ + "x_train, y_train = None\n", + "x_test, y_test = None\n", + "\n", + "print(x_train.shape, y_train.shape)\n", + "print(x_test.shape, y_test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8wIPOtAGLZlh" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EhA403BEPEiD" + }, + "outputs": [], + "source": [ + "# Create model using RBF kernel\n", + "\n", + "model = None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GS0UA3csMbqp", + "outputId": "d86b6f05-5742-4c1d-c2db-c40510bd4f0d" + }, + 
"outputs": [], + "source": [ + "# Fit model on training data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Rz_x8S3UrlcF" + }, + "source": [ + "### Dự đoán mô hình\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XR0gnt3MnuYS", + "outputId": "157e40ab-9a23-4b66-a885-0d52a24b2364" + }, + "outputs": [], + "source": [ + "# Making predictions\n", + "\n", + "y_train_pred = None\n", + "y_test_pred = None" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_2epncg-SGzr" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Scaling the predictions\n", + "\n", + "y_train_pred = scaler.inverse_transform(y_train_pred)\n", + "y_test_pred = scaler.inverse_transform(y_test_pred)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xmm_YLXhq7gV", + "outputId": "18392f64-4029-49ac-c71a-a4e2411152a1" + }, + "outputs": [], + "source": [ + "# Scaling the original values\n", + "\n", + "y_train = scaler.inverse_transform(y_train)\n", + "y_test = scaler.inverse_transform(y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "u3LBj93coHEi", + "outputId": "d4fd49e8-8c6e-4bb0-8ef9-ca0b26d725b4" + }, + "outputs": [], + "source": [ + "# Extract the timesteps for x-axis\n", + "\n", + "train_timestamps = None\n", + "test_timestamps = None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(25,6))\n", + "# plot original output\n", + "# plot predicted output\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.title(\"Training data prediction\")\n", + "plt.show()" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "LnhzcnYtXHCm", + "outputId": "f5f0d711-f18b-4788-ad21-d4470ea2c02b" + }, + "outputs": [], + "source": [ + "print('MAPE for training data: ', mape(y_train_pred, y_train)*100, '%')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 225 + }, + "id": "53Q02FoqQH4V", + "outputId": "53e2d59b-5075-4765-ad9e-aed56c966583" + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(10,3))\n", + "# plot original output\n", + "# plot predicted output\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "clOAUH-SXCJG", + "outputId": "a3aa85ff-126a-4a4a-cd9e-90b9cc465ef5" + }, + "outputs": [], + "source": [ + "print('MAPE for testing data: ', mape(y_test_pred, y_test)*100, '%')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DHlKvVCId5ue" + }, + "source": [ + "## Dự đoán toàn bộ tập dữ liệu\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cOFJ45vreO0N", + "outputId": "35628e33-ecf9-4966-8036-f7ea86db6f16" + }, + "outputs": [], + "source": [ + "# Extracting load values as numpy array\n", + "data = None\n", + "\n", + "# Scaling\n", + "data = None\n", + "\n", + "# Transforming to 2D tensor as per model input requirement\n", + "data_timesteps=None\n", + "\n", + "# Selecting inputs and outputs from data\n", + "X, Y = None, None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ESSAdQgwexIi" + }, + "outputs": [], + "source": [ + "# Make model predictions\n", + "\n", + "# Inverse scale and reshape\n", + "Y_pred = None\n", + "Y = 
None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 328 + }, + "id": "M_qhihN0RVVX", + "outputId": "a89cb23e-1d35-437f-9d63-8b8907e12f80" + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(30,8))\n", + "# plot original output\n", + "# plot predicted output\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "AcN7pMYXVGTK", + "outputId": "7e1c2161-47ce-496c-9d86-7ad9ae0df770" + }, + "outputs": [], + "source": [ + "print('MAPE: ', mape(Y_pred, Y)*100, '%')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn thông tin chính thức. Đối với các thông tin quan trọng, khuyến nghị sử dụng dịch vụ dịch thuật chuyên nghiệp bởi con người. 
Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "Recurrent_Neural_Networks.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.1" + }, + "coopTranslator": { + "original_hash": "e86ce102239a14c44585623b9b924a74", + "translation_date": "2025-09-06T14:07:50+00:00", + "source_file": "7-TimeSeries/3-SVR/working/notebook.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/translations/vi/8-Reinforcement/1-QLearning/notebook.ipynb b/translations/vi/8-Reinforcement/1-QLearning/notebook.ipynb new file mode 100644 index 000000000..dabb5722d --- /dev/null +++ b/translations/vi/8-Reinforcement/1-QLearning/notebook.ipynb @@ -0,0 +1,411 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "17e5a668646eabf5aabd0e9bfcf17876", + "translation_date": "2025-09-06T15:07:06+00:00", + "source_file": "8-Reinforcement/1-QLearning/notebook.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# 
Peter và Sói: Giới thiệu về Học tăng cường\n", + "\n", + "Trong hướng dẫn này, chúng ta sẽ học cách áp dụng học tăng cường vào một bài toán tìm đường. Bối cảnh được lấy cảm hứng từ câu chuyện cổ tích âm nhạc [Peter và Sói](https://en.wikipedia.org/wiki/Peter_and_the_Wolf) của nhà soạn nhạc người Nga [Sergei Prokofiev](https://en.wikipedia.org/wiki/Sergei_Prokofiev). Đây là câu chuyện về cậu bé tiên phong Peter, người dũng cảm rời khỏi nhà để đến một bãi trống trong rừng nhằm đuổi theo một con sói. Chúng ta sẽ huấn luyện các thuật toán học máy để giúp Peter khám phá khu vực xung quanh và xây dựng một bản đồ điều hướng tối ưu.\n", + "\n", + "Đầu tiên, hãy nhập một số thư viện hữu ích:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import random\n", + "import math" + ] + }, + { + "source": [ + "## Tổng quan về Học Tăng cường\n", + "\n", + "**Học Tăng cường** (Reinforcement Learning - RL) là một kỹ thuật học cho phép chúng ta tìm hiểu hành vi tối ưu của một **tác nhân** trong một **môi trường** nào đó bằng cách thực hiện nhiều thí nghiệm. Một tác nhân trong môi trường này cần có một **mục tiêu**, được xác định bởi một **hàm phần thưởng**.\n", + "\n", + "## Môi trường\n", + "\n", + "Để đơn giản, hãy xem xét thế giới của Peter là một bảng vuông có kích thước `width` x `height`. Mỗi ô trên bảng này có thể là:\n", + "* **mặt đất**, nơi Peter và các sinh vật khác có thể đi lại\n", + "* **nước**, nơi rõ ràng bạn không thể đi qua\n", + "* **một cái cây** hoặc **cỏ** - nơi bạn có thể nghỉ ngơi\n", + "* **một quả táo**, đại diện cho thứ mà Peter rất vui khi tìm thấy để tự nuôi sống\n", + "* **một con sói**, thứ nguy hiểm và cần tránh xa\n", + "\n", + "Để làm việc với môi trường, chúng ta sẽ định nghĩa một lớp gọi là `Board`. 
Để tránh làm rối notebook này, chúng ta đã chuyển toàn bộ mã nguồn làm việc với bảng vào một module riêng biệt có tên `rlboard`, mà bây giờ chúng ta sẽ import. Bạn có thể xem bên trong module này để tìm hiểu thêm chi tiết về cách triển khai nội bộ.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "Bây giờ hãy tạo một bảng ngẫu nhiên và xem nó trông như thế nào:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 1" + ] + }, + { + "source": [ + "## Hành động và Chính sách\n", + "\n", + "Trong ví dụ của chúng ta, mục tiêu của Peter sẽ là tìm một quả táo, đồng thời tránh con sói và các chướng ngại vật khác. Định nghĩa các hành động đó dưới dạng một từ điển, và ánh xạ chúng tới các cặp thay đổi tọa độ tương ứng.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 2" + ] + }, + { + "source": [ + "Chiến lược của tác nhân (Peter) được định nghĩa bởi một cái gọi là **chính sách**. 
Hãy cùng xem xét chính sách đơn giản nhất được gọi là **đi ngẫu nhiên**.\n", + "\n", + "## Đi ngẫu nhiên\n", + "\n", + "Trước tiên, hãy giải quyết vấn đề của chúng ta bằng cách triển khai chiến lược đi ngẫu nhiên.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "# Let's run a random walk experiment several times and see the average number of steps taken: code block 3" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 4" + ] + }, + { + "source": [ + "## Hàm Thưởng\n", + "\n", + "Để làm cho chính sách của chúng ta thông minh hơn, chúng ta cần hiểu những nước đi nào \"tốt hơn\" so với những nước đi khác.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 5" + ] + }, + { + "source": [ + "## Q-Learning\n", + "\n", + "Xây dựng một Q-Table, hoặc mảng đa chiều. 
Vì bảng của chúng ta có kích thước `width` x `height`, chúng ta có thể biểu diễn Q-Table bằng một mảng numpy với hình dạng `width` x `height` x `len(actions)`:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 6" + ] + }, + { + "source": [ + "Truyền Q-Table vào hàm `plot` để trực quan hóa Q-Table trên bảng trò chơi:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "error", + "ename": "NameError", + "evalue": "name 'm' is not defined", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mQ\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mNameError\u001b[0m: name 'm' is not defined" + ] + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## Bản chất của Q-Learning: Phương trình Bellman và Thuật toán Học\n", + "\n", + "Viết mã giả cho thuật toán học của chúng ta:\n", + "\n", + "* Khởi tạo Bảng Q Q với các giá trị bằng nhau cho tất cả các trạng thái và hành động\n", + "* Đặt tốc độ học $\\alpha\\leftarrow 1$\n", + "* Lặp lại mô phỏng nhiều lần\n", + " 1. Bắt đầu tại vị trí ngẫu nhiên\n", + " 1. Lặp lại\n", + " 1. Chọn một hành động $a$ tại trạng thái $s$\n", + " 2. Thực hiện hành động bằng cách di chuyển đến trạng thái mới $s'$\n", + " 3. Nếu gặp điều kiện kết thúc trò chơi, hoặc tổng phần thưởng quá nhỏ - thoát khỏi mô phỏng \n", + " 4. Tính phần thưởng $r$ tại trạng thái mới\n", + " 5. 
Cập nhật Hàm Q theo phương trình Bellman: $Q(s,a)\\leftarrow (1-\\alpha)Q(s,a)+\\alpha(r+\\gamma\\max_{a'}Q(s',a'))$\n", + " 6. $s\\leftarrow s'$\n", + " 7. Cập nhật tổng phần thưởng và giảm $\\alpha$.\n", + "\n", + "## Khai thác vs. Khám phá\n", + "\n", + "Cách tiếp cận tốt nhất là cân bằng giữa khám phá và khai thác. Khi chúng ta hiểu thêm về môi trường, chúng ta sẽ có xu hướng đi theo lộ trình tối ưu hơn, tuy nhiên, thỉnh thoảng vẫn nên chọn con đường chưa được khám phá.\n", + "\n", + "## Triển khai Python\n", + "\n", + "Bây giờ chúng ta đã sẵn sàng triển khai thuật toán học. Trước đó, chúng ta cũng cần một số hàm để chuyển đổi các số bất kỳ trong Bảng Q thành một vector xác suất cho các hành động tương ứng:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 7" + ] + }, + { + "source": [ + "Chúng ta thêm một lượng nhỏ `eps` vào vector ban đầu để tránh chia cho 0 trong trường hợp ban đầu, khi tất cả các thành phần của vector đều giống nhau.\n", + "\n", + "Chúng ta sẽ chạy thuật toán học thực tế trong 5000 thí nghiệm, còn được gọi là **epochs**:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "" + ] + } + ], + "source": [ + "\n", + "from IPython.display import clear_output\n", + "\n", + "lpath = []\n", + "\n", + "# code block 8" + ] + }, + { + "source": [ + "Sau khi thực hiện thuật toán này, Bảng Q nên được cập nhật với các giá trị xác định mức độ hấp dẫn của các hành động khác nhau tại mỗi bước. Trực quan hóa bảng tại đây:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## Kiểm tra Chính sách\n", + "\n", + "Vì Q-Table liệt kê \"mức độ hấp dẫn\" của mỗi hành động tại mỗi trạng thái, nên rất dễ sử dụng nó để xác định cách điều hướng hiệu quả trong thế 
giới của chúng ta. Trong trường hợp đơn giản nhất, chúng ta chỉ cần chọn hành động tương ứng với giá trị cao nhất trong Q-Table:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "2" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ], + "source": [ + "# code block 9" + ] + }, + { + "source": [ + "Nếu bạn thử đoạn mã trên nhiều lần, bạn có thể nhận thấy rằng đôi khi nó bị \"treo\", và bạn cần nhấn nút DỪNG trong notebook để ngắt nó.\n", + "\n", + "> **Nhiệm vụ 1:** Sửa đổi hàm `walk` để giới hạn độ dài tối đa của đường đi bằng một số bước nhất định (ví dụ, 100), và quan sát đoạn mã trên thỉnh thoảng trả về giá trị này.\n", + "\n", + "> **Nhiệm vụ 2:** Sửa đổi hàm `walk` để nó không quay lại những nơi mà nó đã từng đi qua trước đó. Điều này sẽ ngăn `walk` lặp lại, tuy nhiên, tác nhân vẫn có thể bị \"mắc kẹt\" ở một vị trí mà nó không thể thoát ra được.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Average path length = 5.31, eaten by wolf: 0 times\n" + ] + } + ], + "source": [ + "\n", + "# code block 10" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 57 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nO3de5wU5Z3v8c8vEk1islETkuPtlcFdT3LMvjbRsF5iTnajibdkQ5KjOeRKjKsnWT3rms1mwVw8q/EWL6gJXlAwxBsqQSWCIgJeuDPc5TrDfQBhhoFhYBiYgef80U8PPT19qe7p7qrp+r5fL5jup6qrnuqq/tVTTz31POacQ0RE4uE9YWdAREQqR0FfRCRGFPRFRGJEQV9EJEYU9EVEYqRf2BnI5aMf/airqakJOxsiIn3KggULmpxz/TNNi3TQr6mpoba2NuxsiIj0KWa2Mds0Ve+IiMSIgr6ISIwo6IuIxIiCvohIjCjoi4jEiIK+iEiMKOiLiMSIgr5IlXr1nW007T0QdjYkYhT0RapQy/4OfvLkQn78x/lhZ0UiRkFfpAp1HjoMQMOu/SHnRKJGQV9EJEYU9EVEYkRBX0QkRhT0RURiREFfRCRGFPRFRGJEQV9EJEYU9EVEYkRBX0QkRhT0RURiREFfRCRGFPRFRGJEQV9EJEYU9EVEYkRBX0QkRhT0RURiREFfRCRGFPRFRGJEQV9EJEYU9EVEYkRBX0QkRhT0RURiREFfRCRGFPRFRGJEQV9EJEYCBX0zu8HMlpvZO2b2jJm9z8wGmNlcM6szs2fN7Gg/7zH+fb2fXpOynGE+fbWZXVyeTRIRkWzyBn0zOxn4V2Cgc+5vgaOAwcCdwHDn3OnALuAq/5GrgF3Oub8Bhvv5MLMz/Oc+DVwCPGhmR5V2c0REJJeg1Tv9gPebWT/gA8A24AJgnJ8+BviGfz3Iv8dPv9DMzKePdc4dcM6tB+qBs3u/CSIiElTeoO+c2wLcDWwiEexbgAXAbudcp5+tATjZvz4Z2Ow/2+nn/0hqeobPdDGza8ys1sxqGxsbi9kmERHJIkj1zvEkSukDgJOAY4FLM8zqkh/JMi1bevcE50Y65wY65wb2798/X/ZEpACz1+7k0bfWhZ0NCVG/APN8GVjvnGsEMLPxwOeB48ysny/NnwJs9fM3AKcCDb466MNAc0p6UupnRKQCvvPoHACu/uJpIedEwhKkTn8TcK6ZfcDXzV8IrACmA5f7eYYAL/nXE/x7/PRpzjnn0wf71j0DgNOBeaXZDBERCSJvSd85N9fMxgELgU5gETASmAiMNbPf+rRR/iOjgCfMrJ5ECX+wX85yM3uOxAmjE7jWOXeoxNsjIiI5BKnewTl3E3BTWvI6MrS+cc61A1dkWc6twK0F5lFEREpET+SKiMSIgr6ISIwo6IuIxEigOn0R6du27t7PrLU7w86GRICCvkgMDB45h03NbWFnQyKgqqt3duxp56xbprBme2vYWREJVWPrgbCzIAENHjmb+1+vK9vyqzroT1m5neZ9B3l85oawsyIiEsicdc0Mf31N2ZZf1UFfRES6U9AXEYkRBX0RkRhR0BcRiREFfZEq1GOgChFPQV+kimUauUjiTUFfpIqpxC/pFPRFqpBK+JJNTIL+kfLOV+59k/ELG0LMi4hIeKo66FuG8k7djr387LklIeRGRCR8VR30RUSkOwV9EZEYUdAXEYkRBX2RGHBqvCmegr6ISIwo6IuIxIiCvohIjMQq6Dunek2Jp0zPrEg8xSLoK9aLiCRUddA3FW5ERLqp6qAvIiLdVXXQV7WOiEh3VR30k1TNIyJRc+iw47/+spyGXW0VXW8sgr6ISNQs3ryLx2du4IZnF1d0vQr6IiIhSFY/H65wNbSCvkiVeLuukUfeXBt2NiTiAgV9MzvOzMaZ2SozW2lm55nZCWY2xczq/N/j/bxmZg+YWb2ZLTWzs1KWM8TPX2dmQ8q1USJ
x9INR87j9lVVhZ0MiLmhJ/37gVefcp4DPACuBocBU59zpwFT/HuBS4HT/7xrgIQAzOwG4CTgHOBu4KXmiqBS15hGRuMsb9M3sr4AvAqMAnHMHnXO7gUHAGD/bGOAb/vUg4E8uYQ5wnJmdCFwMTHHONTvndgFTgEtKujVZKNhL3OiQl2yClPRPAxqBx81skZk9ZmbHAh93zm0D8H8/5uc/Gdic8vkGn5YtvRszu8bMas2strGxseAN6r6sXn1cpM9L/gTUn74kBQn6/YCzgIecc2cC+zhSlZNJplDrcqR3T3BupHNuoHNuYP/+/QNkT0REggoS9BuABufcXP9+HImTwHZfbYP/uyNl/lNTPn8KsDVHuoiUicr3ki5v0HfOvQtsNrNP+qQLgRXABCDZAmcI8JJ/PQH4oW/Fcy7Q4qt/JgMXmdnx/gbuRT5NREpMNZuSTb+A8/1f4CkzOxpYB1xJ4oTxnJldBWwCrvDzTgIuA+qBNj8vzrlmM7sFmO/nu9k511ySrRARkUACBX3n3GJgYIZJF2aY1wHXZlnOaGB0IRkUEZHS0RO5IiIxEqugr5taIhJVlRrONRZBXw9nSdxpjNxo++sbJ/Gth2ZVZF1VHfR1mItIVKWWRQ8ddizatLsi663qoC8iIt0p6IuIhCCsmggFfRGRGFHQFxGJkaoO+mq0IyLSXVUH/SR1sSxx5Zzj91Pr2N9xKOysSETEIugnVerhB5Go2NXWwT1T1oSdDYmQWAV9kUrae6CTZQ0trNy2hzteWaVCh0RC0F42+zT91iQMV4+pZfa6nRzT7z0c6DzMtV/6az70vveGnS2Juaou6asqX8K0cNMuADoPJ0odpptLEgFVHfRFRKQ7BX2REL2xegf7D5a2ZY3uHUguCvoiIVn9bis/enw+v3rxnZIu97ZJK0u6PKkuCvoiIWlt7wBgw859JV3uk3M26cFEyUpBX0SkAhpbD1AzdCK1GxJDg4d1Yo5V0FfpRyopebwl2+yorj3e5q1PBPvRM9d3S690m65YBX2RMKilpkSJgr6ISIgqff0Xi6DvVLEjMaOLi+jTICploMtqEZHuqjroi4hIdwr6ImWmRjsSJQr6IiIxEqugf+iwilxSebq3JFESq6B/9+TVYWdBpAc9tCWVFKug/+ryd8POgkgXXQFIGKo66KsAJVFWruNTJ5PqsHPvgbIst6qDfpLpURWpgB2t7cxZt7Pgz5V6RC0VdqIt1/7pPHS463W5bkHGIujriVyphG+OmMXgkXPCzob0YXe+uqrs6wgc9M3sKDNbZGYv+/cDzGyumdWZ2bNmdrRPP8a/r/fTa1KWMcynrzazi0u9MT3zXO41iByxZff+nNNV9BA4EpcyHQ9zfU+cienlOWIKKelfD6QOyXMnMNw5dzqwC7jKp18F7HLO/Q0w3M+HmZ0BDAY+DVwCPGhmR/Uu+yIR5n+zql6UXCLZtbKZnQJ8FXjMvzfgAmCcn2UM8A3/epB/j59+oZ9/EDDWOXfAObceqAfOLsVGBKWSv4RCx50UI+Q6/fuAXwDJuwwfAXY75zr9+wbgZP/6ZGAzgJ/e4ufvSs/wmS5mdo2Z1ZpZbWNjYwGbIiIi+eQN+mb2NWCHc25BanKGWdMHCkqfluszRxKcG+mcG+icG9i/f/982SuIWjVI2DoOHWZWfVPY2ZAIyRqWynSFGKSkfz7wdTPbAIwlUa1zH3CcmfXz85wCbPWvG4BTAfz0DwPNqekZPiNSvVJ+1fdOWcN3H5vbNU5qqbR3HCrp8qT88sb0sKp3nHPDnHOnOOdqSNyIneac+x4wHbjczzYEeMm/nuDf46dPc4nnzCcAg33rngHA6cC8km1JACrpS5gMWLtjLwBNew+WdNnN+0q7PCmf1vbOrNMqcfunN+30/xP4mZnVk6izH+XTRwEf8ek/A4YCOOeWA88BK4BXgWudcyqexMg9r61mWUNL2NkI7K7Jq3hhUUPJlucobeHtF+OWUDN0YgmXKJX
wdl336r0FG3dVdP398s9yhHPuDeAN/3odGVrfOOfagSuyfP5W4NZCM9lbyRK+Wu+E6/fT6vn9tHo23PHVsLMSyIjpawH45pmn9G5BeY67Yjtce642+wkp6BI7Dh3m1y++w79eeDonHff+ovIh5VGuiomqfiJX7aMlalKPyPRCyN4DnSzc1PtSXyGFm7frGhk7fzM3vrCs1+uVvqGqg75I1OQqvV371EK+9eAs9rR3VCw/SSoeRUPq8VGufaKgLxICs54NC5ZtSdzv6Og8nOET0tcVWr2s6h2RKpIa8EvRy6buV1WHqLfeEYmFGXVN7G7rO00inVN1TRRFpcm4gr5IHt8fNZerxtT2ejmpP/rmfQfZvKut18sslYjEo9hbktKkuVwnCQV9kQDWbG8t+rOZSt03vrCMG55dknH+Yn7rqQFif4Cncy9/aJbPm64JwhLWiVZBXySICvxCexN+t+bpyz9dbYUfCJLCles+jYK+VESxDyBFRVGl7wqW5fIN4CJ9j6p3eiH53fXxuBOqhZt20XGo+psSjphezx2v9ByyrvNwdW67hhINx4qte3hyzsZQ1l1QNwx9jqorS2Lltj1868FZXP0/B/DLr54RdnbK6q7JqwEYeumnuqW3dwQP+sPGL2XHngNd74OGVYXf+LjsgbdDW3d1B/20X5HaMhenaW8igK3cVvzNzGp36PCRg+2ZeZszzpPt+OvrVV9SHlEYI7fPUqyXchs2fmmg+XLF9zCO02ufWhTCWiVMsQj65eacU2kt5nL1eJkUtUPkzTWNgZp3SjjK1Zw2VkG/HD+6toOdDBg2iT9Mqy/9wiNGN/2KU4qf7oqte/iHu6bT0pa5M7b0Y3vKiu15l7moBD16SnCF972j6p1I2u1/hE/P2xRyTnqvdkMzO/ce6JGuB3h6pxQ/3Qem1rFxZxuz1gYbX/eXL76Td577Xq/reh21qxApHwX9Eho+ZQ33vLY67GwU7fKHZ3PFI7NLvtz7X69jwLBJJV9utSpFAFZ1o2QTq6Bf7tY790+t4/d9vJpnXeO+ki9zzOwNJV9mNUn2sqnWZZJKD2dJn1Xtpc69B7IPdF1q2b5J3W+Jvqj8DGIR9CPyXcdWNX//k5Zt429vmlySZeVszlnmATh0lRE96nunGClf2uHDrixn2moOaNLTo2+t4+a/rOh6/9aaxuAfDniwhBGAo1IKlSMmLXu3LMut7qCf4rQbJ2XslOqRN9fy0uItgZbxz2Pm89mbX8s4bVtLe6/yJ6WxoWkfNUMnsrRhd1mWf+uklYyeuR6APe0dge6BFBrDcwXgbNPS0xXE+75bXl6Rf6YixCboZ3P7K6u4fuziQPO+vnJHVxPNOEoGknELGlj17p6M83QeOsySzd0DbiUD0LRVOwAYvzDYiTyb1G4Vsvn2w7OZt6G5wCVnX25qCb9ue2ugtvYihYp90O+tOFSFplc3/Pz5JVxyX+YOo+5+bQ2DRsxk+daWjNP7iuZ9+YdHXPVusL6IijnnfWX4W1z9p56jdanuXXpLQT/mWto6WNu4t6jPDhk9r0fVWDLYN+3tO2PKVox1/ddNtbdukmhR0I+5r4+YwYX3vFlU4HlzTWPgqrFstu9pp2V/6arMol8S7l2AL9f5Qaed+FDQ76W+/mPZuDP44NzFBpxcJ5RzbpvKF+6cVtyCC9TecYimvQdoj3gnY+lt7mfUNfUYo7fQ4RFFkqq7P/0y29kHAkhQ5axhyLfo1vbgDzftbjvIuAUNXPWFAV1PsmZcZ4YN+tSvXwXg7075MBOu+0LgdZaMSzQGyC7z9nx/1NweaZ+/o/uJUjVE0fDtR2bzoWP6MepHf99jWlSuQhX0e+Fzv32dE449OuxsVEwUDtrP3jwl8ffU4xhYc0KP6UGyuLShuJvM/3DX9KI+F4GvLa++kMe+YN76QltzVV4sqnfKWQoK0sqjL0j9ivpC1cHBEMbrLaQqLFXwwy//nOpuQXqrqoO+Si+5ZRvoPL3qIKn
ok2c54lQBy2xsPcDztZmHMIyChZt2d6uzT+3KumboxIrkQaeS+FD1Toz95qUjfa7nutlazMkzSs0Q/88TtSzcVJ4ndEvlouFv8dEPJqoKc5Xms41tEJ1vW6Kuqkv6kluuJz63tezPWHUVpWCeS2oud7T2HBgmmvKfXh2OXwcYIEUSxs7bxKvvlKcPm0JF5aeTN+ib2almNt3MVprZcjO73qefYGZTzKzO/z3ep5uZPWBm9Wa21MzOSlnWED9/nZkNKd9mJUTkO+4T0r+r826fxlm3TOkx33MBq0neWH2kI7JK1u7katFTDa57ehFPzNkYdjb6jKHjl/GTJxeEnY1ICVLS7wT+3Tn3P4BzgWvN7AxgKDDVOXc6MNW/B7gUON3/uwZ4CBInCeAm4BzgbOCm5Imi3MKIA+ffMa3qSmQOR932zE/vvrx0K7tSrgz+OGtDhXIVfQc7E/dOdBNWoiBv0HfObXPOLfSvW4GVwMnAIGCMn20M8A3/ehDwJ5cwBzjOzE4ELgamOOeanXO7gCnAJSXdmgppyjCObLotu/cXVCL77csrutWxl0vDrjbWbG/11TRHzoa9ufTc1rKf655exE+fCl6iembeJs7M0mNpKaRuT5UX/gGYsGRrrz7fm2q78++Yxr1T1hT12T3tHRVvLfbKsm1sbk60xNqyez81QyeyMEaDxBdUp29mNcCZwFzg4865bZA4MQAf87OdDKTWATT4tGzp6eu4xsxqzay2sbGAvsorKFlyK6XHZqznT7PLf9n+hTunc9Hwt3hm3mYKrXgxLGMATX4fW3cH71562Phl7Cqwx9Ig33scAnwmBfXrX2Jbdu/ngal1+WfM4OLhb2VtLVYuP31qIZfdn+gwcEZd4nsbO29TSdfR0tbB6BnrI3kPLHDQN7MPAn8G/s05l7lfXT9rhjSXI717gnMjnXMDnXMD+/fvHzR7FRW93RjM9FVHngZNDxLlrHoo1YG/cluuwy65rpKsiu17QhwfIYQDLKx7IWGNQ9Fa5iEuh72wlJtfXkHtxuhdQQQK+mb2XhIB/ynn3HifvN1X2+D/JiNKA3BqysdPAbbmSC+7CJ5sQzFtVXoXAIX90B2uqOBQlhu5Zd6nC8rwY83W3DLdna+uLvm6o6alrYPdbdXxYGMmyaveYh/oK6cgrXcMGAWsdM7dmzJpApBsgTMEeCkl/Ye+Fc+5QIuv/pkMXGRmx/sbuBf5tLJJ/sTmbdjZq+V0HjrMw2+u7XofxUu2pBcWNXDFw7MK/lzOTUqJVZm2PUpfRzVU7/x5YUPF17k6y6A45fKZm1/r6lIjTOU6dhf7gYTumryqKy0qx2aQh7POB34ALDOzZD+6NwJ3AM+Z2VXAJuAKP20ScBlQD7QBVwI455rN7BZgvp/vZudcRTqq2Nxc/I2izkOHOfu2qX2mu4Ubnl2SdVpqFc6ry4O1XT4cYASpsI2esZ7jj30v3zzzlK601G3NVsLuOHSY68cu4voL/zuf/G8fKns+o2z7nr7yLEPpjJm1gZsmLC/rOqJUIErKG/SdczPIXg9wYYb5HXBtlmWNBkYXksGwvbmmMWPAzzZcYLVJ7+MmU/VOvhJM+oH/qxeX9TZb3dzsxxL95pmn8Nry4EMMrti6h0nL3qVh1/6K9Lp54FDuHlmDtAqrViOm1/P9cz7Bhz/w3oqt8+m5pb15m0lUSvep9ERuHpnGSnWOijzl19rewYqtxZ1cnHPM39DM7LW9q9pKDdj5Si2bmttYmFYXvqe9g/1p3U8/Oafnj62zRB2ozahv6vb+3ZZ2NjX3rFd1zvGbHKW8Fxf3bozdTK4e03P4w1LoC1djSTv3HmBHa8+bt3dNXs2vKtBkWRT08ypFq4ZtLfsZMb2+4HsBVz4+n8seyDwWbT7OwRUPz+Y7j84JPH82qdUj+b6NfQe7B/gfPz4/y5zdFTp6Vr7WRsnt+drvZ2Sc/lzt5h4DuKdaVIa+epYU2aVzPoWMR1Col5duZUPTvkD
zBjn5fO63r3P2rVMzTmsrc4uaMESxekdBv0hBW2IA/PTJhdw1eTV1Owobi7Y3zb0OpRxt+w/2bqCXoAE2k8U5AmsQW3bv59aJK4ouzWarMvnPP5e2iilMo2asK9uyr3t6ERfd91bZlp8qzPhYyO+5EBGM+fEN+hOXbqvYupJB93CRp/1iWgulVkt9+5HZrGvcmzM4z12fvxrIrPCD+D0Br5Q6DmVe8vXPLOLRt9ezpKH7yeOFhVtwzmUthT5V5vraJ2ZvKOvyC/HAtPqyLr8cDyNWQiGNL54tU9fbhVSPVkpsg/61Ty+s+DorudNTb8Au29LCBfe8yZYcj7v/KEA1jHMw8q2epcpccf09AY+wc2/vfsm/o7Wd7Xva6chSwh+/aAuTl7/LzLVNGaeX269fKm+rj76ot4f37raD1AydyMtLjzy+s2Z7K/uKrPZpzNC76gV3v1Fs9qpGbIN+ujezPMZeiou+Qm4LzN/QsxVr0JPFz58/0lwzU510as+XhejNpW/Qkn6q1vYOzr51KufclrnuN6llf0dkSk/VYH3Auvtsevv8yjq//lEz1nct76Lhb3HVmGD3hQpZRy6z1jbxwqLKPytRKQr63pDR8zI+hZkpZjlXmqZYre0dfOvBmaxrPFLXf8XDs3uUyA8eOsyNLyzLe7k6bsGRA/W495em6Vtqff7qlNGdgiom6N82aVX+maTkdgZsMvrioi3UDJ3Yo6O0XCF/4859jJheWDVU8hwyZ11xj/PkuxeV7Xj+7qNzuz3v8vLSrTya4Qq3r6rqoF9oy5v/9dCswKWVl3I06UvvtyVZJ/r4zPXd0qet2sHCTbsZ/nr3zqrOT+uAasLirTw9dxO3T1oZKG8Ag0bMDDxvLtv3HGCXf1x+d5YO0nK2/Cni5Higo+eN50zNLqF7oPl/RT5ok8zijLpwqoqiImg5PfnEcCENE74/ai53Tc7dvUSxFwqz1jZ161eq1K57ehG3FvDb6y56l6JVG/Snr97BnHWFt1F/fkH+yzqHY21j9svE9GqJPe2JYPlcbXGXjMkbwNkasMysb6LtYHmau33p7jf4l6eKv/9RTEk/U23S9WMX93hQbOe+g7yR8mNP78O/vsDWUoti1L1uPqve3ZO1yjOb13OMxBakBVl6s90g4XJ90z6+++hcrvxjzyqgYk4iuQpzNUMnsnFn4ne/YGMzNUMnUuevFkZMr+ebD5amoFVuVTtG7pUB24enW76lBQaemn/GgvSMYm0HO3lweqI/n3yllGxx0znHym2tfO+xuXz9Myf1OpfFSg/G5XLFw7O7vf9dno7Jvnzvm+XMTlW75L78z4ekH5Y/fWohG+74Kks272bnvgNc8KmP9yoPQa66v5RyY3b+hmb+vuYEnHNFD5F5/djFOacvbWjhEx85lr8sSbT+e6uuidM//qG8VzEQnadzq7akXyrFPhGbz/Apa7rqFPfmaZ2Q7dj/3eTVXQ9vFfoMQCllC77rGvcW9NDVlBXbmVvE1VmpRO9CvLJyxdgOf2Lfvqedt/NUgw0aMZMf/zHx9PH6pn28sXpHUaXu9I+s3LYnY0OHpCsens3m5jYeeWsd59w2tceN6VJ0lJhcwtKGYM+fpK4yW/VopVVtSb9YyX00d91OTj3hA9yTYUSgH4ya16t17G47yMz63gW3WfVNPPTGkZ4/g/Q1Xy6vr8x8WX/BPYWVtK/+UyJQfOusI2Pr5HpqtpSmrNjeZ9ujV8L3HpvLoM+elHW4zKT2tPsxXyphE8lL/cAntwz6ND84rybjPHvaO7ruzWxOuQ/0wqIGBn2mx5hNef3kicyjwS30reMKOZFsbO5d66hSUdDP4n+PzN59Qbabipn8/PklPZ4KHTxyDqveLbwljMOxo7WdptaDLNtSnkf6o6C3TQcLtaShpeuEE2cz6xNNFedv6HlvY976Zuat717KzlRd8alfv1qSvHz/sbl8+qS/yjjtpgnL+d45n+A97+mZgQUbd3XlMzUc3/D
sEv7p7wqvAg3SG+2yMnWvUS4K+mlKXe02Lu3G8KX3vx0o4Kc2H92YcpL5x7veoO3gIf7j4k+WLpMRU45+byS/sfM3RaaL5Rn1Td06z0ttrnzYwR2vrmJthirN3+R4aK5c1Xf/9IfM/TtBorEBJJrD5rtKqhQF/TRjZm8sawdWQathxqS0REmtxmnzrSCC3DgSKUSh3RuVq7+aTM66pfuAK5meDM9nXY4Wd5DorqRQnQG/tC/f+2bBY0KXS6xv5KaXwpPGLyp9t7oiUVfojc58fUlNydGEsxLS85evSW569VUmvxy/jJqhE7veBy18RSXgQ8yDfmq3BVGT2v9Il7g3L5Gyatpb2Ohw/zEu9++n2PskpareS29VVujPZ1tLz76q0gdUzzTeRtTFOuhHWR88liRmolL/H1ShLTbPu31a/pn6IAX9PkTVTiLFu/GFcMZQuPkvK0JZbzYK+iJStFINc1nNRqf1uRU2BX0RKdq/R/i+mGSmoC8iRXtpcYYGBxJpCvoiIjGioC8iEiMK+iIiMaKgLyISIwr6IiIxoqAvIhIjCvoiIjGioC8iEiMK+iIiMaKgLyISIwr6IiIxUvGgb2aXmNlqM6s3s6GVXr+ISJxVNOib2VHACOBS4AzgO2Z2RqnX07I/OkOTiYhESaVL+mcD9c65dc65g8BYYFCpV7KuMRqjzouIRE2lg/7JwOaU9w0+rYuZXWNmtWZW29jYWNRKPnvqccXnUEQkAp695tyyLLdfWZaanWVI6zZypXNuJDASYODAgUWNFGtmbLjjq8V8VESkqlW6pN8AnJry/hRAozCIiFRIpYP+fOB0MxtgZkcDg4EJFc6DiEhsVbR6xznXaWbXAZOBo4DRzrnllcyDiEicVbpOH+fcJGBSpdcrIiJ6IldEJFYU9EVEYkRBX0QkRhT0RURixJwr6vmnijCzRmBjLxbxUaCpRNnpC+K2vaBtjgttc2E+4Zzrn2lCpIN+b5lZrXNuYNj5qJS4bS9om+NC21w6qt4REYkRBX0RkRip9qA/MuwMVFjcthe0zXGhbS6Rqq7TFxGR7qq9pC8iIikU9EVEYqQqg341Db5uZqea2XQzW2lmy83sep9+gplNMbM6/y+8XRcAAAQhSURBVPd4n25m9oDf9qVmdlbKsob4+evMbEhY2xSEmR1lZovM7GX/foCZzfV5f9Z3zY2ZHePf1/vpNSnLGObTV5vZxeFsSTBmdpyZjTOzVX5fnxeDfXyDP6bfMbNnzOx91bafzWy0me0ws3dS0kq2X83sc2a2zH/mATPLNFBVd865qvpHosvmtcBpwNHAEuCMsPPVi+05ETjLv/4QsIbEoPK/A4b69KHAnf71ZcArJEYpOxeY69NPANb5v8f718eHvX05tvtnwNPAy/79c8Bg//ph4Kf+9b8AD/vXg4Fn/esz/L4/Bhjgj4mjwt6uHNs7Bvhn//po4Lhq3sckhkldD7w/Zf/+qNr2M/BF4CzgnZS0ku1XYB5wnv/MK8ClefMU9pdShi/5PGByyvthwLCw81XC7XsJ+AqwGjjRp50IrPavHwG+kzL/aj/9O8AjKend5ovSPxIjqk0FLgBe9gd0E9AvfR+TGJvhPP+6n5/P0vd76nxR+wf8lQ+AlpZezfs4OV72CX6/vQxcXI37GahJC/ol2a9+2qqU9G7zZftXjdU7eQdf76v8Je2ZwFzg4865bQD+78f8bNm2vy99L/cBvwAO+/cfAXY75zr9+9S8d22Xn97i5+9L23sa0Ag87qu0HjOzY6nifeyc2wLcDWwCtpHYbwuo7v2cVKr9erJ/nZ6eUzUG/byDr/dFZvZB4M/Avznn9uSaNUOay5EeKWb2NWCHc25BanKGWV2eaX1ie71+JKoAHnLOnQnsI3HZn02f32Zfjz2IRJXMScCxwKUZZq2m/ZxPodtY1LZXY9CvusHXzey9JAL+U8658T55u5md6KefCOzw6dm2v698L+cDXzezDcBYElU89wHHmVl
ypLfUvHdtl5/+YaCZvrO9kMhrg3Nurn8/jsRJoFr3McCXgfXOuUbnXAcwHvg81b2fk0q1Xxv86/T0nKox6FfV4Ov+bvwoYKVz7t6USROA5F38ISTq+pPpP/QtAc4FWvwl5GTgIjM73peyLvJpkeKcG+acO8U5V0Ni301zzn0PmA5c7mdL397k93C5n9/59MG+1ccA4HQSN70ixzn3LrDZzD7pky4EVlCl+9jbBJxrZh/wx3hym6t2P6coyX7101rN7Fz/Hf4wZVnZhX2To0w3Ti4j0cplLfDLsPPTy235AolLtqXAYv/vMhL1mVOBOv/3BD+/ASP8ti8DBqYs68dAvf93ZdjbFmDb/5EjrXdOI/FjrgeeB47x6e/z7+v99NNSPv9L/z2sJkCrhpC39bNArd/PL5JopVHV+xj4L2AV8A7wBIkWOFW1n4FnSNyz6CBRMr+qlPsVGOi/v7XAH0hrDJDpn7phEBGJkWqs3hERkSwU9EVEYkRBX0QkRhT0RURiREFfRCRGFPRFRGJEQV9EJEb+P5qkdQkuhnG4AAAAAElFTkSuQmCC\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(lpath)" + ] + }, + { + "source": [ + "## Bài tập\n", + "## Một thế giới Peter và Sói thực tế hơn\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc sự không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn tham khảo chính thức. Đối với các thông tin quan trọng, nên sử dụng dịch vụ dịch thuật chuyên nghiệp từ con người. 
Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/vi/8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb b/translations/vi/8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb new file mode 100644 index 000000000..8903509fa --- /dev/null +++ b/translations/vi/8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb @@ -0,0 +1,460 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "eadbd20d2a075efb602615ad90b1e97a", + "translation_date": "2025-09-06T15:15:50+00:00", + "source_file": "8-Reinforcement/1-QLearning/solution/assignment-solution.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# Peter và Sói: Môi Trường Thực Tế\n", + "\n", + "Trong tình huống của chúng ta, Peter có thể di chuyển gần như không cảm thấy mệt mỏi hay đói. Trong một thế giới thực tế hơn, cậu ấy cần phải ngồi xuống nghỉ ngơi và ăn uống để duy trì sức khỏe. Hãy làm cho thế giới của chúng ta trở nên thực tế hơn bằng cách áp dụng các quy tắc sau:\n", + "\n", + "1. Khi di chuyển từ nơi này sang nơi khác, Peter sẽ mất **năng lượng** và tăng thêm **mệt mỏi**.\n", + "2. Peter có thể tăng năng lượng bằng cách ăn táo.\n", + "3. Peter có thể giảm mệt mỏi bằng cách nghỉ ngơi dưới gốc cây hoặc trên cỏ (tức là đi vào vị trí trên bảng có cây hoặc cỏ - ô màu xanh lá).\n", + "4. 
Peter cần tìm và tiêu diệt con sói.\n", + "5. Để tiêu diệt con sói, Peter cần đạt mức năng lượng và mệt mỏi nhất định, nếu không cậu ấy sẽ thua trong trận chiến.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import random\n", + "import math\n", + "from rlboard import *" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "width, height = 8,8\n", + "m = Board(width,height)\n", + "m.randomize(seed=13)\n", + "m.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "actions = { \"U\" : (0,-1), \"D\" : (0,1), \"L\" : (-1,0), \"R\" : (1,0) }\n", + "action_idx = { a : i for i,a in enumerate(actions.keys()) }" + ] + }, + { + "source": [ + "## Định nghĩa trạng thái\n", + "\n", + "Trong các quy tắc trò chơi mới, chúng ta cần theo dõi năng lượng và sự mệt mỏi ở mỗi trạng thái bàn cờ. 
Vì vậy, chúng ta sẽ tạo một đối tượng `state` để chứa tất cả thông tin cần thiết về trạng thái hiện tại của vấn đề, bao gồm trạng thái của bàn cờ, mức năng lượng và sự mệt mỏi hiện tại, và liệu chúng ta có thể đánh bại con sói khi ở trạng thái cuối hay không:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "class state:\n", + " def __init__(self,board,energy=10,fatigue=0,init=True):\n", + " self.board = board\n", + " self.energy = energy\n", + " self.fatigue = fatigue\n", + " self.dead = False\n", + " if init:\n", + " self.board.random_start()\n", + " self.update()\n", + "\n", + " def at(self):\n", + " return self.board.at()\n", + "\n", + " def update(self):\n", + " if self.at() == Board.Cell.water:\n", + " self.dead = True\n", + " return\n", + " if self.at() == Board.Cell.tree:\n", + " self.fatigue = 0\n", + " if self.at() == Board.Cell.apple:\n", + " self.energy = 10\n", + "\n", + " def move(self,a):\n", + " self.board.move(a)\n", + " self.energy -= 1\n", + " self.fatigue += 1\n", + " self.update()\n", + "\n", + " def is_winning(self):\n", + " return self.energy > self.fatigue" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ], + "source": [ + "def random_policy(state):\n", + " return random.choice(list(actions))\n", + "\n", + "def walk(board,policy):\n", + " n = 0 # number of steps\n", + " s = state(board)\n", + " while True:\n", + " if s.at() == Board.Cell.wolf:\n", + " if s.is_winning():\n", + " return n # success!\n", + " else:\n", + " return -n # failure!\n", + " if s.at() == Board.Cell.water:\n", + " return 0 # died\n", + " a = actions[policy(m)]\n", + " s.move(a)\n", + " n+=1\n", + "\n", + 
"walk(m,random_policy)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Killed by wolf = 5, won: 1 times, drown: 94 times\n" + ] + } + ], + "source": [ + "def print_statistics(policy):\n", + " s,w,n = 0,0,0\n", + " for _ in range(100):\n", + " z = walk(m,policy)\n", + " if z<0:\n", + " w+=1\n", + " elif z==0:\n", + " n+=1\n", + " else:\n", + " s+=1\n", + " print(f\"Killed by wolf = {w}, won: {s} times, drown: {n} times\")\n", + "\n", + "print_statistics(random_policy)" + ] + }, + { + "source": [ + "## Hàm Thưởng\n", + "\n", + "### Tổng quan\n", + "Hàm thưởng là một phần quan trọng trong việc thiết kế hệ thống học tăng cường. Nó định nghĩa cách mà một tác nhân nhận được phản hồi từ môi trường để hướng dẫn hành vi của mình.\n", + "\n", + "### Cách hoạt động\n", + "Hàm thưởng thường được thiết kế để khuyến khích tác nhân đạt được mục tiêu cụ thể. Mỗi hành động mà tác nhân thực hiện sẽ dẫn đến một giá trị thưởng, giá trị này có thể là dương, âm hoặc bằng không.\n", + "\n", + "### Ví dụ\n", + "Dưới đây là một ví dụ đơn giản về cách định nghĩa hàm thưởng:\n", + "\n", + "```python\n", + "def reward_function(state, action):\n", + " if state == \"goal_state\":\n", + " return 10 # Thưởng cao khi đạt được mục tiêu\n", + " elif action == \"invalid_action\":\n", + " return -5 # Phạt khi thực hiện hành động không hợp lệ\n", + " else:\n", + " return 0 # Không thưởng hoặc phạt cho các hành động khác\n", + "```\n", + "\n", + "### Lưu ý\n", + "[!NOTE] Hàm thưởng cần được thiết kế cẩn thận để tránh việc khuyến khích hành vi không mong muốn. 
Ví dụ, nếu tác nhân nhận được thưởng cao khi thực hiện một hành động cụ thể, nó có thể lặp lại hành động đó mà không quan tâm đến mục tiêu dài hạn.\n", + "\n", + "### Các phương pháp phổ biến\n", + "- **Thưởng theo mục tiêu:** Tác nhân nhận được thưởng khi đạt được trạng thái mục tiêu.\n", + "- **Thưởng theo tiến trình:** Tác nhân nhận được thưởng dựa trên mức độ tiến bộ hướng tới mục tiêu.\n", + "- **Thưởng theo hành vi:** Tác nhân nhận được thưởng khi thực hiện hành vi mong muốn.\n", + "\n", + "### Cảnh báo\n", + "[!WARNING] Tránh thiết kế hàm thưởng quá phức tạp, vì điều này có thể làm cho việc học của tác nhân trở nên khó khăn và không hiệu quả.\n", + "\n", + "### Mẹo\n", + "[!TIP] Hãy thử nghiệm nhiều hàm thưởng khác nhau để tìm ra thiết kế phù hợp nhất với môi trường và mục tiêu của bạn.\n", + "\n", + "### Kết luận\n", + "Hàm thưởng đóng vai trò quan trọng trong việc định hình hành vi của tác nhân. Một hàm thưởng được thiết kế tốt sẽ giúp tác nhân học nhanh hơn và đạt được kết quả tốt hơn.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "def reward(s):\n", + " r = s.energy-s.fatigue\n", + " if s.at()==Board.Cell.wolf:\n", + " return 100 if s.is_winning() else -100\n", + " if s.at()==Board.Cell.water:\n", + " return -100\n", + " return r" + ] + }, + { + "source": [ + "## Thuật toán Q-Learning\n", + "\n", + "Thuật toán học thực tế hầu như không thay đổi, chúng ta chỉ sử dụng `state` thay vì chỉ vị trí trên bảng.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "Q = np.ones((width,height,len(actions)),dtype=np.float)*1.0/len(actions)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "def probs(v,eps=1e-4):\n", + " v = v-v.min()+eps\n", + " v = v/v.sum()\n", + " return v" + ] + }, + { 
+ "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "" + ] + } + ], + "source": [ + "\n", + "from IPython.display import clear_output\n", + "\n", + "lpath = []\n", + "\n", + "for epoch in range(10000):\n", + " clear_output(wait=True)\n", + " print(f\"Epoch = {epoch}\",end='')\n", + "\n", + " # Pick initial point\n", + " s = state(m)\n", + " \n", + " # Start travelling\n", + " n=0\n", + " cum_reward = 0\n", + " while True:\n", + " x,y = s.board.human\n", + " v = probs(Q[x,y])\n", + " while True:\n", + " a = random.choices(list(actions),weights=v)[0]\n", + " dpos = actions[a]\n", + " if s.board.is_valid(s.board.move_pos(s.board.human,dpos)):\n", + " break \n", + " s.move(dpos)\n", + " r = reward(s)\n", + " if abs(r)==100: # end of game\n", + " print(f\" {n} steps\",end='\\r')\n", + " lpath.append(n)\n", + " break\n", + " alpha = np.exp(-n / 3000)\n", + " gamma = 0.5\n", + " ai = action_idx[a]\n", + " Q[x,y,ai] = (1 - alpha) * Q[x,y,ai] + alpha * (r + gamma * Q[x+dpos[0], y+dpos[1]].max())\n", + " n+=1" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## Kết quả\n", + "\n", + "Hãy xem liệu chúng ta đã thành công trong việc huấn luyện Peter để chiến đấu với con sói chưa!\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Killed by wolf = 1, won: 9 times, drown: 90 times\n" + ] + } + ], + "source": [ + "def qpolicy(m):\n", + " x,y = m.human\n", + " v = probs(Q[x,y])\n", + " a = random.choices(list(actions),weights=v)[0]\n", + " return a\n", + "\n", + "print_statistics(qpolicy)" + ] + }, + { + "source": [ + "Chúng ta hiện thấy ít trường hợp chết đuối hơn nhiều, nhưng Peter vẫn không phải lúc nào cũng có thể giết được con sói. 
Hãy thử nghiệm và xem liệu bạn có thể cải thiện kết quả này bằng cách điều chỉnh các siêu tham số.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 13 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(lpath)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn thông tin chính thức. Đối với các thông tin quan trọng, nên sử dụng dịch vụ dịch thuật chuyên nghiệp từ con người. 
Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/vi/8-Reinforcement/1-QLearning/solution/notebook.ipynb b/translations/vi/8-Reinforcement/1-QLearning/solution/notebook.ipynb new file mode 100644 index 000000000..bc8e66525 --- /dev/null +++ b/translations/vi/8-Reinforcement/1-QLearning/solution/notebook.ipynb @@ -0,0 +1,577 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "488431336543f71f14d4aaf0399e3381", + "translation_date": "2025-09-06T15:13:18+00:00", + "source_file": "8-Reinforcement/1-QLearning/solution/notebook.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# Peter và Chó Sói: Hướng dẫn cơ bản về Học tăng cường\n", + "\n", + "Trong hướng dẫn này, chúng ta sẽ học cách áp dụng học tăng cường vào một bài toán tìm đường. Bối cảnh được lấy cảm hứng từ câu chuyện cổ tích âm nhạc [Peter và Chó Sói](https://en.wikipedia.org/wiki/Peter_and_the_Wolf) của nhà soạn nhạc người Nga [Sergei Prokofiev](https://en.wikipedia.org/wiki/Sergei_Prokofiev). Đây là câu chuyện về cậu bé tiên phong Peter, người dũng cảm rời khỏi nhà để đến khoảng rừng trống nhằm đuổi theo con sói.
Chúng ta sẽ huấn luyện các thuật toán học máy để giúp Peter khám phá khu vực xung quanh và xây dựng một bản đồ điều hướng tối ưu.\n", + "\n", + "Đầu tiên, hãy nhập một số thư viện hữu ích:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import random\n", + "import math" + ] + }, + { + "source": [ + "## Tổng quan về Học tăng cường\n", + "\n", + "**Học tăng cường** (Reinforcement Learning - RL) là một kỹ thuật học cho phép chúng ta tìm hiểu hành vi tối ưu của một **tác nhân** trong một **môi trường** nào đó bằng cách thực hiện nhiều thử nghiệm. Một tác nhân trong môi trường này cần có một **mục tiêu**, được xác định bởi một **hàm phần thưởng**.\n", + "\n", + "## Môi trường\n", + "\n", + "Để đơn giản, hãy xem xét thế giới của Peter là một bảng vuông có kích thước `width` x `height`. Mỗi ô trên bảng này có thể là:\n", + "* **mặt đất**, nơi Peter và các sinh vật khác có thể đi lại\n", + "* **nước**, nơi rõ ràng bạn không thể đi qua\n", + "* **một cái cây** hoặc **cỏ** - nơi bạn có thể nghỉ ngơi\n", + "* **một quả táo**, đại diện cho thứ mà Peter rất vui khi tìm thấy để tự nuôi sống mình\n", + "* **một con sói**, thứ nguy hiểm và cần phải tránh xa\n", + "\n", + "Để làm việc với môi trường, chúng ta sẽ định nghĩa một lớp gọi là `Board`. Để tránh làm rối notebook này, chúng ta đã chuyển toàn bộ mã nguồn làm việc với bảng vào một module riêng tên là `rlboard`, mà bây giờ chúng ta sẽ import. 
Bạn có thể xem bên trong module này để tìm hiểu thêm chi tiết về cách triển khai nội bộ.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from rlboard import *" + ] + }, + { + "source": [ + "Bây giờ hãy tạo một bảng ngẫu nhiên và xem nó trông như thế nào:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "width, height = 8,8\n", + "m = Board(width,height)\n", + "m.randomize(seed=13)\n", + "m.plot()" + ] + }, + { + "source": [ + "## Hành động và Chính sách\n", + "\n", + "Trong ví dụ của chúng ta, mục tiêu của Peter là tìm một quả táo, đồng thời tránh con sói và các chướng ngại vật khác. Để làm được điều này, anh ấy có thể đi xung quanh cho đến khi tìm thấy một quả táo. Vì vậy, tại bất kỳ vị trí nào, anh ấy có thể chọn một trong các hành động sau: lên, xuống, trái và phải. Chúng ta sẽ định nghĩa các hành động này dưới dạng một từ điển và ánh xạ chúng tới các cặp thay đổi tọa độ tương ứng. 
Ví dụ, di chuyển sang phải (`R`) sẽ tương ứng với cặp tọa độ `(1,0)`.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "actions = { \"U\" : (0,-1), \"D\" : (0,1), \"L\" : (-1,0), \"R\" : (1,0) }\n", + "action_idx = { a : i for i,a in enumerate(actions.keys()) }" + ] + }, + { + "source": [ + "Chiến lược của tác nhân (Peter) được định nghĩa bởi một cái gọi là **chính sách**. Hãy cùng xem xét chính sách đơn giản nhất được gọi là **đi ngẫu nhiên**.\n", + "\n", + "## Đi ngẫu nhiên\n", + "\n", + "Trước tiên, hãy giải quyết vấn đề của chúng ta bằng cách triển khai chiến lược đi ngẫu nhiên.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "18" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ], + "source": [ + "def random_policy(m):\n", + " return random.choice(list(actions))\n", + "\n", + "def walk(m,policy,start_position=None):\n", + " n = 0 # number of steps\n", + " # set initial position\n", + " if start_position:\n", + " m.human = start_position \n", + " else:\n", + " m.random_start()\n", + " while True:\n", + " if m.at() == Board.Cell.apple:\n", + " return n # success!\n", + " if m.at() in [Board.Cell.wolf, Board.Cell.water]:\n", + " return -1 # eaten by wolf or drowned\n", + " while True:\n", + " a = actions[policy(m)]\n", + " new_pos = m.move_pos(m.human,a)\n", + " if m.is_valid(new_pos) and m.at(new_pos)!=Board.Cell.water:\n", + " m.move(a) # do the actual move\n", + " break\n", + " n+=1\n", + "\n", + "walk(m,random_policy)" + ] + }, + { + "source": [ + "Hãy thực hiện thí nghiệm bước đi ngẫu nhiên nhiều lần và xem số bước trung bình đã thực hiện:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + 
"outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Average path length = 32.87096774193548, eaten by wolf: 7 times\n" + ] + } + ], + "source": [ + "def print_statistics(policy):\n", + " s,w,n = 0,0,0\n", + " for _ in range(100):\n", + " z = walk(m,policy)\n", + " if z<0:\n", + " w+=1\n", + " else:\n", + " s += z\n", + " n += 1\n", + " print(f\"Average path length = {s/n}, eaten by wolf: {w} times\")\n", + "\n", + "print_statistics(random_policy)" + ] + }, + { + "source": [ + "## Hàm Thưởng\n", + "\n", + "Để làm cho chính sách của chúng ta thông minh hơn, chúng ta cần hiểu những nước đi nào \"tốt hơn\" so với những nước khác.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "move_reward = -0.1\n", + "goal_reward = 10\n", + "end_reward = -10\n", + "\n", + "def reward(m,pos=None):\n", + " pos = pos or m.human\n", + " if not m.is_valid(pos):\n", + " return end_reward\n", + " x = m.at(pos)\n", + " if x==Board.Cell.water or x == Board.Cell.wolf:\n", + " return end_reward\n", + " if x==Board.Cell.apple:\n", + " return goal_reward\n", + " return move_reward" + ] + }, + { + "source": [ + "## Q-Learning\n", + "\n", + "Xây dựng một Q-Table, hoặc mảng đa chiều. Vì bảng của chúng ta có kích thước `width` x `height`, chúng ta có thể biểu diễn Q-Table bằng một mảng numpy với hình dạng `width` x `height` x `len(actions)`:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "Q = np.ones((width,height,len(actions)),dtype=np.float)*1.0/len(actions)" + ] + }, + { + "source": [ + "Chuyển bảng Q-Table vào hàm vẽ để hiển thị bảng trên bảng:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## Bản chất của Q-Learning: Phương trình Bellman và Thuật toán Học\n", + "\n", + "Viết mã giả cho thuật toán học của chúng ta:\n", + "\n", + "* Khởi tạo Bảng Q Q với các giá trị bằng nhau cho tất cả các trạng thái và hành động\n", + "* Đặt tốc độ học $\\alpha\\leftarrow 1$\n", + "* Lặp lại mô phỏng nhiều lần\n", + " 1. Bắt đầu tại vị trí ngẫu nhiên\n", + " 1. Lặp lại\n", + " 1. Chọn một hành động $a$ tại trạng thái $s$\n", + " 2. Thực hiện hành động bằng cách di chuyển đến trạng thái mới $s'$\n", + " 3. Nếu gặp điều kiện kết thúc trò chơi, hoặc tổng phần thưởng quá nhỏ - thoát khỏi mô phỏng \n", + " 4. Tính phần thưởng $r$ tại trạng thái mới\n", + " 5. Cập nhật Hàm Q theo phương trình Bellman: $Q(s,a)\\leftarrow (1-\\alpha)Q(s,a)+\\alpha(r+\\gamma\\max_{a'}Q(s',a'))$\n", + " 6. $s\\leftarrow s'$\n", + " 7. 
Cập nhật tổng phần thưởng và giảm $\\alpha$.\n", + "\n", + "## Khai thác vs. Khám phá\n", + "\n", + "Cách tiếp cận tốt nhất là cân bằng giữa khám phá và khai thác. Khi chúng ta hiểu thêm về môi trường, chúng ta sẽ có xu hướng đi theo lộ trình tối ưu hơn, tuy nhiên, thỉnh thoảng vẫn nên chọn con đường chưa được khám phá.\n", + "\n", + "## Triển khai Python\n", + "\n", + "Bây giờ chúng ta đã sẵn sàng triển khai thuật toán học. Trước đó, chúng ta cũng cần một số hàm để chuyển đổi các số bất kỳ trong Bảng Q thành một vector xác suất cho các hành động tương ứng:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "def probs(v,eps=1e-4):\n", + " v = v-v.min()+eps\n", + " v = v/v.sum()\n", + " return v" + ] + }, + { + "source": [ + "Chúng ta thêm một lượng nhỏ `eps` vào vector ban đầu để tránh việc chia cho 0 trong trường hợp ban đầu, khi tất cả các thành phần của vector đều giống nhau.\n", + "\n", + "Thuật toán học thực tế dưới đây sẽ được chạy trong 10000 thí nghiệm, còn được gọi là **epochs**:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "" + ] + } + ], + "source": [ + "\n", + "from IPython.display import clear_output\n", + "\n", + "lpath = []\n", + "\n", + "for epoch in range(10000):\n", + " clear_output(wait=True)\n", + " print(f\"Epoch = {epoch}\",end='')\n", + "\n", + " # Pick initial point\n", + " m.random_start()\n", + " \n", + " # Start travelling\n", + " n=0\n", + " cum_reward = 0\n", + " while True:\n", + " x,y = m.human\n", + " v = probs(Q[x,y])\n", + " a = random.choices(list(actions),weights=v)[0]\n", + " dpos = actions[a]\n", + " m.move(dpos,check_correctness=False) # we allow player to move outside the board, which terminates episode\n", + " r = reward(m)\n", + " cum_reward += r\n", + " if
r==end_reward or cum_reward < -1000:\n", + " print(f\" {n} steps\",end='\\r')\n", + " lpath.append(n)\n", + " break\n", + " alpha = np.exp(-n / 3000)\n", + " gamma = 0.5\n", + " ai = action_idx[a]\n", + " Q[x,y,ai] = (1 - alpha) * Q[x,y,ai] + alpha * (r + gamma * Q[x+dpos[0], y+dpos[1]].max())\n", + " n+=1" + ] + }, + { + "source": [ + "Sau khi thực hiện thuật toán này, Bảng Q nên được cập nhật với các giá trị xác định mức độ hấp dẫn của các hành động khác nhau tại mỗi bước. Hình dung bảng tại đây:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "m.plot(Q)" + ] + }, + { + "source": [ + "## Kiểm tra Chính sách\n", + "\n", + "Vì Q-Table liệt kê \"sức hấp dẫn\" của mỗi hành động tại mỗi trạng thái, nên việc sử dụng nó để xác định cách điều hướng hiệu quả trong thế giới của chúng ta là khá dễ dàng. 
Trong trường hợp đơn giản nhất, chúng ta chỉ cần chọn hành động tương ứng với giá trị cao nhất trong Q-Table:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "2" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ], + "source": [ + "def qpolicy_strict(m):\n", + " x,y = m.human\n", + " v = probs(Q[x,y])\n", + " a = list(actions)[np.argmax(v)]\n", + " return a\n", + "\n", + "walk(m,qpolicy_strict)" + ] + }, + { + "source": [ + "Nếu bạn thử đoạn mã trên nhiều lần, bạn có thể nhận thấy rằng đôi khi nó chỉ \"treo\" và bạn cần nhấn nút DỪNG trong notebook để ngắt nó.\n", + "\n", + "> **Nhiệm vụ 1:** Sửa đổi hàm `walk` để giới hạn độ dài tối đa của đường đi bằng một số bước nhất định (ví dụ, 100), và quan sát đoạn mã trên thỉnh thoảng trả về giá trị này.\n", + "\n", + "> **Nhiệm vụ 2:** Sửa đổi hàm `walk` để không quay lại những nơi mà nó đã từng đi qua trước đó.
Điều này sẽ ngăn `walk` lặp lại, tuy nhiên, tác nhân vẫn có thể bị \"mắc kẹt\" ở một vị trí mà nó không thể thoát ra được.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Average path length = 3.45, eaten by wolf: 0 times\n" + ] + } + ], + "source": [ + "\n", + "def qpolicy(m):\n", + " x,y = m.human\n", + " v = probs(Q[x,y])\n", + " a = random.choices(list(actions),weights=v)[0]\n", + " return a\n", + "\n", + "print_statistics(qpolicy)" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 15 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(lpath)" + ] + }, + { + "source": [ + "Điều chúng ta thấy ở đây là ban đầu độ dài trung bình của đường đi tăng lên. Điều này có thể là do khi chúng ta chưa biết gì về môi trường - chúng ta dễ bị mắc kẹt trong các trạng thái xấu, như nước hoặc sói. Khi chúng ta học được nhiều hơn và bắt đầu sử dụng kiến thức này, chúng ta có thể khám phá môi trường lâu hơn, nhưng vẫn chưa biết rõ vị trí của những quả táo.\n", + "\n", + "Khi học đủ, việc đạt được mục tiêu trở nên dễ dàng hơn đối với tác nhân, và độ dài đường đi bắt đầu giảm. Tuy nhiên, chúng ta vẫn mở rộng khám phá, vì vậy thường đi lệch khỏi đường đi tốt nhất và thử nghiệm các lựa chọn mới, khiến đường đi dài hơn mức tối ưu.\n", + "\n", + "Điều chúng ta cũng quan sát được trên biểu đồ này là tại một số điểm, độ dài tăng đột ngột. Điều này cho thấy tính chất ngẫu nhiên của quá trình, và rằng tại một số thời điểm chúng ta có thể \"làm hỏng\" các hệ số trong Q-Table bằng cách ghi đè chúng với các giá trị mới. 
Điều này lý tưởng nên được giảm thiểu bằng cách giảm tốc độ học (tức là, về cuối quá trình huấn luyện, chúng ta chỉ điều chỉnh các giá trị trong Q-Table bằng một giá trị nhỏ).\n", + "\n", + "Nhìn chung, điều quan trọng cần nhớ là sự thành công và chất lượng của quá trình học phụ thuộc đáng kể vào các tham số, như tốc độ học, sự giảm tốc độ học và hệ số chiết khấu. Những tham số này thường được gọi là **siêu tham số**, để phân biệt với **tham số** mà chúng ta tối ưu trong quá trình huấn luyện (ví dụ: các hệ số trong Q-Table). Quá trình tìm giá trị tốt nhất cho các siêu tham số được gọi là **tối ưu hóa siêu tham số**, và nó xứng đáng là một chủ đề riêng.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "## Bài Tập\n", + "#### Một Thế Giới Peter và Con Sói Thực Tế Hơn\n", + "\n", + "Trong tình huống của chúng ta, Peter có thể di chuyển gần như không bị mệt mỏi hay đói. Trong một thế giới thực tế hơn, cậu ấy phải ngồi xuống nghỉ ngơi thỉnh thoảng và cũng cần ăn uống để duy trì sức khỏe. Hãy làm cho thế giới của chúng ta thực tế hơn bằng cách thực hiện các quy tắc sau:\n", + "\n", + "1. Khi di chuyển từ nơi này sang nơi khác, Peter mất **năng lượng** và tăng thêm **mệt mỏi**.\n", + "2. Peter có thể tăng năng lượng bằng cách ăn táo.\n", + "3. Peter có thể giảm mệt mỏi bằng cách nghỉ ngơi dưới gốc cây hoặc trên cỏ (tức là đi vào vị trí trên bảng có cây hoặc cỏ - ô màu xanh lá).\n", + "4. Peter cần tìm và tiêu diệt con sói.\n", + "5. Để tiêu diệt con sói, Peter cần đạt mức năng lượng và mệt mỏi nhất định, nếu không cậu ấy sẽ thua trong trận chiến.\n", + "\n", + "Hãy chỉnh sửa hàm thưởng ở trên theo các quy tắc của trò chơi, chạy thuật toán học tăng cường để tìm chiến lược tốt nhất để chiến thắng trò chơi, và so sánh kết quả của việc đi ngẫu nhiên với thuật toán của bạn về số lượng trò chơi thắng và thua.\n", + "\n", + "> **Note**: Bạn có thể cần điều chỉnh các siêu tham số để làm cho nó hoạt động, đặc biệt là số lượng epochs. 
Vì thành công của trò chơi (đánh bại con sói) là một sự kiện hiếm gặp, bạn có thể mong đợi thời gian huấn luyện lâu hơn.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn thông tin chính thức. Đối với các thông tin quan trọng, khuyến nghị sử dụng dịch vụ dịch thuật chuyên nghiệp bởi con người. Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/vi/8-Reinforcement/2-Gym/notebook.ipynb b/translations/vi/8-Reinforcement/2-Gym/notebook.ipynb new file mode 100644 index 000000000..482a4fce0 --- /dev/null +++ b/translations/vi/8-Reinforcement/2-Gym/notebook.ipynb @@ -0,0 +1,390 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.4 64-bit ('base': conda)" + }, + "interpreter": { + "hash": "86193a1ab0ba47eac1c69c1756090baa3b420b3eea7d4aafab8b85f8b312f0c5" + }, + "coopTranslator": { + "original_hash": "f22f8f3daed4b6d34648d1254763105b", + "translation_date": "2025-09-06T15:18:43+00:00", + "source_file": "8-Reinforcement/2-Gym/notebook.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "## Trượt ván CartPole\n", + "\n", + "> **Vấn đề**: Nếu Peter muốn thoát khỏi 
con sói, cậu ấy cần di chuyển nhanh hơn nó. Chúng ta sẽ xem cách Peter học trượt ván, đặc biệt là giữ thăng bằng, bằng cách sử dụng Q-Learning.\n", + "\n", + "Đầu tiên, hãy cài đặt gym và nhập các thư viện cần thiết:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 1" + ] + }, + { + "source": [ + "## Tạo môi trường cartpole\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "#code block 2" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "source": [ + "Để xem môi trường hoạt động như thế nào, hãy chạy một mô phỏng ngắn trong 100 bước.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "#code block 3" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "source": [ + "Trong quá trình mô phỏng, chúng ta cần lấy các quan sát để quyết định cách hành động. 
Thực tế, hàm `step` trả về cho chúng ta các quan sát hiện tại, phần thưởng, và cờ `done` cho biết liệu có nên tiếp tục mô phỏng hay không:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "#code block 4" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38]\n[4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38]\n" + ] + } + ], + "source": [ + "#code block 5" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 6" + ] + }, + { + "source": [ + "Hãy cùng khám phá phương pháp rời rạc hóa khác sử dụng các thùng:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Sample bins for interval (-5,5) with 10 bins\n [-5. -4. -3. -2. -1. 0. 1. 2. 3. 4. 
5.]\n" + ] + } + ], + "source": [ + "#code block 7" + ] + }, + { + "source": [ + "Hãy cùng chạy một mô phỏng ngắn và quan sát những giá trị môi trường rời rạc đó.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(0, 0, -2, -2)\n(0, 1, -2, -5)\n(0, 2, -3, -8)\n(0, 3, -5, -11)\n(0, 3, -7, -14)\n(0, 4, -10, -17)\n(0, 3, -14, -15)\n(0, 3, -17, -12)\n(0, 3, -20, -16)\n(0, 4, -23, -19)\n" + ] + } + ], + "source": [ + "#code block 8" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 9" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "#code block 10" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0: 22.0, alpha=0.3, epsilon=0.9\n", + "5000: 70.1384, alpha=0.3, epsilon=0.9\n", + "10000: 121.8586, alpha=0.3, epsilon=0.9\n", + "15000: 149.6368, alpha=0.3, epsilon=0.9\n", + "20000: 168.2782, alpha=0.3, epsilon=0.9\n", + "25000: 196.7356, alpha=0.3, epsilon=0.9\n", + "30000: 220.7614, alpha=0.3, epsilon=0.9\n", + "35000: 233.2138, alpha=0.3, epsilon=0.9\n", + "40000: 248.22, alpha=0.3, epsilon=0.9\n", + "45000: 264.636, alpha=0.3, epsilon=0.9\n", + "50000: 276.926, alpha=0.3, epsilon=0.9\n", + "55000: 277.9438, alpha=0.3, epsilon=0.9\n", + "60000: 248.881, alpha=0.3, epsilon=0.9\n", + "65000: 272.529, alpha=0.3, epsilon=0.9\n", + "70000: 281.7972, alpha=0.3, epsilon=0.9\n", + "75000: 284.2844, alpha=0.3, epsilon=0.9\n", + "80000: 269.667, alpha=0.3, epsilon=0.9\n", + "85000: 273.8652, alpha=0.3, epsilon=0.9\n", + "90000: 278.2466, 
alpha=0.3, epsilon=0.9\n", + "95000: 269.1736, alpha=0.3, epsilon=0.9\n" + ] + } + ], + "source": [ + "#code block 11" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 20 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXoAAAD4CAYAAADiry33AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nO3deXxU9b3/8dcnCSTsa8CQgAEJIKIIBGSXTUWiYqu0Lq2o3MvV6nWhVlGrtbdasddq9dqfy9W2tr22WpdKXYu4W0VBRVBAQFACCEF2kCXk+/tjvkkm+yTMZCZn3s/HI4+c853vzPmenMl7vud7zpxjzjlERCS4UuLdABERiS0FvYhIwCnoRUQCTkEvIhJwCnoRkYBLi3cDADp37uxyc3Pj3QwRkSZl0aJFW5xzmXXVS4igz83NZeHChfFuhohIk2JmX0ZST0M3IiIBp6AXEQk4Bb2ISMAp6EVEAk5BLyIScBEFvZmtNbMlZvaxmS30ZR3NbJ6ZrfS/O/hyM7N7zWyVmX1iZoNjuQIiIlK7+vToxzvnjnfO5fv52cB851weMN/PA5wK5PmfmcD90WqsiIjU3+GcRz8VGOenHwVeB67z5X90oesfv2dm7c0syzm38XAa2pjWbd3Lj/+2mG7tMvjpaf3p3DqdbXsO8K/V31BwXBbOOf7+8XpO6NmJj77azsSjuzD5N2/yo3G9eXbxev484wTumb+S/lltOaJdBobxwBureePzIpbccjJmBsCLSzby+ooiphyXxYl9Kn7nYUnhDv7+8Xq6tk1n5tijyso/XredtBRjQHY7nHM8uaiQCf26cOvzy+jYqjnLNu7kX6u/4d5zB5HdvgWri3bz3UHZpKWm8PKnX3NMt7bc/sJybig4mpPueoNHpg+leZrxwdptzHlxOU9dOoIN2/dx+sBuPLbgK254Zglt0tO4cFQu//PqKm46rT+/eO4zlv9iMjf9fSl/W1RIn66tGdazI51apXPP/JX87PT+9OzcivfXbGXJ+h28tXJLg7fFJScexQNvrKZ3l9as2ry7rNwMUs0oLonfZbbbZKSxa19xxPVHHtWJpet3sHNfMcdmt2PJ+h306tyKL7bsAeDu7w/k6scXx6q53HJ6f7buPcgTH6zj6537YraccENzO/DB2m38YHgP1m/7luZpKbz86aZ6vcb5J/TgolG5TLrrzXov/4qJeaSacfcrn9da79YzB/DlN3v437fW8B9je/Hgm19UqfPdwdk8/eH6Gl9jcI/2PHXpSNZs2cOEX78RUftyOrTglVknktEsNaL6DWWRXI/ezNYA2wAHPOice8jMtjvn2ofV2eac62BmzwFznHNv+/L5wHXOuYWVXnMmoR4/PXr0GPLllxGd998ocmc/Xzad06EFb183ge8/+C4L1mzlvesnsm7bXqY98G5ZnYn9ujB/+eay+cqhFO73Fw1lfN8u7Nx3kONu+WdZ+do5BTW2Ifyx0vK1cwp4aelGLvnzh3Wuz7WT+3LxqJ70u+mlOuuWKv0HrUnz1BQOHCqJ+PVEksFvzxvMZY/V/T8Z7ofDj+QXZw5o0PLMbFHYKEuNIu3Rj3LObTCzLsA8M1te27KrKavyaeKcewh4CCA/Pz9h735SuO1bANZvD/0+eKiE3fsr9uLWbdtbYb6mkAfKeoDFhw5/lXd+G1lvcuvuA5TU8wYztYU8oJAXqcaufQfr/ZyiXftj0JKKIhqjd85t8L83A88Aw4BNZpYF4H+XdmkLge5hT88BNkSrwUGVO/t5LvnTong3Q0QCqM6gN7NWZtamdBo4GVgKzAWm+2rTgWf99FzgAn/2zXBgR1Man4+nlz79Ot5NEJEAimTopivwjD+AmAY85px7ycw+AJ4wsxnAV8A0X/8FYAqwCtgLXBT1VouIBETR7tgP3dQZ9M65L4CB1ZR/A0ysptwBl0WldQG0/+Ah5i7ewKijOjXaMuN4YoqI1OHLb/bWXekwJcR
lipPJr15eQdGu/dxzzvGNtkyr7vC4iETdI2+viXcTqqVLIHhffrOHRV/WfqZJNJQeYd/5bf2PzotIYltZyxl38aQevXfif78OVD2fXUQklrY0whi9evRRYNV+dSBxOJf4bRSR2FHQi4gEnIZuoqAhBzsb80SY372zhjYZ2tQiyUo9+giMv/P1CvNNcRDknvkr490EEYkTBX0E1virC4qINEUK+nqq57XBouqv738Vv4WLSJOloI9QInzpaPbTS+LdBBFpghT0IiIBp6AH9h6o+7ru0R6yiecQkIgkFwU9sOCLrXXWKb0BSayHcL49cCi2CxCRpKOgr2TfwbqDNpad8UPq6otIlCnoK9l/sHFukVfbnkFjXPtCRJKHgr6eHv9gXUzv8bh19wHyb30lZq8vIslHQV9P9722it+/szZmr79174EqZbmzn+eQ7h4iIg2koAdcPUfdDxQf/gHTmobiP9uws9ryg4caZ0hJRIJHQd8Aq4sO/5IIroakX7l512G/tohIOAV9gvmwhrtcrdyUmHeuEZHEp6CvpL7DONG2uHBHteWn3/d2I7dERIJCQR8nlggXzxGRpKCgFxEJOAU98bnuTE0HY0VEok1BHwUahhGRRKagr8Sa5I0CRURqpqCvpCFn3SzbWP2XnEREEoGCnviM0Wu4R0Qai4JeRCTgFPTE9vryNS5TZ92ISCNR0MfJLf/4LN5NEJEkEXHQm1mqmX1kZs/5+Z5mtsDMVprZ42bW3Jen+/lV/vHc2DQ9etS7FpEgq0+P/kpgWdj8HcDdzrk8YBsww5fPALY553oDd/t6Ce3xD9bFuwn11uenL/L655vj3QwRaQIiCnozywEKgIf9vAETgCd9lUeBM/30VD+Pf3yiJfgpJis3l18Zsql07g8Ul/DCkq/j3QwRaQIi7dH/BrgWKL37RSdgu3Ou2M8XAtl+OhtYB+Af3+HrV2BmM81soZktLCoqamDzRUSkLnUGvZmdBmx2zi0KL66mqovgsfIC5x5yzuU75/IzMzMjamysxPvSxCIisZQWQZ1RwBlmNgXIANoS6uG3N7M032vPATb4+oVAd6DQzNKAdsDWqLc8SkpKHCVhd+lL7EEmEZH6q7NH75y73jmX45zLBc4BXnXOnQ+8Bpztq00HnvXTc/08/vFXXQKf1nLqPW+xfvu38W6GiEjMHM559NcBs8xsFaEx+Ed8+SNAJ18+C5h9eE2MrRWbdI9WEQm2SIZuyjjnXgde99NfAMOqqbMPmBaFtsVF4u57iIg0jL4ZKyIScAp6EZGAU9CLiARcvcbog+JAcQl9fvoiV0zoHe+miIjEXFL26PcVHwLg9++sjW9DREQaQVIGvYhIMlHQV6KzK0UkaBT0legKCCISNAp6EZGAU9CLiAScgr4SjdGLSNAo6CuZ95nu2iQiwZLUQb9rf3GVsi27D8ShJSIisZPUQS8ikgwU9JX8+p8r4t0EEZGoUtBXUqKjsSISMEkX9J9v2sW0+9+NdzNERBpN0gX9L19YptsHikhSSbqgFxFJNgp6EZGAU9CLiAScgl5EJOAU9CIiAaegFxEJuKQLet1YRESSTdIFvYhIslHQi4gEnIJeRCTgFPQiIgGnoBcRCbikC3oznXcjIsmlzqA3swwze9/MFpvZp2b2c1/e08wWmNlKM3vczJr78nQ/v8o/nhvbVRARkdpE0qPfD0xwzg0Ejgcmm9lw4A7gbudcHrANmOHrzwC2Oed6A3f7eglD/XkRSTZ1Br0L2e1nm/kfB0wAnvTljwJn+umpfh7/+ETTeImISNxENEZvZqlm9jGwGZgHrAa2O+eKfZVCINtPZwPrAPzjO4BO0Wy0iIhELqKgd84dcs4dD+QAw4Cjq6vmf1fXe69yJ1Yzm2lmC81sYVFRUaTtFRGReqrXWTfOue3A68BwoL2ZpfmHcoANfroQ6A7gH28HbK3mtR5yzuU75/IzMzMb1voG0CCSiCSbSM66yTSz9n66BTAJWAa8Bpz
tq00HnvXTc/08/vFXnXNVevQiItI40uquQhbwqJmlEvpgeMI595yZfQb81cxuBT4CHvH1HwH+ZGarCPXkz4lBu0VEJEJ1Br1z7hNgUDXlXxAar69cvg+YFpXWiYjIYUuKb8YeKnHcMvdTNmz/Nt5NERFpdJEM3TR576/Zyh/+tZbPN+2iZfPUeDdHRKRRJUWP3vmzO0t0TFhEklBSBH1FOr9SRJJLEga9iEhyUdCLiARcUgW9huhFJBklRdBb2Li8LoEgIskm0KdXOudYXbQn3s0QEYmrQPfoH3l7DZPueoPFhdvLyg6VaPxGRJJLoIP+o3WhgF+3dW9Z2avLN8erOSIicRHooBcRkSQLeg3aiEgySoqg15k2IpLMgh306sKLiAQ86D3T9W1EJIkF9jz63NnPl00/vnBdHFsiIhJfSdGjP1BcEu8miIjETVIEfRmN2YtIEgpU0O/eX8zU377D55t2xbspIiIJI1BB/69VW1i8bju/emlFvJsiIpIwAhX0dXl/7dZ4N0FEpNElVdCLiCSjQAW9jrWKiFQVqKAvpUseiIiUC2TQi4hIuUAFve4JKyJSVaCCvpRGbkREygUy6EVEpFzAgl5jNyIilQUs6EN01o2ISLlABr2IiJSrM+jNrLuZvWZmy8zsUzO70pd3NLN5ZrbS/+7gy83M7jWzVWb2iZkNjvVKlNJZNyIiVUXSoy8GfuycOxoYDlxmZv2B2cB851weMN/PA5wK5PmfmcD9UW91HXRHKRGRcnUGvXNuo3PuQz+9C1gGZANTgUd9tUeBM/30VOCPLuQ9oL2ZZUW95dXYrxuMiIhUUa8xejPLBQYBC4CuzrmNEPowALr4atlA+L37Cn1Z5deaaWYLzWxhUVFR/Vtejase/zgqryMiEiQRB72ZtQaeAq5yzu2srWo1ZVVGz51zDznn8p1z+ZmZmZE2IyI660ZEpFxEQW9mzQiF/P855572xZtKh2T8782+vBDoHvb0HGBDdJorIiL1FclZNwY8Aixzzt0V9tBcYLqfng48G1Z+gT/7Zjiwo3SIR0REGl9aBHVGAT8ElphZ6SD4DcAc4AkzmwF8BUzzj70ATAFWAXuBi6La4gho6EZEpFydQe+ce5uarxM2sZr6DrjsMNslIiJRom/GiogEXCCDXl+YEhEpF8igFxGRcgp6EZGAC2TQry7aHe8miIgkjEAG/fKvd8W7CSIiCSOQQS8iIuUU9CIiAaegFxEJOAW9iEjAKehFRAJOQS8iEnAKehGRgFPQi4gEnIJeRCTgFPQiIgGnoBcRCTgFvYhIwCnoRUQCTkEvIhJwCnoRkYBT0IuIBFxggr5w2954N0FEJCEFIujfXf0No+94Ld7NEBFJSIEI+uVf74x3E0REElYggt65eLdARCRxBSPo490AEZEEFoigFxGRmgUi6J3GbkREahSIoBcRkZo1+aDfd/AQtz6/LN7NEBFJWHUGvZn9zsw2m9nSsLKOZjbPzFb63x18uZnZvWa2ysw+MbPBsWw8wINvfBHrRYiINGmR9Oj/AEyuVDYbmO+cywPm+3mAU4E8/zMTuD86zazZ3oPFsV6EiEiTVmfQO+feBLZWKp4KPOqnHwXODCv/owt5D2hvZlnRamz1DYzpq4uINHkNHaPv6pzbCOB/d/Hl2cC6sHqFvqwKM5tpZgvNbGFRUVEDmwH/WLyhwc8VEUkG0T4Ya9WUVdvnds495JzLd87lZ2ZmNniBG3bsa/BzRUSSQUODflPpkIz/vdmXFwLdw+rlAOpyi4jEUUODfi4w3U9PB54NK7/An30zHNhROsQjIiLxkVZXBTP7CzAO6GxmhcDPgDnAE2Y2A/gKmOarvwBMAVYBe4GLYtBmERGphzqD3jl3bg0PTaymrgMuO9xGiYhI9DT5b8aKiEjtFPQiIgHXpIP+/TWVv8clIiKVNemg/8HDC+LdBBGRhNekg/7AoZJ4N0FEJOE16aAXEZG6KehFRAJOQS8iEnAKehGRgFP
Qi4gEnIJeRCTgFPQiInHUPC32MaygFxGJo7OH5MR8GQp6EZE4Om9Yj5gvQ0EvIhJHA7LbxXwZCnoRkTjp3aV1oyxHQS8iEidj8zIbZTkKehGROLnmlD6NshwFvYhInLRsXufdXKNCQS8iEnAKeklIfbo2zkEqaTzpjfDFIKme/vKSkJyLdwsk2sbkdY53E5KWgl6kEfzyO8dGXPfYRjivuql69OJhUX29Fs1So/p63Tu2iOrrRYuCXqSBxveNzalxd04bGPUAasq+Oyi7bPrEPuV/87F9av/7FxyXVedr5+d2qHd7Tu7ftcJ8r8xWjIvReyFaFPRS5vwTYv9V7Ibo27VNteV/nnFClbJOrZrHtC3nDO1eNn36wG4AXDDiSNbOKaj1ec1SrV7LuXh0br3bVpe1cwq45uTGOZ0vmobkdqBV86offJUDd3CP9hXmczrU3btOT0vh0nFHlQ0rRXKBsXF9u1SYP2twDtntQ8tqlpqYkZqYrZKo6pXZKqJ6t9VjeKHyP1ldLh/fO+K6o3tXHMt9+eqxVercd94gRsdhzLe095bZJp0zBnbjigm9+ckpfWt9zohenfhOWK80El3bZgBUG3CljvB1EtWpA4447Nd49rJRnDesBwtunMTim08GKgb43y4Zwc2n9WfBDRN57N+H07LS36tTq+ZMOrrm9+pPTunHdZNDPwBnDc7m/RsncvGonmV1hlbq9We2Secfl48um+/SJp3Zp/bjigm9Oe3YmvcirpjQm/wj678HEQ0K+iZkUKUeS6T+3/mDK7xxo2FafndunHI0AMNyO9ZZ/5pKYTiqd6da6885K/Sh819Tj6n28dOO6xZJMyOWF/ZV9PvOG8S8aj5cSv3homF8cOMk0lJTmHVyX9pkNAPgwpG5NK+mR/eXmcNJS03hhin9uHBkbll5Zpt0js5qW+0y+vi9mP+aOqDC3kL3ji147ZpxADzxHyPKyksD5KcFR5eVfS+/6lURv5ffvcK6Ho7OrdOrLX/1xyfy4pVjKqxruJqWX10IDuzeHjOjdXoa7VqG/s5XTepT9jpDczty8eiedG2bQUazVF798ThOCuuELLrpJB6enl82f+e0gfz7mPL/hTYZofPYB2S345Hp+fzs9GPo0iajQs/+4QuGcsvp/cvmT+rflWNz2vHFL6fwwA+GcPaQHNpkNGPWyX0Z2L3m/9ExfTI55ZjyD7+nLh1RY91oU9AniLp2Gbu0SeepS0Y26LWz2rXg5tP7c1xO+UG+W88cUOtzurWrvbc4rm8mk32P7ZJxvSJqx2XjjyqbPmdo7cNEQ47syNo5BVwwIheAD26cVGub75w2kGd+NLLsHzcSFjaaMm/WiRSE9cbyahguqs0tZxzD57edWuPjM8cexS1nlH9wDc3twItXjikbAgo3vFcn3r5uPGf5S9iWhvbLV42lZ+dWrJ1TQI9OLcvq//b8wVxzch9mjC4Psdu+cyzvXT+xwut2aZvBvFkncvNp/XllVs0fZvURvmdxZKeW9MpsXeMHWG2evDSy9/dZg7N569rxnNCramfhiHYZDO5R9QOjTXoak47uwtlDcrixoD9PXjKCn5zSl27ty/cOJh7dlQx/bCR8CLBdy2ZcOKon78yewJrbp5SVp6QYkwccgYW9kSaG7T3MOqkPr10zjldmjeXqSX3IP7JD2V7oHy4aypAj6+4gRYuCPgZKd+XrMx561/cG1vr4+zdOIiXFuOec4+vVlrVzCmjXItQTmnv5aOZ8N9RT7ndE7UFWenbDhSNzeSSsRwSw5vYpNEtNoXvHlqydU8CEflV3jR/4wZAqZT85pR9r5xSwdk5BhXBb/ovJQGg4qH9WW2af2q/KczPbpJeFeAffswt39pAcBvXoUHYlwNJeY/hxh/Drfg/s3p41txfwyqyxvHjlmBr+CiGDe7RniO9tRnMM9r/PDm3z/zl3EGvnFPC3S0YwMKcduZ1DAZ7ToTzIf3X2QNbOKajxm5Rd22Zw+YQ8zKy
s09AsNYUj2mUw7+qxvHRVxXW8eHRPenWuX8++8tlAE/t1oW/XNtz2ndAH8IhenXiyls5Ih5ah8ByTl1n2Plg7p4B7zjm+wvBKm/TaP6zNjO4dW9b4+FmDs8nr0pofDj+yrGzJz0/h4elDy+bzcztyWS3DiWP6hAK5Z+fyYc/s9i0qhHpNXrpqDPOuHssVE/Po2bkVvbu04cpJoW1zdFZb1s4pqDLOH2uN8/3bAMjr0pqnfjSS4275Z511Z4zuya59xfzbmF7c+c/PAUhLMYpLKp4cfnL/rvTLasuQIzsw6qjqhzLeu34i7cOC7YyB3fh80y4y0lJplpbCWYNzuP2FZTz90XpG9e7ElRP70KVNOu+s3sJx2VV3I78/tDsjj+pMj04tueOsY8nr2oY9+4tZun4nEOoZp1ioR/v2dePJateC1JSKb+7q3uxzLx/FkvU7uPGZpQBMHnAE/bPa8tnGndUeNAX4xZkDGNS9PRnNUnn3+gl0apVe655NwbFZLP96F5ecWL5n8OAPh1QYLik9KHbd5H4s/HIbV07MY8/+Yv7+8QbunDaQJxcVAnD/+YMB6N2l5g+8966fSIdWzUhPS2XvgWLunb+K8yI4YD0mrzOL123nzWvHY1T9Ww3IbsvufcW0qhRoQ3M78mzY2G8k/jRjGDu/La5Q9tx/juatlVvK5mvaOyndjOP6ZnJS/65l2y6rXQYbd+yjR8eWfLV1LwA/P+MYpo/MJXf287RJT2PX/mLyurbmjrOPA+Cta8eT3b4FKWHvlaP8h+2IXp1494tv6N+tLW9PGl/l2MLU47OZenzoGMaHN51Es1TjpLveJDuCg6nVKd1rORyl262+B9EB+h1R/72ZWDOXAN9Myc/PdwsXLqz380bcPp+NO/bV+3nXTu5L6/Q0bn72U1o0S+XpH40kNcW455WVPL9kIytvO5UHXl/Nr+eFQvrm0/pzsd8lds5x07NLuXBkLt978D227jnA9/JzSE1J4cqJeXz5zZ4Ku5TTf/c+Zw7qxstLN/HSp1+z+pdTqgRnuE8Kt3PGfe9w7rDuXDAiN6Jd4H0HD/H6is1MHlD36WSH465/rmDX/mJ+dnr14+YAS9fvoEXzVI7KbM3WPQf4dMMOxjTSFfoADhSXMH/Zpiq71KWeWlTIc59s4PcXVT0fe8XXu7jssQ956tKRZXtBySR39vMAvH/DRFYV7eaYrHZ8+NU29hwoZsqArLIQd87xwpKvmTzgiFrfy6VKShwvLo28fiLYX3yIs+7/Fz8t6M/waoaIEoWZLXLO5ddZLxZBb2aTgXuAVOBh59yc2uo3NOiLD5XwzEfr2VdcQtuMNIbmdqR5WgqpZlz+lw+57cxjSU0x9heXRHTd5/3Fh9iy+wDZ7VtQUuJYt20vR3aK7IyVSF77m90HKowJ1uTLb/bQo2PLiHYTRaJlz/5i9uwvpkuCn80j5eIW9GaWCnwOnAQUAh8A5zrnPqvpOQ0NehGRZBZp0MfiYOwwYJVz7gvn3AHgr8DUGCxHREQiEIugzwbWhc0X+rIKzGymmS00s4VFRUUxaIaIiEBsgr66geUq40POuYecc/nOufzMzMS+ToSISFMWi6AvBLqHzecAG2KwHBERiUAsgv4DIM/MeppZc+AcYG4MliMiIhGI+hemnHPFZnY58DKh0yt/55z7NNrLERGRyMTkm7HOuReAF2Lx2iIiUj+61o2ISMAlxCUQzKwI+LKBT+8MbKmzVrBonZOD1jk5HM46H+mcq/O0xYQI+sNhZgsj+WZYkGidk4PWOTk0xjpr6EZEJOAU9CIiAReEoH8o3g2IA61zctA6J4eYr3OTH6MXEZHaBaFHLyIitVDQi4gEXJMOejObbGYrzGyVmc2Od3vqw8y6m9lrZrbMzD41syt9eUczm2dmK/3vDr7czOxev66fmNngsNea7uuvNLPpYeVDzGyJf869liC3rDKzVDP7yMye8/M9zWyBb//j/hpJmFm6n1/lH88Ne43
rffkKMzslrDzh3hNm1t7MnjSz5X57jwj6djazq/37eqmZ/cXMMoK2nc3sd2a22cyWhpXFfLvWtIxaOeea5A+h6+isBnoBzYHFQP94t6se7c8CBvvpNoTuytUf+BUw25fPBu7w01OAFwldBno4sMCXdwS+8L87+OkO/rH3gRH+OS8Cp8Z7vX27ZgGPAc/5+SeAc/z0A8ClfvpHwAN++hzgcT/d32/vdKCnfx+kJup7AngU+Dc/3RxoH+TtTOj+E2uAFmHb98KgbWdgLDAYWBpWFvPtWtMyam1rvP8JDuOPPAJ4OWz+euD6eLfrMNbnWUK3X1wBZPmyLGCFn36Q0C0ZS+uv8I+fCzwYVv6gL8sCloeVV6gXx/XMAeYDE4Dn/Jt4C5BWebsSujDeCD+d5utZ5W1dWi8R3xNAWx96Vqk8sNuZ8psPdfTb7TnglCBuZyCXikEf8+1a0zJq+2nKQzcR3cmqKfC7qoOABUBX59xGAP+7i69W0/rWVl5YTXm8/Qa4Fijx852A7c65Yj8f3s6ydfOP7/D16/u3iKdeQBHwez9c9bCZtSLA29k5tx64E/gK2Ehouy0i2Nu5VGNs15qWUaOmHPQR3ckq0ZlZa+Ap4Crn3M7aqlZT5hpQHjdmdhqw2Tm3KLy4mqqujseazDoT6qEOBu53zg0C9hDa3a5Jk19nP2Y8ldBwSzegFXBqNVWDtJ3rEtd1bMpB3+TvZGVmzQiF/P855572xZvMLMs/ngVs9uU1rW9t5TnVlMfTKOAMM1tL6KbxEwj18NubWekls8PbWbZu/vF2wFbq/7eIp0Kg0Dm3wM8/SSj4g7ydJwFrnHNFzrmDwNPASIK9nUs1xnataRk1aspB36TvZOWPoD8CLHPO3RX20Fyg9Mj7dEJj96XlF/ij98OBHX637WXgZDPr4HtSJxMav9wI7DKz4X5ZF4S9Vlw45653zuU453IJba9XnXPnA68BZ/tqlde59G9xtq/vfPk5/myNnkAeoQNXCfeecM59Dawzs76+aCLwGQHezoSGbIabWUvfptJ1Dux2DtMY27WmZdQsngdtonAgZAqhs1VWAzfGuz31bPtoQrtinwAf+58phMYm5wMr/e+Ovr4Bv9wf3t8AAACjSURBVPXrugTID3uti4FV/ueisPJ8YKl/zn1UOiAY5/UfR/lZN70I/QOvAv4GpPvyDD+/yj/eK+z5N/r1WkHYWSaJ+J4AjgcW+m39d0JnVwR6OwM/B5b7dv2J0JkzgdrOwF8IHYM4SKgHPqMxtmtNy6jtR5dAEBEJuKY8dCMiIhFQ0IuIBJyCXkQk4BT0IiIBp6AXEQk4Bb2ISMAp6EVEAu7/A6SijxMjKxrLAAAAAElFTkSuQmCC\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(rewards)" + ] + }, + { + "source": [ + "Từ biểu đồ này, không thể xác định được điều gì, vì do tính chất của quá trình huấn luyện ngẫu nhiên, độ dài của các phiên huấn luyện thay đổi rất nhiều. Để hiểu rõ hơn về biểu đồ này, chúng ta có thể tính **trung bình động** qua một loạt các thí nghiệm, chẳng hạn 100. 
Điều này có thể được thực hiện một cách thuận tiện bằng cách sử dụng `np.convolve`:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 22 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "#code block 12" + ] + }, + { + "source": [ + "## Thay đổi siêu tham số và xem kết quả hoạt động\n", + "\n", + "Bây giờ sẽ rất thú vị khi thực sự xem cách mô hình đã được huấn luyện hoạt động. 
Hãy chạy mô phỏng, và chúng ta sẽ sử dụng cùng chiến lược chọn hành động như trong quá trình huấn luyện: lấy mẫu dựa trên phân phối xác suất trong Q-Table:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "# code block 13" + ] + }, + { + "source": [ + "## Lưu kết quả thành ảnh GIF động\n", + "\n", + "Nếu bạn muốn gây ấn tượng với bạn bè, bạn có thể gửi cho họ ảnh GIF động của cây sào cân bằng. Để làm điều này, chúng ta có thể gọi `env.render` để tạo một khung hình ảnh, sau đó lưu những khung hình đó thành ảnh GIF động bằng thư viện PIL:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "360\n" + ] + } + ], + "source": [ + "from PIL import Image\n", + "obs = env.reset()\n", + "done = False\n", + "i=0\n", + "ims = []\n", + "while not done:\n", + " s = discretize(obs)\n", + " img=env.render(mode='rgb_array')\n", + " ims.append(Image.fromarray(img))\n", + " v = probs(np.array([Qbest.get((s,a),0) for a in actions]))\n", + " a = random.choices(actions,weights=v)[0]\n", + " obs,_,done,_ = env.step(a)\n", + " i+=1\n", + "env.close()\n", + "ims[0].save('images/cartpole-balance.gif',save_all=True,append_images=ims[1::2],loop=0,duration=5)\n", + "print(i)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn thông tin chính thức. Đối với các thông tin quan trọng, khuyến nghị sử dụng dịch vụ dịch thuật chuyên nghiệp bởi con người. 
Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/vi/8-Reinforcement/2-Gym/solution/notebook.ipynb b/translations/vi/8-Reinforcement/2-Gym/solution/notebook.ipynb new file mode 100644 index 000000000..c4157ca25 --- /dev/null +++ b/translations/vi/8-Reinforcement/2-Gym/solution/notebook.ipynb @@ -0,0 +1,522 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "coopTranslator": { + "original_hash": "5c0e485e58d63c506f1791c4dbf990ce", + "translation_date": "2025-09-06T15:21:42+00:00", + "source_file": "8-Reinforcement/2-Gym/solution/notebook.ipynb", + "language_code": "vi" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "## Trượt băng CartPole\n", + "\n", + "> **Vấn đề**: Nếu Peter muốn thoát khỏi con sói, cậu ấy cần phải di chuyển nhanh hơn nó. 
Chúng ta sẽ xem cách Peter học trượt băng, đặc biệt là giữ thăng bằng, bằng cách sử dụng Q-Learning.\n", + "\n", + "Đầu tiên, hãy cài đặt gym và nhập các thư viện cần thiết:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: gym in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.18.3)\n", + "Requirement already satisfied: Pillow<=8.2.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (7.0.0)\n", + "Requirement already satisfied: scipy in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (1.4.1)\n", + "Requirement already satisfied: numpy>=1.10.4 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (1.19.2)\n", + "Requirement already satisfied: cloudpickle<1.7.0,>=1.2.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (1.6.0)\n", + "Requirement already satisfied: pyglet<=1.5.15,>=1.4.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from gym) (1.5.15)\n", + "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", + "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n" + ] + } + ], + "source": [ + "import sys\n", + "!pip install gym \n", + "\n", + "import gym\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import random" + ] + }, + { + "source": [ + "## Tạo môi trường cartpole\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "env = gym.make(\"CartPole-v1\")\n", + "print(env.action_space)\n", + "print(env.observation_space)\n", + "print(env.action_space.sample())" + ], 
+ "cell_type": "code", + "metadata": {}, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Discrete(2)\nBox(-3.4028234663852886e+38, 3.4028234663852886e+38, (4,), float32)\n0\n" + ] + } + ] + }, + { + "source": [ + "Để xem môi trường hoạt động như thế nào, hãy chạy một mô phỏng ngắn trong 100 bước.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "env.reset()\n", + "\n", + "for i in range(100):\n", + " env.render()\n", + " env.step(env.action_space.sample())\n", + "env.close()" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/gym/logger.py:30: UserWarning: \u001b[33mWARN: You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.\u001b[0m\n warnings.warn(colorize('%s: %s'%('WARN', msg % args), 'yellow'))\n" + ] + } + ] + }, + { + "source": [ + "Trong quá trình mô phỏng, chúng ta cần lấy các quan sát để quyết định cách hành động. 
Thực tế, hàm `step` trả về cho chúng ta các quan sát hiện tại, phần thưởng, và cờ `done` cho biết liệu có nên tiếp tục mô phỏng hay không:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "env.reset()\n", + "\n", + "done = False\n", + "while not done:\n", + " env.render()\n", + " obs, rew, done, info = env.step(env.action_space.sample())\n", + " print(f\"{obs} -> {rew}\")\n", + "env.close()" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[ 0.03044442 -0.19543914 -0.04496216 0.28125618] -> 1.0\n", + "[ 0.02653564 -0.38989186 -0.03933704 0.55942606] -> 1.0\n", + "[ 0.0187378 -0.19424049 -0.02814852 0.25461393] -> 1.0\n", + "[ 0.01485299 -0.38894946 -0.02305624 0.53828712] -> 1.0\n", + "[ 0.007074 -0.19351108 -0.0122905 0.23842953] -> 1.0\n", + "[ 0.00320378 0.00178427 -0.00752191 -0.05810469] -> 1.0\n", + "[ 0.00323946 0.19701326 -0.008684 -0.35315131] -> 1.0\n", + "[ 0.00717973 0.00201587 -0.01574703 -0.06321931] -> 1.0\n", + "[ 0.00722005 0.19736001 -0.01701141 -0.36082863] -> 1.0\n", + "[ 0.01116725 0.39271958 -0.02422798 -0.65882671] -> 1.0\n", + "[ 0.01902164 0.19794307 -0.03740452 -0.37387001] -> 1.0\n", + "[ 0.0229805 0.39357584 -0.04488192 -0.67810827] -> 1.0\n", + "[ 0.03085202 0.58929164 -0.05844408 -0.98457719] -> 1.0\n", + "[ 0.04263785 0.78514572 -0.07813563 -1.2950295 ] -> 1.0\n", + "[ 0.05834076 0.98116859 -0.10403622 -1.61111521] -> 1.0\n", + "[ 0.07796413 0.78741784 -0.13625852 -1.35259196] -> 1.0\n", + "[ 0.09371249 0.98396202 -0.16331036 -1.68461179] -> 1.0\n", + "[ 0.11339173 0.79106371 -0.1970026 -1.44691436] -> 1.0\n", + "[ 0.12921301 0.59883361 -0.22594088 -1.22169133] -> 1.0\n" + ] + } + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + 
"[-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38]\n[4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38]\n" + ] + } + ], + "source": [ + "print(env.observation_space.low)\n", + "print(env.observation_space.high)" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "def discretize(x):\n", + " return tuple((x/np.array([0.25, 0.25, 0.01, 0.1])).astype(np.int))" + ] + }, + { + "source": [ + "Hãy cùng khám phá phương pháp rời rạc hóa khác sử dụng các thùng:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Sample bins for interval (-5,5) with 10 bins\n [-5. -4. -3. -2. -1. 0. 1. 2. 3. 4. 5.]\n" + ] + } + ], + "source": [ + "def create_bins(i,num):\n", + " return np.arange(num+1)*(i[1]-i[0])/num+i[0]\n", + "\n", + "print(\"Sample bins for interval (-5,5) with 10 bins\\n\",create_bins((-5,5),10))\n", + "\n", + "ints = [(-5,5),(-2,2),(-0.5,0.5),(-2,2)] # intervals of values for each parameter\n", + "nbins = [20,20,10,10] # number of bins for each parameter\n", + "bins = [create_bins(ints[i],nbins[i]) for i in range(4)]\n", + "\n", + "def discretize_bins(x):\n", + " return tuple(np.digitize(x[i],bins[i]) for i in range(4))" + ] + }, + { + "source": [ + "Hãy chạy một mô phỏng ngắn và quan sát những giá trị môi trường rời rạc đó.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(0, 0, -1, -3)\n(0, 0, -2, 0)\n(0, 0, -2, -3)\n(0, 1, -3, -6)\n(0, 2, -4, -9)\n(0, 3, -6, -12)\n(0, 2, -8, -9)\n(0, 3, -10, -13)\n(0, 4, -13, -16)\n(0, 4, -16, -19)\n(0, 4, -20, -17)\n(0, 4, -24, -20)\n" + ] + } + ], + "source": [ + "env.reset()\n", + 
"\n", + "done = False\n", + "while not done:\n", + " #env.render()\n", + " obs, rew, done, info = env.step(env.action_space.sample())\n", + " #print(discretize_bins(obs))\n", + " print(discretize(obs))\n", + "env.close()" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "Q = {}\n", + "actions = (0,1)\n", + "\n", + "def qvalues(state):\n", + " return [Q.get((state,a),0) for a in actions]" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# hyperparameters\n", + "alpha = 0.3\n", + "gamma = 0.9\n", + "epsilon = 0.90" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0: 108.0, alpha=0.3, epsilon=0.9\n" + ] + } + ], + "source": [ + "def probs(v,eps=1e-4):\n", + " v = v-v.min()+eps\n", + " v = v/v.sum()\n", + " return v\n", + "\n", + "Qmax = 0\n", + "cum_rewards = []\n", + "rewards = []\n", + "for epoch in range(100000):\n", + " obs = env.reset()\n", + " done = False\n", + " cum_reward=0\n", + " # == do the simulation ==\n", + " while not done:\n", + " s = discretize(obs)\n", + " if random.random() Qmax:\n", + " Qmax = np.average(cum_rewards)\n", + " Qbest = Q\n", + " cum_rewards=[]" + ] + }, + { + "source": [], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 20 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.plot(rewards)" + ] + }, + { + "source": [ + "Từ biểu đồ này, không thể rút ra bất kỳ điều gì, vì do tính chất của quá trình huấn luyện ngẫu nhiên, độ dài của các phiên huấn luyện thay đổi rất nhiều. Để hiểu rõ hơn về biểu đồ này, chúng ta có thể tính **trung bình động** qua một loạt các thí nghiệm, chẳng hạn 100. 
Điều này có thể được thực hiện một cách thuận tiện bằng cách sử dụng `np.convolve`:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 22 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXsAAAD4CAYAAAANbUbJAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nO2dd3gVZfbHvycdAiGUAKEZelGqkY4gICDo4rr6U3dVVKxrWdeKde2ylnXX1bWiYu8FpYmAKCol9AABAgQIBAglQALp7++PO3Mzd+70O7fk3vN5njyZeeedmXfu3HvmzHlPISEEGIZhmOgmLtwDYBiGYYIPC3uGYZgYgIU9wzBMDMDCnmEYJgZgYc8wDBMDJIR7AADQokULkZWVFe5hMAzD1CtWrVp1SAiRYaVvRAj7rKws5OTkhHsYDMMw9Qoi2mW1L5txGIZhYgAW9gzDMDEAC3uGYZgYgIU9wzBMDMDCnmEYJgZgYc8wDBMDsLBnGIaJAUyFPRGlENEKIlpHRBuJ6DGp/V0i2klEa6W/flI7EdFLRJRPROuJaECwL4JhwoUQAp/n7EFldW24h8IwhlgJqqoAMFoIUUpEiQCWEtFcads9QogvVP3PA9BV+hsE4FXpP8NEHXNz9+OeL9aj4HAZ7hnfI9zDYRhdTDV74aFUWk2U/owqnkwG8J603zIA6USUGfhQGSbyOHaqCgBw6ERlmEfCMMZYstkTUTwRrQVwEMACIcRyadNTkqnmRSJKltraAtij2L1QalMf8wYiyiGinOLi4gAugWHCB4V7AAxjEUvCXghRI4ToB6AdgIFEdAaA+wH0AHAWgGYA7pO6a33//d4EhBBvCCGyhRDZGRmW8vgwDMMwDrHljSOEKAHwE4AJQogiyVRTAeAdAAOlboUA2it2awdgnwtjZRiGYRxixRsng4jSpeUGAMYCyJPt8EREAC4EkCvtMgvAVZJXzmAAx4QQRUEZPcOEmVrpnVUYTmPFBt+v34dVu46GexiMDlY0+0wAi4loPYCV8NjsvwfwIRFtALABQAsAT0r95wDYASAfwJsA/ur6qBkmQnjn150AgFnr+OX11o/W4E+v/hbuYTA6mLpeCiHWA+iv0T5ap78AcEvgQ2OYyGfbQY+jWnmVr599eVUNVhYcwYiuPB/FRAYcQcswQeDRWRtx5YwVyNt/PNxDYRgALOyZKOPD5bvw2co95h2DjKzxnyivDtk5K6pr8I9vc1FVw9G8jD8s7Jmo4sGvc3Hvl+tt7fPBsl3YeajM1XHIE5Wh9MO/94v1mPn7Lpz97OIQnpWpL7CwZ2KSkpOVKD5RASEEHvomFxe+8mtAx5twemvNdgqhtP9l2yEAQNGxchwqrbC0z9wNRciaNtsbCcxELyzsmZhkwBMLcNZTP3pdJwMVdh2aN9Rs31tSHtBx7XCkrC5lw/8WbzfsW3yiAmUV1XhtiaffzkNluPWj1fh4xe6gjpEJHyzsmZjE6x8vAvOPv3poFgCgVVqK5vZtB04EdHynmD28znrqR5z/36UorfDMKdQKge/XF+H+rzb49T1cWoGsabOxcPMBS+cuq3A2T3Gqsiak8w3Xv5cTUw83FvZMTBNoKJRsptETrtW14Qm2+s6C3//OQ2XYXuyZqyg1mEjeVOTxKHrn1wJL5z79H/ORf9D+Q67nI/PQ9cG55h1dYsGmA5oPt2Bj1cTmNizsmZimNkDN/rf8wwCAlxZu09xeFaY895U2NeSftribjHDjPnsup1v2h+cNKNQs33EY2U/+iHm5oU8qwMKeiXpeXrQNd362VnNbgLIeW0zMNG5r9vtKTiFr2mx8u3avq8dtmZbsXe720FzNCetgpoQ4WRk6F1XA13zn5C3EKTmSl9a6wmMhO6cMC3sm6nn+h634arW5cHzw6w2ue6XY1bDNeG7+FgDA3z7Rfng5JbNJ3ZxDZXUt1u4p8a6T5ED6a/5h3dw3gc591ITY3KU83dh
//YwCl11v9ZAfag0S40NyPiUs7JmoJGvabNz3hbm/vdKM8+Hy3brmGKe4bcZJSdT/yd43wXmlrMR4a6JAby5ALazJps9pqOc21Oa7/cdD4zU1e73HfPP1GnffzKzAwp6JWj7N8Y2k3V5c6tdHrZAGatZR47YMS06o0wgHPf0j8vYfR8GhMmRNm42PVuxyfNz4OGvCOU5HiKuFtcXDeQl11K/64ZSU4FwU7iguxUPfbECthZtdcPgkAGD/sdC55MqwsGdihjEvLPFrU/88F2856Oo50xsmunq8FMXr/4HjFXjz550Y9fxPAIA9R045Pq6eEAeA2RvqJhP1uqmFNdmMHS456dx8tufISaxTmJ2soH6ot0hN1u5ogZs+WIUPlu3GVhPbf/GJCnTKSAUAnKqqcXw+p7CwZ2Ia9eu8kfWhsroWk176Bb/mH7J8/GqXNdZklQYaqDeRTFqKdgLcE+VVPr7oeh9PpcpcdeSkvZq8pQ598wFgxLOLMdlmBLRbnxsAlFV4BHd1jf4xtx44gbOe+hE7ikMzN6AFC3sm5thz5KR3Wf2bH9lNPyXxvpJT2LjvuC3f7CqX7TjJicER9o00hP2O4lJMfTfHp03vYaiM3gX0Hx56lIdY01V/biWnnBeM31vieaP6YlWhbp/cvaH3vlHDwp6JOUYoEoXd+tFqn21G5gdZPOxWPCzMqDHQ9pyg9uLQe5b8aUA7W8fVemaMfmEJVhQc8WnTM/esUZlRGibZE/bqCeK1e0rwyLe5ul4+xSc8Ub2L8qxF9cpU1dTio+W7/Wz26jcTJ5yq1H9gac0XhRoW9kxMIycPk3n7151+Oej3HDmJDQ79oqtq3fbG8RX2esLQonONl2cll04zCo9qzwscV7ms2nXFVNv8L3zlV7z3+y6/ojAysqZ8rerNw4zXl2zHA19vwOc5vlp4WoPA51aM5me27GdhzzARx6SXlvqsj3h2MS54ealO7zpOf2SenznCyI7rBPVEpJ4Zx6p3jczPW61F0Cona5WotdpaIXD9eznIUb0Z6KGnWS/bcdjS/laRbeay6UXG7n06dqoKX632fWAcLtM3BVVUh35CVg0LeybieG5+HuZv3B+047dpop20TMZKgI+W5lpWWYN3fyvwaat2WbP/RFWYRe/wahu6kkADoLR4YcFWn/XiExVYsOkArn/PXPP+bt0+fLNW23//pI5ppOCw/0RnTa3Aw9/k+szJKDl4vBxfSf7tB1R+9XbnPvo+9gPu/GwdNu6re+NTH1NJhc7DbF/JqaDcDy1Y2DMRxyuLt+PG91f5tdfUCld+GOkNkxztpzy3nvfI9Ll5PutVLmv2TVTmBr0UBvM36tuyQyFb5DQSRy24VN728RpsLtLOpaPnovjYd5v82tbsPor3l+3C3z/Vji6+Q9HevplvSuqaWoH8g6XImjYby228TWzcWzfuHq0b6/ar0LiO938vwNDpi3Drx2ssny8QWNgz9YbOD8zBzR+sNu9owiYdwWIHPWHfv0O6z7rbrpcNk6xN0KrZUHgMD3/jmfB0w4PnyhnLMeCJBbrbW0spnxPsRlepsOOPLkft1kjX98rifOQfrLOV/7a9TojLkawyNULgmTmbAQCv/7xD9xzPzN2MborMnD9trYvLGNalhe5+WuUpP1qxR3MswcJU2BNRChGtIKJ1RLSRiB6T2jsS0XIi2kZEnxJRktSeLK3nS9uzgnsJTCwxL0DzTiBeF0rBeqqyRtNdsEtGI591ZWRpba0wfNW3NgZfQW1Vbl/y+m94f9kulFZUY6mNOAE9ftl2CEfKKnVt0bLg7dgiNaDznLKRIE1+rtTWCuwrOYXn5m/BJa/9ptlXbbOvqRVYmOcR3Ivy9APrXl+ywyff0dDOdQLeaJ5kh0buncY23VMDxYpmXwFgtBCiL4B+ACYQ0WAA/wTwohCiK4CjAKZK/acCOCqE6ALgRakfw0QE3wSQk0QpaEe/sASXvv67Rh/fdeXE3yuL8zHo6YW
6NmUr2H1Y1dQKHC+v8nq1LNx8EFe/s9Lx+dVco3Msed7DKDLXCqcqrV+vLGxrhMDlby7z7C89kI+ZmJOcJmLLaFwXeWv3EFbSK7iJqbAXHuR3oUTpTwAYDeALqX0mgAul5cnSOqTtY8huViQmavlpy0EUn3CneMPGfcfw8iJ7ics+DKAykVogrCs85memUNvQlRO0SySPl0CSbvnbwI0FxqSXfkGfR3/wrt+hY892itI0okR+MDr55Zcoom+bNUqyLBTlB0tNLbBLykEjP59HPm9chN2p4FW+WdUKgRlLd+K5+Xn6OyjI0ckgGiws2eyJKJ6I1gI4CGABgO0ASoQQ8jtWIYC20nJbAHsAQNp+DEBzjWPeQEQ5RJRTXOxu4QQmMhFC4Op3VuKyN/w1Yidc8N+leP6HrYZ91JpwUYn1/DGnKmt8smC+9ctOvz5+9m/VqnKCVv5xV+j4jjvBzIyTF6aiIAelB7qT9ABK09fD3+Si0wNzLAljWdgfVXgiyZ+PWe6d71R28xcXbLVUhvGmDxSOBAJ44vtNeMWk/m+4sCTshRA1Qoh+ANoBGAigp1Y36b/Ws9zvTgkh3hBCZAshsjMy9EPUmehB/r1q2S9lRktJvewczwi1Nn7QxltFz0fm4V8Kl8IvV/uHw6vH0CzV19NHy/UykND8Sb0zHe/rFupJ5x83+QvFuZI/vlk+fy3ziVbZPjmD6c9bi5E1bbbmseT9lG9OAsJSKoZftvkqnP9ZuA1TZ3rcRn/NP4SsabNNJ9vdzLcTDGx54wghSgD8BGAwgHQikmcY2gGQHWULAbQHAGl7EwDWIiuYqMb7am/Qx+hBoIeRO6bd4KJAGdTJ9yVWK1gnkPS2Qzr7vSSHnNtUroLXafjSWw1SeusXf8+XCf/+xa/tsCTI//dTvu6xrnp7hV9bVY2wlD5Zzw8eAP7y1nIAwAfLjFNIq1NLRBpWvHEyiChdWm4AYCyAzQAWA7hY6jYFwLfS8ixpHdL2RSJUUQNMRFNnx3VXAMvfLq2vmVNty2nZP2WQDaBdlGOrSSlDNXn7j+OdXz0mJPXRwvHDmptr7hE1tIvnoWRUkWnBpgN4Zq41+7b8nXFiWtebV1BiZR5pywHjlAdHDQLZjLhueEdH+9nFiu9PJoCZRBQPz8PhMyHE90S0CcAnRPQkgDUAZkj9ZwB4n4jy4dHoLwvCuJl6iMvBpF4Kj57CjkOl2FfirzEfL3eWJ/3Vn5zZXZXeGYDH5FFRXYN4xQPOivBRImu61wzr6Gekj1Q9qpmUH753uya6faxE18pU1wg8Omujo7eitTZz3euR1sBYXOq9zew+bOx91ad9uuF2tzAV9kKI9QD6a7TvgMd+r24vB3CJK6NjogorZhw91HZXZaTq6Bd+QnWtwMCsZv47OpSFTic2m6uKYFTXCnR/aB7OPK2pt00vmZgVIkGzt4a7I3t/2S5NW74V1u62J+yzmjf0VpRSYpbBVK94ydnPGXsCZSu+G8GEI2iZkBHIBNYuxY/vVGUNXltSp3nLppKaCNRyZW1Pr1C3HY6drLKl2RYdc/5QCRR54nXFTnfs2E4FPQD8bjOZml49XrM6uQePOxtjqBzTWdgzISOQGBKl//oJHdOMlmdHuMW/m7VVNxYdw3u/+04SGj3fLvivvepNbmImGJduCzyKN1jojd0sqZ3Te223hKNTWNgzIcOJfVkIgQWbDvjYQ4+d0hb2WrbZUCv7ZRXVPuMwE3p6ZE2b7edi+MrifNw6uovlYwSiDQfKGoXppEDDw+qKGctDORxbqEs/ymjlt1GiDnjL23/c0kR/qBzGQpucgYlpnMi9GUt34snZmzFU4XJ47os/W95/9e7QRine9fk6n3Ut3+zz+xj7yiv91pVFU37NP+xXNtHtZ9nbV2fbLgiixU6FgB/1/E946fL+OHSiAteGyPMkGHyrk4ZZDy0XUk1Y2DPRhmyzlwXUF6sKMbxLC7Q2yC//5GxPJkK7Hiwyf/0w8Cy
ZgaBVg9aozi3g67depkoE5nISTT9G92gVlOPeLvnmbztYig6q9MKRhtIcGIr8NWzGYaIOWdjLybnu/nxdRL/Ou4GWZm9nolrd1T/rpfvCKJjeIR+v2I1/zrPmW+8WRoF1fTVcQ5XC/sUfjdNx6GHnIREqMw4Le8YV9L7cQghvoJGydJ1snlDmG49GtD4WI+28TJUnXy3MQ6FpntFW3ze+PmKU0fLu8d392pR1g79Y5Z8iwwr/WWg9QV+o8kSysGcCZs6GInR6YI5m6t73ft+FSS8txW/5h/Dh8rqMk3IIuhJ19Gm0onYRnbOhCFnTZuO6mSv9hItSTt00srPfHMRelc9+kt1K4xo8MLEnLh/YPuDjuMnoHi2Dclx1MRjA15/eaaUxW8Le0Rnsw8KeCZj7vlwPAPhcQwvK2++pCrXzcJmpa5q60He0otbW5XmFHzcfRO5e3wee0uX0tSXbsXiLb8Ku3YoH7PIHxmDj4+NxaXZggjopIQ4jutpPTtiiUbJ5J4cESyAmJ/gLe6UHlds1hLXQ8y5zGxb2TMDILmk/aFSRypVqdC7bcSTkbpCRipFZQZ2Qy+wzi1MYfFulpSAxPk63Lm2wCaY1wqkLqxmZGs4BynkRvYLnbtKicfAekkpY2DOuoZViYIOkqS42KPWmh7KIRTRhJOzV+ffNkqZZqVyVZrH8nbKEoLqwuRXcKkqjRbA07OYabyNK000gZSz1yGru643EZhwmIjlUWoGsabPx3bp9OF5ehSe/3+Td1qJRku5+aSkJttMl9Htcv6B1faZWCNTUCgx5ZqFfmcRyVU1X2fU0EBIs2vHH9apzuxwaAamUlVRVh+5txWmJQquo8+4EWrrRKizsGVtskbT36XPz8MycPLy1tK56U28DL47z+7aJWjPOZzcOsdW/tLwaldW1KDpW7p3vkLFSaMMuWsLkvDNa+7UpTUiRVkk0GLni9dIvh8JOr4Rz4zARyQGpCtDeklPe/N1yHdbzemfins/XadaF/WHj/rDZkoON3QIpLy3K934Wai2y3IWShWkpviYYLcVeK9mX+s2rbXqDgMcSyXRt1UizPdiafbhgYc/oIoTw8xzZXHTcuyxPmsn/5+Xux+erCjXrwhYcPomhnVtonqf/4z/gjk/WaG6rDxxxULSi1yPzAfhPPBpVTDJiYu86TT1O9fDR0uzj4wjjT/eNllULuQYabolGhLgoWMDozTEEazJYD9bsmbDT+9EfcI6qJux36+oKM/+oKshcWmGcKEqtccocPVmFb2zmHXGbu8d1c7yvmxPJLR16ZijNLuoHtJawv/Ss9pg6vJNPmzrRl16qXz3s9jdjapDz6Dx/SV/N9mCbG9VzW2yzZ8JGaUU1sqbNRmlFtd9k0sET+vnU1bnLlfbnsT1bRrQZZ4jOW4cVurdu7No4lmwtNu+kgZHASIj33fbjnWdjcKfmGNixGWZMyfa2b1NFMyfpZH/Uw+lbiR7BNKe0SkvGsC6ee948Vd+xIBh8fP1g/H1snXLB3jhM2DjjH/N1tyl/fyO66gvI8qoa9Hh4nnd9cKfmPtkcI4U2kp91IIXJu7RshC9vHurWkByhHL16cjVBdW2dM+ps1crUCEoTHQAkxYfXLuM0VYEVFtw50rusNnsFkxFdW6Brq8b429iu3jZOl8BEPOkNPRqRlqDUCg5atsN9jwqnvHbFAKx8cKz3XSM+gB9cw6QEDOgQmjqiehjJq8Nllbhy8GnedaVwaZWmn3HULH97sLGS5O2/l/tVTLWE0qQYaHyAnYLh708d5F1u38wzAc6aPVNvsPK6LSBw48hOpv1CxYiuGchonOy1z8bZ+CXcf14Pv7Zwuyoanb/kZBUenNTT9jG3mAR0BZvINfoBtymKyDx0fi9Hx/j8xqF47YoBIXuzYGHP4EhZJY7rlPpzyk9b/CNmX9Dw0gkWPTPTDLfLNm55HsHOJNmNIzujkyLaNBIwG32lg0T46RpRtI0tRuO6QSA1i43o0lL
b5RKw7hlz1zj/bJl2ad0kBRPOMC5k4yamwp6I2hPRYiLaTEQbiehvUvujRLSXiNZKfxMV+9xPRPlEtIWIxgfzApjAGfDEAvR59Afb+2nlapf52ydrfdZnLN2JvSXuFsD+6e5RutvMfrPqH7WVH/n401uhhzQZG2la51drjMvfNUryCOnbbJQ1/EPfNn5tTtIoOMXK/OzI7vYTtv2osNeridbAP8CaZl8N4C4hRE8AgwHcQkTye8uLQoh+0t8cAJC2XQbgdAATAPyPiOw57DJhxWjiVcncXP/EZ3ocOO5+3pQsA+3a6oSrnOjKimb/+pXZmHfH2dYGF2TsCt24OELB9Em2NNLxp3t895VvMWYxBdcN74ibRna2NTYtRvdoaclmn5aSiB1Pe/VM3DyqMz5Q2MXVPOTAnBUtmAp7IUSREGK1tHwCwGYAbQ12mQzgEyFEhRBiJ4B8AAPdGCzjLsdOVaGi2j88f3An37woe46c9EbORhp6KRrMZL0s2+VJSLtm02BUiLLD97cNd/2Y6jmVoV1a4Pf7R/t4rsjLKYnaouOh83uhUbI13U7P5XFsz1Z4++qz0L6ptfKFSpv39+v3GZp/AvG6knnnmrP82sxKTQLASw4nk93Cls2eiLIA9AcgV564lYjWE9HbRCTXMmsLYI9it0IYPxyYMNH3sR9w0f9+865nTZuNmb8V+PUb8exiDHp6YQhHZh09bwyt6M9hXeoeYuq6n+GeYLVLexfruA7q2AyAtsDKbNLAR0C2TW+AgumTsPyBsbqBaLLw7WpgGwf0I3RrpNw0vTVKBpqx58gpNG2o7zfvxl0epfE5vXlVXbyC3ptxqs2IZLexLOyJqBGALwHcIYQ4DuBVAJ0B9ANQBOAFuavG7n6PWiK6gYhyiCinuNhZIAnjnG2Sp8XGfb6+1f+YtTEcw3FMok7gT8Mk/4nEBIXLjSy/5PS/ekLg+hHBjeIMBKMso3aQg67s5P9q0iARt47u6tP2x/4enU42iZlNsOoVs1Gn4bBD4+QEpFp8s3CKlmKgDEB79A+na+53tgXtP5hYEvZElAiPoP9QCPEVAAghDgghaoQQtQDeRJ2pphCAslROOwB+sfBCiDeEENlCiOyMjPB+CLFIcWnwco87wUwL1EPrrXxsz5Z4WMMdTlkFSv7BJkuZD/Vs9r3bafvPy/t/fpO9jJdu4pYlSb72QLM9pjf0zCPEe4W9cX+9eRzZlVcvvYYR8fFkrNkH8Q1umuSSm9E4GZec2c5nW9v0Bq6nk7CLFW8cAjADwGYhxL8U7UqfoT8CyJWWZwG4jIiSiagjgK4AVrg3ZMYNjL54Rn7z/YMUPETkzJ6qtc9bU87yKxABAGWVdUFC8l7HpZJwejKgfVPjzI+h9E5R888/9XHlOHKErdF9//624XjtigGGx5Ft8LIZx+zhcUZbbfdYWaN/aFJPPGLRh13O0NmuaQOkN0zE+X0yNR/EWt/fyf38vY6ccOPZnbDj6YlIS0nEMxf19tlmN/VEMLAygmEArgQwWuVm+SwRbSCi9QDOAfB3ABBCbATwGYBNAOYBuEUIEfzaXowtWjXWj5w0ErqntzH2X7fCoxf0woWqH1gcEZo5yFGSoVP3VEuDG9WtpWK7578c6aun2evVVd15qMyvrbVBNGowOPO0puadLBAvmbeMhP0ZbZtY9gk/Q/qO9G9vPL42TbQfpLXSOFKTE3CtxehUeXK5b7t0EBFe/vMAnJXVzKdPzkNj0UfjTe2JC8+wdA4ziMj7oFMXjFGnrAgHVrxxlgohSAjRR+lmKYS4UgjRW2r/gxCiSLHPU0KIzkKI7kKIucG9BMYJRknJ9IT920t3ouRk4MFXU4ZmITXZ16ZeUV3ryMPFzmv5PEWNXHm/RtI4GupMniXreJ3IKGMH/jKog+WxuIHepdsVK3KQUdMAE4LJz4pBnZpj+QNjfNIua6FV/xUAaix+D/oqJnDJGySnzZJ7Ruk+uPXMRaMc+PDrYbVaWDA
J/wiYoCGEwKK8A5rBT0a/Jz0l5PHvN+H79UXaG21ARLhYZdOsrK7VtPEqPWiCid5Dw8yMfbKi7qX15lGB+5e7gd1H5l3juuH9qQP9NGG7KD25jHLuyOgJdavZLpVxFrKicEqnQLiTNMLPumQmA/yTzIUDFvZRRnVNLW7/eA027TuOV5dsx7Xv5uAJRZ1YGSNPCbVbYjDo38H3Fb9FoyTU1Aq/4tjn9vQtsBEIsouhFnoPuETJU2WgjiBUCqxQa29W71Oyib04MT4OI7oGrsUeVgVcDeroeVC/8mdtW79aB5FNHVZ81gHf5HVyLd+vdSKJzfLPTOrtb6IKdDJX77rDBQv7KGPHoTLMWrcPE1/6Bc/O2wIAmPn7LszLLUKZorjI+sJjeocIC/FxhFoh8CeVxq/8kVqN7NXjuhH6idi0BOel2e3RvFEyCqZPwmc6XjdWA4jcpGD6JM+CjixSm8OSwmRCaJqahILpkzCpT6amSadG9dok55dXpmA2QvnduELK6qlnSjMzmb94aT+seHCM3z5P/7G34wC2SX1Cl/fGCizsowy97/RNH6zGtK82eNeX79RPNzw3N3BTjV2ICEJ4XrevHprl0y5z7bCO+G3aaEfHv/+8HprauSwYSeOXcG4v/bcK2QunV6Z54I+ebdpNOiiCrNQvbZP7u+NtYkYPgyIu3Vt5Jm1lU9fAjs1QXeM70LvGdcODE3viAo2cPEr+fWk/AHXzLYDHx/+tq7LxmI6Pu555RyYpIQ4tVU4L8XGEPw/q4JPzvz7Dwj7KMHrz3H2kruqU0WTo6t0luttG92ipuy0QjpRVorSiGodKK5CSWKctKzWyuDhCG40i2B9ep58LRaZVWophGmOtj23fMf3EbfKErpXJxFJFXninWSO1JpCV9/rZi/Xty+eFKLOiUVKyW0d3wZc3D8V9E3rgzauy8f7UgX5BU8kJ8bj+7E5+DgK3j/EN3jq/TybuHtcN94yvy/NDRBjbq5WuKa2Rg8/9pMkDor7Bwj7qMJD2CsFkZQJN+xDByQkjuzJ+u3afz+u9cmJN71Vcfv0HgDeuPFP3HHYn6YwmCmWBVGthMvGEwnw2pJP2hLPaD1vtpy3nAFIH68go507BDLIAAB5KSURBVGDUIwpVJoi2Gg9imfg48rqKnturFZIT4i1PxGarXEwT4uNw6+iufh5dRlg1ZXXKqJv0VR5f+WCRuXxgB812La4ZlmWpXzBhYR9lGP2wjygKYzv10Q5iWVAvyrwvPpq9wcWdJgVR6b1yt0pLMfxstCbjurbUN0skeAOH7H0getegFkbqdXm/sxSTzMojpSrSQ6ifx6EoaD2sS3NcMeg0844K1MJezx1YmWbgkxsG2x8crJcenHVrnX1emcvmNI0gvWcu6o1bzrGWMtrouxQqWNhHGUZf6T1H6swSTos5Oy2IbQel0FAKLiOZNfOagbh9TFdN+/hzF/fBkM7NbQs9I/kgexPp+efroTeERJN6r17ThuLzUNqse2TWCZN+qijRUCj2fxrQznbFJav+9ACw4oEx+PLmoX4ZWa1iteyk8jNVmpMCVXL0ooVDSejKzjAhwaq72M/bIjf5nFJoLMqrq3hl5GqY1SIVd56rnYUxo7EnmMaugmv0WT5zUW9cMyzLtjlMT76ZuW2SV9YLRRvhg6mD8P6yAiQn1D10/tC3DVo1Tsalbyzz9gsWk/pkYvb6Ikdup2qFw+j+tkxLQcsAIpSdpOJQfm5OzZf5T52HgsMnDatjhQrW7KOI4+VVun7GMq8t2Q4AeOfXghCMKHDi4wj92ns0VacR57JA1hImsneIli+6kYxMSYzXDL03Q8scAAAX9PH1QFGfW09YDe/aAq9fme3XrpzIDmbuffnYTu6NOuulUVR3oITClKVFQnxcRAh6gDX7qMJKacHpc/NcqSQUKpQTl04LM8sapNbut47u6peq13u+IAiIjhrVtQZ0SEc7VcK1MT1bIS0lAcclTx5ZMFo1JyhTAAR
zmkWeS3fyWQWrxqwWgeamkYeqVaqxvsCaPeMKcnpbtyk8esorrALNJWXFnKFMtbxi5+HATqg5Bv+27q3T/K6tSYNEzL59hHf913zPWL5b55ctXJMmivsRTJlaG4Bm73TeyAlOFYVogoV9PWb5jsPImjYbBRoZGENF89QkbHlyQtBCw1ftOqpYc/aDlQWs2e99zcPn+nhjHCo1rrdqh/sm9ECvzDS0VmR6bCNNJv+hbxvNB5GWgDqh8Nm3SqVBYfhAkeW1k3kBdVCV1UnUYBMsxSXcsBkngjlwvBylFdW64ePfrPVoeb9uP2RYfDuYpCTGIzkh3kcMd2jW0CeAyy2cygLZVm8mkNRZH7cXlzo7oQY3j+rsnR8474zWmHBGa0zuV1etM2+/f6IsrdE6sWuv31NiOd+MXWQvIidmEqXb6gMTe0SMbXvtI+PCPYSgwMI+gpHrvnpzoahIkn5oVdXB09zMGCj5fSsF6ZmnNdUV9p0znD+UQq33bZVKN7rNq1f4B35pmVq0nk25e+1nT3QaQGeFJy48A+2aNsCo7vYjq5U2+xvOjux5pLQGHlHZKk07TXJ9gIV9PaZK0ozCWWJQjvRUCiYj7w+tdAdK1JWfOrZIDdjobJQmwYhQBsLIV9gzMw3T5c/Upcfb6J7BSXEBeIq7PDjJWjUpNS0b1x/BeU73lvjX//WNuORmdmCbfT1mzgZPwrJXFm8P2xjkPDZKsbS5SF8jNpuUO3bKtziK0uUwmP7iWgQrD5AW8gNySKfm6Bugq6kavaId4SZbSkx3ncVqVOGEiHDRgHY+8Qz1DRb29Ri5apS6xJ8dPphqnkTMCsrJxC0G5g8zd7uemZ5Iw2uHeQRAIAU1ZN95py8GoawbKrwTnYpGDWFvZ/Lwrauy8anD9AKhxEnAE2MfFvZRQEJ8nKWEXDJZ02Z7l4cHmCNexurP1UzwPj7Zk6L2ppGd0LRhok+6Y7s4qWmrxKzoh1WsJOHScmFs1rBu/PIEq53Sh2N7tcIgh+kFooV7xnc3TL0cS7Cwr8dcdlZ7AJ6MiP/7KT9o51ELXPm8SqxaWJSavVYmQNkdr2VaCtY8Mg7dFT9Uu5GgRcfKAQB7j+qnKjYiOdH5K/tfR3X2ZuDsoBM1q6QulqDug1SmIHj9yjPx0fWDcPc4a1kWGQ+3nNMF8+44O9zDiAh4grYekxAvuxQCz/+wNWjnaZPu682hLNwtY2RP3/T4eCzZUoybP1ztEwGqFU2qDqEH6kxEdmNwZBfQds2MJ4X1cKLZt01vgN5tm+DeCT0AeEwpfdqbF7/wPgT1iogTMLSzO29hkUboQqtiGxb2UUKz1CQcKXMvCEhJlSr4peRkFR6c2NNH6zZS7BsmJXiTWCknaLX20cqEKAfb2I24TEmMk87jzCbsRNj/qqqkNdag2pUdIiXgyE2i74oiG9NvMxG1J6LFRLSZiDYS0d+k9mZEtICItkn/m0rtREQvEVE+Ea0nosiquhtFyHKxvKomaIIe0M59f/3ZnXzyjJt5yshavFwrFAC+X+9f/vBsjcLX8gSeXWEve3uo3TmtEsoJWjPClcgrmLSV8gEp6xcwwcOKZl8N4C4hxGoiagxgFREtAHA1gIVCiOlENA3ANAD3ATgPQFfpbxCAV6X/jENOVlajYZL/rZJF39Nz8oJ6/gwL/tC5e40LmDeTik8r2bTPP0BIyzPj6Yt64+VF+bYLrjx6wem4emgWWjusAevWBK0bRGNul0m9M9HsuiQM6Rzbk8ihwvTbLIQoEkKslpZPANgMoC2AyQBmSt1mArhQWp4M4D3hYRmAdCKqv5EILiGEcJxqttcj83WOGciIrJNoISpJq/jGvRO64zmD2qhWldXOGY3w4qX9bGvaSQlx6NbKuSdGUnz99amuDxARhnZpEfL4iVjF1q+HiLIA9AewHEArIUQR4HkgAJAjUNoC2KPYrVBqUx/rBiLKIaKc4uLILaThFh3vn4O7PlsX8HFeX7IdP0gTpBV
VdQWRz8pyVmbQCu2aNsDFOrVPZZR1YGW6t2qMS7L9PXdkjjtI6gUA/7msH2ZM8c/h7jaJCaETQt55BZ6tZIKEZWFPRI0AfAngDiGEUYIO7fxN6gYh3hBCZAshsjMygpOkKdL4yqSwiBHHpACqZ+bm4Yb3V/kdzzc7pDWmDDGvGdqjdWPExZGhhg5om1+ClcJ2cr+2GNPTnYlP7eN7gtQymzjz4nECK7dMsLEk7IkoER5B/6EQ4iup+YBsnpH+y/XjCgEo1bl2AKwl4Y5S7AQ86fHb9kM+6+8v2+V7DgensFLPU37FNnvV1hL2ai8eM+6T3BXDzX8u64+C6ZMcT+w6Qf74QlnQg4ktrHjjEIAZADYLIf6l2DQLwBRpeQqAbxXtV0leOYMBHJPNPbHKzsOB55tX1/h8+JvcgI95pgXTz+Yi/5c4rYhE2TWwqSKcv1xhZtIiSxVs9OeB1qNDo41mqZ5JcHWa5Wcu6o2+7cz99BnGDCveOMMAXAlgAxGtldoeADAdwGdENBXAbgCXSNvmAJgIIB/ASQDXuDriesjyHUcCPsb+4+V+2n2gNE4211wzNTxZtLR42TVQqZdWmKRevnNcd9z+8RrveiS5Ooaai/q3hRACF/b3nd66fGAHXB7DD0HGPUyFvRBiKfTjH8Zo9BcAbglwXFGFVlSomoMnypHRKNlrLlHb4P+7cBsOnnA3lXGDJHNvk/SG/vllxvVq7dcmW3mUJqvKamPNvqzCd4JWy6MnVoiLI8PJbIYJlNhVpUKIme03d+8xDHxqIT7PKfS2qQtnuCXo7SYH03IXvW10F782cqDZK/OZF0yf5GeqYhjGPfjXFQIqdDTcrGmzkTVtNj5cvhsAsKKgztwTjBzkfx/bDRdKpfCmnWdtMnScRri/VoBPqvSWoJxkNRP2IzSiZRmGCQ6cGycEmAm9j1d4hH2ipNl+tnIPdh1xPqk7Y0o2ps7M8WufOqIjGibG465x3ZCabH7rVz98LtIteqQkxMd5I2QfkiaP9R5yMpzHnGFCBwt7FymvqsHGfcf9wvorqqzViJWLNt/75fqAxqH0QR/ZLQNLtnqC1uLIo5VbEfRA4Pngza5blvWRlJaAYaIV/pW5yKOzNuJPr/6GXZKr5Wc5e5B/sNRUw5UJhqL77jVneZfdqmlqFbO5CiLCfRN6YNatw0M0IoaJXVizd5FNkk96yckqnNYcuPeL9UhOiMP1Izp5++wrOaVbdNvNHCEfXz8YLdOSfY7ptPC2Xc48rSlW7TqKszqalxS8eVTnEIyIYRgW9i6yvtCT+fFwWYU3VUBFda2PZp9g4F7oZhpbrUyCdo7fVueBZIUGUoUnKy6noeaFS/oikc1GTAzCwj4IrCw4iiGd6hKDKSdodxSXoWXjFM2UwMEOlQ+VEUf2l49EYf8nk4RuDBOtsIoTBI6dqsI9X9RluFSmDZCXH9JId/DubwX4dOVuW+d6aFJPy31DVQDjkQtOx9ndMnweeAzDhBfW7IPAR8t9BbZSs1+ytRiz1u7TzQh535cbbJ1rytAsPDl7s6W+ZgUwWqelYP9xT5FurefCm1dlW8rJ37FFKt67dqClMTEMExpY2IcApQviO78WAACSXIoWVdcm/UPfNo6PpXwWaAn7c12qp8owTOhhM04AlFVU47n5eag0CZoqq/Qv0lHpkj1bra2/eGk/x8dSeu6E2k2TYZjgwsI+AK6bmYNXFm/HN2uNi5L8ss3dbJVGaEWlWk0wdk4PZQFx14bEMEwEwMI+AH7fcRgAsGZ3ScjPTQTT6lEyP/x9JF68tK9pv39ccDoW3TUSDZPiMS1CCokwDOMObLN3gd0B5LExY94dIzDh37/4tV8x6DTLKXE7tkhFxxaputu/u3U45m/cj8T4OHTKaIRNj09wPF6GYSIT1uxdoE2TBkGrt9o8VTv75Ucr7LloGtG7XRPcPb67a8djGCbyYGHvAkO7NMftn6wx7+gAoarV3q99OoDgFfNmGCY6YWH
vAmUVNZi9PjhldlMSfatJrd0T+vkBhmHqPyzsXeDRWRuDduyGiealA7+8eUjQzs8wTHTAwl6DaV+uxyWv/Wa5f3UQTSpWSvWVW8yXzzBM7MLeOBp8snKP4facgiO4+LXfvetxBITThB6MEoYMw0QXpmojEb1NRAeJKFfR9igR7SWitdLfRMW2+4kon4i2ENH4YA08nCgFPRCe8nqt0uoEfLdWjUJ+foZh6hdWNPt3AbwM4D1V+4tCiOeVDUTUC8BlAE4H0AbAj0TUTQhhrVRThPPIt7nomZnm1x7kzMSa3DWuzlXSzaInDMNEJ6bCXgjxMxFlWTzeZACfCCEqAOwkonwAAwH8brxbZLLrcBlOa14XjPTe77s0+wXTZq9HUUl5yM/JMEz9JZAJ2luJaL1k5pErbLcFoDR4F0ptfhDRDUSUQ0Q5xcXFAQwjeIx87iesL4xMV8fOLfUjYhmGYdQ4FfavAugMoB+AIgAvSO1a9gRNtVcI8YYQIlsIkZ2RkaHVJSLYfeSkX1v/Duk+6/dOCH30aeHRUz7rvds24XquDMPo4kjYCyEOCCFqhBC1AN6Ex1QDeDR5ZcKWdgD2BTbEyEOd+OyLnEJHx9kcQA6aU5W+0yDf3TYc93HyMoZhdHAk7IkoU7H6RwCyp84sAJcRUTIRdQTQFcCKwIYYXuS87ifKq3T77DjkLBFag6S6gKl5d4zwLmc09njaZDVvqLuv3IdhGMYKphO0RPQxgFEAWhBRIYB/ABhFRP3gMdEUALgRAIQQG4noMwCbAFQDuKW+e+I8Oz8Pk/pk4vI3l7l63LvHdfNZ79G6zstn5YNjTfcf2rm5q+NhGCa6seKNc7lG8wyD/k8BeCqQQUUSuw6fRL/Hf0DJSX3N3gm3ju7q1/bhdYOQf7DUrz37tKbI2XUUo3u0xKK8gwCAZAtpFBiGYWQ4XYIF3Bb0rdNSNNuHdWmBKUOz/Npfv/JMNE9Nwp3n1r0NWK0+xTAMA3C6BC+HSyuwcd9xnN0t+J5BN43s5F3u2z4dI7q0MOzfvFEyVj18rk9bYhw/pxmGsQ4Le4m/vLUceftPIPcxdzI8/HjnSGQ2ScGO4jJc8PJSn23tm9VNvH57yzBHx09MYGHPMIx1WGJIbDlwAgDw1OzNrhyvY4tUpCYnoHe7Jt422bumVxv/lAt2SWFhzzCMDVizB5C3/7g3v82+klPGnS2ilRxt0V2jcOxUFZqmJjk+7tpHzsX+4+WWUh8zDMPIsLAH8MmKugwPp6qC5ykaF0cBCXoASG+YhPSGgR2DYZjYI+aF/UPfbMAHy+qKd6/YeSSMo2EYhgkOMW8LUAp6hmGYaCXmhT3DMEwsENPCvromsNqt7107EL/ce45f+9y/jdDozTAMEz5i2mb/yKyNAe2vF4ClVc2KYRgmnMS0Zj9nQ5HjfXu3rfOfb8kZKBmGiXBiWrMPpHbsjKuzvctL7jkHOw6VYtJLS/GPC3r59f3qr0NRXROGQrUMwzASMS3sA6Fl47pkZg2S4nF6myYomD5Js++ADk012xmGYUJFTJtxGIZhYoWYFvbCoR1nbM+WLo+EYRgmuMS0sHfCiK4t8MaV2eYdGYZhIoiYFvZ6ev3401vp7pPVPBVxGknOGIZhIpmYFvYnyqs12+dvPKC7zxCu/cowTD0k5oR9Ta3A/mPleOTbXL9tZlWqLj6zHSb2zgzW0BiGYYJGzLle/nNeHt74eYfmtkbJxkW8OygqTDEMw9QnTDV7InqbiA4SUa6irRkRLSCibdL/plI7EdFLRJRPROuJaEAwB2+XlQVHdAU9ACSZFATp3yHd7SExDMOEBCtmnHcBTFC1TQOwUAjRFcBCaR0AzgPQVfq7AcCr7gzTHS557XfD7ftKyr3Lc24fgXeuPguXD+zgbRtuUhicYRgmUjEV9kKInwGoK3pMBjBTWp4J4EJF+3vCwzIA6URUb4zcKwrqLrNXmzSc06MlhnWpm5AlYi8chmHqJ04naFsJIYo
AQPovRxm1BbBH0a9QavODiG4gohwiyikuLnY4DPdY8cAYzfZmXAKQYZgowG1vHC3VV9OdXQjxhhAiWwiRnZFh7AUTCL9tP4SPlu9GZbVx7vqWaSma7a2baLczDMPUJ5wK+wOyeUb6f1BqLwTQXtGvHYB9zocXOH9+czke+HoDKi0UKpHTFr91VV2EbAanL2YYJgpwKuxnAZgiLU8B8K2i/SrJK2cwgGOyuSfcHCmt1N3WOSMVABAvRcY2Ta0z3TROSQzuwBiGYUKAFdfLjwH8DqA7ERUS0VQA0wGcS0TbAJwrrQPAHAA7AOQDeBPAX4MyagekJOlfavfWjQEAXVs2AgA0aeAv4FulsYbPMEz9xTSoSghxuc4mvxlN4UkjeUuggwoGRsVD+rbz+M8/ceEZmNyvLbpIQl9GL089wzBMfSGq0yUcKq3wLg+dvki33/UjOgEAUhLjMbwr+9IzDBN9RLWwv+Kt5Zb6cRZLhmGinagW9nn7T4R7CAzDMBFBVAt7K2x8bHy4h8AwDBN0olbYV1vwq7+wXxukJsdc4k+GYWKQqBX2XR6ca9qndzvOYskwTGwQtcLeCuVVNeEeAsMwTEiIaWE/4YzW4R4CwzBMSIhpYS/046wYhmGiipgS9lufPM9nPS2FJ2cZhokNokra1dQKfLJyN/4vu71Pe3JCHH6+9xwkJcThgYk90CApAWN6tNRNa8wwDBNtRJWw/2LVHjz4dS5KTlZ52+4/rwduHNnZu37D2Z21dmUYholqosqMU1rh8a5Zs7sEAHDjyE4+gp5hGCZWiSphXyUFUv24+QAAYN2eknAOh2EYJmKIKmE/fW6ez3r+wbIwjYRhGCayiCphr0aZ4phhGCaWiWphzzAMw3iIamGf89DYcA+BYRgmIohqYd+iEdeNZRiGAaJY2PeQiogzDMMwUSzs371mYLiHwDAMEzEEFEFLRAUATgCoAVAthMgmomYAPgWQBaAAwP8JIY4GNkxz5HTF7Zs1wPSL+qB1E06FwDAMI+OGZn+OEKKfECJbWp8GYKEQoiuAhdJ60Bn+z0UAgPQGSRjWpUUoTskwDFNvCIYZZzKAmdLyTAAXBuEcfhwqrQQAbNh7LBSnYxiGqVcEKuwFgB+IaBUR3SC1tRJCFAGA9L9lgOewxcTeXJCEYRhGTaBZL4cJIfYRUUsAC4goz3QPCenhcAMAdOjQIcBh1PHipf1cOxbDMEy0EJBmL4TYJ/0/COBrAAMBHCCiTACQ/h/U2fcNIUS2ECI7IyMjkGEAAAZ08BQPT06ID/hYDMMw0YZjYU9EqUTUWF4GMA5ALoBZAKZI3aYA+DbQQVqhY4tGaM3FSBiGYTQJxIzTCsDXRCQf5yMhxDwiWgngMyKaCmA3gEsCH6YxczYU4cvVhcE+DcMwTL3FsbAXQuwA0Fej/TCAMYEMyi5//XB1KE/HMAxT74jaCFqGYRimDhb2DMMwMUC9F/alFdXhHgLDMEzEU++F/Wcr94R7CAzDMBFPvRf2S7YWe5ffuirboCfDMEzsEmgEbdhp3igJAPDtLcPQt316mEfDMAwTmdRrzf5kZTW+Wr0XANC+WcMwj4ZhGCZyqdfC/pFvN3qXGyXX+5cUhmGYoFGvhf1pCm0+KaFeXwrDMExQqdfq8G1juiI1OQF7S06FeygMwzARTb0W9gBw7fCO4R4CwzBMxMO2D4ZhmBiAhT3DMEwMwMKeYRgmBmBhzzAMEwOwsGcYhokBWNgzDMPEACzsGYZhYgAW9gzDMDEACSHCPQYQUTGAXQ53bwHgkIvDqQ/wNccGfM2xQSDXfJoQIsNKx4gQ9oFARDlCiJhKZM/XHBvwNccGobpmNuMwDMPEACzsGYZhYoBoEPZvhHsAYYCvOTbga44NQnLN9d5mzzAMw5gTDZo9wzAMYwILe4ZhmBigXgt7IppARFuIKJ+IpoV7PHYgovZEtJiINhPRRiL6m9TejIgWENE26X9TqZ2I6CXpWtcT0QDFsaZI/bc
R0RRF+5lEtEHa5yUiotBfqT9EFE9Ea4joe2m9IxEtl8b/KRElSe3J0nq+tD1LcYz7pfYtRDRe0R5x3wkiSieiL4goT7rfQ6L9PhPR36XvdS4RfUxEKdF2n4nobSI6SES5irag31e9c5gihKiXfwDiAWwH0AlAEoB1AHqFe1w2xp8JYIC03BjAVgC9ADwLYJrUPg3AP6XliQDmAiAAgwEsl9qbAdgh/W8qLTeVtq0AMETaZy6A88J93dK47gTwEYDvpfXPAFwmLb8G4GZp+a8AXpOWLwPwqbTcS7rfyQA6St+D+Ej9TgCYCeA6aTkJQHo032cAbQHsBNBAcX+vjrb7DOBsAAMA5Cragn5f9c5hOt5w/xAC+KCHAJivWL8fwP3hHlcA1/MtgHMBbAGQKbVlAtgiLb8O4HJF/y3S9ssBvK5of11qywSQp2j36RfG62wHYCGA0QC+l77IhwAkqO8rgPkAhkjLCVI/Ut9ruV8kficApEmCj1TtUXuf4RH2eyQBliDd5/HReJ8BZMFX2Af9vuqdw+yvPptx5C+UTKHUVu+QXlv7A1gOoJUQoggApP8tpW5612vUXqjRHm7+DeBeALXSenMAJUKIamldOU7vtUnbj0n97X4W4aQTgGIA70imq7eIKBVRfJ+FEHsBPA9gN4AieO7bKkT3fZYJxX3VO4ch9VnYa9kl650fKRE1AvAlgDuEEMeNumq0CQftYYOIzgdwUAixStms0VWYbKs31wyPpjoAwKtCiP4AyuB59daj3l+zZEOeDI/ppQ2AVADnaXSNpvtsRtivsT4L+0IA7RXr7QDsC9NYHEFEifAI+g+FEF9JzQeIKFPangngoNSud71G7e002sPJMAB/IKICAJ/AY8r5N4B0IkqQ+ijH6b02aXsTAEdg/7MIJ4UACoUQy6X1L+AR/tF8n8cC2CmEKBZCVAH4CsBQRPd9lgnFfdU7hyH1WdivBNBVmuFPgmdiZ1aYx2QZaWZ9BoDNQoh/KTbNAiDPyE+Bx5Yvt18lzeoPBnBMeoWbD2AcETWVNKpx8NgziwCcIKLB0rmuUhwrLAgh7hdCtBNCZMFzvxYJIf4CYDGAi6Vu6muWP4uLpf5Car9M8uLoCKArPJNZEfedEELsB7CHiLpLTWMAbEIU32d4zDeDiaihNCb5mqP2PisIxX3VO4cx4ZzIcWFyZCI8XizbATwY7vHYHPtweF7L1gNYK/1NhMdWuRDANul/M6k/AXhFutYNALIVx7oWQL70d42iPRtArrTPy1BNEob5+kehzhunEzw/4nwAnwNIltpTpPV8aXsnxf4PSte1BQrvk0j8TgDoByBHutffwON1EdX3GcBjAPKkcb0Pj0dNVN1nAB/DMydRBY8mPjUU91XvHGZ/nC6BYRgmBqjPZhyGYRjGIizsGYZhYgAW9gzDMDEAC3uGYZgYgIU9wzBMDMDCnmEYJgZgYc8wDBMD/D9pwksMstgtRgAAAABJRU5ErkJggg==\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "def running_average(x,window):\n", + " return np.convolve(x,np.ones(window)/window,mode='valid')\n", + "\n", + "plt.plot(running_average(rewards,100))" + ] + }, + { + "source": [ + "## Thay đổi siêu tham số và xem kết quả hoạt động\n", + "\n", + "Bây giờ sẽ rất thú vị khi thực sự xem cách mô hình đã được huấn luyện hoạt động. 
Hãy chạy mô phỏng, và chúng ta sẽ sử dụng cùng chiến lược chọn hành động như trong quá trình huấn luyện: lấy mẫu dựa trên phân phối xác suất trong Q-Table:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "obs = env.reset()\n", + "done = False\n", + "while not done:\n", + " s = discretize(obs)\n", + " env.render()\n", + " v = probs(np.array(qvalues(s)))\n", + " a = random.choices(actions,weights=v)[0]\n", + " obs,_,done,_ = env.step(a)\n", + "env.close()" + ] + }, + { + "source": [ + "## Lưu kết quả dưới dạng ảnh GIF động\n", + "\n", + "Nếu bạn muốn gây ấn tượng với bạn bè, bạn có thể gửi cho họ hình ảnh GIF động của cây cân bằng. Để làm điều này, chúng ta có thể gọi `env.render` để tạo một khung hình ảnh, sau đó lưu những khung hình này thành ảnh GIF động bằng thư viện PIL:\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "360\n" + ] + } + ], + "source": [ + "from PIL import Image\n", + "obs = env.reset()\n", + "done = False\n", + "i=0\n", + "ims = []\n", + "while not done:\n", + " s = discretize(obs)\n", + " img=env.render(mode='rgb_array')\n", + " ims.append(Image.fromarray(img))\n", + " v = probs(np.array([Qbest.get((s,a),0) for a in actions]))\n", + " a = random.choices(actions,weights=v)[0]\n", + " obs,_,done,_ = env.step(a)\n", + " i+=1\n", + "env.close()\n", + "ims[0].save('images/cartpole-balance.gif',save_all=True,append_images=ims[1::2],loop=0,duration=5)\n", + "print(i)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). 
Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn thông tin chính thức. Đối với các thông tin quan trọng, nên sử dụng dịch vụ dịch thuật chuyên nghiệp từ con người. Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ] +} \ No newline at end of file diff --git a/translations/vi/PyTorch_Fundamentals.ipynb b/translations/vi/PyTorch_Fundamentals.ipynb new file mode 100644 index 000000000..240813b69 --- /dev/null +++ b/translations/vi/PyTorch_Fundamentals.ipynb @@ -0,0 +1,2828 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4", + "authorship_tag": "ABX9TyOgv0AozH1FKQBD+RkgT2bV", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU", + "coopTranslator": { + "original_hash": "0ca21b6ee62904d616f2e36dc1cf0da7", + "translation_date": "2025-09-06T13:08:25+00:00", + "source_file": "PyTorch_Fundamentals.ipynb", + "language_code": "vi" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EHh5JllMh1rG", + "outputId": "f55755ad-c369-414c-85ec-6e9d4f061a02", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'2.2.1+cu121'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 1 + } + ], + "source": [ + "import torch\n", + "torch.__version__" + ] + }, + { + "cell_type": "code", + "source": [ + "print(\"I am excited to run this\")" + ], + 
"metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "UPlb-duwXAfz", + "outputId": "cfd687e4-1238-49f4-ab6b-ee1305b740d2" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "I am excited to run this\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "print(torch.__version__)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "byWVlJ9wXDSk", + "outputId": "fd74a5c4-4d4a-41b2-ef3c-562ea3e4811f" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "2.2.1+cu121\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "Osm80zoEYklS" + } + }, + { + "cell_type": "code", + "source": [ + "# scalar\n", + "scalar = torch.tensor(7)\n", + "scalar" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-o8wvJ-VXZmI", + "outputId": "558816f5-1205-4de1-fe1f-2f96e9bd79e6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(7)" + ] + }, + "metadata": {}, + "execution_count": 4 + } + ] + }, + { + "cell_type": "code", + "source": [ + "scalar.ndim" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mCZ2tXC4Y_Sg", + "outputId": "2d86dbdc-56e1-45c6-d3dd-14515f2a457a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "source": [ + "scalar.item()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ssN00By0ZQgS", + "outputId": "490f40d1-5135-4969-a6d3-c8c902cdc473" + }, + "execution_count": null, + "outputs": [ + { + 
"output_type": "execute_result", + "data": { + "text/plain": [ + "7" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# vector\n", + "vector = torch.tensor([7, 7])\n", + "vector\n", + "#vector.ndim\n", + "#vector.item()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Bws__5wlZnmF", + "outputId": "944e38f9-5ba1-4ddc-a9c6-cfb6a19bb488" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([7, 7])" + ] + }, + "metadata": {}, + "execution_count": 7 + } + ] + }, + { + "cell_type": "code", + "source": [ + "vector.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "9pjCvnsZZzNG", + "outputId": "e030a4da-8f81-4858-fbce-86da2aaafe52" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([2])" + ] + }, + "metadata": {}, + "execution_count": 8 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Matrix\n", + "MATRIX = torch.tensor([[7, 8],[9, 10]])\n", + "MATRIX" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "a747hI9SaBGW", + "outputId": "af835ddb-81ff-4981-badb-441567194d15" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[ 7, 8],\n", + " [ 9, 10]])" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ] + }, + { + "cell_type": "code", + "source": [ + "MATRIX.ndim" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XdTfFa7vaRUj", + "outputId": "0fbbab9c-8263-4cad-a380-0d2a16ca499e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "2" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "code", + "source": [ + "MATRIX[0]\n", + 
"MATRIX[1]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TFeD3jSDafm7", + "outputId": "69b44ab3-5ba7-451a-c6b2-f019a03d0c96" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 9, 10])" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Tensor\n", + "TENSOR = torch.tensor([[[1, 2, 3],[3,6,9], [2,4,5]]])\n", + "TENSOR" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ic3cE47tah42", + "outputId": "f250e295-91de-43ec-9d80-588a6fe0abde" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[1, 2, 3],\n", + " [3, 6, 9],\n", + " [2, 4, 5]]])" + ] + }, + "metadata": {}, + "execution_count": 12 + } + ] + }, + { + "cell_type": "code", + "source": [ + "TENSOR.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Wvjf5fczbAM1", + "outputId": "9c72b5b8-bafe-4ae7-9883-b051e209eada" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([1, 3, 3])" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ] + }, + { + "cell_type": "code", + "source": [ + "TENSOR.ndim" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mwtXZwiMbN3m", + "outputId": "331a5e36-b1b0-4a5f-a9b8-e7049cbaa8f9" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "3" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ] + }, + { + "cell_type": "code", + "source": [ + "TENSOR[0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vzdZu_IfbP3J", + "outputId": "e24e7e71-e365-412d-ff50-fc094b56d2f3" + }, + "execution_count": null, + "outputs": [ + { + "output_type": 
"execute_result", + "data": { + "text/plain": [ + "tensor([[1, 2, 3],\n", + " [3, 6, 9],\n", + " [2, 4, 5]])" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "A8OL9eWfcRrJ" + } + }, + { + "cell_type": "code", + "source": [ + "random_tensor = torch.rand(3,4)\n", + "random_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hAqSDE1EcVS_", + "outputId": "946171c3-d054-400c-f893-79110356888c" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0.4414, 0.7681, 0.8385, 0.3166],\n", + " [0.0468, 0.5812, 0.0670, 0.9173],\n", + " [0.2959, 0.3276, 0.7411, 0.4643]])" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_tensor.ndim" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "g4fvPE5GcwzP", + "outputId": "8737f36b-6864-4059-eaed-6f9156c22306" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "2" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_tensor.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XsAg99QmdAU6", + "outputId": "35467c11-257c-4f16-99aa-eca930bcbc36" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([3, 4])" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_tensor.size()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cii1pNdVdB68", + "outputId": "fc8d2de6-9215-43de-99f7-7b0d7f7d20fa" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": 
[ + "torch.Size([3, 4])" + ] + }, + "metadata": {}, + "execution_count": 19 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_image_tensor = torch.rand(size=(3, 224, 224)) #color channels, height, width\n", + "random_image_tensor.ndim, random_image_tensor.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "aTKq2j0cdDjb", + "outputId": "6be42057-20b9-4faf-d79d-8b65c42cc27e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(3, torch.Size([3, 224, 224]))" + ] + }, + "metadata": {}, + "execution_count": 20 + } + ] + }, + { + "cell_type": "code", + "source": [ + "random_tensor_ofownsize = torch.rand(size=(5,10,10))\n", + "random_tensor_ofownsize.ndim, random_tensor_ofownsize.shape\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IyhDdj-Pd6nC", + "outputId": "43e5e334-6d4d-4b67-f87d-7d364c6d8c67" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(3, torch.Size([5, 10, 10]))" + ] + }, + "metadata": {}, + "execution_count": 21 + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "UOJW08uOert_" + } + }, + { + "cell_type": "code", + "source": [ + "zero = torch.zeros(size=(3, 4))\n", + "zero" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "uGvXtaXyefie", + "outputId": "d40d3e28-8667-4d2f-8b62-f0829c6162ad" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0., 0., 0., 0.],\n", + " [0., 0., 0., 0.],\n", + " [0., 0., 0., 0.]])" + ] + }, + "metadata": {}, + "execution_count": 22 + } + ] + }, + { + "cell_type": "code", + "source": [ + "zero*random_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "OyUkUPkDe0uH", + "outputId": 
"26c2e4be-36ba-4c6c-9a90-2704ec135828" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0., 0., 0., 0.],\n", + " [0., 0., 0., 0.],\n", + " [0., 0., 0., 0.]])" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ones = torch.ones(size=(3, 4))\n", + "ones\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "y_Ac62Aqe82G", + "outputId": "291de5d9-b9df-49de-c9d1-d098e3e9f4d8" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1., 1., 1., 1.],\n", + " [1., 1., 1., 1.],\n", + " [1., 1., 1., 1.]])" + ] + }, + "metadata": {}, + "execution_count": 24 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ones.dtype" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TvGOA9odfIEO", + "outputId": "45949ef4-6649-4b6c-d6af-2d4bfb8de832" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.float32" + ] + }, + "metadata": {}, + "execution_count": 25 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ones*zero" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "--pTyge-fI-8", + "outputId": "c4d9bb7e-829b-43db-e2db-b1a2d64e61f0" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0., 0., 0., 0.],\n", + " [0., 0., 0., 0.],\n", + " [0., 0., 0., 0.]])" + ] + }, + "metadata": {}, + "execution_count": 26 + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "qDcc7Z36fSJF" + } + }, + { + "cell_type": "code", + "source": [ + "one_to_ten = torch.arange(start = 1, end = 11, step = 1)\n", + "one_to_ten" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": 
"w3CZB4zUfR1s", + "outputId": "197fcba1-da0a-4b4a-ed11-3974bd6c01aa" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])" + ] + }, + "metadata": {}, + "execution_count": 27 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ten_zeros = torch.zeros_like(one_to_ten)\n", + "ten_zeros" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "WZh99BwVfRy8", + "outputId": "51ef8bfb-6fa0-4099-ff66-b97d65b2ddea" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])" + ] + }, + "metadata": {}, + "execution_count": 28 + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "pGGhgsbUgqbW" + } + }, + { + "cell_type": "code", + "source": [ + "float_32_tensor = torch.tensor([3.0, 6.0,9.0], dtype = None, device = None, requires_grad = False)\n", + "float_32_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JORJl4XkfRsx", + "outputId": "71114171-0f49-481f-b6fc-6cb48e2fb895" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([3., 6., 9.])" + ] + }, + "metadata": {}, + "execution_count": 29 + } + ] + }, + { + "cell_type": "code", + "source": [ + "float_32_tensor.dtype" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "6wOPPwGyfRLn", + "outputId": "f23776a1-b682-404a-9f67-d5bcb0402666" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.float32" + ] + }, + "metadata": {}, + "execution_count": 30 + } + ] + }, + { + "cell_type": "code", + "source": [ + "float_16_tensor = float_32_tensor.type(torch.float16)\n", + "float_16_tensor.dtype" + ], + "metadata": { + "colab": { + "base_uri": 
"https://localhost:8080/" + }, + "id": "tFsHCvmZfOYe", + "outputId": "d3aa305a-7591-47f5-97fd-61bff60b44bd" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.float16" + ] + }, + "metadata": {}, + "execution_count": 31 + } + ] + }, + { + "cell_type": "code", + "source": [ + "float_16_tensor*float_32_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TQiCGTPuwq0q", + "outputId": "98750fce-1ca3-4889-e269-8b753efdea96" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 9., 36., 81.])" + ] + }, + "metadata": {}, + "execution_count": 32 + } + ] + }, + { + "cell_type": "code", + "source": [ + "int_32_tensor = torch.tensor([3, 6, 9], dtype = torch.int32)\n", + "int_32_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5hlrLvGUw5D_", + "outputId": "41d890a0-9aee-446c-d906-631ce2ab0995" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([3, 6, 9], dtype=torch.int32)" + ] + }, + "metadata": {}, + "execution_count": 33 + } + ] + }, + { + "cell_type": "code", + "source": [ + "int_32_tensor*float_32_tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ihApD9u3xTNW", + "outputId": "d295eed0-6996-4e0f-8502-ff4b55cd1373" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 9., 36., 81.])" + ] + }, + "metadata": {}, + "execution_count": 34 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x = torch.arange(0,100,10)" + ], + "metadata": { + "id": "utKhlb_KxWDQ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "x" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": 
"p78D74E9Rj7Y", + "outputId": "781a1614-a900-41f5-9e5d-358f0b2390aa" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])" + ] + }, + "metadata": {}, + "execution_count": 36 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.min()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4BcSs5NeRkcj", + "outputId": "3f24a8dc-58e9-4a5f-9834-e85856a34f9d" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0)" + ] + }, + "metadata": {}, + "execution_count": 37 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.max()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hinqvXVLRm4q", + "outputId": "5c7d8a53-3913-4ac1-bba3-5ba8ff68250a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(90)" + ] + }, + "metadata": {}, + "execution_count": 38 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.mean(x.type(torch.float32))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "k7okc0_vRpnB", + "outputId": "91e5494f-dc57-417c-ea4d-25dbc547c893" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(45.)" + ] + }, + "metadata": {}, + "execution_count": 39 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.type(torch.float32).mean()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "29QcDTjHRq10", + "outputId": "62937c6c-78e0-49f2-dde3-1543ee8f7907" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(45.)" + ] + }, + "metadata": {}, + "execution_count": 40 + } + ] + }, + { + "cell_type": "code", + 
"source": [ + "x.sum()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wlpY_G_sbdKF", + "outputId": "475d8258-af65-4011-a258-b93d4d8142d4" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(450)" + ] + }, + "metadata": {}, + "execution_count": 41 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.argmax()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GT6HJzwhbk4n", + "outputId": "2e455c20-c322-4bcf-d07c-1259d3ccefc6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(9)" + ] + }, + "metadata": {}, + "execution_count": 42 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x.argmin()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "egL3oi2Mb19P", + "outputId": "f71fb32f-6338-44a3-b377-75bea0a3ab54" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0)" + ] + }, + "metadata": {}, + "execution_count": 43 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "p2U8DZKib3DP", + "outputId": "b9f613b9-74e9-45f4-ed01-05babb6a6793" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0)" + ] + }, + "metadata": {}, + "execution_count": 44 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[9]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "24qBFlGYcABe", + "outputId": "5813cfcb-7f63-4bd7-ee46-f95ccbfda939" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(90)" + ] + }, + "metadata": {}, + "execution_count": 45 + } + ] + }, + { + 
"cell_type": "code", + "source": [ + "x = torch.arange(1, 10)\n", + "x.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "0GPOxEzkcBHO", + "outputId": "aefbd903-4f4c-4d2c-c90f-eccd682fe018" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([9])" + ] + }, + "metadata": {}, + "execution_count": 46 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_reshaped = x.reshape(1,9)\n", + "x_reshaped, x_reshaped.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "spmRgQjwddgp", + "outputId": "85a7c55c-2909-4ea2-fc68-386dddc65742" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9]]), torch.Size([1, 9]))" + ] + }, + "metadata": {}, + "execution_count": 47 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_reshaped.view(1,9)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "tH2ahWGydqqP", + "outputId": "65d92263-4fc4-434a-c06d-c5e08436f7fe" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9]])" + ] + }, + "metadata": {}, + "execution_count": 48 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_stacked = torch.stack([x, x, x, x], dim = 1)\n", + "x_stacked" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "jgCeJcaud_-1", + "outputId": "7f293a37-6ef1-43b6-aee5-9d6d91c94f9e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 1, 1, 1],\n", + " [2, 2, 2, 2],\n", + " [3, 3, 3, 3],\n", + " [4, 4, 4, 4],\n", + " [5, 5, 5, 5],\n", + " [6, 6, 6, 6],\n", + " [7, 7, 7, 7],\n", + " [8, 8, 8, 8],\n", + " [9, 9, 9, 9]])" + ] + }, + "metadata": {}, + 
"execution_count": 49 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_stacked.squeeze()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XhJHIK6cfPse", + "outputId": "06c47b89-3a9e-453e-bcc3-00cbcb0b8b49" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 1, 1, 1],\n", + " [2, 2, 2, 2],\n", + " [3, 3, 3, 3],\n", + " [4, 4, 4, 4],\n", + " [5, 5, 5, 5],\n", + " [6, 6, 6, 6],\n", + " [7, 7, 7, 7],\n", + " [8, 8, 8, 8],\n", + " [9, 9, 9, 9]])" + ] + }, + "metadata": {}, + "execution_count": 50 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_stacked.unsqueeze(dim=1)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ej2c3Xxzf0tq", + "outputId": "94024061-eb37-446d-c4a8-e4d16cb6de81" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[1, 1, 1, 1]],\n", + "\n", + " [[2, 2, 2, 2]],\n", + "\n", + " [[3, 3, 3, 3]],\n", + "\n", + " [[4, 4, 4, 4]],\n", + "\n", + " [[5, 5, 5, 5]],\n", + "\n", + " [[6, 6, 6, 6]],\n", + "\n", + " [[7, 7, 7, 7]],\n", + "\n", + " [[8, 8, 8, 8]],\n", + "\n", + " [[9, 9, 9, 9]]])" + ] + }, + "metadata": {}, + "execution_count": 52 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_stacked.squeeze()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4DJYo1a0f5M0", + "outputId": "efca2b47-1b14-44de-9a9a-2c83629d153f" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 1, 1, 1],\n", + " [2, 2, 2, 2],\n", + " [3, 3, 3, 3],\n", + " [4, 4, 4, 4],\n", + " [5, 5, 5, 5],\n", + " [6, 6, 6, 6],\n", + " [7, 7, 7, 7],\n", + " [8, 8, 8, 8],\n", + " [9, 9, 9, 9]])" + ] + }, + "metadata": {}, + "execution_count": 53 + } + ] + }, + { + "cell_type": "code", + "source": [ + 
"x_stacked.unsqueeze(dim=-2)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "J4iEjn2ah2HL", + "outputId": "22395593-7c16-4162-beae-dd2bbe7bda35" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[1, 1, 1, 1]],\n", + "\n", + " [[2, 2, 2, 2]],\n", + "\n", + " [[3, 3, 3, 3]],\n", + "\n", + " [[4, 4, 4, 4]],\n", + "\n", + " [[5, 5, 5, 5]],\n", + "\n", + " [[6, 6, 6, 6]],\n", + "\n", + " [[7, 7, 7, 7]],\n", + "\n", + " [[8, 8, 8, 8]],\n", + "\n", + " [[9, 9, 9, 9]]])" + ] + }, + "metadata": {}, + "execution_count": 55 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "tensor = torch.tensor([1, 2, 3])\n", + "tensor = tensor - 10\n", + "tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cFfiD7Nth7Z_", + "outputId": "1139e1f8-fc1a-46ca-d636-f2bc4fd2eef6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([-9, -8, -7])" + ] + }, + "metadata": {}, + "execution_count": 7 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.mul(tensor, 10)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dyA7BM_GHhqE", + "outputId": "0e3b9671-d9e8-4a32-87bb-59bc05986142" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([-90, -80, -70])" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.sub(tensor, 100)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "owtUsZ1KNegI", + "outputId": "189b7b23-0041-4e09-b991-cd209a48506a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([-109, -108, -107])" + ] + }, + "metadata": {}, + "execution_count": 
10 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.add(tensor, 100)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "K5STXlQONsyc", + "outputId": "00cbb79a-0a1d-4e21-86ec-5c91c37a2d01" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([91, 92, 93])" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.divide(tensor, 2)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xqMGnzIUNvp0", + "outputId": "c894cf3e-f148-45f8-cfc8-d78740735306" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([-4.5000, -4.0000, -3.5000])" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.matmul(tensor, tensor)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ruGzKpV8NyBc", + "outputId": "fddb63bf-006f-48b6-ae28-287fbcda8bc5" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(194)" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor@tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "8GS3r9yTeGfD", + "outputId": "c80b12ac-30b5-4f3d-c38c-9e41ba511b0e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(194)" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ] + }, + { + "cell_type": "code", + "source": [ + "%%time\n", + "tensor@tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QmuYHqXTemC0", + "outputId": "402fe3ba-70b5-4bb2-c83b-254db84ff810" + }, + "execution_count": null, + 
"outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "CPU times: user 622 µs, sys: 0 ns, total: 622 µs\n", + "Wall time: 516 µs\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(194)" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ] + }, + { + "cell_type": "code", + "source": [ + "%%time\n", + "torch.matmul(tensor,tensor)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dGr1fzdNepd8", + "outputId": "97bd6c91-bc25-4b38-cdf5-f22dcdef243e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "CPU times: user 424 µs, sys: 998 µs, total: 1.42 ms\n", + "Wall time: 1.43 ms\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(194)" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.rand(3,2)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "pGYDoK2gevfo", + "outputId": "2c8783d5-0453-47c5-c7ed-af10d25d6989" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0.5999, 0.0073],\n", + " [0.9321, 0.3026],\n", + " [0.3463, 0.3872]])" + ] + }, + "metadata": {}, + "execution_count": 20 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.matmul(torch.rand(3,2), torch.rand(2,3))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "KGBGQoB8e2DP", + "outputId": "4c2ef361-a2d0-41ee-c328-3992cbbc138d" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0.3528, 0.1893, 0.0714],\n", + " [1.2791, 0.7110, 0.2563],\n", + " [0.8812, 0.4553, 0.1803]])" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import torch" + ], + 
"metadata": { + "id": "ib8DMtkBe_LJ" + }, + "execution_count": 1, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "x = torch.rand(2,9)" + ], + "metadata": { + "id": "nJo8ZBdrQY1b" + }, + "execution_count": 2, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "x" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wi6oRv4MQfgf", + "outputId": "55c99f55-31f6-4cf5-ba4e-19a47c3a0167" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[0.5894, 0.4391, 0.2018, 0.5417, 0.3844, 0.3592, 0.9209, 0.9269, 0.0681],\n", + " [0.0746, 0.1740, 0.6821, 0.6890, 0.0999, 0.7444, 0.2391, 0.4625, 0.8302]])" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ] + }, + { + "cell_type": "code", + "source": [ + "y=torch.randn(2,3,5)\n", + "y" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Zpx8myAUQgoc", + "outputId": "07756d70-56bd-437c-c74e-9aecc1a77311" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[ 1.5552, -0.4877, 0.5175, -1.7958, -0.6187],\n", + " [-0.3359, -1.9710, 0.0112, -1.7578, -1.5295],\n", + " [ 0.0932, 1.4079, 0.9108, 0.3328, -0.6978]],\n", + "\n", + " [[-0.9406, -1.0809, -0.2595, 0.1282, 1.6605],\n", + " [ 1.1624, 1.0902, 1.7092, -0.2842, -1.3780],\n", + " [-0.1534, -1.2795, -0.5495, 0.9902, 0.1822]]])" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_original = torch.rand(size=(224,224,3))\n", + "x_original" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "s4U-X9bJQnWe", + "outputId": "657a7a76-962c-4b41-a76b-902d0482266c" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[[0.4549, 0.6809, 0.2118],\n", + " [0.4824, 0.9008, 0.8741],\n", + " [0.1715, 
0.1757, 0.1845],\n", + " ...,\n", + " [0.8741, 0.6594, 0.2610],\n", + " [0.0092, 0.1984, 0.1955],\n", + " [0.4236, 0.4182, 0.0251]],\n", + "\n", + " [[0.9174, 0.1661, 0.5852],\n", + " [0.1837, 0.2351, 0.3810],\n", + " [0.3726, 0.4808, 0.8732],\n", + " ...,\n", + " [0.6794, 0.0554, 0.9202],\n", + " [0.0864, 0.8750, 0.3558],\n", + " [0.8445, 0.9759, 0.4934]],\n", + "\n", + " [[0.1600, 0.2635, 0.7194],\n", + " [0.9488, 0.3405, 0.3647],\n", + " [0.6683, 0.5168, 0.9592],\n", + " ...,\n", + " [0.0521, 0.0140, 0.2445],\n", + " [0.3596, 0.3999, 0.2730],\n", + " [0.5926, 0.9877, 0.7784]],\n", + "\n", + " ...,\n", + "\n", + " [[0.4794, 0.5635, 0.3764],\n", + " [0.9124, 0.6094, 0.5059],\n", + " [0.4528, 0.4447, 0.5021],\n", + " ...,\n", + " [0.0089, 0.4816, 0.8727],\n", + " [0.2173, 0.6296, 0.2347],\n", + " [0.2028, 0.9931, 0.7201]],\n", + "\n", + " [[0.3116, 0.6459, 0.4703],\n", + " [0.0148, 0.2345, 0.7149],\n", + " [0.8393, 0.5804, 0.6691],\n", + " ...,\n", + " [0.2105, 0.9460, 0.2696],\n", + " [0.5918, 0.9295, 0.2616],\n", + " [0.2537, 0.7819, 0.4700]],\n", + "\n", + " [[0.6654, 0.1200, 0.5841],\n", + " [0.9147, 0.5522, 0.6529],\n", + " [0.1799, 0.5276, 0.5415],\n", + " ...,\n", + " [0.7536, 0.4346, 0.8793],\n", + " [0.3793, 0.1750, 0.7792],\n", + " [0.9266, 0.8325, 0.9974]]])" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_permuted=x_original.permute(2, 0, 1)\n", + "print(x_original.shape)\n", + "print(x_permuted.shape)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "DD19_zvbQzHo", + "outputId": "1d64ce1b-eb48-47e3-90b6-7f1340e7f2b2" + }, + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "torch.Size([224, 224, 3])\n", + "torch.Size([3, 224, 224])\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_original[0,0,0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": 
"NnPmMk4ZRF7w", + "outputId": "2cd5da7f-4a23-4a76-8c4a-bb982113f2a4" + }, + "execution_count": 10, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0.4549)" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_permuted[0,0,0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Z0ylNoAARgTo", + "outputId": "ddca0298-cddf-4048-9b71-a791655e5bed" + }, + "execution_count": 11, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0.4549)" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_original[0,0,0]=0.989" + ], + "metadata": { + "id": "RXw0xXsDRi4L" + }, + "execution_count": 13, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "x_original[0,0,0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "1sFdV6wzRo3f", + "outputId": "1cf87d2c-6d88-453a-d136-0f625a2800f1" + }, + "execution_count": 14, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0.9890)" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_permuted[0,0,0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xTX-hx2SR1wp", + "outputId": "0d4908c4-c3bc-44e3-8ec6-1487104cc209" + }, + "execution_count": 15, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(0.9890)" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x=torch.arange(1,10).reshape(1,3,3)\n", + "x, x.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mZomOe7gR4Q8", + "outputId": "0b3c922f-ec11-46de-b8a5-9f9533d866ad" + }, + "execution_count": 18, + "outputs": [ + { + "output_type": 
"execute_result", + "data": { + "text/plain": [ + "(tensor([[[1, 2, 3],\n", + " [4, 5, 6],\n", + " [7, 8, 9]]]),\n", + " torch.Size([1, 3, 3]))" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "3y7v4SQvSBs1", + "outputId": "8c53307d-e628-404d-db66-56c6bdffab7c" + }, + "execution_count": 19, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[1, 2, 3],\n", + " [4, 5, 6],\n", + " [7, 8, 9]])" + ] + }, + "metadata": {}, + "execution_count": 19 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0][0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hf9uG4xLSNya", + "outputId": "3075bc42-9ffa-426b-8a86-95628ffcd824" + }, + "execution_count": 21, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1, 2, 3])" + ] + }, + "metadata": {}, + "execution_count": 21 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0][0][0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "zA4G2Se4SRB3", + "outputId": "324312d2-ed0a-49eb-f81f-e904e53992fe" + }, + "execution_count": 22, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(1)" + ] + }, + "metadata": {}, + "execution_count": 22 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0][2][2]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Mwy3zmKKSdbk", + "outputId": "d35172c3-b099-40a6-ddf1-a453c2adfa44" + }, + "execution_count": 23, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor(9)" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[:,1,1]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" 
+ }, + "id": "fE3nCM1KS7XT", + "outputId": "01f5d755-9737-4235-9f73-dce89ff6ba16" + }, + "execution_count": 24, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([5])" + ] + }, + "metadata": {}, + "execution_count": 24 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0,0,:]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "luNDINKNTTxp", + "outputId": "091195ef-2f71-4602-e95f-529a69193150" + }, + "execution_count": 25, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1, 2, 3])" + ] + }, + "metadata": {}, + "execution_count": 25 + } + ] + }, + { + "cell_type": "code", + "source": [ + "x[0,:,2]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "KG8A4xbfThCL", + "outputId": "5866bc41-9241-4619-be7b-e9206b3f80ab" + }, + "execution_count": 26, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([3, 6, 9])" + ] + }, + "metadata": {}, + "execution_count": 26 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import numpy as np" + ], + "metadata": { + "id": "CZ3PX0qlTwHJ" + }, + "execution_count": 27, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "array = np.arange(1.0, 8.0)" + ], + "metadata": { + "id": "UOBeTumiT3Lf" + }, + "execution_count": 28, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "array" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "RzcO32E9UCQl", + "outputId": "430def24-c42c-461f-e5e7-398544c695d3" + }, + "execution_count": 29, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([1., 2., 3., 4., 5., 6., 7.])" + ] + }, + "metadata": {}, + "execution_count": 29 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor = torch.from_numpy(array)\n", + "tensor" + ], + "metadata": { + "colab": { + "base_uri": 
"https://localhost:8080/" + }, + "id": "JJIL0q1DUC6O", + "outputId": "8a3b1d7c-4482-4d32-f34f-9212d9d3a177" + }, + "execution_count": 32, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64)" + ] + }, + "metadata": {}, + "execution_count": 32 + } + ] + }, + { + "cell_type": "code", + "source": [ + "array[3]=11.0" + ], + "metadata": { + "id": "j3Ce6q3DUIEK" + }, + "execution_count": 33, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "array" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dc_BCVdjUsCc", + "outputId": "65537325-8b11-4f36-fc73-e56f30d6a036" + }, + "execution_count": 34, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([ 1., 2., 3., 11., 5., 6., 7.])" + ] + }, + "metadata": {}, + "execution_count": 34 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "VG1e_eITUta2", + "outputId": "a26c5198-23b6-4a6d-d73a-ba20cd9782b8" + }, + "execution_count": 35, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([ 1., 2., 3., 11., 5., 6., 7.], dtype=torch.float64)" + ] + }, + "metadata": {}, + "execution_count": 35 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor = torch.ones(7)\n", + "tensor, tensor.dtype\n", + "numpy_tensor = tensor.numpy()\n", + "numpy_tensor, numpy_tensor.dtype" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Swt8JF8vUuev", + "outputId": "c9e5bf6a-6d2c-41d6-8327-366867ffdd2d" + }, + "execution_count": 37, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(array([1., 1., 1., 1., 1., 1., 1.], dtype=float32), dtype('float32'))" + ] + }, + "metadata": {}, + "execution_count": 37 + } + ] + }, + { + "cell_type": "code", + "source": 
[ + "import torch\n", + "random_tensor_A = torch.rand(3,4)\n", + "random_tensor_B = torch.rand(3,4)\n", + "print(random_tensor_A)\n", + "print(random_tensor_B)\n", + "print(random_tensor_A == random_tensor_B)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "uGcagTteVFTD", + "outputId": "49405790-08e7-4210-b7f1-f00b904c7eb9" + }, + "execution_count": 38, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tensor([[0.9870, 0.6636, 0.6873, 0.8863],\n", + " [0.8386, 0.4169, 0.3587, 0.0265],\n", + " [0.2981, 0.6025, 0.5652, 0.5840]])\n", + "tensor([[0.9821, 0.3481, 0.0913, 0.4940],\n", + " [0.7495, 0.4387, 0.9582, 0.8659],\n", + " [0.5064, 0.6919, 0.0809, 0.9771]])\n", + "tensor([[False, False, False, False],\n", + " [False, False, False, False],\n", + " [False, False, False, False]])\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "RANDOM_SEED = 42\n", + "torch.manual_seed(RANDOM_SEED)\n", + "random_tensor_C = torch.rand(3,4)\n", + "torch.manual_seed(RANDOM_SEED)\n", + "random_tensor_D = torch.rand(3,4)\n", + "print(random_tensor_C)\n", + "print(random_tensor_D)\n", + "print(random_tensor_C == random_tensor_D)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "HznyXyEaWjLM", + "outputId": "25956434-01b6-4059-9054-c9978884ddc1" + }, + "execution_count": 46, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tensor([[0.8823, 0.9150, 0.3829, 0.9593],\n", + " [0.3904, 0.6009, 0.2566, 0.7936],\n", + " [0.9408, 0.1332, 0.9346, 0.5936]])\n", + "tensor([[0.8823, 0.9150, 0.3829, 0.9593],\n", + " [0.3904, 0.6009, 0.2566, 0.7936],\n", + " [0.9408, 0.1332, 0.9346, 0.5936]])\n", + "tensor([[True, True, True, True],\n", + " [True, True, True, True],\n", + " [True, True, True, True]])\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!nvidia-smi" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + 
}, + "id": "vltPTh0YXJSt", + "outputId": "807af6dc-a9ca-4301-ec32-b688dbde8be8" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Thu May 23 02:57:59 2024 \n", + "+---------------------------------------------------------------------------------------+\n", + "| NVIDIA-SMI 535.104.05 Driver Version: 535.104.05 CUDA Version: 12.2 |\n", + "|-----------------------------------------+----------------------+----------------------+\n", + "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n", + "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n", + "| | | MIG M. |\n", + "|=========================================+======================+======================|\n", + "| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n", + "| N/A 60C P8 11W / 70W | 0MiB / 15360MiB | 0% Default |\n", + "| | | N/A |\n", + "+-----------------------------------------+----------------------+----------------------+\n", + " \n", + "+---------------------------------------------------------------------------------------+\n", + "| Processes: |\n", + "| GPU GI CI PID Type Process name GPU Memory |\n", + "| ID ID Usage |\n", + "|=======================================================================================|\n", + "| No running processes found |\n", + "+---------------------------------------------------------------------------------------+\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "torch.cuda.is_available()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "L6mMyPDyYh1j", + "outputId": "279c5dd8-c2a8-4fbd-f321-2f5d7c6e90e6" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ] + }, + { + "cell_type": "code", + "source": [ + "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", + "device" + 
], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "oOdiYa7ZYytx", + "outputId": "d73b04fc-8963-4826-9722-08d118d5ab91" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'cuda'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.cuda.device_count()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vOdsazLqZFM5", + "outputId": "8189cd6a-9017-4663-a652-3e15c517d9c3" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "1" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor = torch.tensor([1,2,3], device = \"cpu\")\n", + "print(tensor, tensor.device)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cdik9Vw3ZMv0", + "outputId": "044a68fd-83a1-409d-8e3b-655142ca0270" + }, + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tensor([1, 2, 3]) cpu\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor_on_gpu = tensor.to(device)\n", + "tensor_on_gpu" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Zmp835rrZp-z", + "outputId": "37fa3413-18a3-47bf-ae51-5b36ff85a3ef" + }, + "execution_count": 8, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1, 2, 3], device='cuda:0')" + ] + }, + "metadata": {}, + "execution_count": 8 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor_on_gpu.numpy()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 159 + }, + "id": "jhriaa8uZ1yM", + "outputId": "bc5a3226-1a12-4fea-8769-a44f21cdc323" + 
}, + "execution_count": 10, + "outputs": [ + { + "output_type": "error", + "ename": "TypeError", + "evalue": "can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtensor_on_gpu\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnumpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first." + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "tensor_on_cpu = tensor_on_gpu.cpu().numpy()" + ], + "metadata": { + "id": "LHGXK3GgaOzL" + }, + "execution_count": 12, + "outputs": [] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "j-El4LlCajfq" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn tham khảo chính thức. Đối với các thông tin quan trọng, nên sử dụng dịch vụ dịch thuật chuyên nghiệp từ con người. Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n" + ] + } + ] +} \ No newline at end of file