From ecee428b6c6b50b0a680636c1237fcc51ad7121e Mon Sep 17 00:00:00 2001 From: benjas <909336740@qq.com> Date: Sat, 23 Jan 2021 12:12:21 +0800 Subject: [PATCH] Add. JData Project introduction --- .../数据清洗-checkpoint.ipynb | 6 + .../数据清洗.ipynb | 129 ++++++++++++++++++ 2 files changed, 135 insertions(+) create mode 100644 机器学习竞赛实战_优胜解决方案/京东用户购买意向预测/.ipynb_checkpoints/数据清洗-checkpoint.ipynb create mode 100644 机器学习竞赛实战_优胜解决方案/京东用户购买意向预测/数据清洗.ipynb diff --git a/机器学习竞赛实战_优胜解决方案/京东用户购买意向预测/.ipynb_checkpoints/数据清洗-checkpoint.ipynb b/机器学习竞赛实战_优胜解决方案/京东用户购买意向预测/.ipynb_checkpoints/数据清洗-checkpoint.ipynb new file mode 100644 index 0000000..2fd6442 --- /dev/null +++ b/机器学习竞赛实战_优胜解决方案/京东用户购买意向预测/.ipynb_checkpoints/数据清洗-checkpoint.ipynb @@ -0,0 +1,6 @@ +{ + "cells": [], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/机器学习竞赛实战_优胜解决方案/京东用户购买意向预测/数据清洗.ipynb b/机器学习竞赛实战_优胜解决方案/京东用户购买意向预测/数据清洗.ipynb new file mode 100644 index 0000000..8e5db25 --- /dev/null +++ b/机器学习竞赛实战_优胜解决方案/京东用户购买意向预测/数据清洗.ipynb @@ -0,0 +1,129 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 任务:京东用户购买意向预测" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 故事背景:\n", + "京东作为中国最大的自营式电商,在保持高速发展的同时,沉淀了数亿的忠实用户,积累了海量的真实数据。如何从历史数据中找出规律,去预测用户未来的购买需求,让最合适的商品遇见最需要的人,是大数据应用在精准营销中的关键问题,也是所有电商平台在做智能化升级时所需要的核心技术。\n", + "\n", + "以京东商城真实的用户、商品和行为数据(脱敏后)为基础,通过数据挖掘的技术和机器学习的算法,构建用户购买商品的预测模型,输出高潜用户和目标商品的匹配结果,为精准营销提供高质量的目标群体。\n", + "\n", + "目标:使用京东多个品类下商品的历史销售数据,构建算法模型,预测用户在未来5天内,对某个目标品类下商品的购买意向。" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 数据集:\n", + "这里涉及到的数据集是京东的数据集:\n", + "\n", + "* JData_User.csv 用户数据集 105,321个用户\n", + "* JData_Comment.csv 商品评论 558,552条记录\n", + "* JData_Product.csv 预测商品集合 24,187条记录\n", + "* JData_Action_201602.csv 2月份行为交互记录 11,485,424条记录\n", + "* JData_Action_201603.csv 3月份行为交互记录 25,916,378条记录\n", + "* JData_Action_201604.csv 4月份行为交互记录 13,199,934条记录" + ] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "**JData_User.csv用户数据**\n", + "\n", + "|字段|意义|备注|\n", + "|-|-|-|\n", + "|user_id|用户id|脱敏|\n", + "|age|年龄|-1表未知|\n", + "|sex|性别|0男,1女,2未知|\n", + "|user_lv_cd|用户等级|级别枚举,级别越高数字越大|\n", + "|user_reg_tm|用户注册日期|粒度到天|" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**JData_Comment.csv评论数据**\n", + "\n", + "|字段|意义|备注|\n", + "|-|-|-|\n", + "|dt|截止时间|天,到2016-02-01|\n", + "|sku_id|商品编号|脱敏|\n", + "|comment_num|累积评论数分段|0表示无评论,1表示1条,2表示2-10条,3表示11-50条,4表示大于50条|\n", + "|has_bad_comment|是否有差评|0表示无,1表示有|\n", + "|bad_comment_rate|差评率|差评数占总评论数的比率|" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**JData_Product.csv商品数据**\n", + "\n", + "|字段|意义|备注|\n", + "|-|-|-|\n", + "|sku_id|商品编号|脱敏|\n", + "|a1|属性1|枚举,-1表未知|\n", + "|a2|属性2|枚举,-1表未知|\n", + "|a3|属性3|枚举,-1表未知|\n", + "|cate|品类ID|脱敏|\n", + "|brand|品牌ID|脱敏|" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**JData_Action_xx.csv行为数据**\n", + "\n", + "|字段|意义|备注|\n", + "|-|-|-|\n", + "|user_id|用户ID|脱敏|\n", + "|sku_id|商品编号|脱敏|\n", + "|time|行为时间||\n", + "|model_id|点击板块的编号|脱敏|\n", + "|type|行为类型|1.浏览商品详情页;2.加入购物车;3.购物车删除;4.下单;5.关注;6.点击;|\n", + "|cate|品类ID|脱敏|\n", + "|brand|品牌ID|脱敏|" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}