python——餐饮数据分析(2022年)
位置: 首页 >专题范文 > 公文范文 > 文章内容

python——餐饮数据分析(2022年)

2022-07-05 13:10:06 投稿作者:网友投稿 点击:

下面是小编为大家整理的python——餐饮数据分析(2022年),供大家参考。

python——餐饮数据分析(2022年)

 

 # Restaurant order analysis with Python -- Part 1: daily diners and revenue.
#
# Tutorial note from the original article: split() breaks one string into a
# list of substrings, and enumerate(info["use_start_time"]) walks a collection
# while also yielding the index of the current element.  The article used both
# in a row-by-row loop; the vectorised ``.str`` accessor below does the same
# per-row work in one pass.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

info = pd.read_csv("meal_order_info.csv", encoding="utf-8")
info_before = pd.read_csv("info_new.csv", encoding="utf-8")

# Stack the two order tables into one.
info_all = pd.concat([info_before, info])
print(" 查看各表的维数:\n", info.shape, info_before.shape, info_all.shape)
info.head()  # notebook-style preview; has no effect when run as a script

# Keep only the rows whose order_status is 1.  read_csv normally parses this
# column as integers, so the string "1" alone would match nothing; accept both
# spellings.
info = info_all[info_all["order_status"].isin([1, "1"])]
info = info.reset_index(drop=True)

# Daily diners and revenue.
# Drop the hour/minute/second part of each timestamp: take the text before the
# first whitespace and parse it as a date.
info["use_start_time"] = pd.to_datetime(
    info["use_start_time"].str.split().str[0]
)

# Pick the three columns of interest and group them by day.
groupbyday = info[
    ["use_start_time", "number_consumers", "accounts_payable"]
].groupby(by="use_start_time")
sale_day = groupbyday.sum()

# Name the aggregated columns (diners, sales).
sale_day.columns = ["⼈数", "销量"]
sale_day  # notebook-style preview

 # Scatter plot (advanced scatter plot): diners per day against the day number.
# One x value per row of sale_day, numbered from 1, so the plot works for any
# number of days instead of exactly 243.
x = np.arange(1, len(sale_day) + 1)
# The column is named without padding spaces where sale_day is built.
y = sale_day["⼈数"]

# A single figure with the intended size (a second, empty figure used to be
# created and shown alongside it).
fig1 = plt.figure(figsize=(12, 6))
plt.plot(x, y, ".r")
plt.xlabel("x")
plt.ylabel("y")
plt.savefig(" 散点图 .png", dpi=300)
plt.show()

 # 2-D histogram of the x/y variables (headed "line chart" in the article, but
# what is drawn is a binned density map).
nbins = 200
H, xedges, yedges = np.histogram2d(x, y, bins=nbins)

# histogram2d bins x along axis 0, while pcolormesh draws rows along the
# vertical axis, so the counts must be transposed.  (A 90-degree rotation
# followed by an up/down flip is exactly a transpose.)
H = H.T

# Mask empty bins so they are left blank instead of coloured as zero.
Hmasked = np.ma.masked_where(H == 0, H)

# A single figure with the intended size (a second, empty figure used to be
# created and shown alongside it).
fig2 = plt.figure(figsize=(12, 6))
plt.pcolormesh(xedges, yedges, Hmasked)
plt.xlabel("x")
plt.ylabel("y")
cbar = plt.colorbar()
cbar.ax.set_ylabel("Counts")
plt.savefig("2D.png", dpi=300)
plt.show()

 # Line chart of the number of diners per day.
# Select a font that contains CJK glyphs so the Chinese labels render, and
# keep the minus sign displayable with that font.
plt.rcParams["font.sans-serif"] = ["SimHei"]
plt.rcParams["axes.unicode_minus"] = False

plt.figure(figsize=(12, 6))
plt.title(" 每⽇⽤餐⼈数折线图 ")
plt.xlabel("⽇期")
plt.ylabel(" ⽤ 餐 ⼈ 数 ")
# The column is named without padding spaces where sale_day is built.
plt.plot(sale_day["⼈数"])
plt.show()
plt.close()  # must be called; a bare ``plt.close`` does nothing

# Advanced line charts -- dual axes.
# Line chart of daily sales, drawn on a fresh figure.
plt.figure(figsize=(12, 6))
plt.title(" 每⽇销售额的折线图 ")
plt.xlabel("⽇期")
plt.ylabel(" 销 售 额 ")
plt.plot(sale_day["销量"])
plt.show()
plt.close()

 # Dual-axis line chart: diners on the left axis, sales on the right axis.
# Keep the figure handle: autofmt_xdate() below is a Figure method.
fig = plt.figure(figsize=(12, 6))
# The columns are named without padding spaces where sale_day is built.
Y1 = sale_day["⼈数"]
Y2 = sale_day["销量"]

plt.title("title")

# Plot y1 vs x in green on the left vertical axis.
plt.xlabel(" 时 间 ")
plt.ylabel("y1", color="g")
plt.tick_params(axis="y", labelcolor="g")
plt.plot(Y1, "g-", linewidth=2)
fig.autofmt_xdate(rotation=50)

# Plot y2 vs x in red on the right vertical axis.
plt.twinx()
plt.ylabel("y2", color="r")
plt.tick_params(axis="y", labelcolor="r")
plt.plot(Y2, "r-", linewidth=2)

# To save the graph, call plt.savefig("saltandtemp_V1.png", dpi=300) here,
# before plt.show().
plt.show()

# Part 2: customer data preparation.
info_august = pd.read_csv("meal_order_info.csv", encoding="utf-8")
users_august = pd.read_csv("users.csv", encoding="gbk")

# Keep only the orders whose status is 1.  read_csv normally parses this
# column as integers, so the string "1" alone would match nothing; accept both
# spellings.
info_august_new = info_august[info_august["order_status"].isin([1, "1"])]
info_august_new = info_august_new.reset_index(drop=True)
print("提取的订单数据维数:", info_august_new.shape)
info_august_new.head()  # notebook-style preview

 # Save the filtered orders, then attach each user's last visit time.
info_august_new.to_csv("info_august_new.csv", index=False, encoding="utf-8")

# For every order, copy order column 9 into column 14 of the matching user
# row; later orders overwrite earlier ones.
# NOTE(review): the positional indices are kept from the original.  Column 14
# of users_august appears to be LAST_VISITS (the column cleaned just below);
# order column 1 is presumably the user id and column 9 the dining time --
# confirm against the CSV headers.
for i in range(len(info_august_new)):  # every order, including the last one
    num = users_august[
        users_august["USER_ID"] == info_august_new.iloc[i, 1]
    ].index.tolist()
    if not num:
        # The order refers to a user that is not in the user table.
        continue
    # The index label doubles as the row position because read_csv assigns a
    # default 0..n-1 index.
    users_august.iloc[num[0], 14] = info_august_new.iloc[i, 9]

# Keep only users that have a recorded last visit, then the four columns used
# downstream.  (dropna replaces the former fill-with-999-then-drop sentinel and
# no longer writes the sentinel into users_august.)
user = users_august.dropna(subset=["LAST_VISITS"])
user = user.iloc[:, [0, 2, 12, 14]]
user.head()  # notebook-style preview

# Load the historical customer and order tables.
users = pd.read_csv("user_loss.csv", encoding="gbk")
info = pd.read_csv("info_new.csv", encoding="utf-8")
print("历史客户信息表的维数:", users.shape)
print("历史订单表的维数:", info.shape)

# Parse the time columns.
users["CREATED"] = pd.to_datetime(users["CREATED"])
info["use_start_time"] = pd.to_datetime(info["use_start_time"])
info["lock_time"] = pd.to_datetime(info["lock_time"])

# Match each historical user (column 2 is compared with the order "name"
# column) with the latest dining time and store it in column 14.
for i in range(len(users)):
    info1 = info[info["name"] == users.iloc[i, 2]]
    if not info1.empty:
        users.iloc[i, 14] = info1["use_start_time"].max()

# Feature selection: keep valid orders (status 1) and the columns needed for
# the customer-value features.
info = info.loc[
    info["order_status"] == 1,
    ["emp_id", "number_consumers", "expenditure"],
]
info = info.rename(columns={"emp_id": "USER_ID"})  # rename the id column
info.head()  # notebook-style preview

 # Build the F / M / R customer-value features and standardise them.
# NOTE(review): info_user and user_value1 are used below but are not defined
# anywhere in the visible part of this script -- the steps that build them
# appear to be missing from the article; confirm against the original notebook.
import pandas as pd
from sklearn.preprocessing import StandardScaler
import numpy as np

info_user.to_csv("info_user.csv", index=False, encoding="utf-8")

# F feature.
user_value1.columns = ["USER_ID", "F"]  # rename the columns
print("F 特 征 的 最 ⼤ 值 :", user_value1["F"].max())
print("F特征的最⼩值:", user_value1["F"].min())

# M feature: total expenditure per user.
# An earlier step renames emp_id to USER_ID in ``info``, so group by whichever
# of the two names is actually present.
id_col = "USER_ID" if "USER_ID" in info.columns else "emp_id"
user_value2 = info[[id_col, "expenditure"]].groupby(by=id_col).sum()
user_value2 = pd.DataFrame(user_value2).reset_index()
user_value2.columns = ["USER_ID", "M"]
user_value = pd.merge(user_value1, user_value2, on="USER_ID")
print("M特征的最⼤值:", user_value["M"].max())
print("M特征的最⼩值:", user_value["M"].min())

# R feature: time elapsed between the last visit and the end of the window.
user_value = pd.merge(user_value, user, on="USER_ID")  # join the two tables

# Convert the last-visit text to dates: keep the part before the first
# whitespace (the date) and parse it.
last_time = pd.to_datetime(user_value["LAST_VISITS"].str.split().str[0])
user_value["LAST_VISITS"] = last_time
deadline = pd.to_datetime("2016-8-31")  # end of the observation window
user_value["R"] = deadline - last_time
print("R特征的最⼤值:", user_value["R"].max())
print("R特征的最⼩值:", user_value["R"].min())

# Listing 7-5
# Feature extraction: reorder the columns by position and save them.
user_value = user_value.iloc[:, [0, 3, 6, 1, 2]]
user_value.to_csv("user_value.csv", encoding="utf-8_sig", index=False)
USER_ID = user_value["USER_ID"]
ACCOUNT = user_value["ACCOUNT"]

# Keep the last three columns; work on a copy so the conversion below does not
# write into a slice of the previous frame.
user_value = user_value.iloc[:, [2, 3, 4]].copy()
# Replace the timedelta in the first column with a plain number of days.
first_col = user_value.columns[0]
user_value[first_col] = user_value[first_col].dt.days

# Standardise to zero mean and unit variance, then persist the matrix.
standard = StandardScaler().fit_transform(user_value)
np.savez("standard.npz", standard)
print(standard)

# Part 3: clustering.

 # Cluster the standardised features with K-Means and draw a radar chart of
# the cluster centres.
from sklearn.cluster import KMeans  # was used without ever being imported
import matplotlib.pyplot as plt

standard = np.load("standard.npz")["arr_0"]
k = 3  # number of cluster centres

# Build the model.  The n_jobs argument is not passed: it has been removed
# from KMeans in current scikit-learn and raises TypeError there.  n_init is
# pinned to the historical default of 10 so results do not change with the
# library version.
kmeans_model = KMeans(n_clusters=k, n_init=10, random_state=123)
fit_kmeans = kmeans_model.fit(standard)  # train the model
print("聚类中⼼:\n", kmeans_model.cluster_centers_)
print("样本的类别标签:\n", kmeans_model.labels_)

# Count the samples in each cluster.
r1 = pd.Series(kmeans_model.labels_).value_counts()
print("最终每个类别的数⽬为:\n", r1)

# Listing 7-7
# (In a notebook, enable inline plots with ``%matplotlib inline``.)
# Render Chinese text and the minus sign correctly.
plt.rcParams["font.sans-serif"] = "SimHei"
plt.rcParams["axes.unicode_minus"] = False

# Radar chart.
feature = ["R", "F", "M"]
N = len(kmeans_model.cluster_centers_[0])
# Spread the N features evenly around the circle ...
angles = np.linspace(0, 2 * np.pi, N, endpoint=False)
# ... and repeat the first angle so every outline closes on itself.
angles = np.concatenate((angles, [angles[0]]))

fig = plt.figure(figsize=(7, 7))
ax = fig.add_subplot(111, polar=True)
sam = ["r", "g", "b"]
lstype = ["-", "--", "-."]
lab = []
for i, center in enumerate(kmeans_model.cluster_centers_):
    values = np.concatenate((center, [center[0]]))
    # Outline of the cluster centre; colours/styles cycle if k exceeds three.
    ax.plot(
        angles,
        values,
        sam[i % len(sam)],
        linestyle=lstype[i % len(lstype)],
        linewidth=2,
        markersize=10,
    )
    ax.fill(angles, values, alpha=0.5)  # fill the outline
    lab.append("客户群" + str(i + 1))

# Label each spoke with its feature name.  The closing duplicate angle is
# dropped so there is exactly one tick per label.
ax.set_thetagrids(angles[:-1] * 180 / np.pi, feature, fontsize=15)
plt.title("客户群特征分布图")  # chart title
ax.grid(True)
plt.legend(lab)
# To save the chart, call plt.savefig(...) here, before plt.show().
plt.show()
plt.close()  # must be called; a bare ``plt.close`` does nothing

 (图:客户群特征分布图——各客户群在 R、F、M 三个特征上的雷达图)


推荐访问:python——餐饮数据分析 餐饮 分析 数据

猜你喜欢