{"id":769,"date":"2025-01-05T22:57:41","date_gmt":"2025-01-05T14:57:41","guid":{"rendered":"https:\/\/www.laixuexila.com\/?p=769"},"modified":"2025-01-11T16:11:43","modified_gmt":"2025-01-11T08:11:43","slug":"%e6%95%b0%e6%8d%ae%e5%88%86%e6%9e%90%e5%bf%85%e5%a4%87%e6%8a%80%e8%83%bd%ef%bc%9apython%e7%9a%84%e6%95%b0%e6%8d%ae%e6%b8%85%e6%b4%97%e6%8a%80%e6%9c%af","status":"publish","type":"post","link":"https:\/\/www.laixuexila.com\/index.php\/2025\/01\/05\/%e6%95%b0%e6%8d%ae%e5%88%86%e6%9e%90%e5%bf%85%e5%a4%87%e6%8a%80%e8%83%bd%ef%bc%9apython%e7%9a%84%e6%95%b0%e6%8d%ae%e6%b8%85%e6%b4%97%e6%8a%80%e6%9c%af\/","title":{"rendered":"\u6570\u636e\u5206\u6790\u5fc5\u5b66\u6280\u80fd\uff1aPython \u6570\u636e\u6e05\u6d17\u6280\u5de7\u5168\u9762\u89e3\u6790"},"content":{"rendered":"\n<blockquote class=\"wp-block-quote is-layout-flow wp-block-quote-is-layout-flow\">\n<p class=\"wp-block-paragraph\">\u6570\u636e\u6e05\u6d17\u662f\u6570\u636e\u5206\u6790\u4e2d\u6700\u91cd\u8981\u7684\u6b65\u9aa4\u4e4b\u4e00\uff0c\u5b83\u6d89\u53ca\u5904\u7406\u7f3a\u5931\u503c\u3001\u91cd\u590d\u6570\u636e\u3001\u683c\u5f0f\u4e0d\u4e00\u81f4\u7b49\u95ee\u9898\uff0c\u4ee5\u786e\u4fdd\u6570\u636e\u7684\u8d28\u91cf\u548c\u51c6\u786e\u6027\u3002Python \u63d0\u4f9b\u4e86\u591a\u79cd\u5f3a\u5927\u7684\u5de5\u5177\u548c\u6280\u672f\uff0c\u53ef\u4ee5\u5e2e\u52a9\u9ad8\u6548\u5730\u8fdb\u884c\u6570\u636e\u6e05\u6d17\u3002\u4e0b\u9762\u662f Python \u6570\u636e\u6e05\u6d17\u7684\u6700\u65b0\u6280\u672f\u548c\u65b9\u6cd5\u3002<\/p>\n<\/blockquote>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading\">1. 
<strong>Pandas\uff1a\u6570\u636e\u6e05\u6d17\u7684\u6838\u5fc3\u5de5\u5177<\/strong><\/h3>\n\n\n\n<p class=\"wp-block-paragraph\"><code>Pandas<\/code> \u662f Python \u4e2d\u6700\u5e38\u7528\u7684\u6570\u636e\u5904\u7406\u548c\u6e05\u6d17\u5e93\uff0c\u5b83\u63d0\u4f9b\u4e86\u4e30\u5bcc\u7684\u51fd\u6570\u548c\u65b9\u6cd5\uff0c\u53ef\u4ee5\u5e2e\u52a9\u4f60\u8f7b\u677e\u6e05\u6d17\u6570\u636e\u3002\u4ee5\u4e0b\u662f\u4e00\u4e9b\u6700\u65b0\u548c\u5e38\u89c1\u7684\u6280\u672f\u3002<\/p>\n\n\n\n<h4 class=\"wp-block-heading\">1.1 <strong>\u5904\u7406\u7f3a\u5931\u503c<\/strong><\/h4>\n\n\n\n<p class=\"wp-block-paragraph\">\u7f3a\u5931\u503c\u662f\u6570\u636e\u6e05\u6d17\u4e2d\u5e38\u89c1\u7684\u95ee\u9898\u3002<code>pandas<\/code> \u63d0\u4f9b\u4e86\u51e0\u79cd\u65b9\u6cd5\u6765\u5904\u7406\u7f3a\u5931\u503c\u3002<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u67e5\u770b\u7f3a\u5931\u503c<\/strong><\/li>\n<\/ul>\n\n\n\n<pre class=\"wp-block-code\"><code>import pandas as pd\n\ndf = pd.read_csv('data.csv')\nprint(df.isnull().sum())  # \u67e5\u770b\u6bcf\u5217\u7684\u7f3a\u5931\u503c\u6570\u91cf<\/code><\/pre>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u5220\u9664\u542b\u7f3a\u5931\u503c\u7684\u884c\u6216\u5217<\/strong><\/li>\n<\/ul>\n\n\n\n<pre class=\"wp-block-code\"><code># \u5220\u9664\u5305\u542b\u7f3a\u5931\u503c\u7684\u884c\ndf.dropna(axis=0, inplace=True)\n\n# \u5220\u9664\u5305\u542b\u7f3a\u5931\u503c\u7684\u5217\ndf.dropna(axis=1, inplace=True)<\/code><\/pre>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u586b\u5145\u7f3a\u5931\u503c<\/strong><br><code>fillna()<\/code> \u53ef\u4ee5\u7528\u4e0d\u540c\u7684\u65b9\u5f0f\u586b\u5145\u7f3a\u5931\u503c\u3002<\/li>\n<\/ul>\n\n\n\n<pre class=\"wp-block-code\"><code>  # \u7528\u7279\u5b9a\u503c\u586b\u5145\n  df.fillna(0, inplace=True)\n\n  # \u7528\u524d\u4e00\u4e2a\u975e\u7a7a\u503c\u586b\u5145\n  df.fillna(method='ffill', inplace=True)\n\n  # \u7528\u5217\u7684\u5747\u503c\u586b\u5145\n  
df.fillna(df.mean(), inplace=True)<\/code><\/pre>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u66ff\u6362\u7f3a\u5931\u503c<\/strong><br><code>replace()<\/code> \u65b9\u6cd5\u7528\u4e8e\u66ff\u6362\u6307\u5b9a\u503c\u3002<\/li>\n<\/ul>\n\n\n\n<pre class=\"wp-block-code\"><code>  import numpy as np\n\n  df.replace(to_replace=np.nan, value=0, inplace=True)  # \u66ff\u6362 None \u6216 NaN<\/code><\/pre>\n\n\n\n<h4 class=\"wp-block-heading\">1.2 <strong>\u53bb\u91cd<\/strong><\/h4>\n\n\n\n<p class=\"wp-block-paragraph\">\u53bb\u9664\u91cd\u590d\u884c\u662f\u6570\u636e\u6e05\u6d17\u7684\u5e38\u89c1\u4efb\u52a1\u3002<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u5220\u9664\u91cd\u590d\u884c<\/strong><\/li>\n<\/ul>\n\n\n\n<pre class=\"wp-block-code\"><code>df.drop_duplicates(inplace=True)<\/code><\/pre>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u4fdd\u7559\u7279\u5b9a\u5217\u7684\u552f\u4e00\u503c<\/strong><\/li>\n<\/ul>\n\n\n\n<pre class=\"wp-block-code\"><code>df.drop_duplicates(subset=&#91;'column1'], inplace=True)<\/code><\/pre>\n\n\n\n<h4 class=\"wp-block-heading\">1.3 <strong>\u5904\u7406\u5f02\u5e38\u503c<\/strong><\/h4>\n\n\n\n<p class=\"wp-block-paragraph\">\u5f02\u5e38\u503c\uff08outliers\uff09\u662f\u6307\u4e0e\u5176\u4ed6\u6570\u636e\u5dee\u5f02\u8f83\u5927\u7684\u503c\u3002\u4f60\u53ef\u4ee5\u901a\u8fc7\u4e00\u4e9b\u7edf\u8ba1\u65b9\u6cd5\uff08\u5982 Z-Score \u6216 IQR\uff09\u6765\u8bc6\u522b\u548c\u5904\u7406\u5f02\u5e38\u503c\u3002<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u4f7f\u7528 Z-Score \u68c0\u6d4b\u5f02\u5e38\u503c<\/strong><\/li>\n<\/ul>\n\n\n\n<pre class=\"wp-block-code\"><code>from scipy import stats\n\nz_scores = stats.zscore(df&#91;'column_name'])\ndf = df&#91;(z_scores &lt; 3) &amp; (z_scores &gt; -3)]  # \u8fc7\u6ee4\u6389 Z-Score &gt; 3 \u6216 &lt; -3 \u7684\u6570\u636e<\/code><\/pre>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u4f7f\u7528 IQR \u68c0\u6d4b\u5f02\u5e38\u503c<\/strong><\/li>\n<\/ul>\n\n\n\n<pre 
class=\"wp-block-code\"><code>Q1 = df&#91;'column_name'].quantile(0.25)\nQ3 = df&#91;'column_name'].quantile(0.75)\nIQR = Q3 - Q1\n\ndf = df&#91;(df&#91;'column_name'] &gt;= (Q1 - 1.5 * IQR)) &amp; (df&#91;'column_name'] &lt;= (Q3 + 1.5 * IQR))]<\/code><\/pre>\n\n\n\n<h4 class=\"wp-block-heading\">1.4 <strong>\u6570\u636e\u7c7b\u578b\u8f6c\u6362<\/strong><\/h4>\n\n\n\n<p class=\"wp-block-paragraph\">\u6709\u65f6\u5019\u6570\u636e\u5217\u7684\u7c7b\u578b\u53ef\u80fd\u4e0d\u7b26\u5408\u8981\u6c42\uff0c<code>pandas<\/code> \u63d0\u4f9b\u4e86\u8f6c\u6362\u6570\u636e\u7c7b\u578b\u7684\u529f\u80fd\u3002<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u8f6c\u6362\u6570\u636e\u7c7b\u578b<\/strong><\/li>\n<\/ul>\n\n\n\n<pre class=\"wp-block-code\"><code>df&#91;'column_name'] = df&#91;'column_name'].astype(float)<\/code><\/pre>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u8f6c\u6362\u4e3a\u65e5\u671f\u7c7b\u578b<\/strong><\/li>\n<\/ul>\n\n\n\n<pre class=\"wp-block-code\"><code>df&#91;'date_column'] = pd.to_datetime(df&#91;'date_column'], errors='coerce')<\/code><\/pre>\n\n\n\n<h4 class=\"wp-block-heading\">1.5 <strong>\u5b57\u7b26\u4e32\u6e05\u6d17<\/strong><\/h4>\n\n\n\n<p class=\"wp-block-paragraph\">\u5b57\u7b26\u4e32\u6e05\u6d17\u662f\u6570\u636e\u6e05\u6d17\u4e2d\u5f88\u91cd\u8981\u7684\u4e00\u90e8\u5206\uff0c\u7279\u522b\u662f\u5728\u5904\u7406\u6587\u672c\u6570\u636e\u65f6\u3002<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u53bb\u9664\u524d\u540e\u7a7a\u767d<\/strong><\/li>\n<\/ul>\n\n\n\n<pre class=\"wp-block-code\"><code>df&#91;'column_name'] = df&#91;'column_name'].str.strip()<\/code><\/pre>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u8f6c\u6362\u4e3a\u5c0f\u5199\u6216\u5927\u5199<\/strong><\/li>\n<\/ul>\n\n\n\n<pre class=\"wp-block-code\"><code>df&#91;'column_name'] = df&#91;'column_name'].str.lower()  # \u8f6c\u6362\u4e3a\u5c0f\u5199\ndf&#91;'column_name'] = df&#91;'column_name'].str.upper()  # 
\u8f6c\u6362\u4e3a\u5927\u5199<\/code><\/pre>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u66ff\u6362\u5b57\u7b26\u4e32\u4e2d\u7684\u5b57\u7b26<\/strong><\/li>\n<\/ul>\n\n\n\n<pre class=\"wp-block-code\"><code>df&#91;'column_name'] = df&#91;'column_name'].str.replace('old_value', 'new_value')<\/code><\/pre>\n\n\n\n<h4 class=\"wp-block-heading\">1.6 <strong>\u5904\u7406\u65e5\u671f\u65f6\u95f4\u6570\u636e<\/strong><\/h4>\n\n\n\n<p class=\"wp-block-paragraph\">\u65e5\u671f\u65f6\u95f4\u6570\u636e\u7684\u6e05\u6d17\u5305\u62ec\u683c\u5f0f\u5316\u65e5\u671f\u3001\u63d0\u53d6\u65e5\u671f\u4e2d\u7684\u67d0\u4e9b\u90e8\u5206\u7b49\u3002<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u63d0\u53d6\u65e5\u671f\u4e2d\u7684\u5e74\u3001\u6708\u3001\u65e5<\/strong><\/li>\n<\/ul>\n\n\n\n<pre class=\"wp-block-code\"><code>df&#91;'year'] = df&#91;'date_column'].dt.year\ndf&#91;'month'] = df&#91;'date_column'].dt.month\ndf&#91;'day'] = df&#91;'date_column'].dt.day<\/code><\/pre>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u683c\u5f0f\u5316\u65e5\u671f<\/strong><\/li>\n<\/ul>\n\n\n\n<pre class=\"wp-block-code\"><code>df&#91;'date_column'] = df&#91;'date_column'].dt.strftime('%Y-%m-%d')<\/code><\/pre>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading\">2. 
<strong>\u9ad8\u7ea7\u6570\u636e\u6e05\u6d17\u6280\u5de7<\/strong><\/h3>\n\n\n\n<h4 class=\"wp-block-heading\">2.1 <strong>\u5408\u5e76\u4e0e\u8fde\u63a5\u6570\u636e<\/strong><\/h4>\n\n\n\n<p class=\"wp-block-paragraph\">\u5728\u6570\u636e\u6e05\u6d17\u8fc7\u7a0b\u4e2d\uff0c\u53ef\u80fd\u9700\u8981\u5408\u5e76\u6765\u81ea\u4e0d\u540c\u6570\u636e\u6e90\u7684\u6570\u636e\uff0c<code>pandas<\/code> \u63d0\u4f9b\u4e86 <code>merge()<\/code> \u548c <code>concat()<\/code> \u6765\u5e2e\u52a9\u5b9e\u73b0\u3002<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u6309\u952e\u5408\u5e76\u6570\u636e<\/strong><\/li>\n<\/ul>\n\n\n\n<pre class=\"wp-block-code\"><code>df_merged = pd.merge(df1, df2, on='key_column', how='inner')  # 'inner'\u3001'left'\u3001'right'\u3001'outer'<\/code><\/pre>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u6cbf\u8f74\u8fde\u63a5\u6570\u636e<\/strong><\/li>\n<\/ul>\n\n\n\n<pre class=\"wp-block-code\"><code>df_concat = pd.concat(&#91;df1, df2], axis=0)  # \u884c\u5408\u5e76\uff0caxis=1 \u4e3a\u5217\u5408\u5e76<\/code><\/pre>\n\n\n\n<h4 class=\"wp-block-heading\">2.2 <strong>\u900f\u89c6\u8868\u4e0e\u6570\u636e\u805a\u5408<\/strong><\/h4>\n\n\n\n<p class=\"wp-block-paragraph\">\u901a\u8fc7 <code>groupby()<\/code> \u53ef\u4ee5\u5bf9\u6570\u636e\u8fdb\u884c\u805a\u5408\uff0c\u8ba1\u7b97\u6c47\u603b\u7edf\u8ba1\u6570\u636e\u3002<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u5206\u7ec4\u8ba1\u7b97\u6c47\u603b<\/strong><\/li>\n<\/ul>\n\n\n\n<pre class=\"wp-block-code\"><code>grouped = df.groupby('category_column').agg({'value_column': &#91;'mean', 'sum', 'count']})<\/code><\/pre>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u521b\u5efa\u900f\u89c6\u8868<\/strong><\/li>\n<\/ul>\n\n\n\n<pre class=\"wp-block-code\"><code>pivot = df.pivot_table(values='value_column', index='row_column', columns='col_column', aggfunc='sum')<\/code><\/pre>\n\n\n\n<h4 class=\"wp-block-heading\">2.3 
<strong>\u5904\u7406\u5206\u7c7b\u6570\u636e<\/strong><\/h4>\n\n\n\n<p class=\"wp-block-paragraph\">\u5bf9\u4e8e\u5206\u7c7b\u6570\u636e\uff0c\u53ef\u4ee5\u4f7f\u7528 <code>pd.get_dummies()<\/code> \u8fdb\u884c\u72ec\u70ed\u7f16\u7801\uff0c\u4e5f\u53ef\u4ee5\u5bf9\u5176\u8fdb\u884c\u6620\u5c04\u3002<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u72ec\u70ed\u7f16\u7801<\/strong><\/li>\n<\/ul>\n\n\n\n<pre class=\"wp-block-code\"><code>df_encoded = pd.get_dummies(df&#91;'category_column'], prefix='category')<\/code><\/pre>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u6620\u5c04\u5206\u7c7b\u503c<\/strong><\/li>\n<\/ul>\n\n\n\n<pre class=\"wp-block-code\"><code>df&#91;'category_column'] = df&#91;'category_column'].map({'category1': 1, 'category2': 2})<\/code><\/pre>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading\">3. <strong>\u6700\u65b0\u7684 Python \u6570\u636e\u6e05\u6d17\u5de5\u5177\u548c\u6280\u672f<\/strong><\/h3>\n\n\n\n<h4 class=\"wp-block-heading\">3.1 <strong>Dask<\/strong><\/h4>\n\n\n\n<p class=\"wp-block-paragraph\"><code>Dask<\/code> \u662f\u4e00\u4e2a\u4e0e <code>pandas<\/code> \u7c7b\u4f3c\u7684\u6570\u636e\u5904\u7406\u5e93\uff0c\u652f\u6301\u5904\u7406\u8d85\u5927\u89c4\u6a21\u6570\u636e\u3002\u5b83\u4f7f\u7528\u5e76\u884c\u8ba1\u7b97\u6765\u52a0\u901f\u6570\u636e\u6e05\u6d17\u8fc7\u7a0b\uff0c\u9002\u7528\u4e8e\u5185\u5b58\u65e0\u6cd5\u5bb9\u7eb3\u7684\u6570\u636e\u96c6\u3002<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u5b89\u88c5\uff1a<\/strong><\/li>\n<\/ul>\n\n\n\n<pre class=\"wp-block-code\"><code>pip install dask<\/code><\/pre>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u57fa\u672c\u7528\u6cd5\uff1a<\/strong><\/li>\n<\/ul>\n\n\n\n<pre class=\"wp-block-code\"><code>import dask.dataframe as dd\n\ndf = dd.read_csv('large_data.csv')\ndf_cleaned = df.dropna().compute()  # \u4f7f\u7528 Dask \u5904\u7406\u5927\u6570\u636e\u96c6<\/code><\/pre>\n\n\n\n<h4 
class=\"wp-block-heading\">3.2 <strong>Pyjanitor<\/strong><\/h4>\n\n\n\n<p class=\"wp-block-paragraph\"><code>pyjanitor<\/code> \u662f\u4e00\u4e2a\u6570\u636e\u6e05\u6d17\u5de5\u5177\uff0c\u5b83\u63d0\u4f9b\u4e86\u4e00\u4e9b\u5e38\u7528\u7684\u6570\u636e\u6e05\u6d17\u529f\u80fd\u5c01\u88c5\uff0c\u4ee5\u4fbf\u4e8e\u94fe\u5f0f\u8c03\u7528\u3002<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u5b89\u88c5\uff1a<\/strong><\/li>\n<\/ul>\n\n\n\n<pre class=\"wp-block-code\"><code>pip install pyjanitor<\/code><\/pre>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u793a\u4f8b\u4ee3\u7801\uff1a<\/strong><\/li>\n<\/ul>\n\n\n\n<pre class=\"wp-block-code\"><code>import pandas as pd\nimport janitor\n\ndf = pd.read_csv('data.csv')\ndf_cleaned = df.clean_names().remove_empty()  # \u81ea\u52a8\u6e05\u7406\u5217\u540d\u5e76\u79fb\u9664\u5168\u4e3a\u7a7a\u7684\u884c\u548c\u5217<\/code><\/pre>\n\n\n\n<h4 class=\"wp-block-heading\">3.3 <strong>Modin<\/strong><\/h4>\n\n\n\n<p class=\"wp-block-paragraph\"><code>Modin<\/code> \u662f <code>pandas<\/code> \u7684\u52a0\u901f\u7248\u672c\uff0c\u80fd\u8ba9\u4f60\u5229\u7528\u591a\u6838 CPU \u8fdb\u884c\u6570\u636e\u5904\u7406\u548c\u6e05\u6d17\u3002\u5b83\u7684 API \u548c <code>pandas<\/code> \u517c\u5bb9\uff0c\u80fd\u81ea\u52a8\u5206\u5e03\u8ba1\u7b97\u4efb\u52a1\u3002<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u5b89\u88c5\uff1a<\/strong><\/li>\n<\/ul>\n\n\n\n<pre class=\"wp-block-code\"><code>pip install modin<\/code><\/pre>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u4f7f\u7528\u793a\u4f8b\uff1a<\/strong><\/li>\n<\/ul>\n\n\n\n<pre class=\"wp-block-code\"><code>import modin.pandas as mpd\n\ndf = mpd.read_csv('large_data.csv')\ndf_cleaned = df.dropna()  # \u81ea\u52a8\u52a0\u901f\u6570\u636e\u6e05\u6d17<\/code><\/pre>\n\n\n\n<h4 class=\"wp-block-heading\">3.4 <strong>FuzzyWuzzy<\/strong><\/h4>\n\n\n\n<p class=\"wp-block-paragraph\"><code>FuzzyWuzzy<\/code> 
\u662f\u4e00\u4e2a\u6a21\u7cca\u5b57\u7b26\u4e32\u5339\u914d\u5e93\uff0c\u5e38\u7528\u4e8e\u6e05\u6d17\u548c\u6807\u51c6\u5316\u76f8\u4f3c\u4f46\u4e0d\u5b8c\u5168\u76f8\u540c\u7684\u6587\u672c\u6570\u636e\u3002<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u5b89\u88c5\uff1a<\/strong><\/li>\n<\/ul>\n\n\n\n<pre class=\"wp-block-code\"><code>pip install fuzzywuzzy<\/code><\/pre>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u793a\u4f8b\u4ee3\u7801\uff1a<\/strong><\/li>\n<\/ul>\n\n\n\n<pre class=\"wp-block-code\"><code>from fuzzywuzzy import fuzz\n\n# \u6bd4\u8f83\u4e24\u4e2a\u5b57\u7b26\u4e32\u7684\u76f8\u4f3c\u5ea6\nsimilarity = fuzz.ratio(\"apple\", \"applle\")\nprint(similarity)<\/code><\/pre>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading\"><strong>\u603b\u7ed3<\/strong><\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">Python \u63d0\u4f9b\u4e86\u5f3a\u5927\u7684\u5de5\u5177\u548c\u5e93\u6765\u652f\u6301\u9ad8\u6548\u7684\u6570\u636e\u6e05\u6d17\u5de5\u4f5c\u3002<code>pandas<\/code> \u662f\u6700\u5e38\u7528\u7684\u5de5\u5177\uff0c\u51e0\u4e4e\u53ef\u4ee5\u5e94\u5bf9\u6240\u6709\u5e38\u89c1\u7684\u6570\u636e\u6e05\u6d17\u4efb\u52a1\u3002\u901a\u8fc7\u7ed3\u5408 <code>Dask<\/code>\u3001<code>Modin<\/code>\u3001<code>pyjanitor<\/code> \u7b49\u5de5\u5177\uff0c\u53ef\u4ee5\u8fdb\u4e00\u6b65\u63d0\u5347\u6570\u636e\u6e05\u6d17\u7684\u6548\u7387\uff0c\u5c24\u5176\u662f\u5728\u5904\u7406\u5927\u89c4\u6a21\u6570\u636e\u96c6\u65f6\u3002<\/p>\n\n\n\n<p 
class=\"wp-block-paragraph\">\u638c\u63e1\u8fd9\u4e9b\u6e05\u6d17\u6280\u672f\uff0c\u80fd\u591f\u5e2e\u52a9\u4f60\u5feb\u901f\u5904\u7406\u548c\u51c6\u5907\u6570\u636e\uff0c\u4e3a\u540e\u7eed\u7684\u6570\u636e\u5206\u6790\u548c\u5efa\u6a21\u5de5\u4f5c\u6253\u4e0b\u575a\u5b9e\u57fa\u7840\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u6570\u636e\u6e05\u6d17\u662f\u6570\u636e\u5206\u6790\u4e2d\u6700\u91cd\u8981\u7684\u6b65\u9aa4\u4e4b\u4e00\uff0c\u5b83\u6d89\u53ca\u5904\u7406\u7f3a\u5931\u503c\u3001\u91cd\u590d\u6570\u636e\u3001\u683c\u5f0f\u4e0d\u4e00\u81f4\u7b49\u95ee\u9898\uff0c\u4ee5\u786e\u4fdd\u6570\u636e\u7684\u8d28\u91cf\u548c\u51c6\u786e\u6027\u3002 [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[54],"tags":[],"class_list":["post-769","post","type-post","status-publish","format-standard","hentry","category-python"],"_links":{"self":[{"href":"https:\/\/www.laixuexila.com\/index.php\/wp-json\/wp\/v2\/posts\/769","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.laixuexila.com\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.laixuexila.com\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.laixuexila.com\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.laixuexila.com\/index.php\/wp-json\/wp\/v2\/comments?post=769"}],"version-history":[{"count":2,"href":"https:\/\/www.laixuexila.com\/index.php\/wp-json\/wp\/v2\/posts\/769\/revisions"}],"predecessor-version":[{"id":896,"href":"https:\/\/www.laixuexila.com\/index.php\/wp-json\/wp\/v2\/posts\/769\/revisions\/896"}],"wp:attachment":[{"href":"https:\/\/www.laixuexila.com\/index.php\/wp-json\/wp\/v2\/media?parent=769"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.laixuexila.com\/index.php\/wp-json\/w
p\/v2\/categories?post=769"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.laixuexila.com\/index.php\/wp-json\/wp\/v2\/tags?post=769"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}