{"id":386,"date":"2022-07-21T11:54:04","date_gmt":"2022-07-21T03:54:04","guid":{"rendered":"https:\/\/kuo.us.to\/wordpress\/?p=386"},"modified":"2023-01-17T16:48:43","modified_gmt":"2023-01-17T08:48:43","slug":"python%e5%ad%b8%e7%bf%92%e7%ad%86%e8%a8%98-%e6%ad%a3%e5%89%87%e8%a1%a8%e9%81%94%e5%bc%8f_regex2","status":"publish","type":"post","link":"https:\/\/kuo.us.to\/wordpress\/python%e5%ad%b8%e7%bf%92%e7%ad%86%e8%a8%98\/386\/","title":{"rendered":"Python\u5b78\u7fd2\u7b46\u8a18: \u6b63\u5247\u8868\u9054\u5f0f_Regex(2)"},"content":{"rendered":"\n<pre class=\"wp-block-code\"><code lang=\"python\" class=\"language-python line-numbers\">import pandas as pd\nfile_location = '0000_\u5b78\u751f\u8cc7\u6599\u7bc4\u4f8b\u6a94_\u975e\u6b63\u898f.xlsx'\ndf = pd.read_excel(file_location)\n\nprint(len(df))\nprint(df.columns)\ndf.info()\ndf<\/code><\/pre>\n\n\n<div class=\"lm-Widget p-Widget lm-Panel p-Panel jp-OutputArea-child\">\n<div class=\"lm-Widget p-Widget jp-RenderedText jp-OutputArea-output\" data-mime-type=\"application\/vnd.jupyter.stdout\">\n<pre>Index(['\u5b78\u865f', '\u51fa\u751f\u5e74\u6708\u65e5', '\u6027\u5225', '\u4e2d\u6587\u59d3\u540d', '\u90e8\u5225', '\u7cfb\u6240'], dtype='object')\n&lt;class 'pandas.core.frame.DataFrame'&gt;\nRangeIndex: 17 entries, 0 to 16\nData columns (total 6 columns):\n #   Column  Non-Null Count  Dtype \n---  ------  --------------  ----- \n 0   \u5b78\u865f      17 non-null     object\n 1   \u51fa\u751f\u5e74\u6708\u65e5   17 non-null     int64 \n 2   \u6027\u5225      17 non-null     object\n 3   \u4e2d\u6587\u59d3\u540d    17 non-null     object\n 4   \u90e8\u5225      17 non-null     object\n 5   \u7cfb\u6240      17 non-null     object\ndtypes: int64(1), object(5)\nmemory usage: 944.0+ bytes\n<\/pre>\n<\/div>\n<\/div>\n<div class=\"lm-Widget p-Widget lm-Panel p-Panel jp-OutputArea-child jp-OutputArea-executeResult\">\n<div class=\"lm-Widget p-Widget jp-OutputPrompt jp-OutputArea-prompt\">[1]:<\/div>\n<div class=\"lm-Widget p-Widget jp-RenderedHTMLCommon jp-RenderedHTML jp-OutputArea-output\" data-mime-type=\"text\/html\">\n<div>\n<table class=\"dataframe\" border=\"1\">\n<thead>\n<tr>\n<th>&nbsp;<\/th>\n<th>\u5b78\u865f<\/th>\n<th>\u51fa\u751f\u5e74\u6708\u65e5<\/th>\n<th>\u6027\u5225<\/th>\n<th>\u4e2d\u6587\u59d3\u540d<\/th>\n<th>\u90e8\u5225<\/th>\n<th>\u7cfb\u6240<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<th>0<\/th>\n<td>a 1002252<\/td>\n<td>650502<\/td>\n<td>\u5973<\/td>\n<td>\u9ec3\u6587\u6b23<\/td>\n<td>\u5927\u5b78\u90e8<\/td>\n<td>\u6d41\u901a\u7ba1\u7406\u5b78\u7cfb<\/td>\n<\/tr>\n<tr>\n<th>1<\/th>\n<td>U100 7128<\/td>\n<td>720525<\/td>\n<td>\u7537<\/td>\n<td>\u5f90\u7fe0\u6d0b<\/td>\n<td>\u5927\u5b78\u90e8<\/td>\n<td>\u6d41\u901a\u7ba1\u7406\u5b78\u7cfb<\/td>\n<\/tr>\n<tr>\n<th>2<\/th>\n<td>U_1017113<\/td>\n<td>721116<\/td>\n<td>\u7537<\/td>\n<td>\u6797\u6b23\u8679<\/td>\n<td>\u5927\u5b78\u90e8<\/td>\n<td>\u6d41\u901a\u7ba1\u7406\u5b78\u7cfb<\/td>\n<\/tr>\n<tr>\n<th>3<\/th>\n<td>U1017117<\/td>\n<td>730224<\/td>\n<td>\u7537<\/td>\n<td>\u5433\u8a9e\u7fa4<\/td>\n<td>\u5927\u5b78\u90e8<\/td>\n<td>\u6d41\u901a\u7ba1\u7406\u5b78\u7cfb<\/td>\n<\/tr>\n<tr>\n<th>4<\/th>\n<td>U1017146<\/td>\n<td>720923<\/td>\n<td>\u7537<\/td>\n<td>\u8a79\u4fca\u5b8f<\/td>\n<td>\u5927\u5b78\u90e8<\/td>\n<td>\u6b77\u53f2\u5b78\u7cfb<\/td>\n<\/tr>\n<tr>\n<th>5<\/th>\n<td>U102\u4e5d7140<\/td>\n<td>690101<\/td>\n<td>\u5973<\/td>\n<td>\u6797\u7d20\u5143<\/td>\n<td>\u5927\u5b78\u90e8<\/td>\n<td>\u6b77\u53f2\u5b78\u7cfb<\/td>\n<\/tr>\n<tr>\n<th>6<\/th>\n<td>U1027153<\/td>\n<td>700503<\/td>\n<td>\u7537<\/td>\n<td>\u6d2a\u5049\u8aa0<\/td>\n<td>\u5927\u5b78\u90e8<\/td>\n<td>\u6b77\u53f2\u5b78\u7cfb<\/td>\n<\/tr>\n<tr>\n<th>7<\/th>\n<td>U1017123<\/td>\n<td>730318<\/td>\n<td>\u5973<\/td>\n<td>\u9ec3\u7389\u5ef7<\/td>\n<td>\u5927\u5b78\u90e8<\/td>\n<td>\u6b77\u53f2\u5b78\u7cfb<\/td>\n<\/tr>\n<tr>\n<th>8<\/th>\n<td>U1027130<\/td>\n<td>740802<\/td>\n<td>\u5973<\/td>\n<td>\u9ec3\u74ca\u745e<\/td>\n<td>\u5927\u5b78\u90e8<\/td>\n<td>\u96fb\u6a5f\u5de5\u7a0b\u5b78\u7cfb<\/td>\n<\/tr>\n<tr>\n<th>9<\/th>\n<td>U1027156<\/td>\n<td>730607<\/td>\n<td>\u7537<\/td>\n<td>\u9673\u5955\u5b63<\/td>\n<td>\u5927\u5b78\u90e8<\/td>\n<td>\u96fb\u6a5f\u5de5\u7a0b\u5b78\u7cfb<\/td>\n<\/tr>\n<tr>\n<th>10<\/th>\n<td>U1027119<\/td>\n<td>720121<\/td>\n<td>\u7537<\/td>\n<td>\u85cd\u7acb\u7487<\/td>\n<td>\u5927\u5b78\u90e8<\/td>\n<td>\u96fb\u6a5f\u5de5\u7a0b\u5b78\u7cfb<\/td>\n<\/tr>\n<tr>\n<th>11<\/th>\n<td>U1037130<\/td>\n<td>690909<\/td>\n<td>\u5973<\/td>\n<td>\u6c5f\u59f5\u5229<\/td>\n<td>\u5927\u5b78\u90e8<\/td>\n<td>\u96fb\u6a5f\u5de5\u7a0b\u5b78\u7cfb<\/td>\n<\/tr>\n<tr>\n<th>12<\/th>\n<td>U1037131<\/td>\n<td>730118<\/td>\n<td>\u5973<\/td>\n<td>\u6797\u65b0\u4e91<\/td>\n<td>\u5927\u5b78\u90e8<\/td>\n<td>\u793e\u6703\u5b78\u7cfb<\/td>\n<\/tr>\n<tr>\n<th>13<\/th>\n<td>U1047102<\/td>\n<td>751123<\/td>\n<td>\u5973<\/td>\n<td>\u8a79\u6dd1\u73b2<\/td>\n<td>\u5927\u5b78\u90e8<\/td>\n<td>\u793e\u6703\u5b78\u7cfb<\/td>\n<\/tr>\n<tr>\n<th>14<\/th>\n<td>U1047113<\/td>\n<td>750903<\/td>\n<td>\u7537<\/td>\n<td>\u6797\u4e16\u5149<\/td>\n<td>\u5927\u5b78\u90e8<\/td>\n<td>\u793e\u6703\u5b78\u7cfb<\/td>\n<\/tr>\n<tr>\n<th>15<\/th>\n<td>U1047120<\/td>\n<td>760907<\/td>\n<td>\u5973<\/td>\n<td>\u5433\u60e0\u8339<\/td>\n<td>\u5927\u5b78\u90e8<\/td>\n<td>\u793e\u6703\u5b78\u7cfb<\/td>\n<\/tr>\n<tr>\n<th>16<\/th>\n<td>U1047124<\/td>\n<td>760227<\/td>\n<td>\u7537<\/td>\n<td>\u9673\u6bc5\u6587<\/td>\n<td>\u5927\u5b78\u90e8<\/td>\n<td>\u793e\u6703\u5b78\u7cfb<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div>\n<\/div>\n<\/div>\n\n\n<pre class=\"wp-block-code\"><code lang=\"python\" class=\"language-python line-numbers\"># \u627e\u51fa\u5b78\u865f\u88cf\u9762\u542b\u6587\u6578\u5b57\u8cc7\u6599\ndf[df['\u5b78\u865f'].str.contains(r'\\w')]['\u5b78\u865f']<\/code><\/pre>\n\n\n<pre>0      a 1002252\n1     U100  7128\n2      U_1017113\n3      U1017117 \n4       U1017146\n5      U102\u4e5d7140\n6       U1027153\n7       U1017123\n8       U1027130\n9       U1027156\n10      U1027119\n11      U1037130\n12      U1037131\n13      U1047102\n14      U1047113\n15      U1047120\n16      U1047124\nName: \u5b78\u865f, dtype: object<\/pre>\n\n\n<pre class=\"wp-block-code\"><code lang=\"python\" class=\"language-python line-numbers\"># \u627e\u51fa\u5b78\u865f\u88cf\u9762\u542b\u975e\u6587\u6578\u5b57\u8cc7\u6599\ndf[df['\u5b78\u865f'].str.contains(r'\\W')]['\u5b78\u865f']<\/code><\/pre>\n\n\n<pre>0     a 1002252\n1    U100  7128\n3     U1017117 \nName: \u5b78\u865f, dtype: object<\/pre>\n\n\n<pre class=\"wp-block-code\"><code lang=\"python\" class=\"language-python line-numbers\"># \u5c07\u5b78\u865f\u88cf\u9762\u542b\u975e\u6587\u6578\u5b57\u8cc7\u6599\u53d6\u4ee3\u70ba\"\"\ndf['\u5b78\u865f'].str.replace(r'\\W+', '', regex=True)<\/code><\/pre>\n\n\n<pre>0      a1002252\n1      U1007128\n2     U_1017113\n3      U1017117\n4      U1017146\n5     U102\u4e5d7140\n6      U1027153\n7      U1017123\n8      U1027130\n9      U1027156\n10     U1027119\n11     U1037130\n12     U1037131\n13     U1047102\n14     U1047113\n15     U1047120\n16     U1047124\nName: \u5b78\u865f, dtype: object<\/pre>\n\n\n<pre class=\"wp-block-code\"><code lang=\"python\" class=\"language-python line-numbers\"># \u5c07\u5b78\u865f\u88cf\u9762\u542b\u975e\u82f1\u6587\u53ca\u6578\u5b57\u8cc7\u6599\u53d6\u4ee3\u70ba\"\"\ndf['\u5b78\u865f'].str.replace(r'[^a-zA-Z0-9]', '', regex=True)<\/code><\/pre>\n\n\n<pre>0     a1002252\n1     U1007128\n2     U1017113\n3     U1017117\n4     U1017146\n5     U1027140\n6     U1027153\n7     U1017123\n8     U1027130\n9     U1027156\n10    U1027119\n11    U1037130\n12    U1037131\n13    U1047102\n14    U1047113\n15    U1047120\n16    U1047124\nName: \u5b78\u865f, dtype: object<\/pre>\n\n\n<pre class=\"wp-block-code\"><code lang=\"python\" class=\"language-python line-numbers\"># \u5b78\u865f\u7be9\u9078\u51fa\u7d14\u6578\u5b57\nprint(df['\u5b78\u865f'].str.replace(r'\\D', '', regex=True))\n\n# \u7b49\u540c\u4e0a\u5217\u7a0b\u5f0f\u78bc\nprint(df['\u5b78\u865f'].str.replace(r'[^0-9]', '', regex=True))<\/code><\/pre>\n\n\n<pre>0     1002252\n1     1007128\n2     1017113\n3     1017117\n4     1017146\n5     1027140\n6     1027153\n7     1017123\n8     1027130\n9     1027156\n10    1027119\n11    1037130\n12    1037131\n13    1047102\n14    1047113\n15    1047120\n16    1047124\nName: \u5b78\u865f, dtype: object\n0     1002252\n1     1007128\n2     1017113\n3     1017117\n4     1017146\n5     1027140\n6     1027153\n7     1017123\n8     1027130\n9     1027156\n10    1027119\n11    1037130\n12    1037131\n13    1047102\n14    1047113\n15    1047120\n16    1047124\nName: \u5b78\u865f, dtype: object<\/pre>","protected":false},"excerpt":{"rendered":"<p>Index([&#8216;\u5b78\u865f&#8217;, &#8216;\u51fa\u751f\u5e74\u6708\u65e5&#8217;, &#8216;\u6027\u5225&#8217;, &#8216;\u4e2d\u6587\u59d3\u540d&#8217;, &#8216;\u90e8\u5225&#8217;, &#8216;\u7cfb\u6240&#8217;], dtype= [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":389,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"advgb_blocks_editor_width":"","advgb_blocks_columns_visual_guide":"","site-sidebar-layout":"default","site-content-layout":"","ast-site-content-layout":"","site-content-style":"default","site-sidebar-style":"default","ast-global-header-display":"","ast-banner-title-visibility":"","ast-main-header-display":"","ast-hfb-above-header-display":"","ast-hfb-below-header-display":"","ast-hfb-mobile-header-display":"","site-post-title":"","ast-breadcrumbs-content":"","ast-featured-img":"","footer-sml-layout":"","theme-transparent-header-meta":"","adv-header-id-meta":"","stick-header-meta":"","header-above-stick-meta":"","header-main-stick-meta":"","header-below-stick-meta":"","astra-migrate-meta-layouts":"default","ast-page-background-enabled":"default","ast-page-background-meta":{"desktop":{"background-color":"var(--ast-global-color-4)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"ast-content-background-meta":{"desktop":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}}},"categories":[4],"tags":[8,9,171,172,173],"author_meta":{"display_name":"mackuo","author_link":"https:\/\/kuo.us.to\/wordpress\/author\/mackuo\/"},"featured_img":"https:\/\/kuo.us.to\/wordpress\/wp-content\/uploads\/2022\/07\/m7mtn5x9tgy-200x300.jpg","coauthors":[],"tax_additional":{"categories":{"linked":["<a href=\"https:\/\/kuo.us.to\/wordpress\/category\/python%e5%ad%b8%e7%bf%92%e7%ad%86%e8%a8%98\/\" class=\"advgb-post-tax-term\">Python\u5b78\u7fd2\u7b46\u8a18<\/a>"],"unlinked":["<span class=\"advgb-post-tax-term\">Python\u5b78\u7fd2\u7b46\u8a18<\/span>"]},"tags":{"linked":["<a href=\"https:\/\/kuo.us.to\/wordpress\/category\/python%e5%ad%b8%e7%bf%92%e7%ad%86%e8%a8%98\/\" class=\"advgb-post-tax-term\">Pandas<\/a>","<a href=\"https:\/\/kuo.us.to\/wordpress\/category\/python%e5%ad%b8%e7%bf%92%e7%ad%86%e8%a8%98\/\" class=\"advgb-post-tax-term\">Python<\/a>","<a href=\"https:\/\/kuo.us.to\/wordpress\/category\/python%e5%ad%b8%e7%bf%92%e7%ad%86%e8%a8%98\/\" class=\"advgb-post-tax-term\">Regular Expression<\/a>","<a href=\"https:\/\/kuo.us.to\/wordpress\/category\/python%e5%ad%b8%e7%bf%92%e7%ad%86%e8%a8%98\/\" class=\"advgb-post-tax-term\">\u6b63\u5247\u8868\u9054\u5f0f<\/a>","<a href=\"https:\/\/kuo.us.to\/wordpress\/category\/python%e5%ad%b8%e7%bf%92%e7%ad%86%e8%a8%98\/\" class=\"advgb-post-tax-term\">\u6b63\u898f\u8868\u9054\u5f0f<\/a>"],"unlinked":["<span class=\"advgb-post-tax-term\">Pandas<\/span>","<span class=\"advgb-post-tax-term\">Python<\/span>","<span class=\"advgb-post-tax-term\">Regular Expression<\/span>","<span class=\"advgb-post-tax-term\">\u6b63\u5247\u8868\u9054\u5f0f<\/span>","<span class=\"advgb-post-tax-term\">\u6b63\u898f\u8868\u9054\u5f0f<\/span>"]}},"comment_count":"0","relative_dates":{"created":"Posted 4 \u5e74 ago","modified":"Updated 3 \u5e74 ago"},"absolute_dates":{"created":"Posted on 2022 \u5e74 7 \u6708 21 \u65e5","modified":"Updated on 2023 \u5e74 1 \u6708 17 \u65e5"},"absolute_dates_time":{"created":"Posted on 2022 \u5e74 7 \u6708 21 \u65e5 \u4e0a\u5348 11:54","modified":"Updated on 2023 \u5e74 1 \u6708 17 \u65e5 \u4e0b\u5348 4:48"},"featured_img_caption":"","series_order":"","_links":{"self":[{"href":"https:\/\/kuo.us.to\/wordpress\/wp-json\/wp\/v2\/posts\/386"}],"collection":[{"href":"https:\/\/kuo.us.to\/wordpress\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/kuo.us.to\/wordpress\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/kuo.us.to\/wordpress\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/kuo.us.to\/wordpress\/wp-json\/wp\/v2\/comments?post=386"}],"version-history":[{"count":5,"href":"https:\/\/kuo.us.to\/wordpress\/wp-json\/wp\/v2\/posts\/386\/revisions"}],"predecessor-version":[{"id":404,"href":"https:\/\/kuo.us.to\/wordpress\/wp-json\/wp\/v2\/posts\/386\/revisions\/404"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/kuo.us.to\/wordpress\/wp-json\/wp\/v2\/media\/389"}],"wp:attachment":[{"href":"https:\/\/kuo.us.to\/wordpress\/wp-json\/wp\/v2\/media?parent=386"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/kuo.us.to\/wordpress\/wp-json\/wp\/v2\/categories?post=386"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/kuo.us.to\/wordpress\/wp-json\/wp\/v2\/tags?post=386"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}