摘要
通过 VBA 自定义函数,让 Excel 可以在单元格公式中使用正则表达式。
脚本内容
Option Explicit
Public Function re_sub(sText As String, pattern As String, repl As String)
    ' Replace every case-sensitive match of `pattern` in `sText` with `repl`
    ' and return the resulting text. `repl` may contain back-references such
    ' as $1, $2 that refer to the capture groups of `pattern`.
    Dim rx As Object
    Set rx = CreateObject("vbscript.regexp")

    rx.pattern = pattern
    rx.IgnoreCase = False   ' matching is case-sensitive
    rx.Global = True        ' True replaces all matches, False only the first

    re_sub = rx.Replace(sText, repl)
End Function
Public Function re_find(sText As String, pattern As String)
    ' Return every case-insensitive match of `pattern` in `sText` as a
    ' zero-based Variant array of strings, in the order the matches occur.
    ' Returns an empty array (UBound = -1) when nothing matches.
    Dim oRegExp As Object, matches As Object
    Dim result() As Variant
    Dim i As Long

    Set oRegExp = CreateObject("vbscript.regexp")
    With oRegExp
        .Global = True      ' True finds all matches, False only the first
        .IgnoreCase = True  ' matching is case-insensitive
        .pattern = pattern
        Set matches = .Execute(sText)
    End With

    If matches.Count = 0 Then
        re_find = Array()
        Exit Function
    End If

    ' Store the matched text (.Value), not the Match object itself: passing
    ' the late-bound object would put COM object references in the result
    ' instead of strings.
    ReDim result(0 To matches.Count - 1)
    For i = 0 To matches.Count - 1
        result(i) = matches(i).Value
    Next
    re_find = result
End Function
Public Function re_extract(sText As String, pattern As String)
    ' Return the capture groups of the FIRST case-insensitive match of
    ' `pattern` in `sText` as a zero-based Variant array, one element per
    ' group, in group order (element 0 is group 1, and so on).
    ' Returns an empty array (UBound = -1) when there is no match or the
    ' pattern has no capture groups.
    Dim oRegExp As Object, matches As Object, groups As Object
    Dim result() As Variant
    Dim i As Long

    Set oRegExp = CreateObject("vbscript.regexp")
    With oRegExp
        .Global = False     ' only the first match is used
        .IgnoreCase = True  ' matching is case-insensitive
        .pattern = pattern
        Set matches = .Execute(sText)
    End With

    ' Indexing matches(0) on an empty collection raises a run-time error,
    ' so handle "no match" explicitly.
    If matches.Count = 0 Then
        re_extract = Array()
        Exit Function
    End If

    Set groups = matches(0).SubMatches
    If groups.Count = 0 Then
        re_extract = Array()
        Exit Function
    End If

    ' Keep every group at its own position. Using the captured values as
    ' dictionary keys fails with "key already exists" as soon as two groups
    ' hold the same value (e.g. two optional groups that did not take part
    ' in the match), and would shift the positions of later groups.
    ReDim result(0 To groups.Count - 1)
    For i = 0 To groups.Count - 1
        result(i) = groups(i)
    Next
    re_extract = result
End Function
使用方法
# 正则抽取效果
=re_extract(A1,"([^|(]+)(?:\(共(\d+)层\))?(?:\|(\d{4})年建\|)?(\d室\d厅)\|([\d.]+)平米\|([东南西北]+)")
# 正则搜索效果
=re_find(A1,"[\u4e00-\u9fa5]+|^\w+$")
# 正则替换效果
=re_sub(A1,".+?(\d+)(;|$)", "$1$2")