P86-94-正则表达式.js – 张恒的笔记

/*P86-正则-创建正则表达式*/
//正则表达式，用来表达字符串规则和模式的
//用它判断某些字符串是否符合某些规律或规则
//用途很广泛，比如验证邮箱，验证手机号，有没有特殊字符等

/*基本建立和使用
//字面值或RegExp()正则表达式对象
var str ='where when what';

var re=/wh/;//2个/包裹,切记不要使用''
var re2 = new RegExp('wh');//正则表达式作为RegExp()的参数（字符串格式）

//正则表达式的使用
//exec()和test()，分别返回匹配值和boolean值（表示是否匹配到）
//但是只能返回第一个匹配到出现的位置,index:0
//VH：？返回值是object类型（exec()返回的是数组，数组的typeof也是object），因为typeof只会返回undefined，boolean，string，number，bigint，symbol，object（含null历史遗留问题），function
console.log(re.exec(str));//["wh", index: 0, input: "where when what", groups: undefined]
console.log(re.test(str));//true
console.log(typeof re.exec(str));//object

console.log(re2.exec(str));//["wh", index: 0, input: "where when what", groups: undefined]
console.log(re2.test(str));//true
/** */

/*搜索整个字符串从头到尾，返回所有匹配
//正则表达式后面添加g，然后重复调用exec()，就会返回后续匹配到的位置，不加g，每次都从头开始
//VH：但是注意正则必须赋值给变量，直接使用正则，每次都是第一次，
//VH：如果匹配一共有3个，调用4次返回null，第五次开始是重新从头开始，重复结果
var str ='where when what';
var re=/wh/g;//在斜杠外面添加g，不加g，每次调用都是重新从头查
var re2 = new RegExp('wh');//这种方式添加g：把标志作为第二个参数（字符串）传入，即new RegExp('wh','g')
console.log(re.exec(str));//标记32
console.log(re.exec(str));//标记33
console.log(re.exec(str));//标记34
console.log(re.exec(str));//标记35返回null，说明已经检索到末尾了。
console.log(re.exec(str));//标记36重复第一次结果
console.log(/wh/g.exec(str));//标记37//直接使用正则，次次都是第一次
console.log(/wh/g.exec(str));//标记38//直接使用正则，次次都是第一次
/*
index.js:32 ["wh", index: 0, input: "where when what", groups: undefined]
index.js:33 ["wh", index: 6, input: "where when what", groups: undefined]
index.js:34 ["wh", index: 11, input: "where when what", groups: undefined]
index.js:35 null
index.js:36 ["wh", index: 0, input: "where when what", groups: undefined]
index.js:37 ["wh", index: 0, input: "where when what", groups: undefined]
index.js:38 ["wh", index: 0, input: "where when what", groups: undefined]
*/
/** */

/*P87-正则-字符匹配
//最简单的语法就是匹配字符，英文字母，数字，下划线等等
//VH:我测试了中文也是可以的。

//如何匹配到回车，tab等特殊字符呢？比如下面的模板字符串例子
//模板字符串例子
var str = `This str contains 123
CAPITALIZED letters and _-&^% symbols 中文`;

console.log(/T/.test(str));//true
console.log(/This/.test(str));//true
console.log(/Thiss/.test(str));//false
console.log(/12/.test(str));//true
console.log(/1234/.test(str));//false
console.log(/_-&/.test(str));//true

//VH：做一些自己想到的测试
console.log(/中/.test(str));//true//VH：补充一个汉字匹配，是可以的
console.log(/小/.test(str));//false//VH:证明确实是可以匹配汉字
console.log(/\n/.test(str));//true//VH:不显示字符，使用转义字符
console.log(/h.s/.test(str));//true//VH:下节讲到的'约定通配符'(我的称呼)也可使用

console.log(/中/.exec(str));//["中", index: 60, input: "This str contains 123↵CAPITALIZED letters and _-&^% symbols 中文", groups: undefined]
console.log(/小/.exec(str));//null
console.log(/\n/.exec(str));//["↵", index: 21, input: "This str contains 123↵CAPITALIZED letters and _-&^% symbols 中文", groups: undefined]
console.log(/h.s/.exec(str));//["his", index: 1, input: "This str contains 123↵CAPITALIZED letters and _-&^% symbols 中文", groups: undefined]
/** */

/*P88-正则-特殊字符匹配
//标题不太恰当，应该是约定的通配符
//上节适用于已经知道需要被匹配的字符，如果匹配一些规则或模式的话，需要特殊的符号来表示
//str.match(/正则/)，不含g，返回匹配到的第一个相关信息，等同于"正则.exec(str)"的效果。
//str.match(/正则/g)，含g，返回匹配到的值，多个就返回数组

var str = `This str contains 123
CAPITALIZED letters and _-&^% symbols 中文`;

//.任意单个字符（不含\n,\r就是换行和回车）
//match(/正则/)，不含g，返回匹配到的第一个相关信息，等同于"正则.exec()"的效果。
console.log(str.match(/Th.s/));//["This", index: 0, input: "This str contains 123↵CAPITALIZED letters and _-&^% symbols 中文", groups: undefined]

//match(/正则/g)，含g，返回匹配到的值，多个就返回数组
console.log(str.match(/Th.s/g));//["This"]
console.log(str.match(/1.3/g));//["123"]
console.log(str.match(/中./g));//["中文"]

//以下约定转义字符表达特定匹配范围，若大写，含义相反
//\d和\D含义相反

//\d表示数字0-9
console.log(str.match(/\d/g));//(3) ["1", "2", "3"]
//\w表示数字字母下划线A-Za-z_0-9
console.log(str.match(/\w/g));//(47) ["T", "h", "i", "s", "s", "t", "r", "c", "o", "n", "t", "a", "i", "n", "s", "1", "2", "3", "C", "A", "P", "I", "T", "A", "L", "I", "Z", "E", "D", "l", "e", "t", "t", "e", "r", "s", "a", "n", "d", "_", "s", "y", "m", "b", "o", "l", "s"]
//\s表示空格和无显示字符（\n,\r,\t），参考https://blog.csdn.net/jiang7701037/article/details/80754163
console.log(str.match(/\s/g));//(9) [" ", " ", " ", "↵", " ", " ", " ", " ", " "]
console.log('你好'.match(/\u4f60/g));//["你"]
//其实可以直接使用中文，而且必须加g，才能返回匹配到的值数组，不然就是匹配到的位置等信息数组
console.log('你好'.match(/你/g));//["你"]
/** */

/*P89-正则-匹配次数*/
//可以规定某个模式出现多少次

// `*` quantifier: zero or more repetitions of the preceding item.
// The regex literal used below ends in an asterisk followed by a slash, which
// would terminate a surrounding block comment. Every demo call in this section
// is therefore disabled with a `///` prefix; search for `///` to re-enable them.
var str;

// Two-line input: the match stops at the end of the first line.
str = `This str contains 123
CAPITALIZED letters and _-&^% symbols 中文`;
///console.log(str.match(/This.*/g));//["This str contains 123"]

// `.` does not match a line break, so with a single-line input the match
// runs all the way to the end of the string.
str = `This str contains 123 CAPITALIZED letters and _-&^% symbols 中文`;
///console.log(str.match(/This.*/g));//["This str contains 123 CAPITALIZED letters and _-&^% symbols 中文"]

// Zero characters after "This".
str = `This`;
///console.log(str.match(/This.*/g));//["This"]

// One character after "This".
str = `This1`;
///console.log(str.match(/This.*/g));//["This1"]

// Three characters after "This".
str = `This123`;
///console.log(str.match(/This.*/g));//["This123"]

// Conclusion: `*` is greedy. Each match takes as many characters as the string
// allows; it does not produce separate matches for 0, 1, 2, 3... repetitions.

/*+号：1次或多次，【可以理解为正数次】
var str = `This str contains 123 CAPITALIZED letters and _-&^% symbols 中文`;
console.log(str.match(/t+/g));//(3) ["t", "t", "tt"]
//【结论】：+同样是贪婪匹配，每次都尽可能多地匹配连续的t（所以letters中的tt作为整体"tt"返回，而不是两个"t"）；加了g才会继续向后找下一处匹配
//测试隔次数出现，结果是依然可以。不会因为4次没匹配到就结束
var str = `t tt ttt ttttt`;//1235
console.log(str.match(/t+/g));//(4) ["t", "tt", "ttt", "ttttt"]
/** */

/*？表示1次或0次
//这里可以看到x?返回结果都是空字符，因为str中无x，符合0次条件被返回，但是这个匹配到的结果是空
//第二个console.log中的t?，返回结果中有4个是't'，同样道理：该位置符合1次条件，就返回匹配到的t，否则返回空字符串
var str = `This str contains 123 CAPITALIZED letters and _-&^% symbols 中文`;
console.log(str.match(/x?/g));//(63) ["", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""]
console.log(str.match(/t?/g));//(63) ["", "", "", "", "", "", "t", "", "", "", "", "", "t", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "t", "t", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""]
/** */
/*精确重复次数的方法：{}
//{3}单数字
var str = `This str contains 12 345 67890 letters`;
console.log(str.match(/t{2}/g));//["tt"]
console.log(str.match(/\d{2}/g));//(4) ["12", "34", "67", "89"]
//{1,3}区间
var str = `This str contains 12 345 67890 letters`;
console.log(str.match(/t{1,3}/g));//(3) ["t", "t", "tt"]
console.log(str.match(/\d{1,3}/g));//(4) ["12", "345", "678", "90"]
console.log(str.match(/\d{1,2}/g));//(6)  ["12", "34", "5", "67", "89", "0"]
//VH：【结论】大概是：
//先找到符合条件的数字串（一般有多个），从第一串的第一个字符开始，
//先从模式的最大长度去匹配，成功，则继续测试剩余部分，
//剩余部分失败，选择第二长的模式去匹配，依次类推，直到数字串结束，跳到下一个字符串
//重复上述步骤，直到最后字符串的最后字符测试结束

//{1,}只有开头数字+逗号：最少1次
var str = `This str contains 12 345 67890 letters`;
console.log(str.match(/\d{1,}/g));//(3) ["12", "345", "67890"]
//VH:【解释】
//1.找到数字属性的3个字符串12,345,67890
//2.先从12开始测试，最长无限，一直到2次，匹配成功，本字符串结束，转入下一个
//3.开始测试345,最长无限，直到3次，匹配成功，本字符串结束，不再测试出现2次的情况，转下一个
//4.开始测试67890,最长无限，直到5次，匹配成功，本字符串结束，不再测试4次情况，后续无数字串
//5.结束！
/** */

/*P90-正则-区间、逻辑和界定符
var str = `This str contains 123 CAPITALIZED letters and _-&^% symbols 中文`;

console.log(str.match(/[abc]/g));//(4) ["c", "a", "a", "b"]
//可以使用-中划线，表示连续,下例子就是所有的小写字母被匹配并返回
console.log(str.match(/[a-z]/g));//(31) ["h", "i", "s", "s", "t", "r", "c", "o", "n", "t", "a", "i", "n", "s", "l", "e", "t", "t", "e", "r", "s", "a", "n", "d", "s", "y", "m", "b", "o", "l", "s"]
//所有大写字母
console.log(str.match(/[A-Z]/g));//(12) ["T", "C", "A", "P", "I", "T", "A", "L", "I", "Z", "E", "D"]
//所有数字
console.log(str.match(/[0-9]/g));//(3) ["1", "2", "3"]
//^脱字符，表示非的意思。
//[^]里面的^否定全部的后续占位，[^<>]不含<和>2个符号，对比像{}是紧挨的前面1个占位的次数
//（占位是我起的名字，意思是完整（范围）的字符或字符组合）
//是否必须配合[]来用呢？在外面是作为开头限定符的，后面有例子
console.log(str.match(/[^0-9]/g));//(59) ["T", "h", "i", "s", " ", "s", "t", "r", " ", "c", "o", "n", "t", "a", "i", "n", "s", " ", " ", "C", "A", "P", "I", "T", "A", "L", "I", "Z", "E", "D", " ", "l", "e", "t", "t", "e", "r", "s", " ", "a", "n", "d", " ", "_", "-", "&", "^", "%", " ", "s", "y", "m", "b", "o", "l", "s", " ", "中", "文"]
console.log(str.match(/[^a-z]/g));//(31) ["T", " ", " ", " ", "1", "2", "3", " ", "C", "A", "P", "I", "T", "A", "L", "I", "Z", "E", "D", " ", " ", " ", "_", "-", "&", "^", "%", " ", " ", "中", "文"]

//[]内使用-和^的本义，就需要转义的字符，下划线不必转义（和[]无配合）
console.log(str.match(/[\-_&\^]/g));//(4) ["_", "-", "&", "^"]
console.log(str.match(/\-/g));//["-"]

//或的逻辑
console.log(str.match(/This|contains/g));//(2) ["This", "contains"]

//指定开头或结尾^和$
var str=`this that this and that`;
//直接this，2个结果，使用^this,限定在开头，就一个结果
console.log(str.match(/this/g));//(2) ["this", "this"]
console.log(str.match(/^this/g));//["this"]
//直接that，2个结果，使用that$,限定在末尾，只有一个结果
console.log(str.match(/that/g));//(2) ["that", "that"]
console.log(str.match(/that$/g));//["that"]
console.log(str.match(/^that/g));//null

//单词边界\b
var str=`this athata this and that`;
console.log(str.match(/that/g));//(2) ["that", "that"]
console.log(str.match(/\bthat\b/g));//["that"]
//VH：这不就是空格么？
console.log(str.match(/ that /g));//null
//VH:改进，只在that前面加空格
console.log(str.match(/ that/g));//[" that"]
console.log(str.match(/\bthis\b/g));//(2) ["this", "this"]
console.log(str.match(/ this /g));//[" this "]
//【总结】\b和空格的区别：\b是零宽的位置匹配，表示单词字符(\w)与非单词字符(\W)之间、或与字符串开头/结尾之间的边界，本身不消耗字符，所以结果里不含空格；空格只是能形成边界的字符之一（标点旁边同样是边界）
/** */

/*P91-正则-分组*/
//正则表达式可以使用小括号对模式进行分组
//分组的内容会当成一个整体，并且执行的结果会把分组的内容返回回来。

/*例子1
//例子中th.*th就像拉面，2头固定，中间可拉伸匹配。
//虽然不加括号也能匹配，但本节的意思是(th)作为整体:1.有()返回信息不一样，2.例子2更好说明作为整体的含义
var str = `this that this and that`;
console.log(/(th).*(th)/.exec(str));//(3) ["this that this and th", "th", "th", index: 0, input: "this that this and that", groups: undefined]
console.log(/th.*th/.exec(str));//["this that this and th", index: 0, input: "this that this and that", groups: undefined]
/*
老师说，使用exec()可以返回分组的一些信息，VH：经过测试，不加()返回信息没有()内的th部分。
返回的第一个是匹配到的字符串，第二个和第三个元素对应正则中的分组内容！
(3) ["this that this and th", "th", "th", index: 0, input: "this that this and that", groups: undefined]
0: "this that this and th"
1: "th"
2: "th"
groups: undefined
index: 0
input: "this that this and that"
length: 3
__proto__: Array(0)
*/
/** */

/*测试使用match()
//仅仅返回匹配到的字符串
var str = `this that this and that`;
console.log(str.match(/(th).*(th)/g));//["this that this and th"]
/** */

/*问答总结*/
//问：th那么多，任意两个th之间作为一个匹配，为啥只出一个结果呢？没加g？没使用变量？
//答：因为*是任意次，匹配的时候，先从最长开始，匹配到就向后寻找。
//问：exec()不是执行1次显示一个么？而且没有加g？
//答：因为最长长度匹配，只能有1个，不必加g（全局）；

/*自己例子测试
//换成th和nd，发现，只找最长的匹配结果，返回数组元素3个，分别是[结果，(1),(2)].
//注意：只是match()中的正则加g是返回结果，不加g等于exec()的效果，详见第88节。
var str = `this that nd this and that`;
var re =/(th).*(nd)/g;//
console.log(re.exec(str));//(3) ["this that nd this and", "th", "nd", index: 0, input: "this that nd this and that", groups: undefined]
console.log(re.exec(str));//null
console.log(str.match(re));//["this that nd this and"]
//自己写个小点的例子
var str = `th1nd`;
var re =/(th).*(nd)/g;//
console.log(re.exec(str));//(3) ["th1nd", "th", "nd", index: 0, input: "th1nd", groups: undefined]
console.log(str.match(re));//["th1nd"]
/** */

/*老师例子2，有效说明()作为整体的意义。
//设定出现次数时，aa作为整体重复出现，比如aaaa
//如果是aa{2},就是aaa这样的了。
var str =`aaaab abb cddaa`;
console.log(str.match(/(aa){2}/g));//["aaaa"]

//这大概等同于/aaaa/，经过测试，确定是的。
console.log(str.match(/aaaa/g));//["aaaa"]

//自己写的例子
//比如加括号，比如(12){2}，实例就是1212
//不加括号，比如12{2}，实例就是122
var str =`1212 1221212`;
console.log(str.match(/(12){2}/g));//(2) ["1212", "1212"]
console.log(str.match(/12{2}/g));//["122"]
/** */

/*P92-正则-常见正则
//日常开发中比较常见的正则表达式
//1.手机号
//var mobileRe = /^1[3-9]{1}\d{9}/g;//我自己写的。
var mobileRe = /^1[3-9]\d{9}/g;//老师的[3-9]后面没有这个{1}，因为默认就是1。注意：带g的正则每次test()成功后会记住lastIndex，下一次test()从该位置开始查找，连续验证不同字符串时可能误判，用于验证的正则最好不加g；另外末尾没有$，超过11位的号码也会通过。
console.log(mobileRe.test('13818886666'));//true
console.log(mobileRe.test('10818886666'));//false

var mobileRe = /^1[3-9]+\d{9}/g;//+号是出现次数为正，有可能13后续123456789匹配，135后续123456789也匹配。
console.log(mobileRe.test('135123456789'));//true

//2.邮箱
//这里注意观察正则//g后面这个g的颜色，如果是蓝色（在vscode中）就说明基本语法对。如果是其他颜色，可能符号或者写错了。我写成了中文的（）
var emailRe = /^([a-zA-Z0-9_\-\.]+)@([a-zA-Z0-9_\-\.]+)\.([a-zA-Z]{2,5})$/g;
console.log(emailRe.test('admin@163.com'));//true
console.log(emailRe.test('admin@163.comnnn'));//false//因为后面加了3个n造成6个字符

//【总结】
//这个正则表达式，能够很好理解到，正则的用法。正则的每个字符并不代表1个字符，而是自己或者与其他字符组合成一个范围，
//按照一个范围默认占据一个字符位置，一个范围可以指定出现次数，复制自己分身去占位置。=>{}
//具体举例，a范围就是a1个，a-z代表26个，
//默认一家一份，派占1个成员出来占位置1个，(th)表示固定用t和h组合成员占位置，2个成员拉手一起占位置，1份2个位置。
//{2}重复占2份，{2,5}弹性占位，可能2-5份。
//^表开头，$表结尾，加入队列。因为字符串开头结尾是概念性的没有位置，所以这两也不占位，就不能重复
//注意转义。

//3.用户名
var usernameRe = /^[a-zA-Z][a-zA-Z0-9_]{5,14}$/g;
console.log(usernameRe.test('abc'));//false
console.log(usernameRe.test('$aaa'));//false
console.log(usernameRe.test('abcd0_12a'));//true
console.log(usernameRe.test('a123456789012345'));//false//16位了。
console.log(usernameRe.test('a12345678901234'));//true//这个是15位。
/** */

/*93-正则-字符串替换
//字符串里一些方法可以用到正则表达式的。
//replace()2个参数，被替换部分和要替补上去的部分
//replace()有返回值，是处理后的新字符串
var str= 'Tish is an apple';
console.log(str.replace('Tish','This'));//This is an apple

//使用正则
var str= 'Tish 1is 2an 3apple';
console.log(str.replace(/\d+/g,''));//Tish is an apple//只去掉了数字，Tish的拼写不会被改正

//VH测试，不加g
var str= 'Tish 1is 2an 3apple';
console.log(str.replace(/\d+/,''));//Tish is 2an 3apple//只替换第一个匹配到的

//VH测试多个数字，单独数字
var str= 'Tish 1is 2an 333apple 5 34';
console.log(str.replace(/\d+/g,''));//Tish is an apple  //多个和单独均可替换，前面apple后面是有空格的

//复杂一点的例子(老师讲)
//下面我是新建变量，分3行写代码，为了方便说明注意点，老师只用了一行。
//还有注意，[^]里面的^否定全部的后续占位（占位是我起的名字，意思是完整（范围）的字符或字符组合）
//本例中，并不是一般意义的替换，而是整个匹配，()选中要保留的部分(就是后面的$1)，以$1替换（其实全匹配就是全删除，以$1替换就是保留$1）
var html=`<span>hello</span><div>world</div>`;
re=/<[^>]*>([^<>]*)<\/[^>]*>/g;//这里要注意html标签闭标签的/需要转义，不然就当成正则的组成部分了。
newHtml= html.replace(re,'$1');//$1指的是正则中第一个分组的内容，而且$1需要使用引号！说明它不是变量，就是这里特定符号！
console.log(newHtml);//helloworld

//测试$2
//上面的例子中的正则是1对标签，字符串是两对标签儿。匹配到第1对后。继续往后匹配第2对。
//这里是把上面的正则复制一遍，正好匹配整个字符串！
//因为有2对标签，所以，正好完全匹配
var html=`<span>hello</span><div>world</div>`;
re=/<[^>]*>([^<>]*)<\/[^>]*><[^>]*>([^<>]*)<\/[^>]*>/g;
newHtml= html.replace(re,'$1'+'$2');//$1指的是正则中第一个分组的内容
console.log(newHtml);//helloworld
newHtml= html.replace(re,'$1'+'和'+'$2');//$1指的是正则中第一个分组的内容
console.log(newHtml);//hello和world

//测试
//这次把字符串的标签对为3对，这个时候，肯定就不能匹配第3个
var html=`<span>hello</span><div>world</div><div>！</div>`;
re=/<[^>]*>([^<>]*)<\/[^>]*><[^>]*>([^<>]*)<\/[^>]*>/g;
newHtml= html.replace(re,'$1'+'$2');//$1指的是正则中第一个分组的内容
console.log(newHtml);//helloworld<div>！</div>
/** */

/*P94-正则-字符串分离
//另外一个接收正则表达式的方法是分隔字符串//老师这里说接收，应该是从参数的角度，实际是用途更恰当。
//使用特殊字符，对字符串进行分隔，然后返回分隔后的数组

var tags='html, css, javascript';
//注意这个逗号后面有空格，分隔出来的元素就不带空格
console.log(tags.split(', '));//(3) ["html", "css", "javascript"]

var str= 'This  | is , an & apple';
//\W（大写W）的含义是非单词字符（字母、数字、下划线以外的字符），+号的意思是正数次重复。这样，就可以匹配到多个空格，标点符号等
console.log(str.split(/\W+/g));//(4) ["This", "is", "an", "apple"]

//而\w(小写w）的含义是单词字符（字母、数字、下划线），+号的意思是正数次重复。这样，结果就相反了
var str= 'This  | is , an & apple';
console.log(str.split(/\w+/g));//(5) ["", "  | ", " , ", " & ", ""]
/** */
扫一扫手机查看
相关文章

【WPSJS宏】取单元格日期数字值转换成真的日期字符串

JS基础教程笔记系列简介和目录

P119-120-总结和下一步.js

P114-118-模块化.js

P106-113-异步.js

P101-105-异常.js

发表回复 取消回复

发表回复取消回复