移除注释的完善思路：真的可以用正则实现？

导语

创新互联建站于2013年创立，是专业互联网技术服务公司，拥有项目成都网站设计、成都网站建设网站策划，项目实施与项目整合能力。我们以让每一个梦想脱颖而出为使命，1280元宁蒗做网站,已为上家服务,为宁蒗各地企业和个人服务,联系电话:13518219792

网上有很多自称能实现移除JS注释的正则表达式，实际上存在种种缺陷。这使人多少有些愕然，也不禁疑惑到：真的可以用正则实现吗？而本篇文章以使用正则移除JS注释为目标，通过实践，由浅及深，遇到问题解决问题，一步步看看到底能否用正则实现！

移除注释的完善思路：真的可以用正则实现？

1 单行注释

单行注释要么占据一整行，要么处于某一行的最后。

正常情况下不难，直接通过正则匹配，再用replace方法移除便可。

 
 
 
 
  
  
  
  // Sample source used to try out the comment-stripping pattern.
  let codes = `
    let name = "Wmaker"; // This is name.
    if (name) {
      // Print name.
      console.log("His name is:", name);
    }
  `;

  // Delete everything from `//` up to the end of each line
  // (the `m` flag lets `$` match at every line end).
  console.log( codes.replace(/\/\/.*$/mg, '') );

  // Prints:
  // let name = "Wmaker";
  // if (name) {
  //
  //   console.log("His name is:", name);
  // }

上面是成功的删除了注释，不过对于独占一整行的注释清理的不够彻底，会留下空白行。实际上，行尾注释前面的空白也被保留了下来。所以目标稍稍提高，清除这些空白。操作起来也并不难，思路大致这样：删除整行，实际上是删除本行末尾的换行符或上一行末尾的换行符。而换行符本身也属于空白符。所以只需操作正则，匹配到注释以及注释前面所有的空白符即可，一箭双雕。

 
 
 
 
  
  
  
  // Sample source used to try out the comment-stripping pattern.
  let codes = `
    let name = "Wmaker"; // This is name.
    if (name) {
      // Print name.
      console.log("His name is:", name);
    }
  `;

  // The leading `\s*` also consumes the whitespace in front of a comment,
  // including the line break before a comment that owns its whole line.
  console.log( codes.replace(/\s*\/\/.*$/mg, '') );

  // Prints:
  // let name = "Wmaker";
  // if (name) {
  //   console.log("His name is:", name);
  // }

如果在字符串中出现完整的URL地址，上面的正则会直接匹配而将其删除。网上大多会将URL的格式特征（http://xxx）：双斜杠前面有冒号，作为解决途径加以利用。但这只是治标不治本的做法，毕竟//以任何形式出现在字符串中是它的自由，我们无从干涉。

这样问题就转变成：如何使正则匹配存在于引号外的双斜杠？

想匹配被引号包围，带有双斜杠的代码块比较简单：/".*\/\/.*"/mg。难点在于如何实现这个否定，即当正则匹配到双斜杠后，再判断其是否在引号里面？绞尽脑汁，也上网查了很多，都没有像样的结果。静心平气，洗把脸刷刷牙再冲个头冷静之后，觉得单纯使用正则的路已经走不通了，得跳出这个圈。

就在最后关头，在那淫秽污浊的房间上方突然光芒万丈。我急忙护住了充满血丝的眼睛，静待其适应后定睛一看。只见那里显现出了一段文字（Chinese）：孩儿啊，先将带有//被引号包围的字符串替换掉，去掉注释后再还原，不就行了吗？

 
 
 
 
  
  
  
  // Sample source whose last console.log carries `//` inside a string.
  let codes = `
    let name = "Wmaker"; // This is name.
    if (name) {
      // Print name.
      console.log("His name is:", name);
      console.log("Unusual situation, characters of // in quotation marks.");
    }
  `;

  // Previous approach: the `//` inside the string is treated as a comment.
  console.log( codes.replace(/\s*\/\/.*$/mg, '') );
  // Prints:
  // let name = "Wmaker";
  // if (name) {
  //   console.log("His name is:", name);
  //   console.log("Unusual situation, characters of
  // }

  // Current approach: strings are protected before comments are removed.
  console.log( removeComments(codes) );
  // Prints:
  // let name = "Wmaker";
  // if (name) {
  //   console.log("His name is:", name);
  //   console.log("Unusual situation, characters of // in quotation marks.");
  // }
  
  
  
   
  
  
  
  /**
   * Remove `//` line comments from JavaScript source text.
   *
   * Double-quoted runs that contain `//` are first swapped for unique
   * placeholders so the `//` inside them is not mistaken for a comment; the
   * comments are then deleted and the original text is put back.
   *
   * @param {string} codes - Source text to clean.
   * @returns {string} The source text with line comments removed.
   */
  function removeComments(codes) {
    let {replacedCodes, matchedObj} = replaceQuotationMarksWithForwardSlash(codes);

    // Delete each comment together with the whitespace in front of it.
    replacedCodes = replacedCodes.replace(/\s*\/\/.*$/mg, '');

    // A replacer function is used so that `$&`, `$1`, ... occurring inside a
    // protected string are inserted verbatim rather than being interpreted as
    // replacement patterns.
    Object.keys(matchedObj).forEach(k => {
      replacedCodes = replacedCodes.replace(k, () => matchedObj[k]);
    });

    return replacedCodes;

    /**
     * Swap every double-quoted run containing `//` for a placeholder.
     *
     * @param {string} codes - Source text.
     * @returns {{replacedCodes: string, matchedObj: Object<string, string>}}
     *   The rewritten text and a placeholder -> original text map.
     */
    function replaceQuotationMarksWithForwardSlash(codes) {
      const matchedObj = {};

      // Greedy and limited to one line: spans from the first `"` to the last
      // `"` of a line as long as a `//` lies in between.
      const regQuotation = /".*\/\/.*"/mg;
      const uniqueStr = 'QUOTATIONMARKS' + Math.floor(Math.random()*10000);

      let index = 0;
      const replacedCodes = codes.replace(regQuotation, function(match) {
        // The trailing `_` guarantees that no placeholder is a prefix of
        // another one (e.g. `...1_` vs `...10_`), so restoring one can never
        // hit a different placeholder.
        const s = uniqueStr + (index++) + '_';
        matchedObj[s] = match;
        return s;
      });

      return { replacedCodes, matchedObj };
    }
  }

是的，目标达成了，老天眷顾啊！

另外，有一个需要优化的地方：定义字符串的方式有三种 ' " ` ，目前我们只匹配了双引号。

为了避免正则的记忆功能，都使用了正则字面量进行测试。

--- 之前

 
 
 
 
  
  
  
  // The pattern recognises double quotes only, so just the second sample
  // matches. The regex literal is evaluated anew on every iteration, hence no
  // `lastIndex` state is carried over between the tests.
  for (const sample of [`'Unu//sual'`, `"Unu//sual"`, `\`Unu//sual\``]) {
    console.log( /".*\/\/.*"/mg.test(sample) ); // false, true, false
  }

--- 之后

 
 
 
 
  
  
  
  // `\1` requires the closing quote to be the same character as the opening
  // one, so all three quoting styles match. The regex literal is evaluated
  // anew on every iteration, hence no `lastIndex` state is carried over.
  for (const sample of [`'Unu//sual'`, `"Unu//sual"`, `\`Unu//sual\``]) {
    console.log( /('|"|`).*\/\/.*\1/mg.test(sample) ); // true, true, true
  }

啊！问题到此结束了！

真的结束了吗？不！我看了看时间：02:17，然后将眼镜摘下，扯了张纸巾，拭去了几颗泪水。

以下是接连解决的两个问题：贪婪模式和转义字符。

 
 
 
 
  
  
  
  // --- STEP 1: caused by the greedy quantifiers of the pattern.
  {
    let codes = `
      let str = 'abc//abc'; // abc'
    `;
    console.log( codes.match(/('|"|`).*\/\/.*\1/mg) ); // ["'abc//abc'; // abc'"]
  }

  // -- Fix: make both quantifiers lazy.
  {
    let codes = `
      let str = 'abc//abc'; // abc'
    `;
    console.log( codes.match(/('|"|`).*?\/\/.*?\1/mg) ); // ["'abc//abc'"]
  }

  // --- STEP 2: caused by an escaped quote inside the string literal.
  {
    let codes = `
      let str = 'http://x\\'x.com'; // 'acs
    `;
    console.log( codes.match(/('|"|`).*?\/\/.*?\1/mg) ); // ["'http://x\'", "'; // '"]
  }

  // -- Fix: a quote preceded by a backslash neither opens nor closes a match.
  {
    let reg = /(?<!\\)('|"|`).*?\/\/.*?(?<!\\)\1/mg;
    let codes = `
      let str = 'http://x\\'x.com'; // 'acs
    `;
    console.log( codes.match(reg) ); // ["'http://x\'x.com'"]
  }

事情到这里，虽然劳累，但多少有些成就感，毕竟成功了。

可是，可是，可是在测试时，竟然无意间发现一个无法逾越的障碍。就好比费劲千辛万苦花费无尽的财力物力之后，某某尤物终于愿意一同去情人旅馆时，却发现家家爆满，没有空余的房间。在强装欢笑，玩命的哄骗着她，一家接连一家的寻找直到终于定到房间后，却发现自己已然挺不起来了！

正则会将任意位置的引号作为查找的起始位置，它不在乎引号是成双的道理。下面是一个示例。

 
 
 
 
  
  
  
  // The pattern starts a match at ANY unescaped quote; it has no notion of
  // quotes coming in pairs, so the closing quote of "abc" is paired with the
  // stray quote inside the comment.
  let reg = /(?<!\\)('|"|`).*?\/\/.*?(?<!\\)\1/mg;
  let codes = `
    let str = "abc"; // "
  `;
  console.log( codes.match(reg) ); // [""abc"; // ""]

不过，问题好歹在补过觉之后的 06:37 时得以解决。

思路是这样的：虽然不能正确实现匹配带有//被引号包围的代码块（可能有方法，但能力有限），但是简化成匹配单纯被引号包围的代码块，是简单而且能正确做到的，虽然耗费的内存多了一些。另外，两引号间也可能包含换行符，所以为其增加s模式：.代表全部字符。下面是去除单行注释的最终代码。

 
 
 
 
  
  
  
  // Sample source mixing comments with strings that contain slashes, quotes
  // and an escaped quote.
  let codes = `
    let name = "Wmaker"; // This is name.
    let str = 'http://x\\'x.com' + " / / " + '/"/"/'; // '; // " "
    if (name) {
      // Print name.
      console.log("His name is:", name);
      console.log("Unusual situation, characters of // in quotation marks.");
    }
  `;

  console.log(removeComments(codes));
  // Prints:
  // let name = "Wmaker";
  // let str = 'http://x\'x.com' + " / / " + '/"/"/';
  // if (name) {
  //   console.log("His name is:", name);
  //   console.log("Unusual situation, characters of // in quotation marks.");
  // }
  
  
  
  /**
   * Remove `//` line comments from JavaScript source text.
   *
   * Every string literal is first swapped for a unique placeholder so that a
   * `//` inside a string is not mistaken for a comment; the comments are then
   * deleted and the literals are put back.
   *
   * Limitation visible from the patterns below: a quote character inside a
   * comment is still examined as a possible string start, so a comment holding
   * a complete quoted run on one line has that run protected like a string.
   *
   * @param {string} codes - Source text to clean.
   * @returns {string} The source text with line comments removed.
   */
  function removeComments(codes) {
    let {replacedCodes, matchedObj} = replaceQuotationMarksWithForwardSlash(codes);

    // Delete each comment together with the whitespace in front of it.
    replacedCodes = replacedCodes.replace(/\s*\/\/.*$/mg, '');

    // A replacer function is used so that `$&`, `$1`, ... occurring inside a
    // string literal are inserted verbatim rather than being interpreted as
    // replacement patterns.
    Object.keys(matchedObj).forEach(k => {
      replacedCodes = replacedCodes.replace(k, () => matchedObj[k]);
    });

    return replacedCodes;

    /**
     * Swap every string literal for a placeholder.
     *
     * @param {string} codes - Source text.
     * @returns {{replacedCodes: string, matchedObj: Object<string, string>}}
     *   The rewritten text and a placeholder -> original literal map.
     */
    function replaceQuotationMarksWithForwardSlash(codes) {
      const matchedObj = {};

      // An unescaped opening quote, then escaped characters or anything that
      // is not the closing quote. Single- and double-quoted literals must close
      // on the same line (a backslash-newline continuation is allowed); only
      // template literals may span lines.
      const regQuotation = /(?<!\\)(?:'(?:\\[\s\S]|[^'\\\r\n])*'|"(?:\\[\s\S]|[^"\\\r\n])*"|`(?:\\[\s\S]|[^`\\])*`)/g;
      const uniqueStr = 'QUOTATIONMARKS' + Math.floor(Math.random()*10000);

      let index = 0;
      const replacedCodes = codes.replace(regQuotation, function(match) {
        // The trailing `_` guarantees that no placeholder is a prefix of
        // another one (e.g. `...1_` vs `...10_`), so restoring one can never
        // hit a different placeholder.
        const s = uniqueStr + (index++) + '_';
        matchedObj[s] = match;
        return s;
      });

      return { replacedCodes, matchedObj };
    }
  }

最后补充一点，单双引号虽然也可以多行显示，但其解析后实际是单行的。

 
 
 
 
  
  
  
  // A backslash directly before the line break continues the string literal;
  // the line break itself does not become part of the value.
  let codes = "' \
    Wmaker \
  '";

  // One match covering the whole value, which contains no line break.
  codes.match( /(?<!\\)('|"|`).*?(?<!\\)\1/smg ); // ["'     Wmaker   '"]

2 多行注释

啊！难点已经解决，现在就可以悠哉悠哉的往前推进了。

多行注释与单行思路相同，只需在删除注释时多加一个匹配模式。中和两者的最终代码如下。

 
 
 
 
  
  
  
  // Sample source mixing line comments, block comments and strings that
  // contain comment-like text.
  let codes = `
    let name = "Wmaker"; // This is name.
    let str = 'http://x\\'x.com' + " / / " + '/"/"/'; // '; // " "
    let str = 'http://x\\'x./*a*/com' + " / / " + '/"/"/'; // '; // "/*sad*/ "
    if (name) {
      // Print name.
      /* Print name. */
      console.log("His name is:", name);
      console.log("Unusual situation, characters of // in quotation marks.");
      /*
       * Others test.
       */
      console.log("Unusual situation, characters of /* abc */ in quotation marks.");
    }
  `;

  console.log(removeComments(codes));
  // Prints:
  // let name = "Wmaker";
  // let str = 'http://x\'x.com' + " / / " + '/"/"/';
  // let str = 'http://x\'x./*a*/com' + " / / " + '/"/"/';
  // if (name) {
  //   console.log("His name is:", name);
  //   console.log("Unusual situation, characters of // in quotation marks.");
  //   console.log("Unusual situation, characters of /* abc */ in quotation marks.");
  // }
  
  
  
  /**
   * Remove `//` line comments and block comments from JavaScript source text.
   *
   * Every string literal is first swapped for a unique placeholder so that
   * comment markers inside a string are left alone; the comments are then
   * deleted and the literals are put back.
   *
   * Limitation visible from the patterns below: a quote character inside a
   * comment is still examined as a possible string start, so a comment holding
   * a complete quoted run on one line has that run protected like a string.
   *
   * @param {string} codes - Source text to clean.
   * @returns {string} The source text with comments removed.
   */
  function removeComments(codes) {
    let {replacedCodes, matchedObj} = replaceQuotationMarksWithForwardSlash(codes);

    // First alternative: a line comment up to the end of its line.
    // Second alternative: a block comment, matched lazily across lines.
    // Both take the whitespace in front of the comment with them.
    replacedCodes = replacedCodes.replace(/(\s*\/\/.*$)|(\s*\/\*[\s\S]*?\*\/)/mg, '');

    // A replacer function is used so that `$&`, `$1`, ... occurring inside a
    // string literal are inserted verbatim rather than being interpreted as
    // replacement patterns.
    Object.keys(matchedObj).forEach(k => {
      replacedCodes = replacedCodes.replace(k, () => matchedObj[k]);
    });

    return replacedCodes;

    /**
     * Swap every string literal for a placeholder.
     *
     * @param {string} codes - Source text.
     * @returns {{replacedCodes: string, matchedObj: Object<string, string>}}
     *   The rewritten text and a placeholder -> original literal map.
     */
    function replaceQuotationMarksWithForwardSlash(codes) {
      const matchedObj = {};

      // An unescaped opening quote, then escaped characters or anything that
      // is not the closing quote. Single- and double-quoted literals must close
      // on the same line (a backslash-newline continuation is allowed); only
      // template literals may span lines.
      const regQuotation = /(?<!\\)(?:'(?:\\[\s\S]|[^'\\\r\n])*'|"(?:\\[\s\S]|[^"\\\r\n])*"|`(?:\\[\s\S]|[^`\\])*`)/g;
      const uniqueStr = 'QUOTATIONMARKS' + Math.floor(Math.random()*10000);

      let index = 0;
      const replacedCodes = codes.replace(regQuotation, function(match) {
        // The trailing `_` guarantees that no placeholder is a prefix of
        // another one (e.g. `...1_` vs `...10_`), so restoring one can never
        // hit a different placeholder.
        const s = uniqueStr + (index++) + '_';
        matchedObj[s] = match;
        return s;
      });

      return { replacedCodes, matchedObj };
    }
  }

3 总结

从以上可以得出结论，单纯使用正则表达式是不能达到目标的，需要配合其它操作才行。但现在得出的结果真的能覆盖全部的情况？会不会有其它的隐藏问题，比如多字节字符的问题。虽然作为一个码农，该有的自信不会少，但慢慢的也明白了自己的局限性。从网上的其它资料看，使用UglifyJS，或在正确的解析中去除注释，会更为稳妥。但有可能自己动手解决的，没理由不花费些精力试试！

问题更新记录

感谢热心同志找出的错误，我会将能改与不能改的都列于此地，并只会更新下面两个示例的代码。

1.没有考虑正则字面量中的转义字符。

出错示例：var reg=/a\//;。

修改方式：将删除注释的正则改为：/(\s*(?<!\\)\/\/.*$)|(\s*(?<!\\)\/\*[\s\S]*?\*\/)/mg。

这里是工作于前端页面的代码及相应示例，下载链接。

 
 
 
 
  
  
  
    
  
  
  
   
  
  
  
   
  
  
  
    
  
  
  
      
  
  
  
    Remove Comments  
  
  
  
    
  
  
  
   
  
  
  
    
  
  
  
    输入：
  
  
  
  
      
  
  
  
   
  
  
  
      
  
  
  
    转换  
  
  
  
   
  
  
  
    输出：

这里是工作于Node端的代码及相应示例，下载链接。运行命令：node 执行文件 待转译文件 转译后文件。

 
 
 
 
  
  
  
  const fs = require('fs');
  const path = require('path');
  const process = require('process');

  // argv[2] and argv[3] are the first two command-line arguments: the file to
  // read and the file to write.
  const [, , sourceArg, targetArg] = process.argv;
  if (!(sourceArg && targetArg)) {
    throw new Error('Please set source file and target file.');
  }

  // Both paths are resolved against the directory containing this script.
  const sourceFile = path.resolve(__dirname, sourceArg);
  const targetFile = path.resolve(__dirname, targetArg);

  // Shared callback guard: surface any I/O error as a thrown exception.
  const rethrow = (err) => {
    if (err) throw err;
  };

  // Read the source, strip its comments and write the result to the target.
  fs.readFile(sourceFile, 'utf8', (readErr, source) => {
    rethrow(readErr);
    fs.writeFile(targetFile, removeComments(source), 'utf8', (writeErr) => {
      rethrow(writeErr);
      console.log('Remove Comments Done!');
    });
  });
  
  
  
   
  
  
  
  /**
   * Remove `//` line comments and block comments from JavaScript source text.
   *
   * Every string literal is first swapped for a unique placeholder so that
   * comment markers inside a string are left alone; the comments are then
   * deleted and the literals are put back.
   *
   * Limitation visible from the patterns below: a quote character inside a
   * comment is still examined as a possible string start, so a comment holding
   * a complete quoted run on one line has that run protected like a string.
   *
   * @param {string} codes - Source text to clean.
   * @returns {string} The source text with comments removed.
   */
  function removeComments(codes) {
    let {replacedCodes, matchedObj} = replaceQuotationMarksWithForwardSlash(codes);

    // First alternative: a line comment up to the end of its line.
    // Second alternative: a block comment, matched lazily across lines.
    // The `(?<!\\)` guards skip a comment marker whose first slash is escaped,
    // as in the regex literal /a\//.
    replacedCodes = replacedCodes.replace(/(\s*(?<!\\)\/\/.*$)|(\s*(?<!\\)\/\*[\s\S]*?\*\/)/mg, '');

    // A replacer function is used so that `$&`, `$1`, ... occurring inside a
    // string literal are inserted verbatim rather than being interpreted as
    // replacement patterns.
    Object.keys(matchedObj).forEach(k => {
      replacedCodes = replacedCodes.replace(k, () => matchedObj[k]);
    });

    return replacedCodes;

    /**
     * Swap every string literal for a placeholder.
     *
     * @param {string} codes - Source text.
     * @returns {{replacedCodes: string, matchedObj: Object<string, string>}}
     *   The rewritten text and a placeholder -> original literal map.
     */
    function replaceQuotationMarksWithForwardSlash(codes) {
      const matchedObj = {};

      // An unescaped opening quote, then escaped characters or anything that
      // is not the closing quote. Single- and double-quoted literals must close
      // on the same line (a backslash-newline continuation is allowed); only
      // template literals may span lines.
      const regQuotation = /(?<!\\)(?:'(?:\\[\s\S]|[^'\\\r\n])*'|"(?:\\[\s\S]|[^"\\\r\n])*"|`(?:\\[\s\S]|[^`\\])*`)/g;
      const uniqueStr = 'QUOTATIONMARKS' + Math.floor(Math.random()*10000);

      let index = 0;
      const replacedCodes = codes.replace(regQuotation, function(match) {
        // The trailing `_` guarantees that no placeholder is a prefix of
        // another one (e.g. `...1_` vs `...10_`), so restoring one can never
        // hit a different placeholder.
        const s = uniqueStr + (index++) + '_';
        matchedObj[s] = match;
        return s;
      });

      return { replacedCodes, matchedObj };
    }
  }

当前名称：移除注释的完善思路：真的可以用正则实现？
分享链接：http://www.hantingmc.com/qtweb/news12/10812.html

成都网站建设公司_创新互联，为您提供外贸网站建设、网站改版、网站排名、动态网站、网站策划、网站设计

声明：本网站发布的内容（图片、视频和文字）以用户投稿、用户转载内容为主，如果涉及侵权请尽快告知，我们将会在第一时间删除。文章观点不代表本网站立场，如需处理请联系客服。电话：028-86922220；邮箱：631063699@qq.com。内容未经允许不得转载，或转载时需注明来源：创新互联

猜你还喜欢下面的内容

网站建设知识

分类信息网

成都网站推广　　　微信开发　　　雨棚定制　　　达州托管服务器　　　广汉长尔科技　　　古蔺网站建设　　　网站设计　　　服务器租凭　　　广汉网站建设　　　四川國際商會　　　 bgp机房托管　　　南充做网站　　　成都网站排名　　　广安网站建设　　　成都雨棚定制　　　手机网站制作　　　大吉雪茄　　　企业网站设计　　　网络口碑营销　　　成都IDC机房托管