Python分割器教你给文章做手术

在对文章进行分割时，我们经常会用到Python分割器。一篇相当长的文章难免会让人有些头疼，希望大家看完下面的代码后，能够熟练地使用Python分割器进行文章分割。

 
 
 
  
  
  # 将txt小说分割转换成多个HTML文件   
  
  
  # @author : GreatGhoul   
  
  
  # @email : greatghoul@gmail.com   
  
  
  # @blog : http://greatghoul.javaeye.com   
  
  
  import re   
  
  
  import os   
  
  
  # Path of the plain-text book to split.
  source_path = 'f:\\佣兵天下.txt'

  # (directory, file name) of the book; both halves are reused below.
  path_pieces = os.path.split(source_path)

  # Book title = file name without its extension.  splitext() only removes
  # the final extension, so a title that itself contains a dot is no longer
  # cut off at the first dot.
  novel_title = os.path.splitext(path_pieces[1])[0]

  # Output folder "<title>_html" next to the source file.  os.path.join()
  # supplies the separator that plain concatenation omitted whenever the
  # book was not sitting directly in a drive root.
  target_path = os.path.join(path_pieces[0], '%s_html' % novel_title)

  # A line starts a new section when, after optional leading whitespace, it
  # contains "第...卷" followed by whitespace.
  # A stricter alternative (volume and chapter on the same line), left
  # disabled:
  # sec_re = re.compile(r'第.+卷\s+.+\s+第.+章\s+.+')
  section_re = re.compile(r'^\s*第.+卷\s+.*$')

  # Opening part of every generated page.  It takes two values: the text for
  # <title> and the text for the visible heading.
  # NOTE(review): the published listing lost its HTML tags; only the two
  # placeholders, the inline CSS and the link label survived.  The markup
  # below is a reconstruction around those fragments -- confirm against the
  # author's original if available.  No charset <meta> is emitted because
  # the encoding of the source text is not known here.
  section_head = (
      '<html>\n'
      '<head>\n'
      '<title>%s</title>\n'
      '</head>\n'
      '<body style="margin:0;padding: 20px; background:#FAFAD2;'
      'color:#2B4B86;text-align:center;">\n'
      '<h2>%s</h2>\n'
      '<p><a href="#bottom">去页尾</a></p>\n'
  )
  
  
  # escape xml/html   
  
  
  def escape_xml(code):
      """Return *code* escaped for embedding in the generated HTML pages.

      ``&``, ``<`` and ``>`` are replaced by their character entities, each
      tab by four ``&nbsp;`` and every other whitespace character (including
      a trailing newline) by one ``&nbsp;``.

      NOTE(review): the published listing shows each replacement as the
      literal character itself and the whitespace replacements as plain
      spaces, which looks like the entities were decoded when the article
      was rendered.  The ``&nbsp;`` targets are an assumption -- confirm.
      """
      # '&' has to be handled first; otherwise the ampersands introduced by
      # '&lt;' / '&gt;' would themselves be escaped a second time.
      text = code.replace('&', '&amp;')
      text = text.replace('<', '&lt;')
      text = text.replace('>', '&gt;')
      # Tabs are expanded before the generic whitespace pass so that they
      # keep a width of four.
      text = text.replace('\t', '&nbsp;' * 4)
      return re.sub(r'\s', '&nbsp;', text)
  
  
  # entry of the script   
  
  
  def _nav_footer(index, has_next):
      """Return the closing HTML of page number *index*.

      The navigation bar links to the previous page (only when one exists),
      the index page, the next page (only when *has_next* is true) and the
      top of the current page, then the document is closed.

      NOTE(review): the tags are reconstructed; the published listing kept
      only the link labels, the " | " separators and a '#">' fragment.
      """
      links = []
      if index > 0:
          links.append('<a href="%d.html">上一篇</a>' % (index - 1))
      links.append('<a href="index.html">目录</a>')
      if has_next:
          links.append('<a href="%d.html">下一篇</a>' % (index + 1))
      links.append('<a href="#">回页首</a>')
      return '<p id="bottom">%s</p>\n</body>\n</html>\n' % ' | '.join(links)


  def _write_page(file_name, title, body):
      """Write one HTML file into target_path: page header, then *body*."""
      # The with-block closes the file even if a write fails.
      with open(os.path.join(target_path, file_name), 'w') as page:
          page.write(section_head % (title, title))
          page.writelines(body)


  def main():
      """Split the book at source_path into numbered HTML pages.

      Text before the first section heading goes to ``0.html`` (the
      preface); every line matching section_re starts the next numbered
      page.  Blank lines are dropped, all other lines are escaped with
      escape_xml() and followed by a line break.  Finally ``index.html`` is
      written with one link per page.  All files are created in
      target_path, which is created first if it does not exist.

      NOTE(review): files are opened with the platform default encoding;
      confirm that it matches the encoding of the source text.
      """
      # create the output folder
      if not os.path.exists(target_path):
          os.mkdir(target_path)

      sec_count = 0
      sec_title = '%s 前言' % novel_title
      sec_cache = []
      # NOTE(review): the <li>/<a> markup is reconstructed; the listing kept
      # only one "%s" although two values (page number, title) are supplied.
      idx_entry = '<li><a href="%d.html">%s</a></li>\n'
      idx_cache = [idx_entry % (sec_count, novel_title)]

      with open(source_path, 'r') as source:
          for line in source:
              if not line.strip():
                  continue
              if section_re.match(line):
                  # A heading line: finish the current page, start the next.
                  line = re.sub(r'\s+', ' ', line)
                  print('converting %s...' % line)
                  sec_cache.append(_nav_footer(sec_count, True))
                  _write_page('%d.html' % sec_count, sec_title, sec_cache)
                  sec_count += 1
                  sec_title = line
                  sec_cache = []
                  idx_cache.append(idx_entry % (sec_count, line))
              else:
                  sec_cache.append('%s<br />\n' % escape_xml(line))

      # Flush the last page.  It has no successor, and no predecessor either
      # when the book contained no section heading at all.
      sec_cache.append(_nav_footer(sec_count, False))
      _write_page('%d.html' % sec_count, sec_title, sec_cache)

      # write the menu
      menu_head = '%s 目录' % novel_title
      menu = ['<ul>\n'] + idx_cache
      menu.append('</ul>\n<p id="bottom"></p>\n</body>\n</html>\n')
      _write_page('index.html', menu_head, menu)

      print('completed. %d chapter(s) in total.' % sec_count)


  if __name__ == '__main__':
      main()

以上就是对Python分割器的相关介绍，希望大家有所收获。

【编辑推荐】

Python数据编组对文字串的读写
Python 拼写检查如何更简单的使用
Python函数变量在应用中的“窍门”
在Python函数变量中如何使用global语句简介
Python编程语言为何受到众人的追捧

当前名称：Python分割器教你给文章做手术
网址分享：http://www.hantingmc.com/qtweb/news40/412440.html

网站建设、网络推广公司-创新互联，是专注品牌与效果的网站制作，网络营销seo公司；服务项目有等

声明：本网站发布的内容（图片、视频和文字）以用户投稿、用户转载内容为主，如果涉及侵权请尽快告知，我们将会在第一时间删除。文章观点不代表本网站立场，如需处理请联系客服。电话：028-86922220；邮箱：631063699@qq.com。内容未经允许不得转载，或转载时需注明来源：创新互联

猜你还喜欢下面的内容