"""# reCollectString.py# 收集指定路径下、指定后缀文件中的匹配字符串,并记录文件名和行号。# 适用于Python3# 要处理的文件编码应为:utf-8(无签名)"""importos,re# 要处理的文件所在路径DIR="./workDir"# 要处理的文件后缀列表EXT=['.txt',".md"]# 输出文件(含路径)OUT_FILE="./out.txt"# 要查找的字符串(正则表达式),请按需设置r=re.compile(r'\[.*?\]')withopen(OUT_FILE,"w",encoding="utf-8")asoutFile:filesNum=1forroot,__,filesinos.walk(DIR):lesson_cut_file_list=[xforxinfilesifos.path.splitext(x)[1]inEXT]forfileinlesson_cut_file_list:withopen(os.path.join(root,file),"r",encoding="utf-8")asf:# for line in f.readlines():forindex,lineinenumerate(f):lineStrip=line.strip()finds=r.findall(lineStrip)iffinds:foriinfinds:outLine="{path}\t{index}\t{string}\n".format(path=os.path.join(root,file),index=index+1,string=i)ifoutLine:outFile.write(outLine)print(filesNum,"files done.")filesNum+=1