在机器学习中免不了和图片打交道。有时候收集来的图片后缀名和实际类型并不一致,这时就需要对图片文件做批量处理。下面的脚本会递归遍历指定目录,根据文件头的魔数判断图片的真实类型,并删除所有不是 JPEG 的文件(删除不可恢复,运行前请先备份)。简单粗暴,上代码:
"""Recursively delete image files whose real type (by magic number) is not JPEG.

The real type is detected from the first bytes of each file, so a wrong or
misleading file extension does not matter.
"""
import os
import struct  # not used by the functions below; kept so existing users of the module still find it

# Magic-number prefixes (upper-case hex) mapped to the image type they identify.
_SIGNATURES = {
    "FFD8FF": "JPEG",
    "89504E47": "PNG",
    "47494638": "GIF",
}


def gci(filepath, keep_type="JPEG"):
    """Walk *filepath* recursively and delete every file that is not *keep_type*.

    Args:
        filepath: Directory to clean. May be absolute or relative.
        keep_type: Type name (as returned by ``filetype``) of the files to
            keep. Everything else, including files of unknown type, is
            printed and then removed with ``os.remove``.

    Raises:
        OSError: If a directory cannot be listed or a file cannot be
            read or removed.
    """
    for name in os.listdir(filepath):
        # Join the directory exactly once; joining it a second time produced
        # "dir/dir/file" for relative directories and the open() then failed.
        path = os.path.join(filepath, name)
        if os.path.isdir(path):
            gci(path, keep_type)
        elif filetype(path) != keep_type:
            print(path)
            os.remove(path)


def typeList():
    """Return a new dict mapping hex magic-number prefixes to type names."""
    return dict(_SIGNATURES)


def bytes2hex(bytes):
    """Return the upper-case hex string for a sequence of integers.

    Each value is rendered with ``%x`` and left-padded with one ``0`` when
    its hex form has an odd number of digits, so byte values always take
    two characters.

    Args:
        bytes: Iterable of non-negative integers (e.g. a tuple of byte
            values or a ``bytearray``). The name shadows the builtin but is
            kept for compatibility with keyword callers.
    """
    pieces = []
    for value in bytes:
        digits = u"%x" % value
        if len(digits) % 2:
            digits = u"0" + digits
        pieces.append(digits)
    return u"".join(pieces).upper()


def filetype(filename):
    """Detect the image type of *filename* from its leading bytes.

    Returns:
        ``'JPEG'``, ``'PNG'`` or ``'GIF'`` when the file starts with the
        matching signature, otherwise ``'unknown'`` (this includes empty
        files and files shorter than a signature).

    Raises:
        IOError/OSError: If the file cannot be opened or read.
    """
    signatures = typeList()
    # Integer division: each signature byte is two hex digits. True division
    # yields a float on Python 3, which cannot be used as a read size.
    longest = max(len(hcode) for hcode in signatures) // 2
    # Read the header once; the context manager closes the file on any error.
    with open(filename, 'rb') as binfile:
        header = binfile.read(longest)
    # bytearray iterates as integers on both Python 2 and Python 3.
    header_hex = bytes2hex(bytearray(header))
    for hcode, ftype in signatures.items():
        # A header shorter than the signature can never start with it, so
        # short files fall through to 'unknown' instead of raising.
        if header_hex.startswith(hcode):
            return ftype
    return 'unknown'


if __name__ == "__main__":
    # Guarded so that importing this module does not start deleting files.
    gci('your_img_folder_path')