Python 多线程读取文件
网上只有多进程,有木有?理解和书写代码花了一天的时间,有木有?
多线程分块读取
# -*- coding: utf-8 -*-
import os,time
import threading
# Lock guarding curPosition: a thread must hold it while claiming a chunk.
rlock = threading.RLock()
# Byte offset of the next unclaimed chunk, shared by every Reader thread.
# It is never reset here, so set it back to 0 before starting another pass.
curPosition = 0


class Reader(threading.Thread):
    """Worker thread that reads a file in byte-range chunks, line by line.

    Each thread repeatedly claims the next ``res.blockSize`` bytes of
    ``res.fileName`` (under ``rlock``) and hands every line that *starts*
    inside the claimed range to :meth:`process_line`. A line that straddles
    the end of a range is finished by the thread that started it, and the
    thread owning the following range skips it.

    ``res`` only needs the attributes ``fileName``, ``blockSize`` and
    ``fileSize``.
    """

    def __init__(self, res):
        self.res = res
        super(Reader, self).__init__()

    def process_line(self, line):
        """Handle one line read from the file.

        ``line`` is a ``bytes`` object that still carries its trailing
        newline (if the file has one there). The default implementation
        does nothing; override it in a subclass to do real work, e.g.
        ``print(line.strip())``.
        """

    def run(self):
        """Claim chunks until the file is exhausted and process their lines."""
        global curPosition
        res = self.res
        if res.blockSize <= 0:
            # A non-positive block size would never advance curPosition.
            raise ValueError("blockSize must be a positive number of bytes")

        # Binary mode: chunk boundaries are raw byte offsets, and seek()/tell()
        # on a text-mode file are not defined for arbitrary offsets.
        with open(res.fileName, 'rb') as fstream:
            while True:
                # Claim the next chunk of the shared resource.
                with rlock:
                    startPosition = curPosition
                    endPosition = min(startPosition + res.blockSize,
                                      res.fileSize)
                    curPosition = endPosition

                if startPosition >= res.fileSize:
                    break

                if startPosition == 0:
                    fstream.seek(0)
                else:
                    # Step back one byte before discarding a line. If the
                    # previous chunk ended exactly on a newline, only that
                    # newline is consumed and the line beginning at
                    # startPosition is kept; otherwise the tail of the line
                    # that the previous chunk's owner reads is skipped.
                    fstream.seek(startPosition - 1)
                    fstream.readline()

                pos = fstream.tell()
                while pos < endPosition:
                    line = fstream.readline()
                    if not line:
                        # File is shorter than res.fileSize (e.g. truncated
                        # after it was measured); stop instead of spinning.
                        break
                    self.process_line(line)
                    pos = fstream.tell()
class Resource(object):
    """Description of the file to be read and how it is split into chunks.

    Attributes:
        fileName: path of the file to read.
        blockSize: size in bytes of the chunk a thread claims at a time.
        fileSize: size of the file in bytes, set by getFileSize().
    """

    def __init__(self, fileName, blockSize=100000000):
        """Store the path and chunk size, then measure the file.

        Raises:
            ValueError: if blockSize is not positive.
            OSError: if the file cannot be examined.
        """
        if blockSize <= 0:
            raise ValueError("blockSize must be a positive number of bytes")
        self.fileName = fileName
        # Chunk size in bytes
        self.blockSize = blockSize
        self.getFileSize()

    # Compute the file size
    def getFileSize(self):
        """Store the current size of the file, in bytes, in self.fileSize."""
        # Ask the filesystem directly: no handle to leak, and the result is a
        # true byte count rather than a text-mode tell() value.
        self.fileSize = os.path.getsize(self.fileName)
if __name__ == '__main__':
    # Time the whole run. time.clock() was removed in Python 3.8;
    # time.perf_counter() is its replacement for measuring elapsed time.
    starttime = time.perf_counter()
    # Number of threads
    threadNum = 4
    # File to read
    fileName = 'IPData.txt'
    res = Resource(fileName)
    # Create the threads; they all share the same Resource
    threads = [Reader(res) for _ in range(threadNum)]
    # Start the threads
    for rdr in threads:
        rdr.start()
    # Wait for every thread to finish
    for rdr in threads:
        rdr.join()
    print(time.perf_counter() - starttime)
多线程按行读取
#在上面的代码基础上,让所有线程共享同一个由open(file, 'r')产生的文件句柄,每个线程每次在线程锁里面调用一次readline()取出一行(相当于从队列里pop()出一行),释放锁之后再处理这一行就好了。
请问具体怎么操作按行读取,你只说了一下,对于初学者我完全不懂。