python多线程读取文件

网上只有多进程,有木有?理解和书写代码花了一天的时间,有木有?

多线程分块读取

# -*- coding: utf-8 -*-
import os,time
import threading

# Lock guarding curPosition, the shared cursor from which threads claim chunks.
rlock = threading.RLock()
# Byte offset of the first chunk that has not been claimed yet. It is never
# reset, so this module supports one pass over one file per process.
curPosition = 0


class Reader(threading.Thread):
    """Worker thread that claims chunks of a file and reads them line by line.

    ``res`` must expose ``fileName`` (path), ``blockSize`` (chunk size in
    bytes, > 0) and ``fileSize`` (total size in bytes).

    Each thread repeatedly claims the next ``[start, end]`` byte range under
    ``rlock`` and handles every line that *starts* inside its range:

    * chunk 0 owns the lines starting at offsets ``0 .. end``;
    * any later chunk owns the lines starting at offsets ``start+1 .. end``
      (the line containing byte ``start`` belongs to the previous chunk and
      is skipped).

    With this rule every line is delivered to ``process_line`` exactly once,
    including lines that begin exactly on a chunk boundary.
    """

    def __init__(self, res):
        self.res = res
        super(Reader, self).__init__()

    def process_line(self, line):
        """Handle one line; override in a subclass.

        ``line`` is a ``bytes`` object (the file is opened in binary mode so
        that seek/tell offsets are exact) and still carries its trailing
        newline, if it had one. The default implementation does nothing.
        """

    def run(self):
        global curPosition
        fileSize = self.res.fileSize
        blockSize = self.res.blockSize
        # Binary mode: text-mode files only support seeking to offsets
        # previously returned by tell(), and an arbitrary offset could land
        # in the middle of a multi-byte character.
        with open(self.res.fileName, 'rb') as fstream:
            while True:
                # Claim the next chunk; only the cursor update needs the lock.
                with rlock:
                    startPosition = curPosition
                    endPosition = min(startPosition + blockSize, fileSize)
                    curPosition = endPosition
                if startPosition >= fileSize:
                    # Nothing left to claim.
                    break
                fstream.seek(startPosition)
                if startPosition != 0:
                    # Discard the line containing byte startPosition; the
                    # previous chunk is responsible for it.
                    fstream.readline()
                pos = fstream.tell()
                # "<=" (not "<"): a line starting exactly at endPosition is
                # ours, because the next chunk will discard it.
                while pos <= endPosition:
                    line = fstream.readline()
                    if not line:
                        # End of file reached.
                        break
                    self.process_line(line)
                    pos = fstream.tell()

class Resource(object):
    """Description of the file to be read: its path, size and chunk size.

    Attributes:
        fileName: path of the file.
        blockSize: size in bytes of the chunk a reader claims at a time.
        fileSize: size of the file in bytes, measured at construction time.
    """

    def __init__(self, fileName, blockSize=100000000):
        """Record the path and chunk size and measure the file.

        Args:
            fileName: path of the file to read.
            blockSize: chunk size in bytes; must be positive.

        Raises:
            ValueError: if ``blockSize`` is not positive (a non-positive
                chunk size would never advance through the file).
            IOError/OSError: if the file cannot be opened.
        """
        if blockSize <= 0:
            raise ValueError("blockSize must be a positive number of bytes")
        self.fileName = fileName
        # Chunk size in bytes
        self.blockSize = blockSize
        self.getFileSize()

    def getFileSize(self):
        """Measure the file, store the result in ``self.fileSize`` and return it."""
        # Binary mode so that tell() is a real byte offset; in text mode the
        # value is an opaque cookie. "with" closes the handle on errors too.
        with open(self.fileName, 'rb') as fstream:
            fstream.seek(0, os.SEEK_END)
            self.fileSize = fstream.tell()
        return self.fileSize

if __name__ == '__main__':
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed time. Note that it reports
    # wall-clock time on every platform.
    starttime = time.perf_counter()
    # Number of reader threads
    threadNum = 4
    # Input file
    fileName = 'IPData.txt'
    res = Resource(fileName)
    # Create the threads
    threads = [Reader(res) for _ in range(threadNum)]
    # Start the threads
    for rdr in threads:
        rdr.start()
    # Wait for every thread to finish
    for rdr in threads:
        rdr.join()

    # Elapsed time in seconds
    print(time.perf_counter() - starttime)

多线程按行读取

#在上面的代码中,让所有线程共享同一个由open(file, 'r')产生的文件句柄(文件对象没有pop()方法):每个线程每次在线程锁(rlock)里面调用一次readline()取出一行,释放锁之后再处理这一行;当readline()返回空字符串时说明文件已经读完,线程退出即可。

标签: none

仅有一条评论

  1. ray ray

    请问具体怎么操作按行读取,你只说了一下,对于初学者我完全不懂。

添加新评论