Schedule this script to run every X minutes (for example, with a cron job) to process newly found files.
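
If you would rather not rely on an external scheduler, a minimal in-process alternative is a polling loop. This is only a sketch: run_one_pass is a hypothetical placeholder for the glob-and-process logic below, and the five-minute interval is an assumption.

In [ ]:
import time

def run_one_pass():
    # hypothetical placeholder for the glob-and-process logic shown below
    pass

while True:
    run_one_pass()
    # sleep for five minutes between passes (interval is an assumption)
    time.sleep(300)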

If your files are large and take time to process, you can adapt this approach to write newly found files to a 'queue.csv', which another script can then read and process in batch.
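
As a rough sketch of that queueing variant (the queue.csv path and the single 'file' column are assumptions, not part of the original script):

In [ ]:
import glob
import os
import pandas as pd

# collect newly found .txt files
files = glob.glob("/root/*.txt")

# append them to the queue; only write a header if the queue doesn't exist yet
queue = pd.DataFrame({"file": files})
queue.to_csv("/root/queue.csv", mode="a", index=False,
             header=not os.path.exists("/root/queue.csv"))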

In [ ]:
import pandas as pd
import os
import glob
import shutil
import multiprocessing.dummy as mp
import time

# look for any .txt files in your chosen directory
files = glob.glob("/root/*.txt")


def do_process(file):
    # skip files that have already been marked as done
    if '_done' not in file:

        # read the file into a dataframe
        df = pd.read_csv(file)

        #All your Pandas commands
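        # illustrative transformations only -- replace with your own, e.g.:
        # df = df.dropna()
        # df['processed_at'] = pd.Timestamp.now()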

        # once processed, move the file to the processed folder, prefixing the
        # unix time in seconds for a unique name and appending _done
        # (assumes the /root/processed folder already exists)
        shutil.move(file, '/root/processed/' + str(time.time())
                    + os.path.basename(file) + '_done')

if __name__ == "__main__":
    start = time.time()

    # process up to 10 files concurrently using a thread pool
    p = mp.Pool(10)
    p.map(do_process, files)
    p.close()
    p.join()

    end = time.time()
    print('elapsed seconds:', end - start)
    
    # delete processed files (with the .txt_done extension) that are more than 30 minutes old
    os.system('find /root/processed -name "*.txt_done" -type f -mmin +30 -delete')
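
If you prefer to keep the cleanup in Python rather than shelling out to find, a rough pathlib-based equivalent (same 30-minute cutoff) might be:

In [ ]:
import time
from pathlib import Path

# delete processed files whose last modification is older than 30 minutes
cutoff = time.time() - 30 * 60
for f in Path('/root/processed').glob('*.txt_done'):
    if f.stat().st_mtime < cutoff:
        f.unlink()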