Schedule this script to run every X minutes to process newly found files.

If your files are large and take time to process, you can adapt this script to create a 'queue.csv', which another script can then consume to process the files in batch.

In [ ]:
import glob
import multiprocessing.dummy as mp
import os
import shutil
import time

import pandas as pd

# Collect every .txt file currently sitting in the watch directory.
files = glob.glob(os.path.join("/root", "*.txt"))

def do_process(file, dest_dir='/root/processed'):
    """Process one input file, then move it to *dest_dir* marked as done.

    Files whose path already contains '_done' are skipped, so the script
    can be re-run safely on a directory that mixes new and processed files.

    Parameters:
        file: path to the .txt file to process.
        dest_dir: directory processed files are moved into
            (defaults to the original hard-coded '/root/processed').

    The moved file is renamed to '<unix-time><basename>_done' — the
    timestamp gives it a unique name, and the '_done' suffix both marks
    it processed and is what the cleanup `find` matches on.
    """
    # Skip files already marked as processed.
    if '_done' in file:
        return

    # read the file into a dataframe
    # All your Pandas commands

    # shutil.move + os.path.basename instead of `os.system('mv ...')`:
    # safe with spaces/shell metacharacters in the path, and works for
    # any directory depth (the old file.split('/')[2] assumed /root/x.txt).
    dest = os.path.join(dest_dir,
                        str(time.time()) + os.path.basename(file) + '_done')
    shutil.move(file, dest)

if __name__ == "__main__":
    start = time.time()
    # multiprocessing.dummy is a thread pool: the work is I/O-bound
    # (file moves), so 10 threads overlap the waits. The context manager
    # guarantees the pool is torn down even if a worker raises.
    with mp.Pool(10) as p:
        p.map(do_process, files)
    # Report wall-clock time for the whole batch.
    print(time.time() - start)

    # Delete processed files (the *.txt_done the workers produced) that
    # are more than 30 minutes old. Fixed command string, no user input,
    # so shelling out to find is acceptable here.
    os.system('find /root/processed -name "*.txt_done" -type f -mmin +30 -delete')