Archive

Posts Tagged ‘Python’

Python remove duplicate files script (based on md5 hash)

December 2nd, 2008
#!/usr/bin/python
# This script compares the file contents of two directories
# using an MD5 hash and deletes any duplicate files from the
# target directory

import base64
import hashlib
import md5
import os
import sys

# Maps of MD5 hex digest -> file name, filled in by gather_files()
# for the source and the target directory respectively.
source = dict()
target = dict()

# Directories whose contents are compared.
source_path = '/home/jesterj/source'
target_path = '/home/jesterj/target'

def gather_files(dir,dict):
	"""Record an MD5 digest for every regular file directly inside a directory.

	dir  -- path of the directory to scan (not recursive).
	dict -- mapping updated in place: MD5 hex digest -> file name.  When
	        several files share a digest, the last one listed wins.

	Raises OSError/IOError if the directory or one of its files cannot
	be read.  The working directory of the process is left untouched.
	"""
	for filename in os.listdir(dir):
		path = os.path.join(dir, filename)
		# Sub-directories and other non-regular entries cannot be hashed.
		if not os.path.isfile(path):
			continue

		digest = hashlib.md5()
		# Binary mode hashes the exact bytes on disk; reading in chunks
		# keeps memory use flat for large files.
		with open(path, "rb") as f:
			for chunk in iter(lambda: f.read(65536), b""):
				digest.update(chunk)

		dict[digest.hexdigest()] = filename

def remove_matches(source_files=None, target_files=None, delete=False):
	"""Report files whose MD5 digest appears in both directories.

	source_files -- mapping of digest -> file name for the source
	                directory; defaults to the module-level ``source``.
	target_files -- the same for the target directory; defaults to the
	                module-level ``target``.
	delete       -- when true, each duplicate is also removed from the
	                target directory via remove_target_file().  Off by
	                default, so a plain call only reports.
	"""
	if source_files is None:
		source_files = source
	if target_files is None:
		target_files = target

	has_dupes = False
	for digest, name in source_files.items():
		if digest not in target_files:
			continue
		print("%s checksum %s exists in both dirs." % (name, digest))
		if delete:
			# The copy in the target directory may carry a different name
			# than the source file, so delete by the target's own name.
			remove_target_file(target_files[digest])
		has_dupes = True

	if not has_dupes:
		print("No duplicate files found.")

def remove_target_file(a, directory=None):
	"""Delete one file from the target directory and report it.

	a         -- name of the file, relative to the directory.
	directory -- directory holding the file; defaults to the module-level
	             ``target_path``.

	Raises OSError if the file cannot be removed, so the success message
	is only printed for files that are really gone.
	"""
	if directory is None:
		directory = target_path
	# os.remove never hands the name to a shell, so names containing
	# spaces or shell metacharacters are deleted safely.
	os.remove(os.path.join(directory, a))
	print("File " + directory + "/" + a + " removed")

def main():
	"""Hash the source and target directories, then report duplicates.

	File-system errors (missing directory, unreadable file, failed
	delete) are reported with their cause instead of a bare "error".
	Any other exception is a programming error and is left to propagate.
	"""
	try:
		gather_files(source_path,source)
		gather_files(target_path,target)
		remove_matches()
	except EnvironmentError as e:
		# EnvironmentError covers both IOError and OSError.
		print("error: %s" % e)

# Run only when executed as a script, so importing the module has no
# side effects.
if __name__ == "__main__":
	main()

Python

Parse error log and send email

November 4th, 2008

This post gives an example of a script I wrote that parses an error log looking for ‘Traceback’ errors and emails them to the admin on a daily basis. The cool thing about this script is that it keeps track of the byte offset each time it reads the log file, stores that offset in a temp file, then picks up from that offset the next day, so you don’t get duplicate emails for tracebacks from a previous day…

Let’s call this file tracebacks.py:

#!/usr/bin/python
import os
import sys
import time
import smtplib
import socket #required to get host name
from email.mime.text import MIMEText #need this for subject line in email

def main(log_path="/var/log/loadscript/<logfilename here>", tmp_file="/tmp/loadscript.tmp"):
	"""Mail a summary of the tracebacks added to the log since the last run.

	log_path -- log file to scan.
	tmp_file -- state file holding the offset at which the previous run
	            stopped reading; it is rewritten at the end of every run.

	Nothing is mailed when no new traceback is found.  Raises
	IOError/OSError when the log or the state file cannot be accessed,
	and whatever send_mail() raises when delivery fails.
	"""
	# Resume where the previous run stopped.  A missing, empty or corrupt
	# state file means the whole log is scanned.
	start_offset = 0
	if os.path.exists(tmp_file):
		with open(tmp_file, 'r') as state:
			stored = state.read().strip()
		if stored.isdigit():
			start_offset = int(stored)

	# A log shorter than the stored offset has been rotated or truncated,
	# so start again from its beginning instead of seeking past the end.
	if start_offset > os.path.getsize(log_path):
		start_offset = 0

	with open(log_path, "r") as log_file:
		log_file.seek(start_offset)
		data = log_file.readlines()
		last_byte_read = str(log_file.tell())

	report = []
	traceback_count = 0
	in_traceback = False
	# NOTE: line numbers count from the resume point, not from the first
	# line of the log file.
	for line_num, x in enumerate(data, 1):
		if x.startswith('Traceback'):
			traceback_count += 1
			report.append("\n+++++++++++++++++++++++++++++++++++++++")
			report.append("\nError on Line: %s\n" % line_num)
			report.append("\n" + x)
			in_traceback = True
		elif in_traceback:
			if x.startswith('File', 2):
				# Stack-frame line of the form '  File "...", line N, in f'.
				report.append(x)
			elif not x[:1].isspace():
				# First unindented line closes the traceback block.
				in_traceback = False

	if traceback_count:
		mail_message = "SCRIPT SUMMARY\n"
		mail_message += "================================================\n"
		mail_message += "Tracebacks found: %s\n" % traceback_count
		mail_message += "Last byte checked: %s\n" % last_byte_read
		mail_message += "Script will start at this number next search!\n"
		mail_message += "================================================\n"
		mail_message += "".join(report)

		send_mail(mail_message)
		print("Errors found and sent to pse-admin")
	else:
		print("Did not find any Tracebacks!")

	# Save the offset only after the mail went out: if sending raised, the
	# next run scans the same region again instead of losing the errors.
	with open(tmp_file, 'w') as state:
		state.write(last_byte_read)

def send_mail(mail_message):
	"""Send the traceback summary to the configured recipients over SMTP.

	mail_message -- plain-text body of the mail.

	Raises smtplib.SMTPException or socket.error when the server cannot
	be reached or refuses the message.
	"""
	smtpserver='smtp.example.com'
	host = socket.gethostname()

	RECIPIENTS = ['username@domain.com']
	# Envelope sender; the visible From header below uses the nagios address.
	SENDER = 'root@%s.mascorp.com' % host

	# MIMEText builds well-formed headers (including To:) instead of a
	# hand-assembled header block.
	message = MIMEText(mail_message)
	message['Subject'] = '[Nagios] Loadscript Errors'
	message['From'] = 'nagios@%s.mascorp.com' % host
	message['To'] = ', '.join(RECIPIENTS)

	session = smtplib.SMTP(smtpserver)
	try:
		session.sendmail(SENDER, RECIPIENTS, message.as_string())
		session.quit()
	finally:
		# Release the socket even when sending failed; harmless after quit().
		session.close()

# Run only when executed as a script.  Any failure is reported with its
# cause and turned into a non-zero exit status; KeyboardInterrupt and
# SystemExit are not intercepted.
if __name__ == "__main__":
	try:
		main()
	except Exception as e:
		print("Error: Check script (%s)" % e)
		sys.exit(1)

Python ,