Dell Scraper
by Ryan on Jan.12, 2010, under Coding/Scripting, Sys Admin, python
Due to failings in the past, our inventory system is in need of a revamp, as it is missing both systems and information. As part of the revamp we decided to scrap everything we knew and re-inventory everything. We use a lot of Dell systems, and I figured that, as we had collected the service tags of a number of systems, we could get the info about each system from the Dell site. I wrote a quick Python script to take a list of service tags and output the system info as a CSV file in the format “tag, shipdate, SysType, Ram, HDD, Support”.
E.g.
"4GXCB1H","2008-03-29","Latitude D430","2GB","80GB","3Y NBD INTERNATIONAL (NEXT BUSINESS DAY);UPG TO 4Y NBD INTERNATIONAL (NEXT BUSINE"
Place a text file called “taglist.txt” in the same directory as the script, with the Dell service tags listed one per line. When the script is done, the data will be in the file called “SystemList.txt”.
4GXCB1H 5GXAB1H
#!/usr/bin/python
#
# Copyright 2009 Ryan McLean <ryanm *at* ninet *.* org>
#
# dellscraper.py
#
# Dell Warranty Scraper
###Imports
from urllib2 import Request, urlopen, URLError
import datetime, re, sys, os, string
def processHTML(rawhtml):
    """Extract system details from the HTML of a Dell support page.

    Args:
        rawhtml: HTML text of the Dell "system details" page for one
            service tag.

    Returns:
        A tuple ``(shipdate, SysType, Support, Ram, HDD)``:
        ``shipdate`` is the date string exactly as it appears on the page
        (``NN/NN/NNNN``), ``SysType`` is the system type text, ``Support``
        is a list of support-contract descriptions (possibly empty),
        ``Ram`` is the memory size (e.g. ``2048MB``) and ``HDD`` is the
        first disk size given in GB (e.g. ``80GB``).

    Raises:
        ValueError: if the ship date, system type, parts table, memory
            size or disk size cannot be found in ``rawhtml``.
    """

    def required(match, what):
        # Name the missing field instead of failing with an opaque
        # AttributeError on ``None.group``.
        if match is None:
            raise ValueError('Dell page is missing the ' + what)
        return match.group(1)

    # Grab ship date
    shipdate = required(
        re.search(r'Ship\sDate:</td><td\s.*?>(\d\d/\d\d/\d\d\d\d)</td>',
                  rawhtml, re.IGNORECASE),
        'ship date')

    # Grab system type
    SysType = required(
        re.search(r'System\sType:</td><td\s.*?>(.*?)</td></tr>',
                  rawhtml, re.IGNORECASE),
        'system type')

    # Breakdown: isolate the rows that follow the "Part Description" header.
    table = required(
        re.search(r'<span>Part Description</span></td></tr><tr><td .*?>'
                  r'<img .*? /></td></tr>(.*</table>)',
                  rawhtml, re.IGNORECASE),
        'parts table')
    table = table.replace('</table>', '')
    # Drop the first cell of every row, keeping only the description cell.
    table = re.sub(r'<tr>.*?</td><td', '<td', table)
    table = table.replace('<td valign="top">', '')
    # Collapse each run of tags into a single "<x>" cell separator.  Doing
    # this directly (rather than via an "XXX" placeholder) keeps real text
    # such as "XPS" intact.
    table = re.sub(r'(?:<.*?>)+', '<x>', table)

    # Get RAM
    Ram = required(
        re.search(r'<x>MEMORY\s:\s(\d+[MG]B).*?<x>', table),
        'memory size')

    # Get HDD: first cell mentioning a size in GB.  The memory cell is
    # skipped so that "MEMORY : 2GB" is never reported as the disk size.
    HDD = required(
        re.search(r'<x>(?!MEMORY\s:)[^<]*?(\d+GB)[^<]*<x>', table),
        'hard drive size')

    # Get support details: base contract, upgrade, then ProSupport.
    Support = []
    support_patterns = (
        r'<x>(\dY[R]?\s.*?)<x>',
        r'<x>(UPG\sTO\s\dY[R]?\s.*?)<x>',
        r'<x>(\dY[R]?\sPRO.*?)<x>',
    )
    for pattern in support_patterns:
        res = re.search(pattern, table)
        # The first and third patterns can hit the same cell; list it once.
        if res and res.group(1) not in Support:
            Support.append(res.group(1))

    return shipdate, SysType, Support, Ram, HDD
#fileI/O
def readfile(inFile):
    """Read service tags from a text file, one tag per line.

    Args:
        inFile: path of the tag list file.

    Returns:
        A list of the non-blank lines of the file with surrounding
        whitespace (including ``\\r\\n`` line endings) removed, in file
        order.

    Raises:
        SystemExit: if ``inFile`` is not an existing file.  The exit
            carries a message, so the process ends with a non-zero status.
    """
    if not os.path.isfile(inFile):
        sys.exit('Tag list file not found: ' + inFile)
    # Tags are appended to a URL later, so stray whitespace must not
    # survive; ``with`` closes the file even if reading fails.
    with open(inFile, 'r') as thisfile:
        return [line.strip() for line in thisfile if line.strip()]
def writefile(outfile, outline):
    """Append ``outline`` plus a newline to the file ``outfile``.

    The file is opened in append mode (created if absent) and is closed
    again even if the write fails.
    """
    with open(outfile, 'a') as thisfile:
        thisfile.write(outline + "\n")
#format data and write
def fdata(tag, shipdate, SysType, Support, Ram, HDD):
    """Format one system's details as a quoted CSV line.

    Args:
        tag: service tag.
        shipdate: ship date as three ``/``-separated parts; treated as
            day/month/year and re-emitted as ``year-month-day``.
        SysType: system type text.
        Support: list of support descriptions; joined with ``;``.
        Ram: memory size such as ``2048MB`` or ``2GB``.
        HDD: disk size such as ``80GB``.

    Returns:
        The line ``"tag","shipdate","SysType","Ram","HDD","Support"``
        (no trailing newline).  Double quotes inside a field are doubled
        so the line stays valid CSV.

    Raises:
        ValueError: if ``shipdate`` does not split into exactly three
            parts on ``/``.
    """
    d, m, y = shipdate.split('/')
    shipdate = y + '-' + m + '-' + d

    Support = ';'.join(Support)

    # Express memory of 1024MB or more in GB.  Dividing (rather than
    # keeping the leading digit) gives 1536MB -> 1.5GB and also handles
    # sizes of five or more digits such as 16384MB -> 16GB.
    res = re.match(r'(\d+)MB$', Ram)
    if res and int(res.group(1)) >= 1024:
        Ram = '%gGB' % (int(res.group(1)) / 1024.0)

    fields = (tag, shipdate, SysType, Ram, HDD, Support)
    return ','.join('"' + field.replace('"', '""') + '"' for field in fields)
### MAIN ###
# Dell support "system details" page; the service tag is appended to it.
url = 'http://support.euro.dell.com/support/topics/topic.aspx/emea/shared/support/my_systems_info/en/details?c=uk&cs=RC1050265&l=en&s=bsd&~ck=anavml&~tab=2&~wsf=tabs&servicetag='
# Input file: one service tag per line.
filename = 'taglist.txt'


def main():
    """Look up every tag in ``filename`` and append a CSV line per system.

    Each tag's page is fetched from ``url`` + tag, parsed with
    ``processHTML``, formatted with ``fdata`` and appended to
    ``./SystemList.txt``.  A tag whose page cannot be fetched or parsed is
    reported on standard output and skipped; the remaining tags are still
    processed.
    """
    for tag in readfile(filename):
        try:
            rawhtml = urlopen(Request(url + tag)).read()
        except URLError as e:
            # An HTTP error is also a URLError and may carry both
            # attributes, so look for the status code first.
            if hasattr(e, 'code'):
                print('The server couldn\'t fulfill the request.')
                print('Error code:  %s' % (e.code,))
            elif hasattr(e, 'reason'):
                print('We failed to reach a server.')
                print('Reason:  %s' % (e.reason,))
            continue

        try:
            shipdate, SysType, Support, Ram, HDD = processHTML(rawhtml)
        except (AttributeError, ValueError) as e:
            # The page lacked an expected field (e.g. unknown tag); do not
            # let one bad tag abort the rest of the list.
            print('Could not parse the Dell page for tag %s: %s' % (tag, e))
            continue

        outstr = fdata(tag, shipdate, SysType, Support, Ram, HDD)
        writefile("./SystemList.txt", outstr)


if __name__ == '__main__':
    main()
March 3rd, 2010 on 22:28
Food for thought. Thank you for that; however, my thanks don’t end there. I am color blind (tritanopia, to be exact). I mostly use the Safari browser (unsure if that matters), and a good many internet sites are hard to understand thanks to a careless variety of colours employed in the design. On your site, as the choice of colours is reasonable, the site is quite clear and simple to read. I am not sure whether this was a calculated and kind act, or just good luck, but I still thank you.