Important: new version released along with new PB design. Read more about it here: http://9v.lt/blog/photobucket-ripper-update/
I first wrote about a method to grab someone else’s pictures from Photobucket and provided a script to do so. The method worked, but the script was rather primitive and not automated at all… well, it was OK for my one-time use. But now that I have more stuff to grab and have to update the album constantly, it is not very nice to have to do it all over again each time.
So I decided I’ll make a better, fully automated script and here it is.
To use the script you will need to install a “mechanize” library for Python (and of course python itself, lol), but don’t worry, I provided everything.
As a developer I like to keep things simple and with this script, ripping those pictures is really simple.
If the file already exists, it will skip it.
The script with required libraries can be downloaded here: PhotobucketGetter.zip
Now the usage is very simple. Run it with a -h parameter to see the help.
$ python PhotobucketGetter.py -h
usage: PhotobucketGetter.py [-h] -u [-p] [-f] [-d] [-n] [-t]

Script to grab and save pics from a photobucket album automatically

optional arguments:
  -h, --help       show this help message and exit
  -u , --url       Album URL
  -p , --passwd    Album password (if any)
  -f , --filter    What to download (pic/vid/all)
  -d , --dir       Where to download (folder name)
  -n, --nofolder   If this is used, then downloaded files will not be put in separate folders
  -t, --terminate  If to terminate on error or continue grabbing
So, if the album you want to rip doesn’t have a password, just input
PhotobucketGetter.py -u URL
If the album has a password, add an additional “-p mypasswd” parameter.
The other options are optional: add “-f pic” to download just pictures, “-f vid” to download just videos, or “-f all” (or leave it out entirely) to download everything.
The “-d” parameter specifies the name of the folder where everything will be put – the default is “PhotobucketGetter”.
Now the script itself is a bit complicated, but easy to understand (if that makes sense :P)
'''
Script to grab pics from a photobucket album automatically
and save them locally. Provide password if the album is
protected and the album URL.
======
If you really like this script, then consider a small donation
for my sleepless efforts keeping this script working and up-to-date.
Go to my website: http://9v.lt and press a Donate button on the right :)
======
Author: Kulverstukas
Website: http://9v.lt
Shouts to Evilzone.org and Programisiai.lt
Version: 0.5
    http://9v.lt/blog/update-photobucket-ripper/

Note: the script targets Python 2 (it relies on urllib.urlretrieve).
Every print is written as a single parenthesised argument, which
produces the same output under the Python 2 print statement.
'''

import os
import sys
import re
import urllib

from argparse import ArgumentParser

try:
    import mechanize
except ImportError:
    # Reported with a readable message by begin() instead of a traceback
    # at import time.
    mechanize = None


#============================================
def _removePartial(path):
    """Best-effort removal of a partially written download.

    Without this, a file left behind by an interrupted transfer would be
    treated as "exists" and skipped on every later run.
    """
    try:
        if os.path.exists(path):
            os.remove(path)
    except OSError:
        # Cleanup only; the original error is what gets reported.
        pass


def _downloadFile(url, name, counter, browser, kind):
    """Download one file into the target folder.

    Reads the module-level settings noFolder, noNamePic, mainFolder and
    terminate.

    url     -- address of the file to fetch
    name    -- already sanitised title, used as the sub-folder name
    counter -- 1-based position, only used in the progress messages
    browser -- mechanize browser used to read the Content-Length header
    kind    -- "image" or "video", only used in the error message

    Returns True when the download failed and was skipped, False when the
    file was downloaded or already existed. Exits the process when the
    user interrupts, or on error if terminate is set.
    """
    if not noFolder:
        if name == "":
            name = noNamePic
        folder = mainFolder + '/' + name
        if not os.path.exists(folder):
            os.mkdir(folder)

    fileName = os.path.basename(url)
    if noFolder:
        fullPath = "%s/%s" % (mainFolder, fileName)
        name = mainFolder
    else:
        fullPath = "%s/%s/%s" % (mainFolder, name, fileName)

    if os.path.exists(fullPath):
        print('%d. Retrieving "%s" into "%s" folder' % (counter, fileName, name))
        print("*** " + name + '/' + fileName + " exists. Skipping...")
        return False

    try:
        size = CalculateSize().calculateSize(
            browser.open(url).info().get("Content-Length"))
        print('%d. Retrieving "%s" into "%s" folder -- Size: %s'
              % (counter, fileName, name, size))
        urllib.urlretrieve(url, fullPath)
    except KeyboardInterrupt:
        _removePartial(fullPath)
        print(" Terminating...")
        sys.exit(0)  # user-requested stop, not a failure
    except Exception as e:
        _removePartial(fullPath)
        if terminate:
            print(" Terminating with message: %s" % e)
            sys.exit(1)
        print(" Error grabbing this %s. Continuing..." % kind)
        return True
    return False


#============================================
class ImageMethods:
    """Locates the slideshow data in the album page and downloads pictures."""

    def downloadImages(self, links, browser):
        """Download every picture described in the slideshow data.

        links   -- text returned by grabSlideshowData(); entries are
                   separated by a comma followed by two or more whitespace
                   characters
        browser -- mechanize browser, passed on to the download helper

        Returns the number of pictures that were skipped because of an
        error.
        """
        errorCounter = 0
        # Drop empty chunks so an empty album is not reported as one image.
        linksList = [i.strip() for i in re.split(r',\s{2,}', links) if i.strip()]
        print("* Found " + str(len(linksList)) + " images...")
        print("* Compiling regex patterns and downloading the pictures...")
        picUrl = re.compile(r'url: "(https?://(.*?)\.photobucket\.com/albums/(.*?))",')
        picName = re.compile(r'title: "(.*?)"')
        for counter, link in enumerate(linksList, 1):
            urlMatch = picUrl.search(link)
            if urlMatch is None:
                # Entry without a recognisable picture URL: handle it like
                # any other per-picture failure instead of crashing.
                if terminate:
                    print(" Terminating with message: %s"
                          % "no picture URL found in slideshow entry")
                    sys.exit(1)
                print(" Error grabbing this image. Continuing...")
                errorCounter += 1
                continue
            nameMatch = picName.search(link)
            # A missing title falls back to "" (the download helper then
            # substitutes noNamePic for the folder name).
            name = stripSymbols(nameMatch.group(1)) if nameMatch else ""
            if _downloadFile(urlMatch.group(1), name, counter, browser, "image"):
                errorCounter += 1
        return errorCounter

    def grabSlideshowData(self, htmlCode):
        """Return the body of the PB.Slideshow.data array found in htmlCode.

        Exits the process with a message when the array is not present.
        """
        data = re.search(r"PB\.Slideshow\.data = \[\n.*\];", htmlCode)
        if data is None:
            print("*** Something went wrong grabbing picture data. Terminating...")
            sys.exit(1)
        data = data.group(0).replace("PB.Slideshow.data = [", "").replace("];", "").strip()
        return data


#============================================
class VideoMethods:
    """Locates video thumbnails in the album page and downloads the videos."""

    def downloadVideos(self, list, browser):
        """Download every (url, name) pair in list.

        Returns the number of videos that were skipped because of an error.
        """
        errorCounter = 0
        print("* Found " + str(len(list)) + " videos...")
        for counter, item in enumerate(list, 1):
            url = item[0]
            name = stripSymbols(item[1])
            if _downloadFile(url, name, counter, browser, "video"):
                errorCounter += 1
        return errorCounter

    def grabVideoLinks(self, html):
        """Return a list of (videoUrl, videoName) tuples found in html.

        Looks for <img> tags whose src starts with the module-level album
        URL (any sub-domain accepted), takes the name from the title
        attribute and the file name from the first word of the alt
        attribute.
        """
        videos = []
        pattern = "<img src=\"" + album.replace(".", r"\.") + ".*/>"
        # Accept any sub-domain in place of the one in the album URL; the
        # scheme (http or https) is kept as given.
        hostMatch = re.search(r"(https?)://(.*?)\.", pattern)
        if hostMatch is not None:
            pattern = pattern.replace(hostMatch.group(0),
                                      hostMatch.group(1) + r"://[\w\d]*\.")
        for link in re.findall(pattern, html):
            altMatch = re.search(r'alt="(.*?)"', link)
            if altMatch is None:
                # No alt attribute means no file name to build a URL from.
                continue
            titleMatch = re.search(r'title="(.*?)"', link)
            videoName = titleMatch.group(1) if titleMatch else ""
            # Keep only the text before the first whitespace character.
            fileName = re.split(r"\s", altMatch.group(1), maxsplit=1)[0]
            # URLs always use '/', so join by hand rather than os.path.join.
            videoUrl = album.rstrip("/") + "/" + fileName.lstrip("/")
            videos.append((videoUrl, videoName))
        return videos


#============================================
class CalculateSize:
    """Formats a byte count as a short human-readable string."""

    def calculateSize(self, bytes):
        """Return bytes formatted as "N B", "N.NN kB", "N.NN mB" or "N.NN gB".

        bytes may be a number, a numeric string, or None; None gives "0 kB".
        """
        abbrevs = ["kB", "mB", "gB"]
        if bytes is None:
            return "0 kB"
        bytes = float(bytes)
        if bytes < 1024.0:
            return "%d B" % (bytes)
        for abbrev in abbrevs:
            # Keep dividing while the value still fits the next larger unit.
            if bytes >= 1024.0:
                bytes = bytes / 1024.0
                size = "%.2f %s" % (bytes, abbrev)
        return size


#============================================
def stripSymbols(input):
    """Replace characters that are not allowed in file names with '~'."""
    badSymbols = ['\\', '/', ':', '*', '?', '"', '<', '>', '|']
    replacement = '~'
    for symbol in badSymbols:
        input = input.replace(symbol, replacement)
    return input


#============================================
def begin():
    """Run the whole grab: connect, log in if needed, download.

    Reads the module-level settings mainFolder, album, passwd, filter,
    slideshowFilter, videoFilter and terminate. Exits the process on
    connection errors, on a missing password, or when mechanize is not
    installed.
    """
    if mechanize is None:
        print('*** The "mechanize" library is not installed. Terminating...')
        sys.exit(1)

    print('\n* Creating "%s" folder...' % mainFolder)
    if os.path.exists(mainFolder):
        print("*** Folder exists. Skipping...")
    else:
        os.mkdir(mainFolder)

    print("* Initiating connection to Photobucket...")
    browser = mechanize.Browser()
    browser.addheaders = [('User-Agent', 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)')]
    browser.set_handle_equiv(False)
    #browser.set_debug_http(True)
    browser.set_handle_robots(False)
    try:
        browser.open(album)
    except KeyboardInterrupt:
        print(" Terminating...")
        sys.exit(0)
    except Exception as e:
        print(" Terminating with message: %s" % e)
        sys.exit(1)

    # see if the album has a password field
    for form in browser.forms():
        if form.name == "frmLogin":
            # passwd is None when -p was not given, so test for "empty"
            # rather than comparing with "".
            if not passwd:
                print("*** Album requires password, none given. Terminating...")
                sys.exit(1)
            print("* Album requires password... using '" + passwd + "'")
            browser.select_form(name="frmLogin")
            browser.form["loginForm[password]"] = passwd
            print("* Submitting password...")
            browser.submit()
            break

    if (filter == "pic") or (filter == "all"):
        print("* Reading image HTML code...")
        rawHtml = browser.open(album + slideshowFilter).read()
        imgMethods = ImageMethods()
        slideshowData = imgMethods.grabSlideshowData(rawHtml)
        errors = imgMethods.downloadImages(slideshowData, browser)
        if not terminate:
            print(" There were %d skipped images while grabbing" % errors)
        print(" Done grabbing images!")

    if (filter == "vid") or (filter == "all"):
        print("* Reading video HTML code...")
        rawHtml = browser.open(album + videoFilter).read()
        vidMethods = VideoMethods()
        links = vidMethods.grabVideoLinks(rawHtml)
        errors = vidMethods.downloadVideos(links, browser)
        if not terminate:
            print(" There were %d skipped videos while grabbing" % errors)
        print(" Done grabbing videos!")
#=============================================
def buildArgumentParser():
    """Return the command-line parser for the script.

    --passwd defaults to "" (not None) so the password can always be
    treated as a string.
    """
    parser = ArgumentParser(description="Script to grab and save pics from a photobucket album automatically")
    parser.add_argument('-u', '--url', help='Album URL', required=True, metavar="")
    parser.add_argument('-p', '--passwd', help='Album password (if any)', default="", metavar="")
    parser.add_argument('-f', '--filter', help='What to download (pic/vid/all)', default="all", metavar="")
    parser.add_argument('-d', '--dir', help='Where to download (folder name)', default="PhotobucketGetter", metavar="")
    parser.add_argument('-n', '--nofolder', help='If this is used, then downloaded files will not be put in separate folders', action="store_true")
    parser.add_argument('-t', '--terminate', help='If to terminate on error or continue grabbing', action="store_true")
    return parser


def normalizeFilter(value):
    """Return value if it is 'pic', 'vid' or 'all'; otherwise 'all'."""
    if value in ('pic', 'vid', 'all'):
        return value
    return 'all'


# Only parse the command line and start grabbing when run as a script,
# so importing the module has no side effects.
if __name__ == "__main__":
    args = buildArgumentParser().parse_args()

    #====== global vars, change values here ======
    noNamePic = 'NoName'
    slideshowFilter = "?albumview=slideshow"
    videoFilter = "?mediafilter=videos"
    mainFolder = args.dir
    album = args.url
    passwd = args.passwd
    filter = normalizeFilter(args.filter)  # name is read by begin()
    noFolder = args.nofolder
    terminate = args.terminate
    #=============================================

    begin()