From c6957331e7029d85ea4be98f54ae74d209324f48 Mon Sep 17 00:00:00 2001
From: 4pr0n
Date: Thu, 12 Jun 2014 22:44:59 -0700
Subject: [PATCH] 1.0.64 - Imagebam ripper #8

---
 pom.xml                                       |   2 +-
 .../ripme/ripper/rippers/ImagebamRipper.java  | 256 ++++++++++++++++++
 .../com/rarchives/ripme/ui/UpdateUtils.java   |   2 +-
 3 files changed, 258 insertions(+), 2 deletions(-)
 create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/ImagebamRipper.java

diff --git a/pom.xml b/pom.xml
index 39ef35c2..3a094010 100644
--- a/pom.xml
+++ b/pom.xml
@@ -4,7 +4,7 @@
 com.rarchives.ripme
 ripme
 jar
- 1.0.63
+ 1.0.64
 ripme
 http://rip.rarchives.com
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagebamRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagebamRipper.java
new file mode 100644
index 00000000..c43d9d3f
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagebamRipper.java
@@ -0,0 +1,256 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import com.rarchives.ripme.ripper.AlbumRipper;
+import com.rarchives.ripme.ripper.DownloadThreadPool;
+import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
+import com.rarchives.ripme.utils.Utils;
+
+/**
+ * Rips imagebam.com galleries.
+ *
+ * Walks the gallery page by page, queues every linked "image" page on a
+ * dedicated thread pool, and hands the image found on each of those pages
+ * to the downloader.
+ */
+public class ImagebamRipper extends AlbumRipper {
+
+    // Pause (ms) after queueing each image page
+    private static final int IMAGE_SLEEP_TIME = 250;
+    // Pause (ms) before requesting the next gallery page
+    private static final int PAGE_SLEEP_TIME = 3000;
+
+    private static final String DOMAIN = "imagebam.com";
+    private static final String HOST = "imagebam";
+
+    // Gallery URL; group 1 is the gallery id. The dot in the domain is escaped
+    // so that look-alike hosts such as "imagebamXcom" are rejected.
+    private static final Pattern GALLERY_PATTERN =
+            Pattern.compile("^https?://[wm.]*imagebam\\.com/gallery/([a-zA-Z0-9]+).*$");
+
+    // Gallery legend text; group 1 is everything before the trailing image count
+    private static final Pattern TITLE_PATTERN =
+            Pattern.compile("^(.*)\\s\\d* image.*$");
+
+    // Thread pool for finding direct image links from "image" pages (html)
+    private DownloadThreadPool imagebamThreadPool = new DownloadThreadPool("imagebam");
+
+    // Current HTML document
+    private Document albumDoc = null;
+
+    public ImagebamRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    public String getHost() {
+        return HOST;
+    }
+
+    // Gallery URLs are used exactly as given
+    public URL sanitizeURL(URL url) throws MalformedURLException {
+        return url;
+    }
+
+    /**
+     * Names the album after the gallery's legend text.
+     *
+     * Falls back to the default naming convention when the page cannot be
+     * loaded, has no legend, or the legend is not in the expected form.
+     */
+    @Override
+    public String getAlbumTitle(URL url) throws MalformedURLException {
+        try {
+            // Attempt to use album title as GID
+            if (albumDoc == null) {
+                logger.info(" Retrieving " + url.toExternalForm());
+                sendUpdate(STATUS.LOADING_RESOURCE, url.toString());
+                albumDoc = Jsoup.connect(url.toExternalForm())
+                        .userAgent(USER_AGENT)
+                        .timeout(5000)
+                        .get();
+            }
+            // first() is null when the page has no legend element at all
+            Element legend = albumDoc.select("legend").first();
+            if (legend == null) {
+                logger.info("No legend found at " + url);
+            } else {
+                String title = legend.text();
+                logger.info("Title text: '" + title + "'");
+                Matcher m = TITLE_PATTERN.matcher(title);
+                if (m.matches()) {
+                    logger.info("matches!");
+                    return HOST + "_" + getGID(url) + " (" + m.group(1).trim() + ")";
+                }
+                logger.info("Doesn't match " + TITLE_PATTERN.pattern());
+            }
+        } catch (Exception e) {
+            // Fall back to default album naming convention
+            logger.warn("Failed to get album title from " + url, e);
+        }
+        return super.getAlbumTitle(url);
+    }
+
+    /**
+     * Extracts the gallery id from a gallery URL.
+     *
+     * @throws MalformedURLException if the URL is not an imagebam gallery URL
+     */
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        Matcher m = GALLERY_PATTERN.matcher(url.toExternalForm());
+        if (m.matches()) {
+            return m.group(1);
+        }
+
+        throw new MalformedURLException(
+                "Expected imagebam gallery format: "
+                        + "http://www.imagebam.com/gallery/galleryid"
+                        + " Got: " + url);
+    }
+
+    /**
+     * Queues every image of the gallery, following pagination until no
+     * further page is linked or the rip is stopped.
+     */
+    @Override
+    public void rip() throws IOException {
+        int index = 0;
+        String nextUrl = this.url.toExternalForm();
+        while (!isStopped()) {
+            if (albumDoc == null) {
+                logger.info(" Retrieving album page " + nextUrl);
+                sendUpdate(STATUS.LOADING_RESOURCE, nextUrl);
+                albumDoc = Jsoup.connect(nextUrl)
+                        .userAgent(USER_AGENT)
+                        .timeout(5000)
+                        .referrer(this.url.toExternalForm())
+                        .get();
+            }
+            // Find thumbnails
+            Elements thumbs = albumDoc.select("div > a[target=_blank]:not(.footera)");
+            if (thumbs.isEmpty()) {
+                logger.info("No images found at " + nextUrl);
+                break;
+            }
+            // Iterate over images on page
+            for (Element thumb : thumbs) {
+                if (isStopped()) {
+                    break;
+                }
+                // absUrl() resolves relative links against the page's base URI
+                // and returns "" when no absolute URL can be formed
+                String imagePage = thumb.absUrl("href");
+                if (imagePage.isEmpty()) {
+                    // One bad link must not abort the whole gallery
+                    logger.warn("Skipping unusable link '" + thumb.attr("href")
+                            + "' at " + nextUrl);
+                    continue;
+                }
+                index++;
+                imagebamThreadPool.addThread(new ImagebamImageThread(new URL(imagePage), index));
+                try {
+                    Thread.sleep(IMAGE_SLEEP_TIME);
+                } catch (InterruptedException e) {
+                    logger.warn("Interrupted while waiting to load next image", e);
+                }
+            }
+
+            if (isStopped()) {
+                break;
+            }
+            // Find next page
+            Elements hrefs = albumDoc.select("a.pagination_current + a.pagination_link");
+            if (hrefs.isEmpty()) {
+                logger.info("No more pages found at " + nextUrl);
+                break;
+            }
+            nextUrl = "http://www.imagebam.com" + hrefs.first().attr("href");
+            logger.info("Found next page: " + nextUrl);
+
+            // Reset albumDoc so we fetch the page next time
+            albumDoc = null;
+
+            // Sleep before loading next page
+            try {
+                Thread.sleep(PAGE_SLEEP_TIME);
+            } catch (InterruptedException e) {
+                logger.error("Interrupted while waiting to load next page", e);
+                break;
+            }
+        }
+
+        // NOTE(review): only the inherited waitForThreads() is called here;
+        // confirm that it also covers work queued on imagebamThreadPool.
+        waitForThreads();
+    }
+
+    /**
+     * Accepts imagebam.com itself and its subdomains only; a bare suffix
+     * match would also accept unrelated hosts such as "notimagebam.com".
+     */
+    public boolean canRip(URL url) {
+        String host = url.getHost();
+        return host.equals(DOMAIN) || host.endsWith("." + DOMAIN);
+    }
+
+    /**
+     * Helper class to find and download images found on "image" pages
+     *
+     * NOTE(review): an earlier comment said this handles the site IP-banning
+     * the user, but nothing in this class does so.
+     */
+    private class ImagebamImageThread extends Thread {
+        private final URL url;
+        private final int index;
+
+        public ImagebamImageThread(URL url, int index) {
+            super();
+            this.url = url;
+            this.index = index;
+        }
+
+        @Override
+        public void run() {
+            fetchImage();
+        }
+
+        private void fetchImage() {
+            try {
+                Document doc = Jsoup.connect(this.url.toExternalForm())
+                        .userAgent(USER_AGENT)
+                        .cookie("nw", "1")
+                        .timeout(5000)
+                        .referrer(this.url.toExternalForm())
+                        .get();
+                // Find image
+                Elements images = doc.select("td > img");
+                if (images.isEmpty()) {
+                    logger.warn("Image not found at " + this.url);
+                    return;
+                }
+                Element image = images.first();
+                String imgsrc = image.attr("src");
+                logger.info("Found URL " + imgsrc + " via " + image);
+                // Provide prefix and let the AbstractRipper "guess" the filename
+                String prefix = "";
+                if (Utils.getConfigBoolean("download.save_order", true)) {
+                    prefix = String.format("%03d_", index);
+                }
+                addURLToDownload(new URL(imgsrc), prefix);
+            } catch (IOException e) {
+                logger.error("[!] Exception while loading/parsing " + this.url, e);
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java
index 9f3cb349..508f437d 100644
--- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java
+++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java
@@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils;
 public class UpdateUtils {
 
     private static final Logger logger = Logger.getLogger(UpdateUtils.class);
-    private static final String DEFAULT_VERSION = "1.0.63";
+    private static final String DEFAULT_VERSION = "1.0.64";
     private static final String updateJsonURL = "http://rarchives.com/ripme.json";
     private static final String updateJarURL = "http://rarchives.com/ripme.jar";
     private static final String mainFileName = "ripme.jar";