From 0783a991102f9dce85fe85389a7f2125e290f8b0 Mon Sep 17 00:00:00 2001 From: 4pr0n Date: Mon, 26 May 2014 02:13:56 -0700 Subject: [PATCH] 1.0.47 - E-hentai: URL validation, logs more info For troubleshooting #41 --- pom.xml | 2 +- .../ripme/ripper/rippers/EHentaiRipper.java | 41 ++++++++++++------- .../com/rarchives/ripme/ui/UpdateUtils.java | 2 +- 3 files changed, 28 insertions(+), 17 deletions(-) diff --git a/pom.xml b/pom.xml index d3461f4b..71b35946 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.rarchives.ripme ripme jar - 1.0.46 + 1.0.47 ripme http://rip.rarchives.com diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EHentaiRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EHentaiRipper.java index 67b9bdf9..96e067b7 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/EHentaiRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EHentaiRipper.java @@ -22,10 +22,6 @@ public class EHentaiRipper extends AlbumRipper { private Document albumDoc = null; - private URL prevUrl = null; - private URL cursorUrl = null; - private Document cursorDoc = null; - public EHentaiRipper(URL url) throws IOException { super(url); } @@ -80,20 +76,32 @@ public class EHentaiRipper extends AlbumRipper { if (albumDoc == null) { albumDoc = Jsoup.connect(this.url.toExternalForm()).get(); } - if (cursorDoc == null) { - Elements select = albumDoc.select("#gdt > .gdtm"); - Element first = select.first(); - String href = first.select("a").attr("href"); - cursorUrl = new URL(href); + Elements select = albumDoc.select("#gdt > .gdtm"); + Element first = select.first(); + String href = first.select("a").attr("href"); + if (href.equals("")) { + throw new IOException("Could not find 'href' inside elements under #gdt > .gdtm > a"); } + URL cursorUrl = new URL(href), prevUrl = null; while (!cursorUrl.equals(prevUrl)) { - cursorDoc = Jsoup.connect(this.cursorUrl.toExternalForm()).get(); + Document cursorDoc = Jsoup.connect(cursorUrl.toExternalForm()) + .userAgent(USER_AGENT) + .get(); Elements a = cursorDoc.select(".sni > a"); - Elements img = a.select("img"); + Elements images = a.select("img"); + if (images.size() == 0) { + logger.error("No images found at " + cursorUrl); + break; + } - String imgsrc = img.attr("src"); + String imgsrc = images.get(0).attr("src"); + if (imgsrc.equals("")) { + logger.warn("Image URL is empty via " + images.get(0)); + continue; + } + logger.info("Found URL " + imgsrc + " via " + images.get(0)); Pattern p = Pattern.compile("^http://.*/ehg/image.php.*&n=([^&]+).*$"); Matcher m = p.matcher(imgsrc); if (m.matches()) { @@ -114,10 +122,13 @@ public class EHentaiRipper extends AlbumRipper { addURLToDownload(new URL(imgsrc), prefix); } - String href = a.attr("href"); - prevUrl = cursorUrl; - cursorUrl = new URL(href); + String nextUrl = a.attr("href"); + if (nextUrl.equals("")) { + logger.warn("Next page URL is empty, via " + a); + break; + } + cursorUrl = new URL(nextUrl); index++; } diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java index 3a42b458..bb988d20 100644 --- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java +++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java @@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils; public class UpdateUtils { private static final Logger logger = Logger.getLogger(UpdateUtils.class); - private static final String DEFAULT_VERSION = "1.0.46"; + private static final String DEFAULT_VERSION = "1.0.47"; private static final String updateJsonURL = "http://rarchives.com/ripme.json"; private static final String updateJarURL = "http://rarchives.com/ripme.jar"; private static final String mainFileName = "ripme.jar";