From 564bab9ab0cae7594e5e355862746bc946c5996c Mon Sep 17 00:00:00 2001
From: cyian-1756
Date: Tue, 17 Oct 2017 00:02:44 -0400
Subject: [PATCH 1/3] Added Hbrowse.com ripper

---
 .../ripme/ripper/rippers/HbrowseRipper.java | 67 +++++++++++++++++++
 1 file changed, 67 insertions(+)
 create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/HbrowseRipper.java

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/HbrowseRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/HbrowseRipper.java
new file mode 100644
index 00000000..356fa97b
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/HbrowseRipper.java
@@ -0,0 +1,67 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+
+public class HbrowseRipper extends AbstractHTMLRipper {
+
+    public HbrowseRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    public String getHost() {
+        return "hbrowse";
+    }
+
+    @Override
+    public String getDomain() {
+        return "hbrowse.com";
+    }
+
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        Pattern p = Pattern.compile("http://www.hbrowse.com/\\d+/([a-zA-Z0-9]*)");
+        Matcher m = p.matcher(url.toExternalForm());
+        if (m.matches()) {
+            return m.group(1);
+        }
+        throw new MalformedURLException("Expected hbrowse.com URL format: " +
+                        "hbrowse.com/ID/COMICID - got " + url + " instead");
+    }
+
+    @Override
+    public Document getFirstPage() throws IOException {
+        // "url" is an instance field of the superclass
+        Document tempDoc = Http.url(url).get();
+        return Http.url(tempDoc.select("td[id=pageTopHome] > a[title=view thumbnails (top)]").attr("href")).get();
+    }
+
+    @Override
+    public List<String> getURLsFromPage(Document doc) {
+        List<String> result = new ArrayList<String>();
+        for (Element el : doc.select("table > tbody > tr > td > a > img")) {
+            String imageURL = el.attr("src").replace("/zzz", "");
+            result.add(imageURL);
+        }
+        return result;
+    }
+
+    @Override
+    public void downloadURL(URL url, int index) {
+        addURLToDownload(url, getPrefix(index));
+    }
+}

From 56051bf1afc33a336153a38f4e76891e074727ff Mon Sep 17 00:00:00 2001
From: cyian-1756
Date: Tue, 17 Oct 2017 00:21:15 -0400
Subject: [PATCH 2/3] Hbrowse.com ripper now includes album title

---
 .../ripme/ripper/rippers/HbrowseRipper.java | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/HbrowseRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/HbrowseRipper.java
index 356fa97b..45377a3d 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/HbrowseRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/HbrowseRipper.java
@@ -50,6 +50,19 @@ public class HbrowseRipper extends AbstractHTMLRipper {
         return Http.url(tempDoc.select("td[id=pageTopHome] > a[title=view thumbnails (top)]").attr("href")).get();
     }
 
+    @Override
+    public String getAlbumTitle(URL url) throws MalformedURLException {
+        try {
+            Document doc = getFirstPage();
+            String title = doc.select("div[id=main] > table.listTable > tbody > tr > td.listLong").first().text();
+            return getHost() + "_" + title + "_" + getGID(url);
+        } catch (Exception e) {
+            // Fall back to default album naming convention
+            logger.warn("Failed to get album title from " + url, e);
+        }
+        return super.getAlbumTitle(url);
+    }
+
     @Override
     public List<String> getURLsFromPage(Document doc) {
         List<String> result = new ArrayList<String>();

From 7c6c2f394a0d3538e1d55c7cf0895a401f364408 Mon Sep 17 00:00:00 2001
From: cyian-1756
Date: Tue, 17 Oct 2017 14:06:15 -0400
Subject: [PATCH 3/3] hbrowse ripper now get real GID

---
Note: getGID now captures the numeric first path segment instead of the
trailing COMICID segment. The pattern accepts http or https, with or without
"www.", and escapes the dots so they only match a literal '.'.

 .../java/com/rarchives/ripme/ripper/rippers/HbrowseRipper.java | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/HbrowseRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/HbrowseRipper.java
index 45377a3d..029f8998 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/HbrowseRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/HbrowseRipper.java
@@ -34,7 +34,8 @@ public class HbrowseRipper extends AbstractHTMLRipper {
 
     @Override
     public String getGID(URL url) throws MalformedURLException {
-        Pattern p = Pattern.compile("http://www.hbrowse.com/\\d+/([a-zA-Z0-9]*)");
+        // GID is the numeric ID (first path segment), not the trailing COMICID segment
+        Pattern p = Pattern.compile("https?://(?:www\\.)?hbrowse\\.com/(\\d+)/[a-zA-Z0-9]*");
         Matcher m = p.matcher(url.toExternalForm());
         if (m.matches()) {
             return m.group(1);