From 8e2d32a6bc3120e077c54ec3b86ec3641a80362c Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Thu, 23 Nov 2017 00:18:09 -0500 Subject: [PATCH] Rewrote xhamster ripper to use mobile site --- .../ripme/ripper/rippers/XhamsterRipper.java | 133 +++++++++--------- 1 file changed, 69 insertions(+), 64 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/XhamsterRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/XhamsterRipper.java index 26bcd45f..35fe56ff 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/XhamsterRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/XhamsterRipper.java @@ -3,91 +3,46 @@ package com.rarchives.ripme.ripper.rippers; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.util.ArrayList; +import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; -import com.rarchives.ripme.ripper.AlbumRipper; +import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.utils.Http; -import com.rarchives.ripme.utils.Utils; -public class XhamsterRipper extends AlbumRipper { - - private static final String HOST = "xhamster"; +public class XhamsterRipper extends AbstractHTMLRipper { public XhamsterRipper(URL url) throws IOException { super(url); } @Override - public boolean canRip(URL url) { - Pattern p = Pattern.compile("^https?://[wmde.]*xhamster\\.com/photos/gallery/.*[0-9]+$"); - Matcher m = p.matcher(url.toExternalForm()); - return m.matches(); + public String getHost() { + return "xhamster"; + } + + @Override + public String getDomain() { + return "xhamster.com"; } @Override public URL sanitizeURL(URL url) throws MalformedURLException { - return url; - } - - @Override - public void rip() throws IOException { - int index = 0; - String nextURL = this.url.toExternalForm(); - while (nextURL != null) { - logger.info(" Retrieving " + nextURL); - Document doc = Http.url(nextURL).get(); - for (Element thumb : doc.select("table.iListing div.img img")) { - if (!thumb.hasAttr("src")) { - continue; - } - String image = thumb.attr("src"); - // replace thumbnail urls with the urls to the full sized images - image = image.replaceAll( - "https://upt.xhcdn\\.", - "http://up.xhamster."); - image = image.replaceAll("ept\\.xhcdn", "ep.xhamster"); - image = image.replaceAll( - "_160\\.", - "_1000."); - // Xhamster has shitty cert management and uses the wrong cert for their ep.xhamster Domain - // so we change all https requests to http - image = image.replaceAll( - "https://", - "http://"); - index += 1; - String prefix = ""; - if (Utils.getConfigBoolean("download.save_order", true)) { - prefix = String.format("%03d_", index); - } - addURLToDownload(new URL(image), prefix); - if (isThisATest()) { - break; - } - } - if (isThisATest()) { - break; - } - nextURL = null; - for (Element element : doc.select("a.last")) { - nextURL = element.attr("href"); - break; - } - } - waitForThreads(); - } - - @Override - public String getHost() { - return HOST; + String URLToReturn = url.toExternalForm(); + URLToReturn = URLToReturn.replaceAll("m.xhamster.com", "xhamster.com"); + URLToReturn = URLToReturn.replaceAll("\\w\\w.xhamster.com", "xhamster.com"); + URL san_url = new URL(URLToReturn.replaceAll("xhamster.com", "m.xhamster.com")); + logger.info("sanitized URL is " + san_url.toExternalForm()); + return san_url; } @Override public String getGID(URL url) throws MalformedURLException { - Pattern p = Pattern.compile("^https?://[wmde.]*xhamster\\.com/photos/gallery/.*?(\\d+)$"); + Pattern p = Pattern.compile("^https?://[\\w\\w.]*xhamster\\.com/photos/gallery/.*?(\\d+)$"); Matcher m = p.matcher(url.toExternalForm()); if (m.matches()) { return m.group(1); @@ -98,4 +53,54 @@ public class XhamsterRipper extends AlbumRipper { + " Got: " + url); } -} + + @Override + public Document getFirstPage() throws IOException { + // "url" is an instance field of the superclass + return Http.url(url).get(); + } + + @Override + public boolean canRip(URL url) { + Pattern p = Pattern.compile("^https?://[wmde.]*xhamster\\.com/photos/gallery/.*?(\\d+)$"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return true; + } + return false; + } + + @Override + public Document getNextPage(Document doc) throws IOException { + if (doc.select("a.next").first().attr("href") != "") { + return Http.url(doc.select("a.next").first().attr("href")).get(); + } else { + throw new IOException("No more pages"); + } + } + + @Override + public List getURLsFromPage(Document doc) { + List result = new ArrayList<>(); + for (Element thumb : doc.select("div.picture_view > div.pictures_block > div.items > div.item-container > a > div.thumb_container > div.img > img")) { + String image = thumb.attr("src"); + // replace thumbnail urls with the urls to the full sized images + image = image.replaceAll( + "https://upt.xhcdn\\.", + "http://up.xhamster."); + image = image.replaceAll("ept\\.xhcdn", "ep.xhamster"); + image = image.replaceAll( + "_160\\.", + "_1000."); + // Xhamster has bad cert management and uses invalid certs for some cdns, so we change all our requests to http + image = image.replaceAll("https", "http"); + result.add(image); + } + return result; + } + + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index)); + } +} \ No newline at end of file