Eroshare can now rip profiles (Fixes #465) (#578)

2017-06-15 15:21:08 -04:00 · 2017-06-15 15:21:08 -04:00 · 128d384d29
commit 128d384d29
parent 6443240901
1 changed files with 92 additions and 20 deletions
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java
@ -9,6 +9,7 @@ import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@ -17,8 +18,10 @@ import org.jsoup.Connection.Response;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
 import org.jsoup.select.Elements;
 import org.jsoup.Connection.Method;
 import com.rarchives.ripme.ripper.AbstractHTMLRipper;
 import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
 import com.rarchives.ripme.utils.Http;
 /**
@ -27,6 +30,7 @@ import com.rarchives.ripme.utils.Http;
 */
 public class EroShareRipper extends AbstractHTMLRipper {
    public EroShareRipper (URL url) throws IOException {
        super(url);
    }
@ -42,33 +46,73 @@ public class EroShareRipper extends AbstractHTMLRipper {
    }
    @Override
-    public void downloadURL(URL url, int index) {
+    public void downloadURL(URL url, int index){
        addURLToDownload(url);
    }
    /**
     * Reports whether this ripper accepts the given URL.
     *
     * A URL is accepted when its external form matches either the album
     * pattern (a single path segment) or the profile pattern (a single
     * segment under {@code /u/}).
     *
     * @param url the URL to test
     * @return {@code true} if either pattern matches the whole URL
     */
    @Override
    public boolean canRip(URL url) {
        final String link = url.toExternalForm();
        // Album URL: https://eroshare.com/<id>
        if (Pattern.matches("^https?://[w.]*eroshare.com/([a-zA-Z0-9\\-_]+)/?$", link)) {
            return true;
        }
        // Profile URL: https://eroshare.com/u/<name>
        return Pattern.matches("^https?://[w.]*eroshare.com/u/([a-zA-Z0-9\\-_]+)/?$", link);
    }
    /**
     * Reports whether the given URL has the profile form, i.e. a single
     * path segment under {@code /u/} on the eroshare.com host pattern.
     *
     * @param url the URL to test
     * @return {@code true} if the whole URL matches the profile pattern
     */
    public boolean is_profile(URL url) {
        return Pattern
                .compile("^https?://[w.]*eroshare.com/u/([a-zA-Z0-9\\-_]+)/?$")
                .matcher(url.toExternalForm())
                .matches();
    }
    /**
     * Fetches the page that follows {@code doc} in a paginated listing.
     *
     * The next page is located through the first {@code li.next > a} link
     * in the document; its {@code href} is appended to
     * {@code https://eroshare.com} and fetched.
     *
     * @param doc the page currently being processed
     * @return the document loaded from the next-page link
     * @throws IOException if the page has no next-page link, the link has an
     *         empty {@code href}, or fetching the next page fails
     */
    @Override
    public Document getNextPage(Document doc) throws IOException {
        // first() yields null when the selector matches nothing, which is the
        // normal situation on the last page; treat it like an empty href.
        Element nextLink = doc.select("li.next > a").first();
        String nextUrl = (nextLink == null) ? "" : nextLink.attr("href");
        // Compare by content: reference equality against "" is not a reliable
        // emptiness check for a string produced at runtime.
        if (nextUrl.isEmpty()) {
            throw new IOException("No more pages");
        }
        return Http.url("https://eroshare.com" + nextUrl).get();
    }
    @Override
    public String getAlbumTitle(URL url) throws MalformedURLException {
-        try {
+        if (is_profile(url) == false) {
-            // Attempt to use album title as GID
+            try {
-            Element titleElement = getFirstPage().select("meta[property=og:title]").first();
+                // Attempt to use album title as GID
-            String title = titleElement.attr("content");
+                Element titleElement = getFirstPage().select("meta[property=og:title]").first();
-            title = title.substring(title.lastIndexOf('/') + 1);
+                String title = titleElement.attr("content");
-            return getHost() + "_" + getGID(url) + "_" + title.trim();
+                title = title.substring(title.lastIndexOf('/') + 1);
-        } catch (IOException e) {
+                return getHost() + "_" + getGID(url) + "_" + title.trim();
-            // Fall back to default album naming convention
+            } catch (IOException e) {
-            logger.info("Unable to find title at " + url);
+                // Fall back to default album naming convention
                logger.info("Unable to find title at " + url);
            }
            return super.getAlbumTitle(url);
        }
-        return super.getAlbumTitle(url);
+        return url.toExternalForm().split("/u/")[1];
    }
    @Override
-    public List<String> getURLsFromPage(Document doc) {
+    public List<String> getURLsFromPage(Document doc){
        List<String> URLs = new ArrayList<String>();
        //Pictures
        Elements imgs = doc.getElementsByTag("img");
-        for (Element img : imgs) {
+        for (Element img : imgs){
-            if (img.hasClass("album-image")) {
+            if (img.hasClass("album-image")){
                String imageURL = img.attr("src");
                imageURL = "https:" + imageURL;
                URLs.add(imageURL);
@ -76,13 +120,33 @@ public class EroShareRipper extends AbstractHTMLRipper {
        }
        //Videos
        Elements vids = doc.getElementsByTag("video");
-        for (Element vid : vids) {
+        for (Element vid : vids){
-            if (vid.hasClass("album-video")) {
+            if (vid.hasClass("album-video")){
                Elements source = vid.getElementsByTag("source");
                String videoURL = source.first().attr("src");
                URLs.add(videoURL);
            }
        }
        // Profile videos
        Elements links = doc.select("div.item-container > a.item");
        for (Element link : links){
            Document video_page;
            try {
                video_page = Http.url("https://eroshare.com" + link.attr("href")).get();
            } catch(IOException e) {
                logger.warn("Failed to log link in Jsoup");
                video_page = null;
                e.printStackTrace();
            }
            Elements profile_vids = video_page.getElementsByTag("video");
            for (Element vid : profile_vids){
                if (vid.hasClass("album-video")){
                    Elements source = vid.getElementsByTag("source");
                    String videoURL = source.first().attr("src");
                    URLs.add(videoURL);
                }
            }
        }
        return URLs;
    }
@ -105,6 +169,13 @@ public class EroShareRipper extends AbstractHTMLRipper {
        if (m.matches()) {
            return m.group(1);
        }
        Pattern pa = Pattern.compile("^https?://[w.]*eroshare.com/u/([a-zA-Z0-9\\-_]+)/?$");
        Matcher ma = pa.matcher(url.toExternalForm());
        if (ma.matches()) {
            return m.group(1) + "_profile";
        }
        throw new MalformedURLException("eroshare album not found in " + url + ", expected https://eroshare.com/album");
    }
@ -119,8 +190,8 @@ public class EroShareRipper extends AbstractHTMLRipper {
        List<URL> URLs = new ArrayList<URL>();
        //Pictures
        Elements imgs = doc.getElementsByTag("img");
-        for (Element img : imgs) {
+        for (Element img : imgs){
-            if (img.hasClass("album-image")) {
+            if (img.hasClass("album-image")){
                String imageURL = img.attr("src");
                imageURL = "https:" + imageURL;
                URLs.add(new URL(imageURL));
@ -128,8 +199,8 @@ public class EroShareRipper extends AbstractHTMLRipper {
        }
        //Videos
        Elements vids = doc.getElementsByTag("video");
-        for (Element vid : vids) {
+        for (Element vid : vids){
-            if (vid.hasClass("album-video")) {
+            if (vid.hasClass("album-video")){
                Elements source = vid.getElementsByTag("source");
                String videoURL = source.first().attr("src");
                URLs.add(new URL(videoURL));
@ -139,3 +210,4 @@ public class EroShareRipper extends AbstractHTMLRipper {
        return URLs;
    }
 }