Eroshare can now rip profiles (Fixes #465) (#578)

2017-06-15 15:21:08 -04:00 · 2017-06-15 15:21:08 -04:00 · 128d384d29
commit 128d384d29
parent 6443240901
1 changed files with 92 additions and 20 deletions
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java
@ -9,6 +9,7 @@ import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@ -17,8 +18,10 @@ import org.jsoup.Connection.Response;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
 import org.jsoup.select.Elements;
+import org.jsoup.Connection.Method;

 import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
 import com.rarchives.ripme.utils.Http;

 /**
@ -27,6 +30,7 @@ import com.rarchives.ripme.utils.Http;
 */
 public class EroShareRipper extends AbstractHTMLRipper {

+
    public EroShareRipper (URL url) throws IOException {
        super(url);
    }
@ -42,33 +46,73 @@ public class EroShareRipper extends AbstractHTMLRipper {
    }

    @Override
-    public void downloadURL(URL url, int index) {
+    public void downloadURL(URL url, int index){
        addURLToDownload(url);
    }
+    /**
+     * Reports whether this ripper accepts the given URL.
+     *
+     * @param url the URL to test
+     * @return {@code true} when the URL is an eroshare.com address with a single
+     *         path segment, or an eroshare.com {@code /u/} profile address;
+     *         {@code false} otherwise
+     */
+    @Override
+    public boolean canRip(URL url) {
+        final String link = url.toExternalForm();
+        // Single path segment first; the /u/ profile form is only tried when that fails.
+        return Pattern.compile("^https?://[w.]*eroshare.com/([a-zA-Z0-9\\-_]+)/?$").matcher(link).matches()
+                || Pattern.compile("^https?://[w.]*eroshare.com/u/([a-zA-Z0-9\\-_]+)/?$").matcher(link).matches();
+    }
+
+    /**
+     * Tells whether the URL has the eroshare.com {@code /u/} profile form.
+     *
+     * @param url the URL to test
+     * @return {@code true} when the whole URL matches the profile pattern
+     */
+    public boolean is_profile(URL url) {
+        final String link = url.toExternalForm();
+        return Pattern.compile("^https?://[w.]*eroshare.com/u/([a-zA-Z0-9\\-_]+)/?$")
+                .matcher(link)
+                .matches();
+    }
+
+    /**
+     * Loads the page that the current page's "next" link points to.
+     *
+     * <p>The link is looked up with the selector {@code li.next > a} and its
+     * {@code href} is resolved against {@code https://eroshare.com}.
+     *
+     * @param doc the page currently being processed
+     * @return the document fetched from the "next" link
+     * @throws IOException if the page has no "next" link, the link has an empty
+     *                     {@code href}, or fetching the next page fails
+     */
+    @Override
+    public Document getNextPage(Document doc) throws IOException {
+        // Find next page
+        Element elem = doc.select("li.next > a").first();
+        logger.info(elem);
+        // first() yields null when nothing matches the selector; report that as
+        // "no more pages" instead of failing with a NullPointerException.
+        if (elem == null) {
+            throw new IOException("No more pages");
+        }
+        String nextUrl = elem.attr("href");
+        // Compare by content: == on strings only tests reference identity.
+        if (nextUrl.isEmpty()) {
+            throw new IOException("No more pages");
+        }
+        return Http.url("https://eroshare.com" + nextUrl).get();
+    }

    @Override
    public String getAlbumTitle(URL url) throws MalformedURLException {
-        try {
-            // Attempt to use album title as GID
-            Element titleElement = getFirstPage().select("meta[property=og:title]").first();
-            String title = titleElement.attr("content");
-            title = title.substring(title.lastIndexOf('/') + 1);
-            return getHost() + "_" + getGID(url) + "_" + title.trim();
-        } catch (IOException e) {
-            // Fall back to default album naming convention
-            logger.info("Unable to find title at " + url);
+        if (is_profile(url) == false) {
+            try {
+                // Attempt to use album title as GID
+                Element titleElement = getFirstPage().select("meta[property=og:title]").first();
+                String title = titleElement.attr("content");
+                title = title.substring(title.lastIndexOf('/') + 1);
+                return getHost() + "_" + getGID(url) + "_" + title.trim();
+            } catch (IOException e) {
+                // Fall back to default album naming convention
+                logger.info("Unable to find title at " + url);
+            }
+            return super.getAlbumTitle(url);
        }
-        return super.getAlbumTitle(url);
+        return url.toExternalForm().split("/u/")[1];
    }


    @Override
-    public List<String> getURLsFromPage(Document doc) {
+    public List<String> getURLsFromPage(Document doc){
        List<String> URLs = new ArrayList<String>();
        //Pictures
        Elements imgs = doc.getElementsByTag("img");
-        for (Element img : imgs) {
-            if (img.hasClass("album-image")) {
+        for (Element img : imgs){
+            if (img.hasClass("album-image")){
                String imageURL = img.attr("src");
                imageURL = "https:" + imageURL;
                URLs.add(imageURL);
@ -76,13 +120,33 @@ public class EroShareRipper extends AbstractHTMLRipper {
        }
        //Videos
        Elements vids = doc.getElementsByTag("video");
-        for (Element vid : vids) {
-            if (vid.hasClass("album-video")) {
+        for (Element vid : vids){
+            if (vid.hasClass("album-video")){
                Elements source = vid.getElementsByTag("source");
                String videoURL = source.first().attr("src");
                URLs.add(videoURL);
            }
        }
+        // Profile videos
+        // Every profile item links to a page of its own; fetch that page and
+        // collect the album videos found on it.
+        Elements links = doc.select("div.item-container > a.item");
+        for (Element link : links){
+            Document video_page;
+            try {
+                video_page = Http.url("https://eroshare.com" + link.attr("href")).get();
+            } catch(IOException e) {
+                // Skip this item: without a fetched page there is nothing to scan,
+                // and dereferencing a null document would throw a NullPointerException.
+                logger.warn("Failed to log link in Jsoup", e);
+                continue;
+            }
+            Elements profile_vids = video_page.getElementsByTag("video");
+            for (Element vid : profile_vids){
+                if (vid.hasClass("album-video")){
+                    Element source = vid.getElementsByTag("source").first();
+                    // A video element without a source child has no URL to add.
+                    if (source == null) {
+                        continue;
+                    }
+                    URLs.add(source.attr("src"));
+                }
+            }
+        }

        return URLs;
    }
@ -105,6 +169,13 @@ public class EroShareRipper extends AbstractHTMLRipper {
        if (m.matches()) {
            return m.group(1);
        }
+
+        // Profile URLs (/u/ followed by a name) yield the name plus a "_profile" suffix.
+        Pattern pa = Pattern.compile("^https?://[w.]*eroshare.com/u/([a-zA-Z0-9\\-_]+)/?$");
+        Matcher ma = pa.matcher(url.toExternalForm());
+        if (ma.matches()) {
+            // Read the group from ma. The earlier matcher m did not match if
+            // execution reaches this point, so m.group(1) would throw
+            // IllegalStateException.
+            return ma.group(1) + "_profile";
+        }
+
        throw new MalformedURLException("eroshare album not found in " + url + ", expected https://eroshare.com/album");
    }

@ -119,8 +190,8 @@ public class EroShareRipper extends AbstractHTMLRipper {
        List<URL> URLs = new ArrayList<URL>();
        //Pictures
        Elements imgs = doc.getElementsByTag("img");
-        for (Element img : imgs) {
-            if (img.hasClass("album-image")) {
+        for (Element img : imgs){
+            if (img.hasClass("album-image")){
                String imageURL = img.attr("src");
                imageURL = "https:" + imageURL;
                URLs.add(new URL(imageURL));
@ -128,8 +199,8 @@ public class EroShareRipper extends AbstractHTMLRipper {
        }
        //Videos
        Elements vids = doc.getElementsByTag("video");
-        for (Element vid : vids) {
-            if (vid.hasClass("album-video")) {
+        for (Element vid : vids){
+            if (vid.hasClass("album-video")){
                Elements source = vid.getElementsByTag("source");
                String videoURL = source.first().attr("src");
                URLs.add(new URL(videoURL));
@ -139,3 +210,4 @@ public class EroShareRipper extends AbstractHTMLRipper {
        return URLs;
    }
 }
+