Merge pull request #5 from RipMeApp/master

Sync.
2018-03-12 10:16:58 -05:00 · 2018-03-12 10:16:58 -05:00 · 41eb9fd09b
commit 41eb9fd09b
parent d9423c1fae a4880ecf80
12 changed files with 132 additions and 98 deletions
--- a/build.sh
+++ b/build.sh
@ -1 +1,2 @@
+#!/usr/bin/env bash
 mvn clean compile assembly:single
--- a/docs/options.md
+++ b/docs/options.md
@ -1,40 +0,0 @@
-file.overwrite     | bool | If true ripme will overwrite existing files rather than skip them
-
-clipboard.autorip  | bool | If true ripme will try to download any links in the clip board
-
-error.skip404      | bool | Don't retry on 404 errors
-
-download.save_order| bool | If true ripme will prefix each downloaded file with a number in the order the file was download
-
-auto.update        | bool | If true ripme will auto-update every time it's started
-
-play.sound         | bool | If true ripme will play a sound every time a rip finishes
-
-download.show_popup| bool | TODO figure out what this is for
-
-log.save           | bool | If true ripme will save it's logs
-
-urls_only.save     | bool | If true ripme will save all urls to a text file and download no files
-
-album_titles.save  | bool | Currently does nothing
-
-prefer.mp4         | bool | Prefer mp4 when downloading a video that has more than 1 format
-
-download.timeout   | int  | File download timeout (in milliseconds)
-
-page.timeout       | int  | Page download timeout (in milliseconds)
-
-download.max_size  | int  | Maximum size of downloaded files in bytes
-
-threads.size       | int  | The number of threads to use
-
-twitter.auth       | String | Twitter API key (Base64'd)
-
-tumblr.auth        | String | Tumblr API key
-
-log.level          | String | The debug log level (Example: Log level: Debug)
-
-gw.api             | String | TODO figure out what this is for
-
-twitter.max_requests | int | TODO figure out what this is for
-
--- a/pom.xml
+++ b/pom.xml
@ -4,7 +4,7 @@
  <groupId>com.rarchives.ripme</groupId>
  <artifactId>ripme</artifactId>
  <packaging>jar</packaging>
-  <version>1.7.19</version>
+  <version>1.7.23</version>
  <name>ripme</name>
  <url>http://rip.rarchives.com</url>
  <properties>
--- a/ripme.json
+++ b/ripme.json
@ -1,6 +1,10 @@
 {
-    "latestVersion": "1.7.19",
+    "latestVersion": "1.7.23",
    "changeList": [
+        "1.7.23: Fixed xvideos ripper; InstagramRipper now works with lastseenfeature",
+        "1.7.22: Added func to normalize urls before reading from/writing to url history file; last seen feature now works with instagram",
+        "1.7.21: Fixed last seen feature",
+        "1.7.20: Fixed 8muses ripper; Added last seen feature; disabled 500px test",
        "1.7.19: imgurRipper no longer tries to add ?1 to file names",
        "1.7.18: AlbumRipper now removes bad chars from file names",
        "1.7.17: Fixed hentai.cafe autorip from clipboard",
--- a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java
@ -68,7 +68,13 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
        Document doc = getFirstPage();

        while (doc != null) {
+            if (alreadyDownloadedUrls >= Utils.getConfigInteger("history.end_rip_after_already_seen", 1000000000) && !isThisATest()) {
+                sendUpdate(STATUS.DOWNLOAD_COMPLETE, "Already seen the last " + alreadyDownloadedUrls + " images ending rip");
+                break;
+            }
            List<String> imageURLs = getURLsFromPage(doc);
+            // If hasASAPRipping() returns true then the ripper will handle downloading the files
+            // if not it's done in the following block of code
            if (!hasASAPRipping()) {
                // Remove all but 1 image
                if (isThisATest()) {
--- a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
@ -44,7 +44,8 @@ public abstract class AbstractRipper
    public abstract String getHost();
    public abstract String getGID(URL url) throws MalformedURLException;
    public boolean hasASAPRipping() { return false; }
-
+    // Every time addURLToDownload skips an already downloaded URL this increases by 1
+    public int alreadyDownloadedUrls = 0;
    private boolean shouldStop = false;
    private boolean thisIsATest = false;

@ -60,7 +61,13 @@ public abstract class AbstractRipper
        }
    }

+
+    /**
+     * Adds a URL to the url history file
+     * @param downloadedURL URL to record in the history file (it is normalized first)
+     */
    private void writeDownloadedURL(String downloadedURL) throws IOException {
+        downloadedURL = normalizeUrl(downloadedURL);
        BufferedWriter bw = null;
        FileWriter fw = null;
        try {
@ -86,6 +93,15 @@ public abstract class AbstractRipper
        }
    }

+
+    /**
+     * Normalizes a URL before it is written to, or looked up in, the URL history file.
+     * This base implementation returns the URL unchanged; rippers may override it.
+     * @param url URL to normalize
+     * @return the normalized URL (here, {@code url} itself)
+     */
+    public String normalizeUrl(String url) {
+        return url;
+    }
+    
    /**
     * Checks to see if Ripme has already downloaded a URL
     * @param url URL to check if downloaded
@ -95,6 +111,7 @@ public abstract class AbstractRipper
     */
    private boolean hasDownloadedURL(String url) {
        File file = new File(URLHistoryFile);
+        url = normalizeUrl(url);
        try {
            Scanner scanner = new Scanner(file);
            while (scanner.hasNextLine()) {
@ -194,9 +211,11 @@ public abstract class AbstractRipper
     *      False if failed to download
     */
    protected boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map<String, String> cookies) {
+        // Don't re-add the url if it was downloaded in a previous rip
        if (Utils.getConfigBoolean("remember.url_history", true) && !isThisATest()) {
            if (hasDownloadedURL(url.toExternalForm())) {
                sendUpdate(STATUS.DOWNLOAD_WARN, "Already downloaded " + url.toExternalForm());
+                alreadyDownloadedUrls += 1;
                return false;
            }
        }
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java
@ -51,7 +51,7 @@ public class EightmusesRipper extends AbstractHTMLRipper {

    @Override
    public String getGID(URL url) throws MalformedURLException {
-        Pattern p = Pattern.compile("^https?://(www\\.)?8muses\\.com/comix/album/([a-zA-Z0-9\\-_]+).*$");
+        Pattern p = Pattern.compile("^https?://(www\\.)?8muses\\.com/(comix|comics)/album/([a-zA-Z0-9\\-_]+).*$");
        Matcher m = p.matcher(url.toExternalForm());
        if (!m.matches()) {
            throw new MalformedURLException("Expected URL format: http://www.8muses.com/index/category/albumname, got: " + url);
@ -93,7 +93,7 @@ public class EightmusesRipper extends AbstractHTMLRipper {
        Elements pageImages = page.getElementsByClass("c-tile");
        for (Element thumb : pageImages) {
            // If true this link is a sub album
-            if (thumb.attr("href").contains("/comix/album/")) {
+            if (thumb.attr("href").contains("/comics/album/")) {
                String subUrl = "https://www.8muses.com" + thumb.attr("href");
                try {
                    logger.info("Retrieving " + subUrl);
@ -106,7 +106,8 @@ public class EightmusesRipper extends AbstractHTMLRipper {
                    logger.warn("Error while loading subalbum " + subUrl, e);
                }

-            } else if (thumb.attr("href").contains("/comix/picture/")) {
+            } else if (thumb.attr("href").contains("/comics/picture/")) {
+                logger.info("This page is a album");
                logger.info("Ripping image");
                if (super.isStopped()) break;
                // Find thumbnail image source
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
@ -24,6 +24,7 @@ import com.rarchives.ripme.utils.Utils;


 public class InstagramRipper extends AbstractHTMLRipper {
+    String nextPageID = "";

    private String userID;

@ -52,6 +53,12 @@ public class InstagramRipper extends AbstractHTMLRipper {
        return san_url;
    }

+    @Override
+    public String normalizeUrl(String url) {
+        // Remove the date sig from the url: the first 8-character [A-Z0-9] path segment and one of its slashes
+        return url.replaceAll("/[A-Z0-9]{8}/", "/");
+    }
+
    private List<String> getPostsFromSinglePage(Document Doc) {
        List<String> imageURLs = new ArrayList<>();
        JSONArray datas;
@ -192,7 +199,6 @@ public class InstagramRipper extends AbstractHTMLRipper {

    @Override
    public List<String> getURLsFromPage(Document doc) {
-        String nextPageID = "";
        List<String> imageURLs = new ArrayList<>();
        JSONObject json = new JSONObject();
        try {
@ -255,33 +261,7 @@ public class InstagramRipper extends AbstractHTMLRipper {
                    break;
                }
            }
-            // Rip the next page
-            if (!nextPageID.equals("") && !isThisATest()) {
-                if (url.toExternalForm().contains("/tags/")) {
-                    try {
-                        // Sleep for a while to avoid a ban
-                        sleep(2500);
-                        if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) {
-                            getURLsFromPage(Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get());
-                        } else {
-                            getURLsFromPage(Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get());
-                        }

-                    } catch (IOException e) {
-                        return imageURLs;
-                    }
-
-                }
-                try {
-                    // Sleep for a while to avoid a ban
-                    sleep(2500);
-                    getURLsFromPage(Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get());
-                } catch (IOException e) {
-                    return imageURLs;
-                }
-            } else {
-                logger.warn("Can't get net page");
-            }
        } else { // We're ripping from a single page
            logger.info("Ripping from single page");
            imageURLs = getPostsFromSinglePage(doc);
@ -290,9 +270,65 @@ public class InstagramRipper extends AbstractHTMLRipper {
        return imageURLs;
    }

+    /**
+     * Fetches the next page of posts, addressed by the nextPageID field.
+     * URLs containing "/tags/" are paged relative to the ripped URL; everything else is
+     * paged through the profile of userID.
+     * @param doc the current page (not used; paging is driven by nextPageID)
+     * @return the next page, which is known to contain at least one image
+     * @throws IOException if there is no next page ID, this is a test run, the page cannot
+     *         be fetched, or the fetched page has no images
+     */
+    @Override
+    public Document getNextPage(Document doc) throws IOException {
+        if (nextPageID.equals("") || isThisATest()) {
+            throw new IOException("No more pages");
+        }
+        String pageUrl;
+        if (url.toExternalForm().contains("/tags/")) {
+            String base = url.toExternalForm();
+            // Make sure exactly one slash separates the tag URL from the query string
+            pageUrl = base + (base.endsWith("/") ? "" : "/") + "?max_id=" + nextPageID;
+        } else {
+            pageUrl = "https://www.instagram.com/" + userID + "/?max_id=" + nextPageID;
+        }
+        // Sleep for a while to avoid a ban
+        sleep(2500);
+        logger.info("Retrieving " + pageUrl);
+        Document nextPage;
+        try {
+            nextPage = Http.url(pageUrl).get();
+        } catch (IOException e) {
+            // The end of the rip is always signalled by an exception (never by null),
+            // and the underlying failure is kept as the cause
+            throw new IOException("No more pages", e);
+        }
+        if (!hasImage(nextPage)) {
+            throw new IOException("No more pages");
+        }
+        return nextPage;
+    }
+
    @Override
    public void downloadURL(URL url, int index) {
        addURLToDownload(url); // index is not used
    }

+    /**
+     * Checks whether the JSON embedded in a page lists at least one media node under
+     * entry_data.ProfilePage[0].user.media.nodes.
+     * NOTE(review): getNextPage also calls this for "/tags/" pages; confirm those pages
+     * expose the same ProfilePage path.
+     * @param doc page to inspect
+     * @return true if the nodes array is non-empty; false if it is empty, the JSON cannot
+     *         be read from the page, or the expected path is missing
+     */
+    private boolean hasImage(Document doc) {
+        try {
+            JSONObject json = getJSONFromPage(doc);
+            JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage");
+            JSONArray datas = profilePage.getJSONObject(0).getJSONObject("user").getJSONObject("media").getJSONArray("nodes");
+            logger.debug("Found " + datas.length() + " media nodes on page");
+            return datas.length() > 0;
+        } catch (IOException | org.json.JSONException e) {
+            // A page we cannot parse is treated as having no images instead of
+            // letting an unchecked JSONException abort the rip
+            logger.warn("Unable to read media nodes from page", e);
+            return false;
+        }
+    }
+
 }
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/video/XvideosRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/video/XvideosRipper.java
@ -57,19 +57,21 @@ public class XvideosRipper extends VideoRipper {
    public void rip() throws IOException {
        logger.info("    Retrieving " + this.url);
        Document doc = Http.url(this.url).get();
-        Elements embeds = doc.select("embed");
-        if (embeds.size() == 0) {
-            throw new IOException("Could not find Embed code at " + url);
-        }
-        Element embed = embeds.get(0);
-        String vars = embed.attr("flashvars");
-        for (String var : vars.split("&")) {
-            if (var.startsWith("flv_url=")) {
-                String vidUrl = var.substring("flv_url=".length());
-                vidUrl = URLDecoder.decode(vidUrl, "UTF-8");
-                addURLToDownload(new URL(vidUrl), HOST + "_" + getGID(this.url));
-            }
-        }
+        Elements scripts = doc.select("script"); // the video URL is read from a script element
+        for (Element e : scripts) {
+            if (e.html().contains("html5player.setVideoUrlHigh")) {
+                logger.info("Found the right script");
+                String[] lines = e.html().split("\n");
+                for (String line: lines) {
+                    if (line.contains("html5player.setVideoUrlHigh")) { // strip tabs, the call prefix, quotes and ");" to get the URL
+                        String videoURL = line.replaceAll("\t", "").replaceAll("html5player.setVideoUrlHigh\\(", "").replaceAll("\'", "").replaceAll("\\);", "");
+                        addURLToDownload(new URL(videoURL), HOST + "_" + getGID(this.url));
                        waitForThreads();
+                        return; // only the first matching line is used
+                    }
+                }
+            }
+        }
+        throw new IOException("Unable to find video url at " + this.url.toExternalForm()); // no script line contained the call
    }
 }
--- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java
+++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java
@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils;
 public class UpdateUtils {

    private static final Logger logger = Logger.getLogger(UpdateUtils.class);
-    private static final String DEFAULT_VERSION = "1.7.19";
+    private static final String DEFAULT_VERSION = "1.7.23";
    private static final String REPO_NAME = "ripmeapp/ripme";
    private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json";
    private static final String mainFileName = "ripme.jar";
--- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/EightmusesRipperTest.java
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/EightmusesRipperTest.java
@ -10,6 +10,9 @@ public class EightmusesRipperTest extends RippersTest {
        // A simple image album
        EightmusesRipper ripper = new EightmusesRipper(new URL("https://www.8muses.com/comix/album/Affect3D-Comics/TheDude3DX/Lust-Unleashed-The-Urge-To-Explore"));
        testRipper(ripper);
+        // Test the new url format
+        ripper = new EightmusesRipper(new URL("https://www.8muses.com/comics/album/Affect3D-Comics/TheDude3DX/Lust-Unleashed-The-Urge-To-Explore"));
+        testRipper(ripper);
        // Test pages with subalbums
        ripper = new EightmusesRipper(new URL("https://www.8muses.com/comix/album/Blacknwhitecomics_com-Comix/BlacknWhiteComics/The-Mayor"));
        testRipper(ripper);
--- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FivehundredpxRipperTest.java
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FivehundredpxRipperTest.java
@ -1,13 +1,15 @@
-package com.rarchives.ripme.tst.ripper.rippers;
+//package com.rarchives.ripme.tst.ripper.rippers;
+//
+//import java.io.IOException;
+//import java.net.URL;
+//
+//import com.rarchives.ripme.ripper.rippers.FivehundredpxRipper;
+//
+//public class FivehundredpxRipperTest extends RippersTest {
+//    public void test500pxAlbum() throws IOException {
+//        FivehundredpxRipper ripper = new FivehundredpxRipper(new URL("https://marketplace.500px.com/alexander_hurman"));
+//        testRipper(ripper);
+//    }
+//}

-import java.io.IOException;
-import java.net.URL;
-
-import com.rarchives.ripme.ripper.rippers.FivehundredpxRipper;
-
-public class FivehundredpxRipperTest extends RippersTest {
-    public void test500pxAlbum() throws IOException {
-        FivehundredpxRipper ripper = new FivehundredpxRipper(new URL("https://marketplace.500px.com/alexander_hurman"));
-        testRipper(ripper);
-    }
-}
+// Ripper is broken. See https://github.com/RipMeApp/ripme/issues/438