updated crawler

2019-12-01 19:35:02 +01:00 · 2019-12-01 19:35:02 +01:00 · 6858565b14
commit 6858565b14
parent 6d22a5f579
10 changed files with 1222 additions and 582 deletions
--- a/pom.xml
+++ b/pom.xml
@ -11,6 +11,22 @@
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>
  <dependencies>
+    <dependency>
+      <groupId>javax.xml.bind</groupId>
+      <artifactId>jaxb-api</artifactId>
+      <version>2.3.0</version>
+    </dependency>
+    <dependency>
+      <groupId>com.sun.xml.bind</groupId>
+      <artifactId>jaxb-core</artifactId>
+      <version>2.3.0</version>
+    </dependency>
+    <dependency>
+      <groupId>com.sun.xml.bind</groupId>
+      <artifactId>jaxb-impl</artifactId>
+      <version>2.3.0</version>
+    </dependency>
+
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
--- a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java
@ -2,21 +2,29 @@ package com.rarchives.ripme.ripper;

 import java.io.File;
 import java.io.FileOutputStream;
+import java.io.FileWriter;
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
+import java.util.Collections;
+import java.util.HashMap;
 import java.util.List;
-
+import java.util.Map;
 import org.jsoup.nodes.Document;

 import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
 import com.rarchives.ripme.utils.Utils;
 import com.rarchives.ripme.ui.MainWindow;
+import com.rarchives.ripme.ui.RipStatusMessage;

 /**
 * Simplified ripper, designed for ripping from sites by parsing HTML.
 */
-public abstract class AbstractHTMLRipper extends AlbumRipper {
+public abstract class AbstractHTMLRipper extends AbstractRipper {
+
+    private Map<URL, File> itemsPending = Collections.synchronizedMap(new HashMap<URL, File>());
+    private Map<URL, File> itemsCompleted = Collections.synchronizedMap(new HashMap<URL, File>());
+    private Map<URL, String> itemsErrored = Collections.synchronizedMap(new HashMap<URL, String>());

    protected AbstractHTMLRipper(URL url) throws IOException {
        super(url);
@ -93,6 +101,7 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {

            // We set doc to null here so the while loop below this doesn't fire
            doc = null;
+            LOGGER.debug("Adding items from " + this.url + " to queue");
        }

        while (doc != null) {
@ -261,4 +270,210 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
        }
        return prefix;
    }
+
+    /*
+     * ------ Methods copied from AlbumRipper. ------
+     * This removes AlbumRipper's usage from this class.
+     */
+
+    /**
+     * Tells addURLToDownload whether a URL that is already pending, completed
+     * or errored may be queued a second time.
+     * @return
+     *      false here, so such URLs are skipped.
+     */
+    protected boolean allowDuplicates() {
+        return false;
+    }
+
+    /**
+     * Returns total amount of files attempted: completed plus errored items.
+     * Items that are still pending are not counted.
+     */
+    @Override
+    public int getCount() {
+        return itemsCompleted.size() + itemsErrored.size();
+    }
+
+    /**
+     * Queues multiple URLs of single images to download from a single Album URL.
+     * Depending on the "urls_only.save" setting the URL is either appended to
+     * urls.txt in the working directory or handed to a DownloadFileThread.
+     * @param url
+     *      URL to download
+     * @param saveAs
+     *      File the download thread is asked to save to
+     * @param referrer
+     *      Referrer to set on the download thread; ignored when null
+     * @param cookies
+     *      Cookies to set on the download thread; ignored when null
+     * @param getFileExtFromMIME
+     *      Passed through to the DownloadFileThread constructor
+     * @return
+     *      false when the URL was rejected (test mode already has an item, or
+     *      the URL is a duplicate), true otherwise
+     */
+    @Override
+    public boolean addURLToDownload(URL url, File saveAs, String referrer, Map<String,String> cookies, Boolean getFileExtFromMIME) {
+        // Only download one file if this is a test.
+        if (super.isThisATest() &&
+                (itemsPending.size() > 0 || itemsCompleted.size() > 0 || itemsErrored.size() > 0)) {
+            stop();
+            return false;
+        }
+        if (!allowDuplicates()
+                && ( itemsPending.containsKey(url)
+                || itemsCompleted.containsKey(url)
+                || itemsErrored.containsKey(url) )) {
+            // Item is already downloaded/downloading, skip it.
+            LOGGER.info("[!] Skipping " + url + " -- already attempted: " + Utils.removeCWD(saveAs));
+            return false;
+        }
+        if (Utils.getConfigBoolean("urls_only.save", false)) {
+            // Output URL to file
+            String urlFile = this.workingDir + File.separator + "urls.txt";
+            try (FileWriter fw = new FileWriter(urlFile, true)) {
+                fw.write(url.toExternalForm());
+                fw.write(System.lineSeparator());
+                itemsCompleted.put(url, new File(urlFile));
+            } catch (IOException e) {
+                // NOTE(review): on failure the URL is recorded in none of the maps
+                // and the method still returns true -- confirm this is intended.
+                LOGGER.error("Error while writing to " + urlFile, e);
+            }
+        }
+        else {
+            // Register as pending before the thread is queued.
+            itemsPending.put(url, saveAs);
+            DownloadFileThread dft = new DownloadFileThread(url,  saveAs,  this, getFileExtFromMIME);
+            if (referrer != null) {
+                dft.setReferrer(referrer);
+            }
+            if (cookies != null) {
+                dft.setCookies(cookies);
+            }
+            threadPool.addThread(dft);
+        }
+
+        return true;
+    }
+
+    /**
+     * Queues a file for download with no referrer, no cookies and with
+     * getFileExtFromMIME set to false.
+     * @param url
+     *      URL to download
+     * @param saveAs
+     *      File to save the download to
+     * @return
+     *      Result of the five-argument addURLToDownload
+     */
+    @Override
+    public boolean addURLToDownload(URL url, File saveAs) {
+        return addURLToDownload(url, saveAs, null, null, false);
+    }
+
+    /**
+     * Queues image to be downloaded and saved.
+     * Uses filename from URL to decide filename.
+     * Delegates to the (url, prefix, subdirectory) overload with an empty
+     * prefix and an empty subdirectory.
+     * @param url
+     *      URL to download
+     * @return
+     *      Result of the delegated call.
+     *      NOTE(review): presumably true when the URL was queued -- confirm
+     *      against the three-argument overload.
+     */
+    protected boolean addURLToDownload(URL url) {
+        // Use empty prefix and empty subdirectory
+        return addURLToDownload(url, "", "");
+    }
+
+    /**
+     * Cleans up & tells user about successful download.
+     * Moves the URL from the pending map to the completed map, notifies the
+     * observer and then checks whether the whole rip is complete.
+     * Does nothing at all (no bookkeeping either) when there is no observer.
+     * @param url
+     *      URL that finished downloading
+     * @param saveAs
+     *      File the download was saved to
+     */
+    @Override
+    public void downloadCompleted(URL url, File saveAs) {
+        if (observer == null) {
+            return;
+        }
+        try {
+            String path = Utils.removeCWD(saveAs);
+            RipStatusMessage msg = new RipStatusMessage(STATUS.DOWNLOAD_COMPLETE, path);
+            itemsPending.remove(url);
+            itemsCompleted.put(url, saveAs);
+            observer.update(this, msg);
+
+            checkIfComplete();
+        } catch (Exception e) {
+            // Any failure in the block above is only logged, never rethrown.
+            LOGGER.error("Exception while updating observer: ", e);
+        }
+    }
+
+    /**
+     * Cleans up & tells user about failed download.
+     * Moves the URL from the pending map to the errored map, notifies the
+     * observer and then checks whether the whole rip is complete.
+     * Does nothing at all (no bookkeeping either) when there is no observer.
+     * @param url
+     *      URL that failed to download
+     * @param reason
+     *      Text stored for the URL and appended to the status message
+     */
+    @Override
+    public void downloadErrored(URL url, String reason) {
+        if (observer == null) {
+            return;
+        }
+        itemsPending.remove(url);
+        itemsErrored.put(url, reason);
+        observer.update(this, new RipStatusMessage(STATUS.DOWNLOAD_ERRORED, url + " : " + reason));
+
+        checkIfComplete();
+    }
+
+    /**
+     * Tells user that a single file in the album they wish to download has
+     * already been downloaded in the past.
+     * The URL is moved from the pending map to the completed map and the
+     * observer receives a DOWNLOAD_WARN message.
+     * Does nothing at all (no bookkeeping either) when there is no observer.
+     * @param url
+     *      URL whose file already exists
+     * @param file
+     *      The existing file
+     */
+    @Override
+    public void downloadExists(URL url, File file) {
+        if (observer == null) {
+            return;
+        }
+
+        itemsPending.remove(url);
+        itemsCompleted.put(url, file);
+        observer.update(this, new RipStatusMessage(STATUS.DOWNLOAD_WARN, url + " already saved as " + file.getAbsolutePath()));
+
+        checkIfComplete();
+    }
+
+    /**
+     * Forwards to the superclass completion check once nothing is pending.
+     * Nothing happens while there is no observer or while items are still
+     * waiting in the pending map.
+     */
+    @Override
+    protected void checkIfComplete() {
+        boolean readyToFinish = observer != null && itemsPending.isEmpty();
+        if (readyToFinish) {
+            super.checkIfComplete();
+        }
+    }
+
+    /**
+     * Sets directory to save all ripped files to.
+     * The directory is the configured working directory plus a
+     * filesystem-safe album title; it is created when it does not exist yet.
+     * @param url
+     *      URL to define how the working directory should be saved.
+     *      NOTE(review): this parameter is not read; the title is derived
+     *      from this.url instead -- confirm that is intended.
+     * @throws IOException
+     *      Declared by the method; getCanonicalPath() below can throw it.
+     */
+    @Override
+    public void setWorkingDir(URL url) throws IOException {
+        String path = Utils.getWorkingDirectory().getCanonicalPath();
+        if (!path.endsWith(File.separator)) {
+            path += File.separator;
+        }
+        String title;
+        // "album_titles.save" chooses between this class's getAlbumTitle and
+        // the superclass version.
+        if (Utils.getConfigBoolean("album_titles.save", true)) {
+            title = getAlbumTitle(this.url);
+        } else {
+            title = super.getAlbumTitle(this.url);
+        }
+        LOGGER.debug("Using album title '" + title + "'");
+
+        title = Utils.filesystemSafe(title);
+        path += title;
+        path = Utils.getOriginalDirectory(path) + File.separator;   // check for case sensitive (unix only)
+
+        this.workingDir = new File(path);
+        if (!this.workingDir.exists()) {
+            LOGGER.info("[+] Creating directory: " + Utils.removeCWD(this.workingDir));
+            // The result of mkdirs() is not checked, so a failed creation is silent here.
+            this.workingDir.mkdirs();
+        }
+        LOGGER.debug("Set working directory to: " + this.workingDir);
+    }
+
+    /**
+     * Reports how much of the rip has been attempted so far, counting both
+     * completed and errored items as done.
+     * @return
+     *      Integer between 0 and 100 defining the progress of the album rip;
+     *      0 when nothing has been queued yet.
+     */
+    @Override
+    public int getCompletionPercentage() {
+        // Read every map exactly once so numerator and denominator are built
+        // from the same values.
+        int pending = itemsPending.size();
+        int finished = itemsErrored.size() + itemsCompleted.size();
+        int total = pending + finished;
+        if (total == 0) {
+            // Explicit instead of relying on (int) NaN evaluating to 0.
+            return 0;
+        }
+        // Integer arithmetic avoids floating-point truncation:
+        // 100 * (29 / 100.0) is 28.999999999999996 and was reported as 28.
+        return (int) (100L * finished / total);
+    }
+
+    /**
+     * Builds the one-line progress summary: completion percentage followed by
+     * the pending, completed and errored counts.
+     * @return
+     *      Human-readable information on the status of the current rip.
+     */
+    @Override
+    public String getStatusText() {
+        int percent = getCompletionPercentage();
+        return percent + "% "
+                + "- Pending: " + itemsPending.size()
+                + ", Completed: " + itemsCompleted.size()
+                + ", Errored: " + itemsErrored.size();
+    }
+
+
 }
--- a/src/main/java/com/rarchives/ripme/ripper/AbstractJSONRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/AbstractJSONRipper.java
@ -1,19 +1,27 @@
 package com.rarchives.ripme.ripper;

+import java.io.File;
+import java.io.FileWriter;
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
+import java.util.Collections;
+import java.util.HashMap;
 import java.util.List;
-
+import java.util.Map;
 import org.json.JSONObject;
-
+import com.rarchives.ripme.ui.RipStatusMessage;
 import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
 import com.rarchives.ripme.utils.Utils;

 /**
 * Simplified ripper, designed for ripping from sites by parsing JSON.
 */
-public abstract class AbstractJSONRipper extends AlbumRipper {
+public abstract class AbstractJSONRipper extends AbstractRipper {
+
+    private Map<URL, File> itemsPending = Collections.synchronizedMap(new HashMap<URL, File>());
+    private Map<URL, File> itemsCompleted = Collections.synchronizedMap(new HashMap<URL, File>());
+    private Map<URL, String> itemsErrored = Collections.synchronizedMap(new HashMap<URL, String>());

    protected AbstractJSONRipper(URL url) throws IOException {
        super(url);
@ -111,4 +119,209 @@ public abstract class AbstractJSONRipper extends AlbumRipper {
        }
        return prefix;
    }
+
+    /*
+     * ------ Methods copied from AlbumRipper ------
+     */
+
+    /**
+     * Tells addURLToDownload whether a URL that is already pending, completed
+     * or errored may be queued a second time.
+     * @return
+     *      false here, so such URLs are skipped.
+     */
+    protected boolean allowDuplicates() {
+        return false;
+    }
+
+    /**
+     * Returns total amount of files attempted: completed plus errored items.
+     * Items that are still pending are not counted.
+     */
+    @Override
+    public int getCount() {
+        return itemsCompleted.size() + itemsErrored.size();
+    }
+
+    /**
+     * Queues multiple URLs of single images to download from a single Album URL.
+     * Depending on the "urls_only.save" setting the URL is either appended to
+     * urls.txt in the working directory or handed to a DownloadFileThread.
+     * @param url
+     *      URL to download
+     * @param saveAs
+     *      File the download thread is asked to save to
+     * @param referrer
+     *      Referrer to set on the download thread; ignored when null
+     * @param cookies
+     *      Cookies to set on the download thread; ignored when null
+     * @param getFileExtFromMIME
+     *      Passed through to the DownloadFileThread constructor
+     * @return
+     *      false when the URL was rejected (test mode already has an item, or
+     *      the URL is a duplicate), true otherwise
+     */
+    @Override
+    public boolean addURLToDownload(URL url, File saveAs, String referrer, Map<String,String> cookies, Boolean getFileExtFromMIME) {
+        // Only download one file if this is a test.
+        if (super.isThisATest() &&
+                (itemsPending.size() > 0 || itemsCompleted.size() > 0 || itemsErrored.size() > 0)) {
+            stop();
+            return false;
+        }
+        if (!allowDuplicates()
+                && ( itemsPending.containsKey(url)
+                || itemsCompleted.containsKey(url)
+                || itemsErrored.containsKey(url) )) {
+            // Item is already downloaded/downloading, skip it.
+            LOGGER.info("[!] Skipping " + url + " -- already attempted: " + Utils.removeCWD(saveAs));
+            return false;
+        }
+        if (Utils.getConfigBoolean("urls_only.save", false)) {
+            // Output URL to file
+            String urlFile = this.workingDir + File.separator + "urls.txt";
+            try (FileWriter fw = new FileWriter(urlFile, true)) {
+                fw.write(url.toExternalForm());
+                fw.write(System.lineSeparator());
+                itemsCompleted.put(url, new File(urlFile));
+            } catch (IOException e) {
+                // NOTE(review): on failure the URL is recorded in none of the maps
+                // and the method still returns true -- confirm this is intended.
+                LOGGER.error("Error while writing to " + urlFile, e);
+            }
+        }
+        else {
+            // Register as pending before the thread is queued.
+            itemsPending.put(url, saveAs);
+            DownloadFileThread dft = new DownloadFileThread(url,  saveAs,  this, getFileExtFromMIME);
+            if (referrer != null) {
+                dft.setReferrer(referrer);
+            }
+            if (cookies != null) {
+                dft.setCookies(cookies);
+            }
+            threadPool.addThread(dft);
+        }
+
+        return true;
+    }
+
+    /**
+     * Queues a file for download with no referrer, no cookies and with
+     * getFileExtFromMIME set to false.
+     * @param url
+     *      URL to download
+     * @param saveAs
+     *      File to save the download to
+     * @return
+     *      Result of the five-argument addURLToDownload
+     */
+    @Override
+    public boolean addURLToDownload(URL url, File saveAs) {
+        return addURLToDownload(url, saveAs, null, null, false);
+    }
+
+    /**
+     * Queues image to be downloaded and saved.
+     * Uses filename from URL to decide filename.
+     * Delegates to the (url, prefix, subdirectory) overload with an empty
+     * prefix and an empty subdirectory.
+     * @param url
+     *      URL to download
+     * @return
+     *      Result of the delegated call.
+     *      NOTE(review): presumably true when the URL was queued -- confirm
+     *      against the three-argument overload.
+     */
+    protected boolean addURLToDownload(URL url) {
+        // Use empty prefix and empty subdirectory
+        return addURLToDownload(url, "", "");
+    }
+
+    /**
+     * Cleans up & tells user about successful download.
+     * Moves the URL from the pending map to the completed map, notifies the
+     * observer and then checks whether the whole rip is complete.
+     * Does nothing at all (no bookkeeping either) when there is no observer.
+     * @param url
+     *      URL that finished downloading
+     * @param saveAs
+     *      File the download was saved to
+     */
+    @Override
+    public void downloadCompleted(URL url, File saveAs) {
+        if (observer == null) {
+            return;
+        }
+        try {
+            String path = Utils.removeCWD(saveAs);
+            RipStatusMessage msg = new RipStatusMessage(STATUS.DOWNLOAD_COMPLETE, path);
+            itemsPending.remove(url);
+            itemsCompleted.put(url, saveAs);
+            observer.update(this, msg);
+
+            checkIfComplete();
+        } catch (Exception e) {
+            // Any failure in the block above is only logged, never rethrown.
+            LOGGER.error("Exception while updating observer: ", e);
+        }
+    }
+
+    /**
+     * Cleans up & tells user about failed download.
+     * Moves the URL from the pending map to the errored map, notifies the
+     * observer and then checks whether the whole rip is complete.
+     * Does nothing at all (no bookkeeping either) when there is no observer.
+     * @param url
+     *      URL that failed to download
+     * @param reason
+     *      Text stored for the URL and appended to the status message
+     */
+    @Override
+    public void downloadErrored(URL url, String reason) {
+        if (observer == null) {
+            return;
+        }
+        itemsPending.remove(url);
+        itemsErrored.put(url, reason);
+        observer.update(this, new RipStatusMessage(STATUS.DOWNLOAD_ERRORED, url + " : " + reason));
+
+        checkIfComplete();
+    }
+
+    /**
+     * Tells user that a single file in the album they wish to download has
+     * already been downloaded in the past.
+     * The URL is moved from the pending map to the completed map and the
+     * observer receives a DOWNLOAD_WARN message.
+     * Does nothing at all (no bookkeeping either) when there is no observer.
+     * @param url
+     *      URL whose file already exists
+     * @param file
+     *      The existing file
+     */
+    @Override
+    public void downloadExists(URL url, File file) {
+        if (observer == null) {
+            return;
+        }
+
+        itemsPending.remove(url);
+        itemsCompleted.put(url, file);
+        observer.update(this, new RipStatusMessage(STATUS.DOWNLOAD_WARN, url + " already saved as " + file.getAbsolutePath()));
+
+        checkIfComplete();
+    }
+
+    /**
+     * Forwards to the superclass completion check once nothing is pending.
+     * Nothing happens while there is no observer or while items are still
+     * waiting in the pending map.
+     */
+    @Override
+    protected void checkIfComplete() {
+        boolean readyToFinish = observer != null && itemsPending.isEmpty();
+        if (readyToFinish) {
+            super.checkIfComplete();
+        }
+    }
+
+    /**
+     * Sets directory to save all ripped files to.
+     * The directory is the configured working directory plus a
+     * filesystem-safe album title; it is created when it does not exist yet.
+     * @param url
+     *      URL to define how the working directory should be saved.
+     *      NOTE(review): this parameter is not read; the title is derived
+     *      from this.url instead -- confirm that is intended.
+     * @throws IOException
+     *      Declared by the method; getCanonicalPath() below can throw it.
+     */
+    @Override
+    public void setWorkingDir(URL url) throws IOException {
+        String path = Utils.getWorkingDirectory().getCanonicalPath();
+        if (!path.endsWith(File.separator)) {
+            path += File.separator;
+        }
+        String title;
+        // "album_titles.save" chooses between this class's getAlbumTitle and
+        // the superclass version.
+        if (Utils.getConfigBoolean("album_titles.save", true)) {
+            title = getAlbumTitle(this.url);
+        } else {
+            title = super.getAlbumTitle(this.url);
+        }
+        LOGGER.debug("Using album title '" + title + "'");
+
+        title = Utils.filesystemSafe(title);
+        path += title;
+        path = Utils.getOriginalDirectory(path) + File.separator;   // check for case sensitive (unix only)
+
+        this.workingDir = new File(path);
+        if (!this.workingDir.exists()) {
+            LOGGER.info("[+] Creating directory: " + Utils.removeCWD(this.workingDir));
+            // The result of mkdirs() is not checked, so a failed creation is silent here.
+            this.workingDir.mkdirs();
+        }
+        LOGGER.debug("Set working directory to: " + this.workingDir);
+    }
+
+    /**
+     * Reports how much of the rip has been attempted so far, counting both
+     * completed and errored items as done.
+     * @return
+     *      Integer between 0 and 100 defining the progress of the album rip;
+     *      0 when nothing has been queued yet.
+     */
+    @Override
+    public int getCompletionPercentage() {
+        // Read every map exactly once so numerator and denominator are built
+        // from the same values.
+        int pending = itemsPending.size();
+        int finished = itemsErrored.size() + itemsCompleted.size();
+        int total = pending + finished;
+        if (total == 0) {
+            // Explicit instead of relying on (int) NaN evaluating to 0.
+            return 0;
+        }
+        // Integer arithmetic avoids floating-point truncation:
+        // 100 * (29 / 100.0) is 28.999999999999996 and was reported as 28.
+        return (int) (100L * finished / total);
+    }
+
+    /**
+     * Builds the one-line progress summary: completion percentage followed by
+     * the pending, completed and errored counts.
+     * @return
+     *      Human-readable information on the status of the current rip.
+     */
+    @Override
+    public String getStatusText() {
+        int percent = getCompletionPercentage();
+        return percent + "% "
+                + "- Pending: " + itemsPending.size()
+                + ", Completed: " + itemsCompleted.size()
+                + ", Errored: " + itemsErrored.size();
+    }
+
+
 }
--- a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
@ -1,7 +1,11 @@
 package com.rarchives.ripme.ripper;

 import java.awt.Desktop;
-import java.io.*;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileWriter;
+import java.io.IOException;
 import java.lang.reflect.Constructor;
 import java.net.MalformedURLException;
 import java.net.URL;
@ -9,21 +13,17 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 import java.util.Observable;
-
-import com.rarchives.ripme.App;
+import java.util.Scanner;
 import org.apache.log4j.FileAppender;
 import org.apache.log4j.Logger;
 import org.jsoup.HttpStatusException;
-
+import com.rarchives.ripme.App;
 import com.rarchives.ripme.ui.RipStatusComplete;
 import com.rarchives.ripme.ui.RipStatusHandler;
 import com.rarchives.ripme.ui.RipStatusMessage;
 import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
 import com.rarchives.ripme.utils.Utils;

-import java.io.File;
-import java.util.Scanner;
-
 public abstract class AbstractRipper
        extends Observable
        implements RipperInterface, Runnable {
@ -67,7 +67,7 @@ public abstract class AbstractRipper
     * Adds a URL to the url history file
     * @param downloadedURL URL to check if downloaded
     */
-    private void writeDownloadedURL(String downloadedURL) throws IOException {
+    protected void writeDownloadedURL(String downloadedURL) throws IOException {
        // If "save urls only" is checked don't write to the url history file
        if (Utils.getConfigBoolean("urls_only.save", false)) {
            return;
@ -131,7 +131,7 @@ public abstract class AbstractRipper
     *      Returns true if previously downloaded.
     *      Returns false if not yet downloaded.
     */
-    private boolean hasDownloadedURL(String url) {
+    protected boolean hasDownloadedURL(String url) {
        File file = new File(URLHistoryFile);
        url = normalizeUrl(url);

@ -218,6 +218,44 @@ public abstract class AbstractRipper
    protected abstract boolean addURLToDownload(URL url, File saveAs, String referrer, Map<String, String> cookies,
                                                Boolean getFileExtFromMIME);

+    /**
+     * Queues image to be downloaded and saved.
+     * Unpacks the option map and forwards to the full addURLToDownload overload.
+     * @param url
+     *      URL of the file
+     * @param options
+     *      A map<String,String> containing any changes to the default options; must not be null.
+     *      Options are getFileExtFromMIME, prefix, subdirectory, referrer, fileName, extension.
+     *      getFileExtFromMIME should be "true" or "false" (case-insensitive); a missing,
+     *      null or any other value is treated as false.
+     *      prefix and subdirectory default to ""; referrer, fileName and extension default to null.
+     * @param cookies
+     *      The cookies to send to the server while downloading this file.
+     * @return
+     *      The result of the full addURLToDownload overload this delegates to.
+     */
+    protected boolean addURLToDownload(URL url, Map<String, String> options, Map<String, String> cookies) {
+        // Bit of a hack but this lets us pass a bool using a map<string,String>.
+        // Boolean.parseBoolean tolerates a null value and does not depend on the default locale.
+        boolean useMIME = Boolean.parseBoolean(options.get("getFileExtFromMIME"));
+        return addURLToDownload(url, options.getOrDefault("prefix", ""), options.getOrDefault("subdirectory", ""), options.get("referrer"),
+                cookies, options.get("fileName"), options.get("extension"), useMIME);
+    }
+
+
+    /**
+     * Queues image to be downloaded and saved, sending no cookies.
+     * @param url
+     *      URL of the file
+     * @param options
+     *      A map<String,String> containing any changes to the default options.
+     *      Options are getFileExtFromMIME, prefix, subdirectory, referrer, fileName, extension.
+     *      getFileExtFromMIME should be "true" or "false"
+     * @return
+     *      The result of the (url, options, cookies) overload called with null cookies.
+     */
+    protected boolean addURLToDownload(URL url, Map<String, String> options) {
+        return addURLToDownload(url, options, null);
+    }
+
    /**
     * Queues image to be downloaded and saved.
     * @param url
@ -237,6 +275,22 @@ public abstract class AbstractRipper
     *      False if failed to download
     */
    protected boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map<String, String> cookies, String fileName, String extension, Boolean getFileExtFromMIME) {
+        // A common bug is rippers adding urls that are just "http:". This rejects said urls
+        if (url.toExternalForm().equals("http:") || url.toExternalForm().equals("https:")) {
+            LOGGER.info(url.toExternalForm() + " is a invalid url amd will be changed");
+            return false;
+
+        }
+        // Make sure the url doesn't contain any spaces as that can cause a 400 error when requesting the file
+        if (url.toExternalForm().contains(" ")) {
+            // If for some reason the url with all spaces encoded as %20 is malformed print an error
+            try {
+                url = new URL(url.toExternalForm().replaceAll(" ", "%20"));
+            } catch (MalformedURLException e) {
+                LOGGER.error("Unable to remove spaces from url\nURL: " + url.toExternalForm());
+                e.printStackTrace();
+            }
+        }
        // Don't re-add the url if it was downloaded in a previous rip
        if (Utils.getConfigBoolean("remember.url_history", true) && !isThisATest()) {
            if (hasDownloadedURL(url.toExternalForm())) {
@ -280,6 +334,7 @@ public abstract class AbstractRipper
            saveFileAs.getParentFile().mkdirs();
        }
        if (Utils.getConfigBoolean("remember.url_history", true) && !isThisATest()) {
+            LOGGER.info("Writing " + url.toExternalForm() + " to file");
            try {
                writeDownloadedURL(url.toExternalForm() + "\n");
            } catch (IOException e) {
@ -493,7 +548,7 @@ public abstract class AbstractRipper
    public static AbstractRipper getRipper(URL url) throws Exception {
        for (Constructor<?> constructor : getRipperConstructors("com.rarchives.ripme.ripper.rippers")) {
            try {
-                AlbumRipper ripper = (AlbumRipper) constructor.newInstance(url); // by design: can throw ClassCastException
+                AbstractRipper ripper = (AbstractRipper) constructor.newInstance(url); // by design: can throw ClassCastException
                LOGGER.debug("Found album ripper: " + ripper.getClass().getName());
                return ripper;
            } catch (Exception e) {
--- a/src/main/java/com/rarchives/ripme/ripper/AlbumRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/AlbumRipper.java
@ -70,7 +70,7 @@ public abstract class AlbumRipper extends AbstractRipper {
            String urlFile = this.workingDir + File.separator + "urls.txt";
            try (FileWriter fw = new FileWriter(urlFile, true)) {
                fw.write(url.toExternalForm());
-                fw.write("\n");
+                fw.write(System.lineSeparator());
                itemsCompleted.put(url, new File(urlFile));
            } catch (IOException e) {
                LOGGER.error("Error while writing to " + urlFile, e);
@ -87,6 +87,7 @@ public abstract class AlbumRipper extends AbstractRipper {
            }
            threadPool.addThread(dft);
        }
+
        return true;
    }

--- a/src/main/java/com/rarchives/ripme/ripper/DownloadFileThread.java
+++ b/src/main/java/com/rarchives/ripme/ripper/DownloadFileThread.java
@ -1,12 +1,6 @@
 package com.rarchives.ripme.ripper;

-import java.io.BufferedInputStream;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.lang.reflect.Array;
+import java.io.*;
 import java.net.HttpURLConnection;
 import java.net.SocketTimeoutException;
 import java.net.URL;
@ -19,22 +13,18 @@ import java.util.ResourceBundle;
 import javax.net.ssl.HttpsURLConnection;

 import com.rarchives.ripme.ui.MainWindow;
-import org.apache.commons.io.IOUtils;
 import org.apache.log4j.Logger;
 import org.jsoup.HttpStatusException;

 import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
 import com.rarchives.ripme.utils.Utils;
-import com.rarchives.ripme.ripper.AbstractRipper;

 /**
- * Thread for downloading files.
- * Includes retry logic, observer notifications, and other goodies.
+ * Thread for downloading files. Includes retry logic, observer notifications,
+ * and other goodies.
 */
 class DownloadFileThread extends Thread {
-
    private ResourceBundle rb = MainWindow.rb;
-
    private static final Logger logger = Logger.getLogger(DownloadFileThread.class);

    private String referrer = "";
@ -63,16 +53,19 @@ class DownloadFileThread extends Thread {
    public void setReferrer(String referrer) {
        this.referrer = referrer;
    }
+
    public void setCookies(Map<String, String> cookies) {
        this.cookies = cookies;
    }

-
    /**
-     * Attempts to download the file. Retries as needed.
-     * Notifies observers upon completion/error/warn.
+     * Attempts to download the file. Retries as needed. Notifies observers upon
+     * completion/error/warn.
     */
    public void run() {
+        // First thing we make sure the file name doesn't have any illegal chars in it
+        saveAs = new File(
+                saveAs.getParentFile().getAbsolutePath() + File.separator + Utils.sanitizeSaveAs(saveAs.getName()));
        long fileSize = 0;
        int bytesTotal = 0;
        int bytesDownloaded = 0;
@ -85,13 +78,15 @@ class DownloadFileThread extends Thread {
            observer.downloadErrored(url, rb.getString("download.interrupted"));
            return;
        }
-        if (saveAs.exists() && !observer.tryResumeDownload() && !getFileExtFromMIME ||
-                Utils.fuzzyExists(new File(saveAs.getParent()), saveAs.getName()) && getFileExtFromMIME && !observer.tryResumeDownload()) {
+        if (saveAs.exists() && !observer.tryResumeDownload() && !getFileExtFromMIME
+                || Utils.fuzzyExists(new File(saveAs.getParent()), saveAs.getName()) && getFileExtFromMIME
+                && !observer.tryResumeDownload()) {
            if (Utils.getConfigBoolean("file.overwrite", false)) {
                logger.info("[!] " + rb.getString("deleting.existing.file") + prettySaveAs);
                saveAs.delete();
            } else {
-                logger.info("[!] " + rb.getString("skipping") + url + " -- " + rb.getString("file.already.exists") + ": " + prettySaveAs);
+                logger.info("[!] " + rb.getString("skipping") + url + " -- "
+                        + rb.getString("file.already.exists") + ": " + prettySaveAs);
                observer.downloadExists(url, saveAs);
                return;
            }
@ -101,7 +96,8 @@ class DownloadFileThread extends Thread {
        int tries = 0; // Number of attempts to download
        do {
            tries += 1;
-            InputStream bis = null; OutputStream fos = null;
+            InputStream bis = null;
+            OutputStream fos = null;
            try {
                logger.info("    Downloading file: " + urlToDownload + (tries > 0 ? " Retry #" + tries : ""));
                observer.sendUpdate(STATUS.DOWNLOAD_STARTED, url.toExternalForm());
@ -110,12 +106,12 @@ class DownloadFileThread extends Thread {
                HttpURLConnection huc;
                if (this.url.toString().startsWith("https")) {
                    huc = (HttpsURLConnection) urlToDownload.openConnection();
-                }
-                else {
+                } else {
                    huc = (HttpURLConnection) urlToDownload.openConnection();
                }
                huc.setInstanceFollowRedirects(true);
-                // It is important to set both ConnectTimeout and ReadTimeout. If you don't then ripme will wait forever
+                // It is important to set both ConnectTimeout and ReadTimeout. If you don't then
+                // ripme will wait forever
                // for the server to send data after connecting.
                huc.setConnectTimeout(TIMEOUT);
                huc.setReadTimeout(TIMEOUT);
@ -142,8 +138,10 @@ class DownloadFileThread extends Thread {

                int statusCode = huc.getResponseCode();
                logger.debug("Status code: " + statusCode);
+                // If the server doesn't allow resuming downloads error out
                if (statusCode != 206 && observer.tryResumeDownload() && saveAs.exists()) {
-                    // TODO find a better way to handle servers that don't support resuming downloads then just erroring out
+                    // TODO find a better way to handle servers that don't support resuming
+                    // downloads than just erroring out
                    throw new IOException(rb.getString("server.doesnt.support.resuming.downloads"));
                }
                if (statusCode / 100 == 3) { // 3xx Redirect
@ -158,12 +156,15 @@ class DownloadFileThread extends Thread {
                    throw new IOException("Redirect status code " + statusCode + " - redirect to " + location);
                }
                if (statusCode / 100 == 4) { // 4xx errors
-                    logger.error("[!] " + rb.getString("nonretriable.status.code") + " " + statusCode + " while downloading from " + url);
-                    observer.downloadErrored(url, rb.getString("nonretriable.status.code") + " " + statusCode + " while downloading " + url.toExternalForm());
+                    logger.error("[!] " + rb.getString("nonretriable.status.code") + " " + statusCode
+                            + " while downloading from " + url);
+                    observer.downloadErrored(url, rb.getString("nonretriable.status.code") + " "
+                            + statusCode + " while downloading " + url.toExternalForm());
                    return; // Not retriable, drop out.
                }
                if (statusCode / 100 == 5) { // 5xx errors
-                    observer.downloadErrored(url, rb.getString("retriable.status.code") + " " + statusCode + " while downloading " + url.toExternalForm());
+                    observer.downloadErrored(url, rb.getString("retriable.status.code") + " " + statusCode
+                            + " while downloading " + url.toExternalForm());
                    // Throw exception so download can be retried
                    throw new IOException(rb.getString("retriable.status.code") + " " + statusCode);
                }
@ -174,7 +175,8 @@ class DownloadFileThread extends Thread {
                    return;
                }

-                // If the ripper is using the bytes progress bar set bytesTotal to huc.getContentLength()
+                // If the ripper is using the bytes progress bar set bytesTotal to
+                // huc.getContentLength()
                if (observer.useByteProgessBar()) {
                    bytesTotal = huc.getContentLength();
                    observer.setBytesTotal(bytesTotal);
@ -202,7 +204,8 @@ class DownloadFileThread extends Thread {
                            saveAs = new File(saveAs.toString() + "." + fileExt);
                        } else {
                            logger.error(rb.getString("was.unable.to.get.content.type.using.magic.number"));
-                            logger.error(rb.getString("magic.number.was") + ": " + Arrays.toString(magicBytes));
+                            logger.error(
+                                    rb.getString("magic.number.was") + ": " + Arrays.toString(magicBytes));
                        }
                    }
                }
@ -210,11 +213,41 @@ class DownloadFileThread extends Thread {
                if (statusCode == 206) {
                    fos = new FileOutputStream(saveAs, true);
                } else {
+                    try {
                        fos = new FileOutputStream(saveAs);
+                    } catch (FileNotFoundException e) {
+                        // We do this because some filesystems have a max name length
+                        if (e.getMessage().contains("File name too long")) {
+                            logger.error("The filename " + saveAs.getName()
+                                    + " is to long to be saved on this file system.");
+                            logger.info("Shortening filename");
+                            String[] saveAsSplit = saveAs.getName().split("\\.");
+                            // Get the file extension so when we shorten the file name we don't cut off the
+                            // file extension
+                            String fileExt = saveAsSplit[saveAsSplit.length - 1];
+                            // The max limit for filenames on Linux with Ext3/4 is 255 bytes
+                            logger.info(saveAs.getName().substring(0, 254 - fileExt.length()) + fileExt);
+                            String filename = saveAs.getName().substring(0, 254 - fileExt.length()) + "." + fileExt;
+                            // We can't just use the new file name as the saveAs because the file name
+                            // doesn't include the user's save path, so we take the parent directory
+                            // from the old saveAs
+                            saveAs = new File(saveAs.getParentFile().getAbsolutePath() + File.separator + filename);
+                            fos = new FileOutputStream(saveAs);
+                        } else if (saveAs.getAbsolutePath().length() > 259 && Utils.isWindows()) {
+                            // This branch handles paths of 260 or more characters, which Windows does
+                            // not allow
+                            fos = new FileOutputStream(
+                                    Utils.shortenSaveAsWindows(saveAs.getParentFile().getPath(), saveAs.getName()));
+                        }
+                    }
                }
                byte[] data = new byte[1024 * 256];
                int bytesRead;
-                boolean shouldSkipFileDownload = huc.getContentLength() / 10000000 >= 10;
+                boolean shouldSkipFileDownload = huc.getContentLength() / 1000000 >= 10 && AbstractRipper.isThisATest();
+                // If this is a test rip we skip large downloads
+                if (shouldSkipFileDownload) {
+                    logger.debug("Not downloading whole file because it is over 10mb and this is a test");
+                } else {
                    while ((bytesRead = bis.read(data)) != -1) {
                        try {
                            observer.stopCheck();
@ -228,13 +261,6 @@ class DownloadFileThread extends Thread {
                            observer.setBytesCompleted(bytesDownloaded);
                            observer.sendUpdate(STATUS.COMPLETED_BYTES, bytesDownloaded);
                        }
-                    // If this is a test and we're downloading a large file
-                    if (AbstractRipper.isThisATest() && shouldSkipFileDownload) {
-                        logger.debug("Not downloading whole file because it is over 10mb and this is a test");
-                        bis.close();
-                        fos.close();
-                        break;
-
                    }
                }
                bis.close();
@ -249,24 +275,34 @@ class DownloadFileThread extends Thread {
                logger.debug(rb.getString("http.status.exception"), hse);
                logger.error("[!] HTTP status " + hse.getStatusCode() + " while downloading from " + urlToDownload);
                if (hse.getStatusCode() == 404 && Utils.getConfigBoolean("errors.skip404", false)) {
-                    observer.downloadErrored(url, "HTTP status code " + hse.getStatusCode() + " while downloading " + url.toExternalForm());
+                    observer.downloadErrored(url,
+                            "HTTP status code " + hse.getStatusCode() + " while downloading " + url.toExternalForm());
                    return;
                }
            } catch (IOException e) {
                logger.debug("IOException", e);
-                logger.error("[!] " + rb.getString("exception.while.downloading.file") + ": " + url + " - " + e.getMessage());
+                logger.error("[!] " + rb.getString("exception.while.downloading.file") + ": " + url + " - "
+                        + e.getMessage());
            } finally {
                // Close any open streams
                try {
-                    if (bis != null) { bis.close(); }
-                } catch (IOException e) { }
+                    if (bis != null) {
+                        bis.close();
+                    }
+                } catch (IOException e) {
+                }
                try {
-                    if (fos != null) { fos.close(); }
-                } catch (IOException e) { }
+                    if (fos != null) {
+                        fos.close();
+                    }
+                } catch (IOException e) {
+                }
            }
            if (tries > this.retries) {
-                logger.error("[!] " + rb.getString ("exceeded.maximum.retries") + " (" + this.retries + ") for URL " + url);
-                observer.downloadErrored(url, rb.getString("failed.to.download") + " " + url.toExternalForm());
+                logger.error("[!] " + rb.getString("exceeded.maximum.retries") + " (" + this.retries
+                        + ") for URL " + url);
+                observer.downloadErrored(url,
+                        rb.getString("failed.to.download") + " " + url.toExternalForm());
                return;
            }
        } while (true);
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatRipper.java
@ -9,19 +9,27 @@ import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

-import com.rarchives.ripme.ripper.AbstractSingleFileRipper;
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import org.json.JSONArray;
+import org.json.JSONObject;
 import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
 import org.jsoup.select.Elements;

 import com.rarchives.ripme.utils.Http;


-public class GfycatRipper extends AbstractSingleFileRipper {
+public class GfycatRipper extends AbstractHTMLRipper {

    private static final String HOST = "gfycat.com";
+    String username = "";
+    String cursor = "";
+    String count = "30";
+
+

    public GfycatRipper(URL url) throws IOException {
-        super(url);
+        super(new URL(url.toExternalForm().split("-")[0].replace("thumbs.", "")));
    }

    @Override
@ -41,14 +49,26 @@ public class GfycatRipper extends AbstractSingleFileRipper {

    @Override
    public URL sanitizeURL(URL url) throws MalformedURLException {
-        url = new URL(url.toExternalForm().replace("/gifs/detail", ""));
+        String sUrl = url.toExternalForm();
+        sUrl = sUrl.replace("/gifs/detail", "");
+        sUrl = sUrl.replace("/amp", "");
+        return new URL(sUrl);
+    }

-        return url;
+    public boolean isProfile() {
+        Pattern p = Pattern.compile("^https?://[wm.]*gfycat\\.com/@([a-zA-Z0-9]+).*$");
+        Matcher m = p.matcher(url.toExternalForm());
+        return m.matches();
    }

    @Override
    public Document getFirstPage() throws IOException {
+        if (!isProfile()) {
            return Http.url(url).get();
+        } else {
+            username = getGID(url);
+            return Http.url(new URL("https://api.gfycat.com/v1/users/" +  username + "/gfycats")).ignoreContentType().get();
+        }
    }

    @Override
@ -58,27 +78,58 @@ public class GfycatRipper extends AbstractSingleFileRipper {

    @Override
    public String getGID(URL url) throws MalformedURLException {
-        Pattern p = Pattern.compile("^https?://[wm.]*gfycat\\.com/([a-zA-Z0-9]+).*$");
+        Pattern p = Pattern.compile("^https?://(thumbs\\.|[wm\\.]*)gfycat\\.com/@?([a-zA-Z0-9]+).*$");
        Matcher m = p.matcher(url.toExternalForm());
-        if (m.matches()) {
-            return m.group(1);
-        }
+
+        if (m.matches())
+            return m.group(2);

        throw new MalformedURLException(
                "Expected gfycat.com format: "
-                        + "gfycat.com/id"
+                        + "gfycat.com/id or "
+                        + "thumbs.gfycat.com/id.gif"
                        + " Got: " + url);
    }

+    /**
+     * Strips the HTML wrapper from text that is really a JSON payload, so it can be handed to
+     * a JSON parser (getURLsFromPage passes {@code doc.html()} through this method).
+     * Removes the exact opening html/head/body text and closing body/html text matched below,
+     * then every newline and every {@code =""} sequence.
+     *
+     * @param t the HTML text to clean
+     * @return the text with those fragments removed
+     */
+    private String stripHTMLTags(String t) {
+        // replaceAll takes a regex, but none of these patterns contain regex metacharacters,
+        // so each one matches literally.
+        t = t.replaceAll("<html>\n" +
+                " <head></head>\n" +
+                " <body>", "");
+        t = t.replaceAll("</body>\n" +
+                "</html>", "");
+        t = t.replaceAll("\n", "");
+        // NOTE(review): presumably removes empty attribute values the HTML serializer adds to
+        // the payload - confirm; this also strips any genuine ="" inside the JSON.
+        t = t.replaceAll("=\"\"", "");
+        return t;
+    }
+
+    /**
+     * Requests the next page of a user's gfycats from the Gfycat API, using the cursor stored
+     * by getURLsFromPage.
+     *
+     * @param doc the current page (not read by this method)
+     * @return the response for users/{username}/gfycats with the count and cursor parameters
+     * @throws IOException with "No more pages" when the cursor is empty; may also propagate
+     *         from the HTTP request
+     */
+    @Override
+    public Document getNextPage(Document doc) throws IOException {
+        // cursor starts as "" and is only assigned while parsing profile pages, so a non-profile
+        // rip always stops here after its first page.
+        if (cursor.equals("")) {
+            throw new IOException("No more pages");
+        }
+        return Http.url(new URL("https://api.gfycat.com/v1/users/" +  username + "/gfycats?count=" + count + "&cursor=" + cursor)).ignoreContentType().get();
+    }
+
    @Override
    public List<String> getURLsFromPage(Document doc) {
        List<String> result = new ArrayList<>();
-        Elements videos = doc.select("source");
-        String vidUrl = videos.first().attr("src");
-        if (vidUrl.startsWith("//")) {
-            vidUrl = "http:" + vidUrl;
+        if (isProfile()) {
+            JSONObject page = new JSONObject(stripHTMLTags(doc.html()));
+            JSONArray content = page.getJSONArray("gfycats");
+            for (int i = 0; i < content.length(); i++) {
+                result.add(content.getJSONObject(i).getString("mp4Url"));
+            }
+            cursor = page.getString("cursor");
+        } else {
+            Elements videos = doc.select("script");
+            for (Element el : videos) {
+                String json = el.html();
+                if (json.startsWith("{")) {
+                    JSONObject page = new JSONObject(json);
+                    result.add(page.getJSONObject("video").getString("contentUrl"));
+                }
+            }
        }
-        result.add(vidUrl);
        return result;
    }

@ -95,14 +146,14 @@ public class GfycatRipper extends AbstractSingleFileRipper {
        url = new URL(url.toExternalForm().replace("/gifs/detail", ""));

        Document doc = Http.url(url).get();
-        Elements videos = doc.select("source");
-        if (videos.isEmpty()) {
-            throw new IOException("Could not find source at " + url);
-        }
-        String vidUrl = videos.first().attr("src");
-        if (vidUrl.startsWith("//")) {
-            vidUrl = "http:" + vidUrl;
-        }
-        return vidUrl;
+        Elements videos = doc.select("script");
+        for (Element el : videos) {
+            String json = el.html();
+            if (json.startsWith("{")) {
+                JSONObject page = new JSONObject(json);
+                return page.getJSONObject("video").getString("contentUrl");
+            }
+        }
+        throw new IOException();
    }
 }
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
@ -1,480 +1,502 @@
 package com.rarchives.ripme.ripper.rippers;

-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.UnsupportedEncodingException;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.net.URLConnection;
-import java.time.*;
-import java.time.format.DateTimeFormatter;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import java.security.*;
-
-import org.json.JSONArray;
-import org.json.JSONException;
-import org.json.JSONObject;
-
 import com.rarchives.ripme.ripper.AbstractJSONRipper;
 import com.rarchives.ripme.utils.Http;
-
+import com.rarchives.ripme.utils.Utils;
+import jdk.nashorn.internal.ir.Block;
+import jdk.nashorn.internal.ir.CallNode;
+import jdk.nashorn.internal.ir.ExpressionStatement;
+import jdk.nashorn.internal.ir.FunctionNode;
+import jdk.nashorn.internal.ir.Statement;
+import jdk.nashorn.internal.parser.Parser;
+import jdk.nashorn.internal.runtime.Context;
+import jdk.nashorn.internal.runtime.ErrorManager;
+import jdk.nashorn.internal.runtime.Source;
+import jdk.nashorn.internal.runtime.options.Options;
+import org.json.JSONArray;
+import org.json.JSONObject;
 import org.jsoup.Connection;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
-import com.rarchives.ripme.ui.RipStatusMessage;
-import com.rarchives.ripme.utils.Utils;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.time.Instant;
+import java.time.ZoneOffset;
+import java.time.ZonedDateTime;
+import java.time.format.DateTimeFormatter;
+import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Spliterators;
+import java.util.function.BiFunction;
+import java.util.function.Consumer;
+import java.util.function.Function;
+import java.util.function.Predicate;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import java.util.stream.StreamSupport;

+import static java.lang.String.format;

+// Available configuration options:
+// instagram.download_images_only - use to skip video links
+// instagram.session_id - should be set for stories and private accounts (look for sessionid cookie)
 public class InstagramRipper extends AbstractJSONRipper {
-    String nextPageID = "";
+
    private String qHash;
-    private  boolean rippingTag = false;
-    private String tagName;
+    private Map<String, String> cookies = new HashMap<>();
+    private String idString;
+    private List<String> itemPrefixes = new ArrayList<>();
+    private List<String> failedItems = new ArrayList<>();

-    private String userID;
-    private String rhx_gis = null;
-    private String csrftoken;
-    // Run into a weird issue with Jsoup cutting some json pages in half, this is a work around
-    // see https://github.com/RipMeApp/ripme/issues/601
-    private String workAroundJsonString;
+    private boolean hashtagRip;
+    private boolean taggedRip;
+    private boolean igtvRip;
+    private boolean postRip;
+    private boolean storiesRip;
+    private boolean pinnedRip;
+    private boolean pinnedReelRip;

+    /**
+     * Path patterns (the part after the instagram.com host) for each supported URL type.
+     * Each pattern is substituted into the base regex built in getUrlMatcher.
+     *
+     * Declaration order matters: getGID tries the constants in order and uses the first one
+     * that matches. USER_PROFILE accepts any first path segment, so it must stay last;
+     * otherwise it would shadow the more specific patterns above it.
+     */
+    private enum UrlTypePattern {
+        // e.g. https://www.instagram.com/explore/tags/rachelc00k/
+        HASHTAG("explore/tags/(?<tagname>[^?/]+)"),

+        // e.g. https://www.instagram.com/stories/rachelc00k/
+        STORIES("stories/(?<username>[^?/]+)"),
+
+        // e.g. https://www.instagram.com/rachelc00k/tagged/
+        USER_TAGGED("(?<username>[^?/]+)/tagged"),
+
+        // e.g. https://www.instagram.com/rachelc00k/channel/
+        IGTV("(?<username>[^?/]+)/channel"),
+
+        // e.g. https://www.instagram.com/p/Bu4CEfbhNk4/
+        SINGLE_POST("(?:p|tv)/(?<shortcode>[^?/]+)"),
+
+        // pseudo-url, e.g. https://www.instagram.com/rachelc00k/?pinned
+        PINNED("(?<username>[^?/]+)/?[?]pinned"),
+
+        // e.g. https://www.instagram.com/rachelc00k/
+        USER_PROFILE("(?<username>[^?/]+)");
+
+        // Regex fragment with a named group (tagname, username or shortcode) that getGID reads.
+        private final String urlTypePattern;
+
+        UrlTypePattern(String urlTypePattern) {
+            this.urlTypePattern = urlTypePattern;
+        }
+    }

    public InstagramRipper(URL url) throws IOException {
        super(url);
    }

    @Override
-    public String getHost() {
-        return "instagram";
-    }
-    @Override
-    public String getDomain() {
+    protected String getDomain() {
        return "instagram.com";
    }

    @Override
-    public boolean canRip(URL url) {
-        return (url.getHost().endsWith("instagram.com"));
-    }
-
-    @Override
-    public URL sanitizeURL(URL url) throws MalformedURLException {
-       URL san_url = new URL(url.toExternalForm().replaceAll("\\?hl=\\S*", ""));
-       LOGGER.info("sanitized URL is " + san_url.toExternalForm());
-        return san_url;
-    }
-
-    @Override
-    public String normalizeUrl(String url) {
-        // Remove the date sig from the url
-        return url.replaceAll("/[A-Z0-9]{8}/", "/");
-    }
-
-    @Override public boolean hasASAPRipping() {
-        return true;
-    }
-
-    private List<String> getPostsFromSinglePage(JSONObject json) {
-        List<String> imageURLs = new ArrayList<>();
-        JSONArray datas;
-            if (json.getJSONObject("entry_data").getJSONArray("PostPage")
-                    .getJSONObject(0).getJSONObject("graphql").getJSONObject("shortcode_media")
-                    .has("edge_sidecar_to_children")) {
-                datas = json.getJSONObject("entry_data").getJSONArray("PostPage")
-                        .getJSONObject(0).getJSONObject("graphql").getJSONObject("shortcode_media")
-                        .getJSONObject("edge_sidecar_to_children").getJSONArray("edges");
-                for (int i = 0; i < datas.length(); i++) {
-                    JSONObject data = (JSONObject) datas.get(i);
-                    data = data.getJSONObject("node");
-                    if (data.has("is_video") && data.getBoolean("is_video")) {
-                        imageURLs.add(data.getString("video_url"));
-                    } else {
-                        imageURLs.add(data.getString("display_url"));
-                    }
-                }
-            } else {
-                JSONObject data = json.getJSONObject("entry_data").getJSONArray("PostPage")
-                        .getJSONObject(0).getJSONObject("graphql").getJSONObject("shortcode_media");
-                if (data.getBoolean("is_video")) {
-                    imageURLs.add(data.getString("video_url"));
-                } else {
-                    imageURLs.add(data.getString("display_url"));
-                }
-            }
-            return imageURLs;
+    public String getHost() {
+        return "instagram";
    }

    @Override
    public String getGID(URL url) throws MalformedURLException {
-        Pattern p = Pattern.compile("^https?://instagram.com/([^/]+)/?");
-        Matcher m = p.matcher(url.toExternalForm());
-        if (m.matches()) {
-            return m.group(1);
+        for (UrlTypePattern urlType : UrlTypePattern.values()) {
+            Matcher urlMatcher = getUrlMatcher(url, urlType);
+            if (urlMatcher.matches()) {
+                switch (urlType) {
+                    case HASHTAG:
+                        hashtagRip = true;
+                        return "tag_" + urlMatcher.group("tagname");
+                    case PINNED:
+                        pinnedRip = true;
+                        return urlMatcher.group("username") + "_pinned";
+                    case STORIES:
+                        storiesRip = true;
+                        return urlMatcher.group("username") + "_stories";
+                    case USER_TAGGED:
+                        taggedRip = true;
+                        return urlMatcher.group("username") + "_tagged";
+                    case IGTV:
+                        igtvRip = true;
+                        return urlMatcher.group("username") + "_igtv";
+                    case SINGLE_POST:
+                        postRip = true;
+                        return "post_" + urlMatcher.group("shortcode");
+                    case USER_PROFILE:
+                        return urlMatcher.group("username");
+                    default:
+                        throw new RuntimeException("Reached unreachable");
+                }
+            }
+        }
+        throw new MalformedURLException("This URL can't be ripped");
    }

-        p = Pattern.compile("^https?://www.instagram.com/([^/]+)/?(?:\\?hl=\\S*)?/?");
-        m = p.matcher(url.toExternalForm());
-        if (m.matches()) {
-            return m.group(1);
-        }
-
-        p = Pattern.compile("^https?://www.instagram.com/p/([a-zA-Z0-9_-]+)/\\?taken-by=([^/]+)/?");
-        m = p.matcher(url.toExternalForm());
-        if (m.matches()) {
-            return m.group(2) + "_" + m.group(1);
-        }
-
-        p = Pattern.compile("^https?://www.instagram.com/p/([a-zA-Z0-9_-]+)/?");
-        m = p.matcher(url.toExternalForm());
-        if (m.matches()) {
-            return m.group(1);
-        }
-
-        p = Pattern.compile("^https?://www.instagram.com/p/([a-zA-Z0-9_-]+)/?(?:\\?hl=\\S*)?/?");
-        m = p.matcher(url.toExternalForm());
-        if (m.matches()) {
-            return m.group(1);
-        }
-
-        p = Pattern.compile("^https?://www.instagram.com/explore/tags/([^/]+)/?");
-        m = p.matcher(url.toExternalForm());
-        if (m.matches()) {
-            rippingTag = true;
-            tagName = m.group(1);
-            return m.group(1);
-        }
-
-        throw new MalformedURLException("Unable to find user in " + url);
-    }
-
-    private String stripHTMLTags(String t) {
-        t = t.replaceAll("<html>\n" +
-                " <head></head>\n" +
-                " <body>", "");
-        t.replaceAll("</body>\n" +
-                "</html>", "");
-        t = t.replaceAll("\n", "");
-        t = t.replaceAll("=\"\"", "");
-        return t;
-    }
-
-
-    private JSONObject getJSONFromPage(Document firstPage) throws IOException {
-        // Check if this page is HTML + JSON or jsut json
-        if (!firstPage.html().contains("window._sharedData =")) {
-            return new JSONObject(stripHTMLTags(firstPage.html()));
-        }
-        String jsonText = "";
-        try {
-            for (Element script : firstPage.select("script[type=text/javascript]")) {
-                if (script.data().contains("window._sharedData = ")) {
-                    jsonText = script.data().replaceAll("window._sharedData = ", "");
-                    jsonText = jsonText.replaceAll("};", "}");
-                }
-            }
-            return new JSONObject(jsonText);
-        } catch (JSONException e) {
-            throw new IOException("Could not get JSON from page");
-        }
+    private Matcher getUrlMatcher(URL url, UrlTypePattern type) {
+        String baseRegex = "^https?://(?:www[.])?instagram[.]com/%s(?:[?/].*)?";
+        Pattern pattern = Pattern.compile(format(baseRegex, type.urlTypePattern));
+        return pattern.matcher(url.toExternalForm());
    }

    @Override
    public JSONObject getFirstPage() throws IOException {
-        Connection.Response resp = Http.url(url).response();
-        LOGGER.info(resp.cookies());
-        csrftoken = resp.cookie("csrftoken");
-        Document p = resp.parse();
-        // Get the query hash so we can download the next page
-        qHash = getQHash(p);
-        return getJSONFromPage(p);
+        setAuthCookie();
+        Document document = Http.url(url).cookies(cookies).response().parse();
+        qHash = getQhash(document);
+        JSONObject jsonObject = getJsonObjectFromDoc(document);
+        String hashtagNamePath = "entry_data.TagPage[0].graphql.hashtag.name";
+        String singlePostIdPath = "graphql.shortcode_media.shortcode";
+        String profileIdPath = "entry_data.ProfilePage[0].graphql.user.id";
+        String storiesPath = "entry_data.StoriesPage[0].user.id";
+        String idPath = hashtagRip ? hashtagNamePath : storiesRip ? storiesPath : postRip ? singlePostIdPath : profileIdPath;
+        idString = getJsonStringByPath(jsonObject, idPath);
+        return taggedRip ? getNextPage(null) : pinnedRip ? getPinnedItems(document) : storiesRip ? getStoriesItems() : jsonObject;
    }

-    private String getVideoFromPage(String videoID) {
+    /**
+     * Copies the configured Instagram session id into the cookie map used for requests.
+     * The value is read from the instagram.session_id config option; when it is absent no
+     * cookie is added.
+     *
+     * @throws IOException if this is a stories or pinned rip and no session id is configured
+     */
+    private void setAuthCookie() throws IOException {
+        String sessionId = Utils.getConfigString("instagram.session_id", null);
+        // Both stories and pinned rips are rejected without a session id, so the message has to
+        // name both; previously it mentioned stories only.
+        if ((storiesRip || pinnedRip) && sessionId == null) {
+            throw new IOException("instagram.session_id should be set up for Instagram stories and pinned items");
+        }
+        if (sessionId != null) {
+            cookies.put("sessionid", sessionId);
+        }
+    }
+
+    // Query hash is used for graphql requests.
+    // Finds the preloaded script that matches the current rip type, downloads it and extracts
+    // the hash from its text with the matching get*Hash helper.
+    // Returns null for single-post rips.
+    private String getQhash(Document doc) throws IOException {
+        // getNextPage returns null for post rips, so no paging hash is looked up for them.
+        if (postRip) {
+            return null;
+        }
+        // Stories and pinned reels read the hash from Consumer.js; everything else from the
+        // profile or tag page container script.
+        Predicate<String> hrefFilter = (storiesRip || pinnedReelRip) ? href -> href.contains("Consumer.js") :
+                href -> href.contains("ProfilePageContainer.js") || href.contains("TagPageContainer.js");
+
+        // NOTE(review): when no preload link matches, href is "" and the request below fetches
+        // the site root instead of a script - confirm getHashValue copes with that input.
+        String href = doc.select("link[rel=preload]").stream()
+                .map(link -> link.attr("href"))
+                .filter(hrefFilter)
+                .findFirst().orElse("");
+        String body = Http.url("https://www.instagram.com" + href).cookies(cookies).response().body();
+
+        // Flags are checked in priority order; a plain profile rip is the fallback.
+        Function<String, String> hashExtractor =
+                storiesRip || pinnedReelRip ? this::getStoriesHash :
+                        pinnedRip ? this::getPinnedHash : hashtagRip ? this::getTagHash :
+                                taggedRip ? this::getUserTagHash : this::getProfileHash;
+
+        return hashExtractor.apply(body);
+    }
+
+    // Hash for stories and pinned-reel requests: getHashValue lookup keyed on "loadStoryViewers".
+    // NOTE(review): the meaning of the third argument (-5) is defined by getHashValue - confirm there.
+    private String getStoriesHash(String jsData) {
+        return getHashValue(jsData, "loadStoryViewers", -5);
+    }
+
+    // Hash for plain profile rips: getHashValue lookup keyed on "loadProfilePageExtras".
+    // NOTE(review): the meaning of the third argument (-1) is defined by getHashValue - confirm there.
+    private String getProfileHash(String jsData) {
+        return getHashValue(jsData, "loadProfilePageExtras", -1);
+    }
+
+    // Hash for the pinned-albums list: same "loadProfilePageExtras" key as getProfileHash, but
+    // with -2 as the third argument.
+    // NOTE(review): the meaning of that argument is defined by getHashValue - confirm there.
+    private String getPinnedHash(String jsData) {
+        return getHashValue(jsData, "loadProfilePageExtras", -2);
+    }
+
+    // Hash for hashtag rips: getHashValue lookup keyed on "requestNextTagMedia".
+    // NOTE(review): the meaning of the third argument (-1) is defined by getHashValue - confirm there.
+    private String getTagHash(String jsData) {
+        return getHashValue(jsData, "requestNextTagMedia", -1);
+    }
+
+    // Hash for "tagged" rips: getHashValue lookup keyed on "requestNextTaggedPosts".
+    // NOTE(review): the meaning of the third argument (-1) is defined by getHashValue - confirm there.
+    private String getUserTagHash(String jsData) {
+        return getHashValue(jsData, "requestNextTaggedPosts", -1);
+    }
+
+    /**
+     * Extracts the page data JSON embedded in the document's inline scripts.
+     * Only scripts whose text starts with window._sharedData or window.__additionalDataLoaded
+     * are considered; the first one whose JSON text mentions "graphql" or "StoriesPage" wins.
+     *
+     * @param document the parsed Instagram page
+     * @return the parsed JSON object, or null when no script qualifies (callers must check)
+     */
+    private JSONObject getJsonObjectFromDoc(Document document) {
+        for (Element script : document.select("script[type=text/javascript]")) {
+            String scriptText = script.data();
+            if (scriptText.startsWith("window._sharedData") || scriptText.startsWith("window.__additionalDataLoaded")) {
+                // Keep only the {...} payload, dropping the JavaScript before the first '{' and
+                // after the last '}'. '.' does not match line breaks here, so this assumes the
+                // JSON sits on a single line - TODO confirm.
+                String jsonText = scriptText.replaceAll("[^{]*([{].*})[^}]*", "$1");
+                if (jsonText.contains("graphql") || jsonText.contains("StoriesPage")) {
+                    return new JSONObject(jsonText);
+                }
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Requests the next page of media through the graphql endpoint.
+     *
+     * @param source the page just processed, or null to request the first page of results
+     *               (getFirstPage does this for tagged rips)
+     * @return the next page, or null when there is nothing further to fetch
+     * @throws IOException if the graphql request fails
+     */
+    @Override
+    public JSONObject getNextPage(JSONObject source) throws IOException {
+        // Single posts, stories and pinned reels are not paged.
+        if (postRip || storiesRip || pinnedReelRip) {
+            return null;
+        }
+        // Hashtag queries are keyed by tag name, all other queries by id; 12 items per page.
+        JSONObject nextPageQuery = new JSONObject().put(hashtagRip ? "tag_name" : "id", idString).put("first", 12);
+        if (source == null) {
+            return graphqlRequest(nextPageQuery);
+        }
+        JSONObject pageInfo = getMediaRoot(source).getJSONObject("page_info");
+        if (pageInfo.getBoolean("has_next_page")) {
+            return graphqlRequest(nextPageQuery.put("after", pageInfo.getString("end_cursor")));
+        } else {
+            // Last page reached: log every entry collected in failedItems before finishing.
+            failedItems.forEach(LOGGER::error);
+            return null;
+        }
+    }
+
+    // Fetches the stories reel for idString through graphql.
+    // JSONObject.append stores idString as the single element of a "reel_ids" array.
+    private JSONObject getStoriesItems() throws IOException {
+        return graphqlRequest(new JSONObject().append("reel_ids", idString).put("precomposed_overlay", false));
+    }
+
+    // Two requests with different query hashes required for pinned items.
+    // Query hash to be used depends on flag specified:
+    // pinnedRip flag is used initially to get list of pinned albums;
+    // pinnedReelRip flag is used next to get media urls.
+    // Side effects: clears pinnedRip, sets pinnedReelRip and overwrites qHash.
+    private JSONObject getPinnedItems(Document document) throws IOException {
+        // Step 1: list the user's highlight reels, using the qHash computed while pinnedRip was set.
+        JSONObject queryForIds = new JSONObject().put("user_id", idString).put("include_highlight_reels", true);
+        JSONObject pinnedIdsJson = graphqlRequest(queryForIds);
+        JSONArray pinnedItems = getJsonArrayByPath(pinnedIdsJson, "data.user.edge_highlight_reels.edges");
+        // Step 2: switch flags so getQhash selects the stories/reel hash, then recompute it.
+        pinnedRip = false;
+        pinnedReelRip = true;
+        qHash = getQhash(document);
+        // Step 3: request the media of every reel id in one query.
+        JSONObject queryForDetails = new JSONObject();
+        getStreamOfJsonArray(pinnedItems)
+                .map(object -> getJsonStringByPath(object, "node.id"))
+                .forEach(id -> queryForDetails.append("highlight_reel_ids", id));
+        queryForDetails.put("precomposed_overlay", false);
+        return graphqlRequest(queryForDetails);
+    }
+
+    /**
+     * Sends a graphql query using the current qHash and the session cookies.
+     * Every call first sleeps for 2500 ms.
+     *
+     * @param vars the query variables, serialized into the "variables" URL parameter
+     * @return the JSON response
+     * @throws IOException if the request fails
+     */
+    private JSONObject graphqlRequest(JSONObject vars) throws IOException {
+        // Sleep for a while to avoid a ban
+        sleep(2500);
+        // NOTE(review): vars is inserted without explicit URL encoding - presumably the HTTP
+        // layer encodes it; confirm.
+        String url = format("https://www.instagram.com/graphql/query/?query_hash=%s&variables=%s", qHash, vars.toString());
+        return Http.url(url).cookies(cookies).getJSON();
+    }
+
+    /**
+     * Collects the media URLs for one page of results and records a filename prefix for each item.
+     * Stories and pinned reels are read directly from the given JSON. A single-post rip ignores
+     * the argument and downloads the details of the post identified by idString. Every other
+     * rip type downloads the details of each shortcode listed on the page.
+     *
+     * @param json the page returned by getFirstPage or getNextPage
+     * @return the media URLs found; empty when a single post's details cannot be loaded
+     */
+    @Override
+    public List<String> getURLsFromJSON(JSONObject json) {
+        if (storiesRip || pinnedReelRip) {
+            JSONArray storyAlbums = getJsonArrayByPath(json, "data.reels_media");
+            return getStreamOfJsonArray(storyAlbums)
+                    .flatMap(album -> getStreamOfJsonArray(album.getJSONArray("items")))
+                    .peek(storyItem -> itemPrefixes.add(getTimestampPrefix(storyItem)))
+                    .flatMap(this::parseStoryItemForUrls)
+                    .collect(Collectors.toList());
+        }
+        if (postRip) {
+            JSONObject detailsJson = downloadItemDetailsJson(idString);
+            // downloadItemDetailsJson can return null (the multi-item path below filters nulls
+            // for that reason), so guard here instead of passing null to the parsers.
+            if (detailsJson == null) {
+                LOGGER.error("Could not load details for post " + idString);
+                return new ArrayList<>();
+            }
+            addPrefixInfo(detailsJson);
+            return parseItemDetailsForUrls(detailsJson).collect(Collectors.toList());
+        }
+        JSONArray edges = getMediaRoot(json).getJSONArray("edges");
+        return getStreamOfJsonArray(edges)
+                .map(edge -> getJsonStringByPath(edge, "node.shortcode"))
+                .map(this::downloadItemDetailsJson)
+                .filter(Objects::nonNull)
+                .peek(this::addPrefixInfo)
+                .flatMap(this::parseItemDetailsForUrls)
+                .collect(Collectors.toList());
+    }
+
+    /**
+     * Returns the download URLs for one story item.
+     * A video item yields two URLs, the video source followed by its display image, and
+     * registers an extra "preview_" filename prefix. Any other item yields only its display_url.
+     *
+     * @param storyItem one element of a reel's "items" array
+     * @return a stream of one or two URLs
+     */
+    private Stream<? extends String> parseStoryItemForUrls(JSONObject storyItem) {
+        if (storyItem.getBoolean("is_video")) {
+            // NOTE(review): the caller adds the plain timestamp prefix before invoking this
+            // method, so this second prefix presumably pairs with the display_url returned
+            // second - confirm against the code that consumes itemPrefixes.
+            itemPrefixes.add(getTimestampPrefix(storyItem) + "preview_");
+            // Use the last entry of video_resources (presumably the best quality - confirm).
+            int lastIndex = storyItem.getJSONArray("video_resources").length() - 1;
+            return Stream.of(
+                    getJsonStringByPath(storyItem, "video_resources[" + lastIndex + "].src"),
+                    storyItem.getString("display_url"));
+        }
+        return Stream.of(storyItem.getString("display_url"));
+    }
+
+    /**
+     * Selects the JSON node that holds the media list for the current rip type.
+     * <p>
+     * Responses containing an {@code entry_data} object are read through the
+     * {@code entry_data...} paths, all others through the {@code data...} paths.
+     * The tagged-photos path exists only in the latter form.
+     */
+    private JSONObject getMediaRoot(JSONObject json) {
+        boolean hasEntryData = json.optJSONObject("entry_data") != null;
+        String mediaRootPath;
+        if (hasEntryData) {
+            if (hashtagRip) {
+                mediaRootPath = "entry_data.TagPage[0].graphql.hashtag.edge_hashtag_to_media";
+            } else if (igtvRip) {
+                mediaRootPath = "entry_data.ProfilePage[0].graphql.user.edge_felix_video_timeline";
+            } else {
+                mediaRootPath = "entry_data.ProfilePage[0].graphql.user.edge_owner_to_timeline_media";
+            }
+        } else if (hashtagRip) {
+            mediaRootPath = "data.hashtag.edge_hashtag_to_media";
+        } else if (igtvRip) {
+            mediaRootPath = "data.user.edge_felix_video_timeline";
+        } else if (taggedRip) {
+            mediaRootPath = "data.user.edge_user_to_photos_of_you";
+        } else {
+            mediaRootPath = "data.user.edge_owner_to_timeline_media";
+        }
+        return getJsonObjectByPath(json, mediaRootPath);
+    }
+
+    /**
+     * Fetches the JSON details of a single post from
+     * {@code https://www.instagram.com/p/<shortcode>/?__a=1}.
+     * <p>
+     * Redirects are not followed automatically: on a 302 the shortcode is read from the
+     * {@code location} header and the request is repeated with it.
+     *
+     * @param shortcode post shortcode to look up
+     * @return the parsed response body, or {@code null} when the request fails or the redirect
+     *         target carries no shortcode
+     */
+    private JSONObject downloadItemDetailsJson(String shortcode) {
+        String url = "https://www.instagram.com/p/%s/?__a=1";
        try {
-            Document doc = Http.url("https://www.instagram.com/p/" + videoID).get();
+            Http http = Http.url(format(url, shortcode));
+            http.ignoreContentType();
+            http.connection().followRedirects(false);
+            Connection.Response response = http.cookies(cookies).response();
+            // Fix for redirection link; repeat request with the new shortcode
+            // NOTE(review): the retry recurses with no depth limit - confirm redirect chains stay short
+            if (response.statusCode() == 302) {
+                Pattern redirectIdPattern = Pattern.compile("/p/(?<shortcode>[^?/]+)");
+                Matcher m = redirectIdPattern.matcher(response.header("location"));
+                return m.find() ? downloadItemDetailsJson(m.group("shortcode")) : null;
+            }
+            return new JSONObject(response.body());
+        } catch (Exception e) {
+            // Any exception raised above lands here and marks the item as failed
+            failedItems.add(shortcode);
+            LOGGER.trace(format("No item %s found", shortcode), e);
+        }
+        return null;
+    }
+
+    /**
+     * Registers file-name prefixes for a post: one per child for a {@code GraphSidecar},
+     * otherwise exactly one. Each prefix is the timestamp prefix followed by the shortcode
+     * and an underscore.
+     */
+    private void addPrefixInfo(JSONObject itemDetailsJson) {
+        JSONObject mediaItem = getJsonObjectByPath(itemDetailsJson, "graphql.shortcode_media");
+        String shortcode = mediaItem.getString("shortcode");
+        boolean isSidecar = "GraphSidecar".equals(mediaItem.getString("__typename"));
+        int remaining = isSidecar
+                ? getJsonArrayByPath(mediaItem, "edge_sidecar_to_children.edges").length()
+                : 1;
+        while (remaining-- > 0) {
+            itemPrefixes.add(getTimestampPrefix(mediaItem) + shortcode + "_");
+        }
+    }
+
+    /**
+     * Formats an item's {@code taken_at_timestamp} (epoch seconds) as a UTC
+     * {@code yyyy-MM-dd_HH-mm-ss_} file-name prefix.
+     */
+    private String getTimestampPrefix(JSONObject item) {
+        long epochSeconds = item.getLong("taken_at_timestamp");
+        Instant instant = Instant.ofEpochSecond(epochSeconds);
+        DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd_HH-mm-ss_");
+        ZonedDateTime takenAt = ZonedDateTime.ofInstant(instant, ZoneOffset.UTC);
+        return takenAt.format(formatter);
+    }
+
+    /**
+     * Produces the download URLs of one post.
+     * <p>
+     * For a {@code GraphVideo} the page's HTML-tag video URL is requested as well, because the
+     * JSON {@code video_url} has lower quality; when it is available both URLs are returned
+     * and the second one gets an {@code _extra_} prefix. Everything else is delegated to
+     * {@code parseRootForUrls}.
+     */
+    private Stream<? extends String> parseItemDetailsForUrls(JSONObject itemDetailsJson) {
+        JSONObject mediaItem = getJsonObjectByPath(itemDetailsJson, "graphql.shortcode_media");
+        boolean isVideo = "GraphVideo".equals(mediaItem.getString("__typename"));
+        if (!isVideo) {
+            return parseRootForUrls(mediaItem);
+        }
+        String shortcode = mediaItem.getString("shortcode");
+        String pageVideoUrl = getVideoUrlFromPage(shortcode);
+        if (pageVideoUrl.isEmpty()) {
+            // No HTML-tag URL available: fall back to the plain JSON handling
+            return parseRootForUrls(mediaItem);
+        }
+        itemPrefixes.add(getTimestampPrefix(mediaItem) + shortcode + "_extra_");
+        String jsonVideoUrl = mediaItem.getString("video_url");
+        return Stream.of(jsonVideoUrl, pageVideoUrl);
+    }
+
+    /**
+     * Maps a media node to its URLs by {@code __typename}: {@code display_url} for a
+     * {@code GraphImage}, {@code video_url} for a {@code GraphVideo}, and the URLs of every
+     * child node (resolved recursively) for a {@code GraphSidecar}. Any other type yields
+     * an empty stream.
+     */
+    private Stream<? extends String> parseRootForUrls(JSONObject mediaItem) {
+        String typeName = mediaItem.getString("__typename");
+        if ("GraphImage".equals(typeName)) {
+            return Stream.of(mediaItem.getString("display_url"));
+        }
+        if ("GraphVideo".equals(typeName)) {
+            return Stream.of(mediaItem.getString("video_url"));
+        }
+        if ("GraphSidecar".equals(typeName)) {
+            JSONArray children = getJsonArrayByPath(mediaItem, "edge_sidecar_to_children.edges");
+            return getStreamOfJsonArray(children)
+                    .map(edge -> edge.getJSONObject("node"))
+                    .flatMap(this::parseRootForUrls);
+        }
+        return Stream.empty();
+    }
+
+    /**
+     * Reads the {@code og:video} meta tag from a post's HTML page.
+     *
+     * @param videoID shortcode of the post
+     * @return the tag's {@code content} value, or an empty string when the page could not be
+     *         loaded (the failure is logged together with its cause)
+     */
+    private String getVideoUrlFromPage(String videoID) {
+        try {
+            Document doc = Http.url("https://www.instagram.com/p/" + videoID).cookies(cookies).get();
            return doc.select("meta[property=og:video]").attr("content");
-        } catch (IOException e) {
+        } catch (Exception e) {
-            LOGGER.warn("Unable to get page " + "https://www.instagram.com/p/" + videoID);
+            // Pass the exception along: the catch is broad, so without the cause
+            // the log would not tell what actually went wrong.
+            LOGGER.warn("Unable to get page " + "https://www.instagram.com/p/" + videoID, e);
        }
        return "";
    }

-    private String getOriginalUrl(String imageURL) {
-        // Without this regex most images will return a 403 error
-        imageURL = imageURL.replaceAll("vp/[a-zA-Z0-9]*/", "");
-        imageURL = imageURL.replaceAll("scontent.cdninstagram.com/hphotos-", "igcdn-photos-d-a.akamaihd.net/hphotos-ak-");
-
-        // Instagram returns cropped images to unauthenticated applications to maintain legacy support. 
-        // To retrieve the uncropped image, remove this segment from the URL. 
-        // Segment format: cX.Y.W.H - eg: c0.134.1080.1080
-        imageURL = imageURL.replaceAll("/c\\d{1,4}\\.\\d{1,4}\\.\\d{1,4}\\.\\d{1,4}", "");
-        imageURL = imageURL.replaceAll("\\?ig_cache_key.+$", "");
-        return imageURL;
+    /**
+     * Queues a URL for download under the prefix registered for its index. URLs containing
+     * {@code .mp4?} are skipped when {@code instagram.download_images_only} is enabled.
+     */
+    @Override
+    protected void downloadURL(URL url, int index) {
+        boolean imagesOnly = Utils.getConfigBoolean("instagram.download_images_only", false);
+        if (imagesOnly && url.toString().contains(".mp4?")) {
+            LOGGER.info("Skipped video url: " + url);
+            return;
+        }
+        // index is 1-based here while itemPrefixes is a 0-based list
+        String prefix = itemPrefixes.get(index - 1);
+        addURLToDownload(url, prefix, "", null, cookies);
    }

-    public String getAfter(JSONObject json) {
-        try {
-            return json.getJSONObject("entry_data").getJSONArray("ProfilePage").getJSONObject(0)
-                    .getJSONObject("graphql").getJSONObject("user")
-                    .getJSONObject("edge_owner_to_timeline_media").getJSONObject("page_info").getString("end_cursor");
-        } catch (JSONException e) {
-            // This is here so that when the user rips the last page they don't get a "end_cursor not a string" error
-            try {
-                return json.getJSONObject("data").getJSONObject("user")
-                        .getJSONObject("edge_owner_to_timeline_media").getJSONObject("page_info").getString("end_cursor");
-            } catch (JSONException t) {
-                return "";
+    // Javascript parsing
+    /* ------------------------------------------------------------------------------------------------------- */
+    /**
+     * Parses the given JavaScript and searches its top-level statements for a hash value.
+     * <p>
+     * Only expression statements that are calls whose first argument is a function are
+     * inspected; the statements of each such function body are handed to {@code lookForHash}.
+     *
+     * @param javaScriptData JavaScript source text
+     * @param keyword text passed on to {@code lookForHash} to locate the statement of interest
+     * @param offset statement offset passed on to {@code lookForHash}
+     * @return the first non-null result of {@code lookForHash}, or {@code null} when there is none
+     */
+    private String getHashValue(String javaScriptData, String keyword, int offset) {
+        List<Statement> statements = getJsBodyBlock(javaScriptData).getStatements();
+        return statements.stream()
+                .flatMap(statement -> filterItems(statement, ExpressionStatement.class))
+                .map(ExpressionStatement::getExpression)
+                .flatMap(expression -> filterItems(expression, CallNode.class))
+                .map(CallNode::getArgs)
+                // NOTE(review): get(0) assumes every matched call has at least one argument - confirm
+                .map(expressions -> expressions.get(0))
+                .flatMap(expression -> filterItems(expression, FunctionNode.class))
+                .map(FunctionNode::getBody)
+                .map(Block::getStatements)
+                .map(statementList -> lookForHash(statementList, keyword, offset))
+                .filter(Objects::nonNull)
+                .findFirst().orElse(null);
+    }
+
+    /**
+     * Finds the first statement whose text contains {@code keyword} and extracts the quoted
+     * lower-case hex string from the statement {@code offset} positions away from it.
+     *
+     * @param list statements to scan
+     * @param keyword text identifying the anchor statement
+     * @param offset distance from the anchor statement to the one holding the hash
+     * @return the extracted text, or {@code null} when no anchor with an in-range companion
+     *         statement exists
+     */
+    private String lookForHash(List<Statement> list, String keyword, int offset) {
+        for (int i = 0; i < list.size(); i++) {
+            Statement st = list.get(i);
+            int hashIndex = i + offset;
+            // An anchor too close to either end of the list has no companion statement;
+            // indexing it blindly would throw IndexOutOfBoundsException.
+            boolean inRange = hashIndex >= 0 && hashIndex < list.size();
+            if (inRange && st.toString().contains(keyword)) {
+                // NOTE(review): when the statement holds no quoted hex string, replaceAll
+                // returns its text unchanged - confirm callers can cope with that.
+                return list.get(hashIndex).toString().replaceAll(".*\"([0-9a-f]*)\".*", "$1");
            }
        }
+        return null;
+    }
+
+    /**
+     * Returns a one-element stream holding {@code obj} cast to {@code aClass} when it is an
+     * instance of that class, and an empty stream otherwise.
+     */
+    private <T> Stream<T> filterItems(Object obj, Class<T> aClass) {
+        if (aClass.isInstance(obj)) {
+            return Stream.of(aClass.cast(obj));
+        }
+        return Stream.<T>empty();
+    }
+
+    /**
+     * Parses JavaScript source with the Nashorn parser and returns the body block of the
+     * resulting program.
+     */
+    private Block getJsBodyBlock(String javaScriptData) {
+        ErrorManager errors = new ErrorManager();
+        Options options = new Options("nashorn");
+        ClassLoader loader = Thread.currentThread().getContextClassLoader();
+        Context context = new Context(options, errors, loader);
+        Source source = Source.sourceFor("name", javaScriptData);
+        Parser parser = new Parser(context.getEnv(), source, errors);
+        return parser.parse().getBody();
+    }
+
+    // Some JSON helper methods below
+    /* ------------------------------------------------------------------------------------------------------- */
+    /**
+     * Resolves a dot-separated path to a nested {@code JSONObject}. A segment of the form
+     * {@code name[idx]} selects element {@code idx} of the array stored under {@code name}.
+     */
+    private JSONObject getJsonObjectByPath(JSONObject object, String key) {
+        Pattern arrayPattern = Pattern.compile("(?<arr>.*)\\[(?<idx>\\d+)]");
+        JSONObject current = object;
+        for (String segment : key.split("[.]")) {
+            Matcher m = arrayPattern.matcher(segment);
+            if (m.matches()) {
+                JSONArray array = current.getJSONArray(m.group("arr"));
+                int position = Integer.parseInt(m.group("idx"));
+                current = array.getJSONObject(position);
+            } else {
+                current = current.getJSONObject(segment);
+            }
+        }
+        return current;
+    }
+
+    /**
+     * Resolves every path segment but the last to a parent object, then applies {@code func}
+     * to that parent and the last segment. A key without a dot is applied to {@code object}
+     * itself.
+     */
+    private <T> T getByPath(BiFunction<JSONObject, String, T> func, JSONObject object, String key) {
+        int lastDot = key.lastIndexOf('.');
+        JSONObject parent;
+        if (lastDot < 0) {
+            parent = object;
+        } else {
+            parent = getJsonObjectByPath(object, key.substring(0, lastDot));
+        }
+        String leafName = key.substring(lastDot + 1);
+        return func.apply(parent, leafName);
+    }
+
+    /** Returns the {@code JSONArray} found at the given dot-separated path. */
+    private JSONArray getJsonArrayByPath(JSONObject object, String key) {
+        return getByPath((parent, name) -> parent.getJSONArray(name), object, key);
+    }
+
+    /** Returns the string found at the given dot-separated path. */
+    private String getJsonStringByPath(JSONObject object, String key) {
+        return getByPath((parent, name) -> parent.getString(name), object, key);
+    }
+
+    /** Wraps a {@code JSONArray} in a sequential stream of its {@code JSONObject} elements. */
+    private Stream<JSONObject> getStreamOfJsonArray(JSONArray array) {
+        JSONSpliterator elements = new JSONSpliterator(array);
+        return StreamSupport.stream(elements, false);
+    }
+
+    /**
+     * Spliterator over the elements of a {@code JSONArray}, yielding each one as a
+     * {@code JSONObject}. It reports the array length as its size estimate and declares the
+     * SIZED and ORDERED characteristics.
+     */
+    private class JSONSpliterator extends Spliterators.AbstractSpliterator<JSONObject> {
+        private JSONArray array;
+        // Position of the next element to hand out
+        private int index = 0;
+
+        JSONSpliterator(JSONArray array) {
+            super(array.length(), SIZED | ORDERED);
+            this.array = array;
        }

        @Override
-    public List<String> getURLsFromJSON(JSONObject json) {
-        List<String> imageURLs = new ArrayList<>();
-        if (!url.toExternalForm().contains("/p/")) {
-            nextPageID = getAfter(json);
-        }
-
-        // get the rhx_gis value so we can get the next page later on
-        if (rhx_gis == null) {
-            rhx_gis = json.getString("rhx_gis");
-        }
-        if (!url.toExternalForm().contains("/p/")) {
-            JSONArray datas = new JSONArray();
-            if (!rippingTag) {
-                // This first try only works on data from the first page
-                try {
-                    JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage");
-                    userID = profilePage.getJSONObject(0).getString("logging_page_id").replaceAll("profilePage_", "");
-                    datas = json.getJSONObject("entry_data").getJSONArray("ProfilePage").getJSONObject(0)
-                            .getJSONObject("graphql").getJSONObject("user")
-                            .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges");
-                } catch (JSONException e) {
-                    datas = json.getJSONObject("data").getJSONObject("user")
-                            .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges");
-                }
-            } else {
-                try {
-                    JSONArray tagPage = json.getJSONObject("entry_data").getJSONArray("TagPage");
-                    datas = tagPage.getJSONObject(0).getJSONObject("graphql").getJSONObject("hashtag")
-                            .getJSONObject("edge_hashtag_to_media").getJSONArray("edges");
-                } catch (JSONException e) {
-                    datas = json.getJSONObject("data").getJSONObject("hashtag").getJSONObject("edge_hashtag_to_media")
-                            .getJSONArray("edges");
-                }
-            }
-            for (int i = 0; i < datas.length(); i++) {
-                JSONObject data = (JSONObject) datas.get(i);
-                data = data.getJSONObject("node");
-                Long epoch = data.getLong("taken_at_timestamp");
-                Instant instant = Instant.ofEpochSecond(epoch);
-                String image_date = DateTimeFormatter.ofPattern("yyyy_MM_dd_hh:mm_").format(ZonedDateTime.ofInstant(instant, ZoneOffset.UTC));
-                // It looks like tag pages don't have the __typename key
-                if (!rippingTag) {
-                    if (data.getString("__typename").equals("GraphSidecar")) {
-                        try {
-                            Document slideShowDoc = Http.url(new URL("https://www.instagram.com/p/" + data.getString("shortcode"))).get();
-                            List<String> toAdd = getPostsFromSinglePage(getJSONFromPage(slideShowDoc));
-                            for (int slideShowInt = 0; slideShowInt < toAdd.size(); slideShowInt++) {
-                                addURLToDownload(new URL(toAdd.get(slideShowInt)), image_date + data.getString("shortcode"));
-                            }
-                        } catch (MalformedURLException e) {
-                            LOGGER.error("Unable to download slide show, URL was malformed");
-                        } catch (IOException e) {
-                            LOGGER.error("Unable to download slide show");
-                        }
-                    }
-                }
-                try {
-                    if (!data.getBoolean("is_video")) {
-                        if (imageURLs.isEmpty()) {
-                            // We add this one item to the array because either wise
-                            // the ripper will error out because we returned an empty array
-                            imageURLs.add(getOriginalUrl(data.getString("display_url")));
-                        }
-                        addURLToDownload(new URL(data.getString("display_url")), image_date);
-                    } else {
-                        if (!Utils.getConfigBoolean("instagram.download_images_only", false)) {
-                            addURLToDownload(new URL(getVideoFromPage(data.getString("shortcode"))), image_date);
-                        } else {
-                            sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Skipping video " + data.getString("shortcode"));
-                        }
-                    }
-                } catch (MalformedURLException e) {
-                    LOGGER.info("Got MalformedURLException");
-                    return imageURLs;
-                }
-
-                if (isThisATest()) {
-                    break;
-                }
-            }
-
-        } else { // We're ripping from a single page
-            LOGGER.info("Ripping from single page");
-            imageURLs = getPostsFromSinglePage(json);
-        }
-
-        return imageURLs;
-    }
-
-    private String getIGGis(String variables) {
-        String stringToMD5 = rhx_gis + ":" + variables;
-        LOGGER.debug("String to md5 is \"" + stringToMD5 + "\"");
-        try {
-            byte[] bytesOfMessage = stringToMD5.getBytes("UTF-8");
-
-            MessageDigest md = MessageDigest.getInstance("MD5");
-            byte[] hash = md.digest(bytesOfMessage);
-            StringBuffer sb = new StringBuffer();
-            for (int i = 0; i < hash.length; ++i) {
-                sb.append(Integer.toHexString((hash[i] & 0xFF) | 0x100).substring(1,3));
-            }
-            return sb.toString();
-        } catch(UnsupportedEncodingException e) {
-            return null;
-        } catch(NoSuchAlgorithmException e) {
-            return null;
-        }
-    }
-
-    @Override
-    public JSONObject getNextPage(JSONObject json) throws IOException {
-        JSONObject toreturn;
-        java.util.Map<String, String> cookies = new HashMap<String, String>();
-//        This shouldn't be hardcoded and will break one day
-        cookies.put("ig_pr", "1");
-        cookies.put("csrftoken", csrftoken);
-        if (!nextPageID.equals("") && !isThisATest()) {
-            if (rippingTag) {
-                try {
-                    sleep(2500);
-                    String vars = "{\"tag_name\":\"" + tagName + "\",\"first\":4,\"after\":\"" + nextPageID + "\"}";
-                    String ig_gis = getIGGis(vars);
-                     toreturn = getPage("https://www.instagram.com/graphql/query/?query_hash=" + qHash +
-                                     "&variables=" + vars, ig_gis);
-                    // Sleep for a while to avoid a ban
-                    LOGGER.info(toreturn);
-                    if (!pageHasImages(toreturn)) {
-                        throw new IOException("No more pages");
-                    }
-                    return toreturn;
-
-                } catch (IOException e) {
-                    throw new IOException("No more pages");
-                }
-
-            }
-            try {
-                // Sleep for a while to avoid a ban
-                sleep(2500);
-                String vars = "{\"id\":\"" + userID + "\",\"first\":12,\"after\":\"" + nextPageID + "\"}";
-                String ig_gis = getIGGis(vars);
-                LOGGER.info(ig_gis);
-
-                LOGGER.info("https://www.instagram.com/graphql/query/?query_hash=" + qHash + "&variables=" + vars);
-                toreturn = getPage("https://www.instagram.com/graphql/query/?query_hash=" + qHash + "&variables=" + vars, ig_gis);
-                if (!pageHasImages(toreturn)) {
-                    throw new IOException("No more pages");
-                }
-                return toreturn;
-            } catch (IOException e) {
-                return null;
-            }
-        } else {
-            throw new IOException("No more pages");
-        }
-    }
-
-    @Override
-    public void downloadURL(URL url, int index) {
-        addURLToDownload(url);
-    }
-
-    private boolean pageHasImages(JSONObject json) {
-        LOGGER.info(json);
-        int numberOfImages = json.getJSONObject("data").getJSONObject("user")
-                .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges").length();
-        if (numberOfImages == 0) {
+        public boolean tryAdvance(Consumer<? super JSONObject> action) {
+            // Every element has been handed out: signal the end of the traversal
+            if (index == array.length()) {
                return false;
            }
+            // Pass the current element to the consumer and move on to the next position
+            action.accept(array.getJSONObject(index++));
            return true;
        }
-
-    private JSONObject getPage(String url, String ig_gis) {
-        StringBuilder sb = new StringBuilder();
-        try {
-            // We can't use Jsoup here because it won't download a non-html file larger than a MB
-            // even if you set maxBodySize to 0
-            URLConnection connection = new URL(url).openConnection();
-            connection.setRequestProperty("User-Agent", USER_AGENT);
-            connection.setRequestProperty("x-instagram-gis", ig_gis);
-            BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()));
-            String line;
-            while ((line = in.readLine()) != null) {
-                sb.append(line);
-
-            }
-            in.close();
-            workAroundJsonString = sb.toString();
-            return new JSONObject(sb.toString());
-
-        } catch (MalformedURLException e) {
-            LOGGER.info("Unable to get query_hash, " + url + " is a malformed URL");
-            return null;
-        } catch (IOException e) {
-            LOGGER.info("Unable to get query_hash");
-            LOGGER.info(e.getMessage());
-            return null;
    }
 }
-
-    private String getQHash(Document doc) {
-        String jsFileURL = "https://www.instagram.com" + doc.select("link[rel=preload]").attr("href");
-        StringBuilder sb = new StringBuilder();
-        Document jsPage;
-        try {
-            // We can't use Jsoup here because it won't download a non-html file larger than a MB
-            // even if you set maxBodySize to 0
-            URLConnection connection = new URL(jsFileURL).openConnection();
-            BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()));
-            String line;
-            while ((line = in.readLine()) != null) {
-                sb.append(line);
-            }
-            in.close();
-
-        } catch (MalformedURLException e) {
-            LOGGER.info("Unable to get query_hash, " + jsFileURL + " is a malformed URL");
-            return null;
-        } catch (IOException e) {
-            LOGGER.info("Unable to get query_hash");
-            LOGGER.info(e.getMessage());
-            return null;
-        }
-        if (!rippingTag) {
-            Pattern jsP = Pattern.compile("byUserId\\.get\\(t\\)\\)\\|\\|void 0===r\\?void 0:r\\.pagination},queryId:.([a-zA-Z0-9]+)");
-            Matcher m = jsP.matcher(sb.toString());
-            if (m.find()) {
-                return m.group(1);
-            }
-
-        } else {
-            Pattern jsP = Pattern.compile("return e.tagMedia.byTagName.get\\(t\\).pagination},queryId:.([a-zA-Z0-9]+).");
-            Matcher m = jsP.matcher(sb.toString());
-            if (m.find()) {
-                return m.group(1);
-            }
-        }
-        LOGGER.error("Could not find query_hash on " + jsFileURL);
-        return null;
-
-    }
-
-}
--- a/src/main/java/com/rarchives/ripme/utils/Utils.java
+++ b/src/main/java/com/rarchives/ripme/utils/Utils.java
@ -12,6 +12,7 @@ import javax.sound.sampled.Clip;
 import javax.sound.sampled.Line;
 import javax.sound.sampled.LineEvent;
 import java.io.File;
+import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.UnsupportedEncodingException;
@ -179,21 +180,21 @@ public class Utils {
    /**
     * Determines if your current system is a Windows system.
     */
-    private static boolean isWindows() {
+    public static boolean isWindows() {
+        // NOTE(review): plain substring test - any OS value containing "win" (e.g. "darwin")
+        // would match as well; confirm OS can never hold such a value.
        return OS.contains("win");
    }

    /**
     * Determines if your current system is a Mac system
     */
-    private static boolean isMacOS() {
+    public static boolean isMacOS() {
+        // Substring test against OS (presumably the lower-cased os.name - TODO confirm)
        return OS.contains("mac");
    }

    /**
     * Determines if current system is based on UNIX
     */
-    private static boolean isUnix() {
+    public static boolean isUnix() {
+        // True when OS contains any of the fragments "nix", "nux" or "bsd"
        return OS.contains("nix") || OS.contains("nux") || OS.contains("bsd");
    }

@ -773,4 +774,34 @@ public class Utils {
        return false;
    }

+    /**
+     * Shortens a file name so that the complete path fits into the Windows path limit.
+     * <p>
+     * Only the name part before the extension is truncated; the directory and the extension
+     * are kept intact.
+     *
+     * @param ripsDirPath directory the file is saved in
+     * @param fileName file name that is too long
+     * @return a file inside {@code ripsDirPath} whose full path is at most 259 characters long
+     * @throws FileNotFoundException if the directory path alone leaves no room for a file name
+     */
+    public static File shortenSaveAsWindows(String ripsDirPath, String fileName) throws FileNotFoundException {
+        LOGGER.error("The filename " + fileName + " is to long to be saved on this file system.");
+        LOGGER.info("Shortening filename");
+        // The max limit for paths on Windows is 260 chars, one of which is the terminator
+        final int maxPathLength = 259;
+        // Split off the extension (including its dot) so truncation never cuts it off;
+        // a name without a dot simply has no extension.
+        int dotIndex = fileName.lastIndexOf('.');
+        String stem = dotIndex < 0 ? fileName : fileName.substring(0, dotIndex);
+        String extension = dotIndex < 0 ? "" : fileName.substring(dotIndex);
+        // Room left for the stem once directory, separator and extension are accounted for
+        int maxStemLength = maxPathLength - ripsDirPath.length() - File.separator.length() - extension.length();
+        if (maxStemLength < 1) {
+            // The directory itself uses up the limit, there's nothing more we can do
+            throw new FileNotFoundException("File path is too long for this OS");
+        }
+        if (stem.length() > maxStemLength) {
+            stem = stem.substring(0, maxStemLength);
+        }
+        String fullPath = ripsDirPath + File.separator + stem + extension;
+        LOGGER.info(fullPath);
+        LOGGER.info(fullPath.length());
+        return new File(fullPath);
+    }
+
+    /**
+     * Replaces every character of the set {@code \ / : * ? " < > |} in a file name with an
+     * underscore.
+     *
+     * @param fileNameToSan file name to clean up
+     * @return the file name with each listed character replaced by {@code _}
+     */
+    public static String sanitizeSaveAs(String fileNameToSan) {
+        java.util.regex.Pattern forbidden = java.util.regex.Pattern.compile("[\\\\/:*?\"<>|]");
+        java.util.regex.Matcher matcher = forbidden.matcher(fileNameToSan);
+        return matcher.replaceAll("_");
+    }
+
 }