diff --git a/pom.xml b/pom.xml
index 2aa1d0c9..b6097400 100644
--- a/pom.xml
+++ b/pom.xml
@@ -11,6 +11,22 @@
UTF-8
+
+ javax.xml.bind
+ jaxb-api
+ 2.3.0
+
+
+ com.sun.xml.bind
+ jaxb-core
+ 2.3.0
+
+
+ com.sun.xml.bind
+ jaxb-impl
+ 2.3.0
+
+
junit
junit
diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java
index b24017f7..4d17f11e 100644
--- a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java
@@ -2,21 +2,29 @@ package com.rarchives.ripme.ripper;
import java.io.File;
import java.io.FileOutputStream;
+import java.io.FileWriter;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
+import java.util.Collections;
+import java.util.HashMap;
import java.util.List;
-
+import java.util.Map;
import org.jsoup.nodes.Document;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Utils;
import com.rarchives.ripme.ui.MainWindow;
+import com.rarchives.ripme.ui.RipStatusMessage;
/**
* Simplified ripper, designed for ripping from sites by parsing HTML.
*/
-public abstract class AbstractHTMLRipper extends AlbumRipper {
+public abstract class AbstractHTMLRipper extends AbstractRipper {
+
+ private Map itemsPending = Collections.synchronizedMap(new HashMap());
+ private Map itemsCompleted = Collections.synchronizedMap(new HashMap());
+ private Map itemsErrored = Collections.synchronizedMap(new HashMap());
protected AbstractHTMLRipper(URL url) throws IOException {
super(url);
@@ -93,6 +101,7 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
// We set doc to null here so the while loop below this doesn't fire
doc = null;
+ LOGGER.debug("Adding items from " + this.url + " to queue");
}
while (doc != null) {
@@ -176,12 +185,12 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
}
waitForThreads();
}
-
+
/**
* Gets the file name from the URL
- * @param url
+ * @param url
* URL that you want to get the filename from
- * @return
+ * @return
* Filename of the URL
*/
private String fileNameFromURL(URL url) {
@@ -195,7 +204,7 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
return saveAs;
}
/**
- *
+ *
* @param url
* Target URL
* @param subdirectory
@@ -204,7 +213,7 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
* Text you want to save
* @param index
* Index in something like an album
- * @return
+ * @return
* True if ripped successfully
* False if failed
*/
@@ -226,11 +235,11 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
}
saveFileAs = new File(
workingDir.getCanonicalPath()
- + subdirectory
- + File.separator
- + getPrefix(index)
- + fileName
- + ".txt");
+ + subdirectory
+ + File.separator
+ + getPrefix(index)
+ + fileName
+ + ".txt");
// Write the file
FileOutputStream out = (new FileOutputStream(saveFileAs));
out.write(text.getBytes());
@@ -246,12 +255,12 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
}
return true;
}
-
+
/**
* Gets prefix based on where in the index it is
- * @param index
+ * @param index
* The index in question
- * @return
+ * @return
* Returns prefix for a file. (?)
*/
protected String getPrefix(int index) {
@@ -261,4 +270,210 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
}
return prefix;
}
-}
+
+ /*
+ * ------ Methods copied from AlbumRipper. ------
+ * This removes AlbumRipper's usage from this class.
+ */
+
+ protected boolean allowDuplicates() {
+ return false;
+ }
+
+ @Override
+ /**
+ * Returns total amount of files attempted.
+ */
+ public int getCount() {
+ return itemsCompleted.size() + itemsErrored.size();
+ }
+
+ @Override
+ /**
+ * Queues multiple URLs of single images to download from a single Album URL
+ */
+ public boolean addURLToDownload(URL url, File saveAs, String referrer, Map cookies, Boolean getFileExtFromMIME) {
+ // Only download one file if this is a test.
+ if (super.isThisATest() &&
+ (itemsPending.size() > 0 || itemsCompleted.size() > 0 || itemsErrored.size() > 0)) {
+ stop();
+ return false;
+ }
+ if (!allowDuplicates()
+ && ( itemsPending.containsKey(url)
+ || itemsCompleted.containsKey(url)
+ || itemsErrored.containsKey(url) )) {
+ // Item is already downloaded/downloading, skip it.
+ LOGGER.info("[!] Skipping " + url + " -- already attempted: " + Utils.removeCWD(saveAs));
+ return false;
+ }
+ if (Utils.getConfigBoolean("urls_only.save", false)) {
+ // Output URL to file
+ String urlFile = this.workingDir + File.separator + "urls.txt";
+ try (FileWriter fw = new FileWriter(urlFile, true)) {
+ fw.write(url.toExternalForm());
+ fw.write(System.lineSeparator());
+ itemsCompleted.put(url, new File(urlFile));
+ } catch (IOException e) {
+ LOGGER.error("Error while writing to " + urlFile, e);
+ }
+ }
+ else {
+ itemsPending.put(url, saveAs);
+ DownloadFileThread dft = new DownloadFileThread(url, saveAs, this, getFileExtFromMIME);
+ if (referrer != null) {
+ dft.setReferrer(referrer);
+ }
+ if (cookies != null) {
+ dft.setCookies(cookies);
+ }
+ threadPool.addThread(dft);
+ }
+
+ return true;
+ }
+
+ @Override
+ public boolean addURLToDownload(URL url, File saveAs) {
+ return addURLToDownload(url, saveAs, null, null, false);
+ }
+
+ /**
+ * Queues image to be downloaded and saved.
+ * Uses filename from URL to decide filename.
+ * @param url
+ * URL to download
+ * @return
+ * True on success
+ */
+ protected boolean addURLToDownload(URL url) {
+ // Use empty prefix and empty subdirectory
+ return addURLToDownload(url, "", "");
+ }
+
+ @Override
+ /**
+ * Cleans up & tells user about successful download
+ */
+ public void downloadCompleted(URL url, File saveAs) {
+ if (observer == null) {
+ return;
+ }
+ try {
+ String path = Utils.removeCWD(saveAs);
+ RipStatusMessage msg = new RipStatusMessage(STATUS.DOWNLOAD_COMPLETE, path);
+ itemsPending.remove(url);
+ itemsCompleted.put(url, saveAs);
+ observer.update(this, msg);
+
+ checkIfComplete();
+ } catch (Exception e) {
+ LOGGER.error("Exception while updating observer: ", e);
+ }
+ }
+
+ @Override
+ /**
+ * Cleans up & tells user about failed download.
+ */
+ public void downloadErrored(URL url, String reason) {
+ if (observer == null) {
+ return;
+ }
+ itemsPending.remove(url);
+ itemsErrored.put(url, reason);
+ observer.update(this, new RipStatusMessage(STATUS.DOWNLOAD_ERRORED, url + " : " + reason));
+
+ checkIfComplete();
+ }
+
+ @Override
+ /**
+ * Tells user that a single file in the album they wish to download has
+ * already been downloaded in the past.
+ */
+ public void downloadExists(URL url, File file) {
+ if (observer == null) {
+ return;
+ }
+
+ itemsPending.remove(url);
+ itemsCompleted.put(url, file);
+ observer.update(this, new RipStatusMessage(STATUS.DOWNLOAD_WARN, url + " already saved as " + file.getAbsolutePath()));
+
+ checkIfComplete();
+ }
+
+ /**
+ * Notifies observers and updates state if all files have been ripped.
+ */
+ @Override
+ protected void checkIfComplete() {
+ if (observer == null) {
+ return;
+ }
+ if (itemsPending.isEmpty()) {
+ super.checkIfComplete();
+ }
+ }
+
+ /**
+ * Sets directory to save all ripped files to.
+ * @param url
+ * URL to define how the working directory should be saved.
+ * @throws
+ * IOException
+ */
+ @Override
+ public void setWorkingDir(URL url) throws IOException {
+ String path = Utils.getWorkingDirectory().getCanonicalPath();
+ if (!path.endsWith(File.separator)) {
+ path += File.separator;
+ }
+ String title;
+ if (Utils.getConfigBoolean("album_titles.save", true)) {
+ title = getAlbumTitle(this.url);
+ } else {
+ title = super.getAlbumTitle(this.url);
+ }
+ LOGGER.debug("Using album title '" + title + "'");
+
+ title = Utils.filesystemSafe(title);
+ path += title;
+ path = Utils.getOriginalDirectory(path) + File.separator; // check for case sensitive (unix only)
+
+ this.workingDir = new File(path);
+ if (!this.workingDir.exists()) {
+ LOGGER.info("[+] Creating directory: " + Utils.removeCWD(this.workingDir));
+ this.workingDir.mkdirs();
+ }
+ LOGGER.debug("Set working directory to: " + this.workingDir);
+ }
+
+ /**
+ * @return
+ * Integer between 0 and 100 defining the progress of the album rip (0 when nothing queued).
+ */
+ @Override
+ public int getCompletionPercentage() {
+ double total = itemsPending.size() + itemsErrored.size() + itemsCompleted.size();
+ return total == 0 ? 0 : (int) (100 * ((total - itemsPending.size()) / total));
+ }
+
+ /**
+ * @return
+ * Human-readable information on the status of the current rip.
+ */
+ @Override
+ public String getStatusText() {
+ StringBuilder sb = new StringBuilder();
+ sb.append(getCompletionPercentage())
+ .append("% ")
+ .append("- Pending: " ).append(itemsPending.size())
+ .append(", Completed: ").append(itemsCompleted.size())
+ .append(", Errored: " ).append(itemsErrored.size());
+ return sb.toString();
+ }
+
+
+}
\ No newline at end of file
diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractJSONRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractJSONRipper.java
index 4455270e..19f44240 100644
--- a/src/main/java/com/rarchives/ripme/ripper/AbstractJSONRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/AbstractJSONRipper.java
@@ -1,19 +1,27 @@
package com.rarchives.ripme.ripper;
+import java.io.File;
+import java.io.FileWriter;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
+import java.util.Collections;
+import java.util.HashMap;
import java.util.List;
-
+import java.util.Map;
import org.json.JSONObject;
-
+import com.rarchives.ripme.ui.RipStatusMessage;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Utils;
/**
* Simplified ripper, designed for ripping from sites by parsing JSON.
*/
-public abstract class AbstractJSONRipper extends AlbumRipper {
+public abstract class AbstractJSONRipper extends AbstractRipper {
+
+ private Map itemsPending = Collections.synchronizedMap(new HashMap());
+ private Map itemsCompleted = Collections.synchronizedMap(new HashMap());
+ private Map itemsErrored = Collections.synchronizedMap(new HashMap());
protected AbstractJSONRipper(URL url) throws IOException {
super(url);
@@ -56,12 +64,12 @@ public abstract class AbstractJSONRipper extends AlbumRipper {
while (json != null) {
List imageURLs = getURLsFromJSON(json);
-
+
if (alreadyDownloadedUrls >= Utils.getConfigInteger("history.end_rip_after_already_seen", 1000000000) && !isThisATest()) {
- sendUpdate(STATUS.DOWNLOAD_COMPLETE, "Already seen the last " + alreadyDownloadedUrls + " images ending rip");
- break;
+ sendUpdate(STATUS.DOWNLOAD_COMPLETE, "Already seen the last " + alreadyDownloadedUrls + " images ending rip");
+ break;
}
-
+
// Remove all but 1 image
if (isThisATest()) {
while (imageURLs.size() > 1) {
@@ -77,7 +85,7 @@ public abstract class AbstractJSONRipper extends AlbumRipper {
if (isStopped()) {
break;
}
-
+
index += 1;
LOGGER.debug("Found image url #" + index+ ": " + imageURL);
downloadURL(new URL(imageURL), index);
@@ -111,4 +119,209 @@ public abstract class AbstractJSONRipper extends AlbumRipper {
}
return prefix;
}
-}
+
+ /*
+ * ------ Methods copied from AlbumRipper ------
+ */
+
+ protected boolean allowDuplicates() {
+ return false;
+ }
+
+ @Override
+ /**
+ * Returns total amount of files attempted.
+ */
+ public int getCount() {
+ return itemsCompleted.size() + itemsErrored.size();
+ }
+
+ @Override
+ /**
+ * Queues multiple URLs of single images to download from a single Album URL
+ */
+ public boolean addURLToDownload(URL url, File saveAs, String referrer, Map cookies, Boolean getFileExtFromMIME) {
+ // Only download one file if this is a test.
+ if (super.isThisATest() &&
+ (itemsPending.size() > 0 || itemsCompleted.size() > 0 || itemsErrored.size() > 0)) {
+ stop();
+ return false;
+ }
+ if (!allowDuplicates()
+ && ( itemsPending.containsKey(url)
+ || itemsCompleted.containsKey(url)
+ || itemsErrored.containsKey(url) )) {
+ // Item is already downloaded/downloading, skip it.
+ LOGGER.info("[!] Skipping " + url + " -- already attempted: " + Utils.removeCWD(saveAs));
+ return false;
+ }
+ if (Utils.getConfigBoolean("urls_only.save", false)) {
+ // Output URL to file
+ String urlFile = this.workingDir + File.separator + "urls.txt";
+ try (FileWriter fw = new FileWriter(urlFile, true)) {
+ fw.write(url.toExternalForm());
+ fw.write(System.lineSeparator());
+ itemsCompleted.put(url, new File(urlFile));
+ } catch (IOException e) {
+ LOGGER.error("Error while writing to " + urlFile, e);
+ }
+ }
+ else {
+ itemsPending.put(url, saveAs);
+ DownloadFileThread dft = new DownloadFileThread(url, saveAs, this, getFileExtFromMIME);
+ if (referrer != null) {
+ dft.setReferrer(referrer);
+ }
+ if (cookies != null) {
+ dft.setCookies(cookies);
+ }
+ threadPool.addThread(dft);
+ }
+
+ return true;
+ }
+
+ @Override
+ public boolean addURLToDownload(URL url, File saveAs) {
+ return addURLToDownload(url, saveAs, null, null, false);
+ }
+
+ /**
+ * Queues image to be downloaded and saved.
+ * Uses filename from URL to decide filename.
+ * @param url
+ * URL to download
+ * @return
+ * True on success
+ */
+ protected boolean addURLToDownload(URL url) {
+ // Use empty prefix and empty subdirectory
+ return addURLToDownload(url, "", "");
+ }
+
+ @Override
+ /**
+ * Cleans up & tells user about successful download
+ */
+ public void downloadCompleted(URL url, File saveAs) {
+ if (observer == null) {
+ return;
+ }
+ try {
+ String path = Utils.removeCWD(saveAs);
+ RipStatusMessage msg = new RipStatusMessage(STATUS.DOWNLOAD_COMPLETE, path);
+ itemsPending.remove(url);
+ itemsCompleted.put(url, saveAs);
+ observer.update(this, msg);
+
+ checkIfComplete();
+ } catch (Exception e) {
+ LOGGER.error("Exception while updating observer: ", e);
+ }
+ }
+
+ @Override
+ /**
+ * Cleans up & tells user about failed download.
+ */
+ public void downloadErrored(URL url, String reason) {
+ if (observer == null) {
+ return;
+ }
+ itemsPending.remove(url);
+ itemsErrored.put(url, reason);
+ observer.update(this, new RipStatusMessage(STATUS.DOWNLOAD_ERRORED, url + " : " + reason));
+
+ checkIfComplete();
+ }
+
+ @Override
+ /**
+ * Tells user that a single file in the album they wish to download has
+ * already been downloaded in the past.
+ */
+ public void downloadExists(URL url, File file) {
+ if (observer == null) {
+ return;
+ }
+
+ itemsPending.remove(url);
+ itemsCompleted.put(url, file);
+ observer.update(this, new RipStatusMessage(STATUS.DOWNLOAD_WARN, url + " already saved as " + file.getAbsolutePath()));
+
+ checkIfComplete();
+ }
+
+ /**
+ * Notifies observers and updates state if all files have been ripped.
+ */
+ @Override
+ protected void checkIfComplete() {
+ if (observer == null) {
+ return;
+ }
+ if (itemsPending.isEmpty()) {
+ super.checkIfComplete();
+ }
+ }
+
+ /**
+ * Sets directory to save all ripped files to.
+ * @param url
+ * URL to define how the working directory should be saved.
+ * @throws
+ * IOException
+ */
+ @Override
+ public void setWorkingDir(URL url) throws IOException {
+ String path = Utils.getWorkingDirectory().getCanonicalPath();
+ if (!path.endsWith(File.separator)) {
+ path += File.separator;
+ }
+ String title;
+ if (Utils.getConfigBoolean("album_titles.save", true)) {
+ title = getAlbumTitle(this.url);
+ } else {
+ title = super.getAlbumTitle(this.url);
+ }
+ LOGGER.debug("Using album title '" + title + "'");
+
+ title = Utils.filesystemSafe(title);
+ path += title;
+ path = Utils.getOriginalDirectory(path) + File.separator; // check for case sensitive (unix only)
+
+ this.workingDir = new File(path);
+ if (!this.workingDir.exists()) {
+ LOGGER.info("[+] Creating directory: " + Utils.removeCWD(this.workingDir));
+ this.workingDir.mkdirs();
+ }
+ LOGGER.debug("Set working directory to: " + this.workingDir);
+ }
+
+ /**
+ * @return
+ * Integer between 0 and 100 defining the progress of the album rip (0 when nothing queued).
+ */
+ @Override
+ public int getCompletionPercentage() {
+ double total = itemsPending.size() + itemsErrored.size() + itemsCompleted.size();
+ return total == 0 ? 0 : (int) (100 * ((total - itemsPending.size()) / total));
+ }
+
+ /**
+ * @return
+ * Human-readable information on the status of the current rip.
+ */
+ @Override
+ public String getStatusText() {
+ StringBuilder sb = new StringBuilder();
+ sb.append(getCompletionPercentage())
+ .append("% ")
+ .append("- Pending: " ).append(itemsPending.size())
+ .append(", Completed: ").append(itemsCompleted.size())
+ .append(", Errored: " ).append(itemsErrored.size());
+ return sb.toString();
+ }
+
+
+}
\ No newline at end of file
diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
index 95643b4c..19d1bf77 100644
--- a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
@@ -1,7 +1,11 @@
package com.rarchives.ripme.ripper;
import java.awt.Desktop;
-import java.io.*;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileWriter;
+import java.io.IOException;
import java.lang.reflect.Constructor;
import java.net.MalformedURLException;
import java.net.URL;
@@ -9,24 +13,20 @@ import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Observable;
-
-import com.rarchives.ripme.App;
+import java.util.Scanner;
import org.apache.log4j.FileAppender;
import org.apache.log4j.Logger;
import org.jsoup.HttpStatusException;
-
+import com.rarchives.ripme.App;
import com.rarchives.ripme.ui.RipStatusComplete;
import com.rarchives.ripme.ui.RipStatusHandler;
import com.rarchives.ripme.ui.RipStatusMessage;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Utils;
-import java.io.File;
-import java.util.Scanner;
-
public abstract class AbstractRipper
- extends Observable
- implements RipperInterface, Runnable {
+ extends Observable
+ implements RipperInterface, Runnable {
protected static final Logger LOGGER = Logger.getLogger(AbstractRipper.class);
private final String URLHistoryFile = Utils.getURLHistoryFile();
@@ -67,7 +67,7 @@ public abstract class AbstractRipper
* Adds a URL to the url history file
* @param downloadedURL URL to check if downloaded
*/
- private void writeDownloadedURL(String downloadedURL) throws IOException {
+ protected void writeDownloadedURL(String downloadedURL) throws IOException {
// If "save urls only" is checked don't write to the url history file
if (Utils.getConfigBoolean("urls_only.save", false)) {
return;
@@ -123,15 +123,15 @@ public abstract class AbstractRipper
public String normalizeUrl(String url) {
return url;
}
-
+
/**
* Checks to see if Ripme has already downloaded a URL
* @param url URL to check if downloaded
- * @return
+ * @return
* Returns true if previously downloaded.
* Returns false if not yet downloaded.
*/
- private boolean hasDownloadedURL(String url) {
+ protected boolean hasDownloadedURL(String url) {
File file = new File(URLHistoryFile);
url = normalizeUrl(url);
@@ -172,7 +172,7 @@ public abstract class AbstractRipper
* Logger (for debugging)
* FileAppender
* Threadpool
- * @throws IOException
+ * @throws IOException
* Always be prepared.
*/
public void setup() throws IOException {
@@ -218,6 +218,44 @@ public abstract class AbstractRipper
protected abstract boolean addURLToDownload(URL url, File saveAs, String referrer, Map cookies,
Boolean getFileExtFromMIME);
+ /**
+ * Queues image to be downloaded and saved.
+ * @param url
+ * URL of the file
+ * @param options
+ * A map containing any changes to the default options.
+ * Options are getFileExtFromMIME, prefix, subdirectory, referrer, fileName, and extension.
+ * getFileExtFromMIME should be "true" or "false"
+ * @param cookies
+ * The cookies to send to the server while downloading this file.
+ * @return
+ * True if downloaded successfully
+ * False if failed to download
+ */
+ protected boolean addURLToDownload(URL url, Map options, Map cookies) {
+ // Bit of a hack but this lets us pass a bool using a map
+ boolean useMIME = options.getOrDefault("getFileExtFromMIME", "false").toLowerCase().equals("true");
+ return addURLToDownload(url, options.getOrDefault("prefix", ""), options.getOrDefault("subdirectory", ""), options.getOrDefault("referrer", null),
+ cookies, options.getOrDefault("fileName", null), options.getOrDefault("extension", null), useMIME);
+ }
+
+
+ /**
+ * Queues image to be downloaded and saved.
+ * @param url
+ * URL of the file
+ * @param options
+ * A map containing any changes to the default options.
+ * Options are getFileExtFromMIME, prefix, subdirectory, referrer, fileName, and extension.
+ * getFileExtFromMIME should be "true" or "false"
+ * @return
+ * True if downloaded successfully
+ * False if failed to download
+ */
+ protected boolean addURLToDownload(URL url, Map options) {
+ return addURLToDownload(url, options, null);
+ }
+
/**
* Queues image to be downloaded and saved.
* @param url
@@ -232,11 +270,27 @@ public abstract class AbstractRipper
* The cookies to send to the server while downloading this file.
* @param fileName
* The name that file will be written to
- * @return
+ * @return
* True if downloaded successfully
* False if failed to download
*/
protected boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map cookies, String fileName, String extension, Boolean getFileExtFromMIME) {
+ // A common bug is rippers adding urls that are just "http:". This rejects said urls
+ if (url.toExternalForm().equals("http:") || url.toExternalForm().equals("https:")) {
+ LOGGER.info(url.toExternalForm() + " is an invalid url and will be skipped");
+ return false;
+
+ }
+ // Make sure the url doesn't contain any spaces as that can cause a 400 error when requesting the file
+ if (url.toExternalForm().contains(" ")) {
+ // If for some reason the url with all spaces encoded as %20 is malformed print an error
+ try {
+ url = new URL(url.toExternalForm().replaceAll(" ", "%20"));
+ } catch (MalformedURLException e) {
+ LOGGER.error("Unable to remove spaces from url\nURL: " + url.toExternalForm());
+ e.printStackTrace();
+ }
+ }
// Don't re-add the url if it was downloaded in a previous rip
if (Utils.getConfigBoolean("remember.url_history", true) && !isThisATest()) {
if (hasDownloadedURL(url.toExternalForm())) {
@@ -266,10 +320,10 @@ public abstract class AbstractRipper
}
saveFileAs = new File(
topFolderName
- + subdirectory
- + File.separator
- + prefix
- + saveAs);
+ + subdirectory
+ + File.separator
+ + prefix
+ + saveAs);
} catch (IOException e) {
LOGGER.error("[!] Error creating save file path for URL '" + url + "':", e);
return false;
@@ -280,6 +334,7 @@ public abstract class AbstractRipper
saveFileAs.getParentFile().mkdirs();
}
if (Utils.getConfigBoolean("remember.url_history", true) && !isThisATest()) {
+ LOGGER.info("Writing " + url.toExternalForm() + " to file");
try {
writeDownloadedURL(url.toExternalForm() + "\n");
} catch (IOException e) {
@@ -447,7 +502,7 @@ public abstract class AbstractRipper
/**
* Gets URL
- * @return
+ * @return
* Returns URL that wants to be downloaded.
*/
public URL getURL() {
@@ -467,14 +522,14 @@ public abstract class AbstractRipper
public abstract void setWorkingDir(URL url) throws IOException;
/**
- *
- * @param url
+ *
+ * @param url
* The URL you want to get the title of.
* @return
* host_URLid
* e.g. (for a reddit post)
* reddit_post_7mg2ur
- * @throws MalformedURLException
+ * @throws MalformedURLException
* If any of those damned URLs gets malformed.
*/
public String getAlbumTitle(URL url) throws MalformedURLException {
@@ -493,7 +548,7 @@ public abstract class AbstractRipper
public static AbstractRipper getRipper(URL url) throws Exception {
for (Constructor> constructor : getRipperConstructors("com.rarchives.ripme.ripper.rippers")) {
try {
- AlbumRipper ripper = (AlbumRipper) constructor.newInstance(url); // by design: can throw ClassCastException
+ AbstractRipper ripper = (AbstractRipper) constructor.newInstance(url); // by design: can throw ClassCastException
LOGGER.debug("Found album ripper: " + ripper.getClass().getName());
return ripper;
} catch (Exception e) {
@@ -531,7 +586,7 @@ public abstract class AbstractRipper
/**
* Sends an update message to the relevant observer(s) on this ripper.
- * @param status
+ * @param status
* @param message
*/
public void sendUpdate(STATUS status, Object message) {
@@ -540,15 +595,15 @@ public abstract class AbstractRipper
}
observer.update(this, new RipStatusMessage(status, message));
}
-
+
/**
* Get the completion percentage.
- * @return
+ * @return
* Percentage complete
*/
public abstract int getCompletionPercentage();
/**
- * @return
+ * @return
* Text for status
*/
public abstract String getStatusText();
@@ -584,12 +639,12 @@ public abstract class AbstractRipper
}
}
}
-
+
/**
* Pauses thread for a set amount of time.
* @param milliseconds
* Amount of time (in milliseconds) that the thread gets paused for
- * @return
+ * @return
* True if paused successfully
* False if failed to pause/got interrupted.
*/
@@ -624,4 +679,4 @@ public abstract class AbstractRipper
protected boolean useByteProgessBar() { return false;}
// If true ripme will try to resume a broken download for this ripper
protected boolean tryResumeDownload() { return false;}
-}
+}
\ No newline at end of file
diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractSingleFileRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractSingleFileRipper.java
index f1f8be41..caf69c37 100644
--- a/src/main/java/com/rarchives/ripme/ripper/AbstractSingleFileRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/AbstractSingleFileRipper.java
@@ -40,4 +40,4 @@ public abstract class AbstractSingleFileRipper extends AbstractHTMLRipper {
@Override
public boolean useByteProgessBar() {return true;}
-}
+}
\ No newline at end of file
diff --git a/src/main/java/com/rarchives/ripme/ripper/AlbumRipper.java b/src/main/java/com/rarchives/ripme/ripper/AlbumRipper.java
index 97943b33..fa6ce3ba 100644
--- a/src/main/java/com/rarchives/ripme/ripper/AlbumRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/AlbumRipper.java
@@ -59,8 +59,8 @@ public abstract class AlbumRipper extends AbstractRipper {
}
if (!allowDuplicates()
&& ( itemsPending.containsKey(url)
- || itemsCompleted.containsKey(url)
- || itemsErrored.containsKey(url) )) {
+ || itemsCompleted.containsKey(url)
+ || itemsErrored.containsKey(url) )) {
// Item is already downloaded/downloading, skip it.
LOGGER.info("[!] Skipping " + url + " -- already attempted: " + Utils.removeCWD(saveAs));
return false;
@@ -70,7 +70,7 @@ public abstract class AlbumRipper extends AbstractRipper {
String urlFile = this.workingDir + File.separator + "urls.txt";
try (FileWriter fw = new FileWriter(urlFile, true)) {
fw.write(url.toExternalForm());
- fw.write("\n");
+ fw.write(System.lineSeparator());
itemsCompleted.put(url, new File(urlFile));
} catch (IOException e) {
LOGGER.error("Error while writing to " + urlFile, e);
@@ -87,6 +87,7 @@ public abstract class AlbumRipper extends AbstractRipper {
}
threadPool.addThread(dft);
}
+
return true;
}
@@ -225,10 +226,10 @@ public abstract class AlbumRipper extends AbstractRipper {
public String getStatusText() {
StringBuilder sb = new StringBuilder();
sb.append(getCompletionPercentage())
- .append("% ")
- .append("- Pending: " ).append(itemsPending.size())
- .append(", Completed: ").append(itemsCompleted.size())
- .append(", Errored: " ).append(itemsErrored.size());
+ .append("% ")
+ .append("- Pending: " ).append(itemsPending.size())
+ .append(", Completed: ").append(itemsCompleted.size())
+ .append(", Errored: " ).append(itemsErrored.size());
return sb.toString();
}
-}
+}
\ No newline at end of file
diff --git a/src/main/java/com/rarchives/ripme/ripper/DownloadFileThread.java b/src/main/java/com/rarchives/ripme/ripper/DownloadFileThread.java
index 2f8a1503..3680af6b 100644
--- a/src/main/java/com/rarchives/ripme/ripper/DownloadFileThread.java
+++ b/src/main/java/com/rarchives/ripme/ripper/DownloadFileThread.java
@@ -1,12 +1,6 @@
package com.rarchives.ripme.ripper;
-import java.io.BufferedInputStream;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.lang.reflect.Array;
+import java.io.*;
import java.net.HttpURLConnection;
import java.net.SocketTimeoutException;
import java.net.URL;
@@ -19,26 +13,22 @@ import java.util.ResourceBundle;
import javax.net.ssl.HttpsURLConnection;
import com.rarchives.ripme.ui.MainWindow;
-import org.apache.commons.io.IOUtils;
import org.apache.log4j.Logger;
import org.jsoup.HttpStatusException;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Utils;
-import com.rarchives.ripme.ripper.AbstractRipper;
/**
- * Thread for downloading files.
- * Includes retry logic, observer notifications, and other goodies.
+ * Thread for downloading files. Includes retry logic, observer notifications,
+ * and other goodies.
*/
class DownloadFileThread extends Thread {
-
private ResourceBundle rb = MainWindow.rb;
-
private static final Logger logger = Logger.getLogger(DownloadFileThread.class);
private String referrer = "";
- private Map cookies = new HashMap<>();
+ private Map cookies = new HashMap<>();
private URL url;
private File saveAs;
@@ -63,16 +53,19 @@ class DownloadFileThread extends Thread {
public void setReferrer(String referrer) {
this.referrer = referrer;
}
- public void setCookies(Map cookies) {
+
+ public void setCookies(Map cookies) {
this.cookies = cookies;
}
-
/**
- * Attempts to download the file. Retries as needed.
- * Notifies observers upon completion/error/warn.
+ * Attempts to download the file. Retries as needed. Notifies observers upon
+ * completion/error/warn.
*/
public void run() {
+ // First thing we make sure the file name doesn't have any illegal chars in it
+ saveAs = new File(
+ saveAs.getParentFile().getAbsolutePath() + File.separator + Utils.sanitizeSaveAs(saveAs.getName()));
long fileSize = 0;
int bytesTotal = 0;
int bytesDownloaded = 0;
@@ -85,13 +78,15 @@ class DownloadFileThread extends Thread {
observer.downloadErrored(url, rb.getString("download.interrupted"));
return;
}
- if (saveAs.exists() && !observer.tryResumeDownload() && !getFileExtFromMIME ||
- Utils.fuzzyExists(new File(saveAs.getParent()), saveAs.getName()) && getFileExtFromMIME && !observer.tryResumeDownload()) {
+ if (saveAs.exists() && !observer.tryResumeDownload() && !getFileExtFromMIME
+ || Utils.fuzzyExists(new File(saveAs.getParent()), saveAs.getName()) && getFileExtFromMIME
+ && !observer.tryResumeDownload()) {
if (Utils.getConfigBoolean("file.overwrite", false)) {
logger.info("[!] " + rb.getString("deleting.existing.file") + prettySaveAs);
saveAs.delete();
} else {
- logger.info("[!] " + rb.getString("skipping") + url + " -- " + rb.getString("file.already.exists") + ": " + prettySaveAs);
+ logger.info("[!] " + rb.getString("skipping") + url + " -- "
+ + rb.getString("file.already.exists") + ": " + prettySaveAs);
observer.downloadExists(url, saveAs);
return;
}
@@ -101,7 +96,8 @@ class DownloadFileThread extends Thread {
int tries = 0; // Number of attempts to download
do {
tries += 1;
- InputStream bis = null; OutputStream fos = null;
+ InputStream bis = null;
+ OutputStream fos = null;
try {
logger.info(" Downloading file: " + urlToDownload + (tries > 0 ? " Retry #" + tries : ""));
observer.sendUpdate(STATUS.DOWNLOAD_STARTED, url.toExternalForm());
@@ -110,16 +106,16 @@ class DownloadFileThread extends Thread {
HttpURLConnection huc;
if (this.url.toString().startsWith("https")) {
huc = (HttpsURLConnection) urlToDownload.openConnection();
- }
- else {
+ } else {
huc = (HttpURLConnection) urlToDownload.openConnection();
}
huc.setInstanceFollowRedirects(true);
- // It is important to set both ConnectTimeout and ReadTimeout. If you don't then ripme will wait forever
+ // It is important to set both ConnectTimeout and ReadTimeout. If you don't then
+ // ripme will wait forever
// for the server to send data after connecting.
huc.setConnectTimeout(TIMEOUT);
huc.setReadTimeout(TIMEOUT);
- huc.setRequestProperty("accept", "*/*");
+ huc.setRequestProperty("accept", "*/*");
if (!referrer.equals("")) {
huc.setRequestProperty("Referer", referrer); // Sic
}
@@ -142,11 +138,13 @@ class DownloadFileThread extends Thread {
int statusCode = huc.getResponseCode();
logger.debug("Status code: " + statusCode);
+ // If the server doesn't allow resuming downloads error out
if (statusCode != 206 && observer.tryResumeDownload() && saveAs.exists()) {
- // TODO find a better way to handle servers that don't support resuming downloads then just erroring out
+ // TODO find a better way to handle servers that don't support resuming
+ // downloads then just erroring out
throw new IOException(rb.getString("server.doesnt.support.resuming.downloads"));
}
- if (statusCode / 100 == 3) { // 3xx Redirect
+ if (statusCode / 100 == 3) { // 3xx Redirect
if (!redirected) {
// Don't increment retries on the first redirect
tries--;
@@ -158,12 +156,15 @@ class DownloadFileThread extends Thread {
throw new IOException("Redirect status code " + statusCode + " - redirect to " + location);
}
if (statusCode / 100 == 4) { // 4xx errors
- logger.error("[!] " + rb.getString("nonretriable.status.code") + " " + statusCode + " while downloading from " + url);
- observer.downloadErrored(url, rb.getString("nonretriable.status.code") + " " + statusCode + " while downloading " + url.toExternalForm());
+ logger.error("[!] " + rb.getString("nonretriable.status.code") + " " + statusCode
+ + " while downloading from " + url);
+ observer.downloadErrored(url, rb.getString("nonretriable.status.code") + " "
+ + statusCode + " while downloading " + url.toExternalForm());
return; // Not retriable, drop out.
}
if (statusCode / 100 == 5) { // 5xx errors
- observer.downloadErrored(url, rb.getString("retriable.status.code") + " " + statusCode + " while downloading " + url.toExternalForm());
+ observer.downloadErrored(url, rb.getString("retriable.status.code") + " " + statusCode
+ + " while downloading " + url.toExternalForm());
// Throw exception so download can be retried
throw new IOException(rb.getString("retriable.status.code") + " " + statusCode);
}
@@ -174,7 +175,8 @@ class DownloadFileThread extends Thread {
return;
}
- // If the ripper is using the bytes progress bar set bytesTotal to huc.getContentLength()
+ // If the ripper is using the bytes progress bar set bytesTotal to
+ // huc.getContentLength()
if (observer.useByteProgessBar()) {
bytesTotal = huc.getContentLength();
observer.setBytesTotal(bytesTotal);
@@ -195,14 +197,15 @@ class DownloadFileThread extends Thread {
logger.error("Was unable to get content type from stream");
// Try to get the file type from the magic number
byte[] magicBytes = new byte[8];
- bis.read(magicBytes,0, 5);
+ bis.read(magicBytes, 0, 5);
bis.reset();
fileExt = Utils.getEXTFromMagic(magicBytes);
if (fileExt != null) {
saveAs = new File(saveAs.toString() + "." + fileExt);
} else {
logger.error(rb.getString("was.unable.to.get.content.type.using.magic.number"));
- logger.error(rb.getString("magic.number.was") + ": " + Arrays.toString(magicBytes));
+ logger.error(
+ rb.getString("magic.number.was") + ": " + Arrays.toString(magicBytes));
}
}
}
@@ -210,31 +213,54 @@ class DownloadFileThread extends Thread {
if (statusCode == 206) {
fos = new FileOutputStream(saveAs, true);
} else {
- fos = new FileOutputStream(saveAs);
+ try {
+ fos = new FileOutputStream(saveAs);
+ } catch (FileNotFoundException e) {
+ // We do this because some filesystems have a max name length
+ if (e.getMessage().contains("File name too long")) {
+ logger.error("The filename " + saveAs.getName()
+ + " is to long to be saved on this file system.");
+ logger.info("Shortening filename");
+ String[] saveAsSplit = saveAs.getName().split("\\.");
+ // Get the file extension so when we shorten the file name we don't cut off the
+ // file extension
+ String fileExt = saveAsSplit[saveAsSplit.length - 1];
+ // The max limit for filenames on Linux with Ext3/4 is 255 bytes
+ logger.info(saveAs.getName().substring(0, 254 - fileExt.length()) + fileExt);
+ String filename = saveAs.getName().substring(0, 254 - fileExt.length()) + "." + fileExt;
+ // We can't just use the new file name as the saveAs because the file name
+ // doesn't include the
+ // users save path, so we get the user save path from the old saveAs
+ saveAs = new File(saveAs.getParentFile().getAbsolutePath() + File.separator + filename);
+ fos = new FileOutputStream(saveAs);
+ } else if (saveAs.getAbsolutePath().length() > 259 && Utils.isWindows()) {
+ // This if is for when the file path has gone above 260 chars which windows does
+ // not allow
+ fos = new FileOutputStream(
+ Utils.shortenSaveAsWindows(saveAs.getParentFile().getPath(), saveAs.getName()));
+ }
+ }
}
byte[] data = new byte[1024 * 256];
int bytesRead;
- boolean shouldSkipFileDownload = huc.getContentLength() / 10000000 >= 10;
- while ( (bytesRead = bis.read(data)) != -1) {
- try {
- observer.stopCheck();
- } catch (IOException e) {
- observer.downloadErrored(url, rb.getString("download.interrupted"));
- return;
- }
- fos.write(data, 0, bytesRead);
- if (observer.useByteProgessBar()) {
- bytesDownloaded += bytesRead;
- observer.setBytesCompleted(bytesDownloaded);
- observer.sendUpdate(STATUS.COMPLETED_BYTES, bytesDownloaded);
- }
- // If this is a test and we're downloading a large file
- if (AbstractRipper.isThisATest() && shouldSkipFileDownload) {
- logger.debug("Not downloading whole file because it is over 10mb and this is a test");
- bis.close();
- fos.close();
- break;
-
+ boolean shouldSkipFileDownload = huc.getContentLength() / 1000000 >= 10 && AbstractRipper.isThisATest();
+ // If this is a test rip we skip large downloads
+ if (shouldSkipFileDownload) {
+ logger.debug("Not downloading whole file because it is over 10mb and this is a test");
+ } else {
+ while ((bytesRead = bis.read(data)) != -1) {
+ try {
+ observer.stopCheck();
+ } catch (IOException e) {
+ observer.downloadErrored(url, rb.getString("download.interrupted"));
+ return;
+ }
+ fos.write(data, 0, bytesRead);
+ if (observer.useByteProgessBar()) {
+ bytesDownloaded += bytesRead;
+ observer.setBytesCompleted(bytesDownloaded);
+ observer.sendUpdate(STATUS.COMPLETED_BYTES, bytesDownloaded);
+ }
}
}
bis.close();
@@ -249,24 +275,34 @@ class DownloadFileThread extends Thread {
logger.debug(rb.getString("http.status.exception"), hse);
logger.error("[!] HTTP status " + hse.getStatusCode() + " while downloading from " + urlToDownload);
if (hse.getStatusCode() == 404 && Utils.getConfigBoolean("errors.skip404", false)) {
- observer.downloadErrored(url, "HTTP status code " + hse.getStatusCode() + " while downloading " + url.toExternalForm());
+ observer.downloadErrored(url,
+ "HTTP status code " + hse.getStatusCode() + " while downloading " + url.toExternalForm());
return;
}
} catch (IOException e) {
logger.debug("IOException", e);
- logger.error("[!] " + rb.getString("exception.while.downloading.file") + ": " + url + " - " + e.getMessage());
+ logger.error("[!] " + rb.getString("exception.while.downloading.file") + ": " + url + " - "
+ + e.getMessage());
} finally {
// Close any open streams
try {
- if (bis != null) { bis.close(); }
- } catch (IOException e) { }
+ if (bis != null) {
+ bis.close();
+ }
+ } catch (IOException e) {
+ }
try {
- if (fos != null) { fos.close(); }
- } catch (IOException e) { }
+ if (fos != null) {
+ fos.close();
+ }
+ } catch (IOException e) {
+ }
}
if (tries > this.retries) {
- logger.error("[!] " + rb.getString ("exceeded.maximum.retries") + " (" + this.retries + ") for URL " + url);
- observer.downloadErrored(url, rb.getString("failed.to.download") + " " + url.toExternalForm());
+ logger.error("[!] " + rb.getString("exceeded.maximum.retries") + " (" + this.retries
+ + ") for URL " + url);
+ observer.downloadErrored(url,
+ rb.getString("failed.to.download") + " " + url.toExternalForm());
return;
}
} while (true);
@@ -274,4 +310,4 @@ class DownloadFileThread extends Thread {
logger.info("[+] Saved " + url + " as " + this.prettySaveAs);
}
-}
+}
\ No newline at end of file
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatRipper.java
index 9c2db859..e4da448d 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatRipper.java
@@ -9,19 +9,27 @@ import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import com.rarchives.ripme.ripper.AbstractSingleFileRipper;
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import org.json.JSONArray;
+import org.json.JSONObject;
import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.utils.Http;
-public class GfycatRipper extends AbstractSingleFileRipper {
+public class GfycatRipper extends AbstractHTMLRipper {
private static final String HOST = "gfycat.com";
+ String username = "";
+ String cursor = "";
+ String count = "30";
+
+
public GfycatRipper(URL url) throws IOException {
- super(url);
+ super(new URL(url.toExternalForm().split("-")[0].replace("thumbs.", "")));
}
@Override
@@ -41,14 +49,26 @@ public class GfycatRipper extends AbstractSingleFileRipper {
@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
- url = new URL(url.toExternalForm().replace("/gifs/detail", ""));
-
- return url;
+ String sUrl = url.toExternalForm();
+ sUrl = sUrl.replace("/gifs/detail", "");
+ sUrl = sUrl.replace("/amp", "");
+ return new URL(sUrl);
+ }
+
+ public boolean isProfile() {
+ Pattern p = Pattern.compile("^https?://[wm.]*gfycat\\.com/@([a-zA-Z0-9]+).*$");
+ Matcher m = p.matcher(url.toExternalForm());
+ return m.matches();
}
@Override
public Document getFirstPage() throws IOException {
- return Http.url(url).get();
+ if (!isProfile()) {
+ return Http.url(url).get();
+ } else {
+ username = getGID(url);
+ return Http.url(new URL("https://api.gfycat.com/v1/users/" + username + "/gfycats")).ignoreContentType().get();
+ }
}
@Override
@@ -58,27 +78,58 @@ public class GfycatRipper extends AbstractSingleFileRipper {
@Override
public String getGID(URL url) throws MalformedURLException {
- Pattern p = Pattern.compile("^https?://[wm.]*gfycat\\.com/([a-zA-Z0-9]+).*$");
+ Pattern p = Pattern.compile("^https?://(thumbs\\.|[wm\\.]*)gfycat\\.com/@?([a-zA-Z0-9]+).*$");
Matcher m = p.matcher(url.toExternalForm());
- if (m.matches()) {
- return m.group(1);
- }
+
+ if (m.matches())
+ return m.group(2);
throw new MalformedURLException(
- "Expected gfycat.com format:"
- + "gfycat.com/id"
+ "Expected gfycat.com format: "
+ + "gfycat.com/id or "
+ + "thumbs.gfycat.com/id.gif"
+ " Got: " + url);
}
+ private String stripHTMLTags(String t) {
+ t = t.replaceAll("\n" +
+ " \n" +
+ " ", "");
+ t = t.replaceAll("\n" +
+ "", "");
+ t = t.replaceAll("\n", "");
+ t = t.replaceAll("=\"\"", "");
+ return t;
+ }
+
+ @Override
+ public Document getNextPage(Document doc) throws IOException {
+ if (cursor.equals("")) {
+ throw new IOException("No more pages");
+ }
+ return Http.url(new URL("https://api.gfycat.com/v1/users/" + username + "/gfycats?count=" + count + "&cursor=" + cursor)).ignoreContentType().get();
+ }
+
@Override
public List getURLsFromPage(Document doc) {
List result = new ArrayList<>();
- Elements videos = doc.select("source");
- String vidUrl = videos.first().attr("src");
- if (vidUrl.startsWith("//")) {
- vidUrl = "http:" + vidUrl;
+ if (isProfile()) {
+ JSONObject page = new JSONObject(stripHTMLTags(doc.html()));
+ JSONArray content = page.getJSONArray("gfycats");
+ for (int i = 0; i < content.length(); i++) {
+ result.add(content.getJSONObject(i).getString("mp4Url"));
+ }
+ cursor = page.getString("cursor");
+ } else {
+ Elements videos = doc.select("script");
+ for (Element el : videos) {
+ String json = el.html();
+ if (json.startsWith("{")) {
+ JSONObject page = new JSONObject(json);
+ result.add(page.getJSONObject("video").getString("contentUrl"));
+ }
+ }
}
- result.add(vidUrl);
return result;
}
@@ -95,14 +146,14 @@ public class GfycatRipper extends AbstractSingleFileRipper {
url = new URL(url.toExternalForm().replace("/gifs/detail", ""));
Document doc = Http.url(url).get();
- Elements videos = doc.select("source");
- if (videos.isEmpty()) {
- throw new IOException("Could not find source at " + url);
+ Elements videos = doc.select("script");
+ for (Element el : videos) {
+ String json = el.html();
+ if (json.startsWith("{")) {
+ JSONObject page = new JSONObject(json);
+ return page.getJSONObject("video").getString("contentUrl");
+ }
}
- String vidUrl = videos.first().attr("src");
- if (vidUrl.startsWith("//")) {
- vidUrl = "http:" + vidUrl;
- }
- return vidUrl;
+ throw new IOException();
}
}
\ No newline at end of file
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
index 57c3e6ab..f6c85591 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
@@ -1,480 +1,502 @@
package com.rarchives.ripme.ripper.rippers;
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.UnsupportedEncodingException;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.net.URLConnection;
-import java.time.*;
-import java.time.format.DateTimeFormatter;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import java.security.*;
-
-import org.json.JSONArray;
-import org.json.JSONException;
-import org.json.JSONObject;
-
import com.rarchives.ripme.ripper.AbstractJSONRipper;
import com.rarchives.ripme.utils.Http;
-
+import com.rarchives.ripme.utils.Utils;
+import jdk.nashorn.internal.ir.Block;
+import jdk.nashorn.internal.ir.CallNode;
+import jdk.nashorn.internal.ir.ExpressionStatement;
+import jdk.nashorn.internal.ir.FunctionNode;
+import jdk.nashorn.internal.ir.Statement;
+import jdk.nashorn.internal.parser.Parser;
+import jdk.nashorn.internal.runtime.Context;
+import jdk.nashorn.internal.runtime.ErrorManager;
+import jdk.nashorn.internal.runtime.Source;
+import jdk.nashorn.internal.runtime.options.Options;
+import org.json.JSONArray;
+import org.json.JSONObject;
import org.jsoup.Connection;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
-import com.rarchives.ripme.ui.RipStatusMessage;
-import com.rarchives.ripme.utils.Utils;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.time.Instant;
+import java.time.ZoneOffset;
+import java.time.ZonedDateTime;
+import java.time.format.DateTimeFormatter;
+import java.util.ArrayList;
import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Spliterators;
+import java.util.function.BiFunction;
+import java.util.function.Consumer;
+import java.util.function.Function;
+import java.util.function.Predicate;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import java.util.stream.StreamSupport;
+import static java.lang.String.format;
+// Available configuration options:
+// instagram.download_images_only - use to skip video links
+// instagram.session_id - should be set for stories and private accounts (look for sessionid cookie)
public class InstagramRipper extends AbstractJSONRipper {
- String nextPageID = "";
+
private String qHash;
- private boolean rippingTag = false;
- private String tagName;
+ private Map cookies = new HashMap<>();
+ private String idString;
+ private List itemPrefixes = new ArrayList<>();
+ private List failedItems = new ArrayList<>();
- private String userID;
- private String rhx_gis = null;
- private String csrftoken;
- // Run into a weird issue with Jsoup cutting some json pages in half, this is a work around
- // see https://github.com/RipMeApp/ripme/issues/601
- private String workAroundJsonString;
+ private boolean hashtagRip;
+ private boolean taggedRip;
+ private boolean igtvRip;
+ private boolean postRip;
+ private boolean storiesRip;
+ private boolean pinnedRip;
+ private boolean pinnedReelRip;
+ private enum UrlTypePattern {
+ // e.g. https://www.instagram.com/explore/tags/rachelc00k/
+ HASHTAG("explore/tags/(?[^?/]+)"),
+ // e.g. https://www.instagram.com/stories/rachelc00k/
+ STORIES("stories/(?[^?/]+)"),
+
+ // e.g. https://www.instagram.com/rachelc00k/tagged/
+ USER_TAGGED("(?[^?/]+)/tagged"),
+
+ // e.g. https://www.instagram.com/rachelc00k/channel/
+ IGTV("(?[^?/]+)/channel"),
+
+ // e.g. https://www.instagram.com/p/Bu4CEfbhNk4/
+ SINGLE_POST("(?:p|tv)/(?[^?/]+)"),
+
+ // pseudo-url, e.g. https://www.instagram.com/rachelc00k/?pinned
+ PINNED("(?[^?/]+)/?[?]pinned"),
+
+ // e.g. https://www.instagram.com/rachelc00k/
+ USER_PROFILE("(?[^?/]+)");
+
+ private final String urlTypePattern;
+
+ UrlTypePattern(String urlTypePattern) {
+ this.urlTypePattern = urlTypePattern;
+ }
+ }
public InstagramRipper(URL url) throws IOException {
super(url);
}
@Override
- public String getHost() {
- return "instagram";
- }
- @Override
- public String getDomain() {
+ protected String getDomain() {
return "instagram.com";
}
@Override
- public boolean canRip(URL url) {
- return (url.getHost().endsWith("instagram.com"));
- }
-
- @Override
- public URL sanitizeURL(URL url) throws MalformedURLException {
- URL san_url = new URL(url.toExternalForm().replaceAll("\\?hl=\\S*", ""));
- LOGGER.info("sanitized URL is " + san_url.toExternalForm());
- return san_url;
- }
-
- @Override
- public String normalizeUrl(String url) {
- // Remove the date sig from the url
- return url.replaceAll("/[A-Z0-9]{8}/", "/");
- }
-
- @Override public boolean hasASAPRipping() {
- return true;
- }
-
- private List getPostsFromSinglePage(JSONObject json) {
- List imageURLs = new ArrayList<>();
- JSONArray datas;
- if (json.getJSONObject("entry_data").getJSONArray("PostPage")
- .getJSONObject(0).getJSONObject("graphql").getJSONObject("shortcode_media")
- .has("edge_sidecar_to_children")) {
- datas = json.getJSONObject("entry_data").getJSONArray("PostPage")
- .getJSONObject(0).getJSONObject("graphql").getJSONObject("shortcode_media")
- .getJSONObject("edge_sidecar_to_children").getJSONArray("edges");
- for (int i = 0; i < datas.length(); i++) {
- JSONObject data = (JSONObject) datas.get(i);
- data = data.getJSONObject("node");
- if (data.has("is_video") && data.getBoolean("is_video")) {
- imageURLs.add(data.getString("video_url"));
- } else {
- imageURLs.add(data.getString("display_url"));
- }
- }
- } else {
- JSONObject data = json.getJSONObject("entry_data").getJSONArray("PostPage")
- .getJSONObject(0).getJSONObject("graphql").getJSONObject("shortcode_media");
- if (data.getBoolean("is_video")) {
- imageURLs.add(data.getString("video_url"));
- } else {
- imageURLs.add(data.getString("display_url"));
- }
- }
- return imageURLs;
+ public String getHost() {
+ return "instagram";
}
@Override
public String getGID(URL url) throws MalformedURLException {
- Pattern p = Pattern.compile("^https?://instagram.com/([^/]+)/?");
- Matcher m = p.matcher(url.toExternalForm());
- if (m.matches()) {
- return m.group(1);
- }
-
- p = Pattern.compile("^https?://www.instagram.com/([^/]+)/?(?:\\?hl=\\S*)?/?");
- m = p.matcher(url.toExternalForm());
- if (m.matches()) {
- return m.group(1);
- }
-
- p = Pattern.compile("^https?://www.instagram.com/p/([a-zA-Z0-9_-]+)/\\?taken-by=([^/]+)/?");
- m = p.matcher(url.toExternalForm());
- if (m.matches()) {
- return m.group(2) + "_" + m.group(1);
- }
-
- p = Pattern.compile("^https?://www.instagram.com/p/([a-zA-Z0-9_-]+)/?");
- m = p.matcher(url.toExternalForm());
- if (m.matches()) {
- return m.group(1);
- }
-
- p = Pattern.compile("^https?://www.instagram.com/p/([a-zA-Z0-9_-]+)/?(?:\\?hl=\\S*)?/?");
- m = p.matcher(url.toExternalForm());
- if (m.matches()) {
- return m.group(1);
- }
-
- p = Pattern.compile("^https?://www.instagram.com/explore/tags/([^/]+)/?");
- m = p.matcher(url.toExternalForm());
- if (m.matches()) {
- rippingTag = true;
- tagName = m.group(1);
- return m.group(1);
- }
-
- throw new MalformedURLException("Unable to find user in " + url);
- }
-
- private String stripHTMLTags(String t) {
- t = t.replaceAll("\n" +
- " \n" +
- " ", "");
- t.replaceAll("\n" +
- "", "");
- t = t.replaceAll("\n", "");
- t = t.replaceAll("=\"\"", "");
- return t;
- }
-
-
- private JSONObject getJSONFromPage(Document firstPage) throws IOException {
- // Check if this page is HTML + JSON or jsut json
- if (!firstPage.html().contains("window._sharedData =")) {
- return new JSONObject(stripHTMLTags(firstPage.html()));
- }
- String jsonText = "";
- try {
- for (Element script : firstPage.select("script[type=text/javascript]")) {
- if (script.data().contains("window._sharedData = ")) {
- jsonText = script.data().replaceAll("window._sharedData = ", "");
- jsonText = jsonText.replaceAll("};", "}");
+ for (UrlTypePattern urlType : UrlTypePattern.values()) {
+ Matcher urlMatcher = getUrlMatcher(url, urlType);
+ if (urlMatcher.matches()) {
+ switch (urlType) {
+ case HASHTAG:
+ hashtagRip = true;
+ return "tag_" + urlMatcher.group("tagname");
+ case PINNED:
+ pinnedRip = true;
+ return urlMatcher.group("username") + "_pinned";
+ case STORIES:
+ storiesRip = true;
+ return urlMatcher.group("username") + "_stories";
+ case USER_TAGGED:
+ taggedRip = true;
+ return urlMatcher.group("username") + "_tagged";
+ case IGTV:
+ igtvRip = true;
+ return urlMatcher.group("username") + "_igtv";
+ case SINGLE_POST:
+ postRip = true;
+ return "post_" + urlMatcher.group("shortcode");
+ case USER_PROFILE:
+ return urlMatcher.group("username");
+ default:
+ throw new RuntimeException("Reached unreachable");
}
}
- return new JSONObject(jsonText);
- } catch (JSONException e) {
- throw new IOException("Could not get JSON from page");
}
+ throw new MalformedURLException("This URL can't be ripped");
+ }
+
+ private Matcher getUrlMatcher(URL url, UrlTypePattern type) {
+ String baseRegex = "^https?://(?:www[.])?instagram[.]com/%s(?:[?/].*)?";
+ Pattern pattern = Pattern.compile(format(baseRegex, type.urlTypePattern));
+ return pattern.matcher(url.toExternalForm());
}
@Override
public JSONObject getFirstPage() throws IOException {
- Connection.Response resp = Http.url(url).response();
- LOGGER.info(resp.cookies());
- csrftoken = resp.cookie("csrftoken");
- Document p = resp.parse();
- // Get the query hash so we can download the next page
- qHash = getQHash(p);
- return getJSONFromPage(p);
+ setAuthCookie();
+ Document document = Http.url(url).cookies(cookies).response().parse();
+ qHash = getQhash(document);
+ JSONObject jsonObject = getJsonObjectFromDoc(document);
+ String hashtagNamePath = "entry_data.TagPage[0].graphql.hashtag.name";
+ String singlePostIdPath = "graphql.shortcode_media.shortcode";
+ String profileIdPath = "entry_data.ProfilePage[0].graphql.user.id";
+ String storiesPath = "entry_data.StoriesPage[0].user.id";
+ String idPath = hashtagRip ? hashtagNamePath : storiesRip ? storiesPath : postRip ? singlePostIdPath : profileIdPath;
+ idString = getJsonStringByPath(jsonObject, idPath);
+ return taggedRip ? getNextPage(null) : pinnedRip ? getPinnedItems(document) : storiesRip ? getStoriesItems() : jsonObject;
}
- private String getVideoFromPage(String videoID) {
+ private void setAuthCookie() throws IOException {
+ String sessionId = Utils.getConfigString("instagram.session_id", null);
+ if ((storiesRip || pinnedRip) && sessionId == null) {
+ throw new IOException("instagram.session_id should be set up for Instagram stories");
+ }
+ if (sessionId != null) {
+ cookies.put("sessionid", sessionId);
+ }
+ }
+
+ // Query hash is used for graphql requests
+ private String getQhash(Document doc) throws IOException {
+ if (postRip) {
+ return null;
+ }
+ Predicate hrefFilter = (storiesRip || pinnedReelRip) ? href -> href.contains("Consumer.js") :
+ href -> href.contains("ProfilePageContainer.js") || href.contains("TagPageContainer.js");
+
+ String href = doc.select("link[rel=preload]").stream()
+ .map(link -> link.attr("href"))
+ .filter(hrefFilter)
+ .findFirst().orElse("");
+ String body = Http.url("https://www.instagram.com" + href).cookies(cookies).response().body();
+
+ Function hashExtractor =
+ storiesRip || pinnedReelRip ? this::getStoriesHash :
+ pinnedRip ? this::getPinnedHash : hashtagRip ? this::getTagHash :
+ taggedRip ? this::getUserTagHash : this::getProfileHash;
+
+ return hashExtractor.apply(body);
+ }
+
+ private String getStoriesHash(String jsData) {
+ return getHashValue(jsData, "loadStoryViewers", -5);
+ }
+
+ private String getProfileHash(String jsData) {
+ return getHashValue(jsData, "loadProfilePageExtras", -1);
+ }
+
+ private String getPinnedHash(String jsData) {
+ return getHashValue(jsData, "loadProfilePageExtras", -2);
+ }
+
+ private String getTagHash(String jsData) {
+ return getHashValue(jsData, "requestNextTagMedia", -1);
+ }
+
+ private String getUserTagHash(String jsData) {
+ return getHashValue(jsData, "requestNextTaggedPosts", -1);
+ }
+
+ private JSONObject getJsonObjectFromDoc(Document document) {
+ for (Element script : document.select("script[type=text/javascript]")) {
+ String scriptText = script.data();
+ if (scriptText.startsWith("window._sharedData") || scriptText.startsWith("window.__additionalDataLoaded")) {
+ String jsonText = scriptText.replaceAll("[^{]*([{].*})[^}]*", "$1");
+ if (jsonText.contains("graphql") || jsonText.contains("StoriesPage")) {
+ return new JSONObject(jsonText);
+ }
+ }
+ }
+ return null;
+ }
+
+ @Override
+ public JSONObject getNextPage(JSONObject source) throws IOException {
+ if (postRip || storiesRip || pinnedReelRip) {
+ return null;
+ }
+ JSONObject nextPageQuery = new JSONObject().put(hashtagRip ? "tag_name" : "id", idString).put("first", 12);
+ if (source == null) {
+ return graphqlRequest(nextPageQuery);
+ }
+ JSONObject pageInfo = getMediaRoot(source).getJSONObject("page_info");
+ if (pageInfo.getBoolean("has_next_page")) {
+ return graphqlRequest(nextPageQuery.put("after", pageInfo.getString("end_cursor")));
+ } else {
+ failedItems.forEach(LOGGER::error);
+ return null;
+ }
+ }
+
+ private JSONObject getStoriesItems() throws IOException {
+ return graphqlRequest(new JSONObject().append("reel_ids", idString).put("precomposed_overlay", false));
+ }
+
+ // Two requests with different query hashes required for pinned items.
+ // Query hash to be used depends on flag specified:
+ // pinnedRip flag is used initially to get list of pinned albums;
+ // pinnedReelRip flag is used next to get media urls.
+ private JSONObject getPinnedItems(Document document) throws IOException {
+ JSONObject queryForIds = new JSONObject().put("user_id", idString).put("include_highlight_reels", true);
+ JSONObject pinnedIdsJson = graphqlRequest(queryForIds);
+ JSONArray pinnedItems = getJsonArrayByPath(pinnedIdsJson, "data.user.edge_highlight_reels.edges");
+ pinnedRip = false;
+ pinnedReelRip = true;
+ qHash = getQhash(document);
+ JSONObject queryForDetails = new JSONObject();
+ getStreamOfJsonArray(pinnedItems)
+ .map(object -> getJsonStringByPath(object, "node.id"))
+ .forEach(id -> queryForDetails.append("highlight_reel_ids", id));
+ queryForDetails.put("precomposed_overlay", false);
+ return graphqlRequest(queryForDetails);
+ }
+
+ private JSONObject graphqlRequest(JSONObject vars) throws IOException {
+ // Sleep for a while to avoid a ban
+ sleep(2500);
+ String url = format("https://www.instagram.com/graphql/query/?query_hash=%s&variables=%s", qHash, vars.toString());
+ return Http.url(url).cookies(cookies).getJSON();
+ }
+
+ @Override
+ public List getURLsFromJSON(JSONObject json) {
+ if (storiesRip || pinnedReelRip) {
+ JSONArray storyAlbums = getJsonArrayByPath(json, "data.reels_media");
+ return getStreamOfJsonArray(storyAlbums)
+ .flatMap(album -> getStreamOfJsonArray(album.getJSONArray("items")))
+ .peek(storyItem -> itemPrefixes.add(getTimestampPrefix(storyItem)))
+ .flatMap(this::parseStoryItemForUrls)
+ .collect(Collectors.toList());
+ }
+ if (postRip) {
+ JSONObject detailsJson = downloadItemDetailsJson(idString);
+ addPrefixInfo(detailsJson);
+ return parseItemDetailsForUrls(detailsJson).collect(Collectors.toList());
+ }
+ JSONArray edges = getMediaRoot(json).getJSONArray("edges");
+ return getStreamOfJsonArray(edges)
+ .map(edge -> getJsonStringByPath(edge, "node.shortcode"))
+ .map(this::downloadItemDetailsJson)
+ .filter(Objects::nonNull)
+ .peek(this::addPrefixInfo)
+ .flatMap(this::parseItemDetailsForUrls)
+ .collect(Collectors.toList());
+ }
+
+ private Stream extends String> parseStoryItemForUrls(JSONObject storyItem) {
+ if (storyItem.getBoolean("is_video")) {
+ itemPrefixes.add(getTimestampPrefix(storyItem) + "preview_");
+ int lastIndex = storyItem.getJSONArray("video_resources").length() - 1;
+ return Stream.of(
+ getJsonStringByPath(storyItem, "video_resources[" + lastIndex + "].src"),
+ storyItem.getString("display_url"));
+ }
+ return Stream.of(storyItem.getString("display_url"));
+ }
+
+ private JSONObject getMediaRoot(JSONObject json) {
+ String userExtra = "data.user.edge_owner_to_timeline_media";
+ String igtvExtra = "data.user.edge_felix_video_timeline";
+ String taggedExtra = "data.user.edge_user_to_photos_of_you";
+ String hashtagExtra = "data.hashtag.edge_hashtag_to_media";
+ String userHomeRoot = "entry_data.ProfilePage[0].graphql.user.edge_owner_to_timeline_media";
+ String igtvHomeRoot = "entry_data.ProfilePage[0].graphql.user.edge_felix_video_timeline";
+ String hashtagHomeRoot = "entry_data.TagPage[0].graphql.hashtag.edge_hashtag_to_media";
+ String mediaRootPath = json.optJSONObject("entry_data") != null ?
+ (hashtagRip ? hashtagHomeRoot : igtvRip ? igtvHomeRoot : userHomeRoot) : hashtagRip ?
+ hashtagExtra : igtvRip ? igtvExtra : taggedRip ? taggedExtra : userExtra;
+ return getJsonObjectByPath(json, mediaRootPath);
+ }
+
+ private JSONObject downloadItemDetailsJson(String shortcode) {
+ String url = "https://www.instagram.com/p/%s/?__a=1";
try {
- Document doc = Http.url("https://www.instagram.com/p/" + videoID).get();
+ Http http = Http.url(format(url, shortcode));
+ http.ignoreContentType();
+ http.connection().followRedirects(false);
+ Connection.Response response = http.cookies(cookies).response();
+ // Fix for redirection link; repeat request with the new shortcode
+ if (response.statusCode() == 302) {
+ Pattern redirectIdPattern = Pattern.compile("/p/(?[^?/]+)");
+ Matcher m = redirectIdPattern.matcher(response.header("location"));
+ return m.find() ? downloadItemDetailsJson(m.group("shortcode")) : null;
+ }
+ return new JSONObject(response.body());
+ } catch (Exception e) {
+ failedItems.add(shortcode);
+ LOGGER.trace(format("No item %s found", shortcode), e);
+ }
+ return null;
+ }
+
+ private void addPrefixInfo(JSONObject itemDetailsJson) {
+ JSONObject mediaItem = getJsonObjectByPath(itemDetailsJson, "graphql.shortcode_media");
+ String shortcode = mediaItem.getString("shortcode");
+ int subItemsCount = "GraphSidecar".equals(mediaItem.getString("__typename")) ?
+ getJsonArrayByPath(mediaItem, "edge_sidecar_to_children.edges").length() : 1;
+ for (int i = 0; i < subItemsCount; i++) {
+ itemPrefixes.add(getTimestampPrefix(mediaItem) + shortcode + "_");
+ }
+ }
+
+ private String getTimestampPrefix(JSONObject item) {
+ Instant instant = Instant.ofEpochSecond(item.getLong("taken_at_timestamp"));
+ return DateTimeFormatter.ofPattern("yyyy-MM-dd_HH-mm-ss_").format(ZonedDateTime.ofInstant(instant, ZoneOffset.UTC));
+ }
+
+ private Stream extends String> parseItemDetailsForUrls(JSONObject itemDetailsJson) {
+ JSONObject mediaItem = getJsonObjectByPath(itemDetailsJson, "graphql.shortcode_media");
+ // For some reason JSON video_url has lower quality than the HTML-tag one
+ // HTML-tag url is requested here and marked with _extra_ prefix
+ if ("GraphVideo".equals(mediaItem.getString("__typename"))) {
+ String shortcode = mediaItem.getString("shortcode");
+ String urlFromPage = getVideoUrlFromPage(shortcode);
+ if (!urlFromPage.isEmpty()) {
+ itemPrefixes.add(getTimestampPrefix(mediaItem) + shortcode + "_extra_");
+ return Stream.of(mediaItem.getString("video_url"), urlFromPage);
+ }
+ }
+ return parseRootForUrls(mediaItem);
+ }
+
+ // Uses recursion for GraphSidecar
+ private Stream extends String> parseRootForUrls(JSONObject mediaItem) {
+ String typeName = mediaItem.getString("__typename");
+ switch (typeName) {
+ case "GraphImage":
+ return Stream.of(mediaItem.getString("display_url"));
+ case "GraphVideo":
+ return Stream.of(mediaItem.getString("video_url"));
+ case "GraphSidecar":
+ JSONArray sideCar = getJsonArrayByPath(mediaItem, "edge_sidecar_to_children.edges");
+ return getStreamOfJsonArray(sideCar).map(object -> object.getJSONObject("node"))
+ .flatMap(this::parseRootForUrls);
+ default:
+ return Stream.empty();
+ }
+ }
+
+ private String getVideoUrlFromPage(String videoID) {
+ try {
+ Document doc = Http.url("https://www.instagram.com/p/" + videoID).cookies(cookies).get();
return doc.select("meta[property=og:video]").attr("content");
- } catch (IOException e) {
+ } catch (Exception e) {
LOGGER.warn("Unable to get page " + "https://www.instagram.com/p/" + videoID);
}
return "";
}
- private String getOriginalUrl(String imageURL) {
- // Without this regex most images will return a 403 error
- imageURL = imageURL.replaceAll("vp/[a-zA-Z0-9]*/", "");
- imageURL = imageURL.replaceAll("scontent.cdninstagram.com/hphotos-", "igcdn-photos-d-a.akamaihd.net/hphotos-ak-");
-
- // Instagram returns cropped images to unauthenticated applications to maintain legacy support.
- // To retrieve the uncropped image, remove this segment from the URL.
- // Segment format: cX.Y.W.H - eg: c0.134.1080.1080
- imageURL = imageURL.replaceAll("/c\\d{1,4}\\.\\d{1,4}\\.\\d{1,4}\\.\\d{1,4}", "");
- imageURL = imageURL.replaceAll("\\?ig_cache_key.+$", "");
- return imageURL;
- }
-
- public String getAfter(JSONObject json) {
- try {
- return json.getJSONObject("entry_data").getJSONArray("ProfilePage").getJSONObject(0)
- .getJSONObject("graphql").getJSONObject("user")
- .getJSONObject("edge_owner_to_timeline_media").getJSONObject("page_info").getString("end_cursor");
- } catch (JSONException e) {
- // This is here so that when the user rips the last page they don't get a "end_cursor not a string" error
- try {
- return json.getJSONObject("data").getJSONObject("user")
- .getJSONObject("edge_owner_to_timeline_media").getJSONObject("page_info").getString("end_cursor");
- } catch (JSONException t) {
- return "";
- }
- }
- }
-
@Override
-    public List<String> getURLsFromJSON(JSONObject json) {
-        List<String> imageURLs = new ArrayList<>();
- if (!url.toExternalForm().contains("/p/")) {
- nextPageID = getAfter(json);
+ protected void downloadURL(URL url, int index) {
+ if (Utils.getConfigBoolean("instagram.download_images_only", false) && url.toString().contains(".mp4?")) {
+ LOGGER.info("Skipped video url: " + url);
+ return;
}
-
- // get the rhx_gis value so we can get the next page later on
- if (rhx_gis == null) {
- rhx_gis = json.getString("rhx_gis");
- }
- if (!url.toExternalForm().contains("/p/")) {
- JSONArray datas = new JSONArray();
- if (!rippingTag) {
- // This first try only works on data from the first page
- try {
- JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage");
- userID = profilePage.getJSONObject(0).getString("logging_page_id").replaceAll("profilePage_", "");
- datas = json.getJSONObject("entry_data").getJSONArray("ProfilePage").getJSONObject(0)
- .getJSONObject("graphql").getJSONObject("user")
- .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges");
- } catch (JSONException e) {
- datas = json.getJSONObject("data").getJSONObject("user")
- .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges");
- }
- } else {
- try {
- JSONArray tagPage = json.getJSONObject("entry_data").getJSONArray("TagPage");
- datas = tagPage.getJSONObject(0).getJSONObject("graphql").getJSONObject("hashtag")
- .getJSONObject("edge_hashtag_to_media").getJSONArray("edges");
- } catch (JSONException e) {
- datas = json.getJSONObject("data").getJSONObject("hashtag").getJSONObject("edge_hashtag_to_media")
- .getJSONArray("edges");
- }
- }
- for (int i = 0; i < datas.length(); i++) {
- JSONObject data = (JSONObject) datas.get(i);
- data = data.getJSONObject("node");
- Long epoch = data.getLong("taken_at_timestamp");
- Instant instant = Instant.ofEpochSecond(epoch);
- String image_date = DateTimeFormatter.ofPattern("yyyy_MM_dd_hh:mm_").format(ZonedDateTime.ofInstant(instant, ZoneOffset.UTC));
- // It looks like tag pages don't have the __typename key
- if (!rippingTag) {
- if (data.getString("__typename").equals("GraphSidecar")) {
- try {
- Document slideShowDoc = Http.url(new URL("https://www.instagram.com/p/" + data.getString("shortcode"))).get();
-                        List<String> toAdd = getPostsFromSinglePage(getJSONFromPage(slideShowDoc));
- for (int slideShowInt = 0; slideShowInt < toAdd.size(); slideShowInt++) {
- addURLToDownload(new URL(toAdd.get(slideShowInt)), image_date + data.getString("shortcode"));
- }
- } catch (MalformedURLException e) {
- LOGGER.error("Unable to download slide show, URL was malformed");
- } catch (IOException e) {
- LOGGER.error("Unable to download slide show");
- }
- }
- }
- try {
- if (!data.getBoolean("is_video")) {
- if (imageURLs.isEmpty()) {
- // We add this one item to the array because either wise
- // the ripper will error out because we returned an empty array
- imageURLs.add(getOriginalUrl(data.getString("display_url")));
- }
- addURLToDownload(new URL(data.getString("display_url")), image_date);
- } else {
- if (!Utils.getConfigBoolean("instagram.download_images_only", false)) {
- addURLToDownload(new URL(getVideoFromPage(data.getString("shortcode"))), image_date);
- } else {
- sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Skipping video " + data.getString("shortcode"));
- }
- }
- } catch (MalformedURLException e) {
- LOGGER.info("Got MalformedURLException");
- return imageURLs;
- }
-
- if (isThisATest()) {
- break;
- }
- }
-
- } else { // We're ripping from a single page
- LOGGER.info("Ripping from single page");
- imageURLs = getPostsFromSinglePage(json);
- }
-
- return imageURLs;
+ addURLToDownload(url, itemPrefixes.get(index - 1), "", null, cookies);
}
- private String getIGGis(String variables) {
- String stringToMD5 = rhx_gis + ":" + variables;
- LOGGER.debug("String to md5 is \"" + stringToMD5 + "\"");
- try {
- byte[] bytesOfMessage = stringToMD5.getBytes("UTF-8");
-
- MessageDigest md = MessageDigest.getInstance("MD5");
- byte[] hash = md.digest(bytesOfMessage);
- StringBuffer sb = new StringBuffer();
- for (int i = 0; i < hash.length; ++i) {
- sb.append(Integer.toHexString((hash[i] & 0xFF) | 0x100).substring(1,3));
- }
- return sb.toString();
- } catch(UnsupportedEncodingException e) {
- return null;
- } catch(NoSuchAlgorithmException e) {
- return null;
- }
+ // Javascript parsing
+ /* ------------------------------------------------------------------------------------------------------- */
+ private String getHashValue(String javaScriptData, String keyword, int offset) {
+        List<Statement> statements = getJsBodyBlock(javaScriptData).getStatements();
+ return statements.stream()
+ .flatMap(statement -> filterItems(statement, ExpressionStatement.class))
+ .map(ExpressionStatement::getExpression)
+ .flatMap(expression -> filterItems(expression, CallNode.class))
+ .map(CallNode::getArgs)
+ .map(expressions -> expressions.get(0))
+ .flatMap(expression -> filterItems(expression, FunctionNode.class))
+ .map(FunctionNode::getBody)
+ .map(Block::getStatements)
+ .map(statementList -> lookForHash(statementList, keyword, offset))
+ .filter(Objects::nonNull)
+ .findFirst().orElse(null);
}
- @Override
- public JSONObject getNextPage(JSONObject json) throws IOException {
- JSONObject toreturn;
- java.util.Map cookies = new HashMap();
-// This shouldn't be hardcoded and will break one day
- cookies.put("ig_pr", "1");
- cookies.put("csrftoken", csrftoken);
- if (!nextPageID.equals("") && !isThisATest()) {
- if (rippingTag) {
- try {
- sleep(2500);
- String vars = "{\"tag_name\":\"" + tagName + "\",\"first\":4,\"after\":\"" + nextPageID + "\"}";
- String ig_gis = getIGGis(vars);
- toreturn = getPage("https://www.instagram.com/graphql/query/?query_hash=" + qHash +
- "&variables=" + vars, ig_gis);
- // Sleep for a while to avoid a ban
- LOGGER.info(toreturn);
- if (!pageHasImages(toreturn)) {
- throw new IOException("No more pages");
- }
- return toreturn;
-
- } catch (IOException e) {
- throw new IOException("No more pages");
- }
-
- }
- try {
- // Sleep for a while to avoid a ban
- sleep(2500);
- String vars = "{\"id\":\"" + userID + "\",\"first\":12,\"after\":\"" + nextPageID + "\"}";
- String ig_gis = getIGGis(vars);
- LOGGER.info(ig_gis);
-
- LOGGER.info("https://www.instagram.com/graphql/query/?query_hash=" + qHash + "&variables=" + vars);
- toreturn = getPage("https://www.instagram.com/graphql/query/?query_hash=" + qHash + "&variables=" + vars, ig_gis);
- if (!pageHasImages(toreturn)) {
- throw new IOException("No more pages");
- }
- return toreturn;
- } catch (IOException e) {
- return null;
- }
- } else {
- throw new IOException("No more pages");
- }
- }
-
- @Override
- public void downloadURL(URL url, int index) {
- addURLToDownload(url);
- }
-
- private boolean pageHasImages(JSONObject json) {
- LOGGER.info(json);
- int numberOfImages = json.getJSONObject("data").getJSONObject("user")
- .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges").length();
- if (numberOfImages == 0) {
- return false;
- }
- return true;
- }
-
- private JSONObject getPage(String url, String ig_gis) {
- StringBuilder sb = new StringBuilder();
- try {
- // We can't use Jsoup here because it won't download a non-html file larger than a MB
- // even if you set maxBodySize to 0
- URLConnection connection = new URL(url).openConnection();
- connection.setRequestProperty("User-Agent", USER_AGENT);
- connection.setRequestProperty("x-instagram-gis", ig_gis);
- BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()));
- String line;
- while ((line = in.readLine()) != null) {
- sb.append(line);
-
- }
- in.close();
- workAroundJsonString = sb.toString();
- return new JSONObject(sb.toString());
-
- } catch (MalformedURLException e) {
- LOGGER.info("Unable to get query_hash, " + url + " is a malformed URL");
- return null;
- } catch (IOException e) {
- LOGGER.info("Unable to get query_hash");
- LOGGER.info(e.getMessage());
- return null;
- }
- }
-
- private String getQHash(Document doc) {
- String jsFileURL = "https://www.instagram.com" + doc.select("link[rel=preload]").attr("href");
- StringBuilder sb = new StringBuilder();
- Document jsPage;
- try {
- // We can't use Jsoup here because it won't download a non-html file larger than a MB
- // even if you set maxBodySize to 0
- URLConnection connection = new URL(jsFileURL).openConnection();
- BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()));
- String line;
- while ((line = in.readLine()) != null) {
- sb.append(line);
- }
- in.close();
-
- } catch (MalformedURLException e) {
- LOGGER.info("Unable to get query_hash, " + jsFileURL + " is a malformed URL");
- return null;
- } catch (IOException e) {
- LOGGER.info("Unable to get query_hash");
- LOGGER.info(e.getMessage());
- return null;
- }
- if (!rippingTag) {
- Pattern jsP = Pattern.compile("byUserId\\.get\\(t\\)\\)\\|\\|void 0===r\\?void 0:r\\.pagination},queryId:.([a-zA-Z0-9]+)");
- Matcher m = jsP.matcher(sb.toString());
- if (m.find()) {
- return m.group(1);
- }
-
- } else {
- Pattern jsP = Pattern.compile("return e.tagMedia.byTagName.get\\(t\\).pagination},queryId:.([a-zA-Z0-9]+).");
- Matcher m = jsP.matcher(sb.toString());
- if (m.find()) {
- return m.group(1);
+    private String lookForHash(List<Statement> list, String keyword, int offset) {
+ for (int i = 0; i < list.size(); i++) {
+ Statement st = list.get(i);
+ if (st.toString().contains(keyword)) {
+ return list.get(i + offset).toString().replaceAll(".*\"([0-9a-f]*)\".*", "$1");
}
}
- LOGGER.error("Could not find query_hash on " + jsFileURL);
return null;
-
}
+    private <T> Stream<T> filterItems(Object obj, Class<T> aClass) {
+ return Stream.of(obj).filter(aClass::isInstance).map(aClass::cast);
+ }
+
+ private Block getJsBodyBlock(String javaScriptData) {
+ ErrorManager errors = new ErrorManager();
+ Context context = new Context(new Options("nashorn"), errors, Thread.currentThread().getContextClassLoader());
+ return new Parser(context.getEnv(), Source.sourceFor("name", javaScriptData), errors).parse().getBody();
+ }
+
+ // Some JSON helper methods below
+ /* ------------------------------------------------------------------------------------------------------- */
+ private JSONObject getJsonObjectByPath(JSONObject object, String key) {
+        Pattern arrayPattern = Pattern.compile("(?<arr>.*)\\[(?<idx>\\d+)]");
+ JSONObject result = object;
+ for (String s : key.split("[.]")) {
+ Matcher m = arrayPattern.matcher(s);
+ result = m.matches() ?
+ result.getJSONArray(m.group("arr")).getJSONObject(Integer.parseInt(m.group("idx"))) :
+ result.getJSONObject(s);
+ }
+ return result;
+ }
+
+    private <T> T getByPath(BiFunction<JSONObject, String, T> func, JSONObject object, String key) {
+ int namePos = key.lastIndexOf('.');
+ JSONObject parent = namePos < 0 ? object : getJsonObjectByPath(object, key.substring(0, namePos));
+ return func.apply(parent, key.substring(namePos + 1));
+ }
+
+ private JSONArray getJsonArrayByPath(JSONObject object, String key) {
+ return getByPath(JSONObject::getJSONArray, object, key);
+ }
+
+ private String getJsonStringByPath(JSONObject object, String key) {
+ return getByPath(JSONObject::getString, object, key);
+ }
+
+    private Stream<JSONObject> getStreamOfJsonArray(JSONArray array) {
+ return StreamSupport.stream(new JSONSpliterator(array), false);
+ }
+
+    private class JSONSpliterator extends Spliterators.AbstractSpliterator<JSONObject> {
+ private JSONArray array;
+ private int index = 0;
+
+ JSONSpliterator(JSONArray array) {
+ super(array.length(), SIZED | ORDERED);
+ this.array = array;
+ }
+
+ @Override
+        public boolean tryAdvance(Consumer<? super JSONObject> action) {
+ if (index == array.length()) {
+ return false;
+ }
+ action.accept(array.getJSONObject(index++));
+ return true;
+ }
+ }
}
diff --git a/src/main/java/com/rarchives/ripme/utils/Utils.java b/src/main/java/com/rarchives/ripme/utils/Utils.java
index 1f49b8a5..338c8f27 100644
--- a/src/main/java/com/rarchives/ripme/utils/Utils.java
+++ b/src/main/java/com/rarchives/ripme/utils/Utils.java
@@ -12,6 +12,7 @@ import javax.sound.sampled.Clip;
import javax.sound.sampled.Line;
import javax.sound.sampled.LineEvent;
import java.io.File;
+import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
@@ -179,21 +180,21 @@ public class Utils {
/**
* Determines if your current system is a Windows system.
*/
- private static boolean isWindows() {
+ public static boolean isWindows() {
return OS.contains("win");
}
/**
* Determines if your current system is a Mac system
*/
- private static boolean isMacOS() {
+ public static boolean isMacOS() {
return OS.contains("mac");
}
/**
* Determines if current system is based on UNIX
*/
- private static boolean isUnix() {
+ public static boolean isUnix() {
return OS.contains("nix") || OS.contains("nux") || OS.contains("bsd");
}
@@ -773,4 +774,34 @@ public class Utils {
return false;
}
+ public static File shortenSaveAsWindows(String ripsDirPath, String fileName) throws FileNotFoundException {
+ // int ripDirLength = ripsDirPath.length();
+ // int maxFileNameLength = 260 - ripDirLength;
+ // LOGGER.info(maxFileNameLength);
+        LOGGER.error("The filename " + fileName + " is too long to be saved on this file system.");
+ LOGGER.info("Shortening filename");
+ String fullPath = ripsDirPath + File.separator + fileName;
+ // How long the path without the file name is
+ int pathLength = ripsDirPath.length();
+ int fileNameLength = fileName.length();
+ if (pathLength == 260) {
+ // We've reached the max length, there's nothing more we can do
+ throw new FileNotFoundException("File path is too long for this OS");
+ }
+ String[] saveAsSplit = fileName.split("\\.");
+ // Get the file extension so when we shorten the file name we don't cut off the
+ // file extension
+ String fileExt = saveAsSplit[saveAsSplit.length - 1];
+ // The max limit for paths on Windows is 260 chars
+ LOGGER.info(fullPath.substring(0, 259 - pathLength - fileExt.length() + 1) + "." + fileExt);
+ fullPath = fullPath.substring(0, 259 - pathLength - fileExt.length() + 1) + "." + fileExt;
+ LOGGER.info(fullPath);
+ LOGGER.info(fullPath.length());
+ return new File(fullPath);
+ }
+
+ public static String sanitizeSaveAs(String fileNameToSan) {
+ return fileNameToSan.replaceAll("[\\\\/:*?\"<>|]", "_");
+ }
+
}
\ No newline at end of file