updated crawler
This commit is contained in:
parent
6d22a5f579
commit
6858565b14
16
pom.xml
16
pom.xml
@ -11,6 +11,22 @@
|
|||||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||||
</properties>
|
</properties>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>javax.xml.bind</groupId>
|
||||||
|
<artifactId>jaxb-api</artifactId>
|
||||||
|
<version>2.3.0</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.sun.xml.bind</groupId>
|
||||||
|
<artifactId>jaxb-core</artifactId>
|
||||||
|
<version>2.3.0</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.sun.xml.bind</groupId>
|
||||||
|
<artifactId>jaxb-impl</artifactId>
|
||||||
|
<version>2.3.0</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>junit</groupId>
|
<groupId>junit</groupId>
|
||||||
<artifactId>junit</artifactId>
|
<artifactId>junit</artifactId>
|
||||||
|
@ -2,21 +2,29 @@ package com.rarchives.ripme.ripper;
|
|||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileOutputStream;
|
import java.io.FileOutputStream;
|
||||||
|
import java.io.FileWriter;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.MalformedURLException;
|
import java.net.MalformedURLException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
|
|
||||||
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
||||||
import com.rarchives.ripme.utils.Utils;
|
import com.rarchives.ripme.utils.Utils;
|
||||||
import com.rarchives.ripme.ui.MainWindow;
|
import com.rarchives.ripme.ui.MainWindow;
|
||||||
|
import com.rarchives.ripme.ui.RipStatusMessage;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Simplified ripper, designed for ripping from sites by parsing HTML.
|
* Simplified ripper, designed for ripping from sites by parsing HTML.
|
||||||
*/
|
*/
|
||||||
public abstract class AbstractHTMLRipper extends AlbumRipper {
|
public abstract class AbstractHTMLRipper extends AbstractRipper {
|
||||||
|
|
||||||
|
private Map<URL, File> itemsPending = Collections.synchronizedMap(new HashMap<URL, File>());
|
||||||
|
private Map<URL, File> itemsCompleted = Collections.synchronizedMap(new HashMap<URL, File>());
|
||||||
|
private Map<URL, String> itemsErrored = Collections.synchronizedMap(new HashMap<URL, String>());
|
||||||
|
|
||||||
protected AbstractHTMLRipper(URL url) throws IOException {
|
protected AbstractHTMLRipper(URL url) throws IOException {
|
||||||
super(url);
|
super(url);
|
||||||
@ -93,6 +101,7 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
|
|||||||
|
|
||||||
// We set doc to null here so the while loop below this doesn't fire
|
// We set doc to null here so the while loop below this doesn't fire
|
||||||
doc = null;
|
doc = null;
|
||||||
|
LOGGER.debug("Adding items from " + this.url + " to queue");
|
||||||
}
|
}
|
||||||
|
|
||||||
while (doc != null) {
|
while (doc != null) {
|
||||||
@ -261,4 +270,210 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
|
|||||||
}
|
}
|
||||||
return prefix;
|
return prefix;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ------ Methods copied from AlbumRipper. ------
|
||||||
|
* This removes AlbumRipper's usage from this class.
|
||||||
|
*/
|
||||||
|
|
||||||
|
protected boolean allowDuplicates() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
/**
|
||||||
|
* Returns total amount of files attempted.
|
||||||
|
*/
|
||||||
|
public int getCount() {
|
||||||
|
return itemsCompleted.size() + itemsErrored.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
/**
|
||||||
|
* Queues multiple URLs of single images to download from a single Album URL
|
||||||
|
*/
|
||||||
|
public boolean addURLToDownload(URL url, File saveAs, String referrer, Map<String,String> cookies, Boolean getFileExtFromMIME) {
|
||||||
|
// Only download one file if this is a test.
|
||||||
|
if (super.isThisATest() &&
|
||||||
|
(itemsPending.size() > 0 || itemsCompleted.size() > 0 || itemsErrored.size() > 0)) {
|
||||||
|
stop();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (!allowDuplicates()
|
||||||
|
&& ( itemsPending.containsKey(url)
|
||||||
|
|| itemsCompleted.containsKey(url)
|
||||||
|
|| itemsErrored.containsKey(url) )) {
|
||||||
|
// Item is already downloaded/downloading, skip it.
|
||||||
|
LOGGER.info("[!] Skipping " + url + " -- already attempted: " + Utils.removeCWD(saveAs));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (Utils.getConfigBoolean("urls_only.save", false)) {
|
||||||
|
// Output URL to file
|
||||||
|
String urlFile = this.workingDir + File.separator + "urls.txt";
|
||||||
|
try (FileWriter fw = new FileWriter(urlFile, true)) {
|
||||||
|
fw.write(url.toExternalForm());
|
||||||
|
fw.write(System.lineSeparator());
|
||||||
|
itemsCompleted.put(url, new File(urlFile));
|
||||||
|
} catch (IOException e) {
|
||||||
|
LOGGER.error("Error while writing to " + urlFile, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
itemsPending.put(url, saveAs);
|
||||||
|
DownloadFileThread dft = new DownloadFileThread(url, saveAs, this, getFileExtFromMIME);
|
||||||
|
if (referrer != null) {
|
||||||
|
dft.setReferrer(referrer);
|
||||||
|
}
|
||||||
|
if (cookies != null) {
|
||||||
|
dft.setCookies(cookies);
|
||||||
|
}
|
||||||
|
threadPool.addThread(dft);
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean addURLToDownload(URL url, File saveAs) {
|
||||||
|
return addURLToDownload(url, saveAs, null, null, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Queues image to be downloaded and saved.
|
||||||
|
* Uses filename from URL to decide filename.
|
||||||
|
* @param url
|
||||||
|
* URL to download
|
||||||
|
* @return
|
||||||
|
* True on success
|
||||||
|
*/
|
||||||
|
protected boolean addURLToDownload(URL url) {
|
||||||
|
// Use empty prefix and empty subdirectory
|
||||||
|
return addURLToDownload(url, "", "");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
/**
|
||||||
|
* Cleans up & tells user about successful download
|
||||||
|
*/
|
||||||
|
public void downloadCompleted(URL url, File saveAs) {
|
||||||
|
if (observer == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
String path = Utils.removeCWD(saveAs);
|
||||||
|
RipStatusMessage msg = new RipStatusMessage(STATUS.DOWNLOAD_COMPLETE, path);
|
||||||
|
itemsPending.remove(url);
|
||||||
|
itemsCompleted.put(url, saveAs);
|
||||||
|
observer.update(this, msg);
|
||||||
|
|
||||||
|
checkIfComplete();
|
||||||
|
} catch (Exception e) {
|
||||||
|
LOGGER.error("Exception while updating observer: ", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
/**
|
||||||
|
* Cleans up & tells user about failed download.
|
||||||
|
*/
|
||||||
|
public void downloadErrored(URL url, String reason) {
|
||||||
|
if (observer == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
itemsPending.remove(url);
|
||||||
|
itemsErrored.put(url, reason);
|
||||||
|
observer.update(this, new RipStatusMessage(STATUS.DOWNLOAD_ERRORED, url + " : " + reason));
|
||||||
|
|
||||||
|
checkIfComplete();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
/**
|
||||||
|
* Tells user that a single file in the album they wish to download has
|
||||||
|
* already been downloaded in the past.
|
||||||
|
*/
|
||||||
|
public void downloadExists(URL url, File file) {
|
||||||
|
if (observer == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
itemsPending.remove(url);
|
||||||
|
itemsCompleted.put(url, file);
|
||||||
|
observer.update(this, new RipStatusMessage(STATUS.DOWNLOAD_WARN, url + " already saved as " + file.getAbsolutePath()));
|
||||||
|
|
||||||
|
checkIfComplete();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Notifies observers and updates state if all files have been ripped.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
protected void checkIfComplete() {
|
||||||
|
if (observer == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (itemsPending.isEmpty()) {
|
||||||
|
super.checkIfComplete();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets directory to save all ripped files to.
|
||||||
|
* @param url
|
||||||
|
* URL to define how the working directory should be saved.
|
||||||
|
* @throws
|
||||||
|
* IOException
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void setWorkingDir(URL url) throws IOException {
|
||||||
|
String path = Utils.getWorkingDirectory().getCanonicalPath();
|
||||||
|
if (!path.endsWith(File.separator)) {
|
||||||
|
path += File.separator;
|
||||||
|
}
|
||||||
|
String title;
|
||||||
|
if (Utils.getConfigBoolean("album_titles.save", true)) {
|
||||||
|
title = getAlbumTitle(this.url);
|
||||||
|
} else {
|
||||||
|
title = super.getAlbumTitle(this.url);
|
||||||
|
}
|
||||||
|
LOGGER.debug("Using album title '" + title + "'");
|
||||||
|
|
||||||
|
title = Utils.filesystemSafe(title);
|
||||||
|
path += title;
|
||||||
|
path = Utils.getOriginalDirectory(path) + File.separator; // check for case sensitive (unix only)
|
||||||
|
|
||||||
|
this.workingDir = new File(path);
|
||||||
|
if (!this.workingDir.exists()) {
|
||||||
|
LOGGER.info("[+] Creating directory: " + Utils.removeCWD(this.workingDir));
|
||||||
|
this.workingDir.mkdirs();
|
||||||
|
}
|
||||||
|
LOGGER.debug("Set working directory to: " + this.workingDir);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return
|
||||||
|
* Integer between 0 and 100 defining the progress of the album rip.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public int getCompletionPercentage() {
|
||||||
|
double total = itemsPending.size() + itemsErrored.size() + itemsCompleted.size();
|
||||||
|
return (int) (100 * ( (total - itemsPending.size()) / total));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return
|
||||||
|
* Human-readable information on the status of the current rip.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public String getStatusText() {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
sb.append(getCompletionPercentage())
|
||||||
|
.append("% ")
|
||||||
|
.append("- Pending: " ).append(itemsPending.size())
|
||||||
|
.append(", Completed: ").append(itemsCompleted.size())
|
||||||
|
.append(", Errored: " ).append(itemsErrored.size());
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
@ -1,19 +1,27 @@
|
|||||||
package com.rarchives.ripme.ripper;
|
package com.rarchives.ripme.ripper;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileWriter;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.MalformedURLException;
|
import java.net.MalformedURLException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
import org.json.JSONObject;
|
import org.json.JSONObject;
|
||||||
|
import com.rarchives.ripme.ui.RipStatusMessage;
|
||||||
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
||||||
import com.rarchives.ripme.utils.Utils;
|
import com.rarchives.ripme.utils.Utils;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Simplified ripper, designed for ripping from sites by parsing JSON.
|
* Simplified ripper, designed for ripping from sites by parsing JSON.
|
||||||
*/
|
*/
|
||||||
public abstract class AbstractJSONRipper extends AlbumRipper {
|
public abstract class AbstractJSONRipper extends AbstractRipper {
|
||||||
|
|
||||||
|
private Map<URL, File> itemsPending = Collections.synchronizedMap(new HashMap<URL, File>());
|
||||||
|
private Map<URL, File> itemsCompleted = Collections.synchronizedMap(new HashMap<URL, File>());
|
||||||
|
private Map<URL, String> itemsErrored = Collections.synchronizedMap(new HashMap<URL, String>());
|
||||||
|
|
||||||
protected AbstractJSONRipper(URL url) throws IOException {
|
protected AbstractJSONRipper(URL url) throws IOException {
|
||||||
super(url);
|
super(url);
|
||||||
@ -111,4 +119,209 @@ public abstract class AbstractJSONRipper extends AlbumRipper {
|
|||||||
}
|
}
|
||||||
return prefix;
|
return prefix;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ------ Methods copied from AlbumRipper ------
|
||||||
|
*/
|
||||||
|
|
||||||
|
protected boolean allowDuplicates() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
/**
|
||||||
|
* Returns total amount of files attempted.
|
||||||
|
*/
|
||||||
|
public int getCount() {
|
||||||
|
return itemsCompleted.size() + itemsErrored.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
/**
|
||||||
|
* Queues multiple URLs of single images to download from a single Album URL
|
||||||
|
*/
|
||||||
|
public boolean addURLToDownload(URL url, File saveAs, String referrer, Map<String,String> cookies, Boolean getFileExtFromMIME) {
|
||||||
|
// Only download one file if this is a test.
|
||||||
|
if (super.isThisATest() &&
|
||||||
|
(itemsPending.size() > 0 || itemsCompleted.size() > 0 || itemsErrored.size() > 0)) {
|
||||||
|
stop();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (!allowDuplicates()
|
||||||
|
&& ( itemsPending.containsKey(url)
|
||||||
|
|| itemsCompleted.containsKey(url)
|
||||||
|
|| itemsErrored.containsKey(url) )) {
|
||||||
|
// Item is already downloaded/downloading, skip it.
|
||||||
|
LOGGER.info("[!] Skipping " + url + " -- already attempted: " + Utils.removeCWD(saveAs));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (Utils.getConfigBoolean("urls_only.save", false)) {
|
||||||
|
// Output URL to file
|
||||||
|
String urlFile = this.workingDir + File.separator + "urls.txt";
|
||||||
|
try (FileWriter fw = new FileWriter(urlFile, true)) {
|
||||||
|
fw.write(url.toExternalForm());
|
||||||
|
fw.write(System.lineSeparator());
|
||||||
|
itemsCompleted.put(url, new File(urlFile));
|
||||||
|
} catch (IOException e) {
|
||||||
|
LOGGER.error("Error while writing to " + urlFile, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
itemsPending.put(url, saveAs);
|
||||||
|
DownloadFileThread dft = new DownloadFileThread(url, saveAs, this, getFileExtFromMIME);
|
||||||
|
if (referrer != null) {
|
||||||
|
dft.setReferrer(referrer);
|
||||||
|
}
|
||||||
|
if (cookies != null) {
|
||||||
|
dft.setCookies(cookies);
|
||||||
|
}
|
||||||
|
threadPool.addThread(dft);
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean addURLToDownload(URL url, File saveAs) {
|
||||||
|
return addURLToDownload(url, saveAs, null, null, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Queues image to be downloaded and saved.
|
||||||
|
* Uses filename from URL to decide filename.
|
||||||
|
* @param url
|
||||||
|
* URL to download
|
||||||
|
* @return
|
||||||
|
* True on success
|
||||||
|
*/
|
||||||
|
protected boolean addURLToDownload(URL url) {
|
||||||
|
// Use empty prefix and empty subdirectory
|
||||||
|
return addURLToDownload(url, "", "");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
/**
|
||||||
|
* Cleans up & tells user about successful download
|
||||||
|
*/
|
||||||
|
public void downloadCompleted(URL url, File saveAs) {
|
||||||
|
if (observer == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
String path = Utils.removeCWD(saveAs);
|
||||||
|
RipStatusMessage msg = new RipStatusMessage(STATUS.DOWNLOAD_COMPLETE, path);
|
||||||
|
itemsPending.remove(url);
|
||||||
|
itemsCompleted.put(url, saveAs);
|
||||||
|
observer.update(this, msg);
|
||||||
|
|
||||||
|
checkIfComplete();
|
||||||
|
} catch (Exception e) {
|
||||||
|
LOGGER.error("Exception while updating observer: ", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
/**
|
||||||
|
* Cleans up & tells user about failed download.
|
||||||
|
*/
|
||||||
|
public void downloadErrored(URL url, String reason) {
|
||||||
|
if (observer == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
itemsPending.remove(url);
|
||||||
|
itemsErrored.put(url, reason);
|
||||||
|
observer.update(this, new RipStatusMessage(STATUS.DOWNLOAD_ERRORED, url + " : " + reason));
|
||||||
|
|
||||||
|
checkIfComplete();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
/**
|
||||||
|
* Tells user that a single file in the album they wish to download has
|
||||||
|
* already been downloaded in the past.
|
||||||
|
*/
|
||||||
|
public void downloadExists(URL url, File file) {
|
||||||
|
if (observer == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
itemsPending.remove(url);
|
||||||
|
itemsCompleted.put(url, file);
|
||||||
|
observer.update(this, new RipStatusMessage(STATUS.DOWNLOAD_WARN, url + " already saved as " + file.getAbsolutePath()));
|
||||||
|
|
||||||
|
checkIfComplete();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Notifies observers and updates state if all files have been ripped.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
protected void checkIfComplete() {
|
||||||
|
if (observer == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (itemsPending.isEmpty()) {
|
||||||
|
super.checkIfComplete();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets directory to save all ripped files to.
|
||||||
|
* @param url
|
||||||
|
* URL to define how the working directory should be saved.
|
||||||
|
* @throws
|
||||||
|
* IOException
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void setWorkingDir(URL url) throws IOException {
|
||||||
|
String path = Utils.getWorkingDirectory().getCanonicalPath();
|
||||||
|
if (!path.endsWith(File.separator)) {
|
||||||
|
path += File.separator;
|
||||||
|
}
|
||||||
|
String title;
|
||||||
|
if (Utils.getConfigBoolean("album_titles.save", true)) {
|
||||||
|
title = getAlbumTitle(this.url);
|
||||||
|
} else {
|
||||||
|
title = super.getAlbumTitle(this.url);
|
||||||
|
}
|
||||||
|
LOGGER.debug("Using album title '" + title + "'");
|
||||||
|
|
||||||
|
title = Utils.filesystemSafe(title);
|
||||||
|
path += title;
|
||||||
|
path = Utils.getOriginalDirectory(path) + File.separator; // check for case sensitive (unix only)
|
||||||
|
|
||||||
|
this.workingDir = new File(path);
|
||||||
|
if (!this.workingDir.exists()) {
|
||||||
|
LOGGER.info("[+] Creating directory: " + Utils.removeCWD(this.workingDir));
|
||||||
|
this.workingDir.mkdirs();
|
||||||
|
}
|
||||||
|
LOGGER.debug("Set working directory to: " + this.workingDir);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return
|
||||||
|
* Integer between 0 and 100 defining the progress of the album rip.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public int getCompletionPercentage() {
|
||||||
|
double total = itemsPending.size() + itemsErrored.size() + itemsCompleted.size();
|
||||||
|
return (int) (100 * ( (total - itemsPending.size()) / total));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return
|
||||||
|
* Human-readable information on the status of the current rip.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public String getStatusText() {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
sb.append(getCompletionPercentage())
|
||||||
|
.append("% ")
|
||||||
|
.append("- Pending: " ).append(itemsPending.size())
|
||||||
|
.append(", Completed: ").append(itemsCompleted.size())
|
||||||
|
.append(", Errored: " ).append(itemsErrored.size());
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
@ -1,7 +1,11 @@
|
|||||||
package com.rarchives.ripme.ripper;
|
package com.rarchives.ripme.ripper;
|
||||||
|
|
||||||
import java.awt.Desktop;
|
import java.awt.Desktop;
|
||||||
import java.io.*;
|
import java.io.BufferedWriter;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.FileWriter;
|
||||||
|
import java.io.IOException;
|
||||||
import java.lang.reflect.Constructor;
|
import java.lang.reflect.Constructor;
|
||||||
import java.net.MalformedURLException;
|
import java.net.MalformedURLException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
@ -9,21 +13,17 @@ import java.util.ArrayList;
|
|||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Observable;
|
import java.util.Observable;
|
||||||
|
import java.util.Scanner;
|
||||||
import com.rarchives.ripme.App;
|
|
||||||
import org.apache.log4j.FileAppender;
|
import org.apache.log4j.FileAppender;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.jsoup.HttpStatusException;
|
import org.jsoup.HttpStatusException;
|
||||||
|
import com.rarchives.ripme.App;
|
||||||
import com.rarchives.ripme.ui.RipStatusComplete;
|
import com.rarchives.ripme.ui.RipStatusComplete;
|
||||||
import com.rarchives.ripme.ui.RipStatusHandler;
|
import com.rarchives.ripme.ui.RipStatusHandler;
|
||||||
import com.rarchives.ripme.ui.RipStatusMessage;
|
import com.rarchives.ripme.ui.RipStatusMessage;
|
||||||
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
||||||
import com.rarchives.ripme.utils.Utils;
|
import com.rarchives.ripme.utils.Utils;
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.util.Scanner;
|
|
||||||
|
|
||||||
public abstract class AbstractRipper
|
public abstract class AbstractRipper
|
||||||
extends Observable
|
extends Observable
|
||||||
implements RipperInterface, Runnable {
|
implements RipperInterface, Runnable {
|
||||||
@ -67,7 +67,7 @@ public abstract class AbstractRipper
|
|||||||
* Adds a URL to the url history file
|
* Adds a URL to the url history file
|
||||||
* @param downloadedURL URL to check if downloaded
|
* @param downloadedURL URL to check if downloaded
|
||||||
*/
|
*/
|
||||||
private void writeDownloadedURL(String downloadedURL) throws IOException {
|
protected void writeDownloadedURL(String downloadedURL) throws IOException {
|
||||||
// If "save urls only" is checked don't write to the url history file
|
// If "save urls only" is checked don't write to the url history file
|
||||||
if (Utils.getConfigBoolean("urls_only.save", false)) {
|
if (Utils.getConfigBoolean("urls_only.save", false)) {
|
||||||
return;
|
return;
|
||||||
@ -131,7 +131,7 @@ public abstract class AbstractRipper
|
|||||||
* Returns true if previously downloaded.
|
* Returns true if previously downloaded.
|
||||||
* Returns false if not yet downloaded.
|
* Returns false if not yet downloaded.
|
||||||
*/
|
*/
|
||||||
private boolean hasDownloadedURL(String url) {
|
protected boolean hasDownloadedURL(String url) {
|
||||||
File file = new File(URLHistoryFile);
|
File file = new File(URLHistoryFile);
|
||||||
url = normalizeUrl(url);
|
url = normalizeUrl(url);
|
||||||
|
|
||||||
@ -218,6 +218,44 @@ public abstract class AbstractRipper
|
|||||||
protected abstract boolean addURLToDownload(URL url, File saveAs, String referrer, Map<String, String> cookies,
|
protected abstract boolean addURLToDownload(URL url, File saveAs, String referrer, Map<String, String> cookies,
|
||||||
Boolean getFileExtFromMIME);
|
Boolean getFileExtFromMIME);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Queues image to be downloaded and saved.
|
||||||
|
* @param url
|
||||||
|
* URL of the file
|
||||||
|
* @param options
|
||||||
|
* A map<String,String> containing any changes to the default options.
|
||||||
|
* Options are getFileExtFromMIME, prefix, subdirectory, referrer, fileName, extension.
|
||||||
|
* getFileExtFromMIME should be "true" or "false"
|
||||||
|
* @param cookies
|
||||||
|
* The cookies to send to the server while downloading this file.
|
||||||
|
* @return
|
||||||
|
* True if downloaded successfully
|
||||||
|
* False if failed to download
|
||||||
|
*/
|
||||||
|
protected boolean addURLToDownload(URL url, Map<String, String> options, Map<String, String> cookies) {
|
||||||
|
// Bit of a hack but this lets us pass a bool using a map<string,String>
|
||||||
|
boolean useMIME = options.getOrDefault("getFileExtFromMIME", "false").toLowerCase().equals("true");
|
||||||
|
return addURLToDownload(url, options.getOrDefault("prefix", ""), options.getOrDefault("subdirectory", ""), options.getOrDefault("referrer", null),
|
||||||
|
cookies, options.getOrDefault("fileName", null), options.getOrDefault("extension", null), useMIME);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Queues image to be downloaded and saved.
|
||||||
|
* @param url
|
||||||
|
* URL of the file
|
||||||
|
* @param options
|
||||||
|
* A map<String,String> containing any changes to the default options.
|
||||||
|
* Options are getFileExtFromMIME, prefix, subdirectory, referrer, fileName, extension.
|
||||||
|
* getFileExtFromMIME should be "true" or "false"
|
||||||
|
* @return
|
||||||
|
* True if downloaded successfully
|
||||||
|
* False if failed to download
|
||||||
|
*/
|
||||||
|
protected boolean addURLToDownload(URL url, Map<String, String> options) {
|
||||||
|
return addURLToDownload(url, options, null);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Queues image to be downloaded and saved.
|
* Queues image to be downloaded and saved.
|
||||||
* @param url
|
* @param url
|
||||||
@ -237,6 +275,22 @@ public abstract class AbstractRipper
|
|||||||
* False if failed to download
|
* False if failed to download
|
||||||
*/
|
*/
|
||||||
protected boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map<String, String> cookies, String fileName, String extension, Boolean getFileExtFromMIME) {
|
protected boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map<String, String> cookies, String fileName, String extension, Boolean getFileExtFromMIME) {
|
||||||
|
// A common bug is rippers adding urls that are just "http:". This rejects said urls
|
||||||
|
if (url.toExternalForm().equals("http:") || url.toExternalForm().equals("https:")) {
|
||||||
|
LOGGER.info(url.toExternalForm() + " is a invalid url amd will be changed");
|
||||||
|
return false;
|
||||||
|
|
||||||
|
}
|
||||||
|
// Make sure the url doesn't contain any spaces as that can cause a 400 error when requesting the file
|
||||||
|
if (url.toExternalForm().contains(" ")) {
|
||||||
|
// If for some reason the url with all spaces encoded as %20 is malformed print an error
|
||||||
|
try {
|
||||||
|
url = new URL(url.toExternalForm().replaceAll(" ", "%20"));
|
||||||
|
} catch (MalformedURLException e) {
|
||||||
|
LOGGER.error("Unable to remove spaces from url\nURL: " + url.toExternalForm());
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
// Don't re-add the url if it was downloaded in a previous rip
|
// Don't re-add the url if it was downloaded in a previous rip
|
||||||
if (Utils.getConfigBoolean("remember.url_history", true) && !isThisATest()) {
|
if (Utils.getConfigBoolean("remember.url_history", true) && !isThisATest()) {
|
||||||
if (hasDownloadedURL(url.toExternalForm())) {
|
if (hasDownloadedURL(url.toExternalForm())) {
|
||||||
@ -280,6 +334,7 @@ public abstract class AbstractRipper
|
|||||||
saveFileAs.getParentFile().mkdirs();
|
saveFileAs.getParentFile().mkdirs();
|
||||||
}
|
}
|
||||||
if (Utils.getConfigBoolean("remember.url_history", true) && !isThisATest()) {
|
if (Utils.getConfigBoolean("remember.url_history", true) && !isThisATest()) {
|
||||||
|
LOGGER.info("Writing " + url.toExternalForm() + " to file");
|
||||||
try {
|
try {
|
||||||
writeDownloadedURL(url.toExternalForm() + "\n");
|
writeDownloadedURL(url.toExternalForm() + "\n");
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
@ -493,7 +548,7 @@ public abstract class AbstractRipper
|
|||||||
public static AbstractRipper getRipper(URL url) throws Exception {
|
public static AbstractRipper getRipper(URL url) throws Exception {
|
||||||
for (Constructor<?> constructor : getRipperConstructors("com.rarchives.ripme.ripper.rippers")) {
|
for (Constructor<?> constructor : getRipperConstructors("com.rarchives.ripme.ripper.rippers")) {
|
||||||
try {
|
try {
|
||||||
AlbumRipper ripper = (AlbumRipper) constructor.newInstance(url); // by design: can throw ClassCastException
|
AbstractRipper ripper = (AbstractRipper) constructor.newInstance(url); // by design: can throw ClassCastException
|
||||||
LOGGER.debug("Found album ripper: " + ripper.getClass().getName());
|
LOGGER.debug("Found album ripper: " + ripper.getClass().getName());
|
||||||
return ripper;
|
return ripper;
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
@ -70,7 +70,7 @@ public abstract class AlbumRipper extends AbstractRipper {
|
|||||||
String urlFile = this.workingDir + File.separator + "urls.txt";
|
String urlFile = this.workingDir + File.separator + "urls.txt";
|
||||||
try (FileWriter fw = new FileWriter(urlFile, true)) {
|
try (FileWriter fw = new FileWriter(urlFile, true)) {
|
||||||
fw.write(url.toExternalForm());
|
fw.write(url.toExternalForm());
|
||||||
fw.write("\n");
|
fw.write(System.lineSeparator());
|
||||||
itemsCompleted.put(url, new File(urlFile));
|
itemsCompleted.put(url, new File(urlFile));
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
LOGGER.error("Error while writing to " + urlFile, e);
|
LOGGER.error("Error while writing to " + urlFile, e);
|
||||||
@ -87,6 +87,7 @@ public abstract class AlbumRipper extends AbstractRipper {
|
|||||||
}
|
}
|
||||||
threadPool.addThread(dft);
|
threadPool.addThread(dft);
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,12 +1,6 @@
|
|||||||
package com.rarchives.ripme.ripper;
|
package com.rarchives.ripme.ripper;
|
||||||
|
|
||||||
import java.io.BufferedInputStream;
|
import java.io.*;
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileOutputStream;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStream;
|
|
||||||
import java.io.OutputStream;
|
|
||||||
import java.lang.reflect.Array;
|
|
||||||
import java.net.HttpURLConnection;
|
import java.net.HttpURLConnection;
|
||||||
import java.net.SocketTimeoutException;
|
import java.net.SocketTimeoutException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
@ -19,22 +13,18 @@ import java.util.ResourceBundle;
|
|||||||
import javax.net.ssl.HttpsURLConnection;
|
import javax.net.ssl.HttpsURLConnection;
|
||||||
|
|
||||||
import com.rarchives.ripme.ui.MainWindow;
|
import com.rarchives.ripme.ui.MainWindow;
|
||||||
import org.apache.commons.io.IOUtils;
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.jsoup.HttpStatusException;
|
import org.jsoup.HttpStatusException;
|
||||||
|
|
||||||
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
||||||
import com.rarchives.ripme.utils.Utils;
|
import com.rarchives.ripme.utils.Utils;
|
||||||
import com.rarchives.ripme.ripper.AbstractRipper;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Thread for downloading files.
|
* Thread for downloading files. Includes retry logic, observer notifications,
|
||||||
* Includes retry logic, observer notifications, and other goodies.
|
* and other goodies.
|
||||||
*/
|
*/
|
||||||
class DownloadFileThread extends Thread {
|
class DownloadFileThread extends Thread {
|
||||||
|
|
||||||
private ResourceBundle rb = MainWindow.rb;
|
private ResourceBundle rb = MainWindow.rb;
|
||||||
|
|
||||||
private static final Logger logger = Logger.getLogger(DownloadFileThread.class);
|
private static final Logger logger = Logger.getLogger(DownloadFileThread.class);
|
||||||
|
|
||||||
private String referrer = "";
|
private String referrer = "";
|
||||||
@ -63,16 +53,19 @@ class DownloadFileThread extends Thread {
|
|||||||
public void setReferrer(String referrer) {
|
public void setReferrer(String referrer) {
|
||||||
this.referrer = referrer;
|
this.referrer = referrer;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setCookies(Map<String, String> cookies) {
|
public void setCookies(Map<String, String> cookies) {
|
||||||
this.cookies = cookies;
|
this.cookies = cookies;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Attempts to download the file. Retries as needed.
|
* Attempts to download the file. Retries as needed. Notifies observers upon
|
||||||
* Notifies observers upon completion/error/warn.
|
* completion/error/warn.
|
||||||
*/
|
*/
|
||||||
public void run() {
|
public void run() {
|
||||||
|
// First thing we make sure the file name doesn't have any illegal chars in it
|
||||||
|
saveAs = new File(
|
||||||
|
saveAs.getParentFile().getAbsolutePath() + File.separator + Utils.sanitizeSaveAs(saveAs.getName()));
|
||||||
long fileSize = 0;
|
long fileSize = 0;
|
||||||
int bytesTotal = 0;
|
int bytesTotal = 0;
|
||||||
int bytesDownloaded = 0;
|
int bytesDownloaded = 0;
|
||||||
@ -85,13 +78,15 @@ class DownloadFileThread extends Thread {
|
|||||||
observer.downloadErrored(url, rb.getString("download.interrupted"));
|
observer.downloadErrored(url, rb.getString("download.interrupted"));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (saveAs.exists() && !observer.tryResumeDownload() && !getFileExtFromMIME ||
|
if (saveAs.exists() && !observer.tryResumeDownload() && !getFileExtFromMIME
|
||||||
Utils.fuzzyExists(new File(saveAs.getParent()), saveAs.getName()) && getFileExtFromMIME && !observer.tryResumeDownload()) {
|
|| Utils.fuzzyExists(new File(saveAs.getParent()), saveAs.getName()) && getFileExtFromMIME
|
||||||
|
&& !observer.tryResumeDownload()) {
|
||||||
if (Utils.getConfigBoolean("file.overwrite", false)) {
|
if (Utils.getConfigBoolean("file.overwrite", false)) {
|
||||||
logger.info("[!] " + rb.getString("deleting.existing.file") + prettySaveAs);
|
logger.info("[!] " + rb.getString("deleting.existing.file") + prettySaveAs);
|
||||||
saveAs.delete();
|
saveAs.delete();
|
||||||
} else {
|
} else {
|
||||||
logger.info("[!] " + rb.getString("skipping") + url + " -- " + rb.getString("file.already.exists") + ": " + prettySaveAs);
|
logger.info("[!] " + rb.getString("skipping") + url + " -- "
|
||||||
|
+ rb.getString("file.already.exists") + ": " + prettySaveAs);
|
||||||
observer.downloadExists(url, saveAs);
|
observer.downloadExists(url, saveAs);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -101,7 +96,8 @@ class DownloadFileThread extends Thread {
|
|||||||
int tries = 0; // Number of attempts to download
|
int tries = 0; // Number of attempts to download
|
||||||
do {
|
do {
|
||||||
tries += 1;
|
tries += 1;
|
||||||
InputStream bis = null; OutputStream fos = null;
|
InputStream bis = null;
|
||||||
|
OutputStream fos = null;
|
||||||
try {
|
try {
|
||||||
logger.info(" Downloading file: " + urlToDownload + (tries > 0 ? " Retry #" + tries : ""));
|
logger.info(" Downloading file: " + urlToDownload + (tries > 0 ? " Retry #" + tries : ""));
|
||||||
observer.sendUpdate(STATUS.DOWNLOAD_STARTED, url.toExternalForm());
|
observer.sendUpdate(STATUS.DOWNLOAD_STARTED, url.toExternalForm());
|
||||||
@ -110,12 +106,12 @@ class DownloadFileThread extends Thread {
|
|||||||
HttpURLConnection huc;
|
HttpURLConnection huc;
|
||||||
if (this.url.toString().startsWith("https")) {
|
if (this.url.toString().startsWith("https")) {
|
||||||
huc = (HttpsURLConnection) urlToDownload.openConnection();
|
huc = (HttpsURLConnection) urlToDownload.openConnection();
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
huc = (HttpURLConnection) urlToDownload.openConnection();
|
huc = (HttpURLConnection) urlToDownload.openConnection();
|
||||||
}
|
}
|
||||||
huc.setInstanceFollowRedirects(true);
|
huc.setInstanceFollowRedirects(true);
|
||||||
// It is important to set both ConnectTimeout and ReadTimeout. If you don't then ripme will wait forever
|
// It is important to set both ConnectTimeout and ReadTimeout. If you don't then
|
||||||
|
// ripme will wait forever
|
||||||
// for the server to send data after connecting.
|
// for the server to send data after connecting.
|
||||||
huc.setConnectTimeout(TIMEOUT);
|
huc.setConnectTimeout(TIMEOUT);
|
||||||
huc.setReadTimeout(TIMEOUT);
|
huc.setReadTimeout(TIMEOUT);
|
||||||
@ -142,8 +138,10 @@ class DownloadFileThread extends Thread {
|
|||||||
|
|
||||||
int statusCode = huc.getResponseCode();
|
int statusCode = huc.getResponseCode();
|
||||||
logger.debug("Status code: " + statusCode);
|
logger.debug("Status code: " + statusCode);
|
||||||
|
// If the server doesn't allow resuming downloads error out
|
||||||
if (statusCode != 206 && observer.tryResumeDownload() && saveAs.exists()) {
|
if (statusCode != 206 && observer.tryResumeDownload() && saveAs.exists()) {
|
||||||
// TODO find a better way to handle servers that don't support resuming downloads then just erroring out
|
// TODO find a better way to handle servers that don't support resuming
|
||||||
|
// downloads then just erroring out
|
||||||
throw new IOException(rb.getString("server.doesnt.support.resuming.downloads"));
|
throw new IOException(rb.getString("server.doesnt.support.resuming.downloads"));
|
||||||
}
|
}
|
||||||
if (statusCode / 100 == 3) { // 3xx Redirect
|
if (statusCode / 100 == 3) { // 3xx Redirect
|
||||||
@ -158,12 +156,15 @@ class DownloadFileThread extends Thread {
|
|||||||
throw new IOException("Redirect status code " + statusCode + " - redirect to " + location);
|
throw new IOException("Redirect status code " + statusCode + " - redirect to " + location);
|
||||||
}
|
}
|
||||||
if (statusCode / 100 == 4) { // 4xx errors
|
if (statusCode / 100 == 4) { // 4xx errors
|
||||||
logger.error("[!] " + rb.getString("nonretriable.status.code") + " " + statusCode + " while downloading from " + url);
|
logger.error("[!] " + rb.getString("nonretriable.status.code") + " " + statusCode
|
||||||
observer.downloadErrored(url, rb.getString("nonretriable.status.code") + " " + statusCode + " while downloading " + url.toExternalForm());
|
+ " while downloading from " + url);
|
||||||
|
observer.downloadErrored(url, rb.getString("nonretriable.status.code") + " "
|
||||||
|
+ statusCode + " while downloading " + url.toExternalForm());
|
||||||
return; // Not retriable, drop out.
|
return; // Not retriable, drop out.
|
||||||
}
|
}
|
||||||
if (statusCode / 100 == 5) { // 5xx errors
|
if (statusCode / 100 == 5) { // 5xx errors
|
||||||
observer.downloadErrored(url, rb.getString("retriable.status.code") + " " + statusCode + " while downloading " + url.toExternalForm());
|
observer.downloadErrored(url, rb.getString("retriable.status.code") + " " + statusCode
|
||||||
|
+ " while downloading " + url.toExternalForm());
|
||||||
// Throw exception so download can be retried
|
// Throw exception so download can be retried
|
||||||
throw new IOException(rb.getString("retriable.status.code") + " " + statusCode);
|
throw new IOException(rb.getString("retriable.status.code") + " " + statusCode);
|
||||||
}
|
}
|
||||||
@ -174,7 +175,8 @@ class DownloadFileThread extends Thread {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If the ripper is using the bytes progress bar set bytesTotal to huc.getContentLength()
|
// If the ripper is using the bytes progress bar set bytesTotal to
|
||||||
|
// huc.getContentLength()
|
||||||
if (observer.useByteProgessBar()) {
|
if (observer.useByteProgessBar()) {
|
||||||
bytesTotal = huc.getContentLength();
|
bytesTotal = huc.getContentLength();
|
||||||
observer.setBytesTotal(bytesTotal);
|
observer.setBytesTotal(bytesTotal);
|
||||||
@ -202,7 +204,8 @@ class DownloadFileThread extends Thread {
|
|||||||
saveAs = new File(saveAs.toString() + "." + fileExt);
|
saveAs = new File(saveAs.toString() + "." + fileExt);
|
||||||
} else {
|
} else {
|
||||||
logger.error(rb.getString("was.unable.to.get.content.type.using.magic.number"));
|
logger.error(rb.getString("was.unable.to.get.content.type.using.magic.number"));
|
||||||
logger.error(rb.getString("magic.number.was") + ": " + Arrays.toString(magicBytes));
|
logger.error(
|
||||||
|
rb.getString("magic.number.was") + ": " + Arrays.toString(magicBytes));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -210,11 +213,41 @@ class DownloadFileThread extends Thread {
|
|||||||
if (statusCode == 206) {
|
if (statusCode == 206) {
|
||||||
fos = new FileOutputStream(saveAs, true);
|
fos = new FileOutputStream(saveAs, true);
|
||||||
} else {
|
} else {
|
||||||
|
try {
|
||||||
fos = new FileOutputStream(saveAs);
|
fos = new FileOutputStream(saveAs);
|
||||||
|
} catch (FileNotFoundException e) {
|
||||||
|
// We do this because some filesystems have a max name length
|
||||||
|
if (e.getMessage().contains("File name too long")) {
|
||||||
|
logger.error("The filename " + saveAs.getName()
|
||||||
|
+ " is to long to be saved on this file system.");
|
||||||
|
logger.info("Shortening filename");
|
||||||
|
String[] saveAsSplit = saveAs.getName().split("\\.");
|
||||||
|
// Get the file extension so when we shorten the file name we don't cut off the
|
||||||
|
// file extension
|
||||||
|
String fileExt = saveAsSplit[saveAsSplit.length - 1];
|
||||||
|
// The max limit for filenames on Linux with Ext3/4 is 255 bytes
|
||||||
|
logger.info(saveAs.getName().substring(0, 254 - fileExt.length()) + fileExt);
|
||||||
|
String filename = saveAs.getName().substring(0, 254 - fileExt.length()) + "." + fileExt;
|
||||||
|
// We can't just use the new file name as the saveAs because the file name
|
||||||
|
// doesn't include the
|
||||||
|
// users save path, so we get the user save path from the old saveAs
|
||||||
|
saveAs = new File(saveAs.getParentFile().getAbsolutePath() + File.separator + filename);
|
||||||
|
fos = new FileOutputStream(saveAs);
|
||||||
|
} else if (saveAs.getAbsolutePath().length() > 259 && Utils.isWindows()) {
|
||||||
|
// This if is for when the file path has gone above 260 chars which windows does
|
||||||
|
// not allow
|
||||||
|
fos = new FileOutputStream(
|
||||||
|
Utils.shortenSaveAsWindows(saveAs.getParentFile().getPath(), saveAs.getName()));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
byte[] data = new byte[1024 * 256];
|
byte[] data = new byte[1024 * 256];
|
||||||
int bytesRead;
|
int bytesRead;
|
||||||
boolean shouldSkipFileDownload = huc.getContentLength() / 10000000 >= 10;
|
boolean shouldSkipFileDownload = huc.getContentLength() / 1000000 >= 10 && AbstractRipper.isThisATest();
|
||||||
|
// If this is a test rip we skip large downloads
|
||||||
|
if (shouldSkipFileDownload) {
|
||||||
|
logger.debug("Not downloading whole file because it is over 10mb and this is a test");
|
||||||
|
} else {
|
||||||
while ((bytesRead = bis.read(data)) != -1) {
|
while ((bytesRead = bis.read(data)) != -1) {
|
||||||
try {
|
try {
|
||||||
observer.stopCheck();
|
observer.stopCheck();
|
||||||
@ -228,13 +261,6 @@ class DownloadFileThread extends Thread {
|
|||||||
observer.setBytesCompleted(bytesDownloaded);
|
observer.setBytesCompleted(bytesDownloaded);
|
||||||
observer.sendUpdate(STATUS.COMPLETED_BYTES, bytesDownloaded);
|
observer.sendUpdate(STATUS.COMPLETED_BYTES, bytesDownloaded);
|
||||||
}
|
}
|
||||||
// If this is a test and we're downloading a large file
|
|
||||||
if (AbstractRipper.isThisATest() && shouldSkipFileDownload) {
|
|
||||||
logger.debug("Not downloading whole file because it is over 10mb and this is a test");
|
|
||||||
bis.close();
|
|
||||||
fos.close();
|
|
||||||
break;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
bis.close();
|
bis.close();
|
||||||
@ -249,24 +275,34 @@ class DownloadFileThread extends Thread {
|
|||||||
logger.debug(rb.getString("http.status.exception"), hse);
|
logger.debug(rb.getString("http.status.exception"), hse);
|
||||||
logger.error("[!] HTTP status " + hse.getStatusCode() + " while downloading from " + urlToDownload);
|
logger.error("[!] HTTP status " + hse.getStatusCode() + " while downloading from " + urlToDownload);
|
||||||
if (hse.getStatusCode() == 404 && Utils.getConfigBoolean("errors.skip404", false)) {
|
if (hse.getStatusCode() == 404 && Utils.getConfigBoolean("errors.skip404", false)) {
|
||||||
observer.downloadErrored(url, "HTTP status code " + hse.getStatusCode() + " while downloading " + url.toExternalForm());
|
observer.downloadErrored(url,
|
||||||
|
"HTTP status code " + hse.getStatusCode() + " while downloading " + url.toExternalForm());
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
logger.debug("IOException", e);
|
logger.debug("IOException", e);
|
||||||
logger.error("[!] " + rb.getString("exception.while.downloading.file") + ": " + url + " - " + e.getMessage());
|
logger.error("[!] " + rb.getString("exception.while.downloading.file") + ": " + url + " - "
|
||||||
|
+ e.getMessage());
|
||||||
} finally {
|
} finally {
|
||||||
// Close any open streams
|
// Close any open streams
|
||||||
try {
|
try {
|
||||||
if (bis != null) { bis.close(); }
|
if (bis != null) {
|
||||||
} catch (IOException e) { }
|
bis.close();
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
}
|
||||||
try {
|
try {
|
||||||
if (fos != null) { fos.close(); }
|
if (fos != null) {
|
||||||
} catch (IOException e) { }
|
fos.close();
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (tries > this.retries) {
|
if (tries > this.retries) {
|
||||||
logger.error("[!] " + rb.getString ("exceeded.maximum.retries") + " (" + this.retries + ") for URL " + url);
|
logger.error("[!] " + rb.getString("exceeded.maximum.retries") + " (" + this.retries
|
||||||
observer.downloadErrored(url, rb.getString("failed.to.download") + " " + url.toExternalForm());
|
+ ") for URL " + url);
|
||||||
|
observer.downloadErrored(url,
|
||||||
|
rb.getString("failed.to.download") + " " + url.toExternalForm());
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
} while (true);
|
} while (true);
|
||||||
|
@ -9,19 +9,27 @@ import java.util.List;
|
|||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import com.rarchives.ripme.ripper.AbstractSingleFileRipper;
|
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
||||||
|
import org.json.JSONArray;
|
||||||
|
import org.json.JSONObject;
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
|
import org.jsoup.nodes.Element;
|
||||||
import org.jsoup.select.Elements;
|
import org.jsoup.select.Elements;
|
||||||
|
|
||||||
import com.rarchives.ripme.utils.Http;
|
import com.rarchives.ripme.utils.Http;
|
||||||
|
|
||||||
|
|
||||||
public class GfycatRipper extends AbstractSingleFileRipper {
|
public class GfycatRipper extends AbstractHTMLRipper {
|
||||||
|
|
||||||
private static final String HOST = "gfycat.com";
|
private static final String HOST = "gfycat.com";
|
||||||
|
String username = "";
|
||||||
|
String cursor = "";
|
||||||
|
String count = "30";
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
public GfycatRipper(URL url) throws IOException {
|
public GfycatRipper(URL url) throws IOException {
|
||||||
super(url);
|
super(new URL(url.toExternalForm().split("-")[0].replace("thumbs.", "")));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -41,14 +49,26 @@ public class GfycatRipper extends AbstractSingleFileRipper {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
public URL sanitizeURL(URL url) throws MalformedURLException {
|
||||||
url = new URL(url.toExternalForm().replace("/gifs/detail", ""));
|
String sUrl = url.toExternalForm();
|
||||||
|
sUrl = sUrl.replace("/gifs/detail", "");
|
||||||
|
sUrl = sUrl.replace("/amp", "");
|
||||||
|
return new URL(sUrl);
|
||||||
|
}
|
||||||
|
|
||||||
return url;
|
public boolean isProfile() {
|
||||||
|
Pattern p = Pattern.compile("^https?://[wm.]*gfycat\\.com/@([a-zA-Z0-9]+).*$");
|
||||||
|
Matcher m = p.matcher(url.toExternalForm());
|
||||||
|
return m.matches();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Document getFirstPage() throws IOException {
|
public Document getFirstPage() throws IOException {
|
||||||
|
if (!isProfile()) {
|
||||||
return Http.url(url).get();
|
return Http.url(url).get();
|
||||||
|
} else {
|
||||||
|
username = getGID(url);
|
||||||
|
return Http.url(new URL("https://api.gfycat.com/v1/users/" + username + "/gfycats")).ignoreContentType().get();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -58,27 +78,58 @@ public class GfycatRipper extends AbstractSingleFileRipper {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getGID(URL url) throws MalformedURLException {
|
public String getGID(URL url) throws MalformedURLException {
|
||||||
Pattern p = Pattern.compile("^https?://[wm.]*gfycat\\.com/([a-zA-Z0-9]+).*$");
|
Pattern p = Pattern.compile("^https?://(thumbs\\.|[wm\\.]*)gfycat\\.com/@?([a-zA-Z0-9]+).*$");
|
||||||
Matcher m = p.matcher(url.toExternalForm());
|
Matcher m = p.matcher(url.toExternalForm());
|
||||||
if (m.matches()) {
|
|
||||||
return m.group(1);
|
if (m.matches())
|
||||||
}
|
return m.group(2);
|
||||||
|
|
||||||
throw new MalformedURLException(
|
throw new MalformedURLException(
|
||||||
"Expected gfycat.com format: "
|
"Expected gfycat.com format: "
|
||||||
+ "gfycat.com/id"
|
+ "gfycat.com/id or "
|
||||||
|
+ "thumbs.gfycat.com/id.gif"
|
||||||
+ " Got: " + url);
|
+ " Got: " + url);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private String stripHTMLTags(String t) {
|
||||||
|
t = t.replaceAll("<html>\n" +
|
||||||
|
" <head></head>\n" +
|
||||||
|
" <body>", "");
|
||||||
|
t = t.replaceAll("</body>\n" +
|
||||||
|
"</html>", "");
|
||||||
|
t = t.replaceAll("\n", "");
|
||||||
|
t = t.replaceAll("=\"\"", "");
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Document getNextPage(Document doc) throws IOException {
|
||||||
|
if (cursor.equals("")) {
|
||||||
|
throw new IOException("No more pages");
|
||||||
|
}
|
||||||
|
return Http.url(new URL("https://api.gfycat.com/v1/users/" + username + "/gfycats?count=" + count + "&cursor=" + cursor)).ignoreContentType().get();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<String> getURLsFromPage(Document doc) {
|
public List<String> getURLsFromPage(Document doc) {
|
||||||
List<String> result = new ArrayList<>();
|
List<String> result = new ArrayList<>();
|
||||||
Elements videos = doc.select("source");
|
if (isProfile()) {
|
||||||
String vidUrl = videos.first().attr("src");
|
JSONObject page = new JSONObject(stripHTMLTags(doc.html()));
|
||||||
if (vidUrl.startsWith("//")) {
|
JSONArray content = page.getJSONArray("gfycats");
|
||||||
vidUrl = "http:" + vidUrl;
|
for (int i = 0; i < content.length(); i++) {
|
||||||
|
result.add(content.getJSONObject(i).getString("mp4Url"));
|
||||||
|
}
|
||||||
|
cursor = page.getString("cursor");
|
||||||
|
} else {
|
||||||
|
Elements videos = doc.select("script");
|
||||||
|
for (Element el : videos) {
|
||||||
|
String json = el.html();
|
||||||
|
if (json.startsWith("{")) {
|
||||||
|
JSONObject page = new JSONObject(json);
|
||||||
|
result.add(page.getJSONObject("video").getString("contentUrl"));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
result.add(vidUrl);
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -95,14 +146,14 @@ public class GfycatRipper extends AbstractSingleFileRipper {
|
|||||||
url = new URL(url.toExternalForm().replace("/gifs/detail", ""));
|
url = new URL(url.toExternalForm().replace("/gifs/detail", ""));
|
||||||
|
|
||||||
Document doc = Http.url(url).get();
|
Document doc = Http.url(url).get();
|
||||||
Elements videos = doc.select("source");
|
Elements videos = doc.select("script");
|
||||||
if (videos.isEmpty()) {
|
for (Element el : videos) {
|
||||||
throw new IOException("Could not find source at " + url);
|
String json = el.html();
|
||||||
}
|
if (json.startsWith("{")) {
|
||||||
String vidUrl = videos.first().attr("src");
|
JSONObject page = new JSONObject(json);
|
||||||
if (vidUrl.startsWith("//")) {
|
return page.getJSONObject("video").getString("contentUrl");
|
||||||
vidUrl = "http:" + vidUrl;
|
}
|
||||||
}
|
}
|
||||||
return vidUrl;
|
throw new IOException();
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -1,480 +1,502 @@
|
|||||||
package com.rarchives.ripme.ripper.rippers;
|
package com.rarchives.ripme.ripper.rippers;
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStreamReader;
|
|
||||||
import java.io.UnsupportedEncodingException;
|
|
||||||
import java.net.MalformedURLException;
|
|
||||||
import java.net.URL;
|
|
||||||
import java.net.URLConnection;
|
|
||||||
import java.time.*;
|
|
||||||
import java.time.format.DateTimeFormatter;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.regex.Matcher;
|
|
||||||
import java.util.regex.Pattern;
|
|
||||||
import java.security.*;
|
|
||||||
|
|
||||||
import org.json.JSONArray;
|
|
||||||
import org.json.JSONException;
|
|
||||||
import org.json.JSONObject;
|
|
||||||
|
|
||||||
import com.rarchives.ripme.ripper.AbstractJSONRipper;
|
import com.rarchives.ripme.ripper.AbstractJSONRipper;
|
||||||
import com.rarchives.ripme.utils.Http;
|
import com.rarchives.ripme.utils.Http;
|
||||||
|
import com.rarchives.ripme.utils.Utils;
|
||||||
|
import jdk.nashorn.internal.ir.Block;
|
||||||
|
import jdk.nashorn.internal.ir.CallNode;
|
||||||
|
import jdk.nashorn.internal.ir.ExpressionStatement;
|
||||||
|
import jdk.nashorn.internal.ir.FunctionNode;
|
||||||
|
import jdk.nashorn.internal.ir.Statement;
|
||||||
|
import jdk.nashorn.internal.parser.Parser;
|
||||||
|
import jdk.nashorn.internal.runtime.Context;
|
||||||
|
import jdk.nashorn.internal.runtime.ErrorManager;
|
||||||
|
import jdk.nashorn.internal.runtime.Source;
|
||||||
|
import jdk.nashorn.internal.runtime.options.Options;
|
||||||
|
import org.json.JSONArray;
|
||||||
|
import org.json.JSONObject;
|
||||||
import org.jsoup.Connection;
|
import org.jsoup.Connection;
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
import com.rarchives.ripme.ui.RipStatusMessage;
|
|
||||||
import com.rarchives.ripme.utils.Utils;
|
import java.io.IOException;
|
||||||
|
import java.net.MalformedURLException;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.time.ZoneOffset;
|
||||||
|
import java.time.ZonedDateTime;
|
||||||
|
import java.time.format.DateTimeFormatter;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.Spliterators;
|
||||||
|
import java.util.function.BiFunction;
|
||||||
|
import java.util.function.Consumer;
|
||||||
|
import java.util.function.Function;
|
||||||
|
import java.util.function.Predicate;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
import java.util.stream.StreamSupport;
|
||||||
|
|
||||||
|
import static java.lang.String.format;
|
||||||
|
|
||||||
|
// Available configuration options:
|
||||||
|
// instagram.download_images_only - use to skip video links
|
||||||
|
// instagram.session_id - should be set for stories and private accounts (look for sessionid cookie)
|
||||||
public class InstagramRipper extends AbstractJSONRipper {
|
public class InstagramRipper extends AbstractJSONRipper {
|
||||||
String nextPageID = "";
|
|
||||||
private String qHash;
|
private String qHash;
|
||||||
private boolean rippingTag = false;
|
private Map<String, String> cookies = new HashMap<>();
|
||||||
private String tagName;
|
private String idString;
|
||||||
|
private List<String> itemPrefixes = new ArrayList<>();
|
||||||
|
private List<String> failedItems = new ArrayList<>();
|
||||||
|
|
||||||
private String userID;
|
private boolean hashtagRip;
|
||||||
private String rhx_gis = null;
|
private boolean taggedRip;
|
||||||
private String csrftoken;
|
private boolean igtvRip;
|
||||||
// Run into a weird issue with Jsoup cutting some json pages in half, this is a work around
|
private boolean postRip;
|
||||||
// see https://github.com/RipMeApp/ripme/issues/601
|
private boolean storiesRip;
|
||||||
private String workAroundJsonString;
|
private boolean pinnedRip;
|
||||||
|
private boolean pinnedReelRip;
|
||||||
|
|
||||||
|
private enum UrlTypePattern {
|
||||||
|
// e.g. https://www.instagram.com/explore/tags/rachelc00k/
|
||||||
|
HASHTAG("explore/tags/(?<tagname>[^?/]+)"),
|
||||||
|
|
||||||
|
// e.g. https://www.instagram.com/stories/rachelc00k/
|
||||||
|
STORIES("stories/(?<username>[^?/]+)"),
|
||||||
|
|
||||||
|
// e.g. https://www.instagram.com/rachelc00k/tagged/
|
||||||
|
USER_TAGGED("(?<username>[^?/]+)/tagged"),
|
||||||
|
|
||||||
|
// e.g. https://www.instagram.com/rachelc00k/channel/
|
||||||
|
IGTV("(?<username>[^?/]+)/channel"),
|
||||||
|
|
||||||
|
// e.g. https://www.instagram.com/p/Bu4CEfbhNk4/
|
||||||
|
SINGLE_POST("(?:p|tv)/(?<shortcode>[^?/]+)"),
|
||||||
|
|
||||||
|
// pseudo-url, e.g. https://www.instagram.com/rachelc00k/?pinned
|
||||||
|
PINNED("(?<username>[^?/]+)/?[?]pinned"),
|
||||||
|
|
||||||
|
// e.g. https://www.instagram.com/rachelc00k/
|
||||||
|
USER_PROFILE("(?<username>[^?/]+)");
|
||||||
|
|
||||||
|
private final String urlTypePattern;
|
||||||
|
|
||||||
|
UrlTypePattern(String urlTypePattern) {
|
||||||
|
this.urlTypePattern = urlTypePattern;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public InstagramRipper(URL url) throws IOException {
|
public InstagramRipper(URL url) throws IOException {
|
||||||
super(url);
|
super(url);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getHost() {
|
protected String getDomain() {
|
||||||
return "instagram";
|
|
||||||
}
|
|
||||||
@Override
|
|
||||||
public String getDomain() {
|
|
||||||
return "instagram.com";
|
return "instagram.com";
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean canRip(URL url) {
|
public String getHost() {
|
||||||
return (url.getHost().endsWith("instagram.com"));
|
return "instagram";
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
|
||||||
URL san_url = new URL(url.toExternalForm().replaceAll("\\?hl=\\S*", ""));
|
|
||||||
LOGGER.info("sanitized URL is " + san_url.toExternalForm());
|
|
||||||
return san_url;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String normalizeUrl(String url) {
|
|
||||||
// Remove the date sig from the url
|
|
||||||
return url.replaceAll("/[A-Z0-9]{8}/", "/");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override public boolean hasASAPRipping() {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
private List<String> getPostsFromSinglePage(JSONObject json) {
|
|
||||||
List<String> imageURLs = new ArrayList<>();
|
|
||||||
JSONArray datas;
|
|
||||||
if (json.getJSONObject("entry_data").getJSONArray("PostPage")
|
|
||||||
.getJSONObject(0).getJSONObject("graphql").getJSONObject("shortcode_media")
|
|
||||||
.has("edge_sidecar_to_children")) {
|
|
||||||
datas = json.getJSONObject("entry_data").getJSONArray("PostPage")
|
|
||||||
.getJSONObject(0).getJSONObject("graphql").getJSONObject("shortcode_media")
|
|
||||||
.getJSONObject("edge_sidecar_to_children").getJSONArray("edges");
|
|
||||||
for (int i = 0; i < datas.length(); i++) {
|
|
||||||
JSONObject data = (JSONObject) datas.get(i);
|
|
||||||
data = data.getJSONObject("node");
|
|
||||||
if (data.has("is_video") && data.getBoolean("is_video")) {
|
|
||||||
imageURLs.add(data.getString("video_url"));
|
|
||||||
} else {
|
|
||||||
imageURLs.add(data.getString("display_url"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
JSONObject data = json.getJSONObject("entry_data").getJSONArray("PostPage")
|
|
||||||
.getJSONObject(0).getJSONObject("graphql").getJSONObject("shortcode_media");
|
|
||||||
if (data.getBoolean("is_video")) {
|
|
||||||
imageURLs.add(data.getString("video_url"));
|
|
||||||
} else {
|
|
||||||
imageURLs.add(data.getString("display_url"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return imageURLs;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getGID(URL url) throws MalformedURLException {
|
public String getGID(URL url) throws MalformedURLException {
|
||||||
Pattern p = Pattern.compile("^https?://instagram.com/([^/]+)/?");
|
for (UrlTypePattern urlType : UrlTypePattern.values()) {
|
||||||
Matcher m = p.matcher(url.toExternalForm());
|
Matcher urlMatcher = getUrlMatcher(url, urlType);
|
||||||
if (m.matches()) {
|
if (urlMatcher.matches()) {
|
||||||
return m.group(1);
|
switch (urlType) {
|
||||||
|
case HASHTAG:
|
||||||
|
hashtagRip = true;
|
||||||
|
return "tag_" + urlMatcher.group("tagname");
|
||||||
|
case PINNED:
|
||||||
|
pinnedRip = true;
|
||||||
|
return urlMatcher.group("username") + "_pinned";
|
||||||
|
case STORIES:
|
||||||
|
storiesRip = true;
|
||||||
|
return urlMatcher.group("username") + "_stories";
|
||||||
|
case USER_TAGGED:
|
||||||
|
taggedRip = true;
|
||||||
|
return urlMatcher.group("username") + "_tagged";
|
||||||
|
case IGTV:
|
||||||
|
igtvRip = true;
|
||||||
|
return urlMatcher.group("username") + "_igtv";
|
||||||
|
case SINGLE_POST:
|
||||||
|
postRip = true;
|
||||||
|
return "post_" + urlMatcher.group("shortcode");
|
||||||
|
case USER_PROFILE:
|
||||||
|
return urlMatcher.group("username");
|
||||||
|
default:
|
||||||
|
throw new RuntimeException("Reached unreachable");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
throw new MalformedURLException("This URL can't be ripped");
|
||||||
}
|
}
|
||||||
|
|
||||||
p = Pattern.compile("^https?://www.instagram.com/([^/]+)/?(?:\\?hl=\\S*)?/?");
|
private Matcher getUrlMatcher(URL url, UrlTypePattern type) {
|
||||||
m = p.matcher(url.toExternalForm());
|
String baseRegex = "^https?://(?:www[.])?instagram[.]com/%s(?:[?/].*)?";
|
||||||
if (m.matches()) {
|
Pattern pattern = Pattern.compile(format(baseRegex, type.urlTypePattern));
|
||||||
return m.group(1);
|
return pattern.matcher(url.toExternalForm());
|
||||||
}
|
|
||||||
|
|
||||||
p = Pattern.compile("^https?://www.instagram.com/p/([a-zA-Z0-9_-]+)/\\?taken-by=([^/]+)/?");
|
|
||||||
m = p.matcher(url.toExternalForm());
|
|
||||||
if (m.matches()) {
|
|
||||||
return m.group(2) + "_" + m.group(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
p = Pattern.compile("^https?://www.instagram.com/p/([a-zA-Z0-9_-]+)/?");
|
|
||||||
m = p.matcher(url.toExternalForm());
|
|
||||||
if (m.matches()) {
|
|
||||||
return m.group(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
p = Pattern.compile("^https?://www.instagram.com/p/([a-zA-Z0-9_-]+)/?(?:\\?hl=\\S*)?/?");
|
|
||||||
m = p.matcher(url.toExternalForm());
|
|
||||||
if (m.matches()) {
|
|
||||||
return m.group(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
p = Pattern.compile("^https?://www.instagram.com/explore/tags/([^/]+)/?");
|
|
||||||
m = p.matcher(url.toExternalForm());
|
|
||||||
if (m.matches()) {
|
|
||||||
rippingTag = true;
|
|
||||||
tagName = m.group(1);
|
|
||||||
return m.group(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
throw new MalformedURLException("Unable to find user in " + url);
|
|
||||||
}
|
|
||||||
|
|
||||||
private String stripHTMLTags(String t) {
|
|
||||||
t = t.replaceAll("<html>\n" +
|
|
||||||
" <head></head>\n" +
|
|
||||||
" <body>", "");
|
|
||||||
t.replaceAll("</body>\n" +
|
|
||||||
"</html>", "");
|
|
||||||
t = t.replaceAll("\n", "");
|
|
||||||
t = t.replaceAll("=\"\"", "");
|
|
||||||
return t;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
private JSONObject getJSONFromPage(Document firstPage) throws IOException {
|
|
||||||
// Check if this page is HTML + JSON or jsut json
|
|
||||||
if (!firstPage.html().contains("window._sharedData =")) {
|
|
||||||
return new JSONObject(stripHTMLTags(firstPage.html()));
|
|
||||||
}
|
|
||||||
String jsonText = "";
|
|
||||||
try {
|
|
||||||
for (Element script : firstPage.select("script[type=text/javascript]")) {
|
|
||||||
if (script.data().contains("window._sharedData = ")) {
|
|
||||||
jsonText = script.data().replaceAll("window._sharedData = ", "");
|
|
||||||
jsonText = jsonText.replaceAll("};", "}");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return new JSONObject(jsonText);
|
|
||||||
} catch (JSONException e) {
|
|
||||||
throw new IOException("Could not get JSON from page");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public JSONObject getFirstPage() throws IOException {
|
public JSONObject getFirstPage() throws IOException {
|
||||||
Connection.Response resp = Http.url(url).response();
|
setAuthCookie();
|
||||||
LOGGER.info(resp.cookies());
|
Document document = Http.url(url).cookies(cookies).response().parse();
|
||||||
csrftoken = resp.cookie("csrftoken");
|
qHash = getQhash(document);
|
||||||
Document p = resp.parse();
|
JSONObject jsonObject = getJsonObjectFromDoc(document);
|
||||||
// Get the query hash so we can download the next page
|
String hashtagNamePath = "entry_data.TagPage[0].graphql.hashtag.name";
|
||||||
qHash = getQHash(p);
|
String singlePostIdPath = "graphql.shortcode_media.shortcode";
|
||||||
return getJSONFromPage(p);
|
String profileIdPath = "entry_data.ProfilePage[0].graphql.user.id";
|
||||||
|
String storiesPath = "entry_data.StoriesPage[0].user.id";
|
||||||
|
String idPath = hashtagRip ? hashtagNamePath : storiesRip ? storiesPath : postRip ? singlePostIdPath : profileIdPath;
|
||||||
|
idString = getJsonStringByPath(jsonObject, idPath);
|
||||||
|
return taggedRip ? getNextPage(null) : pinnedRip ? getPinnedItems(document) : storiesRip ? getStoriesItems() : jsonObject;
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getVideoFromPage(String videoID) {
|
private void setAuthCookie() throws IOException {
|
||||||
|
String sessionId = Utils.getConfigString("instagram.session_id", null);
|
||||||
|
if ((storiesRip || pinnedRip) && sessionId == null) {
|
||||||
|
throw new IOException("instagram.session_id should be set up for Instagram stories");
|
||||||
|
}
|
||||||
|
if (sessionId != null) {
|
||||||
|
cookies.put("sessionid", sessionId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Query hash is used for graphql requests
|
||||||
|
private String getQhash(Document doc) throws IOException {
|
||||||
|
if (postRip) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
Predicate<String> hrefFilter = (storiesRip || pinnedReelRip) ? href -> href.contains("Consumer.js") :
|
||||||
|
href -> href.contains("ProfilePageContainer.js") || href.contains("TagPageContainer.js");
|
||||||
|
|
||||||
|
String href = doc.select("link[rel=preload]").stream()
|
||||||
|
.map(link -> link.attr("href"))
|
||||||
|
.filter(hrefFilter)
|
||||||
|
.findFirst().orElse("");
|
||||||
|
String body = Http.url("https://www.instagram.com" + href).cookies(cookies).response().body();
|
||||||
|
|
||||||
|
Function<String, String> hashExtractor =
|
||||||
|
storiesRip || pinnedReelRip ? this::getStoriesHash :
|
||||||
|
pinnedRip ? this::getPinnedHash : hashtagRip ? this::getTagHash :
|
||||||
|
taggedRip ? this::getUserTagHash : this::getProfileHash;
|
||||||
|
|
||||||
|
return hashExtractor.apply(body);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getStoriesHash(String jsData) {
|
||||||
|
return getHashValue(jsData, "loadStoryViewers", -5);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getProfileHash(String jsData) {
|
||||||
|
return getHashValue(jsData, "loadProfilePageExtras", -1);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getPinnedHash(String jsData) {
|
||||||
|
return getHashValue(jsData, "loadProfilePageExtras", -2);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getTagHash(String jsData) {
|
||||||
|
return getHashValue(jsData, "requestNextTagMedia", -1);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getUserTagHash(String jsData) {
|
||||||
|
return getHashValue(jsData, "requestNextTaggedPosts", -1);
|
||||||
|
}
|
||||||
|
|
||||||
|
private JSONObject getJsonObjectFromDoc(Document document) {
|
||||||
|
for (Element script : document.select("script[type=text/javascript]")) {
|
||||||
|
String scriptText = script.data();
|
||||||
|
if (scriptText.startsWith("window._sharedData") || scriptText.startsWith("window.__additionalDataLoaded")) {
|
||||||
|
String jsonText = scriptText.replaceAll("[^{]*([{].*})[^}]*", "$1");
|
||||||
|
if (jsonText.contains("graphql") || jsonText.contains("StoriesPage")) {
|
||||||
|
return new JSONObject(jsonText);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public JSONObject getNextPage(JSONObject source) throws IOException {
|
||||||
|
if (postRip || storiesRip || pinnedReelRip) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
JSONObject nextPageQuery = new JSONObject().put(hashtagRip ? "tag_name" : "id", idString).put("first", 12);
|
||||||
|
if (source == null) {
|
||||||
|
return graphqlRequest(nextPageQuery);
|
||||||
|
}
|
||||||
|
JSONObject pageInfo = getMediaRoot(source).getJSONObject("page_info");
|
||||||
|
if (pageInfo.getBoolean("has_next_page")) {
|
||||||
|
return graphqlRequest(nextPageQuery.put("after", pageInfo.getString("end_cursor")));
|
||||||
|
} else {
|
||||||
|
failedItems.forEach(LOGGER::error);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private JSONObject getStoriesItems() throws IOException {
|
||||||
|
return graphqlRequest(new JSONObject().append("reel_ids", idString).put("precomposed_overlay", false));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Two requests with different query hashes required for pinned items.
|
||||||
|
// Query hash to be used depends on flag specified:
|
||||||
|
// pinnedRip flag is used initially to get list of pinned albums;
|
||||||
|
// pinnedReelRip flag is used next to get media urls.
|
||||||
|
private JSONObject getPinnedItems(Document document) throws IOException {
|
||||||
|
JSONObject queryForIds = new JSONObject().put("user_id", idString).put("include_highlight_reels", true);
|
||||||
|
JSONObject pinnedIdsJson = graphqlRequest(queryForIds);
|
||||||
|
JSONArray pinnedItems = getJsonArrayByPath(pinnedIdsJson, "data.user.edge_highlight_reels.edges");
|
||||||
|
pinnedRip = false;
|
||||||
|
pinnedReelRip = true;
|
||||||
|
qHash = getQhash(document);
|
||||||
|
JSONObject queryForDetails = new JSONObject();
|
||||||
|
getStreamOfJsonArray(pinnedItems)
|
||||||
|
.map(object -> getJsonStringByPath(object, "node.id"))
|
||||||
|
.forEach(id -> queryForDetails.append("highlight_reel_ids", id));
|
||||||
|
queryForDetails.put("precomposed_overlay", false);
|
||||||
|
return graphqlRequest(queryForDetails);
|
||||||
|
}
|
||||||
|
|
||||||
|
private JSONObject graphqlRequest(JSONObject vars) throws IOException {
|
||||||
|
// Sleep for a while to avoid a ban
|
||||||
|
sleep(2500);
|
||||||
|
String url = format("https://www.instagram.com/graphql/query/?query_hash=%s&variables=%s", qHash, vars.toString());
|
||||||
|
return Http.url(url).cookies(cookies).getJSON();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<String> getURLsFromJSON(JSONObject json) {
|
||||||
|
if (storiesRip || pinnedReelRip) {
|
||||||
|
JSONArray storyAlbums = getJsonArrayByPath(json, "data.reels_media");
|
||||||
|
return getStreamOfJsonArray(storyAlbums)
|
||||||
|
.flatMap(album -> getStreamOfJsonArray(album.getJSONArray("items")))
|
||||||
|
.peek(storyItem -> itemPrefixes.add(getTimestampPrefix(storyItem)))
|
||||||
|
.flatMap(this::parseStoryItemForUrls)
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
if (postRip) {
|
||||||
|
JSONObject detailsJson = downloadItemDetailsJson(idString);
|
||||||
|
addPrefixInfo(detailsJson);
|
||||||
|
return parseItemDetailsForUrls(detailsJson).collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
JSONArray edges = getMediaRoot(json).getJSONArray("edges");
|
||||||
|
return getStreamOfJsonArray(edges)
|
||||||
|
.map(edge -> getJsonStringByPath(edge, "node.shortcode"))
|
||||||
|
.map(this::downloadItemDetailsJson)
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.peek(this::addPrefixInfo)
|
||||||
|
.flatMap(this::parseItemDetailsForUrls)
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
|
||||||
|
private Stream<? extends String> parseStoryItemForUrls(JSONObject storyItem) {
|
||||||
|
if (storyItem.getBoolean("is_video")) {
|
||||||
|
itemPrefixes.add(getTimestampPrefix(storyItem) + "preview_");
|
||||||
|
int lastIndex = storyItem.getJSONArray("video_resources").length() - 1;
|
||||||
|
return Stream.of(
|
||||||
|
getJsonStringByPath(storyItem, "video_resources[" + lastIndex + "].src"),
|
||||||
|
storyItem.getString("display_url"));
|
||||||
|
}
|
||||||
|
return Stream.of(storyItem.getString("display_url"));
|
||||||
|
}
|
||||||
|
|
||||||
|
private JSONObject getMediaRoot(JSONObject json) {
|
||||||
|
String userExtra = "data.user.edge_owner_to_timeline_media";
|
||||||
|
String igtvExtra = "data.user.edge_felix_video_timeline";
|
||||||
|
String taggedExtra = "data.user.edge_user_to_photos_of_you";
|
||||||
|
String hashtagExtra = "data.hashtag.edge_hashtag_to_media";
|
||||||
|
String userHomeRoot = "entry_data.ProfilePage[0].graphql.user.edge_owner_to_timeline_media";
|
||||||
|
String igtvHomeRoot = "entry_data.ProfilePage[0].graphql.user.edge_felix_video_timeline";
|
||||||
|
String hashtagHomeRoot = "entry_data.TagPage[0].graphql.hashtag.edge_hashtag_to_media";
|
||||||
|
String mediaRootPath = json.optJSONObject("entry_data") != null ?
|
||||||
|
(hashtagRip ? hashtagHomeRoot : igtvRip ? igtvHomeRoot : userHomeRoot) : hashtagRip ?
|
||||||
|
hashtagExtra : igtvRip ? igtvExtra : taggedRip ? taggedExtra : userExtra;
|
||||||
|
return getJsonObjectByPath(json, mediaRootPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
private JSONObject downloadItemDetailsJson(String shortcode) {
|
||||||
|
String url = "https://www.instagram.com/p/%s/?__a=1";
|
||||||
try {
|
try {
|
||||||
Document doc = Http.url("https://www.instagram.com/p/" + videoID).get();
|
Http http = Http.url(format(url, shortcode));
|
||||||
|
http.ignoreContentType();
|
||||||
|
http.connection().followRedirects(false);
|
||||||
|
Connection.Response response = http.cookies(cookies).response();
|
||||||
|
// Fix for redirection link; repeat request with the new shortcode
|
||||||
|
if (response.statusCode() == 302) {
|
||||||
|
Pattern redirectIdPattern = Pattern.compile("/p/(?<shortcode>[^?/]+)");
|
||||||
|
Matcher m = redirectIdPattern.matcher(response.header("location"));
|
||||||
|
return m.find() ? downloadItemDetailsJson(m.group("shortcode")) : null;
|
||||||
|
}
|
||||||
|
return new JSONObject(response.body());
|
||||||
|
} catch (Exception e) {
|
||||||
|
failedItems.add(shortcode);
|
||||||
|
LOGGER.trace(format("No item %s found", shortcode), e);
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addPrefixInfo(JSONObject itemDetailsJson) {
|
||||||
|
JSONObject mediaItem = getJsonObjectByPath(itemDetailsJson, "graphql.shortcode_media");
|
||||||
|
String shortcode = mediaItem.getString("shortcode");
|
||||||
|
int subItemsCount = "GraphSidecar".equals(mediaItem.getString("__typename")) ?
|
||||||
|
getJsonArrayByPath(mediaItem, "edge_sidecar_to_children.edges").length() : 1;
|
||||||
|
for (int i = 0; i < subItemsCount; i++) {
|
||||||
|
itemPrefixes.add(getTimestampPrefix(mediaItem) + shortcode + "_");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getTimestampPrefix(JSONObject item) {
|
||||||
|
Instant instant = Instant.ofEpochSecond(item.getLong("taken_at_timestamp"));
|
||||||
|
return DateTimeFormatter.ofPattern("yyyy-MM-dd_HH-mm-ss_").format(ZonedDateTime.ofInstant(instant, ZoneOffset.UTC));
|
||||||
|
}
|
||||||
|
|
||||||
|
private Stream<? extends String> parseItemDetailsForUrls(JSONObject itemDetailsJson) {
|
||||||
|
JSONObject mediaItem = getJsonObjectByPath(itemDetailsJson, "graphql.shortcode_media");
|
||||||
|
// For some reason JSON video_url has lower quality than the HTML-tag one
|
||||||
|
// HTML-tag url is requested here and marked with _extra_ prefix
|
||||||
|
if ("GraphVideo".equals(mediaItem.getString("__typename"))) {
|
||||||
|
String shortcode = mediaItem.getString("shortcode");
|
||||||
|
String urlFromPage = getVideoUrlFromPage(shortcode);
|
||||||
|
if (!urlFromPage.isEmpty()) {
|
||||||
|
itemPrefixes.add(getTimestampPrefix(mediaItem) + shortcode + "_extra_");
|
||||||
|
return Stream.of(mediaItem.getString("video_url"), urlFromPage);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return parseRootForUrls(mediaItem);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Uses recursion for GraphSidecar
|
||||||
|
private Stream<? extends String> parseRootForUrls(JSONObject mediaItem) {
|
||||||
|
String typeName = mediaItem.getString("__typename");
|
||||||
|
switch (typeName) {
|
||||||
|
case "GraphImage":
|
||||||
|
return Stream.of(mediaItem.getString("display_url"));
|
||||||
|
case "GraphVideo":
|
||||||
|
return Stream.of(mediaItem.getString("video_url"));
|
||||||
|
case "GraphSidecar":
|
||||||
|
JSONArray sideCar = getJsonArrayByPath(mediaItem, "edge_sidecar_to_children.edges");
|
||||||
|
return getStreamOfJsonArray(sideCar).map(object -> object.getJSONObject("node"))
|
||||||
|
.flatMap(this::parseRootForUrls);
|
||||||
|
default:
|
||||||
|
return Stream.empty();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getVideoUrlFromPage(String videoID) {
|
||||||
|
try {
|
||||||
|
Document doc = Http.url("https://www.instagram.com/p/" + videoID).cookies(cookies).get();
|
||||||
return doc.select("meta[property=og:video]").attr("content");
|
return doc.select("meta[property=og:video]").attr("content");
|
||||||
} catch (IOException e) {
|
} catch (Exception e) {
|
||||||
LOGGER.warn("Unable to get page " + "https://www.instagram.com/p/" + videoID);
|
LOGGER.warn("Unable to get page " + "https://www.instagram.com/p/" + videoID);
|
||||||
}
|
}
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getOriginalUrl(String imageURL) {
|
@Override
|
||||||
// Without this regex most images will return a 403 error
|
protected void downloadURL(URL url, int index) {
|
||||||
imageURL = imageURL.replaceAll("vp/[a-zA-Z0-9]*/", "");
|
if (Utils.getConfigBoolean("instagram.download_images_only", false) && url.toString().contains(".mp4?")) {
|
||||||
imageURL = imageURL.replaceAll("scontent.cdninstagram.com/hphotos-", "igcdn-photos-d-a.akamaihd.net/hphotos-ak-");
|
LOGGER.info("Skipped video url: " + url);
|
||||||
|
return;
|
||||||
// Instagram returns cropped images to unauthenticated applications to maintain legacy support.
|
}
|
||||||
// To retrieve the uncropped image, remove this segment from the URL.
|
addURLToDownload(url, itemPrefixes.get(index - 1), "", null, cookies);
|
||||||
// Segment format: cX.Y.W.H - eg: c0.134.1080.1080
|
|
||||||
imageURL = imageURL.replaceAll("/c\\d{1,4}\\.\\d{1,4}\\.\\d{1,4}\\.\\d{1,4}", "");
|
|
||||||
imageURL = imageURL.replaceAll("\\?ig_cache_key.+$", "");
|
|
||||||
return imageURL;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getAfter(JSONObject json) {
|
// Javascript parsing
|
||||||
try {
|
/* ------------------------------------------------------------------------------------------------------- */
|
||||||
return json.getJSONObject("entry_data").getJSONArray("ProfilePage").getJSONObject(0)
|
private String getHashValue(String javaScriptData, String keyword, int offset) {
|
||||||
.getJSONObject("graphql").getJSONObject("user")
|
List<Statement> statements = getJsBodyBlock(javaScriptData).getStatements();
|
||||||
.getJSONObject("edge_owner_to_timeline_media").getJSONObject("page_info").getString("end_cursor");
|
return statements.stream()
|
||||||
} catch (JSONException e) {
|
.flatMap(statement -> filterItems(statement, ExpressionStatement.class))
|
||||||
// This is here so that when the user rips the last page they don't get a "end_cursor not a string" error
|
.map(ExpressionStatement::getExpression)
|
||||||
try {
|
.flatMap(expression -> filterItems(expression, CallNode.class))
|
||||||
return json.getJSONObject("data").getJSONObject("user")
|
.map(CallNode::getArgs)
|
||||||
.getJSONObject("edge_owner_to_timeline_media").getJSONObject("page_info").getString("end_cursor");
|
.map(expressions -> expressions.get(0))
|
||||||
} catch (JSONException t) {
|
.flatMap(expression -> filterItems(expression, FunctionNode.class))
|
||||||
return "";
|
.map(FunctionNode::getBody)
|
||||||
|
.map(Block::getStatements)
|
||||||
|
.map(statementList -> lookForHash(statementList, keyword, offset))
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.findFirst().orElse(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String lookForHash(List<Statement> list, String keyword, int offset) {
|
||||||
|
for (int i = 0; i < list.size(); i++) {
|
||||||
|
Statement st = list.get(i);
|
||||||
|
if (st.toString().contains(keyword)) {
|
||||||
|
return list.get(i + offset).toString().replaceAll(".*\"([0-9a-f]*)\".*", "$1");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private <T> Stream<T> filterItems(Object obj, Class<T> aClass) {
|
||||||
|
return Stream.of(obj).filter(aClass::isInstance).map(aClass::cast);
|
||||||
|
}
|
||||||
|
|
||||||
|
private Block getJsBodyBlock(String javaScriptData) {
|
||||||
|
ErrorManager errors = new ErrorManager();
|
||||||
|
Context context = new Context(new Options("nashorn"), errors, Thread.currentThread().getContextClassLoader());
|
||||||
|
return new Parser(context.getEnv(), Source.sourceFor("name", javaScriptData), errors).parse().getBody();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Some JSON helper methods below
|
||||||
|
/* ------------------------------------------------------------------------------------------------------- */
|
||||||
|
private JSONObject getJsonObjectByPath(JSONObject object, String key) {
|
||||||
|
Pattern arrayPattern = Pattern.compile("(?<arr>.*)\\[(?<idx>\\d+)]");
|
||||||
|
JSONObject result = object;
|
||||||
|
for (String s : key.split("[.]")) {
|
||||||
|
Matcher m = arrayPattern.matcher(s);
|
||||||
|
result = m.matches() ?
|
||||||
|
result.getJSONArray(m.group("arr")).getJSONObject(Integer.parseInt(m.group("idx"))) :
|
||||||
|
result.getJSONObject(s);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
private <T> T getByPath(BiFunction<JSONObject, String, T> func, JSONObject object, String key) {
|
||||||
|
int namePos = key.lastIndexOf('.');
|
||||||
|
JSONObject parent = namePos < 0 ? object : getJsonObjectByPath(object, key.substring(0, namePos));
|
||||||
|
return func.apply(parent, key.substring(namePos + 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
private JSONArray getJsonArrayByPath(JSONObject object, String key) {
|
||||||
|
return getByPath(JSONObject::getJSONArray, object, key);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getJsonStringByPath(JSONObject object, String key) {
|
||||||
|
return getByPath(JSONObject::getString, object, key);
|
||||||
|
}
|
||||||
|
|
||||||
|
private Stream<JSONObject> getStreamOfJsonArray(JSONArray array) {
|
||||||
|
return StreamSupport.stream(new JSONSpliterator(array), false);
|
||||||
|
}
|
||||||
|
|
||||||
|
private class JSONSpliterator extends Spliterators.AbstractSpliterator<JSONObject> {
|
||||||
|
private JSONArray array;
|
||||||
|
private int index = 0;
|
||||||
|
|
||||||
|
JSONSpliterator(JSONArray array) {
|
||||||
|
super(array.length(), SIZED | ORDERED);
|
||||||
|
this.array = array;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<String> getURLsFromJSON(JSONObject json) {
|
public boolean tryAdvance(Consumer<? super JSONObject> action) {
|
||||||
List<String> imageURLs = new ArrayList<>();
|
if (index == array.length()) {
|
||||||
if (!url.toExternalForm().contains("/p/")) {
|
|
||||||
nextPageID = getAfter(json);
|
|
||||||
}
|
|
||||||
|
|
||||||
// get the rhx_gis value so we can get the next page later on
|
|
||||||
if (rhx_gis == null) {
|
|
||||||
rhx_gis = json.getString("rhx_gis");
|
|
||||||
}
|
|
||||||
if (!url.toExternalForm().contains("/p/")) {
|
|
||||||
JSONArray datas = new JSONArray();
|
|
||||||
if (!rippingTag) {
|
|
||||||
// This first try only works on data from the first page
|
|
||||||
try {
|
|
||||||
JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage");
|
|
||||||
userID = profilePage.getJSONObject(0).getString("logging_page_id").replaceAll("profilePage_", "");
|
|
||||||
datas = json.getJSONObject("entry_data").getJSONArray("ProfilePage").getJSONObject(0)
|
|
||||||
.getJSONObject("graphql").getJSONObject("user")
|
|
||||||
.getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges");
|
|
||||||
} catch (JSONException e) {
|
|
||||||
datas = json.getJSONObject("data").getJSONObject("user")
|
|
||||||
.getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges");
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
try {
|
|
||||||
JSONArray tagPage = json.getJSONObject("entry_data").getJSONArray("TagPage");
|
|
||||||
datas = tagPage.getJSONObject(0).getJSONObject("graphql").getJSONObject("hashtag")
|
|
||||||
.getJSONObject("edge_hashtag_to_media").getJSONArray("edges");
|
|
||||||
} catch (JSONException e) {
|
|
||||||
datas = json.getJSONObject("data").getJSONObject("hashtag").getJSONObject("edge_hashtag_to_media")
|
|
||||||
.getJSONArray("edges");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (int i = 0; i < datas.length(); i++) {
|
|
||||||
JSONObject data = (JSONObject) datas.get(i);
|
|
||||||
data = data.getJSONObject("node");
|
|
||||||
Long epoch = data.getLong("taken_at_timestamp");
|
|
||||||
Instant instant = Instant.ofEpochSecond(epoch);
|
|
||||||
String image_date = DateTimeFormatter.ofPattern("yyyy_MM_dd_hh:mm_").format(ZonedDateTime.ofInstant(instant, ZoneOffset.UTC));
|
|
||||||
// It looks like tag pages don't have the __typename key
|
|
||||||
if (!rippingTag) {
|
|
||||||
if (data.getString("__typename").equals("GraphSidecar")) {
|
|
||||||
try {
|
|
||||||
Document slideShowDoc = Http.url(new URL("https://www.instagram.com/p/" + data.getString("shortcode"))).get();
|
|
||||||
List<String> toAdd = getPostsFromSinglePage(getJSONFromPage(slideShowDoc));
|
|
||||||
for (int slideShowInt = 0; slideShowInt < toAdd.size(); slideShowInt++) {
|
|
||||||
addURLToDownload(new URL(toAdd.get(slideShowInt)), image_date + data.getString("shortcode"));
|
|
||||||
}
|
|
||||||
} catch (MalformedURLException e) {
|
|
||||||
LOGGER.error("Unable to download slide show, URL was malformed");
|
|
||||||
} catch (IOException e) {
|
|
||||||
LOGGER.error("Unable to download slide show");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
if (!data.getBoolean("is_video")) {
|
|
||||||
if (imageURLs.isEmpty()) {
|
|
||||||
// We add this one item to the array because either wise
|
|
||||||
// the ripper will error out because we returned an empty array
|
|
||||||
imageURLs.add(getOriginalUrl(data.getString("display_url")));
|
|
||||||
}
|
|
||||||
addURLToDownload(new URL(data.getString("display_url")), image_date);
|
|
||||||
} else {
|
|
||||||
if (!Utils.getConfigBoolean("instagram.download_images_only", false)) {
|
|
||||||
addURLToDownload(new URL(getVideoFromPage(data.getString("shortcode"))), image_date);
|
|
||||||
} else {
|
|
||||||
sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Skipping video " + data.getString("shortcode"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (MalformedURLException e) {
|
|
||||||
LOGGER.info("Got MalformedURLException");
|
|
||||||
return imageURLs;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isThisATest()) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} else { // We're ripping from a single page
|
|
||||||
LOGGER.info("Ripping from single page");
|
|
||||||
imageURLs = getPostsFromSinglePage(json);
|
|
||||||
}
|
|
||||||
|
|
||||||
return imageURLs;
|
|
||||||
}
|
|
||||||
|
|
||||||
private String getIGGis(String variables) {
|
|
||||||
String stringToMD5 = rhx_gis + ":" + variables;
|
|
||||||
LOGGER.debug("String to md5 is \"" + stringToMD5 + "\"");
|
|
||||||
try {
|
|
||||||
byte[] bytesOfMessage = stringToMD5.getBytes("UTF-8");
|
|
||||||
|
|
||||||
MessageDigest md = MessageDigest.getInstance("MD5");
|
|
||||||
byte[] hash = md.digest(bytesOfMessage);
|
|
||||||
StringBuffer sb = new StringBuffer();
|
|
||||||
for (int i = 0; i < hash.length; ++i) {
|
|
||||||
sb.append(Integer.toHexString((hash[i] & 0xFF) | 0x100).substring(1,3));
|
|
||||||
}
|
|
||||||
return sb.toString();
|
|
||||||
} catch(UnsupportedEncodingException e) {
|
|
||||||
return null;
|
|
||||||
} catch(NoSuchAlgorithmException e) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public JSONObject getNextPage(JSONObject json) throws IOException {
|
|
||||||
JSONObject toreturn;
|
|
||||||
java.util.Map<String, String> cookies = new HashMap<String, String>();
|
|
||||||
// This shouldn't be hardcoded and will break one day
|
|
||||||
cookies.put("ig_pr", "1");
|
|
||||||
cookies.put("csrftoken", csrftoken);
|
|
||||||
if (!nextPageID.equals("") && !isThisATest()) {
|
|
||||||
if (rippingTag) {
|
|
||||||
try {
|
|
||||||
sleep(2500);
|
|
||||||
String vars = "{\"tag_name\":\"" + tagName + "\",\"first\":4,\"after\":\"" + nextPageID + "\"}";
|
|
||||||
String ig_gis = getIGGis(vars);
|
|
||||||
toreturn = getPage("https://www.instagram.com/graphql/query/?query_hash=" + qHash +
|
|
||||||
"&variables=" + vars, ig_gis);
|
|
||||||
// Sleep for a while to avoid a ban
|
|
||||||
LOGGER.info(toreturn);
|
|
||||||
if (!pageHasImages(toreturn)) {
|
|
||||||
throw new IOException("No more pages");
|
|
||||||
}
|
|
||||||
return toreturn;
|
|
||||||
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new IOException("No more pages");
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
// Sleep for a while to avoid a ban
|
|
||||||
sleep(2500);
|
|
||||||
String vars = "{\"id\":\"" + userID + "\",\"first\":12,\"after\":\"" + nextPageID + "\"}";
|
|
||||||
String ig_gis = getIGGis(vars);
|
|
||||||
LOGGER.info(ig_gis);
|
|
||||||
|
|
||||||
LOGGER.info("https://www.instagram.com/graphql/query/?query_hash=" + qHash + "&variables=" + vars);
|
|
||||||
toreturn = getPage("https://www.instagram.com/graphql/query/?query_hash=" + qHash + "&variables=" + vars, ig_gis);
|
|
||||||
if (!pageHasImages(toreturn)) {
|
|
||||||
throw new IOException("No more pages");
|
|
||||||
}
|
|
||||||
return toreturn;
|
|
||||||
} catch (IOException e) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
throw new IOException("No more pages");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void downloadURL(URL url, int index) {
|
|
||||||
addURLToDownload(url);
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean pageHasImages(JSONObject json) {
|
|
||||||
LOGGER.info(json);
|
|
||||||
int numberOfImages = json.getJSONObject("data").getJSONObject("user")
|
|
||||||
.getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges").length();
|
|
||||||
if (numberOfImages == 0) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
action.accept(array.getJSONObject(index++));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
private JSONObject getPage(String url, String ig_gis) {
|
|
||||||
StringBuilder sb = new StringBuilder();
|
|
||||||
try {
|
|
||||||
// We can't use Jsoup here because it won't download a non-html file larger than a MB
|
|
||||||
// even if you set maxBodySize to 0
|
|
||||||
URLConnection connection = new URL(url).openConnection();
|
|
||||||
connection.setRequestProperty("User-Agent", USER_AGENT);
|
|
||||||
connection.setRequestProperty("x-instagram-gis", ig_gis);
|
|
||||||
BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()));
|
|
||||||
String line;
|
|
||||||
while ((line = in.readLine()) != null) {
|
|
||||||
sb.append(line);
|
|
||||||
|
|
||||||
}
|
|
||||||
in.close();
|
|
||||||
workAroundJsonString = sb.toString();
|
|
||||||
return new JSONObject(sb.toString());
|
|
||||||
|
|
||||||
} catch (MalformedURLException e) {
|
|
||||||
LOGGER.info("Unable to get query_hash, " + url + " is a malformed URL");
|
|
||||||
return null;
|
|
||||||
} catch (IOException e) {
|
|
||||||
LOGGER.info("Unable to get query_hash");
|
|
||||||
LOGGER.info(e.getMessage());
|
|
||||||
return null;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getQHash(Document doc) {
|
|
||||||
String jsFileURL = "https://www.instagram.com" + doc.select("link[rel=preload]").attr("href");
|
|
||||||
StringBuilder sb = new StringBuilder();
|
|
||||||
Document jsPage;
|
|
||||||
try {
|
|
||||||
// We can't use Jsoup here because it won't download a non-html file larger than a MB
|
|
||||||
// even if you set maxBodySize to 0
|
|
||||||
URLConnection connection = new URL(jsFileURL).openConnection();
|
|
||||||
BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()));
|
|
||||||
String line;
|
|
||||||
while ((line = in.readLine()) != null) {
|
|
||||||
sb.append(line);
|
|
||||||
}
|
|
||||||
in.close();
|
|
||||||
|
|
||||||
} catch (MalformedURLException e) {
|
|
||||||
LOGGER.info("Unable to get query_hash, " + jsFileURL + " is a malformed URL");
|
|
||||||
return null;
|
|
||||||
} catch (IOException e) {
|
|
||||||
LOGGER.info("Unable to get query_hash");
|
|
||||||
LOGGER.info(e.getMessage());
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
if (!rippingTag) {
|
|
||||||
Pattern jsP = Pattern.compile("byUserId\\.get\\(t\\)\\)\\|\\|void 0===r\\?void 0:r\\.pagination},queryId:.([a-zA-Z0-9]+)");
|
|
||||||
Matcher m = jsP.matcher(sb.toString());
|
|
||||||
if (m.find()) {
|
|
||||||
return m.group(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
} else {
|
|
||||||
Pattern jsP = Pattern.compile("return e.tagMedia.byTagName.get\\(t\\).pagination},queryId:.([a-zA-Z0-9]+).");
|
|
||||||
Matcher m = jsP.matcher(sb.toString());
|
|
||||||
if (m.find()) {
|
|
||||||
return m.group(1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
LOGGER.error("Could not find query_hash on " + jsFileURL);
|
|
||||||
return null;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
@ -12,6 +12,7 @@ import javax.sound.sampled.Clip;
|
|||||||
import javax.sound.sampled.Line;
|
import javax.sound.sampled.Line;
|
||||||
import javax.sound.sampled.LineEvent;
|
import javax.sound.sampled.LineEvent;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.UnsupportedEncodingException;
|
import java.io.UnsupportedEncodingException;
|
||||||
@ -179,21 +180,21 @@ public class Utils {
|
|||||||
/**
|
/**
|
||||||
* Determines if your current system is a Windows system.
|
* Determines if your current system is a Windows system.
|
||||||
*/
|
*/
|
||||||
private static boolean isWindows() {
|
public static boolean isWindows() {
|
||||||
return OS.contains("win");
|
return OS.contains("win");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Determines if your current system is a Mac system
|
* Determines if your current system is a Mac system
|
||||||
*/
|
*/
|
||||||
private static boolean isMacOS() {
|
public static boolean isMacOS() {
|
||||||
return OS.contains("mac");
|
return OS.contains("mac");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Determines if current system is based on UNIX
|
* Determines if current system is based on UNIX
|
||||||
*/
|
*/
|
||||||
private static boolean isUnix() {
|
public static boolean isUnix() {
|
||||||
return OS.contains("nix") || OS.contains("nux") || OS.contains("bsd");
|
return OS.contains("nix") || OS.contains("nux") || OS.contains("bsd");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -773,4 +774,34 @@ public class Utils {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static File shortenSaveAsWindows(String ripsDirPath, String fileName) throws FileNotFoundException {
|
||||||
|
// int ripDirLength = ripsDirPath.length();
|
||||||
|
// int maxFileNameLength = 260 - ripDirLength;
|
||||||
|
// LOGGER.info(maxFileNameLength);
|
||||||
|
LOGGER.error("The filename " + fileName + " is to long to be saved on this file system.");
|
||||||
|
LOGGER.info("Shortening filename");
|
||||||
|
String fullPath = ripsDirPath + File.separator + fileName;
|
||||||
|
// How long the path without the file name is
|
||||||
|
int pathLength = ripsDirPath.length();
|
||||||
|
int fileNameLength = fileName.length();
|
||||||
|
if (pathLength == 260) {
|
||||||
|
// We've reached the max length, there's nothing more we can do
|
||||||
|
throw new FileNotFoundException("File path is too long for this OS");
|
||||||
|
}
|
||||||
|
String[] saveAsSplit = fileName.split("\\.");
|
||||||
|
// Get the file extension so when we shorten the file name we don't cut off the
|
||||||
|
// file extension
|
||||||
|
String fileExt = saveAsSplit[saveAsSplit.length - 1];
|
||||||
|
// The max limit for paths on Windows is 260 chars
|
||||||
|
LOGGER.info(fullPath.substring(0, 259 - pathLength - fileExt.length() + 1) + "." + fileExt);
|
||||||
|
fullPath = fullPath.substring(0, 259 - pathLength - fileExt.length() + 1) + "." + fileExt;
|
||||||
|
LOGGER.info(fullPath);
|
||||||
|
LOGGER.info(fullPath.length());
|
||||||
|
return new File(fullPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String sanitizeSaveAs(String fileNameToSan) {
|
||||||
|
return fileNameToSan.replaceAll("[\\\\/:*?\"<>|]", "_");
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
Loading…
Reference in New Issue
Block a user