commit
41eb9fd09b
1
build.sh
1
build.sh
@ -1 +1,2 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
mvn clean compile assembly:single
|
mvn clean compile assembly:single
|
@ -1,40 +0,0 @@
|
|||||||
file.overwrite | bool | If true ripme will overwrite existing files rather than skip them
|
|
||||||
|
|
||||||
clipboard.autorip | bool | If true ripme will try to download any links in the clipboard
|
|
||||||
|
|
||||||
error.skip404 | bool | Don't retry on 404 errors
|
|
||||||
|
|
||||||
download.save_order| bool | If true ripme will prefix each downloaded file with a number in the order the files were downloaded
|
|
||||||
|
|
||||||
auto.update | bool | If true ripme will auto-update every time it's started
|
|
||||||
|
|
||||||
play.sound | bool | If true ripme will play a sound every time a rip finishes
|
|
||||||
|
|
||||||
download.show_popup| bool | TODO figure out what this is for
|
|
||||||
|
|
||||||
log.save | bool | If true ripme will save its logs
|
|
||||||
|
|
||||||
urls_only.save | bool | If true ripme will save all urls to a text file and download no files
|
|
||||||
|
|
||||||
album_titles.save | bool | Currently does nothing
|
|
||||||
|
|
||||||
prefer.mp4 | bool | Prefer mp4 when downloading a video that has more than 1 format
|
|
||||||
|
|
||||||
download.timeout | int | File download timeout (in milliseconds)
|
|
||||||
|
|
||||||
page.timeout | int | Page download timeout (in milliseconds)
|
|
||||||
|
|
||||||
download.max_size | int | Maximum size of downloaded files in bytes
|
|
||||||
|
|
||||||
threads.size | int | The number of threads to use
|
|
||||||
|
|
||||||
twitter.auth | String | Twitter API key (Base64'd)
|
|
||||||
|
|
||||||
tumblr.auth | String | Tumblr API key
|
|
||||||
|
|
||||||
log.level | String | The debug log level (Example: Log level: Debug)
|
|
||||||
|
|
||||||
gw.api | String | TODO figure out what this is for
|
|
||||||
|
|
||||||
twitter.max_requests | int | TODO figure out what this is for
|
|
||||||
|
|
2
pom.xml
2
pom.xml
@ -4,7 +4,7 @@
|
|||||||
<groupId>com.rarchives.ripme</groupId>
|
<groupId>com.rarchives.ripme</groupId>
|
||||||
<artifactId>ripme</artifactId>
|
<artifactId>ripme</artifactId>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
<version>1.7.19</version>
|
<version>1.7.23</version>
|
||||||
<name>ripme</name>
|
<name>ripme</name>
|
||||||
<url>http://rip.rarchives.com</url>
|
<url>http://rip.rarchives.com</url>
|
||||||
<properties>
|
<properties>
|
||||||
|
@ -1,6 +1,10 @@
|
|||||||
{
|
{
|
||||||
"latestVersion": "1.7.19",
|
"latestVersion": "1.7.23",
|
||||||
"changeList": [
|
"changeList": [
|
||||||
|
"1.7.23: Fixed xvideos ripper; InstagramRipper now works with lastseenfeature",
|
||||||
|
"1.7.22: Added func to normalize urls before reading from/writing to url history file; last seen feature now works with instagram",
|
||||||
|
"1.7.21: Fixed last seen feature",
|
||||||
|
"1.7.20: Fixed 8muses ripper; Added last seen feature; disabled 500px test",
|
||||||
"1.7.19: imgurRipper no longer tries to add ?1 to file names",
|
"1.7.19: imgurRipper no longer tries to add ?1 to file names",
|
||||||
"1.7.18: AlbumRipper now removes bad chars from file names",
|
"1.7.18: AlbumRipper now removes bad chars from file names",
|
||||||
"1.7.17: Fixed hentai.cafe autorip from clipboard",
|
"1.7.17: Fixed hentai.cafe autorip from clipboard",
|
||||||
|
@ -68,7 +68,13 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
|
|||||||
Document doc = getFirstPage();
|
Document doc = getFirstPage();
|
||||||
|
|
||||||
while (doc != null) {
|
while (doc != null) {
|
||||||
|
if (alreadyDownloadedUrls >= Utils.getConfigInteger("history.end_rip_after_already_seen", 1000000000) && !isThisATest()) {
|
||||||
|
sendUpdate(STATUS.DOWNLOAD_COMPLETE, "Already seen the last " + alreadyDownloadedUrls + " images ending rip");
|
||||||
|
break;
|
||||||
|
}
|
||||||
List<String> imageURLs = getURLsFromPage(doc);
|
List<String> imageURLs = getURLsFromPage(doc);
|
||||||
|
// If hasASAPRipping() returns true then the ripper will handle downloading the files
|
||||||
|
// if not it's done in the following block of code
|
||||||
if (!hasASAPRipping()) {
|
if (!hasASAPRipping()) {
|
||||||
// Remove all but 1 image
|
// Remove all but 1 image
|
||||||
if (isThisATest()) {
|
if (isThisATest()) {
|
||||||
|
@ -44,7 +44,8 @@ public abstract class AbstractRipper
|
|||||||
public abstract String getHost();
|
public abstract String getHost();
|
||||||
public abstract String getGID(URL url) throws MalformedURLException;
|
public abstract String getGID(URL url) throws MalformedURLException;
|
||||||
public boolean hasASAPRipping() { return false; }
|
public boolean hasASAPRipping() { return false; }
|
||||||
|
// Every time addURLToDownload skips an already-downloaded URL, this increases by 1
|
||||||
|
public int alreadyDownloadedUrls = 0;
|
||||||
private boolean shouldStop = false;
|
private boolean shouldStop = false;
|
||||||
private boolean thisIsATest = false;
|
private boolean thisIsATest = false;
|
||||||
|
|
||||||
@ -60,7 +61,13 @@ public abstract class AbstractRipper
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds a URL to the url history file
|
||||||
|
* @param downloadedURL URL to add to the url history file
|
||||||
|
*/
|
||||||
private void writeDownloadedURL(String downloadedURL) throws IOException {
|
private void writeDownloadedURL(String downloadedURL) throws IOException {
|
||||||
|
downloadedURL = normalizeUrl(downloadedURL);
|
||||||
BufferedWriter bw = null;
|
BufferedWriter bw = null;
|
||||||
FileWriter fw = null;
|
FileWriter fw = null;
|
||||||
try {
|
try {
|
||||||
@ -86,6 +93,15 @@ public abstract class AbstractRipper
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Normalize a URL
|
||||||
|
* @param url URL to normalize
|
||||||
|
*/
|
||||||
|
public String normalizeUrl(String url) {
|
||||||
|
return url;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks to see if Ripme has already downloaded a URL
|
* Checks to see if Ripme has already downloaded a URL
|
||||||
* @param url URL to check if downloaded
|
* @param url URL to check if downloaded
|
||||||
@ -95,6 +111,7 @@ public abstract class AbstractRipper
|
|||||||
*/
|
*/
|
||||||
private boolean hasDownloadedURL(String url) {
|
private boolean hasDownloadedURL(String url) {
|
||||||
File file = new File(URLHistoryFile);
|
File file = new File(URLHistoryFile);
|
||||||
|
url = normalizeUrl(url);
|
||||||
try {
|
try {
|
||||||
Scanner scanner = new Scanner(file);
|
Scanner scanner = new Scanner(file);
|
||||||
while (scanner.hasNextLine()) {
|
while (scanner.hasNextLine()) {
|
||||||
@ -194,9 +211,11 @@ public abstract class AbstractRipper
|
|||||||
* False if failed to download
|
* False if failed to download
|
||||||
*/
|
*/
|
||||||
protected boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map<String, String> cookies) {
|
protected boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map<String, String> cookies) {
|
||||||
|
// Don't re-add the url if it was downloaded in a previous rip
|
||||||
if (Utils.getConfigBoolean("remember.url_history", true) && !isThisATest()) {
|
if (Utils.getConfigBoolean("remember.url_history", true) && !isThisATest()) {
|
||||||
if (hasDownloadedURL(url.toExternalForm())) {
|
if (hasDownloadedURL(url.toExternalForm())) {
|
||||||
sendUpdate(STATUS.DOWNLOAD_WARN, "Already downloaded " + url.toExternalForm());
|
sendUpdate(STATUS.DOWNLOAD_WARN, "Already downloaded " + url.toExternalForm());
|
||||||
|
alreadyDownloadedUrls += 1;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -51,7 +51,7 @@ public class EightmusesRipper extends AbstractHTMLRipper {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getGID(URL url) throws MalformedURLException {
|
public String getGID(URL url) throws MalformedURLException {
|
||||||
Pattern p = Pattern.compile("^https?://(www\\.)?8muses\\.com/comix/album/([a-zA-Z0-9\\-_]+).*$");
|
Pattern p = Pattern.compile("^https?://(www\\.)?8muses\\.com/(comix|comics)/album/([a-zA-Z0-9\\-_]+).*$");
|
||||||
Matcher m = p.matcher(url.toExternalForm());
|
Matcher m = p.matcher(url.toExternalForm());
|
||||||
if (!m.matches()) {
|
if (!m.matches()) {
|
||||||
throw new MalformedURLException("Expected URL format: http://www.8muses.com/index/category/albumname, got: " + url);
|
throw new MalformedURLException("Expected URL format: http://www.8muses.com/index/category/albumname, got: " + url);
|
||||||
@ -93,7 +93,7 @@ public class EightmusesRipper extends AbstractHTMLRipper {
|
|||||||
Elements pageImages = page.getElementsByClass("c-tile");
|
Elements pageImages = page.getElementsByClass("c-tile");
|
||||||
for (Element thumb : pageImages) {
|
for (Element thumb : pageImages) {
|
||||||
// If true this link is a sub album
|
// If true this link is a sub album
|
||||||
if (thumb.attr("href").contains("/comix/album/")) {
|
if (thumb.attr("href").contains("/comics/album/")) {
|
||||||
String subUrl = "https://www.8muses.com" + thumb.attr("href");
|
String subUrl = "https://www.8muses.com" + thumb.attr("href");
|
||||||
try {
|
try {
|
||||||
logger.info("Retrieving " + subUrl);
|
logger.info("Retrieving " + subUrl);
|
||||||
@ -106,7 +106,8 @@ public class EightmusesRipper extends AbstractHTMLRipper {
|
|||||||
logger.warn("Error while loading subalbum " + subUrl, e);
|
logger.warn("Error while loading subalbum " + subUrl, e);
|
||||||
}
|
}
|
||||||
|
|
||||||
} else if (thumb.attr("href").contains("/comix/picture/")) {
|
} else if (thumb.attr("href").contains("/comics/picture/")) {
|
||||||
|
logger.info("This page is a album");
|
||||||
logger.info("Ripping image");
|
logger.info("Ripping image");
|
||||||
if (super.isStopped()) break;
|
if (super.isStopped()) break;
|
||||||
// Find thumbnail image source
|
// Find thumbnail image source
|
||||||
|
@ -24,6 +24,7 @@ import com.rarchives.ripme.utils.Utils;
|
|||||||
|
|
||||||
|
|
||||||
public class InstagramRipper extends AbstractHTMLRipper {
|
public class InstagramRipper extends AbstractHTMLRipper {
|
||||||
|
String nextPageID = "";
|
||||||
|
|
||||||
private String userID;
|
private String userID;
|
||||||
|
|
||||||
@ -52,6 +53,12 @@ public class InstagramRipper extends AbstractHTMLRipper {
|
|||||||
return san_url;
|
return san_url;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String normalizeUrl(String url) {
|
||||||
|
// Remove the date sig from the url
|
||||||
|
return url.replaceAll("/[A-Z0-9]{8}/", "/");
|
||||||
|
}
|
||||||
|
|
||||||
private List<String> getPostsFromSinglePage(Document Doc) {
|
private List<String> getPostsFromSinglePage(Document Doc) {
|
||||||
List<String> imageURLs = new ArrayList<>();
|
List<String> imageURLs = new ArrayList<>();
|
||||||
JSONArray datas;
|
JSONArray datas;
|
||||||
@ -192,7 +199,6 @@ public class InstagramRipper extends AbstractHTMLRipper {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<String> getURLsFromPage(Document doc) {
|
public List<String> getURLsFromPage(Document doc) {
|
||||||
String nextPageID = "";
|
|
||||||
List<String> imageURLs = new ArrayList<>();
|
List<String> imageURLs = new ArrayList<>();
|
||||||
JSONObject json = new JSONObject();
|
JSONObject json = new JSONObject();
|
||||||
try {
|
try {
|
||||||
@ -255,33 +261,7 @@ public class InstagramRipper extends AbstractHTMLRipper {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Rip the next page
|
|
||||||
if (!nextPageID.equals("") && !isThisATest()) {
|
|
||||||
if (url.toExternalForm().contains("/tags/")) {
|
|
||||||
try {
|
|
||||||
// Sleep for a while to avoid a ban
|
|
||||||
sleep(2500);
|
|
||||||
if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) {
|
|
||||||
getURLsFromPage(Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get());
|
|
||||||
} else {
|
|
||||||
getURLsFromPage(Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get());
|
|
||||||
}
|
|
||||||
|
|
||||||
} catch (IOException e) {
|
|
||||||
return imageURLs;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
// Sleep for a while to avoid a ban
|
|
||||||
sleep(2500);
|
|
||||||
getURLsFromPage(Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get());
|
|
||||||
} catch (IOException e) {
|
|
||||||
return imageURLs;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
logger.warn("Can't get net page");
|
|
||||||
}
|
|
||||||
} else { // We're ripping from a single page
|
} else { // We're ripping from a single page
|
||||||
logger.info("Ripping from single page");
|
logger.info("Ripping from single page");
|
||||||
imageURLs = getPostsFromSinglePage(doc);
|
imageURLs = getPostsFromSinglePage(doc);
|
||||||
@ -290,9 +270,65 @@ public class InstagramRipper extends AbstractHTMLRipper {
|
|||||||
return imageURLs;
|
return imageURLs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Document getNextPage(Document doc) throws IOException {
|
||||||
|
Document toreturn;
|
||||||
|
if (!nextPageID.equals("") && !isThisATest()) {
|
||||||
|
if (url.toExternalForm().contains("/tags/")) {
|
||||||
|
try {
|
||||||
|
// Sleep for a while to avoid a ban
|
||||||
|
sleep(2500);
|
||||||
|
if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) {
|
||||||
|
toreturn = Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get();
|
||||||
|
} else {
|
||||||
|
toreturn = Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get();
|
||||||
|
}
|
||||||
|
logger.info(toreturn.html());
|
||||||
|
if (!hasImage(toreturn)) {
|
||||||
|
throw new IOException("No more pages");
|
||||||
|
}
|
||||||
|
return toreturn;
|
||||||
|
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new IOException("No more pages");
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
// Sleep for a while to avoid a ban
|
||||||
|
sleep(2500);
|
||||||
|
toreturn = Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get();
|
||||||
|
if (!hasImage(toreturn)) {
|
||||||
|
throw new IOException("No more pages");
|
||||||
|
}
|
||||||
|
return toreturn;
|
||||||
|
} catch (IOException e) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
throw new IOException("No more pages");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void downloadURL(URL url, int index) {
|
public void downloadURL(URL url, int index) {
|
||||||
addURLToDownload(url);
|
addURLToDownload(url);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private boolean hasImage(Document doc) {
|
||||||
|
try {
|
||||||
|
JSONObject json = getJSONFromPage(doc);
|
||||||
|
JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage");
|
||||||
|
JSONArray datas = profilePage.getJSONObject(0).getJSONObject("user").getJSONObject("media").getJSONArray("nodes");
|
||||||
|
logger.info(datas.length());
|
||||||
|
if (datas.length() == 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
} catch (IOException e) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -57,19 +57,21 @@ public class XvideosRipper extends VideoRipper {
|
|||||||
public void rip() throws IOException {
|
public void rip() throws IOException {
|
||||||
logger.info(" Retrieving " + this.url);
|
logger.info(" Retrieving " + this.url);
|
||||||
Document doc = Http.url(this.url).get();
|
Document doc = Http.url(this.url).get();
|
||||||
Elements embeds = doc.select("embed");
|
Elements scripts = doc.select("script");
|
||||||
if (embeds.size() == 0) {
|
for (Element e : scripts) {
|
||||||
throw new IOException("Could not find Embed code at " + url);
|
if (e.html().contains("html5player.setVideoUrlHigh")) {
|
||||||
}
|
logger.info("Found the right script");
|
||||||
Element embed = embeds.get(0);
|
String[] lines = e.html().split("\n");
|
||||||
String vars = embed.attr("flashvars");
|
for (String line: lines) {
|
||||||
for (String var : vars.split("&")) {
|
if (line.contains("html5player.setVideoUrlHigh")) {
|
||||||
if (var.startsWith("flv_url=")) {
|
String videoURL = line.replaceAll("\t", "").replaceAll("html5player.setVideoUrlHigh\\(", "").replaceAll("\'", "").replaceAll("\\);", "");
|
||||||
String vidUrl = var.substring("flv_url=".length());
|
addURLToDownload(new URL(videoURL), HOST + "_" + getGID(this.url));
|
||||||
vidUrl = URLDecoder.decode(vidUrl, "UTF-8");
|
waitForThreads();
|
||||||
addURLToDownload(new URL(vidUrl), HOST + "_" + getGID(this.url));
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
waitForThreads();
|
throw new IOException("Unable to find video url at " + this.url.toExternalForm());
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils;
|
|||||||
public class UpdateUtils {
|
public class UpdateUtils {
|
||||||
|
|
||||||
private static final Logger logger = Logger.getLogger(UpdateUtils.class);
|
private static final Logger logger = Logger.getLogger(UpdateUtils.class);
|
||||||
private static final String DEFAULT_VERSION = "1.7.19";
|
private static final String DEFAULT_VERSION = "1.7.23";
|
||||||
private static final String REPO_NAME = "ripmeapp/ripme";
|
private static final String REPO_NAME = "ripmeapp/ripme";
|
||||||
private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json";
|
private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json";
|
||||||
private static final String mainFileName = "ripme.jar";
|
private static final String mainFileName = "ripme.jar";
|
||||||
|
@ -10,6 +10,9 @@ public class EightmusesRipperTest extends RippersTest {
|
|||||||
// A simple image album
|
// A simple image album
|
||||||
EightmusesRipper ripper = new EightmusesRipper(new URL("https://www.8muses.com/comix/album/Affect3D-Comics/TheDude3DX/Lust-Unleashed-The-Urge-To-Explore"));
|
EightmusesRipper ripper = new EightmusesRipper(new URL("https://www.8muses.com/comix/album/Affect3D-Comics/TheDude3DX/Lust-Unleashed-The-Urge-To-Explore"));
|
||||||
testRipper(ripper);
|
testRipper(ripper);
|
||||||
|
// Test the new url format
|
||||||
|
ripper = new EightmusesRipper(new URL("https://www.8muses.com/comics/album/Affect3D-Comics/TheDude3DX/Lust-Unleashed-The-Urge-To-Explore"));
|
||||||
|
testRipper(ripper);
|
||||||
// Test pages with subalbums
|
// Test pages with subalbums
|
||||||
ripper = new EightmusesRipper(new URL("https://www.8muses.com/comix/album/Blacknwhitecomics_com-Comix/BlacknWhiteComics/The-Mayor"));
|
ripper = new EightmusesRipper(new URL("https://www.8muses.com/comix/album/Blacknwhitecomics_com-Comix/BlacknWhiteComics/The-Mayor"));
|
||||||
testRipper(ripper);
|
testRipper(ripper);
|
||||||
|
@ -1,13 +1,15 @@
|
|||||||
package com.rarchives.ripme.tst.ripper.rippers;
|
//package com.rarchives.ripme.tst.ripper.rippers;
|
||||||
|
//
|
||||||
|
//import java.io.IOException;
|
||||||
|
//import java.net.URL;
|
||||||
|
//
|
||||||
|
//import com.rarchives.ripme.ripper.rippers.FivehundredpxRipper;
|
||||||
|
//
|
||||||
|
//public class FivehundredpxRipperTest extends RippersTest {
|
||||||
|
// public void test500pxAlbum() throws IOException {
|
||||||
|
// FivehundredpxRipper ripper = new FivehundredpxRipper(new URL("https://marketplace.500px.com/alexander_hurman"));
|
||||||
|
// testRipper(ripper);
|
||||||
|
// }
|
||||||
|
//}
|
||||||
|
|
||||||
import java.io.IOException;
|
// Ripper is broken. See https://github.com/RipMeApp/ripme/issues/438
|
||||||
import java.net.URL;
|
|
||||||
|
|
||||||
import com.rarchives.ripme.ripper.rippers.FivehundredpxRipper;
|
|
||||||
|
|
||||||
public class FivehundredpxRipperTest extends RippersTest {
|
|
||||||
public void test500pxAlbum() throws IOException {
|
|
||||||
FivehundredpxRipper ripper = new FivehundredpxRipper(new URL("https://marketplace.500px.com/alexander_hurman"));
|
|
||||||
testRipper(ripper);
|
|
||||||
}
|
|
||||||
}
|
|
Loading…
Reference in New Issue
Block a user