Merge pull request #5 from RipMeApp/master

Sync.
This commit is contained in:
rephormat 2018-03-12 10:16:58 -05:00 committed by GitHub
commit 41eb9fd09b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 132 additions and 98 deletions

View File

@ -1 +1,2 @@
#!/usr/bin/env bash
mvn clean compile assembly:single mvn clean compile assembly:single

View File

@ -1,40 +0,0 @@
file.overwrite | bool | If true ripme will overwrite existing files rather than skip them
clipboard.autorip | bool | If true ripme will try to download any links in the clipboard
error.skip404 | bool | Don't retry on 404 errors
download.save_order| bool | If true ripme will prefix each downloaded file with a number in the order the file was downloaded
auto.update | bool | If true ripme will auto-update every time it's started
play.sound | bool | If true ripme will play a sound every time a rip finishes
download.show_popup| bool | TODO figure out what this is for
log.save | bool | If true ripme will save its logs
urls_only.save | bool | If true ripme will save all urls to a text file and download no files
album_titles.save | bool | Currently does nothing
prefer.mp4 | bool | Prefer mp4 when downloading a video that has more than 1 format
download.timeout | int | File download timeout (in milliseconds)
page.timeout | int | Page download timeout (in milliseconds)
download.max_size | int | Maximum size of downloaded files in bytes
threads.size | int | The number of threads to use
twitter.auth | String | Twitter API key (Base64'd)
tumblr.auth | String | Tumblr API key
log.level | String | The debug log level (Example: Log level: Debug)
gw.api | String | TODO figure out what this is for
twitter.max_requests | int | TODO figure out what this is for

View File

@ -4,7 +4,7 @@
<groupId>com.rarchives.ripme</groupId> <groupId>com.rarchives.ripme</groupId>
<artifactId>ripme</artifactId> <artifactId>ripme</artifactId>
<packaging>jar</packaging> <packaging>jar</packaging>
<version>1.7.19</version> <version>1.7.23</version>
<name>ripme</name> <name>ripme</name>
<url>http://rip.rarchives.com</url> <url>http://rip.rarchives.com</url>
<properties> <properties>

View File

@ -1,6 +1,10 @@
{ {
"latestVersion": "1.7.19", "latestVersion": "1.7.23",
"changeList": [ "changeList": [
"1.7.23: Fixed xvideos ripper; InstagramRipper now works with lastseenfeature",
"1.7.22: Added func to normalize urls before reading from/writing to url history file; last seen feature now works with instagram",
"1.7.21: Fixed last seen feature",
"1.7.20: Fixed 8muses ripper; Added last seen feature; disabled 500px test",
"1.7.19: imgurRipper no longer tries to add ?1 to file names", "1.7.19: imgurRipper no longer tries to add ?1 to file names",
"1.7.18: AlbumRipper now removes bad chars from file names", "1.7.18: AlbumRipper now removes bad chars from file names",
"1.7.17: Fixed hentai.cafe autorip from clipboard", "1.7.17: Fixed hentai.cafe autorip from clipboard",

View File

@ -68,7 +68,13 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
Document doc = getFirstPage(); Document doc = getFirstPage();
while (doc != null) { while (doc != null) {
if (alreadyDownloadedUrls >= Utils.getConfigInteger("history.end_rip_after_already_seen", 1000000000) && !isThisATest()) {
sendUpdate(STATUS.DOWNLOAD_COMPLETE, "Already seen the last " + alreadyDownloadedUrls + " images ending rip");
break;
}
List<String> imageURLs = getURLsFromPage(doc); List<String> imageURLs = getURLsFromPage(doc);
// If hasASAPRipping() returns true then the ripper will handle downloading the files
// if not it's done in the following block of code
if (!hasASAPRipping()) { if (!hasASAPRipping()) {
// Remove all but 1 image // Remove all but 1 image
if (isThisATest()) { if (isThisATest()) {

View File

@ -44,7 +44,8 @@ public abstract class AbstractRipper
public abstract String getHost(); public abstract String getHost();
public abstract String getGID(URL url) throws MalformedURLException; public abstract String getGID(URL url) throws MalformedURLException;
public boolean hasASAPRipping() { return false; } public boolean hasASAPRipping() { return false; }
// Every time addURLToDownload skips an already-downloaded URL, this increases by 1
public int alreadyDownloadedUrls = 0;
private boolean shouldStop = false; private boolean shouldStop = false;
private boolean thisIsATest = false; private boolean thisIsATest = false;
@ -60,7 +61,13 @@ public abstract class AbstractRipper
} }
} }
/**
* Adds a URL to the url history file
* @param downloadedURL URL to record in the history file (normalized before it is written)
*/
private void writeDownloadedURL(String downloadedURL) throws IOException { private void writeDownloadedURL(String downloadedURL) throws IOException {
downloadedURL = normalizeUrl(downloadedURL);
BufferedWriter bw = null; BufferedWriter bw = null;
FileWriter fw = null; FileWriter fw = null;
try { try {
@ -86,6 +93,15 @@ public abstract class AbstractRipper
} }
} }
/**
* Normalizes a URL before it is looked up in, or written to, the URL history file.
* This base implementation returns the URL unchanged; rippers whose URLs contain
* volatile parts can override it so that equivalent URLs compare equal.
* @param url URL to normalize
* @return the normalized URL (here, {@code url} itself)
*/
public String normalizeUrl(String url) {
return url;
}
/** /**
* Checks to see if Ripme has already downloaded a URL * Checks to see if Ripme has already downloaded a URL
* @param url URL to check if downloaded * @param url URL to check if downloaded
@ -95,6 +111,7 @@ public abstract class AbstractRipper
*/ */
private boolean hasDownloadedURL(String url) { private boolean hasDownloadedURL(String url) {
File file = new File(URLHistoryFile); File file = new File(URLHistoryFile);
url = normalizeUrl(url);
try { try {
Scanner scanner = new Scanner(file); Scanner scanner = new Scanner(file);
while (scanner.hasNextLine()) { while (scanner.hasNextLine()) {
@ -194,9 +211,11 @@ public abstract class AbstractRipper
* False if failed to download * False if failed to download
*/ */
protected boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map<String, String> cookies) { protected boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map<String, String> cookies) {
// Don't re-add the url if it was downloaded in a previous rip
if (Utils.getConfigBoolean("remember.url_history", true) && !isThisATest()) { if (Utils.getConfigBoolean("remember.url_history", true) && !isThisATest()) {
if (hasDownloadedURL(url.toExternalForm())) { if (hasDownloadedURL(url.toExternalForm())) {
sendUpdate(STATUS.DOWNLOAD_WARN, "Already downloaded " + url.toExternalForm()); sendUpdate(STATUS.DOWNLOAD_WARN, "Already downloaded " + url.toExternalForm());
alreadyDownloadedUrls += 1;
return false; return false;
} }
} }

View File

@ -51,7 +51,7 @@ public class EightmusesRipper extends AbstractHTMLRipper {
@Override @Override
public String getGID(URL url) throws MalformedURLException { public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://(www\\.)?8muses\\.com/comix/album/([a-zA-Z0-9\\-_]+).*$"); Pattern p = Pattern.compile("^https?://(www\\.)?8muses\\.com/(comix|comics)/album/([a-zA-Z0-9\\-_]+).*$");
Matcher m = p.matcher(url.toExternalForm()); Matcher m = p.matcher(url.toExternalForm());
if (!m.matches()) { if (!m.matches()) {
throw new MalformedURLException("Expected URL format: http://www.8muses.com/index/category/albumname, got: " + url); throw new MalformedURLException("Expected URL format: http://www.8muses.com/index/category/albumname, got: " + url);
@ -93,7 +93,7 @@ public class EightmusesRipper extends AbstractHTMLRipper {
Elements pageImages = page.getElementsByClass("c-tile"); Elements pageImages = page.getElementsByClass("c-tile");
for (Element thumb : pageImages) { for (Element thumb : pageImages) {
// If true this link is a sub album // If true this link is a sub album
if (thumb.attr("href").contains("/comix/album/")) { if (thumb.attr("href").contains("/comics/album/")) {
String subUrl = "https://www.8muses.com" + thumb.attr("href"); String subUrl = "https://www.8muses.com" + thumb.attr("href");
try { try {
logger.info("Retrieving " + subUrl); logger.info("Retrieving " + subUrl);
@ -106,7 +106,8 @@ public class EightmusesRipper extends AbstractHTMLRipper {
logger.warn("Error while loading subalbum " + subUrl, e); logger.warn("Error while loading subalbum " + subUrl, e);
} }
} else if (thumb.attr("href").contains("/comix/picture/")) { } else if (thumb.attr("href").contains("/comics/picture/")) {
logger.info("This page is a album");
logger.info("Ripping image"); logger.info("Ripping image");
if (super.isStopped()) break; if (super.isStopped()) break;
// Find thumbnail image source // Find thumbnail image source

View File

@ -24,6 +24,7 @@ import com.rarchives.ripme.utils.Utils;
public class InstagramRipper extends AbstractHTMLRipper { public class InstagramRipper extends AbstractHTMLRipper {
String nextPageID = "";
private String userID; private String userID;
@ -52,6 +53,12 @@ public class InstagramRipper extends AbstractHTMLRipper {
return san_url; return san_url;
} }
/**
 * Normalizes an Instagram URL for URL-history comparisons by collapsing every
 * {@code /XXXXXXXX/} path segment made of exactly eight upper-case letters or
 * digits into a single {@code /}.
 * NOTE(review): the original comment calls this segment the "date sig";
 * presumably it changes between requests for the same image -- confirm.
 * @param url URL to normalize
 * @return the URL with all such segments removed
 */
@Override
public String normalizeUrl(String url) {
    // Remove the date sig from the url
    final String dateSigSegment = "/[A-Z0-9]{8}/";
    return url.replaceAll(dateSigSegment, "/");
}
private List<String> getPostsFromSinglePage(Document Doc) { private List<String> getPostsFromSinglePage(Document Doc) {
List<String> imageURLs = new ArrayList<>(); List<String> imageURLs = new ArrayList<>();
JSONArray datas; JSONArray datas;
@ -192,7 +199,6 @@ public class InstagramRipper extends AbstractHTMLRipper {
@Override @Override
public List<String> getURLsFromPage(Document doc) { public List<String> getURLsFromPage(Document doc) {
String nextPageID = "";
List<String> imageURLs = new ArrayList<>(); List<String> imageURLs = new ArrayList<>();
JSONObject json = new JSONObject(); JSONObject json = new JSONObject();
try { try {
@ -255,33 +261,7 @@ public class InstagramRipper extends AbstractHTMLRipper {
break; break;
} }
} }
// Rip the next page
if (!nextPageID.equals("") && !isThisATest()) {
if (url.toExternalForm().contains("/tags/")) {
try {
// Sleep for a while to avoid a ban
sleep(2500);
if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) {
getURLsFromPage(Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get());
} else {
getURLsFromPage(Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get());
}
} catch (IOException e) {
return imageURLs;
}
}
try {
// Sleep for a while to avoid a ban
sleep(2500);
getURLsFromPage(Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get());
} catch (IOException e) {
return imageURLs;
}
} else {
logger.warn("Can't get net page");
}
} else { // We're ripping from a single page } else { // We're ripping from a single page
logger.info("Ripping from single page"); logger.info("Ripping from single page");
imageURLs = getPostsFromSinglePage(doc); imageURLs = getPostsFromSinglePage(doc);
@ -290,9 +270,65 @@ public class InstagramRipper extends AbstractHTMLRipper {
return imageURLs; return imageURLs;
} }
/**
 * Fetches the page that follows the current one, using {@code nextPageID} as the
 * {@code max_id} query parameter.
 * NOTE(review): {@code nextPageID} is presumably updated by getURLsFromPage while
 * it parses each page -- confirm against that method.
 *
 * Tag rips (URL contains {@code /tags/}) and user rips report failure differently,
 * and that difference is preserved here:
 * tag rips throw, user rips return {@code null}.
 *
 * @param doc the page that was just processed (not read by this implementation)
 * @return the next page, or {@code null} when a user rip cannot load a next page
 *         or the loaded page has no images
 * @throws IOException if there is no next page ID, this is a test run, or a tag
 *         rip cannot load a next page that contains images
 */
@Override
public Document getNextPage(Document doc) throws IOException {
    if (nextPageID.equals("") || isThisATest()) {
        throw new IOException("No more pages");
    }

    String currentUrl = url.toExternalForm();
    boolean rippingTag = currentUrl.contains("/tags/");

    String nextPageUrl;
    if (rippingTag) {
        // Make sure exactly one slash separates the tag URL from the query string
        String separator = currentUrl.endsWith("/") ? "" : "/";
        nextPageUrl = currentUrl + separator + "?max_id=" + nextPageID;
    } else {
        nextPageUrl = "https://www.instagram.com/" + userID + "/?max_id=" + nextPageID;
    }

    // Sleep for a while to avoid a ban
    sleep(2500);

    Document nextPage;
    try {
        nextPage = Http.url(nextPageUrl).get();
    } catch (IOException e) {
        if (rippingTag) {
            // Keep the underlying failure attached so it is not lost to callers
            throw new IOException("No more pages", e);
        }
        return null;
    }

    if (rippingTag) {
        // A full page dump is only useful when debugging; keep it out of INFO logs
        logger.debug(nextPage.html());
    }

    if (!hasImage(nextPage)) {
        if (rippingTag) {
            throw new IOException("No more pages");
        }
        return null;
    }
    return nextPage;
}
@Override @Override
public void downloadURL(URL url, int index) { public void downloadURL(URL url, int index) {
addURLToDownload(url); addURLToDownload(url);
} }
/**
 * Reports whether a page's embedded JSON lists at least one media node under
 * {@code entry_data.ProfilePage[0].user.media.nodes}.
 * The node count is logged at INFO level.
 * @param doc page to inspect
 * @return {@code true} if the node array is non-empty; {@code false} if it is
 *         empty or the JSON could not be read from the page
 */
private boolean hasImage(Document doc) {
    final JSONArray nodes;
    try {
        nodes = getJSONFromPage(doc)
                .getJSONObject("entry_data")
                .getJSONArray("ProfilePage")
                .getJSONObject(0)
                .getJSONObject("user")
                .getJSONObject("media")
                .getJSONArray("nodes");
    } catch (IOException e) {
        return false;
    }
    logger.info(nodes.length());
    return nodes.length() != 0;
}
} }

View File

@ -57,19 +57,21 @@ public class XvideosRipper extends VideoRipper {
public void rip() throws IOException { public void rip() throws IOException {
logger.info(" Retrieving " + this.url); logger.info(" Retrieving " + this.url);
Document doc = Http.url(this.url).get(); Document doc = Http.url(this.url).get();
Elements embeds = doc.select("embed"); Elements scripts = doc.select("script");
if (embeds.size() == 0) { for (Element e : scripts) {
throw new IOException("Could not find Embed code at " + url); if (e.html().contains("html5player.setVideoUrlHigh")) {
} logger.info("Found the right script");
Element embed = embeds.get(0); String[] lines = e.html().split("\n");
String vars = embed.attr("flashvars"); for (String line: lines) {
for (String var : vars.split("&")) { if (line.contains("html5player.setVideoUrlHigh")) {
if (var.startsWith("flv_url=")) { String videoURL = line.replaceAll("\t", "").replaceAll("html5player.setVideoUrlHigh\\(", "").replaceAll("\'", "").replaceAll("\\);", "");
String vidUrl = var.substring("flv_url=".length()); addURLToDownload(new URL(videoURL), HOST + "_" + getGID(this.url));
vidUrl = URLDecoder.decode(vidUrl, "UTF-8"); waitForThreads();
addURLToDownload(new URL(vidUrl), HOST + "_" + getGID(this.url)); return;
}
}
} }
} }
waitForThreads(); throw new IOException("Unable to find video url at " + this.url.toExternalForm());
} }
} }

View File

@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils;
public class UpdateUtils { public class UpdateUtils {
private static final Logger logger = Logger.getLogger(UpdateUtils.class); private static final Logger logger = Logger.getLogger(UpdateUtils.class);
private static final String DEFAULT_VERSION = "1.7.19"; private static final String DEFAULT_VERSION = "1.7.23";
private static final String REPO_NAME = "ripmeapp/ripme"; private static final String REPO_NAME = "ripmeapp/ripme";
private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json"; private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json";
private static final String mainFileName = "ripme.jar"; private static final String mainFileName = "ripme.jar";

View File

@ -10,6 +10,9 @@ public class EightmusesRipperTest extends RippersTest {
// A simple image album // A simple image album
EightmusesRipper ripper = new EightmusesRipper(new URL("https://www.8muses.com/comix/album/Affect3D-Comics/TheDude3DX/Lust-Unleashed-The-Urge-To-Explore")); EightmusesRipper ripper = new EightmusesRipper(new URL("https://www.8muses.com/comix/album/Affect3D-Comics/TheDude3DX/Lust-Unleashed-The-Urge-To-Explore"));
testRipper(ripper); testRipper(ripper);
// Test the new url format
ripper = new EightmusesRipper(new URL("https://www.8muses.com/comics/album/Affect3D-Comics/TheDude3DX/Lust-Unleashed-The-Urge-To-Explore"));
testRipper(ripper);
// Test pages with subalbums // Test pages with subalbums
ripper = new EightmusesRipper(new URL("https://www.8muses.com/comix/album/Blacknwhitecomics_com-Comix/BlacknWhiteComics/The-Mayor")); ripper = new EightmusesRipper(new URL("https://www.8muses.com/comix/album/Blacknwhitecomics_com-Comix/BlacknWhiteComics/The-Mayor"));
testRipper(ripper); testRipper(ripper);

View File

@ -1,13 +1,15 @@
package com.rarchives.ripme.tst.ripper.rippers; //package com.rarchives.ripme.tst.ripper.rippers;
//
//import java.io.IOException;
//import java.net.URL;
//
//import com.rarchives.ripme.ripper.rippers.FivehundredpxRipper;
//
//public class FivehundredpxRipperTest extends RippersTest {
// public void test500pxAlbum() throws IOException {
// FivehundredpxRipper ripper = new FivehundredpxRipper(new URL("https://marketplace.500px.com/alexander_hurman"));
// testRipper(ripper);
// }
//}
import java.io.IOException; // Ripper is broken. See https://github.com/RipMeApp/ripme/issues/438
import java.net.URL;
import com.rarchives.ripme.ripper.rippers.FivehundredpxRipper;
public class FivehundredpxRipperTest extends RippersTest {
public void test500pxAlbum() throws IOException {
FivehundredpxRipper ripper = new FivehundredpxRipper(new URL("https://marketplace.500px.com/alexander_hurman"));
testRipper(ripper);
}
}