commit
41eb9fd09b
@ -1,40 +0,0 @@
|
||||
file.overwrite | bool | If true ripme will overwrite existing files rather than skip them
|
||||
|
||||
clipboard.autorip | bool | If true ripme will try to download any links in the clipboard
|
||||
|
||||
error.skip404 | bool | Don't retry on 404 errors
|
||||
|
||||
download.save_order| bool | If true ripme will prefix each downloaded file with a number in the order the files were downloaded
|
||||
|
||||
auto.update | bool | If true ripme will auto-update every time it's started
|
||||
|
||||
play.sound | bool | If true ripme will play a sound every time a rip finishes
|
||||
|
||||
download.show_popup| bool | TODO figure out what this is for
|
||||
|
||||
log.save | bool | If true ripme will save its logs
|
||||
|
||||
urls_only.save | bool | If true ripme will save all urls to a text file and download no files
|
||||
|
||||
album_titles.save | bool | Currently does nothing
|
||||
|
||||
prefer.mp4 | bool | Prefer mp4 when downloading a video that has more than 1 format
|
||||
|
||||
download.timeout | int | File download timeout (in milliseconds)
|
||||
|
||||
page.timeout | int | Page download timeout (in milliseconds)
|
||||
|
||||
download.max_size | int | Maximum size of downloaded files in bytes
|
||||
|
||||
threads.size | int | The number of threads to use
|
||||
|
||||
twitter.auth | String | Twitter API key (Base64'd)
|
||||
|
||||
tumblr.auth | String | Tumblr API key
|
||||
|
||||
log.level | String | The debug log level (Example: Log level: Debug)
|
||||
|
||||
gw.api | String | TODO figure out what this is for
|
||||
|
||||
twitter.max_requests | int | TODO figure out what this is for
|
||||
|
2
pom.xml
2
pom.xml
@ -4,7 +4,7 @@
|
||||
<groupId>com.rarchives.ripme</groupId>
|
||||
<artifactId>ripme</artifactId>
|
||||
<packaging>jar</packaging>
|
||||
<version>1.7.19</version>
|
||||
<version>1.7.23</version>
|
||||
<name>ripme</name>
|
||||
<url>http://rip.rarchives.com</url>
|
||||
<properties>
|
||||
|
@ -1,6 +1,10 @@
|
||||
{
|
||||
"latestVersion": "1.7.19",
|
||||
"latestVersion": "1.7.23",
|
||||
"changeList": [
|
||||
"1.7.23: Fixed xvideos ripper; InstagramRipper now works with lastseenfeature",
|
||||
"1.7.22: Added func to normalize urls before reading from/writing to url history file; last seen feature now works with instagram",
|
||||
"1.7.21: Fixed last seen feature",
|
||||
"1.7.20: Fixed 8muses ripper; Added last seen feature; disabled 500px test",
|
||||
"1.7.19: imgurRipper no longer tries to add ?1 to file names",
|
||||
"1.7.18: AlbumRipper now removes bad chars from file names",
|
||||
"1.7.17: Fixed hentai.cafe autorip from clipboard",
|
||||
|
@ -68,7 +68,13 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
|
||||
Document doc = getFirstPage();
|
||||
|
||||
while (doc != null) {
|
||||
if (alreadyDownloadedUrls >= Utils.getConfigInteger("history.end_rip_after_already_seen", 1000000000) && !isThisATest()) {
|
||||
sendUpdate(STATUS.DOWNLOAD_COMPLETE, "Already seen the last " + alreadyDownloadedUrls + " images ending rip");
|
||||
break;
|
||||
}
|
||||
List<String> imageURLs = getURLsFromPage(doc);
|
||||
// If hasASAPRipping() returns true then the ripper will handle downloading the files
|
||||
// if not it's done in the following block of code
|
||||
if (!hasASAPRipping()) {
|
||||
// Remove all but 1 image
|
||||
if (isThisATest()) {
|
||||
|
@ -44,7 +44,8 @@ public abstract class AbstractRipper
|
||||
public abstract String getHost();
|
||||
public abstract String getGID(URL url) throws MalformedURLException;
|
||||
public boolean hasASAPRipping() { return false; }
|
||||
|
||||
// Every time addURLToDownload skips an already downloaded URL this increases by 1
|
||||
public int alreadyDownloadedUrls = 0;
|
||||
private boolean shouldStop = false;
|
||||
private boolean thisIsATest = false;
|
||||
|
||||
@ -60,7 +61,13 @@ public abstract class AbstractRipper
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Adds a URL to the url history file
|
||||
* @param downloadedURL URL to add to the url history file
|
||||
*/
|
||||
private void writeDownloadedURL(String downloadedURL) throws IOException {
|
||||
downloadedURL = normalizeUrl(downloadedURL);
|
||||
BufferedWriter bw = null;
|
||||
FileWriter fw = null;
|
||||
try {
|
||||
@ -86,6 +93,15 @@ public abstract class AbstractRipper
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
* Normalizes a URL before it is compared against, or written to, the url history file.
* This base implementation returns the URL unchanged; it is public and non-final so a
* ripper can override it to strip parts of the URL that vary between otherwise identical links.
* @param url URL to normalize
* @return the normalized URL (here, the same string that was passed in)
*/
|
||||
public String normalizeUrl(String url) {
|
||||
return url;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks to see if Ripme has already downloaded a URL
|
||||
* @param url URL to check if downloaded
|
||||
@ -95,6 +111,7 @@ public abstract class AbstractRipper
|
||||
*/
|
||||
private boolean hasDownloadedURL(String url) {
|
||||
File file = new File(URLHistoryFile);
|
||||
url = normalizeUrl(url);
|
||||
try {
|
||||
Scanner scanner = new Scanner(file);
|
||||
while (scanner.hasNextLine()) {
|
||||
@ -194,9 +211,11 @@ public abstract class AbstractRipper
|
||||
* False if failed to download
|
||||
*/
|
||||
protected boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map<String, String> cookies) {
|
||||
// Don't re-add the url if it was downloaded in a previous rip
|
||||
if (Utils.getConfigBoolean("remember.url_history", true) && !isThisATest()) {
|
||||
if (hasDownloadedURL(url.toExternalForm())) {
|
||||
sendUpdate(STATUS.DOWNLOAD_WARN, "Already downloaded " + url.toExternalForm());
|
||||
alreadyDownloadedUrls += 1;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -51,7 +51,7 @@ public class EightmusesRipper extends AbstractHTMLRipper {
|
||||
|
||||
@Override
|
||||
public String getGID(URL url) throws MalformedURLException {
|
||||
Pattern p = Pattern.compile("^https?://(www\\.)?8muses\\.com/comix/album/([a-zA-Z0-9\\-_]+).*$");
|
||||
Pattern p = Pattern.compile("^https?://(www\\.)?8muses\\.com/(comix|comics)/album/([a-zA-Z0-9\\-_]+).*$");
|
||||
Matcher m = p.matcher(url.toExternalForm());
|
||||
if (!m.matches()) {
|
||||
throw new MalformedURLException("Expected URL format: http://www.8muses.com/index/category/albumname, got: " + url);
|
||||
@ -93,7 +93,7 @@ public class EightmusesRipper extends AbstractHTMLRipper {
|
||||
Elements pageImages = page.getElementsByClass("c-tile");
|
||||
for (Element thumb : pageImages) {
|
||||
// If true this link is a sub album
|
||||
if (thumb.attr("href").contains("/comix/album/")) {
|
||||
if (thumb.attr("href").contains("/comics/album/")) {
|
||||
String subUrl = "https://www.8muses.com" + thumb.attr("href");
|
||||
try {
|
||||
logger.info("Retrieving " + subUrl);
|
||||
@ -106,7 +106,8 @@ public class EightmusesRipper extends AbstractHTMLRipper {
|
||||
logger.warn("Error while loading subalbum " + subUrl, e);
|
||||
}
|
||||
|
||||
} else if (thumb.attr("href").contains("/comix/picture/")) {
|
||||
} else if (thumb.attr("href").contains("/comics/picture/")) {
|
||||
logger.info("This page is a album");
|
||||
logger.info("Ripping image");
|
||||
if (super.isStopped()) break;
|
||||
// Find thumbnail image source
|
||||
|
@ -24,6 +24,7 @@ import com.rarchives.ripme.utils.Utils;
|
||||
|
||||
|
||||
public class InstagramRipper extends AbstractHTMLRipper {
|
||||
String nextPageID = "";
|
||||
|
||||
private String userID;
|
||||
|
||||
@ -52,6 +53,12 @@ public class InstagramRipper extends AbstractHTMLRipper {
|
||||
return san_url;
|
||||
}
|
||||
|
||||
// Normalizes an Instagram URL by replacing each match of "/XXXXXXXX/" (a path segment of
// exactly eight characters drawn from A-Z and 0-9, including both surrounding slashes)
// with a single "/". URLs without such a segment are returned unchanged.
@Override
|
||||
public String normalizeUrl(String url) {
|
||||
// Remove the date sig from the url
// NOTE(review): "date sig" is the original author's term for the 8-character segment;
// the exact meaning of that segment is not visible here — confirm against real URLs.
|
||||
return url.replaceAll("/[A-Z0-9]{8}/", "/");
|
||||
}
|
||||
|
||||
private List<String> getPostsFromSinglePage(Document Doc) {
|
||||
List<String> imageURLs = new ArrayList<>();
|
||||
JSONArray datas;
|
||||
@ -192,7 +199,6 @@ public class InstagramRipper extends AbstractHTMLRipper {
|
||||
|
||||
@Override
|
||||
public List<String> getURLsFromPage(Document doc) {
|
||||
String nextPageID = "";
|
||||
List<String> imageURLs = new ArrayList<>();
|
||||
JSONObject json = new JSONObject();
|
||||
try {
|
||||
@ -255,33 +261,7 @@ public class InstagramRipper extends AbstractHTMLRipper {
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Rip the next page
|
||||
if (!nextPageID.equals("") && !isThisATest()) {
|
||||
if (url.toExternalForm().contains("/tags/")) {
|
||||
try {
|
||||
// Sleep for a while to avoid a ban
|
||||
sleep(2500);
|
||||
if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) {
|
||||
getURLsFromPage(Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get());
|
||||
} else {
|
||||
getURLsFromPage(Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get());
|
||||
}
|
||||
|
||||
} catch (IOException e) {
|
||||
return imageURLs;
|
||||
}
|
||||
|
||||
}
|
||||
try {
|
||||
// Sleep for a while to avoid a ban
|
||||
sleep(2500);
|
||||
getURLsFromPage(Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get());
|
||||
} catch (IOException e) {
|
||||
return imageURLs;
|
||||
}
|
||||
} else {
|
||||
logger.warn("Can't get net page");
|
||||
}
|
||||
} else { // We're ripping from a single page
|
||||
logger.info("Ripping from single page");
|
||||
imageURLs = getPostsFromSinglePage(doc);
|
||||
@ -290,9 +270,65 @@ public class InstagramRipper extends AbstractHTMLRipper {
|
||||
return imageURLs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document getNextPage(Document doc) throws IOException {
|
||||
Document toreturn;
|
||||
if (!nextPageID.equals("") && !isThisATest()) {
|
||||
if (url.toExternalForm().contains("/tags/")) {
|
||||
try {
|
||||
// Sleep for a while to avoid a ban
|
||||
sleep(2500);
|
||||
if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) {
|
||||
toreturn = Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get();
|
||||
} else {
|
||||
toreturn = Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get();
|
||||
}
|
||||
logger.info(toreturn.html());
|
||||
if (!hasImage(toreturn)) {
|
||||
throw new IOException("No more pages");
|
||||
}
|
||||
return toreturn;
|
||||
|
||||
} catch (IOException e) {
|
||||
throw new IOException("No more pages");
|
||||
}
|
||||
|
||||
}
|
||||
try {
|
||||
// Sleep for a while to avoid a ban
|
||||
sleep(2500);
|
||||
toreturn = Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get();
|
||||
if (!hasImage(toreturn)) {
|
||||
throw new IOException("No more pages");
|
||||
}
|
||||
return toreturn;
|
||||
} catch (IOException e) {
|
||||
return null;
|
||||
}
|
||||
} else {
|
||||
throw new IOException("No more pages");
|
||||
}
|
||||
}
|
||||
|
||||
// Hands the given URL to addURLToDownload(url). The index argument is not used,
// so no ordering information is passed along from here.
@Override
|
||||
public void downloadURL(URL url, int index) {
|
||||
addURLToDownload(url);
|
||||
}
|
||||
|
||||
private boolean hasImage(Document doc) {
|
||||
try {
|
||||
JSONObject json = getJSONFromPage(doc);
|
||||
JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage");
|
||||
JSONArray datas = profilePage.getJSONObject(0).getJSONObject("user").getJSONObject("media").getJSONArray("nodes");
|
||||
logger.info(datas.length());
|
||||
if (datas.length() == 0) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
} catch (IOException e) {
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -57,19 +57,21 @@ public class XvideosRipper extends VideoRipper {
|
||||
public void rip() throws IOException {
|
||||
logger.info(" Retrieving " + this.url);
|
||||
Document doc = Http.url(this.url).get();
|
||||
Elements embeds = doc.select("embed");
|
||||
if (embeds.size() == 0) {
|
||||
throw new IOException("Could not find Embed code at " + url);
|
||||
}
|
||||
Element embed = embeds.get(0);
|
||||
String vars = embed.attr("flashvars");
|
||||
for (String var : vars.split("&")) {
|
||||
if (var.startsWith("flv_url=")) {
|
||||
String vidUrl = var.substring("flv_url=".length());
|
||||
vidUrl = URLDecoder.decode(vidUrl, "UTF-8");
|
||||
addURLToDownload(new URL(vidUrl), HOST + "_" + getGID(this.url));
|
||||
}
|
||||
}
|
||||
Elements scripts = doc.select("script");
|
||||
for (Element e : scripts) {
|
||||
if (e.html().contains("html5player.setVideoUrlHigh")) {
|
||||
logger.info("Found the right script");
|
||||
String[] lines = e.html().split("\n");
|
||||
for (String line: lines) {
|
||||
if (line.contains("html5player.setVideoUrlHigh")) {
|
||||
String videoURL = line.replaceAll("\t", "").replaceAll("html5player.setVideoUrlHigh\\(", "").replaceAll("\'", "").replaceAll("\\);", "");
|
||||
addURLToDownload(new URL(videoURL), HOST + "_" + getGID(this.url));
|
||||
waitForThreads();
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
throw new IOException("Unable to find video url at " + this.url.toExternalForm());
|
||||
}
|
||||
}
|
@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils;
|
||||
public class UpdateUtils {
|
||||
|
||||
private static final Logger logger = Logger.getLogger(UpdateUtils.class);
|
||||
private static final String DEFAULT_VERSION = "1.7.19";
|
||||
private static final String DEFAULT_VERSION = "1.7.23";
|
||||
private static final String REPO_NAME = "ripmeapp/ripme";
|
||||
private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json";
|
||||
private static final String mainFileName = "ripme.jar";
|
||||
|
@ -10,6 +10,9 @@ public class EightmusesRipperTest extends RippersTest {
|
||||
// A simple image album
|
||||
EightmusesRipper ripper = new EightmusesRipper(new URL("https://www.8muses.com/comix/album/Affect3D-Comics/TheDude3DX/Lust-Unleashed-The-Urge-To-Explore"));
|
||||
testRipper(ripper);
|
||||
// Test the new url format
|
||||
ripper = new EightmusesRipper(new URL("https://www.8muses.com/comics/album/Affect3D-Comics/TheDude3DX/Lust-Unleashed-The-Urge-To-Explore"));
|
||||
testRipper(ripper);
|
||||
// Test pages with subalbums
|
||||
ripper = new EightmusesRipper(new URL("https://www.8muses.com/comix/album/Blacknwhitecomics_com-Comix/BlacknWhiteComics/The-Mayor"));
|
||||
testRipper(ripper);
|
||||
|
@ -1,13 +1,15 @@
|
||||
package com.rarchives.ripme.tst.ripper.rippers;
|
||||
//package com.rarchives.ripme.tst.ripper.rippers;
|
||||
//
|
||||
//import java.io.IOException;
|
||||
//import java.net.URL;
|
||||
//
|
||||
//import com.rarchives.ripme.ripper.rippers.FivehundredpxRipper;
|
||||
//
|
||||
//public class FivehundredpxRipperTest extends RippersTest {
|
||||
// public void test500pxAlbum() throws IOException {
|
||||
// FivehundredpxRipper ripper = new FivehundredpxRipper(new URL("https://marketplace.500px.com/alexander_hurman"));
|
||||
// testRipper(ripper);
|
||||
// }
|
||||
//}
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
|
||||
import com.rarchives.ripme.ripper.rippers.FivehundredpxRipper;
|
||||
|
||||
public class FivehundredpxRipperTest extends RippersTest {
|
||||
public void test500pxAlbum() throws IOException {
|
||||
FivehundredpxRipper ripper = new FivehundredpxRipper(new URL("https://marketplace.500px.com/alexander_hurman"));
|
||||
testRipper(ripper);
|
||||
}
|
||||
}
|
||||
// Ripper is broken. See https://github.com/RipMeApp/ripme/issues/438
|
Loading…
Reference in New Issue
Block a user