Merge pull request #5 from RipMeApp/master

Sync.
This commit is contained in:
rephormat 2018-03-12 10:16:58 -05:00 committed by GitHub
commit 41eb9fd09b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 132 additions and 98 deletions

View File

@ -1 +1,2 @@
#!/usr/bin/env bash
mvn clean compile assembly:single

View File

@ -1,40 +0,0 @@
file.overwrite | bool | If true ripme will overwrite existing files rather than skip them
clipboard.autorip | bool | If true ripme will try to download any links in the clipboard
error.skip404 | bool | Don't retry on 404 errors
download.save_order| bool | If true ripme will prefix each downloaded file with a number reflecting the order in which the files were downloaded
auto.update | bool | If true ripme will auto-update every time it's started
play.sound | bool | If true ripme will play a sound every time a rip finishes
download.show_popup| bool | TODO figure out what this is for
log.save | bool | If true ripme will save its logs
urls_only.save | bool | If true ripme will save all urls to a text file and download no files
album_titles.save | bool | Currently does nothing
prefer.mp4 | bool | Prefer mp4 when downloading a video that has more than 1 format
download.timeout | int | File download timeout (in milliseconds)
page.timeout | int | Page download timeout (in milliseconds)
download.max_size | int | Maximum size of downloaded files in bytes
threads.size | int | The number of threads to use
twitter.auth | String | Twitter API key (Base64'd)
tumblr.auth | String | Tumblr API key
log.level | String | The debug log level (Example: Log level: Debug)
gw.api | String | TODO figure out what this is for
twitter.max_requests | int | TODO figure out what this is for

View File

@ -4,7 +4,7 @@
<groupId>com.rarchives.ripme</groupId>
<artifactId>ripme</artifactId>
<packaging>jar</packaging>
<version>1.7.19</version>
<version>1.7.23</version>
<name>ripme</name>
<url>http://rip.rarchives.com</url>
<properties>

View File

@ -1,6 +1,10 @@
{
"latestVersion": "1.7.19",
"latestVersion": "1.7.23",
"changeList": [
"1.7.23: Fixed xvideos ripper; InstagramRipper now works with lastseenfeature",
"1.7.22: Added func to normalize urls before reading from/writing to url history file; last seen feature now works with instagram",
"1.7.21: Fixed last seen feature",
"1.7.20: Fixed 8muses ripper; Added last seen feature; disabled 500px test",
"1.7.19: imgurRipper no longer tries to add ?1 to file names",
"1.7.18: AlbumRipper now removes bad chars from file names",
"1.7.17: Fixed hentai.cafe autorip from clipboard",

View File

@ -68,7 +68,13 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
Document doc = getFirstPage();
while (doc != null) {
if (alreadyDownloadedUrls >= Utils.getConfigInteger("history.end_rip_after_already_seen", 1000000000) && !isThisATest()) {
sendUpdate(STATUS.DOWNLOAD_COMPLETE, "Already seen the last " + alreadyDownloadedUrls + " images ending rip");
break;
}
List<String> imageURLs = getURLsFromPage(doc);
// If hasASAPRipping() returns true then the ripper will handle downloading the files
// if not it's done in the following block of code
if (!hasASAPRipping()) {
// Remove all but 1 image
if (isThisATest()) {

View File

@ -44,7 +44,8 @@ public abstract class AbstractRipper
public abstract String getHost();
public abstract String getGID(URL url) throws MalformedURLException;
public boolean hasASAPRipping() { return false; }
// Every time addURLToDownload skips an already-downloaded URL, this increases by 1
public int alreadyDownloadedUrls = 0;
private boolean shouldStop = false;
private boolean thisIsATest = false;
@ -60,7 +61,13 @@ public abstract class AbstractRipper
}
}
/**
* Adds a URL to the url history file
* @param downloadedURL URL to add to the history file (it is normalized before being written)
*/
private void writeDownloadedURL(String downloadedURL) throws IOException {
downloadedURL = normalizeUrl(downloadedURL);
BufferedWriter bw = null;
FileWriter fw = null;
try {
@ -86,6 +93,15 @@ public abstract class AbstractRipper
}
}
/**
* Normalizes a URL before it is looked up in, or written to, the URL history file.
* This base implementation returns the URL unchanged; rippers whose URLs contain
* volatile parts can override it so the same resource always maps to the same entry.
* @param url URL to normalize
* @return the normalized URL (in this implementation, the string that was passed in)
*/
public String normalizeUrl(String url) {
return url;
}
/**
* Checks to see if Ripme has already downloaded a URL
* @param url URL to check if downloaded
@ -95,6 +111,7 @@ public abstract class AbstractRipper
*/
private boolean hasDownloadedURL(String url) {
File file = new File(URLHistoryFile);
url = normalizeUrl(url);
try {
Scanner scanner = new Scanner(file);
while (scanner.hasNextLine()) {
@ -194,9 +211,11 @@ public abstract class AbstractRipper
* False if failed to download
*/
protected boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map<String, String> cookies) {
// Don't re-add the url if it was downloaded in a previous rip
if (Utils.getConfigBoolean("remember.url_history", true) && !isThisATest()) {
if (hasDownloadedURL(url.toExternalForm())) {
sendUpdate(STATUS.DOWNLOAD_WARN, "Already downloaded " + url.toExternalForm());
alreadyDownloadedUrls += 1;
return false;
}
}

View File

@ -51,7 +51,7 @@ public class EightmusesRipper extends AbstractHTMLRipper {
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://(www\\.)?8muses\\.com/comix/album/([a-zA-Z0-9\\-_]+).*$");
Pattern p = Pattern.compile("^https?://(www\\.)?8muses\\.com/(comix|comics)/album/([a-zA-Z0-9\\-_]+).*$");
Matcher m = p.matcher(url.toExternalForm());
if (!m.matches()) {
throw new MalformedURLException("Expected URL format: http://www.8muses.com/index/category/albumname, got: " + url);
@ -93,7 +93,7 @@ public class EightmusesRipper extends AbstractHTMLRipper {
Elements pageImages = page.getElementsByClass("c-tile");
for (Element thumb : pageImages) {
// If true this link is a sub album
if (thumb.attr("href").contains("/comix/album/")) {
if (thumb.attr("href").contains("/comics/album/")) {
String subUrl = "https://www.8muses.com" + thumb.attr("href");
try {
logger.info("Retrieving " + subUrl);
@ -106,7 +106,8 @@ public class EightmusesRipper extends AbstractHTMLRipper {
logger.warn("Error while loading subalbum " + subUrl, e);
}
} else if (thumb.attr("href").contains("/comix/picture/")) {
} else if (thumb.attr("href").contains("/comics/picture/")) {
logger.info("This page is a album");
logger.info("Ripping image");
if (super.isStopped()) break;
// Find thumbnail image source

View File

@ -24,6 +24,7 @@ import com.rarchives.ripme.utils.Utils;
public class InstagramRipper extends AbstractHTMLRipper {
String nextPageID = "";
private String userID;
@ -52,6 +53,12 @@ public class InstagramRipper extends AbstractHTMLRipper {
return san_url;
}
/**
 * Strips the date signature from an Instagram URL.
 * Every slash-delimited run of exactly eight characters drawn from A-Z and 0-9
 * is collapsed into a single slash; the rest of the URL is left untouched.
 * @param url URL to normalize
 * @return the URL with those eight-character segments removed
 */
@Override
public String normalizeUrl(String url) {
    // Pattern for the date sig segment, including its surrounding slashes
    final String dateSigSegment = "/[A-Z0-9]{8}/";
    final String singleSlash = "/";
    return url.replaceAll(dateSigSegment, singleSlash);
}
private List<String> getPostsFromSinglePage(Document Doc) {
List<String> imageURLs = new ArrayList<>();
JSONArray datas;
@ -192,7 +199,6 @@ public class InstagramRipper extends AbstractHTMLRipper {
@Override
public List<String> getURLsFromPage(Document doc) {
String nextPageID = "";
List<String> imageURLs = new ArrayList<>();
JSONObject json = new JSONObject();
try {
@ -255,33 +261,7 @@ public class InstagramRipper extends AbstractHTMLRipper {
break;
}
}
// Rip the next page
if (!nextPageID.equals("") && !isThisATest()) {
if (url.toExternalForm().contains("/tags/")) {
try {
// Sleep for a while to avoid a ban
sleep(2500);
if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) {
getURLsFromPage(Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get());
} else {
getURLsFromPage(Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get());
}
} catch (IOException e) {
return imageURLs;
}
}
try {
// Sleep for a while to avoid a ban
sleep(2500);
getURLsFromPage(Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get());
} catch (IOException e) {
return imageURLs;
}
} else {
logger.warn("Can't get net page");
}
} else { // We're ripping from a single page
logger.info("Ripping from single page");
imageURLs = getPostsFromSinglePage(doc);
@ -290,9 +270,65 @@ public class InstagramRipper extends AbstractHTMLRipper {
return imageURLs;
}
/**
 * Loads the page that follows the one just ripped, using nextPageID as the
 * max_id cursor.
 *
 * For URLs containing "/tags/" the cursor is appended to the rip URL itself;
 * otherwise the page is requested from the profile of userID.
 *
 * @param doc the page that was just processed (not used; the cursor comes from nextPageID)
 * @return the next page, or null when the profile page could not be loaded or
 *         contains no images
 * @throws IOException if there is no cursor, this is a test run, or the next
 *         tag page could not be loaded or contains no images
 */
@Override
public Document getNextPage(Document doc) throws IOException {
    if (nextPageID.equals("") || isThisATest()) {
        throw new IOException("No more pages");
    }

    String currentUrl = url.toExternalForm();
    if (currentUrl.contains("/tags/")) {
        // Insert a slash before the query string only when the URL lacks a trailing one
        String separator = currentUrl.endsWith("/") ? "" : "/";
        Document nextTagPage;
        try {
            // Sleep for a while to avoid a ban
            sleep(2500);
            nextTagPage = Http.url(currentUrl + separator + "?max_id=" + nextPageID).get();
        } catch (IOException e) {
            // Same message callers saw before, but keep the real failure as the cause
            throw new IOException("No more pages", e);
        }
        // The full page HTML is only useful when debugging; keep it out of the INFO log
        logger.debug(nextTagPage.html());
        if (!hasImage(nextTagPage)) {
            throw new IOException("No more pages");
        }
        return nextTagPage;
    }

    try {
        // Sleep for a while to avoid a ban
        sleep(2500);
        Document nextProfilePage = Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get();
        // A page without images ends the rip by returning null, as before
        return hasImage(nextProfilePage) ? nextProfilePage : null;
    } catch (IOException e) {
        // Still ends the rip with null, but no longer hides why the page failed to load
        logger.warn("Error while loading next page for " + userID, e);
        return null;
    }
}
/**
* Hands a single URL to addURLToDownload(URL).
* @param url URL to download
* @param index position of the URL; not used by this implementation
*/
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url);
}
/**
 * Reports whether the JSON embedded in a page lists at least one media node.
 * The nodes are read from entry_data.ProfilePage[0].user.media.nodes and
 * their count is logged at INFO level.
 *
 * NOTE(review): only IOException is handled here; if the JSON accessors throw an
 * unchecked exception for a missing key, it will propagate to the caller - confirm.
 *
 * @param doc page to inspect
 * @return true if the node array is non-empty; false if it is empty or an
 *         IOException occurs while reading the page JSON
 */
private boolean hasImage(Document doc) {
    try {
        JSONObject firstProfile = getJSONFromPage(doc)
                .getJSONObject("entry_data")
                .getJSONArray("ProfilePage")
                .getJSONObject(0);
        JSONArray mediaNodes = firstProfile
                .getJSONObject("user")
                .getJSONObject("media")
                .getJSONArray("nodes");
        int nodeCount = mediaNodes.length();
        logger.info(nodeCount);
        return nodeCount != 0;
    } catch (IOException e) {
        return false;
    }
}
}

View File

@ -57,19 +57,21 @@ public class XvideosRipper extends VideoRipper {
public void rip() throws IOException {
logger.info(" Retrieving " + this.url);
Document doc = Http.url(this.url).get();
Elements embeds = doc.select("embed");
if (embeds.size() == 0) {
throw new IOException("Could not find Embed code at " + url);
}
Element embed = embeds.get(0);
String vars = embed.attr("flashvars");
for (String var : vars.split("&")) {
if (var.startsWith("flv_url=")) {
String vidUrl = var.substring("flv_url=".length());
vidUrl = URLDecoder.decode(vidUrl, "UTF-8");
addURLToDownload(new URL(vidUrl), HOST + "_" + getGID(this.url));
Elements scripts = doc.select("script");
for (Element e : scripts) {
if (e.html().contains("html5player.setVideoUrlHigh")) {
logger.info("Found the right script");
String[] lines = e.html().split("\n");
for (String line: lines) {
if (line.contains("html5player.setVideoUrlHigh")) {
String videoURL = line.replaceAll("\t", "").replaceAll("html5player.setVideoUrlHigh\\(", "").replaceAll("\'", "").replaceAll("\\);", "");
addURLToDownload(new URL(videoURL), HOST + "_" + getGID(this.url));
waitForThreads();
return;
}
}
}
}
waitForThreads();
throw new IOException("Unable to find video url at " + this.url.toExternalForm());
}
}

View File

@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils;
public class UpdateUtils {
private static final Logger logger = Logger.getLogger(UpdateUtils.class);
private static final String DEFAULT_VERSION = "1.7.19";
private static final String DEFAULT_VERSION = "1.7.23";
private static final String REPO_NAME = "ripmeapp/ripme";
private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json";
private static final String mainFileName = "ripme.jar";

View File

@ -10,6 +10,9 @@ public class EightmusesRipperTest extends RippersTest {
// A simple image album
EightmusesRipper ripper = new EightmusesRipper(new URL("https://www.8muses.com/comix/album/Affect3D-Comics/TheDude3DX/Lust-Unleashed-The-Urge-To-Explore"));
testRipper(ripper);
// Test the new url format
ripper = new EightmusesRipper(new URL("https://www.8muses.com/comics/album/Affect3D-Comics/TheDude3DX/Lust-Unleashed-The-Urge-To-Explore"));
testRipper(ripper);
// Test pages with subalbums
ripper = new EightmusesRipper(new URL("https://www.8muses.com/comix/album/Blacknwhitecomics_com-Comix/BlacknWhiteComics/The-Mayor"));
testRipper(ripper);

View File

@ -1,13 +1,15 @@
package com.rarchives.ripme.tst.ripper.rippers;
//package com.rarchives.ripme.tst.ripper.rippers;
//
//import java.io.IOException;
//import java.net.URL;
//
//import com.rarchives.ripme.ripper.rippers.FivehundredpxRipper;
//
//public class FivehundredpxRipperTest extends RippersTest {
// public void test500pxAlbum() throws IOException {
// FivehundredpxRipper ripper = new FivehundredpxRipper(new URL("https://marketplace.500px.com/alexander_hurman"));
// testRipper(ripper);
// }
//}
import java.io.IOException;
import java.net.URL;
import com.rarchives.ripme.ripper.rippers.FivehundredpxRipper;
public class FivehundredpxRipperTest extends RippersTest {
public void test500pxAlbum() throws IOException {
FivehundredpxRipper ripper = new FivehundredpxRipper(new URL("https://marketplace.500px.com/alexander_hurman"));
testRipper(ripper);
}
}
// Ripper is broken. See https://github.com/RipMeApp/ripme/issues/438