Merge pull request #4 from RipMeApp/master

Update from Original
This commit is contained in:
rephormat 2018-02-12 18:26:58 -06:00 committed by GitHub
commit d9423c1fae
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 137 additions and 234 deletions

View File

@ -4,7 +4,7 @@
<groupId>com.rarchives.ripme</groupId>
<artifactId>ripme</artifactId>
<packaging>jar</packaging>
<version>1.7.14</version>
<version>1.7.19</version>
<name>ripme</name>
<url>http://rip.rarchives.com</url>
<properties>

View File

@ -1,6 +1,11 @@
{
"latestVersion": "1.7.14",
"latestVersion": "1.7.19",
"changeList": [
"1.7.19: imgurRipper no longer tries to add ?1 to file names",
"1.7.18: AlbumRipper now removes bad chars from file names",
"1.7.17: Fixed hentai.cafe autorip from clipboard",
"1.7.16: Eightmuses now supports pages containing both images and subpages",
"1.7.15: Eigthmuses ripper now uses ASAP ripping; Remove ripper and tests for gwarchives.com and hushpix.com; Remove ripper and tests for imagearn.com; Fixed pornhub video downloader",
"1.7.14: Tumblr API Key Choosing Fix; Make webtoons ripper download maximum quality images; Added twitch ripper; Added VSCO ripper; Fixed pornhub video ripper",
"1.7.13: disabled FuskatorRipperTest; Fixes xhamster.com video ripper; Add yuvutu.com ripper",
"1.7.12: Instagram ripper no longer 403s on certain images",

View File

@ -69,23 +69,25 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
while (doc != null) {
List<String> imageURLs = getURLsFromPage(doc);
// Remove all but 1 image
if (isThisATest()) {
while (imageURLs.size() > 1) {
imageURLs.remove(1);
if (!hasASAPRipping()) {
// Remove all but 1 image
if (isThisATest()) {
while (imageURLs.size() > 1) {
imageURLs.remove(1);
}
}
}
if (imageURLs.size() == 0) {
throw new IOException("No images found at " + doc.location());
}
if (imageURLs.size() == 0) {
throw new IOException("No images found at " + doc.location());
}
for (String imageURL : imageURLs) {
index += 1;
logger.debug("Found image url #" + index + ": " + imageURL);
downloadURL(new URL(imageURL), index);
if (isStopped()) {
break;
for (String imageURL : imageURLs) {
index += 1;
logger.debug("Found image url #" + index + ": " + imageURL);
downloadURL(new URL(imageURL), index);
if (isStopped()) {
break;
}
}
}
if (hasDescriptionSupport() && Utils.getConfigBoolean("descriptions.save", false)) {

View File

@ -43,6 +43,7 @@ public abstract class AbstractRipper
public abstract void rip() throws IOException;
public abstract String getHost();
public abstract String getGID(URL url) throws MalformedURLException;
/**
 * Reports whether this ripper uses "ASAP ripping". The default is {@code false};
 * subclasses override this to return {@code true}. AbstractHTMLRipper checks this flag
 * and skips its own per-page download loop when it is {@code true}.
 */
public boolean hasASAPRipping() { return false; }
private boolean shouldStop = false;
private boolean thisIsATest = false;

View File

@ -13,6 +13,7 @@ import com.rarchives.ripme.ui.RipStatusMessage;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Utils;
// Should this file even exist? It does the same thing as AbstractHTMLRipper.
/**
* For ripping delicious albums off the interwebz.
@ -197,8 +198,11 @@ public abstract class AlbumRipper extends AbstractRipper {
title = super.getAlbumTitle(this.url);
}
logger.debug("Using album title '" + title + "'");
title = Utils.filesystemSafe(title);
path += title + File.separator;
path += title;
path = Utils.getOriginalDirectory(path) + File.separator; // check for case sensitive (unix only)
this.workingDir = new File(path);
if (!this.workingDir.exists()) {
logger.info("[+] Creating directory: " + Utils.removeCWD(this.workingDir));

View File

@ -28,7 +28,7 @@ public class CheveretoRipper extends AbstractHTMLRipper {
super(url);
}
private static List<String> explicit_domains_1 = Arrays.asList("hushpix.com", "tag-fox.com", "gwarchives.com");
private static List<String> explicit_domains_1 = Arrays.asList("tag-fox.com");
@Override
public String getHost() {

View File

@ -34,10 +34,16 @@ public class EightmusesRipper extends AbstractHTMLRipper {
super(url);
}
/**
 * Opts this ripper into ASAP ripping: getURLsFromPage() already hands each resolved
 * image to addURLToDownload(), and AbstractHTMLRipper skips its own download loop
 * when this returns {@code true}.
 */
@Override
public boolean hasASAPRipping() {
return true;
}
@Override
public String getHost() {
return "8muses";
}
@Override
public String getDomain() {
return "8muses.com";
@ -82,67 +88,26 @@ public class EightmusesRipper extends AbstractHTMLRipper {
@Override
public List<String> getURLsFromPage(Document page) {
List<String> imageURLs = new ArrayList<>();
// get the first image link on the page and check if the last char in it is a number
// if it is a number then we're ripping a comic if not it's a subalbum
String firstImageLink = page.select("div.gallery > a.t-hover").first().attr("href");
Pattern p = Pattern.compile("/comix/picture/([a-zA-Z0-9\\-_/]*/)?\\d+");
Matcher m = p.matcher(firstImageLink);
if (!m.matches()) {
logger.info("Ripping subalbums");
// Page contains subalbums (not images)
Elements albumElements = page.select("div.gallery > a.t-hover");
List<Element> albumsList = albumElements.subList(0, albumElements.size());
Collections.reverse(albumsList);
// Iterate over elements in reverse order
for (Element subalbum : albumsList) {
String subUrl = subalbum.attr("href");
// This if is to skip ads which don't have a href
if (subUrl != "") {
subUrl = subUrl.replaceAll("\\.\\./", "");
if (subUrl.startsWith("//")) {
subUrl = "https:";
}
else if (!subUrl.startsWith("http://")) {
subUrl = "https://www.8muses.com" + subUrl;
}
try {
logger.info("Retrieving " + subUrl);
sendUpdate(STATUS.LOADING_RESOURCE, subUrl);
Document subPage = Http.url(subUrl).get();
// Get all images in subalbum, add to list.
List<String> subalbumImages = getURLsFromPage(subPage);
String albumTitle = subPage.select("meta[name=description]").attr("content");
albumTitle = albumTitle.replace("A huge collection of free porn comics for adults. Read ", "");
albumTitle = albumTitle.replace(" online for free at 8muses.com", "");
albumTitle = albumTitle.replace(" ", "_");
// albumTitle = albumTitle.replace("Sex and Porn Comics", "");
// albumTitle = albumTitle.replace("|", "");
// albumTitle = albumTitle.replace("8muses", "");
// albumTitle = albumTitle.replaceAll("-", "_");
// albumTitle = albumTitle.replaceAll(" ", "_");
// albumTitle = albumTitle.replaceAll("___", "_");
// albumTitle = albumTitle.replaceAll("__", "_");
// // This is here to remove the trailing __ from folder names
// albumTitle = albumTitle.replaceAll("__", "");
logger.info("Found " + subalbumImages.size() + " images in subalbum");
int prefix = 1;
for (String image : subalbumImages) {
URL imageUrl = new URL(image);
// urlTitles.put(imageUrl, albumTitle);
addURLToDownload(imageUrl, getPrefix(prefix), albumTitle, this.url.toExternalForm(), cookies);
prefix = prefix + 1;
}
rippingSubalbums = true;
imageURLs.addAll(subalbumImages);
} catch (IOException e) {
logger.warn("Error while loading subalbum " + subUrl, e);
}
int x = 1;
// This contains the thumbnails of all images on the page
Elements pageImages = page.getElementsByClass("c-tile");
for (Element thumb : pageImages) {
// If true this link is a sub album
if (thumb.attr("href").contains("/comix/album/")) {
String subUrl = "https://www.8muses.com" + thumb.attr("href");
try {
logger.info("Retrieving " + subUrl);
sendUpdate(STATUS.LOADING_RESOURCE, subUrl);
Document subPage = Http.url(subUrl).get();
// If the page below this one has images this line will download them
List<String> subalbumImages = getURLsFromPage(subPage);
logger.info("Found " + subalbumImages.size() + " images in subalbum");
} catch (IOException e) {
logger.warn("Error while loading subalbum " + subUrl, e);
}
}
}
else {
// Page contains images
for (Element thumb : page.select(".image")) {
} else if (thumb.attr("href").contains("/comix/picture/")) {
logger.info("Ripping image");
if (super.isStopped()) break;
// Find thumbnail image source
String image = null;
@ -150,16 +115,21 @@ public class EightmusesRipper extends AbstractHTMLRipper {
image = thumb.attr("data-cfsrc");
}
else {
String parentHref = thumb.parent().attr("href");
if (parentHref.equals("")) continue;
if (parentHref.startsWith("/")) {
parentHref = "https://www.8muses.com" + parentHref;
String imageHref = thumb.attr("href");
if (imageHref.equals("")) continue;
if (imageHref.startsWith("/")) {
imageHref = "https://www.8muses.com" + imageHref;
}
try {
logger.info("Retrieving full-size image location from " + parentHref);
image = getFullSizeImage(parentHref);
logger.info("Retrieving full-size image location from " + imageHref);
image = getFullSizeImage(imageHref);
URL imageUrl = new URL(image);
addURLToDownload(imageUrl, getPrefix(x), getSubdir(page.select("title").text()), this.url.toExternalForm(), cookies);
// X is our page index
x++;
} catch (IOException e) {
logger.error("Failed to get full-size image from " + parentHref);
logger.error("Failed to get full-size image from " + imageHref);
continue;
}
}
@ -170,6 +140,7 @@ public class EightmusesRipper extends AbstractHTMLRipper {
imageURLs.add(image);
if (isThisATest()) break;
}
}
return imageURLs;
}
@ -182,6 +153,25 @@ public class EightmusesRipper extends AbstractHTMLRipper {
return "https://www.8muses.com/image/fm/" + imageName;
}
/**
 * Turns a description string into an album title.
 *
 * Strips the fixed leading and trailing boilerplate phrases and then replaces
 * every space with an underscore.
 *
 * @param albumTitle raw description text
 * @return the text without the boilerplate, with spaces replaced by underscores
 */
private String getTitle(String albumTitle) {
    String withoutBoilerplate = albumTitle
            .replace("A huge collection of free porn comics for adults. Read ", "")
            .replace(" online for free at 8muses.com", "");
    return withoutBoilerplate.replace(" ", "_");
}
/**
 * Derives a sub-directory name from a page title.
 *
 * Despite the parameter name, the visible caller passes the text of the page's
 * {@code <title>} element. The site suffix, double tabs, newlines and "| "
 * separators are removed, then spaces become hyphens. Both the raw and the
 * cleaned value are logged at info level.
 *
 * @param rawHref raw title text
 * @return the cleaned title
 */
private String getSubdir(String rawHref) {
    logger.info("Raw title: " + rawHref);
    String subdir = rawHref
            .replaceAll("8muses - Sex and Porn Comics", "")
            .replaceAll("\t\t", "")
            .replaceAll("\n", "")
            .replaceAll("\\| ", "")
            .replace(" ", "-");
    logger.info(subdir);
    return subdir;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), cookies);

View File

@ -1,112 +0,0 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
/**
 * Ripper for imagearn.com galleries.
 *
 * Accepts gallery URLs ({@code gallery.php?id=N}) directly. Single-image URLs
 * ({@code image.php?id=N}) are mapped to their gallery in {@link #sanitizeURL(URL)}.
 */
public class ImagearnRipper extends AbstractHTMLRipper {

    // Compiled once. Kept static so they are initialised at class-load time, before
    // any constructor (including a superclass constructor) can call into this class.

    /** Matches a gallery URL and captures the numeric gallery id in group 1. */
    private static final Pattern GALLERY_URL =
            Pattern.compile("^.*imagearn\\.com/+gallery\\.php\\?id=([0-9]+).*$");

    /** Matches a single-image URL. */
    private static final Pattern IMAGE_URL =
            Pattern.compile("^.*imagearn\\.com/+image\\.php\\?id=[0-9]+.*$");

    public ImagearnRipper(URL url) throws IOException {
        super(url);
    }

    @Override
    public String getHost() {
        return "imagearn";
    }

    @Override
    public String getDomain() {
        return "imagearn.com";
    }

    /**
     * Extracts the numeric gallery id from a gallery URL.
     *
     * @throws MalformedURLException if the URL is not an imagearn gallery URL
     */
    @Override
    public String getGID(URL url) throws MalformedURLException {
        Matcher m = GALLERY_URL.matcher(url.toExternalForm());
        if (m.matches()) {
            return m.group(1);
        }
        throw new MalformedURLException(
                "Expected imagearn.com gallery formats: "
                        + "imagearn.com/gallery.php?id=####..."
                        + " Got: " + url);
    }

    /**
     * Maps a single-image URL to the URL of the gallery containing it.
     * Any other URL is returned unchanged. If the gallery lookup fails, the
     * failure is logged and the original URL is returned.
     */
    public URL sanitizeURL(URL url) throws MalformedURLException {
        Matcher m = IMAGE_URL.matcher(url.toExternalForm());
        if (m.matches()) {
            // URL points to imagearn *image*, not gallery
            try {
                url = getGalleryFromImage(url);
            } catch (Exception e) {
                // Best effort: keep the original URL when the gallery cannot be resolved.
                logger.error("[!] " + e.getMessage(), e);
            }
        }
        return url;
    }

    /**
     * Loads an image page and returns the first gallery link found on it.
     *
     * @throws IOException if the page cannot be loaded or contains no gallery link
     */
    private URL getGalleryFromImage(URL url) throws IOException {
        Document doc = Http.url(url).get();
        for (Element link : doc.select("a[href~=^gallery\\.php.*$]")) {
            logger.info("LINK: " + link.toString());
            if (link.hasAttr("href")
                    && link.attr("href").contains("gallery.php")) {
                url = new URL("http://imagearn.com/" + link.attr("href"));
                logger.info("[!] Found gallery from given link: " + url);
                return url;
            }
        }
        throw new IOException("Failed to find gallery at URL " + url);
    }

    @Override
    public Document getFirstPage() throws IOException {
        return Http.url(url).get();
    }

    /**
     * Builds the album title as {@code host_profileName_galleryId}. Falls back to the
     * superclass naming when the page cannot be loaded or has no profile name element.
     */
    @Override
    public String getAlbumTitle(URL url) throws MalformedURLException {
        try {
            Document doc = getFirstPage();
            Element profileName = doc.select("h3 > strong").first(); // profile name
            if (profileName != null) {
                return getHost() + "_" + profileName.text() + "_" + getGID(url);
            }
            logger.warn("Failed to get album title from " + url + ": no 'h3 > strong' element found");
        } catch (Exception e) {
            // Fall back to default album naming convention
            logger.warn("Failed to get album title from " + url, e);
        }
        return super.getAlbumTitle(url);
    }

    /**
     * Collects the full-size image URL for every thumbnail in the gallery.
     * Each thumbnail links to an image page, which is loaded to read the
     * {@code a.thickbox} link. Pages that fail to load or lack that link are
     * logged and skipped, so one bad page does not abort the whole gallery.
     */
    @Override
    public List<String> getURLsFromPage(Document doc) {
        List<String> imageURLs = new ArrayList<>();
        for (Element thumb : doc.select("div#gallery > div > a")) {
            String imageURL = thumb.attr("href");
            try {
                Document imagedoc = new Http("http://imagearn.com/" + imageURL).get();
                // first() returns null when nothing matches; guard instead of throwing NPE.
                Element fullSizeLink = imagedoc.select("a.thickbox").first();
                if (fullSizeLink == null) {
                    logger.warn("No full-size image link found on page: " + imageURL);
                    continue;
                }
                imageURLs.add(fullSizeLink.attr("href"));
            } catch (IOException e) {
                logger.warn("Was unable to download page: " + imageURL);
            }
        }
        return imageURLs;
    }

    /** Queues the image for download, then pauses one second before the next one. */
    @Override
    public void downloadURL(URL url, int index) {
        addURLToDownload(url, getPrefix(index));
        sleep(1000);
    }
}

View File

@ -213,6 +213,7 @@ public class ImgurRipper extends AlbumRipper {
saveAs += String.format("%03d_", index);
}
saveAs += imgurImage.getSaveAs();
saveAs = saveAs.replaceAll("\\?\\d", "");
addURLToDownload(imgurImage.url, new File(saveAs));
}
}

View File

@ -28,7 +28,7 @@ public class PornhubRipper extends VideoRipper {
@Override
public boolean canRip(URL url) {
Pattern p = Pattern.compile("^https?://[wm.]*pornhub\\.com/view_video.php\\?viewkey=[0-9]+.*$");
Pattern p = Pattern.compile("^https?://[wm.]*pornhub\\.com/view_video.php\\?viewkey=[a-z0-9]+$");
Matcher m = p.matcher(url.toExternalForm());
return m.matches();
}
@ -40,7 +40,7 @@ public class PornhubRipper extends VideoRipper {
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://[wm.]*pornhub\\.com/view_video.php\\?viewkey=([0-9]+).*$");
Pattern p = Pattern.compile("^https?://[wm.]*pornhub\\.com/view_video.php\\?viewkey=([a-z0-9]+)$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
@ -69,7 +69,7 @@ public class PornhubRipper extends VideoRipper {
vidUrl = null;
for (String quality : new String[] {"1080", "720", "480", "240"}) {
Pattern pv = Pattern.compile("\"quality\":\"" + quality + "\",\"videoUrl\":\"(.*?)\"");
Pattern pv = Pattern.compile("\"format\":\"\",\"quality\":\"" + quality + "\",\"videoUrl\":\"(.*?)\"");
Matcher mv = pv.matcher(html);
if (mv.find()) {
vidUrl = mv.group(1).replace("\\/", "/");

View File

@ -57,10 +57,11 @@ class AutoripThread extends Thread {
String clipboard = ClipboardUtils.getClipboardString();
if (clipboard != null) {
Pattern p = Pattern.compile(
// TODO: This regex is a monster and doesn't match all links; It needs to be rewritten
"\\b(((ht|f)tp(s?)://|~/|/)|www.)" +
"(\\w+:\\w+@)?(([-\\w]+\\.)+(com|org|net|gov" +
"|mil|biz|info|mobi|name|aero|jobs|museum" +
"|travel|[a-z]{2}))(:[\\d]{1,5})?" +
"|travel|cafe|[a-z]{2}))(:[\\d]{1,5})?" +
"(((/([-\\w~!$+|.,=]|%[a-f\\d]{2})+)+|/)+|\\?|#)?" +
"((\\?([-\\w~!$+|.,*:]|%[a-f\\d{2}])+=?" +
"([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)" +

View File

@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils;
public class UpdateUtils {
private static final Logger logger = Logger.getLogger(UpdateUtils.class);
private static final String DEFAULT_VERSION = "1.7.14";
private static final String DEFAULT_VERSION = "1.7.19";
private static final String REPO_NAME = "ripmeapp/ripme";
private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json";
private static final String mainFileName = "ripme.jar";

View File

@ -8,11 +8,7 @@ import java.lang.reflect.Constructor;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
import java.util.jar.JarEntry;
import java.util.jar.JarFile;
@ -370,6 +366,40 @@ public class Utils {
return text;
}
/**
 * Looks for an existing entry in the parent directory whose name matches the
 * last component of {@code path} when case is ignored.
 *
 * Only done when {@code isUnix()} or {@code isMacOS()} reports true; on any other
 * OS (the original code assumes Windows) the path is returned unchanged.
 * The path is also returned unchanged when it contains no {@code '/'}, or when
 * the parent directory cannot be listed (missing, not a directory, or I/O error).
 *
 * @param path - original path entered to be ripped
 * @return path of the existing entry, or the original path if none matches
 */
public static String getOriginalDirectory(String path) {
    if (!isUnix() && !isMacOS()) {
        // current OS is windows - nothing to do here
        return path;
    }

    int index = path.lastIndexOf('/');
    if (index < 0) {
        // No parent component to search in; substring(0, -1) would throw.
        return path;
    }

    String parent = path.substring(0, index);
    String lastPart = path.substring(index + 1).toLowerCase(); // lowercase form to compare against

    // File.list() returns null when the parent does not exist or is not a directory.
    String[] names = new File(parent).list();
    if (names == null) {
        return path;
    }

    for (String s : names) {
        if (s.toLowerCase().equals(lastPart)) {
            // Building Path of existing file
            return parent + File.separator + s;
        }
    }

    return path;
}
public static String bytesToHumanReadable(int bytes) {
float fbytes = (float) bytes;
String[] mags = new String[] {"", "K", "M", "G", "T"};

View File

@ -6,18 +6,8 @@ import java.net.URL;
import com.rarchives.ripme.ripper.rippers.CheveretoRipper;
public class CheveretoRipperTest extends RippersTest {
public void testHushpix() throws IOException {
CheveretoRipper ripper = new CheveretoRipper(new URL("https://hushpix.com/album/gKcu"));
testRipper(ripper);
}
public void testTagFox() throws IOException {
CheveretoRipper ripper = new CheveretoRipper(new URL("http://tag-fox.com/album/Thjb"));
testRipper(ripper);
}
public void testgwarchives() throws IOException {
CheveretoRipper ripper = new CheveretoRipper(new URL("https://gwarchives.com/album/ns4q"));
testRipper(ripper);
}
}

View File

@ -7,7 +7,11 @@ import com.rarchives.ripme.ripper.rippers.EightmusesRipper;
public class EightmusesRipperTest extends RippersTest {
public void testEightmusesAlbum() throws IOException {
EightmusesRipper ripper = new EightmusesRipper(new URL("https://www.8muses.com/album/jab-comics/a-model-life"));
// A simple image album
EightmusesRipper ripper = new EightmusesRipper(new URL("https://www.8muses.com/comix/album/Affect3D-Comics/TheDude3DX/Lust-Unleashed-The-Urge-To-Explore"));
testRipper(ripper);
// Test pages with subalbums
ripper = new EightmusesRipper(new URL("https://www.8muses.com/comix/album/Blacknwhitecomics_com-Comix/BlacknWhiteComics/The-Mayor"));
testRipper(ripper);
}
}
}

View File

@ -1,13 +0,0 @@
package com.rarchives.ripme.tst.ripper.rippers;
import java.io.IOException;
import java.net.URL;
import com.rarchives.ripme.ripper.rippers.ImagearnRipper;
/** Test case for {@link ImagearnRipper}. */
public class ImagearnRipperTest extends RippersTest {
    /** Runs the shared testRipper() check against a fixed imagearn gallery URL. */
    public void testImagearnRip() throws IOException {
        URL galleryUrl = new URL("http://imagearn.com//gallery.php?id=578682");
        testRipper(new ImagearnRipper(galleryUrl));
    }
}

View File

@ -8,7 +8,7 @@ import com.rarchives.ripme.ripper.rippers.MotherlessRipper;
public class MotherlessRipperTest extends RippersTest {
// https://github.com/RipMeApp/ripme/issues/238 - MotherlessRipperTest is flaky on Travis CI
public void testMotherlessAlbumRip() throws IOException {
MotherlessRipper ripper = new MotherlessRipper(new URL("http://motherless.com/G4DAA18D"));
MotherlessRipper ripper = new MotherlessRipper(new URL("http://motherless.com/G1E5C971"));
testRipper(ripper);
}
}

View File

@ -68,7 +68,7 @@ public class VideoRippersTest extends RippersTest {
public void testPornhubRipper() throws IOException {
List<URL> contentURLs = new ArrayList<>();
contentURLs.add(new URL("http://www.pornhub.com/view_video.php?viewkey=993166542"));
contentURLs.add(new URL("https://www.pornhub.com/view_video.php?viewkey=ph5a329fa707269"));
for (URL url : contentURLs) {
PornhubRipper ripper = new PornhubRipper(url);
videoTestHelper(ripper);