Merge branch 'master' into m

This commit is contained in:
metaprime 2017-04-25 10:01:19 -07:00
commit f089e804fd
19 changed files with 761 additions and 332 deletions

3
.gitignore vendored
View File

@ -12,3 +12,6 @@ history.json
*.iml
.settings/
.classpath
*.txt
bin/
.vscode/

View File

@ -24,7 +24,7 @@ For information about running the `.jar` file, see [the How To Run wiki](https:/
* Quickly downloads all images in an online album (see supported sites below)
* Easily re-rip albums to fetch new content
## Supported sites:
## [List of Supported Sites](https://github.com/4pr0n/ripme/wiki/Supported-Sites)
* imgur
* twitter
@ -44,11 +44,9 @@ For information about running the `.jar` file, see [the How To Run wiki](https:/
* xhamster
* (more)
### [Full updated list](https://github.com/4pr0n/ripme/issues/8)
## Not Supported?
Request support for more sites by adding a comment to [this Github issue](https://github.com/4pr0n/ripme/issues/8).
Request support for more sites by adding a comment to [this Github issue](https://github.com/4pr0n/ripme/issues/502).
If you're a developer, you can add your own by following the wiki guide
[How To Create A Ripper for HTML Websites](https://github.com/4pr0n/ripme/wiki/How-To-Create-A-Ripper-for-HTML-websites).

View File

@ -4,7 +4,7 @@
<groupId>com.rarchives.ripme</groupId>
<artifactId>ripme</artifactId>
<packaging>jar</packaging>
<version>1.4.2</version>
<version>1.4.7</version>
<name>ripme</name>
<url>http://rip.rarchives.com</url>
<properties>

View File

@ -1,6 +1,11 @@
{
"latestVersion" : "1.4.2",
"latestVersion" : "1.4.7",
"changeList" : [
"1.4.7: Fixed NewsFilter, XHamster; added TheChiveRipper",
"1.4.6: Eroshare: get album names; Imgur: improve grabbing album name.",
"1.4.5: SinnerComics: Added work around for naming bug",
"1.4.4: Added SinnerComics, MyHentaiComics rippers; improve E621 ripper.",
"1.4.3: Add missing subdomain for 4chan; fix ehentai, 8muses; add zizki ripper.",
"1.4.2: Added nhentai ripper.",
"1.4.1: Fixed Imgbox: correctly downloads full-size images.",
"1.4.0: Fixed update mechanism. Some improvements to Imgur, etc.",

View File

@ -19,7 +19,7 @@ import com.rarchives.ripme.utils.Http;
public class ChanRipper extends AbstractHTMLRipper {
public static List<ChanSite> explicit_domains = Arrays.asList(
new ChanSite(Arrays.asList("boards.4chan.org"), Arrays.asList("4cdn.org", "is.4chan.org")),
new ChanSite(Arrays.asList("boards.4chan.org"), Arrays.asList("4cdn.org", "is.4chan.org", "is2.4chan.org")),
new ChanSite(Arrays.asList("archive.moe"), Arrays.asList("data.archive.moe")),
new ChanSite(Arrays.asList("4archive.org"), Arrays.asList("imgur.com")),
new ChanSite(Arrays.asList("archive.4plebs.org"), Arrays.asList("img.4plebs.org")),

View File

@ -6,10 +6,12 @@ import com.rarchives.ripme.ripper.DownloadThreadPool;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
@ -24,13 +26,11 @@ import org.jsoup.select.Elements;
*
* @author
*/
public class E621Ripper extends AbstractHTMLRipper{
private static Pattern gidPattern=null;
private static Pattern gidPattern2=null;
private static Pattern gidPatternPool=null;
private DownloadThreadPool e621ThreadPool=new DownloadThreadPool("e621");
public class E621Ripper extends AbstractHTMLRipper {
public static final int POOL_IMAGES_PER_PAGE = 24;
private DownloadThreadPool e621ThreadPool = new DownloadThreadPool("e621");
public E621Ripper(URL url) throws IOException {
super(url);
}
@ -52,31 +52,50 @@ public class E621Ripper extends AbstractHTMLRipper{
@Override
public Document getFirstPage() throws IOException {
if(url.getPath().startsWith("/pool/show/"))
return Http.url("https://e621.net/pool/show/"+getTerm(url)).get();
else
return Http.url("https://e621.net/post/index/1/"+getTerm(url)).get();
if (url.getPath().startsWith("/pool/show/")) {
return Http.url("https://e621.net/pool/show/" + getTerm(url)).get();
} else {
return Http.url("https://e621.net/post/index/1/" + getTerm(url)).get();
}
}
@Override
public List<String> getURLsFromPage(Document page) {
Elements elements=page.select("#post-list .thumb a,#pool-show .thumb a");
List<String> res=new ArrayList<String>(elements.size());
for(Element e:elements){
res.add(e.absUrl("href")+"#"+e.child(0).attr("id").substring(1));
Elements elements = page.select("#post-list .thumb a,#pool-show .thumb a");
List<String> res = new ArrayList<String>(elements.size());
if (page.getElementById("pool-show") != null) {
int index = 0;
Element e = page.getElementById("paginator");
if (e != null) {
e = e.getElementsByClass("current").first();
if (e != null) {
index = (Integer.parseInt(e.text()) - 1) * POOL_IMAGES_PER_PAGE;
}
}
for (Element e_ : elements) {
res.add(e_.absUrl("href") + "#" + ++index);
}
} else {
for (Element e : elements) {
res.add(e.absUrl("href") + "#" + e.child(0).attr("id").substring(1));
}
}
return res;
}
@Override
public Document getNextPage(Document page) throws IOException {
for(Element e:page.select("#paginator a")){
if(e.attr("rel").equals("next"))
for (Element e : page.select("#paginator a")) {
if (e.attr("rel").equals("next")) {
return Http.url(e.absUrl("href")).get();
}
}
return null;
}
@ -85,58 +104,78 @@ public class E621Ripper extends AbstractHTMLRipper{
e621ThreadPool.addThread(new Thread(new Runnable() {
public void run() {
try {
Document page=Http.url(url).get();
addURLToDownload(new URL(page.getElementById("image").absUrl("src")),Utils.getConfigBoolean("download.save_order",true)?url.getRef()+"-":"");
Document page = Http.url(url).get();
Element e = page.getElementById("image");
if (e != null) {
addURLToDownload(new URL(e.absUrl("src")), Utils.getConfigBoolean("download.save_order", true) ? url.getRef() + "-" : "");
} else if ((e = page.select(".content object>param[name=\"movie\"]").first()) != null) {
addURLToDownload(new URL(e.absUrl("value")), Utils.getConfigBoolean("download.save_order", true) ? url.getRef() + "-" : "");
} else {
Logger.getLogger(E621Ripper.class.getName()).log(Level.WARNING, "Unsupported media type - please report to program author: " + url.toString());
}
} catch (IOException ex) {
Logger.getLogger(E621Ripper.class.getName()).log(Level.SEVERE, null, ex);
}
}
}));
}
private String getTerm(URL url) throws MalformedURLException{
if(gidPattern==null)
gidPattern=Pattern.compile("^https?://(www\\.)?e621\\.net/post/index/[^/]+/([a-zA-Z0-9$_.+!*'(),%-]+)(/.*)?(#.*)?$");
if(gidPatternPool==null)
gidPatternPool=Pattern.compile("^https?://(www\\.)?e621\\.net/pool/show/([a-zA-Z0-9$_.+!*'(),%-]+)(\\?.*)?(/.*)?(#.*)?$");
Matcher m = gidPattern.matcher(url.toExternalForm());
if(m.matches())
return m.group(2);
m = gidPatternPool.matcher(url.toExternalForm());
if(m.matches())
return m.group(2);
throw new MalformedURLException("Expected e621.net URL format: e621.net/post/index/1/searchterm - got "+url+" instead");
private String getTerm(URL url) throws MalformedURLException {
String query = url.getQuery();
if (query != null) {
return Utils.parseUrlQuery(query, "tags");
}
if (query == null) {
if ((query = url.getPath()).startsWith("/post/index/")) {
query = query.substring(12);
int pos = query.indexOf('/');
if (pos == -1) {
return null;
}
// skip page number
query = query.substring(pos + 1);
if (query.endsWith("/")) {
query = query.substring(0, query.length() - 1);
}
try {
return URLDecoder.decode(query, "UTF-8");
} catch (UnsupportedEncodingException e) {
// Shouldn't happen since UTF-8 is required to be supported
throw new RuntimeException(e);
}
} else if (query.startsWith("/pool/show/")) {
query = query.substring(11);
if (query.endsWith("/")) {
query = query.substring(0, query.length() - 1);
}
return query;
}
}
return null;
}
@Override
public String getGID(URL url) throws MalformedURLException {
try {
String prefix="";
if(url.getPath().startsWith("/pool/show/"))
prefix="pool_";
return Utils.filesystemSafe(prefix+new URI(getTerm(url)).getPath());
} catch (URISyntaxException ex) {
Logger.getLogger(PahealRipper.class.getName()).log(Level.SEVERE, null, ex);
String prefix = "";
if (url.getPath().startsWith("/pool/show/")) {
prefix = "pool_";
} else {
prefix = "term_";
}
throw new MalformedURLException("Expected e621.net URL format: e621.net/post/index/1/searchterm - got "+url+" instead");
return Utils.filesystemSafe(prefix + getTerm(url));
}
@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
if(gidPattern2==null)
gidPattern2=Pattern.compile("^https?://(www\\.)?e621\\.net/post/search\\?tags=([a-zA-Z0-9$_.+!*'(),%-]+)(/.*)?(#.*)?$");
Matcher m = gidPattern2.matcher(url.toExternalForm());
if(m.matches())
return new URL("https://e621.net/post/index/1/"+m.group(2).replace("+","%20"));
return url;
}
}

View File

@ -38,7 +38,7 @@ public class EHentaiRipper extends AbstractHTMLRipper {
// Current HTML document
private Document albumDoc = null;
private static final Map<String,String> cookies = new HashMap<String,String>();
static {
cookies.put("nw", "1");
@ -53,10 +53,10 @@ public class EHentaiRipper extends AbstractHTMLRipper {
public String getHost() {
return "e-hentai";
}
@Override
public String getDomain() {
return "g.e-hentai.org";
return "e-hentai.org";
}
public String getAlbumTitle(URL url) throws MalformedURLException {
@ -79,18 +79,18 @@ public class EHentaiRipper extends AbstractHTMLRipper {
Pattern p;
Matcher m;
p = Pattern.compile("^.*g\\.e-hentai\\.org/g/([0-9]+)/([a-fA-F0-9]+)/$");
p = Pattern.compile("^https?://e-hentai\\.org/g/([0-9]+)/([a-fA-F0-9]+)/$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1) + "-" + m.group(2);
}
throw new MalformedURLException(
"Expected g.e-hentai.org gallery format: "
+ "http://g.e-hentai.org/g/####/####/"
"Expected e-hentai.org gallery format: "
+ "http://e-hentai.org/g/####/####/"
+ " Got: " + url);
}
/**
* Attempts to get page, checks for IP ban, waits.
* @param url
@ -185,7 +185,7 @@ public class EHentaiRipper extends AbstractHTMLRipper {
/**
* Helper class to find and download images found on "image" pages
*
*
* Handles case when site has IP-banned the user.
*/
private class EHentaiImageThread extends Thread {
@ -204,7 +204,7 @@ public class EHentaiRipper extends AbstractHTMLRipper {
public void run() {
fetchImage();
}
private void fetchImage() {
try {
Document doc = getPageWithRetries(this.url);
@ -246,4 +246,4 @@ public class EHentaiRipper extends AbstractHTMLRipper {
}
}
}
}
}

View File

@ -40,7 +40,7 @@ public class EightmusesRipper extends AbstractHTMLRipper {
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://(www\\.)?8muses\\.com/index/category/([a-zA-Z0-9\\-_]+).*$");
Pattern p = Pattern.compile("^https?://(www\\.)?8muses\\.com/comix/album/([a-zA-Z0-9\\-_]+).*$");
Matcher m = p.matcher(url.toExternalForm());
if (!m.matches()) {
throw new MalformedURLException("Expected URL format: http://www.8muses.com/index/category/albumname, got: " + url);
@ -54,7 +54,8 @@ public class EightmusesRipper extends AbstractHTMLRipper {
// Attempt to use album title as GID
Element titleElement = getFirstPage().select("meta[name=description]").first();
String title = titleElement.attr("content");
title = title.substring(title.lastIndexOf('/') + 1);
title = title.replace("A huge collection of free porn comics for adults. Read", "");
title = title.replace("online for free at 8muses.com", "");
return getHost() + "_" + title.trim();
} catch (IOException e) {
// Fall back to default album naming convention
@ -122,14 +123,10 @@ public class EightmusesRipper extends AbstractHTMLRipper {
}
try {
logger.info("Retrieving full-size image location from " + parentHref);
Thread.sleep(1000);
image = getFullSizeImage(parentHref);
} catch (IOException e) {
logger.error("Failed to get full-size image from " + parentHref);
continue;
} catch (InterruptedException e) {
logger.error("Interrupted while getting full-size image from " + parentHref);
continue;
}
}
if (!image.contains("8muses.com")) {

View File

@ -29,11 +29,11 @@ import com.rarchives.ripme.utils.Http;
* @author losipher
*/
public class EroShareRipper extends AbstractHTMLRipper {
public EroShareRipper (URL url) throws IOException {
super(url);
}
@Override
public String getDomain() {
return "eroshare.com";
@ -43,12 +43,28 @@ public class EroShareRipper extends AbstractHTMLRipper {
public String getHost() {
return "eroshare";
}
@Override
public void downloadURL(URL url, int index){
addURLToDownload(url);
}
@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
try {
// Attempt to use album title as GID
Element titleElement = getFirstPage().select("meta[property=og:title]").first();
String title = titleElement.attr("content");
title = title.substring(title.lastIndexOf('/') + 1);
return getHost() + "_" + getGID(url) + "_" + title.trim();
} catch (IOException e) {
// Fall back to default album naming convention
logger.info("Unable to find title at " + url);
}
return super.getAlbumTitle(url);
}
@Override
public List<String> getURLsFromPage(Document doc){
List<String> URLs = new ArrayList<String>();
@ -70,10 +86,10 @@ public class EroShareRipper extends AbstractHTMLRipper {
URLs.add(videoURL);
}
}
return URLs;
}
@Override
public Document getFirstPage() throws IOException {
Response resp = Http.url(this.url)
@ -81,10 +97,10 @@ public class EroShareRipper extends AbstractHTMLRipper {
.response();
Document doc = resp.parse();
return doc;
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://[w.]*eroshare.com/([a-zA-Z0-9\\-_]+)/?$");
@ -94,9 +110,9 @@ public class EroShareRipper extends AbstractHTMLRipper {
}
throw new MalformedURLException("eroshare album not found in " + url + ", expected https://eroshare.com/album");
}
public static List<URL> getURLs(URL url) throws IOException{
Response resp = Http.url(url)
.ignoreContentType()
.response();
@ -122,7 +138,7 @@ public class EroShareRipper extends AbstractHTMLRipper {
URLs.add(new URL(videoURL));
}
}
return URLs;
}
}

View File

@ -28,7 +28,7 @@ public class ImgurRipper extends AlbumRipper {
HOST = "imgur";
private final int SLEEP_BETWEEN_ALBUMS;
private Document albumDoc;
static enum ALBUM_TYPE {
@ -104,12 +104,10 @@ public class ImgurRipper extends AlbumRipper {
*/
String title = null;
elems = albumDoc.select(".post-title");
if (elems.size() > 0) {
Element postTitle = elems.get(0);
if (postTitle != null) {
title = postTitle.text();
}
logger.info("Trying to get album title");
elems = albumDoc.select("meta[property=og:title]");
if (elems!=null) {
title = elems.attr("content");
}
String albumTitle = "imgur_";
@ -138,18 +136,25 @@ public class ImgurRipper extends AlbumRipper {
case ALBUM:
// Fall-through
case USER_ALBUM:
logger.info("Album type is USER_ALBUM");
// Don't call getAlbumTitle(this.url) with this
// as it seems to cause the album to be downloaded to a subdir.
ripAlbum(this.url);
break;
case SERIES_OF_IMAGES:
logger.info("Album type is SERIES_OF_IMAGES");
ripAlbum(this.url);
break;
case USER:
logger.info("Album type is USER");
ripUserAccount(url);
break;
case SUBREDDIT:
logger.info("Album type is SUBREDDIT");
ripSubreddit(url);
break;
case USER_IMAGES:
logger.info("Album type is USER_IMAGES");
ripUserImages(url);
break;
}
@ -338,7 +343,7 @@ public class ImgurRipper extends AlbumRipper {
}
return imgurAlbum;
}
/**
* Rips all albums in an imgur user's account.
* @param url
@ -366,7 +371,7 @@ public class ImgurRipper extends AlbumRipper {
}
}
}
private void ripUserImages(URL url) throws IOException {
int page = 0; int imagesFound = 0; int imagesTotal = 0;
String jsonUrl = url.toExternalForm().replace("/all", "/ajax/images");
@ -404,7 +409,7 @@ public class ImgurRipper extends AlbumRipper {
}
}
}
private void ripSubreddit(URL url) throws IOException {
int page = 0;
while (true) {

View File

@ -1,187 +0,0 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.json.JSONArray;
import org.json.JSONObject;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AlbumRipper;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
/**
 * Ripper for minus.com — supports guest albums, a user's single album,
 * and whole user accounts.
 *
 * NOTE(review): this file is removed by the commit shown in this diff;
 * documentation below describes the pre-removal state.
 */
public class MinusRipper extends AlbumRipper {

    private static final String DOMAIN = "minus.com",
                                HOST = "minus";

    // Cached album page so getAlbumTitle() and ripAlbum() don't fetch it twice.
    private Document albumDoc = null;

    // The three URL shapes minus.com serves; chosen by getGID().
    private static enum ALBUM_TYPE {
        GUEST,
        ACCOUNT_ALBUM,
        ACCOUNT
    }
    private ALBUM_TYPE albumType;

    public MinusRipper(URL url) throws IOException {
        super(url);
    }

    @Override
    public String getHost() {
        return HOST;
    }

    // Validates the URL (getGID throws on unknown formats) but returns it unchanged.
    public URL sanitizeURL(URL url) throws MalformedURLException {
        getGID(url);
        return url;
    }

    // Prefers the page's og:title meta tag for the album directory name;
    // falls back to the superclass default when the page can't be fetched
    // or carries no title.
    public String getAlbumTitle(URL url) throws MalformedURLException {
        try {
            // Attempt to use album title as GID
            if (albumDoc == null) {
                albumDoc = Http.url(url).get();
            }
            Elements titles = albumDoc.select("meta[property=og:title]");
            if (titles.size() > 0) {
                return HOST + "_" + titles.get(0).attr("content");
            }
        } catch (IOException e) {
            // Fall back to default album naming convention
        }
        return super.getAlbumTitle(url);
    }

    /**
     * Determines the album type and returns a gallery ID for it.
     * Side effect: sets {@link #albumType}, which rip() dispatches on.
     *
     * @throws MalformedURLException if the URL matches none of the known formats
     */
    @Override
    public String getGID(URL url) throws MalformedURLException {
        // http://vampyr3.minus.com/
        // http://vampyr3.minus.com/uploads
        // http://minus.com/mw7ztQ6xzP7ae
        // http://vampyr3.minus.com/mw7ztQ6xzP7ae
        String u = url.toExternalForm();
        // Normalize www./i. hosts so the patterns below match them all.
        u = u.replace("/www.minus.com", "/minus.com");
        u = u.replace("/i.minus.com", "/minus.com");
        Pattern p; Matcher m;
        p = Pattern.compile("^https?://minus\\.com/m([a-zA-Z0-9]+).*$");
        m = p.matcher(u);
        if (m.matches()) {
            albumType = ALBUM_TYPE.GUEST;
            return "guest_" + m.group(1);
        }
        p = Pattern.compile("^https?://([a-zA-Z0-9\\-_]+)\\.minus\\.com/m([a-zA-Z0-9]+).*$");
        m = p.matcher(u);
        if (m.matches()) {
            albumType = ALBUM_TYPE.ACCOUNT_ALBUM;
            return m.group(1) + "_" + m.group(2);
        }
        p = Pattern.compile("^https?://([a-zA-Z0-9]+)\\.minus\\.com/?(uploads)?$");
        m = p.matcher(u);
        if (m.matches()) {
            albumType = ALBUM_TYPE.ACCOUNT;
            return m.group(1);
        }
        throw new MalformedURLException(
                "Expected minus.com album URL formats: "
                + "username.minus.com or "
                + "username.minus.com/m... or "
                + "minus.com/m..."
                + " Got: " + url);
    }

    @Override
    public void rip() throws IOException {
        switch (albumType) {
            case ACCOUNT:
                ripAccount(this.url);
                break;
            case ACCOUNT_ALBUM:
                ripAlbum(this.url);
                break;
            case GUEST:
                ripAlbum(this.url);
                break;
        }
        waitForThreads();
    }

    // Walks every gallery of a user account via the site's paginated
    // shares.json API, ripping each gallery into its own subdirectory.
    private void ripAccount(URL url) throws IOException {
        Pattern p = Pattern.compile("^https?://([a-zA-Z0-9\\-_]+)\\.minus\\.com.*$");
        Matcher m = p.matcher(url.toExternalForm());
        if (!m.matches()) {
            throw new IOException("Could not find username from URL " + url);
        }
        String user = m.group(1);
        int page = 1;
        while (true) {
            String jsonUrl = "http://" + user
                    + ".minus.com/api/pane/user/"
                    + user + "/shares.json/"
                    + page;
            logger.info(" Retrieving " + jsonUrl);
            JSONObject json = Http.url(jsonUrl).getJSON();
            JSONArray galleries = json.getJSONArray("galleries");
            for (int i = 0; i < galleries.length(); i++) {
                JSONObject gallery = galleries.getJSONObject(i);
                String title = gallery.getString("name");
                String albumUrl = "http://" + user + ".minus.com/m" + gallery.getString("reader_id");
                ripAlbum(new URL(albumUrl), Utils.filesystemSafe(title));
                if (isThisATest()) {
                    break;
                }
            }
            if (page >= json.getInt("total_pages") || isThisATest()) {
                break;
            }
            page++;
        }
    }

    private void ripAlbum(URL url) throws IOException {
        ripAlbum(url, "");
    }

    // Rips one album into `subdir` ("" for the top-level album directory).
    // The image list is embedded in the page as a `gallerydata` JSON blob.
    private void ripAlbum(URL url, String subdir) throws IOException {
        logger.info("    Retrieving " + url.toExternalForm());
        // Re-fetch when ripping into a subdir: the cached albumDoc may belong
        // to a different gallery of the same account.
        if (albumDoc == null || !subdir.equals("")) {
            albumDoc = Http.url(url).get();
        }
        Pattern p = Pattern.compile("^.*var gallerydata = (\\{.*\\});.*$", Pattern.DOTALL);
        Matcher m = p.matcher(albumDoc.data());
        if (m.matches()) {
            JSONObject json = new JSONObject(m.group(1));
            JSONArray items = json.getJSONArray("items");
            for (int i = 0; i < items.length(); i++) {
                JSONObject item = items.getJSONObject(i);
                // Direct image URL is i.minus.com/i<id><ext>; the extension
                // comes from the original filename.
                String extension = item.getString("name");
                extension = extension.substring(extension.lastIndexOf('.'));
                String image = "http://i.minus.com/i"
                        + item.getString("id")
                        + extension;
                String prefix = "";
                if (Utils.getConfigBoolean("download.save_order", true)) {
                    prefix = String.format("%03d_", i + 1);
                }
                addURLToDownload(new URL(image), prefix, subdir);
                if (isThisATest()) {
                    break;
                }
            }
        }
    }

    public boolean canRip(URL url) {
        return url.getHost().endsWith(DOMAIN);
    }
}

View File

@ -0,0 +1,200 @@
package com.rarchives.ripme.ripper.rippers;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Ripper for myhentaicomics.com — supports single albums, search results,
 * and tag listings.
 */
public class MyhentaicomicsRipper extends AbstractHTMLRipper {
    // Set as a side effect of getGID(): true when the ripped URL is a tag or
    // search listing (many albums) rather than a single album. downloadURL()
    // reads it to decide whether images need a per-album subdirectory.
    public static boolean isTag;

    public MyhentaicomicsRipper(URL url) throws IOException {
        super(url);
    }

    @Override
    public String getHost() {
        return "myhentaicomics";
    }

    @Override
    public String getDomain() {
        return "myhentaicomics.com";
    }

    /**
     * Extracts the gallery ID from an album, search, or tag URL.
     * Side effect: sets {@link #isTag}.
     *
     * @throws MalformedURLException if the URL matches none of the known formats
     */
    @Override
    public String getGID(URL url) throws MalformedURLException {
        // Single album: http://myhentaicomics.com/index.php/albumName
        Pattern p = Pattern.compile("^https?://myhentaicomics.com/index.php/([a-zA-Z0-9-]*)/?$");
        Matcher m = p.matcher(url.toExternalForm());
        if (m.matches()) {
            isTag = false;
            return m.group(1);
        }
        // Search results: http://myhentaicomics.com/index.php/search?q=...
        Pattern pa = Pattern.compile("^https?://myhentaicomics.com/index.php/search\\?q=([a-zA-Z0-9-]*)([a-zA-Z0-9=&]*)?$");
        Matcher ma = pa.matcher(url.toExternalForm());
        if (ma.matches()) {
            isTag = true;
            return ma.group(1);
        }
        // Tag listing: http://myhentaicomics.com/index.php/tag/1234
        Pattern pat = Pattern.compile("^http://myhentaicomics.com/index.php/tag/([0-9]*)/?([a-zA-Z%0-9+\\?=:]*)?$");
        Matcher mat = pat.matcher(url.toExternalForm());
        if (mat.matches()) {
            isTag = true;
            return mat.group(1);
        }
        throw new MalformedURLException("Expected myhentaicomics.com URL format: " +
                "myhentaicomics.com/index.php/albumName - got " + url + " instead");
    }

    @Override
    public Document getFirstPage() throws IOException {
        // "url" is an instance field of the superclass
        return Http.url(url).get();
    }

    @Override
    public Document getNextPage(Document doc) throws IOException {
        // The right-arrow link points at the following page of the album.
        // BUG FIX: the element may be absent on the last page; previously
        // elem.attr(...) would throw a NullPointerException.
        Element elem = doc.select("a.ui-icon-right").first();
        if (elem == null) {
            throw new IOException("No more pages");
        }
        String nextPage = elem.attr("href");
        String nextUrl = "";
        Pattern p = Pattern.compile("/index.php/[a-zA-Z0-9_-]*\\?page=\\d");
        Matcher m = p.matcher(nextPage);
        if (m.matches()) {
            nextUrl = "http://myhentaicomics.com" + m.group(0);
        }
        // BUG FIX: was `nextUrl == ""`, a reference comparison that only
        // worked through string interning; compare content instead.
        if (nextUrl.isEmpty()) {
            throw new IOException("No more pages");
        }
        // Sleep for half a sec to avoid getting IP banned
        sleep(500);
        return Http.url(nextUrl).get();
    }

    /**
     * Collects the URL of every page of one album. Replaces getNextPage()
     * when downloading from searches and tags, where several albums have to
     * be paged through independently.
     */
    public List<String> getNextAlbumPage(String pageUrl) {
        List<String> albumPagesList = new ArrayList<String>();
        int pageNumber = 1;
        albumPagesList.add("http://myhentaicomics.com/index.php/" + pageUrl.split("\\?")[0] + "?page=" + Integer.toString(pageNumber));
        while (true) {
            String urlToGet = "http://myhentaicomics.com/index.php/" + pageUrl.split("\\?")[0] + "?page=" + Integer.toString(pageNumber);
            Document nextAlbumPage;
            try {
                logger.info("Grabbing " + urlToGet);
                nextAlbumPage = Http.url(urlToGet).get();
            } catch (IOException e) {
                // BUG FIX: previously set the document to null and fell
                // through to a guaranteed NullPointerException below; stop
                // paging this album instead.
                logger.warn("Failed to load " + urlToGet + " with Jsoup", e);
                break;
            }
            Element elem = nextAlbumPage.select("a.ui-icon-right").first();
            // BUG FIX: elem is null on the last page, and `nextPage == ""`
            // was a reference comparison; treat both as "no next page".
            String nextPage = (elem == null) ? "" : elem.attr("href");
            pageNumber = pageNumber + 1;
            if (nextPage.isEmpty()) {
                logger.info("Got " + pageNumber + " pages");
                break;
            } else {
                logger.info(nextPage);
                albumPagesList.add(nextPage);
                logger.info("Adding " + nextPage);
            }
        }
        return albumPagesList;
    }

    @Override
    public List<String> getURLsFromPage(Document doc) {
        List<String> result = new ArrayList<String>();
        // Checks if this is a page of albums (search/tag) or a single comic
        if (doc.toString().contains("class=\"g-item g-album\"")) {
            for (Element elem : doc.select("li.g-album > a")) {
                String link = elem.attr("href");
                logger.info("Grabbing album " + link);
                List<String> pagesToRip = getNextAlbumPage(link);
                logger.info(pagesToRip);
                for (String element : pagesToRip) {
                    Document album_doc;
                    try {
                        logger.info("grabbing " + element + " with jsoup");
                        // Album-page links may be relative or absolute.
                        if (element.startsWith("http")) {
                            album_doc = Http.url(element).get();
                        } else {
                            album_doc = Http.url("http://myhentaicomics.com/" + element).get();
                        }
                    } catch (IOException e) {
                        // BUG FIX: previously set album_doc to null and then
                        // dereferenced it; skip the failed page instead.
                        logger.warn("Failed to load " + element + " with Jsoup", e);
                        continue;
                    }
                    addImagesFromPage(album_doc, result);
                }
            }
        } else {
            addImagesFromPage(doc, result);
        }
        return result;
    }

    // Adds every relative-src <img> on the page to `result`, upgraded from
    // thumbnail to full size. Absolute (http...) sources are site chrome
    // such as the logo and are skipped.
    private void addImagesFromPage(Document page, List<String> result) {
        for (Element el : page.select("img")) {
            String imageSource = el.attr("src");
            if (!imageSource.startsWith("http")) {
                // We replace thumbs with resizes so we can get the full sized images
                imageSource = imageSource.replace("thumbs", "resizes");
                result.add("http://myhentaicomics.com/" + imageSource);
            }
        }
    }

    @Override
    public void downloadURL(URL url, int index) {
        // The site URL-encodes spaces/punctuation in file names; normalize
        // them so filesystem paths stay clean.
        String url_string = url.toExternalForm();
        url_string = url_string.replace("%20", "_");
        url_string = url_string.replace("%27", "");
        url_string = url_string.replace("%28", "_");
        url_string = url_string.replace("%29", "_");
        url_string = url_string.replace("%2C", "_");
        if (isTag) {
            logger.info("Downloading from a tag or search");
            // Use the album name (7th path segment) as the subdirectory so
            // images from different albums don't collide.
            addURLToDownload(url, getPrefix(index), url_string.split("/")[6]);
        } else {
            addURLToDownload(url, getPrefix(index));
        }
    }
}

View File

@ -1,19 +1,19 @@
package com.rarchives.ripme.ripper.rippers;
import com.rarchives.ripme.ripper.AlbumRipper;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AlbumRipper;
public class NewsfilterRipper extends AlbumRipper {
private static final String HOST = "newsfilter";
@ -33,7 +33,7 @@ public class NewsfilterRipper extends AlbumRipper {
public URL sanitizeURL(URL url) throws MalformedURLException {
String u = url.toExternalForm();
if (u.indexOf('#') >= 0) {
u = u.substring(0, u.indexOf('#'));
u = u.substring(0, u.indexOf('#'));
}
u = u.replace("https?://m\\.newsfilter\\.org", "http://newsfilter.org");
return new URL(u);
@ -41,39 +41,28 @@ public class NewsfilterRipper extends AlbumRipper {
@Override
public void rip() throws IOException {
String gid = getGID(this.url),
theurl = "http://newsfilter.org/gallery/" + gid;
Connection.Response resp = null;
String gid = getGID(this.url);
String theurl = "http://newsfilter.org/gallery/" + gid;
logger.info("Loading " + theurl);
resp = Jsoup.connect(theurl)
.timeout(5000)
.referrer("")
.userAgent(USER_AGENT)
.method(Connection.Method.GET)
.execute();
Connection.Response resp = Jsoup.connect(theurl)
.timeout(5000)
.referrer("")
.userAgent(USER_AGENT)
.method(Connection.Method.GET)
.execute();
Document doc = resp.parse();
//Element gallery = doc.getElementById("thegalmain");
//Elements piclinks = gallery.getElementsByAttributeValue("itemprop","contentURL");
Pattern pat = Pattern.compile(gid+"/\\d+");
Elements piclinks = doc.getElementsByAttributeValueMatching("href", pat);
for (Element picelem : piclinks) {
String picurl = "http://newsfilter.org"+picelem.attr("href");
logger.info("Getting to picture page: "+picurl);
resp = Jsoup.connect(picurl)
.timeout(5000)
.referrer(theurl)
.userAgent(USER_AGENT)
.method(Connection.Method.GET)
.execute();
Document picdoc = resp.parse();
String dlurl = picdoc.getElementsByAttributeValue("itemprop","contentURL").first().attr("src");
addURLToDownload(new URL(dlurl));
Elements thumbnails = doc.select("#galleryImages .inner-block img");
for (Element thumb : thumbnails) {
String thumbUrl = thumb.attr("src");
String picUrl = thumbUrl.replace("thumbs/", "");
addURLToDownload(new URL(picUrl));
}
waitForThreads();
}
@Override
public String getHost() {
return HOST;
@ -86,9 +75,8 @@ public class NewsfilterRipper extends AlbumRipper {
if (m.matches()) {
return m.group(2);
}
throw new MalformedURLException("Expected newsfilter gallery format: "
+ "http://newsfilter.org/gallery/galleryid"
+ " Got: " + url);
throw new MalformedURLException(
"Expected newsfilter gallery format: http://newsfilter.org/gallery/galleryid" +
" Got: " + url);
}
}

View File

@ -0,0 +1,89 @@
package com.rarchives.ripme.ripper.rippers;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Ripper for sinnercomics.com — walks a comic page by page and collects
 * the og:image of each page.
 */
public class SinnercomicsRipper extends AbstractHTMLRipper {

    public SinnercomicsRipper(URL url) throws IOException {
        super(url);
    }

    @Override
    public String getHost() {
        return "sinnercomics";
    }

    @Override
    public String getDomain() {
        return "sinnercomics.com";
    }

    /** Gallery ID is the album slug after {@code /comic/}. */
    @Override
    public String getGID(URL url) throws MalformedURLException {
        Matcher matcher = Pattern
                .compile("^https?://sinnercomics.com/comic/([a-zA-Z0-9-]*)/?$")
                .matcher(url.toExternalForm());
        if (!matcher.matches()) {
            throw new MalformedURLException("Expected sinnercomics.com URL format: " +
                    "sinnercomics.com/comic/albumName - got " + url + " instead");
        }
        return matcher.group(1);
    }

    @Override
    public Document getFirstPage() throws IOException {
        // "url" is an instance field of the superclass
        return Http.url(url).get();
    }

    @Override
    public Document getNextPage(Document doc) throws IOException {
        // The comic-nav-next anchor links the following page; its absence
        // means we reached the end of the comic.
        Element nextLink = doc.select("a.comic-nav-next").first();
        if (nextLink == null) {
            throw new IOException("No more pages");
        }
        // Wait half a sec to avoid IP bans
        sleep(500);
        return Http.url(nextLink.attr("href")).get();
    }

    @Override
    public List<String> getURLsFromPage(Document doc) {
        List<String> imageUrls = new ArrayList<String>();
        for (Element meta : doc.select("meta[property=og:image]")) {
            // Strip the stray " alt=" fragment the site leaves inside the
            // content attribute.
            imageUrls.add(meta.attr("content").replace(" alt=", ""));
        }
        return imageUrls;
    }

    @Override
    public void downloadURL(URL url, int index) {
        addURLToDownload(url, getPrefix(index));
    }
}

View File

@ -0,0 +1,78 @@
package com.rarchives.ripme.ripper.rippers;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Ripper for thechive.com gallery posts.
 * Expected URL format: http://thechive.com/YEAR/MONTH/DAY/POSTTITLE/
 */
public class ThechiveRipper extends AbstractHTMLRipper {

    // Set by getGID(); only ever assigned false here.
    // NOTE(review): presumably reserved for future tag-listing support — confirm before removing.
    public static boolean isTag;

    public ThechiveRipper(URL url) throws IOException {
        super(url);
    }

    @Override
    public String getHost() {
        return "thechive";
    }

    @Override
    public String getDomain() {
        return "thechive.com";
    }

    @Override
    public String getGID(URL url) throws MalformedURLException {
        // Dots are escaped and an optional "www." prefix is accepted
        // (the site commonly serves pages from www.thechive.com).
        // The post title slug is used as the GID.
        Pattern p = Pattern.compile("^https?://(?:www\\.)?thechive\\.com/[0-9]*/[0-9]*/[0-9]*/([a-zA-Z0-9_\\-]*)/?$");
        Matcher m = p.matcher(url.toExternalForm());
        if (m.matches()) {
            isTag = false;
            return m.group(1);
        }
        throw new MalformedURLException("Expected thechive.com URL format: " +
                        "thechive.com/YEAR/MONTH/DAY/POSTTITLE/ - got " + url + " instead");
    }

    @Override
    public Document getFirstPage() throws IOException {
        // "url" is an instance field of the superclass
        return Http.url(url).get();
    }

    @Override
    public List<String> getURLsFromPage(Document doc) {
        List<String> result = new ArrayList<String>();
        for (Element el : doc.select("img.attachment-gallery-item-full")) {
            String imageSource = el.attr("src");
            // We replace "thumbs" with "resizes" so we can get the full-sized images
            imageSource = imageSource.replace("thumbs", "resizes");
            result.add(imageSource);
        }
        return result;
    }

    @Override
    public void downloadURL(URL url, int index) {
        addURLToDownload(url, getPrefix(index));
    }
}

View File

@ -48,6 +48,7 @@ public class XhamsterRipper extends AlbumRipper {
image = image.replaceAll(
"https://upt.xhcdn\\.",
"http://up.xhamster.");
image = image.replaceAll("ept\\.xhcdn", "ep.xhamster");
image = image.replaceAll(
"_160\\.",
"_1000.");

View File

@ -0,0 +1,125 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.Connection.Response;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;
/**
 * Ripper for zizki.com artist galleries.
 * Expected URL format: http://www.zizki.com/author/albumname
 */
public class ZizkiRipper extends AbstractHTMLRipper {

    // Cached album page so it is only fetched once.
    private Document albumDoc = null;
    // Session cookies captured on the first request, replayed on every download.
    private Map<String,String> cookies = new HashMap<String,String>();

    public ZizkiRipper(URL url) throws IOException {
        super(url);
    }

    @Override
    public String getHost() {
        return "zizki";
    }

    @Override
    public String getDomain() {
        return "zizki.com";
    }

    @Override
    public String getGID(URL url) throws MalformedURLException {
        Pattern p = Pattern.compile("^https?://(www\\.)?zizki\\.com/([a-zA-Z0-9\\-_]+).*$");
        Matcher m = p.matcher(url.toExternalForm());
        // The last capture group holds the author segment, which serves as the GID.
        if (m.matches()) {
            return m.group(m.groupCount());
        }
        throw new MalformedURLException("Expected URL format: http://www.zizki.com/author/albumname, got: " + url);
    }

    @Override
    public String getAlbumTitle(URL url) throws MalformedURLException {
        try {
            // Attempt to build "host_author_title" from the page metadata.
            Document doc = getFirstPage();
            String title = doc.select("meta[name=description]").first().attr("content");
            title = title.substring(title.lastIndexOf('/') + 1);
            String author = doc.select("span[class=creator]").first().select("a").first().text();
            logger.debug("Author: " + author);
            return getHost() + "_" + author + "_" + title.trim();
        } catch (IOException e) {
            // Fall back to default album naming convention
            logger.info("Unable to find title at " + url);
        }
        return super.getAlbumTitle(url);
    }

    @Override
    public Document getFirstPage() throws IOException {
        // Fetch once, caching both the parsed page and the response cookies.
        if (albumDoc == null) {
            Response resp = Http.url(url).response();
            cookies.putAll(resp.cookies());
            albumDoc = resp.parse();
        }
        return albumDoc;
    }

    @Override
    public List<String> getURLsFromPage(Document page) {
        List<String> imageURLs = new ArrayList<String>();
        // Page contains images
        logger.info("Look for images.");
        for (Element thumb : page.select("img")) {
            logger.info("Img");
            if (super.isStopped()) {
                break;
            }
            // Only RDFa-typed images qualify.
            if (!thumb.hasAttr("typeof")) {
                continue;
            }
            String imgType = thumb.attr("typeof");
            if (!imgType.equals("foaf:Image")) {
                continue;
            }
            logger.debug("Found image with " + imgType);
            // The image must sit inside a grandparent with class "aimage-center".
            Element grandparent = (thumb.parent() == null) ? null : thumb.parent().parent();
            if (grandparent == null || !"aimage-center".equals(grandparent.attr("class"))) {
                continue;
            }
            String src = thumb.attr("src");
            logger.debug("Found url with " + src);
            // Skip off-site images; swap medium for large to get the bigger rendition.
            if (src.contains("zizki.com")) {
                imageURLs.add(src.replace("/styles/medium/public/","/styles/large/public/"));
            }
        }
        return imageURLs;
    }

    @Override
    public void downloadURL(URL url, int index) {
        // Referer and cookies are required for the CDN to serve the file.
        addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), cookies);
    }

    @Override
    public String getPrefix(int index) {
        return String.format("%03d_", index);
    }
}

View File

@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils;
public class UpdateUtils {
private static final Logger logger = Logger.getLogger(UpdateUtils.class);
private static final String DEFAULT_VERSION = "1.4.2";
private static final String DEFAULT_VERSION = "1.4.7";
private static final String updateJsonURL = "https://raw.githubusercontent.com/4pr0n/ripme/master/ripme.json";
private static final String mainFileName = "ripme.jar";
private static final String updateFileName = "ripme.jar.update";

View File

@ -3,13 +3,16 @@ package com.rarchives.ripme.utils;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.lang.reflect.Constructor;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.jar.JarEntry;
import java.util.jar.JarFile;
@ -387,4 +390,73 @@ public class Utils {
}
return result;
}
/**
 * Parses a URL query string into its key/value pairs.
 *
 * @param query
 *      The query part of a URL (without the leading '?'), e.g. "a=1&amp;b=2"
 * @return Map of URL-decoded parameter names to URL-decoded values.
 *         Parameters without '=' map to the empty string; duplicate keys
 *         keep the last value seen. Never null: a null or empty query
 *         yields an empty map.
 */
public static Map<String,String> parseUrlQuery(String query) {
    Map<String,String> res = new HashMap<String, String>();
    if (query == null || query.isEmpty()) {
        // Nothing to parse; previously a null query threw NPE.
        return res;
    }
    try {
        for (String part : query.split("&")) {
            int pos = part.indexOf('=');
            if (pos >= 0) {
                String name = URLDecoder.decode(part.substring(0, pos), "UTF-8");
                String value = URLDecoder.decode(part.substring(pos + 1), "UTF-8");
                res.put(name, value);
            } else {
                res.put(URLDecoder.decode(part, "UTF-8"), "");
            }
        }
    } catch (UnsupportedEncodingException e) {
        // Shouldn't happen since UTF-8 is required to be supported
        throw new RuntimeException(e);
    }
    return res;
}
/**
 * Parses a URL query and returns the value of one requested parameter.
 *
 * @param query
 *      The query part of a URL
 * @param key
 *      The key whose value is requested
 * @return The decoded value of the first occurrence of {@code key},
 *         "" if the key appears without '=', or null if the key
 *         is not present at all.
 */
public static String parseUrlQuery(String query, String key) {
    if (query.isEmpty()) {
        return null;
    }
    try {
        for (String pair : query.split("&")) {
            // Split on the first '=' only; a lone token has no value part.
            String[] kv = pair.split("=", 2);
            if (URLDecoder.decode(kv[0], "UTF-8").equals(key)) {
                return (kv.length > 1) ? URLDecoder.decode(kv[1], "UTF-8") : "";
            }
        }
    } catch (UnsupportedEncodingException e) {
        // Shouldn't happen since UTF-8 is required to be supported
        throw new RuntimeException(e);
    }
    return null;
}
}