Merge pull request #1 from RipMeApp/master

Update from original
rephormat committed 2018-01-12 12:50:17 -06:00 (via GitHub)
commit c070f154f4
176 changed files with 4346 additions and 3174 deletions

.github/ISSUE_TEMPLATE.md

@@ -1,12 +1,3 @@
-<!--
-We've moved! If you are not already, please consider opening your issue at the following link:
-https://github.com/RipMeApp/ripme/issues/new
-If this is a bug, please fill out the information below.
-Please include any additional information that would help us fix the bug.
-If this is a feature request or other type of issue, provide whatever information you feel is appropriate.
--->
 * Ripme version:
 * Java version: <!-- (output of `java -version`) -->
 * Operating system: <!-- (if Windows, output of `ver` or `winver`) -->

.github/PULL_REQUEST_TEMPLATE.md

@@ -1,10 +1,3 @@
-<!--
-We've moved! If you are not already, please consider opening your pull request here:
-https://github.com/RipMeApp/ripme/
-To help us verify your change, please fill out the information below.
--->
 # Category
 This change is exactly one of the following (please change `[ ]` to `[x]`) to indicate which:

.gitignore (vendored; 114 changes)

@@ -1,17 +1,121 @@
+# Created by https://www.gitignore.io/api/java,linux,macos,maven,windows
+
+### Java ###
+# Compiled class file
+*.class
+
+# Log file
+*.log
+
+# BlueJ files
+*.ctxt
+
+# Mobile Tools for Java (J2ME)
+.mtj.tmp/
+
+# Package Files #
+*.jar
+*.war
+*.ear
+*.zip
+*.tar.gz
+*.rar
+
+# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
+hs_err_pid*
+
+### Linux ###
+*~
+
+# temporary files which can be created if a process still has a handle open of a deleted file
+.fuse_hidden*
+
+# KDE directory preferences
+.directory
+
+# Linux trash folder which might appear on any partition or disk
+.Trash-*
+
+# .nfs files are created when an open file is removed but is still being accessed
+.nfs*
+
+### macOS ###
+*.DS_Store
+.AppleDouble
+.LSOverride
+
+# Icon must end with two \r
+Icon
+
+# Thumbnails
+._*
+
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+
+### Maven ###
 target/
-.DS_Store
+pom.xml.tag
+pom.xml.releaseBackup
+pom.xml.versionsBackup
+pom.xml.next
+release.properties
+dependency-reduced-pom.xml
+buildNumber.properties
+.mvn/timing.properties
+# Avoid ignoring Maven wrapper jar file (.jar files are usually ignored)
+!/.mvn/wrapper/maven-wrapper.jar
+
+### Windows ###
+# Windows thumbnail cache files
+Thumbs.db
+ehthumbs.db
+ehthumbs_vista.db
+
+# Folder config file
+Desktop.ini
+
+# Recycle Bin used on file shares
+$RECYCLE.BIN/
+
+# Windows Installer files
+*.cab
+*.msi
+*.msm
+*.msp
+
+# Windows shortcuts
+*.lnk
+
+### IDEs ###
+.vscode
+.idea
+.project
+
+### Ripme ###
 ripme.log
 rips/
 .history
 ripme.jar.update
 *.swp
-ripme.jar
-rip.properties
+*.properties
 history.json
-.idea
 *.iml
 .settings/
 .classpath
 *.txt
 bin/
-.vscode/

.travis.yml

@@ -1,3 +1,6 @@
 language: java
 jdk:
-  - oraclejdk7
+  - oraclejdk8
+  - openjdk8
+after_success:
+  - mvn clean test jacoco:report coveralls:report

.vscode/settings.json

@@ -2,7 +2,9 @@
     "files.exclude": {
         "target/**": true,
         "**/.git": true,
-        "**/.DS_Store": true
+        "**/.DS_Store": true,
+        "**/*.class": true,
+        "**/rips/**": true
     },
     "java.configuration.updateBuildConfiguration": "automatic"
 }

CONTRIBUTING.md

@@ -7,7 +7,16 @@ You can now find the latest code, issues, and releases at [RipMeApp/ripme](https
 Please be polite and supportive to all users and contributors. Please be inclusive of everyone regardless of race, religion, gender identity or expression, sexual preference, or tools and platform preferences. Please be helpful and stick to the engineering facts, and avoid expressing unhelpful or off-topic opinions.

-Many of the sites we deal with contain NSFW (Not Safe For Work) content. Please assume any link you see is NSFW unless tagged otherwise -- i.e., SFW (Safe For Work). Please tag all links you post with either (NSFW) or (SFW) to be considerate to others who may not be browsing this repo in private.
+# NSFW Content
+
+**Please tag NSFW links (links to sites with adult content) with "(NSFW)"!**
+
+Many of the sites we deal with contain NSFW (Not Safe For Work) content. Please assume any link you see is NSFW unless tagged otherwise -- i.e., SFW (Safe For Work). Please tag all links you post with either "(NSFW)" or "(SFW)" to be considerate to others who may not be browsing this repo in private or who are not interested in NSFW content.
+
+There is a helpful plugin called uMatrix available for [Firefox](https://addons.mozilla.org/en-US/firefox/addon/umatrix/) and [Chrome](https://chrome.google.com/webstore/detail/umatrix/ogfcmafjalglgifnmanfmnieipoejdcf) which allows you to block certain types of content like media and scripts.
+If you're not sure if a site might contain NSFW images or media, and you are in mixed company but want to develop a new ripper, you can block downloading images and media in the * (all sites) scope and allow requests for specific domains you trust as you go.
+Being able to browse the HTML is usually the most important part of developing or fixing a ripper, so it is not necessarily important to actually see the images load.

 # Priorities

@@ -68,13 +77,14 @@ Good style is a tool for communicating your intent with other developers of the
 Some recommendations:
 * Above all, be consistent!
-* Spaces, not tabs.
+* Spaces, not tabs. Indents should be 4 spaces.
 * We prefer "Egyptian brackets" (in `if`, `for`, `while`, `switch`, etc.):
   * `if (...) {`
   * `} else if (...) {`
   * `} else {`
   * `}`
-* Constants in `UPPER_SNAKE_CASE`
+* Note the spacing convention above for control flow constructs (a single space on the outside of each paren)
+* Constants in `UPPER_SNAKE_CASE` a.k.a. `CONST_CASE`
 * Class names in `PascalCase` a.k.a. `UpperCamelCase`
 * Variable names in `camelCase` a.k.a. `lowerCamelCase`
 * Do not use Hungarian notation
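
To make the style recommendations concrete, here is a small illustrative class that follows all of them; the names are invented for the example, not taken from the codebase:

```java
public class ExampleDownloader {                     // class name in PascalCase
    private static final int MAX_RETRIES = 3;        // constant in UPPER_SNAKE_CASE

    public boolean shouldRetry(int attemptCount) {   // method and variable in camelCase
        // Egyptian brackets, 4-space indents, one space on the outside of each paren
        if (attemptCount < MAX_RETRIES) {
            return true;
        } else {
            return false;
        }
    }
}
```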

README.md

@@ -1,9 +1,22 @@
-# RipMe
-[![Build Status](https://travis-ci.org/4pr0n/ripme.svg?branch=master)](https://travis-ci.org/4pr0n/ripme)
-[![Join the chat at https://gitter.im/RipMeApp/Lobby](https://badges.gitter.im/RipMeApp/Lobby.svg)](https://gitter.im/RipMeApp/Lobby?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
-Album ripper for various websites. Runs on your computer. Requires Java 1.6
+# RipMe [![Licensed under the MIT License](https://img.shields.io/badge/License-MIT-blue.svg)](https://github.com/RipMeApp/ripme/blob/master/LICENSE.txt) [![Join the chat at https://gitter.im/RipMeApp/Lobby](https://badges.gitter.im/RipMeApp/Lobby.svg)](https://gitter.im/RipMeApp/Lobby?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) [![Subreddit](https://img.shields.io/badge/discuss-on%20reddit-blue.svg)](https://www.reddit.com/r/ripme/)
+[![Build Status](https://travis-ci.org/RipMeApp/ripme.svg?branch=master)](https://travis-ci.org/RipMeApp/ripme)
+[![Coverage Status](https://coveralls.io/repos/github/RipMeApp/ripme/badge.svg?branch=master)](https://coveralls.io/github/RipMeApp/ripme?branch=master)
+
+# Contribute
+
+RipMe is maintained with ♥️ and in our limited free time by **[@MetaPrime](https://github.com/metaprime)** and **[@cyian-1756](https://github.com/cyian-1756)**. If you'd like to contribute but aren't good with code, help keep us happy with a small contribution!
+
+[![Tip with PayPal](https://img.shields.io/badge/PayPal-Buy_us...-lightgrey.svg)](https://www.paypal.me/ripmeapp)
+[![Tip with PayPal](https://img.shields.io/badge/coffee-%245-green.svg)](https://www.paypal.com/paypalme/ripmeapp/send?amount=5.00&currencyCode=USD&locale.x=en_US&country.x=US)
+[![Tip with PayPal](https://img.shields.io/badge/beer-%2410-yellow.svg)](https://www.paypal.com/paypalme/ripmeapp/send?amount=10.00&currencyCode=USD&locale.x=en_US&country.x=US)
+[![Tip with PayPal](https://img.shields.io/badge/lunch-%2420-orange.svg)](https://www.paypal.com/paypalme/ripmeapp/send?amount=20.00&currencyCode=USD&locale.x=en_US&country.x=US)
+[![Tip with PayPal](https://img.shields.io/badge/dinner-%2450-red.svg)](https://www.paypal.com/paypalme/ripmeapp/send?amount=50.00&currencyCode=USD&locale.x=en_US&country.x=US)
+[![Tip with PayPal](https://img.shields.io/badge/custom_amount-...-lightgrey.svg)](https://www.paypal.me/ripmeapp)
+
+# About
+
+RipMe is an album ripper for various websites. Runs on your computer. Requires Java 8.

 ![Screenshot](http://i.imgur.com/kWzhsIu.png)

@@ -73,14 +86,3 @@ mvn test

 Please note that some tests may fail as sites change and our rippers become out of date.

 Start by building and testing a released version of RipMe
 and then ensure that any changes you make do not cause more tests to break.
-
-# Dependencies
-* junit-3.8.1
-* jsoup-1.7.3
-* json-20140107
-* apache-commons-configuration-1.7
-* log4j-1.2.17
-* commons-cli-1.2
-* commons-io-1.3.2
-* httpcomponents-4.3.3

build.bat (0 changes; mode changed from Normal file to Executable file)

build.sh (new executable file; 1 addition)

@@ -0,0 +1 @@
mvn clean compile assembly:single
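
For reference, a hypothetical build-and-run session; the jar path under `target/` is an assumption based on maven-assembly-plugin's usual `-jar-with-dependencies` naming and the version in pom.xml below, not something this diff confirms:

```sh
./build.sh
# assumed output name; check target/ after the build
java -jar target/ripme-1.7.12-jar-with-dependencies.jar
```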

docs/options.md (new file; 40 additions)

@@ -0,0 +1,40 @@
| Option | Type | Description |
|--------|------|-------------|
| file.overwrite | bool | If true, ripme will overwrite existing files rather than skip them |
| clipboard.autorip | bool | If true, ripme will try to download any links in the clipboard |
| error.skip404 | bool | Don't retry on 404 errors |
| download.save_order | bool | If true, ripme will prefix each downloaded file with a number, in the order the file was downloaded |
| auto.update | bool | If true, ripme will auto-update every time it's started |
| play.sound | bool | If true, ripme will play a sound every time a rip finishes |
| download.show_popup | bool | TODO: figure out what this is for |
| log.save | bool | If true, ripme will save its logs |
| urls_only.save | bool | If true, ripme will save all URLs to a text file and download no files |
| album_titles.save | bool | Currently does nothing |
| prefer.mp4 | bool | Prefer MP4 when downloading a video that has more than one format |
| download.timeout | int | File download timeout (in milliseconds) |
| page.timeout | int | Page download timeout (in milliseconds) |
| download.max_size | int | Maximum size of downloaded files, in bytes |
| threads.size | int | The number of threads to use |
| twitter.auth | String | Twitter API key (Base64'd) |
| tumblr.auth | String | Tumblr API key |
| log.level | String | The debug log level (example: Log level: Debug) |
| gw.api | String | TODO: figure out what this is for |
| twitter.max_requests | int | TODO: figure out what this is for |
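
As a quick illustration, a minimal `rip.properties` (the file name that appears in the old .gitignore above) setting a few of the options from this table; the values are arbitrary examples, not documented defaults:

```properties
# example values only, not defaults
file.overwrite = false
download.save_order = true
threads.size = 5
download.timeout = 60000
prefer.mp4 = true
```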

nbactions.xml (deleted)

@@ -1,17 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<actions>
    <action>
        <actionName>run</actionName>
        <packagings>
            <packaging>jar</packaging>
        </packagings>
        <goals>
            <goal>process-classes</goal>
            <goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal>
        </goals>
        <properties>
            <exec.args>-classpath %classpath com.rarchives.ripme.App</exec.args>
            <exec.executable>java</exec.executable>
        </properties>
    </action>
</actions>

patch.bat (deleted)

@@ -1,2 +0,0 @@
@echo off
powershell .\patch.ps1

patch.ps1 (deleted)

@@ -1,53 +0,0 @@
Param (
    [Parameter(Mandatory=$True)]
    [string]$message
)

# This script will:
# - read current version
# - increment patch version
# - update version in a few places
# - insert new line in ripme.json with $message

$ripmeJson = (Get-Content "ripme.json") -join "`n" | ConvertFrom-Json
$currentVersion = $ripmeJson.latestVersion
Write-Output (("Current version", $currentVersion) -join ' ')

$versionFields = $currentVersion.split('.')
$patchCurr = [int]($versionFields[2])
$patchNext = $patchCurr + 1
$majorMinor = $versionFields[0..1]
$majorMinorPatch = $majorMinor + $patchNext
$nextVersion = $majorMinorPatch -join '.'
Write-Output (("Updating to", $nextVersion) -join ' ')

$substExpr = "s/${currentVersion}/${nextVersion}/"
sed src/main/java/com/rarchives/ripme/ui/UpdateUtils.java -i -e "${substExpr}"
git grep "DEFAULT_VERSION.*${nextVersion}" src/main/java/com/rarchives/ripme/ui/UpdateUtils.java

$substExpr = "s/\`"latestVersion\`" : \`"${currentVersion}\`"/\`"latestVersion\`" : \`"${nextVersion}\`"/"
sed ripme.json -i -e "${substExpr}"
git grep "latestVersion" ripme.json

$substExpr = "s/<version>${currentVersion}/<version>${nextVersion}/"
sed pom.xml -i -e "${substExpr}"
git grep "<version>${nextVersion}" pom.xml

$commitMessage = "${nextVersion}: ${message}"
$ripmeJsonLines = Get-Content "ripme.json"
$ripmeJsonHead = $ripmeJsonLines[0..2]
$ripmeJsonRest = $ripmeJsonLines[3..$ripmeJsonLines.length]
$changelogLine = "    `"${commitMessage}`","
$updatedLines = $ripmeJsonHead + $changelogLine + $ripmeJsonRest + ""
$outputContent = $updatedLines -join "`n"
$outputPath = (Resolve-Path .\ripme.json).Path

$Utf8NoBomEncoding = New-Object System.Text.UTF8Encoding $False
[System.IO.File]::WriteAllText($outputPath, $outputContent, $Utf8NoBomEncoding)

git add -u
git commit -m $commitMessage
git tag $nextVersion

patch.py (new file; 56 additions)

@@ -0,0 +1,56 @@
import json
import subprocess

# This script will:
# - read current version
# - increment patch version
# - update version in a few places
# - insert new line in ripme.json with message

message = input('message: ')

with open('ripme.json') as dataFile:
    ripmeJson = json.load(dataFile)

currentVersion = ripmeJson["latestVersion"]
print ('Current version ' + currentVersion)

versionFields = currentVersion.split('.')
patchCur = int(versionFields[2])
patchNext = patchCur + 1
majorMinor = versionFields[:2]
majorMinor.append(str(patchNext))
nextVersion = '.'.join(majorMinor)
print ('Updating to ' + nextVersion)

substrExpr = 's/' + currentVersion + '/' + nextVersion + '/'
subprocess.call(['sed', '-i', '-e', substrExpr, 'src/main/java/com/rarchives/ripme/ui/UpdateUtils.java'])
subprocess.call(['git', 'grep', 'DEFAULT_VERSION.*' + nextVersion,
                 'src/main/java/com/rarchives/ripme/ui/UpdateUtils.java'])

substrExpr = 's/\\\"latestVersion\\\": \\\"' + currentVersion + '\\\"/\\\"latestVersion\\\": \\\"' +\
    nextVersion + '\\\"/'
subprocess.call(['sed', '-i', '-e', substrExpr, 'ripme.json'])
subprocess.call(['git', 'grep', 'latestVersion', 'ripme.json'])

substrExpr = 's/<version>' + currentVersion + '/<version>' + nextVersion + '/'
subprocess.call(['sed', '-i', '-e', substrExpr, 'pom.xml'])
subprocess.call(['git', 'grep', '<version>' + nextVersion + '</version>', 'pom.xml'])

commitMessage = nextVersion + ': ' + message
changeLogLine = '    \"' + commitMessage + '\",\n'

dataFile = open("ripme.json", "r")
ripmeJsonLines = dataFile.readlines()
ripmeJsonLines.insert(3, changeLogLine)
outputContent = ''.join(ripmeJsonLines)
dataFile.close()

dataFile = open("ripme.json", "w")
dataFile.write(outputContent)
dataFile.close()

subprocess.call(['git', 'add', '-u'])
subprocess.call(['git', 'commit', '-m', commitMessage])
subprocess.call(['git', 'tag', nextVersion])
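
The script is interactive: it prompts for the changelog message on stdin. A hypothetical session (the two output lines come from the `print` calls above; sed/git output omitted):

```console
$ python3 patch.py
message: Fixed ExampleSite ripper
Current version 1.7.12
Updating to 1.7.13
```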

pom.xml (25 changes)

@@ -4,7 +4,7 @@
 <groupId>com.rarchives.ripme</groupId>
 <artifactId>ripme</artifactId>
 <packaging>jar</packaging>
-<version>1.5.7</version>
+<version>1.7.12</version>
 <name>ripme</name>
 <url>http://rip.rarchives.com</url>
 <properties>

@@ -84,10 +84,29 @@
 <artifactId>maven-compiler-plugin</artifactId>
 <version>3.1</version>
 <configuration>
-    <source>1.6</source>
-    <target>1.6</target>
+    <source>1.8</source>
+    <target>1.8</target>
 </configuration>
 </plugin>
+<plugin>
+    <groupId>org.eluder.coveralls</groupId>
+    <artifactId>coveralls-maven-plugin</artifactId>
+    <version>4.3.0</version>
+</plugin>
+<plugin>
+    <!-- At time of writing: JaCoCo is (allegedly) the only coverage report generator that supports Java 8 -->
+    <groupId>org.jacoco</groupId>
+    <artifactId>jacoco-maven-plugin</artifactId>
+    <version>0.7.6.201602180812</version>
+    <executions>
+        <execution>
+            <id>prepare-agent</id>
+            <goals>
+                <goal>prepare-agent</goal>
+            </goals>
+        </execution>
+    </executions>
+</plugin>
 </plugins>
 </build>
 </project>
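
The two new plugins back the `after_success` step added to .travis.yml above; the same goals can be run locally to produce a JaCoCo report, though `coveralls:report` generally needs a Coveralls repo token outside of CI:

```sh
mvn clean test jacoco:report coveralls:report
```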

ripme.json

@@ -1,152 +1,187 @@
 {
-  "latestVersion" : "1.5.7",
-  "changeList" : [
+  "latestVersion": "1.7.12",
+  "changeList": [
+    "1.7.12: Instagram ripper no longer 403s on certain images",
+    "1.7.11: Added gwarchives support to the cheveretoRipper; Gfycat Tests & Fix for bad reddit submissions; instagram ripper can now be made to skip videos",
+    "1.7.10: Added basic pornpics.com ripper; Fixed hentai.cafe regex",
+    "1.7.9: FuraffinityRipper can now rip non-public albums; Added 2 new api keys, ripper can now download raw images from tumblr; Erome ripper now matchs links without the www; Tumblr ripper now tells the user if it hits the rate limit",
+    "1.7.8: Forced https for tumblr image links; Fixed imgur album filenames; SankakuComplexRipper now downloads full sized images; Added dribbble.com ripper; Added comfirm button for clearing history",
+    "1.7.7: Fixed E621 Ripper; Added unit test for zizki.com; Added unit test for Xbooru.com; Updated reddit useragent",
+    "1.7.6: Added OglafRipper",
+    "1.7.5: Improve WordpressComicRipper; update to a modern User Agent",
+    "1.7.4: Fix WordpressComicRipper konradokonski.com/wiory; Fix CheveretoRipper hushpix.com by adding consent cookie",
+    "1.7.3: Improved Aerisdies and Imagearn folders; fixed tapas.io; XhamsterRipper now uses mobile site; InstagramRipper slideshows under user profiles",
+    "1.7.2: InstagramRipper: Added support for ripping individual posts",
+    "1.7.1: Fix WordpressComicRipper's ripper for freeadultcomix.com; FuraffinityRipper can now rip public albums",
+    "1.7.0: Improved Webtoons folders; Added code coverage with Coveralls.io and improved unit tests; removed rippers for dead sites",
+    "1.6.13: Added Instagram tags; improved Instagram and Pichunter regexes",
+    "1.6.12: Fix InstagramRipper with timestamps; Pichunter galleries support; logging improvements",
+    "1.6.11: Added pichunter.com ripper; Improved Instagram filenames; added tehyiffgallery ripper; Fixed xchan ripper; Fixed chanRipper folders",
+    "1.6.10: Added viewcomic ripper; Fixed webtoons malformed url error message; Fixed chan ripper thread title; Fixed Modelmayhem ripper",
+    "1.6.9: Added support for imgur /t/ albums; Added portable mode; Unit tests no longer fail if run twice; Formating fixes",
+    "1.6.8: code clean up; ripme can now remeber and skip already downloaded images",
+    "1.6.7: Fixed instagram ripper",
+    "1.6.6: Fixed 8muses ripper",
+    "1.6.5: Imgbox ripper now downloads full sized image from galleries",
+    "1.6.4: Added webtoons ripper",
+    "1.6.3: Window is now resizable; Added Porncomix.info ripper; Fixed imgbox ripper; Added hentai2read ripper",
+    "1.6.2: Fixed shesfreaky.com ripper; Fixed imgbox ripper; Fixed Xhamster video ripping",
+    "1.6.1: Rolled E621Ripper back from 1.6.0 to the 1.5.15 version",
+    "1.6.0: Updated to java 8; Some code cleanup",
+    "1.5.15: Added Hbrowse.com ripper; 8muses ripper now can rip from all album types",
+    "1.5.14: Myhentaicomics ripper no longer tries to download ads; Added hentai.cafe ripper; Fixed sankakucomplex ripper",
+    "1.5.13: InstagramRipper: fixed minor bug",
+    "1.5.12: Make tray icon optional; work around window positioning bug on Windows.",
+    "1.5.11: Added -v, --version flag",
+    "1.5.10: Added ripper for cfake.com; Fixed nhentai album naming",
+    "1.5.9: InstagramRipper now downloads full sized images; ImagefapRipper Now adds GID to folder name",
+    "1.5.8: Fixed 8muses ripper",
     "1.5.7: Added EromeRipper",
     "1.5.6: Fixed ImagearnRipper; Fixed SmuttyRipper",
     "1.5.5: Wordpress comic ripper Updates",
     "1.5.4: Added Luscious.net ripper",
     "1.5.3: Eroshare links redirect to Eroshae; add AerisdiesRipper",
     "1.5.2: Fix Imgur titles; fix xhamster (new URL format); fixed Instagram ripping cropped pictures",
     "1.5.1: Ensure update mechanism is working correctly.",
     "1.5.0: Change 'home' repo from 4pr0n/RipMe to RipMeApp/RipMe",
     "1.4.21: Added Chevereto ripper (hushpix.com, tag-fox.com)",
     "1.4.20: EroshareRipper can now rip user profiles",
     "1.4.19: WordpressComicRipper supports more rippers; improvements to Instagram and code quality",
     "1.4.18: Fix video rippers (broken in 1.4.14)",
     "1.4.17: MyHentaiComics improvements",
     "1.4.16: Fix Eightmuses; Add Instagram album support",
     "1.4.15: Fixed DeviantArt Ripper",
     "1.4.14: Improvements to ChanRipper (rip external links), MyHentaiComics, and Twitter (video and albums)",
     "1.4.13: Fixed furaffinity ripper.",
     "1.4.12: Fixed Crash on Win10 CU; Fixed SSL error on xHamster.",
     "1.4.11: Instagram: fixed cropped images issue.",
     "1.4.10: Add WordPressComicRipper (various sites supported)",
     "1.4.9: Fixed HentaiFoundry ripper",
     "1.4.8: Added Jagodibuja comics ripper",
     "1.4.7: Fixed NewsFilter, XHamster; added TheChiveRipper",
     "1.4.6: Eroshare: get album names; Imgur: improve grabbing album name.",
     "1.4.5: SinnerComics: Added work around for naming bug",
     "1.4.4: Added SinnerComics, MyHentaiComics rippers; improve E621 ripper.",
     "1.4.3: Add missing subdomain for 4chan; fix ehentai, 8muses; add zizki ripper.",
     "1.4.2: Added nhentai ripper.",
     "1.4.1: Fixed Imgbox: correctly downloads full-size images.",
     "1.4.0: Fixed update mechanism. Some improvements to Imgur, etc.",
     "1.3.0: Fix Instagram, Tumblr, xHamster, 4chan, 8muses. Some new features.",
     "1.2.13: Hotfix for imgur album rips",
     "1.2.12: 500px gallery/subgallery support",
     "1.2.11: Deviant fav subfolders, and reddituploads support",
     "1.2.10: Imgur /gallery/ images fix",
     "1.2.9: Imgur 10-image fix, original twitter sizes",
     "1.2.8: Option to prefer MP4 over GIF for imgur",
     "1.2.7: Fix 500px ripper to fetch NSFW images",
     "1.2.6: Fix 500px ripper",
     "1.2.5: Descriptions are optional, minor imgur fixes",
     "1.2.4: Fix instagram ripper",
     "1.2.3: Fix xhamster videos, option to remove/clear Queue",
     "1.2.2: Fix imagefap ripper",
     "1.2.1: Gfycat Fix, lots of changes pushed",
     "1.2.0: Fix imagebam, 8muses. Remember queue items",
     "1.1.9: Hotfix for new imgur album layout",
     "1.1.8: Fix for failed reddit rips",
     "1.1.7: Imagefap fix, corrupt history crash fix, deviantart 403 fix",
     "1.1.6: History error handling and drawchan support",
     "1.1.5: Fix imagefap and 8muses rippers",
     "1.1.4: Fix DeviantArt 403 errors",
     "1.1.3: Fix Check Selected in History",
     "1.1.2: Check/Uncheck history by right-clicking the history",
     "1.1.1: Gfycat/Reddit fix",
     "1.1.0: Revamped History, Cheeby fix",
     "1.0.93: Reddit fix, gfycat fix, video download fix",
     "1.0.92: Anon-ib fix, cheeby fix, vid.me ripper",
     "1.0.91: Fix for anon-ib, minus rippers",
     "1.0.90: Hide error message when ripping valid album",
     "1.0.89: Fix fapproved ripper",
     "1.0.88: Fix imgbox ripper",
     "1.0.87: Chan ripper update, Finebox update, Motherless video ripper",
     "1.0.86: Fix for imagefap albums larger than 1k images",
     "1.0.85: Fix Modelmayhem ripper",
     "1.0.84: Ripper can resume after being stopped",
     "1.0.83: Fix 2dgalleries ripper",
     "1.0.82: Photobucket ripper fix, Clipboard Autorip toggle",
     "1.0.81: Tumblr/seenive fixes, queue system, better history",
     "1.0.80: Fix Butttoucher ripper",
     "1.0.79: Fix cheeby to rip all images",
     "1.0.78: BCFakes ripper",
     "1.0.77: Cheeby ripper, status in title, various fixes",
     "1.0.76: Option to only save URLs, Taptastic ripper",
     "1.0.75: 500px ripper",
     "1.0.74: Videarn video ripper",
     "1.0.73: Datw.in ripper",
     "1.0.72: Support for DeviantArt favourites",
     "1.0.71: Fuskator ripper",
     "1.0.70: Various improvements. Xhamster, Cliphunter video rippers",
     "1.0.69: Gfycat video ripper, instgram username fix",
     "1.0.68: Imagevenue and hentai-foundry rippers",
     "1.0.67: Support for external tumblr domains",
     "1.0.66: GirlsOfDesire ripper",
     "1.0.65: Vidd.me video ripper",
     "1.0.64: Imagebam ripper",
     "1.0.63: Hopefully fixing freezing issue while re-ripping all albums",
     "1.0.62: Imgur album directories named after album title",
     "1.0.61: Logs are optional, defaults to not save logs",
     "1.0.60: Fix for crazy directory creation bug",
     "1.0.59: Show when albums can be ripped immediately",
     "1.0.58: Logs are saved to album directory, ehentai fix",
     "1.0.57: Nfsfw ripper",
     "1.0.56: Fix for imgur rips",
     "1.0.55: Ehentai ripper bypasses content warning",
     "1.0.54: Mediacru.sh ripper, may require a Java update",
     "1.0.53: 8Muses ripper fix, can rip subalbums",
     "1.0.52: Imgbox ripper, popup notifications are optional",
     "1.0.51: Deviantart rips full-size NSFW images",
     "1.0.50: Smutty.com ripper",
     "1.0.49: More Ehentai ripper fixes",
     "1.0.48: Imagestash.org /tag/ ripper, ehentai fixes",
     "1.0.47: Vidble ripper, right-click popupmenu on text",
     "1.0.46: Auto-indexing filenames (001_, 002_, etc) is now optional",
     "1.0.45: Imagefap /gallery/, Motherless search terms, reddit ripper fix",
     "1.0.44: Deviantart rips full-size images",
     "1.0.43: Added Modelmayhem ripper",
     "1.0.42: Added Drawcrowd ripper, bug fix for large albums",
     "1.0.41: Fix for multi-page Deviantart galleries, secure Flickr URLs",
     "1.0.40: Flickr bug fix and groups support",
     "1.0.39: Various fixes for Ehentai and Motherless",
     "1.0.38: Ehentai ripper, 4chan .webm support, optional audio confirmations",
     "1.0.37: Added Vine.co and Supertangas rippers",
     "1.0.36: Added semi-working Gifyo ripper",
     "1.0.35: Fixed i.rarchives ripper, delete empty directories",
     "1.0.34: Added fapproved and anonib rippers",
     "1.0.33: Imgur ripper fixes",
     "1.0.32: Fix for directories with special characters",
     "1.0.31: Fix for large imgur albums",
     "1.0.30: Added Minus ripper",
     "1.0.29: Various fixes for tumblr, flickr, 4chan",
     "1.0.28: Added vk.com video ripper(s)",
     "1.0.27: Added flickr ripper",
     "1.0.26: Ability to rerip history from command-line",
     "1.0.25: Added photobucket ripper",
     "1.0.24: Fixed possible deadlock issue while re-ripping albums",
     "1.0.23: Added teenplanet, irarchives, and butttoucher support",
     "1.0.22: Fixed huge bug where ripper did not work at all for any sites",
     "1.0.21: Ability to rip user account images on imgur",
     "1.0.20: Video ripper support: pornhub, youporn, beeg, xvideos",
     "1.0.19: Fix imgur account ripper",
     "1.0.18: Button icons, kinkyshare.com ripper",
     "1.0.17: *chan ripper, imgur titles in filenames",
     "1.0.16: Fix bug with instagram usernames containing _ or -",
     "1.0.15: Auto-updater should be compatible with Windows",
     "1.0.14: Fix twitter account names with _ or -",
     "1.0.13: Auto-updater is more verbose, hopefully works",
     "1.0.12: Fixed clipboard autorip bug",
     "1.0.11: 404 images are markead as errored",
     "1.0.10: Taskbar notifications when rips start",
     "1.0.9: More-verbose completion, UI tweaks",
     "1.0.8: Auto-update functionality",
     "1.0.7: Clipboard Autorip and tray icons",
     "1.0.6: Support imgur.com/r/subreddit albums",
     "1.0.5: Persistent configuration, small bug fixes",
     "1.0.4: Fixed spaces-in-directory bug",
     "1.0.3: Added VK.com ripper",
     "1.0.1: Added auto-update functionality"
   ]
 }

src/main/java/com/rarchives/ripme/App.java

@@ -1,7 +1,6 @@
 package com.rarchives.ripme;

 import java.io.File;
-import java.io.FilenameFilter;
 import java.io.IOException;
 import java.io.BufferedReader;
 import java.io.FileReader;

@@ -35,31 +34,51 @@ import com.rarchives.ripme.utils.Utils;
  */
 public class App {

-    public static Logger logger;
+    public static final Logger logger;
     private static final History HISTORY = new History();

-    public static void main(String[] args) throws MalformedURLException {
+    static {
+        //initialize logger
         Utils.configureLogger();
+        logger = Logger.getLogger(App.class);
+    }
+
+    public static void main(String[] args) throws MalformedURLException {
+        CommandLine cl = getArgs(args);
+        if (args.length > 0 && cl.hasOption('v')){
+            logger.error(UpdateUtils.getThisJarVersion());
+            System.exit(0);
+        }
+
         System.setProperty("apple.laf.useScreenMenuBar", "true");
         System.setProperty("com.apple.mrj.application.apple.menu.about.name", "RipMe");
-        logger = Logger.getLogger(App.class);
         logger.info("Initialized ripme v" + UpdateUtils.getThisJarVersion());

         if (args.length > 0) {
+            // CLI Mode
             handleArguments(args);
         } else {
+            // GUI Mode
             MainWindow mw = new MainWindow();
             SwingUtilities.invokeLater(mw);
         }
     }

-    public static void rip(URL url) throws Exception {
+    /**
+     * Creates an abstract ripper and instructs it to rip.
+     * @param url URL to be ripped
+     * @throws Exception
+     */
+    private static void rip(URL url) throws Exception {
         AbstractRipper ripper = AbstractRipper.getRipper(url);
         ripper.setup();
         ripper.rip();
     }

-    public static void handleArguments(String[] args) {
+    /**
+     * For dealing with command-line arguments.
+     * @param args Array of Command-line arguments
+     */
+    private static void handleArguments(String[] args) {
         CommandLine cl = getArgs(args);
         if (cl.hasOption('h')) {
             HelpFormatter hf = new HelpFormatter();

@@ -98,8 +117,8 @@
         }
         if (cl.hasOption('R')) {
             loadHistory();
-            if (HISTORY.toList().size() == 0) {
-                System.err.println("There are no history entries to re-rip. Rip some albums first");
+            if (HISTORY.toList().isEmpty()) {
+                logger.error("There are no history entries to re-rip. Rip some albums first");
                 System.exit(-1);
             }
             int added = 0;

@@ -122,7 +141,7 @@
                 }
             }
             if (added == 0) {
-                System.err.println("No history entries have been 'Checked'\n" +
+                logger.error("No history entries have been 'Checked'\n" +
                     "Check an entry by clicking the checkbox to the right of the URL or Right-click a URL to check/uncheck all items");
                 System.exit(-1);
             }

@@ -134,7 +153,7 @@
             Utils.setConfigBoolean("download.save_order", false);
         }
         if ((cl.hasOption('d'))&&(cl.hasOption('D'))) {
-            System.err.println("\nCannot specify '-d' and '-D' simultaneously");
+            logger.error("\nCannot specify '-d' and '-D' simultaneously");
             System.exit(-1);
         }
         if (cl.hasOption('l')) {

@@ -162,14 +181,18 @@
         }
     }

-    // this function will attempt to rip the provided url
-    public static void ripURL(String targetURL, boolean saveConfig) {
+    /**
+     * Attempt to rip targetURL.
+     * @param targetURL URL to rip
+     * @param saveConfig Whether or not you want to save the config (?)
+     */
+    private static void ripURL(String targetURL, boolean saveConfig) {
         try {
             URL url = new URL(targetURL);
             rip(url);
             List<String> history = Utils.getConfigList("download.history");
-            if (!history.contains(url.toExternalForm())) {
-                history.add(url.toExternalForm());
+            if (!history.contains(url.toExternalForm())) { //if you haven't already downloaded the file before
+                history.add(url.toExternalForm()); //add it to history so you won't have to redownload
                 Utils.setConfigList("download.history", Arrays.asList(history.toArray()));
                 if (saveConfig) {
                     Utils.saveConfig();

@@ -184,7 +207,11 @@
         }
     }

-    public static Options getOptions() {
+    /**
+     * Creates an Options object, returns it.
+     * @return Returns all acceptable command-line options.
+     */
+    private static Options getOptions() {
         Options opts = new Options();
         opts.addOption("h", "help", false, "Print the help");
         opts.addOption("u", "url", true, "URL of album to rip");

@@ -198,14 +225,19 @@
         opts.addOption("l", "ripsdirectory", true, "Rips Directory (Default: ./rips)");
         opts.addOption("n", "no-prop-file", false, "Do not create properties file.");
         opts.addOption("f", "urls-file", true, "Rip URLs from a file.");
+        opts.addOption("v", "version", false, "Show current version");
         return opts;
     }

-    public static CommandLine getArgs(String[] args) {
+    /**
+     * Tries to parse commandline arguments.
+     * @param args Array of commandline arguments.
+     * @return CommandLine object containing arguments.
+     */
+    private static CommandLine getArgs(String[] args) {
         BasicParser parser = new BasicParser();
         try {
-            CommandLine cl = parser.parse(getOptions(), args, false);
-            return cl;
+            return parser.parse(getOptions(), args, false);
         } catch (ParseException e) {
             logger.error("[!] Error while parsing command-line arguments: " + Arrays.toString(args), e);
             System.exit(-1);

@@ -213,16 +245,19 @@
         }
     }

+    /**
+     * Loads history from history file into memory.
+     */
     private static void loadHistory() {
-        File historyFile = new File("history.json");
+        File historyFile = new File(Utils.getConfigDir() + File.separator + "history.json");
         HISTORY.clear();
         if (historyFile.exists()) {
             try {
-                logger.info("Loading history from history.json");
-                HISTORY.fromFile("history.json");
+                logger.info("Loading history from " + historyFile.getCanonicalPath());
+                HISTORY.fromFile(historyFile.getCanonicalPath());
             } catch (IOException e) {
                 logger.error("Failed to load history from file " + historyFile, e);
-                System.out.println(
+                logger.warn(
                     "RipMe failed to load the history file at " + historyFile.getAbsolutePath() + "\n\n" +
                     "Error: " + e.getMessage() + "\n\n" +
                     "Closing RipMe will automatically overwrite the contents of this file,\n" +

@@ -234,12 +269,7 @@
         if (HISTORY.toList().size() == 0) {
             // Loaded from config, still no entries.
             // Guess rip history based on rip folder
-            String[] dirs = Utils.getWorkingDirectory().list(new FilenameFilter() {
-                @Override
-                public boolean accept(File dir, String file) {
-                    return new File(dir.getAbsolutePath() + File.separator + file).isDirectory();
-                }
-            });
+            String[] dirs = Utils.getWorkingDirectory().list((dir, file) -> new File(dir.getAbsolutePath() + File.separator + file).isDirectory());
             for (String dir : dirs) {
                 String url = RipUtils.urlFromDirectoryName(dir);
                 if (url != null) {
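
With the `-v`/`--version` option now registered in `getOptions()`, the version can be checked from the command line; a hypothetical invocation (jar name as in the old .gitignore), which logs the version string and exits:

```sh
java -jar ripme.jar --version
```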

src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java

@@ -17,27 +17,27 @@ import com.rarchives.ripme.utils.Utils;
  */
 public abstract class AbstractHTMLRipper extends AlbumRipper {

-    public AbstractHTMLRipper(URL url) throws IOException {
+    protected AbstractHTMLRipper(URL url) throws IOException {
         super(url);
     }

-    public abstract String getDomain();
+    protected abstract String getDomain();
     public abstract String getHost();

-    public abstract Document getFirstPage() throws IOException;
+    protected abstract Document getFirstPage() throws IOException;
     public Document getNextPage(Document doc) throws IOException {
         return null;
     }

-    public abstract List<String> getURLsFromPage(Document page);
+    protected abstract List<String> getURLsFromPage(Document page);

-    public List<String> getDescriptionsFromPage(Document doc) throws IOException {
+    protected List<String> getDescriptionsFromPage(Document doc) throws IOException {
         throw new IOException("getDescriptionsFromPage not implemented"); // Do I do this or make an abstract function?
     }

-    public abstract void downloadURL(URL url, int index);
+    protected abstract void downloadURL(URL url, int index);

-    public DownloadThreadPool getThreadPool() {
+    protected DownloadThreadPool getThreadPool() {
         return null;
     }

-    public boolean keepSortOrder() {
+    protected boolean keepSortOrder() {
         return true;
     }

@@ -50,13 +50,13 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
     public URL sanitizeURL(URL url) throws MalformedURLException {
         return url;
     }

-    public boolean hasDescriptionSupport() {
+    protected boolean hasDescriptionSupport() {
         return false;
     }

-    public String[] getDescription(String url,Document page) throws IOException {
+    protected String[] getDescription(String url, Document page) throws IOException {
         throw new IOException("getDescription not implemented"); // Do I do this or make an abstract function?
     }

-    public int descSleepTime() {
+    protected int descSleepTime() {
         return 100;
     }
     @Override

@@ -140,7 +140,15 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
         }
         waitForThreads();
     }

-    public String fileNameFromURL(URL url) {
+    /**
+     * Gets the file name from the URL
+     * @param url
+     *      URL that you want to get the filename from
+     * @return
+     *      Filename of the URL
+     */
+    private String fileNameFromURL(URL url) {
         String saveAs = url.toExternalForm();
         if (saveAs.substring(saveAs.length() - 1) == "/") { saveAs = saveAs.substring(0,saveAs.length() - 1) ;}
         saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1);

@@ -150,11 +158,25 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
         if (saveAs.indexOf(':') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf(':')); }
         return saveAs;
     }

+    /**
+     *
+     * @param url
+     *      Target URL
+     * @param subdirectory
+     *      Path to subdirectory where you want to save it
+     * @param text
+     *      Text you want to save
+     * @param index
+     *      Index in something like an album
+     * @return
+     *      True if ripped successfully
+     *      False if failed
+     */
     public boolean saveText(URL url, String subdirectory, String text, int index) {
         String saveAs = fileNameFromURL(url);
         return saveText(url,subdirectory,text,index,saveAs);
     }

-    public boolean saveText(URL url, String subdirectory, String text, int index, String fileName) {
+    private boolean saveText(URL url, String subdirectory, String text, int index, String fileName) {
         // Not the best for some cases, like FurAffinity. Overridden there.
         try {
             stopCheck();

@@ -189,7 +211,15 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
         }
         return true;
     }

-    public String getPrefix(int index) {
+    /**
+     * Gets prefix based on where in the index it is
+     * @param index
+     *      The index in question
+     * @return
+     *      Returns prefix for a file. (?)
+     */
+    protected String getPrefix(int index) {
         String prefix = "";
         if (keepSortOrder() && Utils.getConfigBoolean("download.save_order", true)) {
             prefix = String.format("%03d_", index);
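
For orientation, a hypothetical minimal subclass assembled only from the signatures visible above, using Jsoup (already a project dependency) to fetch pages. `ExampleRipper`, `example.com`, and the assumption that `getGID` is the remaining abstract method (suggested by `getAlbumTitle` in AbstractRipper below) are illustrative, not part of this commit:

```java
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

public class ExampleRipper extends AbstractHTMLRipper {
    public ExampleRipper(URL url) throws IOException {
        super(url);
    }

    @Override
    protected String getDomain() { return "example.com"; }

    @Override
    public String getHost() { return "example"; }

    @Override
    public String getGID(URL url) throws MalformedURLException {
        return url.getPath().replaceAll("/", "_"); // simplistic album id (assumed abstract method)
    }

    @Override
    protected Document getFirstPage() throws IOException {
        return Jsoup.connect(url.toExternalForm()).get(); // plain Jsoup fetch
    }

    @Override
    protected List<String> getURLsFromPage(Document page) {
        List<String> urls = new ArrayList<>();
        for (Element img : page.select("img")) {
            urls.add(img.attr("abs:src")); // collect absolute image URLs
        }
        return urls;
    }

    @Override
    protected void downloadURL(URL url, int index) {
        addURLToDownload(url, getPrefix(index)); // inherited queueing helper
    }
}
```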

src/main/java/com/rarchives/ripme/ripper/AbstractJSONRipper.java

@@ -15,24 +15,25 @@ import com.rarchives.ripme.utils.Utils;
  */
 public abstract class AbstractJSONRipper extends AlbumRipper {

-    public AbstractJSONRipper(URL url) throws IOException {
+    protected AbstractJSONRipper(URL url) throws IOException {
         super(url);
     }

-    public abstract String getDomain();
+    protected abstract String getDomain();
+    @Override
     public abstract String getHost();

-    public abstract JSONObject getFirstPage() throws IOException;
+    protected abstract JSONObject getFirstPage() throws IOException;

-    public JSONObject getNextPage(JSONObject doc) throws IOException {
+    protected JSONObject getNextPage(JSONObject doc) throws IOException {
         throw new IOException("getNextPage not implemented");
     }

-    public abstract List<String> getURLsFromJSON(JSONObject json);
+    protected abstract List<String> getURLsFromJSON(JSONObject json);

-    public abstract void downloadURL(URL url, int index);
+    protected abstract void downloadURL(URL url, int index);

-    public DownloadThreadPool getThreadPool() {
+    private DownloadThreadPool getThreadPool() {
         return null;
     }

-    public boolean keepSortOrder() {
+    protected boolean keepSortOrder() {
         return true;
     }

@@ -96,7 +97,7 @@ public abstract class AbstractJSONRipper extends AlbumRipper {
         waitForThreads();
     }

-    public String getPrefix(int index) {
+    protected String getPrefix(int index) {
         String prefix = "";
         if (keepSortOrder() && Utils.getConfigBoolean("download.save_order", true)) {
             prefix = String.format("%03d_", index);

View File

@ -1,8 +1,7 @@
package com.rarchives.ripme.ripper; package com.rarchives.ripme.ripper;
import java.awt.Desktop; import java.awt.Desktop;
import java.io.File; import java.io.*;
import java.io.IOException;
import java.lang.reflect.Constructor; import java.lang.reflect.Constructor;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
@ -21,21 +20,25 @@ import com.rarchives.ripme.ui.RipStatusMessage;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS; import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Utils; import com.rarchives.ripme.utils.Utils;
import java.io.File;
import java.util.Scanner;
public abstract class AbstractRipper public abstract class AbstractRipper
extends Observable extends Observable
implements RipperInterface, Runnable { implements RipperInterface, Runnable {
protected static final Logger logger = Logger.getLogger(AbstractRipper.class); protected static final Logger logger = Logger.getLogger(AbstractRipper.class);
private final String URLHistoryFile = Utils.getURLHistoryFile();
public static final String USER_AGENT = public static final String USER_AGENT =
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:36.0) Gecko/20100101 Firefox/36.0"; "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36";
protected URL url; protected URL url;
protected File workingDir; protected File workingDir;
protected DownloadThreadPool threadPool; DownloadThreadPool threadPool;
protected RipStatusHandler observer = null; RipStatusHandler observer = null;
protected boolean completed = true; private boolean completed = true;
public abstract void rip() throws IOException; public abstract void rip() throws IOException;
public abstract String getHost(); public abstract String getHost();
@ -56,6 +59,56 @@ public abstract class AbstractRipper
} }
} }
private void writeDownloadedURL(String downloadedURL) throws IOException {
BufferedWriter bw = null;
FileWriter fw = null;
try {
File file = new File(URLHistoryFile);
// if file doesnt exists, then create it
if (!file.exists()) {
file.createNewFile();
}
fw = new FileWriter(file.getAbsoluteFile(), true);
bw = new BufferedWriter(fw);
bw.write(downloadedURL);
} catch (IOException e) {
e.printStackTrace();
} finally {
try {
if (bw != null)
bw.close();
if (fw != null)
fw.close();
} catch (IOException ex) {
ex.printStackTrace();
}
}
}
/**
* Checks to see if Ripme has already downloaded a URL
* @param url URL to check if downloaded
* @return
* Returns true if previously downloaded.
* Returns false if not yet downloaded.
*/
private boolean hasDownloadedURL(String url) {
File file = new File(URLHistoryFile);
try {
Scanner scanner = new Scanner(file);
while (scanner.hasNextLine()) {
final String lineFromFile = scanner.nextLine();
if (lineFromFile.equals(url)) {
return true;
}
}
} catch (FileNotFoundException e) {
return false;
}
return false;
}
/** /**
* Ensures inheriting ripper can rip this URL, raises exception if not. * Ensures inheriting ripper can rip this URL, raises exception if not.
* Otherwise initializes working directory and thread pool. * Otherwise initializes working directory and thread pool.
@ -72,6 +125,15 @@ public abstract class AbstractRipper
this.url = sanitizeURL(url); this.url = sanitizeURL(url);
} }
/**
* Sets ripper's:
* Working directory
* Logger (for debugging)
* FileAppender
* Threadpool
* @throws IOException
* Always be prepared.
*/
public void setup() throws IOException { public void setup() throws IOException {
setWorkingDir(this.url); setWorkingDir(this.url);
Logger rootLogger = Logger.getRootLogger(); Logger rootLogger = Logger.getRootLogger();
@ -109,10 +171,34 @@ public abstract class AbstractRipper
* @param cookies * @param cookies
* The cookies to send to the server while downloading this file. * The cookies to send to the server while downloading this file.
* @return * @return
* True if downloaded successfully
* False if failed to download
*/ */
public abstract boolean addURLToDownload(URL url, File saveAs, String referrer, Map<String,String> cookies); protected abstract boolean addURLToDownload(URL url, File saveAs, String referrer, Map<String, String> cookies);
public boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map<String,String> cookies) { /**
* Queues image to be downloaded and saved.
* @param url
* URL of the file
* @param prefix
* Prefix for the downloaded file
* @param subdirectory
* Path to get to desired directory from working directory
* @param referrer
* The HTTP referrer to use while downloading this file.
* @param cookies
* The cookies to send to the server while downloading this file.
* @return
* True if downloaded successfully
* False if failed to download
*/
protected boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map<String, String> cookies) {
if (Utils.getConfigBoolean("remember.url_history", true) && !isThisATest()) {
if (hasDownloadedURL(url.toExternalForm())) {
sendUpdate(STATUS.DOWNLOAD_WARN, "Already downloaded " + url.toExternalForm());
return false;
}
}
try { try {
stopCheck(); stopCheck();
} catch (IOException e) { } catch (IOException e) {
@ -131,6 +217,7 @@ public abstract class AbstractRipper
if (!subdirectory.equals("")) { if (!subdirectory.equals("")) {
subdirectory = File.separator + subdirectory; subdirectory = File.separator + subdirectory;
} }
prefix = Utils.filesystemSanitized(prefix);
saveFileAs = new File( saveFileAs = new File(
workingDir.getCanonicalPath() workingDir.getCanonicalPath()
+ subdirectory + subdirectory
@ -146,6 +233,13 @@ public abstract class AbstractRipper
logger.info("[+] Creating directory: " + Utils.removeCWD(saveFileAs.getParent())); logger.info("[+] Creating directory: " + Utils.removeCWD(saveFileAs.getParent()));
saveFileAs.getParentFile().mkdirs(); saveFileAs.getParentFile().mkdirs();
} }
if (Utils.getConfigBoolean("remember.url_history", true) && !isThisATest()) {
try {
writeDownloadedURL(url.toExternalForm() + "\n");
} catch (IOException e) {
logger.debug("Unable to write URL history file");
}
}
return addURLToDownload(url, saveFileAs, referrer, cookies); return addURLToDownload(url, saveFileAs, referrer, cookies);
} }
@ -159,7 +253,7 @@ public abstract class AbstractRipper
* Sub-directory of the working directory to save the images to. * Sub-directory of the working directory to save the images to.
* @return True on success, false on failure. * @return True on success, false on failure.
*/ */
public boolean addURLToDownload(URL url, String prefix, String subdirectory) { protected boolean addURLToDownload(URL url, String prefix, String subdirectory) {
return addURLToDownload(url, prefix, subdirectory, null, null); return addURLToDownload(url, prefix, subdirectory, null, null);
} }
@ -172,7 +266,7 @@ public abstract class AbstractRipper
* Text to append to saved filename. * Text to append to saved filename.
* @return True on success, false on failure. * @return True on success, false on failure.
*/ */
public boolean addURLToDownload(URL url, String prefix) { protected boolean addURLToDownload(URL url, String prefix) {
// Use empty subdirectory // Use empty subdirectory
return addURLToDownload(url, prefix, ""); return addURLToDownload(url, prefix, "");
} }
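In practice the rippers reach this chain of overloads through the short two-argument form; a typical downloadURL implementation, as it appears verbatim in several rippers later in this diff, is:

@Override
public void downloadURL(URL url, int index) {
    // getPrefix(index) supplies an ordering prefix; the two-argument
    // overload fills in an empty subdirectory and forwards to the full
    // overload, which applies URL-history checks and filename sanitizing.
    addURLToDownload(url, getPrefix(index));
}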
@ -223,14 +317,14 @@ public abstract class AbstractRipper
/** /**
* @return Number of files downloaded. * @return Number of files downloaded.
*/ */
public int getCount() { int getCount() {
return 1; return 1;
} }
/** /**
* Notifies observers and updates state if all files have been ripped. * Notifies observers and updates state if all files have been ripped.
*/ */
protected void checkIfComplete() { void checkIfComplete() {
if (observer == null) { if (observer == null) {
logger.debug("observer is null"); logger.debug("observer is null");
return; return;
@ -262,6 +356,11 @@ public abstract class AbstractRipper
} }
} }
/**
* Gets URL
* @return
* Returns URL that wants to be downloaded.
*/
public URL getURL() { public URL getURL() {
return url; return url;
} }
@ -275,8 +374,20 @@ public abstract class AbstractRipper
return workingDir; return workingDir;
} }
@Override
public abstract void setWorkingDir(URL url) throws IOException; public abstract void setWorkingDir(URL url) throws IOException;
/**
*
* @param url
* The URL you want to get the title of.
* @return
* host_URLid
* e.g. (for a reddit post)
* reddit_post_7mg2ur
* @throws MalformedURLException
* If any of those damned URLs gets malformed.
*/
public String getAlbumTitle(URL url) throws MalformedURLException { public String getAlbumTitle(URL url) throws MalformedURLException {
return getHost() + "_" + getGID(url); return getHost() + "_" + getGID(url);
} }
@ -320,10 +431,10 @@ public abstract class AbstractRipper
* @throws Exception * @throws Exception
*/ */
public static List<Constructor<?>> getRipperConstructors(String pkg) throws Exception { public static List<Constructor<?>> getRipperConstructors(String pkg) throws Exception {
List<Constructor<?>> constructors = new ArrayList<Constructor<?>>(); List<Constructor<?>> constructors = new ArrayList<>();
for (Class<?> clazz : Utils.getClassesForPackage(pkg)) { for (Class<?> clazz : Utils.getClassesForPackage(pkg)) {
if (AbstractRipper.class.isAssignableFrom(clazz)) { if (AbstractRipper.class.isAssignableFrom(clazz)) {
constructors.add( (Constructor<?>) clazz.getConstructor(URL.class) ); constructors.add(clazz.getConstructor(URL.class));
} }
} }
return constructors; return constructors;
@ -341,8 +452,16 @@ public abstract class AbstractRipper
observer.update(this, new RipStatusMessage(status, message)); observer.update(this, new RipStatusMessage(status, message));
} }
/**
* Get the completion percentage.
* @return
* Percentage complete
*/
public abstract int getCompletionPercentage(); public abstract int getCompletionPercentage();
/**
* @return
* Text for status
*/
public abstract String getStatusText(); public abstract String getStatusText();
/** /**
@ -355,10 +474,6 @@ public abstract class AbstractRipper
logger.error("Got exception while running ripper:", e); logger.error("Got exception while running ripper:", e);
waitForThreads(); waitForThreads();
sendUpdate(STATUS.RIP_ERRORED, "HTTP status code " + e.getStatusCode() + " for URL " + e.getUrl()); sendUpdate(STATUS.RIP_ERRORED, "HTTP status code " + e.getStatusCode() + " for URL " + e.getUrl());
} catch (IOException e) {
logger.error("Got exception while running ripper:", e);
waitForThreads();
sendUpdate(STATUS.RIP_ERRORED, e.getMessage());
} catch (Exception e) { } catch (Exception e) {
logger.error("Got exception while running ripper:", e); logger.error("Got exception while running ripper:", e);
waitForThreads(); waitForThreads();
@ -367,8 +482,10 @@ public abstract class AbstractRipper
cleanup(); cleanup();
} }
} }
/**
 * Tries to delete any empty directories
 */
public void cleanup() { private void cleanup() {
if (this.workingDir.list().length == 0) { if (this.workingDir.list().length == 0) {
// No files, delete the dir // No files, delete the dir
logger.info("Deleting empty directory " + this.workingDir); logger.info("Deleting empty directory " + this.workingDir);
@ -379,7 +496,15 @@ public abstract class AbstractRipper
} }
} }
/**
 * Pauses thread for a set amount of time.
 * @param milliseconds
 *      Amount of time (in milliseconds) that the thread gets paused for
 * @return
 *      True if paused successfully
 *      False if failed to pause/got interrupted.
 */
public boolean sleep(int milliseconds) { protected boolean sleep(int milliseconds) {
try { try {
logger.debug("Sleeping " + milliseconds + "ms"); logger.debug("Sleeping " + milliseconds + "ms");
Thread.sleep(milliseconds); Thread.sleep(milliseconds);
@ -402,7 +527,7 @@ public abstract class AbstractRipper
logger.debug("THIS IS A TEST RIP"); logger.debug("THIS IS A TEST RIP");
thisIsATest = true; thisIsATest = true;
} }
public boolean isThisATest() { protected boolean isThisATest() {
return thisIsATest; return thisIsATest;
} }
} }
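The protected sleep() above is used by rippers to throttle page requests. An illustrative getNextPage pattern (not code from this commit) that combines it with the Jsoup calls used throughout this diff:

@Override
public Document getNextPage(Document doc) throws IOException {
    // Pause between page fetches to stay polite to the remote host;
    // 500 ms matches the delay the Dribbble and Cheeby rippers use.
    sleep(500);
    Element next = doc.select("a.next_page").first();
    if (next == null) {
        throw new IOException("No more pages");
    }
    return Http.url(next.absUrl("href")).get();
}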

View File

@ -13,13 +13,17 @@ import com.rarchives.ripme.ui.RipStatusMessage;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS; import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Utils; import com.rarchives.ripme.utils.Utils;
/**
* For ripping delicious albums off the interwebz.
*/
public abstract class AlbumRipper extends AbstractRipper { public abstract class AlbumRipper extends AbstractRipper {
protected Map<URL, File> itemsPending = Collections.synchronizedMap(new HashMap<URL, File>()); private Map<URL, File> itemsPending = Collections.synchronizedMap(new HashMap<URL, File>());
protected Map<URL, File> itemsCompleted = Collections.synchronizedMap(new HashMap<URL, File>()); private Map<URL, File> itemsCompleted = Collections.synchronizedMap(new HashMap<URL, File>());
protected Map<URL, String> itemsErrored = Collections.synchronizedMap(new HashMap<URL, String>()); private Map<URL, String> itemsErrored = Collections.synchronizedMap(new HashMap<URL, String>());
public AlbumRipper(URL url) throws IOException { protected AlbumRipper(URL url) throws IOException {
super(url); super(url);
} }
@ -29,15 +33,22 @@ public abstract class AlbumRipper extends AbstractRipper {
public abstract String getHost(); public abstract String getHost();
public abstract String getGID(URL url) throws MalformedURLException; public abstract String getGID(URL url) throws MalformedURLException;
public boolean allowDuplicates() { protected boolean allowDuplicates() {
return false; return false;
} }
@Override @Override
/**
* Returns the total number of files attempted.
*/
public int getCount() { public int getCount() {
return itemsCompleted.size() + itemsErrored.size(); return itemsCompleted.size() + itemsErrored.size();
} }
@Override
/**
* Queues a single image URL from the album for download.
*/
public boolean addURLToDownload(URL url, File saveAs, String referrer, Map<String,String> cookies) { public boolean addURLToDownload(URL url, File saveAs, String referrer, Map<String,String> cookies) {
// Only download one file if this is a test. // Only download one file if this is a test.
if (super.isThisATest() && if (super.isThisATest() &&
@ -95,12 +106,15 @@ public abstract class AlbumRipper extends AbstractRipper {
* @return * @return
* True on success * True on success
*/ */
public boolean addURLToDownload(URL url) { protected boolean addURLToDownload(URL url) {
// Use empty prefix and empty subdirectory // Use empty prefix and empty subdirectory
return addURLToDownload(url, "", ""); return addURLToDownload(url, "", "");
} }
@Override @Override
/**
* Cleans up & tells user about successful download
*/
public void downloadCompleted(URL url, File saveAs) { public void downloadCompleted(URL url, File saveAs) {
if (observer == null) { if (observer == null) {
return; return;
@ -119,6 +133,9 @@ public abstract class AlbumRipper extends AbstractRipper {
} }
@Override @Override
/**
* Cleans up & tells user about failed download.
*/
public void downloadErrored(URL url, String reason) { public void downloadErrored(URL url, String reason) {
if (observer == null) { if (observer == null) {
return; return;
@ -131,6 +148,10 @@ public abstract class AlbumRipper extends AbstractRipper {
} }
@Override @Override
/**
* Tells user that a single file in the album they wish to download has
* already been downloaded in the past.
*/
public void downloadExists(URL url, File file) { public void downloadExists(URL url, File file) {
if (observer == null) { if (observer == null) {
return; return;

View File

@ -24,12 +24,12 @@ import com.rarchives.ripme.utils.Utils;
* Thread for downloading files. * Thread for downloading files.
* Includes retry logic, observer notifications, and other goodies. * Includes retry logic, observer notifications, and other goodies.
*/ */
public class DownloadFileThread extends Thread { class DownloadFileThread extends Thread {
private static final Logger logger = Logger.getLogger(DownloadFileThread.class); private static final Logger logger = Logger.getLogger(DownloadFileThread.class);
private String referrer = ""; private String referrer = "";
private Map<String,String> cookies = new HashMap<String,String>(); private Map<String,String> cookies = new HashMap<>();
private URL url; private URL url;
private File saveAs; private File saveAs;

View File

@ -24,16 +24,27 @@ public class DownloadThreadPool {
initialize(threadPoolName); initialize(threadPoolName);
} }
/**
* Initializes the threadpool.
* @param threadPoolName Name of the threadpool.
*/
private void initialize(String threadPoolName) { private void initialize(String threadPoolName) {
int threads = Utils.getConfigInteger("threads.size", 10); int threads = Utils.getConfigInteger("threads.size", 10);
logger.debug("Initializing " + threadPoolName + " thread pool with " + threads + " threads"); logger.debug("Initializing " + threadPoolName + " thread pool with " + threads + " threads");
threadPool = (ThreadPoolExecutor) Executors.newFixedThreadPool(threads); threadPool = (ThreadPoolExecutor) Executors.newFixedThreadPool(threads);
} }
/**
* For adding threads to execution pool.
* @param t
* Thread to be added.
*/
public void addThread(Thread t) { public void addThread(Thread t) {
threadPool.execute(t); threadPool.execute(t);
} }
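Usage follows the standard executor pattern: construct the pool, submit Thread instances, then drain it with waitForThreads() (shown just below). A minimal sketch, assuming Java 8 lambdas:

DownloadThreadPool pool = new DownloadThreadPool("example"); // sized by threads.size, default 10
for (int i = 0; i < 3; i++) {
    final int n = i;
    pool.addThread(new Thread(() -> System.out.println("download task " + n)));
}
pool.waitForThreads(); // shuts the pool down and blocks until queued tasks finish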
/**
* Tries to shutdown threadpool.
*/
public void waitForThreads() { public void waitForThreads() {
threadPool.shutdown(); threadPool.shutdown();
try { try {

View File

@ -20,7 +20,7 @@ import com.rarchives.ripme.utils.Utils;
* Thread for downloading files. * Thread for downloading files.
* Includes retry logic, observer notifications, and other goodies. * Includes retry logic, observer notifications, and other goodies.
*/ */
public class DownloadVideoThread extends Thread { class DownloadVideoThread extends Thread {
private static final Logger logger = Logger.getLogger(DownloadVideoThread.class); private static final Logger logger = Logger.getLogger(DownloadVideoThread.class);
@ -136,6 +136,12 @@ public class DownloadVideoThread extends Thread {
logger.info("[+] Saved " + url + " as " + this.prettySaveAs); logger.info("[+] Saved " + url + " as " + this.prettySaveAs);
} }
/**
* @param url
* Target URL
* @return
* Returns the content length of the response, in bytes
*/
private int getTotalBytes(URL url) throws IOException { private int getTotalBytes(URL url) throws IOException {
HttpURLConnection conn = (HttpURLConnection) url.openConnection(); HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setRequestMethod("HEAD"); conn.setRequestMethod("HEAD");

View File

@ -7,8 +7,10 @@ import java.net.URL;
/** /**
* I have no idea why I made this interface. Everything is captured within the AbstractRipper. * I have no idea why I made this interface. Everything is captured within the AbstractRipper.
* Oh well, here's to encapsulation and abstraction! (raises glass) * Oh well, here's to encapsulation and abstraction! (raises glass)
*
* (cheers!)
*/ */
public interface RipperInterface { interface RipperInterface {
void rip() throws IOException; void rip() throws IOException;
boolean canRip(URL url); boolean canRip(URL url);
URL sanitizeURL(URL url) throws MalformedURLException; URL sanitizeURL(URL url) throws MalformedURLException;

View File

@ -16,7 +16,7 @@ public abstract class VideoRipper extends AbstractRipper {
private int bytesTotal = 1, private int bytesTotal = 1,
bytesCompleted = 1; bytesCompleted = 1;
public VideoRipper(URL url) throws IOException { protected VideoRipper(URL url) throws IOException {
super(url); super(url);
} }
@ -74,6 +74,12 @@ public abstract class VideoRipper extends AbstractRipper {
return addURLToDownload(url, saveAs); return addURLToDownload(url, saveAs);
} }
/**
* Creates & sets working directory based on URL.
* @param url
* Target URL
*/
@Override @Override
public void setWorkingDir(URL url) throws IOException { public void setWorkingDir(URL url) throws IOException {
String path = Utils.getWorkingDirectory().getCanonicalPath(); String path = Utils.getWorkingDirectory().getCanonicalPath();
@ -89,11 +95,22 @@ public abstract class VideoRipper extends AbstractRipper {
logger.debug("Set working directory to: " + this.workingDir); logger.debug("Set working directory to: " + this.workingDir);
} }
/**
* @return
* Returns % of video done downloading.
*/
@Override @Override
public int getCompletionPercentage() { public int getCompletionPercentage() {
return (int) (100 * (bytesCompleted / (float) bytesTotal)); return (int) (100 * (bytesCompleted / (float) bytesTotal));
} }
/**
* Runs if download successfully completed.
* @param url
* Target URL
* @param saveAs
* Path to file, including filename.
*/
@Override @Override
public void downloadCompleted(URL url, File saveAs) { public void downloadCompleted(URL url, File saveAs) {
if (observer == null) { if (observer == null) {
@ -109,6 +126,14 @@ public abstract class VideoRipper extends AbstractRipper {
logger.error("Exception while updating observer: ", e); logger.error("Exception while updating observer: ", e);
} }
} }
/**
* Runs if the download errored somewhere.
* @param url
* Target URL
* @param reason
* Reason why the download failed.
*/
@Override @Override
public void downloadErrored(URL url, String reason) { public void downloadErrored(URL url, String reason) {
if (observer == null) { if (observer == null) {
@ -117,6 +142,15 @@ public abstract class VideoRipper extends AbstractRipper {
observer.update(this, new RipStatusMessage(STATUS.DOWNLOAD_ERRORED, url + " : " + reason)); observer.update(this, new RipStatusMessage(STATUS.DOWNLOAD_ERRORED, url + " : " + reason));
checkIfComplete(); checkIfComplete();
} }
/**
* Runs if user tries to redownload an already existing File.
* @param url
* Target URL
* @param file
* Existing file
*/
@Override @Override
public void downloadExists(URL url, File file) { public void downloadExists(URL url, File file) {
if (observer == null) { if (observer == null) {
@ -126,6 +160,11 @@ public abstract class VideoRipper extends AbstractRipper {
checkIfComplete(); checkIfComplete();
} }
/**
* Gets the status and changes it to a human-readable form.
* @return
* Status of current download.
*/
@Override @Override
public String getStatusText() { public String getStatusText() {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
@ -139,6 +178,10 @@ public abstract class VideoRipper extends AbstractRipper {
} }
@Override @Override
/**
* Sanitizes URL.
* Usually just returns itself.
*/
public URL sanitizeURL(URL url) throws MalformedURLException { public URL sanitizeURL(URL url) throws MalformedURLException {
return url; return url;
} }

View File

@ -4,7 +4,6 @@ import java.io.IOException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.regex.Matcher; import java.util.regex.Matcher;
@ -16,14 +15,13 @@ import org.jsoup.nodes.Element;
import org.jsoup.select.Elements; import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Http;
import java.util.HashMap; import java.util.HashMap;
public class AerisdiesRipper extends AbstractHTMLRipper { public class AerisdiesRipper extends AbstractHTMLRipper {
private Document albumDoc = null; private Document albumDoc = null;
private Map<String,String> cookies = new HashMap<String,String>(); private Map<String,String> cookies = new HashMap<>();
public AerisdiesRipper(URL url) throws IOException { public AerisdiesRipper(URL url) throws IOException {
@ -41,20 +39,20 @@ public class AerisdiesRipper extends AbstractHTMLRipper {
@Override @Override
public String getGID(URL url) throws MalformedURLException { public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://www.aerisdies.com/html/lb/([a-z]*_[0-9]*_\\d)\\.html"); Pattern p = Pattern.compile("^https?://www.aerisdies.com/html/lb/[a-z]*_(\\d+)_\\d\\.html");
Matcher m = p.matcher(url.toExternalForm()); Matcher m = p.matcher(url.toExternalForm());
if (!m.matches()) { if (!m.matches()) {
throw new MalformedURLException("Expected URL format: http://www.aerisdies.com/html/lb/albumDIG, got: " + url); throw new MalformedURLException("Expected URL format: http://www.aerisdies.com/html/lb/albumDIG, got: " + url);
} }
return m.group(m.groupCount()); return m.group(1);
} }
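The reworked pattern captures only the numeric album id in group 1, rather than the whole name_id_digit slug. A standalone check of the new regex (the album URL is invented for illustration):

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class AerisdiesGidDemo {
    public static void main(String[] args) {
        Pattern p = Pattern.compile("^https?://www.aerisdies.com/html/lb/[a-z]*_(\\d+)_\\d\\.html");
        Matcher m = p.matcher("http://www.aerisdies.com/html/lb/album_12345_1.html");
        if (m.matches()) {
            System.out.println(m.group(1)); // prints 12345
        }
    }
}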
@Override @Override
public String getAlbumTitle(URL url) throws MalformedURLException { public String getAlbumTitle(URL url) throws MalformedURLException {
try { try {
// Attempt to use album title as GID // Attempt to use album title as GID
String title = getFirstPage().select("title").first().text(); String title = getFirstPage().select("div > div > span[id=albumname] > a").first().text();
return getHost() + "_" + title.trim(); return getHost() + "_" + getGID(url) + "_" + title.trim();
} catch (IOException e) { } catch (IOException e) {
// Fall back to default album naming convention // Fall back to default album naming convention
logger.info("Unable to find title at " + url); logger.info("Unable to find title at " + url);
@ -74,7 +72,7 @@ public class AerisdiesRipper extends AbstractHTMLRipper {
@Override @Override
public List<String> getURLsFromPage(Document page) { public List<String> getURLsFromPage(Document page) {
List<String> imageURLs = new ArrayList<String>(); List<String> imageURLs = new ArrayList<>();
Elements albumElements = page.select("div.imgbox > a > img"); Elements albumElements = page.select("div.imgbox > a > img");
for (Element imageBox : albumElements) { for (Element imageBox : albumElements) {
String imageUrl = imageBox.attr("src"); String imageUrl = imageBox.attr("src");

View File

@ -66,7 +66,7 @@ public class BcfakesRipper extends AbstractHTMLRipper {
@Override @Override
public List<String> getURLsFromPage(Document doc) { public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>(); List<String> imageURLs = new ArrayList<>();
for (Element thumb : doc.select("div.ngg-gallery-thumbnail > a > img")) { for (Element thumb : doc.select("div.ngg-gallery-thumbnail > a > img")) {
String imageURL = thumb.attr("src"); String imageURL = thumb.attr("src");
imageURL = imageURL.replace("thumbs/thumbs_", ""); imageURL = imageURL.replace("thumbs/thumbs_", "");

View File

@ -1,70 +0,0 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
public class ButttoucherRipper extends AbstractHTMLRipper {
public ButttoucherRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "butttoucher";
}
@Override
public String getDomain() {
return "butttoucher.com";
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p; Matcher m;
p = Pattern.compile("^.*butttoucher.com/users/([a-zA-Z0-9_\\-]{1,}).*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException(
"Expected butttoucher.com gallery format: "
+ "butttoucher.com/users/<username>"
+ " Got: " + url);
}
@Override
public Document getFirstPage() throws IOException {
return Http.url(this.url).get();
}
@Override
public List<String> getURLsFromPage(Document page) {
List<String> thumbs = new ArrayList<String>();
for (Element thumb : page.select(".thumb img")) {
if (!thumb.hasAttr("src")) {
continue;
}
String smallImage = thumb.attr("src");
thumbs.add(smallImage.replace("m.", "."));
}
return thumbs;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}

View File

@ -0,0 +1,91 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
public class CfakeRipper extends AbstractHTMLRipper {
public CfakeRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "cfake";
}
@Override
public String getDomain() {
return "cfake.com";
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("https?://cfake\\.com/picture/([a-zA-Z1-9_-]*)/\\d+/?$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Expected cfake URL format: " +
"cfake.com/picture/MODEL/ID - got " + url + " instead");
}
@Override
public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass
return Http.url(url).get();
}
@Override
public Document getNextPage(Document doc) throws IOException {
// Find the next page via the "next" link
Element elem = doc.select("td > div.next > a").first();
if (elem == null) {
throw new IOException("No more pages");
}
String nextPage = elem.attr("href");
// Sometimes this returns an empty string; this check stops that
if (nextPage.isEmpty()) {
return null;
}
else {
return Http.url("http://cfake.com" + nextPage).get();
}
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<>();
for (Element el : doc.select("table.display > tbody > tr > td > table > tbody > tr > td > a")) {
if (el.attr("href").contains("upload")) {
return result;
} else {
String imageSource = el.select("img").attr("src");
// We remove the .md from images so we download the full size image
// not the thumbnail ones
imageSource = imageSource.replace("thumbs", "photos");
result.add("http://cfake.com" + imageSource);
}
}
return result;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}
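getFirstPage/getNextPage are presumably driven by AbstractHTMLRipper in a fetch loop; that is an inference from the rip() method of CheebyRipper shown later in this diff, not code from this file. The assumed shape of that loop:

int index = 1;
Document doc = getFirstPage();
while (doc != null) {
    for (String imageUrl : getURLsFromPage(doc)) {
        downloadURL(new URL(imageUrl), index++);
    }
    try {
        doc = getNextPage(doc); // returns null on an empty href, throws when out of pages
    } catch (IOException e) {
        break;
    }
}
waitForThreads();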

View File

@ -18,23 +18,21 @@ import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.RipUtils; import com.rarchives.ripme.utils.RipUtils;
public class ChanRipper extends AbstractHTMLRipper { public class ChanRipper extends AbstractHTMLRipper {
public static List<ChanSite> explicit_domains = Arrays.asList( private static List<ChanSite> explicit_domains = Arrays.asList(
new ChanSite(Arrays.asList("boards.4chan.org"), Arrays.asList("4cdn.org", "is.4chan.org", "is2.4chan.org")), new ChanSite(Arrays.asList("boards.4chan.org"), Arrays.asList("4cdn.org", "is.4chan.org", "is2.4chan.org")),
new ChanSite(Arrays.asList("archive.moe"), Arrays.asList("data.archive.moe")),
new ChanSite(Arrays.asList("4archive.org"), Arrays.asList("imgur.com")), new ChanSite(Arrays.asList("4archive.org"), Arrays.asList("imgur.com")),
new ChanSite(Arrays.asList("archive.4plebs.org"), Arrays.asList("img.4plebs.org")), new ChanSite(Arrays.asList("archive.4plebs.org"), Arrays.asList("img.4plebs.org"))
new ChanSite(Arrays.asList("fgts.jp"), Arrays.asList("dat.fgtsi.org"))
); );
public static List<String> url_piece_blacklist = Arrays.asList( private static List<String> url_piece_blacklist = Arrays.asList(
"=http", "=http",
"http://imgops.com/", "http://imgops.com/",
"iqdb.org", "iqdb.org",
"saucenao.com" "saucenao.com"
); );
public ChanSite chanSite; private ChanSite chanSite;
public Boolean generalChanSite = true; private Boolean generalChanSite = true;
public ChanRipper(URL url) throws IOException { public ChanRipper(URL url) throws IOException {
super(url); super(url);
@ -66,13 +64,18 @@ public class ChanRipper extends AbstractHTMLRipper {
try { try {
// Attempt to use album title as GID // Attempt to use album title as GID
Document doc = getFirstPage(); Document doc = getFirstPage();
String subject = doc.select(".post.op > .postinfo > .subject").first().text(); try {
return getHost() + "_" + getGID(url) + "_" + subject; String subject = doc.select(".post.op > .postinfo > .subject").first().text();
return getHost() + "_" + getGID(url) + "_" + subject;
} catch (NullPointerException e) {
logger.warn("Failed to get thread title from " + url);
}
} catch (Exception e) { } catch (Exception e) {
// Fall back to default album naming convention // Fall back to default album naming convention
logger.warn("Failed to get album title from " + url, e); logger.warn("Failed to get album title from " + url, e);
} }
return super.getAlbumTitle(url); // Fall back on the GID
return getHost() + "_" + getGID(url);
} }
@Override @Override
@ -114,6 +117,12 @@ public class ChanRipper extends AbstractHTMLRipper {
if (m.matches()) { if (m.matches()) {
return m.group(1); return m.group(1);
} }
// xchan
p = Pattern.compile("^.*\\.[a-z]{1,3}/board/[a-zA-Z0-9]+/thread/([0-9]+)/?.*$");
m = p.matcher(u);
if (m.matches()) {
return m.group(1);
}
} }
throw new MalformedURLException( throw new MalformedURLException(
@ -143,7 +152,7 @@ public class ChanRipper extends AbstractHTMLRipper {
} }
@Override @Override
public List<String> getURLsFromPage(Document page) { public List<String> getURLsFromPage(Document page) {
List<String> imageURLs = new ArrayList<String>(); List<String> imageURLs = new ArrayList<>();
Pattern p; Matcher m; Pattern p; Matcher m;
for (Element link : page.select("a")) { for (Element link : page.select("a")) {
if (!link.hasAttr("href")) { if (!link.hasAttr("href")) {
@ -208,6 +217,6 @@ public class ChanRipper extends AbstractHTMLRipper {
@Override @Override
public void downloadURL(URL url, int index) { public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index), "", this.url.toString(), null); addURLToDownload(url, getPrefix(index));
} }
} }
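The new xchan branch extracts the thread id from board/<name>/thread/<id> style URLs. A standalone check (the sample URL is invented):

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class XchanGidDemo {
    public static void main(String[] args) {
        Pattern p = Pattern.compile("^.*\\.[a-z]{1,3}/board/[a-zA-Z0-9]+/thread/([0-9]+)/?.*$");
        Matcher m = p.matcher("https://xchan.net/board/b/thread/123456/"); // invented sample URL
        if (m.matches()) {
            System.out.println(m.group(1)); // prints 123456
        }
    }
}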

View File

@ -1,180 +0,0 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;
public class CheebyRipper extends AbstractHTMLRipper {
private int offset = 0;
private Map<String, Integer> albumSets = new HashMap<String, Integer>();
public CheebyRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "cheeby";
}
@Override
public String getDomain() {
return "cheeby.com";
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://[w.]*cheeby.com/u/([a-zA-Z0-9\\-_]{3,}).*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("cheeby user not found in " + url + ", expected http://cheeby.com/u/username");
}
@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
return new URL("http://cheeby.com/u/" + getGID(url) + "/pics");
}
@Override
public Document getFirstPage() throws IOException {
String url = this.url + "?limit=10&offset=0";
return Http.url(url)
.get();
}
@Override
public Document getNextPage(Document doc) throws IOException {
sleep(500);
offset += 1;
String url = this.url + "?p=" + offset;
Document nextDoc = Http.url(url).get();
if (nextDoc.select("div.i a img").size() == 0) {
throw new IOException("No more images to fetch");
}
return nextDoc;
}
@Override
public void downloadURL(URL url, int index) {
// Not implemented here
}
@Override
public List<String> getURLsFromPage(Document page) {
// Not implemented here
return null;
}
public List<Image> getImagesFromPage(Document page) {
List<Image> imageURLs = new ArrayList<Image>();
for (Element image : page.select("div.i a img")) {
// Get image URL
String imageURL = image.attr("src");
imageURL = imageURL.replace("s.", ".");
// Get "album" from image link
String href = image.parent().attr("href");
while (href.endsWith("/")) {
href = href.substring(0, href.length() - 2);
}
String[] hrefs = href.split("/");
String prefix = hrefs[hrefs.length - 1];
// Keep track of how many images are in this album
int albumSetCount = 0;
if (albumSets.containsKey(prefix)) {
albumSetCount = albumSets.get(prefix);
}
albumSetCount++;
albumSets.put(prefix, albumSetCount);
imageURLs.add(new Image(imageURL, prefix, albumSetCount));
}
return imageURLs;
}
@Override
public void rip() throws IOException {
logger.info("Retrieving " + this.url);
sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm());
Document doc = getFirstPage();
while (doc != null) {
List<Image> images = getImagesFromPage(doc);
if (images.size() == 0) {
throw new IOException("No images found at " + doc.location());
}
for (Image image : images) {
if (isStopped()) {
break;
}
// Don't create subdirectory if "album" only has 1 image
if (albumSets.get(image.prefix) > 1) {
addURLToDownload(new URL(image.url), getPrefix(image.index), image.prefix);
}
else {
addURLToDownload(new URL(image.url));
}
}
if (isStopped()) {
break;
}
try {
sendUpdate(STATUS.LOADING_RESOURCE, "next page");
doc = getNextPage(doc);
} catch (IOException e) {
logger.info("Can't get next page: " + e.getMessage());
break;
}
}
// If they're using a thread pool, wait for it.
if (getThreadPool() != null) {
getThreadPool().waitForThreads();
}
waitForThreads();
// Delete empty subdirectories
for (String prefix : albumSets.keySet()) {
if (prefix.trim().equals("")) {
continue;
}
File f = new File(this.workingDir, prefix);
if (f.list() != null && f.list().length == 0) {
logger.info("Deleting empty directory: " + f.getAbsolutePath());
f.delete();
}
}
}
private class Image {
String url, prefix;
int index;
public Image(String url, String prefix, int index) {
this.url = url;
this.prefix = prefix;
this.index = index;
}
}
}

View File

@ -6,118 +6,119 @@ import java.net.URL;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Http;
public class CheveretoRipper extends AbstractHTMLRipper { public class CheveretoRipper extends AbstractHTMLRipper {
private static final Map<String, String> CONSENT_COOKIE;
static {
CONSENT_COOKIE = new TreeMap<String, String>();
CONSENT_COOKIE.put("AGREE_CONSENT", "1");
}
public CheveretoRipper(URL url) throws IOException { public CheveretoRipper(URL url) throws IOException {
super(url); super(url);
} }
public static List<String> explicit_domains_1 = Arrays.asList("hushpix.com", "tag-fox.com"); private static List<String> explicit_domains_1 = Arrays.asList("hushpix.com", "tag-fox.com", "gwarchives.com");
@Override
public String getHost() {
String host = url.toExternalForm().split("/")[2];
return host;
}
@Override
public String getDomain() {
String host = url.toExternalForm().split("/")[2];
return host;
}
@Override
public boolean canRip(URL url) {
String url_name = url.toExternalForm();
if (explicit_domains_1.contains(url_name.split("/")[2]) == true) {
Pattern pa = Pattern.compile("(?:https?://)?(?:www\\.)?[a-z1-9-]*\\.[a-z1-9]*/album/([a-zA-Z1-9]*)/?$");
Matcher ma = pa.matcher(url.toExternalForm());
if (ma.matches()) {
return true;
}
}
return false;
}
@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
try {
// Attempt to use album title as GID
Element titleElement = getFirstPage().select("meta[property=og:title]").first();
String title = titleElement.attr("content");
title = title.substring(title.lastIndexOf('/') + 1);
return getHost() + "_" + title.trim();
} catch (IOException e) {
// Fall back to default album naming convention
logger.info("Unable to find title at " + url);
}
return super.getAlbumTitle(url);
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("(?:https?://)?(?:www\\.)?[a-z1-9-]*\\.[a-z1-9]*/album/([a-zA-Z1-9]*)/?$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Expected chevereto URL format: " +
"site.domain/album/albumName or site.domain/username/albums- got " + url + " instead");
}
@Override
public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass
return Http.url(url).get();
}
@Override
public Document getNextPage(Document doc) throws IOException {
// Find the next page via the pagination link
Element elem = doc.select("li.pagination-next > a").first();
if (elem == null) {
throw new IOException("No more pages");
}
String nextPage = elem.attr("href");
// Sometimes this returns an empty string; this check stops that
if (nextPage.isEmpty()) {
return null;
}
else {
return Http.url(nextPage).get();
}
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<String>();
for (Element el : doc.select("a.image-container > img")) {
String imageSource = el.attr("src");
// We remove the .md from images so we download the full size image
// not the medium ones
imageSource = imageSource.replace(".md", "");
result.add(imageSource);
}
return result;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
@Override
public String getHost() {
return url.toExternalForm().split("/")[2];
} }
@Override
public String getDomain() {
return url.toExternalForm().split("/")[2];
}
@Override
public boolean canRip(URL url) {
String url_name = url.toExternalForm();
if (explicit_domains_1.contains(url_name.split("/")[2])) {
Pattern pa = Pattern.compile("(?:https?://)?(?:www\\.)?[a-z1-9-]*\\.[a-z1-9]*/album/([a-zA-Z1-9]*)/?$");
Matcher ma = pa.matcher(url.toExternalForm());
if (ma.matches()) {
return true;
}
}
return false;
}
@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
try {
// Attempt to use album title as GID
Element titleElement = getFirstPage().select("meta[property=og:title]").first();
String title = titleElement.attr("content");
title = title.substring(title.lastIndexOf('/') + 1);
return getHost() + "_" + title.trim();
} catch (IOException e) {
// Fall back to default album naming convention
logger.info("Unable to find title at " + url);
}
return super.getAlbumTitle(url);
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("(?:https?://)?(?:www\\.)?[a-z1-9-]*\\.[a-z1-9]*/album/([a-zA-Z1-9]*)/?$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Expected chevereto URL format: " +
"site.domain/album/albumName or site.domain/username/albums- got " + url + " instead");
}
@Override
public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass
return Http.url(url).cookies(CONSENT_COOKIE).get();
}
@Override
public Document getNextPage(Document doc) throws IOException {
// Find the next page via the pagination link
Element elem = doc.select("li.pagination-next > a").first();
if (elem == null) {
throw new IOException("No more pages");
}
String nextPage = elem.attr("href");
// Sometimes this returns an empty string; this check stops that
if (nextPage.isEmpty()) {
return null;
} else {
return Http.url(nextPage).cookies(CONSENT_COOKIE).get();
}
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<>();
for (Element el : doc.select("a.image-container > img")) {
String imageSource = el.attr("src");
// We remove the .md from images so we download the full size image
// not the medium ones
imageSource = imageSource.replace(".md", "");
result.add(imageSource);
}
return result;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}
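The CONSENT_COOKIE map added here is attached to every request so the ripper lands past the site's consent interstitial. The same idea in plain Jsoup, assuming RipMe's Http wrapper forwards cookies the way Jsoup's Connection.cookies() does (the album URL is illustrative):

import java.util.Map;
import java.util.TreeMap;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

public class ConsentCookieDemo {
    public static void main(String[] args) throws Exception {
        Map<String, String> consent = new TreeMap<>();
        consent.put("AGREE_CONSENT", "1"); // pretend the consent dialog was accepted

        Document doc = Jsoup.connect("https://tag-fox.com/album/someAlbum") // illustrative URL
                .cookies(consent)
                .get();
        System.out.println(doc.title());
    }
}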

View File

@ -32,8 +32,8 @@ public class DeviantartRipper extends AbstractHTMLRipper {
private static final int PAGE_SLEEP_TIME = 3000, private static final int PAGE_SLEEP_TIME = 3000,
IMAGE_SLEEP_TIME = 2000; IMAGE_SLEEP_TIME = 2000;
private Map<String,String> cookies = new HashMap<String,String>(); private Map<String,String> cookies = new HashMap<>();
private Set<String> triedURLs = new HashSet<String>(); private Set<String> triedURLs = new HashSet<>();
public DeviantartRipper(URL url) throws IOException { public DeviantartRipper(URL url) throws IOException {
super(url); super(url);
@ -63,7 +63,7 @@ public class DeviantartRipper extends AbstractHTMLRipper {
u += "gallery/?"; u += "gallery/?";
} }
Pattern p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{1,})\\.deviantart\\.com/favou?rites/([0-9]+)/*?$"); Pattern p = Pattern.compile("^https?://([a-zA-Z0-9\\-]+)\\.deviantart\\.com/favou?rites/([0-9]+)/*?$");
Matcher m = p.matcher(url.toExternalForm()); Matcher m = p.matcher(url.toExternalForm());
if (!m.matches()) { if (!m.matches()) {
String subdir = "/"; String subdir = "/";
@ -88,18 +88,18 @@ public class DeviantartRipper extends AbstractHTMLRipper {
return m.group(1); return m.group(1);
} }
} }
p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{1,})\\.deviantart\\.com/gallery/([0-9]{1,}).*$"); p = Pattern.compile("^https?://([a-zA-Z0-9\\-]+)\\.deviantart\\.com/gallery/([0-9]+).*$");
m = p.matcher(url.toExternalForm()); m = p.matcher(url.toExternalForm());
if (m.matches()) { if (m.matches()) {
// Subgallery // Subgallery
return m.group(1) + "_" + m.group(2); return m.group(1) + "_" + m.group(2);
} }
p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{1,})\\.deviantart\\.com/favou?rites/([0-9]+)/.*?$"); p = Pattern.compile("^https?://([a-zA-Z0-9\\-]+)\\.deviantart\\.com/favou?rites/([0-9]+)/.*?$");
m = p.matcher(url.toExternalForm()); m = p.matcher(url.toExternalForm());
if (m.matches()) { if (m.matches()) {
return m.group(1) + "_faves_" + m.group(2); return m.group(1) + "_faves_" + m.group(2);
} }
p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{1,})\\.deviantart\\.com/favou?rites/?$"); p = Pattern.compile("^https?://([a-zA-Z0-9\\-]+)\\.deviantart\\.com/favou?rites/?$");
m = p.matcher(url.toExternalForm()); m = p.matcher(url.toExternalForm());
if (m.matches()) { if (m.matches()) {
// Subgallery // Subgallery
@ -121,14 +121,14 @@ public class DeviantartRipper extends AbstractHTMLRipper {
.cookies(cookies) .cookies(cookies)
.get(); .get();
} }
public String jsonToImage(Document page,String id) { private String jsonToImage(Document page, String id) {
Elements js = page.select("script[type=\"text/javascript\"]"); Elements js = page.select("script[type=\"text/javascript\"]");
for (Element tag : js) { for (Element tag : js) {
if (tag.html().contains("window.__pageload")) { if (tag.html().contains("window.__pageload")) {
try { try {
String script = tag.html(); String script = tag.html();
script = script.substring(script.indexOf("window.__pageload")); script = script.substring(script.indexOf("window.__pageload"));
if (script.indexOf(id) < 0) { if (!script.contains(id)) {
continue; continue;
} }
script = script.substring(script.indexOf(id)); script = script.substring(script.indexOf(id));
@ -144,7 +144,7 @@ public class DeviantartRipper extends AbstractHTMLRipper {
} }
@Override @Override
public List<String> getURLsFromPage(Document page) { public List<String> getURLsFromPage(Document page) {
List<String> imageURLs = new ArrayList<String>(); List<String> imageURLs = new ArrayList<>();
// Iterate over all thumbnails // Iterate over all thumbnails
for (Element thumb : page.select("div.zones-container span.thumb")) { for (Element thumb : page.select("div.zones-container span.thumb")) {
@ -194,7 +194,7 @@ public class DeviantartRipper extends AbstractHTMLRipper {
} }
@Override @Override
public List<String> getDescriptionsFromPage(Document page) { public List<String> getDescriptionsFromPage(Document page) {
List<String> textURLs = new ArrayList<String>(); List<String> textURLs = new ArrayList<>();
// Iterate over all thumbnails // Iterate over all thumbnails
for (Element thumb : page.select("div.zones-container span.thumb")) { for (Element thumb : page.select("div.zones-container span.thumb")) {
logger.info(thumb.attr("href")); logger.info(thumb.attr("href"));
@ -257,9 +257,9 @@ public class DeviantartRipper extends AbstractHTMLRipper {
* @return Full-size image URL * @return Full-size image URL
* @throws Exception If it can't find the full-size URL * @throws Exception If it can't find the full-size URL
*/ */
public static String thumbToFull(String thumb, boolean throwException) throws Exception { private static String thumbToFull(String thumb, boolean throwException) throws Exception {
thumb = thumb.replace("http://th", "http://fc"); thumb = thumb.replace("http://th", "http://fc");
List<String> fields = new ArrayList<String>(Arrays.asList(thumb.split("/"))); List<String> fields = new ArrayList<>(Arrays.asList(thumb.split("/")));
fields.remove(4); fields.remove(4);
if (!fields.get(4).equals("f") && throwException) { if (!fields.get(4).equals("f") && throwException) {
// Not a full-size image // Not a full-size image
@ -339,7 +339,7 @@ public class DeviantartRipper extends AbstractHTMLRipper {
* @param page Page the thumbnail is retrieved from * @param page Page the thumbnail is retrieved from
* @return Highest-resolution version of the image based on thumbnail URL and the page. * @return Highest-resolution version of the image based on thumbnail URL and the page.
*/ */
public String smallToFull(String thumb, String page) { private String smallToFull(String thumb, String page) {
try { try {
// Fetch the image page // Fetch the image page
Response resp = Http.url(page) Response resp = Http.url(page)
@ -373,7 +373,7 @@ public class DeviantartRipper extends AbstractHTMLRipper {
} }
cookieString = cookieString.substring(0,cookieString.length() - 1); cookieString = cookieString.substring(0,cookieString.length() - 1);
con.setRequestProperty("Cookie",cookieString); con.setRequestProperty("Cookie",cookieString);
con.setRequestProperty("User-Agent",this.USER_AGENT); con.setRequestProperty("User-Agent", USER_AGENT);
con.setInstanceFollowRedirects(true); con.setInstanceFollowRedirects(true);
con.connect(); con.connect();
int code = con.getResponseCode(); int code = con.getResponseCode();
@ -406,7 +406,7 @@ public class DeviantartRipper extends AbstractHTMLRipper {
*/ */
private Map<String, String> loginToDeviantart() throws IOException { private Map<String, String> loginToDeviantart() throws IOException {
// Populate postData fields // Populate postData fields
Map<String,String> postData = new HashMap<String,String>(); Map<String,String> postData = new HashMap<>();
String username = Utils.getConfigString("deviantart.username", new String(Base64.decode("Z3JhYnB5"))); String username = Utils.getConfigString("deviantart.username", new String(Base64.decode("Z3JhYnB5")));
String password = Utils.getConfigString("deviantart.password", new String(Base64.decode("ZmFrZXJz"))); String password = Utils.getConfigString("deviantart.password", new String(Base64.decode("ZmFrZXJz")));
if (username == null || password == null) { if (username == null || password == null) {

View File

@ -72,7 +72,7 @@ public class DrawcrowdRipper extends AbstractHTMLRipper {
@Override @Override
public List<String> getURLsFromPage(Document page) { public List<String> getURLsFromPage(Document page) {
List<String> imageURLs = new ArrayList<String>(); List<String> imageURLs = new ArrayList<>();
for (Element thumb : page.select("div.item.asset img")) { for (Element thumb : page.select("div.item.asset img")) {
String image = thumb.attr("src"); String image = thumb.attr("src");
image = image image = image

View File

@ -0,0 +1,74 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
public class DribbbleRipper extends AbstractHTMLRipper {
public DribbbleRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "dribbble";
}
@Override
public String getDomain() {
return "dribbble.com";
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://[wm.]*dribbble\\.com/([a-zA-Z0-9]+).*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Expected dribbble.com URL format: " +
"dribbble.com/albumid - got " + url + "instead");
}
@Override
public Document getFirstPage() throws IOException {
return Http.url(url).get();
}
@Override
public Document getNextPage(Document doc) throws IOException {
// Find next page
Elements hrefs = doc.select("a.next_page");
if (hrefs.size() == 0) {
throw new IOException("No more pages");
}
String nextUrl = "https://www.dribbble.com" + hrefs.first().attr("href");
sleep(500);
return Http.url(nextUrl).get();
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<>();
for (Element thumb : doc.select("a.dribbble-link > picture > source")) {
// Skip teaser entries; they are just thumbnails
if (thumb.attr("srcset").contains("teaser")) continue;
String image = thumb.attr("srcset").replace("_1x", "");
imageURLs.add(image);
}
return imageURLs;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}
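The srcset handling above drops the _1x low-resolution suffix to reach the full-size asset. A string-level illustration with an invented srcset value:

public class SrcsetDemo {
    public static void main(String[] args) {
        String srcset = "https://cdn.dribbble.example/shot_1x.png"; // invented sample value
        if (!srcset.contains("teaser")) { // teaser entries are thumbnails and get skipped
            System.out.println(srcset.replace("_1x", "")); // https://cdn.dribbble.example/shot.png
        }
    }
}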

View File

@ -5,167 +5,137 @@ import com.rarchives.ripme.ripper.DownloadThreadPool;
import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils; import com.rarchives.ripme.utils.Utils;
import java.io.IOException; import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL; import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.logging.Level; import java.util.regex.Matcher;
import java.util.logging.Logger; import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.select.Elements; import org.jsoup.select.Elements;
public class E621Ripper extends AbstractHTMLRipper { public class E621Ripper extends AbstractHTMLRipper{
public static final int POOL_IMAGES_PER_PAGE = 24; private static final Logger logger = Logger.getLogger(E621Ripper.class);
private DownloadThreadPool e621ThreadPool = new DownloadThreadPool("e621"); private static Pattern gidPattern=null;
private static Pattern gidPattern2=null;
private static Pattern gidPatternPool=null;
public E621Ripper(URL url) throws IOException { private DownloadThreadPool e621ThreadPool=new DownloadThreadPool("e621");
super(url);
public E621Ripper(URL url) throws IOException {
super(url);
}
@Override
public DownloadThreadPool getThreadPool() {
return e621ThreadPool;
}
@Override
public String getDomain() {
return "e621.net";
}
@Override
public String getHost() {
return "e621";
}
@Override
public Document getFirstPage() throws IOException {
if(url.getPath().startsWith("/pool/show/"))
return Http.url("https://e621.net/pool/show/"+getTerm(url)).get();
else
return Http.url("https://e621.net/post/index/1/"+getTerm(url)).get();
}
private String getFullSizedImage(String url) {
try {
return Http.url("https://e621.net" + url).get().select("div > img#image").attr("src");
} catch (IOException e) {
logger.error("Unable to get full sized image from " + url);
return null;
}
} }
@Override @Override
public DownloadThreadPool getThreadPool() { public List<String> getURLsFromPage(Document page) {
return e621ThreadPool; Elements elements = page.select("div > span.thumb > a");
} List<String> res = new ArrayList<>();
@Override for(Element e:elements) {
public String getDomain() { if (!e.attr("href").isEmpty()) {
return "e621.net"; String fullSizedImage = getFullSizedImage(e.attr("href"));
} if (fullSizedImage != null && !fullSizedImage.equals("")) {
res.add(getFullSizedImage(e.attr("href")));
}
}
}
@Override return res;
public String getHost() { }
return "e621";
}
@Override @Override
public Document getFirstPage() throws IOException { public Document getNextPage(Document page) throws IOException {
if (url.getPath().startsWith("/pool/show/")) { if (page.select("a.next_page") != null) {
return Http.url("https://e621.net/pool/show/" + getTerm(url)).get(); return Http.url("https://e621.net" + page.select("a.next_page").attr("href")).get();
} else { } else {
return Http.url("https://e621.net/post/index/1/" + getTerm(url)).get(); throw new IOException("No more pages");
} }
} }
@Override @Override
public List<String> getURLsFromPage(Document page) { public void downloadURL(final URL url, int index) {
Elements elements = page.select("#post-list .thumb a,#pool-show .thumb a"); addURLToDownload(url, getPrefix(index));
List<String> res = new ArrayList<String>(elements.size()); }
if (page.getElementById("pool-show") != null) { private String getTerm(URL url) throws MalformedURLException{
int index = 0; if(gidPattern==null)
gidPattern=Pattern.compile("^https?://(www\\.)?e621\\.net/post/index/[^/]+/([a-zA-Z0-9$_.+!*'(),%-]+)(/.*)?(#.*)?$");
if(gidPatternPool==null)
gidPatternPool=Pattern.compile("^https?://(www\\.)?e621\\.net/pool/show/([a-zA-Z0-9$_.+!*'(),%-]+)(\\?.*)?(/.*)?(#.*)?$");
Element e = page.getElementById("paginator"); Matcher m = gidPattern.matcher(url.toExternalForm());
if (e != null) { if(m.matches())
e = e.getElementsByClass("current").first(); return m.group(2);
if (e != null) {
index = (Integer.parseInt(e.text()) - 1) * POOL_IMAGES_PER_PAGE;
}
}
for (Element e_ : elements) { m = gidPatternPool.matcher(url.toExternalForm());
res.add(e_.absUrl("href") + "#" + ++index); if(m.matches())
} return m.group(2);
} else { throw new MalformedURLException("Expected e621.net URL format: e621.net/post/index/1/searchterm - got "+url+" instead");
for (Element e : elements) { }
res.add(e.absUrl("href") + "#" + e.child(0).attr("id").substring(1));
}
}
return res; @Override
} public String getGID(URL url) throws MalformedURLException {
try {
String prefix="";
if(url.getPath().startsWith("/pool/show/"))
prefix="pool_";
@Override return Utils.filesystemSafe(prefix+new URI(getTerm(url)).getPath());
public Document getNextPage(Document page) throws IOException { } catch (URISyntaxException ex) {
for (Element e : page.select("#paginator a")) { logger.error(ex);
if (e.attr("rel").equals("next")) { }
return Http.url(e.absUrl("href")).get();
}
}
return null; throw new MalformedURLException("Expected e621.net URL format: e621.net/post/index/1/searchterm - got "+url+" instead");
} }
@Override @Override
public void downloadURL(final URL url, int index) { public URL sanitizeURL(URL url) throws MalformedURLException {
e621ThreadPool.addThread(new Thread(new Runnable() { if(gidPattern2==null)
public void run() { gidPattern2=Pattern.compile("^https?://(www\\.)?e621\\.net/post/search\\?tags=([a-zA-Z0-9$_.+!*'(),%-]+)(/.*)?(#.*)?$");
try {
Document page = Http.url(url).get();
Element e = page.getElementById("image");
if (e != null) { Matcher m = gidPattern2.matcher(url.toExternalForm());
addURLToDownload(new URL(e.absUrl("src")), Utils.getConfigBoolean("download.save_order", true) ? url.getRef() + "-" : ""); if(m.matches())
} else if ((e = page.select(".content object>param[name=\"movie\"]").first()) != null) { return new URL("https://e621.net/post/index/1/"+m.group(2).replace("+","%20"));
addURLToDownload(new URL(e.absUrl("value")), Utils.getConfigBoolean("download.save_order", true) ? url.getRef() + "-" : "");
} else {
Logger.getLogger(E621Ripper.class.getName()).log(Level.WARNING, "Unsupported media type - please report to program author: " + url.toString());
}
} catch (IOException ex) { return url;
Logger.getLogger(E621Ripper.class.getName()).log(Level.SEVERE, null, ex); }
}
}
}));
}
private String getTerm(URL url) throws MalformedURLException {
String query = url.getQuery();
if (query != null) {
return Utils.parseUrlQuery(query, "tags");
}
if (query == null) {
if ((query = url.getPath()).startsWith("/post/index/")) {
query = query.substring(12);
int pos = query.indexOf('/');
if (pos == -1) {
return null;
}
// skip page number
query = query.substring(pos + 1);
if (query.endsWith("/")) {
query = query.substring(0, query.length() - 1);
}
try {
return URLDecoder.decode(query, "UTF-8");
} catch (UnsupportedEncodingException e) {
// Shouldn't happen since UTF-8 is required to be supported
throw new RuntimeException(e);
}
} else if (query.startsWith("/pool/show/")) {
query = query.substring(11);
if (query.endsWith("/")) {
query = query.substring(0, query.length() - 1);
}
return query;
}
}
return null;
}
@Override
public String getGID(URL url) throws MalformedURLException {
String prefix = "";
if (url.getPath().startsWith("/pool/show/")) {
prefix = "pool_";
} else {
prefix = "term_";
}
return Utils.filesystemSafe(prefix + getTerm(url));
}
} }
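The new getTerm logic reduces to two anchored regexes: one for post/index search URLs and one for pools. A standalone check with invented sample URLs:

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class E621TermDemo {
    public static void main(String[] args) {
        Pattern search = Pattern.compile("^https?://(www\\.)?e621\\.net/post/index/[^/]+/([a-zA-Z0-9$_.+!*'(),%-]+)(/.*)?(#.*)?$");
        Pattern pool = Pattern.compile("^https?://(www\\.)?e621\\.net/pool/show/([a-zA-Z0-9$_.+!*'(),%-]+)(\\?.*)?(/.*)?(#.*)?$");

        Matcher m = search.matcher("https://e621.net/post/index/1/wolf");
        if (m.matches()) System.out.println(m.group(2)); // wolf

        m = pool.matcher("https://e621.net/pool/show/1234");
        if (m.matches()) System.out.println("pool_" + m.group(2)); // pool_1234
    }
}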

View File

@ -39,7 +39,7 @@ public class EHentaiRipper extends AbstractHTMLRipper {
// Current HTML document // Current HTML document
private Document albumDoc = null; private Document albumDoc = null;
private static final Map<String,String> cookies = new HashMap<String,String>(); private static final Map<String,String> cookies = new HashMap<>();
static { static {
cookies.put("nw", "1"); cookies.put("nw", "1");
cookies.put("tip", "1"); cookies.put("tip", "1");
@ -162,7 +162,7 @@ public class EHentaiRipper extends AbstractHTMLRipper {
@Override @Override
public List<String> getURLsFromPage(Document page) { public List<String> getURLsFromPage(Document page) {
List<String> imageURLs = new ArrayList<String>(); List<String> imageURLs = new ArrayList<>();
Elements thumbs = page.select("#gdt > .gdtm a"); Elements thumbs = page.select("#gdt > .gdtm a");
// Iterate over images on page // Iterate over images on page
for (Element thumb : thumbs) { for (Element thumb : thumbs) {
@ -193,7 +193,7 @@ public class EHentaiRipper extends AbstractHTMLRipper {
private int index; private int index;
private File workingDir; private File workingDir;
public EHentaiImageThread(URL url, int index, File workingDir) { EHentaiImageThread(URL url, int index, File workingDir) {
super(); super();
this.url = url; this.url = url;
this.index = index; this.index = index;

View File

@ -23,7 +23,12 @@ import com.rarchives.ripme.utils.Http;
public class EightmusesRipper extends AbstractHTMLRipper { public class EightmusesRipper extends AbstractHTMLRipper {
private Document albumDoc = null; private Document albumDoc = null;
private Map<String,String> cookies = new HashMap<String,String>(); private Map<String,String> cookies = new HashMap<>();
// TODO put up a wiki page on using maps to store titles
// the map for storing the title of each album when downloading subalbums
private Map<URL,String> urlTitles = new HashMap<>();
private Boolean rippingSubalbums = false;
public EightmusesRipper(URL url) throws IOException { public EightmusesRipper(URL url) throws IOException {
super(url); super(url);
@ -76,33 +81,62 @@ public class EightmusesRipper extends AbstractHTMLRipper {
@Override @Override
public List<String> getURLsFromPage(Document page) { public List<String> getURLsFromPage(Document page) {
List<String> imageURLs = new ArrayList<String>(); List<String> imageURLs = new ArrayList<>();
if (page.select(".preview > span").size() > 0) { // get the first image link on the page and check if the last char in it is a number
// if it is a number then we're ripping a comic if not it's a subalbum
String firstImageLink = page.select("div.gallery > a.t-hover").first().attr("href");
Pattern p = Pattern.compile("/comix/picture/([a-zA-Z0-9\\-_/]*/)?\\d+");
Matcher m = p.matcher(firstImageLink);
if (!m.matches()) {
logger.info("Ripping subalbums");
// Page contains subalbums (not images) // Page contains subalbums (not images)
Elements albumElements = page.select("a.preview"); Elements albumElements = page.select("div.gallery > a.t-hover");
List<Element> albumsList = albumElements.subList(0, albumElements.size()); List<Element> albumsList = albumElements.subList(0, albumElements.size());
Collections.reverse(albumsList); Collections.reverse(albumsList);
// Iterate over elements in reverse order // Iterate over elements in reverse order
for (Element subalbum : albumsList) { for (Element subalbum : albumsList) {
String subUrl = subalbum.attr("href"); String subUrl = subalbum.attr("href");
subUrl = subUrl.replaceAll("\\.\\./", ""); // This check skips ads, which don't have an href
if (subUrl.startsWith("//")) { if (!subUrl.isEmpty()) {
subUrl = "http:"; subUrl = subUrl.replaceAll("\\.\\./", "");
} if (subUrl.startsWith("//")) {
else if (!subUrl.startsWith("http://")) { subUrl = "https:";
subUrl = "http://www.8muses.com/" + subUrl; }
} else if (!subUrl.startsWith("http://")) {
try { subUrl = "https://www.8muses.com" + subUrl;
logger.info("Retrieving " + subUrl); }
sendUpdate(STATUS.LOADING_RESOURCE, subUrl); try {
Document subPage = Http.url(subUrl).get(); logger.info("Retrieving " + subUrl);
// Get all images in subalbum, add to list. sendUpdate(STATUS.LOADING_RESOURCE, subUrl);
List<String> subalbumImages = getURLsFromPage(subPage); Document subPage = Http.url(subUrl).get();
logger.info("Found " + subalbumImages.size() + " images in subalbum"); // Get all images in subalbum, add to list.
imageURLs.addAll(subalbumImages); List<String> subalbumImages = getURLsFromPage(subPage);
} catch (IOException e) { String albumTitle = subPage.select("meta[name=description]").attr("content");
logger.warn("Error while loading subalbum " + subUrl, e); albumTitle = albumTitle.replace("A huge collection of free porn comics for adults. Read ", "");
continue; albumTitle = albumTitle.replace(" online for free at 8muses.com", "");
albumTitle = albumTitle.replace(" ", "_");
// albumTitle = albumTitle.replace("Sex and Porn Comics", "");
// albumTitle = albumTitle.replace("|", "");
// albumTitle = albumTitle.replace("8muses", "");
// albumTitle = albumTitle.replaceAll("-", "_");
// albumTitle = albumTitle.replaceAll(" ", "_");
// albumTitle = albumTitle.replaceAll("___", "_");
// albumTitle = albumTitle.replaceAll("__", "_");
// // This is here to remove the trailing __ from folder names
// albumTitle = albumTitle.replaceAll("__", "");
logger.info("Found " + subalbumImages.size() + " images in subalbum");
int prefix = 1;
for (String image : subalbumImages) {
URL imageUrl = new URL(image);
// urlTitles.put(imageUrl, albumTitle);
addURLToDownload(imageUrl, getPrefix(prefix), albumTitle, this.url.toExternalForm(), cookies);
prefix = prefix + 1;
}
rippingSubalbums = true;
imageURLs.addAll(subalbumImages);
} catch (IOException e) {
logger.warn("Error while loading subalbum " + subUrl, e);
}
} }
} }
} }
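
Reviewer note: the subalbum folder name above is derived from the page's meta description by stripping the site's fixed boilerplate. A worked example, using an invented description that follows the pattern the code strips:

public class EightmusesTitleDemo {
    public static void main(String[] args) {
        // Invented sample; the site is assumed to wrap the album name in this fixed text.
        String albumTitle = "A huge collection of free porn comics for adults. "
                + "Read Some Comic online for free at 8muses.com";
        albumTitle = albumTitle.replace("A huge collection of free porn comics for adults. Read ", "");
        albumTitle = albumTitle.replace(" online for free at 8muses.com", "");
        albumTitle = albumTitle.replace(" ", "_");
        System.out.println(albumTitle); // Some_Comic
    }
}
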
@ -142,10 +176,10 @@ public class EightmusesRipper extends AbstractHTMLRipper {
private String getFullSizeImage(String imageUrl) throws IOException { private String getFullSizeImage(String imageUrl) throws IOException {
sendUpdate(STATUS.LOADING_RESOURCE, imageUrl); sendUpdate(STATUS.LOADING_RESOURCE, imageUrl);
logger.info("Getting full sized image from " + imageUrl);
Document doc = new Http(imageUrl).get(); // Retrieve the webpage of the image URL Document doc = new Http(imageUrl).get(); // Retrieve the webpage of the image URL
Element fullSizeImage = doc.select(".photo").first(); // Select the "photo" element from the page (there should only be 1) String imageName = doc.select("input[id=imageName]").attr("value"); // Select the "input" element from the page
String path = "https://cdn.ampproject.org/i/s/www.8muses.com/data/ufu/small/" + fullSizeImage.children().select("#imageName").attr("value"); // Append the path to the fullsize image file to the standard prefix return "https://www.8muses.com/image/fm/" + imageName;
return path;
} }
@Override @Override
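
Reviewer note: the rewritten getFullSizeImage() above no longer scrapes a .photo element; it assumes the image page carries the file name in a hidden input with id imageName and prepends the /image/fm/ path. A hedged jsoup sketch of that lookup (the wrapper class is illustrative):

import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

public class EightmusesFullSizeDemo {
    static String fullSizeImage(String imagePageUrl) throws IOException {
        Document doc = Jsoup.connect(imagePageUrl).get(); // fetch the image's HTML page
        String imageName = doc.select("input[id=imageName]").attr("value");
        return "https://www.8muses.com/image/fm/" + imageName;
    }
}
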

View File

@ -9,7 +9,6 @@ import java.io.IOException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
@ -18,10 +17,8 @@ import org.jsoup.Connection.Response;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.select.Elements; import org.jsoup.select.Elements;
import org.jsoup.Connection.Method;
import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Http;
/** /**
@ -71,19 +68,13 @@ public class EroShareRipper extends AbstractHTMLRipper {
Pattern p_eroshare_profile = Pattern.compile("^https?://eroshare.com/u/([a-zA-Z0-9\\-_]+)/?$"); Pattern p_eroshare_profile = Pattern.compile("^https?://eroshare.com/u/([a-zA-Z0-9\\-_]+)/?$");
Matcher m_eroshare_profile = p_eroshare_profile.matcher(url.toExternalForm()); Matcher m_eroshare_profile = p_eroshare_profile.matcher(url.toExternalForm());
if (m_eroshare_profile.matches()) { return m_eroshare_profile.matches();
return true;
}
return false;
} }
public boolean is_profile(URL url) { private boolean is_profile(URL url) {
Pattern pa = Pattern.compile("^https?://eroshae.com/u/([a-zA-Z0-9\\-_]+)/?$"); Pattern pa = Pattern.compile("^https?://eroshae.com/u/([a-zA-Z0-9\\-_]+)/?$");
Matcher ma = pa.matcher(url.toExternalForm()); Matcher ma = pa.matcher(url.toExternalForm());
if (ma.matches()) { return ma.matches();
return true;
}
return false;
} }
@Override @Override
@ -103,7 +94,7 @@ public class EroShareRipper extends AbstractHTMLRipper {
@Override @Override
public String getAlbumTitle(URL url) throws MalformedURLException { public String getAlbumTitle(URL url) throws MalformedURLException {
if (is_profile(url) == false) { if (!is_profile(url)) {
try { try {
// Attempt to use album title as GID // Attempt to use album title as GID
Element titleElement = getFirstPage().select("meta[property=og:title]").first(); Element titleElement = getFirstPage().select("meta[property=og:title]").first();
@ -122,7 +113,7 @@ public class EroShareRipper extends AbstractHTMLRipper {
@Override @Override
public List<String> getURLsFromPage(Document doc) { public List<String> getURLsFromPage(Document doc) {
List<String> URLs = new ArrayList<String>(); List<String> URLs = new ArrayList<>();
//Pictures //Pictures
Elements imgs = doc.getElementsByTag("img"); Elements imgs = doc.getElementsByTag("img");
for (Element img : imgs) { for (Element img : imgs) {
@ -172,9 +163,7 @@ public class EroShareRipper extends AbstractHTMLRipper {
.ignoreContentType() .ignoreContentType()
.response(); .response();
Document doc = resp.parse(); return resp.parse();
return doc;
} }
@Override @Override
@ -214,7 +203,7 @@ public class EroShareRipper extends AbstractHTMLRipper {
Document doc = resp.parse(); Document doc = resp.parse();
List<URL> URLs = new ArrayList<URL>(); List<URL> URLs = new ArrayList<>();
//Pictures //Pictures
Elements imgs = doc.getElementsByTag("img"); Elements imgs = doc.getElementsByTag("img");
for (Element img : imgs) { for (Element img : imgs) {

View File

@ -1,15 +1,9 @@
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package com.rarchives.ripme.ripper.rippers; package com.rarchives.ripme.ripper.rippers;
import java.io.IOException; import java.io.IOException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
@ -18,10 +12,8 @@ import org.jsoup.Connection.Response;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.select.Elements; import org.jsoup.select.Elements;
import org.jsoup.Connection.Method;
import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Http;
/** /**
@ -65,10 +57,15 @@ public class EromeRipper extends AbstractHTMLRipper {
return super.getAlbumTitle(url); return super.getAlbumTitle(url);
} }
@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
return new URL(url.toExternalForm().replaceAll("https?://erome.com", "https://www.erome.com"));
}
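
Reviewer note: a quick illustration of the host normalization this sanitizeURL() performs; the album id is hypothetical:

public class EromeSanitizeDemo {
    public static void main(String[] args) {
        // Rewrite bare erome.com links to the www. host so both forms rip identically.
        String sanitized = "http://erome.com/a/AbCd1234"
                .replaceAll("https?://erome.com", "https://www.erome.com");
        System.out.println(sanitized); // https://www.erome.com/a/AbCd1234
    }
}
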
@Override @Override
public List<String> getURLsFromPage(Document doc) { public List<String> getURLsFromPage(Document doc) {
List<String> URLs = new ArrayList<String>(); List<String> URLs = new ArrayList<>();
//Pictures //Pictures
Elements imgs = doc.select("div.img > img.img-front"); Elements imgs = doc.select("div.img > img.img-front");
for (Element img : imgs) { for (Element img : imgs) {
@ -92,9 +89,7 @@ public class EromeRipper extends AbstractHTMLRipper {
.ignoreContentType() .ignoreContentType()
.response(); .response();
Document doc = resp.parse(); return resp.parse();
return doc;
} }
@Override @Override
@ -104,7 +99,15 @@ public class EromeRipper extends AbstractHTMLRipper {
if (m.matches()) { if (m.matches()) {
return m.group(1); return m.group(1);
} }
throw new MalformedURLException("erome album not found in " + url + ", expected https://erome.com/album");
p = Pattern.compile("^https?://erome.com/a/([a-zA-Z0-9]*)/?$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("erome album not found in " + url + ", expected https://www.erome.com/album");
} }
public static List<URL> getURLs(URL url) throws IOException{ public static List<URL> getURLs(URL url) throws IOException{
@ -115,7 +118,7 @@ public class EromeRipper extends AbstractHTMLRipper {
Document doc = resp.parse(); Document doc = resp.parse();
List<URL> URLs = new ArrayList<URL>(); List<URL> URLs = new ArrayList<>();
//Pictures //Pictures
Elements imgs = doc.getElementsByTag("img"); Elements imgs = doc.getElementsByTag("img");
for (Element img : imgs) { for (Element img : imgs) {

View File

@ -1,101 +0,0 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
public class FapprovedRipper extends AbstractHTMLRipper {
private int pageIndex = 1;
private String username = null;
public FapprovedRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "fapproved";
}
@Override
public String getDomain() {
return "fapproved.com";
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://[w.]*fapproved.com/users/([a-zA-Z0-9\\-_]{3,}).*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
username = m.group(1);
return username;
}
throw new MalformedURLException("Fapproved user not found in " + url + ", expected http://fapproved.com/users/username/images");
}
@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
return new URL("http://fapproved.com/users/" + getGID(url));
}
@Override
public Document getFirstPage() throws IOException {
pageIndex = 1;
String pageURL = getPageURL(pageIndex);
return Http.url(pageURL)
.ignoreContentType()
.get();
}
@Override
public Document getNextPage(Document doc) throws IOException {
if ((doc.select("div.pagination li.next.disabled").size() != 0)
|| (doc.select("div.pagination").size() == 0)) {
throw new IOException("No more pages found");
}
sleep(1000);
pageIndex++;
String pageURL = getPageURL(pageIndex);
return Http.url(pageURL)
.ignoreContentType()
.get();
}
private String getPageURL(int index) throws IOException {
if (username == null) {
username = getGID(this.url);
}
return "http://fapproved.com/users/" + username + "/images?page=" + pageIndex;
}
@Override
public List<String> getURLsFromPage(Document page) {
List<String> imageURLs = new ArrayList<String>();
for (Element image : page.select("div.actual-image img")) {
String imageURL = image.attr("src");
if (imageURL.startsWith("//")) {
imageURL = "http:" + imageURL;
}
else if (imageURL.startsWith("/")) {
imageURL = "http://fapproved.com" + imageURL;
}
imageURLs.add(imageURL);
}
return imageURLs;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}

View File

@ -1,91 +0,0 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.HttpStatusException;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AlbumRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;
import org.jsoup.select.Elements;
public class FineboxRipper extends AlbumRipper {
private static final String DOMAIN = "finebox.co",
DOMAIN_OLD = "vinebox.co",
HOST = "finebox";
public FineboxRipper(URL url) throws IOException {
super(url);
}
@Override
public boolean canRip(URL url) {
return url.getHost().endsWith(DOMAIN) || url.getHost().endsWith(DOMAIN_OLD);
}
@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
return new URL("http://"+DOMAIN+"/u/" + getGID(url));
}
@Override
public void rip() throws IOException {
int page = 0;
Document doc;
Boolean hasPagesLeft = true;
while (hasPagesLeft) {
page++;
String urlPaged = this.url.toExternalForm() + "?page=" + page;
logger.info("Retrieving " + urlPaged);
sendUpdate(STATUS.LOADING_RESOURCE, urlPaged);
try {
doc = Http.url(this.url).get();
} catch (HttpStatusException e) {
logger.debug("Hit end of pages at page " + page, e);
break;
}
Elements videos = doc.select("video");
for (Element element : videos) {
String videourl = element.select("source").attr("src");
if (!videourl.startsWith("http")) {
videourl = "http://" + DOMAIN + videourl;
}
logger.info("URL to download: " + videourl);
if (!addURLToDownload(new URL(videourl))) {
hasPagesLeft = false;
break;
}
}
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
logger.error("[!] Interrupted while waiting to load next page", e);
break;
}
}
waitForThreads();
}
@Override
public String getHost() {
return HOST;
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://(www\\.)?(v|f)inebox\\.co/u/([a-zA-Z0-9]{1,}).*$");
Matcher m = p.matcher(url.toExternalForm());
if (!m.matches()) {
throw new MalformedURLException("Expected format: http://"+DOMAIN+"/u/USERNAME");
}
return m.group(m.groupCount());
}
}

View File

@ -256,7 +256,7 @@ public class FivehundredpxRipper extends AbstractJSONRipper {
@Override @Override
public List<String> getURLsFromJSON(JSONObject json) { public List<String> getURLsFromJSON(JSONObject json) {
List<String> imageURLs = new ArrayList<String>(); List<String> imageURLs = new ArrayList<>();
JSONArray photos = json.getJSONArray("photos"); JSONArray photos = json.getJSONArray("photos");
for (int i = 0; i < photos.length(); i++) { for (int i = 0; i < photos.length(); i++) {
if (super.isStopped()) { if (super.isStopped()) {

View File

@ -28,9 +28,9 @@ import com.rarchives.ripme.utils.Utils;
public class FlickrRipper extends AbstractHTMLRipper { public class FlickrRipper extends AbstractHTMLRipper {
private int page = 1; private int page = 1;
private Set<String> attempted = new HashSet<String>(); private Set<String> attempted = new HashSet<>();
private Document albumDoc = null; private Document albumDoc = null;
private DownloadThreadPool flickrThreadPool; private final DownloadThreadPool flickrThreadPool;
@Override @Override
public DownloadThreadPool getThreadPool() { public DownloadThreadPool getThreadPool() {
return flickrThreadPool; return flickrThreadPool;
@ -162,7 +162,7 @@ public class FlickrRipper extends AbstractHTMLRipper {
@Override @Override
public List<String> getURLsFromPage(Document page) { public List<String> getURLsFromPage(Document page) {
List<String> imageURLs = new ArrayList<String>(); List<String> imageURLs = new ArrayList<>();
for (Element thumb : page.select("a[data-track=photo-click]")) { for (Element thumb : page.select("a[data-track=photo-click]")) {
/* TODO find a way to persist the image title /* TODO find a way to persist the image title
String imageTitle = null; String imageTitle = null;
@ -215,7 +215,7 @@ public class FlickrRipper extends AbstractHTMLRipper {
.method(Method.GET) .method(Method.GET)
.execute(); .execute();
Document doc = resp.parse(); Document doc = resp.parse();
Map<String,String> postData = new HashMap<String,String>(); Map<String,String> postData = new HashMap<>();
for (Element input : doc.select("input[type=hidden]")) { for (Element input : doc.select("input[type=hidden]")) {
postData.put(input.attr("name"), input.attr("value")); postData.put(input.attr("name"), input.attr("value"));
} }
@ -239,7 +239,7 @@ public class FlickrRipper extends AbstractHTMLRipper {
private URL url; private URL url;
private int index; private int index;
public FlickrImageThread(URL url, int index) { FlickrImageThread(URL url, int index) {
super(); super();
this.url = url; this.url = url;
this.index = index; this.index = index;
@ -252,7 +252,6 @@ public class FlickrRipper extends AbstractHTMLRipper {
Elements fullsizeImages = doc.select("div#allsizes-photo img"); Elements fullsizeImages = doc.select("div#allsizes-photo img");
if (fullsizeImages.size() == 0) { if (fullsizeImages.size() == 0) {
logger.error("Could not find flickr image at " + doc.location() + " - missing 'div#allsizes-photo img'"); logger.error("Could not find flickr image at " + doc.location() + " - missing 'div#allsizes-photo img'");
return;
} }
else { else {
String prefix = ""; String prefix = "";

View File

@ -28,12 +28,16 @@ import com.rarchives.ripme.utils.Http;
public class FuraffinityRipper extends AbstractHTMLRipper { public class FuraffinityRipper extends AbstractHTMLRipper {
static Map<String, String> cookies=null; private static final String urlBase = "https://www.furaffinity.net";
static final String urlBase = "https://www.furaffinity.net"; private static Map<String,String> cookies = new HashMap<>();
static {
cookies.put("b", "bd5ccac8-51dc-4265-8ae1-7eac685ad667");
cookies.put("a", "7c41b782-d01d-4b0e-b45b-62a4f0b2a369");
}
// Thread pool for finding direct image links from "image" pages (html) // Thread pool for finding direct image links from "image" pages (html)
private DownloadThreadPool furaffinityThreadPool private DownloadThreadPool furaffinityThreadPool
= new DownloadThreadPool( "furaffinity"); = new DownloadThreadPool( "furaffinity");
@Override @Override
public DownloadThreadPool getThreadPool() { public DownloadThreadPool getThreadPool() {
@ -55,73 +59,49 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
} }
@Override @Override
public boolean hasDescriptionSupport() { public boolean hasDescriptionSupport() {
return true; return false;
} }
@Override @Override
public Document getFirstPage() throws IOException { public Document getFirstPage() throws IOException {
if (cookies == null || cookies.size() == 0) {
login();
}
return Http.url(url).cookies(cookies).get(); return Http.url(url).cookies(cookies).get();
} }
private void login() throws IOException {
String user = new String(Base64.decode("cmlwbWU="));
String pass = new String(Base64.decode("cmlwbWVwYXNzd29yZA=="));
Response loginPage = Http.url(urlBase + "/login/")
.referrer(urlBase)
.response();
cookies = loginPage.cookies();
Map<String,String> formData = new HashMap<String,String>();
formData.put("action", "login");
formData.put("retard_protection", "1");
formData.put("name", user);
formData.put("pass", pass);
formData.put("login", "Login to FurAffinity");
Response doLogin = Http.url(urlBase + "/login/?ref=" + url)
.referrer(urlBase + "/login/")
.cookies(cookies)
.data(formData)
.method(Method.POST)
.response();
cookies.putAll(doLogin.cookies());
}
@Override @Override
public Document getNextPage(Document doc) throws IOException { public Document getNextPage(Document doc) throws IOException {
// Find next page // Find next page
Elements nextPageUrl = doc.select("td[align=right] form"); Elements nextPageUrl = doc.select("a.right");
if (nextPageUrl.size() == 0) { if (nextPageUrl.size() == 0) {
throw new IOException("No more pages"); throw new IOException("No more pages");
} }
String nextUrl = urlBase + nextPageUrl.first().attr("action"); String nextUrl = urlBase + nextPageUrl.first().attr("href");
sleep(500); sleep(500);
Document nextPage = Http.url(nextUrl).cookies(cookies).get(); Document nextPage = Http.url(nextUrl).cookies(cookies).get();
Elements hrefs = nextPage.select("div#no-images");
if (hrefs.size() != 0) {
throw new IOException("No more pages");
}
return nextPage; return nextPage;
} }
private String getImageFromPost(String url) {
try {
logger.info("found url " + Http.url(url).cookies(cookies).get().select("meta[property=og:image]").attr("content"));
return Http.url(url).cookies(cookies).get().select("meta[property=og:image]").attr("content");
} catch (IOException e) {
return "";
}
}
@Override @Override
public List<String> getURLsFromPage(Document page) { public List<String> getURLsFromPage(Document page) {
List<String> urls = new ArrayList<String>(); List<String> urls = new ArrayList<>();
Elements urlElements = page.select("figure.t-image > b > u > a"); Elements urlElements = page.select("figure.t-image > b > u > a");
for (Element e : urlElements) { for (Element e : urlElements) {
urls.add(urlBase + e.select("a").first().attr("href")); urls.add(getImageFromPost(urlBase + e.select("a").first().attr("href")));
} }
return urls; return urls;
} }
@Override @Override
public List<String> getDescriptionsFromPage(Document page) { public List<String> getDescriptionsFromPage(Document page) {
List<String> urls = new ArrayList<String>(); List<String> urls = new ArrayList<>();
Elements urlElements = page.select("figure.t-image > b > u > a"); Elements urlElements = page.select("figure.t-image > b > u > a");
for (Element e : urlElements) { for (Element e : urlElements) {
urls.add(urlBase + e.select("a").first().attr("href")); urls.add(urlBase + e.select("a").first().attr("href"));
@ -138,7 +118,6 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
// Fetch the image page // Fetch the image page
Response resp = Http.url(page) Response resp = Http.url(page)
.referrer(this.url) .referrer(this.url)
.cookies(cookies)
.response(); .response();
cookies.putAll(resp.cookies()); cookies.putAll(resp.cookies());
@ -157,9 +136,7 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
ele.select("p").prepend("\\n\\n"); ele.select("p").prepend("\\n\\n");
logger.debug("Returning description at " + page); logger.debug("Returning description at " + page);
String tempPage = Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false)); String tempPage = Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false));
String title = documentz.select("meta[property=og:title]").attr("content"); return documentz.select("meta[property=og:title]").attr("content") + "\n" + tempPage; // Overridden saveText takes first line and makes it the file name.
String tempText = title;
return tempText + "\n" + tempPage; // Overridden saveText takes first line and makes it the file name.
} catch (IOException ioe) { } catch (IOException ioe) {
logger.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'"); logger.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'");
return null; return null;
@ -167,8 +144,8 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
} }
@Override @Override
public boolean saveText(URL url, String subdirectory, String text, int index) { public boolean saveText(URL url, String subdirectory, String text, int index) {
//TODO Make this better please? //TODO Make this better please?
try { try {
stopCheck(); stopCheck();
} catch (IOException e) { } catch (IOException e) {
return false; return false;
@ -179,7 +156,7 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
saveAs = text.split("\n")[0]; saveAs = text.split("\n")[0];
saveAs = saveAs.replaceAll("^(\\S+)\\s+by\\s+(.*)$", "$2_$1"); saveAs = saveAs.replaceAll("^(\\S+)\\s+by\\s+(.*)$", "$2_$1");
for (int i = 1;i < text.split("\n").length; i++) { for (int i = 1;i < text.split("\n").length; i++) {
newText = newText.replace("\\","").replace("/","").replace("~","") + "\n" + text.split("\n")[i]; newText = newText.replace("\\","").replace("/","").replace("~","") + "\n" + text.split("\n")[i];
} }
try { try {
if (!subdirectory.equals("")) { if (!subdirectory.equals("")) {
@ -212,8 +189,7 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
} }
@Override @Override
public void downloadURL(URL url, int index) { public void downloadURL(URL url, int index) {
furaffinityThreadPool.addThread(new FuraffinityDocumentThread(url)); addURLToDownload(url, getPrefix(index));
sleep(250);
} }
@Override @Override
@ -224,6 +200,7 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
if (m.matches()) { if (m.matches()) {
return m.group(1); return m.group(1);
} }
throw new MalformedURLException("Expected furaffinity.net URL format: " throw new MalformedURLException("Expected furaffinity.net URL format: "
+ "www.furaffinity.net/gallery/username - got " + url + "www.furaffinity.net/gallery/username - got " + url
+ " instead"); + " instead");
@ -232,42 +209,13 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
private class FuraffinityDocumentThread extends Thread { private class FuraffinityDocumentThread extends Thread {
private URL url; private URL url;
public FuraffinityDocumentThread(URL url) { FuraffinityDocumentThread(URL url) {
super(); super();
this.url = url; this.url = url;
} }
@Override
public void run() {
try {
Document doc = Http.url(url).cookies(cookies).get();
// Find image
Elements donwloadLink = doc.select("div.alt1 b a[href^=//d.facdn.net/]");
if (donwloadLink.size() == 0) {
logger.warn("Could not download " + this.url);
return;
}
String link = "http:" + donwloadLink.first().attr("href");
logger.info("Found URL " + link);
String[] fileNameSplit = link.split("/");
String fileName = fileNameSplit[fileNameSplit.length -1];
fileName = fileName.replaceAll("[0-9]*\\.", "");
String[] fileExtSplit = link.split("\\.");
String fileExt = fileExtSplit[fileExtSplit.length -1];
fileName = fileName.replaceAll(fileExt, "");
File saveAS;
fileName = fileName.replace("[0-9]*\\.", "");
saveAS = new File(
workingDir.getCanonicalPath()
+ File.separator
+ fileName
+ "."
+ fileExt);
addURLToDownload(new URL(link),saveAS,"",cookies);
} catch (IOException e) {
logger.error("[!] Exception while loading/parsing " + this.url, e);
}
}
} }
} }

View File

@ -60,7 +60,7 @@ public class FuskatorRipper extends AbstractHTMLRipper {
@Override @Override
public List<String> getURLsFromPage(Document doc) { public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>(); List<String> imageURLs = new ArrayList<>();
String html = doc.html(); String html = doc.html();
// Get "baseUrl" // Get "baseUrl"
String baseUrl = Utils.between(html, "unescape('", "'").get(0); String baseUrl = Utils.between(html, "unescape('", "'").get(0);

View File

@ -1,113 +0,0 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.Connection.Method;
import org.jsoup.Connection.Response;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;
public class GifyoRipper extends AbstractHTMLRipper {
private int page = 0;
private Map<String,String> cookies = new HashMap<String,String>();
public GifyoRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "gifyo";
}
@Override
public String getDomain() {
return "gifyo.com";
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://[w.]*gifyo.com/([a-zA-Z0-9\\-_]+)/?$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Gifyo user not found in " + url + ", expected http://gifyo.com/username");
}
@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
return new URL("http://gifyo.com/" + getGID(url) + "/");
}
@Override
public Document getFirstPage() throws IOException {
Response resp = Http.url(this.url)
.ignoreContentType()
.response();
cookies = resp.cookies();
Document doc = resp.parse();
if (doc.html().contains("profile is private")) {
sendUpdate(STATUS.RIP_ERRORED, "User has private profile");
throw new IOException("User has private profile");
}
return doc;
}
@Override
public Document getNextPage(Document doc) throws IOException {
page++;
Map<String,String> postData = new HashMap<String,String>();
postData.put("cmd", "refreshData");
postData.put("view", "gif");
postData.put("layout", "grid");
postData.put("page", Integer.toString(page));
Response resp = Http.url(this.url)
.ignoreContentType()
.data(postData)
.cookies(cookies)
.method(Method.POST)
.response();
cookies.putAll(resp.cookies());
Document nextDoc = resp.parse();
if (nextDoc.select("div.gif img").size() == 0) {
throw new IOException("No more images found");
}
sleep(2000);
return nextDoc;
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>();
for (Element image : doc.select("img.profile_gif")) {
String imageUrl = image.attr("data-animated");
if (imageUrl.startsWith("//")) {
imageUrl = "http:" + imageUrl;
}
imageUrl = imageUrl.replace("/medium/", "/large/");
imageUrl = imageUrl.replace("_s.gif", ".gif");
imageURLs.add(imageUrl);
}
logger.debug("Found " + imageURLs.size() + " images");
return imageURLs;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url);
}
}

View File

@ -50,7 +50,7 @@ public class GirlsOfDesireRipper extends AbstractHTMLRipper {
Pattern p; Pattern p;
Matcher m; Matcher m;
p = Pattern.compile("^www\\.girlsofdesire\\.org\\/galleries\\/([\\w\\d-]+)\\/$"); p = Pattern.compile("^www\\.girlsofdesire\\.org/galleries/([\\w\\d-]+)/$");
m = p.matcher(url.toExternalForm()); m = p.matcher(url.toExternalForm());
if (m.matches()) { if (m.matches()) {
return m.group(1); return m.group(1);
@ -72,7 +72,7 @@ public class GirlsOfDesireRipper extends AbstractHTMLRipper {
@Override @Override
public List<String> getURLsFromPage(Document doc) { public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>(); List<String> imageURLs = new ArrayList<>();
for (Element thumb : doc.select("td.vtop > a > img")) { for (Element thumb : doc.select("td.vtop > a > img")) {
String imgSrc = thumb.attr("src"); String imgSrc = thumb.attr("src");
imgSrc = imgSrc.replaceAll("_thumb\\.", "."); imgSrc = imgSrc.replaceAll("_thumb\\.", ".");

View File

@ -0,0 +1,78 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
public class HbrowseRipper extends AbstractHTMLRipper {
public HbrowseRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "hbrowse";
}
@Override
public String getDomain() {
return "hbrowse.com";
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("http://www.hbrowse.com/(\\d+)/[a-zA-Z0-9]*");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Expected hbrowse.com URL format: " +
"hbrowse.com/ID/COMICID - got " + url + " instead");
}
@Override
public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass
Document tempDoc = Http.url(url).get();
return Http.url(tempDoc.select("td[id=pageTopHome] > a[title=view thumbnails (top)]").attr("href")).get();
}
@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
try {
Document doc = getFirstPage();
String title = doc.select("div[id=main] > table.listTable > tbody > tr > td.listLong").first().text();
return getHost() + "_" + title + "_" + getGID(url);
} catch (Exception e) {
// Fall back to default album naming convention
logger.warn("Failed to get album title from " + url, e);
}
return super.getAlbumTitle(url);
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<String>();
for (Element el : doc.select("table > tbody > tr > td > a > img")) {
String imageURL = el.attr("src").replace("/zzz", "");
result.add(imageURL);
}
return result;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}

View File

@ -0,0 +1,113 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
public class Hentai2readRipper extends AbstractHTMLRipper {
String lastPage;
public Hentai2readRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "hentai2read";
}
@Override
public String getDomain() {
return "hentai2read.com";
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("https://hentai2read\\.com/([a-zA-Z0-9_-]*)/?");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Expected hentai2read.com URL format: " +
"hbrowse.com/COMICID - got " + url + " instead");
}
@Override
public Document getFirstPage() throws IOException {
Document tempDoc;
// get the first page of the comic
if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) {
tempDoc = Http.url(url + "1").get();
} else {
tempDoc = Http.url(url + "/1").get();
}
for (Element el : tempDoc.select("ul.nav > li > a")) {
if (el.attr("href").startsWith("https://hentai2read.com/thumbnails/")) {
// Get the page with the thumbnails
return Http.url(el.attr("href")).get();
}
}
throw new IOException("Unable to get first page");
}
@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
try {
Document doc = getFirstPage();
String title = doc.select("span[itemprop=title]").text();
return getHost() + "_" + title;
} catch (Exception e) {
// Fall back to default album naming convention
logger.warn("Failed to get album title from " + url, e);
}
return super.getAlbumTitle(url);
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<String>();
for (Element el : doc.select("div.block-content > div > div.img-container > a > img.img-responsive")) {
String imageURL = "https:" + el.attr("src");
imageURL = imageURL.replace("hentaicdn.com", "static.hentaicdn.com");
imageURL = imageURL.replace("thumbnails/", "");
imageURL = imageURL.replace("tmb", "");
result.add(imageURL);
}
return result;
}
@Override
public Document getNextPage(Document doc) throws IOException {
// Find next page
String nextUrl = "";
Element elem = doc.select("div.bg-white > ul.pagination > li > a").last();
if (elem == null) {
throw new IOException("No more pages");
}
nextUrl = elem.attr("href");
// We use the global lastPage to check if we've already ripped this page
// and if so we quit, as there are no more pages
if (nextUrl.equals(lastPage)) {
throw new IOException("No more pages");
}
lastPage = nextUrl;
// Sleep for half a sec to avoid getting IP banned
sleep(500);
return Http.url(nextUrl).get();
}
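
Reviewer note: the stop condition above relies on the reader linking its final page back to itself, so the ripper remembers the previous pagination URL and quits on a repeat. A stripped-down sketch of that sentinel pattern (names illustrative, fetching omitted):

import java.io.IOException;

public class LastPageSentinelDemo {
    private String lastPage;

    // Returns the next page URL to fetch, or throws once the site repeats itself.
    String advance(String candidate) throws IOException {
        if (candidate == null || candidate.equals(lastPage)) {
            throw new IOException("No more pages");
        }
        lastPage = candidate;
        return candidate;
    }
}
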
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}

View File

@ -0,0 +1,73 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
public class HentaiCafeRipper extends AbstractHTMLRipper {
public HentaiCafeRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "hentai";
}
@Override
public String getDomain() {
return "hentai.cafe";
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("https?://hentai\\.cafe/([a-zA-Z0-9_\\-%]*)/?$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Expected hentai.cafe URL format: " +
"hentai.cafe/COMIC - got " + url + " instead");
}
@Override
public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass
Document tempDoc = Http.url(url).get();
return Http.url(tempDoc.select("div.last > p > a.x-btn").attr("href")).get();
}
@Override
public Document getNextPage(Document doc) throws IOException {
String nextPageURL = doc.select("div[id=page] > div.inner > a").attr("href");
int totalPages = Integer.parseInt(doc.select("div.panel > div.topbar > div > div.topbar_right > div.tbtitle > div.text").text().replace("", ""));
String[] nextPageURLSplite = nextPageURL.split("/");
// Only continue if the next page number does not exceed the total number of pages
if (totalPages >= Integer.parseInt(nextPageURLSplite[nextPageURLSplite.length -1])) {
return Http.url(nextPageURL).get();
}
throw new IOException("No more pages");
}
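
Reviewer note: since the reader's page URLs are assumed to end in the page number, the bounds check above reduces to comparing the URL's last path segment against the total parsed from the toolbar. A standalone sketch with invented URLs:

public class PageBoundDemo {
    static boolean hasNextPage(String nextPageUrl, int totalPages) {
        String[] split = nextPageUrl.split("/");
        return totalPages >= Integer.parseInt(split[split.length - 1]);
    }

    public static void main(String[] args) {
        System.out.println(hasNextPage("https://hentai.cafe/manga/read/x/en/0/4", 30));  // true
        System.out.println(hasNextPage("https://hentai.cafe/manga/read/x/en/0/31", 30)); // false
    }
}
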
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<>();
result.add(doc.select("div[id=page] > div.inner > a > img.open").attr("src"));
return result;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}

View File

@ -20,7 +20,7 @@ import com.rarchives.ripme.utils.Http;
public class HentaifoundryRipper extends AbstractHTMLRipper { public class HentaifoundryRipper extends AbstractHTMLRipper {
private Map<String,String> cookies = new HashMap<String,String>(); private Map<String,String> cookies = new HashMap<>();
public HentaifoundryRipper(URL url) throws IOException { public HentaifoundryRipper(URL url) throws IOException {
super(url); super(url);
} }
@ -84,7 +84,7 @@ public class HentaifoundryRipper extends AbstractHTMLRipper {
@Override @Override
public List<String> getURLsFromPage(Document doc) { public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>(); List<String> imageURLs = new ArrayList<>();
Pattern imgRegex = Pattern.compile(".*/user/([a-zA-Z0-9\\-_]+)/(\\d+)/.*"); Pattern imgRegex = Pattern.compile(".*/user/([a-zA-Z0-9\\-_]+)/(\\d+)/.*");
for (Element thumb : doc.select("div.thumb_square > a.thumbLink")) { for (Element thumb : doc.select("div.thumb_square > a.thumbLink")) {
if (isStopped()) { if (isStopped()) {
@ -115,7 +115,7 @@ public class HentaifoundryRipper extends AbstractHTMLRipper {
imagePage = null; imagePage = null;
} }
// This is here for when the image is resized to a thumbnail because ripme doesn't report a screensize // This is here for when the image is resized to a thumbnail because ripme doesn't report a screensize
if (imagePage.select("div.boxbody > img.center").attr("src").contains("thumbs.") == true) { if (imagePage.select("div.boxbody > img.center").attr("src").contains("thumbs.")) {
imageURLs.add("http:" + imagePage.select("div.boxbody > img.center").attr("onclick").replace("this.src=", "").replace("'", "").replace("; $(#resize_message).hide();", "")); imageURLs.add("http:" + imagePage.select("div.boxbody > img.center").attr("onclick").replace("this.src=", "").replace("'", "").replace("; $(#resize_message).hide();", ""));
} }
else { else {

View File

@ -31,7 +31,7 @@ public class ImagearnRipper extends AbstractHTMLRipper {
@Override @Override
public String getGID(URL url) throws MalformedURLException { public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^.*imagearn.com/{1,}gallery.php\\?id=([0-9]{1,}).*$"); Pattern p = Pattern.compile("^.*imagearn.com/+gallery.php\\?id=([0-9]+).*$");
Matcher m = p.matcher(url.toExternalForm()); Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) { if (m.matches()) {
return m.group(1); return m.group(1);
@ -43,7 +43,7 @@ public class ImagearnRipper extends AbstractHTMLRipper {
} }
public URL sanitizeURL(URL url) throws MalformedURLException { public URL sanitizeURL(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^.*imagearn.com/{1,}image.php\\?id=[0-9]{1,}.*$"); Pattern p = Pattern.compile("^.*imagearn.com/+image.php\\?id=[0-9]+.*$");
Matcher m = p.matcher(url.toExternalForm()); Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) { if (m.matches()) {
// URL points to imagearn *image*, not gallery // URL points to imagearn *image*, not gallery
@ -75,9 +75,22 @@ public class ImagearnRipper extends AbstractHTMLRipper {
return Http.url(url).get(); return Http.url(url).get();
} }
@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
try {
Document doc = getFirstPage();
String title = doc.select("h3 > strong").first().text(); // profile name
return getHost() + "_" + title + "_" + getGID(url);
} catch (Exception e) {
// Fall back to default album naming convention
logger.warn("Failed to get album title from " + url, e);
}
return super.getAlbumTitle(url);
}
@Override @Override
public List<String> getURLsFromPage(Document doc) { public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>(); List<String> imageURLs = new ArrayList<>();
for (Element thumb : doc.select("div#gallery > div > a")) { for (Element thumb : doc.select("div#gallery > div > a")) {
String imageURL = thumb.attr("href"); String imageURL = thumb.attr("href");
try { try {

View File

@ -81,7 +81,7 @@ public class ImagebamRipper extends AbstractHTMLRipper {
@Override @Override
public List<String> getURLsFromPage(Document doc) { public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>(); List<String> imageURLs = new ArrayList<>();
for (Element thumb : doc.select("div > a[target=_blank]:not(.footera)")) { for (Element thumb : doc.select("div > a[target=_blank]:not(.footera)")) {
imageURLs.add(thumb.attr("href")); imageURLs.add(thumb.attr("href"));
} }
@ -124,7 +124,7 @@ public class ImagebamRipper extends AbstractHTMLRipper {
private URL url; private URL url;
private int index; private int index;
public ImagebamImageThread(URL url, int index) { ImagebamImageThread(URL url, int index) {
super(); super();
this.url = url; this.url = url;
this.index = index; this.index = index;

View File

@ -120,7 +120,7 @@ public class ImagefapRipper extends AbstractHTMLRipper {
@Override @Override
public List<String> getURLsFromPage(Document doc) { public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>(); List<String> imageURLs = new ArrayList<>();
for (Element thumb : doc.select("#gallery img")) { for (Element thumb : doc.select("#gallery img")) {
if (!thumb.hasAttr("src") || !thumb.hasAttr("width")) { if (!thumb.hasAttr("src") || !thumb.hasAttr("width")) {
continue; continue;
@ -129,7 +129,7 @@ public class ImagefapRipper extends AbstractHTMLRipper {
image = image.replaceAll( image = image.replaceAll(
"http://x.*.fap.to/images/thumb/", "http://x.*.fap.to/images/thumb/",
"http://fap.to/images/full/"); "http://fap.to/images/full/");
image = image.replaceAll("w[0-9]{1,}-h[0-9]{1,}/", ""); image = image.replaceAll("w[0-9]+-h[0-9]+/", "");
imageURLs.add(image); imageURLs.add(image);
if (isThisATest()) { if (isThisATest()) {
break; break;
@ -152,7 +152,7 @@ public class ImagefapRipper extends AbstractHTMLRipper {
Pattern p = Pattern.compile("^Porn pics of (.*) \\(Page 1\\)$"); Pattern p = Pattern.compile("^Porn pics of (.*) \\(Page 1\\)$");
Matcher m = p.matcher(title); Matcher m = p.matcher(title);
if (m.matches()) { if (m.matches()) {
return getHost() + "_" + m.group(1); return getHost() + "_" + m.group(1) + "_" + getGID(url);
} }
} catch (IOException e) { } catch (IOException e) {
// Fall back to default album naming convention // Fall back to default album naming convention
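
Reviewer note: back in getURLsFromPage() above, the thumb-to-full rewrite plus the tightened w[0-9]+-h[0-9]+ pattern behave as follows; the sample URL is invented to match the patterns:

public class ImagefapThumbDemo {
    public static void main(String[] args) {
        String image = "http://x3.fap.to/images/thumb/w200-h300/12/345/6789.jpg";
        image = image.replaceAll(
                "http://x.*.fap.to/images/thumb/",
                "http://fap.to/images/full/");
        image = image.replaceAll("w[0-9]+-h[0-9]+/", ""); // drop the size segment
        System.out.println(image); // http://fap.to/images/full/12/345/6789.jpg
    }
}
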

View File

@ -1,87 +0,0 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.json.JSONArray;
import org.json.JSONObject;
import com.rarchives.ripme.ripper.AbstractJSONRipper;
import com.rarchives.ripme.utils.Http;
public class ImagestashRipper extends AbstractJSONRipper {
private int page = 1;
public ImagestashRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "imagestash";
}
@Override
public String getDomain() {
return "imagestash.org";
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^.*imagestash.org/tag/([a-zA-Z0-9\\-_]+)$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException(
"Expected imagestash.org tag formats: "
+ "imagestash.org/tag/tagname"
+ " Got: " + url);
}
@Override
public JSONObject getFirstPage() throws IOException {
String baseURL = "https://imagestash.org/images?tags="
+ getGID(url)
+ "&page=" + page;
return Http.url(baseURL).getJSON();
}
@Override
public JSONObject getNextPage(JSONObject json) throws IOException {
int count = json.getInt("count"),
offset = json.getInt("offset"),
total = json.getInt("total");
if (count + offset >= total || json.getJSONArray("images").length() == 0) {
throw new IOException("No more images");
}
sleep(1000);
page++;
return getFirstPage();
}
@Override
public List<String> getURLsFromJSON(JSONObject json) {
List<String> imageURLs = new ArrayList<String>();
JSONArray images = json.getJSONArray("images");
for (int i = 0; i < images.length(); i++) {
JSONObject image = images.getJSONObject(i);
String imageURL = image.getString("src");
if (imageURL.startsWith("/")) {
imageURL = "https://imagestash.org" + imageURL;
}
imageURLs.add(imageURL);
}
return imageURLs;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}

View File

@ -62,7 +62,7 @@ public class ImagevenueRipper extends AbstractHTMLRipper {
} }
public List<String> getURLsFromPage(Document doc) { public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>(); List<String> imageURLs = new ArrayList<>();
for (Element thumb : doc.select("a[target=_blank]")) { for (Element thumb : doc.select("a[target=_blank]")) {
imageURLs.add(thumb.attr("href")); imageURLs.add(thumb.attr("href"));
} }
@ -83,7 +83,7 @@ public class ImagevenueRipper extends AbstractHTMLRipper {
private URL url; private URL url;
private int index; private int index;
public ImagevenueImageThread(URL url, int index) { ImagevenueImageThread(URL url, int index) {
super(); super();
this.url = url; this.url = url;
this.index = index; this.index = index;

View File

@ -46,11 +46,11 @@ public class ImgboxRipper extends AbstractHTMLRipper {
} }
@Override @Override
public List<String> getURLsFromPage(Document doc) { public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>(); List<String> imageURLs = new ArrayList<>();
for (Element thumb : doc.select("div.boxed-content > a > img")) { for (Element thumb : doc.select("div.boxed-content > a > img")) {
String image = thumb.attr("src") String image = thumb.attr("src").replaceAll("thumbs", "images");
.replaceAll("[-a-zA-Z0-9.]+s.imgbox.com", image = image.replace("_b", "_o");
"i.imgbox.com"); image = image.replaceAll("\\d-s", "i");
imageURLs.add(image); imageURLs.add(image);
} }
return imageURLs; return imageURLs;
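
Reviewer note: for reference, the new transformation above applied to an invented thumbnail URL of the shape the selector picks up:

public class ImgboxThumbDemo {
    public static void main(String[] args) {
        String image = "https://5-s.imgbox.com/AbCd1234_b.jpg"; // hypothetical thumbnail
        image = image.replaceAll("thumbs", "images");
        image = image.replace("_b", "_o");      // big-thumbnail suffix -> original file
        image = image.replaceAll("\\d-s", "i"); // e.g. 5-s.imgbox.com -> i.imgbox.com
        System.out.println(image); // https://i.imgbox.com/AbCd1234_o.jpg
    }
}
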

View File

@ -31,14 +31,15 @@ public class ImgurRipper extends AlbumRipper {
private Document albumDoc; private Document albumDoc;
static enum ALBUM_TYPE { enum ALBUM_TYPE {
ALBUM, ALBUM,
USER, USER,
USER_ALBUM, USER_ALBUM,
USER_IMAGES, USER_IMAGES,
SERIES_OF_IMAGES, SERIES_OF_IMAGES,
SUBREDDIT SUBREDDIT
}; }
private ALBUM_TYPE albumType; private ALBUM_TYPE albumType;
public ImgurRipper(URL url) throws IOException { public ImgurRipper(URL url) throws IOException {
@ -104,7 +105,8 @@ public class ImgurRipper extends AlbumRipper {
*/ */
String title = null; String title = null;
final String defaultTitle = "Imgur: The most awesome images on the Internet"; final String defaultTitle1 = "Imgur: The most awesome images on the Internet";
final String defaultTitle2 = "Imgur: The magic of the Internet";
logger.info("Trying to get album title"); logger.info("Trying to get album title");
elems = albumDoc.select("meta[property=og:title]"); elems = albumDoc.select("meta[property=og:title]");
if (elems != null) { if (elems != null) {
@ -113,7 +115,7 @@ public class ImgurRipper extends AlbumRipper {
} }
// This is here in case the album is unnamed, to prevent // This is here in case the album is unnamed, to prevent
// Imgur: The most awesome images on the Internet from being added onto the album name // Imgur: The most awesome images on the Internet from being added onto the album name
if (title.contains(defaultTitle)) { if (title.contains(defaultTitle1) || title.contains(defaultTitle2)) {
logger.debug("Album is untitled or imgur is returning the default title"); logger.debug("Album is untitled or imgur is returning the default title");
// We set the title to "" here because if it's found in the next few attempts it will be changed // We set the title to "" here because if it's found in the next few attempts it will be changed
// but if it's not found there will be no reason to set it later // but if it's not found there will be no reason to set it later
@ -121,7 +123,7 @@ public class ImgurRipper extends AlbumRipper {
logger.debug("Trying to use title tag to get title"); logger.debug("Trying to use title tag to get title");
elems = albumDoc.select("title"); elems = albumDoc.select("title");
if (elems != null) { if (elems != null) {
if (elems.text().contains(defaultTitle)) { if (elems.text().contains(defaultTitle1) || elems.text().contains(defaultTitle2)) {
logger.debug("Was unable to get album title or album was untitled"); logger.debug("Was unable to get album title or album was untitled");
} }
else { else {
@ -223,7 +225,7 @@ public class ImgurRipper extends AlbumRipper {
String[] imageIds = m.group(1).split(","); String[] imageIds = m.group(1).split(",");
for (String imageId : imageIds) { for (String imageId : imageIds) {
// TODO: Fetch image with ID imageId // TODO: Fetch image with ID imageId
logger.debug("Fetching image info for ID " + imageId);; logger.debug("Fetching image info for ID " + imageId);
try { try {
JSONObject json = Http.url("https://api.imgur.com/2/image/" + imageId + ".json").getJSON(); JSONObject json = Http.url("https://api.imgur.com/2/image/" + imageId + ".json").getJSON();
if (!json.has("image")) { if (!json.has("image")) {
@ -350,7 +352,6 @@ public class ImgurRipper extends AlbumRipper {
Thread.sleep(SLEEP_BETWEEN_ALBUMS * 1000); Thread.sleep(SLEEP_BETWEEN_ALBUMS * 1000);
} catch (Exception e) { } catch (Exception e) {
logger.error("Error while ripping album: " + e.getMessage(), e); logger.error("Error while ripping album: " + e.getMessage(), e);
continue;
} }
} }
} }
@ -448,6 +449,15 @@ public class ImgurRipper extends AlbumRipper {
this.url = new URL("http://imgur.com/a/" + gid); this.url = new URL("http://imgur.com/a/" + gid);
return gid; return gid;
} }
p = Pattern.compile("^https?://(www\\.|m\\.)?imgur\\.com/(a|gallery|t)/[a-zA-Z0-9]*/([a-zA-Z0-9]{5,}).*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
// Imgur album or gallery
albumType = ALBUM_TYPE.ALBUM;
String gid = m.group(m.groupCount());
this.url = new URL("http://imgur.com/a/" + gid);
return gid;
}
p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{3,})\\.imgur\\.com/?$"); p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{3,})\\.imgur\\.com/?$");
m = p.matcher(url.toExternalForm()); m = p.matcher(url.toExternalForm());
if (m.matches()) { if (m.matches()) {
@ -515,12 +525,12 @@ public class ImgurRipper extends AlbumRipper {
} }
public static class ImgurImage { public static class ImgurImage {
public String title = "", String title = "";
description = "", String description = "";
extension = ""; String extension = "";
public URL url = null; public URL url = null;
public ImgurImage(URL url) { ImgurImage(URL url) {
this.url = url; this.url = url;
String tempUrl = url.toExternalForm(); String tempUrl = url.toExternalForm();
this.extension = tempUrl.substring(tempUrl.lastIndexOf('.')); this.extension = tempUrl.substring(tempUrl.lastIndexOf('.'));
@ -528,7 +538,7 @@ public class ImgurRipper extends AlbumRipper {
this.extension = this.extension.substring(0, this.extension.indexOf("?")); this.extension = this.extension.substring(0, this.extension.indexOf("?"));
} }
} }
public ImgurImage(URL url, String title) { ImgurImage(URL url, String title) {
this(url); this(url);
this.title = title; this.title = title;
} }
@ -536,7 +546,7 @@ public class ImgurRipper extends AlbumRipper {
this(url, title); this(url, title);
this.description = description; this.description = description;
} }
public String getSaveAs() { String getSaveAs() {
String saveAs = this.title; String saveAs = this.title;
String u = url.toExternalForm(); String u = url.toExternalForm();
if (u.contains("?")) { if (u.contains("?")) {
@ -554,17 +564,17 @@ public class ImgurRipper extends AlbumRipper {
} }
public static class ImgurAlbum { public static class ImgurAlbum {
public String title = null; String title = null;
public URL url = null; public URL url = null;
public List<ImgurImage> images = new ArrayList<ImgurImage>(); public List<ImgurImage> images = new ArrayList<>();
public ImgurAlbum(URL url) { ImgurAlbum(URL url) {
this.url = url; this.url = url;
} }
public ImgurAlbum(URL url, String title) { public ImgurAlbum(URL url, String title) {
this(url); this(url);
this.title = title; this.title = title;
} }
public void addImage(ImgurImage image) { void addImage(ImgurImage image) {
images.add(image); images.add(image);
} }
} }
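For reference, a minimal standalone sketch of how the tag/gallery pattern added in this diff resolves the album ID as the last capture group (the sample URL is hypothetical):

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class ImgurGidSketch {
    public static void main(String[] args) {
        // Same pattern the diff adds for /a/, /gallery/ and /t/ URLs that carry an extra title segment
        Pattern p = Pattern.compile("^https?://(www\\.|m\\.)?imgur\\.com/(a|gallery|t)/[a-zA-Z0-9]*/([a-zA-Z0-9]{5,}).*$");
        Matcher m = p.matcher("https://imgur.com/t/funny/AbCdE1"); // hypothetical URL
        if (m.matches()) {
            // The ID is always the last group, mirroring m.group(m.groupCount()) in the ripper
            System.out.println(m.group(m.groupCount())); // prints AbCdE1
        }
    }
}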

View File

@ -3,6 +3,8 @@ package com.rarchives.ripme.ripper.rippers;
import java.io.IOException; import java.io.IOException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import java.time.*;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.regex.Matcher; import java.util.regex.Matcher;
@ -12,10 +14,16 @@ import org.json.JSONArray;
import org.json.JSONException; import org.json.JSONException;
import org.json.JSONObject; import org.json.JSONObject;
import com.rarchives.ripme.ripper.AbstractJSONRipper; import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Http;
public class InstagramRipper extends AbstractJSONRipper { import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ui.RipStatusMessage;
import com.rarchives.ripme.utils.Utils;
public class InstagramRipper extends AbstractHTMLRipper {
private String userID; private String userID;
@ -37,131 +45,248 @@ public class InstagramRipper extends AbstractJSONRipper {
return (url.getHost().endsWith("instagram.com")); return (url.getHost().endsWith("instagram.com"));
} }
@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
URL san_url = new URL(url.toExternalForm().replaceAll("\\?hl=\\S*", ""));
logger.info("sanitized URL is " + san_url.toExternalForm());
return san_url;
}
private List<String> getPostsFromSinglePage(Document Doc) {
List<String> imageURLs = new ArrayList<>();
JSONArray datas;
try {
JSONObject json = getJSONFromPage(Doc);
if (json.getJSONObject("entry_data").getJSONArray("PostPage")
.getJSONObject(0).getJSONObject("graphql").getJSONObject("shortcode_media")
.has("edge_sidecar_to_children")) {
datas = json.getJSONObject("entry_data").getJSONArray("PostPage")
.getJSONObject(0).getJSONObject("graphql").getJSONObject("shortcode_media")
.getJSONObject("edge_sidecar_to_children").getJSONArray("edges");
for (int i = 0; i < datas.length(); i++) {
JSONObject data = (JSONObject) datas.get(i);
data = data.getJSONObject("node");
if (data.has("is_video") && data.getBoolean("is_video")) {
imageURLs.add(data.getString("video_url"));
} else {
imageURLs.add(data.getString("display_url"));
}
}
} else {
JSONObject data = json.getJSONObject("entry_data").getJSONArray("PostPage")
.getJSONObject(0).getJSONObject("graphql").getJSONObject("shortcode_media");
if (data.getBoolean("is_video")) {
imageURLs.add(data.getString("video_url"));
} else {
imageURLs.add(data.getString("display_url"));
}
}
return imageURLs;
} catch (IOException e) {
logger.error("Unable to get JSON from page " + url.toExternalForm());
return null;
}
}
@Override @Override
public String getGID(URL url) throws MalformedURLException { public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://instagram.com/([^/]+)"); Pattern p = Pattern.compile("^https?://instagram.com/([^/]+)/?");
Matcher m = p.matcher(url.toExternalForm()); Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) { if (m.matches()) {
return m.group(1); return m.group(1);
} }
p = Pattern.compile("^https?://www.instagram.com/([^/]+)/?(?:\\?hl=\\S*)?/?");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
p = Pattern.compile("^https?://www.instagram.com/p/([a-zA-Z0-9_-]+)/\\?taken-by=([^/]+)/?");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(2) + "_" + m.group(1);
}
p = Pattern.compile("^https?://www.instagram.com/p/([a-zA-Z0-9_-]+)/?");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
p = Pattern.compile("^https?://www.instagram.com/p/([a-zA-Z0-9_-]+)/?(?:\\?hl=\\S*)?/?");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
p = Pattern.compile("^https?://www.instagram.com/explore/tags/([^/]+)/?");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Unable to find user in " + url); throw new MalformedURLException("Unable to find user in " + url);
} }
@Override private JSONObject getJSONFromPage(Document firstPage) throws IOException {
public URL sanitizeURL(URL url) throws MalformedURLException { String jsonText = "";
Pattern p = Pattern.compile("^.*instagram\\.com/([a-zA-Z0-9\\-_.]+).*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return new URL("http://instagram.com/" + m.group(1));
}
throw new MalformedURLException("Expected username in URL (instagram.com/username and not " + url);
}
private String getUserID(URL url) throws IOException {
Pattern p = Pattern.compile("^https?://instagram\\.com/([^/]+)");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new IOException("Unable to find userID at " + this.url);
}
@Override
public JSONObject getFirstPage() throws IOException {
userID = getUserID(url);
String baseURL = "http://instagram.com/" + userID + "/media";
try { try {
JSONObject result = Http.url(baseURL).getJSON(); for (Element script : firstPage.select("script[type=text/javascript]")) {
return result; if (script.data().contains("window._sharedData = ")) {
jsonText = script.data().replaceAll("window._sharedData = ", "");
jsonText = jsonText.replaceAll("};", "}");
}
}
return new JSONObject(jsonText);
} catch (JSONException e) { } catch (JSONException e) {
throw new IOException("Could not get instagram user via: " + baseURL); throw new IOException("Could not get JSON from page");
} }
} }
@Override @Override
public JSONObject getNextPage(JSONObject json) throws IOException { public Document getFirstPage() throws IOException {
userID = getGID(url);
return Http.url(url).get();
}
boolean nextPageAvailable; private String getVideoFromPage(String videoID) {
try { try {
nextPageAvailable = json.getBoolean("more_available"); Document doc = Http.url("https://www.instagram.com/p/" + videoID).get();
} catch (Exception e) { return doc.select("meta[property=og:video]").attr("content");
throw new IOException("No additional pages found"); } catch (IOException e) {
} logger.warn("Unable to get page " + "https://www.instagram.com/p/" + videoID);
if (nextPageAvailable) {
JSONArray items = json.getJSONArray("items");
JSONObject last_item = items.getJSONObject(items.length() - 1);
String nextMaxID = last_item.getString("id");
String baseURL = "http://instagram.com/" + userID + "/media/?max_id=" + nextMaxID;
logger.info("Loading " + baseURL);
sleep(1000);
JSONObject nextJSON = Http.url(baseURL).getJSON();
return nextJSON;
} else {
throw new IOException("No more images found");
} }
return "";
} }
private String getOriginalUrl(String imageURL) { private String getOriginalUrl(String imageURL) {
// Without this regex most images will return a 403 error
imageURL = imageURL.replaceAll("vp/[a-zA-Z0-9]*/", "");
imageURL = imageURL.replaceAll("scontent.cdninstagram.com/hphotos-", "igcdn-photos-d-a.akamaihd.net/hphotos-ak-"); imageURL = imageURL.replaceAll("scontent.cdninstagram.com/hphotos-", "igcdn-photos-d-a.akamaihd.net/hphotos-ak-");
// TODO replace this with a single regex
imageURL = imageURL.replaceAll("p150x150/", "");
imageURL = imageURL.replaceAll("p320x320/", "");
imageURL = imageURL.replaceAll("p480x480/", "");
imageURL = imageURL.replaceAll("p640x640/", "");
imageURL = imageURL.replaceAll("p720x720/", "");
imageURL = imageURL.replaceAll("p1080x1080/", "");
imageURL = imageURL.replaceAll("p2048x2048/", "");
imageURL = imageURL.replaceAll("s150x150/", "");
imageURL = imageURL.replaceAll("s320x320/", "");
imageURL = imageURL.replaceAll("s480x480/", "");
imageURL = imageURL.replaceAll("s640x640/", ""); imageURL = imageURL.replaceAll("s640x640/", "");
imageURL = imageURL.replaceAll("s720x720/", "");
imageURL = imageURL.replaceAll("s1080x1080/", "");
imageURL = imageURL.replaceAll("s2048x2048/", "");
// Instagram returns cropped images to unauthenticated applications to maintain legacy support. // Instagram returns cropped images to unauthenticated applications to maintain legacy support.
// To retrieve the uncropped image, remove this segment from the URL. // To retrieve the uncropped image, remove this segment from the URL.
// Segment format: cX.Y.W.H - eg: c0.134.1080.1080 // Segment format: cX.Y.W.H - eg: c0.134.1080.1080
imageURL = imageURL.replaceAll("\\/c\\d{1,4}\\.\\d{1,4}\\.\\d{1,4}\\.\\d{1,4}", ""); imageURL = imageURL.replaceAll("/c\\d{1,4}\\.\\d{1,4}\\.\\d{1,4}\\.\\d{1,4}", "");
imageURL = imageURL.replaceAll("\\?ig_cache_key.+$", ""); imageURL = imageURL.replaceAll("\\?ig_cache_key.+$", "");
return imageURL; return imageURL;
} }
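To make the crop-segment removal above concrete, a tiny sketch with a made-up URL showing the before/after transformation:

public class CropSegmentSketch {
    public static void main(String[] args) {
        // Hypothetical cropped thumbnail URL
        String cropped = "https://scontent.cdninstagram.com/t51.2885-15/c0.134.1080.1080/s640x640/12345.jpg";
        String full = cropped
                .replaceAll("s640x640/", "")
                .replaceAll("/c\\d{1,4}\\.\\d{1,4}\\.\\d{1,4}\\.\\d{1,4}", "");
        // Both the size directory and the cX.Y.W.H crop segment are gone
        System.out.println(full); // https://scontent.cdninstagram.com/t51.2885-15/12345.jpg
    }
}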
private String getMedia(JSONObject data) {
String imageURL = "";
if (data.has("videos")) {
imageURL = data.getJSONObject("videos").getJSONObject("standard_resolution").getString("url");
} else if (data.has("images")) {
imageURL = data.getJSONObject("images").getJSONObject("standard_resolution").getString("url");
}
return imageURL;
}
@Override @Override
public List<String> getURLsFromJSON(JSONObject json) { public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>(); String nextPageID = "";
JSONArray datas = json.getJSONArray("items"); List<String> imageURLs = new ArrayList<>();
for (int i = 0; i < datas.length(); i++) { JSONObject json = new JSONObject();
JSONObject data = (JSONObject) datas.get(i); try {
json = getJSONFromPage(doc);
} catch (IOException e) {
logger.warn("Unable to exact json from page");
}
String dataType = data.getString("type");
if (dataType.equals("carousel")) { if (!url.toExternalForm().contains("/p/")) {
JSONArray carouselMedias = data.getJSONArray("carousel_media"); JSONArray datas = new JSONArray();
for (int carouselIndex = 0; carouselIndex < carouselMedias.length(); carouselIndex++) { try {
JSONObject carouselMedia = (JSONObject) carouselMedias.get(carouselIndex); JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage");
String imageURL = getMedia(carouselMedia); datas = profilePage.getJSONObject(0).getJSONObject("user").getJSONObject("media").getJSONArray("nodes");
if (!imageURL.equals("")) { } catch (JSONException e) {
imageURL = getOriginalUrl(imageURL); // Handle hashtag pages
imageURLs.add(imageURL); datas = json.getJSONObject("entry_data").getJSONArray("TagPage").getJSONObject(0)
.getJSONObject("tag").getJSONObject("media").getJSONArray("nodes");
}
for (int i = 0; i < datas.length(); i++) {
JSONObject data = (JSONObject) datas.get(i);
Long epoch = data.getLong("date");
Instant instant = Instant.ofEpochSecond(epoch);
String image_date = DateTimeFormatter.ofPattern("yyyy_MM_dd_hh:mm_").format(ZonedDateTime.ofInstant(instant, ZoneOffset.UTC));
if (data.getString("__typename").equals("GraphSidecar")) {
try {
Document slideShowDoc = Http.url(new URL ("https://www.instagram.com/p/" + data.getString("code"))).get();
List<String> toAdd = getPostsFromSinglePage(slideShowDoc);
for (int slideShowInt=0; slideShowInt<toAdd.size(); slideShowInt++) {
addURLToDownload(new URL(toAdd.get(slideShowInt)), image_date + data.getString("code"));
}
} catch (MalformedURLException e) {
logger.error("Unable to download slide show, URL was malformed");
} catch (IOException e) {
logger.error("Unable to download slide show");
} }
} }
} else { try {
String imageURL = getMedia(data); if (!data.getBoolean("is_video")) {
if (!imageURL.equals("")) { if (imageURLs.size() == 0) {
imageURL = getOriginalUrl(imageURL); // We add this one item to the array because otherwise
imageURLs.add(imageURL); // the ripper will error out because we returned an empty array
imageURLs.add(getOriginalUrl(data.getString("thumbnail_src")));
}
addURLToDownload(new URL(getOriginalUrl(data.getString("thumbnail_src"))), image_date);
} else {
if (!Utils.getConfigBoolean("instagram.download_images_only", false)) {
addURLToDownload(new URL(getVideoFromPage(data.getString("code"))), image_date);
} else {
sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Skipping video " + data.getString("code"));
}
}
} catch (MalformedURLException e) {
return imageURLs;
}
nextPageID = data.getString("id");
if (isThisATest()) {
break;
} }
} }
// Rip the next page
if (!nextPageID.equals("") && !isThisATest()) {
if (url.toExternalForm().contains("/tags/")) {
try {
// Sleep for a while to avoid a ban
sleep(2500);
if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) {
getURLsFromPage(Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get());
} else {
getURLsFromPage(Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get());
}
if (isThisATest()) { } catch (IOException e) {
break; return imageURLs;
}
}
try {
// Sleep for a while to avoid a ban
sleep(2500);
getURLsFromPage(Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get());
} catch (IOException e) {
return imageURLs;
}
} else {
logger.warn("Can't get net page");
} }
} else { // We're ripping from a single page
logger.info("Ripping from single page");
imageURLs = getPostsFromSinglePage(doc);
} }
return imageURLs; return imageURLs;
} }
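A condensed, standalone sketch of the window._sharedData extraction the rewritten ripper relies on, assuming jsoup and org.json on the classpath (the profile URL is hypothetical):

import java.io.IOException;

import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

public class SharedDataSketch {

    static JSONObject getSharedData(Document page) throws IOException {
        // Instagram embeds its page state as a JavaScript assignment inside a script tag
        for (Element script : page.select("script[type=text/javascript]")) {
            if (script.data().contains("window._sharedData = ")) {
                String jsonText = script.data()
                        .replaceAll("window._sharedData = ", "")
                        .replaceAll("};", "}"); // strip the trailing semicolon
                return new JSONObject(jsonText);
            }
        }
        throw new IOException("Could not get JSON from page");
    }

    public static void main(String[] args) throws IOException {
        Document doc = Jsoup.connect("https://www.instagram.com/someuser/").get(); // hypothetical profile
        System.out.println(getSharedData(doc).getJSONObject("entry_data"));
    }
}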

View File

@ -4,14 +4,12 @@ import java.io.IOException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Http;
@ -50,7 +48,7 @@ public class JagodibujaRipper extends AbstractHTMLRipper {
@Override @Override
public List<String> getURLsFromPage(Document doc) { public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<String>(); List<String> result = new ArrayList<>();
for (Element comicPageUrl : doc.select("div.gallery-icon > a")) { for (Element comicPageUrl : doc.select("div.gallery-icon > a")) {
try { try {
sleep(500); sleep(500);

View File

@ -1,24 +1,15 @@
package com.rarchives.ripme.ripper.rippers; package com.rarchives.ripme.ripper.rippers;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException; import java.io.IOException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import com.rarchives.ripme.utils.Utils;
import org.jsoup.Connection.Method;
import org.jsoup.Connection.Response;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.safety.Whitelist;
import org.jsoup.select.Elements; import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.ripper.AbstractHTMLRipper;
@ -51,7 +42,7 @@ public class LusciousRipper extends AbstractHTMLRipper {
@Override @Override
public List<String> getURLsFromPage(Document page) { public List<String> getURLsFromPage(Document page) {
List<String> urls = new ArrayList<String>(); List<String> urls = new ArrayList<>();
Elements urlElements = page.select("img#single_picture"); Elements urlElements = page.select("img#single_picture");
for (Element e : urlElements) { for (Element e : urlElements) {
urls.add(e.attr("src")); urls.add(e.attr("src"));

View File

@ -1,166 +0,0 @@
package com.rarchives.ripme.ripper.rippers;
import java.awt.Desktop;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.net.ssl.SSLException;
import javax.swing.JOptionPane;
import org.json.JSONArray;
import org.json.JSONObject;
import com.rarchives.ripme.ripper.AbstractJSONRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;
public class MediacrushRipper extends AbstractJSONRipper {
/** Ordered list of preferred formats, sorted by preference (low-to-high) */
private static final Map<String, Integer> PREFERRED_FORMATS = new HashMap<String,Integer>();
static {
PREFERRED_FORMATS.put("mp4", 0);
PREFERRED_FORMATS.put("wemb",1);
PREFERRED_FORMATS.put("ogv", 2);
PREFERRED_FORMATS.put("mp3", 3);
PREFERRED_FORMATS.put("ogg", 4);
PREFERRED_FORMATS.put("gif", 5);
PREFERRED_FORMATS.put("png", 6);
PREFERRED_FORMATS.put("jpg", 7);
PREFERRED_FORMATS.put("jpeg",8);
};
public MediacrushRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "mediacrush";
}
@Override
public String getDomain() {
return "mediacru.sh";
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("https?://[wm.]*mediacru\\.sh/([a-zA-Z0-9]+).*");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Could not find mediacru.sh page ID from " + url
+ " expected format: http://mediacru.sh/pageid");
}
@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
String u = url.toExternalForm();
// Strip trailing "/" characters
while (u.endsWith("/")) {
u = u.substring(0, u.length() - 1);
}
// Append .json
if (!u.endsWith(".json")) {
u += ".json";
}
return new URL(u);
}
@Override
public JSONObject getFirstPage() throws IOException {
try {
String jsonString = Http.url(url)
.ignoreContentType()
.connection()
.execute().body();
jsonString = jsonString.replace("&quot;", "\"");
return new JSONObject(jsonString);
} catch (SSLException re) {
// Check for the >1024-bit encryption bug present in older versions of Java;
// if so, suggest downloading the latest version.
int selection = JOptionPane.showOptionDialog(null,
"You need to upgrade to the latest Java (7+) to rip this album.\n"
+ "Do you want to open java.com and download the latest version?",
"RipMe - Java Error",
JOptionPane.OK_CANCEL_OPTION,
JOptionPane.ERROR_MESSAGE,
null,
new String[] {"Go to java.com", "Cancel"},
0);
sendUpdate(STATUS.RIP_ERRORED, "Your version of Java can't handle some secure websites");
if (selection == 0) {
URL javaUrl = new URL("https://www.java.com/en/download/");
try {
Desktop.getDesktop().browse(javaUrl.toURI());
} catch (URISyntaxException use) { }
}
throw new IOException("Cannot rip due to limitations in Java installation, consider upgrading Java", re.getCause());
}
catch (Exception e) {
throw new IOException("Unexpected error: " + e.getMessage(), e);
}
}
@Override
public List<String> getURLsFromJSON(JSONObject json) {
List<String> imageURLs = new ArrayList<String>();
// Iterate over all files
JSONArray files = json.getJSONArray("files");
for (int i = 0; i < files.length(); i++) {
JSONObject file = (JSONObject) files.get(i);
// Find preferred file format
JSONArray subfiles = file.getJSONArray("files");
String preferredUrl = getPreferredUrl(subfiles);
if (preferredUrl == null) {
logger.warn("Could not find 'file' inside of " + file);
sendUpdate(STATUS.DOWNLOAD_ERRORED, "Could not find file inside of " + file);
continue;
}
imageURLs.add(preferredUrl);
}
return imageURLs;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
/**
* Iterates over list of "file" objects and returns the preferred
* image format.
* @param subfiles Array of "files" (JSONObjects), each of which contains a media URL
* @return Preferred media format.
*/
private String getPreferredUrl(JSONArray subfiles) {
String preferredUrl = null;
int preferredIndex = Integer.MAX_VALUE;
// Iterate over all media types
for (int j = 0; j < subfiles.length(); j++) {
JSONObject subfile = subfiles.getJSONObject(j);
String thisurl = subfile.getString("url");
String extension = thisurl.substring(thisurl.lastIndexOf(".") + 1);
if (!PREFERRED_FORMATS.containsKey(extension)) {
continue;
}
// Keep track of the most-preferred format
int thisindex = PREFERRED_FORMATS.get(extension);
if (preferredUrl == null || thisindex < preferredIndex) {
preferredIndex = thisindex;
preferredUrl = thisurl;
}
}
return preferredUrl;
}
}
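Since this file is deleted outright, here is a minimal sketch of the preference-ranking idea its getPreferredUrl implemented, with made-up sample URLs and a trimmed rank table:

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class PreferredFormatSketch {
    // Lower rank = more preferred, as in the deleted PREFERRED_FORMATS table (subset shown)
    private static final Map<String, Integer> RANKS = new HashMap<>();
    static {
        RANKS.put("mp4", 0);
        RANKS.put("gif", 5);
        RANKS.put("jpg", 7);
    }

    public static void main(String[] args) {
        List<String> urls = Arrays.asList("https://example.com/a.jpg", "https://example.com/a.mp4");
        String best = null;
        int bestRank = Integer.MAX_VALUE;
        for (String u : urls) {
            String ext = u.substring(u.lastIndexOf('.') + 1);
            Integer rank = RANKS.get(ext);
            if (rank != null && rank < bestRank) { // keep the most-preferred format seen so far
                bestRank = rank;
                best = u;
            }
        }
        System.out.println(best); // https://example.com/a.mp4
    }
}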

View File

@ -3,134 +3,64 @@ package com.rarchives.ripme.ripper.rippers;
import java.io.IOException; import java.io.IOException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import java.util.HashMap; import java.util.ArrayList;
import java.util.Map; import java.util.List;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import org.json.JSONArray;
import org.json.JSONObject;
import org.jsoup.Connection.Method;
import org.jsoup.Connection.Response;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS; import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
public class ModelmayhemRipper extends AlbumRipper { public class ModelmayhemRipper extends AbstractHTMLRipper {
private static final String DOMAIN = "modelmayhem.com",
HOST = "modelmayhem";
public ModelmayhemRipper(URL url) throws IOException { public ModelmayhemRipper(URL url) throws IOException {
super(url); super(url);
} }
@Override
public boolean canRip(URL url) {
return (url.getHost().endsWith(DOMAIN));
}
@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
return url;
}
@Override
public void rip() throws IOException {
Map<String,String> cookies = null,
postData = new HashMap<String,String>();
String gid = getGID(this.url),
ref = "http://www.modelmayhem.com/" + gid;
Response resp = null;
String theurl = "http://www.modelmayhem.com/" + gid;
logger.info("Loading " + theurl);
resp = Jsoup.connect(theurl)
.timeout(5000)
.referrer("")
.userAgent(USER_AGENT)
.method(Method.GET)
.execute();
cookies = resp.cookies();
resp = Jsoup.connect("http://www.modelmayhem.com/includes/js/auth.php")
.cookies(cookies)
.ignoreContentType(true)
.referrer(ref)
.userAgent(USER_AGENT)
.method(Method.GET)
.execute();
String authText = resp.parse().html();
String mmservice = authText.substring(authText.indexOf("token = '") + 9);
mmservice = mmservice.substring(0, mmservice.indexOf("'"));
cookies.putAll(resp.cookies());
cookies.put("worksafe", "0");
theurl = "http://www.modelmayhem.com/services/photo_viewer/albums/" + gid;
postData.put("MMSERVICE", mmservice);
resp = Jsoup.connect(theurl)
.data(postData)
.cookies(cookies)
.referrer(ref)
.userAgent(USER_AGENT)
.method(Method.POST)
.execute();
cookies.putAll(resp.cookies());
theurl = "http://www.modelmayhem.com/services/photo_viewer/pictures/" + gid + "/0/0/1/0";
this.sendUpdate(STATUS.LOADING_RESOURCE, theurl);
logger.info("Loading " + theurl);
resp = Jsoup.connect(theurl)
.data(postData)
.cookies(cookies)
.referrer(ref)
.userAgent(USER_AGENT)
.method(Method.POST)
.execute();
Document doc = resp.parse();
String jsonText = doc.body().html();
jsonText = jsonText.replace("&quot;", "\"");
System.err.println(jsonText);
JSONObject json = new JSONObject(jsonText);
JSONArray pictures = json.getJSONArray("pictures");
for (int i = 0; i < pictures.length(); i++) {
JSONObject picture = pictures.getJSONObject(i);
String bigImage = picture.getString("big_image");
if (bigImage.trim().equals("")) {
logger.info("Got empty image for " + picture.toString(2));
continue;
}
String prefix = "";
if (Utils.getConfigBoolean("download.save_order", true)) {
prefix = String.format("%03d_", i + 1);
}
addURLToDownload(new URL(bigImage), prefix);
if (isThisATest()) {
break;
}
}
waitForThreads();
}
@Override @Override
public String getHost() { public String getHost() {
return HOST; return "modelmayhem";
}
@Override
public String getDomain() {
return "modelmayhem.com";
} }
@Override @Override
public String getGID(URL url) throws MalformedURLException { public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://[w.]*modelmayhem.com.*/([0-9]+)/?.*$"); Pattern p = Pattern.compile("https?://www\\.modelmayhem\\.com/portfolio/(\\d+)/viewall");
Matcher m = p.matcher(url.toExternalForm()); Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) { if (m.matches()) {
return m.group(1); return m.group(1);
} }
throw new MalformedURLException("Modelmayhem user ID not found in " + url + ", expected http://modelmayhem.com/userid"); throw new MalformedURLException("Expected modelmayhem URL format: " +
"modelmayhem.com/portfolio/ID/viewall - got " + url + " instead");
} }
@Override
public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass
return Http.url(url).get();
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<>();
for (Element el : doc.select("tr.a_pics > td > div > a")) {
String image_URL = el.select("img").attr("src").replaceAll("_m", "");
if (image_URL.contains("http")) {
result.add(image_URL);
}
}
return result;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
} }

View File

@ -126,7 +126,7 @@ public class MotherlessRipper extends AlbumRipper {
private URL url; private URL url;
private int index; private int index;
public MotherlessImageThread(URL url, int index) { MotherlessImageThread(URL url, int index) {
super(); super();
this.url = url; this.url = url;
this.index = index; this.index = index;
@ -142,7 +142,7 @@ public class MotherlessRipper extends AlbumRipper {
Document doc = Http.url(u) Document doc = Http.url(u)
.referrer(u) .referrer(u)
.get(); .get();
Pattern p = Pattern.compile("^.*__fileurl = '([^']{1,})';.*$", Pattern.DOTALL); Pattern p = Pattern.compile("^.*__fileurl = '([^']+)';.*$", Pattern.DOTALL);
Matcher m = p.matcher(doc.outerHtml()); Matcher m = p.matcher(doc.outerHtml());
if (m.matches()) { if (m.matches()) {
String file = m.group(1); String file = m.group(1);

View File

@ -11,11 +11,9 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.util.Arrays;
public class MyhentaicomicsRipper extends AbstractHTMLRipper { public class MyhentaicomicsRipper extends AbstractHTMLRipper {
public static boolean isTag; private static boolean isTag;
public MyhentaicomicsRipper(URL url) throws IOException { public MyhentaicomicsRipper(URL url) throws IOException {
super(url); super(url);
@ -47,7 +45,7 @@ public class MyhentaicomicsRipper extends AbstractHTMLRipper {
return ma.group(1); return ma.group(1);
} }
Pattern pat = Pattern.compile("^https?://myhentaicomics.com/index.php/tag/([0-9]*)/?([a-zA-Z%0-9+\\?=:]*)?$"); Pattern pat = Pattern.compile("^https?://myhentaicomics.com/index.php/tag/([0-9]*)/?([a-zA-Z%0-9+?=:]*)?$");
Matcher mat = pat.matcher(url.toExternalForm()); Matcher mat = pat.matcher(url.toExternalForm());
if (mat.matches()) { if (mat.matches()) {
isTag = true; isTag = true;
@ -84,8 +82,8 @@ public class MyhentaicomicsRipper extends AbstractHTMLRipper {
} }
// This replaces getNextPage when downloading from searches and tags // This replaces getNextPage when downloading from searches and tags
public List<String> getNextAlbumPage(String pageUrl) { private List<String> getNextAlbumPage(String pageUrl) {
List<String> albumPagesList = new ArrayList<String>(); List<String> albumPagesList = new ArrayList<>();
int pageNumber = 1; int pageNumber = 1;
albumPagesList.add("http://myhentaicomics.com/index.php/" + pageUrl.split("\\?")[0] + "?page=" + Integer.toString(pageNumber)); albumPagesList.add("http://myhentaicomics.com/index.php/" + pageUrl.split("\\?")[0] + "?page=" + Integer.toString(pageNumber));
while (true) { while (true) {
@ -115,9 +113,9 @@ public class MyhentaicomicsRipper extends AbstractHTMLRipper {
return albumPagesList; return albumPagesList;
} }
public List<String> getAlbumsFromPage(String url) { private List<String> getAlbumsFromPage(String url) {
List<String> pagesToRip; List<String> pagesToRip;
List<String> result = new ArrayList<String>(); List<String> result = new ArrayList<>();
logger.info("Running getAlbumsFromPage"); logger.info("Running getAlbumsFromPage");
Document doc; Document doc;
try { try {
@ -161,7 +159,7 @@ public class MyhentaicomicsRipper extends AbstractHTMLRipper {
url_string = url_string.replace("%28", "_"); url_string = url_string.replace("%28", "_");
url_string = url_string.replace("%29", "_"); url_string = url_string.replace("%29", "_");
url_string = url_string.replace("%2C", "_"); url_string = url_string.replace("%2C", "_");
if (isTag == true) { if (isTag) {
logger.info("Downloading from a tag or search"); logger.info("Downloading from a tag or search");
try { try {
sleep(500); sleep(500);
@ -180,11 +178,11 @@ public class MyhentaicomicsRipper extends AbstractHTMLRipper {
return result; return result;
} }
public List<String> getListOfPages(Document doc) { private List<String> getListOfPages(Document doc) {
List<String> pages = new ArrayList<String>(); List<String> pages = new ArrayList<>();
// Get the link from the last button // Get the link from the last button
String nextPageUrl = doc.select("a.ui-icon-right").last().attr("href"); String nextPageUrl = doc.select("a.ui-icon-right").last().attr("href");
Pattern pat = Pattern.compile("\\/index\\.php\\/tag\\/[0-9]*\\/[a-zA-Z0-9_\\-\\:+]*\\?page=(\\d+)"); Pattern pat = Pattern.compile("/index\\.php/tag/[0-9]*/[a-zA-Z0-9_\\-:+]*\\?page=(\\d+)");
Matcher mat = pat.matcher(nextPageUrl); Matcher mat = pat.matcher(nextPageUrl);
if (mat.matches()) { if (mat.matches()) {
logger.debug("Getting pages from a tag"); logger.debug("Getting pages from a tag");
@ -197,7 +195,7 @@ public class MyhentaicomicsRipper extends AbstractHTMLRipper {
pages.add(link); pages.add(link);
} }
} else { } else {
Pattern pa = Pattern.compile("\\/index\\.php\\/search\\?q=[a-zA-Z0-9_\\-\\:]*\\&page=(\\d+)"); Pattern pa = Pattern.compile("/index\\.php/search\\?q=[a-zA-Z0-9_\\-:]*&page=(\\d+)");
Matcher ma = pa.matcher(nextPageUrl); Matcher ma = pa.matcher(nextPageUrl);
if (ma.matches()) { if (ma.matches()) {
logger.debug("Getting pages from a search"); logger.debug("Getting pages from a search");
@ -217,7 +215,7 @@ public class MyhentaicomicsRipper extends AbstractHTMLRipper {
@Override @Override
public List<String> getURLsFromPage(Document doc) { public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<String>(); List<String> result = new ArrayList<>();
// Checks if this is a comic page or a page of albums // Checks if this is a comic page or a page of albums
// If true the page is a page of albums // If true the page is a page of albums
if (doc.toString().contains("class=\"g-item g-album\"")) { if (doc.toString().contains("class=\"g-item g-album\"")) {
@ -241,7 +239,7 @@ public class MyhentaicomicsRipper extends AbstractHTMLRipper {
for (Element el : doc.select("img")) { for (Element el : doc.select("img")) {
String imageSource = el.attr("src"); String imageSource = el.attr("src");
// This check is here so we don't try to download the site logo // This check is here so we don't try to download the site logo
if (!imageSource.startsWith("http://")) { if (!imageSource.startsWith("http://") && !imageSource.startsWith("https://")) {
// We replace thumbs with resizes so we can get the full-sized images // We replace thumbs with resizes so we can get the full-sized images
imageSource = imageSource.replace("thumbs", "resizes"); imageSource = imageSource.replace("thumbs", "resizes");
result.add("http://myhentaicomics.com/" + imageSource); result.add("http://myhentaicomics.com/" + imageSource);

View File

@ -86,7 +86,7 @@ public class NatalieMuRipper extends AbstractHTMLRipper {
@Override @Override
public List<String> getURLsFromPage(Document page) { public List<String> getURLsFromPage(Document page) {
List<String> imageURLs = new ArrayList<String>(); List<String> imageURLs = new ArrayList<>();
Pattern p; Matcher m; Pattern p; Matcher m;
//select all album thumbnails //select all album thumbnails
for (Element span : page.select(".NA_articleGallery span")) { for (Element span : page.select(".NA_articleGallery span")) {

View File

@ -75,7 +75,7 @@ public class NfsfwRipper extends AlbumRipper {
@Override @Override
public void rip() throws IOException { public void rip() throws IOException {
List<Pair> subAlbums = new ArrayList<Pair>(); List<Pair> subAlbums = new ArrayList<>();
int index = 0; int index = 0;
subAlbums.add(new Pair(this.url.toExternalForm(), "")); subAlbums.add(new Pair(this.url.toExternalForm(), ""));
while (subAlbums.size() > 0) { while (subAlbums.size() > 0) {
@ -153,7 +153,7 @@ public class NfsfwRipper extends AlbumRipper {
private String subdir; private String subdir;
private int index; private int index;
public NfsfwImageThread(URL url, String subdir, int index) { NfsfwImageThread(URL url, String subdir, int index) {
super(); super();
this.url = url; this.url = url;
this.subdir = subdir; this.subdir = subdir;
@ -187,8 +187,9 @@ public class NfsfwRipper extends AlbumRipper {
} }
private class Pair { private class Pair {
public String first, second; String first;
public Pair(String first, String second) { String second;
Pair(String first, String second) {
this.first = first; this.first = first;
this.second = second; this.second = second;
} }

View File

@ -61,7 +61,7 @@ public class NhentaiRipper extends AbstractHTMLRipper {
if (title == null) { if (title == null) {
return getAlbumTitle(url); return getAlbumTitle(url);
} }
return title; return "nhentai" + title;
} }
@Override @Override
@ -87,7 +87,7 @@ public class NhentaiRipper extends AbstractHTMLRipper {
@Override @Override
public List<String> getURLsFromPage(Document page) { public List<String> getURLsFromPage(Document page) {
List<String> imageURLs = new ArrayList<String>(); List<String> imageURLs = new ArrayList<>();
Elements thumbs = page.select(".gallerythumb"); Elements thumbs = page.select(".gallerythumb");
for (Element el : thumbs) { for (Element el : thumbs) {
String imageUrl = el.attr("href"); String imageUrl = el.attr("href");

View File

@ -55,7 +55,7 @@ public class NudeGalsRipper extends AbstractHTMLRipper {
Pattern p; Pattern p;
Matcher m; Matcher m;
p = Pattern.compile("^.*nude-gals\\.com\\/photoshoot\\.php\\?photoshoot_id=(\\d+)$"); p = Pattern.compile("^.*nude-gals\\.com/photoshoot\\.php\\?photoshoot_id=(\\d+)$");
m = p.matcher(url.toExternalForm()); m = p.matcher(url.toExternalForm());
if (m.matches()) { if (m.matches()) {
return m.group(1); return m.group(1);
@ -77,7 +77,7 @@ public class NudeGalsRipper extends AbstractHTMLRipper {
@Override @Override
public List<String> getURLsFromPage(Document doc) { public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>(); List<String> imageURLs = new ArrayList<>();
Elements thumbs = doc.select("#grid_container .grid > .grid_box"); Elements thumbs = doc.select("#grid_container .grid > .grid_box");
for (Element thumb : thumbs) { for (Element thumb : thumbs) {

View File

@ -0,0 +1,87 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
public class OglafRipper extends AbstractHTMLRipper {
public OglafRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "oglaf";
}
@Override
public String getDomain() {
return "oglaf.com";
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("http://oglaf\\.com/([a-zA-Z1-9_-]*)/?");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Expected oglaf URL format: " +
"oglaf.com/NAME - got " + url + " instead");
}
@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
return getDomain();
}
@Override
public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass
return Http.url(url).get();
}
@Override
public Document getNextPage(Document doc) throws IOException {
if (doc.select("div#nav > a > div#nx").first() == null) {
throw new IOException("No more pages");
}
Element elem = doc.select("div#nav > a > div#nx").first().parent();
String nextPage = elem.attr("href");
// Sometimes this returns an empty string
// This check stops that
if (nextPage.equals("")) {
throw new IOException("No more pages");
}
else {
sleep(1000);
return Http.url("http://oglaf.com" + nextPage).get();
}
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<>();
for (Element el : doc.select("b > img#strip")) {
String imageSource = el.select("img").attr("src");
result.add(imageSource);
}
return result;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}

View File

@ -13,21 +13,22 @@ import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.select.Elements; import org.jsoup.select.Elements;
public class PahealRipper extends AbstractHTMLRipper { public class PahealRipper extends AbstractHTMLRipper {
private static final Logger logger = Logger.getLogger(PahealRipper.class);
private static Map<String, String> cookies = null; private static Map<String, String> cookies = null;
private static Pattern gidPattern = null; private static Pattern gidPattern = null;
private static Map<String, String> getCookies() { private static Map<String, String> getCookies() {
if (cookies == null) { if (cookies == null) {
cookies = new HashMap<String, String>(1); cookies = new HashMap<>(1);
cookies.put("ui-tnc-agreed", "true"); cookies.put("ui-tnc-agreed", "true");
} }
return cookies; return cookies;
@ -66,7 +67,7 @@ public class PahealRipper extends AbstractHTMLRipper {
@Override @Override
public List<String> getURLsFromPage(Document page) { public List<String> getURLsFromPage(Document page) {
Elements elements = page.select(".shm-thumb.thumb>a").not(".shm-thumb-link"); Elements elements = page.select(".shm-thumb.thumb>a").not(".shm-thumb-link");
List<String> res = new ArrayList<String>(elements.size()); List<String> res = new ArrayList<>(elements.size());
for (Element e : elements) { for (Element e : elements) {
res.add(e.absUrl("href")); res.add(e.absUrl("href"));
@ -92,10 +93,8 @@ public class PahealRipper extends AbstractHTMLRipper {
+ Utils.filesystemSafe(new URI(name).getPath()) + Utils.filesystemSafe(new URI(name).getPath())
+ ext); + ext);
addURLToDownload(url, outFile); addURLToDownload(url, outFile);
} catch (IOException ex) { } catch (IOException | URISyntaxException ex) {
Logger.getLogger(PahealRipper.class.getName()).log(Level.SEVERE, null, ex); logger.error("Error while downloading URL " + url, ex);
} catch (URISyntaxException ex) {
Logger.getLogger(PahealRipper.class.getName()).log(Level.SEVERE, null, ex);
} }
} }
@ -117,7 +116,7 @@ public class PahealRipper extends AbstractHTMLRipper {
try { try {
return Utils.filesystemSafe(new URI(getTerm(url)).getPath()); return Utils.filesystemSafe(new URI(getTerm(url)).getPath());
} catch (URISyntaxException ex) { } catch (URISyntaxException ex) {
Logger.getLogger(PahealRipper.class.getName()).log(Level.SEVERE, null, ex); logger.error(ex);
} }
throw new MalformedURLException("Expected paheal.net URL format: rule34.paheal.net/post/list/searchterm - got " + url + " instead"); throw new MalformedURLException("Expected paheal.net URL format: rule34.paheal.net/post/list/searchterm - got " + url + " instead");

View File

@ -85,8 +85,8 @@ public class PhotobucketRipper extends AlbumRipper {
public void rip() throws IOException { public void rip() throws IOException {
List<String> subalbums = ripAlbumAndGetSubalbums(this.url.toExternalForm()); List<String> subalbums = ripAlbumAndGetSubalbums(this.url.toExternalForm());
List<String> subsToRip = new ArrayList<String>(), List<String> subsToRip = new ArrayList<>(),
rippedSubs = new ArrayList<String>(); rippedSubs = new ArrayList<>();
for (String sub : subalbums) { for (String sub : subalbums) {
subsToRip.add(sub); subsToRip.add(sub);
@ -117,7 +117,7 @@ public class PhotobucketRipper extends AlbumRipper {
waitForThreads(); waitForThreads();
} }
public List<String> ripAlbumAndGetSubalbums(String theUrl) throws IOException { private List<String> ripAlbumAndGetSubalbums(String theUrl) throws IOException {
int filesIndex = 0, int filesIndex = 0,
filesTotal = 0, filesTotal = 0,
pageIndex = 0; pageIndex = 0;
@ -145,7 +145,7 @@ public class PhotobucketRipper extends AlbumRipper {
} }
// Grab the JSON // Grab the JSON
Pattern p; Matcher m; Pattern p; Matcher m;
p = Pattern.compile("^.*collectionData: (\\{.*\\}).*$", Pattern.DOTALL); p = Pattern.compile("^.*collectionData: (\\{.*}).*$", Pattern.DOTALL);
m = p.matcher(data); m = p.matcher(data);
if (m.matches()) { if (m.matches()) {
jsonString = m.group(1); jsonString = m.group(1);
@ -176,12 +176,12 @@ public class PhotobucketRipper extends AlbumRipper {
if (url != null) { if (url != null) {
return getSubAlbums(url, currentAlbumPath); return getSubAlbums(url, currentAlbumPath);
} else { } else {
return new ArrayList<String>(); return new ArrayList<>();
} }
} }
private List<String> getSubAlbums(String url, String currentAlbumPath) { private List<String> getSubAlbums(String url, String currentAlbumPath) {
List<String> result = new ArrayList<String>(); List<String> result = new ArrayList<>();
String subdomain = url.substring(url.indexOf("://")+3); String subdomain = url.substring(url.indexOf("://")+3);
subdomain = subdomain.substring(0, subdomain.indexOf(".")); subdomain = subdomain.substring(0, subdomain.indexOf("."));
String apiUrl = "http://" + subdomain + ".photobucket.com/component/Albums-SubalbumList" String apiUrl = "http://" + subdomain + ".photobucket.com/component/Albums-SubalbumList"

View File

@ -0,0 +1,107 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
public class PichunterRipper extends AbstractHTMLRipper {
public PichunterRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "pichunter";
}
@Override
public String getDomain() {
return "pichunter.com";
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("https?://www.pichunter.com/(|tags|models|sites)/(\\S*)/?");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(2);
}
p = Pattern.compile("https?://www.pichunter.com/(tags|models|sites)/(\\S*)/photos/\\d+/?");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(2);
}
p = Pattern.compile("https?://www.pichunter.com/tags/all/(\\S*)/\\d+/?");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
p = Pattern.compile("https?://www.pichunter.com/gallery/\\d+/(\\S*)/?");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Expected pichunter URL format: " +
"pichunter.com/(tags|models|sites)/Name/ - got " + url + " instead");
}
private boolean isPhotoSet(URL url) {
Pattern p = Pattern.compile("https?://www.pichunter.com/gallery/\\d+/(\\S*)/?");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return true;
}
return false;
}
@Override
public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass
return Http.url(url).get();
}
@Override
public Document getNextPage(Document doc) throws IOException {
// We use the pagination arrow to find the next page
Element elem = doc.select("div.paperSpacings > ul > li.arrow").last();
if (elem != null) {
String nextPage = elem.select("a").attr("href");
// Sometimes this returns an empty string
return Http.url("http://www.pichunter.com" + nextPage).get();
}
throw new IOException("No more pages");
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<>();
if (!isPhotoSet(url)) {
for (Element el : doc.select("div.thumbtable > a.thumb > img")) {
result.add(el.attr("src").replaceAll("_i", "_o"));
}
} else {
for (Element el : doc.select("div.flex-images > figure > a.item > img")) {
result.add(el.attr("src").replaceAll("_i", "_o"));
}
}
return result;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}

View File

@ -0,0 +1,67 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
public class PorncomixRipper extends AbstractHTMLRipper {
public PorncomixRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "porncomix";
}
@Override
public String getDomain() {
return "porncomix.info";
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("https?://www.porncomix.info/([a-zA-Z0-9_\\-]*)/?$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Expected proncomix URL format: " +
"porncomix.info/comic - got " + url + " instead");
}
@Override
public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass
return Http.url(url).get();
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<>();
for (Element el : doc.select("div.single-post > div.gallery > dl > dt > a > img")) {
String imageSource = el.attr("data-lazy-src");
// We remove the size suffix (e.g. -200x300) from images so we download the full size image
// not the thumbnail ones
imageSource = imageSource.replaceAll("-\\d\\d\\dx\\d\\d\\d", "");
result.add(imageSource);
}
return result;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}
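A quick illustration of the thumbnail-suffix stripping performed in getURLsFromPage above (the URL is made up):

public class SizeSuffixSketch {
    public static void main(String[] args) {
        String thumb = "https://www.porncomix.info/wp-content/uploads/page-01-200x300.jpg"; // hypothetical
        // Dropping the -WxH suffix yields the full-size image URL
        System.out.println(thumb.replaceAll("-\\d\\d\\dx\\d\\d\\d", ""));
        // -> https://www.porncomix.info/wp-content/uploads/page-01.jpg
    }
}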

View File

@ -19,7 +19,7 @@ import com.rarchives.ripme.utils.Utils;
public class PornhubRipper extends AlbumRipper { public class PornhubRipper extends AlbumRipper {
// All sleep times are in milliseconds // All sleep times are in milliseconds
private static final int IMAGE_SLEEP_TIME = 1 * 1000; private static final int IMAGE_SLEEP_TIME = 1000;
private static final String DOMAIN = "pornhub.com", HOST = "Pornhub"; private static final String DOMAIN = "pornhub.com", HOST = "Pornhub";
@ -134,7 +134,7 @@ public class PornhubRipper extends AlbumRipper {
private URL url; private URL url;
private int index; private int index;
public PornhubImageThread(URL url, int index, File workingDir) { PornhubImageThread(URL url, int index, File workingDir) {
super(); super();
this.url = url; this.url = url;
this.index = index; this.index = index;

View File

@ -1,5 +1,10 @@
package com.rarchives.ripme.ripper.rippers; package com.rarchives.ripme.ripper.rippers;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import java.io.IOException; import java.io.IOException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
@ -8,54 +13,47 @@ import java.util.List;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper; public class PornpicsRipper extends AbstractHTMLRipper {
import com.rarchives.ripme.utils.Http;
public class DatwinRipper extends AbstractHTMLRipper { public PornpicsRipper(URL url) throws IOException {
public DatwinRipper(URL url) throws IOException {
super(url); super(url);
} }
@Override @Override
public String getHost() { public String getHost() {
return "datwin"; return "pornpics";
} }
@Override @Override
public String getDomain() { public String getDomain() {
return "datw.in"; return "pornpics.com";
} }
@Override @Override
public String getGID(URL url) throws MalformedURLException { public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^.*datw.in/([a-zA-Z0-9\\-_]+).*$"); Pattern p = Pattern.compile("https?://www.pornpics.com/galleries/([a-zA-Z0-9_-]*)/?");
Matcher m = p.matcher(url.toExternalForm()); Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) { if (m.matches()) {
return m.group(1); return m.group(1);
} }
throw new MalformedURLException( throw new MalformedURLException("Expected pornpics URL format: " +
"Expected datw.in gallery formats: " "www.pornpics.com/galleries/ID - got " + url + " instead");
+ "datw.in/..."
+ " Got: " + url);
} }
@Override @Override
public Document getFirstPage() throws IOException { public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass
return Http.url(url).get(); return Http.url(url).get();
} }
@Override @Override
public List<String> getURLsFromPage(Document doc) { public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>(); List<String> result = new ArrayList<>();
for (Element thumb : doc.select("img.attachment-thumbnail")) { for (Element el : doc.select("a.rel-link")) {
String image = thumb.attr("src"); result.add(el.attr("href"));
image = image.replaceAll("-\\d{1,3}x\\d{1,3}", "");
imageURLs.add(image);
} }
return imageURLs; return result;
} }
@Override @Override

View File

@ -64,7 +64,7 @@ public class RajceRipper extends AbstractHTMLRipper {
@Override @Override
public List<String> getURLsFromPage(Document page) { public List<String> getURLsFromPage(Document page) {
List<String> result = new ArrayList<String>(); List<String> result = new ArrayList<>();
for (Element el : page.select("a.photoThumb")) { for (Element el : page.select("a.photoThumb")) {
result.add(el.attr("href")); result.add(el.attr("href"));
} }

View File

@ -27,7 +27,7 @@ public class RedditRipper extends AlbumRipper {
private static final String HOST = "reddit"; private static final String HOST = "reddit";
private static final String DOMAIN = "reddit.com"; private static final String DOMAIN = "reddit.com";
private static final String REDDIT_USER_AGENT = "RipMe:github/4pr0n/ripme:" + UpdateUtils.getThisJarVersion() + " (by /u/4_pr0n)"; private static final String REDDIT_USER_AGENT = "RipMe:github.com/RipMeApp/ripme:" + UpdateUtils.getThisJarVersion() + " (by /u/metaprime and /u/ineedmorealts)";
private static final int SLEEP_TIME = 2000; private static final int SLEEP_TIME = 2000;
@ -131,7 +131,7 @@ public class RedditRipper extends AlbumRipper {
Object jsonObj = new JSONTokener(jsonString).nextValue(); Object jsonObj = new JSONTokener(jsonString).nextValue();
JSONArray jsonArray = new JSONArray(); JSONArray jsonArray = new JSONArray();
if (jsonObj instanceof JSONObject) { if (jsonObj instanceof JSONObject) {
jsonArray.put( (JSONObject) jsonObj); jsonArray.put(jsonObj);
} else if (jsonObj instanceof JSONArray) { } else if (jsonObj instanceof JSONArray) {
jsonArray = (JSONArray) jsonObj; jsonArray = (JSONArray) jsonObj;
} else { } else {
@ -167,7 +167,7 @@ public class RedditRipper extends AlbumRipper {
} }
} }
public void handleBody(String body, String id) { private void handleBody(String body, String id) {
Pattern p = RipUtils.getURLRegex(); Pattern p = RipUtils.getURLRegex();
Matcher m = p.matcher(body); Matcher m = p.matcher(body);
while (m.find()) { while (m.find()) {
@ -179,7 +179,7 @@ public class RedditRipper extends AlbumRipper {
} }
} }
public void handleURL(String theUrl, String id) { private void handleURL(String theUrl, String id) {
URL originalURL; URL originalURL;
try { try {
originalURL = new URL(theUrl); originalURL = new URL(theUrl);
@ -220,21 +220,21 @@ public class RedditRipper extends AlbumRipper {
@Override @Override
public String getGID(URL url) throws MalformedURLException { public String getGID(URL url) throws MalformedURLException {
// User // User
Pattern p = Pattern.compile("^https?://[a-zA-Z0-9\\.]{0,4}reddit\\.com/(user|u)/([a-zA-Z0-9_\\-]{3,}).*$"); Pattern p = Pattern.compile("^https?://[a-zA-Z0-9.]{0,4}reddit\\.com/(user|u)/([a-zA-Z0-9_\\-]{3,}).*$");
Matcher m = p.matcher(url.toExternalForm()); Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) { if (m.matches()) {
return "user_" + m.group(m.groupCount()); return "user_" + m.group(m.groupCount());
} }
// Post // Post
p = Pattern.compile("^https?://[a-zA-Z0-9\\.]{0,4}reddit\\.com/.*comments/([a-zA-Z0-9]{1,8}).*$"); p = Pattern.compile("^https?://[a-zA-Z0-9.]{0,4}reddit\\.com/.*comments/([a-zA-Z0-9]{1,8}).*$");
m = p.matcher(url.toExternalForm()); m = p.matcher(url.toExternalForm());
if (m.matches()) { if (m.matches()) {
return "post_" + m.group(m.groupCount()); return "post_" + m.group(m.groupCount());
} }
// Subreddit // Subreddit
p = Pattern.compile("^https?://[a-zA-Z0-9\\.]{0,4}reddit\\.com/r/([a-zA-Z0-9_]{1,}).*$"); p = Pattern.compile("^https?://[a-zA-Z0-9.]{0,4}reddit\\.com/r/([a-zA-Z0-9_]+).*$");
m = p.matcher(url.toExternalForm()); m = p.matcher(url.toExternalForm());
if (m.matches()) { if (m.matches()) {
return "sub_" + m.group(m.groupCount()); return "sub_" + m.group(m.groupCount());

View File

@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Http;
public class SankakuComplexRipper extends AbstractHTMLRipper { public class SankakuComplexRipper extends AbstractHTMLRipper {
private Document albumDoc = null; private Document albumDoc = null;
private Map<String,String> cookies = new HashMap<String,String>(); private Map<String,String> cookies = new HashMap<>();
public SankakuComplexRipper(URL url) throws IOException { public SankakuComplexRipper(URL url) throws IOException {
super(url); super(url);
@ -43,7 +43,7 @@ public class SankakuComplexRipper extends AbstractHTMLRipper {
Matcher m = p.matcher(url.toExternalForm()); Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) { if (m.matches()) {
try { try {
return URLDecoder.decode(m.group(1), "UTF-8"); return URLDecoder.decode(m.group(2), "UTF-8");
} catch (UnsupportedEncodingException e) { } catch (UnsupportedEncodingException e) {
throw new MalformedURLException("Cannot decode tag name '" + m.group(1) + "'"); throw new MalformedURLException("Cannot decode tag name '" + m.group(1) + "'");
} }
@ -65,34 +65,41 @@ public class SankakuComplexRipper extends AbstractHTMLRipper {
@Override @Override
public List<String> getURLsFromPage(Document doc) { public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>(); List<String> imageURLs = new ArrayList<>();
// Image URLs are basically thumbnail URLs with a different domain, a simple // Image URLs are basically thumbnail URLs with a different domain, a simple
// path replacement, and a ?xxxxxx post ID at the end (obtainable from the href) // path replacement, and a ?xxxxxx post ID at the end (obtainable from the href)
for (Element thumbSpan : doc.select("div.content > div > span.thumb")) { for (Element thumbSpan : doc.select("div.content > div > span.thumb > a")) {
String postId = thumbSpan.attr("id").replaceAll("p", ""); String postLink = thumbSpan.attr("href");
Element thumb = thumbSpan.getElementsByTag("img").first(); try {
String image = thumb.attr("abs:src") // Get the page the full sized image is on
.replace(".sankakucomplex.com/data/preview", Document subPage = Http.url("https://chan.sankakucomplex.com" + postLink).get();
"s.sankakucomplex.com/data") + "?" + postId; logger.info("Checking page " + "https://chan.sankakucomplex.com" + postLink);
imageURLs.add(image); imageURLs.add("https:" + subPage.select("div[id=stats] > ul > li > a[id=highres]").attr("href"));
} catch (IOException e) {
logger.warn("Error while loading page " + postLink, e);
}
} }
return imageURLs; return imageURLs;
} }
@Override @Override
public void downloadURL(URL url, int index) { public void downloadURL(URL url, int index) {
// Mock up the URL of the post page based on the post ID at the end of the URL. sleep(8000);
String postId = url.toExternalForm().replaceAll(".*\\?", ""); addURLToDownload(url, getPrefix(index));
addURLToDownload(url, postId + "_", "", "", null);
} }
@Override @Override
public Document getNextPage(Document doc) throws IOException { public Document getNextPage(Document doc) throws IOException {
Element pagination = doc.select("div.pagination").first(); Element pagination = doc.select("div.pagination").first();
if (pagination.hasAttr("next-page-url")) { if (pagination.hasAttr("next-page-url")) {
return Http.url(pagination.attr("abs:next-page-url")).cookies(cookies).get(); String nextPage = pagination.attr("abs:next-page-url");
} else { // Only logged in users can see past page 25
return null; // Trying to rip page 26 will throw a no images found error
if (!nextPage.contains("page=26")) {
logger.info("Getting next page: " + pagination.attr("abs:next-page-url"));
return Http.url(pagination.attr("abs:next-page-url")).cookies(cookies).get();
}
} }
throw new IOException("No more pages");
} }
} }
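The rewritten getURLsFromPage no longer guesses full-size URLs from thumbnails; it opens each post page and reads the href of the #highres anchor, which the site serves protocol-relative. A minimal sketch of that lookup against a made-up fragment of a post's stats block (the href is invented):

    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;

    // Demonstrates the highres selector used above, on an in-memory fragment.
    public class SankakuHighresSketch {
        public static void main(String[] args) {
            String html = "<div id=\"stats\"><ul><li>"
                    + "<a id=\"highres\" href=\"//cs.sankakucomplex.com/data/ab/cd/abcd1234.jpg\">Original</a>"
                    + "</li></ul></div>";
            Document subPage = Jsoup.parse(html);
            // The href is protocol-relative, hence the "https:" prefix in the ripper.
            System.out.println("https:" + subPage.select("div[id=stats] > ul > li > a[id=highres]").attr("href"));
        }
    }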

View File

@ -48,10 +48,10 @@ public class ShesFreakyRipper extends AbstractHTMLRipper {
@Override @Override
public List<String> getURLsFromPage(Document doc) { public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>(); List<String> imageURLs = new ArrayList<>();
for (Element thumb : doc.select("a[data-lightbox=\"gallery\"]")) { for (Element thumb : doc.select("a[data-lightbox=\"gallery\"]")) {
String image = thumb.attr("href"); String image = thumb.attr("href");
imageURLs.add(image); imageURLs.add("https:" + image);
} }
return imageURLs; return imageURLs;
} }
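The gallery hrefs are protocol-relative, so the loop now prefixes the scheme. A tiny sketch, with a made-up href:

    // Prefixing a protocol-relative href, as the loop above now does.
    public class ProtocolRelativeSketch {
        public static void main(String[] args) {
            String image = "//cdn.shesfreaky.com/galleries/some-set/01.jpg"; // made-up value
            System.out.println("https:" + image); // -> https://cdn.shesfreaky.com/...
        }
    }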

View File

@ -2,24 +2,16 @@ package com.rarchives.ripme.ripper.rippers;
import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL; import java.net.URL;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class SinnercomicsRipper extends AbstractHTMLRipper { public class SinnercomicsRipper extends AbstractHTMLRipper {
@ -71,7 +63,7 @@ public class SinnercomicsRipper extends AbstractHTMLRipper {
@Override @Override
public List<String> getURLsFromPage(Document doc) { public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<String>(); List<String> result = new ArrayList<>();
for (Element el : doc.select("meta[property=og:image]")) { for (Element el : doc.select("meta[property=og:image]")) {
String imageSource = el.attr("content"); String imageSource = el.attr("content");
imageSource = imageSource.replace(" alt=", ""); imageSource = imageSource.replace(" alt=", "");
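SinnercomicsRipper pulls its image URLs from Open Graph meta tags, stripping a stray " alt=" that apparently leaks into the content attribute. A sketch of the og:image selection over made-up markup:

    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;

    // Selects og:image meta tags from an in-memory document, as the loop above
    // does for a live page. The markup is a made-up example.
    public class OgImageSketch {
        public static void main(String[] args) {
            String html = "<html><head>"
                    + "<meta property=\"og:image\" content=\"https://example.com/page-01.jpg\"/>"
                    + "</head><body></body></html>";
            Document doc = Jsoup.parse(html);
            for (Element el : doc.select("meta[property=og:image]")) {
                System.out.println(el.attr("content"));
            }
        }
    }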

View File

@ -1,95 +0,0 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.HttpStatusException;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AlbumRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;
/**
* Appears to be broken as of 2015-02-11.
* Looks like supertangas changed their site completely.
*/
public class SupertangasRipper extends AlbumRipper {
private static final String DOMAIN = "supertangas.com",
HOST = "supertangas";
public SupertangasRipper(URL url) throws IOException {
super(url);
}
@Override
public boolean canRip(URL url) {
return url.getHost().endsWith(DOMAIN);
}
@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
return url;
}
@Override
public void rip() throws IOException {
int page = 0;
String baseURL = "http://www.supertangas.com/fotos/?level=search&exact=1&searchterms=" + this.getGID(this.url);
Document doc;
while (true) {
page++;
String theURL = baseURL;
if (page > 1) {
theURL += "&plog_page=" + page;
}
try {
logger.info(" Retrieving " + theURL);
sendUpdate(STATUS.LOADING_RESOURCE, theURL);
doc = Http.url(theURL).get();
} catch (HttpStatusException e) {
logger.debug("Hit end of pages at page " + page, e);
break;
}
Elements images = doc.select("li.thumbnail a");
if (images.size() == 0) {
break;
}
for (Element imageElement : images) {
String image = imageElement.attr("href");
image = image.replaceAll("\\/fotos\\/", "/fotos/images/");
addURLToDownload(new URL(image));
}
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
logger.error("[!] Interrupted while waiting to load next page", e);
break;
}
}
waitForThreads();
}
@Override
public String getHost() {
return HOST;
}
@Override
public String getGID(URL url) throws MalformedURLException {
// http://www.supertangas.com/fotos/?level=search&exact=1&searchterms=Tahiticora%20(France)
Pattern p = Pattern.compile("^https?://[w.]*supertangas\\.com/fotos/\\?.*&searchterms=([a-zA-Z0-9%()+]+).*$");
Matcher m = p.matcher(url.toExternalForm());
if (!m.matches()) {
throw new MalformedURLException("Expected format: http://supertangas.com/fotos/?level=search&exact=1&searchterms=...");
}
return m.group(m.groupCount());
}
}

View File

@ -18,19 +18,19 @@ import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils; import com.rarchives.ripme.utils.Utils;
class TapasticEpisode { class TapasticEpisode {
protected int index, id; int id;
protected String title, filename; String filename;
public TapasticEpisode(int index, int id, String title) { public TapasticEpisode(int index, int id, String title) {
this.index = index; int index1 = index;
this.id = id; this.id = id;
this.title = title; String title1 = title;
this.filename = Utils.filesystemSafe(title); this.filename = Utils.filesystemSafe(title);
} }
} }
public class TapasticRipper extends AbstractHTMLRipper { public class TapasticRipper extends AbstractHTMLRipper {
private List<TapasticEpisode> episodes=new ArrayList<TapasticEpisode>(); private List<TapasticEpisode> episodes= new ArrayList<>();
public TapasticRipper(URL url) throws IOException { public TapasticRipper(URL url) throws IOException {
super(url); super(url);
@ -38,12 +38,12 @@ public class TapasticRipper extends AbstractHTMLRipper {
@Override @Override
public String getDomain() { public String getDomain() {
return "tapastic.com"; return "tapas.io";
} }
@Override @Override
public String getHost() { public String getHost() {
return "tapastic"; return "tapas";
} }
@Override @Override
@ -53,7 +53,7 @@ public class TapasticRipper extends AbstractHTMLRipper {
@Override @Override
public List<String> getURLsFromPage(Document page) { public List<String> getURLsFromPage(Document page) {
List<String> urls = new ArrayList<String>(); List<String> urls = new ArrayList<>();
String html = page.data(); String html = page.data();
if (!html.contains("episodeList : ")) { if (!html.contains("episodeList : ")) {
logger.error("No 'episodeList' found at " + this.url); logger.error("No 'episodeList' found at " + this.url);
@ -100,12 +100,12 @@ public class TapasticRipper extends AbstractHTMLRipper {
@Override @Override
public String getGID(URL url) throws MalformedURLException { public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^http://tapastic.com/series/([^/?]+).*$"); Pattern p = Pattern.compile("^https?://tapas.io/series/([^/?]+).*$");
Matcher m = p.matcher(url.toExternalForm()); Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) { if (m.matches()) {
return "series_ " + m.group(1); return "series_ " + m.group(1);
} }
p = Pattern.compile("^http://tapastic.com/episode/([^/?]+).*$"); p = Pattern.compile("^https?://tapas.io/episode/([^/?]+).*$");
m = p.matcher(url.toExternalForm()); m = p.matcher(url.toExternalForm());
if (m.matches()) { if (m.matches()) {
return "ep_" + m.group(1); return "ep_" + m.group(1);

View File

@ -3,27 +3,18 @@ package com.rarchives.ripme.ripper.rippers;
import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL; import java.net.URL;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class ThechiveRipper extends AbstractHTMLRipper { public class ThechiveRipper extends AbstractHTMLRipper {
public static boolean isTag;
public ThechiveRipper(URL url) throws IOException { public ThechiveRipper(URL url) throws IOException {
super(url); super(url);
@ -44,7 +35,7 @@ public class ThechiveRipper extends AbstractHTMLRipper {
Pattern p = Pattern.compile("^https?://thechive.com/[0-9]*/[0-9]*/[0-9]*/([a-zA-Z0-9_\\-]*)/?$"); Pattern p = Pattern.compile("^https?://thechive.com/[0-9]*/[0-9]*/[0-9]*/([a-zA-Z0-9_\\-]*)/?$");
Matcher m = p.matcher(url.toExternalForm()); Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) { if (m.matches()) {
isTag = false; boolean isTag = false;
return m.group(1); return m.group(1);
} }
throw new MalformedURLException("Expected thechive.com URL format: " + throw new MalformedURLException("Expected thechive.com URL format: " +
@ -59,7 +50,7 @@ public class ThechiveRipper extends AbstractHTMLRipper {
@Override @Override
public List<String> getURLsFromPage(Document doc) { public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<String>(); List<String> result = new ArrayList<>();
for (Element el : doc.select("img.attachment-gallery-item-full")) { for (Element el : doc.select("img.attachment-gallery-item-full")) {
String imageSource = el.attr("src"); String imageSource = el.attr("src");
// We replace thumbs with resizes so we can get the full sized images // We replace thumbs with resizes so we can get the full sized images

View File

@ -0,0 +1,75 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
public class TheyiffgalleryRipper extends AbstractHTMLRipper {
public TheyiffgalleryRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "theyiffgallery";
}
@Override
public String getDomain() {
return "theyiffgallery.com";
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("https?://theyiffgallery.com/index\\?/category/(\\d+)");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Expected theyiffgallery URL format: " +
"theyiffgallery.com/index?/category/#### - got " + url + " instead");
}
@Override
public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass
return Http.url(url).get();
}
@Override
public Document getNextPage(Document doc) throws IOException {
String nextPage = doc.select("span.navPrevNext > a").attr("href");
if (nextPage != null && !nextPage.isEmpty() && nextPage.contains("start-")) {
return Http.url("https://theyiffgallery.com/" + nextPage).get();
}
throw new IOException("No more pages");
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<>();
for (Element el : doc.select("ul.thumbnails > li.gdthumb")) {
String imageSource = el.select("a > img").attr("src");
imageSource = imageSource.replaceAll("_data/i", "");
imageSource = imageSource.replaceAll("-\\w\\w_\\w\\d+x\\d+", "");
result.add("https://theyiffgallery.com" + imageSource);
}
return result;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}
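The gallery is Piwigo-like: thumbnails live under _data/i and carry a derivative size suffix, and getURLsFromPage strips both to recover the original path. A sketch of the size-suffix strip on a hypothetical filename:

    // Shows the "-\w\w_\w\d+x\d+" suffix strip used above; the path is hypothetical.
    public class SuffixStripSketch {
        public static void main(String[] args) {
            String thumb = "/upload/2018/01/pic-me_i640x480.jpg";
            System.out.println(thumb.replaceAll("-\\w\\w_\\w\\d+x\\d+", ""));
            // -> /upload/2018/01/pic.jpg
        }
    }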

View File

@ -4,6 +4,9 @@ import java.io.IOException;
import java.net.HttpURLConnection; import java.net.HttpURLConnection;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
@ -34,18 +37,19 @@ public class TumblrRipper extends AlbumRipper {
private static String TUMBLR_AUTH_CONFIG_KEY = "tumblr.auth"; private static String TUMBLR_AUTH_CONFIG_KEY = "tumblr.auth";
private static boolean useDefaultApiKey = false; // fall-back for bad user-specified key private static boolean useDefaultApiKey = false; // fall-back for bad user-specified key
private static final String DEFAULT_API_KEY = "JFNLu3CbINQjRdUvZibXW9VpSEVYYtiPJ86o8YmvgLZIoKyuNX"; private static final List<String> apiKeys = Arrays.asList("JFNLu3CbINQjRdUvZibXW9VpSEVYYtiPJ86o8YmvgLZIoKyuNX",
"FQrwZMCxVnzonv90rgNUJcAk4FpnoS0mYuSuGYqIpM2cFgp9L4",
"qpdkY6nMknksfvYAhf2xIHp0iNRLkMlcWShxqzXyFJRxIsZ1Zz");
private static final String API_KEY = apiKeys.get(new Random().nextInt(apiKeys.size()));
private static final String API_KEY;
static {
API_KEY = Utils.getConfigString(TUMBLR_AUTH_CONFIG_KEY, DEFAULT_API_KEY);
}
private static String getApiKey() { private static String getApiKey() {
if (useDefaultApiKey) { if (useDefaultApiKey || Utils.getConfigString(TUMBLR_AUTH_CONFIG_KEY, "JFNLu3CbINQjRdUvZibXW9VpSEVYYtiPJ86o8YmvgLZIoKyuNX").equals("JFNLu3CbINQjRdUvZibXW9VpSEVYYtiPJ86o8YmvgLZIoKyuNX")) {
return DEFAULT_API_KEY; logger.info("Using api key: " + API_KEY);
} else {
return API_KEY; return API_KEY;
} else {
logger.info("Using user tumblr.auth api key");
return Utils.getConfigString(TUMBLR_AUTH_CONFIG_KEY, "JFNLu3CbINQjRdUvZibXW9VpSEVYYtiPJ86o8YmvgLZIoKyuNX");
} }
} }
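The ripper now picks one of three bundled API keys at random when the class loads, and getApiKey only returns the user's tumblr.auth value when it differs from the bundled default. A minimal sketch of the pick-once-per-run pattern; the key strings are placeholders:

    import java.util.Arrays;
    import java.util.List;
    import java.util.Random;

    // One random key is chosen at class-load time and reused for the whole run.
    public class KeyRotationSketch {
        private static final List<String> KEYS = Arrays.asList("key-a", "key-b", "key-c"); // placeholders
        private static final String CHOSEN = KEYS.get(new Random().nextInt(KEYS.size()));

        public static void main(String[] args) {
            System.out.println("Using api key: " + CHOSEN);
        }
    }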
@ -77,7 +81,7 @@ public class TumblrRipper extends AlbumRipper {
return url; return url;
} }
public boolean isTumblrURL(URL url) { private boolean isTumblrURL(URL url) {
String checkURL = "http://api.tumblr.com/v2/blog/"; String checkURL = "http://api.tumblr.com/v2/blog/";
checkURL += url.getHost(); checkURL += url.getHost();
checkURL += "/info?api_key=" + getApiKey(); checkURL += "/info?api_key=" + getApiKey();
@ -95,6 +99,7 @@ public class TumblrRipper extends AlbumRipper {
@Override @Override
public void rip() throws IOException { public void rip() throws IOException {
String[] mediaTypes; String[] mediaTypes;
boolean exceededRateLimit = false;
if (albumType == ALBUM_TYPE.POST) { if (albumType == ALBUM_TYPE.POST) {
mediaTypes = new String[] { "post" }; mediaTypes = new String[] { "post" };
} else { } else {
@ -105,12 +110,21 @@ public class TumblrRipper extends AlbumRipper {
if (isStopped()) { if (isStopped()) {
break; break;
} }
if (exceededRateLimit) {
break;
}
offset = 0; offset = 0;
while (true) { while (true) {
if (isStopped()) { if (isStopped()) {
break; break;
} }
if (exceededRateLimit) {
break;
}
String apiURL = getTumblrApiURL(mediaType, offset); String apiURL = getTumblrApiURL(mediaType, offset);
logger.info("Retrieving " + apiURL); logger.info("Retrieving " + apiURL);
sendUpdate(STATUS.LOADING_RESOURCE, apiURL); sendUpdate(STATUS.LOADING_RESOURCE, apiURL);
@ -126,6 +140,11 @@ public class TumblrRipper extends AlbumRipper {
HttpStatusException status = (HttpStatusException)cause; HttpStatusException status = (HttpStatusException)cause;
if (status.getStatusCode() == HttpURLConnection.HTTP_UNAUTHORIZED && !useDefaultApiKey) { if (status.getStatusCode() == HttpURLConnection.HTTP_UNAUTHORIZED && !useDefaultApiKey) {
retry = true; retry = true;
} else if (status.getStatusCode() == 429) {
logger.error("Tumblr rate limit has been exceeded");
sendUpdate(STATUS.DOWNLOAD_ERRORED,"Tumblr rate limit has been exceeded");
exceededRateLimit = true;
break;
} }
} }
} }
@ -192,7 +211,14 @@ public class TumblrRipper extends AlbumRipper {
for (int j = 0; j < photos.length(); j++) { for (int j = 0; j < photos.length(); j++) {
photo = photos.getJSONObject(j); photo = photos.getJSONObject(j);
try { try {
fileURL = new URL(photo.getJSONObject("original_size").getString("url")); if (Utils.getConfigBoolean("tumblr.get_raw_image", false)) {
String urlString = photo.getJSONObject("original_size").getString("url").replaceAll("https", "http");
urlString = urlString.replaceAll("https?://[a-sA-Z0-9_\\-\\.]*\\.tumblr", "http://data.tumblr");
urlString = urlString.replaceAll("_\\d+\\.", "_raw.");
fileURL = new URL(urlString);
} else {
fileURL = new URL(photo.getJSONObject("original_size").getString("url").replaceAll("http", "https"));
}
m = p.matcher(fileURL.toString()); m = p.matcher(fileURL.toString());
if (m.matches()) { if (m.matches()) {
addURLToDownload(fileURL); addURLToDownload(fileURL);
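With tumblr.get_raw_image enabled, the original_size URL is rewritten onto data.tumblr.com with a _raw size suffix. A sketch of the three replaceAll steps from the hunk above, applied to a made-up media URL:

    // The raw-image rewrite chain, on a made-up URL.
    public class TumblrRawUrlSketch {
        public static void main(String[] args) {
            String urlString = "https://68.media.tumblr.com/abcdef/tumblr_xyz_1280.jpg";
            urlString = urlString.replaceAll("https", "http");
            urlString = urlString.replaceAll("https?://[a-sA-Z0-9_\\-\\.]*\\.tumblr", "http://data.tumblr");
            urlString = urlString.replaceAll("_\\d+\\.", "_raw.");
            System.out.println(urlString); // -> http://data.tumblr.com/abcdef/tumblr_xyz_raw.jpg
        }
    }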
@ -202,12 +228,11 @@ public class TumblrRipper extends AlbumRipper {
} }
} catch (Exception e) { } catch (Exception e) {
logger.error("[!] Error while parsing photo in " + photo, e); logger.error("[!] Error while parsing photo in " + photo, e);
continue;
} }
} }
} else if (post.has("video_url")) { } else if (post.has("video_url")) {
try { try {
fileURL = new URL(post.getString("video_url")); fileURL = new URL(post.getString("video_url").replaceAll("http", "https"));
addURLToDownload(fileURL); addURLToDownload(fileURL);
} catch (Exception e) { } catch (Exception e) {
logger.error("[!] Error while parsing video in " + post, e); logger.error("[!] Error while parsing video in " + post, e);
@ -254,7 +279,7 @@ public class TumblrRipper extends AlbumRipper {
@Override @Override
public String getGID(URL url) throws MalformedURLException { public String getGID(URL url) throws MalformedURLException {
final String DOMAIN_REGEX = "^https?://([a-zA-Z0-9\\-\\.]+)"; final String DOMAIN_REGEX = "^https?://([a-zA-Z0-9\\-.]+)";
Pattern p; Pattern p;
Matcher m; Matcher m;

View File

@ -54,14 +54,14 @@ public class TwitterRipper extends AlbumRipper {
@Override @Override
public URL sanitizeURL(URL url) throws MalformedURLException { public URL sanitizeURL(URL url) throws MalformedURLException {
// https://twitter.com/search?q=from%3Apurrbunny%20filter%3Aimages&src=typd // https://twitter.com/search?q=from%3Apurrbunny%20filter%3Aimages&src=typd
Pattern p = Pattern.compile("^https?://(m\\.)?twitter\\.com/search\\?q=([a-zA-Z0-9%\\-_]{1,}).*$"); Pattern p = Pattern.compile("^https?://(m\\.)?twitter\\.com/search\\?q=([a-zA-Z0-9%\\-_]+).*$");
Matcher m = p.matcher(url.toExternalForm()); Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) { if (m.matches()) {
albumType = ALBUM_TYPE.SEARCH; albumType = ALBUM_TYPE.SEARCH;
searchText = m.group(2); searchText = m.group(2);
return url; return url;
} }
p = Pattern.compile("^https?://(m\\.)?twitter\\.com/([a-zA-Z0-9\\-_]{1,}).*$"); p = Pattern.compile("^https?://(m\\.)?twitter\\.com/([a-zA-Z0-9\\-_]+).*$");
m = p.matcher(url.toExternalForm()); m = p.matcher(url.toExternalForm());
if (m.matches()) { if (m.matches()) {
albumType = ALBUM_TYPE.ACCOUNT; albumType = ALBUM_TYPE.ACCOUNT;
@ -83,7 +83,6 @@ public class TwitterRipper extends AlbumRipper {
try { try {
JSONObject json = new JSONObject(body); JSONObject json = new JSONObject(body);
accessToken = json.getString("access_token"); accessToken = json.getString("access_token");
return;
} catch (JSONException e) { } catch (JSONException e) {
// Fall through // Fall through
throw new IOException("Failure while parsing JSON: " + body, e); throw new IOException("Failure while parsing JSON: " + body, e);
@ -142,7 +141,7 @@ public class TwitterRipper extends AlbumRipper {
} }
private List<JSONObject> getTweets(String url) throws IOException { private List<JSONObject> getTweets(String url) throws IOException {
List<JSONObject> tweets = new ArrayList<JSONObject>(); List<JSONObject> tweets = new ArrayList<>();
logger.info(" Retrieving " + url); logger.info(" Retrieving " + url);
Document doc = Http.url(url) Document doc = Http.url(url)
.ignoreContentType() .ignoreContentType()
@ -283,7 +282,6 @@ public class TwitterRipper extends AlbumRipper {
if (c == '%') { if (c == '%') {
gid.append('_'); gid.append('_');
i += 2; i += 2;
continue;
// Ignore non-alphanumeric chars // Ignore non-alphanumeric chars
} else if ( } else if (
(c >= 'a' && c <= 'z') (c >= 'a' && c <= 'z')

View File

@ -22,7 +22,7 @@ import com.rarchives.ripme.utils.Http;
public class TwodgalleriesRipper extends AbstractHTMLRipper { public class TwodgalleriesRipper extends AbstractHTMLRipper {
private int offset = 0; private int offset = 0;
private Map<String,String> cookies = new HashMap<String,String>(); private Map<String,String> cookies = new HashMap<>();
public TwodgalleriesRipper(URL url) throws IOException { public TwodgalleriesRipper(URL url) throws IOException {
super(url); super(url);
@ -90,7 +90,7 @@ public class TwodgalleriesRipper extends AbstractHTMLRipper {
@Override @Override
public List<String> getURLsFromPage(Document doc) { public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>(); List<String> imageURLs = new ArrayList<>();
for (Element thumb : doc.select("div.hcaption > img")) { for (Element thumb : doc.select("div.hcaption > img")) {
String image = thumb.attr("src"); String image = thumb.attr("src");
image = image.replace("/200H/", "/"); image = image.replace("/200H/", "/");
@ -114,7 +114,7 @@ public class TwodgalleriesRipper extends AbstractHTMLRipper {
cookies = resp.cookies(); cookies = resp.cookies();
String ctoken = resp.parse().select("form > input[name=ctoken]").first().attr("value"); String ctoken = resp.parse().select("form > input[name=ctoken]").first().attr("value");
Map<String,String> postdata = new HashMap<String,String>(); Map<String,String> postdata = new HashMap<>();
postdata.put("user[login]", new String(Base64.decode("cmlwbWU="))); postdata.put("user[login]", new String(Base64.decode("cmlwbWU=")));
postdata.put("user[password]", new String(Base64.decode("cmlwcGVy"))); postdata.put("user[password]", new String(Base64.decode("cmlwcGVy")));
postdata.put("rememberme", "1"); postdata.put("rememberme", "1");

View File

@ -56,7 +56,7 @@ public class VidbleRipper extends AbstractHTMLRipper {
} }
private static List<String> getURLsFromPageStatic(Document doc) { private static List<String> getURLsFromPageStatic(Document doc) {
List<String> imageURLs = new ArrayList<String>(); List<String> imageURLs = new ArrayList<>();
Elements els = doc.select("#ContentPlaceHolder1_divContent"); Elements els = doc.select("#ContentPlaceHolder1_divContent");
Elements imgs = els.select("img"); Elements imgs = els.select("img");
for (Element img : imgs) { for (Element img : imgs) {
@ -76,7 +76,7 @@ public class VidbleRipper extends AbstractHTMLRipper {
} }
public static List<URL> getURLsFromPage(URL url) throws IOException { public static List<URL> getURLsFromPage(URL url) throws IOException {
List<URL> urls = new ArrayList<URL>(); List<URL> urls = new ArrayList<>();
Document doc = Http.url(url).get(); Document doc = Http.url(url).get();
for (String stringURL : getURLsFromPageStatic(doc)) { for (String stringURL : getURLsFromPageStatic(doc)) {
urls.add(new URL(stringURL)); urls.add(new URL(stringURL));

View File

@ -0,0 +1,84 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
public class ViewcomicRipper extends AbstractHTMLRipper {
public ViewcomicRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "view-comic";
}
@Override
public String getDomain() {
return "view-comic.com";
}
@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
try {
// Attempt to use album title as GID
String titleText = getFirstPage().select("title").first().text();
String title = titleText.replace("Viewcomic reading comics online for free", "");
title = title.replace("_", "");
title = title.replace("|", "");
title = title.replace("", "");
title = title.replace(".", "");
return getHost() + "_" + title.trim();
} catch (IOException e) {
// Fall back to default album naming convention
logger.info("Unable to find title at " + url);
}
return super.getAlbumTitle(url);
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("https?://view-comic.com/([a-zA-Z1-9_-]*)/?$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Expected view-comic URL format: " +
"view-comic.com/COMIC_NAME - got " + url + " instead");
}
@Override
public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass
return Http.url(url).get();
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<String>();
for (Element el : doc.select("div.pinbin-copy > a > img")) {
result.add(el.attr("src"));
}
return result;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}

View File

@ -84,7 +84,7 @@ public class VineRipper extends AlbumRipper {
@Override @Override
public String getGID(URL url) throws MalformedURLException { public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://(www\\.)?vine\\.co/u/([0-9]{1,}).*$"); Pattern p = Pattern.compile("^https?://(www\\.)?vine\\.co/u/([0-9]+).*$");
Matcher m = p.matcher(url.toExternalForm()); Matcher m = p.matcher(url.toExternalForm());
if (!m.matches()) { if (!m.matches()) {
throw new MalformedURLException("Expected format: http://vine.co/u/######"); throw new MalformedURLException("Expected format: http://vine.co/u/######");

View File

@ -37,11 +37,7 @@ public class VkRipper extends AlbumRipper {
} }
// Ignore /video pages (but not /videos pages) // Ignore /video pages (but not /videos pages)
String u = url.toExternalForm(); String u = url.toExternalForm();
if (u.contains("/video") && !u.contains("videos")) { return !u.contains("/video") || u.contains("videos");
// Single video page
return false;
}
return true;
} }
@Override @Override
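The collapsed return is equivalent to the old branching: a URL is rippable unless it contains "/video" without also containing "videos". A quick truth check over illustrative paths:

    // Verifies the collapsed canRip expression on representative URLs.
    public class VkCanRipSketch {
        public static void main(String[] args) {
            String[] samples = {
                "http://vk.com/video12345_67890", // single video -> false
                "http://vk.com/videos12345",      // video list   -> true
                "http://vk.com/album12345_67890"  // album        -> true
            };
            for (String u : samples) {
                System.out.println(u + " -> " + (!u.contains("/video") || u.contains("videos")));
            }
        }
    }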
@ -62,7 +58,7 @@ public class VkRipper extends AlbumRipper {
private void ripVideos() throws IOException { private void ripVideos() throws IOException {
String oid = getGID(this.url).replace("videos", ""); String oid = getGID(this.url).replace("videos", "");
String u = "http://vk.com/al_video.php"; String u = "http://vk.com/al_video.php";
Map<String,String> postData = new HashMap<String,String>(); Map<String,String> postData = new HashMap<>();
postData.put("al", "1"); postData.put("al", "1");
postData.put("act", "load_videos_silent"); postData.put("act", "load_videos_silent");
postData.put("offset", "0"); postData.put("offset", "0");
@ -97,13 +93,13 @@ public class VkRipper extends AlbumRipper {
} }
private void ripImages() throws IOException { private void ripImages() throws IOException {
Map<String,String> photoIDsToURLs = new HashMap<String,String>(); Map<String,String> photoIDsToURLs = new HashMap<>();
int offset = 0; int offset = 0;
while (true) { while (true) {
logger.info(" Retrieving " + this.url); logger.info(" Retrieving " + this.url);
// al=1&offset=80&part=1 // al=1&offset=80&part=1
Map<String,String> postData = new HashMap<String,String>(); Map<String,String> postData = new HashMap<>();
postData.put("al", "1"); postData.put("al", "1");
postData.put("offset", Integer.toString(offset)); postData.put("offset", Integer.toString(offset));
postData.put("part", "1"); postData.put("part", "1");
@ -120,7 +116,7 @@ public class VkRipper extends AlbumRipper {
body = body.substring(body.indexOf("<div")); body = body.substring(body.indexOf("<div"));
doc = Jsoup.parseBodyFragment(body); doc = Jsoup.parseBodyFragment(body);
List<Element> elements = doc.select("a"); List<Element> elements = doc.select("a");
Set<String> photoIDsToGet = new HashSet<String>(); Set<String> photoIDsToGet = new HashSet<>();
for (Element a : elements) { for (Element a : elements) {
if (!a.attr("onclick").contains("showPhoto('")) { if (!a.attr("onclick").contains("showPhoto('")) {
logger.error("a: " + a); logger.error("a: " + a);
@ -162,8 +158,8 @@ public class VkRipper extends AlbumRipper {
} }
private Map<String,String> getPhotoIDsToURLs(String photoID) throws IOException { private Map<String,String> getPhotoIDsToURLs(String photoID) throws IOException {
Map<String,String> photoIDsToURLs = new HashMap<String,String>(); Map<String,String> photoIDsToURLs = new HashMap<>();
Map<String,String> postData = new HashMap<String,String>(); Map<String,String> postData = new HashMap<>();
// act=show&al=1&list=album45506334_172415053&module=photos&photo=45506334_304658196 // act=show&al=1&list=album45506334_172415053&module=photos&photo=45506334_304658196
postData.put("list", getGID(this.url)); postData.put("list", getGID(this.url));
postData.put("act", "show"); postData.put("act", "show");
@ -202,7 +198,7 @@ public class VkRipper extends AlbumRipper {
@Override @Override
public String getGID(URL url) throws MalformedURLException { public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://(www\\.)?vk\\.com/(photos|album|videos)-?([a-zA-Z0-9_]{1,}).*$"); Pattern p = Pattern.compile("^https?://(www\\.)?vk\\.com/(photos|album|videos)-?([a-zA-Z0-9_]+).*$");
Matcher m = p.matcher(url.toExternalForm()); Matcher m = p.matcher(url.toExternalForm());
if (!m.matches()) { if (!m.matches()) {
throw new MalformedURLException("Expected format: http://vk.com/album#### or vk.com/photos####"); throw new MalformedURLException("Expected format: http://vk.com/album#### or vk.com/photos####");

View File

@ -0,0 +1,102 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import java.util.Map;
import java.util.HashMap;
import org.jsoup.Connection.Response;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
public class WebtoonsRipper extends AbstractHTMLRipper {
private Map<String,String> cookies = new HashMap<String,String>();
public WebtoonsRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "webtoons";
}
@Override
public String getDomain() {
return "www.webtoons.com";
}
@Override
public boolean canRip(URL url) {
Pattern pat = Pattern.compile("https?://www.webtoons.com/[a-zA-Z]+/[a-zA-Z]+/([a-zA-Z0-9_-]*)/[a-zA-Z0-9_-]+/\\S*");
Matcher mat = pat.matcher(url.toExternalForm());
if (mat.matches()) {
return true;
}
return false;
}
@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
Pattern pat = Pattern.compile("https?://www.webtoons.com/[a-zA-Z]+/[a-zA-Z]+/([a-zA-Z0-9_-]*)/[a-zA-Z0-9_-]+/\\S*");
Matcher mat = pat.matcher(url.toExternalForm());
if (mat.matches()) {
return getHost() + "_" + mat.group(1);
}
return super.getAlbumTitle(url);
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern pat = Pattern.compile("https?://www.webtoons.com/[a-zA-Z]+/[a-zA-Z]+/([a-zA-Z0-9_-]*)/[a-zA-Z0-9_-]+/\\S*");
Matcher mat = pat.matcher(url.toExternalForm());
if (mat.matches()) {
return mat.group(1);
}
throw new MalformedURLException("Expected URL format: http://www.webtoons.com/LANG/CAT/TITLE/VOL/, got: " + url);
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<String>();
for (Element elem : doc.select("div.viewer_img > img")) {
result.add(elem.attr("data-url"));
}
return result;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), cookies);
}
@Override
public Document getFirstPage() throws IOException {
Response resp = Http.url(url).response();
cookies = resp.cookies();
return Http.url(url).get();
}
@Override
public Document getNextPage(Document doc) throws IOException {
// Find next page
String nextUrl = "";
Element elem = doc.select("a.pg_next").first();
nextUrl = elem.attr("href");
if (nextUrl.equals("") || nextUrl.equals("#")) {
throw new IOException("No more pages");
}
return Http.url(nextUrl).get();
}
}
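Webtoons lazy-loads episode images, so the real URLs sit in data-url attributes rather than src. A sketch of the same selector over a made-up viewer fragment:

    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;

    // Pulls data-url attributes from viewer markup, as getURLsFromPage does.
    public class WebtoonsDataUrlSketch {
        public static void main(String[] args) {
            String html = "<div class=\"viewer_img\">"
                    + "<img src=\"blank.gif\" data-url=\"https://example.com/ep1/001.jpg\"/>"
                    + "<img src=\"blank.gif\" data-url=\"https://example.com/ep1/002.jpg\"/>"
                    + "</div>";
            Document doc = Jsoup.parse(html);
            for (Element elem : doc.select("div.viewer_img > img")) {
                System.out.println(elem.attr("data-url"));
            }
        }
    }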

View File

@ -16,25 +16,25 @@ import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Http;
public class WordpressComicRipper extends AbstractHTMLRipper { public class WordpressComicRipper extends AbstractHTMLRipper {
String pageTitle = ""; private String pageTitle = "";
public WordpressComicRipper(URL url) throws IOException { public WordpressComicRipper(URL url) throws IOException {
super(url); super(url);
} }
// Test links: // Test links (see also WordpressComicRipperTest.java)
// http://www.totempole666.com/comic/first-time-for-everything-00-cover/ // http://www.totempole666.com/comic/first-time-for-everything-00-cover/
// http://buttsmithy.com/archives/comic/p1 // http://buttsmithy.com/archives/comic/p1
// http://themonsterunderthebed.net/?comic=test-post // http://themonsterunderthebed.net/?comic=test-post
// http://prismblush.com/comic/hella-trap-pg-01/ // http://prismblush.com/comic/hella-trap-pg-01/
// http://www.konradokonski.com/sawdust/ // http://www.konradokonski.com/sawdust/comic/get-up/
// http://www.konradokonski.com/wiory/ // http://www.konradokonski.com/wiory/comic/08182008/
// http://freeadultcomix.com/finders-feepaid-in-full-sparrow/ // http://freeadultcomix.com/finders-feepaid-in-full-sparrow/
// http://comics-xxx.com/republic-rendezvous-palcomix-star-wars-xxx/ // http://thisis.delvecomic.com/NewWP/comic/in-too-deep/
// http://tnbtu.com/comic/01-00/ // http://tnbtu.com/comic/01-00/
// http://shipinbottle.pepsaga.com/?p=281 // http://shipinbottle.pepsaga.com/?p=281
public static List<String> explicit_domains = Arrays.asList( private static List<String> explicit_domains = Arrays.asList(
"www.totempole666.com", "www.totempole666.com",
"buttsmithy.com", "buttsmithy.com",
"themonsterunderthebed.net", "themonsterunderthebed.net",
@ -42,21 +42,18 @@ public class WordpressComicRipper extends AbstractHTMLRipper {
"www.konradokonski.com", "www.konradokonski.com",
"freeadultcomix.com", "freeadultcomix.com",
"thisis.delvecomic.com", "thisis.delvecomic.com",
"comics-xxx.com",
"tnbtu.com", "tnbtu.com",
"shipinbottle.pepsaga.com" "shipinbottle.pepsaga.com"
); );
@Override @Override
public String getHost() { public String getHost() {
String host = url.toExternalForm().split("/")[2]; return url.toExternalForm().split("/")[2];
return host;
} }
@Override @Override
public String getDomain() { public String getDomain() {
String host = url.toExternalForm().split("/")[2]; return url.toExternalForm().split("/")[2];
return host;
} }
@Override @Override
@ -70,12 +67,20 @@ public class WordpressComicRipper extends AbstractHTMLRipper {
return true; return true;
} }
Pattern konradokonskiPat = Pattern.compile("https?://www.konradokonski.com/sawdust/comic/([a-zA-Z0-9_-]*)/?$"); Pattern konradokonskiPat = Pattern.compile("https?://www.konradokonski.com/([a-zA-Z0-9_-]*)/comic/([a-zA-Z0-9_-]*)/?$");
Matcher konradokonskiMat = konradokonskiPat.matcher(url.toExternalForm()); Matcher konradokonskiMat = konradokonskiPat.matcher(url.toExternalForm());
if (konradokonskiMat.matches()) { if (konradokonskiMat.matches()) {
return true; return true;
} }
// This is hardcoded because it starts on the first page, unlike all the other
// konradokonski comics, which start on the last page
konradokonskiPat = Pattern.compile("https?://www.konradokonski.com/aquartzbead/?$");
konradokonskiMat = konradokonskiPat.matcher(url.toExternalForm());
if (konradokonskiMat.matches()) {
return true;
}
Pattern buttsmithyPat = Pattern.compile("https?://buttsmithy.com/archives/comic/([a-zA-Z0-9_-]*)/?$"); Pattern buttsmithyPat = Pattern.compile("https?://buttsmithy.com/archives/comic/([a-zA-Z0-9_-]*)/?$");
Matcher buttsmithyMat = buttsmithyPat.matcher(url.toExternalForm()); Matcher buttsmithyMat = buttsmithyPat.matcher(url.toExternalForm());
if (buttsmithyMat.matches()) { if (buttsmithyMat.matches()) {
@ -125,12 +130,13 @@ public class WordpressComicRipper extends AbstractHTMLRipper {
} }
} }
return false; return false;
} }
@Override @Override
public String getAlbumTitle(URL url) throws MalformedURLException { public String getAlbumTitle(URL url) throws MalformedURLException {
Pattern totempole666Pat = Pattern.compile("(?:https?://)?(?:www\\.)?totempole666.com\\/comic/([a-zA-Z0-9_-]*)/?$"); Pattern totempole666Pat = Pattern.compile("(?:https?://)?(?:www\\.)?totempole666.com/comic/([a-zA-Z0-9_-]*)/?$");
Matcher totempole666Mat = totempole666Pat.matcher(url.toExternalForm()); Matcher totempole666Mat = totempole666Pat.matcher(url.toExternalForm());
if (totempole666Mat.matches()) { if (totempole666Mat.matches()) {
return "totempole666.com" + "_" + "The_cummoner"; return "totempole666.com" + "_" + "The_cummoner";
@ -142,16 +148,16 @@ public class WordpressComicRipper extends AbstractHTMLRipper {
return "buttsmithy.com" + "_" + "Alfie"; return "buttsmithy.com" + "_" + "Alfie";
} }
Pattern konradokonskiSawdustPat = Pattern.compile("http://www.konradokonski.com/sawdust/comic/([a-zA-Z0-9_-]*)/?$"); Pattern konradokonskiPat = Pattern.compile("http://www.konradokonski.com/([a-zA-Z]+)/comic/([a-zA-Z0-9_-]*)/?$");
Matcher konradokonskiSawdustMat = konradokonskiSawdustPat.matcher(url.toExternalForm()); Matcher konradokonskiMat = konradokonskiPat.matcher(url.toExternalForm());
if (konradokonskiSawdustMat.matches()) { if (konradokonskiMat.matches()) {
return "konradokonski.com_sawdust"; return "konradokonski.com_" + konradokonskiMat.group(1);
} }
Pattern konradokonskiWioryPat = Pattern.compile("http://www.konradokonski.com/wiory/comic/([a-zA-Z0-9_-]*)/?$"); konradokonskiPat = Pattern.compile("https?://www.konradokonski.com/aquartzbead/?$");
Matcher konradokonskiWioryMat = konradokonskiWioryPat.matcher(url.toExternalForm()); konradokonskiMat = konradokonskiPat.matcher(url.toExternalForm());
if (konradokonskiWioryMat.matches()) { if (konradokonskiMat.matches()) {
return "konradokonski.com_wiory"; return "konradokonski.com_aquartzbead";
} }
Pattern freeadultcomixPat = Pattern.compile("https?://freeadultcomix.com/([a-zA-Z0-9_\\-]*)/?$"); Pattern freeadultcomixPat = Pattern.compile("https?://freeadultcomix.com/([a-zA-Z0-9_\\-]*)/?$");
@ -237,7 +243,7 @@ public class WordpressComicRipper extends AbstractHTMLRipper {
@Override @Override
public List<String> getURLsFromPage(Document doc) { public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<String>(); List<String> result = new ArrayList<>();
if (getHost().contains("www.totempole666.com") if (getHost().contains("www.totempole666.com")
|| getHost().contains("buttsmithy.com") || getHost().contains("buttsmithy.com")
|| getHost().contains("themonsterunderthebed.net") || getHost().contains("themonsterunderthebed.net")
@ -277,9 +283,10 @@ public class WordpressComicRipper extends AbstractHTMLRipper {
} }
// freeadultcomix gets its own if because it needs to add http://freeadultcomix.com to the start of each link // freeadultcomix gets its own if because it needs to add http://freeadultcomix.com to the start of each link
// TODO review the above comment which no longer applies -- see if there's a refactoring we should do here.
if (url.toExternalForm().contains("freeadultcomix.com")) { if (url.toExternalForm().contains("freeadultcomix.com")) {
for (Element elem : doc.select("div.single-post > p > img.aligncenter")) { for (Element elem : doc.select("div.single-post > p > img.aligncenter")) {
result.add("http://freeadultcomix.com" + elem.attr("src")); result.add(elem.attr("src"));
} }
} }
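The sawdust-only pattern is generalized so any konradokonski comic matches, with group(1) carrying the comic name into the album title. A sketch of the canRip pattern against the two sample URLs from the test-link list above:

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    // group(1) is the comic, group(2) the page slug.
    public class KonradokonskiGidSketch {
        public static void main(String[] args) {
            Pattern p = Pattern.compile("https?://www.konradokonski.com/([a-zA-Z0-9_-]*)/comic/([a-zA-Z0-9_-]*)/?$");
            String[] urls = {
                "http://www.konradokonski.com/sawdust/comic/get-up/",
                "http://www.konradokonski.com/wiory/comic/08182008/"
            };
            for (String u : urls) {
                Matcher m = p.matcher(u);
                if (m.matches()) {
                    System.out.println("konradokonski.com_" + m.group(1) + " page " + m.group(2));
                }
            }
        }
    }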

View File

@ -10,14 +10,15 @@ import java.net.URISyntaxException;
import java.net.URL; import java.net.URL;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
public class XbooruRipper extends AbstractHTMLRipper { public class XbooruRipper extends AbstractHTMLRipper {
private static final Logger logger = Logger.getLogger(XbooruRipper.class);
private static Pattern gidPattern = null; private static Pattern gidPattern = null;
public XbooruRipper(URL url) throws IOException { public XbooruRipper(URL url) throws IOException {
@ -57,7 +58,7 @@ public class XbooruRipper extends AbstractHTMLRipper {
@Override @Override
public List<String> getURLsFromPage(Document page) { public List<String> getURLsFromPage(Document page) {
List<String> res = new ArrayList<String>(100); List<String> res = new ArrayList<>(100);
for (Element e : page.getElementsByTag("post")) { for (Element e : page.getElementsByTag("post")) {
res.add(e.absUrl("file_url") + "#" + e.attr("id")); res.add(e.absUrl("file_url") + "#" + e.attr("id"));
} }
@ -71,7 +72,7 @@ public class XbooruRipper extends AbstractHTMLRipper {
private String getTerm(URL url) throws MalformedURLException { private String getTerm(URL url) throws MalformedURLException {
if (gidPattern == null) { if (gidPattern == null) {
gidPattern = Pattern.compile("^https?://(www\\.)?xbooru\\.com/(index.php)?.*([?&]tags=([a-zA-Z0-9$_.+!*'(),%-]+))(\\&|(#.*)?$)"); gidPattern = Pattern.compile("^https?://(www\\.)?xbooru\\.com/(index.php)?.*([?&]tags=([a-zA-Z0-9$_.+!*'(),%-]+))(&|(#.*)?$)");
} }
Matcher m = gidPattern.matcher(url.toExternalForm()); Matcher m = gidPattern.matcher(url.toExternalForm());
@ -87,7 +88,7 @@ public class XbooruRipper extends AbstractHTMLRipper {
try { try {
return Utils.filesystemSafe(new URI(getTerm(url)).getPath()); return Utils.filesystemSafe(new URI(getTerm(url)).getPath());
} catch (URISyntaxException ex) { } catch (URISyntaxException ex) {
Logger.getLogger(PahealRipper.class.getName()).log(Level.SEVERE, null, ex); logger.error(ex);
} }
throw new MalformedURLException("Expected xbooru.com URL format: xbooru.com/index.php?tags=searchterm - got " + url + " instead"); throw new MalformedURLException("Expected xbooru.com URL format: xbooru.com/index.php?tags=searchterm - got " + url + " instead");

View File

@ -3,91 +3,46 @@ package com.rarchives.ripme.ripper.rippers;
import java.io.IOException; import java.io.IOException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
public class XhamsterRipper extends AlbumRipper { public class XhamsterRipper extends AbstractHTMLRipper {
private static final String HOST = "xhamster";
public XhamsterRipper(URL url) throws IOException { public XhamsterRipper(URL url) throws IOException {
super(url); super(url);
} }
@Override @Override
public boolean canRip(URL url) { public String getHost() {
Pattern p = Pattern.compile("^https?://[wmde.]*xhamster\\.com/photos/gallery/.*[0-9]+$"); return "xhamster";
Matcher m = p.matcher(url.toExternalForm()); }
return m.matches();
@Override
public String getDomain() {
return "xhamster.com";
} }
@Override @Override
public URL sanitizeURL(URL url) throws MalformedURLException { public URL sanitizeURL(URL url) throws MalformedURLException {
return url; String URLToReturn = url.toExternalForm();
} URLToReturn = URLToReturn.replaceAll("m.xhamster.com", "xhamster.com");
URLToReturn = URLToReturn.replaceAll("\\w\\w.xhamster.com", "xhamster.com");
@Override URL san_url = new URL(URLToReturn.replaceAll("xhamster.com", "m.xhamster.com"));
public void rip() throws IOException { logger.info("sanitized URL is " + san_url.toExternalForm());
int index = 0; return san_url;
String nextURL = this.url.toExternalForm();
while (nextURL != null) {
logger.info(" Retrieving " + nextURL);
Document doc = Http.url(nextURL).get();
for (Element thumb : doc.select("table.iListing div.img img")) {
if (!thumb.hasAttr("src")) {
continue;
}
String image = thumb.attr("src");
// replace thumbnail urls with the urls to the full sized images
image = image.replaceAll(
"https://upt.xhcdn\\.",
"http://up.xhamster.");
image = image.replaceAll("ept\\.xhcdn", "ep.xhamster");
image = image.replaceAll(
"_160\\.",
"_1000.");
// Xhamster has shitty cert management and uses the wrong cert for their ep.xhamster Domain
// so we change all https requests to http
image = image.replaceAll(
"https://",
"http://");
index += 1;
String prefix = "";
if (Utils.getConfigBoolean("download.save_order", true)) {
prefix = String.format("%03d_", index);
}
addURLToDownload(new URL(image), prefix);
if (isThisATest()) {
break;
}
}
if (isThisATest()) {
break;
}
nextURL = null;
for (Element element : doc.select("a.last")) {
nextURL = element.attr("href");
break;
}
}
waitForThreads();
}
@Override
public String getHost() {
return HOST;
} }
@Override @Override
public String getGID(URL url) throws MalformedURLException { public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://[wmde.]*xhamster\\.com/photos/gallery/.*?(\\d{1,})$"); Pattern p = Pattern.compile("^https?://[\\w\\w.]*xhamster\\.com/photos/gallery/.*?(\\d+)$");
Matcher m = p.matcher(url.toExternalForm()); Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) { if (m.matches()) {
return m.group(1); return m.group(1);
@ -98,4 +53,54 @@ public class XhamsterRipper extends AlbumRipper {
+ " Got: " + url); + " Got: " + url);
} }
@Override
public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass
return Http.url(url).get();
}
@Override
public boolean canRip(URL url) {
Pattern p = Pattern.compile("^https?://[wmde.]*xhamster\\.com/photos/gallery/.*?(\\d+)$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return true;
}
return false;
}
@Override
public Document getNextPage(Document doc) throws IOException {
if (doc.select("a.next").first().attr("href") != "") {
return Http.url(doc.select("a.next").first().attr("href")).get();
} else {
throw new IOException("No more pages");
}
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<>();
for (Element thumb : doc.select("div.picture_view > div.pictures_block > div.items > div.item-container > a > div.thumb_container > div.img > img")) {
String image = thumb.attr("src");
// replace thumbnail urls with the urls to the full sized images
image = image.replaceAll(
"https://upt.xhcdn\\.",
"http://up.xhamster.");
image = image.replaceAll("ept\\.xhcdn", "ep.xhamster");
image = image.replaceAll(
"_160\\.",
"_1000.");
// Xhamster has bad cert management and uses invalid certs for some CDNs, so we change all our requests to http
image = image.replaceAll("https", "http");
result.add(image);
}
return result;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
} }
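The new getURLsFromPage keeps the old thumbnail rewriting: swap the thumb CDN host for the full-size one, bump the _160 size marker to _1000, and downgrade to plain http because of the certificate problem noted above. A sketch on a made-up thumbnail URL:

    // The thumb-to-full-size rewrites from getURLsFromPage, on a made-up URL.
    public class XhamsterThumbSketch {
        public static void main(String[] args) {
            String image = "https://upt.xhcdn.com/000/123/456/789_160.jpg";
            image = image.replaceAll("https://upt.xhcdn\\.", "http://up.xhamster.");
            image = image.replaceAll("ept\\.xhcdn", "ep.xhamster");
            image = image.replaceAll("_160\\.", "_1000.");
            image = image.replaceAll("https", "http"); // invalid certs on some CDNs
            System.out.println(image); // -> http://up.xhamster.com/000/123/456/789_1000.jpg
        }
    }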

View File

@ -20,7 +20,7 @@ import com.rarchives.ripme.utils.Http;
public class ZizkiRipper extends AbstractHTMLRipper { public class ZizkiRipper extends AbstractHTMLRipper {
private Document albumDoc = null; private Document albumDoc = null;
private Map<String,String> cookies = new HashMap<String,String>(); private Map<String,String> cookies = new HashMap<>();
public ZizkiRipper(URL url) throws IOException { public ZizkiRipper(URL url) throws IOException {
super(url); super(url);
@ -76,7 +76,7 @@ public class ZizkiRipper extends AbstractHTMLRipper {
@Override @Override
public List<String> getURLsFromPage(Document page) { public List<String> getURLsFromPage(Document page) {
List<String> imageURLs = new ArrayList<String>(); List<String> imageURLs = new ArrayList<>();
// Page contains images // Page contains images
logger.info("Look for images."); logger.info("Look for images.");
for (Element thumb : page.select("img")) { for (Element thumb : page.select("img")) {
@ -99,7 +99,6 @@ public class ZizkiRipper extends AbstractHTMLRipper {
src = thumb.attr("src"); src = thumb.attr("src");
logger.debug("Found url with " + src); logger.debug("Found url with " + src);
if (!src.contains("zizki.com")) { if (!src.contains("zizki.com")) {
continue;
} else { } else {
imageURLs.add(src.replace("/styles/medium/public/","/styles/large/public/")); imageURLs.add(src.replace("/styles/medium/public/","/styles/large/public/"));
} }

Some files were not shown because too many files have changed in this diff.