Merge pull request #1 from RipMeApp/master

Update from original
This commit is contained in:
rephormat 2018-01-12 12:50:17 -06:00 committed by GitHub
commit c070f154f4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
176 changed files with 4346 additions and 3174 deletions

View File

@ -1,12 +1,3 @@
<!--
We've moved! If you are not already, please consider opening your issue at the following link:
https://github.com/RipMeApp/ripme/issues/new
If this is a bug, please fill out the information below.
Please include any additional information that would help us fix the bug.
If this is a feature request or other type of issue, provide whatever information you feel is appropriate.
-->
* Ripme version:
* Java version: <!-- (output of `java -version`) -->
* Operating system: <!-- (if Windows, output of `ver` or `winver`) -->

View File

@ -1,10 +1,3 @@
<!--
We've moved! If you are not already, please consider opening your pull request here:
https://github.com/RipMeApp/ripme/
To help us verify your change, please fill out the information below.
-->
# Category
This change is exactly one of the following (please change `[ ]` to `[x]` to indicate which):

114
.gitignore vendored
View File

@ -1,17 +1,121 @@
# Created by https://www.gitignore.io/api/java,linux,macos,maven,windows
### Java ###
# Compiled class file
*.class
# Log file
*.log
# BlueJ files
*.ctxt
# Mobile Tools for Java (J2ME)
.mtj.tmp/
# Package Files #
*.jar
*.war
*.ear
*.zip
*.tar.gz
*.rar
# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*
### Linux ###
*~
# temporary files which can be created if a process still has a handle open of a deleted file
.fuse_hidden*
# KDE directory preferences
.directory
# Linux trash folder which might appear on any partition or disk
.Trash-*
# .nfs files are created when an open file is removed but is still being accessed
.nfs*
### macOS ###
*.DS_Store
.AppleDouble
.LSOverride
# Icon must end with two \r
Icon
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
### Maven ###
target/
.DS_Store
pom.xml.tag
pom.xml.releaseBackup
pom.xml.versionsBackup
pom.xml.next
release.properties
dependency-reduced-pom.xml
buildNumber.properties
.mvn/timing.properties
# Avoid ignoring Maven wrapper jar file (.jar files are usually ignored)
!/.mvn/wrapper/maven-wrapper.jar
### Windows ###
# Windows thumbnail cache files
Thumbs.db
ehthumbs.db
ehthumbs_vista.db
# Folder config file
Desktop.ini
# Recycle Bin used on file shares
$RECYCLE.BIN/
# Windows Installer files
*.cab
*.msi
*.msm
*.msp
# Windows shortcuts
*.lnk
### IDEs ###
.vscode
.idea
.project
### Ripme ###
ripme.log
rips/
.history
ripme.jar.update
*.swp
ripme.jar
rip.properties
*.properties
history.json
.idea
*.iml
.settings/
.classpath
*.txt
bin/
.vscode/

View File

@ -1,3 +1,6 @@
language: java
jdk:
- oraclejdk7
- oraclejdk8
- openjdk8
after_success:
- mvn clean test jacoco:report coveralls:report

View File

@ -2,7 +2,9 @@
"files.exclude": {
"target/**": true,
"**/.git": true,
"**/.DS_Store": true
"**/.DS_Store": true,
"**/*.class": true,
"**/rips/**": true
},
"java.configuration.updateBuildConfiguration": "automatic"
}

View File

@ -7,7 +7,16 @@ You can now find the latest code, issues, and releases at [RipMeApp/ripme](https
Please be polite and supportive to all users and contributors. Please be inclusive of everyone regardless of race, religion, gender identity or expression, sexual preference, or tools and platform preferences. Please be helpful and stick to the engineering facts, and avoid expressing unhelpful or off-topic opinions.
Many of the sites we deal with contain NSFW (Not Safe For Work) content. Please assume any link you see is NSFW unless tagged otherwise -- i.e., SFW (Safe For Work). Please tag all links you post with either (NSFW) or (SFW) to be considerate to others who may not be browsing this repo in private.
# NSFW Content
**Please tag NSFW links (links to sites with adult content) with "(NSFW)"!**
Many of the sites we deal with contain NSFW (Not Safe For Work) content. Please assume any link you see is NSFW unless tagged otherwise -- i.e., SFW (Safe For Work). Please tag all links you post with either "(NSFW)" or "(SFW)" to be considerate to others who may not be browsing this repo in private or who are not interested in NSFW content.
There is a helpful plugin called uMatrix available for [Firefox](https://addons.mozilla.org/en-US/firefox/addon/umatrix/) and [Chrome](https://chrome.google.com/webstore/detail/umatrix/ogfcmafjalglgifnmanfmnieipoejdcf) which allows you to block certain types of content like media and scripts.
If you're not sure whether a site contains NSFW images or media, and you are in mixed company but want to develop a new ripper, you can block image and media downloads in the * (all sites) scope and allow requests only for specific domains you trust as you go.
Being able to browse the HTML is usually the most important part of developing or fixing a ripper, so it is not necessarily important to actually see the images load.
# Priorities
@ -68,13 +77,14 @@ Good style is a tool for communicating your intent with other developers of the
Some recommendations:
* Above all, be consistent!
* Spaces, not tabs.
* Spaces, not tabs. Indents should be 4 spaces.
* We prefer "Egyptian brackets" (in `if`, `for`, `while`, `switch`, etc.):
* `if (...) {`
* `} else if (...) {`
* `} else {`
* `}`
* Constants in `UPPER_SNAKE_CASE`
* Note the spacing convention above for control flow constructs (a single space on the outside of each paren)
* Constants in `UPPER_SNAKE_CASE` a.k.a. `CONST_CASE`
* Class names in `PascalCase` a.k.a. `UpperCamelCase`
* Variable names in `camelCase` a.k.a. `lowerCamelCase`
* Do not use Hungarian notation
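To illustrate, here is a small made-up snippet (the class and names are invented purely for this example) that follows the conventions above:

```java
public class ExampleRipper {
    // Constants in UPPER_SNAKE_CASE
    private static final int MAX_RETRIES = 3;

    // camelCase methods and variables, 4-space indents, Egyptian brackets,
    // and a single space on the outside of each paren in control flow
    public boolean shouldRetry(int attemptCount) {
        if (attemptCount < MAX_RETRIES) {
            return true;
        } else {
            return false;
        }
    }
}
```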

View File

@ -1,9 +1,22 @@
# RipMe
# RipMe [![Licensed under the MIT License](https://img.shields.io/badge/License-MIT-blue.svg)](https://github.com/RipMeApp/ripme/blob/master/LICENSE.txt) [![Join the chat at https://gitter.im/RipMeApp/Lobby](https://badges.gitter.im/RipMeApp/Lobby.svg)](https://gitter.im/RipMeApp/Lobby?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) [![Subreddit](https://img.shields.io/badge/discuss-on%20reddit-blue.svg)](https://www.reddit.com/r/ripme/)
[![Build Status](https://travis-ci.org/4pr0n/ripme.svg?branch=master)](https://travis-ci.org/4pr0n/ripme)
[![Join the chat at https://gitter.im/RipMeApp/Lobby](https://badges.gitter.im/RipMeApp/Lobby.svg)](https://gitter.im/RipMeApp/Lobby?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
[![Build Status](https://travis-ci.org/RipMeApp/ripme.svg?branch=master)](https://travis-ci.org/RipMeApp/ripme)
[![Coverage Status](https://coveralls.io/repos/github/RipMeApp/ripme/badge.svg?branch=master)](https://coveralls.io/github/RipMeApp/ripme?branch=master)
Album ripper for various websites. Runs on your computer. Requires Java 1.6
# Contribute
RipMe is maintained with ♥️ and in our limited free time by **[@MetaPrime](https://github.com/metaprime)** and **[@cyian-1756](https://github.com/cyian-1756)**. If you'd like to contribute but aren't good with code, help keep us happy with a small contribution!
[![Tip with PayPal](https://img.shields.io/badge/PayPal-Buy_us...-lightgrey.svg)](https://www.paypal.me/ripmeapp)
[![Tip with PayPal](https://img.shields.io/badge/coffee-%245-green.svg)](https://www.paypal.com/paypalme/ripmeapp/send?amount=5.00&currencyCode=USD&locale.x=en_US&country.x=US)
[![Tip with PayPal](https://img.shields.io/badge/beer-%2410-yellow.svg)](https://www.paypal.com/paypalme/ripmeapp/send?amount=10.00&currencyCode=USD&locale.x=en_US&country.x=US)
[![Tip with PayPal](https://img.shields.io/badge/lunch-%2420-orange.svg)](https://www.paypal.com/paypalme/ripmeapp/send?amount=20.00&currencyCode=USD&locale.x=en_US&country.x=US)
[![Tip with PayPal](https://img.shields.io/badge/dinner-%2450-red.svg)](https://www.paypal.com/paypalme/ripmeapp/send?amount=50.00&currencyCode=USD&locale.x=en_US&country.x=US)
[![Tip with PayPal](https://img.shields.io/badge/custom_amount-...-lightgrey.svg)](https://www.paypal.me/ripmeapp)
# About
RipMe is an album ripper for various websites. Runs on your computer. Requires Java 8.
![Screenshot](http://i.imgur.com/kWzhsIu.png)
@ -73,14 +86,3 @@ mvn test
Please note that some tests may fail as sites change and our rippers become out of date.
Start by building and testing a released version of RipMe
and then ensure that any changes you make do not cause more tests to break.
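If you only want to exercise a single ripper's tests while developing, Maven's Surefire plugin accepts a class filter, e.g. `mvn test -Dtest=ImgurRipperTest` (substitute whichever test class you are working on; the class name here is only an example).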
# Dependencies
* junit-3.8.1
* jsoup-1.7.3
* json-20140107
* apache-commons-configuration-1.7
* log4j-1.2.17
* commons-cli-1.2
* commons-io-1.3.2
* httpcomponents-4.3.3

0
build.bat Normal file → Executable file
View File

1
build.sh Executable file
View File

@ -0,0 +1 @@
mvn clean compile assembly:single

40
docs/options.md Normal file
View File

@ -0,0 +1,40 @@
file.overwrite | bool | If true, ripme will overwrite existing files rather than skip them
clipboard.autorip | bool | If true, ripme will try to download any links in the clipboard
error.skip404 | bool | Don't retry on 404 errors
download.save_order | bool | If true, ripme will prefix each downloaded file with a number reflecting the order in which it was downloaded
auto.update | bool | If true, ripme will auto-update every time it's started
play.sound | bool | If true, ripme will play a sound every time a rip finishes
download.show_popup | bool | TODO figure out what this is for
log.save | bool | If true, ripme will save its logs
urls_only.save | bool | If true, ripme will save all URLs to a text file and download no files
album_titles.save | bool | Currently does nothing
prefer.mp4 | bool | Prefer MP4 when downloading a video that has more than one format
download.timeout | int | File download timeout (in milliseconds)
page.timeout | int | Page download timeout (in milliseconds)
download.max_size | int | Maximum size of downloaded files in bytes
threads.size | int | The number of threads to use
twitter.auth | String | Twitter API key (Base64'd)
tumblr.auth | String | Tumblr API key
log.level | String | The debug log level (example: `Log level: Debug`)
gw.api | String | TODO figure out what this is for
twitter.max_requests | int | TODO figure out what this is for
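As a rough sketch of how these keys are consumed: the `Utils` configuration helpers that appear elsewhere in this commit (`getConfigBoolean`, `getConfigInteger`) take a key plus a default value. The class name and default values below are illustrative assumptions, not the project's actual defaults:

```java
import com.rarchives.ripme.utils.Utils;

public class OptionsExample {
    public static void main(String[] args) {
        // The second argument is the fallback used when the key is absent from the config file
        boolean saveOrder = Utils.getConfigBoolean("download.save_order", false);
        int threadCount = Utils.getConfigInteger("threads.size", 10);
        int pageTimeout = Utils.getConfigInteger("page.timeout", 5000);
        System.out.println("save order: " + saveOrder
                + ", threads: " + threadCount
                + ", page timeout (ms): " + pageTimeout);
    }
}
```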

View File

@ -1,17 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<actions>
<action>
<actionName>run</actionName>
<packagings>
<packaging>jar</packaging>
</packagings>
<goals>
<goal>process-classes</goal>
<goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal>
</goals>
<properties>
<exec.args>-classpath %classpath com.rarchives.ripme.App</exec.args>
<exec.executable>java</exec.executable>
</properties>
</action>
</actions>

View File

@ -1,2 +0,0 @@
@echo off
powershell .\patch.ps1

View File

@ -1,53 +0,0 @@
Param (
[Parameter(Mandatory=$True)]
[string]$message
)
# This script will:
# - read current version
# - increment patch version
# - update version in a few places
# - insert new line in ripme.json with $message
$ripmeJson = (Get-Content "ripme.json") -join "`n" | ConvertFrom-Json
$currentVersion = $ripmeJson.latestVersion
Write-Output (("Current version", $currentVersion) -join ' ')
$versionFields = $currentVersion.split('.')
$patchCurr = [int]($versionFields[2])
$patchNext = $patchCurr + 1
$majorMinor = $versionFields[0..1]
$majorMinorPatch = $majorMinor + $patchNext
$nextVersion = $majorMinorPatch -join '.'
Write-Output (("Updating to", $nextVersion) -join ' ')
$substExpr = "s/${currentVersion}/${nextVersion}/"
sed src/main/java/com/rarchives/ripme/ui/UpdateUtils.java -i -e "${substExpr}"
git grep "DEFAULT_VERSION.*${nextVersion}" src/main/java/com/rarchives/ripme/ui/UpdateUtils.java
$substExpr = "s/\`"latestVersion\`" : \`"${currentVersion}\`"/\`"latestVersion\`" : \`"${nextVersion}\`"/"
sed ripme.json -i -e "${substExpr}"
git grep "latestVersion" ripme.json
$substExpr = "s/<version>${currentVersion}/<version>${nextVersion}/"
sed pom.xml -i -e "${substExpr}"
git grep "<version>${nextVersion}" pom.xml
$commitMessage = "${nextVersion}: ${message}"
$ripmeJsonLines = Get-Content "ripme.json"
$ripmeJsonHead = $ripmeJsonLines[0..2]
$ripmeJsonRest = $ripmeJsonLines[3..$ripmeJsonLines.length]
$changelogLine = " `"${commitMessage}`","
$updatedLines = $ripmeJsonHead + $changelogLine + $ripmeJsonRest + ""
$outputContent = $updatedLines -join "`n"
$outputPath = (Resolve-Path .\ripme.json).Path
$Utf8NoBomEncoding = New-Object System.Text.UTF8Encoding $False
[System.IO.File]::WriteAllText($outputPath, $outputContent, $Utf8NoBomEncoding)
git add -u
git commit -m $commitMessage
git tag $nextVersion
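# Example usage (the message text is arbitrary): powershell .\patch.ps1 -message "Fixed FooRipper"
# The -message parameter is mandatory, so PowerShell prompts for it when omitted (e.g., when launched via the patch.bat wrapper above).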

56
patch.py Normal file
View File

@ -0,0 +1,56 @@
import json
import subprocess
# This script will:
# - read current version
# - increment patch version
# - update version in a few places
# - insert new line in ripme.json with message
message = input('message: ')
with open('ripme.json') as dataFile:
ripmeJson = json.load(dataFile)
currentVersion = ripmeJson["latestVersion"]
print ('Current version ' + currentVersion)
versionFields = currentVersion.split('.')
patchCur = int(versionFields[2])
patchNext = patchCur + 1
majorMinor = versionFields[:2]
majorMinor.append(str(patchNext))
nextVersion = '.'.join(majorMinor)
print ('Updating to ' + nextVersion)
substrExpr = 's/' + currentVersion + '/' + nextVersion + '/'
subprocess.call(['sed', '-i', '-e', substrExpr, 'src/main/java/com/rarchives/ripme/ui/UpdateUtils.java'])
subprocess.call(['git', 'grep', 'DEFAULT_VERSION.*' + nextVersion,
'src/main/java/com/rarchives/ripme/ui/UpdateUtils.java'])
substrExpr = 's/\\\"latestVersion\\\": \\\"' + currentVersion + '\\\"/\\\"latestVersion\\\": \\\"' +\
nextVersion + '\\\"/'
subprocess.call(['sed', '-i', '-e', substrExpr, 'ripme.json'])
subprocess.call(['git', 'grep', 'latestVersion', 'ripme.json'])
substrExpr = 's/<version>' + currentVersion + '/<version>' + nextVersion + '/'
subprocess.call(['sed', '-i', '-e', substrExpr, 'pom.xml'])
subprocess.call(['git', 'grep', '<version>' + nextVersion + '</version>', 'pom.xml'])
commitMessage = nextVersion + ': ' + message
changeLogLine = ' \"' + commitMessage + '\",\n'
dataFile = open("ripme.json", "r")
ripmeJsonLines = dataFile.readlines()
ripmeJsonLines.insert(3, changeLogLine)
outputContent = ''.join(ripmeJsonLines)
dataFile.close()
dataFile = open("ripme.json", "w")
dataFile.write(outputContent)
dataFile.close()
subprocess.call(['git', 'add', '-u'])
subprocess.call(['git', 'commit', '-m', commitMessage])
subprocess.call(['git', 'tag', nextVersion])
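# Example usage: run `python3 patch.py` from the repository root. The script targets Python 3 (it reads the message with input())
# and shells out to sed and git, which must be on the PATH; the message you type becomes the newest changeList entry in
# ripme.json before the release commit and tag are created.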

25
pom.xml
View File

@ -4,7 +4,7 @@
<groupId>com.rarchives.ripme</groupId>
<artifactId>ripme</artifactId>
<packaging>jar</packaging>
<version>1.5.7</version>
<version>1.7.12</version>
<name>ripme</name>
<url>http://rip.rarchives.com</url>
<properties>
@ -84,10 +84,29 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>3.1</version>
<configuration>
<source>1.6</source>
<target>1.6</target>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<plugin>
<groupId>org.eluder.coveralls</groupId>
<artifactId>coveralls-maven-plugin</artifactId>
<version>4.3.0</version>
</plugin>
<plugin>
<!-- At time of writing: JaCoCo is (allegedly) the only coverage report generator that supports Java 8 -->
<groupId>org.jacoco</groupId>
<artifactId>jacoco-maven-plugin</artifactId>
<version>0.7.6.201602180812</version>
<executions>
<execution>
<id>prepare-agent</id>
<goals>
<goal>prepare-agent</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
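With these plugins in place, running `mvn clean test jacoco:report` locally (the same goals Travis runs, minus the Coveralls upload) should produce a coverage report, which JaCoCo writes under `target/site/jacoco/` by default.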

View File

@ -1,152 +1,187 @@
{
"latestVersion" : "1.5.7",
"changeList" : [
"1.5.7: Added EromeRipper",
"1.5.6: Fixed ImagearnRipper; Fixed SmuttyRipper",
"1.5.5: Wordpress comic ripper Updates",
"1.5.4: Added Luscious.net ripper",
"1.5.3: Eroshare links redirect to Eroshae; add AerisdiesRipper",
"1.5.2: Fix Imgur titles; fix xhamster (new URL format); fixed Instagram ripping cropped pictures",
"1.5.1: Ensure update mechanism is working correctly.",
"1.5.0: Change 'home' repo from 4pr0n/RipMe to RipMeApp/RipMe",
"1.4.21: Added Chevereto ripper (hushpix.com, tag-fox.com)",
"1.4.20: EroshareRipper can now rip user profiles",
"1.4.19: WordpressComicRipper supports more rippers; improvements to Instagram and code quality",
"1.4.18: Fix video rippers (broken in 1.4.14)",
"1.4.17: MyHentaiComics improvements",
"1.4.16: Fix Eightmuses; Add Instagram album support",
"1.4.15: Fixed DeviantArt Ripper",
"1.4.14: Improvements to ChanRipper (rip external links), MyHentaiComics, and Twitter (video and albums)",
"1.4.13: Fixed furaffinity ripper.",
"1.4.12: Fixed Crash on Win10 CU; Fixed SSL error on xHamster.",
"1.4.11: Instagram: fixed cropped images issue.",
"1.4.10: Add WordPressComicRipper (various sites supported)",
"1.4.9: Fixed HentaiFoundry ripper",
"1.4.8: Added Jagodibuja comics ripper",
"1.4.7: Fixed NewsFilter, XHamster; added TheChiveRipper",
"1.4.6: Eroshare: get album names; Imgur: improve grabbing album name.",
"1.4.5: SinnerComics: Added work around for naming bug",
"1.4.4: Added SinnerComics, MyHentaiComics rippers; improve E621 ripper.",
"1.4.3: Add missing subdomain for 4chan; fix ehentai, 8muses; add zizki ripper.",
"1.4.2: Added nhentai ripper.",
"1.4.1: Fixed Imgbox: correctly downloads full-size images.",
"1.4.0: Fixed update mechanism. Some improvements to Imgur, etc.",
"1.3.0: Fix Instagram, Tumblr, xHamster, 4chan, 8muses. Some new features.",
"1.2.13: Hotfix for imgur album rips",
"1.2.12: 500px gallery/subgallery support",
"1.2.11: Deviant fav subfolders, and reddituploads support",
"1.2.10: Imgur /gallery/ images fix",
"1.2.9: Imgur 10-image fix, original twitter sizes",
"1.2.8: Option to prefer MP4 over GIF for imgur",
"1.2.7: Fix 500px ripper to fetch NSFW images",
"1.2.6: Fix 500px ripper",
"1.2.5: Descriptions are optional, minor imgur fixes",
"1.2.4: Fix instagram ripper",
"1.2.3: Fix xhamster videos, option to remove/clear Queue",
"1.2.2: Fix imagefap ripper",
"1.2.1: Gfycat Fix, lots of changes pushed",
"1.2.0: Fix imagebam, 8muses. Remember queue items",
"1.1.9: Hotfix for new imgur album layout",
"1.1.8: Fix for failed reddit rips",
"1.1.7: Imagefap fix, corrupt history crash fix, deviantart 403 fix",
"1.1.6: History error handling and drawchan support",
"1.1.5: Fix imagefap and 8muses rippers",
"1.1.4: Fix DeviantArt 403 errors",
"1.1.3: Fix Check Selected in History",
"1.1.2: Check/Uncheck history by right-clicking the history",
"1.1.1: Gfycat/Reddit fix",
"1.1.0: Revamped History, Cheeby fix",
"1.0.93: Reddit fix, gfycat fix, video download fix",
"1.0.92: Anon-ib fix, cheeby fix, vid.me ripper",
"1.0.91: Fix for anon-ib, minus rippers",
"1.0.90: Hide error message when ripping valid album",
"1.0.89: Fix fapproved ripper",
"1.0.88: Fix imgbox ripper",
"1.0.87: Chan ripper update, Finebox update, Motherless video ripper",
"1.0.86: Fix for imagefap albums larger than 1k images",
"1.0.85: Fix Modelmayhem ripper",
"1.0.84: Ripper can resume after being stopped",
"1.0.83: Fix 2dgalleries ripper",
"1.0.82: Photobucket ripper fix, Clipboard Autorip toggle",
"1.0.81: Tumblr/seenive fixes, queue system, better history",
"1.0.80: Fix Butttoucher ripper",
"1.0.79: Fix cheeby to rip all images",
"1.0.78: BCFakes ripper",
"1.0.77: Cheeby ripper, status in title, various fixes",
"1.0.76: Option to only save URLs, Taptastic ripper",
"1.0.75: 500px ripper",
"1.0.74: Videarn video ripper",
"1.0.73: Datw.in ripper",
"1.0.72: Support for DeviantArt favourites",
"1.0.71: Fuskator ripper",
"1.0.70: Various improvements. Xhamster, Cliphunter video rippers",
"1.0.69: Gfycat video ripper, instgram username fix",
"1.0.68: Imagevenue and hentai-foundry rippers",
"1.0.67: Support for external tumblr domains",
"1.0.66: GirlsOfDesire ripper",
"1.0.65: Vidd.me video ripper",
"1.0.64: Imagebam ripper",
"1.0.63: Hopefully fixing freezing issue while re-ripping all albums",
"1.0.62: Imgur album directories named after album title",
"1.0.61: Logs are optional, defaults to not save logs",
"1.0.60: Fix for crazy directory creation bug",
"1.0.59: Show when albums can be ripped immediately",
"1.0.58: Logs are saved to album directory, ehentai fix",
"1.0.57: Nfsfw ripper",
"1.0.56: Fix for imgur rips",
"1.0.55: Ehentai ripper bypasses content warning",
"1.0.54: Mediacru.sh ripper, may require a Java update",
"1.0.53: 8Muses ripper fix, can rip subalbums",
"1.0.52: Imgbox ripper, popup notifications are optional",
"1.0.51: Deviantart rips full-size NSFW images",
"1.0.50: Smutty.com ripper",
"1.0.49: More Ehentai ripper fixes",
"1.0.48: Imagestash.org /tag/ ripper, ehentai fixes",
"1.0.47: Vidble ripper, right-click popupmenu on text",
"1.0.46: Auto-indexing filenames (001_, 002_, etc) is now optional",
"1.0.45: Imagefap /gallery/, Motherless search terms, reddit ripper fix",
"1.0.44: Deviantart rips full-size images",
"1.0.43: Added Modelmayhem ripper",
"1.0.42: Added Drawcrowd ripper, bug fix for large albums",
"1.0.41: Fix for multi-page Deviantart galleries, secure Flickr URLs",
"1.0.40: Flickr bug fix and groups support",
"1.0.39: Various fixes for Ehentai and Motherless",
"1.0.38: Ehentai ripper, 4chan .webm support, optional audio confirmations",
"1.0.37: Added Vine.co and Supertangas rippers",
"1.0.36: Added semi-working Gifyo ripper",
"1.0.35: Fixed i.rarchives ripper, delete empty directories",
"1.0.34: Added fapproved and anonib rippers",
"1.0.33: Imgur ripper fixes",
"1.0.32: Fix for directories with special characters",
"1.0.31: Fix for large imgur albums",
"1.0.30: Added Minus ripper",
"1.0.29: Various fixes for tumblr, flickr, 4chan",
"1.0.28: Added vk.com video ripper(s)",
"1.0.27: Added flickr ripper",
"1.0.26: Ability to rerip history from command-line",
"1.0.25: Added photobucket ripper",
"1.0.24: Fixed possible deadlock issue while re-ripping albums",
"1.0.23: Added teenplanet, irarchives, and butttoucher support",
"1.0.22: Fixed huge bug where ripper did not work at all for any sites",
"1.0.21: Ability to rip user account images on imgur",
"1.0.20: Video ripper support: pornhub, youporn, beeg, xvideos",
"1.0.19: Fix imgur account ripper",
"1.0.18: Button icons, kinkyshare.com ripper",
"1.0.17: *chan ripper, imgur titles in filenames",
"1.0.16: Fix bug with instagram usernames containing _ or -",
"1.0.15: Auto-updater should be compatible with Windows",
"1.0.14: Fix twitter account names with _ or -",
"1.0.13: Auto-updater is more verbose, hopefully works",
"1.0.12: Fixed clipboard autorip bug",
"1.0.11: 404 images are markead as errored",
"1.0.10: Taskbar notifications when rips start",
"1.0.9: More-verbose completion, UI tweaks",
"1.0.8: Auto-update functionality",
"1.0.7: Clipboard Autorip and tray icons",
"1.0.6: Support imgur.com/r/subreddit albums",
"1.0.5: Persistent configuration, small bug fixes",
"1.0.4: Fixed spaces-in-directory bug",
"1.0.3: Added VK.com ripper",
"1.0.1: Added auto-update functionality"
]
"latestVersion": "1.7.12",
"changeList": [
"1.7.12: Instagram ripper no longer 403s on certain images",
"1.7.11: Added gwarchives support to the cheveretoRipper; Gfycat Tests & Fix for bad reddit submissions; instagram ripper can now be made to skip videos",
"1.7.10: Added basic pornpics.com ripper; Fixed hentai.cafe regex",
"1.7.9: FuraffinityRipper can now rip non-public albums; Added 2 new api keys, ripper can now download raw images from tumblr; Erome ripper now matchs links without the www; Tumblr ripper now tells the user if it hits the rate limit",
"1.7.8: Forced https for tumblr image links; Fixed imgur album filenames; SankakuComplexRipper now downloads full sized images; Added dribbble.com ripper; Added comfirm button for clearing history",
"1.7.7: Fixed E621 Ripper; Added unit test for zizki.com; Added unit test for Xbooru.com; Updated reddit useragent",
"1.7.6: Added OglafRipper",
"1.7.5: Improve WordpressComicRipper; update to a modern User Agent",
"1.7.4: Fix WordpressComicRipper konradokonski.com/wiory; Fix CheveretoRipper hushpix.com by adding consent cookie",
"1.7.3: Improved Aerisdies and Imagearn folders; fixed tapas.io; XhamsterRipper now uses mobile site; InstagramRipper slideshows under user profiles",
"1.7.2: InstagramRipper: Added support for ripping individual posts",
"1.7.1: Fix WordpressComicRipper's ripper for freeadultcomix.com; FuraffinityRipper can now rip public albums",
"1.7.0: Improved Webtoons folders; Added code coverage with Coveralls.io and improved unit tests; removed rippers for dead sites",
"1.6.13: Added Instagram tags; improved Instagram and Pichunter regexes",
"1.6.12: Fix InstagramRipper with timestamps; Pichunter galleries support; logging improvements",
"1.6.11: Added pichunter.com ripper; Improved Instagram filenames; added tehyiffgallery ripper; Fixed xchan ripper; Fixed chanRipper folders",
"1.6.10: Added viewcomic ripper; Fixed webtoons malformed url error message; Fixed chan ripper thread title; Fixed Modelmayhem ripper",
"1.6.9: Added support for imgur /t/ albums; Added portable mode; Unit tests no longer fail if run twice; Formating fixes",
"1.6.8: code clean up; ripme can now remeber and skip already downloaded images",
"1.6.7: Fixed instagram ripper",
"1.6.6: Fixed 8muses ripper",
"1.6.5: Imgbox ripper now downloads full sized image from galleries",
"1.6.4: Added webtoons ripper",
"1.6.3: Window is now resizable; Added Porncomix.info ripper; Fixed imgbox ripper; Added hentai2read ripper",
"1.6.2: Fixed shesfreaky.com ripper; Fixed imgbox ripper; Fixed Xhamster video ripping",
"1.6.1: Rolled E621Ripper back from 1.6.0 to the 1.5.15 version",
"1.6.0: Updated to java 8; Some code cleanup",
"1.5.15: Added Hbrowse.com ripper; 8muses ripper now can rip from all album types",
"1.5.14: Myhentaicomics ripper no longer tries to download ads; Added hentai.cafe ripper; Fixed sankakucomplex ripper",
"1.5.13: InstagramRipper: fixed minor bug",
"1.5.12: Make tray icon optional; work around window positioning bug on Windows.",
"1.5.11: Added -v, --version flag",
"1.5.10: Added ripper for cfake.com; Fixed nhentai album naming",
"1.5.9: InstagramRipper now downloads full sized images; ImagefapRipper Now adds GID to folder name",
"1.5.8: Fixed 8muses ripper",
"1.5.7: Added EromeRipper",
"1.5.6: Fixed ImagearnRipper; Fixed SmuttyRipper",
"1.5.5: Wordpress comic ripper Updates",
"1.5.4: Added Luscious.net ripper",
"1.5.3: Eroshare links redirect to Eroshae; add AerisdiesRipper",
"1.5.2: Fix Imgur titles; fix xhamster (new URL format); fixed Instagram ripping cropped pictures",
"1.5.1: Ensure update mechanism is working correctly.",
"1.5.0: Change 'home' repo from 4pr0n/RipMe to RipMeApp/RipMe",
"1.4.21: Added Chevereto ripper (hushpix.com, tag-fox.com)",
"1.4.20: EroshareRipper can now rip user profiles",
"1.4.19: WordpressComicRipper supports more rippers; improvements to Instagram and code quality",
"1.4.18: Fix video rippers (broken in 1.4.14)",
"1.4.17: MyHentaiComics improvements",
"1.4.16: Fix Eightmuses; Add Instagram album support",
"1.4.15: Fixed DeviantArt Ripper",
"1.4.14: Improvements to ChanRipper (rip external links), MyHentaiComics, and Twitter (video and albums)",
"1.4.13: Fixed furaffinity ripper.",
"1.4.12: Fixed Crash on Win10 CU; Fixed SSL error on xHamster.",
"1.4.11: Instagram: fixed cropped images issue.",
"1.4.10: Add WordPressComicRipper (various sites supported)",
"1.4.9: Fixed HentaiFoundry ripper",
"1.4.8: Added Jagodibuja comics ripper",
"1.4.7: Fixed NewsFilter, XHamster; added TheChiveRipper",
"1.4.6: Eroshare: get album names; Imgur: improve grabbing album name.",
"1.4.5: SinnerComics: Added work around for naming bug",
"1.4.4: Added SinnerComics, MyHentaiComics rippers; improve E621 ripper.",
"1.4.3: Add missing subdomain for 4chan; fix ehentai, 8muses; add zizki ripper.",
"1.4.2: Added nhentai ripper.",
"1.4.1: Fixed Imgbox: correctly downloads full-size images.",
"1.4.0: Fixed update mechanism. Some improvements to Imgur, etc.",
"1.3.0: Fix Instagram, Tumblr, xHamster, 4chan, 8muses. Some new features.",
"1.2.13: Hotfix for imgur album rips",
"1.2.12: 500px gallery/subgallery support",
"1.2.11: Deviant fav subfolders, and reddituploads support",
"1.2.10: Imgur /gallery/ images fix",
"1.2.9: Imgur 10-image fix, original twitter sizes",
"1.2.8: Option to prefer MP4 over GIF for imgur",
"1.2.7: Fix 500px ripper to fetch NSFW images",
"1.2.6: Fix 500px ripper",
"1.2.5: Descriptions are optional, minor imgur fixes",
"1.2.4: Fix instagram ripper",
"1.2.3: Fix xhamster videos, option to remove/clear Queue",
"1.2.2: Fix imagefap ripper",
"1.2.1: Gfycat Fix, lots of changes pushed",
"1.2.0: Fix imagebam, 8muses. Remember queue items",
"1.1.9: Hotfix for new imgur album layout",
"1.1.8: Fix for failed reddit rips",
"1.1.7: Imagefap fix, corrupt history crash fix, deviantart 403 fix",
"1.1.6: History error handling and drawchan support",
"1.1.5: Fix imagefap and 8muses rippers",
"1.1.4: Fix DeviantArt 403 errors",
"1.1.3: Fix Check Selected in History",
"1.1.2: Check/Uncheck history by right-clicking the history",
"1.1.1: Gfycat/Reddit fix",
"1.1.0: Revamped History, Cheeby fix",
"1.0.93: Reddit fix, gfycat fix, video download fix",
"1.0.92: Anon-ib fix, cheeby fix, vid.me ripper",
"1.0.91: Fix for anon-ib, minus rippers",
"1.0.90: Hide error message when ripping valid album",
"1.0.89: Fix fapproved ripper",
"1.0.88: Fix imgbox ripper",
"1.0.87: Chan ripper update, Finebox update, Motherless video ripper",
"1.0.86: Fix for imagefap albums larger than 1k images",
"1.0.85: Fix Modelmayhem ripper",
"1.0.84: Ripper can resume after being stopped",
"1.0.83: Fix 2dgalleries ripper",
"1.0.82: Photobucket ripper fix, Clipboard Autorip toggle",
"1.0.81: Tumblr/seenive fixes, queue system, better history",
"1.0.80: Fix Butttoucher ripper",
"1.0.79: Fix cheeby to rip all images",
"1.0.78: BCFakes ripper",
"1.0.77: Cheeby ripper, status in title, various fixes",
"1.0.76: Option to only save URLs, Taptastic ripper",
"1.0.75: 500px ripper",
"1.0.74: Videarn video ripper",
"1.0.73: Datw.in ripper",
"1.0.72: Support for DeviantArt favourites",
"1.0.71: Fuskator ripper",
"1.0.70: Various improvements. Xhamster, Cliphunter video rippers",
"1.0.69: Gfycat video ripper, instgram username fix",
"1.0.68: Imagevenue and hentai-foundry rippers",
"1.0.67: Support for external tumblr domains",
"1.0.66: GirlsOfDesire ripper",
"1.0.65: Vidd.me video ripper",
"1.0.64: Imagebam ripper",
"1.0.63: Hopefully fixing freezing issue while re-ripping all albums",
"1.0.62: Imgur album directories named after album title",
"1.0.61: Logs are optional, defaults to not save logs",
"1.0.60: Fix for crazy directory creation bug",
"1.0.59: Show when albums can be ripped immediately",
"1.0.58: Logs are saved to album directory, ehentai fix",
"1.0.57: Nfsfw ripper",
"1.0.56: Fix for imgur rips",
"1.0.55: Ehentai ripper bypasses content warning",
"1.0.54: Mediacru.sh ripper, may require a Java update",
"1.0.53: 8Muses ripper fix, can rip subalbums",
"1.0.52: Imgbox ripper, popup notifications are optional",
"1.0.51: Deviantart rips full-size NSFW images",
"1.0.50: Smutty.com ripper",
"1.0.49: More Ehentai ripper fixes",
"1.0.48: Imagestash.org /tag/ ripper, ehentai fixes",
"1.0.47: Vidble ripper, right-click popupmenu on text",
"1.0.46: Auto-indexing filenames (001_, 002_, etc) is now optional",
"1.0.45: Imagefap /gallery/, Motherless search terms, reddit ripper fix",
"1.0.44: Deviantart rips full-size images",
"1.0.43: Added Modelmayhem ripper",
"1.0.42: Added Drawcrowd ripper, bug fix for large albums",
"1.0.41: Fix for multi-page Deviantart galleries, secure Flickr URLs",
"1.0.40: Flickr bug fix and groups support",
"1.0.39: Various fixes for Ehentai and Motherless",
"1.0.38: Ehentai ripper, 4chan .webm support, optional audio confirmations",
"1.0.37: Added Vine.co and Supertangas rippers",
"1.0.36: Added semi-working Gifyo ripper",
"1.0.35: Fixed i.rarchives ripper, delete empty directories",
"1.0.34: Added fapproved and anonib rippers",
"1.0.33: Imgur ripper fixes",
"1.0.32: Fix for directories with special characters",
"1.0.31: Fix for large imgur albums",
"1.0.30: Added Minus ripper",
"1.0.29: Various fixes for tumblr, flickr, 4chan",
"1.0.28: Added vk.com video ripper(s)",
"1.0.27: Added flickr ripper",
"1.0.26: Ability to rerip history from command-line",
"1.0.25: Added photobucket ripper",
"1.0.24: Fixed possible deadlock issue while re-ripping albums",
"1.0.23: Added teenplanet, irarchives, and butttoucher support",
"1.0.22: Fixed huge bug where ripper did not work at all for any sites",
"1.0.21: Ability to rip user account images on imgur",
"1.0.20: Video ripper support: pornhub, youporn, beeg, xvideos",
"1.0.19: Fix imgur account ripper",
"1.0.18: Button icons, kinkyshare.com ripper",
"1.0.17: *chan ripper, imgur titles in filenames",
"1.0.16: Fix bug with instagram usernames containing _ or -",
"1.0.15: Auto-updater should be compatible with Windows",
"1.0.14: Fix twitter account names with _ or -",
"1.0.13: Auto-updater is more verbose, hopefully works",
"1.0.12: Fixed clipboard autorip bug",
"1.0.11: 404 images are markead as errored",
"1.0.10: Taskbar notifications when rips start",
"1.0.9: More-verbose completion, UI tweaks",
"1.0.8: Auto-update functionality",
"1.0.7: Clipboard Autorip and tray icons",
"1.0.6: Support imgur.com/r/subreddit albums",
"1.0.5: Persistent configuration, small bug fixes",
"1.0.4: Fixed spaces-in-directory bug",
"1.0.3: Added VK.com ripper",
"1.0.1: Added auto-update functionality"
]
}

View File

@ -1,7 +1,6 @@
package com.rarchives.ripme;
import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.BufferedReader;
import java.io.FileReader;
@ -35,31 +34,51 @@ import com.rarchives.ripme.utils.Utils;
*/
public class App {
public static Logger logger;
public static final Logger logger;
private static final History HISTORY = new History();
public static void main(String[] args) throws MalformedURLException {
static {
//initialize logger
Utils.configureLogger();
logger = Logger.getLogger(App.class);
}
public static void main(String[] args) throws MalformedURLException {
CommandLine cl = getArgs(args);
if (args.length > 0 && cl.hasOption('v')){
logger.error(UpdateUtils.getThisJarVersion());
System.exit(0);
}
System.setProperty("apple.laf.useScreenMenuBar", "true");
System.setProperty("com.apple.mrj.application.apple.menu.about.name", "RipMe");
logger = Logger.getLogger(App.class);
logger.info("Initialized ripme v" + UpdateUtils.getThisJarVersion());
if (args.length > 0) {
// CLI Mode
handleArguments(args);
} else {
// GUI Mode
MainWindow mw = new MainWindow();
SwingUtilities.invokeLater(mw);
}
}
public static void rip(URL url) throws Exception {
/**
* Creates an abstract ripper and instructs it to rip.
* @param url URL to be ripped
* @throws Exception
*/
private static void rip(URL url) throws Exception {
AbstractRipper ripper = AbstractRipper.getRipper(url);
ripper.setup();
ripper.rip();
}
public static void handleArguments(String[] args) {
/**
* For dealing with command-line arguments.
* @param args Array of Command-line arguments
*/
private static void handleArguments(String[] args) {
CommandLine cl = getArgs(args);
if (cl.hasOption('h')) {
HelpFormatter hf = new HelpFormatter();
@ -98,8 +117,8 @@ public class App {
}
if (cl.hasOption('R')) {
loadHistory();
if (HISTORY.toList().size() == 0) {
System.err.println("There are no history entries to re-rip. Rip some albums first");
if (HISTORY.toList().isEmpty()) {
logger.error("There are no history entries to re-rip. Rip some albums first");
System.exit(-1);
}
int added = 0;
@ -122,7 +141,7 @@ public class App {
}
}
if (added == 0) {
System.err.println("No history entries have been 'Checked'\n" +
logger.error("No history entries have been 'Checked'\n" +
"Check an entry by clicking the checkbox to the right of the URL or Right-click a URL to check/uncheck all items");
System.exit(-1);
}
@ -134,7 +153,7 @@ public class App {
Utils.setConfigBoolean("download.save_order", false);
}
if ((cl.hasOption('d'))&&(cl.hasOption('D'))) {
System.err.println("\nCannot specify '-d' and '-D' simultaneously");
logger.error("\nCannot specify '-d' and '-D' simultaneously");
System.exit(-1);
}
if (cl.hasOption('l')) {
@ -162,14 +181,18 @@ public class App {
}
}
// this function will attempt to rip the provided url
public static void ripURL(String targetURL, boolean saveConfig) {
/**
* Attempt to rip targetURL.
* @param targetURL URL to rip
* @param saveConfig Whether or not you want to save the config (?)
*/
private static void ripURL(String targetURL, boolean saveConfig) {
try {
URL url = new URL(targetURL);
rip(url);
List<String> history = Utils.getConfigList("download.history");
if (!history.contains(url.toExternalForm())) {
history.add(url.toExternalForm());
if (!history.contains(url.toExternalForm())) {//if you haven't already downloaded the file before
history.add(url.toExternalForm());//add it to history so you won't have to redownload
Utils.setConfigList("download.history", Arrays.asList(history.toArray()));
if (saveConfig) {
Utils.saveConfig();
@ -184,7 +207,11 @@ public class App {
}
}
public static Options getOptions() {
/**
* Creates an Options object, returns it.
* @return Returns all acceptable command-line options.
*/
private static Options getOptions() {
Options opts = new Options();
opts.addOption("h", "help", false, "Print the help");
opts.addOption("u", "url", true, "URL of album to rip");
@ -198,31 +225,39 @@ public class App {
opts.addOption("l", "ripsdirectory", true, "Rips Directory (Default: ./rips)");
opts.addOption("n", "no-prop-file", false, "Do not create properties file.");
opts.addOption("f", "urls-file", true, "Rip URLs from a file.");
opts.addOption("v", "version", false, "Show current version");
return opts;
}
public static CommandLine getArgs(String[] args) {
/**
* Tries to parse commandline arguments.
* @param args Array of commandline arguments.
* @return CommandLine object containing arguments.
*/
private static CommandLine getArgs(String[] args) {
BasicParser parser = new BasicParser();
try {
CommandLine cl = parser.parse(getOptions(), args, false);
return cl;
return parser.parse(getOptions(), args, false);
} catch (ParseException e) {
logger.error("[!] Error while parsing command-line arguments: " + Arrays.toString(args), e);
System.exit(-1);
return null;
}
}
/**
* Loads history from history file into memory.
*/
private static void loadHistory() {
File historyFile = new File("history.json");
File historyFile = new File(Utils.getConfigDir() + File.separator + "history.json");
HISTORY.clear();
if (historyFile.exists()) {
try {
logger.info("Loading history from history.json");
HISTORY.fromFile("history.json");
logger.info("Loading history from " + historyFile.getCanonicalPath());
HISTORY.fromFile(historyFile.getCanonicalPath());
} catch (IOException e) {
logger.error("Failed to load history from file " + historyFile, e);
System.out.println(
logger.warn(
"RipMe failed to load the history file at " + historyFile.getAbsolutePath() + "\n\n" +
"Error: " + e.getMessage() + "\n\n" +
"Closing RipMe will automatically overwrite the contents of this file,\n" +
@ -234,12 +269,7 @@ public class App {
if (HISTORY.toList().size() == 0) {
// Loaded from config, still no entries.
// Guess rip history based on rip folder
String[] dirs = Utils.getWorkingDirectory().list(new FilenameFilter() {
@Override
public boolean accept(File dir, String file) {
return new File(dir.getAbsolutePath() + File.separator + file).isDirectory();
}
});
String[] dirs = Utils.getWorkingDirectory().list((dir, file) -> new File(dir.getAbsolutePath() + File.separator + file).isDirectory());
for (String dir : dirs) {
String url = RipUtils.urlFromDirectoryName(dir);
if (url != null) {

View File

@ -17,27 +17,27 @@ import com.rarchives.ripme.utils.Utils;
*/
public abstract class AbstractHTMLRipper extends AlbumRipper {
public AbstractHTMLRipper(URL url) throws IOException {
protected AbstractHTMLRipper(URL url) throws IOException {
super(url);
}
public abstract String getDomain();
protected abstract String getDomain();
public abstract String getHost();
public abstract Document getFirstPage() throws IOException;
protected abstract Document getFirstPage() throws IOException;
public Document getNextPage(Document doc) throws IOException {
return null;
}
public abstract List<String> getURLsFromPage(Document page);
public List<String> getDescriptionsFromPage(Document doc) throws IOException {
protected abstract List<String> getURLsFromPage(Document page);
protected List<String> getDescriptionsFromPage(Document doc) throws IOException {
throw new IOException("getDescriptionsFromPage not implemented"); // Do I do this or make an abstract function?
}
public abstract void downloadURL(URL url, int index);
public DownloadThreadPool getThreadPool() {
protected abstract void downloadURL(URL url, int index);
protected DownloadThreadPool getThreadPool() {
return null;
}
public boolean keepSortOrder() {
protected boolean keepSortOrder() {
return true;
}
@ -50,13 +50,13 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
public URL sanitizeURL(URL url) throws MalformedURLException {
return url;
}
public boolean hasDescriptionSupport() {
protected boolean hasDescriptionSupport() {
return false;
}
public String[] getDescription(String url,Document page) throws IOException {
protected String[] getDescription(String url, Document page) throws IOException {
throw new IOException("getDescription not implemented"); // Do I do this or make an abstract function?
}
public int descSleepTime() {
protected int descSleepTime() {
return 100;
}
@Override
@ -140,7 +140,15 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
}
waitForThreads();
}
public String fileNameFromURL(URL url) {
/**
* Gets the file name from the URL
* @param url
* URL that you want to get the filename from
* @return
* Filename of the URL
*/
private String fileNameFromURL(URL url) {
String saveAs = url.toExternalForm();
if (saveAs.substring(saveAs.length() - 1).equals("/")) { saveAs = saveAs.substring(0, saveAs.length() - 1); }
saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1);
@ -150,11 +158,25 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
if (saveAs.indexOf(':') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf(':')); }
return saveAs;
}
/**
*
* @param url
* Target URL
* @param subdirectory
* Path to subdirectory where you want to save it
* @param text
* Text you want to save
* @param index
* Index in something like an album
* @return
* True if ripped successfully
* False if failed
*/
public boolean saveText(URL url, String subdirectory, String text, int index) {
String saveAs = fileNameFromURL(url);
return saveText(url,subdirectory,text,index,saveAs);
}
public boolean saveText(URL url, String subdirectory, String text, int index, String fileName) {
private boolean saveText(URL url, String subdirectory, String text, int index, String fileName) {
// Not the best for some cases, like FurAffinity. Overridden there.
try {
stopCheck();
@ -189,7 +211,15 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
}
return true;
}
public String getPrefix(int index) {
/**
* Gets prefix based on where in the index it is
* @param index
* The index in question
* @return
* Returns prefix for a file. (?)
*/
protected String getPrefix(int index) {
String prefix = "";
if (keepSortOrder() && Utils.getConfigBoolean("download.save_order", true)) {
prefix = String.format("%03d_", index);

View File

@ -15,24 +15,25 @@ import com.rarchives.ripme.utils.Utils;
*/
public abstract class AbstractJSONRipper extends AlbumRipper {
public AbstractJSONRipper(URL url) throws IOException {
protected AbstractJSONRipper(URL url) throws IOException {
super(url);
}
public abstract String getDomain();
protected abstract String getDomain();
@Override
public abstract String getHost();
public abstract JSONObject getFirstPage() throws IOException;
public JSONObject getNextPage(JSONObject doc) throws IOException {
protected abstract JSONObject getFirstPage() throws IOException;
protected JSONObject getNextPage(JSONObject doc) throws IOException {
throw new IOException("getNextPage not implemented");
}
public abstract List<String> getURLsFromJSON(JSONObject json);
public abstract void downloadURL(URL url, int index);
public DownloadThreadPool getThreadPool() {
protected abstract List<String> getURLsFromJSON(JSONObject json);
protected abstract void downloadURL(URL url, int index);
private DownloadThreadPool getThreadPool() {
return null;
}
public boolean keepSortOrder() {
protected boolean keepSortOrder() {
return true;
}
@ -96,7 +97,7 @@ public abstract class AbstractJSONRipper extends AlbumRipper {
waitForThreads();
}
public String getPrefix(int index) {
protected String getPrefix(int index) {
String prefix = "";
if (keepSortOrder() && Utils.getConfigBoolean("download.save_order", true)) {
prefix = String.format("%03d_", index);

View File

@ -1,8 +1,7 @@
package com.rarchives.ripme.ripper;
import java.awt.Desktop;
import java.io.File;
import java.io.IOException;
import java.io.*;
import java.lang.reflect.Constructor;
import java.net.MalformedURLException;
import java.net.URL;
@ -21,21 +20,25 @@ import com.rarchives.ripme.ui.RipStatusMessage;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Utils;
import java.io.File;
import java.util.Scanner;
public abstract class AbstractRipper
extends Observable
implements RipperInterface, Runnable {
protected static final Logger logger = Logger.getLogger(AbstractRipper.class);
private final String URLHistoryFile = Utils.getURLHistoryFile();
public static final String USER_AGENT =
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:36.0) Gecko/20100101 Firefox/36.0";
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36";
protected URL url;
protected File workingDir;
protected DownloadThreadPool threadPool;
protected RipStatusHandler observer = null;
DownloadThreadPool threadPool;
RipStatusHandler observer = null;
protected boolean completed = true;
private boolean completed = true;
public abstract void rip() throws IOException;
public abstract String getHost();
@ -56,6 +59,56 @@ public abstract class AbstractRipper
}
}
private void writeDownloadedURL(String downloadedURL) throws IOException {
BufferedWriter bw = null;
FileWriter fw = null;
try {
File file = new File(URLHistoryFile);
// if file doesn't exist, then create it
if (!file.exists()) {
file.createNewFile();
}
fw = new FileWriter(file.getAbsoluteFile(), true);
bw = new BufferedWriter(fw);
bw.write(downloadedURL);
} catch (IOException e) {
e.printStackTrace();
} finally {
try {
if (bw != null)
bw.close();
if (fw != null)
fw.close();
} catch (IOException ex) {
ex.printStackTrace();
}
}
}
/**
* Checks to see if Ripme has already downloaded a URL
* @param url URL to check if downloaded
* @return
* Returns true if previously downloaded.
* Returns false if not yet downloaded.
*/
private boolean hasDownloadedURL(String url) {
File file = new File(URLHistoryFile);
try {
Scanner scanner = new Scanner(file);
while (scanner.hasNextLine()) {
final String lineFromFile = scanner.nextLine();
if (lineFromFile.equals(url)) {
return true;
}
}
} catch (FileNotFoundException e) {
return false;
}
return false;
}
/**
* Ensures inheriting ripper can rip this URL, raises exception if not.
* Otherwise initializes working directory and thread pool.
@ -72,6 +125,15 @@ public abstract class AbstractRipper
this.url = sanitizeURL(url);
}
/**
* Sets ripper's:
* Working directory
* Logger (for debugging)
* FileAppender
* Threadpool
* @throws IOException
* Always be prepared.
*/
public void setup() throws IOException {
setWorkingDir(this.url);
Logger rootLogger = Logger.getRootLogger();
@ -109,10 +171,34 @@ public abstract class AbstractRipper
* @param cookies
* The cookies to send to the server while downloading this file.
* @return
* True if downloaded successfully
* False if failed to download
*/
public abstract boolean addURLToDownload(URL url, File saveAs, String referrer, Map<String,String> cookies);
protected abstract boolean addURLToDownload(URL url, File saveAs, String referrer, Map<String, String> cookies);
public boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map<String,String> cookies) {
/**
* Queues image to be downloaded and saved.
* @param url
* URL of the file
* @param prefix
* Prefix for the downloaded file
* @param subdirectory
* Path to get to desired directory from working directory
* @param referrer
* The HTTP referrer to use while downloading this file.
* @param cookies
* The cookies to send to the server while downloading this file.
* @return
* True if downloaded successfully
* False if failed to download
*/
protected boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map<String, String> cookies) {
if (Utils.getConfigBoolean("remember.url_history", true) && !isThisATest()) {
if (hasDownloadedURL(url.toExternalForm())) {
sendUpdate(STATUS.DOWNLOAD_WARN, "Already downloaded " + url.toExternalForm());
return false;
}
}
try {
stopCheck();
} catch (IOException e) {
@ -131,6 +217,7 @@ public abstract class AbstractRipper
if (!subdirectory.equals("")) {
subdirectory = File.separator + subdirectory;
}
prefix = Utils.filesystemSanitized(prefix);
saveFileAs = new File(
workingDir.getCanonicalPath()
+ subdirectory
@ -146,6 +233,13 @@ public abstract class AbstractRipper
logger.info("[+] Creating directory: " + Utils.removeCWD(saveFileAs.getParent()));
saveFileAs.getParentFile().mkdirs();
}
if (Utils.getConfigBoolean("remember.url_history", true) && !isThisATest()) {
try {
writeDownloadedURL(url.toExternalForm() + "\n");
} catch (IOException e) {
logger.debug("Unable to write URL history file");
}
}
return addURLToDownload(url, saveFileAs, referrer, cookies);
}
@ -159,7 +253,7 @@ public abstract class AbstractRipper
* Sub-directory of the working directory to save the images to.
* @return True on success, false on failure.
*/
public boolean addURLToDownload(URL url, String prefix, String subdirectory) {
protected boolean addURLToDownload(URL url, String prefix, String subdirectory) {
return addURLToDownload(url, prefix, subdirectory, null, null);
}
@ -172,7 +266,7 @@ public abstract class AbstractRipper
* Text to append to saved filename.
* @return True on success, false on failure.
*/
public boolean addURLToDownload(URL url, String prefix) {
protected boolean addURLToDownload(URL url, String prefix) {
// Use empty subdirectory
return addURLToDownload(url, prefix, "");
}
@ -223,14 +317,14 @@ public abstract class AbstractRipper
/**
* @return Number of files downloaded.
*/
public int getCount() {
int getCount() {
return 1;
}
/**
* Notifies observers and updates state if all files have been ripped.
*/
protected void checkIfComplete() {
void checkIfComplete() {
if (observer == null) {
logger.debug("observer is null");
return;
@ -262,6 +356,11 @@ public abstract class AbstractRipper
}
}
/**
* Gets URL
* @return
* Returns URL that wants to be downloaded.
*/
public URL getURL() {
return url;
}
@ -275,8 +374,20 @@ public abstract class AbstractRipper
return workingDir;
}
@Override
public abstract void setWorkingDir(URL url) throws IOException;
/**
*
* @param url
* The URL you want to get the title of.
* @return
* host_URLid
* e.g. (for a reddit post)
* reddit_post_7mg2ur
* @throws MalformedURLException
* If any of those damned URLs gets malformed.
*/
public String getAlbumTitle(URL url) throws MalformedURLException {
return getHost() + "_" + getGID(url);
}
@ -320,10 +431,10 @@ public abstract class AbstractRipper
* @throws Exception
*/
public static List<Constructor<?>> getRipperConstructors(String pkg) throws Exception {
List<Constructor<?>> constructors = new ArrayList<Constructor<?>>();
List<Constructor<?>> constructors = new ArrayList<>();
for (Class<?> clazz : Utils.getClassesForPackage(pkg)) {
if (AbstractRipper.class.isAssignableFrom(clazz)) {
constructors.add( (Constructor<?>) clazz.getConstructor(URL.class) );
constructors.add(clazz.getConstructor(URL.class));
}
}
return constructors;
@ -331,7 +442,7 @@ public abstract class AbstractRipper
/**
* Sends an update message to the relevant observer(s) on this ripper.
* @param status
* @param status
* @param message
*/
public void sendUpdate(STATUS status, Object message) {
@ -340,9 +451,17 @@ public abstract class AbstractRipper
}
observer.update(this, new RipStatusMessage(status, message));
}
/**
* Get the completion percentage.
* @return
* Percentage complete
*/
public abstract int getCompletionPercentage();
/**
* @return
* Text for status
*/
public abstract String getStatusText();
/**
@ -355,10 +474,6 @@ public abstract class AbstractRipper
logger.error("Got exception while running ripper:", e);
waitForThreads();
sendUpdate(STATUS.RIP_ERRORED, "HTTP status code " + e.getStatusCode() + " for URL " + e.getUrl());
} catch (IOException e) {
logger.error("Got exception while running ripper:", e);
waitForThreads();
sendUpdate(STATUS.RIP_ERRORED, e.getMessage());
} catch (Exception e) {
logger.error("Got exception while running ripper:", e);
waitForThreads();
@ -367,8 +482,10 @@ public abstract class AbstractRipper
cleanup();
}
}
public void cleanup() {
/**
* Tries to delete any empty directories
*/
private void cleanup() {
if (this.workingDir.list().length == 0) {
// No files, delete the dir
logger.info("Deleting empty directory " + this.workingDir);
@ -378,8 +495,16 @@ public abstract class AbstractRipper
}
}
}
public boolean sleep(int milliseconds) {
/**
* Pauses thread for a set amount of time.
* @param milliseconds
* Amount of time (in milliseconds) that the thread gets paused for
* @return
* True if paused successfully
* False if failed to pause/got interrupted.
*/
protected boolean sleep(int milliseconds) {
try {
logger.debug("Sleeping " + milliseconds + "ms");
Thread.sleep(milliseconds);
@ -402,7 +527,7 @@ public abstract class AbstractRipper
logger.debug("THIS IS A TEST RIP");
thisIsATest = true;
}
public boolean isThisATest() {
protected boolean isThisATest() {
return thisIsATest;
}
}

View File

@ -13,13 +13,17 @@ import com.rarchives.ripme.ui.RipStatusMessage;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Utils;
/**
* For ripping delicious albums off the interwebz.
*/
public abstract class AlbumRipper extends AbstractRipper {
protected Map<URL, File> itemsPending = Collections.synchronizedMap(new HashMap<URL, File>());
protected Map<URL, File> itemsCompleted = Collections.synchronizedMap(new HashMap<URL, File>());
protected Map<URL, String> itemsErrored = Collections.synchronizedMap(new HashMap<URL, String>());
private Map<URL, File> itemsPending = Collections.synchronizedMap(new HashMap<URL, File>());
private Map<URL, File> itemsCompleted = Collections.synchronizedMap(new HashMap<URL, File>());
private Map<URL, String> itemsErrored = Collections.synchronizedMap(new HashMap<URL, String>());
public AlbumRipper(URL url) throws IOException {
protected AlbumRipper(URL url) throws IOException {
super(url);
}
@ -29,15 +33,22 @@ public abstract class AlbumRipper extends AbstractRipper {
public abstract String getHost();
public abstract String getGID(URL url) throws MalformedURLException;
public boolean allowDuplicates() {
protected boolean allowDuplicates() {
return false;
}
@Override
/**
* Returns the total number of files attempted.
*/
public int getCount() {
return itemsCompleted.size() + itemsErrored.size();
}
@Override
/**
* Queues multiple URLs of single images to download from a single Album URL
*/
public boolean addURLToDownload(URL url, File saveAs, String referrer, Map<String,String> cookies) {
// Only download one file if this is a test.
if (super.isThisATest() &&
@ -95,12 +106,15 @@ public abstract class AlbumRipper extends AbstractRipper {
* @return
* True on success
*/
public boolean addURLToDownload(URL url) {
protected boolean addURLToDownload(URL url) {
// Use empty prefix and empty subdirectory
return addURLToDownload(url, "", "");
}
@Override
/**
* Cleans up & tells user about successful download
*/
public void downloadCompleted(URL url, File saveAs) {
if (observer == null) {
return;
@ -119,6 +133,9 @@ public abstract class AlbumRipper extends AbstractRipper {
}
@Override
/**
* Cleans up & tells user about failed download.
*/
public void downloadErrored(URL url, String reason) {
if (observer == null) {
return;
@ -131,6 +148,10 @@ public abstract class AlbumRipper extends AbstractRipper {
}
@Override
/**
* Tells user that a single file in the album they wish to download has
* already been downloaded in the past.
*/
public void downloadExists(URL url, File file) {
if (observer == null) {
return;

View File

@ -24,12 +24,12 @@ import com.rarchives.ripme.utils.Utils;
* Thread for downloading files.
* Includes retry logic, observer notifications, and other goodies.
*/
public class DownloadFileThread extends Thread {
class DownloadFileThread extends Thread {
private static final Logger logger = Logger.getLogger(DownloadFileThread.class);
private String referrer = "";
private Map<String,String> cookies = new HashMap<String,String>();
private Map<String,String> cookies = new HashMap<>();
private URL url;
private File saveAs;

View File

@ -23,17 +23,28 @@ public class DownloadThreadPool {
public DownloadThreadPool(String threadPoolName) {
initialize(threadPoolName);
}
/**
* Initializes the threadpool.
* @param threadPoolName Name of the threadpool.
*/
private void initialize(String threadPoolName) {
int threads = Utils.getConfigInteger("threads.size", 10);
logger.debug("Initializing " + threadPoolName + " thread pool with " + threads + " threads");
threadPool = (ThreadPoolExecutor) Executors.newFixedThreadPool(threads);
}
/**
* For adding threads to execution pool.
* @param t
* Thread to be added.
*/
public void addThread(Thread t) {
threadPool.execute(t);
}
/**
* Tries to shut down the thread pool.
*/
public void waitForThreads() {
threadPool.shutdown();
try {
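For reference, a self-contained sketch (assumed, not taken from this commit) of the fixed-size pool pattern DownloadThreadPool wraps: submit tasks, then shut down and wait. The pool size of 10 mirrors the "threads.size" default shown above; the task body is illustrative.

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

public class PoolSketch {
    public static void main(String[] args) throws InterruptedException {
        ExecutorService pool = Executors.newFixedThreadPool(10);
        for (int i = 0; i < 25; i++) {
            final int n = i;
            pool.execute(() -> System.out.println("downloading item " + n));
        }
        pool.shutdown();                            // stop accepting new tasks
        pool.awaitTermination(5, TimeUnit.MINUTES); // wait for queued downloads to finish
    }
}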

View File

@ -20,7 +20,7 @@ import com.rarchives.ripme.utils.Utils;
* Thread for downloading files.
* Includes retry logic, observer notifications, and other goodies.
*/
public class DownloadVideoThread extends Thread {
class DownloadVideoThread extends Thread {
private static final Logger logger = Logger.getLogger(DownloadVideoThread.class);
@ -136,6 +136,12 @@ public class DownloadVideoThread extends Thread {
logger.info("[+] Saved " + url + " as " + this.prettySaveAs);
}
/**
* @param url
* Target URL
* @return
* The content length reported by the server, in bytes
*/
private int getTotalBytes(URL url) throws IOException {
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setRequestMethod("HEAD");
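A short, standalone sketch (an assumption, not RipMe code) of the HEAD-request trick getTotalBytes() relies on: request headers only and read the advertised length. The URL below is a placeholder.

import java.net.HttpURLConnection;
import java.net.URL;

public class HeadLengthSketch {
    static long contentLength(String address) throws Exception {
        HttpURLConnection conn = (HttpURLConnection) new URL(address).openConnection();
        conn.setRequestMethod("HEAD"); // headers only, no body transfer
        conn.connect();
        try {
            return conn.getContentLengthLong(); // -1 if the server sent no Content-Length
        } finally {
            conn.disconnect();
        }
    }

    public static void main(String[] args) throws Exception {
        System.out.println(contentLength("https://example.com/"));
    }
}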

View File

@ -7,8 +7,10 @@ import java.net.URL;
/**
* I have no idea why I made this interface. Everything is captured within the AbstractRipper.
* Oh well, here's to encapsulation and abstraction! (raises glass)
*
* (cheers!)
*/
public interface RipperInterface {
interface RipperInterface {
void rip() throws IOException;
boolean canRip(URL url);
URL sanitizeURL(URL url) throws MalformedURLException;

View File

@ -16,7 +16,7 @@ public abstract class VideoRipper extends AbstractRipper {
private int bytesTotal = 1,
bytesCompleted = 1;
public VideoRipper(URL url) throws IOException {
protected VideoRipper(URL url) throws IOException {
super(url);
}
@ -74,6 +74,12 @@ public abstract class VideoRipper extends AbstractRipper {
return addURLToDownload(url, saveAs);
}
/**
* Creates & sets working directory based on URL.
* @param url
* Target URL
*/
@Override
public void setWorkingDir(URL url) throws IOException {
String path = Utils.getWorkingDirectory().getCanonicalPath();
@ -88,12 +94,23 @@ public abstract class VideoRipper extends AbstractRipper {
}
logger.debug("Set working directory to: " + this.workingDir);
}
/**
* @return
* Returns % of video done downloading.
*/
@Override
public int getCompletionPercentage() {
return (int) (100 * (bytesCompleted / (float) bytesTotal));
}
/**
* Runs if download successfully completed.
* @param url
* Target URL
* @param saveAs
* Path to file, including filename.
*/
@Override
public void downloadCompleted(URL url, File saveAs) {
if (observer == null) {
@ -109,6 +126,14 @@ public abstract class VideoRipper extends AbstractRipper {
logger.error("Exception while updating observer: ", e);
}
}
/**
* Runs if the download errored somewhere.
* @param url
* Target URL
* @param reason
* Reason why the download failed.
*/
@Override
public void downloadErrored(URL url, String reason) {
if (observer == null) {
@ -117,6 +142,15 @@ public abstract class VideoRipper extends AbstractRipper {
observer.update(this, new RipStatusMessage(STATUS.DOWNLOAD_ERRORED, url + " : " + reason));
checkIfComplete();
}
/**
* Runs if user tries to redownload an already existing File.
* @param url
* Target URL
* @param file
* Existing file
*/
@Override
public void downloadExists(URL url, File file) {
if (observer == null) {
@ -126,6 +160,11 @@ public abstract class VideoRipper extends AbstractRipper {
checkIfComplete();
}
/**
* Gets the status and changes it to a human-readable form.
* @return
* Status of current download.
*/
@Override
public String getStatusText() {
StringBuilder sb = new StringBuilder();
@ -139,6 +178,10 @@ public abstract class VideoRipper extends AbstractRipper {
}
@Override
/**
* Sanitizes URL.
* Usually just returns itself.
*/
public URL sanitizeURL(URL url) throws MalformedURLException {
return url;
}
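The completion percentage above depends on the float cast; without it the integer division would report 0 until the download finishes. A tiny sketch of the same formula, with illustrative values:

public class PercentSketch {
    static int percent(long bytesCompleted, long bytesTotal) {
        return (int) (100 * (bytesCompleted / (float) bytesTotal));
    }

    public static void main(String[] args) {
        System.out.println(percent(512, 2048));  // 25
        System.out.println(percent(2048, 2048)); // 100
    }
}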

View File

@ -4,7 +4,6 @@ import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
@ -16,14 +15,13 @@ import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;
import java.util.HashMap;
public class AerisdiesRipper extends AbstractHTMLRipper {
private Document albumDoc = null;
private Map<String,String> cookies = new HashMap<String,String>();
private Map<String,String> cookies = new HashMap<>();
public AerisdiesRipper(URL url) throws IOException {
@ -41,20 +39,20 @@ public class AerisdiesRipper extends AbstractHTMLRipper {
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://www.aerisdies.com/html/lb/([a-z]*_[0-9]*_\\d)\\.html");
Pattern p = Pattern.compile("^https?://www.aerisdies.com/html/lb/[a-z]*_(\\d+)_\\d\\.html");
Matcher m = p.matcher(url.toExternalForm());
if (!m.matches()) {
throw new MalformedURLException("Expected URL format: http://www.aerisdies.com/html/lb/albumDIG, got: " + url);
}
return m.group(m.groupCount());
return m.group(1);
}
@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
try {
// Attempt to use album title as GID
String title = getFirstPage().select("title").first().text();
return getHost() + "_" + title.trim();
String title = getFirstPage().select("div > div > span[id=albumname] > a").first().text();
return getHost() + "_" + getGID(url) + "_" + title.trim();
} catch (IOException e) {
// Fall back to default album naming convention
logger.info("Unable to find title at " + url);
@ -74,7 +72,7 @@ public class AerisdiesRipper extends AbstractHTMLRipper {
@Override
public List<String> getURLsFromPage(Document page) {
List<String> imageURLs = new ArrayList<String>();
List<String> imageURLs = new ArrayList<>();
Elements albumElements = page.select("div.imgbox > a > img");
for (Element imageBox : albumElements) {
String imageUrl = imageBox.attr("src");
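A standalone sketch of the tightened GID pattern above: the numeric album id is taken from capture group 1 instead of the whole lettered prefix. The sample URL is invented for illustration.

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class AerisdiesGidSketch {
    public static void main(String[] args) {
        Pattern p = Pattern.compile("^https?://www.aerisdies.com/html/lb/[a-z]*_(\\d+)_\\d\\.html");
        Matcher m = p.matcher("http://www.aerisdies.com/html/lb/album_12345_1.html");
        System.out.println(m.matches() ? m.group(1) : "no match"); // prints 12345
    }
}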

View File

@ -66,7 +66,7 @@ public class BcfakesRipper extends AbstractHTMLRipper {
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>();
List<String> imageURLs = new ArrayList<>();
for (Element thumb : doc.select("div.ngg-gallery-thumbnail > a > img")) {
String imageURL = thumb.attr("src");
imageURL = imageURL.replace("thumbs/thumbs_", "");

View File

@ -1,70 +0,0 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
public class ButttoucherRipper extends AbstractHTMLRipper {
public ButttoucherRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "butttoucher";
}
@Override
public String getDomain() {
return "butttoucher.com";
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p; Matcher m;
p = Pattern.compile("^.*butttoucher.com/users/([a-zA-Z0-9_\\-]{1,}).*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException(
"Expected butttoucher.com gallery format: "
+ "butttoucher.com/users/<username>"
+ " Got: " + url);
}
@Override
public Document getFirstPage() throws IOException {
return Http.url(this.url).get();
}
@Override
public List<String> getURLsFromPage(Document page) {
List<String> thumbs = new ArrayList<String>();
for (Element thumb : page.select(".thumb img")) {
if (!thumb.hasAttr("src")) {
continue;
}
String smallImage = thumb.attr("src");
thumbs.add(smallImage.replace("m.", "."));
}
return thumbs;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}

View File

@ -0,0 +1,91 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
public class CfakeRipper extends AbstractHTMLRipper {
public CfakeRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "cfake";
}
@Override
public String getDomain() {
return "cfake.com";
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("https?://cfake\\.com/picture/([a-zA-Z1-9_-]*)/\\d+/?$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Expected cfake URL format: " +
"cfake.com/picture/MODEL/ID - got " + url + " instead");
}
@Override
public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass
return Http.url(url).get();
}
@Override
public Document getNextPage(Document doc) throws IOException {
// Find next page
String nextUrl = "";
// We use the "next" link to find the next page
Element elem = doc.select("td > div.next > a").first();
if (elem == null) {
throw new IOException("No more pages");
}
String nextPage = elem.attr("href");
// Sometimes this returns an empty string
// This check stops that
if (nextPage == "") {
return null;
}
else {
return Http.url("http://cfake.com" + nextPage).get();
}
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<>();
for (Element el : doc.select("table.display > tbody > tr > td > table > tbody > tr > td > a")) {
if (el.attr("href").contains("upload")) {
return result;
} else {
String imageSource = el.select("img").attr("src");
// We swap "thumbs" for "photos" in the path so we download the full-size image
// not the thumbnail one
imageSource = imageSource.replace("thumbs", "photos");
result.add("http://cfake.com" + imageSource);
}
}
return result;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}
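A self-contained jsoup sketch (assuming the org.jsoup dependency this project already uses) of the next-page lookup in getNextPage() above: select the anchor and stop when it is missing or empty. The HTML snippet is invented.

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

public class NextPageSketch {
    public static void main(String[] args) {
        Document doc = Jsoup.parse(
            "<table><tr><td><div class=\"next\"><a href=\"/picture/model/2\">next</a></div></td></tr></table>");
        Element elem = doc.select("td > div.next > a").first();
        if (elem == null || elem.attr("href").isEmpty()) {
            System.out.println("no more pages");
        } else {
            System.out.println("next page: http://cfake.com" + elem.attr("href"));
        }
    }
}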

View File

@ -18,23 +18,21 @@ import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.RipUtils;
public class ChanRipper extends AbstractHTMLRipper {
public static List<ChanSite> explicit_domains = Arrays.asList(
private static List<ChanSite> explicit_domains = Arrays.asList(
new ChanSite(Arrays.asList("boards.4chan.org"), Arrays.asList("4cdn.org", "is.4chan.org", "is2.4chan.org")),
new ChanSite(Arrays.asList("archive.moe"), Arrays.asList("data.archive.moe")),
new ChanSite(Arrays.asList("4archive.org"), Arrays.asList("imgur.com")),
new ChanSite(Arrays.asList("archive.4plebs.org"), Arrays.asList("img.4plebs.org")),
new ChanSite(Arrays.asList("fgts.jp"), Arrays.asList("dat.fgtsi.org"))
new ChanSite(Arrays.asList("archive.4plebs.org"), Arrays.asList("img.4plebs.org"))
);
public static List<String> url_piece_blacklist = Arrays.asList(
private static List<String> url_piece_blacklist = Arrays.asList(
"=http",
"http://imgops.com/",
"iqdb.org",
"saucenao.com"
);
public ChanSite chanSite;
public Boolean generalChanSite = true;
private ChanSite chanSite;
private Boolean generalChanSite = true;
public ChanRipper(URL url) throws IOException {
super(url);
@ -66,13 +64,18 @@ public class ChanRipper extends AbstractHTMLRipper {
try {
// Attempt to use album title as GID
Document doc = getFirstPage();
String subject = doc.select(".post.op > .postinfo > .subject").first().text();
return getHost() + "_" + getGID(url) + "_" + subject;
try {
String subject = doc.select(".post.op > .postinfo > .subject").first().text();
return getHost() + "_" + getGID(url) + "_" + subject;
} catch (NullPointerException e) {
logger.warn("Failed to get thread title from " + url);
}
} catch (Exception e) {
// Fall back to default album naming convention
logger.warn("Failed to get album title from " + url, e);
}
return super.getAlbumTitle(url);
// Fall back on the GID
return getHost() + "_" + getGID(url);
}
@Override
@ -114,6 +117,12 @@ public class ChanRipper extends AbstractHTMLRipper {
if (m.matches()) {
return m.group(1);
}
// xchan
p = Pattern.compile("^.*\\.[a-z]{1,3}/board/[a-zA-Z0-9]+/thread/([0-9]+)/?.*$");
m = p.matcher(u);
if (m.matches()) {
return m.group(1);
}
}
throw new MalformedURLException(
@ -143,7 +152,7 @@ public class ChanRipper extends AbstractHTMLRipper {
}
@Override
public List<String> getURLsFromPage(Document page) {
List<String> imageURLs = new ArrayList<String>();
List<String> imageURLs = new ArrayList<>();
Pattern p; Matcher m;
for (Element link : page.select("a")) {
if (!link.hasAttr("href")) {
@ -208,6 +217,6 @@ public class ChanRipper extends AbstractHTMLRipper {
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index), "", this.url.toString(), null);
addURLToDownload(url, getPrefix(index));
}
}
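A quick sketch of the "xchan" pattern added above: it pulls the numeric thread id out of a /board/&lt;name&gt;/thread/&lt;id&gt; style URL. The sample URL below is made up.

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class ThreadIdSketch {
    public static void main(String[] args) {
        Pattern p = Pattern.compile("^.*\\.[a-z]{1,3}/board/[a-zA-Z0-9]+/thread/([0-9]+)/?.*$");
        Matcher m = p.matcher("https://example.net/board/b/thread/123456/some-title");
        System.out.println(m.matches() ? m.group(1) : "no match"); // prints 123456
    }
}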

View File

@ -1,180 +0,0 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;
public class CheebyRipper extends AbstractHTMLRipper {
private int offset = 0;
private Map<String, Integer> albumSets = new HashMap<String, Integer>();
public CheebyRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "cheeby";
}
@Override
public String getDomain() {
return "cheeby.com";
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://[w.]*cheeby.com/u/([a-zA-Z0-9\\-_]{3,}).*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("cheeby user not found in " + url + ", expected http://cheeby.com/u/username");
}
@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
return new URL("http://cheeby.com/u/" + getGID(url) + "/pics");
}
@Override
public Document getFirstPage() throws IOException {
String url = this.url + "?limit=10&offset=0";
return Http.url(url)
.get();
}
@Override
public Document getNextPage(Document doc) throws IOException {
sleep(500);
offset += 1;
String url = this.url + "?p=" + offset;
Document nextDoc = Http.url(url).get();
if (nextDoc.select("div.i a img").size() == 0) {
throw new IOException("No more images to fetch");
}
return nextDoc;
}
@Override
public void downloadURL(URL url, int index) {
// Not implemented here
}
@Override
public List<String> getURLsFromPage(Document page) {
// Not implemented here
return null;
}
public List<Image> getImagesFromPage(Document page) {
List<Image> imageURLs = new ArrayList<Image>();
for (Element image : page.select("div.i a img")) {
// Get image URL
String imageURL = image.attr("src");
imageURL = imageURL.replace("s.", ".");
// Get "album" from image link
String href = image.parent().attr("href");
while (href.endsWith("/")) {
href = href.substring(0, href.length() - 2);
}
String[] hrefs = href.split("/");
String prefix = hrefs[hrefs.length - 1];
// Keep track of how many images are in this album
int albumSetCount = 0;
if (albumSets.containsKey(prefix)) {
albumSetCount = albumSets.get(prefix);
}
albumSetCount++;
albumSets.put(prefix, albumSetCount);
imageURLs.add(new Image(imageURL, prefix, albumSetCount));
}
return imageURLs;
}
@Override
public void rip() throws IOException {
logger.info("Retrieving " + this.url);
sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm());
Document doc = getFirstPage();
while (doc != null) {
List<Image> images = getImagesFromPage(doc);
if (images.size() == 0) {
throw new IOException("No images found at " + doc.location());
}
for (Image image : images) {
if (isStopped()) {
break;
}
// Don't create subdirectory if "album" only has 1 image
if (albumSets.get(image.prefix) > 1) {
addURLToDownload(new URL(image.url), getPrefix(image.index), image.prefix);
}
else {
addURLToDownload(new URL(image.url));
}
}
if (isStopped()) {
break;
}
try {
sendUpdate(STATUS.LOADING_RESOURCE, "next page");
doc = getNextPage(doc);
} catch (IOException e) {
logger.info("Can't get next page: " + e.getMessage());
break;
}
}
// If they're using a thread pool, wait for it.
if (getThreadPool() != null) {
getThreadPool().waitForThreads();
}
waitForThreads();
// Delete empty subdirectories
for (String prefix : albumSets.keySet()) {
if (prefix.trim().equals("")) {
continue;
}
File f = new File(this.workingDir, prefix);
if (f.list() != null && f.list().length == 0) {
logger.info("Deleting empty directory: " + f.getAbsolutePath());
f.delete();
}
}
}
private class Image {
String url, prefix;
int index;
public Image(String url, String prefix, int index) {
this.url = url;
this.prefix = prefix;
this.index = index;
}
}
}

View File

@ -6,118 +6,119 @@ import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
public class CheveretoRipper extends AbstractHTMLRipper {
private static final Map<String, String> CONSENT_COOKIE;
static {
CONSENT_COOKIE = new TreeMap<String, String>();
CONSENT_COOKIE.put("AGREE_CONSENT", "1");
}
public CheveretoRipper(URL url) throws IOException {
super(url);
super(url);
}
public static List<String> explicit_domains_1 = Arrays.asList("hushpix.com", "tag-fox.com");
@Override
public String getHost() {
String host = url.toExternalForm().split("/")[2];
return host;
}
@Override
public String getDomain() {
String host = url.toExternalForm().split("/")[2];
return host;
}
@Override
public boolean canRip(URL url) {
String url_name = url.toExternalForm();
if (explicit_domains_1.contains(url_name.split("/")[2]) == true) {
Pattern pa = Pattern.compile("(?:https?://)?(?:www\\.)?[a-z1-9-]*\\.[a-z1-9]*/album/([a-zA-Z1-9]*)/?$");
Matcher ma = pa.matcher(url.toExternalForm());
if (ma.matches()) {
return true;
}
}
return false;
}
@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
try {
// Attempt to use album title as GID
Element titleElement = getFirstPage().select("meta[property=og:title]").first();
String title = titleElement.attr("content");
title = title.substring(title.lastIndexOf('/') + 1);
return getHost() + "_" + title.trim();
} catch (IOException e) {
// Fall back to default album naming convention
logger.info("Unable to find title at " + url);
}
return super.getAlbumTitle(url);
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("(?:https?://)?(?:www\\.)?[a-z1-9-]*\\.[a-z1-9]*/album/([a-zA-Z1-9]*)/?$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Expected chevereto URL format: " +
"site.domain/album/albumName or site.domain/username/albums- got " + url + " instead");
}
@Override
public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass
return Http.url(url).get();
}
@Override
public Document getNextPage(Document doc) throws IOException {
// Find next page
String nextUrl = "";
// We use the pagination "next" link to find the next page
Element elem = doc.select("li.pagination-next > a").first();
if (elem == null) {
throw new IOException("No more pages");
}
String nextPage = elem.attr("href");
// Sometimes this returns an empty string
// This check stops that
if (nextPage == "") {
return null;
}
else {
return Http.url(nextPage).get();
}
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<String>();
for (Element el : doc.select("a.image-container > img")) {
String imageSource = el.attr("src");
// We remove the .md from images so we download the full size image
// not the medium ones
imageSource = imageSource.replace(".md", "");
result.add(imageSource);
}
return result;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
private static List<String> explicit_domains_1 = Arrays.asList("hushpix.com", "tag-fox.com", "gwarchives.com");
@Override
public String getHost() {
return url.toExternalForm().split("/")[2];
}
@Override
public String getDomain() {
return url.toExternalForm().split("/")[2];
}
@Override
public boolean canRip(URL url) {
String url_name = url.toExternalForm();
if (explicit_domains_1.contains(url_name.split("/")[2])) {
Pattern pa = Pattern.compile("(?:https?://)?(?:www\\.)?[a-z1-9-]*\\.[a-z1-9]*/album/([a-zA-Z1-9]*)/?$");
Matcher ma = pa.matcher(url.toExternalForm());
if (ma.matches()) {
return true;
}
}
return false;
}
@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
try {
// Attempt to use album title as GID
Element titleElement = getFirstPage().select("meta[property=og:title]").first();
String title = titleElement.attr("content");
title = title.substring(title.lastIndexOf('/') + 1);
return getHost() + "_" + title.trim();
} catch (IOException e) {
// Fall back to default album naming convention
logger.info("Unable to find title at " + url);
}
return super.getAlbumTitle(url);
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("(?:https?://)?(?:www\\.)?[a-z1-9-]*\\.[a-z1-9]*/album/([a-zA-Z1-9]*)/?$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Expected chevereto URL format: " +
"site.domain/album/albumName or site.domain/username/albums- got " + url + " instead");
}
@Override
public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass
return Http.url(url).cookies(CONSENT_COOKIE).get();
}
@Override
public Document getNextPage(Document doc) throws IOException {
// Find next page
String nextUrl = "";
// We use the pagination "next" link to find the next page
Element elem = doc.select("li.pagination-next > a").first();
if (elem == null) {
throw new IOException("No more pages");
}
String nextPage = elem.attr("href");
// Sometimes this returns an empty string
// This check stops that
if (nextPage == "") {
return null;
} else {
return Http.url(nextPage).cookies(CONSENT_COOKIE).get();
}
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<>();
for (Element el : doc.select("a.image-container > img")) {
String imageSource = el.attr("src");
// We remove the .md from images so we download the full size image
// not the medium ones
imageSource = imageSource.replace(".md", "");
result.add(imageSource);
}
return result;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}
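A small, plain-Java sketch (illustrative only) of the consent-cookie pattern added above: build the cookie map once in a static initializer and reuse it for every page request.

import java.util.Map;
import java.util.TreeMap;

public class ConsentCookieSketch {
    private static final Map<String, String> CONSENT_COOKIE;
    static {
        CONSENT_COOKIE = new TreeMap<>();
        CONSENT_COOKIE.put("AGREE_CONSENT", "1");
    }

    public static void main(String[] args) {
        // A real ripper would pass this map to its HTTP client on each request.
        System.out.println("Cookie header: AGREE_CONSENT=" + CONSENT_COOKIE.get("AGREE_CONSENT"));
    }
}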

View File

@ -32,8 +32,8 @@ public class DeviantartRipper extends AbstractHTMLRipper {
private static final int PAGE_SLEEP_TIME = 3000,
IMAGE_SLEEP_TIME = 2000;
private Map<String,String> cookies = new HashMap<String,String>();
private Set<String> triedURLs = new HashSet<String>();
private Map<String,String> cookies = new HashMap<>();
private Set<String> triedURLs = new HashSet<>();
public DeviantartRipper(URL url) throws IOException {
super(url);
@ -63,7 +63,7 @@ public class DeviantartRipper extends AbstractHTMLRipper {
u += "gallery/?";
}
Pattern p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{1,})\\.deviantart\\.com/favou?rites/([0-9]+)/*?$");
Pattern p = Pattern.compile("^https?://([a-zA-Z0-9\\-]+)\\.deviantart\\.com/favou?rites/([0-9]+)/*?$");
Matcher m = p.matcher(url.toExternalForm());
if (!m.matches()) {
String subdir = "/";
@ -88,18 +88,18 @@ public class DeviantartRipper extends AbstractHTMLRipper {
return m.group(1);
}
}
p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{1,})\\.deviantart\\.com/gallery/([0-9]{1,}).*$");
p = Pattern.compile("^https?://([a-zA-Z0-9\\-]+)\\.deviantart\\.com/gallery/([0-9]+).*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
// Subgallery
return m.group(1) + "_" + m.group(2);
}
p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{1,})\\.deviantart\\.com/favou?rites/([0-9]+)/.*?$");
p = Pattern.compile("^https?://([a-zA-Z0-9\\-]+)\\.deviantart\\.com/favou?rites/([0-9]+)/.*?$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1) + "_faves_" + m.group(2);
}
p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{1,})\\.deviantart\\.com/favou?rites/?$");
p = Pattern.compile("^https?://([a-zA-Z0-9\\-]+)\\.deviantart\\.com/favou?rites/?$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
// Subgallery
@ -121,14 +121,14 @@ public class DeviantartRipper extends AbstractHTMLRipper {
.cookies(cookies)
.get();
}
public String jsonToImage(Document page,String id) {
private String jsonToImage(Document page, String id) {
Elements js = page.select("script[type=\"text/javascript\"]");
for (Element tag : js) {
if (tag.html().contains("window.__pageload")) {
try {
String script = tag.html();
script = script.substring(script.indexOf("window.__pageload"));
if (script.indexOf(id) < 0) {
if (!script.contains(id)) {
continue;
}
script = script.substring(script.indexOf(id));
@ -144,7 +144,7 @@ public class DeviantartRipper extends AbstractHTMLRipper {
}
@Override
public List<String> getURLsFromPage(Document page) {
List<String> imageURLs = new ArrayList<String>();
List<String> imageURLs = new ArrayList<>();
// Iterate over all thumbnails
for (Element thumb : page.select("div.zones-container span.thumb")) {
@ -194,7 +194,7 @@ public class DeviantartRipper extends AbstractHTMLRipper {
}
@Override
public List<String> getDescriptionsFromPage(Document page) {
List<String> textURLs = new ArrayList<String>();
List<String> textURLs = new ArrayList<>();
// Iterate over all thumbnails
for (Element thumb : page.select("div.zones-container span.thumb")) {
logger.info(thumb.attr("href"));
@ -257,9 +257,9 @@ public class DeviantartRipper extends AbstractHTMLRipper {
* @return Full-size image URL
* @throws Exception If it can't find the full-size URL
*/
public static String thumbToFull(String thumb, boolean throwException) throws Exception {
private static String thumbToFull(String thumb, boolean throwException) throws Exception {
thumb = thumb.replace("http://th", "http://fc");
List<String> fields = new ArrayList<String>(Arrays.asList(thumb.split("/")));
List<String> fields = new ArrayList<>(Arrays.asList(thumb.split("/")));
fields.remove(4);
if (!fields.get(4).equals("f") && throwException) {
// Not a full-size image
@ -339,7 +339,7 @@ public class DeviantartRipper extends AbstractHTMLRipper {
* @param page Page the thumbnail is retrieved from
* @return Highest-resolution version of the image based on thumbnail URL and the page.
*/
public String smallToFull(String thumb, String page) {
private String smallToFull(String thumb, String page) {
try {
// Fetch the image page
Response resp = Http.url(page)
@ -373,7 +373,7 @@ public class DeviantartRipper extends AbstractHTMLRipper {
}
cookieString = cookieString.substring(0,cookieString.length() - 1);
con.setRequestProperty("Cookie",cookieString);
con.setRequestProperty("User-Agent",this.USER_AGENT);
con.setRequestProperty("User-Agent", USER_AGENT);
con.setInstanceFollowRedirects(true);
con.connect();
int code = con.getResponseCode();
@ -406,7 +406,7 @@ public class DeviantartRipper extends AbstractHTMLRipper {
*/
private Map<String, String> loginToDeviantart() throws IOException {
// Populate postData fields
Map<String,String> postData = new HashMap<String,String>();
Map<String,String> postData = new HashMap<>();
String username = Utils.getConfigString("deviantart.username", new String(Base64.decode("Z3JhYnB5")));
String password = Utils.getConfigString("deviantart.password", new String(Base64.decode("ZmFrZXJz")));
if (username == null || password == null) {

View File

@ -72,7 +72,7 @@ public class DrawcrowdRipper extends AbstractHTMLRipper {
@Override
public List<String> getURLsFromPage(Document page) {
List<String> imageURLs = new ArrayList<String>();
List<String> imageURLs = new ArrayList<>();
for (Element thumb : page.select("div.item.asset img")) {
String image = thumb.attr("src");
image = image

View File

@ -0,0 +1,74 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
public class DribbbleRipper extends AbstractHTMLRipper {
public DribbbleRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "dribbble";
}
@Override
public String getDomain() {
return "dribbble.com";
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://[wm.]*dribbble\\.com/([a-zA-Z0-9]+).*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Expected dribbble.com URL format: " +
"dribbble.com/albumid - got " + url + "instead");
}
@Override
public Document getFirstPage() throws IOException {
return Http.url(url).get();
}
@Override
public Document getNextPage(Document doc) throws IOException {
// Find next page
Elements hrefs = doc.select("a.next_page");
if (hrefs.size() == 0) {
throw new IOException("No more pages");
}
String nextUrl = "https://www.dribbble.com" + hrefs.first().attr("href");
sleep(500);
return Http.url(nextUrl).get();
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<>();
for (Element thumb : doc.select("a.dribbble-link > picture > source")) {
// Skip teaser thumbnails
if (thumb.attr("srcset").contains("teaser")) continue;
String image = thumb.attr("srcset").replace("_1x", "");
imageURLs.add(image);
}
return imageURLs;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}

View File

@ -5,167 +5,137 @@ import com.rarchives.ripme.ripper.DownloadThreadPool;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class E621Ripper extends AbstractHTMLRipper {
public static final int POOL_IMAGES_PER_PAGE = 24;
public class E621Ripper extends AbstractHTMLRipper{
private static final Logger logger = Logger.getLogger(E621Ripper.class);
private DownloadThreadPool e621ThreadPool = new DownloadThreadPool("e621");
private static Pattern gidPattern=null;
private static Pattern gidPattern2=null;
private static Pattern gidPatternPool=null;
public E621Ripper(URL url) throws IOException {
super(url);
private DownloadThreadPool e621ThreadPool=new DownloadThreadPool("e621");
public E621Ripper(URL url) throws IOException {
super(url);
}
@Override
public DownloadThreadPool getThreadPool() {
return e621ThreadPool;
}
@Override
public String getDomain() {
return "e621.net";
}
@Override
public String getHost() {
return "e621";
}
@Override
public Document getFirstPage() throws IOException {
if(url.getPath().startsWith("/pool/show/"))
return Http.url("https://e621.net/pool/show/"+getTerm(url)).get();
else
return Http.url("https://e621.net/post/index/1/"+getTerm(url)).get();
}
private String getFullSizedImage(String url) {
try {
return Http.url("https://e621.net" + url).get().select("div > img#image").attr("src");
} catch (IOException e) {
logger.error("Unable to get full sized image from " + url);
return null;
}
}
@Override
public DownloadThreadPool getThreadPool() {
return e621ThreadPool;
}
@Override
public List<String> getURLsFromPage(Document page) {
Elements elements = page.select("div > span.thumb > a");
List<String> res = new ArrayList<>();
@Override
public String getDomain() {
return "e621.net";
}
for(Element e:elements) {
if (!e.attr("href").isEmpty()) {
String fullSizedImage = getFullSizedImage(e.attr("href"));
if (fullSizedImage != null && !fullSizedImage.equals("")) {
res.add(getFullSizedImage(e.attr("href")));
}
}
}
@Override
public String getHost() {
return "e621";
}
return res;
}
@Override
public Document getFirstPage() throws IOException {
if (url.getPath().startsWith("/pool/show/")) {
return Http.url("https://e621.net/pool/show/" + getTerm(url)).get();
@Override
public Document getNextPage(Document page) throws IOException {
if (page.select("a.next_page") != null) {
return Http.url("https://e621.net" + page.select("a.next_page").attr("href")).get();
} else {
return Http.url("https://e621.net/post/index/1/" + getTerm(url)).get();
throw new IOException("No more pages");
}
}
@Override
public List<String> getURLsFromPage(Document page) {
Elements elements = page.select("#post-list .thumb a,#pool-show .thumb a");
List<String> res = new ArrayList<String>(elements.size());
@Override
public void downloadURL(final URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
if (page.getElementById("pool-show") != null) {
int index = 0;
private String getTerm(URL url) throws MalformedURLException{
if(gidPattern==null)
gidPattern=Pattern.compile("^https?://(www\\.)?e621\\.net/post/index/[^/]+/([a-zA-Z0-9$_.+!*'(),%-]+)(/.*)?(#.*)?$");
if(gidPatternPool==null)
gidPatternPool=Pattern.compile("^https?://(www\\.)?e621\\.net/pool/show/([a-zA-Z0-9$_.+!*'(),%-]+)(\\?.*)?(/.*)?(#.*)?$");
Element e = page.getElementById("paginator");
if (e != null) {
e = e.getElementsByClass("current").first();
if (e != null) {
index = (Integer.parseInt(e.text()) - 1) * POOL_IMAGES_PER_PAGE;
}
}
Matcher m = gidPattern.matcher(url.toExternalForm());
if(m.matches())
return m.group(2);
for (Element e_ : elements) {
res.add(e_.absUrl("href") + "#" + ++index);
}
m = gidPatternPool.matcher(url.toExternalForm());
if(m.matches())
return m.group(2);
} else {
for (Element e : elements) {
res.add(e.absUrl("href") + "#" + e.child(0).attr("id").substring(1));
}
}
throw new MalformedURLException("Expected e621.net URL format: e621.net/post/index/1/searchterm - got "+url+" instead");
}
return res;
}
@Override
public String getGID(URL url) throws MalformedURLException {
try {
String prefix="";
if(url.getPath().startsWith("/pool/show/"))
prefix="pool_";
@Override
public Document getNextPage(Document page) throws IOException {
for (Element e : page.select("#paginator a")) {
if (e.attr("rel").equals("next")) {
return Http.url(e.absUrl("href")).get();
}
}
return Utils.filesystemSafe(prefix+new URI(getTerm(url)).getPath());
} catch (URISyntaxException ex) {
logger.error(ex);
}
return null;
}
throw new MalformedURLException("Expected e621.net URL format: e621.net/post/index/1/searchterm - got "+url+" instead");
}
@Override
public void downloadURL(final URL url, int index) {
e621ThreadPool.addThread(new Thread(new Runnable() {
public void run() {
try {
Document page = Http.url(url).get();
Element e = page.getElementById("image");
@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
if(gidPattern2==null)
gidPattern2=Pattern.compile("^https?://(www\\.)?e621\\.net/post/search\\?tags=([a-zA-Z0-9$_.+!*'(),%-]+)(/.*)?(#.*)?$");
if (e != null) {
addURLToDownload(new URL(e.absUrl("src")), Utils.getConfigBoolean("download.save_order", true) ? url.getRef() + "-" : "");
} else if ((e = page.select(".content object>param[name=\"movie\"]").first()) != null) {
addURLToDownload(new URL(e.absUrl("value")), Utils.getConfigBoolean("download.save_order", true) ? url.getRef() + "-" : "");
} else {
Logger.getLogger(E621Ripper.class.getName()).log(Level.WARNING, "Unsupported media type - please report to program author: " + url.toString());
}
Matcher m = gidPattern2.matcher(url.toExternalForm());
if(m.matches())
return new URL("https://e621.net/post/index/1/"+m.group(2).replace("+","%20"));
} catch (IOException ex) {
Logger.getLogger(E621Ripper.class.getName()).log(Level.SEVERE, null, ex);
}
}
}));
}
return url;
}
private String getTerm(URL url) throws MalformedURLException {
String query = url.getQuery();
if (query != null) {
return Utils.parseUrlQuery(query, "tags");
}
if (query == null) {
if ((query = url.getPath()).startsWith("/post/index/")) {
query = query.substring(12);
int pos = query.indexOf('/');
if (pos == -1) {
return null;
}
// skip page number
query = query.substring(pos + 1);
if (query.endsWith("/")) {
query = query.substring(0, query.length() - 1);
}
try {
return URLDecoder.decode(query, "UTF-8");
} catch (UnsupportedEncodingException e) {
// Shouldn't happen since UTF-8 is required to be supported
throw new RuntimeException(e);
}
} else if (query.startsWith("/pool/show/")) {
query = query.substring(11);
if (query.endsWith("/")) {
query = query.substring(0, query.length() - 1);
}
return query;
}
}
return null;
}
@Override
public String getGID(URL url) throws MalformedURLException {
String prefix = "";
if (url.getPath().startsWith("/pool/show/")) {
prefix = "pool_";
} else {
prefix = "term_";
}
return Utils.filesystemSafe(prefix + getTerm(url));
}
}
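A standalone, simplified sketch (an assumption, avoiding the project's Utils helper) of the rewritten term extraction above: use the ?tags= query value when present, otherwise take the path segment after /post/index/&lt;page&gt;/ or /pool/show/, URL-decoded.

import java.net.URL;
import java.net.URLDecoder;

public class TermSketch {
    static String term(URL url) throws Exception {
        String query = url.getQuery();
        if (query != null) {
            for (String pair : query.split("&")) {
                String[] kv = pair.split("=", 2);
                if (kv.length == 2 && kv[0].equals("tags")) {
                    return URLDecoder.decode(kv[1], "UTF-8");
                }
            }
            return null;
        }
        String path = url.getPath();
        if (path.startsWith("/post/index/")) {
            String rest = path.substring("/post/index/".length());
            int slash = rest.indexOf('/');
            if (slash == -1) {
                return null;
            }
            rest = rest.substring(slash + 1); // skip the page number
            if (rest.endsWith("/")) {
                rest = rest.substring(0, rest.length() - 1);
            }
            return URLDecoder.decode(rest, "UTF-8");
        }
        if (path.startsWith("/pool/show/")) {
            String rest = path.substring("/pool/show/".length());
            return rest.endsWith("/") ? rest.substring(0, rest.length() - 1) : rest;
        }
        return null;
    }

    public static void main(String[] args) throws Exception {
        System.out.println(term(new URL("https://e621.net/post/index/1/some%20term"))); // some term
        System.out.println(term(new URL("https://e621.net/post/search?tags=cat")));     // cat
    }
}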

View File

@ -39,7 +39,7 @@ public class EHentaiRipper extends AbstractHTMLRipper {
// Current HTML document
private Document albumDoc = null;
private static final Map<String,String> cookies = new HashMap<String,String>();
private static final Map<String,String> cookies = new HashMap<>();
static {
cookies.put("nw", "1");
cookies.put("tip", "1");
@ -162,7 +162,7 @@ public class EHentaiRipper extends AbstractHTMLRipper {
@Override
public List<String> getURLsFromPage(Document page) {
List<String> imageURLs = new ArrayList<String>();
List<String> imageURLs = new ArrayList<>();
Elements thumbs = page.select("#gdt > .gdtm a");
// Iterate over images on page
for (Element thumb : thumbs) {
@ -193,7 +193,7 @@ public class EHentaiRipper extends AbstractHTMLRipper {
private int index;
private File workingDir;
public EHentaiImageThread(URL url, int index, File workingDir) {
EHentaiImageThread(URL url, int index, File workingDir) {
super();
this.url = url;
this.index = index;

View File

@ -23,7 +23,12 @@ import com.rarchives.ripme.utils.Http;
public class EightmusesRipper extends AbstractHTMLRipper {
private Document albumDoc = null;
private Map<String,String> cookies = new HashMap<String,String>();
private Map<String,String> cookies = new HashMap<>();
// TODO put up a wiki page on using maps to store titles
// the map for storing the title of each album when downloading sub albums
private Map<URL,String> urlTitles = new HashMap<>();
private Boolean rippingSubalbums = false;
public EightmusesRipper(URL url) throws IOException {
super(url);
@ -76,33 +81,62 @@ public class EightmusesRipper extends AbstractHTMLRipper {
@Override
public List<String> getURLsFromPage(Document page) {
List<String> imageURLs = new ArrayList<String>();
if (page.select(".preview > span").size() > 0) {
List<String> imageURLs = new ArrayList<>();
// Get the first image link on the page and check whether it ends in a number;
// if it does, we're ripping a comic, otherwise it's a subalbum
String firstImageLink = page.select("div.gallery > a.t-hover").first().attr("href");
Pattern p = Pattern.compile("/comix/picture/([a-zA-Z0-9\\-_/]*/)?\\d+");
Matcher m = p.matcher(firstImageLink);
if (!m.matches()) {
logger.info("Ripping subalbums");
// Page contains subalbums (not images)
Elements albumElements = page.select("a.preview");
Elements albumElements = page.select("div.gallery > a.t-hover");
List<Element> albumsList = albumElements.subList(0, albumElements.size());
Collections.reverse(albumsList);
// Iterate over elements in reverse order
for (Element subalbum : albumsList) {
String subUrl = subalbum.attr("href");
subUrl = subUrl.replaceAll("\\.\\./", "");
if (subUrl.startsWith("//")) {
subUrl = "http:";
}
else if (!subUrl.startsWith("http://")) {
subUrl = "http://www.8muses.com/" + subUrl;
}
try {
logger.info("Retrieving " + subUrl);
sendUpdate(STATUS.LOADING_RESOURCE, subUrl);
Document subPage = Http.url(subUrl).get();
// Get all images in subalbum, add to list.
List<String> subalbumImages = getURLsFromPage(subPage);
logger.info("Found " + subalbumImages.size() + " images in subalbum");
imageURLs.addAll(subalbumImages);
} catch (IOException e) {
logger.warn("Error while loading subalbum " + subUrl, e);
continue;
// This check skips ads, which don't have an href
if (subUrl != "") {
subUrl = subUrl.replaceAll("\\.\\./", "");
if (subUrl.startsWith("//")) {
subUrl = "https:";
}
else if (!subUrl.startsWith("http://")) {
subUrl = "https://www.8muses.com" + subUrl;
}
try {
logger.info("Retrieving " + subUrl);
sendUpdate(STATUS.LOADING_RESOURCE, subUrl);
Document subPage = Http.url(subUrl).get();
// Get all images in subalbum, add to list.
List<String> subalbumImages = getURLsFromPage(subPage);
String albumTitle = subPage.select("meta[name=description]").attr("content");
albumTitle = albumTitle.replace("A huge collection of free porn comics for adults. Read ", "");
albumTitle = albumTitle.replace(" online for free at 8muses.com", "");
albumTitle = albumTitle.replace(" ", "_");
// albumTitle = albumTitle.replace("Sex and Porn Comics", "");
// albumTitle = albumTitle.replace("|", "");
// albumTitle = albumTitle.replace("8muses", "");
// albumTitle = albumTitle.replaceAll("-", "_");
// albumTitle = albumTitle.replaceAll(" ", "_");
// albumTitle = albumTitle.replaceAll("___", "_");
// albumTitle = albumTitle.replaceAll("__", "_");
// // This is here to remove the trailing __ from folder names
// albumTitle = albumTitle.replaceAll("__", "");
logger.info("Found " + subalbumImages.size() + " images in subalbum");
int prefix = 1;
for (String image : subalbumImages) {
URL imageUrl = new URL(image);
// urlTitles.put(imageUrl, albumTitle);
addURLToDownload(imageUrl, getPrefix(prefix), albumTitle, this.url.toExternalForm(), cookies);
prefix = prefix + 1;
}
rippingSubalbums = true;
imageURLs.addAll(subalbumImages);
} catch (IOException e) {
logger.warn("Error while loading subalbum " + subUrl, e);
}
}
}
}
@ -142,10 +176,10 @@ public class EightmusesRipper extends AbstractHTMLRipper {
private String getFullSizeImage(String imageUrl) throws IOException {
sendUpdate(STATUS.LOADING_RESOURCE, imageUrl);
logger.info("Getting full sized image from " + imageUrl);
Document doc = new Http(imageUrl).get(); // Retrieve the webpage of the image URL
Element fullSizeImage = doc.select(".photo").first(); // Select the "photo" element from the page (there should only be 1)
String path = "https://cdn.ampproject.org/i/s/www.8muses.com/data/ufu/small/" + fullSizeImage.children().select("#imageName").attr("value"); // Append the path to the fullsize image file to the standard prefix
return path;
String imageName = doc.select("input[id=imageName]").attr("value"); // Select the "input" element from the page
return "https://www.8muses.com/image/fm/" + imageName;
}
@Override

View File

@ -9,7 +9,6 @@ import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -18,10 +17,8 @@ import org.jsoup.Connection.Response;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.jsoup.Connection.Method;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;
/**
@ -71,19 +68,13 @@ public class EroShareRipper extends AbstractHTMLRipper {
Pattern p_eroshare_profile = Pattern.compile("^https?://eroshare.com/u/([a-zA-Z0-9\\-_]+)/?$");
Matcher m_eroshare_profile = p_eroshare_profile.matcher(url.toExternalForm());
if (m_eroshare_profile.matches()) {
return true;
}
return false;
return m_eroshare_profile.matches();
}
public boolean is_profile(URL url) {
private boolean is_profile(URL url) {
Pattern pa = Pattern.compile("^https?://eroshae.com/u/([a-zA-Z0-9\\-_]+)/?$");
Matcher ma = pa.matcher(url.toExternalForm());
if (ma.matches()) {
return true;
}
return false;
return ma.matches();
}
@Override
@ -103,7 +94,7 @@ public class EroShareRipper extends AbstractHTMLRipper {
@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
if (is_profile(url) == false) {
if (!is_profile(url)) {
try {
// Attempt to use album title as GID
Element titleElement = getFirstPage().select("meta[property=og:title]").first();
@ -122,7 +113,7 @@ public class EroShareRipper extends AbstractHTMLRipper {
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> URLs = new ArrayList<String>();
List<String> URLs = new ArrayList<>();
//Pictures
Elements imgs = doc.getElementsByTag("img");
for (Element img : imgs) {
@ -172,9 +163,7 @@ public class EroShareRipper extends AbstractHTMLRipper {
.ignoreContentType()
.response();
Document doc = resp.parse();
return doc;
return resp.parse();
}
@Override
@ -214,7 +203,7 @@ public class EroShareRipper extends AbstractHTMLRipper {
Document doc = resp.parse();
List<URL> URLs = new ArrayList<URL>();
List<URL> URLs = new ArrayList<>();
//Pictures
Elements imgs = doc.getElementsByTag("img");
for (Element img : imgs) {
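A tiny sketch of the simplification made above: return the matcher result directly instead of branching to return true or false. The pattern and sample URLs here are illustrative.

import java.util.regex.Pattern;

public class MatchesSketch {
    static boolean isProfile(String url) {
        return Pattern.compile("^https?://eroshae.com/u/([a-zA-Z0-9\\-_]+)/?$").matcher(url).matches();
    }

    public static void main(String[] args) {
        System.out.println(isProfile("https://eroshae.com/u/someuser")); // true
        System.out.println(isProfile("https://eroshae.com/a/abc123"));   // false
    }
}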

View File

@ -1,15 +1,9 @@
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -18,10 +12,8 @@ import org.jsoup.Connection.Response;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.jsoup.Connection.Method;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;
/**
@ -65,10 +57,15 @@ public class EromeRipper extends AbstractHTMLRipper {
return super.getAlbumTitle(url);
}
@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
return new URL(url.toExternalForm().replaceAll("https?://erome.com", "https://www.erome.com"));
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> URLs = new ArrayList<String>();
List<String> URLs = new ArrayList<>();
//Pictures
Elements imgs = doc.select("div.img > img.img-front");
for (Element img : imgs) {
@ -92,9 +89,7 @@ public class EromeRipper extends AbstractHTMLRipper {
.ignoreContentType()
.response();
Document doc = resp.parse();
return doc;
return resp.parse();
}
@Override
@ -104,7 +99,15 @@ public class EromeRipper extends AbstractHTMLRipper {
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("erome album not found in " + url + ", expected https://erome.com/album");
p = Pattern.compile("^https?://erome.com/a/([a-zA-Z0-9]*)/?$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("erome album not found in " + url + ", expected https://www.erome.com/album");
}
public static List<URL> getURLs(URL url) throws IOException{
@ -115,7 +118,7 @@ public class EromeRipper extends AbstractHTMLRipper {
Document doc = resp.parse();
List<URL> URLs = new ArrayList<URL>();
List<URL> URLs = new ArrayList<>();
//Pictures
Elements imgs = doc.getElementsByTag("img");
for (Element img : imgs) {
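A standalone sketch of the sanitizeURL normalization above: rewrite bare erome.com links to the www host before ripping. Plain string and URL handling only; the input URL is made up.

import java.net.MalformedURLException;
import java.net.URL;

public class SanitizeSketch {
    static URL sanitize(URL url) throws MalformedURLException {
        return new URL(url.toExternalForm().replaceAll("https?://erome.com", "https://www.erome.com"));
    }

    public static void main(String[] args) throws Exception {
        System.out.println(sanitize(new URL("https://erome.com/a/abc123")));
        // -> https://www.erome.com/a/abc123
    }
}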

View File

@ -1,101 +0,0 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
public class FapprovedRipper extends AbstractHTMLRipper {
private int pageIndex = 1;
private String username = null;
public FapprovedRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "fapproved";
}
@Override
public String getDomain() {
return "fapproved.com";
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://[w.]*fapproved.com/users/([a-zA-Z0-9\\-_]{3,}).*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
username = m.group(1);
return username;
}
throw new MalformedURLException("Fapproved user not found in " + url + ", expected http://fapproved.com/users/username/images");
}
@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
return new URL("http://fapproved.com/users/" + getGID(url));
}
@Override
public Document getFirstPage() throws IOException {
pageIndex = 1;
String pageURL = getPageURL(pageIndex);
return Http.url(pageURL)
.ignoreContentType()
.get();
}
@Override
public Document getNextPage(Document doc) throws IOException {
if ((doc.select("div.pagination li.next.disabled").size() != 0)
|| (doc.select("div.pagination").size() == 0)) {
throw new IOException("No more pages found");
}
sleep(1000);
pageIndex++;
String pageURL = getPageURL(pageIndex);
return Http.url(pageURL)
.ignoreContentType()
.get();
}
private String getPageURL(int index) throws IOException {
if (username == null) {
username = getGID(this.url);
}
return "http://fapproved.com/users/" + username + "/images?page=" + pageIndex;
}
@Override
public List<String> getURLsFromPage(Document page) {
List<String> imageURLs = new ArrayList<String>();
for (Element image : page.select("div.actual-image img")) {
String imageURL = image.attr("src");
if (imageURL.startsWith("//")) {
imageURL = "http:" + imageURL;
}
else if (imageURL.startsWith("/")) {
imageURL = "http://fapproved.com" + imageURL;
}
imageURLs.add(imageURL);
}
return imageURLs;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}

View File

@ -1,91 +0,0 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.HttpStatusException;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AlbumRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;
import org.jsoup.select.Elements;
public class FineboxRipper extends AlbumRipper {
private static final String DOMAIN = "finebox.co",
DOMAIN_OLD = "vinebox.co",
HOST = "finebox";
public FineboxRipper(URL url) throws IOException {
super(url);
}
@Override
public boolean canRip(URL url) {
return url.getHost().endsWith(DOMAIN) || url.getHost().endsWith(DOMAIN_OLD);
}
@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
return new URL("http://"+DOMAIN+"/u/" + getGID(url));
}
@Override
public void rip() throws IOException {
int page = 0;
Document doc;
Boolean hasPagesLeft = true;
while (hasPagesLeft) {
page++;
String urlPaged = this.url.toExternalForm() + "?page=" + page;
logger.info("Retrieving " + urlPaged);
sendUpdate(STATUS.LOADING_RESOURCE, urlPaged);
try {
doc = Http.url(this.url).get();
} catch (HttpStatusException e) {
logger.debug("Hit end of pages at page " + page, e);
break;
}
Elements videos = doc.select("video");
for (Element element : videos) {
String videourl = element.select("source").attr("src");
if (!videourl.startsWith("http")) {
videourl = "http://" + DOMAIN + videourl;
}
logger.info("URL to download: " + videourl);
if (!addURLToDownload(new URL(videourl))) {
hasPagesLeft = false;
break;
}
}
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
logger.error("[!] Interrupted while waiting to load next page", e);
break;
}
}
waitForThreads();
}
@Override
public String getHost() {
return HOST;
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://(www\\.)?(v|f)inebox\\.co/u/([a-zA-Z0-9]{1,}).*$");
Matcher m = p.matcher(url.toExternalForm());
if (!m.matches()) {
throw new MalformedURLException("Expected format: http://"+DOMAIN+"/u/USERNAME");
}
return m.group(m.groupCount());
}
}

View File

@ -256,7 +256,7 @@ public class FivehundredpxRipper extends AbstractJSONRipper {
@Override
public List<String> getURLsFromJSON(JSONObject json) {
List<String> imageURLs = new ArrayList<String>();
List<String> imageURLs = new ArrayList<>();
JSONArray photos = json.getJSONArray("photos");
for (int i = 0; i < photos.length(); i++) {
if (super.isStopped()) {
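A sketch (assuming the org.json library this ripper already imports) of iterating a photos array the way getURLsFromJSON does; the JSON document and field names below are invented for illustration.

import java.util.ArrayList;
import java.util.List;
import org.json.JSONArray;
import org.json.JSONObject;

public class JsonIterateSketch {
    public static void main(String[] args) {
        JSONObject json = new JSONObject(
            "{\"photos\":[{\"image_url\":\"https://example.com/a.jpg\"},{\"image_url\":\"https://example.com/b.jpg\"}]}");
        List<String> imageURLs = new ArrayList<>();
        JSONArray photos = json.getJSONArray("photos");
        for (int i = 0; i < photos.length(); i++) {
            imageURLs.add(photos.getJSONObject(i).getString("image_url")); // collect each photo URL
        }
        System.out.println(imageURLs);
    }
}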

View File

@ -28,9 +28,9 @@ import com.rarchives.ripme.utils.Utils;
public class FlickrRipper extends AbstractHTMLRipper {
private int page = 1;
private Set<String> attempted = new HashSet<String>();
private Set<String> attempted = new HashSet<>();
private Document albumDoc = null;
private DownloadThreadPool flickrThreadPool;
private final DownloadThreadPool flickrThreadPool;
@Override
public DownloadThreadPool getThreadPool() {
return flickrThreadPool;
@ -162,7 +162,7 @@ public class FlickrRipper extends AbstractHTMLRipper {
@Override
public List<String> getURLsFromPage(Document page) {
List<String> imageURLs = new ArrayList<String>();
List<String> imageURLs = new ArrayList<>();
for (Element thumb : page.select("a[data-track=photo-click]")) {
/* TODO find a way to persist the image title
String imageTitle = null;
@ -215,7 +215,7 @@ public class FlickrRipper extends AbstractHTMLRipper {
.method(Method.GET)
.execute();
Document doc = resp.parse();
Map<String,String> postData = new HashMap<String,String>();
Map<String,String> postData = new HashMap<>();
for (Element input : doc.select("input[type=hidden]")) {
postData.put(input.attr("name"), input.attr("value"));
}
@ -239,7 +239,7 @@ public class FlickrRipper extends AbstractHTMLRipper {
private URL url;
private int index;
public FlickrImageThread(URL url, int index) {
FlickrImageThread(URL url, int index) {
super();
this.url = url;
this.index = index;
@ -252,7 +252,6 @@ public class FlickrRipper extends AbstractHTMLRipper {
Elements fullsizeImages = doc.select("div#allsizes-photo img");
if (fullsizeImages.size() == 0) {
logger.error("Could not find flickr image at " + doc.location() + " - missing 'div#allsizes-photo img'");
return;
}
else {
String prefix = "";

View File

@ -28,12 +28,16 @@ import com.rarchives.ripme.utils.Http;
public class FuraffinityRipper extends AbstractHTMLRipper {
static Map<String, String> cookies=null;
static final String urlBase = "https://www.furaffinity.net";
private static final String urlBase = "https://www.furaffinity.net";
private static Map<String,String> cookies = new HashMap<>();
static {
cookies.put("b", "bd5ccac8-51dc-4265-8ae1-7eac685ad667");
cookies.put("a", "7c41b782-d01d-4b0e-b45b-62a4f0b2a369");
}
// Thread pool for finding direct image links from "image" pages (html)
private DownloadThreadPool furaffinityThreadPool
= new DownloadThreadPool( "furaffinity");
= new DownloadThreadPool( "furaffinity");
@Override
public DownloadThreadPool getThreadPool() {
@ -55,73 +59,49 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
}
@Override
public boolean hasDescriptionSupport() {
return true;
return false;
}
@Override
public Document getFirstPage() throws IOException {
if (cookies == null || cookies.size() == 0) {
login();
}
return Http.url(url).cookies(cookies).get();
}
private void login() throws IOException {
String user = new String(Base64.decode("cmlwbWU="));
String pass = new String(Base64.decode("cmlwbWVwYXNzd29yZA=="));
Response loginPage = Http.url(urlBase + "/login/")
.referrer(urlBase)
.response();
cookies = loginPage.cookies();
Map<String,String> formData = new HashMap<String,String>();
formData.put("action", "login");
formData.put("retard_protection", "1");
formData.put("name", user);
formData.put("pass", pass);
formData.put("login", "Login to FurAffinity");
Response doLogin = Http.url(urlBase + "/login/?ref=" + url)
.referrer(urlBase + "/login/")
.cookies(cookies)
.data(formData)
.method(Method.POST)
.response();
cookies.putAll(doLogin.cookies());
}
@Override
public Document getNextPage(Document doc) throws IOException {
// Find next page
Elements nextPageUrl = doc.select("td[align=right] form");
Elements nextPageUrl = doc.select("a.right");
if (nextPageUrl.size() == 0) {
throw new IOException("No more pages");
}
String nextUrl = urlBase + nextPageUrl.first().attr("action");
String nextUrl = urlBase + nextPageUrl.first().attr("href");
sleep(500);
Document nextPage = Http.url(nextUrl).cookies(cookies).get();
Elements hrefs = nextPage.select("div#no-images");
if (hrefs.size() != 0) {
throw new IOException("No more pages");
}
return nextPage;
}
private String getImageFromPost(String url) {
try {
logger.info("found url " + Http.url(url).cookies(cookies).get().select("meta[property=og:image]").attr("content"));
return Http.url(url).cookies(cookies).get().select("meta[property=og:image]").attr("content");
} catch (IOException e) {
return "";
}
}
@Override
public List<String> getURLsFromPage(Document page) {
List<String> urls = new ArrayList<String>();
List<String> urls = new ArrayList<>();
Elements urlElements = page.select("figure.t-image > b > u > a");
for (Element e : urlElements) {
urls.add(urlBase + e.select("a").first().attr("href"));
urls.add(getImageFromPost(urlBase + e.select("a").first().attr("href")));
}
return urls;
}
@Override
public List<String> getDescriptionsFromPage(Document page) {
List<String> urls = new ArrayList<String>();
List<String> urls = new ArrayList<>();
Elements urlElements = page.select("figure.t-image > b > u > a");
for (Element e : urlElements) {
urls.add(urlBase + e.select("a").first().attr("href"));
@ -138,7 +118,6 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
// Fetch the image page
Response resp = Http.url(page)
.referrer(this.url)
.cookies(cookies)
.response();
cookies.putAll(resp.cookies());
@ -157,9 +136,7 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
ele.select("p").prepend("\\n\\n");
logger.debug("Returning description at " + page);
String tempPage = Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false));
String title = documentz.select("meta[property=og:title]").attr("content");
String tempText = title;
return tempText + "\n" + tempPage; // Overridden saveText takes first line and makes it the file name.
return documentz.select("meta[property=og:title]").attr("content") + "\n" + tempPage; // Overridden saveText takes first line and makes it the file name.
} catch (IOException ioe) {
logger.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'");
return null;
@ -167,8 +144,8 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
}
@Override
public boolean saveText(URL url, String subdirectory, String text, int index) {
//TODO Make this better please?
try {
//TODO Make this better please?
try {
stopCheck();
} catch (IOException e) {
return false;
@ -179,7 +156,7 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
saveAs = text.split("\n")[0];
saveAs = saveAs.replaceAll("^(\\S+)\\s+by\\s+(.*)$", "$2_$1");
for (int i = 1;i < text.split("\n").length; i++) {
newText = newText.replace("\\","").replace("/","").replace("~","") + "\n" + text.split("\n")[i];
newText = newText.replace("\\","").replace("/","").replace("~","") + "\n" + text.split("\n")[i];
}
try {
if (!subdirectory.equals("")) {
@ -212,8 +189,7 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
}
@Override
public void downloadURL(URL url, int index) {
furaffinityThreadPool.addThread(new FuraffinityDocumentThread(url));
sleep(250);
addURLToDownload(url, getPrefix(index));
}
@Override
@ -224,6 +200,7 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Expected furaffinity.net URL format: "
+ "www.furaffinity.net/gallery/username - got " + url
+ " instead");
@ -232,42 +209,13 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
private class FuraffinityDocumentThread extends Thread {
private URL url;
public FuraffinityDocumentThread(URL url) {
FuraffinityDocumentThread(URL url) {
super();
this.url = url;
}
@Override
public void run() {
try {
Document doc = Http.url(url).cookies(cookies).get();
// Find image
Elements donwloadLink = doc.select("div.alt1 b a[href^=//d.facdn.net/]");
if (donwloadLink.size() == 0) {
logger.warn("Could not download " + this.url);
return;
}
String link = "http:" + donwloadLink.first().attr("href");
logger.info("Found URL " + link);
String[] fileNameSplit = link.split("/");
String fileName = fileNameSplit[fileNameSplit.length -1];
fileName = fileName.replaceAll("[0-9]*\\.", "");
String[] fileExtSplit = link.split("\\.");
String fileExt = fileExtSplit[fileExtSplit.length -1];
fileName = fileName.replaceAll(fileExt, "");
File saveAS;
fileName = fileName.replace("[0-9]*\\.", "");
saveAS = new File(
workingDir.getCanonicalPath()
+ File.separator
+ fileName
+ "."
+ fileExt);
addURLToDownload(new URL(link),saveAS,"",cookies);
} catch (IOException e) {
logger.error("[!] Exception while loading/parsing " + this.url, e);
}
}
}
}
}

View File

@ -60,7 +60,7 @@ public class FuskatorRipper extends AbstractHTMLRipper {
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>();
List<String> imageURLs = new ArrayList<>();
String html = doc.html();
// Get "baseUrl"
String baseUrl = Utils.between(html, "unescape('", "'").get(0);

View File

@ -1,113 +0,0 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.Connection.Method;
import org.jsoup.Connection.Response;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;
public class GifyoRipper extends AbstractHTMLRipper {
private int page = 0;
private Map<String,String> cookies = new HashMap<String,String>();
public GifyoRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "gifyo";
}
@Override
public String getDomain() {
return "gifyo.com";
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://[w.]*gifyo.com/([a-zA-Z0-9\\-_]+)/?$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Gifyo user not found in " + url + ", expected http://gifyo.com/username");
}
@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
return new URL("http://gifyo.com/" + getGID(url) + "/");
}
@Override
public Document getFirstPage() throws IOException {
Response resp = Http.url(this.url)
.ignoreContentType()
.response();
cookies = resp.cookies();
Document doc = resp.parse();
if (doc.html().contains("profile is private")) {
sendUpdate(STATUS.RIP_ERRORED, "User has private profile");
throw new IOException("User has private profile");
}
return doc;
}
@Override
public Document getNextPage(Document doc) throws IOException {
page++;
Map<String,String> postData = new HashMap<String,String>();
postData.put("cmd", "refreshData");
postData.put("view", "gif");
postData.put("layout", "grid");
postData.put("page", Integer.toString(page));
Response resp = Http.url(this.url)
.ignoreContentType()
.data(postData)
.cookies(cookies)
.method(Method.POST)
.response();
cookies.putAll(resp.cookies());
Document nextDoc = resp.parse();
if (nextDoc.select("div.gif img").size() == 0) {
throw new IOException("No more images found");
}
sleep(2000);
return nextDoc;
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>();
for (Element image : doc.select("img.profile_gif")) {
String imageUrl = image.attr("data-animated");
if (imageUrl.startsWith("//")) {
imageUrl = "http:" + imageUrl;
}
imageUrl = imageUrl.replace("/medium/", "/large/");
imageUrl = imageUrl.replace("_s.gif", ".gif");
imageURLs.add(imageUrl);
}
logger.debug("Found " + imageURLs.size() + " images");
return imageURLs;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url);
}
}

View File

@ -50,7 +50,7 @@ public class GirlsOfDesireRipper extends AbstractHTMLRipper {
Pattern p;
Matcher m;
p = Pattern.compile("^www\\.girlsofdesire\\.org\\/galleries\\/([\\w\\d-]+)\\/$");
p = Pattern.compile("^www\\.girlsofdesire\\.org/galleries/([\\w\\d-]+)/$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
@ -72,7 +72,7 @@ public class GirlsOfDesireRipper extends AbstractHTMLRipper {
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>();
List<String> imageURLs = new ArrayList<>();
for (Element thumb : doc.select("td.vtop > a > img")) {
String imgSrc = thumb.attr("src");
imgSrc = imgSrc.replaceAll("_thumb\\.", ".");

View File

@ -0,0 +1,78 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
public class HbrowseRipper extends AbstractHTMLRipper {
public HbrowseRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "hbrowse";
}
@Override
public String getDomain() {
return "hbrowse.com";
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("http://www.hbrowse.com/(\\d+)/[a-zA-Z0-9]*");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Expected hbrowse.com URL format: " +
"hbrowse.com/ID/COMICID - got " + url + " instead");
}
@Override
public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass
Document tempDoc = Http.url(url).get();
return Http.url(tempDoc.select("td[id=pageTopHome] > a[title=view thumbnails (top)]").attr("href")).get();
}
@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
try {
Document doc = getFirstPage();
String title = doc.select("div[id=main] > table.listTable > tbody > tr > td.listLong").first().text();
return getHost() + "_" + title + "_" + getGID(url);
} catch (Exception e) {
// Fall back to default album naming convention
logger.warn("Failed to get album title from " + url, e);
}
return super.getAlbumTitle(url);
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<String>();
for (Element el : doc.select("table > tbody > tr > td > a > img")) {
String imageURL = el.attr("src").replace("/zzz", "");
result.add(imageURL);
}
return result;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}

View File

@ -0,0 +1,113 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
public class Hentai2readRipper extends AbstractHTMLRipper {
String lastPage;
public Hentai2readRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "hentai2read";
}
@Override
public String getDomain() {
return "hentai2read.com";
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("https://hentai2read\\.com/([a-zA-Z0-9_-]*)/?");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Expected hentai2read.com URL format: " +
"hbrowse.com/COMICID - got " + url + " instead");
}
@Override
public Document getFirstPage() throws IOException {
Document tempDoc;
// get the first page of the comic
if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) {
tempDoc = Http.url(url + "1").get();
} else {
tempDoc = Http.url(url + "/1").get();
}
for (Element el : tempDoc.select("ul.nav > li > a")) {
if (el.attr("href").startsWith("https://hentai2read.com/thumbnails/")) {
// Get the page with the thumbnails
return Http.url(el.attr("href")).get();
}
}
throw new IOException("Unable to get first page");
}
@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
try {
Document doc = getFirstPage();
String title = doc.select("span[itemprop=title]").text();
return getHost() + "_" + title;
} catch (Exception e) {
// Fall back to default album naming convention
logger.warn("Failed to get album title from " + url, e);
}
return super.getAlbumTitle(url);
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<String>();
for (Element el : doc.select("div.block-content > div > div.img-container > a > img.img-responsive")) {
String imageURL = "https:" + el.attr("src");
imageURL = imageURL.replace("hentaicdn.com", "static.hentaicdn.com");
imageURL = imageURL.replace("thumbnails/", "");
imageURL = imageURL.replace("tmb", "");
result.add(imageURL);
}
return result;
}
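For illustration, a minimal sketch of the thumbnail-to-full-size rewrite above; the src value is hypothetical, only the three replace() calls are taken from the ripper:
String src = "//hentaicdn.com/hentai/12345/thumbnails/001tmb.jpg"; // hypothetical img.img-responsive src
String full = "https:" + src;
full = full.replace("hentaicdn.com", "static.hentaicdn.com"); // https://static.hentaicdn.com/hentai/12345/thumbnails/001tmb.jpg
full = full.replace("thumbnails/", "").replace("tmb", "");    // https://static.hentaicdn.com/hentai/12345/001.jpg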
@Override
public Document getNextPage(Document doc) throws IOException {
// Find next page
String nextUrl = "";
Element elem = doc.select("div.bg-white > ul.pagination > li > a").last();
if (elem == null) {
throw new IOException("No more pages");
}
nextUrl = elem.attr("href");
// We use the global lastPage to check if we've already ripped this page
// and if so we quit as there are no more pages
if (nextUrl.equals(lastPage)) {
throw new IOException("No more pages");
}
lastPage = nextUrl;
// Sleep for half a sec to avoid getting IP banned
sleep(500);
return Http.url(nextUrl).get();
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}

View File

@ -0,0 +1,73 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
public class HentaiCafeRipper extends AbstractHTMLRipper {
public HentaiCafeRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "hentai";
}
@Override
public String getDomain() {
return "hentai.cafe";
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("https?://hentai\\.cafe/([a-zA-Z0-9_\\-%]*)/?$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Expected hentai.cafe URL format: " +
"hentai.cafe/COMIC - got " + url + " instead");
}
@Override
public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass
Document tempDoc = Http.url(url).get();
return Http.url(tempDoc.select("div.last > p > a.x-btn").attr("href")).get();
}
@Override
public Document getNextPage(Document doc) throws IOException {
String nextPageURL = doc.select("div[id=page] > div.inner > a").attr("href");
int totalPages = Integer.parseInt(doc.select("div.panel > div.topbar > div > div.topbar_right > div.tbtitle > div.text").text().replace("", ""));
String[] nextPageURLSplite = nextPageURL.split("/");
// This checks that the next page number does not exceed the total number of pages
if (totalPages >= Integer.parseInt(nextPageURLSplite[nextPageURLSplite.length -1])) {
return Http.url(nextPageURL).get();
}
throw new IOException("No more pages");
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<>();
result.add(doc.select("div[id=page] > div.inner > a > img.open").attr("src"));
return result;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}

View File

@ -20,7 +20,7 @@ import com.rarchives.ripme.utils.Http;
public class HentaifoundryRipper extends AbstractHTMLRipper {
private Map<String,String> cookies = new HashMap<String,String>();
private Map<String,String> cookies = new HashMap<>();
public HentaifoundryRipper(URL url) throws IOException {
super(url);
}
@ -84,7 +84,7 @@ public class HentaifoundryRipper extends AbstractHTMLRipper {
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>();
List<String> imageURLs = new ArrayList<>();
Pattern imgRegex = Pattern.compile(".*/user/([a-zA-Z0-9\\-_]+)/(\\d+)/.*");
for (Element thumb : doc.select("div.thumb_square > a.thumbLink")) {
if (isStopped()) {
@ -115,7 +115,7 @@ public class HentaifoundryRipper extends AbstractHTMLRipper {
imagePage = null;
}
// This is here for when the image is resized to a thumbnail because ripme doesn't report a screensize
if (imagePage.select("div.boxbody > img.center").attr("src").contains("thumbs.") == true) {
if (imagePage.select("div.boxbody > img.center").attr("src").contains("thumbs.")) {
imageURLs.add("http:" + imagePage.select("div.boxbody > img.center").attr("onclick").replace("this.src=", "").replace("'", "").replace("; $(#resize_message).hide();", ""));
}
else {

View File

@ -31,7 +31,7 @@ public class ImagearnRipper extends AbstractHTMLRipper {
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^.*imagearn.com/{1,}gallery.php\\?id=([0-9]{1,}).*$");
Pattern p = Pattern.compile("^.*imagearn.com/+gallery.php\\?id=([0-9]+).*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
@ -43,7 +43,7 @@ public class ImagearnRipper extends AbstractHTMLRipper {
}
public URL sanitizeURL(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^.*imagearn.com/{1,}image.php\\?id=[0-9]{1,}.*$");
Pattern p = Pattern.compile("^.*imagearn.com/+image.php\\?id=[0-9]+.*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
// URL points to imagearn *image*, not gallery
@ -75,9 +75,22 @@ public class ImagearnRipper extends AbstractHTMLRipper {
return Http.url(url).get();
}
@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
try {
Document doc = getFirstPage();
String title = doc.select("h3 > strong").first().text(); // profile name
return getHost() + "_" + title + "_" + getGID(url);
} catch (Exception e) {
// Fall back to default album naming convention
logger.warn("Failed to get album title from " + url, e);
}
return super.getAlbumTitle(url);
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>();
List<String> imageURLs = new ArrayList<>();
for (Element thumb : doc.select("div#gallery > div > a")) {
String imageURL = thumb.attr("href");
try {

View File

@ -81,7 +81,7 @@ public class ImagebamRipper extends AbstractHTMLRipper {
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>();
List<String> imageURLs = new ArrayList<>();
for (Element thumb : doc.select("div > a[target=_blank]:not(.footera)")) {
imageURLs.add(thumb.attr("href"));
}
@ -124,7 +124,7 @@ public class ImagebamRipper extends AbstractHTMLRipper {
private URL url;
private int index;
public ImagebamImageThread(URL url, int index) {
ImagebamImageThread(URL url, int index) {
super();
this.url = url;
this.index = index;

View File

@ -120,7 +120,7 @@ public class ImagefapRipper extends AbstractHTMLRipper {
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>();
List<String> imageURLs = new ArrayList<>();
for (Element thumb : doc.select("#gallery img")) {
if (!thumb.hasAttr("src") || !thumb.hasAttr("width")) {
continue;
@ -129,7 +129,7 @@ public class ImagefapRipper extends AbstractHTMLRipper {
image = image.replaceAll(
"http://x.*.fap.to/images/thumb/",
"http://fap.to/images/full/");
image = image.replaceAll("w[0-9]{1,}-h[0-9]{1,}/", "");
image = image.replaceAll("w[0-9]+-h[0-9]+/", "");
imageURLs.add(image);
if (isThisATest()) {
break;
@ -152,7 +152,7 @@ public class ImagefapRipper extends AbstractHTMLRipper {
Pattern p = Pattern.compile("^Porn pics of (.*) \\(Page 1\\)$");
Matcher m = p.matcher(title);
if (m.matches()) {
return getHost() + "_" + m.group(1);
return getHost() + "_" + m.group(1) + "_" + getGID(url);
}
} catch (IOException e) {
// Fall back to default album naming convention
@ -160,4 +160,4 @@ public class ImagefapRipper extends AbstractHTMLRipper {
return super.getAlbumTitle(url);
}
}
}

View File

@ -1,87 +0,0 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.json.JSONArray;
import org.json.JSONObject;
import com.rarchives.ripme.ripper.AbstractJSONRipper;
import com.rarchives.ripme.utils.Http;
public class ImagestashRipper extends AbstractJSONRipper {
private int page = 1;
public ImagestashRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "imagestash";
}
@Override
public String getDomain() {
return "imagestash.org";
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^.*imagestash.org/tag/([a-zA-Z0-9\\-_]+)$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException(
"Expected imagestash.org tag formats: "
+ "imagestash.org/tag/tagname"
+ " Got: " + url);
}
@Override
public JSONObject getFirstPage() throws IOException {
String baseURL = "https://imagestash.org/images?tags="
+ getGID(url)
+ "&page=" + page;
return Http.url(baseURL).getJSON();
}
@Override
public JSONObject getNextPage(JSONObject json) throws IOException {
int count = json.getInt("count"),
offset = json.getInt("offset"),
total = json.getInt("total");
if (count + offset >= total || json.getJSONArray("images").length() == 0) {
throw new IOException("No more images");
}
sleep(1000);
page++;
return getFirstPage();
}
@Override
public List<String> getURLsFromJSON(JSONObject json) {
List<String> imageURLs = new ArrayList<String>();
JSONArray images = json.getJSONArray("images");
for (int i = 0; i < images.length(); i++) {
JSONObject image = images.getJSONObject(i);
String imageURL = image.getString("src");
if (imageURL.startsWith("/")) {
imageURL = "https://imagestash.org" + imageURL;
}
imageURLs.add(imageURL);
}
return imageURLs;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}

View File

@ -62,7 +62,7 @@ public class ImagevenueRipper extends AbstractHTMLRipper {
}
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>();
List<String> imageURLs = new ArrayList<>();
for (Element thumb : doc.select("a[target=_blank]")) {
imageURLs.add(thumb.attr("href"));
}
@ -83,7 +83,7 @@ public class ImagevenueRipper extends AbstractHTMLRipper {
private URL url;
private int index;
public ImagevenueImageThread(URL url, int index) {
ImagevenueImageThread(URL url, int index) {
super();
this.url = url;
this.index = index;

View File

@ -46,11 +46,11 @@ public class ImgboxRipper extends AbstractHTMLRipper {
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>();
List<String> imageURLs = new ArrayList<>();
for (Element thumb : doc.select("div.boxed-content > a > img")) {
String image = thumb.attr("src")
.replaceAll("[-a-zA-Z0-9.]+s.imgbox.com",
"i.imgbox.com");
String image = thumb.attr("src").replaceAll("thumbs", "images");
image = image.replace("_b", "_o");
image = image.replaceAll("\\d-s", "i");
imageURLs.add(image);
}
return imageURLs;

View File

@ -31,14 +31,15 @@ public class ImgurRipper extends AlbumRipper {
private Document albumDoc;
static enum ALBUM_TYPE {
enum ALBUM_TYPE {
ALBUM,
USER,
USER_ALBUM,
USER_IMAGES,
SERIES_OF_IMAGES,
SUBREDDIT
};
}
private ALBUM_TYPE albumType;
public ImgurRipper(URL url) throws IOException {
@ -104,7 +105,8 @@ public class ImgurRipper extends AlbumRipper {
*/
String title = null;
final String defaultTitle = "Imgur: The most awesome images on the Internet";
final String defaultTitle1 = "Imgur: The most awesome images on the Internet";
final String defaultTitle2 = "Imgur: The magic of the Internet";
logger.info("Trying to get album title");
elems = albumDoc.select("meta[property=og:title]");
if (elems != null) {
@ -113,7 +115,7 @@ public class ImgurRipper extends AlbumRipper {
}
// This is here in case the album is unnamed, to prevent
// Imgur: The most awesome images on the Internet from being added onto the album name
if (title.contains(defaultTitle)) {
if (title.contains(defaultTitle1) || title.contains(defaultTitle2)) {
logger.debug("Album is untitled or imgur is returning the default title");
// We set the title to "" here because if it's found in the next few attempts it will be changed
// but if it's not found there will be no reason to set it later
@ -121,7 +123,7 @@ public class ImgurRipper extends AlbumRipper {
logger.debug("Trying to use title tag to get title");
elems = albumDoc.select("title");
if (elems != null) {
if (elems.text().contains(defaultTitle)) {
if (elems.text().contains(defaultTitle1) || elems.text().contains(defaultTitle2)) {
logger.debug("Was unable to get album title or album was untitled");
}
else {
@ -223,7 +225,7 @@ public class ImgurRipper extends AlbumRipper {
String[] imageIds = m.group(1).split(",");
for (String imageId : imageIds) {
// TODO: Fetch image with ID imageId
logger.debug("Fetching image info for ID " + imageId);;
logger.debug("Fetching image info for ID " + imageId);
try {
JSONObject json = Http.url("https://api.imgur.com/2/image/" + imageId + ".json").getJSON();
if (!json.has("image")) {
@ -350,7 +352,6 @@ public class ImgurRipper extends AlbumRipper {
Thread.sleep(SLEEP_BETWEEN_ALBUMS * 1000);
} catch (Exception e) {
logger.error("Error while ripping album: " + e.getMessage(), e);
continue;
}
}
}
@ -448,6 +449,15 @@ public class ImgurRipper extends AlbumRipper {
this.url = new URL("http://imgur.com/a/" + gid);
return gid;
}
p = Pattern.compile("^https?://(www\\.|m\\.)?imgur\\.com/(a|gallery|t)/[a-zA-Z0-9]*/([a-zA-Z0-9]{5,}).*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
// Imgur album or gallery
albumType = ALBUM_TYPE.ALBUM;
String gid = m.group(m.groupCount());
this.url = new URL("http://imgur.com/a/" + gid);
return gid;
}
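A minimal sketch of what the new gallery/tag pattern accepts; the URL below is made up, and m.group(m.groupCount()) is the trailing album ID that gets rewritten to http://imgur.com/a/ID:
Pattern p = Pattern.compile("^https?://(www\\.|m\\.)?imgur\\.com/(a|gallery|t)/[a-zA-Z0-9]*/([a-zA-Z0-9]{5,}).*$");
Matcher m = p.matcher("https://imgur.com/t/funny/AbCdE0");
if (m.matches()) {
    System.out.println(m.group(m.groupCount())); // prints AbCdE0
}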
p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{3,})\\.imgur\\.com/?$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
@ -515,12 +525,12 @@ public class ImgurRipper extends AlbumRipper {
}
public static class ImgurImage {
public String title = "",
description = "",
extension = "";
String title = "";
String description = "";
String extension = "";
public URL url = null;
public ImgurImage(URL url) {
ImgurImage(URL url) {
this.url = url;
String tempUrl = url.toExternalForm();
this.extension = tempUrl.substring(tempUrl.lastIndexOf('.'));
@ -528,7 +538,7 @@ public class ImgurRipper extends AlbumRipper {
this.extension = this.extension.substring(0, this.extension.indexOf("?"));
}
}
public ImgurImage(URL url, String title) {
ImgurImage(URL url, String title) {
this(url);
this.title = title;
}
@ -536,7 +546,7 @@ public class ImgurRipper extends AlbumRipper {
this(url, title);
this.description = description;
}
public String getSaveAs() {
String getSaveAs() {
String saveAs = this.title;
String u = url.toExternalForm();
if (u.contains("?")) {
@ -554,17 +564,17 @@ public class ImgurRipper extends AlbumRipper {
}
public static class ImgurAlbum {
public String title = null;
String title = null;
public URL url = null;
public List<ImgurImage> images = new ArrayList<ImgurImage>();
public ImgurAlbum(URL url) {
public List<ImgurImage> images = new ArrayList<>();
ImgurAlbum(URL url) {
this.url = url;
}
public ImgurAlbum(URL url, String title) {
this(url);
this.title = title;
}
public void addImage(ImgurImage image) {
void addImage(ImgurImage image) {
images.add(image);
}
}

View File

@ -3,6 +3,8 @@ package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.time.*;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
@ -12,10 +14,16 @@ import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import com.rarchives.ripme.ripper.AbstractJSONRipper;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
public class InstagramRipper extends AbstractJSONRipper {
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ui.RipStatusMessage;
import com.rarchives.ripme.utils.Utils;
public class InstagramRipper extends AbstractHTMLRipper {
private String userID;
@ -37,131 +45,248 @@ public class InstagramRipper extends AbstractJSONRipper {
return (url.getHost().endsWith("instagram.com"));
}
@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
URL san_url = new URL(url.toExternalForm().replaceAll("\\?hl=\\S*", ""));
logger.info("sanitized URL is " + san_url.toExternalForm());
return san_url;
}
private List<String> getPostsFromSinglePage(Document Doc) {
List<String> imageURLs = new ArrayList<>();
JSONArray datas;
try {
JSONObject json = getJSONFromPage(Doc);
if (json.getJSONObject("entry_data").getJSONArray("PostPage")
.getJSONObject(0).getJSONObject("graphql").getJSONObject("shortcode_media")
.has("edge_sidecar_to_children")) {
datas = json.getJSONObject("entry_data").getJSONArray("PostPage")
.getJSONObject(0).getJSONObject("graphql").getJSONObject("shortcode_media")
.getJSONObject("edge_sidecar_to_children").getJSONArray("edges");
for (int i = 0; i < datas.length(); i++) {
JSONObject data = (JSONObject) datas.get(i);
data = data.getJSONObject("node");
if (data.has("is_video") && data.getBoolean("is_video")) {
imageURLs.add(data.getString("video_url"));
} else {
imageURLs.add(data.getString("display_url"));
}
}
} else {
JSONObject data = json.getJSONObject("entry_data").getJSONArray("PostPage")
.getJSONObject(0).getJSONObject("graphql").getJSONObject("shortcode_media");
if (data.getBoolean("is_video")) {
imageURLs.add(data.getString("video_url"));
} else {
imageURLs.add(data.getString("display_url"));
}
}
return imageURLs;
} catch (IOException e) {
logger.error("Unable to get JSON from page " + url.toExternalForm());
return null;
}
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://instagram.com/([^/]+)");
Pattern p = Pattern.compile("^https?://instagram.com/([^/]+)/?");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
p = Pattern.compile("^https?://www.instagram.com/([^/]+)/?(?:\\?hl=\\S*)?/?");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
p = Pattern.compile("^https?://www.instagram.com/p/([a-zA-Z0-9_-]+)/\\?taken-by=([^/]+)/?");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(2) + "_" + m.group(1);
}
p = Pattern.compile("^https?://www.instagram.com/p/([a-zA-Z0-9_-]+)/?");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
p = Pattern.compile("^https?://www.instagram.com/p/([a-zA-Z0-9_-]+)/?(?:\\?hl=\\S*)?/?");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
p = Pattern.compile("^https?://www.instagram.com/explore/tags/([^/]+)/?");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Unable to find user in " + url);
}
@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^.*instagram\\.com/([a-zA-Z0-9\\-_.]+).*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return new URL("http://instagram.com/" + m.group(1));
}
throw new MalformedURLException("Expected username in URL (instagram.com/username and not " + url);
}
private String getUserID(URL url) throws IOException {
Pattern p = Pattern.compile("^https?://instagram\\.com/([^/]+)");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new IOException("Unable to find userID at " + this.url);
}
@Override
public JSONObject getFirstPage() throws IOException {
userID = getUserID(url);
String baseURL = "http://instagram.com/" + userID + "/media";
private JSONObject getJSONFromPage(Document firstPage) throws IOException {
String jsonText = "";
try {
JSONObject result = Http.url(baseURL).getJSON();
return result;
for (Element script : firstPage.select("script[type=text/javascript]")) {
if (script.data().contains("window._sharedData = ")) {
jsonText = script.data().replaceAll("window._sharedData = ", "");
jsonText = jsonText.replaceAll("};", "}");
}
}
return new JSONObject(jsonText);
} catch (JSONException e) {
throw new IOException("Could not get instagram user via: " + baseURL);
throw new IOException("Could not get JSON from page");
}
}
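Roughly what the parsed script tag contains and what the two replaceAll calls leave behind, sketched with a made-up, abridged payload (the real window._sharedData blob is much larger):
String data = "window._sharedData = {\"entry_data\":{\"ProfilePage\":[]}};";
String jsonText = data.replaceAll("window._sharedData = ", "").replaceAll("};", "}");
// jsonText is now {"entry_data":{"ProfilePage":[]}} and can be passed to new JSONObject(jsonText)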
@Override
public JSONObject getNextPage(JSONObject json) throws IOException {
public Document getFirstPage() throws IOException {
userID = getGID(url);
return Http.url(url).get();
}
boolean nextPageAvailable;
private String getVideoFromPage(String videoID) {
try {
nextPageAvailable = json.getBoolean("more_available");
} catch (Exception e) {
throw new IOException("No additional pages found");
}
if (nextPageAvailable) {
JSONArray items = json.getJSONArray("items");
JSONObject last_item = items.getJSONObject(items.length() - 1);
String nextMaxID = last_item.getString("id");
String baseURL = "http://instagram.com/" + userID + "/media/?max_id=" + nextMaxID;
logger.info("Loading " + baseURL);
sleep(1000);
JSONObject nextJSON = Http.url(baseURL).getJSON();
return nextJSON;
} else {
throw new IOException("No more images found");
Document doc = Http.url("https://www.instagram.com/p/" + videoID).get();
return doc.select("meta[property=og:video]").attr("content");
} catch (IOException e) {
logger.warn("Unable to get page " + "https://www.instagram.com/p/" + videoID);
}
return "";
}
private String getOriginalUrl(String imageURL) {
// Without this regex most images will return a 403 error
imageURL = imageURL.replaceAll("vp/[a-zA-Z0-9]*/", "");
imageURL = imageURL.replaceAll("scontent.cdninstagram.com/hphotos-", "igcdn-photos-d-a.akamaihd.net/hphotos-ak-");
// TODO replace this with a single regex
imageURL = imageURL.replaceAll("p150x150/", "");
imageURL = imageURL.replaceAll("p320x320/", "");
imageURL = imageURL.replaceAll("p480x480/", "");
imageURL = imageURL.replaceAll("p640x640/", "");
imageURL = imageURL.replaceAll("p720x720/", "");
imageURL = imageURL.replaceAll("p1080x1080/", "");
imageURL = imageURL.replaceAll("p2048x2048/", "");
imageURL = imageURL.replaceAll("s150x150/", "");
imageURL = imageURL.replaceAll("s320x320/", "");
imageURL = imageURL.replaceAll("s480x480/", "");
imageURL = imageURL.replaceAll("s640x640/", "");
imageURL = imageURL.replaceAll("s720x720/", "");
imageURL = imageURL.replaceAll("s1080x1080/", "");
imageURL = imageURL.replaceAll("s2048x2048/", "");
// Instagram returns cropped images to unauthenticated applications to maintain legacy support.
// To retrieve the uncropped image, remove this segment from the URL.
// Segment format: cX.Y.W.H - eg: c0.134.1080.1080
imageURL = imageURL.replaceAll("\\/c\\d{1,4}\\.\\d{1,4}\\.\\d{1,4}\\.\\d{1,4}", "");
imageURL = imageURL.replaceAll("/c\\d{1,4}\\.\\d{1,4}\\.\\d{1,4}\\.\\d{1,4}", "");
imageURL = imageURL.replaceAll("\\?ig_cache_key.+$", "");
return imageURL;
}
private String getMedia(JSONObject data) {
String imageURL = "";
if (data.has("videos")) {
imageURL = data.getJSONObject("videos").getJSONObject("standard_resolution").getString("url");
} else if (data.has("images")) {
imageURL = data.getJSONObject("images").getJSONObject("standard_resolution").getString("url");
}
return imageURL;
}
@Override
public List<String> getURLsFromJSON(JSONObject json) {
List<String> imageURLs = new ArrayList<String>();
JSONArray datas = json.getJSONArray("items");
for (int i = 0; i < datas.length(); i++) {
JSONObject data = (JSONObject) datas.get(i);
public List<String> getURLsFromPage(Document doc) {
String nextPageID = "";
List<String> imageURLs = new ArrayList<>();
JSONObject json = new JSONObject();
try {
json = getJSONFromPage(doc);
} catch (IOException e) {
logger.warn("Unable to exact json from page");
}
String dataType = data.getString("type");
if (dataType.equals("carousel")) {
JSONArray carouselMedias = data.getJSONArray("carousel_media");
for (int carouselIndex = 0; carouselIndex < carouselMedias.length(); carouselIndex++) {
JSONObject carouselMedia = (JSONObject) carouselMedias.get(carouselIndex);
String imageURL = getMedia(carouselMedia);
if (!imageURL.equals("")) {
imageURL = getOriginalUrl(imageURL);
imageURLs.add(imageURL);
if (!url.toExternalForm().contains("/p/")) {
JSONArray datas = new JSONArray();
try {
JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage");
datas = profilePage.getJSONObject(0).getJSONObject("user").getJSONObject("media").getJSONArray("nodes");
} catch (JSONException e) {
// Handle hashtag pages
datas = json.getJSONObject("entry_data").getJSONArray("TagPage").getJSONObject(0)
.getJSONObject("tag").getJSONObject("media").getJSONArray("nodes");
}
for (int i = 0; i < datas.length(); i++) {
JSONObject data = (JSONObject) datas.get(i);
Long epoch = data.getLong("date");
Instant instant = Instant.ofEpochSecond(epoch);
String image_date = DateTimeFormatter.ofPattern("yyyy_MM_dd_hh:mm_").format(ZonedDateTime.ofInstant(instant, ZoneOffset.UTC));
if (data.getString("__typename").equals("GraphSidecar")) {
try {
Document slideShowDoc = Http.url(new URL ("https://www.instagram.com/p/" + data.getString("code"))).get();
List<String> toAdd = getPostsFromSinglePage(slideShowDoc);
for (int slideShowInt=0; slideShowInt<toAdd.size(); slideShowInt++) {
addURLToDownload(new URL(toAdd.get(slideShowInt)), image_date + data.getString("code"));
}
} catch (MalformedURLException e) {
logger.error("Unable to download slide show, URL was malformed");
} catch (IOException e) {
logger.error("Unable to download slide show");
}
}
} else {
String imageURL = getMedia(data);
if (!imageURL.equals("")) {
imageURL = getOriginalUrl(imageURL);
imageURLs.add(imageURL);
try {
if (!data.getBoolean("is_video")) {
if (imageURLs.size() == 0) {
// We add this one item to the array because otherwise
// the ripper will error out because we returned an empty array
imageURLs.add(getOriginalUrl(data.getString("thumbnail_src")));
}
addURLToDownload(new URL(getOriginalUrl(data.getString("thumbnail_src"))), image_date);
} else {
if (!Utils.getConfigBoolean("instagram.download_images_only", false)) {
addURLToDownload(new URL(getVideoFromPage(data.getString("code"))), image_date);
} else {
sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Skipping video " + data.getString("code"));
}
}
} catch (MalformedURLException e) {
return imageURLs;
}
nextPageID = data.getString("id");
if (isThisATest()) {
break;
}
}
// Rip the next page
if (!nextPageID.equals("") && !isThisATest()) {
if (url.toExternalForm().contains("/tags/")) {
try {
// Sleep for a while to avoid a ban
sleep(2500);
if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) {
getURLsFromPage(Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get());
} else {
getURLsFromPage(Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get());
}
if (isThisATest()) {
break;
} catch (IOException e) {
return imageURLs;
}
}
try {
// Sleep for a while to avoid a ban
sleep(2500);
getURLsFromPage(Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get());
} catch (IOException e) {
return imageURLs;
}
} else {
logger.warn("Can't get net page");
}
} else { // We're ripping from a single page
logger.info("Ripping from single page");
imageURLs = getPostsFromSinglePage(doc);
}
return imageURLs;
}

View File

@ -4,14 +4,12 @@ import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
@ -50,7 +48,7 @@ public class JagodibujaRipper extends AbstractHTMLRipper {
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<String>();
List<String> result = new ArrayList<>();
for (Element comicPageUrl : doc.select("div.gallery-icon > a")) {
try {
sleep(500);

View File

@ -1,24 +1,15 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.rarchives.ripme.utils.Utils;
import org.jsoup.Connection.Method;
import org.jsoup.Connection.Response;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.safety.Whitelist;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
@ -51,7 +42,7 @@ public class LusciousRipper extends AbstractHTMLRipper {
@Override
public List<String> getURLsFromPage(Document page) {
List<String> urls = new ArrayList<String>();
List<String> urls = new ArrayList<>();
Elements urlElements = page.select("img#single_picture");
for (Element e : urlElements) {
urls.add(e.attr("src"));

View File

@ -1,166 +0,0 @@
package com.rarchives.ripme.ripper.rippers;
import java.awt.Desktop;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.net.ssl.SSLException;
import javax.swing.JOptionPane;
import org.json.JSONArray;
import org.json.JSONObject;
import com.rarchives.ripme.ripper.AbstractJSONRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;
public class MediacrushRipper extends AbstractJSONRipper {
/** Ordered list of preferred formats, sorted by preference (low-to-high) */
private static final Map<String, Integer> PREFERRED_FORMATS = new HashMap<String,Integer>();
static {
PREFERRED_FORMATS.put("mp4", 0);
PREFERRED_FORMATS.put("wemb",1);
PREFERRED_FORMATS.put("ogv", 2);
PREFERRED_FORMATS.put("mp3", 3);
PREFERRED_FORMATS.put("ogg", 4);
PREFERRED_FORMATS.put("gif", 5);
PREFERRED_FORMATS.put("png", 6);
PREFERRED_FORMATS.put("jpg", 7);
PREFERRED_FORMATS.put("jpeg",8);
};
public MediacrushRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "mediacrush";
}
@Override
public String getDomain() {
return "mediacru.sh";
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("https?://[wm.]*mediacru\\.sh/([a-zA-Z0-9]+).*");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Could not find mediacru.sh page ID from " + url
+ " expected format: http://mediacru.sh/pageid");
}
@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
String u = url.toExternalForm();
// Strip trailing "/" characters
while (u.endsWith("/")) {
u = u.substring(0, u.length() - 1);
}
// Append .json
if (!u.endsWith(".json")) {
u += ".json";
}
return new URL(u);
}
@Override
public JSONObject getFirstPage() throws IOException {
try {
String jsonString = Http.url(url)
.ignoreContentType()
.connection()
.execute().body();
jsonString = jsonString.replace("&quot;", "\"");
return new JSONObject(jsonString);
} catch (SSLException re) {
// Check for >1024 bit encryption but in older versions of Java
// It's the bug. Suggest downloading the latest version.
int selection = JOptionPane.showOptionDialog(null,
"You need to upgrade to the latest Java (7+) to rip this album.\n"
+ "Do you want to open java.com and download the latest version?",
"RipMe - Java Error",
JOptionPane.OK_CANCEL_OPTION,
JOptionPane.ERROR_MESSAGE,
null,
new String[] {"Go to java.com", "Cancel"},
0);
sendUpdate(STATUS.RIP_ERRORED, "Your version of Java can't handle some secure websites");
if (selection == 0) {
URL javaUrl = new URL("https://www.java.com/en/download/");
try {
Desktop.getDesktop().browse(javaUrl.toURI());
} catch (URISyntaxException use) { }
}
throw new IOException("Cannot rip due to limitations in Java installation, consider upgrading Java", re.getCause());
}
catch (Exception e) {
throw new IOException("Unexpected error: " + e.getMessage(), e);
}
}
@Override
public List<String> getURLsFromJSON(JSONObject json) {
List<String> imageURLs = new ArrayList<String>();
// Iterate over all files
JSONArray files = json.getJSONArray("files");
for (int i = 0; i < files.length(); i++) {
JSONObject file = (JSONObject) files.get(i);
// Find preferred file format
JSONArray subfiles = file.getJSONArray("files");
String preferredUrl = getPreferredUrl(subfiles);
if (preferredUrl == null) {
logger.warn("Could not find 'file' inside of " + file);
sendUpdate(STATUS.DOWNLOAD_ERRORED, "Could not find file inside of " + file);
continue;
}
imageURLs.add(preferredUrl);
}
return imageURLs;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
/**
* Iterates over list of "file" objects and returns the preferred
* image format.
* @param subfiles Array of "files" (JSONObjects) which contain
* @return Preferred media format.
*/
private String getPreferredUrl(JSONArray subfiles) {
String preferredUrl = null;
int preferredIndex = Integer.MAX_VALUE;
// Iterate over all media types
for (int j = 0; j < subfiles.length(); j++) {
JSONObject subfile = subfiles.getJSONObject(j);
String thisurl = subfile.getString("url");
String extension = thisurl.substring(thisurl.lastIndexOf(".") + 1);
if (!PREFERRED_FORMATS.containsKey(extension)) {
continue;
}
// Keep track of the most-preferred format
int thisindex = PREFERRED_FORMATS.get(extension);
if (preferredUrl == null || thisindex < preferredIndex) {
preferredIndex = thisindex;
preferredUrl = thisurl;
}
}
return preferredUrl;
}
}

View File

@ -3,134 +3,64 @@ package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.json.JSONArray;
import org.json.JSONObject;
import org.jsoup.Connection.Method;
import org.jsoup.Connection.Response;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AlbumRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Utils;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
public class ModelmayhemRipper extends AlbumRipper {
private static final String DOMAIN = "modelmayhem.com",
HOST = "modelmayhem";
public class ModelmayhemRipper extends AbstractHTMLRipper {
public ModelmayhemRipper(URL url) throws IOException {
super(url);
}
@Override
public boolean canRip(URL url) {
return (url.getHost().endsWith(DOMAIN));
}
@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
return url;
}
@Override
public void rip() throws IOException {
Map<String,String> cookies = null,
postData = new HashMap<String,String>();
String gid = getGID(this.url),
ref = "http://www.modelmayhem.com/" + gid;
Response resp = null;
String theurl = "http://www.modelmayhem.com/" + gid;
logger.info("Loading " + theurl);
resp = Jsoup.connect(theurl)
.timeout(5000)
.referrer("")
.userAgent(USER_AGENT)
.method(Method.GET)
.execute();
cookies = resp.cookies();
resp = Jsoup.connect("http://www.modelmayhem.com/includes/js/auth.php")
.cookies(cookies)
.ignoreContentType(true)
.referrer(ref)
.userAgent(USER_AGENT)
.method(Method.GET)
.execute();
String authText = resp.parse().html();
String mmservice = authText.substring(authText.indexOf("token = '") + 9);
mmservice = mmservice.substring(0, mmservice.indexOf("'"));
cookies.putAll(resp.cookies());
cookies.put("worksafe", "0");
theurl = "http://www.modelmayhem.com/services/photo_viewer/albums/" + gid;
postData.put("MMSERVICE", mmservice);
resp = Jsoup.connect(theurl)
.data(postData)
.cookies(cookies)
.referrer(ref)
.userAgent(USER_AGENT)
.method(Method.POST)
.execute();
cookies.putAll(resp.cookies());
theurl = "http://www.modelmayhem.com/services/photo_viewer/pictures/" + gid + "/0/0/1/0";
this.sendUpdate(STATUS.LOADING_RESOURCE, theurl);
logger.info("Loading " + theurl);
resp = Jsoup.connect(theurl)
.data(postData)
.cookies(cookies)
.referrer(ref)
.userAgent(USER_AGENT)
.method(Method.POST)
.execute();
Document doc = resp.parse();
String jsonText = doc.body().html();
jsonText = jsonText.replace("&quot;", "\"");
System.err.println(jsonText);
JSONObject json = new JSONObject(jsonText);
JSONArray pictures = json.getJSONArray("pictures");
for (int i = 0; i < pictures.length(); i++) {
JSONObject picture = pictures.getJSONObject(i);
String bigImage = picture.getString("big_image");
if (bigImage.trim().equals("")) {
logger.info("Got empty image for " + picture.toString(2));
continue;
}
String prefix = "";
if (Utils.getConfigBoolean("download.save_order", true)) {
prefix = String.format("%03d_", i + 1);
}
addURLToDownload(new URL(bigImage), prefix);
if (isThisATest()) {
break;
}
}
waitForThreads();
}
@Override
public String getHost() {
return HOST;
return "modelmayhem";
}
@Override
public String getDomain() {
return "modelmayhem.com";
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://[w.]*modelmayhem.com.*/([0-9]+)/?.*$");
Pattern p = Pattern.compile("https?://www\\.modelmayhem\\.com/portfolio/(\\d+)/viewall");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Modelmayhem user ID not found in " + url + ", expected http://modelmayhem.com/userid");
throw new MalformedURLException("Expected modelmayhem URL format: " +
"modelmayhem.com/portfolio/ID/viewall - got " + url + " instead");
}
@Override
public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass
return Http.url(url).get();
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<>();
for (Element el : doc.select("tr.a_pics > td > div > a")) {
String image_URL = el.select("img").attr("src").replaceAll("_m", "");
if (image_URL.contains("http")) {
result.add(image_URL);
}
}
return result;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}

View File

@ -126,7 +126,7 @@ public class MotherlessRipper extends AlbumRipper {
private URL url;
private int index;
public MotherlessImageThread(URL url, int index) {
MotherlessImageThread(URL url, int index) {
super();
this.url = url;
this.index = index;
@ -142,7 +142,7 @@ public class MotherlessRipper extends AlbumRipper {
Document doc = Http.url(u)
.referrer(u)
.get();
Pattern p = Pattern.compile("^.*__fileurl = '([^']{1,})';.*$", Pattern.DOTALL);
Pattern p = Pattern.compile("^.*__fileurl = '([^']+)';.*$", Pattern.DOTALL);
Matcher m = p.matcher(doc.outerHtml());
if (m.matches()) {
String file = m.group(1);

View File

@ -11,11 +11,9 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.util.Arrays;
public class MyhentaicomicsRipper extends AbstractHTMLRipper {
public static boolean isTag;
private static boolean isTag;
public MyhentaicomicsRipper(URL url) throws IOException {
super(url);
@ -47,7 +45,7 @@ public class MyhentaicomicsRipper extends AbstractHTMLRipper {
return ma.group(1);
}
Pattern pat = Pattern.compile("^https?://myhentaicomics.com/index.php/tag/([0-9]*)/?([a-zA-Z%0-9+\\?=:]*)?$");
Pattern pat = Pattern.compile("^https?://myhentaicomics.com/index.php/tag/([0-9]*)/?([a-zA-Z%0-9+?=:]*)?$");
Matcher mat = pat.matcher(url.toExternalForm());
if (mat.matches()) {
isTag = true;
@ -84,8 +82,8 @@ public class MyhentaicomicsRipper extends AbstractHTMLRipper {
}
// This replaces getNextPage when downloading from searches and tags
public List<String> getNextAlbumPage(String pageUrl) {
List<String> albumPagesList = new ArrayList<String>();
private List<String> getNextAlbumPage(String pageUrl) {
List<String> albumPagesList = new ArrayList<>();
int pageNumber = 1;
albumPagesList.add("http://myhentaicomics.com/index.php/" + pageUrl.split("\\?")[0] + "?page=" + Integer.toString(pageNumber));
while (true) {
@ -115,9 +113,9 @@ public class MyhentaicomicsRipper extends AbstractHTMLRipper {
return albumPagesList;
}
public List<String> getAlbumsFromPage(String url) {
private List<String> getAlbumsFromPage(String url) {
List<String> pagesToRip;
List<String> result = new ArrayList<String>();
List<String> result = new ArrayList<>();
logger.info("Running getAlbumsFromPage");
Document doc;
try {
@ -161,7 +159,7 @@ public class MyhentaicomicsRipper extends AbstractHTMLRipper {
url_string = url_string.replace("%28", "_");
url_string = url_string.replace("%29", "_");
url_string = url_string.replace("%2C", "_");
if (isTag == true) {
if (isTag) {
logger.info("Downloading from a tag or search");
try {
sleep(500);
@ -180,11 +178,11 @@ public class MyhentaicomicsRipper extends AbstractHTMLRipper {
return result;
}
public List<String> getListOfPages(Document doc) {
List<String> pages = new ArrayList<String>();
private List<String> getListOfPages(Document doc) {
List<String> pages = new ArrayList<>();
// Get the link from the last button
String nextPageUrl = doc.select("a.ui-icon-right").last().attr("href");
Pattern pat = Pattern.compile("\\/index\\.php\\/tag\\/[0-9]*\\/[a-zA-Z0-9_\\-\\:+]*\\?page=(\\d+)");
Pattern pat = Pattern.compile("/index\\.php/tag/[0-9]*/[a-zA-Z0-9_\\-:+]*\\?page=(\\d+)");
Matcher mat = pat.matcher(nextPageUrl);
if (mat.matches()) {
logger.debug("Getting pages from a tag");
@ -197,7 +195,7 @@ public class MyhentaicomicsRipper extends AbstractHTMLRipper {
pages.add(link);
}
} else {
Pattern pa = Pattern.compile("\\/index\\.php\\/search\\?q=[a-zA-Z0-9_\\-\\:]*\\&page=(\\d+)");
Pattern pa = Pattern.compile("/index\\.php/search\\?q=[a-zA-Z0-9_\\-:]*&page=(\\d+)");
Matcher ma = pa.matcher(nextPageUrl);
if (ma.matches()) {
logger.debug("Getting pages from a search");
@ -217,7 +215,7 @@ public class MyhentaicomicsRipper extends AbstractHTMLRipper {
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<String>();
List<String> result = new ArrayList<>();
// Checks if this is a comic page or a page of albums
// If true the page is a page of albums
if (doc.toString().contains("class=\"g-item g-album\"")) {
@ -241,7 +239,7 @@ public class MyhentaicomicsRipper extends AbstractHTMLRipper {
for (Element el : doc.select("img")) {
String imageSource = el.attr("src");
// This bool is here so we don't try and download the site logo
if (!imageSource.startsWith("http://")) {
if (!imageSource.startsWith("http://") && !imageSource.startsWith("https://")) {
// We replace thumbs with resizes so we can get the full sized images
imageSource = imageSource.replace("thumbs", "resizes");
result.add("http://myhentaicomics.com/" + imageSource);

View File

@ -86,7 +86,7 @@ public class NatalieMuRipper extends AbstractHTMLRipper {
@Override
public List<String> getURLsFromPage(Document page) {
List<String> imageURLs = new ArrayList<String>();
List<String> imageURLs = new ArrayList<>();
Pattern p; Matcher m;
//select all album thumbnails
for (Element span : page.select(".NA_articleGallery span")) {

View File

@ -75,7 +75,7 @@ public class NfsfwRipper extends AlbumRipper {
@Override
public void rip() throws IOException {
List<Pair> subAlbums = new ArrayList<Pair>();
List<Pair> subAlbums = new ArrayList<>();
int index = 0;
subAlbums.add(new Pair(this.url.toExternalForm(), ""));
while (subAlbums.size() > 0) {
@ -153,7 +153,7 @@ public class NfsfwRipper extends AlbumRipper {
private String subdir;
private int index;
public NfsfwImageThread(URL url, String subdir, int index) {
NfsfwImageThread(URL url, String subdir, int index) {
super();
this.url = url;
this.subdir = subdir;
@ -187,8 +187,9 @@ public class NfsfwRipper extends AlbumRipper {
}
private class Pair {
public String first, second;
public Pair(String first, String second) {
String first;
String second;
Pair(String first, String second) {
this.first = first;
this.second = second;
}

View File

@ -61,7 +61,7 @@ public class NhentaiRipper extends AbstractHTMLRipper {
if (title == null) {
return getAlbumTitle(url);
}
return title;
return "nhentai" + title;
}
@Override
@ -87,7 +87,7 @@ public class NhentaiRipper extends AbstractHTMLRipper {
@Override
public List<String> getURLsFromPage(Document page) {
List<String> imageURLs = new ArrayList<String>();
List<String> imageURLs = new ArrayList<>();
Elements thumbs = page.select(".gallerythumb");
for (Element el : thumbs) {
String imageUrl = el.attr("href");

View File

@ -55,7 +55,7 @@ public class NudeGalsRipper extends AbstractHTMLRipper {
Pattern p;
Matcher m;
p = Pattern.compile("^.*nude-gals\\.com\\/photoshoot\\.php\\?photoshoot_id=(\\d+)$");
p = Pattern.compile("^.*nude-gals\\.com/photoshoot\\.php\\?photoshoot_id=(\\d+)$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
@ -77,7 +77,7 @@ public class NudeGalsRipper extends AbstractHTMLRipper {
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>();
List<String> imageURLs = new ArrayList<>();
Elements thumbs = doc.select("#grid_container .grid > .grid_box");
for (Element thumb : thumbs) {

View File

@ -0,0 +1,87 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
public class OglafRipper extends AbstractHTMLRipper {
public OglafRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "oglaf";
}
@Override
public String getDomain() {
return "oglaf.com";
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("http://oglaf\\.com/([a-zA-Z1-9_-]*)/?");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Expected oglaf URL format: " +
"oglaf.com/NAME - got " + url + " instead");
}
@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
return getDomain();
}
@Override
public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass
return Http.url(url).get();
}
@Override
public Document getNextPage(Document doc) throws IOException {
if (doc.select("div#nav > a > div#nx").first() == null) {
throw new IOException("No more pages");
}
Element elem = doc.select("div#nav > a > div#nx").first().parent();
String nextPage = elem.attr("href");
// Sometimes this returns an empty string
// This check stops that
if (nextPage.equals("")) {
throw new IOException("No more pages");
}
else {
sleep(1000);
return Http.url("http://oglaf.com" + nextPage).get();
}
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<>();
for (Element el : doc.select("b > img#strip")) {
String imageSource = el.select("img").attr("src");
result.add(imageSource);
}
return result;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}
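A small Jsoup sketch of the next-page lookup used above; the nav markup is a made-up stand-in for an oglaf.com page:
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

public class OglafNextPageSketch {
    public static void main(String[] args) {
        // Minimal stand-in for the site's navigation block
        String html = "<div id=\"nav\"><a href=\"/some-strip/2/\"><div id=\"nx\"></div></a></div>";
        Document doc = Jsoup.parse(html);
        Element nx = doc.select("div#nav > a > div#nx").first();
        if (nx != null) {
            // The link wrapping the "next" arrow carries the relative next-page URL
            String nextPage = nx.parent().attr("href");
            System.out.println("http://oglaf.com" + nextPage); // http://oglaf.com/some-strip/2/
        }
    }
}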

View File

@ -13,21 +13,22 @@ import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class PahealRipper extends AbstractHTMLRipper {
private static final Logger logger = Logger.getLogger(PahealRipper.class);
private static Map<String, String> cookies = null;
private static Pattern gidPattern = null;
private static Map<String, String> getCookies() {
if (cookies == null) {
cookies = new HashMap<String, String>(1);
cookies = new HashMap<>(1);
cookies.put("ui-tnc-agreed", "true");
}
return cookies;
@ -66,7 +67,7 @@ public class PahealRipper extends AbstractHTMLRipper {
@Override
public List<String> getURLsFromPage(Document page) {
Elements elements = page.select(".shm-thumb.thumb>a").not(".shm-thumb-link");
List<String> res = new ArrayList<String>(elements.size());
List<String> res = new ArrayList<>(elements.size());
for (Element e : elements) {
res.add(e.absUrl("href"));
@ -92,10 +93,8 @@ public class PahealRipper extends AbstractHTMLRipper {
+ Utils.filesystemSafe(new URI(name).getPath())
+ ext);
addURLToDownload(url, outFile);
} catch (IOException ex) {
Logger.getLogger(PahealRipper.class.getName()).log(Level.SEVERE, null, ex);
} catch (URISyntaxException ex) {
Logger.getLogger(PahealRipper.class.getName()).log(Level.SEVERE, null, ex);
} catch (IOException | URISyntaxException ex) {
logger.error("Error while downloading URL " + url, ex);
}
}
@ -117,7 +116,7 @@ public class PahealRipper extends AbstractHTMLRipper {
try {
return Utils.filesystemSafe(new URI(getTerm(url)).getPath());
} catch (URISyntaxException ex) {
Logger.getLogger(PahealRipper.class.getName()).log(Level.SEVERE, null, ex);
logger.error(ex);
}
throw new MalformedURLException("Expected paheal.net URL format: rule34.paheal.net/post/list/searchterm - got " + url + " instead");

View File

@ -85,8 +85,8 @@ public class PhotobucketRipper extends AlbumRipper {
public void rip() throws IOException {
List<String> subalbums = ripAlbumAndGetSubalbums(this.url.toExternalForm());
List<String> subsToRip = new ArrayList<String>(),
rippedSubs = new ArrayList<String>();
List<String> subsToRip = new ArrayList<>(),
rippedSubs = new ArrayList<>();
for (String sub : subalbums) {
subsToRip.add(sub);
@ -117,7 +117,7 @@ public class PhotobucketRipper extends AlbumRipper {
waitForThreads();
}
public List<String> ripAlbumAndGetSubalbums(String theUrl) throws IOException {
private List<String> ripAlbumAndGetSubalbums(String theUrl) throws IOException {
int filesIndex = 0,
filesTotal = 0,
pageIndex = 0;
@ -145,7 +145,7 @@ public class PhotobucketRipper extends AlbumRipper {
}
// Grab the JSON
Pattern p; Matcher m;
p = Pattern.compile("^.*collectionData: (\\{.*\\}).*$", Pattern.DOTALL);
p = Pattern.compile("^.*collectionData: (\\{.*}).*$", Pattern.DOTALL);
m = p.matcher(data);
if (m.matches()) {
jsonString = m.group(1);
@ -176,12 +176,12 @@ public class PhotobucketRipper extends AlbumRipper {
if (url != null) {
return getSubAlbums(url, currentAlbumPath);
} else {
return new ArrayList<String>();
return new ArrayList<>();
}
}
private List<String> getSubAlbums(String url, String currentAlbumPath) {
List<String> result = new ArrayList<String>();
List<String> result = new ArrayList<>();
String subdomain = url.substring(url.indexOf("://")+3);
subdomain = subdomain.substring(0, subdomain.indexOf("."));
String apiUrl = "http://" + subdomain + ".photobucket.com/component/Albums-SubalbumList"

View File

@ -0,0 +1,107 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
public class PichunterRipper extends AbstractHTMLRipper {
public PichunterRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "pichunter";
}
@Override
public String getDomain() {
return "pichunter.com";
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("https?://www.pichunter.com/(|tags|models|sites)/(\\S*)/?");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(2);
}
p = Pattern.compile("https?://www.pichunter.com/(tags|models|sites)/(\\S*)/photos/\\d+/?");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(2);
}
p = Pattern.compile("https?://www.pichunter.com/tags/all/(\\S*)/\\d+/?");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
p = Pattern.compile("https?://www.pichunter.com/gallery/\\d+/(\\S*)/?");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Expected pichunter URL format: " +
"pichunter.com/(tags|models|sites)/Name/ - got " + url + " instead");
}
private boolean isPhotoSet(URL url) {
Pattern p = Pattern.compile("https?://www.pichunter.com/gallery/\\d+/(\\S*)/?");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return true;
}
return false;
}
@Override
public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass
return Http.url(url).get();
}
@Override
public Document getNextPage(Document doc) throws IOException {
// We use the last pagination arrow to find the next page
Element elem = doc.select("div.paperSpacings > ul > li.arrow").last();
if (elem != null) {
String nextPage = elem.select("a").attr("href");
// Sometimes this returns an empty string
return Http.url("http://www.pichunter.com" + nextPage).get();
}
throw new IOException("No more pages");
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<>();
if (!isPhotoSet(url)) {
for (Element el : doc.select("div.thumbtable > a.thumb > img")) {
result.add(el.attr("src").replaceAll("_i", "_o"));
}
} else {
for (Element el : doc.select("div.flex-images > figure > a.item > img")) {
result.add(el.attr("src").replaceAll("_i", "_o"));
}
}
return result;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}
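A one-line sketch of the thumbnail-to-original swap above; the thumbnail URL is hypothetical, and only the replaceAll("_i", "_o") call mirrors the ripper:
public class PichunterFullSizeSketch {
    public static void main(String[] args) {
        // Hypothetical thumbnail src; thumbnails end in _i, originals in _o
        String thumb = "https://cdn.pichunter.com/galleries/123/456789_i.jpg";
        System.out.println(thumb.replaceAll("_i", "_o"));
        // -> https://cdn.pichunter.com/galleries/123/456789_o.jpg
    }
}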

View File

@ -0,0 +1,67 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
public class PorncomixRipper extends AbstractHTMLRipper {
public PorncomixRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "porncomix";
}
@Override
public String getDomain() {
return "porncomix.info";
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("https?://www.porncomix.info/([a-zA-Z0-9_\\-]*)/?$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Expected proncomix URL format: " +
"porncomix.info/comic - got " + url + " instead");
}
@Override
public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass
return Http.url(url).get();
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<>();
for (Element el : doc.select("div.single-post > div.gallery > dl > dt > a > img")) {
String imageSource = el.attr("data-lazy-src");
// We strip the -WIDTHxHEIGHT size suffix so we download the full size image
// not the thumbnail ones
imageSource = imageSource.replaceAll("-\\d\\d\\dx\\d\\d\\d", "");
result.add(imageSource);
}
return result;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}
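A tiny sketch of the size-suffix removal above; the lazy-loaded src is a hypothetical WordPress-style thumbnail URL:
public class PorncomixFullSizeSketch {
    public static void main(String[] args) {
        // Hypothetical data-lazy-src value with a WordPress -WIDTHxHEIGHT thumbnail suffix
        String imageSource = "http://www.porncomix.info/wp-content/uploads/page-01-212x300.jpg";
        // Dropping the suffix yields the full-sized upload
        System.out.println(imageSource.replaceAll("-\\d\\d\\dx\\d\\d\\d", ""));
        // -> http://www.porncomix.info/wp-content/uploads/page-01.jpg
    }
}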

View File

@ -19,7 +19,7 @@ import com.rarchives.ripme.utils.Utils;
public class PornhubRipper extends AlbumRipper {
// All sleep times are in milliseconds
private static final int IMAGE_SLEEP_TIME = 1 * 1000;
private static final int IMAGE_SLEEP_TIME = 1000;
private static final String DOMAIN = "pornhub.com", HOST = "Pornhub";
@ -134,7 +134,7 @@ public class PornhubRipper extends AlbumRipper {
private URL url;
private int index;
public PornhubImageThread(URL url, int index, File workingDir) {
PornhubImageThread(URL url, int index, File workingDir) {
super();
this.url = url;
this.index = index;

View File

@ -1,5 +1,10 @@
package com.rarchives.ripme.ripper.rippers;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
@ -8,54 +13,47 @@ import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
public class PornpicsRipper extends AbstractHTMLRipper {
public class DatwinRipper extends AbstractHTMLRipper {
public DatwinRipper(URL url) throws IOException {
public PornpicsRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "datwin";
return "pornpics";
}
@Override
public String getDomain() {
return "datw.in";
return "pornpics.com";
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^.*datw.in/([a-zA-Z0-9\\-_]+).*$");
Pattern p = Pattern.compile("https?://www.pornpics.com/galleries/([a-zA-Z0-9_-]*)/?");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException(
"Expected datw.in gallery formats: "
+ "datw.in/..."
+ " Got: " + url);
throw new MalformedURLException("Expected pornpics URL format: " +
"www.pornpics.com/galleries/ID - got " + url + " instead");
}
@Override
public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass
return Http.url(url).get();
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>();
for (Element thumb : doc.select("img.attachment-thumbnail")) {
String image = thumb.attr("src");
image = image.replaceAll("-\\d{1,3}x\\d{1,3}", "");
imageURLs.add(image);
List<String> result = new ArrayList<>();
for (Element el : doc.select("a.rel-link")) {
result.add(el.attr("href"));
}
return imageURLs;
return result;
}
@Override

View File

@ -64,7 +64,7 @@ public class RajceRipper extends AbstractHTMLRipper {
@Override
public List<String> getURLsFromPage(Document page) {
List<String> result = new ArrayList<String>();
List<String> result = new ArrayList<>();
for (Element el : page.select("a.photoThumb")) {
result.add(el.attr("href"));
}

View File

@ -27,7 +27,7 @@ public class RedditRipper extends AlbumRipper {
private static final String HOST = "reddit";
private static final String DOMAIN = "reddit.com";
private static final String REDDIT_USER_AGENT = "RipMe:github/4pr0n/ripme:" + UpdateUtils.getThisJarVersion() + " (by /u/4_pr0n)";
private static final String REDDIT_USER_AGENT = "RipMe:github.com/RipMeApp/ripme:" + UpdateUtils.getThisJarVersion() + " (by /u/metaprime and /u/ineedmorealts)";
private static final int SLEEP_TIME = 2000;
@ -131,7 +131,7 @@ public class RedditRipper extends AlbumRipper {
Object jsonObj = new JSONTokener(jsonString).nextValue();
JSONArray jsonArray = new JSONArray();
if (jsonObj instanceof JSONObject) {
jsonArray.put( (JSONObject) jsonObj);
jsonArray.put(jsonObj);
} else if (jsonObj instanceof JSONArray) {
jsonArray = (JSONArray) jsonObj;
} else {
@ -167,7 +167,7 @@ public class RedditRipper extends AlbumRipper {
}
}
public void handleBody(String body, String id) {
private void handleBody(String body, String id) {
Pattern p = RipUtils.getURLRegex();
Matcher m = p.matcher(body);
while (m.find()) {
@ -179,7 +179,7 @@ public class RedditRipper extends AlbumRipper {
}
}
public void handleURL(String theUrl, String id) {
private void handleURL(String theUrl, String id) {
URL originalURL;
try {
originalURL = new URL(theUrl);
@ -220,21 +220,21 @@ public class RedditRipper extends AlbumRipper {
@Override
public String getGID(URL url) throws MalformedURLException {
// User
Pattern p = Pattern.compile("^https?://[a-zA-Z0-9\\.]{0,4}reddit\\.com/(user|u)/([a-zA-Z0-9_\\-]{3,}).*$");
Pattern p = Pattern.compile("^https?://[a-zA-Z0-9.]{0,4}reddit\\.com/(user|u)/([a-zA-Z0-9_\\-]{3,}).*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return "user_" + m.group(m.groupCount());
}
// Post
p = Pattern.compile("^https?://[a-zA-Z0-9\\.]{0,4}reddit\\.com/.*comments/([a-zA-Z0-9]{1,8}).*$");
p = Pattern.compile("^https?://[a-zA-Z0-9.]{0,4}reddit\\.com/.*comments/([a-zA-Z0-9]{1,8}).*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return "post_" + m.group(m.groupCount());
}
// Subreddit
p = Pattern.compile("^https?://[a-zA-Z0-9\\.]{0,4}reddit\\.com/r/([a-zA-Z0-9_]{1,}).*$");
p = Pattern.compile("^https?://[a-zA-Z0-9.]{0,4}reddit\\.com/r/([a-zA-Z0-9_]+).*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return "sub_" + m.group(m.groupCount());

View File

@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Http;
public class SankakuComplexRipper extends AbstractHTMLRipper {
private Document albumDoc = null;
private Map<String,String> cookies = new HashMap<String,String>();
private Map<String,String> cookies = new HashMap<>();
public SankakuComplexRipper(URL url) throws IOException {
super(url);
@ -43,7 +43,7 @@ public class SankakuComplexRipper extends AbstractHTMLRipper {
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
try {
return URLDecoder.decode(m.group(1), "UTF-8");
return URLDecoder.decode(m.group(2), "UTF-8");
} catch (UnsupportedEncodingException e) {
throw new MalformedURLException("Cannot decode tag name '" + m.group(1) + "'");
}
@ -65,34 +65,41 @@ public class SankakuComplexRipper extends AbstractHTMLRipper {
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>();
List<String> imageURLs = new ArrayList<>();
// Image URLs are basically thumbnail URLs with a different domain, a simple
// path replacement, and a ?xxxxxx post ID at the end (obtainable from the href)
for (Element thumbSpan : doc.select("div.content > div > span.thumb")) {
String postId = thumbSpan.attr("id").replaceAll("p", "");
Element thumb = thumbSpan.getElementsByTag("img").first();
String image = thumb.attr("abs:src")
.replace(".sankakucomplex.com/data/preview",
"s.sankakucomplex.com/data") + "?" + postId;
imageURLs.add(image);
for (Element thumbSpan : doc.select("div.content > div > span.thumb > a")) {
String postLink = thumbSpan.attr("href");
try {
// Get the page the full sized image is on
Document subPage = Http.url("https://chan.sankakucomplex.com" + postLink).get();
logger.info("Checking page " + "https://chan.sankakucomplex.com" + postLink);
imageURLs.add("https:" + subPage.select("div[id=stats] > ul > li > a[id=highres]").attr("href"));
} catch (IOException e) {
logger.warn("Error while loading page " + postLink, e);
}
}
return imageURLs;
}
@Override
public void downloadURL(URL url, int index) {
// Mock up the URL of the post page based on the post ID at the end of the URL.
String postId = url.toExternalForm().replaceAll(".*\\?", "");
addURLToDownload(url, postId + "_", "", "", null);
sleep(8000);
addURLToDownload(url, getPrefix(index));
}
@Override
public Document getNextPage(Document doc) throws IOException {
Element pagination = doc.select("div.pagination").first();
if (pagination.hasAttr("next-page-url")) {
return Http.url(pagination.attr("abs:next-page-url")).cookies(cookies).get();
} else {
return null;
String nextPage = pagination.attr("abs:next-page-url");
// Only logged in users can see past page 25
// Trying to rip page 26 will throw a no images found error
if (!nextPage.contains("page=26")) {
logger.info("Getting next page: " + pagination.attr("abs:next-page-url"));
return Http.url(pagination.attr("abs:next-page-url")).cookies(cookies).get();
}
}
throw new IOException("No more pages");
}
}
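A Jsoup sketch of the new full-size lookup above: instead of rewriting the thumbnail URL, each post page is loaded and the #highres link is read. The HTML below is a minimal stand-in for such a post page:
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

public class SankakuHighresSketch {
    public static void main(String[] args) {
        String postPage = "<div id=\"stats\"><ul><li><a id=\"highres\" "
                + "href=\"//cs.sankakucomplex.com/data/ab/cd/example.jpg\">Original</a></li></ul></div>";
        Document subPage = Jsoup.parse(postPage);
        // Same selector as the ripper uses on the real post page
        String href = subPage.select("div[id=stats] > ul > li > a[id=highres]").attr("href");
        System.out.println("https:" + href); // https://cs.sankakucomplex.com/data/ab/cd/example.jpg
    }
}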

View File

@ -48,10 +48,10 @@ public class ShesFreakyRipper extends AbstractHTMLRipper {
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>();
List<String> imageURLs = new ArrayList<>();
for (Element thumb : doc.select("a[data-lightbox=\"gallery\"]")) {
String image = thumb.attr("href");
imageURLs.add(image);
imageURLs.add("https:" + image);
}
return imageURLs;
}

View File

@ -2,24 +2,16 @@ package com.rarchives.ripme.ripper.rippers;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class SinnercomicsRipper extends AbstractHTMLRipper {
@ -71,7 +63,7 @@ public class SinnercomicsRipper extends AbstractHTMLRipper {
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<String>();
List<String> result = new ArrayList<>();
for (Element el : doc.select("meta[property=og:image]")) {
String imageSource = el.attr("content");
imageSource = imageSource.replace(" alt=", "");

View File

@ -1,95 +0,0 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.HttpStatusException;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AlbumRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;
/**
* Appears to be broken as of 2015-02-11.
* Looks like supertangas changed their site completely.
*/
public class SupertangasRipper extends AlbumRipper {
private static final String DOMAIN = "supertangas.com",
HOST = "supertangas";
public SupertangasRipper(URL url) throws IOException {
super(url);
}
@Override
public boolean canRip(URL url) {
return url.getHost().endsWith(DOMAIN);
}
@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
return url;
}
@Override
public void rip() throws IOException {
int page = 0;
String baseURL = "http://www.supertangas.com/fotos/?level=search&exact=1&searchterms=" + this.getGID(this.url);
Document doc;
while (true) {
page++;
String theURL = baseURL;
if (page > 1) {
theURL += "&plog_page=" + page;
}
try {
logger.info(" Retrieving " + theURL);
sendUpdate(STATUS.LOADING_RESOURCE, theURL);
doc = Http.url(theURL).get();
} catch (HttpStatusException e) {
logger.debug("Hit end of pages at page " + page, e);
break;
}
Elements images = doc.select("li.thumbnail a");
if (images.size() == 0) {
break;
}
for (Element imageElement : images) {
String image = imageElement.attr("href");
image = image.replaceAll("\\/fotos\\/", "/fotos/images/");
addURLToDownload(new URL(image));
}
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
logger.error("[!] Interrupted while waiting to load next page", e);
break;
}
}
waitForThreads();
}
@Override
public String getHost() {
return HOST;
}
@Override
public String getGID(URL url) throws MalformedURLException {
// http://www.supertangas.com/fotos/?level=search&exact=1&searchterms=Tahiticora%20(France)
Pattern p = Pattern.compile("^https?://[w.]*supertangas\\.com/fotos/\\?.*&searchterms=([a-zA-Z0-9%()+]+).*$");
Matcher m = p.matcher(url.toExternalForm());
if (!m.matches()) {
throw new MalformedURLException("Expected format: http://supertangas.com/fotos/?level=search&exact=1&searchterms=...");
}
return m.group(m.groupCount());
}
}

View File

@ -18,19 +18,19 @@ import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
class TapasticEpisode {
protected int index, id;
protected String title, filename;
int id;
String filename;
public TapasticEpisode(int index, int id, String title) {
this.index = index;
int index1 = index;
this.id = id;
this.title = title;
String title1 = title;
this.filename = Utils.filesystemSafe(title);
}
}
public class TapasticRipper extends AbstractHTMLRipper {
private List<TapasticEpisode> episodes=new ArrayList<TapasticEpisode>();
private List<TapasticEpisode> episodes= new ArrayList<>();
public TapasticRipper(URL url) throws IOException {
super(url);
@ -38,12 +38,12 @@ public class TapasticRipper extends AbstractHTMLRipper {
@Override
public String getDomain() {
return "tapastic.com";
return "tapas.io";
}
@Override
public String getHost() {
return "tapastic";
return "tapas";
}
@Override
@ -53,7 +53,7 @@ public class TapasticRipper extends AbstractHTMLRipper {
@Override
public List<String> getURLsFromPage(Document page) {
List<String> urls = new ArrayList<String>();
List<String> urls = new ArrayList<>();
String html = page.data();
if (!html.contains("episodeList : ")) {
logger.error("No 'episodeList' found at " + this.url);
@ -100,12 +100,12 @@ public class TapasticRipper extends AbstractHTMLRipper {
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^http://tapastic.com/series/([^/?]+).*$");
Pattern p = Pattern.compile("^https?://tapas.io/series/([^/?]+).*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return "series_ " + m.group(1);
}
p = Pattern.compile("^http://tapastic.com/episode/([^/?]+).*$");
p = Pattern.compile("^https?://tapas.io/episode/([^/?]+).*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return "ep_" + m.group(1);

View File

@ -3,27 +3,18 @@ package com.rarchives.ripme.ripper.rippers;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class ThechiveRipper extends AbstractHTMLRipper {
public static boolean isTag;
public ThechiveRipper(URL url) throws IOException {
super(url);
@ -44,7 +35,7 @@ public class ThechiveRipper extends AbstractHTMLRipper {
Pattern p = Pattern.compile("^https?://thechive.com/[0-9]*/[0-9]*/[0-9]*/([a-zA-Z0-9_\\-]*)/?$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
isTag = false;
boolean isTag = false;
return m.group(1);
}
throw new MalformedURLException("Expected thechive.com URL format: " +
@ -59,7 +50,7 @@ public class ThechiveRipper extends AbstractHTMLRipper {
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<String>();
List<String> result = new ArrayList<>();
for (Element el : doc.select("img.attachment-gallery-item-full")) {
String imageSource = el.attr("src");
// We replace thumbs with resizes so we can get the full sized images

View File

@ -0,0 +1,75 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
public class TheyiffgalleryRipper extends AbstractHTMLRipper {
public TheyiffgalleryRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "theyiffgallery";
}
@Override
public String getDomain() {
return "theyiffgallery.com";
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("https?://theyiffgallery.com/index\\?/category/(\\d+)");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Expected theyiffgallery URL format: " +
"theyiffgallery.com/index?/category/#### - got " + url + " instead");
}
@Override
public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass
return Http.url(url).get();
}
@Override
public Document getNextPage(Document doc) throws IOException {
String nextPage = doc.select("span.navPrevNext > a").attr("href");
if (nextPage != null && !nextPage.isEmpty() && nextPage.contains("start-")) {
return Http.url("https://theyiffgallery.com/" + nextPage).get();
}
throw new IOException("No more pages");
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<>();
for (Element el : doc.select("ul.thumbnails > li.gdthumb")) {
String imageSource = el.select("a > img").attr("src");
imageSource = imageSource.replaceAll("_data/i", "");
imageSource = imageSource.replaceAll("-\\w\\w_\\w\\d+x\\d+", "");
result.add("https://theyiffgallery.com" + imageSource);
}
return result;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}
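A short sketch of the thumbnail rewrite above; the src value is a hypothetical gallery thumbnail path, and the two replaceAll calls mirror the ripper:
public class TheyiffgalleryFullSizeSketch {
    public static void main(String[] args) {
        // Hypothetical thumbnail src as served by the gallery
        String imageSource = "_data/i/galleries/some-comic/page_01-cu_e250x250.jpg";
        // Drop the derivative cache prefix and the size suffix to reach the original file
        imageSource = imageSource.replaceAll("_data/i", "");
        imageSource = imageSource.replaceAll("-\\w\\w_\\w\\d+x\\d+", "");
        System.out.println("https://theyiffgallery.com" + imageSource);
        // -> https://theyiffgallery.com/galleries/some-comic/page_01.jpg
    }
}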

View File

@ -4,6 +4,9 @@ import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -34,18 +37,19 @@ public class TumblrRipper extends AlbumRipper {
private static String TUMBLR_AUTH_CONFIG_KEY = "tumblr.auth";
private static boolean useDefaultApiKey = false; // fall-back for bad user-specified key
private static final String DEFAULT_API_KEY = "JFNLu3CbINQjRdUvZibXW9VpSEVYYtiPJ86o8YmvgLZIoKyuNX";
private static final List<String> apiKeys = Arrays.asList("JFNLu3CbINQjRdUvZibXW9VpSEVYYtiPJ86o8YmvgLZIoKyuNX",
"FQrwZMCxVnzonv90rgNUJcAk4FpnoS0mYuSuGYqIpM2cFgp9L4",
"qpdkY6nMknksfvYAhf2xIHp0iNRLkMlcWShxqzXyFJRxIsZ1Zz");
private static final String API_KEY = apiKeys.get(new Random().nextInt(apiKeys.size()));
private static final String API_KEY;
static {
API_KEY = Utils.getConfigString(TUMBLR_AUTH_CONFIG_KEY, DEFAULT_API_KEY);
}
private static String getApiKey() {
if (useDefaultApiKey) {
return DEFAULT_API_KEY;
} else {
if (useDefaultApiKey || Utils.getConfigString(TUMBLR_AUTH_CONFIG_KEY, "JFNLu3CbINQjRdUvZibXW9VpSEVYYtiPJ86o8YmvgLZIoKyuNX").equals("JFNLu3CbINQjRdUvZibXW9VpSEVYYtiPJ86o8YmvgLZIoKyuNX")) {
logger.info("Using api key: " + API_KEY);
return API_KEY;
} else {
logger.info("Using user tumblr.auth api key");
return Utils.getConfigString(TUMBLR_AUTH_CONFIG_KEY, "JFNLu3CbINQjRdUvZibXW9VpSEVYYtiPJ86o8YmvgLZIoKyuNX");
}
}
@ -77,7 +81,7 @@ public class TumblrRipper extends AlbumRipper {
return url;
}
public boolean isTumblrURL(URL url) {
private boolean isTumblrURL(URL url) {
String checkURL = "http://api.tumblr.com/v2/blog/";
checkURL += url.getHost();
checkURL += "/info?api_key=" + getApiKey();
@ -95,6 +99,7 @@ public class TumblrRipper extends AlbumRipper {
@Override
public void rip() throws IOException {
String[] mediaTypes;
boolean exceededRateLimit = false;
if (albumType == ALBUM_TYPE.POST) {
mediaTypes = new String[] { "post" };
} else {
@ -105,12 +110,21 @@ public class TumblrRipper extends AlbumRipper {
if (isStopped()) {
break;
}
if (exceededRateLimit) {
break;
}
offset = 0;
while (true) {
if (isStopped()) {
break;
}
if (exceededRateLimit) {
break;
}
String apiURL = getTumblrApiURL(mediaType, offset);
logger.info("Retrieving " + apiURL);
sendUpdate(STATUS.LOADING_RESOURCE, apiURL);
@ -126,6 +140,11 @@ public class TumblrRipper extends AlbumRipper {
HttpStatusException status = (HttpStatusException)cause;
if (status.getStatusCode() == HttpURLConnection.HTTP_UNAUTHORIZED && !useDefaultApiKey) {
retry = true;
} else if (status.getStatusCode() == 429) {
logger.error("Tumblr rate limit has been exceeded");
sendUpdate(STATUS.DOWNLOAD_ERRORED,"Tumblr rate limit has been exceeded");
exceededRateLimit = true;
break;
}
}
}
@ -192,7 +211,14 @@ public class TumblrRipper extends AlbumRipper {
for (int j = 0; j < photos.length(); j++) {
photo = photos.getJSONObject(j);
try {
fileURL = new URL(photo.getJSONObject("original_size").getString("url"));
if (Utils.getConfigBoolean("tumblr.get_raw_image", false)) {
String urlString = photo.getJSONObject("original_size").getString("url").replaceAll("https", "http");
urlString = urlString.replaceAll("https?://[a-sA-Z0-9_\\-\\.]*\\.tumblr", "http://data.tumblr");
urlString = urlString.replaceAll("_\\d+\\.", "_raw.");
fileURL = new URL(urlString);
} else {
fileURL = new URL(photo.getJSONObject("original_size").getString("url").replaceAll("http", "https"));
}
m = p.matcher(fileURL.toString());
if (m.matches()) {
addURLToDownload(fileURL);
@ -202,12 +228,11 @@ public class TumblrRipper extends AlbumRipper {
}
} catch (Exception e) {
logger.error("[!] Error while parsing photo in " + photo, e);
continue;
}
}
} else if (post.has("video_url")) {
try {
fileURL = new URL(post.getString("video_url"));
fileURL = new URL(post.getString("video_url").replaceAll("http", "https"));
addURLToDownload(fileURL);
} catch (Exception e) {
logger.error("[!] Error while parsing video in " + post, e);
@ -254,7 +279,7 @@ public class TumblrRipper extends AlbumRipper {
@Override
public String getGID(URL url) throws MalformedURLException {
final String DOMAIN_REGEX = "^https?://([a-zA-Z0-9\\-\\.]+)";
final String DOMAIN_REGEX = "^https?://([a-zA-Z0-9\\-.]+)";
Pattern p;
Matcher m;
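A sketch of the optional raw-image rewrite added above (the tumblr.get_raw_image path); the input is a made-up original_size URL, and the three replaceAll calls mirror the diff:
public class TumblrRawUrlSketch {
    public static void main(String[] args) {
        // Hypothetical original_size photo URL from the Tumblr API
        String urlString = "https://68.media.tumblr.com/abcdef/tumblr_xyz_1280.jpg";
        urlString = urlString.replaceAll("https", "http");
        urlString = urlString.replaceAll("https?://[a-sA-Z0-9_\\-\\.]*\\.tumblr", "http://data.tumblr");
        urlString = urlString.replaceAll("_\\d+\\.", "_raw.");
        System.out.println(urlString); // http://data.tumblr.com/abcdef/tumblr_xyz_raw.jpg
    }
}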

View File

@ -54,14 +54,14 @@ public class TwitterRipper extends AlbumRipper {
@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
// https://twitter.com/search?q=from%3Apurrbunny%20filter%3Aimages&src=typd
Pattern p = Pattern.compile("^https?://(m\\.)?twitter\\.com/search\\?q=([a-zA-Z0-9%\\-_]{1,}).*$");
Pattern p = Pattern.compile("^https?://(m\\.)?twitter\\.com/search\\?q=([a-zA-Z0-9%\\-_]+).*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
albumType = ALBUM_TYPE.SEARCH;
searchText = m.group(2);
return url;
}
p = Pattern.compile("^https?://(m\\.)?twitter\\.com/([a-zA-Z0-9\\-_]{1,}).*$");
p = Pattern.compile("^https?://(m\\.)?twitter\\.com/([a-zA-Z0-9\\-_]+).*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
albumType = ALBUM_TYPE.ACCOUNT;
@ -83,7 +83,6 @@ public class TwitterRipper extends AlbumRipper {
try {
JSONObject json = new JSONObject(body);
accessToken = json.getString("access_token");
return;
} catch (JSONException e) {
// Fall through
throw new IOException("Failure while parsing JSON: " + body, e);
@ -142,7 +141,7 @@ public class TwitterRipper extends AlbumRipper {
}
private List<JSONObject> getTweets(String url) throws IOException {
List<JSONObject> tweets = new ArrayList<JSONObject>();
List<JSONObject> tweets = new ArrayList<>();
logger.info(" Retrieving " + url);
Document doc = Http.url(url)
.ignoreContentType()
@ -283,7 +282,6 @@ public class TwitterRipper extends AlbumRipper {
if (c == '%') {
gid.append('_');
i += 2;
continue;
// Ignore non-alphanumeric chars
} else if (
(c >= 'a' && c <= 'z')

View File

@ -22,7 +22,7 @@ import com.rarchives.ripme.utils.Http;
public class TwodgalleriesRipper extends AbstractHTMLRipper {
private int offset = 0;
private Map<String,String> cookies = new HashMap<String,String>();
private Map<String,String> cookies = new HashMap<>();
public TwodgalleriesRipper(URL url) throws IOException {
super(url);
@ -90,7 +90,7 @@ public class TwodgalleriesRipper extends AbstractHTMLRipper {
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>();
List<String> imageURLs = new ArrayList<>();
for (Element thumb : doc.select("div.hcaption > img")) {
String image = thumb.attr("src");
image = image.replace("/200H/", "/");
@ -114,7 +114,7 @@ public class TwodgalleriesRipper extends AbstractHTMLRipper {
cookies = resp.cookies();
String ctoken = resp.parse().select("form > input[name=ctoken]").first().attr("value");
Map<String,String> postdata = new HashMap<String,String>();
Map<String,String> postdata = new HashMap<>();
postdata.put("user[login]", new String(Base64.decode("cmlwbWU=")));
postdata.put("user[password]", new String(Base64.decode("cmlwcGVy")));
postdata.put("rememberme", "1");

View File

@ -56,7 +56,7 @@ public class VidbleRipper extends AbstractHTMLRipper {
}
private static List<String> getURLsFromPageStatic(Document doc) {
List<String> imageURLs = new ArrayList<String>();
List<String> imageURLs = new ArrayList<>();
Elements els = doc.select("#ContentPlaceHolder1_divContent");
Elements imgs = els.select("img");
for (Element img : imgs) {
@ -76,7 +76,7 @@ public class VidbleRipper extends AbstractHTMLRipper {
}
public static List<URL> getURLsFromPage(URL url) throws IOException {
List<URL> urls = new ArrayList<URL>();
List<URL> urls = new ArrayList<>();
Document doc = Http.url(url).get();
for (String stringURL : getURLsFromPageStatic(doc)) {
urls.add(new URL(stringURL));

View File

@ -0,0 +1,84 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
public class ViewcomicRipper extends AbstractHTMLRipper {
public ViewcomicRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "view-comic";
}
@Override
public String getDomain() {
return "view-comic.com";
}
@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
try {
// Attempt to use album title as GID
String titleText = getFirstPage().select("title").first().text();
String title = titleText.replace("Viewcomic reading comics online for free", "");
title = title.replace("_", "");
title = title.replace("|", "");
title = title.replace("", "");
title = title.replace(".", "");
return getHost() + "_" + title.trim();
} catch (IOException e) {
// Fall back to default album naming convention
logger.info("Unable to find title at " + url);
}
return super.getAlbumTitle(url);
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("https?://view-comic.com/([a-zA-Z1-9_-]*)/?$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Expected view-comic URL format: " +
"view-comic.com/COMIC_NAME - got " + url + " instead");
}
@Override
public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass
return Http.url(url).get();
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<String>();
for (Element el : doc.select("div.pinbin-copy > a > img")) {
result.add(el.attr("src"));
}
return result;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}

View File

@ -84,7 +84,7 @@ public class VineRipper extends AlbumRipper {
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://(www\\.)?vine\\.co/u/([0-9]{1,}).*$");
Pattern p = Pattern.compile("^https?://(www\\.)?vine\\.co/u/([0-9]+).*$");
Matcher m = p.matcher(url.toExternalForm());
if (!m.matches()) {
throw new MalformedURLException("Expected format: http://vine.co/u/######");

View File

@ -37,11 +37,7 @@ public class VkRipper extends AlbumRipper {
}
// Ignore /video pages (but not /videos pages)
String u = url.toExternalForm();
if (u.contains("/video") && !u.contains("videos")) {
// Single video page
return false;
}
return true;
return !u.contains("/video") || u.contains("videos");
}
@Override
@ -62,7 +58,7 @@ public class VkRipper extends AlbumRipper {
private void ripVideos() throws IOException {
String oid = getGID(this.url).replace("videos", "");
String u = "http://vk.com/al_video.php";
Map<String,String> postData = new HashMap<String,String>();
Map<String,String> postData = new HashMap<>();
postData.put("al", "1");
postData.put("act", "load_videos_silent");
postData.put("offset", "0");
@ -97,13 +93,13 @@ public class VkRipper extends AlbumRipper {
}
private void ripImages() throws IOException {
Map<String,String> photoIDsToURLs = new HashMap<String,String>();
Map<String,String> photoIDsToURLs = new HashMap<>();
int offset = 0;
while (true) {
logger.info(" Retrieving " + this.url);
// al=1&offset=80&part=1
Map<String,String> postData = new HashMap<String,String>();
Map<String,String> postData = new HashMap<>();
postData.put("al", "1");
postData.put("offset", Integer.toString(offset));
postData.put("part", "1");
@ -120,7 +116,7 @@ public class VkRipper extends AlbumRipper {
body = body.substring(body.indexOf("<div"));
doc = Jsoup.parseBodyFragment(body);
List<Element> elements = doc.select("a");
Set<String> photoIDsToGet = new HashSet<String>();
Set<String> photoIDsToGet = new HashSet<>();
for (Element a : elements) {
if (!a.attr("onclick").contains("showPhoto('")) {
logger.error("a: " + a);
@ -162,8 +158,8 @@ public class VkRipper extends AlbumRipper {
}
private Map<String,String> getPhotoIDsToURLs(String photoID) throws IOException {
Map<String,String> photoIDsToURLs = new HashMap<String,String>();
Map<String,String> postData = new HashMap<String,String>();
Map<String,String> photoIDsToURLs = new HashMap<>();
Map<String,String> postData = new HashMap<>();
// act=show&al=1&list=album45506334_172415053&module=photos&photo=45506334_304658196
postData.put("list", getGID(this.url));
postData.put("act", "show");
@ -202,7 +198,7 @@ public class VkRipper extends AlbumRipper {
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://(www\\.)?vk\\.com/(photos|album|videos)-?([a-zA-Z0-9_]{1,}).*$");
Pattern p = Pattern.compile("^https?://(www\\.)?vk\\.com/(photos|album|videos)-?([a-zA-Z0-9_]+).*$");
Matcher m = p.matcher(url.toExternalForm());
if (!m.matches()) {
throw new MalformedURLException("Expected format: http://vk.com/album#### or vk.com/photos####");

View File

@ -0,0 +1,102 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import java.util.Map;
import java.util.HashMap;
import org.jsoup.Connection.Response;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
public class WebtoonsRipper extends AbstractHTMLRipper {
private Map<String,String> cookies = new HashMap<String,String>();
public WebtoonsRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "webtoons";
}
@Override
public String getDomain() {
return "www.webtoons.com";
}
@Override
public boolean canRip(URL url) {
Pattern pat = Pattern.compile("https?://www.webtoons.com/[a-zA-Z]+/[a-zA-Z]+/([a-zA-Z0-9_-]*)/[a-zA-Z0-9_-]+/\\S*");
Matcher mat = pat.matcher(url.toExternalForm());
if (mat.matches()) {
return true;
}
return false;
}
@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
Pattern pat = Pattern.compile("https?://www.webtoons.com/[a-zA-Z]+/[a-zA-Z]+/([a-zA-Z0-9_-]*)/[a-zA-Z0-9_-]+/\\S*");
Matcher mat = pat.matcher(url.toExternalForm());
if (mat.matches()) {
return getHost() + "_" + mat.group(1);
}
return super.getAlbumTitle(url);
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern pat = Pattern.compile("https?://www.webtoons.com/[a-zA-Z]+/[a-zA-Z]+/([a-zA-Z0-9_-]*)/[a-zA-Z0-9_-]+/\\S*");
Matcher mat = pat.matcher(url.toExternalForm());
if (mat.matches()) {
return mat.group(1);
}
throw new MalformedURLException("Expected URL format: http://www.webtoons.com/LANG/CAT/TITLE/VOL/, got: " + url);
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<String>();
for (Element elem : doc.select("div.viewer_img > img")) {
result.add(elem.attr("data-url"));
}
return result;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), cookies);
}
@Override
public Document getFirstPage() throws IOException {
Response resp = Http.url(url).response();
cookies = resp.cookies();
return Http.url(url).get();
}
@Override
public Document getNextPage(Document doc) throws IOException {
// Find next page
String nextUrl = "";
Element elem = doc.select("a.pg_next").first();
nextUrl = elem.attr("href");
if (nextUrl.equals("") || nextUrl.equals("#")) {
throw new IOException("No more pages");
}
return Http.url(nextUrl).get();
}
}
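A Jsoup sketch of the panel extraction above, reading data-url the same way the ripper does; the markup is a made-up fragment:
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

public class WebtoonsImageSketch {
    public static void main(String[] args) {
        String html = "<div class=\"viewer_img\">"
                + "<img src=\"blank.gif\" data-url=\"https://example-cdn.webtoons.com/episode/panel_1.jpg\">"
                + "<img src=\"blank.gif\" data-url=\"https://example-cdn.webtoons.com/episode/panel_2.jpg\">"
                + "</div>";
        Document doc = Jsoup.parse(html);
        for (Element elem : doc.select("div.viewer_img > img")) {
            System.out.println(elem.attr("data-url"));
        }
    }
}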

View File

@ -16,25 +16,25 @@ import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
public class WordpressComicRipper extends AbstractHTMLRipper {
String pageTitle = "";
private String pageTitle = "";
public WordpressComicRipper(URL url) throws IOException {
super(url);
}
// Test links:
// Test links (see also WordpressComicRipperTest.java)
// http://www.totempole666.com/comic/first-time-for-everything-00-cover/
// http://buttsmithy.com/archives/comic/p1
// http://themonsterunderthebed.net/?comic=test-post
// http://prismblush.com/comic/hella-trap-pg-01/
// http://www.konradokonski.com/sawdust/
// http://www.konradokonski.com/wiory/
// http://www.konradokonski.com/sawdust/comic/get-up/
// http://www.konradokonski.com/wiory/comic/08182008/
// http://freeadultcomix.com/finders-feepaid-in-full-sparrow/
// http://comics-xxx.com/republic-rendezvous-palcomix-star-wars-xxx/
// http://thisis.delvecomic.com/NewWP/comic/in-too-deep/
// http://tnbtu.com/comic/01-00/
// http://shipinbottle.pepsaga.com/?p=281
public static List<String> explicit_domains = Arrays.asList(
private static List<String> explicit_domains = Arrays.asList(
"www.totempole666.com",
"buttsmithy.com",
"themonsterunderthebed.net",
@ -42,21 +42,18 @@ public class WordpressComicRipper extends AbstractHTMLRipper {
"www.konradokonski.com",
"freeadultcomix.com",
"thisis.delvecomic.com",
"comics-xxx.com",
"tnbtu.com",
"shipinbottle.pepsaga.com"
);
@Override
public String getHost() {
String host = url.toExternalForm().split("/")[2];
return host;
return url.toExternalForm().split("/")[2];
}
@Override
public String getDomain() {
String host = url.toExternalForm().split("/")[2];
return host;
return url.toExternalForm().split("/")[2];
}
@Override
@ -70,12 +67,20 @@ public class WordpressComicRipper extends AbstractHTMLRipper {
return true;
}
Pattern konradokonskiPat = Pattern.compile("https?://www.konradokonski.com/sawdust/comic/([a-zA-Z0-9_-]*)/?$");
Pattern konradokonskiPat = Pattern.compile("https?://www.konradokonski.com/([a-zA-Z0-9_-]*)/comic/([a-zA-Z0-9_-]*)/?$");
Matcher konradokonskiMat = konradokonskiPat.matcher(url.toExternalForm());
if (konradokonskiMat.matches()) {
return true;
}
// This is hardcoded because it starts on the first page, unlike all the other
// konradokonski comics, which start on the last page
konradokonskiPat = Pattern.compile("https?://www.konradokonski.com/aquartzbead/?$");
konradokonskiMat = konradokonskiPat.matcher(url.toExternalForm());
if (konradokonskiMat.matches()) {
return true;
}
Pattern buttsmithyPat = Pattern.compile("https?://buttsmithy.com/archives/comic/([a-zA-Z0-9_-]*)/?$");
Matcher buttsmithyMat = buttsmithyPat.matcher(url.toExternalForm());
if (buttsmithyMat.matches()) {
@ -125,12 +130,13 @@ public class WordpressComicRipper extends AbstractHTMLRipper {
}
}
return false;
}
@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
Pattern totempole666Pat = Pattern.compile("(?:https?://)?(?:www\\.)?totempole666.com\\/comic/([a-zA-Z0-9_-]*)/?$");
Pattern totempole666Pat = Pattern.compile("(?:https?://)?(?:www\\.)?totempole666.com/comic/([a-zA-Z0-9_-]*)/?$");
Matcher totempole666Mat = totempole666Pat.matcher(url.toExternalForm());
if (totempole666Mat.matches()) {
return "totempole666.com" + "_" + "The_cummoner";
@ -142,16 +148,16 @@ public class WordpressComicRipper extends AbstractHTMLRipper {
return "buttsmithy.com" + "_" + "Alfie";
}
Pattern konradokonskiSawdustPat = Pattern.compile("http://www.konradokonski.com/sawdust/comic/([a-zA-Z0-9_-]*)/?$");
Matcher konradokonskiSawdustMat = konradokonskiSawdustPat.matcher(url.toExternalForm());
if (konradokonskiSawdustMat.matches()) {
return "konradokonski.com_sawdust";
Pattern konradokonskiPat = Pattern.compile("http://www.konradokonski.com/([a-zA-Z]+)/comic/([a-zA-Z0-9_-]*)/?$");
Matcher konradokonskiMat = konradokonskiPat.matcher(url.toExternalForm());
if (konradokonskiMat.matches()) {
return "konradokonski.com_" + konradokonskiMat.group(1);
}
Pattern konradokonskiWioryPat = Pattern.compile("http://www.konradokonski.com/wiory/comic/([a-zA-Z0-9_-]*)/?$");
Matcher konradokonskiWioryMat = konradokonskiWioryPat.matcher(url.toExternalForm());
if (konradokonskiWioryMat.matches()) {
return "konradokonski.com_wiory";
konradokonskiPat = Pattern.compile("https?://www.konradokonski.com/aquartzbead/?$");
konradokonskiMat = konradokonskiPat.matcher(url.toExternalForm());
if (konradokonskiMat.matches()) {
return "konradokonski.com_aquartzbead";
}
Pattern freeadultcomixPat = Pattern.compile("https?://freeadultcomix.com/([a-zA-Z0-9_\\-]*)/?$");
@ -237,7 +243,7 @@ public class WordpressComicRipper extends AbstractHTMLRipper {
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<String>();
List<String> result = new ArrayList<>();
if (getHost().contains("www.totempole666.com")
|| getHost().contains("buttsmithy.com")
|| getHost().contains("themonsterunderthebed.net")
@ -277,9 +283,10 @@ public class WordpressComicRipper extends AbstractHTMLRipper {
}
// freeadultcomix gets its own if because it needs to add http://freeadultcomix.com to the start of each link
// TODO review the above comment which no longer applies -- see if there's a refactoring we should do here.
if (url.toExternalForm().contains("freeadultcomix.com")) {
for (Element elem : doc.select("div.single-post > p > img.aligncenter")) {
result.add("http://freeadultcomix.com" + elem.attr("src"));
result.add(elem.attr("src"));
}
}

View File

@ -10,14 +10,15 @@ import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
public class XbooruRipper extends AbstractHTMLRipper {
private static final Logger logger = Logger.getLogger(XbooruRipper.class);
private static Pattern gidPattern = null;
public XbooruRipper(URL url) throws IOException {
@ -57,7 +58,7 @@ public class XbooruRipper extends AbstractHTMLRipper {
@Override
public List<String> getURLsFromPage(Document page) {
List<String> res = new ArrayList<String>(100);
List<String> res = new ArrayList<>(100);
for (Element e : page.getElementsByTag("post")) {
res.add(e.absUrl("file_url") + "#" + e.attr("id"));
}
@ -71,7 +72,7 @@ public class XbooruRipper extends AbstractHTMLRipper {
private String getTerm(URL url) throws MalformedURLException {
if (gidPattern == null) {
gidPattern = Pattern.compile("^https?://(www\\.)?xbooru\\.com/(index.php)?.*([?&]tags=([a-zA-Z0-9$_.+!*'(),%-]+))(\\&|(#.*)?$)");
gidPattern = Pattern.compile("^https?://(www\\.)?xbooru\\.com/(index.php)?.*([?&]tags=([a-zA-Z0-9$_.+!*'(),%-]+))(&|(#.*)?$)");
}
Matcher m = gidPattern.matcher(url.toExternalForm());
@ -87,7 +88,7 @@ public class XbooruRipper extends AbstractHTMLRipper {
try {
return Utils.filesystemSafe(new URI(getTerm(url)).getPath());
} catch (URISyntaxException ex) {
Logger.getLogger(PahealRipper.class.getName()).log(Level.SEVERE, null, ex);
logger.error(ex);
}
throw new MalformedURLException("Expected xbooru.com URL format: xbooru.com/index.php?tags=searchterm - got " + url + " instead");

View File

@ -3,91 +3,46 @@ package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AlbumRipper;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
public class XhamsterRipper extends AlbumRipper {
private static final String HOST = "xhamster";
public class XhamsterRipper extends AbstractHTMLRipper {
public XhamsterRipper(URL url) throws IOException {
super(url);
}
@Override
public boolean canRip(URL url) {
Pattern p = Pattern.compile("^https?://[wmde.]*xhamster\\.com/photos/gallery/.*[0-9]+$");
Matcher m = p.matcher(url.toExternalForm());
return m.matches();
public String getHost() {
return "xhamster";
}
@Override
public String getDomain() {
return "xhamster.com";
}
@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
return url;
}
@Override
public void rip() throws IOException {
int index = 0;
String nextURL = this.url.toExternalForm();
while (nextURL != null) {
logger.info(" Retrieving " + nextURL);
Document doc = Http.url(nextURL).get();
for (Element thumb : doc.select("table.iListing div.img img")) {
if (!thumb.hasAttr("src")) {
continue;
}
String image = thumb.attr("src");
// replace thumbnail urls with the urls to the full sized images
image = image.replaceAll(
"https://upt.xhcdn\\.",
"http://up.xhamster.");
image = image.replaceAll("ept\\.xhcdn", "ep.xhamster");
image = image.replaceAll(
"_160\\.",
"_1000.");
// Xhamster has shitty cert management and uses the wrong cert for their ep.xhamster Domain
// so we change all https requests to http
image = image.replaceAll(
"https://",
"http://");
index += 1;
String prefix = "";
if (Utils.getConfigBoolean("download.save_order", true)) {
prefix = String.format("%03d_", index);
}
addURLToDownload(new URL(image), prefix);
if (isThisATest()) {
break;
}
}
if (isThisATest()) {
break;
}
nextURL = null;
for (Element element : doc.select("a.last")) {
nextURL = element.attr("href");
break;
}
}
waitForThreads();
}
@Override
public String getHost() {
return HOST;
String URLToReturn = url.toExternalForm();
URLToReturn = URLToReturn.replaceAll("m.xhamster.com", "xhamster.com");
URLToReturn = URLToReturn.replaceAll("\\w\\w.xhamster.com", "xhamster.com");
URL san_url = new URL(URLToReturn.replaceAll("xhamster.com", "m.xhamster.com"));
logger.info("sanitized URL is " + san_url.toExternalForm());
return san_url;
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://[wmde.]*xhamster\\.com/photos/gallery/.*?(\\d{1,})$");
Pattern p = Pattern.compile("^https?://[\\w\\w.]*xhamster\\.com/photos/gallery/.*?(\\d+)$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
@ -98,4 +53,54 @@ public class XhamsterRipper extends AlbumRipper {
+ " Got: " + url);
}
}
@Override
public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass
return Http.url(url).get();
}
@Override
public boolean canRip(URL url) {
Pattern p = Pattern.compile("^https?://[wmde.]*xhamster\\.com/photos/gallery/.*?(\\d+)$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return true;
}
return false;
}
@Override
public Document getNextPage(Document doc) throws IOException {
if (doc.select("a.next").first().attr("href") != "") {
return Http.url(doc.select("a.next").first().attr("href")).get();
} else {
throw new IOException("No more pages");
}
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<>();
for (Element thumb : doc.select("div.picture_view > div.pictures_block > div.items > div.item-container > a > div.thumb_container > div.img > img")) {
String image = thumb.attr("src");
// replace thumbnail urls with the urls to the full sized images
image = image.replaceAll(
"https://upt.xhcdn\\.",
"http://up.xhamster.");
image = image.replaceAll("ept\\.xhcdn", "ep.xhamster");
image = image.replaceAll(
"_160\\.",
"_1000.");
// Xhamster has bad cert management and uses invalid certs for some cdns, so we change all our requests to http
image = image.replaceAll("https", "http");
result.add(image);
}
return result;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}
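A sketch of the URL normalization in the new sanitizeURL above; the gallery URL is hypothetical, and the replaceAll chain mirrors the diff (any mobile or two-letter country subdomain collapses onto m.xhamster.com, which is the page the ripper parses):
import java.net.MalformedURLException;
import java.net.URL;

public class XhamsterSanitizeSketch {
    public static void main(String[] args) throws MalformedURLException {
        // Hypothetical gallery URL on a country subdomain
        String urlToReturn = "https://de.xhamster.com/photos/gallery/example-gallery-1234567";
        // First collapse mobile and country subdomains onto the bare host...
        urlToReturn = urlToReturn.replaceAll("m.xhamster.com", "xhamster.com");
        urlToReturn = urlToReturn.replaceAll("\\w\\w.xhamster.com", "xhamster.com");
        // ...then push everything onto the mobile site
        URL sanitized = new URL(urlToReturn.replaceAll("xhamster.com", "m.xhamster.com"));
        System.out.println(sanitized.toExternalForm());
        // -> https://m.xhamster.com/photos/gallery/example-gallery-1234567
    }
}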

View File

@ -20,7 +20,7 @@ import com.rarchives.ripme.utils.Http;
public class ZizkiRipper extends AbstractHTMLRipper {
private Document albumDoc = null;
private Map<String,String> cookies = new HashMap<String,String>();
private Map<String,String> cookies = new HashMap<>();
public ZizkiRipper(URL url) throws IOException {
super(url);
@ -76,7 +76,7 @@ public class ZizkiRipper extends AbstractHTMLRipper {
@Override
public List<String> getURLsFromPage(Document page) {
List<String> imageURLs = new ArrayList<String>();
List<String> imageURLs = new ArrayList<>();
// Page contains images
logger.info("Look for images.");
for (Element thumb : page.select("img")) {
@ -99,7 +99,6 @@ public class ZizkiRipper extends AbstractHTMLRipper {
src = thumb.attr("src");
logger.debug("Found url with " + src);
if (!src.contains("zizki.com")) {
continue;
} else {
imageURLs.add(src.replace("/styles/medium/public/","/styles/large/public/"));
}

Some files were not shown because too many files have changed in this diff