# -*- coding: utf-8 -*- # Copyright 2015-2025 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. """Extractors for https://www.hentai-foundry.com/""" from .common import Extractor, Message, Dispatch from .. import text, util BASE_PATTERN = r"(https?://)?(?:www\.)?hentai-foundry\.com" class HentaifoundryExtractor(Extractor): """Base class for hentaifoundry extractors""" category = "hentaifoundry" directory_fmt = ("{category}", "{user}") filename_fmt = "{category}_{index}_{title}.{extension}" archive_fmt = "{index}" cookies_domain = "www.hentai-foundry.com" root = "https://www.hentai-foundry.com" per_page = 25 def __init__(self, match): self.root = (match[1] or "https://") + "www.hentai-foundry.com" self.user = match[2] Extractor.__init__(self, match) self.page_url = "" self.start_post = 0 self.start_page = 1 def _init(self): if self.config("descriptions") == "html": self._process_description = self._process_description_html def items(self): self._init_site_filters() data = self.metadata() for post_url in util.advance(self.posts(), self.start_post): image = self._parse_post(post_url) image.update(data) yield Message.Directory, "", image yield Message.Url, image["src"], image def skip(self, num): pages, posts = divmod(num, self.per_page) self.start_page += pages self.start_post += posts return num def metadata(self): return {"user": self.user} def posts(self): return self._pagination(self.page_url) def _pagination(self, url, begin='thumbTitle">', '<')), "artist" : text.unescape(extr('/profile">', '<')), "_body" : extr( '

", '') .replace("\r\n", "\n")), "ratings" : [text.unescape(r) for r in text.extract_iter(extr( "class='ratings_box'", "

"), "title='", "'")], "categories" : self._extract_categories(extr), "date" : self.parse_datetime_iso(extr("datetime='", "'")), "views" : text.parse_int(extr(">Views", "<")), "score" : text.parse_int(extr(">Vote Score", "<")), "media" : text.unescape(extr(">Media", "<").strip()), "tags" : text.split_html(extr( ">Tags ", "")), } body = data["_body"] if "