# -*- coding: utf-8 -*- # Copyright 2019-2025 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. """Extractors for https://hentainexus.com/""" from .common import GalleryExtractor, Extractor, Message from .. import text, util import binascii class HentainexusGalleryExtractor(GalleryExtractor): """Extractor for hentainexus galleries""" category = "hentainexus" root = "https://hentainexus.com" pattern = (r"(?i)(?:https?://)?(?:www\.)?hentainexus\.com" r"/(?:view|read)/(\d+)") example = "https://hentainexus.com/view/12345" def __init__(self, match): self.gallery_id = match[1] url = f"{self.root}/view/{self.gallery_id}" GalleryExtractor.__init__(self, match, url) def metadata(self, page): rmve = text.remove_html extr = text.extract_from(page) data = { "gallery_id": text.parse_int(self.gallery_id), "cover" : extr('"og:image" content="', '"'), "title" : extr('

', '

'), } for key in ("Artist", "Book", "Circle", "Event", "Language", "Magazine", "Parody", "Publisher", "Description"): value = rmve(extr('viewcolumn">' + key + '', '')) value, sep, rest = value.rpartition(" (") data[key.lower()] = value if sep else rest data["tags"] = tags = [] for k in text.extract_iter(page, '> 1 ^ 0xc else: C = C >> 1 k = primes[C & 0x7] x = 0 L = len(key) S = list(range(256)) for i in range(256): x = (x + S[i] + key[i % L]) & 255 S[i], S[x] = S[x], S[i] result = [] a = c = m = x = 0 for n in range(64, len(blob)): a = (a + k) & 255 x = (c + S[(x + S[a]) & 255]) & 255 c = (c + a + S[a]) & 255 S[a], S[x] = S[x], S[a] m = S[(x + S[(a + S[(m + c) & 255]) & 255]) & 255] result.append(chr(blob[n] ^ m)) return "".join(result) def _join_title(self, data): event = data['event'] artist = data['artist'] circle = data['circle'] title = data['title'] parody = data['parody'] book = data['book'] magazine = data['magazine'] # a few galleries have a large number of artists or parodies, # which get replaced with "Various" in the title string if artist.count(',') >= 3: artist = 'Various' if parody.count(',') >= 3: parody = 'Various' jt = '' if event: jt += f'({event}) ' if circle: jt += f'[{circle} ({artist})] ' else: jt += f'[{artist}] ' jt += title if parody.lower() != 'original work': jt += f' ({parody})' if book: jt += f' ({book})' if magazine: jt += f' ({magazine})' return jt class HentainexusSearchExtractor(Extractor): """Extractor for hentainexus search results""" category = "hentainexus" subcategory = "search" root = "https://hentainexus.com" pattern = (r"(?i)(?:https?://)?(?:www\.)?hentainexus\.com" r"(?:/page/\d+)?/?(?:\?(q=[^/?#]+))?$") example = "https://hentainexus.com/?q=QUERY" def items(self): params = text.parse_query(self.groups[0]) data = {"_extractor": HentainexusGalleryExtractor} path = "/" while path: page = self.request(self.root + path, params=params).text extr = text.extract_from(page) while True: gallery_id = extr('