mirror of
https://github.com/searxng/searxng.git
synced 2026-06-22 09:38:34 +02:00
Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 26fa181b84 | |||
| 0f35ef7cd6 | |||
| b1ae576b2d |
@@ -10,10 +10,12 @@ import time
|
|||||||
import typing as t
|
import typing as t
|
||||||
|
|
||||||
from urllib.parse import urlencode
|
from urllib.parse import urlencode
|
||||||
|
from lxml import html
|
||||||
|
|
||||||
from searx.result_types import EngineResults
|
from searx.result_types import EngineResults
|
||||||
|
from searx.exceptions import SearxEngineCaptchaException
|
||||||
from searx.extended_types import SXNG_Response
|
from searx.extended_types import SXNG_Response
|
||||||
from searx.utils import extr, gen_useragent, html_to_text
|
from searx.utils import extr, gen_useragent, html_to_text, eval_xpath
|
||||||
from searx.network import get
|
from searx.network import get
|
||||||
|
|
||||||
if t.TYPE_CHECKING:
|
if t.TYPE_CHECKING:
|
||||||
@@ -40,6 +42,11 @@ time_range_map = {"day": "d", "week": "w", "month": "m", "year": "y"}
|
|||||||
def _get_page_hash(query: str, page: int, headers: dict[str, str]) -> str:
|
def _get_page_hash(query: str, page: int, headers: dict[str, str]) -> str:
|
||||||
resp = get(f"{base_url}/web/result?q={query}&page={page}", headers=headers)
|
resp = get(f"{base_url}/web/result?q={query}&page={page}", headers=headers)
|
||||||
|
|
||||||
|
# detect captcha (if any)
|
||||||
|
doc = html.fromstring(resp.text)
|
||||||
|
if eval_xpath(doc, "//*[@id='spam-messages']"):
|
||||||
|
raise SearxEngineCaptchaException()
|
||||||
|
|
||||||
# the text we search for looks like:
|
# the text we search for looks like:
|
||||||
# load("/desk?lang="+eV.p.param['hl']+"&q="+eV['p']['q_encode']+"&page=5&h=aa45603&t=177582576&origin=web&comp=web_serp_pag&p=gmx-com&sp=&lr="+eV.p.param['lr0']+"&mkt="+eV.p.param['mkt0']+"&family="+eV.p.param['familyFilter']+"&fcons="+eV.p.perm.fCons,"google", "eMMO", "eMH","eMP"); # pylint: disable=line-too-long
|
# load("/desk?lang="+eV.p.param['hl']+"&q="+eV['p']['q_encode']+"&page=5&h=aa45603&t=177582576&origin=web&comp=web_serp_pag&p=gmx-com&sp=&lr="+eV.p.param['lr0']+"&mkt="+eV.p.param['mkt0']+"&family="+eV.p.param['familyFilter']+"&fcons="+eV.p.perm.fCons,"google", "eMMO", "eMH","eMP"); # pylint: disable=line-too-long
|
||||||
return extr(resp.text, "&h=", "&t=")
|
return extr(resp.text, "&h=", "&t=")
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ Paging:
|
|||||||
- :py:obj:`paging`
|
- :py:obj:`paging`
|
||||||
- :py:obj:`page_size`
|
- :py:obj:`page_size`
|
||||||
- :py:obj:`first_page_num`
|
- :py:obj:`first_page_num`
|
||||||
|
- :py:obj:`send_page_num_on_first_page`
|
||||||
|
|
||||||
Time Range:
|
Time Range:
|
||||||
|
|
||||||
@@ -169,6 +170,10 @@ number, but an offset.'''
|
|||||||
first_page_num = 1
|
first_page_num = 1
|
||||||
'''Number of the first page (usually 0 or 1).'''
|
'''Number of the first page (usually 0 or 1).'''
|
||||||
|
|
||||||
|
send_page_num_on_first_page = True
|
||||||
|
'''Whether to include the page number in the request for the first page.
|
||||||
|
This can help if an engine blocks requests that send a page number for the first page.'''
|
||||||
|
|
||||||
results_query = ''
|
results_query = ''
|
||||||
'''JSON query for the list of result items.
|
'''JSON query for the list of result items.
|
||||||
|
|
||||||
@@ -322,10 +327,13 @@ def request(query, params): # pylint: disable=redefined-outer-name
|
|||||||
if params['safesearch']:
|
if params['safesearch']:
|
||||||
safe_search = safe_search_map[params['safesearch']]
|
safe_search = safe_search_map[params['safesearch']]
|
||||||
|
|
||||||
|
pageno = ""
|
||||||
|
if send_page_num_on_first_page or params["pageno"] != 1:
|
||||||
|
pageno = (params['pageno'] - 1) * page_size + first_page_num
|
||||||
fp = { # pylint: disable=invalid-name
|
fp = { # pylint: disable=invalid-name
|
||||||
'query': urlencode({'q': query})[2:],
|
'query': urlencode({'q': query})[2:],
|
||||||
'lang': lang,
|
'lang': lang,
|
||||||
'pageno': (params['pageno'] - 1) * page_size + first_page_num,
|
'pageno': pageno,
|
||||||
'time_range': time_range,
|
'time_range': time_range,
|
||||||
'safe_search': safe_search,
|
'safe_search': safe_search,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -22,6 +22,7 @@ Paging:
|
|||||||
- :py:obj:`paging`
|
- :py:obj:`paging`
|
||||||
- :py:obj:`page_size`
|
- :py:obj:`page_size`
|
||||||
- :py:obj:`first_page_num`
|
- :py:obj:`first_page_num`
|
||||||
|
- :py:obj:`send_page_num_on_first_page`
|
||||||
|
|
||||||
Time Range:
|
Time Range:
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user