mirror of
https://github.com/searxng/searxng.git
synced 2026-06-01 23:47:16 +02:00
[mod] typification of SearXNG: add new result type Code
This patch adds a new result type: Code - Python class: searx/result_types/code.py - Jinja template: searx/templates/simple/result_templates/code.html - CSS (less) client/simple/src/less/result_types/code.less Signed-off-by: Markus Heiser <markus.heiser@darmarIT.de>
This commit is contained in:
committed by
Markus Heiser
parent
b8085d27ac
commit
9ac9c8c4f5
@@ -68,10 +68,8 @@ code blocks in a single file might be returned from the API).
|
||||
from __future__ import annotations
|
||||
|
||||
import typing as t
|
||||
from urllib.parse import urlencode, urlparse
|
||||
from urllib.parse import urlencode
|
||||
|
||||
from pygments.lexers import guess_lexer_for_filename
|
||||
from pygments.util import ClassNotFound
|
||||
from searx.result_types import EngineResults
|
||||
from searx.extended_types import SXNG_Response
|
||||
from searx.network import raise_for_httperror
|
||||
@@ -162,26 +160,10 @@ def request(query: str, params: dict[str, t.Any]) -> None:
|
||||
params['raise_for_httperror'] = False
|
||||
|
||||
|
||||
def get_code_language_name(filename: str, code_snippet: str) -> str | None:
    """Guess the code language used for syntax highlighting.

    The lexer is looked up with pygments' ``guess_lexer_for_filename`` from the
    ``filename`` and the ``code_snippet``.  The first alias of that lexer is
    returned.  ``None`` (no syntax highlighting) is returned when no lexer is
    found (``ClassNotFound``), when the lookup yields ``None`` or when the lexer
    has no aliases.
    """
    try:
        found_lexer = guess_lexer_for_filename(filename, _text=code_snippet)
        aliases = found_lexer.aliases if found_lexer is not None else ()
        return aliases[0] if len(aliases) > 0 else None
    except ClassNotFound:
        return None
|
||||
|
||||
|
||||
def extract_code(code_matches: list[dict[str, t.Any]]) -> tuple[list[str], set[int]]:
|
||||
"""
|
||||
Iterate over multiple possible matches, for each extract a code fragment.
|
||||
GitHub additionally sends context for _word_ highlights; pygments supports
|
||||
Github additionally sends context for _word_ highlights; pygments supports
|
||||
highlighting lines, as such we calculate which lines to highlight while
|
||||
traversing the text.
|
||||
"""
|
||||
@@ -231,18 +213,18 @@ def extract_code(code_matches: list[dict[str, t.Any]]) -> tuple[list[str], set[i
|
||||
|
||||
|
||||
def response(resp: SXNG_Response) -> EngineResults:
|
||||
results = EngineResults()
|
||||
res = EngineResults()
|
||||
|
||||
if resp.status_code == 422:
|
||||
# on an invalid search term the status code 422 "Unprocessable Content"
|
||||
# is returned / e.g. search term is "user: foo" instead of "user:foo"
|
||||
return results
|
||||
return res
|
||||
# raise for other errors
|
||||
raise_for_httperror(resp)
|
||||
|
||||
for item in resp.json().get('items', []):
|
||||
repo = item['repository']
|
||||
text_matches = item['text_matches']
|
||||
repo: dict[str, str] = item['repository'] # pyright: ignore[reportAny]
|
||||
text_matches: list[dict[str, str]] = item['text_matches'] # pyright: ignore[reportAny]
|
||||
# ensure picking only the code contents in the blob
|
||||
code_matches = [
|
||||
match for match in text_matches if match["object_type"] == "FileContent" and match["property"] == "content"
|
||||
@@ -251,22 +233,18 @@ def response(resp: SXNG_Response) -> EngineResults:
|
||||
if not ghc_highlight_matching_lines:
|
||||
highlighted_lines_index: set[int] = set()
|
||||
|
||||
code_snippet = "\n".join(lines)
|
||||
res.add(
|
||||
res.types.Code(
|
||||
url=item["html_url"], # pyright: ignore[reportAny]
|
||||
title=f"{repo['full_name']} · {item['name']}",
|
||||
filename=f"{item['path']}",
|
||||
content=repo['description'],
|
||||
repository=repo['html_url'],
|
||||
codelines=[(i + 1, line) for (i, line) in enumerate(lines)],
|
||||
hl_lines=highlighted_lines_index,
|
||||
strip_whitespace=ghc_strip_whitespace,
|
||||
strip_new_lines=ghc_strip_new_lines,
|
||||
)
|
||||
)
|
||||
|
||||
kwargs: dict[str, t.Any] = {
|
||||
'template': 'code.html',
|
||||
'url': item['html_url'],
|
||||
'title': f"{repo['full_name']} · {item['path']}",
|
||||
'content': repo['description'],
|
||||
'repository': repo['html_url'],
|
||||
'codelines': [(i + 1, line) for (i, line) in enumerate(lines)],
|
||||
'hl_lines': highlighted_lines_index,
|
||||
'code_language': get_code_language_name(filename=item['name'], code_snippet=code_snippet),
|
||||
# important to set for highlighting
|
||||
'strip_whitespace': ghc_strip_whitespace,
|
||||
'strip_new_lines': ghc_strip_new_lines,
|
||||
'parsed_url': urlparse(item['html_url']),
|
||||
}
|
||||
results.add(results.types.LegacyResult(**kwargs))
|
||||
|
||||
return results
|
||||
return res
|
||||
|
||||
@@ -1,79 +1,62 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Searchcode (IT)
|
||||
"""Searchcode (IT)"""
|
||||
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import typing as t
|
||||
|
||||
from json import loads
|
||||
from urllib.parse import urlencode
|
||||
|
||||
from searx.result_types import EngineResults
|
||||
from searx.extended_types import SXNG_Response
|
||||
|
||||
# about
|
||||
about = {
|
||||
"website": 'https://searchcode.com/',
|
||||
"website": "https://searchcode.com/",
|
||||
"wikidata_id": None,
|
||||
"official_api_documentation": 'https://searchcode.com/api/',
|
||||
"official_api_documentation": "https://searchcode.com/api/",
|
||||
"use_official_api": True,
|
||||
"require_api_key": False,
|
||||
"results": 'JSON',
|
||||
"results": "JSON",
|
||||
}
|
||||
|
||||
# engine dependent config
|
||||
categories = ['it']
|
||||
search_api = 'https://searchcode.com/api/codesearch_I/?'
|
||||
|
||||
# special code-endings which are not recognised by the file ending
|
||||
code_endings = {'cs': 'c#', 'h': 'c', 'hpp': 'cpp', 'cxx': 'cpp'}
|
||||
categories = ["it"]
|
||||
search_api = "https://searchcode.com/api/codesearch_I/?"
|
||||
|
||||
# paging is broken in searchcode.com's API .. not sure it will ever be fixed
|
||||
# paging = True
|
||||
|
||||
|
||||
def request(query, params):
    """Build the searchcode.com query URL for ``query``.

    The URL is stored in ``params['url']`` and the (same) ``params`` dict is
    returned.
    """
    # Paging is broken in searchcode.com's API, therefore the arguments
    # 'p' (params['pageno'] - 1) and 'per_page' (10) are not sent.
    query_string = urlencode({'q': query})
    params['url'] = search_api + query_string
    logger.debug("query_url --> %s", params['url'])
    return params
|
||||
def request(query: str, params: dict[str, t.Any]) -> None:
    """Store the searchcode.com query URL for ``query`` in ``params["url"]``."""
    # Paging is broken in searchcode.com's API, therefore the arguments
    # "p" (params["pageno"] - 1) and "per_page" (10) are not sent.
    query_string = urlencode({"q": query})
    params["url"] = search_api + query_string
    logger.debug("query_url --> %s", params["url"])
|
||||
|
||||
|
||||
def response(resp):
|
||||
results = []
|
||||
|
||||
search_results = loads(resp.text)
|
||||
def response(resp: SXNG_Response) -> EngineResults:
|
||||
res = EngineResults()
|
||||
|
||||
# parse results
|
||||
for result in search_results.get('results', []):
|
||||
href = result['url']
|
||||
title = "" + result['name'] + " - " + result['filename']
|
||||
repo = result['repo']
|
||||
|
||||
for result in resp.json().get("results", []):
|
||||
lines = {}
|
||||
for line, code in result['lines'].items():
|
||||
for line, code in result["lines"].items():
|
||||
lines[int(line)] = code
|
||||
|
||||
code_language = code_endings.get(
|
||||
result['filename'].split('.')[-1].lower(), result['filename'].split('.')[-1].lower()
|
||||
res.add(
|
||||
res.types.Code(
|
||||
url=result["url"],
|
||||
title=f'{result["name"]} - {result["filename"]}',
|
||||
repository=result["repo"],
|
||||
filename=result["filename"],
|
||||
codelines=sorted(lines.items()),
|
||||
strip_whitespace=True,
|
||||
)
|
||||
)
|
||||
|
||||
# append result
|
||||
results.append(
|
||||
{
|
||||
'url': href,
|
||||
'title': title,
|
||||
'content': '',
|
||||
'repository': repo,
|
||||
'codelines': sorted(lines.items()),
|
||||
'code_language': code_language,
|
||||
'template': 'code.html',
|
||||
'strip_whitespace': True,
|
||||
'strip_new_lines': True,
|
||||
}
|
||||
)
|
||||
|
||||
# return results
|
||||
return results
|
||||
return res
|
||||
|
||||
Reference in New Issue
Block a user