mirror of
https://github.com/NomaDamas/k-skill.git
synced 2026-06-24 02:04:11 +00:00
* feat: 네이버 블로그 리서치 스킬 추가 API 키 없이 python3 표준 라이브러리만으로 네이버 블로그 검색, 원문 읽기, 이미지 다운로드를 수행하는 스킬. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * Restore Naver blog search paging and newest-sort behavior Naver's current blog search surface does not honor the older where=blog + sort query pattern used by this skill. The request now targets the blog tab surface, uses the observed NSO sort controls, and trims each parsed page to the visible 15-result window so count-based pagination returns distinct results. Constraint: Must keep using stdlib-only HTTP scraping without adding dependencies Constraint: Current Naver blog tab behavior requires ssc/tab parameters plus nso sort controls Rejected: Keep where=blog and tune start values only | still returned repeated first-page results Rejected: Leave sort=date as-is | current endpoint ignored it and returned relevance ordering Confidence: medium Scope-risk: narrow Reversibility: clean Directive: Re-verify request params against live Naver markup before changing paging or sort semantics again Tested: python3 -m py_compile on naver-blog-research scripts and new regression test; PYTHONPATH=.:scripts python3 -m unittest scripts.test_naver_blog_search; npm run lint; live naver_search.py --count 20/30 --sort sim; live naver_search.py --count 10/20 --sort date Not-tested: Full npm run test remains blocked by unrelated local pyexpat/libexpat environment failures in patent-search tests * Surface the new Naver blog skill in the main README PR 107 adds the skill and feature guide, but the repository landing page still omitted it from the user-facing capability list. This commit keeps the README aligned with the actual shipped skill set so users can discover the new entry point from the main docs. 
Constraint: README capability tables and feature lists should stay aligned with docs/features entries Rejected: Leave README unchanged until merge | hides the new skill from the main index during PR review Confidence: high Scope-risk: narrow Reversibility: clean Directive: When adding a new skill guide, update both the summary table and the included-features list together Tested: README diff review; verified docs/features/naver-blog-research.md link target exists Not-tested: Full npm run ci (docs-only change) --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com> Co-authored-by: Jeffrey (Dongkyu) Kim <vkehfdl1@gmail.com>
58 lines
1.9 KiB
Python
58 lines
1.9 KiB
Python
"""Shared HTTP utilities for Naver blog scripts (SSL handling, URL validation, urlopen wrapper)."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
import ssl
|
|
import sys
|
|
import urllib.error
|
|
import urllib.parse
|
|
import urllib.request
|
|
|
|
|
|
# Matches one markup tag: "<", one or more characters other than ">", then ">".
TAG_RE = re.compile(r"<[^>]+>")
|
|
|
|
_ssl_ctx_secure: ssl.SSLContext | None = None
|
|
_ssl_ctx_insecure: ssl.SSLContext | None = None
|
|
|
|
|
|
def _get_ssl_context(*, insecure: bool = False) -> ssl.SSLContext:
|
|
global _ssl_ctx_secure, _ssl_ctx_insecure
|
|
if insecure:
|
|
if _ssl_ctx_insecure is None:
|
|
ctx = ssl.create_default_context()
|
|
ctx.check_hostname = False
|
|
ctx.verify_mode = ssl.CERT_NONE
|
|
_ssl_ctx_insecure = ctx
|
|
return _ssl_ctx_insecure
|
|
if _ssl_ctx_secure is None:
|
|
_ssl_ctx_secure = ssl.create_default_context()
|
|
return _ssl_ctx_secure
|
|
|
|
|
|
# Domain suffixes (with leading dot) that count as Naver-owned hosts.
_NAVER_DOMAINS = (".naver.com", ".naver.net", ".pstatic.net")


def is_naver_url(url: str) -> bool:
    """Return True when *url*'s hostname is a listed Naver domain or a subdomain of one.

    A hostname matches when it equals a listed domain without its leading dot
    (e.g. ``naver.com``) or ends with the dotted suffix (e.g. ``blog.naver.com``).
    Look-alike hosts such as ``evilnaver.com`` do not match because the suffix
    comparison includes the leading dot.

    Args:
        url: The URL to inspect.

    Returns:
        True for a matching hostname; False otherwise, including when the URL
        has no hostname or cannot be parsed at all.
    """
    try:
        host = urllib.parse.urlparse(url).hostname or ""
    except ValueError:
        # urlparse rejects malformed authorities (e.g. an unbalanced "[");
        # an unparsable URL is by definition not a trusted Naver URL.
        return False
    return any(host == d.lstrip(".") or host.endswith(d) for d in _NAVER_DOMAINS)
|
|
|
|
|
|
def urlopen(request: urllib.request.Request, timeout: int, *, insecure: bool = False):
    """Open *request* via ``urllib.request.urlopen`` with a module-managed SSL context.

    By default the request is opened with the verifying SSL context. With
    ``insecure=True`` certificate verification is skipped, but only for URLs
    accepted by ``is_naver_url``; a warning is written to stderr on every such
    call so the caller is always aware.

    Args:
        request: The prepared request; its ``full_url`` is what gets validated.
        timeout: Timeout forwarded to ``urllib.request.urlopen``.
        insecure: Opt in to skipping certificate verification (Naver URLs only).

    Returns:
        Whatever ``urllib.request.urlopen`` returns for the request.

    Raises:
        ValueError: If ``insecure`` is True and the request URL is not a Naver URL.
    """
    if not insecure:
        return urllib.request.urlopen(request, timeout=timeout, context=_get_ssl_context())

    # Refuse to drop certificate verification for anything outside Naver's domains.
    if not is_naver_url(request.full_url):
        raise ValueError("insecure 모드는 네이버 도메인에만 사용할 수 있습니다.")

    print(
        "[warn] SSL 인증서 검증이 비활성화되었습니다. 연결이 안전하지 않을 수 있습니다.",
        file=sys.stderr,
    )
    # NOTE(review): only the initial URL is validated here; presumably any
    # redirect followed by urllib would reuse this unverified context — confirm.
    unverified_ctx = _get_ssl_context(insecure=True)
    return urllib.request.urlopen(request, timeout=timeout, context=unverified_ctx)
|