Wire JobKorea talent helper into CI

2026-06-24 02:04:11 +00:00 · 2026-06-18 10:37:08 +09:00 · 2026-06-18 10:37:08 +09:00 · b14f65361f
commit b14f65361f
parent caa1f0fd0d
6 changed files with 384 additions and 355 deletions
--- a/jobkorea-talent-search/scripts/jobkorea_talent_models.py
+++ b/jobkorea-talent-search/scripts/jobkorea_talent_models.py
@ -0,0 +1,27 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Final
+
+BASE_URL: Final = "https://www.jobkorea.co.kr"
+FIND_PATH: Final = "/corp/person/find"
+AJAX_PATH: Final = "/corp/person/detailsearchajax"
+DEFAULT_UA: Final = (
+    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
+    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0 Safari/537.36"
+)
+
+
+@dataclass(frozen=True, slots=True)
+class Candidate:
+    """Immutable summary of one resume entry taken from a talent-search result page.
+
+    Only ``rno`` and ``url`` are required; every other field defaults to an
+    empty string. The class is frozen, so instances cannot be modified after
+    construction.
+    """
+    # Digits taken from the ``rNo=`` query parameter of the resume link.
+    rno: str
+    # Resume link; the parsers in jobkorea_talent_parse.py join it onto BASE_URL.
+    url: str
+    # Name text as shown in the listing.
+    name: str = ""
+    # Text that accompanies the name (the parsers use the following <dd> or a
+    # parenthesised group after the name).
+    meta: str = ""
+    # Text of the ``.career`` element when the BeautifulSoup parser finds one.
+    career: str = ""
+    # The next four fields are not set by the parsers visible in this change.
+    education: str = ""
+    locations: str = ""
+    salary: str = ""
+    # Comma-joined labels of the keyword/skill buttons.
+    skills: str = ""
+    badges: str = ""
+    # Free-text summary of the entry with action-control labels filtered out.
+    raw_summary: str = ""
--- a/jobkorea-talent-search/scripts/jobkorea_talent_parse.py
+++ b/jobkorea-talent-search/scripts/jobkorea_talent_parse.py
@ -0,0 +1,186 @@
+from __future__ import annotations
+
+import html
+import re
+import urllib.parse
+
+from jobkorea_talent_models import BASE_URL, Candidate
+
+ACTION_CONTROL_RE = re.compile(
+    r"^(?:스크랩\s*\d*|저장하기|닫기|포지션\s*제안|메모하기|프로필\s*확인|이력서\s*확인|펼쳐보기|접기|이전|다음)$"
+)
+ACTION_CONTROL_INLINE_RE = re.compile(
+    r"(?:스크랩\s*\d+|저장하기|닫기|포지션\s*제안|메모하기|프로필\s*확인|이력서\s*확인|펼쳐보기|접기|이전|다음)"
+)
+RESUME_LINK_RE = re.compile(r'href="(?P<href>/corp/person/find/resume/view\?rNo=(?P<rno>\d+))"')
+
+
+def clean_text(value: str) -> str:
+    """Reduce an HTML fragment to plain text.
+
+    Entities are unescaped first, then script/style elements and all remaining
+    tags are replaced by a space, runs of horizontal whitespace are collapsed
+    and blank lines are squeezed out. The result is stripped at both ends.
+    """
+    text = html.unescape(value)
+    # Applied strictly in this order: (pattern, replacement, flags).
+    substitutions = (
+        (r"<script[\s\S]*?</script>", " ", re.I),
+        (r"<style[\s\S]*?</style>", " ", re.I),
+        (r"<[^>]+>", " ", 0),
+        (r"[ \t\r\f\v]+", " ", 0),
+        (r"\n\s*\n+", "\n", 0),
+    )
+    for pattern, replacement, flags in substitutions:
+        text = re.sub(pattern, replacement, text, flags=flags)
+    return text.strip()
+
+
+def is_action_control_label(value: str) -> bool:
+    """Return True when *value* is nothing but a UI action label.
+
+    The text is HTML-unescaped and whitespace-normalised before it is matched
+    against ``ACTION_CONTROL_RE``; an empty result is never a label.
+    """
+    collapsed = re.sub(r"\s+", " ", html.unescape(value))
+    label = collapsed.strip()
+    if not label:
+        return False
+    return ACTION_CONTROL_RE.match(label) is not None
+
+
+def filter_action_control_text(value: str) -> str:
+    """Remove UI action labels from multi-line text.
+
+    Lines that are empty or consist solely of an action label are dropped.
+    Inline occurrences matched by ``ACTION_CONTROL_INLINE_RE`` are blanked out
+    of the remaining lines, whose whitespace is then collapsed.
+    """
+    kept: list[str] = []
+    for raw_line in value.splitlines():
+        stripped = raw_line.strip()
+        if stripped and not is_action_control_label(stripped):
+            scrubbed = ACTION_CONTROL_INLINE_RE.sub(" ", stripped)
+            scrubbed = re.sub(r"\s+", " ", scrubbed).strip()
+            if scrubbed:
+                kept.append(scrubbed)
+    return "\n".join(kept).strip()
+
+
+def row_contains_other_resume(candidate_markup: str, rno: str) -> bool:
+    """Report whether the markup references any resume number other than *rno*.
+
+    References are read from ``rNo=<digits>`` occurrences and from quoted
+    ``data-rno`` attributes (single or double quotes).
+    """
+    reference_pattern = r"rNo=(\d+)|data-rno=[\"'](\d+)[\"']"
+    for found in re.finditer(reference_pattern, candidate_markup):
+        # Exactly one of the two groups takes part in any given match.
+        if (found.group(1) or found.group(2)) != rno:
+            return True
+    return False
+
+
+def extract_regex_candidate_markup(markup: str, match: re.Match[str], rno: str) -> str:
+    """Return the slice of *markup* that belongs to the resume link at *match*.
+
+    Three strategies are tried in order:
+
+    1. The enclosing ``<tr>`` whose opening tag carries ``data-rno`` equal to
+       *rno* (single- or double-quoted), up to and including its ``</tr>``.
+    2. The enclosing ``<div class="booth"`` up to the next booth or
+       ``</section>``, provided it references no other resume number.
+    3. A fixed window of 300 characters before and 1200 after the link.
+
+    Args:
+        markup: Full HTML text that was searched.
+        match: Match object for the resume link inside *markup*.
+        rno: Resume number captured from the link.
+
+    Returns:
+        The markup fragment to use as evidence for this candidate.
+    """
+    row_start = markup.rfind("<tr", 0, match.start())
+    if row_start >= 0:
+        row_open_end = markup.find(">", row_start, match.start())
+        row_end = markup.find("</tr>", match.end())
+        row_open = markup[row_start : row_open_end + 1] if row_open_end >= 0 else ""
+        # Accept either quote style so this check agrees with
+        # row_contains_other_resume, which recognises both.
+        owns_row = re.search(rf"data-rno=[\"']{re.escape(rno)}[\"']", row_open)
+        if row_end >= 0 and owns_row:
+            return markup[row_start : row_end + len("</tr>")]
+
+    booth_start = markup.rfind('<div class="booth"', 0, match.start())
+    if booth_start >= 0:
+        next_booth = markup.find('<div class="booth"', match.end())
+        section_end = markup.find("</section>", match.end())
+        end_candidates = [pos for pos in (next_booth, section_end) if pos >= 0]
+        # Without a closing landmark, cap the booth 2500 characters past the link.
+        booth_end = min(end_candidates) if end_candidates else min(len(markup), match.end() + 2500)
+        booth = markup[booth_start:booth_end]
+        if not row_contains_other_resume(booth, rno):
+            return booth
+
+    # Last resort: a bounded window around the link itself.
+    start = max(0, match.start() - 300)
+    end = min(len(markup), match.end() + 1200)
+    return markup[start:end]
+
+
+def parse_with_bs4(markup: str, limit: int) -> list[Candidate] | None:
+    """Parse resume entries out of *markup* with BeautifulSoup.
+
+    Returns ``None`` when ``bs4`` cannot be imported, which lets the caller
+    fall back to another parser. Otherwise returns one ``Candidate`` per
+    distinct resume number, in document order. Collection stops once the list
+    holds *limit* entries; the check runs after each append.
+    """
+    try:
+        # Imported lazily so the module still loads when bs4 is not installed.
+        from bs4 import BeautifulSoup
+    except ImportError:
+        return None
+
+    soup = BeautifulSoup(markup, "html.parser")
+    candidates: list[Candidate] = []
+    seen: set[str] = set()
+
+    for link in soup.select('a[href*="/corp/person/find/resume/view?rNo="]'):
+        raw_href = link.get("href", "")
+        # Anything that is not a plain string is treated as an empty href.
+        href = raw_href if isinstance(raw_href, str) else ""
+        matched_rno = re.search(r"rNo=(\d+)", href)
+        if not matched_rno:
+            continue
+        rno = matched_rno.group(1)
+        # Only the first link per resume number produces a candidate.
+        if rno in seen:
+            continue
+        seen.add(rno)
+
+        # Prefer the table row tagged with this rno, then an ancestor whose
+        # class list contains "booth", then the link's direct parent.
+        container = (
+            link.find_parent("tr", attrs={"data-rno": rno})
+            or link.find_parent(class_=re.compile(r"(^|\s)booth(\s|$)", re.I))
+            or link.parent
+        )
+        if container and row_contains_other_resume(str(container), rno):
+            # The chosen ancestor references another resume as well; narrow to
+            # the link's parent rather than mixing text from several candidates.
+            container = link.parent
+
+        raw = clean_text(str(container)) if container else clean_text(str(link))
+        texts = []
+        for node in container.find_all(["dt", "dd", "p", "span", "li"]) if container else []:
+            label = node.get_text(" ", strip=True)
+            if label and not is_action_control_label(label):
+                texts.append(label)
+        for btn in container.select(".keywordSkill button, .keywordBox button") if container else []:
+            label = btn.get_text(" ", strip=True)
+            if label and not is_action_control_label(label):
+                texts.append(label)
+        # dict.fromkeys removes duplicate labels while keeping first-seen order.
+        text_join = " | ".join(dict.fromkeys(texts))
+
+        name_scope = container.select_one(".nameAge") if container else None
+        dt = (name_scope or container).find("dt") if container else None
+        name = dt.get_text(" ", strip=True) if dt else ""
+        # NOTE(review): find_next is not limited to `container`; confirm it
+        # cannot pick up a <dd> that belongs to a following candidate.
+        dd = dt.find_next("dd") if dt else None
+        meta = dd.get_text(" ", strip=True) if dd else ""
+        if not name:
+            # No <dt> text: look for a token ending in "OO" followed by a
+            # parenthesised group in the cleaned text.
+            m_name = re.search(r"([가-힣A-Za-z]OO)\s*\(([^)]*)\)", raw)
+            if m_name:
+                name = m_name.group(1)
+                meta = "(" + m_name.group(2) + ")"
+
+        # NOTE(review): same selector and filter as the button loop above; the
+        # labels are gathered a second time here for the skills field.
+        skills = []
+        for btn in container.select(".keywordSkill button, .keywordBox button") if container else []:
+            label = btn.get_text(" ", strip=True)
+            if label and not is_action_control_label(label):
+                skills.append(label)
+
+        career_node = container.select_one(".career") if container else None
+        candidates.append(
+            Candidate(
+                rno=rno,
+                url=urllib.parse.urljoin(BASE_URL, href),
+                name=name,
+                meta=meta,
+                career=career_node.get_text(" ", strip=True) if career_node else "",
+                # At most 25 skill labels are kept.
+                skills=", ".join(skills[:25]),
+                # Prefer the joined element texts; fall back to the cleaned
+                # container text when no element text was collected.
+                raw_summary=filter_action_control_text(text_join[:1000] or raw[:1000]),
+            )
+        )
+        if len(candidates) >= limit:
+            break
+    return candidates
+
+
+def parse_with_regex(markup: str, limit: int) -> list[Candidate]:
+    """Parse resume entries out of *markup* using regular expressions only.
+
+    Every distinct resume number matched by ``RESUME_LINK_RE`` yields one
+    ``Candidate`` whose summary comes from the surrounding markup. Collection
+    stops once the list holds *limit* entries; the check runs after each append.
+    """
+    results: list[Candidate] = []
+    visited: set[str] = set()
+    for link in RESUME_LINK_RE.finditer(markup):
+        resume_no = link.group("rno")
+        if resume_no in visited:
+            continue
+        visited.add(resume_no)
+
+        summary = clean_text(extract_regex_candidate_markup(markup, link, resume_no))
+        masked = re.search(r"([가-힣A-Za-z]OO)\s*\(([^)]*)\)", summary)
+        if masked:
+            display_name = masked.group(1)
+            display_meta = "(" + masked.group(2) + ")"
+        else:
+            display_name = ""
+            display_meta = ""
+
+        entry = Candidate(
+            rno=resume_no,
+            url=urllib.parse.urljoin(BASE_URL, link.group("href")),
+            name=display_name,
+            meta=display_meta,
+            raw_summary=filter_action_control_text(summary[:1000]),
+        )
+        results.append(entry)
+        if len(results) >= limit:
+            break
+    return results
+
+
+def parse_candidates(markup: str, limit: int) -> list[Candidate]:
+    """Parse up to *limit* candidates from search-result markup.
+
+    The BeautifulSoup parser is used when it is available (it returns ``None``
+    otherwise), with the regex parser as the fallback.
+
+    Args:
+        markup: HTML returned by the search request.
+        limit: Maximum number of candidates to return.
+
+    Returns:
+        The parsed candidates; an empty list when *limit* is not positive.
+    """
+    # Both parsers test the limit only after appending, so without this guard
+    # a non-positive limit would still yield one candidate.
+    if limit <= 0:
+        return []
+    parsed = parse_with_bs4(markup, limit)
+    if parsed is not None:
+        return parsed
+    return parse_with_regex(markup, limit)
--- a/jobkorea-talent-search/scripts/jobkorea_talent_search.py
+++ b/jobkorea-talent-search/scripts/jobkorea_talent_search.py
@ -10,368 +10,19 @@ paid entitlements, or user confirmation.
 from __future__ import annotations

 import argparse
-import html
 import json
-import re
 import sys
 import urllib.error
-import urllib.parse
-import urllib.request
-from dataclasses import dataclass, asdict
-from typing import Any
+from dataclasses import asdict

-BASE_URL = "https://www.jobkorea.co.kr"
-FIND_PATH = "/corp/person/find"
-AJAX_PATH = "/corp/person/detailsearchajax"
-DEFAULT_UA = (
-    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
-    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0 Safari/537.36"
-)
+from jobkorea_talent_models import Candidate
+from jobkorea_talent_parse import clean_text, parse_candidates
+from jobkorea_talent_search_condition import build_search_condition, post_search
+
+__all__ = ["parse_candidates"]


-@dataclass
-class Candidate:
-    rno: str
-    url: str
-    name: str = ""
-    meta: str = ""
-    career: str = ""
-    education: str = ""
-    locations: str = ""
-    salary: str = ""
-    skills: str = ""
-    badges: str = ""
-    raw_summary: str = ""
-
-
-def fetch(url: str, *, data: bytes | None = None, headers: dict[str, str] | None = None) -> str:
-    req_headers = {"User-Agent": DEFAULT_UA, "Referer": BASE_URL + FIND_PATH}
-    if headers:
-        req_headers.update(headers)
-    req = urllib.request.Request(url, data=data, headers=req_headers, method="POST" if data else "GET")
-    with urllib.request.urlopen(req, timeout=30) as resp:
-        return resp.read().decode("utf-8", "ignore")
-
-
-def extract_json_object(source: str, marker: str) -> dict[str, Any]:
-    idx = source.find(marker)
-    if idx < 0:
-        raise RuntimeError(f"cannot find marker: {marker}")
-    start = source.find("{", idx)
-    if start < 0:
-        raise RuntimeError("cannot find JSON object start")
-    depth = 0
-    in_string = False
-    escape = False
-    for pos in range(start, len(source)):
-        ch = source[pos]
-        if in_string:
-            if escape:
-                escape = False
-            elif ch == "\\":
-                escape = True
-            elif ch == '"':
-                in_string = False
-            continue
-        if ch == '"':
-            in_string = True
-        elif ch == "{":
-            depth += 1
-        elif ch == "}":
-            depth -= 1
-            if depth == 0:
-                return json.loads(source[start : pos + 1])
-    raise RuntimeError("unterminated JSON object")
-
-
-def iter_nodes(node: Any):
-    if isinstance(node, dict):
-        yield node
-        for value in node.values():
-            yield from iter_nodes(value)
-    elif isinstance(node, list):
-        for item in node:
-            yield from iter_nodes(item)
-
-
-def mark_matching_nodes(sc: dict[str, Any], top_key: str, labels: list[str]) -> list[str]:
-    if not labels:
-        return []
-    section = sc.get(top_key)
-    if section is None:
-        return []
-    wanted = [x.strip().lower() for x in labels if x.strip()]
-    matched: list[str] = []
-    for node in iter_nodes(section):
-        title = str(node.get("t", ""))
-        code = str(node.get("v", ""))
-        title_l = title.lower()
-        code_l = code.lower()
-        if any(w == title_l or w == code_l or w in title_l for w in wanted):
-            for k in ("s", "c", "use"):
-                if k in node:
-                    node[k] = 1
-            matched.append(title or code)
-    return matched
-
-
-def build_search_condition(args: argparse.Namespace) -> tuple[dict[str, Any], dict[str, Any]]:
-    first = fetch(BASE_URL + FIND_PATH)
-    sc = extract_json_object(first, "var searchcondition =")
-
-    sc["p"] = args.page
-    sc["ps"] = args.limit
-    sc["saveno"] = 0
-    sc["ff"] = 0
-    sc["sf"] = args.sort
-
-    terms: list[dict[str, Any]] = []
-    for kw in args.keyword:
-        terms.append({"s": 1, "c": 1, "t": kw, "v": kw, "kwdtypecode": 1, "logictypecode": 0})
-    for kw in args.and_keyword:
-        terms.append({"s": 1, "c": 1, "t": kw, "v": kw, "kwdtypecode": 1, "logictypecode": 1})
-    for kw in args.or_keyword:
-        terms.append({"s": 1, "c": 1, "t": kw, "v": kw, "kwdtypecode": 1, "logictypecode": 3})
-    for kw in args.exclude_keyword:
-        terms.append({"s": 1, "c": 1, "t": kw, "v": kw, "kwdtypecode": 1, "logictypecode": 2})
-    sc["totalkeywordlist"] = terms
-
-    if terms:
-        first_kw = terms[0]["t"]
-        sc.setdefault("pfr", {}).setdefault("ck", {})["Keyword"] = first_kw
-        sc["pfr"]["ck"]["KeywordType"] = 1
-        sc["pfr"]["n"] = 1
-
-    if args.career_min is not None:
-        sc.setdefault("career", {})["s"] = str(args.career_min)
-    if args.career_max is not None:
-        sc.setdefault("career", {})["e"] = str(args.career_max)
-
-    matched = {
-        "job_category": mark_matching_nodes(sc, "jobtype", args.job_category),
-        "work_area": mark_matching_nodes(sc, "workarea", args.work_area),
-        "residential_area": mark_matching_nodes(sc, "residentialarea", args.residential_area),
-    }
-    return sc, matched
-
-
-def post_search(sc: dict[str, Any]) -> str:
-    body = urllib.parse.urlencode({"searchCondition": json.dumps(sc, ensure_ascii=False)}).encode()
-    return fetch(
-        BASE_URL + AJAX_PATH,
-        data=body,
-        headers={
-            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
-            "X-Requested-With": "XMLHttpRequest",
-        },
-    )
-
-
-def clean_text(value: str) -> str:
-    value = html.unescape(value)
-    value = re.sub(r"<script[\s\S]*?</script>", " ", value, flags=re.I)
-    value = re.sub(r"<style[\s\S]*?</style>", " ", value, flags=re.I)
-    value = re.sub(r"<[^>]+>", " ", value)
-    value = re.sub(r"[ \t\r\f\v]+", " ", value)
-    value = re.sub(r"\n\s*\n+", "\n", value)
-    return value.strip()
-
-
-ACTION_CONTROL_RE = re.compile(
-    r"^(?:스크랩\s*\d*|저장하기|닫기|포지션\s*제안|메모하기|프로필\s*확인|이력서\s*확인|펼쳐보기|접기|이전|다음)$"
-)
-ACTION_CONTROL_INLINE_RE = re.compile(
-    r"(?:스크랩\s*\d+|저장하기|닫기|포지션\s*제안|메모하기|프로필\s*확인|이력서\s*확인|펼쳐보기|접기|이전|다음)"
-)
-
-
-def is_action_control_label(value: str) -> bool:
-    label = re.sub(r"\s+", " ", html.unescape(value)).strip()
-    return bool(label and ACTION_CONTROL_RE.match(label))
-
-
-def filter_action_control_text(value: str) -> str:
-    lines = []
-    for line in value.splitlines():
-        label = line.strip()
-        if not label or is_action_control_label(label):
-            continue
-        label = ACTION_CONTROL_INLINE_RE.sub(" ", label)
-        label = re.sub(r"\s+", " ", label).strip()
-        if label:
-            lines.append(label)
-    return "\n".join(lines).strip()
-
-
-def row_contains_other_resume(candidate_markup: str, rno: str) -> bool:
-    refs: list[str] = []
-    for href_rno, data_rno in re.findall(r"rNo=(\d+)|data-rno=[\"'](\d+)[\"']", candidate_markup):
-        refs.append(href_rno or data_rno)
-    return any(ref != rno for ref in refs)
-
-
-def extract_regex_candidate_markup(markup: str, match: re.Match[str], rno: str) -> str:
-    row_start = markup.rfind("<tr", 0, match.start())
-    if row_start >= 0:
-        row_open_end = markup.find(">", row_start, match.start())
-        row_end = markup.find("</tr>", match.end())
-        row_open = markup[row_start : row_open_end + 1] if row_open_end >= 0 else ""
-        if row_end >= 0 and f'data-rno="{rno}"' in row_open:
-            return markup[row_start : row_end + len("</tr>")]
-
-    booth_start = markup.rfind('<div class="booth"', 0, match.start())
-    if booth_start >= 0:
-        next_booth = markup.find('<div class="booth"', match.end())
-        section_end = markup.find('</section>', match.end())
-        end_candidates = [pos for pos in (next_booth, section_end) if pos >= 0]
-        booth_end = min(end_candidates) if end_candidates else min(len(markup), match.end() + 2500)
-        booth = markup[booth_start:booth_end]
-        if not row_contains_other_resume(booth, rno):
-            return booth
-
-    start = max(0, match.start() - 300)
-    end = min(len(markup), match.end() + 1200)
-    return markup[start:end]
-
-
-def parse_with_bs4(markup: str, limit: int) -> list[Candidate] | None:
-    try:
-        from bs4 import BeautifulSoup  # type: ignore
-    except Exception:
-        return None
-
-    soup = BeautifulSoup(markup, "html.parser")
-    candidates: list[Candidate] = []
-    seen: set[str] = set()
-
-    for link in soup.select('a[href*="/corp/person/find/resume/view?rNo="]'):
-        href = link.get("href", "")
-        m = re.search(r"rNo=(\d+)", href)
-        if not m:
-            continue
-        rno = m.group(1)
-        if rno in seen:
-            continue
-        seen.add(rno)
-
-        container = (
-            link.find_parent("tr", attrs={"data-rno": rno})
-            or link.find_parent(class_=re.compile(r"(^|\s)booth(\s|$)", re.I))
-            or link.parent
-        )
-        if container and row_contains_other_resume(str(container), rno):
-            # Broad ancestors such as tblSearchList/personList can contain several resumes.
-            # Falling back to the link itself is safer than mixing candidate evidence.
-            container = link.parent
-
-        raw = clean_text(str(container)) if container else clean_text(str(link))
-        texts = []
-        for node in container.find_all(["dt", "dd", "p", "span", "li"]) if container else []:
-            label = node.get_text(" ", strip=True)
-            if label and not is_action_control_label(label):
-                texts.append(label)
-        for btn in container.select(".keywordSkill button, .keywordBox button") if container else []:
-            label = btn.get_text(" ", strip=True)
-            if label and not is_action_control_label(label):
-                texts.append(label)
-        text_join = " | ".join(dict.fromkeys(texts))
-
-        name = ""
-        meta = ""
-        name_scope = container.select_one(".nameAge") if container else None
-        dt = (name_scope or container).find("dt") if container else None
-        if dt:
-            name = dt.get_text(" ", strip=True)
-            dd = dt.find_next("dd")
-            if dd:
-                meta = dd.get_text(" ", strip=True)
-        if not name:
-            m_name = re.search(r"([가-힣A-Za-z]OO)\s*\(([^)]*)\)", raw)
-            if m_name:
-                name = m_name.group(1)
-                meta = "(" + m_name.group(2) + ")"
-
-        skills = []
-        for btn in container.select(".keywordSkill button, .keywordBox button") if container else []:
-            label = btn.get_text(" ", strip=True)
-            if label and not is_action_control_label(label):
-                skills.append(label)
-
-        candidates.append(
-            Candidate(
-                rno=rno,
-                url=urllib.parse.urljoin(BASE_URL, href),
-                name=name,
-                meta=meta,
-                career=(container.select_one(".career").get_text(" ", strip=True) if container and container.select_one(".career") else ""),
-                skills=", ".join(skills[:25]),
-                raw_summary=filter_action_control_text(text_join[:1000] or raw[:1000]),
-            )
-        )
-        if len(candidates) >= limit:
-            break
-    return candidates
-
-
-def parse_with_regex(markup: str, limit: int) -> list[Candidate]:
-    candidates: list[Candidate] = []
-    seen: set[str] = set()
-    for m in re.finditer(r'href="(?P<href>/corp/person/find/resume/view\?rNo=(?P<rno>\d+))"', markup):
-        rno = m.group("rno")
-        if rno in seen:
-            continue
-        seen.add(rno)
-        raw_markup = extract_regex_candidate_markup(markup, m, rno)
-        raw = clean_text(raw_markup)
-        name = ""
-        meta = ""
-        nm = re.search(r"([가-힣A-Za-z]OO)\s*\(([^)]*)\)", raw)
-        if nm:
-            name = nm.group(1)
-            meta = "(" + nm.group(2) + ")"
-        candidates.append(
-            Candidate(
-                rno=rno,
-                url=urllib.parse.urljoin(BASE_URL, m.group("href")),
-                name=name,
-                meta=meta,
-                raw_summary=filter_action_control_text(raw[:1000]),
-            )
-        )
-        if len(candidates) >= limit:
-            break
-    return candidates
-
-
-def parse_candidates(markup: str, limit: int) -> list[Candidate]:
-    parsed = parse_with_bs4(markup, limit)
-    if parsed is not None:
-        return parsed
-    return parse_with_regex(markup, limit)
-
-
-def print_markdown(candidates: list[Candidate], matched: dict[str, Any], args: argparse.Namespace) -> None:
-    print(f"# 잡코리아 인재검색 결과\n")
-    print(f"- 검색어: {', '.join(args.keyword + args.and_keyword + args.or_keyword) or '(없음)'}")
-    print(f"- 제외어: {', '.join(args.exclude_keyword) or '(없음)'}")
-    if any(matched.values()):
-        print(f"- 매칭된 필터: {json.dumps(matched, ensure_ascii=False)}")
-    print(f"- 결과 수: {len(candidates)}")
-    print("- 주의: 이름/회사명은 잡코리아 공개 화면 기준으로 마스킹되어 있으며, 상세 이력서 확인·포지션 제안은 기업회원 로그인/권한/사용자 확인이 필요합니다.\n")
-    for idx, c in enumerate(candidates, 1):
-        bits = [c.name, c.meta, c.career]
-        title = " ".join(x for x in bits if x).strip() or f"rNo={c.rno}"
-        print(f"## {idx}. {title}")
-        print(f"- URL: {c.url}")
-        if c.skills:
-            print(f"- 키워드/스킬: {c.skills}")
-        summary = c.raw_summary.replace("\n", " ")
-        if summary:
-            print(f"- 요약: {summary[:500]}")
-        print()
-
-
-def main() -> int:
+def build_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(description="Search public JobKorea talent summaries")
    parser.add_argument("--keyword", "-k", action="append", default=[], help="통합검색 키워드. 여러 번 지정 가능")
    parser.add_argument("--and-keyword", action="append", default=[], help="AND 키워드")
@ -386,14 +37,42 @@ def main() -> int:
    parser.add_argument("--limit", type=int, default=20, choices=[10, 20, 30, 50, 100])
    parser.add_argument("--sort", default="0", help="잡코리아 sf 정렬 코드. 기본 0")
    parser.add_argument("--json", action="store_true", help="JSON으로 출력")
-    args = parser.parse_args()
+    return parser
+
+
+def print_markdown(candidates: list[Candidate], matched: dict[str, list[str]], args: argparse.Namespace) -> None:
+    """Print the search results to stdout as a Markdown report.
+
+    The header lists the search and exclusion keywords, any matched filters,
+    the result count and a fixed notice. Each candidate then gets a numbered
+    section with its URL, optional skills and a summary capped at 500 characters.
+    """
+    search_terms = [*args.keyword, *args.and_keyword, *args.or_keyword]
+    print("# 잡코리아 인재검색 결과\n")
+    print(f"- 검색어: {', '.join(search_terms) or '(없음)'}")
+    print(f"- 제외어: {', '.join(args.exclude_keyword) or '(없음)'}")
+    if any(matched.values()):
+        print(f"- 매칭된 필터: {json.dumps(matched, ensure_ascii=False)}")
+    print(f"- 결과 수: {len(candidates)}")
+    print("- 주의: 이름/회사명은 잡코리아 공개 화면 기준으로 마스킹되어 있으며, 상세 이력서 확인·포지션 제안은 기업회원 로그인/권한/사용자 확인이 필요합니다.\n")
+    for position, entry in enumerate(candidates, start=1):
+        heading = " ".join(part for part in (entry.name, entry.meta, entry.career) if part).strip()
+        if not heading:
+            # Nothing printable: fall back to the resume number.
+            heading = f"rNo={entry.rno}"
+        print(f"## {position}. {heading}")
+        print(f"- URL: {entry.url}")
+        if entry.skills:
+            print(f"- 키워드/스킬: {entry.skills}")
+        flattened = entry.raw_summary.replace("\n", " ")
+        if flattened:
+            print(f"- 요약: {flattened[:500]}")
+        print()
+
+
+def run(argv: list[str] | None = None) -> int:
+    parser = build_parser()
+    args = parser.parse_args(argv)

    if not (args.keyword or args.and_keyword or args.or_keyword or args.job_category or args.work_area or args.residential_area):
        parser.error("최소 하나 이상의 --keyword, --job-category, --work-area 등을 지정하세요")

    sc, matched = build_search_condition(args)
    markup = post_search(sc)
-    if "로그인" in clean_text(markup)[:500] and "인재" not in clean_text(markup)[:2000]:
+    cleaned = clean_text(markup)
+    if "로그인" in cleaned[:500] and "인재" not in cleaned[:2000]:
        raise RuntimeError("잡코리아가 로그인/차단 화면을 반환했습니다")
    candidates = parse_candidates(markup, args.limit)

@ -406,10 +85,10 @@ def main() -> int:

 if __name__ == "__main__":
    try:
-        raise SystemExit(main())
+        raise SystemExit(run())
    except urllib.error.HTTPError as exc:
        print(f"HTTP error: {exc.code} {exc.reason}", file=sys.stderr)
        raise SystemExit(2)
-    except Exception as exc:
+    except (RuntimeError, urllib.error.URLError) as exc:
        print(f"error: {exc}", file=sys.stderr)
        raise SystemExit(1)
--- a/jobkorea-talent-search/scripts/jobkorea_talent_search_condition.py
+++ b/jobkorea-talent-search/scripts/jobkorea_talent_search_condition.py
@ -0,0 +1,136 @@
+from __future__ import annotations
+
+import argparse
+import json
+import urllib.parse
+import urllib.request
+from collections.abc import Iterator
+from typing import Any
+
+from jobkorea_talent_models import AJAX_PATH, BASE_URL, DEFAULT_UA, FIND_PATH
+
+
+def fetch(url: str, *, data: bytes | None = None, headers: dict[str, str] | None = None) -> str:
+    """Request *url* and return the response body decoded as UTF-8.
+
+    A default User-Agent and Referer are always sent; *headers* overrides or
+    extends them. The request is a POST when *data* is non-empty and a GET
+    otherwise. Undecodable bytes are dropped and the timeout is 30 seconds.
+    """
+    merged_headers = {
+        "User-Agent": DEFAULT_UA,
+        "Referer": BASE_URL + FIND_PATH,
+        **(headers or {}),
+    }
+    verb = "POST" if data else "GET"
+    request = urllib.request.Request(url, data=data, headers=merged_headers, method=verb)
+    with urllib.request.urlopen(request, timeout=30) as response:
+        payload = response.read()
+    return payload.decode("utf-8", "ignore")
+
+
+def extract_json_object(source: str, marker: str) -> dict[str, Any]:
+    """Extract the first JSON object that follows *marker* in *source*.
+
+    The text is scanned from the first ``{`` after the marker, tracking brace
+    depth and skipping braces inside double-quoted strings (backslash escapes
+    are honoured), until the matching ``}`` is reached.
+
+    Args:
+        source: Text that embeds the object, e.g. a page with inline script.
+        marker: Literal text that precedes the object.
+
+    Returns:
+        The decoded object.
+
+    Raises:
+        RuntimeError: If the marker or an opening brace is missing, the object
+            is unterminated, the balanced text is not valid JSON, or the
+            decoded value is not a dict.
+    """
+    idx = source.find(marker)
+    if idx < 0:
+        raise RuntimeError(f"cannot find marker: {marker}")
+    start = source.find("{", idx)
+    if start < 0:
+        raise RuntimeError("cannot find JSON object start")
+    depth = 0
+    in_string = False
+    escape = False
+    for pos in range(start, len(source)):
+        ch = source[pos]
+        if in_string:
+            if escape:
+                # The escaped character is consumed verbatim.
+                escape = False
+            elif ch == "\\":
+                escape = True
+            elif ch == '"':
+                in_string = False
+            continue
+        if ch == '"':
+            in_string = True
+        elif ch == "{":
+            depth += 1
+        elif ch == "}":
+            depth -= 1
+            if depth == 0:
+                try:
+                    loaded = json.loads(source[start : pos + 1])
+                except json.JSONDecodeError as exc:
+                    # Report malformed JSON the same way as the other failures
+                    # here, so callers that handle RuntimeError also see it.
+                    raise RuntimeError(f"cannot parse JSON object after marker: {marker}") from exc
+                if not isinstance(loaded, dict):
+                    raise RuntimeError("search condition was not a JSON object")
+                return loaded
+    raise RuntimeError("unterminated JSON object")
+
+
+def iter_nodes(node: Any) -> Iterator[dict[str, Any]]:
+    """Yield every dict reachable from *node*, parents before their children.
+
+    Lists are traversed but not yielded themselves; any other value ends the
+    descent on that branch.
+    """
+    if isinstance(node, list):
+        for element in node:
+            yield from iter_nodes(element)
+        return
+    if not isinstance(node, dict):
+        return
+    yield node
+    for child in node.values():
+        yield from iter_nodes(child)
+
+
+def mark_matching_nodes(sc: dict[str, Any], top_key: str, labels: list[str]) -> list[str]:
+    """Switch on every node under ``sc[top_key]`` that matches one of *labels*.
+
+    A node matches when a label, compared case-insensitively, equals its code
+    (``"v"``) or occurs inside its title (``"t"``). On a match, each of the
+    keys ``"s"``, ``"c"`` and ``"use"`` that the node already has is set to 1.
+
+    Returns:
+        The title (or the code when the title is empty) of each matched node.
+    """
+    if not labels:
+        return []
+    section = sc.get(top_key)
+    if section is None:
+        return []
+
+    needles = [label.strip().lower() for label in labels if label.strip()]
+    hits: list[str] = []
+    for node in iter_nodes(section):
+        title = str(node.get("t", ""))
+        code = str(node.get("v", ""))
+        lowered_title = title.lower()
+        lowered_code = code.lower()
+        # Equality with the title is already covered by the substring test.
+        is_match = any(needle == lowered_code or needle in lowered_title for needle in needles)
+        if not is_match:
+            continue
+        for flag in ("s", "c", "use"):
+            if flag in node:
+                node[flag] = 1
+        hits.append(title or code)
+    return hits
+
+
+def build_search_condition(args: argparse.Namespace) -> tuple[dict[str, Any], dict[str, list[str]]]:
+    """Fetch the page's default search condition and overlay the CLI arguments.
+
+    The ``searchcondition`` object embedded in the find page is used as the
+    template. Paging, sort, keyword terms, career bounds and the job/area
+    filters are then written into it.
+
+    Returns:
+        A pair of the populated search condition and, per filter kind, the
+        titles of the nodes that were switched on.
+    """
+    landing_page = fetch(BASE_URL + FIND_PATH)
+    sc = extract_json_object(landing_page, "var searchcondition =")
+
+    sc.update({"p": args.page, "ps": args.limit, "saveno": 0, "ff": 0, "sf": args.sort})
+
+    # Each keyword group is paired with the logictypecode sent for its terms.
+    keyword_groups = (
+        (args.keyword, 0),
+        (args.and_keyword, 1),
+        (args.or_keyword, 3),
+        (args.exclude_keyword, 2),
+    )
+    terms: list[dict[str, Any]] = [
+        {"s": 1, "c": 1, "t": kw, "v": kw, "kwdtypecode": 1, "logictypecode": logic}
+        for group, logic in keyword_groups
+        for kw in group
+    ]
+    sc["totalkeywordlist"] = terms
+
+    if terms:
+        # The first term is also written into the "pfr" section.
+        pfr = sc.setdefault("pfr", {})
+        pfr.setdefault("ck", {})["Keyword"] = terms[0]["t"]
+        pfr["ck"]["KeywordType"] = 1
+        pfr["n"] = 1
+
+    for bound, field in ((args.career_min, "s"), (args.career_max, "e")):
+        if bound is not None:
+            sc.setdefault("career", {})[field] = str(bound)
+
+    job_hits = mark_matching_nodes(sc, "jobtype", args.job_category)
+    work_hits = mark_matching_nodes(sc, "workarea", args.work_area)
+    home_hits = mark_matching_nodes(sc, "residentialarea", args.residential_area)
+    matched = {
+        "job_category": job_hits,
+        "work_area": work_hits,
+        "residential_area": home_hits,
+    }
+    return sc, matched
+
+
+def post_search(sc: dict[str, Any]) -> str:
+    """POST the search condition to the AJAX endpoint and return the response text.
+
+    The condition is serialised as JSON (non-ASCII characters kept as-is) and
+    sent as the ``searchCondition`` form field.
+    """
+    serialized = json.dumps(sc, ensure_ascii=False)
+    form_body = urllib.parse.urlencode({"searchCondition": serialized}).encode()
+    ajax_headers = {
+        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
+        "X-Requested-With": "XMLHttpRequest",
+    }
+    return fetch(BASE_URL + AJAX_PATH, data=form_body, headers=ajax_headers)
--- a/jobkorea-talent-search/scripts/test_jobkorea_talent_search.py
+++ b/jobkorea-talent-search/scripts/test_jobkorea_talent_search.py
@ -8,6 +8,7 @@ import unittest
 from pathlib import Path

 SCRIPT = Path(__file__).with_name("jobkorea_talent_search.py")
+sys.path.insert(0, str(SCRIPT.parent))
 spec = importlib.util.spec_from_file_location("jobkorea_talent_search", SCRIPT)
 assert spec is not None
 helper = importlib.util.module_from_spec(spec)
--- a/package.json
+++ b/package.json
@ -11,10 +11,10 @@
    "build": "npm run build --workspaces --if-present",
    "build:manus-bundle": "node scripts/build-manus-bundle.js",
    "generate:plugin-manifest": "node scripts/generate-plugin-manifest.js",
-    "lint": "node --check scripts/skill-docs.test.js scripts/korean_character_count.js scripts/test_korean_character_count.js scripts/korean_middle_korean.js scripts/test_korean_middle_korean.js scripts/build-manus-bundle.js scripts/test_build_manus_bundle.js scripts/workflow-actions.test.js scripts/generate-plugin-manifest.js scripts/test_generate_plugin_manifest.js && python3 -m py_compile scripts/k_skill_cleaner.py scripts/test_k_skill_cleaner.py corporate-registration-consulting/scripts/fill_official_hwp.py k-skill-cleaner/scripts/k_skill_cleaner.py scripts/fine_dust.py scripts/test_fine_dust.py scripts/ktx_booking.py scripts/test_ktx_booking.py scripts/srt_booking.py scripts/srt_seats.py scripts/srt_booking_test_support.py scripts/test_srt_booking.py scripts/test_srt_seats.py scripts/sillok_search.py scripts/test_sillok_search.py scripts/korean_spell_check.py scripts/test_korean_spell_check.py scripts/patent_search.py scripts/test_patent_search.py scripts/mfds_drug_safety.py scripts/test_mfds_drug_safety.py scripts/nts_business_registration.py scripts/test_nts_business_registration.py scripts/mfds_food_safety.py scripts/test_mfds_food_safety.py scripts/zipcode_search.py scripts/test_zipcode_search.py scripts/subway_lost_property.py scripts/test_subway_lost_property.py scripts/geeknews_search.py scripts/test_geeknews_search.py nts-business-registration/scripts/nts_business_registration.py biz-health-check/scripts/biz_health_check.py nts-tax-delinquency/scripts/nts_tax_delinquency.py localdata-business-status/scripts/localdata_business_status.py g2b-sanctioned-supplier/scripts/g2b_sanctioned_supplier.py fsc-corporate-info/scripts/fsc_corporate_info.py national-pension-workplace/scripts/national_pension_workplace.py scripts/test_naver_blog_search.py scripts/test_korean_slang_writing.py scripts/test_coupang_partners_mcp_wrapper.py scripts/test_ohou_today_deal.py scripts/ticket_availability.py scripts/test_ticket_availability.py scripts/test_danawa_price_search.py 
ticket-availability/scripts/ticket_availability.py coupang-product-search/scripts/coupang_partners_mcp.py ohou-today-deal/scripts/ohou_today_deal.py naver-blog-research/scripts/_naver_http.py naver-blog-research/scripts/naver_search.py naver-blog-research/scripts/naver_read.py naver-blog-research/scripts/naver_download_images.py korean-slang-writing/scripts/_slang_http.py korean-slang-writing/scripts/slang_search.py korean-slang-writing/scripts/slang_lookup.py korean-scholarship-search/scripts/scholarship_filter.py korean-scholarship-search/scripts/test_scholarship_filter.py korean-scholarship-search/scripts/university_search_plan.py seoul-bike/scripts/seoul_bike.py scripts/test_seoul_bike.py danawa-price-search/scripts/danawa_search.py kosis-stats/scripts/run_kosis_stats.py kosis-stats/tests/test_run_kosis_stats.py kstartup-search/scripts/run_kstartup.py kstartup-search/tests/test_run_kstartup.py intercity-bus-booking/scripts/intercity_bus_search.py daangn-used-goods-search/scripts/daangn_used_goods.py daangn-realty-search/scripts/daangn_realty.py daangn-jobs-search/scripts/daangn_jobs.py daangn-cars-search/scripts/daangn_cars.py foresttrip-vacancy/scripts/run_foresttrip_vacancy.py foresttrip-vacancy/tests/test_run_foresttrip_vacancy.py && npm run lint --workspaces --if-present && ./scripts/validate-skills.sh && node scripts/generate-plugin-manifest.js --check",
+    "lint": "node --check scripts/skill-docs.test.js scripts/korean_character_count.js scripts/test_korean_character_count.js scripts/korean_middle_korean.js scripts/test_korean_middle_korean.js scripts/build-manus-bundle.js scripts/test_build_manus_bundle.js scripts/workflow-actions.test.js scripts/generate-plugin-manifest.js scripts/test_generate_plugin_manifest.js && python3 -m py_compile scripts/k_skill_cleaner.py scripts/test_k_skill_cleaner.py corporate-registration-consulting/scripts/fill_official_hwp.py k-skill-cleaner/scripts/k_skill_cleaner.py scripts/fine_dust.py scripts/test_fine_dust.py scripts/ktx_booking.py scripts/test_ktx_booking.py scripts/srt_booking.py scripts/srt_seats.py scripts/srt_booking_test_support.py scripts/test_srt_booking.py scripts/test_srt_seats.py scripts/sillok_search.py scripts/test_sillok_search.py scripts/korean_spell_check.py scripts/test_korean_spell_check.py scripts/patent_search.py scripts/test_patent_search.py scripts/mfds_drug_safety.py scripts/test_mfds_drug_safety.py scripts/nts_business_registration.py scripts/test_nts_business_registration.py scripts/mfds_food_safety.py scripts/test_mfds_food_safety.py scripts/zipcode_search.py scripts/test_zipcode_search.py scripts/subway_lost_property.py scripts/test_subway_lost_property.py scripts/geeknews_search.py scripts/test_geeknews_search.py nts-business-registration/scripts/nts_business_registration.py biz-health-check/scripts/biz_health_check.py nts-tax-delinquency/scripts/nts_tax_delinquency.py localdata-business-status/scripts/localdata_business_status.py g2b-sanctioned-supplier/scripts/g2b_sanctioned_supplier.py fsc-corporate-info/scripts/fsc_corporate_info.py national-pension-workplace/scripts/national_pension_workplace.py scripts/test_naver_blog_search.py scripts/test_korean_slang_writing.py scripts/test_coupang_partners_mcp_wrapper.py scripts/test_ohou_today_deal.py scripts/ticket_availability.py scripts/test_ticket_availability.py scripts/test_danawa_price_search.py 
ticket-availability/scripts/ticket_availability.py coupang-product-search/scripts/coupang_partners_mcp.py ohou-today-deal/scripts/ohou_today_deal.py naver-blog-research/scripts/_naver_http.py naver-blog-research/scripts/naver_search.py naver-blog-research/scripts/naver_read.py naver-blog-research/scripts/naver_download_images.py korean-slang-writing/scripts/_slang_http.py korean-slang-writing/scripts/slang_search.py korean-slang-writing/scripts/slang_lookup.py korean-scholarship-search/scripts/scholarship_filter.py korean-scholarship-search/scripts/test_scholarship_filter.py korean-scholarship-search/scripts/university_search_plan.py seoul-bike/scripts/seoul_bike.py scripts/test_seoul_bike.py danawa-price-search/scripts/danawa_search.py kosis-stats/scripts/run_kosis_stats.py kosis-stats/tests/test_run_kosis_stats.py kstartup-search/scripts/run_kstartup.py kstartup-search/tests/test_run_kstartup.py intercity-bus-booking/scripts/intercity_bus_search.py daangn-used-goods-search/scripts/daangn_used_goods.py daangn-realty-search/scripts/daangn_realty.py daangn-jobs-search/scripts/daangn_jobs.py daangn-cars-search/scripts/daangn_cars.py foresttrip-vacancy/scripts/run_foresttrip_vacancy.py foresttrip-vacancy/tests/test_run_foresttrip_vacancy.py jobkorea-talent-search/scripts/jobkorea_talent_models.py jobkorea-talent-search/scripts/jobkorea_talent_parse.py jobkorea-talent-search/scripts/jobkorea_talent_search_condition.py jobkorea-talent-search/scripts/jobkorea_talent_search.py jobkorea-talent-search/scripts/test_jobkorea_talent_search.py && npm run lint --workspaces --if-present && ./scripts/validate-skills.sh && node scripts/generate-plugin-manifest.js --check",
    "typecheck": "tsc --noEmit",
    "prepare:python-test-env": "python3 -m venv .cache/python-test-venv && ./.cache/python-test-venv/bin/python -m pip install --quiet beautifulsoup4",
-    "test": "npm run prepare:python-test-env && node --test scripts/skill-docs.test.js scripts/test_korean_character_count.js scripts/test_korean_middle_korean.js scripts/test_build_manus_bundle.js scripts/workflow-actions.test.js scripts/test_generate_plugin_manifest.js && PYTHONPATH=.:scripts ./.cache/python-test-venv/bin/python -m unittest scripts.test_k_skill_cleaner scripts.test_fine_dust scripts.test_ktx_booking scripts.test_srt_booking scripts.test_srt_seats scripts.test_sillok_search scripts.test_korean_spell_check scripts.test_patent_search scripts.test_mfds_drug_safety scripts.test_nts_business_registration scripts.test_mfds_food_safety scripts.test_zipcode_search scripts.test_subway_lost_property scripts.test_geeknews_search scripts.test_naver_blog_search scripts.test_korean_slang_writing scripts.test_coupang_partners_mcp_wrapper scripts.test_ohou_today_deal scripts.test_ticket_availability scripts.test_seoul_bike scripts.test_danawa_price_search && PYTHONPATH=.:scripts:korean-scholarship-search/scripts ./.cache/python-test-venv/bin/python -m unittest discover -s korean-scholarship-search/scripts -p 'test_scholarship_filter.py' && PYTHONPATH=.:scripts:kosis-stats/scripts ./.cache/python-test-venv/bin/python -m unittest discover -s kosis-stats/tests -p 'test_run_kosis_stats.py' && PYTHONPATH=.:scripts:kstartup-search/scripts ./.cache/python-test-venv/bin/python -m unittest discover -s kstartup-search/tests -p 'test_run_kstartup.py' && PYTHONPATH=.:foresttrip-vacancy/scripts ./.cache/python-test-venv/bin/python -m unittest discover -s foresttrip-vacancy/tests -p 'test_run_foresttrip_vacancy.py' && npm run test --workspaces --if-present && ./scripts/validate-skills.sh",
+    "test": "npm run prepare:python-test-env && node --test scripts/skill-docs.test.js scripts/test_korean_character_count.js scripts/test_korean_middle_korean.js scripts/test_build_manus_bundle.js scripts/workflow-actions.test.js scripts/test_generate_plugin_manifest.js && PYTHONPATH=.:scripts ./.cache/python-test-venv/bin/python -m unittest scripts.test_k_skill_cleaner scripts.test_fine_dust scripts.test_ktx_booking scripts.test_srt_booking scripts.test_srt_seats scripts.test_sillok_search scripts.test_korean_spell_check scripts.test_patent_search scripts.test_mfds_drug_safety scripts.test_nts_business_registration scripts.test_mfds_food_safety scripts.test_zipcode_search scripts.test_subway_lost_property scripts.test_geeknews_search scripts.test_naver_blog_search scripts.test_korean_slang_writing scripts.test_coupang_partners_mcp_wrapper scripts.test_ohou_today_deal scripts.test_ticket_availability scripts.test_seoul_bike scripts.test_danawa_price_search && PYTHONPATH=.:scripts:korean-scholarship-search/scripts ./.cache/python-test-venv/bin/python -m unittest discover -s korean-scholarship-search/scripts -p 'test_scholarship_filter.py' && PYTHONPATH=.:scripts:kosis-stats/scripts ./.cache/python-test-venv/bin/python -m unittest discover -s kosis-stats/tests -p 'test_run_kosis_stats.py' && PYTHONPATH=.:scripts:kstartup-search/scripts ./.cache/python-test-venv/bin/python -m unittest discover -s kstartup-search/tests -p 'test_run_kstartup.py' && PYTHONPATH=.:foresttrip-vacancy/scripts ./.cache/python-test-venv/bin/python -m unittest discover -s foresttrip-vacancy/tests -p 'test_run_foresttrip_vacancy.py' && PYTHONPATH=.:jobkorea-talent-search/scripts ./.cache/python-test-venv/bin/python -m unittest discover -s jobkorea-talent-search/scripts -p 'test_jobkorea_talent_search.py' && npm run test --workspaces --if-present && ./scripts/validate-skills.sh",
    "pack:dry-run": "npm pack --workspace k-lotto --dry-run && npm pack --workspace daiso-product-search --dry-run && npm pack --workspace market-kurly-search --dry-run && npm pack --workspace kakao-bar-nearby --dry-run && npm pack --workspace cheap-gas-nearby --dry-run && npm pack --workspace public-restroom-nearby --dry-run && npm pack --workspace parking-lot-search --dry-run && npm pack --workspace court-auction-notice-search --dry-run && npm pack --workspace donation-place-search --dry-run && npm pack --workspace gongsijiga-search --dry-run && npm pack --workspace kbl-results --dry-run && npm pack --workspace kleague-results --dry-run && npm pack --workspace lck-analytics --dry-run && npm pack --workspace toss-securities --dry-run && npm pack --workspace hipass-receipt --dry-run && npm pack --workspace used-car-price-search --dry-run && npm pack --workspace k-skill-rhwp --dry-run && npm pack --workspace korean-marathon-schedule --dry-run && npm pack --workspace gangnamunni-clinic-search --dry-run && npm pack --workspace daishin-report-search --dry-run && npm pack --workspace sh-notice-search --dry-run && npm pack --workspace emergency-room-beds --dry-run && npm pack --workspace local-election-candidate-search --dry-run",
    "ci": "npm run lint && npm run typecheck && npm run test && npm run pack:dry-run",
    "version-packages": "changeset version",