Wire JobKorea talent helper into CI

This commit is contained in:
Jeffrey (Dongkyu) Kim 2026-06-18 10:37:08 +09:00
commit b14f65361f
6 changed files with 384 additions and 355 deletions

View file

@ -0,0 +1,27 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Final
BASE_URL: Final = "https://www.jobkorea.co.kr"
FIND_PATH: Final = "/corp/person/find"
AJAX_PATH: Final = "/corp/person/detailsearchajax"
DEFAULT_UA: Final = (
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0 Safari/537.36"
)
@dataclass(frozen=True, slots=True)
class Candidate:
rno: str
url: str
name: str = ""
meta: str = ""
career: str = ""
education: str = ""
locations: str = ""
salary: str = ""
skills: str = ""
badges: str = ""
raw_summary: str = ""

View file

@ -0,0 +1,186 @@
from __future__ import annotations
import html
import re
import urllib.parse
from jobkorea_talent_models import BASE_URL, Candidate
ACTION_CONTROL_RE = re.compile(
r"^(?:스크랩\s*\d*|저장하기|닫기|포지션\s*제안|메모하기|프로필\s*확인|이력서\s*확인|펼쳐보기|접기|이전|다음)$"
)
ACTION_CONTROL_INLINE_RE = re.compile(
r"(?:스크랩\s*\d+|저장하기|닫기|포지션\s*제안|메모하기|프로필\s*확인|이력서\s*확인|펼쳐보기|접기|이전|다음)"
)
RESUME_LINK_RE = re.compile(r'href="(?P<href>/corp/person/find/resume/view\?rNo=(?P<rno>\d+))"')
def clean_text(value: str) -> str:
value = html.unescape(value)
value = re.sub(r"<script[\s\S]*?</script>", " ", value, flags=re.I)
value = re.sub(r"<style[\s\S]*?</style>", " ", value, flags=re.I)
value = re.sub(r"<[^>]+>", " ", value)
value = re.sub(r"[ \t\r\f\v]+", " ", value)
value = re.sub(r"\n\s*\n+", "\n", value)
return value.strip()
def is_action_control_label(value: str) -> bool:
label = re.sub(r"\s+", " ", html.unescape(value)).strip()
return bool(label and ACTION_CONTROL_RE.match(label))
def filter_action_control_text(value: str) -> str:
lines = []
for line in value.splitlines():
label = line.strip()
if not label or is_action_control_label(label):
continue
label = ACTION_CONTROL_INLINE_RE.sub(" ", label)
label = re.sub(r"\s+", " ", label).strip()
if label:
lines.append(label)
return "\n".join(lines).strip()
def row_contains_other_resume(candidate_markup: str, rno: str) -> bool:
refs: list[str] = []
for href_rno, data_rno in re.findall(r"rNo=(\d+)|data-rno=[\"'](\d+)[\"']", candidate_markup):
refs.append(href_rno or data_rno)
return any(ref != rno for ref in refs)
def extract_regex_candidate_markup(markup: str, match: re.Match[str], rno: str) -> str:
row_start = markup.rfind("<tr", 0, match.start())
if row_start >= 0:
row_open_end = markup.find(">", row_start, match.start())
row_end = markup.find("</tr>", match.end())
row_open = markup[row_start : row_open_end + 1] if row_open_end >= 0 else ""
if row_end >= 0 and f'data-rno="{rno}"' in row_open:
return markup[row_start : row_end + len("</tr>")]
booth_start = markup.rfind('<div class="booth"', 0, match.start())
if booth_start >= 0:
next_booth = markup.find('<div class="booth"', match.end())
section_end = markup.find("</section>", match.end())
end_candidates = [pos for pos in (next_booth, section_end) if pos >= 0]
booth_end = min(end_candidates) if end_candidates else min(len(markup), match.end() + 2500)
booth = markup[booth_start:booth_end]
if not row_contains_other_resume(booth, rno):
return booth
start = max(0, match.start() - 300)
end = min(len(markup), match.end() + 1200)
return markup[start:end]
def parse_with_bs4(markup: str, limit: int) -> list[Candidate] | None:
try:
from bs4 import BeautifulSoup
except ImportError:
return None
soup = BeautifulSoup(markup, "html.parser")
candidates: list[Candidate] = []
seen: set[str] = set()
for link in soup.select('a[href*="/corp/person/find/resume/view?rNo="]'):
raw_href = link.get("href", "")
href = raw_href if isinstance(raw_href, str) else ""
matched_rno = re.search(r"rNo=(\d+)", href)
if not matched_rno:
continue
rno = matched_rno.group(1)
if rno in seen:
continue
seen.add(rno)
container = (
link.find_parent("tr", attrs={"data-rno": rno})
or link.find_parent(class_=re.compile(r"(^|\s)booth(\s|$)", re.I))
or link.parent
)
if container and row_contains_other_resume(str(container), rno):
container = link.parent
raw = clean_text(str(container)) if container else clean_text(str(link))
texts = []
for node in container.find_all(["dt", "dd", "p", "span", "li"]) if container else []:
label = node.get_text(" ", strip=True)
if label and not is_action_control_label(label):
texts.append(label)
for btn in container.select(".keywordSkill button, .keywordBox button") if container else []:
label = btn.get_text(" ", strip=True)
if label and not is_action_control_label(label):
texts.append(label)
text_join = " | ".join(dict.fromkeys(texts))
name_scope = container.select_one(".nameAge") if container else None
dt = (name_scope or container).find("dt") if container else None
name = dt.get_text(" ", strip=True) if dt else ""
dd = dt.find_next("dd") if dt else None
meta = dd.get_text(" ", strip=True) if dd else ""
if not name:
m_name = re.search(r"([가-힣A-Za-z]OO)\s*\(([^)]*)\)", raw)
if m_name:
name = m_name.group(1)
meta = "(" + m_name.group(2) + ")"
skills = []
for btn in container.select(".keywordSkill button, .keywordBox button") if container else []:
label = btn.get_text(" ", strip=True)
if label and not is_action_control_label(label):
skills.append(label)
career_node = container.select_one(".career") if container else None
candidates.append(
Candidate(
rno=rno,
url=urllib.parse.urljoin(BASE_URL, href),
name=name,
meta=meta,
career=career_node.get_text(" ", strip=True) if career_node else "",
skills=", ".join(skills[:25]),
raw_summary=filter_action_control_text(text_join[:1000] or raw[:1000]),
)
)
if len(candidates) >= limit:
break
return candidates
def parse_with_regex(markup: str, limit: int) -> list[Candidate]:
candidates: list[Candidate] = []
seen: set[str] = set()
for match in RESUME_LINK_RE.finditer(markup):
rno = match.group("rno")
if rno in seen:
continue
seen.add(rno)
raw_markup = extract_regex_candidate_markup(markup, match, rno)
raw = clean_text(raw_markup)
name = ""
meta = ""
name_match = re.search(r"([가-힣A-Za-z]OO)\s*\(([^)]*)\)", raw)
if name_match:
name = name_match.group(1)
meta = "(" + name_match.group(2) + ")"
candidates.append(
Candidate(
rno=rno,
url=urllib.parse.urljoin(BASE_URL, match.group("href")),
name=name,
meta=meta,
raw_summary=filter_action_control_text(raw[:1000]),
)
)
if len(candidates) >= limit:
break
return candidates
def parse_candidates(markup: str, limit: int) -> list[Candidate]:
parsed = parse_with_bs4(markup, limit)
if parsed is not None:
return parsed
return parse_with_regex(markup, limit)

View file

@ -10,368 +10,19 @@ paid entitlements, or user confirmation.
from __future__ import annotations
import argparse
import html
import json
import re
import sys
import urllib.error
import urllib.parse
import urllib.request
from dataclasses import dataclass, asdict
from typing import Any
from dataclasses import asdict
BASE_URL = "https://www.jobkorea.co.kr"
FIND_PATH = "/corp/person/find"
AJAX_PATH = "/corp/person/detailsearchajax"
DEFAULT_UA = (
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0 Safari/537.36"
)
from jobkorea_talent_models import Candidate
from jobkorea_talent_parse import clean_text, parse_candidates
from jobkorea_talent_search_condition import build_search_condition, post_search
__all__ = ["parse_candidates"]
@dataclass
class Candidate:
rno: str
url: str
name: str = ""
meta: str = ""
career: str = ""
education: str = ""
locations: str = ""
salary: str = ""
skills: str = ""
badges: str = ""
raw_summary: str = ""
def fetch(url: str, *, data: bytes | None = None, headers: dict[str, str] | None = None) -> str:
req_headers = {"User-Agent": DEFAULT_UA, "Referer": BASE_URL + FIND_PATH}
if headers:
req_headers.update(headers)
req = urllib.request.Request(url, data=data, headers=req_headers, method="POST" if data else "GET")
with urllib.request.urlopen(req, timeout=30) as resp:
return resp.read().decode("utf-8", "ignore")
def extract_json_object(source: str, marker: str) -> dict[str, Any]:
idx = source.find(marker)
if idx < 0:
raise RuntimeError(f"cannot find marker: {marker}")
start = source.find("{", idx)
if start < 0:
raise RuntimeError("cannot find JSON object start")
depth = 0
in_string = False
escape = False
for pos in range(start, len(source)):
ch = source[pos]
if in_string:
if escape:
escape = False
elif ch == "\\":
escape = True
elif ch == '"':
in_string = False
continue
if ch == '"':
in_string = True
elif ch == "{":
depth += 1
elif ch == "}":
depth -= 1
if depth == 0:
return json.loads(source[start : pos + 1])
raise RuntimeError("unterminated JSON object")
def iter_nodes(node: Any):
if isinstance(node, dict):
yield node
for value in node.values():
yield from iter_nodes(value)
elif isinstance(node, list):
for item in node:
yield from iter_nodes(item)
def mark_matching_nodes(sc: dict[str, Any], top_key: str, labels: list[str]) -> list[str]:
if not labels:
return []
section = sc.get(top_key)
if section is None:
return []
wanted = [x.strip().lower() for x in labels if x.strip()]
matched: list[str] = []
for node in iter_nodes(section):
title = str(node.get("t", ""))
code = str(node.get("v", ""))
title_l = title.lower()
code_l = code.lower()
if any(w == title_l or w == code_l or w in title_l for w in wanted):
for k in ("s", "c", "use"):
if k in node:
node[k] = 1
matched.append(title or code)
return matched
def build_search_condition(args: argparse.Namespace) -> tuple[dict[str, Any], dict[str, Any]]:
first = fetch(BASE_URL + FIND_PATH)
sc = extract_json_object(first, "var searchcondition =")
sc["p"] = args.page
sc["ps"] = args.limit
sc["saveno"] = 0
sc["ff"] = 0
sc["sf"] = args.sort
terms: list[dict[str, Any]] = []
for kw in args.keyword:
terms.append({"s": 1, "c": 1, "t": kw, "v": kw, "kwdtypecode": 1, "logictypecode": 0})
for kw in args.and_keyword:
terms.append({"s": 1, "c": 1, "t": kw, "v": kw, "kwdtypecode": 1, "logictypecode": 1})
for kw in args.or_keyword:
terms.append({"s": 1, "c": 1, "t": kw, "v": kw, "kwdtypecode": 1, "logictypecode": 3})
for kw in args.exclude_keyword:
terms.append({"s": 1, "c": 1, "t": kw, "v": kw, "kwdtypecode": 1, "logictypecode": 2})
sc["totalkeywordlist"] = terms
if terms:
first_kw = terms[0]["t"]
sc.setdefault("pfr", {}).setdefault("ck", {})["Keyword"] = first_kw
sc["pfr"]["ck"]["KeywordType"] = 1
sc["pfr"]["n"] = 1
if args.career_min is not None:
sc.setdefault("career", {})["s"] = str(args.career_min)
if args.career_max is not None:
sc.setdefault("career", {})["e"] = str(args.career_max)
matched = {
"job_category": mark_matching_nodes(sc, "jobtype", args.job_category),
"work_area": mark_matching_nodes(sc, "workarea", args.work_area),
"residential_area": mark_matching_nodes(sc, "residentialarea", args.residential_area),
}
return sc, matched
def post_search(sc: dict[str, Any]) -> str:
body = urllib.parse.urlencode({"searchCondition": json.dumps(sc, ensure_ascii=False)}).encode()
return fetch(
BASE_URL + AJAX_PATH,
data=body,
headers={
"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
"X-Requested-With": "XMLHttpRequest",
},
)
def clean_text(value: str) -> str:
value = html.unescape(value)
value = re.sub(r"<script[\s\S]*?</script>", " ", value, flags=re.I)
value = re.sub(r"<style[\s\S]*?</style>", " ", value, flags=re.I)
value = re.sub(r"<[^>]+>", " ", value)
value = re.sub(r"[ \t\r\f\v]+", " ", value)
value = re.sub(r"\n\s*\n+", "\n", value)
return value.strip()
ACTION_CONTROL_RE = re.compile(
r"^(?:스크랩\s*\d*|저장하기|닫기|포지션\s*제안|메모하기|프로필\s*확인|이력서\s*확인|펼쳐보기|접기|이전|다음)$"
)
ACTION_CONTROL_INLINE_RE = re.compile(
r"(?:스크랩\s*\d+|저장하기|닫기|포지션\s*제안|메모하기|프로필\s*확인|이력서\s*확인|펼쳐보기|접기|이전|다음)"
)
def is_action_control_label(value: str) -> bool:
label = re.sub(r"\s+", " ", html.unescape(value)).strip()
return bool(label and ACTION_CONTROL_RE.match(label))
def filter_action_control_text(value: str) -> str:
lines = []
for line in value.splitlines():
label = line.strip()
if not label or is_action_control_label(label):
continue
label = ACTION_CONTROL_INLINE_RE.sub(" ", label)
label = re.sub(r"\s+", " ", label).strip()
if label:
lines.append(label)
return "\n".join(lines).strip()
def row_contains_other_resume(candidate_markup: str, rno: str) -> bool:
refs: list[str] = []
for href_rno, data_rno in re.findall(r"rNo=(\d+)|data-rno=[\"'](\d+)[\"']", candidate_markup):
refs.append(href_rno or data_rno)
return any(ref != rno for ref in refs)
def extract_regex_candidate_markup(markup: str, match: re.Match[str], rno: str) -> str:
row_start = markup.rfind("<tr", 0, match.start())
if row_start >= 0:
row_open_end = markup.find(">", row_start, match.start())
row_end = markup.find("</tr>", match.end())
row_open = markup[row_start : row_open_end + 1] if row_open_end >= 0 else ""
if row_end >= 0 and f'data-rno="{rno}"' in row_open:
return markup[row_start : row_end + len("</tr>")]
booth_start = markup.rfind('<div class="booth"', 0, match.start())
if booth_start >= 0:
next_booth = markup.find('<div class="booth"', match.end())
section_end = markup.find('</section>', match.end())
end_candidates = [pos for pos in (next_booth, section_end) if pos >= 0]
booth_end = min(end_candidates) if end_candidates else min(len(markup), match.end() + 2500)
booth = markup[booth_start:booth_end]
if not row_contains_other_resume(booth, rno):
return booth
start = max(0, match.start() - 300)
end = min(len(markup), match.end() + 1200)
return markup[start:end]
def parse_with_bs4(markup: str, limit: int) -> list[Candidate] | None:
try:
from bs4 import BeautifulSoup # type: ignore
except Exception:
return None
soup = BeautifulSoup(markup, "html.parser")
candidates: list[Candidate] = []
seen: set[str] = set()
for link in soup.select('a[href*="/corp/person/find/resume/view?rNo="]'):
href = link.get("href", "")
m = re.search(r"rNo=(\d+)", href)
if not m:
continue
rno = m.group(1)
if rno in seen:
continue
seen.add(rno)
container = (
link.find_parent("tr", attrs={"data-rno": rno})
or link.find_parent(class_=re.compile(r"(^|\s)booth(\s|$)", re.I))
or link.parent
)
if container and row_contains_other_resume(str(container), rno):
# Broad ancestors such as tblSearchList/personList can contain several resumes.
# Falling back to the link itself is safer than mixing candidate evidence.
container = link.parent
raw = clean_text(str(container)) if container else clean_text(str(link))
texts = []
for node in container.find_all(["dt", "dd", "p", "span", "li"]) if container else []:
label = node.get_text(" ", strip=True)
if label and not is_action_control_label(label):
texts.append(label)
for btn in container.select(".keywordSkill button, .keywordBox button") if container else []:
label = btn.get_text(" ", strip=True)
if label and not is_action_control_label(label):
texts.append(label)
text_join = " | ".join(dict.fromkeys(texts))
name = ""
meta = ""
name_scope = container.select_one(".nameAge") if container else None
dt = (name_scope or container).find("dt") if container else None
if dt:
name = dt.get_text(" ", strip=True)
dd = dt.find_next("dd")
if dd:
meta = dd.get_text(" ", strip=True)
if not name:
m_name = re.search(r"([가-힣A-Za-z]OO)\s*\(([^)]*)\)", raw)
if m_name:
name = m_name.group(1)
meta = "(" + m_name.group(2) + ")"
skills = []
for btn in container.select(".keywordSkill button, .keywordBox button") if container else []:
label = btn.get_text(" ", strip=True)
if label and not is_action_control_label(label):
skills.append(label)
candidates.append(
Candidate(
rno=rno,
url=urllib.parse.urljoin(BASE_URL, href),
name=name,
meta=meta,
career=(container.select_one(".career").get_text(" ", strip=True) if container and container.select_one(".career") else ""),
skills=", ".join(skills[:25]),
raw_summary=filter_action_control_text(text_join[:1000] or raw[:1000]),
)
)
if len(candidates) >= limit:
break
return candidates
def parse_with_regex(markup: str, limit: int) -> list[Candidate]:
candidates: list[Candidate] = []
seen: set[str] = set()
for m in re.finditer(r'href="(?P<href>/corp/person/find/resume/view\?rNo=(?P<rno>\d+))"', markup):
rno = m.group("rno")
if rno in seen:
continue
seen.add(rno)
raw_markup = extract_regex_candidate_markup(markup, m, rno)
raw = clean_text(raw_markup)
name = ""
meta = ""
nm = re.search(r"([가-힣A-Za-z]OO)\s*\(([^)]*)\)", raw)
if nm:
name = nm.group(1)
meta = "(" + nm.group(2) + ")"
candidates.append(
Candidate(
rno=rno,
url=urllib.parse.urljoin(BASE_URL, m.group("href")),
name=name,
meta=meta,
raw_summary=filter_action_control_text(raw[:1000]),
)
)
if len(candidates) >= limit:
break
return candidates
def parse_candidates(markup: str, limit: int) -> list[Candidate]:
parsed = parse_with_bs4(markup, limit)
if parsed is not None:
return parsed
return parse_with_regex(markup, limit)
def print_markdown(candidates: list[Candidate], matched: dict[str, Any], args: argparse.Namespace) -> None:
print(f"# 잡코리아 인재검색 결과\n")
print(f"- 검색어: {', '.join(args.keyword + args.and_keyword + args.or_keyword) or '(없음)'}")
print(f"- 제외어: {', '.join(args.exclude_keyword) or '(없음)'}")
if any(matched.values()):
print(f"- 매칭된 필터: {json.dumps(matched, ensure_ascii=False)}")
print(f"- 결과 수: {len(candidates)}")
print("- 주의: 이름/회사명은 잡코리아 공개 화면 기준으로 마스킹되어 있으며, 상세 이력서 확인·포지션 제안은 기업회원 로그인/권한/사용자 확인이 필요합니다.\n")
for idx, c in enumerate(candidates, 1):
bits = [c.name, c.meta, c.career]
title = " ".join(x for x in bits if x).strip() or f"rNo={c.rno}"
print(f"## {idx}. {title}")
print(f"- URL: {c.url}")
if c.skills:
print(f"- 키워드/스킬: {c.skills}")
summary = c.raw_summary.replace("\n", " ")
if summary:
print(f"- 요약: {summary[:500]}")
print()
def main() -> int:
def build_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(description="Search public JobKorea talent summaries")
parser.add_argument("--keyword", "-k", action="append", default=[], help="통합검색 키워드. 여러 번 지정 가능")
parser.add_argument("--and-keyword", action="append", default=[], help="AND 키워드")
@ -386,14 +37,42 @@ def main() -> int:
parser.add_argument("--limit", type=int, default=20, choices=[10, 20, 30, 50, 100])
parser.add_argument("--sort", default="0", help="잡코리아 sf 정렬 코드. 기본 0")
parser.add_argument("--json", action="store_true", help="JSON으로 출력")
args = parser.parse_args()
return parser
def print_markdown(candidates: list[Candidate], matched: dict[str, list[str]], args: argparse.Namespace) -> None:
print("# 잡코리아 인재검색 결과\n")
print(f"- 검색어: {', '.join(args.keyword + args.and_keyword + args.or_keyword) or '(없음)'}")
print(f"- 제외어: {', '.join(args.exclude_keyword) or '(없음)'}")
if any(matched.values()):
print(f"- 매칭된 필터: {json.dumps(matched, ensure_ascii=False)}")
print(f"- 결과 수: {len(candidates)}")
print("- 주의: 이름/회사명은 잡코리아 공개 화면 기준으로 마스킹되어 있으며, 상세 이력서 확인·포지션 제안은 기업회원 로그인/권한/사용자 확인이 필요합니다.\n")
for idx, candidate in enumerate(candidates, 1):
c = candidate
bits = [c.name, c.meta, c.career]
title = " ".join(x for x in bits if x).strip() or f"rNo={c.rno}"
print(f"## {idx}. {title}")
print(f"- URL: {c.url}")
if c.skills:
print(f"- 키워드/스킬: {c.skills}")
summary = c.raw_summary.replace("\n", " ")
if summary:
print(f"- 요약: {summary[:500]}")
print()
def run(argv: list[str] | None = None) -> int:
parser = build_parser()
args = parser.parse_args(argv)
if not (args.keyword or args.and_keyword or args.or_keyword or args.job_category or args.work_area or args.residential_area):
parser.error("최소 하나 이상의 --keyword, --job-category, --work-area 등을 지정하세요")
sc, matched = build_search_condition(args)
markup = post_search(sc)
if "로그인" in clean_text(markup)[:500] and "인재" not in clean_text(markup)[:2000]:
cleaned = clean_text(markup)
if "로그인" in cleaned[:500] and "인재" not in cleaned[:2000]:
raise RuntimeError("잡코리아가 로그인/차단 화면을 반환했습니다")
candidates = parse_candidates(markup, args.limit)
@ -406,10 +85,10 @@ def main() -> int:
if __name__ == "__main__":
try:
raise SystemExit(main())
raise SystemExit(run())
except urllib.error.HTTPError as exc:
print(f"HTTP error: {exc.code} {exc.reason}", file=sys.stderr)
raise SystemExit(2)
except Exception as exc:
except (RuntimeError, urllib.error.URLError) as exc:
print(f"error: {exc}", file=sys.stderr)
raise SystemExit(1)

View file

@ -0,0 +1,136 @@
from __future__ import annotations
import argparse
import json
import urllib.parse
import urllib.request
from collections.abc import Iterator
from typing import Any
from jobkorea_talent_models import AJAX_PATH, BASE_URL, DEFAULT_UA, FIND_PATH
def fetch(url: str, *, data: bytes | None = None, headers: dict[str, str] | None = None) -> str:
req_headers = {"User-Agent": DEFAULT_UA, "Referer": BASE_URL + FIND_PATH}
if headers:
req_headers.update(headers)
req = urllib.request.Request(url, data=data, headers=req_headers, method="POST" if data else "GET")
with urllib.request.urlopen(req, timeout=30) as resp:
return resp.read().decode("utf-8", "ignore")
def extract_json_object(source: str, marker: str) -> dict[str, Any]:
idx = source.find(marker)
if idx < 0:
raise RuntimeError(f"cannot find marker: {marker}")
start = source.find("{", idx)
if start < 0:
raise RuntimeError("cannot find JSON object start")
depth = 0
in_string = False
escape = False
for pos in range(start, len(source)):
ch = source[pos]
if in_string:
if escape:
escape = False
elif ch == "\\":
escape = True
elif ch == '"':
in_string = False
continue
if ch == '"':
in_string = True
elif ch == "{":
depth += 1
elif ch == "}":
depth -= 1
if depth == 0:
loaded = json.loads(source[start : pos + 1])
if not isinstance(loaded, dict):
raise RuntimeError("search condition was not a JSON object")
return loaded
raise RuntimeError("unterminated JSON object")
def iter_nodes(node: Any) -> Iterator[dict[str, Any]]:
if isinstance(node, dict):
yield node
for value in node.values():
yield from iter_nodes(value)
elif isinstance(node, list):
for item in node:
yield from iter_nodes(item)
def mark_matching_nodes(sc: dict[str, Any], top_key: str, labels: list[str]) -> list[str]:
if not labels:
return []
section = sc.get(top_key)
if section is None:
return []
wanted = [x.strip().lower() for x in labels if x.strip()]
matched: list[str] = []
for node in iter_nodes(section):
title = str(node.get("t", ""))
code = str(node.get("v", ""))
title_l = title.lower()
code_l = code.lower()
if any(w == title_l or w == code_l or w in title_l for w in wanted):
for key in ("s", "c", "use"):
if key in node:
node[key] = 1
matched.append(title or code)
return matched
def build_search_condition(args: argparse.Namespace) -> tuple[dict[str, Any], dict[str, list[str]]]:
first = fetch(BASE_URL + FIND_PATH)
sc = extract_json_object(first, "var searchcondition =")
sc["p"] = args.page
sc["ps"] = args.limit
sc["saveno"] = 0
sc["ff"] = 0
sc["sf"] = args.sort
terms: list[dict[str, Any]] = []
for kw in args.keyword:
terms.append({"s": 1, "c": 1, "t": kw, "v": kw, "kwdtypecode": 1, "logictypecode": 0})
for kw in args.and_keyword:
terms.append({"s": 1, "c": 1, "t": kw, "v": kw, "kwdtypecode": 1, "logictypecode": 1})
for kw in args.or_keyword:
terms.append({"s": 1, "c": 1, "t": kw, "v": kw, "kwdtypecode": 1, "logictypecode": 3})
for kw in args.exclude_keyword:
terms.append({"s": 1, "c": 1, "t": kw, "v": kw, "kwdtypecode": 1, "logictypecode": 2})
sc["totalkeywordlist"] = terms
if terms:
first_kw = terms[0]["t"]
sc.setdefault("pfr", {}).setdefault("ck", {})["Keyword"] = first_kw
sc["pfr"]["ck"]["KeywordType"] = 1
sc["pfr"]["n"] = 1
if args.career_min is not None:
sc.setdefault("career", {})["s"] = str(args.career_min)
if args.career_max is not None:
sc.setdefault("career", {})["e"] = str(args.career_max)
matched = {
"job_category": mark_matching_nodes(sc, "jobtype", args.job_category),
"work_area": mark_matching_nodes(sc, "workarea", args.work_area),
"residential_area": mark_matching_nodes(sc, "residentialarea", args.residential_area),
}
return sc, matched
def post_search(sc: dict[str, Any]) -> str:
body = urllib.parse.urlencode({"searchCondition": json.dumps(sc, ensure_ascii=False)}).encode()
return fetch(
BASE_URL + AJAX_PATH,
data=body,
headers={
"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
"X-Requested-With": "XMLHttpRequest",
},
)

View file

@ -8,6 +8,7 @@ import unittest
from pathlib import Path
SCRIPT = Path(__file__).with_name("jobkorea_talent_search.py")
sys.path.insert(0, str(SCRIPT.parent))
spec = importlib.util.spec_from_file_location("jobkorea_talent_search", SCRIPT)
assert spec is not None
helper = importlib.util.module_from_spec(spec)

View file

@ -11,10 +11,10 @@
"build": "npm run build --workspaces --if-present",
"build:manus-bundle": "node scripts/build-manus-bundle.js",
"generate:plugin-manifest": "node scripts/generate-plugin-manifest.js",
"lint": "node --check scripts/skill-docs.test.js scripts/korean_character_count.js scripts/test_korean_character_count.js scripts/korean_middle_korean.js scripts/test_korean_middle_korean.js scripts/build-manus-bundle.js scripts/test_build_manus_bundle.js scripts/workflow-actions.test.js scripts/generate-plugin-manifest.js scripts/test_generate_plugin_manifest.js && python3 -m py_compile scripts/k_skill_cleaner.py scripts/test_k_skill_cleaner.py corporate-registration-consulting/scripts/fill_official_hwp.py k-skill-cleaner/scripts/k_skill_cleaner.py scripts/fine_dust.py scripts/test_fine_dust.py scripts/ktx_booking.py scripts/test_ktx_booking.py scripts/srt_booking.py scripts/srt_seats.py scripts/srt_booking_test_support.py scripts/test_srt_booking.py scripts/test_srt_seats.py scripts/sillok_search.py scripts/test_sillok_search.py scripts/korean_spell_check.py scripts/test_korean_spell_check.py scripts/patent_search.py scripts/test_patent_search.py scripts/mfds_drug_safety.py scripts/test_mfds_drug_safety.py scripts/nts_business_registration.py scripts/test_nts_business_registration.py scripts/mfds_food_safety.py scripts/test_mfds_food_safety.py scripts/zipcode_search.py scripts/test_zipcode_search.py scripts/subway_lost_property.py scripts/test_subway_lost_property.py scripts/geeknews_search.py scripts/test_geeknews_search.py nts-business-registration/scripts/nts_business_registration.py biz-health-check/scripts/biz_health_check.py nts-tax-delinquency/scripts/nts_tax_delinquency.py localdata-business-status/scripts/localdata_business_status.py g2b-sanctioned-supplier/scripts/g2b_sanctioned_supplier.py fsc-corporate-info/scripts/fsc_corporate_info.py national-pension-workplace/scripts/national_pension_workplace.py scripts/test_naver_blog_search.py scripts/test_korean_slang_writing.py scripts/test_coupang_partners_mcp_wrapper.py scripts/test_ohou_today_deal.py scripts/ticket_availability.py scripts/test_ticket_availability.py scripts/test_danawa_price_search.py ticket-availability/scripts/ticket_availability.py coupang-product-search/scripts/coupang_partners_mcp.py ohou-today-deal/scripts/ohou_today_deal.py naver-blog-research/scripts/_naver_http.py naver-blog-research/scripts/naver_search.py naver-blog-research/scripts/naver_read.py naver-blog-research/scripts/naver_download_images.py korean-slang-writing/scripts/_slang_http.py korean-slang-writing/scripts/slang_search.py korean-slang-writing/scripts/slang_lookup.py korean-scholarship-search/scripts/scholarship_filter.py korean-scholarship-search/scripts/test_scholarship_filter.py korean-scholarship-search/scripts/university_search_plan.py seoul-bike/scripts/seoul_bike.py scripts/test_seoul_bike.py danawa-price-search/scripts/danawa_search.py kosis-stats/scripts/run_kosis_stats.py kosis-stats/tests/test_run_kosis_stats.py kstartup-search/scripts/run_kstartup.py kstartup-search/tests/test_run_kstartup.py intercity-bus-booking/scripts/intercity_bus_search.py daangn-used-goods-search/scripts/daangn_used_goods.py daangn-realty-search/scripts/daangn_realty.py daangn-jobs-search/scripts/daangn_jobs.py daangn-cars-search/scripts/daangn_cars.py foresttrip-vacancy/scripts/run_foresttrip_vacancy.py foresttrip-vacancy/tests/test_run_foresttrip_vacancy.py && npm run lint --workspaces --if-present && ./scripts/validate-skills.sh && node scripts/generate-plugin-manifest.js --check",
"lint": "node --check scripts/skill-docs.test.js scripts/korean_character_count.js scripts/test_korean_character_count.js scripts/korean_middle_korean.js scripts/test_korean_middle_korean.js scripts/build-manus-bundle.js scripts/test_build_manus_bundle.js scripts/workflow-actions.test.js scripts/generate-plugin-manifest.js scripts/test_generate_plugin_manifest.js && python3 -m py_compile scripts/k_skill_cleaner.py scripts/test_k_skill_cleaner.py corporate-registration-consulting/scripts/fill_official_hwp.py k-skill-cleaner/scripts/k_skill_cleaner.py scripts/fine_dust.py scripts/test_fine_dust.py scripts/ktx_booking.py scripts/test_ktx_booking.py scripts/srt_booking.py scripts/srt_seats.py scripts/srt_booking_test_support.py scripts/test_srt_booking.py scripts/test_srt_seats.py scripts/sillok_search.py scripts/test_sillok_search.py scripts/korean_spell_check.py scripts/test_korean_spell_check.py scripts/patent_search.py scripts/test_patent_search.py scripts/mfds_drug_safety.py scripts/test_mfds_drug_safety.py scripts/nts_business_registration.py scripts/test_nts_business_registration.py scripts/mfds_food_safety.py scripts/test_mfds_food_safety.py scripts/zipcode_search.py scripts/test_zipcode_search.py scripts/subway_lost_property.py scripts/test_subway_lost_property.py scripts/geeknews_search.py scripts/test_geeknews_search.py nts-business-registration/scripts/nts_business_registration.py biz-health-check/scripts/biz_health_check.py nts-tax-delinquency/scripts/nts_tax_delinquency.py localdata-business-status/scripts/localdata_business_status.py g2b-sanctioned-supplier/scripts/g2b_sanctioned_supplier.py fsc-corporate-info/scripts/fsc_corporate_info.py national-pension-workplace/scripts/national_pension_workplace.py scripts/test_naver_blog_search.py scripts/test_korean_slang_writing.py scripts/test_coupang_partners_mcp_wrapper.py scripts/test_ohou_today_deal.py scripts/ticket_availability.py scripts/test_ticket_availability.py scripts/test_danawa_price_search.py ticket-availability/scripts/ticket_availability.py coupang-product-search/scripts/coupang_partners_mcp.py ohou-today-deal/scripts/ohou_today_deal.py naver-blog-research/scripts/_naver_http.py naver-blog-research/scripts/naver_search.py naver-blog-research/scripts/naver_read.py naver-blog-research/scripts/naver_download_images.py korean-slang-writing/scripts/_slang_http.py korean-slang-writing/scripts/slang_search.py korean-slang-writing/scripts/slang_lookup.py korean-scholarship-search/scripts/scholarship_filter.py korean-scholarship-search/scripts/test_scholarship_filter.py korean-scholarship-search/scripts/university_search_plan.py seoul-bike/scripts/seoul_bike.py scripts/test_seoul_bike.py danawa-price-search/scripts/danawa_search.py kosis-stats/scripts/run_kosis_stats.py kosis-stats/tests/test_run_kosis_stats.py kstartup-search/scripts/run_kstartup.py kstartup-search/tests/test_run_kstartup.py intercity-bus-booking/scripts/intercity_bus_search.py daangn-used-goods-search/scripts/daangn_used_goods.py daangn-realty-search/scripts/daangn_realty.py daangn-jobs-search/scripts/daangn_jobs.py daangn-cars-search/scripts/daangn_cars.py foresttrip-vacancy/scripts/run_foresttrip_vacancy.py foresttrip-vacancy/tests/test_run_foresttrip_vacancy.py jobkorea-talent-search/scripts/jobkorea_talent_models.py jobkorea-talent-search/scripts/jobkorea_talent_parse.py jobkorea-talent-search/scripts/jobkorea_talent_search_condition.py jobkorea-talent-search/scripts/jobkorea_talent_search.py jobkorea-talent-search/scripts/test_jobkorea_talent_search.py && npm run lint --workspaces --if-present && ./scripts/validate-skills.sh && node scripts/generate-plugin-manifest.js --check",
"typecheck": "tsc --noEmit",
"prepare:python-test-env": "python3 -m venv .cache/python-test-venv && ./.cache/python-test-venv/bin/python -m pip install --quiet beautifulsoup4",
"test": "npm run prepare:python-test-env && node --test scripts/skill-docs.test.js scripts/test_korean_character_count.js scripts/test_korean_middle_korean.js scripts/test_build_manus_bundle.js scripts/workflow-actions.test.js scripts/test_generate_plugin_manifest.js && PYTHONPATH=.:scripts ./.cache/python-test-venv/bin/python -m unittest scripts.test_k_skill_cleaner scripts.test_fine_dust scripts.test_ktx_booking scripts.test_srt_booking scripts.test_srt_seats scripts.test_sillok_search scripts.test_korean_spell_check scripts.test_patent_search scripts.test_mfds_drug_safety scripts.test_nts_business_registration scripts.test_mfds_food_safety scripts.test_zipcode_search scripts.test_subway_lost_property scripts.test_geeknews_search scripts.test_naver_blog_search scripts.test_korean_slang_writing scripts.test_coupang_partners_mcp_wrapper scripts.test_ohou_today_deal scripts.test_ticket_availability scripts.test_seoul_bike scripts.test_danawa_price_search && PYTHONPATH=.:scripts:korean-scholarship-search/scripts ./.cache/python-test-venv/bin/python -m unittest discover -s korean-scholarship-search/scripts -p 'test_scholarship_filter.py' && PYTHONPATH=.:scripts:kosis-stats/scripts ./.cache/python-test-venv/bin/python -m unittest discover -s kosis-stats/tests -p 'test_run_kosis_stats.py' && PYTHONPATH=.:scripts:kstartup-search/scripts ./.cache/python-test-venv/bin/python -m unittest discover -s kstartup-search/tests -p 'test_run_kstartup.py' && PYTHONPATH=.:foresttrip-vacancy/scripts ./.cache/python-test-venv/bin/python -m unittest discover -s foresttrip-vacancy/tests -p 'test_run_foresttrip_vacancy.py' && npm run test --workspaces --if-present && ./scripts/validate-skills.sh",
"test": "npm run prepare:python-test-env && node --test scripts/skill-docs.test.js scripts/test_korean_character_count.js scripts/test_korean_middle_korean.js scripts/test_build_manus_bundle.js scripts/workflow-actions.test.js scripts/test_generate_plugin_manifest.js && PYTHONPATH=.:scripts ./.cache/python-test-venv/bin/python -m unittest scripts.test_k_skill_cleaner scripts.test_fine_dust scripts.test_ktx_booking scripts.test_srt_booking scripts.test_srt_seats scripts.test_sillok_search scripts.test_korean_spell_check scripts.test_patent_search scripts.test_mfds_drug_safety scripts.test_nts_business_registration scripts.test_mfds_food_safety scripts.test_zipcode_search scripts.test_subway_lost_property scripts.test_geeknews_search scripts.test_naver_blog_search scripts.test_korean_slang_writing scripts.test_coupang_partners_mcp_wrapper scripts.test_ohou_today_deal scripts.test_ticket_availability scripts.test_seoul_bike scripts.test_danawa_price_search && PYTHONPATH=.:scripts:korean-scholarship-search/scripts ./.cache/python-test-venv/bin/python -m unittest discover -s korean-scholarship-search/scripts -p 'test_scholarship_filter.py' && PYTHONPATH=.:scripts:kosis-stats/scripts ./.cache/python-test-venv/bin/python -m unittest discover -s kosis-stats/tests -p 'test_run_kosis_stats.py' && PYTHONPATH=.:scripts:kstartup-search/scripts ./.cache/python-test-venv/bin/python -m unittest discover -s kstartup-search/tests -p 'test_run_kstartup.py' && PYTHONPATH=.:foresttrip-vacancy/scripts ./.cache/python-test-venv/bin/python -m unittest discover -s foresttrip-vacancy/tests -p 'test_run_foresttrip_vacancy.py' && PYTHONPATH=.:jobkorea-talent-search/scripts ./.cache/python-test-venv/bin/python -m unittest discover -s jobkorea-talent-search/scripts -p 'test_jobkorea_talent_search.py' && npm run test --workspaces --if-present && ./scripts/validate-skills.sh",
"pack:dry-run": "npm pack --workspace k-lotto --dry-run && npm pack --workspace daiso-product-search --dry-run && npm pack --workspace market-kurly-search --dry-run && npm pack --workspace kakao-bar-nearby --dry-run && npm pack --workspace cheap-gas-nearby --dry-run && npm pack --workspace public-restroom-nearby --dry-run && npm pack --workspace parking-lot-search --dry-run && npm pack --workspace court-auction-notice-search --dry-run && npm pack --workspace donation-place-search --dry-run && npm pack --workspace gongsijiga-search --dry-run && npm pack --workspace kbl-results --dry-run && npm pack --workspace kleague-results --dry-run && npm pack --workspace lck-analytics --dry-run && npm pack --workspace toss-securities --dry-run && npm pack --workspace hipass-receipt --dry-run && npm pack --workspace used-car-price-search --dry-run && npm pack --workspace k-skill-rhwp --dry-run && npm pack --workspace korean-marathon-schedule --dry-run && npm pack --workspace gangnamunni-clinic-search --dry-run && npm pack --workspace daishin-report-search --dry-run && npm pack --workspace sh-notice-search --dry-run && npm pack --workspace emergency-room-beds --dry-run && npm pack --workspace local-election-candidate-search --dry-run",
"ci": "npm run lint && npm run typecheck && npm run test && npm run pack:dry-run",
"version-packages": "changeset version",