初代目
# 部署教程
下载 poppler,并解压到脚本所在目录(脚本默认查找 poppler-25.12.0/bin)。
准备待填写的答题卡 PDF、字体文件和 answers.json。
借助任意 AI 工具,按照试卷版式调整处理代码(如题号、选项和作答区域)。
# fill_sheet_auto_calibrate.py
import cv2 | |
import numpy as np | |
from PIL import Image, ImageDraw, ImageFont | |
from pdf2image import convert_from_path | |
import img2pdf | |
import os | |
import textwrap | |
import sys | |
import io | |
import json | |
# Make console output UTF-8 so the Chinese status messages print correctly on
# consoles whose default encoding is something else.
_stdout_encoding = (getattr(sys.stdout, "encoding", None) or "").lower()
if _stdout_encoding.replace("-", "").replace("_", "") != "utf8":
    if hasattr(sys.stdout, "reconfigure"):
        # Changes the encoding in place and leaves the stream's other settings
        # (including its buffering mode) untouched, so prompts printed during
        # interactive calibration are not held back in a new buffer.
        sys.stdout.reconfigure(encoding="utf-8")
    elif hasattr(sys.stdout, "buffer"):
        # Fallback for interpreters without reconfigure(); line buffering
        # keeps each printed line visible immediately.
        sys.stdout = io.TextIOWrapper(
            sys.stdout.buffer, encoding="utf-8", line_buffering=True
        )

SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))

# Locate the Poppler "bin" directory next to this script. The folder-name
# variants below are tried in order and the first existing one wins.
_POPPLER_DIR_NAMES = (
    "poppler-25.12.0",
    "poppler-25.12.0_x64",
    "poppler-25.12.0-windows-x64",
)
POPPLER_PATH = next(
    (
        candidate
        for candidate in (
            os.path.join(SCRIPT_DIR, name, "bin") for name in _POPPLER_DIR_NAMES
        )
        if os.path.exists(candidate)
    ),
    None,
)
if POPPLER_PATH is None:
    raise FileNotFoundError("Poppler 路径不存在,请检查。")

# NOTE: the input/output PDF names are relative to the current working
# directory, whereas the cache and answers files live next to this script.
PDF_INPUT = "1_2025-2026学年福州市高二年级适应性练习语文答题卡(1).pdf"
PDF_OUTPUT = "filled_语文答题卡.pdf"
COORDS_CACHE = os.path.join(SCRIPT_DIR, "coords_cache.json")
ANSWERS_JSON = os.path.join(SCRIPT_DIR, "answers.json")
# ================== Load answers ==================
def load_answers(json_path):
    """Load the answer key from a JSON file.

    The file is expected to hold a JSON object with an ``"objective"`` and a
    ``"subjective"`` section, each mapping question numbers (as JSON strings)
    to answers. A section that is missing or ``null`` loads as an empty dict.

    Args:
        json_path: Path of the UTF-8 encoded JSON file.

    Returns:
        ``{"objective": {...}, "subjective": {...}}`` with every question
        number converted to ``int``.

    Raises:
        OSError: The file cannot be opened.
        ValueError: The file is not valid JSON, its top level is not an
            object, or a question number is not an integer.
    """
    with open(json_path, "r", encoding="utf-8") as f:
        raw = json.load(f)
    if not isinstance(raw, dict):
        raise ValueError("答案文件顶层必须是 JSON 对象")

    def int_keyed(section):
        # JSON object keys are always strings; the rest of the script looks
        # questions up by integer number.
        return {int(k): v for k, v in (raw.get(section) or {}).items()}

    return {
        "objective": int_keyed("objective"),
        "subjective": int_keyed("subjective"),
    }
# Load the answer key once at import time. Nothing can be filled in without
# it, so any failure is reported and the script stops with exit status 1.
try:
    ANSWERS = load_answers(ANSWERS_JSON)
except Exception as exc:
    print(f"[ERROR] 无法加载答案文件 {ANSWERS_JSON}: {exc}")
    raise SystemExit(1)
# ================== Interactive calibration ==================
def calibrate_interactive(pdf_path, poppler_path):
    """Collect answer-sheet coordinates by letting the user click on page 1.

    The first page of ``pdf_path`` is rendered at 400 dpi and shown, scaled
    down to at most 1400 px wide, in an OpenCV window. The user clicks every
    objective option and then the start point of every subjective answer, in
    a fixed order. Pressing ``r`` discards all clicks; pressing ``q`` or
    closing the window ends the session.

    Args:
        pdf_path: Path of the blank answer-sheet PDF.
        poppler_path: Directory with the Poppler binaries (passed to pdf2image).

    Returns:
        ``(obj_boxes, subj_starts)``: ``obj_boxes`` maps question number ->
        option letter -> ``(x, y)`` and ``subj_starts`` maps question number
        -> ``(x, y)``, both in pixels of the 400 dpi render. ``(None, None)``
        is returned when no page could be rendered or the session ended
        before every point was clicked.
    """
    window = "[INFO] 校准模式 - 按顺序点击 (按 'q' 退出,'r' 重置)"

    print("[INFO] 正在加载答题卡用于校准...")
    # Only page 1 is used, so do not rasterise the remaining pages.
    pages = convert_from_path(
        pdf_path, dpi=400, first_page=1, last_page=1, poppler_path=poppler_path
    )
    if not pages:
        print("[ERROR] 无法从 PDF 渲染出任何页面")
        return None, None
    img = np.array(pages[0])
    h, w = img.shape[:2]

    max_display_width = 1400
    scale = min(max_display_width / w, 1.0)
    # pdf2image hands back PIL images (RGB by default) while OpenCV windows
    # interpret arrays as BGR; swap channels for the preview only.
    preview = img
    if img.ndim == 3 and img.shape[2] == 3:
        preview = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    base_display = cv2.resize(
        preview, (int(w * scale), int(h * scale)), interpolation=cv2.INTER_AREA
    )
    display_img = base_display.copy()

    print("\n[INFO] 请按以下顺序点击(每点一个,终端会确认):")
    # Single source of truth for the click order; the result dictionaries
    # below are derived from it so the two can never drift apart.
    objective_layout = (
        [(q, "ABCD") for q in (1, 2, 6)]
        + [(10, "ABCDEFG")]
        + [(q, "ABCD") for q in (11, 12, 15, 20, 21)]
    )
    objective_order = [(q, opt) for q, opts in objective_layout for opt in opts]
    # Question 23 (the essay) is intentionally not calibrated.
    subjective_order = [3, 4, 5, 7, 8, 9, 13, 14, 16, 17, 18, 19, 22]
    total_clicks = len(objective_order) + len(subjective_order)
    all_points = []

    def click_event(event, x, y, flags, param):
        # Ignore everything except left clicks, and any click past the last
        # expected point.
        if event != cv2.EVENT_LBUTTONDOWN or len(all_points) >= total_clicks:
            return
        position = len(all_points)
        # Map the click from preview pixels back to full-resolution pixels.
        orig_x = int(round(x / scale))
        orig_y = int(round(y / scale))
        all_points.append((orig_x, orig_y))
        if position < len(objective_order):
            q, opt = objective_order[position]
            print(f"[SUCCESS] 已记录 第{q}题 {opt}: ({orig_x}, {orig_y})")
        else:
            q = subjective_order[position - len(objective_order)]
            print(f"[SUCCESS] 已记录 主观题 {q} 起始点: ({orig_x}, {orig_y})")
        cv2.circle(display_img, (x, y), 5, (0, 255, 0), -1)
        cv2.imshow(window, display_img)

    def window_closed():
        # Treat a backend error the same as a closed window.
        try:
            return cv2.getWindowProperty(window, cv2.WND_PROP_VISIBLE) < 1
        except cv2.error:
            return True

    cv2.imshow(window, display_img)
    cv2.setMouseCallback(window, click_event)
    print(f"\n[INFO] 共需点击定位 {total_clicks} 次")
    try:
        while True:
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
            if key == ord('r'):
                all_points.clear()
                display_img[:] = base_display
                print("[WARNING] 已重置,重新开始点击")
                cv2.imshow(window, display_img)
            elif window_closed():
                # Without this check, closing the window would leave the loop
                # spinning with no way to press 'q'.
                break
    finally:
        cv2.destroyAllWindows()

    if len(all_points) != total_clicks:
        print(f"[WARNING] 警告:只记录了 {len(all_points)} 个点,需要 {total_clicks} 个")
        return None, None

    # Objective points come first in click order, subjective points after.
    obj_boxes = {}
    for (q, opt), point in zip(objective_order, all_points):
        obj_boxes.setdefault(q, {})[opt] = point
    subj_starts = dict(zip(subjective_order, all_points[len(objective_order):]))
    return obj_boxes, subj_starts
def save_coords(obj_boxes, subj_starts, path):
    """Persist calibrated coordinates to ``path`` as UTF-8 JSON.

    The objective boxes are stored under ``"objective"`` and the subjective
    start points under ``"subjective_starts"``. A confirmation line is
    printed once the file has been written.
    """
    with open(path, "w", encoding="utf-8") as out:
        out.write(
            json.dumps(
                {"objective": obj_boxes, "subjective_starts": subj_starts},
                ensure_ascii=False,
                indent=2,
            )
        )
    print(f"[INFO] 坐标已保存至: {path}")
def load_coords(path):
    """Load cached calibration coordinates written by ``save_coords``.

    Args:
        path: Path of the JSON cache file.

    Returns:
        ``(obj_boxes, subj_starts)`` with integer question numbers and
        ``(x, y)`` tuples, or ``(None, None)`` when the cache does not exist
        or cannot be interpreted. A warning is printed in the latter case.

    Raises:
        OSError: The cache file exists but cannot be opened.
    """
    if not os.path.exists(path):
        return None, None
    with open(path, "r", encoding="utf-8") as f:
        try:
            data = json.load(f)
            # JSON stores keys as strings and points as lists; restore the
            # int keys and tuple points used elsewhere in the script.
            obj = {
                int(q): {opt: tuple(pos) for opt, pos in opts.items()}
                for q, opts in data["objective"].items()
            }
            subj = {
                int(q): tuple(pos)
                for q, pos in data["subjective_starts"].items()
            }
        except (ValueError, KeyError, TypeError, AttributeError) as exc:
            # A damaged or differently shaped cache is treated like a missing
            # one so the caller can recalibrate instead of crashing.
            print(f"[WARNING] 坐标缓存 {path} 无法解析,将重新校准: {exc!r}")
            return None, None
    return obj, subj
# ================== Rendering ==================
# Rendering parameters. Positions and sizes are applied to the page image that
# main() renders at 400 dpi.

# Text layout for subjective answers.
FONT_PATH = os.path.join(SCRIPT_DIR, "QuanHengDuLiang-TTF.ttf")
FONT_SIZE = 70  # size handed to ImageFont.truetype
LINE_SPACING = 134  # vertical step between consecutive text lines
COLUMN_WIDTH = 1400  # width given to every subjective answer box

# Objective answers.
FILL_RADIUS = 18  # radius of the filled circle drawn on a chosen option
def build_subjective_boxes(subj_starts, column_width=None):
    """Turn per-question start points into ``(x, y, width, height)`` boxes.

    Args:
        subj_starts: Mapping of question number -> ``(x, y)`` start point.
        column_width: Width given to every box. When omitted, the
            module-level ``COLUMN_WIDTH`` is used.

    Returns:
        Mapping of question number -> ``(x, y, column_width, height)``. The
        height is looked up per question; questions without a specific entry
        get 100.
    """
    if column_width is None:
        column_width = COLUMN_WIDTH

    default_height = 100
    # Box heights for the questions that differ from the default.
    heights = {
        3: 150,
        5: 120, 13: 120, 14: 120, 16: 120, 19: 120, 22: 120,
        8: 160, 9: 160, 17: 160,
    }
    return {
        q: (x, y, column_width, heights.get(q, default_height))
        for q, (x, y) in subj_starts.items()
    }
def pdf_to_image(pdf_path, poppler_path, dpi=400):
    """Render the first page of a PDF as a NumPy array.

    Args:
        pdf_path: Path of the PDF to render.
        poppler_path: Directory with the Poppler binaries (passed to pdf2image).
        dpi: Render resolution.

    Returns:
        ``np.ndarray`` built from the first rendered page.

    Raises:
        ValueError: The PDF produced no pages.
    """
    # Only the first page is used, so limit the conversion to it instead of
    # rasterising the whole document.
    pages = convert_from_path(
        pdf_path, dpi=dpi, first_page=1, last_page=1, poppler_path=poppler_path
    )
    if not pages:
        raise ValueError(f"PDF 中没有可渲染的页面: {pdf_path}")
    return np.array(pages[0])
def fill_bubbles(img, answers, boxes, radius=18):
    """Fill in the chosen option of every objective question on ``img``.

    A solid black circle is drawn with ``cv2.circle`` at the coordinate of
    each chosen option; ``img`` is modified in place. Questions or options
    without a known coordinate are skipped with a printed warning.

    Args:
        img: Image array handed to ``cv2.circle``.
        answers: Mapping of question number -> option letter, or a list of
            letters when several options are chosen.
        boxes: Mapping of question number -> option letter -> ``(x, y)``.
        radius: Radius of the filled circle.
    """
    for q, ans in answers.items():
        if q not in boxes:
            print(f"[WARNING] 客观题 {q} 未定义坐标")
            continue
        opts = boxes[q]
        # Treat a single letter as a one-element selection so both answer
        # shapes share one code path.
        chosen = ans if isinstance(ans, (list, tuple)) else [ans]
        for a in chosen:
            if a not in opts:
                print(f"[WARNING] 客观题 {q} 的选项 {a} 未定义坐标,已跳过")
                continue
            x, y = opts[a]
            cv2.circle(img, (x, y), radius, (0, 0, 0), -1)
def _wrap_to_pixel_width(text, font, max_width):
    """Split ``text`` into lines whose rendered width fits ``max_width``."""
    lines = []
    current = ""
    for ch in text:
        # A line always keeps at least one character, so a glyph wider than
        # the box still makes progress instead of looping forever.
        if current and font.getlength(current + ch) > max_width:
            lines.append(current)
            current = ch
        else:
            current += ch
    if current:
        lines.append(current)
    return lines


def draw_text_in_box(draw, text, box, font, line_spacing):
    """Draw ``text`` inside ``box``, wrapping it to the box width.

    Lines are broken per character against the width reported by
    ``font.getlength``, so text without spaces (such as Chinese) wraps as
    well; the trade-off is that a space-separated word may be split across
    lines. Each ``\\n`` in ``text`` starts a new line and blank paragraphs
    are kept as empty lines. Text that fits the box height is vertically
    centred; taller text starts at the top edge and runs past the bottom.

    Args:
        draw: Object with a ``text((x, y), str, fill=..., font=...)`` method,
            e.g. a PIL ``ImageDraw`` instance.
        text: Text to draw.
        box: ``(x, y, max_width, max_height)``.
        font: Font object providing ``getlength(str)``; also passed to
            ``draw.text``.
        line_spacing: Vertical distance between the tops of adjacent lines.
    """
    x, y, max_width, max_height = box

    wrapped_lines = []
    for para in text.split('\n'):
        if para.strip():
            wrapped_lines.extend(_wrap_to_pixel_width(para, font, max_width))
        else:
            wrapped_lines.append("")

    total_height = len(wrapped_lines) * line_spacing
    if total_height > max_height:
        start_y = y
    else:
        start_y = y + (max_height - total_height) // 2

    for i, line in enumerate(wrapped_lines):
        draw.text((x, start_y + i * line_spacing), line, fill=(0, 0, 0), font=font)
def draw_text_on_image(pil_img, answers, boxes, font_path, font_size, line_spacing):
    """Write every subjective answer into its box on ``pil_img`` (in place).

    Args:
        pil_img: PIL image to draw on.
        answers: Mapping of question number -> answer text.
        boxes: Mapping of question number -> ``(x, y, width, height)``.
        font_path: Path of the TrueType font to use.
        font_size: Size passed to ``ImageFont.truetype``.
        line_spacing: Vertical distance between adjacent text lines.
    """
    try:
        font = ImageFont.truetype(font_path, font_size)
    except OSError:
        # Pillow raises OSError when the font file cannot be read. Fall back
        # to the built-in font rather than aborting; other errors (and
        # Ctrl+C) are no longer swallowed.
        print(f"[WARNING] 无法加载字体 {font_path}")
        font = ImageFont.load_default()

    draw = ImageDraw.Draw(pil_img)
    for q, text in answers.items():
        if q not in boxes:
            print(f"[WARNING] 主观题 {q} 无定义区域")
            continue
        draw_text_in_box(draw, text, boxes[q], font, line_spacing)
# ================== Main flow ==================
def main():
    """Fill in the answer sheet and write the result as a PDF.

    Coordinates are taken from the cache file when available; otherwise the
    interactive calibration runs and its result is cached. The first page of
    the input PDF is then rendered at 400 dpi, objective options are filled
    in, subjective answers are drawn, and the page is saved to ``PDF_OUTPUT``.
    """
    obj_boxes, subj_starts = load_coords(COORDS_CACHE)
    if obj_boxes is None or subj_starts is None:
        print("[INFO] 未找到坐标缓存,进入校准模式...")
        obj_boxes, subj_starts = calibrate_interactive(PDF_INPUT, POPPLER_PATH)
        if obj_boxes is None:
            print("[ERROR] 校准未完成,退出。")
            return
        save_coords(obj_boxes, subj_starts, COORDS_CACHE)
    else:
        print(f"[INFO] 已从 {COORDS_CACHE} 加载坐标")

    img = pdf_to_image(PDF_INPUT, POPPLER_PATH, dpi=400)
    fill_bubbles(img, ANSWERS["objective"], obj_boxes, FILL_RADIUS)

    pil_img = Image.fromarray(img)
    subj_boxes = build_subjective_boxes(subj_starts)
    draw_text_on_image(pil_img, ANSWERS["subjective"], subj_boxes, FONT_PATH, FONT_SIZE, LINE_SPACING)

    temp_png = "temp_filled.png"
    try:
        pil_img.save(temp_png, dpi=(400, 400))
        # Convert before opening the output file so a failed conversion does
        # not leave an empty or truncated PDF behind.
        pdf_bytes = img2pdf.convert(temp_png, dpi=400)
        with open(PDF_OUTPUT, "wb") as f:
            f.write(pdf_bytes)
    finally:
        # Remove the intermediate PNG even when saving or converting fails.
        if os.path.exists(temp_png):
            os.remove(temp_png)
    print(f"[SUCCESS] 输出文件: {PDF_OUTPUT}")


if __name__ == "__main__":
    main()