NL2SQL이란?

NL2SQL(Natural Language to SQL)은 "지난 달 매출이 가장 높은 상품 TOP 5는?" 같은 자연어 질문을 SELECT product, SUM(revenue) FROM orders... SQL 쿼리로 자동 변환하는 기술입니다.

mermaid


flowchart LR
    User["사용자 질문<br/>자연어"] --> LLM[LLM<br/>SQL 변환기]
    Schema["DB 스키마<br/>테이블/컬럼 정보"] --> LLM
    LLM --> SQL[SQL 쿼리 생성]
    SQL --> DB[(데이터베이스)]
    DB --> Result[결과 반환]
    Result --> Answer["자연어 답변<br/>LLM 해석"]

기본 NL2SQL 구현

python

import anthropic
import psycopg2

client = anthropic.Anthropic()

SCHEMA = '''
테이블: orders (id, user_id, product_id, amount, created_at)
테이블: products (id, name, category, price)
테이블: users (id, name, email, created_at)
'''

def nl2sql(question: str) -> str:
    response = client.messages.create(
        model="claude-sonnet-4-6-20251001",
        max_tokens=1024,
        system=f'''당신은 PostgreSQL 전문가입니다.

다음 스키마를 참고해서 자연어 질문을 SQL로 변환하세요:
{SCHEMA}

규칙:
- SELECT 쿼리만 생성 (보안상 DML 금지)
- 쿼리만 반환, 설명 없이
- 컬럼명은 항상 테이블명.컬럼명 형식으로''',
        messages=[{"role": "user", "content": f"질문: {question}"}]
    )
    return response.content[0].text.strip()

def execute_query(sql: str, conn) -> list[dict]:
    with conn.cursor() as cur:
        cur.execute(sql)
        cols = [desc[0] for desc in cur.description]
        return [dict(zip(cols, row)) for row in cur.fetchall()]

# 사용 예시
conn = psycopg2.connect("postgresql://localhost/mydb")
sql = nl2sql("지난 달 매출 TOP 5 상품과 총 매출액을 알려줘")
print(f"생성된 SQL:
{sql}")

results = execute_query(sql, conn)
for row in results:
    print(row)

스키마 자동 추출 + 컨텍스트 최적화

LLM 컨텍스트 한계를 고려해 관련 테이블만 선택적으로 주입합니다.

python

from openai import OpenAI
import json

openai_client = OpenAI()

def extract_schema(conn, tables: list[str] = None) -> str:
    with conn.cursor() as cur:
        if tables:
            placeholders = ','.join(['%s'] * len(tables))
            cur.execute(f'''
                SELECT table_name, column_name, data_type, is_nullable
                FROM information_schema.columns
                WHERE table_name IN ({placeholders})
                ORDER BY table_name, ordinal_position
            ''', tables)
        else:
            cur.execute('''
                SELECT table_name, column_name, data_type, is_nullable
                FROM information_schema.columns
                WHERE table_schema = 'public'
                ORDER BY table_name, ordinal_position
            ''')

        rows = cur.fetchall()

    schema_dict = {}
    for table, col, dtype, nullable in rows:
        if table not in schema_dict:
            schema_dict[table] = []
        schema_dict[table].append(f"{col} ({dtype}{'?' if nullable == 'YES' else ''})")

    return "
".join(
        f"CREATE TABLE {t} ({', '.join(cols)});"
        for t, cols in schema_dict.items()
    )

def smart_nl2sql(question: str, conn) -> tuple[str, list[dict]]:
    # 1단계: 관련 테이블 파악
    all_tables = get_all_tables(conn)
    table_selection = openai_client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{
            "role": "user",
            "content": f"질문: {question}

테이블 목록: {all_tables}

관련 테이블명만 JSON 배열로 반환: ["table1", "table2"]"
        }]
    )
    relevant_tables = json.loads(table_selection.choices[0].message.content)

    # 2단계: 관련 스키마만 추출해서 SQL 생성
    schema = extract_schema(conn, relevant_tables)
    sql_response = openai_client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": f"PostgreSQL 전문가. 스키마:
{schema}

SELECT 쿼리만 반환."},
            {"role": "user", "content": question}
        ]
    )
    sql = sql_response.choices[0].message.content.strip()
    sql = sql.replace("```sql", "").replace("```", "").strip()

    results = execute_query(sql, conn)
    return sql, results

def get_all_tables(conn) -> list[str]:
    with conn.cursor() as cur:
        cur.execute("SELECT table_name FROM information_schema.tables WHERE table_schema = 'public'")
        return [row[0] for row in cur.fetchall()]

에러 핸들링 & Self-Correction

LLM이 생성한 SQL이 틀릴 수 있습니다. 에러 시 자동 수정합니다.

python

def nl2sql_with_retry(question: str, schema: str, conn, max_retries: int = 3) -> tuple[str, list]:
    sql = None
    error_history = []

    for attempt in range(max_retries):
        context = f"스키마:
{schema}"
        if error_history:
            context += f"

이전 시도 실패:
" + "
".join(
                f"SQL: {e['sql']}
ERROR: {e['error']}"
                for e in error_history
            )

        response = client.messages.create(
            model="claude-sonnet-4-6-20251001",
            max_tokens=1024,
            system=context + "

SELECT 쿼리만 반환.",
            messages=[{"role": "user", "content": question}]
        )
        sql = response.content[0].text.strip().replace("```sql", "").replace("```", "").strip()

        try:
            results = execute_query(sql, conn)
            return sql, results
        except Exception as e:
            error_history.append({"sql": sql, "error": str(e)})
            print(f"시도 {attempt+1} 실패: {e}")

    raise RuntimeError(f"SQL 생성 실패 ({max_retries}회 시도): {error_history}")

주요 모델 NL2SQL 성능 비교

모델	정확도 (Spider)	컨텍스트	비용/1K토큰	특징
GPT-4o	85-90%	128K	$0.005	복잡한 조인 강점
Claude Sonnet 4.5	83-88%	200K	$0.003	긴 스키마 처리 우수
Gemini 2.5 Pro	82-87%	1M	$0.0035	초대형 스키마 지원
GPT-4o mini	70-78%	128K	$0.0002	간단한 쿼리 가성비 최고
DeepSeek-V3	78-84%	64K	$0.001	오픈소스, 자체 호스팅 가능

Spider 벤치마크: 200개+ DB, 10K+ 자연어-SQL 쌍으로 구성된 NL2SQL 표준 평가 데이터셋

보안: SQL Injection 방어

LLM이 생성한 SQL을 그대로 실행하면 위험합니다.

python

import sqlparse
from sqlparse.sql import Statement
from sqlparse.tokens import Keyword, DML

ALLOWED_STATEMENTS = {'SELECT'}
FORBIDDEN_KEYWORDS = {'DROP', 'DELETE', 'UPDATE', 'INSERT', 'ALTER', 'TRUNCATE', 'CREATE', 'EXEC', 'EXECUTE'}

def validate_sql(sql: str) -> bool:
    parsed = sqlparse.parse(sql)
    if not parsed:
        return False

    stmt = parsed[0]

    # DML 타입 확인
    first_token = next(
        (t for t in stmt.flatten() if t.ttype in (Keyword, DML)),
        None
    )
    if not first_token or first_token.normalized.upper() not in ALLOWED_STATEMENTS:
        return False

    # 금지 키워드 확인
    all_tokens = {t.normalized.upper() for t in stmt.flatten() if t.ttype is Keyword}
    if all_tokens & FORBIDDEN_KEYWORDS:
        return False

    # 서브쿼리에서도 검사
    sql_upper = sql.upper()
    return not any(kw in sql_upper for kw in FORBIDDEN_KEYWORDS)

# 읽기 전용 DB 유저 사용 (추가 보안)
READ_ONLY_DB_URL = "postgresql://readonly_user:pass@localhost/mydb"

기술 심층 분석

NL2SQL 파이프라인 최적화

프로덕션 NL2SQL은 단순히 LLM에 쿼리를 보내는 것보다 훨씬 복잡합니다.

Few-shot 예시 주입: 자주 사용하는 쿼리 패턴을 예시로 제공하면 정확도 15-20% 향상.

python

FEW_SHOT_EXAMPLES = [
    {
        "question": "이번 달 신규 가입자 수",
        "sql": "SELECT COUNT(*) FROM users WHERE created_at >= DATE_TRUNC('month', NOW())"
    },
    {
        "question": "상품별 평균 주문 금액",
        "sql": "SELECT p.name, AVG(o.amount) as avg_amount FROM orders o JOIN products p ON o.product_id = p.id GROUP BY p.id, p.name ORDER BY avg_amount DESC"
    }
]

def format_examples(examples: list[dict]) -> str:
    return "
".join(
        f"Q: {ex['question']}
SQL: {ex['sql']}"
        for ex in examples
    )

벡터 기반 유사 예시 검색: Few-shot 예시가 많을 때는 RAG로 관련 예시만 선택.

python

from openai import OpenAI

def find_similar_examples(question: str, examples: list[dict], top_k: int = 3) -> list[dict]:
    openai = OpenAI()

    question_vec = openai.embeddings.create(
        input=question, model="text-embedding-3-small"
    ).data[0].embedding

    example_vecs = [
        openai.embeddings.create(
            input=ex["question"], model="text-embedding-3-small"
        ).data[0].embedding
        for ex in examples
    ]

    import numpy as np
    scores = [
        np.dot(question_vec, ev) / (np.linalg.norm(question_vec) * np.linalg.norm(ev))
        for ev in example_vecs
    ]

    top_indices = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)[:top_k]
    return [examples[i] for i in top_indices]

평가 지표

EX (Execution Accuracy): 쿼리 실행 결과가 정답과 일치하는 비율
EM (Exact Match): SQL 텍스트 완전 일치 비율 (과도하게 엄격)
VES (Valid Efficiency Score): 올바른 결과 + 효율적인 쿼리 평가

NL2SQL 완전 가이드: 자연어로 데이터베이스 조회하기

핵심 포인트

NL2SQL이란?

기본 NL2SQL 구현

스키마 자동 추출 + 컨텍스트 최적화

에러 핸들링 & Self-Correction

주요 모델 NL2SQL 성능 비교

보안: SQL Injection 방어

기술 심층 분석

NL2SQL 파이프라인 최적화

평가 지표

Footnotes

이 글에서 다루는 AI

관련 글 더 보기

댓글

관련 모델

관련 방법론