App-Review-Analyzer/analyzer.py at develop · DongLab-DevTools/App-Review-Analyzer · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
"""Analyze review sentiment and category with Gemini AI.

Usage: python analyzer.py
Input: data/{app_key}_reviews.json
Output: data/{app_key}_analyzed.json
"""

import json
import os
import time
from datetime import datetime
from dotenv import load_dotenv

load_dotenv()

import google.generativeai as genai
from config import APPS, ANALYSIS_CATEGORIES, GEMINI_BATCH_SIZE

# Configure the Gemini client with the key read from the GEMINI_API_KEY
# environment variable (os.getenv yields None when the variable is unset).
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
# Module-level model handle; analyze_batch calls generate_content on it.
model = genai.GenerativeModel("gemini-2.0-flash")

# Category names from config.ANALYSIS_CATEGORIES; they are joined with ", "
# and substituted into the prompt below.
POSITIVE_CATS = ANALYSIS_CATEGORIES["positive"]
NEGATIVE_CATS = ANALYSIS_CATEGORIES["negative"]

# Prompt sent to the model for each batch. The text is runtime data and is
# intentionally kept in Korean. It is filled via str.format with
# positive_cats, negative_cats and reviews_text; the doubled braces in the
# response-format example are escapes that render as literal "{" and "}".
# The prompt asks for: score >= 3 -> "positive", score <= 2 -> "negative",
# a category from the matching list, a one-line summary, and a JSON-array-only
# reply.
PROMPT_TEMPLATE = """다음은 앱 리뷰 목록입니다. 각 리뷰를 분석해주세요.

## 분석 규칙
- 별점 3점 이상: sentiment = "positive", category는 긍정 카테고리 중 선택
- 별점 2점 이하: sentiment = "negative", category는 부정 카테고리 중 선택
- summary: 리뷰 핵심을 한 줄로 요약 (15자 이내)

## 긍정 카테고리
{positive_cats}

## 부정 카테고리
{negative_cats}

## 리뷰 목록
{reviews_text}

## 응답 형식
반드시 JSON 배열로만 응답하세요. 다른 텍스트 없이 JSON만 출력하세요.
[
  {{"reviewId": "...", "sentiment": "positive|negative", "category": "카테고리명", "summary": "요약"}}
]
"""


def load_reviews(app_key: str) -> dict | None:
    path = f"data/{app_key}_reviews.json"
    if not os.path.exists(path):
        return None
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


def analyze_batch(batch: list) -> list:
    """Ask Gemini to analyze one batch of reviews.

    Args:
        batch: Review dicts. Each must provide "reviewId" and "score";
            "content" may be missing or None and is then sent as an empty
            string. Content is truncated to 200 characters in the prompt.

    Returns:
        The dict elements of the JSON array parsed from the model reply.
        An empty list is returned when ``batch`` is empty, when the request
        or JSON parsing fails, or when the reply is not a JSON array.
    """
    # Nothing to analyze: skip the API call entirely.
    if not batch:
        return []

    lines = []
    for r in batch:
        # Tolerate a missing/None body so one odd review cannot abort the run.
        content = (r.get("content") or "")[:200]
        lines.append(f'- [ID:{r["reviewId"]}] 별점:{r["score"]} 내용:"{content}"\n')
    reviews_text = "".join(lines)

    prompt = PROMPT_TEMPLATE.format(
        positive_cats=", ".join(POSITIVE_CATS),
        negative_cats=", ".join(NEGATIVE_CATS),
        reviews_text=reviews_text,
    )

    # Deliberate best-effort boundary: any client or parsing failure falls
    # back to an empty result so the caller can apply its own fallback.
    try:
        response = model.generate_content(prompt)
        text = response.text.strip()
        # Extract the JSON payload from a Markdown code fence if present.
        if "```json" in text:
            text = text.split("```json")[1].split("```")[0].strip()
        elif "```" in text:
            text = text.split("```")[1].split("```")[0].strip()
        parsed = json.loads(text)
    except Exception as e:
        print(f"    [ERROR] Gemini 분석 실패: {e}")
        return []

    # Valid JSON is not necessarily the requested array of objects.
    if not isinstance(parsed, list):
        print("    [ERROR] Gemini 분석 실패: 응답이 JSON 배열이 아님")
        return []
    return [item for item in parsed if isinstance(item, dict)]


def fallback_analysis(review: dict) -> dict:
    """Build a score-only analysis for a review Gemini did not cover.

    A score of 3 or higher (3 is assumed when "score" is absent) is labeled
    positive with the generic praise category; anything lower is labeled
    negative with the generic complaint category. The summary is left empty.
    """
    rating = review.get("score", 3)
    review_id = review["reviewId"]

    if rating >= 3:
        sentiment, category = "positive", "기타 칭찬"
    else:
        sentiment, category = "negative", "기타 불만"

    return {
        "reviewId": review_id,
        "sentiment": sentiment,
        "category": category,
        "summary": "",
    }


def _merge_review_fields(result: dict, review: dict) -> dict:
    """Copy the original review's fields onto ``result`` in place and return it."""
    result.update({
        "userName": review.get("userName", ""),
        "score": review.get("score", 0),
        "content": review.get("content", ""),
        "thumbsUpCount": review.get("thumbsUpCount", 0),
        "at": review.get("at", ""),
        "replyContent": review.get("replyContent", ""),
        "repliedAt": review.get("repliedAt", ""),
        "appVersion": review.get("appVersion", ""),
        "store": review.get("store", "PLAY"),
    })
    return result


def analyze_app(app_key: str, reviews_data: dict) -> None:
    """Analyze every review of one app and write the results to disk.

    Reviews are sent to ``analyze_batch`` in slices of ``GEMINI_BATCH_SIZE``.
    Reviews the model did not return use ``fallback_analysis``; entries the
    model returned without sentiment/category/summary have those keys filled
    from the same fallback. Each result is then merged with the original
    review fields, and everything is written to
    ``data/{app_key}_analyzed.json``.

    Args:
        app_key: Key used to build the output file name.
        reviews_data: Dict with a "reviews" list and optionally "app_name".
            When "reviews" is missing or empty nothing is written.
    """
    reviews = reviews_data.get("reviews", [])
    if not reviews:
        print("  리뷰 없음")
        return

    results = []
    total = len(reviews)

    for i in range(0, total, GEMINI_BATCH_SIZE):
        batch = reviews[i : i + GEMINI_BATCH_SIZE]
        print(f"  분석 중... {i+1}~{min(i+GEMINI_BATCH_SIZE, total)} / {total}")

        analyzed = analyze_batch(batch)
        if not isinstance(analyzed, list):
            analyzed = []

        # Index model output by reviewId, ignoring entries that are not
        # objects or whose id cannot serve as a lookup key.
        analyzed_map = {
            a["reviewId"]: a
            for a in analyzed
            if isinstance(a, dict) and isinstance(a.get("reviewId"), (str, int))
        }

        for r in batch:
            result = analyzed_map.get(r["reviewId"])
            if result is None:
                result = fallback_analysis(r)
            elif any(key not in result for key in ("sentiment", "category", "summary")):
                # The model omitted part of the schema: fill only the gaps.
                for key, value in fallback_analysis(r).items():
                    result.setdefault(key, value)
            results.append(_merge_review_fields(result, r))

        # Pause after every batch (including the last one) so the next
        # request stays under the API rate limit.
        time.sleep(2)

    # Persist the results.
    output = {
        "app_key": app_key,
        "app_name": reviews_data.get("app_name", ""),
        "analyzed_at": datetime.now().isoformat(),
        "total_analyzed": len(results),
        "results": results,
    }
    filepath = f"data/{app_key}_analyzed.json"
    with open(filepath, "w", encoding="utf-8") as f:
        json.dump(output, f, ensure_ascii=False, indent=2)
    print(f"  -> {filepath} ({len(results)}건)")


def main():
    """Run the analysis for every app listed in ``APPS``.

    Prints a start banner, then for each app prints its name, loads its
    review file and analyzes it; apps whose data is missing or empty are
    reported and skipped. Prints a completion banner at the end.
    """
    banner = "=" * 60

    print(banner)
    print("Gemini AI 리뷰 분석 시작")
    print(banner)

    for app_key, app_config in APPS.items():
        print(f"\n[{app_config['name']}]")
        loaded = load_reviews(app_key)
        if loaded:
            analyze_app(app_key, loaded)
        else:
            print("  데이터 없음 - 건너뜀")

    print("\n" + banner)
    print("분석 완료!")
    print(banner)


# Run the analysis only when this file is executed as a script.
if __name__ == "__main__":
    main()