-
Notifications
You must be signed in to change notification settings - Fork 0
139 lines (127 loc) · 4.34 KB
/
weekly-ingest.yml
File metadata and controls
139 lines (127 loc) · 4.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
name: weekly-ingest

# Runs once a week (and on demand): scrapes the upstream catalogs, drafts any
# missing SKUs into a TechAPI worktree, and opens a PR for curator review.
on:
  schedule:
    # Mondays 06:29 UTC, after coverage-report (06:23).
    - cron: '29 6 * * 1'
  workflow_dispatch:
    inputs:
      category:
        description: 'Category to ingest'
        type: choice
        options:
          - cpu
          - gpu
          - smartphone
        default: cpu
      limit:
        description: 'Max candidates per source'
        type: string
        default: '50'
      include_drafts:
        description: 'Write incomplete records too (PR marked as draft)'
        type: boolean
        default: false

# The default GITHUB_TOKEN is limited to read-only access to repository contents.
permissions:
  contents: read
jobs:
  # Single job: run the ingest (plus the CPU-only benchmark enrichment) against
  # a checkout of GetTechAPI/TechAPI, publish the summaries as an artifact, and
  # open a PR when the data tree changed.
  ingest:
    runs-on: ubuntu-latest
    env:
      # Runs without workflow_dispatch inputs (the schedule trigger) fall back
      # to the defaults on the right-hand side of each expression.
      CATEGORY: ${{ inputs.category || 'cpu' }}
      LIMIT: ${{ inputs.limit || '50' }}
      INCLUDE_DRAFTS: ${{ inputs.include_drafts || 'false' }}
      TECHAPI_TOKEN: ${{ secrets.TECHAPI_TOKEN }}
    steps:
      - uses: actions/checkout@v4

      # Use the PAT when present so we can push to TechAPI later;
      # fall back to the default token for read-only test runs.
      - uses: actions/checkout@v4
        with:
          repository: GetTechAPI/TechAPI
          path: TechAPI
          token: ${{ secrets.TECHAPI_TOKEN || secrets.GITHUB_TOKEN }}

      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"
          cache: pip

      - name: Install
        run: pip install -e .

      - name: Run ingest
        env:
          TECHAPI_DATA_DIR: ${{ github.workspace }}/TechAPI/data
        run: |
          DRAFTS_FLAG=""
          if [ "$INCLUDE_DRAFTS" = "true" ]; then
            DRAFTS_FLAG="--include-drafts"
          fi
          # $DRAFTS_FLAG is intentionally unquoted: when empty it must expand
          # to no argument at all rather than to an empty string.
          python -m app.ingest \
            --category "$CATEGORY" \
            --limit "$LIMIT" \
            --data-root TechAPI/data \
            --summary ingest-summary.md \
            $DRAFTS_FLAG

      # Variant-safe benchmark backfill on existing CPU records (PassMark).
      # CPU-only; never overwrites, only fills nulls on exact heading matches.
      # Non-fatal: a scrape hiccup must not sink the weekly ingest PR.
      - name: Enrich benchmarks (PassMark, cpu only)
        if: env.CATEGORY == 'cpu'
        continue-on-error: true
        env:
          TECHAPI_DATA_DIR: ${{ github.workspace }}/TechAPI/data
        run: |
          python -m app.ingest.enrich \
            --data-root TechAPI/data \
            --limit "$LIMIT" \
            --min-year 2008 \
            --sleep 0.5 \
            --summary enrich-summary.md

      # The enrich step is skipped for non-cpu categories and is allowed to
      # fail, so its summary is appended only when the file actually exists.
      - name: Combine summaries for PR body
        run: |
          cp ingest-summary.md pr-body.md
          if [ -f enrich-summary.md ]; then
            printf '\n\n---\n\n' >> pr-body.md
            cat enrich-summary.md >> pr-body.md
          fi

      - name: Upload summary artifact
        uses: actions/upload-artifact@v4
        with:
          name: ingest-summary
          path: |
            ingest-summary.md
            enrich-summary.md
            pr-body.md

      # Only data/ is staged by the PR step below, so only data/ decides
      # whether a PR is needed. Checking the whole checkout would report
      # has_changes=true for a stray file elsewhere, and the later
      # `git commit` would then fail with nothing staged.
      - name: Check whether ingest produced any additions
        id: changes
        run: |
          cd TechAPI
          if [ -n "$(git status --porcelain -- data)" ]; then
            echo "has_changes=true" >> "$GITHUB_OUTPUT"
          else
            echo "has_changes=false" >> "$GITHUB_OUTPUT"
            # Surface unexpected writes instead of dropping them silently.
            if [ -n "$(git status --porcelain)" ]; then
              echo "::warning::Ingest left changes outside data/ only; no PR will be opened."
            fi
          fi

      - name: Open PR against TechAPI
        if: steps.changes.outputs.has_changes == 'true'
        env:
          GH_TOKEN: ${{ secrets.TECHAPI_TOKEN }}
        run: |
          set -euo pipefail
          # Without the PAT, skip the PR with a warning rather than failing
          # the run; the summaries were already uploaded as an artifact.
          if [ -z "${GH_TOKEN:-}" ]; then
            echo "::warning::Ingest produced additions but TECHAPI_TOKEN is unset; skipping PR. Summary attached as artifact."
            exit 0
          fi
          cd TechAPI
          # UTC timestamp in the branch name keeps repeated runs from colliding.
          BRANCH="ingest/${CATEGORY}-$(date -u +%Y%m%d-%H%M%S)"
          git config user.name "TechEngineBot"
          git config user.email "289859915+TechEngineBot@users.noreply.github.com"
          git checkout -b "$BRANCH"
          git add data/
          git commit -m "feat(data/${CATEGORY}): weekly ingest"
          git push -u origin "$BRANCH"
          DRAFT_FLAG=""
          if [ "$INCLUDE_DRAFTS" = "true" ]; then
            DRAFT_FLAG="--draft"
          fi
          # pr-body.md lives in the workspace root, one level above TechAPI/.
          # $DRAFT_FLAG is intentionally unquoted so an empty value adds no
          # argument.
          gh pr create \
            --title "feat(data/${CATEGORY}): weekly ingest" \
            --body-file ../pr-body.md \
            --base main \
            --head "$BRANCH" \
            $DRAFT_FLAG