{
  "id": "ft-eval-10",
  "meta": {
    "instanceId": "vorlux-hub"
  },
  "name": "Vorlux AI | Fine-Tune Model Evaluation",
  "active": true,
  "nodes": [
    {
      "id": "d0e1f2a3-0010-4ddd-8010-000000000001",
      "name": "Webhook Trigger",
      "type": "n8n-nodes-base.webhook",
      "typeVersion": 2,
      "position": [220, 300],
      "parameters": {
        "path": "finetune-eval",
        "httpMethod": "POST",
        "responseMode": "lastNode",
        "options": {}
      }
    },
    {
      "id": "d0e1f2a3-0010-4ddd-8010-000000000002",
      "name": "Run Evaluation Suite",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [460, 300],
      "notes": "Runs 10 workflow-creation test prompts against both the custom model and the base model and scores each response from 0 to 100",
      "parameters": {
        "mode": "runOnceForAllItems",
        "jsCode": "// Sends 10 workflow-creation prompts to the custom and base models through Ollama and scores each response from 0 to 100.\nconst ollamaUrl = $env.OLLAMA_BASE_URL || 'http://localhost:11434';\nconst customModel = $input.first().json.body?.model || 'workflow-agent-v2';\nconst baseModel = $input.first().json.body?.baseModel || 'qwen2.5:7b';\n\nconst testCases = [\n  { id: 1, prompt: 'Create an n8n workflow that monitors RSS feeds every 2 hours and posts new articles to Discord', expectedNodes: ['scheduleTrigger', 'httpRequest', 'code'] },\n  { id: 2, prompt: 'Build a workflow with a webhook trigger that receives data, validates it, and stores in the Hub API', expectedNodes: ['webhook', 'if', 'httpRequest'] },\n  { id: 3, prompt: 'Create a daily scheduled workflow that pulls YouTube analytics and generates a report', expectedNodes: ['scheduleTrigger', 'httpRequest', 'code'] },\n  { id: 4, prompt: 'Design a workflow that takes form submissions, sends a confirmation email, and logs to a spreadsheet', expectedNodes: ['formTrigger', 'emailSend', 'httpRequest'] },\n  { id: 5, prompt: 'Build an error handler workflow that catches failed executions and alerts via Discord', expectedNodes: ['httpRequest', 'code'] },\n  { id: 6, prompt: 'Create a workflow that fetches blog posts, generates social media content, and queues posts', expectedNodes: ['scheduleTrigger', 'httpRequest', 'code'] },\n  { id: 7, prompt: 'Design a workflow for processing incoming emails, extracting action items, and creating tasks', expectedNodes: ['httpRequest', 'code'] },\n  { id: 8, prompt: 'Build a content pipeline that takes a topic, generates an article via AI, and publishes it', expectedNodes: ['webhook', 'httpRequest', 'code'] },\n  { id: 9, prompt: 'Create a workflow that syncs data between two APIs every 30 minutes with deduplication', expectedNodes: ['scheduleTrigger', 'httpRequest', 'code'] },\n  { id: 10, prompt: 'Design a multi-step approval workflow with Discord notifications at each stage', expectedNodes: ['webhook', 'if', 'httpRequest'] }\n];\n\n// Generates one completion with the given model and scores its output.\n// Returns { score, outputLength, nodeCount, issues }, plus error when the request fails.\nasync function evaluateModel(model, prompt) {\n  try {\n    const res = await fetch(ollamaUrl + '/api/generate', {\n      method: 'POST', headers: {'Content-Type':'application/json'},\n      body: JSON.stringify({ model, prompt, stream: false, options: { temperature: 0.3 } }),\n      signal: AbortSignal.timeout(90000)\n    });\n    // A non-2xx reply (for example an unknown model) is a failed generation, not an invalid-JSON answer.\n    if (!res.ok) throw new Error('Ollama responded with HTTP ' + res.status);\n    const data = await res.json();\n    const output = typeof data?.response === 'string' ? data.response : '';\n\n    let score = 0;\n    let parsed = null;\n    const issues = [];\n\n    // 1. Valid JSON? (30 pts)\n    try { parsed = JSON.parse(output); score += 30; } catch { issues.push('invalid_json'); }\n\n    // Only a real array is treated as the node list; any other value is scored as having no nodes.\n    const nodes = Array.isArray(parsed?.nodes) ? parsed.nodes : [];\n\n    if (parsed) {\n      // 2. Has nodes array? (15 pts)\n      if (nodes.length > 0) {\n        score += 15;\n        // 3. Nodes have required fields? (15 pts) Null entries count as incomplete instead of throwing.\n        const validNodes = nodes.filter(n => n && n.type && n.name && n.id);\n        if (validNodes.length === nodes.length) score += 15;\n        else issues.push('incomplete_nodes');\n\n        // 4. Node types are real? (10 pts)\n        const knownPrefixes = ['n8n-nodes-base.', '@n8n/'];\n        const realTypes = nodes.filter(n => typeof n?.type === 'string' && knownPrefixes.some(p => n.type.startsWith(p)));\n        if (realTypes.length >= nodes.length * 0.8) score += 10;\n        else issues.push('unknown_node_types');\n      } else { issues.push('no_nodes'); }\n\n      // 5. Has connections? (15 pts)\n      if (parsed.connections && typeof parsed.connections === 'object' && Object.keys(parsed.connections).length > 0) {\n        score += 15;\n      } else { issues.push('no_connections'); }\n\n      // 6. Has positions? (5 pts)\n      if (nodes.some(n => n?.position)) score += 5;\n\n      // 7. Reasonable structure (10 pts)\n      if (nodes.length >= 2 && nodes.length <= 20) score += 10;\n      else issues.push('unreasonable_size');\n    }\n\n    return { score, outputLength: output.length, nodeCount: nodes.length, issues };\n  } catch (err) {\n    // Same fields as a successful result so consumers can read them unconditionally.\n    return { score: 0, outputLength: 0, nodeCount: 0, error: String(err).substring(0, 100), issues: ['generation_failed'] };\n  }\n}\n\nconst results = [];\nfor (const tc of testCases) {\n  const customResult = await evaluateModel(customModel, tc.prompt);\n  const baseResult = await evaluateModel(baseModel, tc.prompt);\n  results.push({\n    id: tc.id,\n    prompt: tc.prompt.substring(0, 80),\n    custom: customResult,\n    base: baseResult,\n    improvement: customResult.score - baseResult.score\n  });\n}\n\nconst customAvg = Math.round(results.reduce((s, r) => s + r.custom.score, 0) / results.length);\nconst baseAvg = Math.round(results.reduce((s, r) => s + r.base.score, 0) / results.length);\n\nreturn [{ json: { results, customModel, baseModel, customAvg, baseAvg, improvement: customAvg - baseAvg, passed: customAvg >= 60 } }];"
      }
    },
    {
      "id": "d0e1f2a3-0010-4ddd-8010-000000000003",
      "name": "Store Eval Report",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.2,
      "position": [740, 200],
      "parameters": {
        "method": "POST",
        "url": "={{$env.VORLUX_HUB_URL}}/api/admin/finetune/eval-report",
        "sendBody": true,
        "specifyBody": "json",
        "jsonBody": "={{ JSON.stringify({ report: $json, timestamp: new Date().toISOString() }) }}",
        "options": {
          "timeout": 15000
        }
      }
    },
    {
      "id": "d0e1f2a3-0010-4ddd-8010-000000000004",
      "name": "Discord Report",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.2,
      "position": [740, 400],
      "notes": "Posts the evaluation summary as a Discord embed; the body is built with JSON.stringify so model names taken from the webhook payload are escaped",
      "parameters": {
        "method": "POST",
        "url": "={{$env.DISCORD_OPS_WEBHOOK}}",
        "sendBody": true,
        "specifyBody": "json",
        "jsonBody": "={{ JSON.stringify({ embeds: [{ title: 'Model Evaluation Report', description: '**Custom (' + $json.customModel + '): ' + $json.customAvg + '%**\\n**Base (' + $json.baseModel + '): ' + $json.baseAvg + '%**\\n**Improvement: ' + ($json.improvement > 0 ? '+' : '') + $json.improvement + '%**\\n\\n**Test Results:**\\n' + $json.results.map(r => (r.custom.score >= 60 ? '\\u2705' : '\\u274c') + ' #' + r.id + ' Custom:' + r.custom.score + '% Base:' + r.base.score + '%' + (r.improvement > 0 ? ' (+' + r.improvement + ')' : '')).join('\\n'), color: $json.passed ? 5763719 : 15548997, footer: { text: 'Fine-Tune Model Evaluation' } }] }) }}",
        "options": {
          "timeout": 10000
        }
      }
    }
  ],
  "connections": {
    "Webhook Trigger": {
      "main": [
        [
          {
            "node": "Run Evaluation Suite",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Run Evaluation Suite": {
      "main": [
        [
          {
            "node": "Store Eval Report",
            "type": "main",
            "index": 0
          },
          {
            "node": "Discord Report",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  },
  "settings": {
    "executionOrder": "v1",
    "saveManualExecutions": true,
    "saveExecutionProgress": true
  },
  "tags": [
    { "name": "ai" },
    { "name": "finetune" },
    { "name": "evaluation" }
  ],
  "versionId": "2"
}