mvp 1.3 ready for internal test

This commit is contained in:
ValueOn AG 2025-04-30 23:30:07 +02:00
parent 14af3448c5
commit f6da59d98c
38 changed files with 11902 additions and 1 deletions

25
env_playground.env Normal file
View file

@ -0,0 +1,25 @@
# Playground Environment Configuration
# System Configuration
APP_ENV_TYPE=play
APP_ENV_LABEL="Playground Instance"
APP_CALL=uvicorn app:app --host 0.0.0.0 --port 8000
# Database Configuration System
DB_SYSTEM_HOST=/home/_powerondb
DB_SYSTEM_DATABASE=system
DB_SYSTEM_USER=dev_user
DB_SYSTEM_PASSWORD_SECRET=prod_password
# Database Configuration LucyDOM
DB_LUCYDOM_HOST=/home/_powerondb
DB_LUCYDOM_DATABASE=lucydom
DB_LUCYDOM_USER=dev_user
DB_LUCYDOM_PASSWORD_SECRET=prod_password
# Security Configuration
APP_JWT_SECRET_SECRET=dev_jwt_secret_token
APP_TOKEN_EXPIRY=300
# CORS Configuration
APP_ALLOWED_ORIGINS="http://localhost:8080","http://localhost:3000"

View file

@ -128,7 +128,13 @@ class LucyDOMInterface:
"userId": effectiveUserId,
"content": "Develop a UI/UX design concept for [APPLICATION/WEBSITE]. Consider the target audience, main functions, and brand identity. Describe the visual design, navigation, interaction patterns, and information architecture. Explain how the design optimizes user-friendliness and user experience.",
"name": "Design: UI/UX Design"
}
},
{
"mandateId": effectiveMandateId,
"userId": effectiveUserId,
"content": "Gib mir die ersten 1000 Primzahlen",
"name": "Code: Primzahlen"
}
]
# Create prompts

View file

@ -0,0 +1,77 @@
2
3
5
7
11
13
17
19
23
29
31
37
41
43
47
53
59
61
67
71
73
79
83
89
97
101
103
107
109
113
127
131
137
139
149
151
157
163
167
173
179
181
191
193
197
199
211
223
227
229
233
239
241
251
257
263
269
271
277
281
283
293
307
311
313
317
331
337
347
349
353
359
367
373
379
383
389

View file

@ -0,0 +1,38 @@
inputFiles = [] # DO NOT CHANGE THIS LINE

def is_prime(n):
    """Trial-division primality test using the 6k±1 wheel."""
    if n < 2:
        return False
    if n in (2, 3):
        return True
    if n % 2 == 0 or n % 3 == 0:
        return False
    d = 5
    while d * d <= n:
        if n % d == 0 or n % (d + 2) == 0:
            return False
        d += 6
    return True

def generate_primes(count):
    """Return a list holding the first `count` primes, smallest first."""
    found = []
    candidate = 2
    while len(found) < count:
        if is_prime(candidate):
            found.append(candidate)
        candidate += 1
    return found

primes = generate_primes(123)
prime_numbers_content = "\n".join(str(p) for p in primes)

# Sandbox contract: print a JSON mapping of output filename -> file payload.
result = {
    "prime_numbers.txt": {
        "content": prime_numbers_content,
        "base64Encoded": False,
        "contentType": "text/plain"
    }
}

import json
print(json.dumps(result))

View file

@ -0,0 +1,19 @@
[
{
"attempt": 1,
"code": "inputFiles = [] # DO NOT CHANGE THIS LINE\n\ndef is_prime(n):\n if n <= 1:\n return False\n if n <= 3:\n return True\n if n % 2 == 0 or n % 3 == 0:\n return False\n i = 5\n while i * i <= n:\n if n % i == 0 or n % (i + 2) == 0:\n return False\n i += 6\n return True\n\ndef generate_primes(count):\n primes = []\n num = 2\n while len(primes) < count:\n if is_prime(num):\n primes.append(num)\n num += 1\n return primes\n\nprimes = generate_primes(123)\nprime_numbers_content = \"\\n\".join(map(str, primes))\n\nresult = {\n \"prime_numbers.txt\": {\n \"content\": prime_numbers_content,\n \"base64Encoded\": False,\n \"contentType\": \"text/plain\"\n }\n}\n\nimport json\nprint(json.dumps(result))",
"result": {
"success": true,
"output": "{\"prime_numbers.txt\": {\"content\": \"2\\n3\\n5\\n7\\n11\\n13\\n17\\n19\\n23\\n29\\n31\\n37\\n41\\n43\\n47\\n53\\n59\\n61\\n67\\n71\\n73\\n79\\n83\\n89\\n97\\n101\\n103\\n107\\n109\\n113\\n127\\n131\\n137\\n139\\n149\\n151\\n157\\n163\\n167\\n173\\n179\\n181\\n191\\n193\\n197\\n199\\n211\\n223\\n227\\n229\\n233\\n239\\n241\\n251\\n257\\n263\\n269\\n271\\n277\\n281\\n283\\n293\\n307\\n311\\n313\\n317\\n331\\n337\\n347\\n349\\n353\\n359\\n367\\n373\\n379\\n383\\n389\\n397\\n401\\n409\\n419\\n421\\n431\\n433\\n439\\n443\\n449\\n457\\n461\\n463\\n467\\n479\\n487\\n491\\n499\\n503\\n509\\n521\\n523\\n541\\n547\\n557\\n563\\n569\\n571\\n577\\n587\\n593\\n599\\n601\\n607\\n613\\n617\\n619\\n631\\n641\\n643\\n647\\n653\\n659\\n661\\n673\\n677\", \"base64Encoded\": false, \"contentType\": \"text/plain\"}}\n",
"error": "",
"result": {
"prime_numbers.txt": {
"content": "2\n3\n5\n7\n11\n13\n17\n19\n23\n29\n31\n37\n41\n43\n47\n53\n59\n61\n67\n71\n73\n79\n83\n89\n97\n101\n103\n107\n109\n113\n127\n131\n137\n139\n149\n151\n157\n163\n167\n173\n179\n181\n191\n193\n197\n199\n211\n223\n227\n229\n233\n239\n241\n251\n257\n263\n269\n271\n277\n281\n283\n293\n307\n311\n313\n317\n331\n337\n347\n349\n353\n359\n367\n373\n379\n383\n389\n397\n401\n409\n419\n421\n431\n433\n439\n443\n449\n457\n461\n463\n467\n479\n487\n491\n499\n503\n509\n521\n523\n541\n547\n557\n563\n569\n571\n577\n587\n593\n599\n601\n607\n613\n617\n619\n631\n641\n643\n647\n653\n659\n661\n673\n677",
"base64Encoded": false,
"contentType": "text/plain"
}
},
"exitCode": 0
}
}
]

View file

@ -0,0 +1,40 @@
inputFiles = [] # DO NOT CHANGE THIS LINE

def is_prime(n):
    """Return True when n is prime, via trial division over 6k±1 candidates."""
    if n <= 1:
        return False
    if n <= 3:
        return True
    if n % 2 == 0 or n % 3 == 0:
        return False
    factor = 5
    while factor * factor <= n:
        if n % factor == 0 or n % (factor + 2) == 0:
            return False
        factor += 6
    return True

def generate_primes(count):
    """Collect the first `count` primes by scanning the integers upward."""
    primes, num = [], 1
    while len(primes) < count:
        num += 1
        if is_prime(num):
            primes.append(num)
    return primes

primes = generate_primes(202)
prime_numbers_content = "\n".join(map(str, primes))

# Emit the sandbox result: filename -> payload, JSON-encoded on stdout.
result = {
    "prime_numbers.txt": {
        "content": prime_numbers_content,
        "base64Encoded": False,
        "contentType": "text/plain"
    }
}

import json
print(json.dumps(result))

View file

@ -0,0 +1,19 @@
[
{
"attempt": 1,
"code": "inputFiles = [] # DO NOT CHANGE THIS LINE\n\n# REQUIREMENTS: \n\ndef is_prime(n):\n if n <= 1:\n return False\n if n <= 3:\n return True\n if n % 2 == 0 or n % 3 == 0:\n return False\n i = 5\n while i * i <= n:\n if n % i == 0 or n % (i + 2) == 0:\n return False\n i += 6\n return True\n\ndef generate_primes(count):\n primes = []\n num = 2\n while len(primes) < count:\n if is_prime(num):\n primes.append(num)\n num += 1\n return primes\n\nprimes = generate_primes(202)\nprime_numbers_content = \"\\n\".join(map(str, primes))\n\nresult = {\n \"prime_numbers.txt\": {\n \"content\": prime_numbers_content,\n \"base64Encoded\": False,\n \"contentType\": \"text/plain\"\n }\n}\n\nimport json\nprint(json.dumps(result))",
"result": {
"success": true,
"output": "{\"prime_numbers.txt\": {\"content\": \"2\\n3\\n5\\n7\\n11\\n13\\n17\\n19\\n23\\n29\\n31\\n37\\n41\\n43\\n47\\n53\\n59\\n61\\n67\\n71\\n73\\n79\\n83\\n89\\n97\\n101\\n103\\n107\\n109\\n113\\n127\\n131\\n137\\n139\\n149\\n151\\n157\\n163\\n167\\n173\\n179\\n181\\n191\\n193\\n197\\n199\\n211\\n223\\n227\\n229\\n233\\n239\\n241\\n251\\n257\\n263\\n269\\n271\\n277\\n281\\n283\\n293\\n307\\n311\\n313\\n317\\n331\\n337\\n347\\n349\\n353\\n359\\n367\\n373\\n379\\n383\\n389\\n397\\n401\\n409\\n419\\n421\\n431\\n433\\n439\\n443\\n449\\n457\\n461\\n463\\n467\\n479\\n487\\n491\\n499\\n503\\n509\\n521\\n523\\n541\\n547\\n557\\n563\\n569\\n571\\n577\\n587\\n593\\n599\\n601\\n607\\n613\\n617\\n619\\n631\\n641\\n643\\n647\\n653\\n659\\n661\\n673\\n677\\n683\\n691\\n701\\n709\\n719\\n727\\n733\\n739\\n743\\n751\\n757\\n761\\n769\\n773\\n787\\n797\\n809\\n811\\n821\\n823\\n827\\n829\\n839\\n853\\n857\\n859\\n863\\n877\\n881\\n883\\n887\\n907\\n911\\n919\\n929\\n937\\n941\\n947\\n953\\n967\\n971\\n977\\n983\\n991\\n997\\n1009\\n1013\\n1019\\n1021\\n1031\\n1033\\n1039\\n1049\\n1051\\n1061\\n1063\\n1069\\n1087\\n1091\\n1093\\n1097\\n1103\\n1109\\n1117\\n1123\\n1129\\n1151\\n1153\\n1163\\n1171\\n1181\\n1187\\n1193\\n1201\\n1213\\n1217\\n1223\\n1229\\n1231\", \"base64Encoded\": false, \"contentType\": \"text/plain\"}}\n",
"error": "",
"result": {
"prime_numbers.txt": {
"content": "2\n3\n5\n7\n11\n13\n17\n19\n23\n29\n31\n37\n41\n43\n47\n53\n59\n61\n67\n71\n73\n79\n83\n89\n97\n101\n103\n107\n109\n113\n127\n131\n137\n139\n149\n151\n157\n163\n167\n173\n179\n181\n191\n193\n197\n199\n211\n223\n227\n229\n233\n239\n241\n251\n257\n263\n269\n271\n277\n281\n283\n293\n307\n311\n313\n317\n331\n337\n347\n349\n353\n359\n367\n373\n379\n383\n389\n397\n401\n409\n419\n421\n431\n433\n439\n443\n449\n457\n461\n463\n467\n479\n487\n491\n499\n503\n509\n521\n523\n541\n547\n557\n563\n569\n571\n577\n587\n593\n599\n601\n607\n613\n617\n619\n631\n641\n643\n647\n653\n659\n661\n673\n677\n683\n691\n701\n709\n719\n727\n733\n739\n743\n751\n757\n761\n769\n773\n787\n797\n809\n811\n821\n823\n827\n829\n839\n853\n857\n859\n863\n877\n881\n883\n887\n907\n911\n919\n929\n937\n941\n947\n953\n967\n971\n977\n983\n991\n997\n1009\n1013\n1019\n1021\n1031\n1033\n1039\n1049\n1051\n1061\n1063\n1069\n1087\n1091\n1093\n1097\n1103\n1109\n1117\n1123\n1129\n1151\n1153\n1163\n1171\n1181\n1187\n1193\n1201\n1213\n1217\n1223\n1229\n1231",
"base64Encoded": false,
"contentType": "text/plain"
}
},
"exitCode": 0
}
}
]

View file

@ -0,0 +1,202 @@
2
3
5
7
11
13
17
19
23
29
31
37
41
43
47
53
59
61
67
71
73
79
83
89
97
101
103
107
109
113
127
131
137
139
149
151
157
163
167
173
179
181
191
193
197
199
211
223
227
229
233
239
241
251
257
263
269
271
277
281
283
293
307
311
313
317
331
337
347
349
353
359
367
373
379
383
389
397
401
409
419
421
431
433
439
443
449
457
461
463
467
479
487
491
499
503
509
521
523
541
547
557
563
569
571
577
587
593
599
601
607
613
617
619
631
641
643
647
653
659
661
673
677
683
691
701
709
719
727
733
739
743
751
757
761
769
773
787
797
809
811
821
823
827
829
839
853
857
859
863
877
881
883
887
907
911
919
929
937
941
947
953
967
971
977
983
991
997
1009
1013
1019
1021
1031
1033
1039
1049
1051
1061
1063
1069
1087
1091
1093
1097
1103
1109
1117
1123
1129
1151
1153
1163
1171
1181
1187
1193
1201
1213
1217
1223
1229
1231

View file

@ -0,0 +1,38 @@
inputFiles = [] # DO NOT CHANGE THIS LINE

def is_prime(n):
    """6k±1 trial-division primality check."""
    if n <= 1:
        return False
    if n <= 3:
        return True
    if n % 2 == 0 or n % 3 == 0:
        return False
    k = 5
    while k * k <= n:
        if n % k == 0 or n % (k + 2) == 0:
            return False
        k += 6
    return True

def generate_primes(limit):
    """Return the first `limit` primes in ascending order."""
    collected = []
    value = 2
    while len(collected) < limit:
        if is_prime(value):
            collected.append(value)
        value += 1
    return collected

primes = generate_primes(1000)
primes_content = "\n".join(map(str, primes))

# Output file manifest expected by the executor, as JSON on stdout.
result = {
    "prime_numbers.txt": {
        "content": primes_content,
        "base64Encoded": False,
        "contentType": "text/plain"
    }
}

import json
print(json.dumps(result))

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,38 @@
inputFiles = [] # DO NOT CHANGE THIS LINE

def is_prime(n):
    """Primality by trial division; multiples of 2 and 3 are rejected up front."""
    if n < 4:
        return n > 1
    if n % 2 == 0 or n % 3 == 0:
        return False
    step = 5
    while step * step <= n:
        if n % step == 0 or n % (step + 2) == 0:
            return False
        step += 6
    return True

def generate_primes(count):
    """Build the list of the first `count` primes."""
    acc = []
    n = 2
    while len(acc) < count:
        if is_prime(n):
            acc.append(n)
        n += 1
    return acc

primes = generate_primes(999)
prime_numbers_content = "\n".join(str(p) for p in primes)

# JSON manifest on stdout: output filename -> file payload.
result = {
    "prime_numbers.txt": {
        "content": prime_numbers_content,
        "base64Encoded": False,
        "contentType": "text/plain"
    }
}

import json
print(json.dumps(result))

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,999 @@
2
3
5
7
11
13
17
19
23
29
31
37
41
43
47
53
59
61
67
71
73
79
83
89
97
101
103
107
109
113
127
131
137
139
149
151
157
163
167
173
179
181
191
193
197
199
211
223
227
229
233
239
241
251
257
263
269
271
277
281
283
293
307
311
313
317
331
337
347
349
353
359
367
373
379
383
389
397
401
409
419
421
431
433
439
443
449
457
461
463
467
479
487
491
499
503
509
521
523
541
547
557
563
569
571
577
587
593
599
601
607
613
617
619
631
641
643
647
653
659
661
673
677
683
691
701
709
719
727
733
739
743
751
757
761
769
773
787
797
809
811
821
823
827
829
839
853
857
859
863
877
881
883
887
907
911
919
929
937
941
947
953
967
971
977
983
991
997
1009
1013
1019
1021
1031
1033
1039
1049
1051
1061
1063
1069
1087
1091
1093
1097
1103
1109
1117
1123
1129
1151
1153
1163
1171
1181
1187
1193
1201
1213
1217
1223
1229
1231
1237
1249
1259
1277
1279
1283
1289
1291
1297
1301
1303
1307
1319
1321
1327
1361
1367
1373
1381
1399
1409
1423
1427
1429
1433
1439
1447
1451
1453
1459
1471
1481
1483
1487
1489
1493
1499
1511
1523
1531
1543
1549
1553
1559
1567
1571
1579
1583
1597
1601
1607
1609
1613
1619
1621
1627
1637
1657
1663
1667
1669
1693
1697
1699
1709
1721
1723
1733
1741
1747
1753
1759
1777
1783
1787
1789
1801
1811
1823
1831
1847
1861
1867
1871
1873
1877
1879
1889
1901
1907
1913
1931
1933
1949
1951
1973
1979
1987
1993
1997
1999
2003
2011
2017
2027
2029
2039
2053
2063
2069
2081
2083
2087
2089
2099
2111
2113
2129
2131
2137
2141
2143
2153
2161
2179
2203
2207
2213
2221
2237
2239
2243
2251
2267
2269
2273
2281
2287
2293
2297
2309
2311
2333
2339
2341
2347
2351
2357
2371
2377
2381
2383
2389
2393
2399
2411
2417
2423
2437
2441
2447
2459
2467
2473
2477
2503
2521
2531
2539
2543
2549
2551
2557
2579
2591
2593
2609
2617
2621
2633
2647
2657
2659
2663
2671
2677
2683
2687
2689
2693
2699
2707
2711
2713
2719
2729
2731
2741
2749
2753
2767
2777
2789
2791
2797
2801
2803
2819
2833
2837
2843
2851
2857
2861
2879
2887
2897
2903
2909
2917
2927
2939
2953
2957
2963
2969
2971
2999
3001
3011
3019
3023
3037
3041
3049
3061
3067
3079
3083
3089
3109
3119
3121
3137
3163
3167
3169
3181
3187
3191
3203
3209
3217
3221
3229
3251
3253
3257
3259
3271
3299
3301
3307
3313
3319
3323
3329
3331
3343
3347
3359
3361
3371
3373
3389
3391
3407
3413
3433
3449
3457
3461
3463
3467
3469
3491
3499
3511
3517
3527
3529
3533
3539
3541
3547
3557
3559
3571
3581
3583
3593
3607
3613
3617
3623
3631
3637
3643
3659
3671
3673
3677
3691
3697
3701
3709
3719
3727
3733
3739
3761
3767
3769
3779
3793
3797
3803
3821
3823
3833
3847
3851
3853
3863
3877
3881
3889
3907
3911
3917
3919
3923
3929
3931
3943
3947
3967
3989
4001
4003
4007
4013
4019
4021
4027
4049
4051
4057
4073
4079
4091
4093
4099
4111
4127
4129
4133
4139
4153
4157
4159
4177
4201
4211
4217
4219
4229
4231
4241
4243
4253
4259
4261
4271
4273
4283
4289
4297
4327
4337
4339
4349
4357
4363
4373
4391
4397
4409
4421
4423
4441
4447
4451
4457
4463
4481
4483
4493
4507
4513
4517
4519
4523
4547
4549
4561
4567
4583
4591
4597
4603
4621
4637
4639
4643
4649
4651
4657
4663
4673
4679
4691
4703
4721
4723
4729
4733
4751
4759
4783
4787
4789
4793
4799
4801
4813
4817
4831
4861
4871
4877
4889
4903
4909
4919
4931
4933
4937
4943
4951
4957
4967
4969
4973
4987
4993
4999
5003
5009
5011
5021
5023
5039
5051
5059
5077
5081
5087
5099
5101
5107
5113
5119
5147
5153
5167
5171
5179
5189
5197
5209
5227
5231
5233
5237
5261
5273
5279
5281
5297
5303
5309
5323
5333
5347
5351
5381
5387
5393
5399
5407
5413
5417
5419
5431
5437
5441
5443
5449
5471
5477
5479
5483
5501
5503
5507
5519
5521
5527
5531
5557
5563
5569
5573
5581
5591
5623
5639
5641
5647
5651
5653
5657
5659
5669
5683
5689
5693
5701
5711
5717
5737
5741
5743
5749
5779
5783
5791
5801
5807
5813
5821
5827
5839
5843
5849
5851
5857
5861
5867
5869
5879
5881
5897
5903
5923
5927
5939
5953
5981
5987
6007
6011
6029
6037
6043
6047
6053
6067
6073
6079
6089
6091
6101
6113
6121
6131
6133
6143
6151
6163
6173
6197
6199
6203
6211
6217
6221
6229
6247
6257
6263
6269
6271
6277
6287
6299
6301
6311
6317
6323
6329
6337
6343
6353
6359
6361
6367
6373
6379
6389
6397
6421
6427
6449
6451
6469
6473
6481
6491
6521
6529
6547
6551
6553
6563
6569
6571
6577
6581
6599
6607
6619
6637
6653
6659
6661
6673
6679
6689
6691
6701
6703
6709
6719
6733
6737
6761
6763
6779
6781
6791
6793
6803
6823
6827
6829
6833
6841
6857
6863
6869
6871
6883
6899
6907
6911
6917
6947
6949
6959
6961
6967
6971
6977
6983
6991
6997
7001
7013
7019
7027
7039
7043
7057
7069
7079
7103
7109
7121
7127
7129
7151
7159
7177
7187
7193
7207
7211
7213
7219
7229
7237
7243
7247
7253
7283
7297
7307
7309
7321
7331
7333
7349
7351
7369
7393
7411
7417
7433
7451
7457
7459
7477
7481
7487
7489
7499
7507
7517
7523
7529
7537
7541
7547
7549
7559
7561
7573
7577
7583
7589
7591
7603
7607
7621
7639
7643
7649
7669
7673
7681
7687
7691
7699
7703
7717
7723
7727
7741
7753
7757
7759
7789
7793
7817
7823
7829
7841
7853
7867
7873
7877
7879
7883
7901
7907

View file

@ -0,0 +1,39 @@
inputFiles = [] # DO NOT CHANGE THIS LINE

import json

def is_prime(n):
    """Return whether n is prime (deterministic trial division, 6k±1 wheel)."""
    if n <= 1:
        return False
    if n <= 3:
        return True
    if n % 2 == 0 or n % 3 == 0:
        return False
    p = 5
    while p * p <= n:
        if n % p == 0 or n % (p + 2) == 0:
            return False
        p += 6
    return True

def generate_primes(count):
    """First `count` primes, ascending."""
    acc = []
    value = 2
    while len(acc) < count:
        if is_prime(value):
            acc.append(value)
        value += 1
    return acc

primes = generate_primes(779)
prime_numbers_content = "\n".join(str(x) for x in primes)

# Emit the executor's expected JSON manifest on stdout.
result = {
    "prime_numbers.txt": {
        "content": prime_numbers_content,
        "base64Encoded": False,
        "contentType": "text/plain"
    }
}

print(json.dumps(result))

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,779 @@
2
3
5
7
11
13
17
19
23
29
31
37
41
43
47
53
59
61
67
71
73
79
83
89
97
101
103
107
109
113
127
131
137
139
149
151
157
163
167
173
179
181
191
193
197
199
211
223
227
229
233
239
241
251
257
263
269
271
277
281
283
293
307
311
313
317
331
337
347
349
353
359
367
373
379
383
389
397
401
409
419
421
431
433
439
443
449
457
461
463
467
479
487
491
499
503
509
521
523
541
547
557
563
569
571
577
587
593
599
601
607
613
617
619
631
641
643
647
653
659
661
673
677
683
691
701
709
719
727
733
739
743
751
757
761
769
773
787
797
809
811
821
823
827
829
839
853
857
859
863
877
881
883
887
907
911
919
929
937
941
947
953
967
971
977
983
991
997
1009
1013
1019
1021
1031
1033
1039
1049
1051
1061
1063
1069
1087
1091
1093
1097
1103
1109
1117
1123
1129
1151
1153
1163
1171
1181
1187
1193
1201
1213
1217
1223
1229
1231
1237
1249
1259
1277
1279
1283
1289
1291
1297
1301
1303
1307
1319
1321
1327
1361
1367
1373
1381
1399
1409
1423
1427
1429
1433
1439
1447
1451
1453
1459
1471
1481
1483
1487
1489
1493
1499
1511
1523
1531
1543
1549
1553
1559
1567
1571
1579
1583
1597
1601
1607
1609
1613
1619
1621
1627
1637
1657
1663
1667
1669
1693
1697
1699
1709
1721
1723
1733
1741
1747
1753
1759
1777
1783
1787
1789
1801
1811
1823
1831
1847
1861
1867
1871
1873
1877
1879
1889
1901
1907
1913
1931
1933
1949
1951
1973
1979
1987
1993
1997
1999
2003
2011
2017
2027
2029
2039
2053
2063
2069
2081
2083
2087
2089
2099
2111
2113
2129
2131
2137
2141
2143
2153
2161
2179
2203
2207
2213
2221
2237
2239
2243
2251
2267
2269
2273
2281
2287
2293
2297
2309
2311
2333
2339
2341
2347
2351
2357
2371
2377
2381
2383
2389
2393
2399
2411
2417
2423
2437
2441
2447
2459
2467
2473
2477
2503
2521
2531
2539
2543
2549
2551
2557
2579
2591
2593
2609
2617
2621
2633
2647
2657
2659
2663
2671
2677
2683
2687
2689
2693
2699
2707
2711
2713
2719
2729
2731
2741
2749
2753
2767
2777
2789
2791
2797
2801
2803
2819
2833
2837
2843
2851
2857
2861
2879
2887
2897
2903
2909
2917
2927
2939
2953
2957
2963
2969
2971
2999
3001
3011
3019
3023
3037
3041
3049
3061
3067
3079
3083
3089
3109
3119
3121
3137
3163
3167
3169
3181
3187
3191
3203
3209
3217
3221
3229
3251
3253
3257
3259
3271
3299
3301
3307
3313
3319
3323
3329
3331
3343
3347
3359
3361
3371
3373
3389
3391
3407
3413
3433
3449
3457
3461
3463
3467
3469
3491
3499
3511
3517
3527
3529
3533
3539
3541
3547
3557
3559
3571
3581
3583
3593
3607
3613
3617
3623
3631
3637
3643
3659
3671
3673
3677
3691
3697
3701
3709
3719
3727
3733
3739
3761
3767
3769
3779
3793
3797
3803
3821
3823
3833
3847
3851
3853
3863
3877
3881
3889
3907
3911
3917
3919
3923
3929
3931
3943
3947
3967
3989
4001
4003
4007
4013
4019
4021
4027
4049
4051
4057
4073
4079
4091
4093
4099
4111
4127
4129
4133
4139
4153
4157
4159
4177
4201
4211
4217
4219
4229
4231
4241
4243
4253
4259
4261
4271
4273
4283
4289
4297
4327
4337
4339
4349
4357
4363
4373
4391
4397
4409
4421
4423
4441
4447
4451
4457
4463
4481
4483
4493
4507
4513
4517
4519
4523
4547
4549
4561
4567
4583
4591
4597
4603
4621
4637
4639
4643
4649
4651
4657
4663
4673
4679
4691
4703
4721
4723
4729
4733
4751
4759
4783
4787
4789
4793
4799
4801
4813
4817
4831
4861
4871
4877
4889
4903
4909
4919
4931
4933
4937
4943
4951
4957
4967
4969
4973
4987
4993
4999
5003
5009
5011
5021
5023
5039
5051
5059
5077
5081
5087
5099
5101
5107
5113
5119
5147
5153
5167
5171
5179
5189
5197
5209
5227
5231
5233
5237
5261
5273
5279
5281
5297
5303
5309
5323
5333
5347
5351
5381
5387
5393
5399
5407
5413
5417
5419
5431
5437
5441
5443
5449
5471
5477
5479
5483
5501
5503
5507
5519
5521
5527
5531
5557
5563
5569
5573
5581
5591
5623
5639
5641
5647
5651
5653
5657
5659
5669
5683
5689
5693
5701
5711
5717
5737
5741
5743
5749
5779
5783
5791
5801
5807
5813
5821
5827
5839
5843
5849
5851
5857
5861
5867
5869
5879
5881
5897
5903
5923
5927

View file

@ -0,0 +1,38 @@
inputFiles = [] # DO NOT CHANGE THIS LINE

def is_prime(n):
    """True iff n is prime; trial division skipping multiples of 2 and 3."""
    if n <= 1:
        return False
    if n <= 3:
        return True
    if n % 2 == 0 or n % 3 == 0:
        return False
    base = 5
    while base * base <= n:
        if n % base == 0 or n % (base + 2) == 0:
            return False
        base += 6
    return True

def generate_primes(limit):
    """Return the first `limit` primes as a list."""
    result_list = []
    n = 1
    while len(result_list) < limit:
        n += 1
        if is_prime(n):
            result_list.append(n)
    return result_list

primes = generate_primes(1000)
primes_content = "\n".join(map(str, primes))

# Sandbox output manifest (note: this variant writes first_1000_primes.txt).
result = {
    "first_1000_primes.txt": {
        "content": primes_content,
        "base64Encoded": False,
        "contentType": "text/plain"
    }
}

import json
print(json.dumps(result))

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,270 @@
"""
Agent Registry Module.
Provides a central registry system for all available agents.
Optimized for the standardized task processing pattern.
"""
import os
import logging
import importlib
import uuid
from datetime import datetime
from typing import Dict, Any, List, Optional
from modules.mimeUtils import isTextMimeType, determineContentEncoding
logger = logging.getLogger(__name__)
"""
Updates to the AgentBase class in workflowAgentsRegistry.py to include base64Encoded flag handling.
"""
class AgentBase:
    """
    Base class for all chat agents.

    Defines the standardized interface for task processing. Concrete agents
    override processTask(); the helpers below cover output formatting and
    the base64-encoding decision shared by every agent.
    """

    def __init__(self):
        """Initialize the base agent with placeholder identity and no dependencies."""
        self.name = "base-agent"
        self.description = "Basic agent functionality"
        self.capabilities = []
        # AI service dependency; injected later via setDependencies().
        self.mydom = None

    def setDependencies(self, mydom=None):
        """Set external dependencies for the agent."""
        self.mydom = mydom

    def getAgentInfo(self) -> Dict[str, Any]:
        """
        Return standardized information about the agent's capabilities.

        Returns:
            Dictionary with name, description, and capabilities
        """
        return {
            "name": self.name,
            "description": self.description,
            "capabilities": self.capabilities
        }

    async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process a standardized task structure and return results.

        This method must be implemented by all concrete agent classes.

        Args:
            task: A dictionary containing:
                - taskId: Unique ID for this task
                - workflowId: ID of the parent workflow (optional)
                - prompt: The main instruction for the agent
                - inputDocuments: List of document objects to process
                - outputSpecifications: List of required output documents
                - context: Additional contextual information

        Returns:
            A dictionary containing:
                - feedback: Text response explaining what the agent did
                - documents: List of document objects created by the agent,
                  each containing a "base64Encoded" flag in addition to
                  "label" and "content"
        """
        # Base implementation - should be overridden by specialized agents
        logger.warning(f"Agent {self.name} is using the default implementation of processTask")
        return {
            "feedback": f"The processTask method was not implemented by agent '{self.name}'.",
            "documents": []
        }

    def determineBase64EncodingFlag(self, filename: str, content: Any, mimeType: str = None) -> bool:
        """Wrapper for the utility function determineContentEncoding()."""
        return determineContentEncoding(filename, content, mimeType)

    def isTextMimeType(self, mimeType: str) -> bool:
        """Wrapper for the utility function isTextMimeType()."""
        return isTextMimeType(mimeType)

    def formatAgentDocumentOutput(self, label: str, content: Any, contentType: str = None) -> Dict[str, Any]:
        """
        Helper method to properly format a document output with base64Encoded flag and metadata.

        Args:
            label: Name of the document
            content: Content of the document (str or bytes)
            contentType: Optional content type for the document

        Returns:
            Properly formatted document dictionary
        """
        import base64
        # Determine if content should be base64 encoded
        should_base64_encode = self.determineBase64EncodingFlag(label, content)
        # Process content based on type and encoding flag
        formatted_content = content
        if should_base64_encode:
            if isinstance(content, bytes):
                # Convert binary to base64
                formatted_content = base64.b64encode(content).decode('utf-8')
            elif isinstance(content, str):
                try:
                    # validate=True is required here: without it b64decode()
                    # silently discards non-alphabet characters and "succeeds"
                    # on almost any text, so plain strings were previously
                    # passed through un-encoded as if they were base64.
                    base64.b64decode(content, validate=True)
                    # Decoded cleanly, so treat it as already base64 encoded.
                    formatted_content = content
                except ValueError:
                    # Not valid base64 (binascii.Error subclasses ValueError),
                    # so encode it now.
                    formatted_content = base64.b64encode(content.encode('utf-8')).decode('utf-8')
        # Create document with metadata
        doc = {
            "label": label,
            "content": formatted_content,
            "base64Encoded": should_base64_encode,
            "metadata": {}
        }
        # Add content type if provided
        if contentType:
            doc["metadata"]["contentType"] = contentType
        return doc
class AgentRegistry:
    """Central registry for all available agents in the system."""

    # Singleton instance cache; populated lazily by getInstance().
    _instance = None

    @classmethod
    def getInstance(cls):
        """Return a singleton instance of the agent registry."""
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    def __init__(self):
        """Initialize the agent registry.

        Scans the module directory and imports agent modules as a side
        effect (see _loadAgents).

        Raises:
            RuntimeError: if constructed directly after the singleton
                already exists; use getInstance() instead.
        """
        if AgentRegistry._instance is not None:
            raise RuntimeError("Singleton instance already exists - use getInstance()")
        self.agents = {}
        self.mydom = None
        self._loadAgents()

    def _loadAgents(self):
        """Load all available agents from modules.

        Discovers files named agent*.py next to this module, imports each
        as modules.<name>, and registers the agent it exposes.
        """
        logger.info("Loading agent modules...")
        # List of agent modules to load
        agentModules = []
        agentDir = os.path.dirname(__file__)
        # Search the directory for agent modules
        for filename in os.listdir(agentDir):
            if filename.startswith("agent") and filename.endswith(".py"):
                agentModules.append(filename[0:-3]) # Remove .py extension
        if not agentModules:
            logger.warning("No agent modules found")
            return
        logger.info(f"{len(agentModules)} agent modules found")
        # Load each agent module
        for moduleName in agentModules:
            try:
                # Import the module
                module = importlib.import_module(f"modules.{moduleName}")
                # Look for agent class or get_*_agent function
                # NOTE(review): assumes "agent" occurs only once in the module
                # name (e.g. "agentAnalyst" -> "Analyst"); confirm no module
                # name contains "agent" twice.
                agentName = moduleName.split("agent")[-1]
                className = f"Agent{agentName}"
                getterName = f"getAgent{agentName}"
                agent = None
                # Try to get the agent via the get*Agent function
                if hasattr(module, getterName):
                    getterFunc = getattr(module, getterName)
                    agent = getterFunc()
                    logger.info(f"Agent '{agent.name}' loaded via {getterName}()")
                # Alternatively, try to instantiate the agent directly
                elif hasattr(module, className):
                    agentClass = getattr(module, className)
                    agent = agentClass()
                    logger.info(f"Agent '{agent.name}' directly instantiated")
                if agent:
                    # Register the agent
                    self.registerAgent(agent)
                else:
                    logger.warning(f"No agent class or getter function found in module {moduleName}")
            except ImportError as e:
                logger.error(f"Module {moduleName} could not be imported: {e}")
            except Exception as e:
                # Import succeeded but instantiation/registration failed;
                # skip this module and keep loading the others.
                logger.error(f"Error loading agent from module {moduleName}: {e}")

    def setMydom(self, mydom):
        """Set the AI service for all agents."""
        self.mydom = mydom
        self.updateAgentDependencies()

    def updateAgentDependencies(self):
        """Update dependencies for all registered agents."""
        for agentId, agent in self.agents.items():
            if hasattr(agent, 'setDependencies'):
                agent.setDependencies(mydom=self.mydom)

    def registerAgent(self, agent):
        """
        Register an agent in the registry.

        An agent registered under an already-used name replaces the
        previous entry.

        Args:
            agent: The agent to register
        """
        agentId = getattr(agent, 'name', "unknown_agent")
        # Initialize agent with dependencies
        if hasattr(agent, 'setDependencies'):
            agent.setDependencies(mydom=self.mydom)
        self.agents[agentId] = agent
        logger.debug(f"Agent '{agent.name}' registered")

    def getAgent(self, agentIdentifier: str):
        """
        Return an agent instance

        Args:
            agentIdentifier: ID or type of the desired agent

        Returns:
            Agent instance or None if not found
        """
        if agentIdentifier in self.agents:
            agent = self.agents[agentIdentifier]
            # Ensure the agent has the AI service
            if hasattr(agent, 'setDependencies') and self.mydom:
                agent.setDependencies(mydom=self.mydom)
            return agent
        logger.error(f"Agent with identifier '{agentIdentifier}' not found")
        return None

    def getAllAgents(self) -> Dict[str, Any]:
        """Return all registered agents."""
        return self.agents

    def getAgentInfos(self) -> List[Dict[str, Any]]:
        """Return information about all registered agents (deduplicated
        in case one agent object is registered under several ids)."""
        agentInfos = []
        seenAgents = set()
        for agent in self.agents.values():
            if agent not in seenAgents:
                agentInfos.append(agent.getAgentInfo())
                seenAgents.add(agent)
        return agentInfos
# Singleton factory for the agent registry
def getAgentRegistry():
    """Return the process-wide AgentRegistry singleton (see AgentRegistry.getInstance)."""
    return AgentRegistry.getInstance()

670
static/118_agentAnalyst.py Normal file
View file

@ -0,0 +1,670 @@
"""
Data analyst agent for analysis and interpretation of data.
Focuses on output-first design with AI-powered analysis.
"""
import logging
import json
import io
import base64
from typing import Dict, Any, List
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from modules.workflowAgentsRegistry import AgentBase
logger = logging.getLogger(__name__)
class AgentAnalyst(AgentBase):
"""AI-driven agent for data analysis and visualization"""
    def __init__(self):
        """Initialize the data analysis agent: identity, capability list,
        and the default matplotlib style."""
        super().__init__()
        self.name = "analyst"
        self.description = "Analyzes data using AI-powered insights and visualizations, produce diagrams and visualizations"
        self.capabilities = [
            "dataAnalysis",
            "statistics",
            "visualization",
            "dataInterpretation",
            "reportGeneration"
        ]
        # Set default visualization settings.
        # NOTE(review): plt.style.use mutates global matplotlib state for the
        # whole process, not just this agent — confirm that is intended.
        plt.style.use('seaborn-v0_8-whitegrid')
def setDependencies(self, mydom=None):
"""Set external dependencies for the agent."""
self.mydom = mydom
async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
"""
Process a task by focusing on required outputs and using AI to generate them.
Args:
task: Task dictionary with prompt, inputDocuments, outputSpecifications
Returns:
Dictionary with feedback and documents
"""
try:
# Extract task information
prompt = task.get("prompt", "")
inputDocuments = task.get("inputDocuments", [])
outputSpecs = task.get("outputSpecifications", [])
# Check AI service
if not self.mydom:
return {
"feedback": "The Analyst agent requires an AI service to function.",
"documents": []
}
# Extract data from documents - focusing only on dataExtracted
datasets, documentContext = self._extractData(inputDocuments)
# Generate task analysis to understand what's needed
analysisPlan = await self._analyzeTask(prompt, documentContext, datasets, outputSpecs)
# Generate all required output documents
documents = []
# If no output specs provided, create default analysis outputs
if not outputSpecs:
outputSpecs = []
# Process each output specification
for spec in outputSpecs:
outputLabel = spec.get("label", "")
outputDescription = spec.get("description", "")
# Determine type based on file extension
outputType = outputLabel.split('.')[-1].lower() if '.' in outputLabel else "txt"
# Generate appropriate content based on output type
if outputType in ['png', 'jpg', 'jpeg', 'svg']:
# Create visualization
document = await self._createVisualization(
datasets, prompt, outputLabel, analysisPlan, outputDescription
)
documents.append(document)
elif outputType in ['csv', 'json', 'xlsx']:
# Create data document
document = await self._createDataDocument(
datasets, prompt, outputLabel, analysisPlan, outputDescription
)
documents.append(document)
else:
# Create text document (report, analysis, etc.)
document = await self._createTextDocument(
datasets, documentContext, prompt, outputLabel,
outputType, analysisPlan, outputDescription
)
documents.append(document)
# Generate feedback
feedback = f"{analysisPlan.get('analysisApproach')}"
if analysisPlan.get("keyInsights"):
feedback += f"\n\n{analysisPlan.get('keyInsights')}"
return {
"feedback": feedback,
"documents": documents
}
except Exception as e:
logger.error(f"Error in analysis: {str(e)}", exc_info=True)
return {
"feedback": f"Error during analysis: {str(e)}",
"documents": []
}
def _extractData(self, documents: List[Dict[str, Any]]) -> tuple:
"""
Extract data from documents, focusing on dataExtracted fields.
Args:
documents: List of input documents
Returns:
Tuple of (datasets dictionary, document context text)
"""
datasets = {}
documentContext = ""
# Process each document
for doc in documents:
docName = doc.get("name", "unnamed")
if doc.get("ext"):
docName = f"{docName}.{doc.get('ext')}"
documentContext += f"\n\n--- {docName} ---\n"
# Process contents
for content in doc.get("contents", []):
# Focus only on dataExtracted
if content.get("dataExtracted"):
extractedText = content.get("dataExtracted", "")
documentContext += extractedText
# Try to parse as structured data if appropriate
if docName.lower().endswith(('.csv', '.tsv')):
try:
df = pd.read_csv(io.StringIO(extractedText))
datasets[docName] = df
except:
pass
elif docName.lower().endswith('.json'):
try:
jsonData = json.loads(extractedText)
if isinstance(jsonData, list):
df = pd.DataFrame(jsonData)
datasets[docName] = df
elif isinstance(jsonData, dict):
# Handle nested JSON structures
if any(isinstance(v, list) for v in jsonData.values()):
for key, value in jsonData.items():
if isinstance(value, list) and len(value) > 0:
df = pd.DataFrame(value)
datasets[f"{docName}:{key}"] = df
else:
df = pd.DataFrame([jsonData])
datasets[docName] = df
except:
pass
# Try to detect tabular data in text content
if docName not in datasets and len(extractedText.splitlines()) > 2:
lines = extractedText.splitlines()
if any(',' in line for line in lines[:5]):
try:
df = pd.read_csv(io.StringIO(extractedText))
if len(df.columns) > 1:
datasets[docName] = df
except:
pass
elif any('\t' in line for line in lines[:5]):
try:
df = pd.read_csv(io.StringIO(extractedText), sep='\t')
if len(df.columns) > 1:
datasets[docName] = df
except:
pass
return datasets, documentContext
async def _analyzeTask(self, prompt: str, context: str, datasets: Dict, outputSpecs: List) -> Dict:
    """
    Use AI to analyze the task and create a plan for analysis.

    Args:
        prompt: The task prompt
        context: Document context text (only the first 1000 chars are sent)
        datasets: Dictionary of extracted datasets (name -> DataFrame)
        outputSpecs: Output specifications

    Returns:
        Analysis plan dictionary parsed from the AI response, or a generic
        fallback plan when the response has no JSON or the call fails.
    """
    # Single fallback plan shared by the no-JSON and the exception paths
    # (previously duplicated verbatim in both branches).
    fallbackPlan = {
        "analysisType": "general",
        "keyQuestions": ["What insights can be extracted from this data?"],
        "recommendedVisualizations": [],
        "keyInsights": "Analysis plan could not be created",
        "analysisApproach": "General exploratory analysis"
    }
    # Prepare dataset information; per-dataset failures degrade to an
    # error marker instead of aborting the whole plan. Narrowed from a
    # bare `except:` so interrupts are not swallowed.
    datasetInfo = {}
    for name, df in datasets.items():
        try:
            datasetInfo[name] = {
                "shape": df.shape,
                "columns": df.columns.tolist(),
                "dtypes": {col: str(df[col].dtype) for col in df.columns},
                "sample": df.head(3).to_dict(orient='records')
            }
        except Exception:
            datasetInfo[name] = {"error": "Could not process dataset"}
    analysisPrompt = f"""
Analyze this data analysis task and create a plan.
TASK: {prompt}
AVAILABLE DATA:
{json.dumps(datasetInfo, indent=2)}
DOCUMENT CONTEXT:
{context[:1000]}... (truncated)
OUTPUT REQUIREMENTS:
{json.dumps(outputSpecs, indent=2)}
Create a detailed analysis plan in JSON format with the following structure:
{{
"analysisType": "statistical|trend|comparative|predictive|cluster|general",
"keyQuestions": ["question1", "question2"],
"recommendedVisualizations": [{{
"type": "chart_type",
"dataSource": "dataset_name",
"variables": ["col1", "col2"],
"purpose": "explanation"
}}],
"keyInsights": "brief summary of initial insights",
"analysisApproach": "brief description of recommended approach"
}}
Only return valid JSON. No preamble or explanations.
"""
    try:
        response = await self.mydom.callAi([
            {"role": "system", "content": "You are a data analysis expert. Respond with valid JSON only."},
            {"role": "user", "content": analysisPrompt}
        ], produceUserAnswer = True)
        # Extract the first '{' .. last '}' span as the JSON payload,
        # tolerating any surrounding prose from the model.
        jsonStart = response.find('{')
        jsonEnd = response.rfind('}') + 1
        if jsonStart >= 0 and jsonEnd > jsonStart:
            return json.loads(response[jsonStart:jsonEnd])
        # Fallback if no JSON object was found in the response
        return dict(fallbackPlan)
    except Exception as e:
        logger.warning(f"Error creating analysis plan: {str(e)}")
        return dict(fallbackPlan)
async def _createVisualization(self, datasets: Dict, prompt: str, outputLabel: str,
                               analysisPlan: Dict, description: str) -> Dict:
    """
    Create visualization document using AI guidance.

    Asks the AI service for matplotlib/seaborn code and executes it via
    exec() against the in-memory datasets, then captures the current
    figure as a base64-encoded image. Failures are rendered as an error
    image rather than raised.

    Args:
        datasets: Dictionary of datasets (name -> DataFrame)
        prompt: Original task prompt
        outputLabel: Output filename (extension selects the image format)
        analysisPlan: Analysis plan from AI
        description: Output description

    Returns:
        Visualization document
    """
    # Determine format from filename; default to png for unknown extensions
    formatType = outputLabel.split('.')[-1].lower()
    if formatType not in ['png', 'jpg', 'jpeg', 'svg']:
        formatType = 'png'
    # If no datasets available, create error message image
    # NOTE(review): this branch returns a raw dict, while the success paths
    # below use self.formatAgentDocumentOutput — presumably intentional; confirm.
    if not datasets:
        plt.figure(figsize=(10, 6))
        plt.text(0.5, 0.5, "No data available for visualization",
                 ha='center', va='center', fontsize=14)
        plt.tight_layout()
        imgData = self._getImageBase64(formatType)
        plt.close()
        return {
            "label": outputLabel,
            "content": imgData,
            "metadata": {
                "contentType": f"image/{formatType}"
            }
        }
    # Get recommended visualization from plan
    recommendedViz = analysisPlan.get("recommendedVisualizations", [])
    # Prepare dataset info for the first dataset if none specified
    if not recommendedViz and datasets:
        name, df = next(iter(datasets.items()))
        recommendedViz = [{
            "type": "auto",
            "dataSource": name,
            "variables": df.columns.tolist()[:5],
            "purpose": "general analysis"
        }]
    # Create visualization code prompt
    vizPrompt = f"""
Generate Python matplotlib/seaborn code to create a visualization for:
TASK: {prompt}
VISUALIZATION REQUIREMENTS:
- Output format: {formatType}
- Filename: {outputLabel}
- Description: {description}
RECOMMENDED VISUALIZATION:
{json.dumps(recommendedViz, indent=2)}
AVAILABLE DATASETS:
"""
    # Add dataset info only for sources the plan actually references
    for viz in recommendedViz:
        dataSource = viz.get("dataSource")
        if dataSource in datasets:
            df = datasets[dataSource]
            vizPrompt += f"\nDataset '{dataSource}':\n"
            vizPrompt += f"- Shape: {df.shape}\n"
            vizPrompt += f"- Columns: {df.columns.tolist()}\n"
            vizPrompt += f"- Sample data: {df.head(3).to_dict(orient='records')}\n"
    vizPrompt += """
Generate ONLY Python code that:
1. Uses matplotlib and/or seaborn to create a clear visualization
2. Sets figure size to (10, 6)
3. Includes appropriate titles, labels, and legend
4. Uses professional color schemes
5. Handles any missing data gracefully
Return ONLY executable Python code, no explanations or markdown.
"""
    try:
        # Get visualization code from AI
        vizCode = await self.mydom.callAi([
            {"role": "system", "content": "You are a data visualization expert. Provide only executable Python code."},
            {"role": "user", "content": vizPrompt}
        ], produceUserAnswer = True)
        # Clean code (strip markdown fences the model may have added)
        vizCode = vizCode.replace("```python", "").replace("```", "").strip()
        # Execute visualization code on a fresh figure
        plt.figure(figsize=(10, 6))
        # Make local variables available to the code
        localVars = {
            "plt": plt,
            "sns": sns,
            "pd": pd,
            "np": __import__('numpy')
        }
        # Add datasets to local variables
        for name, df in datasets.items():
            # Create a sanitized variable name (non-alphanumerics -> '_')
            varName = ''.join(c if c.isalnum() else '_' for c in name)
            localVars[varName] = df
            # Also add with standard names for simpler code
            if "df" not in localVars:
                localVars["df"] = df
            elif "df2" not in localVars:
                localVars["df2"] = df
        # Execute the visualization code.
        # SECURITY: exec() runs AI-generated code with full interpreter
        # privileges and no sandbox — acceptable only if the AI service
        # output is trusted; consider isolating like AgentCoder does.
        exec(vizCode, globals(), localVars)
        # Capture the image from the current matplotlib figure
        imgData = self._getImageBase64(formatType)
        plt.close()
        return self.formatAgentDocumentOutput(outputLabel, imgData, f"image/{formatType}")
    except Exception as e:
        logger.error(f"Error creating visualization: {str(e)}", exc_info=True)
        # Create error message image so the caller still gets a document
        plt.figure(figsize=(10, 6))
        plt.text(0.5, 0.5, f"Visualization error: {str(e)}",
                 ha='center', va='center', fontsize=12)
        plt.tight_layout()
        imgData = self._getImageBase64(formatType)
        plt.close()
        return self.formatAgentDocumentOutput(outputLabel, imgData, f"image/{formatType}")
async def _createDataDocument(self, datasets: Dict, prompt: str, outputLabel: str,
                              analysisPlan: Dict, description: str) -> Dict:
    """
    Create a data document (e.g., CSV, JSON) based on analysis.

    Asks the AI service for data-processing code, executes it via exec()
    against the in-memory datasets, and expects the code to leave its
    output in a string variable named 'result'.

    Args:
        datasets: Dictionary of datasets (name -> DataFrame)
        prompt: Original task prompt
        outputLabel: Output filename (extension selects the data format)
        analysisPlan: Analysis plan from AI
        description: Output description

    Returns:
        Data document
    """
    # Determine format from filename
    formatType = outputLabel.split('.')[-1].lower()
    # If no datasets available, return error message
    if not datasets:
        return {
            "label": outputLabel,
            "content": f"No data available for processing into {formatType} format.",
            "metadata": {
                "contentType": "text/plain"
            }
        }
    # Generate data processing instructions
    dataPrompt = f"""
Create Python code to process datasets and generate a {formatType} file for:
TASK: {prompt}
OUTPUT REQUIREMENTS:
- Format: {formatType}
- Filename: {outputLabel}
- Description: {description}
ANALYSIS CONTEXT:
{json.dumps(analysisPlan, indent=2)}
AVAILABLE DATASETS:
"""
    # Add dataset info for every dataset
    for name, df in datasets.items():
        dataPrompt += f"\nDataset '{name}':\n"
        dataPrompt += f"- Shape: {df.shape}\n"
        dataPrompt += f"- Columns: {df.columns.tolist()}\n"
        dataPrompt += f"- Sample data: {df.head(3).to_dict(orient='records')}\n"
    dataPrompt += """
Generate Python code that:
1. Processes the available dataset(s)
2. Performs necessary transformations, aggregations, or calculations
3. Outputs the result in the requested format
4. Returns the content as a string variable named 'result'
Return ONLY executable Python code, no explanations or markdown.
"""
    try:
        # Get data processing code from AI
        dataCode = await self.mydom.callAi([
            {"role": "system", "content": "You are a data processing expert. Provide only executable Python code."},
            {"role": "user", "content": dataPrompt}
        ], produceUserAnswer = True)
        # Clean code (strip markdown fences the model may have added)
        dataCode = dataCode.replace("```python", "").replace("```", "").strip()
        # Setup execution environment
        localVars = {"pd": pd, "np": __import__('numpy'), "io": io}
        # Add datasets to local variables
        for name, df in datasets.items():
            # Create a sanitized variable name (non-alphanumerics -> '_')
            varName = ''.join(c if c.isalnum() else '_' for c in name)
            localVars[varName] = df
            # Also add with standard names for simpler code
            if "df" not in localVars:
                localVars["df"] = df
            elif "df2" not in localVars:
                localVars["df2"] = df
        # Execute the code.
        # SECURITY: exec() runs AI-generated code with full interpreter
        # privileges and no sandbox — acceptable only if the AI service
        # output is trusted; consider isolating like AgentCoder does.
        exec(dataCode, globals(), localVars)
        # Get the result the generated code is expected to define
        result = localVars.get("result", "No output was generated.")
        # Determine content type from the requested format
        contentType = "text/csv" if formatType == "csv" else \
                      "application/json" if formatType == "json" else \
                      "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" if formatType == "xlsx" else \
                      "text/plain"
        return self.formatAgentDocumentOutput(outputLabel, result, contentType)
    except Exception as e:
        logger.error(f"Error creating data document: {str(e)}", exc_info=True)
        return {
            "label": outputLabel,
            "content": f"Error generating {formatType} document: {str(e)}",
            "metadata": {
                "contentType": "text/plain"
            }
        }
async def _createTextDocument(self, datasets: Dict, context: str, prompt: str,
                              outputLabel: str, formatType: str,
                              analysisPlan: Dict, description: str) -> Dict:
    """
    Create a text document (report, analysis, etc.) based on analysis.

    Summarizes each dataset, asks the AI service for the document body,
    wraps bare markdown/html output in minimal structure, and falls back
    to a formatted error document when the call fails.

    Args:
        datasets: Dictionary of datasets (name -> DataFrame)
        context: Document context text (only first 2000 chars are sent)
        prompt: Original task prompt
        outputLabel: Output filename
        formatType: Output format type (md/markdown/html/other)
        analysisPlan: Analysis plan from AI
        description: Output description

    Returns:
        Text document
    """
    # Build one textual summary per dataset for the generation prompt
    summaries = []
    for name, df in datasets.items():
        parts = [f"Dataset: {name}\n"]
        parts.append(f"- Shape: {df.shape[0]} rows, {df.shape[1]} columns\n")
        parts.append(f"- Columns: {', '.join(df.columns.tolist())}\n")
        numericCols = df.select_dtypes(include=['number']).columns
        if len(numericCols) > 0:
            parts.append("- Numeric Columns Stats:\n")
            for col in numericCols[:3]:  # only the first 3 numeric columns
                stats = df[col].describe()
                parts.append(f" - {col}: min={stats['min']:.2f}, max={stats['max']:.2f}, mean={stats['mean']:.2f}\n")
        summaries.append("".join(parts))
    summaryText = "".join(summaries)
    # Map the requested format to a MIME type
    if formatType in ["md", "markdown"]:
        contentType = "text/markdown"
    elif formatType == "html":
        contentType = "text/html"
    else:
        contentType = "text/plain"
    # Assemble the generation prompt
    requestPrompt = f"""
Create a detailed {formatType} document for:
TASK: {prompt}
OUTPUT REQUIREMENTS:
- Format: {formatType}
- Filename: {outputLabel}
- Description: {description}
ANALYSIS CONTEXT:
{json.dumps(analysisPlan, indent=2)}
DATASET SUMMARIES:
{summaryText}
DOCUMENT CONTEXT:
{context[:2000]}... (truncated)
Create a comprehensive, professional analysis document that addresses the task requirements.
The document should:
1. Have a clear structure with headings and sections
2. Include relevant data findings and insights
3. Provide appropriate interpretations and recommendations
4. Format the content according to the required output format
Your response should be the complete document content in the specified format.
"""
    try:
        # Request the full document body from the AI service
        documentBody = await self.mydom.callAi([
            {"role": "system", "content": f"You are a data analysis expert creating a {formatType} document."},
            {"role": "user", "content": requestPrompt}
        ], produceUserAnswer = True)
        # Ensure a minimal structural wrapper for markdown / html output
        if formatType in ["md", "markdown"] and not documentBody.strip().startswith("#"):
            documentBody = f"# Analysis Report\n\n{documentBody}"
        elif formatType == "html" and "<html" not in documentBody.lower():
            documentBody = f"<html><body>{documentBody}</body></html>"
        return self.formatAgentDocumentOutput(outputLabel, documentBody, contentType)
    except Exception as e:
        logger.error(f"Error creating text document: {str(e)}", exc_info=True)
        # Produce a simple error document in the requested format
        if formatType in ["md", "markdown"]:
            content = f"# Error in Analysis\n\nThere was an error generating the analysis: {str(e)}"
        elif formatType == "html":
            content = f"<html><body><h1>Error in Analysis</h1><p>There was an error generating the analysis: {str(e)}</p></body></html>"
        else:
            content = f"Error in Analysis\n\nThere was an error generating the analysis: {str(e)}"
        return {
            "label": outputLabel,
            "content": content,
            "metadata": {
                "contentType": contentType
            }
        }
def _getImageBase64(self, formatType: str = 'png') -> str:
    """
    Convert current matplotlib figure to base64 string.

    Args:
        formatType: Image format passed to plt.savefig (default 'png')

    Returns:
        Base64 encoded string of the rendered image
    """
    # Render the active figure into an in-memory buffer; the context
    # manager guarantees the buffer is released.
    with io.BytesIO() as stream:
        plt.savefig(stream, format=formatType, dpi=100)
        rawBytes = stream.getvalue()
    return base64.b64encode(rawBytes).decode('utf-8')
# Factory function for the Analyst agent
def getAgentAnalyst():
    """Factory: build and return a fresh Analyst agent instance."""
    agent = AgentAnalyst()
    return agent

764
static/119_agentCoder.py Normal file
View file

@ -0,0 +1,764 @@
"""
Simple Coder Agent for execution of Python code.
Modified to pass expected output document names to the generated code.
"""
import logging
import json
import os
import subprocess
import tempfile
import shutil
import sys
from typing import Dict, Any, List, Tuple
from modules.workflowAgentsRegistry import AgentBase
from modules.configuration import APP_CONFIG
logger = logging.getLogger(__name__)
class AgentCoder(AgentBase):
"""Simplified Agent for developing and executing Python code with integrated executor"""
def __init__(self):
    """Initialize the coder agent: identity, capabilities and executor limits."""
    super().__init__()
    self.name = "coder"
    self.description = "Develops and executes Python code for data processing and automation"
    # Capability tags advertised to the agent registry
    self.capabilities = [
        "code_development",
        "data_processing",
        "file_processing",
        "automation",
        "code_execution"
    ]
    # Executor settings
    # NOTE(review): int(APP_CONFIG.get(...)) raises TypeError if either key
    # is absent (get() returning None) — presumably both are always set in
    # configuration; confirm or add defaults.
    self.executorTimeout = int(APP_CONFIG.get("Agent_Coder_EXECUTION_TIMEOUT"))  # seconds
    self.executionRetryLimit = int(APP_CONFIG.get("Agent_Coder_EXECUTION_RETRY"))  # max retries
    # Working directory for code execution; created lazily by the executor
    self.tempDir = None
def setDependencies(self, mydom=None):
    """Set external dependencies for the agent.

    Args:
        mydom: AI service interface providing ``callAi``; when left as
            None, processTask refuses the task with a configuration error.
    """
    self.mydom = mydom
async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
    """
    Process a task and perform code development/execution.

    First checks if the task can be completed without code execution,
    then falls back to code generation if needed.
    Enhanced to ensure all generated documents are included in output.

    Args:
        task: Task dictionary with prompt, inputDocuments, outputSpecifications

    Returns:
        Dictionary with feedback and documents (always includes the generated
        code and the execution history as the first two documents when code
        execution was attempted)
    """
    # 1. Extract task information
    prompt = task.get("prompt", "")
    inputDocuments = task.get("inputDocuments", [])
    outputSpecs = task.get("outputSpecifications", [])
    # Check if AI service is available
    if not self.mydom:
        logger.error("No AI service configured for the Coder agent")
        return {
            "feedback": "The Coder agent is not properly configured.",
            "documents": []
        }
    # 2. Extract data from documents in separate categories
    documentData = []  # For raw file data (for code execution)
    contentData = []  # For content data (later use)
    contentExtraction = []  # For AI-extracted data (for quick completion)
    for doc in inputDocuments:
        # Create proper filename from name and ext
        filename = f"{doc.get('name')}.{doc.get('ext')}" if doc.get('ext') else doc.get('name')
        # Add main document data to documentData if it exists
        docData = doc.get('data', '')
        if docData:
            isBase64 = True  # Assume base64 encoded for document data
            documentData.append([filename, docData, isBase64])
        # Process contents for different uses
        if doc.get('contents'):
            for content in doc.get('contents', []):
                contentName = content.get('name', 'unnamed')
                # For AI-extracted data (quick completion)
                if content.get('dataExtracted'):
                    contentExtraction.append({
                        "filename": filename,
                        "contentName": contentName,
                        "contentData": content.get('dataExtracted', ''),
                        "contentType": content.get('contentType', ''),
                        "summary": content.get('summary', '')
                    })
                # For raw content data
                if content.get('data'):
                    rawData = content.get('data', '')
                    isBase64 = content.get('metadata', {}).get('base64Encoded', False)
                    contentData.append({
                        "filename": filename,
                        "contentName": contentName,
                        "data": rawData,
                        "isBase64": isBase64,
                        "contentType": content.get('contentType', '')
                    })
                    # Also add to documentData for code execution if not already added
                    if not docData or docData != rawData:
                        documentData.append([filename, rawData, isBase64])
    # 3. Check if task can be completed without code execution
    quickCompletion = await self._checkQuickCompletion(prompt, contentExtraction, outputSpecs)
    if quickCompletion and quickCompletion.get("complete") == 1:
        logger.info("Task completed without code execution")
        return {
            "feedback": quickCompletion.get("prompt", "Task completed successfully."),
            "documents": quickCompletion.get("documents", [])
        }
    else:
        logger.debug(f"Code to generate, no quick check")
    # If quick completion not possible, continue with code generation and execution
    logger.info("Generating code to solve the task")
    # 4. Generate code using AI
    code, requirements = await self._generateCode(prompt, outputSpecs)
    if not code:
        return {
            "feedback": "Failed to generate code for the task.",
            "documents": []
        }
    # 5. Replace the placeholder with actual inputFiles data
    # (repr() yields a Python literal that the generated script evaluates)
    documentDataJson = repr(documentData)
    codeWithData = code.replace("inputFiles = \"=== JSONLOAD ===\"", f"inputFiles = {documentDataJson}")
    # 6. Execute code with retry logic.
    # The loop always runs at least once (retryCount starts at 0), so
    # executionResult is guaranteed to be defined after the loop.
    retryCount = 0
    maxRetries = self.executionRetryLimit
    executionHistory = []
    while retryCount <= maxRetries:
        executionResult = self._executeCode(codeWithData, requirements)
        executionHistory.append({
            "attempt": retryCount + 1,
            "code": codeWithData,
            "result": executionResult
        })
        # Check if execution was successful
        if executionResult.get("success", False):
            logger.info(f"Code execution succeeded on attempt {retryCount + 1}")
            break
        # If we've reached max retries, exit the loop
        if retryCount >= maxRetries:
            logger.info(f"Reached maximum retry limit ({maxRetries}). Giving up.")
            break
        # Log the error and attempt to improve the code
        error = executionResult.get("error", "Unknown error")
        logger.info(f"Execution attempt {retryCount + 1} failed: {error}. Attempting to improve code.")
        # Generate improved code based on error
        improvedCode, improvedRequirements = await self._improveCode(
            originalCode=codeWithData,
            error=error,
            executionResult=executionResult,
            attempt=retryCount + 1,
            outputSpecs=outputSpecs
        )
        if improvedCode:
            codeWithData = improvedCode
            requirements = improvedRequirements
            logger.info(f"Code improved for retry {retryCount + 2}")
        else:
            # Fall back to re-running the unchanged code
            logger.warning("Failed to improve code, using original code for retry")
        retryCount += 1
    # 7. Process results and create output documents
    documents = []
    # Always add the final code document
    documents.append(self.formatAgentDocumentOutput("generated_code.py", codeWithData, "text/plain"))
    # Add execution history document
    executionHistoryStr = json.dumps(executionHistory, indent=2)
    documents.append(self.formatAgentDocumentOutput("execution_history.json", executionHistoryStr, "application/json"))
    # Enhanced result handling: Create documents based on execution results - fixed for proper content extraction
    if executionResult.get("success", False):
        resultData = executionResult.get("result")
        # Process results from the result dictionary if available
        if isinstance(resultData, dict):
            # First, create a mapping of expected output labels to their specs
            expectedOutputs = {spec.get("label"): spec for spec in outputSpecs}
            createdOutputs = set()
            for label, result_item in resultData.items():
                # Check if result follows the expected structure with nested content
                if isinstance(result_item, dict) and "content" in result_item:
                    # Extract values from the properly structured result
                    content = result_item.get("content", "")  # Extract the inner content
                    base64Encoded = result_item.get("base64Encoded", False)
                    contentType = result_item.get("contentType", "text/plain")
                    # Check if this label matches one of our expected output documents
                    # If not, but we haven't created all expected outputs yet, try to map it
                    finalLabel = label
                    if label not in expectedOutputs and len(expectedOutputs) > 0:
                        # Find an unused expected output label (dict order = spec order)
                        for expectedLabel in expectedOutputs:
                            if expectedLabel not in createdOutputs:
                                logger.warning(f"Remapping output '{label}' to expected '{expectedLabel}'")
                                finalLabel = expectedLabel
                                break
                    # Create document by passing only the content to formatAgentDocumentOutput
                    doc = self.formatAgentDocumentOutput(finalLabel, content, contentType)
                    # Override the base64Encoded flag with the value from the result
                    # This is needed since formatAgentDocumentOutput might determine a different value
                    if isinstance(base64Encoded, bool):
                        doc["base64Encoded"] = base64Encoded
                    documents.append(doc)
                    createdOutputs.add(finalLabel)
                    logger.info(f"Created document from result: {finalLabel} ({contentType}, base64={base64Encoded})")
                else:
                    # Not properly structured - log warning
                    logger.warning(f"Skipping improperly formatted result for '{label}'. Results must include 'content' field.")
        else:
            # No result dictionary found
            logger.warning("No valid result dictionary found or it's not properly formatted")
        # If no valid documents were created from the result dictionary but we have output specifications
        if len(documents) <= 2 and outputSpecs:  # Only code.py and history.json exist
            logger.warning("No valid documents created from result dictionary, using execution output for specifications")
            # Default to execution output
            output = executionResult.get("output", "")
            for spec in outputSpecs:
                label = spec.get("label", "output.txt")
                # Create basic document from output
                doc = self.formatAgentDocumentOutput(label, output, "text/plain")
                documents.append(doc)
                logger.info(f"Created document from output specification: {label}")
        if retryCount > 0:
            feedback = f"Code executed successfully after {retryCount + 1} attempts. Generated {len(documents) - 2} output files."
        else:
            feedback = f"Code executed successfully. Generated {len(documents) - 2} output files."
    else:
        # Execution failed
        error = executionResult.get("error", "Unknown error")
        documents.append(self.formatAgentDocumentOutput("execution_error.txt", f"Error executing code:\n\n{error}", "text/plain"))
        if retryCount > 0:
            feedback = f"Error during code execution after {retryCount + 1} attempts: {error}"
        else:
            feedback = f"Error during code execution: {error}"
    return {
        "feedback": feedback,
        "documents": documents
    }
async def _improveCode(self, originalCode: str, error: str, executionResult: Dict[str, Any], attempt: int, outputSpecs: List[Dict[str, Any]] = None) -> Tuple[str, List[str]]:
    """
    Improve code based on execution error.

    Enhanced to maintain proper output handling with correct document structure.

    Args:
        originalCode: The code that failed to execute
        error: The error message
        executionResult: Complete execution result dictionary
        attempt: Current attempt number
        outputSpecs: List of expected output specifications

    Returns:
        Tuple of (improvedCode, requirements); (None, []) when the AI call
        fails, which the caller treats as "retry with the unchanged code".
    """
    # Create a string with output specifications to be included in the prompt
    outputSpecsStr = ""
    if outputSpecs:
        outputSpecsStr = "\nEXPECTED OUTPUT DOCUMENTS:\n"
        for i, spec in enumerate(outputSpecs, 1):
            label = spec.get("label", f"output{i}.txt")
            description = spec.get("description", "")
            outputSpecsStr += f"{i}. {label} - {description}\n"
    # Create prompt for code improvement
    improvementPrompt = f"""
Fix the following Python code that failed during execution. This is attempt {attempt} to fix the code.
ORIGINAL CODE:
{originalCode}
ERROR MESSAGE:
{error}
STDOUT:
{executionResult.get('output', '')}
{outputSpecsStr}
INSTRUCTIONS:
1. Fix all errors identified in the error message
2. Diagnose and fix any logical issues
3. Pay special attention to:
- Type conversions and data handling
- Error handling and edge cases
- Resource management (file handles, etc.)
- Syntax errors and typos
4. Keep the inputFiles handling logic intact
5. Maintain the same overall structure and purpose
OUTPUT REQUIREMENTS (VERY IMPORTANT):
- Your code MUST define a 'result' variable as a dictionary to store ALL outputs
- The key for each entry MUST be the full filename with extension (e.g., "output.txt")
- The value for each entry MUST be a dictionary with the following structure:
{{
"content": string, # The actual content (text or base64-encoded string)
"base64Encoded": boolean, # Set to true for binary data, false for text data
"contentType": string # MIME type of the content (e.g., "text/plain", "application/json")
}}
- Example result dictionary:
result = {{
"output.txt": {{
"content": "This is text content",
"base64Encoded": False,
"contentType": "text/plain"
}},
"chart.png": {{
"content": "base64encodedstring...",
"base64Encoded": True,
"contentType": "image/png"
}}
}}
- NEVER write files to disk using open() or similar methods - use the result dictionary instead
JSON OUTPUT (CRITICAL):
- After creating the result dictionary, you MUST print it as JSON to stdout
- Make sure your code includes: print(json.dumps(result)) as the final line
- This printed JSON is how the system captures your result
REQUIREMENTS:
Required packages should be specified as:
# REQUIREMENTS: library==version,library2>=version
- You may add/remove requirements as needed to fix the code
Return ONLY Python code without explanations or markdown.
"""
    # Call AI service
    messages = [
        {"role": "system", "content": "You are an expert Python code debugger. Provide only fixed Python code without explanations or formatting. Ensure all generated files are included in the 'result' dictionary and that result is printed as JSON with print(json.dumps(result))."},
        {"role": "user", "content": improvementPrompt}
    ]
    try:
        improvedContent = await self.mydom.callAi(messages, temperature=0.2)
        # Extract code and requirements
        improvedCode = self._cleanCode(improvedContent)
        # Extract requirements: only the first "# REQUIREMENTS:" line is used
        requirements = []
        for line in improvedCode.split('\n'):
            if line.strip().startswith("# REQUIREMENTS:"):
                reqStr = line.replace("# REQUIREMENTS:", "").strip()
                requirements = [r.strip() for r in reqStr.split(',') if r.strip()]
                break
        return improvedCode, requirements
    except Exception as e:
        logger.error(f"Error improving code: {str(e)}")
        return None, []
async def _checkQuickCompletion(self, prompt: str, contentExtraction: List[Dict], outputSpecs: List[Dict]) -> Dict:
    """
    Check if the task can be completed without writing and executing code.

    Sends the AI-extracted document content plus the output specifications
    to the AI service and asks it to either solve the task directly or
    declare that code generation is required.

    Args:
        prompt: The task prompt
        contentExtraction: List of extracted content data with contentName and dataExtracted
        outputSpecs: List of output specifications

    Returns:
        Dictionary with completion status and results ("complete" == 1 when
        solved directly), or None when code execution is required or the
        response could not be parsed.
    """
    # If no data or no output specs, can't do a quick completion
    if not contentExtraction or not outputSpecs:
        return None
    # Create a prompt for the AI to check if this can be completed directly
    specsJson = json.dumps(outputSpecs)
    dataJson = json.dumps(contentExtraction)
    checkPrompt = f"""
Analyze this task and determine if it can be completed directly without writing code.
TASK:
{prompt}
EXTRACTED DATA AVAILABLE:
{dataJson}
Each entry in the extracted data contains:
- filename: The source file name
- contentName: The specific content section name
- contentData: The AI-extracted text from the content
- contentType: The type of content (text, csv, etc.)
- summary: A brief summary of the content
REQUIRED OUTPUT:
{specsJson}
If the task can be completed directly with the available extracted data, respond with:
{{"complete": 1, "prompt": "Brief explanation of the solution", "documents": [
{{"label": "filename.ext", "content": "content here"}}
]}}
If code would be needed to properly complete this task, respond with:
{{"complete": 0, "prompt": "Explanation why code is needed"}}
Only return valid JSON. Your entire response must be parseable as JSON.
"""
    # Call AI service
    logger.debug(f"Checking if task can be completed without code execution: {checkPrompt}")
    messages = [
        {"role": "system", "content": "You are an AI assistant that determines if tasks require code execution. Reply with JSON only."},
        {"role": "user", "content": checkPrompt}
    ]
    try:
        # Use a lower temperature for more deterministic response
        response = await self.mydom.callAi(messages, produceUserAnswer = True, temperature=0.1)
        # Parse response as JSON
        if response:
            try:
                # Find JSON in response if there's any text around it
                jsonStart = response.find('{')
                jsonEnd = response.rfind('}') + 1
                if jsonStart >= 0 and jsonEnd > jsonStart:
                    jsonStr = response[jsonStart:jsonEnd]
                    result = json.loads(jsonStr)
                    # Check if this is a proper response
                    if "complete" in result:
                        return result
            except json.JSONDecodeError:
                # Malformed JSON: fall through to the default below
                # (dead `pass` after this log statement removed)
                logger.debug("Failed to parse quick completion response as JSON")
    except Exception as e:
        logger.debug(f"Error during quick completion check: {str(e)}")
    # Default to requiring code execution
    return None
async def _generateCode(self, prompt: str, outputSpecs: List[Dict[str, Any]] = None) -> Tuple[str, List[str]]:
    """
    Generate Python code from a prompt with the inputFiles placeholder.

    Enhanced to emphasize proper result output handling with correct document
    structure. The generated code is expected to start with the literal
    `inputFiles = "=== JSONLOAD ==="` line, which processTask later replaces
    with the actual input-file data.

    Args:
        prompt: The task prompt
        outputSpecs: List of expected output specifications

    Returns:
        Tuple of (code, requirements); requirements come from the first
        "# REQUIREMENTS:" comment line found in the generated code.
    """
    # Create a string with output specifications to be included in the prompt
    outputSpecsStr = ""
    if outputSpecs:
        outputSpecsStr = "\nEXPECTED OUTPUT DOCUMENTS:\n"
        for i, spec in enumerate(outputSpecs, 1):
            label = spec.get("label", f"output{i}.txt")
            description = spec.get("description", "")
            outputSpecsStr += f"{i}. {label} - {description}\n"
    # Create improved prompt for code generation
    aiPrompt = f"""
Generate Python code to solve the following task:
TASK:
{prompt}
{outputSpecsStr}
INPUT FILES:
- 'inputFiles' variable is provided as [[filename, data, isBase64], ...]
- For text files (isBase64=False): use data directly as string
- For binary files (isBase64=True): use base64.b64decode(data)
OUTPUT REQUIREMENTS (VERY IMPORTANT):
- Your code MUST define a 'result' variable as a dictionary to store ALL outputs
- The key for each entry MUST be the full filename with extension (e.g., "output.txt")
- The value for each entry MUST be a dictionary with the following structure:
{{
"content": string, # The actual content (text or base64-encoded string)
"base64Encoded": boolean, # Set to true for binary data, false for text data
"contentType": string # MIME type of the content (e.g., "text/plain", "application/json")
}}
- Example result dictionary:
result = {{
"output.txt": {{
"content": "This is text content",
"base64Encoded": False,
"contentType": "text/plain"
}},
"chart.png": {{
"content": "base64encodedstring...",
"base64Encoded": True,
"contentType": "image/png"
}}
}}
- NEVER write files to disk using open() or similar methods - use the result dictionary instead
- If you generate any charts, reports, or visualizations, ensure they are properly encoded and included
IMPORTANT - USE EXACT OUTPUT FILENAMES:
- You MUST use the EXACT filenames specified in EXPECTED OUTPUT DOCUMENTS section
- The key in the result dictionary must match these filenames precisely
- If no output documents are specified, use appropriate descriptive filenames
JSON OUTPUT (CRITICAL):
- After creating the result dictionary, you MUST print it as JSON to stdout using json.dumps()
- Add these lines at the end of your code:
import json # if not already imported
print(json.dumps(result))
- This printed JSON is how the system captures your result
- Make sure this is the last thing your code prints
BINARY DATA HANDLING:
- For binary content (images, PDFs, etc.), convert to base64 string and set base64Encoded=True
- For text content (text, JSON, HTML, etc.), use plain string and set base64Encoded=False
- Use appropriate MIME types for different content types
CODE QUALITY:
- Use explicit type conversions where needed (int/float/str)
- Implement feature detection, not version checks
- Handle errors gracefully with appropriate fallbacks
- Follow latest API conventions for libraries
- Validate inputs before processing
Your code must start with:
inputFiles = "=== JSONLOAD ===" # DO NOT CHANGE THIS LINE
REQUIREMENTS:
Required packages should be specified as:
# REQUIREMENTS: library==version,library2>=version
- Specify exact versions for critical libraries
- Use constraint operators (==,>=,<=) as needed
Return ONLY Python code without explanations or markdown.
"""
    # Call AI service
    messages = [
        {"role": "system", "content": "You are a Python code generator. Provide only valid Python code without explanations or formatting. Always output the result dictionary as JSON using print(json.dumps(result)) at the end of your code."},
        {"role": "user", "content": aiPrompt}
    ]
    generatedContent = await self.mydom.callAi(messages, temperature=0.1)
    # Extract code and requirements
    code = self._cleanCode(generatedContent)
    # Extract requirements: only the first "# REQUIREMENTS:" line is used
    requirements = []
    for line in code.split('\n'):
        if line.strip().startswith("# REQUIREMENTS:"):
            reqStr = line.replace("# REQUIREMENTS:", "").strip()
            requirements = [r.strip() for r in reqStr.split(',') if r.strip()]
            break
    return code, requirements
def _executeCode(self, code: str, requirements: List[str] = None) -> Dict[str, Any]:
    """
    Execute Python code in an isolated virtual environment.

    Creates a throwaway temp directory with its own venv, optionally installs
    the requested packages into it, runs the code as a subprocess, and then
    extracts the result dictionary that the generated code printed as JSON.

    Args:
        code: Python source code to execute.
        requirements: Optional list of pip requirement specifiers
            (e.g. ["requests==2.31.0"]).

    Returns:
        Execution result dictionary with keys:
            success (bool): True if the process exited with code 0.
            output (str): Captured stdout.
            error (str): Captured stderr (only populated on failure).
            result (dict | None): Parsed JSON result printed by the code.
            exitCode (int): Process exit code, or -1 on timeout/error.
    """
    try:
        # 1. Create temp directory and virtual environment
        self.tempDir = tempfile.mkdtemp(prefix="code_exec_")
        venvPath = os.path.join(self.tempDir, "venv")
        logger.debug(f"Creating virtual environment at {venvPath}")
        subprocess.run([sys.executable, "-m", "venv", venvPath],
                       check=True, capture_output=True)
        # Venv layout differs between Windows ("Scripts") and POSIX ("bin")
        pythonExe = os.path.join(venvPath, "Scripts", "python.exe") if os.name == 'nt' else os.path.join(venvPath, "bin", "python")
        # 2. Install requirements if provided
        if requirements:
            logger.info(f"Installing requirements: {requirements}")
            # Join once and reuse for both the file and the log message
            # (previously computed twice, the second into a throwaway `x`).
            requirementsText = "\n".join(requirements)
            reqFile = os.path.join(self.tempDir, "requirements.txt")
            with open(reqFile, "w") as f:
                f.write(requirementsText)
            logger.info(f"Requirements file: {requirementsText}.")
            try:
                pipResult = subprocess.run(
                    [pythonExe, "-m", "pip", "install", "-r", reqFile],
                    capture_output=True,
                    text=True,
                    # NOTE(review): int() raises TypeError if the config key is
                    # missing — confirm Agent_Coder_INSTALL_TIMEOUT is always set
                    timeout=int(APP_CONFIG.get("Agent_Coder_INSTALL_TIMEOUT"))
                )
                if pipResult.returncode != 0:
                    # Install failures were previously hidden at DEBUG level;
                    # surface them as warnings (execution still continues).
                    logger.warning(f"Error installing requirements: {pipResult.stderr}")
                else:
                    logger.debug("Requirements installed successfully")
                    # Log installed packages only when debug logging is active
                    if logger.isEnabledFor(logging.DEBUG):
                        pipList = subprocess.run(
                            [pythonExe, "-m", "pip", "list"],
                            capture_output=True,
                            text=True
                        )
                        logger.debug(f"Installed packages:\n{pipList.stdout}")
            except Exception as e:
                # Best-effort install: the code may still run without the packages.
                logger.warning(f"Exception during requirements installation: {str(e)}")
        # 3. Write code to file
        codeFile = os.path.join(self.tempDir, "code.py")
        with open(codeFile, "w", encoding="utf-8") as f:
            f.write(code)
        # 4. Execute code
        logger.debug(f"Executing code with timeout of {self.executorTimeout} seconds. Code: {code}")
        process = subprocess.run(
            [pythonExe, codeFile],
            timeout=self.executorTimeout,
            capture_output=True,
            text=True
        )
        # 5. Process results
        stdout = process.stdout
        stderr = process.stderr
        resultData = None
        if process.returncode == 0:
            try:
                # The generated code prints its result dict as a JSON line;
                # collect every stdout line that parses as JSON ...
                jsonLines = []
                for line in stdout.strip().split('\n'):
                    line = line.strip()
                    if line and line[0] in '{[' and line[-1] in '}]':
                        try:
                            parsed = json.loads(line)
                            jsonLines.append((line, parsed))
                        except json.JSONDecodeError:
                            continue
                # ... and use the last one that is a dictionary.
                if jsonLines:
                    for line, parsed in reversed(jsonLines):
                        if isinstance(parsed, dict):
                            resultData = parsed
                            logger.debug(f"Extracted result data from stdout: {type(resultData)}")
                            break
            except Exception as e:
                logger.debug(f"Error extracting result from stdout: {str(e)}")
        # Enhanced logging of what was found
        if resultData:
            logger.info(f"Found result dictionary with {len(resultData)} entries: {list(resultData.keys())}")
        else:
            logger.warning("No result dictionary found in output")
        return {
            "success": process.returncode == 0,
            "output": stdout,
            "error": stderr if process.returncode != 0 else "",
            "result": resultData,
            "exitCode": process.returncode
        }
    except subprocess.TimeoutExpired:
        logger.error(f"Execution timed out after {self.executorTimeout} seconds")
        return {
            "success": False,
            "output": "",
            "error": f"Execution timed out after {self.executorTimeout} seconds",
            "result": None,
            "exitCode": -1
        }
    except Exception as e:
        logger.error(f"Execution error: {str(e)}")
        return {
            "success": False,
            "output": "",
            "error": f"Execution error: {str(e)}",
            "result": None,
            "exitCode": -1
        }
    finally:
        # Always remove the temp directory, even on timeout/error
        self._cleanupExecution()
def _cleanupExecution(self):
    """Remove the temporary execution directory, if one is still present."""
    # Nothing to do when no directory was created or it is already gone.
    if not self.tempDir or not os.path.exists(self.tempDir):
        return
    try:
        logger.debug(f"Cleaning up temporary directory: {self.tempDir}")
        shutil.rmtree(self.tempDir)
        self.tempDir = None
    except Exception as e:
        # Cleanup is best-effort; a leftover directory is only logged.
        logger.warning(f"Error cleaning up temp directory: {str(e)}")
def _cleanCode(self, code: str) -> str:
"""Remove any markdown formatting or explanations."""
# Remove code block markers
code = code.replace("```python", "").replace("```", "")
# Remove explanations before or after code
lines = code.strip().split('\n')
startIndex = 0
endIndex = len(lines)
# Find start of actual code
for i, line in enumerate(lines):
if line.strip().startswith("inputFiles =") or line.strip().startswith("# REQUIREMENTS:"):
startIndex = i
break
# Clean code
cleanedCode = '\n'.join(lines[startIndex:endIndex])
return cleanedCode.strip()
# Factory function for the Coder agent
def getAgentCoder():
    """Return a fresh instance of the Coder agent (no caching/singleton)."""
    return AgentCoder()

View file

@ -0,0 +1,559 @@
"""
Documentation agent for creating documentation, reports, and structured content.
Reimagined with an output-first, AI-driven approach with multi-step document generation.
"""
import logging
import json
from typing import Dict, Any, List
from modules.workflowAgentsRegistry import AgentBase
logger = logging.getLogger(__name__)
class AgentDocumentation(AgentBase):
    """AI-driven agent for creating documentation and structured content using multi-step generation"""
    def __init__(self):
        """Initialize the documentation agent's identity and capability list."""
        super().__init__()
        # Registry identifier and human-readable description of this agent
        self.name = "documentation"
        self.description = "Creates structured documentation, reports, and content using AI with multi-step generation"
        # Capability tags — presumably used for agent selection; confirm against the registry
        self.capabilities = [
            "report_generation",
            "documentation",
            "content_structuring",
            "technical_writing",
            "knowledge_organization"
        ]
    def setDependencies(self, mydom=None):
        """Set external dependencies for the agent."""
        # mydom provides callAi(); processTask returns an error without it.
        self.mydom = mydom
async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
    """
    Process a documentation task: plan the document(s) with the AI service,
    then generate every requested output via multi-step generation.

    Args:
        task: Task dictionary with "prompt", "inputDocuments" and
            "outputSpecifications" entries.

    Returns:
        Dictionary with "feedback" (str) and "documents" (list).
    """
    try:
        taskPrompt = task.get("prompt", "")
        sourceDocuments = task.get("inputDocuments", [])
        requestedOutputs = task.get("outputSpecifications", [])
        # Without the AI backend this agent cannot do anything useful.
        if not self.mydom:
            return {
                "feedback": "The Documentation agent requires an AI service to function.",
                "documents": []
            }
        # Only the pre-extracted text of the input documents is used as context.
        contextText = self._extractDocumentContext(sourceDocuments)
        # Ask the AI for a structured plan before generating anything.
        plan = await self._analyzeTask(taskPrompt, contextText, requestedOutputs)
        # Fall back to a single default output when none was requested.
        if not requestedOutputs:
            fallbackFormat = plan.get("recommendedFormat", "markdown")
            fallbackTitle = self._sanitizeFilename(plan.get("title", "Documentation"))
            requestedOutputs = [
                {"label": f"{fallbackTitle}.{fallbackFormat}", "description": "Comprehensive documentation"}
            ]
        # Generate each requested document in order via the multi-step pipeline.
        generatedDocuments = [
            await self._createDocumentMultiStep(
                taskPrompt,
                contextText,
                spec.get("label", ""),
                spec.get("description", ""),
                plan
            )
            for spec in requestedOutputs
        ]
        feedback = plan.get(
            "feedback",
            f"Created {len(generatedDocuments)} documents based on your requirements."
        )
        return {
            "feedback": feedback,
            "documents": generatedDocuments
        }
    except Exception as e:
        logger.error(f"Error in documentation generation: {str(e)}", exc_info=True)
        return {
            "feedback": f"Error during documentation generation: {str(e)}",
            "documents": []
        }
def _extractDocumentContext(self, documents: List[Dict[str, Any]]) -> str:
"""
Extract context from input documents, focusing on dataExtracted.
Args:
documents: List of document objects
Returns:
Extracted context as text
"""
contextParts = []
for doc in documents:
docName = doc.get("name", "unnamed")
if doc.get("ext"):
docName = f"{docName}.{doc.get('ext')}"
contextParts.append(f"\n\n--- {docName} ---\n")
# Process contents for dataExtracted
for content in doc.get("contents", []):
if content.get("dataExtracted"):
contextParts.append(content.get("dataExtracted", ""))
return "\n".join(contextParts)
def _sanitizeFilename(self, filename: str) -> str:
"""
Sanitize a filename by removing invalid characters.
Args:
filename: Filename to sanitize
Returns:
Sanitized filename
"""
# Replace invalid characters with underscores
invalidChars = r'<>:"/\|?*'
for char in invalidChars:
filename = filename.replace(char, '_')
# Trim filename if too long
if len(filename) > 100:
filename = filename[:97] + "..."
return filename
async def _analyzeTask(self, prompt: str, context: str, outputSpecs: List) -> Dict:
    """
    Use AI to analyze the task and create a documentation plan.

    Args:
        prompt: The task prompt.
        context: Document context (only the first 1000 chars are sent).
        outputSpecs: Output specifications.

    Returns:
        Documentation plan dictionary. Falls back to a generic default plan
        when the AI call fails or its response contains no parsable JSON.
    """
    analysisPrompt = f"""
Analyze this documentation task and create a detailed plan.
TASK: {prompt}
DOCUMENT CONTEXT SAMPLE:
{context[:1000]}... (truncated)
OUTPUT REQUIREMENTS:
{json.dumps(outputSpecs, indent=2)}
Create a detailed documentation plan in JSON format with the following structure:
{{
    "title": "Document Title",
    "documentType": "report|manual|guide|whitepaper|etc",
    "audience": "technical|general|executive|etc",
    "detailedStructure": [
        {{
            "title": "Chapter/Section Title",
            "keyPoints": ["point1", "point2", ...],
            "subsections": ["subsection1", "subsection2", ...],
            "importance": "high|medium|low",
            "estimatedLength": "short|medium|long"
        }},
        ... more sections ...
    ],
    "keyTopics": ["topic1", "topic2", ...],
    "tone": "formal|conversational|instructional|etc",
    "recommendedFormat": "markdown|html|text|etc",
    "formattingRequirements": ["requirement1", "requirement2", ...],
    "executiveSummary": "Brief description of what the document will cover",
    "feedback": "Brief message explaining the documentation approach"
}}
Only return valid JSON. No preamble or explanations.
"""
    try:
        response = await self.mydom.callAi([
            {"role": "system", "content": "You are a documentation expert. Respond with valid JSON only."},
            {"role": "user", "content": analysisPrompt}
        ])
        # Extract the outermost JSON object from the response text.
        jsonStart = response.find('{')
        jsonEnd = response.rfind('}') + 1
        if jsonStart >= 0 and jsonEnd > jsonStart:
            # A JSONDecodeError here is caught below and yields the fallback.
            return json.loads(response[jsonStart:jsonEnd])
        # No JSON found in the response: use the generic plan.
        return self._defaultDocumentationPlan()
    except Exception as e:
        logger.warning(f"Error creating documentation plan: {str(e)}")
        return self._defaultDocumentationPlan()

def _defaultDocumentationPlan(self) -> Dict:
    """Generic fallback documentation plan.

    Previously this dictionary was duplicated verbatim in two branches of
    _analyzeTask (the "no JSON found" path and the exception path); it is
    now defined once here.
    """
    return {
        "title": "Documentation",
        "documentType": "report",
        "audience": "general",
        "detailedStructure": [
            {
                "title": "Introduction",
                "keyPoints": ["Purpose", "Scope"],
                "subsections": [],
                "importance": "high",
                "estimatedLength": "short"
            },
            {
                "title": "Main Content",
                "keyPoints": ["Core Information"],
                "subsections": ["Key Findings", "Analysis"],
                "importance": "high",
                "estimatedLength": "long"
            },
            {
                "title": "Conclusion",
                "keyPoints": ["Summary", "Next Steps"],
                "subsections": [],
                "importance": "medium",
                "estimatedLength": "short"
            }
        ],
        "keyTopics": ["General Information"],
        "tone": "formal",
        "recommendedFormat": "markdown",
        "formattingRequirements": ["Clear headings", "Professional formatting"],
        "executiveSummary": "A comprehensive documentation covering the requested topics.",
        "feedback": "Created documentation based on your requirements."
    }
async def _createDocumentMultiStep(self, prompt: str, context: str, outputLabel: str,
                                   outputDescription: str, documentationPlan: Dict) -> Dict:
    """
    Create a document using a multi-step approach with separate AI calls for each section.

    Generation order: introduction, optional executive summary, one call per
    planned section, conclusion — then assembly into the target format.

    Args:
        prompt: Original task prompt
        context: Document context (only the first 500 chars are sent per section)
        outputLabel: Output filename; its extension selects the output format
        outputDescription: Description of desired output (currently unused in the body)
        documentationPlan: Documentation plan from AI (see _analyzeTask)

    Returns:
        Document object (via formatAgentDocumentOutput), or a plain dict
        describing the error document if generation fails.
    """
    # Determine format from filename; default to markdown when no extension.
    formatType = outputLabel.split('.')[-1].lower() if '.' in outputLabel else "md"
    # Map format to contentType (unknown extensions fall back to text/plain).
    contentTypeMap = {
        "md": "text/markdown",
        "markdown": "text/markdown",
        "html": "text/html",
        "txt": "text/plain",
        "text": "text/plain",
        "json": "application/json",
        "csv": "text/csv"
    }
    contentType = contentTypeMap.get(formatType, "text/plain")
    # Pull document attributes out of the plan, with safe defaults.
    title = documentationPlan.get("title", "Documentation")
    documentType = documentationPlan.get("documentType", "document")
    audience = documentationPlan.get("audience", "general")
    tone = documentationPlan.get("tone", "formal")
    keyTopics = documentationPlan.get("keyTopics", [])
    # NOTE(review): formattingRequirements is read but never used below — confirm intent.
    formattingRequirements = documentationPlan.get("formattingRequirements", [])
    # Get the detailed structure
    detailedStructure = documentationPlan.get("detailedStructure", [])
    if not detailedStructure:
        # Fallback structure if the plan provided none.
        detailedStructure = [
            {
                "title": "Introduction",
                "keyPoints": ["Purpose", "Scope"],
                "importance": "high"
            },
            {
                "title": "Main Content",
                "keyPoints": ["Core Information"],
                "importance": "high"
            },
            {
                "title": "Conclusion",
                "keyPoints": ["Summary", "Next Steps"],
                "importance": "medium"
            }
        ]
    try:
        # Step 1: Generate document introduction
        introPrompt = f"""
Create the introduction for a {documentType} titled "{title}".
DOCUMENT OVERVIEW:
- Type: {documentType}
- Audience: {audience}
- Tone: {tone}
- Key Topics: {', '.join(keyTopics)}
- Format: {formatType}
TASK CONTEXT: {prompt}
This introduction should:
1. Clearly state the purpose and scope of the document
2. Provide context and background information
3. Outline what the reader will find in the document
4. Set the appropriate tone for the {audience} audience
The introduction should be professional and engaging, formatted according to {formatType} standards.
"""
        introduction = await self.mydom.callAi([
            {"role": "system", "content": f"You are a documentation expert creating an introduction in {formatType} format."},
            {"role": "user", "content": introPrompt}
        ], produceUserAnswer = True)
        # Step 2: Generate executive summary (only for summary-bearing document types)
        if documentType in ["report", "whitepaper", "case study"]:
            summaryPrompt = f"""
Create an executive summary for a {documentType} titled "{title}".
DOCUMENT OVERVIEW:
- Type: {documentType}
- Audience: {audience}
- Key Topics: {', '.join(keyTopics)}
TASK CONTEXT: {prompt}
This executive summary should:
1. Provide a concise overview of the entire document
2. Highlight key findings, recommendations, or conclusions
3. Be suitable for executives or busy readers who may only read this section
4. Be professionally formatted according to {formatType} standards
Keep the summary focused and impactful, approximately 200-300 words.
"""
            executiveSummary = await self.mydom.callAi([
                {"role": "system", "content": f"You are a documentation expert creating an executive summary in {formatType} format."},
                {"role": "user", "content": summaryPrompt}
            ], produceUserAnswer = True)
        else:
            # Empty string suppresses the summary section during assembly below.
            executiveSummary = ""
        # Step 3: Generate each planned section with its own AI call
        sections = []
        for section in detailedStructure:
            sectionTitle = section.get("title", "Section")
            keyPoints = section.get("keyPoints", [])
            subsections = section.get("subsections", [])
            importance = section.get("importance", "medium")
            # Adjust depth based on importance
            detailLevel = "high" if importance == "high" else "medium"
            sectionPrompt = f"""
Create the "{sectionTitle}" section for a {documentType} titled "{title}".
SECTION DETAILS:
- Title: {sectionTitle}
- Key Points to Cover: {', '.join(keyPoints)}
- Subsections: {', '.join(subsections)}
- Detail Level: {detailLevel}
DOCUMENT CONTEXT:
- Type: {documentType}
- Audience: {audience}
- Tone: {tone}
- Format: {formatType}
TASK CONTEXT: {prompt}
AVAILABLE INFORMATION:
{context[:500]}... (truncated)
This section should:
1. Be comprehensive and well-structured
2. Cover all the key points listed
3. Include the specified subsections with appropriate headings
4. Maintain a {tone} tone suitable for the {audience} audience
5. Be properly formatted according to {formatType} standards
6. Include specific examples, data, or evidence where appropriate
Be thorough in your coverage of this section, providing substantive content.
"""
            sectionContent = await self.mydom.callAi([
                {"role": "system", "content": f"You are a documentation expert creating detailed content for the {sectionTitle} section."},
                {"role": "user", "content": sectionPrompt}
            ], produceUserAnswer = True)
            sections.append(sectionContent)
        # Step 4: Generate conclusion
        conclusionPrompt = f"""
Create the conclusion for a {documentType} titled "{title}".
DOCUMENT OVERVIEW:
- Type: {documentType}
- Audience: {audience}
- Key Topics: {', '.join(keyTopics)}
TASK CONTEXT: {prompt}
This conclusion should:
1. Summarize the key points covered in the document
2. Provide closure to the topics discussed
3. Include any relevant recommendations or next steps
4. Leave the reader with a clear understanding of the document's significance
The conclusion should be professional and impactful, formatted according to {formatType} standards.
"""
        conclusion = await self.mydom.callAi([
            {"role": "system", "content": f"You are a documentation expert creating a conclusion in {formatType} format."},
            {"role": "user", "content": conclusionPrompt}
        ], produceUserAnswer = True)
        # Step 5: Assemble the complete document in the requested format
        if formatType in ["md", "markdown"]:
            # Markdown format
            documentContent = f"# {title}\n\n"
            if executiveSummary:
                documentContent += f"## Executive Summary\n\n{executiveSummary}\n\n"
            documentContent += f"{introduction}\n\n"
            for i, sectionContent in enumerate(sections):
                # Ensure section starts with a heading if the AI omitted one
                sectionTitle = detailedStructure[i].get("title", f"Section {i+1}")
                if not sectionContent.strip().startswith("#"):
                    documentContent += f"## {sectionTitle}\n\n"
                documentContent += f"{sectionContent}\n\n"
            documentContent += f"## Conclusion\n\n{conclusion}\n"
        elif formatType == "html":
            # HTML format
            documentContent = f"<html>\n<head>\n<title>{title}</title>\n</head>\n<body>\n"
            documentContent += f"<h1>{title}</h1>\n\n"
            if executiveSummary:
                documentContent += f"<h2>Executive Summary</h2>\n<div>{executiveSummary}</div>\n\n"
            documentContent += f"<div>{introduction}</div>\n\n"
            for i, sectionContent in enumerate(sections):
                sectionTitle = detailedStructure[i].get("title", f"Section {i+1}")
                documentContent += f"<h2>{sectionTitle}</h2>\n<div>{sectionContent}</div>\n\n"
            documentContent += f"<h2>Conclusion</h2>\n<div>{conclusion}</div>\n"
            documentContent += "</body>\n</html>"
        else:
            # Plain text format with underlined headings
            documentContent = f"{title}\n{'=' * len(title)}\n\n"
            if executiveSummary:
                documentContent += f"EXECUTIVE SUMMARY\n{'-' * 17}\n\n{executiveSummary}\n\n"
            documentContent += f"{introduction}\n\n"
            for i, sectionContent in enumerate(sections):
                sectionTitle = detailedStructure[i].get("title", f"Section {i+1}")
                documentContent += f"{sectionTitle}\n{'-' * len(sectionTitle)}\n\n{sectionContent}\n\n"
            documentContent += f"CONCLUSION\n{'-' * 10}\n\n{conclusion}\n"
        # Create document object
        return self.formatAgentDocumentOutput(outputLabel, documentContent, contentType)
    except Exception as e:
        logger.error(f"Error creating document: {str(e)}", exc_info=True)
        # Create a simple error document in the requested format
        if formatType in ["md", "markdown"]:
            content = f"# Error in Documentation\n\nThere was an error generating the documentation: {str(e)}"
        elif formatType == "html":
            content = f"<html><body><h1>Error in Documentation</h1><p>There was an error generating the documentation: {str(e)}</p></body></html>"
        else:
            content = f"Error in Documentation\n\nThere was an error generating the documentation: {str(e)}"
        # NOTE(review): the error path bypasses formatAgentDocumentOutput — confirm shape matches
        return {
            "label": outputLabel,
            "content": content,
            "metadata": {
                "contentType": contentType
            }
        }
# Factory function for the Documentation agent
def getAgentDocumentation():
    """Return a fresh instance of the Documentation agent (no caching/singleton)."""
    return AgentDocumentation()

158
static/121_auth.py Normal file
View file

@ -0,0 +1,158 @@
"""
Authentication module for backend API.
Handles JWT-based authentication, token generation, and user context.
"""
from datetime import datetime, timedelta, timezone
from typing import Optional, Dict, Any, Tuple
from fastapi import Depends, HTTPException, status
from fastapi.security import OAuth2PasswordBearer
from jose import JWTError, jwt
import logging
from modules.gatewayInterface import getGatewayInterface
from modules.configuration import APP_CONFIG
# Get Config Data
SECRET_KEY = APP_CONFIG.get("APP_JWT_SECRET_SECRET")  # JWT signing secret (routed through the config secret handler)
ALGORITHM = APP_CONFIG.get("Auth_ALGORITHM")  # NOTE(review): None if the key is missing — jwt.encode/decode would then fail
ACCESS_TOKEN_EXPIRE_MINUTES = int(APP_CONFIG.get("APP_TOKEN_EXPIRY"))  # int() raises at import time if APP_TOKEN_EXPIRY is unset
# OAuth2 Setup
oauth2Scheme = OAuth2PasswordBearer(tokenUrl="token")  # bearer-token extraction; "token" is the login endpoint path
# Logger
logger = logging.getLogger(__name__)
def createAccessToken(data: dict, expiresDelta: Optional[timedelta] = None) -> str:
    """
    Create a signed JWT access token.

    Args:
        data: Claims to encode (usually the user ID or username under "sub").
        expiresDelta: Optional validity duration; a falsy value (None or a
            zero delta) falls back to the configured default expiry.

    Returns:
        The encoded JWT as a string.
    """
    # `or` mirrors the original truthiness check: timedelta(0) also takes the default.
    lifetime = expiresDelta or timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
    claims = data.copy()
    claims["exp"] = datetime.now(timezone.utc) + lifetime
    return jwt.encode(claims, SECRET_KEY, algorithm=ALGORITHM)
async def getCurrentUser(token: str = Depends(oauth2Scheme)) -> Dict[str, Any]:
    """
    Resolve the current user from a JWT bearer token.

    Decodes and validates the token, then loads the user record from the
    database via the gateway interface.

    Args:
        token: JWT Token from the Authorization header

    Returns:
        User data dictionary

    Raises:
        HTTPException: 401 for an invalid token or unknown user,
            403 for a disabled user.
    """
    credentialsException = HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Invalid authentication credentials",
        headers={"WWW-Authenticate": "Bearer"},
    )
    try:
        # Decode token (verifies signature and expiry)
        payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
        # The subject claim carries the username
        username: str = payload.get("sub")
        if username is None:
            raise credentialsException
        # NOTE(review): the token's "mandateId" claim was previously read here
        # into a local that was never used (the user record below is
        # authoritative); the dead assignment has been removed.
    except JWTError:
        logger.warning("Invalid JWT Token")
        raise credentialsException
    # Initialize Gateway Interface without context
    gateway = getGatewayInterface()
    # Retrieve user from database
    user = gateway.getUserByUsername(username)
    if user is None:
        logger.warning(f"User {username} not found")
        raise credentialsException
    if user.get("disabled", False):
        logger.warning(f"User {username} is disabled")
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="User is disabled")
    return user
async def getCurrentActiveUser(currentUser: Dict[str, Any] = Depends(getCurrentUser)) -> Dict[str, Any]:
    """
    Dependency that rejects disabled users.

    Args:
        currentUser: User record resolved by getCurrentUser.

    Returns:
        The unchanged user record.

    Raises:
        HTTPException: 403 if the user is flagged as disabled.
    """
    isDisabled = currentUser.get("disabled", False)
    if isDisabled:
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="User is disabled")
    return currentUser
def _coerceContextId(rawValue, fieldName: str, defaultValue: int) -> int:
    """Convert a user-context field to int, logging and defaulting on failure.

    Shared by getUserContext for mandateId and userId, which previously
    duplicated this extraction logic inline.
    """
    if rawValue is None:
        logger.warning(f"No {fieldName} found in currentUser, using default: {defaultValue}")
        return defaultValue
    try:
        return int(rawValue)
    except (ValueError, TypeError):
        logger.error(f"Invalid {fieldName} value: {rawValue}, using default: {defaultValue}")
        return defaultValue

async def getUserContext(currentUser: Dict[str, Any]) -> Tuple[int, int]:
    """
    Extract the mandate ID and user ID from the current user.

    Missing or non-numeric values fall back to 0, with a log entry.

    Args:
        currentUser: The current user record.

    Returns:
        Tuple of (mandateId, userId)
    """
    mandateId = _coerceContextId(currentUser.get("mandateId", None), "mandateId", 0)
    userId = _coerceContextId(currentUser.get("id", None), "userId", 0)
    return mandateId, userId

183
static/122_configuration.py Normal file
View file

@ -0,0 +1,183 @@
"""
Utility module for configuration management.
This module provides a global APP_CONFIG object for accessing configuration from both
config.ini files and environment variables stored in .env files, using a flat structure.
"""
import os
import logging
from typing import Any, Dict, Optional
from pathlib import Path
# Set up logging
logger = logging.getLogger(__name__)
class Configuration:
    """
    Configuration class with attribute-style access to flattened configuration.

    Values are merged from config.ini, a .env file and the process
    environment into a single flat key/value dictionary.
    """
    def __init__(self):
        """Initialize the configuration object and load all sources."""
        self._data = {}  # flat key -> value store (strings from files)
        self._configFilePath = None  # resolved path of config.ini once found
        self._envFilePath = None  # resolved path of .env once found
        self._configMtime = 0  # last seen mtime of config.ini (0 = never loaded)
        self._envMtime = 0  # last seen mtime of .env (0 = never loaded)
        self.refresh()
def refresh(self):
    """Reload configuration from files."""
    # .env is loaded after config.ini, so env keys override config entries.
    self._loadConfig()
    self._loadEnv()
    logger.info("Configuration refreshed")
def _loadConfig(self):
"""Load configuration from config.ini file in flattened format"""
# Find config.ini file (look in current directory and parent directory)
configPath = Path('config.ini')
if not configPath.exists():
# Try in parent directory
configPath = Path('../config.ini')
if not configPath.exists():
logger.warning(f"Configuration file not found at {configPath.absolute()}")
return
self._configFilePath = configPath
currentMtime = os.path.getmtime(configPath)
# Skip if file hasn't changed
if currentMtime <= self._configMtime:
return
self._configMtime = currentMtime
try:
with open(configPath, 'r') as f:
for line in f:
line = line.strip()
# Skip empty lines and comments
if not line or line.startswith('#'):
continue
# Parse key-value pairs
if '=' in line:
key, value = line.split('=', 1)
key = key.strip()
value = value.strip()
# Add directly to data dictionary
self._data[key] = value
except Exception as e:
logger.error(f"Error loading configuration: {e}")
def _loadEnv(self):
    """Load variables from a .env file, then merge the process environment."""
    # Look for .env in the current directory, then one level up.
    envPath = Path('.env')
    if not envPath.exists():
        envPath = Path('../.env')
        if not envPath.exists():
            logger.warning(f"Environment file not found at {envPath.absolute()}")
            return
    self._envFilePath = envPath
    currentMtime = os.path.getmtime(envPath)
    # Unchanged file: keep the cached values.
    if currentMtime <= self._envMtime:
        return
    self._envMtime = currentMtime
    try:
        with open(envPath, 'r') as f:
            for rawLine in f:
                entry = rawLine.strip()
                # Skip blanks, comments and lines without a key=value pair.
                if not entry or entry.startswith('#') or '=' not in entry:
                    continue
                key, _, value = entry.partition('=')
                self._data[key.strip()] = value.strip()
        logger.info(f"Loaded environment variables from {envPath.absolute()}")
        # Merge system environment variables without overriding file values.
        for key, value in os.environ.items():
            self._data.setdefault(key, value)
    except Exception as e:
        logger.error(f"Error loading environment variables: {e}")
def checkForUpdates(self):
"""Check if configuration files have changed and reload if necessary"""
if self._configFilePath and os.path.exists(self._configFilePath):
currentMtime = os.path.getmtime(self._configFilePath)
if currentMtime > self._configMtime:
logger.info("Config file has changed, reloading...")
self._loadConfig()
if self._envFilePath and os.path.exists(self._envFilePath):
currentMtime = os.path.getmtime(self._envFilePath)
if currentMtime > self._envMtime:
logger.info("Environment file has changed, reloading...")
self._loadEnv()
def get(self, key: str, default: Any = None) -> Any:
"""Get configuration value with optional default"""
self.checkForUpdates() # Check for file changes
if key in self._data:
value = self._data[key]
# Handle secrets (keys ending with _SECRET)
if key.endswith("_SECRET"):
return handleSecret(value)
return value
return default
def __getattr__(self, name: str) -> Any:
"""Enable attribute-style access to configuration"""
self.checkForUpdates() # Check for file changes
value = self.get(name)
if value is None:
raise AttributeError(f"Configuration key '{name}' not found")
return value
def __dir__(self) -> list:
"""Support auto-completion of attributes"""
self.checkForUpdates() # Check for file changes
return list(self._data.keys()) + super().__dir__()
def set(self, key: str, value: Any) -> None:
    """Set a configuration value in memory (for testing/overrides).

    The change is not persisted and may be overwritten by a file reload.
    """
    self._data[key] = value
def handleSecret(value: str) -> str:
    """
    Handle secret values.

    Currently a pass-through: the plain-text value is returned unchanged.
    Kept as a hook so real decryption can be added later without touching
    the call sites in Configuration.get().

    Args:
        value: The secret value to handle

    Returns:
        str: Processed secret value
    """
    return value
# Create the global APP_CONFIG instance shared by all importing modules
# (loads config.ini/.env eagerly at import time via Configuration.__init__).
APP_CONFIG = Configuration()

View file

@ -0,0 +1,796 @@
"""
Webcrawler agent for research and retrieval of information from the web.
Reimagined with an output-first, AI-driven approach.
"""
import logging
import json
import re
import time
from typing import Dict, Any, List
from urllib.parse import quote_plus, unquote
from bs4 import BeautifulSoup
import requests
import markdown
from modules.workflowAgentsRegistry import AgentBase
from modules.configuration import APP_CONFIG
logger = logging.getLogger(__name__)
class AgentWebcrawler(AgentBase):
"""AI-driven agent for web research and information retrieval"""
def __init__(self):
    """Initialize the webcrawler agent: identity, capabilities, and crawl limits."""
    super().__init__()
    self.name = "webcrawler"
    self.description = "Conducts web research and collects information from online sources"
    self.capabilities = [
        "webSearch",
        "informationRetrieval",
        "dataCollection",
        "searchResultsAnalysis",
        "webpageContentExtraction"
    ]
    # Crawl limits and HTTP settings, all overridable via application config.
    cfg = APP_CONFIG.get
    self.maxUrl = int(cfg("Agent_Webcrawler_MAX_URLS", "5"))
    self.maxSearchTerms = int(cfg("Agent_Webcrawler_MAX_SEARCH_KEYWORDS", "3"))
    self.maxResults = int(cfg("Agent_Webcrawler_MAX_SEARCH_RESULTS", "5"))
    self.timeout = int(cfg("Agent_Webcrawler_TIMEOUT", "30"))
    self.searchEngine = cfg("Agent_Webcrawler_SEARCH_ENGINE", "https://html.duckduckgo.com/html/?q=")
    self.userAgent = cfg("Agent_Webcrawler_USER_AGENT", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
def setDependencies(self, mydom=None):
    """Inject the external AI/DOM service used for planning and summarization."""
    self.mydom = mydom
async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
    """
    Process a task by focusing on required outputs and using AI to guide the research process.
    Args:
        task: Task dictionary with prompt, inputDocuments, outputSpecifications
    Returns:
        Dictionary with feedback and documents
    """
    try:
        # Extract task information
        prompt = task.get("prompt", "")
        outputSpecs = task.get("outputSpecifications", [])
        # Check AI service — without it neither planning nor summarizing works.
        if not self.mydom:
            return {
                "feedback": "The Webcrawler agent requires an AI service to function effectively.",
                "documents": []
            }
        # Create research plan (AI-generated; falls back to a default plan on failure)
        researchPlan = await self._createResearchPlan(prompt)
        # Check if this is truly a web research task
        if not researchPlan.get("requiresWebResearch", True):
            return {
                "feedback": "This task doesn't appear to require web research. Please try a different agent.",
                "documents": []
            }
        # Gather raw material through web research
        rawResults = await self._gatherResearchMaterial(researchPlan)
        # Format results into requested output documents
        documents = await self._createOutputDocuments(
            prompt,
            rawResults,
            outputSpecs,
            researchPlan
        )
        # Generate feedback — the plan-provided text wins over the generic default
        feedback = researchPlan.get("feedback", f"I conducted web research on '{prompt[:50]}...' and gathered information from {len(rawResults)} relevant sources.")
        return {
            "feedback": feedback,
            "documents": documents
        }
    except Exception as e:
        # Broad catch: the agent must always return a well-formed response dict.
        logger.error(f"Error during web research: {str(e)}", exc_info=True)
        return {
            "feedback": f"Error during web research: {str(e)}",
            "documents": []
        }
async def _createResearchPlan(self, prompt: str) -> Dict[str, Any]:
    """
    Use AI to create a detailed research plan.
    Args:
        prompt: The research query
    Returns:
        Research plan dictionary
    """
    # Doubled braces render as literal {} so the model sees a JSON template.
    researchPrompt = f"""
    Create a detailed web research plan for this task: "{prompt}"
    Analyze the request carefully and create a structured plan in JSON format with the following elements:
    {{
        "requiresWebResearch": true/false, # Whether this genuinely requires web research
        "researchQuestions": ["question1", "question2", ...], # 2-4 specific questions to answer
        "searchTerms": ["term1", "term2", ...], # Up to {self.maxSearchTerms} effective search terms
        "directUrls": ["url1", "url2", ...], # Any URLs directly mentioned in the request (up to {self.maxUrl})
        "expectedSources": ["type1", "type2", ...], # Types of sources that would be most valuable
        "contentFocus": "what specific content to extract or focus on",
        "feedback": "explanation of how the research will be conducted"
    }}
    Respond with ONLY the JSON object, no additional text or explanations.
    """
    try:
        # Get research plan from AI
        response = await self.mydom.callAi([
            {"role": "system", "content": "You are a web research planning expert. Create precise research plans in JSON format only."},
            {"role": "user", "content": researchPrompt}
        ])
        # Extract JSON by locating the outermost braces — the model may add
        # stray prose despite the "JSON only" instruction.
        jsonStart = response.find('{')
        jsonEnd = response.rfind('}') + 1
        if jsonStart >= 0 and jsonEnd > jsonStart:
            plan = json.loads(response[jsonStart:jsonEnd])
            # Ensure we have the expected fields with defaults if missing
            if "searchTerms" not in plan:
                plan["searchTerms"] = [prompt]
            if "directUrls" not in plan:
                plan["directUrls"] = []
            if "researchQuestions" not in plan:
                plan["researchQuestions"] = ["What information can be found about this topic?"]
            return plan
        else:
            # Fallback plan when no JSON object could be located in the reply
            logger.warning(f"Not able creating research plan, generating fallback plan")
            return {
                "requiresWebResearch": True,
                "researchQuestions": ["What information can be found about this topic?"],
                "searchTerms": [prompt],
                "directUrls": [],
                "expectedSources": ["Web pages", "Articles"],
                "contentFocus": "Relevant information about the topic",
                "feedback": f"I'll conduct web research on '{prompt}' and gather relevant information."
            }
    except Exception as e:
        # Covers AI-call failures and json.loads errors alike
        logger.warning(f"Error creating research plan: {str(e)}")
        # Simple fallback plan
        return {
            "requiresWebResearch": True,
            "researchQuestions": ["What information can be found about this topic?"],
            "searchTerms": [prompt],
            "directUrls": [],
            "expectedSources": ["Web pages", "Articles"],
            "contentFocus": "Relevant information about the topic",
            "feedback": f"I'll conduct web research on '{prompt}' and gather relevant information."
        }
async def _gatherResearchMaterial(self, researchPlan: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Gather research material based on the research plan.
    Direct URLs are fetched first, then search terms, capped at
    self.maxResults total entries; duplicates (same URL) are skipped.
    Args:
        researchPlan: Research plan dictionary
    Returns:
        List of research results
    """
    allResults = []
    # Process direct URLs (capped at self.maxUrl)
    directUrls = researchPlan.get("directUrls", [])[:self.maxUrl]
    for url in directUrls:
        logger.info(f"Processing direct URL: {url}")
        try:
            # Fetch and extract content
            soup = self._readUrl(url)
            if soup:
                # Extract title and content
                title = self._extractTitle(soup, url)
                content = self._extractMainContent(soup)
                # Add to results
                allResults.append({
                    "title": title,
                    "url": url,
                    "sourceType": "directUrl",
                    "content": content,
                    "summary": ""  # Will be filled later
                })
        except Exception as e:
            logger.warning(f"Error processing URL {url}: {str(e)}")
    # Process search terms (capped at self.maxSearchTerms)
    searchTerms = researchPlan.get("searchTerms", [])[:self.maxSearchTerms]
    for term in searchTerms:
        logger.info(f"Searching for: {term}")
        try:
            # Perform search
            searchResults = self._searchWeb(term)
            # Process each search result
            for result in searchResults:
                # Check if URL is already in results (de-duplicate across terms)
                if not any(r["url"] == result["url"] for r in allResults):
                    allResults.append({
                        "title": result["title"],
                        "url": result["url"],
                        "sourceType": "searchResult",
                        "content": result["data"],
                        "snippet": result["snippet"],
                        "summary": ""  # Will be filled later
                    })
                # Stop if we've reached the maximum results
                if len(allResults) >= self.maxResults:
                    break
        except Exception as e:
            logger.warning(f"Error searching for {term}: {str(e)}")
        # Stop if we've reached the maximum results
        if len(allResults) >= self.maxResults:
            break
    # Create summaries in parallel for all results
    # NOTE(review): _summarizeAllResults actually iterates sequentially,
    # one awaited AI call per result — "in parallel" overstates it.
    allResults = await self._summarizeAllResults(allResults, researchPlan)
    return allResults
async def _summarizeAllResults(self, results: List[Dict[str, Any]], researchPlan: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Create summaries for all research results (mutates each result dict in
    place by filling its "summary" field).
    Args:
        results: List of research results
        researchPlan: Research plan with questions and focus
    Returns:
        Results with added summaries
    """
    for i, result in enumerate(results):
        logger.info(f"Summarizing result {i+1}/{len(results)}: {result['title'][:30]}...")
        try:
            # Limit content length to avoid token issues
            content = self._limitText(result.get("content", ""), maxChars=8000)
            researchQuestions = researchPlan.get("researchQuestions", ["What relevant information does this page contain?"])
            contentFocus = researchPlan.get("contentFocus", "Relevant information")
            # Create summary using AI
            summaryPrompt = f"""
            Summarize this web page content based on these research questions:
            {', '.join(researchQuestions)}
            Focus on: {contentFocus}
            Web page: {result['url']}
            Title: {result['title']}
            Content:
            {content}
            Create a concise summary that:
            1. Directly answers the research questions if possible
            2. Extracts the most relevant information from the page
            3. Includes specific facts, figures, or quotes if available
            4. Is around 2000 characters long
            Only include information actually found in the content. No fabrications or assumptions.
            """
            if self.mydom:
                summary = await self.mydom.callAi([
                    {"role": "system", "content": "You summarize web content accurately and concisely, focusing only on what is actually in the content."},
                    {"role": "user", "content": summaryPrompt}
                ])
                # Store the summary
                result["summary"] = summary
            else:
                # Fallback if no AI service
                logger.warning(f"Not able to summarize result, using fallback plan.")
                result["summary"] = f"Content from {result['url']} ({len(content)} characters)"
        except Exception as e:
            # A failed summary must not abort the whole batch
            logger.warning(f"Error summarizing result {i+1}: {str(e)}")
            result["summary"] = f"Error creating summary: {str(e)}"
    return results
async def _createOutputDocuments(self, prompt: str, results: List[Dict[str, Any]],
                                 outputSpecs: List[Dict[str, Any]], researchPlan: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Create output documents based on research results and specifications.
    Args:
        prompt: Original research prompt
        results: List of research results
        outputSpecs: Output specifications
        researchPlan: Research plan
    Returns:
        List of output documents
    """
    # Without explicit specifications, fall back to a single markdown report.
    if not outputSpecs:
        outputSpecs = [{
            "label": "webResearchResults.md",
            "description": "Comprehensive web research results"
        }]
    documents = []
    for spec in outputSpecs:
        outputLabel = spec.get("label", "")
        outputDescription = spec.get("description", "")
        # The file extension of the requested label drives the output format.
        formatType = self._determineFormatType(outputLabel)
        if formatType == "json":
            # Structured data dump
            document = await self._createJsonDocument(prompt, results, researchPlan, outputLabel)
        elif formatType == "csv":
            # Tabular source listing
            document = await self._createCsvDocument(results, outputLabel)
        else:
            # Narrative report (markdown, html or plain text)
            document = await self._createNarrativeDocument(
                prompt, results, researchPlan, formatType, outputLabel, outputDescription
            )
        documents.append(document)
    return documents
async def _createNarrativeDocument(self, prompt: str, results: List[Dict[str, Any]],
                                   researchPlan: Dict[str, Any], formatType: str,
                                   outputLabel: str, outputDescription: str) -> Dict[str, Any]:
    """
    Create a narrative document (markdown, html, text) from research results.
    Args:
        prompt: Original research prompt
        results: Research results
        researchPlan: Research plan
        formatType: Output format (markdown, html, text)
        outputLabel: Output filename
        outputDescription: Output description
    Returns:
        Document object
    """
    # Create content based on format
    if formatType == "markdown":
        contentType = "text/markdown"
        templateFormat = "markdown"
    elif formatType == "html":
        contentType = "text/html"
        templateFormat = "html"
    else:
        contentType = "text/plain"
        templateFormat = "text"
    # Prepare research context
    researchQuestions = researchPlan.get("researchQuestions", [])
    searchTerms = researchPlan.get("searchTerms", [])
    # Create document structure based on results
    sourcesSummary = []
    for result in results:
        sourcesSummary.append({
            "title": result.get("title", "Untitled"),
            "url": result.get("url", ""),
            "summary": result.get("summary", ""),
            "snippet": result.get("snippet", "")
        })
    # Truncate content for prompt (keeps the AI request within size limits)
    sourcesJson = json.dumps(sourcesSummary, indent=2)
    if len(sourcesJson) > 10000:
        # Logic to truncate each summary while preserving structure:
        # shorten one summary at a time and re-measure until under the cap.
        for i in range(len(sourcesSummary)):
            if len(sourcesJson) <= 10000:
                break
            # Gradually truncate summaries
            sourcesSummary[i]["summary"] = sourcesSummary[i]["summary"][:500] + "..."
            sourcesJson = json.dumps(sourcesSummary, indent=2)
    # Create report prompt
    reportPrompt = f"""
    Create a comprehensive {formatType} research report based on the following web research:
    TASK: {prompt}
    RESEARCH QUESTIONS:
    {', '.join(researchQuestions)}
    SEARCH TERMS USED:
    {', '.join(searchTerms)}
    SOURCES AND FINDINGS:
    {sourcesJson}
    REPORT DETAILS:
    - Format: {templateFormat}
    - Filename: {outputLabel}
    - Description: {outputDescription}
    Create a well-structured report that:
    1. Includes an executive summary of key findings
    2. Addresses each research question directly
    3. Integrates information from all relevant sources
    4. Cites sources appropriately for each piece of information
    5. Provides a comprehensive synthesis of the research
    6. Is formatted professionally and appropriately for {templateFormat}
    The report should be scholarly, accurate, and focused on the original research task.
    """
    try:
        # Generate report with AI
        reportContent = await self.mydom.callAi([
            {"role": "system", "content": f"You create professional research reports in {templateFormat} format."},
            {"role": "user", "content": reportPrompt}
        ])
        # Convert to HTML if needed
        if formatType == "html" and not reportContent.lower().startswith("<html"):
            # Check if it's markdown that needs conversion (heuristic: leading '#')
            if reportContent.startswith("#"):
                reportContent = markdown.markdown(reportContent)
            # Wrap in basic HTML structure if needed
            if not reportContent.lower().startswith("<html"):
                reportContent = f"<html><head><title>Web Research Results</title></head><body>{reportContent}</body></html>"
        return self.formatAgentDocumentOutput(outputLabel, reportContent, contentType)
    except Exception as e:
        logger.error(f"Error creating narrative document: {str(e)}")
        # Create error document in the requested format so callers still
        # receive a well-formed document object.
        if formatType == "markdown":
            content = f"# Web Research Error\n\nAn error occurred: {str(e)}"
        elif formatType == "html":
            content = f"<html><body><h1>Web Research Error</h1><p>An error occurred: {str(e)}</p></body></html>"
        else:
            content = f"WEB RESEARCH ERROR\n\nAn error occurred: {str(e)}"
        return self.formatAgentDocumentOutput(outputLabel, content, contentType)
async def _createJsonDocument(self, prompt: str, results: List[Dict[str, Any]],
                              researchPlan: Dict[str, Any], outputLabel: str) -> Dict[str, Any]:
    """
    Create a JSON document from research results.
    Args:
        prompt: Original research prompt
        results: Research results
        researchPlan: Research plan
        outputLabel: Output filename
    Returns:
        Document object
    """
    try:
        # Create structured data — one entry per source, without raw content
        sourcesData = []
        for result in results:
            sourcesData.append({
                "title": result.get("title", "Untitled"),
                "url": result.get("url", ""),
                "summary": result.get("summary", ""),
                "snippet": result.get("snippet", ""),
                "sourceType": result.get("sourceType", "")
            })
        # Create metadata (timestamp is local time, no timezone)
        metadata = {
            "query": prompt,
            "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
            "researchQuestions": researchPlan.get("researchQuestions", []),
            "searchTerms": researchPlan.get("searchTerms", [])
        }
        # Compile complete report object
        jsonContent = {
            "metadata": metadata,
            "summary": researchPlan.get("feedback", "Web research results"),
            "sources": sourcesData
        }
        # Convert to JSON string
        content = json.dumps(jsonContent, indent=2)
        return self.formatAgentDocumentOutput(outputLabel, content, "application/json")
    except Exception as e:
        logger.error(f"Error creating JSON document: {str(e)}")
        # Still emit valid JSON on failure
        return self.formatAgentDocumentOutput(outputLabel, json.dumps({"error": str(e)}), "application/json")
async def _createCsvDocument(self, results: List[Dict[str, Any]], outputLabel: str) -> Dict[str, Any]:
    """
    Create a CSV document from research results.

    Uses the stdlib csv module instead of hand-rolled quote doubling so
    escaping of embedded quotes, commas and newlines is guaranteed correct
    per RFC 4180.
    Args:
        results: Research results
        outputLabel: Output filename
    Returns:
        Document object
    """
    # Local imports keep this module's top-level dependencies unchanged.
    import csv
    import io
    try:
        buffer = io.StringIO()
        # QUOTE_ALL mirrors the previous always-quoted row style;
        # lineterminator="\n" avoids platform-dependent "\r\n".
        writer = csv.writer(buffer, quoting=csv.QUOTE_ALL, lineterminator="\n")
        writer.writerow(["Title", "URL", "Source Type", "Snippet"])
        for result in results:
            writer.writerow([
                result.get("title", ""),
                result.get("url", ""),
                result.get("sourceType", ""),
                result.get("snippet", "")
            ])
        # Previous implementation joined lines without a trailing newline.
        content = buffer.getvalue().rstrip("\n")
        return self.formatAgentDocumentOutput(outputLabel, content, "text/csv")
    except Exception as e:
        logger.error(f"Error creating CSV document: {str(e)}")
        return self.formatAgentDocumentOutput(outputLabel, f"Error,Error\nFailed to create CSV,{str(e)}", "text/csv")
def _determineFormatType(self, outputLabel: str) -> str:
"""
Determine the format type based on the filename.
Args:
outputLabel: Output filename
Returns:
Format type (markdown, html, text, json, csv)
"""
outputLabelLower = outputLabel.lower()
if outputLabelLower.endswith(".md"):
return "markdown"
elif outputLabelLower.endswith(".html"):
return "html"
elif outputLabelLower.endswith(".txt"):
return "text"
elif outputLabelLower.endswith(".json"):
return "json"
elif outputLabelLower.endswith(".csv"):
return "csv"
else:
# Default to markdown
return "markdown"
def _searchWeb(self, query: str) -> List[Dict[str, str]]:
    """
    Conduct a web search and return the results.
    Scrapes the configured HTML search engine results page, resolves the
    engine's redirect links, and fetches each target page's content.
    Args:
        query: The search query
    Returns:
        List of search results
    """
    formattedQuery = quote_plus(query)
    url = f"{self.searchEngine}{formattedQuery}"
    searchResultsSoup = self._readUrl(url)
    # '.result' is the DuckDuckGo HTML results container selector
    if not searchResultsSoup or not searchResultsSoup.select('.result'):
        logger.warning(f"No search results found for: {query}")
        return []
    # Extract search results
    results = []
    # Find all result containers
    resultElements = searchResultsSoup.select('.result')
    for result in resultElements:
        # Extract title
        titleElement = result.select_one('.result__a')
        title = titleElement.text.strip() if titleElement else 'No title'
        # Extract URL (DuckDuckGo uses redirects)
        urlElement = titleElement.get('href') if titleElement else ''
        extractedUrl = 'No URL'
        if urlElement:
            # Extract actual URL from DuckDuckGo's redirect
            # NOTE(review): assumes the redirect href starts with '/d.js?q=' —
            # confirm against the engine's current markup; other hrefs are
            # passed through unchanged below.
            if urlElement.startswith('/d.js?q='):
                start = urlElement.find('?q=') + 3
                end = urlElement.find('&', start) if '&' in urlElement[start:] else None
                extractedUrl = unquote(urlElement[start:end])
                # Ensure URL has correct protocol prefix
                if not extractedUrl.startswith(('http://', 'https://')):
                    if not extractedUrl.startswith('//'):
                        extractedUrl = 'https://' + extractedUrl
                    else:
                        extractedUrl = 'https:' + extractedUrl
            else:
                extractedUrl = urlElement
        # Extract snippet directly from search results page
        snippetElement = result.select_one('.result__snippet')
        snippet = snippetElement.text.strip() if snippetElement else 'No description'
        # Get actual page content (a failed fetch yields an error string,
        # never an exception out of this loop)
        try:
            targetPageSoup = self._readUrl(extractedUrl)
            content = self._extractMainContent(targetPageSoup)
        except Exception as e:
            logger.warning(f"Error extracting content from {extractedUrl}: {str(e)}")
            content = f"Error extracting content: {str(e)}"
        results.append({
            'title': title,
            'url': extractedUrl,
            'snippet': snippet,
            'data': content
        })
        # Limit number of results
        if len(results) >= self.maxResults:
            break
    return results
def _readUrl(self, url: str) -> BeautifulSoup:
    """
    Read a URL and return a BeautifulSoup parser for the content.
    Args:
        url: The URL to read
    Returns:
        BeautifulSoup object with the content or None on errors
    """
    # Only absolute http(s) URLs are fetched; anything else is rejected.
    if not url or not url.startswith(('http://', 'https://')):
        return None
    headers = {
        'User-Agent': self.userAgent,
        'Accept': 'text/html,application/xhtml+xml,application/xml',
        'Accept-Language': 'en-US,en;q=0.9',
    }
    try:
        # Initial request
        response = requests.get(url, headers=headers, timeout=self.timeout)
        # Handling for status 202 (Accepted): the server has not produced
        # the body yet, so retry with an increasing backoff (up to ~8.5s
        # of added sleep in the worst case).
        if response.status_code == 202:
            # Retry with backoff
            backoffTimes = [0.5, 1.0, 2.0, 5.0]
            for waitTime in backoffTimes:
                time.sleep(waitTime)
                response = requests.get(url, headers=headers, timeout=self.timeout)
                if response.status_code != 202:
                    break
        # Raise for error status codes
        response.raise_for_status()
        # Parse HTML
        return BeautifulSoup(response.text, 'html.parser')
    except Exception as e:
        # Network and parse failures are reported as None, not raised.
        logger.error(f"Error reading URL {url}: {str(e)}")
        return None
def _extractTitle(self, soup: BeautifulSoup, url: str) -> str:
    """
    Extract the title from a webpage, preferring <title> and falling back
    to the first <h1>.
    Args:
        soup: BeautifulSoup object of the webpage
        url: URL of the webpage
    Returns:
        Extracted title
    """
    if not soup:
        return f"Error with {url}"
    titleTag = soup.find('title')
    if titleTag:
        title = titleTag.text.strip()
    else:
        title = "No title"
    # <title> missing (or empty to bs4): try the first heading instead.
    if title == "No title":
        if (h1Tag := soup.find('h1')):
            title = h1Tag.text.strip()
    return title
def _extractMainContent(self, soup: BeautifulSoup, maxChars: int = 10000) -> str:
    """
    Extract the main content from an HTML page as plain text.
    Args:
        soup: BeautifulSoup object of the webpage
        maxChars: Maximum number of characters
    Returns:
        Extracted main content as a string
    """
    if not soup:
        return ""
    # Prefer semantic/main containers, checked in priority order.
    candidates = (soup.select_one(sel) for sel in ['main', 'article', '#content', '.content', '#main', '.main'])
    mainContent = next((node for node in candidates if node), None)
    # Fall back to <body>, or the whole document as a last resort.
    if not mainContent:
        mainContent = soup.find('body') or soup
    # Strip boilerplate elements that only add noise to the extracted text.
    for element in mainContent.select('script, style, nav, footer, header, aside, .sidebar, #sidebar, .comments, #comments, .advertisement, .ads, iframe'):
        element.extract()
    # Flatten to text and cap the length.
    return mainContent.get_text(separator=' ', strip=True)[:maxChars]
def _limitText(self, text: str, maxChars: int = 10000) -> str:
"""
Limit text to a maximum number of characters.
Args:
text: Input text
maxChars: Maximum number of characters
Returns:
Limited text
"""
if not text:
return ""
# If text is already under the limit, return unchanged
if len(text) <= maxChars:
return text
# Otherwise limit text to maxChars
return text[:maxChars] + "... [Content truncated due to length]"
# Factory function for the Webcrawler agent
def getAgentWebcrawler():
    """Factory: build and return a fresh Webcrawler agent instance."""
    return AgentWebcrawler()

123
static/124_defAttributes.py Normal file
View file

@ -0,0 +1,123 @@
from pydantic import BaseModel, Field
from typing import List, Dict, Any, Optional
# Define the model for attribute definitions
# (each instance describes one editable field of a dynamic UI form)
class AttributeDefinition(BaseModel):
    # Machine name of the attribute (the model field name)
    name: str
    # Human-readable label shown next to the input
    label: str
    # Input type: "string", "number", "textarea", "select", ...
    type: str
    # Whether the field must be filled in
    required: bool = False
    # Placeholder text for an empty input
    placeholder: Optional[str] = None
    # Pre-filled value, if any
    defaultValue: Optional[Any] = None
    # Choice list for select-style fields: [{"value": ..., "label": ...}]
    options: Optional[List[Dict[str, Any]]] = None
    # False for read-only fields (ids, audit columns)
    editable: bool = True
    # False hides the field from the form entirely
    visible: bool = True
    # Sort position within the form
    order: int = 0
    # Free-form validation metadata (e.g. {"hasValidators": True})
    validation: Optional[Dict[str, Any]] = None
    # Help/description text rendered near the field
    helpText: Optional[str] = None
# Helper classes for type mapping
# Maps stringified Python/typing annotations to generic UI input types.
typeMappings = {
    "int": "number",
    "str": "string",
    "float": "number",
    "bool": "boolean",
    "List[int]": "array",
    "List[str]": "array",
    "Dict[str, Any]": "object",
    "Optional[str]": "string",
    "Optional[int]": "number",
    "Optional[Dict[str, Any]]": "object"
}
# Special field types based on naming conventions
# Field names that imply a richer widget regardless of their Python type;
# these override the typeMappings result in getModelAttributes.
specialFieldTypes = {
    "content": "textarea",
    "description": "textarea",
    "instructions": "textarea",
    "password": "password",
    "email": "email",
    "workspaceId": "select",
    "agentId": "select",
    "type": "select"
}
# Function to convert a Pydantic model into attribute definitions
def getModelAttributes(modelClass, userLanguage="de"):
    """
    Converts a Pydantic model into a list of AttributeDefinition objects
    """
    # NOTE(review): relies on pydantic v1 field internals (__fields__,
    # field.type_, field.required, field.validators, field.field_info) —
    # these do not exist under pydantic v2; confirm the pinned version.
    attributes = []
    # Go through all fields in the model
    for i, (fieldName, field) in enumerate(modelClass.__fields__.items()):
        # Skip internal fields
        if fieldName.startswith('_') or fieldName in ["label", "fieldLabels"]:
            continue
        # Determine the field type (default to "string" for unknown annotations)
        fieldType = typeMappings.get(str(field.type_), "string")
        # Check for special field types — name-based overrides win
        if fieldName in specialFieldTypes:
            fieldType = specialFieldTypes[fieldName]
        # Get the label (if available); fall back to a prettified field name
        fieldLabel = fieldName.replace('_', ' ').capitalize()
        if hasattr(modelClass, 'fieldLabels') and fieldName in modelClass.fieldLabels:
            labelObj = modelClass.fieldLabels[fieldName]
            fieldLabel = labelObj.getLabel(userLanguage)
        # Determine default values and required status
        required = field.required
        defaultValue = field.default if not field.required else None
        # Check for validation rules (presence only, not their content)
        validation = None
        if field.validators:
            validation = {"hasValidators": True}
        # Placeholder text
        placeholder = f"Please enter {fieldLabel}"
        # Special options for Select fields
        options = None
        if fieldType == "select":
            # Hard-coded choice list for the Agent model's "type" field
            if fieldName == "type" and modelClass.__name__ == "Agent":
                options = [
                    {"value": "Analysis", "label": "Analysis"},
                    {"value": "Transformation", "label": "Transformation"},
                    {"value": "Generation", "label": "Generation"},
                    {"value": "Classification", "label": "Classification"},
                    {"value": "Custom", "label": "Custom"}
                ]
        # Extract description from Field object
        description = None
        # Try to get description from various possible sources
        # (location differs across pydantic minor versions)
        if hasattr(field, 'field_info') and hasattr(field.field_info, 'description'):
            description = field.field_info.description
        elif hasattr(field, 'description'):
            description = field.description
        elif hasattr(field, 'schema') and hasattr(field.schema, 'description'):
            description = field.schema.description
        # Create attribute definition
        attrDef = AttributeDefinition(
            name=fieldName,
            label=fieldLabel,
            type=fieldType,
            required=required,
            placeholder=placeholder,
            defaultValue=defaultValue,
            editable=fieldName not in ["id", "mandateId", "userId", "createdAt", "uploadDate"],
            options=options,
            visible=fieldName not in ["hashedPassword", "mandateId", "userId"],
            order=i,
            validation=validation,
            helpText=description or ""  # Set empty string as default value if no description found
        )
        attributes.append(attrDef)
    return attributes

View file

@ -0,0 +1,471 @@
"""
Interface to the Gateway system.
Manages users and mandates for authentication.
"""
import os
import logging
from typing import Dict, Any, List, Optional, Union
import importlib
from passlib.context import CryptContext
from connectors.connectorDbJson import DatabaseConnector
from modules.configuration import APP_CONFIG
logger = logging.getLogger(__name__)
# Password-Hashing
# Single argon2 scheme; "deprecated=auto" marks every non-default scheme
# as deprecated so passlib flags old hashes for re-hashing on verify.
pwdContext = CryptContext(schemes=["argon2"], deprecated="auto")
class GatewayInterface:
"""
Interface to the Gateway system.
Manages users and mandates.
"""
def __init__(self, mandateId: int = None, userId: int = None):
    """
    Initializes the Gateway Interface with optional mandate and user context.
    Args:
        mandateId: ID of the current mandate (optional)
        userId: ID of the current user (optional)
    """
    # Context can be empty during initialization; _initializeDatabase fills
    # it in while bootstrapping the Root mandate / Admin user.
    self.mandateId = mandateId
    self.userId = userId
    # Import data model module
    # (loaded dynamically; a missing/broken module fails loudly here)
    try:
        self.modelModule = importlib.import_module("modules.gatewayModel")
        logger.info("gatewayModel successfully imported")
    except ImportError as e:
        logger.error(f"Error importing gatewayModel: {e}")
        raise
    # Initialize database (creates Root mandate and Admin user on first run)
    self._initializeDatabase()
def _createDbConnector(self) -> "DatabaseConnector":
    """Build a system-DB connector bound to the current mandate/user context
    (0 stands for 'no context yet' during bootstrap)."""
    return DatabaseConnector(
        dbHost=APP_CONFIG.get("DB_SYSTEM_HOST"),
        dbDatabase=APP_CONFIG.get("DB_SYSTEM_DATABASE"),
        dbUser=APP_CONFIG.get("DB_SYSTEM_USER"),
        dbPassword=APP_CONFIG.get("DB_SYSTEM_PASSWORD_SECRET"),
        mandateId=self.mandateId if self.mandateId else 0,
        userId=self.userId if self.userId else 0
    )

def _initializeDatabase(self):
    """
    Initializes the database with minimal objects: ensures a Root mandate
    and an Admin user exist, recreating the connector (via
    _createDbConnector, which was previously duplicated inline three times)
    as the bootstrap context becomes known.
    """
    self.db = self._createDbConnector()
    # Create Root mandate if needed
    existingMandateId = self.getInitialId("mandates")
    mandates = self.db.getRecordset("mandates")
    if existingMandateId is None or not mandates:
        logger.info("Creating Root mandate")
        rootMandate = {
            "name": "Root",
            "language": "de"
        }
        createdMandate = self.db.recordCreate("mandates", rootMandate)
        logger.info(f"Root mandate created with ID {createdMandate['id']}")
        # Update mandate context
        self.mandateId = createdMandate['id']
        self.userId = createdMandate['userId']
        # Recreate connector with correct context
        self.db = self._createDbConnector()
    # Create Admin user if needed
    existingUserId = self.getInitialId("users")
    users = self.db.getRecordset("users")
    if existingUserId is None or not users:
        logger.info("Creating Admin user")
        adminUser = {
            "mandateId": self.mandateId,
            "username": "admin",
            "email": "admin@example.com",
            "fullName": "Administrator",
            "disabled": False,
            "language": "de",
            "privilege": "sysadmin",  # SysAdmin privilege
            # SECURITY: well-known default credentials — must be changed in
            # any non-development deployment.
            "hashedPassword": self._getPasswordHash("admin")  # Use a secure password in production!
        }
        createdUser = self.db.recordCreate("users", adminUser)
        logger.info(f"Admin user created with ID {createdUser['id']}")
        # Update user context
        self.userId = createdUser['id']
        # Recreate connector with correct context
        self.db = self._createDbConnector()
def getInitialId(self, table: str) -> Optional[int]:
"""Returns the initial ID for a table"""
return self.db.getInitialId(table)
def _getPasswordHash(self, password: str) -> str:
    """Hash *password* using the module-wide passlib context (argon2)."""
    return pwdContext.hash(password)
def _verifyPassword(self, plainPassword: str, hashedPassword: str) -> bool:
    """Return True if *plainPassword* matches the stored *hashedPassword*."""
    return pwdContext.verify(plainPassword, hashedPassword)
def _getCurrentTimestamp(self) -> str:
"""Returns the current timestamp in ISO format"""
from datetime import datetime
return datetime.now().isoformat()
# Mandate methods
def getAllMandates(self) -> List[Dict[str, Any]]:
"""Returns all mandates"""
return self.db.getRecordset("mandates")
def getMandate(self, mandateId: int) -> Optional[Dict[str, Any]]:
"""Returns a mandate by its ID"""
mandates = self.db.getRecordset("mandates", recordFilter={"id": mandateId})
if mandates:
return mandates[0]
return None
def createMandate(self, name: str, language: str = "de") -> Dict[str, Any]:
"""Creates a new mandate"""
mandateData = {
"name": name,
"language": language
}
return self.db.recordCreate("mandates", mandateData)
def updateMandate(self, mandateId: int, mandateData: Dict[str, Any]) -> Dict[str, Any]:
"""
Updates an existing mandate
Args:
mandateId: The ID of the mandate to update
mandateData: The mandate data to update
Returns:
Dict[str, Any]: The updated mandate data
Raises:
ValueError: If the mandate is not found
"""
# Check if the mandate exists
mandate = self.getMandate(mandateId)
if not mandate:
raise ValueError(f"Mandate with ID {mandateId} not found")
# Update the mandate
updatedMandate = self.db.recordModify("mandates", mandateId, mandateData)
return updatedMandate
def deleteMandate(self, mandateId: int) -> bool:
    """
    Deletes a mandate and all associated users and data.
    Returns False when the mandate does not exist or is the protected
    Root (initial) mandate.
    Args:
        mandateId: The ID of the mandate to delete
    Returns:
        bool: True if the mandate was successfully deleted, otherwise False
    """
    if not self.getMandate(mandateId):
        return False
    # The bootstrap Root mandate must never be removed.
    initialMandateId = self.getInitialId("mandates")
    if initialMandateId is not None and mandateId == initialMandateId:
        logger.warning(f"Attempt to delete the Root mandate was prevented")
        return False
    # Cascade: remove every user belonging to the mandate first,
    # together with their associated data.
    for user in self.getUsersByMandate(mandateId):
        self.deleteUser(user["id"])
    # Finally delete the mandate record itself.
    success = self.db.recordDelete("mandates", mandateId)
    if success:
        logger.info(f"Mandate with ID {mandateId} was successfully deleted")
    else:
        logger.error(f"Error deleting mandate with ID {mandateId}")
    return success
# User methods
def getAllUsers(self) -> List[Dict[str, Any]]:
"""Returns all users"""
users = self.db.getRecordset("users")
# Remove password hashes from the response
for user in users:
if "hashedPassword" in user:
del user["hashedPassword"]
return users
def getUsersByMandate(self, mandateId: int) -> List[Dict[str, Any]]:
"""
Returns all users of a specific mandate
Args:
mandateId: The ID of the mandate
Returns:
List[Dict[str, Any]]: List of users in the mandate
"""
users = self.db.getRecordset("users", recordFilter={"mandateId": mandateId})
# Remove password hashes from the response
for user in users:
if "hashedPassword" in user:
del user["hashedPassword"]
return users
def getUserByUsername(self, username: str) -> Optional[Dict[str, Any]]:
"""Returns a user by username"""
users = self.db.getRecordset("users")
for user in users:
if user.get("username") == username:
return user
return None
def getUser(self, userId: int) -> Optional[Dict[str, Any]]:
"""Returns a user by ID"""
users = self.db.getRecordset("users", recordFilter={"id": userId})
if users:
user = users[0]
# Remove password hash from the API response
if "hashedPassword" in user:
userCopy = user.copy()
del userCopy["hashedPassword"]
return userCopy
return user
return None
def createUser(self, username: str, password: str, email: str = None,
fullName: str = None, language: str = "de", mandateId: int = None,
disabled: bool = False, privilege: str = "user") -> Dict[str, Any]:
"""
Creates a new user
Args:
username: The username
password: The password
email: The email address (optional)
fullName: The full name (optional)
language: The preferred language (default: "de")
mandateId: The ID of the mandate (optional)
disabled: Whether the user is disabled (default: False)
privilege: The privilege level (default: "user")
Returns:
Dict[str, Any]: The created user data
Raises:
ValueError: If the username already exists
"""
# Check if the username already exists
existingUser = self.getUserByUsername(username)
if existingUser:
raise ValueError(f"User '{username}' already exists")
# Use the provided mandateId or the current context
userMandateId = mandateId if mandateId is not None else self.mandateId
userData = {
"mandateId": userMandateId,
"username": username,
"email": email,
"fullName": fullName,
"disabled": disabled,
"language": language,
"privilege": privilege,
"hashedPassword": self._getPasswordHash(password)
}
createdUser = self.db.recordCreate("users", userData)
# Remove password hash from the response
if "hashedPassword" in createdUser:
del createdUser["hashedPassword"]
return createdUser
def authenticateUser(self, username: str, password: str) -> Optional[Dict[str, Any]]:
"""
Authenticates a user by username and password
Args:
username: The username
password: The password
Returns:
Optional[Dict[str, Any]]: The user data or None if authentication fails
"""
user = self.getUserByUsername(username)
if not user:
return None
if not self._verifyPassword(password, user.get("hashedPassword", "")):
return None
# Check if the user is disabled
if user.get("disabled", False):
return None
# Create a copy without password hash
authenticatedUser = {**user}
if "hashedPassword" in authenticatedUser:
del authenticatedUser["hashedPassword"]
return authenticatedUser
def updateUser(self, userId: int, userData: Dict[str, Any]) -> Dict[str, Any]:
"""
Updates a user
Args:
userId: The ID of the user to update
userData: The user data to update
Returns:
Dict[str, Any]: The updated user data
Raises:
ValueError: If the user is not found
"""
# Get the current user with password hash (directly from DB)
users = self.db.getRecordset("users", recordFilter={"id": userId})
if not users:
raise ValueError(f"User with ID {userId} not found")
user = users[0]
# If the password is being changed, hash it
if "password" in userData:
userData["hashedPassword"] = self._getPasswordHash(userData["password"])
del userData["password"]
# Update the user
updatedUser = self.db.recordModify("users", userId, userData)
# Remove password hash from the response
if "hashedPassword" in updatedUser:
del updatedUser["hashedPassword"]
return updatedUser
def disableUser(self, userId: int) -> Dict[str, Any]:
"""Disables a user"""
return self.updateUser(userId, {"disabled": True})
def enableUser(self, userId: int) -> Dict[str, Any]:
"""Enables a user"""
return self.updateUser(userId, {"disabled": False})
def _deleteUserReferencedData(self, userId: int) -> None:
    """
    Deletes all records that reference the given user.

    Currently only the "attributes" table is cleaned up; further tables
    would be handled here as the schema grows.

    Args:
        userId: The ID of the user whose referenced data is removed
    """
    # A failure while cleaning attributes is logged but deliberately
    # does not abort the overall user deletion (best effort).
    try:
        userAttributes = self.db.getRecordset("attributes", recordFilter={"userId": userId})
        for attribute in userAttributes:
            self.db.recordDelete("attributes", attribute["id"])
    except Exception as e:
        logger.error(f"Error deleting attributes for user {userId}: {e}")
    logger.info(f"All referenced data for user {userId} has been deleted")
def deleteUser(self, userId: int) -> bool:
    """
    Deletes a user and all associated data.

    Args:
        userId: The ID of the user to delete

    Returns:
        bool: True if the user was successfully deleted, otherwise False
    """
    # Unknown user -> nothing to delete
    if not self.db.getRecordset("users", recordFilter={"id": userId}):
        return False
    # The initial (root admin) user is protected from deletion
    rootUserId = self.getInitialId("users")
    if rootUserId is not None and userId == rootUserId:
        logger.warning("Attempt to delete the Root Admin was prevented")
        return False
    # Cascade-delete everything referencing the user first
    self._deleteUserReferencedData(userId)
    # Then remove the user record itself
    success = self.db.recordDelete("users", userId)
    if success:
        logger.info(f"User with ID {userId} was successfully deleted")
    else:
        logger.error(f"Error deleting user with ID {userId}")
    return success
# Singleton factory: one GatewayInterface instance per (mandate, user) context
_gatewayInterfaces = {}


def getGatewayInterface(mandateId: int = None, userId: int = None) -> GatewayInterface:
    """
    Returns a GatewayInterface instance for the specified context.

    Instances are cached per (mandateId, userId) pair and reused on
    subsequent calls.

    Args:
        mandateId: ID of the mandate
        userId: ID of the user

    Returns:
        GatewayInterface instance
    """
    contextKey = f"{mandateId}_{userId}"
    interface = _gatewayInterfaces.get(contextKey)
    if interface is None:
        interface = GatewayInterface(mandateId, userId)
        _gatewayInterfaces[contextKey] = interface
    return interface


# Eagerly initialize the interface for the default context at import time
getGatewayInterface()

103
static/126_gatewayModel.py Normal file
View file

@ -0,0 +1,103 @@
"""
Data models for the gateway system.
"""
from pydantic import BaseModel, Field
from typing import List, Dict, Any, Optional
from datetime import datetime
class Label(BaseModel):
    """Label for an attribute or a class with support for multiple languages."""
    # Fallback text used when no translation exists for a language
    default: str
    # Mapping of language code -> translated label
    translations: Dict[str, str] = {}

    def getLabel(self, language: str = None):
        """Returns the label in the given language, falling back to the default."""
        if language:
            return self.translations.get(language, self.default)
        return self.default
class Mandate(BaseModel):
    """Data model for a mandate."""
    id: int = Field(description="Unique ID of the mandate")
    name: str = Field(description="Name of the mandate")
    language: str = Field(description="Default language of the mandate")
    # Display label for the class itself, with translations
    label: Label = Field(
        default=Label(default="Mandate", translations={"en": "Mandate", "fr": "Mandat"}),
        description="Label for the class"
    )
    # Per-field display labels (default text plus translations)
    fieldLabels: Dict[str, Label] = {
        "id": Label(default="ID", translations={}),
        "name": Label(default="Name of the mandate", translations={"en": "Mandate name", "fr": "Nom du mandat"}),
        "language": Label(default="Language", translations={"en": "Language", "fr": "Langue"})
    }
class User(BaseModel):
    """Data model for a user (public view, no password hash)."""
    id: int = Field(description="Unique ID of the user")
    mandateId: int = Field(description="ID of the associated mandate")
    username: str = Field(description="Username for login")
    email: Optional[str] = Field(None, description="Email address of the user")
    fullName: Optional[str] = Field(None, description="Full name of the user")
    language: str = Field(description="Preferred language of the user")
    disabled: Optional[bool] = Field(False, description="Indicates whether the user is disabled")
    privilege: str = Field(description="Permission level")  # one of: sysadmin, admin, user
    # Display label for the class itself, with translations
    label: Label = Field(
        default=Label(default="User", translations={"en": "User", "fr": "Utilisateur"}),
        description="Label for the class"
    )
    # Per-field display labels (default text plus translations)
    fieldLabels: Dict[str, Label] = {
        "id": Label(default="ID", translations={}),
        "mandateId": Label(default="Mandate ID", translations={"en": "Mandate ID", "fr": "ID de mandat"}),
        "username": Label(default="Username", translations={"en": "Username", "fr": "Nom d'utilisateur"}),
        "email": Label(default="Email", translations={"en": "Email", "fr": "E-mail"}),
        "fullName": Label(default="Full name", translations={"en": "Full name", "fr": "Nom complet"}),
        "language": Label(default="Language", translations={"en": "Language", "fr": "Langue"}),
        "disabled": Label(default="Disabled", translations={"en": "Disabled", "fr": "Désactivé"}),
        "privilege": Label(default="Permission level", translations={"en": "Access level", "fr": "Niveau d'accès"}),
    }
class UserInDB(User):
    """Extended user class with password hash (storage-side representation)."""
    hashedPassword: str = Field(description="Hash of the user password")
    # Display label override for the storage-side class
    label: Label = Field(
        default=Label(default="User Access", translations={"en": "User Access", "fr": "Accès de l'utilisateur"}),
        description="Label for the class"
    )
    # Additional label for the password field.
    # NOTE(review): this assignment REPLACES the fieldLabels declared on
    # User rather than merging with them, so the inherited per-field
    # labels are lost on this subclass — confirm whether a merged dict
    # was intended.
    fieldLabels: Dict[str, Label] = {
        "hashedPassword": Label(default="Password hash", translations={"en": "Password hash", "fr": "Hachage de mot de passe"})
    }
class Token(BaseModel):
    """Data model for an authentication token response."""
    accessToken: str = Field(description="The issued access token")
    tokenType: str = Field(description="Type of token (usually 'bearer')")
    # Display label for the class itself, with translations
    label: Label = Field(
        default=Label(default="Token", translations={"en": "Token", "fr": "Jeton"}),
        description="Label for the class"
    )
    # Per-field display labels (default text plus translations)
    fieldLabels: Dict[str, Label] = {
        "accessToken": Label(default="Access token", translations={"en": "Access token", "fr": "Jeton d'accès"}),
        "tokenType": Label(default="Token type", translations={"en": "Token type", "fr": "Type de jeton"})
    }
class TokenData(BaseModel):
    """Data carried by a decoded authentication token, used for validation."""
    # Username the token was issued for; None when absent from the token
    username: Optional[str] = None
    # Mandate context the token belongs to
    mandateId: Optional[int] = None
    # Token expiry time
    exp: Optional[datetime] = None

View file

@ -0,0 +1,933 @@
"""
Module for extracting content from various file formats.
Provides specialized functions for processing text, PDF, Office documents, images, etc.
"""
import logging
import os
import io
from typing import Dict, Any, List, Optional, Union, Tuple
import base64
# Configure logger
logger = logging.getLogger(__name__)
# Optional imports - only loaded when needed
pdfExtractorLoaded = False
officeExtractorLoaded = False
imageProcessorLoaded = False
def getDocumentContents(fileMetadata: Dict[str, Any], fileContent: bytes) -> List[Dict[str, Any]]:
    """
    Main function for extracting content from a file based on its MIME type.

    Delegates to specialized extraction functions and then normalizes the
    result: every returned item carries a top-level ``base64Encoded`` flag,
    which is also mirrored into ``metadata`` for backward compatibility.

    Args:
        fileMetadata: File metadata (name, MIME type, etc.)
        fileContent: Binary data of the file

    Returns:
        List of content dicts with metadata and base64Encoded flag; on any
        unexpected error a single-item list wrapping the raw bytes is
        returned instead of raising.
    """
    try:
        mimeType = fileMetadata.get("mimeType", "application/octet-stream")
        fileName = fileMetadata.get("name", "unknown")
        logger.info(f"Extracting content from file '{fileName}' (MIME type: {mimeType})")
        # Dispatch based on MIME type. Branch order matters: CSV must be
        # checked before the generic "text/" prefix, and SVG before the
        # generic "image/" prefix.
        contents = []
        # CSV has its own handler
        if mimeType == "text/csv":
            contents.extend(extractCsvContent(fileName, fileContent))
        # Other text-based formats
        elif mimeType.startswith("text/") or mimeType in [
            "application/json",
            "application/xml",
            "application/javascript",
            "application/x-python"
        ]:
            contents.extend(extractTextContent(fileName, fileContent, mimeType))
        # SVG files (text/XML and image at the same time)
        elif mimeType == "image/svg+xml":
            contents.extend(extractSvgContent(fileName, fileContent))
        # Raster images
        elif mimeType.startswith("image/"):
            contents.extend(extractImageContent(fileName, fileContent, mimeType))
        # PDF documents
        elif mimeType == "application/pdf":
            contents.extend(extractPdfContent(fileName, fileContent))
        # Word documents (modern and legacy MIME types)
        elif mimeType in [
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            "application/msword"
        ]:
            contents.extend(extractWordContent(fileName, fileContent, mimeType))
        # Excel documents (modern and legacy MIME types)
        elif mimeType in [
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            "application/vnd.ms-excel"
        ]:
            contents.extend(extractExcelContent(fileName, fileContent, mimeType))
        # PowerPoint documents (modern and legacy MIME types)
        elif mimeType in [
            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
            "application/vnd.ms-powerpoint"
        ]:
            contents.extend(extractPowerpointContent(fileName, fileContent, mimeType))
        # Binary data as fallback for unknown formats
        else:
            contents.extend(extractBinaryContent(fileName, fileContent, mimeType))
        # Fallback when the chosen extractor produced nothing
        if not contents:
            logger.warning(f"No content extracted from file '{fileName}', using binary fallback")
            # Convert binary content to base64
            encoded_data = base64.b64encode(fileContent).decode('utf-8')
            contents.append({
                "sequenceNr": 1,
                "name": '1_undefined',
                "ext": os.path.splitext(fileName)[1][1:] if os.path.splitext(fileName)[1] else "bin",
                "contentType": mimeType,
                "data": encoded_data,
                "base64Encoded": True,
                "metadata": {
                    "isText": False
                }
            })
        # Normalization pass over all extracted items
        for content in contents:
            # Make sure every content item has the base64Encoded flag
            if "base64Encoded" not in content:
                if isinstance(content.get("data"), bytes):
                    # Raw bytes from an extractor -> encode as base64
                    content["data"] = base64.b64encode(content["data"]).decode('utf-8')
                    content["base64Encoded"] = True
                else:
                    # Assume text content if not explicitly marked
                    content["base64Encoded"] = False
            # Ensure a metadata dict exists
            if "metadata" not in content:
                content["metadata"] = {}
            # Mirror base64Encoded into metadata for backward compatibility
            # with consumers of the old flag location
            content["metadata"]["base64Encoded"] = content["base64Encoded"]
        logger.info(f"Successfully extracted {len(contents)} content items from file '{fileName}'")
        return contents
    except Exception as e:
        logger.error(f"Error during content extraction: {str(e)}")
        # Fallback on error - return the original data base64-encoded
        return [{
            "sequenceNr": 1,
            "name": fileMetadata.get("name", "unknown"),
            "ext": os.path.splitext(fileMetadata.get("name", ""))[1][1:] if os.path.splitext(fileMetadata.get("name", ""))[1] else "bin",
            "contentType": fileMetadata.get("mimeType", "application/octet-stream"),
            "data": base64.b64encode(fileContent).decode('utf-8'),
            "base64Encoded": True,
            "metadata": {
                "isText": False,
                "base64Encoded": True  # For backward compatibility
            }
        }]
def _loadPdfExtractor():
    """Lazily imports the PDF extraction libraries (PyPDF2 and PyMuPDF)."""
    global pdfExtractorLoaded
    # Already loaded on an earlier call -> nothing to do
    if pdfExtractorLoaded:
        return
    try:
        # Bind the modules as globals so other functions can use them
        global PyPDF2, fitz
        import PyPDF2
        import fitz  # PyMuPDF for more extensive PDF processing
        pdfExtractorLoaded = True
        logger.info("PDF extraction libraries successfully loaded")
    except ImportError as e:
        logger.warning(f"PDF extraction libraries could not be loaded: {e}")
def _loadOfficeExtractor():
    """Lazily imports the Office document libraries (python-docx, openpyxl)."""
    global officeExtractorLoaded
    # Already loaded on an earlier call -> nothing to do
    if officeExtractorLoaded:
        return
    try:
        # Bind the modules as globals so other functions can use them
        global docx, openpyxl
        import docx  # python-docx for Word documents
        import openpyxl  # for Excel files
        officeExtractorLoaded = True
        logger.info("Office extraction libraries successfully loaded")
    except ImportError as e:
        logger.warning(f"Office extraction libraries could not be loaded: {e}")
def _loadImageProcessor():
    """Lazily imports the image processing library (Pillow)."""
    global imageProcessorLoaded
    if not imageProcessorLoaded:
        try:
            # Fix: the original declared `global PIL, Image` but only ever
            # bound `Image` (via `from PIL import Image`), so the global
            # name `PIL` was never assigned; declare only what is bound.
            global Image
            from PIL import Image
            imageProcessorLoaded = True
            logger.info("Image processing libraries successfully loaded")
        except ImportError as e:
            logger.warning(f"Image processing libraries could not be loaded: {e}")
def extractTextContent(fileName: str, fileContent: bytes, mimeType: str) -> List[Dict[str, Any]]:
    """
    Extracts text from text files.

    Decoding is attempted as UTF-8 first; on failure latin-1, cp1252 and
    iso-8859-1 are tried in turn. If nothing decodes, the raw bytes are
    returned base64-encoded as a binary item.

    Args:
        fileName: Name of the file
        fileContent: Binary data of the file
        mimeType: MIME type of the file (used for the binary fallback)

    Returns:
        List with one content object: text (base64Encoded = False) or,
        as a last resort, binary (base64Encoded = True)
    """
    try:
        # Keep the original file extension; default to "txt" when missing.
        # Note: assigned before decode(), so the except-handlers below can
        # safely reference it.
        fileExtension = os.path.splitext(fileName)[1][1:] if os.path.splitext(fileName)[1] else "txt"
        # Decode as UTF-8 (the common case)
        textContent = fileContent.decode('utf-8')
        return [{
            "sequenceNr": 1,
            "name": "1_text",  # Simplified naming
            "ext": fileExtension,
            "contentType": "text",
            "data": textContent,
            "base64Encoded": False,
            "metadata": {
                "isText": True
            }
        }]
    except UnicodeDecodeError:
        logger.warning(f"Could not decode text from file '{fileName}' as UTF-8, trying alternative encodings")
        try:
            # Try alternative encodings in order of likelihood.
            # (latin-1 accepts any byte sequence, so the binary fallback
            # below is effectively only reached on unexpected errors.)
            for encoding in ['latin-1', 'cp1252', 'iso-8859-1']:
                try:
                    textContent = fileContent.decode(encoding)
                    logger.info(f"Text successfully decoded with encoding {encoding}")
                    return [{
                        "sequenceNr": 1,
                        "name": "1_text",  # Simplified naming
                        "ext": fileExtension,
                        "contentType": "text",
                        "data": textContent,
                        "base64Encoded": False,
                        "metadata": {
                            "isText": True,
                            "encoding": encoding  # records which fallback succeeded
                        }
                    }]
                except UnicodeDecodeError:
                    continue
            # Fallback to binary data if no encoding works
            logger.warning(f"Could not decode text, using binary data")
            return [{
                "sequenceNr": 1,
                "name": "1_binary",  # Simplified naming
                "ext": fileExtension,
                "contentType": mimeType,
                "data": base64.b64encode(fileContent).decode('utf-8'),
                "base64Encoded": True,
                "metadata": {
                    "isText": False
                }
            }]
        except Exception as e:
            logger.error(f"Error in alternative text decoding: {str(e)}")
            # Return binary data as the final fallback
            return [{
                "sequenceNr": 1,
                "name": "1_binary",  # Simplified naming
                "ext": fileExtension,
                "contentType": mimeType,
                "data": base64.b64encode(fileContent).decode('utf-8'),
                "base64Encoded": True,
                "metadata": {
                    "isText": False
                }
            }]
def extractCsvContent(fileName: str, fileContent: bytes) -> List[Dict[str, Any]]:
    """
    Extracts content from CSV files.

    The raw bytes are decoded as UTF-8 first, then latin-1, cp1252 and
    iso-8859-1; if no encoding works, the bytes are returned
    base64-encoded as a binary item.

    Args:
        fileName: Name of the file
        fileContent: Binary data of the file

    Returns:
        List with one CSV content object (base64Encoded = False) or a
        binary fallback (base64Encoded = True)
    """
    try:
        # Decode as UTF-8 (the common case)
        csvContent = fileContent.decode('utf-8')
        return [{
            "sequenceNr": 1,
            "name": "1_csv",  # Simplified naming
            "ext": "csv",
            "contentType": "csv",
            "data": csvContent,
            "base64Encoded": False,
            "metadata": {
                "isText": True,
                "format": "csv"
            }
        }]
    except UnicodeDecodeError:
        logger.warning(f"Could not decode CSV from file '{fileName}' as UTF-8, trying alternative encodings")
        try:
            # Try alternative encodings for CSV.
            # (latin-1 accepts any byte sequence, so the binary fallback
            # below is effectively only reached on unexpected errors.)
            for encoding in ['latin-1', 'cp1252', 'iso-8859-1']:
                try:
                    csvContent = fileContent.decode(encoding)
                    logger.info(f"CSV successfully decoded with encoding {encoding}")
                    return [{
                        "sequenceNr": 1,
                        "name": "1_csv",  # Simplified naming
                        "ext": "csv",
                        "contentType": "csv",
                        "data": csvContent,
                        "base64Encoded": False,
                        "metadata": {
                            "isText": True,
                            "encoding": encoding,  # records which fallback succeeded
                            "format": "csv"
                        }
                    }]
                except UnicodeDecodeError:
                    continue
            # Fallback to binary data
            return [{
                "sequenceNr": 1,
                "name": "1_binary",  # Simplified naming
                "ext": "csv",
                "contentType": "text/csv",
                "data": base64.b64encode(fileContent).decode('utf-8'),
                "base64Encoded": True,
                "metadata": {
                    "isText": False
                }
            }]
        except Exception as e:
            logger.error(f"Error in alternative CSV decoding: {str(e)}")
            # Final fallback: raw bytes, base64-encoded
            return [{
                "sequenceNr": 1,
                "name": "1_binary",  # Simplified naming
                "ext": "csv",
                "contentType": "text/csv",
                "data": base64.b64encode(fileContent).decode('utf-8'),
                "base64Encoded": True,
                "metadata": {
                    "isText": False
                }
            }]
def extractSvgContent(fileName: str, fileContent: bytes) -> List[Dict[str, Any]]:
    """
    Extracts content from SVG files.

    SVG is treated as both text (XML) and image: when the bytes decode and
    contain an ``<svg`` tag, the XML text is returned with dual text/image
    metadata. Decodable content without an ``<svg`` tag is returned as
    plain text; undecodable bytes fall back to base64-encoded binary.

    Args:
        fileName: Name of the file
        fileContent: Binary data of the file

    Returns:
        List of SVG content objects with dual text/image metadata
    """
    contents = []
    try:
        # Extract SVG as text content (XML)
        svgText = fileContent.decode('utf-8')
        # Check if it's actually SVG by looking for the SVG tag
        if "<svg" in svgText.lower():
            # SVG is both text (XML) and an image
            contents.append({
                "sequenceNr": 1,
                "name": "1_svg",  # Simplified naming
                "ext": "svg",
                "contentType": "image/svg+xml",
                "data": svgText,
                "base64Encoded": False,
                "metadata": {
                    "isText": True,  # SVG is text-based (XML)
                    "format": "svg",
                    "isImage": True  # But also represents an image
                }
            })
        else:
            # Doesn't appear to be a valid SVG file -> return as plain text
            logger.warning(f"File '{fileName}' has SVG extension but does not contain SVG markup")
            contents.append({
                "sequenceNr": 1,
                "name": "1_text",
                "ext": "svg",
                "contentType": "text/plain",
                "data": svgText,
                "base64Encoded": False,
                "metadata": {
                    "isText": True,
                    "format": "text"
                }
            })
    except UnicodeDecodeError:
        logger.warning(f"Could not decode SVG from file '{fileName}' as UTF-8, trying alternative encodings")
        try:
            # Try alternative encodings; only accept a decode result that
            # actually contains an <svg> tag
            for encoding in ['latin-1', 'cp1252', 'iso-8859-1']:
                try:
                    svgText = fileContent.decode(encoding)
                    if "<svg" in svgText.lower():
                        logger.info(f"SVG successfully decoded with encoding {encoding}")
                        contents.append({
                            "sequenceNr": 1,
                            "name": "1_svg",  # Simplified naming
                            "ext": "svg",
                            "contentType": "image/svg+xml",
                            "data": svgText,
                            "base64Encoded": False,
                            "metadata": {
                                "isText": True,
                                "format": "svg",
                                "isImage": True,
                                "encoding": encoding  # records which fallback succeeded
                            }
                        })
                        break
                except UnicodeDecodeError:
                    continue
            # Fallback to binary data if no encoding produced SVG markup
            if not contents:
                logger.warning(f"Could not decode SVG text, using binary data")
                contents.append({
                    "sequenceNr": 1,
                    "name": "1_binary",  # Simplified naming
                    "ext": "svg",
                    "contentType": "image/svg+xml",
                    "data": base64.b64encode(fileContent).decode('utf-8'),
                    "base64Encoded": True,
                    "metadata": {
                        "isText": False,
                        "format": "svg",
                        "isImage": True
                    }
                })
        except Exception as e:
            logger.error(f"Error in alternative SVG decoding: {str(e)}")
            # Return binary data as the final fallback
            contents.append({
                "sequenceNr": 1,
                "name": "1_binary",  # Simplified naming
                "ext": "svg",
                "contentType": "image/svg+xml",
                "data": base64.b64encode(fileContent).decode('utf-8'),
                "base64Encoded": True,
                "metadata": {
                    "isText": False,
                    "format": "svg",
                    "isImage": True
                }
            })
    return contents
def extractImageContent(fileName: str, fileContent: bytes, mimeType: str) -> List[Dict[str, Any]]:
    """
    Extracts content from image files and optionally generates metadata descriptions.

    The raw image is always returned base64-encoded; when Pillow is
    available, width/height/format/EXIF metadata and a short textual
    description are added as well.

    Args:
        fileName: Name of the file
        fileContent: Binary data of the file
        mimeType: MIME type of the file

    Returns:
        List of content objects: the image (base64Encoded = True) and,
        when a description could be generated, a text item
        (base64Encoded = False)
    """
    # Derive the file extension from the MIME type ("image/jpeg" -> "jpg")
    fileExtension = mimeType.split('/')[-1]
    if fileExtension == "jpeg":
        fileExtension = "jpg"
    # Base metadata; enriched below when Pillow can read the image
    imageMetadata = {
        "isText": False,
        "format": "image"
    }
    imageDescription = None
    try:
        _loadImageProcessor()
        if imageProcessorLoaded and fileContent and len(fileContent) > 0:
            with io.BytesIO(fileContent) as imgStream:
                try:
                    img = Image.open(imgStream)
                    # verify() checks integrity but leaves the object unusable
                    img.verify()
                    # To safely continue working, rewind and reload
                    imgStream.seek(0)
                    img = Image.open(imgStream)
                    imageMetadata.update({
                        "format": img.format,
                        "mode": img.mode,
                        "width": img.width,
                        "height": img.height
                    })
                    # Extract EXIF data if the format provides it
                    if hasattr(img, '_getexif') and callable(img._getexif):
                        exif = img._getexif()
                        if exif:
                            exifData = {}
                            for tagId, value in exif.items():
                                # Stringify values so the metadata stays serializable
                                exifData[f"tag_{tagId}"] = str(value)
                            imageMetadata["exif"] = exifData
                    # Generate a short human-readable image description
                    imageDescription = f"Image ({img.width}x{img.height}, {img.format}, {img.mode})"
                except Exception as innerE:
                    # Unreadable/corrupt image: record the error but still
                    # return the raw bytes below
                    logger.warning(f"Error processing image: {str(innerE)}")
                    imageMetadata["error"] = str(innerE)
                    imageDescription = f"Image (unable to process: {str(innerE)})"
    except Exception as e:
        logger.warning(f"Could not extract image metadata: {str(e)}")
        imageMetadata["error"] = str(e)
    # The raw image bytes are always returned, base64-encoded
    encoded_data = base64.b64encode(fileContent).decode('utf-8')
    contents = [{
        "sequenceNr": 1,
        "name": "1_image",  # Simplified naming
        "ext": fileExtension,
        "contentType": "image",
        "data": encoded_data,
        "base64Encoded": True,
        "metadata": imageMetadata
    }]
    # If an image description is available, add it as additional text content
    if imageDescription:
        contents.append({
            "sequenceNr": 2,
            "name": "2_text_image_info",  # Simplified naming with label
            "ext": "txt",
            "contentType": "text",
            "data": imageDescription,
            "base64Encoded": False,
            "metadata": {
                "isText": True,
                "imageDescription": True
            }
        })
    return contents
def extractPdfContent(fileName: str, fileContent: bytes) -> List[Dict[str, Any]]:
    """
    Extracts text and images from PDF files.

    Text and document metadata are read with PyPDF2; embedded images are
    extracted with PyMuPDF (fitz). When the libraries are unavailable, or
    nothing could be extracted, the original PDF bytes are returned
    base64-encoded instead.

    Args:
        fileName: Name of the file
        fileContent: Binary data of the file

    Returns:
        List of PDF content objects (text and images) with appropriate
        base64Encoded flags
    """
    contents = []
    extractedContentFound = False
    try:
        # Load PDF extraction libraries lazily
        _loadPdfExtractor()
        if not pdfExtractorLoaded:
            logger.warning("PDF extraction not possible: Libraries not available")
            # Add the original file as binary content and stop
            contents.append({
                "sequenceNr": 1,
                "name": "1_pdf",  # Simplified naming
                "ext": "pdf",
                "contentType": "application/pdf",
                "data": base64.b64encode(fileContent).decode('utf-8'),
                "base64Encoded": True,
                "metadata": {
                    "isText": False,
                    "format": "pdf"
                }
            })
            return contents
        # Extract text with PyPDF2
        extractedText = ""
        pdfMetadata = {}
        with io.BytesIO(fileContent) as pdfStream:
            pdfReader = PyPDF2.PdfReader(pdfStream)
            # Extract document metadata; info keys usually carry a '/' prefix
            pdfInfo = pdfReader.metadata or {}
            for key, value in pdfInfo.items():
                if key.startswith('/'):
                    pdfMetadata[key[1:]] = value
                else:
                    pdfMetadata[key] = value
            # Concatenate text from all pages, tagged with page markers
            for pageNum in range(len(pdfReader.pages)):
                page = pdfReader.pages[pageNum]
                pageText = page.extract_text()
                if pageText:
                    extractedText += f"--- Page {pageNum + 1} ---\n{pageText}\n\n"
        # If any text was found, add it as a separate content item
        if extractedText.strip():
            extractedContentFound = True
            contents.append({
                "sequenceNr": len(contents) + 1,
                "name": f"{len(contents) + 1}_text",  # Simplified naming
                "ext": "txt",
                "contentType": "text",
                "data": extractedText,
                "base64Encoded": False,
                "metadata": {
                    "isText": True,
                    "source": "pdf",
                    "pages": len(pdfReader.pages),
                    "pdfMetadata": pdfMetadata
                }
            })
        # Extract embedded images with PyMuPDF (fitz); failures here must
        # not discard the text already extracted
        try:
            with io.BytesIO(fileContent) as pdfStream:
                doc = fitz.open(stream=pdfStream, filetype="pdf")
                imageCount = 0
                for pageNum in range(len(doc)):
                    page = doc[pageNum]
                    imageList = page.get_images(full=True)
                    for imgIndex, imgInfo in enumerate(imageList):
                        try:
                            imageCount += 1
                            xref = imgInfo[0]
                            baseImage = doc.extract_image(xref)
                            imageBytes = baseImage["image"]
                            imageExt = baseImage["ext"]
                            # Add the image as content - encoded as base64
                            extractedContentFound = True
                            contents.append({
                                "sequenceNr": len(contents) + 1,
                                "name": f"{len(contents) + 1}_image_page{pageNum+1}_{imgIndex+1}",  # Simplified naming with label
                                "ext": imageExt,
                                "contentType": f"image/{imageExt}",
                                "data": base64.b64encode(imageBytes).decode('utf-8'),
                                "base64Encoded": True,
                                "metadata": {
                                    "isText": False,
                                    "source": "pdf",
                                    "page": pageNum + 1,
                                    "index": imgIndex
                                }
                            })
                        except Exception as imgE:
                            # A single bad image must not abort the extraction
                            logger.warning(f"Error extracting image {imgIndex} on page {pageNum + 1}: {str(imgE)}")
                # Close document
                doc.close()
        except Exception as imgExtractE:
            logger.warning(f"Error extracting images from PDF: {str(imgExtractE)}")
    except Exception as e:
        logger.error(f"Error in PDF extraction: {str(e)}")
    # If no content was extracted at all, fall back to the original PDF
    if not extractedContentFound:
        contents.append({
            "sequenceNr": 1,
            "name": "1_pdf",  # Simplified naming
            "ext": "pdf",
            "contentType": "application/pdf",
            "data": base64.b64encode(fileContent).decode('utf-8'),
            "base64Encoded": True,
            "metadata": {
                "isText": False,
                "format": "pdf"
            }
        })
    return contents
def extractWordContent(fileName: str, fileContent: bytes, mimeType: str) -> List[Dict[str, Any]]:
    """
    Extracts text from Word documents.

    Only the modern DOCX format is parsed (via python-docx); paragraph and
    table text are concatenated. Legacy DOC files and any failure case fall
    back to the original bytes, base64-encoded.

    Args:
        fileName: Name of the file
        fileContent: Binary data of the file
        mimeType: MIME type of the file

    Returns:
        List of Word content objects (text, or the binary original) with
        appropriate base64Encoded flags
    """
    contents = []
    extractedContentFound = False
    # Determine the file extension from the MIME type
    fileExtension = "docx" if mimeType == "application/vnd.openxmlformats-officedocument.wordprocessingml.document" else "doc"
    try:
        # Load Office extraction libraries lazily
        _loadOfficeExtractor()
        if not officeExtractorLoaded:
            logger.warning("Word extraction not possible: Libraries not available")
            # Add the original file as binary content and stop
            contents.append({
                "sequenceNr": 1,
                "name": "1_word",  # Simplified naming
                "ext": fileExtension,
                "contentType": mimeType,
                "data": base64.b64encode(fileContent).decode('utf-8'),
                "base64Encoded": True,
                "metadata": {
                    "isText": False,
                    "format": "word"
                }
            })
            return contents
        # Only the newer DOCX format is supported for text extraction
        if mimeType == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            with io.BytesIO(fileContent) as docxStream:
                doc = docx.Document(docxStream)
                # Collect paragraph text
                fullText = []
                for para in doc.paragraphs:
                    fullText.append(para.text)
                # Collect table text, one row per line, cells joined by " | "
                for table in doc.tables:
                    for row in table.rows:
                        rowText = []
                        for cell in row.cells:
                            rowText.append(cell.text)
                        fullText.append(" | ".join(rowText))
                extractedText = "\n\n".join(fullText)
                # Add the extracted text as content if non-empty
                if extractedText.strip():
                    extractedContentFound = True
                    contents.append({
                        "sequenceNr": 1,
                        "name": "1_text",  # Simplified naming
                        "ext": "txt",
                        "contentType": "text",
                        "data": extractedText,
                        "base64Encoded": False,
                        "metadata": {
                            "isText": True,
                            "source": "docx",
                            "paragraphCount": len(doc.paragraphs),
                            "tableCount": len(doc.tables)
                        }
                    })
        else:
            # Legacy binary DOC format: no parser available
            logger.warning(f"Extraction from old Word format (DOC) not supported")
    except Exception as e:
        logger.error(f"Error in Word extraction: {str(e)}")
    # If no text was extracted, fall back to the original document
    if not extractedContentFound:
        contents.append({
            "sequenceNr": 1,
            "name": "1_word",  # Simplified naming
            "ext": fileExtension,
            "contentType": mimeType,
            "data": base64.b64encode(fileContent).decode('utf-8'),
            "base64Encoded": True,
            "metadata": {
                "isText": False,
                "format": "word"
            }
        })
    return contents
def extractExcelContent(fileName: str, fileContent: bytes, mimeType: str) -> List[Dict[str, Any]]:
    """
    Extracts table data from Excel files.

    Only the modern XLSX format is parsed (via openpyxl); each worksheet
    is converted into one CSV content item. Legacy XLS files and any
    failure case fall back to the original bytes, base64-encoded.

    Args:
        fileName: Name of the file
        fileContent: Binary data of the file
        mimeType: MIME type of the file

    Returns:
        List of Excel content objects (per-sheet CSV, or the binary
        original) with appropriate base64Encoded flags
    """
    contents = []
    extractedContentFound = False
    # Determine the file extension from the MIME type
    fileExtension = "xlsx" if mimeType == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" else "xls"
    try:
        # Load Office extraction libraries lazily
        _loadOfficeExtractor()
        if not officeExtractorLoaded:
            logger.warning("Excel extraction not possible: Libraries not available")
            # Add the original file as binary content and stop
            contents.append({
                "sequenceNr": 1,
                "name": "1_excel",  # Simplified naming
                "ext": fileExtension,
                "contentType": mimeType,
                "data": base64.b64encode(fileContent).decode('utf-8'),
                "base64Encoded": True,
                "metadata": {
                    "isText": False,
                    "format": "excel"
                }
            })
            return contents
        # Only the newer XLSX format is supported for extraction
        if mimeType == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
            with io.BytesIO(fileContent) as xlsxStream:
                # data_only=True reads cached values instead of formulas
                workbook = openpyxl.load_workbook(xlsxStream, data_only=True)
                # Extract each worksheet as a separate CSV content item
                for sheetIndex, sheetName in enumerate(workbook.sheetnames):
                    sheet = workbook[sheetName]
                    # Hand-rolled CSV formatting: every cell is quoted and
                    # embedded quotes are doubled.
                    # NOTE(review): cell values containing newlines are not
                    # otherwise escaped — confirm downstream consumers cope,
                    # or switch to the stdlib csv module.
                    csvRows = []
                    for row in sheet.iter_rows():
                        csvRow = []
                        for cell in row:
                            value = cell.value
                            if value is None:
                                csvRow.append("")
                            else:
                                csvRow.append(str(value).replace('"', '""'))
                        csvRows.append(','.join(f'"{cell}"' for cell in csvRow))
                    csvContent = "\n".join(csvRows)
                    # Add the sheet as CSV content if non-empty
                    if csvContent.strip():
                        extractedContentFound = True
                        # Sanitize the sheet name for use in the item name
                        sheetSafeName = sheetName.replace(" ", "_").replace("/", "_").replace("\\", "_")
                        contents.append({
                            "sequenceNr": len(contents) + 1,
                            "name": f"{len(contents) + 1}_csv_{sheetSafeName}",  # Simplified naming with sheet label
                            "ext": "csv",
                            "contentType": "csv",
                            "data": csvContent,
                            "base64Encoded": False,
                            "metadata": {
                                "isText": True,
                                "source": "xlsx",
                                "sheet": sheetName,
                                "format": "csv"
                            }
                        })
        else:
            # Legacy binary XLS format: no parser available
            logger.warning(f"Extraction from old Excel format (XLS) not supported")
    except Exception as e:
        logger.error(f"Error in Excel extraction: {str(e)}")
    # If no sheet data was extracted, fall back to the original document
    if not extractedContentFound:
        contents.append({
            "sequenceNr": 1,
            "name": "1_excel",  # Simplified naming
            "ext": fileExtension,
            "contentType": mimeType,
            "data": base64.b64encode(fileContent).decode('utf-8'),
            "base64Encoded": True,
            "metadata": {
                "isText": False,
                "format": "excel"
            }
        })
    return contents
def extractPowerpointContent(fileName: str, fileContent: bytes, mimeType: str) -> List[Dict[str, Any]]:
    """
    Extracts content from PowerPoint presentations.

    The presentation is not parsed: the original binary is passed through as a
    single base64-encoded content object. A full text extraction would require
    a specialized library (e.g. python-pptx).

    Args:
        fileName: Name of the file
        fileContent: Binary data of the file
        mimeType: MIME type of the file

    Returns:
        List of PowerPoint-Content objects with base64Encoded = True
    """
    pptxMime = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
    fileExtension = "pptx" if mimeType == pptxMime else "ppt"
    contentItem = {
        "sequenceNr": 1,
        "name": "1_powerpoint",  # Simplified naming
        "ext": fileExtension,
        "contentType": mimeType,
        "data": base64.b64encode(fileContent).decode('utf-8'),
        "base64Encoded": True,
        "metadata": {
            "isText": False,
            "format": "powerpoint"
        }
    }
    return [contentItem]
def extractBinaryContent(fileName: str, fileContent: bytes, mimeType: str) -> List[Dict[str, Any]]:
    """
    Fallback for binary files where no specific extraction is possible.

    The file is wrapped unchanged into a single base64-encoded content object.

    Args:
        fileName: Name of the file
        fileContent: Binary data of the file
        mimeType: MIME type of the file

    Returns:
        List with a binary Content object with base64Encoded = True
    """
    # Compute the extension once (original code called splitext twice);
    # fall back to "bin" for files without an extension.
    extension = os.path.splitext(fileName)[1]
    fileExtension = extension[1:] if extension else "bin"
    return [{
        "sequenceNr": 1,
        "name": "1_binary",  # Simplified naming
        "ext": fileExtension,
        "contentType": mimeType,
        "data": base64.b64encode(fileContent).decode('utf-8'),
        "base64Encoded": True,
        "metadata": {
            "isText": False,
            "format": "binary"
        }
    }]

File diff suppressed because it is too large Load diff

67
static/129_mimeUtils.py Normal file
View file

@ -0,0 +1,67 @@
"""
Utility functions for MIME type handling and file format determination.
"""
def isTextMimeType(mimeType: str) -> bool:
    """
    Determines if a MIME type represents a text format that should not be base64 encoded.

    Args:
        mimeType: The MIME type to check

    Returns:
        True if the content is a text format, False otherwise
    """
    # Every text/* type is textual.
    if mimeType.startswith("text/"):
        return True
    # A handful of application/* and image/* types are textual as well.
    textualTypes = {
        "application/json",
        "application/xml",
        "application/javascript",
        "application/x-python",
        "image/svg+xml",
    }
    return mimeType in textualTypes
def determineContentEncoding(fileName: str, content: object, mimeType: str = None) -> bool:
    """
    Determines if content should be base64 encoded based on file type and MIME type.

    Args:
        fileName: Name of the file including extension
        content: The content of the file (typically str or bytes)
        mimeType: Optional MIME type of the content

    Returns:
        True if content should be base64 encoded, False otherwise
    """
    # Stdlib import kept function-local to match the module's existing style.
    import os

    # If a textual MIME type is provided, string content needs no encoding;
    # anything else (e.g. bytes) still does.
    if mimeType and isTextMimeType(mimeType):
        return not isinstance(content, str)

    # Extract the file extension (lowercase, without the leading dot)
    _, extension = os.path.splitext(fileName)
    extension = extension.lower().lstrip('.')

    # Text formats with string content do not need base64 encoding
    text_extensions = {'txt', 'csv', 'json', 'xml', 'html', 'md', 'svg', 'js', 'css', 'py'}
    if extension in text_extensions and isinstance(content, str):
        return False

    # Known binary formats are always base64 encoded
    binary_extensions = {'jpg', 'jpeg', 'png', 'gif', 'pdf', 'doc', 'docx', 'xls', 'xlsx', 'zip', 'rar'}
    if extension in binary_extensions:
        return True

    # Raw bytes are base64 encoded regardless of extension
    if isinstance(content, bytes):
        return True

    # Default for unknown types: encode anything that is not a string
    return not isinstance(content, str)

View file

@ -0,0 +1,608 @@
# Comprehensive System Documentation for Workflow Management
# Introduction
Welcome to the "Comprehensive System Documentation for Workflow Management." This manual serves as an essential resource for technical professionals seeking an in-depth understanding of our workflow management system. It is meticulously crafted to provide clarity and insight into the system's architecture, functionality, and operational intricacies.
## Purpose and Scope
The primary purpose of this document is to offer a detailed exposition of the workflow management system, focusing on its state machine design, system architecture, and error handling mechanisms. This manual aims to equip system architects, developers, and technical analysts with the knowledge required to effectively utilize, maintain, and enhance the system.
## Context and Background
In today's fast-paced digital environment, efficient workflow management is crucial for optimizing business processes and ensuring seamless operations. Our system is designed to address these needs by providing a robust framework that supports dynamic state transitions and error resilience. By leveraging state machine principles, the system ensures that workflows are executed consistently and reliably, adapting to various operational scenarios.
## Document Outline
Readers will find a comprehensive exploration of the following key topics:
- **State Machine**: An in-depth analysis of the state machine model employed within the system, detailing its role in managing workflow states and transitions.
- **Workflow Management**: A thorough examination of the workflow management processes, including task orchestration and process automation.
- **System Architecture**: A detailed overview of the system's architectural design, highlighting the components and their interactions.
- **Error Handling**: Insights into the error handling strategies implemented to ensure system robustness and reliability.
Each section is designed to provide technical insights and practical guidance, supported by code examples and diagrams where applicable.
## Tone and Audience
This manual is written in a formal tone, tailored for a technical audience with a foundational understanding of system design and workflow management principles. It aims to engage readers by presenting complex information in a structured and accessible manner, facilitating a deeper comprehension of the system's capabilities and potential applications.
We trust that this documentation will serve as a valuable tool in your technical endeavors, enhancing your ability to leverage our workflow management system to its fullest potential.
# Introduction
## Overview of the System
The Workflow Management System is a sophisticated software solution designed to streamline and automate the management of backend chat workflows. At its core, the system employs a state machine-based approach, allowing for efficient handling of complex workflow processes. This system is integral to ensuring that chat interactions are processed in a structured and consistent manner, thereby enhancing operational efficiency and user satisfaction.
The system's architecture is modular, with the **Workflow Manager Module** serving as the central component. This module is responsible for initializing workflows, managing their states, and ensuring seamless transitions between different stages of the workflow. By leveraging unique identifiers such as workflow ID, mandate ID, and user ID, the system ensures precise tracking and management of each workflow instance.
## Purpose of the Documentation
This documentation serves as a comprehensive guide for technical users who are involved in the development, maintenance, and optimization of the Workflow Management System. It aims to provide detailed insights into the system's architecture, functionality, and interconnections between various components. By offering a thorough understanding of the system, this manual facilitates effective troubleshooting, customization, and enhancement of the workflow management processes.
Key objectives of this documentation include:
- Providing a detailed description of the system's components and their interactions.
- Offering guidance on the initialization and management of workflows.
- Explaining the state machine approach and its application within the system.
- Assisting technical users in identifying and resolving potential issues.
## Scope and Limitations
### Scope
This documentation covers all aspects of the Workflow Management System, with a focus on the **workflowManager.py** module. It includes:
- Detailed descriptions of the system's architecture and components.
- Step-by-step instructions for initializing and managing workflows.
- An explanation of the state machine methodology and its implementation.
- Examples and scenarios illustrating typical workflow processes.
### Limitations
While this documentation provides an extensive overview of the Workflow Management System, it is important to note the following limitations:
- It does not cover external systems or integrations that may interact with the workflow management processes.
- The documentation assumes a foundational understanding of state machines and workflow management principles.
- It does not provide exhaustive coverage of every potential use case or customization scenario.
By understanding the scope and limitations outlined above, users can effectively utilize this documentation to enhance their interaction with the Workflow Management System, ensuring optimal performance and reliability.
# System Architecture
The System Architecture section provides a detailed overview of the structural design of the Workflow Management System. This section aims to elucidate the components, their interactions, and the data flow within the system, offering a comprehensive understanding for technical audiences.
## Component Overview
The Workflow Management System is designed around a modular architecture, primarily leveraging a state machine-based approach to manage backend chat workflows. Below is a detailed description of the core components:
### Workflow Manager Module
- **Purpose**: The Workflow Manager is the central component responsible for orchestrating chat workflows. It utilizes a state machine to ensure that workflows progress through predefined states in a controlled manner.
- **Functionality**:
- **Workflow Initialization**: This sub-component is responsible for initializing new workflows or loading existing ones. It assigns unique identifiers such as workflow ID, mandate ID, and user ID, ensuring each workflow instance is distinct and traceable.
- **State Management**: Manages transitions between different states of the workflow, ensuring that each step is executed in the correct sequence.
- **Event Handling**: Listens for and processes events that trigger state transitions, maintaining the integrity and continuity of the workflow.
### Interaction with Other Components
- **Database Interface**: The Workflow Manager interacts with a database to persist workflow states and retrieve necessary data. This ensures that workflows can be paused and resumed without loss of information.
- **User Interface**: Provides feedback and updates to the user interface, allowing users to monitor the progress of workflows and receive notifications about state changes.
- **External Services**: Integrates with external services for tasks such as authentication, notifications, and data retrieval, ensuring that workflows can leverage external capabilities seamlessly.
## Data Flow Diagram
The data flow within the Workflow Management System is structured to ensure efficient and reliable processing of information. Below is a high-level overview of the data flow and dependencies:
### Data Flow Description
1. **Workflow Initialization**:
- Data is retrieved from the database to initialize or resume workflows.
- Unique identifiers are generated and stored for tracking purposes.
2. **State Transition**:
- Upon receiving an event, the Workflow Manager processes the event and determines the next state.
- State changes are logged and updated in the database to maintain a history of the workflow.
3. **User Interaction**:
- The system sends updates to the user interface, providing real-time feedback on workflow progress.
- User inputs are captured and processed to influence workflow decisions.
4. **External Service Integration**:
- Data is exchanged with external services to perform specific tasks, such as sending notifications or fetching additional data.
- Responses from external services are processed and used to update the workflow state.
### Dependencies
- **Database**: The system relies heavily on a robust database to store workflow states, user information, and historical data. This dependency ensures data persistence and consistency across sessions.
- **External APIs**: Integration with external APIs is crucial for extending the functionality of workflows, allowing the system to perform complex operations beyond its core capabilities.
In summary, the Workflow Management System's architecture is designed to be modular, scalable, and efficient, with clearly defined components and data flows that ensure reliable operation and ease of maintenance. This architecture supports the dynamic nature of chat workflows, providing a robust framework for managing complex interactions.
# Module Descriptions
This section provides a detailed overview of the modules within the "Comprehensive System Documentation for Workflow Management." Each module is described in terms of its purpose, functionality, key functions, and methods. This documentation is intended for a technical audience and aims to offer a thorough understanding of the system's components and their interconnections.
## Workflow Manager Module
The Workflow Manager Module is a critical component of the system, responsible for managing chat workflows through a state machine-based approach. This module ensures that workflows are executed efficiently and consistently, providing a robust framework for handling complex interactions.
### Purpose and Functionality
The primary purpose of the Workflow Manager Module is to manage the lifecycle of chat workflows. It leverages a state machine to handle transitions between different states, ensuring that workflows progress smoothly and adhere to predefined rules. This module is essential for maintaining the integrity and efficiency of the workflow management system.
### Key Functions and Methods
1. **Workflow Initialization**
- **Functionality**: Initializes a new workflow or loads an existing one.
- **Key Methods**:
- `initialize_workflow(workflow_id, mandate_id, user_id)`: Sets up a new workflow with a unique identifier and associates it with a specific mandate and user.
- `load_existing_workflow(workflow_id)`: Retrieves and loads an existing workflow from the database.
2. **State Management**
- **Functionality**: Manages the transitions between different states within a workflow.
- **Key Methods**:
- `transition_to_state(new_state)`: Transitions the workflow to a new state, ensuring all conditions for the transition are met.
- `get_current_state()`: Returns the current state of the workflow, allowing for monitoring and debugging.
3. **Event Handling**
- **Functionality**: Processes events that trigger state transitions.
- **Key Methods**:
- `handle_event(event_type, event_data)`: Processes incoming events, determining the appropriate state transition based on the event type and data.
- `register_event_listener(listener)`: Allows external components to register listeners for specific events, facilitating integration with other system components.
4. **Error Handling and Recovery**
- **Functionality**: Ensures robust error handling and recovery mechanisms are in place.
- **Key Methods**:
- `handle_error(error_code, error_message)`: Manages errors by logging them and initiating recovery procedures.
- `recover_from_failure()`: Attempts to recover the workflow to a stable state after a failure.
## State Machine Implementation
The State Machine Implementation is a foundational aspect of the Workflow Manager Module, providing the logic and structure necessary for managing state transitions within workflows.
### Purpose and Functionality
The state machine is designed to model the dynamic behavior of workflows, allowing for precise control over state transitions. It ensures that workflows adhere to defined rules and constraints, preventing invalid transitions and maintaining system stability.
### Key Functions and Methods
1. **State Definition**
- **Functionality**: Defines the possible states within a workflow.
- **Key Methods**:
- `define_state(state_name, entry_action, exit_action)`: Establishes a new state with specific entry and exit actions, facilitating controlled transitions.
2. **Transition Logic**
- **Functionality**: Governs the logic for transitioning between states.
- **Key Methods**:
- `add_transition(from_state, to_state, condition)`: Adds a transition rule between states, specifying the condition under which the transition is valid.
- `evaluate_transition(current_state, event)`: Evaluates whether a transition should occur based on the current state and incoming event.
3. **State Persistence**
- **Functionality**: Ensures that state information is persistently stored and retrievable.
- **Key Methods**:
- `save_state(workflow_id, state)`: Persists the current state of a workflow to the database.
- `load_state(workflow_id)`: Retrieves the last known state of a workflow, enabling continuity after system restarts.
4. **Debugging and Monitoring**
- **Functionality**: Provides tools for monitoring and debugging state transitions.
- **Key Methods**:
- `log_state_transition(from_state, to_state)`: Logs each state transition for auditing and debugging purposes.
- `get_transition_history(workflow_id)`: Retrieves the history of state transitions for a specific workflow, aiding in analysis and troubleshooting.
This comprehensive description of the Workflow Manager Module and State Machine Implementation provides a detailed understanding of their roles and functionalities within the workflow management system. The structured approach ensures that workflows are managed efficiently, with robust mechanisms for handling state transitions and errors.
# Workflow Management
This section provides a detailed overview of the workflow management functionality within the system, focusing on the initialization and setup of workflows, state transitions, and error handling mechanisms. This documentation is intended for technical audiences who require a comprehensive understanding of the workflow management processes.
## Workflow Initialization
The workflow initialization process is a critical step in setting up and managing chat workflows within the system. This subsection details the procedures and components involved in initializing workflows.
### Key Components
- **Workflow Manager Module**: The core component responsible for implementing a state machine to manage chat workflows effectively.
- **Unique Identifiers**: Each workflow is initialized with a unique ID, mandate ID, and user ID to ensure distinct tracking and management.
- **Initialization Parameters**: The workflow setup includes parameters such as the initial state, user roles, and permissions, which are essential for defining the workflow's operational context.
### Initialization Process
1. **New Workflow Creation**:
- The system allows for the creation of new workflows by assigning a unique workflow ID.
- Initial parameters are set, including user roles and initial state configuration.
2. **Loading Existing Workflows**:
- Existing workflows can be loaded into the system using their unique identifiers.
- The system retrieves and restores the workflow's state and context from persistent storage.
3. **Setup Confirmation**:
- Upon initialization, the system confirms the setup by logging the workflow details and ensuring all parameters are correctly configured.
## Workflow States
This subsection describes the state transitions within the workflow management system, focusing on how workflows progress through various states.
### State Machine Overview
- **State Definitions**: Each workflow consists of predefined states, such as "Initialized," "In Progress," "Completed," and "Error."
- **Transition Rules**: The system enforces rules that dictate permissible transitions between states, ensuring logical progression and preventing invalid state changes.
### State Transition Process
1. **State Change Triggers**:
- Transitions are triggered by specific events or conditions, such as user actions or system notifications.
- Each trigger is associated with a corresponding state change, which is logged for audit purposes.
2. **State Validation**:
- Before a state transition occurs, the system validates the transition against predefined rules to ensure it is permissible.
- Invalid transitions are rejected, and appropriate error messages are generated.
3. **State Update**:
- Upon successful validation, the workflow's state is updated, and the system notifies relevant stakeholders of the change.
- The updated state is persisted in the system to maintain consistency and reliability.
## Error Handling Mechanisms
Effective error handling is crucial for maintaining the integrity and reliability of the workflow management system. This subsection outlines the mechanisms in place to manage errors.
### Error Detection
- **Monitoring and Alerts**: The system continuously monitors workflows for anomalies and generates alerts when errors are detected.
- **Error Logging**: All errors are logged with detailed information, including timestamps, error codes, and descriptions, to facilitate troubleshooting.
### Error Resolution
1. **Automated Recovery**:
- The system attempts to automatically resolve common errors through predefined recovery procedures.
- Successful recoveries are logged, and workflows are returned to a stable state.
2. **Manual Intervention**:
- For errors that cannot be resolved automatically, the system provides detailed error reports to administrators.
- Administrators can manually intervene to correct the issue and resume normal workflow operations.
3. **Error Escalation**:
- Critical errors that impact system stability are escalated to higher-level support teams for immediate attention.
- Escalation procedures include detailed documentation of the error and its impact on the system.
By understanding these components and processes, technical users can effectively manage and troubleshoot workflows within the system, ensuring smooth and reliable operations.
# Integration and Dependencies
This section provides a detailed overview of the integration points and dependencies within the Workflow Management system. It is crucial for understanding how the system interacts with external systems and manages its dependencies to ensure seamless operation. The section is divided into two main subsections: External Systems and APIs and Endpoints.
## External Systems
The Workflow Management system is designed to interact with various external systems to enhance its functionality and provide comprehensive workflow solutions. These integrations are essential for data exchange, process automation, and extending the capabilities of the system. Below are the key external systems integrated with the Workflow Management system:
1. **Customer Relationship Management (CRM) Systems**:
- The system integrates with popular CRM platforms to fetch and update customer data, ensuring that workflows are informed by the latest customer interactions and information.
2. **Enterprise Resource Planning (ERP) Systems**:
- Integration with ERP systems allows the Workflow Management system to access and utilize enterprise-wide data, facilitating more informed decision-making within workflows.
3. **Communication Platforms**:
- The system connects with various communication platforms (e.g., email, messaging apps) to send notifications and updates, ensuring that all stakeholders are informed of workflow progress and changes.
4. **Data Analytics Tools**:
- By integrating with data analytics tools, the system can provide insights and reports based on workflow data, aiding in performance tracking and optimization.
## APIs and Endpoints
The Workflow Management system exposes several APIs and endpoints that allow for seamless integration with external systems and facilitate communication between different components of the system. These APIs are designed to be robust, secure, and easy to use, enabling developers to extend and customize the system as needed.
### Key APIs
1. **Workflow Initialization API**:
- **Endpoint**: `/api/workflow/init`
- **Method**: POST
- **Description**: Initializes a new workflow or loads an existing one. Requires parameters such as unique ID, mandate ID, and user ID.
- **Example Request**:
```json
{
"unique_id": "12345",
"mandate_id": "67890",
"user_id": "user_001"
}
```
2. **State Transition API**:
- **Endpoint**: `/api/workflow/transition`
- **Method**: POST
- **Description**: Manages state transitions within a workflow. This API ensures that workflows progress through predefined states based on specific triggers or conditions.
- **Example Request**:
```json
{
"workflow_id": "12345",
"current_state": "pending",
"next_state": "approved"
}
```
3. **Notification API**:
- **Endpoint**: `/api/notifications/send`
- **Method**: POST
- **Description**: Sends notifications to users or systems based on workflow events. Supports multiple communication channels.
- **Example Request**:
```json
{
"recipient": "user_001",
"message": "Your workflow has been approved.",
"channel": "email"
}
```
### Dependency Management
Effective dependency management is critical for the stability and performance of the Workflow Management system. The system relies on several libraries and frameworks, which are managed through a package manager to ensure compatibility and ease of updates.
- **Python Libraries**: The system utilizes various Python libraries for state management, API handling, and data processing. These libraries are specified in a `requirements.txt` file, which can be used to install all necessary dependencies using pip.
- **Version Control**: Dependencies are version-controlled to prevent compatibility issues. The system is regularly updated to incorporate the latest stable versions of libraries, ensuring security and performance enhancements.
- **Testing and Validation**: Before integrating new dependencies or updating existing ones, thorough testing is conducted to validate their compatibility and performance within the system.
In conclusion, the integration and dependencies of the Workflow Management system are meticulously managed to ensure robust performance and seamless interaction with external systems. This section provides a comprehensive understanding of how these integrations and dependencies are structured and maintained.
# Usage and Examples
This section provides detailed guidance on how to effectively use the Workflow Management System, illustrating both basic and advanced scenarios. It includes examples of workflows and common use cases to help users understand the system's capabilities and applications.
## Basic Usage
The Workflow Management System is designed to streamline the management of chat workflows using a state machine-based approach. Below are the fundamental steps to get started with the system:
### Workflow Initialization
To begin using the system, you must initialize a workflow. This process involves setting up a new workflow or loading an existing one. Each workflow is identified by a unique ID, along with a mandate ID and user ID. The initialization process ensures that the workflow is correctly configured to handle subsequent operations.
**Example:**
```python
from workflowManager import WorkflowManager
# Initialize a new workflow
workflow = WorkflowManager.initialize_workflow(
unique_id="workflow123",
mandate_id="mandate456",
user_id="user789"
)
```
### Managing Workflow States
Once a workflow is initialized, it can transition between various states. The state machine manages these transitions, ensuring that the workflow progresses logically from one state to the next.
**Example:**
```python
# Transition to the next state
workflow.transition_to_next_state()
# Check current state
current_state = workflow.get_current_state()
print(f"Current State: {current_state}")
```
### Completing a Workflow
After all necessary states have been processed, the workflow can be completed. This marks the end of the workflow's lifecycle.
**Example:**
```python
# Complete the workflow
workflow.complete_workflow()
```
## Advanced Scenarios
For more complex use cases, the Workflow Management System offers advanced functionalities that cater to intricate workflow requirements.
### Conditional State Transitions
In some scenarios, state transitions may depend on specific conditions or external inputs. The system allows for conditional logic to be incorporated into the workflow.
**Example:**
```python
# Conditional transition based on external input
if workflow.check_condition("condition_met"):
workflow.transition_to_state("next_state")
```
### Parallel Workflow Execution
The system supports the execution of parallel workflows, enabling multiple workflows to run concurrently without interference.
**Example:**
```python
# Initialize multiple workflows
workflow1 = WorkflowManager.initialize_workflow("workflow1", "mandate1", "user1")
workflow2 = WorkflowManager.initialize_workflow("workflow2", "mandate2", "user2")
# Execute workflows in parallel
workflow1.transition_to_next_state()
workflow2.transition_to_next_state()
```
### Error Handling and Recovery
The system is equipped with robust error handling mechanisms to manage exceptions and ensure workflow continuity. In the event of an error, the system can revert to a safe state or retry operations.
**Example:**
```python
try:
workflow.transition_to_next_state()
except WorkflowError as e:
print(f"Error encountered: {e}")
workflow.revert_to_previous_state()
```
## Common Use Cases
The Workflow Management System is versatile and can be applied to various domains. Below are some common use cases:
- **Customer Support Chatbots**: Automating customer interactions by managing conversation states and responses.
- **Order Processing Systems**: Handling order states from initiation to completion, including payment and delivery.
- **Project Management Tools**: Tracking project phases and tasks, ensuring timely transitions and updates.
By understanding these examples and scenarios, users can leverage the Workflow Management System to optimize their processes and enhance operational efficiency.
# Troubleshooting and FAQs
This section provides guidance on resolving common issues and answers frequently asked questions related to the Workflow Management system. It also outlines available support and resources for further assistance.
## Common Issues
### 1. Workflow Initialization Errors
**Issue**: Errors occur during the initialization of a new workflow or when loading an existing one.
**Solution**:
- Ensure that all required parameters (unique ID, mandate ID, user ID) are correctly provided.
- Verify that the database connection is active and accessible.
- Check for any syntax errors in the configuration files.
**Example**: If you encounter an error message like `Initialization failed: Missing mandate ID`, double-check that the mandate ID is included in the initialization call.
### 2. State Transition Failures
**Issue**: The state machine fails to transition between states as expected.
**Solution**:
- Confirm that all state transition rules are correctly defined in the workflow configuration.
- Ensure that the current state is valid and that the transition conditions are met.
- Review the logs for any error messages that might indicate the cause of the failure.
**Example**: If a transition from `Pending` to `Approved` does not occur, check the conditions defined for this transition in the `workflowManager.py` file.
### 3. Performance Degradation
**Issue**: The system experiences slow performance during workflow processing.
**Solution**:
- Optimize database queries to reduce execution time.
- Increase system resources such as CPU and memory if necessary.
- Review the workflow logic for any inefficient loops or redundant operations.
**Example**: If processing a workflow takes significantly longer than expected, analyze the database query logs to identify slow queries.
## FAQs
### What is the purpose of the Workflow Manager Module?
The Workflow Manager Module implements a state machine to manage chat workflows, ensuring that each workflow progresses through predefined states based on specific conditions and triggers.
### How do I add a new state to the workflow?
To add a new state, update the workflow configuration file with the new state definition and specify the allowed transitions to and from this state. Ensure that the state machine logic in `workflowManager.py` is updated accordingly.
### Can I customize the workflow for different user roles?
Yes, workflows can be customized based on user roles by defining role-specific states and transitions in the configuration. Ensure that role-based access controls are implemented to enforce these customizations.
### Where can I find logs for debugging purposes?
Logs are typically stored in the `/var/log/workflow_manager/` directory. You can configure the logging level and output location in the system's configuration file.
### How do I contact support for further assistance?
For additional support, please contact our technical support team via email at support@workflowmanagement.com or call our helpline at +1-800-555-0199. Our support team is available 24/7 to assist with any issues.
## Support and Resources
- **Documentation**: Refer to the [Comprehensive System Documentation](#) for detailed information on system functionalities and configurations.
- **Community Forum**: Join our [User Community Forum](#) to discuss issues and share solutions with other users.
- **Training Sessions**: Sign up for our [Online Training Sessions](#) to enhance your understanding of the Workflow Management system.
For further inquiries, please refer to our [Support Page](#) for more resources and contact information.
# Appendices
This section provides additional resources and information to support the understanding and application of the "Comprehensive System Documentation for Workflow Management". It includes a glossary of terms used throughout the documentation and references for further reading.
## Glossary
This glossary defines key terms and concepts used in the workflow management system documentation. Understanding these terms is essential for comprehending the system's functionality and operations.
- **State Machine**: A computational model used to design algorithms. It consists of a finite number of states, transitions between these states, and actions, particularly useful in managing workflows where the system's state changes in response to events.
- **Workflow**: A sequence of processes through which a piece of work passes from initiation to completion. In this context, it refers to the automated processes managed by the system to handle chat interactions.
- **Workflow Initialization**: The process of setting up a new workflow instance or loading an existing one. This involves assigning unique identifiers and setting initial parameters.
- **Module**: A self-contained unit of code that encapsulates a specific functionality within the system. In the workflow management system, modules handle distinct aspects of workflow operations.
- **Backend**: The server-side part of the application, responsible for managing data, business logic, and workflows, as opposed to the frontend, which is the user interface.
- **Chat Workflow**: A specific type of workflow designed to manage interactions in a chat environment, ensuring that messages are processed and responded to according to predefined rules.
- **Unique ID**: A distinct identifier assigned to each workflow instance to differentiate it from others, ensuring accurate tracking and management.
- **Mandate ID**: An identifier used to associate a workflow with a specific mandate or task, providing context and purpose to the workflow's operations.
- **User ID**: An identifier that associates a workflow with a specific user, enabling personalized interactions and tracking.
## References
This section lists resources and literature that provide additional insights and information on workflow management systems, state machines, and related technologies. These references are valuable for readers seeking to deepen their understanding or explore advanced topics.
1. **"Design Patterns: Elements of Reusable Object-Oriented Software" by Erich Gamma, Richard Helm, Ralph Johnson, and John Vlissides**
- This book provides foundational knowledge on design patterns, including state machines, which are crucial for understanding workflow management systems.
2. **"Workflow Management: Models, Methods, and Systems" by Wil van der Aalst and Kees van Hee**
- A comprehensive resource on workflow management, covering theoretical models and practical implementations.
3. **"Finite State Machines in Software Development" by David M. Beazley**
- An article that explores the application of finite state machines in software development, offering practical examples and insights.
4. **"The Art of Scalability: Scalable Web Architecture, Processes, and Organizations for the Modern Enterprise" by Martin L. Abbott and Michael T. Fisher**
- This book discusses scalable architectures, including workflow management systems, providing strategies for building robust and efficient systems.
5. **Online Resources**:
- [State Machine Design Patterns](https://www.example.com/state-machine-design-patterns)
- [Workflow Management Coalition](https://www.example.com/workflow-management-coalition)
These resources are recommended for further exploration and understanding of the concepts and technologies underpinning the workflow management system described in this documentation.
# Conclusion
In this "Comprehensive System Documentation for Workflow Management," we have meticulously explored the intricate components and functionalities that constitute the workflow management system. This manual has been crafted with a technical audience in mind, aiming to provide a thorough understanding of the system's architecture and operational dynamics.
## Summary of Key Points
1. **State Machine**: We delved into the state machine's pivotal role in managing the transitions and states within the workflow. The documentation detailed how state machines ensure the system's robustness and flexibility, allowing for seamless state transitions and efficient workflow management.
2. **Workflow Management**: The core principles and methodologies of workflow management were outlined, emphasizing the system's capability to streamline processes and enhance productivity. We discussed various workflow scenarios and how the system adapts to different operational needs.
3. **System Architecture**: A comprehensive overview of the system architecture was provided, highlighting the interconnections between various components. This section elucidated how each module interacts within the system, ensuring a cohesive and efficient workflow management environment.
4. **Error Handling**: Effective error handling mechanisms were described, showcasing the system's resilience and ability to maintain operational integrity. We covered strategies for identifying, logging, and resolving errors to minimize disruptions and maintain workflow continuity.
## Closure and Recommendations
This documentation serves as a foundational resource for understanding and optimizing the workflow management system. By detailing the system's components and their interactions, we have provided a roadmap for both current operations and future enhancements.
### Recommendations:
- **Continuous Monitoring**: Implement ongoing monitoring to identify potential bottlenecks or inefficiencies within the workflow. Regular audits can help maintain optimal performance and adaptability.
- **System Updates**: Stay abreast of technological advancements and consider integrating new tools or methodologies that could enhance the system's capabilities.
- **Training and Development**: Encourage continuous learning and development for team members to ensure they are proficient in utilizing the system to its fullest potential.
## Final Thoughts
The significance of this documentation lies in its ability to demystify the complexities of workflow management systems. By providing a clear and detailed account of the system's architecture and operations, we empower technical teams to effectively manage and optimize workflows. This manual not only serves as a guide but also as a catalyst for innovation and efficiency within the organization.
Through this documentation, we hope to have equipped you with the knowledge and insights necessary to harness the full potential of your workflow management system, fostering an environment of continuous improvement and success.

View file

@ -0,0 +1,38 @@
# NOTE(review): presumably populated/injected by the sandbox runner with the
# input files available to this script — confirm against the executor harness.
inputFiles = [] # DO NOT CHANGE THIS LINE
def is_prime(n):
    """Return True when *n* is prime, via 6k±1 trial division.

    After ruling out multiples of 2 and 3, every remaining prime divisor
    is of the form 6k-1 or 6k+1, so only those candidates are tested.
    """
    if n <= 3:
        return n > 1
    if n % 2 == 0 or n % 3 == 0:
        return False
    divisor = 5
    while divisor * divisor <= n:
        if n % divisor == 0 or n % (divisor + 2) == 0:
            return False
        divisor += 6
    return True
def generate_primes(limit):
    """Return a list of the first *limit* primes in ascending order."""
    found = []
    candidate = 1
    while len(found) < limit:
        candidate += 1
        if is_prime(candidate):
            found.append(candidate)
    return found
# Emit the first 1000 primes as a JSON file descriptor on stdout.
# Fix: the original placed `import json` at the bottom of the script,
# after the code; hoisted to the top of the block per convention (PEP 8).
import json

primes = generate_primes(1000)
prime_numbers_content = "\n".join(map(str, primes))

# Output contract consumed by the sandbox runner:
# filename -> {content, base64Encoded, contentType}.
result = {
    "prime_numbers.txt": {
        "content": prime_numbers_content,
        "base64Encoded": False,
        "contentType": "text/plain"
    }
}
print(json.dumps(result))

File diff suppressed because one or more lines are too long

1000
static/97_prime_numbers.txt Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,38 @@
# NOTE(review): presumably populated/injected by the sandbox runner with the
# input files available to this script — confirm against the executor harness.
inputFiles = [] # DO NOT CHANGE THIS LINE
def is_prime(n):
    """Primality test using trial division over 6k±1 candidates.

    Every prime greater than 3 is congruent to 1 or 5 (mod 6), so once
    multiples of 2 and 3 are screened out, only divisors 5, 7, 11, 13, ...
    up to sqrt(n) need to be checked.
    """
    if n < 2:
        return False
    if n < 4:
        return True
    if n % 2 == 0 or n % 3 == 0:
        return False
    k = 5
    while k * k <= n:
        if n % k == 0 or n % (k + 2) == 0:
            return False
        k += 6
    return True
def generate_first_n_primes(n):
    """Return the first *n* primes in ascending order."""
    collected = []
    value = 2
    while True:
        if len(collected) == n:
            return collected
        if is_prime(value):
            collected.append(value)
        value += 1
# Emit the first 77 primes as a JSON file descriptor on stdout.
# Fix: the original placed `import json` at the bottom of the script,
# after the code; hoisted to the top of the block per convention (PEP 8).
import json

first_77_primes = generate_first_n_primes(77)
primes_content = "\n".join(map(str, first_77_primes))

# Output contract consumed by the sandbox runner:
# filename -> {content, base64Encoded, contentType}.
result = {
    "first_77_primes.txt": {
        "content": primes_content,
        "base64Encoded": False,
        "contentType": "text/plain"
    }
}
print(json.dumps(result))

View file

@ -0,0 +1,19 @@
[
{
"attempt": 1,
"code": "inputFiles = [] # DO NOT CHANGE THIS LINE\n\ndef is_prime(n):\n if n <= 1:\n return False\n if n <= 3:\n return True\n if n % 2 == 0 or n % 3 == 0:\n return False\n i = 5\n while i * i <= n:\n if n % i == 0 or n % (i + 2) == 0:\n return False\n i += 6\n return True\n\ndef generate_first_n_primes(n):\n primes = []\n num = 2\n while len(primes) < n:\n if is_prime(num):\n primes.append(num)\n num += 1\n return primes\n\nfirst_77_primes = generate_first_n_primes(77)\nprimes_content = \"\\n\".join(map(str, first_77_primes))\n\nresult = {\n \"first_77_primes.txt\": {\n \"content\": primes_content,\n \"base64Encoded\": False,\n \"contentType\": \"text/plain\"\n }\n}\n\nimport json\nprint(json.dumps(result))",
"result": {
"success": true,
"output": "{\"first_77_primes.txt\": {\"content\": \"2\\n3\\n5\\n7\\n11\\n13\\n17\\n19\\n23\\n29\\n31\\n37\\n41\\n43\\n47\\n53\\n59\\n61\\n67\\n71\\n73\\n79\\n83\\n89\\n97\\n101\\n103\\n107\\n109\\n113\\n127\\n131\\n137\\n139\\n149\\n151\\n157\\n163\\n167\\n173\\n179\\n181\\n191\\n193\\n197\\n199\\n211\\n223\\n227\\n229\\n233\\n239\\n241\\n251\\n257\\n263\\n269\\n271\\n277\\n281\\n283\\n293\\n307\\n311\\n313\\n317\\n331\\n337\\n347\\n349\\n353\\n359\\n367\\n373\\n379\\n383\\n389\", \"base64Encoded\": false, \"contentType\": \"text/plain\"}}\n",
"error": "",
"result": {
"first_77_primes.txt": {
"content": "2\n3\n5\n7\n11\n13\n17\n19\n23\n29\n31\n37\n41\n43\n47\n53\n59\n61\n67\n71\n73\n79\n83\n89\n97\n101\n103\n107\n109\n113\n127\n131\n137\n139\n149\n151\n157\n163\n167\n173\n179\n181\n191\n193\n197\n199\n211\n223\n227\n229\n233\n239\n241\n251\n257\n263\n269\n271\n277\n281\n283\n293\n307\n311\n313\n317\n331\n337\n347\n349\n353\n359\n367\n373\n379\n383\n389",
"base64Encoded": false,
"contentType": "text/plain"
}
},
"exitCode": 0
}
}
]