akhaliq HF Staff commited on
Commit
511c44b
·
1 Parent(s): 7f9535b

fix transformers js issue

Browse files
Files changed (4) hide show
  1. Dockerfile +2 -0
  2. backend_deploy.py +21 -15
  3. backend_parsers.py +402 -0
  4. backend_prompts.py +45 -6
Dockerfile CHANGED
@@ -55,7 +55,9 @@ RUN pip install --no-cache-dir --upgrade pip && \
55
  COPY --chown=user:user anycoder_app/ ./anycoder_app/
56
  COPY --chown=user:user backend_api.py .
57
  COPY --chown=user:user backend_models.py .
 
58
  COPY --chown=user:user backend_prompts.py .
 
59
  COPY --chown=user:user backend_deploy.py .
60
  COPY --chown=user:user project_importer.py .
61
  COPY --chown=user:user app.py .
 
55
  COPY --chown=user:user anycoder_app/ ./anycoder_app/
56
  COPY --chown=user:user backend_api.py .
57
  COPY --chown=user:user backend_models.py .
58
+ COPY --chown=user:user backend_docs_manager.py .
59
  COPY --chown=user:user backend_prompts.py .
60
+ COPY --chown=user:user backend_parsers.py .
61
  COPY --chown=user:user backend_deploy.py .
62
  COPY --chown=user:user project_importer.py .
63
  COPY --chown=user:user app.py .
backend_deploy.py CHANGED
@@ -14,6 +14,16 @@ from pathlib import Path
14
 
15
  from huggingface_hub import HfApi
16
  from backend_models import get_inference_client, get_real_model_id
 
 
 
 
 
 
 
 
 
 
17
 
18
 
19
  def parse_html_code(code: str) -> str:
@@ -870,26 +880,22 @@ def deploy_to_huggingface_space(
870
  else:
871
  print(f"[Deploy] {fname}: EMPTY")
872
 
873
- # Validate all three files are present
874
- missing_files = []
875
- if not files.get('index.html'):
876
- missing_files.append('index.html')
877
- if not files.get('index.js'):
878
- missing_files.append('index.js')
879
- if not files.get('style.css'):
880
- missing_files.append('style.css')
881
 
882
- if missing_files:
883
- error_msg = f"Missing required files: {', '.join(missing_files)}. "
884
- error_msg += f"Found only: {', '.join(files.keys()) if files else 'no files'}. "
885
- error_msg += "Transformers.js apps require all three files with === filename === markers. Please regenerate the code."
886
  print(f"[Deploy] {error_msg}")
887
  return False, error_msg, None
888
 
889
- # Validate files have content
890
- empty_files = [name for name, content in files.items() if not content or not content.strip()]
891
  if empty_files:
892
- error_msg = f"Empty files detected: {', '.join(empty_files)}. Please regenerate the code with actual content."
 
893
  print(f"[Deploy] {error_msg}")
894
  return False, error_msg, None
895
 
 
14
 
15
  from huggingface_hub import HfApi
16
  from backend_models import get_inference_client, get_real_model_id
17
+ from backend_parsers import (
18
+ parse_transformers_js_output,
19
+ parse_html_code,
20
+ parse_python_requirements,
21
+ parse_multi_file_python_output,
22
+ strip_tool_call_markers,
23
+ remove_code_block,
24
+ extract_import_statements,
25
+ generate_requirements_txt_with_llm
26
+ )
27
 
28
 
29
  def parse_html_code(code: str) -> str:
 
880
  else:
881
  print(f"[Deploy] {fname}: EMPTY")
882
 
883
+ # Validate all three files are present in the dict
884
+ required_files = {'index.html', 'index.js', 'style.css'}
885
+ missing_from_dict = required_files - set(files.keys())
 
 
 
 
 
886
 
887
+ if missing_from_dict:
888
+ error_msg = f"Failed to parse required files: {', '.join(sorted(missing_from_dict))}. "
889
+ error_msg += f"Parsed files: {', '.join(files.keys()) if files else 'none'}. "
890
+ error_msg += "Transformers.js apps require all three files (index.html, index.js, style.css). Please regenerate using the correct format."
891
  print(f"[Deploy] {error_msg}")
892
  return False, error_msg, None
893
 
894
+ # Validate files have actual content (not empty or whitespace-only)
895
+ empty_files = [name for name in required_files if not files.get(name, '').strip()]
896
  if empty_files:
897
+ error_msg = f"Empty file content detected: {', '.join(sorted(empty_files))}. "
898
+ error_msg += "All three files must contain actual code. Please regenerate with complete content."
899
  print(f"[Deploy] {error_msg}")
900
  return False, error_msg, None
901
 
backend_parsers.py ADDED
@@ -0,0 +1,402 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Backend parser utilities for AnyCoder.
3
+ Handles parsing of various code formats including transformers.js, Python multi-file outputs, and more.
4
+ """
5
+ import re
6
+ import json
7
+ import ast
8
+ from typing import Dict, Optional
9
+ from backend_models import get_inference_client, get_real_model_id
10
+
11
+
12
+ def parse_transformers_js_output(code: str) -> Dict[str, str]:
13
+ """Parse transformers.js output into separate files (index.html, index.js, style.css)
14
+
15
+ Uses comprehensive parsing patterns to handle various LLM output formats.
16
+ Updated to use transformers.js v3.8.0 CDN.
17
+ """
18
+ files = {
19
+ 'index.html': '',
20
+ 'index.js': '',
21
+ 'style.css': ''
22
+ }
23
+
24
+ # Multiple patterns to match the three code blocks with different variations
25
+ html_patterns = [
26
+ r'```html\s*\n([\s\S]*?)(?:```|\Z)',
27
+ r'```htm\s*\n([\s\S]*?)(?:```|\Z)',
28
+ r'```\s*(?:index\.html|html)\s*\n([\s\S]*?)(?:```|\Z)'
29
+ ]
30
+
31
+ js_patterns = [
32
+ r'```javascript\s*\n([\s\S]*?)(?:```|\Z)',
33
+ r'```js\s*\n([\s\S]*?)(?:```|\Z)',
34
+ r'```\s*(?:index\.js|javascript|js)\s*\n([\s\S]*?)(?:```|\Z)'
35
+ ]
36
+
37
+ css_patterns = [
38
+ r'```css\s*\n([\s\S]*?)(?:```|\Z)',
39
+ r'```\s*(?:style\.css|css)\s*\n([\s\S]*?)(?:```|\Z)'
40
+ ]
41
+
42
+ # Extract HTML content
43
+ for pattern in html_patterns:
44
+ html_match = re.search(pattern, code, re.IGNORECASE)
45
+ if html_match:
46
+ files['index.html'] = html_match.group(1).strip()
47
+ break
48
+
49
+ # Extract JavaScript content
50
+ for pattern in js_patterns:
51
+ js_match = re.search(pattern, code, re.IGNORECASE)
52
+ if js_match:
53
+ files['index.js'] = js_match.group(1).strip()
54
+ break
55
+
56
+ # Extract CSS content
57
+ for pattern in css_patterns:
58
+ css_match = re.search(pattern, code, re.IGNORECASE)
59
+ if css_match:
60
+ files['style.css'] = css_match.group(1).strip()
61
+ break
62
+
63
+ # Fallback: support === index.html === format if any file is missing
64
+ if not (files['index.html'] and files['index.js'] and files['style.css']):
65
+ # Use regex to extract sections
66
+ html_fallback = re.search(r'===\s*index\.html\s*===\s*\n([\s\S]+?)(?=\n===|$)', code, re.IGNORECASE)
67
+ js_fallback = re.search(r'===\s*index\.js\s*===\s*\n([\s\S]+?)(?=\n===|$)', code, re.IGNORECASE)
68
+ css_fallback = re.search(r'===\s*style\.css\s*===\s*\n([\s\S]+?)(?=\n===|$)', code, re.IGNORECASE)
69
+
70
+ if html_fallback:
71
+ files['index.html'] = html_fallback.group(1).strip()
72
+ if js_fallback:
73
+ files['index.js'] = js_fallback.group(1).strip()
74
+ if css_fallback:
75
+ files['style.css'] = css_fallback.group(1).strip()
76
+
77
+ # Additional fallback: extract from numbered sections or file headers
78
+ if not (files['index.html'] and files['index.js'] and files['style.css']):
79
+ # Try patterns like "1. index.html:" or "**index.html**"
80
+ patterns = [
81
+ (r'(?:^\d+\.\s*|^##\s*|^\*\*\s*)index\.html(?:\s*:|\*\*:?)\s*\n([\s\S]+?)(?=\n(?:\d+\.|##|\*\*|===)|$)', 'index.html'),
82
+ (r'(?:^\d+\.\s*|^##\s*|^\*\*\s*)index\.js(?:\s*:|\*\*:?)\s*\n([\s\S]+?)(?=\n(?:\d+\.|##|\*\*|===)|$)', 'index.js'),
83
+ (r'(?:^\d+\.\s*|^##\s*|^\*\*\s*)style\.css(?:\s*:|\*\*:?)\s*\n([\s\S]+?)(?=\n(?:\d+\.|##|\*\*|===)|$)', 'style.css')
84
+ ]
85
+
86
+ for pattern, file_key in patterns:
87
+ if not files[file_key]:
88
+ match = re.search(pattern, code, re.IGNORECASE | re.MULTILINE)
89
+ if match:
90
+ # Clean up the content by removing any code block markers
91
+ content = match.group(1).strip()
92
+ content = re.sub(r'^```\w*\s*\n', '', content)
93
+ content = re.sub(r'\n```\s*$', '', content)
94
+ files[file_key] = content.strip()
95
+
96
+ # Normalize transformers.js imports to use v3.8.0 CDN
97
+ cdn_url = "https://cdn.jsdelivr.net/npm/@huggingface/[email protected]"
98
+
99
+ for file_key in ['index.html', 'index.js']:
100
+ if files[file_key]:
101
+ content = files[file_key]
102
+ # Update import statements to use latest CDN
103
+ content = re.sub(
104
+ r"from\s+['\"]https://cdn.jsdelivr.net/npm/@huggingface/transformers@[^'\"]+['\"]",
105
+ f"from '{cdn_url}'",
106
+ content
107
+ )
108
+ content = re.sub(
109
+ r"from\s+['\"]https://cdn.jsdelivr.net/npm/@xenova/transformers@[^'\"]+['\"]",
110
+ f"from '{cdn_url}'",
111
+ content
112
+ )
113
+ files[file_key] = content
114
+
115
+ return files
116
+
117
+
118
def parse_html_code(code: str) -> str:
    """Extract HTML code from raw LLM output.

    Handles already-clean HTML, ```html fenced blocks, and generic fenced
    blocks (dropping any language tag on the opening fence). Falls back to
    returning the stripped input unchanged when nothing matches.

    Args:
        code: Raw model output.

    Returns:
        The extracted HTML, or the stripped input as-is.
    """
    code = code.strip()

    # Already bare HTML - nothing to extract.
    if code.startswith('<!DOCTYPE') or code.startswith('<html'):
        return code

    # Preferred: an explicitly html-tagged fence.
    if '```html' in code:
        match = re.search(r'```html\s*(.*?)\s*```', code, re.DOTALL)
        if match:
            return match.group(1).strip()

    if '```' in code:
        # Fix: consume an optional language tag on the fence line; the old
        # generic pattern returned the tag (e.g. "css") as part of the content.
        match = re.search(r'```[\w+-]*[ \t]*\n(.*?)\s*```', code, re.DOTALL)
        if match:
            return match.group(1).strip()
        match = re.search(r'```\s*(.*?)\s*```', code, re.DOTALL)
        if match:
            return match.group(1).strip()

    return code
138
+
139
+
140
def parse_python_requirements(code: str) -> Optional[str]:
    """Return the requirements.txt section embedded in *code*, or None.

    Looks for a ``=== requirements.txt ===`` section marker and strips any
    stray code-fence markers from the extracted text.
    """
    section = re.search(
        r'===\s*requirements\.txt\s*===\s*(.*?)(?====|$)',
        code,
        re.DOTALL | re.IGNORECASE,
    )
    if section is None:
        return None

    # Drop opening/closing code fences the model may have wrapped around it.
    body = section.group(1).strip()
    body = re.sub(r'^```\w*\s*', '', body, flags=re.MULTILINE)
    return re.sub(r'```\s*$', '', body, flags=re.MULTILINE)
154
+
155
+
156
def parse_multi_file_python_output(code: str) -> Dict[str, str]:
    """Split multi-file Python output (e.g. Gradio/Streamlit projects).

    Sections are delimited by ``=== name.py ===`` / ``=== name.txt ===``
    markers; code-fence markers inside a section are stripped.
    """
    section_re = re.compile(
        r'===\s*(\S+\.(?:py|txt))\s*===\s*(.*?)(?====|$)',
        re.DOTALL | re.IGNORECASE,
    )

    def _clean(text: str) -> str:
        # Remove opening/closing code fences the model may have emitted.
        text = re.sub(r'^```\w*\s*', '', text, flags=re.MULTILINE)
        return re.sub(r'```\s*$', '', text, flags=re.MULTILINE)

    return {
        found.group(1).strip(): _clean(found.group(2).strip())
        for found in section_re.finditer(code)
    }
175
+
176
+
177
def strip_tool_call_markers(text):
    """Remove TOOL_CALL markers and <think> blocks that some LLMs emit.

    Falsy input is returned unchanged; otherwise the cleaned, stripped text
    is returned. The cleanup passes run in a fixed order.
    """
    if not text:
        return text

    cleanup_passes = (
        # [TOOL_CALL] / [/TOOL_CALL] wrappers
        (r'\[/?TOOL_CALL\]', re.IGNORECASE),
        # Complete <think>...</think> blocks, content included
        (r'<think>[\s\S]*?</think>', re.IGNORECASE),
        # Unclosed <think> at the start of a line (rest of that line goes too)
        (r'^<think>[\s\S]*?(?=\n|$)', re.IGNORECASE | re.MULTILINE),
        # Any orphaned closing tags
        (r'</think>', re.IGNORECASE),
        # A stray "}}" left over from tool-call JSON, alone on its line
        (r'^\s*\}\}\s*$', re.MULTILINE),
    )
    for pattern, flags in cleanup_passes:
        text = re.sub(pattern, '', text, flags=flags)
    return text.strip()
193
+
194
+
195
def remove_code_block(text):
    """Unwrap the first fenced code block found in *text*.

    Tool-call markers are stripped first. If a fenced block is found, its body
    is returned, minus a leading language-tag line (e.g. a bare "python" as
    the first line inside the fence). Plain text is returned stripped.
    """
    text = strip_tool_call_markers(text)

    # Language tags that may appear on the first line *inside* a fence
    # instead of on the fence itself.
    language_tags = {
        'python', 'html', 'css', 'javascript', 'json', 'c', 'cpp', 'markdown',
        'latex', 'jinja2', 'typescript', 'yaml', 'dockerfile', 'shell', 'r',
        'sql',
    }

    fence_patterns = (
        r'```(?:html|HTML)\n([\s\S]+?)\n```',  # explicitly html-tagged
        r'```\n([\s\S]+?)\n```',               # untagged, with line breaks
        r'```([\s\S]+?)```',                   # no surrounding line breaks
    )
    for fence in fence_patterns:
        found = re.search(fence, text, re.DOTALL)
        if found is None:
            continue
        body = found.group(1).strip()
        first_line, sep, rest = body.partition('\n')
        if first_line.strip().lower() in language_tags:
            # Drop the language marker line; '' when it was the only line.
            return rest if sep else ''
        return body

    # No fence at all - return the text as-is.
    return text.strip()
216
+
217
+
218
def extract_import_statements(code):
    """Extract third-party import statements from generated Python code.

    Prefers an AST parse; falls back to a line-based scan when the code does
    not parse. Imports of the standard-library modules listed below and of
    private (underscore-prefixed) modules are excluded.

    Args:
        code: Python source text.

    Returns:
        De-duplicated import statement strings, in first-seen order.
    """
    import_statements = []

    # Built-in Python modules to exclude
    builtin_modules = {
        'os', 'sys', 'json', 'time', 'datetime', 'random', 'math', 're', 'collections',
        'itertools', 'functools', 'pathlib', 'urllib', 'http', 'email', 'html', 'xml',
        'csv', 'tempfile', 'shutil', 'subprocess', 'threading', 'multiprocessing',
        'asyncio', 'logging', 'typing', 'base64', 'hashlib', 'secrets', 'uuid',
        'copy', 'pickle', 'io', 'contextlib', 'warnings', 'sqlite3', 'gzip', 'zipfile',
        'tarfile', 'socket', 'ssl', 'platform', 'getpass', 'pwd', 'grp', 'stat',
        'glob', 'fnmatch', 'linecache', 'traceback', 'inspect', 'keyword', 'token',
        'tokenize', 'ast', 'code', 'codeop', 'dis', 'py_compile', 'compileall',
        'importlib', 'pkgutil', 'modulefinder', 'runpy', 'site', 'sysconfig'
    }

    def _is_third_party(module_name):
        # The top-level package name decides; private modules are skipped too.
        return module_name not in builtin_modules and not module_name.startswith('_')

    try:
        tree = ast.parse(code)

        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                for alias in node.names:
                    if _is_third_party(alias.name.split('.')[0]):
                        import_statements.append(f"import {alias.name}")

            elif isinstance(node, ast.ImportFrom):
                if node.module and _is_third_party(node.module.split('.')[0]):
                    names = [alias.name for alias in node.names]
                    import_statements.append(f"from {node.module} import {', '.join(names)}")

    except SyntaxError:
        # Fallback: scan line by line for import statements.
        for line in code.split('\n'):
            line = line.strip()
            if not (line.startswith('import ') or line.startswith('from ')):
                continue
            parts = line.split()
            # Fix: guard against bare "import "/"from " lines, which
            # previously raised IndexError on split()[1].
            if len(parts) < 2:
                continue
            if _is_third_party(parts[1].split('.')[0]):
                import_statements.append(line)

    # Fix: dict.fromkeys preserves first-seen order while de-duplicating;
    # list(set(...)) returned an arbitrary order.
    return list(dict.fromkeys(import_statements))
268
+
269
+
270
def generate_requirements_txt_with_llm(import_statements):
    """Generate requirements.txt content using LLM based on import statements.

    Asks a chat model (zai-org/GLM-4.6) to expand the given import statements
    into a comprehensive requirements.txt, then aggressively cleans markdown
    artifacts out of the reply. On any failure (client unavailable, network
    error, bad response) it falls back to a simple import-name -> PyPI-name
    mapping.

    Args:
        import_statements: List of import statement strings (as produced by
            extract_import_statements). May be empty.

    Returns:
        requirements.txt text, newline-terminated; a single
        "# No additional dependencies required" comment line when nothing
        is needed.
    """
    if not import_statements:
        return "# No additional dependencies required\n"

    # Use a lightweight model for this task
    try:
        client = get_inference_client("zai-org/GLM-4.6", "auto")
        actual_model_id = get_real_model_id("zai-org/GLM-4.6")

        imports_text = '\n'.join(import_statements)

        prompt = f"""Based on the following Python import statements, generate a comprehensive requirements.txt file with all necessary and commonly used related packages:

{imports_text}

Instructions:
- Include the direct packages needed for the imports
- Include commonly used companion packages and dependencies for better functionality
- Use correct PyPI package names (e.g., PIL -> Pillow, sklearn -> scikit-learn)
- IMPORTANT: For diffusers, ALWAYS use: git+https://github.com/huggingface/diffusers
- IMPORTANT: For transformers, ALWAYS use: git+https://github.com/huggingface/transformers
- IMPORTANT: If diffusers is installed, also include transformers and sentencepiece as they usually go together
- Examples of comprehensive dependencies:
  * diffusers often needs: git+https://github.com/huggingface/transformers, sentencepiece, accelerate, torch, tokenizers
  * transformers often needs: accelerate, torch, tokenizers, datasets
  * gradio often needs: requests, Pillow for image handling
  * pandas often needs: numpy, openpyxl for Excel files
  * matplotlib often needs: numpy, pillow for image saving
  * sklearn often needs: numpy, scipy, joblib
  * streamlit often needs: pandas, numpy, requests
  * opencv-python often needs: numpy, pillow
  * fastapi often needs: uvicorn, pydantic
  * torch often needs: torchvision, torchaudio (if doing computer vision/audio)
- Include packages for common file formats if relevant (openpyxl, python-docx, PyPDF2)
- Do not include Python built-in modules
- Do not specify versions unless there are known compatibility issues
- One package per line
- If no external packages are needed, return "# No additional dependencies required"

🚨 CRITICAL OUTPUT FORMAT:
- Output ONLY the package names, one per line (plain text format)
- Do NOT use markdown formatting (no ```, no bold, no headings, no lists)
- Do NOT add any explanatory text before or after the package list
- Do NOT wrap the output in code blocks
- Just output raw package names as they would appear in requirements.txt

Generate a comprehensive requirements.txt that ensures the application will work smoothly:"""

        messages = [
            {"role": "system", "content": "You are a Python packaging expert specializing in creating comprehensive, production-ready requirements.txt files. Output ONLY plain text package names without any markdown formatting, code blocks, or explanatory text. Your goal is to ensure applications work smoothly by including not just direct dependencies but also commonly needed companion packages, popular extensions, and supporting libraries that developers typically need together."},
            {"role": "user", "content": prompt}
        ]

        response = client.chat.completions.create(
            model=actual_model_id,
            messages=messages,
            max_tokens=1024,
            temperature=0.1
        )

        requirements_content = response.choices[0].message.content.strip()

        # Clean up the response in case it includes extra formatting
        if '```' in requirements_content:
            requirements_content = remove_code_block(requirements_content)

        # Enhanced cleanup for markdown and formatting
        lines = requirements_content.split('\n')
        clean_lines = []
        for line in lines:
            stripped_line = line.strip()

            # Skip lines that are markdown formatting
            # NOTE(review): `and` binds tighter than `or` here, so each
            # startswith(...)/not-startswith(...) pair acts as one condition.
            if (stripped_line == '```' or
                stripped_line.startswith('```') or
                stripped_line.startswith('#') and not stripped_line.startswith('# ') or  # Skip markdown headers but keep comments
                stripped_line.startswith('**') or  # Skip bold text
                stripped_line.startswith('*') and not stripped_line[1:2].isalnum() or  # Skip markdown lists but keep package names starting with *
                stripped_line.startswith('-') and not stripped_line[1:2].isalnum() or  # Skip markdown lists but keep package names starting with -
                stripped_line.startswith('===') or  # Skip section dividers
                stripped_line.startswith('---') or  # Skip horizontal rules
                stripped_line.lower().startswith('here') or  # Skip explanatory text
                stripped_line.lower().startswith('this') or  # Skip explanatory text
                stripped_line.lower().startswith('the') or  # Skip explanatory text
                stripped_line.lower().startswith('based on') or  # Skip explanatory text
                stripped_line == ''):  # Skip empty lines unless they're at natural boundaries
                continue

            # Keep lines that look like valid package specifications
            # Valid lines: package names, git+https://, comments starting with "# "
            # (stripped_line is non-empty here, so indexing [0] is safe)
            if (stripped_line.startswith('# ') or  # Valid comments
                stripped_line.startswith('git+') or  # Git dependencies
                stripped_line[0].isalnum() or  # Package names start with alphanumeric
                '==' in stripped_line or  # Version specifications
                '>=' in stripped_line or  # Version specifications
                '<=' in stripped_line):  # Version specifications
                clean_lines.append(line)

        requirements_content = '\n'.join(clean_lines).strip()

        # Ensure it ends with a newline
        if requirements_content and not requirements_content.endswith('\n'):
            requirements_content += '\n'

        return requirements_content if requirements_content else "# No additional dependencies required\n"

    except Exception as e:
        # Fallback: simple extraction with basic mapping
        # NOTE(review): the broad except is deliberate - any failure in the
        # LLM path (including an unavailable client) degrades to this static
        # import-name -> PyPI-name mapping instead of raising.
        print(f"[Parser] Warning: LLM requirements generation failed: {e}, using fallback")
        dependencies = set()
        special_cases = {
            'PIL': 'Pillow',
            'sklearn': 'scikit-learn',
            'skimage': 'scikit-image',
            'bs4': 'beautifulsoup4'
        }

        for stmt in import_statements:
            if stmt.startswith('import '):
                module_name = stmt.split()[1].split('.')[0]
                package_name = special_cases.get(module_name, module_name)
                dependencies.add(package_name)
            elif stmt.startswith('from '):
                module_name = stmt.split()[1].split('.')[0]
                package_name = special_cases.get(module_name, module_name)
                dependencies.add(package_name)

        if dependencies:
            return '\n'.join(sorted(dependencies)) + '\n'
        else:
            return "# No additional dependencies required\n"
402
+
backend_prompts.py CHANGED
@@ -61,16 +61,55 @@ Requirements:
61
  6. Include proper error handling and loading states
62
  7. Follow accessibility best practices
63
 
64
- Library import (required): Add the following snippet to index.html to import transformers.js:
 
 
 
65
  <script type="module">
66
- import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.3';
67
  </script>
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
- Device Options: By default, transformers.js runs on CPU (via WASM). For better performance, you can run models on GPU using WebGPU:
70
- - CPU (default): const pipe = await pipeline('task', 'model-name');
71
- - GPU (WebGPU): const pipe = await pipeline('task', 'model-name', { device: 'webgpu' });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
- Consider providing users with a toggle option to choose between CPU and GPU execution based on their browser's WebGPU support.
74
 
75
  The index.html should contain the basic HTML structure and link to the CSS and JS files.
76
  The index.js should contain all the JavaScript logic including transformers.js integration.
 
61
  6. Include proper error handling and loading states
62
  7. Follow accessibility best practices
63
 
64
+ **Transformers.js Library Usage:**
65
+
66
+ Import via CDN (use in index.html or index.js):
67
+ ```javascript
68
  <script type="module">
69
+ import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.8.0';
70
  </script>
71
+ ```
72
+
73
+ **Pipeline API - Quick Tour:**
74
+ Pipelines group together a pretrained model with preprocessing and postprocessing. Example:
75
+
76
+ ```javascript
77
+ import { pipeline } from '@huggingface/transformers';
78
+
79
+ // Allocate a pipeline for sentiment-analysis
80
+ const pipe = await pipeline('sentiment-analysis');
81
+
82
+ const out = await pipe('I love transformers!');
83
+ // [{'label': 'POSITIVE', 'score': 0.999817686}]
84
 
85
+ // Use a different model by specifying model id
86
+ const multilingualPipe = await pipeline('sentiment-analysis', 'Xenova/bert-base-multilingual-uncased-sentiment');
87
+ ```
88
+
89
+ **Device Options:**
90
+ By default, models run on CPU (via WASM). For better performance, use WebGPU:
91
+ ```javascript
92
+ // Run on WebGPU (GPU)
93
+ const pipe = await pipeline('sentiment-analysis', 'Xenova/distilbert-base-uncased-finetuned-sst-2-english', {
94
+ device: 'webgpu',
95
+ });
96
+ ```
97
+
98
+ **Quantization Options:**
99
+ In resource-constrained environments (browsers), use quantized models:
100
+ - "fp32" (default for WebGPU)
101
+ - "fp16"
102
+ - "q8" (default for WASM)
103
+ - "q4" (4-bit quantization for smaller size)
104
+
105
+ ```javascript
106
+ // Run at 4-bit quantization for better performance
107
+ const pipe = await pipeline('sentiment-analysis', 'Xenova/distilbert-base-uncased-finetuned-sst-2-english', {
108
+ dtype: 'q4',
109
+ });
110
+ ```
111
 
112
+ Consider providing users with options to choose device (CPU/GPU) and quantization level based on their needs.
113
 
114
  The index.html should contain the basic HTML structure and link to the CSS and JS files.
115
  The index.js should contain all the JavaScript logic including transformers.js integration.