Spaces:
Running
Running
fix transformers js issue
Browse files
- Dockerfile +2 -0
- backend_deploy.py +21 -15
- backend_parsers.py +402 -0
- backend_prompts.py +45 -6
Dockerfile
CHANGED
|
@@ -55,7 +55,9 @@ RUN pip install --no-cache-dir --upgrade pip && \
|
|
| 55 |
COPY --chown=user:user anycoder_app/ ./anycoder_app/
|
| 56 |
COPY --chown=user:user backend_api.py .
|
| 57 |
COPY --chown=user:user backend_models.py .
|
|
|
|
| 58 |
COPY --chown=user:user backend_prompts.py .
|
|
|
|
| 59 |
COPY --chown=user:user backend_deploy.py .
|
| 60 |
COPY --chown=user:user project_importer.py .
|
| 61 |
COPY --chown=user:user app.py .
|
|
|
|
| 55 |
COPY --chown=user:user anycoder_app/ ./anycoder_app/
|
| 56 |
COPY --chown=user:user backend_api.py .
|
| 57 |
COPY --chown=user:user backend_models.py .
|
| 58 |
+
COPY --chown=user:user backend_docs_manager.py .
|
| 59 |
COPY --chown=user:user backend_prompts.py .
|
| 60 |
+
COPY --chown=user:user backend_parsers.py .
|
| 61 |
COPY --chown=user:user backend_deploy.py .
|
| 62 |
COPY --chown=user:user project_importer.py .
|
| 63 |
COPY --chown=user:user app.py .
|
backend_deploy.py
CHANGED
|
@@ -14,6 +14,16 @@ from pathlib import Path
|
|
| 14 |
|
| 15 |
from huggingface_hub import HfApi
|
| 16 |
from backend_models import get_inference_client, get_real_model_id
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
|
| 19 |
def parse_html_code(code: str) -> str:
|
|
@@ -870,26 +880,22 @@ def deploy_to_huggingface_space(
|
|
| 870 |
else:
|
| 871 |
print(f"[Deploy] {fname}: EMPTY")
|
| 872 |
|
| 873 |
-
# Validate all three files are present
|
| 874 |
-
|
| 875 |
-
|
| 876 |
-
missing_files.append('index.html')
|
| 877 |
-
if not files.get('index.js'):
|
| 878 |
-
missing_files.append('index.js')
|
| 879 |
-
if not files.get('style.css'):
|
| 880 |
-
missing_files.append('style.css')
|
| 881 |
|
| 882 |
-
if
|
| 883 |
-
error_msg = f"
|
| 884 |
-
error_msg += f"
|
| 885 |
-
error_msg += "Transformers.js apps require all three files
|
| 886 |
print(f"[Deploy] {error_msg}")
|
| 887 |
return False, error_msg, None
|
| 888 |
|
| 889 |
-
# Validate files have content
|
| 890 |
-
empty_files = [name for name
|
| 891 |
if empty_files:
|
| 892 |
-
error_msg = f"Empty
|
|
|
|
| 893 |
print(f"[Deploy] {error_msg}")
|
| 894 |
return False, error_msg, None
|
| 895 |
|
|
|
|
| 14 |
|
| 15 |
from huggingface_hub import HfApi
|
| 16 |
from backend_models import get_inference_client, get_real_model_id
|
| 17 |
+
from backend_parsers import (
|
| 18 |
+
parse_transformers_js_output,
|
| 19 |
+
parse_html_code,
|
| 20 |
+
parse_python_requirements,
|
| 21 |
+
parse_multi_file_python_output,
|
| 22 |
+
strip_tool_call_markers,
|
| 23 |
+
remove_code_block,
|
| 24 |
+
extract_import_statements,
|
| 25 |
+
generate_requirements_txt_with_llm
|
| 26 |
+
)
|
| 27 |
|
| 28 |
|
| 29 |
def parse_html_code(code: str) -> str:
|
|
|
|
| 880 |
else:
|
| 881 |
print(f"[Deploy] {fname}: EMPTY")
|
| 882 |
|
| 883 |
+
# Validate all three files are present in the dict
|
| 884 |
+
required_files = {'index.html', 'index.js', 'style.css'}
|
| 885 |
+
missing_from_dict = required_files - set(files.keys())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 886 |
|
| 887 |
+
if missing_from_dict:
|
| 888 |
+
error_msg = f"Failed to parse required files: {', '.join(sorted(missing_from_dict))}. "
|
| 889 |
+
error_msg += f"Parsed files: {', '.join(files.keys()) if files else 'none'}. "
|
| 890 |
+
error_msg += "Transformers.js apps require all three files (index.html, index.js, style.css). Please regenerate using the correct format."
|
| 891 |
print(f"[Deploy] {error_msg}")
|
| 892 |
return False, error_msg, None
|
| 893 |
|
| 894 |
+
# Validate files have actual content (not empty or whitespace-only)
|
| 895 |
+
empty_files = [name for name in required_files if not files.get(name, '').strip()]
|
| 896 |
if empty_files:
|
| 897 |
+
error_msg = f"Empty file content detected: {', '.join(sorted(empty_files))}. "
|
| 898 |
+
error_msg += "All three files must contain actual code. Please regenerate with complete content."
|
| 899 |
print(f"[Deploy] {error_msg}")
|
| 900 |
return False, error_msg, None
|
| 901 |
|
backend_parsers.py
ADDED
|
@@ -0,0 +1,402 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Backend parser utilities for AnyCoder.
|
| 3 |
+
Handles parsing of various code formats including transformers.js, Python multi-file outputs, and more.
|
| 4 |
+
"""
|
| 5 |
+
import re
|
| 6 |
+
import json
|
| 7 |
+
import ast
|
| 8 |
+
from typing import Dict, Optional
|
| 9 |
+
from backend_models import get_inference_client, get_real_model_id
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def parse_transformers_js_output(code: str) -> Dict[str, str]:
|
| 13 |
+
"""Parse transformers.js output into separate files (index.html, index.js, style.css)
|
| 14 |
+
|
| 15 |
+
Uses comprehensive parsing patterns to handle various LLM output formats.
|
| 16 |
+
Updated to use transformers.js v3.8.0 CDN.
|
| 17 |
+
"""
|
| 18 |
+
files = {
|
| 19 |
+
'index.html': '',
|
| 20 |
+
'index.js': '',
|
| 21 |
+
'style.css': ''
|
| 22 |
+
}
|
| 23 |
+
|
| 24 |
+
# Multiple patterns to match the three code blocks with different variations
|
| 25 |
+
html_patterns = [
|
| 26 |
+
r'```html\s*\n([\s\S]*?)(?:```|\Z)',
|
| 27 |
+
r'```htm\s*\n([\s\S]*?)(?:```|\Z)',
|
| 28 |
+
r'```\s*(?:index\.html|html)\s*\n([\s\S]*?)(?:```|\Z)'
|
| 29 |
+
]
|
| 30 |
+
|
| 31 |
+
js_patterns = [
|
| 32 |
+
r'```javascript\s*\n([\s\S]*?)(?:```|\Z)',
|
| 33 |
+
r'```js\s*\n([\s\S]*?)(?:```|\Z)',
|
| 34 |
+
r'```\s*(?:index\.js|javascript|js)\s*\n([\s\S]*?)(?:```|\Z)'
|
| 35 |
+
]
|
| 36 |
+
|
| 37 |
+
css_patterns = [
|
| 38 |
+
r'```css\s*\n([\s\S]*?)(?:```|\Z)',
|
| 39 |
+
r'```\s*(?:style\.css|css)\s*\n([\s\S]*?)(?:```|\Z)'
|
| 40 |
+
]
|
| 41 |
+
|
| 42 |
+
# Extract HTML content
|
| 43 |
+
for pattern in html_patterns:
|
| 44 |
+
html_match = re.search(pattern, code, re.IGNORECASE)
|
| 45 |
+
if html_match:
|
| 46 |
+
files['index.html'] = html_match.group(1).strip()
|
| 47 |
+
break
|
| 48 |
+
|
| 49 |
+
# Extract JavaScript content
|
| 50 |
+
for pattern in js_patterns:
|
| 51 |
+
js_match = re.search(pattern, code, re.IGNORECASE)
|
| 52 |
+
if js_match:
|
| 53 |
+
files['index.js'] = js_match.group(1).strip()
|
| 54 |
+
break
|
| 55 |
+
|
| 56 |
+
# Extract CSS content
|
| 57 |
+
for pattern in css_patterns:
|
| 58 |
+
css_match = re.search(pattern, code, re.IGNORECASE)
|
| 59 |
+
if css_match:
|
| 60 |
+
files['style.css'] = css_match.group(1).strip()
|
| 61 |
+
break
|
| 62 |
+
|
| 63 |
+
# Fallback: support === index.html === format if any file is missing
|
| 64 |
+
if not (files['index.html'] and files['index.js'] and files['style.css']):
|
| 65 |
+
# Use regex to extract sections
|
| 66 |
+
html_fallback = re.search(r'===\s*index\.html\s*===\s*\n([\s\S]+?)(?=\n===|$)', code, re.IGNORECASE)
|
| 67 |
+
js_fallback = re.search(r'===\s*index\.js\s*===\s*\n([\s\S]+?)(?=\n===|$)', code, re.IGNORECASE)
|
| 68 |
+
css_fallback = re.search(r'===\s*style\.css\s*===\s*\n([\s\S]+?)(?=\n===|$)', code, re.IGNORECASE)
|
| 69 |
+
|
| 70 |
+
if html_fallback:
|
| 71 |
+
files['index.html'] = html_fallback.group(1).strip()
|
| 72 |
+
if js_fallback:
|
| 73 |
+
files['index.js'] = js_fallback.group(1).strip()
|
| 74 |
+
if css_fallback:
|
| 75 |
+
files['style.css'] = css_fallback.group(1).strip()
|
| 76 |
+
|
| 77 |
+
# Additional fallback: extract from numbered sections or file headers
|
| 78 |
+
if not (files['index.html'] and files['index.js'] and files['style.css']):
|
| 79 |
+
# Try patterns like "1. index.html:" or "**index.html**"
|
| 80 |
+
patterns = [
|
| 81 |
+
(r'(?:^\d+\.\s*|^##\s*|^\*\*\s*)index\.html(?:\s*:|\*\*:?)\s*\n([\s\S]+?)(?=\n(?:\d+\.|##|\*\*|===)|$)', 'index.html'),
|
| 82 |
+
(r'(?:^\d+\.\s*|^##\s*|^\*\*\s*)index\.js(?:\s*:|\*\*:?)\s*\n([\s\S]+?)(?=\n(?:\d+\.|##|\*\*|===)|$)', 'index.js'),
|
| 83 |
+
(r'(?:^\d+\.\s*|^##\s*|^\*\*\s*)style\.css(?:\s*:|\*\*:?)\s*\n([\s\S]+?)(?=\n(?:\d+\.|##|\*\*|===)|$)', 'style.css')
|
| 84 |
+
]
|
| 85 |
+
|
| 86 |
+
for pattern, file_key in patterns:
|
| 87 |
+
if not files[file_key]:
|
| 88 |
+
match = re.search(pattern, code, re.IGNORECASE | re.MULTILINE)
|
| 89 |
+
if match:
|
| 90 |
+
# Clean up the content by removing any code block markers
|
| 91 |
+
content = match.group(1).strip()
|
| 92 |
+
content = re.sub(r'^```\w*\s*\n', '', content)
|
| 93 |
+
content = re.sub(r'\n```\s*$', '', content)
|
| 94 |
+
files[file_key] = content.strip()
|
| 95 |
+
|
| 96 |
+
# Normalize transformers.js imports to use v3.8.0 CDN
|
| 97 |
+
cdn_url = "https://cdn.jsdelivr.net/npm/@huggingface/[email protected]"
|
| 98 |
+
|
| 99 |
+
for file_key in ['index.html', 'index.js']:
|
| 100 |
+
if files[file_key]:
|
| 101 |
+
content = files[file_key]
|
| 102 |
+
# Update import statements to use latest CDN
|
| 103 |
+
content = re.sub(
|
| 104 |
+
r"from\s+['\"]https://cdn.jsdelivr.net/npm/@huggingface/transformers@[^'\"]+['\"]",
|
| 105 |
+
f"from '{cdn_url}'",
|
| 106 |
+
content
|
| 107 |
+
)
|
| 108 |
+
content = re.sub(
|
| 109 |
+
r"from\s+['\"]https://cdn.jsdelivr.net/npm/@xenova/transformers@[^'\"]+['\"]",
|
| 110 |
+
f"from '{cdn_url}'",
|
| 111 |
+
content
|
| 112 |
+
)
|
| 113 |
+
files[file_key] = content
|
| 114 |
+
|
| 115 |
+
return files
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
def parse_html_code(code: str) -> str:
    """Extract HTML code from various formats"""
    text = code.strip()

    # A clean HTML document passes through untouched.
    if text.startswith(('<!DOCTYPE', '<html')):
        return text

    # Prefer an ```html fence, then fall back to any generic fence.
    for fence in (r'```html\s*(.*?)\s*```', r'```\s*(.*?)\s*```'):
        found = re.search(fence, text, re.DOTALL)
        if found:
            return found.group(1).strip()

    # Nothing fenced: return the (stripped) input as-is.
    return text
| 140 |
+
def parse_python_requirements(code: str) -> Optional[str]:
    """Extract requirements.txt content from code if present"""
    # Content of a "=== requirements.txt ===" section, up to the next
    # section header or the end of input.
    section = re.search(
        r'===\s*requirements\.txt\s*===\s*(.*?)(?====|$)',
        code,
        re.DOTALL | re.IGNORECASE,
    )
    if section is None:
        return None

    body = section.group(1).strip()
    # Drop any fenced-code markers wrapped around the package list.
    body = re.sub(r'^```\w*\s*', '', body, flags=re.MULTILINE)
    body = re.sub(r'```\s*$', '', body, flags=re.MULTILINE)
    return body
+
def parse_multi_file_python_output(code: str) -> Dict[str, str]:
    """Parse multi-file Python output (e.g., Gradio, Streamlit)"""
    parsed: Dict[str, str] = {}

    # "=== name.py ===" / "=== name.txt ===" headers delimit each file;
    # content runs until the next header or the end of input.
    section_re = r'===\s*(\S+\.(?:py|txt))\s*===\s*(.*?)(?====|$)'

    for section in re.finditer(section_re, code, re.DOTALL | re.IGNORECASE):
        name = section.group(1).strip()
        body = section.group(2).strip()
        # Drop any stray fenced-code markers around the file body.
        body = re.sub(r'^```\w*\s*', '', body, flags=re.MULTILINE)
        body = re.sub(r'```\s*$', '', body, flags=re.MULTILINE)
        parsed[name] = body

    return parsed
+
|
| 177 |
+
def strip_tool_call_markers(text):
    """Remove TOOL_CALL markers and thinking tags that some LLMs add to their output."""
    if not text:
        return text

    # Applied in order: markers, paired think tags (with content), a leading
    # unclosed <think> line, stray closers, then lone "}}" lines left over
    # from tool-call JSON.
    cleanup_rules = (
        (r'\[/?TOOL_CALL\]', re.IGNORECASE),
        (r'<think>[\s\S]*?</think>', re.IGNORECASE),
        (r'^<think>[\s\S]*?(?=\n|$)', re.IGNORECASE | re.MULTILINE),
        (r'</think>', re.IGNORECASE),
        (r'^\s*\}\}\s*$', re.MULTILINE),
    )
    for pattern, flags in cleanup_rules:
        text = re.sub(pattern, '', text, flags=flags)

    return text.strip()
+
|
| 195 |
+
def remove_code_block(text):
    """Remove code block markers from text."""
    # Tool-call / thinking markers come off first.
    text = strip_tool_call_markers(text)

    language_markers = [
        'python', 'html', 'css', 'javascript', 'json', 'c', 'cpp', 'markdown',
        'latex', 'jinja2', 'typescript', 'yaml', 'dockerfile', 'shell', 'r', 'sql'
    ]
    fence_patterns = (
        r'```(?:html|HTML)\n([\s\S]+?)\n```',  # fence with an html language tag
        r'```\n([\s\S]+?)\n```',               # fence without a language tag
        r'```([\s\S]+?)```',                   # fence without line breaks
    )

    for fence in fence_patterns:
        found = re.search(fence, text, re.DOTALL)
        if not found:
            continue
        inner = found.group(1).strip()
        # A leftover language marker on the first line is dropped too.
        first_line, sep, remainder = inner.partition('\n')
        if first_line.strip().lower() in language_markers:
            return remainder if sep else ''
        return inner

    # No fenced block found: return the text as-is.
    return text.strip()
|
| 218 |
+
def extract_import_statements(code):
    """Extract third-party import statements from generated code.

    Parses *code* with the ``ast`` module when possible and falls back to a
    line-based scan when the code has syntax errors. Imports of the listed
    built-in modules (and underscore-prefixed modules) are excluded.

    Args:
        code: Python source text to scan.

    Returns:
        De-duplicated list of import statement strings, in first-seen order.
    """
    import_statements = []

    # Built-in Python modules to exclude (not installable from PyPI).
    builtin_modules = {
        'os', 'sys', 'json', 'time', 'datetime', 'random', 'math', 're', 'collections',
        'itertools', 'functools', 'pathlib', 'urllib', 'http', 'email', 'html', 'xml',
        'csv', 'tempfile', 'shutil', 'subprocess', 'threading', 'multiprocessing',
        'asyncio', 'logging', 'typing', 'base64', 'hashlib', 'secrets', 'uuid',
        'copy', 'pickle', 'io', 'contextlib', 'warnings', 'sqlite3', 'gzip', 'zipfile',
        'tarfile', 'socket', 'ssl', 'platform', 'getpass', 'pwd', 'grp', 'stat',
        'glob', 'fnmatch', 'linecache', 'traceback', 'inspect', 'keyword', 'token',
        'tokenize', 'ast', 'code', 'codeop', 'dis', 'py_compile', 'compileall',
        'importlib', 'pkgutil', 'modulefinder', 'runpy', 'site', 'sysconfig'
    }

    def _is_external(module_name):
        # True for modules that should appear in requirements.txt.
        return module_name not in builtin_modules and not module_name.startswith('_')

    try:
        # Preferred path: walk the AST for Import / ImportFrom nodes.
        tree = ast.parse(code)
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                for alias in node.names:
                    if _is_external(alias.name.split('.')[0]):
                        import_statements.append(f"import {alias.name}")
            elif isinstance(node, ast.ImportFrom):
                # node.module is None for relative "from . import x" — skip those.
                if node.module and _is_external(node.module.split('.')[0]):
                    names = [alias.name for alias in node.names]
                    import_statements.append(f"from {node.module} import {', '.join(names)}")
    except SyntaxError:
        # Fallback: line-based scan for import statements.
        for line in code.split('\n'):
            line = line.strip()
            if not (line.startswith('import ') or line.startswith('from ')):
                continue
            parts = line.split()
            # FIX: guard against malformed lines such as a bare "import "
            # which previously raised IndexError on parts[1].
            if len(parts) < 2:
                continue
            if _is_external(parts[1].split('.')[0]):
                import_statements.append(line)

    # FIX: de-duplicate while preserving first-seen order; list(set(...))
    # produced a nondeterministic ordering across runs.
    return list(dict.fromkeys(import_statements))
+
|
| 270 |
+
def generate_requirements_txt_with_llm(import_statements):
    """Generate requirements.txt content using LLM based on import statements.

    Asks an LLM (via get_inference_client) to expand the given import list
    into a comprehensive requirements.txt, then aggressively strips any
    markdown/explanatory text from the response. On ANY failure (network,
    missing client, bad response) it falls back to a simple local mapping of
    module names to PyPI package names.

    Args:
        import_statements: List of import statement strings, as produced by
            extract_import_statements().

    Returns:
        requirements.txt content as a string, always newline-terminated;
        "# No additional dependencies required\\n" when nothing is needed.
    """
    if not import_statements:
        return "# No additional dependencies required\n"

    # Use a lightweight model for this task
    try:
        # NOTE(review): model id hard-coded here; presumably resolved by
        # backend_models — confirm against get_real_model_id's mapping.
        client = get_inference_client("zai-org/GLM-4.6", "auto")
        actual_model_id = get_real_model_id("zai-org/GLM-4.6")

        imports_text = '\n'.join(import_statements)

        prompt = f"""Based on the following Python import statements, generate a comprehensive requirements.txt file with all necessary and commonly used related packages:

{imports_text}

Instructions:
- Include the direct packages needed for the imports
- Include commonly used companion packages and dependencies for better functionality
- Use correct PyPI package names (e.g., PIL -> Pillow, sklearn -> scikit-learn)
- IMPORTANT: For diffusers, ALWAYS use: git+https://github.com/huggingface/diffusers
- IMPORTANT: For transformers, ALWAYS use: git+https://github.com/huggingface/transformers
- IMPORTANT: If diffusers is installed, also include transformers and sentencepiece as they usually go together
- Examples of comprehensive dependencies:
  * diffusers often needs: git+https://github.com/huggingface/transformers, sentencepiece, accelerate, torch, tokenizers
  * transformers often needs: accelerate, torch, tokenizers, datasets
  * gradio often needs: requests, Pillow for image handling
  * pandas often needs: numpy, openpyxl for Excel files
  * matplotlib often needs: numpy, pillow for image saving
  * sklearn often needs: numpy, scipy, joblib
  * streamlit often needs: pandas, numpy, requests
  * opencv-python often needs: numpy, pillow
  * fastapi often needs: uvicorn, pydantic
  * torch often needs: torchvision, torchaudio (if doing computer vision/audio)
- Include packages for common file formats if relevant (openpyxl, python-docx, PyPDF2)
- Do not include Python built-in modules
- Do not specify versions unless there are known compatibility issues
- One package per line
- If no external packages are needed, return "# No additional dependencies required"

🚨 CRITICAL OUTPUT FORMAT:
- Output ONLY the package names, one per line (plain text format)
- Do NOT use markdown formatting (no ```, no bold, no headings, no lists)
- Do NOT add any explanatory text before or after the package list
- Do NOT wrap the output in code blocks
- Just output raw package names as they would appear in requirements.txt

Generate a comprehensive requirements.txt that ensures the application will work smoothly:"""

        messages = [
            {"role": "system", "content": "You are a Python packaging expert specializing in creating comprehensive, production-ready requirements.txt files. Output ONLY plain text package names without any markdown formatting, code blocks, or explanatory text. Your goal is to ensure applications work smoothly by including not just direct dependencies but also commonly needed companion packages, popular extensions, and supporting libraries that developers typically need together."},
            {"role": "user", "content": prompt}
        ]

        # Low temperature: we want a deterministic, list-shaped answer.
        response = client.chat.completions.create(
            model=actual_model_id,
            messages=messages,
            max_tokens=1024,
            temperature=0.1
        )

        requirements_content = response.choices[0].message.content.strip()

        # Clean up the response in case it includes extra formatting
        if '```' in requirements_content:
            requirements_content = remove_code_block(requirements_content)

        # Enhanced cleanup for markdown and formatting.
        # NOTE: each "A and not B" pair below relies on `and` binding tighter
        # than `or`; the intent is "skip markdown, keep real package lines".
        lines = requirements_content.split('\n')
        clean_lines = []
        for line in lines:
            stripped_line = line.strip()

            # Skip lines that are markdown formatting
            if (stripped_line == '```' or
                stripped_line.startswith('```') or
                stripped_line.startswith('#') and not stripped_line.startswith('# ') or  # Skip markdown headers but keep comments
                stripped_line.startswith('**') or  # Skip bold text
                stripped_line.startswith('*') and not stripped_line[1:2].isalnum() or  # Skip markdown lists but keep package names starting with *
                stripped_line.startswith('-') and not stripped_line[1:2].isalnum() or  # Skip markdown lists but keep package names starting with -
                stripped_line.startswith('===') or  # Skip section dividers
                stripped_line.startswith('---') or  # Skip horizontal rules
                stripped_line.lower().startswith('here') or  # Skip explanatory text
                stripped_line.lower().startswith('this') or  # Skip explanatory text
                stripped_line.lower().startswith('the') or  # Skip explanatory text
                stripped_line.lower().startswith('based on') or  # Skip explanatory text
                stripped_line == ''):  # Skip empty lines unless they're at natural boundaries
                continue

            # Keep lines that look like valid package specifications
            # Valid lines: package names, git+https://, comments starting with "# "
            if (stripped_line.startswith('# ') or  # Valid comments
                stripped_line.startswith('git+') or  # Git dependencies
                stripped_line[0].isalnum() or  # Package names start with alphanumeric
                '==' in stripped_line or  # Version specifications
                '>=' in stripped_line or  # Version specifications
                '<=' in stripped_line):  # Version specifications
                clean_lines.append(line)

        requirements_content = '\n'.join(clean_lines).strip()

        # Ensure it ends with a newline
        if requirements_content and not requirements_content.endswith('\n'):
            requirements_content += '\n'

        return requirements_content if requirements_content else "# No additional dependencies required\n"

    except Exception as e:
        # Fallback: simple extraction with basic module -> package mapping.
        # Deliberately broad catch: requirements generation is best-effort
        # and must never fail the surrounding deploy.
        print(f"[Parser] Warning: LLM requirements generation failed: {e}, using fallback")
        dependencies = set()
        special_cases = {
            'PIL': 'Pillow',
            'sklearn': 'scikit-learn',
            'skimage': 'scikit-image',
            'bs4': 'beautifulsoup4'
        }

        for stmt in import_statements:
            if stmt.startswith('import '):
                module_name = stmt.split()[1].split('.')[0]
                package_name = special_cases.get(module_name, module_name)
                dependencies.add(package_name)
            elif stmt.startswith('from '):
                module_name = stmt.split()[1].split('.')[0]
                package_name = special_cases.get(module_name, module_name)
                dependencies.add(package_name)

        if dependencies:
            return '\n'.join(sorted(dependencies)) + '\n'
        else:
            return "# No additional dependencies required\n"
backend_prompts.py
CHANGED
|
@@ -61,16 +61,55 @@ Requirements:
|
|
| 61 |
6. Include proper error handling and loading states
|
| 62 |
7. Follow accessibility best practices
|
| 63 |
|
| 64 |
-
|
|
|
|
|
|
|
|
|
|
| 65 |
<script type="module">
|
| 66 |
-
import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.
|
| 67 |
</script>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
-
Consider providing users with
|
| 74 |
|
| 75 |
The index.html should contain the basic HTML structure and link to the CSS and JS files.
|
| 76 |
The index.js should contain all the JavaScript logic including transformers.js integration.
|
|
|
|
| 61 |
6. Include proper error handling and loading states
|
| 62 |
7. Follow accessibility best practices
|
| 63 |
|
| 64 |
+
**Transformers.js Library Usage:**
|
| 65 |
+
|
| 66 |
+
Import via CDN (use in index.html or index.js):
|
| 67 |
+
```javascript
|
| 68 |
<script type="module">
|
| 69 |
+
import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.8.0';
|
| 70 |
</script>
|
| 71 |
+
```
|
| 72 |
+
|
| 73 |
+
**Pipeline API - Quick Tour:**
|
| 74 |
+
Pipelines group together a pretrained model with preprocessing and postprocessing. Example:
|
| 75 |
+
|
| 76 |
+
```javascript
|
| 77 |
+
import { pipeline } from '@huggingface/transformers';
|
| 78 |
+
|
| 79 |
+
// Allocate a pipeline for sentiment-analysis
|
| 80 |
+
const pipe = await pipeline('sentiment-analysis');
|
| 81 |
+
|
| 82 |
+
const out = await pipe('I love transformers!');
|
| 83 |
+
// [{'label': 'POSITIVE', 'score': 0.999817686}]
|
| 84 |
|
| 85 |
+
// Use a different model by specifying model id
|
| 86 |
+
const pipe = await pipeline('sentiment-analysis', 'Xenova/bert-base-multilingual-uncased-sentiment');
|
| 87 |
+
```
|
| 88 |
+
|
| 89 |
+
**Device Options:**
|
| 90 |
+
By default, models run on CPU (via WASM). For better performance, use WebGPU:
|
| 91 |
+
```javascript
|
| 92 |
+
// Run on WebGPU (GPU)
|
| 93 |
+
const pipe = await pipeline('sentiment-analysis', 'Xenova/distilbert-base-uncased-finetuned-sst-2-english', {
|
| 94 |
+
device: 'webgpu',
|
| 95 |
+
});
|
| 96 |
+
```
|
| 97 |
+
|
| 98 |
+
**Quantization Options:**
|
| 99 |
+
In resource-constrained environments (browsers), use quantized models:
|
| 100 |
+
- "fp32" (default for WebGPU)
|
| 101 |
+
- "fp16"
|
| 102 |
+
- "q8" (default for WASM)
|
| 103 |
+
- "q4" (4-bit quantization for smaller size)
|
| 104 |
+
|
| 105 |
+
```javascript
|
| 106 |
+
// Run at 4-bit quantization for better performance
|
| 107 |
+
const pipe = await pipeline('sentiment-analysis', 'Xenova/distilbert-base-uncased-finetuned-sst-2-english', {
|
| 108 |
+
dtype: 'q4',
|
| 109 |
+
});
|
| 110 |
+
```
|
| 111 |
|
| 112 |
+
Consider providing users with options to choose device (CPU/GPU) and quantization level based on their needs.
|
| 113 |
|
| 114 |
The index.html should contain the basic HTML structure and link to the CSS and JS files.
|
| 115 |
The index.js should contain all the JavaScript logic including transformers.js integration.
|