4 changes: 3 additions & 1 deletion .gitignore
@@ -6,4 +6,6 @@ marsha.egg-info/
 .env
 examples/**/*.py
 examples/**/requirements.txt
-*.ipynb*
+*.ipynb*
+*.swp
+marsha/bin/
24 changes: 22 additions & 2 deletions marsha/Makefile
@@ -1,10 +1,30 @@
-./dist/marsha: ./venv ./*.py ./marsha.spec
+./dist/marsha: ./venv ./*.py ./marsha.spec ./bin/llamacpp
 	. ./venv/bin/activate; pip install -r requirements.txt
-	. ./venv/bin/activate; pyinstaller __main__.py --name marsha --onefile --collect-all pyflakes --collect-all mccabe --collect-all pycodestyle --collect-all pydocstyle --collect-all pylama --add-data ../examples:./examples --add-data ./helper.py:./marsha
+	. ./venv/bin/activate; pyinstaller __main__.py --name marsha --onefile --collect-all pyflakes --collect-all mccabe --collect-all pycodestyle --collect-all pydocstyle --collect-all pylama --add-data ../examples:./examples --add-data ./helper.py:./marsha --add-data ./bin/llamacpp:./marsha/bin
 
 ./venv:
 	(command -v $(python) && $(python) -m venv venv) || (command -v python && python -m venv venv) || (command -v python3 && python3 -m venv venv)
 
+NVCC := $(shell command -v nvcc 2> /dev/null)
+
+ifndef NVCC
+./bin/llamacpp:
+	mkdir -p ./bin
+	mkdir -p /tmp/__marsha_setup__
+	cd /tmp/__marsha_setup__ && git clone https://github.com/ggerganov/llama.cpp.git
+	cd /tmp/__marsha_setup__/llama.cpp && make
+	cp /tmp/__marsha_setup__/llama.cpp/main ./bin/llamacpp
+	rm -rf /tmp/__marsha_setup__
+else
+./bin/llamacpp:
+	mkdir -p ./bin
+	mkdir -p /tmp/__marsha_setup__
+	cd /tmp/__marsha_setup__ && git clone https://github.com/ggerganov/llama.cpp.git
+	cd /tmp/__marsha_setup__/llama.cpp && make LLAMA_CUBLAS=1
+	cp /tmp/__marsha_setup__/llama.cpp/main ./bin/llamacpp
+	rm -rf /tmp/__marsha_setup__
+endif
+
 .PHONY: clean
 clean:
	git clean -ffdx -e .env
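Note on the conditional above: `ifndef NVCC` selects the CPU-only llama.cpp build, while a detected `nvcc` switches on the cuBLAS build. The same probe reappears in Python in `setup.py` further down; a minimal sketch of the equivalent check (assuming only that `nvcc` on the PATH implies a usable CUDA toolchain):

```py
import shutil
import subprocess

# Python analogue of the Makefile's `command -v nvcc` probe
cuda_support = shutil.which('nvcc') is not None
make_args = ['make', 'LLAMA_CUBLAS=1'] if cuda_support else ['make']
subprocess.run(make_args, cwd='llama.cpp', check=True)
```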
7 changes: 4 additions & 3 deletions marsha/base.py
@@ -12,9 +12,10 @@
 from marsha.stats import MarshaStats
 from marsha.utils import read_file, autoformat_files, copy_file, get_filename_from_path, add_helper, copy_tree
 
-# Set up OpenAI
-openai.organization = os.getenv('OPENAI_ORG')
-openai.api_key = os.getenv('OPENAI_SECRET_KEY')
+# Set up OpenAI if not using Llama.cpp
+if not os.getenv('LLAMACPP_MODEL'):
+    openai.organization = os.getenv('OPENAI_ORG')
+    openai.api_key = os.getenv('OPENAI_SECRET_KEY')
 
 # Parse the input arguments
 parser = argparse.ArgumentParser(
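The switch is driven entirely by the `LLAMACPP_MODEL` environment variable; a hedged sketch of the two modes (the model path is hypothetical):

```py
import os

# Llama.cpp mode: point at a local model file; OpenAI credentials are never read
os.environ['LLAMACPP_MODEL'] = '/models/llama-2-13b.q4_0.bin'  # hypothetical path

# OpenAI mode: leave LLAMACPP_MODEL unset and supply credentials instead
# os.environ['OPENAI_ORG'] = '<org id>'
# os.environ['OPENAI_SECRET_KEY'] = '<api key>'
```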
65 changes: 65 additions & 0 deletions marsha/llamashim.py
@@ -0,0 +1,65 @@
+# Minimal recreation of the async `openai.ChatCompletion.acreate` API, supporting only what we use.
+# Maybe someday this will be broken out into a general-purpose llama.cpp wrapper API.
+import asyncio
+import multiprocessing
+import os
+import subprocess
+
+
+class DotDict(dict):
+    """dot.notation access to dictionary attributes"""
+    def __getattr__(*args):
+        val = dict.get(*args)
+        return DotDict(val) if type(val) is dict else val
+    __setattr__ = dict.__setitem__
+    __delattr__ = dict.__delitem__
+
+
+script_directory = os.path.dirname(os.path.abspath(__file__))
+llamacpp = os.path.join(script_directory, 'bin/llamacpp')
+gpu_support = True if 'gpu-layers' in subprocess.run(
+    [llamacpp, '--help'], capture_output=True, encoding='utf8').stdout else False
+
+
+async def run_subprocess(stream: asyncio.subprocess.Process, timeout: float = 60.0) -> tuple[str, str]:
+    stdout = ''
+    stderr = ''
+    try:
+        stdout, stderr = await asyncio.wait_for(stream.communicate(), timeout)
+    except asyncio.exceptions.TimeoutError:
+        try:
+            stream.kill()
+        except OSError:
+            # Ignore 'no such process' error
+            pass
+        raise Exception('run_subprocess timeout...')
+    except Exception as e:
+        raise e
+    return (stdout.decode('utf-8'), stderr.decode('utf-8'))
+
+
+async def acreate(model='gpt-3.5-turbo', messages=[], name=None, temperature=1.0, top_p=None, n=1, max_tokens=float('inf')):
+    fmt_messages = '\n\n'.join([f"""{message['role'].upper()}:
+
+{message['content']}""" for message in messages])
+    req = f"""This is a transcript of an advanced AI ASSISTANT. The AI SYSTEM gives it a persona and STRICT output formatting rules, and it solves a problem statement posed to it by the USER. It is emotionless and provides NO SECONDARY EXPLANATORY TEXT, solely the requested output in the requested format. The transcript is ended immediately after this with "END OF TRANSCRIPT".
+
+{fmt_messages}
+
+ASSISTANT: """
+    args = [llamacpp, '-m', os.getenv('LLAMACPP_MODEL'), '-t',
+            str(multiprocessing.cpu_count()), '-c', '4096', '-eps', '1e-5', '--temp', str(0.75), '-p', req]
+    if max_tokens != float('inf'):
+        args.extend(['-n', str(max_tokens)])
+    if gpu_support:
+        # TODO: Figure out how to determine the proper number of layers here based on GPU memory size and the model chosen
+        args.extend(['-ngl', '43'])
+    choices = []
+    for i in range(n):
+        stdout, stderr = await run_subprocess(await asyncio.create_subprocess_exec(*args,
+                                              stdout=subprocess.PIPE, stderr=subprocess.PIPE), float('inf'))
+        print(stdout, stderr)
+        print(stdout.split(req)[1])
+        print(stdout.split(req)[1].split('END OF TRANSCRIPT')[0])
+        choices.append(DotDict({'message': {'content': stdout.split(req)[1].split('END OF TRANSCRIPT')[0]}}))
+    return DotDict({'model': model, 'usage': {'prompt_tokens': 0, 'completion_tokens': 0}, 'choices': choices})
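Taken together, `DotDict` and `acreate` preserve the attribute-style access callers already use on OpenAI responses. A minimal usage sketch (assuming `LLAMACPP_MODEL` points at a valid model file and the bundled `llamacpp` binary exists):

```py
import asyncio

from marsha.llamashim import acreate


async def main():
    out = await acreate(messages=[
        {'role': 'system', 'content': 'You are a terse assistant.'},
        {'role': 'user', 'content': 'Say hello.'},
    ], n=1, max_tokens=64)
    # Same attribute access as an openai.ChatCompletion response
    print(out.choices[0].message.content)

asyncio.run(main())
```

Note that `gpu_support` is probed at import time by running the bundled binary with `--help`, so importing the module requires the binary to be present.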
115 changes: 97 additions & 18 deletions marsha/llm.py
@@ -13,6 +13,10 @@
 from marsha.parse import validate_first_stage_markdown, validate_second_stage_markdown, write_files_from_markdown, format_marsha_for_llm, extract_func_name
 from marsha.stats import MarshaStats
 from marsha.utils import read_file
+from marsha.llamashim import acreate
+
+# Flag if using Llama.cpp or OpenAI
+use_llama = True if os.getenv('LLAMACPP_MODEL') else False
 
 # Get time at startup to make human legible "start times" in the logs
 t0 = time.time()
@@ -58,7 +62,7 @@ async def retry_chat_completion(query, model='gpt-3.5-turbo', max_tries=3, n_res
     query['n'] = n_results
     while True:
         try:
-            out = await openai.ChatCompletion.acreate(**query)
+            out = await acreate(**query) if use_llama else await openai.ChatCompletion.acreate(**query)
             t2 = time.time()
             print(
                 f'''Chat query took {prettify_time_delta(t2 - t1)}, started at {prettify_time_delta(t1 - t0)}, ms/chars = {(t2 - t1) * 1000 / out.get('usage', {}).get('total_tokens', 9001)}''')
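Since the shim accepts the same keyword arguments used here (`messages`, `n`, `max_tokens`), the rest of `retry_chat_completion` stays backend-agnostic. One wrinkle: the shim reports zero token usage and no `total_tokens`, which is why the timing log above falls back to a dummy value (9001). Conceptually the dispatch is just:

```py
# Sketch: the same query dict drives either backend
query = {'messages': [{'role': 'user', 'content': 'ping'}], 'n': 1}
out = await acreate(**query) if use_llama else await openai.ChatCompletion.acreate(**query)
```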
@@ -143,10 +147,12 @@ async def gpt_func_to_python(marsha_filename: str, functions: list[str], defined
 {marsha_for_code_llm}
 ---- end ----''')
 
-    reses = await asyncio.gather(retry_chat_completion({
-        'messages': [{
-            'role': 'system',
-            'content': f'''You are a senior software engineer assigned to write Python 3 functions.
+    if use_llama:
+        reses = []
+        reses.append(await retry_chat_completion({
+            'messages': [{
+                'role': 'system',
+                'content': f'''You are a senior software engineer assigned to write Python 3 functions.
 The assignment is written in markdown format.
 The description of each function should be included as a docstring.
 Add type hints if feasible.
@@ -178,14 +184,15 @@ async def gpt_func_to_python(marsha_filename: str, functions: list[str], defined
 ```
 
 ''',
-    }, {
-        'role': 'user',
-        'content': f'''{marsha_for_code_llm}'''
-    }],
-    }, n_results=n_results), retry_chat_completion({
-        'messages': [{
-            'role': 'system',
-            'content': f'''You are a senior software engineer assigned to write a unit test suite for Python 3 functions.
+        }, {
+            'role': 'user',
+            'content': f'''{marsha_for_code_llm}'''
+        }],
+        }, n_results=n_results))
+        reses.append(await retry_chat_completion({
+            'messages': [{
+                'role': 'system',
+                'content': f'''You are a senior software engineer assigned to write a unit test suite for Python 3 functions.
 The assignment is written in markdown format.
 The unit tests created should exactly match the example cases provided for each function.
 You have to create a TestCase per function provided.
@@ -209,11 +216,83 @@ async def gpt_func_to_python(marsha_filename: str, functions: list[str], defined
 ```
 
 ''',
-    }, {
-        'role': 'user',
-        'content': f'''{marsha_for_test_llm}'''
-    }],
-    }, n_results=n_results))
+        }, {
+            'role': 'user',
+            'content': f'''{marsha_for_test_llm}'''
+        }],
+        }, n_results=n_results))
+    else:
+        reses = await asyncio.gather(retry_chat_completion({
+            'messages': [{
+                'role': 'system',
+                'content': f'''You are a senior software engineer assigned to write Python 3 functions.
+The assignment is written in markdown format.
+The description of each function should be included as a docstring.
+Add type hints if feasible.
+The filename should exactly match the name `{marsha_filename}.py`.
+Make sure to follow PEP8 guidelines.
+Make sure to include all needed standard Python libraries imports.
+Generate `requirements.txt` file with all needed dependencies, do not add fixed version to dependencies.
+If need to convert `type` to Python classes, you will receive a markdown where the heading is the class name followed by several rows following a comma separated CSV format where the first row contains all class properties and the following rows contain examples of the values of those properties. Make sure to add the __str__, __repr__, and __eq__ methods to the class.
+Your response must not comment on what you changed.
+Your response must not add any additional comments, clarifications, notes, information, explanations, details, examples or thoughts.
+Your response must be a markdown file.
+The first section header must be the filename `{marsha_filename}.py`.
+The content of the first section must be a python code block with the generated code.
+The second section header must be the filename `requirements.txt`.
+The content of the second section must be a text code block with the generated code.
+The file should end with the code block, nothing else should be added to the file.
+The desired response must look like the following:
+
+# {marsha_filename}.py
+
+```py
+<generated code>
+```
+
+# requirements.txt
+
+```txt
+<dependencies needed>
+```
+
+''',
+        }, {
+            'role': 'user',
+            'content': f'''{marsha_for_code_llm}'''
+        }],
+        }, n_results=n_results), retry_chat_completion({
+            'messages': [{
+                'role': 'system',
+                'content': f'''You are a senior software engineer assigned to write a unit test suite for Python 3 functions.
+The assignment is written in markdown format.
+The unit tests created should exactly match the example cases provided for each function.
+You have to create a TestCase per function provided.
+The filename should exactly match the name `{marsha_filename}_test.py`.
+Unknown imports might come from the file where the function is defined, or from the standard library.
+If you are working with files, make sure to mock the file system since the tests will be run in a sandboxed environment.
+Make sure to follow PEP8 guidelines.
+Make sure to include all needed standard Python libraries imports.
+Your response must not comment on what you changed.
+Your response must not add any additional comments, clarifications, notes, information, explanations, details, examples or thoughts.
+Your response must be a markdown file.
+The first section header must be the filename `{marsha_filename}_test.py`.
+The content of the first section must be a python code block with the generated code.
+The file should end with the code block, nothing else should be added to the file.
+The desired response must look like the following:
+
+# {marsha_filename}_test.py
+
+```py
+<generated code>
+```
+
+''',
+        }, {
+            'role': 'user',
+            'content': f'''{marsha_for_test_llm}'''
+        }],
+        }, n_results=n_results))
     stats.stage_update('first_stage', reses)
     # The output should be a valid list of Markdown documents. Parse each one and return the list of parsed doc, on failure
     # do not add it to the list. If the list to return is empty try again (or fully error out, for now)
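Stripped of the prompt text, the two branches differ only in scheduling: the llama.cpp path awaits the code and test completions one after another (a single local model instance), while the OpenAI path issues both requests concurrently. A condensed sketch, with `code_query` and `test_query` standing in for the full prompt dictionaries above:

```py
if use_llama:
    # Local model: serialize the two first-stage requests
    reses = [await retry_chat_completion(code_query, n_results=n_results),
             await retry_chat_completion(test_query, n_results=n_results)]
else:
    # Hosted API: run both requests concurrently
    reses = await asyncio.gather(
        retry_chat_completion(code_query, n_results=n_results),
        retry_chat_completion(test_query, n_results=n_results))
```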
2 changes: 1 addition & 1 deletion marsha/marsha.spec
@@ -1,7 +1,7 @@
 # -*- mode: python ; coding: utf-8 -*-
 from PyInstaller.utils.hooks import collect_all
 
-datas = [('../examples', './examples'), ('./helper.py', './marsha')]
+datas = [('../examples', './examples'), ('./helper.py', './marsha'), ('./bin/llamacpp', './marsha/bin')]
 binaries = []
 hiddenimports = []
 tmp_ret = collect_all('pyflakes')
3 changes: 2 additions & 1 deletion marsha/parse.py
@@ -72,7 +72,8 @@ def to_markdown(node):
 
 def format_marsha_for_llm(marsha_filename: str, functions: list[str], defined_types: list[str] = None):
     break_line = '\n'
-    res = [f'# Requirements for file `{marsha_filename}`']
+    res = [f'''# Requirements for file `{marsha_filename}.py`
+''']
     for func in functions:
         ast = ast_renderer.get_ast(Document(func))
         if ast['children'][0]['type'] != 'Heading':
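The effect: the generated requirements header now carries the `.py` extension and a trailing newline. For example (hypothetical input name):

```py
# Inside format_marsha_for_llm('fibonacci', ...), the first entry of `res` is now
'# Requirements for file `fibonacci.py`\n'
# previously: '# Requirements for file `fibonacci`'
```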
40 changes: 40 additions & 0 deletions setup.py
@@ -1,4 +1,40 @@
+import os
+import subprocess
+import tempfile
 from setuptools import setup
+from setuptools.command.develop import develop
+from setuptools.command.install import install
+
+
+def setup_llamacpp(install_libbase):
+    if not os.path.exists(os.path.join(install_libbase, 'marsha/bin')):
+        os.makedirs(os.path.join(install_libbase, 'marsha/bin'))
+    if not os.path.exists(os.path.join(install_libbase, 'marsha/bin/llamacpp')):
+        with tempfile.TemporaryDirectory(
+                suffix='__marsha_setup__') as tmpdir:
+            print(tmpdir)
+            print(subprocess.run(['bash', '-c', f'cd {tmpdir}; git clone https://github.com/ggerganov/llama.cpp.git']))
+            cuda_support = True if len(subprocess.run(['bash', '-c', 'command -v nvcc'], capture_output=True, encoding='utf8').stdout) > 0 else False
+            if cuda_support:
+                print(subprocess.run(['bash', '-c', f'cd {os.path.join(tmpdir, "llama.cpp")}; make LLAMA_CUBLAS=1']))
+            else:
+                print(subprocess.run(['bash', '-c', f'cd {os.path.join(tmpdir, "llama.cpp")}; make']))
+            subprocess.run(['cp', os.path.join(tmpdir, 'llama.cpp/main'), os.path.join(install_libbase, 'marsha/bin/llamacpp')])
+
+
+class PostDevelopCommand(develop):
+    """Post-installation for development mode."""
+    def run(self):
+        develop.run(self)
+        setup_llamacpp(self.install_dir)  # original called setup_llamacpp() with no argument; install_dir is assumed as develop's analogue of install_libbase
+
+
+class PostInstallCommand(install):
+    """Post-installation for installation mode."""
+    def run(self):
+        install.run(self)
+        setup_llamacpp(self.install_libbase)
+
 
 setup(
     name='marsha',
@@ -28,4 +64,8 @@
         'Operating System :: POSIX :: Linux',
         'Programming Language :: Python :: 3.10',
     ],
+    cmdclass={
+        'develop': PostDevelopCommand,
+        'install': PostInstallCommand,
+    },
 )
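A quick way to verify the post-install hook actually produced the binary (a sketch, assuming the package installed as `marsha`):

```py
import os

import marsha

binary = os.path.join(os.path.dirname(marsha.__file__), 'bin', 'llamacpp')
print('llama.cpp binary present:', os.path.exists(binary))
```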