From 85e6eff23efb839029a7e56f8feee283c366ecaa Mon Sep 17 00:00:00 2001 From: Tonyo Date: Sat, 21 Jun 2025 20:20:13 +0200 Subject: [PATCH 1/3] feat: add OpenAI API support with multi-provider architecture - Add OpenAI client implementation with streaming support - Introduce --client_a and --client_b arguments for provider selection - Support mixed conversations (Ollama + OpenAI) - Add environment configuration with .env file support - Implement auto-detection of default models per provider - Add openai and python-dotenv dependencies - Update README with new usage examples and configuration guide - Maintain backward compatibility with existing Ollama functionality --- .env.example | 6 + Pipfile | 2 + Pipfile.lock | 330 +++++++++++++++++++++++++++++++++++++++++++++++--- README.md | 46 +++++-- app.py | 70 +++++++---- llm_client.py | 225 ++++++++++++++++++++++++++++++++++ 6 files changed, 637 insertions(+), 42 deletions(-) create mode 100644 .env.example diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..9eddfe8 --- /dev/null +++ b/.env.example @@ -0,0 +1,6 @@ +# OpenAI API Configuration +OPENAI_API_KEY=your_openai_api_key_here + +# Optional: Set default OpenAI models +DEFAULT_OPENAI_MODEL_A=gpt-4o +DEFAULT_OPENAI_MODEL_B=gpt-4.1 \ No newline at end of file diff --git a/Pipfile b/Pipfile index 59ef06f..87d0ed9 100644 --- a/Pipfile +++ b/Pipfile @@ -6,6 +6,8 @@ name = "pypi" [packages] requests = "*" rich = "*" +openai = "*" +python-dotenv = "*" [dev-packages] diff --git a/Pipfile.lock b/Pipfile.lock index 8dbbb2d..8f8aa8a 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "b85e7b371e128f6d6127a60fded4616bdce1122fd31aaee583bb450d2b15518f" + "sha256": "31b0b87f85152ba58158892844732ba9e8821e70041179c5a659fc05e7f003e5" }, "pipfile-spec": 6, "requires": { @@ -16,13 +16,29 @@ ] }, "default": { + "annotated-types": { + "hashes": [ + "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", + "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89" + ], + "markers": "python_version >= '3.8'", + "version": "==0.7.0" + }, + "anyio": { + "hashes": [ + "sha256:673c0c244e15788651a4ff38710fea9675823028a6f08a5eda409e0c9840a028", + "sha256:9f76d541cad6e36af7beb62e978876f3b41e3e04f2c1fbf0884604c0a9c4d93c" + ], + "markers": "python_version >= '3.9'", + "version": "==4.9.0" + }, "certifi": { "hashes": [ - "sha256:0a816057ea3cdefcef70270d2c515e4506bbc954f417fa5ade2021213bb8f0c6", - "sha256:30350364dfe371162649852c63336a15c70c6510c2ad5015b21c2345311805f3" + "sha256:2e0c7ce7cb5d8f8634ca55d2ba7e6ec2689a2fd6537d8dec1296a477a4910057", + "sha256:d747aa5a8b9bbbb1bb8c22bb13e22bd1f18e9796defa16bab421f7f7a317323b" ], - "markers": "python_version >= '3.6'", - "version": "==2025.4.26" + "markers": "python_version >= '3.7'", + "version": "==2025.6.15" }, "charset-normalizer": { "hashes": [ @@ -122,6 +138,46 @@ "markers": "python_version >= '3.7'", "version": "==3.4.2" }, + "colorama": { + "hashes": [ + "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", + "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6'", + "version": "==0.4.6" + }, + "distro": { + "hashes": [ + "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", + "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2" + ], + "markers": "python_version >= '3.6'", + "version": 
"==1.9.0" + }, + "h11": { + "hashes": [ + "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", + "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86" + ], + "markers": "python_version >= '3.8'", + "version": "==0.16.0" + }, + "httpcore": { + "hashes": [ + "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", + "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8" + ], + "markers": "python_version >= '3.8'", + "version": "==1.0.9" + }, + "httpx": { + "hashes": [ + "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", + "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad" + ], + "markers": "python_version >= '3.8'", + "version": "==0.28.1" + }, "idna": { "hashes": [ "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", @@ -130,6 +186,89 @@ "markers": "python_version >= '3.6'", "version": "==3.10" }, + "jiter": { + "hashes": [ + "sha256:023aa0204126fe5b87ccbcd75c8a0d0261b9abdbbf46d55e7ae9f8e22424eeb8", + "sha256:03997d2f37f6b67d2f5c475da4412be584e1cec273c1cfc03d642c46db43f8cf", + "sha256:07a7142c38aacc85194391108dc91b5b57093c978a9932bd86a36862759d9500", + "sha256:0c5867d40ab716e4684858e4887489685968a47e3ba222e44cde6e4a2154f959", + "sha256:13252b58c1f4d8c5b63ab103c03d909e8e1e7842d302473f482915d95fefd605", + "sha256:13ddbc6ae311175a3b03bd8994881bc4635c923754932918e18da841632349db", + "sha256:14a4c418b1ec86a195f1ca69da8b23e8926c752b685af665ce30777233dfe070", + "sha256:15720084d90d1098ca0229352607cd68256c76991f6b374af96f36920eae13c4", + "sha256:15acb267ea5e2c64515574b06a8bf393fbfee6a50eb1673614aa45f4613c0cca", + "sha256:166f3606f11920f9a1746b2eea84fa2c0a5d50fd313c38bdea4edc072000b0af", + "sha256:1956f934dca32d7bb647ea21d06d93ca40868b505c228556d3373cbd255ce853", + "sha256:1b28302349dc65703a9e4ead16f163b1c339efffbe1049c30a44b001a2a4fff9", + "sha256:1e274728e4a5345a6dde2d343c8da018b9d4bd4350f5a472fa91f66fda44911b", + "sha256:23ba7722d6748b6920ed02a8f1726fb4b33e0fd2f3f621816a8b486c66410ab2", + "sha256:286299b74cc49e25cd42eea19b72aa82c515d2f2ee12d11392c56d8701f52224", + "sha256:28dcecbb4ba402916034fc14eba7709f250c4d24b0c43fc94d187ee0580af181", + "sha256:28ed2a4c05a1f32ef0e1d24c2611330219fed727dae01789f4a335617634b1ca", + "sha256:2e2227db6ba93cb3e2bf67c87e594adde0609f146344e8207e8730364db27041", + "sha256:31c50c40272e189d50006ad5c73883caabb73d4e9748a688b216e85a9a9ca3b9", + "sha256:32bb468e3af278f095d3fa5b90314728a6916d89ba3d0ffb726dd9bf7367285e", + "sha256:371eab43c0a288537d30e1f0b193bc4eca90439fc08a022dd83e5e07500ed026", + "sha256:395bb9a26111b60141757d874d27fdea01b17e8fac958b91c20128ba8f4acc8a", + "sha256:39de429dcaeb6808d75ffe9effefe96a4903c6a4b376b2f6d08d77c1aaee2f18", + "sha256:3aa96f2abba33dc77f79b4cf791840230375f9534e5fac927ccceb58c5e604a5", + "sha256:3bebe0c558e19902c96e99217e0b8e8b17d570906e72ed8a87170bc290b1e978", + "sha256:3c189c4f1779c05f75fc17c0c1267594ed918996a231593a21a5ca5438445216", + "sha256:48a403277ad1ee208fb930bdf91745e4d2d6e47253eedc96e2559d1e6527006d", + "sha256:4c440ea003ad10927a30521a9062ce10b5479592e8a70da27f21eeb457b4a9c5", + "sha256:4d613e4b379a07d7c8453c5712ce7014e86c6ac93d990a0b8e7377e18505e98d", + "sha256:5161e201172de298a8a1baad95eb85db4fb90e902353b1f6a41d64ea64644e25", + "sha256:520ef6d981172693786a49ff5b09eda72a42e539f14788124a07530f785c3ad6", + "sha256:52ce124f13a7a616fad3bb723f2bfb537d78239d1f7f219566dc52b6f2a9e48d", + "sha256:533efbce2cacec78d5ba73a41756beff8431dfa1694b6346ce7af3a12c42202b", + 
"sha256:554dedfd05937f8fc45d17ebdf298fe7e0c77458232bcb73d9fbbf4c6455f5b3", + "sha256:558cc7e44fd8e507a236bee6a02fa17199ba752874400a0ca6cd6e2196cdb7dc", + "sha256:5bc299da7789deacf95f64052d97f75c16d4fc8c4c214a22bf8d859a4288a1c2", + "sha256:5e9251a5e83fab8d87799d3e1a46cb4b7f2919b895c6f4483629ed2446f66522", + "sha256:5ed975b83a2b8639356151cef5c0d597c68376fc4922b45d0eb384ac058cfa00", + "sha256:5f51e048540dd27f204ff4a87f5d79294ea0aa3aa552aca34934588cf27023cf", + "sha256:62755d1bcea9876770d4df713d82606c8c1a3dca88ff39046b85a048566d56ea", + "sha256:66e989410b6666d3ddb27a74c7e50d0829704ede652fd4c858e91f8d64b403d0", + "sha256:6842184aed5cdb07e0c7e20e5bdcfafe33515ee1741a6835353bb45fe5d1bd95", + "sha256:6c675736059020365cebc845a820214765162728b51ab1e03a1b7b3abb70f74c", + "sha256:6ed5649ceeaeffc28d87fb012d25a4cd356dcd53eff5acff1f0466b831dda2a7", + "sha256:7202ae396446c988cb2a5feb33a543ab2165b786ac97f53b59aafb803fef0744", + "sha256:75f9eb72ecb640619c29bf714e78c9c46c9c4eaafd644bf78577ede459f330d4", + "sha256:7d1bbf3c465de4a24ab12fb7766a0003f6f9bce48b8b6a886158c4d569452dc5", + "sha256:86c5aa6910f9bebcc7bc4f8bc461aff68504388b43bfe5e5c0bd21efa33b52f4", + "sha256:8be921f0cadd245e981b964dfbcd6fd4bc4e254cdc069490416dd7a2632ecc01", + "sha256:901b92f2e2947dc6dfcb52fd624453862e16665ea909a08398dde19c0731b7f4", + "sha256:919d139cdfa8ae8945112398511cb7fca58a77382617d279556b344867a37e61", + "sha256:9ab7fd8738094139b6c1ab1822d6f2000ebe41515c537235fd45dabe13ec9324", + "sha256:9c9c1d5f10e18909e993f9641f12fe1c77b3e9b533ee94ffa970acc14ded3812", + "sha256:a7c7d785ae9dda68c2678532a5a1581347e9c15362ae9f6e68f3fdbfb64f2e49", + "sha256:a9be4d0fa2b79f7222a88aa488bd89e2ae0a0a5b189462a12def6ece2faa45f1", + "sha256:aa8b3e0068c26ddedc7abc6fac37da2d0af16b921e288a5a613f4b86f050354f", + "sha256:ac509f7eccca54b2a29daeb516fb95b6f0bd0d0d8084efaf8ed5dfc7b9f0b357", + "sha256:b2ab0051160cb758a70716448908ef14ad476c3774bd03ddce075f3c1f90a3d6", + "sha256:b532d3af9ef4f6374609a3bcb5e05a1951d3bf6190dc6b176fdb277c9bbf15ee", + "sha256:bd6292a43c0fc09ce7c154ec0fa646a536b877d1e8f2f96c19707f65355b5a4d", + "sha256:c404a99352d839fed80d6afd6c1d66071f3bacaaa5c4268983fc10f769112e90", + "sha256:cafc4628b616dc32530c20ee53d71589816cf385dd9449633e910d596b1f5c8a", + "sha256:cd2fb72b02478f06a900a5782de2ef47e0396b3e1f7d5aba30daeb1fce66f303", + "sha256:ce541693355fc6da424c08b7edf39a2895f58d6ea17d92cc2b168d20907dee12", + "sha256:ceeb52d242b315d7f1f74b441b6a167f78cea801ad7c11c36da77ff2d42e8a28", + "sha256:d0cb9a125d5a3ec971a094a845eadde2db0de85b33c9f13eb94a0c63d463879e", + "sha256:d7bfed2fe1fe0e4dda6ef682cee888ba444b21e7a6553e03252e4feb6cf0adca", + "sha256:da9be20b333970e28b72edc4dff63d4fec3398e05770fb3205f7fb460eb48dd4", + "sha256:db16e4848b7e826edca4ccdd5b145939758dadf0dc06e7007ad0e9cfb5928ae7", + "sha256:dc347c87944983481e138dea467c0551080c86b9d21de6ea9306efb12ca8f606", + "sha256:e0588107ec8e11b6f5ef0e0d656fb2803ac6cf94a96b2b9fc675c0e3ab5e8644", + "sha256:e4f2fb68e5f1cfee30e2b2a09549a00683e0fde4c6a2ab88c94072fc33cb7426", + "sha256:f59e533afed0c5b0ac3eba20d2548c4a550336d8282ee69eb07b37ea526ee4e5", + "sha256:f62cf8ba0618eda841b9bf61797f21c5ebd15a7a1e19daab76e4e4b498d515b2", + "sha256:fa3402a2ff9815960e0372a47b75c76979d74402448509ccd49a275fa983ef8a", + "sha256:fcedb049bdfc555e261d6f65a6abe1d5ad68825b7202ccb9692636c70fcced86", + "sha256:ff76d8887c8c8ee1e772274fcf8cc1071c2c58590d13e33bd12d02dc9a560397" + ], + "markers": "python_version >= '3.9'", + "version": "==0.10.0" + }, "markdown-it-py": { "hashes": [ 
"sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", @@ -146,22 +285,153 @@ "markers": "python_version >= '3.7'", "version": "==0.1.2" }, + "openai": { + "hashes": [ + "sha256:9771982cdd5b6631af68c6a603da72ed44cd2caf73b49f717a72b71374bc565b", + "sha256:e5dcb5498ea6b42fec47546d10f1bcc05fb854219a7d953a5ba766718b212a02" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==1.90.0" + }, + "pydantic": { + "hashes": [ + "sha256:d989c3c6cb79469287b1569f7447a17848c998458d49ebe294e975b9baf0f0db", + "sha256:dde5df002701f6de26248661f6835bbe296a47bf73990135c7d07ce741b9623b" + ], + "markers": "python_version >= '3.9'", + "version": "==2.11.7" + }, + "pydantic-core": { + "hashes": [ + "sha256:0069c9acc3f3981b9ff4cdfaf088e98d83440a4c7ea1bc07460af3d4dc22e72d", + "sha256:031c57d67ca86902726e0fae2214ce6770bbe2f710dc33063187a68744a5ecac", + "sha256:0405262705a123b7ce9f0b92f123334d67b70fd1f20a9372b907ce1080c7ba02", + "sha256:04a1a413977ab517154eebb2d326da71638271477d6ad87a769102f7c2488c56", + "sha256:09fb9dd6571aacd023fe6aaca316bd01cf60ab27240d7eb39ebd66a3a15293b4", + "sha256:0a39979dcbb70998b0e505fb1556a1d550a0781463ce84ebf915ba293ccb7e22", + "sha256:0a9f2c9dd19656823cb8250b0724ee9c60a82f3cdf68a080979d13092a3b0fef", + "sha256:0e03262ab796d986f978f79c943fc5f620381be7287148b8010b4097f79a39ec", + "sha256:0e5b2671f05ba48b94cb90ce55d8bdcaaedb8ba00cc5359f6810fc918713983d", + "sha256:0e6116757f7959a712db11f3e9c0a99ade00a5bbedae83cb801985aa154f071b", + "sha256:0fb2d542b4d66f9470e8065c5469ec676978d625a8b7a363f07d9a501a9cb36a", + "sha256:1082dd3e2d7109ad8b7da48e1d4710c8d06c253cbc4a27c1cff4fbcaa97a9e3f", + "sha256:1a8695a8d00c73e50bff9dfda4d540b7dee29ff9b8053e38380426a85ef10052", + "sha256:1e063337ef9e9820c77acc768546325ebe04ee38b08703244c1309cccc4f1bab", + "sha256:1ea40a64d23faa25e62a70ad163571c0b342b8bf66d5fa612ac0dec4f069d916", + "sha256:2058a32994f1fde4ca0480ab9d1e75a0e8c87c22b53a3ae66554f9af78f2fe8c", + "sha256:235f45e5dbcccf6bd99f9f472858849f73d11120d76ea8707115415f8e5ebebf", + "sha256:2807668ba86cb38c6817ad9bc66215ab8584d1d304030ce4f0887336f28a5e27", + "sha256:2b0a451c263b01acebe51895bfb0e1cc842a5c666efe06cdf13846c7418caa9a", + "sha256:2b3d326aaef0c0399d9afffeb6367d5e26ddc24d351dbc9c636840ac355dc5d8", + "sha256:2bfb5112df54209d820d7bf9317c7a6c9025ea52e49f46b6a2060104bba37de7", + "sha256:2f82865531efd18d6e07a04a17331af02cb7a651583c418df8266f17a63c6612", + "sha256:329467cecfb529c925cf2bbd4d60d2c509bc2fb52a20c1045bf09bb70971a9c1", + "sha256:3a1c81334778f9e3af2f8aeb7a960736e5cab1dfebfb26aabca09afd2906c039", + "sha256:3abcd9392a36025e3bd55f9bd38d908bd17962cc49bc6da8e7e96285336e2bca", + "sha256:3c6db6e52c6d70aa0d00d45cdb9b40f0433b96380071ea80b09277dba021ddf7", + "sha256:3dc625f4aa79713512d1976fe9f0bc99f706a9dee21dfd1810b4bbbf228d0e8a", + "sha256:3eb3fe62804e8f859c49ed20a8451342de53ed764150cb14ca71357c765dc2a6", + "sha256:44857c3227d3fb5e753d5fe4a3420d6376fa594b07b621e220cd93703fe21782", + "sha256:4b25d91e288e2c4e0662b8038a28c6a07eaac3e196cfc4ff69de4ea3db992a1b", + "sha256:4c5b0a576fb381edd6d27f0a85915c6daf2f8138dc5c267a57c08a62900758c7", + "sha256:4e61206137cbc65e6d5256e1166f88331d3b6238e082d9f74613b9b765fb9025", + "sha256:52fb90784e0a242bb96ec53f42196a17278855b0f31ac7c3cc6f5c1ec4811849", + "sha256:53a57d2ed685940a504248187d5685e49eb5eef0f696853647bf37c418c538f7", + "sha256:572c7e6c8bb4774d2ac88929e3d1f12bc45714ae5ee6d9a788a9fb35e60bb04b", + "sha256:5c4aa4e82353f65e548c476b37e64189783aa5384903bfea4f41580f255fddfa", + 
"sha256:5c92edd15cd58b3c2d34873597a1e20f13094f59cf88068adb18947df5455b4e", + "sha256:5f483cfb75ff703095c59e365360cb73e00185e01aaea067cd19acffd2ab20ea", + "sha256:61c18fba8e5e9db3ab908620af374db0ac1baa69f0f32df4f61ae23f15e586ac", + "sha256:6368900c2d3ef09b69cb0b913f9f8263b03786e5b2a387706c5afb66800efd51", + "sha256:64632ff9d614e5eecfb495796ad51b0ed98c453e447a76bcbeeb69615079fc7e", + "sha256:65132b7b4a1c0beded5e057324b7e16e10910c106d43675d9bd87d4f38dde162", + "sha256:6b99022f1d19bc32a4c2a0d544fc9a76e3be90f0b3f4af413f87d38749300e65", + "sha256:6bdfe4b3789761f3bcb4b1ddf33355a71079858958e3a552f16d5af19768fef2", + "sha256:6fa6dfc3e4d1f734a34710f391ae822e0a8eb8559a85c6979e14e65ee6ba2954", + "sha256:73662edf539e72a9440129f231ed3757faab89630d291b784ca99237fb94db2b", + "sha256:73cf6373c21bc80b2e0dc88444f41ae60b2f070ed02095754eb5a01df12256de", + "sha256:7cb8bc3605c29176e1b105350d2e6474142d7c1bd1d9327c4a9bdb46bf827acc", + "sha256:7f92c15cd1e97d4b12acd1cc9004fa092578acfa57b67ad5e43a197175d01a64", + "sha256:82f68293f055f51b51ea42fafc74b6aad03e70e191799430b90c13d643059ebb", + "sha256:83aa99b1285bc8f038941ddf598501a86f1536789740991d7d8756e34f1e74d9", + "sha256:87acbfcf8e90ca885206e98359d7dca4bcbb35abdc0ff66672a293e1d7a19101", + "sha256:87b31b6846e361ef83fedb187bb5b4372d0da3f7e28d85415efa92d6125d6e6d", + "sha256:881b21b5549499972441da4758d662aeea93f1923f953e9cbaff14b8b9565aef", + "sha256:8d55ab81c57b8ff8548c3e4947f119551253f4e3787a7bbc0b6b3ca47498a9d3", + "sha256:8f57a69461af2a5fa6e6bbd7a5f60d3b7e6cebb687f55106933188e79ad155c1", + "sha256:95237e53bb015f67b63c91af7518a62a8660376a6a0db19b89acc77a4d6199f5", + "sha256:96081f1605125ba0855dfda83f6f3df5ec90c61195421ba72223de35ccfb2f88", + "sha256:970919794d126ba8645f3837ab6046fb4e72bbc057b3709144066204c19a455d", + "sha256:9cb1da0f5a471435a7bc7e439b8a728e8b61e59784b2af70d7c169f8dd8ae290", + "sha256:9fcd347d2cc5c23b06de6d3b7b8275be558a0c90549495c699e379a80bf8379e", + "sha256:9fdac5d6ffa1b5a83bca06ffe7583f5576555e6c8b3a91fbd25ea7780f825f7d", + "sha256:a11c8d26a50bfab49002947d3d237abe4d9e4b5bdc8846a63537b6488e197808", + "sha256:a144d4f717285c6d9234a66778059f33a89096dfb9b39117663fd8413d582dcc", + "sha256:a2b911a5b90e0374d03813674bf0a5fbbb7741570dcd4b4e85a2e48d17def29d", + "sha256:a7ec89dc587667f22b6a0b6579c249fca9026ce7c333fc142ba42411fa243cdc", + "sha256:aa9d91b338f2df0508606f7009fde642391425189bba6d8c653afd80fd6bb64e", + "sha256:b0379a2b24882fef529ec3b4987cb5d003b9cda32256024e6fe1586ac45fc640", + "sha256:bc7aee6f634a6f4a95676fcb5d6559a2c2a390330098dba5e5a5f28a2e4ada30", + "sha256:bdc25f3681f7b78572699569514036afe3c243bc3059d3942624e936ec93450e", + "sha256:c083a3bdd5a93dfe480f1125926afcdbf2917ae714bdb80b36d34318b2bec5d9", + "sha256:c20c462aa4434b33a2661701b861604913f912254e441ab8d78d30485736115a", + "sha256:c2fc0a768ef76c15ab9238afa6da7f69895bb5d1ee83aeea2e3509af4472d0b9", + "sha256:c52b02ad8b4e2cf14ca7b3d918f3eb0ee91e63b3167c32591e57c4317e134f8f", + "sha256:c54c939ee22dc8e2d545da79fc5381f1c020d6d3141d3bd747eab59164dc89fb", + "sha256:c8e7af2f4e0194c22b5b37205bfb293d166a7344a5b0d0eaccebc376546d77d5", + "sha256:cca3868ddfaccfbc4bfb1d608e2ccaaebe0ae628e1416aeb9c4d88c001bb45ab", + "sha256:d3f26877a748dc4251cfcfda9dfb5f13fcb034f5308388066bcfe9031b63ae7d", + "sha256:d53b22f2032c42eaaf025f7c40c2e3b94568ae077a606f006d206a463bc69572", + "sha256:d87c561733f66531dced0da6e864f44ebf89a8fba55f31407b00c2f7f9449593", + "sha256:d946c8bf0d5c24bf4fe333af284c59a19358aa3ec18cb3dc4370080da1e8ad29", + "sha256:dac89aea9af8cd672fa7b510e7b8c33b0bba9a43186680550ccf23020f32d535", + 
"sha256:db4b41f9bd95fbe5acd76d89920336ba96f03e149097365afe1cb092fceb89a1", + "sha256:dc46a01bf8d62f227d5ecee74178ffc448ff4e5197c756331f71efcc66dc980f", + "sha256:dd14041875d09cc0f9308e37a6f8b65f5585cf2598a53aa0123df8b129d481f8", + "sha256:de4b83bb311557e439b9e186f733f6c645b9417c84e2eb8203f3f820a4b988bf", + "sha256:e799c050df38a639db758c617ec771fd8fb7a5f8eaaa4b27b101f266b216a246", + "sha256:e80b087132752f6b3d714f041ccf74403799d3b23a72722ea2e6ba2e892555b9", + "sha256:eb8c529b2819c37140eb51b914153063d27ed88e3bdc31b71198a198e921e011", + "sha256:eb9b459ca4df0e5c87deb59d37377461a538852765293f9e6ee834f0435a93b9", + "sha256:efec8db3266b76ef9607c2c4c419bdb06bf335ae433b80816089ea7585816f6a", + "sha256:f481959862f57f29601ccced557cc2e817bce7533ab8e01a797a48b49c9692b3", + "sha256:f517ca031dfc037a9c07e748cefd8d96235088b83b4f4ba8939105d20fa1dcd6", + "sha256:f889f7a40498cc077332c7ab6b4608d296d852182211787d4f3ee377aaae66e8", + "sha256:f8de619080e944347f5f20de29a975c2d815d9ddd8be9b9b7268e2e3ef68605a", + "sha256:f941635f2a3d96b2973e867144fde513665c87f13fe0e193c158ac51bfaaa7b2", + "sha256:fa754d1850735a0b0e03bcffd9d4b4343eb417e47196e4485d9cca326073a42c", + "sha256:fa854f5cf7e33842a892e5c73f45327760bc7bc516339fda888c75ae60edaeb6", + "sha256:fe5b32187cbc0c862ee201ad66c30cf218e5ed468ec8dc1cf49dec66e160cc4d" + ], + "markers": "python_version >= '3.9'", + "version": "==2.33.2" + }, "pygments": { "hashes": [ - "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f", - "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c" + "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", + "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b" ], "markers": "python_version >= '3.8'", - "version": "==2.19.1" + "version": "==2.19.2" + }, + "python-dotenv": { + "hashes": [ + "sha256:41f90bc6f5f177fb41f53e87666db362025010eb28f60a01c9143bfa33a2b2d5", + "sha256:d7c01d9e2293916c18baf562d95698754b0dbbb5e74d457c45d4f6561fb9d55d" + ], + "index": "pypi", + "markers": "python_version >= '3.9'", + "version": "==1.1.0" }, "requests": { "hashes": [ - "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", - "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6" + "sha256:27babd3cda2a6d50b30443204ee89830707d396671944c998b5975b031ac2b2c", + "sha256:27d0316682c8a29834d3264820024b62a36942083d52caf2f14c0591336d3422" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==2.32.3" + "version": "==2.32.4" }, "rich": { "hashes": [ @@ -172,13 +442,45 @@ "markers": "python_full_version >= '3.8.0'", "version": "==14.0.0" }, + "sniffio": { + "hashes": [ + "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", + "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc" + ], + "markers": "python_version >= '3.7'", + "version": "==1.3.1" + }, + "tqdm": { + "hashes": [ + "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", + "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2" + ], + "markers": "python_version >= '3.7'", + "version": "==4.67.1" + }, + "typing-extensions": { + "hashes": [ + "sha256:8676b788e32f02ab42d9e7c61324048ae4c6d844a399eebace3d4979d75ceef4", + "sha256:a1514509136dd0b477638fc68d6a91497af5076466ad0fa6c338e44e359944af" + ], + "markers": "python_version >= '3.9'", + "version": "==4.14.0" + }, + "typing-inspection": { + "hashes": [ + "sha256:389055682238f53b04f7badcb49b989835495a96700ced5dab2d8feae4b26f51", + 
"sha256:6ae134cc0203c33377d43188d4064e9b357dba58cff3185f22924610e70a9d28" + ], + "markers": "python_version >= '3.9'", + "version": "==0.4.1" + }, "urllib3": { "hashes": [ - "sha256:414bc6535b787febd7567804cc015fee39daab8ad86268f1310a9250697de466", - "sha256:4e16665048960a0900c702d4a66415956a584919c03361cac9f1df5c5dd7e813" + "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760", + "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc" ], "markers": "python_version >= '3.9'", - "version": "==2.4.0" + "version": "==2.5.0" } }, "develop": {} diff --git a/README.md b/README.md index b8d295c..eb8dc82 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ # InfiniChat -InfiniChat is a command-line application that simulates conversations between two LLMs running locally using Ollama. Just for fun really - I find it interesting to watch the chats! You can have them flesh out ideas, debate things, argue, or just give vague prompts and see what topics they spiral in to. + +InfiniChat is a command-line application that simulates conversations between two LLMs. It now supports both local Ollama models and OpenAI API models, giving you the flexibility to choose your preferred AI provider. Just for fun really - I find it interesting to watch the chats! You can have them flesh out ideas, debate things, argue, or just give vague prompts and see what topics they spiral in to.

InfiniChat Screenshot @@ -7,24 +8,35 @@ InfiniChat is a command-line application that simulates conversations between tw ## Features +- **Multi-Provider Support**: Works with both local Ollama and OpenAI API models - **Full Conversation History**: Models maintain a record of the entire conversation, enabling more coherent interactions - **Streaming Responses**: Real-time streaming of model outputs with live display - **Attractive Terminal UI**: Rich text formatting with color-coded speakers and panels - **Debate Mode**: Set a specific topic for the models to debate, with one arguing "for" and the other "against" - **Conversation Saving**: Automatically saves transcripts of conversations +- **Environment Configuration**: Easy setup with .env file support ## Requirements - Python 3.+ -- [Ollama](https://ollama.com/download) installed and running -- Required models (`llama3:latest` and `gemma3:12b` by default) pulled in Ollama -- A "non-trivial" amount of RAM. This uses 30GB+ on my Macbook. +- **For Ollama models**: + - [Ollama](https://ollama.com/) installed and running locally + - Local models pulled (e.g., `ollama pull llama3:latest`) +- **For OpenAI models**: + - OpenAI API key (set as environment variable `OPENAI_API_KEY`) + - Internet connection for API calls +- **Mixed usage**: Both requirements above for respective models ## Installation ```bash # Install dependencies pipenv install + +# For OpenAI models only: +# Copy the example environment file and configure your API key +cp .env.example .env +# Edit .env file and add your OpenAI API key ``` ## Usage @@ -50,15 +62,29 @@ InfiniChat supports the following command-line arguments: | `--stats` | Show message history statistics in panel titles | False | | `--history_limit` | Number of messages to keep in conversation history for each model before summarizing and trimming. Turn this down if messages gradually start to take longer to generate | 100 | | `--delay` | Delay in seconds between streaming chunks (for slower, more readable streaming) | 0.0 | -| `--model_a` | Name of the first AI model to use | llama3:latest | -| `--model_b` | Name of the second AI model to use | gemma3:12b | +| `--model_a` | Name of the first AI model to use (auto-detected based on client type if not specified) | gpt-4o (OpenAI) / llama3:latest (Ollama) | +| `--model_b` | Name of the second AI model to use (auto-detected based on client type if not specified) | gpt-4.1 (OpenAI) / gemma3:12b (Ollama) | +| `--client_a` | Type of client to use for model A (`ollama` or `openai`) | ollama | +| `--client_b` | Type of client to use for model B (`ollama` or `openai`) | ollama | | `--debate_topic "Pizza is a vegetable"` | Topic to debate, model A will be "for" the topic, model B will be "against" | None | | `--model_a_prompt "Your custom prompt"` | Custom system prompt for model A (overrides default from `prompts.py`) | None | | `--model_b_prompt "Your custom prompt"` | Custom system prompt for model B (overrides default from `prompts.py`) | None | ### Examples -Run with custom settings: +#### Basic Usage +```bash +# Default: Two Ollama models (requires Ollama installation) +pipenv run python app.py + +# Two OpenAI models +pipenv run python app.py --client_a openai --client_b openai + +# Mixed: Ollama model A and OpenAI model B +pipenv run python app.py --client_a ollama --client_b openai +``` + +#### Advanced Usage ```bash # Run with 2000 conversation turns! 
pipenv run python app.py --max_turns 2000 @@ -75,9 +101,15 @@ pipenv run python app.py --delay 0.1 # Use different models pipenv run python app.py --model_a qwen:latest --model_b deepseek-r1:latest +# Use different OpenAI models +pipenv run python app.py --client_a openai --client_b openai --model_a gpt-4o --model_b gpt-4.1 + # Start a debate pipenv run python app.py --debate_topic "Coffee is better than tea" +# Debate mode with mixed models +pipenv run python app.py --client_a ollama --client_b openai --debate_topic "Will artificial intelligence replace humans?" + # Use custom prompts for both models pipenv run python app.py --model_a_prompt "You are a cheerful assistant who loves to help people" --model_b_prompt "You are a serious academic who prefers formal language" diff --git a/app.py b/app.py index cc2990a..7602736 100644 --- a/app.py +++ b/app.py @@ -1,4 +1,4 @@ -from llm_client import OllamaClient +from llm_client import OllamaClient, OpenAIClient from console_utils import console from prompts import * from rich.panel import Panel @@ -11,11 +11,16 @@ import sys import time +# Default Ollama models # MODEL_A_NAME = "deepseek-r1:latest" # MODEL_A_NAME = "qwen:latest" MODEL_A_NAME = "llama3:latest" MODEL_B_NAME = "gemma3:12b" +# Default OpenAI models +OPENAI_MODEL_A_NAME = "gpt-4o" +OPENAI_MODEL_B_NAME = "gpt-4.1" + # Style for client A - blue theme CLIENT_A_STYLE = "bold blue" CLIENT_A_PANEL_STYLE = "blue" @@ -386,14 +391,28 @@ def display_chunk(chunk): parser.add_argument( "--model_a", type=str, - default="llama3:latest", - help="Name of the first AI model to use", + default="", + help="Name of the first AI model to use (auto-detected based on client type if not specified)", ) parser.add_argument( "--model_b", type=str, - default="gemma3:12b", - help="Name of the second AI model to use", + default="", + help="Name of the second AI model to use (auto-detected based on client type if not specified)", + ) + parser.add_argument( + "--client_a", + type=str, + choices=["ollama", "openai"], + default="ollama", + help="Type of client to use for model A (ollama or openai)", + ) + parser.add_argument( + "--client_b", + type=str, + choices=["ollama", "openai"], + default="ollama", + help="Type of client to use for model B (ollama or openai)", ) parser.add_argument( "--stats", @@ -420,6 +439,13 @@ def display_chunk(chunk): ) args = parser.parse_args() + # Auto-detect model names based on client type if not specified + if not args.model_a: + args.model_a = OPENAI_MODEL_A_NAME if args.client_a == "openai" else MODEL_A_NAME + + if not args.model_b: + args.model_b = OPENAI_MODEL_B_NAME if args.client_b == "openai" else MODEL_B_NAME + if args.debate_topic: console.print( f"[bold yellow]Debate mode enabled! 
Topic: {args.debate_topic}[/bold yellow]" @@ -442,22 +468,24 @@ def display_chunk(chunk): console.print("[bold green]Using custom prompt for Model B[/bold green]") # Initialize clients with parsed arguments - client_A = OllamaClient( - model_name=args.model_a, - debug_mode=args.debug, - show_json=args.show_json, - system_prompt=MODEL_A_PROMPT, - history_limit=args.history_limit, - log_history=args.log_history, - ) - client_B = OllamaClient( - model_name=args.model_b, - debug_mode=args.debug, - show_json=args.show_json, - system_prompt=MODEL_B_PROMPT, - history_limit=args.history_limit, - log_history=args.log_history, - ) + def create_client(client_type, model_name, system_prompt): + """Create a client based on the specified type.""" + client_args = { + "model_name": model_name, + "debug_mode": args.debug, + "show_json": args.show_json, + "system_prompt": system_prompt, + "history_limit": args.history_limit, + "log_history": args.log_history, + } + + if client_type == "openai": + return OpenAIClient(**client_args) + else: # default to ollama + return OllamaClient(**client_args) + + client_A = create_client(args.client_a, args.model_a, MODEL_A_PROMPT) + client_B = create_client(args.client_b, args.model_b, MODEL_B_PROMPT) # Print welcome message console.print("") diff --git a/llm_client.py b/llm_client.py index 65758f0..815dbcb 100644 --- a/llm_client.py +++ b/llm_client.py @@ -3,6 +3,7 @@ import platform import subprocess import sys +import os from console_utils import console from rich.panel import Panel from ollama_utils import ( @@ -13,6 +14,15 @@ check_ollama_availability, try_start_ollama_service, ) +from dotenv import load_dotenv + +# Load environment variables +load_dotenv() + +try: + from openai import OpenAI +except ImportError: + OpenAI = None HISTORY_LOG_FILE = "message_history.log" @@ -403,3 +413,218 @@ def _calculate_message_history_size(self): total_size += sys.getsizeof(value) return total_size + + +class OpenAIClient: + def __init__( + self, + model_name: str, + system_prompt: str = None, + debug_mode: bool = False, + show_json: bool = False, + quiet_mode: bool = False, + history_limit: int = 30, + log_history: bool = False, + ): + self.quiet_mode = quiet_mode + self.debug_mode = debug_mode + self.show_json = show_json + self.model_name = model_name + self.system_prompt = system_prompt + self.history_limit = history_limit + self.log_history = log_history + self.message_history = [] # Initialize an empty message history, used for chat models + self.trim_count = 0 # Counter for how many times we've trimmed the history + + # Check if OpenAI library is installed + if OpenAI is None: + raise ImportError("OpenAI library not installed. Run: pip install openai") + + # Get API key from environment + api_key = os.getenv('OPENAI_API_KEY') + if not api_key: + raise ValueError("OPENAI_API_KEY environment variable not set") + + # Initialize the OpenAI client + self.client = OpenAI(api_key=api_key) + + # Add system prompt to message history if provided + if self.system_prompt: + self.message_history.append( + {"role": "system", "content": self.system_prompt} + ) + + if not self.quiet_mode: + console.print( + f"[bold green]OpenAIClient initialized with model: {self.model_name}[/bold green]" + ) + + def add_message_to_history(self, role: str, content: str): + """ + Add a message to the message history for chat models. + :param role: The role of the message sender ('user' or 'assistant'). + :param content: The content of the message. 
+ """ + self.message_history.append({"role": role, "content": content}) + + if self.log_history: + with open(HISTORY_LOG_FILE, "w") as f: + f.write(f"Message History Size: {len(self.message_history)}\n") + f.write( + f"Message History in Bytes: {self._calculate_message_history_size()}\n" + ) + f.write(f"Model Name: {self.model_name}\n") + f.write(json.dumps(self.message_history, indent=2)) + + def chat(self, max_tokens: int = 1_000) -> str: + """ + Generate a chat response using the specified OpenAI model and the current message history. + :param max_tokens: The maximum number of tokens to generate. + :return: The generated chat response. + """ + if not self.message_history: + raise ValueError( + "Message history is empty. Add messages before calling chat()." + ) + + # Auto-trim if the history is getting too long + self._auto_trim_if_needed() + + response = self.client.chat.completions.create( + model=self.model_name, + messages=self.message_history, + max_tokens=max_tokens, + temperature=0.7, + ) + + if self.show_json: + print(f"\n****\nOpenAI API Response: {json.dumps(response.model_dump(), indent=2)} \n****") + + response_text = response.choices[0].message.content + + # Add the response to history + self.add_message_to_history("assistant", response_text) + + if self.debug_mode: + print(f"Chat response: {response_text}") + + return response_text + + def chat_stream(self, max_tokens: int = 100): + """ + Generate a streaming chat response using the specified OpenAI model and the current message history. + + :param max_tokens: The maximum number of tokens to generate. + :return: A generator yielding chunks of the generated chat response. + """ + if not self.message_history: + raise ValueError( + "Message history is empty. Add messages before calling chat_stream()." + ) + + # Auto-trim if the history is getting too long + self._auto_trim_if_needed() + + stream = self.client.chat.completions.create( + model=self.model_name, + messages=self.message_history, + max_tokens=max_tokens, + temperature=0.7, + stream=True, + ) + + full_response = "" + for chunk in stream: + if chunk.choices[0].delta.content is not None: + content = chunk.choices[0].delta.content + full_response += content + yield content + + # Add the complete response to history + self.add_message_to_history("assistant", full_response) + + def chat_stream_with_callback(self, max_tokens: int = 100, callback=None): + """ + Generate a streaming chat response and apply a callback function to each chunk. + Also returns the complete response as a single string. + + :param max_tokens: The maximum number of tokens to generate. + :param callback: A function to call with each chunk (e.g., to display it). + :return: The complete generated chat response as a string. + """ + result = "" + for chunk in self.chat_stream(max_tokens): + # Apply the callback to each chunk if provided + if callback: + callback(chunk) + + # Accumulate the result + result += chunk + + return result + + def trim_message_history(self, max_messages, keep_system_prompt=True): + """ + Trim the message history to prevent it from growing too large. 
+ + :param max_messages: Maximum number of messages before triggering trim + :param keep_system_prompt: Whether to always keep the system prompt + """ + if self.debug_mode: + print(f"Trimming message history to a maximum of {max_messages} messages.") + print(f"Current message history length: {len(self.message_history)}") + + # Check if trimming is needed + if len(self.message_history) <= max_messages: + if self.debug_mode: + print("No trimming needed - history is within limit.") + return + + # Keep system prompt if requested + system_messages = [] + other_messages = [] + + for msg in self.message_history: + if msg["role"] == "system" and keep_system_prompt: + system_messages.append(msg) + else: + other_messages.append(msg) + + # Calculate how many non-system messages to keep + available_slots = max_messages - len(system_messages) + if available_slots > 0: + # Keep the most recent messages + other_messages = other_messages[-available_slots:] + else: + other_messages = [] + + # Rebuild message history + self.message_history = system_messages + other_messages + + if self.debug_mode: + print(f"Trimmed message history to {len(self.message_history)} messages") + + def _calculate_message_history_size(self): + """Calculate the approximate size of the message history in bytes.""" + total_size = 0 + for message in self.message_history: + total_size += sys.getsizeof(message) + # Add size of each key-value pair in the message dictionary + for key, value in message.items(): + total_size += sys.getsizeof(key) + total_size += sys.getsizeof(value) + + return total_size + + def _auto_trim_if_needed(self): + """ + Automatically trim the message history if it exceeds the history limit. + This helps prevent token limits from being exceeded. + """ + self.trim_count += 1 + + # Only check every few messages to avoid constant trimming + if self.trim_count % 5 == 0 and len(self.message_history) > self.history_limit: + if self.debug_mode: + print(f"Auto-trimming message history (current size: {len(self.message_history)})") + self.trim_message_history(self.history_limit) From 73f4bc2b1406d7d3f5e8a66616d00c47459fabb0 Mon Sep 17 00:00:00 2001 From: Tonyo Date: Sat, 21 Jun 2025 21:21:05 +0200 Subject: [PATCH 2/3] fix: resolve OpenAI API compatibility issues and encoding problems on conversation history --- app.py | 2 +- llm_client.py | 55 +++++++++++++++++++++++++++++++++++---------------- 2 files changed, 39 insertions(+), 18 deletions(-) diff --git a/app.py b/app.py index 7602736..6d02939 100644 --- a/app.py +++ b/app.py @@ -509,7 +509,7 @@ def create_client(client_type, model_name, system_prompt): # Save the conversation history to a file output_file = "conversation_history.txt" - with open(output_file, "w") as f: + with open(output_file, "w", encoding="utf-8") as f: f.write(conversation_history) # Confirm save with nice formatting diff --git a/llm_client.py b/llm_client.py index 815dbcb..9cc2b6e 100644 --- a/llm_client.py +++ b/llm_client.py @@ -459,6 +459,30 @@ def __init__( f"[bold green]OpenAIClient initialized with model: {self.model_name}[/bold green]" ) + def _get_api_params(self, max_tokens: int, temperature: float = 0.7): + """ + Get API parameters based on model capabilities. + Reasoning models (like o4-mini) have restrictions on certain parameters. 
+ """ + params = { + "model": self.model_name, + "messages": self.message_history, + } + + # Handle max_tokens vs max_completion_tokens + # Reasoning models require max_completion_tokens instead of max_tokens + if "o4" in self.model_name.lower() or "o3" in self.model_name.lower(): + params["max_completion_tokens"] = max_tokens + else: + params["max_tokens"] = max_tokens + + # Handle temperature restrictions + # Reasoning models only support default temperature + if not ("o4" in self.model_name.lower() or "o1" in self.model_name.lower()): + params["temperature"] = temperature + + return params + def add_message_to_history(self, role: str, content: str): """ Add a message to the message history for chat models. @@ -476,10 +500,11 @@ def add_message_to_history(self, role: str, content: str): f.write(f"Model Name: {self.model_name}\n") f.write(json.dumps(self.message_history, indent=2)) - def chat(self, max_tokens: int = 1_000) -> str: + def chat(self, max_tokens: int = 1_000, temperature: float = 0.7) -> str: """ Generate a chat response using the specified OpenAI model and the current message history. :param max_tokens: The maximum number of tokens to generate. + :param temperature: The temperature for response generation (ignored for models that don't support it). :return: The generated chat response. """ if not self.message_history: @@ -490,12 +515,9 @@ def chat(self, max_tokens: int = 1_000) -> str: # Auto-trim if the history is getting too long self._auto_trim_if_needed() - response = self.client.chat.completions.create( - model=self.model_name, - messages=self.message_history, - max_tokens=max_tokens, - temperature=0.7, - ) + # Get appropriate API parameters for this model + api_params = self._get_api_params(max_tokens, temperature) + response = self.client.chat.completions.create(**api_params) if self.show_json: print(f"\n****\nOpenAI API Response: {json.dumps(response.model_dump(), indent=2)} \n****") @@ -510,11 +532,12 @@ def chat(self, max_tokens: int = 1_000) -> str: return response_text - def chat_stream(self, max_tokens: int = 100): + def chat_stream(self, max_tokens: int = 100, temperature: float = 0.7): """ Generate a streaming chat response using the specified OpenAI model and the current message history. :param max_tokens: The maximum number of tokens to generate. + :param temperature: The temperature for response generation (ignored for models that don't support it). :return: A generator yielding chunks of the generated chat response. """ if not self.message_history: @@ -525,13 +548,10 @@ def chat_stream(self, max_tokens: int = 100): # Auto-trim if the history is getting too long self._auto_trim_if_needed() - stream = self.client.chat.completions.create( - model=self.model_name, - messages=self.message_history, - max_tokens=max_tokens, - temperature=0.7, - stream=True, - ) + # Get appropriate API parameters for this model + api_params = self._get_api_params(max_tokens, temperature) + api_params["stream"] = True + stream = self.client.chat.completions.create(**api_params) full_response = "" for chunk in stream: @@ -543,17 +563,18 @@ def chat_stream(self, max_tokens: int = 100): # Add the complete response to history self.add_message_to_history("assistant", full_response) - def chat_stream_with_callback(self, max_tokens: int = 100, callback=None): + def chat_stream_with_callback(self, max_tokens: int = 100, temperature: float = 0.7, callback=None): """ Generate a streaming chat response and apply a callback function to each chunk. 
Also returns the complete response as a single string. :param max_tokens: The maximum number of tokens to generate. + :param temperature: The temperature for response generation (ignored for models that don't support it). :param callback: A function to call with each chunk (e.g., to display it). :return: The complete generated chat response as a string. """ result = "" - for chunk in self.chat_stream(max_tokens): + for chunk in self.chat_stream(max_tokens, temperature): # Apply the callback to each chunk if provided if callback: callback(chunk) From 73526aa3f977dfce1d15bc13929ee1fb827fff8d Mon Sep 17 00:00:00 2001 From: Keylias <123815531+keyliass@users.noreply.github.com> Date: Sun, 22 Jun 2025 20:26:38 +0200 Subject: [PATCH 3/3] fix: typo in the detection of reasoning models --- llm_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llm_client.py b/llm_client.py index 9cc2b6e..1266337 100644 --- a/llm_client.py +++ b/llm_client.py @@ -478,7 +478,7 @@ def _get_api_params(self, max_tokens: int, temperature: float = 0.7): # Handle temperature restrictions # Reasoning models only support default temperature - if not ("o4" in self.model_name.lower() or "o1" in self.model_name.lower()): + if not ("o4" in self.model_name.lower() or "o3" in self.model_name.lower()): params["temperature"] = temperature return params
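
As a quick sanity check of the reasoning-model handling that `_get_api_params` ends up with after PATCH 3/3, here is a minimal standalone sketch of the same mapping. The `build_params` helper and the example model names are illustrative assumptions rather than code from this repository; the real method also attaches `self.message_history` as the `messages` parameter.

# Illustrative sketch only: mirrors OpenAIClient._get_api_params after the
# PATCH 3/3 fix. build_params is a hypothetical helper, not repository code.

def build_params(model_name: str, max_tokens: int, temperature: float = 0.7) -> dict:
    name = model_name.lower()
    # Reasoning models (o3/o4 families) are detected by substring, as in the patch.
    is_reasoning = "o4" in name or "o3" in name

    params = {"model": model_name}  # the real method also adds "messages"
    if is_reasoning:
        # Reasoning models require max_completion_tokens instead of max_tokens
        # and only support the default temperature, so temperature is omitted.
        params["max_completion_tokens"] = max_tokens
    else:
        params["max_tokens"] = max_tokens
        params["temperature"] = temperature
    return params


if __name__ == "__main__":
    print(build_params("gpt-4o", 1000))   # {'model': 'gpt-4o', 'max_tokens': 1000, 'temperature': 0.7}
    print(build_params("o4-mini", 1000))  # {'model': 'o4-mini', 'max_completion_tokens': 1000}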