Sébastien De Greef committed on
Commit
bde8b55
1 Parent(s): d6fc6f4

chore: Update Dockerfile to use pip3 for installing requirements

Browse files
Files changed (1) hide show
  1. ModelsCatalog.ipynb +219 -0
ModelsCatalog.ipynb ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 91,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stdout",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "https://ollama.com/library\n"
13
+ ]
14
+ }
15
+ ],
16
+ "source": [
17
+ "from bs4 import BeautifulSoup\n",
18
+ "from requests import get\n",
19
+ "# download the HTML content\n",
20
+ "\n",
21
+ "base_url = 'https://ollama.com'\n",
22
+ "library_url = f'{base_url}/library'\n",
23
+ "print(library_url)\n",
24
+ "html_content = get(library_url).text\n",
25
+ "\n",
26
+ "\n",
27
+ "# Parse the HTML content with BeautifulSoup\n",
28
+ "soup = BeautifulSoup(html_content, 'html.parser')\n",
29
+ "\n",
30
+ "# Extract all the li elements within the ul\n",
31
+ "li_items = soup.select('ul[role=\"list\"] > li')\n",
32
+ "\n",
33
+ "models = []\n",
34
+ "\n",
35
+ "# Iterate over the extracted li elements and build one record per element\n",
36
+ "for li in li_items:\n",
37
+ " # collect the text of the span elements (stored below as the params list)\n",
38
+ " sizes = li.div.div.select('span')\n",
39
+ " sizes = [size.text for size in sizes]\n",
40
+ "\n",
41
+ " pulls = li.div.select('p')[1].select('span')\n",
42
+ " # remove svg tags from pulls\n",
43
+ " pulls = [pull.text[:-1] for pull in pulls]\n",
44
+ " pulls = pulls[0].split('\\xa0')[0].strip()\n",
45
+ "\n",
46
+ " model = {\n",
47
+ " \"name\": li.h2.text.strip(),\n",
48
+ " \"description\": li.p.text.strip(),\n",
49
+ " \"url\": f\"{base_url}{li.a['href']}\",\n",
50
+ " \"params\": sizes,\n",
51
+ " \"pulls\": pulls \n",
52
+ " }\n",
53
+ " models.append(model)\n",
54
+ "import json\n",
55
+ "with open('models.json', 'w', encoding=\"utf-8\") as file:\n",
56
+ " file.write(json.dumps(models, indent=4, ensure_ascii=False))"
57
+ ]
58
+ },
59
+ {
60
+ "cell_type": "code",
61
+ "execution_count": 97,
62
+ "metadata": {},
63
+ "outputs": [
64
+ {
65
+ "name": "stdout",
66
+ "output_type": "stream",
67
+ "text": [
68
+ "Model: llama3\n",
69
+ "Model: phi3\n",
70
+ "Model: wizardlm2\n",
71
+ "Model: mistral\n",
72
+ "Model: gemma\n",
73
+ "Model: mixtral\n",
74
+ "Model: llama2\n",
75
+ "Model: codegemma\n",
76
+ "Model: command-r\n",
77
+ "Model: command-r-plus\n",
78
+ "Model: llava\n",
79
+ "Model: dbrx\n",
80
+ "Model: codellama\n",
81
+ "Model: qwen\n",
82
+ "Model: dolphin-mixtral\n",
83
+ "Model: llama2-uncensored\n",
84
+ "Model: deepseek-coder\n",
85
+ "Model: mistral-openorca\n",
86
+ "Model: nomic-embed-text\n",
87
+ "Model: dolphin-mistral\n",
88
+ "Model: phi\n",
89
+ "Model: orca-mini\n",
90
+ "Model: nous-hermes2\n",
91
+ "Model: zephyr\n",
92
+ "Model: llama2-chinese\n",
93
+ "Model: wizard-vicuna-uncensored\n",
94
+ "Model: starcoder2\n",
95
+ "Model: vicuna\n",
96
+ "Model: tinyllama\n",
97
+ "Model: openhermes\n",
98
+ "Model: starcoder\n",
99
+ "Model: openchat\n",
100
+ "Model: dolphin-llama3\n",
101
+ "Model: yi\n",
102
+ "Model: tinydolphin\n",
103
+ "Model: wizardcoder\n",
104
+ "Model: stable-code\n",
105
+ "Model: mxbai-embed-large\n",
106
+ "Model: neural-chat\n",
107
+ "Model: phind-codellama\n",
108
+ "Model: wizard-math\n",
109
+ "Model: starling-lm\n",
110
+ "Model: falcon\n",
111
+ "Model: dolphincoder\n",
112
+ "Model: nous-hermes\n",
113
+ "Model: orca2\n",
114
+ "Model: sqlcoder\n",
115
+ "Model: stablelm2\n",
116
+ "Model: dolphin-phi\n",
117
+ "Model: solar\n",
118
+ "Model: yarn-llama2\n",
119
+ "Model: deepseek-llm\n",
120
+ "Model: codeqwen\n",
121
+ "Model: bakllava\n",
122
+ "Model: all-minilm\n",
123
+ "Model: samantha-mistral\n",
124
+ "Model: llama3-gradient\n",
125
+ "Model: medllama2\n",
126
+ "Model: wizardlm-uncensored\n",
127
+ "Model: xwinlm\n",
128
+ "Model: nous-hermes2-mixtral\n",
129
+ "Model: stable-beluga\n",
130
+ "Model: wizardlm\n",
131
+ "Model: codeup\n",
132
+ "Model: yarn-mistral\n",
133
+ "Model: everythinglm\n",
134
+ "Model: meditron\n",
135
+ "Model: llama-pro\n",
136
+ "Model: magicoder\n",
137
+ "Model: stablelm-zephyr\n",
138
+ "Model: nexusraven\n",
139
+ "Model: codebooga\n",
140
+ "Model: mistrallite\n",
141
+ "Model: llama3-chatqa\n",
142
+ "Model: wizard-vicuna\n",
143
+ "Model: snowflake-arctic-embed\n",
144
+ "Model: llava-llama3\n",
145
+ "Model: goliath\n",
146
+ "Model: open-orca-platypus2\n",
147
+ "Model: moondream\n",
148
+ "Model: duckdb-nsql\n",
149
+ "Model: notux\n",
150
+ "Model: megadolphin\n",
151
+ "Model: notus\n",
152
+ "Model: alfred\n",
153
+ "Model: llava-phi3\n",
154
+ "Model: falcon2\n"
155
+ ]
156
+ }
157
+ ],
158
+ "source": [
159
+ "for model in models:\n",
160
+ " tagsurl = f\"{model['url']}/tags\"\n",
161
+ " tags_page = get(tagsurl).text\n",
162
+ " # Parse the HTML content with BeautifulSoup\n",
163
+ " soup = BeautifulSoup(tags_page, 'html.parser')\n",
164
+ " # select links with the class group\n",
165
+ " tags = soup.select('a.group')\n",
166
+ " print(f\"Model: {model['name']}\")\n",
167
+ " model_tags = []\n",
168
+ " for tag in tags:\n",
169
+ " # get the parent div of the tag\n",
170
+ " parent = tag.parent\n",
171
+ " sizes = parent.parent.select('div.items-baseline')[0].text.strip().split(' • ',2)\n",
172
+ " # strip each size\n",
173
+ " sizes = [size.strip() for size in sizes]\n",
174
+ " model_tags.append({\n",
175
+ " \"name\": tag.text.strip(),\n",
176
+ " \"url\": f\"{base_url}{tag['href']}\",\n",
177
+ " \"size\": sizes[1],\n",
178
+ " \"hash\": sizes[0],\n",
179
+ " \"updated\": sizes[2],\n",
180
+ " })\n",
181
+ " link = tag['href']\n",
182
+ " #print(sizes,\"----\")\n",
183
+ " # select the span elements inside the tag's parent element\n",
184
+ " sibling = parent.select('span')\n",
185
+ " if len(sibling) == 1:\n",
186
+ " hash = sibling[0].text.strip()\n",
187
+ " if len(sibling) == 3:\n",
188
+ " size = sibling[2].strip()\n",
189
+ " else:\n",
190
+ " pass\n",
191
+ " #print(sibling)\n",
192
+ " model[\"tags\"] = model_tags\n",
193
+ "with open('models.json', 'w', encoding=\"utf-8\") as file:\n",
194
+ " file.write(json.dumps(models, indent=4, ensure_ascii=False))"
195
+ ]
196
+ }
197
+ ],
198
+ "metadata": {
199
+ "kernelspec": {
200
+ "display_name": "base",
201
+ "language": "python",
202
+ "name": "python3"
203
+ },
204
+ "language_info": {
205
+ "codemirror_mode": {
206
+ "name": "ipython",
207
+ "version": 3
208
+ },
209
+ "file_extension": ".py",
210
+ "mimetype": "text/x-python",
211
+ "name": "python",
212
+ "nbconvert_exporter": "python",
213
+ "pygments_lexer": "ipython3",
214
+ "version": "3.11.7"
215
+ }
216
+ },
217
+ "nbformat": 4,
218
+ "nbformat_minor": 2
219
+ }