{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "https://ollama.com/library\n"
     ]
    }
   ],
   "source": [
    "from bs4 import BeautifulSoup\n",
    "from requests import get\n",
    "# download the HTML content\n",
    "\n",
    "base_url = 'https://ollama.com'\n",
    "library_url = f'{base_url}/library'\n",
    "print(library_url)\n",
    "html_content = get(library_url).text\n",
    "\n",
    "\n",
    "# Parse the HTML content with BeautifulSoup\n",
    "soup = BeautifulSoup(html_content, 'html.parser')\n",
    "\n",
    "# Extract all the li elements within the ul\n",
    "li_items = soup.select('ul[role=\"list\"] > li')\n",
    "\n",
    "models = []\n",
    "\n",
    "# Iterate over the extracted li elements and print them\n",
    "for li in li_items:\n",
    "    # get first a tag text\n",
    "    sizes = li.div.div.select('span')\n",
    "    sizes = [size.text for size in sizes]\n",
    "\n",
    "    pulls = li.div.select('p')[1].select('span')\n",
    "    # remove svg tags from pulls\n",
    "    pulls = [pull.text[:-1] for pull in pulls]\n",
    "    pulls = pulls[0].split('\\xa0')[0].strip()\n",
    "\n",
    "    model = {\n",
    "        \"name\": li.h2.text.strip(),\n",
    "        \"description\": li.p.text.strip(),\n",
    "        \"url\": f\"{base_url}{li.a['href']}\",\n",
    "        \"params\": sizes,\n",
    "        \"pulls\": pulls      \n",
    "    }\n",
    "    models.append(model)\n",
    "import json\n",
    "with open('models.json', 'w', encoding=\"utf-8\") as file:\n",
    "    file.write(json.dumps(models, indent=4, ensure_ascii=False))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Model: llama3\n",
      "Model: phi3\n",
      "Model: wizardlm2\n",
      "Model: mistral\n",
      "Model: gemma\n",
      "Model: mixtral\n",
      "Model: llama2\n",
      "Model: codegemma\n",
      "Model: command-r\n",
      "Model: command-r-plus\n",
      "Model: llava\n",
      "Model: dbrx\n",
      "Model: codellama\n",
      "Model: qwen\n",
      "Model: dolphin-mixtral\n",
      "Model: llama2-uncensored\n",
      "Model: deepseek-coder\n",
      "Model: mistral-openorca\n",
      "Model: nomic-embed-text\n",
      "Model: dolphin-mistral\n",
      "Model: phi\n",
      "Model: orca-mini\n",
      "Model: nous-hermes2\n",
      "Model: zephyr\n",
      "Model: llama2-chinese\n",
      "Model: wizard-vicuna-uncensored\n",
      "Model: starcoder2\n",
      "Model: vicuna\n",
      "Model: tinyllama\n",
      "Model: openhermes\n",
      "Model: starcoder\n",
      "Model: openchat\n",
      "Model: dolphin-llama3\n",
      "Model: yi\n",
      "Model: tinydolphin\n",
      "Model: wizardcoder\n",
      "Model: stable-code\n",
      "Model: mxbai-embed-large\n",
      "Model: neural-chat\n",
      "Model: phind-codellama\n",
      "Model: wizard-math\n",
      "Model: starling-lm\n",
      "Model: falcon\n",
      "Model: dolphincoder\n",
      "Model: nous-hermes\n",
      "Model: orca2\n",
      "Model: sqlcoder\n",
      "Model: stablelm2\n",
      "Model: dolphin-phi\n",
      "Model: solar\n",
      "Model: yarn-llama2\n",
      "Model: deepseek-llm\n",
      "Model: codeqwen\n",
      "Model: bakllava\n",
      "Model: all-minilm\n",
      "Model: samantha-mistral\n",
      "Model: llama3-gradient\n",
      "Model: medllama2\n",
      "Model: wizardlm-uncensored\n",
      "Model: xwinlm\n",
      "Model: nous-hermes2-mixtral\n",
      "Model: stable-beluga\n",
      "Model: wizardlm\n",
      "Model: codeup\n",
      "Model: yarn-mistral\n",
      "Model: everythinglm\n",
      "Model: meditron\n",
      "Model: llama-pro\n",
      "Model: magicoder\n",
      "Model: stablelm-zephyr\n",
      "Model: nexusraven\n",
      "Model: codebooga\n",
      "Model: mistrallite\n",
      "Model: llama3-chatqa\n",
      "Model: wizard-vicuna\n",
      "Model: snowflake-arctic-embed\n",
      "Model: llava-llama3\n",
      "Model: goliath\n",
      "Model: open-orca-platypus2\n",
      "Model: moondream\n",
      "Model: duckdb-nsql\n",
      "Model: notux\n",
      "Model: megadolphin\n",
      "Model: notus\n",
      "Model: alfred\n",
      "Model: llava-phi3\n",
      "Model: falcon2\n"
     ]
    }
   ],
   "source": [
    "for model in models:\n",
    "    tagsurl = f\"{model['url']}/tags\"\n",
    "    tags_page = get(tagsurl).text\n",
    "    # Parse the HTML content with BeautifulSoup\n",
    "    soup = BeautifulSoup(tags_page, 'html.parser')\n",
    "    # select links with the class group\n",
    "    tags = soup.select('a.group')\n",
    "    print(f\"Model: {model['name']}\")\n",
    "    model_tags = []\n",
    "    for tag in tags:\n",
    "        # get the parent div of the tag\n",
    "        parent = tag.parent\n",
    "        sizes = parent.parent.select('div.items-baseline')[0].text.strip().split(' • ',2)\n",
    "        # strip each size\n",
    "        sizes = [size.strip() for size in sizes]\n",
    "        model_tags.append({\n",
    "            \"name\": tag.text.strip(),\n",
    "            \"url\": f\"{base_url}{tag['href']}\",\n",
    "            \"size\": sizes[1],\n",
    "            \"hash\": sizes[0],\n",
    "            \"updated\": sizes[2],\n",
    "        })\n",
    "        link = tag['href']\n",
    "        #print(sizes,\"----\")\n",
    "        # get the next sibling of the parent div\n",
    "        sibling = parent.select('span')\n",
    "        if len(sibling) == 1:\n",
    "            hash = sibling[0].text.strip()\n",
    "            if len(sibling) == 3:\n",
    "                size = sibling[2].strip()\n",
    "            else:\n",
    "                pass\n",
    "                #print(sibling)\n",
    "    model[\"tags\"] = model_tags\n",
    "with open('models.json', 'w', encoding=\"utf-8\") as file:\n",
    "    file.write(json.dumps(models, indent=4, ensure_ascii=False))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}