-
Notifications
You must be signed in to change notification settings - Fork 19
/
24-3.1 Query Wikidata and Wikipedia
1 lines (1 loc) · 9 KB
/
24-3.1 Query Wikidata and Wikipedia
1
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"24-3.1 Query Wikidata and Wikipedia","provenance":[],"collapsed_sections":[],"authorship_tag":"ABX9TyOXkQcVORNvgxvx/oqEyTqq"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"YH36iyxX5sFa"},"source":["# Query Wikidata and Wikipedia\n","# author: Gressling, T\n","# license: MIT License # code: github.com/gressling/examples\n","# activity: single example # index: 24-3 "],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"29qC_SFI6Laf","executionInfo":{"status":"ok","timestamp":1605343182344,"user_tz":-60,"elapsed":5958,"user":{"displayName":"Dr. Thorsten Gressling","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gjsy54zs1Pd2tw4fMBrz3a7FsYlzNHgwHWDqLywqA=s64","userId":"08619859847936335393"}},"outputId":"9d644546-0648-4d4f-ccd0-f00878448a77","colab":{"base_uri":"https://localhost:8080/"}},"source":["!pip install wikipedia"],"execution_count":1,"outputs":[{"output_type":"stream","text":["Collecting wikipedia\n"," Downloading https://files.pythonhosted.org/packages/67/35/25e68fbc99e672127cc6fbb14b8ec1ba3dfef035bf1e4c90f78f24a80b7d/wikipedia-1.4.0.tar.gz\n","Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.6/dist-packages (from wikipedia) (4.6.3)\n","Requirement already satisfied: requests<3.0.0,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from wikipedia) (2.23.0)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests<3.0.0,>=2.0.0->wikipedia) (2020.6.20)\n","Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests<3.0.0,>=2.0.0->wikipedia) (1.24.3)\n","Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests<3.0.0,>=2.0.0->wikipedia) (2.10)\n","Requirement already satisfied: chardet<4,>=3.0.2 in 
/usr/local/lib/python3.6/dist-packages (from requests<3.0.0,>=2.0.0->wikipedia) (3.0.4)\n","Building wheels for collected packages: wikipedia\n"," Building wheel for wikipedia (setup.py) ... \u001b[?25l\u001b[?25hdone\n"," Created wheel for wikipedia: filename=wikipedia-1.4.0-cp36-none-any.whl size=11686 sha256=4ce5abca3a505a08dadf8775e05c1b8adaa4d4cf5b7ba13910d2811a85fcea86\n"," Stored in directory: /root/.cache/pip/wheels/87/2a/18/4e471fd96d12114d16fe4a446d00c3b38fb9efcb744bd31f4a\n","Successfully built wikipedia\n","Installing collected packages: wikipedia\n","Successfully installed wikipedia-1.4.0\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"RR6sc_DA5yCc","executionInfo":{"status":"ok","timestamp":1605343316817,"user_tz":-60,"elapsed":832,"user":{"displayName":"Dr. Thorsten Gressling","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gjsy54zs1Pd2tw4fMBrz3a7FsYlzNHgwHWDqLywqA=s64","userId":"08619859847936335393"}}},"source":["import wikipedia"],"execution_count":2,"outputs":[]},{"cell_type":"code","metadata":{"id":"Dbhv-h2-50vY","executionInfo":{"status":"ok","timestamp":1605343318936,"user_tz":-60,"elapsed":748,"user":{"displayName":"Dr. Thorsten Gressling","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gjsy54zs1Pd2tw4fMBrz3a7FsYlzNHgwHWDqLywqA=s64","userId":"08619859847936335393"}},"outputId":"c052c21b-2388-4844-9d56-194c894b1932","colab":{"base_uri":"https://localhost:8080/"}},"source":["# query wikipedia to get a list\n","print(wikipedia.search(\"benzoic\"))"],"execution_count":3,"outputs":[{"output_type":"stream","text":["['Benzoic acid', 'Benzoic anhydride', 'Sodium benzoate', 'Benzaldehyde', 'Saccharin', 'Benzamide', 'Benzyl benzoate', 'Benzoyl peroxide', 'Ethyl benzoate', 'Potassium benzoate']\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"j7zs6oeO6BCq","executionInfo":{"status":"ok","timestamp":1605343322589,"user_tz":-60,"elapsed":1424,"user":{"displayName":"Dr. 
Thorsten Gressling","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gjsy54zs1Pd2tw4fMBrz3a7FsYlzNHgwHWDqLywqA=s64","userId":"08619859847936335393"}},"outputId":"d27d6468-16cd-42d8-d02c-80c285e5a3d1","colab":{"base_uri":"https://localhost:8080/","height":122}},"source":["# get one single article https://en.wikipedia.org/wiki/Benzoic_acid\n","wiki = wikipedia.page(\"Benzoic acid\")\n","wiki.summary"],"execution_count":4,"outputs":[{"output_type":"execute_result","data":{"application/vnd.google.colaboratory.intrinsic+json":{"type":"string"},"text/plain":["'Benzoic acid is a white (or colorless) solid with the formula C6H5CO2H. It is the simplest aromatic carboxylic acid. The name is derived from gum benzoin, which was for a long time its only source. Benzoic acid occurs naturally in many plants and serves as an intermediate in the biosynthesis of many secondary metabolites. Salts of benzoic acid are used as food preservatives. Benzoic acid is an important precursor for the industrial synthesis of many other organic substances. The salts and esters of benzoic acid are known as benzoates .'"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"code","metadata":{"id":"hPqQtNGJ61DL","executionInfo":{"status":"ok","timestamp":1605343340848,"user_tz":-60,"elapsed":3812,"user":{"displayName":"Dr. 
Thorsten Gressling","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gjsy54zs1Pd2tw4fMBrz3a7FsYlzNHgwHWDqLywqA=s64","userId":"08619859847936335393"}},"outputId":"969d0295-282d-4d8b-8406-8136aa72a423","colab":{"base_uri":"https://localhost:8080/"}},"source":["!pip install qwikidata"],"execution_count":5,"outputs":[{"output_type":"stream","text":["Collecting qwikidata\n"," Downloading https://files.pythonhosted.org/packages/a9/40/4273aaaacd7269f80d8ce475aff7115ab8fce31488ba08f3eaca776d110a/qwikidata-0.4.0-py3-none-any.whl\n","Collecting mypy-extensions\n"," Downloading https://files.pythonhosted.org/packages/5c/eb/975c7c080f3223a5cdaff09612f3a5221e4ba534f7039db34c35d95fa6a5/mypy_extensions-0.4.3-py2.py3-none-any.whl\n","Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from qwikidata) (2.23.0)\n","Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->qwikidata) (2.10)\n","Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->qwikidata) (3.0.4)\n","Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->qwikidata) (1.24.3)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->qwikidata) (2020.6.20)\n","Installing collected packages: mypy-extensions, qwikidata\n","Successfully installed mypy-extensions-0.4.3 qwikidata-0.4.0\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"lLXjHHwK6C2R","executionInfo":{"status":"ok","timestamp":1605343343265,"user_tz":-60,"elapsed":766,"user":{"displayName":"Dr. 
Thorsten Gressling","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gjsy54zs1Pd2tw4fMBrz3a7FsYlzNHgwHWDqLywqA=s64","userId":"08619859847936335393"}}},"source":["from qwikidata.entity import WikidataItem, WikidataLexeme, WikidataProperty\n","from qwikidata.linked_data_interface import get_entity_dict_from_api"],"execution_count":6,"outputs":[]},{"cell_type":"code","metadata":{"id":"z5_hL00V6FND","executionInfo":{"status":"ok","timestamp":1605343349108,"user_tz":-60,"elapsed":1567,"user":{"displayName":"Dr. Thorsten Gressling","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gjsy54zs1Pd2tw4fMBrz3a7FsYlzNHgwHWDqLywqA=s64","userId":"08619859847936335393"}},"outputId":"3f4ed679-c799-441e-a174-c39ff5d41b2a","colab":{"base_uri":"https://localhost:8080/"}},"source":["# create an item representing \"benzoic acid\"\n","entity = get_entity_dict_from_api(\"Q191700\")\n","item = WikidataItem(entity)\n","item"],"execution_count":7,"outputs":[{"output_type":"execute_result","data":{"text/plain":["WikidataItem(label=benzoic acid, id=Q191700, description=chemical compound, aliases=['Retardex', 'E210', 'phenylcarboxylic acid', 'benzenemethanoic acid', 'benzeneformic acid', 'benzenemethonic acid', 'diacylic acid', 'carboxybenzene', 'oracylic acid'], enwiki_title=Benzoic acid)"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"code","metadata":{"id":"ol2XyfjV6G_h","executionInfo":{"status":"ok","timestamp":1605343355646,"user_tz":-60,"elapsed":1067,"user":{"displayName":"Dr. 
Thorsten Gressling","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gjsy54zs1Pd2tw4fMBrz3a7FsYlzNHgwHWDqLywqA=s64","userId":"08619859847936335393"}},"outputId":"d23dc303-6a4e-4df2-b583-30395c892f44","colab":{"base_uri":"https://localhost:8080/"}},"source":["# create a property representing \"DSSTox substance ID\" (P3117); note: \"subclass of\" would be P279\n","subclassItem = get_entity_dict_from_api(\"P3117\")\n","subclass = WikidataProperty(subclassItem)\n","subclass"],"execution_count":8,"outputs":[{"output_type":"execute_result","data":{"text/plain":["WikidataProperty(label=DSSTox substance ID, id=P3117, description=DSSTox substance identifier (DTXSID) used in the Environmental Protection Agency CompTox Dashboard, aliases=['DTXSID'])"]},"metadata":{"tags":[]},"execution_count":8}]}]}