this_algorithm/docs/wordlist-new.ipynb
2023-03-01 22:26:06 -05:00

2658 lines
68 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "0b00342f-7b19-49cc-bc6c-21019f8cc7dc",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting nltk\n",
" Downloading nltk-3.8.1-py3-none-any.whl (1.5 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.5/1.5 MB\u001b[0m \u001b[31m12.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
"\u001b[?25hCollecting odfpy\n",
" Downloading odfpy-1.4.1.tar.gz (717 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m717.0/717.0 kB\u001b[0m \u001b[31m26.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25ldone\n",
"\u001b[?25hCollecting regex>=2021.8.3\n",
" Downloading regex-2022.10.31-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (770 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m770.5/770.5 kB\u001b[0m \u001b[31m29.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: joblib in /opt/conda/lib/python3.10/site-packages (from nltk) (1.2.0)\n",
"Requirement already satisfied: click in /opt/conda/lib/python3.10/site-packages (from nltk) (8.1.3)\n",
"Requirement already satisfied: tqdm in /opt/conda/lib/python3.10/site-packages (from nltk) (4.64.1)\n",
"Requirement already satisfied: defusedxml in /opt/conda/lib/python3.10/site-packages (from odfpy) (0.7.1)\n",
"Building wheels for collected packages: odfpy\n",
" Building wheel for odfpy (setup.py) ... \u001b[?25ldone\n",
"\u001b[?25h Created wheel for odfpy: filename=odfpy-1.4.1-py2.py3-none-any.whl size=160672 sha256=5bfe9fcd7c590666411d404ea3e4ef0f704c9e62ff6621deb4ab09c84bec082a\n",
" Stored in directory: /home/jovyan/.cache/pip/wheels/c8/2e/95/90d94fe33903786937f3b8c33dd88807f792359c6424b40469\n",
"Successfully built odfpy\n",
"Installing collected packages: regex, odfpy, nltk\n",
"Successfully installed nltk-3.8.1 odfpy-1.4.1 regex-2022.10.31\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[nltk_data] Downloading package wordnet to /home/jovyan/nltk_data...\n"
]
}
],
"source": [
"try:\n",
" _initialized\n",
"except:\n",
" !pip install nltk odfpy\n",
" import nltk\n",
" \n",
" nltk.download(\"wordnet\")\n",
" _initialized=True\n",
" \n",
"from nltk.stem.wordnet import WordNetLemmatizer\n",
"import pandas as pd\n",
"import gzip\n",
"import re"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "985883de-8049-4f81-acd9-34e1abcd4070",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def get_lines(filename):\n",
" with gzip.open(filename, 'r') as f:\n",
" ret = []\n",
" for l in f:\n",
" if len(ret) > 30_000:\n",
" return ret\n",
" ret.append(str(l).lower())\n",
" return ret\n",
"\n",
"\n",
" \n",
"WORDLIST_SIZE = 8192 + 3\n",
"lemmatizer = WordNetLemmatizer()\n",
"word_re = re.compile(r\"^[A-Za-z]+$\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "926d0d84-0d7e-4939-b87f-1a170f870a8f",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"annotated_words=pd.read_excel(\"annotated_words.ods\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "8b0d26e4-051c-4669-b566-bbd5ddbbe02b",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"['a', 'as', 'it', 'was', 'i', 'has', 'so', 'its', 's', 'p']"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"excluded_words = list(annotated_words[annotated_words[\"keep\"] != \"Yes\"][\"word\"].str.lower())\n",
"excluded_words[0:10]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "2eea14b2-82bf-4353-8982-76a6c7f46d22",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"[('be', 'bee'),\n",
" ('by', 'bye'),\n",
" ('per', 'purr'),\n",
" ('sense', 'cent'),\n",
" ('died', 'dyed'),\n",
" ('cents', 'sense'),\n",
" ('yellow', 'hello'),\n",
" ('corps', 'core'),\n",
" ('ore', 'oar'),\n",
" ('ore', ' or'),\n",
" ('vary', 'very'),\n",
" ('com', 'calm'),\n",
" ('filing', 'filling'),\n",
" ('fax', 'facts'),\n",
" ('favour', 'favor'),\n",
" ('theatre', 'theater'),\n",
" ('par', 'parse'),\n",
" ('honour', 'honor'),\n",
" ('harry', 'hairy'),\n",
" ('brings', 'bring'),\n",
" ('organisation', 'organization'),\n",
" ('simultaneously', 'simultaneous'),\n",
" ('aluminum', 'aluminium'),\n",
" ('knight', 'night'),\n",
" ('electronics', 'electronic'),\n",
" ('senses', 'cent'),\n",
" ('organisations', 'organization'),\n",
" ('fortunately', 'fortunate'),\n",
" ('corp', 'core'),\n",
" ('chile', 'chilly'),\n",
" ('chile', ' chili'),\n",
" ('owe', 'oh'),\n",
" ('capitol', 'capital'),\n",
" ('weary', 'wary'),\n",
" ('berry', 'barry'),\n",
" ('lecturer', 'lecture'),\n",
" ('weigh', 'way'),\n",
" ('aluminium', 'aluminum'),\n",
" ('isle', 'aisle'),\n",
" ('boulder', 'bolder'),\n",
" ('blew', 'blue'),\n",
" ('reformed', 'reform'),\n",
" ('scent', 'cent'),\n",
" ('ads', 'adds'),\n",
" ('honours', 'honors'),\n",
" ('bot', 'bought'),\n",
" ('dew', 'do'),\n",
" ('dew', ' due'),\n",
" ('theatres', 'theater'),\n",
" ('thru', 'through'),\n",
" ('sensed', 'cent'),\n",
" ('monies', 'moneys'),\n",
" ('cue', 'queue'),\n",
" ('hairy', 'harry'),\n",
" ('weighs', 'way'),\n",
" ('hem', 'him'),\n",
" ('nun', 'none'),\n",
" ('organisational', 'organizational'),\n",
" ('grate', 'great'),\n",
" ('dessert', 'desert'),\n",
" ('aux', 'ox'),\n",
" ('rap', 'wrap'),\n",
" ('filings', 'filling'),\n",
" ('pars', 'parse'),\n",
" ('dazed', 'day'),\n",
" ('scents', 'cent'),\n",
" ('daze', 'day'),\n",
" ('four', 'for')]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"custom_maps = annotated_words[annotated_words[\"maps_to\"].notna()][[\"word\",\"maps_to\"]].assign(maps_to=lambda x: x[\"maps_to\"].map(lambda y: y.split(\",\")))\n",
"\n",
"custom_maps = [\n",
" (m[1][\"word\"].lower(), mapping.lower())\n",
" for m in custom_maps.iterrows()\n",
" for mapping in m[1][\"maps_to\"]\n",
"]\n",
"custom_maps"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "8bdfd108-bf43-4c0f-bc5c-f91925da753f",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Start parsing the wordlist\n",
"all_words = get_lines(\"frequency-all.txt.gz\")\n",
"\n",
"# Delete header line\n",
"all_words = all_words[1:]\n",
"\n",
"# Get only the word (fixed width)\n",
"all_words = [w[13:36].strip() for w in all_words]\n",
"\n",
"# Remove special characters\n",
"all_words = [w for w in all_words if word_re.search(w)]\n",
"\n",
"# Remove all removed words\n",
"all_words = [w for w in all_words if w not in excluded_words]\n",
"\n",
"# Add all custom mappings\n",
"for m in list(sum(custom_maps, ())):\n",
" if m[0] not in all_words:\n",
" all_words.append(m[0])\n",
" if m[1] not in all_words:\n",
" all_words.append(m[1])"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "e42f2b56-98b3-4465-95be-812d8584b511",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"['the',\n",
" 'of',\n",
" 'and',\n",
" 'to',\n",
" 'in',\n",
" 'is',\n",
" 'that',\n",
" 'for',\n",
" 'be',\n",
" 'by',\n",
" 'with',\n",
" 'on',\n",
" 'not',\n",
" 'or',\n",
" 'this',\n",
" 'are',\n",
" 'at',\n",
" 'from',\n",
" 'he',\n",
" 'which',\n",
" 'his',\n",
" 'have',\n",
" 'an',\n",
" 'but',\n",
" 'you',\n",
" 'they',\n",
" 'were',\n",
" 'had',\n",
" 'we',\n",
" 'all',\n",
" 'one',\n",
" 'their',\n",
" 'been',\n",
" 'will',\n",
" 'there',\n",
" 'can',\n",
" 'if',\n",
" 'other',\n",
" 'would',\n",
" 'no',\n",
" 'her',\n",
" 'may',\n",
" 'more',\n",
" 'when',\n",
" 'who',\n",
" 'such',\n",
" 'these',\n",
" 'any',\n",
" 'she',\n",
" 'new',\n",
" 'time',\n",
" 'than',\n",
" 'do',\n",
" 'some',\n",
" 'what',\n",
" 'only',\n",
" 'into',\n",
" 'them',\n",
" 'two',\n",
" 'also',\n",
" 'about',\n",
" 'out',\n",
" 'him',\n",
" 'my',\n",
" 'said',\n",
" 'up',\n",
" 'our',\n",
" 'first',\n",
" 'should',\n",
" 'under',\n",
" 'made',\n",
" 'state',\n",
" 'see',\n",
" 'after',\n",
" 'could',\n",
" 'then',\n",
" 'me',\n",
" 'most',\n",
" 'over',\n",
" 'very',\n",
" 'your',\n",
" 'between',\n",
" 'where',\n",
" 'now',\n",
" 'shall',\n",
" 'work',\n",
" 'those',\n",
" 'same',\n",
" 'well',\n",
" 'each',\n",
" 'many',\n",
" 'being',\n",
" 'years',\n",
" 'did',\n",
" 'year',\n",
" 'through',\n",
" 'must',\n",
" 'upon',\n",
" 'before',\n",
" 'like',\n",
" 'use',\n",
" 'part',\n",
" 'general',\n",
" 'people',\n",
" 'because',\n",
" 'used',\n",
" 'how',\n",
" 'even',\n",
" 'much',\n",
" 'states',\n",
" 'during',\n",
" 'both',\n",
" 'case',\n",
" 'three',\n",
" 'number',\n",
" 'make',\n",
" 'per',\n",
" 'great',\n",
" 'act',\n",
" 'way',\n",
" 'life',\n",
" 'good',\n",
" 'day',\n",
" 'public',\n",
" 'man',\n",
" 'however',\n",
" 'system',\n",
" 'water',\n",
" 'without',\n",
" 'government',\n",
" 'while',\n",
" 'long',\n",
" 'order',\n",
" 'law',\n",
" 'section',\n",
" 'court',\n",
" 'high',\n",
" 'right',\n",
" 'own',\n",
" 'found',\n",
" 'united',\n",
" 'just',\n",
" 'here',\n",
" 'against',\n",
" 'world',\n",
" 'does',\n",
" 'company',\n",
" 'within',\n",
" 'given',\n",
" 'service',\n",
" 'house',\n",
" 'another',\n",
" 'power',\n",
" 'place',\n",
" 'know',\n",
" 'little',\n",
" 'down',\n",
" 'present',\n",
" 'every',\n",
" 'national',\n",
" 'back',\n",
" 'take',\n",
" 'information',\n",
" 'men',\n",
" 'since',\n",
" 'might',\n",
" 'small',\n",
" 'large',\n",
" 'school',\n",
" 'following',\n",
" 'still',\n",
" 'less',\n",
" 'last',\n",
" 'city',\n",
" 'second',\n",
" 'development',\n",
" 'different',\n",
" 'university',\n",
" 'old',\n",
" 'form',\n",
" 'point',\n",
" 'total',\n",
" 'data',\n",
" 'too',\n",
" 'committee',\n",
" 'report',\n",
" 'business',\n",
" 'think',\n",
" 'end',\n",
" 'get',\n",
" 'set',\n",
" 'research',\n",
" 'say',\n",
" 'come',\n",
" 'country',\n",
" 'never',\n",
" 'fact',\n",
" 'go',\n",
" 'control',\n",
" 'thus',\n",
" 'having',\n",
" 'value',\n",
" 'social',\n",
" 'department',\n",
" 'few',\n",
" 'above',\n",
" 'important',\n",
" 'interest',\n",
" 'study',\n",
" 'off',\n",
" 'area',\n",
" 'means',\n",
" 'office',\n",
" 'group',\n",
" 'give',\n",
" 'again',\n",
" 'war',\n",
" 'whether',\n",
" 'question',\n",
" 'called',\n",
" 'period',\n",
" 'line',\n",
" 'land',\n",
" 'four',\n",
" 'among',\n",
" 'table',\n",
" 'board',\n",
" 'until',\n",
" 'hand',\n",
" 'taken',\n",
" 'need',\n",
" 'education',\n",
" 'certain',\n",
" 'county',\n",
" 'action',\n",
" 'several',\n",
" 'am',\n",
" 'course',\n",
" 'cases',\n",
" 'far',\n",
" 'effect',\n",
" 'possible',\n",
" 'though',\n",
" 'left',\n",
" 'further',\n",
" 'home',\n",
" 'days',\n",
" 'person',\n",
" 'health',\n",
" 'amount',\n",
" 'members',\n",
" 'subject',\n",
" 'yet',\n",
" 'program',\n",
" 'therefore',\n",
" 'process',\n",
" 'services',\n",
" 'rate',\n",
" 'local',\n",
" 'name',\n",
" 'find',\n",
" 'necessary',\n",
" 'often',\n",
" 'others',\n",
" 'whole',\n",
" 'change',\n",
" 'example',\n",
" 'president',\n",
" 'history',\n",
" 'best',\n",
" 'although',\n",
" 'family',\n",
" 'side',\n",
" 'women',\n",
" 'held',\n",
" 'based',\n",
" 'south',\n",
" 'special',\n",
" 'required',\n",
" 'came',\n",
" 'thought',\n",
" 'five',\n",
" 'always',\n",
" 'himself',\n",
" 'air',\n",
" 'known',\n",
" 'head',\n",
" 'either',\n",
" 'property',\n",
" 'cost',\n",
" 'rather',\n",
" 'bill',\n",
" 'put',\n",
" 'human',\n",
" 'figure',\n",
" 'results',\n",
" 'level',\n",
" 'conditions',\n",
" 'full',\n",
" 'times',\n",
" 'book',\n",
" 'available',\n",
" 'early',\n",
" 'matter',\n",
" 'common',\n",
" 'light',\n",
" 'let',\n",
" 'society',\n",
" 'body',\n",
" 'international',\n",
" 'including',\n",
" 'free',\n",
" 'evidence',\n",
" 'better',\n",
" 'type',\n",
" 'provided',\n",
" 'due',\n",
" 'next',\n",
" 'production',\n",
" 'once',\n",
" 'done',\n",
" 'making',\n",
" 'least',\n",
" 'support',\n",
" 'north',\n",
" 'later',\n",
" 'using',\n",
" 'things',\n",
" 'economic',\n",
" 'chapter',\n",
" 'various',\n",
" 'why',\n",
" 'white',\n",
" 'going',\n",
" 'commission',\n",
" 'federal',\n",
" 'away',\n",
" 'field',\n",
" 'result',\n",
" 'nature',\n",
" 'policy',\n",
" 'become',\n",
" 'political',\n",
" 'increase',\n",
" 'around',\n",
" 'age',\n",
" 'want',\n",
" 'low',\n",
" 'trade',\n",
" 'half',\n",
" 'position',\n",
" 'young',\n",
" 'money',\n",
" 'percent',\n",
" 'cent',\n",
" 'class',\n",
" 'words',\n",
" 'view',\n",
" 'provide',\n",
" 'seen',\n",
" 'show',\n",
" 'district',\n",
" 'party',\n",
" 'analysis',\n",
" 'care',\n",
" 'june',\n",
" 'foreign',\n",
" 'shown',\n",
" 'received',\n",
" 'management',\n",
" 'third',\n",
" 'took',\n",
" 'something',\n",
" 'tax',\n",
" 'account',\n",
" 'problem',\n",
" 'almost',\n",
" 'west',\n",
" 'nothing',\n",
" 'together',\n",
" 'individual',\n",
" 'open',\n",
" 'material',\n",
" 'paper',\n",
" 'feet',\n",
" 'force',\n",
" 'association',\n",
" 'purpose',\n",
" 'terms',\n",
" 'method',\n",
" 'help',\n",
" 'real',\n",
" 'ever',\n",
" 'already',\n",
" 'along',\n",
" 'went',\n",
" 'term',\n",
" 'systems',\n",
" 'member',\n",
" 'particular',\n",
" 'problems',\n",
" 'energy',\n",
" 'secretary',\n",
" 'date',\n",
" 'price',\n",
" 'short',\n",
" 'true',\n",
" 'street',\n",
" 'building',\n",
" 'room',\n",
" 'market',\n",
" 'look',\n",
" 'similar',\n",
" 'industry',\n",
" 'areas',\n",
" 'bank',\n",
" 'according',\n",
" 'studies',\n",
" 'itself',\n",
" 'application',\n",
" 'current',\n",
" 'read',\n",
" 'press',\n",
" 'community',\n",
" 'plan',\n",
" 'whose',\n",
" 'major',\n",
" 'considered',\n",
" 'mind',\n",
" 'union',\n",
" 'cause',\n",
" 'able',\n",
" 'surface',\n",
" 'face',\n",
" 'river',\n",
" 'council',\n",
" 'income',\n",
" 'july',\n",
" 'near',\n",
" 'experience',\n",
" 'non',\n",
" 'paid',\n",
" 'pay',\n",
" 'reason',\n",
" 'themselves',\n",
" 'asked',\n",
" 'march',\n",
" 'king',\n",
" 'higher',\n",
" 'single',\n",
" 'rights',\n",
" 'average',\n",
" 'father',\n",
" 'note',\n",
" 'treatment',\n",
" 'love',\n",
" 'changes',\n",
" 'black',\n",
" 'knowledge',\n",
" 'enough',\n",
" 'future',\n",
" 'kind',\n",
" 'lower',\n",
" 'authority',\n",
" 'past',\n",
" 'natural',\n",
" 'six',\n",
" 'persons',\n",
" 'food',\n",
" 'working',\n",
" 'central',\n",
" 'college',\n",
" 'self',\n",
" 'products',\n",
" 'model',\n",
" 'brought',\n",
" 'greater',\n",
" 'countries',\n",
" 'test',\n",
" 'nor',\n",
" 'students',\n",
" 'private',\n",
" 'construction',\n",
" 'perhaps',\n",
" 'ground',\n",
" 'sir',\n",
" 'basis',\n",
" 'months',\n",
" 'growth',\n",
" 'increased',\n",
" 'word',\n",
" 'east',\n",
" 'language',\n",
" 'rule',\n",
" 'continued',\n",
" 'quite',\n",
" 'except',\n",
" 'series',\n",
" 'practice',\n",
" 'thing',\n",
" 'night',\n",
" 'works',\n",
" 'eyes',\n",
" 'oil',\n",
" 'art',\n",
" 'told',\n",
" 'especially',\n",
" 'population',\n",
" 'science',\n",
" 'whom',\n",
" 'obtained',\n",
" 'parts',\n",
" 'capital',\n",
" 'include',\n",
" 'generally',\n",
" 'meeting',\n",
" 'specific',\n",
" 'described',\n",
" 'believe',\n",
" 'review',\n",
" 'issue',\n",
" 'respect',\n",
" 'contract',\n",
" 'became',\n",
" 'effects',\n",
" 'medical',\n",
" 'road',\n",
" 'got',\n",
" 'clear',\n",
" 'main',\n",
" 'labor',\n",
" 'operation',\n",
" 'size',\n",
" 'below',\n",
" 'hours',\n",
" 'sense',\n",
" 'addition',\n",
" 'probably',\n",
" 'mean',\n",
" 'groups',\n",
" 'century',\n",
" 'personal',\n",
" 'plant',\n",
" 'training',\n",
" 'design',\n",
" 'statement',\n",
" 'structure',\n",
" 'project',\n",
" 'million',\n",
" 'usually',\n",
" 'range',\n",
" 'call',\n",
" 'mother',\n",
" 'seems',\n",
" 'standard',\n",
" 'return',\n",
" 'title',\n",
" 'established',\n",
" 'keep',\n",
" 'space',\n",
" 'annual',\n",
" 'record',\n",
" 'close',\n",
" 'april',\n",
" 'complete',\n",
" 'page',\n",
" 'heart',\n",
" 'says',\n",
" 'fig',\n",
" 'quality',\n",
" 'gas',\n",
" 'methods',\n",
" 'letter',\n",
" 'stock',\n",
" 'costs',\n",
" 'gave',\n",
" 'related',\n",
" 'administration',\n",
" 'activities',\n",
" 'condition',\n",
" 'theory',\n",
" 'town',\n",
" 'equipment',\n",
" 'rates',\n",
" 'soon',\n",
" 'decision',\n",
" 'pressure',\n",
" 'written',\n",
" 'lines',\n",
" 'corporation',\n",
" 'tell',\n",
" 'schools',\n",
" 'agreement',\n",
" 'reported',\n",
" 'attention',\n",
" 'materials',\n",
" 'fire',\n",
" 'direct',\n",
" 'saw',\n",
" 'published',\n",
" 'temperature',\n",
" 'species',\n",
" 'really',\n",
" 'laws',\n",
" 'woman',\n",
" 'function',\n",
" 'military',\n",
" 'proposed',\n",
" 'january',\n",
" 'additional',\n",
" 'late',\n",
" 'books',\n",
" 'opinion',\n",
" 'loss',\n",
" 'limited',\n",
" 'source',\n",
" 'article',\n",
" 'notice',\n",
" 'security',\n",
" 'organization',\n",
" 'hands',\n",
" 'financial',\n",
" 'rules',\n",
" 'follows',\n",
" 'miles',\n",
" 'values',\n",
" 'points',\n",
" 'chief',\n",
" 'distribution',\n",
" 'sometimes',\n",
" 'insurance',\n",
" 'son',\n",
" 'strong',\n",
" 'length',\n",
" 'activity',\n",
" 'original',\n",
" 'forms',\n",
" 'yes',\n",
" 'effective',\n",
" 'defendant',\n",
" 'living',\n",
" 'december',\n",
" 'character',\n",
" 'began',\n",
" 'carried',\n",
" 'supply',\n",
" 'blood',\n",
" 'taking',\n",
" 'manner',\n",
" 'journal',\n",
" 'hundred',\n",
" 'red',\n",
" 'shows',\n",
" 'developed',\n",
" 'performance',\n",
" 'situation',\n",
" 'programs',\n",
" 'felt',\n",
" 'workers',\n",
" 'volume',\n",
" 'presented',\n",
" 'knew',\n",
" 'answer',\n",
" 'resources',\n",
" 'questions',\n",
" 'industrial',\n",
" 'needs',\n",
" 'twenty',\n",
" 'sent',\n",
" 'looked',\n",
" 'purposes',\n",
" 'library',\n",
" 'added',\n",
" 'passed',\n",
" 'ten',\n",
" 'sea',\n",
" 'applied',\n",
" 'included',\n",
" 'physical',\n",
" 'across',\n",
" 'army',\n",
" 'toward',\n",
" 'produced',\n",
" 'makes',\n",
" 'placed',\n",
" 'role',\n",
" 'october',\n",
" 'final',\n",
" 'approach',\n",
" 'provisions',\n",
" 'leave',\n",
" 'director',\n",
" 'employment',\n",
" 'anything',\n",
" 'particularly',\n",
" 'hard',\n",
" 'outside',\n",
" 'week',\n",
" 'feel',\n",
" 'charge',\n",
" 'indeed',\n",
" 'degree',\n",
" 'reference',\n",
" 'requirements',\n",
" 'september',\n",
" 'today',\n",
" 'western',\n",
" 'influence',\n",
" 'unit',\n",
" 'solution',\n",
" 'chairman',\n",
" 'legal',\n",
" 'motion',\n",
" 'region',\n",
" 'idea',\n",
" 'list',\n",
" 'judgment',\n",
" 'determined',\n",
" 'poor',\n",
" 'disease',\n",
" 'civil',\n",
" 'turn',\n",
" 'modern',\n",
" 'normal',\n",
" 'appear',\n",
" 'employees',\n",
" 'latter',\n",
" 'heard',\n",
" 'top',\n",
" 'sure',\n",
" 'moment',\n",
" 'code',\n",
" 'reports',\n",
" 'wife',\n",
" 'post',\n",
" 'difficult',\n",
" 'recent',\n",
" 'extent',\n",
" 'longer',\n",
" 'story',\n",
" 'meet',\n",
" 'officers',\n",
" 'companies',\n",
" 'patients',\n",
" 'front',\n",
" 'doing',\n",
" 'staff',\n",
" 'product',\n",
" 'august',\n",
" 'needed',\n",
" 'involved',\n",
" 'likely',\n",
" 'former',\n",
" 'run',\n",
" 'author',\n",
" 'middle',\n",
" 'turned',\n",
" 'agency',\n",
" 'reading',\n",
" 'beginning',\n",
" 'duty',\n",
" 'movement',\n",
" 'month',\n",
" 'alone',\n",
" 'issues',\n",
" 'beyond',\n",
" 'fine',\n",
" 'base',\n",
" 'parties',\n",
" 'relations',\n",
" 'simple',\n",
" 'consider',\n",
" 'proper',\n",
" 'instead',\n",
" 'significant',\n",
" 'appears',\n",
" 'equal',\n",
" 'lost',\n",
" 'followed',\n",
" 'hope',\n",
" 'cut',\n",
" 'unless',\n",
" 'nearly',\n",
" 'claim',\n",
" 'associated',\n",
" 'expected',\n",
" 'operations',\n",
" 'difference',\n",
" 'funds',\n",
" 'direction',\n",
" 'cross',\n",
" 'live',\n",
" 'finally',\n",
" 'weight',\n",
" 'lead',\n",
" 'trial',\n",
" 'justice',\n",
" 'officer',\n",
" 'factors',\n",
" 'response',\n",
" 'cells',\n",
" 'earth',\n",
" 'rest',\n",
" 'fund',\n",
" 'bring',\n",
" 'trust',\n",
" 'goods',\n",
" 'observed',\n",
" 'behind',\n",
" 'job',\n",
" 'door',\n",
" 'types',\n",
" 'understand',\n",
" 'acid',\n",
" 'hold',\n",
" 'technology',\n",
" 'wide',\n",
" 'protection',\n",
" 'basic',\n",
" 'november',\n",
" 'seemed',\n",
" 'throughout',\n",
" 'levels',\n",
" 'importance',\n",
" 'sales',\n",
" 'sale',\n",
" 'stated',\n",
" 'address',\n",
" 'potential',\n",
" 'payment',\n",
" 'prior',\n",
" 'discussion',\n",
" 'conference',\n",
" 'writing',\n",
" 'stage',\n",
" 'fall',\n",
" 'notes',\n",
" 'iron',\n",
" 'play',\n",
" 'ask',\n",
" 'plants',\n",
" 'relationship',\n",
" 'towards',\n",
" 'regard',\n",
" 'referred',\n",
" 'patient',\n",
" 'flow',\n",
" 'consideration',\n",
" 'hospital',\n",
" 'seem',\n",
" 'february',\n",
" 'soil',\n",
" 'morning',\n",
" 'commercial',\n",
" 'planning',\n",
" 'provides',\n",
" 'appropriate',\n",
" 'technical',\n",
" 'demand',\n",
" 'sufficient',\n",
" 'principal',\n",
" 'credit',\n",
" 'peace',\n",
" 'previous',\n",
" 'object',\n",
" 'prices',\n",
" 'kept',\n",
" 'sound',\n",
" 'wanted',\n",
" 'looking',\n",
" 'entire',\n",
" 'plaintiff',\n",
" 'heat',\n",
" 'ways',\n",
" 'otherwise',\n",
" 'judge',\n",
" 'hour',\n",
" 'capacity',\n",
" 'brown',\n",
" 'music',\n",
" 'risk',\n",
" 'box',\n",
" 'exchange',\n",
" 'produce',\n",
" 'station',\n",
" 'big',\n",
" 'primary',\n",
" 'institute',\n",
" 'mentioned',\n",
" 'prepared',\n",
" 'cell',\n",
" 'spirit',\n",
" 'allowed',\n",
" 'claims',\n",
" 'site',\n",
" 'green',\n",
" 'directly',\n",
" 'text',\n",
" 'friends',\n",
" 'presence',\n",
" 'survey',\n",
" 'determine',\n",
" 'car',\n",
" 'larger',\n",
" 'gives',\n",
" 'deep',\n",
" 'simply',\n",
" 'immediately',\n",
" 'distance',\n",
" 'coming',\n",
" 'seven',\n",
" 'steel',\n",
" 'friend',\n",
" 'records',\n",
" 'existing',\n",
" 'clearly',\n",
" 'actual',\n",
" 'relation',\n",
" 'born',\n",
" 'learning',\n",
" 'forces',\n",
" 'voice',\n",
" 'earlier',\n",
" 'circumstances',\n",
" 'safety',\n",
" 'ago',\n",
" 'issued',\n",
" 'upper',\n",
" 'require',\n",
" 'scale',\n",
" 'island',\n",
" 'culture',\n",
" 'employed',\n",
" 'eight',\n",
" 'estate',\n",
" 'facts',\n",
" 'portion',\n",
" 'deal',\n",
" 'share',\n",
" 'actually',\n",
" 'aid',\n",
" 'engineering',\n",
" 'continue',\n",
" 'formed',\n",
" 'agricultural',\n",
" 'entitled',\n",
" 'mass',\n",
" 'truth',\n",
" 'giving',\n",
" 'numbers',\n",
" 'places',\n",
" 'met',\n",
" 'built',\n",
" 'content',\n",
" 'connection',\n",
" 'assistance',\n",
" 'coal',\n",
" 'progress',\n",
" 'receive',\n",
" 'active',\n",
" 'nation',\n",
" 'contact',\n",
" 'amendment',\n",
" 'interests',\n",
" 'net',\n",
" 'wall',\n",
" 'standards',\n",
" 'farm',\n",
" 'understanding',\n",
" 'strength',\n",
" 'minutes',\n",
" 'figures',\n",
" 'move',\n",
" 'elements',\n",
" 'concerned',\n",
" 'regulations',\n",
" 'step',\n",
" 'literature',\n",
" 'units',\n",
" 'opportunity',\n",
" 'investment',\n",
" 'led',\n",
" 'reduced',\n",
" 'follow',\n",
" 'facilities',\n",
" 'benefit',\n",
" 'compared',\n",
" 'reached',\n",
" 'student',\n",
" 'religious',\n",
" 'measure',\n",
" 'individuals',\n",
" 'meaning',\n",
" 'considerable',\n",
" 'relative',\n",
" 'electric',\n",
" 'joint',\n",
" 'certainly',\n",
" 'failure',\n",
" 'apply',\n",
" ...]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"all_words"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dd9e939e-7827-42f9-89be-bcfbb8bd3f52",
"metadata": {
"tags": []
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 8,
"id": "64b6fcd3-acf7-45da-a335-79c538963bdd",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"discus\n",
"physic\n",
"posse\n",
"serf\n",
"sens\n",
"caput\n",
"bos\n",
"graf\n",
"pant\n",
"barrack\n",
"auspex\n",
"footstep\n",
"colonist\n",
"villager\n",
"kilometer\n",
"granule\n",
"credential\n",
"petal\n",
"trouser\n",
"shortcoming\n",
"microorganism\n",
"italic\n",
"grandchild\n",
"munition\n",
"parenthesis\n",
"foodstuff\n",
"attache\n",
"grandparent\n",
"tropic\n",
"kilometre\n",
"congratulation\n",
"fume\n",
"convulsion\n",
"nostril\n",
"utensil\n",
"cooky\n",
"amenity\n",
"reptile\n",
"pretension\n",
"sock\n",
"peso\n",
"mitochondrion\n",
"reminiscence\n",
"invader\n",
"macrophage\n",
"eyelid\n",
"dweller\n",
"bristle\n",
"tenet\n",
"taxon\n",
"outskirt\n",
"policyholder\n",
"stamen\n",
"horseman\n",
"striker\n",
"ramification\n",
"tuft\n",
"cultivar\n",
"interrogatory\n",
"bylaw\n",
"bellow\n",
"neoplasm\n",
"insurgent\n",
"chore\n",
"pensioner\n",
"exigency\n",
"forefather\n",
"atrocity\n",
"dissenter\n",
"corpuscle\n",
"islander\n",
"numeral\n",
"bureaucrat\n",
"classmate\n",
"crossroad\n",
"pitfall\n",
"firework\n",
"ravage\n",
"broadcaster\n",
"heretic\n",
"appurtenance\n",
"potentiality\n",
"louse\n",
"conspirator\n",
"revers\n",
"combatant\n",
"conferee\n",
"serviceman\n",
"repercussion\n",
"grader\n",
"exhibitor\n",
"alkaloid\n",
"collaborator\n",
"slipper\n",
"foothill\n",
"homeowner\n",
"hallucination\n",
"ailment\n",
"crumb\n",
"milligram\n",
"turnip\n",
"fingertip\n",
"tradesman\n",
"archaeologist\n",
"bondholder\n",
"lira\n",
"emolument\n",
"tailing\n",
"enthusiast\n",
"tubule\n",
"warship\n",
"speculator\n",
"jobber\n",
"raisin\n",
"vicissitude\n",
"courtier\n",
"clove\n",
"entrant\n",
"festivity\n",
"bough\n",
"imago\n",
"fibroblast\n",
"bruise\n",
"misgiving\n",
"parishioner\n",
"bract\n",
"microbe\n",
"industrialist\n",
"sprout\n",
"wrinkle\n",
"worshipper\n",
"retiree\n",
"cracker\n",
"negotiator\n",
"pronouncement\n",
"devotee\n",
"sandal\n",
"sepal\n",
"interrelationship\n",
"corticosteroid\n",
"sou\n",
"framer\n",
"knuckle\n",
"leukocyte\n",
"malformation\n",
"geographer\n",
"fastener\n",
"ruble\n",
"whisker\n",
"tentacle\n",
"footprint\n",
"ratepayer\n",
"marketer\n",
"refiner\n",
"cilium\n",
"inroad\n",
"dragoon\n",
"litigant\n",
"kilo\n",
"shipowner\n",
"rudiment\n",
"appointee\n",
"fingerprint\n",
"anther\n",
"depredation\n",
"stave\n",
"rancher\n",
"cytokine\n",
"artefact\n",
"freeholder\n",
"churchman\n",
"fungicide\n",
"inequity\n",
"contraindication\n",
"arrhythmia\n",
"functionary\n",
"bandit\n",
"horde\n",
"spermatozoon\n",
"selectman\n",
"blocker\n",
"inaccuracy\n",
"gramme\n",
"billet\n",
"radiograph\n",
"demonstrator\n",
"amphibian\n",
"mussel\n",
"rafter\n",
"headlight\n",
"vestige\n",
"loin\n",
"raider\n",
"crevice\n",
"suitor\n",
"technologist\n",
"trooper\n",
"globule\n",
"firefighter\n",
"woodcut\n",
"purr\n",
" or\n",
"parse\n",
" chili\n",
"bolder\n",
" due\n",
"scents\n",
"daze\n"
]
}
],
"source": [
"# Lemmatize all words (plural -> singular)\n",
"lemmatize_mappings = [\n",
" (w, lemmatizer.lemmatize(w)) \n",
" for w in all_words\n",
" # if w != lemmatizer.lemmatize(w)\n",
"]\n",
"\n",
"# Remove all words that lemmatize to another word\n",
"#all_words = [w for w in all_words if w not in ]\n",
"\n",
"# Add custom lemmatizations\n",
"for l in custom_maps:\n",
" if l in lemmatize_mappings:\n",
" print(f\"Warning: {l} is already lemmatized\")\n",
" else:\n",
" lemmatize_mappings.append(l)\n",
" \n",
"lemmatize_mappings = [w for w in lemmatize_mappings if w[1] not in excluded_words]\n",
"\n",
"# Now, re-add all lematized words to the list of every word\n",
"for w in sum(lemmatize_mappings, ()):\n",
" if w not in all_words:\n",
" print(w)\n",
" all_words.append(w)\n",
" \n",
"lemmatize_mappings = {k: v for k, v in lemmatize_mappings}\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "8bdff9d0-f3ff-498f-952d-13f1a91bfbd5",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"final_wordlist = []\n",
"seen_lemmatizations = set()\n",
"for w in all_words:\n",
" lemmatized = lemmatize_mappings.get(w) or w\n",
" if lemmatized in seen_lemmatizations:\n",
" # The lemmatized version of this word was already seen\n",
" continue\n",
" else:\n",
" # The lemmatized version hasn't been seen. We're good to add it\n",
" final_wordlist.append([\n",
" k\n",
" for k\n",
" in lemmatize_mappings.keys()\n",
" if lemmatize_mappings[k] == lemmatized\n",
" ])\n",
" seen_lemmatizations.add(lemmatized)\n",
"\n",
" if len(final_wordlist) >= WORDLIST_SIZE:\n",
" break\n",
"\n",
"# Now, convert it to the format (number, word)\n",
"final_wordlist = [\n",
" (idx, w)\n",
" for idx, words in enumerate(final_wordlist)\n",
" for w in words\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "65bd6887-613e-45ae-ac45-6ed5967b3a43",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"[(0, 'the'),\n",
" (1, 'of'),\n",
" (2, 'and'),\n",
" (3, 'to'),\n",
" (4, 'in'),\n",
" (5, 'is'),\n",
" (6, 'that'),\n",
" (7, 'for'),\n",
" (7, 'four'),\n",
" (8, 'be'),\n",
" (8, 'bee'),\n",
" (8, 'bees'),\n",
" (9, 'by'),\n",
" (9, 'bye'),\n",
" (10, 'with'),\n",
" (11, 'on'),\n",
" (12, 'not'),\n",
" (13, 'or'),\n",
" (14, 'this'),\n",
" (15, 'are'),\n",
" (16, 'at'),\n",
" (17, 'from'),\n",
" (18, 'he'),\n",
" (19, 'which'),\n",
" (20, 'his'),\n",
" (21, 'have'),\n",
" (22, 'an'),\n",
" (23, 'but'),\n",
" (24, 'you'),\n",
" (25, 'they'),\n",
" (26, 'were'),\n",
" (27, 'had'),\n",
" (28, 'we'),\n",
" (29, 'all'),\n",
" (30, 'one'),\n",
" (30, 'ones'),\n",
" (31, 'their'),\n",
" (32, 'been'),\n",
" (33, 'will'),\n",
" (33, 'wills'),\n",
" (34, 'there'),\n",
" (35, 'can'),\n",
" (35, 'cans'),\n",
" (36, 'if'),\n",
" (37, 'other'),\n",
" (38, 'would'),\n",
" (39, 'no'),\n",
" (39, 'nos'),\n",
" (40, 'her'),\n",
" (41, 'may'),\n",
" (42, 'more'),\n",
" (42, 'mores'),\n",
" (43, 'when'),\n",
" (44, 'who'),\n",
" (45, 'such'),\n",
" (46, 'these'),\n",
" (47, 'any'),\n",
" (48, 'she'),\n",
" (49, 'new'),\n",
" (50, 'time'),\n",
" (50, 'times'),\n",
" (51, 'than'),\n",
" (52, 'do'),\n",
" (53, 'some'),\n",
" (54, 'what'),\n",
" (55, 'only'),\n",
" (56, 'into'),\n",
" (57, 'them'),\n",
" (58, 'two'),\n",
" (59, 'also'),\n",
" (60, 'about'),\n",
" (61, 'out'),\n",
" (61, 'outs'),\n",
" (62, 'him'),\n",
" (62, 'hem'),\n",
" (63, 'my'),\n",
" (64, 'said'),\n",
" (65, 'up'),\n",
" (66, 'our'),\n",
" (67, 'first'),\n",
" (68, 'should'),\n",
" (69, 'under'),\n",
" (70, 'made'),\n",
" (71, 'state'),\n",
" (71, 'states'),\n",
" (72, 'see'),\n",
" (72, 'sees'),\n",
" (73, 'after'),\n",
" (74, 'could'),\n",
" (75, 'then'),\n",
" (76, 'me'),\n",
" (77, 'most'),\n",
" (78, 'over'),\n",
" (79, 'very'),\n",
" (79, 'vary'),\n",
" (80, 'your'),\n",
" (81, 'between'),\n",
" (82, 'where'),\n",
" (83, 'now'),\n",
" (84, 'shall'),\n",
" (85, 'work'),\n",
" (85, 'works'),\n",
" (86, 'those'),\n",
" (87, 'same'),\n",
" (88, 'well'),\n",
" (88, 'wells'),\n",
" (89, 'each'),\n",
" (90, 'many'),\n",
" (91, 'being'),\n",
" (91, 'beings'),\n",
" (92, 'years'),\n",
" (92, 'year'),\n",
" (93, 'did'),\n",
" (94, 'through'),\n",
" (94, 'thru'),\n",
" (95, 'must'),\n",
" (96, 'upon'),\n",
" (97, 'before'),\n",
" (98, 'like'),\n",
" (98, 'likes'),\n",
" (99, 'use'),\n",
" (100, 'part'),\n",
" (100, 'parts'),\n",
" (101, 'general'),\n",
" (101, 'generals'),\n",
" (102, 'people'),\n",
" (102, 'peoples'),\n",
" (103, 'because'),\n",
" (104, 'used'),\n",
" (105, 'how'),\n",
" (106, 'even'),\n",
" (107, 'much'),\n",
" (108, 'during'),\n",
" (109, 'both'),\n",
" (110, 'case'),\n",
" (110, 'cases'),\n",
" (111, 'three'),\n",
" (112, 'number'),\n",
" (112, 'numbers'),\n",
" (113, 'make'),\n",
" (113, 'makes'),\n",
" (114, 'per'),\n",
" (115, 'great'),\n",
" (115, 'grate'),\n",
" (116, 'act'),\n",
" (116, 'acts'),\n",
" (117, 'way'),\n",
" (117, 'ways'),\n",
" (117, 'weigh'),\n",
" (117, 'weighs'),\n",
" (118, 'life'),\n",
" (118, 'lives'),\n",
" (119, 'good'),\n",
" (119, 'goods'),\n",
" (120, 'day'),\n",
" (120, 'days'),\n",
" (120, 'dazed'),\n",
" (120, 'daze'),\n",
" (121, 'public'),\n",
" (122, 'man'),\n",
" (122, 'mans'),\n",
" (123, 'however'),\n",
" (124, 'system'),\n",
" (124, 'systems'),\n",
" (125, 'water'),\n",
" (125, 'waters'),\n",
" (126, 'without'),\n",
" (127, 'government'),\n",
" (127, 'governments'),\n",
" (128, 'while'),\n",
" (129, 'long'),\n",
" (130, 'order'),\n",
" (130, 'orders'),\n",
" (131, 'law'),\n",
" (131, 'laws'),\n",
" (132, 'section'),\n",
" (132, 'sections'),\n",
" (133, 'court'),\n",
" (133, 'courts'),\n",
" (134, 'high'),\n",
" (135, 'right'),\n",
" (135, 'rights'),\n",
" (136, 'own'),\n",
" (137, 'found'),\n",
" (138, 'united'),\n",
" (139, 'just'),\n",
" (140, 'here'),\n",
" (141, 'against'),\n",
" (142, 'world'),\n",
" (142, 'worlds'),\n",
" (144, 'company'),\n",
" (144, 'companies'),\n",
" (145, 'within'),\n",
" (146, 'given'),\n",
" (147, 'service'),\n",
" (147, 'services'),\n",
" (148, 'house'),\n",
" (148, 'houses'),\n",
" (149, 'another'),\n",
" (150, 'power'),\n",
" (150, 'powers'),\n",
" (151, 'place'),\n",
" (151, 'places'),\n",
" (152, 'know'),\n",
" (152, 'knows'),\n",
" (153, 'little'),\n",
" (154, 'down'),\n",
" (155, 'present'),\n",
" (155, 'presents'),\n",
" (156, 'every'),\n",
" (157, 'national'),\n",
" (157, 'nationals'),\n",
" (158, 'back'),\n",
" (158, 'backs'),\n",
" (159, 'take'),\n",
" (159, 'takes'),\n",
" (160, 'information'),\n",
" (161, 'men'),\n",
" (162, 'since'),\n",
" (163, 'might'),\n",
" (164, 'small'),\n",
" (165, 'large'),\n",
" (166, 'school'),\n",
" (166, 'schools'),\n",
" (167, 'following'),\n",
" (168, 'still'),\n",
" (170, 'last'),\n",
" (170, 'lasts'),\n",
" (171, 'city'),\n",
" (171, 'cities'),\n",
" (172, 'second'),\n",
" (172, 'seconds'),\n",
" (173, 'development'),\n",
" (173, 'developments'),\n",
" (174, 'different'),\n",
" (175, 'university'),\n",
" (175, 'universities'),\n",
" (176, 'old'),\n",
" (177, 'form'),\n",
" (177, 'forms'),\n",
" (178, 'point'),\n",
" (178, 'points'),\n",
" (179, 'total'),\n",
" (179, 'totals'),\n",
" (180, 'data'),\n",
" (181, 'too'),\n",
" (182, 'committee'),\n",
" (182, 'committees'),\n",
" (183, 'report'),\n",
" (183, 'reports'),\n",
" (184, 'business'),\n",
" (184, 'businesses'),\n",
" (185, 'think'),\n",
" (185, 'thinks'),\n",
" (186, 'end'),\n",
" (186, 'ends'),\n",
" (187, 'get'),\n",
" (187, 'gets'),\n",
" (188, 'set'),\n",
" (188, 'sets'),\n",
" (189, 'research'),\n",
" (189, 'researches'),\n",
" (190, 'say'),\n",
" (190, 'says'),\n",
" (191, 'come'),\n",
" (191, 'comes'),\n",
" (192, 'country'),\n",
" (192, 'countries'),\n",
" (193, 'never'),\n",
" (194, 'fact'),\n",
" (194, 'facts'),\n",
" (195, 'go'),\n",
" (195, 'goes'),\n",
" (196, 'control'),\n",
" (196, 'controls'),\n",
" (197, 'thus'),\n",
" (198, 'having'),\n",
" (199, 'value'),\n",
" (199, 'values'),\n",
" (200, 'social'),\n",
" (201, 'department'),\n",
" (201, 'departments'),\n",
" (202, 'few'),\n",
" (203, 'above'),\n",
" (204, 'important'),\n",
" (205, 'interest'),\n",
" (205, 'interests'),\n",
" (206, 'study'),\n",
" (206, 'studies'),\n",
" (207, 'off'),\n",
" (208, 'area'),\n",
" (208, 'areas'),\n",
" (209, 'means'),\n",
" (209, 'mean'),\n",
" (210, 'office'),\n",
" (210, 'offices'),\n",
" (211, 'group'),\n",
" (211, 'groups'),\n",
" (212, 'give'),\n",
" (212, 'gives'),\n",
" (213, 'again'),\n",
" (214, 'war'),\n",
" (214, 'wars'),\n",
" (215, 'whether'),\n",
" (216, 'question'),\n",
" (216, 'questions'),\n",
" (217, 'called'),\n",
" (218, 'period'),\n",
" (218, 'periods'),\n",
" (219, 'line'),\n",
" (219, 'lines'),\n",
" (220, 'land'),\n",
" (220, 'lands'),\n",
" (221, 'among'),\n",
" (222, 'table'),\n",
" (222, 'tables'),\n",
" (223, 'board'),\n",
" (223, 'boards'),\n",
" (224, 'until'),\n",
" (225, 'hand'),\n",
" (225, 'hands'),\n",
" (226, 'taken'),\n",
" (227, 'need'),\n",
" (227, 'needs'),\n",
" (228, 'education'),\n",
" (229, 'certain'),\n",
" (230, 'county'),\n",
" (230, 'counties'),\n",
" (231, 'action'),\n",
" (231, 'actions'),\n",
" (232, 'several'),\n",
" (233, 'am'),\n",
" (234, 'course'),\n",
" (234, 'courses'),\n",
" (235, 'far'),\n",
" (236, 'effect'),\n",
" (236, 'effects'),\n",
" (237, 'possible'),\n",
" (238, 'though'),\n",
" (239, 'left'),\n",
" (240, 'further'),\n",
" (241, 'home'),\n",
" (241, 'homes'),\n",
" (242, 'person'),\n",
" (242, 'persons'),\n",
" (243, 'health'),\n",
" (244, 'amount'),\n",
" (244, 'amounts'),\n",
" (245, 'members'),\n",
" (245, 'member'),\n",
" (246, 'subject'),\n",
" (246, 'subjects'),\n",
" (247, 'yet'),\n",
" (248, 'program'),\n",
" (248, 'programs'),\n",
" (249, 'therefore'),\n",
" (250, 'process'),\n",
" (250, 'processes'),\n",
" (251, 'rate'),\n",
" (251, 'rates'),\n",
" (252, 'local'),\n",
" (252, 'locals'),\n",
" (253, 'name'),\n",
" (253, 'names'),\n",
" (254, 'find'),\n",
" (254, 'finds'),\n",
" (255, 'necessary'),\n",
" (255, 'necessaries'),\n",
" (256, 'often'),\n",
" (257, 'others'),\n",
" (258, 'whole'),\n",
" (259, 'change'),\n",
" (259, 'changes'),\n",
" (260, 'example'),\n",
" (260, 'examples'),\n",
" (261, 'president'),\n",
" (262, 'history'),\n",
" (262, 'histories'),\n",
" (263, 'best'),\n",
" (264, 'although'),\n",
" (265, 'family'),\n",
" (265, 'families'),\n",
" (266, 'side'),\n",
" (266, 'sides'),\n",
" (267, 'women'),\n",
" (267, 'woman'),\n",
" (268, 'held'),\n",
" (269, 'based'),\n",
" (270, 'south'),\n",
" (271, 'special'),\n",
" (272, 'required'),\n",
" (273, 'came'),\n",
" (274, 'thought'),\n",
" (274, 'thoughts'),\n",
" (275, 'five'),\n",
" (276, 'always'),\n",
" (277, 'himself'),\n",
" (278, 'air'),\n",
" (278, 'airs'),\n",
" (279, 'known'),\n",
" (280, 'head'),\n",
" (280, 'heads'),\n",
" (281, 'either'),\n",
" (282, 'property'),\n",
" (282, 'properties'),\n",
" (283, 'cost'),\n",
" (283, 'costs'),\n",
" (284, 'rather'),\n",
" (285, 'bill'),\n",
" (285, 'bills'),\n",
" (286, 'put'),\n",
" (286, 'puts'),\n",
" (287, 'human'),\n",
" (287, 'humans'),\n",
" (288, 'figure'),\n",
" (288, 'figures'),\n",
" (289, 'results'),\n",
" (289, 'result'),\n",
" (290, 'level'),\n",
" (290, 'levels'),\n",
" (291, 'conditions'),\n",
" (291, 'condition'),\n",
" (292, 'full'),\n",
" (293, 'book'),\n",
" (293, 'books'),\n",
" (294, 'available'),\n",
" (295, 'early'),\n",
" (296, 'matter'),\n",
" (296, 'matters'),\n",
" (297, 'common'),\n",
" (298, 'light'),\n",
" (298, 'lights'),\n",
" (299, 'let'),\n",
" (299, 'lets'),\n",
" (300, 'society'),\n",
" (300, 'societies'),\n",
" (301, 'body'),\n",
" (301, 'bodies'),\n",
" (302, 'international'),\n",
" (303, 'including'),\n",
" (304, 'free'),\n",
" (305, 'evidence'),\n",
" (305, 'evidences'),\n",
" (306, 'better'),\n",
" (307, 'type'),\n",
" (307, 'types'),\n",
" (308, 'provided'),\n",
" (309, 'due'),\n",
" (309, 'dues'),\n",
" (310, 'next'),\n",
" (311, 'production'),\n",
" (311, 'productions'),\n",
" (312, 'once'),\n",
" (313, 'done'),\n",
" (314, 'making'),\n",
" (315, 'least'),\n",
" (316, 'support'),\n",
" (316, 'supports'),\n",
" (317, 'north'),\n",
" (318, 'later'),\n",
" (319, 'using'),\n",
" (320, 'things'),\n",
" (320, 'thing'),\n",
" (321, 'economic'),\n",
" (322, 'chapter'),\n",
" (322, 'chapters'),\n",
" (323, 'various'),\n",
" (324, 'why'),\n",
" (325, 'white'),\n",
" (325, 'whites'),\n",
" (326, 'going'),\n",
" (327, 'commission'),\n",
" (327, 'commissions'),\n",
" (328, 'federal'),\n",
" (329, 'away'),\n",
" (330, 'field'),\n",
" (330, 'fields'),\n",
" (331, 'nature'),\n",
" (331, 'natures'),\n",
" (332, 'policy'),\n",
" (332, 'policies'),\n",
" (333, 'become'),\n",
" (334, 'political'),\n",
" (335, 'increase'),\n",
" (335, 'increases'),\n",
" (336, 'around'),\n",
" (337, 'age'),\n",
" (337, 'ages'),\n",
" (338, 'want'),\n",
" (338, 'wants'),\n",
" (339, 'low'),\n",
" (339, 'lows'),\n",
" (340, 'trade'),\n",
" (340, 'trades'),\n",
" (341, 'half'),\n",
" (341, 'halves'),\n",
" (342, 'position'),\n",
" (342, 'positions'),\n",
" (343, 'young'),\n",
" (344, 'money'),\n",
" (344, 'moneys'),\n",
" (345, 'percent'),\n",
" (346, 'cent'),\n",
" (346, 'sense'),\n",
" (346, 'senses'),\n",
" (346, 'scent'),\n",
" (346, 'sensed'),\n",
" (346, 'scents'),\n",
" (347, 'class'),\n",
" (347, 'classes'),\n",
" (348, 'words'),\n",
" (348, 'word'),\n",
" (349, 'view'),\n",
" (349, 'views'),\n",
" (350, 'provide'),\n",
" (351, 'seen'),\n",
" (352, 'show'),\n",
" (352, 'shows'),\n",
" (353, 'district'),\n",
" (353, 'districts'),\n",
" (354, 'party'),\n",
" (354, 'parties'),\n",
" (355, 'analysis'),\n",
" (355, 'analyses'),\n",
" (356, 'care'),\n",
" (356, 'cares'),\n",
" (357, 'june'),\n",
" (358, 'foreign'),\n",
" (359, 'shown'),\n",
" (360, 'received'),\n",
" (361, 'management'),\n",
" (362, 'third'),\n",
" (362, 'thirds'),\n",
" (363, 'took'),\n",
" (364, 'something'),\n",
" (365, 'tax'),\n",
" (365, 'taxes'),\n",
" (366, 'account'),\n",
" (366, 'accounts'),\n",
" (367, 'problem'),\n",
" (367, 'problems'),\n",
" (368, 'almost'),\n",
" (369, 'west'),\n",
" (370, 'nothing'),\n",
" (371, 'together'),\n",
" (372, 'individual'),\n",
" (372, 'individuals'),\n",
" (373, 'open'),\n",
" (373, 'opens'),\n",
" (374, 'material'),\n",
" (374, 'materials'),\n",
" (375, 'paper'),\n",
" (375, 'papers'),\n",
" (376, 'feet'),\n",
" (376, 'foot'),\n",
" (377, 'force'),\n",
" (377, 'forces'),\n",
" (378, 'association'),\n",
" (378, 'associations'),\n",
" (379, 'purpose'),\n",
" (379, 'purposes'),\n",
" (380, 'terms'),\n",
" (380, 'term'),\n",
" (381, 'method'),\n",
" (381, 'methods'),\n",
" (382, 'help'),\n",
" (382, 'helps'),\n",
" (383, 'real'),\n",
" (384, 'ever'),\n",
" (385, 'already'),\n",
" (386, 'along'),\n",
" (387, 'went'),\n",
" (388, 'particular'),\n",
" (388, 'particulars'),\n",
" (389, 'energy'),\n",
" (389, 'energies'),\n",
" (390, 'secretary'),\n",
" (391, 'date'),\n",
" (391, 'dates'),\n",
" (392, 'price'),\n",
" (392, 'prices'),\n",
" (393, 'short'),\n",
" (393, 'shorts'),\n",
" (394, 'true'),\n",
" (395, 'street'),\n",
" (395, 'streets'),\n",
" (396, 'building'),\n",
" (396, 'buildings'),\n",
" (397, 'room'),\n",
" (397, 'rooms'),\n",
" (398, 'market'),\n",
" (398, 'markets'),\n",
" (399, 'look'),\n",
" (399, 'looks'),\n",
" (400, 'similar'),\n",
" (401, 'industry'),\n",
" (401, 'industries'),\n",
" (402, 'bank'),\n",
" (402, 'banks'),\n",
" (403, 'according'),\n",
" (404, 'itself'),\n",
" (405, 'application'),\n",
" (405, 'applications'),\n",
" (406, 'current'),\n",
" (406, 'currents'),\n",
" (407, 'read'),\n",
" (407, 'reads'),\n",
" (408, 'press'),\n",
" (408, 'presses'),\n",
" (409, 'community'),\n",
" (409, 'communities'),\n",
" (410, 'plan'),\n",
" (410, 'plans'),\n",
" (411, 'whose'),\n",
" (412, 'major'),\n",
" (412, 'majors'),\n",
" (413, 'considered'),\n",
" (414, 'mind'),\n",
" (414, 'minds'),\n",
" (415, 'union'),\n",
" (415, 'unions'),\n",
" (416, 'cause'),\n",
" (416, 'causes'),\n",
" (417, 'able'),\n",
" (418, 'surface'),\n",
" (418, 'surfaces'),\n",
" (419, 'face'),\n",
" (419, 'faces'),\n",
" (420, 'river'),\n",
" (420, 'rivers'),\n",
" (421, 'council'),\n",
" (421, 'councils'),\n",
" (422, 'income'),\n",
" (422, 'incomes'),\n",
" (423, 'july'),\n",
" (424, 'near'),\n",
" (425, 'experience'),\n",
" (425, 'experiences'),\n",
" (426, 'non'),\n",
" (427, 'paid'),\n",
" (428, 'pay'),\n",
" (428, 'pays'),\n",
" (429, 'reason'),\n",
" (429, 'reasons'),\n",
" (430, 'themselves'),\n",
" (431, 'asked'),\n",
" (432, 'march'),\n",
" (432, 'marches'),\n",
" (433, 'king'),\n",
" (433, 'kings'),\n",
" (434, 'higher'),\n",
" (435, 'single'),\n",
" (435, 'singles'),\n",
" (436, 'average'),\n",
" (436, 'averages'),\n",
" (437, 'father'),\n",
" (437, 'fathers'),\n",
" (438, 'note'),\n",
" (438, 'notes'),\n",
" (439, 'treatment'),\n",
" (439, 'treatments'),\n",
" (440, 'love'),\n",
" (440, 'loves'),\n",
" (441, 'black'),\n",
" (441, 'blacks'),\n",
" (442, 'knowledge'),\n",
" (443, 'enough'),\n",
" (444, 'future'),\n",
" (444, 'futures'),\n",
" (445, 'kind'),\n",
" (445, 'kinds'),\n",
" (446, 'lower'),\n",
" (446, 'lowers'),\n",
" (447, 'authority'),\n",
" (447, 'authorities'),\n",
" (448, 'past'),\n",
" (449, 'natural'),\n",
" (450, 'six'),\n",
" (451, 'food'),\n",
" (451, 'foods'),\n",
" (452, 'working'),\n",
" (452, 'workings'),\n",
" (453, 'central'),\n",
" (454, 'college'),\n",
" (454, 'colleges'),\n",
" (455, 'self'),\n",
" (455, 'selves'),\n",
" (456, 'products'),\n",
" (456, 'product'),\n",
" (457, 'model'),\n",
" (457, 'models'),\n",
" (458, 'brought'),\n",
" (459, 'greater'),\n",
" (460, 'test'),\n",
" (460, 'tests'),\n",
" (461, 'nor'),\n",
" (462, 'students'),\n",
" (462, 'student'),\n",
" (463, 'private'),\n",
" (464, 'construction'),\n",
" (464, 'constructions'),\n",
" (465, 'perhaps'),\n",
" (466, 'ground'),\n",
" (466, 'grounds'),\n",
" (467, 'sir'),\n",
" (468, 'basis'),\n",
" (469, 'months'),\n",
" (469, 'month'),\n",
" (470, 'growth'),\n",
" (470, 'growths'),\n",
" (471, 'increased'),\n",
" (472, 'east'),\n",
" (473, 'language'),\n",
" (473, 'languages'),\n",
" (474, 'rule'),\n",
" (474, 'rules'),\n",
" (475, 'continued'),\n",
" (476, 'quite'),\n",
" (477, 'except'),\n",
" (478, 'series'),\n",
" (479, 'practice'),\n",
" (479, 'practices'),\n",
" (480, 'night'),\n",
" (480, 'knight'),\n",
" (480, 'nights'),\n",
" (481, 'eyes'),\n",
" (481, 'eye'),\n",
" (482, 'oil'),\n",
" (482, 'oils'),\n",
" (483, 'art'),\n",
" (483, 'arts'),\n",
" (484, 'told'),\n",
" (485, 'especially'),\n",
" (486, 'population'),\n",
" (486, 'populations'),\n",
" (487, 'science'),\n",
" (487, 'sciences'),\n",
" (488, 'whom'),\n",
" (489, 'obtained'),\n",
" (490, 'capital'),\n",
" (490, 'capitol'),\n",
" (490, 'capitals'),\n",
" (491, 'include'),\n",
" (492, 'generally'),\n",
" (493, 'meeting'),\n",
" (493, 'meetings'),\n",
" (494, 'specific'),\n",
" (494, 'specifics'),\n",
" (495, 'described'),\n",
" (496, 'believe'),\n",
" (497, 'review'),\n",
" (497, 'reviews'),\n",
" (498, 'issue'),\n",
" (498, 'issues'),\n",
" (499, 'respect'),\n",
" (499, 'respects'),\n",
" (500, 'contract'),\n",
" (500, 'contracts'),\n",
" (501, 'became'),\n",
" (502, 'medical'),\n",
" (503, 'road'),\n",
" (503, 'roads'),\n",
" (504, 'got'),\n",
" (505, 'clear'),\n",
" (505, 'clears'),\n",
" (506, 'main'),\n",
" (506, 'mains'),\n",
" (507, 'labor'),\n",
" (507, 'labors'),\n",
" (508, 'operation'),\n",
" (508, 'operations'),\n",
" (509, 'size'),\n",
" (509, 'sizes'),\n",
" (510, 'below'),\n",
" (511, 'hours'),\n",
" (511, 'hour'),\n",
" (512, 'addition'),\n",
" (512, 'additions'),\n",
" (513, 'probably'),\n",
" (514, 'century'),\n",
" (514, 'centuries'),\n",
" (515, 'personal'),\n",
" (516, 'plant'),\n",
" (516, 'plants'),\n",
" (517, 'training'),\n",
" (518, 'design'),\n",
" (518, 'designs'),\n",
" (519, 'statement'),\n",
" (519, 'statements'),\n",
" (520, 'structure'),\n",
" (520, 'structures'),\n",
" (521, 'project'),\n",
" (521, 'projects'),\n",
" (522, 'million'),\n",
" (522, 'millions'),\n",
" (523, 'usually'),\n",
" (524, 'range'),\n",
" (524, 'ranges'),\n",
" (525, 'call'),\n",
" (525, 'calls'),\n",
" (526, 'mother'),\n",
" (526, 'mothers'),\n",
" (527, 'seems'),\n",
" (528, 'standard'),\n",
" (528, 'standards'),\n",
" (529, 'return'),\n",
" (529, 'returns'),\n",
" (530, 'title'),\n",
" (530, 'titles'),\n",
" (531, 'established'),\n",
" (532, 'keep'),\n",
" (532, 'keeps'),\n",
" (533, 'space'),\n",
" (533, 'spaces'),\n",
" (534, 'annual'),\n",
" (535, 'record'),\n",
" (535, 'records'),\n",
" (536, 'close'),\n",
" (536, 'closes'),\n",
" (537, 'april'),\n",
" (538, 'complete'),\n",
" (539, 'page'),\n",
" (539, 'pages'),\n",
" (540, 'heart'),\n",
" (540, 'hearts'),\n",
" (541, 'fig'),\n",
" (541, 'figs'),\n",
" (542, 'quality'),\n",
" (542, 'qualities'),\n",
" (543, 'gas'),\n",
" (543, 'gases'),\n",
" (544, 'letter'),\n",
" (544, 'letters'),\n",
" (545, 'stock'),\n",
" (545, 'stocks'),\n",
" (546, 'gave'),\n",
" (547, 'related'),\n",
" (548, 'administration'),\n",
" (548, 'administrations'),\n",
" (549, 'activities'),\n",
" (549, 'activity'),\n",
" (550, 'theory'),\n",
" (550, 'theories'),\n",
" (551, 'town'),\n",
" (551, 'towns'),\n",
" (552, 'equipment'),\n",
" (552, 'equipments'),\n",
" (553, 'soon'),\n",
" (554, 'decision'),\n",
" (554, 'decisions'),\n",
" (555, 'pressure'),\n",
" (555, 'pressures'),\n",
" (556, 'written'),\n",
" (557, 'corporation'),\n",
" (557, 'corporations'),\n",
" (558, 'tell'),\n",
" (558, 'tells'),\n",
" (559, 'agreement'),\n",
" (559, 'agreements'),\n",
" (560, 'reported'),\n",
" (561, 'attention'),\n",
" (561, 'attentions'),\n",
" (562, 'fire'),\n",
" (562, 'fires'),\n",
" (563, 'direct'),\n",
" (564, 'saw'),\n",
" (564, 'saws'),\n",
" (565, 'published'),\n",
" (566, 'temperature'),\n",
" (566, 'temperatures'),\n",
" (567, 'species'),\n",
" (567, 'specie'),\n",
" (568, 'really'),\n",
" (569, 'function'),\n",
" (569, 'functions'),\n",
" (570, 'military'),\n",
" (571, 'proposed'),\n",
" (572, 'january'),\n",
" (573, 'additional'),\n",
" (574, 'late'),\n",
" (575, 'opinion'),\n",
" (575, 'opinions'),\n",
" (576, 'loss'),\n",
" (576, 'losses'),\n",
" (577, 'limited'),\n",
" (578, 'source'),\n",
" (578, 'sources'),\n",
" (579, 'article'),\n",
" (579, 'articles'),\n",
" (580, 'notice'),\n",
" (580, 'notices'),\n",
" (581, 'security'),\n",
" (581, 'securities'),\n",
" (582, 'organization'),\n",
" (582, 'organizations'),\n",
" (582, 'organisation'),\n",
" (582, 'organisations'),\n",
" (583, 'financial'),\n",
" (584, 'follows'),\n",
" (585, 'miles'),\n",
" (585, 'mile'),\n",
" (586, 'chief'),\n",
" (586, 'chiefs'),\n",
" (587, 'distribution'),\n",
" (587, 'distributions'),\n",
" (588, 'sometimes'),\n",
" (589, 'insurance'),\n",
" (590, 'son'),\n",
" (590, 'sons'),\n",
" (591, 'strong'),\n",
" (592, 'length'),\n",
" (592, 'lengths'),\n",
" (593, 'original'),\n",
" (593, 'originals'),\n",
" (594, 'yes'),\n",
" (595, 'effective'),\n",
" (596, 'defendant'),\n",
" (596, 'defendants'),\n",
" (597, 'living'),\n",
" (598, 'december'),\n",
" (599, 'character'),\n",
" (599, 'characters'),\n",
" (600, 'began'),\n",
" (601, 'carried'),\n",
" (602, 'supply'),\n",
" (602, 'supplies'),\n",
" (603, 'blood'),\n",
" (604, 'taking'),\n",
" (605, 'manner'),\n",
" (605, 'manners'),\n",
" (606, 'journal'),\n",
" (606, 'journals'),\n",
" (607, 'hundred'),\n",
" (607, 'hundreds'),\n",
" (608, 'red'),\n",
" (609, 'developed'),\n",
" (610, 'performance'),\n",
" (610, 'performances'),\n",
" (611, 'situation'),\n",
" (611, 'situations'),\n",
" (612, 'felt'),\n",
" (613, 'workers'),\n",
" (613, 'worker'),\n",
" (614, 'volume'),\n",
" (614, 'volumes'),\n",
" (615, 'presented'),\n",
" (616, 'knew'),\n",
" (617, 'answer'),\n",
" (617, 'answers'),\n",
" (618, 'resources'),\n",
" (618, 'resource'),\n",
" (619, 'industrial'),\n",
" (620, 'twenty'),\n",
" (620, 'twenties'),\n",
" (621, 'sent'),\n",
" (622, 'looked'),\n",
" (623, 'library'),\n",
" (623, 'libraries'),\n",
" (624, 'added'),\n",
" (625, 'passed'),\n",
" (626, 'ten'),\n",
" (626, 'tens'),\n",
" (627, 'sea'),\n",
" (627, 'seas'),\n",
" (628, 'applied'),\n",
" (629, 'included'),\n",
" (630, 'physical'),\n",
" (631, 'across'),\n",
" (632, 'army'),\n",
" (632, 'armies'),\n",
" (633, 'toward'),\n",
" (634, 'produced'),\n",
" (635, 'placed'),\n",
" (636, 'role'),\n",
" (636, 'roles'),\n",
" (637, 'october'),\n",
" (638, 'final'),\n",
" (639, 'approach'),\n",
" (639, 'approaches'),\n",
" (640, 'provisions'),\n",
" (640, 'provision'),\n",
" (641, 'leave'),\n",
" (642, 'director'),\n",
" (642, 'directors'),\n",
" (643, 'employment'),\n",
" (643, 'employments'),\n",
" (644, 'anything'),\n",
" (645, 'particularly'),\n",
" (646, 'hard'),\n",
" (647, 'outside'),\n",
" (648, 'week'),\n",
" (648, 'weeks'),\n",
" (649, 'feel'),\n",
" (649, 'feels'),\n",
" (650, 'charge'),\n",
" (650, 'charges'),\n",
" (651, 'indeed'),\n",
" (652, 'degree'),\n",
" (652, 'degrees'),\n",
" (653, 'reference'),\n",
" ...]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"final_wordlist"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "d1a06597-4ad5-4566-a716-8bbad416b7ab",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"with open(\"final_wordlist.csv\", \"w\") as f:\n",
" f.write(\"word,number\\n\")\n",
" \n",
" for w in final_wordlist:\n",
" lemmatized = \"\" if not w[1] else w[1]\n",
" f.write(f\"{w[1].upper()},{w[0]}\")\n",
" f.write(\"\\n\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c88fe193-11cc-4a06-a3cf-d1ad85f44d14",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"final_wordlist"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2a0d177b-3499-42fb-8091-29547567d69a",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}