{
"cells": [
{
"cell_type": "code",
"execution_count": 73,
"metadata": {
"ExecuteTime": {
"end_time": "2018-12-16T11:12:15.759516Z",
"start_time": "2018-12-16T11:12:14.836378Z"
}
},
"outputs": [],
"source": [
"import os\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import requests\n",
"from bs4 import BeautifulSoup\n",
"from requests import session"
]
},
{
"cell_type": "code",
"execution_count": 229,
"metadata": {
"ExecuteTime": {
"end_time": "2018-12-16T11:12:15.834578Z",
"start_time": "2018-12-16T11:12:15.763518Z"
}
},
"outputs": [],
"source": [
"dfsi=pd.read_csv('dfsi.csv',sep=';')"
]
},
{
"cell_type": "code",
"execution_count": 230,
"metadata": {
"ExecuteTime": {
"end_time": "2018-12-16T11:12:15.899576Z",
"start_time": "2018-12-16T11:12:15.838583Z"
}
},
"outputs": [],
"source": [
"dfsi2=pd.read_csv('dfsi2.csv',sep=';')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Normalize the column names of both tables so that they can be joined on the company name."
]
},
{
"cell_type": "code",
"execution_count": 231,
"metadata": {},
"outputs": [],
"source": [
"# Both tables share the same leading and trailing columns; only the four\n",
"# yearly metric columns differ (employee counts in dfsi, revenue in dfsi2).\n",
"leading_cols = ['Unnamed: 0', '0', 'tavaly', 'Cégnév']\n",
"trailing_cols = ['region', 'nr', 'coords', 'kws', 'cms']\n",
"headcount_cols = [\n",
"    'Alkalmazottak száma 2018',\n",
"    'Alkalmazottak száma 2017',\n",
"    'Alkalmazottak száma 2016',\n",
"    'Alkalmazottak száma 2015',\n",
"]\n",
"revenue_cols = [\n",
"    'Árbevétel 2018 (RON)',\n",
"    'Árbevétel 2017 (RON)',\n",
"    'Árbevétel 2016 (RON)',\n",
"    'Árbevétel 2015 (RON)',\n",
"]\n",
"dfsi.columns = leading_cols + headcount_cols + trailing_cols\n",
"dfsi2.columns = leading_cols + revenue_cols + trailing_cols"
]
},
{
"cell_type": "code",
"execution_count": 232,
"metadata": {
"ExecuteTime": {
"end_time": "2018-12-16T11:12:15.911583Z",
"start_time": "2018-12-16T11:12:15.904585Z"
}
},
"outputs": [],
"source": [
"# Copy each table's 'nr' column under a table-specific name so that it is\n",
"# still distinguishable after the two tables are joined.\n",
"dfsi = dfsi.assign(nr_alkalmazottak=dfsi['nr'])\n",
"dfsi2 = dfsi2.assign(nr_arbevetel=dfsi2['nr'])"
]
},
{
"cell_type": "code",
"execution_count": 233,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Unnamed: 0 | \n",
" 0 | \n",
" tavaly | \n",
" Cégnév | \n",
" Alkalmazottak száma 2018 | \n",
" Alkalmazottak száma 2017 | \n",
" Alkalmazottak száma 2016 | \n",
" Alkalmazottak száma 2015 | \n",
" region | \n",
" nr | \n",
" coords | \n",
" kws | \n",
" cms | \n",
" nr_alkalmazottak | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" READY GARMENT TECHNOLOGY ROMANIA SRL | \n",
" 990 | \n",
" 730 | \n",
" 1 005 | \n",
" 1 199 | \n",
" also-haromszek | \n",
" 1 | \n",
" ['45.8772830', ' 25.7994510'] | \n",
" ['mosas-tisztitas', '', '\"'] | \n",
" 520036, Sepsiszentgy\\xf6rgy, Cs\\xedki u., 149/A\\t | \n",
" 1 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 2 | \n",
" 1 | \n",
" VALKES SRL | \n",
" 621 | \n",
" 788 | \n",
" 886 | \n",
" 1 007 | \n",
" also-haromszek | \n",
" 2 | \n",
" ['45.855129 ', ' 25.806651 '] | \n",
" ['kabelek', '', '\"'] | \n",
" 520077, Sepsiszentgy\\xf6rgy, Păiş Da... | \n",
" 2 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Unnamed: 0 0 tavaly Cégnév \\\n",
"0 0 1 2 READY GARMENT TECHNOLOGY ROMANIA SRL \n",
"1 1 2 1 VALKES SRL \n",
"\n",
" Alkalmazottak száma 2018 Alkalmazottak száma 2017 Alkalmazottak száma 2016 \\\n",
"0 990 730 1 005 \n",
"1 621 788 886 \n",
"\n",
" Alkalmazottak száma 2015 region nr coords \\\n",
"0 1 199 also-haromszek 1 ['45.8772830', ' 25.7994510'] \n",
"1 1 007 also-haromszek 2 ['45.855129 ', ' 25.806651 '] \n",
"\n",
" kws \\\n",
"0 ['mosas-tisztitas', '', '\"'] \n",
"1 ['kabelek', '', '\"'] \n",
"\n",
" cms nr_alkalmazottak \n",
"0 520036, Sepsiszentgy\\xf6rgy, Cs\\xedki u., 149/A\\t 1 \n",
"1 520077, Sepsiszentgy\\xf6rgy, Păiş Da... 2 "
]
},
"execution_count": 233,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dfsi.head(2)"
]
},
{
"cell_type": "code",
"execution_count": 234,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Unnamed: 0 | \n",
" 0 | \n",
" tavaly | \n",
" Cégnév | \n",
" Árbevétel 2018 (RON) | \n",
" Árbevétel 2017 (RON) | \n",
" Árbevétel 2016 (RON) | \n",
" Árbevétel 2015 (RON) | \n",
" region | \n",
" nr | \n",
" coords | \n",
" kws | \n",
" cms | \n",
" nr_arbevetel | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" FABRICA DE LAPTE BRASOV SA | \n",
" 468 016 207 | \n",
" 431 950 759 | \n",
" 370 418 528 | \n",
" 338 283 110 | \n",
" also-haromszek | \n",
" 1 | \n",
" ['46.0750630', ' 25.6087250'] | \n",
" ['tejfeldolgozas', '', '\"'] | \n",
" 525100, Bar\\xf3t, V\\xedz u., 109\\t | \n",
" 1 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 2 | \n",
" 2 | \n",
" COVALACT SA | \n",
" 252 640 519 | \n",
" 235 415 329 | \n",
" 230 073 646 | \n",
" 199 493 229 | \n",
" also-haromszek | \n",
" 2 | \n",
" ['45.869573 ', ' 25.800705 '] | \n",
" ['tejfeldolgozas', '', '\"'] | \n",
" 520036, Sepsiszentgy\\xf6rgy, Oltmez\\xf5 u., 1\\t | \n",
" 2 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Unnamed: 0 0 tavaly Cégnév Árbevétel 2018 (RON) \\\n",
"0 0 1 1 FABRICA DE LAPTE BRASOV SA 468 016 207 \n",
"1 1 2 2 COVALACT SA 252 640 519 \n",
"\n",
" Árbevétel 2017 (RON) Árbevétel 2016 (RON) Árbevétel 2015 (RON) \\\n",
"0 431 950 759 370 418 528 338 283 110 \n",
"1 235 415 329 230 073 646 199 493 229 \n",
"\n",
" region nr coords \\\n",
"0 also-haromszek 1 ['46.0750630', ' 25.6087250'] \n",
"1 also-haromszek 2 ['45.869573 ', ' 25.800705 '] \n",
"\n",
" kws \\\n",
"0 ['tejfeldolgozas', '', '\"'] \n",
"1 ['tejfeldolgozas', '', '\"'] \n",
"\n",
" cms nr_arbevetel \n",
"0 525100, Bar\\xf3t, V\\xedz u., 109\\t 1 \n",
"1 520036, Sepsiszentgy\\xf6rgy, Oltmez\\xf5 u., 1\\t 2 "
]
},
"execution_count": 234,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dfsi2.head(2)"
]
},
{
"cell_type": "code",
"execution_count": 235,
"metadata": {
"ExecuteTime": {
"end_time": "2018-12-16T11:12:15.941582Z",
"start_time": "2018-12-16T11:12:15.915585Z"
}
},
"outputs": [],
"source": [
"# Outer-join the two tables on the company name ('Cégnév'). Columns present\n",
"# in both tables get a '_left' (dfsi) or '_right' (dfsi2) suffix.\n",
"left_by_name = dfsi.set_index('Cégnév')\n",
"right_by_name = dfsi2.set_index('Cégnév')\n",
"data = left_by_name.join(right_by_name, how='outer', lsuffix='_left', rsuffix='_right')"
]
},
{
"cell_type": "code",
"execution_count": 236,
"metadata": {
"ExecuteTime": {
"end_time": "2018-12-16T11:12:16.604103Z",
"start_time": "2018-12-16T11:12:15.945583Z"
}
},
"outputs": [],
"source": [
"# The join leaves two copies of every descriptive column. For each of them\n",
"# keep the '_right' (dfsi2) value and fall back to the '_left' (dfsi) value\n",
"# where the right one is missing; the result stays NaN only if both are NaN.\n",
"# Series.fillna does this directly, without transposing the frame.\n",
"shared_cols = [('Cím', 'cms'), ('Koord', 'coords'), ('Kw', 'kws'), ('Régió', 'region')]\n",
"for merged_name, base_name in shared_cols:\n",
"    data[merged_name] = data[base_name + '_right'].fillna(data[base_name + '_left'])"
]
},
{
"cell_type": "code",
"execution_count": 237,
"metadata": {
"ExecuteTime": {
"end_time": "2018-12-16T11:12:16.620101Z",
"start_time": "2018-12-16T11:12:16.609111Z"
}
},
"outputs": [],
"source": [
"# Remove both suffixed copies of the listed columns, then turn the\n",
"# company-name index back into a regular column.\n",
"shared_names = ['Unnamed: 0', '0', 'region', 'nr', 'coords', 'kws', 'cms']\n",
"obsolete_cols = [name + side for side in ('_left', '_right') for name in shared_names]\n",
"data = data.drop(columns=obsolete_cols).reset_index()"
]
},
{
"cell_type": "code",
"execution_count": 238,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Cégnév | \n",
" tavaly_left | \n",
" Alkalmazottak száma 2018 | \n",
" Alkalmazottak száma 2017 | \n",
" Alkalmazottak száma 2016 | \n",
" Alkalmazottak száma 2015 | \n",
" nr_alkalmazottak | \n",
" tavaly_right | \n",
" Árbevétel 2018 (RON) | \n",
" Árbevétel 2017 (RON) | \n",
" Árbevétel 2016 (RON) | \n",
" Árbevétel 2015 (RON) | \n",
" nr_arbevetel | \n",
" Cím | \n",
" Koord | \n",
" Kw | \n",
" Régió | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" A M C SRL | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 20 | \n",
" 28 716 061 | \n",
" 27 881 270 | \n",
" 24 390 005 | \n",
" 22 173 039 | \n",
" 22.0 | \n",
" 527100, H\\xeddv\\xe9g, Rom\\xe1n u., 203\\t | \n",
" ['45.8391350', ' 25.5892160'] | \n",
" ['nagykereskedelem', '', '\"'] | \n",
" also-haromszek | \n",
"
\n",
" \n",
" 1 | \n",
" ABC IMPEX SRL | \n",
" 20 | \n",
" 139 | \n",
" 145 | \n",
" 132 | \n",
" 132 | \n",
" 21.0 | \n",
" 20 | \n",
" 37 875 372 | \n",
" 36 539 530 | \n",
" 33 071 733 | \n",
" 31 956 498 | \n",
" 24.0 | \n",
" 535600, Sz\\xe9kelyudvarhely, R\\xe1k\\xf3czi Fer... | \n",
" ['46.289768 ', ' 25.290034 '] | \n",
" ['csomagoloanyagok', 'nyomdak', '', '\"'] | \n",
" udvarhelyszek | \n",
"
\n",
" \n",
" 2 | \n",
" ABRAZIV SRL | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 49 | \n",
" 16 316 705 | \n",
" 5 483 275 | \n",
" 0 | \n",
" 0 | \n",
" 17.0 | \n",
" 535500, Gyergy\\xf3szentmikl\\xf3s, \\xc1llom\\xe1... | \n",
" ['46.7174250', ' 25.5751650'] | \n",
" ['szerszamgepek', '', '\"'] | \n",
" gyergyoszek | \n",
"
\n",
" \n",
" 3 | \n",
" ADILEX FUNGO SRL | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" >50 | \n",
" 6 834 996 | \n",
" 925 866 | \n",
" - | \n",
" - | \n",
" 50.0 | \n",
" 537355, Vasl\\xe1b, , 37\\t | \n",
" ['46.64607', ' 25.62371'] | \n",
" ['zoldseg-gyumolcs-csomagolas', '', '\"'] | \n",
" gyergyoszek | \n",
"
\n",
" \n",
" 4 | \n",
" ADIMAG COM IMPEX SRL | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 36 | \n",
" 65 548 663 | \n",
" 57 106 224 | \n",
" 48 827 471 | \n",
" 47 437 389 | \n",
" 40.0 | \n",
" 540190, Marosv\\xe1s\\xe1rhely, Szabads\\xe1g u.,... | \n",
" ['46.537905', ' 24.548819'] | \n",
" ['belsoepiteszeti-anyagok', 'epitoanyagok', 'f... | \n",
" marosszek | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Cégnév tavaly_left Alkalmazottak száma 2018 \\\n",
"0 A M C SRL NaN NaN \n",
"1 ABC IMPEX SRL 20 139 \n",
"2 ABRAZIV SRL NaN NaN \n",
"3 ADILEX FUNGO SRL NaN NaN \n",
"4 ADIMAG COM IMPEX SRL NaN NaN \n",
"\n",
" Alkalmazottak száma 2017 Alkalmazottak száma 2016 Alkalmazottak száma 2015 \\\n",
"0 NaN NaN NaN \n",
"1 145 132 132 \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" nr_alkalmazottak tavaly_right Árbevétel 2018 (RON) Árbevétel 2017 (RON) \\\n",
"0 NaN 20 28 716 061 27 881 270 \n",
"1 21.0 20 37 875 372 36 539 530 \n",
"2 NaN 49 16 316 705 5 483 275 \n",
"3 NaN >50 6 834 996 925 866 \n",
"4 NaN 36 65 548 663 57 106 224 \n",
"\n",
" Árbevétel 2016 (RON) Árbevétel 2015 (RON) nr_arbevetel \\\n",
"0 24 390 005 22 173 039 22.0 \n",
"1 33 071 733 31 956 498 24.0 \n",
"2 0 0 17.0 \n",
"3 - - 50.0 \n",
"4 48 827 471 47 437 389 40.0 \n",
"\n",
" Cím \\\n",
"0 527100, H\\xeddv\\xe9g, Rom\\xe1n u., 203\\t \n",
"1 535600, Sz\\xe9kelyudvarhely, R\\xe1k\\xf3czi Fer... \n",
"2 535500, Gyergy\\xf3szentmikl\\xf3s, \\xc1llom\\xe1... \n",
"3 537355, Vasl\\xe1b, , 37\\t \n",
"4 540190, Marosv\\xe1s\\xe1rhely, Szabads\\xe1g u.,... \n",
"\n",
" Koord \\\n",
"0 ['45.8391350', ' 25.5892160'] \n",
"1 ['46.289768 ', ' 25.290034 '] \n",
"2 ['46.7174250', ' 25.5751650'] \n",
"3 ['46.64607', ' 25.62371'] \n",
"4 ['46.537905', ' 24.548819'] \n",
"\n",
" Kw Régió \n",
"0 ['nagykereskedelem', '', '\"'] also-haromszek \n",
"1 ['csomagoloanyagok', 'nyomdak', '', '\"'] udvarhelyszek \n",
"2 ['szerszamgepek', '', '\"'] gyergyoszek \n",
"3 ['zoldseg-gyumolcs-csomagolas', '', '\"'] gyergyoszek \n",
"4 ['belsoepiteszeti-anyagok', 'epitoanyagok', 'f... marosszek "
]
},
"execution_count": 238,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 239,
"metadata": {
"ExecuteTime": {
"end_time": "2018-12-16T11:12:16.792101Z",
"start_time": "2018-12-16T11:12:16.626106Z"
}
},
"outputs": [],
"source": [
"# 'Koord' holds a stringified pair, e.g. ['45.8772830', ' 25.7994510'].\n",
"# Strip quotes, brackets and spaces, then split on the comma.\n",
"# The first element is the latitude and the second the longitude (the values\n",
"# are roughly 45-47 and 24-26 respectively), so the lists are filled in that order.\n",
"# NOTE(review): confirm that coordinates entered by hand in\n",
"# 'manual_manual.xlsx' use the same Latitude/Longitude convention.\n",
"drop_chars = str.maketrans('', '', \"'[] \")\n",
"lat = []\n",
"lon = []\n",
"for koord in data['Koord']:\n",
"    parts = koord.translate(drop_chars).split(',')\n",
"    lat.append(parts[0])\n",
"    lon.append(parts[1])"
]
},
{
"cell_type": "code",
"execution_count": 240,
"metadata": {
"ExecuteTime": {
"end_time": "2018-12-16T11:12:16.803108Z",
"start_time": "2018-12-16T11:12:16.796102Z"
}
},
"outputs": [],
"source": [
"# Attach the parsed coordinate lists as two new columns (still strings).\n",
"data = data.assign(Latitude=lat, Longitude=lon)"
]
},
{
"cell_type": "code",
"execution_count": 241,
"metadata": {
"ExecuteTime": {
"end_time": "2018-12-16T11:12:16.838104Z",
"start_time": "2018-12-16T11:12:16.807102Z"
}
},
"outputs": [],
"source": [
"# The raw coordinate string is no longer needed once it has been split.\n",
"data = data.drop(columns=['Koord'])"
]
},
{
"cell_type": "code",
"execution_count": 242,
"metadata": {
"ExecuteTime": {
"end_time": "2018-12-16T11:12:16.999105Z",
"start_time": "2018-12-16T11:12:16.851102Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Cégnév | \n",
" tavaly_left | \n",
" Alkalmazottak száma 2018 | \n",
" Alkalmazottak száma 2017 | \n",
" Alkalmazottak száma 2016 | \n",
" Alkalmazottak száma 2015 | \n",
" nr_alkalmazottak | \n",
" tavaly_right | \n",
" Árbevétel 2018 (RON) | \n",
" Árbevétel 2017 (RON) | \n",
" Árbevétel 2016 (RON) | \n",
" Árbevétel 2015 (RON) | \n",
" nr_arbevetel | \n",
" Cím | \n",
" Kw | \n",
" Régió | \n",
" Latitude | \n",
" Longitude | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" A M C SRL | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 20 | \n",
" 28 716 061 | \n",
" 27 881 270 | \n",
" 24 390 005 | \n",
" 22 173 039 | \n",
" 22.0 | \n",
" 527100, H\\xeddv\\xe9g, Rom\\xe1n u., 203\\t | \n",
" ['nagykereskedelem', '', '\"'] | \n",
" also-haromszek | \n",
" 25.5892160 | \n",
" 45.8391350 | \n",
"
\n",
" \n",
" 1 | \n",
" ABC IMPEX SRL | \n",
" 20 | \n",
" 139 | \n",
" 145 | \n",
" 132 | \n",
" 132 | \n",
" 21.0 | \n",
" 20 | \n",
" 37 875 372 | \n",
" 36 539 530 | \n",
" 33 071 733 | \n",
" 31 956 498 | \n",
" 24.0 | \n",
" 535600, Sz\\xe9kelyudvarhely, R\\xe1k\\xf3czi Fer... | \n",
" ['csomagoloanyagok', 'nyomdak', '', '\"'] | \n",
" udvarhelyszek | \n",
" 25.290034 | \n",
" 46.289768 | \n",
"
\n",
" \n",
" 2 | \n",
" ABRAZIV SRL | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 49 | \n",
" 16 316 705 | \n",
" 5 483 275 | \n",
" 0 | \n",
" 0 | \n",
" 17.0 | \n",
" 535500, Gyergy\\xf3szentmikl\\xf3s, \\xc1llom\\xe1... | \n",
" ['szerszamgepek', '', '\"'] | \n",
" gyergyoszek | \n",
" 25.5751650 | \n",
" 46.7174250 | \n",
"
\n",
" \n",
" 3 | \n",
" ADILEX FUNGO SRL | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" >50 | \n",
" 6 834 996 | \n",
" 925 866 | \n",
" - | \n",
" - | \n",
" 50.0 | \n",
" 537355, Vasl\\xe1b, , 37\\t | \n",
" ['zoldseg-gyumolcs-csomagolas', '', '\"'] | \n",
" gyergyoszek | \n",
" 25.62371 | \n",
" 46.64607 | \n",
"
\n",
" \n",
" 4 | \n",
" ADIMAG COM IMPEX SRL | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 36 | \n",
" 65 548 663 | \n",
" 57 106 224 | \n",
" 48 827 471 | \n",
" 47 437 389 | \n",
" 40.0 | \n",
" 540190, Marosv\\xe1s\\xe1rhely, Szabads\\xe1g u.,... | \n",
" ['belsoepiteszeti-anyagok', 'epitoanyagok', 'f... | \n",
" marosszek | \n",
" 24.548819 | \n",
" 46.537905 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 419 | \n",
" WONDERLAND SRL | \n",
" 44 | \n",
" 28 | \n",
" 29 | \n",
" 28 | \n",
" 29 | \n",
" 46.0 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 527160, Torja, F\\xf5 u., 225\\t | \n",
" ['epitkezes', '', '\"'] | \n",
" felso-haromszek | \n",
" 26.063746 | \n",
" 46.040239 | \n",
"
\n",
" \n",
" 420 | \n",
" ZABOLA ESTATE SRL | \n",
" 39 | \n",
" 45 | \n",
" 31 | \n",
" 25 | \n",
" 23 | \n",
" 28.0 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 527190, Zabola, , 437\\t | \n",
" ['hotelek', 'kastelyszallo', 'vendeglatas', ''... | \n",
" felso-haromszek | \n",
" 26.1980287 | \n",
" 45.8915563 | \n",
"
\n",
" \n",
" 421 | \n",
" ZAMBELLI METAL SRL | \n",
" 15 | \n",
" 205 | \n",
" 179 | \n",
" 162 | \n",
" 163 | \n",
" 14.0 | \n",
" 15 | \n",
" 52 694 046 | \n",
" 42 016 920 | \n",
" 34 591 562 | \n",
" 27 891 895 | \n",
" 15.0 | \n",
" 520077, Sepsiszentgy\\xf6rgy, \\xc9p\\xedt\\xf5k u... | \n",
" ['badogosmunkak', '', '\"'] | \n",
" also-haromszek | \n",
" 25.818515 | \n",
" 45.861235 | \n",
"
\n",
" \n",
" 422 | \n",
" ZARAH MODEN SRL | \n",
" 2 | \n",
" 764 | \n",
" 785 | \n",
" 778 | \n",
" 839 | \n",
" 2.0 | \n",
" 2 | \n",
" 154 604 899 | \n",
" 148 531 005 | \n",
" 141 121 629 | \n",
" 139 752 712 | \n",
" 2.0 | \n",
" 525400, K\\xe9zdiv\\xe1s\\xe1rhely, B\\xe9ke u., 27\\t | \n",
" ['nadraggyartas', 'textilipar', '', 'INDUSTRIA... | \n",
" felso-haromszek | \n",
" 26.1359670 | \n",
" 45.9969390 | \n",
"
\n",
" \n",
" 423 | \n",
" ZENCO TRANS SRL | \n",
" 40 | \n",
" 36 | \n",
" 37 | \n",
" 33 | \n",
" 33 | \n",
" 41.0 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 535700, Marosh\\xe9v\\xedz, , 2\\t | \n",
" ['aruszallitas', '', '\"'] | \n",
" gyergyoszek | \n",
" 25.3534040 | \n",
" 46.9260300 | \n",
"
\n",
" \n",
"
\n",
"
424 rows × 18 columns
\n",
"
"
],
"text/plain": [
" Cégnév tavaly_left Alkalmazottak száma 2018 \\\n",
"0 A M C SRL NaN NaN \n",
"1 ABC IMPEX SRL 20 139 \n",
"2 ABRAZIV SRL NaN NaN \n",
"3 ADILEX FUNGO SRL NaN NaN \n",
"4 ADIMAG COM IMPEX SRL NaN NaN \n",
".. ... ... ... \n",
"419 WONDERLAND SRL 44 28 \n",
"420 ZABOLA ESTATE SRL 39 45 \n",
"421 ZAMBELLI METAL SRL 15 205 \n",
"422 ZARAH MODEN SRL 2 764 \n",
"423 ZENCO TRANS SRL 40 36 \n",
"\n",
" Alkalmazottak száma 2017 Alkalmazottak száma 2016 \\\n",
"0 NaN NaN \n",
"1 145 132 \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
".. ... ... \n",
"419 29 28 \n",
"420 31 25 \n",
"421 179 162 \n",
"422 785 778 \n",
"423 37 33 \n",
"\n",
" Alkalmazottak száma 2015 nr_alkalmazottak tavaly_right \\\n",
"0 NaN NaN 20 \n",
"1 132 21.0 20 \n",
"2 NaN NaN 49 \n",
"3 NaN NaN >50 \n",
"4 NaN NaN 36 \n",
".. ... ... ... \n",
"419 29 46.0 NaN \n",
"420 23 28.0 NaN \n",
"421 163 14.0 15 \n",
"422 839 2.0 2 \n",
"423 33 41.0 NaN \n",
"\n",
" Árbevétel 2018 (RON) Árbevétel 2017 (RON) Árbevétel 2016 (RON) \\\n",
"0 28 716 061 27 881 270 24 390 005 \n",
"1 37 875 372 36 539 530 33 071 733 \n",
"2 16 316 705 5 483 275 0 \n",
"3 6 834 996 925 866 - \n",
"4 65 548 663 57 106 224 48 827 471 \n",
".. ... ... ... \n",
"419 NaN NaN NaN \n",
"420 NaN NaN NaN \n",
"421 52 694 046 42 016 920 34 591 562 \n",
"422 154 604 899 148 531 005 141 121 629 \n",
"423 NaN NaN NaN \n",
"\n",
" Árbevétel 2015 (RON) nr_arbevetel \\\n",
"0 22 173 039 22.0 \n",
"1 31 956 498 24.0 \n",
"2 0 17.0 \n",
"3 - 50.0 \n",
"4 47 437 389 40.0 \n",
".. ... ... \n",
"419 NaN NaN \n",
"420 NaN NaN \n",
"421 27 891 895 15.0 \n",
"422 139 752 712 2.0 \n",
"423 NaN NaN \n",
"\n",
" Cím \\\n",
"0 527100, H\\xeddv\\xe9g, Rom\\xe1n u., 203\\t \n",
"1 535600, Sz\\xe9kelyudvarhely, R\\xe1k\\xf3czi Fer... \n",
"2 535500, Gyergy\\xf3szentmikl\\xf3s, \\xc1llom\\xe1... \n",
"3 537355, Vasl\\xe1b, , 37\\t \n",
"4 540190, Marosv\\xe1s\\xe1rhely, Szabads\\xe1g u.,... \n",
".. ... \n",
"419 527160, Torja, F\\xf5 u., 225\\t \n",
"420 527190, Zabola, , 437\\t \n",
"421 520077, Sepsiszentgy\\xf6rgy, \\xc9p\\xedt\\xf5k u... \n",
"422 525400, K\\xe9zdiv\\xe1s\\xe1rhely, B\\xe9ke u., 27\\t \n",
"423 535700, Marosh\\xe9v\\xedz, , 2\\t \n",
"\n",
" Kw Régió \\\n",
"0 ['nagykereskedelem', '', '\"'] also-haromszek \n",
"1 ['csomagoloanyagok', 'nyomdak', '', '\"'] udvarhelyszek \n",
"2 ['szerszamgepek', '', '\"'] gyergyoszek \n",
"3 ['zoldseg-gyumolcs-csomagolas', '', '\"'] gyergyoszek \n",
"4 ['belsoepiteszeti-anyagok', 'epitoanyagok', 'f... marosszek \n",
".. ... ... \n",
"419 ['epitkezes', '', '\"'] felso-haromszek \n",
"420 ['hotelek', 'kastelyszallo', 'vendeglatas', ''... felso-haromszek \n",
"421 ['badogosmunkak', '', '\"'] also-haromszek \n",
"422 ['nadraggyartas', 'textilipar', '', 'INDUSTRIA... felso-haromszek \n",
"423 ['aruszallitas', '', '\"'] gyergyoszek \n",
"\n",
" Latitude Longitude \n",
"0 25.5892160 45.8391350 \n",
"1 25.290034 46.289768 \n",
"2 25.5751650 46.7174250 \n",
"3 25.62371 46.64607 \n",
"4 24.548819 46.537905 \n",
".. ... ... \n",
"419 26.063746 46.040239 \n",
"420 26.1980287 45.8915563 \n",
"421 25.818515 45.861235 \n",
"422 26.1359670 45.9969390 \n",
"423 25.3534040 46.9260300 \n",
"\n",
"[424 rows x 18 columns]"
]
},
"execution_count": 242,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data"
]
},
{
"cell_type": "code",
"execution_count": 243,
"metadata": {
"ExecuteTime": {
"end_time": "2018-12-16T11:22:05.563869Z",
"start_time": "2018-12-16T11:22:05.557864Z"
}
},
"outputs": [],
"source": [
"# Rows whose 'Longitude' string is empty are set aside in `manual`;\n",
"# `data` keeps the remaining rows.\n",
"longitude_missing = data['Longitude'] == ''\n",
"manual = data[longitude_missing]\n",
"data = data[~longitude_missing]"
]
},
{
"cell_type": "code",
"execution_count": 222,
"metadata": {
"ExecuteTime": {
"end_time": "2018-12-16T11:40:49.592428Z",
"start_time": "2018-12-16T11:40:49.009253Z"
}
},
"outputs": [],
"source": [
"data.to_excel('data.xlsx')\n",
"manual.to_excel('manual.xlsx')\n",
"# 'manual_manual.xlsx' is corrected by hand and read back further down, so it\n",
"# is only seeded when it does not exist yet: re-running this cell must not\n",
"# overwrite the manual corrections.\n",
"if not os.path.exists('manual_manual.xlsx'):\n",
"    manual.to_excel('manual_manual.xlsx')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
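"Manually fix the sector, address and coordinate data in `manual_manual.xlsx` (written by the previous cell) before running the cells below, which read that file back in."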
]
},
{
"cell_type": "code",
"execution_count": 244,
"metadata": {},
"outputs": [],
"source": [
"manual=pd.read_excel('manual_manual.xlsx')"
]
},
{
"cell_type": "code",
"execution_count": 245,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version\n",
"of pandas will change to not sort by default.\n",
"\n",
"To accept the future behavior, pass 'sort=False'.\n",
"\n",
"To retain the current behavior and silence the warning, pass 'sort=True'.\n",
"\n",
" \"\"\"Entry point for launching an IPython kernel.\n"
]
}
],
"source": [
"# Append the hand-corrected rows and index the result by company name.\n",
"# The two frames do not have identical columns, so the column order of the\n",
"# result depends on the pandas version unless `sort` is given. sort=True pins\n",
"# the alphabetical order and avoids the FutureWarning; use sort=False to keep\n",
"# the columns in their original order instead.\n",
"data = pd.concat([data, manual], sort=True).set_index('Cégnév')"
]
},
{
"cell_type": "code",
"execution_count": 246,
"metadata": {
"ExecuteTime": {
"end_time": "2018-12-16T11:34:37.319384Z",
"start_time": "2018-12-16T11:34:37.281390Z"
}
},
"outputs": [],
"source": [
"sectors = pd.read_excel('sectors.xlsx')\n",
"sectormap = {}\n",
"repl = {'á':'a','é':'e','í':'i','ó':'o','ú':'u','ü':'u','ű':'u','ő':'o','ö':'o'}\n",
"# Each column name of the sheet is used as a sector and each cell is split on\n",
"# commas into keywords. Every keyword is mapped to its column name twice: as\n",
"# written, and with the accented vowels in `repl` replaced by plain ones.\n",
"strip_accents = str.maketrans(repl)\n",
"for sector in sectors.columns:\n",
"    for cell in sectors[sector].values:\n",
"        for keyword in str(cell).replace(' ', '').split(','):\n",
"            sectormap[keyword] = sector\n",
"            sectormap[keyword.translate(strip_accents)] = sector"
]
},
{
"cell_type": "code",
"execution_count": 247,
"metadata": {
"ExecuteTime": {
"end_time": "2018-12-16T11:37:53.247325Z",
"start_time": "2018-12-16T11:37:53.070331Z"
}
},
"outputs": [],
"source": [
"valid=[]\n",
"kws=[]\n",
"ki=-1\n",
"for i in range(len(data.index)):\n",
" ks=data.loc[data.index[i]]['Kw'].replace(\"'\",'').replace('[','')\\\n",
" .replace(']','').replace(' ','').lower().split(',')[:-2]\n",
" for k in ks:\n",
" if k in sectormap:\n",
" kws.append(sectormap[k])\n",
" break\n",
" k=k.replace('-','')\n",
" if k in sectormap:\n",
" kws.append(sectormap[k])\n",
" break\n",
" if len(kws)