{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import sys\n", "import os\n", "import glob\n", "import re \n", "import pandas as pd\n", "import numpy as np\n", "from lxml import etree\n", "import requests\n", "import time\n", "from collections import Counter" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Directory added to sys.path so that, presumably, the project-local\n", "# `librarian_robot` module (imported in the next cell) can be found.\n", "# Set the LIBRARIAN_ROBOT_PATH environment variable to override the\n", "# machine-specific default, which is the path the notebook was written with.\n", "librarian_robot_dir = os.path.abspath(\n", "    os.environ.get(\n", "        \"LIBRARIAN_ROBOT_PATH\",\n", "        \"C:/Users/calvotello/Dropbox/MTB/Göttingen/research/\",\n", "    )\n", ")\n", "# Re-running this cell must not pile up duplicate sys.path entries.\n", "if librarian_robot_dir not in sys.path:\n", "    sys.path.append(librarian_robot_dir)\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "from librarian_robot import downloading_datasets, map_classications_systems, tokenize, extract_data_from_df" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Load the k10plus title table from the parquet export under ../data.\n", "k10plus_df = pd.read_parquet(\"./../data/titles_romance_languages_1980_2019_secondary_literature.parquet\", engine=\"pyarrow\")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ppnmediumtitletitle_supplementyearentry_firstauthor_first_nameauthor_last_nameauthor_gnd_ideditor_first_name...GOK_ppnGOK_notationGOK_jsignatur_placesignatursignatur_dateAbrufzeichenqueryyear_publicationliterary_texts
ppn
16420674071642067407AauCamusNone1987HDBSRO:13-09-18MorvanLebesque077112679None...NoneNoneNoneNoneRO/IH 24081 L443 C2101-07-05|01-07-05Nonepica_jah_19651987NaN
17345386191734538619Abv1 2 3..! LectureNone19822004:02-10-20NoneNoneNoneNone...NoneNoneNoneNoneNoneNoneNonepica_jah_19651982NaN
16784150651678415065OauLivre et société dans la France du XVIIIe siècle[1]2019EBP:08-10-19NoneNoneNoneGeneviève...NoneNoneNone5ebook|eBook de Gruyter|DeGruyter Oldenbourg E-...19-12-19|05-02-20|21-11-19|18-11-20|24-02-21|1...Geschenk|DeGruyter|DigitalisierungVerlagsarchi...pica_jah_19652019NaN
780075951780075951AauRousseau and the French Revolution1762 - 179120133401:10-03-14JoanMacDonald1022931997None...NoneNoneNoneF-1Bo RousJea/15|265 42514-03-14Nonepica_jah_19652013NaN
194869113194869113AauA @linguagem dos pescadores da EriceiraNone19932003:29-03-96Joana LopesAlves800444299None...NoneNoneNoneSUBa rom 613.3 e/27|A/31136530-05-00|09-07-96nipica_jah_19651993NaN
..................................................................
80014743X80014743XAau11-Mel atentado que cambió la historia de España20140018:06-11-14Jaime IgnacioBurgo75103150XNone...NoneNoneNoneSUB|MAA/645349|A 14 / 2508109-01-15|21-11-14Nonepica_jah_20142014NaN
800143604800143604OaXWelten in Sprachezur Entwicklung der Kategorie \"Modus\" in roman...20140008:06-11-14Martin G.Becker593638956None...NoneNoneNone1960Online-Ressource23-12-18|06-11-14|22-12-18|18-01-18|14-01-16|1...ebook_2019_degruyter_ebs|ebm|olr-mili|gs|DeGru...pica_jah_20142014NaN
800141377800141377AauÉtudes sur le roman français au XVIIIe siècleNone20140035:06-11-14HenriCoulet079659950None...NoneNoneNone2|SUB|FHB|ZENLSA|ZEN|Freihand1 A 922223|A/653243|66.950|IG 1657 C855|IG 165...16-04-15|22-05-15|02-03-15|26-01-15|06-11-14|1...Nonepica_jah_20142014NaN
800135350800135350AauLa @lucha contra la corrupción urbanística en ...None20140018:06-11-14VicenteCorral EscarizNoneNone...NoneNoneNoneSUBA/64533408-01-15Nonepica_jah_20142014NaN
800132432800132432AauContra a morte das linguaso caso do galego20140018:06-11-14MiguelMoreira BarbeitoNoneNone...NoneNoneNoneSUB|MAA/645391|A 16 / 1798918-12-14|19-08-16Nonepica_jah_20142014NaN
\n", "

248501 rows × 61 columns

\n", "
" ], "text/plain": [ " ppn medium \\\n", "ppn \n", "1642067407 1642067407 Aau \n", "1734538619 1734538619 Abv \n", "1678415065 1678415065 Oau \n", "780075951 780075951 Aau \n", "194869113 194869113 Aau \n", "... ... ... \n", "80014743X 80014743X Aau \n", "800143604 800143604 OaX \n", "800141377 800141377 Aau \n", "800135350 800135350 Aau \n", "800132432 800132432 Aau \n", "\n", " title \\\n", "ppn \n", "1642067407 Camus \n", "1734538619 1 2 3..! Lecture \n", "1678415065 Livre et société dans la France du XVIIIe siècle \n", "780075951 Rousseau and the French Revolution \n", "194869113 A @linguagem dos pescadores da Ericeira \n", "... ... \n", "80014743X 11-M \n", "800143604 Welten in Sprache \n", "800141377 Études sur le roman français au XVIIIe siècle \n", "800135350 La @lucha contra la corrupción urbanística en ... \n", "800132432 Contra a morte das linguas \n", "\n", " title_supplement year \\\n", "ppn \n", "1642067407 None 1987 \n", "1734538619 None 1982 \n", "1678415065 [1] 2019 \n", "780075951 1762 - 1791 2013 \n", "194869113 None 1993 \n", "... ... ... \n", "80014743X el atentado que cambió la historia de España 2014 \n", "800143604 zur Entwicklung der Kategorie \"Modus\" in roman... 2014 \n", "800141377 None 2014 \n", "800135350 None 2014 \n", "800132432 o caso do galego 2014 \n", "\n", " entry_first author_first_name author_last_name author_gnd_id \\\n", "ppn \n", "1642067407 HDBSRO:13-09-18 Morvan Lebesque 077112679 \n", "1734538619 2004:02-10-20 None None None \n", "1678415065 EBP:08-10-19 None None None \n", "780075951 3401:10-03-14 Joan MacDonald 1022931997 \n", "194869113 2003:29-03-96 Joana Lopes Alves 800444299 \n", "... ... ... ... ... \n", "80014743X 0018:06-11-14 Jaime Ignacio Burgo 75103150X \n", "800143604 0008:06-11-14 Martin G. 
Becker 593638956 \n", "800141377 0035:06-11-14 Henri Coulet 079659950 \n", "800135350 0018:06-11-14 Vicente Corral Escariz None \n", "800132432 0018:06-11-14 Miguel Moreira Barbeito None \n", "\n", " editor_first_name ... GOK_ppn GOK_notation GOK_j \\\n", "ppn ... \n", "1642067407 None ... None None None \n", "1734538619 None ... None None None \n", "1678415065 Geneviève ... None None None \n", "780075951 None ... None None None \n", "194869113 None ... None None None \n", "... ... ... ... ... ... \n", "80014743X None ... None None None \n", "800143604 None ... None None None \n", "800141377 None ... None None None \n", "800135350 None ... None None None \n", "800132432 None ... None None None \n", "\n", " signatur_place \\\n", "ppn \n", "1642067407 None \n", "1734538619 None \n", "1678415065 5 \n", "780075951 F-1 \n", "194869113 SUB \n", "... ... \n", "80014743X SUB|MA \n", "800143604 1960 \n", "800141377 2|SUB|FHB|ZENLSA|ZEN|Freihand \n", "800135350 SUB \n", "800132432 SUB|MA \n", "\n", " signatur \\\n", "ppn \n", "1642067407 RO/IH 24081 L443 C21 \n", "1734538619 None \n", "1678415065 ebook|eBook de Gruyter|DeGruyter Oldenbourg E-... \n", "780075951 Bo RousJea/15|265 425 \n", "194869113 a rom 613.3 e/27|A/311365 \n", "... ... \n", "80014743X A/645349|A 14 / 25081 \n", "800143604 Online-Ressource \n", "800141377 1 A 922223|A/653243|66.950|IG 1657 C855|IG 165... \n", "800135350 A/645334 \n", "800132432 A/645391|A 16 / 17989 \n", "\n", " signatur_date \\\n", "ppn \n", "1642067407 01-07-05|01-07-05 \n", "1734538619 None \n", "1678415065 19-12-19|05-02-20|21-11-19|18-11-20|24-02-21|1... \n", "780075951 14-03-14 \n", "194869113 30-05-00|09-07-96 \n", "... ... \n", "80014743X 09-01-15|21-11-14 \n", "800143604 23-12-18|06-11-14|22-12-18|18-01-18|14-01-16|1... \n", "800141377 16-04-15|22-05-15|02-03-15|26-01-15|06-11-14|1... 
\n", "800135350 08-01-15 \n", "800132432 18-12-14|19-08-16 \n", "\n", " Abrufzeichen query \\\n", "ppn \n", "1642067407 None pica_jah_1965 \n", "1734538619 None pica_jah_1965 \n", "1678415065 Geschenk|DeGruyter|DigitalisierungVerlagsarchi... pica_jah_1965 \n", "780075951 None pica_jah_1965 \n", "194869113 ni pica_jah_1965 \n", "... ... ... \n", "80014743X None pica_jah_2014 \n", "800143604 ebook_2019_degruyter_ebs|ebm|olr-mili|gs|DeGru... pica_jah_2014 \n", "800141377 None pica_jah_2014 \n", "800135350 None pica_jah_2014 \n", "800132432 None pica_jah_2014 \n", "\n", " year_publication literary_texts \n", "ppn \n", "1642067407 1987 NaN \n", "1734538619 1982 NaN \n", "1678415065 2019 NaN \n", "780075951 2013 NaN \n", "194869113 1993 NaN \n", "... ... ... \n", "80014743X 2014 NaN \n", "800143604 2014 NaN \n", "800141377 2014 NaN \n", "800135350 2014 NaN \n", "800132432 2014 NaN \n", "\n", "[248501 rows x 61 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "k10plus_df" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "k10plus_df[\"source\"] = \"k10plus\"" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ppnmediumtitletitle_supplementyearentry_firstauthor_first_nameauthor_last_nameauthor_gnd_ideditor_first_name...GOK_notationGOK_jsignatur_placesignatursignatur_dateAbrufzeichenqueryyear_publicationliterary_textssource
ppn
16420674071642067407AauCamusNone1987HDBSRO:13-09-18MorvanLebesque077112679None...NoneNoneNoneRO/IH 24081 L443 C2101-07-05|01-07-05Nonepica_jah_19651987NaNk10plus
17345386191734538619Abv1 2 3..! LectureNone19822004:02-10-20NoneNoneNoneNone...NoneNoneNoneNoneNoneNonepica_jah_19651982NaNk10plus
16784150651678415065OauLivre et société dans la France du XVIIIe siècle[1]2019EBP:08-10-19NoneNoneNoneGeneviève...NoneNone5ebook|eBook de Gruyter|DeGruyter Oldenbourg E-...19-12-19|05-02-20|21-11-19|18-11-20|24-02-21|1...Geschenk|DeGruyter|DigitalisierungVerlagsarchi...pica_jah_19652019NaNk10plus
780075951780075951AauRousseau and the French Revolution1762 - 179120133401:10-03-14JoanMacDonald1022931997None...NoneNoneF-1Bo RousJea/15|265 42514-03-14Nonepica_jah_19652013NaNk10plus
194869113194869113AauA @linguagem dos pescadores da EriceiraNone19932003:29-03-96Joana LopesAlves800444299None...NoneNoneSUBa rom 613.3 e/27|A/31136530-05-00|09-07-96nipica_jah_19651993NaNk10plus
..................................................................
80014743X80014743XAau11-Mel atentado que cambió la historia de España20140018:06-11-14Jaime IgnacioBurgo75103150XNone...NoneNoneSUB|MAA/645349|A 14 / 2508109-01-15|21-11-14Nonepica_jah_20142014NaNk10plus
800143604800143604OaXWelten in Sprachezur Entwicklung der Kategorie \"Modus\" in roman...20140008:06-11-14Martin G.Becker593638956None...NoneNone1960Online-Ressource23-12-18|06-11-14|22-12-18|18-01-18|14-01-16|1...ebook_2019_degruyter_ebs|ebm|olr-mili|gs|DeGru...pica_jah_20142014NaNk10plus
800141377800141377AauÉtudes sur le roman français au XVIIIe siècleNone20140035:06-11-14HenriCoulet079659950None...NoneNone2|SUB|FHB|ZENLSA|ZEN|Freihand1 A 922223|A/653243|66.950|IG 1657 C855|IG 165...16-04-15|22-05-15|02-03-15|26-01-15|06-11-14|1...Nonepica_jah_20142014NaNk10plus
800135350800135350AauLa @lucha contra la corrupción urbanística en ...None20140018:06-11-14VicenteCorral EscarizNoneNone...NoneNoneSUBA/64533408-01-15Nonepica_jah_20142014NaNk10plus
800132432800132432AauContra a morte das linguaso caso do galego20140018:06-11-14MiguelMoreira BarbeitoNoneNone...NoneNoneSUB|MAA/645391|A 16 / 1798918-12-14|19-08-16Nonepica_jah_20142014NaNk10plus
\n", "

248501 rows × 62 columns

\n", "
" ], "text/plain": [ " ppn medium \\\n", "ppn \n", "1642067407 1642067407 Aau \n", "1734538619 1734538619 Abv \n", "1678415065 1678415065 Oau \n", "780075951 780075951 Aau \n", "194869113 194869113 Aau \n", "... ... ... \n", "80014743X 80014743X Aau \n", "800143604 800143604 OaX \n", "800141377 800141377 Aau \n", "800135350 800135350 Aau \n", "800132432 800132432 Aau \n", "\n", " title \\\n", "ppn \n", "1642067407 Camus \n", "1734538619 1 2 3..! Lecture \n", "1678415065 Livre et société dans la France du XVIIIe siècle \n", "780075951 Rousseau and the French Revolution \n", "194869113 A @linguagem dos pescadores da Ericeira \n", "... ... \n", "80014743X 11-M \n", "800143604 Welten in Sprache \n", "800141377 Études sur le roman français au XVIIIe siècle \n", "800135350 La @lucha contra la corrupción urbanística en ... \n", "800132432 Contra a morte das linguas \n", "\n", " title_supplement year \\\n", "ppn \n", "1642067407 None 1987 \n", "1734538619 None 1982 \n", "1678415065 [1] 2019 \n", "780075951 1762 - 1791 2013 \n", "194869113 None 1993 \n", "... ... ... \n", "80014743X el atentado que cambió la historia de España 2014 \n", "800143604 zur Entwicklung der Kategorie \"Modus\" in roman... 2014 \n", "800141377 None 2014 \n", "800135350 None 2014 \n", "800132432 o caso do galego 2014 \n", "\n", " entry_first author_first_name author_last_name author_gnd_id \\\n", "ppn \n", "1642067407 HDBSRO:13-09-18 Morvan Lebesque 077112679 \n", "1734538619 2004:02-10-20 None None None \n", "1678415065 EBP:08-10-19 None None None \n", "780075951 3401:10-03-14 Joan MacDonald 1022931997 \n", "194869113 2003:29-03-96 Joana Lopes Alves 800444299 \n", "... ... ... ... ... \n", "80014743X 0018:06-11-14 Jaime Ignacio Burgo 75103150X \n", "800143604 0008:06-11-14 Martin G. 
Becker 593638956 \n", "800141377 0035:06-11-14 Henri Coulet 079659950 \n", "800135350 0018:06-11-14 Vicente Corral Escariz None \n", "800132432 0018:06-11-14 Miguel Moreira Barbeito None \n", "\n", " editor_first_name ... GOK_notation GOK_j \\\n", "ppn ... \n", "1642067407 None ... None None \n", "1734538619 None ... None None \n", "1678415065 Geneviève ... None None \n", "780075951 None ... None None \n", "194869113 None ... None None \n", "... ... ... ... ... \n", "80014743X None ... None None \n", "800143604 None ... None None \n", "800141377 None ... None None \n", "800135350 None ... None None \n", "800132432 None ... None None \n", "\n", " signatur_place \\\n", "ppn \n", "1642067407 None \n", "1734538619 None \n", "1678415065 5 \n", "780075951 F-1 \n", "194869113 SUB \n", "... ... \n", "80014743X SUB|MA \n", "800143604 1960 \n", "800141377 2|SUB|FHB|ZENLSA|ZEN|Freihand \n", "800135350 SUB \n", "800132432 SUB|MA \n", "\n", " signatur \\\n", "ppn \n", "1642067407 RO/IH 24081 L443 C21 \n", "1734538619 None \n", "1678415065 ebook|eBook de Gruyter|DeGruyter Oldenbourg E-... \n", "780075951 Bo RousJea/15|265 425 \n", "194869113 a rom 613.3 e/27|A/311365 \n", "... ... \n", "80014743X A/645349|A 14 / 25081 \n", "800143604 Online-Ressource \n", "800141377 1 A 922223|A/653243|66.950|IG 1657 C855|IG 165... \n", "800135350 A/645334 \n", "800132432 A/645391|A 16 / 17989 \n", "\n", " signatur_date \\\n", "ppn \n", "1642067407 01-07-05|01-07-05 \n", "1734538619 None \n", "1678415065 19-12-19|05-02-20|21-11-19|18-11-20|24-02-21|1... \n", "780075951 14-03-14 \n", "194869113 30-05-00|09-07-96 \n", "... ... \n", "80014743X 09-01-15|21-11-14 \n", "800143604 23-12-18|06-11-14|22-12-18|18-01-18|14-01-16|1... \n", "800141377 16-04-15|22-05-15|02-03-15|26-01-15|06-11-14|1... 
\n", "800135350 08-01-15 \n", "800132432 18-12-14|19-08-16 \n", "\n", " Abrufzeichen query \\\n", "ppn \n", "1642067407 None pica_jah_1965 \n", "1734538619 None pica_jah_1965 \n", "1678415065 Geschenk|DeGruyter|DigitalisierungVerlagsarchi... pica_jah_1965 \n", "780075951 None pica_jah_1965 \n", "194869113 ni pica_jah_1965 \n", "... ... ... \n", "80014743X None pica_jah_2014 \n", "800143604 ebook_2019_degruyter_ebs|ebm|olr-mili|gs|DeGru... pica_jah_2014 \n", "800141377 None pica_jah_2014 \n", "800135350 None pica_jah_2014 \n", "800132432 None pica_jah_2014 \n", "\n", " year_publication literary_texts source \n", "ppn \n", "1642067407 1987 NaN k10plus \n", "1734538619 1982 NaN k10plus \n", "1678415065 2019 NaN k10plus \n", "780075951 2013 NaN k10plus \n", "194869113 1993 NaN k10plus \n", "... ... ... ... \n", "80014743X 2014 NaN k10plus \n", "800143604 2014 NaN k10plus \n", "800141377 2014 NaN k10plus \n", "800135350 2014 NaN k10plus \n", "800132432 2014 NaN k10plus \n", "\n", "[248501 rows x 62 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "k10plus_df" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "hebis_df = pd.read_parquet(\"./../data/pica_rvp_IA-IZ_pica_sgt_440_860_filtered_year_wo_rvk_wo_content_type.parquet\")\n" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indexppnmediumtitletitle_supplementyearentry_firstauthor_first_nameauthor_last_nameauthor_gnd_id...keyword_BDSL_tkeyword_BDSL_skeyword_Fremddatenlieferanten_lieferantenkeyword_Fremddatenlieferantenlcc_notationsignatur_placesignatursignatur_dateapi_queryyear_publication
33450873676AbvcFrancophonies du mondeNone20196050:18-07-19NoneNoneNone...NaNNaNNaNNoneNone330|02216/IA 5021a|28 Frz Z 1775[Suppl.14-06-07|24-07-19pica_rvp_IA2019
44442132174AauLe @\"théâtre provincial\" en France(XVIe-XVIIIe siècle)20180026:31-01-19NoneNoneNone...NaNNaNNaNNoneNone000|112291.535|Za 195 (97)19-09-19|31-01-19pica_rvp_IA2018
55438697448AauModernités des troubadoursNone20180077:13-11-18NoneNoneNone...NaNNaNNaNNonePN56.T768000|00091.282.92|288.69112-10-21|11-12-18pica_rvp_IA2018
66424315815AbvcLe @nouveau magazine littéraireNone20186050:29-12-17NoneNoneNone...NaNNaNNaNNoneNone330|022|07401/IA 6450|28 Rom Z 15592|Z 87124-07-20|10-01-18|08-01-18pica_rvp_IA2018
77481184937OauCourage de la vérité et écritures de l'histoire(XVIe-XVIIIe siècle)20170026:05-07-21NoneNoneNone...NaNNaNNaNNoneNoneNoneNone05-07-21|05-07-21|05-07-21|05-07-21|05-07-21|0...pica_rvp_IA2017
..................................................................
189755499448323869OaxKRTU und andere ProsadichtungenZweisprachige Ausgabe mit einem Nachwort von E...19886055:07-05-19Josep VicençFoixNone...NaNNaNNaNNoneNoneNoneNone16-11-20|09-05-19|24-09-19|07-02-20pica_sgt_8601988
1897560448323850OaxCalderónFremdheit und Nähe eines spanischen Barockdram...19886055:07-05-19NoneNoneNone...NaNNaNNaNNoneNoneNoneNone16-11-20|09-05-19|24-09-19|07-02-20pica_sgt_8601988
1897576448323842OaxDein Körper neben mirGedichte Zweisprachige Ausgabe19876055:07-05-19JaimeSabinesNone...NaNNaNNaNNoneNoneNoneNone16-11-20|09-05-19|24-09-19|07-02-20pica_sgt_8601987
1897587448323834OaxAvantgarde und RevolutionMexikanische Lyrik von López Velarde bis Octav...19876055:07-05-19NoneNoneNone...NaNNaNNaNNoneNoneNoneNone16-11-20|09-05-19|24-09-19|07-02-20pica_sgt_8601987
18976010344832184XOaxDie @Frau im spanischen Roman nach dem Bürgerk...Camilo José Cela, Carmen Laforet, Ana María Ma...19826055:07-05-19SylviaTruxaNone...NaNNaNNaNNoneNoneNoneNone16-11-20|09-05-19|24-09-19|07-02-20pica_sgt_8601982
\n", "

93434 rows × 49 columns

\n", "
" ], "text/plain": [ " index ppn medium \\\n", "3 3 450873676 Abvc \n", "4 4 442132174 Aau \n", "5 5 438697448 Aau \n", "6 6 424315815 Abvc \n", "7 7 481184937 Oau \n", "... ... ... ... \n", "189755 499 448323869 Oax \n", "189756 0 448323850 Oax \n", "189757 6 448323842 Oax \n", "189758 7 448323834 Oax \n", "189760 103 44832184X Oax \n", "\n", " title \\\n", "3 Francophonies du monde \n", "4 Le @\"théâtre provincial\" en France \n", "5 Modernités des troubadours \n", "6 Le @nouveau magazine littéraire \n", "7 Courage de la vérité et écritures de l'histoire \n", "... ... \n", "189755 KRTU und andere Prosadichtungen \n", "189756 Calderón \n", "189757 Dein Körper neben mir \n", "189758 Avantgarde und Revolution \n", "189760 Die @Frau im spanischen Roman nach dem Bürgerk... \n", "\n", " title_supplement year \\\n", "3 None 2019 \n", "4 (XVIe-XVIIIe siècle) 2018 \n", "5 None 2018 \n", "6 None 2018 \n", "7 (XVIe-XVIIIe siècle) 2017 \n", "... ... ... \n", "189755 Zweisprachige Ausgabe mit einem Nachwort von E... 1988 \n", "189756 Fremdheit und Nähe eines spanischen Barockdram... 1988 \n", "189757 Gedichte Zweisprachige Ausgabe 1987 \n", "189758 Mexikanische Lyrik von López Velarde bis Octav... 1987 \n", "189760 Camilo José Cela, Carmen Laforet, Ana María Ma... 1982 \n", "\n", " entry_first author_first_name author_last_name author_gnd_id ... \\\n", "3 6050:18-07-19 None None None ... \n", "4 0026:31-01-19 None None None ... \n", "5 0077:13-11-18 None None None ... \n", "6 6050:29-12-17 None None None ... \n", "7 0026:05-07-21 None None None ... \n", "... ... ... ... ... ... \n", "189755 6055:07-05-19 Josep Vicenç Foix None ... \n", "189756 6055:07-05-19 None None None ... \n", "189757 6055:07-05-19 Jaime Sabines None ... \n", "189758 6055:07-05-19 None None None ... \n", "189760 6055:07-05-19 Sylvia Truxa None ... \n", "\n", " keyword_BDSL_t keyword_BDSL_s \\\n", "3 NaN NaN \n", "4 NaN NaN \n", "5 NaN NaN \n", "6 NaN NaN \n", "7 NaN NaN \n", "... ... ... 
\n", "189755 NaN NaN \n", "189756 NaN NaN \n", "189757 NaN NaN \n", "189758 NaN NaN \n", "189760 NaN NaN \n", "\n", " keyword_Fremddatenlieferanten_lieferanten \\\n", "3 NaN \n", "4 NaN \n", "5 NaN \n", "6 NaN \n", "7 NaN \n", "... ... \n", "189755 NaN \n", "189756 NaN \n", "189757 NaN \n", "189758 NaN \n", "189760 NaN \n", "\n", " keyword_Fremddatenlieferanten lcc_notation signatur_place \\\n", "3 None None 330|022 \n", "4 None None 000|112 \n", "5 None PN56.T768 000|000 \n", "6 None None 330|022|074 \n", "7 None None None \n", "... ... ... ... \n", "189755 None None None \n", "189756 None None None \n", "189757 None None None \n", "189758 None None None \n", "189760 None None None \n", "\n", " signatur \\\n", "3 16/IA 5021a|28 Frz Z 1775[Suppl. \n", "4 291.535|Za 195 (97) \n", "5 91.282.92|288.691 \n", "6 01/IA 6450|28 Rom Z 15592|Z 871 \n", "7 None \n", "... ... \n", "189755 None \n", "189756 None \n", "189757 None \n", "189758 None \n", "189760 None \n", "\n", " signatur_date api_query \\\n", "3 14-06-07|24-07-19 pica_rvp_IA \n", "4 19-09-19|31-01-19 pica_rvp_IA \n", "5 12-10-21|11-12-18 pica_rvp_IA \n", "6 24-07-20|10-01-18|08-01-18 pica_rvp_IA \n", "7 05-07-21|05-07-21|05-07-21|05-07-21|05-07-21|0... pica_rvp_IA \n", "... ... ... \n", "189755 16-11-20|09-05-19|24-09-19|07-02-20 pica_sgt_860 \n", "189756 16-11-20|09-05-19|24-09-19|07-02-20 pica_sgt_860 \n", "189757 16-11-20|09-05-19|24-09-19|07-02-20 pica_sgt_860 \n", "189758 16-11-20|09-05-19|24-09-19|07-02-20 pica_sgt_860 \n", "189760 16-11-20|09-05-19|24-09-19|07-02-20 pica_sgt_860 \n", "\n", " year_publication \n", "3 2019 \n", "4 2018 \n", "5 2018 \n", "6 2018 \n", "7 2017 \n", "... ... 
\n", "189755 1988 \n", "189756 1988 \n", "189757 1987 \n", "189758 1987 \n", "189760 1982 \n", "\n", "[93434 rows x 49 columns]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "hebis_df" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# Remove the \"index\" column (apparently a leftover positional index from an\n", "# earlier export - TODO confirm). errors=\"ignore\" keeps this cell re-runnable:\n", "# without it a second run raises KeyError because the column is already gone.\n", "hebis_df = hebis_df.drop(columns=[\"index\"], errors=\"ignore\")" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# Index the rows by PPN while keeping the \"ppn\" column, as in k10plus_df.\n", "hebis_df.index = hebis_df[\"ppn\"]" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "# Use the same column name as k10plus_df, which calls this column \"query\".\n", "hebis_df = hebis_df.rename(columns={\"api_query\": \"query\"})" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "# Tag every row with the source label \"hebis\" (k10plus_df rows carry \"k10plus\").\n", "hebis_df[\"source\"] = \"hebis\"" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ppnmediumtitletitle_supplementyearentry_firstauthor_first_nameauthor_last_nameauthor_gnd_ideditor_first_name...keyword_BDSL_skeyword_Fremddatenlieferanten_lieferantenkeyword_Fremddatenlieferantenlcc_notationsignatur_placesignatursignatur_datequeryyear_publicationsource
ppn
450873676450873676AbvcFrancophonies du mondeNone20196050:18-07-19NoneNoneNoneNone...NaNNaNNoneNone330|02216/IA 5021a|28 Frz Z 1775[Suppl.14-06-07|24-07-19pica_rvp_IA2019hebis
442132174442132174AauLe @\"théâtre provincial\" en France(XVIe-XVIIIe siècle)20180026:31-01-19NoneNoneNoneNone...NaNNaNNoneNone000|112291.535|Za 195 (97)19-09-19|31-01-19pica_rvp_IA2018hebis
438697448438697448AauModernités des troubadoursNone20180077:13-11-18NoneNoneNoneNone...NaNNaNNonePN56.T768000|00091.282.92|288.69112-10-21|11-12-18pica_rvp_IA2018hebis
424315815424315815AbvcLe @nouveau magazine littéraireNone20186050:29-12-17NoneNoneNoneNone...NaNNaNNoneNone330|022|07401/IA 6450|28 Rom Z 15592|Z 87124-07-20|10-01-18|08-01-18pica_rvp_IA2018hebis
481184937481184937OauCourage de la vérité et écritures de l'histoire(XVIe-XVIIIe siècle)20170026:05-07-21NoneNoneNoneNone...NaNNaNNoneNoneNoneNone05-07-21|05-07-21|05-07-21|05-07-21|05-07-21|0...pica_rvp_IA2017hebis
..................................................................
448323869448323869OaxKRTU und andere ProsadichtungenZweisprachige Ausgabe mit einem Nachwort von E...19886055:07-05-19Josep VicençFoixNoneNone...NaNNaNNoneNoneNoneNone16-11-20|09-05-19|24-09-19|07-02-20pica_sgt_8601988hebis
448323850448323850OaxCalderónFremdheit und Nähe eines spanischen Barockdram...19886055:07-05-19NoneNoneNoneAngel San...NaNNaNNoneNoneNoneNone16-11-20|09-05-19|24-09-19|07-02-20pica_sgt_8601988hebis
448323842448323842OaxDein Körper neben mirGedichte Zweisprachige Ausgabe19876055:07-05-19JaimeSabinesNoneNone...NaNNaNNoneNoneNoneNone16-11-20|09-05-19|24-09-19|07-02-20pica_sgt_8601987hebis
448323834448323834OaxAvantgarde und RevolutionMexikanische Lyrik von López Velarde bis Octav...19876055:07-05-19NoneNoneNoneKlaus...NaNNaNNoneNoneNoneNone16-11-20|09-05-19|24-09-19|07-02-20pica_sgt_8601987hebis
44832184X44832184XOaxDie @Frau im spanischen Roman nach dem Bürgerk...Camilo José Cela, Carmen Laforet, Ana María Ma...19826055:07-05-19SylviaTruxaNoneNone...NaNNaNNoneNoneNoneNone16-11-20|09-05-19|24-09-19|07-02-20pica_sgt_8601982hebis
\n", "

93434 rows × 49 columns

\n", "
" ], "text/plain": [ " ppn medium \\\n", "ppn \n", "450873676 450873676 Abvc \n", "442132174 442132174 Aau \n", "438697448 438697448 Aau \n", "424315815 424315815 Abvc \n", "481184937 481184937 Oau \n", "... ... ... \n", "448323869 448323869 Oax \n", "448323850 448323850 Oax \n", "448323842 448323842 Oax \n", "448323834 448323834 Oax \n", "44832184X 44832184X Oax \n", "\n", " title \\\n", "ppn \n", "450873676 Francophonies du monde \n", "442132174 Le @\"théâtre provincial\" en France \n", "438697448 Modernités des troubadours \n", "424315815 Le @nouveau magazine littéraire \n", "481184937 Courage de la vérité et écritures de l'histoire \n", "... ... \n", "448323869 KRTU und andere Prosadichtungen \n", "448323850 Calderón \n", "448323842 Dein Körper neben mir \n", "448323834 Avantgarde und Revolution \n", "44832184X Die @Frau im spanischen Roman nach dem Bürgerk... \n", "\n", " title_supplement year \\\n", "ppn \n", "450873676 None 2019 \n", "442132174 (XVIe-XVIIIe siècle) 2018 \n", "438697448 None 2018 \n", "424315815 None 2018 \n", "481184937 (XVIe-XVIIIe siècle) 2017 \n", "... ... ... \n", "448323869 Zweisprachige Ausgabe mit einem Nachwort von E... 1988 \n", "448323850 Fremdheit und Nähe eines spanischen Barockdram... 1988 \n", "448323842 Gedichte Zweisprachige Ausgabe 1987 \n", "448323834 Mexikanische Lyrik von López Velarde bis Octav... 1987 \n", "44832184X Camilo José Cela, Carmen Laforet, Ana María Ma... 1982 \n", "\n", " entry_first author_first_name author_last_name author_gnd_id \\\n", "ppn \n", "450873676 6050:18-07-19 None None None \n", "442132174 0026:31-01-19 None None None \n", "438697448 0077:13-11-18 None None None \n", "424315815 6050:29-12-17 None None None \n", "481184937 0026:05-07-21 None None None \n", "... ... ... ... ... 
\n", "448323869 6055:07-05-19 Josep Vicenç Foix None \n", "448323850 6055:07-05-19 None None None \n", "448323842 6055:07-05-19 Jaime Sabines None \n", "448323834 6055:07-05-19 None None None \n", "44832184X 6055:07-05-19 Sylvia Truxa None \n", "\n", " editor_first_name ... keyword_BDSL_s \\\n", "ppn ... \n", "450873676 None ... NaN \n", "442132174 None ... NaN \n", "438697448 None ... NaN \n", "424315815 None ... NaN \n", "481184937 None ... NaN \n", "... ... ... ... \n", "448323869 None ... NaN \n", "448323850 Angel San ... NaN \n", "448323842 None ... NaN \n", "448323834 Klaus ... NaN \n", "44832184X None ... NaN \n", "\n", " keyword_Fremddatenlieferanten_lieferanten \\\n", "ppn \n", "450873676 NaN \n", "442132174 NaN \n", "438697448 NaN \n", "424315815 NaN \n", "481184937 NaN \n", "... ... \n", "448323869 NaN \n", "448323850 NaN \n", "448323842 NaN \n", "448323834 NaN \n", "44832184X NaN \n", "\n", " keyword_Fremddatenlieferanten lcc_notation signatur_place \\\n", "ppn \n", "450873676 None None 330|022 \n", "442132174 None None 000|112 \n", "438697448 None PN56.T768 000|000 \n", "424315815 None None 330|022|074 \n", "481184937 None None None \n", "... ... ... ... \n", "448323869 None None None \n", "448323850 None None None \n", "448323842 None None None \n", "448323834 None None None \n", "44832184X None None None \n", "\n", " signatur \\\n", "ppn \n", "450873676 16/IA 5021a|28 Frz Z 1775[Suppl. \n", "442132174 291.535|Za 195 (97) \n", "438697448 91.282.92|288.691 \n", "424315815 01/IA 6450|28 Rom Z 15592|Z 871 \n", "481184937 None \n", "... ... \n", "448323869 None \n", "448323850 None \n", "448323842 None \n", "448323834 None \n", "44832184X None \n", "\n", " signatur_date query \\\n", "ppn \n", "450873676 14-06-07|24-07-19 pica_rvp_IA \n", "442132174 19-09-19|31-01-19 pica_rvp_IA \n", "438697448 12-10-21|11-12-18 pica_rvp_IA \n", "424315815 24-07-20|10-01-18|08-01-18 pica_rvp_IA \n", "481184937 05-07-21|05-07-21|05-07-21|05-07-21|05-07-21|0... 
pica_rvp_IA \n", "... ... ... \n", "448323869 16-11-20|09-05-19|24-09-19|07-02-20 pica_sgt_860 \n", "448323850 16-11-20|09-05-19|24-09-19|07-02-20 pica_sgt_860 \n", "448323842 16-11-20|09-05-19|24-09-19|07-02-20 pica_sgt_860 \n", "448323834 16-11-20|09-05-19|24-09-19|07-02-20 pica_sgt_860 \n", "44832184X 16-11-20|09-05-19|24-09-19|07-02-20 pica_sgt_860 \n", "\n", " year_publication source \n", "ppn \n", "450873676 2019 hebis \n", "442132174 2018 hebis \n", "438697448 2018 hebis \n", "424315815 2018 hebis \n", "481184937 2017 hebis \n", "... ... ... \n", "448323869 1988 hebis \n", "448323850 1988 hebis \n", "448323842 1987 hebis \n", "448323834 1987 hebis \n", "44832184X 1982 hebis \n", "\n", "[93434 rows x 49 columns]" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "hebis_df" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "k10plus_hebis_df = pd.concat([k10plus_df, hebis_df])" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ppnmediumtitletitle_supplementyearentry_firstauthor_first_nameauthor_last_nameauthor_gnd_ideditor_first_name...keyword_fremd_ppnkeyword_fremdkeyword_einzel_ppnkeyword_einzelkeyword_BDSL_akeyword_BDSL_pkeyword_BDSL_tkeyword_BDSL_skeyword_Fremddatenlieferanten_lieferantenkeyword_Fremddatenlieferanten
ppn
16420674071642067407AauCamusNone1987HDBSRO:13-09-18MorvanLebesque077112679None...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
17345386191734538619Abv1 2 3..! LectureNone19822004:02-10-20NoneNoneNoneNone...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
16784150651678415065OauLivre et société dans la France du XVIIIe siècle[1]2019EBP:08-10-19NoneNoneNoneGeneviève...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
780075951780075951AauRousseau and the French Revolution1762 - 179120133401:10-03-14JoanMacDonald1022931997None...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
194869113194869113AauA @linguagem dos pescadores da EriceiraNone19932003:29-03-96Joana LopesAlves800444299None...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
..................................................................
448323869448323869OaxKRTU und andere ProsadichtungenZweisprachige Ausgabe mit einem Nachwort von E...19886055:07-05-19Josep VicençFoixNoneNone...NoneNone085157066|090714555Sprachausgabe|GoigNoneNaNNaNNaNNaNNone
448323850448323850OaxCalderónFremdheit und Nähe eines spanischen Barockdram...19886055:07-05-19NoneNoneNoneAngel San...NoneNone086874756|085936928|085043559|085028231|085003...Flasche, Hans&datl=1911-1994|Welttheater|Fremd...NoneNaNNaNNaNNaNNone
448323842448323842OaxDein Körper neben mirGedichte Zweisprachige Ausgabe19876055:07-05-19JaimeSabinesNoneNone...NoneNone085083747KörperNoneNaNNaNNaNNaNNone
448323834448323834OaxAvantgarde und RevolutionMexikanische Lyrik von López Velarde bis Octav...19876055:07-05-19NoneNoneNoneKlaus...NoneNone08708046X|085099805|08548833X|089277058|088575...Paz, Octavio&datl=1914-1998|Lyrik|Avantgarde|E...NoneNaNNaNNaNNaNNone
44832184X44832184XOaxDie @Frau im spanischen Roman nach dem Bürgerk...Camilo José Cela, Carmen Laforet, Ana María Ma...19826055:07-05-19SylviaTruxaNoneNone...NoneNone087037629|087119080|087008505|087123223|085140...Goytisolo, Juan&datl=1931-2017|Laforet, Carmen...NoneNaNNaNNaNNaNNone
\n", "

341935 rows × 76 columns

\n", "
" ], "text/plain": [ " ppn medium \\\n", "ppn \n", "1642067407 1642067407 Aau \n", "1734538619 1734538619 Abv \n", "1678415065 1678415065 Oau \n", "780075951 780075951 Aau \n", "194869113 194869113 Aau \n", "... ... ... \n", "448323869 448323869 Oax \n", "448323850 448323850 Oax \n", "448323842 448323842 Oax \n", "448323834 448323834 Oax \n", "44832184X 44832184X Oax \n", "\n", " title \\\n", "ppn \n", "1642067407 Camus \n", "1734538619 1 2 3..! Lecture \n", "1678415065 Livre et société dans la France du XVIIIe siècle \n", "780075951 Rousseau and the French Revolution \n", "194869113 A @linguagem dos pescadores da Ericeira \n", "... ... \n", "448323869 KRTU und andere Prosadichtungen \n", "448323850 Calderón \n", "448323842 Dein Körper neben mir \n", "448323834 Avantgarde und Revolution \n", "44832184X Die @Frau im spanischen Roman nach dem Bürgerk... \n", "\n", " title_supplement year \\\n", "ppn \n", "1642067407 None 1987 \n", "1734538619 None 1982 \n", "1678415065 [1] 2019 \n", "780075951 1762 - 1791 2013 \n", "194869113 None 1993 \n", "... ... ... \n", "448323869 Zweisprachige Ausgabe mit einem Nachwort von E... 1988 \n", "448323850 Fremdheit und Nähe eines spanischen Barockdram... 1988 \n", "448323842 Gedichte Zweisprachige Ausgabe 1987 \n", "448323834 Mexikanische Lyrik von López Velarde bis Octav... 1987 \n", "44832184X Camilo José Cela, Carmen Laforet, Ana María Ma... 1982 \n", "\n", " entry_first author_first_name author_last_name author_gnd_id \\\n", "ppn \n", "1642067407 HDBSRO:13-09-18 Morvan Lebesque 077112679 \n", "1734538619 2004:02-10-20 None None None \n", "1678415065 EBP:08-10-19 None None None \n", "780075951 3401:10-03-14 Joan MacDonald 1022931997 \n", "194869113 2003:29-03-96 Joana Lopes Alves 800444299 \n", "... ... ... ... ... 
\n", "448323869 6055:07-05-19 Josep Vicenç Foix None \n", "448323850 6055:07-05-19 None None None \n", "448323842 6055:07-05-19 Jaime Sabines None \n", "448323834 6055:07-05-19 None None None \n", "44832184X 6055:07-05-19 Sylvia Truxa None \n", "\n", " editor_first_name ... keyword_fremd_ppn keyword_fremd \\\n", "ppn ... \n", "1642067407 None ... NaN NaN \n", "1734538619 None ... NaN NaN \n", "1678415065 Geneviève ... NaN NaN \n", "780075951 None ... NaN NaN \n", "194869113 None ... NaN NaN \n", "... ... ... ... ... \n", "448323869 None ... None None \n", "448323850 Angel San ... None None \n", "448323842 None ... None None \n", "448323834 Klaus ... None None \n", "44832184X None ... None None \n", "\n", " keyword_einzel_ppn \\\n", "ppn \n", "1642067407 NaN \n", "1734538619 NaN \n", "1678415065 NaN \n", "780075951 NaN \n", "194869113 NaN \n", "... ... \n", "448323869 085157066|090714555 \n", "448323850 086874756|085936928|085043559|085028231|085003... \n", "448323842 085083747 \n", "448323834 08708046X|085099805|08548833X|089277058|088575... \n", "44832184X 087037629|087119080|087008505|087123223|085140... \n", "\n", " keyword_einzel keyword_BDSL_a \\\n", "ppn \n", "1642067407 NaN NaN \n", "1734538619 NaN NaN \n", "1678415065 NaN NaN \n", "780075951 NaN NaN \n", "194869113 NaN NaN \n", "... ... ... \n", "448323869 Sprachausgabe|Goig None \n", "448323850 Flasche, Hans&datl=1911-1994|Welttheater|Fremd... None \n", "448323842 Körper None \n", "448323834 Paz, Octavio&datl=1914-1998|Lyrik|Avantgarde|E... None \n", "44832184X Goytisolo, Juan&datl=1931-2017|Laforet, Carmen... None \n", "\n", " keyword_BDSL_p keyword_BDSL_t keyword_BDSL_s \\\n", "ppn \n", "1642067407 NaN NaN NaN \n", "1734538619 NaN NaN NaN \n", "1678415065 NaN NaN NaN \n", "780075951 NaN NaN NaN \n", "194869113 NaN NaN NaN \n", "... ... ... ... 
\n", "448323869 NaN NaN NaN \n", "448323850 NaN NaN NaN \n", "448323842 NaN NaN NaN \n", "448323834 NaN NaN NaN \n", "44832184X NaN NaN NaN \n", "\n", " keyword_Fremddatenlieferanten_lieferanten \\\n", "ppn \n", "1642067407 NaN \n", "1734538619 NaN \n", "1678415065 NaN \n", "780075951 NaN \n", "194869113 NaN \n", "... ... \n", "448323869 NaN \n", "448323850 NaN \n", "448323842 NaN \n", "448323834 NaN \n", "44832184X NaN \n", "\n", " keyword_Fremddatenlieferanten \n", "ppn \n", "1642067407 NaN \n", "1734538619 NaN \n", "1678415065 NaN \n", "780075951 NaN \n", "194869113 NaN \n", "... ... \n", "448323869 None \n", "448323850 None \n", "448323842 None \n", "448323834 None \n", "44832184X None \n", "\n", "[341935 rows x 76 columns]" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "k10plus_hebis_df" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['ppn',\n", " 'medium',\n", " 'title',\n", " 'title_supplement',\n", " 'year',\n", " 'entry_first',\n", " 'author_first_name',\n", " 'author_last_name',\n", " 'author_gnd_id',\n", " 'editor_first_name',\n", " 'editor_last_name',\n", " 'editor_gnd_id',\n", " 'isbn',\n", " 'ILNs',\n", " 'content_type',\n", " 'publisher',\n", " 'language_text',\n", " 'language_original',\n", " 'pages',\n", " 'format',\n", " 'comment_isbn',\n", " 'place_publication',\n", " 'summary',\n", " 'title_continuing_resource',\n", " 'work_ppn',\n", " 'work_info',\n", " 'work_title',\n", " 'expression_ppn',\n", " 'expression_info',\n", " 'expression_title',\n", " 'DDC_notation',\n", " 'DDC_sachgruppe',\n", " 'DDC_grundnotation',\n", " 'BK_ppn',\n", " 'BK_notation',\n", " 'BK_j',\n", " 'RVK_ppn',\n", " 'RVK_notation',\n", " 'RVK_j',\n", " 'RVK_k',\n", " 'keyword_RSWK',\n", " 'keyword_K10plus',\n", " 'keyword_project',\n", " 'keyword_local',\n", " 'keyword_045D',\n", " 'keyword_LoC',\n", " 'lcc_notation',\n", " 'klassifikationssystem_system',\n", " 
'klassifikationssystem_notation',\n", " 'uri_description',\n", " 'uri',\n", " 'GOK_ppn',\n", " 'GOK_notation',\n", " 'GOK_j',\n", " 'signatur_place',\n", " 'signatur',\n", " 'signatur_date',\n", " 'Abrufzeichen',\n", " 'query',\n", " 'year_publication',\n", " 'literary_texts',\n", " 'source',\n", " 'content_type_ppn',\n", " 'DDC_sachgruppe_a',\n", " 'DDC_sachgruppe_b',\n", " 'DDC_sachgruppe_c',\n", " 'keyword_fremd_ppn',\n", " 'keyword_fremd',\n", " 'keyword_einzel_ppn',\n", " 'keyword_einzel',\n", " 'keyword_BDSL_a',\n", " 'keyword_BDSL_p',\n", " 'keyword_BDSL_t',\n", " 'keyword_BDSL_s',\n", " 'keyword_Fremddatenlieferanten_lieferanten',\n", " 'keyword_Fremddatenlieferanten']" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "k10plus_hebis_df.columns.tolist()" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "k10plus_hebis_df.to_parquet(\"./../data/k10plus_hebis_titles_romance_languages_1980_2019_secondary_literature.parquet\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3.7.6 ('base')", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" }, "orig_nbformat": 2, "vscode": { "interpreter": { "hash": "f47a9bd213414a99a19c677dbc3fe06cd2e784c4236a07176834505119fd1ea8" } } }, "nbformat": 4, "nbformat_minor": 2 }