dc.contributor.author |
Drobac, Senka |
|
dc.contributor.author |
Kauppinen, Pekka Sakari |
|
dc.contributor.author |
Linden, Bo Krister Johan |
|
dc.contributor.editor |
Tiedemann, Jörg |
|
dc.date.accessioned |
2017-12-21T14:49:01Z |
|
dc.date.available |
2017-12-21T14:49:01Z |
|
dc.date.issued |
2017 |
|
dc.identifier.citation |
Drobac , S , Kauppinen , P S & Linden , B K J 2017 , OCR and post-correction of historical Finnish texts . in J Tiedemann (ed.) , Proceedings of the 21st Nordic Conference on Computational Linguistics, NoDaLiDa, 22-24 May 2017, Gothenburg, Sweden . Linköping Electronic Conference Proceedings , no. 131 , Linköping University Electronic Press , Linköping , pp. 70-76 , Nordic Conference of Computational Linguistics , Gothenburg , Sweden , 22/05/2017 . < http://www.ep.liu.se/ecp/131/ecp17131.pdf > |
|
dc.identifier.citation |
conference |
|
dc.identifier.other |
PURE: 88679443 |
|
dc.identifier.other |
PURE UUID: ab996588-e25d-480a-ac18-8de8515df367 |
|
dc.identifier.other |
ORCID: /0000-0003-2337-303X/work/39925415 |
|
dc.identifier.other |
ORCID: /0000-0002-7645-3079/work/39925678 |
|
dc.identifier.other |
ORCID: /0000-0003-2071-5110/work/39925718 |
|
dc.identifier.uri |
http://hdl.handle.net/10138/229864 |
|
dc.description.abstract |
This paper presents experiments on Optical character recognition (OCR) as a combination of Ocropy software and data-driven spelling correction that uses Weighted Finite-State Methods. Both model training and testing were done on Finnish corpora of historical newspaper text and the best combination of OCR and post-processing models give 95.21% character recognition accuracy. |
en |
dc.format.extent |
7 |
|
dc.language.iso |
eng |
|
dc.publisher |
Linköping University Electronic Press |
|
dc.relation.ispartof |
Proceedings of the 21st Nordic Conference on Computational Linguistics, NoDaLiDa, 22-24 May 2017, Gothenburg, Sweden |
|
dc.relation.ispartofseries |
Linköping Electronic Conference Proceedings |
|
dc.relation.isversionof |
978-91-7685-601-7 |
|
dc.rights.uri |
info:eu-repo/semantics/openAccess |
|
dc.subject |
6121 Languages |
|
dc.title |
OCR and post-correction of historical Finnish texts |
en |
dc.type |
Conference contribution |
|
dc.contributor.organization |
Department of Modern Languages 2010-2017 |
|
dc.contributor.organization |
Language Technology |
|
dc.description.reviewstatus |
Peer reviewed |
|
dc.relation.issn |
1650-3686 |
|
dc.rights.accesslevel |
openAccess |
|
dc.type.version |
publishedVersion |
|
dc.identifier.url |
http://www.ep.liu.se/ecp/131/ecp17131.pdf |
|