{"id":2796,"date":"2014-08-25T15:57:58","date_gmt":"2014-08-25T20:57:58","guid":{"rendered":"http:\/\/blogs.terrorware.com\/geoff\/?p=2796"},"modified":"2014-08-25T15:57:58","modified_gmt":"2014-08-25T20:57:58","slug":"tools-for-extracting-structured-data-from-a-pdf-file","status":"publish","type":"post","link":"http:\/\/blogs.terrorware.com\/geoff\/2014\/08\/25\/tools-for-extracting-structured-data-from-a-pdf-file\/","title":{"rendered":"Tools for extracting structured data from a PDF file"},"content":{"rendered":"<p>These are tools that have been suggested to me to extract structured data from a PDF file:<\/p>\n<ul>\n<li>pdftotext from <a href=\"http:\/\/www.foolabs.com\/xpdf\/\">xpdf<\/a><\/li>\n<li><a href=\"http:\/\/www.cometdocs.com\/\">CometDocs<\/a><\/li>\n<li>Cogniview <a href=\"http:\/\/www.cogniview.com\/pdf-to-excel\/\">PDF2Excel<\/a><\/li>\n<li><a href=\"http:\/\/tabula.nerdpower.org\/\">Tabula<\/a><\/li>\n<\/ul>\n","protected":false},"excerpt":{"rendered":"<p>These are tools that have been suggested to me to extract structured data from a PDF file: pdftotext from xpdf CometDocs Cogniview PDF2Excel 
Tabula<\/p>\n","protected":false},"author":3,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"jetpack_post_was_ever_published":false,"_jetpack_newsletter_access":"","_jetpack_dont_email_post_to_subs":false,"_jetpack_newsletter_tier_id":0,"_jetpack_memberships_contains_paywalled_content":false,"_jetpack_memberships_contains_paid_content":false,"footnotes":"","jetpack_publicize_message":"","jetpack_publicize_feature_enabled":true,"jetpack_social_post_already_shared":true,"jetpack_social_options":{"image_generator_settings":{"template":"highway","default_image_id":0,"font":"","enabled":false},"version":2}},"categories":[1],"tags":[215,524],"class_list":["post-2796","post","type-post","status-publish","format-standard","hentry","category-uncategorized","tag-pdf","tag-tools","entry"],"jetpack_publicize_connections":[],"jetpack_featured_media_url":"","jetpack_sharing_enabled":true,"jetpack_shortlink":"https:\/\/wp.me\/p4wnIz-J6","_links":{"self":[{"href":"http:\/\/blogs.terrorware.com\/geoff\/wp-json\/wp\/v2\/posts\/2796","targetHints":{"allow":["GET"]}}],"collection":[{"href":"http:\/\/blogs.terrorware.com\/geoff\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/blogs.terrorware.com\/geoff\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/blogs.terrorware.com\/geoff\/wp-json\/wp\/v2\/users\/3"}],"replies":[{"embeddable":true,"href":"http:\/\/blogs.terrorware.com\/geoff\/wp-json\/wp\/v2\/comments?post=2796"}],"version-history":[{"count":1,"href":"http:\/\/blogs.terrorware.com\/geoff\/wp-json\/wp\/v2\/posts\/2796\/revisions"}],"predecessor-version":[{"id":2797,"href":"http:\/\/blogs.terrorware.com\/geoff\/wp-json\/wp\/v2\/posts\/2796\/revisions\/2797"}],"wp:attachment":[{"href":"http:\/\/blogs.terrorware.com\/geoff\/wp-json\/wp\/v2\/media?parent=2796"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/blogs.terrorware.com\/geoff\/wp-json\/wp\/v2\/ca
tegories?post=2796"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/blogs.terrorware.com\/geoff\/wp-json\/wp\/v2\/tags?post=2796"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}