@Comment{-*- coding: utf-8; fill-column: 5000 -*-} @InProceedings{semInteropDataspaces-2024, author = {Robert David and Petar Ivanov and Vladimir Alexiev}, title = {Raising the Role of Vocabulary Hubs for Semantic Data Interoperability in Dataspaces}, booktitle = {Third workshop on Semantic Interoperability in Data Spaces}, year = 2024, month = oct, address = {Budapest, Hungary}, url_Proceedings = {https://semantic.internationaldataspaces.org/workshop-2024/}, url_Slides = {https://rawgit2.com/underpin-project/papers/main/EBDVF-2024/EBDVF-2024-presentation/presentation.html}, url_PDF = {https://raw.githubusercontent.com/underpin-project/papers/refs/heads/main/EBDVF-2024/EBDVF-2024-presentation/presentation.pdf}, keywords = {dataspaces, semantic interoperability, semantic technologies, ontologies, vocabulary hub, oil and gas, renewable energy, refineries, windfarms}, date = {2024-10-02}, abstract = {Dataspaces are an important enabler for industrial data sharing (either commercially licensed or private). Europe is investing heavily into sectoral dataspaces, federation and orchestration platforms like SIMPL, Eclipse DSC, GXFS, etc. Still, dataspaces enable shared data access, but do not solve the data interoperability problem. For that, the consumer would like to see the data from different providers in a harmonized and semantically integrated form. The Vocabulary Hub service (part of the IDS RAM) provides a repository for ontologies and vocabularies. We describe an approach to raising the role of the Vocabulary Hub to also allow richer metadata description (e.g. the meaning of every column in a tabular dataset), and binding semantic descriptions to ingested datasets, thus providing on-the-fly data semantization and easing data querying. This is achieved through the integration of two commercial semantic products (PoolParty and GraphDB), leveraging the partnership between the Semantic Web Company and Ontotext, and is being developed within the frame of the Digital Europe project UNDERPIN, with applications to refinery and wind farm data.}, } @Manual{AEC3PO-ontology-2024, title = {{AEC3PO: Architecture, Engineering, Construction Compliance Checking and Permitting Ontology}}, author = {Edlira Vakaj and Panagiotis Patlakas and Thomas Beach and Maxime Lefrançois and Amna Dridi and Vladimir Alexiev}, month = feb, year = 2024, url = {https://w3id.org/lbd/aec3po/}, keywords = {AECO, automated compliance checking, building regulations, construction regulations, land use, ontologies, ACCORD, Architecture Engineering and Construction Compliance Checking and Permitting Ontology, AEC3PO, compliance checking, CO2 emission, Sustainability, domain-specific rule language, RASE}, url_Github = {https://github.com/accord-project/aec3po/}, date = {2024-02-15}, abstract = {The Architecture, Engineering, Construction Compliance Checking and Permitting Ontology (AEC3PO) is an ontology developed to support the automated compliance checking of construction, renovation, and demolition works. It has been developed in the context of the Automated Compliance Checking for Construction, Renovation or Demolition Works (ACCORD) project, an ERC/Horizon-funded project that aims to digitalise permitting and compliance processes. AEC3PO aims to capture all aspects of building compliance and building permitting in Architecture, Engineering, and Construction (AEC), across different regulatory systems.
It allows the modelling of aspects such as: building and infrastructure codes, regulations, and standards, and their compliance requirements; building and infrastructure permitting processes and documentation; building and infrastructure compliance and permitting actors. The ontology requirements are derived from the rule formalisation methodology that aims to semantise regulations and provide an open format for machine-readable rules. The ontology is built using Semantic Web technologies, adhering to standards like RDF, OWL, and SKOS. It also integrates popular ontologies such as Dublin Core Terms (DCT) and Europe's Legislation Identifier (ELI) in order to create a structured and interconnected knowledge graph. This allows professionals to explore, query, and understand various aspects of the compliance and permitting processes more comprehensively.}, } @InProceedings{DBP2024-buildingCodes, author = {Gonçal Costa and Edlira Vakaj and Thomas Beach and Rita Lavikka and Maxime Lefrançois and Antoine Zimmermann and Thamer Mecharnia and Vladimir Alexiev and Amna Dridi and Hansi Hettiarachchi and Nataliya Keberle}, title = {Formalization of Building Codes and Regulations in Knowledge Graphs}, booktitle = {Digital Building Permit Conference 2024}, year = 2024, editor = {Francesca Noardo and Judith Fauth}, pages = {142-146}, month = apr, address = {Barcelona, Spain}, url = {https://zenodo.org/records/12760552}, url_Slides = {https://docs.google.com/presentation/d/1UO8bH9LY_KprjZhrSHFV7QKq2d8r_fjF/edit}, keywords = {AECO, BIM, regulation checking, automated compliance checking}, doi = {10.5281/zenodo.12760552}, abstract = {The Architecture, Engineering and Construction (AEC) industry is subject to many building codes and regulations that apply to the design and construction of buildings. These regulations often involve complex language and technical vocabulary that can give rise to different interpretations, depending on their context and purpose, and therefore a difficulty in their application. The introduction of Building Information Modelling (BIM), as well as authoring tools capable of creating and exporting 3D representations of buildings, is paving the way for compliance checking to become more automated and less dependent on interpretation. This should allow for better quality by reducing the time needed for checking and avoiding human errors. However, despite attempts to provide new BIM-based methods and approaches to achieve this goal in the past two decades, none of these methods have proven to be close to being a definitive solution. The basis for checking compliance against regulations using a BIM model is to have a description of the regulations in a computable form. In turn, this makes it necessary to define data requirements for models that guarantee that regulations can be checked consistently. Within this framework, several scenarios can be considered to address the problem. One is to consider the descriptive part of the regulation separate from the execution part, that is, compliance checking procedures. Currently, those in charge of writing the regulations typically publish them in plain text documents in PDF format. 
Therefore, the next evolutionary step is to manage construction regulations in a machine-readable way underpinned by semantics, thus ensuring they can be interpreted precisely by the software used for checking buildings against them.}, } @InProceedings{DBP2024-GeoSPARQL, author = {Vladimir Alexiev and Nataliya Keberle}, title = {Checking of Urban Planning Regulations with GeoSPARQL and BIM SPARQL}, booktitle = {Digital Building Permit 2024}, year = 2024, pages = 234, month = apr, address = {Barcelona, Spain}, url = {https://zenodo.org/records/12760552}, url_HTML = {https://presentations.ontotext.com/2024/checking-of-urban-planning-regulations-with-geosparql-and-bim-sparql/Slides.html}, url_Slides = {https://drive.google.com/file/d/1PtApSPgU2nQjvRBCcgNU8oles2O7EnC6/view}, keywords = {AECO, urban planning, BIM, City Information Management, regulation checking, XPlanung, INSPIRE PLU, CityGML, GeoSPARQL, Berlin Tegel, TXL, Malgrat}, doi = {10.5281/zenodo.12760552}, abstract = {The former Berlin Tegel airport (TXL) will be the site of a university campus (refurbished airport terminal), startups, production facilities (“tech republic”), a living quarter, stores, smart mobility hubs, park and recreation areas, etc. The Tegel Project company (owned by the City of Berlin) has developed detailed urban planning and regulations covering built area use, height restrictions, noise protection, floor space index (buildup density), greenery requirements (vegetation and habitats), etc. The regulations are expressed in XPlanung and INSPIRE PLU. These are GML-based UML and XML models for urban planning: XPlanung is Germany-specific and PLU (Planned Land Use) is part of the INSPIRE initiative. Building designs are expressed in IFC and include simple geometries (for residential buildings) and complex geometries (for the university campus). Compliance checking of urban planning requires accessing two different kinds of data in a harmonized way: BIM (building information) and GIS (also called CIM “city information management” and often represented using GML extension schemas). As part of the Horizon Europe ACCORD project, we plan to do this checking using SPARQL in Ontotext GraphDB. GIS data is covered by the existing GeoSPARQL plugin that supports WKT and GML geometries. BIM data can either be converted to GIS/GML using already developed approaches, or accessed through a future Binary Engineering Data connector for GraphDB based on the HDF5 format. We give an overview of XPlanung, INSPIRE PLU, CityGML and GeoSPARQL 1.0 and 1.1.
Then we describe the semantic conversion of XPlanung / INSPIRE PLU data, our approach regarding semantization of BIM data, the overall structure of regulations, the respective geometric and non-geometric checks to be implemented, the use of GeoSPARQL topological relations to leverage planning zone hierarchies and to check which buildings fall in which zones, potential specialized BIM SPARQL functions to be implemented, management of multiple BIM files that need to be checked in concert, and result creation and content.}, } @Misc{Alexiev-Qualification-vs-Rolification-2024, author = {Vladimir Alexiev}, title = {{Using Qualification Instead of Rolification for the Records in Context Ontology (RiC-O)}}, month = jan, year = 2024, url = {https://github.com/ICA-EGAD/RiC-O/issues/67#issuecomment-1919383104}, keywords = {rolification, qualification, reasoning, GraphDB Rules, GLAM, archives, RiC-O, Records in Context, PROV}, abstract = {The Records in Context Ontology (RiC-O) uses a "Rolification" pattern to derive direct (unqualified) relations from Relation nodes. This involves the use of a "parasitic" self-link on relation nodes (owl:hasSelf) and owl:propertyChainAxiom, which is expensive to implement. Instead, I propose to use the PROV Qualified Relation pattern (associate the direct relation to the Relation class using prov:unqualifiedForm) and implement it with simpler GraphDB rules.} } @TechReport{thamermecharniaExistingOntologiesStandards2023, author = {Thamer Mecharnia and Maxime Lefrançois and Antoine Zimmermann and Edlira Vakaj and Amna Dridi and Hansi Hettiarachchi and Vladimir Alexiev and Nataliya Keberle and He Tan and Francesca Noardo and Rick Makkinga and Franco Cheung}, title = {Existing Ontologies, Standards, and Data Models in the Building Data Domain Relevant to Compliance Checking}, institution = {ACCORD Project}, year = 2023, type = {Deliverable}, number = {D2.1}, month = aug, url = {https://accordproject.eu/wp-content/uploads/2023/09/ACCORD_D2.1_Technical_Report_Existing_Models.pdf}, keywords = {Architecture Engineering Construction and Operations (AECO), Ontologies, Review}, url_library = {https://www.zotero.org/groups/3007408/semantic_bim/library}, abstract = {This deliverable presents the results of Task 2.1 (Technical Review of Existing Standards) of the ACCORD project. The ACCORD project employs a semantic approach for validating building permits, eliminating the need for costly centralized systems that are challenging to establish and maintain. The primary aim of the ACCORD project is to digitize permit and compliance procedures to improve the productivity and quality of design and construction processes and facilitate the creation of an environmentally sustainable built environment. This deliverable will review the existing ontologies, standards, and data models in the Architecture, Engineering, and Construction (AEC) domain and how they can be reused for the purpose of the automatic compliance check. More specifically, this deliverable will: 1. Evaluate the AEC domain-related ontologies and propose suggestions on how they can be employed for the development of the Architecture Engineering and Construction Compliance Checking and Permitting Ontology (AEC3PO). 2. Conduct a review of query languages associated with the AEC domain and the semantic web. 3. Compare the rule languages developed or used in AEC projects. 4. Review the standards that may be relevant to different areas in the ACCORD project. 5. 
Compare the existing reasoners that could be useful to building permitting automatic compliance checking. All the references used in this deliverable are gathered in the open Zotero library for the project. In the AEC industry, several standards and recommendations aim to achieve different levels of data interoperability in systems. This deliverable concentrates on data-related standards such as those that provide syntactic rules and semantics to represent data in a standardized way. Policy and regulatory standards are out of the scope of this deliverable and are addressed in deliverable D1.1 "Landscape Review Report". The outcomes of this deliverable will serve as a reference for other tasks within the project, which will determine the preferred rule language, which ontologies can be reused, aligned, or serve as inspiration for the creation of the AEC3PO to be developed in Task 2.2 of WP2. Furthermore, the standards that will be presented in this deliverable can be employed in various aspects of the ACCORD project. This groundwork will facilitate the development of the AEC3PO ontology as well as the design and implementation of the Rule Formalisation Tool.}, } @TechReport{Alexiev-Crunchbase-Fibo-2023, author = {Vladimir Alexiev}, title = {{Exploring FIBO Complexity With Crunchbase: Representing Crunchbase IPOs in FIBO}}, month = apr, year = 2024, url = {https://rawgit2.com/VladimirAlexiev/crunchbase-fibo/main/README.html}, url_Github = {https://github.com/VladimirAlexiev/crunchbase-fibo/}, keywords = {fintech, Crunchbase, ontologies, semantic modeling, Initial Public Offering, IPO, Financial Industry Business Ontology, FIBO}, abstract = {The Financial Industry Business Ontology (FIBO) by the Enterprise Data Management Council (EDMC) is a family of ontologies and a reference model for representing data in the financial world using semantic technologies. It is used in fintech Knowledge Graph (KG) projects because it offers a comprehensive and principled approach to representing financial data, and a wide set of predefined models that can be used to implement data harmonization and financial data integration. The 2022Q2 FIBO release consists of 290 ontologies using 380 prefixes that cover topics such as legal entities, contracts, agency, trusts, regulators, securities, loans, derivatives, etc. FIBO's reach and flexible ontological approach allow the integration of a wide variety of financial data, but it comes at the price of more complex representation. Crunchbase (CB) is a well-known dataset by TechCrunch that includes companies, key people, funding rounds, acquisitions, Initial Public Offerings (IPOs), etc. It has about 2M companies with a good mix of established enterprises (including 47k public companies), mid-range companies and startups. We (Ontotext and other Wikidata contributors) have matched 72k CB companies to Wikidata, see this query. I explore the representation of Crunchbase data (more specifically IPOs) in FIBO and compare it to the simplest possible semantic representation. I therefore illustrate the complexity of FIBO, and explain its flexibility along the way. 
I finish with some discussion and conclusions as to when FIBO can bring value to fintech KG projects.}, } @Misc{NLQ-GPT-SOML-GraphQL-2023, author = {Vladimir Alexiev}, title = {{Natural Language Querying with GPT, SOML and GraphQL}}, howpublished = {Ontotext Last Friday Webinar}, month = may, year = 2023, url_video = {https://drive.google.com/file/d/1TOHrtlleOAkv4oZYhlAWa22mUqtvsV7o/view}, abstract = {Clients want to talk to their KG, i.e. ask questions about the schema and data in natural language. LLMs like GPT and LLAMA have opened a revolution in this regard. Currently Ontotext is exploring 8 themes with LLMs. NLQ can be accomplished either by: - Providing data from GraphDB to the LLM, or - Presenting a schema to the LLM and asking it to generate queries. In this talk we explore query generation. - SPARQL queries are complex, so even for known schemas (eg Wikidata, DBpedia), GPT has trouble generating good queries, see \Shared drives\KGS\AI-GPT\GPT-SPARQL. Furthermore, RDF schemas (OWL and SHACL) are complex. But I'm sure there will be fast progress in SPARQL generation, see LlamaIndex advances in GDB-8329 - GraphQL queries are regular and much simpler, and SOML is a simpler schema language (from which the Ontotext Platform generates GraphQL schema, queries and SHACL shapes). In this talk I'll show how GPT4 can answer questions about a schema, and generate GraphQL to answer questions about data.} } @InProceedings{SemanticBSDD-LDAC-2023, author = {Vladimir Alexiev and Mihail Radkov and Nataliya Keberle}, title = {{Semantic bSDD: Improving the GraphQL, JSON and RDF Representations of buildingSmart Data Dictionary}}, booktitle = {{Linked Data in Architecture and Construction (LDAC 2023)}}, year = 2023, month = jun, address = {Matera, Italy}, url = {https://linkedbuildingdata.net/ldac2023/files/papers/papers/LDAC2023_paper_1547.pdf}, url_Demo = {https://bsdd.ontotext.com/}, url_Detailed = {https://bsdd.ontotext.com/README.html}, url_Github = {https://github.com/Accord-Project/bsdd}, url_Preprint = {https://bsdd.ontotext.com/paper/paper.pdf}, url_Slides = {https://bsdd.ontotext.com/presentation/presentation.html}, url_Video = {https://drive.google.com/open?id=1Mhts8JwbdJFUmQHGULCqduijZ0NpEoxX}, keywords = {Linked building data, LBD, buildingSMART Data Dictionary, bSDD, FAIR data, data quality}, abstract = {The buildingSmart Data Dictionary (bSDD) is an important shared resource in the Architecture, Engineering, Construction, and Operations (AECO) domain. It is a collection of datasets ("domains") that define various classifications (objects representing building components, products, and materials), their properties, allowed values, etc. bSDD defines a GraphQL API, as well as REST APIs that return JSON and RDF representations. This improves the interoperability of bSDD and its easier deployment in architectural Computer Aided Design (CAD) and other AECO software. However, bSDD data is not structured as well as possible, and data retrieved via different APIs is not identical in content and structure. This lowers bSDD data quality, usability and trust. We conduct a thorough comparison and analysis of bSDD data related to fulfillment of FAIR (findable, accessible, interoperable, and reusable) principles. Based on this analysis, we suggest enhancements to make bSDD data better structured and more FAIR. We implement many of the suggestions by refactoring the original data to make it better structured/interconnected, and more "semantic". 
We provide a SPARQL endpoint using Ontotext GraphDB, and GraphQL endpoint using Ontotext Platform Semantic Objects. Our detailed work is available at https://github.com/Accord-Project/bsdd (open source) and https://bsdd.ontotext.com (home page, schemas, data, sample queries).}, } @TechReport{InnoGraph-AI-Taxonomy, author = {Vladimir Alexiev and Boyan Bechev and Alexandr Osytsin}, title = {The InnoGraph Artificial Intelligence Taxonomy: A Key to Unlocking AI-Related Entities and Content}, institution = {Ontotext Corp}, year = 2023, type = {whitepaper}, month = dec, note = {Introduction: - Potential InnoGraph Datasets and Users - Importance of Topics and A Holistic Approach - Example: Github Topics - Kinds of Topics. Core Topics: Wikipedia Articles: - Wikipedia Categories - Category Pruning. Collaborative Patent Classification: Application Areas: - PatBase Browser - CPC Semantic Data at EPO - Finding All CPC AI Topics - CPC Snowballing - CPC for Application Area Topics. Other Topic Datasets: - ACM CCS - AIDA FAT - AMiner KGs - ANZSRC FOR - arXiv Areas - China NSFC - EU CORDIS EuroSciVoc - Crunchbase Categories - CSO - JEL - MESH - MSC - OpenAlex Topics - SemanticScholar FOS - StackExchange Tags. Conclusion and Future Work: - Acknowledgements - References}, url = {https://www.ontotext.com/knowledgehub/white_paper/the-innograph-artificial-intelligence-taxonomy/}, keywords = {InnoGraph, Artificial Intelligence, Topics, Taxonomy, InnoGraph}, date = {2023-12}, abstract = {InnoGraph is a holistic Knowledge Graph of innovation based on Artificial Intelligence (AI). AI is the underpinning of much of the world's innovation, therefore it has immense economic and human improvement potential. With the explosive growth of Machine Learning (ML), Deep Learning (DL) and Large Language Models (LLM), it is hard to keep up with all AI development, but also this is a valuable effort. A key to discovering AI elements is to build a comprehensive taxonomy of topics: AI techniques, application areas (verticals). We describe our approach to developing such a taxonomy by integrating and coreferencing data from numerous sources.}, } @InProceedings{TowardsInnograph-SciK-2023, author = {M.Besher Massri and Blerina Spahiu and Marko Grobelnik and Vladimir Alexiev and Matteo Palmonari and Dumitru Roman}, title = {{Towards InnoGraph: A Knowledge Graph for AI Innovation}}, booktitle = {{3rd International Workshop on Scientific Knowledge Representation, Discovery, and Assessment (Sci-K 2023). WWW 2023 Companion}}, year = 2023, month = jun, address = {Austin, Texas}, url = {https://dl.acm.org/doi/10.1145/3543873.3587614}, url_Preprint = {https://zenodo.org/record/7750707/files/Towards%20InnoGraph%20A%20Knowledge%20Graph%20for%20AI%20Innovation.pdf?download=1}, url_Slides = {https://zenodo.org/record/7750707/files/Towards%20InnoGraph%20A%20Knowledge%20Graph%20for%20AI%20Innovation.pptx?download=1}, url_Zenodo = {https://zenodo.org/record/7750707}, keywords = {artificial intelligence, innovation, innovation ecosystem, knowledge graph, science knowledge graph, economics knowledge graph}, doi = {10.1145/3543873.3587614}, abstract = {To understand the state-of-the-art innovations in a particular domain, researchers have to explore patents and scientific articles published recently in that particular domain. Innovation ecosystems comprise interconnected information regarding entities, i.e., researchers, institutions, projects, products, and technologies. Representing such information in a machine-readable format is challenging. 
This is due to the fact that representing concepts like "knowledge" is not straightforward. However, even a partial representation provides valuable information. Representing innovation ecosystems as knowledge graphs (KGs) enables the generation of new insights and would allow advanced data analysis. In this paper, we propose InnoGraph, a KG of the worldwide AI innovation ecosystem.}, } @Misc{Alexiev-InnographDatasets-2023, author = {Vladimir Alexiev}, title = {{InnoGraph Datasets}}, howpublished = {presentation}, month = jan, year = 2023, note = {The presentation is not public yet, contact in case of interest}, url = {https://github.com/enRichMyData/InnoGraph/blob/main/papers-write/202301-InnoGraph-Datasets/index.org}, url_HTML = {https://rawgit2.com/enRichMyData/InnoGraph/main/papers-write/202301-InnoGraph-Datasets/index.html}, address = {enrichMyData Project Meeting, Milan, Italy}, abstract = {What is InnoGraph: a Holistic KG of the world-wide AI innovation ecosystem. Who are its users/uses: Investment and strategic advice (VCs, M&A): Innovators and startups, Industry convergence and digitization, Strategic gaps, Strategic acquisition targets; Researchers and developers; Policy makers: EU level, National science foundations, OECD.AI, maybe even US and China; Self-use: AI is at cusp (singularity), learn about it in depth!}, } @InProceedings{Alexiev-SEMIC-2022, author = {Vladimir Alexiev}, title = {{Semantic Interoperability for Data Spaces}}, booktitle = {SEMIC: Data Spaces in an Interoperable Europe (SEMIC 2022)}, year = 2022, month = dec, url = {https://docs.google.com/presentation/d/1OMxNZItNCjGnod0KQ__Hp9oQ8mwmMyNn}, howpublished = {presentation}, keywords = {Data Spaces, interoperability, semantic interoperability, knowledge graphs, RDF, Semantic Technology, Polyglot Modeling, Product Classifications, Product Catalogs, Manufacturing Industry, Electricity, Transport and Logistics, Architecture and Construction}, } @Misc{Alexiev-JSONLD-YAMLLD-2022, author = {Vladimir Alexiev}, title = {{JSON-LD, YAML-LD and Polyglot Modeling}}, howpublished = {presentation}, month = oct, year = 2022, url = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20221028-JSONLD/Slides.html}, keywords = {JSON-LD, YAML-LD, polyglot modeling, GraphDB, rdf4j, Titanium, GS1, EPCIS, Allotrope}, address = {Presentation at Ontotext Last Friday}, } @Misc{Alexiev-decentralization-2022, author = {Vladimir Alexiev}, title = {{Decentralization and Self-Sovereignty, Or how I finally understood what Blockchain is good for}}, howpublished = {presentation}, month = feb, year = 2022, url = {https://docs.google.com/presentation/d/1AEwLjM7ry6BeM0XoF8EVbl5zeoMkE-tBht0CcL3cfPk/edit}, keywords = {LD, JSONLD, HDT, HDF5, TPF, LDF, LDP, LDN, SOLID, DID, VC, IDSA RAM}, url_report = {https://docs.google.com/document/d/1qpMAa55SYV6E4D_ffIgsZopmpzrUrjjR9c36SXXCVZQ/edit#}, address = {Presentation at Ontotext Last Friday}, } @Misc{Alexiev-podcast2022, author = {Vladimir Alexiev}, title = {{Semantic and Polyglot Modeling, Generation of Declarative Transformations, Data Spaces ft.
Vladimir Alexiev}}, howpublished = {Podcast}, address = {Loose Edges podcast with Marsel Tadjer and Justin Dowdy}, month = oct, year = 2022, url = {https://player.fm/series/loose-edges/semantic-and-polyglot-modeling-generation-of-declarative-transformations-data-spaces-ft-vladimir-alexiev}, keywords = {semantic modeling, polyglot modeling, ontology engineering, knowledge graphs, competency questions, upper ontologies, reusable ontologies, GraphDB, GraphQL, Ontotext Refine, Ontotext Reconcile}, abstract = {In this episode of Loose Edges Marsel and Justin interview Vladimir Alexiev, Chief Data Architect at Ontotext. - We explore Application Centric Data and how to catch defects in various modeling approaches. - Discuss Ontotext products: new GraphDB capabilities such as search and connectors, GraphQL capabilities, Ontotext Refine, Ontotext Reconcile. - Ontotext "10 step guide to KGs". Start a KG project with "competency questions". - Semantic transformation best practices and approaches: declarative and generated transformations. - Polyglot modeling: what is it and where it is manifesting itself in various data communities (from HL7 FHIR to YAML-LD). - Standards. Working Groups. How to get involved, what are some of the best practices from Vladimir's perspective and what should an aspiring semantic engineer and ontologist be aware of. - Common upper ontologies / Reusable ontologies / simple vs. broad, hear some examples from a dozen different industries. }, annote = { 00:00 - 01:13 Intro and Ontotext News 01:14 - 04:00 GraphDB features where to use RDF / Use cases and industries 04:00 - 07:00 Connectors and transformation language for imports 07:00 - 08:00 Elastic and search connectors 08:00 - 10:49 GraphQL support / standards / avoiding cartesian product / standardization and full text search. 10:50 - 16:30 RML / Start ontologies or start with data / templates with standard ttl + generating conversions 16:35 - 20:00 Ontotext 10 step guide / start KG project with "competency questions" 20:00 - 23:30 Application Centric Data / Defects in vocabularies / semantic representations / examples of standards settings organizations 20:30 - 29:40 Polyglot modeling / data modeling / HL7/FHIR / YAML-LD easiness of yaml to read vs json 30:00 - 32:00 Better modeling with json-ld Frames / community practices 32:00 - 35:15 Object vs. Literal / Transparency EKG spec (schema.org vs other approach) / Inclusivity of wikidata / "be too demanding" 35:15 - 39:10 Subject matter under specify deterrent for raising the quality of data / "use wikidata and geonames" 39:00 - 41:30 Specificity of thinking from files and messages to real world.
We are describing "things" in real world 41:30 - 43:30 Value of descriptions 43:30 - 47:15 Standards / Working groups / DBPedia vs Wikidata best practices / Ontotext Refine / Ontotext Reconcile 47:15 - 51:30 Practices with W3C vs ISO standards 51:30 - 54:45 Advice for upcoming graph specialists - example with internal query iterated from external query posted to SPARQL 54:45 - 58:30 Justin asks for DPV W3C no consistent worldview / Common upper ontologies / Reusable ontologies / W3C practice with ORG and ADMS "simplify / make usable" / ISO-15926 / C vs lisp 58:30 - 1:00:00 Start from "common primitives" / define the base is "not free" comes at a price / Crunchbase + IPO examples} } @Misc{Petkova-Alexiev-DataSpaces-2022, author = {Teodora Petkova and Vladimir Alexiev}, title = {{Data Wants To Be Truly Sovereign: Designing Data Spaces with Linked Data Principles In Mind}}, howpublished = {Ontotext blog post}, month = nov, year = 2022, url = {https://www.ontotext.com/blog/data-wants-to-be-truly-sovereign-designing-data-spaces/}, keywords = {Data Spaces, Knowledge Graphs, Semantic Data Spaces}, abstract = {Learn how data spaces, being a mechanism to enable efficient commercial data exchange, can significantly benefit from the use of Linked Data at the level of data itself}, } @InProceedings{Alexiev-DataSpaces-2022, author = {Vladimir Alexiev}, title = {{Data Spaces vs Knowledge Graphs: How to Get To Semantic Data Spaces?}}, booktitle = {{Data Spaces & Semantic Interoperability Workshop}}, year = 2022, month = jul, address = {Vienna, Austria}, url = {https://drive.google.com/file/d/15RuCfyresjmc0JWoNl8Jpjpbf_O65UkD/view}, url_Slides = {https://docs.google.com/presentation/d/1uujCfAGw7nTwz9c6ItLtUhsKiGEbK2bKCWUOOunpyw0/edit}, url_Video = {https://www.youtube.com/watch?v=RpCVChGczSA}, url_Blog = {https://www.ontotext.com/company/news/ontotext-presents-position-paper-at-data-spaces-and-semantic-interoperability-workshop/}, keywords = {Data Spaces, RDF, Semantic Technology, Polyglot Modeling, Product Classifications, Product Catalogs, Manufacturing Industry, Electricity, Transport and Logistics, Architecture and Construction}, abstract = {EU invests heavily in Data Spaces (DS) as a mechanism to enable commercial data exchange and therefore industry digitalization and proliferation of Data Science (DS) and Artificial Intelligence, in particular Machine Learning (ML). While DSs use heavily semantic technologies, that is limited to describing metadata, license agreements, data market participants, etc. I argue that using Linked Data and semantic technologies for the data itself offers significant benefits regarding more efficient data sharing and use, and improvements to ML and DS processes.
I give an overview of the state of semantic data sharing in several industrial domains (Product Classifications and Catalogs, Manufacturing Industry, Electricity, Transport and Logistics, Architecture and Construction; and close with a brief overview of technological enablers required for Semantic Data Spaces.}, } @InProceedings{Alexiev-SemIIM2002, author = {Vladimir Alexiev}, title = {{Ontologies vs Linked Data & Knowledge Graphs}}, booktitle = {{First International Workshop on Semantic Industrial Information Modelling (SemIIM 2022 at ESWC 2022)}}, year = 2022, month = may, note = {Panel presentation}, url = {https://docs.google.com/presentation/d/1lKGZ_6MsTE15E6wFBorsVHmyQ3RjqzznpzQ7xpoDhUU/edit}, } @TechReport{AlexievEtAl-TEKG-spec, author = {Vladimir Alexiev and Viktor Ribchev and Miroslav Chervenski and Nikola Tulechki and Mihail Radkov and Antoniy Kunchev and Radostin Nanov}, title = {{Transparency EKG Requirements Specification, Architecture and Semantic Model}}, institution = {Ontotext Corp}, year = 2022, type = {Specification}, month = jun, url = {https://transparency.ontotext.com/spec/}, keywords = {energy, electricity, ENTSO-E, market transparency, knowledge graph, specification, semantic architecture, semantic model}, } @Misc{Alexiev-TEKG4, author = {Vladimir Alexiev}, title = {{Transparency Energy Knowledge Graph Project: Final Results}}, howpublished = {Presentation}, month = oct, year = 2022, url = {https://docs.google.com/presentation/d/1jpgrBr2eXvOShlOtFmoMeF1jjGIrvt5F}, url_Video = {https://www.youtube.com/watch?v=Lm4Q2riM3Ro}, keywords = {energy, electricity, ENTSO-E, power plant databases, electricity market, energy markets, market transparency, knowledge graph, OpenStreetMap, EIC, validation, SHACL, SHACL Advanced, analytics}, url_BDVA = {https://jam4.sapjam.com/blogs/show/XnKajJjHL6qjJt6dUuPXzI}, address = {Presentation at Ontotext Knowledge Graph Forum 2022}, abstract = {The Transparency Energy KG (TEKG) project converted part of the ENTSO-E electricity market transparency data to a semantic KG and complemented it with external data sources (VIES for VAT validation, OpenStreetMap for power plant maps and coordinates, several power plant databases for correspondences/coreferencing). We have implemented a number of advanced validations over fundamental electricity data such as the EIC file (Energy Identification Code), power plant data, and specific market data observations (time series). We also implemented advanced analytics and map views, including integration of OpenStreetMap maps. KGs afford a holistic view over the data that allow us to uncover a number of data problems, presented in a Data Quality Assessment Dashboard. 
This could help ENTSO-E and national electricity authorities (Transmission System Operators, TSO) diagnose data quality problems and improve data collection procedures and legislation.}, } @Comment{Triona, Smart Flow of Information for the Transport Infrastructure https://www.youtube.com/watch?v=ulX6L6lNf00} @Comment{Nikolay Krustev, How Knowledge Graphs are Bridging the Gap Between Industries https://www.youtube.com/watch?v=r13nk7MjVt4} @Misc{Alexiev-TEKG3, author = {Vladimir Alexiev}, title = {{Transparency Energy Knowledge Graphs for Energy Traceability}}, howpublished = {presentation}, month = sep, year = 2022, url = {https://docs.google.com/presentation/d/1LIw_F_hN6hXGBFnSxq-8_IOJN9bWwh_8}, keywords = {energy, electricity, gas, ENTSO-E, ENTSO-G, ACER, GIE, power plant databases, energy markets, market transparency, knowledge graph, traceability, Wikidata, OpenStreetMap, EIC, GIE gas storage id, ACER id, MIC, SWIFT/BIC, GLEI, GS1 GLN}, url_bdva = {https://jam4.sapjam.com/blogs/show/XnKajJjHL6qjJt6dUuPXzI}, address = {Presentation at Ontotext Knowledge Graph Forum 2022}, pages = 42, abstract = {Ontotext's Transparency Energy KG (TEKG) project converted part of ENTSO-E electricity market transparency data to a semantic KG and complemented it with external data sources (VIES for VAT validation, OpenStreetMap for power plant maps and coordinates, several power plant databases for correspondences/coreferencing). There are at least 8 EU regulations that lay out rules for market transparency, in particular in energy markets. But energy is holistic, so going beyond electricity, ACER tracks at least 20 transparency platforms in various stages of certification, of which 16 operate for Electricity and 16 for Natural Gas. ENTSO-E and ENTSO-G are the central transparency platforms, but there are also platforms run by energy exchanges (e.g. EEX) and nonprofits (e.g. GIE). The ENTSO-G Transparency Platform publishes data about the gas market, and GIE has data about current and future gas infrastructure, including gas storages. ACER also tracks 130 other platforms (104 of which active): marketplaces, Registered Reporting Mechanisms, trade matching systems, etc. This is important data that affects all of us as energy consumers, and becomes even more important given the Russian gas crisis. However, the data is fragmented in distributed databases with their own access modes and only partially harmonized information. KGs and semantic data integration afford holistic views over all data across an industry and facilitate data validation and analyzes that were not previously possible. A number of identifiers can be used to coreference these entities: EIC for all kind of energy resources and players (issued in a decentralized way, no central database exists), 13 database-specific ids of power plants, GIE storage id for gas storages, ACER id for market players, MIC for market places, BIC for bank routing, GLEI for legal entities, GS1 GLN for logistics locations, OpenStreetMap for entities on a map, and Wikidata id for an encyclopedic KG, etc. We have worked with many of these datasets, in particular integrating parts in Wikidata for open semantic integration. We present the TEKG project, then some of the mentioned datasets and our ideas how TEKG could be extended to cover the following cases. 
UC1: Energy transparency basic data: semantically integrated, verified through blockchain and RDF Validation; including master data UC2: Energy data for market players, exchanges, regulators and policy makers: analysis of energy prices, trading practices, energy mix transformation and evolution UC3: Analysis of the Sustainability of EU Gas and Progress Towards Energy Independence from Russia UC4: Energy Tracing for CO2 Footprint and Pollution Impact, for enterprises who have mandates to progress towards zero emissions } @Misc{Alexiev-TEKG2, author = {Vladimir Alexiev}, title = {{Advanced SHACL Data Validation for the Transparency Energy KG}}, howpublished = {presentation}, month = may, year = 2022, url = {https://docs.google.com/presentation/d/1Hhxmx2YDnaxlaU5KeafjRJSDlVgHRz1z/edit}, url_Video = {https://youtu.be/4JGSui7Uq_Y}, keywords = {energy, electricity, ENTSO-E, market transparency, knowledge graph, validation, SHACL, SHACL Advanced}, address = {Presentation at Ontotext Demo Days}, abstract = {The Transparency Energy KG (TEKG) project converts the ENTSO-E electricity market transparency data to a semantic KG and complements it with external data sources (VIES for VAT validation, OpenStreetMap for power plant maps and coordinates, several power plant databases for correspondences/coreferencing). We have implemented a number of advanced validations over fundamental electricity data such as the EIC file (Energy Identification Code), power plant data, and specific market data observations (time series). KGs afford a holistic view over the data that allow us to uncover a number of data problems, presented in a Data Quality Assessment Dashboard. This could help ENTSO-E and national electricity authorities (Transmission System Operators, TSO) diagnose data quality problems and improve data collection procedures and legislation.}, } @Misc{Alexiev-TEKG1, author = {Vladimir Alexiev}, title = {{Transparency Energy Knowledge Graph}}, howpublished = {presentation}, month = jan, year = 2022, url = {https://docs.google.com/presentation/d/1I0CKJ_y-Lq0eErnOabBBxmfAuOQHYNey/edit}, url_PDF = {http://interrface.eu/sites/default/files/ontotext_TEKG-20210131.pdf}, keywords = {energy, electricity, ENTSO-E, market transparency, knowledge graph}, address = {Presentation at Joint INTERRFACE Open Call Projects meeting}, } @Misc{Alexiev-EBDVF2021, author = {Vladimir Alexiev}, title = {{Cross-disciplinary ontologies for buildings, infrastructure, smart grid, electricity, energy efficiency}}, howpublished = {presentation}, month = nov, year = 2021, url = {https://rawgit2.com/VladimirAlexiev/my/master/pres/EBDVF-2021-(V.Alexiev).pptx}, keywords = {Cross-disciplinary, ontologies, buildings, infrastructure, smart grid, electricity, energy efficiency, energy consumption, AECO, architecture, construction, cadaster, smart city, Manufacturing, Transport/Logistics, Product classification, sensor, CIM, CGMES, IFC, LOIN, IDM, ICDD, COINS, MVD, BCF, bSDD, Data Dictionaries, Data Templates, Object Libraries, Bricks Schema, Haystack, Digital Buildings, Real Estate Core, LBD, BOT, BPO, CDC, CTO, DOT, FOG, OMG CityGML, GeoSPARQL, other OGC, ISO 23262, GIS-BIM interop, FSGIM, OpenADR, DABGEO, EnergyUse, OEMA, EEPSA, PROSG, SEAS, SEMANCO, DogOnt, ThinkHome, OPC UA, AutomationML, RAMI, AdminShell GS1 EPCIS, CBV, WebVoc, Digital Links, TDS identifiers (GTIN, GLN, GRAI, GIAI, GDTN…) COBIE, eClass, IEC CDD, GS1 GPC, UNSPSC, SOSA, SSN, WoT TD, DTML, SAREF, SAREF4ENER, SAREF4BLDG, SAREF4water}, address = {Presentation at European Big Data
Value Forum (EBDVF 2021)}, } @Misc{AlexievBoytcheva2021-SemantizationML, author = {Vladimir Alexiev and Svetla Boytcheva}, title = {{Semantization of Machine Learning and Data Science (a Project Idea)}}, howpublished = {presentation}, month = sep, year = 2021, url = {https://docs.google.com/presentation/d/1_8LSXa9vVzNwPE6Hjj4cKIJNRRBNz2wP/edit}, keywords = {Ontotext, research projects, knowledge graph, KG technologies, Semantization, Machine Learning, Data Science}, address = {Presentation at Big Data Value Association Activity Group 45 (BDVA AG 45)}, abstract = {Problem: Data Science, AI & ML are expensive, and that's one of the reasons why relatively few enterprises use them. Goal: rationalize and industrialize DS efforts, and make them more reproducible and reusable. Approach: capture a lot of semantic info about all DS processes in an enterprise, and thus enable automation, discovery, reusability. The kinds of data we'd like to represent and integrate semantically (part of it is similar to what you can see on the Kaggle and OpenML sites): - Business context: goals, motivations, data value, value chain, cost vs benefit analysis, SWOT analysis... - DS challenges, where do they come from, datasets that can be leveraged to solve them - DS staff, expertise, projects, tasks, risks - DS/ML algorithms, implementations, modules, dependencies, software projects, versions, issue trackers - Cloud and IT resources: compute, storage; their deployment, management, automation... - ML model deployment, performance, model drift, retraining… Established software genres that cover parts of this landscape: - ModelOps (devOps for ML), Feature Spaces - Enterprise data catalogs (data hubs) vs data marketplaces vs open data catalogs vs EU Data Spaces and their metadata - FAIR data, reproducible research, Research Objects, research workflows, We've researched over 100 relevant ontologies that can be leveraged, covering - Organizations/enterprises, business plans, - Ontologies, semantic data, - DS challenges, datasets, statistical data, quality assessment - DS/ML approaches, software, projects, issues, - Data on research/science - Project management Focusing on DS/ML approaches only, a couple of the relevant ontologies or standards are: - PMML (predictive modeling markup language) - e-LICO, DMEX ontologies for describing DS - OntoDM, KDO ontologies for describing DS}, } @Misc{IvanovAlexiev2021-EnergyKG, author = {Chavdar Ivanov and Vladimir Alexiev}, title = {{Energy Knowledge Graphs to Facilitate Evolution of the European Energy Market}}, howpublished = {presentation}, address = {Presentation at Ontotext Knowledge Graph Forum 2021}, month = oct, year = 2021, url = {https://docs.google.com/presentation/d/1vvrUGtutbOzwUK19Z0nEUhZbP6kw-KiFdNELMG4V3v8/edit}, keywords = {knowledge graph, energy knowledge graph, CIM, CGMES, ENTSOE, Single Energy Market, energy market transparency}, abstract = {Presents the EU Single Electricity Market, IEC Common Information Model (CIM), ENTSOE Common Grid Model Exchange Specification (CGMES), how Energy KGs can improve data integration in the energy domain, ENTSOE market transparency data, and Ontotext's Energy Transparency KG project.}, } @Misc{Alexiev2021-EnergyKG, author = {Vladimir Alexiev}, title = {{Energy Knowledge Graphs}}, howpublished = {presentation}, month = jul, year = 2021, url = {https://docs.google.com/presentation/d/1GcJqTZFRptX5lAGBA2RXThreGxH9LQAZi5qPnJX1tTQ/edit}, keywords = {knowledge graphs, data spaces, European Energy Data Space, CIM, CGMES, ENTSOE, Single
Energy Market, energy market transparency, EU Green Deal, industry digitization}, address = {Presentation to IIA/KeyLogic and US DOE NETL and OSTI}, abstract = {Presents the EU Data Spaces initiatives, Single Electricity Market, ENTSOE market transparency data, IEC Common Information Model (CIM), ENTSOE Common Grid Model Exchange Specification (CGMES), and how Energy KGs can improve data integration in the energy domain.}, } @InProceedings{Alexiev-ENDORSE-2023, author = {Vladimir Alexiev}, title = {{Generation of Declarative Transformations from Semantic Models}}, booktitle = {{European Data Conference on Reference Data and Semantics (ENDORSE 2023)}}, year = 2023, pages = {33, 42-59}, month = mar, organization = {European Commission: Directorate-General for Informatics, Publications Office of the European Union}, url = {https://drive.google.com/open?id=1Cq5o9th_P812paqGkDsaEomJyAmnypkD}, url_PPT = {https://docs.google.com/presentation/d/1JCMQEH8Tw_F-ta6haIToXMLYJxQ9LRv6/edit}, url_Slides = {https://op.europa.eu/documents/10157494/12134844/DAY1-TRACK2-16.35-16.50-VladimirAlexiev_FORPUB.pdf/6e564f96-6ad6-1464-7a6e-e9533207f281}, url_Video = {https://youtu.be/yL5nI_3ccxs}, keywords = {semantic model, semantic data integration, ETL, semantic conversion, declarative approaches, PlantUML, R2RML, generation, model-driven, RDF by Example, rdfpuml, rdf2rml}, isbn = {978-92-78-43682-7}, doi = {10.2830/343811}, annote = {Catalogue number: OA-04-23-743-EN-N}, date = {2023-08-04}, url_proceedings= {https://op.europa.eu/en/publication-detail/-/publication/4db67b35-34df-11ee-bdc3-01aa75ed71a1}, abstract = {The daily work of the Knowledge Graph Solutions group at Ontotext involves KG building activities such as investigating data standards and datasets, ontology engineering, harmonizing data through semantic models, converting or virtualizing data to semantic form, entity matching, semantic text enrichment, etc. Semantic pipelines have a variety of desirable properties, of which maintainability and consistency of the various artefacts are some of the most important ones. Despite significant recent progress (eg in the KG Building W3C community group), semantic conversion still remains one of the difficult steps. We favor generation of semantic transformations from semantic models that are both sufficiently precise, easily understandable, can be used to generate diagrams, and are valid RDF to allow processing with RDF tools. We call this approach "RDF by Example" and have developed a set of open source tools at https://github.com/VladimirAlexiev/rdf2rml. This includes "rdfpuml" for generating diagrams, "rdf2rml" for generating R2RML for semantization of relational data and ONTOP virtualization, "rdf2sparql" for semantization of tabular data with Ontotext Refine or TARQL. 
We describe our approach and illustrate it with complex and high-performance transformations in a variety of domains, such as company data and NIH research grants.}, } @InProceedings{Alexiev-ENDORSE-2021, author = {Vladimir Alexiev}, title = {{Diverse Uses of a Semantic Graph Database for Knowledge Organization and Research}}, booktitle = {{European Data Conference on Reference Data and Semantics (ENDORSE 2021)}}, year = 2021, pages = 47, month = jul, organization = {European Commission: Directorate-General for Informatics, Publications Office of the European Union, ISA2 Programme}, url = {https://op.europa.eu/o/opportal-service/download-handler?identifier=41b06a9b-e388-11eb-895a-01aa75ed71a1&format=pdf&language=en&productionSystem=cellar}, url_Github = {https://github.com/VladimirAlexiev/ontotext-graphdb-applications}, url_PPT = {https://github.com/VladimirAlexiev/ontotext-graphdb-applications/raw/master/Diverse%20Uses%20of%20a%20Semantic%20Graph%20Database%20for%20Knowledge%20Organization%20and%20Research%20(ENDORSE%202021).pptx}, url_Slides = {https://op.europa.eu/documents/7525478/8087182/ALEXIEV_presentation_Diverse+Uses+of+a+Semantic+Graph+Database+for+Knowledge+Organization+and+Research.pdf/b27afc2c-3db7-749b-c50c-52b3ded79f3c}, url_Video = {https://www.youtube.com/watch?v=0q63x2P1V0o&list=PLT5rARDev_rmGr_LJkr7zcI-Qul7yOOHO&index=4&t=4780s}, url_Zotero = {https://www.zotero.org/groups/2744757/ontotext-graphdb}, keywords = {bibliography, semantic database, graph database, semantic repository, knowledge graph, Knowledge Organization System, VocBench, PoolParty, Synaptica, Semaphore, EuroVoc, AgroVoc, Getty Vocabularies, social media analytics, data marketplaces, business process management, enterprise data integration, statistical data, engineering, smart cities, sensor networks, life sciences, biomedical ontologies, medicines, chemistry, linguistic data, semantic publishing, semantic text analysis, geographic information, master data management, academic/research data, COVID, Zika virus, Quran, bilingual data, art history, Holocaust research, musical events, musical adaptations, iconography, food and drink, tourism, investment decision support, economic research, offshore leaks, maritime data, construction projects, building information management, crisis management, critical incidents, data journalism, clinical trials, investment recommendations, data journalism,}, doi = {10.2830/44569}, isbn = {978-92-78-42416-9}, annote = {Catalogue number: OA-03-21-303-EN-N}, date = {2021-07-12}, abstract = {Semantic Graph Databases are the foundation of Enterprise Knowledge Graphs. They are used in numerous industrial applications, but also Knowledge Organization Management systems (thesaurus and ontology management systems), such as VocBench, SWC PoolParty, Synaptica Semaphore. Through VocBench, semantic databases manage or publish some of the most important thesauri: EuroVoc, AgroVoc, the Getty Vocabularies, etc. Semantic databases are also used in a wide variety of research domains and projects. Some have open source or free editions that make them an easy choice for academic research. We searched on Google Scholar and found 1000-1200 academic papers and theses mentioning one of the popular databases. We also found at least 50 books on Google Books that mention it. We started a Zotero bibliography on the topic (currently about 150 papers), and captured about 220 research topics, based on the titles of about 250 papers. 
We will present an analysis of reference data and research domains using a semantic database. Some of the traditional topics include: social media analytics, data marketplaces, business process management, enterprise data integration, statistical data, engineering, smart cities, sensor networks, life sciences, biomedical ontologies, medicines, chemistry, linguistic data, semantic publishing, semantic text analysis, geographic information, master data management. Newer or more exotic topics include academic/research data, COVID and Zika viruses, Quran and bilingual Arabic-English data, art history, Holocaust research, musical events and adaptations, iconography, food and drink, tourism, investment decision support, economic research, offshore leaks, maritime data, construction projects, building information management, crisis management, critical incidents and infrastructures, data journalism, clinical trials and specific medical topics (e.g. intestinal cells, intracoronal tooth restorations, vaccines, toxicology), investment recommendations, data journalism, etc.}, } @Article{EBG-2020-SWJ, author = {Dumitru Roman and Vladimir Alexiev and Javier Paniagua and Brian Elvesaeter and Bjorn Marius von Zernichow and Ahmet Soylu and Boyan Simeonov and Chris Taggart}, title = {{The euBusinessGraph Ontology: a Lightweight Ontology for Harmonizing Basic Company Information}}, journal = {{Semantic Web - Interoperability, Usability, Applicability (SWJ)}}, year = 2021, pages = {41-68}, month = nov, url = {https://www.semantic-web-journal.net/content/eubusinessgraph-ontology-lightweight-ontology-harmonizing-basic-company-information-0}, url_Published= {https://content.iospress.com/articles/semantic-web/sw210424}, keywords = {Company data, Knowledge Graph, Ontology, Linked data}, issue = 13, publisher = {IOS Press}, doi = {10.3233/SW-210424}, abstract = {Company data, ranging from basic company information such as company name(s) and incorporation date to complex balance sheets and personal data about directors and shareholders, are the foundation that many data value chains depend upon in various sectors (e.g., business information, marketing and sales, etc.). Company data becomes a valuable asset when data is collected and integrated from a variety of sources, both authoritative (e.g., national business registers) and non-authoritative (e.g., company websites). Company data integration is however a difficult task primarily due to the heterogeneity and complexity of company data, and the lack of generally agreed upon semantic descriptions of the concepts in this domain. In this article, we introduce the euBusinessGraph ontology as a lightweight mechanism for harmonising company data for the purpose of aggregating, linking, provisioning and analysing basic company data. The article provides an overview of the related work, ontology scope, ontology development process, explanations of core concepts and relationships, and the implementation of the ontology. Furthermore, we present scenarios where the ontology was used, among others, for publishing company data (business knowledge graph) and for comparing data from various company data providers. 
The euBusinessGraph ontology serves as an asset not only for enabling various tasks related to company data but also on which various extensions can be built upon.}, } @InProceedings{Alexiev2021-Wiki-GLAM, author = {Vladimir Alexiev and Philippe Michon and Heather Dunn}, title = {{Canadian Heritage Datasets: Linking and Publishing as LOD}}, booktitle = {{Wébinaire Wiki, data et GLAM 2021}}, year = 2021, month = jun, organization = {Wikimédia France, Etalab}, url = {https://docs.google.com/presentation/d/1yr_FVXeTrFpR-lu8C_QT2ciDtPioEtul2lCyFTh82y4/edit}, keywords = {LOD, GLAM, Wikidata, CHIN, Canadian Heritage, Nomenclature, Creators in Canada, Makers in Canada, Artefacts Canada, DOPHEDA, CIDOC CRM, linked.art}, abstract = {Linked Open Data (LOD) projects underway at the Canadian Heritage Information Network (CHIN) have involved aligning and linking vocabularies used in cultural institutions (among them French thesauri, AAT, and Wikidata), and modeling the semantic representation of heritage content by using commonly established ontologies and aligning to modeling efforts at other GLAM institutions. This presentation will describe some of the problems and successes encountered in these projects, as well as thoughts on future directions and the potential of LOD approaches for heritage content.}, } @Misc{Alexiev2020-Wikidata-Icons, author = {Vladimir Alexiev}, title = {{Wikidata and Icons: KGs for GLAM}}, howpublished = {presentation}, month = jan, year = 2020, url = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20200130-Wikidata-Icons/Slides.html}, keywords = {knowledge graphs, semantic integration, GLAM, Wikidata, OpenRefine, Mix-n-Match, icons, religious icons, iconography, iconographic subject, GLAM, CLADA}, address = {CLADA BG Consortium Meeting, Sofia, Bulgaria}, abstract = {This presentation introduces the concepts of knowledge graphs, semantic integration, Wikidata, and Wikidata "tools of the trade" to a GLAM audience. We then show the current state of religious Icons in Wikidata, and show an example of how to edit various entities: iconographer, iconographic school, erminia, icon, measurements, material, technique, iconographic subject (a title of the Virgin Mary), scientific article, academic journal, issue. We finally show a simplified form of the resulting knowledge graph, including the graph of all these entities, plus links to external sources: Getty AAT, Nomenclature for Museum Cataloging, ISBN, ISSN, academia.edu}, } @InProceedings{AlexievEtAl-Bulgarian-Icons-DIPP2020, author = {Vladimir Alexiev and Plamen Tarkalanov and Nikola Georgiev and Lilia Pavlova}, title = {{Bulgarian Icons in Wikidata and EDM}}, booktitle = {{Digital Presentation and Preservation of Cultural and Scientific Heritage (DIPP 2020)}}, year = 2020, volume = 10, month = sep, address = {Burgas, Bulgaria}, publisher = {Institute of Mathematics and Informatics (IMI BAS), Sofia}, url = {https://dipp.math.bas.bg/images/2020/045-064_1.2_iDiPP2020-24_v.1c.pdf}, url_Slides = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20200703-Bulgarian-icons/Slides.html}, keywords = {Knowledge Graphs, Semantic Integration, GLAM, Wikidata, Mix-n-Match, OpenRefine, Icons, Religious Icons, Iconography, Iconographic Subject, GLAM, CLADA, BIDL, Virtual Encyclopedia of Bulgarian Icons}, issn = {1314-4006}, eissn = {2535-0366}, abstract = {We briefly describe Wikidata, its importance for GLAM institutions, iconographic authorities in Mix-n-Match. 
Then we propose an Icon Knowledge Graph Model comprising the entities: iconographer, iconographic school, herminia, icon, measurements, material, technique, iconographic subject (saint or a title of the Virgin Mary), location (city, monastery, church, museum), scientific article, academic journal, issue, links to LOD datasets (e.g. VIAF, Getty AAT, ISBN, ISSN, academia.edu). Then we introduce the Virtual Encyclopedia of Bulgarian Icons (BIDL) and describe how we exported it to Wikidata, while coreferencing to iconographers, saints, locations; and the extra info we added for such entities. Finally, we describe a conversion to the Europeana Data Model, including details such as links to Wikidata, bilingual descriptions, language tags, providers. The online version of the paper includes live links; the accompanying presentation includes more images and queries.}, } @Misc{Alexiev2020-ECLASS, author = {Vladimir Alexiev}, title = {{ECLASS RDF} Representation}, year = 2020, url = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20201221-ECLASS-RDF/index.html}, url_Slides = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20201221-ECLASS-RDF/Slides.html}, keywords = {product data, product catalogs, product classification, product characteristics, ECLASS, RDF representation, AAS}, date = {21-Dec-2020}, } @InProceedings{EBG-2020-ISWC, author = {Dumitru Roman and Vladimir Alexiev and Javier Paniagua and Brian Elvesaeter and Bjorn Marius Von Zernichow and Ahmet Soylu and Boyan Simeonov and Chris Taggart}, title = {{A Bird's-Eye View of euBusinessGraph: A Business Knowledge Graph for Company Data}}, booktitle = {{International Semantic Web Conference: Posters, Demos, and Industry Tracks (ISWC 2020)}}, year = 2020, pages = {39-44}, month = nov, url = {https://ceur-ws.org/Vol-2721/paper493.pdf}, keywords = {Company data, Knowledge Graph, Ontology, Linked data}, abstract = {Abstract. This poster paper provides an overview of euBusinessGraph– a business knowledge graph for basic company data, together with related artefacts (datasets, ontology, and infrastructure), and its use for creating a prototype for a data marketplace for basic company data. euBusinessGraph was developed by aggregating, linking, and provisioning data from several distributed data sources.}, } @InProceedings{Alexiev2020-I40KG, author = {Vladimir Alexiev and Miroslav Chervenski}, title = {{Exploring Industry 4.0 Standards with the Ontotext Platform}}, booktitle = {{Semantics 2020 webinar}}, year = 2020, month = sep, url = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20200908-i40kg-semantics/paper.pdf}, url_PDF = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20200908-i40kg-semantics/presentation.pdf}, url_Slides = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20200908-i40kg-semantics/presentation.html}, url_Video = {https://2020-eu.semantics.cc/exploring-industry-40-standards-graph-using-graphql-mapped-sparql}, keywords = {industrial data, Industry 4.0, Industry 4.0 Knowledge Graph, I40KG, ISO 15926, RAMI, AAS, Knowledge Graph, I40KG, GraphQL, Ontotext Platform}, abstract = {Exploring Industry 4.0 Standards Graph using GraphQL mapped to SPARQL via semantic business objects. With the coming of Industry 4.0 and the continuing digitization of manufacturing, construction, oil & gas, automotive etc, a large number of relevant standards have been proposed, approved, put through use cases and interoperability exercises, and some of them found massive adoption in the industry. 
Older established standards have been integrated or "semanticized", i.e. harmonized using semantic integration techniques. ISO 15926, in development for nearly 30 years, has been touted as the "lingua franca" for global interoperability, but its complexity is difficult for most people to master. The standards landscape is large and puzzling; thanks to the so-called Industry 4.0 Knowledge Graph (formerly Standards Ontology) one can get a good overview and categorization of such relevant standards. We demonstrate a simple way to access Industry 4.0 standards using GraphQL through the Ontotext Platform. The latter features GraphQL interfaces to make it easier for application developers to access knowledge graphs without tedious development of back-end APIs or complex SPARQL. The underlying Semantic Object service implements an efficient GraphQL to SPARQL translation optimized for GraphDB, as well as a generic configurable security model.}, } @TechReport{EHRI-names-2019, author = {Eickhoff, Martijn and de Leeuw, Daan and Nikolova, Ivelina and Tagarev, Andrey and Alexiev, Vladimir}, title = {{Names and Networks. Holocaust Victim Communities}}, institution = {EHRI}, year = 2019, type = {Internal document WP14: Report on research use case}, url = {https://pure.knaw.nl/portal/en/publications/names-and-networks-holocaust-victim-communities(cf76f212-80f9-4b31-b693-b98451feb665).html}, url_TR = {https://hdl.handle.net/20.500.11755/cf76f212-80f9-4b31-b693-b98451feb665}, language = {eng}, } @Misc{Alexiev2019-devbg, author = {Vladimir Alexiev}, title = {{Semantic Integration Is What You Do Before The Deep Learning}}, howpublished = {presentation}, month = may, year = 2019, url = {https://dev.bg/събитие/machine-learning-semantic-integration-is-what-you-do-before-the-deep-learning/}, url_Slides = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20190513-Semantics-and-ML-dev.bg.pptx}, keywords = {knowledge graph, machine learning, deep learning}, address = {dev.bg Machine Learning interest group, Sofia, Bulgaria}, abstract = {It's well known that 80\% of the effort of a data scientist is spent on data preparation. Semantic integration is arguably the best way to spend this effort more efficiently and to reuse it between tasks, projects and organizations. Knowledge Graphs (KG) and Linked Open Data (LOD) have become very popular recently. They are used by Google, Amazon, Bing, Samsung, Springer Nature, Microsoft Academic, AirBnb… and any large enterprise that would like to have a holistic (360 degree) view of its business. The Semantic Web (web 3.0) is a way to build a Giant Global Graph, just like the normal web is a Global Web of Documents. IEEE already talks about Big Data Semantics.
We review the topic of KGs and their applicability to Machine Learning.}, } @Article{AlexievNikolova2019-EHRI-UmanisticaDigitale, author = {Vladimir Alexiev and Ivelina Nikolova and Neli Hateva}, title = {{Semantic Archive Integration for Holocaust Research: the EHRI Research Infrastructure}}, journal = {{Umanistica Digitale}}, year = 2019, month = mar, url = {https://umanisticadigitale.unibo.it/article/view/9049}, url_PDF = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/AlexievNikolova2018-Semantic-Archive-Integration.pdf}, keywords = {archives, Holocaust research, EHRI, research infrastructure, digital humanities, VRE, semantic integration, semantic archive integration, coreferencing, access points, thesauri, authorities, EAD, OAI PMH, ResourceSync, Geonames, Wikidata, VIAF, person matching, record linking, deduplication}, issue = 4, doi = {10.6092/issn.2532-8816/9049}, publisher = {Associazione per l'Informatica Umanistica e la Cultural Digitale, Universita di Bologna (AIUCD)}, editor = {Laura Brazzo and Kepa J. Rodriguez}, categories = {Arts and humanities, Digital libraries and archives, Information retrieval, Web searching and information discovery, Document searching, Document metadata, Semantic web description languages, Ontologies, Thesauri}, abstract = {The European Holocaust Research Infrastructure (EHRI) is a large-scale EU project that involves 23 institutions and archives working on Holocaust studies, from Europe, Israel and the US. In its first phase (2011-2015) it aggregated archival descriptions and materials on a large scale and built a Virtual Research Environment (portal) for Holocaust researchers based on a graph database. In its second phase (2015-2019), EHRI2 seeks to enhance the gathered materials using semantic approaches: enrichment, coreferencing, interlinking. Semantic integration involves four of the 14 EHRI2 work packages and helps integrate databases, free text, and metadata to interconnect historical entities (people, organizations, places, historic events) and create networks. We will present some of the EHRI2 technical work, including critical issues we have encountered. WP10 (EAD) converts archival descriptions from various formats to standard EAD XML; transports EADs using OAI PMH or ResourceSync; ingests EADs to the EHRI database; enables use cases such as synchronization; coreferencing of textual Access Points to proper thesaurus references. WP11 (Authorities and Standards) consolidates and enlarges the EHRI authorities to render the indexing and retrieval of information more effective. It addresses Access Points in ingested EADs (normalization of Unicode, spelling, punctuation; deduplication; clustering; coreferencing to authority control), Subjects (deployment of a Thesaurus Management System in support of the EHRI Thesaurus Editorial Board), Places (coreferencing to Geonames); Camps and Ghettos (integrating data with Wikidata); Persons, Corporate Bodies (using USHMM HSV and VIAF); semantic (conceptual) search including hierarchical query expansion; interconnectivity of archival descriptions; permanent URLs; metadata quality; EAD RelaxNG and Schematron schemas and validation, etc. WP13 (Data Infrastructures) builds up domain knowledge bases from institutional databases by using deduplication, semantic data integration, semantic text analysis. It provides the foundation for research use cases on Jewish Social Networks and their impact on the chance of survival. 
WP14 (Digital Historiography Research) works on semantic text analysis (semantic enrichment), text similarity (e.g. clustering based on Neural Networks, LDA, etc), geo-mapping. It develops Digital Historiography researcher tools, including Prosopographical approaches.}, } @InProceedings{Alexiev2019-Qatar, author = {Vladimir Alexiev}, title = {{Linked Open Data: Ontologies, Datasets, Projects}}, booktitle = {{1st International Conference on Museum Big Data}}, year = 2019, month = may, address = {Doha, Qatar}, url = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20190501-Museum Linked Open Data- Ontologies, Datasets, Projects.pdf}, keywords = {American Art Collaborative, Canadian heritage, Conservation Space, Getty, LOD, LODLAM, ResearchSpace, SPARQL, Wikidata, semantic technologies, museum data}, howpublished = {Invited keynote}, abstract = {The Galleries, Libraries, Archives and Museums (GLAM) sector deals with complex and varied data. Integrating that data, especially across institutions, has always been a challenge. Semantic data integration is the best approach to deal with such challenges. Linked Open Data (LOD) enable large-scale Digital Humanities (DH) research, collaboration and aggregation, allowing DH researchers to make connections between (and make sense of) the multitude of digitized Cultural Heritage (CH) available on the web. An upsurge of interest in semtech and LOD has swept the CH and DH communities. An active Linked Open Data for Libraries, Archives and Museums (LODLAM) community exists, CH data is published as LOD, and international collaborations have emerged. The value of LOD is especially high in the GLAM sector, since culture by its very nature is cross-border and interlinked. We present interesting LODLAM projects, datasets, and ontologies, as well as Ontotext's experience in this domain. An extended paper on these topics is also available. It has 77 pages, 67 figures, detailed info about CH content and XML standards, Wikidata and global authority control.}, } @TechReport{Alexiev2018-BigDataGrapes-D3.1-M9, author = {Vladimir Alexiev}, title = {{BigDataGrapes D3.1 - Data Modelling and Linking Components}}, institution = {BigDataGrapes (H2020 project 780751)}, year = 2018, type = {Deliverable}, month = oct, url = {https://doi.org/10.5281/zenodo.1482757}, doi = {10.5281/zenodo.1482757}, abstract = {WP3 Data & Semantics Layer is a core WP of the project. If we have no data, we cannot achieve almost any of the project objectives. Within this WP3, task T3.1 Data Modelling over Big Data Infrastructures has these objectives: • Explores partner data • Defines competence questions that the data should be able to answer • Studies relevant AgroBio ontologies • defines semantic modelling principles and specific models • Studies user (researcher) requirements for discovering ontologies, mapping data, aligning data, etc. • Implements or adopts tools for these requirements The document has the following structure: • Chapter 1 Introduction describes fundamental AgroBio data (observations and measurements), outlines the ontological representation of measurements, mentions possible alternatives (e.g. following existing AgroBio patterns vs using the W3C CUBE ontology), describes the steps of semantic data integration, and provides links to consortium resources related to the task. • Chapter 2 Relevant AgroBio Ontologies outlines the vast number of potentially relevant ontologies and the terms included in them. 
We provide some metrics (number of terms) and survey various Ontology Portals and Tools that are available for browsing, finding and using ontologies; and that can also serve as inspiration for developing requirements for tools to be developed/adopted by the project. • Chapter 3 Improving AgroBio Ontologies describes a variety of problems that we have found in AgroBio ontologies, and the initial steps we have taken to engage with the AgroBio communities to improve the quality of these ontologies. We also show a case of searching for a specific term (NDVI) required by specific partner data in a couple of ontology portals. • Chapter 4 Specific Project Data discusses specific consortium data (including problems of draft semantic data that will be corrected), data processing requirements and data access requirements. • Chapter 5 Conclusions provides conclusions, next steps and a bibliography. Deliverable D3.1 Data Modelling and Linking Components will have 3 iterations at M9, M21, M30. In this first iteration (M9), we describe the first steps taken for the realization of task T3.1. These initial steps were taken to clarify the scope and essential ingredients of the task. Since the project is early in its life cycle, we do not yet have finalized requirements for the tools to be developed by Task 3.1. Section 1.4 outlines the steps that we intend to follow, and the approximate point that we have reached within these steps.}, } @Misc{Alexiev2018-CLADA-BG, author = {Vladimir Alexiev}, title = {{Linked Open Data and Ontotext Projects}}, howpublished = {presentation}, month = nov, year = 2018, url = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20181109-CLADA-BG- Ontotext and LOD (68 slides).ppt}, keywords = {LOD, LOD Cloud, semantic technologies, museum data, LODLAM, CIDOC CRM, Wikidata, Ontotext, CLARIN, DARIAH, CLADA}, address = {CLADA-BG Kickoff meeting, BAS IICT, Sofia, Bulgaria}, pages = 68, } @Manual{Alexiev2018-EBG-model, title = {{euBusinessGraph Semantic Data Model}}, author = {Vladimir Alexiev and Tatiana Tarasova and Javier Paniagua and Chris Taggart and Brian Elvesaeter and Fredrik Seehusen and Dumitru Roman and David Norheim}, organization = {euBusinessGraph Consortium}, month = jun, year = 2018, url = {https://docs.google.com/document/d/1dhMOTlIOC6dOK_jksJRX0CB-GIRoiYY6fWtCnZArUhU/edit}, url_Source = {https://github.com/euBusinessGraph/eubg-data/tree/master/model}, abstract = {This document describes the semantic model used by euBusinessGraph (EBG) to represent companies, their attributes, addresses, directors and CEOs, datasets by the different providers, provenance. It includes an informative description of classes and properties, gives examples and data provider rules, provides schema and instance diagrams. It also provides RDF bindings, i.e. classes and properties to be used for representation of company data.
The GitHub project euBusinessGraph/eubg-data/model provides a generated ontology and RDF shapes intended to validate submitted data (these are not yet complete).}, keywords = {euBusinessGraph, firmographics, company data, organisation data, trade registers, POL data, linked data, business graph, economics, W3C Org, W3C RegOrg, Schema.org}, } @Manual{Alexiev2018-EBG-ontology, title = {{euBusinessGraph Ontology}}, author = {Vladimir Alexiev and Javier Paniagua}, organization = {euBusinessGraph Consortium}, month = oct, year = 2018, url_Source = {https://github.com/euBusinessGraph/eubg-data/raw/master/model/ebg-ontology.ttl}, url = {https://rawgit2.com/euBusinessGraph/eubg-data/master/ontology/_old/index.html}, abstract = {The euBusinessGraph (ebg:) ontology represents companies, type/status/economic classification, addresses, identifiers, (and soon) directors and CEOs, dataset offerings. It uses schema:domain/rangeIncludes (which are polymorphic) to describe which properties are applicable to a class, rather than rdfs:domain/range (which are monomorphic) to prescribe what classes must be applied to each node using a property. We find that this enables more flexible reuse and combination of different ontologies. We reuse the following ontologies and nomenclatures, and extend them where appropriate with classes and properties: - W3C Org, W3C RegOrg (basic company data), - W3C Time (officer membership), - W3C Locn (addresses), - schema.org (domain/rangeIncludes and various properties) - DBpedia ontology (jurisdiction) - NGEO and Spatial (NUTS administrative divisions) - ADMS (identifiers), - FOAF, SIOC (blog posts), - RAMON, SKOS (NACE economic classifications and various nomenclatures), - VOID (dataset descriptions). This is only a reference. See more detail in the EBG Semantic Model document, which includes an informative description of classes and properties, gives examples and data provider rules, and provides more schema and instance diagrams.}, keywords = {euBusinessGraph, firmographics, company data, organisation data, trade registers, POL data, linked data, business graph, economics, W3C Org, W3C RegOrg, Schema.org, domainIncludes, rangeIncludes}, } @TechReport{Alexiev2018-LODE, author = {Vladimir Alexiev}, title = {{Live OWL Documentation Environment (LODE) Extended with Schema and SKOS Properties}}, institution = {Ontotext}, year = 2018, type = {software}, month = may, url = {https://github.com/VladimirAlexiev/LODE}, keywords = {LODE, ontology, ontology documentation, documentation generator, Schema.org, domainIncludes, rangeIncludes}, abstract = {I've extended LODE to handle schema:domain/rangeIncludes in addition to rdfs:domain/range, some SKOS properties (example, scopeNote) and a number of typographic enhancements (e.g. emit multi-valued fields such as examples as a list). schema:domain/rangeIncludes (which are polymorphic) are used to describe which properties are applicable to a class, rather than rdfs:domain/range (which are monomorphic) that prescribe what classes must be applied to each node using a property.
I find that this enables more flexible reuse and combination of different ontologies.}, } @InProceedings{Alexiev2018-MuseumLOD-DIPP2018, author = {Vladimir Alexiev}, title = {{Museum Linked Open Data: Ontologies, Datasets, Projects (invited report)}}, booktitle = {{Digital Presentation and Preservation of Cultural and Scientific Heritage (DIPP 2018)}}, year = 2018, volume = 8, pages = {19-50}, month = sep, address = {Burgas, Bulgaria}, publisher = {Institute of Mathematics and Informatics (IMI BAS), Sofia}, url = {https://dipp.math.bas.bg/images/2018/019-050_32_11-iDiPP2018-34.pdf}, url_Slides = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/Museum Linked Open Data (DIPP 2018, 21 slides).ppt}, keywords = {semantic technologies, museum data, LODLAM, CIDOC CRM}, abstract = {The Galleries, Libraries, Archives and Museums (GLAM) sector deals with complex and varied data. Integrating that data, especially across institutions, has always been a challenge. Semantic data integration is the best approach to deal with such challenges. Linked Open Data (LOD) enable large-scale Digital Humanities (DH) research, collaboration and aggregation, allowing DH researchers to make connections between (and make sense of) the multitude of digitized Cultural Heritage (CH) available on the web. An upsurge of interest in semtech and LOD has swept the CH and DH communities. An active Linked Open Data for Libraries, Archives and Museums (LODLAM) community exists, CH data is published as LOD, and international collaborations have emerged. The value of LOD is especially high in the GLAM sector, since culture by its very nature is cross-border and interlinked. We present interesting LODLAM projects, datasets, and ontologies, as well as Ontotext's experience in this domain. An extended version of this paper is available. It has 77 pages, 67 figures, detailed info about CH content and XML standards, Wikidata and global authority control.}, issn = {1314-4006}, eissn = {2535-0366}, } @TechReport{Alexiev2018-MuseumLOD-extended, author = {Vladimir Alexiev}, title = {{Museum Linked Open Data: Ontologies, Datasets, Projects (extended version)}}, institution = {Ontotext}, year = 2018, month = sep, url = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/Museum Linked Open Data (77 pages).pdf}, keywords = {semantic technologies, museum data, LODLAM, CIDOC CRM, Wikidata, LIDO, CCO, CDWA, CONA}, pages = 77, abstract = {The Galleries, Libraries, Archives and Museums (GLAM) sector deals with complex and varied data. Integrating that data, especially across institutions, has always been a challenge. Semantic data integration is the best approach to deal with such challenges. Linked Open Data (LOD) enable large-scale Digital Humanities (DH) research, collaboration and aggregation, allowing DH researchers to make connections between (and make sense of) the multitude of digitized Cultural Heritage (CH) available on the web. An upsurge of interest in semtech and LOD has swept the CH and DH communities. An active Linked Open Data for Libraries, Archives and Museums (LODLAM) community exists, CH data is published as LOD, and international collaborations have emerged. The value of LOD is especially high in the GLAM sector, since culture by its very nature is cross-border and interlinked. We present interesting LODLAM projects, datasets, and ontologies, as well as Ontotext's experience in this domain. 
This is an extended version of an invited report at the DIPP 2018 conference that covers CH content and XML standards, Wikidata and global authority control.}, } @Article{Vrochidis2018-Multimodal, author = {Stefanos Vrochidis and Anastasia Moumtzidou and Ilias Gialampoukidis and Dimitris Liparas and Gerard Casamayor and Leo Wanner and Nicolaus Heise and Tilman Wagner and Andriy Bilous and Emmanuel Jamin and Boyan Simeonov and Vladimir Alexiev and Reinhard Busch and Ioannis Arapakis and Ioannis Yiannis Kompatsiaris}, title = {{A multimodal analytics platform for journalists analysing large-scale, heterogeneous multilingual and multimedia content}}, journal = {{Frontiers in Robotics and AI}}, year = 2018, volume = 5, month = oct, url = {https://doi.org/10.3389/frobt.2018.00123}, keywords = {multimodal, analytics platforms, multilingual, multimedia, analyst, journalist, web crawling, semantic indexing, summarization}, topic = {Machine Learning at Scale: How Big Data and AI are Transforming Engineering}, doi = {10.3389/frobt.2018.00123}, issn = {2296-9144}, eissn = {2296-9144}, abstract = {Analysts and journalists face the problem of having to deal with very large, heterogeneous, and multilingual data volumes that need to be analyzed, understood, and aggregated. Automated and simplified editorial and authoring processes could significantly reduce time, labor, and costs. Therefore, there is a need for unified access to multilingual and multicultural news story material, beyond the level of a nation, ensuring context-aware, spatiotemporal, and semantic interpretation, also correlating and summarizing the interpreted material into a coherent gist. In this paper, we present a platform integrating multimodal analytics techniques, which are able to support journalists in handling large streams of real-time and diverse information. Specifically, the platform automatically crawls and indexes multilingual and multimedia information from heterogeneous resources. Textual information is automatically summarized and can be translated (on demand) into the language of the journalist. High-level information is extracted from both textual and multimedia content for fast inspection using concept clouds. The textual and multimedia content is semantically integrated and indexed using a common representation, to be accessible through a web-based search engine. The evaluation of the proposed platform was performed by several groups of journalists revealing satisfaction from the user side.}, } @InProceedings{deLeeuw2018-EHRI, author = {Daan de Leeuw and Mike Bryant and Michal Frankl and Ivelina Nikolova and Vladimir Alexiev}, title = {{Digital Methods in Holocaust Studies: The European Holocaust Research Infrastructure}}, booktitle = {{14th IEEE International Conference on eScience}}, year = 2018, month = oct, address = {Amsterdam, The Netherlands}, publisher = {IEEE}, url = {https://ieeexplore.ieee.org/document/8588640}, keywords = {archives, Holocaust research, EHRI, research infrastructure, digital humanities, VRE, semantic integration, semantic archive integration, coreferencing, access points, thesauri, authorities, EAD, person matching, record linking, deduplication}, abstract = {Digital methods and tools for the humanities also change historical research into the Holocaust. The European-funded Holocaust project EHRI has developed various digital tools and methods that facilitate Holocaust research.
This paper will describe a number of them and discuss how they affect scholarship into the annihilation of European Jews.}, eisbn = {978-1-5386-9156-4}, doi = {10.1109/eScience.2018.00021}, isbn = {978-1-5386-9157-1}, } @Misc{Alexiev2023-MigratingGettyID, author = {Vladimir Alexiev}, title = {{Migrating J. Paul Getty Museum Agent ID from P2432 to P12040}}, howpublished = {Github gist}, month = nov, year = 2023, url = {https://gist.github.com/VladimirAlexiev/e0a7bae256e9646a7b6f47b23184f9a4}, keywords = {Getty Trust, J. Paul Getty Museum, authority control, Wikidata, cultural heritage, GLAM}, abstract = {Previously Wikidata had Getty Museum agent DOR ID (P2432), e.g. https://www.getty.edu/art/collection/artists/377. But this is an internal ID that redirects to e.g. https://www.getty.edu/art/collection/person/103JV9. So I made a Wikidata property for the Getty Museum new agent ID (P12040). Using the Getty SPARQL endpoint, I exported 12936 persons and 3616 groups with fields "guid name old new ulan nat role birthDate birthPlace deathDate deathPlace". Then I initiated a discussion on how to populate these new IDs to Wikidata, leveraging ULAN and the old DOR ID: https://www.wikidata.org/wiki/Property_talk:P12040#Populating_J._Paul_Getty_Museum_agent_ID. I also found some records without new ID, and started a discussion with Getty to see why that ID was missing.}, } @Misc{Alexiev2016-How-not-to-do-Open-Publications, author = {Vladimir Alexiev}, title = {{How Not to Do Open Publications LOD}}, howpublished = {Github gist}, month = aug, year = 2017, url = {https://gist.github.com/VladimirAlexiev/90753af3a1148b7fd9bb194b2b0d7cbd}, keywords = {OpenAIRE, open access, open publications, open data, linked research, CERIF}, abstract = {A review of the OpenAIRE beta ontology https://lod.openaire.eu/vocab. An EC Research Infrastructure gone awry.}, } @Misc{Alexiev2017-GSheet-Wikidata, author = {Vladimir Alexiev}, title = {{How to use Google Sheets to Manage Wikidata Coreferencing}}, howpublished = {Github gist}, month = mar, year = 2017, url = {https://gist.github.com/VladimirAlexiev/8201d614a819cb7d4023ce9aa315af65}, keywords = {Google Sheets, Wikidata, GLAM, cultural heritage, Getty Vocabularies, GVP, AAT}, abstract = {A previous post explained how to use SPARQL to find missing data on Wikidata (Getty Museum IDs), how to create such values (from museum webpage URLs) and how to format them properly for QuickStatements. Here I explain how to use Google Sheets to manage a more advanced task. The sheet AAT-Wikidata matches about 3k AAT concepts to Wikipedia, WordNet30 and BabelNet (it restored an old mapping to Wordnet, retrieved it from BabelNet, mapped to Wikipedia). For each row, it uses a formula to query the Wikipedia API and get the corresponding Wikidata ID (wikibase_item). This formula asks for results in XML format, and uses an XPath that fetches the WD ID from the resulting XML. Making 3k API calls is slow, so the Google sheet initially shows "Loading…" for all rows, and gradually "materializes" the WD IDs (Qnnnn) as they come in. I have periodically sorted the column and used "Edit> Paste special> Values only" for the "materialized" IDs in order to fix them and not cause re-fetching next time I open the Google sheet. Columns C,D,E are specially formatted to produce the required QuickStatements tab-delimited format. The benefit of Google Sheets is that they allow easy addition of columns and convenient facilities for manual tasks: Collaborative editing by several people at once.
Column A for tracking which rows are checked, which are already inserted to WD, etc. Using filters to find rows of interest. E.g. check=1 means rows that are manually checked and ready for insertion to QuickStatements. After insertion, I change it to check=2 to mark it as already inserted. Column B for tracking already existing WD IDs. Conditional formatting to colour existing WD IDs (column B) that differ from my idea what is the matching WD ID (column C) and therefore must be checked.}, } @Misc{Alexiev2017-PracticalSemanticModeling, author = {Vladimir Alexiev}, title = {{Practical Semantic Modeling, SPARQL, RDF Shapes, IoT/WoT/UoM}}, howpublished = {Ontotext Training, 80 Slides}, month = oct, year = 2017, url = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20171025-Practical Semantic Modeling, SPARQL, RDF Shapes, IoT-WoT-UoM (201710).pptx}, keywords = {RDF, SPARQL, RDF Shapes, rdfpuml, semantic modeling, ontology engineering, IoT, WoT, UoM}, abstract = {RDF Formats. Semantic resolution and content negotiation. Prefixes, URL design (Namespace Carving). RDF Terms, Turtle, SPARQL. Semantic Data Modeling. Modeling vs Ontology Engineering. RDFS vs schema.org. Ontology design patterns. RDF Shapes. Org, RegOrg, Person, Locn Ontologies. euBusinessGraph Data Model. rdfpuml diagramming tool. SPARQL. Getty Sample Queries (https://vocab.getty.org/queries). euBusinessGraph sample queries (https://businessgraph.ontotext.com/sparql, Bulgarian Trade Register). Ontologies for Internet of Things, Web of Things, Units of Measure (IoT, WoT, UoM). Created 25 Oct 2017, Updated 16 Apr 2018}, } @Misc{Alexiev2017-ULAN-SNAC-Wikidata, author = {Vladimir Alexiev}, title = {{How to Count ULAN and SNAC in Wikidata}}, howpublished = {blog}, month = sep, year = 2017, url = {https://docs.google.com/document/d/1F4FGJqJ2lWponSnzxQpJR6Nq87Ecv3qXJ4ExcmWgIvU/edit#}, keywords = {Wikidata, Coreferencing, Getty Vocabularies, ULAN, SNAC}, } @TechReport{Alexiev2017-Visualization, author = {Vladimir Alexiev}, title = {{Data Visualization with GraphDB and Workbench}}, institution = {Ontotext Corp}, year = 2017, month = jun, url = {https://docs.google.com/document/d/e/2PACX-1vTSQPHE0aUO3U1pgtQxk5RrSjsKZFM9Fx425VOdSPURLOGA_5zsGZzmMs9B75krzXh2c9DnHc34WRR8/pub}, url_Slides = {https://docs.google.com/presentation/d/1Udah3b8nc1oxjpi8XHtGX4nxIF6tlyrvo29QJkFKfgo/edit}, url_Source = {https://docs.google.com/document/d/1guwFHi9p4-ujFkrHF6dwMUZndzCmlX_gPyiBi6JlPTs/edit}, url_Video = {https://ontotext.com/knowledgehub/webinars/building-knowledge-data-visualization/}, keywords = {visualization, GraphDB, RDF, SPARQL, CNL, CUBE, OLAP, charts, graphs, NLP, seq2seq}, abstract = {Building Knowledge through Data Visualization. Data visualization enables analysts and organizations to see huge quantities of data clearly and identify patterns quickly. However, data volume, velocity and variety have increased immensely in recent years and so has the need to see the links between data from many sources. By creating visualizations with graph databases, organizations get insights from all perspectives they wish and need to explore. Even a quick glance at the relationship structure reveals where unusually large clusters of nodes or edges are. More traditional charts and statistical visualizations are also very useful to see the structure of data. Expressing relations, graphs and data trends in a visual way turns data into knowledge. 
The accompanying Webinar is designed to answer a common request from our community - how to make data visualisations from RDF datasets. Are there tools to help with developing queries? How can people who are not conversant with SPARQL get insights into data and understand its structure? How can they run SPARQL queries developed by others? We describe SPARQL editing and data visualization features available in GraphDB Workbench (GDB WB), or such that can be added with little programming. We will also describe SPARQL writing aids and visualization tools that can be integrated with GraphDB. Detailed topics: Writing SPARQL, Built-in SPARQL Result Visualizations, Using SPARQL Results in Spreadsheets, Invoking SPARQL Queries and Query Parameterization, Tools that Help With Writing SPARQL Queries, Translating natural language to SPARQL, Tools for Statistical Visualizations, Graph Visualizations: Built-in to GDB WB, Developing, Visualization Toolkits, Declarative Visualization, RDF by Example, JDBC Data Access API. Last updated: Sep 2022 (HTML report)}, } @Misc{Alexiev2017-Wikidata-Museum, author = {Vladimir Alexiev}, title = {{How to Add Museum IDs to Wikidata}}, howpublished = {Github gist}, month = jan, year = 2017, url = {https://gist.github.com/VladimirAlexiev/e51e256be18870ac5033901197ee8277}, keywords = {Wikidata, SPARQL, GLAM, cultural heritage, Getty Museum, JPGM}, abstract = {I use Wikidata SPARQL to find J. Paul Getty Museum objects without museum Object ID. Most of these objects have a property "described at URL" that includes the object ID I seek. I use SPARQL to transform the results to Wikidata Quick Statements format, then insert the IDs directly to Wikidata.}, } @Misc{Alexiev2017-euBusinessGraph-organization-datasets-and-ontologies, author = {Vladimir Alexiev}, title = {{Organization Datasets and Ontologies}}, howpublished = {presentation}, month = jan, year = 2017, url = {https://docs.google.com/presentation/d/1s-mQwj_0cpbLFVmhZPUrzB_M5mISkZQug0Wiui6H218/pub}, keywords = {euBusinessGraph, organisation data, POL data, Dun and Bradstreet, DnB, Financial Industry Business Ontology, FIBO), Global Legal Entity Identifier, GLEI, Panama Papers, Linked Leaks, Bulgarian Trade Register, BG TR, Wikidata}, address = {euBusinessGraph Project Kickoff, Oslo, Norway}, } @InProceedings{AlexievKiryakovTarkalanov2017-euBusinessGraph-Semantics2017, author = {Vladimir Alexiev and Atanas Kiryakov and Plamen Tarkalanov}, title = {{euBusinessGraph: Company and Economic Data for Innovative Products and Services}}, booktitle = {{13th International Conference on Semantic Systems (Semantics 2017)}}, year = 2017, month = sep, url = {https://rawgit2.com/webdata/SEMANTiCS2017-posters/master/papers_final/163_Alexiev/index.html}, url_PDF = {https://github.com/webdata/SEMANTiCS2017-posters/raw/master/papers_final/163_Alexiev/163_Alexiev.pdf}, url_Slides = {https://www.slideshare.net/valexiev1/eubusinessgraph-company-and-economic-data}, url_Poster = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/euBusinessGraph-poster-Semantics2017.pdf}, keywords = {euBusinessGraph, firmographics, company data, public procurement, linked data, business graph, economics}, categories = {Resource Description Framework (RDF), Ontologies, Enterprise applications}, subtitle = {{Enabling the European Business Graph for Innovative Data Products and Services}}, abstract = {Corporate information, including basic company firmographics (e.g., name(s), incorporation data, registered addresses, ownership and related entities), financials 
(e.g., balance sheets, ratings), contextual data (e.g., addresses, economic activity classification, key officers, public tenders data, press mentions and events) are the foundation that many data value chains are built on. Furthermore, this type of information contributes to the transparency and accountability of enterprises, is instrumental input to the process of marketing and sales, and plays a key role in many business interactions. Collecting and aggregating data about a business entity from several public sources (be it private/public, official or non-official ones), and especially across country borders and languages is a tedious, time consuming, error prone, and expensive operation which renders many potential business models non-feasible. The euBusinessGraph project integrates European company and economic data from various data providers, including OpenCorporates (the largest open database of company info crawled from official registers), Norway's Bronnoysund Register Center (official register data), SpazioDati (rich IT data from official registers, additional databases, web crawl of company sites, tender info, etc), EventRegistry events, GLEI, Panama Leaks, etc. euBusinessGraph is intended to overcome these barriers and provision several important business cases, such as economic journalism (Deutsche Welle), publication of rich company data (BRC), tender information service (CERVED), business intelligence (EVRY), etc. It will also provide a marketplace of company data, with some free search and faceting, leading to information about richer Data Offerings by specific providers and their pricing. We will present the work done on exploring relevant ontologies and vocabularies for describing companies, systems of identifiers, development of a unified data model, plans for data flows, data aggregation, matching and cross-linking, and the opportunities that lie ahead for the business cases and the data marketplace.}, } @Comment{TODO: download to https://rawgit2.com/VladimirAlexiev/my/master/pubs/euBusinessGraph-presentation-Semantics2017.pdf} @InProceedings{AlexievNikolova2017-EHRI-DSDH17, author = {Vladimir Alexiev and Ivelina Nikolova}, title = {{Semantic Archive Integration for Holocaust Research: the EHRI Research Infrastructure}}, booktitle = {{Data Sharing, Holocaust Documentation and the Digital Humanities. Best Practices, Benefits, Case Studies (DSDH 2017)}}, year = 2017, editor = {Laura Brazzo and Vladimir Alexiev and Kepa Rodrigues and Silvia Mazzini}, month = jun, address = {Venice, Italy}, url = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/Semantic Archive Integration for Holocaust Research- the EHRI Research Infrastructure (201707).pdf}, keywords = {archives, Holocaust research, EHRI, research infrastructure, digital humanities, VRE, semantic integration, semantic archive integration, coreferencing, access points, thesauri, authorities, EAD, OAI PMH, ResourceSync, Geonames, Wikidata, VIAF, person matching, record linking, deduplication}, categories = {Arts and humanities, Digital libraries and archives, Information retrieval, Web searching and information discovery, Document searching, Document metadata, Semantic web description languages, Ontologies, Thesauri}, abstract = {The European Holocaust Research Infrastructure (EHRI) is a large-scale EU project that involves 23 institutions and archives working on Holocaust studies, from Europe, Israel and the US. 
In its first phase (2011-2015) it aggregated archival descriptions and materials on a large scale and built a Virtual Research Environment (portal) for Holocaust researchers based on a graph database. In its second phase (2015-2019), EHRI2 seeks to enhance the gathered materials using semantic approaches: enrichment, coreferencing, interlinking. Semantic integration involves four of the 14 EHRI2 work packages and helps integrate databases, free text, and metadata to interconnect historical entities (people, organizations, places, historic events) and create networks. We will present some of the EHRI2 technical work, including critical issues we have encountered. WP10 (EAD) converts archival descriptions from various formats to standard EAD XML; transports EADs using OAI PMH or ResourceSync; ingests EADs to the EHRI database; enables use cases such as synchronization; coreferencing of textual Access Points to proper thesaurus references. WP11 (Authorities and Standards) consolidates and enlarges the EHRI authorities to render the indexing and retrieval of information more effective. It addresses Access Points in ingested EADs (normalization of Unicode, spelling, punctuation; deduplication; clustering; coreferencing to authority control), Subjects (deployment of a Thesaurus Management System in support of the EHRI Thesaurus Editorial Board), Places (coreferencing to Geonames); Camps and Ghettos (integrating data with Wikidata); Persons, Corporate Bodies (using USHMM HSV and VIAF); semantic (conceptual) search including hierarchical query expansion; interconnectivity of archival descriptions; permanent URLs; metadata quality; EAD RelaxNG and Schematron schemas and validation, etc. WP13 (Data Infrastructures) builds up domain knowledge bases from institutional databases by using deduplication, semantic data integration, semantic text analysis. It provides the foundation for research use cases on Jewish Social Networks and their impact on the chance of survival. WP14 (Digital Historiography Research) works on semantic text analysis (semantic enrichment), text similarity (e.g. clustering based on Neural Networks, LDA, etc), geo-mapping. It develops Digital Historiography researcher tools, including Prosopographical approaches.}, } @InProceedings{TagarevTolosiAlexiev2017-FD-extended, author = {Andrey Tagarev and Laura Tolosi and Vladimir Alexiev}, title = {{Domain-specific modeling: a Food and Drink Gazetteer}}, booktitle = {{Transactions on Computational Collective Intelligence XXVI, special issue on Keyword Search in Big Data}}, year = 2017, editor = {Ngoc Thanh Nguyen and Ryszard Kowalczyk and Alexandre Miguel Pinto and Jorge Cardoso}, volume = 10190, series = {LNCS}, pages = {186-209}, month = jul, publisher = {Springer}, url = {https://link.springer.com/chapter/10.1007/978-3-319-59268-8_9}, url_Preprint = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/Tagarev2017-DomainSpecificGazetteer.pdf}, keywords = {classification, categorization, Wikipedia, DBpedia, gazetteer, Europeana, Cultural Heritage, concept extraction, semantic enrichment, food and drink}, doi = {10.1007/978-3-319-59268-8_9}, abstract = {Our goal is to build a Food and Drink (FD) gazetteer that can serve for classification of general, FD-related concepts, efficient faceted search or automated semantic enrichment. Fully supervised design of domain-specific models ex novo is not scalable. Integration of several ready knowledge bases is tedious and does not ensure coverage. 
Completely data-driven approaches require a large amount of training data, which is not always available. For general domains (such as the FD domain), re-using encyclopedic knowledge bases like Wikipedia may be a good idea. We propose here a semi-supervised approach that uses a restricted Wikipedia as a base for the modeling, achieved by selecting a domain-relevant Wikipedia category as root for the model and all its subcategories, combined with expert and data-driven pruning of irrelevant categories.}, } @Misc{Alexiev2016-CH-webinar, author = {Vladimir Alexiev}, title = {{Linked Open Data for Cultural Heritage}}, howpublished = {Ontotext webinar, 132 slides}, month = sep, year = 2016, url = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20160929-webinar/index-full.html}, url_PDF = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20160929-webinar/Linked%20Open%20Data%20for%20Cultural%20Heritage.pdf}, url_Slides = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20160929-webinar/index.html}, keywords = {AAT, Annotation, BTG, BTI, BTP, British Museum, Broader generic, Broader instantial, Broader partitive, CIDOC CRM, cultural heritage, EDM, ESE, Europeana, FRAD, FRBR, FRBRoo, FRSAD, Fundamental Concepts, Fundamental Relations, GLAM, Geonames, Getty, Getty Museum, ISNI, ISO 25964, LDBC, LOD, Metadata, Museum informatics, OAI, OAI PMH, OWLIM, Ontology, Ontotext GraphDB, Provenance, RDAinfo, RDF, ResearchSpace, SKOS, SKOS-XL, SPARQL, SPECTRUM, Schema, Seeing Standards, TGN, Taxonomy, Thesauri, ULAN, VIAF, Web Annotation, Wikidata, concept extraction, dataset, endpoint, faceted search, food and drink, gazetteer, inference, knowledge base, knowledge-based system, multimedia annotation, ontology, open data, practical applications, reasoning, semantic application, semantic enrichment, semantic integration, semantic mapping, semantic repository, semantic representation, semantic search, semantic technology, text analysis, thesauri, virtual research environment, visualization, vocabularies}, abstract = {The Internet, global digitization efforts, Europe's Digital Agenda, continuing investments in Europeana, the Digital Public Library of America and many other initiatives, have made millions upon millions of digitized cultural artifacts available on the net. We need to make sense of all this information: aggregate it, integrate it, provide cross-collection search, find links between entities and artefacts, build narratives, analyze data, support the scientific discourse, engage users… From ancient maps to bibliographic records, to paintings, to coins and hoards, to paleographic analysis, to prosopography factoids... everything is becoming more and more connected. A host of ontologies and metadata standards exist in the Cultural Heritage (CH) domain: CIDOC CRM, TEI5, LIDO, SPECTRUM, VRA Core, MPEG7, DC, ESE and EDM, OAI ORE and PMH, IIIF, ResourceSync... the list goes on and on. How many of the standards listed in Seeing Standards: A Visualization of the Metadata Universe (by Jenn Riley, Associate Dean for Digital Initiatives at McGill University Library) apply to your work? A number of established thesauri and gazetteers exist, and some of them are interconnected: DBPedia; Wikidata, VIAF, FAST, ULAN; GeoNames, Pleiades, TGN; LCSH, AAT, IconClass, Joconde, SVCN, Wordnet, etc. The diagram below (by Michiel Hildebrand) shows a small part of this upcoming universe of CH data.
How to use them in every-day collection management, cataloging, documentation and research? How to expose your institution's collections and other data to allow interlinking? Digital Humanities (DH) has emerged as a new and promising scientific discipline, with universities like Kings College London establishing new departments devoted to it. As Jeffrey Schnapp writes in the Digital Humanities manifesto 2.0 "Digital Humanities embraces and harnesses the expanded, global nature of today’s research communities as one of the great inter-disciplinary/post-disciplinary opportunities of our time. It dreams of models of knowledge production and reproduction that leverage the increasingly distributed nature of expertise and knowledge and transform this reality into occasions for scholarly innovation, disciplinary cross-fertilization, and the democratization of knowledge". In his keynote address at MCN 2014 Beyond Borders: The Humanities in the Digital Age, James Cuno (President and CEO of the J. Paul Getty Trust) emphasizes the role of modernizing Humanities and the value of Linked Data in cultural heritage informatics. The question also is how to preserve the role of libraries, museums and other Cultural Heritage institutions as centers of wisdom and culture into the new millennium? Aren't Google, Wikipedia, Facebook, Twitter and smart-phone apps becoming the new centers of research and culture (or at least popular culture)? We believe the answers to many of these questions lie with Semantic Technology and Linked Data. They enable large-scale Digital Humanities research, collaboration and aggregation; and technological renewal of CH institutions. The Rosetta Stone was key to the deciphering of Egyptian hieroglyphs, by providing parallel text in three scripts: Ancient Egyptian, Demotic and Ancient Greek. Today semantic technologies play a similar role, allowing the Digital Humanist to make connections between (and make sense of) the multitude of digitized cultural artifacts available on the net. An upsurge of interest in semantic technology has swept the CH and DH communities. Meetups and summits, conferences and un-conferences, residences and hackathons are taking place every week. CH institutions are collaborating actively. An active Linked Open Data for Libraries, Archives and Museums (LODLAM) community has emerged, and the #LODLAM twitter hashtag sees active communication. Established institutions create branches that sound like web startups or Wikipedia offsprings (e.g. British Library Labs; Smithsonian Web-strategy and Smithsonian Commons; UK National Archives department of Web Continuity). The Galleries, Libraries, Archives and Museums (GLAM) sector deals with complex and varied data. Integrating that data, especially across institutions, has always been a challenge. On the other hand, the value of linked data is especially high in this sector, since culture by its very nature is cross-border and interlinked. 
In this webinar we'll present interesting LODLAM projects, datasets and ontologies, as well as Ontotext's experience in this domain.}, url_recording= {https://ontotext.com/knowledgehub/webinars/build-narratives-connect-artifacts-cultural-heritage/}, } @Misc{Alexiev2016-EFD-DBpedia, author = {Vladimir Alexiev}, title = {{Using DBPedia in Europeana Food and Drink}}, howpublished = {presentation}, month = feb, year = 2016, url = {https://drive.google.com/file/d/0B7je1jgVmCgIZzNiWmdqTGpDa28/view}, url_PDF = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20160212-Using-DBPedia-in-Europeana-Food-and-Drink.pdf}, keywords = {Europeana, cultural heritage, food and drink, DBpedia, Geonames, semantic application, faceted search, semantic search}, address = {DBpedia Meeting, The Hague, Netherlands}, abstract = {The Europeana Food and Drink project collects cultural heritage objects for and develops applications related to Food and Drink heritage. As part of the project, Ontotext developed a FD Classification based on Wikipedia/DBpedia Categories, a semantic enrichment service that annotates each CHO with FD Topics and Places, and a semantic application (https://efd.ontotext.com/app) that implements hierarchical semantic facets and semantic search for these facets. We'll also be packaging the enrichment as a service for others to use in a crowdsourced annotation application. We will explain how we used Categories to build a domain-specific gazetteer, used external datasets (eg UMBEL domains and DBTax types), correlated DBpedia places to Geonames to use the place hierarchy, and the workings of the semantic application}, } @TechReport{Alexiev2016-EFD-semapp-ext, author = {Vladimir Alexiev and Andrey Tagarev and Laura Tolosi}, title = {{Europeana Food and Drink Semantic Demonstrator Extended}}, institution = {Europeana Food and Drink project}, year = 2016, type = {Deliverable}, number = {D3.20d}, month = jul, url = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/Europeana-Food-and-Drink-Semantic-Demonstrator-Extended-(D3.20d).pdf}, keywords = {Europeana, cultural heritage, food and drink, semantic application, semantic search, faceted search, semantic enrichment}, abstract = {Describes the additional development on the EFD Semantic Demonstrator performed after the official D3.20 deliverable (M22). It describes work performed between 31 October 2015 and 20 July 2016 (M31), the achieved results, the created data and enrichments, and the extended application functionality.}, } @Misc{Alexiev2016-EuropeanaMC-blog, author = {Vladimir Alexiev}, title = {{Meet the Europeana Members Council: Vladimir Alexiev}}, howpublished = {blog post}, month = mar, year = 2016, url = {https://pro.europeana.eu/blogpost/meet-the-members-council-vladimir-alexiev}, keywords = {cultural heritage, Europeana, EHRI, ResearchSpace, data quality, semantic enrichment}, abstract = {Describes the work of Ontotext and in particular Vladimir Alexiev in applying semantic technologies to cultural heritage}, } @Manual{Alexiev2016-GVP-LOD-queries, title = {{Getty Vocabularies: LOD Sample Queries}}, author = {Vladimir Alexiev}, organization = {Getty Research Institute}, edition = {3.3}, month = may, year = 2016, url = {https://vocab.getty.edu/doc/queries/}, abstract = {We provide 120 sample queries for the Getty Vocabularies LOD that should allow you to learn to query the data effectively. 
We include searching for data, getting all data of a subject, all labels and their attributes, full-text search, getting an ordered hierarchy, charts, etc. The queries are organized in sections: general, TGN-specific, ULAN-specific, Language queries, Counting and descriptive info, Exploring the ontology.}, keywords = {Getty, GVP, vocabularies, thesauri, AAT, TGN, ULAN, SPARQL, ontology, SKOS, SKOS-XL, ISO 25964}, } @Misc{Alexiev2016-How-not-to-do-LOD, author = {Vladimir Alexiev}, title = {{How Not to Do Linked Data}}, howpublished = {Github gist}, month = dec, year = 2016, url = {https://gist.github.com/VladimirAlexiev/090d5e54a525d57acb9b366121e77573}, keywords = {cultural heritage, RDF, LODLAM, CIDOC CRM, mapping, review, data quality}, abstract = {I review the data quality of the LOD publication of a national cultural heritage institution, and show examples of bad practices.}, } @TechReport{Alexiev2016-Multisensor-profile, author = {Vladimir Alexiev}, title = {{Multisensor RDF Application Profile}}, institution = {Multisensor Project, Ontotext Corp}, year = 2016, month = oct, abstract = {The Multisensor project analyzes and extracts data from mass- and social media documents (so-called SIMMOs), including text, images and video, speech recognition and translation, across several languages. It also handles social network data, statistical data, etc. Early on the project made the decision that all data exchanged between project partners (between modules inside and outside the processing pipeline) will be in RDF JSONLD format. The final data is stored in a semantic repository and is used by various User Interface components for end-user interaction. This final data forms a corpus of semantic data over SIMMOs and is an important outcome of the project. The flexibility of the semantic web model has allowed us to accommodate a huge variety of data in the same extensible model. We use a number of ontologies for representing that data: NIF and OLIA for linguistic info, ITSRDF for NER, DBpedia and Babelnet for entities and concepts, MARL for sentiment, OA for image and cross-article annotations, W3C CUBE for statistical indicators, etc. In addition to applying existing ontologies, we extended them with the Multisensor ontology, and introduced some innovations like embedding FrameNet in NIF. The documentation of this data has been an important ongoing task. It is even more important towards the end of the project, in order to enable the efficient use of MS data by external consumers. This document describes the different RDF patterns used by Multisensor, and how the data fits together. Thus it represents an "RDF Application Profile" for Multisensor. We use an example-based approach, rather than the more formal and laborious approach being standardized by the W3C RDF Shapes working group (still in development). We cover the following areas: 1. Linguistic Linked Data in NLP Interchange Format (NIF), including Part of Speech (POS), dependency parsing, sentiment, Named Entity Recognition (NER), etc. 2. Speech recognition, translation. 3. Multimedia binding and image annotation. 4. Statistical indicators and similar data. 5.
Social network popularity and influence, etc.}, url = {https://rawgit2.com/VladimirAlexiev/multisensor/master/index.html}, url_Source = {https://github.com/VladimirAlexiev/multisensor}, keywords = {Multisensor, CUBE, NLP, NLP2RDF, NIF, OLIA, ITSRDF, NERD, MARL, BabelNet, FrameNet, WordNet}, } @Misc{Alexiev2016-OpenData, author = {Vladimir Alexiev}, title = {{How to find Open Data and Ontologies in Linguistics/NLP and Cultural Heritage}}, howpublished = {presentation}, month = mar, year = 2016, url = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20160329-OpenData-and-Ontologies/index-full.html}, url_Slides = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20160329-OpenData-and-Ontologies/index.html}, keywords = {open data, ontology, linguistiscs, NLP, cultural heritage}, address = {4th Open Data & Linked Data meetup, Sofia, Bulgaria}, } @Misc{Alexiev2016-dbpedia-multisensor, author = {Vladimir Alexiev}, title = {{Multisensor Linked Open Data}}, month = sep, year = 2016, url = {https://rawgit2.com/VladimirAlexiev/multisensor/master/20160915-Multisensor-LOD/index.html}, keywords = {Multisensor, CUBE, NLP, NLP2RDF, NIF, OLIA, ITSRDF, NERD, MARL, BabelNet, FrameNet, WordNet}, booktitle = {{DBpedia Meeting}}, howpublished = {presentation}, address = {Leipzig, Germany}, abstract = {The FP7 Multisensor project analyzes and extracts data from mass- and social media documents, including text, images and video, across several languages. It uses a number of ontologies for representing that data: NIF and OLIA for linguistic info, ITSRDF for NER, DBpedia and Babelnet for entities and concepts, MARL for sentiment, OA for image and cross-article annotations, etc. We'll present how all these ontologies fit together, and some innovations like embedding FrameNet in NIF.}, } @Misc{Alexiev2016-rdfpuml, author = {Vladimir Alexiev}, title = {{Making True RDF Diagrams with rdfpuml}}, howpublished = {presentation}, month = mar, year = 2016, url = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20160514-rdfpuml/index-full.html}, url_Slides = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20160514-rdfpuml/index.html}, keywords = {RDF, visualization, PlantUML, cultural heritage, NLP, NIF, EHRI}, abstract = {RDF is a graph data model, thus often the best way to understand RDF data schemas (ontologies, application profiles, RDF shapes) is with a diagram. We describe a tool (rdfpuml) that makes true diagrams from Turtle examples using PlantUML and GraphViz. Diagram readability is of prime concern, and rdfpuml introduces a few diagram control mechanisms using triples in the puml: namespace. 
We give examples from Getty CONA (Mappings of museum data to CIDOC CRM), Multisensor (NLP2RDF/NIF, FrameNet), EHRI (Holocaust Research into Jewish social networks), Duraspace (Portland Common Data Model for holding metadata in institutional repositories)}, } @InProceedings{Alexiev2016-rdfpuml-rdf2rml, author = {Vladimir Alexiev}, title = {{RDF by Example: rdfpuml for True RDF Diagrams, rdf2rml for R2RML Generation}}, booktitle = {{Semantic Web in Libraries 2016 (SWIB 2016)}}, year = 2016, month = nov, address = {Bonn, Germany}, url = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20161128-rdfpuml-rdf2rml/index-full.html}, url_Slides = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20161128-rdfpuml-rdf2rml/index.html}, url_Video = {https://youtu.be/4WoYlaGF6DE}, keywords = {RDF, visualization, PlantUML, cultural heritage, NLP, NIF, EHRI, R2RML, generation, model-driven, RDF by Example, rdfpuml, rdf2rml}, abstract = {RDF is a graph data model, so the best way to understand RDF data schemas (ontologies, application profiles, RDF shapes) is with a diagram. Many RDF visualization tools exist, but they either focus on large graphs (where the details are not easily visible), or the visualization results are not satisfactory, or manual tweaking of the diagrams is required. We describe a tool *rdfpuml* that makes true diagrams directly from Turtle examples using PlantUML and GraphViz. Diagram readability is of prime concern, and rdfpuml introduces various diagram control mechanisms using triples in the puml: namespace. Special attention is paid to inlining and visualizing various Reification mechanisms (described with PRV). We give examples from Getty CONA, Getty Museum, AAC (mappings of museum data to CIDOC CRM), Multisensor (NIF and FrameNet), EHRI (Holocaust Research into Jewish social networks), Duraspace (Portland Common Data Model for holding metadata in institutional repositories), Video annotation. If the example instances include SQL queries and embedded field names, they can describe a mapping precisely. Another tool *rdf2rml* generates R2RML transformations from such examples, saving about 15x in complexity.}, } % Future work: extend RDF by Example to describe RDF Shapes; extend rdf2rml to generate RML instead of only R2RML, i.e. handle XML and JSON data sources % https://docs.stardog.com/#_stardog_mapping_syntax is similar: shortcut syntax of R2RML that displays examples @InProceedings{AlexievCasamayor2016-FN-NIF, author = {Vladimir Alexiev and Gerard Casamayor}, title = {{FN goes NIF: Integrating FrameNet in the NLP Interchange Format}}, booktitle = {{Linked Data in Linguistics (LDL-2016): Managing, Building and Using Linked Language Resources}}, year = 2016, month = may, address = {Portorož, Slovenia}, url = {https://rawgit2.com/VladimirAlexiev/multisensor/master/FrameNet/paper.pdf}, url_Slides = {https://rawgit2.com/VladimirAlexiev/multisensor/master/FrameNet/pres.html}, url_HTML = {https://rawgit2.com/VladimirAlexiev/multisensor/master/FrameNet/pres-full.html}, keywords = {linguistic linked data, FrameNet, NIF, NLP2RDF, RDF, application profile}, abstract = {FrameNet (FN) is a large-scale lexical database for English developed at ICSI Berkeley that describes word senses in terms of Frame semantics. FN has been converted to RDF LOD by ISTC-CNR, together with a large corpus of text annotated with FN. NIF is an RDF/OWL format and protocol for exchanging text annotations between NLP tools as Linguistic Linked Data.
This paper reviews the FN-LOD representation, compares it to NIF, and describes a simple way to integrate FN in NIF, which does not use any custom classes or properties.}, } @Article{HCLS-paper, author = {Michel Dumontier and Alasdair J. G. Gray and M. Scott Marshall and Vladimir Alexiev and others}, title = {{The health care and life sciences community profile for dataset descriptions}}, journal = {{PeerJ}}, year = 2016, volume = 4, pages = {e2331}, month = aug, url = {https://peerj.com/articles/2331/}, keywords = {Data profiling, Dataset descriptions, Metadata, Provenance, FAIR data, HCLS, dataset, VOID, ontology, Bioinformatics, Taxonomy}, issn = {2167-8359}, doi = {10.7717/peerj.2331}, abstract = {Access to consistent, high-quality metadata is critical to finding, understanding, and reusing scientific data. However, while there are many relevant vocabularies for the annotation of a dataset, none sufficiently captures all the necessary metadata. This prevents uniform indexing and querying of dataset repositories. Towards providing a practical guide for producing a high quality description of biomedical datasets, the W3C Semantic Web for Health Care and the Life Sciences Interest Group (HCLSIG) identified RDF vocabularies that could be used to specify common metadata elements and their value sets. The resulting guideline covers elements of description, identification, attribution, versioning, provenance, and content summarization. This guideline reuses existing vocabularies, and is intended to meet key functional requirements including indexing, discovery, exchange, query, and retrieval of datasets. The resulting metadata profile is generic and could be used by other domains with an interest in providing machine readable descriptions of versioned datasets.}, } @InProceedings{INSCI2016-Multisensor, author = {Boyan Simeonov and Vladimir Alexiev and Dimitris Liparas and Marti Puigbo and Stefanos Vrochidis and Emmanuel Jamin and Ioannis Kompatsiaris}, title = {{Semantic Integration of Web Data for International Investment Decision Support}}, booktitle = {{3rd International Conference on Internet Science (INSCI 2016)}}, year = 2016, month = sep, address = {Florence, Italy}, url = {https://zenodo.org/record/571202}, url_Preprint = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/INSCI2016.pdf}, keywords = {Decision support, Indicators, Heterogeneous web resources, SME internationalisation, Semantic integration, SPARQL, statistics ontologies, CUBE}, doi = {10.1007/978-3-319-45982-0_18}, abstract = {Given the current economic situation and the financial crisis in many European countries, Small and Medium Enterprises (SMEs) have found internationalisation and exportation of their products as the main way out of this crisis. In this paper, we provide a decision support system that semantically aggregates information from many heterogeneous web resources and provides guidance to SMEs for their potential investments. The main contributions of this paper are the introduction of SME internationalisation indicators that can be considered for such decisions, as well as the novel decision support system for SME internationalisation based on inference over semantically integrated data from heterogeneous web resources.
The system is evaluated by SME experts in realistic scenarios in the sector of dairy products.}, session = {13 Sep 14:20: Smart Cities and Data Analysis Issues}, } @InProceedings{TPDL2016-semanticEnrichment, author = {Hugo Manguinhas and Nuno Freire and Antoine Isaac and Juliane Stiller and Valentine Charles and Aitor Soroa and Rainer Simon and Vladimir Alexiev}, title = {{Exploring comparative evaluation of semantic enrichment tools for cultural heritage metadata}}, booktitle = {{20th International Conference on Theory and Practice of Digital Libraries (TPDL 2016)}}, year = 2016, editor = {Norbert Fuhr and László Kovács and Thomas Risse and Wolfgang Nejdl}, month = sep, address = {Hannover, Germany}, url = {https://link.springer.com/chapter/10.1007/978-3-319-43997-6_21}, url_Preprint = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/TPDL2016.pdf}, keywords = {Europeana, semantic enrichment, evaluation, precision, recall, cultural heritage, metadata}, doi = {10.1007/978-3-319-43997-6_21}, abstract = {Semantic enrichment of metadata is an important and difficult problem for digital heritage efforts such as Europeana. This paper gives motivations and presents the work of a recently completed Task Force that addressed the topic of evaluation of semantic enrichment. We especially report on the design and the results of a comparative evaluation experiment, where we have assessed the enrichments of seven tools (or configurations thereof) on a sample benchmark dataset from Europeana.}, } @InProceedings{TagarevTolosiAlexiev2017-FD, author = {Andrey Tagarev and Laura Tolosi and Vladimir Alexiev}, title = {{Domain-specific modeling: Towards a Food and Drink Gazetteer}}, booktitle = {{Semantic Keyword-based Search on Structured Data Sources}}, year = 2016, editor = {Jorge Cardoso and Francesco Guerra and Geert-Jan Houben and Alexandre Miguel Pinto and Yannis Velegrakis}, volume = 9398, series = {Lecture Notes in Computer Science}, pages = {182-196}, month = jan, publisher = {Springer}, note = {First COST Action IC1302 International KEYSTONE Conference (IKC 2015), Coimbra, Portugal, September 8-9, 2015. Revised Selected Papers}, url = {https://link.springer.com/chapter/10.1007/978-3-319-27932-9_16}, url_Slides = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/Tagarev2015-DomainSpecificGazetteer-slides.pdf}, url_Preprint = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/Tagarev2015-DomainSpecificGazetteer.pdf}, keywords = {classification, categorization, Wikipedia, DBpedia, gazetteer, Europeana, Cultural Heritage, concept extraction, semantic enrichment, food and drink}, chapter = 16, doi = {10.1007/978-3-319-27932-9_16}, isbn = {978-3-319-27932-9}, abstract = {Our goal is to build a Food and Drink (FD) gazetteer that can serve for classification of general, FD-related concepts, efficient faceted search or automated semantic enrichment. Fully supervised design of a domain-specific model "ex novo" is not scalable. Integration of several ready-made knowledge bases is tedious and does not ensure coverage. Completely data-driven approaches require a large amount of training data, which is not always available. In cases when the domain is not very specific (as the FD domain), re-using encyclopedic knowledge bases like Wikipedia may be a good idea.
We propose here a semi-supervised approach, that uses a restricted Wikipedia as a base for the modeling, achieved by selecting a domain-relevant Wikipedia category as root for the model and all its subcategories, combined with expert and data-driven pruning of irrelevant categories.}, } @InProceedings{UzunovAlexiev2016-Fulbright, author = {Ilian Uzunov and Vladimir Alexiev}, title = {{Linked Open Data for Cultural Heritage Institutions: Build Narratives through Connecting Artifacts}}, booktitle = {{Museum Exhibits and Standards: A Look Ahead}}, year = 2016, month = nov, address = {Sofia, Bulgaria}, organization = {Bulgarian-American Fulbright Commission for Educational Exchange: Bi-National Commission for the Preservation of Bulgaria's Cultural Heritage}, url = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20161128-fulbright/index-full.html}, url_PDF = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20161128-fulbright/Linked_Open_Data_for_Cultural_Heritage_Institutions.pdf}, url_Slides = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20161128-fulbright/index.html}, } @TechReport{Alexiev2015-CH-names, author = {Vladimir Alexiev}, title = {{Name Data Sources for Semantic Enrichment}}, institution = {Europeana Creative project}, year = 2015, type = {Deliverable}, number = {Part of Deliverable D2.4}, month = feb, url = {https://rawgit2.com/VladimirAlexiev/CH-names/master/README.html}, keywords = {Europeana, semantic enrichment, knowledge base, gazetteer, VIAF, Wikidata, ULAN, ISNI}, abstract = {Semantic enrichment in Europeana is a very difficult task due to several factors: 1. Varying metadata quality across different collections, sometimes including misallocation of metadata fields; 2. Varying metadata formatting practices across different collections, e.g. some collections indicate the role of a creator in brackets after the creator name; 3. Lack of accurate language information. In this report we focus on Person & Institution enrichment (person Named Entity Recognition), which in itself is an ambitious task. Historic people are often referred to by many names. For successful semantic enrichment it's important to integrate high-quality and high-coverage datasets that provide name info. There is a great number of Name Authority files maintained at libraries, museums and other heritage institutions world-wide, e.g. VIAF, ISNI, Getty ULAN, British Museum. Linked Open Data (LOD) datasets also have a plethora of names, e.g. in DBpedia, Wikidata and FreeBase. We analyze some of the available datasets in terms of person coverage, name coverage, language tags, extra features that can be useful for enrichment, quality. We also analyze the important topic of coreferencing, i.e. 
how connected the sources are to each other.}, } @TechReport{Alexiev2015-EFD-classification, author = {Vladimir Alexiev}, title = {{Europeana Food and Drink Classification Scheme}}, institution = {Europeana Food and Drink project}, year = 2015, type = {Deliverable}, number = {D2.2}, month = feb, url = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/Europeana-Food-and-Drink-Classification-Scheme-(D2.2).pdf}, url_Slides = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/Europeana Food and Drink Classification and Ideas for Semantic App (201503).pptx}, keywords = {Europeana, cultural heritage, food and drink, classification, categorization, DBpedia, Wikipedia, AGROVOC, WordNet, UMBEL}, abstract = {The Europeana Food and Drink Classification scheme (EFD classification) is a multi-dimensional scheme for discovering and classifying Cultural Heritage Objects (CHO) related to Food and Drink (FD). The topic of Food and Drink is so pervasive in our daily lives and in our culture that assembling a small "specialist" thesaurus is not feasible (such specialist thesauri were successfully used in other Europeana projects, e.g. ECLAP on performing arts and PartagePlus on Art Nouveau). We investigate about 20 datasets for their relevance to FD, including the Getty thesauri, Wordnet FD Domain, Wikipedia (in its 2 semantic data representations: DBpedia and Wikidata), AGROVOC, etc. We have selected Wikipedia as the basis for the classification, and plan to use the Wikipedia Categories to construct a hierarchical network to be used for classification. The project will also use innovative semantic technologies to automate the extraction of terms and co-references. The result will be a body of semantically-enriched metadata that can support a wider range of multi-lingual applications such as search, discovery and browsing. (91 pages)}, } @TechReport{Alexiev2015-EFD-semapp, author = {Vladimir Alexiev}, title = {{Europeana Food and Drink Semantic Demonstrator Delivery}}, institution = {Europeana Food and Drink project}, year = 2015, type = {Deliverable}, number = {D3.20}, month = oct, url = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/Europeana-Food-and-Drink-Semantic-Demonstrator-Delivery-(D3.20).pdf}, keywords = {Europeana, cultural heritage, food and drink, semantic application, semantic search, faceted search, semantic enrichment}, abstract = {Describes the development and delivery of the EFD Semantic Demonstrator.
We describe all work performed between 1 April 2015 and 31 October 2015, the achieved results, the created data and enrichments, and the developed application.}, } @TechReport{Alexiev2015-EFD-semapp-progress1, author = {Vladimir Alexiev}, title = {{Europeana Food and Drink Semantic Demonstrator M18 Progress Report}}, institution = {Europeana Food and Drink project}, year = 2015, type = {Progress Report}, number = {D3.20a}, month = jun, url = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/Europeana-Food-and-Drink-Semantic-Demonstrator-M18-Report-(D3.20a).pdf}, keywords = {Europeana, cultural heritage, food and drink, semantic application, semantic search, faceted search, semantic enrichment}, abstract = {Describes the development progress on the Europeana Food and Drink Semantic Demonstrator for the first 2.5 months (between 1 April 2015 and 15 June 2015), the achieved results, and project management considerations.}, } @TechReport{Alexiev2015-EFD-semapp-spec, author = {Vladimir Alexiev}, title = {{Europeana Food and Drink Semantic Demonstrator Specification}}, institution = {Europeana Food and Drink project}, year = 2015, type = {Deliverable}, number = {D3.19}, month = mar, url = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/Europeana-Food-and-Drink-Semantic-Demonstrator-Specification-(D3.19).pdf}, keywords = {Europeana, cultural heritage, food and drink, semantic application, semantic search, faceted search, semantic enrichment}, abstract = {The Europeana Food and Drink Semantic Demonstrator (EFD sem app) will allow multi-dimensional semantic exploration and discovery of cultural heritage objects (CHO) related to Food and Drink (FD). It will both apply and augment the EFD Classification scheme, using positive feedback loop mechanisms: the more the classification is used, the better it becomes. It will enable providers to classify their content, and consumers to explore CHOs using semantic search}, } @Misc{Alexiev2015-GLAMs-Wikidata, author = {Vladimir Alexiev}, title = {{GLAMs Working with Wikidata}}, howpublished = {presentation}, month = may, year = 2015, url = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/20150518-GLAMs-working-with-Wikidata.ppt}, keywords = {Wikidata, Wikipedia, cultural heritage}, booktitle = {{Europeana Food and Drink content provider workshop}}, address = {Athens, Greece}, abstract = {How GLAMs can use Wikipedia/Wikidata to make their collections globally accessible across languages.}, } @Manual{Alexiev2015-GVP-LOD-doc, title = {{Getty Vocabularies Linked Open Data: Semantic Representation}}, author = {Vladimir Alexiev and Joan Cobb and Gregg Garcia and Patricia Harpring}, organization = {Getty Research Institute}, edition = {3.2}, month = mar, year = 2015, url = {https://vocab.getty.edu/doc/}, keywords = {Getty, GVP, vocabularies, thesauri, AAT, TGN, ULAN, semantic representation, LOD, ontology, SKOS, SKOS-XL, ISO 25964}, } @Manual{Alexiev2015-GVP-ontology, title = {{Getty Vocabulary Program (GVP) Ontology}}, author = {Vladimir Alexiev}, edition = {3.2}, month = mar, year = 2015, url = {https://vocab.getty.edu/ontology}, url_LOV = {https://lov.okfn.org/dataset/lov/details/vocabulary_gvp.html}, keywords = {Getty, GVP, vocabularies, thesauri, AAT, TGN, ULAN, semantic representation, LOD, ontology, SKOS, SKOS-XL, ISO 25964, DC, DCT, BIBO, FOAF, BIO, Schema, PROV, WGS84}, institution = {Getty Research Institute}, type = {Ontology}, note = {Ontology}, abstract = {The GVP Ontology defines classes, properties and values (skos:Concepts) used in GVP LOD. 
As of version 3.0, it is complete regarding AAT, TGN and ULAN, and will be extended in time with more elements needed for other GVP vocabularies (CONA). It uses the SKOS, SKOS-XL, ISO 25964; DC, DCT, BIBO, FOAF, BIO, Schema, PROV, WGS84 ontologies.}, } @Misc{Alexiev2015-Glam-Wiki, author = {Vladimir Alexiev and Valentine Charles and Hugo Manguinhas}, title = {{Wikidata, a Target for Europeana's Semantic Strategy}}, howpublished = {presentation}, month = apr, year = 2015, url = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/GLAMwiki2015.ppt}, url_Other = {https://nl.wikimedia.org/wiki/GLAM-WIKI_2015/Programme/Discussions/Strategy#Presentation:_Wikidata.2C_a_target_for_Europeana.E2.80.99s_semantic_strategy.3F}, keywords = {cultural heritage, GLAM, Europeana, semantic enrichment, Wikidata, Wikipedia}, booktitle = {{Glam-Wiki 2015}}, address = {The Hague}, abstract = {For Europeana, the platform for Europe's digital cultural heritage from libraries, museums and archives, getting richer (semantic and multilingual) metadata is a priority. It improves access to the 40 million cultural heritage objects, notably enabling the multilingual retrieval of documents and creating relations between objects. To enhance data and enable retrieval across languages, Europeana performs automatic enrichment by selecting source metadata field(s) in the Europeana data and creating links to a selected target vocabulary or dataset representing contextual resources such as places, concepts, agents and time periods. Wikidata has been on Europeana's radar for a while as a potential new target for enrichment, but how can it be integrated with cultural heritage data?}, } @Article{Alexiev2015-IJDL, author = {Vladimir Alexiev and Jutta Lindenthal and Antoine Isaac}, title = {{On the composition of ISO 25964 hierarchical relations (BTG, BTP, BTI)}}, journal = {{International Journal on Digital Libraries}}, year = 2015, pages = {1-10}, month = aug, url = {https://link.springer.com/article/10.1007/s00799-015-0162-2}, url_PDF = {https://link.springer.com/content/pdf/10.1007/s00799-015-0162-2.pdf}, keywords = {Thesauri, ISO 25964, BTG, BTP, BTI, Broader generic, Broader partitive, Broader instantial, AAT}, issn = {1432-1300}, publisher = {Springer}, language = {English}, doi = {10.1007/s00799-015-0162-2}, abstract = {Knowledge organization systems (KOS) can use different types of hierarchical relations: broader generic (BTG), broader partitive (BTP), and broader instantial (BTI). The latest ISO standard on thesauri (ISO 25964) has formalized these relations in a corresponding OWL ontology and expressed them as properties: broaderGeneric, broaderPartitive, and broaderInstantial, respectively. These relations are used in actual thesaurus data. The compositionality of these types of hierarchical relations has not been investigated systematically yet. They all contribute to the general broader (BT) thesaurus relation and its transitive generalization broader transitive defined in the SKOS model for representing KOS. But specialized relationship types cannot be arbitrarily combined to produce new statements that have the same semantic precision, leading to cases where inference of broader transitive relationships may be misleading. We define Extended properties (BTGE, BTPE, BTIE) and analyze which compositions of the original "one-step" properties and the Extended properties are appropriate. This enables providing the new properties with valuable semantics usable, e.g., for fine-grained information retrieval purposes.
In addition, we relax some of the constraints assigned to the ISO properties, namely the fact that hierarchical relationships apply to SKOS concepts only. This allows us to apply them to the Getty Art and Architecture Thesaurus (AAT), where they are also used for non-concepts (facets, hierarchy names, guide terms). In this paper, we present extensive examples derived from the recent publication of AAT as linked open data.}, } @Misc{Alexiev2015-SAAM-Review, author = {Vladimir Alexiev}, title = {{Smithsonian American Art Museum LOD Review}}, howpublished = {Github wiki page}, month = feb, year = 2015, url = {https://github.com/usc-isi-i2/saam-lod/wiki/SAAM-LOD-Review}, keywords = {cultural heritage, RDF, LODLAM, CIDOC CRM, SAAM, mapping, review, data quality}, abstract = {Review of the initial LOD publication of the Smithsonian American Art Museum and recommendations for improvement}, } @Misc{Alexiev2015-SexOrGender, author = {Vladimir Alexiev}, title = {{Sex or Gender?}}, howpublished = {blog post}, month = feb, year = 2015, url = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/sex-or-gender/index.html}, abstract = {Considerations about sex/gender enumeration values in LOD. While working on ULAN LOD, I wondered how should we map the ULAN field "sex". So I did a small review of available LOD properties and values.}, } @Misc{Alexiev2015-bg.dbpedia, author = {Vladimir Alexiev}, title = {{bg.dbpedia.org launched}}, howpublished = {presentation}, month = feb, year = 2015, url = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20150209-dbpedia/bg-dbpedia-launched-long.html}, url_Slides = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20150209-dbpedia/bg-dbpedia-launched.html}, keywords = {DBpedia}, booktitle = {{DBpedia Meeting}}, address = {Dublin, Ireland}, } @Misc{Alexiev2015-dbpedia-mapping, author = {Vladimir Alexiev}, title = {{Adding a DBpedia Mapping}}, howpublished = {presentation}, month = feb, year = 2015, url = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20150209-dbpedia/add-mapping-long.html}, url_Slides = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20150209-dbpedia/add-mapping.html}, keywords = {DBpedia, ontology mapping}, booktitle = {{DBpedia Meeting}}, address = {Dublin, Ireland}, } @Misc{Alexiev2015-dbpedia-problems, author = {Vladimir Alexiev}, title = {{DBpedia Ontology and Mapping Problems}}, howpublished = {presentation}, month = feb, year = 2015, url = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20150209-dbpedia/dbpedia-problems-long.html}, url_Slides = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20150209-dbpedia/dbpedia-problems.html}, keywords = {DBpedia, ontology, ontology mapping, data quality}, booktitle = {{DBpedia Meeting}}, address = {Dublin, Ireland}, } @Misc{AlexievAngelova2015-CultJam15, author = {Vladimir Alexiev and Dilyana Angelova}, title = {{O is for Open: OAI and SPARQL interfaces for Europeana}}, month = jul, year = 2015, url = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/O_is_for_Open_(CultJam_201507)_poster.pdf}, url_Slides = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/O_is_for_Open_(CultJam_201507)_slide.pdf}, keywords = {Europeana, OAI, OAI PMH, SPARQL, EDM, semantic repository}, type = {poster}, booktitle = {{Europeana Creative Culture Jam}}, address = {Vienna, Austria}, abstract = {Poster. As part of the Europeana Creative project, Ontotext added 2 additional channels to Europeana Labs: OAI & SPARQL, complementing the API. OAI is used for bulk download (e.g. 
to update the semantic repository). SPARQL can answer queries that the API cannot, e.g. linking objects, exploring contextual entities (e.g. parent places or author life dates), analytics/charts.}, } @TechReport{AlexievTolosi2015-EFD-semapp-progress2, author = {Vladimir Alexiev and Laura Tolosi}, title = {{Europeana Food and Drink Semantic Demonstrator M21 Progress Report}}, institution = {Europeana Food and Drink project}, year = 2015, type = {Progress Report}, number = {D3.20b}, month = oct, url = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/Europeana-Food-and-Drink-Semantic-Demonstrator-M21-Report-(D3.20b).pdf}, keywords = {Europeana, cultural heritage, food and drink, semantic application, semantic search, faceted search, semantic enrichment}, abstract = {Describes the progress on developing the EFD Semantic Demonstrator for the 3 months from 1 Jul 2015 to 1 Oct 2015. We describe all work performed, the achieved results and project management considerations.}, } @TechReport{Europeana2015-evaluation-enrichments, author = {Antoine Isaac and Hugo Manguinhas and Juliane Stiller and Valentine Charles and others}, title = {{Europeana Report on Enrichment and Evaluation}}, institution = {Europeana Task Force on Enrichment and Evaluation}, year = 2015, month = oct, url = {https://pro.europeana.eu/taskforce/evaluation-and-enrichments}, keywords = {cultural heritage, Europeana, task force, semantic enrichment, text analysis, multilingual, evaluation}, abstract = {This report on Evaluation and Enrichment provides an overview of the different processes in semantic enrichment and offers guidance on how to assess each of these steps to implement a coherent enrichment strategy. The report begins by introducing the terminology used in the report. While defining the notion of semantic enrichment, the Task Force has identified several other associated notions that are commonly used in the cultural heritage domain when addressing semantic enrichment. We also provide an overview of the enrichment tools and services developed in the Europeana Network over the past years, reflecting the diversity of processes at hand: tools for manual enrichment and annotation, tools for automatic enrichment and workflow design tools. We also focus on the interoperability issues such as rules for specifying the linking or the format used to describe the enrichment outputs. As well as looking at the details of the enrichment processes, we pick up the work done by the previous Task Force by specifying criteria for selecting and assessing target datasets. These criteria are based on examples of vocabularies and datasets relevant to the Cultural Heritage domain. This selection strategy is available in a companion document to this report. The last component of the enrichment strategy is the evaluation of the enrichment processes. So far, evaluation in this domain has not been much documented even though a lot of work has been done in the field. We have tried to summarise different evaluation methodologies developed in related projects. These methods highlight the different components of the enrichment process that can be subject to evaluation. In order to validate all the recommendations provided in the previous sections, we have performed a quantitative and qualitative evaluation of seven enrichment services on the same subset of the Europeana dataset. The report of the evaluation is available in a companion document to this report while the main conclusions remain in this report.
This report is a result of an inventory of tools, practices and standards that define the current state of the art for semantic enrichment. The analysis and evaluation work done during the course of the Task Force has allowed us to compile a series of lessons learnt that should be considered for the design and enhancement of enrichment services and their evaluation.}, } @TechReport{HCLS-profile, title = {{Dataset Descriptions: HCLS Community Profile}}, author = {Alasdair J. G. Gray and Joachim Baran and M. Scott Marshall and Michel Dumontier and Vladimir Alexiev and others}, month = may, year = 2015, url = {https://www.w3.org/TR/hcls-dataset/}, keywords = {Data profiling, Dataset descriptions, Metadata, Provenance, FAIR data, HCLS, dataset, VOID, ontology, Bioinformatics, Taxonomy}, institution = {Semantic Web in Health Care and Life Sciences Interest Group (HCLSIG)}, abstract = {Access to consistent, high-quality metadata is critical to finding, understanding, and reusing scientific data. This document describes a consensus among participating stakeholders in the Health Care and the Life Sciences domain on the description of datasets using the Resource Description Framework (RDF). This specification meets key functional requirements, reuses existing vocabularies to the extent that it is possible, and addresses elements of data description, versioning, provenance, discovery, exchange, query, and retrieval.}, } @InProceedings{ICMEW2015-Multisensor, author = {Stefanos Vrochidis and Ioannis Kompatsiaris and Gerard Casamayor and Ioannis Arapakis and Reinhard Busch and Vladimir Alexiev and Emmanuel Jamin and Michael Jugov and Nicolaus Heise and Teresa Forrellat and Dimitris Liparas and Leo Wanner and Iris Miliaraki and Vera Aleksic and Kiril Simov and Alan Mas Soro and Mirja Eckhoff and Tilman Wagner and Marti Puigbo}, title = {{MULTISENSOR: Development of Multimedia Content Integration Technologies for Journalism, Media Monitoring and International Exporting Decision Support}}, booktitle = {{2015 IEEE International Conference on Multimedia & Expo Workshops (ICMEW)}}, year = 2015, pages = {1-4}, month = jun, address = {Turin, Italy}, url = {https://www.computer.org/csdl/proceedings/icmew/2015/7079/00/07169818.pdf}, keywords = {Multisensor, semantic enrichment, NLP, multimedia annotation, Journalism, Media Monitoring, International Export, Decision Support}, doi = {10.1109/ICMEW.2015.7169818}, abstract = {This paper presents an overview and the first results of the FP7 MULTISENSOR project, which deals with multidimensional content integration of multimedia content for intelligent sentiment enriched and context oriented interpretation.
MULTISENSOR aims at providing unified access to multilingual, multimedia and multicultural economic, news story material across borders in order to support journalism and media monitoring tasks and provide decision support for internationalisation of companies.}, } @Misc{Zeng2015-NKOS, author = {Marcia Zeng and Julaine Clunis and Vladimir Alexiev}, title = {{Innovative Use of KOS that are Published as Linked Open Data (LOD)}}, howpublished = {presentation}, month = dec, year = 2015, url = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/MarciaZeng-LODKOSInnovativeUse.pdf}, keywords = {vocabularies, thesauri, SKOS, NKOS, ontology}, booktitle = {{First NKOS Workshop at International Conference on Asian Digital Libraries (ICADL 2015), Yonsei University, Seoul, Korea}}, } @comment{https://nkos.slis.kent.edu/2015NKOSworkshop/MarciaZeng-LODKOSInnovativeUse.pdf} @TechReport{Alexiev2014-ExtendingOWL2, author = {Vladimir Alexiev}, title = {{Extending OWL2 Property Constructs with OWLIM Rules}}, institution = {Ontotext Corp}, year = 2014, month = sep, url = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/extending-owl2/index.html}, keywords = {ontology, OWL2, Property Chain Axiom, sub-property, property inferencing, transitive properties, Ontotext GraphDB}, abstract = {While OWL2 has very powerful class constructs, its property constructs are quite weak. We propose several extensions that we found useful, and implement them using OWLIM rules.}, } @InProceedings{Alexiev2014-GVP-LOD, author = {Vladimir Alexiev}, title = {{Getty Vocabulary Program LOD: Ontologies and Semantic Representation}}, booktitle = {{CIDOC Congress}}, year = 2014, month = sep, address = {Dresden, Germany}, url = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20140905-CIDOC-GVP/GVP-LOD-CIDOC.pdf}, url_Slides = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20140905-CIDOC-GVP/index.html}, keywords = {Getty, GVP, AAT, TGN, ULAN, LOD, thesauri, vocabularies, SKOS, SKOS-XL, ISO 25964}, } @Manual{Alexiev2014-GraphDBRuleProfiling, title = {{Ontotext GraphDB Rules Optimisations}}, author = {Vladimir Alexiev}, month = dec, year = 2014, url = {https://graphdb.ontotext.com/documentation/standard/rules-optimisations.html}, keywords = {Ontotext GraphDB, inference, performance, optimization, profiling}, abstract = {GraphDB 6 includes a useful new feature that allows you to debug rule performance. We also include Optimization Hints for ruleset performance.}, } @Misc{Alexiev2014-LinguisticLD, author = {Vladimir Alexiev}, title = {{Linguistic Linked Data}}, howpublished = {presentation}, month = oct, year = 2014, url = {https://rawgit2.com/VladimirAlexiev/multisensor/master/20141008-Linguistic-LD/index-full.html}, url_Slides = {https://rawgit2.com/VladimirAlexiev/multisensor/master/20141008-Linguistic-LD/index.html}, keywords = {Linguistic Linked Data, NLP, NLP2RDF, NIF, OLIA, NERD, MARL, BabelNet, FrameNet, WordNet}, booktitle = {{Multisensor Project Meeting}}, address = {Bonn, Germany}, abstract = {There's been a huge drive in recent years to represent NLP data as RDF. NLP data is usually large, so does it make sense to represent it as RDF? What's the benefit? Ontologies, schemas and groups include: GRaF ITS2 FISE LAF LD4LT LEMON LIME LMF MARL NERD NIF NLP2RDF OLIA OntoLex OntoLing OntoTag Penn Stanford... my oh my!
There are a lot of linguistic resources available that can be used profitably: BabelNet FrameNet GOLD ISOcat LemonUBY Multitext OmegaNet UBY VerbNet Wiktionary WordNet.}, } @Misc{Alexiev2014-Malmo, author = {Vladimir Alexiev}, title = {{Semantic Technologies for Cultural Heritage}}, howpublished = {presentation}, month = aug, year = 2014, url = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20140821-Malmo/index.html}, url_PDF = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20140821-Malmo/SemTechCH-Malmo.pdf}, url_Video = {https://youtu.be/n8oGmOu9JEw}, keywords = {semantic technology, ontology, semantic integration, cultural heritage}, booktitle = {{Malmo Linked Data Meetup}}, address = {Malmo, Sweden}, } @InProceedings{Alexiev2014-NKOS, author = {Vladimir Alexiev and Jutta Lindenthal and Antoine Isaac}, title = {{On Compositionality of ISO 25964 Hierarchical Relations (BTG, BTP, BTI)}}, booktitle = {{13th European Networked Knowledge Organization Systems (NKOS 2014)}}, year = 2014, month = sep, address = {London, UK}, url = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20140912-NKOS-compositionality/index-full.html}, url_PDF = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20140912-NKOS-compositionality/BTG-BTP-BTI-compositionality.pdf}, url_Slides = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20140912-NKOS-compositionality/index.html}, } @Misc{Alexiev2014-SmartCulture, author = {Vladimir Alexiev}, title = {{Semantic Technologies for Cultural Heritage}}, howpublished = {presentation}, month = jun, year = 2014, url = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20140611-SmartCulture-sem-tech-CH/index.html}, url_PDF = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20140611-SmartCulture-sem-tech-CH/Semantic Technologies for Cultural Heritage.pdf}, keywords = {semantic technology, ontology, semantic integration, cultural heritage}, booktitle = {{SmartCulture Conference}}, address = {Brussels, Belgium}, } @Misc{Alexiev2014-University-Washington, author = {Vladimir Alexiev}, title = {{Doing Business with Semantic Technologies. INFX 598 - Introducing Linked Data: concepts, methods and tools. Information School, University of Washington. Module 9}}, howpublished = {Guest lecture}, month = may, year = 2014, url = {https://github.com/VladimirAlexiev/my/raw/master/pres/Doing%20Business%20with%20Semantic%20Technologies%20(201405%20guest%20lecture).ppt}, url_Video = {https://voicethread.com/myvoice/#thread/5784646/29625471/31274564}, keywords = {semantic technology}, howpublished = {presentation}, abstract = {Introduction to Ontotext and some of its products, clients and projects}, } @TechReport{Europeana2014-enrichment-strategy, author = {Juliane Stiller and Antoine Isaac and Vivien Petras and others}, title = {{EuropeanaTech Task Force on a Multilingual and Semantic Enrichment Strategy}}, institution = {Europeana}, year = 2014, month = apr, url = {https://pro.europeana.eu/project/multilingual-and-semantic-enrichment-strategy}, keywords = {cultural heritage, Europeana, task force, semantic enrichment, text analysis, multilingual, evaluation}, abstract = {The semantic and multilingual enrichment of metadata in Europeana is a core concern as it improves access to the material, defines relations among objects and enables cross-lingual retrieval of documents. The quality of these enrichments is crucial to ensure that highly curated content from providers gets represented correctly across different languages. 
To ensure that those enrichments unfold their whole potential and act as facilitators of access, a semantic and multilingual enrichment strategy is needed. The EuropeanaTech Task Force on a Multilingual and Semantic Enrichment Strategy set out to analyze datasets in Europeana and to evaluate them with regard to their enrichment potential and the enrichments that were executed. The goal was to drive a strategy for enriching metadata fields adding value for users. To achieve this, the members of the task force held a one-day workshop in Berlin where they analyzed randomly selected datasets from Europeana, their metadata fields and their enrichment potential. This report aggregates the results and derives findings and recommendations regarding the metadata quality (source), vocabulary used (target) and the enrichment process. It was found that especially during mapping and ingestion time, metadata quality issues arise that influence the success of the enrichments. Tackling these issues with better documentation, training and the establishment of quality scores are some of the recommendations in this field. Furthermore, Europeana should encourage the delivery of specialized vocabularies with resolvable URIs which would also lead to less need for enrichments by Europeana itself. With regard to the enrichment process, clear rules for each field need to be established.}, } @InProceedings{KiryakovAlexiev2014-Keystone, author = {Atanas Kiryakov and Vladimir Alexiev}, title = {{Semantic Technology in Publishing & Finance}}, booktitle = {{Keystone Industrial Panel, ISWC 2014}}, year = 2014, month = Oct, address = {Riva del Garda, Italy}, url = {https://rawgit2.com/VladimirAlexiev/my/master/pres/Semantic Technology in Publishing & Finance (ISWC 2013, Keystone industrial panel).pptx}, abstract = {Triplestores and inference, applications in Finance, text-mining. Projects and solutions for financial media and publishers. Thanks to Atanas Kiryakov for this presentation, I just cut it to size.}, } @TechReport{LDBC-BenchmarkReasoning-2014, author = {Vassilis Papakonstantinou and Irini Fundulaki and Giorgos Flouris and Vladimir Alexiev}, title = {{Benchmark Design for Reasoning}}, institution = {Linked Data Benchmarking Council project}, year = 2014, type = {Deliverable}, number = {D4.4.2}, month = sep, url = {https://ldbcouncil.org/post/owl-empowered-sparql-query-optimization/LDBC_D4.4.2_final.pdf}, keywords = {LDBC, benchmark, reasoning, inference}, abstract = {Reasoning (mainly OWL reasoning) has received increasing attention by ontology designers for more accurately representing the domain at hand. To reflect this importance, one of LDBC’s objectives is to identify a set of interesting use cases that consider OWL reasoning constructs (beyond the usual RDFS constructs) that can be used to challenge existing RDF engines or repositories. 
This Deliverable has two parts: in the first part, we present four different sets of queries that can be used to determine whether RDF query engines take into account OWL constructs during query plan construction or query execution; in the second part we consider how a repository or query engine incorporates and considers business rules, i.e., domain-specific rules that follow common templates, useful in practical applications.}, } @TechReport{LDBC-SemanticPublishing-2014, author = {Venelin Kotsev and Atanas Kiryakov and Irini Fundulaki and Vladimir Alexiev}, title = {{{LDBC Semantic Publishing Benchmark}} ({{SPB}}) v2.0}, institution = {Linked Data Benchmarking Council project}, year = 2014, number = {v2.0 First Public Draft Release}, url = {https://ldbcouncil.org/publication/ldbc-spc-specification/}, keywords = {LDBC, benchmark, semantic publishing, graph databases}, abstract = {The Semantic Publishing Benchmark (SPB) is an LDBC benchmark for RDF database engines inspired by the Media/Publishing industry, particularly by the BBC’s Dynamic Semantic Publishing approach. As of June 2014 the benchmark has reached the state of draft publication. This document describes the current state of the Semantic Publishing Benchmark software. The application scenario behind the benchmark considers a media or a publishing organisation that deals with a large volume of streaming content, namely articles and other “creative works” and “media assets”. This content is enriched with metadata that describes it and links it to reference knowledge – taxonomies and databases that include relevant concepts, entities and factual information. This metadata allows publishers to efficiently retrieve relevant content, according to their various business models. From a technology standpoint, the benchmark assumes that an RDF database is used to store both the reference knowledge and the metadata. The main interactions with the repository are (i) updates, that add new metadata or alter the repository, and (ii) aggregation queries, that retrieve content according to various criteria. The engine should instantly handle a large number of updates in parallel with a massive amount of aggregation queries. This document describes all features of the SPB: data (reference data-sets, ontologies, data generation), query workloads (descriptions of queries used, choke point descriptions), validation of query results and instructions (how to configure and use the benchmark driver, execution, auditing and disclosure rules).}, } @InProceedings{Alexiev2013-CRM-reasoning, author = {Vladimir Alexiev and Dimitar Manov and Jana Parvanova and Svetoslav Petrov}, title = {{Large-scale Reasoning with a Complex Cultural Heritage Ontology (CIDOC CRM)}}, booktitle = {{Workshop Practical Experiences with CIDOC CRM and its Extensions (CRMEX 2013) at TPDL 2013}}, year = 2013, volume = 1117, month = sep, address = {Valetta, Malta}, publisher = {CEUR WS}, url = {https://ceur-ws.org/Vol-1117/paper8.pdf}, url_Slides = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/Alexiev2013-CRM-reasoning-slides.ppt}, url_Preprint = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/Alexiev2013-CRM-reasoning.pdf}, keywords = {cultural heritage, semantic technology, ontology, CIDOC CRM, semantic search, Fundamental Concepts, Fundamental Relations, GraphDB, semantic repository, inference, performance, ResearchSpace}, abstract = {The CIDOC Conceptual Reference Model (CRM) is an important ontology in the Cultural Heritage (CH) domain.
CRM is intended mostly as a data integration mechanism, allowing reasoning and discoverability across diverse CH sources represented in CRM. CRM data comprises complex graphs of nodes and properties. An important question is how to search through such complex graphs, since the number of possible combinations is staggering. One answer is the "Fundamental Relations" (FR) approach that maps whole networks of CRM properties to fewer FRs, serving as a "search index" over the CRM semantic web. We present performance results for an FR Search implementation based on OWLIM. This search works over a significant CH dataset: almost 1B statements resulting from 2M objects of the British Museum. This is an exciting demonstration of large-scale reasoning with real-world data over a complex ontology (CIDOC CRM). We present volumetrics, hardware specs, compare the numbers to other repositories hosted by Ontotext, performance results, and compare performance of a SPARQL implementation.}, } @Misc{Alexiev2013-ResearchSpace, author = {Vladimir Alexiev}, title = {{ResearchSpace as an Example of a VRE Based on CIDOC CRM}}, howpublished = {presentation}, month = apr, year = 2013, url = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20130413-ResearchSpace as an Example of a VRE Based on CIDOC CRM.pptx}, keywords = {virtual research environment, ontology, CIDOC CRM, ResearchSpace, VCMS}, booktitle = {{Virtual Center for Medieval Studies (Medioevo Europeo VCMS) Workshop}}, address = {Bucharest, Romania}, } @Manual{Alexiev2013-SPARQL-diagrams, title = {{SPARQL 1.1 Syntax Diagrams}}, author = {Vladimir Alexiev}, organization = {Ontotext Corp}, month = apr, year = 2013, url = {https://rawgit2.com/VladimirAlexiev/grammar-diagrams/master/sparql11-grammar.xhtml}, url_Github = {https://github.com/VladimirAlexiev/grammar-diagrams}, url_ebnf = {https://rawgit2.com/VladimirAlexiev/grammar-diagrams/master/sparql11-grammar.ebnf}, abstract = {Cross-linked SPARQL 1.1 syntax (railroad) diagrams, one per production (173 total). A bit hard to understand: use this for reference, but not for learning SPARQL. Also available: EBNF syntax rules extracted from the SPARQL 1.1 specification, Text file including production numbers and syntax rules}, } @Manual{Alexiev2013-XSPARQL-diagrams, title = {{XSPARQL Syntax Diagrams}}, author = {Vladimir Alexiev}, organization = {Ontotext Corp}, month = apr, year = 2013, url = {https://github.com/VladimirAlexiev/grammar-diagrams#xsparql}, url_Github = {https://github.com/VladimirAlexiev/xsparql/blob/master/doc/}, abstract = {Cross-linked XSPARQL syntax (railroad) diagrams, one per production. XSPARQL is a melding of XQuery and SPARQL. 
Its syntax is pretty large, so the diagrams help.}, } @Misc{Alexiev2013-VCMS-design, author = {Vladimir Alexiev}, title = {{VCMS Project & Proposal Design}}, howpublished = {presentation}, month = oct, year = 2013, url = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20131013-VCMS Project Design.pptx}, keywords = {Medieval studies, virtual research environment, CIDOC CRM, semantic integration}, booktitle = {{Virtual Center for Medieval Studies (Medioevo Europeo VCMS) Workshop}}, address = {Budapest, Hungary}, } @TechReport{Europeana2013-EDM-FRBRoo, author = {Martin Doerr and Stefan Gradmann and others}, title = {{Europeana Task Force on EDM-FRBRoo Application Profile}}, institution = {Europeana}, year = 2013, month = may, url = {https://pro.europeana.eu/project/edm-frbroo-application-profile}, abstract = {The EDM – FRBRoo Application Profile Task Force (EFAP-TF) was launched in response to the recommendations from the deliverable D3.4 from Europeana V1.0. This deliverable asked for an application profile that would allow a better representation of the FRBR group 1 entities: work, expression, manifestation and item. Additionally, it was to be conceived as an application profile of FRBRoo where each intellectual contribution (e.g., in the publication process) and the related activity are treated as entities in their own right, and does not depend too much on the notion of a bibliographic record. As a starting point they suggested the mapping of FRBRoo and EDM offered by the CIDOC CRM working group. The aim of the EFAP-TF is to extend, correct or restrict this suggested mapping and provide examples for the use of the combined EDM and FRBRoo namespaces. This report delivers combined models in terms of properties and classes of EDM and FRBRoo illustrated by sample data. Smaller groups have worked on three different examples. The report also provides principles for modeling and mapping rules based on the experiments of the working groups.}, keywords={Europeana, task force, EDM, FRBR, FRBRoo, CIDOC CRM, ontology} } @Manual{ISO-25964-owl, title = {{ISO 25964 Part 1: Thesauri for information retrieval: RDF/OWL vocabulary, extension of SKOS and SKOS-XL}}, author = {Johan De Smedt and Antoine Isaac and Stella Dextre Clarke and Jutta Lindenthal and Marcia Lei Zeng and Douglas S. Tudhope and Leonard Will and Vladimir Alexiev}, month = dec, year = 2013, note = {Ontology}, url = {https://lov.linkeddata.es/dataset/lov/vocabs/iso-thes}, keywords = {thesauri, vocabularies, ISO 25964, ontology}, type = {Ontology}, url_broken = {https://purl.org/iso25964/skos-thes}, abstract = {OWL ontology representing the newest ISO standard on thesauri}, } @InProceedings{Ikonomov2013-EuropeanaCreative-EDM, author = {Nikola Ikonomov and Boyan Simeonov and Jana Parvanova and Vladimir Alexiev}, title = {{Europeana Creative. EDM Endpoint. 
Custom Views}}, booktitle = {{Digital Presentation and Preservation of Cultural and Scientific Heritage (DiPP 2013)}}, year = 2013, month = sep, address = {Veliko Tarnovo, Bulgaria}, url = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/Ikonomov2013-EuropeanaCreative-EDM.pdf}, url_Slides = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/Ikonomov2013-EuropeanaCreative-EDM-slides.pdf}, keywords = {cultural heritage, Europeana, EDM, ESE, semantic technology, RDF, SKOS, URI, Ontotext GraphDB, semantic repository, SPARQL, endpoint}, abstract = {The paper discusses the Europeana Creative project which aims to facilitate re-use of cultural heritage metadata and content by the creative industries. The paper focuses on the contribution of Ontotext to the project activities. The Europeana Data Model (EDM) is further discussed as a new proposal for structuring the data that Europeana will ingest, manage and publish. The advantages of using EDM instead of the current ESE metadata set are highlighted. Finally, Ontotext's EDM Endpoint is presented, based on OWLIM semantic repository and SPARQL query language. A user-friendly RDF view is presented in order to illustrate the possibilities of Forest - an extensible modular user interface framework for creating linked data and semantic web applications.}, } @TechReport{OldmanMahmudAlexiev2013-CRM-revealed, author = {Dominic Oldman and Joshan Mahmud and Vladimir Alexiev}, title = {{The Conceptual Reference Model Revealed. Quality contextual data for research and engagement: A British Museum case study}}, institution = {ResearchSpace Project}, year = 2013, note = {Draft 0.98}, month = jul, url = {https://github.com/VladimirAlexiev/my/tree/master/pubs/BritishMuseum-CRM-mapping}, keywords = {cultural heritage, museum informatics, ontology, CIDOC CRM, semantic mapping, British Museum, ResearchSpace}, pages = {359 pages}, abstract = {Contents: 169p: Main body, including discussion, illustrations and mapping diagrams. 7p: Association Codes (see details at BM Association Mapping v2). 49p: Example Object Graph. 134p: mapping implementation as RDFer configuration files}, } @TechReport{Alexiev2013-FR-implementation, author = {Vladimir Alexiev}, title = {British Museum CIDOC CRM Fundamental Relations Implementation}, institution = {ResearchSpace Project}, year = 2013, url = {https://github.com/VladimirAlexiev/my/blob/master/pubs/BritishMuseum-CRM-Fundamental-Relations/README.md}, keywords = {cultural heritage, semantic technology, ontology, CIDOC CRM, semantic search, Fundamental Concepts, Fundamental Relations, GraphDB, semantic repository, inference, performance, ResearchSpace}, abstract = {Detailed description of CIDOC CRM Fundamental Relations Implementation for the British Museum collection using GraphDB rules}, } @InProceedings{Parvanova2013-SemanticAnnotation, author = {Jana Parvanova and Vladimir Alexiev and Stanislav Kostadinov}, title = {{RDF Data and Image Annotations in ResearchSpace}}, booktitle = {{International Workshop on Collaborative Annotations in Shared Environment: metadata, vocabularies and techniques in the Digital Humanities (DH-CASE 2013).
Collocated with DocEng 2013}}, year = 2013, month = sep, address = {Florence, Italy}, url = {https://dl.acm.org/doi/10.1145/2517978.2517997}, url_Preprint = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/Parvanova2013-SemanticAnnotation.pdf}, url_Slides = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/Parvanova2013-SemanticAnnotation-slides.pdf}, keywords = {Computer-supported collaborative work, Annotation, Museum informatics, Cultural heritage, ResearchSpace, SVG, Web Annotation, ontology, British Museum, CIDOC CRM}, abstract = {This paper presents the approaches to data and image annotation in ResearchSpace (https://www.researchspace.org), an Andrew W. Mellon Foundation funded project led by the British Museum aimed at supporting collaborative internet research, information sharing and web applications for the cultural heritage scholarly community}, isbn = {978-1-4503-2199-0}, doi = {10.1145/2517978.2517997}, } @Proceedings{TPDL2013-CRMEX2013, title = {{Practical Experiences with CIDOC CRM and its Extensions (CRMEX 2013), Workshop at 17th International Conference on Theory and Practice of Digital Libraries (TPDL 2013)}}, year = 2013, booktitle = {{Practical Experiences with CIDOC CRM and its Extensions (CRMEX 2013), Workshop at 17th International Conference on Theory and Practice of Digital Libraries (TPDL 2013)}}, editor = {Vladimir Alexiev and Vladimir Ivanov and Maurice Grinberg}, volume = 1117, address = {Valetta, Malta}, month = sep, publisher = {CEUR WS}, url = {https://ceur-ws.org/Vol-1117/}, keywords = {CIDOC CRM, RDF, Ontology, cultural heritage, practical applications}, abstract = {The CIDOC CRM (international standard ISO 21127:2006) is a conceptual model and ontology with a fundamental role in many data integration efforts in the Digital Libraries and Cultural Heritage (CH) domain. The goal of this workshop is to describe and showcase systems using CRM at their core, exchange experience about the practical use of CRM, describe difficulties for the practical application of CRM, and share approaches for overcoming such difficulties. The ultimate objective of this workshop is to encourage the wider practical adoption of CRM}, } @InProceedings{Alexiev2012-CRM-properties, author = {Vladimir Alexiev}, title = {{Types and Annotations for CIDOC CRM Properties}}, booktitle = {{Digital Presentation and Preservation of Cultural and Scientific Heritage (DiPP2012) conference (Invited report)}}, year = 2012, month = sep, address = {Veliko Tarnovo, Bulgaria}, url = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/Alexiev2012-CRM-Properties.pdf}, url_Slides = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/Alexiev2012-CRM-Properties-presentation.ppt}, keywords = {cultural heritage, semantic technology, ontology, CIDOC CRM, properties, attribute assignment, reification, property reification}, abstract = {The CIDOC CRM provides an extensive ontology for describing entities and properties appearing in cultural heritage (CH) documentation, history and archeology. CRM provides some means for describing information about properties (property types, attribute assignment, and "long-cuts") and guidelines for extending the vocabulary. However, these means are far from complete, and in some cases there is little guidance how to "implement" them in RDF. 
In this article we outline the problems, relate them to established RDF patterns and mechanisms, and describe several implementation alternatives.}, } @InProceedings{Alexiev2012-CRM-search, author = {Vladimir Alexiev}, title = {{Implementing CIDOC CRM Search Based on Fundamental Relations and OWLIM Rules}}, booktitle = {{Workshop on Semantic Digital Archives (SDA 2012), part of International Conference on Theory and Practice of Digital Libraries (TPDL 2012)}}, year = 2012, volume = 912, month = sep, address = {Paphos, Cyprus}, publisher = {CEUR WS}, url = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/Alexiev2012-CRM-FR-search.pdf}, url_Slides = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/Alexiev2012-CRM-Search-presentation.pdf}, url_Published = {https://ceur-ws.org/Vol-912/paper8.pdf}, keywords = {cultural heritage, semantic technology, ontology, CIDOC CRM, semantic search, Fundamental Concepts, Fundamental Relations, GraphDB, semantic repository, inference, performance, ResearchSpace}, abstract = {The CIDOC CRM provides an ontology for describing entities, properties and relationships appearing in cultural heritage (CH) documentation, history and archeology. CRM promotes shared understanding by providing an extensible semantic framework that any CH information can be mapped to. CRM data is usually represented in semantic web format (RDF) and comprises complex graphs of nodes and properties. An important question is how a user can search through such complex graphs, since the number of possible combinations is staggering. One approach "compresses" the semantic network by mapping many CRM entity classes to a few "Fundamental Concepts" (FC), and mapping whole networks of CRM properties to fewer "Fundamental Relations" (FR). These FCs and FRs serve as a "search index" over the CRM semantic web and allow the user to use a simpler query vocabulary. We describe an implementation of CRM FR Search based on OWLIM Rules, done as part of the ResearchSpace (RS) project. We describe the technical details, problems and difficulties encountered, benefits and disadvantages of using OWLIM rules, and preliminary performance results.
We provide implementation experience that can be valuable for further implementation, definition and maintenance of CRM FRs.}, } @Misc{Alexiev2010-PMI-BG-IT-PM, author = {Vladimir Alexiev}, title = {{University IT PM Education: NBU and ESI/SEI Masters Programs}}, howpublished = {presentation}, month = nov, year = 2010, url = {https://rawgit2.com/VladimirAlexiev/my/master/pres/20101111-PMI-BG-IT-PM.ppt}, url_Other = {https://docplayer.net/12848719-University-it-pm-education-nbu-and-esi-sei-masters-programs-vladimir-alexiev-phd-pmp-ontotext-lab-sirma-group-holding.html}, keywords = {Masters Program, NBU, European Software Institute, ESI, IT project management, PM, university curriculum}, booktitle = {{PMI Bulgaria Meeting}}, address = {Sofia, Bulgaria}, abstract = {Describes the Masters program in IT Project Management at New Bulgarian University, and the development of a Software Engineering Masters Program by ESI Bulgaria, sponsored by CMU SEI and America for Bulgaria Foundation}, } @Misc{Alexiev2011-KIM-Stanbol, author = {Vladimir Alexiev}, title = {{Comparing Ontotext KIM and Apache Stanbol}}, month = sep, year = 2011, url = {https://rawgit2.com/VladimirAlexiev/my/master/pres/Comparing Ontotext KIM and Apache Stanbol (201109 pres).ppt}, url_Appendix = {https://rawgit2.com/VladimirAlexiev/my/master/pres/Comparing Ontotext KIM and Apache Stanbol (201109 appendix).doc}, keywords = {semantic enrichment, text analysis, Ontotext KIM, Apache Stanbol}, howpublished = {presentation}, institution = {Ontotext Corp}, } @Misc{Alexiev2011-SemtechForCulturalHeritage, author = {Vladimir Alexiev}, title = {{Semantic Technologies for Cultural Heritage}}, month = may, year = 2011, url = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/Alexiev2011-SemtechForCulturalHeritage.pdf}, keywords = {semantic technology, ontology, semantic integration, cultural heritage}, howpublished = {presentation}, booktitle = {{Global Smart SOC Initiative Summit}}, address = {Seoul, Korea}, } @Misc{Alexiev2010-costEffectiveEGov, author = {Vladimir Alexiev}, title = {{Cost-effective e-Government Services: Export Control System phase 2 (ECS2)}}, month = feb, year = 2010, url = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/Alexiev2010-costEffectiveEGov.pdf}, keywords = {model-driven development, e-customs, e-government, Export Control System}, howpublished = {presentation}, booktitle = {{Bulgaria-Korea IT Experts Workshop}}, address = {Sofia, Bulgaria}, } @InProceedings{AlexievAsenova2010-TeachingIT_PM, author = {Vladimir Alexiev and Petya Asenova}, title = {{An Approach to Teaching IT Project Management in a Masters Program}}, booktitle = {{6th Annual International Conference on Education in Computer Science}}, year = 2010, month = jun, address = {Fulda and Munich, Germany}, url = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/AlexievAsenova2010-TeachingIT_PM.pdf}, keywords = {Masters Program, NBU, IT project management, PM, university curriculum}, abstract = {Many Bulgarian IT professionals manage projects but their knowledge and skills in this area are based mainly on their own experience, which is often obtained through trial and error. Although the project manager (PM) has a crucial role for project success, the university curriculum in Bulgaria does not sufficiently answer these business needs. Some aspects of PM are included in university courses on Software Engineering and some short courses on IT PM are offered, but overall this matter is not covered in depth in any national university.
Having in mind this real need, we proposed a new Masters Program on IT PM hoping it will meet the interest of many students who are representatives of the software business. This paper presents an approach to prepare PMs for the Bulgarian IT industry through a Masters Program, developed in cooperation between the New Bulgarian University (NBU) and the Institute of Mathematics and Informatics (IMI) of the Bulgarian Academy of Sciences (BAS). We describe the background, objectives and design of the program, and relations with the business.}, } @Misc{AlexievMitevBukev2010-eGovBPM, author = {Vladimir Alexiev and Adrian Mitev and Alexander Bukev}, title = {{Implementing complex e-Government solutions with open source and BPM: Architecture of Export Control System phase 2 (ECS2)}}, year = 2010, url = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/AlexievMitevBukev2010-eGovBPM.pdf}, keywords = {model-driven development, business process management, BPMS, e-customs, e-government, Export Control System, system architecture}, howpublished = {presentation}, booktitle = {{Java2Days Conference}}, address = {Sofia, Bulgaria}, } @Article{AlexievMartev2009-ElectronicExportBG, author = {Vladimir Alexiev and Teodor Martev}, title = {{Electronic Export Declarations Ease the Work of the Customs Agency and Traders}}, journal = {{Computerworld (in Bulgarian)}}, year = 2009, volume = 46, month = dec, annote = {ECS2 was nominated for IT Project of the year 2010}, url = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/AlexievMartev2009-ElectronicExportBG.pdf}, keywords = {model-driven development, e-customs, e-government, Export Control System}, } @Book{AlexievBreuBruijn2005-InformationIntegration, author = {Vladimir Alexiev and Michael Breu and Jos de Bruijn and Dieter Fensel and Ruben Lara and Holger Lausen}, title = {{Information Integration with Ontologies: Experiences from an Industrial Showcase}}, publisher = {John Wiley and Sons}, year = 2005, month = feb, url = {https://www.wiley.com/en-gb/Information+Integration+with+Ontologies%3A+Experiences+from+an+Industrial+Showcase-p-9780470010488}, keywords = {semantic integration, ontology, semantic technology, ontology-based data access}, chapter = 2, isbn = {978-0-470-01048-8}, abstract = {Disparate information, spread over various sources, in various formats, and with inconsistent semantics is a major obstacle for enterprises to use this information at its full potential. Information Grids should allow for the effective access, extraction and linking of dispersed information. Currently Europe's corporations spend over 10 Billion EUR to deal with these problems. This book will demonstrate the applicability of grid technologies to industry. To this end, it gives a detailed insight on how ontology technology can be used to manage dispersed information assets more efficiently. The book is based on experiences from the COG (Corporate Ontology Grid) project, carried out jointly by three leading industrial players and the Digital Enterprise Research Institute Austria. Through comparisons of this project with alternative technologies and projects, it provides hands-on experience and best practice examples to act as a reference guide for their development. Information Integration with Ontologies: Ontology based Information Integration in an Industrial Setting is ideal for technical experts and computer researchers in the IT-area looking to achieve integration of heterogeneous information and apply ontology technologies and techniques in practice.
It will also be of great benefit to technical decision makers seeking information about ontology technologies, and to the scientific audience interested in achievements towards the application of ontologies in an industrial setting.}, } @TechReport{Alexiev2004-DataIntegration, author = {Vladimir Alexiev}, title = {{Data Integration Survey}}, institution = {European project "Corporate Ontology Grid" (COG)}, type = {Deliverable}, month = sep, year = 2004, } @PhdThesis{Alexiev1999-thesis, author = {Vladimir Alexiev}, title = {{Non-deterministic Interaction Nets}}, school = {University of Alberta}, year = 1999, url = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/Alexiev1999-thesis.pdf}, url_Slides = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/Alexiev1999-thesisPresentation.pdf}, url_Other = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/Alexiev1999-thesis-2up.pdf}, abstract = {The Interaction Nets (IN) of Lafont are a graphical formalism used to model parallel computation. Their genesis can be traced back to the Proof Nets of Linear Logic. They enjoy several nice theoretical properties, amongst them pure locality of interaction, strong confluence, computational completeness, syntactically-definable deadlock-free fragments, combinatorial completeness (existence of a Universal IN). They also have nice "pragmatic" properties: they are simple and elegant, intuitive, can capture aspects of computation at widely varying levels of abstraction. Compared to term and graph rewriting systems, INs are much simpler (a subset of such systems that imposes several constraints on the rewriting process), but are still computationally complete (can capture the lambda-calculus). INs are a refinement of graph rewriting which keeps only the essential features in the system. Conventional INs are strongly confluent, and are therefore unsuitable for the modeling of non-deterministic systems such as process calculi and concurrent object-oriented programming. We study four different ways of "breaking" the confluence of INs by introducing various extensions: - IN with Multiple (reduction) Rules (INMR): Allow more than one reduction rule per redex. - IN with Multiple Principal Ports (INMPP): Allow more than one active port per node. - IN with MultiPorts (INMP): Allow more than one connection per port. - IN with Multiple Connections (INMC): Allow hyper-edges (in the graph-theoretical sense), i.e. connections between more than two ports. We study in considerable detail the relative expressive power of these systems, both by representing various programming examples in them, and by constructing inter-representations that translate nets from one system to another. We study formally a translation from the finite pi-calculus to a system that we call Multi-Interaction Nets: MIN = INMP + INMPP. We prove the faithfulness of the translation to the pi-calculus processes that it represents, both structural and operational (completeness and soundness of reduction). We show that unlike the pi-calculus, our translation implements the Prefix operation of the pi-calculus in a distributed and purely local manner, and implements explicitly the distribution and duplication of values to the corresponding occurrences of a variable. We compare our translation to other graphical and combinatory representations of the pi-calculus, such as the pi-nets of Milner, the Interaction Diagrams of Parrow, and the Concurrent Combinators of Honda and Yoshida.
The original paper on IN (Lafont, 1990) states that INs were designed to be simple and practical, to be a "programming language that can be used for the design of interactive software". However, to date INs have been used only for theoretical investigations. This thesis is mostly devoted to a hands-on exploration of applications of IN to various "programming problems".}, } @TechReport{Alexiev1998-distributedSynchronization, author = {Vladimir Alexiev}, title = {{Distributed Synchronization in a pi-Calculus with Bidirectional Communication}}, institution = {University of Alberta}, year = 1998, month = jan, url = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/Alexiev1998-distributedSynchronization.pdf}, url_CiteSeer = {https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.301.8273}, keywords = {pi-calculus, input prefix, distributed synchronization, communication}, abstract = {The (input) prefix operation of the pi-calculus expresses global synchronization (blocking) of the prefixed process. We show how to implement synchronization in a completely distributed manner, by using bidirectional atomic communication and the principle of provision (data-dependency-based synchronization)}, } @TechReport{Alexiev1998-finitePi, author = {Vladimir Alexiev}, title = {{Representing the Finite pi-calculus in Multi-Interaction Nets: Concurrency = Interaction + Non-determinism}}, institution = {University of Alberta}, year = 1998, month = apr, url = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/Alexiev1998-finitePi.pdf}, url_CiteSeer = {https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.301.8381}, keywords = {pi-calculus, interaction nets, linear logic, concurrent computation, distributed computation}, abstract = {We extend the Interaction Nets of Lafont with some non-determinism capabilities and then show how to implement the finite monadic pi-calculus in that system}, } @InProceedings{Alexiev1996-targetedCommunication, author = {Vladimir Alexiev}, title = {{Targeted Communication in Linear Objects}}, booktitle = {{Artificial Intelligence: Methodology, Systems, Applications (AIMSA'96)}}, year = 1996, month = sep, publisher = {IOI Press}, url = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/Alexiev1996-targetedCommunication.pdf}, url_CiteSeer = {https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.47.7107}, url_TR = {https://era.library.ualberta.ca/downloads/bc386k59r}, keywords = {Linear Objects, communication, broadcasting, object-oriented programming, logic programming, linear logic}, note = {Also University of Alberta TR94-14}, abstract = {Linear Objects (LO) of Andreoli and Pareschi is the first proposal to integrate object-oriented programming into logic programming based on Girard's Linear Logic (LL). In LO each object is represented by a separate open node of a proof tree. This ``insulates'' objects from one another which allows the attributes of an object to be represented as a multiset of atoms and thus facilitates easy retrieval and update of attributes. However this separation hinders communication between objects. Communication in LO is achieved through broadcasting to all objects which in our opinion is infeasible from a computational viewpoint. This paper proposes a refined communication mechanism for LO which uses explicit communication channels specified by the programmer. We name it TCLO which stands for ``Targeted Communication in LO''.
Although channel specification puts some burden on the programmer, we demonstrate that the language is expressive enough by redoing some of the examples given for LO. Broadcasting can be done in a controlled manner. LO can be seen as a special case of TCLO where only one global channel (the forum) is used.}, } @TechReport{Alexiev1995-eventCalculus, author = {Vladimir Alexiev}, title = {{The Event Calculus as a Linear Logic Program}}, institution = {University of Alberta}, year = 1995, number = {TR95-24}, month = sep, url = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/Alexiev1995-eventCalculus.pdf}, url_CiteSeer = {https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.30.9953}, url_TR = {https://era.library.ualberta.ca/downloads/9g54xk065}, keywords = {event calculus, linear logic, negation as failure, knowledge update}, abstract = {The traditional presentation of Kowalski's Event Calculus as a logic program uses Negation-as-Failure (NAF) in an essential way to support persistence of fluents. In this paper we present an implementation of Event Calculus as a purely logical (without NAF) Linear Logic (LL) program. This work demonstrates some of the internal non-monotonic features of LL and its suitability for knowledge update (as opposed to knowledge revision). Although NAF is an ontologically sufficient solution to the frame problem, the LL solution is implementationally superior. Handling of incomplete temporal descriptions and support for ramifications (derived fluents) are also considered.}, } @Misc{Alexiev1995-thesisProposal, author = {Vladimir Alexiev}, title = {{Object-Oriented Logic Programming based on Linear Logic}}, month = feb, year = 1995, keywords = {object-oriented programming, logic programming, multiparadigm programming, linear logic}, institution = {University of Alberta}, howpublished = {Thesis proposal}, } @Article{Alexiev1994-applicationsLinearLogic, author = {Vladimir Alexiev}, title = {{Applications of Linear Logic to Computation: An Overview}}, journal = {{Logic Journal of the IGPL}}, year = 1994, volume = 2, number = 1, pages = {77-107}, month = mar, url = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/Alexiev1994-applicationsLinearLogic.pdf}, url_Published = {https://jigpal.oxfordjournals.org/content/2/1/77}, keywords = {linear logic, survey}, issn = {1368-9894}, note = {Also University of Alberta TR93-18, December 1993}, doi = {10.1093/jigpal/2.1.77}, abstract = {An overview of existing applications of Linear Logic (LL) to issues of computation. After a substantial introduction to LL, it discusses the implications of LL to functional programming, logic programming, concurrent and object-oriented programming and some other applications of LL, like semantics of negation in LP, non-monotonic issues in AI planning, etc. Although the overview covers pretty much the state-of-the-art in this area, by necessity many of the works are only mentioned and referenced, but not discussed in any considerable detail. The paper does not presuppose any previous exposition to LL, and is addressed more to computer scientists (probably with a theoretical inclination) than to logicians. The paper contains over 140 references, of which some 80 are about applications of LL.}, } @InProceedings{MarinovAlexievDjonev1994-BCPN, author = {Georgi Marinov and Vladimir Alexiev and Yavor Djonev}, title = {{Boolean Constraint Propagation Networks}}, booktitle = {{Artificial Intelligence: Methodology, Systems, and Applications (AIMSA'94)}}, year = 1994, editor = {P. Jorrand and V. Sgurev}, pages = {109-118}, month = sep, address = {Sofia, Bulgaria}, publisher = {World Scientific Publishing}, url = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/MarinovAlexievDjonev1994-BCPN.pdf}, keywords = {constraint propagation, inference, knowledge-based system, expert system}, doi = {10.5555/212090.212113}, isbn = {981-02-1853-2}, abstract = {This paper describes a particular inference mechanism which has been successfully used for the implementation of an expert system and a generic shell supporting consulting-type expert systems.
The main features of Boolean Constraint Propagation Networks (BCPN) are: the inference flows in all directions, unlike inference modes of forward or backward chaining systems; all possible consequences of a fact are derived as soon as the user enters the fact, therefore the system is very interactive; if the user withdraws an assertion then all propositions depending on it are retracted; the inference architecture is simple and uniform. After a general description of BCPN we give an account of the problems encountered and the approaches we used to solve them. Some possible extensions of the mechanism and its applicability to various areas are also discussed. The current version of BCPN is written in C++ and took about one man-year to develop.}, } @TechReport{Alexiev1993-annotatedBibliography, author = {Vladimir Alexiev}, title = {{A (Not Very Much) Annotated Bibliography on Integrating Object-Oriented and Logic Programming}}, institution = {University of Alberta}, year = 1993, month = mar, url = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/Alexiev1993-annotatedBibliography.pdf}, url_CiteSeer = {https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.54.6168}, keywords = {object-oriented programming, logic programming, multiparadigm programming, bibliography}, } @TechReport{Alexiev1993-mutableObjectState, author = {Vladimir Alexiev}, title = {{Mutable Object State for Object-Oriented Logic Programming: A Survey}}, institution = {University of Alberta}, year = 1993, number = {TR93-15}, month = aug, url = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/Alexiev1993-mutableObjectState.pdf}, keywords = {object-oriented programming, logic programming, multiparadigm programming, mutable, object state, survey}, abstract = {One of the most difficult problems on the way to an integration of Object-Oriented and Logic Programming is the modeling of changeable object state (i.e. object dynamics) in a particular logic in order not to forfeit the declarative nature of LP. Classical logic is largely unsuitable for such a task, because it adopts a general (both temporally and spatially), Platonic notion of validity, whereas object state changes over time and is local to an object. This paper presents the problem and surveys the state-of-the-art approaches to its solution, as well as some emerging, promising new approaches. The paper tries to relate the different approaches, to evaluate their merits and deficiencies and to identify promising directions for development. The emphasis in this survey is on efficient implementation of state change, one which would be suitable for the lowest fundamental level of a general OOLP language.
The following approaches are covered: Assert/Retract, Declarative Database Updates and Transaction Logic, Modal and Dynamic Logics, Perpetual Objects, Logical Objects and Linear Objects, Linear Logic, Rewriting Logic and MaudeLog.}, } @TechReport{Alexiev1993-objectOriented, author = {Vladimir Alexiev}, title = {{Object-Oriented and Logic-Based Knowledge Representation}}, institution = {University of Alberta}, year = 1993, note = {Term project}, url = {https://rawgit2.com/VladimirAlexiev/my/master/pubs/Alexiev1993-objectOriented.pdf}, url_CiteSeer = {https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.54.2657}, keywords = {object-oriented programming, logic programming, multiparadigm programming, knowledge representation}, abstract = {This paper is a survey of a number of languages/systems based on both Object-Oriented and Logic Programming and designed expressly for Knowledge Representation tasks. My goal in the paper is to argue that the integration of these two paradigms (particularly the synergism that emerges from such an integration) forms a stable basis for Knowledge Representation at the symbolic level. I try to support this claim both by examples from the papers surveyed and by considerations in a more general context. Some more advanced topics concerning special-purpose non-classic logics are also discussed.}, }