-
-
Notifications
You must be signed in to change notification settings - Fork 2
/
references.bib
executable file
·2968 lines (2854 loc) · 305 KB
/
references.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
@article{Ashburner2003,
  author          = {Ashburner, M. and Mungall, C. J. and Lewis, S. E.},
  title           = {Ontologies for Biologists: A Community Model for the Annotation of Genomic Data},
  journal         = {Cold Spring Harbor Symposia on Quantitative Biology},
  volume          = {68},
  pages           = {227--235},
  year            = {2003},
  issn            = {0091-7451},
  pmid            = {15338622},
  url             = {http://www.ncbi.nlm.nih.gov/pubmed/15338622},
  mendeley-groups = {Thesis try 2}
}
@article{Zerbino2018,
  abstract = {The Ensembl project has been aggregating, processing, integrating and redistributing genomic datasets since the initial releases of the draft human genome, with the aim of accelerating genomics research through rapid open distribution of public data. Large amounts of raw data are thus transformed into knowledge, which is made available via a multitude of channels, in particular our browser (http://www.ensembl.org). Over time, we have expanded in multiple directions. First, our resources describe multiple fields of genomics, in particular gene annotation, comparative genomics, genetics and epigenomics. Second, we cover a growing number of genome assemblies; Ensembl Release 90 contains exactly 100. Third, our databases feed simultaneously into an array of services designed around different use cases, ranging from quick browsing to genome-wide bioinformatic analysis. We present here the latest developments of the Ensembl project, with a focus on managing an increasing number of assemblies, supporting efforts in genome interpretation and improving our browser.},
  author = {Zerbino, Daniel R and Achuthan, Premanand and Akanni, Wasiu and Amode, M Ridwan and Barrell, Daniel and Bhai, Jyothish and Billis, Konstantinos and Cummins, Carla and Gall, Astrid and Gir{\'o}n, Carlos Garc{\'\i}a and Gil, Laurent and Gordon, Leo and Haggerty, Leanne and Haskell, Erin and Hourlier, Thibaut and Izuogu, Osagie G and Janacek, Sophie H and Juettemann, Thomas and To, Jimmy Kiang and Laird, Matthew R and Lavidas, Ilias and Liu, Zhicheng and Loveland, Jane E and Maurel, Thomas and McLaren, William and Moore, Benjamin and Mudge, Jonathan and Murphy, Daniel N and Newman, Victoria and Nuhn, Michael and Ogeh, Denye and Ong, Chuang Kee and Parker, Anne and Patricio, Mateus and Riat, Harpreet Singh and Schuilenburg, Helen and Sheppard, Dan and Sparrow, Helen and Taylor, Kieron and Thormann, Anja and Vullo, Alessandro and Walts, Brandon and Zadissa, Amonida and Frankish, Adam and Hunt, Sarah E and Kostadima, Myrto and Langridge, Nicholas and Martin, Fergal J and Muffato, Matthieu and Perry, Emily and Ruffier, Magali and Staines, Dan M and Trevanion, Stephen J and Aken, Bronwen L and Cunningham, Fiona and Yates, Andrew and Flicek, Paul},
  doi = {10.1093/nar/gkx1098},
  issn = {1362-4962},
  journal = {Nucleic Acids Research},
  mendeley-groups = {Doctoral},
  month = jan,
  number = {D1},
  pages = {D754--D761},
  pmid = {29155950},
  title = {{Ensembl} 2018},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/29155950},
  volume = {46},
  year = {2018}
}
@article{Wright2005,
  abstract = {The HGNC Comparison of Orthology Predictions search tool, HCOP (http://www.gene.ucl.ac.uk/cgi-bin/nomenclature/hcop.pl ), enables users to compare predicted human and mouse orthologs for a specified gene, or set of genes, from either species according to the ortholog assertions from the Ensembl, HGNC, Homologene, Inparanoid, MGI and PhIGs databases. Users can assess the reliability of the prediction from the number of these different sources that identify a particular orthologous pair. HCOP provides a useful one-stop resource to summarise, compare and access various sources of human and mouse orthology data.},
  author = {Wright, Mathew W and Eyre, Tina A and Lush, Michael J and Povey, Sue and Bruford, Elspeth A},
  doi = {10.1007/s00335-005-0103-2},
  issn = {0938-8990},
  journal = {Mammalian Genome},
  mendeley-groups = {Doctoral},
  month = nov,
  number = {11},
  pages = {827--828},
  pmid = {16284797},
  title = {{HCOP}: The {HGNC} Comparison of Orthology Predictions Search Tool},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/16284797},
  volume = {16},
  year = {2005}
}
@article{Blake2017,
  abstract = {The Mouse Genome Database (MGD: http://www.informatics.jax.org) is the primary community data resource for the laboratory mouse. It provides a highly integrated and highly curated system offering a comprehensive view of current knowledge about mouse genes, genetic markers and genomic features as well as the associations of those features with sequence, phenotypes, functional and comparative information, and their relationships to human diseases. MGD continues to enhance access to these data, to extend the scope of data content and visualizations, and to provide infrastructure and user support that ensures effective and efficient use of MGD in the advancement of scientific knowledge. Here, we report on recent enhancements made to the resource and new features.},
  author = {Blake, Judith A. and Eppig, Janan T. and Kadin, James A. and Richardson, Joel E. and Smith, Cynthia L. and Bult, Carol J. and Anagnostopoulos, A. and Baldarelli, R. M. and Beal, J. S. and Bello, S. M. and Blodgett, O. and Butler, N. E. and Corbani, L. E. and Dene, H. and Drabkin, H. J. and Forthofer, K. L. and Giannatto, S. L. and Hale, P. and Hill, D. P. and Hutchins, L. and Knowlton, M. and Lavertu, A. and Law, M. and Lewis, J. R. and Lopez, V. and Maghini, D. and Perry, D. and McAndrews, M. and Miers, D. and Montenko, H. and Ni, L. and Onda, H. and Recla, J. M. and Reed, D. J. and Richards-Smith, B. and Sitnikov, D. and Tomczuk, M. and Wilming, L. and Zhu, Y.},
  doi = {10.1093/nar/gkw1040},
  file = {:Users/cthoyt/ownCloud/Mendeley/2017/Blake et al. - 2017 - Mouse Genome Database (MGD)-2017 Community knowledge resource for the laboratory mouse.pdf:pdf},
  issn = {1362-4962},
  journal = {Nucleic Acids Research},
  mendeley-groups = {Doctoral},
  number = {D1},
  pages = {D723--D729},
  pmid = {27899570},
  title = {{Mouse Genome Database} ({MGD})-2017: Community Knowledge Resource for the Laboratory Mouse},
  volume = {45},
  year = {2017}
}
@article{Shimoyama2015,
  abstract = {The Rat Genome Database (RGD, http://rgd.mcw.edu) provides the most comprehensive data repository and informatics platform related to the laboratory rat, one of the most important model organisms for disease studies. RGD maintains and updates datasets for genomic elements such as genes, transcripts and increasingly in recent years, sequence variations, as well as map positions for multiple assemblies and sequence information. Functional annotations for genomic elements are curated from published literature, submitted by researchers and integrated from other public resources. Complementing the genomic data catalogs are those associated with phenotypes and disease, including strains, QTL and experimental phenotype measurements across hundreds of strains. Data are submitted by researchers, acquired through bulk data pipelines or curated from published literature. Innovative software tools provide users with an integrated platform to query, mine, display and analyze valuable genomic and phenomic datasets for discovery and enhancement of their own research. This update highlights recent developments that reflect an increasing focus on: (i) genomic variation, (ii) phenotypes and diseases, (iii) data related to the environment and experimental conditions and (iv) datasets and software tools that allow the user to explore and analyze the interactions among these and their impact on disease.},
  author = {Shimoyama, Mary and {De Pons}, Jeff and Hayman, G. Thomas and Laulederkind, Stanley J. F. and Liu, Weisong and Nigam, Rajni and Petri, Victoria and Smith, Jennifer R. and Tutaj, Marek and Wang, Shur Jen and Worthey, Elizabeth and Dwinell, Melinda and Jacob, Howard},
  doi = {10.1093/nar/gku1026},
  file = {:Users/cthoyt/ownCloud/Mendeley/2015/Shimoyama et al. - 2015 - The Rat Genome Database 2015 Genomic, phenotypic and environmental variations and disease.pdf:pdf},
  issn = {1362-4962},
  journal = {Nucleic Acids Research},
  mendeley-groups = {Doctoral},
  number = {D1},
  pages = {D743--D750},
  pmid = {25355511},
  title = {The {Rat Genome Database} 2015: Genomic, Phenotypic and Environmental Variations and Disease},
  volume = {43},
  year = {2015}
}
@article{Yates2017,
  abstract = {The HUGO Gene Nomenclature Committee (HGNC) based at the European Bioinformatics Institute (EMBL-EBI) assigns unique symbols and names to human genes. Currently the HGNC database contains almost 40 000 approved gene symbols, over 19 000 of which represent protein-coding genes. In addition to naming genomic loci we manually curate genes into family sets based on shared characteristics such as homology, function or phenotype. We have recently updated our gene family resources and introduced new improved visualizations which can be seen alongside our gene symbol reports on our primary website http://www.genenames.org In 2016 we expanded our remit and formed the Vertebrate Gene Nomenclature Committee (VGNC) which is responsible for assigning names to vertebrate species lacking a dedicated nomenclature group. Using the chimpanzee genome as a pilot project we have approved symbols and names for over 14 500 protein-coding genes in chimpanzee, and have developed a new website http://vertebrate.genenames.org to distribute these data. Here, we review our online data and resources, focusing particularly on the improvements and new developments made during the last two years.},
  author = {Yates, Bethan and Braschi, Bryony and Gray, Kristian A. and Seal, Ruth L. and Tweedie, Susan and Bruford, Elspeth A.},
  doi = {10.1093/nar/gkw1033},
  file = {:Users/cthoyt/ownCloud/Mendeley/2017/Yates et al. - 2017 - Genenames.org The HGNC and VGNC resources in 2017.pdf:pdf},
  issn = {1362-4962},
  journal = {Nucleic Acids Research},
  number = {D1},
  pages = {D619--D625},
  pmid = {27799471},
  title = {{Genenames.org}: The {HGNC} and {VGNC} Resources in 2017},
  volume = {45},
  year = {2017}
}
@article{Maglott2011,
  abstract = {Entrez Gene (www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene) is NCBI's database for gene-specific information. It does not include all known or predicted genes; instead Entrez Gene focuses on the genomes that have been completely sequenced, that have an active research community to contribute gene-specific information, or that are scheduled for intense sequence analysis. The content of Entrez Gene represents the result of curation and automated integration of data from NCBI's Reference Sequence project (RefSeq), from collaborating model organism databases, and from many other databases available from NCBI. Records are assigned unique, stable and tracked integers as identifiers. The content (nomenclature, map location, gene products and their attributes, markers, phenotypes, and links to citations, sequences, variation details, maps, expression, homologs, protein domains and external databases) is updated as new information becomes available. Entrez Gene is a step forward from NCBI's LocusLink, with both a major increase in taxonomic scope and improved access through the many tools associated with NCBI Entrez.},
  author = {Maglott, Donna and Ostell, Jim and Pruitt, Kim D. and Tatusova, Tatiana},
  doi = {10.1093/nar/gkq1237},
  file = {:Users/cthoyt/ownCloud/Mendeley/2011/Maglott et al. - 2011 - Entrez gene Gene-centered information at NCBI.pdf:pdf},
  issn = {0305-1048},
  journal = {Nucleic Acids Research},
  number = {SUPPL. 1},
  pages = {52--57},
  pmid = {15608257},
  title = {{Entrez Gene}: Gene-Centered Information at {NCBI}},
  volume = {39},
  year = {2011}
}
@article{Howe2013,
  abstract = {ZFIN, the Zebrafish Model Organism Database (http://zfin.org), is the central resource for zebrafish genetic, genomic, phenotypic and developmental data. ZFIN curators manually curate and integrate comprehensive data involving zebrafish genes, mutants, transgenics, phenotypes, genotypes, gene expressions, morpholinos, antibodies, anatomical structures and publications. Integrated views of these data, as well as data gathered through collaborations and data exchanges, are provided through a wide selection of web-based search forms. Among the vertebrate model organisms, zebrafish are uniquely well suited for rapid and targeted generation of mutant lines. The recent rapid production of mutants and transgenic zebrafish is making management of data associated with these resources particularly important to the research community. Here, we describe recent enhancements to ZFIN aimed at improving our support for mutant and transgenic lines, including (i) enhanced mutant/transgenic search functionality; (ii) more expressive phenotype curation methods; (iii) new downloads files and archival data access; (iv) incorporation of new data loads from laboratories undertaking large-scale generation of mutant or transgenic lines and (v) new GBrowse tracks for transgenic insertions, genes with antibodies and morpholinos.},
  author = {Howe, Douglas G and Bradford, Yvonne M and Conlin, Tom and Eagle, Anne E and Fashena, David and Frazer, Ken and Knight, Jonathan and Mani, Prita and Martin, Ryan and Moxon, Sierra A Taylor and Paddock, Holly and Pich, Christian and Ramachandran, Sridhar and Ruef, Barbara J and Ruzicka, Leyla and Schaper, Kevin and Shao, Xiang and Singer, Amy and Sprunger, Brock and {Van Slyke}, Ceri E and Westerfield, Monte},
  doi = {10.1093/nar/gks938},
  issn = {1362-4962},
  journal = {Nucleic Acids Research},
  mendeley-groups = {Thesis try 2},
  month = jan,
  number = {Database issue},
  pages = {D854--D860},
  pmid = {23074187},
  title = {{ZFIN}, the {Zebrafish Model Organism Database}: Increased Support for Mutants and Transgenics},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/23074187},
  volume = {41},
  year = {2013}
}
@article{Thurmond2019,
  abstract = {FlyBase (flybase.org) is a knowledge base that supports the community of researchers that use the fruit fly, Drosophila melanogaster, as a model organism. The FlyBase team curates and organizes a diverse array of genetic, molecular, genomic, and developmental information about Drosophila. At the beginning of 2018, 'FlyBase 2.0' was released with a significantly improved user interface and new tools. Among these important changes are a new organization of search results into interactive lists or tables (hitlists), enhanced reference lists, and new protein domain graphics. An important new data class called 'experimental tools' consolidates information on useful fly strains and other resources related to a specific gene, which significantly enhances the ability of the Drosophila researcher to design and carry out experiments. With the release of FlyBase 2.0, there has also been a restructuring of backend architecture and a continued development of application programming interfaces (APIs) for programmatic access to FlyBase data. In this review, we describe these major new features and functionalities of the FlyBase 2.0 site and how they support the use of Drosophila as a model organism for biological discovery and translational research.},
  author = {Thurmond, Jim and Goodman, Joshua L and Strelets, Victor B and Attrill, Helen and Gramates, L Sian and Marygold, Steven J and Matthews, Beverley B and Millburn, Gillian and Antonazzo, Giulia and Trovisco, Vitor and Kaufman, Thomas C and Calvi, Brian R and {FlyBase Consortium}},
  doi = {10.1093/nar/gky1003},
  issn = {1362-4962},
  journal = {Nucleic Acids Research},
  mendeley-groups = {Thesis try 2},
  month = jan,
  number = {D1},
  pages = {D759--D765},
  pmid = {30364959},
  title = {{FlyBase} 2.0: The Next Generation},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/30364959},
  volume = {47},
  year = {2019}
}
@article{Cherry2012,
  abstract = {The Saccharomyces Genome Database (SGD, http://www.yeastgenome.org) is the community resource for the budding yeast Saccharomyces cerevisiae. The SGD project provides the highest-quality manually curated information from peer-reviewed literature. The experimental results reported in the literature are extracted and integrated within a well-developed database. These data are combined with quality high-throughput results and provided through Locus Summary pages, a powerful query engine and rich genome browser. The acquisition, integration and retrieval of these data allow SGD to facilitate experimental design and analysis by providing an encyclopedia of the yeast genome, its chromosomal features, their functions and interactions. Public access to these data is provided to researchers and educators via web pages designed for optimal ease of use.},
  author = {Cherry, J Michael and Hong, Eurie L and Amundsen, Craig and Balakrishnan, Rama and Binkley, Gail and Chan, Esther T and Christie, Karen R and Costanzo, Maria C and Dwight, Selina S and Engel, Stacia R and Fisk, Dianna G and Hirschman, Jodi E and Hitz, Benjamin C and Karra, Kalpana and Krieger, Cynthia J and Miyasato, Stuart R and Nash, Rob S and Park, Julie and Skrzypek, Marek S and Simison, Matt and Weng, Shuai and Wong, Edith D},
  doi = {10.1093/nar/gkr1029},
  issn = {1362-4962},
  journal = {Nucleic Acids Research},
  mendeley-groups = {Thesis try 2},
  month = jan,
  number = {Database issue},
  pages = {D700--D705},
  pmid = {22110037},
  title = {{Saccharomyces Genome Database}: The Genomics Resource of Budding Yeast},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/22110037},
  volume = {40},
  year = {2012}
}
@article{Karimi2018,
  author = {Karimi, Kamran and Fortriede, Joshua D and Lotay, Vaneet S and Burns, Kevin A and Wang, Dong Zhou and Fisher, Malcom E and Pells, Troy J and James-Zorn, Christina and Wang, Ying and Ponferrada, V G and Chu, Stanley and Chaturvedi, Praneet and Zorn, Aaron M and Vize, Peter D},
  doi = {10.1093/nar/gkx936},
  issn = {0305-1048},
  journal = {Nucleic Acids Research},
  mendeley-groups = {Thesis try 2},
  month = jan,
  number = {D1},
  pages = {D861--D868},
  title = {{Xenbase}: A Genomic, Epigenomic and Transcriptomic Model Organism Database},
  url = {http://academic.oup.com/nar/article/46/D1/D861/4559118},
  volume = {46},
  year = {2018}
}
@article{Bult2019,
  abstract = {The Mouse Genome Database (MGD; http://www.informatics.jax.org) is the community model organism genetic and genome resource for the laboratory mouse. MGD is the authoritative source for biological reference data sets related to mouse genes, gene functions, phenotypes, and mouse models of human disease. MGD is the primary outlet for official gene, allele and mouse strain nomenclature based on the guidelines set by the International Committee on Standardized Nomenclature for Mice. In this report we describe significant enhancements to MGD, including two new graphical user interfaces: (i) the Multi Genome Viewer for exploring the genomes of multiple mouse strains and (ii) the Phenotype-Gene Expression matrix which was developed in collaboration with the Gene Expression Database (GXD) and allows researchers to compare gene expression and phenotype annotations for mouse genes. Other recent improvements include enhanced efficiency of our literature curation processes and the incorporation of Transcriptional Start Site (TSS) annotations from RIKEN's FANTOM 5 initiative.},
  author = {Bult, Carol J and Blake, Judith A and Smith, Cynthia L and Kadin, James A and Richardson, Joel E and {Mouse Genome Database Group}},
  doi = {10.1093/nar/gky1056},
  issn = {1362-4962},
  journal = {Nucleic Acids Research},
  mendeley-groups = {Thesis try 2},
  month = jan,
  number = {D1},
  pages = {D801--D806},
  pmid = {30407599},
  title = {{Mouse Genome Database} ({MGD}) 2019},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/30407599},
  volume = {47},
  year = {2019}
}
@article{Bachman2018,
  abstract = {For automated reading of scientific publications to extract useful information about molecular mechanisms it is critical that genes, proteins and other entities be correctly associated with uniform identifiers, a process known as named entity linking or ``grounding.'' Correct grounding is essential for resolving relationships among mined information, curated interaction databases, and biological datasets. The accuracy of this process is largely dependent on the availability of machine-readable resources associating synonyms and abbreviations commonly found in biomedical literature with uniform identifiers. In a task involving automated reading of $\sim$215,000 articles using the REACH event extraction software we found that grounding was disproportionately inaccurate for multi-protein families (e.g., ``AKT'') and complexes with multiple subunits (e.g., ``NF-$\kappa$B''). To address this problem we constructed FamPlex, a manually curated resource defining protein families and complexes as they are commonly encountered in biomedical text. In FamPlex the gene-level constituents of families and complexes are defined in a flexible format allowing for multi-level, hierarchical membership. To create FamPlex, text strings corresponding to entities were identified empirically from literature and linked manually to uniform identifiers; these identifiers were also mapped to equivalent entries in multiple related databases. FamPlex also includes curated prefix and suffix patterns that improve named entity recognition and event extraction. Evaluation of REACH extractions on a test corpus of $\sim$54,000 articles showed that FamPlex significantly increased grounding accuracy for families and complexes (from 15 to 71\%). The hierarchical organization of entities in FamPlex also made it possible to integrate otherwise unconnected mechanistic information across families, subfamilies, and individual proteins. Applications of FamPlex to the TRIPS/DRUM reading system and the Biocreative VI Bioentity Normalization Task dataset demonstrated the utility of FamPlex in other settings. FamPlex is an effective resource for improving named entity recognition, grounding, and relationship resolution in automated reading of biomedical text. The content in FamPlex is available in both tabular and Open Biomedical Ontology formats at https://github.com/sorgerlab/famplex under the Creative Commons CC0 license and has been integrated into the TRIPS/DRUM and REACH reading systems.},
  author = {Bachman, John A. and Gyori, Benjamin M. and Sorger, Peter K.},
  doi = {10.1186/s12859-018-2211-5},
  file = {:Users/cthoyt/ownCloud/Mendeley/2018/FamPlex A resource for entity recognition and relationship resolution of human protein families and complexes in biomedical text mining.pdf:pdf},
  issn = {1471-2105},
  journal = {BMC Bioinformatics},
  keywords = {Biocuration,Event extraction,Grounding,Named entity linking,Named entity recognition,Natural language processing,Protein families,Text mining},
  mendeley-groups = {Thesis try 2},
  number = {1},
  pages = {1--14},
  pmid = {29954318},
  publisher = {BioMed Central},
  title = {{FamPlex}: A Resource for Entity Recognition and Relationship Resolution of Human Protein Families and Complexes in Biomedical Text Mining},
  volume = {19},
  year = {2018}
}
@article{Hobbs1978,
  abstract = {Two approaches to the problem of resolving pronoun references are presented. The first is a naive algorithm that works by traversing the surface parse trees of the sentences of the text in a particular order looking for noun phrases of the correct gender and number. The algorithm clearly does not work in all cases, but the results of an examination of several hundred examples from published texts show that it performs remarkably well. In the second approach, it is shown how pronoun solution can be handled in a comprehensive system for semantic analysis of English texts. The system is described, and it is shown in a detailed treatment of several examples how semantic analysis locates the antecedents of most pronouns as a by-product. Included are the classic examples of Winograd and Charniak.},
  author = {Hobbs, Jerry R},
  doi = {10.1016/0024-3841(78)90006-2},
  issn = {0024-3841},
  journal = {Lingua},
  mendeley-groups = {Thesis try 2},
  number = {4},
  pages = {311--338},
  title = {Resolving Pronoun References},
  url = {http://www.sciencedirect.com/science/article/pii/0024384178900062},
  volume = {44},
  year = {1978}
}
@inproceedings{Brennan1987,
  author    = {Brennan, Susan E. and Friedman, Marilyn W. and Pollard, Carl J.},
  title     = {A Centering Approach to Pronouns},
  booktitle = {Proceedings of the 25th Annual Meeting on Association for Computational Linguistics},
  series    = {ACL '87},
  location  = {Stanford, California},
  pages     = {155--162},
  numpages  = {8},
  year      = {1987},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA, USA},
  doi       = {10.3115/981175.981197},
  url       = {https://doi.org/10.3115/981175.981197},
  acmid     = {981197},
}
@article{Lappin1994,
  author     = {Lappin, Shalom and Leass, Herbert J.},
  title      = {An Algorithm for Pronominal Anaphora Resolution},
  journal    = {Computational Linguistics},
  issue_date = {December 1994},
  volume     = {20},
  number     = {4},
  month      = dec,
  year       = {1994},
  issn       = {0891-2017},
  pages      = {535--561},
  numpages   = {27},
  url        = {http://dl.acm.org/citation.cfm?id=203987.203989},
  acmid      = {203989},
  publisher  = {MIT Press},
  address    = {Cambridge, MA, USA},
}
@article{Soon2001,
  author     = {Soon, Wee Meng and Ng, Hwee Tou and Lim, Daniel Chung Yong},
  title      = {A Machine Learning Approach to Coreference Resolution of Noun Phrases},
  journal    = {Computational Linguistics},
  issue_date = {December 2001},
  volume     = {27},
  number     = {4},
  month      = dec,
  year       = {2001},
  issn       = {0891-2017},
  pages      = {521--544},
  numpages   = {24},
  url        = {http://dl.acm.org/citation.cfm?id=972597.972602},
  acmid      = {972602},
  publisher  = {MIT Press},
  address    = {Cambridge, MA, USA},
}
@inproceedings{Ng2002,
  author    = {Ng, Vincent and Cardie, Claire},
  title     = {Identifying Anaphoric and Non-anaphoric Noun Phrases to Improve Coreference Resolution},
  booktitle = {Proceedings of the 19th International Conference on Computational Linguistics - Volume 1},
  series    = {COLING '02},
  location  = {Taipei, Taiwan},
  pages     = {1--7},
  numpages  = {7},
  year      = {2002},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA, USA},
  doi       = {10.3115/1072228.1072367},
  url       = {https://doi.org/10.3115/1072228.1072367},
  acmid     = {1072367},
}
@inproceedings{Bengtson2008,
  author    = {Bengtson, Eric and Roth, Dan},
  title     = {Understanding the Value of Features for Coreference Resolution},
  booktitle = {Proceedings of the Conference on Empirical Methods in Natural Language Processing},
  series    = {EMNLP '08},
  location  = {Honolulu, Hawaii},
  pages     = {294--303},
  numpages  = {10},
  year      = {2008},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA, USA},
  url       = {http://dl.acm.org/citation.cfm?id=1613715.1613756},
  acmid     = {1613756},
}
@inproceedings{Luo2004,
  author    = {Luo, Xiaoqiang and Ittycheriah, Abe and Jing, Hongyan and Kambhatla, Nanda and Roukos, Salim},
  title     = {A Mention-synchronous Coreference Resolution Algorithm Based on the {Bell} Tree},
  booktitle = {Proceedings of the 42nd Annual Meeting on Association for Computational Linguistics},
  series    = {ACL '04},
  year      = {2004},
  location  = {Barcelona, Spain},
  articleno = {135},
  url       = {https://doi.org/10.3115/1218955.1218973},
  doi       = {10.3115/1218955.1218973},
  acmid     = {1218973},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA, USA},
}
@inproceedings{Yang2004,
author = {Yang, Xiaofeng and Su, Jian and Zhou, Guodong and Tan, Chew Lim},
title = {An {NP}-cluster Based Approach to Coreference Resolution},
booktitle = {Proceedings of the 20th International Conference on Computational Linguistics},
series = {COLING '04},
year = {2004},
location = {Geneva, Switzerland},
articleno = {226},
url = {https://doi.org/10.3115/1220355.1220388},
doi = {10.3115/1220355.1220388},
acmid = {1220388},
publisher = {Association for Computational Linguistics},
address = {Stroudsburg, PA, USA},
}
@inproceedings{Yang2008,
author = {Yang, Xiaofeng and Su, Jian and Lang, Jun and Tan, Chew Lim and Liu, Ting and Li, Sheng},
title = {An Entity-Mention Model for Coreference Resolution with Inductive Logic Programming},
booktitle = {Proceedings of ACL-08: HLT},
month = jun,
year = {2008},
address = {Columbus, Ohio},
publisher = {Association for Computational Linguistics},
pages = {843--851},
url = {http://www.aclweb.org/anthology/P/P08/P08-1096}
}
@inproceedings{Lee2011,
author = {Lee, Heeyoung and Peirsman, Yves and Chang, Angel and Chambers, Nathanael and Surdeanu, Mihai and Jurafsky, Dan},
title = {{Stanford}'s Multi-pass Sieve Coreference Resolution System at the {CoNLL}-2011 Shared Task},
booktitle = {Proceedings of the Fifteenth Conference on Computational Natural Language Learning: Shared Task},
series = {CONLL Shared Task '11},
year = {2011},
isbn = {9781937284084},
location = {Portland, Oregon},
pages = {28--34},
numpages = {7},
url = {http://dl.acm.org/citation.cfm?id=2132936.2132938},
acmid = {2132938},
publisher = {Association for Computational Linguistics},
address = {Stroudsburg, PA, USA},
}
@inproceedings{Denis2007,
author = {Denis, Pascal and Baldridge, Jason},
title = {A Ranking Approach to Pronoun Resolution},
booktitle = {Proceedings of the 20th International Joint Conference on Artificial Intelligence},
series = {IJCAI'07},
year = {2007},
location = {Hyderabad, India},
pages = {1588--1593},
numpages = {6},
url = {http://dl.acm.org/citation.cfm?id=1625275.1625532},
acmid = {1625532},
publisher = {Morgan Kaufmann Publishers Inc.},
address = {San Francisco, CA, USA},
}
@inproceedings{Rahman2009,
author = {Rahman, Altaf and Ng, Vincent},
title = {Supervised Models for Coreference Resolution},
booktitle = {Proceedings of the 2009 Conference on Empirical Methods in Natural Language Processing: Volume 2},
series = {EMNLP '09},
year = {2009},
isbn = {978-1-932432-62-6},
location = {Singapore},
pages = {968--977},
numpages = {10},
url = {http://dl.acm.org/citation.cfm?id=1699571.1699639},
acmid = {1699639},
publisher = {Association for Computational Linguistics},
address = {Stroudsburg, PA, USA},
}
@article{martschat2015,
title = {Latent Structures for Coreference Resolution},
author = "Martschat, Sebastian and Strube, Michael",
journal = "Transactions of the Association for Computational Linguistics",
volume = "3",
year = "2015",
url = "https://www.aclweb.org/anthology/Q15-1029",
doi = "10.1162/tacl_a_00147",
pages = "405--418",
abstract = {Machine learning approaches to coreference resolution vary greatly in the modeling of the problem: while early approaches operated on the mention pair level, current research focuses on ranking architectures and antecedent trees. We propose a unified representation of different approaches to coreference resolution in terms of the structure they operate on. We represent several coreference resolution approaches proposed in the literature in our framework and evaluate their performance. Finally, we conduct a systematic analysis of the output of these approaches, highlighting differences and similarities.},
}
@article{Rahman2011,
author = {Rahman, Altaf and Ng, Vincent},
title = {Narrowing the Modeling Gap: A Cluster-ranking Approach to Coreference Resolution},
journal = {Journal of Artificial Intelligence Research},
issue_date = {January 2011},
volume = {40},
number = {1},
month = jan,
year = {2011},
issn = {1076-9757},
pages = {469--521},
numpages = {53},
url = {http://dl.acm.org/citation.cfm?id=2016945.2016958},
acmid = {2016958},
publisher = {AI Access Foundation},
address = {USA},
}
@inproceedings{Ma2014,
address = {Stroudsburg, PA, USA},
author = {Ma, Chao and Doppa, Janardhan Rao and Orr, J. Walker and Mannem, Prashanth and Fern, Xiaoli and Dietterich, Tom and Tadepalli, Prasad},
booktitle = {Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
doi = {10.3115/v1/D14-1225},
pages = {2115--2126},
publisher = {Association for Computational Linguistics},
title = {{Prune-and-Score}: Learning for Greedy Coreference Resolution},
url = {http://aclweb.org/anthology/D14-1225},
year = {2014}
}
@inproceedings{Clark2016,
title = "Improving Coreference Resolution by Learning Entity-Level Distributed Representations",
author = {Clark, Kevin and Manning, Christopher D.},
booktitle = "Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = aug,
year = "2016",
address = "Berlin, Germany",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/P16-1061",
doi = "10.18653/v1/P16-1061",
pages = "643--653",
}
@article{Li2018,
author = {Li, Chen and Rao, Zhiqiang and Zheng, Qinghua and Zhang, Xiangrong},
title = "{A set of domain rules and a deep network for protein coreference resolution}",
journal = {Database},
volume = {2018},
year = {2018},
month = jul,
issn = {1758-0463},
doi = {10.1093/database/bay065},
url = {https://doi.org/10.1093/database/bay065},
eprint = {http://oup.prod.sis.lan/database/article-pdf/doi/10.1093/database/bay065/27438328/bay065.pdf},
}
@article{Giorgi526244,
author = {Giorgi, John and Bader, Gary},
title = {Towards reliable named entity recognition in the biomedical domain},
elocation-id = {526244},
year = {2019},
doi = {10.1101/526244},
publisher = {Cold Spring Harbor Laboratory},
URL = {https://www.biorxiv.org/content/early/2019/01/22/526244},
eprint = {https://www.biorxiv.org/content/early/2019/01/22/526244.full.pdf},
journal = {bioRxiv}
}
@article{Hakenberg2011,
abstract = {SUMMARY Identifying mentions of named entities, such as genes or diseases, and normalizing them to database identifiers have become an important step in many text and data mining pipelines. Despite this need, very few entity normalization systems are publicly available as source code or web services for biomedical text mining. Here we present the Gnat Java library for text retrieval, named entity recognition, and normalization of gene and protein mentions in biomedical text. The library can be used as a component to be integrated with other text-mining systems, as a framework to add user-specific extensions, and as an efficient stand-alone application for the identification of gene and protein names for data analysis. On the BioCreative III test data, the current version of Gnat achieves a Tap-20 score of 0.1987. AVAILABILITY The library and web services are implemented in Java and the sources are available from http://gnat.sourceforge.net. CONTACT [email protected].},
author = {Hakenberg, J{\"{o}}rg and Gerner, Martin and Haeussler, Maximilian and Solt, Ill{\'{e}}s and Plake, Conrad and Schroeder, Michael and Gonzalez, Graciela and Nenadic, Goran and Bergman, Casey M},
doi = {10.1093/bioinformatics/btr455},
issn = {1367-4811},
journal = {Bioinformatics (Oxford, England)},
mendeley-groups = {Thesis try 2},
month = oct,
number = {19},
pages = {2769--71},
pmid = {21813477},
title = {The {GNAT} library for local and remote gene mention normalization},
url = {http://www.ncbi.nlm.nih.gov/pubmed/21813477 http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=PMC3179658},
volume = {27},
year = {2011}
}
@article{Hsu2008,
abstract = {MOTIVATION Tagging gene and gene product mentions in scientific text is an important initial step of literature mining. In this article, we describe in detail our gene mention tagger participated in BioCreative 2 challenge and analyze what contributes to its good performance. Our tagger is based on the conditional random fields model (CRF), the most prevailing method for the gene mention tagging task in BioCreative 2. Our tagger is interesting because it accomplished the highest F-scores among CRF-based methods and second over all. Moreover, we obtained our results by mostly applying open source packages, making it easy to duplicate our results. RESULTS We first describe in detail how we developed our CRF-based tagger. We designed a very high dimensional feature set that includes most of information that may be relevant. We trained bi-directional CRF models with the same set of features, one applies forward parsing and the other backward, and integrated two models based on the output scores and dictionary filtering. One of the most prominent factors that contributes to the good performance of our tagger is the integration of an additional backward parsing model. However, from the definition of CRF, it appears that a CRF model is symmetric and bi-directional parsing models will produce the same results. We show that due to different feature settings, a CRF model can be asymmetric and the feature setting for our tagger in BioCreative 2 not only produces different results but also gives backward parsing models slight but constant advantage over forward parsing model. To fully explore the potential of integrating bi-directional parsing models, we applied different asymmetric feature settings to generate many bi-directional parsing models and integrate them based on the output scores. Experimental results show that this integrated model can achieve even higher F-score solely based on the training corpus for gene mention tagging. 
AVAILABILITY Data sets, programs and an on-line service of our gene mention tagger can be accessed at http://aiia.iis.sinica.edu.tw/biocreative2.htm.},
author = {Hsu, Chun-Nan and Chang, Yu-Ming and Kuo, Cheng-Ju and Lin, Yu-Shi and Huang, Han-Shen and Chung, I-Fang},
doi = {10.1093/bioinformatics/btn183},
issn = {1367-4811},
journal = {Bioinformatics (Oxford, England)},
mendeley-groups = {Thesis try 2},
month = jul,
number = {13},
pages = {i286--94},
pmid = {18586726},
title = {Integrating high dimensional bi-directional parsing models for gene mention tagging},
url = {http://www.ncbi.nlm.nih.gov/pubmed/18586726 http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=PMC2718659},
volume = {24},
year = {2008}
}
@article{Leaman2008,
abstract = {There has been an increasing amount of research on biomedical named entity recognition, the most basic text extraction problem, resulting in significant progress by different research teams around the world. This has created a need for a freely-available, open source system implementing the advances described in the literature. In this paper we present BANNER, an open-source, executable survey of advances in biomedical named entity recognition, intended to serve as a benchmark for the field. BANNER is implemented in Java as a machine-learning system based on conditional random fields and includes a wide survey of the best techniques recently described in the literature. It is designed to maximize domain independence by not employing brittle semantic features or rule-based processing steps, and achieves significantly better performance than existing baseline systems. It is therefore useful to developers as an extensible NER implementation, to researchers as a standard for comparing innovative techniques, and to biologists requiring the ability to find novel entities in large amounts of text.},
author = {Leaman, Robert and Gonzalez, Graciela},
issn = {2335-6928},
journal = {Pacific Symposium on Biocomputing. Pacific Symposium on Biocomputing},
mendeley-groups = {Thesis try 2},
pages = {652--63},
pmid = {18229723},
title = {{BANNER}: an executable survey of advances in biomedical named entity recognition},
url = {http://www.ncbi.nlm.nih.gov/pubmed/18229723},
year = {2008}
}
@article{Wei2015,
abstract = {The automatic recognition of gene names and their associated database identifiers from biomedical text has been widely studied in recent years, as these tasks play an important role in many downstream text-mining applications. Despite significant previous research, only a small number of tools are publicly available and these tools are typically restricted to detecting only mention level gene names or only document level gene identifiers. In this work, we report GNormPlus: an end-to-end and open source system that handles both gene mention and identifier detection. We created a new corpus of 694 PubMed articles to support our development of GNormPlus, containing manual annotations for not only gene names and their identifiers, but also closely related concepts useful for gene name disambiguation, such as gene families and protein domains. GNormPlus integrates several advanced text-mining techniques, including SimConcept for resolving composite gene names. As a result, GNormPlus compares favorably to other state-of-the-art methods when evaluated on two widely used public benchmarking datasets, achieving 86.7 { \% } F1-score on the BioCreative II Gene Normalization task dataset and 50.1 { \% } F1-score on the BioCreative III Gene Normalization task dataset. The GNormPlus source code and its annotated corpus are freely available, and the results of applying GNormPlus to the entire PubMed are freely accessible through our web-based tool PubTator.},
author = {Wei, Chih-Hsuan and Kao, Hung-Yu and Lu, Zhiyong},
doi = {10.1155/2015/918710},
issn = {2314-6141},
journal = {BioMed research international},
mendeley-groups = {Thesis try 2},
pages = {918710},
pmid = {26380306},
title = {{GNormPlus}: An Integrative Approach for Tagging Genes, Gene Families, and Protein Domains},
url = {http://www.ncbi.nlm.nih.gov/pubmed/26380306 http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=PMC4561873},
volume = {2015},
year = {2015}
}
@article{Leaman2015,
abstract = {Chemical compounds and drugs are an important class of entities in biomedical research with great potential in a wide range of applications, including clinical medicine. Locating chemical named entities in the literature is a useful step in chemical text mining pipelines for identifying the chemical mentions, their properties, and their relationships as discussed in the literature. We introduce the tmChem system, a chemical named entity recognizer created by combining two independent machine learning models in an ensemble. We use the corpus released as part of the recent CHEMDNER task to develop and evaluate tmChem, achieving a micro-averaged f-measure of 0.8739 on the CEM subtask (mention-level evaluation) and 0.8745 f-measure on the CDI subtask (abstract-level evaluation). We also report a high-recall combination (0.9212 for CEM and 0.9224 for CDI). tmChem achieved the highest f-measure reported in the CHEMDNER task for the CEM subtask, and the high recall variant achieved the highest recall on both the CEM and CDI tasks. We report that tmChem is a state-of-the-art tool for chemical named entity recognition and that performance for chemical named entity recognition has now tied (or exceeded) the performance previously reported for genes and diseases. Future research should focus on tighter integration between the named entity recognition and normalization steps for improved performance. The source code and a trained model for both models of tmChem is available at: http://www.ncbi.nlm.nih.gov/CBBresearch/Lu/Demo/tmChem. The results of running tmChem (Model 2) on PubMed are available in PubTator: http://www.ncbi.nlm.nih.gov/CBBresearch/Lu/Demo/PubTator.},
author = {Leaman, Robert and Wei, Chih-Hsuan and Lu, Zhiyong},
doi = {10.1186/1758-2946-7-S1-S3},
issn = {1758-2946},
journal = {Journal of cheminformatics},
mendeley-groups = {Thesis try 2},
number = {Suppl 1 Text mining for chemistry and the CHEMDNER track},
pages = {S3},
pmid = {25810774},
title = {{tmChem}: a high performance approach for chemical named entity recognition and normalization},
url = {http://www.ncbi.nlm.nih.gov/pubmed/25810774 http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=PMC4331693},
volume = {7},
year = {2015}
}
@article{Corbett2018,
abstract = {Chemical named entity recognition (NER) has traditionally been dominated by conditional random fields (CRF)-based approaches but given the success of the artificial neural network techniques known as ``deep learning'' we decided to examine them as an alternative to CRFs. We present here several chemical named entity recognition systems. The first system translates the traditional CRF-based idioms into a deep learning framework, using rich per-token features and neural word embeddings, and producing a sequence of tags using bidirectional long short term memory (LSTM) networks---a type of recurrent neural net. The second system eschews the rich feature set---and even tokenisation---in favour of character labelling using neural character embeddings and multiple LSTM layers. The third system is an ensemble that combines the results of the first two systems. Our original BioCreative V.5 competition entry was placed in the top group with the highest F scores, and subsequent using transfer learning have achieved a final F score of 90.33 { \% } on the test data (precision 91.47 { \% } , recall 89.21 { \% } ).},
author = {Corbett, Peter and Boyle, John},
doi = {10.1186/s13321-018-0313-8},
file = {:Users/cthoyt/ownCloud/Mendeley/2018/Chemlistem chemical named entity recognition using recurrent neural networks - 2018 - Corbett, Boyle.pdf:pdf},
issn = {1758-2946},
journal = {Journal of Cheminformatics},
month = dec,
number = {1},
pages = {59},
title = {{Chemlistem}: chemical named entity recognition using recurrent neural networks},
url = {https://doi.org/10.1186/s13321-018-0313-8},
volume = {10},
year = {2018}
}
@article{Leaman2013,
abstract = {MOTIVATION Despite the central role of diseases in biomedical research, there have been much fewer attempts to automatically determine which diseases are mentioned in a text-the task of disease name normalization (DNorm)-compared with other normalization tasks in biomedical text mining research. METHODS In this article we introduce the first machine learning approach for DNorm, using the NCBI disease corpus and the MEDIC vocabulary, which combines MeSH { \textregistered } and OMIM. Our method is a high-performing and mathematically principled framework for learning similarities between mentions and concept names directly from training data. The technique is based on pairwise learning to rank, which has not previously been applied to the normalization task but has proven successful in large optimization problems for information retrieval. RESULTS We compare our method with several techniques based on lexical normalization and matching, MetaMap and Lucene. Our algorithm achieves 0.782 micro-averaged F-measure and 0.809 macro-averaged F-measure, an increase over the highest performing baseline method of 0.121 and 0.098, respectively. AVAILABILITY The source code for DNorm is available at http://www.ncbi.nlm.nih.gov/CBBresearch/Lu/Demo/DNorm, along with a web-based demonstration and links to the NCBI disease corpus. Results on PubMed abstracts are available in PubTator: http://www.ncbi.nlm.nih.gov/CBBresearch/Lu/Demo/PubTator .},
author = {Leaman, Robert and {Islamaj Dogan}, Rezarta and Lu, Zhiyong},
doi = {10.1093/bioinformatics/btt474},
issn = {1367-4811},
journal = {Bioinformatics (Oxford, England)},
month = nov,
number = {22},
pages = {2909--17},
pmid = {23969135},
title = {{DNorm}: disease name normalization with pairwise learning to rank},
url = {http://www.ncbi.nlm.nih.gov/pubmed/23969135 http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=PMC3810844},
volume = {29},
year = {2013}
}
@article{Gerner2010,
abstract = {BACKGROUND: The task of recognizing and identifying species names in biomedical literature has recently been regarded as critical for a number of applications in text and data mining, including gene name recognition, species-specific document retrieval, and semantic enrichment of biomedical articles. RESULTS: In this paper we describe an open-source species name recognition and normalization software system, LINNAEUS, and evaluate its performance relative to several automatically generated biomedical corpora, as well as a novel corpus of full-text documents manually annotated for species mentions. LINNAEUS uses a dictionary-based approach (implemented as an efficient deterministic finite-state automaton) to identify species names and a set of heuristics to resolve ambiguous mentions. When compared against our manually annotated corpus, LINNAEUS performs with 94 { \% } recall and 97 { \% } precision at the mention level, and 98 { \% } recall and 90 { \% } precision at the document level. Our system successfully solves the problem of disambiguating uncertain species mentions, with 97 { \% } of all mentions in PubMed Central full-text documents resolved to unambiguous NCBI taxonomy identifiers. CONCLUSIONS: LINNAEUS is an open source, stand-alone software system capable of recognizing and normalizing species name mentions with speed and accuracy, and can therefore be integrated into a range of bioinformatics and text-mining applications. The software and manually annotated corpus can be downloaded freely at http://linnaeus.sourceforge.net/.},
author = {Gerner, Martin and Nenadic, Goran and Bergman, Casey M.},
doi = {10.1186/1471-2105-11-85},
file = {:Users/cthoyt/ownCloud/Mendeley/2010/LINNAEUS A species name identification system for biomedical literature - 2010 - Gerner, Nenadic, Bergman.pdf:pdf},
issn = {14712105},
journal = {BMC Bioinformatics},
title = {{LINNAEUS}: A species name identification system for biomedical literature},
volume = {11},
year = {2010}
}
@article{Wei2012,
abstract = {As suggested in recent studies, species recognition and disambiguation is one of the most critical and challenging steps in many downstream text-mining applications such as the gene normalization task and protein-protein interaction extraction. We report SR4GN: an open source tool for species recognition and disambiguation in biomedical text. In addition to the species detection function in existing tools, SR4GN is optimized for the Gene Normalization task. As such it is developed to link detected species with corresponding gene mentions in a document. SR4GN achieves 85.42 { \% } in accuracy and compares favorably to the other state-of-the-art techniques in benchmark experiments. Finally, SR4GN is implemented as a standalone software tool, thus making it convenient and robust for use in many text-mining applications. SR4GN can be downloaded at: http://www.ncbi.nlm.nih.gov/CBBresearch/Lu/downloads/SR4GN.},
author = {Wei, Chih-Hsuan and Kao, Hung-Yu and Lu, Zhiyong},
doi = {10.1371/journal.pone.0038460},
file = {:Users/cthoyt/ownCloud/Mendeley/2012/SR4GN A Species Recognition Software Tool for Gene Normalization - 2012 - Wei, Kao, Lu.PDF:PDF},
issn = {19326203},
journal = {PLoS ONE},
number = {6},
pages = {7--11},
title = {{SR4GN}: A species recognition software tool for gene normalization},
volume = {7},
year = {2012}
}
@article{Lee2015,
abstract = {Disease plays a central role in many areas of biomedical research and healthcare. However, the rapid growth of disease and treatment research creates barriers to the knowledge aggregation of PubMed database. Thus, a framework of disease mention recognition and normalization has become increasingly important for biomedical text mining. In this work, we utilize conditional random fields (CRFs) to develop a recognition system and optimize the results by customizing several post-processing steps, such as abbreviation resolution and consistency improvement. At the DNER subtask of BioCreative V CDR task, the system performance of disease normalization is 0.8646 of F-measure, especially a high precision (0.8963) on the normalization task.},
author = {Lee, Hsin-Chun and Hsu, Yi-Yu and Kao, Hung-Yu},
file = {:Users/cthoyt/ownCloud/Mendeley/2015/An enhanced CRF-based system for disease name entity recognition and normalization on BioCreative V DNER Task - 2015 - Lee, Hsu, Kao.pdf:pdf},
journal = {Proceedings of the Fifth BioCreative Challenge Evaluation Workshop},
keywords = {and normalization,biomedical text mining,conditional,disease name entity recognition,random fields},
pages = {226--233},
title = {An enhanced {CRF}-based system for disease name entity recognition and normalization on {BioCreative V DNER} Task},
year = {2015}
}
@article{Davis2012,
abstract = {The Comparative Toxicogenomics Database (CTD) is a public resource that promotes understanding about the effects of environmental chemicals on human health. CTD biocurators manually curate a triad of chemical-gene, chemical-disease and gene-disease relationships from the scientific literature. The CTD curation paradigm uses controlled vocabularies for chemicals, genes and diseases. To curate disease information, CTD first had to identify a source of controlled terms. Two resources seemed to be good candidates: the Online Mendelian Inheritance in Man (OMIM) and the 'Diseases' branch of the National Library of Medicine's Medical Subject Headers (MeSH). To maximize the advantages of both, CTD biocurators undertook a novel initiative to map the flat list of OMIM disease terms into the hierarchical nature of the MeSH vocabulary. The result is CTD's 'merged disease vocabulary' (MEDIC), a unique resource that integrates OMIM terms, synonyms and identifiers with MeSH terms, synonyms, definitions, identifiers and hierarchical relationships. MEDIC is both a deep and broad vocabulary, composed of 9700 unique diseases described by more than 67 000 terms (including synonyms). It is freely available to download in various formats from CTD. While neither a true ontology nor a perfect solution, this vocabulary has nonetheless proved to be extremely successful and practical for our biocurators in generating over 2.5 million disease-associated toxicogenomic relationships in CTD. Other external databases have also begun to adopt MEDIC for their disease vocabulary. Here, we describe the construction, implementation, maintenance and use of MEDIC to raise awareness of this resource and to offer it as a putative scaffold in the formal construction of an official disease ontology. DATABASE URL: http://ctd.mdibl.org/voc.go?type=disease.},
author = {Davis, Allan Peter and Wiegers, Thomas C. and Rosenstein, Michael C. and Mattingly, Carolyn J.},
doi = {10.1093/database/bar065},
issn = {17580463},
journal = {Database},
pages = {1--9},
title = {{MEDIC}: A practical disease vocabulary used at the {Comparative Toxicogenomics Database}},
volume = {2012},
year = {2012}
}
@article{Kuo2009,
abstract = {Background: To automatically process large quantities of biological literature for knowledge discovery and information curation, text mining tools are becoming essential. Abbreviation recognition is related to NER and can be considered as a pair recognition task of a terminology and its corresponding abbreviation from free text. The successful identification of abbreviation and its corresponding definition is not only a prerequisite to index terms of text databases to produce articles of related interests, but also a building block to improve existing gene mention tagging and gene normalization tools. Results: Our approach to abbreviation recognition (AR) is based on machine-learning, which exploits a novel set of rich features to learn rules from training data. Tested on the AB3P corpus, our system demonstrated a F-score of 89.90 { \% } with 95.86 { \% } precision at 84.64 { \% } recall, higher than the result achieved by the existing best AR performance system. We also annotated a new corpus of 1200 PubMed abstracts which was derived from BioCreative II gene normalization corpus. On our annotated corpus, our system achieved a F-score of 86.20 { \% } with 93.52 { \% } precision at 79.95 { \% } recall, which also outperforms all tested systems. Conclusion: By applying our system to extract all short form-long form pairs from all available PubMed abstracts, we have constructed BIOADI. Mining BIOADI reveals many interesting trends of bio-medical research. Besides, we also provide an off-line AR software in the download section on http://bioagent.iis.sinica.edu.tw/BIOADI/. ? 2009 Kuo et al; licensee BioMed Central Ltd.},
author = {Kuo, Cheng Ju and Ling, Maurice H.T. and Lin, Kuan Ting and Hsu, Chun Nan},
doi = {10.1186/1471-2105-10-S15-S7},
file = {:Users/cthoyt/ownCloud/Mendeley/2009/and definitions in biological literature - 2009 - Kuo et al.pdf:pdf},
isbn = {1471210510},
issn = {14712105},
journal = {BMC Bioinformatics},
number = {SUPPL. 15},
pages = {1--10},
title = {{BIOADI}: A machine learning approach to identifying abbreviations and definitions in biological literature},
volume = {10},
year = {2009}
}
@article{Mikolov2013,
archivePrefix = {arXiv},
arxivId = {1301.3781v3},
author = {Mikolov, Tomas and Chen, Kai and Corrado, Greg and Dean, Jeffrey},
eprint = {1301.3781v3},
file = {:Users/cthoyt/ownCloud/Mendeley/2013/Efficient Estimation of Word Representations in Vector Space - 2013 - Mikolov et al.pdf:pdf},
pages = {1--12},
title = {Efficient Estimation of Word Representations in Vector Space},
year = {2013}
}
@article{Pennington2014,
author = {Pennington, Jeffrey and Socher, Richard and Manning, Christopher D.},
file = {:Users/cthoyt/ownCloud/Mendeley/2014/Glove Global vectors for word representation - 2014 - Jeffrey Pennington, Socher, Manning.pdf:pdf},
journal = {Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing ({EMNLP})},
title = {{GloVe}: Global Vectors for Word Representation},
year = {2014}
}
@article{Lample2016,
abstract = {State-of-the-art named entity recognition systems rely heavily on hand-crafted features and domain-specific knowledge in order to learn effectively from the small, supervised training corpora that are available. In this paper, we introduce two new neural architectures---one based on bidirectional LSTMs and conditional random fields, and the other that constructs and labels segments using a transition-based approach inspired by shift-reduce parsers. Our models rely on two sources of information about words: character-based word representations learned from the supervised corpus and unsupervised word representations learned from unannotated corpora. Our models obtain state-of-the-art performance in NER in four languages without resorting to any language-specific knowledge or resources such as gazetteers.},
archivePrefix = {arXiv},
arxivId = {1603.01360},
author = {Lample, Guillaume and Ballesteros, Miguel and Subramanian, Sandeep and Kawakami, Kazuya and Dyer, Chris},
eprint = {1603.01360},
file = {:Users/cthoyt/ownCloud/Mendeley/2016/Neural Architectures for Named Entity Recognition - 2016 - Lample et al.pdf:pdf},
pages = {260--270},
title = {Neural Architectures for Named Entity Recognition},
url = {http://arxiv.org/abs/1603.01360},
year = {2016}
}
@article{Kim2003,
abstract = {MOTIVATION: Natural language processing (NLP) methods are regarded as being useful to raise the potential of text mining from biological literature. The lack of an extensively annotated corpus of this literature, however, causes a major bottleneck for applying NLP techniques. GENIA corpus is being developed to provide reference materials to let NLP techniques work for bio-textmining. RESULTS: GENIA corpus version 3.0 consisting of 2000 MEDLINE abstracts has been released with more than 400,000 words and almost 100,000 annotations for biological terms.},
author = {Kim, J. D. and Ohta, T. and Tateisi, Y. and Tsujii, J.},
doi = {10.1093/bioinformatics/btg1023},
file = {:Users/cthoyt/ownCloud/Mendeley/2003/for bio-textmining - 2003 - Ohta, Tateisi, Tsujii.pdf:pdf},
issn = {13674803},
journal = {Bioinformatics},
keywords = {Computational Molecular Biology,Corpus,Information Extraction,Natural Language Processing,Text Mining},
number = {SUPPL. 1},
pages = {180--182},
title = {{ GENIA corpus - A semantically annotated corpus for bio-textmining }},
volume = {19},
year = {2003}
}
@article{Cote2006,
abstract = {BACKGROUND: With the vast amounts of biomedical data being generated by high-throughput analysis methods, controlled vocabularies and ontologies are becoming increasingly important to annotate units of information for ease of search and retrieval. Each scientific community tends to create its own locally available ontology. The interfaces to query these ontologies tend to vary from group to group. We saw the need for a centralized location to perform controlled vocabulary queries that would offer both a lightweight web-accessible user interface as well as a consistent, unified SOAP interface for automated queries. RESULTS: The Ontology Lookup Service (OLS) was created to integrate publicly available biomedical ontologies into a single database. All modified ontologies are updated daily. A list of currently loaded ontologies is available online. The database can be queried to obtain information on a single term or to browse a complete ontology using AJAX. Auto-completion provides a user-friendly search mechanism. An AJAX-based ontology viewer is available to browse a complete ontology or subsets of it. A programmatic interface is available to query the webservice using SOAP. The service is described by a WSDL descriptor file available online. A sample Java client to connect to the webservice using SOAP is available for download from SourceForge. All OLS source code is publicly available under the open source Apache Licence. CONCLUSION: The OLS provides a user-friendly single entry point for publicly available ontologies in the Open Biomedical Ontology (OBO) format. It can be accessed interactively or programmatically at http://www.ebi.ac.uk/ontology-lookup/.},
author = {Cote, RG and Jones, P and Apweiler, R and Hermjakob, H},
doi = {10.1186/1471-2105-7-97},
file = {:Users/cthoyt/ownCloud/Mendeley/2006/The Ontology Lookup Service, a lightweight cross-platform tool for controlled vocabulary queries. - 2006 - Cote et al.pdf:pdf;:Users/cthoyt/ownCloud/Mendeley/2006/The Ontology Lookup Service, a lightweight cross-platform tool for controlled vocabulary queries. - 2006 - Cote et al(2).pdf:pdf},
issn = {1471-2105},
journal = {BMC Bioinformatics},
mendeley-groups = {Thesis,Paper Resources/PyBEL Application Note},
pages = {1--7},
pmid = {16507094},
title = {{ The Ontology Lookup Service, a lightweight cross-platform tool for controlled vocabulary queries. }},
volume = {7},
year = {2006}
}
@article{Laibe2007,
abstract = {BACKGROUND The Minimal Information Requested In the Annotation of biochemical Models (MIRIAM) is a set of guidelines for the annotation and curation processes of computational models, in order to facilitate their exchange and reuse. An important part of the standard consists in the controlled annotation of model components, based on Uniform Resource Identifiers. In order to enable interoperability of this annotation, the community has to agree on a set of standard URIs, corresponding to recognised data types. MIRIAM Resources are being developed to support the use of those URIs. RESULTS MIRIAM Resources are a set of on-line services created to catalogue data types, their URIs and the corresponding physical URLs (or resources), whether data types are controlled vocabularies or primary data resources. MIRIAM Resources are composed of several components: MIRIAM Database stores the information, MIRIAM Web Services allows to programmatically access the database, MIRIAM Library provides an access to the Web Services and MIRIAM Web Application is a way to access the data (human browsing) and also to edit or add entries. CONCLUSIONS The project MIRIAM Resources allows an easy access to MIRIAM URIs and the associated information and is therefore crucial to foster a general use of MIRIAM annotations in computational models of biological processes.},
author = {Laibe, Camille and Le Nov{\`{e}}re, Nicolas},
doi = {10.1186/1752-0509-1-58},
file = {:Users/cthoyt/ownCloud/Mendeley/2007/MIRIAM Resources tools to generate and resolve robust cross-references in Systems Biology. - 2007 - Laibe, Le Nov { \` { e } } re.pdf:pdf},
isbn = {1752-0509},
issn = {1752-0509},
journal = {BMC systems biology},
month = {dec},
pages = {58},
pmid = {18078503},
title = {{ MIRIAM Resources: tools to generate and resolve robust cross-references in Systems Biology. }},
url = {http://www.ncbi.nlm.nih.gov/pubmed/18078503 http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=PMC2259379},
volume = {1},
year = {2007}
}
@article{Taylor2008,
author = {Taylor, Chris F and Field, Dawn and Sansone, Susanna-Assunta and Aerts, Jan and Apweiler, Rolf and Ashburner, Michael and Ball, Catherine A and Binz, Pierre-Alain and Bogue, Molly and Booth, Tim and Brazma, Alvis and Brinkman, Ryan R and { Michael Clark } , Adam and Deutsch, Eric W and Fiehn, Oliver and Fostel, Jennifer and Ghazal, Peter and Gibson, Frank and Gray, Tanya and Grimes, Graeme and Hancock, John M and Hardy, Nigel W and Hermjakob, Henning and Julian, Randall K and Kane, Matthew and Kettner, Carsten and Kinsinger, Christopher and Kolker, Eugene and Kuiper, Martin and { Le Nov { \` { e } } re } , Nicolas and Leebens-Mack, Jim and Lewis, Suzanna E and Lord, Phillip and Mallon, Ann-Marie and Marthandan, Nishanth and Masuya, Hiroshi and McNally, Ruth and Mehrle, Alexander and Morrison, Norman and Orchard, Sandra and Quackenbush, John and Reecy, James M and Robertson, Donald G and Rocca-Serra, Philippe and Rodriguez, Henry and Rosenfelder, Heiko and Santoyo-Lopez, Javier and Scheuermann, Richard H and Schober, Daniel and Smith, Barry and Snape, Jason and Stoeckert, Christian J and Tipton, Keith and Sterk, Peter and Untergasser, Andreas and Vandesompele, Jo and Wiemann, Stefan},
doi = {10.1038/nbt.1411},
issn = {1546-1696},
journal = {Nature biotechnology},
mendeley-groups = {Thesis try 2},
month = {aug},
number = {8},
pages = {889--96},
pmid = {18688244},
title = {{ Promoting coherent minimum reporting guidelines for biological and biomedical investigations: the MIBBI project. }},
url = {http://www.ncbi.nlm.nih.gov/pubmed/18688244 http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=PMC2771753},
volume = {26},
year = {2008}
}
@article{Marchetti2008,
author = {Marchetti, A and Ronzano, F},
file = {:Users/cthoyt/Dropbox/Mendeley/2008/Formalizing Knowledge by Ontologies OWL and KIF - 2008 - Marchetti, Ronzano.pdf:pdf},
journal = {Relat{\'{o}}rio apresentado L'{\ldots}},
number = {003},
title = {{ Formalizing Knowledge by Ontologies: OWL and KIF }},
url = {http://weblab.iit.cnr.it/kyoto/www2.let.vu.nl/twiki/pub/Kyoto/TechnicalPapers/WP02{\_}TR003{\_}V2{\_}OWL{\_}KIF.pdf},
year = {2008}
}
@article{Allen2008,
abstract = {We describe a graphical logical formas a semantic representation for text understanding. This representation was designed to bridge the gap be- tween highly expressive "deep" representations of logical forms andmore shallow semantic encodings such as word senses and semantic relations. It preserves rich semantic content while allowing for compact ambigu- ity encoding and viable partial representations. We describe our system for semantic text processing, which has the TRIPS parser at the core, augmented with statistical preprocessing techniques and online lexical lookup. We also present an evaluation metric for the representation and use it to evaluate the performance of the TRIPS parser on the common task paragraphs. 343},
author = {Allen, James F and Swift, Mary and { De Beaumont } , Will},
doi = {10.3115/1626481.1626508},
file = {:Users/cthoyt/Dropbox/Mendeley/2008/Deep semantic analysis of text - 2008 - Allen, Swift, De Beaumont.pdf:pdf},
journal = {Proceedings of the 2008 Conference on Semantics in Text Processing STEP 08},
keywords = {2,allen 1,james f,p semantic analysis of,text},
pages = {343--354},
title = {{ Deep semantic analysis of text }},
url = {http://portal.acm.org/citation.cfm?doid=1626481.1626508},
volume = {1},
year = {2008}
}
@article{Alon2007,
abstract = {Transcription regulation networks control the expression of genes. The transcription networks of well-studied microorganisms appear to be made up of a small set of recurring regulation patterns, called network motifs. The same network motifs have recently been found in diverse organisms from bacteria to humans, suggesting that they serve as basic building blocks of transcription networks. Here I review network motifs and their functions, with an emphasis on experimental studies. Network motifs in other biological networks are also mentioned, including signalling and neuronal networks.},
author = {Alon, Uri},
doi = {10.1038/nrg2102},
file = {:Users/cthoyt/Dropbox/Mendeley/2007/Network motifs theory and experimental approaches. - 2007 - Alon.pdf:pdf},
isbn = {1471-0056 (Print)$\backslash$r1471-0056 (Linking)},
issn = {1471-0056},
journal = {Nature reviews. Genetics},
keywords = {Animals,Bacteria,Bacteria: genetics,Bacteria: metabolism,Evolution,Fungi,Fungi: genetics,Fungi: metabolism,Gene Expression Regulation,Genetic,Homeostasis,Humans,Models,Regulon,Regulon: genetics,Transcription,Transcription Factors,Transcription Factors: genetics,Transcription Factors: metabolism},
number = {6},
pages = {450--61},
pmid = {17510665},
title = {{ Network motifs: theory and experimental approaches. }},
url = {http://www.ncbi.nlm.nih.gov/pubmed/17510665},
volume = {8},
year = {2007}
}
@article{Ashburner2000,
author = {Ashburner, M and Ball, C A and Blake, J A and Botstein, D and Butler, H and Cherry, J M and Davis, A P and Dolinski, K and Dwight, S S and Eppig, J T and Harris, M A and Hill, D P and Issel-Tarver, L and Kasarskis, A and Lewis, S and Matese, J C and Richardson, J E and Ringwald, M and Rubin, G M and Sherlock, G},
doi = {10.1038/75556},
issn = {1061-4036},
journal = {Nature genetics},
month = {may},
number = {1},
pages = {25--9},
pmid = {10802651},
title = {{ Gene ontology: tool for the unification of biology. The Gene Ontology Consortium. }},
url = {http://www.ncbi.nlm.nih.gov/pubmed/10802651 http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=PMC3037419},
volume = {25},
year = {2000}
}
@article{Bateman2017,
abstract = {{ \textcopyright } 2016 The Author(s). The UniProt knowledgebase is a large resource of protein sequences and associated detailed annotation. The database contains over 60 million sequences, of which over half a million sequences have been curated by experts who critically review experimental and predicted data for each protein. The remainder are automatically annotated based on rule systems that rely on the expert curated knowledge. Since our last update in 2014, we have more than doubled the number of reference proteomes to 5631, giving a greater coverage of taxonomic diversity. We implemente d a pipeline to remove redundant highly similar proteomes that were causing excessive redundancy in UniProt. The initial run of this pipeline reduced the number of sequences in UniProt by 47 million. For our users interested in the accessory proteomes, we have made available sets of pan proteome sequences that cover the diversity of sequences for each species that is found in its strains and sub-strains. To help interpretation of genomic variants, we provide tracks of detailed protein information for the major genome browsers. We provide a SPARQL endpoint that allows complex queries of the more than 22 billion triples of data in UniProt (http://sparql.uniprot.org/). UniProt resources can be accessed via the website at http://www.uniprot.org/.},
author = {Bateman, Alex and Martin, Maria Jesus and O'Donovan, Claire and Magrane, Michele and Alpi, Emanuele and Antunes, Ricardo and Bely, Benoit and Bingley, Mark and Bonilla, Carlos and Britto, Ramona and Bursteinas, Borisas and Bye-AJee, Hema and Cowley, Andrew and { Da Silva } , Alan and { De Giorgi } , Maurizio and Dogan, Tunca and Fazzini, Francesco and Castro, Leyla Garcia and Figueira, Luis and Garmiri, Penelope and Georghiou, George and Gonzalez, Daniel and Hatton-Ellis, Emma and Li, Weizhong and Liu, Wudong and Lopez, Rodrigo and Luo, Jie and Lussi, Yvonne and MacDougall, Alistair and Nightingale, Andrew and Palka, Barbara and Pichler, Klemens and Poggioli, Diego and Pundir, Sangya and Pureza, Luis and Qi, Guoying and Rosanoff, Steven and Saidi, Rabie and Sawford, Tony and Shypitsyna, Aleksandra and Speretta, Elena and Turner, Edward and Tyagi, Nidhi and Volynkin, Vladimir and Wardell, Tony and Warner, Kate and Watkins, Xavier and Zaru, Rossana and Zellner, Hermann and Xenarios, Ioannis and Bougueleret, Lydie and Bridge, Alan and Poux, Sylvain and Redaschi, Nicole and Aimo, Lucila and ArgoudPuy, Ghislaine and Auchincloss, Andrea and Axelsen, Kristian and Bansal, Parit and Baratin, Delphine and Blatter, Marie Claude and Boeckmann, Brigitte and Bolleman, Jerven and Boutet, Emmanuel and Breuza, Lionel and Casal-Casas, Cristina and { De Castro } , Edouard and Coudert, Elisabeth and Cuche, Beatrice and Doche, Mikael and Dornevil, Dolnide and Duvaud, Severine and Estreicher, Anne and Famiglietti, Livia and Feuermann, Marc and Gasteiger, Elisabeth and Gehant, Sebastien and Gerritsen, Vivienne and Gos, Arnaud and Gruaz-Gumowski, Nadine and Hinz, Ursula and Hulo, Chantal and Jungo, Florence and Keller, Guillaume and Lara, Vicente and Lemercier, Philippe and Lieberherr, Damien and Lombardot, Thierry and Martin, Xavier and Masson, Patrick and Morgat, Anne and Neto, Teresa and Nouspikel, Nevila and Paesano, Salvo and Pedruzzi, Ivo and Pilbout, Sandrine and Pozzato, 
Monica and Pruess, Manuela and Rivoire, Catherine and Roechert, Bernd and Schneider, Michel and Sigrist, Christian and Sonesson, Karin and Staehli, Sylvie and Stutz, Andre and Sundaram, Shyamala and Tognolli, Michael and Verbregue, Laure and Veuthey, Anne Lise and Wu, Cathy H. and Arighi, Cecilia N. and Arminski, Leslie and Chen, Chuming and Chen, Yongxing and Garavelli, John S. and Huang, Hongzhan and Laiho, Kati and McGarvey, Peter and Natale, Darren A. and Ross, Karen and Vinayaka, C. R. and Wang, Qinghua and Wang, Yuqi and Yeh, Lai Su and Zhang, Jian},
doi = {10.1093/nar/gkw1099},
file = {:Users/cthoyt/Dropbox/Mendeley/2017/UniProt The universal protein knowledgebase - 2017 - Bateman et al.pdf:pdf},
issn = {13624962},
journal = {Nucleic Acids Research},
number = {D1},
pages = {D158--D169},
title = {{ UniProt: The universal protein knowledgebase }},
volume = {45},
year = {2017}
}
@misc{Beckett2014,
author = {Beckett, Dave},
howpublished = {W3C Recommendation},
title = {{ RDF/XML Syntax Specification }},
url = {https://www.w3.org/TR/REC-rdf-syntax/},
urldate = {2017-08-19},
year = {2014}
}
@article{Bellazzi2014,
abstract = {Big data are receiving an increasing attention in biomedicine and healthcare. It is therefore important to understand the reason why big data are assuming a crucial role for the biomedical informatics community. The capability of handling big data is becoming an enabler to carry out unprecedented research studies and to implement new models of healthcare delivery. Therefore, it is first necessary to deeply understand the four elements that constitute big data, namely Volume, Variety, Velocity, and Veracity, and their meaning in practice. Then, it is mandatory to understand where big data are present, and where they can be beneficially collected. There are research fields, such as translational bioinformatics, which need to rely on big data technologies to withstand the shock wave of data that is generated every day. Other areas, ranging from epidemiology to clinical care, can benefit from the exploitation of the large amounts of data that are nowadays available, from personal monitoring to primary care. However, building big data-enabled systems carries on relevant implications in terms of reproducibility of research studies and management of privacy and data access; proper actions should be taken to deal with these issues. An interesting consequence of the big data scenario is the availability of new software, methods, and tools, such as map-reduce, cloud computing, and concept drift machine learning algorithms, which will not only contribute to big data research, but may be beneficial in many biomedical informatics applications. The way forward with the big data opportunity will require properly applied engineering principles to design studies and applications, to avoid preconceptions or over-enthusiasms, to fully exploit the available technologies, and to improve data processing and data management regulations.},
author = {Bellazzi, R},
doi = {10.15265/IY-2014-0024},
file = {:Users/cthoyt/Dropbox/Mendeley/2014/Big data and biomedical informatics a challenging opportunity. - 2014 - Bellazzi.pdf:pdf},
issn = {2364-0502},
journal = {Yearbook of medical informatics},
keywords = {big data,cloud,data analytics,nosql,research reproducibility},
pages = {8--13},
pmid = {24853034},
title = {{ Big data and biomedical informatics: a challenging opportunity. }},
url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=4287065{\&}tool=pmcentrez{\&}rendertype=abstract},
volume = {9},
year = {2014}
}
@article{Belleau2008,
author = {Belleau, Fran{\c{c}}ois and Nolin, Marc-Alexandre and Tourigny, Nicole and Rigault, Philippe and Morissette, Jean},
doi = {10.1016/j.jbi.2008.03.004},
file = {:Users/cthoyt/Dropbox/Mendeley/2008/Bio2RDF Towards a mashup to build bioinformatics knowledge systems - 2008 - Belleau et al.pdf:pdf},
issn = {15320464},
journal = {Journal of Biomedical Informatics},
keywords = {Data Integration,Folder - RDF and Semantic Web,Linked Biological Data,RDF},
language = {en},
mendeley-tags = {Data Integration,Folder - RDF and Semantic Web,Linked Biological Data,RDF},
month = {oct},
number = {5},
pages = {706--716},
shorttitle = {Bio2RDF},
title = {{ Bio2RDF: Towards a mashup to build bioinformatics knowledge systems }},
url = {http://linkinghub.elsevier.com/retrieve/pii/S1532046408000415},
volume = {41},
year = {2008}
}
@article{Bento2014,
abstract = {ChEMBL is an open large-scale bioactivity database (https://www.ebi.ac.uk/chembl), previously described in the 2012 Nucleic Acids Research Database Issue. Since then, a variety of new data sources and improvements in functionality have contributed to the growth and utility of the resource. In particular, more comprehensive tracking of compounds from research stages through clinical development to market is provided through the inclusion of data from United States Adopted Name applications; a new richer data model for representing drug targets has been developed; and a number of methods have been put in place to allow users to more easily identify reliable data. Finally, access to ChEMBL is now available via a new Resource Description Framework format, in addition to the web-based interface, data downloads and web services.},
author = {Bento, A. Patr{\'{i}}cia and Gaulton, Anna and Hersey, Anne and Bellis, Louisa J. and Chambers, Jon and Davies, Mark and Kr{\"{u}}ger, Felix A. and Light, Yvonne and Mak, Lora and McGlinchey, Shaun and Nowotka, Michal and Papadatos, George and Santos, Rita and Overington, John P.},
doi = {10.1093/nar/gkt1031},
file = {:Users/cthoyt/Dropbox/Mendeley/2014/The ChEMBL bioactivity database An update - 2014 - Bento et al.pdf:pdf},
issn = {03051048},
journal = {Nucleic Acids Research},
number = {D1},
pages = {1083--1090},
pmid = {24214965},
title = {{ The ChEMBL bioactivity database: An update }},
volume = {42},
year = {2014}
}
@article{Bernabo2014,
abstract = {Cellular signal transduction is a complex phenomenon, which plays a central role in cell surviving and adaptation. The great amount of molecular data to date present in literature, together with the adoption of high throughput technologies, on the one hand, made available to scientists an enormous quantity of information, on the other hand, failed to provide a parallel increase in the understanding of biological events. In this context, a new discipline arose, the systems biology, aimed to manage the information with a computational modeling-based approach. In particular, the use of biological networks has allowed the making of huge progress in this field. Here we discuss two possible application of the use of biological networks to explore cell signaling: the study of the architecture of signaling systems that cooperate in determining the acquisition of a complex cellular function (as it is the case of the process of activation of spermatozoa) and the organization of a single specific signaling systems expressed by different cells in different tissues (i.e. the endocannabinoid system). In both the cases we have found that the networks follow a scale free and small world topology, likely due to the evolutionary advantage of robustness against random damages, fastness and specific of information processing, and easy navigability.},
author = {Bernab{\`{o}}, Nicola and Barboni, Barbara and Maccarrone, Mauro},
doi = {10.1016/j.csbj.2014.09.002},
file = {:Users/cthoyt/Dropbox/Mendeley/2014/The biological networks in studying cell signal transduction complexity The examples of sperm capacitation and of endocannabinoid system.pdf:pdf},
isbn = {2001-0370},
issn = {20010370},
journal = {Computational and Structural Biotechnology Journal},
keywords = {Biological networks,Endocannabinoid system,Network topology,Signal transduction,Spermatozoa,Systems biology},
number = {18},
pages = {11--21},
pmid = {25379139},
publisher = {Elsevier B.V.},
title = {{ The biological networks in studying cell signal transduction complexity: The examples of sperm capacitation and of endocannabinoid system }},
url = {http://dx.doi.org/10.1016/j.csbj.2014.09.002},
volume = {11},
year = {2014}
}
@article{Blalock2011,
archivePrefix = {arXiv},
arxivId = {NIHMS150003},
author = {Blalock, Eric M. and Buechel, Heather M. and Popovic, Jelena and Geddes, James W. and Landfield, Philip W.},
doi = {10.1007/s12020-009-9266-z},
eprint = {NIHMS150003},
file = {:Users/cthoyt/Dropbox/Mendeley/2011/Microarray analyses of laser-captured hippocampus reveal distinct gray and white matter signatures associated with incipient Alzheimer.pdf:pdf},
isbn = {6176321972},
issn = {15378276},
number = {1},
pages = {62--70},
pmid = {1000000221},
title = {{ Microarray analyses of laser-captured hippocampus reveal distinct gray and white matter signatures associated with incipient Alzheimer's disease }},
volume = {37},
year = {2011}
}
@article{Bodenreider2008,
author = {Bodenreider, O},
file = {:Users/cthoyt/Dropbox/Mendeley/2008/Biomedical Ontologies in Action Role in Knowledge Management , Data Integration and Decision Support - 2008 - Bodenreider.pdf:pdf},
keywords = {biomedical ontologies,data integration,knowledge management},
pages = {67--79},
title = {{Biomedical Ontologies in Action: Role in Knowledge Management, Data Integration and Decision Support}},
year = {2008}
}
@misc{Bostock,
author = {Bostock, Mike},
title = {{ D3.js }},
url = {http://d3js.org}
}
@article{Catlett2013,
abstract = {BACKGROUND: Gene expression profiling and other genome-scale measurement technologies provide comprehensive information about molecular changes resulting from a chemical or genetic perturbation, or disease state. A critical challenge is the development of methods to interpret these large-scale data sets to identify specific biological mechanisms that can provide experimentally verifiable hypotheses and lead to the understanding of disease and drug action.$\backslash$n$\backslash$nRESULTS: We present a detailed description of Reverse Causal Reasoning (RCR), a reverse engineering methodology to infer mechanistic hypotheses from molecular profiling data. This methodology requires prior knowledge in the form of small networks that causally link a key upstream controller node representing a biological mechanism to downstream measurable quantities. These small directed networks are generated from a knowledge base of literature-curated qualitative biological cause-and-effect relationships expressed as a network. The small mechanism networks are evaluated as hypotheses to explain observed differential measurements. We provide a simple implementation of this methodology, Whistle, specifically geared towards the analysis of gene expression data and using prior knowledge expressed in Biological Expression Language (BEL). We present the Whistle analyses for three transcriptomic data sets using a publically available knowledge base. The mechanisms inferred by Whistle are consistent with the expected biology for each data set.$\backslash$n$\backslash$nCONCLUSIONS: Reverse Causal Reasoning yields mechanistic insights to the interpretation of gene expression profiling data that are distinct from and complementary to the results of analyses using ontology or pathway gene sets. This reverse engineering algorithm provides an evidence-driven approach to the development of models of disease, drug action, and drug toxicity.},
author = {Catlett, Natalie L and Bargnesi, Anthony J and Ungerer, Stephen and Seagaran, Toby and Ladd, William and Elliston, Keith O and Pratt, Dexter},
doi = {10.1186/1471-2105-14-340},
file = {:Users/cthoyt/Dropbox/Mendeley/2013/Reverse causal reasoning applying qualitative causal knowledge to the interpretation of high-throughput data. - 2013 - Catlett et al.pdf:pdf},
issn = {1471-2105},
journal = {BMC bioinformatics},
keywords = {Algorithms,Animals,Breast,Breast: cytology,Endothelium, Vascular,Endothelium, Vascular: cytology,Epithelial Cells,Epithelial Cells: cytology,Gene Expression Profiling,Gene Expression Profiling: methods,Genome, Human,High-Throughput Nucleotide Sequencing,High-Throughput Nucleotide Sequencing: methods,Histone-Lysine N-Methyltransferase,Histone-Lysine N-Methyltransferase: genetics,Humans,Insulin Resistance,Insulin Resistance: genetics,Knowledge Bases,Mice,Microarray Analysis,Molecular Probes,Molecular Probes: genetics,Nuclear Proteins,Nuclear Proteins: genetics},
number = {1},
pages = {340},
pmid = {24266983},
title = {{ Reverse causal reasoning: applying qualitative causal knowledge to the interpretation of high-throughput data. }},
url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=4222496 { \& } tool=pmcentrez { \& } rendertype=abstract},
volume = {14},
year = {2013}
}
@article{Cerami2011,
abstract = {Pathway Commons (http://www.pathwaycommons.org) is a collection of publicly available pathway data from multiple organisms. Pathway Commons provides a web-based interface that enables biologists to browse and search a comprehensive collection of pathways from multiple sources represented in a common language, a download site that provides integrated bulk sets of pathway information in standard or convenient formats and a web service that software developers can use to conveniently query and access all data. Database providers can share their pathway data via a common repository. Pathways include biochemical reactions, complex assembly, transport and catalysis events and physical interactions involving proteins, DNA, RNA, small molecules and complexes. Pathway Commons aims to collect and integrate all public pathway data available in standard formats. Pathway Commons currently contains data from nine databases with over 1400 pathways and 687,000 interactions and will be continually expanded and updated.},
author = {Cerami, Ethan G. and Gross, Benjamin E. and Demir, Emek and Rodchenkov, Igor and Babur, {\"{O}}zg{\"{u}}n and Anwar, Nadia and Schultz, Nikolaus and Bader, Gary D. and Sander, Chris},
doi = {10.1093/nar/gkq1039},
file = {:Users/cthoyt/Dropbox/Mendeley/2011/Pathway Commons, a web resource for biological pathway data - 2011 - Cerami et al.pdf:pdf},
issn = {03051048},
journal = {Nucleic Acids Research},
number = {SUPPL. 1},
pages = {685--690},
pmid = {21071392},
title = {{ Pathway Commons, a web resource for biological pathway data }},
volume = {39},
year = {2011}
}
@article{Chindelevitch2012,
abstract = {MOTIVATION: The interpretation of high-throughput datasets has remained one of the central challenges of computational biology over the past decade. Furthermore, as the amount of biological knowledge increases, it becomes more and more difficult to integrate this large body of knowledge in a meaningful manner. In this article, we propose a particular solution to both of these challenges.$\backslash$n$\backslash$nMETHODS: We integrate available biological knowledge by constructing a network of molecular interactions of a specific kind: causal interactions. The resulting causal graph can be queried to suggest molecular hypotheses that explain the variations observed in a high-throughput gene expression experiment. We show that a simple scoring function can discriminate between a large number of competing molecular hypotheses about the upstream cause of the changes observed in a gene expression profile. We then develop an analytical method for computing the statistical significance of each score. This analytical method also helps assess the effects of random or adversarial noise on the predictive power of our model.$\backslash$n$\backslash$nRESULTS: Our results show that the causal graph we constructed from known biological literature is extremely robust to random noise and to missing or spurious information. We demonstrate the power of our causal reasoning model on two specific examples, one from a cancer dataset and the other from a cardiac hypertrophy experiment. We conclude that causal reasoning models provide a valuable addition to the biologist's toolkit for the interpretation of gene expression data.$\backslash$n$\backslash$nAVAILABILITY AND IMPLEMENTATION: R source code for the method is available upon request.},
author = {Chindelevitch, Leonid and Ziemek, Daniel and Enayetallah, Ahmed and Randhawa, Ranjit and Sidders, Ben and Brockel, Christoph and Huang, Enoch S.},
doi = {10.1093/bioinformatics/bts090},
issn = {13674803},
journal = {Bioinformatics},
pmid = {22355083},
title = {{ Causal reasoning on biological networks: Interpreting transcriptional changes }},
year = {2012}
}
@article{Chou2016,
abstract = {MicroRNAs (miRNAs) are small non-coding RNAs of approximately 22 nucleotides, which negatively regulate the gene expression at the post-transcriptional level. This study describes an update of the miRTarBase (http://miRTarBase.mbc.nctu.edu.tw/) that provides information about experimentally validated miRNA-target interactions (MTIs). The latest update of the miRTarBase expanded it to identify systematically Argonaute-miRNA-RNA interactions from 138 crosslinking and immunoprecipitation sequencing (CLIP-seq) data sets that were generated by 21 independent studies. The database contains 4966 articles, 7439 strongly validated MTIs (using reporter assays or western blots) and 348 007 MTIs from CLIP-seq. The number of MTIs in the miRTarBase has increased around 7-fold since the 2014 miRTarBase update. The miRNA and gene expression profiles from The Cancer Genome Atlas (TCGA) are integrated to provide an effective overview of this exponential growth in the miRNA experimental data. These improvements make the miRTarBase one of the more comprehensively annotated, experimentally validated miRNA-target interactions databases and motivate additional miRNA research efforts.},
author = {Chou, Chih Hung and Chang, Nai Wen and Shrestha, Sirjana and Hsu, Sheng Da and Lin, Yu Ling and Lee, Wei Hsiang and Yang, Chi Dung and Hong, Hsiao Chin and Wei, Ting Yen and Tu, Siang Jyun and Tsai, Tzi Ren and Ho, Shu Yi and Jian, Ting Yan and Wu, Hsin Yi and Chen, Pin Rong and Lin, Nai Chieh and Huang, Hsin Tzu and Yang, Tzu Ling and Pai, Chung Yuan and Tai, Chun San and Chen, Wen Liang and Huang, Chia Yen and Liu, Chun Chi and Weng, Shun Long and Liao, Kuang Wen and Hsu, Wen Lian and Huang, Hsien Da},
doi = {10.1093/nar/gkv1258},
file = {:Users/cthoyt/Dropbox/Mendeley/2016/miRTarBase 2016 Updates to the experimentally validated miRNA-target interactions database - 2016 - Chou et al.pdf:pdf},
issn = {1362-4962},
journal = {Nucleic Acids Research},
number = {D1},
pages = {D239--D247},
pmid = {26590260},
title = {{miRTarBase} 2016: Updates to the experimentally validated {miRNA}-target interactions database},
volume = {44},
year = {2016}
}
@article{Davidson1995,
abstract = {Scientific data of importance to biologists reside in a number of different data sources, such as GenBank, GSDB, SWISS-PROT, EMBL, and OMIM, among many others. Some of these data sources are conventional databases implemented using database management systems (DBMSs) and others are structured files maintained in a number of different formats (e.g., ASN.1 and ACE). In addition, software packages such as sequence analysis packages (e.g., BLAST and FASTA) produce data and can therefore be viewed as data sources. To counter the increasing dispersion and heterogeneity of data, different approaches to integrating these data sources are appearing throughout the bioinformatics community. This paper surveys the technical challenges to integration, classifies the approaches, and critiques the available tools and methodologies.},
author = {Davidson, Susan B. and Overton, G. Christian and Buneman, Peter},
doi = {10.1089/cmb.1995.2.557},
issn = {1066-5277},
journal = {Journal of Computational Biology},
keywords = {Chromosomes, Artificial, Yeast,Data Interpretation, Statistical,Database Management Systems,Databases, Factual,Humans,Mathematics,Models, Genetic,Molecular Biology,Polymerase Chain Reaction,Repetitive Sequences, Nucleic Acid,Sequence Tagged Sites,Software},
language = {eng},
number = {4},
pages = {557--572},
pmid = {8634908},
title = {Challenges in integrating biological data sources},
volume = {2},
year = {1995}
}
@article{Davis2017,
abstract = {The Comparative Toxicogenomics Database (CTD; http://ctdbase.org/) provides information about interactions between environmental chemicals and gene products and their relationships to diseases. Chemical-gene, chemical-disease and gene-disease interactions manually curated from the literature are integrated to generate expanded networks and predict many novel associations between different data types. CTD now contains over 15 million toxicogenomic relationships. To navigate this sea of data, we added several new features, including DiseaseComps (which finds comparable diseases that share toxicogenomic profiles), statistical scoring for inferred gene-disease and pathway-chemical relationships, filtering options for several tools to refine user analysis and our new Gene Set Enricher (which provides biological annotations that are enriched for gene sets). To improve data visualization, we added a Cytoscape Web view to our ChemComps feature, included color-coded interactions and created a 'slim list' for our MEDIC disease vocabulary (allowing diseases to be grouped for meta-analysis, visualization and better data management). CTD continues to promote interoperability with external databases by providing content and cross-links to their sites. Together, this wealth of expanded chemical-gene-disease data, combined with novel ways to analyze and view content, continues to help users generate testable hypotheses about the molecular mechanisms of environmental diseases.},
author = {Davis, Allan Peter and Grondin, Cynthia J. and Johnson, Robin J. and Sciaky, Daniela and King, Benjamin L. and McMorran, Roy and Wiegers, Jolene and Wiegers, Thomas C. and Mattingly, Carolyn J.},
doi = {10.1093/nar/gkw838},
file = {:Users/cthoyt/Dropbox/Mendeley/2017/The Comparative Toxicogenomics Database Update 2017 - 2017 - Davis et al.pdf:pdf},
issn = {1362-4962},
journal = {Nucleic Acids Research},
pmid = {27651457},
internal-note = {NOTE(review): pmid corrected from 23093600 (that PMID is the 2013 CTD update); 27651457 matches doi 10.1093/nar/gkw838 -- verify against PubMed},
number = {D1},
pages = {D972--D978},
title = {The {Comparative Toxicogenomics Database}: Update 2017},
volume = {45},
year = {2017}
}