Evolving Data Models with JanusGraph

Ryan Stauffer
Feb 13, 2019 · 10 min read
Image for post
Image for post
Image for post
Image for post
Image for post
Image for post
Our initial schema design

Defining an Initial Data Model

$ bin/gremlin.sh
// Open a JanusGraph instance configured with the 'inmemory' storage backend
gremlin> graph = JanusGraphFactory.build().
set('storage.backend', 'inmemory').open()
// Open a management transaction; the schema statements that follow all go through `mgmt`
gremlin> mgmt = graph.openManagement()
// Vertex labels: one per kind of entity in the model
Orchestra = mgmt.makeVertexLabel('Orchestra').make()
Artist = mgmt.makeVertexLabel('Artist').make()
Work = mgmt.makeVertexLabel('Work').make()
Concert = mgmt.makeVertexLabel('Concert').make()
// Edge labels
// MANY2ONE: a vertex may have at most one outgoing edge with this label
COMPOSER = mgmt.makeEdgeLabel('COMPOSER').multiplicity(MANY2ONE).make()
// SIMPLE: at most one edge with this label between any given pair of vertices
SOLOIST = mgmt.makeEdgeLabel('SOLOIST').multiplicity(SIMPLE).make()
CONDUCTOR = mgmt.makeEdgeLabel('CONDUCTOR').multiplicity(SIMPLE).make()
ORCHESTRA = mgmt.makeEdgeLabel('ORCHESTRA').multiplicity(SIMPLE).make()
INCLUDES = mgmt.makeEdgeLabel('INCLUDES').multiplicity(SIMPLE).make()
// Define Vertex Property Keys
// Every key is created with Cardinality.SINGLE and then attached to its
// vertex label with addProperties.
// Orchestra
name = mgmt.makePropertyKey('name').
dataType(String.class).cardinality(Cardinality.SINGLE).make()
mgmt.addProperties(Orchestra, name)
// Artist
lastName = mgmt.makePropertyKey('lastName').
dataType(String.class).cardinality(Cardinality.SINGLE).make()
firstName = mgmt.makePropertyKey('firstName').
dataType(String.class).cardinality(Cardinality.SINGLE).make()
gender = mgmt.makePropertyKey('gender').
dataType(String.class).cardinality(Cardinality.SINGLE).make()
nationality = mgmt.makePropertyKey('nationality').
dataType(String.class).cardinality(Cardinality.SINGLE).make()
deceased = mgmt.makePropertyKey('deceased').
dataType(Boolean.class).cardinality(Cardinality.SINGLE).make()
mgmt.addProperties(Artist, lastName, firstName, gender,
nationality, deceased)
// Work
title = mgmt.makePropertyKey('title').
dataType(String.class).cardinality(Cardinality.SINGLE).make()
// The variable carries the same name as the key it holds ('compositionYear', an Integer)
compositionYear = mgmt.makePropertyKey('compositionYear').
dataType(Integer.class).cardinality(Cardinality.SINGLE).make()
soloInstrument = mgmt.makePropertyKey('soloInstrument').
dataType(String.class).cardinality(Cardinality.SINGLE).make()
mgmt.addProperties(Work, title, compositionYear, soloInstrument)
// Concert (reuses the 'name' key created for Orchestra above)
firstDate = mgmt.makePropertyKey('firstDate').
dataType(String.class).cardinality(Cardinality.SINGLE).make()
numShows = mgmt.makePropertyKey('numShows').
dataType(Integer.class).cardinality(Cardinality.SINGLE).make()
mgmt.addProperties(Concert, name, firstDate, numShows)

// Allowed connections, one row per [edgeLabel, outVertexLabel, inVertexLabel]
[
    [COMPOSER,  Work,    Artist],
    [SOLOIST,   Work,    Artist],
    [CONDUCTOR, Work,    Artist],
    [ORCHESTRA, Concert, Orchestra],
    [INCLUDES,  Concert, Work]
].each { edgeLabel, outVertexLabel, inVertexLabel ->
    mgmt.addConnection(edgeLabel, outVertexLabel, inVertexLabel)
}
// Commit the management transaction so the schema above takes effect
mgmt.commit()

A Quick Aside on Schema Naming

Answering Questions with our Graph

Image for post
Image for post
$ bin/gremlin.sh -i InitialSetup.groovy
// Start at the Artist whose lastName is 'Salonen', follow every incoming edge
// (any label) back to the vertex it starts from, then step to whatever
// INCLUDES that vertex. Each path is printed as:
// lastName, edge label, title, name.
g.V().has('Artist', 'lastName', 'Salonen').
inE().outV().in('INCLUDES').order().
path().by('lastName').by(label).by('title').by('name')
==>[Salonen,CONDUCTOR,Also sprach Zarathustra,
Esa-Pekka Salonen Conducts US Premiere by Tansy Davies]
==>[Salonen,COMPOSER,Wing on Wing,
Premieres by Esa-Pekka Salonen and Anna Thorvaldsdottir]
==>[Salonen,COMPOSER,Cello Concerto,Salonen & Yo-Yo Ma]
==>[Salonen,CONDUCTOR,Cello Concerto,Salonen & Yo-Yo Ma]
// From Salonen, step to the vertices that have a COMPOSER edge pointing at him,
// then follow their outgoing CONDUCTOR edges. Each path is printed as:
// lastName (composer), title, lastName (conductor).
g.V().has('Artist', 'lastName', 'Salonen').
in('COMPOSER').out('CONDUCTOR').
path().by('lastName').by('title').by('lastName')
==>[Salonen,Cello Concerto,Salonen]
==>[Salonen,Wing on Wing,Gilbert]
Image for post
Image for post
Did Esa-Pekka Salonen perform with Alisa Weilerstein or Yo-Yo Ma? Who knows…
Image for post
Image for post
Much clearer

A Second Attempt

Image for post
Image for post
// We'll need a new Management API transaction
mgmt = graph.openManagement()
// Existing labels have to be fetched again through the newly opened transaction
Orchestra = mgmt.getVertexLabel('Orchestra')
Artist = mgmt.getVertexLabel('Artist')
Work = mgmt.getVertexLabel('Work')
Concert = mgmt.getVertexLabel('Concert')
SOLOIST = mgmt.getEdgeLabel('SOLOIST')
CONDUCTOR = mgmt.getEdgeLabel('CONDUCTOR')
ORCHESTRA = mgmt.getEdgeLabel('ORCHESTRA')
INCLUDES = mgmt.getEdgeLabel('INCLUDES')
// New vertex label and its single-valued String property
Performance = mgmt.makeVertexLabel('Performance').make()
performanceDate = mgmt.makePropertyKey('performanceDate').dataType(String.class).cardinality(Cardinality.SINGLE).make()
mgmt.addProperties(Performance, performanceDate)
// New edge label. ONE2MANY: a vertex may have at most one incoming edge with this label
PERFORMED = mgmt.makeEdgeLabel('PERFORMED').multiplicity(ONE2MANY).make()
// New connections, given as (edgeLabel, outVertexLabel, inVertexLabel)
mgmt.addConnection(SOLOIST, Performance, Artist)
mgmt.addConnection(CONDUCTOR, Performance, Artist)
mgmt.addConnection(ORCHESTRA, Performance, Orchestra)
mgmt.addConnection(INCLUDES, Concert, Performance)
mgmt.addConnection(PERFORMED, Work, Performance)
mgmt.commit()
// For every (Work 'w', Concert 'c') pair joined by an INCLUDES edge, create a
// Performance vertex 'p', then add two edges to it:
//   w -PERFORMED-> p   and   c -INCLUDES-> p
// performanceDate is filled from values('firstDate'); presumably this is
// resolved against the Concert traverser that enters map() -- confirm.
g.V().hasLabel('Work').as('w').in('INCLUDES').
hasLabel('Concert').as('c').
map(addV('Performance').as('p').
property('performanceDate', values('firstDate')).
addE('PERFORMED').from('w').
select('p').addE('INCLUDES').from('c')).iterate()
// Move CONDUCTOR edges from Work to Performance: for each Performance 'p',
// go back to its Work, take each outgoing CONDUCTOR edge ('OLD'), add a new
// CONDUCTOR edge from 'p' to the same Artist, then drop the old edge.
// NOTE(review): the old edge is dropped inside the same traversal, so if a
// Work had more than one Performance, later ones might no longer find it -- confirm.
g.V().hasLabel('Performance').as('p').in('PERFORMED').
outE('CONDUCTOR').as('OLD').inV().as('cond').
addE('CONDUCTOR').from('p').
select('OLD').drop().iterate()
// Same move for SOLOIST edges.
g.V().hasLabel('Performance').as('p').in('PERFORMED').
outE('SOLOIST').as('OLD').inV().as('soloist').
addE('SOLOIST').from('p').
select('OLD').drop().iterate()
// Connect each Performance 'p' to an Orchestra: walk from 'p' back to its
// Work (in PERFORMED), to whatever INCLUDES that Work, and on to the target
// of its ORCHESTRA edge; then add an ORCHESTRA edge from 'p' to that vertex.
// String literals use plain ASCII quotes so the statement parses as Groovy.
// NOTE(review): the walk goes through the Work, so a Work included in several
// Concerts would link each Performance to every one of their Orchestras --
// confirm whether in('INCLUDES') directly from 'p' is preferable.
g.V().hasLabel('Performance').as('p').in('PERFORMED').
in('INCLUDES').out('ORCHESTRA').
addE('ORCHESTRA').from('p').iterate()
// List every outgoing edge of every Performance as
// [vertex label, edge label, target vertex label].
// String literals use plain ASCII quotes so the statement parses as Groovy.
g.V().hasLabel('Performance').outE().inV().path().by(label)
==>[Performance,CONDUCTOR,Artist]
==>[Performance,ORCHESTRA,Orchestra]
==>[Performance,SOLOIST,Artist]
==>[Performance,CONDUCTOR,Artist]
==>[Performance,ORCHESTRA,Orchestra]
==>[Performance,CONDUCTOR,Artist]
==>[Performance,ORCHESTRA,Orchestra]
// List every outgoing edge of every Work as
// [vertex label, edge label, target vertex label].
// String literals use plain ASCII quotes so the statement parses as Groovy.
g.V().hasLabel('Work').outE().inV().path().by(label)
==>[Work,COMPOSER,Artist]
==>[Work,PERFORMED,Performance]
==>[Work,COMPOSER,Artist]
==>[Work,PERFORMED,Performance]
==>[Work,COMPOSER,Artist]
==>[Work,PERFORMED,Performance]
// Sanity checks on the migrated graph.
// Exactly 3 Performance vertices exist
assert 3 == g.V().hasLabel('Performance').count().next()
// From Performances: 3 Artists over CONDUCTOR, 1 Artist over SOLOIST,
// 3 Orchestras over ORCHESTRA
assert 3 == g.V().hasLabel('Performance').out('CONDUCTOR').hasLabel('Artist').count().next()
assert 1 == g.V().hasLabel('Performance').out('SOLOIST').hasLabel('Artist').count().next()
assert 3 == g.V().hasLabel('Performance').out('ORCHESTRA').hasLabel('Orchestra').count().next()
// No Work keeps an outgoing CONDUCTOR, SOLOIST or ORCHESTRA edge
['CONDUCTOR', 'SOLOIST', 'ORCHESTRA'].each { role ->
    assert 0 == g.V().hasLabel('Work').outE(role).count().next()
}
Image for post
Image for post
The diagram may be a bit crowded, but our model allows for concise access to all of our data. (The INCLUDES edges between Concert and Performance have been excluded for readability.)
// Which Artists conducted a Performance of a Work they composed?
// Label each Artist 'a', walk Artist <-COMPOSER- Work -PERFORMED-> Performance
// -CONDUCTOR-> Artist, and keep only walks that end back on 'a'.
// String literals use plain ASCII quotes so the statements parse as Groovy.
g.V().hasLabel('Artist').as('a').
in('COMPOSER').out('PERFORMED').out('CONDUCTOR').
where(eq('a')).values('lastName')
==>Salonen
// Or more verbosely to view the path
// (edges are traversed explicitly so their labels show up in the path)
g.V().hasLabel('Artist').as('a').
inE('COMPOSER').outV().outE('PERFORMED').inV().
outE('CONDUCTOR').inV().where(eq('a')).
path().by('lastName').by(label).by('title').
by(label).by('performanceDate').by(label).by('lastName')
==>[Salonen,COMPOSER,Cello Concerto,
PERFORMED,3/9/2017,CONDUCTOR,Salonen]
// Names reached from Salonen by stepping to the vertices with a CONDUCTOR edge
// pointing at him, then along their outgoing ORCHESTRA edges.
// String literals use plain ASCII quotes so the statement parses as Groovy.
g.V().has('Artist', 'lastName', 'Salonen').
in('CONDUCTOR').out('ORCHESTRA').values('name')
==>New York Philharmonic
==>Chicago Symphony Orchestra

Enharmonic

We publish thought-provoking stories and technical articles…

Welcome to a place where words matter. On Medium, smart voices and original ideas take center stage - with no ads in sight. Watch

Follow all the topics you care about, and we’ll deliver the best stories for you to your homepage and inbox. Explore

Get unlimited access to the best stories on Medium — and support writers while you’re at it. Just $5/month. Upgrade

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store