Avengers: Infinity War

Create a Data Marvel — Part 3: Hydrating the Model

Jennifer Reif
Dec 12, 2018 · 10 min read
Marvel comics data model in Neo4j

More Importing — “Hydrating” the Model

// Hydrate the model around Character nodes.
// Picks up to 100 Characters that have a resourceURI and no incoming INCLUDES
// relationship, then for each one (in batches of 20, with a 2 second pause per
// character) loads up to 100 comics from <resourceURI>/comics and merges:
//   (ComicIssue)-[:INCLUDES]->(Character)
//   (ComicIssue)-[:BELONGS_TO]->(Series)
//   (ComicIssue)-[:CREATED_BY]->(Creator)
//   (ComicIssue)-[:MADE_OF]->(Story)
//   (ComicIssue)-[:PART_OF]->(Event)
// Parameters: $marvel_public and $marvel_private (API key pair).
//
// The Series/Creator/Story/Event stages use FOREACH instead of chained
// "WHERE ... IS NOT NULL / UNWIND" steps. A chained UNWIND over an empty or
// missing list removes the comic row, so every later stage would be skipped
// for that comic; it also multiplies rows so later MERGEs repeat once per
// creator/story. FOREACH keeps exactly one row per comic.
WITH apoc.date.format(timestamp(), "ms", 'yyyyMMddHHmmss') AS ts
// Request suffix: timestamp, public key and an md5 hash over ts + private key + public key.
WITH "&ts=" + ts + "&apikey=" + $marvel_public + "&hash=" + apoc.util.md5([ts, $marvel_private, $marvel_public]) AS suffix
CALL apoc.periodic.iterate(
'MATCH (c:Character)
 WHERE c.resourceURI IS NOT NULL AND NOT (c)<-[:INCLUDES]-()
 RETURN c LIMIT 100',
'CALL apoc.util.sleep(2000)
 CALL apoc.load.json(c.resourceURI + "/comics?format=comic&formatType=comic&limit=100" + $suffix)
 YIELD value
 WITH c, value.data.results AS results
 WHERE results IS NOT NULL
 UNWIND results AS result
 MERGE (comic:ComicIssue {id: result.id})
   ON CREATE SET comic.name = result.title,
                 comic.issueNumber = result.issueNumber,
                 comic.pageCount = result.pageCount,
                 comic.resourceURI = result.resourceURI,
                 comic.thumbnail = result.thumbnail.path + "." + result.thumbnail.extension
 MERGE (comic)-[:INCLUDES]->(c)
 FOREACH (comicSeries IN CASE WHEN result.series IS NULL THEN [] ELSE [result.series] END |
   MERGE (series:Series {id: toInteger(split(comicSeries.resourceURI, "/")[-1])})
     ON CREATE SET series.name = comicSeries.name,
                   series.resourceURI = comicSeries.resourceURI
   MERGE (comic)-[:BELONGS_TO]->(series)
 )
 FOREACH (item IN coalesce(result.creators.items, []) |
   MERGE (creator:Creator {id: toInteger(split(item.resourceURI, "/")[-1])})
     ON CREATE SET creator.name = item.name,
                   creator.resourceURI = item.resourceURI
   MERGE (comic)-[:CREATED_BY]->(creator)
 )
 FOREACH (item IN coalesce(result.stories.items, []) |
   MERGE (story:Story {id: toInteger(split(item.resourceURI, "/")[-1])})
     ON CREATE SET story.name = item.name,
                   story.resourceURI = item.resourceURI,
                   story.type = item.type
   MERGE (comic)-[:MADE_OF]->(story)
 )
 FOREACH (item IN coalesce(result.events.items, []) |
   MERGE (event:Event {id: toInteger(split(item.resourceURI, "/")[-1])})
     ON CREATE SET event.name = item.name,
                   event.resourceURI = item.resourceURI
   MERGE (comic)-[:PART_OF]->(event)
 )',
{batchSize: 20, iterateList: false, retries: 2, params: {suffix: suffix}});
//First section
// Build the signed request suffix from $marvel_public / $marvel_private, select up to
// 100 Characters with a resourceURI and no incoming INCLUDES relationship, and call the
// comics endpoint for each one after a 2 second pause.
WITH apoc.date.format(timestamp(), "ms", 'yyyyMMddHHmmss') AS ts
WITH "&ts=" + ts + "&apikey=" + $marvel_public + "&hash=" + apoc.util.md5([ts, $marvel_private, $marvel_public]) AS suffix
CALL apoc.periodic.iterate(
'MATCH (c:Character)
 WHERE c.resourceURI IS NOT NULL AND NOT (c)<-[:INCLUDES]-()
 RETURN c LIMIT 100',
'CALL apoc.util.sleep(2000)
 CALL apoc.load.json(c.resourceURI + "/comics?format=comic&formatType=comic&limit=100" + $suffix)
 YIELD value
 //Second section
 // One row per returned comic: merge the ComicIssue and link it to the character.
 WITH c, value.data.results AS results
 WHERE results IS NOT NULL
 UNWIND results AS result
 MERGE (comic:ComicIssue {id: result.id})
   ON CREATE SET comic.name = result.title,
                 comic.issueNumber = result.issueNumber,
                 comic.pageCount = result.pageCount,
                 comic.resourceURI = result.resourceURI,
                 comic.thumbnail = result.thumbnail.path + "." + result.thumbnail.extension
 MERGE (comic)-[:INCLUDES]->(c)
 //Third section on Series, Creators, Stories, Events
 // FOREACH keeps one row per comic, so a missing or empty list in one stage
 // does not skip the stages after it and no stage is repeated per creator or story.
 FOREACH (comicSeries IN CASE WHEN result.series IS NULL THEN [] ELSE [result.series] END |
   MERGE (series:Series {id: toInteger(split(comicSeries.resourceURI, "/")[-1])})
     ON CREATE SET series.name = comicSeries.name,
                   series.resourceURI = comicSeries.resourceURI
   MERGE (comic)-[:BELONGS_TO]->(series)
 )
 FOREACH (item IN coalesce(result.creators.items, []) |
   MERGE (creator:Creator {id: toInteger(split(item.resourceURI, "/")[-1])})
     ON CREATE SET creator.name = item.name,
                   creator.resourceURI = item.resourceURI
   MERGE (comic)-[:CREATED_BY]->(creator)
 )
 FOREACH (item IN coalesce(result.stories.items, []) |
   MERGE (story:Story {id: toInteger(split(item.resourceURI, "/")[-1])})
     ON CREATE SET story.name = item.name,
                   story.resourceURI = item.resourceURI,
                   story.type = item.type
   MERGE (comic)-[:MADE_OF]->(story)
 )
 FOREACH (item IN coalesce(result.events.items, []) |
   MERGE (event:Event {id: toInteger(split(item.resourceURI, "/")[-1])})
     ON CREATE SET event.name = item.name,
                   event.resourceURI = item.resourceURI
   MERGE (comic)-[:PART_OF]->(event)
 )',
{batchSize: 20, iterateList: false, retries: 2, params: {suffix: suffix}});

Filling in More Details

//load any extra Series data
// Selects up to 100 Series nodes that have a resourceURI but no startYear yet,
// calls each series endpoint (2 second pause per call, batches of 20) and
// copies startYear, endYear, rating and thumbnail onto the matching Series node.
// Parameters: $marvel_public and $marvel_private (API key pair).
WITH apoc.date.format(timestamp(), "ms", 'yyyyMMddHHmmss') AS ts
WITH "&ts=" + ts + "&apikey=" + $marvel_public + "&hash=" + apoc.util.md5([ts, $marvel_private, $marvel_public]) AS suffix
CALL apoc.periodic.iterate(
'MATCH (s:Series)
 WHERE s.resourceURI IS NOT NULL AND s.startYear IS NULL
 RETURN s LIMIT 100',
'CALL apoc.util.sleep(2000)
 CALL apoc.load.json(s.resourceURI + "?limit=100" + $suffix)
 YIELD value
 WITH value.data.results AS results
 WHERE results IS NOT NULL
 UNWIND results AS result
 MERGE (series:Series {id: result.id})
 SET series.startYear = result.startYear,
     series.endYear = result.endYear,
     series.rating = result.rating,
     series.thumbnail = result.thumbnail.path + "." + result.thumbnail.extension',
{batchSize: 20, iterateList: false, params: {suffix: suffix}});
//load any extra Event data
// Selects up to 100 distinct Event nodes that have a resourceURI but no start
// value yet, calls each event endpoint (2 second pause per call, batches of 20)
// and copies start and end onto the matching Event node.
// Parameters: $marvel_public and $marvel_private (API key pair).
WITH apoc.date.format(timestamp(), "ms", 'yyyyMMddHHmmss') AS ts
WITH "&ts=" + ts + "&apikey=" + $marvel_public + "&hash=" + apoc.util.md5([ts, $marvel_private, $marvel_public]) AS suffix
CALL apoc.periodic.iterate(
'MATCH (event:Event)
 WHERE event.resourceURI IS NOT NULL AND event.start IS NULL
 RETURN DISTINCT event LIMIT 100',
'CALL apoc.util.sleep(2000)
 CALL apoc.load.json(event.resourceURI + "?limit=100" + $suffix)
 YIELD value
 UNWIND value.data.results AS result
 MERGE (e:Event {id: result.id})
 SET e.start = result.start,
     e.end = result.end',
{batchSize: 20, iterateList: false, params: {suffix: suffix}});

What I Learned

Next Steps

Resources

Neo4j Developer Blog

Developer Content around Graph Databases, Neo4j, Cypher, Data Science, Graph Analytics, GraphQL and more.

Jennifer Reif

Written by

Jennifer Reif is an avid developer and problem-solver. She enjoys learning new technologies, sometimes on a daily basis! Her Twitter handle is @JMHReif.

Neo4j Developer Blog

Developer Content around Graph Databases, Neo4j, Cypher, Data Science, Graph Analytics, GraphQL and more.