Skip to content

Commit

Permalink
Collect and write template pages. Write templates and properties to s…
Browse files Browse the repository at this point in the history
…eparate files
  • Loading branch information
samuell committed Aug 16, 2016
1 parent e4d84ac commit 32b7873
Showing 1 changed file with 142 additions and 48 deletions.
190 changes: 142 additions & 48 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ const (
)

func main() {
flowbase.InitLogInfo()
//flowbase.InitLogDebug()

inFileName := flag.String("in", "", "The input file name")
outFileName := flag.String("out", "", "The output file name")
Expand All @@ -46,47 +46,56 @@ func main() {
// ------------------------------------------

// Create a pipeline runner
pipeRunner := flowbase.NewPipelineRunner()
net := flowbase.NewPipelineRunner()

// Read in-file
ttlFileRead := NewTurtleFileReader()
pipeRunner.AddProcess(ttlFileRead)
net.AddProcess(ttlFileRead)

// Aggregate per subject
aggregator := NewAggregateTriplesPerSubject()
pipeRunner.AddProcess(aggregator)
net.AddProcess(aggregator)

// Create a subject-indexed "index" of all triples
indexCreator := NewCreateResourceIndex()
pipeRunner.AddProcess(indexCreator)
net.AddProcess(indexCreator)

// Fan-out the triple index to the converter and serializer
indexFanOut := NewResourceIndexFanOut()
pipeRunner.AddProcess(indexFanOut)
net.AddProcess(indexFanOut)

// Serialize the index back to individual subject-tripleaggregates
indexToAggr := NewResourceIndexToTripleAggregates()
pipeRunner.AddProcess(indexToAggr)
net.AddProcess(indexToAggr)

// Convert TripleAggregate to WikiPage
triplesToWikiConverter := NewTripleAggregateToWikiPageConverter()
pipeRunner.AddProcess(triplesToWikiConverter)
net.AddProcess(triplesToWikiConverter)

//categoryFilterer := NewCategoryFilterer([]string{"DataEntry"})
//pipeRunner.AddProcess(categoryFilterer)
//net.AddProcess(categoryFilterer)

// Pretty-print wiki page data
//wikiPagePrinter := NewWikiPagePrinter()
//pipeRunner.AddProcess(wikiPagePrinter)
//net.AddProcess(wikiPagePrinter)

useTemplates := true
xmlCreator := NewMWXMLCreator(useTemplates)
pipeRunner.AddProcess(xmlCreator)
net.AddProcess(xmlCreator)

//printer := NewStringPrinter()
//pipeRunner.AddProcess(printer)
strFileWriter := NewStringFileWriter(*outFileName)
pipeRunner.AddProcess(strFileWriter)
//net.AddProcess(printer)
templateWriter := NewStringFileWriter(str.Replace(*outFileName, ".xml", "_templates.xml", 1))
net.AddProcess(templateWriter)

propertyWriter := NewStringFileWriter(str.Replace(*outFileName, ".xml", "_properties.xml", 1))
net.AddProcess(propertyWriter)

pageWriter := NewStringFileWriter(*outFileName)
net.AddProcess(pageWriter)

snk := flowbase.NewSink()
net.AddProcess(snk)

// ------------------------------------------
// Connect network
Expand All @@ -107,7 +116,13 @@ func main() {

triplesToWikiConverter.OutPage = xmlCreator.InWikiPage

xmlCreator.Out = strFileWriter.In
xmlCreator.OutTemplates = templateWriter.In
xmlCreator.OutProperties = propertyWriter.In
xmlCreator.OutPages = pageWriter.In

snk.Connect(templateWriter.OutDone)
snk.Connect(propertyWriter.OutDone)
snk.Connect(pageWriter.OutDone)

// ------------------------------------------
// Send in-data and run
Expand All @@ -118,8 +133,7 @@ func main() {
ttlFileRead.InFileName <- *inFileName
}()

pipeRunner.Run()

net.Run()
}

// ================================================================================
Expand Down Expand Up @@ -419,6 +433,7 @@ const (
URITypeUndefined
URITypePredicate
URITypeClass
URITypeTemplate
)

// Code -----------------------------------------------------------------------
Expand Down Expand Up @@ -489,21 +504,8 @@ func (p *TripleAggregateToWikiPageConverter) Run() {
}

if tr.Pred.String() == typePropertyURI || tr.Pred.String() == subClassPropertyURI {

catExists := false
for _, existingCat := range page.Categories {
if valueStr == existingCat {
catExists = true
break
}
}

if !catExists {
page.AddCategory(valueStr)
}

page.AddCategoryUnique(valueStr)
} else {

page.AddFactUnique(NewFact(propertyStr, valueStr))
}
}
Expand All @@ -521,7 +523,7 @@ func (p *TripleAggregateToWikiPageConverter) Run() {
predPageIndex[page.Title].AddFactUnique(fact)
}
for _, cat := range page.Categories {
predPageIndex[page.Title].AddCategory(cat)
predPageIndex[page.Title].AddCategoryUnique(cat)
}
} else {
// If page does not exist, use the newly created one
Expand Down Expand Up @@ -599,6 +601,8 @@ func (p *TripleAggregateToWikiPageConverter) convertUriToWikiTitle(uri string, u
factTitle += " ..."
}

factTitle = upperCaseFirst(factTitle)

if uriType == URITypePredicate {
pageTitle = "Property:" + factTitle
} else if uriType == URITypeClass {
Expand Down Expand Up @@ -656,16 +660,20 @@ func (p *CategoryFilterer) Run() {
// --------------------------------------------------------------------------------

type MWXMLCreator struct {
InWikiPage chan *WikiPage
Out chan string
UseTemplates bool
InWikiPage chan *WikiPage
OutTemplates chan string
OutProperties chan string
OutPages chan string
UseTemplates bool
}

func NewMWXMLCreator(useTemplates bool) *MWXMLCreator {
return &MWXMLCreator{
InWikiPage: make(chan *WikiPage, BUFSIZE),
Out: make(chan string, BUFSIZE),
UseTemplates: useTemplates,
InWikiPage: make(chan *WikiPage, BUFSIZE),
OutTemplates: make(chan string, BUFSIZE),
OutProperties: make(chan string, BUFSIZE),
OutPages: make(chan string, BUFSIZE),
UseTemplates: useTemplates,
}
}

Expand All @@ -689,36 +697,57 @@ const wikiXmlTpl = `

// pageTypeToMWNamespace maps a page's URIType* constant to the integer that
// MWXMLCreator.Run passes to wikiXmlTpl, right after the page title, when it
// serializes a page.
// NOTE(review): the values look like MediaWiki namespace IDs — confirm them
// against the configuration of the wiki the XML is imported into.
var pageTypeToMWNamespace = map[int]int{
	// Class pages; 14 is presumably MediaWiki's "Category" namespace — TODO confirm.
	URITypeClass: 14,
	// Generated "Template:" pages; 10 is presumably MediaWiki's "Template" namespace — TODO confirm.
	URITypeTemplate: 10,
	// "Property:" pages; 102 is presumably Semantic MediaWiki's property namespace — TODO confirm.
	URITypePredicate: 102,
	// Everything else; 0 is presumably the main (article) namespace — TODO confirm.
	URITypeUndefined: 0,
}

func (p *MWXMLCreator) Run() {
defer close(p.Out)
tplPropertyIdx := make(map[string]map[string]int)

p.Out <- "<mediawiki>\n"
defer close(p.OutTemplates)
defer close(p.OutProperties)
defer close(p.OutPages)

p.OutPages <- "<mediawiki>\n"
p.OutProperties <- "<mediawiki>\n"

for page := range p.InWikiPage {

wikiText := ""

if p.UseTemplates && len(page.Categories) > 0 { // We need at least one category, as to name the (to-be) template

wikiText += "{{" + page.Categories[0] + "\n" // TODO: What to do when we have multipel categories?
templateName := page.Categories[0]
templateTitle := "Template:" + templateName

// Make sure template page exists
if tplPropertyIdx[templateTitle] == nil {
tplPropertyIdx[templateTitle] = make(map[string]int)
}

// Add facts as parameters to the template
wikiText += "{{" + templateName + "\n" // TODO: What to do when we have multipel categories?

// Add facts as parameters to the template call
var lastProperty string
for _, fact := range page.Facts {
// Write facts to template call on current page

val := escapeWikiChars(fact.Value)
if fact.Property == lastProperty {
wikiText += "," + fact.Value + "\n"
wikiText += "," + val + "\n"
} else {
wikiText += "|" + str.Replace(fact.Property, " ", "_", -1) + "=" + fact.Value + "\n"
wikiText += "|" + spacesToUnderscores(fact.Property) + "=" + val + "\n"
}

lastProperty = fact.Property

// Add fact to the relevant template page
tplPropertyIdx[templateTitle][fact.Property] = 1
}

// Add categories as multi-valued call to the "categories" value of the template
wikiText += "|categories="
wikiText += "|Categories="
for i, cat := range page.Categories {
if i == 0 {
wikiText += cat
Expand All @@ -732,7 +761,7 @@ func (p *MWXMLCreator) Run() {

// Add fact statements
for _, fact := range page.Facts {
wikiText += fmtFact(fact.Property, fact.Value)
wikiText += fmtFact(fact.Property, escapeWikiChars(fact.Value))
}

// Add category statements
Expand All @@ -745,10 +774,33 @@ func (p *MWXMLCreator) Run() {
xmlData := fmt.Sprintf(wikiXmlTpl, page.Title, pageTypeToMWNamespace[page.Type], time.Now().Format("2006-01-02T15:04:05Z"), wikiText)

// Print out the generated XML one line at a time
p.Out <- xmlData
if page.Type == URITypePredicate {
p.OutProperties <- xmlData
} else {
p.OutPages <- xmlData
}
}
p.OutPages <- "</mediawiki>\n"
p.OutProperties <- "</mediawiki>\n"

p.OutTemplates <- "<mediawiki>\n"
// Create template pages
for tplName, tplProperties := range tplPropertyIdx {
tplText := `{|class="wikitable smwtable"
!colspan="2"|{{PAGENAMEE}}
`
for property, _ := range tplProperties {
argName := spacesToUnderscores(property)
tplText += fmt.Sprintf("|-\n!%s\n|{{#arraymap:{{{%s|}}}|,|x|[[%s::x]]|,}}\n", property, argName, property)

This comment has been minimized.

Copy link
@mwjames

mwjames Aug 18, 2016

@samuell #arraymap? Maybe use [0] instead to separate multi-value assignments; otherwise you would need SF (Semantic Forms) installed, which some of us don't have.

[0] https://www.semantic-mediawiki.org/wiki/Help:Setting_values/Working_with_the_separator_parameter

This comment has been minimized.

Copy link
@samuell

samuell Aug 18, 2016

Author Member

@mwjames Ah, yes! Yeah, I also thought there must be a way to do this without Semantic Forms, but I didn't know about this one (it has been too long since I was hacking on SMW, it seems). Will fix in the next release. Many thanks!

}
tplText += "|}\n\n"
// Add categories
tplText += "{{#arraymap:{{{Categories}}}|,|x|[[Category:x]]|}}\n"

p.Out <- "</mediawiki>\n"
xmlData := fmt.Sprintf(wikiXmlTpl, tplName, pageTypeToMWNamespace[URITypeTemplate], time.Now().Format("2006-01-02T15:04:05Z"), tplText)
p.OutTemplates <- xmlData
}
p.OutTemplates <- "</mediawiki>\n"
}

// --------------------------------------------------------------------------------
Expand Down Expand Up @@ -868,17 +920,21 @@ func (p *StringPrinter) Run() {

// StringFileWriter is a process that writes every string it receives to a
// single file and reports on OutDone when its input has been exhausted.
type StringFileWriter struct {
	// In carries the strings that Run writes to the file, in arrival order.
	In chan string
	// OutDone receives one *DoneSignal from Run after In has been drained,
	// and is closed when Run returns.
	OutDone chan interface{}
	// fileName is the path Run hands to os.Create.
	fileName string
}

// NewStringFileWriter returns a StringFileWriter that will write to fileName.
// Both of its channels, In and OutDone, are created with a buffer of BUFSIZE.
func NewStringFileWriter(fileName string) *StringFileWriter {
	writer := new(StringFileWriter)
	writer.fileName = fileName
	writer.In = make(chan string, BUFSIZE)
	writer.OutDone = make(chan interface{}, BUFSIZE)
	return writer
}

func (p *StringFileWriter) Run() {
defer close(p.OutDone)

fh, err := os.Create(p.fileName)
if err != nil {
panic("Could not create output file: " + err.Error())
Expand All @@ -887,8 +943,13 @@ func (p *StringFileWriter) Run() {
for s := range p.In {
fh.WriteString(s)
}

flowbase.Debug.Printf("Sending done signal on chan %v now in StringFileWriter ...\n", p.OutDone)
p.OutDone <- &DoneSignal{}
}

// DoneSignal is the empty marker value that StringFileWriter.Run sends on its
// OutDone channel once its In channel has been drained.
type DoneSignal struct{}

// --------------------------------------------------------------------------------
// IP: RDFTriple
// --------------------------------------------------------------------------------
Expand Down Expand Up @@ -962,6 +1023,19 @@ func (p *WikiPage) AddCategory(category string) {
p.Categories = append(p.Categories, category)
}

// AddCategoryUnique adds category to the page via AddCategory, unless an
// identical category string is already present in p.Categories.
func (p *WikiPage) AddCategoryUnique(category string) {
	for i := range p.Categories {
		if p.Categories[i] == category {
			// Already listed, so there is nothing to add.
			return
		}
	}
	p.AddCategory(category)
}

// Helper type: Fact

type Fact struct {
Expand Down Expand Up @@ -1000,3 +1074,23 @@ func removeLastWord(inStr string) string {
outStr := str.Join(append(bits[:len(bits)-1]), " ")
return outStr
}

// spacesToUnderscores returns inStr with every space character replaced by an
// underscore. All other characters are left untouched.
func spacesToUnderscores(inStr string) string {
	const (
		space      = " "
		underscore = "_"
	)
	// A negative count tells Replace to substitute every occurrence.
	return str.Replace(inStr, space, underscore, -1)
}

// upperCaseFirst returns inStr with its first character converted to upper
// case and the remainder left unchanged. An empty input yields an empty
// string.
//
// The first character is taken as a whole UTF-8 encoded rune rather than a
// single byte, so strings starting with a multi-byte character (e.g. "élan")
// are capitalized correctly instead of being corrupted.
func upperCaseFirst(inStr string) string {
	// Ranging over a string yields the byte offset at which each rune starts,
	// so the first non-zero offset is the byte length of the first rune. If
	// the string holds a single rune (or is empty), the first rune spans it all.
	firstLen := len(inStr)
	for i := range inStr {
		if i > 0 {
			firstLen = i
			break
		}
	}
	return str.ToUpper(inStr[:firstLen]) + inStr[firstLen:]
}

// escapeWikiChars returns inStr with the characters "[", "]", "|" and "="
// replaced by "(", ")", "," and "-" respectively. The callers in this file
// apply it to fact values before embedding them in generated wiki text.
func escapeWikiChars(inStr string) string {
	// Each pair is {character to replace, replacement}, applied in order.
	substitutions := [][2]string{
		{"[", "("},
		{"]", ")"},
		{"|", ","},
		{"=", "-"},
	}
	escaped := inStr
	for _, sub := range substitutions {
		escaped = str.Replace(escaped, sub[0], sub[1], -1)
	}
	return escaped
}

0 comments on commit 32b7873

Please sign in to comment.