truncate: updated docs

lmorg · Sep 22, 2024 · e23799d · e23799d
1 parent a95095c
commit e23799d
Show file tree

Hide file tree

Showing 9 changed files with 412 additions and 77 deletions.
diff --git a/app/app.go b/app/app.go
@@ -16,9 +16,9 @@ const Name = "murex"
 const (
 	Major     = 7
 	Minor     = 0
-	Revision  = 103
+	Revision  = 107
 	Branch    = "851/file-redirect"
-	BuildDate = "2024-09-21  01:33:30"
+	BuildDate = "2024-09-22  23:17:58"
 )
 
 // Copyright is the copyright owner string

diff --git a/builtins/core/io/write.go b/builtins/core/io/write.go
@@ -20,9 +20,10 @@ func init() {
 }
 
 const (
-	_WAIT_EOF_LONG       = "--wait-for-eof"
-	_WAIT_EOF_SHORT      = "-w"
-	_DONT_CHECK_PIPELINE = "--ignore-pipeline-check"
+	_WAIT_EOF_SHORT        = "-w"
+	_WAIT_EOF_LONG         = "--wait-for-eof"
+	_IGNORE_PIPELINE_SHORT = "-i"
+	_IGNORE_PIPELINE_LONG  = "--ignore-pipeline-check"
 )
 
 func cmdTruncateFile(p *lang.Process) error { return writeFile(p, truncateFile) }
@@ -41,28 +42,35 @@ func writeFile(p *lang.Process, fn func(io.Reader, string) error) error {
 		return fn(bytes.NewBuffer([]byte{}), filename)
 	}
 
-	if filename == _DONT_CHECK_PIPELINE {
-		filename, err = p.Parameters.String(1)
-		if err != nil {
-			return err
+	if filename == _IGNORE_PIPELINE_SHORT || filename == _IGNORE_PIPELINE_LONG {
+		parameter2, err := p.Parameters.String(1)
+		if err == nil {
+			return fn(p.Stdin, parameter2)
 		}
-		return fn(p.Stdin, filename)
+		// no second parameter so let's assume the flag was actually a file name
 	}
 
 	wait := filename == _WAIT_EOF_SHORT || filename == _WAIT_EOF_LONG
 
 	if wait {
-		filename, err = p.Parameters.String(1)
-		if err != nil {
-			return err
+		parameter2, err := p.Parameters.String(1)
+		if err == nil {
+			filename = parameter2
+
+		} else {
+			// no second parameter so let's assume the flag was actually a file name
+			wait = false
 		}
-	} else {
+	}
+
+	if !wait {
 		wait = isFileOpen(p, filename)
+		if wait {
+			_, _ = p.Stderr.Writeln([]byte(fmt.Sprintf("warning: '%s' appears as a parameter elsewhere in the pipeline so I'm going to cache the file in RAM before writing to disk.\n       : This message can be suppressed using `%s` or `%s`.", filename, _WAIT_EOF_LONG, _IGNORE_PIPELINE_LONG)))
+		}
 	}
 
-	if wait {
-		_, _ = p.Stderr.Writeln([]byte(fmt.Sprintf("warning: '%s' appears as a parameter elsewhere in the pipeline so I'm going to cache the file in RAM before writing to disk.\n       : this message can be suppressed using `%s` or `%s`.", filename, _WAIT_EOF_LONG, _DONT_CHECK_PIPELINE)))
-	} else {
+	if !wait {
 		return fn(p.Stdin, filename)
 	}
 

diff --git a/builtins/core/io/write_doc.yaml b/builtins/core/io/write_doc.yaml
@@ -12,13 +12,105 @@
     Otherwise a new file is created.
   Usage: |-
     ```
-    <stdin> |> filename
+    <stdin> |> [ -i | --ignore-pipeline-check ] filename
+   
+    <stdin> |> [ -w | --wait-for-eof ] filename
     ```
   Examples: |-
     ```
     g * |> files.txt
     ```
-  Detail:
+  Flags:
+    "--ignore-pipeline-check": >-
+      Don't check if _filename_ is a parameter for an earlier command in the pipeline
+    "-i": >-
+      Alias for `--ignore-pipeline-check`
+    
+    "--wait-for-eof": >-
+      Wait for stdin to return an EOF before opening _filename_
+    "-w": >-
+      Alias for `--wait-for-eof`
+  Detail: |-
+    ### Race Condition Detection
+
+    If no flags are specified, then `|>` will check if the filename supplied is
+    used in any parameters for other commands in the pipeline. If it has been, then
+    `|>` will wait for an **EOF** (End Of File) from stdin before opening _filename_.
+
+    This is to allow pipelines like the following:
+
+    ```
+    open example.log | regexp m/error/ |> example.log
+    ```
+
+    Under traditional shells and Murex's normal scheduler, all commands in a
+    pipeline would run concurrently. This leads to a race condition where `|>`
+    opens (and thus truncates) a file before other commands can read from it.
+
+    However, by default `|>` will check the pipeline for any other references
+    to _filename_ and, if any exist, it will wait for an EOF before truncating
+    _filename_.
+
+    This wait for EOF behaviour can be forced with the `--wait-for-eof` / `-w`
+    flag.
+
+    Alternatively, if you want to force `|>` to run concurrently then you can
+    disable the pipeline check with the `--ignore-pipeline-check` / `-i` flag.
+
+    #### High Memory Usage
+
+    > WARNING! Waiting for EOF will cause `|>` to cache the pipeline into RAM.
+    > If your pipeline is parsing multi-gigabyte or larger files then you may
+    > experience performance issues.
+    
+    For large datasets, it might be preferable to write to a temporary file first.
+    
+    ```
+    open example.log | regexp m/error/ |> example.log.tmp
+    mv example.log.tmp example.log
+    ```
+
+    The move operation should be instantaneous on most filesystems because your
+    operating system will just alter filesystem metadata rather than move the file
+    contents.
+
+    ### Flag Without A Filename
+
+    If you specify a flag without a filename, eg `|> --wait-for-eof`, then it is
+    assumed that the flag _is_ the filename.
+
+    ### Syntactic Sugar
+
+    While `|>` is referred to as an operator, it's actually a pipe followed by a
+    builtin:
+    
+    ```
+    out "foobar" | > example.txt
+    ```
+
+    Thus you can actually use `>` by itself.
+
+    ### Creating An Empty File
+
+    If `>` is at the start of a pipeline then it is treated as null input. This
+    is a convenient shortcut to create an empty file or blank an existing file.
+
+    **Create a new empty file:**
+
+    ```
+    > newfile
+    ```
+
+    **Clear a large log file without deleting the file itself:**
+
+    ```
+    > large.log
+    ```
+
+    ### Appending A File
+
+    To append to a file (ie write at the end of the file without overwriting its
+    contents) use `>>` instead.
   Synonyms:
     - ">"
     - "|>"
@@ -32,3 +124,60 @@
     - pipe-posix
     - namedpipe
     - pipe-err
+    - schedulers
+
+
+
+- DocumentID: file-append
+  Title: >-
+     `>>` Append File
+  CategoryID: parser
+  SubCategoryIDs: [ parser.pipes ]
+  Summary: >-
+    Writes stdin to disk - appending contents if file already exists
+  Description: |-
+    This is used to redirect the stdout of a command and append it to a file. If
+    that file does not exist, then the file is created.
+
+    This behaves similarly to the [Bash (et al) token](https://www.gnu.org/software/bash/manual/bash.html#Appending-Redirected-Output)
+    except it doesn't support adding alternative file descriptor numbers. Instead
+    you will need to use named pipes to achieve the same effect in Murex.
+  Examples: |-
+    ```
+    » out "Hello" >> example.txt
+    » out "World!" >> example.txt
+    » open example.txt
+    Hello
+    World!
+    ```
+  Detail: |-
+    ### Syntactic Sugar
+
+    This is just syntactic sugar for `-> >>`. Thus when the parser reads code like
+    the following:
+    
+    ```
+    out "foobar" >> example.txt
+    ```
+
+    it will compile an abstract syntax tree which would reflect the following code
+    instead:
+
+    ```
+    out "foobar" | >> example.txt
+    ```
+
+    ### Truncating A File
+
+    To truncate a file (ie overwrite its contents) use `|>` instead.
+  Synonyms:
+    - ">>"
+    - "fappend"
+  Related:
+  - pipe
+  - pipe-arrow
+  - pipe-posix
+  - namedpipe
+  - pipeline
+  - file-truncate
+  - out
diff --git a/builtins/core/io/write_test.go b/builtins/core/io/write_test.go
@@ -4,7 +4,6 @@ import (
 	"fmt"
 	"testing"
 
-	_ "github.com/lmorg/murex/builtins"
 	"github.com/lmorg/murex/test"
 )
 

diff --git a/builtins/core/io/write_unix_test.go b/builtins/core/io/write_unix_test.go
@@ -0,0 +1,132 @@
+//go:build !windows
+// +build !windows
+
+package io
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/lmorg/murex/test"
+)
+
+func TestWriteFilePipelineFlags(t *testing.T) {
+	file := t.TempDir()
+	file += "/TestWriteFilePipelineFlags"
+
+	tests := []test.MurexTest{
+		{
+			Block: fmt.Sprintf(`a [0..20] |> %[1]s.%[2]d; open %[1]s.%[2]d -> regexp m/0/ |> %[1]s.%[2]d; open %[1]s.%[2]d`,
+				file, 0),
+			Stdout: "^0\n10\n20\n$",
+			Stderr: "^warning",
+		},
+		{
+			Block: fmt.Sprintf(`a [0..20] |> %[1]s.%[2]d; open %[1]s.%[2]d -> regexp m/0/ |> %[3]s %[1]s.%[2]d; open %[1]s.%[2]d`,
+				file, 1, _WAIT_EOF_SHORT),
+			Stdout: "^0\n10\n20\n$",
+			Stderr: "^$",
+		},
+		{
+			Block: fmt.Sprintf(`a [0..20] |> %[1]s.%[2]d; open %[1]s.%[2]d -> regexp m/0/ |> %[3]s %[1]s.%[2]d; open %[1]s.%[2]d`,
+				file, 2, _WAIT_EOF_LONG),
+			Stdout: "^0\n10\n20\n$",
+			Stderr: "^$",
+		},
+		{
+			Block: fmt.Sprintf(`a [0..20] |> %[1]s.%[2]d; open %[1]s.%[2]d -> regexp s/0// |> %[3]s %[1]s.%[2]d; g <!null> %[3]s`,
+				file, 3, _IGNORE_PIPELINE_SHORT),
+			Stdout:  "^$",
+			Stderr:  "^$",
+			ExitNum: 1, // just because of ending g
+		},
+		{
+			Block: fmt.Sprintf(`a [0..20] |> %[1]s.%[2]d; open %[1]s.%[2]d -> regexp s/0// |> %[3]s %[1]s.%[2]d; g <!null> %[3]s`,
+				file, 4, _IGNORE_PIPELINE_LONG), Stdout: "^$",
+			Stderr:  "^$",
+			ExitNum: 1, // just because of ending g
+		},
+
+		// two tests here because the regexp is being quirky and I want to check beginning and end of string
+		{
+			Block: fmt.Sprintf(`g %[3]s; a [0..20] |> %[1]s.%[2]d; open %[1]s.%[2]d -> regexp s/0// |> %[3]s; rm -- %[3]s`,
+				file, 5, _WAIT_EOF_SHORT),
+			Stdout: "^$",
+			Stderr: `^Error in .g.`,
+		},
+		{
+			Block: fmt.Sprintf(`g %[3]s; a [0..20] |> %[1]s.%[2]d; open %[1]s.%[2]d -> regexp s/0// |> %[3]s; rm -- %[3]s`,
+				file, 6, _WAIT_EOF_SHORT),
+			Stdout: "^$",
+			Stderr: `Error: no data returned\n$`,
+		},
+		// two tests here because the regexp is being quirky and I want to check beginning and end of string
+		{
+			Block: fmt.Sprintf(`g %[3]s; a [0..20] |> %[1]s.%[2]d; open %[1]s.%[2]d -> regexp s/0// |> %[3]s; rm -- %[3]s`,
+				file, 5, _WAIT_EOF_LONG),
+			Stdout: "^$",
+			Stderr: `^Error in .g.`,
+		},
+		{
+			Block: fmt.Sprintf(`g %[3]s; a [0..20] |> %[1]s.%[2]d; open %[1]s.%[2]d -> regexp s/0// |> %[3]s; rm -- %[3]s`,
+				file, 6, _WAIT_EOF_LONG),
+			Stdout: "^$",
+			Stderr: `Error: no data returned\n$`,
+		},
+		// two tests here because the regexp is being quirky and I want to check beginning and end of string
+		{
+			Block: fmt.Sprintf(`g %[3]s; a [0..20] |> %[1]s.%[2]d; open %[1]s.%[2]d -> regexp s/0// |> %[3]s; rm -- %[3]s`,
+				file, 7, _IGNORE_PIPELINE_SHORT),
+			Stdout: "^$",
+			Stderr: `^Error in .g.`,
+		},
+		{
+			Block: fmt.Sprintf(`g %[3]s; a [0..20] |> %[1]s.%[2]d; open %[1]s.%[2]d -> regexp s/0// |> %[3]s; rm -- %[3]s`,
+				file, 8, _IGNORE_PIPELINE_SHORT),
+			Stdout: "^$",
+			Stderr: `Error: no data returned\n$`,
+		},
+		// two tests here because the regexp is being quirky and I want to check beginning and end of string
+		{
+			Block: fmt.Sprintf(`g %[3]s; a [0..20] |> %[1]s.%[2]d; open %[1]s.%[2]d -> regexp s/0// |> %[3]s; rm -- %[3]s`,
+				file, 5, _IGNORE_PIPELINE_LONG),
+			Stdout: "^$",
+			Stderr: `^Error in .g.`,
+		},
+		{
+			Block: fmt.Sprintf(`g %[3]s; a [0..20] |> %[1]s.%[2]d; open %[1]s.%[2]d -> regexp s/0// |> %[3]s; rm -- %[3]s`,
+				file, 6, _IGNORE_PIPELINE_LONG),
+			Stdout: "^$",
+			Stderr: `Error: no data returned\n$`,
+		},
+	}
+
+	test.RunMurexTestsRx(tests, t)
+}
+
+func TestWriteEmptyFile(t *testing.T) {
+	file := t.TempDir()
+	file += "/TestWriteFilePipelineFlags"
+
+	tests := []test.MurexTest{
+		// two tests here because the regexp is being quirky and I want to check beginning and end of string
+		{
+			Block: fmt.Sprintf(`g %[1]s.%[2]d; > %[1]s.%[2]d; g %[1]s.%[2]d`,
+				file, 0),
+			Stdout: fmt.Sprintf(`^\[\"%[1]s.%[2]d\"\]\n$`,
+				file, 0),
+			Stderr:  `^Error in .g.`,
+			ExitNum: 0, // no error because last command succeeded
+		},
+		{
+			Block: fmt.Sprintf(`g %[1]s.%[2]d; > %[1]s.%[2]d; g %[1]s.%[2]d`,
+				file, 1),
+			Stdout: fmt.Sprintf(`^\[\"%[1]s.%[2]d\"\]\n$`,
+				file, 1),
+			Stderr:  `Error: no data returned\n$`,
+			ExitNum: 0, // no error because last command succeeded
+		},
+	}
+
+	test.RunMurexTestsRx(tests, t)
+}
-Original file line number
+Diff line change
@@ Expand Up / @@ -4,7 +4,6 @@ import ( @@
     	"fmt"
     	"testing"
-    	_ "github.com/lmorg/murex/builtins"
     	"github.com/lmorg/murex/test"
     )
@@ Expand Down @@