Skip to content

Commit

Permalink
truncate: updated docs
Browse files Browse the repository at this point in the history
  • Loading branch information
lmorg committed Sep 22, 2024
1 parent a95095c commit e23799d
Show file tree
Hide file tree
Showing 9 changed files with 412 additions and 77 deletions.
4 changes: 2 additions & 2 deletions app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ const Name = "murex"
const (
Major = 7
Minor = 0
Revision = 103
Revision = 107
Branch = "851/file-redirect"
BuildDate = "2024-09-21 01:33:30"
BuildDate = "2024-09-22 23:17:58"
)

// Copyright is the copyright owner string
Expand Down
38 changes: 23 additions & 15 deletions builtins/core/io/write.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@ func init() {
}

const (
_WAIT_EOF_LONG = "--wait-for-eof"
_WAIT_EOF_SHORT = "-w"
_DONT_CHECK_PIPELINE = "--ignore-pipeline-check"
_WAIT_EOF_SHORT = "-w"
_WAIT_EOF_LONG = "--wait-for-eof"
_IGNORE_PIPELINE_SHORT = "-i"
_IGNORE_PIPELINE_LONG = "--ignore-pipeline-check"
)

// cmdTruncateFile handles process p by delegating to writeFile, passing
// truncateFile as the function that performs the actual write.
func cmdTruncateFile(p *lang.Process) error {
	return writeFile(p, truncateFile)
}
Expand All @@ -41,28 +42,35 @@ func writeFile(p *lang.Process, fn func(io.Reader, string) error) error {
return fn(bytes.NewBuffer([]byte{}), filename)
}

if filename == _DONT_CHECK_PIPELINE {
filename, err = p.Parameters.String(1)
if err != nil {
return err
if filename == _IGNORE_PIPELINE_SHORT || filename == _IGNORE_PIPELINE_LONG {
parameter2, err := p.Parameters.String(1)
if err == nil {
return fn(p.Stdin, parameter2)
}
return fn(p.Stdin, filename)
// no second parameter so lets assume the flag was actually a file name
}

wait := filename == _WAIT_EOF_SHORT || filename == _WAIT_EOF_LONG

if wait {
filename, err = p.Parameters.String(1)
if err != nil {
return err
parameter2, err := p.Parameters.String(1)
if err == nil {
filename = parameter2

} else {
// no second parameter so lets assume the flag was actually a file name
wait = false
}
} else {
}

if !wait {
wait = isFileOpen(p, filename)
if wait {
_, _ = p.Stderr.Writeln([]byte(fmt.Sprintf("warning: '%s' appears as a parameter elsewhere in the pipeline so I'm going to cache the file in RAM before writing to disk.\n : This message can be suppressed using `%s` or `%s`.", filename, _WAIT_EOF_LONG, _IGNORE_PIPELINE_LONG)))
}
}

if wait {
_, _ = p.Stderr.Writeln([]byte(fmt.Sprintf("warning: '%s' appears as a parameter elsewhere in the pipeline so I'm going to cache the file in RAM before writing to disk.\n : this message can be suppressed using `%s` or `%s`.", filename, _WAIT_EOF_LONG, _DONT_CHECK_PIPELINE)))
} else {
if !wait {
return fn(p.Stdin, filename)
}

Expand Down
153 changes: 151 additions & 2 deletions builtins/core/io/write_doc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,105 @@
Otherwise a new file is created.
Usage: |-
```
<stdin> |> filename
<stdin> |> [ -i | --ignore-pipeline-check ] filename
<stdin> |> [ -w | --wait-for-eof ] filename
```
Examples: |-
```
g * |> files.txt
```
Detail:
Flags:
"--ignore-pipeline-check": >-
Don't check if _filename_ is a parameter for an earlier command in the pipeline
"-i": >-
Alias for `--ignore-pipeline-check`
"--wait-for-eof": >-
Wait for stdin to return an EOF before opening _filename_
"-w": >-
Alias for `--wait-for-eof`
Detail: |-
### Race Condition Detection
If no flags are specified, then `|>` will check if the filename supplied is
used in any parameters for other commands in the pipeline. If it has been, then
`|>` will wait for an **EOF** (End Of File) from stdin before opening _filename_.
This is to allow pipelines like the following:
```
open example.log | regexp m/error/ |> example.log
```
Under traditional shells and Murex's normal scheduler, all commands in a
pipeline would run concurrently. This leads to a race condition where `|>`
opens (and thus truncates) a file before other commands can read from it.
However, by default, `|>` will check the pipeline for any other
references to _filename_ and, if any exist, it will wait for an EOF before
`|>` truncates _filename_.
This wait for EOF behaviour can be forced with the `--wait-for-eof` / `-w`
flag.
Alternatively, if you want to force `|>` to run concurrently then you can
disable the pipeline check with the `--ignore-pipeline-check` / `-i` flag.
#### High Memory Usage
> WARNING! Waiting for EOF will cause `|>` to cache the pipeline into RAM.
> If your pipeline is parsing multi-gigabyte or larger files then you may
> experience performance issues.
For large datasets, it might be preferable to write to a temporary file first.
```
open example.log | regexp m/error/ |> example.log.tmp
mv example.log.tmp example.log
```
The move operation should be instantaneous on most filesystems because your
operating system will just alter filesystem metadata rather than move the file
contents.
### Flag Without A Filename
If you specify a flag without a filename, eg `|> --wait-for-eof`, then it is
assumed that the flag _is_ the filename.
### Syntactic Sugar
While `|>` is referred to as an operator, it's actually a pipe followed by a
builtin:
```
out "foobar" | > example.txt
```
Thus you can actually use `>` by itself.
### Creating An Empty File
If `>` is at the start of a pipeline then it is treated as null input. This is a
convenient shortcut to create an empty file or blank an existing file.
**Create a new empty file:**
```
> newfile
```
**Clear a large log file without deleting the file itself:**
```
> large.log
```
### Appending A File
To append a file (ie write at the end of the file without overwriting its
contents) use `>>` instead.
Synonyms:
- ">"
- "|>"
Expand All @@ -32,3 +124,60 @@
- pipe-posix
- namedpipe
- pipe-err
- schedulers



- DocumentID: file-append
Title: >-
`>>` Append File
CategoryID: parser
SubCategoryIDs: [ parser.pipes ]
Summary: >-
Writes stdin to disk - appending contents if file already exists
Description: |-
This is used to redirect the stdout of a command and append it to a file. If
that file does not exist, then the file is created.
This behaves similarly to the [Bash (et al) token](https://www.gnu.org/software/bash/manual/bash.html#Appending-Redirected-Output)
except it doesn't support adding alternative file descriptor numbers. Instead
you will need to use named pipes to achieve the same effect in Murex.
Examples: |-
```
» out "Hello" >> example.txt
» out "World!" >> example.txt
» open example.txt
Hello
World!
```
Detail: |-
### Syntactic Sugar
This is just syntactic sugar for `-> >>`. Thus when the parser reads code like
the following:
```
out "foobar" >> example.txt
```
it will compile an abstract syntax tree which would reflect the following code
instead:
```
out "foobar" | >> example.txt
```
### Truncating A File
To truncate a file (ie overwrite its contents) use `|>` instead.
Synonyms:
- ">>"
- "fappend"
Related:
- pipe
- pipe-arrow
- pipe-posix
- namedpipe
- pipeline
- file-truncate
- out
1 change: 0 additions & 1 deletion builtins/core/io/write_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"fmt"
"testing"

_ "github.com/lmorg/murex/builtins"
"github.com/lmorg/murex/test"
)

Expand Down
132 changes: 132 additions & 0 deletions builtins/core/io/write_unix_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
//go:build !windows
// +build !windows

package io

import (
"fmt"
"testing"

"github.com/lmorg/murex/test"
)

// TestWriteFilePipelineFlags runs the `|>` builtin through murex code blocks:
// once with no flag, once per flag followed by a file name, and once per flag
// given with no file name after it. Each case is checked against regular
// expressions for stdout and stderr via test.RunMurexTestsRx.
func TestWriteFilePipelineFlags(t *testing.T) {
	path := t.TempDir()
	path += "/TestWriteFilePipelineFlags"

	// Code block templates. %[1]s is the file path prefix, %[2]d the numeric
	// file suffix and %[3]s the flag under test.
	const (
		// Write 0..20 to the file, then filter the file back into itself with
		// no flag at all, then print it.
		blockNoFlag = `a [0..20] |> %[1]s.%[2]d; open %[1]s.%[2]d -> regexp m/0/ |> %[1]s.%[2]d; open %[1]s.%[2]d`

		// Same as blockNoFlag, but the flag precedes the file name.
		blockFlagThenFile = `a [0..20] |> %[1]s.%[2]d; open %[1]s.%[2]d -> regexp m/0/ |> %[3]s %[1]s.%[2]d; open %[1]s.%[2]d`

		// Flag precedes the file name; ends by running `g` against the flag
		// text instead of printing the file.
		blockFlagThenFileG = `a [0..20] |> %[1]s.%[2]d; open %[1]s.%[2]d -> regexp s/0// |> %[3]s %[1]s.%[2]d; g <!null> %[3]s`

		// Flag is the only parameter to `|>`. `g` is run against the flag text
		// first and `rm` removes it last.
		blockFlagOnly = `g %[3]s; a [0..20] |> %[1]s.%[2]d; open %[1]s.%[2]d -> regexp s/0// |> %[3]s; rm -- %[3]s`
	)

	// Expected output patterns.
	const (
		rxFiltered = "^0\n10\n20\n$"
		rxEmpty    = "^$"
		rxWarning  = "^warning"

		// The regexp is being quirky, so the start and the end of the error
		// from `g` are matched by two separate cases instead of one pattern.
		rxErrHead = `^Error in .g.`
		rxErrTail = `Error: no data returned\n$`
	)

	cases := []test.MurexTest{
		// no flag
		{
			Block:  fmt.Sprintf(blockNoFlag, path, 0),
			Stdout: rxFiltered,
			Stderr: rxWarning,
		},

		// wait-for-EOF flag followed by a file name
		{
			Block:  fmt.Sprintf(blockFlagThenFile, path, 1, _WAIT_EOF_SHORT),
			Stdout: rxFiltered,
			Stderr: rxEmpty,
		},
		{
			Block:  fmt.Sprintf(blockFlagThenFile, path, 2, _WAIT_EOF_LONG),
			Stdout: rxFiltered,
			Stderr: rxEmpty,
		},

		// ignore-pipeline-check flag followed by a file name
		{
			Block:   fmt.Sprintf(blockFlagThenFileG, path, 3, _IGNORE_PIPELINE_SHORT),
			Stdout:  rxEmpty,
			Stderr:  rxEmpty,
			ExitNum: 1, // only because of the trailing g
		},
		{
			Block:   fmt.Sprintf(blockFlagThenFileG, path, 4, _IGNORE_PIPELINE_LONG),
			Stdout:  rxEmpty,
			Stderr:  rxEmpty,
			ExitNum: 1, // only because of the trailing g
		},

		// flag with nothing after it: short wait-for-EOF
		{
			Block:  fmt.Sprintf(blockFlagOnly, path, 5, _WAIT_EOF_SHORT),
			Stdout: rxEmpty,
			Stderr: rxErrHead,
		},
		{
			Block:  fmt.Sprintf(blockFlagOnly, path, 6, _WAIT_EOF_SHORT),
			Stdout: rxEmpty,
			Stderr: rxErrTail,
		},

		// flag with nothing after it: long wait-for-EOF
		{
			Block:  fmt.Sprintf(blockFlagOnly, path, 5, _WAIT_EOF_LONG),
			Stdout: rxEmpty,
			Stderr: rxErrHead,
		},
		{
			Block:  fmt.Sprintf(blockFlagOnly, path, 6, _WAIT_EOF_LONG),
			Stdout: rxEmpty,
			Stderr: rxErrTail,
		},

		// flag with nothing after it: short ignore-pipeline-check
		{
			Block:  fmt.Sprintf(blockFlagOnly, path, 7, _IGNORE_PIPELINE_SHORT),
			Stdout: rxEmpty,
			Stderr: rxErrHead,
		},
		{
			Block:  fmt.Sprintf(blockFlagOnly, path, 8, _IGNORE_PIPELINE_SHORT),
			Stdout: rxEmpty,
			Stderr: rxErrTail,
		},

		// flag with nothing after it: long ignore-pipeline-check
		{
			Block:  fmt.Sprintf(blockFlagOnly, path, 5, _IGNORE_PIPELINE_LONG),
			Stdout: rxEmpty,
			Stderr: rxErrHead,
		},
		{
			Block:  fmt.Sprintf(blockFlagOnly, path, 6, _IGNORE_PIPELINE_LONG),
			Stdout: rxEmpty,
			Stderr: rxErrTail,
		},
	}

	test.RunMurexTestsRx(cases, t)
}

// TestWriteEmptyFile runs `g` on a file path, then `>` on that same path with
// nothing piped in, then `g` on it again. It expects stdout to be an array
// containing just that path and stderr to carry an error from `g`.
func TestWriteEmptyFile(t *testing.T) {
	// NOTE(review): the base name reuses "TestWriteFilePipelineFlags"; this
	// looks like a copy/paste leftover. It appears harmless because the
	// directory comes from this test's own t.TempDir() call.
	path := t.TempDir() + "/TestWriteFilePipelineFlags"

	const (
		blockFmt  = `g %[1]s.%[2]d; > %[1]s.%[2]d; g %[1]s.%[2]d`
		stdoutFmt = `^\[\"%[1]s.%[2]d\"\]\n$`
	)

	// The regexp is being quirky, so the beginning and the end of stderr are
	// checked by two separate cases (one per pattern, each with its own file).
	stderrPatterns := []string{
		`^Error in .g.`,
		`Error: no data returned\n$`,
	}

	cases := make([]test.MurexTest, 0, len(stderrPatterns))
	for n, rx := range stderrPatterns {
		cases = append(cases, test.MurexTest{
			Block:   fmt.Sprintf(blockFmt, path, n),
			Stdout:  fmt.Sprintf(stdoutFmt, path, n),
			Stderr:  rx,
			ExitNum: 0, // no error because last command succeeded
		})
	}

	test.RunMurexTestsRx(cases, t)
}
Loading

0 comments on commit e23799d

Please sign in to comment.