package dotenv import ( "bytes" "errors" "fmt" "regexp" "strconv" "strings" "unicode" ) const ( charComment = '#' prefixSingleQuote = '\'' prefixDoubleQuote = '"' ) var ( escapeSeqRegex = regexp.MustCompile(`(\\(?:[abcfnrtv$"\\]|0\d{0,3}))`) exportRegex = regexp.MustCompile(`^export\s+`) ) type parser struct { line int } func newParser() *parser { return &parser{ line: 1, } } func (p *parser) parseBytes(src []byte, out map[string]string, lookupFn LookupFn) error { cutset := src if lookupFn == nil { lookupFn = noLookupFn } for { cutset = p.getStatementStart(cutset) if cutset == nil { // reached end of file break } key, left, inherited, err := p.locateKeyName(cutset) if err != nil { return err } if strings.Contains(key, " ") { return fmt.Errorf("line %d: key cannot contain a space", p.line) } if inherited { value, ok := lookupFn(key) if ok { out[key] = value } cutset = left continue } value, left, err := p.extractVarValue(left, out, lookupFn) if err != nil { return err } out[key] = value cutset = left } return nil } // getStatementPosition returns position of statement begin. // // It skips any comment line or non-whitespace character. func (p *parser) getStatementStart(src []byte) []byte { pos := p.indexOfNonSpaceChar(src) if pos == -1 { return nil } src = src[pos:] if src[0] != charComment { return src } // skip comment section pos = bytes.IndexFunc(src, isCharFunc('\n')) if pos == -1 { return nil } return p.getStatementStart(src[pos:]) } // locateKeyName locates and parses key name and returns rest of slice func (p *parser) locateKeyName(src []byte) (string, []byte, bool, error) { var key string var inherited bool // trim "export" and space at beginning src = bytes.TrimLeftFunc(exportRegex.ReplaceAll(src, nil), isSpace) // locate key name end and validate it in single loop offset := 0 loop: for i, char := range src { rchar := rune(char) if isSpace(rchar) { continue } switch char { case '=', ':', '\n': // library also supports yaml-style value declaration key = string(src[0:i]) offset = i + 1 inherited = char == '\n' break loop case '_', '.', '-', '[', ']': default: // variable name should match [A-Za-z0-9_.-] if unicode.IsLetter(rchar) || unicode.IsNumber(rchar) { continue } return "", nil, inherited, fmt.Errorf( `line %d: unexpected character %q in variable name`, p.line, string(char)) } } if len(src) == 0 { return "", nil, inherited, errors.New("zero length string") } // trim whitespace key = strings.TrimRightFunc(key, unicode.IsSpace) cutset := bytes.TrimLeftFunc(src[offset:], isSpace) return key, cutset, inherited, nil } // extractVarValue extracts variable value and returns rest of slice func (p *parser) extractVarValue(src []byte, envMap map[string]string, lookupFn LookupFn) (string, []byte, error) { quote, isQuoted := hasQuotePrefix(src) if !isQuoted { // unquoted value - read until new line value, rest, _ := bytes.Cut(src, []byte("\n")) p.line++ // Remove inline comments on unquoted lines value, _, _ = bytes.Cut(value, []byte(" #")) value = bytes.TrimRightFunc(value, unicode.IsSpace) retVal, err := expandVariables(string(value), envMap, lookupFn) return retVal, rest, err } // lookup quoted string terminator for i := 1; i < len(src); i++ { if src[i] == '\n' { p.line++ } if char := src[i]; char != quote { continue } // skip escaped quote symbol (\" or \', depends on quote) if prevChar := src[i-1]; prevChar == '\\' { continue } // trim quotes value := string(src[1:i]) if quote == prefixDoubleQuote { // expand standard shell escape sequences & then interpolate // variables on the result retVal, err := expandVariables(expandEscapes(value), envMap, lookupFn) if err != nil { return "", nil, err } value = retVal } return value, src[i+1:], nil } // return formatted error if quoted string is not terminated valEndIndex := bytes.IndexFunc(src, isCharFunc('\n')) if valEndIndex == -1 { valEndIndex = len(src) } return "", nil, fmt.Errorf("line %d: unterminated quoted value %s", p.line, src[:valEndIndex]) } func expandEscapes(str string) string { out := escapeSeqRegex.ReplaceAllStringFunc(str, func(match string) string { if match == `\$` { // `\$` is not a Go escape sequence, the expansion parser uses // the special `$$` syntax // both `FOO=\$bar` and `FOO=$$bar` are valid in an env file and // will result in FOO w/ literal value of "$bar" (no interpolation) return "$$" } if strings.HasPrefix(match, `\0`) { // octal escape sequences in Go are not prefixed with `\0`, so // rewrite the prefix, e.g. `\0123` -> `\123` -> literal value "S" match = strings.Replace(match, `\0`, `\`, 1) } // use Go to unquote (unescape) the literal // see https://go.dev/ref/spec#Rune_literals // // NOTE: Go supports ADDITIONAL escapes like `\x` & `\u` & `\U`! // These are NOT supported, which is why we use a regex to find // only matches we support and then use `UnquoteChar` instead of a // `Unquote` on the entire value v, _, _, err := strconv.UnquoteChar(match, '"') if err != nil { return match } return string(v) }) return out } func (p *parser) indexOfNonSpaceChar(src []byte) int { return bytes.IndexFunc(src, func(r rune) bool { if r == '\n' { p.line++ } return !unicode.IsSpace(r) }) } // hasQuotePrefix reports whether charset starts with single or double quote and returns quote character func hasQuotePrefix(src []byte) (byte, bool) { if len(src) == 0 { return 0, false } switch quote := src[0]; quote { case prefixDoubleQuote, prefixSingleQuote: return quote, true // isQuoted default: return 0, false } } func isCharFunc(char rune) func(rune) bool { return func(v rune) bool { return v == char } } // isSpace reports whether the rune is a space character but not line break character // // this differs from unicode.IsSpace, which also applies line break as space func isSpace(r rune) bool { switch r { case '\t', '\v', '\f', '\r', ' ', 0x85, 0xA0: return true } return false }